Fixed deduplicated files from orphan media being deleted as well
Happens when a file is uploaded, then orphaned, and a similar file is used somewhere. The CleanMedia job service didn't consider that case Signed-off-by: Thomas Citharel <tcit@tcit.fr>
This commit is contained in:
parent
8eacf29705
commit
241710807c
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -31,6 +31,7 @@ priv/cert/
|
||||||
cover/
|
cover/
|
||||||
site/
|
site/
|
||||||
test/fixtures/image_tmp.jpg
|
test/fixtures/image_tmp.jpg
|
||||||
|
test/fixtures/picture_tmp.png
|
||||||
test/fixtures/DSCN0010_tmp.jpg
|
test/fixtures/DSCN0010_tmp.jpg
|
||||||
test/uploads/
|
test/uploads/
|
||||||
uploads/*
|
uploads/*
|
||||||
|
|
|
@ -1,107 +0,0 @@
|
||||||
defmodule Mix.Tasks.Mobilizon.Maintenance.FixUnattachedMediaInBody do
|
|
||||||
@moduledoc """
|
|
||||||
Task to reattach media files that were added in event, post or comment bodies without being attached to their entities.
|
|
||||||
|
|
||||||
This task should only be run once.
|
|
||||||
"""
|
|
||||||
use Mix.Task
|
|
||||||
|
|
||||||
import Mix.Tasks.Mobilizon.Common
|
|
||||||
alias Mobilizon.{Discussions, Events, Medias, Posts}
|
|
||||||
alias Mobilizon.Discussions.Comment
|
|
||||||
alias Mobilizon.Events.Event
|
|
||||||
alias Mobilizon.Posts.Post
|
|
||||||
alias Mobilizon.Storage.Repo
|
|
||||||
require Logger
|
|
||||||
|
|
||||||
@preferred_cli_env "prod"
|
|
||||||
|
|
||||||
# TODO: Remove me in Mobilizon 1.2
|
|
||||||
|
|
||||||
@shortdoc "Reattaches inline media from events and posts"
|
|
||||||
def run([]) do
|
|
||||||
start_mobilizon()
|
|
||||||
|
|
||||||
shell_info("Going to extract pictures from events")
|
|
||||||
extract_inline_pictures_from_bodies(Event)
|
|
||||||
shell_info("Going to extract pictures from posts")
|
|
||||||
extract_inline_pictures_from_bodies(Post)
|
|
||||||
shell_info("Going to extract pictures from comments")
|
|
||||||
extract_inline_pictures_from_bodies(Comment)
|
|
||||||
end
|
|
||||||
|
|
||||||
defp extract_inline_pictures_from_bodies(entity) do
|
|
||||||
Repo.transaction(
|
|
||||||
fn ->
|
|
||||||
entity
|
|
||||||
|> Repo.stream()
|
|
||||||
|> Stream.map(&extract_pictures(&1))
|
|
||||||
|> Stream.map(fn {entity, pics} -> save_entity(entity, pics) end)
|
|
||||||
|> Stream.run()
|
|
||||||
end,
|
|
||||||
timeout: :infinity
|
|
||||||
)
|
|
||||||
end
|
|
||||||
|
|
||||||
defp extract_pictures(entity) do
|
|
||||||
extracted_pictures = entity |> get_body() |> parse_body() |> get_media_entities_from_urls()
|
|
||||||
|
|
||||||
attached_picture = entity |> get_picture() |> get_media_entity_from_media_id()
|
|
||||||
attached_pictures = [attached_picture] |> Enum.filter(& &1)
|
|
||||||
|
|
||||||
{entity, extracted_pictures ++ attached_pictures}
|
|
||||||
end
|
|
||||||
|
|
||||||
defp get_body(%Event{description: description}), do: description
|
|
||||||
defp get_body(%Post{body: body}), do: body
|
|
||||||
defp get_body(%Comment{text: text}), do: text
|
|
||||||
|
|
||||||
defp get_picture(%Event{picture_id: picture_id}), do: picture_id
|
|
||||||
defp get_picture(%Post{picture_id: picture_id}), do: picture_id
|
|
||||||
defp get_picture(%Comment{}), do: nil
|
|
||||||
|
|
||||||
defp parse_body(nil), do: []
|
|
||||||
|
|
||||||
defp parse_body(body) do
|
|
||||||
with res <- Regex.scan(~r/<img\s[^>]*?src\s*=\s*['\"]([^'\"]*?)['\"][^>]*?>/, body),
|
|
||||||
res <- Enum.map(res, fn [_, res] -> res end) do
|
|
||||||
res
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
defp get_media_entities_from_urls(media_urls) do
|
|
||||||
media_urls
|
|
||||||
|> Enum.map(fn media_url ->
|
|
||||||
# We prefer orphan media, but fallback on already attached media just in case
|
|
||||||
Medias.get_unattached_media_by_url(media_url) || Medias.get_media_by_url(media_url)
|
|
||||||
end)
|
|
||||||
|> Enum.filter(& &1)
|
|
||||||
end
|
|
||||||
|
|
||||||
defp get_media_entity_from_media_id(nil), do: nil
|
|
||||||
|
|
||||||
defp get_media_entity_from_media_id(media_id) do
|
|
||||||
Medias.get_media(media_id)
|
|
||||||
end
|
|
||||||
|
|
||||||
defp save_entity(%Event{} = _event, []), do: :ok
|
|
||||||
|
|
||||||
defp save_entity(%Event{} = event, media) do
|
|
||||||
event = Repo.preload(event, [:contacts, :media])
|
|
||||||
Events.update_event(event, %{media: media})
|
|
||||||
end
|
|
||||||
|
|
||||||
defp save_entity(%Post{} = _post, []), do: :ok
|
|
||||||
|
|
||||||
defp save_entity(%Post{} = post, media) do
|
|
||||||
post = Repo.preload(post, [:media])
|
|
||||||
Posts.update_post(post, %{media: media})
|
|
||||||
end
|
|
||||||
|
|
||||||
defp save_entity(%Comment{} = _comment, []), do: :ok
|
|
||||||
|
|
||||||
defp save_entity(%Comment{} = comment, media) do
|
|
||||||
comment = Repo.preload(comment, [:media])
|
|
||||||
Discussions.update_comment(comment, %{media: media})
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -64,7 +64,9 @@ defmodule Mix.Tasks.Mobilizon.Media.CleanOrphan do
|
||||||
end
|
end
|
||||||
|
|
||||||
Enum.each(medias, fn media ->
|
Enum.each(medias, fn media ->
|
||||||
shell_info("ID: #{media.id}, Actor: #{media.actor_id}, URL: #{media.file.url}")
|
shell_info(
|
||||||
|
"ID: #{hd(media).id}, Actor: #{hd(media).actor_id}, Deduplicated #{length(media)} times, URL: #{hd(media).file.url}"
|
||||||
|
)
|
||||||
end)
|
end)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@ defmodule Mobilizon.Medias do
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import Ecto.Query
|
import Ecto.Query
|
||||||
|
import Mobilizon.Storage.CustomFunctions
|
||||||
|
|
||||||
alias Ecto.Multi
|
alias Ecto.Multi
|
||||||
|
|
||||||
|
@ -40,23 +41,15 @@ defmodule Mobilizon.Medias do
|
||||||
end
|
end
|
||||||
|
|
||||||
@doc """
|
@doc """
|
||||||
Get an unattached media by it's URL
|
Get all media by an URL.
|
||||||
"""
|
"""
|
||||||
def get_unattached_media_by_url(url) do
|
@spec get_all_media_by_url(String.t()) :: Media.t() | nil
|
||||||
|
def get_all_media_by_url(url) do
|
||||||
url
|
url
|
||||||
|
|> String.split("?", parts: 2)
|
||||||
|
|> hd
|
||||||
|> media_by_url_query()
|
|> media_by_url_query()
|
||||||
|> join(:left, [m], e in assoc(m, :events))
|
|> Repo.all()
|
||||||
|> join(:left, [m], ep in assoc(m, :event_picture))
|
|
||||||
|> join(:left, [m], p in assoc(m, :posts))
|
|
||||||
|> join(:left, [m], pp in assoc(m, :posts_picture))
|
|
||||||
|> join(:left, [m], c in assoc(m, :comments))
|
|
||||||
|> where([_m, e], is_nil(e.id))
|
|
||||||
|> where([_m, _e, ep], is_nil(ep.id))
|
|
||||||
|> where([_m, _e, _ep, p], is_nil(p.id))
|
|
||||||
|> where([_m, _e, _ep, _p, pp], is_nil(pp.id))
|
|
||||||
|> where([_m, _e, _ep, _p, _pp, c], is_nil(c.id))
|
|
||||||
|> limit(1)
|
|
||||||
|> Repo.one()
|
|
||||||
end
|
end
|
||||||
|
|
||||||
@doc """
|
@doc """
|
||||||
|
@ -163,7 +156,7 @@ defmodule Mobilizon.Medias do
|
||||||
defp media_by_url_query(url) do
|
defp media_by_url_query(url) do
|
||||||
from(
|
from(
|
||||||
p in Media,
|
p in Media,
|
||||||
where: fragment("? @> ?", p.file, ~s|{"url": "#{url}"}|)
|
where: split_part(fragment("file->>'url'"), "?", 1) == ^url
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
10
lib/mobilizon/storage/custom_functions.ex
Normal file
10
lib/mobilizon/storage/custom_functions.ex
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
defmodule Mobilizon.Storage.CustomFunctions do
|
||||||
|
@moduledoc """
|
||||||
|
Helper module for custom PostgreSQL functions
|
||||||
|
"""
|
||||||
|
defmacro split_part(string, delimiter, position) do
|
||||||
|
quote do
|
||||||
|
fragment("split_part(?, ?, ?)", unquote(string), unquote(delimiter), unquote(position))
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
|
@ -5,6 +5,7 @@ defmodule Mobilizon.Service.CleanOrphanMedia do
|
||||||
|
|
||||||
alias Mobilizon.Actors.Actor
|
alias Mobilizon.Actors.Actor
|
||||||
alias Mobilizon.Medias
|
alias Mobilizon.Medias
|
||||||
|
alias Mobilizon.Medias.File
|
||||||
alias Mobilizon.Medias.Media
|
alias Mobilizon.Medias.Media
|
||||||
alias Mobilizon.Storage.Repo
|
alias Mobilizon.Storage.Repo
|
||||||
import Ecto.Query
|
import Ecto.Query
|
||||||
|
@ -25,8 +26,10 @@ defmodule Mobilizon.Service.CleanOrphanMedia do
|
||||||
if Keyword.get(opts, :dry_run, false) do
|
if Keyword.get(opts, :dry_run, false) do
|
||||||
{:ok, medias}
|
{:ok, medias}
|
||||||
else
|
else
|
||||||
Enum.each(medias, fn media ->
|
Enum.each(medias, fn media_list ->
|
||||||
Medias.delete_media(media, ignore_file_not_found: true)
|
Enum.each(media_list, fn media ->
|
||||||
|
Medias.delete_media(media, ignore_file_not_found: true)
|
||||||
|
end)
|
||||||
end)
|
end)
|
||||||
|
|
||||||
{:ok, medias}
|
{:ok, medias}
|
||||||
|
@ -49,7 +52,7 @@ defmodule Mobilizon.Service.CleanOrphanMedia do
|
||||||
|> Enum.join(" UNION ")
|
|> Enum.join(" UNION ")
|
||||||
|> (&"NOT EXISTS(#{&1})").()
|
|> (&"NOT EXISTS(#{&1})").()
|
||||||
|
|
||||||
@spec find_media(Keyword.t()) :: list(Media.t())
|
@spec find_media(Keyword.t()) :: list(list(Media.t()))
|
||||||
defp find_media(opts) do
|
defp find_media(opts) do
|
||||||
default_grace_period =
|
default_grace_period =
|
||||||
Mobilizon.Config.get([:instance, :orphan_upload_grace_period_hours], 48)
|
Mobilizon.Config.get([:instance, :orphan_upload_grace_period_hours], 48)
|
||||||
|
@ -68,6 +71,38 @@ defmodule Mobilizon.Service.CleanOrphanMedia do
|
||||||
where: fragment(@union_query)
|
where: fragment(@union_query)
|
||||||
)
|
)
|
||||||
|
|
||||||
Repo.all(query)
|
query
|
||||||
|
|> Repo.all()
|
||||||
|
|> Enum.filter(fn %Media{file: %File{url: url}} ->
|
||||||
|
is_all_media_orphan?(url, expiration_date)
|
||||||
|
end)
|
||||||
|
|> Enum.chunk_by(fn %Media{file: %File{url: url}} ->
|
||||||
|
url
|
||||||
|
|> String.split("?", parts: 2)
|
||||||
|
|> hd
|
||||||
|
end)
|
||||||
|
end
|
||||||
|
|
||||||
|
def is_all_media_orphan?(url, expiration_date) do
|
||||||
|
url
|
||||||
|
|> Medias.get_all_media_by_url()
|
||||||
|
|> Enum.all?(&is_media_orphan?(&1, expiration_date))
|
||||||
|
end
|
||||||
|
|
||||||
|
@spec is_media_orphan?(Media.t(), DateTime.t()) :: boolean()
|
||||||
|
def is_media_orphan?(%Media{id: media_id}, expiration_date) do
|
||||||
|
query =
|
||||||
|
from(m in Media,
|
||||||
|
as: :media,
|
||||||
|
distinct: true,
|
||||||
|
join: a in Actor,
|
||||||
|
on: a.id == m.actor_id,
|
||||||
|
where: m.id == ^media_id,
|
||||||
|
where: is_nil(a.domain),
|
||||||
|
where: m.inserted_at < ^expiration_date,
|
||||||
|
where: fragment(@union_query)
|
||||||
|
)
|
||||||
|
|
||||||
|
Repo.exists?(query)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -16,7 +16,7 @@ defmodule Mobilizon.Service.CleanOrphanMediaTest do
|
||||||
refute is_nil(Medias.get_media(media_id))
|
refute is_nil(Medias.get_media(media_id))
|
||||||
refute is_nil(Medias.get_media(media_2_id))
|
refute is_nil(Medias.get_media(media_2_id))
|
||||||
|
|
||||||
assert {:ok, [found_media]} = CleanOrphanMedia.clean()
|
assert {:ok, [[found_media]]} = CleanOrphanMedia.clean()
|
||||||
assert found_media.id == media_id
|
assert found_media.id == media_id
|
||||||
|
|
||||||
assert is_nil(Medias.get_media(media_id))
|
assert is_nil(Medias.get_media(media_id))
|
||||||
|
@ -31,7 +31,7 @@ defmodule Mobilizon.Service.CleanOrphanMediaTest do
|
||||||
refute is_nil(Medias.get_media(media_id))
|
refute is_nil(Medias.get_media(media_id))
|
||||||
refute is_nil(Medias.get_media(media_2_id))
|
refute is_nil(Medias.get_media(media_2_id))
|
||||||
|
|
||||||
assert {:ok, [found_media]} = CleanOrphanMedia.clean(dry_run: true)
|
assert {:ok, [[found_media]]} = CleanOrphanMedia.clean(dry_run: true)
|
||||||
assert found_media.id == media_id
|
assert found_media.id == media_id
|
||||||
|
|
||||||
refute is_nil(Medias.get_media(media_id))
|
refute is_nil(Medias.get_media(media_id))
|
||||||
|
@ -46,7 +46,7 @@ defmodule Mobilizon.Service.CleanOrphanMediaTest do
|
||||||
refute is_nil(Medias.get_media(media_id))
|
refute is_nil(Medias.get_media(media_id))
|
||||||
refute is_nil(Medias.get_media(media_2_id))
|
refute is_nil(Medias.get_media(media_2_id))
|
||||||
|
|
||||||
assert {:ok, [found_media]} = CleanOrphanMedia.clean(grace_period: 12)
|
assert {:ok, [[found_media]]} = CleanOrphanMedia.clean(grace_period: 12)
|
||||||
assert found_media.id == media_id
|
assert found_media.id == media_id
|
||||||
|
|
||||||
assert is_nil(Medias.get_media(media_id))
|
assert is_nil(Medias.get_media(media_id))
|
||||||
|
|
|
@ -280,7 +280,7 @@ defmodule Mobilizon.Factory do
|
||||||
%Mobilizon.Medias.File{
|
%Mobilizon.Medias.File{
|
||||||
name: "My Media",
|
name: "My Media",
|
||||||
url: url,
|
url: url,
|
||||||
content_type: "image/png",
|
content_type: "image/jpg",
|
||||||
size: 13_120
|
size: 13_120
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
|
@ -43,21 +43,22 @@ defmodule Mix.Tasks.Mobilizon.Media.CleanOrphanTest do
|
||||||
test "with media returned" do
|
test "with media returned" do
|
||||||
media1 = insert(:media)
|
media1 = insert(:media)
|
||||||
media2 = insert(:media)
|
media2 = insert(:media)
|
||||||
|
media3 = insert(:media, file: create_file())
|
||||||
|
|
||||||
with_mock CleanOrphanMedia,
|
with_mock CleanOrphanMedia,
|
||||||
clean: fn [dry_run: true, grace_period: 48] -> {:ok, [media1, media2]} end do
|
clean: fn [dry_run: true, grace_period: 48] -> {:ok, [[media1, media2], [media3]]} end do
|
||||||
CleanOrphan.run(["--dry-run"])
|
CleanOrphan.run(["--dry-run"])
|
||||||
assert_received {:mix_shell, :info, [output_received]}
|
assert_received {:mix_shell, :info, [output_received]}
|
||||||
assert output_received == "List of files that would have been deleted"
|
assert output_received == "List of files that would have been deleted"
|
||||||
assert_received {:mix_shell, :info, [output_received]}
|
assert_received {:mix_shell, :info, [output_received]}
|
||||||
|
|
||||||
assert output_received ==
|
assert output_received ==
|
||||||
"ID: #{media1.id}, Actor: #{media1.actor_id}, URL: #{media1.file.url}"
|
"ID: #{media1.id}, Actor: #{media1.actor_id}, Deduplicated 2 times, URL: #{media1.file.url}"
|
||||||
|
|
||||||
assert_received {:mix_shell, :info, [output_received]}
|
assert_received {:mix_shell, :info, [output_received]}
|
||||||
|
|
||||||
assert output_received ==
|
assert output_received ==
|
||||||
"ID: #{media2.id}, Actor: #{media2.actor_id}, URL: #{media2.file.url}"
|
"ID: #{media3.id}, Actor: #{media3.actor_id}, Deduplicated 1 times, URL: #{media3.file.url}"
|
||||||
|
|
||||||
assert_received {:mix_shell, :info, [output_received]}
|
assert_received {:mix_shell, :info, [output_received]}
|
||||||
assert output_received == "2 files would have been deleted"
|
assert output_received == "2 files would have been deleted"
|
||||||
|
@ -78,21 +79,22 @@ defmodule Mix.Tasks.Mobilizon.Media.CleanOrphanTest do
|
||||||
test "with media returned" do
|
test "with media returned" do
|
||||||
media1 = insert(:media)
|
media1 = insert(:media)
|
||||||
media2 = insert(:media)
|
media2 = insert(:media)
|
||||||
|
media3 = insert(:media, file: create_file())
|
||||||
|
|
||||||
with_mock CleanOrphanMedia,
|
with_mock CleanOrphanMedia,
|
||||||
clean: fn [dry_run: false, grace_period: 48] -> {:ok, [media1, media2]} end do
|
clean: fn [dry_run: false, grace_period: 48] -> {:ok, [[media1, media2], [media3]]} end do
|
||||||
CleanOrphan.run(["--verbose"])
|
CleanOrphan.run(["--verbose"])
|
||||||
assert_received {:mix_shell, :info, [output_received]}
|
assert_received {:mix_shell, :info, [output_received]}
|
||||||
assert output_received == "List of files that have been deleted"
|
assert output_received == "List of files that have been deleted"
|
||||||
assert_received {:mix_shell, :info, [output_received]}
|
assert_received {:mix_shell, :info, [output_received]}
|
||||||
|
|
||||||
assert output_received ==
|
assert output_received ==
|
||||||
"ID: #{media1.id}, Actor: #{media1.actor_id}, URL: #{media1.file.url}"
|
"ID: #{media1.id}, Actor: #{media1.actor_id}, Deduplicated 2 times, URL: #{media1.file.url}"
|
||||||
|
|
||||||
assert_received {:mix_shell, :info, [output_received]}
|
assert_received {:mix_shell, :info, [output_received]}
|
||||||
|
|
||||||
assert output_received ==
|
assert output_received ==
|
||||||
"ID: #{media2.id}, Actor: #{media2.actor_id}, URL: #{media2.file.url}"
|
"ID: #{media3.id}, Actor: #{media3.actor_id}, Deduplicated 1 times, URL: #{media3.file.url}"
|
||||||
|
|
||||||
assert_received {:mix_shell, :info, [output_received]}
|
assert_received {:mix_shell, :info, [output_received]}
|
||||||
assert output_received == "2 files have been deleted"
|
assert output_received == "2 files have been deleted"
|
||||||
|
@ -121,4 +123,29 @@ defmodule Mix.Tasks.Mobilizon.Media.CleanOrphanTest do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
defp create_file do
|
||||||
|
File.cp!("test/fixtures/picture.png", "test/fixtures/picture_tmp.png")
|
||||||
|
|
||||||
|
file = %Plug.Upload{
|
||||||
|
content_type: "image/png",
|
||||||
|
path: Path.absname("test/fixtures/picture_tmp.png"),
|
||||||
|
filename: "image.png"
|
||||||
|
}
|
||||||
|
|
||||||
|
{:ok, data} = Mobilizon.Web.Upload.store(file)
|
||||||
|
|
||||||
|
%{
|
||||||
|
content_type: "image/png",
|
||||||
|
name: "image.png",
|
||||||
|
url: url
|
||||||
|
} = data
|
||||||
|
|
||||||
|
%Mobilizon.Medias.File{
|
||||||
|
name: "My Media",
|
||||||
|
url: url,
|
||||||
|
content_type: "image/png",
|
||||||
|
size: 13_120
|
||||||
|
}
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue