Fixed deduplicated files from orphan media being deleted as well
Happens when a file is uploaded, then orphaned, and a similar file is used somewhere. The CleanMedia job service didn't consider that case Signed-off-by: Thomas Citharel <tcit@tcit.fr>
This commit is contained in:
parent
1ac2990cda
commit
ab843dff4c
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -31,6 +31,7 @@ priv/cert/
|
|||
cover/
|
||||
site/
|
||||
test/fixtures/image_tmp.jpg
|
||||
test/fixtures/picture_tmp.png
|
||||
test/fixtures/DSCN0010_tmp.jpg
|
||||
test/uploads/
|
||||
uploads/*
|
||||
|
|
|
@ -1,107 +0,0 @@
|
|||
defmodule Mix.Tasks.Mobilizon.Maintenance.FixUnattachedMediaInBody do
|
||||
@moduledoc """
|
||||
Task to reattach media files that were added in event, post or comment bodies without being attached to their entities.
|
||||
|
||||
This task should only be run once.
|
||||
"""
|
||||
use Mix.Task
|
||||
|
||||
import Mix.Tasks.Mobilizon.Common
|
||||
alias Mobilizon.{Discussions, Events, Medias, Posts}
|
||||
alias Mobilizon.Discussions.Comment
|
||||
alias Mobilizon.Events.Event
|
||||
alias Mobilizon.Posts.Post
|
||||
alias Mobilizon.Storage.Repo
|
||||
require Logger
|
||||
|
||||
@preferred_cli_env "prod"
|
||||
|
||||
# TODO: Remove me in Mobilizon 1.2
|
||||
|
||||
@shortdoc "Reattaches inline media from events and posts"
|
||||
def run([]) do
|
||||
start_mobilizon()
|
||||
|
||||
shell_info("Going to extract pictures from events")
|
||||
extract_inline_pictures_from_bodies(Event)
|
||||
shell_info("Going to extract pictures from posts")
|
||||
extract_inline_pictures_from_bodies(Post)
|
||||
shell_info("Going to extract pictures from comments")
|
||||
extract_inline_pictures_from_bodies(Comment)
|
||||
end
|
||||
|
||||
defp extract_inline_pictures_from_bodies(entity) do
|
||||
Repo.transaction(
|
||||
fn ->
|
||||
entity
|
||||
|> Repo.stream()
|
||||
|> Stream.map(&extract_pictures(&1))
|
||||
|> Stream.map(fn {entity, pics} -> save_entity(entity, pics) end)
|
||||
|> Stream.run()
|
||||
end,
|
||||
timeout: :infinity
|
||||
)
|
||||
end
|
||||
|
||||
defp extract_pictures(entity) do
|
||||
extracted_pictures = entity |> get_body() |> parse_body() |> get_media_entities_from_urls()
|
||||
|
||||
attached_picture = entity |> get_picture() |> get_media_entity_from_media_id()
|
||||
attached_pictures = [attached_picture] |> Enum.filter(& &1)
|
||||
|
||||
{entity, extracted_pictures ++ attached_pictures}
|
||||
end
|
||||
|
||||
defp get_body(%Event{description: description}), do: description
|
||||
defp get_body(%Post{body: body}), do: body
|
||||
defp get_body(%Comment{text: text}), do: text
|
||||
|
||||
defp get_picture(%Event{picture_id: picture_id}), do: picture_id
|
||||
defp get_picture(%Post{picture_id: picture_id}), do: picture_id
|
||||
defp get_picture(%Comment{}), do: nil
|
||||
|
||||
defp parse_body(nil), do: []
|
||||
|
||||
defp parse_body(body) do
|
||||
with res <- Regex.scan(~r/<img\s[^>]*?src\s*=\s*['\"]([^'\"]*?)['\"][^>]*?>/, body),
|
||||
res <- Enum.map(res, fn [_, res] -> res end) do
|
||||
res
|
||||
end
|
||||
end
|
||||
|
||||
defp get_media_entities_from_urls(media_urls) do
|
||||
media_urls
|
||||
|> Enum.map(fn media_url ->
|
||||
# We prefer orphan media, but fallback on already attached media just in case
|
||||
Medias.get_unattached_media_by_url(media_url) || Medias.get_media_by_url(media_url)
|
||||
end)
|
||||
|> Enum.filter(& &1)
|
||||
end
|
||||
|
||||
defp get_media_entity_from_media_id(nil), do: nil
|
||||
|
||||
defp get_media_entity_from_media_id(media_id) do
|
||||
Medias.get_media(media_id)
|
||||
end
|
||||
|
||||
defp save_entity(%Event{} = _event, []), do: :ok
|
||||
|
||||
defp save_entity(%Event{} = event, media) do
|
||||
event = Repo.preload(event, [:contacts, :media])
|
||||
Events.update_event(event, %{media: media})
|
||||
end
|
||||
|
||||
defp save_entity(%Post{} = _post, []), do: :ok
|
||||
|
||||
defp save_entity(%Post{} = post, media) do
|
||||
post = Repo.preload(post, [:media])
|
||||
Posts.update_post(post, %{media: media})
|
||||
end
|
||||
|
||||
defp save_entity(%Comment{} = _comment, []), do: :ok
|
||||
|
||||
defp save_entity(%Comment{} = comment, media) do
|
||||
comment = Repo.preload(comment, [:media])
|
||||
Discussions.update_comment(comment, %{media: media})
|
||||
end
|
||||
end
|
|
@ -64,7 +64,9 @@ defmodule Mix.Tasks.Mobilizon.Media.CleanOrphan do
|
|||
end
|
||||
|
||||
Enum.each(medias, fn media ->
|
||||
shell_info("ID: #{media.id}, Actor: #{media.actor_id}, URL: #{media.file.url}")
|
||||
shell_info(
|
||||
"ID: #{hd(media).id}, Actor: #{hd(media).actor_id}, Deduplicated #{length(media)} times, URL: #{hd(media).file.url}"
|
||||
)
|
||||
end)
|
||||
end
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@ defmodule Mobilizon.Medias do
|
|||
"""
|
||||
|
||||
import Ecto.Query
|
||||
import Mobilizon.Storage.CustomFunctions
|
||||
|
||||
alias Ecto.Multi
|
||||
|
||||
|
@ -40,23 +41,15 @@ defmodule Mobilizon.Medias do
|
|||
end
|
||||
|
||||
@doc """
|
||||
Get an unattached media by it's URL
|
||||
Get all media by an URL.
|
||||
"""
|
||||
def get_unattached_media_by_url(url) do
|
||||
@spec get_all_media_by_url(String.t()) :: Media.t() | nil
|
||||
def get_all_media_by_url(url) do
|
||||
url
|
||||
|> String.split("?", parts: 2)
|
||||
|> hd
|
||||
|> media_by_url_query()
|
||||
|> join(:left, [m], e in assoc(m, :events))
|
||||
|> join(:left, [m], ep in assoc(m, :event_picture))
|
||||
|> join(:left, [m], p in assoc(m, :posts))
|
||||
|> join(:left, [m], pp in assoc(m, :posts_picture))
|
||||
|> join(:left, [m], c in assoc(m, :comments))
|
||||
|> where([_m, e], is_nil(e.id))
|
||||
|> where([_m, _e, ep], is_nil(ep.id))
|
||||
|> where([_m, _e, _ep, p], is_nil(p.id))
|
||||
|> where([_m, _e, _ep, _p, pp], is_nil(pp.id))
|
||||
|> where([_m, _e, _ep, _p, _pp, c], is_nil(c.id))
|
||||
|> limit(1)
|
||||
|> Repo.one()
|
||||
|> Repo.all()
|
||||
end
|
||||
|
||||
@doc """
|
||||
|
@ -163,7 +156,7 @@ defmodule Mobilizon.Medias do
|
|||
defp media_by_url_query(url) do
|
||||
from(
|
||||
p in Media,
|
||||
where: fragment("? @> ?", p.file, ~s|{"url": "#{url}"}|)
|
||||
where: split_part(fragment("file->>'url'"), "?", 1) == ^url
|
||||
)
|
||||
end
|
||||
|
||||
|
|
10
lib/mobilizon/storage/custom_functions.ex
Normal file
10
lib/mobilizon/storage/custom_functions.ex
Normal file
|
@ -0,0 +1,10 @@
|
|||
defmodule Mobilizon.Storage.CustomFunctions do
|
||||
@moduledoc """
|
||||
Helper module for custom PostgreSQL functions
|
||||
"""
|
||||
defmacro split_part(string, delimiter, position) do
|
||||
quote do
|
||||
fragment("split_part(?, ?, ?)", unquote(string), unquote(delimiter), unquote(position))
|
||||
end
|
||||
end
|
||||
end
|
|
@ -5,6 +5,7 @@ defmodule Mobilizon.Service.CleanOrphanMedia do
|
|||
|
||||
alias Mobilizon.Actors.Actor
|
||||
alias Mobilizon.Medias
|
||||
alias Mobilizon.Medias.File
|
||||
alias Mobilizon.Medias.Media
|
||||
alias Mobilizon.Storage.Repo
|
||||
import Ecto.Query
|
||||
|
@ -25,9 +26,11 @@ defmodule Mobilizon.Service.CleanOrphanMedia do
|
|||
if Keyword.get(opts, :dry_run, false) do
|
||||
{:ok, medias}
|
||||
else
|
||||
Enum.each(medias, fn media ->
|
||||
Enum.each(medias, fn media_list ->
|
||||
Enum.each(media_list, fn media ->
|
||||
Medias.delete_media(media, ignore_file_not_found: true)
|
||||
end)
|
||||
end)
|
||||
|
||||
{:ok, medias}
|
||||
end
|
||||
|
@ -49,7 +52,7 @@ defmodule Mobilizon.Service.CleanOrphanMedia do
|
|||
|> Enum.join(" UNION ")
|
||||
|> (&"NOT EXISTS(#{&1})").()
|
||||
|
||||
@spec find_media(Keyword.t()) :: list(Media.t())
|
||||
@spec find_media(Keyword.t()) :: list(list(Media.t()))
|
||||
defp find_media(opts) do
|
||||
default_grace_period =
|
||||
Mobilizon.Config.get([:instance, :orphan_upload_grace_period_hours], 48)
|
||||
|
@ -68,6 +71,38 @@ defmodule Mobilizon.Service.CleanOrphanMedia do
|
|||
where: fragment(@union_query)
|
||||
)
|
||||
|
||||
Repo.all(query)
|
||||
query
|
||||
|> Repo.all()
|
||||
|> Enum.filter(fn %Media{file: %File{url: url}} ->
|
||||
is_all_media_orphan?(url, expiration_date)
|
||||
end)
|
||||
|> Enum.chunk_by(fn %Media{file: %File{url: url}} ->
|
||||
url
|
||||
|> String.split("?", parts: 2)
|
||||
|> hd
|
||||
end)
|
||||
end
|
||||
|
||||
def is_all_media_orphan?(url, expiration_date) do
|
||||
url
|
||||
|> Medias.get_all_media_by_url()
|
||||
|> Enum.all?(&is_media_orphan?(&1, expiration_date))
|
||||
end
|
||||
|
||||
@spec is_media_orphan?(Media.t(), DateTime.t()) :: boolean()
|
||||
def is_media_orphan?(%Media{id: media_id}, expiration_date) do
|
||||
query =
|
||||
from(m in Media,
|
||||
as: :media,
|
||||
distinct: true,
|
||||
join: a in Actor,
|
||||
on: a.id == m.actor_id,
|
||||
where: m.id == ^media_id,
|
||||
where: is_nil(a.domain),
|
||||
where: m.inserted_at < ^expiration_date,
|
||||
where: fragment(@union_query)
|
||||
)
|
||||
|
||||
Repo.exists?(query)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -16,7 +16,7 @@ defmodule Mobilizon.Service.CleanOrphanMediaTest do
|
|||
refute is_nil(Medias.get_media(media_id))
|
||||
refute is_nil(Medias.get_media(media_2_id))
|
||||
|
||||
assert {:ok, [found_media]} = CleanOrphanMedia.clean()
|
||||
assert {:ok, [[found_media]]} = CleanOrphanMedia.clean()
|
||||
assert found_media.id == media_id
|
||||
|
||||
assert is_nil(Medias.get_media(media_id))
|
||||
|
@ -31,7 +31,7 @@ defmodule Mobilizon.Service.CleanOrphanMediaTest do
|
|||
refute is_nil(Medias.get_media(media_id))
|
||||
refute is_nil(Medias.get_media(media_2_id))
|
||||
|
||||
assert {:ok, [found_media]} = CleanOrphanMedia.clean(dry_run: true)
|
||||
assert {:ok, [[found_media]]} = CleanOrphanMedia.clean(dry_run: true)
|
||||
assert found_media.id == media_id
|
||||
|
||||
refute is_nil(Medias.get_media(media_id))
|
||||
|
@ -46,7 +46,7 @@ defmodule Mobilizon.Service.CleanOrphanMediaTest do
|
|||
refute is_nil(Medias.get_media(media_id))
|
||||
refute is_nil(Medias.get_media(media_2_id))
|
||||
|
||||
assert {:ok, [found_media]} = CleanOrphanMedia.clean(grace_period: 12)
|
||||
assert {:ok, [[found_media]]} = CleanOrphanMedia.clean(grace_period: 12)
|
||||
assert found_media.id == media_id
|
||||
|
||||
assert is_nil(Medias.get_media(media_id))
|
||||
|
|
|
@ -280,7 +280,7 @@ defmodule Mobilizon.Factory do
|
|||
%Mobilizon.Medias.File{
|
||||
name: "My Media",
|
||||
url: url,
|
||||
content_type: "image/png",
|
||||
content_type: "image/jpg",
|
||||
size: 13_120
|
||||
}
|
||||
end
|
||||
|
|
|
@ -43,21 +43,22 @@ defmodule Mix.Tasks.Mobilizon.Media.CleanOrphanTest do
|
|||
test "with media returned" do
|
||||
media1 = insert(:media)
|
||||
media2 = insert(:media)
|
||||
media3 = insert(:media, file: create_file())
|
||||
|
||||
with_mock CleanOrphanMedia,
|
||||
clean: fn [dry_run: true, grace_period: 48] -> {:ok, [media1, media2]} end do
|
||||
clean: fn [dry_run: true, grace_period: 48] -> {:ok, [[media1, media2], [media3]]} end do
|
||||
CleanOrphan.run(["--dry-run"])
|
||||
assert_received {:mix_shell, :info, [output_received]}
|
||||
assert output_received == "List of files that would have been deleted"
|
||||
assert_received {:mix_shell, :info, [output_received]}
|
||||
|
||||
assert output_received ==
|
||||
"ID: #{media1.id}, Actor: #{media1.actor_id}, URL: #{media1.file.url}"
|
||||
"ID: #{media1.id}, Actor: #{media1.actor_id}, Deduplicated 2 times, URL: #{media1.file.url}"
|
||||
|
||||
assert_received {:mix_shell, :info, [output_received]}
|
||||
|
||||
assert output_received ==
|
||||
"ID: #{media2.id}, Actor: #{media2.actor_id}, URL: #{media2.file.url}"
|
||||
"ID: #{media3.id}, Actor: #{media3.actor_id}, Deduplicated 1 times, URL: #{media3.file.url}"
|
||||
|
||||
assert_received {:mix_shell, :info, [output_received]}
|
||||
assert output_received == "2 files would have been deleted"
|
||||
|
@ -78,21 +79,22 @@ defmodule Mix.Tasks.Mobilizon.Media.CleanOrphanTest do
|
|||
test "with media returned" do
|
||||
media1 = insert(:media)
|
||||
media2 = insert(:media)
|
||||
media3 = insert(:media, file: create_file())
|
||||
|
||||
with_mock CleanOrphanMedia,
|
||||
clean: fn [dry_run: false, grace_period: 48] -> {:ok, [media1, media2]} end do
|
||||
clean: fn [dry_run: false, grace_period: 48] -> {:ok, [[media1, media2], [media3]]} end do
|
||||
CleanOrphan.run(["--verbose"])
|
||||
assert_received {:mix_shell, :info, [output_received]}
|
||||
assert output_received == "List of files that have been deleted"
|
||||
assert_received {:mix_shell, :info, [output_received]}
|
||||
|
||||
assert output_received ==
|
||||
"ID: #{media1.id}, Actor: #{media1.actor_id}, URL: #{media1.file.url}"
|
||||
"ID: #{media1.id}, Actor: #{media1.actor_id}, Deduplicated 2 times, URL: #{media1.file.url}"
|
||||
|
||||
assert_received {:mix_shell, :info, [output_received]}
|
||||
|
||||
assert output_received ==
|
||||
"ID: #{media2.id}, Actor: #{media2.actor_id}, URL: #{media2.file.url}"
|
||||
"ID: #{media3.id}, Actor: #{media3.actor_id}, Deduplicated 1 times, URL: #{media3.file.url}"
|
||||
|
||||
assert_received {:mix_shell, :info, [output_received]}
|
||||
assert output_received == "2 files have been deleted"
|
||||
|
@ -121,4 +123,29 @@ defmodule Mix.Tasks.Mobilizon.Media.CleanOrphanTest do
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
defp create_file do
|
||||
File.cp!("test/fixtures/picture.png", "test/fixtures/picture_tmp.png")
|
||||
|
||||
file = %Plug.Upload{
|
||||
content_type: "image/png",
|
||||
path: Path.absname("test/fixtures/picture_tmp.png"),
|
||||
filename: "image.png"
|
||||
}
|
||||
|
||||
{:ok, data} = Mobilizon.Web.Upload.store(file)
|
||||
|
||||
%{
|
||||
content_type: "image/png",
|
||||
name: "image.png",
|
||||
url: url
|
||||
} = data
|
||||
|
||||
%Mobilizon.Medias.File{
|
||||
name: "My Media",
|
||||
url: url,
|
||||
content_type: "image/png",
|
||||
size: 13_120
|
||||
}
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue