From ba097c736e3ac40c0af90b09c12ae7a9bf55d838 Mon Sep 17 00:00:00 2001 From: Thomas Citharel Date: Wed, 15 Dec 2021 12:58:31 +0100 Subject: [PATCH] Improve handling of media file deletion Signed-off-by: Thomas Citharel --- lib/mobilizon/actors/actors.ex | 4 +- lib/mobilizon/medias/medias.ex | 152 ++++++++++++++++-- lib/service/actor_suspension.ex | 5 +- lib/service/clean_orphan_media.ex | 82 +--------- mix.exs | 1 + .../transmogrifier/comments_test.exs | 2 +- test/mobilizon/media/media_test.exs | 67 +++++++- 7 files changed, 212 insertions(+), 101 deletions(-) diff --git a/lib/mobilizon/actors/actors.ex b/lib/mobilizon/actors/actors.ex index a1a0caaba..527aa5eef 100644 --- a/lib/mobilizon/actors/actors.ex +++ b/lib/mobilizon/actors/actors.ex @@ -14,11 +14,11 @@ defmodule Mobilizon.Actors do alias Mobilizon.Addresses.Address alias Mobilizon.Crypto alias Mobilizon.Events.FeedToken + alias Mobilizon.Medias alias Mobilizon.Service.Workers alias Mobilizon.Storage.{Page, Repo} alias Mobilizon.Users alias Mobilizon.Users.User - alias Mobilizon.Web.Upload require Logger @@ -1263,7 +1263,7 @@ defmodule Mobilizon.Actors do with %Ecto.Changeset{changes: %{url: new_url}} <- changes[key], %{url: old_url} <- data |> Map.from_struct() |> Map.get(key), false <- new_url == old_url do - Upload.remove(old_url) + Medias.delete_user_profile_media_by_url(old_url) end end end) diff --git a/lib/mobilizon/medias/medias.ex b/lib/mobilizon/medias/medias.ex index d781de872..ef4d258fa 100644 --- a/lib/mobilizon/medias/medias.ex +++ b/lib/mobilizon/medias/medias.ex @@ -107,19 +107,8 @@ defmodule Mobilizon.Medias do transaction = Multi.new() |> Multi.delete(:media, media) - |> Multi.run(:remove, fn _repo, %{media: %Media{file: %File{url: url}} = media} -> - case Upload.remove(url) do - {:error, err} -> - if err == :enofile and Keyword.get(opts, :ignore_file_not_found, false) do - Logger.info("Deleting media and ignoring absent file.") - {:ok, media} - else - {:error, err} - end - - {:ok, media} -> - {:ok, media} - end + |> Multi.run(:remove, fn _repo, %{media: %Media{} = media} -> + delete_file_from_media(media, opts) end) |> Repo.transaction() @@ -132,6 +121,143 @@ defmodule Mobilizon.Medias do end end + @spec delete_file_from_media(Media.t(), Keyword.t()) :: {:ok, Media.t()} | {:error, atom()} + defp delete_file_from_media(%Media{file: %File{url: url}} = media, opts) do + if can_delete_media_file?(media) do + case Upload.remove(url) do + {:error, err} -> + if err == :enofile and Keyword.get(opts, :ignore_file_not_found, false) do + Logger.info("Deleting media and ignoring absent file.") + {:ok, media} + else + {:error, err} + end + + {:ok, media} -> + {:ok, media} + end + else + Logger.debug("We cannot delete media file, it's still being used") + {:ok, media} + end + end + + @spec can_delete_media_file?(Media.t()) :: boolean() + defp can_delete_media_file?(%Media{file: %File{url: url}}) do + Logger.debug("Checking for other uses of the media file, by comparing it's URL…") + + case get_media_by_url(url) do + # No other media with this URL + nil -> + if url_is_also_a_profile_file?(url) do + Logger.debug("Found URL in actor profile, so we need to keep the file") + false + else + Logger.debug("All good, we can delete the media file") + true + end + + %Media{} -> + Logger.debug( + "Found media different from this once for this URL, so there's at least one other media" + ) + + false + end + end + + @spec delete_user_profile_media_by_url(String.t()) :: + {:ok, String.t() | :ignored} | {:error, atom()} + def delete_user_profile_media_by_url(url) do + if get_media_by_url(url) == nil && count_occurences_of_url_in_profiles(url) <= 1 do + # We have no media using this URL and only this profile is using this URL + Upload.remove(url) + else + {:ok, :ignored} + end + end + + # Ecto doesn't currently allow us to use exists with a subquery, + # so we can't create the union through Ecto + # https://github.com/elixir-ecto/ecto/issues/3619 + @union_query [ + [from: "events", param: "picture_id"], + [from: "events_medias", param: "media_id"], + [from: "posts", param: "picture_id"], + [from: "posts_medias", param: "media_id"], + [from: "comments_medias", param: "media_id"] + ] + |> Enum.map_join(" UNION ", fn [from: from, param: param] -> + "SELECT 1 FROM #{from} WHERE #{from}.#{param} = m0.id" + end) + |> (&"NOT EXISTS(#{&1})").() + + @spec find_media_to_clean(Keyword.t()) :: list(list(Media.t())) + def find_media_to_clean(opts) do + default_grace_period = + Mobilizon.Config.get([:instance, :orphan_upload_grace_period_hours], 48) + + grace_period = Keyword.get(opts, :grace_period, default_grace_period) + expiration_date = DateTime.add(DateTime.utc_now(), grace_period * -3600) + + query = + from(m in Media, + as: :media, + distinct: true, + join: a in Actor, + on: a.id == m.actor_id, + where: is_nil(a.domain), + where: m.inserted_at < ^expiration_date, + where: fragment(@union_query) + ) + + query + |> Repo.all(timeout: :infinity) + |> Enum.filter(fn %Media{file: %File{url: url}} -> + !url_is_also_a_profile_file?(url) && is_all_media_orphan?(url, expiration_date) + end) + |> Enum.chunk_by(fn %Media{file: %File{url: url}} -> + url + |> String.split("?", parts: 2) + |> hd + end) + end + + defp is_all_media_orphan?(url, expiration_date) do + url + |> get_all_media_by_url() + |> Enum.all?(&is_media_orphan?(&1, expiration_date)) + end + + @spec is_media_orphan?(Media.t(), DateTime.t()) :: boolean() + defp is_media_orphan?(%Media{id: media_id}, expiration_date) do + media_query = + from(m in Media, + as: :media, + distinct: true, + join: a in Actor, + on: a.id == m.actor_id, + where: m.id == ^media_id, + where: is_nil(a.domain), + where: m.inserted_at < ^expiration_date, + where: fragment(@union_query) + ) + + Repo.exists?(media_query) + end + + @spec url_is_also_a_profile_file?(String.t()) :: boolean() + defp url_is_also_a_profile_file?(url) when is_binary(url) do + count_occurences_of_url_in_profiles(url) > 0 + end + + @spec count_occurences_of_url_in_profiles(String.t()) :: integer() + defp count_occurences_of_url_in_profiles(url) when is_binary(url) do + Actor + |> where([a], fragment("avatar->>'url'") == ^url or fragment("banner->>'url'") == ^url) + |> Repo.aggregate(:count) + end + @spec media_by_url_query(String.t()) :: Ecto.Query.t() defp media_by_url_query(url) do from( diff --git a/lib/service/actor_suspension.ex b/lib/service/actor_suspension.ex index 838070259..4973208d0 100644 --- a/lib/service/actor_suspension.ex +++ b/lib/service/actor_suspension.ex @@ -4,7 +4,7 @@ defmodule Mobilizon.Service.ActorSuspension do """ alias Ecto.Multi - alias Mobilizon.{Actors, Events, Users} + alias Mobilizon.{Actors, Events, Medias, Users} alias Mobilizon.Actors.{Actor, Member} alias Mobilizon.Discussions.{Comment, Discussion} alias Mobilizon.Events.{Event, Participant} @@ -15,7 +15,6 @@ defmodule Mobilizon.Service.ActorSuspension do alias Mobilizon.Users.User alias Mobilizon.Web.Email.Actor, as: ActorEmail alias Mobilizon.Web.Email.Group - alias Mobilizon.Web.Upload require Logger import Ecto.Query @@ -249,7 +248,7 @@ defmodule Mobilizon.Service.ActorSuspension do @spec safe_remove_file(String.t(), Actor.t()) :: {:ok, Actor.t()} defp safe_remove_file(url, %Actor{} = actor) do - case Upload.remove(url) do + case Medias.delete_user_profile_media_by_url(url) do {:ok, _value} -> {:ok, actor} diff --git a/lib/service/clean_orphan_media.ex b/lib/service/clean_orphan_media.ex index d8ab94dd3..3a4344f76 100644 --- a/lib/service/clean_orphan_media.ex +++ b/lib/service/clean_orphan_media.ex @@ -3,12 +3,8 @@ defmodule Mobilizon.Service.CleanOrphanMedia do Service to clean orphan media """ - alias Mobilizon.Actors.Actor alias Mobilizon.Medias - alias Mobilizon.Medias.File alias Mobilizon.Medias.Media - alias Mobilizon.Storage.Repo - import Ecto.Query @doc """ Clean orphan media @@ -21,7 +17,7 @@ defmodule Mobilizon.Service.CleanOrphanMedia do """ @spec clean(Keyword.t()) :: {:ok, list(Media.t())} def clean(opts \\ []) do - medias = find_media(opts) + medias = Medias.find_media_to_clean(opts) if Keyword.get(opts, :dry_run, false) do {:ok, medias} @@ -35,80 +31,4 @@ defmodule Mobilizon.Service.CleanOrphanMedia do {:ok, medias} end end - - # Ecto doesn't currently allow us to use exists with a subquery, - # so we can't create the union through Ecto - # https://github.com/elixir-ecto/ecto/issues/3619 - @union_query [ - [from: "events", param: "picture_id"], - [from: "events_medias", param: "media_id"], - [from: "posts", param: "picture_id"], - [from: "posts_medias", param: "media_id"], - [from: "comments_medias", param: "media_id"] - ] - |> Enum.map_join(" UNION ", fn [from: from, param: param] -> - "SELECT 1 FROM #{from} WHERE #{from}.#{param} = m0.id" - end) - |> (&"NOT EXISTS(#{&1})").() - - @spec find_media(Keyword.t()) :: list(list(Media.t())) - defp find_media(opts) do - default_grace_period = - Mobilizon.Config.get([:instance, :orphan_upload_grace_period_hours], 48) - - grace_period = Keyword.get(opts, :grace_period, default_grace_period) - expiration_date = DateTime.add(DateTime.utc_now(), grace_period * -3600) - - query = - from(m in Media, - as: :media, - distinct: true, - join: a in Actor, - on: a.id == m.actor_id, - where: is_nil(a.domain), - where: m.inserted_at < ^expiration_date, - where: fragment(@union_query) - ) - - query - |> Repo.all(timeout: :infinity) - |> Enum.filter(fn %Media{file: %File{url: url}} -> - !url_is_also_a_profile_file?(url) && is_all_media_orphan?(url, expiration_date) - end) - |> Enum.chunk_by(fn %Media{file: %File{url: url}} -> - url - |> String.split("?", parts: 2) - |> hd - end) - end - - def is_all_media_orphan?(url, expiration_date) do - url - |> Medias.get_all_media_by_url() - |> Enum.all?(&is_media_orphan?(&1, expiration_date)) - end - - @spec is_media_orphan?(Media.t(), DateTime.t()) :: boolean() - def is_media_orphan?(%Media{id: media_id}, expiration_date) do - media_query = - from(m in Media, - as: :media, - distinct: true, - join: a in Actor, - on: a.id == m.actor_id, - where: m.id == ^media_id, - where: is_nil(a.domain), - where: m.inserted_at < ^expiration_date, - where: fragment(@union_query) - ) - - Repo.exists?(media_query) - end - - @spec url_is_also_a_profile_file?(String.t()) :: nil - defp url_is_also_a_profile_file?(url) when is_binary(url) do - Actor - |> where([a], fragment("avatar->>'url'") == ^url or fragment("banner->>'url'") == ^url) - |> Repo.exists?() - end end diff --git a/mix.exs b/mix.exs index fda269ab8..c60b75ff7 100644 --- a/mix.exs +++ b/mix.exs @@ -256,6 +256,7 @@ defmodule Mobilizon.Mixfile do end defp run_test(args) do + File.mkdir("test/uploads") Mix.Task.run("test", args) File.rm_rf!("test/uploads") end diff --git a/test/federation/activity_pub/transmogrifier/comments_test.exs b/test/federation/activity_pub/transmogrifier/comments_test.exs index d2f6f7d22..1f3589850 100644 --- a/test/federation/activity_pub/transmogrifier/comments_test.exs +++ b/test/federation/activity_pub/transmogrifier/comments_test.exs @@ -215,7 +215,7 @@ defmodule Mobilizon.Federation.ActivityPub.Transmogrifier.CommentsTest do assert capture_log([level: :warn], fn -> {:ok, _returned_activity, _entity} = Transmogrifier.handle_incoming(data) - end) =~ "[warn] Parent object is something we don't handle" + end) =~ "[warning] Parent object is something we don't handle" end test "it ignores incoming private notes" do diff --git a/test/mobilizon/media/media_test.exs b/test/mobilizon/media/media_test.exs index d6b6bdfdd..4463c3a24 100644 --- a/test/mobilizon/media/media_test.exs +++ b/test/mobilizon/media/media_test.exs @@ -4,6 +4,7 @@ defmodule Mobilizon.MediaTest do import Mobilizon.Factory alias Mobilizon.{Config, Medias} + alias Mobilizon.Medias.Media alias Mobilizon.Web.Upload.Uploader @@ -29,8 +30,16 @@ defmodule Mobilizon.MediaTest do assert media.file.name == "something old" end + end - test "delete_media/1 deletes the media" do + describe "delete_media/1" do + setup do + File.rm_rf!(Config.get!([Uploader.Local, :uploads])) + File.mkdir(Config.get!([Uploader.Local, :uploads])) + :ok + end + + test "deletes the media" do media = insert(:media) %URI{path: "/media/" <> path} = URI.parse(media.file.url) @@ -48,5 +57,61 @@ defmodule Mobilizon.MediaTest do "/" <> path ) end + + test "doesn't delete the media if two media entities are using the same URL" do + Config.put([Mobilizon.Web.Upload, :filters], [ + Mobilizon.Web.Upload.Filter.Dedupe + ]) + + media1 = insert(:media) + media2 = insert(:media) + + assert media1.file.url == media2.file.url + + %URI{path: "/media/" <> path} = URI.parse(media1.file.url) + + assert File.exists?( + Config.get!([Uploader.Local, :uploads]) <> + "/" <> path + ) + + assert {:ok, %Media{}} = Medias.delete_media(media1) + assert_raise Ecto.NoResultsError, fn -> Medias.get_media!(media1.id) end + + assert File.exists?( + Config.get!([Uploader.Local, :uploads]) <> + "/" <> path + ) + + Config.put([Mobilizon.Web.Upload, :filters], []) + end + + test "doesn't delete the media if the same file is being used in a profile" do + Config.put([Mobilizon.Web.Upload, :filters], [ + Mobilizon.Web.Upload.Filter.Dedupe + ]) + + actor = insert(:actor) + media = insert(:media) + + assert media.file.url == actor.avatar.url + + %URI{path: "/media/" <> path} = URI.parse(media.file.url) + + assert File.exists?( + Config.get!([Uploader.Local, :uploads]) <> + "/" <> path + ) + + assert {:ok, %Media{}} = Medias.delete_media(media) + assert_raise Ecto.NoResultsError, fn -> Medias.get_media!(media.id) end + + assert File.exists?( + Config.get!([Uploader.Local, :uploads]) <> + "/" <> path + ) + + Config.put([Mobilizon.Web.Upload, :filters], []) + end end end