Improve handling of media file deletion

Signed-off-by: Thomas Citharel <tcit@tcit.fr>
This commit is contained in:
Thomas Citharel 2021-12-15 12:58:31 +01:00
parent c174e18b39
commit ba097c736e
No known key found for this signature in database
GPG Key ID: A061B9DDE0CA0773
7 changed files with 212 additions and 101 deletions

View File

@ -14,11 +14,11 @@ defmodule Mobilizon.Actors do
alias Mobilizon.Addresses.Address alias Mobilizon.Addresses.Address
alias Mobilizon.Crypto alias Mobilizon.Crypto
alias Mobilizon.Events.FeedToken alias Mobilizon.Events.FeedToken
alias Mobilizon.Medias
alias Mobilizon.Service.Workers alias Mobilizon.Service.Workers
alias Mobilizon.Storage.{Page, Repo} alias Mobilizon.Storage.{Page, Repo}
alias Mobilizon.Users alias Mobilizon.Users
alias Mobilizon.Users.User alias Mobilizon.Users.User
alias Mobilizon.Web.Upload
require Logger require Logger
@ -1263,7 +1263,7 @@ defmodule Mobilizon.Actors do
with %Ecto.Changeset{changes: %{url: new_url}} <- changes[key], with %Ecto.Changeset{changes: %{url: new_url}} <- changes[key],
%{url: old_url} <- data |> Map.from_struct() |> Map.get(key), %{url: old_url} <- data |> Map.from_struct() |> Map.get(key),
false <- new_url == old_url do false <- new_url == old_url do
Upload.remove(old_url) Medias.delete_user_profile_media_by_url(old_url)
end end
end end
end) end)

View File

@ -107,19 +107,8 @@ defmodule Mobilizon.Medias do
transaction = transaction =
Multi.new() Multi.new()
|> Multi.delete(:media, media) |> Multi.delete(:media, media)
|> Multi.run(:remove, fn _repo, %{media: %Media{file: %File{url: url}} = media} -> |> Multi.run(:remove, fn _repo, %{media: %Media{} = media} ->
case Upload.remove(url) do delete_file_from_media(media, opts)
{:error, err} ->
if err == :enofile and Keyword.get(opts, :ignore_file_not_found, false) do
Logger.info("Deleting media and ignoring absent file.")
{:ok, media}
else
{:error, err}
end
{:ok, media} ->
{:ok, media}
end
end) end)
|> Repo.transaction() |> Repo.transaction()
@ -132,6 +121,143 @@ defmodule Mobilizon.Medias do
end end
end end
@spec delete_file_from_media(Media.t(), Keyword.t()) :: {:ok, Media.t()} | {:error, atom()}
defp delete_file_from_media(%Media{file: %File{url: url}} = media, opts) do
if can_delete_media_file?(media) do
case Upload.remove(url) do
{:error, err} ->
if err == :enofile and Keyword.get(opts, :ignore_file_not_found, false) do
Logger.info("Deleting media and ignoring absent file.")
{:ok, media}
else
{:error, err}
end
{:ok, media} ->
{:ok, media}
end
else
Logger.debug("We cannot delete media file, it's still being used")
{:ok, media}
end
end
@spec can_delete_media_file?(Media.t()) :: boolean()
defp can_delete_media_file?(%Media{file: %File{url: url}}) do
Logger.debug("Checking for other uses of the media file, by comparing it's URL…")
case get_media_by_url(url) do
# No other media with this URL
nil ->
if url_is_also_a_profile_file?(url) do
Logger.debug("Found URL in actor profile, so we need to keep the file")
false
else
Logger.debug("All good, we can delete the media file")
true
end
%Media{} ->
Logger.debug(
"Found media different from this once for this URL, so there's at least one other media"
)
false
end
end
@spec delete_user_profile_media_by_url(String.t()) ::
{:ok, String.t() | :ignored} | {:error, atom()}
def delete_user_profile_media_by_url(url) do
if get_media_by_url(url) == nil && count_occurences_of_url_in_profiles(url) <= 1 do
# We have no media using this URL and only this profile is using this URL
Upload.remove(url)
else
{:ok, :ignored}
end
end
# Ecto doesn't currently allow us to use exists with a subquery,
# so we can't create the union through Ecto
# https://github.com/elixir-ecto/ecto/issues/3619
@union_query [
[from: "events", param: "picture_id"],
[from: "events_medias", param: "media_id"],
[from: "posts", param: "picture_id"],
[from: "posts_medias", param: "media_id"],
[from: "comments_medias", param: "media_id"]
]
|> Enum.map_join(" UNION ", fn [from: from, param: param] ->
"SELECT 1 FROM #{from} WHERE #{from}.#{param} = m0.id"
end)
|> (&"NOT EXISTS(#{&1})").()
@spec find_media_to_clean(Keyword.t()) :: list(list(Media.t()))
def find_media_to_clean(opts) do
default_grace_period =
Mobilizon.Config.get([:instance, :orphan_upload_grace_period_hours], 48)
grace_period = Keyword.get(opts, :grace_period, default_grace_period)
expiration_date = DateTime.add(DateTime.utc_now(), grace_period * -3600)
query =
from(m in Media,
as: :media,
distinct: true,
join: a in Actor,
on: a.id == m.actor_id,
where: is_nil(a.domain),
where: m.inserted_at < ^expiration_date,
where: fragment(@union_query)
)
query
|> Repo.all(timeout: :infinity)
|> Enum.filter(fn %Media{file: %File{url: url}} ->
!url_is_also_a_profile_file?(url) && is_all_media_orphan?(url, expiration_date)
end)
|> Enum.chunk_by(fn %Media{file: %File{url: url}} ->
url
|> String.split("?", parts: 2)
|> hd
end)
end
defp is_all_media_orphan?(url, expiration_date) do
url
|> get_all_media_by_url()
|> Enum.all?(&is_media_orphan?(&1, expiration_date))
end
@spec is_media_orphan?(Media.t(), DateTime.t()) :: boolean()
defp is_media_orphan?(%Media{id: media_id}, expiration_date) do
media_query =
from(m in Media,
as: :media,
distinct: true,
join: a in Actor,
on: a.id == m.actor_id,
where: m.id == ^media_id,
where: is_nil(a.domain),
where: m.inserted_at < ^expiration_date,
where: fragment(@union_query)
)
Repo.exists?(media_query)
end
@spec url_is_also_a_profile_file?(String.t()) :: boolean()
defp url_is_also_a_profile_file?(url) when is_binary(url) do
count_occurences_of_url_in_profiles(url) > 0
end
@spec count_occurences_of_url_in_profiles(String.t()) :: integer()
defp count_occurences_of_url_in_profiles(url) when is_binary(url) do
Actor
|> where([a], fragment("avatar->>'url'") == ^url or fragment("banner->>'url'") == ^url)
|> Repo.aggregate(:count)
end
@spec media_by_url_query(String.t()) :: Ecto.Query.t() @spec media_by_url_query(String.t()) :: Ecto.Query.t()
defp media_by_url_query(url) do defp media_by_url_query(url) do
from( from(

View File

@ -4,7 +4,7 @@ defmodule Mobilizon.Service.ActorSuspension do
""" """
alias Ecto.Multi alias Ecto.Multi
alias Mobilizon.{Actors, Events, Users} alias Mobilizon.{Actors, Events, Medias, Users}
alias Mobilizon.Actors.{Actor, Member} alias Mobilizon.Actors.{Actor, Member}
alias Mobilizon.Discussions.{Comment, Discussion} alias Mobilizon.Discussions.{Comment, Discussion}
alias Mobilizon.Events.{Event, Participant} alias Mobilizon.Events.{Event, Participant}
@ -15,7 +15,6 @@ defmodule Mobilizon.Service.ActorSuspension do
alias Mobilizon.Users.User alias Mobilizon.Users.User
alias Mobilizon.Web.Email.Actor, as: ActorEmail alias Mobilizon.Web.Email.Actor, as: ActorEmail
alias Mobilizon.Web.Email.Group alias Mobilizon.Web.Email.Group
alias Mobilizon.Web.Upload
require Logger require Logger
import Ecto.Query import Ecto.Query
@ -249,7 +248,7 @@ defmodule Mobilizon.Service.ActorSuspension do
@spec safe_remove_file(String.t(), Actor.t()) :: {:ok, Actor.t()} @spec safe_remove_file(String.t(), Actor.t()) :: {:ok, Actor.t()}
defp safe_remove_file(url, %Actor{} = actor) do defp safe_remove_file(url, %Actor{} = actor) do
case Upload.remove(url) do case Medias.delete_user_profile_media_by_url(url) do
{:ok, _value} -> {:ok, _value} ->
{:ok, actor} {:ok, actor}

View File

@ -3,12 +3,8 @@ defmodule Mobilizon.Service.CleanOrphanMedia do
Service to clean orphan media Service to clean orphan media
""" """
alias Mobilizon.Actors.Actor
alias Mobilizon.Medias alias Mobilizon.Medias
alias Mobilizon.Medias.File
alias Mobilizon.Medias.Media alias Mobilizon.Medias.Media
alias Mobilizon.Storage.Repo
import Ecto.Query
@doc """ @doc """
Clean orphan media Clean orphan media
@ -21,7 +17,7 @@ defmodule Mobilizon.Service.CleanOrphanMedia do
""" """
@spec clean(Keyword.t()) :: {:ok, list(Media.t())} @spec clean(Keyword.t()) :: {:ok, list(Media.t())}
def clean(opts \\ []) do def clean(opts \\ []) do
medias = find_media(opts) medias = Medias.find_media_to_clean(opts)
if Keyword.get(opts, :dry_run, false) do if Keyword.get(opts, :dry_run, false) do
{:ok, medias} {:ok, medias}
@ -35,80 +31,4 @@ defmodule Mobilizon.Service.CleanOrphanMedia do
{:ok, medias} {:ok, medias}
end end
end end
# Ecto doesn't currently allow us to use exists with a subquery,
# so we can't create the union through Ecto
# https://github.com/elixir-ecto/ecto/issues/3619
@union_query [
[from: "events", param: "picture_id"],
[from: "events_medias", param: "media_id"],
[from: "posts", param: "picture_id"],
[from: "posts_medias", param: "media_id"],
[from: "comments_medias", param: "media_id"]
]
|> Enum.map_join(" UNION ", fn [from: from, param: param] ->
"SELECT 1 FROM #{from} WHERE #{from}.#{param} = m0.id"
end)
|> (&"NOT EXISTS(#{&1})").()
@spec find_media(Keyword.t()) :: list(list(Media.t()))
defp find_media(opts) do
default_grace_period =
Mobilizon.Config.get([:instance, :orphan_upload_grace_period_hours], 48)
grace_period = Keyword.get(opts, :grace_period, default_grace_period)
expiration_date = DateTime.add(DateTime.utc_now(), grace_period * -3600)
query =
from(m in Media,
as: :media,
distinct: true,
join: a in Actor,
on: a.id == m.actor_id,
where: is_nil(a.domain),
where: m.inserted_at < ^expiration_date,
where: fragment(@union_query)
)
query
|> Repo.all(timeout: :infinity)
|> Enum.filter(fn %Media{file: %File{url: url}} ->
!url_is_also_a_profile_file?(url) && is_all_media_orphan?(url, expiration_date)
end)
|> Enum.chunk_by(fn %Media{file: %File{url: url}} ->
url
|> String.split("?", parts: 2)
|> hd
end)
end
def is_all_media_orphan?(url, expiration_date) do
url
|> Medias.get_all_media_by_url()
|> Enum.all?(&is_media_orphan?(&1, expiration_date))
end
@spec is_media_orphan?(Media.t(), DateTime.t()) :: boolean()
def is_media_orphan?(%Media{id: media_id}, expiration_date) do
media_query =
from(m in Media,
as: :media,
distinct: true,
join: a in Actor,
on: a.id == m.actor_id,
where: m.id == ^media_id,
where: is_nil(a.domain),
where: m.inserted_at < ^expiration_date,
where: fragment(@union_query)
)
Repo.exists?(media_query)
end
@spec url_is_also_a_profile_file?(String.t()) :: nil
defp url_is_also_a_profile_file?(url) when is_binary(url) do
Actor
|> where([a], fragment("avatar->>'url'") == ^url or fragment("banner->>'url'") == ^url)
|> Repo.exists?()
end
end end

View File

@ -256,6 +256,7 @@ defmodule Mobilizon.Mixfile do
end end
defp run_test(args) do defp run_test(args) do
File.mkdir("test/uploads")
Mix.Task.run("test", args) Mix.Task.run("test", args)
File.rm_rf!("test/uploads") File.rm_rf!("test/uploads")
end end

View File

@ -215,7 +215,7 @@ defmodule Mobilizon.Federation.ActivityPub.Transmogrifier.CommentsTest do
assert capture_log([level: :warn], fn -> assert capture_log([level: :warn], fn ->
{:ok, _returned_activity, _entity} = Transmogrifier.handle_incoming(data) {:ok, _returned_activity, _entity} = Transmogrifier.handle_incoming(data)
end) =~ "[warn] Parent object is something we don't handle" end) =~ "[warning] Parent object is something we don't handle"
end end
test "it ignores incoming private notes" do test "it ignores incoming private notes" do

View File

@ -4,6 +4,7 @@ defmodule Mobilizon.MediaTest do
import Mobilizon.Factory import Mobilizon.Factory
alias Mobilizon.{Config, Medias} alias Mobilizon.{Config, Medias}
alias Mobilizon.Medias.Media
alias Mobilizon.Web.Upload.Uploader alias Mobilizon.Web.Upload.Uploader
@ -29,8 +30,16 @@ defmodule Mobilizon.MediaTest do
assert media.file.name == "something old" assert media.file.name == "something old"
end end
end
test "delete_media/1 deletes the media" do describe "delete_media/1" do
setup do
File.rm_rf!(Config.get!([Uploader.Local, :uploads]))
File.mkdir(Config.get!([Uploader.Local, :uploads]))
:ok
end
test "deletes the media" do
media = insert(:media) media = insert(:media)
%URI{path: "/media/" <> path} = URI.parse(media.file.url) %URI{path: "/media/" <> path} = URI.parse(media.file.url)
@ -48,5 +57,61 @@ defmodule Mobilizon.MediaTest do
"/" <> path "/" <> path
) )
end end
test "doesn't delete the media if two media entities are using the same URL" do
Config.put([Mobilizon.Web.Upload, :filters], [
Mobilizon.Web.Upload.Filter.Dedupe
])
media1 = insert(:media)
media2 = insert(:media)
assert media1.file.url == media2.file.url
%URI{path: "/media/" <> path} = URI.parse(media1.file.url)
assert File.exists?(
Config.get!([Uploader.Local, :uploads]) <>
"/" <> path
)
assert {:ok, %Media{}} = Medias.delete_media(media1)
assert_raise Ecto.NoResultsError, fn -> Medias.get_media!(media1.id) end
assert File.exists?(
Config.get!([Uploader.Local, :uploads]) <>
"/" <> path
)
Config.put([Mobilizon.Web.Upload, :filters], [])
end
test "doesn't delete the media if the same file is being used in a profile" do
Config.put([Mobilizon.Web.Upload, :filters], [
Mobilizon.Web.Upload.Filter.Dedupe
])
actor = insert(:actor)
media = insert(:media)
assert media.file.url == actor.avatar.url
%URI{path: "/media/" <> path} = URI.parse(media.file.url)
assert File.exists?(
Config.get!([Uploader.Local, :uploads]) <>
"/" <> path
)
assert {:ok, %Media{}} = Medias.delete_media(media)
assert_raise Ecto.NoResultsError, fn -> Medias.get_media!(media.id) end
assert File.exists?(
Config.get!([Uploader.Local, :uploads]) <>
"/" <> path
)
Config.put([Mobilizon.Web.Upload, :filters], [])
end
end end
end end