Improve rich media parsers

Signed-off-by: Thomas Citharel <tcit@tcit.fr>
This commit is contained in:
Thomas Citharel 2022-01-18 12:52:45 +01:00
parent a66f19cc5d
commit 2134e7b152
No known key found for this signature in database
GPG key ID: A061B9DDE0CA0773
6 changed files with 16 additions and 5 deletions

View file

@ -87,6 +87,10 @@ defmodule Mobilizon.Service.RichMedia.Parser do
{:ok, data}
{:ok, err} ->
Logger.debug("HTTP error: #{inspect(err)}")
{:error, "HTTP error: #{inspect(err)}"}
{:error, err} ->
Logger.debug("HTTP error: #{inspect(err)}")
{:error, "HTTP error: #{inspect(err)}"}
@ -196,6 +200,8 @@ defmodule Mobilizon.Service.RichMedia.Parser do
@spec maybe_parse(String.t()) :: map()
defp maybe_parse(html) do
Enum.reduce_while(parsers(), %{}, fn parser, acc ->
Logger.debug("Using #{inspect(parser)} to parse link")
case parser.parse(html, acc) do
{:ok, data} ->
{:halt, data}

View file

@ -35,7 +35,7 @@ defmodule Mobilizon.Service.RichMedia.Parsers.Fallback do
defp get_page(html, :title) do
html
|> Floki.parse_document!()
|> Floki.find("html title")
|> Floki.find("title")
|> List.first()
|> Floki.text()
|> String.trim()

View file

@ -53,7 +53,10 @@ defmodule Mobilizon.Service.RichMedia.Parsers.MetaTagsParser do
end)
if data[to_string(key_name)] in Enum.map(allowed_attributes, &to_string/1) do
%{String.to_existing_atom(data[to_string(key_name)]) => data[to_string(value_name)]}
%{
String.to_existing_atom(data[to_string(key_name)]) =>
String.trim(data[to_string(value_name)])
}
else
%{}
end
@ -65,7 +68,7 @@ defmodule Mobilizon.Service.RichMedia.Parsers.MetaTagsParser do
defp maybe_put_title(meta, html) when meta != %{} do
case get_page_title(html) do
"" -> meta
title -> Map.put_new(meta, :title, title)
title -> Map.put_new(meta, :title, String.trim(title))
end
end
@ -80,7 +83,7 @@ defmodule Mobilizon.Service.RichMedia.Parsers.MetaTagsParser do
meta
description ->
Map.put_new(meta, :description, description)
Map.put_new(meta, :description, String.trim(description))
end
end

View file

@ -67,7 +67,7 @@ defmodule Mobilizon.Service.RichMedia.Parsers.OEmbed do
{:ok, data} <- Jason.decode(json),
data <-
data
|> Map.new(fn {k, v} -> {String.to_existing_atom(k), v} end)
|> Map.new(fn {k, v} -> {String.to_existing_atom(k), String.trim(v)} end)
|> Map.take(@oembed_allowed_attributes) do
{:ok, data}
end

View file

@ -54,6 +54,7 @@ defmodule Mobilizon.Service.RichMedia.Parsers.OGP do
defp transform_tags(data) do
data
|> Enum.reject(fn {_, v} -> is_nil(v) end)
|> Enum.map(fn {k, v} -> {k, String.trim(v)} end)
|> Map.new()
|> Map.update(:image_remote_url, Map.get(data, :image), & &1)
|> Map.update(:width, get_integer_value(data, :"image:width"), & &1)

View file

@ -63,6 +63,7 @@ defmodule Mobilizon.Service.RichMedia.Parsers.TwitterCard do
# Normalizes the collected Twitter Card tags into the final metadata map.
#
# Entries whose value is `nil` are dropped and every remaining value is
# stripped of leading/trailing whitespace. If the result has no
# `:image_remote_url` entry, one is added from the `:image` entry
# (`nil` when there is no image either).
defp transform_tags(data) do
  tags =
    data
    |> Enum.reject(fn {_, v} -> is_nil(v) end)
    |> Map.new(fn {k, v} -> {k, String.trim(v)} end)

  # Take the fallback from the already-trimmed map rather than the raw input,
  # so `:image` and `:image_remote_url` carry the same normalized value.
  Map.put_new(tags, :image_remote_url, Map.get(tags, :image))
end