Improve rich media parsers
Signed-off-by: Thomas Citharel <tcit@tcit.fr>
This commit is contained in:
parent
a66f19cc5d
commit
2134e7b152
|
@ -87,6 +87,10 @@ defmodule Mobilizon.Service.RichMedia.Parser do
|
|||
|
||||
{:ok, data}
|
||||
|
||||
{:ok, err} ->
|
||||
Logger.debug("HTTP error: #{inspect(err)}")
|
||||
{:error, "HTTP error: #{inspect(err)}"}
|
||||
|
||||
{:error, err} ->
|
||||
Logger.debug("HTTP error: #{inspect(err)}")
|
||||
{:error, "HTTP error: #{inspect(err)}"}
|
||||
|
@ -196,6 +200,8 @@ defmodule Mobilizon.Service.RichMedia.Parser do
|
|||
@spec maybe_parse(String.t()) :: map()
|
||||
defp maybe_parse(html) do
|
||||
Enum.reduce_while(parsers(), %{}, fn parser, acc ->
|
||||
Logger.debug("Using #{inspect(parser)} to parse link")
|
||||
|
||||
case parser.parse(html, acc) do
|
||||
{:ok, data} ->
|
||||
{:halt, data}
|
||||
|
|
|
@ -35,7 +35,7 @@ defmodule Mobilizon.Service.RichMedia.Parsers.Fallback do
|
|||
defp get_page(html, :title) do
|
||||
html
|
||||
|> Floki.parse_document!()
|
||||
|> Floki.find("html title")
|
||||
|> Floki.find("title")
|
||||
|> List.first()
|
||||
|> Floki.text()
|
||||
|> String.trim()
|
||||
|
|
|
@ -53,7 +53,10 @@ defmodule Mobilizon.Service.RichMedia.Parsers.MetaTagsParser do
|
|||
end)
|
||||
|
||||
if data[to_string(key_name)] in Enum.map(allowed_attributes, &to_string/1) do
|
||||
%{String.to_existing_atom(data[to_string(key_name)]) => data[to_string(value_name)]}
|
||||
%{
|
||||
String.to_existing_atom(data[to_string(key_name)]) =>
|
||||
String.trim(data[to_string(value_name)])
|
||||
}
|
||||
else
|
||||
%{}
|
||||
end
|
||||
|
@ -65,7 +68,7 @@ defmodule Mobilizon.Service.RichMedia.Parsers.MetaTagsParser do
|
|||
# Adds the page's <title> text to `meta` under `:title` when the metadata map
# is non-empty and does not already carry a title.
#
# The title is trimmed *before* it is compared against "", so a title made
# only of whitespace is treated as missing instead of being stored as "".
# An existing `:title` is never overwritten (`Map.put_new/3`).
defp maybe_put_title(meta, html) when meta != %{} do
  case html |> get_page_title() |> String.trim() do
    "" -> meta
    title -> Map.put_new(meta, :title, title)
  end
end
|
||||
|
||||
|
@ -80,7 +83,7 @@ defmodule Mobilizon.Service.RichMedia.Parsers.MetaTagsParser do
|
|||
meta
|
||||
|
||||
description ->
|
||||
Map.put_new(meta, :description, description)
|
||||
Map.put_new(meta, :description, String.trim(description))
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -67,7 +67,7 @@ defmodule Mobilizon.Service.RichMedia.Parsers.OEmbed do
|
|||
{:ok, data} <- Jason.decode(json),
|
||||
data <-
|
||||
data
|
||||
|> Map.new(fn {k, v} -> {String.to_existing_atom(k), v} end)
|
||||
|> Map.new(fn {k, v} -> {String.to_existing_atom(k), String.trim(v)} end)
|
||||
|> Map.take(@oembed_allowed_attributes) do
|
||||
{:ok, data}
|
||||
end
|
||||
|
|
|
@ -54,6 +54,7 @@ defmodule Mobilizon.Service.RichMedia.Parsers.OGP do
|
|||
defp transform_tags(data) do
|
||||
data
|
||||
|> Enum.reject(fn {_, v} -> is_nil(v) end)
|
||||
|> Enum.map(fn {k, v} -> {k, String.trim(v)} end)
|
||||
|> Map.new()
|
||||
|> Map.update(:image_remote_url, Map.get(data, :image), & &1)
|
||||
|> Map.update(:width, get_integer_value(data, :"image:width"), & &1)
|
||||
|
|
|
@ -63,6 +63,7 @@ defmodule Mobilizon.Service.RichMedia.Parsers.TwitterCard do
|
|||
# Normalises the raw tag map collected from the page:
#
#   * entries whose value is `nil` are dropped;
#   * binary values are stripped of leading/trailing whitespace, while any
#     other value is kept untouched so `String.trim/1` cannot raise;
#   * `:image_remote_url` defaults to the (already trimmed) `:image` value
#     when it is not present. If there is no `:image` either, the key is
#     set to `nil`, as before.
defp transform_tags(data) do
  tags =
    data
    |> Enum.reject(fn {_, value} -> is_nil(value) end)
    |> Map.new(fn {key, value} ->
      {key, if(is_binary(value), do: String.trim(value), else: value)}
    end)

  # Read the fallback from the cleaned map, not from `data`, so that
  # `:image_remote_url` and `:image` always agree.
  Map.put_new(tags, :image_remote_url, Map.get(tags, :image))
end
|
||||
|
|
Loading…
Reference in a new issue