Improve rich media parsers
Signed-off-by: Thomas Citharel <tcit@tcit.fr>
This commit is contained in:
parent
a66f19cc5d
commit
2134e7b152
|
@ -87,6 +87,10 @@ defmodule Mobilizon.Service.RichMedia.Parser do
|
||||||
|
|
||||||
{:ok, data}
|
{:ok, data}
|
||||||
|
|
||||||
|
{:ok, err} ->
|
||||||
|
Logger.debug("HTTP error: #{inspect(err)}")
|
||||||
|
{:error, "HTTP error: #{inspect(err)}"}
|
||||||
|
|
||||||
{:error, err} ->
|
{:error, err} ->
|
||||||
Logger.debug("HTTP error: #{inspect(err)}")
|
Logger.debug("HTTP error: #{inspect(err)}")
|
||||||
{:error, "HTTP error: #{inspect(err)}"}
|
{:error, "HTTP error: #{inspect(err)}"}
|
||||||
|
@ -196,6 +200,8 @@ defmodule Mobilizon.Service.RichMedia.Parser do
|
||||||
@spec maybe_parse(String.t()) :: map()
|
@spec maybe_parse(String.t()) :: map()
|
||||||
defp maybe_parse(html) do
|
defp maybe_parse(html) do
|
||||||
Enum.reduce_while(parsers(), %{}, fn parser, acc ->
|
Enum.reduce_while(parsers(), %{}, fn parser, acc ->
|
||||||
|
Logger.debug("Using #{inspect(parser)} to parse link")
|
||||||
|
|
||||||
case parser.parse(html, acc) do
|
case parser.parse(html, acc) do
|
||||||
{:ok, data} ->
|
{:ok, data} ->
|
||||||
{:halt, data}
|
{:halt, data}
|
||||||
|
|
|
@ -35,7 +35,7 @@ defmodule Mobilizon.Service.RichMedia.Parsers.Fallback do
|
||||||
defp get_page(html, :title) do
|
defp get_page(html, :title) do
|
||||||
html
|
html
|
||||||
|> Floki.parse_document!()
|
|> Floki.parse_document!()
|
||||||
|> Floki.find("html title")
|
|> Floki.find("title")
|
||||||
|> List.first()
|
|> List.first()
|
||||||
|> Floki.text()
|
|> Floki.text()
|
||||||
|> String.trim()
|
|> String.trim()
|
||||||
|
|
|
@ -53,7 +53,10 @@ defmodule Mobilizon.Service.RichMedia.Parsers.MetaTagsParser do
|
||||||
end)
|
end)
|
||||||
|
|
||||||
if data[to_string(key_name)] in Enum.map(allowed_attributes, &to_string/1) do
|
if data[to_string(key_name)] in Enum.map(allowed_attributes, &to_string/1) do
|
||||||
%{String.to_existing_atom(data[to_string(key_name)]) => data[to_string(value_name)]}
|
%{
|
||||||
|
String.to_existing_atom(data[to_string(key_name)]) =>
|
||||||
|
String.trim(data[to_string(value_name)])
|
||||||
|
}
|
||||||
else
|
else
|
||||||
%{}
|
%{}
|
||||||
end
|
end
|
||||||
|
@ -65,7 +68,7 @@ defmodule Mobilizon.Service.RichMedia.Parsers.MetaTagsParser do
|
||||||
defp maybe_put_title(meta, html) when meta != %{} do
|
defp maybe_put_title(meta, html) when meta != %{} do
|
||||||
case get_page_title(html) do
|
case get_page_title(html) do
|
||||||
"" -> meta
|
"" -> meta
|
||||||
title -> Map.put_new(meta, :title, title)
|
title -> Map.put_new(meta, :title, String.trim(title))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -80,7 +83,7 @@ defmodule Mobilizon.Service.RichMedia.Parsers.MetaTagsParser do
|
||||||
meta
|
meta
|
||||||
|
|
||||||
description ->
|
description ->
|
||||||
Map.put_new(meta, :description, description)
|
Map.put_new(meta, :description, String.trim(description))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -67,7 +67,7 @@ defmodule Mobilizon.Service.RichMedia.Parsers.OEmbed do
|
||||||
{:ok, data} <- Jason.decode(json),
|
{:ok, data} <- Jason.decode(json),
|
||||||
data <-
|
data <-
|
||||||
data
|
data
|
||||||
|> Map.new(fn {k, v} -> {String.to_existing_atom(k), v} end)
|
|> Map.new(fn {k, v} -> {String.to_existing_atom(k), String.trim(v)} end)
|
||||||
|> Map.take(@oembed_allowed_attributes) do
|
|> Map.take(@oembed_allowed_attributes) do
|
||||||
{:ok, data}
|
{:ok, data}
|
||||||
end
|
end
|
||||||
|
|
|
@ -54,6 +54,7 @@ defmodule Mobilizon.Service.RichMedia.Parsers.OGP do
|
||||||
defp transform_tags(data) do
|
defp transform_tags(data) do
|
||||||
data
|
data
|
||||||
|> Enum.reject(fn {_, v} -> is_nil(v) end)
|
|> Enum.reject(fn {_, v} -> is_nil(v) end)
|
||||||
|
|> Enum.map(fn {k, v} -> {k, String.trim(v)} end)
|
||||||
|> Map.new()
|
|> Map.new()
|
||||||
|> Map.update(:image_remote_url, Map.get(data, :image), & &1)
|
|> Map.update(:image_remote_url, Map.get(data, :image), & &1)
|
||||||
|> Map.update(:width, get_integer_value(data, :"image:width"), & &1)
|
|> Map.update(:width, get_integer_value(data, :"image:width"), & &1)
|
||||||
|
|
|
@ -63,6 +63,7 @@ defmodule Mobilizon.Service.RichMedia.Parsers.TwitterCard do
|
||||||
defp transform_tags(data) do
|
defp transform_tags(data) do
|
||||||
data
|
data
|
||||||
|> Enum.reject(fn {_, v} -> is_nil(v) end)
|
|> Enum.reject(fn {_, v} -> is_nil(v) end)
|
||||||
|
|> Enum.map(fn {k, v} -> {k, String.trim(v)} end)
|
||||||
|> Map.new()
|
|> Map.new()
|
||||||
|> Map.update(:image_remote_url, Map.get(data, :image), & &1)
|
|> Map.update(:image_remote_url, Map.get(data, :image), & &1)
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue