2020-02-18 08:57:00 +01:00
|
|
|
# Portions of this file are derived from Pleroma:
|
|
|
|
# Pleroma: A lightweight social networking server
|
|
|
|
# Copyright © 2017-2020 Pleroma Authors <https://pleroma.social/>
|
|
|
|
# SPDX-License-Identifier: AGPL-3.0-only
|
|
|
|
|
|
|
|
defmodule Mobilizon.Service.RichMedia.Parsers.OEmbed do
|
|
|
|
@moduledoc """
|
|
|
|
Module to parse OEmbed data in HTML pages
|
|
|
|
"""
|
|
|
|
alias Mobilizon.Service.Formatter.HTML
|
|
|
|
require Logger
|
|
|
|
|
|
|
|
# Request options handed to `Tesla.get/2` (through `opts:`) when the OEmbed document is
# fetched: redirects are followed and the SSL `versions` list is limited to TLS 1.2.
@http_options [follow_redirect: true, ssl: [versions: [:"tlsv1.2"]]]
|
|
|
|
|
2021-09-28 19:40:37 +02:00
|
|
|
@doc """
Extracts OEmbed data advertised by an HTML page.

The page is searched for OEmbed discovery links, the first advertised endpoint is
fetched, and the resulting data is filtered according to its declared type. The
second argument is ignored.

Returns `{:ok, oembed_data}` on success. Any failure along the way is reported as
`{:error, "No OEmbed data found"}`.
"""
@spec parse(String.t(), map()) :: {:ok, map()} | {:error, String.t()}
def parse(html, _data) do
  Logger.debug("Using OEmbed parser")

  # Each step hands back a non-`{:ok, _}` value on failure, which short-circuits the chain.
  outcome =
    with [_ | _] = link_nodes <- get_discovery_data(html),
         {:ok, endpoint} <- get_oembed_url(link_nodes),
         {:ok, fetched} <- get_oembed_data(endpoint) do
      filter_oembed_data(fetched)
    end

  case outcome do
    {:ok, oembed} ->
      Logger.debug("Data found with OEmbed parser")
      Logger.debug(inspect(oembed))
      {:ok, oembed}

    _failure ->
      {:error, "No OEmbed data found"}
  end
end
|
|
|
|
|
|
|
|
# Parses `html` with Floki and returns every `<link type="application/json+oembed">` node.
# When the document cannot be parsed, the parser's non-`{:ok, _}` result is returned as-is.
defp get_discovery_data(html) do
  case Floki.parse_document(html) do
    {:ok, parsed} -> Floki.find(parsed, "link[type='application/json+oembed']")
    not_parsed -> not_parsed
  end
end
|
|
|
|
|
|
|
|
# Extracts the OEmbed endpoint URL from the first discovered `<link>` node.
#
# Expects a non-empty list of `{"link", attributes, children}` tuples, where `attributes`
# is a list of `{name, value}` pairs.
#
# Returns `{:ok, url}` when that node has a non-empty `href`, and an error tuple otherwise,
# so that a `<link>` without `href` no longer results in `{:ok, nil}` being used as the
# URL of the HTTP request.
defp get_oembed_url([{"link", attributes, _children} | _rest]) do
  case attributes |> Map.new() |> Map.get("href") do
    href when is_binary(href) and href != "" ->
      {:ok, href}

    _missing ->
      {:error, "No href attribute found on OEmbed discovery link"}
  end
end
|
|
|
|
|
2021-01-22 18:14:52 +01:00
|
|
|
# OEmbed response fields kept by `get_oembed_data/1`; any other key found in the provider's
# JSON is dropped. Being a fixed list of atoms, it also lets the JSON string keys be mapped
# to atoms without ever creating atoms from remote input.
@oembed_allowed_attributes ~w(
  type version html width height title author_name author_url provider_name provider_url
  cache_age thumbnail_url thumbnail_width thumbnail_height url
)a

# Fetches the OEmbed document at `url` and normalizes it.
#
# Returns `{:ok, data}` where `data` is a map keyed by the atoms listed in
# `@oembed_allowed_attributes` (only the ones present in the response), with binary values
# trimmed. Returns the non-matching value (an error tuple) when the HTTP request or the JSON
# decoding fails, and `{:error, reason}` when the body is valid JSON but not an object.
defp get_oembed_data(url) do
  with {:ok, %{body: json}} <- Tesla.get(url, opts: @http_options),
       {:ok, decoded} <- Jason.decode(json) do
    normalize_oembed_data(decoded)
  end
end

# Keeps only the allowed attributes of a decoded JSON object and keys them by atom.
#
# `Jason.decode/1` produces string keys while `filter_oembed_data/1` reads atom keys
# (`:type`, `:url`, ...). The conversion goes from the atom whitelist to the string key,
# never the other way round, so no atom is created from untrusted input.
defp normalize_oembed_data(%{} = decoded) do
  data =
    for key <- @oembed_allowed_attributes,
        {:ok, value} <- [Map.fetch(decoded, Atom.to_string(key))],
        into: %{} do
      {key, trim_if_binary(value)}
    end

  {:ok, data}
end

# A JSON array or scalar is not an OEmbed response.
defp normalize_oembed_data(_not_an_object) do
  {:error, "OEmbed response is not a JSON object"}
end

# Strips surrounding whitespace from binaries; other values are returned untouched.
defp trim_if_binary(value) when is_binary(value), do: String.trim(value)
defp trim_if_binary(value), do: value

# Validates atom-keyed OEmbed data according to its `:type` and enriches it.
#
#   * "link"  - adds `:image_remote_url` taken from `:thumbnail_url`.
#   * "photo" - requires a non-empty binary `:url`, used as `:image_remote_url`;
#               `:width` and `:height` default to 0.
#   * "video" - `:html` is passed through `HTML.filter_tags_for_oembed/1`; `:width` and
#               `:height` default to 0; `:image_remote_url` comes from `:thumbnail_url`.
#   * "rich"  - rejected, not supported.
#   * any other type, or no type at all - rejected with an error tuple instead of raising.
#
# Returns `{:ok, data}` or `{:error, reason}`.
defp filter_oembed_data(%{type: "link"} = data) do
  {:ok, Map.put(data, :image_remote_url, Map.get(data, :thumbnail_url))}
end

defp filter_oembed_data(%{type: "photo", url: url} = data) when is_binary(url) and url != "" do
  {:ok, data |> Map.put(:image_remote_url, url) |> put_default_dimensions()}
end

# Photo without a usable URL (missing, empty or `null`).
defp filter_oembed_data(%{type: "photo"}) do
  {:error, "No URL for photo OEmbed data"}
end

defp filter_oembed_data(%{type: "video"} = data) do
  # NOTE(review): assumes `HTML.filter_tags_for_oembed/1` always returns `{:ok, html}`;
  # any other return raises a MatchError here, as it did before.
  {:ok, html} = data |> Map.get(:html, "") |> HTML.filter_tags_for_oembed()

  enriched =
    data
    |> Map.put(:html, html)
    |> put_default_dimensions()
    |> Map.put(:image_remote_url, Map.get(data, :thumbnail_url))

  {:ok, enriched}
end

defp filter_oembed_data(%{type: "rich"}) do
  {:error, "OEmbed data has rich type, which we don't support"}
end

defp filter_oembed_data(%{type: type}) when not is_nil(type) do
  {:error, "Unsupported OEmbed type: #{inspect(type)}"}
end

defp filter_oembed_data(_data) do
  {:error, "No type declared for OEmbed data"}
end

# Sets `:width` and `:height` to 0 when the provider did not send them.
defp put_default_dimensions(data) do
  data
  |> Map.put_new(:width, 0)
  |> Map.put_new(:height, 0)
end
|
|
|
|
end
|