# Portions of this file are derived from Pleroma:
# Copyright © 2017-2019 Pleroma Authors <https://pleroma.social>
# SPDX-License-Identifier: AGPL-3.0-only
# Upstream: https://git.pleroma.social/pleroma/pleroma/blob/develop/lib/pleroma/html.ex

defmodule Mobilizon.Service.Formatter.HTML do
  @moduledoc """
  Service to filter tags out of HTML content.
  """

  alias FastSanitize.Sanitizer
  alias Mobilizon.Service.Formatter.{DefaultScrubbler, OEmbed}

  @doc """
  Scrubs `html` with the `DefaultScrubbler` rules.

  The value produced by `FastSanitize.Sanitizer.scrub/2` is returned untouched.
  """
  def filter_tags(html), do: Sanitizer.scrub(html, DefaultScrubbler)

  @doc """
  Delegates to `FastSanitize.basic_html/1`.
  """
  defdelegate basic_html(html), to: FastSanitize

  @doc """
  Removes all tags from `html` and passes the remaining text through
  `HtmlEntities.decode/1`.

  Raises a `RuntimeError` with the message `"Failed to filter tags"` when
  `FastSanitize.strip_tags/1` returns anything other than `{:ok, text}`.
  """
  @spec strip_tags(String.t()) :: String.t() | no_return()
  def strip_tags(html) do
    case FastSanitize.strip_tags(html) do
      {:ok, stripped} -> HtmlEntities.decode(stripped)
      _ -> raise "Failed to filter tags"
    end
  end

  @doc """
  Inserts a space between adjacent tags so that words are not glued together
  once the tags are stripped.

  `<h1>test</h1><p>next</p>` becomes `test next` instead of `testnext`.

  Only a `>` immediately followed by `<` receives a space; text that directly
  follows a closing tag (`</h1>next`) is left as is. Non-binary input (for
  instance `nil`) is returned unchanged.
  """
  @spec strip_tags_and_insert_spaces(html) :: String.t() | html when html: term()
  def strip_tags_and_insert_spaces(html) when is_binary(html) do
    html
    |> String.replace("><", "> <")
    |> strip_tags()
  end

  def strip_tags_and_insert_spaces(html), do: html

  @doc """
  Converts `html` into plain text.

  Opening `<li>` tags (with or without attributes) become a `- ` bullet, and
  `<br>` (including the self-closing `<br/>` / `<br />` forms) as well as the
  closing `</p>`, `</li>`, `</div>` and `</h.>` tags become a line break,
  before every remaining tag is removed with `strip_tags/1`.

  Returns `nil` when given `nil`.
  """
  @spec html_to_text(String.t() | nil) :: String.t() | nil
  def html_to_text(nil), do: nil

  def html_to_text(html) do
    html
    # `(?:\s[^>]*)?` accepts attributes while still rejecting tags such as `<link>`.
    |> String.replace(~r/<li(?:\s[^>]*)?>/, "- ")
    # NOTE(review): the break is emitted as "\n\r" (LF then CR), not CRLF; it is
    # kept byte-for-byte because callers may depend on the exact output.
    |> String.replace(
      ~r/<\/?\s?br\s*\/?>|<\/\s?p>|<\/\s?li>|<\/\s?div>|<\/\s?h.>/,
      "\n\r"
    )
    |> strip_tags()
  end

  @doc """
  Scrubs `html` with the `OEmbed` scrubber rules.

  The value produced by `FastSanitize.Sanitizer.scrub/2` is returned untouched.
  """
  def filter_tags_for_oembed(html), do: Sanitizer.scrub(html, OEmbed)
end