fix: allow verification when page size exceeds 1MB (using HTML5 parser) (#22879)

* fix: allow verification when page size exceeds 1MB
Truncates the page after 1MB instead of raising a length validation error

Closes #15316

* switch to HTML5 parser, fix rubocop errors

* undo rubocop fixes

Co-authored-by: Chris Zubak-Skees <chriszs@gmail.com>
This commit is contained in:
Markus Unterwaditzer 2023-01-11 21:59:13 +01:00 committed by GitHub
parent fd33bcb3b2
commit 0c689b9d01
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 43 additions and 5 deletions

View file

@ -154,9 +154,7 @@ class Request
end end
module ClientLimit module ClientLimit
def body_with_limit(limit = 1.megabyte) def truncated_body(limit = 1.megabyte)
raise Mastodon::LengthValidationError if content_length.present? && content_length > limit
if charset.nil? if charset.nil?
encoding = Encoding::BINARY encoding = Encoding::BINARY
else else
@ -173,11 +171,19 @@ class Request
contents << chunk contents << chunk
chunk.clear chunk.clear
raise Mastodon::LengthValidationError if contents.bytesize > limit break if contents.bytesize > limit
end end
contents contents
end end
# Returns the response body, refusing any body larger than +limit+ bytes.
#
# Raises Mastodon::LengthValidationError up front when +content_length+ is
# present and already exceeds +limit+, and again after reading when the
# contents returned by +truncated_body+ are larger than +limit+ bytes.
def body_with_limit(limit = 1.megabyte)
  declared_length = content_length
  raise Mastodon::LengthValidationError if declared_length.present? && declared_length > limit

  # truncated_body may hand back more than +limit+ bytes; treat that as an error here.
  truncated_body(limit).tap do |contents|
    raise Mastodon::LengthValidationError if contents.bytesize > limit
  end
end
end end
if ::HTTP::Response.methods.include?(:body_with_limit) && !Rails.env.production? if ::HTTP::Response.methods.include?(:body_with_limit) && !Rails.env.production?

View file

@ -26,7 +26,7 @@ class VerifyLinkService < BaseService
def link_back_present? def link_back_present?
return false if @body.blank? return false if @body.blank?
links = Nokogiri::HTML(@body).xpath('//a[contains(concat(" ", normalize-space(@rel), " "), " me ")]|//link[contains(concat(" ", normalize-space(@rel), " "), " me ")]') links = Nokogiri::HTML5(@body).xpath('//a[contains(concat(" ", normalize-space(@rel), " "), " me ")]|//link[contains(concat(" ", normalize-space(@rel), " "), " me ")]')
if links.any? { |link| link['href']&.downcase == @link_back.downcase } if links.any? { |link| link['href']&.downcase == @link_back.downcase }
true true

View file

@ -120,6 +120,11 @@ describe Request do
expect { subject.perform { |response| response.body_with_limit } }.to raise_error Mastodon::LengthValidationError expect { subject.perform { |response| response.body_with_limit } }.to raise_error Mastodon::LengthValidationError
end end
# A 2MB body served in one piece must come back from truncated_body shorter
# than the 2MB that was sent.
it 'truncates large monolithic body' do
  oversized = 2.megabytes
  payload = SecureRandom.random_bytes(oversized)
  stub_request(:any, 'http://example.com').to_return(body: payload, headers: { 'Content-Length' => oversized })

  read_bytes = subject.perform { |response| response.truncated_body.bytesize }
  expect(read_bytes).to be < oversized
end
it 'uses binary encoding if Content-Type does not tell encoding' do it 'uses binary encoding if Content-Type does not tell encoding' do
stub_request(:any, 'http://example.com').to_return(body: '', headers: { 'Content-Type' => 'text/html' }) stub_request(:any, 'http://example.com').to_return(body: '', headers: { 'Content-Type' => 'text/html' })
expect(subject.perform { |response| response.body_with_limit.encoding }).to eq Encoding::BINARY expect(subject.perform { |response| response.body_with_limit.encoding }).to eq Encoding::BINARY

View file

@ -73,6 +73,33 @@ RSpec.describe VerifyLinkService, type: :service do
end end
end end
# Truncation case: the href value is complete (its closing quote is present),
# but the document ends before the <a> start tag is closed with `>`.
context 'when a document is truncated but the link back is valid' do
let(:html) do
"
<!doctype html>
<body>
<a rel=\"me\" href=\"#{ActivityPub::TagManager.instance.url_for(account)}\"
"
end
# The unfinished tag is expected not to count as a link back.
# NOTE(review): presumably the HTML5 parser drops a start tag cut off by end of input — confirm.
it 'marks the field as not verified' do
expect(field.verified?).to be false
end
end
# Truncation case: the document ends immediately after the interpolated URL, so
# the href attribute value has no closing quote and the <a> tag has no `>`.
# The URL seen so far could be only a prefix of a longer href.
context 'when a link back might be truncated' do
let(:html) do
"
<!doctype html>
<body>
<a rel=\"me\" href=\"#{ActivityPub::TagManager.instance.url_for(account)}"
end
# A possibly cut-off href must not be accepted as a verified link back.
it 'does not mark the field as verified' do
expect(field.verified?).to be false
end
end
context 'when a link does not contain a link back' do context 'when a link does not contain a link back' do
let(:html) { '' } let(:html) { '' }