From e754083e8a33778b5dd8d43efc5604ed50efb0e4 Mon Sep 17 00:00:00 2001
From: Eugen Rochko <eugen@zeonfederated.com>
Date: Fri, 1 Sep 2023 09:43:12 +0200
Subject: [PATCH] Fix unmatched quotes and prefixes causing search to fail
 (#26701)

---
 app/lib/search_query_parser.rb            |   4 +-
 app/lib/search_query_transformer.rb       | 101 +++++++++++-----------
 spec/lib/search_query_parser_spec.rb      |  98 +++++++++++++++++++++
 spec/lib/search_query_transformer_spec.rb |  57 ++++++++++--
 4 files changed, 200 insertions(+), 60 deletions(-)
 create mode 100644 spec/lib/search_query_parser_spec.rb

diff --git a/app/lib/search_query_parser.rb b/app/lib/search_query_parser.rb
index 5d6ffbf29..1c57b9b02 100644
--- a/app/lib/search_query_parser.rb
+++ b/app/lib/search_query_parser.rb
@@ -6,10 +6,10 @@ class SearchQueryParser < Parslet::Parser
   rule(:colon)     { str(':') }
   rule(:space)     { match('\s').repeat(1) }
   rule(:operator)  { (str('+') | str('-')).as(:operator) }
-  rule(:prefix)    { (term >> colon).as(:prefix) }
+  rule(:prefix)    { term >> colon }
   rule(:shortcode) { (colon >> term >> colon.maybe).as(:shortcode) }
   rule(:phrase)    { (quote >> (term >> space.maybe).repeat >> quote).as(:phrase) }
-  rule(:clause)    { (operator.maybe >> prefix.maybe >> (phrase | term | shortcode)).as(:clause) }
+  rule(:clause)    { (operator.maybe >> prefix.maybe.as(:prefix) >> (phrase | term | shortcode)).as(:clause) | prefix.as(:clause) | quote.as(:junk) }
   rule(:query)     { (clause >> space.maybe).repeat.as(:query) }
   root(:query)
 end
diff --git a/app/lib/search_query_transformer.rb b/app/lib/search_query_transformer.rb
index f10ccfb28..e81c0c081 100644
--- a/app/lib/search_query_transformer.rb
+++ b/app/lib/search_query_transformer.rb
@@ -1,50 +1,32 @@
 # frozen_string_literal: true
 
 class SearchQueryTransformer < Parslet::Transform
+  SUPPORTED_PREFIXES = %w(
+    has
+    is
+    language
+    from
+    before
+    after
+    during
+  ).freeze
+
   class Query
-    attr_reader :should_clauses, :must_not_clauses, :must_clauses, :filter_clauses
+    attr_reader :must_not_clauses, :must_clauses, :filter_clauses
 
     def initialize(clauses)
-      grouped = clauses.chunk(&:operator).to_h
-      @should_clauses = grouped.fetch(:should, [])
+      grouped = clauses.compact.chunk(&:operator).to_h
       @must_not_clauses = grouped.fetch(:must_not, [])
       @must_clauses = grouped.fetch(:must, [])
       @filter_clauses = grouped.fetch(:filter, [])
     end
 
     def apply(search)
-      should_clauses.each { |clause| search = search.query.should(clause_to_query(clause)) }
-      must_clauses.each { |clause| search = search.query.must(clause_to_query(clause)) }
-      must_not_clauses.each { |clause| search = search.query.must_not(clause_to_query(clause)) }
-      filter_clauses.each { |clause| search = search.filter(**clause_to_filter(clause)) }
+      must_clauses.each { |clause| search = search.query.must(clause.to_query) }
+      must_not_clauses.each { |clause| search = search.query.must_not(clause.to_query) }
+      filter_clauses.each { |clause| search = search.filter(**clause.to_query) }
       search.query.minimum_should_match(1)
     end
-
-    private
-
-    def clause_to_query(clause)
-      case clause
-      when TermClause
-        { multi_match: { type: 'most_fields', query: clause.term, fields: ['text', 'text.stemmed'], operator: 'and' } }
-      when PhraseClause
-        { match_phrase: { text: { query: clause.phrase } } }
-      else
-        raise "Unexpected clause type: #{clause}"
-      end
-    end
-
-    def clause_to_filter(clause)
-      case clause
-      when PrefixClause
-        if clause.negated?
-          { bool: { must_not: { clause.type => { clause.filter => clause.term } } } }
-        else
-          { clause.type => { clause.filter => clause.term } }
-        end
-      else
-        raise "Unexpected clause type: #{clause}"
-      end
-    end
   end
 
   class Operator
@@ -63,31 +45,38 @@ class SearchQueryTransformer < Parslet::Transform
   end
 
   class TermClause
-    attr_reader :prefix, :operator, :term
+    attr_reader :operator, :term
 
-    def initialize(prefix, operator, term)
-      @prefix = prefix
+    def initialize(operator, term)
       @operator = Operator.symbol(operator)
       @term = term
     end
+
+    def to_query
+      { multi_match: { type: 'most_fields', query: @term, fields: ['text', 'text.stemmed'], operator: 'and' } }
+    end
   end
 
   class PhraseClause
-    attr_reader :prefix, :operator, :phrase
+    attr_reader :operator, :phrase
 
-    def initialize(prefix, operator, phrase)
-      @prefix = prefix
+    def initialize(operator, phrase)
       @operator = Operator.symbol(operator)
       @phrase = phrase
     end
+
+    def to_query
+      { match_phrase: { text: { query: @phrase } } }
+    end
   end
 
   class PrefixClause
-    attr_reader :type, :filter, :operator, :term
+    attr_reader :operator, :prefix, :term
 
     def initialize(prefix, operator, term, options = {})
-      @negated  = operator == '-'
-      @options  = options
+      @prefix = prefix
+      @negated = operator == '-'
+      @options = options
       @operator = :filter
 
       case prefix
@@ -116,12 +105,16 @@ class SearchQueryTransformer < Parslet::Transform
         @type = :range
         @term = { gte: term, lte: term, time_zone: @options[:current_account]&.user_time_zone || 'UTC' }
       else
-        raise Mastodon::SyntaxError
+        raise "Unknown prefix: #{prefix}"
       end
     end
 
-    def negated?
-      @negated
+    def to_query
+      if @negated
+        { bool: { must_not: { @type => { @filter => @term } } } }
+      else
+        { @type => { @filter => @term } }
+      end
     end
 
     private
@@ -159,18 +152,26 @@ class SearchQueryTransformer < Parslet::Transform
     prefix   = clause[:prefix][:term].to_s if clause[:prefix]
     operator = clause[:operator]&.to_s
 
-    if clause[:prefix]
+    if clause[:prefix] && SUPPORTED_PREFIXES.include?(prefix)
       PrefixClause.new(prefix, operator, clause[:term].to_s, current_account: current_account)
+    elsif clause[:prefix]
+      TermClause.new(operator, "#{prefix} #{clause[:term]}")
     elsif clause[:term]
-      TermClause.new(prefix, operator, clause[:term].to_s)
+      TermClause.new(operator, clause[:term].to_s)
     elsif clause[:shortcode]
-      TermClause.new(prefix, operator, ":#{clause[:term]}:")
+      TermClause.new(operator, ":#{clause[:term]}:")
     elsif clause[:phrase]
-      PhraseClause.new(prefix, operator, clause[:phrase].is_a?(Array) ? clause[:phrase].map { |p| p[:term].to_s }.join(' ') : clause[:phrase].to_s)
+      PhraseClause.new(operator, clause[:phrase].is_a?(Array) ? clause[:phrase].map { |p| p[:term].to_s }.join(' ') : clause[:phrase].to_s)
     else
       raise "Unexpected clause type: #{clause}"
     end
   end
 
-  rule(query: sequence(:clauses)) { Query.new(clauses) }
+  rule(junk: subtree(:junk)) do
+    nil
+  end
+
+  rule(query: sequence(:clauses)) do
+    Query.new(clauses)
+  end
 end
diff --git a/spec/lib/search_query_parser_spec.rb b/spec/lib/search_query_parser_spec.rb
new file mode 100644
index 000000000..66b0e8f9e
--- /dev/null
+++ b/spec/lib/search_query_parser_spec.rb
@@ -0,0 +1,98 @@
+# frozen_string_literal: true
+
+require 'rails_helper'
+require 'parslet/rig/rspec'
+
+describe SearchQueryParser do
+  let(:parser) { described_class.new }
+
+  context 'with term' do
+    it 'consumes "hello"' do
+      expect(parser.term).to parse('hello')
+    end
+  end
+
+  context 'with prefix' do
+    it 'consumes "foo:"' do
+      expect(parser.prefix).to parse('foo:')
+    end
+  end
+
+  context 'with operator' do
+    it 'consumes "+"' do
+      expect(parser.operator).to parse('+')
+    end
+
+    it 'consumes "-"' do
+      expect(parser.operator).to parse('-')
+    end
+  end
+
+  context 'with shortcode' do
+    it 'consumes ":foo:"' do
+      expect(parser.shortcode).to parse(':foo:')
+    end
+  end
+
+  context 'with phrase' do
+    it 'consumes "hello world"' do
+      expect(parser.phrase).to parse('"hello world"')
+    end
+  end
+
+  context 'with clause' do
+    it 'consumes "foo"' do
+      expect(parser.clause).to parse('foo')
+    end
+
+    it 'consumes "-foo"' do
+      expect(parser.clause).to parse('-foo')
+    end
+
+    it 'consumes "foo:bar"' do
+      expect(parser.clause).to parse('foo:bar')
+    end
+
+    it 'consumes "-foo:bar"' do
+      expect(parser.clause).to parse('-foo:bar')
+    end
+
+    it 'consumes \'foo:"hello world"\'' do
+      expect(parser.clause).to parse('foo:"hello world"')
+    end
+
+    it 'consumes \'-foo:"hello world"\'' do
+      expect(parser.clause).to parse('-foo:"hello world"')
+    end
+
+    it 'consumes "foo:"' do
+      expect(parser.clause).to parse('foo:')
+    end
+
+    it 'consumes \'"\'' do
+      expect(parser.clause).to parse('"')
+    end
+  end
+
+  context 'with query' do
+    it 'consumes "hello -world"' do
+      expect(parser.query).to parse('hello -world')
+    end
+
+    it 'consumes \'foo "hello world"\'' do
+      expect(parser.query).to parse('foo "hello world"')
+    end
+
+    it 'consumes "foo:bar hello"' do
+      expect(parser.query).to parse('foo:bar hello')
+    end
+
+    it 'consumes \'"hello" world "\'' do
+      expect(parser.query).to parse('"hello" world "')
+    end
+
+    it 'consumes "foo:bar bar: hello"' do
+      expect(parser.query).to parse('foo:bar bar: hello')
+    end
+  end
+end
diff --git a/spec/lib/search_query_transformer_spec.rb b/spec/lib/search_query_transformer_spec.rb
index 953f9acb2..17f06d283 100644
--- a/spec/lib/search_query_transformer_spec.rb
+++ b/spec/lib/search_query_transformer_spec.rb
@@ -3,16 +3,57 @@
 require 'rails_helper'
 
 describe SearchQueryTransformer do
-  describe 'initialization' do
-    let(:parser) { SearchQueryParser.new.parse('query') }
+  subject { described_class.new.apply(parser, current_account: nil) }
 
-    it 'sets attributes' do
-      transformer = described_class.new.apply(parser)
+  let(:parser) { SearchQueryParser.new.parse(query) }
 
-      expect(transformer.should_clauses.first).to be_nil
-      expect(transformer.must_clauses.first).to be_a(SearchQueryTransformer::TermClause)
-      expect(transformer.must_not_clauses.first).to be_nil
-      expect(transformer.filter_clauses.first).to be_nil
+  context 'with "hello world"' do
+    let(:query) { 'hello world' }
+
+    it 'transforms clauses' do
+      expect(subject.must_clauses.map(&:term)).to match_array %w(hello world)
+      expect(subject.must_not_clauses).to be_empty
+      expect(subject.filter_clauses).to be_empty
+    end
+  end
+
+  context 'with "hello -world"' do
+    let(:query) { 'hello -world' }
+
+    it 'transforms clauses' do
+      expect(subject.must_clauses.map(&:term)).to match_array %w(hello)
+      expect(subject.must_not_clauses.map(&:term)).to match_array %w(world)
+      expect(subject.filter_clauses).to be_empty
+    end
+  end
+
+  context 'with "hello is:reply"' do
+    let(:query) { 'hello is:reply' }
+
+    it 'transforms clauses' do
+      expect(subject.must_clauses.map(&:term)).to match_array %w(hello)
+      expect(subject.must_not_clauses).to be_empty
+      expect(subject.filter_clauses.map(&:term)).to match_array %w(reply)
+    end
+  end
+
+  context 'with "foo: bar"' do
+    let(:query) { 'foo: bar' }
+
+    it 'transforms clauses' do
+      expect(subject.must_clauses.map(&:term)).to match_array %w(foo bar)
+      expect(subject.must_not_clauses).to be_empty
+      expect(subject.filter_clauses).to be_empty
+    end
+  end
+
+  context 'with "foo:bar"' do
+    let(:query) { 'foo:bar' }
+
+    it 'transforms clauses' do
+      expect(subject.must_clauses.map(&:term)).to contain_exactly('foo bar')
+      expect(subject.must_not_clauses).to be_empty
+      expect(subject.filter_clauses).to be_empty
     end
   end
 end