From 68b4e36c82344fba7c5a01e9f8dc9ddbaaf4e3ff Mon Sep 17 00:00:00 2001
From: Eugen Rochko <eugen@zeonfederated.com>
Date: Mon, 4 Sep 2023 10:18:45 +0200
Subject: [PATCH] Fix `#hashtag` matching non-hashtagged posts in search
 (#26781)

---
 app/chewy/public_statuses_index.rb  | 13 ++++++++++++-
 app/chewy/statuses_index.rb         | 13 ++++++++++++-
 app/chewy/tags_index.rb             | 24 +++++++++++++++---------
 app/lib/search_query_transformer.rb |  6 +++++-
 4 files changed, 44 insertions(+), 12 deletions(-)

diff --git a/app/chewy/public_statuses_index.rb b/app/chewy/public_statuses_index.rb
index 5c68a1365..4be204d4a 100644
--- a/app/chewy/public_statuses_index.rb
+++ b/app/chewy/public_statuses_index.rb
@@ -37,18 +37,29 @@ class PublicStatusesIndex < Chewy::Index
           english_stemmer
         ),
       },
+
+      hashtag: {
+        tokenizer: 'keyword',
+        filter: %w(
+          word_delimiter_graph
+          lowercase
+          asciifolding
+          cjk_width
+        ),
+      },
     },
   }
 
   index_scope ::Status.unscoped
                       .kept
                       .indexable
-                      .includes(:media_attachments, :preloadable_poll, :preview_cards)
+                      .includes(:media_attachments, :preloadable_poll, :preview_cards, :tags)
 
   root date_detection: false do
     field(:id, type: 'long')
     field(:account_id, type: 'long')
     field(:text, type: 'text', analyzer: 'verbatim', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') }
+    field(:tags, type: 'text', analyzer: 'hashtag', value: ->(status) { status.tags.map(&:display_name) })
     field(:language, type: 'keyword')
     field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties })
     field(:created_at, type: 'date')
diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb
index 2be7e4525..6b25dc9df 100644
--- a/app/chewy/statuses_index.rb
+++ b/app/chewy/statuses_index.rb
@@ -37,15 +37,26 @@ class StatusesIndex < Chewy::Index
           english_stemmer
         ),
       },
+
+      hashtag: {
+        tokenizer: 'keyword',
+        filter: %w(
+          word_delimiter_graph
+          lowercase
+          asciifolding
+          cjk_width
+        ),
+      },
     },
   }
 
-  index_scope ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preview_cards, :local_mentioned, :local_favorited, :local_reblogged, :local_bookmarked, preloadable_poll: :local_voters), delete_if: ->(status) { status.searchable_by.empty? }
+  index_scope ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preview_cards, :local_mentioned, :local_favorited, :local_reblogged, :local_bookmarked, :tags, preloadable_poll: :local_voters), delete_if: ->(status) { status.searchable_by.empty? }
 
   root date_detection: false do
     field(:id, type: 'long')
     field(:account_id, type: 'long')
     field(:text, type: 'text', analyzer: 'verbatim', value: ->(status) { status.searchable_text }) { field(:stemmed, type: 'text', analyzer: 'content') }
+    field(:tags, type: 'text', analyzer: 'hashtag', value: ->(status) { status.tags.map(&:display_name) })
     field(:searchable_by, type: 'long', value: ->(status) { status.searchable_by })
     field(:language, type: 'keyword')
     field(:properties, type: 'keyword', value: ->(status) { status.searchable_properties })
diff --git a/app/chewy/tags_index.rb b/app/chewy/tags_index.rb
index b2d50a000..5b6349a96 100644
--- a/app/chewy/tags_index.rb
+++ b/app/chewy/tags_index.rb
@@ -5,12 +5,21 @@ class TagsIndex < Chewy::Index
     analyzer: {
       content: {
         tokenizer: 'keyword',
-        filter: %w(lowercase asciifolding cjk_width),
+        filter: %w(
+          word_delimiter_graph
+          lowercase
+          asciifolding
+          cjk_width
+        ),
       },
 
       edge_ngram: {
         tokenizer: 'edge_ngram',
-        filter: %w(lowercase asciifolding cjk_width),
+        filter: %w(
+          lowercase
+          asciifolding
+          cjk_width
+        ),
       },
     },
 
@@ -30,12 +39,9 @@ class TagsIndex < Chewy::Index
   end
 
   root date_detection: false do
-    field :name, type: 'text', analyzer: 'content' do
-      field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
-    end
-
-    field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? }
-    field :usage, type: 'long', value: ->(tag, crutches) { tag.history.aggregate(crutches.time_period).accounts }
-    field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at }
+    field(:name, type: 'text', analyzer: 'content', value: :display_name) { field(:edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content') }
+    field(:reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? })
+    field(:usage, type: 'long', value: ->(tag, crutches) { tag.history.aggregate(crutches.time_period).accounts })
+    field(:last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at })
   end
 end
diff --git a/app/lib/search_query_transformer.rb b/app/lib/search_query_transformer.rb
index 2c8e95afe..af3964fd3 100644
--- a/app/lib/search_query_transformer.rb
+++ b/app/lib/search_query_transformer.rb
@@ -53,7 +53,11 @@ class SearchQueryTransformer < Parslet::Transform
     end
 
     def to_query
-      { multi_match: { type: 'most_fields', query: @term, fields: ['text', 'text.stemmed'], operator: 'and' } }
+      if @term.start_with?('#')
+        { match: { tags: { query: @term, operator: 'and' } } } # 'and': the hashtag analyzer splits tags into sub-words; require all of them, not any
+      else
+        { multi_match: { type: 'most_fields', query: @term, fields: ['text', 'text.stemmed'], operator: 'and' } }
+      end
     end
   end