Disable language detection for texts shorter than 140 characters (#8010)
If the input text is blank after preparation (only a mention, only a URL, or empty, as in a media-only post), use nil as the language, since such a post is fine to show to everyone. Otherwise, always fall back to the server's default locale.
This commit is contained in:
parent
0180037dfb
commit
38e9662d78
|
@ -3,12 +3,16 @@
|
|||
class LanguageDetector
|
||||
include Singleton
|
||||
|
||||
CHARACTER_THRESHOLD = 140
|
||||
|
||||
# Builds the CLD3 language identifier and stores it in @identifier for reuse
# by the detection methods. The class includes Singleton, so callers reach
# this object through .instance rather than constructing it themselves.
# NOTE(review): 1 and 2048 are presumably CLD3's min/max byte bounds for the
# analysed text — confirm against the cld3 gem documentation.
def initialize
|
||||
@identifier = CLD3::NNetLanguageIdentifier.new(1, 2048)
|
||||
end
|
||||
|
||||
# Picks the language to associate with +text+ written by +account+.
#
# NOTE(review): this is a rendered diff, so old and new lines are interleaved
# without +/- markers. The first body statement appears to be the pre-change
# implementation; the three statements after it appear to be its replacement.
#
# Replacement behaviour: the text is run through prepare_text first. If the
# result is blank, the method returns nil (no language). Otherwise it returns
# the detected language code, or default_locale(account) when detection
# yields nothing.
def detect(text, account)
|
||||
detect_language_code(text) || default_locale(account)
|
||||
input_text = prepare_text(text)
|
||||
return if input_text.blank?
|
||||
detect_language_code(input_text) || default_locale(account)
|
||||
end
|
||||
|
||||
def language_names
|
||||
|
@ -23,8 +27,13 @@ class LanguageDetector
|
|||
simplify_text(text).strip
|
||||
end
|
||||
|
||||
# Returns true when +text+ is shorter than CHARACTER_THRESHOLD.
# detect_language_code uses this to skip detection on short input.
def unreliable_input?(text)
|
||||
text.size < CHARACTER_THRESHOLD
|
||||
end
|
||||
|
||||
# Runs the CLD3 identifier on +text+ and returns the language as a symbol,
# or nil when no trustworthy answer is available.
#
# NOTE(review): this is a rendered diff, so old and new lines are interleaved.
# The first body statement (which calls prepare_text itself) appears to be the
# pre-change line. The next two statements appear to replace it: they bail out
# early for input flagged by unreliable_input? and pass +text+ to the
# identifier as-is, so the caller is expected to have prepared it already.
#
# The final statement returns a value only when the identifier reports the
# result as reliable; otherwise the method returns nil.
# NOTE(review): iso6391 is defined outside this view — presumably it maps the
# CLD3 language tag to an ISO 639-1 code; verify.
def detect_language_code(text)
|
||||
result = @identifier.find_language(prepare_text(text))
|
||||
return if unreliable_input?(text)
|
||||
result = @identifier.find_language(text)
|
||||
iso6391(result.language.to_s).to_sym if result.reliable?
|
||||
end
|
||||
|
||||
|
@ -66,6 +75,6 @@ class LanguageDetector
|
|||
end
|
||||
|
||||
# Fallback language used when detection produces no result.
#
# NOTE(review): this is a rendered diff, so old and new lines are interleaved.
# The first body statement appears to be the pre-change line: the account's
# user_locale as a symbol, or nil when it is unset. The second appears to be
# its replacement, which additionally falls back to I18n.default_locale so the
# method does not return nil for an account without a locale.
def default_locale(account)
|
||||
account.user_locale&.to_sym
|
||||
account.user_locale&.to_sym || I18n.default_locale
|
||||
end
|
||||
end
|
||||
|
|
|
@ -57,7 +57,7 @@ describe LanguageDetector do
|
|||
end
|
||||
|
||||
it 'detects spanish language' do
|
||||
string = 'Obtener un Hola y bienvenidos a Mastodon'
|
||||
string = 'Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon'
|
||||
result = described_class.instance.detect(string, account_without_user_locale)
|
||||
|
||||
expect(result).to eq :es
|
||||
|
@ -86,7 +86,7 @@ describe LanguageDetector do
|
|||
account = double(user_locale: 'fr')
|
||||
result = described_class.instance.detect('', account)
|
||||
|
||||
expect(result).to eq :fr
|
||||
expect(result).to eq nil
|
||||
end
|
||||
|
||||
it 'uses nil when account is present but has no locale' do
|
||||
|
|
Loading…
Reference in a new issue