Disable language detection for texts shorter than 140 characters (#8010)
If the input text is blank after preparation (only a mention, only a URL, or empty, as in a media-only post), use nil as the language, since such a post is OK to show to everyone. Otherwise, when detection is skipped or unreliable, always fall back to the account's locale or, failing that, the server's default locale.
This commit is contained in:
parent
0180037dfb
commit
38e9662d78
|
@ -3,12 +3,16 @@
|
||||||
class LanguageDetector
|
class LanguageDetector
|
||||||
include Singleton
|
include Singleton
|
||||||
|
|
||||||
|
CHARACTER_THRESHOLD = 140
|
||||||
|
|
||||||
def initialize
|
def initialize
|
||||||
@identifier = CLD3::NNetLanguageIdentifier.new(1, 2048)
|
@identifier = CLD3::NNetLanguageIdentifier.new(1, 2048)
|
||||||
end
|
end
|
||||||
|
|
||||||
def detect(text, account)
|
def detect(text, account)
|
||||||
detect_language_code(text) || default_locale(account)
|
input_text = prepare_text(text)
|
||||||
|
return if input_text.blank?
|
||||||
|
detect_language_code(input_text) || default_locale(account)
|
||||||
end
|
end
|
||||||
|
|
||||||
def language_names
|
def language_names
|
||||||
|
@ -23,8 +27,13 @@ class LanguageDetector
|
||||||
simplify_text(text).strip
|
simplify_text(text).strip
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def unreliable_input?(text)
|
||||||
|
text.size < CHARACTER_THRESHOLD
|
||||||
|
end
|
||||||
|
|
||||||
def detect_language_code(text)
|
def detect_language_code(text)
|
||||||
result = @identifier.find_language(prepare_text(text))
|
return if unreliable_input?(text)
|
||||||
|
result = @identifier.find_language(text)
|
||||||
iso6391(result.language.to_s).to_sym if result.reliable?
|
iso6391(result.language.to_s).to_sym if result.reliable?
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -66,6 +75,6 @@ class LanguageDetector
|
||||||
end
|
end
|
||||||
|
|
||||||
def default_locale(account)
|
def default_locale(account)
|
||||||
account.user_locale&.to_sym
|
account.user_locale&.to_sym || I18n.default_locale
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -57,7 +57,7 @@ describe LanguageDetector do
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'detects spanish language' do
|
it 'detects spanish language' do
|
||||||
string = 'Obtener un Hola y bienvenidos a Mastodon'
|
string = 'Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon'
|
||||||
result = described_class.instance.detect(string, account_without_user_locale)
|
result = described_class.instance.detect(string, account_without_user_locale)
|
||||||
|
|
||||||
expect(result).to eq :es
|
expect(result).to eq :es
|
||||||
|
@ -86,7 +86,7 @@ describe LanguageDetector do
|
||||||
account = double(user_locale: 'fr')
|
account = double(user_locale: 'fr')
|
||||||
result = described_class.instance.detect('', account)
|
result = described_class.instance.detect('', account)
|
||||||
|
|
||||||
expect(result).to eq :fr
|
expect(result).to eq nil
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'uses nil when account is present but has no locale' do
|
it 'uses nil when account is present but has no locale' do
|
||||||
|
|
Loading…
Reference in a new issue