Commit 0ebf0d59 authored by Michael Crouse, committed by Chromium LUCI CQ

[LanguageDetection] Rename model elements of LanguageDetectionDetails.

This is a global replace of:

/cld_language/model_detected_language
/is_cld_reliable/is_model_reliable



Bug: 1157983
Change-Id: I9471600db6ae3221dd63fa92ee30d8c093748b20
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2587620
Auto-Submit: Michael Crouse <mcrouse@chromium.org>
Reviewed-by: Donn Denman <donnd@chromium.org>
Reviewed-by: Jonathan Metzman <metzman@chromium.org>
Reviewed-by: Daniel Cheng <dcheng@chromium.org>
Reviewed-by: Trevor Perrier <perrier@chromium.org>
Reviewed-by: Scott Little <sclittle@chromium.org>
Commit-Queue: Daniel Cheng <dcheng@chromium.org>
Cr-Commit-Position: refs/heads/master@{#836839}
parent 5d4a8a09
...@@ -190,14 +190,14 @@ ContextualSearchContext::GetTranslationLanguages() const { ...@@ -190,14 +190,14 @@ ContextualSearchContext::GetTranslationLanguages() const {
std::string ContextualSearchContext::GetReliableLanguage( std::string ContextualSearchContext::GetReliableLanguage(
const base::string16& contents) const { const base::string16& contents) const {
std::string content_language; std::string model_detected_language;
std::string html_lang; bool is_model_reliable;
std::string cld_language;
bool is_cld_reliable;
std::string language = translate::DeterminePageLanguage( std::string language = translate::DeterminePageLanguage(
content_language, html_lang, contents, &cld_language, &is_cld_reliable); /*content_language=*/std::string(),
/*html_lang=*/std::string(), contents, &model_detected_language,
&is_model_reliable);
// Make sure we return an empty string when unreliable or an unknown result. // Make sure we return an empty string when unreliable or an unknown result.
if (!is_cld_reliable || language == translate::kUnknownLanguageCode) if (!is_model_reliable || language == translate::kUnknownLanguageCode)
language = ""; language = "";
return language; return language;
} }
......
...@@ -43,8 +43,8 @@ void IOSLanguageDetectionTabHelper::RemoveObserver(Observer* observer) { ...@@ -43,8 +43,8 @@ void IOSLanguageDetectionTabHelper::RemoveObserver(Observer* observer) {
void IOSLanguageDetectionTabHelper::OnLanguageDetermined( void IOSLanguageDetectionTabHelper::OnLanguageDetermined(
const translate::LanguageDetectionDetails& details) { const translate::LanguageDetectionDetails& details) {
// Update language histogram. // Update language histogram.
if (url_language_histogram_ && details.is_cld_reliable) { if (url_language_histogram_ && details.is_model_reliable) {
url_language_histogram_->OnPageVisited(details.cld_language); url_language_histogram_->OnPageVisited(details.model_detected_language);
} }
for (auto& observer : observer_list_) { for (auto& observer : observer_list_) {
......
...@@ -77,8 +77,8 @@ TEST_F(IOSLanguageDetectionTabHelperObserverBridgeTest, OnLanguageDetermined) { ...@@ -77,8 +77,8 @@ TEST_F(IOSLanguageDetectionTabHelperObserverBridgeTest, OnLanguageDetermined) {
translate::LanguageDetectionDetails details; translate::LanguageDetectionDetails details;
details.content_language = kContentLanguage; details.content_language = kContentLanguage;
details.cld_language = kUndefined; details.model_detected_language = kUndefined;
details.is_cld_reliable = true; details.is_model_reliable = true;
details.has_notranslate = true; details.has_notranslate = true;
details.html_root_language = kRootLanguage; details.html_root_language = kRootLanguage;
details.adopted_language = kAdoptedLanguage; details.adopted_language = kAdoptedLanguage;
...@@ -89,8 +89,8 @@ TEST_F(IOSLanguageDetectionTabHelperObserverBridgeTest, OnLanguageDetermined) { ...@@ -89,8 +89,8 @@ TEST_F(IOSLanguageDetectionTabHelperObserverBridgeTest, OnLanguageDetermined) {
const translate::LanguageDetectionDetails& forwarded_details = const translate::LanguageDetectionDetails& forwarded_details =
observer().languageDetectionDetails; observer().languageDetectionDetails;
EXPECT_EQ(kContentLanguage, forwarded_details.content_language); EXPECT_EQ(kContentLanguage, forwarded_details.content_language);
EXPECT_EQ(kUndefined, forwarded_details.cld_language); EXPECT_EQ(kUndefined, forwarded_details.model_detected_language);
EXPECT_TRUE(forwarded_details.is_cld_reliable); EXPECT_TRUE(forwarded_details.is_model_reliable);
EXPECT_TRUE(forwarded_details.has_notranslate); EXPECT_TRUE(forwarded_details.has_notranslate);
EXPECT_EQ(kRootLanguage, forwarded_details.html_root_language); EXPECT_EQ(kRootLanguage, forwarded_details.html_root_language);
EXPECT_EQ(kAdoptedLanguage, forwarded_details.adopted_language); EXPECT_EQ(kAdoptedLanguage, forwarded_details.adopted_language);
......
...@@ -20,10 +20,10 @@ LanguageDetectionServiceImpl::~LanguageDetectionServiceImpl() = default; ...@@ -20,10 +20,10 @@ LanguageDetectionServiceImpl::~LanguageDetectionServiceImpl() = default;
void LanguageDetectionServiceImpl::DetermineLanguage( void LanguageDetectionServiceImpl::DetermineLanguage(
const ::base::string16& text, const ::base::string16& text,
DetermineLanguageCallback callback) { DetermineLanguageCallback callback) {
bool is_cld_reliable = false; bool is_model_reliable = false;
std::string cld_language = translate::DetermineTextLanguage( std::string model_detected_language = translate::DetermineTextLanguage(
base::UTF16ToUTF8(text), &is_cld_reliable); base::UTF16ToUTF8(text), &is_model_reliable);
std::move(callback).Run(cld_language, is_cld_reliable); std::move(callback).Run(model_detected_language, is_model_reliable);
} }
} // namespace language_detection } // namespace language_detection
...@@ -296,8 +296,8 @@ void ContentTranslateDriver::RegisterPage( ...@@ -296,8 +296,8 @@ void ContentTranslateDriver::RegisterPage(
// If we have a language histogram (i.e. we're not in incognito), update it // If we have a language histogram (i.e. we're not in incognito), update it
// with the detected language of every page visited. // with the detected language of every page visited.
if (language_histogram_ && details.is_cld_reliable) if (language_histogram_ && details.is_model_reliable)
language_histogram_->OnPageVisited(details.cld_language); language_histogram_->OnPageVisited(details.model_detected_language);
translate_agents_[++next_page_seq_no_].Bind(std::move(translate_agent)); translate_agents_[++next_page_seq_no_].Bind(std::move(translate_agent));
translate_agents_[next_page_seq_no_].set_disconnect_handler( translate_agents_[next_page_seq_no_].set_disconnect_handler(
......
...@@ -354,8 +354,8 @@ void PerFrameContentTranslateDriver::OnPageLanguageDetermined( ...@@ -354,8 +354,8 @@ void PerFrameContentTranslateDriver::OnPageLanguageDetermined(
// If we have a language histogram (i.e. we're not in incognito), update it // If we have a language histogram (i.e. we're not in incognito), update it
// with the detected language of every page visited. // with the detected language of every page visited.
if (language_histogram() && details.is_cld_reliable) if (language_histogram() && details.is_model_reliable)
language_histogram()->OnPageVisited(details.cld_language); language_histogram()->OnPageVisited(details.model_detected_language);
if (translate_manager() && web_contents()) { if (translate_manager() && web_contents()) {
translate_manager()->GetLanguageState()->LanguageDetermined( translate_manager()->GetLanguageState()->LanguageDetermined(
...@@ -406,8 +406,8 @@ void PerFrameContentTranslateDriver::OnPageContentsLanguage( ...@@ -406,8 +406,8 @@ void PerFrameContentTranslateDriver::OnPageContentsLanguage(
const std::string& contents_language, const std::string& contents_language,
bool is_contents_language_reliable) { bool is_contents_language_reliable) {
awaiting_contents_ = false; awaiting_contents_ = false;
details_.cld_language = contents_language; details_.model_detected_language = contents_language;
details_.is_cld_reliable = is_contents_language_reliable; details_.is_model_reliable = is_contents_language_reliable;
if (!details_.url.is_empty()) if (!details_.url.is_empty())
ComputeActualPageLanguage(); ComputeActualPageLanguage();
...@@ -418,7 +418,7 @@ void PerFrameContentTranslateDriver::ComputeActualPageLanguage() { ...@@ -418,7 +418,7 @@ void PerFrameContentTranslateDriver::ComputeActualPageLanguage() {
// utility process. // utility process.
std::string language = DeterminePageLanguage( std::string language = DeterminePageLanguage(
details_.content_language, details_.html_root_language, details_.content_language, details_.html_root_language,
details_.cld_language, details_.is_cld_reliable); details_.model_detected_language, details_.is_model_reliable);
if (!language.empty()) { if (!language.empty()) {
details_.time = base::Time::Now(); details_.time = base::Time::Now();
......
...@@ -69,7 +69,7 @@ class PerFrameContentTranslateDriverTest ...@@ -69,7 +69,7 @@ class PerFrameContentTranslateDriverTest
} }
bool HasGoodContentDetection() const { bool HasGoodContentDetection() const {
return observer_.GetObservedDetails().is_cld_reliable; return observer_.GetObservedDetails().is_model_reliable;
} }
bool DoNotTranslate() const { bool DoNotTranslate() const {
......
...@@ -27,8 +27,8 @@ struct LanguageDetectionDetails { ...@@ -27,8 +27,8 @@ struct LanguageDetectionDetails {
mojo_base.mojom.Time time; mojo_base.mojom.Time time;
url.mojom.Url url; url.mojom.Url url;
string content_language; string content_language;
string cld_language; string model_detected_language;
bool is_cld_reliable; bool is_model_reliable;
bool has_notranslate; bool has_notranslate;
string html_root_language; string html_root_language;
string adopted_language; string adopted_language;
......
...@@ -102,10 +102,10 @@ bool StructTraits<translate::mojom::LanguageDetectionDetailsDataView, ...@@ -102,10 +102,10 @@ bool StructTraits<translate::mojom::LanguageDetectionDetailsDataView,
return false; return false;
if (!data.ReadContentLanguage(&out->content_language)) if (!data.ReadContentLanguage(&out->content_language))
return false; return false;
if (!data.ReadCldLanguage(&out->cld_language)) if (!data.ReadModelDetectedLanguage(&out->model_detected_language))
return false; return false;
out->is_cld_reliable = data.is_cld_reliable(); out->is_model_reliable = data.is_model_reliable();
out->has_notranslate = data.has_notranslate(); out->has_notranslate = data.has_notranslate();
if (!data.ReadHtmlRootLanguage(&out->html_root_language)) if (!data.ReadHtmlRootLanguage(&out->html_root_language))
......
...@@ -39,13 +39,13 @@ struct StructTraits<translate::mojom::LanguageDetectionDetailsDataView, ...@@ -39,13 +39,13 @@ struct StructTraits<translate::mojom::LanguageDetectionDetailsDataView,
return r.content_language; return r.content_language;
} }
static const std::string& cld_language( static const std::string& model_detected_language(
const translate::LanguageDetectionDetails& r) { const translate::LanguageDetectionDetails& r) {
return r.cld_language; return r.model_detected_language;
} }
static bool is_cld_reliable(const translate::LanguageDetectionDetails& r) { static bool is_model_reliable(const translate::LanguageDetectionDetails& r) {
return r.is_cld_reliable; return r.is_model_reliable;
} }
static bool has_notranslate(const translate::LanguageDetectionDetails& r) { static bool has_notranslate(const translate::LanguageDetectionDetails& r) {
......
...@@ -142,8 +142,8 @@ void TranslateAgent::PageCaptured(const base::string16& contents) { ...@@ -142,8 +142,8 @@ void TranslateAgent::PageCaptured(const base::string16& contents) {
details.time = base::Time::Now(); details.time = base::Time::Now();
details.url = web_detection_details.url; details.url = web_detection_details.url;
details.content_language = content_language; details.content_language = content_language;
details.cld_language = model_detected_language; details.model_detected_language = model_detected_language;
details.is_cld_reliable = is_model_reliable; details.is_model_reliable = is_model_reliable;
details.has_notranslate = web_detection_details.has_no_translate_meta; details.has_notranslate = web_detection_details.has_no_translate_meta;
details.html_root_language = html_lang; details.html_root_language = html_lang;
details.adopted_language = language; details.adopted_language = language;
......
...@@ -6,13 +6,11 @@ ...@@ -6,13 +6,11 @@
namespace translate { namespace translate {
LanguageDetectionDetails::LanguageDetectionDetails() LanguageDetectionDetails::LanguageDetectionDetails() = default;
: is_cld_reliable(false), has_notranslate(false) {
}
LanguageDetectionDetails::LanguageDetectionDetails( LanguageDetectionDetails::LanguageDetectionDetails(
const LanguageDetectionDetails& other) = default; const LanguageDetectionDetails& other) = default;
LanguageDetectionDetails::~LanguageDetectionDetails() {} LanguageDetectionDetails::~LanguageDetectionDetails() = default;
} // namespace translate } // namespace translate
...@@ -29,16 +29,16 @@ struct LanguageDetectionDetails { ...@@ -29,16 +29,16 @@ struct LanguageDetectionDetails {
// The language detected by the content (Content-Language). // The language detected by the content (Content-Language).
std::string content_language; std::string content_language;
// The language detected by CLD. // The language detected by the model.
std::string cld_language; std::string model_detected_language;
// Whether the CLD detection is reliable or not. // Whether the model detection is reliable or not.
bool is_cld_reliable; bool is_model_reliable = false;
// Whether the notranslate is specified in head tag as a meta; // Whether the notranslate is specified in head tag as a meta;
// <meta name="google" value="notranslate"> or // <meta name="google" value="notranslate"> or
// <meta name="google" content="notranslate">. // <meta name="google" content="notranslate">.
bool has_notranslate; bool has_notranslate = false;
// The language written in the lang attribute of the html element. // The language written in the lang attribute of the html element.
std::string html_root_language; std::string html_root_language;
......
...@@ -66,16 +66,16 @@ void ApplyLanguageCodeCorrection(std::string* code) { ...@@ -66,16 +66,16 @@ void ApplyLanguageCodeCorrection(std::string* code) {
language::ToTranslateLanguageSynonym(code); language::ToTranslateLanguageSynonym(code);
} }
// Checks if CLD can complement a sub code when the page language doesn't know // Checks if the model can complement a sub code when the page language doesn't
// the sub code. // know the sub code.
bool CanCLDComplementSubCode(const std::string& page_language, bool CanModelComplementSubCode(const std::string& page_language,
const std::string& cld_language) { const std::string& model_detected_language) {
// Translate server cannot treat general Chinese. If Content-Language and // Translate server cannot treat general Chinese. If Content-Language and
// CLD agree that the language is Chinese and Content-Language doesn't know // the detection model agree that the language is Chinese and Content-Language
// which dialect is used, CLD language has priority. // doesn't know which dialect is used, the model language has priority.
// TODO(hajimehoshi): How about the other dialects like zh-MO? // TODO(hajimehoshi): How about the other dialects like zh-MO?
return page_language == "zh" && return page_language == "zh" &&
base::StartsWith(cld_language, "zh-", base::StartsWith(model_detected_language, "zh-",
base::CompareCase::INSENSITIVE_ASCII); base::CompareCase::INSENSITIVE_ASCII);
} }
...@@ -121,10 +121,10 @@ std::string FilterDetectedLanguage(const std::string& utf8_text, ...@@ -121,10 +121,10 @@ std::string FilterDetectedLanguage(const std::string& utf8_text,
namespace translate { namespace translate {
// Returns the ISO 639 language code of the specified |utf8_text|, or 'unknown' // Returns the ISO 639 language code of the specified |utf8_text|, or 'unknown'
// if it failed. |is_cld_reliable| will be set as true if CLD says the detection // if it failed. |is_model_reliable| will be set as true if CLD says the
// is reliable. // detection is reliable.
std::string DetermineTextLanguage(const std::string& utf8_text, std::string DetermineTextLanguage(const std::string& utf8_text,
bool* is_cld_reliable) { bool* is_model_reliable) {
// Make a prediction. // Make a prediction.
base::TimeTicks lang_id_start = base::TimeTicks::Now(); base::TimeTicks lang_id_start = base::TimeTicks::Now();
chrome_lang_id::NNetLanguageIdentifier lang_id; chrome_lang_id::NNetLanguageIdentifier lang_id;
...@@ -145,8 +145,8 @@ std::string DetermineTextLanguage(const std::string& utf8_text, ...@@ -145,8 +145,8 @@ std::string DetermineTextLanguage(const std::string& utf8_text,
static_cast<int>(100 * lang_id_result.proportion)); static_cast<int>(100 * lang_id_result.proportion));
} }
if (is_cld_reliable != nullptr) { if (is_model_reliable != nullptr) {
*is_cld_reliable = is_detection_reliable; *is_model_reliable = is_detection_reliable;
} }
return FilterDetectedLanguage(utf8_text, detected_language, return FilterDetectedLanguage(utf8_text, detected_language,
is_detection_reliable); is_detection_reliable);
...@@ -155,26 +155,27 @@ std::string DetermineTextLanguage(const std::string& utf8_text, ...@@ -155,26 +155,27 @@ std::string DetermineTextLanguage(const std::string& utf8_text,
std::string DeterminePageLanguage(const std::string& code, std::string DeterminePageLanguage(const std::string& code,
const std::string& html_lang, const std::string& html_lang,
const base::string16& contents, const base::string16& contents,
std::string* cld_language_p, std::string* model_detected_language,
bool* is_cld_reliable_p) { bool* is_model_reliable) {
// First determine the language for the test contents. // First determine the language for the text contents.
bool is_cld_reliable; bool is_reliable;
const std::string utf8_text(base::UTF16ToUTF8(contents)); const std::string utf8_text(base::UTF16ToUTF8(contents));
std::string cld_language = DetermineTextLanguage(utf8_text, &is_cld_reliable); std::string detected_language =
if (cld_language_p != nullptr) DetermineTextLanguage(utf8_text, &is_reliable);
*cld_language_p = cld_language; if (model_detected_language != nullptr)
if (is_cld_reliable_p != nullptr) *model_detected_language = detected_language;
*is_cld_reliable_p = is_cld_reliable; if (is_model_reliable != nullptr)
language::ToTranslateLanguageSynonym(&cld_language); *is_model_reliable = is_reliable;
language::ToTranslateLanguageSynonym(&detected_language);
return DeterminePageLanguage(code, html_lang, cld_language, is_cld_reliable);
return DeterminePageLanguage(code, html_lang, detected_language, is_reliable);
} }
// Now consider the web page language details along with the contents language. // Now consider the web page language details along with the contents language.
std::string DeterminePageLanguage(const std::string& code, std::string DeterminePageLanguage(const std::string& code,
const std::string& html_lang, const std::string& html_lang,
const std::string& cld_language, const std::string& model_detected_language,
bool is_cld_reliable) { bool is_model_reliable) {
// Check if html lang attribute is valid. // Check if html lang attribute is valid.
std::string modified_html_lang; std::string modified_html_lang;
if (!html_lang.empty()) { if (!html_lang.empty()) {
...@@ -202,31 +203,31 @@ std::string DeterminePageLanguage(const std::string& code, ...@@ -202,31 +203,31 @@ std::string DeterminePageLanguage(const std::string& code,
if (language.empty()) { if (language.empty()) {
translate::ReportLanguageVerification( translate::ReportLanguageVerification(
translate::LANGUAGE_VERIFICATION_CLD_ONLY); translate::LANGUAGE_VERIFICATION_CLD_ONLY);
return cld_language; return model_detected_language;
} }
if (cld_language == kUnknownLanguageCode) { if (model_detected_language == kUnknownLanguageCode) {
translate::ReportLanguageVerification( translate::ReportLanguageVerification(
translate::LANGUAGE_VERIFICATION_UNKNOWN); translate::LANGUAGE_VERIFICATION_UNKNOWN);
return language; return language;
} }
if (CanCLDComplementSubCode(language, cld_language)) { if (CanModelComplementSubCode(language, model_detected_language)) {
translate::ReportLanguageVerification( translate::ReportLanguageVerification(
translate::LANGUAGE_VERIFICATION_CLD_COMPLEMENT_SUB_CODE); translate::LANGUAGE_VERIFICATION_CLD_COMPLEMENT_SUB_CODE);
return cld_language; return model_detected_language;
} }
if (IsSameOrSimilarLanguages(language, cld_language)) { if (IsSameOrSimilarLanguages(language, model_detected_language)) {
translate::ReportLanguageVerification( translate::ReportLanguageVerification(
translate::LANGUAGE_VERIFICATION_CLD_AGREE); translate::LANGUAGE_VERIFICATION_CLD_AGREE);
return language; return language;
} }
if (MaybeServerWrongConfiguration(language, cld_language)) { if (MaybeServerWrongConfiguration(language, model_detected_language)) {
translate::ReportLanguageVerification( translate::ReportLanguageVerification(
translate::LANGUAGE_VERIFICATION_TRUST_CLD); translate::LANGUAGE_VERIFICATION_TRUST_CLD);
return cld_language; return model_detected_language;
} }
// Content-Language value might be wrong because CLD says that this page is // Content-Language value might be wrong because CLD says that this page is
...@@ -298,33 +299,33 @@ bool IsValidLanguageCode(const std::string& code) { ...@@ -298,33 +299,33 @@ bool IsValidLanguageCode(const std::string& code) {
} }
bool IsSameOrSimilarLanguages(const std::string& page_language, bool IsSameOrSimilarLanguages(const std::string& page_language,
const std::string& cld_language) { const std::string& model_detected_language) {
std::vector<std::string> chunks = base::SplitString( std::vector<std::string> chunks = base::SplitString(
page_language, "-", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); page_language, "-", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
if (chunks.size() == 0) if (chunks.size() == 0)
return false; return false;
std::string page_language_main_part = chunks[0]; // Need copy. std::string page_language_main_part = chunks[0]; // Need copy.
chunks = base::SplitString( chunks = base::SplitString(model_detected_language, "-",
cld_language, "-", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
if (chunks.size() == 0) if (chunks.size() == 0)
return false; return false;
const std::string& cld_language_main_part = chunks[0]; const std::string& model_detected_language_main_part = chunks[0];
// Language code part of |page_language| is matched to one of |cld_language|. // Language code part of |page_language| is matched to one of
// Country code is ignored here. // |model_detected_language|. Country code is ignored here.
if (page_language_main_part == cld_language_main_part) { if (page_language_main_part == model_detected_language_main_part) {
// Languages are matched strictly. Reports false to metrics, but returns // Languages are matched strictly. Reports false to metrics, but returns
// true. // true.
translate::ReportSimilarLanguageMatch(false); translate::ReportSimilarLanguageMatch(false);
return true; return true;
} }
// Check if |page_language| and |cld_language| are in the similar language // Check if |page_language| and |model_detected_language| are in the similar
// list and belong to the same language group. // language list and belong to the same language group.
int page_code = GetSimilarLanguageGroupCode(page_language); int page_code = GetSimilarLanguageGroupCode(page_language);
bool match = page_code != 0 && bool match = page_code != 0 && page_code == GetSimilarLanguageGroupCode(
page_code == GetSimilarLanguageGroupCode(cld_language); model_detected_language);
translate::ReportSimilarLanguageMatch(match); translate::ReportSimilarLanguageMatch(match);
return match; return match;
...@@ -339,7 +340,7 @@ bool IsServerWrongConfigurationLanguage(const std::string& language_code) { ...@@ -339,7 +340,7 @@ bool IsServerWrongConfigurationLanguage(const std::string& language_code) {
} }
bool MaybeServerWrongConfiguration(const std::string& page_language, bool MaybeServerWrongConfiguration(const std::string& page_language,
const std::string& cld_language) { const std::string& model_detected_language) {
// If |page_language| is not "en-*", respect it and just return false here. // If |page_language| is not "en-*", respect it and just return false here.
if (!base::StartsWith(page_language, "en", if (!base::StartsWith(page_language, "en",
base::CompareCase::INSENSITIVE_ASCII)) base::CompareCase::INSENSITIVE_ASCII))
...@@ -347,10 +348,11 @@ bool MaybeServerWrongConfiguration(const std::string& page_language, ...@@ -347,10 +348,11 @@ bool MaybeServerWrongConfiguration(const std::string& page_language,
// A server provides a language meta information representing "en-*". But it // A server provides a language meta information representing "en-*". But it
// might be just a default value due to missing user configuration. // might be just a default value due to missing user configuration.
// Let's trust |cld_language| if the determined language is not difficult to // Let's trust |model_detected_language| if the determined language is not
// distinguish from English, and the language is one of well-known languages // difficult to distinguish from English, and the language is one of
// which often provide "en-*" meta information mistakenly. // well-known languages which often provide "en-*" meta information
return IsServerWrongConfigurationLanguage(cld_language); // mistakenly.
return IsServerWrongConfigurationLanguage(model_detected_language);
} }
} // namespace translate } // namespace translate
...@@ -12,26 +12,26 @@ ...@@ -12,26 +12,26 @@
namespace translate { namespace translate {
// Returns the ISO 639 language code of the specified |utf8_text|, or // Returns the ISO 639 language code of the specified |utf8_text|, or
// |translate::kUnknownLanguageCode| if it failed. |is_cld_reliable| will be // |translate::kUnknownLanguageCode| if it failed. |is_model_reliable| will be
// set as true if CLD says the detection is reliable. // set as true if CLD says the detection is reliable.
std::string DetermineTextLanguage(const std::string& utf8_text, std::string DetermineTextLanguage(const std::string& utf8_text,
bool* is_cld_reliable); bool* is_model_reliable);
// Determines content page language from Content-Language code and contents. // Determines content page language from Content-Language code and contents.
// Returns the contents language results in |cld_language_p| and // Returns the contents language results in |model_detected_language_p| and
// |is_cld_reliable_p|. // |is_model_reliable_p|.
std::string DeterminePageLanguage(const std::string& code, std::string DeterminePageLanguage(const std::string& code,
const std::string& html_lang, const std::string& html_lang,
const base::string16& contents, const base::string16& contents,
std::string* cld_language_p, std::string* model_detected_language,
bool* is_cld_reliable_p); bool* is_model_reliable);
// Determines content page language from Content-Language code and contents // Determines content page language from Content-Language code and contents
// language. // language.
std::string DeterminePageLanguage(const std::string& code, std::string DeterminePageLanguage(const std::string& code,
const std::string& html_lang, const std::string& html_lang,
const std::string& cld_language, const std::string& model_detected_language,
bool is_cld_reliable); bool is_model_reliable);
// Corrects language code if it contains well-known mistakes. // Corrects language code if it contains well-known mistakes.
// Called only by tests. // Called only by tests.
...@@ -46,13 +46,13 @@ bool IsValidLanguageCode(const std::string& code); ...@@ -46,13 +46,13 @@ bool IsValidLanguageCode(const std::string& code);
// distinguish. // distinguish.
// Called only by tests. // Called only by tests.
bool IsSameOrSimilarLanguages(const std::string& page_language, bool IsSameOrSimilarLanguages(const std::string& page_language,
const std::string& cld_language); const std::string& model_detected_language);
// Checks if languages pair is one of well-known pairs of wrong server // Checks if languages pair is one of well-known pairs of wrong server
// configuration. // configuration.
// Called only by tests. // Called only by tests.
bool MaybeServerWrongConfiguration(const std::string& page_language, bool MaybeServerWrongConfiguration(const std::string& page_language,
const std::string& cld_language); const std::string& model_detected_language);
// Returns true if the specified language often has the wrong server // Returns true if the specified language often has the wrong server
// configuration language, false otherwise. // configuration language, false otherwise.
......
...@@ -101,16 +101,14 @@ TEST_F(LanguageDetectionUtilTest, CLDDisagreeWithWrongLanguageCode) { ...@@ -101,16 +101,14 @@ TEST_F(LanguageDetectionUtilTest, CLDDisagreeWithWrongLanguageCode) {
"<body>This is a page apparently written in English. Even though " "<body>This is a page apparently written in English. Even though "
"content-language is provided, the value will be ignored if the value " "content-language is provided, the value will be ignored if the value "
"is suspicious.</body></html>"); "is suspicious.</body></html>");
std::string cld_language; std::string model_detected_language;
bool is_cld_reliable; bool is_model_reliable;
std::string language = translate::DeterminePageLanguage(std::string("ja"), std::string language = translate::DeterminePageLanguage(
std::string(), std::string("ja"), std::string(), contents, &model_detected_language,
contents, &is_model_reliable);
&cld_language,
&is_cld_reliable);
EXPECT_EQ(translate::kUnknownLanguageCode, language); EXPECT_EQ(translate::kUnknownLanguageCode, language);
EXPECT_EQ("en", cld_language); EXPECT_EQ("en", model_detected_language);
EXPECT_TRUE(is_cld_reliable); EXPECT_TRUE(is_model_reliable);
histogram_tester.ExpectTotalCount( histogram_tester.ExpectTotalCount(
"Translate.CLD3.TopLanguageEvaluationDuration", 1); "Translate.CLD3.TopLanguageEvaluationDuration", 1);
} }
...@@ -124,16 +122,14 @@ TEST_F(LanguageDetectionUtilTest, CLDAgreeWithLanguageCodeHavingCountryCode) { ...@@ -124,16 +122,14 @@ TEST_F(LanguageDetectionUtilTest, CLDAgreeWithLanguageCodeHavingCountryCode) {
"<body>This is a page apparently written in English. Even though " "<body>This is a page apparently written in English. Even though "
"content-language is provided, the value will be ignored if the value " "content-language is provided, the value will be ignored if the value "
"is suspicious.</body></html>"); "is suspicious.</body></html>");
std::string cld_language; std::string model_detected_language;
bool is_cld_reliable; bool is_model_reliable;
std::string language = translate::DeterminePageLanguage(std::string("en-US"), std::string language = translate::DeterminePageLanguage(
std::string(), std::string("en-US"), std::string(), contents, &model_detected_language,
contents, &is_model_reliable);
&cld_language,
&is_cld_reliable);
EXPECT_EQ("en", language); EXPECT_EQ("en", language);
EXPECT_EQ("en", cld_language); EXPECT_EQ("en", model_detected_language);
EXPECT_TRUE(is_cld_reliable); EXPECT_TRUE(is_model_reliable);
histogram_tester.ExpectTotalCount( histogram_tester.ExpectTotalCount(
"Translate.CLD3.TopLanguageEvaluationDuration", 1); "Translate.CLD3.TopLanguageEvaluationDuration", 1);
} }
...@@ -148,16 +144,14 @@ TEST_F(LanguageDetectionUtilTest, InvalidLanguageMetaTagProviding) { ...@@ -148,16 +144,14 @@ TEST_F(LanguageDetectionUtilTest, InvalidLanguageMetaTagProviding) {
"<body>This is a page apparently written in English. Even though " "<body>This is a page apparently written in English. Even though "
"content-language is provided, the value will be ignored and CLD's" "content-language is provided, the value will be ignored and CLD's"
" language will be adopted if the value is invalid.</body></html>"); " language will be adopted if the value is invalid.</body></html>");
std::string cld_language; std::string model_detected_language;
bool is_cld_reliable; bool is_model_reliable;
std::string language = translate::DeterminePageLanguage(std::string("utf-8"), std::string language = translate::DeterminePageLanguage(
std::string(), std::string("utf-8"), std::string(), contents, &model_detected_language,
contents, &is_model_reliable);
&cld_language,
&is_cld_reliable);
EXPECT_EQ("en", language); EXPECT_EQ("en", language);
EXPECT_EQ("en", cld_language); EXPECT_EQ("en", model_detected_language);
EXPECT_TRUE(is_cld_reliable); EXPECT_TRUE(is_model_reliable);
histogram_tester.ExpectTotalCount( histogram_tester.ExpectTotalCount(
"Translate.CLD3.TopLanguageEvaluationDuration", 1); "Translate.CLD3.TopLanguageEvaluationDuration", 1);
} }
...@@ -171,16 +165,14 @@ TEST_F(LanguageDetectionUtilTest, AdoptHtmlLang) { ...@@ -171,16 +165,14 @@ TEST_F(LanguageDetectionUtilTest, AdoptHtmlLang) {
"</head><body>This is a page apparently written in English. Even though " "</head><body>This is a page apparently written in English. Even though "
"content-language is provided, the value will be ignored if the value " "content-language is provided, the value will be ignored if the value "
"is suspicious.</body></html>"); "is suspicious.</body></html>");
std::string cld_language; std::string model_detected_language;
bool is_cld_reliable; bool is_model_reliable;
std::string language = translate::DeterminePageLanguage(std::string("ja"), std::string language = translate::DeterminePageLanguage(
std::string("en"), std::string("ja"), std::string("en"), contents, &model_detected_language,
contents, &is_model_reliable);
&cld_language,
&is_cld_reliable);
EXPECT_EQ("en", language); EXPECT_EQ("en", language);
EXPECT_EQ("en", cld_language); EXPECT_EQ("en", model_detected_language);
EXPECT_TRUE(is_cld_reliable); EXPECT_TRUE(is_model_reliable);
histogram_tester.ExpectTotalCount( histogram_tester.ExpectTotalCount(
"Translate.CLD3.TopLanguageEvaluationDuration", 1); "Translate.CLD3.TopLanguageEvaluationDuration", 1);
} }
......
...@@ -126,13 +126,13 @@ void LanguageDetectionController::OnTextRetrieved( ...@@ -126,13 +126,13 @@ void LanguageDetectionController::OnTextRetrieved(
const std::string& html_lang, const std::string& html_lang,
const GURL& url, const GURL& url,
const base::string16& text_content) { const base::string16& text_content) {
std::string cld_language; std::string model_detected_language;
bool is_cld_reliable; bool is_model_reliable;
std::string language = translate::DeterminePageLanguage( std::string language = translate::DeterminePageLanguage(
http_content_language, html_lang, http_content_language, html_lang,
GetStringByClippingLastWord(text_content, GetStringByClippingLastWord(text_content,
language_detection::kMaxIndexChars), language_detection::kMaxIndexChars),
&cld_language, &is_cld_reliable); &model_detected_language, &is_model_reliable);
if (language.empty()) if (language.empty())
return; // No language detected. return; // No language detected.
...@@ -143,8 +143,8 @@ void LanguageDetectionController::OnTextRetrieved( ...@@ -143,8 +143,8 @@ void LanguageDetectionController::OnTextRetrieved(
details.time = base::Time::Now(); details.time = base::Time::Now();
details.url = url; details.url = url;
details.content_language = http_content_language; details.content_language = http_content_language;
details.cld_language = cld_language; details.model_detected_language = model_detected_language;
details.is_cld_reliable = is_cld_reliable; details.is_model_reliable = is_model_reliable;
details.html_root_language = html_lang; details.html_root_language = html_lang;
details.adopted_language = language; details.adopted_language = language;
......
...@@ -111,8 +111,8 @@ TEST_F(LanguageDetectionControllerTest, OnTextCaptured) { ...@@ -111,8 +111,8 @@ TEST_F(LanguageDetectionControllerTest, OnTextCaptured) {
EXPECT_NE(nullptr, details); EXPECT_NE(nullptr, details);
EXPECT_EQ(kRootLanguage, details->html_root_language); EXPECT_EQ(kRootLanguage, details->html_root_language);
EXPECT_EQ(kContentLanguage, details->content_language); EXPECT_EQ(kContentLanguage, details->content_language);
EXPECT_FALSE(details->is_cld_reliable); EXPECT_FALSE(details->is_model_reliable);
EXPECT_EQ(kUndefined, details->cld_language); EXPECT_EQ(kUndefined, details->model_detected_language);
} }
// Tests that Content-Language response header is used if httpContentLanguage // Tests that Content-Language response header is used if httpContentLanguage
......
...@@ -398,9 +398,9 @@ cr.define('cr.translateInternals', function() { ...@@ -398,9 +398,9 @@ cr.define('cr.translateInternals', function() {
tr, formatLanguageCode(detail['content_language']), tr, formatLanguageCode(detail['content_language']),
'detection-logs-content-language'); 'detection-logs-content-language');
appendTD( appendTD(
tr, formatLanguageCode(detail['cld_language']), tr, formatLanguageCode(detail['model_detected_language']),
'detection-logs-cld-language'); 'detection-logs-cld-language');
appendTD(tr, detail['is_cld_reliable'], 'detection-logs-is-cld-reliable'); appendTD(tr, detail['is_model_reliable'], 'detection-logs-is-cld-reliable');
appendTD(tr, detail['has_notranslate'], 'detection-logs-has-notranslate'); appendTD(tr, detail['has_notranslate'], 'detection-logs-has-notranslate');
appendTD( appendTD(
tr, formatLanguageCode(detail['html_root_language']), tr, formatLanguageCode(detail['html_root_language']),
......
...@@ -92,8 +92,8 @@ void TranslateInternalsHandler::AddLanguageDetectionDetails( ...@@ -92,8 +92,8 @@ void TranslateInternalsHandler::AddLanguageDetectionDetails(
dict.SetDouble("time", details.time.ToJsTime()); dict.SetDouble("time", details.time.ToJsTime());
dict.SetString("url", details.url.spec()); dict.SetString("url", details.url.spec());
dict.SetString("content_language", details.content_language); dict.SetString("content_language", details.content_language);
dict.SetString("cld_language", details.cld_language); dict.SetString("model_detected_language", details.model_detected_language);
dict.SetBoolean("is_cld_reliable", details.is_cld_reliable); dict.SetBoolean("is_model_reliable", details.is_model_reliable);
dict.SetBoolean("has_notranslate", details.has_notranslate); dict.SetBoolean("has_notranslate", details.has_notranslate);
dict.SetString("html_root_language", details.html_root_language); dict.SetString("html_root_language", details.html_root_language);
dict.SetString("adopted_language", details.adopted_language); dict.SetString("adopted_language", details.adopted_language);
......
...@@ -29,9 +29,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { ...@@ -29,9 +29,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
base::string16 text( base::string16 text(
reinterpret_cast<const base::char16*>(data + lang_len + html_lang_len), reinterpret_cast<const base::char16*>(data + lang_len + html_lang_len),
text_len / 2); text_len / 2);
std::string cld_lang; std::string model_detected_language;
bool is_cld_reliable; bool is_model_reliable;
translate::DeterminePageLanguage(lang, html_lang, text, &cld_lang, translate::DeterminePageLanguage(
&is_cld_reliable); lang, html_lang, text, &model_detected_language, &is_model_reliable);
return 0; return 0;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment