Commit fe9e71d8 authored by Michael Crouse, committed by Commit Bot

[Translate] Record CLD3 model evaluation time.

This change adds a histogram to record the time taken to evaluate the
current language detection model (CLD3). This will help with planning
for an updated model and provide a baseline to compare it against.

Bug: 1121680
Change-Id: I0270ce12a656fc34ad1b963d1a774efc87677e0c
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2376305
Auto-Submit: Michael Crouse <mcrouse@chromium.org>
Reviewed-by: Megan Jablonski <megjablon@chromium.org>
Reviewed-by: Tarun Bansal <tbansal@chromium.org>
Commit-Queue: Michael Crouse <mcrouse@chromium.org>
Cr-Commit-Position: refs/heads/master@{#801901}
parent 536ec4e9
......@@ -43,6 +43,7 @@ source_set("unit_tests") {
":chinese_script_classifier",
":language_detection",
"//base",
"//base/test:test_support",
"//components/translate/core/common",
"//testing/gtest",
"//third_party/icu",
......
......@@ -92,9 +92,12 @@ std::string DetermineTextLanguage(const base::string16& text,
const std::string utf8_text(base::UTF16ToUTF8(text));
// Make a prediction.
base::TimeTicks lang_id_start = base::TimeTicks::Now();
chrome_lang_id::NNetLanguageIdentifier lang_id;
const chrome_lang_id::NNetLanguageIdentifier::Result lang_id_result =
lang_id.FindTopNMostFreqLangs(utf8_text, /*num_langs=*/1).at(0);
base::UmaHistogramTimes("Translate.CLD3.TopLanguageEvaluationDuration",
base::TimeTicks::Now() - lang_id_start);
const bool prediction_reliable = lang_id_result.is_reliable;
const std::string& predicted_language = lang_id_result.language;
......
......@@ -6,6 +6,7 @@
#include "base/strings/string16.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/metrics/histogram_tester.h"
#include "components/translate/core/common/translate_constants.h"
#include "testing/gtest/include/gtest/gtest.h"
......@@ -94,6 +95,7 @@ TEST_F(LanguageDetectionUtilTest, WellKnownWrongConfiguration) {
// Tests that the language meta tag providing wrong information is ignored by
// LanguageDetectionUtil due to disagreement between meta tag and CLD.
TEST_F(LanguageDetectionUtilTest, CLDDisagreeWithWrongLanguageCode) {
base::HistogramTester histogram_tester;
base::string16 contents = base::ASCIIToUTF16(
"<html><head><meta http-equiv='Content-Language' content='ja'></head>"
"<body>This is a page apparently written in English. Even though "
......@@ -109,11 +111,14 @@ TEST_F(LanguageDetectionUtilTest, CLDDisagreeWithWrongLanguageCode) {
EXPECT_EQ(translate::kUnknownLanguageCode, language);
EXPECT_EQ("en", cld_language);
EXPECT_TRUE(is_cld_reliable);
histogram_tester.ExpectTotalCount(
"Translate.CLD3.TopLanguageEvaluationDuration", 1);
}
// Tests that the language meta tag providing "en-US" style information is
// agreed by CLD.
TEST_F(LanguageDetectionUtilTest, CLDAgreeWithLanguageCodeHavingCountryCode) {
base::HistogramTester histogram_tester;
base::string16 contents = base::ASCIIToUTF16(
"<html><head><meta http-equiv='Content-Language' content='en-US'></head>"
"<body>This is a page apparently written in English. Even though "
......@@ -129,12 +134,15 @@ TEST_F(LanguageDetectionUtilTest, CLDAgreeWithLanguageCodeHavingCountryCode) {
EXPECT_EQ("en", language);
EXPECT_EQ("en", cld_language);
EXPECT_TRUE(is_cld_reliable);
histogram_tester.ExpectTotalCount(
"Translate.CLD3.TopLanguageEvaluationDuration", 1);
}
// Tests that the language meta tag providing wrong information is ignored and
// CLD's language will be adopted by LanguageDetectionUtil due to an invalid
// meta tag.
TEST_F(LanguageDetectionUtilTest, InvalidLanguageMetaTagProviding) {
base::HistogramTester histogram_tester;
base::string16 contents = base::ASCIIToUTF16(
"<html><head><meta http-equiv='Content-Language' content='utf-8'></head>"
"<body>This is a page apparently written in English. Even though "
......@@ -150,11 +158,14 @@ TEST_F(LanguageDetectionUtilTest, InvalidLanguageMetaTagProviding) {
EXPECT_EQ("en", language);
EXPECT_EQ("en", cld_language);
EXPECT_TRUE(is_cld_reliable);
histogram_tester.ExpectTotalCount(
"Translate.CLD3.TopLanguageEvaluationDuration", 1);
}
// Tests that the language meta tag providing wrong information is ignored
// because of valid html lang attribute.
TEST_F(LanguageDetectionUtilTest, AdoptHtmlLang) {
base::HistogramTester histogram_tester;
base::string16 contents = base::ASCIIToUTF16(
"<html lang='en'><head><meta http-equiv='Content-Language' content='ja'>"
"</head><body>This is a page apparently written in English. Even though "
......@@ -170,6 +181,8 @@ TEST_F(LanguageDetectionUtilTest, AdoptHtmlLang) {
EXPECT_EQ("en", language);
EXPECT_EQ("en", cld_language);
EXPECT_TRUE(is_cld_reliable);
histogram_tester.ExpectTotalCount(
"Translate.CLD3.TopLanguageEvaluationDuration", 1);
}
// Tests that languages that often have the wrong server configuration are
......
......@@ -183796,6 +183796,16 @@ should be kept until we use this API. -->
</summary>
</histogram>
<histogram name="Translate.CLD3.TopLanguageEvaluationDuration" units="ms"
expires_after="2021-02-28">
<owner>mcrouse@chromium.org</owner>
<owner>chrome-language@google.com</owner>
<summary>
The time to evaluate the CLD3 language detection model for the top language.
This information is logged on every request.
</summary>
</histogram>
<histogram name="Translate.CompactInfobar.Event" enum="TranslateCompactUIEvent"
expires_after="2021-01-31">
<owner>anthonyvd@chromium.org</owner>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment