Commit 11e2a159, authored by Michael Crouse, committed by Chromium LUCI CQ

[LanguageDetection] Add the LanguageDetectionModel interface.

This change adds the language detection model interface, which takes a
model file and memory-maps it in preparation for adding TFLite model
loading and inference.

This change allows building the render/browser mojo connection
while the remaining necessary tflite libraries are added.

Bug: 1151413
Change-Id: Ic2fc2e9f40804c00d0b2d5d15efe52363e7aa4cf
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2587612
Reviewed-by: Sophie Chang <sophiechang@chromium.org>
Reviewed-by: Scott Little <sclittle@chromium.org>
Reviewed-by: Josh Simmons <jds@google.com>
Commit-Queue: Michael Crouse <mcrouse@chromium.org>
Cr-Commit-Position: refs/heads/master@{#836448}
parent 87f6664b
......@@ -3,15 +3,58 @@
// found in the LICENSE file.
#include "base/run_loop.h"
#include "base/task/thread_pool/thread_pool_instance.h"
#include "base/test/metrics/histogram_tester.h"
#include "base/test/scoped_feature_list.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/translate/translate_model_service_factory.h"
#include "chrome/browser/ui/browser.h"
#include "chrome/test/base/in_process_browser_test.h"
#include "chrome/test/base/ui_test_utils.h"
#include "components/metrics/content/subprocess_metrics_provider.h"
#include "components/translate/core/common/translate_util.h"
#include "content/public/test/browser_test.h"
#include "content/public/test/browser_test_utils.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace {
// Returns the sum of the sample counts of every bucket recorded for
// |histogram_name| in |histogram_tester|. Note: in some browsertest runs (such
// as chromeos) two profiles might be created, in which case the returned value
// is the total sample count across profiles.
int GetTotalHistogramSamples(const base::HistogramTester* histogram_tester,
                             const std::string& histogram_name) {
  int sample_count = 0;
  for (const base::Bucket& bin :
       histogram_tester->GetAllSamples(histogram_name)) {
    sample_count += bin.count;
  }
  return sample_count;
}
// Polls |histogram_name| until it holds at least |count| samples and returns
// the total that was observed. This function has no timeout of its own: it
// only returns once the threshold is reached.
int RetryForHistogramUntilCountReached(
    const base::HistogramTester* histogram_tester,
    const std::string& histogram_name,
    int count) {
  for (;;) {
    // Drain pending work before sampling the histogram.
    base::ThreadPoolInstance::Get()->FlushForTesting();
    base::RunLoop().RunUntilIdle();

    const int samples_so_far =
        GetTotalHistogramSamples(histogram_tester, histogram_name);
    if (samples_so_far >= count)
      return samples_so_far;

    // Not enough samples yet: fetch histograms from child processes, merge
    // their deltas, and let the resulting tasks run before the next attempt.
    content::FetchHistogramsFromChildProcesses();
    metrics::SubprocessMetricsProvider::MergeHistogramDeltasForTesting();
    base::RunLoop().RunUntilIdle();
  }
}
} // namespace
using TranslateModelServiceDisabledBrowserTest = InProcessBrowserTest;
IN_PROC_BROWSER_TEST_F(TranslateModelServiceDisabledBrowserTest,
......@@ -20,6 +63,16 @@ IN_PROC_BROWSER_TEST_F(TranslateModelServiceDisabledBrowserTest,
TranslateModelServiceFactory::GetForProfile(browser()->profile()));
}
// Navigates to a page, waits for one CLD3 evaluation-duration sample, and then
// checks that the TFLite model availability histogram was never recorded.
IN_PROC_BROWSER_TEST_F(TranslateModelServiceDisabledBrowserTest,
                       LanguageDetectionModelNotCreated) {
  base::HistogramTester histograms;

  const GURL page_url("https://test.com");
  ui_test_utils::NavigateToURL(browser(), page_url);

  // Block until language detection has been evaluated for the page.
  RetryForHistogramUntilCountReached(
      &histograms, "Translate.CLD3.TopLanguageEvaluationDuration", 1);

  histograms.ExpectTotalCount(
      "LanguageDetection.TFLiteModel.WasModelAvailableForDetection", 0);
}
class TranslateModelServiceBrowserTest
: public TranslateModelServiceDisabledBrowserTest {
public:
......@@ -45,3 +98,14 @@ IN_PROC_BROWSER_TEST_F(TranslateModelServiceBrowserTest,
EXPECT_TRUE(TranslateModelServiceFactory::GetForProfile(
browser()->profile()->GetPrimaryOTRProfile()));
}
// Navigates to a page, waits for the TFLite model availability histogram to
// receive a sample, and checks that the only recorded value is |false| (the
// model was not available for detection).
IN_PROC_BROWSER_TEST_F(TranslateModelServiceBrowserTest,
                       LanguageDetectionModelCreated) {
  base::HistogramTester histograms;

  const GURL page_url("https://test.com");
  ui_test_utils::NavigateToURL(browser(), page_url);

  RetryForHistogramUntilCountReached(
      &histograms,
      "LanguageDetection.TFLiteModel.WasModelAvailableForDetection", 1);

  histograms.ExpectUniqueSample(
      "LanguageDetection.TFLiteModel.WasModelAvailableForDetection", false, 1);
}
......@@ -13,6 +13,7 @@
#include "base/json/string_escape.h"
#include "base/location.h"
#include "base/metrics/histogram_macros.h"
#include "base/metrics/histogram_macros_local.h"
#include "base/notreached.h"
#include "base/single_thread_task_runner.h"
#include "base/strings/string16.h"
......@@ -22,6 +23,7 @@
#include "components/translate/core/common/translate_constants.h"
#include "components/translate/core/common/translate_metrics.h"
#include "components/translate/core/common/translate_util.h"
#include "components/translate/core/language_detection/language_detection_model.h"
#include "components/translate/core/language_detection/language_detection_util.h"
#include "content/public/common/content_constants.h"
#include "content/public/common/url_constants.h"
......@@ -58,6 +60,14 @@ const int kTranslateStatusCheckDelayMs = 400;
// Language name passed to the Translate element for it to detect the language.
const char kAutoDetectionLanguage[] = "auto";
// Returns the language detection model that is shared across the RenderFrames
// in the renderer. The model is a function-local static held in a
// base::NoDestructor, so every call returns a reference to the same object.
translate::LanguageDetectionModel& GetLanguageDetectionModel() {
  static base::NoDestructor<translate::LanguageDetectionModel> shared_model;
  return *shared_model.get();
}
} // namespace
namespace translate {
......@@ -100,22 +110,40 @@ void TranslateAgent::PageCaptured(const base::string16& contents) {
WebLanguageDetectionDetails::CollectLanguageDetectionDetails(document);
std::string content_language = web_detection_details.content_language.Utf8();
std::string html_lang = web_detection_details.html_language.Utf8();
std::string cld_language;
bool is_cld_reliable;
std::string language = DeterminePageLanguage(
content_language, html_lang, contents, &cld_language, &is_cld_reliable);
std::string model_detected_language;
bool is_model_reliable;
std::string language;
if (translate::IsTFLiteLanguageDetectionEnabled()) {
translate::LanguageDetectionModel& language_detection_model =
GetLanguageDetectionModel();
bool is_available = language_detection_model.IsAvailable();
language = is_available ? language_detection_model.DeterminePageLanguage(
content_language, html_lang, contents,
&model_detected_language, &is_model_reliable)
: translate::kUnknownLanguageCode;
LOCAL_HISTOGRAM_BOOLEAN(
"LanguageDetection.TFLiteModel.WasModelAvailableForDetection",
is_available);
} else {
language =
DeterminePageLanguage(content_language, html_lang, contents,
&model_detected_language, &is_model_reliable);
}
if (language.empty())
return;
language_determined_time_ = base::TimeTicks::Now();
// TODO(crbug.com/1157983): Update the language detection details struct to be
// model agnostic.
LanguageDetectionDetails details;
details.time = base::Time::Now();
details.url = web_detection_details.url;
details.content_language = content_language;
details.cld_language = cld_language;
details.is_cld_reliable = is_cld_reliable;
details.cld_language = model_detected_language;
details.is_cld_reliable = is_model_reliable;
details.has_notranslate = web_detection_details.has_no_translate_meta;
details.html_root_language = html_lang;
details.adopted_language = language;
......
......@@ -13,6 +13,8 @@
namespace translate {
// TODO(crbug.com/1157983): Update the language detection details struct to be
// model agnostic.
struct LanguageDetectionDetails {
LanguageDetectionDetails();
LanguageDetectionDetails(const LanguageDetectionDetails& other);
......
......@@ -18,6 +18,8 @@ static_library("chinese_script_classifier") {
static_library("language_detection") {
sources = [
"language_detection_model.cc",
"language_detection_model.h",
"language_detection_util.cc",
"language_detection_util.h",
]
......@@ -37,6 +39,7 @@ source_set("unit_tests") {
testonly = true
sources = [
"chinese_script_classifier_test.cc",
"language_detection_model_unittest.cc",
"language_detection_util_unittest.cc",
]
deps = [
......
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/translate/core/language_detection/language_detection_model.h"
#include "base/files/memory_mapped_file.h"
#include "base/metrics/histogram_macros_local.h"
#include "components/translate/core/common/translate_constants.h"
namespace translate {
// Construction and destruction need no custom logic; both are defaulted.
LanguageDetectionModel::LanguageDetectionModel() = default;
LanguageDetectionModel::~LanguageDetectionModel() = default;
// Memory-maps |model_file| into |model_fb_| and records the resulting
// LanguageDetectionModelState to a local histogram. An invalid file handle and
// a failed mapping are both reported as kModelFileInvalid; mapping is only
// attempted when the handle is valid.
//
// NOTE(review): what Initialize() does when |model_fb_| already holds a
// mapping is not visible here; confirm before relying on repeated calls.
void LanguageDetectionModel::UpdateWithFile(base::File model_file) {
  // TODO(crbug.com/1157661): Update to be full histograms.
  const bool is_mapped =
      model_file.IsValid() && model_fb_.Initialize(std::move(model_file));

  const LanguageDetectionModelState state =
      is_mapped ? LanguageDetectionModelState::kModelFileValidAndMemoryMapped
                : LanguageDetectionModelState::kModelFileInvalid;
  LOCAL_HISTOGRAM_ENUMERATION(
      "LanguageDetection.TFLiteModel.LanguageDetectionModelState", state);

  // TODO(crbug.com/1151413): Initialize tflite classifier with the provided
  // language detection model in |model_fb_|.
}
// Reports availability as the validity of the memory-mapped model file
// |model_fb_|.
bool LanguageDetectionModel::IsAvailable() const {
return model_fb_.IsValid();
}
// Placeholder implementation: requires (via DCHECK) that the model is
// available, records a local histogram sample, and reports the unknown
// language code with an unreliable prediction. |code|, |html_lang| and
// |contents| are not consulted yet.
std::string LanguageDetectionModel::DeterminePageLanguage(
    const std::string& code,
    const std::string& html_lang,
    const base::string16& contents,
    std::string* predicted_language,
    bool* is_prediction_reliable) const {
  DCHECK(IsAvailable());

  // TODO(crbug.com/1151413): Execute the tflite language detection
  // model and finalize the result with the language detection utility.
  LOCAL_HISTOGRAM_BOOLEAN("LanguageDetection.TFLite.DidDetectPageLanguage",
                          true);

  *predicted_language = translate::kUnknownLanguageCode;
  *is_prediction_reliable = false;
  return *predicted_language;
}
} // namespace translate
\ No newline at end of file
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_TRANSLATE_CORE_LANGUAGE_DETECTION_LANGUAGE_DETECTION_MODEL_H_
#define COMPONENTS_TRANSLATE_CORE_LANGUAGE_DETECTION_LANGUAGE_DETECTION_MODEL_H_
#include <string>
#include "base/files/memory_mapped_file.h"
namespace translate {
// Describes what is known about the language detection model file that is
// needed for determining the language of the page. The values are recorded to
// a histogram, so each enumerator is given its numeric value explicitly.
enum class LanguageDetectionModelState {
  // The language model state is not known.
  kUnknown = 0,
  // The provided model file was not valid.
  kModelFileInvalid = 1,
  // The language model is memory-mapped and available for
  // use with TFLite.
  kModelFileValidAndMemoryMapped = 2,

  // New values above this line.
  kMaxValue = kModelFileValidAndMemoryMapped,
};
// A language detection model that will use a TFLite model to determine the
// language of the content of the web page.
//
// At present the class only owns the memory-mapped model file; TFLite loading
// and inference are still to be added (see the TODOs below).
class LanguageDetectionModel {
public:
LanguageDetectionModel();
~LanguageDetectionModel();
// Updates the language detection model for use by memory-mapping
// |model_file| used to detect the language of the page. Takes |model_file|
// by value; an invalid file, or one that fails to map, leaves the model
// unavailable.
void UpdateWithFile(base::File model_file);
// Returns whether |this| is initialized and is available to handle requests
// to determine the language of the page, i.e. whether the model file is
// currently memory-mapped.
bool IsAvailable() const;
// Determines content page language from Content-Language code and contents.
// |code| is the Content-Language code, |html_lang| the language declared in
// the HTML, and |contents| the page text.
// Returns the contents language results in |predicted_language| and
// |is_prediction_reliable|; both are written through the pointers. The
// return value is the language determined for the page.
// Must only be called when IsAvailable() is true.
std::string DeterminePageLanguage(const std::string& code,
const std::string& html_lang,
const base::string16& contents,
std::string* predicted_language,
bool* is_prediction_reliable) const;
private:
// A memory-mapped file that contains the TFLite model used for
// determining the language of a page. This must be valid in order
// to evaluate the model owned by |this|.
//
// TODO(crbug.com/1151413): Add the tflite language detection model.
base::MemoryMappedFile model_fb_;
};
} // namespace translate
#endif // COMPONENTS_TRANSLATE_CORE_LANGUAGE_DETECTION_LANGUAGE_DETECTION_MODEL_H_
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/translate/core/language_detection/language_detection_model.h"
#include "base/files/file_util.h"
#include "base/files/scoped_temp_dir.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/metrics/histogram_tester.h"
#include "components/translate/core/common/translate_constants.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace translate {
// Creates a small file named "model_file.tflite" inside a fresh temporary
// directory, writes five bytes to it, and returns the open handle. The
// contents are arbitrary; the tests only need a file that can be
// memory-mapped.
//
// NOTE(review): |temp_dir| goes out of scope on return while the returned
// handle is still open; presumably the handle stays usable after the
// directory is cleaned up, but confirm this on every platform.
base::File CreateValidModelFile() {
  base::ScopedTempDir temp_dir;
  EXPECT_TRUE(temp_dir.CreateUniqueTempDir());

  base::FilePath file_path =
      temp_dir.GetPath().AppendASCII("model_file.tflite");
  base::File file(file_path, (base::File::FLAG_CREATE | base::File::FLAG_READ |
                              base::File::FLAG_WRITE |
                              base::File::FLAG_CAN_DELETE_ON_CLOSE));
  // Surface a failed open here instead of in a later, less obvious assertion.
  EXPECT_TRUE(file.IsValid());

  // WriteAtCurrentPos() returns the number of bytes written, or -1 on error.
  // Compare against the expected length: a bare truthiness check would also
  // accept -1.
  constexpr char kContents[] = "12345";
  constexpr int kContentsLength = 5;
  EXPECT_EQ(kContentsLength,
            file.WriteAtCurrentPos(kContents, kContentsLength));
  return file;
}
// A freshly constructed model that was never given a file is not available.
TEST(LanguageDetectionModelTest, ModelUnavailable) {
  LanguageDetectionModel model;
  EXPECT_FALSE(model.IsAvailable());
}
// Updating with a default-constructed (invalid) file keeps the model
// unavailable and records exactly one kModelFileInvalid sample.
TEST(LanguageDetectionModelTest, InvalidFileProvided) {
  base::HistogramTester histograms;

  LanguageDetectionModel model;
  model.UpdateWithFile(base::File());

  EXPECT_FALSE(model.IsAvailable());
  histograms.ExpectUniqueSample(
      "LanguageDetection.TFLiteModel.LanguageDetectionModelState",
      LanguageDetectionModelState::kModelFileInvalid, 1);
}
// Updating with a valid file makes the model available and records exactly
// one kModelFileValidAndMemoryMapped sample.
TEST(LanguageDetectionModelTest, ValidFileProvided) {
  base::HistogramTester histograms;

  LanguageDetectionModel model;
  base::File model_file = CreateValidModelFile();
  model.UpdateWithFile(std::move(model_file));

  EXPECT_TRUE(model.IsAvailable());
  histograms.ExpectUniqueSample(
      "LanguageDetection.TFLiteModel.LanguageDetectionModelState",
      LanguageDetectionModelState::kModelFileValidAndMemoryMapped, 1);
}
// With a valid model file loaded, DeterminePageLanguage() currently reports
// the unknown language code as both the predicted and the adopted language,
// marks the prediction unreliable, and records one DidDetectPageLanguage
// sample.
TEST(LanguageDetectionModelTest, DeterminePageLanguage) {
  base::HistogramTester histogram_tester;
  base::File file = CreateValidModelFile();

  LanguageDetectionModel language_detection_model;
  language_detection_model.UpdateWithFile(std::move(file));
  EXPECT_TRUE(language_detection_model.IsAvailable());

  // Seed the out-parameter with the opposite of the expected value so the
  // EXPECT_FALSE below proves the callee wrote it, and so the test never
  // reads an uninitialized bool.
  bool is_prediction_reliable = true;
  std::string predicted_language;
  base::string16 contents =
      base::ASCIIToUTF16("This is a page apparently written in English.");
  std::string language = language_detection_model.DeterminePageLanguage(
      std::string("ja"), std::string(), contents, &predicted_language,
      &is_prediction_reliable);

  EXPECT_FALSE(is_prediction_reliable);
  EXPECT_EQ(translate::kUnknownLanguageCode, predicted_language);
  EXPECT_EQ(translate::kUnknownLanguageCode, language);
  histogram_tester.ExpectUniqueSample(
      "LanguageDetection.TFLite.DidDetectPageLanguage", true, 1);
}
} // namespace translate
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment