Commit cf9fd685 authored by Doug Arnett, committed by Commit Bot

Creates a utility service for text language detection

This is part of supporting translation for sub frames.
Specifically this moves the analysis of the untrusted contents
obtained from an accessibility tree snapshot out of the browser
process and into a sandboxed utility process.

Bug: 1063520
Change-Id: I0792d209541558a753f168f78f421e66745748d9
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2140605
Commit-Queue: Doug Arnett <dougarnett@chromium.org>
Reviewed-by: anthonyvd <anthonyvd@chromium.org>
Reviewed-by: Megan Jablonski <megjablon@chromium.org>
Reviewed-by: Scott Violet <sky@chromium.org>
Reviewed-by: Ken Rockot <rockot@google.com>
Reviewed-by: Daniel Cheng <dcheng@chromium.org>
Cr-Commit-Position: refs/heads/master@{#760958}
parent 7b1a02b9
...@@ -2128,6 +2128,8 @@ jumbo_static_library("browser") { ...@@ -2128,6 +2128,8 @@ jumbo_static_library("browser") {
"//components/security_state/core", "//components/security_state/core",
"//components/send_tab_to_self", "//components/send_tab_to_self",
"//components/services/heap_profiling", "//components/services/heap_profiling",
"//components/services/language_detection/public/cpp",
"//components/services/language_detection/public/mojom",
"//components/services/patch/content", "//components/services/patch/content",
"//components/services/quarantine", "//components/services/quarantine",
"//components/services/quarantine/public/mojom", "//components/services/quarantine/public/mojom",
......
...@@ -226,6 +226,8 @@ include_rules = [ ...@@ -226,6 +226,8 @@ include_rules = [
"+components/services/app_service/public", "+components/services/app_service/public",
"+components/services/filesystem/public/mojom", "+components/services/filesystem/public/mojom",
"+components/services/heap_profiling", "+components/services/heap_profiling",
"+components/services/language_detection/public/cpp",
"+components/services/language_detection/public/mojom",
"+components/services/patch/content", "+components/services/patch/content",
"+components/services/patch/public", "+components/services/patch/public",
"+components/services/print_compositor/public", "+components/services/print_compositor/public",
......
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <string>
#include "base/run_loop.h"
#include "base/strings/string16.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/bind_test_util.h"
#include "chrome/test/base/in_process_browser_test.h"
#include "components/services/language_detection/public/cpp/language_detection_service.h"
#include "testing/gtest/include/gtest/gtest.h"
// Browser-test fixture for the language detection service tests below. It adds
// no state or behavior of its own on top of InProcessBrowserTest.
class LanguageDetectionServiceTest : public InProcessBrowserTest {};
// Launches the service, submits a multi-sentence Spanish passage and expects
// the reply to be "es" with the reliability flag set.
IN_PROC_BROWSER_TEST_F(LanguageDetectionServiceTest,
                       DetermineLanguageReliable) {
  auto remote = language_detection::LaunchLanguageDetectionService();

  const base::string16 spanish_passage = base::UTF8ToUTF16(
      "El niño atrapó un dorado muy grande con cebo vivo. Fileteó el "
      "pescado y lo asó a la parrilla. Sabía excelente. Espera pescar otro "
      "buen pescado mañana.");

  // The reply arrives asynchronously; spin a RunLoop until the callback has
  // checked the result and quit it.
  base::RunLoop reply_loop;
  remote->DetermineLanguage(
      spanish_passage,
      base::BindLambdaForTesting(
          [&reply_loop](const std::string& detected_language, bool reliable) {
            EXPECT_EQ("es", detected_language);
            EXPECT_TRUE(reliable);
            reply_loop.Quit();
          }));
  reply_loop.Run();
}
// Launches the service, submits a single short phrase and expects the reply to
// be the undetermined code "und" with the reliability flag cleared.
IN_PROC_BROWSER_TEST_F(LanguageDetectionServiceTest,
                       DetermineLanguageUndeterminedUnreliable) {
  auto remote = language_detection::LaunchLanguageDetectionService();

  const base::string16 short_phrase =
      base::UTF8ToUTF16("Not enough text for detection");

  // The reply arrives asynchronously; spin a RunLoop until the callback has
  // checked the result and quit it.
  base::RunLoop reply_loop;
  remote->DetermineLanguage(
      short_phrase,
      base::BindLambdaForTesting(
          [&reply_loop](const std::string& detected_language, bool reliable) {
            EXPECT_EQ("und", detected_language);
            EXPECT_FALSE(reliable);
            reply_loop.Quit();
          }));
  reply_loop.Run();
}
...@@ -681,6 +681,7 @@ if (!is_android) { ...@@ -681,6 +681,7 @@ if (!is_android) {
"//components/resources", "//components/resources",
"//components/safe_browsing:buildflags", "//components/safe_browsing:buildflags",
"//components/safe_browsing/core/db:test_database_manager", "//components/safe_browsing/core/db:test_database_manager",
"//components/services/language_detection/public/cpp",
"//components/services/patch/public/mojom", "//components/services/patch/public/mojom",
"//components/services/quarantine:test_support", "//components/services/quarantine:test_support",
"//components/signin/core/browser", "//components/signin/core/browser",
...@@ -1228,6 +1229,7 @@ if (!is_android) { ...@@ -1228,6 +1229,7 @@ if (!is_android) {
"../browser/task_manager/task_manager_tester.h", "../browser/task_manager/task_manager_tester.h",
"../browser/themes/theme_service_browsertest.cc", "../browser/themes/theme_service_browsertest.cc",
"../browser/tracing/chrome_tracing_delegate_browsertest.cc", "../browser/tracing/chrome_tracing_delegate_browsertest.cc",
"../browser/translate/language_detection_service_browsertest.cc",
"../browser/translate/translate_manager_browsertest.cc", "../browser/translate/translate_manager_browsertest.cc",
"../browser/ui/ask_google_for_suggestions_dialog_browsertest.cc", "../browser/ui/ask_google_for_suggestions_dialog_browsertest.cc",
"../browser/ui/autofill/payments/card_unmask_prompt_view_browsertest.cc", "../browser/ui/autofill/payments/card_unmask_prompt_view_browsertest.cc",
......
...@@ -43,6 +43,7 @@ include_rules = [ ...@@ -43,6 +43,7 @@ include_rules = [
"+components/safe_browsing/buildflags.h", "+components/safe_browsing/buildflags.h",
"+components/safe_browsing/core/db", "+components/safe_browsing/core/db",
"+components/search_engines", "+components/search_engines",
"+components/services/language_detection/public/cpp",
"+components/services/quarantine", "+components/services/quarantine",
"+components/sessions", "+components/sessions",
"+components/signin/public", "+components/signin/public",
......
...@@ -38,6 +38,7 @@ static_library("utility") { ...@@ -38,6 +38,7 @@ static_library("utility") {
"//components/paint_preview/buildflags", "//components/paint_preview/buildflags",
"//components/safe_browsing:buildflags", "//components/safe_browsing:buildflags",
"//components/search_engines", "//components/search_engines",
"//components/services/language_detection",
"//components/services/paint_preview_compositor", "//components/services/paint_preview_compositor",
"//components/services/patch:lib", "//components/services/patch:lib",
"//components/services/unzip:lib", "//components/services/unzip:lib",
......
...@@ -27,6 +27,7 @@ include_rules = [ ...@@ -27,6 +27,7 @@ include_rules = [
"+components/paint_preview/buildflags", "+components/paint_preview/buildflags",
"+components/payments/content/utility", "+components/payments/content/utility",
"+components/safe_browsing/buildflags.h", "+components/safe_browsing/buildflags.h",
"+components/services/language_detection",
"+components/services/paint_preview_compositor", "+components/services/paint_preview_compositor",
"+components/services/patch", "+components/services/patch",
"+components/services/print_compositor", "+components/services/print_compositor",
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
#include "build/build_config.h" #include "build/build_config.h"
#include "components/paint_preview/buildflags/buildflags.h" #include "components/paint_preview/buildflags/buildflags.h"
#include "components/safe_browsing/buildflags.h" #include "components/safe_browsing/buildflags.h"
#include "components/services/language_detection/language_detection_service_impl.h"
#include "components/services/language_detection/public/mojom/language_detection.mojom.h"
#include "components/services/patch/file_patcher_impl.h" #include "components/services/patch/file_patcher_impl.h"
#include "components/services/patch/public/mojom/file_patcher.mojom.h" #include "components/services/patch/public/mojom/file_patcher.mojom.h"
#include "components/services/unzip/public/mojom/unzipper.mojom.h" #include "components/services/unzip/public/mojom/unzipper.mojom.h"
...@@ -97,6 +99,13 @@ auto RunUnzipper(mojo::PendingReceiver<unzip::mojom::Unzipper> receiver) { ...@@ -97,6 +99,13 @@ auto RunUnzipper(mojo::PendingReceiver<unzip::mojom::Unzipper> receiver) {
return std::make_unique<unzip::UnzipperImpl>(std::move(receiver)); return std::make_unique<unzip::UnzipperImpl>(std::move(receiver));
} }
// Service factory entry: builds a LanguageDetectionServiceImpl bound to
// |receiver| and returns ownership of it to the caller.
auto RunLanguageDetectionService(
    mojo::PendingReceiver<language_detection::mojom::LanguageDetectionService>
        receiver) {
  using language_detection::LanguageDetectionServiceImpl;
  auto service_impl =
      std::make_unique<LanguageDetectionServiceImpl>(std::move(receiver));
  return service_impl;
}
#if defined(OS_WIN) #if defined(OS_WIN)
auto RunQuarantineService( auto RunQuarantineService(
mojo::PendingReceiver<quarantine::mojom::Quarantine> receiver) { mojo::PendingReceiver<quarantine::mojom::Quarantine> receiver) {
...@@ -238,6 +247,7 @@ mojo::ServiceFactory* GetMainThreadServiceFactory() { ...@@ -238,6 +247,7 @@ mojo::ServiceFactory* GetMainThreadServiceFactory() {
static base::NoDestructor<mojo::ServiceFactory> factory { static base::NoDestructor<mojo::ServiceFactory> factory {
RunFilePatcher, RunFilePatcher,
RunUnzipper, RunUnzipper,
RunLanguageDetectionService,
#if !defined(OS_ANDROID) #if !defined(OS_ANDROID)
RunProfileImporter, RunProfileImporter,
......
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import("//build/config/features.gni")
# Implementation of the language detection service
# (LanguageDetectionServiceImpl).
source_set("language_detection") {
sources = [
"language_detection_service_impl.cc",
"language_detection_service_impl.h",
]
# Private dependencies; the implementation calls into the translate
# component's language detection code.
deps = [
"//base",
"//components/translate/core/language_detection",
"//mojo/public/cpp/bindings",
]
# Exposed to dependents because language_detection_service_impl.h includes the
# generated mojom header.
public_deps = [
"//components/services/language_detection/public/mojom",
"//mojo/public/mojom/base",
]
}
include_rules = [
"+components/translate/core/language_detection",
]
file://components/translate/OWNERS
dougarnett@chromium.org
# COMPONENT: UI>Browser>Language>Translate
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/services/language_detection/language_detection_service_impl.h"
#include <string>
#include "components/translate/core/language_detection/language_detection_util.h"
namespace language_detection {
// Binds |receiver| to this instance so that mojom::LanguageDetectionService
// calls arriving on it are dispatched to this object.
LanguageDetectionServiceImpl::LanguageDetectionServiceImpl(
mojo::PendingReceiver<mojom::LanguageDetectionService> receiver)
: receiver_(this, std::move(receiver)) {}
// No explicit teardown is needed; the defaulted destructor destroys
// |receiver_|.
LanguageDetectionServiceImpl::~LanguageDetectionServiceImpl() = default;
// Runs translate::DetermineTextLanguage() over |text| and replies through
// |callback| with the detected language code and the reliability flag reported
// by the detector.
void LanguageDetectionServiceImpl::DetermineLanguage(
    const ::base::string16& text,
    DetermineLanguageCallback callback) {
  // Stays false unless the detector sets it.
  bool detection_is_reliable = false;
  const std::string detected_language =
      translate::DetermineTextLanguage(text, &detection_is_reliable);
  std::move(callback).Run(detected_language, detection_is_reliable);
}
} // namespace language_detection
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_SERVICES_LANGUAGE_DETECTION_LANGUAGE_DETECTION_SERVICE_IMPL_H_
#define COMPONENTS_SERVICES_LANGUAGE_DETECTION_LANGUAGE_DETECTION_SERVICE_IMPL_H_
#include "base/strings/string16.h"
#include "components/services/language_detection/public/mojom/language_detection.mojom.h"
#include "mojo/public/cpp/bindings/pending_receiver.h"
#include "mojo/public/cpp/bindings/receiver.h"
namespace language_detection {
// Language Detection Service implementation.
//
// This service implementation analyzes text content to determine the most
// likely language for it.
// It is intended to operate in an out-of-browser-process service.
class LanguageDetectionServiceImpl : public mojom::LanguageDetectionService {
public:
explicit LanguageDetectionServiceImpl(
mojo::PendingReceiver<mojom::LanguageDetectionService> receiver);
~LanguageDetectionServiceImpl() override;
private:
// chrome::mojom::LanguageDetectionService override.
void DetermineLanguage(const ::base::string16& text,
DetermineLanguageCallback callback) override;
mojo::Receiver<mojom::LanguageDetectionService> receiver_;
DISALLOW_COPY_AND_ASSIGN(LanguageDetectionServiceImpl);
};
} // namespace language_detection
#endif // COMPONENTS_SERVICES_LANGUAGE_DETECTION_LANGUAGE_DETECTION_SERVICE_IMPL_H_
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# Helper for launching the language detection service in its own process
# through content::ServiceProcessHost.
source_set("cpp") {
  sources = [
    "language_detection_service.cc",
    "language_detection_service.h",
  ]

  deps = [
    "//base",
    "//content/public/browser",
  ]

  # language_detection_service.h includes the generated
  # language_detection.mojom.h, so every target that includes that header needs
  # the mojom target too; expose it as a public dependency.
  public_deps = [ "//components/services/language_detection/public/mojom" ]
}
include_rules = [
"+content/public/browser/service_process_host.h",
]
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/services/language_detection/public/cpp/language_detection_service.h"
#include "content/public/browser/service_process_host.h"
namespace language_detection {
// Asks content::ServiceProcessHost to launch the language detection service
// with the utility sandbox type and the display name "Translate Language
// Detection", and returns the Remote connected to it.
mojo::Remote<mojom::LanguageDetectionService> LaunchLanguageDetectionService() {
  content::ServiceProcessHost::Options launch_options;
  launch_options.WithSandboxType(service_manager::SandboxType::kUtility);
  launch_options.WithDisplayName("Translate Language Detection");
  return content::ServiceProcessHost::Launch<mojom::LanguageDetectionService>(
      launch_options.Pass());
}
} // namespace language_detection
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_SERVICES_LANGUAGE_DETECTION_PUBLIC_CPP_LANGUAGE_DETECTION_SERVICE_H_
#define COMPONENTS_SERVICES_LANGUAGE_DETECTION_PUBLIC_CPP_LANGUAGE_DETECTION_SERVICE_H_
#include "components/services/language_detection/public/mojom/language_detection.mojom.h"
#include "mojo/public/cpp/bindings/remote.h"
namespace language_detection {
// Launches a new instance of the LanguageDetectionService in an isolated,
// sandboxed process, and returns a remote interface to control the service. The
// lifetime of the process is tied to that of the Remote. May be called from any
// thread.
mojo::Remote<mojom::LanguageDetectionService> LaunchLanguageDetectionService();
} // namespace language_detection
#endif // COMPONENTS_SERVICES_LANGUAGE_DETECTION_PUBLIC_CPP_LANGUAGE_DETECTION_SERVICE_H_
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import("//build/config/features.gni")
import("//mojo/public/tools/bindings/mojom.gni")
# Mojo bindings for the LanguageDetectionService interface.
mojom("mojom") {
sources = [ "language_detection.mojom" ]
# language_detection.mojom imports mojo/public/mojom/base/string16.mojom.
public_deps = [ "//mojo/public/mojom/base" ]
}
per-file *.mojom=set noparent
per-file *.mojom=file://ipc/SECURITY_OWNERS
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
module language_detection.mojom;
import "mojo/public/mojom/base/string16.mojom";
// Interface for a service that detects the language of provided text.
interface LanguageDetectionService {
// Analyzes the provided |text| to determine its likely language.
// Replies with |language|, the language code resulting from the analysis,
// and |is_reliable|, which tells whether that result is expected to be
// reliable.
DetermineLanguage(mojo_base.mojom.BigString16 text)
=> (string language, bool is_reliable);
};
...@@ -66,6 +66,23 @@ void ApplyLanguageCodeCorrection(std::string* code) { ...@@ -66,6 +66,23 @@ void ApplyLanguageCodeCorrection(std::string* code) {
language::ToTranslateLanguageSynonym(code); language::ToTranslateLanguageSynonym(code);
} }
// Reports whether the CLD result can supply the sub code that the page
// language is missing.
bool CanCLDComplementSubCode(const std::string& page_language,
                             const std::string& cld_language) {
  // Translate server cannot treat general Chinese. If Content-Language and
  // CLD agree that the language is Chinese and Content-Language doesn't know
  // which dialect is used, CLD language has priority.
  // TODO(hajimehoshi): How about the other dialects like zh-MO?
  if (page_language != "zh")
    return false;
  return base::StartsWith(cld_language, "zh-",
                          base::CompareCase::INSENSITIVE_ASCII);
}
} // namespace
namespace translate {
// Returns the ISO 639 language code of the specified |text|, or 'unknown' if it // Returns the ISO 639 language code of the specified |text|, or 'unknown' if it
// failed. // failed.
// |is_cld_reliable| will be set as true if CLD says the detection is reliable. // |is_cld_reliable| will be set as true if CLD says the detection is reliable.
...@@ -129,23 +146,6 @@ std::string DetermineTextLanguage(const base::string16& text, ...@@ -129,23 +146,6 @@ std::string DetermineTextLanguage(const base::string16& text,
return language; return language;
} }
// Reports whether the CLD result can supply the sub code that the page
// language is missing.
bool CanCLDComplementSubCode(const std::string& page_language,
                             const std::string& cld_language) {
  // Translate server cannot treat general Chinese. If Content-Language and
  // CLD agree that the language is Chinese and Content-Language doesn't know
  // which dialect is used, CLD language has priority.
  // TODO(hajimehoshi): How about the other dialects like zh-MO?
  if (page_language != "zh")
    return false;
  return base::StartsWith(cld_language, "zh-",
                          base::CompareCase::INSENSITIVE_ASCII);
}
} // namespace
namespace translate {
std::string DeterminePageLanguage(const std::string& code, std::string DeterminePageLanguage(const std::string& code,
const std::string& html_lang, const std::string& html_lang,
const base::string16& contents, const base::string16& contents,
......
...@@ -11,6 +11,12 @@ ...@@ -11,6 +11,12 @@
namespace translate { namespace translate {
// Returns the ISO 639 language code of the specified |text|, or
// |translate::kUnknownLanguageCode| if it failed. |is_cld_reliable| will be
// set as true if CLD says the detection is reliable.
std::string DetermineTextLanguage(const base::string16& text,
bool* is_cld_reliable);
// Determines content page language from Content-Language code and contents. // Determines content page language from Content-Language code and contents.
std::string DeterminePageLanguage(const std::string& code, std::string DeterminePageLanguage(const std::string& code,
const std::string& html_lang, const std::string& html_lang,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment