Commit c713e709 authored by jkrcal's avatar jkrcal Committed by Commit bot

Add LanguageModel, a keyed service that collects language info from CLD.

Before this CL, the user's language detection statistics from the CLD3
library were not accessible to Chrome.

This CL introduces a new keyed service called LanguageModel that
collects such statistics and builds a simple model on top of that. This
allows other parts of Chrome to see top languages in which the user
consumes web content.

A design doc:
https://docs.google.com/a/google.com/document/d/1FBD79CEBUKkMtSJKk3fnF1xnlrXd23z2fHgIGQUFOE8/edit?usp=sharing

BUG=653058

Review-Url: https://codereview.chromium.org/2396783002
Cr-Commit-Position: refs/heads/master@{#423702}
parent c41f3976
......@@ -1197,6 +1197,8 @@ split_static_library("browser") {
"tracing/navigation_tracing.h",
"translate/chrome_translate_client.cc",
"translate/chrome_translate_client.h",
"translate/language_model_factory.cc",
"translate/language_model_factory.h",
"translate/translate_accept_languages_factory.cc",
"translate/translate_accept_languages_factory.h",
"translate/translate_service.cc",
......
......@@ -99,6 +99,7 @@
#include "components/subresource_filter/core/browser/ruleset_service.h"
#include "components/sync/driver/sync_prefs.h"
#include "components/syncable_prefs/pref_service_syncable.h"
#include "components/translate/core/browser/language_model.h"
#include "components/translate/core/browser/translate_prefs.h"
#include "components/update_client/update_client.h"
#include "components/variations/service/variations_service.h"
......@@ -495,6 +496,7 @@ void RegisterProfilePrefs(user_prefs::PrefRegistrySyncable* registry) {
RegisterBrowserUserPrefs(registry);
SessionStartupPref::RegisterProfilePrefs(registry);
TemplateURLPrepopulateData::RegisterProfilePrefs(registry);
translate::LanguageModel::RegisterProfilePrefs(registry);
translate::TranslatePrefs::RegisterProfilePrefs(registry);
ZeroSuggestProvider::RegisterProfilePrefs(registry);
browsing_data::prefs::RegisterBrowserUserPrefs(registry);
......
......@@ -14,6 +14,7 @@
#include "chrome/browser/chrome_notification_types.h"
#include "chrome/browser/infobars/infobar_service.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/translate/language_model_factory.h"
#include "chrome/browser/translate/translate_accept_languages_factory.h"
#include "chrome/browser/translate/translate_service.h"
#include "chrome/browser/ui/browser.h"
......@@ -26,6 +27,7 @@
#include "chrome/common/pref_names.h"
#include "chrome/grit/theme_resources.h"
#include "components/prefs/pref_service.h"
#include "components/translate/core/browser/language_model.h"
#include "components/translate/core/browser/language_state.h"
#include "components/translate/core/browser/page_translated_details.h"
#include "components/translate/core/browser/translate_accept_languages.h"
......@@ -46,7 +48,10 @@ ChromeTranslateClient::ChromeTranslateClient(content::WebContents* web_contents)
: content::WebContentsObserver(web_contents),
translate_driver_(&web_contents->GetController()),
translate_manager_(
new translate::TranslateManager(this, prefs::kAcceptLanguages)) {
new translate::TranslateManager(this, prefs::kAcceptLanguages)),
language_model_(
LanguageModelFactory::GetInstance()->GetForBrowserContext(
web_contents->GetBrowserContext())) {
translate_driver_.AddObserver(this);
translate_driver_.set_translate_manager(translate_manager_.get());
}
......@@ -276,6 +281,11 @@ void ChromeTranslateClient::OnLanguageDetermined(
chrome::NOTIFICATION_TAB_LANGUAGE_DETERMINED,
content::Source<content::WebContents>(web_contents()),
content::Details<const translate::LanguageDetectionDetails>(&details));
// Unless we have no language model (e.g., in incognito), notify the model
// about detected language of every page visited.
if (language_model_ && details.is_cld_reliable)
language_model_->OnPageVisited(details.cld_language);
}
void ChromeTranslateClient::OnPageTranslated(
......
......@@ -29,6 +29,7 @@ class ScopedCLDDynamicDataHarness;
class PrefService;
namespace translate {
class LanguageModel;
class LanguageState;
class TranslateAcceptLanguages;
class TranslatePrefs;
......@@ -122,6 +123,10 @@ class ChromeTranslateClient
translate::ContentTranslateDriver translate_driver_;
std::unique_ptr<translate::TranslateManager> translate_manager_;
// Model to be notified about detected language of every page visited. Not
// owned here.
translate::LanguageModel* language_model_;
DISALLOW_COPY_AND_ASSIGN(ChromeTranslateClient);
};
......
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/translate/language_model_factory.h"
#include "chrome/browser/profiles/profile.h"
#include "components/keyed_service/content/browser_context_dependency_manager.h"
#include "components/keyed_service/core/keyed_service.h"
#include "components/translate/core/browser/language_model.h"
// static
// Returns the process-wide factory instance, obtained through
// base::Singleton.
LanguageModelFactory* LanguageModelFactory::GetInstance() {
  LanguageModelFactory* const factory =
      base::Singleton<LanguageModelFactory>::get();
  return factory;
}
// static
// Looks up the LanguageModel keyed service attached to |browser_context| and
// returns it downcast to its concrete type. The lookup is delegated to
// GetServiceForBrowserContext() with |true| as the second argument
// (presumably "create if missing" -- confirm against
// BrowserContextKeyedServiceFactory).
translate::LanguageModel* LanguageModelFactory::GetForBrowserContext(
    content::BrowserContext* browser_context) {
  KeyedService* const service =
      GetInstance()->GetServiceForBrowserContext(browser_context, true);
  return static_cast<translate::LanguageModel*>(service);
}
// Registers this factory under the service name "LanguageModel" with the
// browser-context dependency manager.
LanguageModelFactory::LanguageModelFactory()
    : BrowserContextKeyedServiceFactory(
          "LanguageModel",
          BrowserContextDependencyManager::GetInstance()) {
}

// Nothing to release beyond what the base class handles.
LanguageModelFactory::~LanguageModelFactory() = default;
// Creates a new LanguageModel backed by the pref service of the profile that
// corresponds to |browser_context|. The caller receives the raw pointer.
KeyedService* LanguageModelFactory::BuildServiceInstanceFor(
    content::BrowserContext* browser_context) const {
  PrefService* const prefs =
      Profile::FromBrowserContext(browser_context)->GetPrefs();
  return new translate::LanguageModel(prefs);
}
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_BROWSER_TRANSLATE_LANGUAGE_MODEL_FACTORY_H_
#define CHROME_BROWSER_TRANSLATE_LANGUAGE_MODEL_FACTORY_H_
#include "base/macros.h"
#include "base/memory/singleton.h"
#include "components/keyed_service/content/browser_context_keyed_service_factory.h"
namespace translate {
class LanguageModel;
}
// Singleton factory that builds translate::LanguageModel keyed services and
// hands them out per browser context.
class LanguageModelFactory
: public BrowserContextKeyedServiceFactory {
public:
// Returns the singleton factory instance.
static LanguageModelFactory* GetInstance();
// Returns the LanguageModel associated with |browser_context|. Callers in
// this change treat the result as nullable.
static translate::LanguageModel* GetForBrowserContext(
content::BrowserContext* browser_context);
private:
// Lets base::Singleton reach the private constructor and destructor.
friend struct base::DefaultSingletonTraits<LanguageModelFactory>;
LanguageModelFactory();
~LanguageModelFactory() override;
// BrowserContextKeyedServiceFactory:
// Builds a new LanguageModel from the prefs of the profile behind |context|.
KeyedService* BuildServiceInstanceFor(
content::BrowserContext* context) const override;
DISALLOW_COPY_AND_ASSIGN(LanguageModelFactory);
};
#endif // CHROME_BROWSER_TRANSLATE_LANGUAGE_MODEL_FACTORY_H_
......@@ -6,6 +6,8 @@ import("//build/config/ui.gni")
static_library("browser") {
sources = [
"language_model.cc",
"language_model.h",
"language_state.cc",
"language_state.h",
"page_translated_details.h",
......@@ -47,6 +49,7 @@ static_library("browser") {
"//base",
"//base:i18n",
"//components/data_use_measurement/core",
"//components/keyed_service/core",
"//components/language_usage_metrics",
"//components/metrics",
"//components/pref_registry",
......@@ -82,6 +85,7 @@ static_library("browser") {
source_set("unit_tests") {
testonly = true
sources = [
"language_model_unittest.cc",
"language_state_unittest.cc",
"mock_translate_driver.cc",
"mock_translate_driver.h",
......@@ -99,6 +103,7 @@ source_set("unit_tests") {
"//components/infobars/core",
"//components/pref_registry:test_support",
"//components/prefs",
"//components/prefs:test_support",
"//components/translate/core/browser/proto",
"//components/translate/core/common",
"//components/variations",
......
include_rules = [
"+components/keyed_service/core",
"+components/metrics",
]
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/translate/core/browser/language_model.h"
#include <algorithm>
#include <map>
#include <set>
#include "components/prefs/pref_registry_simple.h"
#include "components/prefs/pref_service.h"
#include "components/prefs/scoped_user_pref_update.h"
namespace translate {
namespace {
// Name of the dictionary pref that stores one integer counter per language.
const char kLanguageModelCounters[] = "language_model_counters";
// Once the sum of all counters exceeds this value, the counters are
// discounted and small ones are removed.
const int kMaxCountersSum = 1000;
// Fraction of kMaxCountersSum; counters below kCutoffRatio * kMaxCountersSum
// are removed during the cleanup.
const float kCutoffRatio = 0.005f;
// Multiplier applied to every counter that survives the cleanup.
const float kDiscountFactor = 0.75f;
// Returns the total of all integer counters stored in |dict|. Entries whose
// value cannot be read as an integer do not contribute to the total.
int GetCountersSum(const base::DictionaryValue& dict) {
  int total = 0;
  base::DictionaryValue::Iterator it(dict);
  while (!it.IsAtEnd()) {
    int count = 0;
    if (it.value().GetAsInteger(&count))
      total += count;
    it.Advance();
  }
  return total;
}
// Removes languages with invalid or small counter values and discounts the
// counters of the languages that remain.
void DiscountAndCleanCounters(base::DictionaryValue* dict) {
  // Counters strictly below this value are considered insignificant.
  const float min_kept_value = kCutoffRatio * kMaxCountersSum;

  // Removal is deferred until after the iteration; only values of existing
  // keys are rewritten while iterating.
  std::set<std::string> stale_languages;
  for (base::DictionaryValue::Iterator it(*dict); !it.IsAtEnd();
       it.Advance()) {
    int count = 0;
    const bool is_integer = it.value().GetAsInteger(&count);
    if (is_integer && count >= min_kept_value) {
      // Keep the language; the product is truncated back to an integer.
      dict->SetInteger(it.key(), count * kDiscountFactor);
    } else {
      stale_languages.insert(it.key());
    }
  }

  for (const std::string& language : stale_languages)
    dict->Remove(language, nullptr);
}
// Transforms the counters from prefs into a list of LanguageInfo structs.
//
// Each integer counter in |dict| becomes one LanguageInfo whose frequency is
// the counter divided by the sum of all counters; non-integer entries are
// skipped. The result is in dictionary iteration order (not sorted).
//
// Returns an empty list when the counters do not add up to a positive value,
// because no meaningful frequency can be computed in that case (dividing by
// zero would yield NaN or infinite frequencies).
std::vector<LanguageModel::LanguageInfo> GetAllLanguages(
    const base::DictionaryValue& dict) {
  std::vector<LanguageModel::LanguageInfo> languages;

  const int counters_sum = GetCountersSum(dict);
  if (counters_sum <= 0)
    return languages;

  for (base::DictionaryValue::Iterator itr(dict); !itr.IsAtEnd();
       itr.Advance()) {
    int counter_value = 0;
    if (!itr.value().GetAsInteger(&counter_value))
      continue;
    languages.push_back(
        {itr.key(), static_cast<float>(counter_value) / counters_sum});
  }
  return languages;
}
} // namespace
// Stores |pref_service|, which is used to read and update the per-language
// counters.
LanguageModel::LanguageModel(PrefService* pref_service)
    : pref_service_(pref_service) {
}

// The model holds no resources that need explicit cleanup.
LanguageModel::~LanguageModel() = default;
// static
// Registers the dictionary pref in which the model keeps its per-language
// counters.
void LanguageModel::RegisterProfilePrefs(PrefRegistrySimple* registry) {
  const char* const counters_pref = kLanguageModelCounters;
  registry->RegisterDictionaryPref(counters_pref);
}
std::vector<LanguageModel::LanguageInfo> LanguageModel::GetTopLanguages()
const {
std::vector<LanguageModel::LanguageInfo> top_languages =
GetAllLanguages(*pref_service_->GetDictionary(kLanguageModelCounters));
std::sort(top_languages.begin(), top_languages.end(),
[](LanguageModel::LanguageInfo a, LanguageModel::LanguageInfo b) {
return a.frequency > b.frequency;
});
return top_languages;
}
// Returns the share of |language_code| among all counted page visits, i.e.
// its counter divided by the sum of all counters. Returns 0 when the language
// is not tracked, and also when nothing has been counted yet (the sum of the
// counters is not positive), so that callers never receive NaN from a
// division by zero.
float LanguageModel::GetLanguageFrequency(
    const std::string& language_code) const {
  const base::DictionaryValue* dict =
      pref_service_->GetDictionary(kLanguageModelCounters);

  const int counters_sum = GetCountersSum(*dict);
  if (counters_sum <= 0)
    return 0.0f;

  int counter_value = 0;
  // If the key |language_code| does not exist, |counter_value| stays 0.
  dict->GetInteger(language_code, &counter_value);
  return static_cast<float>(counter_value) / counters_sum;
}
void LanguageModel::OnPageVisited(const std::string& language_code) {
DictionaryPrefUpdate update(pref_service_, kLanguageModelCounters);
base::DictionaryValue* dict = update.Get();
int counter_value = 0;
// If the key |language_code| does not exist, |counter_value| stays 0.
dict->GetInteger(language_code, &counter_value);
dict->SetInteger(language_code, counter_value + 1);
if (GetCountersSum(*dict) > kMaxCountersSum)
DiscountAndCleanCounters(dict);
}
} // namespace translate
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_TRANSLATE_CORE_BROWSER_LANGUAGE_MODEL_H_
#define COMPONENTS_TRANSLATE_CORE_BROWSER_LANGUAGE_MODEL_H_
#include <string>
#include <vector>
#include "base/macros.h"
#include "components/keyed_service/core/keyed_service.h"
class PrefRegistrySimple;
class PrefService;
namespace translate {
// Collects data about languages in which the user reads the web and provides
// access to current estimated language preferences. The past behaviour is
// discounted so that this model reflects changes in browsing habits. This model
// does not have to contain all languages that ever appeared in user's browsing;
// languages with insignificant frequency are eventually removed.
class LanguageModel : public KeyedService {
public:
// A tracked language together with its estimated share of page visits.
struct LanguageInfo {
// The ISO 639 language code.
std::string language_code;
// The current estimated frequency of the language share, a number between 0
// and 1 (can be understood as the probability that the next page the user
// opens is in this language). Frequencies over all LanguageInfos from
// GetTopLanguages() sum to 1.
float frequency;
// Note: equality compares |language_code| only; |frequency| is ignored.
bool operator==(const LanguageInfo& m) const {
return language_code == m.language_code;
}
};
// |pref_service| is where the per-language counters are read and written.
// NOTE(review): held as a raw pointer, so it presumably must outlive the
// model -- confirm against the owning factory.
explicit LanguageModel(PrefService* pref_service);
~LanguageModel() override;
// Registers profile prefs for the model.
static void RegisterProfilePrefs(PrefRegistrySimple* registry);
// Returns a list of the languages currently tracked by the model, sorted by
// frequency in decreasing order.
std::vector<LanguageInfo> GetTopLanguages() const;
// Returns the estimated frequency for the given language or 0 if the language
// is not among the top languages kept in the model.
float GetLanguageFrequency(const std::string& language_code) const;
// Informs the model that a page with the given language has been visited.
void OnPageVisited(const std::string& language_code);
private:
// Pref service holding the per-language counters.
PrefService* pref_service_;
DISALLOW_COPY_AND_ASSIGN(LanguageModel);
};
} // namespace translate
#endif // COMPONENTS_TRANSLATE_CORE_BROWSER_LANGUAGE_MODEL_H_
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/translate/core/browser/language_model.h"
#include "components/prefs/testing_pref_service.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
using testing::ElementsAre;
using testing::FloatEq;
using testing::Gt;
namespace {
// Language codes used as inputs by the tests below.
const char kLang1[] = "en";
const char kLang2[] = "de";
const char kLang3[] = "es";
} // namespace
namespace translate {
// The most visited language comes first when it is also visited first.
TEST(LanguageModelTest, ListSorted) {
  TestingPrefServiceSimple pref_service;
  LanguageModel::RegisterProfilePrefs(pref_service.registry());
  LanguageModel language_model(&pref_service);

  // Three visits in kLang1, followed by a single visit in kLang2.
  for (int visit = 0; visit < 3; ++visit)
    language_model.OnPageVisited(kLang1);
  language_model.OnPageVisited(kLang2);

  const std::vector<LanguageModel::LanguageInfo> top_languages =
      language_model.GetTopLanguages();
  EXPECT_THAT(top_languages,
              ElementsAre(LanguageModel::LanguageInfo{kLang1},
                          LanguageModel::LanguageInfo{kLang2}));
}
// The most visited language comes first even when it is visited last.
TEST(LanguageModelTest, ListSortedReversed) {
  TestingPrefServiceSimple pref_service;
  LanguageModel::RegisterProfilePrefs(pref_service.registry());
  LanguageModel language_model(&pref_service);

  // A single visit in kLang2, followed by three visits in kLang1.
  language_model.OnPageVisited(kLang2);
  for (int visit = 0; visit < 3; ++visit)
    language_model.OnPageVisited(kLang1);

  const std::vector<LanguageModel::LanguageInfo> top_languages =
      language_model.GetTopLanguages();
  EXPECT_THAT(top_languages,
              ElementsAre(LanguageModel::LanguageInfo{kLang1},
                          LanguageModel::LanguageInfo{kLang2}));
}
// Frequencies reported per language match the share of recorded visits.
TEST(LanguageModelTest, RightFrequencies) {
  TestingPrefServiceSimple pref_service;
  LanguageModel::RegisterProfilePrefs(pref_service.registry());
  LanguageModel language_model(&pref_service);

  // Three of four visits are in kLang1, one of four is in kLang2.
  for (int visit = 0; visit < 3; ++visit)
    language_model.OnPageVisited(kLang1);
  language_model.OnPageVisited(kLang2);

  // Corresponding frequencies are given by the model.
  const float lang1_frequency = language_model.GetLanguageFrequency(kLang1);
  const float lang2_frequency = language_model.GetLanguageFrequency(kLang2);
  EXPECT_THAT(lang1_frequency, FloatEq(0.75f));
  EXPECT_THAT(lang2_frequency, FloatEq(0.25f));

  // An unknown language gets frequency 0.
  EXPECT_THAT(language_model.GetLanguageFrequency(kLang3), 0);
}
// A language with a negligible counter is dropped once the cleanup runs.
TEST(LanguageModelTest, RareLanguageDiscarded) {
  TestingPrefServiceSimple pref_service;
  LanguageModel::RegisterProfilePrefs(pref_service.registry());
  LanguageModel language_model(&pref_service);

  // One visit in kLang2 and 900 visits in kLang1: 901 visits in total.
  language_model.OnPageVisited(kLang2);
  for (int visit = 0; visit < 900; ++visit)
    language_model.OnPageVisited(kLang1);

  // Lang 2 is in the model.
  EXPECT_THAT(language_model.GetLanguageFrequency(kLang2), Gt(0.0f));

  // Another 100 visits cause the cleanup (total > 1000).
  for (int visit = 0; visit < 100; ++visit)
    language_model.OnPageVisited(kLang1);

  // Lang 2 is removed from the model.
  const std::vector<LanguageModel::LanguageInfo> top_languages =
      language_model.GetTopLanguages();
  EXPECT_THAT(top_languages,
              ElementsAre(LanguageModel::LanguageInfo{kLang1, 1}));
}
} // namespace translate
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment