Use Finch to compare the performances of CLD1 and CLD2

Add a compile-time constant CLD_VERSION, which indicates the version of CLD. If it is not defined, a Finch test comparing CLD1 and CLD2 is used.

With this CL, each platform will have the following status:

Linux:    Use both CLD1 and CLD2 (and use Finch).
Mac OS X: Use both CLD1 and CLD2 (and use Finch).
Windows:  Use only CLD1 for now because CLD2 can't yet be compiled on Windows. Once CLD2 compiles on Windows, we will use CLD2 and Finch as soon as possible.
iOS:      Still use only CLD1. (Shipping both CLD1 and CLD2 on mobile platforms is hard because of the binary size impact.)
Android:  Still use only CLD1. (The same reason as iOS)

So some platforms will have two CLD binaries, but this is temporary: we intend to use Finch only on the Dev and Beta channels. Before releasing the stable Chromium version, we will decide which version of CLD to adopt, make another CL to use only one CLD, and send a merge request. (Of course, we hope we will be able to adopt CLD2.)

BUG=240647

Review URL: https://chromiumcodereview.appspot.com/22867032

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@221380 0039d316-1c4b-4281-b951-d872f2087c98
parent 7b14092c
......@@ -46,7 +46,6 @@
'../printing/printing.gyp:*',
'../skia/skia.gyp:*',
'../third_party/cacheinvalidation/cacheinvalidation.gyp:*',
'../third_party/cld/cld.gyp:*',
'../third_party/codesighs/codesighs.gyp:*',
'../third_party/ffmpeg/ffmpeg.gyp:*',
'../third_party/iccjpeg/iccjpeg.gyp:*',
......
......@@ -341,6 +341,12 @@
# print, UI, etc.
'enable_printing%': 1,
# Set the version of CLD.
# 0: Don't specify the version. This option is for the Finch testing.
# 1: Use only CLD1.
# 2: Use only CLD2.
'cld_version%': 0,
# Enable spell checker.
'enable_spellcheck%': 1,
......@@ -510,10 +516,15 @@
'enable_one_click_signin%': 1,
}],
['OS=="win"', {
'cld_version%': 1,
}],
['OS=="android"', {
'enable_automation%': 0,
'enable_extensions%': 0,
'enable_google_now%': 0,
'cld_version%': 1,
'enable_spellcheck%': 0,
'enable_themes%': 0,
'remoting%': 0,
......@@ -563,6 +574,7 @@
'enable_automation%': 0,
'enable_extensions%': 0,
'enable_google_now%': 0,
'cld_version%': 1,
'enable_printing%': 0,
'enable_session_service%': 0,
'enable_themes%': 0,
......@@ -851,6 +863,7 @@
'enable_printing%': '<(enable_printing)',
'enable_spellcheck%': '<(enable_spellcheck)',
'enable_google_now%': '<(enable_google_now)',
'cld_version%': '<(cld_version)',
'enable_captive_portal_detection%': '<(enable_captive_portal_detection)',
'disable_ftp_support%': '<(disable_ftp_support)',
'enable_task_manager%': '<(enable_task_manager)',
......@@ -2272,6 +2285,9 @@
['enable_google_now==1', {
'defines': ['ENABLE_GOOGLE_NOW=1'],
}],
['cld_version!=0', {
'defines': ['CLD_VERSION=<(cld_version)'],
}],
['enable_printing==1', {
'defines': ['ENABLE_FULL_PRINTING=1', 'ENABLE_PRINTING=1'],
}],
......
......@@ -24,7 +24,6 @@
# TODO(gregoryd): chrome_resources and chrome_strings could be
# shared with the 64-bit target, but it does not work due to a gyp
# issue.
'../third_party/cld/cld.gyp:cld',
'common_net',
'common_version',
'installer_util',
......@@ -667,6 +666,16 @@
'common/print_messages.h',
]
}],
['cld_version==0 or cld_version==1', {
'dependencies': [
'<(DEPTH)/third_party/cld/cld.gyp:cld',
],
}],
['cld_version==0 or cld_version==2', {
'dependencies': [
'<(DEPTH)/third_party/cld_2/cld_2.gyp:cld_2',
],
}],
],
'target_conditions': [
['OS == "ios"', {
......
......@@ -107,7 +107,6 @@
'../crypto/crypto.gyp:crypto',
'../printing/printing.gyp:printing',
'../net/net.gyp:net_resources',
'../third_party/cld/cld.gyp:cld',
'../ui/views/views.gyp:views',
'../webkit/webkit_resources.gyp:webkit_resources',
],
......@@ -233,6 +232,16 @@
'../content/content.gyp:content_app_browser',
],
}],
['cld_version==0 or cld_version==1', {
'dependencies': [
'../third_party/cld/cld.gyp:cld',
],
}],
['cld_version==0 or cld_version==2', {
'dependencies': [
'../third_party/cld_2/cld_2.gyp:cld_2',
],
}],
['OS=="mac" and component!="shared_library"', {
'includes': [ 'chrome_dll_bundle.gypi' ],
}],
......
......@@ -1151,7 +1151,6 @@
'../testing/gmock.gyp:gmock',
'../testing/gtest.gyp:gtest',
'../third_party/cacheinvalidation/cacheinvalidation.gyp:cacheinvalidation',
'../third_party/cld/cld.gyp:cld',
'../third_party/icu/icu.gyp:icui18n',
'../third_party/icu/icu.gyp:icuuc',
'../third_party/leveldatabase/leveldatabase.gyp:leveldatabase',
......@@ -2165,7 +2164,6 @@
'../testing/gmock.gyp:gmock',
'../testing/gtest.gyp:gtest',
'../testing/perf/perf_test.gyp:*',
'../third_party/cld/cld.gyp:cld',
'../third_party/icu/icu.gyp:icui18n',
'../third_party/icu/icu.gyp:icuuc',
'../third_party/leveldatabase/leveldatabase.gyp:leveldatabase',
......
......@@ -487,14 +487,6 @@
'include_dirs': [
'..',
],
'defines': [
'CLD_WINDOWS',
],
'direct_dependent_settings': {
'defines': [
'CLD_WINDOWS',
],
},
'msvs_settings': {
'VCLinkerTool': {
'conditions': [
......@@ -1940,7 +1932,6 @@
'../skia/ext/skia_utils_mac_unittest.mm',
'../skia/ext/vector_canvas_unittest.cc',
'../testing/gtest_mac_unittest.mm',
'../third_party/cld/encodings/compact_lang_det/compact_lang_det_unittest_small.cc',
'../third_party/zlib/google/zip_reader_unittest.cc',
'../third_party/zlib/google/zip_unittest.cc',
'../tools/json_schema_compiler/test/additional_properties_unittest.cc',
......@@ -1978,7 +1969,6 @@
'../gpu/gpu.gyp:gpu_unittest_utils',
'../media/media.gyp:media_test_support',
'../ppapi/ppapi_internal.gyp:ppapi_unittest_shared',
'../third_party/cld/cld.gyp:cld',
'../third_party/leveldatabase/leveldatabase.gyp:leveldatabase',
'../third_party/libjingle/libjingle.gyp:libjingle',
'../third_party/libphonenumber/libphonenumber.gyp:libphonenumber',
......@@ -2635,6 +2625,22 @@
['exclude', '^browser/extensions/blacklist_unittest.cc'],
],
}],
['cld_version==0 or cld_version==1', {
'defines': [
'CLD_WINDOWS',
],
'direct_dependent_settings': {
'defines': [
'CLD_WINDOWS',
],
},
'sources': [
'../third_party/cld/encodings/compact_lang_det/compact_lang_det_unittest_small.cc',
],
'dependencies': [
'../third_party/cld/cld.gyp:cld',
],
}],
],
'target_conditions': [
['OS == "ios"', {
......
......@@ -26,6 +26,7 @@ include_rules = [
"+third_party/bzip2",
"+third_party/cld",
"+third_party/cld/encodings/compact_lang_det/win",
"+third_party/cld_2/src",
"+third_party/mt19937ar",
"+third_party/npapi",
"+third_party/re2",
......
......@@ -5,14 +5,23 @@
#include "chrome/common/translate/language_detection_util.h"
#include "base/logging.h"
#include "base/metrics/field_trial.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "chrome/common/chrome_constants.h"
#include "chrome/common/translate/translate_common_metrics.h"
#include "chrome/common/translate/translate_util.h"
#if !defined(CLD_VERSION) || CLD_VERSION==1
#include "third_party/cld/encodings/compact_lang_det/compact_lang_det.h"
#include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h"
#endif
#if !defined(CLD_VERSION) || CLD_VERSION==2
#include "third_party/cld_2/src/public/compact_lang_det.h"
#endif
namespace {
......@@ -61,18 +70,63 @@ void ApplyLanguageCodeCorrection(std::string* code) {
TranslateUtil::ToTranslateLanguageSynonym(code);
}
// Returns the major version (1 or 2) of the CLD library to use.
//
// When the build defines CLD_VERSION, that value is returned unchanged.
// Otherwise the choice is made at runtime from the "CLD1VsCLD2" field trial:
// the "CLD2" group selects version 2 and every other group name (including
// an empty one) selects version 1.
int GetCLDMajorVersion() {
#if defined(CLD_VERSION)
  return CLD_VERSION;
#else
  const std::string trial_group =
      base::FieldTrialList::FindFullName("CLD1VsCLD2");
  return (trial_group == "CLD2") ? 2 : 1;
#endif
}
// Returns the ISO 639 language code of the specified |text|, or 'unknown' if it
// failed.
// |is_cld_reliable| will be set as true if CLD says the detection is reliable.
std::string DetermineTextLanguage(const base::string16& text,
bool* is_cld_reliable) {
std::string language = chrome::kUnknownLanguageCode;
int num_languages = 0;
int text_bytes = 0;
bool is_reliable = false;
Language cld_language =
DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable,
&num_languages, NULL, &text_bytes);
// Language or CLD2::Language
int cld_language = 0;
bool is_valid_language = false;
switch (GetCLDMajorVersion()) {
#if !defined(CLD_VERSION) || CLD_VERSION==1
case 1: {
int num_languages = 0;
cld_language =
DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable,
&num_languages, NULL, &text_bytes);
is_valid_language = cld_language != NUM_LANGUAGES &&
cld_language != UNKNOWN_LANGUAGE &&
cld_language != TG_UNKNOWN_LANGUAGE;
break;
}
#endif
#if !defined(CLD_VERSION) || CLD_VERSION==2
case 2: {
std::string utf8_text(UTF16ToUTF8(text));
CLD2::Language language3[3];
int percent3[3];
cld_language =
CLD2::DetectLanguageSummary(utf8_text.c_str(), utf8_text.size(), true,
language3, percent3,
&text_bytes, &is_reliable);
is_valid_language = cld_language != CLD2::NUM_LANGUAGES &&
cld_language != CLD2::UNKNOWN_LANGUAGE &&
cld_language != CLD2::TG_UNKNOWN_LANGUAGE;
break;
}
#endif
default:
NOTREACHED();
}
if (is_cld_reliable != NULL)
*is_cld_reliable = is_reliable;
......@@ -82,15 +136,33 @@ std::string DetermineTextLanguage(const base::string16& text,
// TODO(toyoshim): CLD provides |is_reliable| flag. But, it just says that
// the determined language code is correct with 50% confidence. Chrome should
// handle the real confidence value to judge.
if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES &&
cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) {
if (is_reliable && text_bytes >= 100 && is_valid_language) {
// We should not use LanguageCode_ISO_639_1 because it does not cover all
// the languages CLD can detect. As a result, it'll return the invalid
// language code for traditional Chinese among others.
// |LanguageCodeWithDialect| will go through ISO 639-1, ISO-639-2 and
// 'other' tables to do the 'right' thing. In addition, it'll return zh-CN
// for Simplified Chinese.
language = LanguageCodeWithDialects(cld_language);
switch (GetCLDMajorVersion()) {
#if !defined(CLD_VERSION) || CLD_VERSION==1
case 1:
language =
LanguageCodeWithDialects(static_cast<Language>(cld_language));
break;
#endif
#if !defined(CLD_VERSION) || CLD_VERSION==2
case 2:
if (cld_language == CLD2::CHINESE) {
language = "zh-CN";
} else {
language =
CLD2::LanguageCode(static_cast<CLD2::Language>(cld_language));
}
break;
#endif
default:
NOTREACHED();
}
}
VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text
<< "\n*************************************\n";
......@@ -291,7 +363,19 @@ bool MaybeServerWrongConfiguration(const std::string& page_language,
}
// Returns the version string reported by the CLD library that
// GetCLDMajorVersion() selects.
//
// Each case is compiled in only when the corresponding library is part of the
// build (CLD_VERSION undefined, or equal to that major version), so a build
// pinned to one CLD never references the other library's symbols.
//
// An unconditional call to CompactLangDet::DetectLanguageVersion() used to
// precede the switch; it made the switch unreachable and referenced a
// CLD1-only symbol even when CLD_VERSION==2, so it has been dropped.
std::string GetCLDVersion() {
  switch (GetCLDMajorVersion()) {
#if !defined(CLD_VERSION) || CLD_VERSION==1
    case 1:
      return CompactLangDet::DetectLanguageVersion();
#endif
#if !defined(CLD_VERSION) || CLD_VERSION==2
    case 2:
      return CLD2::DetectLanguageVersion();
#endif
    default:
      NOTREACHED();
  }
  // Only reached when no case matched (after NOTREACHED() above).
  return "";
}
} // namespace LanguageDetectionUtil
Name: Compact Language Detection
Short Name: cld
URL: http://src.chromium.org/viewvc/chrome/trunk/src/third_party/cld/
Version: unknown
Version: 0
License: BSD
Security Critical: yes
......
This diff is collapsed.
hajimehoshi@chromium.org
mad@chromium.org
toyoshim@chromium.org
Name: Compact Language Detection 2
Short Name: cld_2
URL: https://code.google.com/p/cld2/
Version: 0
License: Apache 2.0
Security Critical: yes
Description:
The CLD is used to determine the language of text. In Chromium, this is used
to determine if Chrome should offer Translate UX to the user.
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
{
'targets': [
{
'target_name': 'cld_2',
'type': 'static_library',
'include_dirs': [
'src/internal',
'src/public',
],
'sources': [
'src/internal/cld2_generated_cjk_compatible.cc',
'src/internal/cld2_generated_deltaoctachrome0614.cc',
'src/internal/cld2_generated_distinctoctachrome0604.cc',
'src/internal/cld2_generated_octa2_dummy.cc',
'src/internal/cld2_generated_quadchrome0715.cc',
'src/internal/cld2tablesummary.h',
'src/internal/cld_generated_cjk_delta_bi_32.cc',
'src/internal/cld_generated_cjk_delta_bi_4.cc',
'src/internal/cld_generated_cjk_uni_prop_80.cc',
'src/internal/cld_generated_score_quad_octa_1024_256.cc',
'src/internal/cldutil.cc',
'src/internal/cldutil.h',
'src/internal/cldutil_offline.cc',
'src/internal/cldutil_offline.h',
'src/internal/cldutil_shared.cc',
'src/internal/cldutil_shared.h',
'src/internal/compact_lang_det.cc',
'src/internal/compact_lang_det_hint_code.cc',
'src/internal/compact_lang_det_hint_code.h',
'src/internal/compact_lang_det_impl.cc',
'src/internal/compact_lang_det_impl.h',
'src/internal/debug.cc',
'src/internal/debug_empty.cc',
'src/internal/debug.h',
'src/internal/fixunicodevalue.cc',
'src/internal/fixunicodevalue.h',
'src/internal/generated_distinct_bi_0.cc',
'src/internal/generated_entities.cc',
'src/internal/generated_language.cc',
'src/internal/generated_language.h',
'src/internal/generated_ulscript.cc',
'src/internal/generated_ulscript.h',
'src/internal/getonescriptspan.cc',
'src/internal/getonescriptspan.h',
'src/internal/integral_types.h',
'src/internal/lang_script.cc',
'src/internal/lang_script.h',
'src/internal/langspan.h',
'src/internal/offsetmap.cc',
'src/internal/offsetmap.h',
'src/internal/port.h',
'src/internal/scoreonescriptspan.cc',
'src/internal/scoreonescriptspan.h',
'src/internal/stringpiece.h',
'src/internal/tote.cc',
'src/internal/tote.h',
'src/internal/utf8prop_lettermarkscriptnum.h',
'src/internal/utf8repl_lettermarklower.h',
'src/internal/utf8scannot_lettermarkspecial.h',
'src/internal/utf8statetable.cc',
'src/internal/utf8statetable.h',
'src/public/compact_lang_det.h',
'src/public/encodings.h',
],
},
],
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment