Commit 2d2cc9b4 authored by Matthias Körber, committed by Commit Bot

[Autofill] Moved BorrowedTransliterator into its own file

This is needed to make the transliteration accessible to the merging
logic of structured address components.

Change-Id: I52738045df1d7b0b84e71adbfc5baf7f90cadf37
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2332628
Commit-Queue: Matthias Körber <koerber@google.com>
Reviewed-by: Christoph Schwering <schwering@google.com>
Cr-Commit-Position: refs/heads/master@{#794312}
parent 27671b92
......@@ -98,6 +98,8 @@ jumbo_static_library("browser") {
"data_model/autofill_structured_address_regex_provider.h",
"data_model/autofill_structured_address_utils.cc",
"data_model/autofill_structured_address_utils.h",
"data_model/borrowed_transliterator.cc",
"data_model/borrowed_transliterator.h",
"data_model/contact_info.cc",
"data_model/contact_info.h",
"data_model/credit_card.cc",
......@@ -583,6 +585,7 @@ source_set("unit_tests") {
"data_model/autofill_structured_address_name_unittest.cc",
"data_model/autofill_structured_address_regex_provider_unittest.cc",
"data_model/autofill_structured_address_utils_unittest.cc",
"data_model/borrowed_transliterator_unittest.cc",
"data_model/contact_info_unittest.cc",
"data_model/credit_card_unittest.cc",
"data_model/phone_number_unittest.cc",
......
......@@ -10,20 +10,16 @@
#include "base/i18n/case_conversion.h"
#include "base/i18n/char_iterator.h"
#include "base/i18n/unicodestring.h"
#include "base/no_destructor.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversion_utils.h"
#include "base/strings/utf_string_conversions.h"
#include "base/synchronization/lock.h"
#include "components/autofill/core/browser/address_rewriter.h"
#include "components/autofill/core/browser/autofill_data_util.h"
#include "components/autofill/core/browser/autofill_metrics.h"
#include "components/autofill/core/browser/geo/autofill_country.h"
#include "components/autofill/core/browser/geo/state_names.h"
#include "components/autofill/core/common/autofill_clock.h"
#include "third_party/icu/source/common/unicode/unistr.h"
#include "third_party/icu/source/i18n/unicode/translit.h"
#include "third_party/libphonenumber/phonenumber_api.h"
using base::UTF16ToUTF8;
......@@ -190,51 +186,6 @@ int32_t NormalizingIterator::GetNextChar() {
return iter_.get();
}
// This RAII class provides a thread-safe interface to a shared transliterator.
// Sharing a single transliterator is advisable due to its high construction
// cost.
//
// Every instance holds the lock returned by GetLock() in |auto_lock_|, so the
// shared transliterator is only ever touched by the current holder.
class BorrowedTransliterator {
public:
// Binds |auto_lock_| to the shared lock returned by GetLock().
BorrowedTransliterator() : auto_lock_(GetLock()) {}
// Rewrites |text| in place. Uses the shared ICU transliterator when one was
// created successfully; otherwise falls back to lower-casing |text|.
void Transliterate(icu::UnicodeString* text) const {
if (GetTransliterator() != nullptr) {
GetTransliterator()->transliterate(*text);
} else {
*text = text->toLower();
}
}
private:
// Returns the single lock shared by all instances. The lock lives in a
// function-local static wrapped in base::NoDestructor.
static base::Lock& GetLock() {
static base::NoDestructor<base::Lock> instance;
return *instance;
}
// Use ICU transliteration to remove diacritics and fold case.
// See http://userguide.icu-project.org/transforms/general
// Logs an error if ICU reports a failure or returns no transliterator; the
// (possibly null) result is returned to the caller either way.
static std::unique_ptr<icu::Transliterator> CreateTransliterator() {
UErrorCode status = U_ZERO_ERROR;
std::unique_ptr<icu::Transliterator> transliterator(
icu::Transliterator::createInstance(
"NFD; [:Nonspacing Mark:] Remove; Lower; NFC", UTRANS_FORWARD,
status));
if (U_FAILURE(status) || transliterator == nullptr) {
// TODO(rogerm): Add a histogram to count how often this happens.
LOG(ERROR) << "Failed to create ICU Transliterator: "
<< u_errorName(status);
}
return transliterator;
}
// Returns the shared transliterator, created once on first use by
// CreateTransliterator(). The held pointer may be null if creation failed.
static std::unique_ptr<icu::Transliterator>& GetTransliterator() {
static base::NoDestructor<std::unique_ptr<icu::Transliterator>> instance(
CreateTransliterator());
return *instance;
}
// Guard over the lock returned by GetLock(); initialized in the constructor.
base::AutoLock auto_lock_;
};
} // namespace
AutofillProfileComparator::AutofillProfileComparator(
......@@ -327,9 +278,7 @@ base::string16 AutofillProfileComparator::NormalizeForComparison(
if (previous_was_whitespace && !result.empty())
result.resize(result.size() - 1);
icu::UnicodeString value = icu::UnicodeString(result.data(), result.length());
BorrowedTransliterator().Transliterate(&value);
return base::i18n::UnicodeStringToString16(value);
return RemoveDiacriticsAndConvertToLowerCase(result);
}
bool AutofillProfileComparator::AreMergeable(const AutofillProfile& p1,
......
......@@ -12,6 +12,7 @@
#include "base/strings/string_piece.h"
#include "components/autofill/core/browser/data_model/address.h"
#include "components/autofill/core/browser/data_model/autofill_profile.h"
#include "components/autofill/core/browser/data_model/borrowed_transliterator.h"
#include "components/autofill/core/browser/data_model/contact_info.h"
#include "components/autofill/core/common/autofill_l10n_util.h"
......
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/autofill/core/browser/data_model/borrowed_transliterator.h"
#include "base/logging.h"
namespace autofill {
// Binds |auto_lock_| to the lock shared by all instances (see GetLock()), so
// that access to the shared transliterator is serialized between instances.
BorrowedTransliterator::BorrowedTransliterator() : auto_lock_(GetLock()) {}
// Nothing to clean up explicitly; destroying |auto_lock_| is the only work.
BorrowedTransliterator::~BorrowedTransliterator() = default;
// Rewrites |text| in place. When the shared ICU transliterator is available it
// is applied to |text|; when it could not be created, |text| is merely
// converted to lower case.
void BorrowedTransliterator::Transliterate(icu::UnicodeString* text) const {
  const auto& shared_transliterator = GetTransliterator();

  // Fallback path: no transliterator, so only fold the case.
  if (shared_transliterator == nullptr) {
    // toLower() modifies |text| itself, so no assignment is needed.
    text->toLower();
    return;
  }

  shared_transliterator->transliterate(*text);
}
// static
// Hands out the one lock that every BorrowedTransliterator instance shares.
// The lock is a function-local static wrapped in base::NoDestructor.
base::Lock& BorrowedTransliterator::GetLock() {
  static base::NoDestructor<base::Lock> shared_lock;
  return *shared_lock.get();
}
// static
std::unique_ptr<icu::Transliterator>
BorrowedTransliterator::CreateTransliterator() {
UErrorCode status = U_ZERO_ERROR;
std::unique_ptr<icu::Transliterator> transliterator(
icu::Transliterator::createInstance(
"NFD; [:Nonspacing Mark:] Remove; Lower; NFC", UTRANS_FORWARD,
status));
if (U_FAILURE(status) || transliterator == nullptr) {
// TODO(rogerm): Add a histogram to count how often this happens.
LOG(ERROR) << "Failed to create ICU Transliterator: "
<< u_errorName(status);
}
return transliterator;
}
// static
std::unique_ptr<icu::Transliterator>&
BorrowedTransliterator::GetTransliterator() {
static base::NoDestructor<std::unique_ptr<icu::Transliterator>> instance(
CreateTransliterator());
return *instance;
}
// Returns a copy of |value| that has been run through the shared
// transliterator (diacritics removed, lower-cased).
base::string16 RemoveDiacriticsAndConvertToLowerCase(
    base::StringPiece16 value) {
  icu::UnicodeString text(value.data(), value.length());
  {
    // Keep the borrow as short as the transliteration itself; the conversion
    // back to string16 below does not need the shared transliterator.
    BorrowedTransliterator borrowed;
    borrowed.Transliterate(&text);
  }
  return base::i18n::UnicodeStringToString16(text);
}
} // namespace autofill
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_AUTOFILL_CORE_BROWSER_DATA_MODEL_BORROWED_TRANSLITERATOR_H_
#define COMPONENTS_AUTOFILL_CORE_BROWSER_DATA_MODEL_BORROWED_TRANSLITERATOR_H_
#include "base/i18n/unicodestring.h"
#include "base/no_destructor.h"
#include "base/synchronization/lock.h"
#include "third_party/icu/source/common/unicode/unistr.h"
#include "third_party/icu/source/i18n/unicode/translit.h"
namespace autofill {
// This RAII class provides a thread-safe interface to a shared transliterator.
// Sharing a single transliterator is advisable due to its high construction
// cost.
//
// Each instance owns a base::AutoLock member (|auto_lock_|); the static
// helpers below provide the lock and the transliterator shared by all
// instances.
class BorrowedTransliterator {
public:
BorrowedTransliterator();
virtual ~BorrowedTransliterator();
// Applies the shared transliteration to |text| in place.
void Transliterate(icu::UnicodeString* text) const;
private:
// Returns the lock shared by all instances.
static base::Lock& GetLock();
// Use ICU transliteration to remove diacritics and fold case.
// See http://userguide.icu-project.org/transforms/general
static std::unique_ptr<icu::Transliterator> CreateTransliterator();
// Returns a reference to the pointer holding the shared transliterator.
static std::unique_ptr<icu::Transliterator>& GetTransliterator();
// Lock guard held for as long as this object exists.
base::AutoLock auto_lock_;
};
// Applies the transliteration to a full string to convert it to lower case and
// to remove the diacritics. Returns the converted string; |value| itself is
// not modified.
base::string16 RemoveDiacriticsAndConvertToLowerCase(base::StringPiece16 value);
} // namespace autofill
#endif // COMPONENTS_AUTOFILL_CORE_BROWSER_DATA_MODEL_BORROWED_TRANSLITERATOR_H_
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/autofill/core/browser/data_model/borrowed_transliterator.h"
#include "base/strings/string_piece.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/gtest_util.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace autofill {
// Checks that a string mixing accented letters, an upper-case letter, digits
// and punctuation comes back as plain lower-case ASCII with the digits and
// punctuation untouched.
TEST(BorrowedTransliterator, RemoveDiacriticsAndConvertToLowerCase) {
  const auto input =
      base::UTF8ToUTF16("āēaa11.īūčģķļņšžKāäǟḑēīļņōȯȱõȭŗšțūž");
  const auto expected =
      base::ASCIIToUTF16("aeaa11.iucgklnszkaaadeilnooooorstuz");

  EXPECT_EQ(RemoveDiacriticsAndConvertToLowerCase(input), expected);
}
} // namespace autofill
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment