Commit 12fa47e9 authored by Elizabeth Popova, committed by Chromium LUCI CQ

[Autofill] Add parsing for dependent locality

Prior to this CL, dependent locality was part of the address scheme, but
was never detected in forms by Autofill. The missing parsing is
implemented behind the kAutofillEnableDependentLocalityParsing flag.

Bug: 1154727
Change-Id: I1c3869549b0aca9f803c88d733239ed1414abc96
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2584843
Commit-Queue: Elizabeth Popova <lizapopova@google.com>
Reviewed-by: Matthias Körber <koerber@google.com>
Cr-Commit-Position: refs/heads/master@{#837058}
parent a4942fbc
......@@ -127,6 +127,10 @@ const char kZipCodeRe[] =
const char kZip4Re[] =
"zip|^-$|post2"
"|codpos2"; // pt-BR, pt-PT
// Regular expression used to recognize a dependent-locality field
// (e.g. a neighborhood). AddressField passes it to ParseFieldSpecifics() and
// ParseNameAndLabelSeparately(). Each alternative is annotated with the
// language(s) it targets.
// NOTE(review): the same alternatives appear under
// "ADDRESS_HOME_DEPENDENT_LOCALITY" in the JSON pattern file; presumably the
// two should be kept in sync - confirm.
const char kDependentLocalityRe[] =
"neighbo(u)?rhood" // en
"|bairro" // pt-BR, pt-PT
"|mahalle|köy"; // tr
const char kCityRe[] =
"city|town"
"|\\bort\\b|stadt" // de-DE
......
......@@ -25,6 +25,7 @@ extern const char kDependentLocality[];
extern const char kCountryLocationRe[];
extern const char kZipCodeRe[];
extern const char kZip4Re[];
extern const char kDependentLocalityRe[];
extern const char kCityRe[];
extern const char kStateRe[];
extern const char kNameOnCardRe[];
......
......@@ -584,6 +584,10 @@ TEST_P(FormDataImporterTest, ImportStructuredNameAddressProfile) {
}
TEST_P(FormDataImporterTest, ImportAddressProfiles) {
base::test::ScopedFeatureList dependent_locality_feature;
dependent_locality_feature.InitAndEnableFeature(
features::kAutofillEnableDependentLocalityParsing);
FormData form;
form.url = GURL("https://wwww.foo.com");
......@@ -600,6 +604,9 @@ TEST_P(FormDataImporterTest, ImportAddressProfiles) {
test::CreateTestFormField("Address:", "address1", "21 Laussat St", "text",
&field);
form.fields.push_back(field);
test::CreateTestFormField("Neighborhood:", "neighborhood", "Nob Hill", "text",
&field);
form.fields.push_back(field);
test::CreateTestFormField("City:", "city", "San Francisco", "text", &field);
form.fields.push_back(field);
test::CreateTestFormField("State:", "state", "California", "text", &field);
......@@ -613,8 +620,8 @@ TEST_P(FormDataImporterTest, ImportAddressProfiles) {
AutofillProfile expected(base::GenerateGUID(), test::kEmptyOrigin);
test::SetProfileInfo(&expected, "George", nullptr, "Washington",
"theprez@gmail.com", nullptr, "21 Laussat St", nullptr,
"San Francisco", "California", "94102", nullptr,
nullptr);
"Nob Hill", "San Francisco", "California", "94102",
nullptr, nullptr);
const std::vector<AutofillProfile*>& results =
personal_data_manager_->GetProfiles();
ASSERT_EQ(1U, results.size());
......
......@@ -38,6 +38,9 @@ bool SetFieldAndAdvanceCursor(AutofillScanner* scanner, AutofillField** field) {
const int AddressField::kZipCodeMatchType =
MATCH_DEFAULT | MATCH_TELEPHONE | MATCH_NUMBER;
// Match flags used when looking for a dependent-locality field. This is the
// same flag combination as kCityMatchType.
// NOTE(review): presumably MATCH_SELECT / MATCH_SEARCH allow select and
// search inputs in addition to the default input types - confirm against the
// MATCH_* definitions.
const int AddressField::kDependentLocalityMatchType =
MATCH_DEFAULT | MATCH_SELECT | MATCH_SEARCH;
// Select fields are allowed here. This occurs on top-100 site rediff.com.
const int AddressField::kCityMatchType =
MATCH_DEFAULT | MATCH_SELECT | MATCH_SEARCH;
......@@ -100,8 +103,8 @@ std::unique_ptr<FormField> AddressField::Parse(
{.augment_types = MATCH_TEXT_AREA})) {
continue;
} else if (address_field->ParseAddress(scanner, page_language) ||
address_field->ParseCityStateCountryZipCode(scanner,
page_language) ||
address_field->ParseDependentLocalityCityStateCountryZipCode(
scanner, page_language) ||
address_field->ParseCompany(scanner, page_language)) {
has_trailing_non_labeled_fields = false;
continue;
......@@ -141,7 +144,8 @@ std::unique_ptr<FormField> AddressField::Parse(
address_field->street_address_ || address_field->city_ ||
address_field->state_ || address_field->zip_ || address_field->zip4_ ||
address_field->street_name_ || address_field->house_number_ ||
address_field->country_ || address_field->apartment_number_) {
address_field->country_ || address_field->apartment_number_ ||
address_field->dependent_locality_) {
// Don't slurp non-labeled fields at the end into the address.
if (has_trailing_non_labeled_fields)
scanner->RewindTo(begin_trailing_non_labeled_fields);
......@@ -174,6 +178,8 @@ void AddressField::AddClassifications(
field_candidates);
AddClassification(street_address_, ADDRESS_HOME_STREET_ADDRESS,
kBaseAddressParserScore, field_candidates);
AddClassification(dependent_locality_, ADDRESS_HOME_DEPENDENT_LOCALITY,
kBaseAddressParserScore, field_candidates);
AddClassification(city_, ADDRESS_HOME_CITY, kBaseAddressParserScore,
field_candidates);
AddClassification(state_, ADDRESS_HOME_STATE, kBaseAddressParserScore,
......@@ -416,6 +422,24 @@ bool AddressField::ParseZipCode(AutofillScanner* scanner,
return true;
}
// Attempts to match the field at |scanner| as a dependent locality.
//
// Returns false without attempting a match when the
// kAutofillEnableDependentLocalityParsing feature is disabled or when
// |dependent_locality_| is already set. Otherwise returns whatever
// ParseFieldSpecifics() returns for kDependentLocalityRe together with the
// "ADDRESS_HOME_DEPENDENT_LOCALITY" patterns for |page_language|.
bool AddressField::ParseDependentLocality(AutofillScanner* scanner,
                                          const LanguageCode& page_language) {
  // TODO(crbug.com/1157405) Remove feature check when launched.
  // The feature state is queried up front, before |dependent_locality_| is
  // inspected.
  const bool parsing_enabled = base::FeatureList::IsEnabled(
      features::kAutofillEnableDependentLocalityParsing);
  if (!parsing_enabled)
    return false;

  // A dependent locality has already been recorded; do not look for another.
  if (dependent_locality_)
    return false;

  const std::vector<MatchingPattern>& patterns =
      PatternProvider::GetInstance().GetMatchPatterns(
          "ADDRESS_HOME_DEPENDENT_LOCALITY", page_language);
  return ParseFieldSpecifics(scanner, UTF8ToUTF16(kDependentLocalityRe),
                             kDependentLocalityMatchType, patterns,
                             &dependent_locality_,
                             {log_manager_, "kDependentLocalityRe"});
}
bool AddressField::ParseCity(AutofillScanner* scanner,
const LanguageCode& page_language) {
if (city_)
......@@ -472,28 +496,43 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelSeparately(
return RESULT_MATCH_NONE;
}
bool AddressField::ParseCityStateCountryZipCode(
bool AddressField::ParseDependentLocalityCityStateCountryZipCode(
AutofillScanner* scanner,
const LanguageCode& page_language) {
// The |scanner| is not pointing at a field.
if (scanner->IsEnd())
return false;
int num_of_missing_types = 0;
for (const auto* field :
{dependent_locality_, city_, state_, country_, zip_}) {
if (!field)
++num_of_missing_types;
}
// All the field types have already been detected.
if (city_ && state_ && country_ && zip_)
if (num_of_missing_types == 0)
return false;
// Exactly one field type is missing.
if (state_ && country_ && zip_)
return ParseCity(scanner, page_language);
if (city_ && country_ && zip_)
return ParseState(scanner, page_language);
if (city_ && state_ && zip_)
return ParseCountry(scanner, page_language);
if (city_ && state_ && country_)
return ParseZipCode(scanner, page_language);
if (num_of_missing_types == 1) {
if (!dependent_locality_)
return ParseDependentLocality(scanner, page_language);
if (!city_)
return ParseCity(scanner, page_language);
if (!state_)
return ParseState(scanner, page_language);
if (!country_)
return ParseCountry(scanner, page_language);
if (!zip_)
return ParseZipCode(scanner, page_language);
}
// Check for matches to both the name and the label.
ParseNameLabelResult dependent_locality_result =
ParseNameAndLabelForDependentLocality(scanner, page_language);
if (dependent_locality_result == RESULT_MATCH_NAME_LABEL)
return true;
ParseNameLabelResult city_result =
ParseNameAndLabelForCity(scanner, page_language);
if (city_result == RESULT_MATCH_NAME_LABEL)
......@@ -511,36 +550,47 @@ bool AddressField::ParseCityStateCountryZipCode(
if (zip_result == RESULT_MATCH_NAME_LABEL)
return true;
int num_of_matches = 0;
for (const auto result : {dependent_locality_result, city_result,
state_result, country_result, zip_result}) {
if (result != RESULT_MATCH_NONE)
++num_of_matches;
}
// Check if there is only one potential match.
bool maybe_city = city_result != RESULT_MATCH_NONE;
bool maybe_state = state_result != RESULT_MATCH_NONE;
bool maybe_country = country_result != RESULT_MATCH_NONE;
bool maybe_zip = zip_result != RESULT_MATCH_NONE;
if (maybe_city && !maybe_state && !maybe_country && !maybe_zip)
return SetFieldAndAdvanceCursor(scanner, &city_);
if (maybe_state && !maybe_city && !maybe_country && !maybe_zip)
return SetFieldAndAdvanceCursor(scanner, &state_);
if (maybe_country && !maybe_city && !maybe_state && !maybe_zip)
return SetFieldAndAdvanceCursor(scanner, &country_);
if (maybe_zip && !maybe_city && !maybe_state && !maybe_country)
return ParseZipCode(scanner, page_language);
if (num_of_matches == 1) {
if (dependent_locality_result != RESULT_MATCH_NONE)
return SetFieldAndAdvanceCursor(scanner, &dependent_locality_);
if (city_result != RESULT_MATCH_NONE)
return SetFieldAndAdvanceCursor(scanner, &city_);
if (state_result != RESULT_MATCH_NONE)
return SetFieldAndAdvanceCursor(scanner, &state_);
if (country_result != RESULT_MATCH_NONE)
return SetFieldAndAdvanceCursor(scanner, &country_);
if (zip_result != RESULT_MATCH_NONE)
return ParseZipCode(scanner, page_language);
}
// If there is a clash between the country and the state, set the type of
// the field to the country.
if (maybe_state && maybe_country && !maybe_city && !maybe_zip)
if (num_of_matches == 2 && state_result != RESULT_MATCH_NONE &&
country_result != RESULT_MATCH_NONE)
return SetFieldAndAdvanceCursor(scanner, &country_);
// By default give the name priority over the label.
ParseNameLabelResult resultsToMatch[] = {RESULT_MATCH_NAME,
RESULT_MATCH_LABEL};
ParseNameLabelResult results_to_match[] = {RESULT_MATCH_NAME,
RESULT_MATCH_LABEL};
if (page_language == LanguageCode("tr") &&
base::FeatureList::IsEnabled(
features::kAutofillEnableLabelPrecedenceForTurkishAddresses)) {
// Give the label priority over the name.
std::swap(resultsToMatch[0], resultsToMatch[1]);
// Give the label priority over the name to avoid misclassifications when
// the name has a misleading value (e.g. province field is named "city").
std::swap(results_to_match[0], results_to_match[1]);
}
for (auto result : resultsToMatch) {
for (const auto result : results_to_match) {
if (dependent_locality_result == result)
return SetFieldAndAdvanceCursor(scanner, &dependent_locality_);
if (city_result == result)
return SetFieldAndAdvanceCursor(scanner, &city_);
if (state_result == result)
......@@ -596,6 +646,26 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode(
return result;
}
// Checks the name and the label of the field at |scanner| separately against
// the dependent-locality patterns.
//
// Returns RESULT_MATCH_NONE without attempting a match when the
// kAutofillEnableDependentLocalityParsing feature is disabled or when
// |dependent_locality_| is already set. Otherwise forwards the result of
// ParseNameAndLabelSeparately().
AddressField::ParseNameLabelResult
AddressField::ParseNameAndLabelForDependentLocality(
    AutofillScanner* scanner,
    const LanguageCode& page_language) {
  // TODO(crbug.com/1157405) Remove feature check when launched.
  // The feature state is queried up front, before |dependent_locality_| is
  // inspected.
  const bool parsing_enabled = base::FeatureList::IsEnabled(
      features::kAutofillEnableDependentLocalityParsing);
  if (!parsing_enabled)
    return RESULT_MATCH_NONE;

  // A dependent locality has already been recorded; report no match.
  if (dependent_locality_)
    return RESULT_MATCH_NONE;

  const std::vector<MatchingPattern>& patterns =
      PatternProvider::GetInstance().GetMatchPatterns(
          "ADDRESS_HOME_DEPENDENT_LOCALITY", page_language);
  return ParseNameAndLabelSeparately(
      scanner, UTF8ToUTF16(kDependentLocalityRe), kDependentLocalityMatchType,
      patterns, &dependent_locality_, {log_manager_, "kDependentLocalityRe"});
}
AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCity(
AutofillScanner* scanner,
const LanguageCode& page_language) {
......
......@@ -52,6 +52,7 @@ class AddressField : public FormField {
static const int kZipCodeMatchType;
static const int kCityMatchType;
static const int kStateMatchType;
static const int kDependentLocalityMatchType;
explicit AddressField(LogManager* log_manager);
......@@ -73,15 +74,19 @@ class AddressField : public FormField {
bool ParseZipCode(AutofillScanner* scanner,
const LanguageCode& page_language);
bool ParseDependentLocality(AutofillScanner* scanner,
const LanguageCode& page_language);
bool ParseCity(AutofillScanner* scanner, const LanguageCode& page_language);
bool ParseState(AutofillScanner* scanner, const LanguageCode& page_language);
// Parses the current field pointed to by |scanner|, if it exists, and tries
// to figure out whether the field's type: city, state, country, zip, or
// none of those.
bool ParseCityStateCountryZipCode(AutofillScanner* scanner,
const LanguageCode& page_language);
// to determine if the field's type corresponds to one of the following:
// dependent locality, city, state, country, zip, or none of those.
bool ParseDependentLocalityCityStateCountryZipCode(
AutofillScanner* scanner,
const LanguageCode& page_language);
// Like ParseFieldSpecifics(), but applies |pattern| against the name and
// label of the current field separately. If the return value is
......@@ -103,6 +108,10 @@ class AddressField : public FormField {
AutofillScanner* scanner,
const LanguageCode& page_language);
ParseNameLabelResult ParseNameAndLabelForDependentLocality(
AutofillScanner* scanner,
const LanguageCode& page_language);
ParseNameLabelResult ParseNameAndLabelForCity(
AutofillScanner* scanner,
const LanguageCode& page_language);
......@@ -124,6 +133,7 @@ class AddressField : public FormField {
AutofillField* address3_ = nullptr;
AutofillField* street_address_ = nullptr;
AutofillField* apartment_number_ = nullptr;
AutofillField* dependent_locality_ = nullptr;
AutofillField* city_ = nullptr;
AutofillField* state_ = nullptr;
AutofillField* zip_ = nullptr;
......
......@@ -53,9 +53,12 @@ class AddressFieldTest : public testing::Test {
// Apply parsing and verify the expected types.
// |parsed| indicates if at least one field could be parsed successfully.
void ClassifyAndVerify(bool parsed = true) {
// |page_language| the language to be used for parsing, default empty value
// means the language is unknown and patterns of all languages are used.
void ClassifyAndVerify(bool parsed = true,
const LanguageCode& page_language = LanguageCode("")) {
AutofillScanner scanner(list_);
field_ = Parse(&scanner);
field_ = Parse(&scanner, page_language);
if (!parsed) {
ASSERT_EQ(nullptr, field_.get());
......@@ -83,12 +86,6 @@ class AddressFieldTest : public testing::Test {
static_cast<AddressField*>(field.release()));
}
static std::unique_ptr<AddressField> Parse(AutofillScanner* scanner) {
// An empty page_language means the language is unknown and patterns of all
// languages are used.
return Parse(scanner, LanguageCode(""));
}
// Advances |id_counter_| by one and wraps the new value in a FieldRendererId.
FieldRendererId MakeFieldRendererId() {
  ++id_counter_;
  return FieldRendererId(id_counter_);
}
......@@ -193,6 +190,19 @@ TEST_F(AddressFieldTest, NotParseHouseNumberWithoutStreetName) {
ClassifyAndVerify(/*parsed=*/false);
}
// Verifies that a single field whose name is "neighborhood" and whose label
// is "Neighborhood" is classified as ADDRESS_HOME_DEPENDENT_LOCALITY when
// dependent-locality parsing is enabled.
TEST_F(AddressFieldTest, ParseDependentLocality) {
  // TODO(crbug.com/1157405): Remove once launched.
  base::test::ScopedFeatureList scoped_feature_list;
  scoped_feature_list.InitAndEnableFeature(
      features::kAutofillEnableDependentLocalityParsing);

  AddTextFormFieldData("neighborhood", "Neighborhood",
                       ADDRESS_HOME_DEPENDENT_LOCALITY);
  ClassifyAndVerify();
}
TEST_F(AddressFieldTest, ParseCity) {
AddTextFormFieldData("city", "City", ADDRESS_HOME_CITY);
ClassifyAndVerify();
......@@ -226,9 +236,17 @@ TEST_F(AddressFieldTest, ParseCompany) {
ClassifyAndVerify();
}
// Tests that the city, state, country and zip-code fields are correctly
// classfied with unambiguous field names and labels.
TEST_F(AddressFieldTest, ParseCityStateCountryZipcodeTogether) {
// Tests that the dependent locality, city, state, country and zip-code
// fields are correctly classified with unambiguous field names and labels.
TEST_F(AddressFieldTest,
ParseDependentLocalityCityStateCountryZipcodeTogether) {
// TODO(crbug.com/1157405): Remove once launched.
base::test::ScopedFeatureList enabled;
enabled.InitAndEnableFeature(
features::kAutofillEnableDependentLocalityParsing);
AddTextFormFieldData("neighborhood", "Neighborhood",
ADDRESS_HOME_DEPENDENT_LOCALITY);
AddTextFormFieldData("city", "City", ADDRESS_HOME_CITY);
AddTextFormFieldData("state", "State", ADDRESS_HOME_STATE);
AddTextFormFieldData("country", "Country", ADDRESS_HOME_COUNTRY);
......@@ -259,32 +277,9 @@ TEST_F(AddressFieldTest, ParseTurkishCityStateWithLabelPrecedence) {
enabled.InitAndEnableFeature(
features::kAutofillEnableLabelPrecedenceForTurkishAddresses);
FormFieldData field;
field.form_control_type = "text";
field.label = ASCIIToUTF16("Il");
field.name = ASCIIToUTF16("city");
field.unique_renderer_id = MakeFieldRendererId();
list_.push_back(std::make_unique<AutofillField>(field));
FieldRendererId state = list_.back()->unique_renderer_id;
field.label = ASCIIToUTF16("Ilce");
field.name = ASCIIToUTF16("county");
field.unique_renderer_id = MakeFieldRendererId();
list_.push_back(std::make_unique<AutofillField>(field));
FieldRendererId city = list_.back()->unique_renderer_id;
AutofillScanner scanner(list_);
field_ = Parse(&scanner, LanguageCode("tr"));
ASSERT_NE(nullptr, field_.get());
field_->AddClassificationsForTesting(&field_candidates_map_);
ASSERT_TRUE(field_candidates_map_.find(state) != field_candidates_map_.end());
EXPECT_EQ(ADDRESS_HOME_STATE,
field_candidates_map_[state].BestHeuristicType());
ASSERT_TRUE(field_candidates_map_.find(city) != field_candidates_map_.end());
EXPECT_EQ(ADDRESS_HOME_CITY, field_candidates_map_[city].BestHeuristicType());
AddTextFormFieldData("city", "Il", ADDRESS_HOME_STATE);
AddTextFormFieldData("county", "Ilce", ADDRESS_HOME_CITY);
ClassifyAndVerify(/*parsed=*/true, LanguageCode("tr"));
}
} // namespace autofill
......@@ -936,6 +936,38 @@
}
]
},
"ADDRESS_HOME_DEPENDENT_LOCALITY": {
"en": [
{
"pattern_identifier": "en_dependent_locality_preserving",
"positive_pattern": "neighbo(u)?rhood",
"positive_score": 1.1,
"negative_pattern": null,
"match_field_attributes": 3,
"match_field_input_types": 137
}
],
"pt": [
{
"pattern_identifier": "pt_dependent_locality_preserving",
"positive_pattern": "bairro",
"positive_score": 1.1,
"negative_pattern": null,
"match_field_attributes": 3,
"match_field_input_types": 137
}
],
"tr": [
{
"pattern_identifier": "tr_dependent_locality_preserving",
"positive_pattern": "mahalle|köy",
"positive_score": 1.1,
"negative_pattern": null,
"match_field_attributes": 3,
"match_field_input_types": 137
}
]
},
"CITY": {
"en": [
{
......
......@@ -80,6 +80,12 @@ const base::Feature kAutofillEnableAugmentedPhoneCountryCode{
"AutofillEnableAugmentedPhoneCountryCode",
base::FEATURE_DISABLED_BY_DEFAULT};
// Controls if Autofill parses ADDRESS_HOME_DEPENDENT_LOCALITY.
// While the feature is disabled (the default), the dependent-locality
// parsers in AddressField return early without matching any field.
// TODO(crbug.com/1157405): Remove once launched.
const base::Feature kAutofillEnableDependentLocalityParsing{
"AutofillEnableDependentLocalityParsing",
base::FEATURE_DISABLED_BY_DEFAULT};
// Controls whether we show "Hide suggestions" item in the suggestions menu.
const base::Feature kAutofillEnableHideSuggestionsUI{
"AutofillEnableHideSuggestionsUI", base::FEATURE_DISABLED_BY_DEFAULT};
......
......@@ -29,6 +29,7 @@ extern const base::Feature kAutofillCacheQueryResponses;
extern const base::Feature kAutofillCreateDataForTest;
extern const base::Feature kAutofillEnableAccountWalletStorage;
extern const base::Feature kAutofillEnableAugmentedPhoneCountryCode;
extern const base::Feature kAutofillEnableDependentLocalityParsing;
extern const base::Feature kAutofillEnableHideSuggestionsUI;
extern const base::Feature
kAutofillEnableInfoBarAccountIndicationFooterForSingleAccountUsers;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment