Commit 2fa09e20, authored by Christoph Schwering, committed by Commit Bot

[Autofill] Migrated parsing to PatternProvider.

Autofill uses lots of hard-coded regexps for parsing. PatternProvider
provides a cleaner API with page-language-dependent patterns.

This CL migrates the existing parsing code to the new pattern provider
in an equivalence-preserving way. The behaviour is only enabled if
either of the following features is enabled:
* kAutofillUsePageLanguageToSelectFieldParsingPatterns
* kAutofillApplyNegativePatternsForFieldTypeDetectionHeuristics

The code is not entirely ready for use because of an infrastructure
issue: all relevant tests must mock a PatternProvider that
synchronously loads the JSON. Otherwise they're prone to race
conditions. In a follow-up CL we'll solve this issue.

Change-Id: I235d2b599585522e1a1bea1cd25185ae209f5965
Bug: 1147624, 1147608
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2465847
Reviewed-by: Matthias Körber <koerber@google.com>
Reviewed-by: Christoph Schwering <schwering@google.com>
Commit-Queue: Matthias Körber <koerber@google.com>
Cr-Commit-Position: refs/heads/master@{#826777}
parent 7d925a88
...@@ -58,30 +58,50 @@ std::unique_ptr<FormField> AddressField::Parse(AutofillScanner* scanner, ...@@ -58,30 +58,50 @@ std::unique_ptr<FormField> AddressField::Parse(AutofillScanner* scanner,
base::string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe); base::string16 attention_ignored = UTF8ToUTF16(kAttentionIgnoredRe);
base::string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe); base::string16 region_ignored = UTF8ToUTF16(kRegionIgnoredRe);
// In JSON : EMAIL_ADDRESS
auto& patterns_email = PatternProvider::GetInstance().GetMatchPatterns(
"EMAIL_ADDRESS", page_language);
// In JSON : ADDRESS_LOOKUP
auto& patterns_al = PatternProvider::GetInstance().GetMatchPatterns(
"ADDRESS_LOOKUP", page_language);
// In JSON : ADDRESS_NAME_IGNORED
auto& patterns_ni = PatternProvider::GetInstance().GetMatchPatterns(
"ADDRESS_NAME_IGNORED", page_language);
// In JSON : ATTENTION_IGNORED
auto& patterns_ai = PatternProvider::GetInstance().GetMatchPatterns(
"ATTENTION_IGNORED", page_language);
// In JSON : REGION_IGNORED
auto& patterns_ri = PatternProvider::GetInstance().GetMatchPatterns(
"REGION_IGNORED", page_language);
// Allow address fields to appear in any order. // Allow address fields to appear in any order.
size_t begin_trailing_non_labeled_fields = 0; size_t begin_trailing_non_labeled_fields = 0;
bool has_trailing_non_labeled_fields = false; bool has_trailing_non_labeled_fields = false;
while (!scanner->IsEnd()) { while (!scanner->IsEnd()) {
const size_t cursor = scanner->SaveCursor(); const size_t cursor = scanner->SaveCursor();
// Ignore "Address Lookup" field. http://crbug.com/427622 // Ignore "Address Lookup" field. http://crbug.com/427622
if (ParseField(scanner, base::UTF8ToUTF16(kAddressLookupRe), nullptr, if (ParseField(scanner, base::UTF8ToUTF16(kAddressLookupRe), patterns_al,
{log_manager, "kAddressLookupRe"}) || nullptr, {log_manager, "kAddressLookupRe"}) ||
ParseField(scanner, base::UTF8ToUTF16(kAddressNameIgnoredRe), nullptr, ParseField(scanner, base::UTF8ToUTF16(kAddressNameIgnoredRe),
patterns_ni, nullptr,
{log_manager, "kAddressNameIgnoreRe"})) { {log_manager, "kAddressNameIgnoreRe"})) {
continue; continue;
// Ignore email addresses. // Ignore email addresses.
} else if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kEmailRe), } else if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kEmailRe),
MATCH_DEFAULT | MATCH_TEXT_AREA, nullptr, MATCH_DEFAULT | MATCH_TEXT_AREA,
{log_manager, "kEmailRe"})) { patterns_email, nullptr,
{log_manager, "kEmailRe"},
{.augment_types = MATCH_TEXT_AREA})) {
continue; continue;
} else if (address_field->ParseAddress(scanner) || } else if (address_field->ParseAddress(scanner, page_language) ||
(address_field->ParseCityStateCountryZipCode(scanner)) || address_field->ParseCityStateCountryZipCode(scanner,
address_field->ParseCompany(scanner)) { page_language) ||
address_field->ParseCompany(scanner, page_language)) {
has_trailing_non_labeled_fields = false; has_trailing_non_labeled_fields = false;
continue; continue;
} else if (ParseField(scanner, attention_ignored, nullptr, } else if (ParseField(scanner, attention_ignored, patterns_ai, nullptr,
{log_manager, "kAttentionIgnoredRe"}) || {log_manager, "kAttentionIgnoredRe"}) ||
ParseField(scanner, region_ignored, nullptr, ParseField(scanner, region_ignored, patterns_ri, nullptr,
{log_manager, "kRegionIgnoredRe"})) { {log_manager, "kRegionIgnoredRe"})) {
// We ignore the following: // We ignore the following:
// * Attention. // * Attention.
...@@ -162,15 +182,20 @@ void AddressField::AddClassifications( ...@@ -162,15 +182,20 @@ void AddressField::AddClassifications(
kBaseAddressParserScore, field_candidates); kBaseAddressParserScore, field_candidates);
} }
bool AddressField::ParseCompany(AutofillScanner* scanner) { bool AddressField::ParseCompany(AutofillScanner* scanner,
const std::string& page_language) {
if (company_) if (company_)
return false; return false;
// In JSON : COMPANY
auto& patterns_c =
PatternProvider::GetInstance().GetMatchPatterns("COMPANY", page_language);
return ParseField(scanner, UTF8ToUTF16(kCompanyRe), &company_, return ParseField(scanner, UTF8ToUTF16(kCompanyRe), patterns_c, &company_,
{log_manager_, "kCompanyRe"}); {log_manager_, "kCompanyRe"});
} }
bool AddressField::ParseAddressFieldSequence(AutofillScanner* scanner) { bool AddressField::ParseAddressFieldSequence(AutofillScanner* scanner,
const std::string& page_language) {
// Search for a sequence of a street name field followed by a house number // Search for a sequence of a street name field followed by a house number
// field. Only if both are found in an arbitrary order, the parsing is // field. Only if both are found in an arbitrary order, the parsing is
// considered successful. // considered successful.
...@@ -182,16 +207,24 @@ bool AddressField::ParseAddressFieldSequence(AutofillScanner* scanner) { ...@@ -182,16 +207,24 @@ bool AddressField::ParseAddressFieldSequence(AutofillScanner* scanner) {
} }
const size_t cursor_position = scanner->CursorPosition(); const size_t cursor_position = scanner->CursorPosition();
// In JSON : ---- maybe ADDRESS_LINE1(2,3)
auto& patterns_s = PatternProvider::GetInstance().GetMatchPatterns(
ADDRESS_HOME_STREET_NAME, page_language);
// In JSON : ----
auto& patterns_h = PatternProvider::GetInstance().GetMatchPatterns(
ADDRESS_HOME_HOUSE_NUMBER, page_language);
while (!scanner->IsEnd()) { while (!scanner->IsEnd()) {
if (!street_name_ && if (!street_name_ &&
ParseFieldSpecifics(scanner, UTF8ToUTF16(kStreetNameRe), MATCH_DEFAULT, ParseFieldSpecifics(scanner, UTF8ToUTF16(kStreetNameRe), MATCH_DEFAULT,
&street_name_, {log_manager_, "kStreetNameRe"})) { patterns_s, &street_name_,
{log_manager_, "kStreetNameRe"})) {
continue; continue;
} }
if (!house_number_ && if (!house_number_ &&
ParseFieldSpecifics(scanner, UTF8ToUTF16(kHouseNumberRe), MATCH_DEFAULT, ParseFieldSpecifics(scanner, UTF8ToUTF16(kHouseNumberRe), MATCH_DEFAULT,
&house_number_, {log_manager_, "kHouseNumberRe"})) { patterns_h, &house_number_,
{log_manager_, "kHouseNumberRe"})) {
continue; continue;
} }
...@@ -210,14 +243,17 @@ bool AddressField::ParseAddressFieldSequence(AutofillScanner* scanner) { ...@@ -210,14 +243,17 @@ bool AddressField::ParseAddressFieldSequence(AutofillScanner* scanner) {
return false; return false;
} }
bool AddressField::ParseAddress(AutofillScanner* scanner) { bool AddressField::ParseAddress(AutofillScanner* scanner,
const std::string& page_language) {
if (street_name_ && house_number_) { if (street_name_ && house_number_) {
return false; return false;
} }
return ParseAddressFieldSequence(scanner) || ParseAddressLines(scanner); return ParseAddressFieldSequence(scanner, page_language) ||
ParseAddressLines(scanner, page_language);
} }
bool AddressField::ParseAddressLines(AutofillScanner* scanner) { bool AddressField::ParseAddressLines(AutofillScanner* scanner,
const std::string& page_language) {
// We only match the string "address" in page text, not in element names, // We only match the string "address" in page text, not in element names,
// because sometimes every element in a group of address fields will have // because sometimes every element in a group of address fields will have
// a name containing the string "address"; for example, on the page // a name containing the string "address"; for example, on the page
...@@ -231,17 +267,23 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner) { ...@@ -231,17 +267,23 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner) {
base::string16 pattern = UTF8ToUTF16(kAddressLine1Re); base::string16 pattern = UTF8ToUTF16(kAddressLine1Re);
base::string16 label_pattern = UTF8ToUTF16(kAddressLine1LabelRe); base::string16 label_pattern = UTF8ToUTF16(kAddressLine1LabelRe);
if (!ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT, &address1_, // In JSON : ADDRESS_LINE_1
{log_manager_, "kAddressLine1Re"}) && auto& patterns_l1 = PatternProvider::GetInstance().GetMatchPatterns(
"ADDRESS_LINE_1", page_language);
if (!ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT, patterns_l1,
&address1_, {log_manager_, "kAddressLine1Re"}) &&
!ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
&address1_, patterns_l1, &address1_,
{log_manager_, "kAddressLine1LabelRe"}) && {log_manager_, "kAddressLine1LabelRe"}) &&
!ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_TEXT_AREA, !ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_TEXT_AREA,
&street_address_, patterns_l1, &street_address_,
{log_manager_, "kAddressLine1Re"}) && {log_manager_, "kAddressLine1Re"},
!ParseFieldSpecifics(scanner, label_pattern, {.augment_types = MATCH_TEXT_AREA}) &&
MATCH_LABEL | MATCH_TEXT_AREA, &street_address_, !ParseFieldSpecifics(
{log_manager_, "kAddressLine1LabelRe"})) scanner, label_pattern, MATCH_LABEL | MATCH_TEXT_AREA, patterns_l1,
&street_address_, {log_manager_, "kAddressLine1LabelRe"},
{.augment_types = MATCH_TEXT_AREA}))
return false; return false;
if (street_address_) if (street_address_)
...@@ -252,19 +294,33 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner) { ...@@ -252,19 +294,33 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner) {
// discussion on https://codereview.chromium.org/741493003/ // discussion on https://codereview.chromium.org/741493003/
pattern = UTF8ToUTF16(kAddressLine2Re); pattern = UTF8ToUTF16(kAddressLine2Re);
label_pattern = UTF8ToUTF16(kAddressLine2LabelRe); label_pattern = UTF8ToUTF16(kAddressLine2LabelRe);
if (!ParseField(scanner, pattern, &address2_, // auto& patternsL2 = PatternProvider::GetInstance().GetMatchPatterns(
// "ADDRESS_HOME_LINE2", page_language);
// auto& patternsSA = PatternProvider::GetInstance().GetMatchPatterns(
// "ADDRESS_HOME_STREET_ADDRESS", page_language);
// In JSON : ADDRESS_LINE_2
auto& patterns_l2 = PatternProvider::GetInstance().GetMatchPatterns(
"ADDRESS_LINE_2", page_language);
// In JSON : ADDRESS_LINE_EXTRA
auto& patterns_le = PatternProvider::GetInstance().GetMatchPatterns(
"ADDRESS_LINE_EXTRA", page_language);
if (!ParseField(scanner, pattern, patterns_l2, &address2_,
{log_manager_, "kAddressLine2Re"}) && {log_manager_, "kAddressLine2Re"}) &&
!ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
&address2_, {log_manager_, "kAddressLine2LabelRe"})) patterns_l2, &address2_,
{log_manager_, "kAddressLine2LabelRe"}))
return true; return true;
// Optionally parse address line 3. This uses the same label regexp as // Optionally parse address line 3. This uses the same label regexp as
// address 2 above. // address 2 above.
pattern = UTF8ToUTF16(kAddressLinesExtraRe); pattern = UTF8ToUTF16(kAddressLinesExtraRe);
if (!ParseField(scanner, pattern, &address3_, if (!ParseField(scanner, pattern, patterns_le, &address3_,
{log_manager_, "kAddressLinesExtraRe"}) && {log_manager_, "kAddressLinesExtraRe"}) &&
!ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT, !ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
&address3_, {log_manager_, "kAddressLine2LabelRe"})) patterns_l2, &address3_,
{log_manager_, "kAddressLine2LabelRe"}))
return true; return true;
// Try for surplus lines, which we will promptly discard. Some pages have 4 // Try for surplus lines, which we will promptly discard. Some pages have 4
...@@ -273,21 +329,29 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner) { ...@@ -273,21 +329,29 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner) {
// Since these are rare, don't bother considering unlabeled lines as extra // Since these are rare, don't bother considering unlabeled lines as extra
// address lines. // address lines.
pattern = UTF8ToUTF16(kAddressLinesExtraRe); pattern = UTF8ToUTF16(kAddressLinesExtraRe);
while (ParseField(scanner, pattern, nullptr, while (ParseField(scanner, pattern, patterns_le, nullptr,
{log_manager_, "kAddressLinesExtraRe"})) { {log_manager_, "kAddressLinesExtraRe"})) {
// Consumed a surplus line, try for another. // Consumed a surplus line, try for another.
} }
return true; return true;
} }
bool AddressField::ParseCountry(AutofillScanner* scanner) { bool AddressField::ParseCountry(AutofillScanner* scanner,
const std::string& page_language) {
if (country_) if (country_)
return false; return false;
// In JSON : COUNTRY
auto& patterns_c =
PatternProvider::GetInstance().GetMatchPatterns("COUNTRY", page_language);
auto& patterns_cl = PatternProvider::GetInstance().GetMatchPatterns(
"COUNTRY_LOCATION", page_language);
scanner->SaveCursor(); scanner->SaveCursor();
if (ParseFieldSpecifics(scanner, UTF8ToUTF16(kCountryRe), if (ParseFieldSpecifics(scanner, UTF8ToUTF16(kCountryRe),
MATCH_DEFAULT | MATCH_SELECT | MATCH_SEARCH, MATCH_DEFAULT | MATCH_SELECT | MATCH_SEARCH,
&country_, {log_manager_, "kCountryRe"})) { patterns_c, &country_,
{log_manager_, "kCountryRe"})) {
return true; return true;
} }
...@@ -296,46 +360,67 @@ bool AddressField::ParseCountry(AutofillScanner* scanner) { ...@@ -296,46 +360,67 @@ bool AddressField::ParseCountry(AutofillScanner* scanner) {
scanner->Rewind(); scanner->Rewind();
return ParseFieldSpecifics( return ParseFieldSpecifics(
scanner, UTF8ToUTF16(kCountryLocationRe), scanner, UTF8ToUTF16(kCountryLocationRe),
MATCH_LABEL | MATCH_NAME | MATCH_SELECT | MATCH_SEARCH, &country_, MATCH_LABEL | MATCH_NAME | MATCH_SELECT | MATCH_SEARCH, patterns_cl,
{log_manager_, "kCountryLocationRe"}); &country_, {log_manager_, "kCountryLocationRe"});
} }
bool AddressField::ParseZipCode(AutofillScanner* scanner) { bool AddressField::ParseZipCode(AutofillScanner* scanner,
const std::string& page_language) {
if (zip_) if (zip_)
return false; return false;
// auto& patternsZ = PatternProvider::GetInstance().GetMatchPatterns(
// "ADDRESS_HOME_ZIP", page_language);
// In JSON : ZIP_CODE
auto& patterns_z = PatternProvider::GetInstance().GetMatchPatterns(
"ZIP_CODE", page_language);
// In JSON : ZIP_4
auto& patterns_z4 =
PatternProvider::GetInstance().GetMatchPatterns("ZIP_4", page_language);
if (!ParseFieldSpecifics(scanner, UTF8ToUTF16(kZipCodeRe), kZipCodeMatchType, if (!ParseFieldSpecifics(scanner, UTF8ToUTF16(kZipCodeRe), kZipCodeMatchType,
&zip_, {log_manager_, "kZipCodeRe"})) { patterns_z, &zip_, {log_manager_, "kZipCodeRe"})) {
return false; return false;
} }
// Look for a zip+4, whose field name will also often contain // Look for a zip+4, whose field name will also often contain
// the substring "zip". // the substring "zip".
ParseFieldSpecifics(scanner, UTF8ToUTF16(kZip4Re), kZipCodeMatchType, &zip4_, ParseFieldSpecifics(scanner, UTF8ToUTF16(kZip4Re), kZipCodeMatchType,
{log_manager_, "kZip4Re"}); patterns_z4, &zip4_, {log_manager_, "kZip4Re"});
return true; return true;
} }
bool AddressField::ParseCity(AutofillScanner* scanner) { bool AddressField::ParseCity(AutofillScanner* scanner,
const std::string& page_language) {
if (city_) if (city_)
return false; return false;
// In JSON : CITY
auto& patterns_city =
PatternProvider::GetInstance().GetMatchPatterns("CITY", page_language);
return ParseFieldSpecifics(scanner, UTF8ToUTF16(kCityRe), kCityMatchType, return ParseFieldSpecifics(scanner, UTF8ToUTF16(kCityRe), kCityMatchType,
&city_, {log_manager_, "kCityRe"}); patterns_city, &city_, {log_manager_, "kCityRe"});
} }
bool AddressField::ParseState(AutofillScanner* scanner) { bool AddressField::ParseState(AutofillScanner* scanner,
const std::string& page_language) {
if (state_) if (state_)
return false; return false;
// auto& patterns = PatternProvider::GetInstance().GetMatchPatterns(
// "ADDRESS_HOME_STATE", page_language);
// In JSON : STATE
auto& patterns_state =
PatternProvider::GetInstance().GetMatchPatterns("STATE", page_language);
return ParseFieldSpecifics(scanner, UTF8ToUTF16(kStateRe), kStateMatchType, return ParseFieldSpecifics(scanner, UTF8ToUTF16(kStateRe), kStateMatchType,
&state_, {log_manager_, "kStateRe"}); patterns_state, &state_,
{log_manager_, "kStateRe"});
} }
AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelSeparately( AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelSeparately(
AutofillScanner* scanner, AutofillScanner* scanner,
const base::string16& pattern, const base::string16& pattern,
int match_type, int match_type,
const std::vector<MatchingPattern>& patterns,
AutofillField** match, AutofillField** match,
const RegExLogging& logging) { const RegExLogging& logging) {
if (scanner->IsEnd()) if (scanner->IsEnd())
...@@ -344,10 +429,12 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelSeparately( ...@@ -344,10 +429,12 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelSeparately(
AutofillField* cur_match = nullptr; AutofillField* cur_match = nullptr;
size_t saved_cursor = scanner->SaveCursor(); size_t saved_cursor = scanner->SaveCursor();
bool parsed_name = ParseFieldSpecifics( bool parsed_name = ParseFieldSpecifics(
scanner, pattern, match_type & ~MATCH_LABEL, &cur_match, logging); scanner, pattern, match_type & ~MATCH_LABEL, patterns, &cur_match,
logging, {.restrict_attributes = MATCH_NAME});
scanner->RewindTo(saved_cursor); scanner->RewindTo(saved_cursor);
bool parsed_label = ParseFieldSpecifics( bool parsed_label = ParseFieldSpecifics(
scanner, pattern, match_type & ~MATCH_NAME, &cur_match, logging); scanner, pattern, match_type & ~MATCH_NAME, patterns, &cur_match, logging,
{.restrict_attributes = MATCH_LABEL});
if (parsed_name && parsed_label) { if (parsed_name && parsed_label) {
if (match) if (match)
*match = cur_match; *match = cur_match;
...@@ -362,27 +449,31 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelSeparately( ...@@ -362,27 +449,31 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelSeparately(
return RESULT_MATCH_NONE; return RESULT_MATCH_NONE;
} }
bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) { bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner,
const std::string& page_language) {
// Simple cases. // Simple cases.
if (scanner->IsEnd()) if (scanner->IsEnd())
return false; return false;
if (city_ && state_ && zip_) if (city_ && state_ && zip_)
return false; return false;
if (state_ && zip_) if (state_ && zip_)
return ParseCity(scanner); return ParseCity(scanner, page_language);
if (city_ && zip_) if (city_ && zip_)
return ParseState(scanner); return ParseState(scanner, page_language);
if (city_ && state_) if (city_ && state_)
return ParseZipCode(scanner); return ParseZipCode(scanner, page_language);
// Check for matches to both name and label. // Check for matches to both name and label.
ParseNameLabelResult city_result = ParseNameAndLabelForCity(scanner); ParseNameLabelResult city_result =
ParseNameAndLabelForCity(scanner, page_language);
if (city_result == RESULT_MATCH_NAME_LABEL) if (city_result == RESULT_MATCH_NAME_LABEL)
return true; return true;
ParseNameLabelResult state_result = ParseNameAndLabelForState(scanner); ParseNameLabelResult state_result =
ParseNameAndLabelForState(scanner, page_language);
if (state_result == RESULT_MATCH_NAME_LABEL) if (state_result == RESULT_MATCH_NAME_LABEL)
return true; return true;
ParseNameLabelResult zip_result = ParseNameAndLabelForZipCode(scanner); ParseNameLabelResult zip_result =
ParseNameAndLabelForZipCode(scanner, page_language);
if (zip_result == RESULT_MATCH_NAME_LABEL) if (zip_result == RESULT_MATCH_NAME_LABEL)
return true; return true;
...@@ -395,7 +486,7 @@ bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) { ...@@ -395,7 +486,7 @@ bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) {
if (maybe_state && !maybe_city && !maybe_zip) if (maybe_state && !maybe_city && !maybe_zip)
return SetFieldAndAdvanceCursor(scanner, &state_); return SetFieldAndAdvanceCursor(scanner, &state_);
if (maybe_zip && !maybe_city && !maybe_state) if (maybe_zip && !maybe_city && !maybe_state)
return ParseZipCode(scanner); return ParseZipCode(scanner, page_language);
// Otherwise give name priority over label. // Otherwise give name priority over label.
if (city_result == RESULT_MATCH_NAME) if (city_result == RESULT_MATCH_NAME)
...@@ -403,19 +494,21 @@ bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) { ...@@ -403,19 +494,21 @@ bool AddressField::ParseCityStateZipCode(AutofillScanner* scanner) {
if (state_result == RESULT_MATCH_NAME) if (state_result == RESULT_MATCH_NAME)
return SetFieldAndAdvanceCursor(scanner, &state_); return SetFieldAndAdvanceCursor(scanner, &state_);
if (zip_result == RESULT_MATCH_NAME) if (zip_result == RESULT_MATCH_NAME)
return ParseZipCode(scanner); return ParseZipCode(scanner, page_language);
if (city_result == RESULT_MATCH_LABEL) if (city_result == RESULT_MATCH_LABEL)
return SetFieldAndAdvanceCursor(scanner, &city_); return SetFieldAndAdvanceCursor(scanner, &city_);
if (state_result == RESULT_MATCH_LABEL) if (state_result == RESULT_MATCH_LABEL)
return SetFieldAndAdvanceCursor(scanner, &state_); return SetFieldAndAdvanceCursor(scanner, &state_);
if (zip_result == RESULT_MATCH_LABEL) if (zip_result == RESULT_MATCH_LABEL)
return ParseZipCode(scanner); return ParseZipCode(scanner, page_language);
return false; return false;
} }
bool AddressField::ParseCityStateCountryZipCode(AutofillScanner* scanner) { bool AddressField::ParseCityStateCountryZipCode(
AutofillScanner* scanner,
const std::string& page_language) {
// The |scanner| is not pointing at a field. // The |scanner| is not pointing at a field.
if (scanner->IsEnd()) if (scanner->IsEnd())
return false; return false;
...@@ -426,25 +519,29 @@ bool AddressField::ParseCityStateCountryZipCode(AutofillScanner* scanner) { ...@@ -426,25 +519,29 @@ bool AddressField::ParseCityStateCountryZipCode(AutofillScanner* scanner) {
// Exactly one field type is missing. // Exactly one field type is missing.
if (state_ && country_ && zip_) if (state_ && country_ && zip_)
return ParseCity(scanner); return ParseCity(scanner, page_language);
if (city_ && country_ && zip_) if (city_ && country_ && zip_)
return ParseState(scanner); return ParseState(scanner, page_language);
if (city_ && state_ && zip_) if (city_ && state_ && zip_)
return ParseCountry(scanner); return ParseCountry(scanner, page_language);
if (city_ && state_ && country_) if (city_ && state_ && country_)
return ParseZipCode(scanner); return ParseZipCode(scanner, page_language);
// Check for matches to both the name and the label. // Check for matches to both the name and the label.
ParseNameLabelResult city_result = ParseNameAndLabelForCity(scanner); ParseNameLabelResult city_result =
ParseNameAndLabelForCity(scanner, page_language);
if (city_result == RESULT_MATCH_NAME_LABEL) if (city_result == RESULT_MATCH_NAME_LABEL)
return true; return true;
ParseNameLabelResult state_result = ParseNameAndLabelForState(scanner); ParseNameLabelResult state_result =
ParseNameAndLabelForState(scanner, page_language);
if (state_result == RESULT_MATCH_NAME_LABEL) if (state_result == RESULT_MATCH_NAME_LABEL)
return true; return true;
ParseNameLabelResult country_result = ParseNameAndLabelForCountry(scanner); ParseNameLabelResult country_result =
ParseNameAndLabelForCountry(scanner, page_language);
if (country_result == RESULT_MATCH_NAME_LABEL) if (country_result == RESULT_MATCH_NAME_LABEL)
return true; return true;
ParseNameLabelResult zip_result = ParseNameAndLabelForZipCode(scanner); ParseNameLabelResult zip_result =
ParseNameAndLabelForZipCode(scanner, page_language);
if (zip_result == RESULT_MATCH_NAME_LABEL) if (zip_result == RESULT_MATCH_NAME_LABEL)
return true; return true;
...@@ -460,7 +557,7 @@ bool AddressField::ParseCityStateCountryZipCode(AutofillScanner* scanner) { ...@@ -460,7 +557,7 @@ bool AddressField::ParseCityStateCountryZipCode(AutofillScanner* scanner) {
if (maybe_country && !maybe_city && !maybe_state && !maybe_zip) if (maybe_country && !maybe_city && !maybe_state && !maybe_zip)
return SetFieldAndAdvanceCursor(scanner, &country_); return SetFieldAndAdvanceCursor(scanner, &country_);
if (maybe_zip && !maybe_city && !maybe_state && !maybe_country) if (maybe_zip && !maybe_city && !maybe_state && !maybe_country)
return ParseZipCode(scanner); return ParseZipCode(scanner, page_language);
// If there is a clash between the country and the state, set the type of // If there is a clash between the country and the state, set the type of
// the field to the country. // the field to the country.
...@@ -475,7 +572,7 @@ bool AddressField::ParseCityStateCountryZipCode(AutofillScanner* scanner) { ...@@ -475,7 +572,7 @@ bool AddressField::ParseCityStateCountryZipCode(AutofillScanner* scanner) {
if (country_result == RESULT_MATCH_NAME) if (country_result == RESULT_MATCH_NAME)
return SetFieldAndAdvanceCursor(scanner, &country_); return SetFieldAndAdvanceCursor(scanner, &country_);
if (zip_result == RESULT_MATCH_NAME) if (zip_result == RESULT_MATCH_NAME)
return ParseZipCode(scanner); return ParseZipCode(scanner, page_language);
if (city_result == RESULT_MATCH_LABEL) if (city_result == RESULT_MATCH_LABEL)
return SetFieldAndAdvanceCursor(scanner, &city_); return SetFieldAndAdvanceCursor(scanner, &city_);
...@@ -484,30 +581,38 @@ bool AddressField::ParseCityStateCountryZipCode(AutofillScanner* scanner) { ...@@ -484,30 +581,38 @@ bool AddressField::ParseCityStateCountryZipCode(AutofillScanner* scanner) {
if (country_result == RESULT_MATCH_LABEL) if (country_result == RESULT_MATCH_LABEL)
return SetFieldAndAdvanceCursor(scanner, &country_); return SetFieldAndAdvanceCursor(scanner, &country_);
if (zip_result == RESULT_MATCH_LABEL) if (zip_result == RESULT_MATCH_LABEL)
return ParseZipCode(scanner); return ParseZipCode(scanner, page_language);
return false; return false;
} }
AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode( AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode(
AutofillScanner* scanner) { AutofillScanner* scanner,
const std::string& page_language) {
if (zip_) if (zip_)
return RESULT_MATCH_NONE; return RESULT_MATCH_NONE;
// In JSON : ZIP_CODE
auto& patterns_z = PatternProvider::GetInstance().GetMatchPatterns(
"ZIP_CODE", page_language);
// In JSON : ZIP_4
auto& patterns_z4 =
PatternProvider::GetInstance().GetMatchPatterns("ZIP_4", page_language);
ParseNameLabelResult result = ParseNameAndLabelSeparately( ParseNameLabelResult result = ParseNameAndLabelSeparately(
scanner, UTF8ToUTF16(kZipCodeRe), kZipCodeMatchType, &zip_, scanner, UTF8ToUTF16(kZipCodeRe), kZipCodeMatchType, patterns_z, &zip_,
{log_manager_, "kZipCodeRe"}); {log_manager_, "kZipCodeRe"});
if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd()) if (result != RESULT_MATCH_NAME_LABEL || scanner->IsEnd())
return result; return result;
size_t saved_cursor = scanner->SaveCursor(); size_t saved_cursor = scanner->SaveCursor();
bool found_non_zip4 = ParseCity(scanner); bool found_non_zip4 = ParseCity(scanner, page_language);
if (found_non_zip4) if (found_non_zip4)
city_ = nullptr; city_ = nullptr;
scanner->RewindTo(saved_cursor); scanner->RewindTo(saved_cursor);
if (!found_non_zip4) { if (!found_non_zip4) {
found_non_zip4 = ParseState(scanner); found_non_zip4 = ParseState(scanner, page_language);
if (found_non_zip4) if (found_non_zip4)
state_ = nullptr; state_ = nullptr;
scanner->RewindTo(saved_cursor); scanner->RewindTo(saved_cursor);
...@@ -517,40 +622,55 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode( ...@@ -517,40 +622,55 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForZipCode(
// Look for a zip+4, whose field name will also often contain // Look for a zip+4, whose field name will also often contain
// the substring "zip". // the substring "zip".
ParseFieldSpecifics(scanner, UTF8ToUTF16(kZip4Re), kZipCodeMatchType, ParseFieldSpecifics(scanner, UTF8ToUTF16(kZip4Re), kZipCodeMatchType,
&zip4_, {log_manager_, "kZip4Re"}); patterns_z4, &zip4_, {log_manager_, "kZip4Re"});
} }
return result; return result;
} }
AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCity( AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCity(
AutofillScanner* scanner) { AutofillScanner* scanner,
const std::string& page_language) {
if (city_) if (city_)
return RESULT_MATCH_NONE; return RESULT_MATCH_NONE;
// In JSON : CITY
auto& patterns_city =
PatternProvider::GetInstance().GetMatchPatterns("CITY", page_language);
return ParseNameAndLabelSeparately(scanner, UTF8ToUTF16(kCityRe), return ParseNameAndLabelSeparately(scanner, UTF8ToUTF16(kCityRe),
kCityMatchType, &city_, kCityMatchType, patterns_city, &city_,
{log_manager_, "kCityRe"}); {log_manager_, "kCityRe"});
} }
AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState( AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForState(
AutofillScanner* scanner) { AutofillScanner* scanner,
const std::string& page_language) {
if (state_) if (state_)
return RESULT_MATCH_NONE; return RESULT_MATCH_NONE;
// In JSON : STATE
auto& patterns_state =
PatternProvider::GetInstance().GetMatchPatterns("STATE", page_language);
return ParseNameAndLabelSeparately(scanner, UTF8ToUTF16(kStateRe), return ParseNameAndLabelSeparately(scanner, UTF8ToUTF16(kStateRe),
kStateMatchType, &state_, kStateMatchType, patterns_state, &state_,
{log_manager_, "kStateRe"}); {log_manager_, "kStateRe"});
} }
AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCountry( AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCountry(
AutofillScanner* scanner) { AutofillScanner* scanner,
const std::string& page_language) {
if (country_) if (country_)
return RESULT_MATCH_NONE; return RESULT_MATCH_NONE;
ParseNameLabelResult country_result = // In JSON : COUNTRY
ParseNameAndLabelSeparately(scanner, UTF8ToUTF16(kCountryRe), auto& patterns_c =
MATCH_DEFAULT | MATCH_SELECT | MATCH_SEARCH, PatternProvider::GetInstance().GetMatchPatterns("COUNTRY", page_language);
&country_, {log_manager_, "kCountryRe"}); auto& patterns_cl = PatternProvider::GetInstance().GetMatchPatterns(
"COUNTRY_LOCATION", page_language);
ParseNameLabelResult country_result = ParseNameAndLabelSeparately(
scanner, UTF8ToUTF16(kCountryRe),
MATCH_DEFAULT | MATCH_SELECT | MATCH_SEARCH, patterns_c, &country_,
{log_manager_, "kCountryRe"});
if (country_result != RESULT_MATCH_NONE) if (country_result != RESULT_MATCH_NONE)
return country_result; return country_result;
...@@ -558,8 +678,8 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCountry( ...@@ -558,8 +678,8 @@ AddressField::ParseNameLabelResult AddressField::ParseNameAndLabelForCountry(
// "location". However, this only makes sense for select tags. // "location". However, this only makes sense for select tags.
return ParseNameAndLabelSeparately( return ParseNameAndLabelSeparately(
scanner, UTF8ToUTF16(kCountryLocationRe), scanner, UTF8ToUTF16(kCountryLocationRe),
MATCH_LABEL | MATCH_NAME | MATCH_SELECT | MATCH_SEARCH, &country_, MATCH_LABEL | MATCH_NAME | MATCH_SELECT | MATCH_SEARCH, patterns_cl,
{log_manager_, "kCountryLocationRe"}); &country_, {log_manager_, "kCountryLocationRe"});
} }
} // namespace autofill } // namespace autofill
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "base/strings/string16.h" #include "base/strings/string16.h"
#include "components/autofill/core/browser/autofill_type.h" #include "components/autofill/core/browser/autofill_type.h"
#include "components/autofill/core/browser/form_parsing/form_field.h" #include "components/autofill/core/browser/form_parsing/form_field.h"
#include "components/autofill/core/browser/pattern_provider/pattern_provider.h"
namespace autofill { namespace autofill {
...@@ -53,45 +54,77 @@ class AddressField : public FormField { ...@@ -53,45 +54,77 @@ class AddressField : public FormField {
explicit AddressField(LogManager* log_manager); explicit AddressField(LogManager* log_manager);
bool ParseCompany(AutofillScanner* scanner); bool ParseCompany(AutofillScanner* scanner, const std::string& page_language);
bool ParseAddress(AutofillScanner* scanner);
bool ParseAddressFieldSequence(AutofillScanner* scanner); bool ParseAddress(AutofillScanner* scanner, const std::string& page_language);
bool ParseAddressLines(AutofillScanner* scanner);
bool ParseCountry(AutofillScanner* scanner); bool ParseAddressFieldSequence(AutofillScanner* scanner,
bool ParseZipCode(AutofillScanner* scanner); const std::string& page_language);
bool ParseCity(AutofillScanner* scanner);
bool ParseState(AutofillScanner* scanner); bool ParseAddressLines(AutofillScanner* scanner,
const std::string& page_language);
bool ParseCountry(AutofillScanner* scanner, const std::string& page_language);
bool ParseZipCode(AutofillScanner* scanner, const std::string& page_language);
bool ParseCity(AutofillScanner* scanner, const std::string& page_language);
bool ParseState(AutofillScanner* scanner, const std::string& page_language);
// Parses the current field pointed to by |scanner|, if it exists, and tries // Parses the current field pointed to by |scanner|, if it exists, and tries
// to figure out whether the field's type: city, state, zip, or none of those. // to figure out whether the field's type: city, state, zip, or none of those.
// TODO(crbug.com/1073555) Delete this once experiment // TODO(crbug.com/1073555) Delete this once experiment
// |kAutofillUseParseCityStateCountryZipCodeInHeuristic| has been launched. // |kAutofillUseParseCityStateCountryZipCodeInHeuristic| has been launched.
bool ParseCityStateZipCode(AutofillScanner* scanner); bool ParseCityStateZipCode(AutofillScanner* scanner,
const std::string& page_language);
// Parses the current field pointed to by |scanner|, if it exists, and tries // Parses the current field pointed to by |scanner|, if it exists, and tries
// to figure out whether the field's type: city, state, country, zip, or // to figure out whether the field's type: city, state, country, zip, or
// none of those. // none of those.
bool ParseCityStateCountryZipCode(AutofillScanner* scanner); bool ParseCityStateCountryZipCode(AutofillScanner* scanner,
const std::string& page_language);
// Like ParseFieldSpecifics(), but applies |pattern| against the name and // Like ParseFieldSpecifics(), but applies |pattern| against the name and
// label of the current field separately. If the return value is // label of the current field separately. If the return value is
// RESULT_MATCH_NAME_LABEL, then |scanner| advances and |match| is filled if // RESULT_MATCH_NAME_LABEL, then |scanner| advances and |match| is filled if
// it is non-NULL. Otherwise |scanner| does not advance and |match| does not // it is non-NULL. Otherwise |scanner| does not advance and |match| does not
// change. // change.
// ParseNameLabelResult ParseNameAndLabelSeparately(
// AutofillScanner* scanner,
// const base::string16& pattern,
// int match_type,
// AutofillField** match,
// const RegExLogging& logging);
// New version of function above using new structure MatchingPattern and
// PatternProvider.
ParseNameLabelResult ParseNameAndLabelSeparately( ParseNameLabelResult ParseNameAndLabelSeparately(
AutofillScanner* scanner, AutofillScanner* scanner,
const base::string16& pattern, const base::string16& pattern,
int match_type, int match_type,
const std::vector<MatchingPattern>& patterns,
AutofillField** match, AutofillField** match,
const RegExLogging& logging); const RegExLogging& logging);
// Run matches on the name and label separately. If the return result is // Run matches on the name and label separately. If the return result is
// RESULT_MATCH_NAME_LABEL, then |scanner| advances and the field is set. // RESULT_MATCH_NAME_LABEL, then |scanner| advances and the field is set.
// Otherwise |scanner| rewinds and the field is cleared. // Otherwise |scanner| rewinds and the field is cleared.
ParseNameLabelResult ParseNameAndLabelForZipCode(AutofillScanner* scanner); ParseNameLabelResult ParseNameAndLabelForZipCode(
ParseNameLabelResult ParseNameAndLabelForCity(AutofillScanner* scanner); AutofillScanner* scanner,
ParseNameLabelResult ParseNameAndLabelForCountry(AutofillScanner* scanner); const std::string& page_language);
ParseNameLabelResult ParseNameAndLabelForState(AutofillScanner* scanner);
ParseNameLabelResult ParseNameAndLabelForCity(
AutofillScanner* scanner,
const std::string& page_language);
ParseNameLabelResult ParseNameAndLabelForCountry(
AutofillScanner* scanner,
const std::string& page_language);
ParseNameLabelResult ParseNameAndLabelForState(
AutofillScanner* scanner,
const std::string& page_language);
LogManager* log_manager_; LogManager* log_manager_;
AutofillField* company_ = nullptr; AutofillField* company_ = nullptr;
......
...@@ -92,16 +92,28 @@ std::unique_ptr<FormField> CreditCardField::Parse( ...@@ -92,16 +92,28 @@ std::unique_ptr<FormField> CreditCardField::Parse(
size_t saved_cursor = scanner->SaveCursor(); size_t saved_cursor = scanner->SaveCursor();
int nb_unknown_fields = 0; int nb_unknown_fields = 0;
auto& patterns = PatternProvider::GetInstance().GetMatchPatterns(
"NAME_ON_CARD", page_language);
// In JSON : NAME_ON_CARD_CONTEXTUAL
auto& patterns_cont = PatternProvider::GetInstance().GetMatchPatterns(
"NAME_ON_CARD_CONTEXTUAL", page_language);
// In JSON : LAST_NAME
auto& patterns_nl = PatternProvider::GetInstance().GetMatchPatterns(
"LAST_NAME", page_language);
// In JSON : CARD_CVC
auto& patterns_cvc = PatternProvider::GetInstance().GetMatchPatterns(
CREDIT_CARD_VERIFICATION_CODE, page_language);
// Credit card fields can appear in many different orders. // Credit card fields can appear in many different orders.
// We loop until no more credit card related fields are found, see |break| at // We loop until no more credit card related fields are found, see |break| at
// the bottom of the loop. // the bottom of the loop.
for (int fields = 0; !scanner->IsEnd(); ++fields) { for (int fields = 0; !scanner->IsEnd(); ++fields) {
// Ignore gift card fields. // Ignore gift card fields.
if (IsGiftCardField(scanner, log_manager)) if (IsGiftCardField(scanner, log_manager, page_language))
break; break;
if (!credit_card_field->cardholder_) { if (!credit_card_field->cardholder_) {
if (ParseField(scanner, base::UTF8ToUTF16(kNameOnCardRe), if (ParseField(scanner, base::UTF8ToUTF16(kNameOnCardRe), patterns,
&credit_card_field->cardholder_, &credit_card_field->cardholder_,
{log_manager, "kNameOnCardRe"})) { {log_manager, "kNameOnCardRe"})) {
continue; continue;
...@@ -113,9 +125,10 @@ std::unique_ptr<FormField> CreditCardField::Parse( ...@@ -113,9 +125,10 @@ std::unique_ptr<FormField> CreditCardField::Parse(
// fields. So we search for "name" only when we've already parsed at // fields. So we search for "name" only when we've already parsed at
// least one other credit card field and haven't yet parsed the // least one other credit card field and haven't yet parsed the
// expiration date (which usually appears at the end). // expiration date (which usually appears at the end).
if (fields > 0 && !credit_card_field->expiration_month_ && if (fields > 0 && !credit_card_field->expiration_month_ &&
ParseField(scanner, base::UTF8ToUTF16(kNameOnCardContextualRe), ParseField(scanner, base::UTF8ToUTF16(kNameOnCardContextualRe),
&credit_card_field->cardholder_, patterns_cont, &credit_card_field->cardholder_,
{log_manager, "kNameOnCardContextualRe"})) { {log_manager, "kNameOnCardContextualRe"})) {
continue; continue;
} }
...@@ -125,7 +138,7 @@ std::unique_ptr<FormField> CreditCardField::Parse( ...@@ -125,7 +138,7 @@ std::unique_ptr<FormField> CreditCardField::Parse(
// and haven't yet parsed the expiration date (which usually appears at // and haven't yet parsed the expiration date (which usually appears at
// the end). // the end).
if (!credit_card_field->expiration_month_ && if (!credit_card_field->expiration_month_ &&
ParseField(scanner, base::UTF8ToUTF16(kLastNameRe), ParseField(scanner, base::UTF8ToUTF16(kLastNameRe), patterns_nl,
&credit_card_field->cardholder_last_, &credit_card_field->cardholder_last_,
{log_manager, "kLastNameRe"})) { {log_manager, "kLastNameRe"})) {
continue; continue;
...@@ -150,10 +163,12 @@ std::unique_ptr<FormField> CreditCardField::Parse( ...@@ -150,10 +163,12 @@ std::unique_ptr<FormField> CreditCardField::Parse(
// They also sometimes use type="password" for sensitive types. // They also sometimes use type="password" for sensitive types.
const int kMatchNumTelAndPwd = const int kMatchNumTelAndPwd =
MATCH_DEFAULT | MATCH_NUMBER | MATCH_TELEPHONE | MATCH_PASSWORD; MATCH_DEFAULT | MATCH_NUMBER | MATCH_TELEPHONE | MATCH_PASSWORD;
if (!credit_card_field->verification_ && if (!credit_card_field->verification_ &&
ParseFieldSpecifics( ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kCardCvcRe),
scanner, base::UTF8ToUTF16(kCardCvcRe), kMatchNumTelAndPwd, kMatchNumTelAndPwd, patterns_cvc,
&credit_card_field->verification_, {log_manager, "kCardCvcRe"})) { &credit_card_field->verification_,
{log_manager, "kCardCvcRe"})) {
// A couple of sites have multiple verification codes right after another. // A couple of sites have multiple verification codes right after another.
// Allow the classification of these codes one by one. // Allow the classification of these codes one by one.
AutofillField* const saved_cvv = credit_card_field->verification_; AutofillField* const saved_cvv = credit_card_field->verification_;
...@@ -165,8 +180,9 @@ std::unique_ptr<FormField> CreditCardField::Parse( ...@@ -165,8 +180,9 @@ std::unique_ptr<FormField> CreditCardField::Parse(
!credit_card_field->cardholder_ && scanner->SaveCursor() > 1) { !credit_card_field->cardholder_ && scanner->SaveCursor() > 1) {
// Check if the previous field was a verification code. // Check if the previous field was a verification code.
scanner->RewindTo(scanner->SaveCursor() - 2); scanner->RewindTo(scanner->SaveCursor() - 2);
if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kCardCvcRe), if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kCardCvcRe),
kMatchNumTelAndPwd, kMatchNumTelAndPwd, patterns_cvc,
&credit_card_field->verification_, &credit_card_field->verification_,
{log_manager, "kCardCvcRe"})) { {log_manager, "kCardCvcRe"})) {
// Reset the current cvv (The verification parse overwrote it). // Reset the current cvv (The verification parse overwrote it).
...@@ -189,8 +205,10 @@ std::unique_ptr<FormField> CreditCardField::Parse( ...@@ -189,8 +205,10 @@ std::unique_ptr<FormField> CreditCardField::Parse(
// TODO(crbug.com/591816): Make sure parsing cc-numbers of type password // TODO(crbug.com/591816): Make sure parsing cc-numbers of type password
// doesn't have bad side effects. // doesn't have bad side effects.
AutofillField* current_number_field; AutofillField* current_number_field;
auto& patterns = PatternProvider::GetInstance().GetMatchPatterns(
CREDIT_CARD_NUMBER, page_language);
if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kCardNumberRe), if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kCardNumberRe),
kMatchNumTelAndPwd, &current_number_field, kMatchNumTelAndPwd, patterns, &current_number_field,
{log_manager, "kCardNumberRe"})) { {log_manager, "kCardNumberRe"})) {
// Avoid autofilling any credit card number field having very low or high // Avoid autofilling any credit card number field having very low or high
// |start_index| on the HTML form. // |start_index| on the HTML form.
...@@ -215,7 +233,8 @@ std::unique_ptr<FormField> CreditCardField::Parse( ...@@ -215,7 +233,8 @@ std::unique_ptr<FormField> CreditCardField::Parse(
continue; continue;
} }
if (credit_card_field->ParseExpirationDate(scanner, log_manager)) { if (credit_card_field->ParseExpirationDate(scanner, log_manager,
page_language)) {
nb_unknown_fields = 0; nb_unknown_fields = 0;
continue; continue;
} }
...@@ -311,8 +330,10 @@ bool CreditCardField::LikelyCardMonthSelectField(AutofillScanner* scanner) { ...@@ -311,8 +330,10 @@ bool CreditCardField::LikelyCardMonthSelectField(AutofillScanner* scanner) {
} }
// static // static
bool CreditCardField::LikelyCardYearSelectField(AutofillScanner* scanner, bool CreditCardField::LikelyCardYearSelectField(
LogManager* log_manager) { AutofillScanner* scanner,
LogManager* log_manager,
const std::string& page_language) {
if (scanner->IsEnd()) if (scanner->IsEnd())
return false; return false;
...@@ -331,9 +352,12 @@ bool CreditCardField::LikelyCardYearSelectField(AutofillScanner* scanner, ...@@ -331,9 +352,12 @@ bool CreditCardField::LikelyCardYearSelectField(AutofillScanner* scanner,
} }
// Another way to eliminate days - filter out 'day' fields. // Another way to eliminate days - filter out 'day' fields.
// In JSON : DAY (only in JSON)
auto& patterns_day =
PatternProvider::GetInstance().GetMatchPatterns("DAY", page_language);
if (FormField::ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kDayRe), if (FormField::ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kDayRe),
MATCH_DEFAULT | MATCH_SELECT, nullptr, MATCH_DEFAULT | MATCH_SELECT, patterns_day,
{log_manager, "kDayRe"})) { nullptr, {log_manager, "kDayRe"})) {
return false; return false;
} }
...@@ -389,28 +413,40 @@ bool CreditCardField::LikelyCardTypeSelectField(AutofillScanner* scanner) { ...@@ -389,28 +413,40 @@ bool CreditCardField::LikelyCardTypeSelectField(AutofillScanner* scanner) {
// static // static
bool CreditCardField::IsGiftCardField(AutofillScanner* scanner, bool CreditCardField::IsGiftCardField(AutofillScanner* scanner,
LogManager* log_manager) { LogManager* log_manager,
const std::string& page_language) {
if (scanner->IsEnd()) if (scanner->IsEnd())
return false; return false;
const int kMatchFieldTypes = const int kMatchFieldTypes =
MATCH_DEFAULT | MATCH_NUMBER | MATCH_TELEPHONE | MATCH_SEARCH; MATCH_DEFAULT | MATCH_NUMBER | MATCH_TELEPHONE | MATCH_SEARCH;
size_t saved_cursor = scanner->SaveCursor(); size_t saved_cursor = scanner->SaveCursor();
// In JSON : DEBIT_CARD (only in JSON)
auto& patterns_d = PatternProvider::GetInstance().GetMatchPatterns(
"DEBIT_CARD", page_language);
// In JSON : DEBIT_GIFT_CARD (only in JSON)
auto& patterns_dg = PatternProvider::GetInstance().GetMatchPatterns(
"DEBIT_GIFT_CARD", page_language);
// In JSON : GIFT_CARD (only in JSON)
auto& patterns_g = PatternProvider::GetInstance().GetMatchPatterns(
"GIFT_CARD", page_language);
if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kDebitCardRe), if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kDebitCardRe),
kMatchFieldTypes, nullptr, kMatchFieldTypes, patterns_d, nullptr,
{log_manager, "kDebitCardRe"})) { {log_manager, "kDebitCardRe"})) {
scanner->RewindTo(saved_cursor); scanner->RewindTo(saved_cursor);
return false; return false;
} }
if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kDebitGiftCardRe), if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kDebitGiftCardRe),
kMatchFieldTypes, nullptr, kMatchFieldTypes, patterns_dg, nullptr,
{log_manager, "kDebitGiftCardRe"})) { {log_manager, "kDebitGiftCardRe"})) {
scanner->RewindTo(saved_cursor); scanner->RewindTo(saved_cursor);
return false; return false;
} }
return ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kGiftCardRe), return ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kGiftCardRe),
kMatchFieldTypes, nullptr, kMatchFieldTypes, patterns_g, nullptr,
{log_manager, "kGiftCardRe"}); {log_manager, "kGiftCardRe"});
} }
...@@ -467,7 +503,8 @@ void CreditCardField::AddClassifications( ...@@ -467,7 +503,8 @@ void CreditCardField::AddClassifications(
} }
bool CreditCardField::ParseExpirationDate(AutofillScanner* scanner, bool CreditCardField::ParseExpirationDate(AutofillScanner* scanner,
LogManager* log_manager) { LogManager* log_manager,
const std::string& page_language) {
if (!expiration_date_ && base::LowerCaseEqualsASCII( if (!expiration_date_ && base::LowerCaseEqualsASCII(
scanner->Cursor()->form_control_type, "month")) { scanner->Cursor()->form_control_type, "month")) {
expiration_date_ = scanner->Cursor(); expiration_date_ = scanner->Cursor();
...@@ -487,7 +524,7 @@ bool CreditCardField::ParseExpirationDate(AutofillScanner* scanner, ...@@ -487,7 +524,7 @@ bool CreditCardField::ParseExpirationDate(AutofillScanner* scanner,
if (LikelyCardMonthSelectField(scanner)) { if (LikelyCardMonthSelectField(scanner)) {
expiration_month_ = scanner->Cursor(); expiration_month_ = scanner->Cursor();
scanner->Advance(); scanner->Advance();
if (LikelyCardYearSelectField(scanner, log_manager)) { if (LikelyCardYearSelectField(scanner, log_manager, page_language)) {
expiration_year_ = scanner->Cursor(); expiration_year_ = scanner->Cursor();
scanner->Advance(); scanner->Advance();
return true; return true;
...@@ -500,11 +537,23 @@ bool CreditCardField::ParseExpirationDate(AutofillScanner* scanner, ...@@ -500,11 +537,23 @@ bool CreditCardField::ParseExpirationDate(AutofillScanner* scanner,
scanner->RewindTo(month_year_saved_cursor); scanner->RewindTo(month_year_saved_cursor);
const int kMatchCCType = MATCH_DEFAULT | MATCH_NUMBER | MATCH_TELEPHONE | const int kMatchCCType = MATCH_DEFAULT | MATCH_NUMBER | MATCH_TELEPHONE |
MATCH_SELECT | MATCH_SEARCH; MATCH_SELECT | MATCH_SEARCH;
// In JSON : CARD_EXP_MONTH
auto& patterns_m = PatternProvider::GetInstance().GetMatchPatterns(
CREDIT_CARD_EXP_MONTH, page_language);
// In JSON : CARD_EXP_YEAR
auto& patterns_y = PatternProvider::GetInstance().GetMatchPatterns(
"CREDIT_CARD_EXP_YEAR", page_language);
auto& patterns_mm = PatternProvider::GetInstance().GetMatchPatterns(
"CREDIT_CARD_EXP_MONTH_BEFORE_YEAR", page_language);
auto& patterns_yy = PatternProvider::GetInstance().GetMatchPatterns(
"CREDIT_CARD_EXP_YEAR_AFTER_MONTH", page_language);
if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kExpirationMonthRe), if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kExpirationMonthRe),
kMatchCCType, &expiration_month_, kMatchCCType, patterns_m, &expiration_month_,
{log_manager_, "kExpirationMonthRe"}) && {log_manager_, "kExpirationMonthRe"}) &&
ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kExpirationYearRe), ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kExpirationYearRe),
kMatchCCType, &expiration_year_, kMatchCCType, patterns_y, &expiration_year_,
{log_manager_, "kExpirationYearRe"})) { {log_manager_, "kExpirationYearRe"})) {
return true; return true;
} }
...@@ -512,9 +561,10 @@ bool CreditCardField::ParseExpirationDate(AutofillScanner* scanner, ...@@ -512,9 +561,10 @@ bool CreditCardField::ParseExpirationDate(AutofillScanner* scanner,
// If that fails, look for just MM and/or YY(YY). // If that fails, look for just MM and/or YY(YY).
scanner->RewindTo(month_year_saved_cursor); scanner->RewindTo(month_year_saved_cursor);
if (ParseFieldSpecifics(scanner, base::ASCIIToUTF16("^mm$"), kMatchCCType, if (ParseFieldSpecifics(scanner, base::ASCIIToUTF16("^mm$"), kMatchCCType,
&expiration_month_, {log_manager_, "^mm$"}) && patterns_mm, &expiration_month_,
{log_manager_, "^mm$"}) &&
ParseFieldSpecifics(scanner, base::ASCIIToUTF16("^(yy|yyyy)$"), ParseFieldSpecifics(scanner, base::ASCIIToUTF16("^(yy|yyyy)$"),
kMatchCCType, &expiration_year_, kMatchCCType, patterns_yy, &expiration_year_,
{log_manager_, "^(yy|yyyy)$"})) { {log_manager_, "^(yy|yyyy)$"})) {
return true; return true;
} }
...@@ -530,17 +580,23 @@ bool CreditCardField::ParseExpirationDate(AutofillScanner* scanner, ...@@ -530,17 +580,23 @@ bool CreditCardField::ParseExpirationDate(AutofillScanner* scanner,
return false; return false;
// Try to look for a 2-digit year expiration date. // Try to look for a 2-digit year expiration date.
if (ParseFieldSpecifics( // In JSON : CARD_EXP_DATE_2_DIGIT_YEAR
scanner, base::UTF8ToUTF16(kExpirationDate2DigitYearRe), kMatchCCType, auto& patterns_2dy = PatternProvider::GetInstance().GetMatchPatterns(
&expiration_date_, {log_manager_, "kExpirationDate2DigitYearRe"})) { CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR, page_language);
if (ParseFieldSpecifics(scanner,
base::UTF8ToUTF16(kExpirationDate2DigitYearRe),
kMatchCCType, patterns_2dy, &expiration_date_,
{log_manager_, "kExpirationDate2DigitYearRe"})) {
exp_year_type_ = CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR; exp_year_type_ = CREDIT_CARD_EXP_DATE_2_DIGIT_YEAR;
expiration_month_ = nullptr; expiration_month_ = nullptr;
return true; return true;
} }
// Try to look for a generic expiration date field. (2 or 4 digit year) // Try to look for a generic expiration date field. (2 or 4 digit year)
auto& patterns_exp_d = PatternProvider::GetInstance().GetMatchPatterns(
"CREDIT_CARD_EXP_DATE", page_language);
if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kExpirationDateRe), if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kExpirationDateRe),
kMatchCCType, &expiration_date_, kMatchCCType, patterns_exp_d, &expiration_date_,
{log_manager_, "kExpirationDateRe"})) { {log_manager_, "kExpirationDateRe"})) {
// If such a field exists, but it cannot fit a 4-digit year expiration // If such a field exists, but it cannot fit a 4-digit year expiration
// date, then the likely possibility is that it is a 2-digit year expiration // date, then the likely possibility is that it is a 2-digit year expiration
...@@ -554,11 +610,14 @@ bool CreditCardField::ParseExpirationDate(AutofillScanner* scanner, ...@@ -554,11 +610,14 @@ bool CreditCardField::ParseExpirationDate(AutofillScanner* scanner,
} }
// Try to look for a 4-digit year expiration date. // Try to look for a 4-digit year expiration date.
auto& patterns_4dy = PatternProvider::GetInstance().GetMatchPatterns(
CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR, page_language);
if (FieldCanFitDataForFieldType(current_field_max_length, if (FieldCanFitDataForFieldType(current_field_max_length,
CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR) && CREDIT_CARD_EXP_DATE_4_DIGIT_YEAR) &&
ParseFieldSpecifics( ParseFieldSpecifics(scanner,
scanner, base::UTF8ToUTF16(kExpirationDate4DigitYearRe), kMatchCCType, base::UTF8ToUTF16(kExpirationDate4DigitYearRe),
&expiration_date_, {log_manager_, "kExpirationDate4DigitYearRe"})) { kMatchCCType, patterns_4dy, &expiration_date_,
{log_manager_, "kExpirationDate4DigitYearRe"})) {
expiration_month_ = nullptr; expiration_month_ = nullptr;
return true; return true;
} }
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include "base/macros.h" #include "base/macros.h"
#include "components/autofill/core/browser/autofill_type.h" #include "components/autofill/core/browser/autofill_type.h"
#include "components/autofill/core/browser/form_parsing/form_field.h" #include "components/autofill/core/browser/form_parsing/form_field.h"
#include "components/autofill/core/browser/pattern_provider/pattern_provider.h"
namespace autofill { namespace autofill {
...@@ -42,7 +43,8 @@ class CreditCardField : public FormField { ...@@ -42,7 +43,8 @@ class CreditCardField : public FormField {
// the next few years. |log_manager| is used to log any parsing details // the next few years. |log_manager| is used to log any parsing details
// to chrome://autofill-internals // to chrome://autofill-internals
static bool LikelyCardYearSelectField(AutofillScanner* scanner, static bool LikelyCardYearSelectField(AutofillScanner* scanner,
LogManager* log_manager); LogManager* log_manager,
const std::string& page_language);
// Returns true if |scanner| points to a <select> field that contains credit // Returns true if |scanner| points to a <select> field that contains credit
// card type options. // card type options.
...@@ -53,11 +55,14 @@ class CreditCardField : public FormField { ...@@ -53,11 +55,14 @@ class CreditCardField : public FormField {
// Prepaid debit cards do not count as gift cards, since they can be used like // Prepaid debit cards do not count as gift cards, since they can be used like
// a credit card. // a credit card.
static bool IsGiftCardField(AutofillScanner* scanner, static bool IsGiftCardField(AutofillScanner* scanner,
LogManager* log_manager); LogManager* log_manager,
const std::string& page_language);
// Parses the expiration month/year/date fields. Returns true if it finds // Parses the expiration month/year/date fields. Returns true if it finds
// something new. // something new.
bool ParseExpirationDate(AutofillScanner* scanner, LogManager* log_manager); bool ParseExpirationDate(AutofillScanner* scanner,
LogManager* log_manager,
const std::string& page_language);
// For the combined expiration field we return |exp_year_type_|; otherwise if // For the combined expiration field we return |exp_year_type_|; otherwise if
// |expiration_year_| is having year with |max_length| of 2-digits we return // |expiration_year_| is having year with |max_length| of 2-digits we return
......
...@@ -15,8 +15,10 @@ std::unique_ptr<FormField> EmailField::Parse(AutofillScanner* scanner, ...@@ -15,8 +15,10 @@ std::unique_ptr<FormField> EmailField::Parse(AutofillScanner* scanner,
const std::string& page_language, const std::string& page_language,
LogManager* log_manager) { LogManager* log_manager) {
AutofillField* field; AutofillField* field;
auto& patterns = PatternProvider::GetInstance().GetMatchPatterns(
"EMAIL_ADDRESS", page_language);
if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kEmailRe), if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kEmailRe),
MATCH_DEFAULT | MATCH_EMAIL, &field, MATCH_DEFAULT | MATCH_EMAIL, patterns, &field,
{log_manager, "kEmailRe"})) { {log_manager, "kEmailRe"})) {
return std::make_unique<EmailField>(field); return std::make_unique<EmailField>(field);
} }
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include "base/compiler_specific.h" #include "base/compiler_specific.h"
#include "base/macros.h" #include "base/macros.h"
#include "components/autofill/core/browser/form_parsing/form_field.h" #include "components/autofill/core/browser/form_parsing/form_field.h"
#include "components/autofill/core/browser/pattern_provider/pattern_provider.h"
namespace autofill { namespace autofill {
......
...@@ -168,6 +168,22 @@ bool FormField::ParseField(AutofillScanner* scanner, ...@@ -168,6 +168,22 @@ bool FormField::ParseField(AutofillScanner* scanner,
return ParseFieldSpecifics(scanner, patterns, match, logging); return ParseFieldSpecifics(scanner, patterns, match, logging);
} }
// Transitional overload that accepts both the legacy regular expression
// |pattern| and the PatternProvider-supplied |patterns|. The |patterns| are
// used when either pattern-provider feature is enabled; otherwise parsing
// falls back to the legacy |pattern|.
bool FormField::ParseField(AutofillScanner* scanner,
                           const base::string16& pattern,
                           const std::vector<MatchingPattern>& patterns,
                           AutofillField** match,
                           const RegExLogging& logging) {
  const bool use_pattern_provider =
      base::FeatureList::IsEnabled(
          features::kAutofillUsePageLanguageToSelectFieldParsingPatterns) ||
      base::FeatureList::IsEnabled(
          features::
              kAutofillApplyNegativePatternsForFieldTypeDetectionHeuristics);
  if (!use_pattern_provider)
    return ParseField(scanner, pattern, match, logging);
  return ParseField(scanner, patterns, match, logging);
}
bool FormField::ParseFieldSpecifics(AutofillScanner* scanner, bool FormField::ParseFieldSpecifics(AutofillScanner* scanner,
const base::string16& pattern, const base::string16& pattern,
int match_field_attributes, int match_field_attributes,
...@@ -238,6 +254,38 @@ bool FormField::ParseFieldSpecifics(AutofillScanner* scanner, ...@@ -238,6 +254,38 @@ bool FormField::ParseFieldSpecifics(AutofillScanner* scanner,
match_field_types, match, logging); match_field_types, match, logging);
} }
// Transitional overload that accepts both the legacy |pattern| / |match_type|
// pair and the PatternProvider-supplied |patterns|. The |patterns| are used
// when either pattern-provider feature is enabled; otherwise parsing falls
// back to the legacy regular expression. |match_field_bitmasks| lets a caller
// narrow the attributes and widen the input types of every pattern before
// matching; the default-constructed masks leave |patterns| untouched.
bool FormField::ParseFieldSpecifics(
    AutofillScanner* scanner,
    const base::string16& pattern,
    int match_type,
    const std::vector<MatchingPattern>& patterns,
    AutofillField** match,
    const RegExLogging& logging,
    MatchFieldBitmasks match_field_bitmasks) {
  const bool use_pattern_provider =
      base::FeatureList::IsEnabled(
          features::kAutofillUsePageLanguageToSelectFieldParsingPatterns) ||
      base::FeatureList::IsEnabled(
          features::
              kAutofillApplyNegativePatternsForFieldTypeDetectionHeuristics);
  if (!use_pattern_provider)
    return ParseFieldSpecifics(scanner, pattern, match_type, match, logging);

  // With the identity masks there is nothing to adjust, so the patterns can be
  // passed through without copying them.
  const bool masks_are_identity =
      match_field_bitmasks.restrict_attributes == ~0 &&
      match_field_bitmasks.augment_types == 0;
  if (masks_are_identity)
    return ParseFieldSpecifics(scanner, patterns, match, logging);

  // TODO(crbug/1142936): This hack is to allow
  // AddressField::ParseNameAndLabelSeparately().
  std::vector<MatchingPattern> adjusted_patterns = patterns;
  for (MatchingPattern& adjusted : adjusted_patterns) {
    adjusted.match_field_attributes &=
        match_field_bitmasks.restrict_attributes;
    adjusted.match_field_input_types |= match_field_bitmasks.augment_types;
  }
  return ParseFieldSpecifics(scanner, adjusted_patterns, match, logging);
}
// static // static
bool FormField::ParseEmptyLabel(AutofillScanner* scanner, bool FormField::ParseEmptyLabel(AutofillScanner* scanner,
AutofillField** match) { AutofillField** match) {
......
...@@ -72,6 +72,12 @@ class FormField { ...@@ -72,6 +72,12 @@ class FormField {
AutofillField** match, AutofillField** match,
const RegExLogging& logging = {}); const RegExLogging& logging = {});
// Transitional variant of ParseField() that takes both the legacy regular
// expression |pattern| and the PatternProvider-supplied |patterns|. It
// forwards to the |patterns| overload if
// kAutofillUsePageLanguageToSelectFieldParsingPatterns or
// kAutofillApplyNegativePatternsForFieldTypeDetectionHeuristics is enabled,
// and to the |pattern| overload otherwise.
static bool ParseField(AutofillScanner* scanner,
const base::string16& pattern,
const std::vector<MatchingPattern>& patterns,
AutofillField** match,
const RegExLogging& logging = {});
// Parses the stream of fields in |scanner| with regular expression |pattern| // Parses the stream of fields in |scanner| with regular expression |pattern|
// as specified in the |match_type| bit field (see |MatchType|). If |match| // as specified in the |match_type| bit field (see |MatchType|). If |match|
// is non-NULL and the pattern matches, |match| will be set to the matched // is non-NULL and the pattern matches, |match| will be set to the matched
...@@ -96,6 +102,20 @@ class FormField { ...@@ -96,6 +102,20 @@ class FormField {
int match_field_input_types, int match_field_input_types,
AutofillField** match, AutofillField** match,
const RegExLogging& logging = {}); const RegExLogging& logging = {});
// Bitmasks applied to every MatchingPattern before matching in the
// ParseFieldSpecifics() overload below. The defaults are the identity masks,
// i.e. they leave the patterns unchanged.
struct MatchFieldBitmasks {
// AND-ed into each pattern's |match_field_attributes|; ~0 keeps all bits.
int restrict_attributes = ~0;
// OR-ed into each pattern's |match_field_input_types|; 0 adds no bits.
int augment_types = 0;
};

// Transitional variant of ParseFieldSpecifics() that takes both the legacy
// |pattern| / |match_type| pair and the PatternProvider-supplied |patterns|.
// |patterns| (adjusted by |match_field_bitmasks| when those differ from the
// identity masks) are used if
// kAutofillUsePageLanguageToSelectFieldParsingPatterns or
// kAutofillApplyNegativePatternsForFieldTypeDetectionHeuristics is enabled;
// otherwise the legacy |pattern| and |match_type| are used.
// NOTE(review): the default argument presumably spells out both members
// instead of using `= {}` because the struct's default member initializers
// cannot be used in a default argument inside the enclosing class definition
// -- confirm before simplifying.
static bool ParseFieldSpecifics(AutofillScanner* scanner,
const base::string16& pattern,
int match_type,
const std::vector<MatchingPattern>& patterns,
AutofillField** match,
const RegExLogging& logging,
MatchFieldBitmasks match_field_bitmasks = {
.restrict_attributes = ~0,
.augment_types = 0});
// Attempts to parse a field with an empty label. Returns true // Attempts to parse a field with an empty label. Returns true
// on success and fills |match| with a pointer to the field. // on success and fills |match| with a pointer to the field.
......
...@@ -24,6 +24,7 @@ namespace { ...@@ -24,6 +24,7 @@ namespace {
class FullNameField : public NameField { class FullNameField : public NameField {
public: public:
static std::unique_ptr<FullNameField> Parse(AutofillScanner* scanner, static std::unique_ptr<FullNameField> Parse(AutofillScanner* scanner,
const std::string& page_language,
LogManager* log_manager); LogManager* log_manager);
explicit FullNameField(AutofillField* field); explicit FullNameField(AutofillField* field);
...@@ -42,9 +43,12 @@ class FirstTwoLastNamesField : public NameField { ...@@ -42,9 +43,12 @@ class FirstTwoLastNamesField : public NameField {
public: public:
static std::unique_ptr<FirstTwoLastNamesField> ParseComponentNames( static std::unique_ptr<FirstTwoLastNamesField> ParseComponentNames(
AutofillScanner* scanner, AutofillScanner* scanner,
const std::string& page_language,
LogManager* log_manager);
static std::unique_ptr<FirstTwoLastNamesField> Parse(
AutofillScanner* scanner,
const std::string& page_language,
LogManager* log_manager); LogManager* log_manager);
static std::unique_ptr<FirstTwoLastNamesField> Parse(AutofillScanner* scanner,
LogManager* log_manager);
protected: protected:
void AddClassifications(FieldCandidatesMap* field_candidates) const override; void AddClassifications(FieldCandidatesMap* field_candidates) const override;
...@@ -67,12 +71,16 @@ class FirstLastNameField : public NameField { ...@@ -67,12 +71,16 @@ class FirstLastNameField : public NameField {
public: public:
static std::unique_ptr<FirstLastNameField> ParseSpecificName( static std::unique_ptr<FirstLastNameField> ParseSpecificName(
AutofillScanner* scanner, AutofillScanner* scanner,
const std::string& page_language,
LogManager* log_manager); LogManager* log_manager);
static std::unique_ptr<FirstLastNameField> ParseComponentNames( static std::unique_ptr<FirstLastNameField> ParseComponentNames(
AutofillScanner* scanner, AutofillScanner* scanner,
const std::string& page_language,
LogManager* log_manager);
static std::unique_ptr<FirstLastNameField> Parse(
AutofillScanner* scanner,
const std::string& page_language,
LogManager* log_manager); LogManager* log_manager);
static std::unique_ptr<FirstLastNameField> Parse(AutofillScanner* scanner,
LogManager* log_manager);
protected: protected:
void AddClassifications(FieldCandidatesMap* field_candidates) const override; void AddClassifications(FieldCandidatesMap* field_candidates) const override;
...@@ -103,11 +111,11 @@ std::unique_ptr<FormField> NameField::Parse(AutofillScanner* scanner, ...@@ -103,11 +111,11 @@ std::unique_ptr<FormField> NameField::Parse(AutofillScanner* scanner,
std::unique_ptr<FormField> field; std::unique_ptr<FormField> field;
if (!field && base::FeatureList::IsEnabled( if (!field && base::FeatureList::IsEnabled(
features::kAutofillEnableSupportForMoreStructureInNames)) features::kAutofillEnableSupportForMoreStructureInNames))
field = FirstTwoLastNamesField::Parse(scanner, log_manager); field = FirstTwoLastNamesField::Parse(scanner, page_language, log_manager);
if (!field) if (!field)
field = FirstLastNameField::Parse(scanner, log_manager); field = FirstLastNameField::Parse(scanner, page_language, log_manager);
if (!field) if (!field)
field = FullNameField::Parse(scanner, log_manager); field = FullNameField::Parse(scanner, page_language, log_manager);
return field; return field;
} }
...@@ -116,12 +124,17 @@ void NameField::AddClassifications(FieldCandidatesMap* field_candidates) const { ...@@ -116,12 +124,17 @@ void NameField::AddClassifications(FieldCandidatesMap* field_candidates) const {
} }
// static // static
std::unique_ptr<FullNameField> FullNameField::Parse(AutofillScanner* scanner, std::unique_ptr<FullNameField> FullNameField::Parse(
LogManager* log_manager) { AutofillScanner* scanner,
const std::string& page_language,
LogManager* log_manager) {
// Exclude e.g. "username" or "nickname" fields. // Exclude e.g. "username" or "nickname" fields.
scanner->SaveCursor(); scanner->SaveCursor();
bool should_ignore = ParseField(scanner, UTF8ToUTF16(kNameIgnoredRe), nullptr, auto& patterns_ni = PatternProvider::GetInstance().GetMatchPatterns(
{log_manager, "kNameIgnoredRe"}); "NAME_IGNORED", page_language);
bool should_ignore =
ParseField(scanner, UTF8ToUTF16(kNameIgnoredRe), patterns_ni, nullptr,
{log_manager, "kNameIgnoredRe"});
scanner->Rewind(); scanner->Rewind();
if (should_ignore) if (should_ignore)
return nullptr; return nullptr;
...@@ -130,7 +143,10 @@ std::unique_ptr<FullNameField> FullNameField::Parse(AutofillScanner* scanner, ...@@ -130,7 +143,10 @@ std::unique_ptr<FullNameField> FullNameField::Parse(AutofillScanner* scanner,
// for example, Travelocity_Edit travel profile.html contains a field // for example, Travelocity_Edit travel profile.html contains a field
// "Travel Profile Name". // "Travel Profile Name".
AutofillField* field = nullptr; AutofillField* field = nullptr;
if (ParseField(scanner, UTF8ToUTF16(kNameRe), &field, // In JSON : FULL_NAME (closest variant)
auto& patterns_name = PatternProvider::GetInstance().GetMatchPatterns(
"FULL_NAME", page_language);
if (ParseField(scanner, UTF8ToUTF16(kNameRe), patterns_name, &field,
{log_manager, "kNameRe"})) {log_manager, "kNameRe"}))
return std::make_unique<FullNameField>(field); return std::make_unique<FullNameField>(field);
...@@ -149,17 +165,32 @@ FirstTwoLastNamesField::FirstTwoLastNamesField() = default; ...@@ -149,17 +165,32 @@ FirstTwoLastNamesField::FirstTwoLastNamesField() = default;
// static // static
std::unique_ptr<FirstTwoLastNamesField> FirstTwoLastNamesField::Parse( std::unique_ptr<FirstTwoLastNamesField> FirstTwoLastNamesField::Parse(
AutofillScanner* scanner, AutofillScanner* scanner,
const std::string& page_language,
LogManager* log_manager) { LogManager* log_manager) {
return ParseComponentNames(scanner, log_manager); return ParseComponentNames(scanner, page_language, log_manager);
} }
// static // static
std::unique_ptr<FirstTwoLastNamesField> std::unique_ptr<FirstTwoLastNamesField>
FirstTwoLastNamesField::ParseComponentNames(AutofillScanner* scanner, FirstTwoLastNamesField::ParseComponentNames(AutofillScanner* scanner,
const std::string& page_language,
LogManager* log_manager) { LogManager* log_manager) {
std::unique_ptr<FirstTwoLastNamesField> v(new FirstTwoLastNamesField); std::unique_ptr<FirstTwoLastNamesField> v(new FirstTwoLastNamesField);
scanner->SaveCursor(); scanner->SaveCursor();
auto& patterns_hp = PatternProvider::GetInstance().GetMatchPatterns(
"HONORIFIC_PREFIX", page_language);
auto& patterns_ni = PatternProvider::GetInstance().GetMatchPatterns(
"NAME_IGNORED", page_language);
auto& patterns_fn = PatternProvider::GetInstance().GetMatchPatterns(
"FIRST_NAME", page_language);
auto& patterns_mn = PatternProvider::GetInstance().GetMatchPatterns(
"MIDDLE_NAME", page_language);
auto& patterns_ln1 = PatternProvider::GetInstance().GetMatchPatterns(
"LAST_NAME_FIRST", page_language);
auto& patterns_ln2 = PatternProvider::GetInstance().GetMatchPatterns(
"LAST_NAME_SECOND", page_language);
// Allow name fields to appear in any order. // Allow name fields to appear in any order.
while (!scanner->IsEnd()) { while (!scanner->IsEnd()) {
// Scan for the honorific prefix before checking for unrelated name fields // Scan for the honorific prefix before checking for unrelated name fields
...@@ -168,7 +199,7 @@ FirstTwoLastNamesField::ParseComponentNames(AutofillScanner* scanner, ...@@ -168,7 +199,7 @@ FirstTwoLastNamesField::ParseComponentNames(AutofillScanner* scanner,
// TODO(crbug.com/1098943): Remove check once feature is launched or // TODO(crbug.com/1098943): Remove check once feature is launched or
// removed. // removed.
if (!v->honorific_prefix_ && if (!v->honorific_prefix_ &&
ParseField(scanner, UTF8ToUTF16(kHonorificPrefixRe), ParseField(scanner, UTF8ToUTF16(kHonorificPrefixRe), patterns_hp,
&v->honorific_prefix_, &v->honorific_prefix_,
{log_manager, "kHonorificPrefixRe"})) { {log_manager, "kHonorificPrefixRe"})) {
continue; continue;
...@@ -177,30 +208,31 @@ FirstTwoLastNamesField::ParseComponentNames(AutofillScanner* scanner, ...@@ -177,30 +208,31 @@ FirstTwoLastNamesField::ParseComponentNames(AutofillScanner* scanner,
// Skip over any unrelated fields, e.g. "username" or "nickname". // Skip over any unrelated fields, e.g. "username" or "nickname".
if (ParseFieldSpecifics(scanner, UTF8ToUTF16(kNameIgnoredRe), if (ParseFieldSpecifics(scanner, UTF8ToUTF16(kNameIgnoredRe),
MATCH_DEFAULT | MATCH_SELECT | MATCH_SEARCH, MATCH_DEFAULT | MATCH_SELECT | MATCH_SEARCH,
nullptr, {log_manager, "kNameIgnoredRe"})) { patterns_ni, nullptr,
{log_manager, "kNameIgnoredRe"})) {
continue; continue;
} }
if (!v->first_name_ && if (!v->first_name_ &&
ParseField(scanner, UTF8ToUTF16(kFirstNameRe), &v->first_name_, ParseField(scanner, UTF8ToUTF16(kFirstNameRe), patterns_fn,
{log_manager, "kFirstNameRe"})) { &v->first_name_, {log_manager, "kFirstNameRe"})) {
continue; continue;
} }
if (!v->middle_name_ && if (!v->middle_name_ &&
ParseField(scanner, UTF8ToUTF16(kMiddleNameRe), &v->middle_name_, ParseField(scanner, UTF8ToUTF16(kMiddleNameRe), patterns_mn,
{log_manager, "kMiddleNameRe"})) { &v->middle_name_, {log_manager, "kMiddleNameRe"})) {
continue; continue;
} }
if (!v->first_last_name_ && if (!v->first_last_name_ &&
ParseField(scanner, UTF8ToUTF16(kNameLastFirstRe), &v->first_last_name_, ParseField(scanner, UTF8ToUTF16(kNameLastFirstRe), patterns_ln1,
{log_manager, "kNameLastFirstRe"})) { &v->first_last_name_, {log_manager, "kNameLastFirstRe"})) {
continue; continue;
} }
if (!v->second_last_name_ && if (!v->second_last_name_ &&
ParseField(scanner, UTF8ToUTF16(kNameLastSecondRe), ParseField(scanner, UTF8ToUTF16(kNameLastSecondRe), patterns_ln2,
&v->second_last_name_, &v->second_last_name_,
{log_manager, "kNameLastSecondtRe"})) { {log_manager, "kNameLastSecondtRe"})) {
continue; continue;
...@@ -235,6 +267,7 @@ void FirstTwoLastNamesField::AddClassifications( ...@@ -235,6 +267,7 @@ void FirstTwoLastNamesField::AddClassifications(
std::unique_ptr<FirstLastNameField> FirstLastNameField::ParseSpecificName( std::unique_ptr<FirstLastNameField> FirstLastNameField::ParseSpecificName(
AutofillScanner* scanner, AutofillScanner* scanner,
const std::string& page_language,
LogManager* log_manager) { LogManager* log_manager) {
// Some pages (e.g. Overstock_comBilling.html, SmithsonianCheckout.html) // Some pages (e.g. Overstock_comBilling.html, SmithsonianCheckout.html)
// have the label "Name" followed by two or three text fields. // have the label "Name" followed by two or three text fields.
...@@ -242,8 +275,11 @@ std::unique_ptr<FirstLastNameField> FirstLastNameField::ParseSpecificName( ...@@ -242,8 +275,11 @@ std::unique_ptr<FirstLastNameField> FirstLastNameField::ParseSpecificName(
scanner->SaveCursor(); scanner->SaveCursor();
AutofillField* next = nullptr; AutofillField* next = nullptr;
if (ParseField(scanner, UTF8ToUTF16(kNameSpecificRe), &v->first_name_, auto& patterns_ns = PatternProvider::GetInstance().GetMatchPatterns(
{log_manager, "kNameSpecificRe"}) && "NAME_SPECIFIC", page_language);
if (ParseField(scanner, UTF8ToUTF16(kNameSpecificRe), patterns_ns,
&v->first_name_, {log_manager, "kNameSpecificRe"}) &&
ParseEmptyLabel(scanner, &next)) { ParseEmptyLabel(scanner, &next)) {
if (ParseEmptyLabel(scanner, &v->last_name_)) { if (ParseEmptyLabel(scanner, &v->last_name_)) {
// There are three name fields; assume that the middle one is a // There are three name fields; assume that the middle one is a
...@@ -264,6 +300,7 @@ std::unique_ptr<FirstLastNameField> FirstLastNameField::ParseSpecificName( ...@@ -264,6 +300,7 @@ std::unique_ptr<FirstLastNameField> FirstLastNameField::ParseSpecificName(
// static // static
std::unique_ptr<FirstLastNameField> FirstLastNameField::ParseComponentNames( std::unique_ptr<FirstLastNameField> FirstLastNameField::ParseComponentNames(
AutofillScanner* scanner, AutofillScanner* scanner,
const std::string& page_language,
LogManager* log_manager) { LogManager* log_manager) {
std::unique_ptr<FirstLastNameField> v(new FirstLastNameField); std::unique_ptr<FirstLastNameField> v(new FirstLastNameField);
scanner->SaveCursor(); scanner->SaveCursor();
...@@ -279,6 +316,20 @@ std::unique_ptr<FirstLastNameField> FirstLastNameField::ParseComponentNames( ...@@ -279,6 +316,20 @@ std::unique_ptr<FirstLastNameField> FirstLastNameField::ParseComponentNames(
// The ".*last$" matches fields ending in "last" (example in sample8.html). // The ".*last$" matches fields ending in "last" (example in sample8.html).
// Allow name fields to appear in any order. // Allow name fields to appear in any order.
auto& patterns_hp = PatternProvider::GetInstance().GetMatchPatterns(
"HONORIFIC_PREFIX", page_language);
auto& patterns_ni = PatternProvider::GetInstance().GetMatchPatterns(
"NAME_IGNORED", page_language);
auto& patterns_fn = PatternProvider::GetInstance().GetMatchPatterns(
"FIRST_NAME", page_language);
auto& patterns_mi = PatternProvider::GetInstance().GetMatchPatterns(
"MIDDLE_INITIAL", page_language);
auto& patterns_mn = PatternProvider::GetInstance().GetMatchPatterns(
"MIDDLE_NAME", page_language);
auto& patterns_ln = PatternProvider::GetInstance().GetMatchPatterns(
"LAST_NAME", page_language);
while (!scanner->IsEnd()) { while (!scanner->IsEnd()) {
// Scan for the honorific prefix before checking for unrelated fields // Scan for the honorific prefix before checking for unrelated fields
// because a honorific prefix field is expected to have very specific labels // because a honorific prefix field is expected to have very specific labels
...@@ -288,7 +339,7 @@ std::unique_ptr<FirstLastNameField> FirstLastNameField::ParseComponentNames( ...@@ -288,7 +339,7 @@ std::unique_ptr<FirstLastNameField> FirstLastNameField::ParseComponentNames(
if (base::FeatureList::IsEnabled( if (base::FeatureList::IsEnabled(
features::kAutofillEnableSupportForMoreStructureInNames)) { features::kAutofillEnableSupportForMoreStructureInNames)) {
if (!v->honorific_prefix_ && if (!v->honorific_prefix_ &&
ParseField(scanner, UTF8ToUTF16(kHonorificPrefixRe), ParseField(scanner, UTF8ToUTF16(kHonorificPrefixRe), patterns_hp,
&v->honorific_prefix_, &v->honorific_prefix_,
{log_manager, "kHonorificPrefixRe"})) { {log_manager, "kHonorificPrefixRe"})) {
continue; continue;
...@@ -298,13 +349,14 @@ std::unique_ptr<FirstLastNameField> FirstLastNameField::ParseComponentNames( ...@@ -298,13 +349,14 @@ std::unique_ptr<FirstLastNameField> FirstLastNameField::ParseComponentNames(
// Skip over any unrelated name fields, e.g. "username" or "nickname". // Skip over any unrelated name fields, e.g. "username" or "nickname".
if (ParseFieldSpecifics(scanner, UTF8ToUTF16(kNameIgnoredRe), if (ParseFieldSpecifics(scanner, UTF8ToUTF16(kNameIgnoredRe),
MATCH_DEFAULT | MATCH_SELECT | MATCH_SEARCH, MATCH_DEFAULT | MATCH_SELECT | MATCH_SEARCH,
nullptr, {log_manager, "kNameIgnoredRe"})) { patterns_ni, nullptr,
{log_manager, "kNameIgnoredRe"})) {
continue; continue;
} }
if (!v->first_name_ && if (!v->first_name_ &&
ParseField(scanner, UTF8ToUTF16(kFirstNameRe), &v->first_name_, ParseField(scanner, UTF8ToUTF16(kFirstNameRe), patterns_fn,
{log_manager, "kFirstNameRe"})) { &v->first_name_, {log_manager, "kFirstNameRe"})) {
continue; continue;
} }
...@@ -314,21 +366,21 @@ std::unique_ptr<FirstLastNameField> FirstLastNameField::ParseComponentNames( ...@@ -314,21 +366,21 @@ std::unique_ptr<FirstLastNameField> FirstLastNameField::ParseComponentNames(
// "txtmiddlename"); such a field probably actually represents a // "txtmiddlename"); such a field probably actually represents a
// middle initial. // middle initial.
if (!v->middle_name_ && if (!v->middle_name_ &&
ParseField(scanner, UTF8ToUTF16(kMiddleInitialRe), &v->middle_name_, ParseField(scanner, UTF8ToUTF16(kMiddleInitialRe), patterns_mi,
{log_manager, "kMiddleInitialRe"})) { &v->middle_name_, {log_manager, "kMiddleInitialRe"})) {
v->middle_initial_ = true; v->middle_initial_ = true;
continue; continue;
} }
if (!v->middle_name_ && if (!v->middle_name_ &&
ParseField(scanner, UTF8ToUTF16(kMiddleNameRe), &v->middle_name_, ParseField(scanner, UTF8ToUTF16(kMiddleNameRe), patterns_mn,
{log_manager, "kMiddleNameRe"})) { &v->middle_name_, {log_manager, "kMiddleNameRe"})) {
continue; continue;
} }
if (!v->last_name_ && if (!v->last_name_ &&
ParseField(scanner, UTF8ToUTF16(kLastNameRe), &v->last_name_, ParseField(scanner, UTF8ToUTF16(kLastNameRe), patterns_ln,
{log_manager, "kLastNameRe"})) { &v->last_name_, {log_manager, "kLastNameRe"})) {
continue; continue;
} }
...@@ -347,11 +399,12 @@ std::unique_ptr<FirstLastNameField> FirstLastNameField::ParseComponentNames( ...@@ -347,11 +399,12 @@ std::unique_ptr<FirstLastNameField> FirstLastNameField::ParseComponentNames(
// static // static
std::unique_ptr<FirstLastNameField> FirstLastNameField::Parse( std::unique_ptr<FirstLastNameField> FirstLastNameField::Parse(
AutofillScanner* scanner, AutofillScanner* scanner,
const std::string& page_language,
LogManager* log_manager) { LogManager* log_manager) {
std::unique_ptr<FirstLastNameField> field = std::unique_ptr<FirstLastNameField> field =
ParseSpecificName(scanner, log_manager); ParseSpecificName(scanner, page_language, log_manager);
if (!field) if (!field)
field = ParseComponentNames(scanner, log_manager); field = ParseComponentNames(scanner, page_language, log_manager);
return field; return field;
} }
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include "base/macros.h" #include "base/macros.h"
#include "components/autofill/core/browser/autofill_field.h" #include "components/autofill/core/browser/autofill_field.h"
#include "components/autofill/core/browser/form_parsing/form_field.h" #include "components/autofill/core/browser/form_parsing/form_field.h"
#include "components/autofill/core/browser/pattern_provider/pattern_provider.h"
namespace autofill { namespace autofill {
......
...@@ -246,7 +246,8 @@ std::unique_ptr<FormField> PhoneField::Parse(AutofillScanner* scanner, ...@@ -246,7 +246,8 @@ std::unique_ptr<FormField> PhoneField::Parse(AutofillScanner* scanner,
scanner, GetRegExp(kPhoneFieldGrammars[i].regex), scanner, GetRegExp(kPhoneFieldGrammars[i].regex),
&parsed_fields[kPhoneFieldGrammars[i].phone_part], &parsed_fields[kPhoneFieldGrammars[i].phone_part],
{log_manager, GetRegExpName(kPhoneFieldGrammars[i].regex)}, {log_manager, GetRegExpName(kPhoneFieldGrammars[i].regex)},
is_country_code_field)) is_country_code_field,
GetJSONFieldType(kPhoneFieldGrammars[i].regex), page_language))
break; break;
if (kPhoneFieldGrammars[i].max_size && if (kPhoneFieldGrammars[i].max_size &&
(!parsed_fields[kPhoneFieldGrammars[i].phone_part]->max_length || (!parsed_fields[kPhoneFieldGrammars[i].phone_part]->max_length ||
...@@ -291,11 +292,13 @@ std::unique_ptr<FormField> PhoneField::Parse(AutofillScanner* scanner, ...@@ -291,11 +292,13 @@ std::unique_ptr<FormField> PhoneField::Parse(AutofillScanner* scanner,
if (!ParsePhoneField(scanner, kPhoneSuffixRe, if (!ParsePhoneField(scanner, kPhoneSuffixRe,
&phone_field->parsed_phone_fields_[FIELD_SUFFIX], &phone_field->parsed_phone_fields_[FIELD_SUFFIX],
{log_manager, "kPhoneSuffixRe"}, {log_manager, "kPhoneSuffixRe"},
/*is_country_code_field=*/false)) { /*is_country_code_field=*/false, "PHONE_SUFFIX",
page_language)) {
ParsePhoneField(scanner, kPhoneSuffixSeparatorRe, ParsePhoneField(scanner, kPhoneSuffixSeparatorRe,
&phone_field->parsed_phone_fields_[FIELD_SUFFIX], &phone_field->parsed_phone_fields_[FIELD_SUFFIX],
{log_manager, "kPhoneSuffixSeparatorRe"}, {log_manager, "kPhoneSuffixSeparatorRe"},
/*is_country_code_field=*/false); /*is_country_code_field=*/false, "PHONE_SUFFIX_SEPARATOR",
page_language);
} }
} }
...@@ -305,7 +308,8 @@ std::unique_ptr<FormField> PhoneField::Parse(AutofillScanner* scanner, ...@@ -305,7 +308,8 @@ std::unique_ptr<FormField> PhoneField::Parse(AutofillScanner* scanner,
ParsePhoneField(scanner, kPhoneExtensionRe, ParsePhoneField(scanner, kPhoneExtensionRe,
&phone_field->parsed_phone_fields_[FIELD_EXTENSION], &phone_field->parsed_phone_fields_[FIELD_EXTENSION],
{log_manager, "kPhoneExtensionRe"}, {log_manager, "kPhoneExtensionRe"},
/*is_country_code_field=*/false); /*is_country_code_field=*/false, "PHONE_EXTENSION",
page_language);
return std::move(phone_field); return std::move(phone_field);
} }
...@@ -416,19 +420,52 @@ const char* PhoneField::GetRegExpName(RegexType regex_id) { ...@@ -416,19 +420,52 @@ const char* PhoneField::GetRegExpName(RegexType regex_id) {
return ""; return "";
} }
// static
// Translates |phonetype_id| into the field-type name under which the
// corresponding patterns are looked up in the PatternProvider. Values without
// a mapping hit NOTREACHED() and produce an empty string.
std::string PhoneField::GetJSONFieldType(RegexType phonetype_id) {
  // Stays empty unless one of the known cases below assigns a name.
  const char* json_field_type = "";
  switch (phonetype_id) {
    case REGEX_COUNTRY:
      json_field_type = "PHONE_COUNTRY_CODE";
      break;
    case REGEX_AREA:
      json_field_type = "PHONE_AREA_CODE";
      break;
    case REGEX_AREA_NOTEXT:
      json_field_type = "PHONE_AREA_CODE_NO_TEXT";
      break;
    case REGEX_PHONE:
      json_field_type = "PHONE";
      break;
    case REGEX_PREFIX_SEPARATOR:
      json_field_type = "PHONE_PREFIX_SEPARATOR";
      break;
    case REGEX_PREFIX:
      json_field_type = "PHONE_PREFIX";
      break;
    case REGEX_SUFFIX_SEPARATOR:
      json_field_type = "PHONE_SUFFIX_SEPARATOR";
      break;
    case REGEX_SUFFIX:
      json_field_type = "PHONE_SUFFIX";
      break;
    case REGEX_EXTENSION:
      json_field_type = "PHONE_EXTENSION";
      break;
    default:
      NOTREACHED();
      break;
  }
  return json_field_type;
}
// static // static
bool PhoneField::ParsePhoneField(AutofillScanner* scanner, bool PhoneField::ParsePhoneField(AutofillScanner* scanner,
const std::string& regex, const std::string& regex,
AutofillField** field, AutofillField** field,
const RegExLogging& logging, const RegExLogging& logging,
const bool is_country_code_field) { const bool is_country_code_field,
const std::string& json_field_type,
const std::string& page_language) {
int match_type = MATCH_DEFAULT | MATCH_TELEPHONE | MATCH_NUMBER; int match_type = MATCH_DEFAULT | MATCH_TELEPHONE | MATCH_NUMBER;
// Include the selection boxes too for the matching of the phone country code. // Include the selection boxes too for the matching of the phone country code.
if (is_country_code_field) if (is_country_code_field)
match_type |= MATCH_SELECT; match_type |= MATCH_SELECT;
auto& patterns = PatternProvider::GetInstance().GetMatchPatterns(
json_field_type, page_language);
return ParseFieldSpecifics(scanner, base::UTF8ToUTF16(regex), match_type, return ParseFieldSpecifics(scanner, base::UTF8ToUTF16(regex), match_type,
field, logging); patterns, field, logging);
} }
} // namespace autofill } // namespace autofill
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "components/autofill/core/browser/autofill_type.h" #include "components/autofill/core/browser/autofill_type.h"
#include "components/autofill/core/browser/data_model/phone_number.h" #include "components/autofill/core/browser/data_model/phone_number.h"
#include "components/autofill/core/browser/form_parsing/form_field.h" #include "components/autofill/core/browser/form_parsing/form_field.h"
#include "components/autofill/core/browser/pattern_provider/pattern_provider.h"
namespace autofill { namespace autofill {
...@@ -96,12 +97,18 @@ class PhoneField : public FormField { ...@@ -96,12 +97,18 @@ class PhoneField : public FormField {
// This is useful for logging purposes. // This is useful for logging purposes.
static const char* GetRegExpName(RegexType regex_id); static const char* GetRegExpName(RegexType regex_id);
// Returns the name of the field type, as used in the JSON pattern
// configuration, that corresponds to |phonetype_id|.
static std::string GetJSONFieldType(RegexType phonetype_id);
// Convenient wrapper for ParseFieldSpecifics(). // Convenient wrapper for ParseFieldSpecifics().
static bool ParsePhoneField(AutofillScanner* scanner, static bool ParsePhoneField(AutofillScanner* scanner,
const std::string& regex, const std::string& regex,
AutofillField** field, AutofillField** field,
const RegExLogging& logging, const RegExLogging& logging,
const bool is_country_code_field); const bool is_country_code_field,
const std::string& json_field_type,
const std::string& page_language);
// Returns true if |scanner| points to a <select> field that appears to be the // Returns true if |scanner| points to a <select> field that appears to be the
// phone country code by looking at its option contents. // phone country code by looking at its option contents.
......
...@@ -16,10 +16,13 @@ std::unique_ptr<FormField> PriceField::Parse(AutofillScanner* scanner, ...@@ -16,10 +16,13 @@ std::unique_ptr<FormField> PriceField::Parse(AutofillScanner* scanner,
const std::string& page_language, const std::string& page_language,
LogManager* log_manager) { LogManager* log_manager) {
AutofillField* field; AutofillField* field;
auto& patterns =
PatternProvider::GetInstance().GetMatchPatterns("PRICE", page_language);
if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kPriceRe), if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kPriceRe),
MATCH_DEFAULT | MATCH_NUMBER | MATCH_SELECT | MATCH_DEFAULT | MATCH_NUMBER | MATCH_SELECT |
MATCH_TEXT_AREA | MATCH_SEARCH, MATCH_TEXT_AREA | MATCH_SEARCH,
&field, {log_manager, kPriceRe})) { patterns, &field, {log_manager, kPriceRe})) {
return std::make_unique<PriceField>(field); return std::make_unique<PriceField>(field);
} }
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include "base/compiler_specific.h" #include "base/compiler_specific.h"
#include "base/macros.h" #include "base/macros.h"
#include "components/autofill/core/browser/form_parsing/form_field.h" #include "components/autofill/core/browser/form_parsing/form_field.h"
#include "components/autofill/core/browser/pattern_provider/pattern_provider.h"
namespace autofill { namespace autofill {
......
...@@ -16,9 +16,12 @@ std::unique_ptr<FormField> SearchField::Parse(AutofillScanner* scanner, ...@@ -16,9 +16,12 @@ std::unique_ptr<FormField> SearchField::Parse(AutofillScanner* scanner,
const std::string& page_language, const std::string& page_language,
LogManager* log_manager) { LogManager* log_manager) {
AutofillField* field; AutofillField* field;
auto& patterns = PatternProvider::GetInstance().GetMatchPatterns(
SEARCH_TERM, page_language);
if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kSearchTermRe), if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kSearchTermRe),
MATCH_DEFAULT | MATCH_SEARCH | MATCH_TEXT_AREA, MATCH_DEFAULT | MATCH_SEARCH | MATCH_TEXT_AREA,
&field, {log_manager, "kSearchTermRe"})) { patterns, &field, {log_manager, "kSearchTermRe"})) {
return std::make_unique<SearchField>(field); return std::make_unique<SearchField>(field);
} }
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include "base/compiler_specific.h" #include "base/compiler_specific.h"
#include "base/macros.h" #include "base/macros.h"
#include "components/autofill/core/browser/form_parsing/form_field.h" #include "components/autofill/core/browser/form_parsing/form_field.h"
#include "components/autofill/core/browser/pattern_provider/pattern_provider.h"
namespace autofill { namespace autofill {
......
...@@ -21,17 +21,25 @@ std::unique_ptr<FormField> TravelField::Parse(AutofillScanner* scanner, ...@@ -21,17 +21,25 @@ std::unique_ptr<FormField> TravelField::Parse(AutofillScanner* scanner,
if (!scanner || scanner->IsEnd()) { if (!scanner || scanner->IsEnd()) {
return nullptr; return nullptr;
} }
auto& patternsP = PatternProvider::GetInstance().GetMatchPatterns(
"PASSPORT", page_language);
auto& patternsTO = PatternProvider::GetInstance().GetMatchPatterns(
"TRAVEL_ORIGIN", page_language);
auto& patternsTD = PatternProvider::GetInstance().GetMatchPatterns(
"TRAVEL_DESTINATION", page_language);
auto& patternsF =
PatternProvider::GetInstance().GetMatchPatterns("FLIGHT", page_language);
auto travel_field = std::make_unique<TravelField>(); auto travel_field = std::make_unique<TravelField>();
if (ParseField(scanner, base::UTF8ToUTF16(kPassportRe), if (ParseField(scanner, base::UTF8ToUTF16(kPassportRe), patternsP,
&travel_field->passport_, {log_manager, "kPassportRe"}) || &travel_field->passport_, {log_manager, "kPassportRe"}) ||
ParseField(scanner, base::UTF8ToUTF16(kTravelOriginRe), ParseField(scanner, base::UTF8ToUTF16(kTravelOriginRe), patternsTO,
&travel_field->origin_, {log_manager, "kTravelOriginRe"}) || &travel_field->origin_, {log_manager, "kTravelOriginRe"}) ||
ParseField(scanner, base::UTF8ToUTF16(kTravelDestinationRe), ParseField(scanner, base::UTF8ToUTF16(kTravelDestinationRe), patternsTD,
&travel_field->destination_, &travel_field->destination_,
{log_manager, "kTravelDestinationRe"}) || {log_manager, "kTravelDestinationRe"}) ||
ParseField(scanner, base::UTF8ToUTF16(kFlightRe), &travel_field->flight_, ParseField(scanner, base::UTF8ToUTF16(kFlightRe), patternsF,
{log_manager, "kFlightRe"})) { &travel_field->flight_, {log_manager, "kFlightRe"})) {
// If any regex matches, then we found a travel field. // If any regex matches, then we found a travel field.
return std::move(travel_field); return std::move(travel_field);
} }
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include "components/autofill/core/browser/form_parsing/autofill_scanner.h" #include "components/autofill/core/browser/form_parsing/autofill_scanner.h"
#include "components/autofill/core/browser/form_parsing/form_field.h" #include "components/autofill/core/browser/form_parsing/form_field.h"
#include "components/autofill/core/browser/pattern_provider/pattern_provider.h"
namespace autofill { namespace autofill {
......
...@@ -72,7 +72,17 @@ PatternProvider& PatternProvider::GetInstance() { ...@@ -72,7 +72,17 @@ PatternProvider& PatternProvider::GetInstance() {
if (!g_pattern_provider) { if (!g_pattern_provider) {
static base::NoDestructor<PatternProvider> instance; static base::NoDestructor<PatternProvider> instance;
g_pattern_provider = instance.get(); g_pattern_provider = instance.get();
field_type_parsing::PopulateFromResourceBundle(); // TODO(crbug/1147608) This is an ugly hack to avoid loading the JSON. The
// motivation is that some Android unit tests fail because a dependency is
// missing. Instead of fixing this dependency, we'll go for an alternative
// solution that avoids the whole async/sync problem.
if (base::FeatureList::IsEnabled(
features::kAutofillUsePageLanguageToSelectFieldParsingPatterns) ||
base::FeatureList::IsEnabled(
features::
kAutofillApplyNegativePatternsForFieldTypeDetectionHeuristics)) {
field_type_parsing::PopulateFromResourceBundle();
}
} }
return *g_pattern_provider; return *g_pattern_provider;
} }
......
...@@ -112,6 +112,11 @@ TEST(AutofillPatternProvider, Single_Match) { ...@@ -112,6 +112,11 @@ TEST(AutofillPatternProvider, Single_Match) {
// Test that the default pattern provider loads without crashing. // Test that the default pattern provider loads without crashing.
TEST(AutofillPatternProviderPipelineTest, DefaultPatternProviderLoads) { TEST(AutofillPatternProviderPipelineTest, DefaultPatternProviderLoads) {
base::test::ScopedFeatureList scoped_feature_list;
// Enable so that PatternProvider::GetInstance() actually does load the JSON.
scoped_feature_list.InitAndEnableFeature(
autofill::features::kAutofillUsePageLanguageToSelectFieldParsingPatterns);
base::test::TaskEnvironment task_environment_; base::test::TaskEnvironment task_environment_;
data_decoder::test::InProcessDataDecoder in_process_data_decoder_; data_decoder::test::InProcessDataDecoder in_process_data_decoder_;
...@@ -131,6 +136,11 @@ TEST(AutofillPatternProviderPipelineTest, DefaultPatternProviderLoads) { ...@@ -131,6 +136,11 @@ TEST(AutofillPatternProviderPipelineTest, DefaultPatternProviderLoads) {
// needed to test the DefaultPatternProvider. Warning: If this crashes, check // needed to test the DefaultPatternProvider. Warning: If this crashes, check
// that no state carried over from other tests using the singleton. // that no state carried over from other tests using the singleton.
TEST(AutofillPatternProviderPipelineTest, TestParsingEquivalent) { TEST(AutofillPatternProviderPipelineTest, TestParsingEquivalent) {
base::test::ScopedFeatureList scoped_feature_list;
// Enable so that PatternProvider::GetInstance() actually does load the JSON.
scoped_feature_list.InitAndEnableFeature(
autofill::features::kAutofillUsePageLanguageToSelectFieldParsingPatterns);
base::test::TaskEnvironment task_environment_; base::test::TaskEnvironment task_environment_;
data_decoder::test::InProcessDataDecoder in_process_data_decoder_; data_decoder::test::InProcessDataDecoder in_process_data_decoder_;
......
...@@ -4,17 +4,29 @@ ...@@ -4,17 +4,29 @@
#include "components/autofill/core/browser/pattern_provider/test_pattern_provider.h" #include "components/autofill/core/browser/pattern_provider/test_pattern_provider.h"
#include "base/feature_list.h"
#include "components/autofill/core/browser/pattern_provider/pattern_configuration_parser.h" #include "components/autofill/core/browser/pattern_provider/pattern_configuration_parser.h"
#include "components/autofill/core/common/autofill_features.h"
namespace autofill { namespace autofill {
TestPatternProvider::TestPatternProvider() { TestPatternProvider::TestPatternProvider() {
base::Optional<PatternProvider::Map> patterns = // TODO(crbug/1147608) This is an ugly hack to avoid loading the JSON. The
field_type_parsing::GetPatternsFromResourceBundleSynchronously(); // motivation is that some Android unit tests fail because a dependency is
if (patterns) // missing. Instead of fixing this dependency, we'll go for an alternative
SetPatterns(patterns.value(), base::Version(), true); // solution that avoids the whole async/sync problem.
if (base::FeatureList::IsEnabled(
features::kAutofillUsePageLanguageToSelectFieldParsingPatterns) ||
base::FeatureList::IsEnabled(
features::
kAutofillApplyNegativePatternsForFieldTypeDetectionHeuristics)) {
base::Optional<PatternProvider::Map> patterns =
field_type_parsing::GetPatternsFromResourceBundleSynchronously();
if (patterns)
SetPatterns(patterns.value(), base::Version(), true);
PatternProvider::SetPatternProviderForTesting(this); PatternProvider::SetPatternProviderForTesting(this);
}
} }
TestPatternProvider::~TestPatternProvider() { TestPatternProvider::~TestPatternProvider() {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment