Commit 2fa09e20 authored by Christoph Schwering's avatar Christoph Schwering Committed by Commit Bot

[Autofill] Migrated parsing to PatternProvider.

Autofill uses lots of hard-coded regexps for parsing. PatternProvider
provides a cleaner API with page-language-dependent patterns.

This CL migrates the existing parsing code to the new pattern provider
in an equivalence-preserving way. The behaviour is only enabled if
either of the following features is enabled:
* kAutofillUsePageLanguageToSelectFieldParsingPatterns
* kAutofillApplyNegativePatternsForFieldTypeDetectionHeuristics

The code is not entirely ready for use because of an infrastructure
issue: all relevant tests must mock a PatternProvider that
synchronously loads the JSON. Otherwise they're prone to race
conditions. In a follow-up CL we'll solve this issue.

Change-Id: I235d2b599585522e1a1bea1cd25185ae209f5965
Bug: 1147624, 1147608
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2465847Reviewed-by: default avatarMatthias Körber <koerber@google.com>
Reviewed-by: default avatarChristoph Schwering <schwering@google.com>
Commit-Queue: Matthias Körber <koerber@google.com>
Cr-Commit-Position: refs/heads/master@{#826777}
parent 7d925a88
......@@ -14,6 +14,7 @@
#include "base/strings/string16.h"
#include "components/autofill/core/browser/autofill_type.h"
#include "components/autofill/core/browser/form_parsing/form_field.h"
#include "components/autofill/core/browser/pattern_provider/pattern_provider.h"
namespace autofill {
......@@ -53,45 +54,77 @@ class AddressField : public FormField {
explicit AddressField(LogManager* log_manager);
bool ParseCompany(AutofillScanner* scanner);
bool ParseAddress(AutofillScanner* scanner);
bool ParseAddressFieldSequence(AutofillScanner* scanner);
bool ParseAddressLines(AutofillScanner* scanner);
bool ParseCountry(AutofillScanner* scanner);
bool ParseZipCode(AutofillScanner* scanner);
bool ParseCity(AutofillScanner* scanner);
bool ParseState(AutofillScanner* scanner);
bool ParseCompany(AutofillScanner* scanner, const std::string& page_language);
bool ParseAddress(AutofillScanner* scanner, const std::string& page_language);
bool ParseAddressFieldSequence(AutofillScanner* scanner,
const std::string& page_language);
bool ParseAddressLines(AutofillScanner* scanner,
const std::string& page_language);
bool ParseCountry(AutofillScanner* scanner, const std::string& page_language);
bool ParseZipCode(AutofillScanner* scanner, const std::string& page_language);
bool ParseCity(AutofillScanner* scanner, const std::string& page_language);
bool ParseState(AutofillScanner* scanner, const std::string& page_language);
// Parses the current field pointed to by |scanner|, if it exists, and tries
// to figure out whether the field's type: city, state, zip, or none of those.
// TODO(crbug.com/1073555) Delete this once experiment
// |kAutofillUseParseCityStateCountryZipCodeInHeuristic| has been launched.
bool ParseCityStateZipCode(AutofillScanner* scanner);
bool ParseCityStateZipCode(AutofillScanner* scanner,
const std::string& page_language);
// Parses the current field pointed to by |scanner|, if it exists, and tries
// to figure out whether the field's type: city, state, country, zip, or
// none of those.
bool ParseCityStateCountryZipCode(AutofillScanner* scanner);
bool ParseCityStateCountryZipCode(AutofillScanner* scanner,
const std::string& page_language);
// Like ParseFieldSpecifics(), but applies |pattern| against the name and
// label of the current field separately. If the return value is
// RESULT_MATCH_NAME_LABEL, then |scanner| advances and |match| is filled if
// it is non-NULL. Otherwise |scanner| does not advance and |match| does not
// change.
// ParseNameLabelResult ParseNameAndLabelSeparately(
// AutofillScanner* scanner,
// const base::string16& pattern,
// int match_type,
// AutofillField** match,
// const RegExLogging& logging);
// New version of function above using new structure MatchingPattern and
// PatternProvider.
ParseNameLabelResult ParseNameAndLabelSeparately(
AutofillScanner* scanner,
const base::string16& pattern,
int match_type,
const std::vector<MatchingPattern>& patterns,
AutofillField** match,
const RegExLogging& logging);
// Run matches on the name and label separately. If the return result is
// RESULT_MATCH_NAME_LABEL, then |scanner| advances and the field is set.
// Otherwise |scanner| rewinds and the field is cleared.
ParseNameLabelResult ParseNameAndLabelForZipCode(AutofillScanner* scanner);
ParseNameLabelResult ParseNameAndLabelForCity(AutofillScanner* scanner);
ParseNameLabelResult ParseNameAndLabelForCountry(AutofillScanner* scanner);
ParseNameLabelResult ParseNameAndLabelForState(AutofillScanner* scanner);
ParseNameLabelResult ParseNameAndLabelForZipCode(
AutofillScanner* scanner,
const std::string& page_language);
ParseNameLabelResult ParseNameAndLabelForCity(
AutofillScanner* scanner,
const std::string& page_language);
ParseNameLabelResult ParseNameAndLabelForCountry(
AutofillScanner* scanner,
const std::string& page_language);
ParseNameLabelResult ParseNameAndLabelForState(
AutofillScanner* scanner,
const std::string& page_language);
LogManager* log_manager_;
AutofillField* company_ = nullptr;
......
......@@ -12,6 +12,7 @@
#include "base/macros.h"
#include "components/autofill/core/browser/autofill_type.h"
#include "components/autofill/core/browser/form_parsing/form_field.h"
#include "components/autofill/core/browser/pattern_provider/pattern_provider.h"
namespace autofill {
......@@ -42,7 +43,8 @@ class CreditCardField : public FormField {
// the next few years. |log_manager| is used to log any parsing details
// to chrome://autofill-internals
static bool LikelyCardYearSelectField(AutofillScanner* scanner,
LogManager* log_manager);
LogManager* log_manager,
const std::string& page_language);
// Returns true if |scanner| points to a <select> field that contains credit
// card type options.
......@@ -53,11 +55,14 @@ class CreditCardField : public FormField {
// Prepaid debit cards do not count as gift cards, since they can be used like
// a credit card.
static bool IsGiftCardField(AutofillScanner* scanner,
LogManager* log_manager);
LogManager* log_manager,
const std::string& page_language);
// Parses the expiration month/year/date fields. Returns true if it finds
// something new.
bool ParseExpirationDate(AutofillScanner* scanner, LogManager* log_manager);
bool ParseExpirationDate(AutofillScanner* scanner,
LogManager* log_manager,
const std::string& page_language);
// For the combined expiration field we return |exp_year_type_|; otherwise if
// |expiration_year_| is having year with |max_length| of 2-digits we return
......
......@@ -15,8 +15,10 @@ std::unique_ptr<FormField> EmailField::Parse(AutofillScanner* scanner,
const std::string& page_language,
LogManager* log_manager) {
AutofillField* field;
auto& patterns = PatternProvider::GetInstance().GetMatchPatterns(
"EMAIL_ADDRESS", page_language);
if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kEmailRe),
MATCH_DEFAULT | MATCH_EMAIL, &field,
MATCH_DEFAULT | MATCH_EMAIL, patterns, &field,
{log_manager, "kEmailRe"})) {
return std::make_unique<EmailField>(field);
}
......
......@@ -10,6 +10,7 @@
#include "base/compiler_specific.h"
#include "base/macros.h"
#include "components/autofill/core/browser/form_parsing/form_field.h"
#include "components/autofill/core/browser/pattern_provider/pattern_provider.h"
namespace autofill {
......
......@@ -168,6 +168,22 @@ bool FormField::ParseField(AutofillScanner* scanner,
return ParseFieldSpecifics(scanner, patterns, match, logging);
}
bool FormField::ParseField(AutofillScanner* scanner,
const base::string16& pattern,
const std::vector<MatchingPattern>& patterns,
AutofillField** match,
const RegExLogging& logging) {
if (base::FeatureList::IsEnabled(
features::kAutofillUsePageLanguageToSelectFieldParsingPatterns) ||
base::FeatureList::IsEnabled(
features::
kAutofillApplyNegativePatternsForFieldTypeDetectionHeuristics)) {
return ParseField(scanner, patterns, match, logging);
} else {
return ParseField(scanner, pattern, match, logging);
}
}
bool FormField::ParseFieldSpecifics(AutofillScanner* scanner,
const base::string16& pattern,
int match_field_attributes,
......@@ -238,6 +254,38 @@ bool FormField::ParseFieldSpecifics(AutofillScanner* scanner,
match_field_types, match, logging);
}
bool FormField::ParseFieldSpecifics(
AutofillScanner* scanner,
const base::string16& pattern,
int match_type,
const std::vector<MatchingPattern>& patterns,
AutofillField** match,
const RegExLogging& logging,
MatchFieldBitmasks match_field_bitmasks) {
if (base::FeatureList::IsEnabled(
features::kAutofillUsePageLanguageToSelectFieldParsingPatterns) ||
base::FeatureList::IsEnabled(
features::
kAutofillApplyNegativePatternsForFieldTypeDetectionHeuristics)) {
// TODO(crbug/1142936): This hack is to allow
// AddressField::ParseNameAndLabelSeparately().
if (match_field_bitmasks.restrict_attributes != ~0 ||
match_field_bitmasks.augment_types != 0) {
std::vector<MatchingPattern> patterns_with_restricted_match_type =
patterns;
for (MatchingPattern& mp : patterns_with_restricted_match_type) {
mp.match_field_attributes &= match_field_bitmasks.restrict_attributes;
mp.match_field_input_types |= match_field_bitmasks.augment_types;
}
return ParseFieldSpecifics(scanner, patterns_with_restricted_match_type,
match, logging);
}
return ParseFieldSpecifics(scanner, patterns, match, logging);
} else {
return ParseFieldSpecifics(scanner, pattern, match_type, match, logging);
}
}
// static
bool FormField::ParseEmptyLabel(AutofillScanner* scanner,
AutofillField** match) {
......
......@@ -72,6 +72,12 @@ class FormField {
AutofillField** match,
const RegExLogging& logging = {});
static bool ParseField(AutofillScanner* scanner,
const base::string16& pattern,
const std::vector<MatchingPattern>& patterns,
AutofillField** match,
const RegExLogging& logging = {});
// Parses the stream of fields in |scanner| with regular expression |pattern|
// as specified in the |match_type| bit field (see |MatchType|). If |match|
// is non-NULL and the pattern matches, |match| will be set to the matched
......@@ -96,6 +102,20 @@ class FormField {
int match_field_input_types,
AutofillField** match,
const RegExLogging& logging = {});
struct MatchFieldBitmasks {
int restrict_attributes = ~0;
int augment_types = 0;
};
static bool ParseFieldSpecifics(AutofillScanner* scanner,
const base::string16& pattern,
int match_type,
const std::vector<MatchingPattern>& patterns,
AutofillField** match,
const RegExLogging& logging,
MatchFieldBitmasks match_field_bitmasks = {
.restrict_attributes = ~0,
.augment_types = 0});
// Attempts to parse a field with an empty label. Returns true
// on success and fills |match| with a pointer to the field.
......
......@@ -13,6 +13,7 @@
#include "base/macros.h"
#include "components/autofill/core/browser/autofill_field.h"
#include "components/autofill/core/browser/form_parsing/form_field.h"
#include "components/autofill/core/browser/pattern_provider/pattern_provider.h"
namespace autofill {
......
......@@ -246,7 +246,8 @@ std::unique_ptr<FormField> PhoneField::Parse(AutofillScanner* scanner,
scanner, GetRegExp(kPhoneFieldGrammars[i].regex),
&parsed_fields[kPhoneFieldGrammars[i].phone_part],
{log_manager, GetRegExpName(kPhoneFieldGrammars[i].regex)},
is_country_code_field))
is_country_code_field,
GetJSONFieldType(kPhoneFieldGrammars[i].regex), page_language))
break;
if (kPhoneFieldGrammars[i].max_size &&
(!parsed_fields[kPhoneFieldGrammars[i].phone_part]->max_length ||
......@@ -291,11 +292,13 @@ std::unique_ptr<FormField> PhoneField::Parse(AutofillScanner* scanner,
if (!ParsePhoneField(scanner, kPhoneSuffixRe,
&phone_field->parsed_phone_fields_[FIELD_SUFFIX],
{log_manager, "kPhoneSuffixRe"},
/*is_country_code_field=*/false)) {
/*is_country_code_field=*/false, "PHONE_SUFFIX",
page_language)) {
ParsePhoneField(scanner, kPhoneSuffixSeparatorRe,
&phone_field->parsed_phone_fields_[FIELD_SUFFIX],
{log_manager, "kPhoneSuffixSeparatorRe"},
/*is_country_code_field=*/false);
/*is_country_code_field=*/false, "PHONE_SUFFIX_SEPARATOR",
page_language);
}
}
......@@ -305,7 +308,8 @@ std::unique_ptr<FormField> PhoneField::Parse(AutofillScanner* scanner,
ParsePhoneField(scanner, kPhoneExtensionRe,
&phone_field->parsed_phone_fields_[FIELD_EXTENSION],
{log_manager, "kPhoneExtensionRe"},
/*is_country_code_field=*/false);
/*is_country_code_field=*/false, "PHONE_EXTENSION",
page_language);
return std::move(phone_field);
}
......@@ -416,19 +420,52 @@ const char* PhoneField::GetRegExpName(RegexType regex_id) {
return "";
}
//
std::string PhoneField::GetJSONFieldType(RegexType phonetype_id) {
switch (phonetype_id) {
case REGEX_COUNTRY:
return "PHONE_COUNTRY_CODE";
case REGEX_AREA:
return "PHONE_AREA_CODE";
case REGEX_AREA_NOTEXT:
return "PHONE_AREA_CODE_NO_TEXT";
case REGEX_PHONE:
return "PHONE";
case REGEX_PREFIX_SEPARATOR:
return "PHONE_PREFIX_SEPARATOR";
case REGEX_PREFIX:
return "PHONE_PREFIX";
case REGEX_SUFFIX_SEPARATOR:
return "PHONE_SUFFIX_SEPARATOR";
case REGEX_SUFFIX:
return "PHONE_SUFFIX";
case REGEX_EXTENSION:
return "PHONE_EXTENSION";
default:
NOTREACHED();
break;
}
return std::string();
}
// static
bool PhoneField::ParsePhoneField(AutofillScanner* scanner,
const std::string& regex,
AutofillField** field,
const RegExLogging& logging,
const bool is_country_code_field) {
const bool is_country_code_field,
const std::string& json_field_type,
const std::string& page_language) {
int match_type = MATCH_DEFAULT | MATCH_TELEPHONE | MATCH_NUMBER;
// Include the selection boxes too for the matching of the phone country code.
if (is_country_code_field)
match_type |= MATCH_SELECT;
auto& patterns = PatternProvider::GetInstance().GetMatchPatterns(
json_field_type, page_language);
return ParseFieldSpecifics(scanner, base::UTF8ToUTF16(regex), match_type,
field, logging);
patterns, field, logging);
}
} // namespace autofill
......@@ -17,6 +17,7 @@
#include "components/autofill/core/browser/autofill_type.h"
#include "components/autofill/core/browser/data_model/phone_number.h"
#include "components/autofill/core/browser/form_parsing/form_field.h"
#include "components/autofill/core/browser/pattern_provider/pattern_provider.h"
namespace autofill {
......@@ -96,12 +97,18 @@ class PhoneField : public FormField {
// This is useful for logging purposes.
static const char* GetRegExpName(RegexType regex_id);
// Returns the name of field type which indicated in JSON corresponding to
// |regex_id|.
static std::string GetJSONFieldType(RegexType phonetype_id);
// Convenient wrapper for ParseFieldSpecifics().
static bool ParsePhoneField(AutofillScanner* scanner,
const std::string& regex,
AutofillField** field,
const RegExLogging& logging,
const bool is_country_code_field);
const bool is_country_code_field,
const std::string& json_field_type,
const std::string& page_language);
// Returns true if |scanner| points to a <select> field that appears to be the
// phone country code by looking at its option contents.
......
......@@ -16,10 +16,13 @@ std::unique_ptr<FormField> PriceField::Parse(AutofillScanner* scanner,
const std::string& page_language,
LogManager* log_manager) {
AutofillField* field;
auto& patterns =
PatternProvider::GetInstance().GetMatchPatterns("PRICE", page_language);
if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kPriceRe),
MATCH_DEFAULT | MATCH_NUMBER | MATCH_SELECT |
MATCH_TEXT_AREA | MATCH_SEARCH,
&field, {log_manager, kPriceRe})) {
patterns, &field, {log_manager, kPriceRe})) {
return std::make_unique<PriceField>(field);
}
......
......@@ -10,6 +10,7 @@
#include "base/compiler_specific.h"
#include "base/macros.h"
#include "components/autofill/core/browser/form_parsing/form_field.h"
#include "components/autofill/core/browser/pattern_provider/pattern_provider.h"
namespace autofill {
......
......@@ -16,9 +16,12 @@ std::unique_ptr<FormField> SearchField::Parse(AutofillScanner* scanner,
const std::string& page_language,
LogManager* log_manager) {
AutofillField* field;
auto& patterns = PatternProvider::GetInstance().GetMatchPatterns(
SEARCH_TERM, page_language);
if (ParseFieldSpecifics(scanner, base::UTF8ToUTF16(kSearchTermRe),
MATCH_DEFAULT | MATCH_SEARCH | MATCH_TEXT_AREA,
&field, {log_manager, "kSearchTermRe"})) {
patterns, &field, {log_manager, "kSearchTermRe"})) {
return std::make_unique<SearchField>(field);
}
......
......@@ -10,6 +10,7 @@
#include "base/compiler_specific.h"
#include "base/macros.h"
#include "components/autofill/core/browser/form_parsing/form_field.h"
#include "components/autofill/core/browser/pattern_provider/pattern_provider.h"
namespace autofill {
......
......@@ -21,17 +21,25 @@ std::unique_ptr<FormField> TravelField::Parse(AutofillScanner* scanner,
if (!scanner || scanner->IsEnd()) {
return nullptr;
}
auto& patternsP = PatternProvider::GetInstance().GetMatchPatterns(
"PASSPORT", page_language);
auto& patternsTO = PatternProvider::GetInstance().GetMatchPatterns(
"TRAVEL_ORIGIN", page_language);
auto& patternsTD = PatternProvider::GetInstance().GetMatchPatterns(
"TRAVEL_DESTINATION", page_language);
auto& patternsF =
PatternProvider::GetInstance().GetMatchPatterns("FLIGHT", page_language);
auto travel_field = std::make_unique<TravelField>();
if (ParseField(scanner, base::UTF8ToUTF16(kPassportRe),
if (ParseField(scanner, base::UTF8ToUTF16(kPassportRe), patternsP,
&travel_field->passport_, {log_manager, "kPassportRe"}) ||
ParseField(scanner, base::UTF8ToUTF16(kTravelOriginRe),
ParseField(scanner, base::UTF8ToUTF16(kTravelOriginRe), patternsTO,
&travel_field->origin_, {log_manager, "kTravelOriginRe"}) ||
ParseField(scanner, base::UTF8ToUTF16(kTravelDestinationRe),
ParseField(scanner, base::UTF8ToUTF16(kTravelDestinationRe), patternsTD,
&travel_field->destination_,
{log_manager, "kTravelDestinationRe"}) ||
ParseField(scanner, base::UTF8ToUTF16(kFlightRe), &travel_field->flight_,
{log_manager, "kFlightRe"})) {
ParseField(scanner, base::UTF8ToUTF16(kFlightRe), patternsF,
&travel_field->flight_, {log_manager, "kFlightRe"})) {
// If any regex matches, then we found a travel field.
return std::move(travel_field);
}
......
......@@ -9,6 +9,7 @@
#include "components/autofill/core/browser/form_parsing/autofill_scanner.h"
#include "components/autofill/core/browser/form_parsing/form_field.h"
#include "components/autofill/core/browser/pattern_provider/pattern_provider.h"
namespace autofill {
......
......@@ -72,7 +72,17 @@ PatternProvider& PatternProvider::GetInstance() {
if (!g_pattern_provider) {
static base::NoDestructor<PatternProvider> instance;
g_pattern_provider = instance.get();
field_type_parsing::PopulateFromResourceBundle();
// TODO(crbug/1147608) This is an ugly hack to avoid loading the JSON. The
// motivation is that some Android unit tests fail because a dependency is
// missing. Instead of fixing this dependency, we'll go for an alternative
// solution that avoids the whole async/sync problem.
if (base::FeatureList::IsEnabled(
features::kAutofillUsePageLanguageToSelectFieldParsingPatterns) ||
base::FeatureList::IsEnabled(
features::
kAutofillApplyNegativePatternsForFieldTypeDetectionHeuristics)) {
field_type_parsing::PopulateFromResourceBundle();
}
}
return *g_pattern_provider;
}
......
......@@ -112,6 +112,11 @@ TEST(AutofillPatternProvider, Single_Match) {
// Test that the default pattern provider loads without crashing.
TEST(AutofillPatternProviderPipelineTest, DefaultPatternProviderLoads) {
base::test::ScopedFeatureList scoped_feature_list;
// Enable so that PatternProvider::GetInstance() actually does load the JSON.
scoped_feature_list.InitAndEnableFeature(
autofill::features::kAutofillUsePageLanguageToSelectFieldParsingPatterns);
base::test::TaskEnvironment task_environment_;
data_decoder::test::InProcessDataDecoder in_process_data_decoder_;
......@@ -131,6 +136,11 @@ TEST(AutofillPatternProviderPipelineTest, DefaultPatternProviderLoads) {
// needed to test the DefaultPatternProvider. Warning: If this crashes, check
// that no state carried over from other tests using the singleton.
TEST(AutofillPatternProviderPipelineTest, TestParsingEquivalent) {
base::test::ScopedFeatureList scoped_feature_list;
// Enable so that PatternProvider::GetInstance() actually does load the JSON.
scoped_feature_list.InitAndEnableFeature(
autofill::features::kAutofillUsePageLanguageToSelectFieldParsingPatterns);
base::test::TaskEnvironment task_environment_;
data_decoder::test::InProcessDataDecoder in_process_data_decoder_;
......
......@@ -4,17 +4,29 @@
#include "components/autofill/core/browser/pattern_provider/test_pattern_provider.h"
#include "base/feature_list.h"
#include "components/autofill/core/browser/pattern_provider/pattern_configuration_parser.h"
#include "components/autofill/core/common/autofill_features.h"
namespace autofill {
TestPatternProvider::TestPatternProvider() {
base::Optional<PatternProvider::Map> patterns =
field_type_parsing::GetPatternsFromResourceBundleSynchronously();
if (patterns)
SetPatterns(patterns.value(), base::Version(), true);
// TODO(crbug/1147608) This is an ugly hack to avoid loading the JSON. The
// motivation is that some Android unit tests fail because a dependency is
// missing. Instead of fixing this dependency, we'll go for an alternative
// solution that avoids the whole async/sync problem.
if (base::FeatureList::IsEnabled(
features::kAutofillUsePageLanguageToSelectFieldParsingPatterns) ||
base::FeatureList::IsEnabled(
features::
kAutofillApplyNegativePatternsForFieldTypeDetectionHeuristics)) {
base::Optional<PatternProvider::Map> patterns =
field_type_parsing::GetPatternsFromResourceBundleSynchronously();
if (patterns)
SetPatterns(patterns.value(), base::Version(), true);
PatternProvider::SetPatternProviderForTesting(this);
PatternProvider::SetPatternProviderForTesting(this);
}
}
TestPatternProvider::~TestPatternProvider() {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment