Commit 2a87c5fb authored by dhollowa@chromium.org

Heuristics for grabber-continental.com.out (select-one)

Adds specific input types to the field_type.h bitfield to utilize the "select-one" signal in identifying the Country, State, and Credit Card date fields. Other fields are no longer classified when the input type is "select-one".

This is Phase 1 of the fixes for the grabber-continental.com.html test file. Phase 2 will add lookahead logic for the "BusinessPhone" and "Email Address" issues.

BUG=76299
TEST=FormStructureBrowserTest.DataDrivenHeuristics with test file grabber-continental.com.html

Review URL: http://codereview.chromium.org/7063031

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@86717 0039d316-1c4b-4281-b951-d872f2087c98
parent 44c9416b
......@@ -195,7 +195,7 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner,
l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE);
if (!ParseField(scanner, pattern, &address_field->address1_) &&
!ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL,
!ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
&address_field->address1_)) {
return false;
}
......@@ -214,7 +214,7 @@ bool AddressField::ParseAddressLines(AutofillScanner* scanner,
l10n_util::GetStringUTF16(IDS_AUTOFILL_ADDRESS_LINE_1_LABEL_RE);
if (!ParseEmptyLabel(scanner, &address_field->address2_) &&
!ParseField(scanner, pattern, &address_field->address2_)) {
ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL,
ParseFieldSpecifics(scanner, label_pattern, MATCH_LABEL | MATCH_TEXT,
&address_field->address2_);
}
}
......@@ -250,7 +250,8 @@ bool AddressField::ParseCountry(AutofillScanner* scanner,
else
pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_COUNTRY_RE);
return ParseField(scanner, pattern, &address_field->country_);
return ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_SELECT,
&address_field->country_);
}
// static
......@@ -334,7 +335,8 @@ bool AddressField::ParseState(AutofillScanner* scanner,
else
pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_STATE_RE);
return ParseField(scanner, pattern, &address_field->state_);
return ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_SELECT,
&address_field->state_);
}
AddressType AddressField::AddressTypeFromText(const string16 &text) {
......
......@@ -43,7 +43,7 @@
zip|^-$|post2<!-- pt-BR, pt-PT -->|codpos2
</message>
<message name="IDS_AUTOFILL_CITY_RE">
city|town<!-- de-DE -->|ort|stadt<!-- en-AU -->|suburb<!-- es -->|ciudad|provincia|localidad|poblacion<!-- fr-FR -->|ville|commune<!-- it-IT -->|localita<!-- ja-JP -->|市区町村<!-- pt-BR, pt-PT -->|cidade<!-- ru -->|Город<!-- zh-CN -->|市<!-- zh-TW -->|分區
city|town<!-- de-DE -->|^ort$|stadt<!-- en-AU -->|suburb<!-- es -->|ciudad|provincia|localidad|poblacion<!-- fr-FR -->|ville|commune<!-- it-IT -->|localita<!-- ja-JP -->|市区町村<!-- pt-BR, pt-PT -->|cidade<!-- ru -->|Город<!-- zh-CN -->|市<!-- zh-TW -->|分區
</message>
<message name="IDS_AUTOFILL_STATE_RE">
state|county|region|province<!-- de-DE -->|land<!-- en-UK -->|county|principality<!-- ja-JP -->|都道府県<!-- pt-BR, pt-PT -->|estado|provincia<!-- ru -->|область<!-- zh-CN -->|省<!-- zh-TW -->|地區
......
......@@ -122,13 +122,14 @@ CreditCardField* CreditCardField::Parse(AutofillScanner* scanner,
if ((!credit_card_field->expiration_month_ ||
credit_card_field->expiration_month_->IsEmpty()) &&
ParseField(scanner, pattern, &credit_card_field->expiration_month_)) {
ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_SELECT,
&credit_card_field->expiration_month_)) {
if (is_ecml)
pattern = GetEcmlPattern(kEcmlCardExpireYear);
else
pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_EXPIRATION_DATE_RE);
if (!ParseField(scanner, pattern,
if (!ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT | MATCH_SELECT,
&credit_card_field->expiration_year_)) {
scanner->Rewind();
return NULL;
......@@ -137,8 +138,10 @@ CreditCardField* CreditCardField::Parse(AutofillScanner* scanner,
}
}
if (ParseField(scanner, GetEcmlPattern(kEcmlCardExpireDay), NULL))
if (ParseFieldSpecifics(scanner, GetEcmlPattern(kEcmlCardExpireDay),
MATCH_DEFAULT | MATCH_SELECT, NULL)) {
continue;
}
// Some pages (e.g. ExpediaBilling.html) have a "card description"
// field; we parse this field but ignore it.
......
......@@ -20,8 +20,10 @@ EmailField* EmailField::Parse(AutofillScanner* scanner, bool is_ecml) {
pattern = l10n_util::GetStringUTF16(IDS_AUTOFILL_EMAIL_RE);
const AutofillField* field;
if (ParseField(scanner, pattern, &field))
if (ParseFieldSpecifics(scanner, pattern,
MATCH_DEFAULT | MATCH_EMAIL, &field)) {
return new EmailField(field);
}
return NULL;
}
......
......@@ -81,6 +81,26 @@ FormField* ParseFormField(AutofillScanner* scanner, bool is_ecml) {
return NameField::Parse(scanner, is_ecml);
}
// Returns true when the form control type string |type| is exactly
// |expected|.
bool HasControlType(const string16& type, const char* expected) {
  return type == ASCIIToUTF16(expected);
}

// The predicates below each recognize one form control type string.

// True for "text" controls.
bool IsTextField(const string16& type) {
  return HasControlType(type, "text");
}

// True for "email" controls.
bool IsEmailField(const string16& type) {
  return HasControlType(type, "email");
}

// True for "month" controls.
bool IsMonthField(const string16& type) {
  return HasControlType(type, "month");
}

// True for "tel" controls.
bool IsTelephoneField(const string16& type) {
  return HasControlType(type, "tel");
}

// True for "select-one" controls.
bool IsSelectField(const string16& type) {
  return HasControlType(type, "select-one");
}
} // namespace
// static
......@@ -109,7 +129,7 @@ void FormField::ParseFormFields(const std::vector<AutofillField*>& fields,
bool FormField::ParseField(AutofillScanner* scanner,
const string16& pattern,
const AutofillField** match) {
return ParseFieldSpecifics(scanner, pattern, MATCH_ALL, match);
return ParseFieldSpecifics(scanner, pattern, MATCH_DEFAULT, match);
}
// static
......@@ -121,20 +141,31 @@ bool FormField::ParseFieldSpecifics(AutofillScanner* scanner,
return false;
const AutofillField* field = scanner->Cursor();
if (Match(field, pattern, match_type)) {
if (match)
*match = field;
scanner->Advance();
return true;
if ((match_type & MATCH_TEXT) && IsTextField(field->form_control_type))
return MatchAndAdvance(scanner, pattern, match_type, match);
if ((match_type & MATCH_EMAIL) && IsEmailField(field->form_control_type))
return MatchAndAdvance(scanner, pattern, match_type, match);
if ((match_type & MATCH_TELEPHONE) &&
IsTelephoneField(field->form_control_type)) {
return MatchAndAdvance(scanner, pattern, match_type, match);
}
if ((match_type & MATCH_SELECT) && IsSelectField(field->form_control_type))
return MatchAndAdvance(scanner, pattern, match_type, match);
return false;
}
// static
bool FormField::ParseEmptyLabel(AutofillScanner* scanner,
const AutofillField** match) {
return ParseFieldSpecifics(scanner, ASCIIToUTF16("^$"), MATCH_LABEL, match);
return ParseFieldSpecifics(scanner,
ASCIIToUTF16("^$"),
MATCH_LABEL | MATCH_ALL_INPUTS,
match);
}
// static
......@@ -148,6 +179,22 @@ bool FormField::AddClassification(const AutofillField* field,
return map->insert(make_pair(field->unique_name(), type)).second;
}
// static
// Tests the field under the scanner's cursor against |pattern| as directed by
// |match_type|. On success the field is reported through |match| (when
// non-NULL) and the scanner is advanced; on failure nothing is modified.
bool FormField::MatchAndAdvance(AutofillScanner* scanner,
                                const string16& pattern,
                                int match_type,
                                const AutofillField** match) {
  const AutofillField* candidate = scanner->Cursor();

  // Bail out early so the success path below reads straight through.
  if (!Match(candidate, pattern, match_type))
    return false;

  if (match)
    *match = candidate;
  scanner->Advance();
  return true;
}
// static
bool FormField::Match(const AutofillField* field,
const string16& pattern,
......
......@@ -32,9 +32,20 @@ class FormField {
protected:
// A bit-field used for matching specific parts of a field in question.
enum MatchType {
// Attributes.
MATCH_LABEL = 1 << 0,
MATCH_NAME = 1 << 1,
MATCH_ALL = MATCH_LABEL | MATCH_NAME
// Input types.
MATCH_TEXT = 1 << 2,
MATCH_EMAIL = 1 << 3,
MATCH_TELEPHONE = 1 << 4,
MATCH_SELECT = 1 << 5,
MATCH_ALL_INPUTS =
MATCH_TEXT | MATCH_EMAIL | MATCH_TELEPHONE | MATCH_SELECT,
// By default match label and name for input/text types.
MATCH_DEFAULT = MATCH_LABEL | MATCH_NAME | MATCH_TEXT,
};
// Only derived classes may instantiate.
......@@ -75,6 +86,15 @@ class FormField {
private:
FRIEND_TEST_ALL_PREFIXES(FormFieldTest, Match);
// Tests the field at the head of |scanner| against the regular expression
// |pattern|, using the parts of the field selected by the |match_type| bit
// field (see |MatchType|).
// On a match, stores the field in |*match| when |match| is non-NULL, advances
// |scanner|, and returns |true|.
// Otherwise returns |false| and leaves |scanner| and |*match| untouched.
static bool MatchAndAdvance(AutofillScanner* scanner,
const string16& pattern,
int match_type,
const AutofillField** match);
// Matches the regular expression |pattern| against the components of |field|
// as specified in the |match_type| bit field (see |MatchType|).
static bool Match(const AutofillField* field,
......
......@@ -269,8 +269,10 @@ bool PhoneField::ParseInternal(PhoneField *phone_field,
// Attempt to parse according to the next grammar.
for (; i < arraysize(phone_field_grammars_) &&
phone_field_grammars_[i].regex != REGEX_SEPARATOR; ++i) {
if (!ParseField(scanner,
if (!ParseFieldSpecifics(
scanner,
phone_field->GetRegExp(phone_field_grammars_[i].regex),
MATCH_DEFAULT | MATCH_TELEPHONE,
&parsed_fields[phone_field_grammars_[i].phone_part]))
break;
if (phone_field_grammars_[i].max_size &&
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment