Commit 2b71280b authored by Elizabeth Popova, committed by Chromium LUCI CQ

[Autofill] Parse address components sequence in label as full address

Some websites use a single field for an address, with the expected
address components listed in the label. Parsing of such a field might fail
if the name of the field is ambiguous.

Added en, tr and ru regexes, which help to identify address line 1 or
street address if the label contains street and at least one other
address component, in any order.

Bug: 1154727
Change-Id: I82d51ce285e3112ba554ae45332e2f710cdd6b9e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2601370
Reviewed-by: Matthias Körber <koerber@google.com>
Commit-Queue: Elizabeth Popova <lizapopova@google.com>
Cr-Commit-Position: refs/heads/master@{#840159}
parent 1f03a5a4
......@@ -77,7 +77,13 @@ const char kAddressLine1LabelRe[] =
"|住所" // ja-JP
"|地址" // zh-CN
"|(\\b|_)adres(?! tarifi)(\\b|_)" // tr
"|주소"; // ko-KR
"|주소" // ko-KR
// Should contain street and any other address component, in any order
"|street.*(house|building|apartment|floor)" // en
"|(house|building|apartment|floor).*street"
"|(sokak|cadde).*(apartman|bina|daire|mahalle)" // tr
"|(apartman|bina|daire|mahalle).*(sokak|cadde)"
"|улиц.*(дом|корпус|квартир|этаж)|(дом|корпус|квартир|этаж).*улиц"; // ru
const char kAddressLine2Re[] =
"address[_-]?line(2|two)|address2|addr2|street|suite|unit"
"|adresszusatz|ergänzende.?angaben" // de-DE
......
......@@ -288,4 +288,20 @@ TEST_F(AddressFieldTest, NotParseAddressName) {
ClassifyAndVerify(/*parsed=*/false, LanguageCode("tr"));
}
// Tests that a label listing a sequence of address components is classified
// as |ADDRESS_HOME_LINE1|.
// The field name "detail" is ambiguous on its own, so the expectation rests on
// the Russian label, which lists street ("Улица"), house ("дом") and
// apartment ("квартира").
TEST_F(AddressFieldTest, ParseAddressComponentsSequenceAsAddressLine1) {
AddTextFormFieldData("detail", "Улица, дом, квартира", ADDRESS_HOME_LINE1);
// Parsing is expected to succeed when classifying with language code "ru".
ClassifyAndVerify(/*parsed=*/true, LanguageCode("ru"));
}
// Tests that a label listing a sequence of address components is classified
// as |ADDRESS_HOME_STREET_ADDRESS|.
// The field name "detail" is ambiguous on its own, so the expectation rests on
// the Turkish label, which mentions neighbourhood ("Mahalle"), street
// ("sokak") and avenue ("cadde").
TEST_F(AddressFieldTest, ParseAddressComponentsSequenceAsStreetAddress) {
// NOTE(review): "textarea" is presumably the form control type, which would
// explain why a street address rather than address line 1 is expected --
// confirm against AddFormFieldData.
AddFormFieldData("textarea", "detail",
"Mahalle, sokak, cadde ve diğer bilgilerinizi girin",
ADDRESS_HOME_STREET_ADDRESS);
// Parsing is expected to succeed when classifying with language code "tr".
ClassifyAndVerify(/*parsed=*/true, LanguageCode("tr"));
}
} // namespace autofill
......@@ -355,7 +355,7 @@
},
{
"pattern_identifier": "en_address_line_1_label_preserving",
"positive_pattern": "(^\\W*address)|(address\\W*$)|(?:shipping|billing|mailing|pick.?up|drop.?off|delivery|sender|postal|recipient|home|work|office|school|business|mail)[\\s\\-]+address|address\\s+(of|for|to|from)",
"positive_pattern": "(^\\W*address)|(address\\W*$)|(?:shipping|billing|mailing|pick.?up|drop.?off|delivery|sender|postal|recipient|home|work|office|school|business|mail)[\\s\\-]+address|address\\s+(of|for|to|from)|street.*(house|building|apartment|floor)|(house|building|apartment|floor).*street",
"positive_score": 1.1,
"negative_pattern": null,
"match_field_attributes": 1,
......@@ -454,6 +454,14 @@
"negative_pattern": null,
"match_field_attributes": 3,
"match_field_input_types": 1
},
{
"pattern_identifier": "ru_address_line_1_label_preserving",
"positive_pattern": "улиц.*(дом|корпус|квартир|этаж)|(дом|корпус|квартир|этаж).*улиц",
"positive_score": 1.1,
"negative_pattern": null,
"match_field_attributes": 1,
"match_field_input_types": 1
}
],
"zh-CN": [
......@@ -477,7 +485,7 @@
},
{
"pattern_identifier": "tr_address_line_1_label_preserving",
"positive_pattern": "(\\b|_)adres(?! tarifi)(\\b|_)",
"positive_pattern": "(\\b|_)adres(?! tarifi)(\\b|_)|(sokak|cadde).*(apartman|bina|daire|mahalle)|(apartman|bina|daire|mahalle).*(sokak|cadde)",
"positive_score": 1.1,
"negative_pattern": null,
"match_field_attributes": 1,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment