Commit a57325e1 authored by dyu@chromium.org

Test Autofill's ability to merge duplicate profiles and throw away junk
profiles. Includes a dataset converter script that converts a csv-style
dataset file into a list of profile dictionaries.

testMergeDuplicateProfilesInAutofill

Added additional tests:
testFilterMalformedEmailAddresses - covers fixed bug 73654.
testFilterIncompleteAddresses - covers fixed bug 71710.

BUG=none
TEST=none
Review URL: http://codereview.chromium.org/6246147

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@75344 0039d316-1c4b-4281-b951-d872f2087c98
parent b964574e
This dataset file can be parsed with dataset_converter.py
first_name, middle_name, last_name, email, company_name, address_line_1, address_line_2, city, state, zipcode, country, phone, fax
John||Doe|john.doe@gmail.com||1950 Amphitheatre Ave #2||Mountain View|California|14888|US|4195551234|
John||Doe|john.doe@movitron.com|Movitron|Iceberg @ 10 Pennsylvania|Movitron 5th Floor|Mountain View|CA|14890|US|4195551234|
John||Doe|john@incarnate.net||||SFO|-08:00 Pacific Time (US & Canada); Tijuana||US||
Aman||Radmon||Current or past|||Miles|North America||United States||
|||||16th btw Mission & Valencia||||14888||4195551234|
John|K|Doe|John.doe@gmail.com||||||Postal Code|||
Betty||Blue|betty.blue@gmail.com||1950 Amphitheatre Ave 2|||||AF||
JOHN||DOE||||||||||
Movitron|||||10 Pennsylvania St.|at Spear|Mountain View||14889|CA||
Movitron|||||10 Pennsylvania St.|at Spear st|Mountain View||14889|CA||
Movitron|||||10 Pennsylvania St.|at Spear st|Mountain View||14889|CA||
John||Doe|john.doe@gmail.com|self|Movitron 5th Floor||Mountain View|CA|14891|US|4195551234|
Annie||Doe||||||||||
John||Doe|john.doe@gmail.com|Movitron|1950 Amphitheatre #2||Mountain View|CA|14888|United States|4195551234|
Movitron|||||10 Pennsylvania St.|at Spear st|Mountain View||14889|CA||
John M Doe|US|false|john.doe@gmail.com||1950 Amphitheatre Ave #2||Mountain View|CA|14888|US|2935190284|
John||Doe|john.doe@gmail.com|Movitron, Inc. / Compa.ny|john.doe@gmail.com|john.doe@gmail.com|Mountain View|CA|14888|US||
John||Doe|4195551234|Movitron, Inc.|john.doe@gmail.com|||CA||USA|4195551234|
John||Doe|4195551234|Movitron, Inc.|john.doe@gmail.com|john.doe@gmail.com||CA|14888|USA|4195551234|
john||Doe|john.doe@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|USA|4195551234|
John|K|Doe|john.doe@movitron.com|||||||||
James||Ralph|john.doe@gmail.com|CP|||||14888|us||
John||Doe|1|Movitron, Inc.|john.doe@gmail.com|john.doe@gmail.com|||14888||4195551234|
John||Doe|john.doe@gmail.com|Movitron, Inc. / Compa.ny|john.doe@gmail.com|john.doe@gmail.com|Mountain View|CA|14888|US||
John|K|Doe|John.doe@gmail.com||||||Postal Code|||
James||Ralph|john.doe@gmail.com|CP|16th btw Mission & Valencia||||14888|us|4195551234|
john||doe|john.doe@gmail.com||1950 Amphitheatre Ave #2||||14888|US|4195551234|
john||doe|john.doe@gmail.com||1950 Amphitheatre Ave #2||||14888|US|4195551234|
John||Doe|john@incarnate.net|C|1950 Amphitheatre #2||Mountain View|CA|14888|us|4195551234|
John||Doe|john.doe@gmail.com||john.doe@gmail.com|john.doe@gmail.com|||14888|81||
John||Doe|john.doe@gmail.com|Movitron|1950 Amphitheatre #2||California|Commercial/Industrial|14888|81|4195551234|
James||Ralph|john.doe@gmail.com|CP|16th btw Mission & Valencia||||14888|us|4195551234|
James||Ralph|john.doe@gmail.com|CP|16th btw Mission & Valencia||||14888|us|4195551234|
John||Doe|john.doe@gmail.com|Movitron, Inc. / Compa.ny|16th btw Mission & Valencia||Mountain View|CA|14888|US|4195551234|
cardinal||||CP|||||17505|us||
Betty||Blue|betty.blue@gmail.com||1950 Amphitheatre Ave #2||||14888|AF||
John||Doe|john.doe@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|USA|4195551234|
||||Choose a company...||||||-||
John|K|Doe|john.doe@gmail.com||||||Postal Code|United States|4195551234|
Doe||House|||827 Peaches||Ben|Tx|53077|||
DodoLand|||||827 Peaches||Ben|Tx|53077|||
John||Doe|4195551234|Movitron, Inc.|john.doe@gmail.com|john.doe@gmail.com||CA|14888|USA|4195551234|
John|K|Doe|john.doe@gmail.com||||||Postal Code||4195551234|
john||doe|john.doe@gmail.com||1950 Amphitheatre Ave #2||||14888|US|4195551234|
John||Doe|john.doe@gmail.com|self|Movitron 5th Floor||Mountain View|CA|14891|US|4195551234|
John||Doe|john.doe+goto@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|US|4195551234|
James||Ralph|john.doe@gmail.com|CP|||||14888|us||
||||lowercase capital||||||dk||
||||lowercase capital||||||dk||
Betty||Blue|betty.blue@gmail.com||1950 Amphitheatre Ave #2||||14888|AF||
JOHN||DOE||||||||||
Annie||Doe||||||||||
John doe|1950 Amphitheatre #2|Doe|john.doe@gmail.com||||Mountain View|CA|14888|US|5551234|
John doe|1950 Amphitheatre #2|Doe|john.doe@gmail.com||||Mountain View|CA|14888|US|5551234|
John||Doe|john.doe+goto@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|US|4195551234|
John||Doe|john@incarnate.net|CP|1950 Amphitheatre #2||Mountain View|CA|14888|us|4195551234|
John|K|Doe|john.doe@movitron.com|||||||||
john|k|doe|john.doe@gmail.com|||true|Mountain View|9110|14891|4601|4195551234|
Betty||Blue|betty.blue@gmail.com||1950 Amphitheatre Ave 2|||||AF||
John||Doe|PILOT PROGRAM - wireless||1950 Amphitheatre Ave #2||7x54lbbs||14888|us|4195551234|
||||lowercase capital||||||dk||
James||Ralph|john.doe@gmail.com|CP|16th btw Mission & Valencia||||14888|us|4195551234|
James||Ralph|john.doe@gmail.com|CP|16th btw Mission & Valencia||||14888|us|4195551234|
John M Doe|US|false|john.doe@gmail.com||1950 Amphitheatre Ave #2||Mountain View|CA|14888|US|2935190284|
john||doe|john.doe@gmail.com||1950 Amphitheatre Ave #2||||14888|US|4195551234|
John|K|Doe|john.doe@gmail.com||16th btw Mission & Valencia||Mountian View|9110|14888|4601|4195551234|
|||||9128 s. Broadway|||TX|53077|||
James||Ralph|john.doe@gmail.com|CP|16th btw Mission & Valencia||||14888|us|4195551234|
john||doe|john.doe@gmail.com||1950 Amphitheatre Ave #2||||14888|US|4195551234|
James||Ralph|john.doe@gmail.com|CP|16th btw Mission & Valencia||||14888|us|4195551234|
John|K|Doe|john.doe@gmail.com||16th btw Mission & Valencia||||14888|us|4195551234|
john||Doe|john.doe@gmail.com||||||14888|||
John||Doe|john.doe@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|USA|4195551234|
john||Doe|john.doe@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|USA|4195551234|
john||Doe|john.doe@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|USA|4195551234|
john||Doe|john.doe@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|USA|4195551234|
john||Doe|john.doe@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|USA|4195551234|
John||Doe|john.doe@gmail.com||1950 Amphitheatre Ave #2||Mountain View|CA|14888|United States||
John||Doe|john.doe@gmail.com|www.movitron.com||||||US|4195551234|
|||||9128 s. Broadway|||TX|53077|||
John|K|Doe|john.doe@gmail.com||16th btw Mission & Valencia||Mountian View|9110|14888|4601|4195551234|
john||doe|john.doe@gmail.com||1950 Amphitheatre Ave #2||||14888|US|4195551234|
|||||9128 s. Broadway|||TX|53077|||
John M Doe|US|false|john.doe@gmail.com||1950 Amphitheatre Ave #2||Mountain View|CA|14888|US|2935190284|
James||Ralph|john.doe@gmail.com|CP|16th btw Mission & Valencia||||14888|us|4195551234|
James||Ralph|john.doe@gmail.com|CP|16th btw Mission & Valencia||||14888|us|4195551234|
John|K|Doe|john.doe@gmail.com||16th btw Mission & Valencia||||14888|us|4195551234|
James||Ralph|john.doe@gmail.com|CP|16th btw Mission & Valencia||||14888|us|4195551234|
john||doe|john.doe@gmail.com||1950 Amphitheatre Ave #2||||14888|US|4195551234|
James||Ralph|john.doe@gmail.com|CP|16th btw Mission & Valencia||||14888|us|4195551234|
John||Doe|john.doe@gmail.com|||||||1||
john||Doe|john.doe@gmail.com||||||14888|||
|||||||MOUNTAINVIEW|California||United States||
||||Movitron, Inc.|Iceberg @ 10 Pennsylvania|Movitron 5th Fl|Mountain View|California|14891|United States|4195551234|
john||Doe|john.doe@gmail.com|myself only||||CA|14888|myself only||
John||Doe|john.doe@gmail.com||||||14888|81||
John||Doe|Evening||1950 Amphitheatre Ave #2||Mountain View||14888|Belmont|4195551234|
John||Doe|Every New Message|Movitron, Inc.|Iceberg @ 10 Pennsylvania||Mountain View|CA|14888|US|4195551234|
John||Doe|john.doe@gmail.com|Movitron|1950 Amphitheatre #2||Mountain View|CA|14888|USA|4195551234|
john||doe|john.doe@gmail.com||||||14888|14888||
cardinal||||CP|||||17505|us||
Movitron|||||10 Pennsylvania St.|at Spear st|Mountain View||14889|CA||
Movitron|||||10 Pennsylvania St.|at Spear st|Mountain View||14889|CA||
John||Doe|john.doe@gmail.com|Movitron, Inc. / Compa.ny|john.doe@gmail.com|john.doe@gmail.com|Mountain View|CA|14888|US||
John|K|Doe|John.doe@gmail.com||||||Postal Code|||
John|K|Doe|john.doe@movitron.com|||||||||
Betty||Blue|betty.blue@gmail.com||1950 Amphitheatre Ave 2|||||AF||
Betty||Blue|betty.blue@gmail.com||1950 Amphitheatre Ave #2||||14888|AF||
JOHN||DOE||||||||||
John||Doe|john.doe@gmail.com|self|btw bryant and forida|||CA||US|4195551234|
James||Ralph|john.doe@gmail.com|CP|||||14888|us||
James||Ralph|john.doe@gmail.com|CP|||||14888|us||
John||Doe|john.doe@gmail.com|self|16th & mission|||CA||US|4195551234|
john||doe|john.doe@gmail.com||1950 Amphitheatre Ave #2||||14888|US|4195551234|
Annie||Doe||||||||||
John doe|1950 Amphitheatre #2|Doe|john.doe@gmail.com||||Mountain View|CA|14888|US|5551234|
John||Doe|john.doe+goto@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|US|4195551234|
James||Ralph|john.doe@gmail.com|CP|||||14888|us||
||||lowercase capital||||||dk||
John||Doe|4195551234|Movitron, Inc.|john.doe@gmail.com|john.doe@gmail.com||CA|14888|USA|4195551234|
John|K|Doe|john.doe@gmail.com||||||Postal Code||4195551234|
john||doe|john.doe@gmail.com||1950 Amphitheatre Ave #2||||14888|US|4195551234|
John||Doe|john.doe@gmail.com|self|Movitron 5th Floor||Mountain View|CA|14891|US|4195551234|
AT&T||Mobility|||AT&T Mobility|PO BOX 897234|Santa Cruz|CA|12395||8005557612|
John M Doe|US|false|john.doe@gmail.com||1950 Amphitheatre Ave #2||Mountain View|CA|14888|US|2935190284|
john|k|doe|john.doe@gmail.com|||true|Mountain View|9110|14891|4601|4195551234|
John M Doe|US|false|john.doe@gmail.com||1950 Amphitheatre Ave #2||Mountain View|CA|14888|US|2935190284|
John doe|req|the recipient's shipping Name|||||Mountain View|req|14888|CA|4195551234|
John||Doe|john.doe@gmail.com||john.doe@gmail.com|john.doe@gmail.com|||14888|81||
James||Ralph|john.doe@gmail.com|CP|16th btw Mission & Valencia||||14888|us|4195551234|
john||doe|john.doe@gmail.com||1950 Amphitheatre Ave #2||||14888|US|4195551234|
AT&T||Mobility|||AT&T Mobility|PO BOX 897234|Santa Cruz||12345||8005557612|
James||Ralph|john.doe@gmail.com|CP|16th btw Mission & Valencia||||14888|us|4195551234|
John|K|Doe|john.doe@gmail.com||16th btw Mission & Valencia||||14888|us|4195551234|
John|K|Doe|john.doe@gmail.com||||Mountian View|9110|14888|4601|4195551234|
John|K|Doe|john.doe@gmail.com||||||14888|us|4195551234|
John|K|Doe|john.doe@gmail.com||16th btw Mission & Valencia||Mountian View|9110|14888|4601|4195551234|
john||Doe|john.doe@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|USA|4195551234|
cardinal|||john.doe@gmail.com|CP|||||14888|us||
John||Doe|1|Movitron, Inc.|john.doe@gmail.com|john.doe@gmail.com|||14888||4195551234|
john|k|doe|john.doe@gmail.com|||true|Mountain View|9110|14891|4601|4195551234|
James||Ralph|john.doe@gmail.com|CP|16th btw Mission & Valencia||||14888|us|4195551234|
cardinal||||CP|||||17505|us||
James||Ralph|john.doe@gmail.com|CP|16th btw Mission & Valencia||||14888|us|4195551234|
john||doe|john.doe@gmail.com||1950 Amphitheatre Ave #2||||14888|US|4195551234|
john||Doe|john.doe@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|USA|4195551234|
1|||john.doe@gmail.com|||||||2||
John||Doe|john.doe@gmail.com|self|btw bryant and forida|||CA|14888|US|4195551234|
John||Doe|john.doe@gmail.com|self|16th & mission|||CA|14888|US|4195551234|
john||Doe|john.doe@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|USA|4195551234|
john||Doe|john.doe@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|USA|4195551234|
1|||john.doe@gmail.com||||||14888|2||
john||Doe|john.doe@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|USA|4195551234|
john||Doe|john.doe@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|USA|4195551234|
|||html|||||||||
john||Doe|john.doe@gmail.com||||||14888|||
John||Doe|john.doe@gmail.com|Movitron, Inc. / Compa.ny|16th btw Mission & Valencia||Mountain View|CA|14888|US|4195551234|
John doe|req|the recipient's shipping Name|john.doe@gmail.com||16th btw Mission & Valencia||Mountain View|req|14888|CA|4195551234|
john||doe|john.doe@gmail.com||16th btw Mission & Valencia||||14888|14888|4195551234|
John||Doe|john.doe@gmail.com|Movitron, Inc. / Compa.ny|16th btw Mission & Valencia||Mountain View|CA|14888|US|4195551234|
cardinal|||john.doe@gmail.com|CP|16th btw Mission & Valencia||||14888|us|4195551234|
John||Doe|gmail||16th btw Mission & Valencia||||14888|US|4195551234|
john|k|doe|john.doe@gmail.com||16th btw Mission & Valencia|true|||14888|us|4195551234|
John||Doe|1|Movitron, Inc.|john.doe@gmail.com|john.doe@gmail.com|||14888||4195551234|
John||Doe|john.doe@gmail.com||john.doe@gmail.com|john.doe@gmail.com|||14888|81||
John||Doe|1|Movitron, Inc.|john.doe@gmail.com||||||4195551234|
John||Doe|4195551234|Movitron, Inc.|john.doe@gmail.com|||CA||USA|4195551234|
John||Doe|4195551234|Movitron, Inc.|john.doe@gmail.com|||CA||USA|4195551234|
John||Doe|john.doe@movitron.com|Movitron, Inc.|john.doe@gmail.com|||||Mountain View, CA|4195551234|
||||lowercase capital||||||dk||
John||Doe|john.doe@movitron.com|Movitron, Inc.|john.doe@gmail.com|john.doe@gmail.com|||14888|Mountain View, CA|4195551234|
John||Doe|john.doe@gmail.com|Movitron, Inc. / Compa.ny|john.doe@gmail.com|john.doe@gmail.com|Mountain View|CA|14888|US||
John||Doe|4195551234|Movitron, Inc.|john.doe@gmail.com|john.doe@gmail.com||CA|14888|USA|4195551234|
John||Doe|gmail||john.doe@gmail.com|john.doe@gmail.com|||14888|US|4195551234|
John||Doe|john.doe@gmail.com|Movitron, Inc. / Compa.ny|john.doe@gmail.com|john.doe@gmail.com|Mountain View|CA|14888|US||
John||Doe|4195551234|Movitron, Inc.|john.doe@gmail.com|john.doe@gmail.com||CA|14888|USA|4195551234|
John||Doe|john.doe@movitron.com|Movitron, Inc.|example.com||||||4195551234|
John||Doe|john.doe@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|USA|4195551234|
||||lowercase capital||||||dk||
||||lowercase capital||||||dk||
John||Doe|john.doe@gmail.com|self|16th & mission||7x54lbbs|CA|14888|US|4195551234|
John||Doe|john.doe@gmail.com|self|btw bryant and forida||7x54lbbs|CA|14888|US|4195551234|
John||Doe|john.doe+goto@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|US|4195551234|
John||Doe|PILOT PROGRAM - wireless||1950 Amphitheatre Ave #2||7x54lbbs||14888|us|4195551234|
John||Doe|john.doe@gmail.com|self|btw Mission and Valencia off 16th||7x54lbbs|CA|14888|US|4195551234|
John||Doe|john.doe@gmail.com|self|Movitron 5th Floor||Mountain View|CA|14891|US|4195551234|
John||Doe|419.555.1234||1950 Amphitheatre #2|||CA|14888|US||
John|K|Doe|john.doe@gmail.com||||||Postal Code||4195551234|
John doe|1950 Amphitheatre #2|Doe|john.doe@gmail.com||||Mountain View|CA|14888|US|5551234|
John||Doe|gmail|Movitron, Inc.|john.doe@gmail.com|john.doe@gmail.com|7x54lbbs||14888|US|4195551234|
Annie||Doe||||||||||
JOHN||DOE||||||||||
Betty||Blue|betty.blue@gmail.com||1950 Amphitheatre Ave #2||||14888|AF||
Betty||Blue|betty.blue@gmail.com||1950 Amphitheatre Ave 2|||||AF||
John|K|Doe|john.doe@movitron.com|||||||||
John||Doe|john@incarnate.net|CP|1950 Amphitheatre #2||Mountain View|CA|14888|us|4195551234|
John||Dodo|john.dodo@gmail.com||||Mountain View||14888||4195551236|
John||Doe|john.doe+goto@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|US|4195551234|
John doe|1950 Amphitheatre #2||john.doe@gmail.com||||||14888|US||
John doe|1950 Amphitheatre #2|Doe|john.doe@gmail.com||||Mountain View|CA|14888|US|5551234|
John||Doe|PILOT PROGRAM - wireless||1950 Amphitheatre Ave #2||||||4195551234|
John||Doe|john.doe@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|USA|4195551234|
Betty||Blue|betty.blue@gmail.com||1950 Amphitheatre Ave #2||||14888|AF||
John|K|Doe|John.doe@gmail.com||||||Postal Code|||
John|K|Doe|john.doe@movitron.com|||||||||
John||Doe|john.doe@gmail.com|self|btw Mission and Valencia off 16th|||CA|14888|US|4195551234|
John|K|Doe|john.doe@gmail.com||||||Postal Code||4195551234|
John||Doe|john@incarnate.net|C|1950 Amphitheatre #2||Mountain View|CA|14888|us|4195551234|
John||Doe|john.doe@gmail.com|self|Movitron 5th Floor||Mountain View|CA|14891|US|4195551234|
Movitron|||||10 Pennsylvania St.|at Spear st|Mountain View||14889|CA||
John||Doe|PILOT PROGRAM - wireless||1950 Amphitheatre Ave #2||Mountain View|9110|14891|4601|4195551234|
John||Doe|john.doe@gmail.com||1950 Amphitheatre #2||Mountain View|CA|14888|USA|4195551234|
John||Doe|john@incarnate.net|CP|1950 Amphitheatre #2||Mountain View|CA|14888|us|4195551234|
John||Doe|PILOT PROGRAM - wireless||1950 Amphitheatre Ave #2||7x54lbbs||14888|us|4195551234|
John|K|Doe|John.doe@gmail.com||||||Postal Code|||
John M Doe|US|false|||1950 Amphitheatre Ave #2||Mountain View|CA|14888|US|2935190284|
John doe|1950 Amphitheatre #2||john.doe@gmail.com|Movitron, Inc.|Iceberg @ 10 Pennsylvania 5th Floor||Mountain View|CA|14891|US|4195551234|
JOHN||DOE||||||||||
Betty||Blue|betty.blue@gmail.com||1950 Amphitheatre Ave 2|||||AF||
Annie||Doe||||||||||
John|K|Doe|john.doe@movitron.com||1950 Amphitheatre Ave|No. 2|Mountain View|CA|14891|||
John||Doe|john@incarnate.net|C|1950 Amphitheatre #2||Mountain View|CA|14888|us|4195551234|
John Doe|john.doe@gmail.com|web resume||||||||||
John||Doe|419.555.1234||1950 Amphitheatre #2||7x54lbbs|CA|14888|US||
John||Doe|john.doe@movitron.com|26 or more|1950 Amphitheatre #2||Mountain View|CA|14888|US|4195551234|
JOHN|K|DOE|john.doe@gmail.com||10 Pennsylvania 2nd floor||Mountain View|CA|14777|US|4195551234|
JOHN|–|DOE|john.doe@movitron.com||10 Pennsylvania 2nd floor|Market & Spear|Mountain View|CA|14777|US|4195551234|
Movitron|||||10 Pennsylvania St.|at Spear st|Mountain View||14889|CA||
Movitron|||||10 Pennsylvania St.|at Spear|Mountain View||14889|CA||
JOHN|K|DOE|john.doe@gmail.com||10 Pennsylvania 2nd floor||Mountain View|CA|14891|US|4195551234|
John||Doe|john.doe@gmail.com||1950 Amphitheatre #2||Mountain View|California|14888|United States|4195551234|
John|Doe|john.doe@gmail.com||||||||||
John||Doe|john.doe@gmail.com|Movitron, Inc.|1950 Amphitheatre #2||Mountain View|5551768|14888|27701|4195551234|
John||Doe|john.doe@gmail.com||1950 Amphitheatre Ave Apt 2||Mountain View|CA|14888|Mountain View, CA|4195551234|
subdomain.dreamhost.com|||john.doe@gmail.com||1950 Amphitheatre #2||Mountin View|CA|14890|US|4195551234|
Movitron|– Champlain Meeting|Room|||10 Pennsylvania 5th floor|Market & Spear|Mountain View||14891|USA||
Movitron|– Pythagoras Telepresence|Room|||10 Pennsylvania 2nd floor|Market & Spear|Mountain View||14891|USA||
Movitron|– Champlain Meeting|Room|||10 Pennsylvania 2nd floor|Market & Spear|Mountain View||14891|USA||
Movitron|– Euclid Meeting|Room|||10 Pennsylvania 2nd floor|Market & Spear|Mountain View||14891|USA||
Movitron|– Copernicus Meeting|Room|||10 Pennsylvania 2nd floor|Market & Spear|Mountain View||14891|USA||
Movitron|– Newton Meeting|Room|||10 Pennsylvania 2nd floor|Market & Spear|Mountain View||14891|USA||
Movitron|– Ockham Meeting|Room|||10 Pennsylvania 2nd floor|Market & Spear|Mountain View||14891|USA||
Movitron|– Archimedes Meeting|Room|||10 Pennsylvania 2nd floor|Market & Spear|Mountain View||14891|USA||
Movitron|– Brahmagupta Meeting|Room|||10 Pennsylvania 2nd floor|Market & Spear|Mountain View||14891|USA||
John||Doe|john.doe@gmail.com|Movitron, Inc.|Iceberg @ 10 Pennsylvania 5th Floor||Mountain View|CA|14891|USA|4195551234|
|||||||Miles|WORLD||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
<html>
<head>
<title>AutoFill Form</title>
</head>
<body>
<!-- Test form for Autofill profile submission. Test scripts submit it by
looking the form up by its id ("merge_dup"). -->
<form id="merge_dup" method="post">
<p>
<!-- Profile fields. -->
<!-- Each input's id (not its name) must match a profile key in
dataset_duplicate-profiles.txt, e.g. NAME_FIRST: the test scripts fill the
fields with document.getElementById(key). -->
<label for="NAME_FIRST">First Name:</label>
<input type="text" id="NAME_FIRST" name="firstname"><br/>
<label for="NAME_MIDDLE">Middle Name:</label>
<input type="text" id="NAME_MIDDLE" name="middlename"><br/>
<label for="NAME_LAST">Last Name:</label>
<input type="text" id="NAME_LAST" name="lastname"><br/>
<label for="EMAIL_ADDRESS">Email:</label>
<input type="text" id="EMAIL_ADDRESS" name="email"><br/>
<label for="COMPANY_NAME">Company:</label>
<input type="text" id="COMPANY_NAME" name="company"><br/>
<label for="ADDRESS_HOME_LINE1">Address:</label>
<input type="text" id="ADDRESS_HOME_LINE1" name="address"><br/>
<label for="ADDRESS_HOME_LINE2">Address 2:</label>
<input type="text" id="ADDRESS_HOME_LINE2" name="address2"><br/>
<label for="ADDRESS_HOME_CITY">City:</label>
<input type="text" id="ADDRESS_HOME_CITY" name="city"><br/>
<label for="ADDRESS_HOME_STATE">State:</label>
<input type="text" id="ADDRESS_HOME_STATE" name="state"><br/>
<label for="ADDRESS_HOME_ZIP">Zip:</label>
<input type="text" id="ADDRESS_HOME_ZIP" name="zipcode"><br/>
<label for="ADDRESS_HOME_COUNTRY">Country:</label>
<input type="text" id="ADDRESS_HOME_COUNTRY" name="country"><br/>
<label for="PHONE_HOME_WHOLE_NUMBER">Phone:</label>
<input type="text" id="PHONE_HOME_WHOLE_NUMBER" name="phone"><br/>
<label for="PHONE_FAX_WHOLE_NUMBER">Fax:</label>
<input type="text" id="PHONE_FAX_WHOLE_NUMBER" name="fax"><br/>
<input type="submit" value="send"> <input type="reset">
</p>
</form>
</body>
</html>
......@@ -29,7 +29,10 @@
'autofill',
# Not part of the continuous build.
# Used for testing Autofill crowdsourcing.
'-autofill.AutoFillTest.testAutofillCrowdSourcing',
'-autofill.AutoFillTest.testAutofillCrowdsourcing',
# Not part of the continuous build.
# Used for testing Autofill duplicate profile merging.
'-autofill.AutoFillTest.testMergeDuplicateProfilesInAutofill',
'bookmark_bar',
'bookmarks',
'browser',
......
......@@ -3,8 +3,11 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import logging
import os
import pickle
import dataset_converter
import pyauto_functional # Must be imported before pyauto
import pyauto
......@@ -93,10 +96,63 @@ class AutoFillTest(pyauto.PyUITest):
self.assertEqual([expected_credit_card],
self.GetAutoFillProfile()['credit_cards'])
def testAutofillCrowdSourcing(self):
"""Test able to send POST request of web form to crowd source server.
Require a loop of 1000 submits as the source server only collects 1% of
the data posted."""
def testFilterIncompleteAddresses(self):
  """Verify that a submit without address fields saves no Autofill profile.

  Fills the test form with name, email, company and phone only (no street
  address, city, state or zip), submits it, and expects the stored profile
  list to stay empty.
  """
  incomplete_profile = {
      'NAME_FIRST': 'Bob',
      'NAME_LAST': 'Smith',
      'EMAIL_ADDRESS': 'bsmith@example.com',
      'COMPANY_NAME': 'Company X',
      'PHONE_HOME_WHOLE_NUMBER': '650-123-4567',
  }
  form_url = self.GetHttpURLForDataPath(
      os.path.join('autofill', 'dup-profiles-test.html'))
  self.NavigateToURL(form_url)

  # Each form input is looked up by id; the ids equal the profile keys.
  fill_template = ('document.getElementById("%s").value = "%s"; '
                   'window.domAutomationController.send("done");')
  for field_id, field_value in incomplete_profile.iteritems():
    self.ExecuteJavascript(fill_template % (field_id, field_value), 0, 0)

  # Submit the form; "done" is reported from the page's unload handler.
  submit_js = ('\n'
               'document.getElementById("merge_dup").submit();\n'
               'window.addEventListener("unload", function() {\n'
               'window.domAutomationController.send("done");\n'
               '});\n')
  self.ExecuteJavascript(submit_js, 0, 0)

  saved_profiles = self.GetAutoFillProfile()['profiles']
  self.assertEqual([], saved_profiles)
def testFilterMalformedEmailAddresses(self):
  """Test Autofill filters out malformed email address during form submit.

  Fills the test form with a complete address plus an email value that is
  not an email address ('garbage'), submits it, and checks that the first
  stored profile carries no EMAIL_ADDRESS entry.
  """
  profile = {'NAME_FIRST': 'Bob',
             'NAME_LAST': 'Smith',
             'EMAIL_ADDRESS': 'garbage',
             'ADDRESS_HOME_LINE1': '1234 H St.',
             'ADDRESS_HOME_CITY': 'San Jose',
             'ADDRESS_HOME_STATE': 'CA',
             'ADDRESS_HOME_ZIP': '95110',
             'COMPANY_NAME': 'Company X',
             'PHONE_HOME_WHOLE_NUMBER': '408-123-4567',}
  url = self.GetHttpURLForDataPath(
      os.path.join('autofill', 'dup-profiles-test.html'))
  self.NavigateToURL(url)
  # Each form input is looked up by id; the ids equal the profile keys.
  for key, value in profile.iteritems():
    script = ('document.getElementById("%s").value = "%s"; '
              'window.domAutomationController.send("done");') % (key, value)
    self.ExecuteJavascript(script, 0, 0)
  # Submit the form; "done" is reported from the page's unload handler.
  js_code = ('\n'
             'document.getElementById("merge_dup").submit();\n'
             'window.addEventListener("unload", function() {\n'
             'window.domAutomationController.send("done");\n'
             '});\n')
  self.ExecuteJavascript(js_code, 0, 0)
  profiles = self.GetAutoFillProfile()['profiles']
  # Report a missing profile as a test failure with a readable message
  # instead of an IndexError raised by the [0] lookup below.
  self.assertTrue(
      profiles, 'TEST FAIL: No Autofill profile was saved by the form submit.')
  # Use a unittest assertion (a failure) rather than raising KeyError (which
  # the runner reports as an unrelated error).
  self.assertFalse(
      'EMAIL_ADDRESS' in profiles[0],
      'TEST FAIL: Malformed email address is saved in profiles.')
def testAutofillCrowdsourcing(self):
"""Test able to send POST request of web form to Autofill server.
The Autofill server processes the data offline, so it can take a few days
for the result to be detectable. Manual verification is required.
"""
# HTML file needs to be run from a specific http:// url to be able to verify
# the results a few days later by visiting the same url.
url = 'http://www.corp.google.com/~dyu/autofill/crowdsourcing-test.html'
......@@ -105,24 +161,60 @@ class AutoFillTest(pyauto.PyUITest):
'crowdsource_autofill.txt')
profiles = self.EvalDataFrom(file_path)
self.FillAutoFillProfile(profiles=profiles)
# Autofill server captures 2.5% of the data posted.
# Looping 1000 times is a safe minimum to exceed the server's threshold or
# noise.
for i in range(1000):
fname = self.GetAutoFillProfile()['profiles'][0]['NAME_FIRST']
lname = self.GetAutoFillProfile()['profiles'][0]['NAME_LAST']
email = self.GetAutoFillProfile()['profiles'][0]['EMAIL_ADDRESS']
# Submit form to collect crowdsourcing data for Autofill.
self.NavigateToURL(url, 0, 0)
fname_field = 'document.getElementById("fn").value = "%s"; ' \
'window.domAutomationController.send("done")' % fname
lname_field = 'document.getElementById("ln").value = "%s"; ' \
'window.domAutomationController.send("done")' % lname
email_field = 'document.getElementById("em").value = "%s"; ' \
'window.domAutomationController.send("done")' % email
fname_field = ('document.getElementById("fn").value = "%s"; '
'window.domAutomationController.send("done");') % fname
lname_field = ('document.getElementById("ln").value = "%s"; '
'window.domAutomationController.send("done");') % lname
email_field = ('document.getElementById("em").value = "%s"; '
'window.domAutomationController.send("done");') % email
self.ExecuteJavascript(fname_field, 0, 0);
self.ExecuteJavascript(lname_field, 0, 0);
self.ExecuteJavascript(email_field, 0, 0);
self.ExecuteJavascript('document.getElementById("frmsubmit").submit();'
'window.domAutomationController.send("done")',
'window.domAutomationController.send("done");',
0, 0)
def testMergeDuplicateProfilesInAutofill(self):
  """Test Autofill ability to merge duplicate profiles and throw away junk.

  Converts the dataset file into a list of profile dictionaries, submits
  every profile through the test form, and verifies that Autofill ends up
  with fewer profiles than were submitted.  The resulting profile list is
  pickled to merged-profiles.txt in the autofill data directory.
  """
  # HTML file needs to be run from a http:// url.
  url = self.GetHttpURLForDataPath(
      os.path.join('autofill', 'duplicate_profiles_test.html'))
  # Run the parser script to generate the dictionary list needed for the
  # profiles.
  c = dataset_converter.DatasetConverter(
      os.path.join(self.DataDir(), 'autofill', 'dataset.txt'),
      logging_level=logging.INFO)  # Set verbosity to INFO, WARNING, ERROR.
  list_of_dict = c.Convert()
  for profile in list_of_dict:
    self.NavigateToURL(url)
    # NOTE(review): values are placed unescaped inside a double-quoted JS
    # string; a value containing '"' would break the script. The converter
    # only backslash-escapes single quotes.
    for key, value in profile.iteritems():
      script = ('document.getElementById("%s").value = "%s"; '
                'window.domAutomationController.send("done");') % (key, value)
      self.ExecuteJavascript(script, 0, 0)
    self.ExecuteJavascript('document.getElementById("merge_dup").submit();'
                           'window.domAutomationController.send("done");',
                           0, 0)
  # Fetch the stored profiles once and reuse them for both the check and the
  # dump, so the file always holds exactly what was verified.
  profile_dict = self.GetAutoFillProfile()['profiles']
  # Verify total number of inputted profiles is greater than the final number
  # of profiles after merging.
  self.assertTrue(
      len(list_of_dict) > len(profile_dict),
      'Expected fewer than %d profiles after merging, found %d.' % (
          len(list_of_dict), len(profile_dict)))
  # Write profile dictionary to a file.
  merged_profile = os.path.join(self.DataDir(), 'autofill',
                                'merged-profiles.txt')
  # The with-statement closes the file even if pickle.dump raises.
  with open(merged_profile, 'wb') as output:
    pickle.dump(profile_dict, output)
if __name__ == '__main__':
......
#!/usr/bin/python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Converts profile datasets to dictionary list for Autofill profiles.
Used for test autofill.AutoFillTest.testMergeDuplicateProfilesInAutofill.
"""
import codecs
import logging
import os
import re
import sys
class DatasetConverter(object):
  """Converts a "|"-separated profile dataset into Autofill profile dicts.

  Every usable line of the input file holds exactly one value per entry of
  _fields, separated by "|" and in that order.  Lines without any "|"
  (titles, column headers) and lines with a different number of values are
  skipped.
  """

  # Autofill profile keys, in the column order of the dataset file.
  _fields = [
    u'NAME_FIRST',
    u'NAME_MIDDLE',
    u'NAME_LAST',
    u'EMAIL_ADDRESS',
    u'COMPANY_NAME',
    u'ADDRESS_HOME_LINE1',
    u'ADDRESS_HOME_LINE2',
    u'ADDRESS_HOME_CITY',
    u'ADDRESS_HOME_STATE',
    u'ADDRESS_HOME_ZIP',
    u'ADDRESS_HOME_COUNTRY',
    u'PHONE_HOME_WHOLE_NUMBER',
    u'PHONE_FAX_WHOLE_NUMBER',
  ]
  _record_length = len(_fields)
  # %-template for the textual form of one record, one %s per field:
  #   {u'NAME_FIRST': u'%s', u'NAME_MIDDLE': u'%s', ...,},
  # Built with join() so that no loop variable is left behind as a class
  # attribute.
  _output_pattern = (
      u'{' + u' '.join(u"u'%s': u'%%s'," % key for key in _fields) + u'},')
  _re_single_quote = re.compile("'", re.UNICODE)
  _logger = logging.getLogger(__name__)
  # Console handler shared by all instances; created on first construction.
  _console_handler = None

  def __init__(self, input_filename, output_filename=None,
               logging_level=logging.ERROR):
    """Constructs a dataset converter object.

    Input format, one record per line (13 "|"-separated values):
      NAME_FIRST|NAME_MIDDLE|NAME_LAST|EMAIL_ADDRESS|COMPANY_NAME|
      ADDRESS_HOME_LINE1|ADDRESS_HOME_LINE2|ADDRESS_HOME_CITY|
      ADDRESS_HOME_STATE|ADDRESS_HOME_ZIP|ADDRESS_HOME_COUNTRY|
      PHONE_HOME_WHOLE_NUMBER|PHONE_FAX_WHOLE_NUMBER

    Full output pattern:
      "{u'NAME_FIRST': u'%s', u'NAME_MIDDLE': u'%s', u'NAME_LAST': u'%s',
      u'EMAIL_ADDRESS': u'%s', u'COMPANY_NAME': u'%s', u'ADDRESS_HOME_LINE1':
      u'%s', u'ADDRESS_HOME_LINE2': u'%s', u'ADDRESS_HOME_CITY': u'%s',
      u'ADDRESS_HOME_STATE': u'%s', u'ADDRESS_HOME_ZIP': u'%s',
      u'ADDRESS_HOME_COUNTRY': u'%s', u'PHONE_HOME_WHOLE_NUMBER': u'%s',
      u'PHONE_FAX_WHOLE_NUMBER': u'%s',},"

    Args:
      input_filename: name and path of the input dataset.  A relative path is
          resolved against the directory of the running script (sys.argv[0]).
      output_filename: name and path of the converted file, default is none.
          Used as given (it is not resolved against the script directory).
      logging_level: set verbosity levels, default is ERROR.

    Raises:
      IOError: error if input file does not exist.
    """
    # The logger is shared by every instance, so attach the console handler
    # only once; adding one per instance would repeat each message.  The most
    # recently constructed instance decides the console verbosity.
    if DatasetConverter._console_handler is None:
      DatasetConverter._console_handler = logging.StreamHandler()
      self._logger.addHandler(DatasetConverter._console_handler)
    DatasetConverter._console_handler.setLevel(logging_level)
    # A handler level alone is not enough: records below the logger's
    # effective level never reach any handler.  Only ever lower the logger's
    # threshold so that an already more verbose configuration is kept.
    if self._logger.getEffectiveLevel() > logging_level:
      self._logger.setLevel(logging_level)

    self._input_filename = os.path.join(os.path.dirname(sys.argv[0]),
                                        input_filename)
    if not os.path.isfile(self._input_filename):
      msg = 'File "%s" does not exist' % self._input_filename
      self._logger.error(msg)
      raise IOError(msg)
    self._output_filename = output_filename

  def _CreateDictionaryFromRecord(self, record):
    """Constructs and returns a dictionary from a record in the dataset file.

    Backslash-escapes single quotes first and uses split('|') to separate
    values, so the returned values contain the escaped quotes.  A valid
    record has exactly one value per entry of _fields.

    Example:
      The record
        u'John||Doe|jd@example.com|||||||||'
      becomes
        {
          u'NAME_FIRST': u'John',
          u'NAME_MIDDLE': u'',
          u'NAME_LAST': u'Doe',
          u'EMAIL_ADDRESS': u'jd@example.com',
          ... (every remaining key maps to u'')
        }

    Args:
      record: row of record from the dataset file.

    Returns:
      None if the current record line is invalid or a dictionary representing a
      single record from the dataset file.
    """
    # Ignore irrelevant record lines that do not contain '|'.
    if '|' not in record:
      return None
    # Escaping single quote: "'" -> "\'"
    record = self._re_single_quote.sub(r"\'", record)
    values = record.split('|')
    # Check for case when a record may have more or less fields than expected.
    if len(values) != self._record_length:
      self._logger.warning(
          'A "|" separated line has %d fields instead of %d: %s',
          len(values), self._record_length, record)
      return None
    return dict(zip(self._fields, values))

  def Convert(self):
    """Function to convert input data into the desired output format.

    Reads the input file as UTF-8, builds one dictionary per valid record
    and, when an output filename was given, also writes the records to that
    file (UTF-8 with BOM) as a bracketed list, one record per line.

    Returns:
      List that holds all the dictionaries.
    """
    # sys.stdout.encoding is None when stdout is not a terminal (e.g. piped);
    # fall back to UTF-8 instead of letting encode() raise a TypeError.
    console_encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
    log_info = self._logger.isEnabledFor(logging.INFO)
    # Read bytes and decode explicitly, once, at the I/O boundary.
    with open(self._input_filename, 'rb') as input_file:
      if self._output_filename:
        output_file = codecs.open(self._output_filename, mode='wb',
                                  encoding='utf-8-sig')
      else:
        output_file = None
      try:
        list_of_dict = []
        if output_file:
          output_file.write('[')
          output_file.write(os.linesep)
        for raw_line in input_file:
          raw_line = raw_line.strip()
          if not raw_line:
            continue
          line = raw_line.decode('UTF-8')
          output_record = self._CreateDictionaryFromRecord(line)
          if not output_record:
            continue
          list_of_dict.append(output_record)
          output_line = self._output_pattern % tuple(
              [output_record[key] for key in self._fields])
          if output_file:
            output_file.write(output_line)
            output_file.write(os.linesep)
          if log_info:
            # Drop characters the console cannot represent.
            self._logger.info('%d: %s', len(list_of_dict),
                              line.encode(console_encoding, 'ignore'))
            self._logger.info('\tconverted to: %s',
                              output_line.encode(console_encoding, 'ignore'))
        if output_file:
          output_file.write(']')
          output_file.write(os.linesep)
        self._logger.info('%d lines converted SUCCESSFULLY!',
                          len(list_of_dict))
        self._logger.info('--- FINISHED ---')
        return list_of_dict
      finally:
        if output_file:
          output_file.close()
def main():
  """Converts the Autofill dataset into dataset_duplicate-profiles.txt.

  Runs the converter at INFO verbosity on the hard-coded dataset paths.
  """
  dataset_path = r'../data/autofill/dataset.txt'
  converted_path = r'../data/autofill/dataset_duplicate-profiles.txt'
  converter = DatasetConverter(dataset_path, converted_path, logging.INFO)
  converter.Convert()


if __name__ == '__main__':
  main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment