Commit a57325e1 authored by dyu@chromium.org's avatar dyu@chromium.org

Test Autofill's ability to merge duplicate profiles and throw away junk
profiles. Includes a dataset converter script that converts a CSV file into a
list of profile dictionaries.

testMergeDuplicateProfilesInAutofill

Added additional tests:
testFilterMalformedEmailAddresses - covers fixed bug 73654.
testFilterIncompleteAddresses - covers fixed bug 71710.

BUG=none
TEST=none
Review URL: http://codereview.chromium.org/6246147

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@75344 0039d316-1c4b-4281-b951-d872f2087c98
parent b964574e
This diff is collapsed.
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
<html>
<head>
<title>AutoFill Form</title>
</head>
<body>
<form id="merge_dup" method="post">
<p>
<!-- Profile -->
<!-- The form element ids must match the keys in
dataset_duplicate-profiles.txt; the tests fill each field via
document.getElementById(key). -->
<label for="NAME_FIRST">First Name:</label>
<input type="text" id="NAME_FIRST" name="firstname"><br/>
<label for="NAME_MIDDLE">Middle Name:</label>
<input type="text" id="NAME_MIDDLE" name="middlename"><br/>
<label for="NAME_LAST">Last Name:</label>
<input type="text" id="NAME_LAST" name="lastname"><br/>
<label for="EMAIL_ADDRESS">Email:</label>
<input type="text" id="EMAIL_ADDRESS" name="email"><br/>
<label for="COMPANY_NAME">Company:</label>
<input type="text" id="COMPANY_NAME" name="company"><br/>
<label for="ADDRESS_HOME_LINE1">Address:</label>
<input type="text" id="ADDRESS_HOME_LINE1" name="address"><br/>
<label for="ADDRESS_HOME_LINE2">Address 2:</label>
<input type="text" id="ADDRESS_HOME_LINE2" name="address2"><br/>
<label for="ADDRESS_HOME_CITY">City:</label>
<input type="text" id="ADDRESS_HOME_CITY" name="city"><br/>
<label for="ADDRESS_HOME_STATE">State:</label>
<input type="text" id="ADDRESS_HOME_STATE" name="state"><br/>
<label for="ADDRESS_HOME_ZIP">Zip:</label>
<input type="text" id="ADDRESS_HOME_ZIP" name="zipcode"><br/>
<label for="ADDRESS_HOME_COUNTRY">Country:</label>
<input type="text" id="ADDRESS_HOME_COUNTRY" name="country"><br/>
<label for="PHONE_HOME_WHOLE_NUMBER">Phone:</label>
<input type="text" id="PHONE_HOME_WHOLE_NUMBER" name="phone"><br/>
<label for="PHONE_FAX_WHOLE_NUMBER">Fax:</label>
<input type="text" id="PHONE_FAX_WHOLE_NUMBER" name="fax"><br/>
<input type="submit" value="send"> <input type="reset">
</p>
</form>
</body>
</html>
...@@ -29,7 +29,10 @@ ...@@ -29,7 +29,10 @@
'autofill', 'autofill',
# Not part of the continuous build. # Not part of the continuous build.
# Used for testing Autofill crowdsourcing. # Used for testing Autofill crowdsourcing.
'-autofill.AutoFillTest.testAutofillCrowdSourcing', '-autofill.AutoFillTest.testAutofillCrowdsourcing',
# Not part of the continuous build.
# Used for testing Autofill duplicate profile merging.
'-autofill.AutoFillTest.testMergeDuplicateProfilesInAutofill',
'bookmark_bar', 'bookmark_bar',
'bookmarks', 'bookmarks',
'browser', 'browser',
......
...@@ -3,8 +3,11 @@ ...@@ -3,8 +3,11 @@
# Use of this source code is governed by a BSD-style license that can be # Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file. # found in the LICENSE file.
import logging
import os import os
import pickle
import dataset_converter
import pyauto_functional # Must be imported before pyauto import pyauto_functional # Must be imported before pyauto
import pyauto import pyauto
...@@ -93,10 +96,63 @@ class AutoFillTest(pyauto.PyUITest): ...@@ -93,10 +96,63 @@ class AutoFillTest(pyauto.PyUITest):
self.assertEqual([expected_credit_card], self.assertEqual([expected_credit_card],
self.GetAutoFillProfile()['credit_cards']) self.GetAutoFillProfile()['credit_cards'])
def testAutofillCrowdSourcing(self): def testFilterIncompleteAddresses(self):
"""Test able to send POST request of web form to crowd source server. """Test Autofill filters out profile with incomplete address info."""
Require a loop of 1000 submits as the source server only collects 1% of profile = {'NAME_FIRST': 'Bob',
the data posted.""" 'NAME_LAST': 'Smith',
'EMAIL_ADDRESS': 'bsmith@example.com',
'COMPANY_NAME': 'Company X',
'PHONE_HOME_WHOLE_NUMBER': '650-123-4567',}
url = self.GetHttpURLForDataPath(
os.path.join('autofill', 'dup-profiles-test.html'))
self.NavigateToURL(url)
for key, value in profile.iteritems():
script = ('document.getElementById("%s").value = "%s"; '
'window.domAutomationController.send("done");') % (key, value)
self.ExecuteJavascript(script, 0, 0)
js_code = """
document.getElementById("merge_dup").submit();
window.addEventListener("unload", function() {
window.domAutomationController.send("done");
});
"""
self.ExecuteJavascript(js_code, 0, 0)
self.assertEqual([], self.GetAutoFillProfile()['profiles'])
def testFilterMalformedEmailAddresses(self):
  """Test Autofill filters out malformed email address during form submit.

  Fills the test form with a profile whose email value is the string
  'garbage', submits the form, and verifies that the first saved Autofill
  profile has no EMAIL_ADDRESS entry.
  """
  profile = {'NAME_FIRST': 'Bob',
             'NAME_LAST': 'Smith',
             'EMAIL_ADDRESS': 'garbage',
             'ADDRESS_HOME_LINE1': '1234 H St.',
             'ADDRESS_HOME_CITY': 'San Jose',
             'ADDRESS_HOME_STATE': 'CA',
             'ADDRESS_HOME_ZIP': '95110',
             'COMPANY_NAME': 'Company X',
             'PHONE_HOME_WHOLE_NUMBER': '408-123-4567',}
  url = self.GetHttpURLForDataPath(
      os.path.join('autofill', 'dup-profiles-test.html'))
  self.NavigateToURL(url)
  # The profile keys double as the ids of the form's input elements.
  for key, value in profile.iteritems():
    script = ('document.getElementById("%s").value = "%s"; '
              'window.domAutomationController.send("done");') % (key, value)
    self.ExecuteJavascript(script, 0, 0)
  # Reply to the automation controller only once the page unloads, i.e. after
  # the form submission has started navigating away.
  js_code = """
    document.getElementById("merge_dup").submit();
    window.addEventListener("unload", function() {
      window.domAutomationController.send("done");
    });
  """
  self.ExecuteJavascript(js_code, 0, 0)
  profiles = self.GetAutoFillProfile()['profiles']
  # Report problems as test failures rather than IndexError/KeyError errors.
  self.assertTrue(profiles,
                  'TEST FAIL: No profile was saved after form submit.')
  self.assertFalse(
      'EMAIL_ADDRESS' in profiles[0],
      'TEST FAIL: Malformed email address is saved in profiles.')
def testAutofillCrowdsourcing(self):
"""Test able to send POST request of web form to Autofill server.
The Autofill server processes the data offline, so it can take a few days
for the result to be detectable. Manual verification is required.
"""
# HTML file needs to be run from a specific http:// url to be able to verify # HTML file needs to be run from a specific http:// url to be able to verify
# the results a few days later by visiting the same url. # the results a few days later by visiting the same url.
url = 'http://www.corp.google.com/~dyu/autofill/crowdsourcing-test.html' url = 'http://www.corp.google.com/~dyu/autofill/crowdsourcing-test.html'
...@@ -105,24 +161,60 @@ class AutoFillTest(pyauto.PyUITest): ...@@ -105,24 +161,60 @@ class AutoFillTest(pyauto.PyUITest):
'crowdsource_autofill.txt') 'crowdsource_autofill.txt')
profiles = self.EvalDataFrom(file_path) profiles = self.EvalDataFrom(file_path)
self.FillAutoFillProfile(profiles=profiles) self.FillAutoFillProfile(profiles=profiles)
# Autofill server captures 2.5% of the data posted.
# Looping 1000 times is a safe minimum to exceed the server's threshold or
# noise.
for i in range(1000): for i in range(1000):
fname = self.GetAutoFillProfile()['profiles'][0]['NAME_FIRST'] fname = self.GetAutoFillProfile()['profiles'][0]['NAME_FIRST']
lname = self.GetAutoFillProfile()['profiles'][0]['NAME_LAST'] lname = self.GetAutoFillProfile()['profiles'][0]['NAME_LAST']
email = self.GetAutoFillProfile()['profiles'][0]['EMAIL_ADDRESS'] email = self.GetAutoFillProfile()['profiles'][0]['EMAIL_ADDRESS']
# Submit form to collect crowdsourcing data for Autofill. # Submit form to collect crowdsourcing data for Autofill.
self.NavigateToURL(url, 0, 0) self.NavigateToURL(url, 0, 0)
fname_field = 'document.getElementById("fn").value = "%s"; ' \ fname_field = ('document.getElementById("fn").value = "%s"; '
'window.domAutomationController.send("done")' % fname 'window.domAutomationController.send("done");') % fname
lname_field = 'document.getElementById("ln").value = "%s"; ' \ lname_field = ('document.getElementById("ln").value = "%s"; '
'window.domAutomationController.send("done")' % lname 'window.domAutomationController.send("done");') % lname
email_field = 'document.getElementById("em").value = "%s"; ' \ email_field = ('document.getElementById("em").value = "%s"; '
'window.domAutomationController.send("done")' % email 'window.domAutomationController.send("done");') % email
self.ExecuteJavascript(fname_field, 0, 0); self.ExecuteJavascript(fname_field, 0, 0);
self.ExecuteJavascript(lname_field, 0, 0); self.ExecuteJavascript(lname_field, 0, 0);
self.ExecuteJavascript(email_field, 0, 0); self.ExecuteJavascript(email_field, 0, 0);
self.ExecuteJavascript('document.getElementById("frmsubmit").submit();' self.ExecuteJavascript('document.getElementById("frmsubmit").submit();'
'window.domAutomationController.send("done")', 'window.domAutomationController.send("done");',
0, 0)
def testMergeDuplicateProfilesInAutofill(self):
  """Test Autofill ability to merge duplicate profiles and throw away junk.

  Converts the dataset file into a list of profile dictionaries, submits each
  one through the test form, checks that fewer profiles were saved than were
  submitted, and pickles the saved profiles to 'merged-profiles.txt' in the
  autofill data directory.
  """
  # HTML file needs to be run from a http:// url.
  # NOTE(review): the other form tests in this file load
  # 'dup-profiles-test.html'; confirm which file name actually exists.
  url = self.GetHttpURLForDataPath(
      os.path.join('autofill', 'duplicate_profiles_test.html'))
  # Run the parser script to generate the dictionary list needed for the
  # profiles.
  converter = dataset_converter.DatasetConverter(
      os.path.join(self.DataDir(), 'autofill', 'dataset.txt'),
      logging_level=logging.INFO)  # Set verbosity to INFO, WARNING, ERROR.
  list_of_dict = converter.Convert()

  for profile in list_of_dict:
    self.NavigateToURL(url)
    # The profile keys double as the ids of the form's input elements.
    for key, value in profile.iteritems():
      script = ('document.getElementById("%s").value = "%s"; '
                'window.domAutomationController.send("done");') % (key, value)
      self.ExecuteJavascript(script, 0, 0)
    self.ExecuteJavascript('document.getElementById("merge_dup").submit();'
                           'window.domAutomationController.send("done");',
                           0, 0)

  # Fetch the saved profiles once so the assertion and the dump below see the
  # same snapshot.
  merged_profiles = self.GetAutoFillProfile()['profiles']
  # Verify total number of inputted profiles is greater than the final number
  # of profiles after merging.
  self.assertTrue(
      len(list_of_dict) > len(merged_profiles),
      'Expected fewer than %d profiles after merging, found %d.' % (
          len(list_of_dict), len(merged_profiles)))

  # Write profile dictionary to a file.
  merged_profile_path = os.path.join(self.DataDir(), 'autofill',
                                     'merged-profiles.txt')
  output = open(merged_profile_path, 'wb')
  try:
    pickle.dump(merged_profiles, output)
  finally:
    # Close the file even if pickling fails.
    output.close()
if __name__ == '__main__': if __name__ == '__main__':
......
#!/usr/bin/python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Converts profile datasets to dictionary list for Autofill profiles.
Used for test autofill.AutoFillTest.testMergeDuplicateProfilesInAutofill.
"""
import codecs
import logging
import os
import re
import sys
class DatasetConverter(object):
  """Converts a '|'-separated profile dataset into Autofill profile dicts.

  Every usable input line holds exactly one value per entry of _fields, in
  that order, separated by '|'. Blank lines, lines without '|' and lines with
  a different number of fields are skipped.
  """

  # Profile keys, in the order their values appear in an input record.
  _fields = [
      u'NAME_FIRST',
      u'NAME_MIDDLE',
      u'NAME_LAST',
      u'EMAIL_ADDRESS',
      u'COMPANY_NAME',
      u'ADDRESS_HOME_LINE1',
      u'ADDRESS_HOME_LINE2',
      u'ADDRESS_HOME_CITY',
      u'ADDRESS_HOME_STATE',
      u'ADDRESS_HOME_ZIP',
      u'ADDRESS_HOME_COUNTRY',
      u'PHONE_HOME_WHOLE_NUMBER',
      u'PHONE_FAX_WHOLE_NUMBER',
  ]
  _record_length = len(_fields)
  # Template for one converted record:
  # "{u'NAME_FIRST': u'%s', ..., u'PHONE_FAX_WHOLE_NUMBER': u'%s',},"
  # A generator expression is used so no loop variable ends up as a class
  # attribute.
  _output_pattern = u'{' + u' '.join(
      u"u'%s': u'%%s'," % key for key in _fields) + u'},'
  _re_single_quote = re.compile("'", re.UNICODE)
  _logger = logging.getLogger(__name__)
  # Console handler shared by all instances; created on first construction so
  # repeated instantiation does not stack handlers and duplicate messages.
  _console_handler = None

  def __init__(self, input_filename, output_filename=None,
               logging_level=logging.ERROR):
    """Constructs a dataset converter object.

    Input line format (one record per line, 13 '|'-separated values):
      NAME_FIRST|NAME_MIDDLE|NAME_LAST|EMAIL_ADDRESS|COMPANY_NAME|
      ADDRESS_HOME_LINE1|ADDRESS_HOME_LINE2|ADDRESS_HOME_CITY|
      ADDRESS_HOME_STATE|ADDRESS_HOME_ZIP|ADDRESS_HOME_COUNTRY|
      PHONE_HOME_WHOLE_NUMBER|PHONE_FAX_WHOLE_NUMBER

    Full output pattern:
      "{u'NAME_FIRST': u'%s', u'NAME_MIDDLE': u'%s', u'NAME_LAST': u'%s',
      u'EMAIL_ADDRESS': u'%s', u'COMPANY_NAME': u'%s', u'ADDRESS_HOME_LINE1':
      u'%s', u'ADDRESS_HOME_LINE2': u'%s', u'ADDRESS_HOME_CITY': u'%s',
      u'ADDRESS_HOME_STATE': u'%s', u'ADDRESS_HOME_ZIP': u'%s',
      u'ADDRESS_HOME_COUNTRY': u'%s', u'PHONE_HOME_WHOLE_NUMBER': u'%s',
      u'PHONE_FAX_WHOLE_NUMBER': u'%s',},"

    Args:
      input_filename: name and path of the input dataset. A relative path is
                      resolved against the directory of sys.argv[0].
      output_filename: name and path of the converted file, default is none
                       (no file is written).
      logging_level: set verbosity levels, default is ERROR.

    Raises:
      IOError: error if input file does not exist.
    """
    if DatasetConverter._console_handler is None:
      DatasetConverter._console_handler = logging.StreamHandler()
      self._logger.addHandler(DatasetConverter._console_handler)
    # The most recently constructed converter decides the console verbosity.
    DatasetConverter._console_handler.setLevel(logging_level)
    # A handler level alone is not enough: records below the logger's
    # effective level never reach any handler. Only ever lower the logger
    # level so existing, more verbose configuration is left untouched.
    if self._logger.getEffectiveLevel() > logging_level:
      self._logger.setLevel(logging_level)

    self._input_filename = os.path.join(os.path.dirname(sys.argv[0]),
                                        input_filename)
    if not os.path.isfile(self._input_filename):
      msg = 'File "%s" does not exist' % self._input_filename
      self._logger.error(msg)
      raise IOError(msg)
    self._output_filename = output_filename

  def _CreateDictionaryFromRecord(self, record):
    """Constructs and returns a dictionary from a record in the dataset file.

    Escapes single quotation first and uses split('|') to separate values.
    A record is only accepted when it splits into exactly one value per entry
    of _fields. The returned values keep the backslash-escaped single quotes.

    Example:
      The 13-field record
        u'John||Doe|||||Mountain View|||||'
      returns a dictionary with all 13 keys, including
        {
          u'NAME_FIRST': u'John',
          u'NAME_MIDDLE': u'',
          u'NAME_LAST': u'Doe',
          u'ADDRESS_HOME_CITY': u'Mountain View',
          ...
        }

    Args:
      record: row of record from the dataset file.

    Returns:
      None if the current record line is invalid or a dictionary representing a
      single record from the dataset file.
    """
    # Ignore irrelevant record lines that do not contain '|'.
    if '|' not in record:
      return None
    # Escaping single quote: "'" -> "\'"
    record = self._re_single_quote.sub(r"\'", record)
    values = record.split('|')
    # Check for case when a record may have more or less fields than expected.
    if len(values) != self._record_length:
      self._logger.warning(
          'A "|" separated line has %d fields instead of %d: %s',
          len(values), self._record_length, record)
      return None
    return dict(zip(self._fields, values))

  def Convert(self):
    """Function to convert input data into the desired output format.

    Reads the input file as UTF-8, converts every valid record and, when an
    output filename was given, writes the converted records to it as a
    bracketed list encoded as 'utf-8-sig'.

    Returns:
      List that holds all the dictionaries.
    """
    # sys.stdout.encoding can be None (e.g. when output is piped); fall back
    # to UTF-8 so encoding the log text cannot fail.
    console_encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
    # Read raw bytes and decode explicitly so the result does not depend on
    # the platform's text-mode handling.
    with open(self._input_filename, 'rb') as input_file:
      output_file = None
      if self._output_filename:
        output_file = codecs.open(self._output_filename, mode='wb',
                                  encoding='utf-8-sig')
      try:
        list_of_dict = []
        if output_file:
          output_file.write(u'[')
          output_file.write(os.linesep)
        for raw_line in input_file:
          raw_line = raw_line.strip()
          if not raw_line:
            continue
          line = raw_line.decode('utf-8')
          output_record = self._CreateDictionaryFromRecord(line)
          if not output_record:
            continue
          list_of_dict.append(output_record)
          output_line = self._output_pattern % tuple(
              [output_record[key] for key in self._fields])
          if output_file:
            output_file.write(output_line)
            output_file.write(os.linesep)
          self._logger.info('%d: %s', len(list_of_dict),
                            line.encode(console_encoding, 'ignore'))
          self._logger.info('\tconverted to: %s',
                            output_line.encode(console_encoding, 'ignore'))
        if output_file:
          output_file.write(u']')
          output_file.write(os.linesep)
        self._logger.info('%d lines converted SUCCESSFULLY!',
                          len(list_of_dict))
        self._logger.info('--- FINISHED ---')
        return list_of_dict
      finally:
        if output_file:
          output_file.close()
def main():
  """Converts the Autofill dataset file and writes the converted copy.

  Runs the converter at INFO verbosity on '../data/autofill/dataset.txt' and
  writes the result to '../data/autofill/dataset_duplicate-profiles.txt'.
  """
  converter = DatasetConverter(
      input_filename=r'../data/autofill/dataset.txt',
      output_filename=r'../data/autofill/dataset_duplicate-profiles.txt',
      logging_level=logging.INFO)
  converter.Convert()


if __name__ == '__main__':
  main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment