Commit f446efcd authored by Torne (Richard Coles)'s avatar Torne (Richard Coles) Committed by Commit Bot

Only match addresses without zip codes if at the end.

The old native implementation of findAddress only accepted addresses
without zip codes if they appeared at the end of the string. This was
probably a bug as the documentation implies this should work in all
cases, but fixing this bug has caused a lot of false positives while not
fixing very many false negatives and this functionality is being
deprecated anyway, so change it back.

Bug: 829411
Change-Id: Id280f524eaaade1851ae23e7ed17d0755ddb2ba9
Reviewed-on: https://chromium-review.googlesource.com/998754Reviewed-by: default avatarChangwan Ryu <changwan@chromium.org>
Commit-Queue: Richard Coles <torne@chromium.org>
Cr-Commit-Position: refs/heads/master@{#548873}
parent a17997b9
...@@ -444,20 +444,21 @@ public class FindAddress { ...@@ -444,20 +444,21 @@ public class FindAddress {
// At this point we've matched a state; try to match a zip code after it. // At this point we've matched a state; try to match a zip code after it.
Matcher zipMatcher = sWordRe.matcher(content); Matcher zipMatcher = sWordRe.matcher(content);
if (zipMatcher.find(stateMatch.end()) if (zipMatcher.find(stateMatch.end())) {
&& isValidZipCode(zipMatcher.group(0), stateMatch)) { if (isValidZipCode(zipMatcher.group(0), stateMatch)) {
return zipMatcher.end(); return zipMatcher.end();
}
} else {
// The content ends with a state but no zip
// code. This is a legal match according to the
// documentation. N.B. This is equivalent to the
// original c++ implementation, which only allowed
// the zip code to be optional at the end of the
// string, which presumably is a bug. We tried
// relaxing this to work in other places but it
// caused too many false positives.
nonZipMatch = stateMatch.end();
} }
// The content ends with a state but no zip
// code. This is a legal match according to the
// documentation. N.B. This differs from the
// original c++ implementation, which only allowed
// the zip code to be optional at the end of the
// string, which presumably is a bug. Now we
// prefer to find a match with a zip code, but
// remember non-zip matches and return them if
// necessary.
nonZipMatch = stateMatch.end();
} }
} }
} }
......
...@@ -128,6 +128,15 @@ public class FindAddressTest { ...@@ -128,6 +128,15 @@ public class FindAddressTest {
public void testFullAddressWithoutZipCode() { public void testFullAddressWithoutZipCode() {
assertIsAddress("1600 Amphitheatre Parkway Mountain View, CA"); assertIsAddress("1600 Amphitheatre Parkway Mountain View, CA");
assertIsAddress("201 S. Division St. Suite 500 Ann Arbor, MI"); assertIsAddress("201 S. Division St. Suite 500 Ann Arbor, MI");
// Check that addresses without a zip code are only accepted at the end of the string.
// This isn't implied by the documentation but was the case in the old implementation
// and fixing this bug creates a lot of false positives while fixing relatively few
// false negatives. In these examples, "one point" is parsed as a street and "as" is a
// state abbreviation (this is taken from a false positive reported in a bug).
Assert.assertTrue(containsAddress("one point I was as"));
Assert.assertTrue(containsAddress("At one point I was as ignorant as"));
Assert.assertFalse(containsAddress("At one point I was as ignorant as them"));
} }
@Test @Test
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment