Commit be0d6207 authored by Joe DeBlasio, committed by Chromium LUCI CQ

[Lookalikes] Ignore edit distance matches that only differ with a "-"

This CL marks edit distance matches that differ only by a single dash as
likely false positives, so that no warning is shown for them. This is a
temporary mitigation for a large class of false positives until a DAL
approach is able to offer better tools for domain owners.

Fixed: 1159473
Change-Id: Ibe2f34b71ee219a01383b0d46937d2749ba50a5b
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2628073
Commit-Queue: Joe DeBlasio <jdeblasio@chromium.org>
Reviewed-by: Mustafa Emre Acer <meacer@chromium.org>
Cr-Commit-Position: refs/heads/master@{#843257}
parent 3995f3f4
......@@ -347,6 +347,22 @@ bool IsAllowedToBeEmbedded(
EndsWithPermittedDomains(embedded_target, embedding_domain);
}
// Returns the first character of |str1| at which it differs from |str2|,
// comparing the two strings position by position from the start. If |str2| is
// a proper prefix of |str1| (e.g. "abc-" vs. "abc"), the result is the first
// character of |str1| past that shared prefix.
//
// The strings should be at least 1 edit distance apart. If |str1| contains no
// character that differs from |str2| (the strings are equal, or |str1| is a
// prefix of |str2|), this hits NOTREACHED() and returns 0.
char GetFirstDifferentChar(const std::string& str1, const std::string& str2) {
  std::string::const_iterator i1 = str1.begin();
  std::string::const_iterator i2 = str2.begin();
  while (i1 != str1.end() && i2 != str2.end()) {
    if (*i1 != *i2) {
      return *i1;
    }
    ++i1;
    ++i2;
  }
  // |str2| was exhausted while |str1| still has characters left: the strings
  // agree on all of |str2|, so the first differing character is the next
  // character of |str1| (a trailing insertion).
  if (i1 != str1.end()) {
    return *i1;
  }
  NOTREACHED();
  return 0;
}
} // namespace
DomainInfo::DomainInfo(const std::string& arg_hostname,
......@@ -497,6 +513,17 @@ bool IsLikelyEditDistanceFalsePositive(const DomainInfo& navigated_domain,
}
}
// Ignore domains that only differ by an insertion of a "-".
if (nav_dom_len != matched_dom_len) {
if (nav_dom_len < matched_dom_len &&
GetFirstDifferentChar(matched_dom, nav_dom) == '-') {
return true;
} else if (nav_dom_len > matched_dom_len &&
GetFirstDifferentChar(nav_dom, matched_dom) == '-') {
return true;
}
}
return false;
}
......
......@@ -138,7 +138,7 @@ bool IsEditDistanceAtMostOne(const base::string16& str1,
// Returns whether |navigated_domain| and |matched_domain| are likely to be edit
// distance false positives, and thus the user should *not* be warned. For
// example, domains that differ only by the insertion of a single "-" are
// treated as false positives.
//
// Assumes |navigated_domain| and |matched_domain| are edit distance matches,
// i.e. that they are an edit distance of 1 apart.
bool IsLikelyEditDistanceFalsePositive(const DomainInfo& navigated_domain,
const DomainInfo& matched_domain);
......
......@@ -105,6 +105,14 @@ TEST(LookalikeUrlUtilTest, EditDistanceExcludesCommonFalsePositives) {
{"abcde.com", "axbcde.com", false}, // Deletion
{"axbcde.com", "abcde.com", false}, // Insertion
{"axbcde.com", "aybcde.com", false}, // Substitution
// We permit matches that only differ due to a single "-".
{"-abcde.com", "abcde.com", true},
{"ab-cde.com", "abcde.com", true},
{"abcde-.com", "abcde.com", true},
{"abcde.com", "-abcde.com", true},
{"abcde.com", "ab-cde.com", true},
{"abcde.com", "abcde-.com", true},
};
for (const TestCase& test_case : kTestCases) {
auto navigated =
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment