Commit abcbe0cd authored by Joe DeBlasio's avatar Joe DeBlasio Committed by Commit Bot

[Simplified domains] Add finch param for excluding keyword matches in e2LD.

This CL is a small tweak to allow us to tune whether or not the keyword
matching heuristic applies to the e2LD or not. To maintain current
behavior, the default value of the parameter is kept to true.

Fixed: 1123151
Change-Id: Ifc85b464776e874a24fe4ec69be6c2aae0960eda
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2382404
Commit-Queue: Joe DeBlasio <jdeblasio@chromium.org>
Reviewed-by: default avatarEmily Stark <estark@chromium.org>
Cr-Commit-Position: refs/heads/master@{#802886}
parent d99b0d41
...@@ -111,13 +111,15 @@ bool ShouldTriggerSafetyTipFromKeywordInURL( ...@@ -111,13 +111,15 @@ bool ShouldTriggerSafetyTipFromKeywordInURL(
const char* const sensitive_keywords[], const char* const sensitive_keywords[],
const size_t num_sensitive_keywords) { const size_t num_sensitive_keywords) {
return HostnameContainsKeyword(url, navigated_domain.domain_and_registry, return HostnameContainsKeyword(url, navigated_domain.domain_and_registry,
sensitive_keywords, num_sensitive_keywords); sensitive_keywords, num_sensitive_keywords,
/* search_e2ld = */ true);
} }
bool HostnameContainsKeyword(const GURL& url, bool HostnameContainsKeyword(const GURL& url,
const std::string& eTLD_plus_one, const std::string& eTLD_plus_one,
const char* const keywords[], const char* const keywords[],
const size_t num_keywords) { const size_t num_keywords,
bool search_e2ld) {
// We never want to trigger this heuristic on any non-http / https sites. // We never want to trigger this heuristic on any non-http / https sites.
if (!url.SchemeIsHTTPOrHTTPS()) { if (!url.SchemeIsHTTPOrHTTPS()) {
return false; return false;
...@@ -150,10 +152,10 @@ bool HostnameContainsKeyword(const GURL& url, ...@@ -150,10 +152,10 @@ bool HostnameContainsKeyword(const GURL& url,
// Any problems that would result in an empty e2LD should have been caught via // Any problems that would result in an empty e2LD should have been caught via
// the |eTLD_plus_one| check. // the |eTLD_plus_one| check.
// If the e2LD is itself a keyword, then chop that off and only // If we want to exclude the e2LD, or if the e2LD is itself a keyword, then
// search the rest of it. Otherwise, we keep the full e2LD included to // chop that off and only search the rest of it. Otherwise, we keep the full
// detect hyphenated spoofs (e.g. "evil-google.com"). // e2LD included to detect hyphenated spoofs (e.g. "evil-google.com").
if (SortedWordListContains(e2LD, keywords, num_keywords)) { if (!search_e2ld || SortedWordListContains(e2LD, keywords, num_keywords)) {
// If the user visited the eTLD+1 directly, bail here. // If the user visited the eTLD+1 directly, bail here.
if (search_substr.size() == e2LD.size()) { if (search_substr.size() == e2LD.size()) {
return false; return false;
......
...@@ -39,10 +39,14 @@ bool ShouldTriggerSafetyTipFromKeywordInURL( ...@@ -39,10 +39,14 @@ bool ShouldTriggerSafetyTipFromKeywordInURL(
// Checks to see whether a hostname contains sensitive keywords in a way // Checks to see whether a hostname contains sensitive keywords in a way
// that violates our hostname elision policy. // that violates our hostname elision policy.
// //
// If |search_e2ld| is false, only finds keywords in subdomains below the e2LD
// (e.g. it will only search through "foo.bar" in "foo.bar.example.com").
//
// Returns false when called with a URL without a TLD or with an unknown TLD. // Returns false when called with a URL without a TLD or with an unknown TLD.
bool HostnameContainsKeyword(const GURL& url, bool HostnameContainsKeyword(const GURL& url,
const std::string& eTLD_plus_one, const std::string& eTLD_plus_one,
const char* const sensitive_keywords[], const char* const sensitive_keywords[],
const size_t num_sensitive_keywords); const size_t num_sensitive_keywords,
bool search_e2ld);
#endif // CHROME_BROWSER_REPUTATION_LOCAL_HEURISTICS_H_ #endif // CHROME_BROWSER_REPUTATION_LOCAL_HEURISTICS_H_
...@@ -20,6 +20,8 @@ const base::FeatureParam<int> kMaximumUnelidedHostnameLength{ ...@@ -20,6 +20,8 @@ const base::FeatureParam<int> kMaximumUnelidedHostnameLength{
&omnibox::kMaybeElideToRegistrableDomain, "max_unelided_host_length", 25}; &omnibox::kMaybeElideToRegistrableDomain, "max_unelided_host_length", 25};
const base::FeatureParam<bool> kEnableKeywordBasedElision{ const base::FeatureParam<bool> kEnableKeywordBasedElision{
&omnibox::kMaybeElideToRegistrableDomain, "enable_keyword_elision", true}; &omnibox::kMaybeElideToRegistrableDomain, "enable_keyword_elision", true};
const base::FeatureParam<bool> kSearchE2LDForKeywords{
&omnibox::kMaybeElideToRegistrableDomain, "search_e2ld_for_keywords", true};
} // namespace } // namespace
...@@ -46,7 +48,8 @@ bool ShouldElideToRegistrableDomain(const GURL& url) { ...@@ -46,7 +48,8 @@ bool ShouldElideToRegistrableDomain(const GURL& url) {
auto eTLD_plus_one = GetETLDPlusOne(host); auto eTLD_plus_one = GetETLDPlusOne(host);
if (kEnableKeywordBasedElision.Get() && if (kEnableKeywordBasedElision.Get() &&
HostnameContainsKeyword(url, eTLD_plus_one, top500_domains::kTopKeywords, HostnameContainsKeyword(url, eTLD_plus_one, top500_domains::kTopKeywords,
top500_domains::kNumTopKeywords)) { top500_domains::kNumTopKeywords,
kSearchE2LDForKeywords.Get())) {
return true; return true;
} }
......
...@@ -21,6 +21,12 @@ namespace { ...@@ -21,6 +21,12 @@ namespace {
// hostnames that are reliably shorter/longer than the limit. // hostnames that are reliably shorter/longer than the limit.
const char* kMaxUnelidedHostLengthParam = "20"; const char* kMaxUnelidedHostLengthParam = "20";
enum class KeywordSearchConfig {
kDisabled,
kEnabledWithE2LD,
kEnabledWithoutE2LD
};
} // namespace } // namespace
class UrlElisionPolicyTest : public testing::Test { class UrlElisionPolicyTest : public testing::Test {
...@@ -105,27 +111,43 @@ TEST_F(UrlElisionPolicyTest, ElidesKeywordedDomainsAppropriately) { ...@@ -105,27 +111,43 @@ TEST_F(UrlElisionPolicyTest, ElidesKeywordedDomainsAppropriately) {
ShouldElideToRegistrableDomain(GURL("ftp://google.login.com/xyz"))); ShouldElideToRegistrableDomain(GURL("ftp://google.login.com/xyz")));
} }
class UrlElisionKeywordPolicyTest : public UrlElisionPolicyTest, class UrlElisionKeywordPolicyTest
public testing::WithParamInterface<bool> { : public UrlElisionPolicyTest,
public testing::WithParamInterface<KeywordSearchConfig> {
public: public:
UrlElisionKeywordPolicyTest() { UrlElisionKeywordPolicyTest() {
if (!GetParam()) { switch (GetParam()) {
case KeywordSearchConfig::kDisabled:
scoped_feature_list_.InitWithFeaturesAndParameters( scoped_feature_list_.InitWithFeaturesAndParameters(
{{omnibox::kMaybeElideToRegistrableDomain, {{omnibox::kMaybeElideToRegistrableDomain,
{{"max_unelided_host_length", kMaxUnelidedHostLengthParam}}}}, {{"max_unelided_host_length", kMaxUnelidedHostLengthParam},
{"enable_keyword_elision", "false"}}}},
{}); {});
} else { break;
case KeywordSearchConfig::kEnabledWithE2LD:
scoped_feature_list_.InitWithFeaturesAndParameters( scoped_feature_list_.InitWithFeaturesAndParameters(
{{omnibox::kMaybeElideToRegistrableDomain, {{omnibox::kMaybeElideToRegistrableDomain,
{{"max_unelided_host_length", kMaxUnelidedHostLengthParam}, {{"max_unelided_host_length", kMaxUnelidedHostLengthParam},
{"enable_keyword_elision", "false"}}}}, {"enable_keyword_elision", "true"},
{"search_e2ld_for_keywords", "true"}}}},
{});
break;
case KeywordSearchConfig::kEnabledWithoutE2LD:
scoped_feature_list_.InitWithFeaturesAndParameters(
{{omnibox::kMaybeElideToRegistrableDomain,
{{"max_unelided_host_length", kMaxUnelidedHostLengthParam},
{"enable_keyword_elision", "true"},
{"search_e2ld_for_keywords", "false"}}}},
{}); {});
break;
} }
} }
~UrlElisionKeywordPolicyTest() override = default; ~UrlElisionKeywordPolicyTest() override = default;
bool IsKeywordElisionEnabled() { return !GetParam(); } bool IsKeywordElisionEnabled() {
return GetParam() != KeywordSearchConfig::kDisabled;
}
private: private:
base::test::ScopedFeatureList scoped_feature_list_; base::test::ScopedFeatureList scoped_feature_list_;
...@@ -133,10 +155,21 @@ class UrlElisionKeywordPolicyTest : public UrlElisionPolicyTest, ...@@ -133,10 +155,21 @@ class UrlElisionKeywordPolicyTest : public UrlElisionPolicyTest,
DISALLOW_COPY_AND_ASSIGN(UrlElisionKeywordPolicyTest); DISALLOW_COPY_AND_ASSIGN(UrlElisionKeywordPolicyTest);
}; };
INSTANTIATE_TEST_SUITE_P(All, UrlElisionKeywordPolicyTest, testing::Bool()); INSTANTIATE_TEST_SUITE_P(
All,
UrlElisionKeywordPolicyTest,
::testing::Values(KeywordSearchConfig::kDisabled,
KeywordSearchConfig::kEnabledWithE2LD,
KeywordSearchConfig::kEnabledWithoutE2LD));
// Verify that keyword elision follows the feature parameter. // Verify that keyword elision follows the feature parameter.
TEST_P(UrlElisionKeywordPolicyTest, ElidesOnKeywords) { TEST_P(UrlElisionKeywordPolicyTest, ElidesOnKeywords) {
EXPECT_EQ(IsKeywordElisionEnabled(), EXPECT_EQ(IsKeywordElisionEnabled(),
ShouldElideToRegistrableDomain(GURL("http://google.evil.com/xyz")));
}
// Verify that keyword elision respects the e2LD inclusion parameter.
TEST_P(UrlElisionKeywordPolicyTest, RespectsE2LDParam) {
EXPECT_EQ(GetParam() == KeywordSearchConfig::kEnabledWithE2LD,
ShouldElideToRegistrableDomain(GURL("http://google-evil.com/xyz"))); ShouldElideToRegistrableDomain(GURL("http://google-evil.com/xyz")));
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment