Commit 91290d4b authored by Behnood Momenzadeh, committed by Commit Bot

Trigger Safety Tips in Target Embedding for some matches with another TLD.

Target Embedding at times matches top domains that are embedded with a
wrong TLD (e.g. google-info-foo.com). If the embedded TLD does not match
the top domain's TLD and it is a ccTLD, we trigger a Safety Tip instead
of a Lookalike Interstitial. The reason behind this decision is that
ccTLDs are short and are often used to denote locales, which can lead to
more false positives.

Bug: 1087636
Change-Id: I2dfe76c037070c98649df1399c8948f004c2bdf5
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2225747
Commit-Queue: Behnood Momenzadeh <behnoodm@google.com>
Reviewed-by: Joe DeBlasio <jdeblasio@chromium.org>
Reviewed-by: Mustafa Emre Acer <meacer@chromium.org>
Cr-Commit-Position: refs/heads/master@{#774448}
parent 6d801322
...@@ -444,6 +444,19 @@ IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest, ...@@ -444,6 +444,19 @@ IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
LookalikeUrlMatchType::kTargetEmbedding); LookalikeUrlMatchType::kTargetEmbedding);
} }
// A target embedded with a TLD other than its own, where the embedded TLD is a
// ccTLD, must not show an interstitial. The match is instead recorded as
// |kTargetEmbeddingForSafetyTips| so that a Safety Tip is shown.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       TargetEmbedding_AnotherTLD_Match) {
  // "google" is embedded together with the ccTLD "br" rather than "com".
  const GURL kNavigatedUrl = GetURL("google.br-test.com");
  SetEngagementScore(browser(), kNavigatedUrl, kLowEngagement);
  TestInterstitialNotShown(browser(), kNavigatedUrl);
  // Even without an interstitial, the navigation is expected to be logged to
  // UKM with the Safety Tip flavor of the target embedding match type.
  CheckUkm({kNavigatedUrl}, "MatchType",
           LookalikeUrlMatchType::kTargetEmbeddingForSafetyTips);
}
// Target embedding should not trigger on allowlisted domains. // Target embedding should not trigger on allowlisted domains.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest, IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
TargetEmbedding_Allowlist) { TargetEmbedding_Allowlist) {
......
...@@ -69,6 +69,9 @@ bool ShouldTriggerSafetyTipFromLookalike( ...@@ -69,6 +69,9 @@ bool ShouldTriggerSafetyTipFromLookalike(
case LookalikeUrlMatchType::kEditDistanceSiteEngagement: case LookalikeUrlMatchType::kEditDistanceSiteEngagement:
return kEnableLookalikeEditDistanceSiteEngagement.Get(); return kEnableLookalikeEditDistanceSiteEngagement.Get();
case LookalikeUrlMatchType::kTargetEmbedding: case LookalikeUrlMatchType::kTargetEmbedding:
// Target Embedding should block URL Navigation.
return false;
case LookalikeUrlMatchType::kTargetEmbeddingForSafetyTips:
return kEnableLookalikeTargetEmbedding.Get(); return kEnableLookalikeTargetEmbedding.Get();
case LookalikeUrlMatchType::kSiteEngagement: case LookalikeUrlMatchType::kSiteEngagement:
case LookalikeUrlMatchType::kSkeletonMatchTop500: case LookalikeUrlMatchType::kSkeletonMatchTop500:
......
...@@ -733,8 +733,9 @@ IN_PROC_BROWSER_TEST_P(SafetyTipPageInfoBubbleViewBrowserTest, ...@@ -733,8 +733,9 @@ IN_PROC_BROWSER_TEST_P(SafetyTipPageInfoBubbleViewBrowserTest,
// domain will trigger Safety Tips. // domain will trigger Safety Tips.
IN_PROC_BROWSER_TEST_P(SafetyTipPageInfoBubbleViewBrowserTest, IN_PROC_BROWSER_TEST_P(SafetyTipPageInfoBubbleViewBrowserTest,
TriggersOnTargetEmbedding) { TriggersOnTargetEmbedding) {
// This domain has google.com embedded. // This domain has google.com embedded and because it has a non-matching
const GURL kNavigatedUrl = GetURL("test-google.com-site.com"); // ccTLD, it will not trigger an Interstitial, but will trigger a SafetyTip.
const GURL kNavigatedUrl = GetURL("test-google.br-site.com");
SetEngagementScore(browser(), kNavigatedUrl, kLowEngagement); SetEngagementScore(browser(), kNavigatedUrl, kLowEngagement);
SetEngagementScore(browser(), kNavigatedUrl, kLowEngagement); SetEngagementScore(browser(), kNavigatedUrl, kLowEngagement);
...@@ -746,8 +747,9 @@ IN_PROC_BROWSER_TEST_P(SafetyTipPageInfoBubbleViewBrowserTest, ...@@ -746,8 +747,9 @@ IN_PROC_BROWSER_TEST_P(SafetyTipPageInfoBubbleViewBrowserTest,
// engaged domain will trigger Safety Tips. // engaged domain will trigger Safety Tips.
IN_PROC_BROWSER_TEST_P(SafetyTipPageInfoBubbleViewBrowserTest, IN_PROC_BROWSER_TEST_P(SafetyTipPageInfoBubbleViewBrowserTest,
TriggersOnHighEngagementTargetEmbedding) { TriggersOnHighEngagementTargetEmbedding) {
// This domain has foo.com embedded. // This domain has foo.com embedded and because it has a non-matching ccTLD,
const GURL kNavigatedUrl = GetURL("test-foo.com-site.com"); // it will not trigger an Interstitial, but will trigger a SafetyTip.
const GURL kNavigatedUrl = GetURL("test-foo.br-site.com");
const GURL kEngagedDomain = GetURL("foo.com"); const GURL kEngagedDomain = GetURL("foo.com");
SetEngagementScore(browser(), kNavigatedUrl, kLowEngagement); SetEngagementScore(browser(), kNavigatedUrl, kLowEngagement);
SetEngagementScore(browser(), kEngagedDomain, kHighEngagement); SetEngagementScore(browser(), kEngagedDomain, kHighEngagement);
......
...@@ -411,11 +411,6 @@ bool IsAllowedToBeEmbedded( ...@@ -411,11 +411,6 @@ bool IsAllowedToBeEmbedded(
if (embedded_target.size() < kMinTargetHostnameSize) { if (embedded_target.size() < kMinTargetHostnameSize) {
return true; return true;
} }
// TODO(crbug/1087636): When the length is less than
// kMinWrongTLDLengthForInterstitial, we want to trigger a SafetyTip instead.
if (TLD.size() < kMinWrongTLDLengthForInterstitial && TLD != target_tld) {
return true;
}
// All common words in |kAdditionalCommonWords| flag are considered as // All common words in |kAdditionalCommonWords| flag are considered as
// |CommonWordType::kAllTLDs|. // |CommonWordType::kAllTLDs|.
...@@ -626,12 +621,19 @@ bool GetMatchingDomain( ...@@ -626,12 +621,19 @@ bool GetMatchingDomain(
return true; return true;
} }
} }
if (IsTargetEmbeddingLookalike(navigated_domain.hostname, engaged_sites,
in_target_allowlist, matched_domain)) { TargetEmbeddingType embedding_type =
GetTargetEmbeddingType(navigated_domain.hostname, engaged_sites,
in_target_allowlist, matched_domain);
if (embedding_type == TargetEmbeddingType::kSafetyTip) {
*match_type = LookalikeUrlMatchType::kTargetEmbeddingForSafetyTips;
return true;
} else if (embedding_type == TargetEmbeddingType::kInterstitial) {
*match_type = LookalikeUrlMatchType::kTargetEmbedding; *match_type = LookalikeUrlMatchType::kTargetEmbedding;
return true; return true;
} }
DCHECK(embedding_type == TargetEmbeddingType::kNone);
return false; return false;
} }
...@@ -655,12 +657,16 @@ void RecordUMAFromMatchType(LookalikeUrlMatchType match_type) { ...@@ -655,12 +657,16 @@ void RecordUMAFromMatchType(LookalikeUrlMatchType match_type) {
case LookalikeUrlMatchType::kSkeletonMatchTop5k: case LookalikeUrlMatchType::kSkeletonMatchTop5k:
RecordEvent(NavigationSuggestionEvent::kMatchSkeletonTop5k); RecordEvent(NavigationSuggestionEvent::kMatchSkeletonTop5k);
break; break;
case LookalikeUrlMatchType::kTargetEmbeddingForSafetyTips:
RecordEvent(
NavigationSuggestionEvent::kMatchTargetEmbeddingForSafetyTips);
break;
case LookalikeUrlMatchType::kNone: case LookalikeUrlMatchType::kNone:
break; break;
} }
} }
bool IsTargetEmbeddingLookalike( TargetEmbeddingType GetTargetEmbeddingType(
const std::string& hostname, const std::string& hostname,
const std::vector<DomainInfo>& engaged_sites, const std::vector<DomainInfo>& engaged_sites,
const LookalikeTargetAllowlistChecker& in_target_allowlist, const LookalikeTargetAllowlistChecker& in_target_allowlist,
...@@ -699,14 +705,17 @@ bool IsTargetEmbeddingLookalike( ...@@ -699,14 +705,17 @@ bool IsTargetEmbeddingLookalike(
if (!subdomains_tokens_so_far.empty()) { if (!subdomains_tokens_so_far.empty()) {
subdomains_tokens_so_far.pop_back(); subdomains_tokens_so_far.pop_back();
} }
if (!IsAllowedToBeEmbedded(prev_token, token, *safe_hostname, if (!IsAllowedToBeEmbedded(prev_token, token, *safe_hostname,
subdomains_tokens_so_far, in_target_allowlist)) { subdomains_tokens_so_far, in_target_allowlist)) {
return true; return token.size() < kMinWrongTLDLengthForInterstitial &&
token !=
safe_hostname->substr(safe_hostname->rfind(".") + 1)
? TargetEmbeddingType::kSafetyTip
: TargetEmbeddingType::kInterstitial;
} }
// A target is found but it was allowed to be embedded. // A target is found but it was allowed to be embedded.
*safe_hostname = std::string(); *safe_hostname = std::string();
prev_token = token; prev_token = token;
} }
return false; return TargetEmbeddingType::kNone;
} }
...@@ -35,6 +35,15 @@ enum class CommonWordType { ...@@ -35,6 +35,15 @@ enum class CommonWordType {
kNonMatchingTLDs = 1, kNonMatchingTLDs = 1,
}; };
// Result of |GetTargetEmbeddingType|. Tells the caller whether target
// embedding was detected in the input domain and, if it was, which kind of
// warning should be presented to the user.
enum class TargetEmbeddingType {
  // No target embedding was found.
  kNone,
  // An embedded target was found; the caller reports it as
  // |LookalikeUrlMatchType::kTargetEmbedding|.
  kInterstitial,
  // An embedded target was found; the caller reports it as
  // |LookalikeUrlMatchType::kTargetEmbeddingForSafetyTips|.
  kSafetyTip,
};
// Used for UKM. There is only a single LookalikeUrlMatchType per navigation. // Used for UKM. There is only a single LookalikeUrlMatchType per navigation.
enum class LookalikeUrlMatchType { enum class LookalikeUrlMatchType {
kNone = 0, kNone = 0,
...@@ -46,10 +55,11 @@ enum class LookalikeUrlMatchType { ...@@ -46,10 +55,11 @@ enum class LookalikeUrlMatchType {
kTargetEmbedding = 5, kTargetEmbedding = 5,
kSkeletonMatchTop500 = 6, kSkeletonMatchTop500 = 6,
kSkeletonMatchTop5k = 7, kSkeletonMatchTop5k = 7,
kTargetEmbeddingForSafetyTips = 8,
// Append new items to the end of the list above; do not modify or replace // Append new items to the end of the list above; do not modify or replace
// existing values. Comment out obsolete items. // existing values. Comment out obsolete items.
kMaxValue = kSkeletonMatchTop5k, kMaxValue = kTargetEmbeddingForSafetyTips,
}; };
// Used for UKM. There is only a single LookalikeUrlBlockingPageUserAction per // Used for UKM. There is only a single LookalikeUrlBlockingPageUserAction per
...@@ -79,10 +89,11 @@ enum class NavigationSuggestionEvent { ...@@ -79,10 +89,11 @@ enum class NavigationSuggestionEvent {
kMatchTargetEmbedding = 7, kMatchTargetEmbedding = 7,
kMatchSkeletonTop500 = 8, kMatchSkeletonTop500 = 8,
kMatchSkeletonTop5k = 9, kMatchSkeletonTop5k = 9,
kMatchTargetEmbeddingForSafetyTips = 10,
// Append new items to the end of the list above; do not modify or // Append new items to the end of the list above; do not modify or
// replace existing values. Comment out obsolete items. // replace existing values. Comment out obsolete items.
kMaxValue = kMatchSkeletonTop5k, kMaxValue = kMatchTargetEmbeddingForSafetyTips,
}; };
struct DomainInfo { struct DomainInfo {
...@@ -154,14 +165,19 @@ void RecordUMAFromMatchType(LookalikeUrlMatchType match_type); ...@@ -154,14 +165,19 @@ void RecordUMAFromMatchType(LookalikeUrlMatchType match_type);
// |safe_hostname| to the url of the embedded target domain. // |safe_hostname| to the url of the embedded target domain.
// At the moment we consider the following cases as Target Embedding: // At the moment we consider the following cases as Target Embedding:
// example-google.com-site.com, example.google.com-site.com, // example-google.com-site.com, example.google.com-site.com,
// example-google-com-site.com, example.google.com.site.com, // example-google-info-site.com, example.google.com.site.com,
// example-googlé.com-site.com where the embedded target is google.com. We // example-googlé.com-site.com where the embedded target is google.com. We
// detect embeddings of top 500 domains and engaged domains. However, to reduce // detect embeddings of top 500 domains and engaged domains. However, to reduce
// false positives, we do not protect domains that are shorter than 7 characters // false positives, we do not protect domains that are shorter than 7 characters
// long (e.g. com.ru). // long (e.g. com.ru).
// This function checks possible targets against |in_target_allowlist| to skip // This function checks possible targets against |in_target_allowlist| to skip
// permitted embeddings. // permitted embeddings.
bool IsTargetEmbeddingLookalike( // If no target embedding is found, the return value will be set to |kNone|.
// When the target is embedded with another TLD instead of its actual TLD, it
// should trigger a Safety Tip when the embedded TLD is a ccTLD. In this
// situation, return value will be |kSafetyTip|. All the other triggers will
// result in a |kInterstitial| return value.
TargetEmbeddingType GetTargetEmbeddingType(
const std::string& hostname, const std::string& hostname,
const std::vector<DomainInfo>& engaged_sites, const std::vector<DomainInfo>& engaged_sites,
const LookalikeTargetAllowlistChecker& in_target_allowlist, const LookalikeTargetAllowlistChecker& in_target_allowlist,
......
...@@ -46284,13 +46284,17 @@ Called by update_use_counter_css.py.--> ...@@ -46284,13 +46284,17 @@ Called by update_use_counter_css.py.-->
engaged site"/> engaged site"/>
<int value="7" <int value="7"
label="A navigation suggestion is found using target embedding against label="A navigation suggestion is found using target embedding against
a top domain"/> a top 500 domain or an engaged site"/>
<int value="8" <int value="8"
label="A navigation suggestion is found using a skeleton match against label="A navigation suggestion is found using a skeleton match against
a top 500 domain"/> a top 500 domain"/>
<int value="9" <int value="9"
label="A navigation suggestion is found using a skeleton match against label="A navigation suggestion is found using a skeleton match against
a top 5k (but not top 500) domain"/> a top 5k (but not top 500) domain"/>
<int value="10"
label="A navigation suggestion is found using target embedding against
a top 500 domain or an engaged site, but with confidence high
enough only for a Safety Tip."/>
</enum> </enum>
<enum name="NavigationURLScheme"> <enum name="NavigationURLScheme">
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment