Commit 91290d4b authored by Behnood Momenzadeh, committed by Commit Bot

Trigger Safety Tips in Target Embedding for some matches with another TLD.

Target Embedding at times matches top domains that are embedded with a
wrong TLD (e.g. google-info-foo.com). If the embedded TLD does not match
the top domain's TLD and it is a ccTLD, we trigger a Safety Tip instead
of a Lookalike Interstitial. The reason for this decision is that ccTLDs
are short and are often used as locale markers, which can lead to more
false positives.

Bug: 1087636
Change-Id: I2dfe76c037070c98649df1399c8948f004c2bdf5
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2225747
Commit-Queue: Behnood Momenzadeh <behnoodm@google.com>
Reviewed-by: Joe DeBlasio <jdeblasio@chromium.org>
Reviewed-by: Mustafa Emre Acer <meacer@chromium.org>
Cr-Commit-Position: refs/heads/master@{#774448}
parent 6d801322
......@@ -444,6 +444,19 @@ IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
LookalikeUrlMatchType::kTargetEmbedding);
}
// Target Embedding should not show an interstitial when the target is embedded
// with a TLD other than its own and that embedded TLD is a ccTLD (here
// "google.br" is embedded in place of google.com). A Safety Tip is shown in
// this situation instead.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
                       TargetEmbedding_AnotherTLD_Match) {
  const GURL kNavigatedUrl = GetURL("google.br-test.com");
  // Mark the navigated URL as a low-engagement site.
  SetEngagementScore(browser(), kNavigatedUrl, kLowEngagement);
  // No interstitial is expected for this navigation...
  TestInterstitialNotShown(browser(), kNavigatedUrl);
  // ...but the match is still recorded in UKM, under the match type reserved
  // for Safety Tip-only target embedding.
  CheckUkm({kNavigatedUrl}, "MatchType",
           LookalikeUrlMatchType::kTargetEmbeddingForSafetyTips);
}
// Target embedding should not trigger on allowlisted domains.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
TargetEmbedding_Allowlist) {
......
......@@ -69,6 +69,9 @@ bool ShouldTriggerSafetyTipFromLookalike(
case LookalikeUrlMatchType::kEditDistanceSiteEngagement:
return kEnableLookalikeEditDistanceSiteEngagement.Get();
case LookalikeUrlMatchType::kTargetEmbedding:
// Target Embedding should block URL Navigation.
return false;
case LookalikeUrlMatchType::kTargetEmbeddingForSafetyTips:
return kEnableLookalikeTargetEmbedding.Get();
case LookalikeUrlMatchType::kSiteEngagement:
case LookalikeUrlMatchType::kSkeletonMatchTop500:
......
......@@ -733,8 +733,9 @@ IN_PROC_BROWSER_TEST_P(SafetyTipPageInfoBubbleViewBrowserTest,
// domain will trigger Safety Tips.
IN_PROC_BROWSER_TEST_P(SafetyTipPageInfoBubbleViewBrowserTest,
TriggersOnTargetEmbedding) {
// This domain has google.com embedded.
const GURL kNavigatedUrl = GetURL("test-google.com-site.com");
// This domain has google.com embedded and because it has a non-matching
// ccTLD, it will not trigger an Interstitial, but will trigger a SafetyTip.
const GURL kNavigatedUrl = GetURL("test-google.br-site.com");
SetEngagementScore(browser(), kNavigatedUrl, kLowEngagement);
SetEngagementScore(browser(), kNavigatedUrl, kLowEngagement);
......@@ -746,8 +747,9 @@ IN_PROC_BROWSER_TEST_P(SafetyTipPageInfoBubbleViewBrowserTest,
// engaged domain will trigger Safety Tips.
IN_PROC_BROWSER_TEST_P(SafetyTipPageInfoBubbleViewBrowserTest,
TriggersOnHighEngagementTargetEmbedding) {
// This domain has foo.com embedded.
const GURL kNavigatedUrl = GetURL("test-foo.com-site.com");
// This domain has foo.com embedded and because it has a non-matching ccTLD,
// it will not trigger an Interstitial, but will trigger a SafetyTip.
const GURL kNavigatedUrl = GetURL("test-foo.br-site.com");
const GURL kEngagedDomain = GetURL("foo.com");
SetEngagementScore(browser(), kNavigatedUrl, kLowEngagement);
SetEngagementScore(browser(), kEngagedDomain, kHighEngagement);
......
......@@ -411,11 +411,6 @@ bool IsAllowedToBeEmbedded(
if (embedded_target.size() < kMinTargetHostnameSize) {
return true;
}
// TODO(crbug/1087636): When the length is less than
// kMinWrongTLDLengthForInterstitial, we want to trigger a SafetyTip instead.
if (TLD.size() < kMinWrongTLDLengthForInterstitial && TLD != target_tld) {
return true;
}
// All common words in |kAdditionalCommonWords| flag are considered as
// |CommonWordType::kAllTLDs|.
......@@ -626,12 +621,19 @@ bool GetMatchingDomain(
return true;
}
}
if (IsTargetEmbeddingLookalike(navigated_domain.hostname, engaged_sites,
in_target_allowlist, matched_domain)) {
TargetEmbeddingType embedding_type =
GetTargetEmbeddingType(navigated_domain.hostname, engaged_sites,
in_target_allowlist, matched_domain);
if (embedding_type == TargetEmbeddingType::kSafetyTip) {
*match_type = LookalikeUrlMatchType::kTargetEmbeddingForSafetyTips;
return true;
} else if (embedding_type == TargetEmbeddingType::kInterstitial) {
*match_type = LookalikeUrlMatchType::kTargetEmbedding;
return true;
}
DCHECK(embedding_type == TargetEmbeddingType::kNone);
return false;
}
......@@ -655,12 +657,16 @@ void RecordUMAFromMatchType(LookalikeUrlMatchType match_type) {
case LookalikeUrlMatchType::kSkeletonMatchTop5k:
RecordEvent(NavigationSuggestionEvent::kMatchSkeletonTop5k);
break;
case LookalikeUrlMatchType::kTargetEmbeddingForSafetyTips:
RecordEvent(
NavigationSuggestionEvent::kMatchTargetEmbeddingForSafetyTips);
break;
case LookalikeUrlMatchType::kNone:
break;
}
}
bool IsTargetEmbeddingLookalike(
TargetEmbeddingType GetTargetEmbeddingType(
const std::string& hostname,
const std::vector<DomainInfo>& engaged_sites,
const LookalikeTargetAllowlistChecker& in_target_allowlist,
......@@ -699,14 +705,17 @@ bool IsTargetEmbeddingLookalike(
if (!subdomains_tokens_so_far.empty()) {
subdomains_tokens_so_far.pop_back();
}
if (!IsAllowedToBeEmbedded(prev_token, token, *safe_hostname,
subdomains_tokens_so_far, in_target_allowlist)) {
return true;
return token.size() < kMinWrongTLDLengthForInterstitial &&
token !=
safe_hostname->substr(safe_hostname->rfind(".") + 1)
? TargetEmbeddingType::kSafetyTip
: TargetEmbeddingType::kInterstitial;
}
// A target is found but it was allowed to be embedded.
*safe_hostname = std::string();
prev_token = token;
}
return false;
return TargetEmbeddingType::kNone;
}
......@@ -35,6 +35,15 @@ enum class CommonWordType {
kNonMatchingTLDs = 1,
};
// Result of |GetTargetEmbeddingType|: reports whether target embedding was
// detected on the input domain and, if so, which kind of warning the user
// should see.
enum class TargetEmbeddingType {
  // No target embedding was found.
  kNone = 0,
  // A target is embedded and the navigation should be blocked with a
  // lookalike interstitial.
  kInterstitial,
  // A target is embedded with a short TLD that differs from the target's own
  // TLD; only a Safety Tip should be shown.
  kSafetyTip,
};
// Used for UKM. There is only a single LookalikeUrlMatchType per navigation.
enum class LookalikeUrlMatchType {
kNone = 0,
......@@ -46,10 +55,11 @@ enum class LookalikeUrlMatchType {
kTargetEmbedding = 5,
kSkeletonMatchTop500 = 6,
kSkeletonMatchTop5k = 7,
kTargetEmbeddingForSafetyTips = 8,
// Append new items to the end of the list above; do not modify or replace
// existing values. Comment out obsolete items.
kMaxValue = kSkeletonMatchTop5k,
kMaxValue = kTargetEmbeddingForSafetyTips,
};
// Used for UKM. There is only a single LookalikeUrlBlockingPageUserAction per
......@@ -79,10 +89,11 @@ enum class NavigationSuggestionEvent {
kMatchTargetEmbedding = 7,
kMatchSkeletonTop500 = 8,
kMatchSkeletonTop5k = 9,
kMatchTargetEmbeddingForSafetyTips = 10,
// Append new items to the end of the list above; do not modify or
// replace existing values. Comment out obsolete items.
kMaxValue = kMatchSkeletonTop5k,
kMaxValue = kMatchTargetEmbeddingForSafetyTips,
};
struct DomainInfo {
......@@ -154,14 +165,19 @@ void RecordUMAFromMatchType(LookalikeUrlMatchType match_type);
// |safe_hostname| to the url of the embedded target domain.
// At the moment we consider the following cases as Target Embedding:
// example-google.com-site.com, example.google.com-site.com,
// example-google-com-site.com, example.google.com.site.com,
// example-google-info-site.com, example.google.com.site.com,
// example-googlé.com-site.com where the embedded target is google.com. We
// detect embeddings of top 500 domains and engaged domains. However, to reduce
// false positives, we do not protect domains that are shorter than 7 characters
// long (e.g. com.ru).
// This function checks possible targets against |in_target_allowlist| to skip
// permitted embeddings.
bool IsTargetEmbeddingLookalike(
// If no target embedding is found, the return value will be set to |kNone|.
// When the target is embedded with another TLD instead of its actual TLD, it
// should trigger a Safety Tip when the embedded TLD is a ccTLD. In this
// situation, return value will be |kSafetyTip|. All the other triggers will
// result in a |kInterstitial| return value.
TargetEmbeddingType GetTargetEmbeddingType(
const std::string& hostname,
const std::vector<DomainInfo>& engaged_sites,
const LookalikeTargetAllowlistChecker& in_target_allowlist,
......
......@@ -46284,13 +46284,17 @@ Called by update_use_counter_css.py.-->
engaged site"/>
<int value="7"
label="A navigation suggestion is found using target embedding against
a top domain"/>
a top 500 domain or an engaged site"/>
<int value="8"
label="A navigation suggestion is found using a skeleton match against
a top 500 domain"/>
<int value="9"
label="A navigation suggestion is found using a skeleton match against
a top 5k (but not top 500) domain"/>
<int value="10"
label="A navigation suggestion is found using target embedding against
a top 500 domain or an engaged site, but with confidence high
enough only for a Safety Tip."/>
</enum>
<enum name="NavigationURLScheme">
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment