Commit a88c74f2 authored by Mustafa Emre Acer's avatar Mustafa Emre Acer Committed by Commit Bot

[Reland] Lookalikes: Record histogram only once per navigation

NavigationSuggestion.Event histogram records each lookalike domain in
a redirect chain as a separate entry. This slightly overcounts lookalike
matches. This CL changes the histogram name to
NavigationSuggestion.Event2 and records only one entry per navigation.

Bug: 1136296
Change-Id: Icc0beae3d497718a7700f1875db7e5646e729fc6
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2459430
Commit-Queue: Mustafa Emre Acer <meacer@chromium.org>
Reviewed-by: default avatarAlexei Svitkine <asvitkine@chromium.org>
Reviewed-by: default avatarChris Thompson <cthomp@chromium.org>
Reviewed-by: default avatarJoe DeBlasio <jdeblasio@chromium.org>
Cr-Original-Commit-Position: refs/heads/master@{#816816}
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2473064
Cr-Commit-Position: refs/heads/master@{#817534}
parent af52bf4c
...@@ -217,9 +217,6 @@ ThrottleCheckResult LookalikeUrlNavigationThrottle::PerformChecks( ...@@ -217,9 +217,6 @@ ThrottleCheckResult LookalikeUrlNavigationThrottle::PerformChecks(
IsLookalikeUrl(last_url, engaged_sites, &last_match_type, IsLookalikeUrl(last_url, engaged_sites, &last_match_type,
&last_suggested_url); &last_suggested_url);
if (!first_is_lookalike && !last_is_lookalike) {
return content::NavigationThrottle::PROCEED;
}
// If the first URL is a lookalike, but we ended up on the suggested site // If the first URL is a lookalike, but we ended up on the suggested site
// anyway, don't warn. // anyway, don't warn.
...@@ -228,6 +225,12 @@ ThrottleCheckResult LookalikeUrlNavigationThrottle::PerformChecks( ...@@ -228,6 +225,12 @@ ThrottleCheckResult LookalikeUrlNavigationThrottle::PerformChecks(
first_is_lookalike = false; first_is_lookalike = false;
} }
if (!first_is_lookalike && !last_is_lookalike) {
return content::NavigationThrottle::PROCEED;
}
// IMPORTANT: Do not modify first_is_lookalike or last_is_lookalike beyond
// this line. See crbug.com/1138138 for an example bug.
// source_id corresponds to last_url, even when first_url is what triggered. // source_id corresponds to last_url, even when first_url is what triggered.
// TODO(crbug.com/1133598): disambiguate first_- vs. last_urls. // TODO(crbug.com/1133598): disambiguate first_- vs. last_urls.
ukm::SourceId source_id = ukm::ConvertToSourceId( ukm::SourceId source_id = ukm::ConvertToSourceId(
...@@ -235,15 +238,19 @@ ThrottleCheckResult LookalikeUrlNavigationThrottle::PerformChecks( ...@@ -235,15 +238,19 @@ ThrottleCheckResult LookalikeUrlNavigationThrottle::PerformChecks(
if (first_is_lookalike && if (first_is_lookalike &&
ShouldBlockLookalikeUrlNavigation(first_match_type)) { ShouldBlockLookalikeUrlNavigation(first_match_type)) {
RecordUMAFromMatchType(first_match_type);
return ShowInterstitial(first_suggested_url, first_url, source_id, return ShowInterstitial(first_suggested_url, first_url, source_id,
first_match_type); first_match_type);
} }
if (last_is_lookalike && ShouldBlockLookalikeUrlNavigation(last_match_type)) { if (last_is_lookalike && ShouldBlockLookalikeUrlNavigation(last_match_type)) {
RecordUMAFromMatchType(last_match_type);
return ShowInterstitial(last_suggested_url, last_url, source_id, return ShowInterstitial(last_suggested_url, last_url, source_id,
last_match_type); last_match_type);
} }
RecordUMAFromMatchType(first_is_lookalike ? first_match_type
: last_match_type);
// Interstitial normally records UKM, but still record when it's not shown. // Interstitial normally records UKM, but still record when it's not shown.
RecordUkmForLookalikeUrlBlockingPage( RecordUkmForLookalikeUrlBlockingPage(
source_id, first_is_lookalike ? first_match_type : last_match_type, source_id, first_is_lookalike ? first_match_type : last_match_type,
...@@ -313,8 +320,6 @@ bool LookalikeUrlNavigationThrottle::IsLookalikeUrl( ...@@ -313,8 +320,6 @@ bool LookalikeUrlNavigationThrottle::IsLookalikeUrl(
&matched_domain, match_type)) { &matched_domain, match_type)) {
DCHECK(!matched_domain.empty()); DCHECK(!matched_domain.empty());
RecordUMAFromMatchType(*match_type);
// matched_domain can be a top domain or an engaged domain. Simply use its // matched_domain can be a top domain or an engaged domain. Simply use its
// eTLD+1 as the suggested domain. // eTLD+1 as the suggested domain.
// 1. If matched_domain is a top domain: Top domain list already contains // 1. If matched_domain is a top domain: Top domain list already contains
...@@ -344,7 +349,6 @@ bool LookalikeUrlNavigationThrottle::IsLookalikeUrl( ...@@ -344,7 +349,6 @@ bool LookalikeUrlNavigationThrottle::IsLookalikeUrl(
if (ShouldBlockBySpoofCheckResult(navigated_domain)) { if (ShouldBlockBySpoofCheckResult(navigated_domain)) {
*match_type = LookalikeUrlMatchType::kFailedSpoofChecks; *match_type = LookalikeUrlMatchType::kFailedSpoofChecks;
*suggested_url = GURL(); *suggested_url = GURL();
RecordUMAFromMatchType(*match_type);
return true; return true;
} }
......
...@@ -497,6 +497,39 @@ IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest, ...@@ -497,6 +497,39 @@ IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
LookalikeUrlMatchType::kTargetEmbedding); LookalikeUrlMatchType::kTargetEmbedding);
} }
// Same as TargetEmbedding_TopDomain_Match, but has a redirect where the first
// and last URLs are both target embedding matches. Should only record
// metrics for the first URL. Regression test for crbug.com/1136296.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
TargetEmbedding_TopDomain_Redirect_Match) {
const GURL kNavigatedUrl = GetLongRedirect("google.com-test.com", "site.com",
"youtube.com-test.com");
// UKM will record the final URL of the redirect:
const GURL kLastUrl = GetURL("youtube.com-test.com");
const GURL kExpectedSuggestedUrl = GetURLWithoutPath("google.com");
SetEngagementScore(browser(), kNavigatedUrl, kLowEngagement);
// |TestMetricsRecordedAndInterstitialShown| assumes everything should be
// recorded if target embedding is not disabled. But only for target embedding
// checks, if TargetEmbedding is not explicitly enabled, it should be treated
// just like it is disabled. So we make sure an interstitial is not shown if
// target embedding is not enabled. And defer to
// |TestMetricsRecordedAndInterstitialShown| otherwise.
if (!target_embedding_enabled()) {
base::HistogramTester histograms;
TestInterstitialNotShown(browser(), kNavigatedUrl);
histograms.ExpectTotalCount(lookalikes::kHistogramName, 1);
histograms.ExpectBucketCount(
lookalikes::kHistogramName,
NavigationSuggestionEvent::kMatchTargetEmbedding, 1);
} else {
TestMetricsRecordedAndInterstitialShown(
browser(), kNavigatedUrl, kExpectedSuggestedUrl,
NavigationSuggestionEvent::kMatchTargetEmbedding);
}
CheckUkm({kLastUrl}, "MatchType", LookalikeUrlMatchType::kTargetEmbedding);
}
// Target embedding should not trigger on allowlisted embedder domains. // Target embedding should not trigger on allowlisted embedder domains.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest, IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationThrottleBrowserTest,
TargetEmbedding_EmbedderAllowlist) { TargetEmbedding_EmbedderAllowlist) {
......
...@@ -34,7 +34,7 @@ ...@@ -34,7 +34,7 @@
namespace lookalikes { namespace lookalikes {
const char kHistogramName[] = "NavigationSuggestion.Event"; const char kHistogramName[] = "NavigationSuggestion.Event2";
void RegisterProfilePrefs(user_prefs::PrefRegistrySyncable* registry) { void RegisterProfilePrefs(user_prefs::PrefRegistrySyncable* registry) {
registry->RegisterListPref(prefs::kLookalikeWarningAllowlistDomains); registry->RegisterListPref(prefs::kLookalikeWarningAllowlistDomains);
......
...@@ -665,6 +665,10 @@ reviews. Googlers can read more about this at go/gwsq-gerrit. ...@@ -665,6 +665,10 @@ reviews. Googlers can read more about this at go/gwsq-gerrit.
<histogram name="NavigationSuggestion.Event" enum="NavigationSuggestionEvent" <histogram name="NavigationSuggestion.Event" enum="NavigationSuggestionEvent"
expires_after="M89"> expires_after="M89">
<obsolete>
Replaced with NavigationSuggestion.Event2 on 2020/10 because of
crbug.com/1136296.
</obsolete>
<owner>meacer@chromium.org</owner> <owner>meacer@chromium.org</owner>
<owner>security-enamel@chromium.org</owner> <owner>security-enamel@chromium.org</owner>
<summary> <summary>
...@@ -674,6 +678,23 @@ reviews. Googlers can read more about this at go/gwsq-gerrit. ...@@ -674,6 +678,23 @@ reviews. Googlers can read more about this at go/gwsq-gerrit.
</summary> </summary>
</histogram> </histogram>
<histogram name="NavigationSuggestion.Event2" enum="NavigationSuggestionEvent"
expires_after="M91">
<owner>meacer@chromium.org</owner>
<owner>security-enamel@chromium.org</owner>
<summary>
Tracks events when the currently navigated domain name is a lookalike to one
of the top 10K domains or a domain that the user interacted with, resulting
in a navigation suggestion interstitial.
Before M88, NavigationSuggestion.Event recorded a separate entry for each
lookalike URL in a redirect chain. E.g. lookalike1.com -&gt; site.com -&gt;
lookalike2.com recorded two events. This updated histogram only records an
entry for the first or last URL in the redirect if any of those is a
lookalike.
</summary>
</histogram>
</histograms> </histograms>
</histogram-configuration> </histogram-configuration>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment