Commit bc034a4a authored by Ryan Sturm, committed by Commit Bot

Rely on UKM filtering instead of custom filtering for NavPredictor

This CL replaces the behavior of sampling the 10 largest links on the
page with behavior that filters entire pages via UKM event filtering. We
set reasonable limits on the repeated events (10 for clicks, and 40/100
for anchor elements). The 40/100 is inherited from the renderer, which
only sends that many metrics to the browser.

Bug: 1001593
Change-Id: If12e30e1a2cf50cb3975f5bc3749faac4534e241
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1797329
Commit-Queue: Ryan Sturm <ryansturm@chromium.org>
Reviewed-by: Tarun Bansal <tbansal@chromium.org>
Cr-Commit-Position: refs/heads/master@{#695782}
parent 736e1af8
...@@ -73,7 +73,8 @@ struct NavigationPredictor::NavigationScore { ...@@ -73,7 +73,8 @@ struct NavigationPredictor::NavigationScore {
double score, double score,
double ratio_distance_root_top, double ratio_distance_root_top,
bool contains_image, bool contains_image,
bool is_in_iframe) bool is_in_iframe,
size_t index)
: url(url), : url(url),
ratio_area(ratio_area), ratio_area(ratio_area),
is_url_incremented_by_one(is_url_incremented_by_one), is_url_incremented_by_one(is_url_incremented_by_one),
...@@ -81,7 +82,8 @@ struct NavigationPredictor::NavigationScore { ...@@ -81,7 +82,8 @@ struct NavigationPredictor::NavigationScore {
score(score), score(score),
ratio_distance_root_top(ratio_distance_root_top), ratio_distance_root_top(ratio_distance_root_top),
contains_image(contains_image), contains_image(contains_image),
is_in_iframe(is_in_iframe) {} is_in_iframe(is_in_iframe),
index(index) {}
// URL of the target link. // URL of the target link.
const GURL url; const GURL url;
...@@ -112,6 +114,9 @@ struct NavigationPredictor::NavigationScore { ...@@ -112,6 +114,9 @@ struct NavigationPredictor::NavigationScore {
// |url| is in an iframe. // |url| is in an iframe.
const bool is_in_iframe; const bool is_in_iframe;
// An index reported to UKM.
const size_t index;
// Rank of the |score| in this document. It starts at 0, a lower rank implies // Rank of the |score| in this document. It starts at 0, a lower rank implies
// a higher |score|. // a higher |score|.
base::Optional<size_t> score_rank; base::Optional<size_t> score_rank;
...@@ -470,35 +475,32 @@ void NavigationPredictor::MaybeSendMetricsToUkm() const { ...@@ -470,35 +475,32 @@ void NavigationPredictor::MaybeSendMetricsToUkm() const {
page_link_builder.Record(ukm_recorder_); page_link_builder.Record(ukm_recorder_);
for (int i = 0; i < static_cast<int>(top_urls_.size()); i++) { for (const auto& navigation_score_tuple : navigation_scores_map_) {
const auto& navigation_score = navigation_score_tuple.second;
ukm::builders::NavigationPredictorAnchorElementMetrics ukm::builders::NavigationPredictorAnchorElementMetrics
anchor_element_builder(ukm_source_id_); anchor_element_builder(ukm_source_id_);
std::string url = top_urls_[i].spec(); // Offset index to be 1-based indexing.
auto iter = navigation_scores_map_.find(url); anchor_element_builder.SetAnchorIndex(navigation_score->index);
anchor_element_builder.SetIsInIframe(navigation_score->is_in_iframe);
if (iter != navigation_scores_map_.end()) { anchor_element_builder.SetIsURLIncrementedByOne(
// Offset index to be 1-based indexing. navigation_score->is_url_incremented_by_one);
anchor_element_builder.SetAnchorIndex(i + 1); anchor_element_builder.SetContainsImage(navigation_score->contains_image);
anchor_element_builder.SetIsInIframe(iter->second->is_in_iframe); anchor_element_builder.SetSameOrigin(navigation_score->url.GetOrigin() ==
anchor_element_builder.SetIsURLIncrementedByOne( document_origin_.GetURL());
iter->second->is_url_incremented_by_one);
anchor_element_builder.SetContainsImage(iter->second->contains_image); // Convert the ratio area and ratio distance from [0,1] to [0,100].
anchor_element_builder.SetSameOrigin(iter->second->url.GetOrigin() == int percent_ratio_area =
document_origin_.GetURL()); static_cast<int>(navigation_score->ratio_area * 100);
int percent_ratio_distance_root_top =
// Convert the ratio area and ratio distance from [0,1] to [0,100]. static_cast<int>(navigation_score->ratio_distance_root_top * 100);
int percent_ratio_area = static_cast<int>(iter->second->ratio_area * 100);
int percent_ratio_distance_root_top = anchor_element_builder.SetPercentClickableArea(
static_cast<int>(iter->second->ratio_distance_root_top * 100); GetLinearBucketForRatioArea(percent_ratio_area));
anchor_element_builder.SetPercentVerticalDistance(
anchor_element_builder.SetPercentClickableArea( GetLinearBucketForLinkLocation(percent_ratio_distance_root_top));
GetLinearBucketForRatioArea(percent_ratio_area));
anchor_element_builder.SetPercentVerticalDistance( anchor_element_builder.Record(ukm_recorder_);
GetLinearBucketForLinkLocation(percent_ratio_distance_root_top));
anchor_element_builder.Record(ukm_recorder_);
}
} }
} }
...@@ -520,9 +522,14 @@ void NavigationPredictor::MaybeSendClickMetricsToUkm( ...@@ -520,9 +522,14 @@ void NavigationPredictor::MaybeSendClickMetricsToUkm(
return; return;
} }
auto iter = std::find(top_urls_.begin(), top_urls_.end(), clicked_url); if (clicked_count_ > 10)
int anchor_element_index = return;
(iter == top_urls_.end()) ? 0 : iter - top_urls_.begin() + 1;
auto nav_score = navigation_scores_map_.find(clicked_url);
int anchor_element_index = (nav_score == navigation_scores_map_.end())
? 0
: nav_score->second->index;
ukm::builders::NavigationPredictorPageLinkClick builder(ukm_source_id_); ukm::builders::NavigationPredictorPageLinkClick builder(ukm_source_id_);
builder.SetAnchorElementIndex(anchor_element_index); builder.SetAnchorElementIndex(anchor_element_index);
...@@ -585,6 +592,8 @@ void NavigationPredictor::ReportAnchorElementMetricsOnClick( ...@@ -585,6 +592,8 @@ void NavigationPredictor::ReportAnchorElementMetricsOnClick(
static_cast<int>(target_score)); static_cast<int>(target_score));
} }
clicked_count_++;
RecordActionAccuracyOnClick(metrics->target_url); RecordActionAccuracyOnClick(metrics->target_url);
MaybeSendClickMetricsToUkm(metrics->target_url.spec()); MaybeSendClickMetricsToUkm(metrics->target_url.spec());
...@@ -829,20 +838,18 @@ void NavigationPredictor::ReportAnchorElementMetricsOnLoad( ...@@ -829,20 +838,18 @@ void NavigationPredictor::ReportAnchorElementMetricsOnLoad(
return a->ratio_area > b->ratio_area; return a->ratio_area > b->ratio_area;
}); });
// Store either the top 10 links (or all the links, if the page
// contains fewer than 10 links), in |top_urls_|. Then, shuffle the
// list to randomize data sent to the UKM.
int top_urls_size = std::min(10, static_cast<int>(metrics.size()));
top_urls_.reserve(top_urls_size);
for (int i = 0; i < top_urls_size; i++) {
top_urls_.push_back(metrics[i]->target_url);
}
base::RandomShuffle(top_urls_.begin(), top_urls_.end());
// Loop |metrics| to compute navigation scores. // Loop |metrics| to compute navigation scores.
std::vector<std::unique_ptr<NavigationScore>> navigation_scores; std::vector<std::unique_ptr<NavigationScore>> navigation_scores;
navigation_scores.reserve(metrics.size()); navigation_scores.reserve(metrics.size());
double total_score = 0.0; double total_score = 0.0;
std::vector<int> indices(metrics.size());
std::generate(indices.begin(), indices.end(),
[n = 1]() mutable { return n++; });
// Shuffle the indices to keep metrics less identifiable in UKM.
base::RandomShuffle(indices.begin(), indices.end());
for (size_t i = 0; i != metrics.size(); ++i) { for (size_t i = 0; i != metrics.size(); ++i) {
const auto& metric = metrics[i]; const auto& metric = metrics[i];
RecordMetricsOnLoad(*metric); RecordMetricsOnLoad(*metric);
...@@ -875,7 +882,7 @@ void NavigationPredictor::ReportAnchorElementMetricsOnLoad( ...@@ -875,7 +882,7 @@ void NavigationPredictor::ReportAnchorElementMetricsOnLoad(
metric->target_url, static_cast<double>(metric->ratio_area), metric->target_url, static_cast<double>(metric->ratio_area),
metric->is_url_incremented_by_one, area_rank, score, metric->is_url_incremented_by_one, area_rank, score,
metric->ratio_distance_root_top, metric->contains_image, metric->ratio_distance_root_top, metric->contains_image,
metric->is_in_iframe)); metric->is_in_iframe, indices[i]));
} }
if (normalize_navigation_scores_) { if (normalize_navigation_scores_) {
...@@ -1091,34 +1098,20 @@ base::Optional<GURL> NavigationPredictor::GetUrlToPrefetch( ...@@ -1091,34 +1098,20 @@ base::Optional<GURL> NavigationPredictor::GetUrlToPrefetch(
if (source_is_default_search_engine_page_) if (source_is_default_search_engine_page_)
return base::nullopt; return base::nullopt;
if (sorted_navigation_scores.empty() || top_urls_.empty()) if (sorted_navigation_scores.empty())
return base::nullopt; return base::nullopt;
// Find which URL in |top_urls_| has the highest navigation score. double highest_navigation_score = sorted_navigation_scores[0]->score;
double highest_navigation_score; GURL url_to_prefetch = sorted_navigation_scores[0]->url;
base::Optional<GURL> url_to_prefetch;
for (const auto& nav_score : sorted_navigation_scores) {
auto url_iter =
std::find(top_urls_.begin(), top_urls_.end(), nav_score->url);
if (url_iter != top_urls_.end()) {
url_to_prefetch = nav_score->url;
highest_navigation_score = nav_score->score;
break;
}
}
UMA_HISTOGRAM_COUNTS_100( UMA_HISTOGRAM_COUNTS_100(
"AnchorElementMetrics.Visible.HighestNavigationScore", "AnchorElementMetrics.Visible.HighestNavigationScore",
static_cast<int>(highest_navigation_score)); static_cast<int>(highest_navigation_score));
if (!url_to_prefetch)
return url_to_prefetch;
// Only the same origin URLs are eligible for prefetching. If the URL with // Only the same origin URLs are eligible for prefetching. If the URL with
// the highest score is from a different origin, then we skip prefetching // the highest score is from a different origin, then we skip prefetching
// since same origin URLs are not likely to be clicked. // since same origin URLs are not likely to be clicked.
if (url::Origin::Create(url_to_prefetch.value()) != document_origin) { if (url::Origin::Create(url_to_prefetch) != document_origin) {
return base::nullopt; return base::nullopt;
} }
......
...@@ -200,9 +200,7 @@ class NavigationPredictor : public blink::mojom::AnchorElementMetricsHost, ...@@ -200,9 +200,7 @@ class NavigationPredictor : public blink::mojom::AnchorElementMetricsHost,
void MaybeSendMetricsToUkm() const; void MaybeSendMetricsToUkm() const;
// After an in-page click, sends the index of the url that was clicked to the // After an in-page click, sends the index of the url that was clicked to the
// UKM id at |ukm_source_id_|. The index sent corresponds to the index of that // UKM id at |ukm_source_id_|.
// url in |top_urls_|, and is 1-indexed. If the url does not appear in
// |top_urls_|, a 0 is returned.
void MaybeSendClickMetricsToUkm(const std::string& clicked_url) const; void MaybeSendClickMetricsToUkm(const std::string& clicked_url) const;
// Returns the minimum of the bucket that |value| belongs in, for page-wide // Returns the minimum of the bucket that |value| belongs in, for page-wide
...@@ -224,11 +222,6 @@ class NavigationPredictor : public blink::mojom::AnchorElementMetricsHost, ...@@ -224,11 +222,6 @@ class NavigationPredictor : public blink::mojom::AnchorElementMetricsHost,
std::unordered_map<std::string, std::unique_ptr<NavigationScore>> std::unordered_map<std::string, std::unique_ptr<NavigationScore>>
navigation_scores_map_; navigation_scores_map_;
// The urls of the top anchor elements in the page, in a random order.
// If there are 10 or more urls on the page, |top_urls_| contains 10 urls.
// Otherwise, it contains all the urls.
std::vector<GURL> top_urls_;
// Total number of anchors that: href has the same host as the document, // Total number of anchors that: href has the same host as the document,
// contains image, inside an iframe, href incremented by 1 from document url. // contains image, inside an iframe, href incremented by 1 from document url.
int number_of_anchors_same_host_ = 0; int number_of_anchors_same_host_ = 0;
...@@ -299,6 +292,9 @@ class NavigationPredictor : public blink::mojom::AnchorElementMetricsHost, ...@@ -299,6 +292,9 @@ class NavigationPredictor : public blink::mojom::AnchorElementMetricsHost,
// all navigation scores for a page. // all navigation scores for a page.
const bool normalize_navigation_scores_; const bool normalize_navigation_scores_;
// A count of clicks to prevent reporting more than 10 clicks to UKM.
size_t clicked_count_ = 0;
// Timing of document loaded and last click. // Timing of document loaded and last click.
base::TimeTicks document_loaded_timing_; base::TimeTicks document_loaded_timing_;
base::TimeTicks last_click_timing_; base::TimeTicks last_click_timing_;
......
...@@ -676,14 +676,14 @@ TEST_F(NavigationPredictorSendUkmMetricsEnabledTest, SendClickUkmMetrics) { ...@@ -676,14 +676,14 @@ TEST_F(NavigationPredictorSendUkmMetricsEnabledTest, SendClickUkmMetrics) {
// ratio area. // ratio area.
auto all_ukm_entries = auto all_ukm_entries =
test_ukm_recorder.GetEntriesByName(LoadUkmEntry::kEntryName); test_ukm_recorder.GetEntriesByName(LoadUkmEntry::kEntryName);
int index = 1; int index = -1;
for (auto* entry : all_ukm_entries) { for (auto* entry : all_ukm_entries) {
int entry_ratio_area = static_cast<int>(*test_ukm_recorder.GetEntryMetric( int entry_ratio_area = static_cast<int>(*test_ukm_recorder.GetEntryMetric(
entry, LoadUkmEntry::kPercentClickableAreaName)); entry, LoadUkmEntry::kPercentClickableAreaName));
if (entry_ratio_area == 100) { if (entry_ratio_area == 100) {
index = static_cast<int>(*test_ukm_recorder.GetEntryMetric(
entry, LoadUkmEntry::kAnchorIndexName));
break; break;
} else {
index++;
} }
} }
...@@ -783,43 +783,6 @@ class NavigationPredictorPrefetchAfterPreconnectEnabledTest ...@@ -783,43 +783,6 @@ class NavigationPredictorPrefetchAfterPreconnectEnabledTest
} }
}; };
// Tests that a prefetch only occurs for the URL with the highest navigation
// score in |top_urls_|, not the URL with the highest navigation score overall.
//
// The test reports 11 links in total: one "small" link, one "large" link and
// nine "xsmall" links. Only the "large" link shares the origin of |source|
// (https://example1.com); every other link points at https://example2.com.
TEST_F(NavigationPredictorPrefetchAfterPreconnectEnabledTest,
PrefetchOnlyURLInTopURLs) {
const std::string source = "https://example1.com";
const std::string url_to_prefetch = "https://example1.com/large";
// Simulate the case where the link with the highest navigation score overall
// is not one of the URLs in |top_urls_|. This relies on
// |CalculateAnchorNavigationScore| being overridden to only take ratio
// distance into account.
// NOTE(review): the two trailing numeric arguments passed to
// CreateMetricsPtrWithRatioDistance below look like (ratio area, ratio
// distance), judging by the surrounding comments — confirm against the helper.
std::vector<blink::mojom::AnchorElementMetricsPtr> metrics;
// The URL with the largest navigation score overall will be that with the
// highest ratio distance.
metrics.push_back(CreateMetricsPtrWithRatioDistance(
source, "https://example2.com/small", 1, 10));
// However, |top_urls_| will contain the top 10 links that have the highest
// ratio area, so the link with the highest ratio distance will not appear
// in the list.
metrics.push_back(
CreateMetricsPtrWithRatioDistance(source, url_to_prefetch, 10, 5));
// Pad the page with nine more links so that, together with the "large" link,
// there are ten links competing with the "small" link for the ten
// |top_urls_| slots.
for (int i = 0; i < 9; i++) {
metrics.push_back(CreateMetricsPtrWithRatioDistance(
source,
std::string("https://example2.com/xsmall")
.append(base::NumberToString(i)),
10, 0));
}
predictor_service()->ReportAnchorElementMetricsOnLoad(std::move(metrics),
GetDefaultViewport());
// Let any tasks posted while reporting the metrics run before checking the
// result.
base::RunLoop().RunUntilIdle();
// The prefetched URL must be the "large" link, not the "small" link that
// the comments above describe as having the highest score overall.
EXPECT_EQ(prefetch_url().value(), url_to_prefetch);
}
// Test that a prefetch after preconnect occurs only when the current tab is // Test that a prefetch after preconnect occurs only when the current tab is
// in the foreground, and that it does not occur multiple times for the same // in the foreground, and that it does not occur multiple times for the same
// URL. // URL.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment