Commit 808b82a3 authored by Mikel Astiz's avatar Mikel Astiz Committed by Commit Bot

[bookmarks] Add more metrics around URL duplicates

The newly-introduced metrics explore different predicates to determine
whether a bookmark is a duplicate. These variants are more strict and
involve not only URL equality, but also title equality and (in a
separate metric) parent equality.

Change-Id: I8ac7ce46b235e31ec855d55fcc5bf0cad8ef9fff
Bug: 1068378
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2514155Reviewed-by: default avatarJesse Doherty <jwd@chromium.org>
Reviewed-by: default avatarScott Violet <sky@chromium.org>
Commit-Queue: Mikel Astiz <mastiz@chromium.org>
Cr-Commit-Position: refs/heads/master@{#824004}
parent facbfdcd
...@@ -323,6 +323,9 @@ IN_PROC_BROWSER_TEST_F(BookmarkBrowsertest, DragMultipleBookmarks) { ...@@ -323,6 +323,9 @@ IN_PROC_BROWSER_TEST_F(BookmarkBrowsertest, DragMultipleBookmarks) {
IN_PROC_BROWSER_TEST_F(BookmarkBrowsertest, PRE_EmitUmaForDuplicates) { IN_PROC_BROWSER_TEST_F(BookmarkBrowsertest, PRE_EmitUmaForDuplicates) {
BookmarkModel* bookmark_model = WaitForBookmarkModel(browser()->profile()); BookmarkModel* bookmark_model = WaitForBookmarkModel(browser()->profile());
const BookmarkNode* parent = bookmarks::GetParentForNewNodes(bookmark_model); const BookmarkNode* parent = bookmarks::GetParentForNewNodes(bookmark_model);
const BookmarkNode* other_parent =
bookmark_model->AddFolder(parent, 0, base::ASCIIToUTF16("Folder"));
// Add one bookmark with a unique URL, two other bookmarks with a shared URL, // Add one bookmark with a unique URL, two other bookmarks with a shared URL,
// and three more with another shared URL. // and three more with another shared URL.
bookmark_model->AddURL(parent, parent->children().size(), bookmark_model->AddURL(parent, parent->children().size(),
...@@ -336,7 +339,9 @@ IN_PROC_BROWSER_TEST_F(BookmarkBrowsertest, PRE_EmitUmaForDuplicates) { ...@@ -336,7 +339,9 @@ IN_PROC_BROWSER_TEST_F(BookmarkBrowsertest, PRE_EmitUmaForDuplicates) {
bookmark_model->AddURL(parent, parent->children().size(), bookmark_model->AddURL(parent, parent->children().size(),
base::ASCIIToUTF16("title5"), GURL("http://c.com")); base::ASCIIToUTF16("title5"), GURL("http://c.com"));
bookmark_model->AddURL(parent, parent->children().size(), bookmark_model->AddURL(parent, parent->children().size(),
base::ASCIIToUTF16("title6"), GURL("http://c.com")); base::ASCIIToUTF16("title5"), GURL("http://c.com"));
bookmark_model->AddURL(other_parent, other_parent->children().size(),
base::ASCIIToUTF16("title5"), GURL("http://c.com"));
} }
IN_PROC_BROWSER_TEST_F(BookmarkBrowsertest, EmitUmaForDuplicates) { IN_PROC_BROWSER_TEST_F(BookmarkBrowsertest, EmitUmaForDuplicates) {
...@@ -345,9 +350,22 @@ IN_PROC_BROWSER_TEST_F(BookmarkBrowsertest, EmitUmaForDuplicates) { ...@@ -345,9 +350,22 @@ IN_PROC_BROWSER_TEST_F(BookmarkBrowsertest, EmitUmaForDuplicates) {
ASSERT_THAT( ASSERT_THAT(
histogram_tester()->GetAllSamples("Bookmarks.Count.OnProfileLoad"), histogram_tester()->GetAllSamples("Bookmarks.Count.OnProfileLoad"),
testing::ElementsAre(base::Bucket(/*min=*/6, /*count=*/1))); testing::ElementsAre(base::Bucket(/*min=*/6, /*count=*/1)));
// 2 bookmarks have URL http://b.com and 4 have http://c.com. This counts as 4
// duplicates.
EXPECT_THAT(histogram_tester()->GetAllSamples( EXPECT_THAT(histogram_tester()->GetAllSamples(
"Bookmarks.Count.OnProfileLoad.DuplicateUrl2"), "Bookmarks.Count.OnProfileLoad.DuplicateUrl2"),
testing::ElementsAre(base::Bucket(/*min=*/3, /*count=*/1))); testing::ElementsAre(base::Bucket(/*min=*/4, /*count=*/1)));
// 3 bookmarks have the pair (http://c.com, title5). This counts as 2
// duplicates when considering URLs and titles.
EXPECT_THAT(histogram_tester()->GetAllSamples(
"Bookmarks.Count.OnProfileLoad.DuplicateUrlAndTitle"),
testing::ElementsAre(base::Bucket(/*min=*/2, /*count=*/1)));
// Among the three above, only two have the same parent. This means only one
// counts as duplicate when considering all three attributes.
EXPECT_THAT(
histogram_tester()->GetAllSamples(
"Bookmarks.Count.OnProfileLoad.DuplicateUrlAndTitleAndParent"),
testing::ElementsAre(base::Bucket(/*min=*/1, /*count=*/1)));
} }
#endif // !defined(OS_CHROMEOS) #endif // !defined(OS_CHROMEOS)
...@@ -90,6 +90,20 @@ void LoadBookmarks(const base::FilePath& path, ...@@ -90,6 +90,20 @@ void LoadBookmarks(const base::FilePath& path,
"Bookmarks.Count.OnProfileLoad.DuplicateUrl2", "Bookmarks.Count.OnProfileLoad.DuplicateUrl2",
base::saturated_cast<int>(stats.duplicate_url_bookmark_count)); base::saturated_cast<int>(stats.duplicate_url_bookmark_count));
} }
if (stats.duplicate_url_and_title_bookmark_count != 0) {
base::UmaHistogramCounts100000(
"Bookmarks.Count.OnProfileLoad.DuplicateUrlAndTitle",
base::saturated_cast<int>(
stats.duplicate_url_and_title_bookmark_count));
}
if (stats.duplicate_url_and_title_and_parent_bookmark_count != 0) {
base::UmaHistogramCounts100000(
"Bookmarks.Count.OnProfileLoad.DuplicateUrlAndTitleAndParent",
base::saturated_cast<int>(
stats.duplicate_url_and_title_and_parent_bookmark_count));
}
} }
} // namespace } // namespace
......
...@@ -12,6 +12,50 @@ ...@@ -12,6 +12,50 @@
namespace bookmarks { namespace bookmarks {
namespace {
// Computes and aggregates into |*stats| metrics corresponding to a particular
// group of bookmarks with the same URL, listed in |*bookmarks_with_same_url|.
// The caller is responsible to guarantee that these provided bookmarks all
// share the same URL. Upon completion of this function, the state of
// |*bookmarks_with_same_url| is unspecified, because the implementation is free
// to modify its state desirable to avoid additional memory allocations on the
// calling site.
void AddStatsForBookmarksWithSameUrl(
std::vector<const BookmarkNode*>* bookmarks_with_same_url,
UrlIndex::Stats* stats) {
if (bookmarks_with_same_url->size() <= 1)
return;
stats->duplicate_url_bookmark_count += bookmarks_with_same_url->size() - 1;
// Sort only if there are 3 or more bookmarks. With exactly two (which is
// believed to be a common case) the precise ordering is irrelevant for the
// logic that follows.
if (bookmarks_with_same_url->size() > 2) {
std::sort(bookmarks_with_same_url->begin(), bookmarks_with_same_url->end(),
[](const BookmarkNode* a, const BookmarkNode* b) {
DCHECK_EQ(a->url(), b->url());
if (a->GetTitle() != b->GetTitle())
return a->GetTitle() < b->GetTitle();
return a->parent() < b->parent();
});
}
auto prev_i = bookmarks_with_same_url->begin();
for (auto i = std::next(prev_i); i != bookmarks_with_same_url->end();
++i, ++prev_i) {
DCHECK_EQ((*prev_i)->url(), (*i)->url());
if ((*prev_i)->GetTitle() == (*i)->GetTitle()) {
++stats->duplicate_url_and_title_bookmark_count;
if ((*prev_i)->parent() == (*i)->parent())
++stats->duplicate_url_and_title_and_parent_bookmark_count;
}
}
}
} // namespace
UrlIndex::UrlIndex(std::unique_ptr<BookmarkNode> root) UrlIndex::UrlIndex(std::unique_ptr<BookmarkNode> root)
: root_(std::move(root)) { : root_(std::move(root)) {
base::AutoLock url_lock(url_lock_); base::AutoLock url_lock(url_lock_);
...@@ -76,14 +120,23 @@ UrlIndex::Stats UrlIndex::ComputeStats() const { ...@@ -76,14 +120,23 @@ UrlIndex::Stats UrlIndex::ComputeStats() const {
base::AutoLock url_lock(url_lock_); base::AutoLock url_lock(url_lock_);
UrlIndex::Stats stats; UrlIndex::Stats stats;
stats.total_url_bookmark_count = nodes_ordered_by_url_set_.size(); stats.total_url_bookmark_count = nodes_ordered_by_url_set_.size();
if (stats.total_url_bookmark_count > 1) {
auto prev_i = nodes_ordered_by_url_set_.begin(); if (stats.total_url_bookmark_count <= 1)
for (auto i = std::next(prev_i); i != nodes_ordered_by_url_set_.end(); return stats;
++i, ++prev_i) {
if ((*prev_i)->url() == (*i)->url()) std::vector<const BookmarkNode*> bookmarks_with_same_url;
++stats.duplicate_url_bookmark_count; auto prev_i = nodes_ordered_by_url_set_.begin();
for (auto i = std::next(prev_i); i != nodes_ordered_by_url_set_.end();
++i, ++prev_i) {
if ((*prev_i)->url() != (*i)->url()) {
AddStatsForBookmarksWithSameUrl(&bookmarks_with_same_url, &stats);
bookmarks_with_same_url.clear();
} }
bookmarks_with_same_url.push_back(*i);
} }
AddStatsForBookmarksWithSameUrl(&bookmarks_with_same_url, &stats);
return stats; return stats;
} }
......
...@@ -70,8 +70,17 @@ class UrlIndex : public HistoryBookmarkModel { ...@@ -70,8 +70,17 @@ class UrlIndex : public HistoryBookmarkModel {
// Number of bookmark in the index excluding folders. // Number of bookmark in the index excluding folders.
size_t total_url_bookmark_count = 0; size_t total_url_bookmark_count = 0;
// Number of bookmarks (excluding folders) with a URL that is used by at // Number of bookmarks (excluding folders) with a URL that is used by at
// least one other bookmark. // least one other bookmark, excluding one bookmark per unique URL (i.e. all
// except one are considered duplicates).
size_t duplicate_url_bookmark_count = 0; size_t duplicate_url_bookmark_count = 0;
// Number of bookmarks (excluding folders) with the pair <URL, title> that
// is used by at least one other bookmark, excluding one bookmark per unique
// URL (i.e. all except one are considered duplicates).
size_t duplicate_url_and_title_bookmark_count = 0;
// Number of bookmarks (excluding folders) with the triple <URL, title,
// parent> that is used by at least one other bookmark, excluding one
// bookmark per unique URL (i.e. all except one are considered duplicates).
size_t duplicate_url_and_title_and_parent_bookmark_count = 0;
}; };
Stats ComputeStats() const; Stats ComputeStats() const;
......
...@@ -1497,6 +1497,34 @@ reviews. Googlers can read more about this at go/gwsq-gerrit. ...@@ -1497,6 +1497,34 @@ reviews. Googlers can read more about this at go/gwsq-gerrit.
</summary> </summary>
</histogram> </histogram>
<histogram name="Bookmarks.Count.OnProfileLoad.DuplicateUrlAndTitle"
units="bookmarks" expires_after="2021-04-04">
<owner>mastiz@chromium.org</owner>
<owner>sky@chromium.org</owner>
<summary>
The number of bookmarks a user has saved with a URL and title pair that is
also present in at least one other bookmark. This excludes not only folders
(which don't have a URL) but it also excludes one bookmark per unique URL
and title pair (that is, all except one are considered duplicates). Recorded
when bookmarks are loaded into storage from disk if there is at least one
duplicate.
</summary>
</histogram>
<histogram name="Bookmarks.Count.OnProfileLoad.DuplicateUrlAndTitleAndParent"
units="bookmarks" expires_after="2021-04-04">
<owner>mastiz@chromium.org</owner>
<owner>sky@chromium.org</owner>
<summary>
The number of bookmarks a user has saved with a URL, title and parent triple
that is also present in at least one other bookmark. This excludes not only
folders (which don't have a URL) but it also excludes one bookmark per
unique URL, title and parent triple (that is, all except one are considered
duplicates). Recorded when bookmarks are loaded into storage from disk if
there is at least one duplicate.
</summary>
</histogram>
<histogram name="Bookmarks.Count.OpenInIncognito" units="bookmarks" <histogram name="Bookmarks.Count.OpenInIncognito" units="bookmarks"
expires_after="M82"> expires_after="M82">
<owner>twellington@google.com</owner> <owner>twellington@google.com</owner>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment