Commit d29fbaad authored by manuk's avatar manuk Committed by Commit Bot

[omnibox] Dedup HQP matches V2.

HQP has a provider limit of 3. To make full use of the capacity, this CL
dedupes HQP matches.

The first attempt at HQP deduping (crrev.com/c/2219027) deduped matches
too early in the algorithm which could result in unintionally removing a
suggestion even if its duplicate suggestion would later be trimmed or
scored lower. This was reverted (crrev.com/c/2327955).

This CL implements deduping after trimming and scoring of HQP matches.

Bug: 591981
Change-Id: I27c418e2048c495d2622b7937f080792ba43acc5
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2334819
Commit-Queue: manuk hovanesian <manukh@chromium.org>
Reviewed-by: default avatarMark Pearson <mpearson@chromium.org>
Reviewed-by: default avatarTommy Li <tommycli@chromium.org>
Cr-Commit-Position: refs/heads/master@{#796639}
parent a1aebd96
...@@ -657,6 +657,7 @@ GURL AutocompleteResult::ComputeAlternateNavUrl( ...@@ -657,6 +657,7 @@ GURL AutocompleteResult::ComputeAlternateNavUrl(
: GURL(); : GURL();
} }
// static
void AutocompleteResult::DeduplicateMatches(ACMatches* matches) { void AutocompleteResult::DeduplicateMatches(ACMatches* matches) {
// Group matches by stripped URL and whether it's a calculator suggestion. // Group matches by stripped URL and whether it's a calculator suggestion.
std::unordered_map<std::pair<GURL, bool>, std::vector<ACMatches::iterator>, std::unordered_map<std::pair<GURL, bool>, std::vector<ACMatches::iterator>,
......
...@@ -162,6 +162,7 @@ class AutocompleteResult { ...@@ -162,6 +162,7 @@ class AutocompleteResult {
size_t EstimateMemoryUsage() const; size_t EstimateMemoryUsage() const;
// Get a list of comparators used for deduping for the matches in this result. // Get a list of comparators used for deduping for the matches in this result.
// This is only used for logging.
std::vector<MatchDedupComparator> GetMatchDedupComparators() const; std::vector<MatchDedupComparator> GetMatchDedupComparators() const;
// Gets the header string associated with |suggestion_group_id|. Returns an // Gets the header string associated with |suggestion_group_id|. Returns an
......
...@@ -152,7 +152,7 @@ class HistoryQuickProviderTest : public testing::Test { ...@@ -152,7 +152,7 @@ class HistoryQuickProviderTest : public testing::Test {
void RunTest(const base::string16& text, void RunTest(const base::string16& text,
bool prevent_inline_autocomplete, bool prevent_inline_autocomplete,
const std::vector<std::string>& expected_urls, const std::vector<std::string>& expected_urls,
bool can_inline_top_result, bool expected_can_inline_top_result,
const base::string16& expected_fill_into_edit, const base::string16& expected_fill_into_edit,
const base::string16& autocompletion); const base::string16& autocompletion);
...@@ -161,9 +161,10 @@ class HistoryQuickProviderTest : public testing::Test { ...@@ -161,9 +161,10 @@ class HistoryQuickProviderTest : public testing::Test {
const size_t cursor_position, const size_t cursor_position,
bool prevent_inline_autocomplete, bool prevent_inline_autocomplete,
const std::vector<std::string>& expected_urls, const std::vector<std::string>& expected_urls,
bool can_inline_top_result, bool expected_can_inline_top_result,
const base::string16& expected_fill_into_edit, const base::string16& expected_fill_into_edit,
const base::string16& autocompletion); const base::string16& autocompletion,
bool duplicates_ok = false);
// TODO(shess): From history_service.h in reference to history_backend: // TODO(shess): From history_service.h in reference to history_backend:
// > This class has most of the implementation and runs on the 'thread_'. // > This class has most of the implementation and runs on the 'thread_'.
...@@ -267,7 +268,15 @@ HistoryQuickProviderTest::GetTestData() { ...@@ -267,7 +268,15 @@ HistoryQuickProviderTest::GetTestData() {
"%8C%E5%A4%A7%E6%88%A6#.E3.83.B4.E3.82.A7.E3.83.AB.E3.82.B5.E3.82.A4.E3." "%8C%E5%A4%A7%E6%88%A6#.E3.83.B4.E3.82.A7.E3.83.AB.E3.82.B5.E3.82.A4.E3."
"83.A6.E4.BD.93.E5.88.B6", "83.A6.E4.BD.93.E5.88.B6",
"Title Unimportant", 2, 2, 0}, "Title Unimportant", 2, 2, 0},
{"https://twitter.com/fungoodtimes", "relatable!", 1, 1, 0}, {"https://twitter.com/fungoodtimes", "fungoodtimes", 10, 10, 0},
{"https://deduping-test.com/high-scoring", "xyz", 20, 20, 0},
{"https://deduping-test.com/med-scoring", "xyz", 10, 10, 0},
{"https://suffix.com/prefixsuffix1",
"'pre suf' should score higher than 'presuf'", 3, 3, 1},
{"https://suffix.com/prefixsuffix2",
"'pre suf' should score higher than 'presuf'", 3, 3, 2},
{"https://suffix.com/prefixsuffix3",
"'pre suf' should score higher than 'presuf'", 3, 3, 3},
}; };
} }
...@@ -301,11 +310,11 @@ void HistoryQuickProviderTest::RunTest( ...@@ -301,11 +310,11 @@ void HistoryQuickProviderTest::RunTest(
const base::string16& text, const base::string16& text,
bool prevent_inline_autocomplete, bool prevent_inline_autocomplete,
const std::vector<std::string>& expected_urls, const std::vector<std::string>& expected_urls,
bool can_inline_top_result, bool expected_can_inline_top_result,
const base::string16& expected_fill_into_edit, const base::string16& expected_fill_into_edit,
const base::string16& expected_autocompletion) { const base::string16& expected_autocompletion) {
RunTestWithCursor(text, base::string16::npos, prevent_inline_autocomplete, RunTestWithCursor(text, base::string16::npos, prevent_inline_autocomplete,
expected_urls, can_inline_top_result, expected_urls, expected_can_inline_top_result,
expected_fill_into_edit, expected_autocompletion); expected_fill_into_edit, expected_autocompletion);
} }
...@@ -316,7 +325,8 @@ void HistoryQuickProviderTest::RunTestWithCursor( ...@@ -316,7 +325,8 @@ void HistoryQuickProviderTest::RunTestWithCursor(
const std::vector<std::string>& expected_urls, const std::vector<std::string>& expected_urls,
bool expected_can_inline_top_result, bool expected_can_inline_top_result,
const base::string16& expected_fill_into_edit, const base::string16& expected_fill_into_edit,
const base::string16& expected_autocompletion) { const base::string16& expected_autocompletion,
bool duplicates_ok) {
SCOPED_TRACE(text); // Minimal hint to query being run. SCOPED_TRACE(text); // Minimal hint to query being run.
base::RunLoop().RunUntilIdle(); base::RunLoop().RunUntilIdle();
AutocompleteInput input(text, cursor_position, AutocompleteInput input(text, cursor_position,
...@@ -334,13 +344,26 @@ void HistoryQuickProviderTest::RunTestWithCursor( ...@@ -334,13 +344,26 @@ void HistoryQuickProviderTest::RunTestWithCursor(
// If the number of expected and actual matches aren't equal then we need // If the number of expected and actual matches aren't equal then we need
// test no further, but let's do anyway so that we know which URLs failed. // test no further, but let's do anyway so that we know which URLs failed.
EXPECT_EQ(expected_urls.size(), ac_matches_.size()); if (duplicates_ok)
EXPECT_LE(expected_urls.size(), ac_matches_.size());
else
EXPECT_EQ(expected_urls.size(), ac_matches_.size());
// Verify that all expected URLs were found and that all found URLs // Verify that all expected URLs were found and that all found URLs
// were expected. // were expected.
std::set<std::string> leftovers = std::set<std::string> leftovers;
for_each(expected_urls.begin(), expected_urls.end(), if (duplicates_ok) {
SetShouldContain(ac_matches_)).LeftOvers(); for (auto match : ac_matches_)
leftovers.insert(match.destination_url.spec());
for (auto expected : expected_urls)
EXPECT_EQ(1U, leftovers.count(expected)) << "Expected URL " << expected;
for (auto expected : expected_urls)
leftovers.erase(expected);
} else {
leftovers = for_each(expected_urls.begin(), expected_urls.end(),
SetShouldContain(ac_matches_))
.LeftOvers();
}
EXPECT_EQ(0U, leftovers.size()) << "There were " << leftovers.size() EXPECT_EQ(0U, leftovers.size()) << "There were " << leftovers.size()
<< " unexpected results, one of which was: '" << " unexpected results, one of which was: '"
<< *(leftovers.begin()) << "'."; << *(leftovers.begin()) << "'.";
...@@ -405,13 +428,85 @@ TEST_F(HistoryQuickProviderTest, SingleMatchWithCursor) { ...@@ -405,13 +428,85 @@ TEST_F(HistoryQuickProviderTest, SingleMatchWithCursor) {
} }
TEST_F(HistoryQuickProviderTest, MatchWithAndWithoutCursorWordBreak) { TEST_F(HistoryQuickProviderTest, MatchWithAndWithoutCursorWordBreak) {
// The input 'twitter.com/fungoo|times' matches only with a cursor word break.
// We should retrieve the desired result but it should not be allowed to be
// the default match.
std::vector<std::string> expected_urls; std::vector<std::string> expected_urls;
expected_urls.push_back("https://twitter.com/fungoodtimes"); expected_urls.push_back("https://twitter.com/fungoodtimes");
// With cursor after ".com", we should retrieve the desired result but it
// should not be allowed to be the default match.
RunTestWithCursor( RunTestWithCursor(
ASCIIToUTF16("twitter.comfungoodtimes"), 11, false, expected_urls, false, ASCIIToUTF16("twitter.com/fungootime"), 18, true, expected_urls, false,
ASCIIToUTF16("https://twitter.com/fungoodtimes"), base::string16()); ASCIIToUTF16("https://twitter.com/fungoodtimes"), base::string16());
// The input 'twitter.com/fungood|times' matches both with and without a
// cursor word break. We should retrieve both suggestions but neither should
// be allowed to be the default match.
RunTestWithCursor(
ASCIIToUTF16("twitter.com/fungoodtime"), 19, true, expected_urls, false,
ASCIIToUTF16("https://twitter.com/fungoodtimes"), base::string16(), true);
// A suggestion with a cursor not at the input end can only be default if
// the input matches suggestion exactly.
RunTestWithCursor(
ASCIIToUTF16("twitter.com/fungoodtimes"), 19, true, expected_urls, true,
ASCIIToUTF16("https://twitter.com/fungoodtimes"), base::string16(), true);
}
TEST_F(HistoryQuickProviderTest, MatchWithAndWithoutCursorWordBreak_Dedupe) {
std::vector<std::string> expected_urls;
// An input can match a suggestion both with and without cursor word break.
// When doing so exceeds 3 (|max_matches|), they should be deduped and return
// unique suggestions. Cursor position selected arbitrarily; it doesn't matter
// as long as it's not at the start or the end.
expected_urls.push_back("https://deduping-test.com/high-scoring");
expected_urls.push_back("https://deduping-test.com/med-scoring");
RunTestWithCursor(
ASCIIToUTF16("deduping-test"), 1, true, expected_urls, false,
ASCIIToUTF16("https://deduping-test.com/high-scoring"), base::string16());
}
TEST_F(HistoryQuickProviderTest,
MatchWithAndWithoutCursorWordBreak_DedupingKeepsHigherScoredSuggestion) {
// When the input matches a suggestion both with and without a cursor word
// break, HQP will generate the suggestion twice. When doing so exceeds
// 3 (|max_matches|), they should be deduped and the higher scored suggestions
// should be kept, as oppposed to the first suggestion encountered.
std::vector<std::string> expected_urls;
expected_urls.push_back("https://suffix.com/prefixsuffix1");
expected_urls.push_back("https://suffix.com/prefixsuffix2");
expected_urls.push_back("https://suffix.com/prefixsuffix3");
// Get scores for 'prefixsuffix'
RunTestWithCursor(ASCIIToUTF16("prefixsuffix"), std::string::npos, false,
expected_urls, false,
ASCIIToUTF16("https://suffix.com/prefixsuffix1"),
base::string16());
std::vector<int> unbroken_scores(3);
std::transform(ac_matches().begin(), ac_matches().end(),
unbroken_scores.begin(),
[](const auto& match) { return match.relevance; });
// Get scores for 'prefix suffix'
RunTestWithCursor(ASCIIToUTF16("prefix suffix"), std::string::npos, false,
expected_urls, false,
ASCIIToUTF16("https://suffix.com/prefixsuffix1"),
base::string16());
std::vector<int> broken_scores(3);
std::transform(ac_matches().begin(), ac_matches().end(),
broken_scores.begin(),
[](const auto& match) { return match.relevance; });
// Ensure the latter scores are higher than the former.
for (size_t i = 0; i < 3; ++i)
EXPECT_GT(broken_scores[i], unbroken_scores[i]);
// Get scores for 'prefix|suffix', which will create duplicate
// ScoredHistoryMatches.
RunTestWithCursor(ASCIIToUTF16("prefixsuffix"), 6, true, expected_urls, false,
ASCIIToUTF16("https://suffix.com/prefixsuffix1"),
base::string16());
// Ensure the higher scored ScoredHistoryMatches are promoted to suggestions
// during deduping.
for (size_t i = 0; i < 3; ++i)
EXPECT_EQ(ac_matches()[i].relevance, broken_scores[i]);
} }
TEST_F(HistoryQuickProviderTest, WordBoundariesWithPunctuationMatch) { TEST_F(HistoryQuickProviderTest, WordBoundariesWithPunctuationMatch) {
......
...@@ -221,10 +221,26 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( ...@@ -221,10 +221,26 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
} }
// Select and sort only the top |max_matches| results. // Select and sort only the top |max_matches| results.
if (scored_items.size() > max_matches) { if (scored_items.size() > max_matches) {
std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches, // Sort the top |max_matches| * 2 matches which is cheaper than sorting all
scored_items.end(), // matches yet likely sufficient to contain |max_matches| unique matches
ScoredHistoryMatch::MatchScoreGreater); // most of the time.
scored_items.resize(max_matches); auto first_pass_size = std::min(scored_items.size(), max_matches * 2);
std::partial_sort(
scored_items.begin(), scored_items.begin() + first_pass_size,
scored_items.end(), ScoredHistoryMatch::MatchScoreGreater);
scored_items.resize(first_pass_size);
// Filter unique matches to maximize the use of the |max_matches| capacity.
std::set<HistoryID> seen_history_ids;
base::EraseIf(scored_items, [&](const auto& scored_item) {
HistoryID scored_item_id = scored_item.url_info.id();
bool duplicate = seen_history_ids.count(scored_item_id);
seen_history_ids.insert(scored_item_id);
return duplicate;
});
if (scored_items.size() > max_matches)
scored_items.resize(max_matches);
} else { } else {
std::sort(scored_items.begin(), scored_items.end(), std::sort(scored_items.begin(), scored_items.end(),
ScoredHistoryMatch::MatchScoreGreater); ScoredHistoryMatch::MatchScoreGreater);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment