Commit afaee234 authored by Moe Ahmadi's avatar Moe Ahmadi Committed by Commit Bot

[omnibox] Ignore invalid duplicates in local history zero-suggest

Invalid duplicates are those search terms extracted from search query
URLs which are identical or nearly identical to the original search
query URL and issued too closely to it. They are typically recorded as
a result of back/forward navigations or user interactions in the search
result page and are likely not newly initiated searches.

With frecency ranking, these duplicates must be ignored or they will
erroneously boost those queries.

For the purpose of local history ZPS, a duplicated query issued within 5
minutes of the original query is considered an invalid duplicate.

Bug: 1108531,1096615
Change-Id: I7855c0b72ee47d7a55bc77315734e7e48c2df355
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2337676Reviewed-by: default avatarVictor Costan <pwnall@chromium.org>
Reviewed-by: default avatarMark Pearson <mpearson@chromium.org>
Reviewed-by: default avatarScott Violet <sky@chromium.org>
Commit-Queue: Moe Ahmadi <mahmadi@chromium.org>
Cr-Commit-Position: refs/heads/master@{#800211}
parent 2fbabe06
......@@ -631,17 +631,46 @@ URLDatabase::GetMostRecentNormalizedKeywordSearchTerms(
if (!keyword_id)
return {};
sql::Statement statement(GetDB().GetCachedStatement(
SQL_FROM_HERE,
"SELECT "
"kv.normalized_term, SUM(u.visit_count), MAX(u.last_visit_time) "
"FROM keyword_search_terms kv JOIN urls u ON kv.url_id = u.id "
"WHERE kv.keyword_id = ? AND u.last_visit_time > ? "
"AND kv.normalized_term IS NOT NULL AND kv.normalized_term != \"\" "
"GROUP BY kv.normalized_term"));
statement.BindInt64(0, keyword_id);
statement.BindInt64(1, age_threshold.ToInternalValue());
// Extracts the most recent normalized search terms from the
// keyword_search_terms table joined with the urls table. For a given search
// term, those search query URLs that are visited too closely to the original
// search query URL are ignored in order to avoid erroneously boosting the
// term when frecency ranking is used. This is done by rounding down the URLs'
// last_visit_time to the largest ? ms interval and picking the oldest URL out
// of all the URLs with the same rounded last visit time. The average of visit
// counts for those URLs is then used as the visit count of this emerging
// deduplicated URL This way no bare column (chosen at random) is returned by
// the aggregate query.
sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
R"(
SELECT
normalized_term,
SUM(visit_count) AS visit_count,
MAX(last_visit_time) AS last_visit_time
FROM
(
SELECT
normalized_term,
AVG(visit_count) AS visit_count,
MIN(u.last_visit_time) AS last_visit_time,
u.last_visit_time - (u.last_visit_time % ?) as rnd_last_visit_time
FROM
keyword_search_terms kv JOIN urls u ON kv.url_id = u.id
WHERE
kv.keyword_id = ?
AND u.last_visit_time > ?
AND kv.normalized_term IS NOT NULL
AND kv.normalized_term != ""
GROUP BY normalized_term, rnd_last_visit_time
)
GROUP BY normalized_term
ORDER BY last_visit_time DESC
)"));
statement.BindInt64(
0, kAutocompleteDuplicateVisitIntervalThreshold.ToInternalValue());
statement.BindInt64(1, keyword_id);
statement.BindInt64(2, age_threshold.ToInternalValue());
std::vector<NormalizedKeywordSearchTermVisit> visits;
while (statement.Step()) {
......@@ -762,6 +791,9 @@ const int kLowQualityMatchTypedLimit = 1;
const int kLowQualityMatchVisitLimit = 4;
const int kLowQualityMatchAgeLimitInDays = 3;
const base::TimeDelta kAutocompleteDuplicateVisitIntervalThreshold =
base::TimeDelta::FromMinutes(5);
base::Time AutocompleteAgeThreshold() {
return (base::Time::Now() -
base::TimeDelta::FromDays(kLowQualityMatchAgeLimitInDays));
......
......@@ -339,6 +339,15 @@ extern const int kLowQualityMatchTypedLimit;
extern const int kLowQualityMatchVisitLimit;
extern const int kLowQualityMatchAgeLimitInDays;
// The time interval within which a duplicate query is considered invalid for
// autocomplete purposes.
// These invalid duplicates are extracted from search query URLs which are
// identical or nearly identical to the original search query URL and issued too
// closely to it, i.e., within this time interval. They are typically recorded
// as a result of back/forward navigations or user interactions in the search
// result page and are likely not newly initiated searches.
extern const base::TimeDelta kAutocompleteDuplicateVisitIntervalThreshold;
// Returns the date threshold for considering an history item as significant.
base::Time AutocompleteAgeThreshold();
......
......@@ -42,7 +42,7 @@ namespace {
struct TestURLData {
const TemplateURL* search_provider;
std::string search_terms;
std::string other_query_params;
std::string additional_query_params;
int age_in_seconds;
int visit_count = 1;
std::string title = "";
......@@ -154,6 +154,8 @@ void LocalHistoryZeroSuggestProviderTest::LoadURLs(
for (const auto& entry : url_data_list) {
TemplateURLRef::SearchTermsArgs search_terms_args(
base::UTF8ToUTF16(entry.search_terms));
search_terms_args.additional_query_params.append(
entry.additional_query_params);
const auto& search_terms_data =
client_->GetTemplateURLService()->search_terms_data();
std::string search_url =
......@@ -406,25 +408,40 @@ TEST_F(LocalHistoryZeroSuggestProviderTest, DefaultSearchProvider) {
// collapsed, are lowercased and deduplicated) without loss of unicode encoding.
TEST_F(LocalHistoryZeroSuggestProviderTest, Normalization) {
LoadURLs({
{default_search_provider(), "hello world", "&foo=bar", 3},
{default_search_provider(), "hello world", "&foo=bar", 4},
{default_search_provider(), "hello world", "&foo=bar", 5},
// Issued too closely to the original query; will be ignored:
{default_search_provider(), "HELLO WORLD ", "&foo=bar4", 1},
{default_search_provider(), "سلام دنیا", "&bar=baz", 2},
// Issued too closely to the original query; will be ignored:
{default_search_provider(), "hello world", "&foo=bar3", 3},
// Issued too closely to the original query; will be ignored:
{default_search_provider(), "hello world", "&foo=bar2", 4},
// Issued too closely to the original query; will be ignored:
{default_search_provider(), "hello world", "&foo=bar1", 5},
{default_search_provider(), "hello world", "&foo=bar", 6},
{default_search_provider(), "HELLO WORLD ", "&foo=bar", 1},
{default_search_provider(), "سلام دنیا", "&foo=bar", 2},
});
StartProviderAndWaitUntilDone();
ExpectMatches({{"hello world", 500}, {"سلام دنیا", 499}});
ExpectMatches({{"سلام دنیا", 500}, {"hello world", 499}});
}
// Tests that the suggestions are ranked correctly.
TEST_F(LocalHistoryZeroSuggestProviderTest, Ranking) {
int original_query_age =
history::kAutocompleteDuplicateVisitIntervalThreshold.InSeconds() + 3;
LoadURLs({
{default_search_provider(), "less recent more frequent search",
"&foo=bar", /*age_in_seconds=*/2, /*visit_count=*/2},
{default_search_provider(), "more recent less frequent search",
"&foo=bar", /*age_in_seconds=*/1, /*visit_count=*/1},
// Issued far enough from the original query; won't be ignored:
{default_search_provider(), "more recent search", "&bar=baz2",
/*age_in_seconds=*/0},
// Issued far enough from the original query; won't be ignored:
{default_search_provider(), "less recent search", "&foo=bar3",
/*age_in_seconds=*/1},
// Issued too closely to the original query; will be ignored:
{default_search_provider(), "less recent search", "&foo=bar2",
/*age_in_seconds=*/original_query_age - 1},
{default_search_provider(), "more recent search", "&bar=baz",
/*age_in_seconds=*/original_query_age},
{default_search_provider(), "less recent search", "&foo=bar",
/*age_in_seconds=*/original_query_age},
});
// With frecency ranking disabled, more recent searches are ranked higher.
......@@ -434,11 +451,10 @@ TEST_F(LocalHistoryZeroSuggestProviderTest, Ranking) {
{omnibox::kOmniboxLocalZeroSuggestFrecencyRanking});
StartProviderAndWaitUntilDone();
ExpectMatches({{"more recent less frequent search", 500},
{"less recent more frequent search", 499}});
ExpectMatches({{"more recent search", 500}, {"less recent search", 499}});
// With frecency ranking enabled, more recent searches are not necessarily
// ranked higher.
// With frecency ranking enabled, more recent searches are ranked higher when
// searches are just as frequent.
scoped_feature_list_ = std::make_unique<base::test::ScopedFeatureList>();
scoped_feature_list_->InitWithFeatures(
{omnibox::kReactiveZeroSuggestionsOnNTPRealbox, // Enables the provider.
......@@ -446,8 +462,18 @@ TEST_F(LocalHistoryZeroSuggestProviderTest, Ranking) {
{});
StartProviderAndWaitUntilDone();
ExpectMatches({{"less recent more frequent search", 500},
{"more recent less frequent search", 499}});
ExpectMatches({{"more recent search", 500}, {"less recent search", 499}});
// With frecency ranking enabled, more frequent searches are ranked higher
// when searches are nearly as old.
LoadURLs({
// Issued far enough from the original query; won't be ignored:
{default_search_provider(), "less recent search", "&foo=bar4",
/*age_in_seconds=*/2, /*visit_count=*/5},
});
StartProviderAndWaitUntilDone();
ExpectMatches({{"less recent search", 500}, {"more recent search", 499}});
}
// Tests that suggestions are created from fresh search histories only and that
......@@ -490,12 +516,15 @@ TEST_F(LocalHistoryZeroSuggestProviderTest, Deletion) {
auto* other_search_provider = template_url_service->Add(
std::make_unique<TemplateURL>(*GenerateDummyTemplateURLData("other")));
LoadURLs({
{default_search_provider(), "hello world", "&foo=bar&aqs=1", 1},
{default_search_provider(), "HELLO WORLD ", "&foo=bar&aqs=12", 1},
{default_search_provider(), "hello world", "&foo=bar&aqs=123", 1},
{default_search_provider(), "hello world", "&foo=bar&aqs=1234", 1},
{default_search_provider(), "not to be deleted", "&foo=bar&aqs=12345", 1},
{other_search_provider, "hello world", "&foo=bar&aqs=123456", 1},
// Issued too closely to the original query; will be ignored:
{default_search_provider(), "hello world", "&foo=bar1", 1},
// Issued too closely to the original query; will be ignored:
{default_search_provider(), "HELLO WORLD ", "&foo=bar2", 2},
// Issued too closely to the original query; will be ignored:
{default_search_provider(), "hello world", "foo=bar3", 3},
{default_search_provider(), "hello world", "foo=bar4", 4},
{other_search_provider, "hello world", "", 5},
{default_search_provider(), "not to be deleted", "", 6},
});
StartProviderAndWaitUntilDone();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment