Commit 54b4106d authored by Moe Ahmadi's avatar Moe Ahmadi Committed by Commit Bot

[omnibox] Moves ranking of local history zero-prefix suggestions to C++

In order to support different ranking approaches for the local history
zero-prefix suggestions, this CL modifies the current implementation to
allow ranking the normalized search terms in C++.

To accomplish this, LocalHistoryZeroSuggestProvider now requests an
unordered list of rows grouped by |normalized_term| from the URL
database in which |visit_count| is the sum of all visit counts and
|last_visit_time| is the latest visit time for that normalized term.

It also simplifies the LocalHistoryZeroSuggestProvider implementation by
moving the freshness and validity checks for the normalized term into
the SQL query.

This change is not expected to alter the existing behavior as confirmed
by the unit tests.

Bug: 1096615
Change-Id: I01c29afff51f348e5c32766e4e53b3ff7687a3b2
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2278625Reviewed-by: default avatarJustin Donnelly <jdonnelly@chromium.org>
Reviewed-by: default avatarScott Violet <sky@chromium.org>
Commit-Queue: Moe Ahmadi <mahmadi@chromium.org>
Cr-Commit-Position: refs/heads/master@{#786019}
parent 4d74f7dc
......@@ -6,6 +6,9 @@
namespace history {
NormalizedKeywordSearchTermVisit::~NormalizedKeywordSearchTermVisit() = default;
KeywordSearchTermVisit::KeywordSearchTermVisit() : visits(0) {}
KeywordSearchTermVisit::~KeywordSearchTermVisit() {}
......
......@@ -12,6 +12,20 @@
namespace history {
// NormalizedKeywordSearchTermVisit is returned by
// GetMostRecentNormalizedKeywordSearchTerms. It contains the time of the most
// recent visit and the visit count for the normalized search term aggregated
// from the keyword visits.
struct NormalizedKeywordSearchTermVisit {
NormalizedKeywordSearchTermVisit() = default;
~NormalizedKeywordSearchTermVisit();
base::string16 normalized_term; // The search term, in lower case and with
// extra whitespaces collapsed.
int visits{0}; // The visit count.
base::Time most_recent_visit_time; // The time of the most recent visit.
};
// KeywordSearchTermVisit is returned from GetMostRecentKeywordSearchTerms. It
// gives the time and search term of the keyword visit.
struct KeywordSearchTermVisit {
......
......@@ -13,6 +13,7 @@
#include "base/stl_util.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "components/history/core/browser/keyword_search_term.h"
#include "components/url_formatter/url_formatter.h"
#include "sql/statement.h"
......@@ -631,9 +632,10 @@ void URLDatabase::GetMostRecentKeywordSearchTerms(
}
}
std::vector<KeywordSearchTermVisit>
URLDatabase::GetMostRecentKeywordSearchTerms(KeywordID keyword_id,
int max_count) {
std::vector<NormalizedKeywordSearchTermVisit>
URLDatabase::GetMostRecentNormalizedKeywordSearchTerms(
KeywordID keyword_id,
base::Time age_threshold) {
// NOTE: the keyword_id can be zero if on first run the user does a query
// before the TemplateURLService has finished loading. As the chances of this
// occurring are small, we ignore it.
......@@ -642,22 +644,23 @@ URLDatabase::GetMostRecentKeywordSearchTerms(KeywordID keyword_id,
sql::Statement statement(GetDB().GetCachedStatement(
SQL_FROM_HERE,
"SELECT DISTINCT "
"kv.term, kv.normalized_term, u.visit_count, u.last_visit_time "
"SELECT "
"kv.normalized_term, SUM(u.visit_count), MAX(u.last_visit_time) "
"FROM keyword_search_terms kv JOIN urls u ON kv.url_id = u.id "
"WHERE kv.keyword_id = ? "
"ORDER BY u.last_visit_time DESC LIMIT ?"));
"WHERE kv.keyword_id = ? AND u.last_visit_time > ? "
"AND kv.normalized_term IS NOT NULL AND kv.normalized_term != \"\" "
"GROUP BY kv.normalized_term"));
statement.BindInt64(0, keyword_id);
statement.BindInt(1, max_count);
statement.BindInt64(1, age_threshold.ToInternalValue());
std::vector<KeywordSearchTermVisit> visits;
std::vector<NormalizedKeywordSearchTermVisit> visits;
while (statement.Step()) {
KeywordSearchTermVisit visit;
visit.term = statement.ColumnString16(0);
visit.normalized_term = statement.ColumnString16(1);
visit.visits = statement.ColumnInt(2);
visit.time = base::Time::FromInternalValue(statement.ColumnInt64(3));
NormalizedKeywordSearchTermVisit visit;
visit.normalized_term = statement.ColumnString16(0);
visit.visits = statement.ColumnInt(1);
visit.most_recent_visit_time =
base::Time::FromInternalValue(statement.ColumnInt64(2));
visits.push_back(visit);
}
return visits;
......
......@@ -18,6 +18,10 @@
class GURL;
namespace base {
class Time;
}
namespace sql {
class Database;
}
......@@ -26,6 +30,8 @@ namespace history {
struct KeywordSearchTermRow;
struct KeywordSearchTermVisit;
struct NormalizedKeywordSearchTermVisit;
class VisitDatabase; // For friend statement.
// Encapsulates an SQL database that holds URL info. This is a subset of the
......@@ -225,10 +231,12 @@ class URLDatabase {
int max_count,
std::vector<KeywordSearchTermVisit>* matches);
// Returns up to max_count of the most recent search terms.
std::vector<KeywordSearchTermVisit> GetMostRecentKeywordSearchTerms(
KeywordID keyword_id,
int max_count);
// Returns the most recent (i.e., no older than |age_threshold|) normalized
// search terms (i.e., search terms in lower case with whitespaces collapsed)
// for the specified keyword.
std::vector<NormalizedKeywordSearchTermVisit>
GetMostRecentNormalizedKeywordSearchTerms(KeywordID keyword_id,
base::Time age_threshold);
// Deletes all searches matching |term|.
bool DeleteKeywordSearchTerm(const base::string16& term);
......
......@@ -196,9 +196,10 @@ TEST_F(URLDatabaseTest, KeywordSearchTermVisit) {
ASSERT_EQ(1U, matches.size());
ASSERT_EQ(keyword, matches[0].term);
auto zero_prefix_matches = GetMostRecentKeywordSearchTerms(keyword_id, 10);
std::vector<NormalizedKeywordSearchTermVisit> zero_prefix_matches =
GetMostRecentNormalizedKeywordSearchTerms(
keyword_id, history::AutocompleteAgeThreshold());
ASSERT_EQ(1U, zero_prefix_matches.size());
ASSERT_EQ(keyword, zero_prefix_matches[0].term);
ASSERT_EQ(normalized_keyword, zero_prefix_matches[0].normalized_term);
KeywordSearchTermRow keyword_search_term_row;
......@@ -215,6 +216,10 @@ TEST_F(URLDatabaseTest, KeywordSearchTermVisit) {
GetMostRecentKeywordSearchTerms(keyword_id, keyword, 10, &matches);
ASSERT_EQ(0U, matches.size());
zero_prefix_matches = GetMostRecentNormalizedKeywordSearchTerms(
keyword_id, history::AutocompleteAgeThreshold());
ASSERT_EQ(0U, zero_prefix_matches.size());
ASSERT_FALSE(GetKeywordSearchTermRow(url_id, &keyword_search_term_row));
}
......
......@@ -4,6 +4,7 @@
#include "components/omnibox/browser/local_history_zero_suggest_provider.h"
#include <algorithm>
#include <set>
#include <string>
......@@ -217,34 +218,19 @@ void LocalHistoryZeroSuggestProvider::QueryURLDatabase(
return;
}
// Request 5x more search terms than the number of matches the provider
// intends to return hoping to have enough left once ineligible ones are
// filtered out.
const auto& results = url_db->GetMostRecentKeywordSearchTerms(
template_url_service->GetDefaultSearchProvider()->id(), max_matches_ * 5);
auto results = url_db->GetMostRecentNormalizedKeywordSearchTerms(
template_url_service->GetDefaultSearchProvider()->id(),
history::AutocompleteAgeThreshold());
// Used to filter out duplicate query suggestions.
std::set<base::string16> seen_suggestions_set;
std::sort(results.begin(), results.end(), [](const auto& a, const auto& b) {
return a.most_recent_visit_time > b.most_recent_visit_time;
});
int relevance = kLocalHistoryZeroSuggestRelevance;
size_t search_terms_seen_count = 0;
for (const auto& result : results) {
search_terms_seen_count++;
// Discard the result if it is not fresh enough.
if (result.time < history::AutocompleteAgeThreshold())
continue;
base::string16 search_terms = result.normalized_term;
if (search_terms.empty())
continue;
// Filter out duplicate query suggestions.
if (seen_suggestions_set.count(search_terms))
continue;
seen_suggestions_set.insert(search_terms);
SearchSuggestionParser::SuggestResult suggestion(
/*suggestion=*/search_terms, AutocompleteMatchType::SEARCH_HISTORY,
/*suggestion=*/result.normalized_term,
AutocompleteMatchType::SEARCH_HISTORY,
/*subtype_identifier=*/0, /*from_keyword=*/false, relevance--,
/*relevance_from_server=*/0,
/*input_text=*/base::ASCIIToUTF16(std::string()));
......@@ -263,8 +249,7 @@ void LocalHistoryZeroSuggestProvider::QueryURLDatabase(
}
UMA_HISTOGRAM_COUNTS_1000(
"Omnibox.LocalHistoryZeroSuggest.SearchTermsSeenCount",
search_terms_seen_count);
"Omnibox.LocalHistoryZeroSuggest.SearchTermsSeenCount", results.size());
UMA_HISTOGRAM_COUNTS_1000("Omnibox.LocalHistoryZeroSuggest.MaxMatchesCount",
max_matches_);
......
......@@ -442,10 +442,10 @@ TEST_F(LocalHistoryZeroSuggestProviderTest, Delete) {
ExpectMatches({{"hello world", 500}, {"not to be deleted", 499}});
// The keyword search terms database should be queried for the search terms
// submitted to the default search provider only; which are 4 unique search
// terms in this case.
// submitted to the default search provider only; which are 2 unique
// normalized search terms in this case.
histogram_tester.ExpectUniqueSample(
"Omnibox.LocalHistoryZeroSuggest.SearchTermsSeenCount", 4, 1);
"Omnibox.LocalHistoryZeroSuggest.SearchTermsSeenCount", 2, 1);
provider_->DeleteMatch(provider_->matches()[0]);
......@@ -479,16 +479,16 @@ TEST_F(LocalHistoryZeroSuggestProviderTest, Delete) {
// produce the deleted match are deleted.
history::URLDatabase* url_db =
client_->GetHistoryService()->InMemoryDatabase();
std::vector<history::KeywordSearchTermVisit> visits =
url_db->GetMostRecentKeywordSearchTerms(default_search_provider()->id(),
/*max_count=*/10);
std::vector<history::NormalizedKeywordSearchTermVisit> visits =
url_db->GetMostRecentNormalizedKeywordSearchTerms(
default_search_provider()->id(), history::AutocompleteAgeThreshold());
EXPECT_EQ(1U, visits.size());
EXPECT_EQ(base::ASCIIToUTF16("not to be deleted"), visits[0].term);
EXPECT_EQ(base::ASCIIToUTF16("not to be deleted"), visits[0].normalized_term);
// Make sure search terms from other search providers that would produce the
// deleted match are not deleted.
visits = url_db->GetMostRecentKeywordSearchTerms(other_search_provider->id(),
/*max_count=*/10);
visits = url_db->GetMostRecentNormalizedKeywordSearchTerms(
other_search_provider->id(), history::AutocompleteAgeThreshold());
EXPECT_EQ(1U, visits.size());
EXPECT_EQ(base::ASCIIToUTF16("hello world"), visits[0].term);
EXPECT_EQ(base::ASCIIToUTF16("hello world"), visits[0].normalized_term);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment