Commit 54b4106d authored by Moe Ahmadi's avatar Moe Ahmadi Committed by Commit Bot

[omnibox] Moves ranking of local history zero-prefix suggestions to C++

In order to support different ranking approaches for the local history
zero-prefix suggestions, this CL modifies the current implementation to
allow ranking the normalized search terms in C++.

To accomplish this, LocalHistoryZeroSuggestProvider now requests an
unordered list of rows grouped by |normalized_term| from the URL
database in which |visit_count| is the sum of all visit counts and
|last_visit_time| is the latest visit time for that normalized term.

It also simplifies the LocalHistoryZeroSuggestProvider implementation by
moving the freshness and validity checks for the normalized term into
the SQL query.

This change is not expected to alter the existing behavior as confirmed
by the unit tests.

Bug: 1096615
Change-Id: I01c29afff51f348e5c32766e4e53b3ff7687a3b2
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2278625Reviewed-by: default avatarJustin Donnelly <jdonnelly@chromium.org>
Reviewed-by: default avatarScott Violet <sky@chromium.org>
Commit-Queue: Moe Ahmadi <mahmadi@chromium.org>
Cr-Commit-Position: refs/heads/master@{#786019}
parent 4d74f7dc
...@@ -6,6 +6,9 @@ ...@@ -6,6 +6,9 @@
namespace history { namespace history {
NormalizedKeywordSearchTermVisit::~NormalizedKeywordSearchTermVisit() = default;
KeywordSearchTermVisit::KeywordSearchTermVisit() : visits(0) {} KeywordSearchTermVisit::KeywordSearchTermVisit() : visits(0) {}
KeywordSearchTermVisit::~KeywordSearchTermVisit() {} KeywordSearchTermVisit::~KeywordSearchTermVisit() {}
......
...@@ -12,6 +12,20 @@ ...@@ -12,6 +12,20 @@
namespace history { namespace history {
// NormalizedKeywordSearchTermVisit is returned by
// GetMostRecentNormalizedKeywordSearchTerms. It contains the time of the most
// recent visit and the visit count for the normalized search term aggregated
// from the keyword visits.
struct NormalizedKeywordSearchTermVisit {
NormalizedKeywordSearchTermVisit() = default;
~NormalizedKeywordSearchTermVisit();
base::string16 normalized_term; // The search term, in lower case and with
// extra whitespaces collapsed.
int visits{0}; // The visit count.
base::Time most_recent_visit_time; // The time of the most recent visit.
};
// KeywordSearchTermVisit is returned from GetMostRecentKeywordSearchTerms. It // KeywordSearchTermVisit is returned from GetMostRecentKeywordSearchTerms. It
// gives the time and search term of the keyword visit. // gives the time and search term of the keyword visit.
struct KeywordSearchTermVisit { struct KeywordSearchTermVisit {
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include "base/stl_util.h" #include "base/stl_util.h"
#include "base/strings/string_util.h" #include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h" #include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "components/history/core/browser/keyword_search_term.h" #include "components/history/core/browser/keyword_search_term.h"
#include "components/url_formatter/url_formatter.h" #include "components/url_formatter/url_formatter.h"
#include "sql/statement.h" #include "sql/statement.h"
...@@ -631,9 +632,10 @@ void URLDatabase::GetMostRecentKeywordSearchTerms( ...@@ -631,9 +632,10 @@ void URLDatabase::GetMostRecentKeywordSearchTerms(
} }
} }
std::vector<KeywordSearchTermVisit> std::vector<NormalizedKeywordSearchTermVisit>
URLDatabase::GetMostRecentKeywordSearchTerms(KeywordID keyword_id, URLDatabase::GetMostRecentNormalizedKeywordSearchTerms(
int max_count) { KeywordID keyword_id,
base::Time age_threshold) {
// NOTE: the keyword_id can be zero if on first run the user does a query // NOTE: the keyword_id can be zero if on first run the user does a query
// before the TemplateURLService has finished loading. As the chances of this // before the TemplateURLService has finished loading. As the chances of this
// occurring are small, we ignore it. // occurring are small, we ignore it.
...@@ -642,22 +644,23 @@ URLDatabase::GetMostRecentKeywordSearchTerms(KeywordID keyword_id, ...@@ -642,22 +644,23 @@ URLDatabase::GetMostRecentKeywordSearchTerms(KeywordID keyword_id,
sql::Statement statement(GetDB().GetCachedStatement( sql::Statement statement(GetDB().GetCachedStatement(
SQL_FROM_HERE, SQL_FROM_HERE,
"SELECT DISTINCT " "SELECT "
"kv.term, kv.normalized_term, u.visit_count, u.last_visit_time " "kv.normalized_term, SUM(u.visit_count), MAX(u.last_visit_time) "
"FROM keyword_search_terms kv JOIN urls u ON kv.url_id = u.id " "FROM keyword_search_terms kv JOIN urls u ON kv.url_id = u.id "
"WHERE kv.keyword_id = ? " "WHERE kv.keyword_id = ? AND u.last_visit_time > ? "
"ORDER BY u.last_visit_time DESC LIMIT ?")); "AND kv.normalized_term IS NOT NULL AND kv.normalized_term != \"\" "
"GROUP BY kv.normalized_term"));
statement.BindInt64(0, keyword_id); statement.BindInt64(0, keyword_id);
statement.BindInt(1, max_count); statement.BindInt64(1, age_threshold.ToInternalValue());
std::vector<KeywordSearchTermVisit> visits; std::vector<NormalizedKeywordSearchTermVisit> visits;
while (statement.Step()) { while (statement.Step()) {
KeywordSearchTermVisit visit; NormalizedKeywordSearchTermVisit visit;
visit.term = statement.ColumnString16(0); visit.normalized_term = statement.ColumnString16(0);
visit.normalized_term = statement.ColumnString16(1); visit.visits = statement.ColumnInt(1);
visit.visits = statement.ColumnInt(2); visit.most_recent_visit_time =
visit.time = base::Time::FromInternalValue(statement.ColumnInt64(3)); base::Time::FromInternalValue(statement.ColumnInt64(2));
visits.push_back(visit); visits.push_back(visit);
} }
return visits; return visits;
......
...@@ -18,6 +18,10 @@ ...@@ -18,6 +18,10 @@
class GURL; class GURL;
namespace base {
class Time;
}
namespace sql { namespace sql {
class Database; class Database;
} }
...@@ -26,6 +30,8 @@ namespace history { ...@@ -26,6 +30,8 @@ namespace history {
struct KeywordSearchTermRow; struct KeywordSearchTermRow;
struct KeywordSearchTermVisit; struct KeywordSearchTermVisit;
struct NormalizedKeywordSearchTermVisit;
class VisitDatabase; // For friend statement. class VisitDatabase; // For friend statement.
// Encapsulates an SQL database that holds URL info. This is a subset of the // Encapsulates an SQL database that holds URL info. This is a subset of the
...@@ -225,10 +231,12 @@ class URLDatabase { ...@@ -225,10 +231,12 @@ class URLDatabase {
int max_count, int max_count,
std::vector<KeywordSearchTermVisit>* matches); std::vector<KeywordSearchTermVisit>* matches);
// Returns up to max_count of the most recent search terms. // Returns the most recent (i.e., no older than |age_threshold|) normalized
std::vector<KeywordSearchTermVisit> GetMostRecentKeywordSearchTerms( // search terms (i.e., search terms in lower case with whitespaces collapsed)
KeywordID keyword_id, // for the specified keyword.
int max_count); std::vector<NormalizedKeywordSearchTermVisit>
GetMostRecentNormalizedKeywordSearchTerms(KeywordID keyword_id,
base::Time age_threshold);
// Deletes all searches matching |term|. // Deletes all searches matching |term|.
bool DeleteKeywordSearchTerm(const base::string16& term); bool DeleteKeywordSearchTerm(const base::string16& term);
......
...@@ -196,9 +196,10 @@ TEST_F(URLDatabaseTest, KeywordSearchTermVisit) { ...@@ -196,9 +196,10 @@ TEST_F(URLDatabaseTest, KeywordSearchTermVisit) {
ASSERT_EQ(1U, matches.size()); ASSERT_EQ(1U, matches.size());
ASSERT_EQ(keyword, matches[0].term); ASSERT_EQ(keyword, matches[0].term);
auto zero_prefix_matches = GetMostRecentKeywordSearchTerms(keyword_id, 10); std::vector<NormalizedKeywordSearchTermVisit> zero_prefix_matches =
GetMostRecentNormalizedKeywordSearchTerms(
keyword_id, history::AutocompleteAgeThreshold());
ASSERT_EQ(1U, zero_prefix_matches.size()); ASSERT_EQ(1U, zero_prefix_matches.size());
ASSERT_EQ(keyword, zero_prefix_matches[0].term);
ASSERT_EQ(normalized_keyword, zero_prefix_matches[0].normalized_term); ASSERT_EQ(normalized_keyword, zero_prefix_matches[0].normalized_term);
KeywordSearchTermRow keyword_search_term_row; KeywordSearchTermRow keyword_search_term_row;
...@@ -215,6 +216,10 @@ TEST_F(URLDatabaseTest, KeywordSearchTermVisit) { ...@@ -215,6 +216,10 @@ TEST_F(URLDatabaseTest, KeywordSearchTermVisit) {
GetMostRecentKeywordSearchTerms(keyword_id, keyword, 10, &matches); GetMostRecentKeywordSearchTerms(keyword_id, keyword, 10, &matches);
ASSERT_EQ(0U, matches.size()); ASSERT_EQ(0U, matches.size());
zero_prefix_matches = GetMostRecentNormalizedKeywordSearchTerms(
keyword_id, history::AutocompleteAgeThreshold());
ASSERT_EQ(0U, zero_prefix_matches.size());
ASSERT_FALSE(GetKeywordSearchTermRow(url_id, &keyword_search_term_row)); ASSERT_FALSE(GetKeywordSearchTermRow(url_id, &keyword_search_term_row));
} }
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include "components/omnibox/browser/local_history_zero_suggest_provider.h" #include "components/omnibox/browser/local_history_zero_suggest_provider.h"
#include <algorithm>
#include <set> #include <set>
#include <string> #include <string>
...@@ -217,34 +218,19 @@ void LocalHistoryZeroSuggestProvider::QueryURLDatabase( ...@@ -217,34 +218,19 @@ void LocalHistoryZeroSuggestProvider::QueryURLDatabase(
return; return;
} }
// Request 5x more search terms than the number of matches the provider auto results = url_db->GetMostRecentNormalizedKeywordSearchTerms(
// intends to return hoping to have enough left once ineligible ones are template_url_service->GetDefaultSearchProvider()->id(),
// filtered out. history::AutocompleteAgeThreshold());
const auto& results = url_db->GetMostRecentKeywordSearchTerms(
template_url_service->GetDefaultSearchProvider()->id(), max_matches_ * 5);
// Used to filter out duplicate query suggestions. std::sort(results.begin(), results.end(), [](const auto& a, const auto& b) {
std::set<base::string16> seen_suggestions_set; return a.most_recent_visit_time > b.most_recent_visit_time;
});
int relevance = kLocalHistoryZeroSuggestRelevance; int relevance = kLocalHistoryZeroSuggestRelevance;
size_t search_terms_seen_count = 0;
for (const auto& result : results) { for (const auto& result : results) {
search_terms_seen_count++;
// Discard the result if it is not fresh enough.
if (result.time < history::AutocompleteAgeThreshold())
continue;
base::string16 search_terms = result.normalized_term;
if (search_terms.empty())
continue;
// Filter out duplicate query suggestions.
if (seen_suggestions_set.count(search_terms))
continue;
seen_suggestions_set.insert(search_terms);
SearchSuggestionParser::SuggestResult suggestion( SearchSuggestionParser::SuggestResult suggestion(
/*suggestion=*/search_terms, AutocompleteMatchType::SEARCH_HISTORY, /*suggestion=*/result.normalized_term,
AutocompleteMatchType::SEARCH_HISTORY,
/*subtype_identifier=*/0, /*from_keyword=*/false, relevance--, /*subtype_identifier=*/0, /*from_keyword=*/false, relevance--,
/*relevance_from_server=*/0, /*relevance_from_server=*/0,
/*input_text=*/base::ASCIIToUTF16(std::string())); /*input_text=*/base::ASCIIToUTF16(std::string()));
...@@ -263,8 +249,7 @@ void LocalHistoryZeroSuggestProvider::QueryURLDatabase( ...@@ -263,8 +249,7 @@ void LocalHistoryZeroSuggestProvider::QueryURLDatabase(
} }
UMA_HISTOGRAM_COUNTS_1000( UMA_HISTOGRAM_COUNTS_1000(
"Omnibox.LocalHistoryZeroSuggest.SearchTermsSeenCount", "Omnibox.LocalHistoryZeroSuggest.SearchTermsSeenCount", results.size());
search_terms_seen_count);
UMA_HISTOGRAM_COUNTS_1000("Omnibox.LocalHistoryZeroSuggest.MaxMatchesCount", UMA_HISTOGRAM_COUNTS_1000("Omnibox.LocalHistoryZeroSuggest.MaxMatchesCount",
max_matches_); max_matches_);
......
...@@ -442,10 +442,10 @@ TEST_F(LocalHistoryZeroSuggestProviderTest, Delete) { ...@@ -442,10 +442,10 @@ TEST_F(LocalHistoryZeroSuggestProviderTest, Delete) {
ExpectMatches({{"hello world", 500}, {"not to be deleted", 499}}); ExpectMatches({{"hello world", 500}, {"not to be deleted", 499}});
// The keyword search terms database should be queried for the search terms // The keyword search terms database should be queried for the search terms
// submitted to the default search provider only; which are 4 unique search // submitted to the default search provider only; which are 2 unique
// terms in this case. // normalized search terms in this case.
histogram_tester.ExpectUniqueSample( histogram_tester.ExpectUniqueSample(
"Omnibox.LocalHistoryZeroSuggest.SearchTermsSeenCount", 4, 1); "Omnibox.LocalHistoryZeroSuggest.SearchTermsSeenCount", 2, 1);
provider_->DeleteMatch(provider_->matches()[0]); provider_->DeleteMatch(provider_->matches()[0]);
...@@ -479,16 +479,16 @@ TEST_F(LocalHistoryZeroSuggestProviderTest, Delete) { ...@@ -479,16 +479,16 @@ TEST_F(LocalHistoryZeroSuggestProviderTest, Delete) {
// produce the deleted match are deleted. // produce the deleted match are deleted.
history::URLDatabase* url_db = history::URLDatabase* url_db =
client_->GetHistoryService()->InMemoryDatabase(); client_->GetHistoryService()->InMemoryDatabase();
std::vector<history::KeywordSearchTermVisit> visits = std::vector<history::NormalizedKeywordSearchTermVisit> visits =
url_db->GetMostRecentKeywordSearchTerms(default_search_provider()->id(), url_db->GetMostRecentNormalizedKeywordSearchTerms(
/*max_count=*/10); default_search_provider()->id(), history::AutocompleteAgeThreshold());
EXPECT_EQ(1U, visits.size()); EXPECT_EQ(1U, visits.size());
EXPECT_EQ(base::ASCIIToUTF16("not to be deleted"), visits[0].term); EXPECT_EQ(base::ASCIIToUTF16("not to be deleted"), visits[0].normalized_term);
// Make sure search terms from other search providers that would produce the // Make sure search terms from other search providers that would produce the
// deleted match are not deleted. // deleted match are not deleted.
visits = url_db->GetMostRecentKeywordSearchTerms(other_search_provider->id(), visits = url_db->GetMostRecentNormalizedKeywordSearchTerms(
/*max_count=*/10); other_search_provider->id(), history::AutocompleteAgeThreshold());
EXPECT_EQ(1U, visits.size()); EXPECT_EQ(1U, visits.size());
EXPECT_EQ(base::ASCIIToUTF16("hello world"), visits[0].term); EXPECT_EQ(base::ASCIIToUTF16("hello world"), visits[0].normalized_term);
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment