Commit b00a1356 authored by Moe Ahmadi's avatar Moe Ahmadi Committed by Commit Bot

Replaces lower_term with normalized_term in keyword search terms table

Replaces the lower_term column in the keyword search terms table which
contains search term in lower case, with a new normalized_term column
containing the search term in lower case with the whitespaces collapsed
which will be used as a key to delete entries in addition to prefix search.

Depending on whether a search is initiated from the omnibox or the web,
the search term stored in this table may or may not have its whitespaces
collapsed. Since both of the following searches result in the same
zero-prefix suggestion, the new column can be used as a key to delete
both entries.

Omnibox search:
https://www.google.com/search?q=WEEKLY+NEWS&oq=WEEKLY+++NEWS+&sourceid=chrome...

Web search:
https://www.google.com/search?q=WEEKLY+++NEWS+++&oq=WEEKLY+++NEWS+++&...

Bug: 996516
Change-Id: I7ae5bd6a6992480b3e394499a9b94ca11d1fc8ca
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1832903
Reviewed-by: default avatarScott Violet <sky@chromium.org>
Commit-Queue: Moe Ahmadi <mahmadi@chromium.org>
Cr-Commit-Position: refs/heads/master@{#703869}
parent 6e985945
......@@ -170,6 +170,7 @@ bundle_data("unit_tests_bundle_data") {
"//components/test/data/history/history.38.sql",
"//components/test/data/history/history.39.sql",
"//components/test/data/history/history.40.sql",
"//components/test/data/history/history.41.sql",
"//components/test/data/history/thumbnail_wild/Favicons.corrupt_meta.disable",
"//components/test/data/history/thumbnail_wild/Favicons.v2.init.sql",
"//components/test/data/history/thumbnail_wild/Favicons.v3.init.sql",
......
......@@ -27,6 +27,7 @@
#include "base/bind.h"
#include "base/format_macros.h"
#include "base/guid.h"
#include "base/i18n/case_conversion.h"
#include "base/run_loop.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
......@@ -36,6 +37,7 @@
#include "components/history/core/browser/download_row.h"
#include "components/history/core/browser/history_constants.h"
#include "components/history/core/browser/history_database.h"
#include "components/history/core/browser/keyword_search_term.h"
#include "components/history/core/browser/page_usage_data.h"
#include "components/history/core/test/history_backend_db_base_test.h"
#include "components/history/core/test/test_history_database.h"
......@@ -1699,6 +1701,44 @@ TEST_F(HistoryBackendDBTest,
EXPECT_FALSE(visit_row.incremented_omnibox_typed_score);
}
// Tests that the migration code correctly replaces the lower_term column in the
// keyword search terms table which normalized_term which contains the
// normalized search term during migration to version 42.
TEST_F(HistoryBackendDBTest, MigrateKeywordSearchTerms) {
ASSERT_NO_FATAL_FAILURE(CreateDBVersion(41));
const KeywordID keyword_id = 12;
const URLID url_id = 34;
const base::string16 term = base::ASCIIToUTF16("WEEKLY NEWS ");
const base::string16 lower_term = base::i18n::ToLower(term);
const base::string16 normalized_term =
base::CollapseWhitespace(lower_term, false);
sql::Database db;
ASSERT_TRUE(db.Open(history_dir_.Append(kHistoryFilename)));
sql::Statement insert_statement(
db.GetUniqueStatement("INSERT INTO keyword_search_terms (keyword_id, "
"url_id, lower_term, term) VALUES (?,?,?,?)"));
insert_statement.BindInt64(0, keyword_id);
insert_statement.BindInt64(1, url_id);
insert_statement.BindString16(2, lower_term);
insert_statement.BindString16(3, term);
ASSERT_TRUE(insert_statement.Run());
// Re-open the db, triggering migration.
CreateBackendAndDatabase();
// The version should have been updated.
ASSERT_GE(HistoryDatabase::GetCurrentVersion(), 42);
history::KeywordSearchTermRow keyword_search_term_row;
ASSERT_TRUE(db_->GetKeywordSearchTermRow(url_id, &keyword_search_term_row));
EXPECT_EQ(keyword_id, keyword_search_term_row.keyword_id);
EXPECT_EQ(url_id, keyword_search_term_row.url_id);
EXPECT_EQ(term, keyword_search_term_row.term);
EXPECT_EQ(normalized_term, keyword_search_term_row.normalized_term);
}
// Test to verify the left-over typed_url sync metadata gets cleared correctly
// during migration to version 41.
TEST_F(HistoryBackendDBTest, MigrateTypedURLLeftoverMetadata) {
......
......@@ -37,7 +37,7 @@ namespace {
// Current version number. We write databases at the "current" version number,
// but any previous version that can read the "compatible" one can make do with
// our database without *too* many bad effects.
const int kCurrentVersionNumber = 41;
const int kCurrentVersionNumber = 42;
const int kCompatibleVersionNumber = 16;
const char kEarlyExpirationThresholdKey[] = "early_expiration_threshold";
......@@ -129,7 +129,6 @@ sql::InitStatus HistoryDatabase::Init(const base::FilePath& history_name) {
!InitSegmentTables() || !InitSyncTable())
return LogInitFailure(InitStep::CREATE_TABLES);
CreateMainURLIndex();
CreateKeywordSearchTermsIndices();
// TODO(benjhayden) Remove at some point.
meta_table_.DeleteKey("next_download_id");
......@@ -295,7 +294,6 @@ bool HistoryDatabase::RecreateAllTablesButURL() {
if (!InitSegmentTables())
return false;
CreateKeywordSearchTermsIndices();
return true;
}
......@@ -586,6 +584,13 @@ sql::InitStatus HistoryDatabase::EnsureCurrentVersion() {
meta_table_.SetVersionNumber(cur_version);
}
if (cur_version == 41) {
if (!MigrateKeywordsSearchTermsLowerTermColumn())
return LogMigrationFailure(41);
cur_version++;
meta_table_.SetVersionNumber(cur_version);
}
// ========================= ^^ new migration code goes here ^^
// ADDING NEW MIGRATION CODE
// =========================
......
......@@ -54,7 +54,6 @@ bool InMemoryDatabase::InitFromScratch() {
// InitDB doesn't create the index so in the disk-loading case, it can be
// added afterwards.
CreateMainURLIndex();
CreateKeywordSearchTermsIndices();
return true;
}
......@@ -139,7 +138,6 @@ bool InMemoryDatabase::InitFromDisk(const base::FilePath& history_name) {
// Index the table, this is faster than creating the index first and then
// inserting into it.
CreateMainURLIndex();
CreateKeywordSearchTermsIndices();
return true;
}
......
......@@ -13,6 +13,9 @@ KeywordSearchTermVisit::~KeywordSearchTermVisit() {}
KeywordSearchTermRow::KeywordSearchTermRow() : keyword_id(0), url_id(0) {}
KeywordSearchTermRow::KeywordSearchTermRow(const KeywordSearchTermRow& other) =
default;
KeywordSearchTermRow::~KeywordSearchTermRow() {}
} // namespace history
......@@ -19,6 +19,8 @@ struct KeywordSearchTermVisit {
~KeywordSearchTermVisit();
base::string16 term; // The search term that was used.
base::string16 normalized_term; // The search term, in lower case and with
// extra whitespaces collapsed.
int visits; // The visit count.
base::Time time; // The time of the most recent visit.
};
......@@ -26,11 +28,14 @@ struct KeywordSearchTermVisit {
// Used for URLs that have a search term associated with them.
struct KeywordSearchTermRow {
KeywordSearchTermRow();
KeywordSearchTermRow(const KeywordSearchTermRow& other);
~KeywordSearchTermRow();
KeywordID keyword_id; // ID of the keyword.
URLID url_id; // ID of the url.
base::string16 term; // The search term that was used.
base::string16 normalized_term; // The search term, in lower case and with
// extra whitespaces collapsed.
};
} // namespace history
......
......@@ -71,6 +71,55 @@ void URLDatabase::FillURLRow(const sql::Statement& s, URLRow* i) {
i->set_hidden(s.ColumnInt(6) != 0);
}
bool URLDatabase::MigrateKeywordsSearchTermsLowerTermColumn() {
// Create a temporary keyword search terms table.
if (!GetDB().Execute(
"CREATE TABLE temp_keyword_search_terms ("
"keyword_id INTEGER NOT NULL," // ID of the TemplateURL.
"url_id INTEGER NOT NULL," // ID of the url.
"term LONGVARCHAR NOT NULL," // The actual search term.
// The search term, in lower case, and with whitespaces collapsed.
"normalized_term LONGVARCHAR NOT NULL)")) {
return false;
}
// Extract rows from the keyword search terms table, convert lower_term to
// normalized_term, and insert them into the temporary table.
sql::Statement select_statement(
GetDB().GetCachedStatement(SQL_FROM_HERE,
"SELECT keyword_id, url_id, lower_term, term "
"FROM keyword_search_terms"));
while (select_statement.Step()) {
sql::Statement insert_statement(GetDB().GetCachedStatement(
SQL_FROM_HERE,
"INSERT INTO temp_keyword_search_terms "
"(keyword_id, url_id, term, normalized_term) VALUES (?,?,?,?)"));
insert_statement.BindInt64(0, select_statement.ColumnInt64(0));
insert_statement.BindInt64(1, select_statement.ColumnInt64(1));
insert_statement.BindString16(2, select_statement.ColumnString16(3));
insert_statement.BindString16(
3, base::CollapseWhitespace(select_statement.ColumnString16(2), false));
if (!insert_statement.Run())
return false;
}
if (!select_statement.Succeeded())
return false;
// Replace the keyword search terms table with the temporary one.
if (!GetDB().Execute("DROP TABLE keyword_search_terms"))
return false;
if (!GetDB().Execute("ALTER TABLE temp_keyword_search_terms RENAME TO "
"keyword_search_terms")) {
return false;
}
// Index the table, this is faster than creating the index first and then
// inserting into it.
CreateKeywordSearchTermsIndices();
return true;
}
bool URLDatabase::GetURLRow(URLID url_id, URLRow* info) {
// TODO(brettw) We need check for empty URLs to handle the case where
// there are old URLs in the database that are empty that got in before
......@@ -412,12 +461,16 @@ bool URLDatabase::GetTextMatchesWithAlgorithm(
bool URLDatabase::InitKeywordSearchTermsTable() {
has_keyword_search_terms_ = true;
if (!GetDB().DoesTableExist("keyword_search_terms")) {
if (!GetDB().Execute("CREATE TABLE keyword_search_terms ("
"keyword_id INTEGER NOT NULL," // ID of the TemplateURL.
"url_id INTEGER NOT NULL," // ID of the url.
"lower_term LONGVARCHAR NOT NULL," // The search term, in lower case.
"term LONGVARCHAR NOT NULL)")) // The actual search term.
if (!GetDB().Execute(
"CREATE TABLE keyword_search_terms ("
"keyword_id INTEGER NOT NULL," // ID of the TemplateURL.
"url_id INTEGER NOT NULL," // ID of the url.
"term LONGVARCHAR NOT NULL," // The actual search term.
// The search term, in lower case, and with whitespaces collapsed.
"normalized_term LONGVARCHAR NOT NULL)") ||
!CreateKeywordSearchTermsIndices()) {
return false;
}
}
return true;
}
......@@ -426,7 +479,7 @@ bool URLDatabase::CreateKeywordSearchTermsIndices() {
// For searching.
if (!GetDB().Execute(
"CREATE INDEX IF NOT EXISTS keyword_search_terms_index1 ON "
"keyword_search_terms (keyword_id, lower_term)")) {
"keyword_search_terms (keyword_id, normalized_term)")) {
return false;
}
......@@ -468,21 +521,25 @@ bool URLDatabase::SetKeywordSearchTermsForURL(URLID url_id,
if (!exist_statement.Succeeded())
return false;
sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
"INSERT INTO keyword_search_terms (keyword_id, url_id, lower_term, term) "
"VALUES (?,?,?,?)"));
sql::Statement statement(GetDB().GetCachedStatement(
SQL_FROM_HERE,
"INSERT INTO keyword_search_terms (keyword_id, url_id, term, "
"normalized_term) VALUES (?,?,?,?)"));
statement.BindInt64(0, keyword_id);
statement.BindInt64(1, url_id);
statement.BindString16(2, base::i18n::ToLower(term));
statement.BindString16(3, term);
statement.BindString16(2, term);
statement.BindString16(
3, base::i18n::ToLower(base::CollapseWhitespace(term, false)));
return statement.Run();
}
bool URLDatabase::GetKeywordSearchTermRow(URLID url_id,
KeywordSearchTermRow* row) {
DCHECK(url_id);
sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
"SELECT keyword_id, term FROM keyword_search_terms WHERE url_id=?"));
sql::Statement statement(
GetDB().GetCachedStatement(SQL_FROM_HERE,
"SELECT keyword_id, term, normalized_term "
"FROM keyword_search_terms WHERE url_id=?"));
statement.BindInt64(0, url_id);
if (!statement.Step())
......@@ -492,6 +549,7 @@ bool URLDatabase::GetKeywordSearchTermRow(URLID url_id,
row->url_id = url_id;
row->keyword_id = statement.ColumnInt64(0);
row->term = statement.ColumnString16(1);
row->normalized_term = statement.ColumnString16(2);
}
return true;
}
......@@ -499,8 +557,10 @@ bool URLDatabase::GetKeywordSearchTermRow(URLID url_id,
bool URLDatabase::GetKeywordSearchTermRows(
const base::string16& term,
std::vector<KeywordSearchTermRow>* rows) {
sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
"SELECT keyword_id, url_id FROM keyword_search_terms WHERE term=?"));
sql::Statement statement(
GetDB().GetCachedStatement(SQL_FROM_HERE,
"SELECT keyword_id, url_id, normalized_term "
"FROM keyword_search_terms WHERE term=?"));
statement.BindString16(0, term);
if (!statement.is_valid())
......@@ -511,6 +571,7 @@ bool URLDatabase::GetKeywordSearchTermRows(
row.url_id = statement.ColumnInt64(1);
row.keyword_id = statement.ColumnInt64(0);
row.term = term;
row.normalized_term = statement.ColumnInt64(2);
rows->push_back(row);
}
return true;
......@@ -538,20 +599,24 @@ void URLDatabase::GetMostRecentKeywordSearchTerms(
return;
DCHECK(!prefix.empty());
sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
sql::Statement statement(GetDB().GetCachedStatement(
SQL_FROM_HERE,
"SELECT DISTINCT kv.term, u.visit_count, u.last_visit_time "
"FROM keyword_search_terms kv "
"JOIN urls u ON kv.url_id = u.id "
"WHERE kv.keyword_id = ? AND kv.lower_term >= ? AND kv.lower_term < ? "
"WHERE kv.keyword_id = ? AND kv.normalized_term >= ? AND "
"kv.normalized_term < ? "
"ORDER BY u.last_visit_time DESC LIMIT ?"));
// NOTE: Keep this ToLower() call in sync with search_provider.cc.
base::string16 lower_prefix = base::i18n::ToLower(prefix);
// NOTE: Keep these CollapseWhitespace() and ToLower() calls in sync with
// search_provider.cc.
base::string16 normalized_prefix =
base::CollapseWhitespace(base::i18n::ToLower(prefix), false);
// This magic gives us a prefix search.
base::string16 next_prefix = lower_prefix;
base::string16 next_prefix = normalized_prefix;
next_prefix.back() = next_prefix.back() + 1;
statement.BindInt64(0, keyword_id);
statement.BindString16(1, lower_prefix);
statement.BindString16(1, normalized_prefix);
statement.BindString16(2, next_prefix);
statement.BindInt(3, max_count);
......@@ -564,6 +629,38 @@ void URLDatabase::GetMostRecentKeywordSearchTerms(
}
}
std::vector<KeywordSearchTermVisit>
URLDatabase::GetMostRecentKeywordSearchTerms(KeywordID keyword_id,
int max_count) {
// NOTE: the keyword_id can be zero if on first run the user does a query
// before the TemplateURLService has finished loading. As the chances of this
// occurring are small, we ignore it.
if (!keyword_id)
return {};
sql::Statement statement(GetDB().GetCachedStatement(
SQL_FROM_HERE,
"SELECT DISTINCT "
"kv.term, kv.normalized_term, u.visit_count, u.last_visit_time "
"FROM keyword_search_terms kv JOIN urls u ON kv.url_id = u.id "
"WHERE kv.keyword_id = ? "
"ORDER BY u.last_visit_time DESC LIMIT ?"));
statement.BindInt64(0, keyword_id);
statement.BindInt(1, max_count);
std::vector<KeywordSearchTermVisit> visits;
while (statement.Step()) {
KeywordSearchTermVisit visit;
visit.term = statement.ColumnString16(0);
visit.normalized_term = statement.ColumnString16(1);
visit.visits = statement.ColumnInt(2);
visit.time = base::Time::FromInternalValue(statement.ColumnInt64(3));
visits.push_back(visit);
}
return visits;
}
bool URLDatabase::DeleteKeywordSearchTerm(const base::string16& term) {
sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
"DELETE FROM keyword_search_terms WHERE term=?"));
......
......@@ -225,6 +225,11 @@ class URLDatabase {
int max_count,
std::vector<KeywordSearchTermVisit>* matches);
// Returns up to max_count of the most recent search terms.
std::vector<KeywordSearchTermVisit> GetMostRecentKeywordSearchTerms(
KeywordID keyword_id,
int max_count);
// Deletes all searches matching |term|.
bool DeleteKeywordSearchTerm(const base::string16& term);
......@@ -299,6 +304,11 @@ class URLDatabase {
// this class implements these functions to return its objects.
virtual sql::Database& GetDB() = 0;
// Replaces the lower_term column in the keyword search terms table with
// normalized_term which contains the search term, in lower case, and with
// whitespaces collapsed for migration to version 42.
bool MigrateKeywordsSearchTermsLowerTermColumn();
private:
// True if InitKeywordSearchTermsTable() has been invoked. Not all subclasses
// have keyword search terms.
......
......@@ -185,15 +185,22 @@ TEST_F(URLDatabaseTest, KeywordSearchTermVisit) {
// Add a keyword visit.
KeywordID keyword_id = 100;
base::string16 keyword = base::UTF8ToUTF16("visit");
base::string16 keyword = base::UTF8ToUTF16(" VISIT ");
base::string16 normalized_keyword = base::UTF8ToUTF16("visit");
ASSERT_TRUE(SetKeywordSearchTermsForURL(url_id, keyword_id, keyword));
// Make sure we get it back.
std::vector<KeywordSearchTermVisit> matches;
GetMostRecentKeywordSearchTerms(keyword_id, keyword, 10, &matches);
GetMostRecentKeywordSearchTerms(keyword_id, base::UTF8ToUTF16("vi"), 10,
&matches);
ASSERT_EQ(1U, matches.size());
ASSERT_EQ(keyword, matches[0].term);
auto zero_prefix_matches = GetMostRecentKeywordSearchTerms(keyword_id, 10);
ASSERT_EQ(1U, zero_prefix_matches.size());
ASSERT_EQ(keyword, zero_prefix_matches[0].term);
ASSERT_EQ(normalized_keyword, zero_prefix_matches[0].normalized_term);
KeywordSearchTermRow keyword_search_term_row;
ASSERT_TRUE(GetKeywordSearchTermRow(url_id, &keyword_search_term_row));
EXPECT_EQ(keyword_id, keyword_search_term_row.keyword_id);
......
PRAGMA foreign_keys=OFF;
BEGIN TRANSACTION;
CREATE TABLE meta(key LONGVARCHAR NOT NULL UNIQUE PRIMARY KEY, value LONGVARCHAR);
INSERT INTO meta VALUES('mmap_status','-1');
INSERT INTO meta VALUES('version','41');
INSERT INTO meta VALUES('last_compatible_version','16');
CREATE TABLE urls(id INTEGER PRIMARY KEY AUTOINCREMENT,url LONGVARCHAR,title LONGVARCHAR,visit_count INTEGER DEFAULT 0 NOT NULL,typed_count INTEGER DEFAULT 0 NOT NULL,last_visit_time INTEGER NOT NULL,hidden INTEGER DEFAULT 0 NOT NULL);
CREATE TABLE visits(id INTEGER PRIMARY KEY,url INTEGER NOT NULL,visit_time INTEGER NOT NULL,from_visit INTEGER,transition INTEGER DEFAULT 0 NOT NULL,segment_id INTEGER,visit_duration INTEGER DEFAULT 0 NOT NULL,incremented_omnibox_typed_score BOOLEAN DEFAULT FALSE NOT NULL);
CREATE TABLE visit_source(id INTEGER PRIMARY KEY,source INTEGER NOT NULL);
CREATE TABLE keyword_search_terms (keyword_id INTEGER NOT NULL,url_id INTEGER NOT NULL,lower_term LONGVARCHAR NOT NULL,term LONGVARCHAR NOT NULL);
CREATE TABLE downloads (id INTEGER PRIMARY KEY,guid VARCHAR NOT NULL,current_path LONGVARCHAR NOT NULL,target_path LONGVARCHAR NOT NULL,start_time INTEGER NOT NULL,received_bytes INTEGER NOT NULL,total_bytes INTEGER NOT NULL,state INTEGER NOT NULL,danger_type INTEGER NOT NULL,interrupt_reason INTEGER NOT NULL,hash BLOB NOT NULL,end_time INTEGER NOT NULL,opened INTEGER NOT NULL,last_access_time INTEGER NOT NULL,transient INTEGER NOT NULL,referrer VARCHAR NOT NULL,site_url VARCHAR NOT NULL,tab_url VARCHAR NOT NULL,tab_referrer_url VARCHAR NOT NULL,http_method VARCHAR NOT NULL,by_ext_id VARCHAR NOT NULL,by_ext_name VARCHAR NOT NULL,etag VARCHAR NOT NULL,last_modified VARCHAR NOT NULL,mime_type VARCHAR(255) NOT NULL,original_mime_type VARCHAR(255) NOT NULL);
CREATE TABLE downloads_url_chains (id INTEGER NOT NULL,chain_index INTEGER NOT NULL,url LONGVARCHAR NOT NULL, PRIMARY KEY (id, chain_index) );
CREATE TABLE downloads_slices (download_id INTEGER NOT NULL,offset INTEGER NOT NULL,received_bytes INTEGER NOT NULL,finished INTEGER NOT NULL DEFAULT 0,PRIMARY KEY (download_id, offset) );
CREATE TABLE segments (id INTEGER PRIMARY KEY,name VARCHAR,url_id INTEGER NON NULL);
CREATE TABLE segment_usage (id INTEGER PRIMARY KEY,segment_id INTEGER NOT NULL,time_slot INTEGER NOT NULL,visit_count INTEGER DEFAULT 0 NOT NULL);
CREATE TABLE typed_url_sync_metadata (storage_key INTEGER PRIMARY KEY NOT NULL,value BLOB);
DELETE FROM sqlite_sequence;
CREATE INDEX visits_url_index ON visits (url);
CREATE INDEX visits_from_index ON visits (from_visit);
CREATE INDEX visits_time_index ON visits (visit_time);
CREATE INDEX segments_name ON segments(name);
CREATE INDEX segments_url_id ON segments(url_id);
CREATE INDEX segment_usage_time_slot_segment_id ON segment_usage(time_slot, segment_id);
CREATE INDEX segments_usage_seg_id ON segment_usage(segment_id);
CREATE INDEX urls_url_index ON urls (url);
CREATE INDEX keyword_search_terms_index1 ON keyword_search_terms (keyword_id, lower_term);
CREATE INDEX keyword_search_terms_index2 ON keyword_search_terms (url_id);
CREATE INDEX keyword_search_terms_index3 ON keyword_search_terms (term);
COMMIT;
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment