Commit 42cd4404 authored by Jan Krcal, committed by Commit Bot

[Typed URLs] Migrate the history DB to clear orphaned metadata

This CL performs a one-off clean-up of the typed_url_sync_metadata table
in the history DB. A previous bug in the code (shipped in M64 Stable)
caused metadata for expired URLs to remain in the table. This CL removes
such orphaned metadata.

Bug: 827111
Change-Id: If200517752298b889b79fbfe2b5371c4489b4773
Reviewed-on: https://chromium-review.googlesource.com/1078227
Commit-Queue: Jan Krcal <jkrcal@chromium.org>
Reviewed-by: Sylvain Defresne <sdefresne@chromium.org>
Reviewed-by: Mikel Astiz <mastiz@chromium.org>
Cr-Commit-Position: refs/heads/master@{#566401}
parent cfbd764e
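In effect, the migration deletes every typed_url_sync_metadata row whose storage_key does not match any visited URL. A rough single-statement sketch of that idea (illustrative only; the CL instead collects the sorted visited rowids and deletes orphaned entries one by one, as the diff below shows):

// Conceptual one-statement equivalent of the clean-up; not the code this
// CL actually runs.
const char kConceptualCleanupSql[] =
    "DELETE FROM typed_url_sync_metadata "
    "WHERE storage_key NOT IN (SELECT DISTINCT url FROM visits)";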
@@ -176,6 +176,7 @@ bundle_data("unit_tests_bundle_data") {
    "//components/test/data/history/history.32.sql",
    "//components/test/data/history/history.38.sql",
    "//components/test/data/history/history.39.sql",
    "//components/test/data/history/history.40.sql",
    "//components/test/data/history/thumbnail_wild/Favicons.corrupt_meta.disable",
    "//components/test/data/history/thumbnail_wild/Favicons.v2.init.sql",
    "//components/test/data/history/thumbnail_wild/Favicons.v3.init.sql",
@@ -1698,6 +1698,118 @@ TEST_F(HistoryBackendDBTest,
  EXPECT_FALSE(visit_row.incremented_omnibox_typed_score);
}

// Test to verify the left-over typed_url sync metadata gets cleared correctly
// during migration to version 41.
TEST_F(HistoryBackendDBTest, MigrateTypedURLLeftoverMetadata) {
  ASSERT_NO_FATAL_FAILURE(CreateDBVersion(40));

  // Define common uninteresting data for visits.
  const VisitID referring_visit = 0;
  const ui::PageTransition transition = ui::PAGE_TRANSITION_TYPED;
  const base::Time visit_time(base::Time::Now());
  const base::TimeDelta visit_duration(base::TimeDelta::FromSeconds(30));

  // The first visit has both a DB entry and a metadata entry.
  const VisitID visit_id1 = 1;
  const URLID url_id1 = 10;
  const SegmentID segment_id1 = 20;
  const std::string metadata_value1 = "BLOB1";
  // The second visit likewise has both a DB entry and a metadata entry.
  const VisitID visit_id2 = 2;
  const URLID url_id2 = 11;
  const SegmentID segment_id2 = 21;
  const std::string metadata_value2 = "BLOB2";
  // The third URL has only a left-over metadata entry and no visit.
  const URLID url_id3 = 12;
  const std::string metadata_value3 = "BLOB3";

  {
    // Open the db for manual manipulation.
    sql::Connection db;
    ASSERT_TRUE(db.Open(history_dir_.Append(kHistoryFilename)));

    const char kInsertVisitStatement[] =
        "INSERT INTO visits "
        "(id, url, visit_time, from_visit, transition, segment_id, "
        "visit_duration) VALUES (?, ?, ?, ?, ?, ?, ?)";

    {
      sql::Statement s(db.GetUniqueStatement(kInsertVisitStatement));
      s.BindInt64(0, visit_id1);
      s.BindInt64(1, url_id1);
      s.BindInt64(2, visit_time.ToDeltaSinceWindowsEpoch().InMicroseconds());
      s.BindInt64(3, referring_visit);
      s.BindInt64(4, transition);
      s.BindInt64(5, segment_id1);
      s.BindInt64(6, visit_duration.InMicroseconds());
      ASSERT_TRUE(s.Run());
    }
    {
      sql::Statement s(db.GetUniqueStatement(kInsertVisitStatement));
      s.BindInt64(0, visit_id2);
      s.BindInt64(1, url_id2);
      s.BindInt64(2, visit_time.ToDeltaSinceWindowsEpoch().InMicroseconds());
      s.BindInt64(3, referring_visit);
      s.BindInt64(4, transition);
      s.BindInt64(5, segment_id2);
      s.BindInt64(6, visit_duration.InMicroseconds());
      ASSERT_TRUE(s.Run());
    }

    const char kInsertMetadataStatement[] =
        "INSERT INTO typed_url_sync_metadata (storage_key, value) VALUES (?, "
        "?)";

    {
      sql::Statement s(db.GetUniqueStatement(kInsertMetadataStatement));
      s.BindInt64(0, url_id3);
      s.BindString(1, metadata_value3);
      ASSERT_TRUE(s.Run());
    }
    {
      sql::Statement s(db.GetUniqueStatement(kInsertMetadataStatement));
      s.BindInt64(0, url_id2);
      s.BindString(1, metadata_value2);
      ASSERT_TRUE(s.Run());
    }
    {
      sql::Statement s(db.GetUniqueStatement(kInsertMetadataStatement));
      s.BindInt64(0, url_id1);
      s.BindString(1, metadata_value1);
      ASSERT_TRUE(s.Run());
    }
  }

  // Re-open the db, triggering migration.
  CreateBackendAndDatabase();
  DeleteBackend();

  {
    // Re-open the db for manual manipulation.
    sql::Connection db;
    ASSERT_TRUE(db.Open(history_dir_.Append(kHistoryFilename)));
    {
      // The version should have been updated.
      sql::Statement s(db.GetUniqueStatement(
          "SELECT value FROM meta WHERE key = 'version'"));
      ASSERT_GE(HistoryDatabase::GetCurrentVersion(), 41);
      EXPECT_TRUE(s.Step());
      EXPECT_EQ(HistoryDatabase::GetCurrentVersion(), s.ColumnInt(0));
    }
    {
      // Check that the left-over metadata entry is deleted.
      sql::Statement s(db.GetUniqueStatement(
          "SELECT storage_key FROM typed_url_sync_metadata"));
      std::set<URLID> remaining_metadata;
      while (s.Step()) {
        remaining_metadata.insert(s.ColumnInt64(0));
      }
      EXPECT_EQ(remaining_metadata.count(url_id3), 0u);
      EXPECT_EQ(remaining_metadata.count(url_id2), 1u);
      EXPECT_EQ(remaining_metadata.count(url_id1), 1u);
    }
  }
}

bool FilterURL(const GURL& url) {
  return url.SchemeIsHTTPOrHTTPS();
}
@@ -38,7 +38,7 @@ namespace {
// Current version number. We write databases at the "current" version number,
// but any previous version that can read the "compatible" one can make do with
// our database without *too* many bad effects.
-const int kCurrentVersionNumber = 40;
+const int kCurrentVersionNumber = 41;
const int kCompatibleVersionNumber = 16;
const char kEarlyExpirationThresholdKey[] = "early_expiration_threshold";
const int kMaxHostsInMemory = 10000;
@@ -607,6 +607,18 @@ sql::InitStatus HistoryDatabase::EnsureCurrentVersion() {
    meta_table_.SetVersionNumber(cur_version);
  }

  if (cur_version == 40) {
    std::vector<URLID> visited_url_rowids_sorted;
    if (!GetAllVisitedURLRowidsForMigrationToVersion40(
            &visited_url_rowids_sorted) ||
        !CleanTypedURLOrphanedMetadataForMigrationToVersion40(
            visited_url_rowids_sorted)) {
      return LogMigrationFailure(40);
    }
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }

  // ========================= ^^ new migration code goes here ^^

  // ADDING NEW MIGRATION CODE
  // =========================
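For context, every step in EnsureCurrentVersion() follows the same shape: migrate exactly one version, bump cur_version, and persist the new number before the next step runs, so an interrupted upgrade resumes where it stopped. A hypothetical sketch of what the next step would look like (MigrateFooForVersion42 is a placeholder name, not part of this CL):

  // Hypothetical illustration of the next migration step; the function name
  // is a placeholder only.
  if (cur_version == 41) {
    if (!MigrateFooForVersion42())
      return LogMigrationFailure(41);
    cur_version++;
    meta_table_.SetVersionNumber(cur_version);
  }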
@@ -116,6 +116,49 @@ bool TypedURLSyncMetadataDatabase::InitSyncTable() {
  return true;
}

bool TypedURLSyncMetadataDatabase::
    CleanTypedURLOrphanedMetadataForMigrationToVersion40(
        const std::vector<URLID>& sorted_valid_rowids) {
  DCHECK(
      std::is_sorted(sorted_valid_rowids.begin(), sorted_valid_rowids.end()));
  std::vector<URLID> invalid_metadata_rowids;
  auto valid_rowids_iter = sorted_valid_rowids.begin();
  sql::Statement sorted_metadata_rowids(GetDB().GetUniqueStatement(
      "SELECT storage_key FROM typed_url_sync_metadata ORDER BY storage_key"));
  while (sorted_metadata_rowids.Step()) {
    URLID metadata_rowid = sorted_metadata_rowids.ColumnInt64(0);
    // Both collections are sorted, so we can check whether |metadata_rowid|
    // is valid by iterating over both at the same time.
    // First, skip all valid IDs that have no metadata entry in
    // |sorted_metadata_rowids|.
    while (valid_rowids_iter != sorted_valid_rowids.end() &&
           *valid_rowids_iter < metadata_rowid) {
      valid_rowids_iter++;
    }
    // Now, is |metadata_rowid| invalid?
    if (valid_rowids_iter == sorted_valid_rowids.end() ||
        *valid_rowids_iter != metadata_rowid) {
      invalid_metadata_rowids.push_back(metadata_rowid);
    }
  }
  if (!sorted_metadata_rowids.Succeeded()) {
    return false;
  }

  for (const URLID& rowid : invalid_metadata_rowids) {
    sql::Statement del(GetDB().GetCachedStatement(
        SQL_FROM_HERE,
        "DELETE FROM typed_url_sync_metadata WHERE storage_key=?"));
    del.BindInt64(0, rowid);
    if (!del.Run())
      return false;
  }
  return true;
}
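The loop above is a streaming set difference over two sorted sequences, computed without materializing the metadata rowids in memory. If both lists fit comfortably in memory, the same orphan detection could be written with std::set_difference; a minimal standalone sketch under that assumption (FindOrphanedRowids is a hypothetical helper, not part of the CL):

#include <algorithm>
#include <cstdint>
#include <iterator>
#include <vector>

// Returns the metadata rowids that have no matching valid rowid. Both input
// vectors must be sorted in ascending order.
std::vector<int64_t> FindOrphanedRowids(
    const std::vector<int64_t>& sorted_metadata_rowids,
    const std::vector<int64_t>& sorted_valid_rowids) {
  std::vector<int64_t> orphaned;
  std::set_difference(
      sorted_metadata_rowids.begin(), sorted_metadata_rowids.end(),
      sorted_valid_rowids.begin(), sorted_valid_rowids.end(),
      std::back_inserter(orphaned));
  return orphaned;
}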

bool TypedURLSyncMetadataDatabase::GetAllSyncEntityMetadata(
    syncer::MetadataBatch* metadata_batch) {
  DCHECK(metadata_batch);
@@ -5,6 +5,8 @@
#ifndef COMPONENTS_HISTORY_CORE_BROWSER_TYPED_URL_SYNC_METADATA_DATABASE_H_
#define COMPONENTS_HISTORY_CORE_BROWSER_TYPED_URL_SYNC_METADATA_DATABASE_H_

#include <vector>

#include "base/macros.h"
#include "components/history/core/browser/url_row.h"
#include "components/sync/base/model_type.h"
@@ -59,6 +61,13 @@ class TypedURLSyncMetadataDatabase : public syncer::SyncMetadataStore {
  // and indices are properly set up. Must be called before anything else.
  bool InitSyncTable();

  // Cleans up orphaned metadata for typed URLs, i.e. deletes all metadata
  // entries for rowids not present in |sorted_valid_rowids| (which must be
  // sorted in ascending order). Returns true if the clean-up finishes without
  // any DB error.
  bool CleanTypedURLOrphanedMetadataForMigrationToVersion40(
      const std::vector<URLID>& sorted_valid_rowids);

 private:
  // Read all sync_pb::EntityMetadata for typed URL and fill
  // |metadata_records| with it.
@@ -720,4 +720,16 @@ bool VisitDatabase::MigrateVisitsWithoutIncrementedOmniboxTypedScore() {
  return true;
}

bool VisitDatabase::GetAllVisitedURLRowidsForMigrationToVersion40(
    std::vector<URLID>* visited_url_rowids_sorted) {
  DCHECK(visited_url_rowids_sorted);
  sql::Statement statement(GetDB().GetUniqueStatement(
      "SELECT DISTINCT url FROM visits ORDER BY url"));
  while (statement.Step()) {
    visited_url_rowids_sorted->push_back(statement.ColumnInt64(0));
  }
  return statement.Succeeded();
}

}  // namespace history
@@ -227,6 +227,10 @@ class VisitDatabase {
  // don't have incremented_omnibox_typed_score column yet.
  bool MigrateVisitsWithoutIncrementedOmniboxTypedScore();

  // A subprocedure in the migration to version 40; returns the rowids of all
  // visited URLs, sorted in ascending order.
  bool GetAllVisitedURLRowidsForMigrationToVersion40(
      std::vector<URLID>* visited_url_rowids_sorted);

 private:
  DISALLOW_COPY_AND_ASSIGN(VisitDatabase);
The remaining file is the new test fixture, //components/test/data/history/history.40.sql (registered in the BUILD.gn change above), which captures a version-40 database:
PRAGMA foreign_keys=OFF;
BEGIN TRANSACTION;
CREATE TABLE meta(key LONGVARCHAR NOT NULL UNIQUE PRIMARY KEY, value LONGVARCHAR);
INSERT INTO meta VALUES('mmap_status','-1');
INSERT INTO meta VALUES('version','40');
INSERT INTO meta VALUES('last_compatible_version','16');
CREATE TABLE urls(id INTEGER PRIMARY KEY AUTOINCREMENT,url LONGVARCHAR,title LONGVARCHAR,visit_count INTEGER DEFAULT 0 NOT NULL,typed_count INTEGER DEFAULT 0 NOT NULL,last_visit_time INTEGER NOT NULL,hidden INTEGER DEFAULT 0 NOT NULL);
CREATE TABLE visits(id INTEGER PRIMARY KEY,url INTEGER NOT NULL,visit_time INTEGER NOT NULL,from_visit INTEGER,transition INTEGER DEFAULT 0 NOT NULL,segment_id INTEGER,visit_duration INTEGER DEFAULT 0 NOT NULL,incremented_omnibox_typed_score BOOLEAN DEFAULT FALSE NOT NULL);
CREATE TABLE visit_source(id INTEGER PRIMARY KEY,source INTEGER NOT NULL);
CREATE TABLE keyword_search_terms (keyword_id INTEGER NOT NULL,url_id INTEGER NOT NULL,lower_term LONGVARCHAR NOT NULL,term LONGVARCHAR NOT NULL);
CREATE TABLE downloads (id INTEGER PRIMARY KEY,guid VARCHAR NOT NULL,current_path LONGVARCHAR NOT NULL,target_path LONGVARCHAR NOT NULL,start_time INTEGER NOT NULL,received_bytes INTEGER NOT NULL,total_bytes INTEGER NOT NULL,state INTEGER NOT NULL,danger_type INTEGER NOT NULL,interrupt_reason INTEGER NOT NULL,hash BLOB NOT NULL,end_time INTEGER NOT NULL,opened INTEGER NOT NULL,last_access_time INTEGER NOT NULL,transient INTEGER NOT NULL,referrer VARCHAR NOT NULL,site_url VARCHAR NOT NULL,tab_url VARCHAR NOT NULL,tab_referrer_url VARCHAR NOT NULL,http_method VARCHAR NOT NULL,by_ext_id VARCHAR NOT NULL,by_ext_name VARCHAR NOT NULL,etag VARCHAR NOT NULL,last_modified VARCHAR NOT NULL,mime_type VARCHAR(255) NOT NULL,original_mime_type VARCHAR(255) NOT NULL);
CREATE TABLE downloads_url_chains (id INTEGER NOT NULL,chain_index INTEGER NOT NULL,url LONGVARCHAR NOT NULL, PRIMARY KEY (id, chain_index) );
CREATE TABLE downloads_slices (download_id INTEGER NOT NULL,offset INTEGER NOT NULL,received_bytes INTEGER NOT NULL,finished INTEGER DEFAULT 0 NOT NULL,PRIMARY KEY (download_id, offset) );
CREATE TABLE segments (id INTEGER PRIMARY KEY,name VARCHAR,url_id INTEGER NON NULL);
CREATE TABLE segment_usage (id INTEGER PRIMARY KEY,segment_id INTEGER NOT NULL,time_slot INTEGER NOT NULL,visit_count INTEGER DEFAULT 0 NOT NULL);
CREATE TABLE typed_url_sync_metadata (storage_key INTEGER PRIMARY KEY NOT NULL,value BLOB);
DELETE FROM sqlite_sequence;
CREATE INDEX visits_url_index ON visits (url);
CREATE INDEX visits_from_index ON visits (from_visit);
CREATE INDEX visits_time_index ON visits (visit_time);
CREATE INDEX segments_name ON segments(name);
CREATE INDEX segments_url_id ON segments(url_id);
CREATE INDEX segment_usage_time_slot_segment_id ON segment_usage(time_slot, segment_id);
CREATE INDEX segments_usage_seg_id ON segment_usage(segment_id);
CREATE INDEX urls_url_index ON urls (url);
CREATE INDEX keyword_search_terms_index1 ON keyword_search_terms (keyword_id, lower_term);
CREATE INDEX keyword_search_terms_index2 ON keyword_search_terms (url_id);
CREATE INDEX keyword_search_terms_index3 ON keyword_search_terms (term);
COMMIT;