Commit d0adc363 authored by mvrable@chromium.org's avatar mvrable@chromium.org

[Activity log] Make database writes in counting policy more robust

Rework the way that new actions are logged in the counting policy so
that it properly handles the case where there are duplicate records that
weren't properly coalesced.  (Duplicate records could arise from
clearing URL history, for example.)

This comes at the cost of additional queries--an update always takes 2
queries, instead 1 or 2 as before.  However, benchmarking shows the cost
to be not much different (worst case was ~5% slower, other cases were
indistinguishable or even a bit faster).

BUG=279465

Review URL: https://chromiumcodereview.appspot.com/23907004

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@221860 0039d316-1c4b-4281-b951-d872f2087c98
parent b5b9bfcc
......@@ -223,23 +223,35 @@ bool CountingPolicy::FlushDatabase(sql::Connection* db) {
if (!transaction.Begin())
return false;
// Adding an Action to the database is a two step process that depends on
// whether the count on an existing row can be incremented or a new row needs
// to be inserted.
// 1. Run the query in locate_str to search for a row which matches and can
// have the count incremented.
// 2a. If found, increment the count using update_str and the rowid found in
// step 1, or
// 2b. If not found, insert a new row using insert_str.
std::string locate_str =
"SELECT rowid FROM " + std::string(kTableName) +
" WHERE time >= ? AND time < ?";
std::string insert_str =
"INSERT INTO " + std::string(kTableName) + "(count, time";
std::string update_str =
"UPDATE " + std::string(kTableName) +
" SET count = count + ?, time = max(?, time)"
" WHERE time >= ? AND time < ?";
" WHERE rowid = ?";
for (size_t i = 0; i < arraysize(matched_columns); i++) {
locate_str = base::StringPrintf(
"%s AND %s IS ?", locate_str.c_str(), matched_columns[i]);
insert_str =
base::StringPrintf("%s, %s", insert_str.c_str(), matched_columns[i]);
update_str = base::StringPrintf(
"%s AND %s IS ?", update_str.c_str(), matched_columns[i]);
}
insert_str += ") VALUES (?, ?";
for (size_t i = 0; i < arraysize(matched_columns); i++) {
insert_str += ", ?";
}
locate_str += " ORDER BY time DESC LIMIT 1";
insert_str += ")";
for (ActionQueue::iterator i = queue.begin(); i != queue.end(); ++i) {
......@@ -315,14 +327,12 @@ bool CountingPolicy::FlushDatabase(sql::Connection* db) {
matched_values.push_back(-1);
}
// Assume there is an existing row for this action, and try to update the
// count.
sql::Statement update_statement(db->GetCachedStatement(
sql::StatementID(SQL_FROM_HERE), update_str.c_str()));
update_statement.BindInt(0, count);
update_statement.BindInt64(1, action.time().ToInternalValue());
update_statement.BindInt64(2, day_start.ToInternalValue());
update_statement.BindInt64(3, next_day.ToInternalValue());
// Search for a matching row for this action whose count can be
// incremented.
sql::Statement locate_statement(db->GetCachedStatement(
sql::StatementID(SQL_FROM_HERE), locate_str.c_str()));
locate_statement.BindInt64(0, day_start.ToInternalValue());
locate_statement.BindInt64(1, next_day.ToInternalValue());
for (size_t j = 0; j < matched_values.size(); j++) {
// A call to BindNull when matched_values contains -1 is likely not
// necessary as parameters default to null before they are explicitly
......@@ -330,23 +340,23 @@ bool CountingPolicy::FlushDatabase(sql::Connection* db) {
// ever comes with some values already bound, we bind all parameters
// (even null ones) explicitly.
if (matched_values[j] == -1)
update_statement.BindNull(j + 4);
locate_statement.BindNull(j + 2);
else
update_statement.BindInt64(j + 4, matched_values[j]);
locate_statement.BindInt64(j + 2, matched_values[j]);
}
if (locate_statement.Step()) {
// A matching row was found. Update the count and time.
int64 rowid = locate_statement.ColumnInt64(0);
sql::Statement update_statement(db->GetCachedStatement(
sql::StatementID(SQL_FROM_HERE), update_str.c_str()));
update_statement.BindInt(0, count);
update_statement.BindInt64(1, action.time().ToInternalValue());
update_statement.BindInt64(2, rowid);
if (!update_statement.Run())
return false;
// Check if the update succeeded (was the count of updated rows non-zero)?
// If it failed because no matching row existed, fall back to inserting a
// new record.
if (db->GetLastChangeCount() > 0) {
if (db->GetLastChangeCount() > 1) {
LOG(WARNING) << "Found and updated multiple rows in the activity log "
<< "database; counts may be off!";
}
continue;
}
} else if (locate_statement.Succeeded()) {
// No matching row was found, so we need to insert one.
sql::Statement insert_statement(db->GetCachedStatement(
sql::StatementID(SQL_FROM_HERE), insert_str.c_str()));
insert_statement.BindInt(0, count);
......@@ -359,6 +369,10 @@ bool CountingPolicy::FlushDatabase(sql::Connection* db) {
}
if (!insert_statement.Run())
return false;
} else {
// Database error.
return false;
}
}
if (clean_database) {
......
......@@ -14,7 +14,12 @@
namespace extensions {
// A policy for logging the stream of actions, but without arguments.
// A policy for logging the stream of actions, but with most arguments stripped
// out (to improve privacy and reduce database size) and with multiple
// identical rows combined together using a count column to track the total
// number of repetitions. Identical rows within the same day are merged, but
// actions on separate days are kept distinct. Data is kept for up to a few
// days then deleted.
class CountingPolicy : public ActivityLogDatabasePolicy {
public:
explicit CountingPolicy(Profile* profile);
......
......@@ -256,6 +256,15 @@ class CountingPolicyTest : public testing::Test {
"[\"vamoose\"]", "", "", "", 1);
}
static void CheckDuplicates(scoped_ptr<Action::ActionVector> actions) {
ASSERT_EQ(2u, actions->size());
int total_count = 0;
for (size_t i = 0; i < actions->size(); i++) {
total_count += actions->at(i)->count();
}
ASSERT_EQ(3, total_count);
}
static void CheckAction(const Action& action,
const std::string& expected_id,
const Action::ActionType& expected_type,
......@@ -991,4 +1000,44 @@ TEST_F(CountingPolicyTest, DeleteActions) {
policy->Close();
}
// Tests that duplicate rows in the activity log database are handled properly
// when updating counts.
TEST_F(CountingPolicyTest, DuplicateRows) {
CountingPolicy* policy = new CountingPolicy(profile_.get());
base::SimpleTestClock* mock_clock = new base::SimpleTestClock();
mock_clock->SetNow(base::Time::Now().LocalMidnight() +
base::TimeDelta::FromHours(12));
policy->SetClockForTesting(scoped_ptr<base::Clock>(mock_clock));
// Record two actions with distinct URLs.
scoped_refptr<Action> action;
action = new Action(
"punky", mock_clock->Now(), Action::ACTION_API_CALL, "brewster");
action->set_page_url(GURL("http://www.google.com"));
policy->ProcessAction(action);
action = new Action(
"punky", mock_clock->Now(), Action::ACTION_API_CALL, "brewster");
action->set_page_url(GURL("http://www.google.co.uk"));
policy->ProcessAction(action);
// Manipulate the database to clear the URLs, so that we end up with
// duplicate rows.
std::vector<GURL> no_url_restrictions;
policy->RemoveURLs(no_url_restrictions);
// Record one more action, with no URL. This should increment the count on
// one, and exactly one, of the existing rows.
action = new Action(
"punky", mock_clock->Now(), Action::ACTION_API_CALL, "brewster");
policy->ProcessAction(action);
CheckReadData(
policy,
"punky",
0,
base::Bind(&CountingPolicyTest::CheckDuplicates));
policy->Close();
}
} // namespace extensions
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment