Commit 8c95d713 authored by atwilson@chromium.org's avatar atwilson@chromium.org

Limit size of typed url node.

Now, we cap the # visits for each node to 100 items, to avoid hitting the 10K
node size limit on the server. We also ignore all reload visits, since those
are not used by the omnibox suggestion algorithm and tend to be the bulk of
the visits in the large nodes we've encountered.

BUG=89460
TEST=Run unit tests.


Review URL: http://codereview.chromium.org/7550062

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@96287 0039d316-1c4b-4281-b951-d872f2087c98
parent 8f1124e5
......@@ -18,6 +18,10 @@
namespace browser_sync {
// The server backend can't handle arbitrarily large node sizes, so to keep
// the size under control we limit the visit array.
static const int kMaxTypedUrlVisits = 100;
const char kTypedUrlTag[] = "google_chrome_typed_urls";
static bool CheckVisitOrdering(const history::VisitVector& visits) {
......@@ -537,23 +541,80 @@ void TypedUrlModelAssociator::WriteToSyncNode(
const history::URLRow& url,
const history::VisitVector& visits,
sync_api::WriteNode* node) {
sync_pb::TypedUrlSpecifics typed_url;
WriteToTypedUrlSpecifics(url, visits, &typed_url);
node->SetTypedUrlSpecifics(typed_url);
}
void TypedUrlModelAssociator::WriteToTypedUrlSpecifics(
const history::URLRow& url,
const history::VisitVector& visits,
sync_pb::TypedUrlSpecifics* typed_url) {
DCHECK(!url.last_visit().is_null());
DCHECK(!visits.empty());
DCHECK(url.last_visit() == visits.back().visit_time);
sync_pb::TypedUrlSpecifics typed_url;
typed_url.set_url(url.url().spec());
typed_url.set_title(UTF16ToUTF8(url.title()));
typed_url.set_hidden(url.hidden());
typed_url->set_url(url.url().spec());
typed_url->set_title(UTF16ToUTF8(url.title()));
typed_url->set_hidden(url.hidden());
DCHECK(CheckVisitOrdering(visits));
bool only_typed = false;
int skip_count = 0;
if (visits.size() > static_cast<size_t>(kMaxTypedUrlVisits)) {
int typed_count = 0;
int total = 0;
// Walk the passed-in visit vector and count the # of typed visits.
for (history::VisitVector::const_iterator visit = visits.begin();
visit != visits.end(); ++visit) {
PageTransition::Type transition = static_cast<PageTransition::Type>(
visit->transition) & PageTransition::CORE_MASK;
// We ignore reload visits.
if (transition == PageTransition::RELOAD)
continue;
++total;
if (transition == PageTransition::TYPED)
++typed_count;
}
if (typed_count > kMaxTypedUrlVisits) {
only_typed = true;
skip_count = typed_count - kMaxTypedUrlVisits;
} else if (total > kMaxTypedUrlVisits) {
skip_count = total - kMaxTypedUrlVisits;
}
}
for (history::VisitVector::const_iterator visit = visits.begin();
visit != visits.end(); ++visit) {
typed_url.add_visits(visit->visit_time.ToInternalValue());
typed_url.add_visit_transitions(visit->transition);
PageTransition::Type transition = static_cast<PageTransition::Type>(
visit->transition) & PageTransition::CORE_MASK;
// Skip reload visits.
if (transition == PageTransition::RELOAD)
continue;
// If we only have room for typed visits, then only add typed visits.
if (only_typed && transition != PageTransition::TYPED)
continue;
if (skip_count > 0) {
// We have too many entries to fit, so we need to skip the oldest ones.
// Only skip typed URLs if there are too many typed URLs to fit.
if (only_typed || transition != PageTransition::TYPED) {
--skip_count;
continue;
}
}
typed_url->add_visits(visit->visit_time.ToInternalValue());
typed_url->add_visit_transitions(visit->transition);
}
node->SetTypedUrlSpecifics(typed_url);
DCHECK_EQ(skip_count, 0);
CHECK_GT(typed_url->visits_size(), 0);
CHECK_LE(typed_url->visits_size(), kMaxTypedUrlVisits);
}
// static
......
......@@ -149,6 +149,12 @@ class TypedUrlModelAssociator
std::vector<history::VisitInfo>* new_visits,
history::VisitVector* removed_visits);
// Converts the passed URL information to a TypedUrlSpecifics structure for
// writing to the sync DB
static void WriteToTypedUrlSpecifics(const history::URLRow& url,
const history::VisitVector& visits,
sync_pb::TypedUrlSpecifics* specifics);
// Updates the passed |url_row| based on the values in |specifics|. Fields
// that are not contained in |specifics| (such as typed_count) are left
// unchanged.
......
......@@ -29,7 +29,8 @@ class TypedUrlModelAssociatorTest : public testing::Test {
base::Time::FromInternalValue(last_visit));
history_url.set_hidden(hidden);
visits->push_back(history::VisitRow(
history_url.id(), history_url.last_visit(), 0, 0, 0));
history_url.id(), history_url.last_visit(), 0,
PageTransition::RELOAD, 0));
history_url.set_visit_count(visits->size());
return history_url;
}
......@@ -234,3 +235,79 @@ TEST_F(TypedUrlModelAssociatorTest, DiffVisitsAdd) {
EXPECT_EQ(new_visits[c].second, PageTransition::TYPED);
}
}
static history::VisitRow CreateVisit(PageTransition::Type type,
int64 timestamp) {
return history::VisitRow(0, base::Time::FromInternalValue(timestamp), 0,
type, 0);
}
TEST_F(TypedUrlModelAssociatorTest, WriteTypedUrlSpecifics) {
history::VisitVector visits;
visits.push_back(CreateVisit(PageTransition::TYPED, 1));
visits.push_back(CreateVisit(PageTransition::RELOAD, 2));
visits.push_back(CreateVisit(PageTransition::LINK, 3));
history::URLRow url(MakeTypedUrlRow("http://pie.com/", "pie",
1, 100, false, &visits));
sync_pb::TypedUrlSpecifics typed_url;
TypedUrlModelAssociator::WriteToTypedUrlSpecifics(url, visits, &typed_url);
// RELOAD visits should be removed.
EXPECT_EQ(2, typed_url.visits_size());
EXPECT_EQ(typed_url.visit_transitions_size(), typed_url.visits_size());
EXPECT_EQ(1, typed_url.visits(0));
EXPECT_EQ(3, typed_url.visits(1));
EXPECT_EQ(PageTransition::TYPED,
static_cast<PageTransition::Type>(typed_url.visit_transitions(0)));
EXPECT_EQ(PageTransition::LINK,
static_cast<PageTransition::Type>(typed_url.visit_transitions(1)));
}
TEST_F(TypedUrlModelAssociatorTest, TooManyVisits) {
history::VisitVector visits;
int64 timestamp = 1000;
visits.push_back(CreateVisit(PageTransition::TYPED, timestamp++));
for (int i = 0 ; i < 100; ++i) {
visits.push_back(CreateVisit(PageTransition::LINK, timestamp++));
}
history::URLRow url(MakeTypedUrlRow("http://pie.com/", "pie",
1, timestamp++, false, &visits));
sync_pb::TypedUrlSpecifics typed_url;
TypedUrlModelAssociator::WriteToTypedUrlSpecifics(url, visits, &typed_url);
// # visits should be capped at 100.
EXPECT_EQ(100, typed_url.visits_size());
EXPECT_EQ(typed_url.visit_transitions_size(), typed_url.visits_size());
EXPECT_EQ(1000, typed_url.visits(0));
// Visit with timestamp of 1001 should be omitted since we should have
// skipped that visit to stay under the cap.
EXPECT_EQ(1002, typed_url.visits(1));
EXPECT_EQ(PageTransition::TYPED,
static_cast<PageTransition::Type>(typed_url.visit_transitions(0)));
EXPECT_EQ(PageTransition::LINK,
static_cast<PageTransition::Type>(typed_url.visit_transitions(1)));
}
TEST_F(TypedUrlModelAssociatorTest, TooManyTypedVisits) {
history::VisitVector visits;
int64 timestamp = 1000;
for (int i = 0 ; i < 102; ++i) {
visits.push_back(CreateVisit(PageTransition::TYPED, timestamp++));
visits.push_back(CreateVisit(PageTransition::LINK, timestamp++));
visits.push_back(CreateVisit(PageTransition::RELOAD, timestamp++));
}
history::URLRow url(MakeTypedUrlRow("http://pie.com/", "pie",
1, timestamp++, false, &visits));
sync_pb::TypedUrlSpecifics typed_url;
TypedUrlModelAssociator::WriteToTypedUrlSpecifics(url, visits, &typed_url);
// # visits should be capped at 100.
EXPECT_EQ(100, typed_url.visits_size());
EXPECT_EQ(typed_url.visit_transitions_size(), typed_url.visits_size());
// First two typed visits should be skipped.
EXPECT_EQ(1006, typed_url.visits(0));
// Ensure there are no non-typed visits since that's all that should fit.
for (int i = 0; i < typed_url.visits_size(); ++i) {
EXPECT_EQ(PageTransition::TYPED, static_cast<PageTransition::Type>(
typed_url.visit_transitions(i)));
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment