Commit c164ede5 authored by Mikel Astiz's avatar Mikel Astiz Committed by Commit Bot

Handle duplicate bookmark GUIDs in remote data

Server-side data can theoretically contain duplicate GUIDs, and the end
result today is a crash.

It is hard to deal with this case or even reason about what a reasonable
behavior is, so this patch instead detects and resolves the issue in a
preprocessing stage, such that offending GUIDs are ignored.

Doing so allows further refactorings to simplify the merge logic.

Bug: 978430
Change-Id: I4926bfe88aee2724633a447b0ed9ce086358b439
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1939974
Commit-Queue: Mikel Astiz <mastiz@chromium.org>
Reviewed-by: default avatarMohamed Amir Yosef <mamir@chromium.org>
Reviewed-by: default avatarMarc Treib <treib@chromium.org>
Cr-Commit-Position: refs/heads/master@{#720217}
parent f863cc78
......@@ -6,7 +6,6 @@
#include <stdint.h>
#include <algorithm>
#include <map>
#include <utility>
#include <vector>
......@@ -36,31 +35,6 @@ namespace syncer {
namespace {
// Returns true iff |values| contains at least one value more than once. Takes
// the vector by value on purpose, since it is sorted as a scratch copy.
bool ContainsDuplicate(std::vector<std::string> values) {
  std::sort(values.begin(), values.end());
  for (size_t i = 1; i < values.size(); ++i) {
    if (values[i - 1] == values[i]) {
      return true;
    }
  }
  return false;
}
bool ContainsDuplicateClientTagHash(const UpdateResponseDataList& updates) {
std::vector<std::string> raw_client_tag_hashes;
for (const std::unique_ptr<UpdateResponseData>& update : updates) {
DCHECK(update);
if (!update->entity->client_tag_hash.value().empty()) {
raw_client_tag_hashes.push_back(update->entity->client_tag_hash.value());
}
}
return ContainsDuplicate(std::move(raw_client_tag_hashes));
}
bool ContainsDuplicateServerID(const UpdateResponseDataList& updates) {
std::vector<std::string> server_ids;
for (const std::unique_ptr<UpdateResponseData>& update : updates) {
DCHECK(update);
server_ids.push_back(update->entity->id);
}
return ContainsDuplicate(std::move(server_ids));
}
// Enumeration of possible values for the positioning schemes used in Sync
// entities. Used in UMA metrics. Do not re-order or delete these entries; they
// are used in a UMA histogram. Please edit SyncPositioningScheme in enums.xml
......@@ -482,19 +456,16 @@ void ModelTypeWorker::ApplyPendingUpdates() {
<< base::StringPrintf("Delivering %" PRIuS " applicable updates.",
pending_updates_.size());
// Having duplicates should be rare, so only do the de-duping if
// we've actually detected one.
// Deduplicate updates first based on server ids.
if (ContainsDuplicateServerID(pending_updates_)) {
DeduplicatePendingUpdatesBasedOnServerId();
}
// Deduplicate updates first based on server ids, which are the only
// legitimate source of duplicates, especially due to pagination.
DeduplicatePendingUpdatesBasedOnServerId();
// Check for duplicate client tag hashes after removing duplicate server
// ids, and deduplicate updates based on client tag hashes if necessary.
if (ContainsDuplicateClientTagHash(pending_updates_)) {
DeduplicatePendingUpdatesBasedOnClientTagHash();
}
// As extra precaution, and although it shouldn't be necessary without a
// misbehaving server, deduplicate based on client tags and originator item
// IDs. This allows further code to use DCHECKs without relying on external
// behavior.
DeduplicatePendingUpdatesBasedOnClientTagHash();
DeduplicatePendingUpdatesBasedOnOriginatorClientItemId();
int num_updates_applied = pending_updates_.size();
model_type_processor_->OnUpdateReceived(model_type_state_,
......@@ -690,6 +661,7 @@ void ModelTypeWorker::DecryptStoredEntities() {
void ModelTypeWorker::DeduplicatePendingUpdatesBasedOnServerId() {
UpdateResponseDataList candidates;
pending_updates_.swap(candidates);
pending_updates_.reserve(candidates.size());
std::map<std::string, size_t> id_to_index;
for (std::unique_ptr<UpdateResponseData>& candidate : candidates) {
......@@ -716,6 +688,7 @@ void ModelTypeWorker::DeduplicatePendingUpdatesBasedOnServerId() {
void ModelTypeWorker::DeduplicatePendingUpdatesBasedOnClientTagHash() {
UpdateResponseDataList candidates;
pending_updates_.swap(candidates);
pending_updates_.reserve(candidates.size());
std::map<ClientTagHash, size_t> tag_to_index;
for (std::unique_ptr<UpdateResponseData>& candidate : candidates) {
......@@ -741,6 +714,36 @@ void ModelTypeWorker::DeduplicatePendingUpdatesBasedOnClientTagHash() {
}
}
void ModelTypeWorker::DeduplicatePendingUpdatesBasedOnOriginatorClientItemId() {
  UpdateResponseDataList candidates;
  pending_updates_.swap(candidates);
  pending_updates_.reserve(candidates.size());

  // Maps an originator client item ID to the index in |pending_updates_| where
  // the update carrying that ID currently lives.
  std::map<std::string, size_t> item_id_to_index;
  for (std::unique_ptr<UpdateResponseData>& candidate : candidates) {
    DCHECK(candidate);

    const std::string& item_id = candidate->entity->originator_client_item_id;
    // Updates without an originator item ID (which is the case for all
    // datatypes except bookmarks) are passed through untouched.
    if (item_id.empty()) {
      pending_updates_.push_back(std::move(candidate));
      continue;
    }

    // Tentatively record that an update with this item ID is about to be
    // appended at index |pending_updates_.size()|. If the ID was seen before,
    // emplace() leaves the map unchanged and yields the existing entry.
    auto result = item_id_to_index.emplace(item_id, pending_updates_.size());
    if (result.second) {
      // First occurrence of this item ID: append at the end, matching the
      // index stored above.
      pending_updates_.push_back(std::move(candidate));
    } else {
      // Duplicate: the later update wins, replacing the earlier one in place
      // so the earlier one's position is preserved.
      pending_updates_[result.first->second] = std::move(candidate);
    }
  }
}
// static
bool ModelTypeWorker::DecryptSpecifics(const Cryptographer& cryptographer,
const sync_pb::EntitySpecifics& in,
......
......@@ -205,6 +205,11 @@ class ModelTypeWorker : public UpdateHandler,
// tag hash. It discards all of them except the last one.
void DeduplicatePendingUpdatesBasedOnClientTagHash();
// Filters out duplicate updates from |pending_updates_| based on the
// originator item ID (in practice used for bookmarks only). It discards all
// of them except the last one.
void DeduplicatePendingUpdatesBasedOnOriginatorClientItemId();
ModelType type_;
DataTypeDebugInfoEmitter* debug_info_emitter_;
......
......@@ -864,11 +864,10 @@ TEST_F(ModelTypeWorkerTest, ReceiveUpdates_MultipleDuplicateHashes) {
EXPECT_EQ(kValue3, result[2]->entity->specifics.preference().value());
}
// Covers the scenario where two updates have the same client tag hash but
// different server IDs. This scenario is considered a bug on the server.
TEST_F(ModelTypeWorkerTest,
ReceiveUpdates_DuplicateClientTagHashesForDistinctServerIds) {
// This is testing that in a scenario where two updates have the same
// client tag hash and different server ids, the proper UMA metrics are
// emitted. This scenario is considered a bug on the server.
NormalInitialize();
// First create two entities with different tags, so they get assigned
......@@ -897,6 +896,44 @@ TEST_F(ModelTypeWorkerTest,
EXPECT_EQ(entity2.id_string(), result[0]->entity->id);
}
// Covers the scenario where two updates have the same originator client item
// ID but different server IDs. This scenario is considered a bug on the
// server.
TEST_F(ModelTypeWorkerTest,
       ReceiveUpdates_DuplicateOriginatorClientIdForDistinctServerIds) {
  const std::string kOriginatorClientItemId = "itemid";
  const std::string kURL1 = "http://url1";
  const std::string kURL2 = "http://url2";
  const std::string kServerId1 = "serverid1";
  const std::string kServerId2 = "serverid2";

  NormalInitialize();

  // Build two entities that share the originator client item ID but differ in
  // server ID and bookmark URL.
  sync_pb::SyncEntity first_entity;
  first_entity.set_id_string(kServerId1);
  first_entity.set_originator_client_item_id(kOriginatorClientItemId);
  first_entity.mutable_specifics()->mutable_bookmark()->set_url(kURL1);

  sync_pb::SyncEntity second_entity;
  second_entity.set_id_string(kServerId2);
  second_entity.set_originator_client_item_id(kOriginatorClientItemId);
  second_entity.mutable_specifics()->mutable_bookmark()->set_url(kURL2);

  worker()->ProcessGetUpdatesResponse(
      server()->GetProgress(), server()->GetContext(),
      {&first_entity, &second_entity}, status_controller());
  ApplyUpdates();

  // Only the last of the duplicated updates should survive deduplication.
  ASSERT_EQ(1u, processor()->GetNumUpdateResponses());
  std::vector<const UpdateResponseData*> received =
      processor()->GetNthUpdateResponse(0);
  ASSERT_EQ(1u, received.size());
  ASSERT_TRUE(received[0]);
  EXPECT_EQ(kURL2, received[0]->entity->specifics.bookmark().url());
}
// Test that an update download coming in multiple parts gets accumulated into
// one call to the processor.
TEST_F(ModelTypeWorkerTest, ReceiveMultiPartUpdates) {
......@@ -1902,6 +1939,7 @@ TEST_F(ModelTypeWorkerBookmarksTest, CanDecryptUpdateWithMissingBookmarkGUID) {
sync_pb::SyncEntity entity;
entity.mutable_specifics()->mutable_bookmark()->set_url("www.foo.com");
entity.mutable_specifics()->mutable_bookmark()->set_title("Title");
entity.set_id_string("testserverid");
entity.set_originator_client_item_id(kGuid1);
*entity.mutable_unique_position() =
UniquePosition::InitialPosition(UniquePosition::RandomSuffix()).ToProto();
......@@ -1950,6 +1988,7 @@ TEST_F(ModelTypeWorkerBookmarksTest,
sync_pb::SyncEntity entity;
entity.mutable_specifics()->mutable_bookmark()->set_url("www.foo.com");
entity.mutable_specifics()->mutable_bookmark()->set_title("Title");
entity.set_id_string("testserverid");
entity.set_originator_client_item_id(kInvalidOCII);
*entity.mutable_unique_position() =
UniquePosition::InitialPosition(UniquePosition::RandomSuffix()).ToProto();
......@@ -1996,6 +2035,7 @@ TEST_F(ModelTypeWorkerBookmarksTest,
// Generate specifics without a GUID.
sync_pb::SyncEntity entity;
entity.mutable_specifics()->mutable_bookmark();
entity.set_id_string("testserverid");
entity.set_originator_client_item_id(kGuid1);
*entity.mutable_unique_position() =
UniquePosition::InitialPosition(UniquePosition::RandomSuffix()).ToProto();
......@@ -2037,6 +2077,7 @@ TEST_F(ModelTypeWorkerBookmarksTest,
// originator_client_item_id.
sync_pb::SyncEntity entity;
entity.mutable_specifics()->mutable_bookmark();
entity.set_id_string("testserverid");
entity.set_originator_client_item_id(kInvalidOCII);
*entity.mutable_unique_position() =
UniquePosition::InitialPosition(UniquePosition::RandomSuffix()).ToProto();
......
......@@ -107,11 +107,6 @@ class BookmarkModelMerger {
void ProcessLocalCreation(const bookmarks::BookmarkNode* parent,
size_t index);
// Gets the bookmark node corresponding to a permanent folder identified by
// |server_defined_unique_tag|.
const bookmarks::BookmarkNode* GetPermanentFolder(
const std::string& server_defined_unique_tag) const;
// Looks for a local node under |local_parent| that matches |remote_node|,
// starting at index |local_child_start_index|. First attempts to find a match
// by GUID and otherwise attempts to find one by semantics. If no match is
......
......@@ -23,6 +23,7 @@
using testing::_;
using testing::Eq;
using testing::Ne;
using testing::NotNull;
using testing::UnorderedElementsAre;
......@@ -64,6 +65,7 @@ std::unique_ptr<syncer::UpdateResponseData> CreateUpdateResponseData(
auto data = std::make_unique<syncer::EntityData>();
data->id = server_id;
data->originator_client_item_id = *guid;
data->parent_id = parent_id;
data->unique_position = unique_position.ToProto();
......@@ -1255,4 +1257,149 @@ TEST(BookmarkModelMergerTest, ShouldIgnoreRemoteGUIDIfInvalidSpecifics) {
EXPECT_THAT(tracker->GetEntityForBookmarkNode(bookmark), NotNull());
}
// Tests that the GUID-based matching algorithm does not match two remote nodes
// with the same local node, even if the remote data contains duplicate GUIDs.
TEST(BookmarkModelMergerTest, ShouldIgnoreRemoteDuplicateGUID) {
  base::test::ScopedFeatureList features;
  features.InitAndEnableFeature(switches::kMergeBookmarksUsingGUIDs);

  const std::string kId1 = "Id1";
  const std::string kId2 = "Id2";
  const std::string kTitle1 = "Title1";
  const std::string kTitle2 = "Title2";
  const std::string kLocalTitle = "LocalTitle";
  const std::string kUrl = "http://www.foo.com/";
  const std::string kGuid = base::GenerateGUID();

  std::unique_ptr<bookmarks::BookmarkModel> model =
      bookmarks::TestBookmarkClient::CreateModel();

  // -------- The local model --------
  // | - bookmark(kGuid/kUrl/kLocalTitle)
  const bookmarks::BookmarkNode* bar_node = model->bookmark_bar_node();
  const bookmarks::BookmarkNode* local_bookmark = model->AddURL(
      /*parent=*/bar_node, /*index=*/0, base::UTF8ToUTF16(kLocalTitle),
      GURL(kUrl), nullptr, base::Time::Now(), kGuid);
  ASSERT_TRUE(local_bookmark);
  ASSERT_THAT(bar_node->children(), ElementRawPointersAre(local_bookmark));

  // -------- The remote model --------
  // bookmark_bar
  //  | - bookmark (kGuid/kUrl/kTitle1)
  //  | - bookmark (kGuid/kUrl/kTitle2)
  const std::string position_suffix = syncer::UniquePosition::RandomSuffix();
  syncer::UniquePosition pos1 =
      syncer::UniquePosition::InitialPosition(position_suffix);
  syncer::UniquePosition pos2 =
      syncer::UniquePosition::After(pos1, position_suffix);

  syncer::UpdateResponseDataList updates;
  updates.push_back(CreateBookmarkBarNodeUpdateData());
  updates.push_back(CreateUpdateResponseData(
      /*server_id=*/kId1, /*parent_id=*/kBookmarkBarId, kTitle1,
      /*url=*/kUrl,
      /*is_folder=*/false, /*unique_position=*/pos1,
      /*guid=*/kGuid));
  updates.push_back(CreateUpdateResponseData(
      /*server_id=*/kId2, /*parent_id=*/kBookmarkBarId, kTitle2,
      /*url=*/kUrl,
      /*is_folder=*/false, /*unique_position=*/pos2,
      /*guid=*/kGuid));
  // |originator_client_item_id| cannot itself be duplicated because
  // ModelTypeWorker guarantees otherwise.
  updates.back()->entity->originator_client_item_id = base::GenerateGUID();

  std::unique_ptr<SyncedBookmarkTracker> tracker =
      Merge(std::move(updates), model.get());

  // -------- The merged model --------
  // | - bookmark (kGuid/kUrl/kTitle1)
  // | - bookmark (<some-other-guid>/kUrl/kTitle2)
  // Both remote nodes must be present and tracked, but only one of them may
  // carry kGuid.
  ASSERT_EQ(bar_node->children().size(), 2u);
  const bookmarks::BookmarkNode* merged1 =
      model->bookmark_bar_node()->children()[0].get();
  const bookmarks::BookmarkNode* merged2 =
      model->bookmark_bar_node()->children()[1].get();
  EXPECT_THAT(merged1->guid(), Eq(kGuid));
  EXPECT_THAT(merged2->guid(), Ne(kGuid));
  EXPECT_THAT(tracker->GetEntityForBookmarkNode(merged1), NotNull());
  EXPECT_THAT(tracker->GetEntityForBookmarkNode(merged2), NotNull());
}
// Same as previous test but in addition all nodes match semantically.
TEST(BookmarkModelMergerTest, ShouldIgnoreRemoteDuplicateGUIDAndSemanticMatch) {
  base::test::ScopedFeatureList features;
  features.InitAndEnableFeature(switches::kMergeBookmarksUsingGUIDs);

  const std::string kId1 = "Id1";
  const std::string kId2 = "Id2";
  const std::string kTitle = "Title";
  const std::string kUrl = "http://www.foo.com/";
  const std::string kGuid = base::GenerateGUID();

  std::unique_ptr<bookmarks::BookmarkModel> model =
      bookmarks::TestBookmarkClient::CreateModel();

  // -------- The local model --------
  // | - bookmark(kGuid/kUrl/kTitle)
  const bookmarks::BookmarkNode* bar_node = model->bookmark_bar_node();
  const bookmarks::BookmarkNode* local_bookmark = model->AddURL(
      /*parent=*/bar_node, /*index=*/0, base::UTF8ToUTF16(kTitle),
      GURL(kUrl), nullptr, base::Time::Now(), kGuid);
  ASSERT_TRUE(local_bookmark);
  ASSERT_THAT(bar_node->children(), ElementRawPointersAre(local_bookmark));

  // -------- The remote model --------
  // bookmark_bar
  //  | - bookmark (kGuid/kUrl/kTitle)
  //  | - bookmark (kGuid/kUrl/kTitle)
  const std::string position_suffix = syncer::UniquePosition::RandomSuffix();
  syncer::UniquePosition pos1 =
      syncer::UniquePosition::InitialPosition(position_suffix);
  syncer::UniquePosition pos2 =
      syncer::UniquePosition::After(pos1, position_suffix);

  syncer::UpdateResponseDataList updates;
  updates.push_back(CreateBookmarkBarNodeUpdateData());
  updates.push_back(CreateUpdateResponseData(
      /*server_id=*/kId1, /*parent_id=*/kBookmarkBarId, kTitle,
      /*url=*/kUrl,
      /*is_folder=*/false, /*unique_position=*/pos1,
      /*guid=*/kGuid));
  updates.push_back(CreateUpdateResponseData(
      /*server_id=*/kId2, /*parent_id=*/kBookmarkBarId, kTitle,
      /*url=*/kUrl,
      /*is_folder=*/false, /*unique_position=*/pos2,
      /*guid=*/kGuid));
  // |originator_client_item_id| cannot itself be duplicated because
  // ModelTypeWorker guarantees otherwise.
  updates.back()->entity->originator_client_item_id = base::GenerateGUID();

  std::unique_ptr<SyncedBookmarkTracker> tracker =
      Merge(std::move(updates), model.get());

  // -------- The merged model --------
  // | - bookmark (kGuid/kUrl/kTitle)
  // | - bookmark (<some-other-guid>/kUrl/kTitle)
  // Both remote nodes must be present and tracked, but only one of them may
  // carry kGuid.
  ASSERT_EQ(bar_node->children().size(), 2u);
  const bookmarks::BookmarkNode* merged1 =
      model->bookmark_bar_node()->children()[0].get();
  const bookmarks::BookmarkNode* merged2 =
      model->bookmark_bar_node()->children()[1].get();
  EXPECT_THAT(merged1->guid(), Eq(kGuid));
  EXPECT_THAT(merged2->guid(), Ne(kGuid));
  EXPECT_THAT(tracker->GetEntityForBookmarkNode(merged1), NotNull());
  EXPECT_THAT(tracker->GetEntityForBookmarkNode(merged2), NotNull());
}
} // namespace sync_bookmarks
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment