Commit e8a0c7d9 authored by Jia's avatar Jia Committed by Commit Bot

[cros search service] Enforce correct ordering of async operations

If a client makes rapid index updates that are intrinsically async,
there may be a race condition so that updates are not done in the
right order. This cl checks ongoing updates and queue later updates
when necessary.

Bug: 1133931
Change-Id: I0edbf57f2039b8a4dc51b75292788f22464eef9e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2440320
Commit-Queue: Jia Meng <jiameng@chromium.org>
Reviewed-by: default avatarThanh Nguyen <thanhdng@chromium.org>
Reviewed-by: default avatarAndrew Moylan <amoylan@chromium.org>
Cr-Commit-Position: refs/heads/master@{#812923}
parent f3887d5b
...@@ -240,20 +240,18 @@ void InvertedIndex::AddDocuments(const DocumentToUpdate& documents) { ...@@ -240,20 +240,18 @@ void InvertedIndex::AddDocuments(const DocumentToUpdate& documents) {
uint32_t InvertedIndex::RemoveDocuments( uint32_t InvertedIndex::RemoveDocuments(
const std::vector<std::string>& document_ids) { const std::vector<std::string>& document_ids) {
uint32_t num_erase = 0; if (document_ids.empty())
return 0;
for (const auto& id : document_ids) { for (const auto& id : document_ids) {
if (doc_length_.find(id) == doc_length_.end()) // |doc_length_| could be in the process of being updated when this function
continue; // is called, hence we cannot use it to verify the existence of the |id|.
num_erase++;
documents_to_update_.push_back({id, std::vector<Token>()}); documents_to_update_.push_back({id, std::vector<Token>()});
} }
if (num_erase == 0)
return num_erase;
is_index_built_ = false; is_index_built_ = false;
InvertedIndexController(); InvertedIndexController();
return num_erase; return document_ids.size();
} }
std::vector<TfidfResult> InvertedIndex::GetTfidf( std::vector<TfidfResult> InvertedIndex::GetTfidf(
...@@ -320,8 +318,8 @@ void InvertedIndex::InvertedIndexController() { ...@@ -320,8 +318,8 @@ void InvertedIndex::InvertedIndexController() {
update_in_progress_ = true; update_in_progress_ = true;
base::ThreadPool::PostTaskAndReplyWithResult( base::ThreadPool::PostTaskAndReplyWithResult(
FROM_HERE, {base::MayBlock(), base::TaskPriority::BEST_EFFORT}, FROM_HERE, {base::MayBlock(), base::TaskPriority::BEST_EFFORT},
// Can't move doc_length_ since it is used to check if a document exists // TODO(jiameng): |doc_length_| can be moved since it's not used for
// or not. // document existence checking any more.
base::BindOnce(&UpdateDocuments, std::move(documents_to_update_), base::BindOnce(&UpdateDocuments, std::move(documents_to_update_),
doc_length_, std::move(dictionary_), doc_length_, std::move(dictionary_),
std::move(terms_to_be_updated_)), std::move(terms_to_be_updated_)),
......
...@@ -81,9 +81,11 @@ class InvertedIndex { ...@@ -81,9 +81,11 @@ class InvertedIndex {
void AddDocuments(const DocumentToUpdate& documents); void AddDocuments(const DocumentToUpdate& documents);
// Removes documents from the inverted index. Do nothing if the document id is // Removes documents from the inverted index. Do nothing if the document id is
// not in the index. Returns number of documents deleted. // not in the index.
// This function doesn't modify any cache. It only removes // This function doesn't modify any cache. It only removes
// documents and tokens from the index. // documents and tokens from the index.
// As other operations may be running on a separate thread, this function
// returns size of |document_ids| and not actually deleted documents.
uint32_t RemoveDocuments(const std::vector<std::string>& document_ids); uint32_t RemoveDocuments(const std::vector<std::string>& document_ids);
// Gets TF-IDF scores for a term. This function returns the TF-IDF score from // Gets TF-IDF scores for a term. This function returns the TF-IDF score from
......
...@@ -80,18 +80,37 @@ uint64_t InvertedIndexSearch::GetSize() { ...@@ -80,18 +80,37 @@ uint64_t InvertedIndexSearch::GetSize() {
void InvertedIndexSearch::AddOrUpdate( void InvertedIndexSearch::AddOrUpdate(
const std::vector<local_search_service::Data>& data) { const std::vector<local_search_service::Data>& data) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DCHECK(!data.empty());
++num_queued_index_updates_;
base::PostTaskAndReplyWithResult( base::PostTaskAndReplyWithResult(
blocking_task_runner_.get(), FROM_HERE, blocking_task_runner_.get(), FROM_HERE,
base::BindOnce(&ExtractDocumentsContent, data), base::BindOnce(&ExtractDocumentsContent, data),
base::BindOnce(&InvertedIndexSearch::OnExtractDocumentsContentDone, base::BindOnce(&InvertedIndexSearch::FinalizeAddOrUpdate,
weak_ptr_factory_.GetWeakPtr())); weak_ptr_factory_.GetWeakPtr()));
} }
uint32_t InvertedIndexSearch::Delete(const std::vector<std::string>& ids) { uint32_t InvertedIndexSearch::Delete(const std::vector<std::string>& ids) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
uint32_t num_deleted = inverted_index_->RemoveDocuments(ids); DCHECK(!ids.empty());
inverted_index_->BuildInvertedIndex();
return num_deleted; if (num_queued_index_updates_ == 0) {
// We can remove the documents immediately as there is no earlier
// index-modifying operation.
inverted_index_->RemoveDocuments(ids);
MaybeBuildInvertedIndex();
return ids.size();
}
// If there is an earlier index-modifying operation, Delete should wait until
// the other operations are complete. Delete is queued and we create a no-op
// to run on the same |blocking_task_runner_|.
++num_queued_index_updates_;
blocking_task_runner_->PostTaskAndReply(
FROM_HERE, base::DoNothing(),
base::BindOnce(&InvertedIndexSearch::FinalizeDelete,
weak_ptr_factory_.GetWeakPtr(), ids));
return ids.size();
} }
void InvertedIndexSearch::ClearIndex() { void InvertedIndexSearch::ClearIndex() {
...@@ -156,11 +175,26 @@ InvertedIndexSearch::FindTermForTesting(const base::string16& term) const { ...@@ -156,11 +175,26 @@ InvertedIndexSearch::FindTermForTesting(const base::string16& term) const {
return doc_with_freq; return doc_with_freq;
} }
void InvertedIndexSearch::OnExtractDocumentsContentDone( void InvertedIndexSearch::FinalizeAddOrUpdate(
const ExtractedContent& documents) { const ExtractedContent& documents) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
--num_queued_index_updates_;
inverted_index_->AddDocuments(documents); inverted_index_->AddDocuments(documents);
inverted_index_->BuildInvertedIndex(); MaybeBuildInvertedIndex();
}
void InvertedIndexSearch::FinalizeDelete(const std::vector<std::string>& ids) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
--num_queued_index_updates_;
inverted_index_->RemoveDocuments(ids);
MaybeBuildInvertedIndex();
}
void InvertedIndexSearch::MaybeBuildInvertedIndex() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
if (num_queued_index_updates_ == 0) {
inverted_index_->BuildInvertedIndex();
}
} }
} // namespace local_search_service } // namespace local_search_service
......
...@@ -39,6 +39,9 @@ class InvertedIndexSearch : public Index { ...@@ -39,6 +39,9 @@ class InvertedIndexSearch : public Index {
void AddOrUpdate(const std::vector<Data>& data) override; void AddOrUpdate(const std::vector<Data>& data) override;
// TODO(jiameng): we always build the index after documents are deleted. May // TODO(jiameng): we always build the index after documents are deleted. May
// revise this strategy if there is a different use case. // revise this strategy if there is a different use case.
// TODO(jiameng): for inverted index, the Delete function returns |ids| size,
// and not actual number of documents deleted. This would change in the next
// cl when these operations become async.
uint32_t Delete(const std::vector<std::string>& ids) override; uint32_t Delete(const std::vector<std::string>& ids) override;
void ClearIndex() override; void ClearIndex() override;
// Returns matching results for a given query by approximately matching the // Returns matching results for a given query by approximately matching the
...@@ -55,9 +58,26 @@ class InvertedIndexSearch : public Index { ...@@ -55,9 +58,26 @@ class InvertedIndexSearch : public Index {
const base::string16& term) const; const base::string16& term) const;
private: private:
void OnExtractDocumentsContentDone( void FinalizeAddOrUpdate(
const std::vector<std::pair<std::string, std::vector<Token>>>& documents); const std::vector<std::pair<std::string, std::vector<Token>>>& documents);
// FinalizeDelete is called if Delete cannot be immediately done because
// there's another index updating operation before it, i.e.
// |num_queued_index_updates_| is not zero.
void FinalizeDelete(const std::vector<std::string>& ids);
// In order to reduce unnecessary inverted index building, we only build the
// index if there's no upcoming modification to the index's document list.
void MaybeBuildInvertedIndex();
// AddOrUpdate requires content extraction to be done before index is updated
// (tokens added, index built). As content extraction runs on another thread
// (|blocking_task_runner_|), we need to keep track of how many index-update
// operations are to be done (and queued). Delete may be queued as well if
// there is an AddOrUpdate before it. We need to ensure documents are added or
// modified or deleted in the same order as they're given by the index client.
int num_queued_index_updates_ = 0;
std::unique_ptr<InvertedIndex> inverted_index_; std::unique_ptr<InvertedIndex> inverted_index_;
scoped_refptr<base::SequencedTaskRunner> blocking_task_runner_; scoped_refptr<base::SequencedTaskRunner> blocking_task_runner_;
SEQUENCE_CHECKER(sequence_checker_); SEQUENCE_CHECKER(sequence_checker_);
......
...@@ -158,7 +158,7 @@ TEST_F(InvertedIndexSearchTest, Delete) { ...@@ -158,7 +158,7 @@ TEST_F(InvertedIndexSearchTest, Delete) {
Wait(); Wait();
EXPECT_EQ(search_->GetSize(), 2u); EXPECT_EQ(search_->GetSize(), 2u);
EXPECT_EQ(search_->Delete({"id1", "id3"}), 1u); EXPECT_EQ(search_->Delete({"id1", "id3"}), 2u);
Wait(); Wait();
{ {
...@@ -298,5 +298,32 @@ TEST_F(InvertedIndexSearchTest, Find) { ...@@ -298,5 +298,32 @@ TEST_F(InvertedIndexSearchTest, Find) {
} }
} }
TEST_F(InvertedIndexSearchTest, SequenceOfDeletes) {
const std::map<std::string, std::vector<ContentWithId>> data_to_register = {
{"id1",
{{"cid_1", "This is a help wi-fi article"},
{"cid_2", "Another help help wi-fi"}}},
{"id2", {{"cid_3", "help article on wi-fi"}}}};
const std::vector<Data> data = CreateTestData(data_to_register);
search_->AddOrUpdate(data);
const std::map<std::string, std::vector<ContentWithId>> data_to_update = {
{"id1",
{{"cid_1", "This is a help bluetooth article"},
{"cid_2", "Google Playstore Google Music"}}},
{"id3", {{"cid_3", "Google Map"}}}};
const std::vector<Data> updated_data = CreateTestData(data_to_update);
search_->AddOrUpdate(updated_data);
EXPECT_EQ(search_->Delete({"id1"}), 1u);
EXPECT_EQ(search_->Delete({"id2", "id3"}), 2u);
// Force all operations to complete.
Wait();
EXPECT_EQ(search_->GetSize(), 0u);
}
} // namespace local_search_service } // namespace local_search_service
} // namespace chromeos } // namespace chromeos
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment