Commit 83ded747 authored by Thanh Nguyen, committed by Commit Bot

[cros search service] Add ClearIndex function to the index

This CL adds a ClearIndex function to Index and its subclasses. This function
clears all the data stored in the index.

Bug: 1132170
Change-Id: Idbec8ed4c721f09577b40c59b9cf24ebe5060252
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2428273
Reviewed-by: Jia Meng <jiameng@chromium.org>
Commit-Queue: Thanh Nguyen <thanhdng@chromium.org>
Cr-Commit-Position: refs/heads/master@{#811136}
parent 5545eaa8
......@@ -47,6 +47,9 @@ class Index {
// IDs should not be empty.
virtual uint32_t Delete(const std::vector<std::string>& ids) = 0;
// Clears all data in the index.
virtual void ClearIndex() = 0;
// Returns matching results for a given query.
// Zero |max_results| means no max.
// Search behaviour depends on the implementation.
......
......@@ -138,6 +138,25 @@ DocumentStateVariables UpdateDocuments(DocumentToUpdate&& documents_to_update,
return std::make_tuple(std::move(new_doc_length), std::move(dictionary),
std::move(terms_to_be_updated));
}
// Empties every index container handed in and returns the emptied state.
// DCHECKs that it is not running on the browser UI thread.
//
// |documents_to_update| is cleared but not part of the returned value.
// |doc_length| is never read: a freshly constructed, empty DocLength is
// returned in its place. The result is ((doc length, dictionary, terms to be
// updated), tf-idf cache).
std::pair<DocumentStateVariables, TfidfCache> ClearData(
    DocumentToUpdate&& documents_to_update,
    const DocLength& doc_length,
    Dictionary&& dictionary,
    TermSet&& terms_to_be_updated,
    TfidfCache&& tfidf_cache) {
  DCHECK(!::content::BrowserThread::CurrentlyOn(::content::BrowserThread::UI));

  // Drop the contents of each container that was passed in.
  tfidf_cache.clear();
  terms_to_be_updated.clear();
  dictionary.clear();
  documents_to_update.clear();

  // Document lengths are reset by handing back a brand-new container.
  DocLength empty_doc_length;
  auto cleared_state =
      std::make_tuple(std::move(empty_doc_length), std::move(dictionary),
                      std::move(terms_to_be_updated));
  return std::make_pair(std::move(cleared_state), std::move(tfidf_cache));
}
} // namespace
InvertedIndex::InvertedIndex() = default;
......@@ -252,11 +271,32 @@ void InvertedIndex::BuildInvertedIndex() {
InvertedIndexController();
}
// Requests that all data held by the inverted index be cleared. This only
// records the request in |request_to_clear_index_| and then invokes
// InvertedIndexController(), which decides when the clear task is posted (it
// returns early while another update is in progress).
void InvertedIndex::ClearInvertedIndex() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
request_to_clear_index_ = true;
InvertedIndexController();
}
void InvertedIndex::InvertedIndexController() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
// TODO(thanhdng): A clear-index call should ideally cancel all other update
// operations. Need to update the code to reflect this.
if (update_in_progress_)
return;
if (request_to_clear_index_) {
update_in_progress_ = true;
request_to_clear_index_ = false;
base::ThreadPool::PostTaskAndReplyWithResult(
FROM_HERE, {base::MayBlock(), base::TaskPriority::BEST_EFFORT},
base::BindOnce(&ClearData, std::move(documents_to_update_), doc_length_,
std::move(dictionary_), std::move(terms_to_be_updated_),
std::move(tfidf_cache_)),
base::BindOnce(&InvertedIndex::OnDataCleared,
weak_ptr_factory_.GetWeakPtr()));
return;
}
if (documents_to_update_.empty()) {
if (request_to_build_index_) {
update_in_progress_ = true;
......@@ -311,5 +351,18 @@ void InvertedIndex::OnUpdateDocumentsComplete(
InvertedIndexController();
}
// Reply callback for the clear-index task. Moves the emptied state carried by
// |inverted_index_data| back into the members, resets the bookkeeping and
// re-enters the controller.
void InvertedIndex::OnDataCleared(
    std::pair<DocumentStateVariables, TfidfCache>&& inverted_index_data) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  // First half of the pair: (doc length, dictionary, terms to be updated).
  auto& cleared_state = inverted_index_data.first;
  doc_length_ = std::move(std::get<0>(cleared_state));
  dictionary_ = std::move(std::get<1>(cleared_state));
  terms_to_be_updated_ = std::move(std::get<2>(cleared_state));

  // Second half of the pair: the TF-IDF cache.
  tfidf_cache_ = std::move(inverted_index_data.second);

  num_docs_from_last_update_ = 0;
  // Mark the operation as finished before re-running the controller, which
  // returns early while |update_in_progress_| is set.
  update_in_progress_ = false;
  InvertedIndexController();
}
} // namespace local_search_service
} // namespace chromeos
......@@ -95,6 +95,9 @@ class InvertedIndex {
// Builds the inverted index.
void BuildInvertedIndex();
// Clears all the data from the inverted index.
void ClearInvertedIndex();
// Checks if the inverted index has been built: returns |true| if the inverted
// index is up to date, returns |false| if there are some modified document
// since the last time the index has been built.
......@@ -119,6 +122,9 @@ class InvertedIndex {
void OnUpdateDocumentsComplete(
DocumentStateVariables&& document_state_variables);
// Called with the result of the clear-index task. Stores the returned (empty)
// document state and TF-IDF cache back into the members, resets
// |num_docs_from_last_update_| and |update_in_progress_|, then calls
// InvertedIndexController().
void OnDataCleared(
std::pair<DocumentStateVariables, TfidfCache>&& inverted_index_data);
base::RepeatingCallback<void()> on_index_built_;
// |is_index_built_| is only true if index's TF-IDF is consistent with the
......@@ -145,6 +151,7 @@ class InvertedIndex {
bool request_to_build_index_ = false;
bool update_in_progress_ = false;
bool index_building_in_progress_ = false;
// Set to true by ClearInvertedIndex(); reset to false by
// InvertedIndexController() when it posts the clear task.
bool request_to_clear_index_ = false;
SEQUENCE_CHECKER(sequence_checker_);
......
......@@ -94,6 +94,10 @@ uint32_t InvertedIndexSearch::Delete(const std::vector<std::string>& ids) {
return num_deleted;
}
// Clears the index by forwarding the request to the underlying inverted index.
// NOTE(review): the inverted index clears its data in a posted task, so the
// index is presumably not guaranteed to be empty when this call returns —
// confirm against callers.
void InvertedIndexSearch::ClearIndex() {
inverted_index_->ClearInvertedIndex();
}
ResponseStatus InvertedIndexSearch::Find(const base::string16& query,
uint32_t max_results,
std::vector<Result>* results) {
......
......@@ -40,6 +40,7 @@ class InvertedIndexSearch : public Index {
// TODO(jiameng): we always build the index after documents are deleted. May
// revise this strategy if there is a different use case.
uint32_t Delete(const std::vector<std::string>& ids) override;
void ClearIndex() override;
// Returns matching results for a given query by approximately matching the
// query with terms in the documents. Documents are ranked by TF-IDF scores.
// Scores in results are positive but not guaranteed to be in any particular
......
......@@ -170,6 +170,24 @@ TEST_F(InvertedIndexSearchTest, Delete) {
}
}
// Verifies that ClearIndex() empties an index that previously held documents.
TEST_F(InvertedIndexSearchTest, ClearIndex) {
  // Two documents: "id1" with two contents and "id2" with one.
  const std::map<std::string, std::vector<ContentWithId>> registered_docs = {
      {"id1",
       {{"cid_1", "This is a help wi-fi article"},
        {"cid_2", "Another help help wi-fi"}}},
      {"id2", {{"cid_3", "help article on wi-fi"}}}};
  const std::vector<Data> test_data = CreateTestData(registered_docs);

  // Populate the index and check that both documents are counted.
  search_->AddOrUpdate(test_data);
  Wait();
  EXPECT_EQ(search_->GetSize(), 2u);

  // After clearing, the index must report no documents.
  search_->ClearIndex();
  Wait();
  EXPECT_EQ(search_->GetSize(), 0u);
}
TEST_F(InvertedIndexSearchTest, Find) {
const std::map<std::string, std::vector<WeightedContentWithId>>
data_to_register = {{"id1",
......
......@@ -103,6 +103,8 @@ class InvertedIndexTest : public ::testing::Test {
index_.RemoveDocuments(doc_ids);
}
// Test helper: forwards to InvertedIndex::ClearInvertedIndex() on |index_|.
void ClearInvertedIndex() { index_.ClearInvertedIndex(); }
std::vector<TfidfResult> GetTfidf(const base::string16& term) {
return index_.GetTfidf(term);
}
......@@ -467,6 +469,37 @@ TEST_F(InvertedIndexTest, UpdateIndexTest) {
testing::UnorderedElementsAre(expected_tfidf_D_doc1));
}
// Checks that ClearInvertedIndex() leaves every piece of index state empty,
// even when a document addition was requested right before the clear.
TEST_F(InvertedIndexTest, ClearInvertedIndexTest) {
  // Nothing is cached before the first build.
  EXPECT_EQ(GetTfidfCache().size(), 0u);

  BuildInvertedIndex();
  Wait();
  EXPECT_EQ(NumBuilt(), 1);
  EXPECT_TRUE(BuildIndexCompleted());
  EXPECT_TRUE(IsInvertedIndexBuilt());
  EXPECT_EQ(GetTfidfCache().size(), 3u);

  // Request a document addition and a clear back to back, without waiting in
  // between.
  const base::string16 term_a(base::UTF8ToUTF16("A"));
  const base::string16 term_d(base::UTF8ToUTF16("D"));
  AddDocuments({{"doc3",
                 {{term_a,
                   {{kDefaultWeight, {"header", 1, 1}},
                    {kDefaultWeight / 2, {"body", 2, 1}},
                    {kDefaultWeight, {"header", 4, 1}}}},
                  {term_d,
                   {{kDefaultWeight, {"header", 3, 1}},
                    {kDefaultWeight / 2, {"body", 5, 1}}}}}}});
  ClearInvertedIndex();
  Wait();

  // Every container exposed by the fixture must be empty afterwards.
  EXPECT_EQ(GetTfidfCache().size(), 0u);
  EXPECT_EQ(GetTermToBeUpdated().size(), 0u);
  EXPECT_EQ(GetDocLength().size(), 0u);
  EXPECT_EQ(GetDictionary().size(), 0u);
  EXPECT_EQ(GetDocumentsToUpdate().size(), 0u);
}
TEST_F(InvertedIndexTest, FindMatchingDocumentsApproximatelyTest) {
const double prefix_threshold = 1.0;
const double block_threshold = 1.0;
......
......@@ -109,6 +109,10 @@ uint32_t LinearMapSearch::Delete(const std::vector<std::string>& ids) {
return num_deleted;
}
// Clears the index by removing every entry from |data_|.
void LinearMapSearch::ClearIndex() {
data_.clear();
}
ResponseStatus LinearMapSearch::Find(const base::string16& query,
uint32_t max_results,
std::vector<Result>* results) {
......
......@@ -41,6 +41,7 @@ class LinearMapSearch : public Index {
uint64_t GetSize() override;
void AddOrUpdate(const std::vector<Data>& data) override;
uint32_t Delete(const std::vector<std::string>& ids) override;
void ClearIndex() override;
// For each data in the index, we return the 1st search tag that matches
// the query (i.e. above the threshold). Client should put the most
// important search tag first when registering the data in the index.
......
......@@ -185,5 +185,19 @@ TEST_F(LinearMapSearchTest, ResultFound) {
/*max_results=*/-1, ResponseStatus::kSuccess, {});
}
// Verifies that ClearIndex() removes all registered data from the index.
TEST_F(LinearMapSearchTest, ClearIndex) {
  // Two items: "id1" with three search tags and "xyz" with one.
  const std::map<std::string, std::vector<ContentWithId>> registered_items = {
      {"id1", {{"cid1", "id1"}, {"cid2", "tag1a"}, {"cid3", "tag1b"}}},
      {"xyz", {{"cid4", "xyz"}}}};
  std::vector<Data> test_data = CreateTestData(registered_items);
  EXPECT_EQ(test_data.size(), 2u);

  // Register the items and confirm that the index reports both.
  index_->AddOrUpdate(test_data);
  EXPECT_EQ(index_->GetSize(), 2u);

  // Clearing must leave the index empty.
  index_->ClearIndex();
  EXPECT_EQ(index_->GetSize(), 0u);
}
} // namespace local_search_service
} // namespace chromeos
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment