Commit 9efc019f, authored by Jia and committed by Commit Bot

[cros search service] Make inverted-index-search's content extraction non-blocking

Bug: 1101877
Change-Id: I0fdf95df121f5f0b3783d7005f2c7988f49837e8
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2362365
Reviewed-by: Thanh Nguyen <thanhdng@chromium.org>
Commit-Queue: Jia Meng <jiameng@chromium.org>
Cr-Commit-Position: refs/heads/master@{#799927}
parent 6b595786
...@@ -7,14 +7,18 @@ ...@@ -7,14 +7,18 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "base/bind.h"
#include "base/i18n/rtl.h" #include "base/i18n/rtl.h"
#include "base/optional.h" #include "base/optional.h"
#include "base/strings/string_split.h" #include "base/strings/string_split.h"
#include "base/strings/string_util.h" #include "base/strings/string_util.h"
#include "base/task/task_traits.h"
#include "base/task/thread_pool.h"
#include "base/time/time.h" #include "base/time/time.h"
#include "chromeos/components/local_search_service/content_extraction_utils.h" #include "chromeos/components/local_search_service/content_extraction_utils.h"
#include "chromeos/components/local_search_service/inverted_index.h" #include "chromeos/components/local_search_service/inverted_index.h"
#include "chromeos/components/string_matching/tokenized_string.h" #include "chromeos/components/string_matching/tokenized_string.h"
#include "content/public/browser/browser_thread.h"
namespace chromeos { namespace chromeos {
namespace local_search_service { namespace local_search_service {
...@@ -22,6 +26,8 @@ namespace local_search_service { ...@@ -22,6 +26,8 @@ namespace local_search_service {
namespace { namespace {
using chromeos::string_matching::TokenizedString; using chromeos::string_matching::TokenizedString;
using ExtractedContent =
std::vector<std::pair<std::string, std::vector<Token>>>;
std::vector<Token> ExtractDocumentTokens(const Data& data) { std::vector<Token> ExtractDocumentTokens(const Data& data) {
// Use input locale unless it's empty. In this case we will use system // Use input locale unless it's empty. In this case we will use system
...@@ -40,32 +46,49 @@ std::vector<Token> ExtractDocumentTokens(const Data& data) { ...@@ -40,32 +46,49 @@ std::vector<Token> ExtractDocumentTokens(const Data& data) {
return ConsolidateToken(document_tokens); return ConsolidateToken(document_tokens);
} }
// Tokenizes every entry of |data| and returns one (id, tokens) pair per entry,
// in the same order as the input.
//
// Must not be called on the browser UI thread; this is DCHECKed on entry.
// Each document is expected to produce at least one token (also DCHECKed).
ExtractedContent ExtractDocumentsContent(const std::vector<Data>& data) {
  DCHECK(!content::BrowserThread::CurrentlyOn(content::BrowserThread::UI));

  ExtractedContent documents;
  // Exactly one output entry is produced per input entry, so size the result
  // once up front instead of growing it repeatedly.
  documents.reserve(data.size());
  for (const Data& d : data) {
    // Intentionally non-const: a const local could only be copied into
    // |documents|, whereas this one can be moved.
    std::vector<Token> document_tokens = ExtractDocumentTokens(d);
    DCHECK(!document_tokens.empty());
    documents.emplace_back(d.id, std::move(document_tokens));
  }
  return documents;
}
} // namespace } // namespace
InvertedIndexSearch::InvertedIndexSearch(IndexId index_id, InvertedIndexSearch::InvertedIndexSearch(IndexId index_id,
PrefService* local_state) PrefService* local_state)
: Index(index_id, Backend::kInvertedIndex, local_state), : Index(index_id, Backend::kInvertedIndex, local_state),
inverted_index_(std::make_unique<InvertedIndex>()) {} inverted_index_(std::make_unique<InvertedIndex>()),
blocking_task_runner_(base::ThreadPool::CreateSequencedTaskRunner(
{base::TaskPriority::BEST_EFFORT, base::MayBlock(),
base::TaskShutdownBehavior::CONTINUE_ON_SHUTDOWN})) {}
InvertedIndexSearch::~InvertedIndexSearch() = default; InvertedIndexSearch::~InvertedIndexSearch() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
}
uint64_t InvertedIndexSearch::GetSize() { uint64_t InvertedIndexSearch::GetSize() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
return inverted_index_->NumberDocuments(); return inverted_index_->NumberDocuments();
} }
void InvertedIndexSearch::AddOrUpdate( void InvertedIndexSearch::AddOrUpdate(
const std::vector<local_search_service::Data>& data) { const std::vector<local_search_service::Data>& data) {
std::vector<std::pair<std::string, std::vector<Token>>> documents; DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
for (const Data& d : data) { base::PostTaskAndReplyWithResult(
const std::vector<Token> document_tokens = ExtractDocumentTokens(d); blocking_task_runner_.get(), FROM_HERE,
DCHECK(!document_tokens.empty()); base::BindOnce(&ExtractDocumentsContent, data),
documents.push_back({d.id, document_tokens}); base::BindOnce(&InvertedIndexSearch::OnExtractDocumentsContentDone,
} weak_ptr_factory_.GetWeakPtr()));
inverted_index_->AddDocuments(documents);
inverted_index_->BuildInvertedIndex();
} }
uint32_t InvertedIndexSearch::Delete(const std::vector<std::string>& ids) { uint32_t InvertedIndexSearch::Delete(const std::vector<std::string>& ids) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
uint32_t num_deleted = inverted_index_->RemoveDocuments(ids); uint32_t num_deleted = inverted_index_->RemoveDocuments(ids);
inverted_index_->BuildInvertedIndex(); inverted_index_->BuildInvertedIndex();
return num_deleted; return num_deleted;
...@@ -74,6 +97,7 @@ uint32_t InvertedIndexSearch::Delete(const std::vector<std::string>& ids) { ...@@ -74,6 +97,7 @@ uint32_t InvertedIndexSearch::Delete(const std::vector<std::string>& ids) {
ResponseStatus InvertedIndexSearch::Find(const base::string16& query, ResponseStatus InvertedIndexSearch::Find(const base::string16& query,
uint32_t max_results, uint32_t max_results,
std::vector<Result>* results) { std::vector<Result>* results) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
const base::TimeTicks start = base::TimeTicks::Now(); const base::TimeTicks start = base::TimeTicks::Now();
DCHECK(results); DCHECK(results);
results->clear(); results->clear();
...@@ -118,6 +142,7 @@ ResponseStatus InvertedIndexSearch::Find(const base::string16& query, ...@@ -118,6 +142,7 @@ ResponseStatus InvertedIndexSearch::Find(const base::string16& query,
std::vector<std::pair<std::string, uint32_t>> std::vector<std::pair<std::string, uint32_t>>
InvertedIndexSearch::FindTermForTesting(const base::string16& term) const { InvertedIndexSearch::FindTermForTesting(const base::string16& term) const {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
const PostingList posting_list = inverted_index_->FindTerm(term); const PostingList posting_list = inverted_index_->FindTerm(term);
std::vector<std::pair<std::string, uint32_t>> doc_with_freq; std::vector<std::pair<std::string, uint32_t>> doc_with_freq;
for (const auto& kv : posting_list) { for (const auto& kv : posting_list) {
...@@ -127,5 +152,12 @@ InvertedIndexSearch::FindTermForTesting(const base::string16& term) const { ...@@ -127,5 +152,12 @@ InvertedIndexSearch::FindTermForTesting(const base::string16& term) const {
return doc_with_freq; return doc_with_freq;
} }
// Reply callback for the content-extraction task posted by AddOrUpdate().
// |documents| holds the (id, tokens) pairs produced by
// ExtractDocumentsContent(). They are added to the inverted index and the
// index is then rebuilt so the new documents become searchable.
// AddOrUpdate() binds this method to a WeakPtr of this object.
void InvertedIndexSearch::OnExtractDocumentsContentDone(
const ExtractedContent& documents) {
// The index is only touched on the sequence guarded by |sequence_checker_|.
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
// Order matters: documents are added first, then the index is rebuilt.
inverted_index_->AddDocuments(documents);
inverted_index_->BuildInvertedIndex();
}
} // namespace local_search_service } // namespace local_search_service
} // namespace chromeos } // namespace chromeos
...@@ -11,6 +11,8 @@ ...@@ -11,6 +11,8 @@
#include <vector> #include <vector>
#include "base/macros.h" #include "base/macros.h"
#include "base/sequence_checker.h"
#include "base/sequenced_task_runner.h"
#include "base/strings/string16.h" #include "base/strings/string16.h"
#include "chromeos/components/local_search_service/index.h" #include "chromeos/components/local_search_service/index.h"
#include "chromeos/components/local_search_service/shared_structs.h" #include "chromeos/components/local_search_service/shared_structs.h"
...@@ -52,7 +54,14 @@ class InvertedIndexSearch : public Index { ...@@ -52,7 +54,14 @@ class InvertedIndexSearch : public Index {
const base::string16& term) const; const base::string16& term) const;
private: private:
void OnExtractDocumentsContentDone(
const std::vector<std::pair<std::string, std::vector<Token>>>& documents);
std::unique_ptr<InvertedIndex> inverted_index_; std::unique_ptr<InvertedIndex> inverted_index_;
scoped_refptr<base::SequencedTaskRunner> blocking_task_runner_;
SEQUENCE_CHECKER(sequence_checker_);
base::WeakPtrFactory<InvertedIndexSearch> weak_ptr_factory_{this};
}; };
} // namespace local_search_service } // namespace local_search_service
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "base/test/task_environment.h"
#include "chromeos/components/local_search_service/index.h" #include "chromeos/components/local_search_service/index.h"
#include "chromeos/components/local_search_service/local_search_service.h" #include "chromeos/components/local_search_service/local_search_service.h"
#include "testing/gtest/include/gtest/gtest.h" #include "testing/gtest/include/gtest/gtest.h"
...@@ -19,6 +20,9 @@ namespace local_search_service { ...@@ -19,6 +20,9 @@ namespace local_search_service {
class LocalSearchServiceTest : public testing::Test { class LocalSearchServiceTest : public testing::Test {
protected: protected:
LocalSearchService service_; LocalSearchService service_;
base::test::TaskEnvironment task_environment_{
base::test::TaskEnvironment::MainThreadType::DEFAULT,
base::test::TaskEnvironment::ThreadPoolExecutionMode::QUEUED};
}; };
TEST_F(LocalSearchServiceTest, GetLinearMapSearch) { TEST_F(LocalSearchServiceTest, GetLinearMapSearch) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment