Commit c7370b66 authored by manukh's avatar manukh Committed by Commit Bot

[omnibox] [bookmark-paths] Clean bookmark matches lookup by term.

Currently, TitledUrlIndex::GetResultsMatchingTerm has 2
responsibilities:
- Searches the index for nodes matching a term param.
- Updates the aggregate results for all terms.

This CL splits these responsibilities; leaving the former in
GetResultsMatchingTerm while moving the latter to
GetResultsMatchingTerms.

Additionally, this CL creates a helper method
TitledUrlIndex::ExtractIndexTerms extracting some of the logic
duplicated in Add and Remove.

This is a refactor with no behavior change.

Bug: 1129524
Change-Id: Icc8325fd359c2fb856d22980e0e3dd80f0e84097
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2476723
Commit-Queue: manuk hovanesian <manukh@chromium.org>
Reviewed-by: default avatarMikel Astiz <mastiz@chromium.org>
Cr-Commit-Position: refs/heads/master@{#822343}
parent e977adb7
...@@ -60,25 +60,13 @@ void TitledUrlIndex::SetNodeSorter( ...@@ -60,25 +60,13 @@ void TitledUrlIndex::SetNodeSorter(
} }
void TitledUrlIndex::Add(const TitledUrlNode* node) { void TitledUrlIndex::Add(const TitledUrlNode* node) {
std::vector<base::string16> terms = for (const base::string16& term : ExtractIndexTerms(node))
ExtractQueryWords(Normalize(node->GetTitledUrlNodeTitle())); RegisterNode(term, node);
for (size_t i = 0; i < terms.size(); ++i)
RegisterNode(terms[i], node);
terms = ExtractQueryWords(
CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), nullptr));
for (size_t i = 0; i < terms.size(); ++i)
RegisterNode(terms[i], node);
} }
void TitledUrlIndex::Remove(const TitledUrlNode* node) { void TitledUrlIndex::Remove(const TitledUrlNode* node) {
std::vector<base::string16> terms = for (const base::string16& term : ExtractIndexTerms(node))
ExtractQueryWords(Normalize(node->GetTitledUrlNodeTitle())); UnregisterNode(term, node);
for (size_t i = 0; i < terms.size(); ++i)
UnregisterNode(terms[i], node);
terms = ExtractQueryWords(
CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), nullptr));
for (size_t i = 0; i < terms.size(); ++i)
UnregisterNode(terms[i], node);
} }
std::vector<TitledUrlMatch> TitledUrlIndex::GetResultsMatching( std::vector<TitledUrlMatch> TitledUrlIndex::GetResultsMatching(
...@@ -87,16 +75,11 @@ std::vector<TitledUrlMatch> TitledUrlIndex::GetResultsMatching( ...@@ -87,16 +75,11 @@ std::vector<TitledUrlMatch> TitledUrlIndex::GetResultsMatching(
query_parser::MatchingAlgorithm matching_algorithm) { query_parser::MatchingAlgorithm matching_algorithm) {
const base::string16 query = Normalize(input_query); const base::string16 query = Normalize(input_query);
std::vector<base::string16> terms = ExtractQueryWords(query); std::vector<base::string16> terms = ExtractQueryWords(query);
if (terms.empty())
return {};
TitledUrlNodeSet matches; TitledUrlNodeSet matches =
for (size_t i = 0; i < terms.size(); ++i) { RetrieveNodesMatchingAllTerms(terms, matching_algorithm);
if (!GetResultsMatchingTerm(terms[i], i == 0, matching_algorithm, if (matches.empty())
&matches)) {
return {}; return {};
}
}
TitledUrlNodes sorted_nodes; TitledUrlNodes sorted_nodes;
SortMatches(matches, &sorted_nodes); SortMatches(matches, &sorted_nodes);
...@@ -183,49 +166,52 @@ base::Optional<TitledUrlMatch> TitledUrlIndex::MatchTitledUrlNodeWithQuery( ...@@ -183,49 +166,52 @@ base::Optional<TitledUrlMatch> TitledUrlIndex::MatchTitledUrlNodeWithQuery(
return match; return match;
} }
bool TitledUrlIndex::GetResultsMatchingTerm( TitledUrlIndex::TitledUrlNodeSet TitledUrlIndex::RetrieveNodesMatchingAllTerms(
const std::vector<base::string16>& terms,
query_parser::MatchingAlgorithm matching_algorithm) const {
if (terms.empty())
return {};
TitledUrlNodeSet matches =
RetrieveNodesMatchingTerm(terms[0], matching_algorithm);
for (size_t i = 1; i < terms.size() && !matches.empty(); ++i) {
TitledUrlNodeSet term_matches =
RetrieveNodesMatchingTerm(terms[i], matching_algorithm);
// Compute intersection between the two sets.
base::EraseIf(matches, base::IsNotIn<TitledUrlNodeSet>(term_matches));
}
return matches;
}
TitledUrlIndex::TitledUrlNodeSet TitledUrlIndex::RetrieveNodesMatchingTerm(
const base::string16& term, const base::string16& term,
bool first_term, query_parser::MatchingAlgorithm matching_algorithm) const {
query_parser::MatchingAlgorithm matching_algorithm,
TitledUrlNodeSet* matches) {
Index::const_iterator i = index_.lower_bound(term); Index::const_iterator i = index_.lower_bound(term);
if (i == index_.end()) if (i == index_.end())
return false; return {};
if (!query_parser::QueryParser::IsWordLongEnoughForPrefixSearch( if (!query_parser::QueryParser::IsWordLongEnoughForPrefixSearch(
term, matching_algorithm)) { term, matching_algorithm)) {
// Term is too short for prefix match, compare using exact match. // Term is too short for prefix match, compare using exact match.
if (i->first != term) if (i->first != term)
return false; // No title/URL pairs with this term. return {}; // No title/URL pairs with this term.
return i->second;
if (first_term) {
(*matches) = i->second;
return true;
} }
base::EraseIf(*matches, base::IsNotIn<TitledUrlNodeSet>(i->second));
} else {
// Loop through index adding all entries that start with term to // Loop through index adding all entries that start with term to
// |prefix_matches|. // |prefix_matches|.
TitledUrlNodeSet tmp_prefix_matches; TitledUrlNodes prefix_matches;
// If this is the first term, then store the result directly in |matches| while (i != index_.end() && i->first.size() >= term.size() &&
// to avoid calling stl intersection (which requires a copy).
TitledUrlNodeSet* prefix_matches =
first_term ? matches : &tmp_prefix_matches;
while (i != index_.end() &&
i->first.size() >= term.size() &&
term.compare(0, term.size(), i->first, 0, term.size()) == 0) { term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
for (auto n = i->second.begin(); n != i->second.end(); ++n) { prefix_matches.insert(prefix_matches.end(), i->second.begin(),
prefix_matches->insert(prefix_matches->end(), *n); i->second.end());
}
++i; ++i;
} }
if (!first_term) { return prefix_matches;
base::EraseIf(*matches, base::IsNotIn<TitledUrlNodeSet>(*prefix_matches));
}
}
return !matches->empty();
} }
// static
std::vector<base::string16> TitledUrlIndex::ExtractQueryWords( std::vector<base::string16> TitledUrlIndex::ExtractQueryWords(
const base::string16& query) { const base::string16& query) {
std::vector<base::string16> terms; std::vector<base::string16> terms;
...@@ -238,6 +224,24 @@ std::vector<base::string16> TitledUrlIndex::ExtractQueryWords( ...@@ -238,6 +224,24 @@ std::vector<base::string16> TitledUrlIndex::ExtractQueryWords(
return terms; return terms;
} }
// static
std::vector<base::string16> TitledUrlIndex::ExtractIndexTerms(
const TitledUrlNode* node) {
std::vector<base::string16> terms;
for (const base::string16& term :
ExtractQueryWords(Normalize(node->GetTitledUrlNodeTitle()))) {
terms.push_back(term);
}
for (const base::string16& term : ExtractQueryWords(CleanUpUrlForMatching(
node->GetTitledUrlNodeUrl(), /*adjustments=*/nullptr))) {
terms.push_back(term);
}
return terms;
}
void TitledUrlIndex::RegisterNode(const base::string16& term, void TitledUrlIndex::RegisterNode(const base::string16& term,
const TitledUrlNode* node) { const TitledUrlNode* node) {
index_[term].insert(node); index_[term].insert(node);
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <vector> #include <vector>
#include "base/containers/flat_set.h" #include "base/containers/flat_set.h"
#include "base/gtest_prod_util.h"
#include "base/macros.h" #include "base/macros.h"
#include "base/optional.h" #include "base/optional.h"
#include "base/strings/string16.h" #include "base/strings/string16.h"
...@@ -31,6 +32,8 @@ struct TitledUrlMatch; ...@@ -31,6 +32,8 @@ struct TitledUrlMatch;
// TitledUrlNodes that contain that string in their title or URL. // TitledUrlNodes that contain that string in their title or URL.
class TitledUrlIndex { class TitledUrlIndex {
public: public:
using TitledUrlNodeSet = base::flat_set<const TitledUrlNode*>;
// Constructs a TitledUrlIndex. |sorter| is used to construct a sorted list // Constructs a TitledUrlIndex. |sorter| is used to construct a sorted list
// of matches when matches are returned from the index. If null, matches are // of matches when matches are returned from the index. If null, matches are
// returned unsorted. // returned unsorted.
...@@ -54,9 +57,15 @@ class TitledUrlIndex { ...@@ -54,9 +57,15 @@ class TitledUrlIndex {
size_t max_count, size_t max_count,
query_parser::MatchingAlgorithm matching_algorithm); query_parser::MatchingAlgorithm matching_algorithm);
// For testing only.
TitledUrlNodeSet RetrieveNodesMatchingAllTermsForTesting(
const std::vector<base::string16>& terms,
query_parser::MatchingAlgorithm matching_algorithm) const {
return RetrieveNodesMatchingAllTerms(terms, matching_algorithm);
}
private: private:
using TitledUrlNodes = std::vector<const TitledUrlNode*>; using TitledUrlNodes = std::vector<const TitledUrlNode*>;
using TitledUrlNodeSet = base::flat_set<const TitledUrlNode*>;
using Index = std::map<base::string16, TitledUrlNodeSet>; using Index = std::map<base::string16, TitledUrlNodeSet>;
// Constructs |sorted_nodes| by copying the matches in |matches| and sorting // Constructs |sorted_nodes| by copying the matches in |matches| and sorting
...@@ -71,17 +80,24 @@ class TitledUrlIndex { ...@@ -71,17 +80,24 @@ class TitledUrlIndex {
query_parser::QueryParser* parser, query_parser::QueryParser* parser,
const query_parser::QueryNodeVector& query_nodes); const query_parser::QueryNodeVector& query_nodes);
// Populates |matches| for the specified term. If |first_term| is true, this // Return matches for the specified |terms|. This is an intersection of each
// is the first term in the query. Returns true if there is at least one node // term's matches.
// matching the term. TitledUrlNodeSet RetrieveNodesMatchingAllTerms(
bool GetResultsMatchingTerm( const std::vector<base::string16>& terms,
query_parser::MatchingAlgorithm matching_algorithm) const;
// Return matches for the specified |term|.
TitledUrlNodeSet RetrieveNodesMatchingTerm(
const base::string16& term, const base::string16& term,
bool first_term, query_parser::MatchingAlgorithm matching_algorithm) const;
query_parser::MatchingAlgorithm matching_algorithm,
TitledUrlNodeSet* matches);
// Returns the set of query words from |query|. // Returns the set of query words from |query|.
std::vector<base::string16> ExtractQueryWords(const base::string16& query); static std::vector<base::string16> ExtractQueryWords(
const base::string16& query);
// Return the index terms for |node|.
static std::vector<base::string16> ExtractIndexTerms(
const TitledUrlNode* node);
// Adds |node| to |index_|. // Adds |node| to |index_|.
void RegisterNode(const base::string16& term, const TitledUrlNode* node); void RegisterNode(const base::string16& term, const TitledUrlNode* node);
......
...@@ -53,7 +53,7 @@ class BookmarkClientMock : public TestBookmarkClient { ...@@ -53,7 +53,7 @@ class BookmarkClientMock : public TestBookmarkClient {
DISALLOW_COPY_AND_ASSIGN(BookmarkClientMock); DISALLOW_COPY_AND_ASSIGN(BookmarkClientMock);
}; };
// Minimalistic implementatio of TitledUrlNode. // Minimal implementation of TitledUrlNode.
class TestTitledUrlNode : public TitledUrlNode { class TestTitledUrlNode : public TitledUrlNode {
public: public:
TestTitledUrlNode(const base::string16& title, const GURL& url) TestTitledUrlNode(const base::string16& title, const GURL& url)
...@@ -544,5 +544,36 @@ TEST_F(TitledUrlIndexTest, GetResultsSortedByTypedCount) { ...@@ -544,5 +544,36 @@ TEST_F(TitledUrlIndexTest, GetResultsSortedByTypedCount) {
EXPECT_EQ(data[3].url, matches[1].node->GetTitledUrlNodeUrl()); EXPECT_EQ(data[3].url, matches[1].node->GetTitledUrlNodeUrl());
} }
TEST_F(TitledUrlIndexTest, RetrieveNodesMatchingAllTerms) {
TitledUrlNode* node =
AddNode("termA termB otherTerm xyz ab", GURL("http://foo.com"));
struct TestData {
const std::string query;
const bool should_be_retrieved;
} data[] = {// Should return matches if all input terms match, even if not all
// node terms match.
{"term other", true},
// Should not match midword.
{"term ther", false},
// Short input terms should only return exact matches.
{"xy", false},
{"ab", true}};
for (const TestData& test_data : data) {
SCOPED_TRACE("Query: " + test_data.query);
std::vector<base::string16> terms = base::SplitString(
base::UTF8ToUTF16(test_data.query), base::UTF8ToUTF16(" "),
base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
auto matches = index()->RetrieveNodesMatchingAllTermsForTesting(
terms, query_parser::MatchingAlgorithm::DEFAULT);
if (test_data.should_be_retrieved) {
EXPECT_TRUE(matches.contains(node));
EXPECT_EQ(matches.size(), 1u);
} else
EXPECT_TRUE(matches.empty());
};
}
} // namespace } // namespace
} // namespace bookmarks } // namespace bookmarks
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment