Commit c7370b66 authored by manukh's avatar manukh Committed by Commit Bot

[omnibox] [bookmark-paths] Clean bookmark matches lookup by term.

Currently, TitledUrlIndex::GetResultsMatchingTerm has 2
responsibilities:
- Searches the index for nodes matching a term param.
- Updates the aggregate results for all terms.

This CL splits these responsibilities; leaving the former in
GetResultsMatchingTerm while moving the latter to
GetResultsMatchingTerms.

Additionally, this CL creates a helper method
TitledUrlIndex::ExtractIndexTerms extracting some of the logic
duplicated in Add and Remove.

This is a refactor with no behavior change.

Bug: 1129524
Change-Id: Icc8325fd359c2fb856d22980e0e3dd80f0e84097
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2476723
Commit-Queue: manuk hovanesian <manukh@chromium.org>
Reviewed-by: default avatarMikel Astiz <mastiz@chromium.org>
Cr-Commit-Position: refs/heads/master@{#822343}
parent e977adb7
......@@ -60,25 +60,13 @@ void TitledUrlIndex::SetNodeSorter(
}
void TitledUrlIndex::Add(const TitledUrlNode* node) {
std::vector<base::string16> terms =
ExtractQueryWords(Normalize(node->GetTitledUrlNodeTitle()));
for (size_t i = 0; i < terms.size(); ++i)
RegisterNode(terms[i], node);
terms = ExtractQueryWords(
CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), nullptr));
for (size_t i = 0; i < terms.size(); ++i)
RegisterNode(terms[i], node);
for (const base::string16& term : ExtractIndexTerms(node))
RegisterNode(term, node);
}
void TitledUrlIndex::Remove(const TitledUrlNode* node) {
std::vector<base::string16> terms =
ExtractQueryWords(Normalize(node->GetTitledUrlNodeTitle()));
for (size_t i = 0; i < terms.size(); ++i)
UnregisterNode(terms[i], node);
terms = ExtractQueryWords(
CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), nullptr));
for (size_t i = 0; i < terms.size(); ++i)
UnregisterNode(terms[i], node);
for (const base::string16& term : ExtractIndexTerms(node))
UnregisterNode(term, node);
}
std::vector<TitledUrlMatch> TitledUrlIndex::GetResultsMatching(
......@@ -87,16 +75,11 @@ std::vector<TitledUrlMatch> TitledUrlIndex::GetResultsMatching(
query_parser::MatchingAlgorithm matching_algorithm) {
const base::string16 query = Normalize(input_query);
std::vector<base::string16> terms = ExtractQueryWords(query);
if (terms.empty())
return {};
TitledUrlNodeSet matches;
for (size_t i = 0; i < terms.size(); ++i) {
if (!GetResultsMatchingTerm(terms[i], i == 0, matching_algorithm,
&matches)) {
return {};
}
}
TitledUrlNodeSet matches =
RetrieveNodesMatchingAllTerms(terms, matching_algorithm);
if (matches.empty())
return {};
TitledUrlNodes sorted_nodes;
SortMatches(matches, &sorted_nodes);
......@@ -183,49 +166,52 @@ base::Optional<TitledUrlMatch> TitledUrlIndex::MatchTitledUrlNodeWithQuery(
return match;
}
bool TitledUrlIndex::GetResultsMatchingTerm(
TitledUrlIndex::TitledUrlNodeSet TitledUrlIndex::RetrieveNodesMatchingAllTerms(
const std::vector<base::string16>& terms,
query_parser::MatchingAlgorithm matching_algorithm) const {
if (terms.empty())
return {};
TitledUrlNodeSet matches =
RetrieveNodesMatchingTerm(terms[0], matching_algorithm);
for (size_t i = 1; i < terms.size() && !matches.empty(); ++i) {
TitledUrlNodeSet term_matches =
RetrieveNodesMatchingTerm(terms[i], matching_algorithm);
// Compute intersection between the two sets.
base::EraseIf(matches, base::IsNotIn<TitledUrlNodeSet>(term_matches));
}
return matches;
}
TitledUrlIndex::TitledUrlNodeSet TitledUrlIndex::RetrieveNodesMatchingTerm(
const base::string16& term,
bool first_term,
query_parser::MatchingAlgorithm matching_algorithm,
TitledUrlNodeSet* matches) {
query_parser::MatchingAlgorithm matching_algorithm) const {
Index::const_iterator i = index_.lower_bound(term);
if (i == index_.end())
return false;
return {};
if (!query_parser::QueryParser::IsWordLongEnoughForPrefixSearch(
term, matching_algorithm)) {
// Term is too short for prefix match, compare using exact match.
if (i->first != term)
return false; // No title/URL pairs with this term.
return {}; // No title/URL pairs with this term.
return i->second;
}
if (first_term) {
(*matches) = i->second;
return true;
}
base::EraseIf(*matches, base::IsNotIn<TitledUrlNodeSet>(i->second));
} else {
// Loop through index adding all entries that start with term to
// |prefix_matches|.
TitledUrlNodeSet tmp_prefix_matches;
// If this is the first term, then store the result directly in |matches|
// to avoid calling stl intersection (which requires a copy).
TitledUrlNodeSet* prefix_matches =
first_term ? matches : &tmp_prefix_matches;
while (i != index_.end() &&
i->first.size() >= term.size() &&
term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
for (auto n = i->second.begin(); n != i->second.end(); ++n) {
prefix_matches->insert(prefix_matches->end(), *n);
}
++i;
}
if (!first_term) {
base::EraseIf(*matches, base::IsNotIn<TitledUrlNodeSet>(*prefix_matches));
}
// Loop through index adding all entries that start with term to
// |prefix_matches|.
TitledUrlNodes prefix_matches;
while (i != index_.end() && i->first.size() >= term.size() &&
term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
prefix_matches.insert(prefix_matches.end(), i->second.begin(),
i->second.end());
++i;
}
return !matches->empty();
return prefix_matches;
}
// static
std::vector<base::string16> TitledUrlIndex::ExtractQueryWords(
const base::string16& query) {
std::vector<base::string16> terms;
......@@ -238,6 +224,24 @@ std::vector<base::string16> TitledUrlIndex::ExtractQueryWords(
return terms;
}
// static
std::vector<base::string16> TitledUrlIndex::ExtractIndexTerms(
const TitledUrlNode* node) {
std::vector<base::string16> terms;
for (const base::string16& term :
ExtractQueryWords(Normalize(node->GetTitledUrlNodeTitle()))) {
terms.push_back(term);
}
for (const base::string16& term : ExtractQueryWords(CleanUpUrlForMatching(
node->GetTitledUrlNodeUrl(), /*adjustments=*/nullptr))) {
terms.push_back(term);
}
return terms;
}
void TitledUrlIndex::RegisterNode(const base::string16& term,
const TitledUrlNode* node) {
index_[term].insert(node);
......
......@@ -12,6 +12,7 @@
#include <vector>
#include "base/containers/flat_set.h"
#include "base/gtest_prod_util.h"
#include "base/macros.h"
#include "base/optional.h"
#include "base/strings/string16.h"
......@@ -31,6 +32,8 @@ struct TitledUrlMatch;
// TitledUrlNodes that contain that string in their title or URL.
class TitledUrlIndex {
public:
using TitledUrlNodeSet = base::flat_set<const TitledUrlNode*>;
// Constructs a TitledUrlIndex. |sorter| is used to construct a sorted list
// of matches when matches are returned from the index. If null, matches are
// returned unsorted.
......@@ -54,9 +57,15 @@ class TitledUrlIndex {
size_t max_count,
query_parser::MatchingAlgorithm matching_algorithm);
// For testing only.
TitledUrlNodeSet RetrieveNodesMatchingAllTermsForTesting(
const std::vector<base::string16>& terms,
query_parser::MatchingAlgorithm matching_algorithm) const {
return RetrieveNodesMatchingAllTerms(terms, matching_algorithm);
}
private:
using TitledUrlNodes = std::vector<const TitledUrlNode*>;
using TitledUrlNodeSet = base::flat_set<const TitledUrlNode*>;
using Index = std::map<base::string16, TitledUrlNodeSet>;
// Constructs |sorted_nodes| by copying the matches in |matches| and sorting
......@@ -71,17 +80,24 @@ class TitledUrlIndex {
query_parser::QueryParser* parser,
const query_parser::QueryNodeVector& query_nodes);
// Populates |matches| for the specified term. If |first_term| is true, this
// is the first term in the query. Returns true if there is at least one node
// matching the term.
bool GetResultsMatchingTerm(
// Return matches for the specified |terms|. This is an intersection of each
// term's matches.
TitledUrlNodeSet RetrieveNodesMatchingAllTerms(
const std::vector<base::string16>& terms,
query_parser::MatchingAlgorithm matching_algorithm) const;
// Return matches for the specified |term|.
TitledUrlNodeSet RetrieveNodesMatchingTerm(
const base::string16& term,
bool first_term,
query_parser::MatchingAlgorithm matching_algorithm,
TitledUrlNodeSet* matches);
query_parser::MatchingAlgorithm matching_algorithm) const;
// Returns the set of query words from |query|.
std::vector<base::string16> ExtractQueryWords(const base::string16& query);
static std::vector<base::string16> ExtractQueryWords(
const base::string16& query);
// Return the index terms for |node|.
static std::vector<base::string16> ExtractIndexTerms(
const TitledUrlNode* node);
// Adds |node| to |index_|.
void RegisterNode(const base::string16& term, const TitledUrlNode* node);
......
......@@ -53,7 +53,7 @@ class BookmarkClientMock : public TestBookmarkClient {
DISALLOW_COPY_AND_ASSIGN(BookmarkClientMock);
};
// Minimalistic implementatio of TitledUrlNode.
// Minimal implementation of TitledUrlNode.
class TestTitledUrlNode : public TitledUrlNode {
public:
TestTitledUrlNode(const base::string16& title, const GURL& url)
......@@ -544,5 +544,36 @@ TEST_F(TitledUrlIndexTest, GetResultsSortedByTypedCount) {
EXPECT_EQ(data[3].url, matches[1].node->GetTitledUrlNodeUrl());
}
TEST_F(TitledUrlIndexTest, RetrieveNodesMatchingAllTerms) {
TitledUrlNode* node =
AddNode("termA termB otherTerm xyz ab", GURL("http://foo.com"));
struct TestData {
const std::string query;
const bool should_be_retrieved;
} data[] = {// Should return matches if all input terms match, even if not all
// node terms match.
{"term other", true},
// Should not match midword.
{"term ther", false},
// Short input terms should only return exact matches.
{"xy", false},
{"ab", true}};
for (const TestData& test_data : data) {
SCOPED_TRACE("Query: " + test_data.query);
std::vector<base::string16> terms = base::SplitString(
base::UTF8ToUTF16(test_data.query), base::UTF8ToUTF16(" "),
base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
auto matches = index()->RetrieveNodesMatchingAllTermsForTesting(
terms, query_parser::MatchingAlgorithm::DEFAULT);
if (test_data.should_be_retrieved) {
EXPECT_TRUE(matches.contains(node));
EXPECT_EQ(matches.size(), 1u);
} else
EXPECT_TRUE(matches.empty());
};
}
} // namespace
} // namespace bookmarks
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment