[omnibox] [bookmark-paths] Clean bookmark matches lookup by term.

Currently, TitledUrlIndex::GetResultsMatchingTerm has 2 responsibilities:
- It searches the index for nodes matching a term param.
- It updates the aggregate results for all terms.

This CL splits these responsibilities, leaving the former in GetResultsMatchingTerm (named RetrieveNodesMatchingTerm in the diff below) while moving the latter to GetResultsMatchingTerms (named RetrieveNodesMatchingAllTerms in the diff below).

Additionally, this CL creates a helper method, TitledUrlIndex::ExtractIndexTerms, that extracts some of the logic duplicated in Add and Remove.

This is a refactor with no behavior change.

Bug: 1129524
Change-Id: Icc8325fd359c2fb856d22980e0e3dd80f0e84097
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2476723
Commit-Queue: manuk hovanesian <manukh@chromium.org>
Reviewed-by: Mikel Astiz <mastiz@chromium.org>
Cr-Commit-Position: refs/heads/master@{#822343}

[omnibox] [bookmark-paths] Clean bookmark matches lookup by term.
Currently, TitledUrlIndex::GetResultsMatchingTerm has 2 responsibilities:
- It searches the index for nodes matching a term param.
- It updates the aggregate results for all terms.

This CL splits these responsibilities, leaving the former in GetResultsMatchingTerm (named RetrieveNodesMatchingTerm in the diff below) while moving the latter to GetResultsMatchingTerms (named RetrieveNodesMatchingAllTerms in the diff below).

Additionally, this CL creates a helper method, TitledUrlIndex::ExtractIndexTerms, that extracts some of the logic duplicated in Add and Remove.

This is a refactor with no behavior change.

Bug: 1129524
Change-Id: Icc8325fd359c2fb856d22980e0e3dd80f0e84097
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2476723
Commit-Queue: manuk hovanesian <manukh@chromium.org>
Reviewed-by: Mikel Astiz <mastiz@chromium.org>
Cr-Commit-Position: refs/heads/master@{#822343}
c7370b66 · manukh · Commit Bot · e977adb7 · c7370b66 · c7370b66
Commit c7370b66 authored Oct 29, 2020 by manukh Committed by Commit Bot Oct 29, 2020
3 changed files
--- a/components/bookmarks/browser/titled_url_index.cc
+++ b/components/bookmarks/browser/titled_url_index.cc
@@ -60,25 +60,13 @@ void TitledUrlIndex::SetNodeSorter(
 }

 void TitledUrlIndex::Add(const TitledUrlNode* node) {
-  std::vector<base::string16> terms =
-      ExtractQueryWords(Normalize(node->GetTitledUrlNodeTitle()));
-  for (size_t i = 0; i < terms.size(); ++i)
-    RegisterNode(terms[i], node);
-  terms = ExtractQueryWords(
-      CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), nullptr));
-  for (size_t i = 0; i < terms.size(); ++i)
-    RegisterNode(terms[i], node);
+  for (const base::string16& term : ExtractIndexTerms(node))
+    RegisterNode(term, node);
 }

 void TitledUrlIndex::Remove(const TitledUrlNode* node) {
-  std::vector<base::string16> terms =
-      ExtractQueryWords(Normalize(node->GetTitledUrlNodeTitle()));
-  for (size_t i = 0; i < terms.size(); ++i)
-    UnregisterNode(terms[i], node);
-  terms = ExtractQueryWords(
-      CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), nullptr));
-  for (size_t i = 0; i < terms.size(); ++i)
-    UnregisterNode(terms[i], node);
+  for (const base::string16& term : ExtractIndexTerms(node))
+    UnregisterNode(term, node);
 }

 std::vector<TitledUrlMatch> TitledUrlIndex::GetResultsMatching(
@@ -87,16 +75,11 @@ std::vector<TitledUrlMatch> TitledUrlIndex::GetResultsMatching(
    query_parser::MatchingAlgorithm matching_algorithm) {
  const base::string16 query = Normalize(input_query);
  std::vector<base::string16> terms = ExtractQueryWords(query);
-  if (terms.empty())
-    return {};

-  TitledUrlNodeSet matches;
-  for (size_t i = 0; i < terms.size(); ++i) {
-    if (!GetResultsMatchingTerm(terms[i], i == 0, matching_algorithm,
-                                &matches)) {
-      return {};
-    }
-  }
+  TitledUrlNodeSet matches =
+      RetrieveNodesMatchingAllTerms(terms, matching_algorithm);
+  if (matches.empty())
+    return {};

  TitledUrlNodes sorted_nodes;
  SortMatches(matches, &sorted_nodes);
@@ -183,49 +166,52 @@ base::Optional<TitledUrlMatch> TitledUrlIndex::MatchTitledUrlNodeWithQuery(
  return match;
 }

-bool TitledUrlIndex::GetResultsMatchingTerm(
+TitledUrlIndex::TitledUrlNodeSet TitledUrlIndex::RetrieveNodesMatchingAllTerms(
+    const std::vector<base::string16>& terms,
+    query_parser::MatchingAlgorithm matching_algorithm) const {
+  if (terms.empty())
+    return {};
+
+  TitledUrlNodeSet matches =
+      RetrieveNodesMatchingTerm(terms[0], matching_algorithm);
+  for (size_t i = 1; i < terms.size() && !matches.empty(); ++i) {
+    TitledUrlNodeSet term_matches =
+        RetrieveNodesMatchingTerm(terms[i], matching_algorithm);
+    // Compute intersection between the two sets.
+    base::EraseIf(matches, base::IsNotIn<TitledUrlNodeSet>(term_matches));
+  }
+
+  return matches;
+}
+
+TitledUrlIndex::TitledUrlNodeSet TitledUrlIndex::RetrieveNodesMatchingTerm(
    const base::string16& term,
-    bool first_term,
-    query_parser::MatchingAlgorithm matching_algorithm,
-    TitledUrlNodeSet* matches) {
+    query_parser::MatchingAlgorithm matching_algorithm) const {
  Index::const_iterator i = index_.lower_bound(term);
  if (i == index_.end())
-    return false;
+    return {};

  if (!query_parser::QueryParser::IsWordLongEnoughForPrefixSearch(
      term, matching_algorithm)) {
    // Term is too short for prefix match, compare using exact match.
    if (i->first != term)
-      return false;  // No title/URL pairs with this term.
+      return {};  // No title/URL pairs with this term.
+    return i->second;
+  }

-    if (first_term) {
-      (*matches) = i->second;
-      return true;
-    }
-    base::EraseIf(*matches, base::IsNotIn<TitledUrlNodeSet>(i->second));
-  } else {
-    // Loop through index adding all entries that start with term to
-    // |prefix_matches|.
-    TitledUrlNodeSet tmp_prefix_matches;
-    // If this is the first term, then store the result directly in |matches|
-    // to avoid calling stl intersection (which requires a copy).
-    TitledUrlNodeSet* prefix_matches =
-        first_term ? matches : &tmp_prefix_matches;
-    while (i != index_.end() &&
-           i->first.size() >= term.size() &&
-           term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
-      for (auto n = i->second.begin(); n != i->second.end(); ++n) {
-        prefix_matches->insert(prefix_matches->end(), *n);
-      }
-      ++i;
-    }
-    if (!first_term) {
-      base::EraseIf(*matches, base::IsNotIn<TitledUrlNodeSet>(*prefix_matches));
-    }
+  // Loop through index adding all entries that start with term to
+  // |prefix_matches|.
+  TitledUrlNodes prefix_matches;
+  while (i != index_.end() && i->first.size() >= term.size() &&
+         term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
+    prefix_matches.insert(prefix_matches.end(), i->second.begin(),
+                          i->second.end());
+    ++i;
  }
-  return !matches->empty();
+  return prefix_matches;
 }

+// static
 std::vector<base::string16> TitledUrlIndex::ExtractQueryWords(
    const base::string16& query) {
  std::vector<base::string16> terms;
@@ -238,6 +224,24 @@ std::vector<base::string16> TitledUrlIndex::ExtractQueryWords(
  return terms;
 }

+// static
+std::vector<base::string16> TitledUrlIndex::ExtractIndexTerms(
+    const TitledUrlNode* node) {
+  std::vector<base::string16> terms;
+
+  for (const base::string16& term :
+       ExtractQueryWords(Normalize(node->GetTitledUrlNodeTitle()))) {
+    terms.push_back(term);
+  }
+
+  for (const base::string16& term : ExtractQueryWords(CleanUpUrlForMatching(
+           node->GetTitledUrlNodeUrl(), /*adjustments=*/nullptr))) {
+    terms.push_back(term);
+  }
+
+  return terms;
+}
+
 void TitledUrlIndex::RegisterNode(const base::string16& term,
                                 const TitledUrlNode* node) {
  index_[term].insert(node);

--- a/components/bookmarks/browser/titled_url_index.h
+++ b/components/bookmarks/browser/titled_url_index.h
@@ -12,6 +12,7 @@
 #include <vector>

 #include "base/containers/flat_set.h"
+#include "base/gtest_prod_util.h"
 #include "base/macros.h"
 #include "base/optional.h"
 #include "base/strings/string16.h"
@@ -31,6 +32,8 @@ struct TitledUrlMatch;
 // TitledUrlNodes that contain that string in their title or URL.
 class TitledUrlIndex {
 public:
+  using TitledUrlNodeSet = base::flat_set<const TitledUrlNode*>;
+
  // Constructs a TitledUrlIndex. |sorter| is used to construct a sorted list
  // of matches when matches are returned from the index. If null, matches are
  // returned unsorted.
@@ -54,9 +57,15 @@ class TitledUrlIndex {
      size_t max_count,
      query_parser::MatchingAlgorithm matching_algorithm);

+  // For testing only.
+  TitledUrlNodeSet RetrieveNodesMatchingAllTermsForTesting(
+      const std::vector<base::string16>& terms,
+      query_parser::MatchingAlgorithm matching_algorithm) const {
+    return RetrieveNodesMatchingAllTerms(terms, matching_algorithm);
+  }
+
 private:
  using TitledUrlNodes = std::vector<const TitledUrlNode*>;
-  using TitledUrlNodeSet = base::flat_set<const TitledUrlNode*>;
  using Index = std::map<base::string16, TitledUrlNodeSet>;

  // Constructs |sorted_nodes| by copying the matches in |matches| and sorting
@@ -71,17 +80,24 @@ class TitledUrlIndex {
      query_parser::QueryParser* parser,
      const query_parser::QueryNodeVector& query_nodes);

-  // Populates |matches| for the specified term. If |first_term| is true, this
-  // is the first term in the query. Returns true if there is at least one node
-  // matching the term.
-  bool GetResultsMatchingTerm(
+  // Return matches for the specified |terms|. This is an intersection of each
+  // term's matches.
+  TitledUrlNodeSet RetrieveNodesMatchingAllTerms(
+      const std::vector<base::string16>& terms,
+      query_parser::MatchingAlgorithm matching_algorithm) const;
+
+  // Return matches for the specified |term|.
+  TitledUrlNodeSet RetrieveNodesMatchingTerm(
      const base::string16& term,
-      bool first_term,
-      query_parser::MatchingAlgorithm matching_algorithm,
-      TitledUrlNodeSet* matches);
+      query_parser::MatchingAlgorithm matching_algorithm) const;

  // Returns the set of query words from |query|.
-  std::vector<base::string16> ExtractQueryWords(const base::string16& query);
+  static std::vector<base::string16> ExtractQueryWords(
+      const base::string16& query);
+
+  // Return the index terms for |node|.
+  static std::vector<base::string16> ExtractIndexTerms(
+      const TitledUrlNode* node);

  // Adds |node| to |index_|.
  void RegisterNode(const base::string16& term, const TitledUrlNode* node);

--- a/components/bookmarks/browser/titled_url_index_unittest.cc
+++ b/components/bookmarks/browser/titled_url_index_unittest.cc
@@ -53,7 +53,7 @@ class BookmarkClientMock : public TestBookmarkClient {
  DISALLOW_COPY_AND_ASSIGN(BookmarkClientMock);
 };

-// Minimalistic implementatio of TitledUrlNode.
+// Minimal implementation of TitledUrlNode.
 class TestTitledUrlNode : public TitledUrlNode {
 public:
  TestTitledUrlNode(const base::string16& title, const GURL& url)
@@ -544,5 +544,36 @@ TEST_F(TitledUrlIndexTest, GetResultsSortedByTypedCount) {
  EXPECT_EQ(data[3].url, matches[1].node->GetTitledUrlNodeUrl());
 }

+TEST_F(TitledUrlIndexTest, RetrieveNodesMatchingAllTerms) {
+  TitledUrlNode* node =
+      AddNode("termA termB otherTerm xyz ab", GURL("http://foo.com"));
+
+  struct TestData {
+    const std::string query;
+    const bool should_be_retrieved;
+  } data[] = {// Should return matches if all input terms match, even if not all
+              // node terms match.
+              {"term other", true},
+              // Should not match midword.
+              {"term ther", false},
+              // Short input terms should only return exact matches.
+              {"xy", false},
+              {"ab", true}};
+
+  for (const TestData& test_data : data) {
+    SCOPED_TRACE("Query: " + test_data.query);
+    std::vector<base::string16> terms = base::SplitString(
+        base::UTF8ToUTF16(test_data.query), base::UTF8ToUTF16(" "),
+        base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
+    auto matches = index()->RetrieveNodesMatchingAllTermsForTesting(
+        terms, query_parser::MatchingAlgorithm::DEFAULT);
+    if (test_data.should_be_retrieved) {
+      EXPECT_TRUE(matches.contains(node));
+      EXPECT_EQ(matches.size(), 1u);
+    } else
+      EXPECT_TRUE(matches.empty());
+  };
+}
+
 }  // namespace
 }  // namespace bookmarks