Commit 92134fdb authored by manukh's avatar manukh Committed by Commit Bot

[omnibox] [bookmark-paths] Allow path matching in TitledUrlIndex lookup.

Currently, bookmark suggestions are limited to title & URL matches. E.g
the input 'planets jupiter' won't suggest a bookmark titled 'jupiter' in
a directory 'planets'.

This is the 4th of 5 CLs implementing bookmark-paths which will allow
inputs to match bookmark paths (without contributing to the bookmark
suggestion's score).

This CL updates TitledUrlIndex::GetResultsMatching to (optionally)
include bookmarks that match the path. This won't include bookmarks that
*only* match the path. E.g., this would allow the bookmark 'path/title'
to match the query 'path title' but not the query 'path'.

Bug: 1129524
Change-Id: I745009139dde14c49e7a958ff5a105d0b39bdeb7
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2441153
Commit-Queue: manuk hovanesian <manukh@chromium.org>
Reviewed-by: default avatarMikel Astiz <mastiz@chromium.org>
Cr-Commit-Position: refs/heads/master@{#824549}
parent 8047aa45
......@@ -721,14 +721,15 @@ void BookmarkModel::ResetDateFolderModified(const BookmarkNode* node) {
std::vector<TitledUrlMatch> BookmarkModel::GetBookmarksMatching(
const base::string16& query,
size_t max_count,
query_parser::MatchingAlgorithm matching_algorithm) {
query_parser::MatchingAlgorithm matching_algorithm,
bool match_ancestor_titles) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
if (!loaded_)
return {};
return titled_url_index_->GetResultsMatching(query, max_count,
matching_algorithm);
return titled_url_index_->GetResultsMatching(
query, max_count, matching_algorithm, match_ancestor_titles);
}
void BookmarkModel::ClearStore() {
......
......@@ -244,13 +244,15 @@ class BookmarkModel : public BookmarkUndoProvider,
// combobox of most recently modified folders.
void ResetDateFolderModified(const BookmarkNode* node);
// Returns up to |max_count| of bookmarks containing each term from |query|
// in either the title or the URL. |matching_algorithm| determines
// the algorithm used by QueryParser internally to parse |query|.
// Returns up to |max_count| bookmarks containing each term from |query| in
// either the title, URL, or, if |match_ancestor_titles| is true, the titles
// of ancestors. |matching_algorithm| determines the algorithm used by
// QueryParser internally to parse |query|.
std::vector<TitledUrlMatch> GetBookmarksMatching(
const base::string16& query,
size_t max_count,
query_parser::MatchingAlgorithm matching_algorithm);
query_parser::MatchingAlgorithm matching_algorithm,
bool match_ancestor_titles = false);
// Sets the store to NULL, making it so the BookmarkModel does not persist
// any changes to disk. This is only useful during testing to speed up
......
......@@ -9,6 +9,7 @@
#include "base/guid.h"
#include "base/memory/ptr_util.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "components/strings/grit/components_strings.h"
......@@ -118,6 +119,14 @@ const GURL& BookmarkNode::GetTitledUrlNodeUrl() const {
return url_;
}
std::vector<base::StringPiece16> BookmarkNode::GetTitledUrlNodeAncestorTitles()
const {
std::vector<base::StringPiece16> paths;
for (const BookmarkNode* n = this; n->parent(); n = n->parent())
paths.push_back(n->parent()->GetTitle());
return paths;
}
BookmarkNode::BookmarkNode(int64_t id,
const std::string& guid,
const GURL& url,
......
......@@ -12,6 +12,7 @@
#include <string>
#include "base/macros.h"
#include "base/strings/string_piece.h"
#include "base/task/cancelable_task_tracker.h"
#include "base/time/time.h"
#include "components/bookmarks/browser/titled_url_node.h"
......@@ -127,6 +128,8 @@ class BookmarkNode : public ui::TreeNode<BookmarkNode>, public TitledUrlNode {
// TitledUrlNode interface methods.
const base::string16& GetTitledUrlNodeTitle() const override;
const GURL& GetTitledUrlNodeUrl() const override;
std::vector<base::StringPiece16> GetTitledUrlNodeAncestorTitles()
const override;
// TODO(sky): Consider adding last visit time here, it'll greatly simplify
// HistoryContentsProvider.
......@@ -199,7 +202,7 @@ class BookmarkNode : public ui::TreeNode<BookmarkNode>, public TitledUrlNode {
// If not base::CancelableTaskTracker::kBadTaskId, it indicates
// we're loading the
// favicon and the task is tracked by CancelabelTaskTracker.
// favicon and the task is tracked by CancelableTaskTracker.
base::CancelableTaskTracker::TaskId favicon_load_task_id_ =
base::CancelableTaskTracker::kBadTaskId;
......
......@@ -72,12 +72,19 @@ void TitledUrlIndex::Remove(const TitledUrlNode* node) {
std::vector<TitledUrlMatch> TitledUrlIndex::GetResultsMatching(
const base::string16& input_query,
size_t max_count,
query_parser::MatchingAlgorithm matching_algorithm) {
query_parser::MatchingAlgorithm matching_algorithm,
bool match_ancestor_titles) {
const base::string16 query = Normalize(input_query);
std::vector<base::string16> terms = ExtractQueryWords(query);
// When |match_ancestor_titles| is true, |matches| shouldn't exclude nodes
// that don't match every query term, as the query terms may match in the
// ancestors. |MatchTitledUrlNodeWithQuery()| below will filter out nodes that
// neither match nor ancestor-match every query term.
TitledUrlNodeSet matches =
RetrieveNodesMatchingAllTerms(terms, matching_algorithm);
match_ancestor_titles
? RetrieveNodesMatchingAnyTerms(terms, matching_algorithm)
: RetrieveNodesMatchingAllTerms(terms, matching_algorithm);
if (matches.empty())
return {};
......@@ -99,8 +106,8 @@ std::vector<TitledUrlMatch> TitledUrlIndex::GetResultsMatching(
std::vector<TitledUrlMatch> results;
for (TitledUrlNodes::const_iterator i = sorted_nodes.begin();
i != sorted_nodes.end() && results.size() < max_count; ++i) {
base::Optional<TitledUrlMatch> match =
MatchTitledUrlNodeWithQuery(*i, &parser, query_nodes);
base::Optional<TitledUrlMatch> match = MatchTitledUrlNodeWithQuery(
*i, &parser, query_nodes, match_ancestor_titles);
if (match)
results.push_back(match.value());
}
......@@ -108,7 +115,7 @@ std::vector<TitledUrlMatch> TitledUrlIndex::GetResultsMatching(
}
void TitledUrlIndex::SortMatches(const TitledUrlNodeSet& matches,
TitledUrlNodes* sorted_nodes) const {
TitledUrlNodes* sorted_nodes) const {
if (sorter_) {
sorter_->SortMatches(matches, sorted_nodes);
} else {
......@@ -119,7 +126,8 @@ void TitledUrlIndex::SortMatches(const TitledUrlNodeSet& matches,
base::Optional<TitledUrlMatch> TitledUrlIndex::MatchTitledUrlNodeWithQuery(
const TitledUrlNode* node,
query_parser::QueryParser* parser,
const query_parser::QueryNodeVector& query_nodes) {
const query_parser::QueryNodeVector& query_nodes,
bool match_ancestor_titles) {
if (!node) {
return base::nullopt;
}
......@@ -128,7 +136,7 @@ base::Optional<TitledUrlMatch> TitledUrlIndex::MatchTitledUrlNodeWithQuery(
// of QueryParser may filter it out. For example, the query
// ["thi"] will match the title [Thinking], but since
// ["thi"] is quoted we don't want to do a prefix match.
query_parser::QueryWordVector title_words, url_words;
query_parser::QueryWordVector title_words, url_words, ancestor_words;
const base::string16 lower_title =
base::i18n::ToLower(Normalize(node->GetTitledUrlNodeTitle()));
parser->ExtractQueryWords(lower_title, &title_words);
......@@ -136,16 +144,30 @@ base::Optional<TitledUrlMatch> TitledUrlIndex::MatchTitledUrlNodeWithQuery(
parser->ExtractQueryWords(
CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), &adjustments),
&url_words);
if (match_ancestor_titles) {
for (auto ancestor : node->GetTitledUrlNodeAncestorTitles()) {
parser->ExtractQueryWords(
base::i18n::ToLower(Normalize(base::string16(ancestor))),
&ancestor_words);
}
}
query_parser::Snippet::MatchPositions title_matches, url_matches;
bool query_has_ancestor_matches = false;
for (const auto& node : query_nodes) {
const bool has_title_matches =
node->HasMatchIn(title_words, &title_matches);
const bool has_url_matches = node->HasMatchIn(url_words, &url_matches);
if (!has_title_matches && !has_url_matches)
const bool has_ancestor_matches =
match_ancestor_titles && node->HasMatchIn(ancestor_words);
query_has_ancestor_matches =
query_has_ancestor_matches || has_ancestor_matches;
if (!has_title_matches && !has_url_matches && !has_ancestor_matches)
return base::nullopt;
query_parser::QueryParser::SortAndCoalesceMatchPositions(&title_matches);
query_parser::QueryParser::SortAndCoalesceMatchPositions(&url_matches);
}
TitledUrlMatch match;
if (lower_title.length() == node->GetTitledUrlNodeTitle().length()) {
// Only use title matches if the lowercase string is the same length
......@@ -162,6 +184,7 @@ base::Optional<TitledUrlMatch> TitledUrlIndex::MatchTitledUrlNodeWithQuery(
url_matches =
TitledUrlMatch::ReplaceOffsetsInMatchPositions(url_matches, offsets);
match.url_match_positions.swap(url_matches);
match.has_ancestor_match = query_has_ancestor_matches;
match.node = node;
return match;
}
......@@ -184,7 +207,25 @@ TitledUrlIndex::TitledUrlNodeSet TitledUrlIndex::RetrieveNodesMatchingAllTerms(
return matches;
}
TitledUrlIndex::TitledUrlNodeSet TitledUrlIndex::RetrieveNodesMatchingTerm(
TitledUrlIndex::TitledUrlNodeSet TitledUrlIndex::RetrieveNodesMatchingAnyTerms(
const std::vector<base::string16>& terms,
query_parser::MatchingAlgorithm matching_algorithm) const {
if (terms.empty())
return {};
TitledUrlNodes matches =
RetrieveNodesMatchingTerm(terms[0], matching_algorithm);
for (size_t i = 1; i < terms.size(); ++i) {
TitledUrlNodes term_matches =
RetrieveNodesMatchingTerm(terms[i], matching_algorithm);
std::copy(term_matches.begin(), term_matches.end(),
std::back_inserter(matches));
}
return TitledUrlNodeSet(matches);
}
TitledUrlIndex::TitledUrlNodes TitledUrlIndex::RetrieveNodesMatchingTerm(
const base::string16& term,
query_parser::MatchingAlgorithm matching_algorithm) const {
Index::const_iterator i = index_.lower_bound(term);
......@@ -196,7 +237,7 @@ TitledUrlIndex::TitledUrlNodeSet TitledUrlIndex::RetrieveNodesMatchingTerm(
// Term is too short for prefix match, compare using exact match.
if (i->first != term)
return {}; // No title/URL pairs with this term.
return i->second;
return TitledUrlNodes(i->second.begin(), i->second.end());
}
// Loop through index adding all entries that start with term to
......
......@@ -12,7 +12,6 @@
#include <vector>
#include "base/containers/flat_set.h"
#include "base/gtest_prod_util.h"
#include "base/macros.h"
#include "base/optional.h"
#include "base/strings/string16.h"
......@@ -50,12 +49,14 @@ class TitledUrlIndex {
void Remove(const TitledUrlNode* node);
// Returns up to |max_count| of matches containing each term from the text
// |query| in either the title or the URL. |matching_algorithm| determines
// the algorithm used by QueryParser internally to parse |query|.
// |query| in either the title, URL, or, if |match_ancestor_titles| is true,
// the titles of ancestor nodes. |matching_algorithm| determines the algorithm
// used by QueryParser internally to parse |query|.
std::vector<TitledUrlMatch> GetResultsMatching(
const base::string16& query,
size_t max_count,
query_parser::MatchingAlgorithm matching_algorithm);
query_parser::MatchingAlgorithm matching_algorithm,
bool match_ancestor_titles);
// For testing only.
TitledUrlNodeSet RetrieveNodesMatchingAllTermsForTesting(
......@@ -64,6 +65,13 @@ class TitledUrlIndex {
return RetrieveNodesMatchingAllTerms(terms, matching_algorithm);
}
// For testing only.
TitledUrlNodeSet RetrieveNodesMatchingAnyTermsForTesting(
const std::vector<base::string16>& terms,
query_parser::MatchingAlgorithm matching_algorithm) const {
return RetrieveNodesMatchingAnyTerms(terms, matching_algorithm);
}
private:
using TitledUrlNodes = std::vector<const TitledUrlNode*>;
using Index = std::map<base::string16, TitledUrlNodeSet>;
......@@ -78,7 +86,8 @@ class TitledUrlIndex {
base::Optional<TitledUrlMatch> MatchTitledUrlNodeWithQuery(
const TitledUrlNode* node,
query_parser::QueryParser* parser,
const query_parser::QueryNodeVector& query_nodes);
const query_parser::QueryNodeVector& query_nodes,
bool match_ancestor_titles);
// Return matches for the specified |terms|. This is an intersection of each
// term's matches.
......@@ -86,8 +95,12 @@ class TitledUrlIndex {
const std::vector<base::string16>& terms,
query_parser::MatchingAlgorithm matching_algorithm) const;
// Return matches for the specified |term|.
TitledUrlNodeSet RetrieveNodesMatchingTerm(
TitledUrlNodeSet RetrieveNodesMatchingAnyTerms(
const std::vector<base::string16>& terms,
query_parser::MatchingAlgorithm matching_algorithm) const;
// Return matches for the specified |term|. May return duplicates.
TitledUrlNodes RetrieveNodesMatchingTerm(
const base::string16& term,
query_parser::MatchingAlgorithm matching_algorithm) const;
......
......@@ -10,6 +10,7 @@
#include "base/stl_util.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
......@@ -56,8 +57,10 @@ class BookmarkClientMock : public TestBookmarkClient {
// Minimal implementation of TitledUrlNode.
class TestTitledUrlNode : public TitledUrlNode {
public:
TestTitledUrlNode(const base::string16& title, const GURL& url)
: title_(title), url_(url) {}
TestTitledUrlNode(const base::string16& title,
const GURL& url,
const base::string16& ancestor_title)
: title_(title), url_(url), ancestor_title_(ancestor_title) {}
~TestTitledUrlNode() override = default;
......@@ -67,9 +70,15 @@ class TestTitledUrlNode : public TitledUrlNode {
const GURL& GetTitledUrlNodeUrl() const override { return url_; }
std::vector<base::StringPiece16> GetTitledUrlNodeAncestorTitles()
const override {
return {ancestor_title_};
}
private:
base::string16 title_;
GURL url_;
base::string16 ancestor_title_;
};
class TitledUrlIndexTest : public testing::Test {
......@@ -85,12 +94,18 @@ class TitledUrlIndexTest : public testing::Test {
owned_nodes_.clear();
}
TitledUrlNode* AddNode(const std::string& title, const GURL& url) {
return AddNode(UTF8ToUTF16(title), url);
TitledUrlNode* AddNode(const std::string& title,
const GURL& url,
const std::string& ancestor_title = "") {
return AddNode(UTF8ToUTF16(title), url, UTF8ToUTF16(ancestor_title));
}
TitledUrlNode* AddNode(const base::string16& title, const GURL& url) {
owned_nodes_.push_back(std::make_unique<TestTitledUrlNode>(title, url));
TitledUrlNode* AddNode(
const base::string16& title,
const GURL& url,
const base::string16& ancestor_title = base::string16()) {
owned_nodes_.push_back(
std::make_unique<TestTitledUrlNode>(title, url, ancestor_title));
index_->Add(owned_nodes_.back().get());
return owned_nodes_.back().get();
}
......@@ -100,10 +115,13 @@ class TitledUrlIndexTest : public testing::Test {
AddNode(titles[i], GURL(urls[i]));
}
std::vector<TitledUrlMatch> GetResultsMatching(const std::string& query,
size_t max_count) {
std::vector<TitledUrlMatch> GetResultsMatching(
const std::string& query,
size_t max_count,
bool match_ancestor_titles = false) {
return index_->GetResultsMatching(UTF8ToUTF16(query), max_count,
query_parser::MatchingAlgorithm::DEFAULT);
query_parser::MatchingAlgorithm::DEFAULT,
match_ancestor_titles);
}
void ExpectMatches(const std::string& query,
......@@ -120,7 +138,7 @@ class TitledUrlIndexTest : public testing::Test {
query_parser::MatchingAlgorithm matching_algorithm,
const std::vector<std::string>& expected_titles) {
std::vector<TitledUrlMatch> matches = index_->GetResultsMatching(
UTF8ToUTF16(query), 1000, matching_algorithm);
UTF8ToUTF16(query), 1000, matching_algorithm, false);
ASSERT_EQ(expected_titles.size(), matches.size());
for (const std::string& expected_title : expected_titles) {
bool found = false;
......@@ -568,8 +586,86 @@ TEST_F(TitledUrlIndexTest, RetrieveNodesMatchingAllTerms) {
auto matches = index()->RetrieveNodesMatchingAllTermsForTesting(
terms, query_parser::MatchingAlgorithm::DEFAULT);
if (test_data.should_be_retrieved) {
EXPECT_EQ(matches.size(), 1u);
EXPECT_TRUE(matches.contains(node));
} else
EXPECT_TRUE(matches.empty());
};
}
TEST_F(TitledUrlIndexTest, RetrieveNodesMatchingAnyTerms) {
TitledUrlNode* node =
AddNode("termA termB otherTerm xyz ab", GURL("http://foo.com"));
struct TestData {
const std::string query;
const bool should_be_retrieved;
} data[] = {// Should return matches if any input terms match, even if not all
// node terms match.
{"term not", true},
// Should not return duplicate matches.
{"term termA termB", true},
// Should not early exit when there are no intermediate matches.
{"not term", true},
// Should not match midword.
{"ther", false},
// Short input terms should only return exact matches.
{"xy", false},
{"ab", true}};
for (const TestData& test_data : data) {
SCOPED_TRACE("Query: " + test_data.query);
std::vector<base::string16> terms = base::SplitString(
base::UTF8ToUTF16(test_data.query), base::UTF8ToUTF16(" "),
base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
auto matches = index()->RetrieveNodesMatchingAnyTermsForTesting(
terms, query_parser::MatchingAlgorithm::DEFAULT);
if (test_data.should_be_retrieved) {
EXPECT_EQ(matches.size(), 1u);
EXPECT_TRUE(matches.contains(node));
} else
EXPECT_TRUE(matches.empty());
};
}
TEST_F(TitledUrlIndexTest, GetResultsMatchingAncestors) {
TitledUrlNode* node = AddNode("leaf pare", GURL("http://foo.com"), "parent");
struct TestData {
const std::string query;
const bool match_ancestor_titles;
const bool should_be_retrieved;
const bool should_have_ancestor_match;
} data[] = {
// Should exclude matches with ancestor matches when
// |match_ancestor_titles| is false.
{"leaf parent", false, false, false},
// Should allow ancestor matches when |match_ancestor_titles| is true.
{"leaf parent", true, true, true},
// Should not early exit when there are no accumulated
// non-ancestor matches.
{"parent leaf", true, true, true},
// Should still require at least 1 non-ancestor match when
// |match_ancestor_titles| is true.
{"parent", true, false, false},
// Should set |has_ancestor_match| to true even if a term matched
// both an ancestor and title/URL.
{"pare", true, true, true},
// Short inputs should only match exact title or ancestor terms.
{"pa", true, false, false},
// Should not return matches if a term matches neither the title
// nor ancestor.
{"term not parent", true, false, false}};
for (const TestData& test_data : data) {
SCOPED_TRACE("Query: " + test_data.query);
auto matches = GetResultsMatching(test_data.query, 10,
test_data.match_ancestor_titles);
if (test_data.should_be_retrieved) {
EXPECT_EQ(matches.size(), 1u);
EXPECT_EQ(matches[0].node, node);
EXPECT_EQ(matches[0].has_ancestor_match,
test_data.should_have_ancestor_match);
} else
EXPECT_TRUE(matches.empty());
};
......
......@@ -44,6 +44,9 @@ struct TitledUrlMatch {
// Location of the matching words in the URL of the node.
MatchPositions url_match_positions;
// Whether there was at least 1 match in the titles of ancestors of the node.
bool has_ancestor_match;
};
} // namespace bookmarks
......
......@@ -5,6 +5,8 @@
#ifndef COMPONENTS_BOOKMARKS_BROWSER_TITLED_URL_NODE_H_
#define COMPONENTS_BOOKMARKS_BROWSER_TITLED_URL_NODE_H_
#include "base/containers/span.h"
#include "base/strings/string_piece.h"
#include "url/gurl.h"
namespace bookmarks {
......@@ -20,6 +22,11 @@ class TitledUrlNode {
// Returns the URL for the node.
virtual const GURL& GetTitledUrlNodeUrl() const = 0;
// Returns the titles of this node's ancestors ordered from child to parent.
// If |include_self| is true, will include its own title as well.
virtual std::vector<base::StringPiece16> GetTitledUrlNodeAncestorTitles()
const = 0;
protected:
virtual ~TitledUrlNode() {}
};
......
......@@ -7,6 +7,7 @@
#include <memory>
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_piece.h"
#include "base/strings/utf_string_conversions.h"
#include "components/bookmarks/browser/titled_url_match.h"
#include "components/bookmarks/browser/titled_url_node.h"
......@@ -44,6 +45,10 @@ class MockTitledUrlNode : public bookmarks::TitledUrlNode {
return title_;
}
const GURL& GetTitledUrlNodeUrl() const override { return url_; }
std::vector<base::StringPiece16> GetTitledUrlNodeAncestorTitles()
const override {
return {};
}
private:
base::string16 title_;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment