Commit 02f304ba authored by mpearson's avatar mpearson Committed by Commit bot

Omnibox: BookmarksProvider: Make Multiple Prefix Matches Work

This code significantly simplies the logic for matching.

Indeed, the old logic was buggy because multiple prefix matches could
results in explosive growth of the list of matches (even if only one
bookmark matches).  Reviewer, I would avoid reading and trying to
understand what CombineMatches() and the code that calls it is currently
broken.  See the second test case I added.  Before this
change, that test never terminates (in the time I waited) and explodes
the machine's memory.  Luckily this was rare; there are few reports on
crbug.

It's possible this new code is slower or faster than before (when the
old code behaved correctly).  I plan to look at the
Omnibox.Providertime.Bookmarks numbers before and after this change.

TBR=sky for owners stamp; review it if you feel like

BUG=450850,434604

Review URL: https://codereview.chromium.org/882823004

Cr-Commit-Position: refs/heads/master@{#313574}
parent d9bb3b78
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "base/i18n/case_conversion.h" #include "base/i18n/case_conversion.h"
#include "base/logging.h" #include "base/logging.h"
#include "base/stl_util.h"
#include "base/strings/utf_offset_string_conversions.h" #include "base/strings/utf_offset_string_conversions.h"
#include "components/bookmarks/browser/bookmark_client.h" #include "components/bookmarks/browser/bookmark_client.h"
#include "components/bookmarks/browser/bookmark_match.h" #include "components/bookmarks/browser/bookmark_match.h"
...@@ -62,40 +63,6 @@ struct NodeTypedCountPairExtractNodeFunctor ...@@ -62,40 +63,6 @@ struct NodeTypedCountPairExtractNodeFunctor
} // namespace } // namespace
// Used when finding the set of bookmarks that match a query. Each match
// represents a set of terms (as an interator into the Index) matching the
// query as well as the set of nodes that contain those terms in their titles.
struct BookmarkIndex::Match {
// List of terms matching the query.
std::list<Index::const_iterator> terms;
// The set of nodes matching the terms. As an optimization this is empty
// when we match only one term, and is filled in when we get more than one
// term. We can do this as when we have only one matching term we know
// the set of matching nodes is terms.front()->second.
//
// Use nodes_begin() and nodes_end() to get an iterator over the set as
// it handles the necessary switching between nodes and terms.front().
NodeSet nodes;
// Returns an iterator to the beginning of the matching nodes. See
// description of nodes for why this should be used over nodes.begin().
NodeSet::const_iterator nodes_begin() const;
// Returns an iterator to the beginning of the matching nodes. See
// description of nodes for why this should be used over nodes.end().
NodeSet::const_iterator nodes_end() const;
};
BookmarkIndex::NodeSet::const_iterator
BookmarkIndex::Match::nodes_begin() const {
return nodes.empty() ? terms.front()->second.begin() : nodes.begin();
}
BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const {
return nodes.empty() ? terms.front()->second.end() : nodes.end();
}
BookmarkIndex::BookmarkIndex(BookmarkClient* client, BookmarkIndex::BookmarkIndex(BookmarkClient* client,
const std::string& languages) const std::string& languages)
: client_(client), : client_(client),
...@@ -143,7 +110,7 @@ void BookmarkIndex::GetBookmarksMatching( ...@@ -143,7 +110,7 @@ void BookmarkIndex::GetBookmarksMatching(
if (terms.empty()) if (terms.empty())
return; return;
Matches matches; NodeSet matches;
for (size_t i = 0; i < terms.size(); ++i) { for (size_t i = 0; i < terms.size(); ++i) {
if (!GetBookmarksMatchingTerm( if (!GetBookmarksMatchingTerm(
terms[i], i == 0, matching_algorithm, &matches)) { terms[i], i == 0, matching_algorithm, &matches)) {
...@@ -172,23 +139,12 @@ void BookmarkIndex::GetBookmarksMatching( ...@@ -172,23 +139,12 @@ void BookmarkIndex::GetBookmarksMatching(
AddMatchToResults(*i, &parser, query_nodes.get(), results); AddMatchToResults(*i, &parser, query_nodes.get(), results);
} }
void BookmarkIndex::SortMatches(const Matches& matches, void BookmarkIndex::SortMatches(const NodeSet& matches,
Nodes* sorted_nodes) const { Nodes* sorted_nodes) const {
NodeSet nodes; sorted_nodes->reserve(matches.size());
for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i) {
#if !defined(OS_ANDROID)
nodes.insert(i->nodes_begin(), i->nodes_end());
#else
// Work around a bug in the implementation of std::set::insert in the STL
// used on android (http://crbug.com/367050).
for (NodeSet::const_iterator n = i->nodes_begin(); n != i->nodes_end(); ++n)
nodes.insert(nodes.end(), *n);
#endif
}
sorted_nodes->reserve(sorted_nodes->size() + nodes.size());
if (client_->SupportsTypedCountForNodes()) { if (client_->SupportsTypedCountForNodes()) {
NodeTypedCountPairs node_typed_counts; NodeTypedCountPairs node_typed_counts;
client_->GetTypedCountForNodes(nodes, &node_typed_counts); client_->GetTypedCountForNodes(matches, &node_typed_counts);
std::sort(node_typed_counts.begin(), std::sort(node_typed_counts.begin(),
node_typed_counts.end(), node_typed_counts.end(),
NodeTypedCountPairSortFunctor()); NodeTypedCountPairSortFunctor());
...@@ -197,7 +153,7 @@ void BookmarkIndex::SortMatches(const Matches& matches, ...@@ -197,7 +153,7 @@ void BookmarkIndex::SortMatches(const Matches& matches,
std::back_inserter(*sorted_nodes), std::back_inserter(*sorted_nodes),
NodeTypedCountPairExtractNodeFunctor()); NodeTypedCountPairExtractNodeFunctor());
} else { } else {
sorted_nodes->insert(sorted_nodes->end(), nodes.begin(), nodes.end()); sorted_nodes->insert(sorted_nodes->end(), matches.begin(), matches.end());
} }
} }
...@@ -254,7 +210,7 @@ bool BookmarkIndex::GetBookmarksMatchingTerm( ...@@ -254,7 +210,7 @@ bool BookmarkIndex::GetBookmarksMatchingTerm(
const base::string16& term, const base::string16& term,
bool first_term, bool first_term,
query_parser::MatchingAlgorithm matching_algorithm, query_parser::MatchingAlgorithm matching_algorithm,
Matches* matches) { NodeSet* matches) {
Index::const_iterator i = index_.lower_bound(term); Index::const_iterator i = index_.lower_bound(term);
if (i == index_.end()) if (i == index_.end())
return false; return false;
...@@ -266,75 +222,37 @@ bool BookmarkIndex::GetBookmarksMatchingTerm( ...@@ -266,75 +222,37 @@ bool BookmarkIndex::GetBookmarksMatchingTerm(
return false; // No bookmarks with this term. return false; // No bookmarks with this term.
if (first_term) { if (first_term) {
Match match; (*matches) = i->second;
match.terms.push_back(i);
matches->push_back(match);
return true; return true;
} }
CombineMatchesInPlace(i, matches); *matches = base::STLSetIntersection<NodeSet>(i->second, *matches);
} else if (first_term) {
// This is the first term and we're doing a prefix match. Loop through
// index adding all entries that start with term to matches.
while (i != index_.end() &&
i->first.size() >= term.size() &&
term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
Match match;
match.terms.push_back(i);
matches->push_back(match);
++i;
}
} else { } else {
// Prefix match and not the first term. Loop through index combining // Loop through index adding all entries that start with term to
// current matches in matches with term, placing result in result. // |prefix_matches|.
Matches result; NodeSet tmp_prefix_matches;
// If this is the first term, then store the result directly in |matches|
// to avoid calling stl intersection (which requires a copy).
NodeSet* prefix_matches = first_term ? matches : &tmp_prefix_matches;
while (i != index_.end() && while (i != index_.end() &&
i->first.size() >= term.size() && i->first.size() >= term.size() &&
term.compare(0, term.size(), i->first, 0, term.size()) == 0) { term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
CombineMatches(i, *matches, &result); #if !defined(OS_ANDROID)
prefix_matches->insert(i->second.begin(), i->second.end());
#else
// Work around a bug in the implementation of std::set::insert in the STL
// used on android (http://crbug.com/367050).
for (NodeSet::const_iterator n = i->second.begin(); n != i->second.end();
++n)
prefix_matches->insert(prefix_matches->end(), *n);
#endif
++i; ++i;
} }
matches->swap(result); if (!first_term)
*matches = base::STLSetIntersection<NodeSet>(*prefix_matches, *matches);
} }
return !matches->empty(); return !matches->empty();
} }
void BookmarkIndex::CombineMatchesInPlace(const Index::const_iterator& index_i,
Matches* matches) {
for (size_t i = 0; i < matches->size(); ) {
Match* match = &((*matches)[i]);
NodeSet intersection;
std::set_intersection(match->nodes_begin(), match->nodes_end(),
index_i->second.begin(), index_i->second.end(),
std::inserter(intersection, intersection.begin()));
if (intersection.empty()) {
matches->erase(matches->begin() + i);
} else {
match->terms.push_back(index_i);
match->nodes.swap(intersection);
++i;
}
}
}
void BookmarkIndex::CombineMatches(const Index::const_iterator& index_i,
const Matches& current_matches,
Matches* result) {
for (size_t i = 0; i < current_matches.size(); ++i) {
const Match& match = current_matches[i];
NodeSet intersection;
std::set_intersection(match.nodes_begin(), match.nodes_end(),
index_i->second.begin(), index_i->second.end(),
std::inserter(intersection, intersection.begin()));
if (!intersection.empty()) {
result->push_back(Match());
Match& combined_match = result->back();
combined_match.terms = match.terms;
combined_match.terms.push_back(index_i);
combined_match.nodes.swap(intersection);
}
}
}
std::vector<base::string16> BookmarkIndex::ExtractQueryWords( std::vector<base::string16> BookmarkIndex::ExtractQueryWords(
const base::string16& query) { const base::string16& query) {
std::vector<base::string16> terms; std::vector<base::string16> terms;
......
...@@ -53,12 +53,10 @@ class BookmarkIndex { ...@@ -53,12 +53,10 @@ class BookmarkIndex {
typedef std::set<const BookmarkNode*> NodeSet; typedef std::set<const BookmarkNode*> NodeSet;
typedef std::map<base::string16, NodeSet> Index; typedef std::map<base::string16, NodeSet> Index;
struct Match; // Constructs |sorted_nodes| by taking the matches in |matches| and sorting
typedef std::vector<Match> Matches; // them in decreasing order of typed count (if supported by the client) and
// deduping them.
// Extracts |matches.nodes| into Nodes, sorts the pairs in decreasing order of void SortMatches(const NodeSet& matches, Nodes* sorted_nodes) const;
// typed count (if supported by the client), and then de-dupes the matches.
void SortMatches(const Matches& matches, Nodes* sorted_nodes) const;
// Add |node| to |results| if the node matches the query. // Add |node| to |results| if the node matches the query.
void AddMatchToResults( void AddMatchToResults(
...@@ -74,28 +72,7 @@ class BookmarkIndex { ...@@ -74,28 +72,7 @@ class BookmarkIndex {
const base::string16& term, const base::string16& term,
bool first_term, bool first_term,
query_parser::MatchingAlgorithm matching_algorithm, query_parser::MatchingAlgorithm matching_algorithm,
Matches* matches); NodeSet* matches);
// Iterates over |matches| updating each Match's nodes to contain the
// intersection of the Match's current nodes and the nodes at |index_i|.
// If the intersection is empty, the Match is removed.
//
// This is invoked from GetBookmarksMatchingTerm.
void CombineMatchesInPlace(const Index::const_iterator& index_i,
Matches* matches);
// Iterates over |current_matches| calculating the intersection between the
// Match's nodes and the nodes at |index_i|. If the intersection between the
// two is non-empty, a new match is added to |result|.
//
// This differs from CombineMatchesInPlace in that if the intersection is
// non-empty the result is added to result, not combined in place. This
// variant is used for prefix matching.
//
// This is invoked from GetBookmarksMatchingTerm.
void CombineMatches(const Index::const_iterator& index_i,
const Matches& current_matches,
Matches* result);
// Returns the set of query words from |query|. // Returns the set of query words from |query|.
std::vector<base::string16> ExtractQueryWords(const base::string16& query); std::vector<base::string16> ExtractQueryWords(const base::string16& query);
......
...@@ -153,6 +153,9 @@ TEST_F(BookmarkIndexTest, GetBookmarksMatching) { ...@@ -153,6 +153,9 @@ TEST_F(BookmarkIndexTest, GetBookmarksMatching) {
// Trivial test case of only one term, exact match. // Trivial test case of only one term, exact match.
{ "a;b", "A", "a" }, { "a;b", "A", "a" },
// Two terms, exact matches.
{ "a b;b", "a b", "a b" },
// Prefix match, one term. // Prefix match, one term.
{ "abcd;abc;b", "abc", "abcd;abc" }, { "abcd;abc;b", "abc", "abcd;abc" },
...@@ -176,13 +179,17 @@ TEST_F(BookmarkIndexTest, GetBookmarksMatching) { ...@@ -176,13 +179,17 @@ TEST_F(BookmarkIndexTest, GetBookmarksMatching) {
// Prefix matches against multiple candidates. // Prefix matches against multiple candidates.
{ "abc1 abc2 abc3 abc4", "abc", "abc1 abc2 abc3 abc4"}, { "abc1 abc2 abc3 abc4", "abc", "abc1 abc2 abc3 abc4"},
// Multiple prefix matches (with a lot of redundancy) against multiple
// candidates.
{ "abc1 abc2 abc3 abc4 def1 def2 def3 def4",
"abc def abc def abc def abc def abc def",
"abc1 abc2 abc3 abc4 def1 def2 def3 def4"},
// Prefix match on the first term. // Prefix match on the first term.
{ "abc", "a", "" }, { "abc", "a", "" },
// Prefix match on subsequent terms. // Prefix match on subsequent terms.
{ "abc def", "abc d", "" }, { "abc def", "abc d", "" },
}; };
for (size_t i = 0; i < arraysize(data); ++i) { for (size_t i = 0; i < arraysize(data); ++i) {
std::vector<std::string> titles; std::vector<std::string> titles;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment