Commit f1a7a373 authored by kkimlabs's avatar kkimlabs Committed by Commit bot

Allow systematic prefix search in bookmarks.

This CL allows the bookmark model's search to always use prefix search, even
when terms are too short.

BUG=415774

Review URL: https://codereview.chromium.org/703553002

Cr-Commit-Position: refs/heads/master@{#302598}
parent bb0f3e6a
......@@ -133,9 +133,11 @@ void BookmarkIndex::Remove(const BookmarkNode* node) {
UnregisterNode(terms[i], node);
}
void BookmarkIndex::GetBookmarksMatching(const base::string16& input_query,
size_t max_count,
std::vector<BookmarkMatch>* results) {
void BookmarkIndex::GetBookmarksMatching(
const base::string16& input_query,
size_t max_count,
query_parser::MatchingAlgorithm matching_algorithm,
std::vector<BookmarkMatch>* results) {
const base::string16 query = Normalize(input_query);
std::vector<base::string16> terms = ExtractQueryWords(query);
if (terms.empty())
......@@ -143,8 +145,10 @@ void BookmarkIndex::GetBookmarksMatching(const base::string16& input_query,
Matches matches;
for (size_t i = 0; i < terms.size(); ++i) {
if (!GetBookmarksMatchingTerm(terms[i], i == 0, &matches))
if (!GetBookmarksMatchingTerm(
terms[i], i == 0, matching_algorithm, &matches)) {
return;
}
}
Nodes sorted_nodes;
......@@ -155,7 +159,7 @@ void BookmarkIndex::GetBookmarksMatching(const base::string16& input_query,
// matches and so this shouldn't be performance critical.
query_parser::QueryParser parser;
ScopedVector<query_parser::QueryNode> query_nodes;
parser.ParseQueryNodes(query, &query_nodes.get());
parser.ParseQueryNodes(query, matching_algorithm, &query_nodes.get());
// The highest typed counts should be at the beginning of the results vector
// so that the best matches will always be included in the results. The loop
......@@ -246,14 +250,17 @@ void BookmarkIndex::AddMatchToResults(
results->push_back(match);
}
bool BookmarkIndex::GetBookmarksMatchingTerm(const base::string16& term,
bool first_term,
Matches* matches) {
bool BookmarkIndex::GetBookmarksMatchingTerm(
const base::string16& term,
bool first_term,
query_parser::MatchingAlgorithm matching_algorithm,
Matches* matches) {
Index::const_iterator i = index_.lower_bound(term);
if (i == index_.end())
return false;
if (!query_parser::QueryParser::IsWordLongEnoughForPrefixSearch(term)) {
if (!query_parser::QueryParser::IsWordLongEnoughForPrefixSearch(
term, matching_algorithm)) {
// Term is too short for prefix match, compare using exact match.
if (i->first != term)
return false; // No bookmarks with this term.
......@@ -334,7 +341,9 @@ std::vector<base::string16> BookmarkIndex::ExtractQueryWords(
if (query.empty())
return std::vector<base::string16>();
query_parser::QueryParser parser;
parser.ParseQueryWords(base::i18n::ToLower(query), &terms);
parser.ParseQueryWords(base::i18n::ToLower(query),
query_parser::MatchingAlgorithm::DEFAULT,
&terms);
return terms;
}
......
......@@ -41,12 +41,12 @@ class BookmarkIndex {
// Invoked when a bookmark has been removed from the model.
void Remove(const BookmarkNode* node);
// Returns up to |max_count| of bookmarks containing each term from
// the text |query| in either the title or the URL.
void GetBookmarksMatching(
const base::string16& query,
size_t max_count,
std::vector<BookmarkMatch>* results);
// Returns up to |max_count| of bookmarks containing each term from the text
// |query| in either the title or the URL.
void GetBookmarksMatching(const base::string16& query,
size_t max_count,
query_parser::MatchingAlgorithm matching_algorithm,
std::vector<BookmarkMatch>* results);
private:
typedef std::vector<const BookmarkNode*> Nodes;
......@@ -70,9 +70,11 @@ class BookmarkIndex {
// Populates |matches| for the specified term. If |first_term| is true, this
// is the first term in the query. Returns true if there is at least one node
// matching the term.
bool GetBookmarksMatchingTerm(const base::string16& term,
bool first_term,
Matches* matches);
bool GetBookmarksMatchingTerm(
const base::string16& term,
bool first_term,
query_parser::MatchingAlgorithm matching_algorithm,
Matches* matches);
// Iterates over |matches| updating each Match's nodes to contain the
// intersection of the Match's current nodes and the nodes at |index_i|.
......
......@@ -83,13 +83,16 @@ class BookmarkIndexTest : public testing::Test {
std::vector<std::string> title_vector;
for (size_t i = 0; i < expected_count; ++i)
title_vector.push_back(expected_titles[i]);
ExpectMatches(query, title_vector);
ExpectMatches(query, query_parser::MatchingAlgorithm::DEFAULT,
title_vector);
}
void ExpectMatches(const std::string& query,
query_parser::MatchingAlgorithm matching_algorithm,
const std::vector<std::string>& expected_titles) {
std::vector<BookmarkMatch> matches;
model_->GetBookmarksMatching(ASCIIToUTF16(query), 1000, &matches);
model_->GetBookmarksMatching(ASCIIToUTF16(query), 1000, matching_algorithm,
&matches);
ASSERT_EQ(expected_titles.size(), matches.size());
for (size_t i = 0; i < expected_titles.size(); ++i) {
bool found = false;
......@@ -171,7 +174,74 @@ TEST_F(BookmarkIndexTest, GetBookmarksMatching) {
{ "think", "\"thi\"", ""},
// Prefix matches against multiple candidates.
{ "abc1 abc2 abc3 abc4", "abc", "abc1 abc2 abc3 abc4"},
{ "abc1 abc2 abc3 abc4", "abc", "abc1 abc2 abc3 abc4"},
// Prefix match on the first term.
{ "abc", "a", "" },
// Prefix match on subsequent terms.
{ "abc def", "abc d", "" },
};
for (size_t i = 0; i < arraysize(data); ++i) {
std::vector<std::string> titles;
base::SplitString(data[i].titles, ';', &titles);
std::vector<TitleAndURL> bookmarks;
for (size_t j = 0; j < titles.size(); ++j) {
TitleAndURL bookmark(titles[j], kAboutBlankURL);
bookmarks.push_back(bookmark);
}
AddBookmarks(bookmarks);
std::vector<std::string> expected;
if (!data[i].expected.empty())
base::SplitString(data[i].expected, ';', &expected);
ExpectMatches(data[i].query, query_parser::MatchingAlgorithm::DEFAULT,
expected);
model_ = client_.CreateModel();
}
}
TEST_F(BookmarkIndexTest, GetBookmarksMatchingAlwaysPrefixSearch) {
struct TestData {
const std::string titles;
const std::string query;
const std::string expected;
} data[] = {
// Trivial test case of only one term, exact match.
{ "z;y", "Z", "z" },
// Prefix match, one term.
{ "abcd;abc;b", "abc", "abcd;abc" },
// Prefix match, multiple terms.
{ "abcd cdef;abcd;abcd cdefg", "abc cde", "abcd cdef;abcd cdefg" },
// Exact and prefix match.
{ "ab cdef ghij;ab;cde;cdef;ghi;cdef ab;ghij ab",
"ab cde ghi",
"ab cdef ghij" },
// Title with term multiple times.
{ "ab ab", "ab", "ab ab" },
// Make sure quotes don't do a prefix match.
{ "think", "\"thi\"", "" },
// Prefix matches against multiple candidates.
{ "abc1 abc2 abc3 abc4", "abc", "abc1 abc2 abc3 abc4" },
// Prefix match on the first term.
{ "abc", "a", "abc" },
// Prefix match on subsequent terms.
{ "abc def", "abc d", "abc def" },
// Exact and prefix match.
{ "ab cdef;abcd;abcd cdefg", "ab cdef", "ab cdef;abcd cdefg" },
};
for (size_t i = 0; i < arraysize(data); ++i) {
std::vector<std::string> titles;
......@@ -187,7 +257,9 @@ TEST_F(BookmarkIndexTest, GetBookmarksMatching) {
if (!data[i].expected.empty())
base::SplitString(data[i].expected, ';', &expected);
ExpectMatches(data[i].query, expected);
ExpectMatches(data[i].query,
query_parser::MatchingAlgorithm::ALWAYS_PREFIX_SEARCH,
expected);
model_ = client_.CreateModel();
}
......@@ -247,7 +319,8 @@ TEST_F(BookmarkIndexTest, GetBookmarksMatchingWithURLs) {
if (data[i].should_be_retrieved)
expected.push_back(data[i].title);
ExpectMatches(data[i].query, expected);
ExpectMatches(data[i].query, query_parser::MatchingAlgorithm::DEFAULT,
expected);
}
}
......
......@@ -689,14 +689,22 @@ void BookmarkModel::ResetDateFolderModified(const BookmarkNode* node) {
SetDateFolderModified(node, Time());
}
void BookmarkModel::GetBookmarksMatching(const base::string16& text,
size_t max_count,
std::vector<BookmarkMatch>* matches) {
GetBookmarksMatching(text, max_count,
query_parser::MatchingAlgorithm::DEFAULT, matches);
}
void BookmarkModel::GetBookmarksMatching(
const base::string16& text,
size_t max_count,
query_parser::MatchingAlgorithm matching_algorithm,
std::vector<BookmarkMatch>* matches) {
if (!loaded_)
return;
index_->GetBookmarksMatching(text, max_count, matches);
index_->GetBookmarksMatching(text, max_count, matching_algorithm, matches);
}
void BookmarkModel::ClearStore() {
......
......@@ -46,6 +46,10 @@ namespace favicon_base {
struct FaviconImageResult;
}
namespace query_parser {
enum class MatchingAlgorithm;
}
// BookmarkModel --------------------------------------------------------------
// BookmarkModel provides a directed acyclic graph of URLs and folders.
......@@ -235,11 +239,19 @@ class BookmarkModel : public KeyedService {
void ResetDateFolderModified(const BookmarkNode* node);
// Returns up to |max_count| of bookmarks containing each term from |text|
// in either the title or the URL.
// in either the title or the URL. It uses default matching algorithm.
void GetBookmarksMatching(const base::string16& text,
size_t max_count,
std::vector<bookmarks::BookmarkMatch>* matches);
// Returns up to |max_count| of bookmarks containing each term from |text|
// in either the title or the URL.
void GetBookmarksMatching(
const base::string16& text,
size_t max_count,
query_parser::MatchingAlgorithm matching_algorithm,
std::vector<bookmarks::BookmarkMatch>* matches);
// Sets the store to NULL, making it so the BookmarkModel does not persist
// any changes to disk. This is only useful during testing to speed up
// testing.
......
......@@ -350,6 +350,7 @@ void GetBookmarksMatchingProperties(BookmarkModel* model,
query_parser::QueryParser parser;
if (query.word_phrase_query) {
parser.ParseQueryWords(base::i18n::ToLower(*query.word_phrase_query),
query_parser::MatchingAlgorithm::DEFAULT,
&query_words);
if (query_words.empty())
return;
......
......@@ -357,7 +357,8 @@ bool URLDatabase::FindShortestURLFromBase(const std::string& base,
bool URLDatabase::GetTextMatches(const base::string16& query,
URLRows* results) {
ScopedVector<query_parser::QueryNode> query_nodes;
query_parser_.ParseQueryNodes(query, &query_nodes.get());
query_parser_.ParseQueryNodes(
query, query_parser::MatchingAlgorithm::DEFAULT, &query_nodes.get());
results->clear();
sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
......
......@@ -66,11 +66,13 @@ bool IsQueryQuote(wchar_t ch) {
// A QueryNodeWord is a single word in the query.
class QueryNodeWord : public QueryNode {
public:
explicit QueryNodeWord(const base::string16& word);
explicit QueryNodeWord(const base::string16& word,
MatchingAlgorithm matching_algorithm);
~QueryNodeWord() override;
const base::string16& word() const { return word_; }
bool literal() const { return literal_; };
void set_literal(bool literal) { literal_ = literal; }
// QueryNode:
......@@ -85,13 +87,16 @@ class QueryNodeWord : public QueryNode {
private:
base::string16 word_;
bool literal_;
const MatchingAlgorithm matching_algorithm_;
DISALLOW_COPY_AND_ASSIGN(QueryNodeWord);
};
QueryNodeWord::QueryNodeWord(const base::string16& word)
QueryNodeWord::QueryNodeWord(const base::string16& word,
MatchingAlgorithm matching_algorithm)
: word_(word),
literal_(false) {}
literal_(false),
matching_algorithm_(matching_algorithm) {}
QueryNodeWord::~QueryNodeWord() {}
......@@ -99,7 +104,8 @@ int QueryNodeWord::AppendToSQLiteQuery(base::string16* query) const {
query->append(word_);
// Use prefix search if we're not literal and long enough.
if (!literal_ && QueryParser::IsWordLongEnoughForPrefixSearch(word_))
if (!literal_ &&
QueryParser::IsWordLongEnoughForPrefixSearch(word_, matching_algorithm_))
*query += L'*';
return 1;
}
......@@ -109,7 +115,8 @@ bool QueryNodeWord::IsWord() const {
}
bool QueryNodeWord::Matches(const base::string16& word, bool exact) const {
if (exact || !QueryParser::IsWordLongEnoughForPrefixSearch(word_))
if (exact ||
!QueryParser::IsWordLongEnoughForPrefixSearch(word_, matching_algorithm_))
return word == word_;
return word.size() >= word_.size() &&
(word_.compare(0, word_.size(), word, 0, word_.size()) == 0);
......@@ -315,7 +322,11 @@ bool QueryNodePhrase::HasMatchIn(const QueryWordVector& words) const {
QueryParser::QueryParser() {}
// static
bool QueryParser::IsWordLongEnoughForPrefixSearch(const base::string16& word) {
bool QueryParser::IsWordLongEnoughForPrefixSearch(
const base::string16& word, MatchingAlgorithm matching_algorithm) {
if (matching_algorithm == MatchingAlgorithm::ALWAYS_PREFIX_SEARCH)
return true;
DCHECK(!word.empty());
size_t minimum_length = 3;
// We intentionally exclude Hangul Jamos (both Conjoining and compatibility)
......@@ -327,25 +338,28 @@ bool QueryParser::IsWordLongEnoughForPrefixSearch(const base::string16& word) {
}
int QueryParser::ParseQuery(const base::string16& query,
MatchingAlgorithm matching_algorithm,
base::string16* sqlite_query) {
QueryNodeList root;
if (!ParseQueryImpl(query, &root))
if (!ParseQueryImpl(query, matching_algorithm, &root))
return 0;
return root.AppendToSQLiteQuery(sqlite_query);
}
void QueryParser::ParseQueryWords(const base::string16& query,
MatchingAlgorithm matching_algorithm,
std::vector<base::string16>* words) {
QueryNodeList root;
if (!ParseQueryImpl(query, &root))
if (!ParseQueryImpl(query, matching_algorithm, &root))
return;
root.AppendWords(words);
}
void QueryParser::ParseQueryNodes(const base::string16& query,
MatchingAlgorithm matching_algorithm,
QueryNodeStarVector* nodes) {
QueryNodeList root;
if (ParseQueryImpl(base::i18n::ToLower(query), &root))
if (ParseQueryImpl(base::i18n::ToLower(query), matching_algorithm, &root))
nodes->swap(*root.children());
}
......@@ -393,6 +407,7 @@ bool QueryParser::DoesQueryMatch(const QueryWordVector& query_words,
}
bool QueryParser::ParseQueryImpl(const base::string16& query,
MatchingAlgorithm matching_algorithm,
QueryNodeList* root) {
base::i18n::BreakIterator iter(query, base::i18n::BreakIterator::BREAK_WORD);
// TODO(evanm): support a locale here
......@@ -410,7 +425,8 @@ bool QueryParser::ParseQueryImpl(const base::string16& query,
// is not necessarily a word, but could also be a sequence of punctuation
// or whitespace.
if (iter.IsWord()) {
QueryNodeWord* word_node = new QueryNodeWord(iter.GetString());
QueryNodeWord* word_node = new QueryNodeWord(iter.GetString(),
matching_algorithm);
if (in_quotes)
word_node->set_literal(true);
query_stack.back()->AddChild(word_node);
......
......@@ -24,6 +24,14 @@ struct QueryWord {
size_t position;
};
enum class MatchingAlgorithm {
// Only words long enough are considered for prefix search. Shorter words are
// considered for exact matches.
DEFAULT,
// All words are considered for a prefix search.
ALWAYS_PREFIX_SEARCH,
};
typedef std::vector<query_parser::QueryWord> QueryWordVector;
// QueryNode is used by QueryParser to represent the elements that constitute a
......@@ -72,23 +80,29 @@ class QueryParser {
// point doing anything for them and we only adjust the minimum length
// to 2 for Korean Hangul while using 3 for others. This is a temporary
// hack until we have a segmentation support.
static bool IsWordLongEnoughForPrefixSearch(const base::string16& word);
static bool IsWordLongEnoughForPrefixSearch(
const base::string16& word,
MatchingAlgorithm matching_algorithm);
// Parse a query into a SQLite query. The resulting query is placed in
// |sqlite_query| and the number of words is returned.
int ParseQuery(const base::string16& query, base::string16* sqlite_query);
int ParseQuery(const base::string16& query,
MatchingAlgorithm matching_algorithm,
base::string16* sqlite_query);
// Parses |query|, returning the words that make up it. Any words in quotes
// are put in |words| without the quotes. For example, the query text
// "foo bar" results in two entries being added to words, one for foo and one
// for bar.
void ParseQueryWords(const base::string16& query,
MatchingAlgorithm matching_algorithm,
std::vector<base::string16>* words);
// Parses |query|, returning the nodes that constitute the valid words in the
// query. This is intended for later usage with DoesQueryMatch. Ownership of
// the nodes passes to the caller.
void ParseQueryNodes(const base::string16& query,
MatchingAlgorithm matching_algorithm,
QueryNodeStarVector* nodes);
// Returns true if the string text matches the query nodes created by a call
......@@ -114,7 +128,9 @@ class QueryParser {
private:
// Does the work of parsing |query|; creates nodes in |root| as appropriate.
// This is invoked from both of the ParseQuery methods.
bool ParseQueryImpl(const base::string16& query, QueryNodeList* root);
bool ParseQueryImpl(const base::string16& query,
MatchingAlgorithm matching_algorithm,
QueryNodeList* root);
DISALLOW_COPY_AND_ASSIGN(QueryParser);
};
......
......@@ -27,7 +27,9 @@ class QueryParserTest : public testing::Test {
// convenience) to a SQLite query string.
std::string QueryParserTest::QueryToString(const std::string& query) {
base::string16 sqlite_query;
query_parser_.ParseQuery(base::UTF8ToUTF16(query), &sqlite_query);
query_parser_.ParseQuery(base::UTF8ToUTF16(query),
MatchingAlgorithm::DEFAULT,
&sqlite_query);
return base::UTF16ToUTF8(sqlite_query);
}
......@@ -85,6 +87,7 @@ TEST_F(QueryParserTest, NumWords) {
base::string16 query_string;
EXPECT_EQ(data[i].expected_word_count,
query_parser_.ParseQuery(base::UTF8ToUTF16(data[i].input),
MatchingAlgorithm::DEFAULT,
&query_string));
}
}
......@@ -120,6 +123,7 @@ TEST_F(QueryParserTest, ParseQueryNodesAndMatch) {
QueryParser parser;
ScopedVector<QueryNode> query_nodes;
parser.ParseQueryNodes(base::UTF8ToUTF16(data[i].query),
MatchingAlgorithm::DEFAULT,
&query_nodes.get());
Snippet::MatchPositions match_positions;
ASSERT_EQ(data[i].matches,
......@@ -157,7 +161,9 @@ TEST_F(QueryParserTest, ParseQueryWords) {
for (size_t i = 0; i < arraysize(data); ++i) {
std::vector<base::string16> results;
QueryParser parser;
parser.ParseQueryWords(base::UTF8ToUTF16(data[i].text), &results);
parser.ParseQueryWords(base::UTF8ToUTF16(data[i].text),
MatchingAlgorithm::DEFAULT,
&results);
ASSERT_EQ(data[i].word_count, results.size());
EXPECT_EQ(data[i].w1, base::UTF16ToUTF8(results[0]));
if (results.size() == 2)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment