Commit eccdb2b8 authored by open.hyperion's avatar open.hyperion Committed by Commit bot

Updated and refactored URLIndexPrivateData::HistoryItemsForTerms to handle...

Updated and refactored URLIndexPrivateData::HistoryItemsForTerms to search the history for both the original search string and the string with a break inserted at the cursor position.

The goal is to support mid-word autocomplete in the Omnibox. Currently, if the user types "funtimes", inserts the cursor between the "n" and the "t", and begins typing the word "good", the Omnibox will search for URL results that match "fungood times" only. We want to also search for "fungoodtimes".

BUG=591979
TEST=0. Clear browser history.
1. Visit the following link: https://twitter.com/fungoodtimes
2. Open a new browser tab.
3. Type into the Omnibox "funtimes".  Note the lack of the suggestion for the above URL.
4. Insert the cursor between the "n" and "t" in "funtimes" and type "good".
5. The above URL should show in the autocomplete list.

Review-Url: https://codereview.chromium.org/2364553004
Cr-Commit-Position: refs/heads/master@{#420721}
parent 3fd37d92
...@@ -387,6 +387,7 @@ Laszlo Gombos <l.gombos@samsung.com> ...@@ -387,6 +387,7 @@ Laszlo Gombos <l.gombos@samsung.com>
Laszlo Radanyi <bekkra@gmail.com> Laszlo Radanyi <bekkra@gmail.com>
Lauren Yeun Kim <lauren.yeun.kim@gmail.com> Lauren Yeun Kim <lauren.yeun.kim@gmail.com>
Lauri Oherd <lauri.oherd@gmail.com> Lauri Oherd <lauri.oherd@gmail.com>
Lavar Askew <open.hyperion@gmail.com>
Legend Lee <guanxian.li@intel.com> Legend Lee <guanxian.li@intel.com>
Leith Bade <leith@leithalweapon.geek.nz> Leith Bade <leith@leithalweapon.geek.nz>
Leo Wolf <jclw@ymail.com> Leo Wolf <jclw@ymail.com>
......
...@@ -101,8 +101,9 @@ struct TestURLInfo { ...@@ -101,8 +101,9 @@ struct TestURLInfo {
// The encoded stuff between /wiki/ and the # is 第二次世界大戦 // The encoded stuff between /wiki/ and the # is 第二次世界大戦
{"http://ja.wikipedia.org/wiki/%E7%AC%AC%E4%BA%8C%E6%AC%A1%E4%B8%96%E7%95" {"http://ja.wikipedia.org/wiki/%E7%AC%AC%E4%BA%8C%E6%AC%A1%E4%B8%96%E7%95"
"%8C%E5%A4%A7%E6%88%A6#.E3.83.B4.E3.82.A7.E3.83.AB.E3.82.B5.E3.82.A4.E3." "%8C%E5%A4%A7%E6%88%A6#.E3.83.B4.E3.82.A7.E3.83.AB.E3.82.B5.E3.82.A4.E3."
"83.A6.E4.BD.93.E5.88.B6", "Title Unimportant", 2, 2, 0} "83.A6.E4.BD.93.E5.88.B6",
}; "Title Unimportant", 2, 2, 0},
{"https://twitter.com/fungoodtimes", "relatable!", 1, 1, 0}};
// Waits for OnURLsDeletedNotification and when run quits the supplied run loop. // Waits for OnURLsDeletedNotification and when run quits the supplied run loop.
class WaitForURLsDeletedObserver : public history::HistoryServiceObserver { class WaitForURLsDeletedObserver : public history::HistoryServiceObserver {
...@@ -496,6 +497,16 @@ TEST_F(HistoryQuickProviderTest, SingleMatchWithCursor) { ...@@ -496,6 +497,16 @@ TEST_F(HistoryQuickProviderTest, SingleMatchWithCursor) {
base::string16()); base::string16());
} }
// Searches for "fungoodtimes" with the cursor placed inside the word and
// expects the history entry "https://twitter.com/fungoodtimes" (part of the
// test URL fixture data) to be returned as the only result.
TEST_F(HistoryQuickProviderTest, MatchWithAndWithoutCursorWordBreak) {
std::vector<std::string> expected_urls;
expected_urls.push_back("https://twitter.com/fungoodtimes");
// With cursor after "good", we should retrieve the desired result but it
// should not be allowed to be the default match.
// Cursor offset 7 falls between "fungood" and "times".
// NOTE(review): the two `false` arguments presumably control inline
// autocompletion / default-match eligibility -- confirm against the
// RunTestWithCursor signature.
RunTestWithCursor(ASCIIToUTF16("fungoodtimes"), 7, false, expected_urls,
false, ASCIIToUTF16("https://twitter.com/fungoodtimes"),
base::string16());
}
TEST_F(HistoryQuickProviderTest, WordBoundariesWithPunctuationMatch) { TEST_F(HistoryQuickProviderTest, WordBoundariesWithPunctuationMatch) {
std::vector<std::string> expected_urls; std::vector<std::string> expected_urls;
expected_urls.push_back("http://popularsitewithpathonly.com/moo"); expected_urls.push_back("http://popularsitewithpathonly.com/moo");
......
...@@ -153,79 +153,85 @@ URLIndexPrivateData::URLIndexPrivateData() ...@@ -153,79 +153,85 @@ URLIndexPrivateData::URLIndexPrivateData()
} }
ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
base::string16 search_string, base::string16 original_search_string,
size_t cursor_position, size_t cursor_position,
size_t max_matches, size_t max_matches,
bookmarks::BookmarkModel* bookmark_model, bookmarks::BookmarkModel* bookmark_model,
TemplateURLService* template_url_service) { TemplateURLService* template_url_service) {
// If cursor position is set and useful (not at either end of the
// string), allow the search string to be broken at cursor position.
// We do this by pretending there's a space where the cursor is.
if ((cursor_position != base::string16::npos) &&
(cursor_position < search_string.length()) &&
(cursor_position > 0)) {
search_string.insert(cursor_position, base::ASCIIToUTF16(" "));
}
pre_filter_item_count_ = 0; pre_filter_item_count_ = 0;
post_filter_item_count_ = 0; post_filter_item_count_ = 0;
post_scoring_item_count_ = 0; post_scoring_item_count_ = 0;
// This list will contain the original search string and any other string
// transformations.
String16Vector search_strings;
search_strings.push_back(original_search_string);
if ((cursor_position != base::string16::npos) &&
(cursor_position < original_search_string.length()) &&
(cursor_position > 0)) {
// The original search_string broken at cursor position. This is one type of
// transformation.
base::string16 transformed_search_string(original_search_string);
transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" "));
search_strings.push_back(transformed_search_string);
}
ScoredHistoryMatches scored_items;
// Invalidate the term cache and return if we have indexed no words (probably
// because we've not been initialized yet).
if (word_list_.empty()) {
search_term_cache_.clear();
return scored_items;
}
// Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
// approach.
ResetSearchTermCache();
for (const base::string16& search_string : search_strings) {
// The search string we receive may contain escaped characters. For reducing // The search string we receive may contain escaped characters. For reducing
// the index we need individual, lower-cased words, ignoring escapings. For // the index we need individual, lower-cased words, ignoring escapings. For
// the final filtering we need whitespace separated substrings possibly // the final filtering we need whitespace separated substrings possibly
// containing escaped characters. // containing escaped characters.
base::string16 lower_raw_string(base::i18n::ToLower(search_string)); base::string16 lower_raw_string(base::i18n::ToLower(search_string));
base::string16 lower_unescaped_string = base::string16 lower_unescaped_string = net::UnescapeURLComponent(
net::UnescapeURLComponent(lower_raw_string, lower_raw_string,
net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);
// Extract individual 'words' (as opposed to 'terms'; see below) from the // Extract individual 'words' (as opposed to 'terms'; see below) from the
// search string. When the user types "colspec=ID%20Mstone Release" we get // search string. When the user types "colspec=ID%20Mstone Release" we get
// four 'words': "colspec", "id", "mstone" and "release". // four 'words': "colspec", "id", "mstone" and "release".
String16Vector lower_words( String16Vector lower_words(
String16VectorFromString16(lower_unescaped_string, false, nullptr)); String16VectorFromString16(lower_unescaped_string, false, nullptr));
ScoredHistoryMatches scored_items; if (lower_words.empty())
continue;
// Do nothing if we have indexed no words (probably because we've not been
// initialized yet) or the search string has no words.
if (word_list_.empty() || lower_words.empty()) {
search_term_cache_.clear(); // Invalidate the term cache.
return scored_items;
}
// Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
// approach.
ResetSearchTermCache();
HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words); HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);
// Trim the candidate pool if it is large. Note that we do not filter out // Trim the candidate pool if it is large. Note that we do not filter out
// items that do not contain the search terms as proper substrings -- doing // items that do not contain the search terms as proper substrings --
// so is the performance-costly operation we are trying to avoid in order // doing so is the performance-costly operation we are trying to avoid in
// to maintain omnibox responsiveness. // order to maintain omnibox responsiveness.
const size_t kItemsToScoreLimit = 500; const size_t kItemsToScoreLimit = 500;
pre_filter_item_count_ = history_id_set.size(); pre_filter_item_count_ += history_id_set.size();
// If we trim the results set we do not want to cache the results for next // If we trim the results set we do not want to cache the results for next
// time as the user's ultimately desired result could easily be eliminated // time as the user's ultimately desired result could easily be eliminated
// in this early rough filter. // in this early rough filter.
bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit); if (pre_filter_item_count_ > kItemsToScoreLimit) {
if (was_trimmed) {
HistoryIDVector history_ids; HistoryIDVector history_ids;
std::copy(history_id_set.begin(), history_id_set.end(), std::copy(history_id_set.begin(), history_id_set.end(),
std::back_inserter(history_ids)); std::back_inserter(history_ids));
// Trim down the set by sorting by typed-count, visit-count, and last // Trim down the set by sorting by typed-count, visit-count, and last
// visit. // visit.
HistoryItemFactorGreater HistoryItemFactorGreater item_factor_functor(history_info_map_);
item_factor_functor(history_info_map_);
std::partial_sort(history_ids.begin(), std::partial_sort(history_ids.begin(),
history_ids.begin() + kItemsToScoreLimit, history_ids.begin() + kItemsToScoreLimit,
history_ids.end(), history_ids.end(), item_factor_functor);
item_factor_functor);
history_id_set.clear(); history_id_set.clear();
std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit, std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,
std::inserter(history_id_set, history_id_set.end())); std::inserter(history_id_set, history_id_set.end()));
post_filter_item_count_ = history_id_set.size(); post_filter_item_count_ += history_id_set.size();
} else {
post_filter_item_count_ += pre_filter_item_count_;
} }
// Pass over all of the candidates filtering out any without a proper // Pass over all of the candidates filtering out any without a proper
// substring match, inserting those which pass in order by score. Note that // substring match, inserting those which pass in order by score. Note that
// in this step we are using the raw search string complete with escaped // in this step we are using the raw search string complete with escaped
...@@ -234,34 +240,34 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( ...@@ -234,34 +240,34 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
// specific substring appears in the URL or page title. // specific substring appears in the URL or page title.
// We call these 'terms' (as opposed to 'words'; see above) as in this case // We call these 'terms' (as opposed to 'words'; see above) as in this case
// we only want to break up the search string on 'true' whitespace rather than // we only want to break up the search string on 'true' whitespace rather
// escaped whitespace. When the user types "colspec=ID%20Mstone Release" we // than escaped whitespace. When the user types "colspec=ID%20Mstone
// get two 'terms': "colspec=id%20mstone" and "release". // Release" we get two 'terms': "colspec=id%20mstone" and "release".
String16Vector lower_raw_terms = base::SplitString( String16Vector lower_raw_terms =
lower_raw_string, base::kWhitespaceUTF16, base::KEEP_WHITESPACE, base::SplitString(lower_raw_string, base::kWhitespaceUTF16,
base::SPLIT_WANT_NONEMPTY); base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
if (lower_raw_terms.empty()) {
// Don't score matches when there are no terms to score against. (It's // Don't score matches when there are no terms to score against. (It's
// possible that the word break iterater that extracts words to search // possible that the word break iterater that extracts words to search
// for in the database allows some whitespace "words" whereas SplitString // for in the database allows some whitespace "words" whereas SplitString
// excludes a long list of whitespace.) One could write a scoring // excludes a long list of whitespace.) One could write a scoring function
// function that gives a reasonable order to matches when there // that gives a reasonable order to matches when there are no terms (i.e.,
// are no terms (i.e., all the words are some form of whitespace), // all the words are some form of whitespace), but this is such a rare edge
// but this is such a rare edge case that it's not worth the time. // case that it's not worth the time.
return scored_items; if (lower_raw_terms.empty())
continue;
ScoredHistoryMatches temp_scored_items =
std::for_each(history_id_set.begin(), history_id_set.end(),
AddHistoryMatch(bookmark_model, template_url_service,
*this, lower_raw_string, lower_raw_terms,
base::Time::Now()))
.ScoredMatches();
scored_items.insert(scored_items.end(), temp_scored_items.begin(),
temp_scored_items.end());
} }
scored_items =
std::for_each(
history_id_set.begin(), history_id_set.end(),
AddHistoryMatch(bookmark_model, template_url_service, *this,
lower_raw_string, lower_raw_terms,
base::Time::Now())).ScoredMatches();
// Select and sort only the top |max_matches| results. // Select and sort only the top |max_matches| results.
if (scored_items.size() > max_matches) { if (scored_items.size() > max_matches) {
std::partial_sort(scored_items.begin(), std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches,
scored_items.begin() +
max_matches,
scored_items.end(), scored_items.end(),
ScoredHistoryMatch::MatchScoreGreater); ScoredHistoryMatch::MatchScoreGreater);
scored_items.resize(max_matches); scored_items.resize(max_matches);
...@@ -270,8 +276,7 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( ...@@ -270,8 +276,7 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
ScoredHistoryMatch::MatchScoreGreater); ScoredHistoryMatch::MatchScoreGreater);
} }
post_scoring_item_count_ = scored_items.size(); post_scoring_item_count_ = scored_items.size();
if (pre_filter_item_count_ > post_filter_item_count_) {
if (was_trimmed) {
search_term_cache_.clear(); // Invalidate the term cache. search_term_cache_.clear(); // Invalidate the term cache.
} else { } else {
// Remove any stale SearchTermCacheItems. // Remove any stale SearchTermCacheItems.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment