Commit eccdb2b8 authored by open.hyperion's avatar open.hyperion Committed by Commit bot

Updated and refactored URLIndexPrivateData::HistoryItemsForTerms to handle...

Updated and refactored URLIndexPrivateData::HistoryItemsForTerms to handle searching the history for both the original search string as well as the string with a break inserted.

The goal is to support mid-word autocomplete in the Omnibox.  Currently, if the user types "funtimes", inserts the cursor between the "n" and the "t", and begins typing the word "good", the Omnibox will search for URL results that match "fungood times" only.  We want to also search for "fungoodtimes".

BUG=591979
TEST=0. Clear browser history.
1. Visit the following link: https://twitter.com/fungoodtimes
2. Open a new browser tab.
3. Type into the Omnibox "funtimes".  Note the lack of the suggestion for the above URL.
4. Insert the cursor between the "n" and "t" in "funtimes" and type "good".
5. The above URL should show in the autocomplete list.

Review-Url: https://codereview.chromium.org/2364553004
Cr-Commit-Position: refs/heads/master@{#420721}
parent 3fd37d92
......@@ -387,6 +387,7 @@ Laszlo Gombos <l.gombos@samsung.com>
Laszlo Radanyi <bekkra@gmail.com>
Lauren Yeun Kim <lauren.yeun.kim@gmail.com>
Lauri Oherd <lauri.oherd@gmail.com>
Lavar Askew <open.hyperion@gmail.com>
Legend Lee <guanxian.li@intel.com>
Leith Bade <leith@leithalweapon.geek.nz>
Leo Wolf <jclw@ymail.com>
......
......@@ -101,8 +101,9 @@ struct TestURLInfo {
// The encoded stuff between /wiki/ and the # is 第二次世界大戦
{"http://ja.wikipedia.org/wiki/%E7%AC%AC%E4%BA%8C%E6%AC%A1%E4%B8%96%E7%95"
"%8C%E5%A4%A7%E6%88%A6#.E3.83.B4.E3.82.A7.E3.83.AB.E3.82.B5.E3.82.A4.E3."
"83.A6.E4.BD.93.E5.88.B6", "Title Unimportant", 2, 2, 0}
};
"83.A6.E4.BD.93.E5.88.B6",
"Title Unimportant", 2, 2, 0},
{"https://twitter.com/fungoodtimes", "relatable!", 1, 1, 0}};
// Waits for OnURLsDeletedNotification and when run quits the supplied run loop.
class WaitForURLsDeletedObserver : public history::HistoryServiceObserver {
......@@ -496,6 +497,16 @@ TEST_F(HistoryQuickProviderTest, SingleMatchWithCursor) {
base::string16());
}
TEST_F(HistoryQuickProviderTest, MatchWithAndWithoutCursorWordBreak) {
  // The single history entry we expect back; its path contains the typed
  // text as one unbroken word.
  const std::string kExpectedUrl = "https://twitter.com/fungoodtimes";
  std::vector<std::string> expected_urls(1, kExpectedUrl);

  // Offset 7 puts the cursor immediately after "good" (i.e. "fungood|times").
  // The desired result must still be retrieved, but it should not be allowed
  // to be the default match.
  RunTestWithCursor(ASCIIToUTF16("fungoodtimes"), 7, false, expected_urls,
                    false, ASCIIToUTF16(kExpectedUrl), base::string16());
}
TEST_F(HistoryQuickProviderTest, WordBoundariesWithPunctuationMatch) {
std::vector<std::string> expected_urls;
expected_urls.push_back("http://popularsitewithpathonly.com/moo");
......
......@@ -153,79 +153,85 @@ URLIndexPrivateData::URLIndexPrivateData()
}
ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
base::string16 search_string,
base::string16 original_search_string,
size_t cursor_position,
size_t max_matches,
bookmarks::BookmarkModel* bookmark_model,
TemplateURLService* template_url_service) {
// If cursor position is set and useful (not at either end of the
// string), allow the search string to be broken at cursor position.
// We do this by pretending there's a space where the cursor is.
if ((cursor_position != base::string16::npos) &&
(cursor_position < search_string.length()) &&
(cursor_position > 0)) {
search_string.insert(cursor_position, base::ASCIIToUTF16(" "));
}
pre_filter_item_count_ = 0;
post_filter_item_count_ = 0;
post_scoring_item_count_ = 0;
// This list will contain the original search string and any other string
// transformations.
String16Vector search_strings;
search_strings.push_back(original_search_string);
if ((cursor_position != base::string16::npos) &&
(cursor_position < original_search_string.length()) &&
(cursor_position > 0)) {
// The original search_string broken at cursor position. This is one type of
// transformation.
base::string16 transformed_search_string(original_search_string);
transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" "));
search_strings.push_back(transformed_search_string);
}
ScoredHistoryMatches scored_items;
// Invalidate the term cache and return if we have indexed no words (probably
// because we've not been initialized yet).
if (word_list_.empty()) {
search_term_cache_.clear();
return scored_items;
}
// Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
// approach.
ResetSearchTermCache();
for (const base::string16& search_string : search_strings) {
// The search string we receive may contain escaped characters. For reducing
// the index we need individual, lower-cased words, ignoring escapings. For
// the final filtering we need whitespace separated substrings possibly
// containing escaped characters.
base::string16 lower_raw_string(base::i18n::ToLower(search_string));
base::string16 lower_unescaped_string =
net::UnescapeURLComponent(lower_raw_string,
base::string16 lower_unescaped_string = net::UnescapeURLComponent(
lower_raw_string,
net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);
// Extract individual 'words' (as opposed to 'terms'; see below) from the
// search string. When the user types "colspec=ID%20Mstone Release" we get
// four 'words': "colspec", "id", "mstone" and "release".
String16Vector lower_words(
String16VectorFromString16(lower_unescaped_string, false, nullptr));
ScoredHistoryMatches scored_items;
// Do nothing if we have indexed no words (probably because we've not been
// initialized yet) or the search string has no words.
if (word_list_.empty() || lower_words.empty()) {
search_term_cache_.clear(); // Invalidate the term cache.
return scored_items;
}
// Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
// approach.
ResetSearchTermCache();
if (lower_words.empty())
continue;
HistoryIDSet history_id_set = HistoryIDSetFromWords(lower_words);
// Trim the candidate pool if it is large. Note that we do not filter out
// items that do not contain the search terms as proper substrings -- doing
// so is the performance-costly operation we are trying to avoid in order
// to maintain omnibox responsiveness.
// items that do not contain the search terms as proper substrings --
// doing so is the performance-costly operation we are trying to avoid in
// order to maintain omnibox responsiveness.
const size_t kItemsToScoreLimit = 500;
pre_filter_item_count_ = history_id_set.size();
pre_filter_item_count_ += history_id_set.size();
// If we trim the results set we do not want to cache the results for next
// time as the user's ultimately desired result could easily be eliminated
// in this early rough filter.
bool was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);
if (was_trimmed) {
if (pre_filter_item_count_ > kItemsToScoreLimit) {
HistoryIDVector history_ids;
std::copy(history_id_set.begin(), history_id_set.end(),
std::back_inserter(history_ids));
// Trim down the set by sorting by typed-count, visit-count, and last
// visit.
HistoryItemFactorGreater
item_factor_functor(history_info_map_);
HistoryItemFactorGreater item_factor_functor(history_info_map_);
std::partial_sort(history_ids.begin(),
history_ids.begin() + kItemsToScoreLimit,
history_ids.end(),
item_factor_functor);
history_ids.end(), item_factor_functor);
history_id_set.clear();
std::copy(history_ids.begin(), history_ids.begin() + kItemsToScoreLimit,
std::inserter(history_id_set, history_id_set.end()));
post_filter_item_count_ = history_id_set.size();
post_filter_item_count_ += history_id_set.size();
} else {
post_filter_item_count_ += pre_filter_item_count_;
}
// Pass over all of the candidates filtering out any without a proper
// substring match, inserting those which pass in order by score. Note that
// in this step we are using the raw search string complete with escaped
......@@ -234,34 +240,34 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
// specific substring appears in the URL or page title.
// We call these 'terms' (as opposed to 'words'; see above) as in this case
// we only want to break up the search string on 'true' whitespace rather than
// escaped whitespace. When the user types "colspec=ID%20Mstone Release" we
// get two 'terms': "colspec=id%20mstone" and "release".
String16Vector lower_raw_terms = base::SplitString(
lower_raw_string, base::kWhitespaceUTF16, base::KEEP_WHITESPACE,
base::SPLIT_WANT_NONEMPTY);
if (lower_raw_terms.empty()) {
// we only want to break up the search string on 'true' whitespace rather
// than escaped whitespace. When the user types "colspec=ID%20Mstone
// Release" we get two 'terms': "colspec=id%20mstone" and "release".
String16Vector lower_raw_terms =
base::SplitString(lower_raw_string, base::kWhitespaceUTF16,
base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
// Don't score matches when there are no terms to score against. (It's
// possible that the word break iterator that extracts words to search
// for in the database allows some whitespace "words" whereas SplitString
// excludes a long list of whitespace.) One could write a scoring
// function that gives a reasonable order to matches when there
// are no terms (i.e., all the words are some form of whitespace),
// but this is such a rare edge case that it's not worth the time.
return scored_items;
// excludes a long list of whitespace.) One could write a scoring function
// that gives a reasonable order to matches when there are no terms (i.e.,
// all the words are some form of whitespace), but this is such a rare edge
// case that it's not worth the time.
if (lower_raw_terms.empty())
continue;
ScoredHistoryMatches temp_scored_items =
std::for_each(history_id_set.begin(), history_id_set.end(),
AddHistoryMatch(bookmark_model, template_url_service,
*this, lower_raw_string, lower_raw_terms,
base::Time::Now()))
.ScoredMatches();
scored_items.insert(scored_items.end(), temp_scored_items.begin(),
temp_scored_items.end());
}
scored_items =
std::for_each(
history_id_set.begin(), history_id_set.end(),
AddHistoryMatch(bookmark_model, template_url_service, *this,
lower_raw_string, lower_raw_terms,
base::Time::Now())).ScoredMatches();
// Select and sort only the top |max_matches| results.
if (scored_items.size() > max_matches) {
std::partial_sort(scored_items.begin(),
scored_items.begin() +
max_matches,
std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches,
scored_items.end(),
ScoredHistoryMatch::MatchScoreGreater);
scored_items.resize(max_matches);
......@@ -270,8 +276,7 @@ ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
ScoredHistoryMatch::MatchScoreGreater);
}
post_scoring_item_count_ = scored_items.size();
if (was_trimmed) {
if (pre_filter_item_count_ > post_filter_item_count_) {
search_term_cache_.clear(); // Invalidate the term cache.
} else {
// Remove any stale SearchTermCacheItems.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment