Commit 2c0a4317 authored by pkasting@chromium.org

Cleanup:

  * Remove using directives
  * Make more functions file-scope instead of private + static
  * Make .cc and .h order match

BUG=none
TEST=none
Review URL: http://codereview.chromium.org/7661005

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@96973 0039d316-1c4b-4281-b951-d872f2087c98
parent d600e2d2
...@@ -26,18 +26,141 @@ ...@@ -26,18 +26,141 @@
#include "googleurl/src/url_util.h" #include "googleurl/src/url_util.h"
#include "net/base/net_util.h" #include "net/base/net_util.h"
using base::Time; namespace {
using base::TimeDelta;
using base::TimeTicks; // Ensures that |matches| contains an entry for |info|, which may mean adding a
using history::Prefix; // new such entry (using |input_location| and |match_in_scheme|).
using history::Prefixes; //
using history::HistoryMatch; // If |promote| is true, this also ensures the entry is the first element in
using history::HistoryMatches; // |matches|, moving or adding it to the front as appropriate. When |promote|
// is false, existing matches are left in place, and newly added matches are
namespace history { // placed at the back.
void EnsureMatchPresent(const history::URLRow& info,
// Returns true if |url| is just a host (e.g. "http://www.google.com/") and size_t input_location,
// not some other subpage (e.g. "http://www.google.com/foo.html"). bool match_in_scheme,
history::HistoryMatches* matches,
bool promote) {
// |matches| may already have an entry for this.
for (history::HistoryMatches::iterator i(matches->begin());
i != matches->end(); ++i) {
if (i->url_info.url() == info.url()) {
// Rotate it to the front if the caller wishes.
if (promote)
std::rotate(matches->begin(), i, i + 1);
return;
}
}
// No entry, so create one.
history::HistoryMatch match(info, input_location, match_in_scheme, true);
if (promote)
matches->push_front(match);
else
matches->push_back(match);
}
// Tries to reduce |match|'s URL to a host-only URL (the same URL with an empty
// path) that the user's |input| still matches at |match.input_location|.
// Returns that host-only URL on success, or an empty GURL when no such
// suggestion can be made.
GURL ConvertToHostOnly(const history::HistoryMatch& match,
                       const string16& input) {
  // Host-only suggestions are only attempted for valid URLs with a standard
  // scheme (nonstandard schemes have no authority section, so there is no host
  // worth suggesting).  File URLs are standard, but a host suggestion is not
  // useful for them either.
  const GURL& full_url = match.url_info.url();
  const bool host_suggestion_possible =
      full_url.is_valid() && full_url.IsStandard() && !full_url.SchemeIsFile();
  if (!host_suggestion_possible)
    return GURL();

  // Drop the path, then make sure what the user typed still fits inside, and
  // agrees with, the shortened URL.
  GURL host_only_url = full_url.GetWithEmptyPath();
  if (host_only_url.spec().length() <
      (match.input_location + input.length())) {
    return GURL();  // The typed text runs past the end of the host-only URL.
  }

  const string16 host_only_spec = UTF8ToUTF16(host_only_url.spec());
  if (host_only_spec.compare(match.input_location, input.length(), input) != 0)
    return GURL();  // The typed text no longer appears at the match location.

  return host_only_url;
}
// See if a shorter version of the best match should be created, and if so place
// it at the front of |matches|. This can suggest history URLs that are
// prefixes of the best match (if they've been visited enough, compared to the
// best match), or create host-only suggestions even when they haven't been
// visited before: if the user visited http://example.com/asdf once, we'll
// suggest http://example.com/ even if they've never been to it.
//
// |db| is queried both for candidate shorter URLs and for the row describing
// the search base. |params| supplies the user's input text.
// |have_what_you_typed_match| and |what_you_typed_match| are used only to
// decide whether the search base itself may be added as a match. The best
// match is taken to be the front element of |matches|; |matches| may be
// modified through EnsureMatchPresent(). Does nothing if |matches| is empty.
void PromoteOrCreateShorterSuggestion(
history::URLDatabase* db,
const HistoryURLProviderParams& params,
bool have_what_you_typed_match,
const AutocompleteMatch& what_you_typed_match,
history::HistoryMatches* matches) {
if (matches->empty())
return; // No matches, nothing to do.
// Determine the base URL from which to search, and whether that URL could
// itself be added as a match. We can add the base iff it's not "effectively
// the same" as any "what you typed" match.
const history::HistoryMatch& match = matches->front();
GURL search_base = ConvertToHostOnly(match, params.input.text());
bool can_add_search_base_to_matches = !have_what_you_typed_match;
if (search_base.is_empty()) {
// Search from what the user typed when we couldn't reduce the best match
// to a host. Careful: use a substring of |match| here, rather than the
// first match in |params|, because they might have different prefixes. If
// the user typed "google.com", |what_you_typed_match| will hold
// "http://google.com/", but |match| might begin with
// "http://www.google.com/".
// TODO: this should be cleaned up, and is probably incorrect for IDN.
std::string new_match = match.url_info.url().possibly_invalid_spec().
substr(0, match.input_location + params.input.text().length());
search_base = GURL(new_match);
// TODO(mrossetti): There is a degenerate case where the following may
// cause a failure: http://www/~someword/fubar.html. Diagnose.
// See: http://crbug.com/50101
if (search_base.is_empty())
return; // Can't construct a valid URL from which to start a search.
} else if (!can_add_search_base_to_matches) {
// A what-you-typed match exists; the host-only base may still be added as
// long as it is a different URL from that match's destination.
can_add_search_base_to_matches =
(search_base != what_you_typed_match.destination_url);
}
if (search_base == match.url_info.url())
return; // Couldn't shorten |match|, so no range of URLs to search over.
// Search the DB for short URLs between our base and |match|.
// |info| starts out describing |search_base| and is passed to the DB calls
// below as their output row.
history::URLRow info(search_base);
bool promote = true;
// A short URL is only worth suggesting if it's been visited at least a third
// as often as the longer URL.
// (Integer arithmetic: this is ceil(visit_count / 3) for visit_count >= 1.)
const int min_visit_count = ((match.url_info.visit_count() - 1) / 3) + 1;
// For stability between the in-memory and on-disk autocomplete passes, when
// the long URL has been typed before, only suggest shorter URLs that have
// also been typed. Otherwise, the on-disk pass could suggest a shorter URL
// (which hasn't been typed) that the in-memory pass doesn't know about,
// thereby making the top match, and thus the behavior of inline
// autocomplete, unstable.
const int min_typed_count = match.url_info.typed_count() ? 1 : 0;
if (!db->FindShortestURLFromBase(search_base.possibly_invalid_spec(),
match.url_info.url().possibly_invalid_spec(), min_visit_count,
min_typed_count, can_add_search_base_to_matches, &info)) {
if (!can_add_search_base_to_matches)
return; // Couldn't find anything and can't add the search base, bail.
// Try to get info on the search base itself. Promote it to the top if the
// original best match isn't good enough to autocomplete.
// NOTE(review): the return value is ignored here; presumably |info| keeps
// the |search_base| URL it was constructed with if the lookup fails --
// confirm against URLDatabase::GetRowForURL().
db->GetRowForURL(search_base, &info);
promote = match.url_info.typed_count() <= 1;
}
// Promote or add the desired URL to the list of matches.
EnsureMatchPresent(info, match.input_location, match.match_in_scheme,
matches, promote);
}
// Returns true if |url| is just a host (e.g. "http://www.google.com/") and not
// some other subpage (e.g. "http://www.google.com/foo.html").
bool IsHostOnly(const GURL& url) { bool IsHostOnly(const GURL& url) {
DCHECK(url.is_valid()); DCHECK(url.is_valid());
return (!url.has_path() || (url.path() == "/")) && !url.has_query() && return (!url.has_path() || (url.path() == "/")) && !url.has_query() &&
...@@ -45,7 +168,8 @@ bool IsHostOnly(const GURL& url) { ...@@ -45,7 +168,8 @@ bool IsHostOnly(const GURL& url) {
} }
// Acts like the > operator for URLInfo classes. // Acts like the > operator for URLInfo classes.
bool CompareHistoryMatch(const HistoryMatch& a, const HistoryMatch& b) { bool CompareHistoryMatch(const history::HistoryMatch& a,
const history::HistoryMatch& b) {
// A URL that has been typed at all is better than one that has never been // A URL that has been typed at all is better than one that has never been
// typed. (Note "!"s on each side) // typed. (Note "!"s on each side)
if (!a.url_info.typed_count() != !b.url_info.typed_count()) if (!a.url_info.typed_count() != !b.url_info.typed_count())
...@@ -63,8 +187,8 @@ bool CompareHistoryMatch(const HistoryMatch& a, const HistoryMatch& b) { ...@@ -63,8 +187,8 @@ bool CompareHistoryMatch(const HistoryMatch& a, const HistoryMatch& b) {
// For URLs that have each been typed once, a host (alone) is better than a // For URLs that have each been typed once, a host (alone) is better than a
// page inside. // page inside.
if (a.url_info.typed_count() == 1) { if (a.url_info.typed_count() == 1) {
const bool a_is_host_only = history::IsHostOnly(a.url_info.url()); const bool a_is_host_only = IsHostOnly(a.url_info.url());
if (a_is_host_only != history::IsHostOnly(b.url_info.url())) if (a_is_host_only != IsHostOnly(b.url_info.url()))
return a_is_host_only; return a_is_host_only;
} }
...@@ -76,29 +200,46 @@ bool CompareHistoryMatch(const HistoryMatch& a, const HistoryMatch& b) { ...@@ -76,29 +200,46 @@ bool CompareHistoryMatch(const HistoryMatch& a, const HistoryMatch& b) {
return a.url_info.last_visit() > b.url_info.last_visit(); return a.url_info.last_visit() > b.url_info.last_visit();
} }
// Given the user's |input| and a |match| created from it, reduce the // Determines the confidence for a |match| when compared to all the |matches|.
// match's URL to just a host. If this host still matches the user input, // Returns a number in the range [0, 1].
// return it. Returns the empty string on failure. float CalculateConfidence(const history::HistoryMatch& match,
GURL ConvertToHostOnly(const HistoryMatch& match, const string16& input) { const history::HistoryMatches& matches) {
// See if we should try to do host-only suggestions for this URL. Nonstandard // Calculate a score based on typed count.
// schemes means there's no authority section, so suggesting the host name const float typed_numerator = match.url_info.typed_count();
// is useless. File URLs are standard, but host suggestion is not useful for float typed_denominator = 0.0f;
// them either. for (history::HistoryMatches::const_iterator it = matches.begin();
const GURL& url = match.url_info.url(); it != matches.end(); ++it) {
if (!url.is_valid() || !url.IsStandard() || url.SchemeIsFile()) typed_denominator += it->url_info.typed_count();
return GURL(); }
const float typed_score = (typed_denominator > 0.0f) ?
(typed_numerator / typed_denominator) : 0.0f;
// Transform to a host-only match. Bail if the host no longer matches the // Calculate a score based on visit count
// user input (e.g. because the user typed more than just a host). const float visit_numerator = match.url_info.visit_count();
GURL host = url.GetWithEmptyPath(); float visit_denominator = 0.0f;
if ((host.spec().length() < (match.input_location + input.length()))) for (history::HistoryMatches::const_iterator it = matches.begin();
return GURL(); // User typing is longer than this host suggestion. it != matches.end(); ++it) {
visit_denominator += it->url_info.visit_count();
}
const float visit_score = (visit_denominator > 0.0f) ?
(visit_numerator / visit_denominator) : 0.0f;
const string16 spec = UTF8ToUTF16(host.spec()); // Calculate a score based on innermost matching.
if (spec.compare(match.input_location, input.length(), input)) const float innermost_score = (match.innermost_match ? 1.0f : 0.0f);
return GURL(); // User typing is no longer a prefix.
return host; // TODO(dominich): Add a boost for bookmarked pages?
// Prefer typed count to visit count as:
// - It's a better indicator of what the user wants to open given that they
// are typing in the address bar (users tend to open certain URLs by typing
// and others by e.g. bookmarks, so visit_count is a good indicator of
// overall interest but a bad one for specifically omnibox interest).
// - Since the DB query is sorted by typed_count, the results may be
// effectively a random selection as far as visit_counts are concerned
// (meaning many high-visit_count-URLs may be present in one query and
// absent in a similar one), leading to wild swings in confidence for the
// same result across distinct queries.
// Add a boost for innermost matches (matches after scheme or 'www.').
return (0.5f * typed_score) + (0.3f * visit_score) + (0.2f * innermost_score);
} }
} // namespace history } // namespace history
...@@ -116,7 +257,8 @@ HistoryURLProviderParams::HistoryURLProviderParams( ...@@ -116,7 +257,8 @@ HistoryURLProviderParams::HistoryURLProviderParams(
dont_suggest_exact_input(false) { dont_suggest_exact_input(false) {
} }
HistoryURLProviderParams::~HistoryURLProviderParams() {} HistoryURLProviderParams::~HistoryURLProviderParams() {
}
HistoryURLProvider::HistoryURLProvider(ACProviderListener* listener, HistoryURLProvider::HistoryURLProvider(ACProviderListener* listener,
Profile* profile) Profile* profile)
...@@ -158,12 +300,12 @@ void HistoryURLProvider::ExecuteWithDB(history::HistoryBackend* backend, ...@@ -158,12 +300,12 @@ void HistoryURLProvider::ExecuteWithDB(history::HistoryBackend* backend,
if (!db) { if (!db) {
params->failed = true; params->failed = true;
} else if (!params->cancel_flag.IsSet()) { } else if (!params->cancel_flag.IsSet()) {
TimeTicks beginning_time = TimeTicks::Now(); base::TimeTicks beginning_time = base::TimeTicks::Now();
DoAutocomplete(backend, db, params); DoAutocomplete(backend, db, params);
UMA_HISTOGRAM_TIMES("Autocomplete.HistoryAsyncQueryTime", UMA_HISTOGRAM_TIMES("Autocomplete.HistoryAsyncQueryTime",
TimeTicks::Now() - beginning_time); base::TimeTicks::Now() - beginning_time);
} }
// Return the results (if any) to the main thread. // Return the results (if any) to the main thread.
...@@ -198,10 +340,10 @@ void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend, ...@@ -198,10 +340,10 @@ void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend,
// Get the matching URLs from the DB // Get the matching URLs from the DB
typedef std::vector<history::URLRow> URLRowVector; typedef std::vector<history::URLRow> URLRowVector;
URLRowVector url_matches; URLRowVector url_matches;
HistoryMatches history_matches; history::HistoryMatches history_matches;
for (Prefixes::const_iterator i(prefixes_.begin()); i != prefixes_.end(); for (history::Prefixes::const_iterator i(prefixes_.begin());
++i) { i != prefixes_.end(); ++i) {
if (params->cancel_flag.IsSet()) if (params->cancel_flag.IsSet())
return; // Canceled in the middle of a query, give up. return; // Canceled in the middle of a query, give up.
// We only need kMaxMatches results in the end, but before we get there we // We only need kMaxMatches results in the end, but before we get there we
...@@ -214,9 +356,9 @@ void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend, ...@@ -214,9 +356,9 @@ void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend,
kMaxMatches * 2, (backend == NULL), &url_matches); kMaxMatches * 2, (backend == NULL), &url_matches);
for (URLRowVector::const_iterator j(url_matches.begin()); for (URLRowVector::const_iterator j(url_matches.begin());
j != url_matches.end(); ++j) { j != url_matches.end(); ++j) {
const Prefix* best_prefix = BestPrefix(j->url(), string16()); const history::Prefix* best_prefix = BestPrefix(j->url(), string16());
DCHECK(best_prefix != NULL); DCHECK(best_prefix != NULL);
history_matches.push_back(HistoryMatch(*j, i->prefix.length(), history_matches.push_back(history::HistoryMatch(*j, i->prefix.length(),
!i->num_components, !i->num_components,
i->num_components >= best_prefix->num_components)); i->num_components >= best_prefix->num_components));
} }
...@@ -265,7 +407,7 @@ void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend, ...@@ -265,7 +407,7 @@ void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend,
// Convert the history matches to autocomplete matches. // Convert the history matches to autocomplete matches.
for (size_t i = first_match; i < history_matches.size(); ++i) { for (size_t i = first_match; i < history_matches.size(); ++i) {
const HistoryMatch& match = history_matches[i]; const history::HistoryMatch& match = history_matches[i];
DCHECK(!have_what_you_typed_match || DCHECK(!have_what_you_typed_match ||
(match.url_info.url() != (match.url_info.url() !=
GURL(params->matches.front().destination_url))); GURL(params->matches.front().destination_url)));
...@@ -304,325 +446,57 @@ void HistoryURLProvider::QueryComplete( ...@@ -304,325 +446,57 @@ void HistoryURLProvider::QueryComplete(
listener_->OnProviderUpdate(true); listener_->OnProviderUpdate(true);
} }
AutocompleteMatch HistoryURLProvider::SuggestExactInput( HistoryURLProvider::~HistoryURLProvider() {
const AutocompleteInput& input, // Note: This object can get leaked on shutdown if there are pending
bool trim_http) { // requests on the database (which hold a reference to us). Normally, these
// TODO(dominich): Find a confidence measure for this. // messages get flushed for each thread. We do a round trip from main, to
AutocompleteMatch match(this, // history, back to main while holding a reference. If the main thread
CalculateRelevance(input.type(), WHAT_YOU_TYPED, 0), 0.0f, false, // completes before the history thread, the message to delegate back to the
AutocompleteMatch::URL_WHAT_YOU_TYPED); // main thread will not run and the reference will leak. Therefore, don't do
UMA_HISTOGRAM_COUNTS_100("Autocomplete.Confidence_HistoryUrl", // anything on destruction.
match.confidence * 100); }
const GURL& url = input.canonicalized_url(); // static
if (url.is_valid()) { history::Prefixes HistoryURLProvider::GetPrefixes() {
match.destination_url = url; // We'll complete text following these prefixes.
// NOTE: There's no requirement that these be in any particular order.
history::Prefixes prefixes;
prefixes.push_back(history::Prefix(ASCIIToUTF16("https://www."), 2));
prefixes.push_back(history::Prefix(ASCIIToUTF16("http://www."), 2));
prefixes.push_back(history::Prefix(ASCIIToUTF16("ftp://ftp."), 2));
prefixes.push_back(history::Prefix(ASCIIToUTF16("ftp://www."), 2));
prefixes.push_back(history::Prefix(ASCIIToUTF16("https://"), 1));
prefixes.push_back(history::Prefix(ASCIIToUTF16("http://"), 1));
prefixes.push_back(history::Prefix(ASCIIToUTF16("ftp://"), 1));
// Empty string catches within-scheme matches as well.
prefixes.push_back(history::Prefix(string16(), 0));
return prefixes;
}
// Trim off "http://" if the user didn't type it. // static
// NOTE: We use TrimHttpPrefix() here rather than StringForURLDisplay() to int HistoryURLProvider::CalculateRelevance(AutocompleteInput::Type input_type,
// strip the scheme as we need to know the offset so we can adjust the MatchType match_type,
// |match_location| below. StringForURLDisplay() and TrimHttpPrefix() have size_t match_number) {
// slightly different behavior as well (the latter will strip even without switch (match_type) {
// two slashes after the scheme). case INLINE_AUTOCOMPLETE:
string16 display_string(StringForURLDisplay(url, false, false)); return 1400;
const size_t offset = trim_http ? TrimHttpPrefix(&display_string) : 0;
match.fill_into_edit =
AutocompleteInput::FormattedStringWithEquivalentMeaning(url,
display_string);
// NOTE: Don't set match.input_location (to allow inline autocompletion)
// here, it's surprising and annoying.
// Try to highlight "innermost" match location. If we fix up "w" into case WHAT_YOU_TYPED:
// "www.w.com", we want to highlight the fifth character, not the first. return 1200;
// This relies on match.destination_url being the non-prefix-trimmed version
// of match.contents.
match.contents = display_string;
const Prefix* best_prefix = BestPrefix(match.destination_url, input.text());
// Because of the vagaries of GURL, it's possible for match.destination_url
// to not contain the user's input at all. In this case don't mark anything
// as a match.
const size_t match_location = (best_prefix == NULL) ?
string16::npos : best_prefix->prefix.length() - offset;
AutocompleteMatch::ClassifyLocationInString(match_location,
input.text().length(),
match.contents.length(),
ACMatchClassification::URL,
&match.contents_class);
match.is_history_what_you_typed_match = true; default:
return 900 + static_cast<int>(match_number);
} }
return match;
} }
bool HistoryURLProvider::FixupExactSuggestion(history::URLDatabase* db, void HistoryURLProvider::RunAutocompletePasses(
const AutocompleteInput& input, const AutocompleteInput& input,
AutocompleteMatch* match, bool fixup_input_and_run_pass_1) {
HistoryMatches* matches) const { matches_.clear();
DCHECK(match != NULL);
DCHECK(matches != NULL);
// Tricky corner case: The user has visited intranet site "foo", but not if ((input.type() == AutocompleteInput::INVALID) ||
// internet site "www.foo.com". He types in foo (getting an exact match), (input.type() == AutocompleteInput::FORCED_QUERY))
// then tries to hit ctrl-enter. When pressing ctrl, the what-you-typed return;
// match ("www.foo.com") doesn't show up in history, and thus doesn't get a
// promoted relevance, but a different match from the input ("foo") does, and
// gets promoted for inline autocomplete. Thus instead of getting
// "www.foo.com", the user still gets "foo" (and, before hitting enter,
// probably gets an odd-looking inline autocomplete of "/").
//
// We detect this crazy case as follows:
// * If the what-you-typed match is not in the history DB,
// * and the user has specified a TLD,
// * and the input _without_ the TLD _is_ in the history DB,
// * ...then just before pressing "ctrl" the best match we supplied was the
// what-you-typed match, so stick with it by promoting this.
history::URLRow info;
MatchType type = INLINE_AUTOCOMPLETE;
if (!db->GetRowForURL(match->destination_url, &info)) {
if (input.desired_tld().empty())
return false;
GURL destination_url(URLFixerUpper::FixupURL(UTF16ToUTF8(input.text()),
std::string()));
if (!db->GetRowForURL(destination_url, NULL))
return false;
// If we got here, then we hit the tricky corner case. Make sure that
// |info| corresponds to the right URL.
info = history::URLRow(match->destination_url);
} else {
// We have data for this match, use it.
match->deletable = true;
match->description = info.title();
AutocompleteMatch::ClassifyMatchInString(input.text(),
info.title(),
ACMatchClassification::NONE, &match->description_class);
if (!info.typed_count()) {
// If we reach here, we must be in the second pass, and we must not have
// promoted this match as an exact match during the first pass. That
// means it will have been outscored by the "search what you typed match".
// We need to maintain that ordering in order to not make the destination
// for the user's typing change depending on when they hit enter. So
// lower the score here enough to let the search provider continue to
// outscore this match.
type = WHAT_YOU_TYPED;
}
}
// Promote as an exact match.
match->relevance = CalculateRelevance(input.type(), type, 0);
// Put it on the front of the HistoryMatches for redirect culling.
EnsureMatchPresent(info, string16::npos, false, matches, true);
return true;
}
// Decides whether |match| has been typed often enough to be offered for inline
// autocompletion.  If so, appends the corresponding INLINE_AUTOCOMPLETE match
// to |params->matches|, sets |params->dont_suggest_exact_input|, and returns
// true.  Otherwise returns false and leaves |params| untouched.
bool HistoryURLProvider::PromoteMatchForInlineAutocomplete(
    HistoryURLProviderParams* params,
    const HistoryMatch& match,
    const HistoryMatches& matches) {
  // The bar for promotion is a typed count of at least n, where n == 1 for
  // "simple" (host-only) URLs and n == 2 for everything else.  Longer URLs get
  // the higher bar because users are less likely to want to revisit them: even
  // though pasted-in URLs don't bump the typed count, a URL that was manually
  // edited or typed out by hand once shouldn't immediately start
  // autocompleting.
  if (!match.url_info.typed_count())
    return false;  // Never typed at all.
  if ((match.url_info.typed_count() == 1) &&
      !history::IsHostOnly(match.url_info.url())) {
    return false;  // Typed only once, and not a bare host.
  }

  // Suppose the user has typed "foo.com" and visited (but not typed) "foo/",
  // and the input is "foo".  The first pass can get here for "foo.com", while
  // the second pass could otherwise suggest the exact input as a better URL.
  // Both passes must agree, and the first pass cannot know about "foo/", so
  // getting this far forbids any later pass from suggesting the exact input as
  // a better match.
  params->dont_suggest_exact_input = true;

  params->matches.push_back(
      HistoryMatchToACMatch(params, match, matches, INLINE_AUTOCOMPLETE, 0));
  return true;
}
// Empty destructor: no explicit cleanup is performed here.
HistoryURLProvider::~HistoryURLProvider() {}
// static
// Builds the list of URL prefixes after which typed text will be completed.
// Each prefix is paired with its component count.
history::Prefixes HistoryURLProvider::GetPrefixes() {
  // NOTE: There's no requirement that these be in any particular order.
  static const struct {
    const char* text;
    int num_components;
  } kNonEmptyPrefixes[] = {
    { "https://www.", 2 },
    { "http://www.", 2 },
    { "ftp://ftp.", 2 },
    { "ftp://www.", 2 },
    { "https://", 1 },
    { "http://", 1 },
    { "ftp://", 1 },
  };
  const size_t kNumNonEmptyPrefixes =
      sizeof(kNonEmptyPrefixes) / sizeof(kNonEmptyPrefixes[0]);

  Prefixes result;
  for (size_t i = 0; i < kNumNonEmptyPrefixes; ++i) {
    result.push_back(Prefix(ASCIIToUTF16(kNonEmptyPrefixes[i].text),
                            kNonEmptyPrefixes[i].num_components));
  }

  // The empty prefix goes last; it catches within-scheme matches as well.
  result.push_back(Prefix(string16(), 0));
  return result;
}
// static
// Maps a |match_type| to its relevance score.  Inline-autocomplete and
// what-you-typed matches get fixed scores; every other type gets a base score
// plus |match_number|.  |input_type| is not consulted.
int HistoryURLProvider::CalculateRelevance(AutocompleteInput::Type input_type,
                                           MatchType match_type,
                                           size_t match_number) {
  if (match_type == INLINE_AUTOCOMPLETE)
    return 1400;

  if (match_type == WHAT_YOU_TYPED)
    return 1200;

  return 900 + static_cast<int>(match_number);
}
// static
// Computes a confidence value for |match| relative to the full set of
// |matches|, as a weighted sum of three components:
//   * |match|'s share of the typed counts summed over |matches|,
//   * |match|'s share of the visit counts summed over |matches|,
//   * whether |match| is an innermost match.
// A share is treated as zero when the corresponding total is not positive.
float HistoryURLProvider::CalculateConfidence(
    const history::HistoryMatch& match,
    const history::HistoryMatches& matches) {
  // Accumulate both totals in a single walk over |matches|.
  float total_typed = 0.0f;
  float total_visits = 0.0f;
  for (history::HistoryMatches::const_iterator candidate = matches.begin();
       candidate != matches.end(); ++candidate) {
    total_typed += candidate->url_info.typed_count();
    total_visits += candidate->url_info.visit_count();
  }

  // Share of all typed counts that belongs to |match|.
  const float match_typed = match.url_info.typed_count();
  float typed_share = 0.0f;
  if (total_typed > 0.0f)
    typed_share = match_typed / total_typed;

  // Share of all visit counts that belongs to |match|.
  const float match_visits = match.url_info.visit_count();
  float visit_share = 0.0f;
  if (total_visits > 0.0f)
    visit_share = match_visits / total_visits;

  // Innermost matches (matches after scheme or 'www.') earn a boost.
  float innermost_bonus = 0.0f;
  if (match.innermost_match)
    innermost_bonus = 1.0f;

  // TODO(dominich): Add a boost for bookmarked pages?
  // Typed count is weighted above visit count because:
  // - It better reflects what the user wants to open while typing in the
  //   address bar (users tend to open certain URLs by typing and others by
  //   e.g. bookmarks, so visit_count is a good indicator of overall interest
  //   but a bad one for specifically omnibox interest).
  // - The DB query is sorted by typed_count, so the results may be effectively
  //   a random selection as far as visit_counts are concerned (many
  //   high-visit_count URLs may be present in one query and absent in a
  //   similar one), leading to wild swings in confidence for the same result
  //   across distinct queries.
  return (0.5f * typed_share) + (0.3f * visit_share) + (0.2f * innermost_bonus);
}
// static
// Checks whether a shorter version of the best match (the front element of
// |matches|) should be suggested, and if so adds it to, or promotes it within,
// |matches| via EnsureMatchPresent(). Does nothing if |matches| is empty.
//
// |db| is queried both for candidate shorter URLs and for the row describing
// the search base. |params| supplies the user's input text.
// |have_what_you_typed_match| and |what_you_typed_match| are used only to
// decide whether the search base itself may be added as a match.
void HistoryURLProvider::PromoteOrCreateShorterSuggestion(
history::URLDatabase* db,
const HistoryURLProviderParams& params,
bool have_what_you_typed_match,
const AutocompleteMatch& what_you_typed_match,
HistoryMatches* matches) {
if (matches->empty())
return; // No matches, nothing to do.
// Determine the base URL from which to search, and whether that URL could
// itself be added as a match. We can add the base iff it's not "effectively
// the same" as any "what you typed" match.
const HistoryMatch& match = matches->front();
GURL search_base = history::ConvertToHostOnly(match, params.input.text());
bool can_add_search_base_to_matches = !have_what_you_typed_match;
if (search_base.is_empty()) {
// Search from what the user typed when we couldn't reduce the best match
// to a host. Careful: use a substring of |match| here, rather than the
// first match in |params|, because they might have different prefixes. If
// the user typed "google.com", |what_you_typed_match| will hold
// "http://google.com/", but |match| might begin with
// "http://www.google.com/".
// TODO: this should be cleaned up, and is probably incorrect for IDN.
std::string new_match = match.url_info.url().possibly_invalid_spec().
substr(0, match.input_location + params.input.text().length());
search_base = GURL(new_match);
// TODO(mrossetti): There is a degenerate case where the following may
// cause a failure: http://www/~someword/fubar.html. Diagnose.
// See: http://crbug.com/50101
if (search_base.is_empty())
return; // Can't construct a valid URL from which to start a search.
} else if (!can_add_search_base_to_matches) {
// A what-you-typed match exists; the host-only base may still be added as
// long as it is a different URL from that match's destination.
can_add_search_base_to_matches =
(search_base != what_you_typed_match.destination_url);
}
if (search_base == match.url_info.url())
return; // Couldn't shorten |match|, so no range of URLs to search over.
// Search the DB for short URLs between our base and |match|.
// |info| starts out describing |search_base| and is passed to the DB calls
// below as their output row.
history::URLRow info(search_base);
bool promote = true;
// A short URL is only worth suggesting if it's been visited at least a third
// as often as the longer URL.
// (Integer arithmetic: this is ceil(visit_count / 3) for visit_count >= 1.)
const int min_visit_count = ((match.url_info.visit_count() - 1) / 3) + 1;
// For stability between the in-memory and on-disk autocomplete passes, when
// the long URL has been typed before, only suggest shorter URLs that have
// also been typed. Otherwise, the on-disk pass could suggest a shorter URL
// (which hasn't been typed) that the in-memory pass doesn't know about,
// thereby making the top match, and thus the behavior of inline
// autocomplete, unstable.
const int min_typed_count = match.url_info.typed_count() ? 1 : 0;
if (!db->FindShortestURLFromBase(search_base.possibly_invalid_spec(),
match.url_info.url().possibly_invalid_spec(), min_visit_count,
min_typed_count, can_add_search_base_to_matches, &info)) {
if (!can_add_search_base_to_matches)
return; // Couldn't find anything and can't add the search base, bail.
// Try to get info on the search base itself. Promote it to the top if the
// original best match isn't good enough to autocomplete.
// NOTE(review): the return value is ignored here; presumably |info| keeps
// the |search_base| URL it was constructed with if the lookup fails --
// confirm against URLDatabase::GetRowForURL().
db->GetRowForURL(search_base, &info);
promote = match.url_info.typed_count() <= 1;
}
// Promote or add the desired URL to the list of matches.
EnsureMatchPresent(info, match.input_location, match.match_in_scheme,
matches, promote);
}
// static
// Guarantees that |matches| holds an entry whose URL equals |info|'s URL,
// creating one from |info|, |input_location| and |match_in_scheme| if needed.
//
// When |promote| is true the entry ends up first in |matches| (an existing
// entry is rotated to the front, a new one is pushed to the front).  When
// |promote| is false an existing entry stays where it is and a new entry is
// appended at the back.
void HistoryURLProvider::EnsureMatchPresent(const history::URLRow& info,
                                            size_t input_location,
                                            bool match_in_scheme,
                                            HistoryMatches* matches,
                                            bool promote) {
  // Locate the first existing entry for this URL, if there is one.
  HistoryMatches::iterator existing(matches->begin());
  while ((existing != matches->end()) &&
         !(existing->url_info.url() == info.url())) {
    ++existing;
  }

  if (existing != matches->end()) {
    // Already present.  Rotating [begin, existing] by one moves |existing| to
    // the front and shifts the entries that preceded it back by one slot.
    if (promote)
      std::rotate(matches->begin(), existing, existing + 1);
    return;
  }

  // Not present, so create the entry and place it as requested.
  const HistoryMatch new_match(info, input_location, match_in_scheme, true);
  if (promote) {
    matches->push_front(new_match);
  } else {
    matches->push_back(new_match);
  }
}
void HistoryURLProvider::RunAutocompletePasses(
const AutocompleteInput& input,
bool fixup_input_and_run_pass_1) {
matches_.clear();
if ((input.type() == AutocompleteInput::INVALID) ||
(input.type() == AutocompleteInput::FORCED_QUERY))
return;
// Create a match for exactly what the user typed. This will only be used as // Create a match for exactly what the user typed. This will only be used as
// a fallback in case we can't get the history service or URL DB; otherwise, // a fallback in case we can't get the history service or URL DB; otherwise,
...@@ -704,10 +578,10 @@ void HistoryURLProvider::RunAutocompletePasses( ...@@ -704,10 +578,10 @@ void HistoryURLProvider::RunAutocompletePasses(
const history::Prefix* HistoryURLProvider::BestPrefix( const history::Prefix* HistoryURLProvider::BestPrefix(
const GURL& url, const GURL& url,
const string16& prefix_suffix) const { const string16& prefix_suffix) const {
const Prefix* best_prefix = NULL; const history::Prefix* best_prefix = NULL;
const string16 text(UTF8ToUTF16(url.spec())); const string16 text(UTF8ToUTF16(url.spec()));
for (Prefixes::const_iterator i(prefixes_.begin()); i != prefixes_.end(); for (history::Prefixes::const_iterator i(prefixes_.begin());
++i) { i != prefixes_.end(); ++i) {
if ((best_prefix == NULL) || if ((best_prefix == NULL) ||
(i->num_components > best_prefix->num_components)) { (i->num_components > best_prefix->num_components)) {
string16 prefix_with_suffix(i->prefix + prefix_suffix); string16 prefix_with_suffix(i->prefix + prefix_suffix);
...@@ -719,9 +593,151 @@ const history::Prefix* HistoryURLProvider::BestPrefix( ...@@ -719,9 +593,151 @@ const history::Prefix* HistoryURLProvider::BestPrefix(
return best_prefix; return best_prefix;
} }
void HistoryURLProvider::SortMatches(HistoryMatches* matches) const { AutocompleteMatch HistoryURLProvider::SuggestExactInput(
const AutocompleteInput& input,
bool trim_http) {
// Builds the URL_WHAT_YOU_TYPED match for |input|.  The destination is
// input.canonicalized_url(); when that URL is not valid, the match is returned
// with only the relevance/type set below (no destination, contents, or
// classification).  |trim_http| selects whether TrimHttpPrefix() is applied to
// the display string.
// TODO(dominich): Find a confidence measure for this.
// NOTE(review): the 0.0f below is presumably the placeholder confidence the
// TODO refers to -- confirm against AutocompleteMatch's constructor.
AutocompleteMatch match(this,
CalculateRelevance(input.type(), WHAT_YOU_TYPED, 0), 0.0f, false,
AutocompleteMatch::URL_WHAT_YOU_TYPED);
// Record the match's confidence, scaled by 100, in a histogram.
UMA_HISTOGRAM_COUNTS_100("Autocomplete.Confidence_HistoryUrl",
match.confidence * 100);
const GURL& url = input.canonicalized_url();
if (url.is_valid()) {
match.destination_url = url;
// Trim off "http://" if the user didn't type it.
// NOTE: We use TrimHttpPrefix() here rather than StringForURLDisplay() to
// strip the scheme as we need to know the offset so we can adjust the
// |match_location| below. StringForURLDisplay() and TrimHttpPrefix() have
// slightly different behavior as well (the latter will strip even without
// two slashes after the scheme).
string16 display_string(StringForURLDisplay(url, false, false));
// |offset| is whatever TrimHttpPrefix() reports for the trimmed prefix, or 0
// when no trimming was requested.
const size_t offset = trim_http ? TrimHttpPrefix(&display_string) : 0;
match.fill_into_edit =
AutocompleteInput::FormattedStringWithEquivalentMeaning(url,
display_string);
// NOTE: Don't set match.input_location (to allow inline autocompletion)
// here, it's surprising and annoying.
// Try to highlight "innermost" match location. If we fix up "w" into
// "www.w.com", we want to highlight the fifth character, not the first.
// This relies on match.destination_url being the non-prefix-trimmed version
// of match.contents.
match.contents = display_string;
const history::Prefix* best_prefix =
BestPrefix(match.destination_url, input.text());
// Because of the vagaries of GURL, it's possible for match.destination_url
// to not contain the user's input at all. In this case don't mark anything
// as a match.
// NOTE(review): the subtraction below assumes |offset| never exceeds the best
// prefix's length; a larger |offset| would wrap the size_t -- confirm.
const size_t match_location = (best_prefix == NULL) ?
string16::npos : best_prefix->prefix.length() - offset;
AutocompleteMatch::ClassifyLocationInString(match_location,
input.text().length(),
match.contents.length(),
ACMatchClassification::URL,
&match.contents_class);
match.is_history_what_you_typed_match = true;
}
return match;
}
// Decides whether the what-you-typed |match| should be promoted as an exact
// match.  On success the match's relevance is recomputed, an entry for it is
// placed at the front of |matches|, and true is returned; otherwise false is
// returned before |match| or |matches| is modified.
bool HistoryURLProvider::FixupExactSuggestion(
    history::URLDatabase* db,
    const AutocompleteInput& input,
    AutocompleteMatch* match,
    history::HistoryMatches* matches) const {
  DCHECK(match != NULL);
  DCHECK(matches != NULL);

  history::URLRow row;
  MatchType relevance_type = INLINE_AUTOCOMPLETE;
  if (db->GetRowForURL(match->destination_url, &row)) {
    // The history DB knows this URL, so decorate the match with its data.
    match->deletable = true;
    match->description = row.title();
    AutocompleteMatch::ClassifyMatchInString(input.text(),
                                             row.title(),
                                             ACMatchClassification::NONE,
                                             &match->description_class);
    if (row.typed_count() == 0) {
      // Reaching this point means this is the second pass and the first pass
      // did not promote this match as an exact match, so the "search what you
      // typed" match outscored it.  That ordering has to be kept, or the
      // destination for the user's typing would depend on when they hit
      // enter.  Score it low enough that the search provider still wins.
      relevance_type = WHAT_YOU_TYPED;
    }
  } else {
    // Tricky corner case: the user has visited intranet site "foo" but not
    // internet site "www.foo.com".  He types "foo" (an exact match) and then
    // tries ctrl-enter.  With ctrl held, the what-you-typed match
    // ("www.foo.com") is not in history and so gets no promoted relevance,
    // while a different match from the input ("foo") is, and gets promoted
    // for inline autocomplete.  The user would then still get "foo" instead
    // of "www.foo.com" (and, before hitting enter, probably an odd-looking
    // inline autocomplete of "/").
    //
    // The case is detected like this:
    //   * the what-you-typed match is not in the history DB,
    //   * and the user has specified a TLD,
    //   * and the input _without_ the TLD _is_ in the history DB,
    //   * ...then just before "ctrl" was pressed the best match supplied was
    //     the what-you-typed match, so stick with it by promoting it.
    if (input.desired_tld().empty())
      return false;
    const GURL url_without_tld(
        URLFixerUpper::FixupURL(UTF16ToUTF8(input.text()), std::string()));
    if (!db->GetRowForURL(url_without_tld, NULL))
      return false;

    // This is the corner case.  Make |row| describe the right URL.
    row = history::URLRow(match->destination_url);
  }

  // Promote as an exact match.
  match->relevance = CalculateRelevance(input.type(), relevance_type, 0);

  // Redirect culling needs this entry at the front of the HistoryMatches.
  EnsureMatchPresent(row, string16::npos, false, matches, true);
  return true;
}
// Appends an INLINE_AUTOCOMPLETE match built from |match| to |params->matches|
// when |match| has been typed often enough, and returns whether it did so.
bool HistoryURLProvider::PromoteMatchForInlineAutocomplete(
    HistoryURLProviderParams* params,
    const history::HistoryMatch& match,
    const history::HistoryMatches& matches) {
  // A match qualifies once it has been typed at least n times: n == 1 for
  // "simple" (host-only) URLs, n == 2 for everything else.  Longer URLs get
  // the higher bar because users are less likely to want to revisit them.
  // Pasted-in URLs don't bump typed_count, but a URL the user edited manually
  // or typed out by hand shouldn't start autocompleting immediately either.
  const history::URLRow& row = match.url_info;
  if (row.typed_count() == 0)
    return false;
  if ((row.typed_count() == 1) && !IsHostOnly(row.url()))
    return false;

  // Suppose the user has typed "foo.com" and visited (but not typed) "foo/",
  // and the input is "foo".  The first pass can get here for "foo.com", yet
  // the second pass could suggest the exact input as a better URL.  Both
  // passes must agree, and the first pass cannot know about "foo/", so getting
  // this far blocks every later pass from suggesting the exact input as a
  // better match.
  params->dont_suggest_exact_input = true;
  params->matches.push_back(
      HistoryMatchToACMatch(params, match, matches, INLINE_AUTOCOMPLETE, 0));
  return true;
}
void HistoryURLProvider::SortMatches(history::HistoryMatches* matches) const {
// Sort by quality, best first. // Sort by quality, best first.
std::sort(matches->begin(), matches->end(), &history::CompareHistoryMatch); std::sort(matches->begin(), matches->end(), &CompareHistoryMatch);
// Remove duplicate matches (caused by the search string appearing in one of // Remove duplicate matches (caused by the search string appearing in one of
// the prefixes as well as after it). Consider the following scenario: // the prefixes as well as after it). Consider the following scenario:
...@@ -743,8 +759,8 @@ void HistoryURLProvider::SortMatches(HistoryMatches* matches) const { ...@@ -743,8 +759,8 @@ void HistoryURLProvider::SortMatches(HistoryMatches* matches) const {
// we use an index instead of an iterator in the outer loop, and don't // we use an index instead of an iterator in the outer loop, and don't
// precalculate the ending position. // precalculate the ending position.
for (size_t i = 0; i < matches->size(); ++i) { for (size_t i = 0; i < matches->size(); ++i) {
HistoryMatches::iterator j(matches->begin() + i + 1); for (history::HistoryMatches::iterator j(matches->begin() + i + 1);
while (j != matches->end()) { j != matches->end(); ) {
if ((*matches)[i].url_info.url() == j->url_info.url()) if ((*matches)[i].url_info.url() == j->url_info.url())
j = matches->erase(j); j = matches->erase(j);
else else
...@@ -753,9 +769,11 @@ void HistoryURLProvider::SortMatches(HistoryMatches* matches) const { ...@@ -753,9 +769,11 @@ void HistoryURLProvider::SortMatches(HistoryMatches* matches) const {
} }
} }
void HistoryURLProvider::CullPoorMatches(HistoryMatches* matches) const { void HistoryURLProvider::CullPoorMatches(
history::HistoryMatches* matches) const {
const base::Time& threshold(history::AutocompleteAgeThreshold()); const base::Time& threshold(history::AutocompleteAgeThreshold());
for (HistoryMatches::iterator i(matches->begin()); i != matches->end();) { for (history::HistoryMatches::iterator i(matches->begin());
i != matches->end();) {
if (RowQualifiesAsSignificant(i->url_info, threshold)) if (RowQualifiesAsSignificant(i->url_info, threshold))
++i; ++i;
else else
...@@ -764,7 +782,7 @@ void HistoryURLProvider::CullPoorMatches(HistoryMatches* matches) const { ...@@ -764,7 +782,7 @@ void HistoryURLProvider::CullPoorMatches(HistoryMatches* matches) const {
} }
void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend, void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend,
HistoryMatches* matches, history::HistoryMatches* matches,
size_t max_results) const { size_t max_results) const {
for (size_t source = 0; for (size_t source = 0;
(source < matches->size()) && (source < max_results); ) { (source < matches->size()) && (source < max_results); ) {
...@@ -794,38 +812,37 @@ void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend, ...@@ -794,38 +812,37 @@ void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend,
} }
size_t HistoryURLProvider::RemoveSubsequentMatchesOf( size_t HistoryURLProvider::RemoveSubsequentMatchesOf(
HistoryMatches* matches, history::HistoryMatches* matches,
size_t source_index, size_t source_index,
const std::vector<GURL>& remove) const { const std::vector<GURL>& remove) const {
size_t next_index = source_index + 1; // return value = item after source size_t next_index = source_index + 1; // return value = item after source
// Find the first occurrence of any URL in the redirect chain. We want to // Find the first occurrence of any URL in the redirect chain. We want to
// keep this one since it is rated the highest. // keep this one since it is rated the highest.
HistoryMatches::iterator first(std::find_first_of( history::HistoryMatches::iterator first(std::find_first_of(
matches->begin(), matches->end(), remove.begin(), remove.end())); matches->begin(), matches->end(), remove.begin(), remove.end()));
DCHECK(first != matches->end()) << DCHECK(first != matches->end()) << "We should have always found at least the "
"We should have always found at least the original URL."; "original URL.";
// Find any following occurrences of any URL in the redirect chain, these // Find any following occurrences of any URL in the redirect chain, these
// should be deleted. // should be deleted.
HistoryMatches::iterator next(first); for (history::HistoryMatches::iterator next(std::find_first_of(first + 1,
next++; // Start searching immediately after the one we found already. matches->end(), remove.begin(), remove.end()));
while (next != matches->end() && next != matches->end(); next = std::find_first_of(next, matches->end(),
(next = std::find_first_of(next, matches->end(), remove.begin(), remove.begin(), remove.end())) {
remove.end())) != matches->end()) {
// Remove this item. When we remove an item before the source index, we // Remove this item. When we remove an item before the source index, we
// need to shift it to the right and remember that so we can return it. // need to shift it to the right and remember that so we can return it.
next = matches->erase(next); next = matches->erase(next);
if (static_cast<size_t>(next - matches->begin()) < next_index) if (static_cast<size_t>(next - matches->begin()) < next_index)
next_index--; --next_index;
} }
return next_index; return next_index;
} }
AutocompleteMatch HistoryURLProvider::HistoryMatchToACMatch( AutocompleteMatch HistoryURLProvider::HistoryMatchToACMatch(
HistoryURLProviderParams* params, HistoryURLProviderParams* params,
const HistoryMatch& history_match, const history::HistoryMatch& history_match,
const HistoryMatches& history_matches, const history::HistoryMatches& history_matches,
MatchType match_type, MatchType match_type,
size_t match_number) { size_t match_number) {
const history::URLRow& info = history_match.url_info; const history::URLRow& info = history_match.url_info;
......
...@@ -135,14 +135,6 @@ struct HistoryURLProviderParams { ...@@ -135,14 +135,6 @@ struct HistoryURLProviderParams {
// This class is an autocomplete provider and is also a pseudo-internal // This class is an autocomplete provider and is also a pseudo-internal
// component of the history system. See comments above. // component of the history system. See comments above.
//
// Note: This object can get leaked on shutdown if there are pending
// requests on the database (which hold a reference to us). Normally, these
// messages get flushed for each thread. We do a round trip from main, to
// history, back to main while holding a reference. If the main thread
// completes before the history thread, the message to delegate back to the
// main thread will not run and the reference will leak. Therefore, don't do
// anything on destruction.
class HistoryURLProvider : public HistoryProvider { class HistoryURLProvider : public HistoryProvider {
public: public:
HistoryURLProvider(ACProviderListener* listener, Profile* profile); HistoryURLProvider(ACProviderListener* listener, Profile* profile);
...@@ -156,7 +148,6 @@ class HistoryURLProvider : public HistoryProvider { ...@@ -156,7 +148,6 @@ class HistoryURLProvider : public HistoryProvider {
params_(NULL), params_(NULL),
languages_(languages) {} languages_(languages) {}
#endif #endif
// no destructor (see note above)
// AutocompleteProvider // AutocompleteProvider
virtual void Start(const AutocompleteInput& input, virtual void Start(const AutocompleteInput& input,
...@@ -196,44 +187,6 @@ class HistoryURLProvider : public HistoryProvider { ...@@ -196,44 +187,6 @@ class HistoryURLProvider : public HistoryProvider {
MatchType match_type, MatchType match_type,
size_t match_number); size_t match_number);
// Determines the confidence for a |match| when compared to all the
// |matches|. Returns a number in the range [0, 1].
static float CalculateConfidence(const history::HistoryMatch& match,
const history::HistoryMatches& matches);
// Given the user's |input| and a |match| created from it, reduce the
// match's URL to just a host. If this host still matches the user input,
// return it. Returns the empty string on failure.
static GURL ConvertToHostOnly(const history::HistoryMatch& match,
const string16& input);
// See if a shorter version of the best match should be created, and if so
// place it at the front of |matches|. This can suggest history URLs that
// are prefixes of the best match (if they've been visited enough, compared
// to the best match), or create host-only suggestions even when they haven't
// been visited before: if the user visited http://example.com/asdf once,
// we'll suggest http://example.com/ even if they've never been to it. See
// the function body for the exact heuristics used.
static void PromoteOrCreateShorterSuggestion(
history::URLDatabase* db,
const HistoryURLProviderParams& params,
bool have_what_you_typed_match,
const AutocompleteMatch& what_you_typed_match,
history::HistoryMatches* matches);
// Ensures that |matches| contains an entry for |info|, which may mean adding
// a new such entry (using |input_location| and |match_in_scheme|).
//
// If |promote| is true, this also ensures the entry is the first element in
// |matches|, moving or adding it to the front as appropriate. When
// |promote| is false, existing matches are left in place, and newly added
// matches are placed at the back.
static void EnsureMatchPresent(const history::URLRow& info,
size_t input_location,
bool match_in_scheme,
history::HistoryMatches* matches,
bool promote);
// Helper function that actually launches the two autocomplete passes. // Helper function that actually launches the two autocomplete passes.
void RunAutocompletePasses(const AutocompleteInput& input, void RunAutocompletePasses(const AutocompleteInput& input,
bool fixup_input_and_run_pass_1); bool fixup_input_and_run_pass_1);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment