Commit 4e38f7d2 authored by mpearson@chromium.org's avatar mpearson@chromium.org

Omnibox: Tweak HQP New Scoring and Re-Enable Field Trial

Tweaks HQP "new scoring" in two ways:
(1) Uses a smooth squash to prevent HQP from scoring above 1400.  This makes sure inlineable, high-quality prefix matches (whether scored by HUP or by the HUP-like mode in HQP), which have scores in the 1400-1410 range, are not trumped.
(2) Removes any credit for mid-word matches in titles.  I decided spacing in titles should always make sense and allowing mid-word matches here seems wrong.

I also changed the name of the "new scoring" field trial so we can distinguish the new behavior trial from the old behavior trial.

This trial is set up independently of the "old scoring" ignore-all-mid-word-matches field trial.  I prefer it this way because then I can easily enable or disable one without having to change much code.  I will analyze them in a dependent/correlated fashion--that is, I will only look at this trial when in the control group of the ignore-mid-word-matches trial and will only analyze the mid-word-matches trial when in the control group of this trial.

BUG=161911
TEST=


Review URL: https://chromiumcodereview.appspot.com/11416285

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@170869 0039d316-1c4b-4281-b951-d872f2087c98
parent 15cb74a7
...@@ -21,7 +21,8 @@ static const char kDisallowInlineHQPFieldTrialName[] = ...@@ -21,7 +21,8 @@ static const char kDisallowInlineHQPFieldTrialName[] =
// was created as part of the name. // was created as part of the name.
static const char kSuggestFieldTrialStarted2012Q4Name[] = static const char kSuggestFieldTrialStarted2012Q4Name[] =
"OmniboxSearchSuggestTrialStarted2012Q4"; "OmniboxSearchSuggestTrialStarted2012Q4";
static const char kHQPNewScoringFieldTrialName[] = "OmniboxHQPNewScoring"; static const char kHQPNewScoringFieldTrialName[] =
"OmniboxHQPNewScoringMax1400";
static const char kHUPCullRedirectsFieldTrialName[] = "OmniboxHUPCullRedirects"; static const char kHUPCullRedirectsFieldTrialName[] = "OmniboxHUPCullRedirects";
static const char kHUPCreateShorterMatchFieldTrialName[] = static const char kHUPCreateShorterMatchFieldTrialName[] =
"OmniboxHUPCreateShorterMatch"; "OmniboxHUPCreateShorterMatch";
...@@ -43,11 +44,11 @@ const base::FieldTrial::Probability ...@@ -43,11 +44,11 @@ const base::FieldTrial::Probability
// will decide what behavior (if any) to change based on the group. // will decide what behavior (if any) to change based on the group.
const int kSuggestFieldTrialNumberOfGroups = 20; const int kSuggestFieldTrialNumberOfGroups = 20;
// For History Quick Provider new scoring field trial, put 0% ( = 0/100 ) // For History Quick Provider new scoring field trial, put 25% ( = 25/100 )
// of the users in the new scoring experiment group. // of the users in the new scoring experiment group.
const base::FieldTrial::Probability kHQPNewScoringFieldTrialDivisor = 100; const base::FieldTrial::Probability kHQPNewScoringFieldTrialDivisor = 100;
const base::FieldTrial::Probability const base::FieldTrial::Probability
kHQPNewScoringFieldTrialExperimentFraction = 0; kHQPNewScoringFieldTrialExperimentFraction = 25;
// For HistoryURL provider cull redirects field trial, put 0% ( = 0/100 ) // For HistoryURL provider cull redirects field trial, put 0% ( = 0/100 )
// of the users in the don't-cull-redirects experiment group. // of the users in the don't-cull-redirects experiment group.
...@@ -166,10 +167,10 @@ void AutocompleteFieldTrial::Activate() { ...@@ -166,10 +167,10 @@ void AutocompleteFieldTrial::Activate() {
trial->group(); trial->group();
// Create inline History Quick Provider new scoring field trial. // Create inline History Quick Provider new scoring field trial.
// Make it expire on January 14, 2013. // Make it expire on April 14, 2013.
trial = base::FieldTrialList::FactoryGetFieldTrial( trial = base::FieldTrialList::FactoryGetFieldTrial(
kHQPNewScoringFieldTrialName, kHQPNewScoringFieldTrialDivisor, kHQPNewScoringFieldTrialName, kHQPNewScoringFieldTrialDivisor,
"Standard", 2013, 1, 14, NULL); "Standard", 2013, 4, 14, NULL);
trial->UseOneTimeRandomization(); trial->UseOneTimeRandomization();
hqp_new_scoring_experiment_group = trial->AppendGroup("NewScoring", hqp_new_scoring_experiment_group = trial->AppendGroup("NewScoring",
kHQPNewScoringFieldTrialExperimentFraction); kHQPNewScoringFieldTrialExperimentFraction);
......
...@@ -150,14 +150,8 @@ ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row, ...@@ -150,14 +150,8 @@ ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row,
(now - row.last_visit()).InDays()); (now - row.last_visit()).InDays());
const float popularity_score = GetPopularityScore( const float popularity_score = GetPopularityScore(
row.typed_count() + bookmark_boost, row.visit_count()); row.typed_count() + bookmark_boost, row.visit_count());
raw_score = GetFinalRelevancyScore(
// Combine recency, popularity, and topicality scores into one. topicality_score, recency_score, popularity_score);
// Example of how this functions: Suppose the omnibox has one
// input term. Suppose we have a URL that has 30 typed visits with
// the most recent being within a day and the omnibox input term
// has a single URL hostname hit at a word boundary. Then this
// URL will score 1200 ( = 30 * 40.0).
raw_score = 40.0 * topicality_score * recency_score * popularity_score;
raw_score = raw_score =
(raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max; (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max;
} else { // "old" scoring } else { // "old" scoring
...@@ -531,7 +525,7 @@ float ScoredHistoryMatch::GetTopicalityScore( ...@@ -531,7 +525,7 @@ float ScoredHistoryMatch::GetTopicalityScore(
if (word_num >= 10) break; // only count the first ten words if (word_num >= 10) break; // only count the first ten words
const bool at_word_boundary = (next_word_starts != end_word_starts) && const bool at_word_boundary = (next_word_starts != end_word_starts) &&
(*next_word_starts == iter->offset); (*next_word_starts == iter->offset);
term_scores[iter->term_num] += at_word_boundary ? 8 : 2; term_scores[iter->term_num] += at_word_boundary ? 8 : 0;
} }
// TODO(mpearson): Restore logic for penalizing out-of-order matches. // TODO(mpearson): Restore logic for penalizing out-of-order matches.
// (Perhaps discount them by 0.8?) // (Perhaps discount them by 0.8?)
...@@ -644,6 +638,31 @@ float ScoredHistoryMatch::GetPopularityScore(int typed_count, ...@@ -644,6 +638,31 @@ float ScoredHistoryMatch::GetPopularityScore(int typed_count,
(5.0 + 3.0); (5.0 + 3.0);
} }
// static
float ScoredHistoryMatch::GetFinalRelevancyScore(
    float topicality_score, float recency_score, float popularity_score) {
  // The three component scores are multiplied into a single raw value.
  // Author's reading of that value: with one omnibox input term, a URL
  // with 5 typed visits (the latest within a day) whose hostname matches
  // the term once at a word boundary yields 5.0
  // (= 1 topicality * 1 recency * 5 popularity).
  const float combined_score =
      topicality_score * recency_score * popularity_score;

  // The raw value lives in [0, infinity); the squash
  //   1400 * (2 - e^(2 / x))
  // brings it toward (but is intended to stay below) 1400 as x grows.
  // The squash is only applied for x >= 4.  Anything smaller is handled
  // by evaluating the squash at x = 4 (roughly 492) and scaling that
  // value linearly down to 0 in proportion to the raw value.
  const bool below_squash_floor = combined_score < 4;
  const float squash_input = below_squash_floor ? 4.0f : combined_score;
  const float linear_scale =
      below_squash_floor ? static_cast<float>(combined_score / 4.0) : 1.0f;
  DCHECK_GE(squash_input, 4.0);

  const double squashed_fraction = 2.0 - exp(2.0 / squash_input);
  return linear_scale * 1400.0 * squashed_fraction;
}
void ScoredHistoryMatch::InitializeNewScoringField() { void ScoredHistoryMatch::InitializeNewScoringField() {
enum NewScoringOption { enum NewScoringOption {
OLD_SCORING = 0, OLD_SCORING = 0,
......
...@@ -114,6 +114,13 @@ struct ScoredHistoryMatch : public history::HistoryMatch { ...@@ -114,6 +114,13 @@ struct ScoredHistoryMatch : public history::HistoryMatch {
static float GetPopularityScore(int typed_count, static float GetPopularityScore(int typed_count,
int visit_count); int visit_count);
// Combines the three component scores into a final score that's
// an appropriate value to use as a relevancy score.  The product of
// |topicality_score|, |recency_score|, and |popularity_score| is
// squashed smoothly so the result approaches 1400 as the product grows;
// products below 4 are scaled linearly down toward 0.
static float GetFinalRelevancyScore(
float topicality_score,
float recency_score,
float popularity_score);
// Sets use_new_scoring based on command line flags and/or // Sets use_new_scoring based on command line flags and/or
// field trial state. // field trial state.
static void InitializeNewScoringField(); static void InitializeNewScoringField();
......
...@@ -398,19 +398,15 @@ TEST_F(ScoredHistoryMatchTest, GetTopicalityScore) { ...@@ -398,19 +398,15 @@ TEST_F(ScoredHistoryMatchTest, GetTopicalityScore) {
// various types of URL matches. // various types of URL matches.
EXPECT_GT(title_score, arg_score); EXPECT_GT(title_score, arg_score);
EXPECT_GT(arg_score, title_mid_word_score); EXPECT_GT(arg_score, title_mid_word_score);
EXPECT_GT(title_mid_word_score, arg_mid_word_score);
// Finally, verify that protocol matches and top level domain name // Finally, verify that protocol matches and top level domain name
// matches (.com, .net, etc.) score worse than everything (except // matches (.com, .net, etc.) score worse than everything (except
// possibly mid-word matches in the ?arg section of the URL--I can // possibly mid-word matches in the ?arg section of the URL and
// imagine scoring those pretty harshly as well). // mid-word title matches--I can imagine scoring those pretty
// harshly as well).
EXPECT_GT(path_mid_word_score, protocol_score); EXPECT_GT(path_mid_word_score, protocol_score);
EXPECT_GT(path_mid_word_score, protocol_mid_word_score); EXPECT_GT(path_mid_word_score, protocol_mid_word_score);
EXPECT_GT(title_mid_word_score, protocol_score);
EXPECT_GT(title_mid_word_score, protocol_mid_word_score);
EXPECT_GT(path_mid_word_score, tld_score); EXPECT_GT(path_mid_word_score, tld_score);
EXPECT_GT(path_mid_word_score, tld_mid_word_score); EXPECT_GT(path_mid_word_score, tld_mid_word_score);
EXPECT_GT(title_mid_word_score, tld_score);
EXPECT_GT(title_mid_word_score, tld_mid_word_score);
} }
} // namespace history } // namespace history
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment