Commit 68c76ec1 authored by mpearson@chromium.org's avatar mpearson@chromium.org

Omnibox: Create Field Trial for HQP to Ignore Mid-Word Matches

Adds code to the HistoryQuick provider so that it can optionally count a term as matching only when the match starts at a word boundary in the URL or title.  The current behavior allows terms to match anywhere, regardless of word boundaries.

Then, creates a field trial to control this behavior.  Enables the trial for 25% of users.

BUG=161911


Review URL: https://chromiumcodereview.appspot.com/11421139

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@170784 0039d316-1c4b-4281-b951-d872f2087c98
parent 57892182
......@@ -27,6 +27,8 @@ static const char kHUPCreateShorterMatchFieldTrialName[] =
"OmniboxHUPCreateShorterMatch";
static const char kHQPReplaceHUPScoringFieldTrialName[] =
"OmniboxHQPReplaceHUPNumComponentsFix";
// Name of the field trial in which HistoryQuick provider only counts term
// matches that fall on word boundaries.
static const char kHQPOnlyCountMatchesAtWordBoundariesFieldTrialName[] =
"OmniboxHQPOnlyCountMatchesAtWordBoundaries";
// Field trial experiment probabilities.
......@@ -74,6 +76,13 @@ const base::FieldTrial::Probability
const base::FieldTrial::Probability
kHQPReplaceHUPScoringFieldTrialExperimentFraction = 25;
// For the field trial that ignores all mid-word matches in HistoryQuick
// provider, put 25% ( = 25/100 ) of the users in the experiment group.
// The divisor is passed to FactoryGetFieldTrial() and the fraction to
// AppendGroup() when the trial is created in Activate().
const base::FieldTrial::Probability
kHQPOnlyCountMatchesAtWordBoundariesFieldTrialDivisor = 100;
const base::FieldTrial::Probability
kHQPOnlyCountMatchesAtWordBoundariesFieldTrialExperimentFraction = 25;
// Field trial IDs.
// Though they are not literally "const", they are set only once, in
......@@ -97,6 +106,10 @@ int hup_dont_create_shorter_match_experiment_group = 0;
// experiment group.
int hqp_replace_hup_scoring_experiment_group = 0;
// Field trial ID for the HistoryQuick provider only count matches at
// word boundaries experiment group.
int hqp_only_count_matches_at_word_boundaries_experiment_group = 0;
}
......@@ -191,6 +204,18 @@ void AutocompleteFieldTrial::Activate() {
trial->UseOneTimeRandomization();
hqp_replace_hup_scoring_experiment_group = trial->AppendGroup("HQPReplaceHUP",
kHQPReplaceHUPScoringFieldTrialExperimentFraction);
// Create the field trial that makes HistoryQuick provider scoring
// ignore all matches that happen in the middle of a word.  Make it
// expire on June 23, 2013.
trial = base::FieldTrialList::FactoryGetFieldTrial(
kHQPOnlyCountMatchesAtWordBoundariesFieldTrialName,
kHQPOnlyCountMatchesAtWordBoundariesFieldTrialDivisor,
"Standard", 2013, 6, 23, NULL);
trial->UseOneTimeRandomization();
hqp_only_count_matches_at_word_boundaries_experiment_group =
trial->AppendGroup("HQPOnlyCountMatchesAtWordBoundaries",
kHQPOnlyCountMatchesAtWordBoundariesFieldTrialExperimentFraction);
}
bool AutocompleteFieldTrial::InDisallowInlineHQPFieldTrial() {
......@@ -264,3 +289,19 @@ bool AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup() {
kHQPReplaceHUPScoringFieldTrialName);
return group == hqp_replace_hup_scoring_experiment_group;
}
bool AutocompleteFieldTrial::InHQPOnlyCountMatchesAtWordBoundariesFieldTrial() {
  // The user counts as being in the trial whenever the trial is registered
  // under its name in the field trial list.
  const bool trial_exists = base::FieldTrialList::TrialExists(
      kHQPOnlyCountMatchesAtWordBoundariesFieldTrialName);
  return trial_exists;
}
bool AutocompleteFieldTrial::
    InHQPOnlyCountMatchesAtWordBoundariesFieldTrialExperimentGroup() {
  // True only when the trial exists and the group assigned to this user is
  // the experiment group recorded when the trial was set up.  The &&
  // short-circuits, so the group is looked up only if the trial exists.
  return InHQPOnlyCountMatchesAtWordBoundariesFieldTrial() &&
      (base::FieldTrialList::FindValue(
           kHQPOnlyCountMatchesAtWordBoundariesFieldTrialName) ==
       hqp_only_count_matches_at_word_boundaries_experiment_group);
}
......@@ -83,6 +83,18 @@ class AutocompleteFieldTrial {
// is larger.
static bool InHQPReplaceHUPScoringFieldTrialExperimentGroup();
// ---------------------------------------------------------
// For the HistoryQuick provider only count matches at word boundaries
// field trial.
// Returns whether the user is in any group for this field trial.
// (Should always be true unless initialization went wrong.)
static bool InHQPOnlyCountMatchesAtWordBoundariesFieldTrial();
// Returns whether we should ignore all mid-word matches in
// HistoryQuick provider.
static bool InHQPOnlyCountMatchesAtWordBoundariesFieldTrialExperimentGroup();
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(AutocompleteFieldTrial);
};
......
......@@ -56,6 +56,7 @@ const int kBaseScoreForUntypedResultsInHUPLikeScoring = 900;
bool ScoredHistoryMatch::initialized_ = false;
bool ScoredHistoryMatch::use_new_scoring = false;
bool ScoredHistoryMatch::only_count_matches_at_word_boundaries = false;
bool ScoredHistoryMatch::also_do_hup_like_scoring = false;
ScoredHistoryMatch::ScoredHistoryMatch()
......@@ -63,6 +64,7 @@ ScoredHistoryMatch::ScoredHistoryMatch()
can_inline(false) {
if (!initialized_) {
InitializeNewScoringField();
InitializeOnlyCountMatchesAtWordBoundariesField();
InitializeAlsoDoHUPLikeScoringField();
initialized_ = true;
}
......@@ -79,6 +81,7 @@ ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row,
can_inline(false) {
if (!initialized_) {
InitializeNewScoringField();
InitializeOnlyCountMatchesAtWordBoundariesField();
InitializeAlsoDoHUPLikeScoringField();
initialized_ = true;
}
......@@ -161,10 +164,13 @@ ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row,
// Get partial scores based on term matching. Note that the score for
// each of the URL and title are adjusted by the fraction of the
// terms appearing in each.
int url_score = ScoreComponentForMatches(url_matches, url.length()) *
int url_score =
ScoreComponentForMatches(url_matches, word_starts.url_word_starts_,
url.length()) *
std::min(url_matches.size(), terms.size()) / terms.size();
int title_score =
ScoreComponentForMatches(title_matches, title.length()) *
ScoreComponentForMatches(title_matches, word_starts.title_word_starts_,
title.length()) *
std::min(title_matches.size(), terms.size()) / terms.size();
// Arbitrarily pick the best.
// TODO(mrossetti): It might make sense that a term which appears in both
......@@ -287,8 +293,23 @@ int AccumulateMatchLength(int total, const TermMatch& match) {
}
// static
int ScoredHistoryMatch::ScoreComponentForMatches(const TermMatches& matches,
size_t max_length) {
int ScoredHistoryMatch::ScoreComponentForMatches(
const TermMatches& provided_matches,
const WordStarts& word_starts,
size_t max_length) {
if (provided_matches.empty())
return 0;
TermMatches matches_at_word_boundaries;
if (only_count_matches_at_word_boundaries) {
MakeTermMatchesOnlyAtWordBoundaries(provided_matches, word_starts,
&matches_at_word_boundaries);
}
// The actual matches we'll use for matching. This is |provided_matches|
// with all the matches not at a word boundary removed (if told to do so).
const TermMatches& matches = only_count_matches_at_word_boundaries ?
matches_at_word_boundaries : provided_matches;
if (matches.empty())
return 0;
......@@ -345,6 +366,31 @@ int ScoredHistoryMatch::ScoreComponentForMatches(const TermMatches& matches,
return ScoreForValue(raw_score, kTermScoreLevel);
}
// static
void ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries(
const TermMatches& provided_matches,
const WordStarts& word_starts,
TermMatches* matches_at_word_boundaries) {
matches_at_word_boundaries->clear();
// Resize it to an upper-bound estimate of the correct size.
matches_at_word_boundaries->reserve(provided_matches.size());
WordStarts::const_iterator next_word_starts = word_starts.begin();
for (TermMatches::const_iterator iter = provided_matches.begin();
iter != provided_matches.end(); ++iter) {
// Advance next_word_starts until it's >= the position of the term
// we're considering.
while ((next_word_starts != word_starts.end()) &&
(*next_word_starts < iter->offset)) {
++next_word_starts;
}
if ((next_word_starts != word_starts.end()) &&
(*next_word_starts == iter->offset)) {
// At word boundary: copy this element into |matches_at_word_boundaries|.
matches_at_word_boundaries->push_back(*iter);
}
}
}
// static
int ScoredHistoryMatch::ScoreForValue(int value, const int* value_ranks) {
int i = 0;
......@@ -657,6 +703,14 @@ void ScoredHistoryMatch::InitializeNewScoringField() {
new_scoring_option, NUM_OPTIONS);
}
void ScoredHistoryMatch::InitializeOnlyCountMatchesAtWordBoundariesField() {
  // Enable the behavior only if the trial exists and this user is in its
  // experiment group; the group is queried only when the trial exists.
  const bool trial_exists =
      AutocompleteFieldTrial::InHQPOnlyCountMatchesAtWordBoundariesFieldTrial();
  only_count_matches_at_word_boundaries =
      trial_exists &&
      AutocompleteFieldTrial::
          InHQPOnlyCountMatchesAtWordBoundariesFieldTrialExperimentGroup();
}
void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringField() {
also_do_hup_like_scoring =
AutocompleteFieldTrial::InHQPReplaceHUPScoringFieldTrial() &&
......
......@@ -38,12 +38,26 @@ struct ScoredHistoryMatch : public history::HistoryMatch {
BookmarkService* bookmark_service);
~ScoredHistoryMatch();
// Calculates a component score based on position, ordering and total
// substring match size using metrics recorded in |matches|. |max_length|
// is the length of the string against which the terms are being searched.
static int ScoreComponentForMatches(const TermMatches& matches,
// Calculates a component score based on position, ordering, word
// boundaries, and total substring match size using metrics recorded
// in |provided_matches| and |word_starts|. |max_length| is the length of
// the string against which the terms are being searched.
// |provided_matches| should already be sorted and de-duped, and
// |word_starts| must be sorted.
static int ScoreComponentForMatches(const TermMatches& provided_matches,
const WordStarts& word_starts,
size_t max_length);
// Given a set of term matches |provided_matches| and word boundaries
// |word_starts|, fills in |matches_at_word_boundaries| with only the
// matches in |provided_matches| that are at word boundaries.
// |provided_matches| should already be sorted and de-duped, and
// |word_starts| must be sorted.
static void MakeTermMatchesOnlyAtWordBoundaries(
const TermMatches& provided_matches,
const WordStarts& word_starts,
TermMatches* matches_at_word_boundaries);
// Converts a raw value for some particular scoring factor into a score
// component for that factor. The conversion function is piecewise linear,
// with input values provided in |value_ranks| and resulting output scores
......@@ -74,7 +88,9 @@ struct ScoredHistoryMatch : public history::HistoryMatch {
// boundaries). |url_matches| and |title_matches| provide details
// about where the matches in the URL and title are and what terms
// (identified by a term number < |num_terms|) match where.
// |word_starts| explains where word boundaries are.
// |word_starts| explains where word boundaries are. Its parts (title
// and url) must be sorted. Also, |url_matches| and
// |title_matches| should already be sorted and de-duped.
static float GetTopicalityScore(const int num_terms,
const string16& url,
const TermMatches& url_matches,
......@@ -102,6 +118,9 @@ struct ScoredHistoryMatch : public history::HistoryMatch {
// field trial state.
static void InitializeNewScoringField();
// Sets only_count_matches_at_word_boundaries based on the field trial state.
static void InitializeOnlyCountMatchesAtWordBoundariesField();
// Sets also_do_hup_like_scoring based on the field trial state.
static void InitializeAlsoDoHUPLikeScoringField();
......@@ -134,7 +153,7 @@ struct ScoredHistoryMatch : public history::HistoryMatch {
static const int kMaxRawTermScore = 30;
static float* raw_term_score_to_topicality_score;
// Allows us to determing setting for use_new_scoring_ only once.
// Used so we initialize static variables only once (on first use).
static bool initialized_;
// Whether to use new-scoring or old-scoring. Set in the
......@@ -145,6 +164,9 @@ struct ScoredHistoryMatch : public history::HistoryMatch {
// class as well (see boolean below).
static bool use_new_scoring;
// If true, we ignore all matches that are in the middle of a word.
static bool only_count_matches_at_word_boundaries;
// If true, assign raw scores to be max(whatever it normally would be,
// a score that's similar to the score HistoryURL provider would assign).
// This variable is set in the constructor by examining the field trial
......
......@@ -77,6 +77,85 @@ float ScoredHistoryMatchTest::GetTopicalityScoreOfTermAgainstURLAndTitle(
1, url, url_matches, title_matches, word_starts);
}
// Verifies that MakeTermMatchesOnlyAtWordBoundaries() keeps exactly those
// matches whose offset coincides with an entry of |word_starts|.
// |matches_at_word_boundaries| is reused between cases without being cleared
// here, so each case also depends on the function emptying its output first.
// Size checks that guard indexing use ASSERT_EQ so that a wrong size fails
// the test cleanly instead of reading past the end of the vector.
TEST_F(ScoredHistoryMatchTest, MakeTermMatchesOnlyAtWordBoundaries) {
  TermMatches matches, matches_at_word_boundaries;
  WordStarts word_starts;

  // no matches but some word starts -> no matches at word boundary
  matches.clear();
  word_starts.clear();
  word_starts.push_back(2);
  word_starts.push_back(5);
  word_starts.push_back(10);
  ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries(
      matches, word_starts, &matches_at_word_boundaries);
  EXPECT_EQ(0u, matches_at_word_boundaries.size());

  // matches but no word starts -> no matches at word boundary
  matches.clear();
  matches.push_back(TermMatch(0, 1, 2));  // 2-character match at pos 1
  matches.push_back(TermMatch(0, 7, 2));  // 2-character match at pos 7
  word_starts.clear();
  ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries(
      matches, word_starts, &matches_at_word_boundaries);
  EXPECT_EQ(0u, matches_at_word_boundaries.size());

  // matches and word starts don't overlap -> no matches at word boundary
  matches.clear();
  matches.push_back(TermMatch(0, 1, 2));  // 2-character match at pos 1
  matches.push_back(TermMatch(0, 7, 2));  // 2-character match at pos 7
  word_starts.clear();
  word_starts.push_back(2);
  word_starts.push_back(5);
  word_starts.push_back(10);
  ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries(
      matches, word_starts, &matches_at_word_boundaries);
  EXPECT_EQ(0u, matches_at_word_boundaries.size());

  // some matches are at word boundary and some aren't
  matches.clear();
  matches.push_back(TermMatch(0, 1, 2));  // 2-character match at pos 1
  matches.push_back(TermMatch(1, 6, 3));  // 3-character match at pos 6
  matches.push_back(TermMatch(0, 8, 2));  // 2-character match at pos 8
  matches.push_back(TermMatch(2, 15, 7));  // 7-character match at pos 15
  matches.push_back(TermMatch(1, 26, 3));  // 3-character match at pos 26
  word_starts.clear();
  word_starts.push_back(0);
  word_starts.push_back(6);
  word_starts.push_back(9);
  word_starts.push_back(15);
  word_starts.push_back(24);
  ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries(
      matches, word_starts, &matches_at_word_boundaries);
  // Only the matches at offsets 6 and 15 sit on a word start.
  ASSERT_EQ(2u, matches_at_word_boundaries.size());
  EXPECT_EQ(1, matches_at_word_boundaries[0].term_num);
  EXPECT_EQ(6u, matches_at_word_boundaries[0].offset);
  EXPECT_EQ(3u, matches_at_word_boundaries[0].length);
  EXPECT_EQ(2, matches_at_word_boundaries[1].term_num);
  EXPECT_EQ(15u, matches_at_word_boundaries[1].offset);
  EXPECT_EQ(7u, matches_at_word_boundaries[1].length);

  // all matches are at word boundary
  matches.clear();
  matches.push_back(TermMatch(0, 2, 2));  // 2-character match at pos 2
  matches.push_back(TermMatch(1, 9, 3));  // 3-character match at pos 9
  word_starts.clear();
  word_starts.push_back(0);
  word_starts.push_back(2);
  word_starts.push_back(6);
  word_starts.push_back(9);
  word_starts.push_back(15);
  ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries(
      matches, word_starts, &matches_at_word_boundaries);
  ASSERT_EQ(2u, matches_at_word_boundaries.size());
  EXPECT_EQ(0, matches_at_word_boundaries[0].term_num);
  EXPECT_EQ(2u, matches_at_word_boundaries[0].offset);
  EXPECT_EQ(2u, matches_at_word_boundaries[0].length);
  EXPECT_EQ(1, matches_at_word_boundaries[1].term_num);
  EXPECT_EQ(9u, matches_at_word_boundaries[1].offset);
  EXPECT_EQ(3u, matches_at_word_boundaries[1].length);
}
TEST_F(ScoredHistoryMatchTest, Scoring) {
URLRow row_a(MakeURLRow("http://abcdef", "fedcba", 3, 30, 1));
// We use NowFromSystemTime() because MakeURLRow uses the same function
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment