Commit 0e28cacd authored by Thanh Nguyen, committed by Commit Bot

[cros-fuzzy-app] Use parameters to control experiment

This CL:
1. Adds parameters to control the experiment through Finch flags
2. Adds a simple edit distance algorithm
3. Adds a unittest for the Finch parameters

Bug: 990684
Change-Id: I919d1dad0eaeb69b9305b14438eab3c930b425af
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1913224
Reviewed-by: Jia Meng <jiameng@chromium.org>
Commit-Queue: Thanh Nguyen <thanhdng@chromium.org>
Cr-Commit-Position: refs/heads/master@{#715198}
parent d4f1f5c4
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
namespace app_list { namespace app_list {
namespace { namespace {
constexpr bool kDefaultUseWeightedRatio = true;
constexpr double kDefaultRelevanceThreshold = 0.3; constexpr double kDefaultRelevanceThreshold = 0.3;
constexpr double kMinScore = 0.0; constexpr double kMinScore = 0.0;
constexpr double kMaxScore = 1.0; constexpr double kMaxScore = 1.0;
...@@ -139,12 +140,10 @@ double FuzzyTokenizedStringMatch::TokenSetRatio( ...@@ -139,12 +140,10 @@ double FuzzyTokenizedStringMatch::TokenSetRatio(
PartialRatio(query_rewritten, text_rewritten)}); PartialRatio(query_rewritten, text_rewritten)});
} }
return std::max({SequenceMatcher(intersection_string, query_rewritten) return std::max(
.Ratio(false /*use_edit_distance*/), {SequenceMatcher(intersection_string, query_rewritten).Ratio(),
SequenceMatcher(intersection_string, text_rewritten) SequenceMatcher(intersection_string, text_rewritten).Ratio(),
.Ratio(false /*use_edit_distance*/), SequenceMatcher(query_rewritten, text_rewritten).Ratio()});
SequenceMatcher(query_rewritten, text_rewritten)
.Ratio(false /*use_edit_distance*/)});
} }
double FuzzyTokenizedStringMatch::TokenSortRatio( double FuzzyTokenizedStringMatch::TokenSortRatio(
...@@ -159,8 +158,7 @@ double FuzzyTokenizedStringMatch::TokenSortRatio( ...@@ -159,8 +158,7 @@ double FuzzyTokenizedStringMatch::TokenSortRatio(
if (partial) { if (partial) {
return PartialRatio(query_sorted, text_sorted); return PartialRatio(query_sorted, text_sorted);
} }
return SequenceMatcher(query_sorted, text_sorted) return SequenceMatcher(query_sorted, text_sorted).Ratio();
.Ratio(false /*use_edit_distance*/);
} }
double FuzzyTokenizedStringMatch::PartialRatio(const base::string16& query, double FuzzyTokenizedStringMatch::PartialRatio(const base::string16& query,
...@@ -191,7 +189,7 @@ double FuzzyTokenizedStringMatch::PartialRatio(const base::string16& query, ...@@ -191,7 +189,7 @@ double FuzzyTokenizedStringMatch::PartialRatio(const base::string16& query,
partial_ratio = std::max( partial_ratio = std::max(
partial_ratio, partial_ratio,
SequenceMatcher(shorter, longer.substr(long_start, shorter.size())) SequenceMatcher(shorter, longer.substr(long_start, shorter.size()))
.Ratio(false /*use_edit_distance*/)); .Ratio());
if (partial_ratio > 0.995) { if (partial_ratio > 0.995) {
return kMaxScore; return kMaxScore;
} }
...@@ -209,8 +207,8 @@ double FuzzyTokenizedStringMatch::WeightedRatio( ...@@ -209,8 +207,8 @@ double FuzzyTokenizedStringMatch::WeightedRatio(
base::JoinString(query.tokens(), base::UTF8ToUTF16(" "))); base::JoinString(query.tokens(), base::UTF8ToUTF16(" ")));
const base::string16 text_normalized( const base::string16 text_normalized(
base::JoinString(text.tokens(), base::UTF8ToUTF16(" "))); base::JoinString(text.tokens(), base::UTF8ToUTF16(" ")));
double weighted_ratio = SequenceMatcher(query_normalized, text_normalized) double weighted_ratio =
.Ratio(false /*use_edit_distance*/); SequenceMatcher(query_normalized, text_normalized).Ratio();
const double length_ratio = const double length_ratio =
static_cast<double>( static_cast<double>(
std::max(query_normalized.size(), text_normalized.size())) / std::max(query_normalized.size(), text_normalized.size())) /
...@@ -254,13 +252,51 @@ bool FuzzyTokenizedStringMatch::IsRelevant(const ash::TokenizedString& query, ...@@ -254,13 +252,51 @@ bool FuzzyTokenizedStringMatch::IsRelevant(const ash::TokenizedString& query,
match.pos_second_string + match.length)); match.pos_second_string + match.length));
} }
} }
// |relevance_| is the average of WeightedRatio and PrefixMatcher scores.
relevance_ = (WeightedRatio(query, text) + PrefixMatcher(query, text)) / 2; // If the query is much longer than the text then it's often not a match.
if (query.text().size() >= text.text().size() * 2) {
return false;
}
const double relevance_threshold = base::GetFieldTrialParamByFeatureAsDouble( const double relevance_threshold = base::GetFieldTrialParamByFeatureAsDouble(
app_list_features::kEnableFuzzyAppSearch, "relevance_threshold", app_list_features::kEnableFuzzyAppSearch, "relevance_threshold",
kDefaultRelevanceThreshold); kDefaultRelevanceThreshold);
return relevance_ > relevance_threshold; const double prefix_score = PrefixMatcher(query, text);
if (base::GetFieldTrialParamByFeatureAsBool(
app_list_features::kEnableFuzzyAppSearch, "use_prefix_only", false) &&
prefix_score >= relevance_threshold) {
// If the prefix score is already higher than |relevance_threshold|, use
// prefix score as final score.
relevance_ = prefix_score;
return true;
}
const bool use_weighted_ratio = base::GetFieldTrialParamByFeatureAsBool(
app_list_features::kEnableFuzzyAppSearch, "use_weighted_ratio",
kDefaultUseWeightedRatio);
if (use_weighted_ratio) {
// If WeightedRatio is used, |relevance_| is the average of WeightedRatio
// and PrefixMatcher scores.
relevance_ = (WeightedRatio(query, text) + prefix_score) / 2;
} else {
// Use simple algorithm to calculate match ratio.
double partial_match = 0.0;
for (const auto& query_token : query.tokens()) {
for (const auto& text_token : text.tokens()) {
partial_match = std::max(
partial_match, SequenceMatcher(query_token, text_token).Ratio());
}
}
const double partial_scale = 0.9;
relevance_ = (std::max(SequenceMatcher(query.text(), text.text()).Ratio(),
partial_match * partial_scale) +
prefix_score) /
2;
}
return relevance_ >= relevance_threshold;
} }
} // namespace app_list } // namespace app_list
...@@ -256,4 +256,20 @@ TEST_F(FuzzyTokenizedStringMatchTest, ParamThresholdTest2) { ...@@ -256,4 +256,20 @@ TEST_F(FuzzyTokenizedStringMatchTest, ParamThresholdTest2) {
} }
} }
// Checks IsRelevant() when the non-default field trial parameters
// "use_weighted_ratio" and "use_edit_distance" are supplied together with a
// custom "relevance_threshold".
TEST_F(FuzzyTokenizedStringMatchTest, OtherParamTest) {
  // Enable the fuzzy app search feature with explicit parameter overrides for
  // the duration of this test.
  base::test::ScopedFeatureList scoped_features;
  scoped_features.InitWithFeaturesAndParameters(
      {{app_list_features::kEnableFuzzyAppSearch,
        {{"relevance_threshold", "0.35"},
         {"use_weighted_ratio", "false"},
         {"use_edit_distance", "true"}}}},
      {});

  FuzzyTokenizedStringMatch matcher;
  const base::string16 query_text(base::UTF8ToUTF16("anonymous"));
  const base::string16 app_text(base::UTF8ToUTF16("famous"));

  // The expected relevance (~0.33) is below the configured threshold of 0.35,
  // so the pair must be reported as not relevant.
  const bool is_relevant = matcher.IsRelevant(ash::TokenizedString(query_text),
                                              ash::TokenizedString(app_text));
  EXPECT_FALSE(is_relevant);
  EXPECT_NEAR(matcher.relevance(), 0.33, 0.01);
}
} // namespace app_list } // namespace app_list
...@@ -7,9 +7,13 @@ ...@@ -7,9 +7,13 @@
#include <algorithm> #include <algorithm>
#include <queue> #include <queue>
#include "ash/public/cpp/app_list/app_list_features.h"
#include "base/metrics/field_trial_params.h"
namespace app_list { namespace app_list {
namespace { namespace {
constexpr bool kDefaultUseEditDistance = false;
using Match = SequenceMatcher::Match; using Match = SequenceMatcher::Match;
using Matches = std::vector<Match>; using Matches = std::vector<Match>;
...@@ -36,6 +40,9 @@ SequenceMatcher::SequenceMatcher(const base::string16& first_string, ...@@ -36,6 +40,9 @@ SequenceMatcher::SequenceMatcher(const base::string16& first_string,
for (size_t i = 0; i < second_string_.size(); i++) { for (size_t i = 0; i < second_string_.size(); i++) {
char_to_positions_[second_string_[i]].emplace_back(i); char_to_positions_[second_string_[i]].emplace_back(i);
} }
use_edit_distance_ = base::GetFieldTrialParamByFeatureAsBool(
app_list_features::kEnableFuzzyAppSearch, "use_edit_distance",
kDefaultUseEditDistance);
} }
Match SequenceMatcher::FindLongestMatch(int first_start, Match SequenceMatcher::FindLongestMatch(int first_start,
...@@ -171,8 +178,8 @@ int SequenceMatcher::EditDistance() { ...@@ -171,8 +178,8 @@ int SequenceMatcher::EditDistance() {
return edit_distance_; return edit_distance_;
} }
double SequenceMatcher::Ratio(bool use_edit_distance) { double SequenceMatcher::Ratio() {
if (use_edit_distance) { if (use_edit_distance_) {
if (edit_distance_ratio_ < 0) { if (edit_distance_ratio_ < 0) {
const int edit_distance = EditDistance(); const int edit_distance = EditDistance();
edit_distance_ratio_ = edit_distance_ratio_ =
......
...@@ -37,9 +37,7 @@ class SequenceMatcher { ...@@ -37,9 +37,7 @@ class SequenceMatcher {
~SequenceMatcher() = default; ~SequenceMatcher() = default;
// Calculates similarity ratio of |first_string_| and |second_string_|. // Calculates similarity ratio of |first_string_| and |second_string_|.
// |use_edit_distance| is the option to use edit distance or block matching double Ratio();
// as the algorithm.
double Ratio(bool use_edit_distance = true);
// Calculates the Damerau–Levenshtein distance between |first_string_| and // Calculates the Damerau–Levenshtein distance between |first_string_| and
// |second_string_|. // |second_string_|.
// See https://en.wikipedia.org/wiki/Damerau–Levenshtein_distance for more // See https://en.wikipedia.org/wiki/Damerau–Levenshtein_distance for more
...@@ -65,6 +63,8 @@ class SequenceMatcher { ...@@ -65,6 +63,8 @@ class SequenceMatcher {
double block_matching_ratio_ = -1.0; double block_matching_ratio_ = -1.0;
std::vector<Match> matching_blocks_; std::vector<Match> matching_blocks_;
// Controls whether to use edit distance to calculate ratio.
bool use_edit_distance_;
int edit_distance_ = -1; int edit_distance_ = -1;
// For each character |c| in |second_string_|, this variable // For each character |c| in |second_string_|, this variable
// |char_to_positions_| stores all positions where |c| occurs in // |char_to_positions_| stores all positions where |c| occurs in
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment