Commit 2b965865 authored by Jia, committed by Commit Bot

[cros search service] Add a parameter to SequenceMatcher

This cl adds a penalty factor to SequenceMatcher so that
we can penalize lots of short matching blocks.

This cl also updates search parameters.

Bug: 1081584,1090181,1090154,1090148
Change-Id: Idd35ddd7cfbcdb3928e4b966f2ec757d0b75296c
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2227721
Commit-Queue: Jia Meng <jiameng@chromium.org>
Reviewed-by: Thanh Nguyen <thanhdng@chromium.org>
Reviewed-by: Kyle Horimoto <khorimoto@chromium.org>
Cr-Commit-Position: refs/heads/master@{#774930}
parent 43c8771f
......@@ -74,7 +74,7 @@ bool IsItemRelevant(
FuzzyTokenizedStringMatch match;
if (match.IsRelevant(query, *tag, relevance_threshold, use_prefix_only,
use_weighted_ratio, use_edit_distance,
partial_match_penalty_rate)) {
partial_match_penalty_rate, 0.1)) {
*relevance_score = match.relevance();
for (const auto& hit : match.hits()) {
local_search_service::Range range;
......
......@@ -40,7 +40,7 @@ struct SearchParams {
double partial_match_penalty_rate = 0.9;
bool use_prefix_only = false;
bool use_edit_distance = false;
bool split_search_tags = true;
bool split_search_tags = false;
};
// A numeric range used to represent the start and end position.
......
......@@ -133,6 +133,9 @@ TEST_F(IndexTest, SearchTagSplit) {
std::vector<Data> data = CreateTestData(data_to_register);
EXPECT_EQ(data.size(), 2u);
SearchParams search_params;
search_params.split_search_tags = true;
index_.SetSearchParams(search_params);
index_.AddOrUpdate(data);
EXPECT_EQ(index_.GetSize(), 2u);
......
......@@ -107,21 +107,21 @@ class SearchHandlerTest : public testing::Test {
};
TEST_F(SearchHandlerTest, AddAndRemove) {
// Add printing search tags to registry and search for "Printing".
// Add printing search tags to registry and search for "Print".
search_tag_registry_.AddSearchTags(GetPrintingSearchConcepts());
std::vector<mojom::SearchResultPtr> search_results;
// 2 results should be available for a "Printing" query.
// 3 results should be available for a "Print" query.
mojom::SearchHandlerAsyncWaiter(handler_remote_.get())
.Search(base::ASCIIToUTF16("Printing"),
.Search(base::ASCIIToUTF16("Print"),
/*max_num_results=*/3u,
mojom::ParentResultBehavior::kDoNotIncludeParentResults,
&search_results);
EXPECT_EQ(search_results.size(), 2u);
EXPECT_EQ(search_results.size(), 3u);
// Limit results to 1 max and ensure that only 1 result is returned.
mojom::SearchHandlerAsyncWaiter(handler_remote_.get())
.Search(base::ASCIIToUTF16("Printing"),
.Search(base::ASCIIToUTF16("Print"),
/*max_num_results=*/1u,
mojom::ParentResultBehavior::kDoNotIncludeParentResults,
&search_results);
......@@ -139,7 +139,7 @@ TEST_F(SearchHandlerTest, AddAndRemove) {
// returned for "Printing".
search_tag_registry_.RemoveSearchTags(GetPrintingSearchConcepts());
mojom::SearchHandlerAsyncWaiter(handler_remote_.get())
.Search(base::ASCIIToUTF16("Printing"),
.Search(base::ASCIIToUTF16("Print"),
/*max_num_results=*/3u,
mojom::ParentResultBehavior::kDoNotIncludeParentResults,
&search_results);
......@@ -187,11 +187,11 @@ TEST_F(SearchHandlerTest, DefaultRank) {
search_tag_registry_.AddSearchTags(GetPrintingSearchConcepts());
std::vector<mojom::SearchResultPtr> search_results;
// Search for "Printing". Only the IDS_OS_SETTINGS_TAG_PRINTING result
// Search for "Print". Only the IDS_OS_SETTINGS_TAG_PRINTING result
// contains the word "Printing", but the other results have the similar word
// "Printer". Thus, "Printing" has a higher relevance score.
mojom::SearchHandlerAsyncWaiter(handler_remote_.get())
.Search(base::ASCIIToUTF16("Printing"),
.Search(base::ASCIIToUTF16("Print"),
/*max_num_results=*/3u,
mojom::ParentResultBehavior::kAllowParentResults,
&search_results);
......
......@@ -91,7 +91,8 @@ double FuzzyTokenizedStringMatch::TokenSetRatio(
const TokenizedString& text,
bool partial,
double partial_match_penalty_rate,
bool use_edit_distance) {
bool use_edit_distance,
double num_matching_blocks_penalty) {
std::set<base::string16> query_token(query.tokens().begin(),
query.tokens().end());
std::set<base::string16> text_token(text.tokens().begin(),
......@@ -127,21 +128,26 @@ double FuzzyTokenizedStringMatch::TokenSetRatio(
base::JoinString(text_diff_query, base::UTF8ToUTF16(" "))});
if (partial) {
return std::max(
{PartialRatio(intersection_string, query_rewritten,
partial_match_penalty_rate, use_edit_distance),
PartialRatio(intersection_string, text_rewritten,
partial_match_penalty_rate, use_edit_distance),
PartialRatio(query_rewritten, text_rewritten,
partial_match_penalty_rate, use_edit_distance)});
return std::max({PartialRatio(intersection_string, query_rewritten,
partial_match_penalty_rate, use_edit_distance,
num_matching_blocks_penalty),
PartialRatio(intersection_string, text_rewritten,
partial_match_penalty_rate, use_edit_distance,
num_matching_blocks_penalty),
PartialRatio(query_rewritten, text_rewritten,
partial_match_penalty_rate, use_edit_distance,
num_matching_blocks_penalty)});
}
return std::max(
{SequenceMatcher(intersection_string, query_rewritten, use_edit_distance)
{SequenceMatcher(intersection_string, query_rewritten, use_edit_distance,
num_matching_blocks_penalty)
.Ratio(),
SequenceMatcher(intersection_string, text_rewritten, use_edit_distance)
SequenceMatcher(intersection_string, text_rewritten, use_edit_distance,
num_matching_blocks_penalty)
.Ratio(),
SequenceMatcher(query_rewritten, text_rewritten, use_edit_distance)
SequenceMatcher(query_rewritten, text_rewritten, use_edit_distance,
num_matching_blocks_penalty)
.Ratio()});
}
......@@ -150,7 +156,8 @@ double FuzzyTokenizedStringMatch::TokenSortRatio(
const TokenizedString& text,
bool partial,
double partial_match_penalty_rate,
bool use_edit_distance) {
bool use_edit_distance,
double num_matching_blocks_penalty) {
const base::string16 query_sorted =
base::JoinString(ProcessAndSort(query), base::UTF8ToUTF16(" "));
const base::string16 text_sorted =
......@@ -158,16 +165,19 @@ double FuzzyTokenizedStringMatch::TokenSortRatio(
if (partial) {
return PartialRatio(query_sorted, text_sorted, partial_match_penalty_rate,
use_edit_distance);
use_edit_distance, num_matching_blocks_penalty);
}
return SequenceMatcher(query_sorted, text_sorted, use_edit_distance).Ratio();
return SequenceMatcher(query_sorted, text_sorted, use_edit_distance,
num_matching_blocks_penalty)
.Ratio();
}
double FuzzyTokenizedStringMatch::PartialRatio(
const base::string16& query,
const base::string16& text,
double partial_match_penalty_rate,
bool use_edit_distance) {
bool use_edit_distance,
double num_matching_blocks_penalty) {
if (query.empty() || text.empty()) {
return kMinScore;
}
......@@ -180,7 +190,9 @@ double FuzzyTokenizedStringMatch::PartialRatio(
}
const auto matching_blocks =
SequenceMatcher(shorter, longer, use_edit_distance).GetMatchingBlocks();
SequenceMatcher(shorter, longer, use_edit_distance,
num_matching_blocks_penalty)
.GetMatchingBlocks();
double partial_ratio = 0;
for (const auto& block : matching_blocks) {
......@@ -203,7 +215,7 @@ double FuzzyTokenizedStringMatch::PartialRatio(
partial_ratio = std::max(
partial_ratio,
SequenceMatcher(shorter, longer.substr(long_start, shorter.size()),
use_edit_distance)
use_edit_distance, num_matching_blocks_penalty)
.Ratio() *
penalty);
......@@ -218,7 +230,8 @@ double FuzzyTokenizedStringMatch::WeightedRatio(
const TokenizedString& query,
const TokenizedString& text,
double partial_match_penalty_rate,
bool use_edit_distance) {
bool use_edit_distance,
double num_matching_blocks_penalty) {
const double unbase_scale = 0.95;
// Since query.text() and text.text() is not normalized, we use query.tokens()
// and text.tokens() instead.
......@@ -227,7 +240,8 @@ double FuzzyTokenizedStringMatch::WeightedRatio(
const base::string16 text_normalized(
base::JoinString(text.tokens(), base::UTF8ToUTF16(" ")));
double weighted_ratio =
SequenceMatcher(query_normalized, text_normalized, use_edit_distance)
SequenceMatcher(query_normalized, text_normalized, use_edit_distance,
num_matching_blocks_penalty)
.Ratio();
const double length_ratio =
static_cast<double>(
......@@ -245,23 +259,25 @@ double FuzzyTokenizedStringMatch::WeightedRatio(
weighted_ratio =
std::max(weighted_ratio,
PartialRatio(query_normalized, text_normalized,
partial_match_penalty_rate, use_edit_distance) *
partial_match_penalty_rate, use_edit_distance,
num_matching_blocks_penalty) *
partial_scale);
}
weighted_ratio =
std::max(weighted_ratio,
TokenSortRatio(query, text, use_partial /*partial*/,
partial_match_penalty_rate, use_edit_distance) *
partial_match_penalty_rate, use_edit_distance,
num_matching_blocks_penalty) *
unbase_scale * partial_scale);
// Do not use partial match for token set because the match between the
// intersection string and query/text rewrites will always return an extremely
// high value.
weighted_ratio =
std::max(weighted_ratio,
TokenSetRatio(query, text, false /*partial*/,
partial_match_penalty_rate, use_edit_distance) *
unbase_scale * partial_scale);
weighted_ratio = std::max(
weighted_ratio,
TokenSetRatio(query, text, false /*partial*/, partial_match_penalty_rate,
use_edit_distance, num_matching_blocks_penalty) *
unbase_scale * partial_scale);
return weighted_ratio;
}
......@@ -276,7 +292,8 @@ bool FuzzyTokenizedStringMatch::IsRelevant(const TokenizedString& query,
bool use_prefix_only,
bool use_weighted_ratio,
bool use_edit_distance,
double partial_match_penalty_rate) {
double partial_match_penalty_rate,
double num_matching_blocks_penalty) {
// If there is an exact match, relevance will be 1.0 and there is only 1 hit
// that is the entire text/query.
const auto& query_text = query.text();
......@@ -292,7 +309,8 @@ bool FuzzyTokenizedStringMatch::IsRelevant(const TokenizedString& query,
// Find |hits_| using SequenceMatcher on original query and text.
for (const auto& match :
SequenceMatcher(query_text, text_text, use_edit_distance)
SequenceMatcher(query_text, text_text, use_edit_distance,
num_matching_blocks_penalty)
.GetMatchingBlocks()) {
if (match.length > 0) {
hits_.push_back(gfx::Range(match.pos_second_string,
......@@ -317,15 +335,17 @@ bool FuzzyTokenizedStringMatch::IsRelevant(const TokenizedString& query,
if (use_weighted_ratio) {
// If WeightedRatio is used, |relevance_| is the average of WeightedRatio
// and PrefixMatcher scores.
relevance_ = (WeightedRatio(query, text, partial_match_penalty_rate,
use_edit_distance) +
prefix_score) /
2;
relevance_ =
(WeightedRatio(query, text, partial_match_penalty_rate,
use_edit_distance, num_matching_blocks_penalty) +
prefix_score) /
2;
} else {
// Use simple algorithm to calculate match ratio.
relevance_ =
(SequenceMatcher(base::i18n::ToLower(query_text),
base::i18n::ToLower(text_text), use_edit_distance)
base::i18n::ToLower(text_text), use_edit_distance,
num_matching_blocks_penalty)
.Ratio() +
prefix_score) /
2;
......
......@@ -44,7 +44,8 @@ class FuzzyTokenizedStringMatch {
const TokenizedString& text,
bool partial,
double partial_match_penalty_rate,
bool use_edit_distance);
bool use_edit_distance,
double num_matching_blocks_penalty);
// TokenSortRatio takes two set of tokens, sorts them and find the similarity
// between two sorted strings. This function assumes that TokenizedString is
......@@ -53,7 +54,8 @@ class FuzzyTokenizedStringMatch {
const TokenizedString& text,
bool partial,
double partial_match_penalty_rate,
bool use_edit_distance);
bool use_edit_distance,
double num_matching_blocks_penalty);
// Finds the best ratio of shorter text with a part of longer text.
// This function assumes that TokenizedString is already normalized (converted
......@@ -61,7 +63,8 @@ class FuzzyTokenizedStringMatch {
static double PartialRatio(const base::string16& query,
const base::string16& text,
double partial_match_penalty_rate,
bool use_edit_distance);
bool use_edit_distance,
double num_matching_blocks_penalty);
// Combines scores from different ratio functions. This function assumes that
// TokenizedString is already normalized (converted to lower cases).
......@@ -69,7 +72,8 @@ class FuzzyTokenizedStringMatch {
static double WeightedRatio(const TokenizedString& query,
const TokenizedString& text,
double partial_match_penalty_rate,
bool use_edit_distance);
bool use_edit_distance,
double num_matching_blocks_penalty);
// Since prefix match should always be favored over other matches, this
// function is dedicated to calculate a prefix match score in range of [0, 1].
// This score has two components: first character match and whole prefix
......@@ -85,7 +89,8 @@ class FuzzyTokenizedStringMatch {
bool use_prefix_only,
bool use_weighted_ratio,
bool use_edit_distance,
double partial_match_penalty_rate);
double partial_match_penalty_rate,
double num_matching_blocks_penalty = 0.0);
double relevance() const { return relevance_; }
const Hits& hits() const { return hits_; }
......
......@@ -28,9 +28,11 @@ SequenceMatcher::Match::Match(int pos_first, int pos_second, int len)
SequenceMatcher::SequenceMatcher(const base::string16& first_string,
const base::string16& second_string,
bool use_edit_distance)
bool use_edit_distance,
double num_matching_blocks_penalty)
: first_string_(first_string),
second_string_(second_string),
num_matching_blocks_penalty_(num_matching_blocks_penalty),
dp_common_string_(second_string.size() + 1, 0) {
DCHECK(!first_string_.empty() || !second_string_.empty());
......@@ -189,10 +191,15 @@ double SequenceMatcher::Ratio() {
int sum_match = 0;
const int sum_length = first_string_.size() + second_string_.size();
DCHECK_NE(sum_length, 0);
const int num_blocks = GetMatchingBlocks().size();
for (const auto& match : GetMatchingBlocks()) {
sum_match += match.length;
}
block_matching_ratio_ = 2.0 * sum_match / sum_length;
// The last entry of GetMatchingBlocks() is always an "empty block", so the
// actual number of matching blocks is |num_blocks - 1|. The first real block
// is not penalized, hence the exponent below uses |num_blocks - 2|.
block_matching_ratio_ =
2.0 * sum_match / sum_length *
exp(-(num_blocks - 2) * num_matching_blocks_penalty_);
}
return block_matching_ratio_;
}
......@@ -30,9 +30,16 @@ class SequenceMatcher {
// Length of the common substring.
int length;
};
// |num_matching_blocks_penalty| is used to penalize too many small matching
// blocks. For the same number of matching characters, we prefer fewer
// matching blocks. A value of 0 means no penalty. Values greater than 0
// mean a heavier penalty is applied as the number of blocks grows. This is
// only applied if |use_edit_distance| is false.
SequenceMatcher(const base::string16& first_string,
const base::string16& second_string,
bool use_edit_distance);
bool use_edit_distance,
double num_matching_blocks_penalty);
~SequenceMatcher() = default;
......@@ -59,6 +66,7 @@ class SequenceMatcher {
private:
base::string16 first_string_;
base::string16 second_string_;
double num_matching_blocks_penalty_ = 0.0;
double edit_distance_ratio_ = -1.0;
double block_matching_ratio_ = -1.0;
std::vector<Match> matching_blocks_;
......
......@@ -22,54 +22,58 @@ class SequenceMatcherTest : public testing::Test {};
TEST_F(SequenceMatcherTest, TestEditDistance) {
// Transposition
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("abcd"),
base::UTF8ToUTF16("abdc"), kDefaultUseEditDistance)
.EditDistance(),
1);
ASSERT_EQ(
SequenceMatcher(base::UTF8ToUTF16("abcd"), base::UTF8ToUTF16("abdc"),
kDefaultUseEditDistance, 0.0)
.EditDistance(),
1);
// Deletion
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("abcde"),
base::UTF8ToUTF16("abcd"), kDefaultUseEditDistance)
.EditDistance(),
1);
ASSERT_EQ(
SequenceMatcher(base::UTF8ToUTF16("abcde"), base::UTF8ToUTF16("abcd"),
kDefaultUseEditDistance, 0.0)
.EditDistance(),
1);
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("12"), base::UTF8ToUTF16(""),
kDefaultUseEditDistance)
kDefaultUseEditDistance, 0.0)
.EditDistance(),
2);
// Insertion
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("abc"),
base::UTF8ToUTF16("abxbc"), kDefaultUseEditDistance)
.EditDistance(),
2);
ASSERT_EQ(
SequenceMatcher(base::UTF8ToUTF16("abc"), base::UTF8ToUTF16("abxbc"),
kDefaultUseEditDistance, 0.0)
.EditDistance(),
2);
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16(""), base::UTF8ToUTF16("abxbc"),
kDefaultUseEditDistance)
kDefaultUseEditDistance, 0.0)
.EditDistance(),
5);
// Substitution
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("book"),
base::UTF8ToUTF16("back"), kDefaultUseEditDistance)
.EditDistance(),
2);
// Combination
ASSERT_EQ(
SequenceMatcher(base::UTF8ToUTF16("caclulation"),
base::UTF8ToUTF16("calculator"), kDefaultUseEditDistance)
SequenceMatcher(base::UTF8ToUTF16("book"), base::UTF8ToUTF16("back"),
kDefaultUseEditDistance, 0.0)
.EditDistance(),
3);
ASSERT_EQ(
SequenceMatcher(base::UTF8ToUTF16("sunday"),
base::UTF8ToUTF16("saturday"), kDefaultUseEditDistance)
.EditDistance(),
3);
2);
// Combination
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("caclulation"),
base::UTF8ToUTF16("calculator"),
kDefaultUseEditDistance, 0.0)
.EditDistance(),
3);
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("sunday"),
base::UTF8ToUTF16("saturday"),
kDefaultUseEditDistance, 0.0)
.EditDistance(),
3);
}
TEST_F(SequenceMatcherTest, TestFindLongestMatch) {
SequenceMatcher sequence_match(base::UTF8ToUTF16("miscellanious"),
base::UTF8ToUTF16("miscellaneous"),
kDefaultUseEditDistance);
kDefaultUseEditDistance, 0.0);
ASSERT_TRUE(MatchEqual(sequence_match.FindLongestMatch(0, 13, 0, 13),
Match(0, 0, 9)));
ASSERT_TRUE(MatchEqual(sequence_match.FindLongestMatch(7, 13, 7, 13),
......@@ -77,17 +81,17 @@ TEST_F(SequenceMatcherTest, TestFindLongestMatch) {
ASSERT_TRUE(MatchEqual(
SequenceMatcher(base::UTF8ToUTF16(""), base::UTF8ToUTF16("abcd"),
kDefaultUseEditDistance)
kDefaultUseEditDistance, 0.0)
.FindLongestMatch(0, 0, 0, 4),
Match(0, 0, 0)));
ASSERT_TRUE(MatchEqual(
SequenceMatcher(base::UTF8ToUTF16("abababbababa"),
base::UTF8ToUTF16("ababbaba"), kDefaultUseEditDistance)
.FindLongestMatch(0, 12, 0, 8),
Match(2, 0, 8)));
ASSERT_TRUE(MatchEqual(SequenceMatcher(base::UTF8ToUTF16("abababbababa"),
base::UTF8ToUTF16("ababbaba"),
kDefaultUseEditDistance, 0.0)
.FindLongestMatch(0, 12, 0, 8),
Match(2, 0, 8)));
ASSERT_TRUE(MatchEqual(
SequenceMatcher(base::UTF8ToUTF16("aaaaaa"), base::UTF8ToUTF16("aaaaa"),
kDefaultUseEditDistance)
kDefaultUseEditDistance, 0.0)
.FindLongestMatch(0, 6, 0, 5),
Match(0, 0, 5)));
}
......@@ -96,7 +100,7 @@ TEST_F(SequenceMatcherTest, TestGetMatchingBlocks) {
SequenceMatcher sequence_match(
base::UTF8ToUTF16("This is a demo sentence!!!"),
base::UTF8ToUTF16("This demo sentence is good!!!"),
kDefaultUseEditDistance);
kDefaultUseEditDistance, 0.0);
const std::vector<Match> true_matches = {Match(0, 0, 4), Match(9, 4, 14),
Match(23, 26, 3), Match(26, 29, 0)};
const std::vector<Match> matches = sequence_match.GetMatchingBlocks();
......@@ -107,30 +111,76 @@ TEST_F(SequenceMatcherTest, TestGetMatchingBlocks) {
}
TEST_F(SequenceMatcherTest, TestSequenceMatcherRatio) {
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("abcd"),
base::UTF8ToUTF16("adbc"), kDefaultUseEditDistance)
.Ratio(),
0.75);
ASSERT_EQ(
SequenceMatcher(base::UTF8ToUTF16("white cats"),
base::UTF8ToUTF16("cats white"), kDefaultUseEditDistance)
SequenceMatcher(base::UTF8ToUTF16("abcd"), base::UTF8ToUTF16("adbc"),
kDefaultUseEditDistance, 0.0)
.Ratio(),
0.75);
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("white cats"),
base::UTF8ToUTF16("cats white"),
kDefaultUseEditDistance, 0.0)
.Ratio(),
0.5);
}
TEST_F(SequenceMatcherTest, TestSequenceMatcherRatioWithoutPenalty) {
// Baseline for the penalty tests: every matcher here is built with a
// |num_matching_blocks_penalty| of 0.0, so Ratio() should depend only on the
// total number of matched characters, not on how many blocks they fall into.
// Each expected value is 2 * matched_chars / (first.size() + second.size()).
// NOTE(review): assumes kDefaultUseEditDistance selects the block-matching
// ratio rather than the edit-distance ratio -- confirm against its definition.
//
// Two matching blocks, total matching blocks length is 4.
// Expected: 2 * 4 / (4 + 11) ~= 0.533.
EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("word"),
base::UTF8ToUTF16("hello world"),
kDefaultUseEditDistance, 0.0)
.Ratio(),
0.533, 0.001);
// One matching block, length is 4.
// Same matched length as above, so the same ratio is expected even though the
// match is contiguous.
EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("worl"),
base::UTF8ToUTF16("hello world"),
kDefaultUseEditDistance, 0.0)
.Ratio(),
0.533, 0.001);
// No matching block at all.
// Nothing matches, so the ratio is expected to be 0.
EXPECT_NEAR(
SequenceMatcher(base::UTF8ToUTF16("abcd"), base::UTF8ToUTF16("xyz"),
kDefaultUseEditDistance, 0.0)
.Ratio(),
0.0, 0.001);
}
TEST_F(SequenceMatcherTest, TestSequenceMatcherRatioWithPenalty) {
// Two matching blocks, total matching blocks length is 4.
EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("word"),
base::UTF8ToUTF16("hello world"),
kDefaultUseEditDistance, 0.1)
.Ratio(),
0.4825, 0.0001);
// One matching block, length is 4.
EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("worl"),
base::UTF8ToUTF16("hello world"),
kDefaultUseEditDistance, 0.1)
.Ratio(),
0.533, 0.001);
// No matching block at all.
EXPECT_NEAR(
SequenceMatcher(base::UTF8ToUTF16("abcd"), base::UTF8ToUTF16("xyz"),
kDefaultUseEditDistance, 0.1)
.Ratio(),
0.5);
0.0, 0.001);
}
TEST_F(SequenceMatcherTest, TestEditDistanceRatio) {
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("abcd"),
base::UTF8ToUTF16("adbc"), true)
base::UTF8ToUTF16("adbc"), true, 0.0)
.Ratio(),
0.5);
EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("white cats"),
base::UTF8ToUTF16("cats white"), true)
base::UTF8ToUTF16("cats white"), true, 0.0)
.Ratio(),
0.2, 0.01);
// Totally different
EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("dog"),
base::UTF8ToUTF16("elphant"), true)
base::UTF8ToUTF16("elphant"), true, 0.0)
.Ratio(),
0.0, 0.01);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment