[cros search service] Add a parameter to SequenceMatcher

This cl adds a penalty factor to SequenceMatcher so that we can penalize lots of short matching blocks. This cl also updates search parameters. Bug: 1081584,1090181,1090154,1090148 Change-Id: Idd35ddd7cfbcdb3928e4b966f2ec757d0b75296c Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2227721 Commit-Queue: Jia Meng <jiameng@chromium.org> Reviewed-by: Thanh Nguyen <thanhdng@chromium.org> Reviewed-by: Kyle Horimoto <khorimoto@chromium.org> Cr-Commit-Position: refs/heads/master@{#774930}

[cros search service] Add a parameter to SequenceMatcher
This cl adds a penalty factor to SequenceMatcher so that we can penalize lots of short matching blocks. This cl also updates search parameters. Bug: 1081584,1090181,1090154,1090148 Change-Id: Idd35ddd7cfbcdb3928e4b966f2ec757d0b75296c Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2227721 Commit-Queue: Jia Meng <jiameng@chromium.org> Reviewed-by: Thanh Nguyen <thanhdng@chromium.org> Reviewed-by: Kyle Horimoto <khorimoto@chromium.org> Cr-Commit-Position: refs/heads/master@{#774930}
2b965865 · Jia · Commit Bot · 43c8771f · 2b965865 · 2b965865
Commit 2b965865 authored Jun 04, 2020 by Jia Committed by Commit Bot Jun 04, 2020
10 changed files
--- a/chrome/browser/chromeos/local_search_service/index.cc
+++ b/chrome/browser/chromeos/local_search_service/index.cc
@@ -74,7 +74,7 @@ bool IsItemRelevant(
    FuzzyTokenizedStringMatch match;
    if (match.IsRelevant(query, *tag, relevance_threshold, use_prefix_only,
                         use_weighted_ratio, use_edit_distance,
-                         partial_match_penalty_rate)) {
+                         partial_match_penalty_rate, 0.1)) {
      *relevance_score = match.relevance();
      for (const auto& hit : match.hits()) {
        local_search_service::Range range;

--- a/chrome/browser/chromeos/local_search_service/index.h
+++ b/chrome/browser/chromeos/local_search_service/index.h
@@ -40,7 +40,7 @@ struct SearchParams {
  double partial_match_penalty_rate = 0.9;
  bool use_prefix_only = false;
  bool use_edit_distance = false;
-  bool split_search_tags = true;
+  bool split_search_tags = false;
 };

 // A numeric range used to represent the start and end position.

--- a/chrome/browser/chromeos/local_search_service/index_unittest.cc
+++ b/chrome/browser/chromeos/local_search_service/index_unittest.cc
@@ -133,6 +133,9 @@ TEST_F(IndexTest, SearchTagSplit) {
  std::vector<Data> data = CreateTestData(data_to_register);
  EXPECT_EQ(data.size(), 2u);

+  SearchParams search_params;
+  search_params.split_search_tags = true;
+  index_.SetSearchParams(search_params);
  index_.AddOrUpdate(data);
  EXPECT_EQ(index_.GetSize(), 2u);


--- a/chrome/browser/ui/webui/settings/chromeos/search/search_handler_unittest.cc
+++ b/chrome/browser/ui/webui/settings/chromeos/search/search_handler_unittest.cc
@@ -107,21 +107,21 @@ class SearchHandlerTest : public testing::Test {
 };

 TEST_F(SearchHandlerTest, AddAndRemove) {
-  // Add printing search tags to registry and search for "Printing".
+  // Add printing search tags to registry and search for "Print".
  search_tag_registry_.AddSearchTags(GetPrintingSearchConcepts());
  std::vector<mojom::SearchResultPtr> search_results;

-  // 2 results should be available for a "Printing" query.
+  // 3 results should be available for a "Print" query.
  mojom::SearchHandlerAsyncWaiter(handler_remote_.get())
-      .Search(base::ASCIIToUTF16("Printing"),
+      .Search(base::ASCIIToUTF16("Print"),
              /*max_num_results=*/3u,
              mojom::ParentResultBehavior::kDoNotIncludeParentResults,
              &search_results);
-  EXPECT_EQ(search_results.size(), 2u);
+  EXPECT_EQ(search_results.size(), 3u);

  // Limit results to 1 max and ensure that only 1 result is returned.
  mojom::SearchHandlerAsyncWaiter(handler_remote_.get())
-      .Search(base::ASCIIToUTF16("Printing"),
+      .Search(base::ASCIIToUTF16("Print"),
              /*max_num_results=*/1u,
              mojom::ParentResultBehavior::kDoNotIncludeParentResults,
              &search_results);
@@ -139,7 +139,7 @@ TEST_F(SearchHandlerTest, AddAndRemove) {
  // returned for "Printing".
  search_tag_registry_.RemoveSearchTags(GetPrintingSearchConcepts());
  mojom::SearchHandlerAsyncWaiter(handler_remote_.get())
-      .Search(base::ASCIIToUTF16("Printing"),
+      .Search(base::ASCIIToUTF16("Print"),
              /*max_num_results=*/3u,
              mojom::ParentResultBehavior::kDoNotIncludeParentResults,
              &search_results);
@@ -187,11 +187,11 @@ TEST_F(SearchHandlerTest, DefaultRank) {
  search_tag_registry_.AddSearchTags(GetPrintingSearchConcepts());
  std::vector<mojom::SearchResultPtr> search_results;

-  // Search for "Printing". Only the IDS_OS_SETTINGS_TAG_PRINTING result
+  // Search for "Print". Only the IDS_OS_SETTINGS_TAG_PRINTING result
  // contains the word "Printing", but the other results have the similar word
  // "Printer". Thus, "Printing" has a higher relevance score.
  mojom::SearchHandlerAsyncWaiter(handler_remote_.get())
-      .Search(base::ASCIIToUTF16("Printing"),
+      .Search(base::ASCIIToUTF16("Print"),
              /*max_num_results=*/3u,
              mojom::ParentResultBehavior::kAllowParentResults,
              &search_results);

--- a/chrome/common/string_matching/fuzzy_tokenized_string_match.cc
+++ b/chrome/common/string_matching/fuzzy_tokenized_string_match.cc
@@ -91,7 +91,8 @@ double FuzzyTokenizedStringMatch::TokenSetRatio(
    const TokenizedString& text,
    bool partial,
    double partial_match_penalty_rate,
-    bool use_edit_distance) {
+    bool use_edit_distance,
+    double num_matching_blocks_penalty) {
  std::set<base::string16> query_token(query.tokens().begin(),
                                       query.tokens().end());
  std::set<base::string16> text_token(text.tokens().begin(),
@@ -127,21 +128,26 @@ double FuzzyTokenizedStringMatch::TokenSetRatio(
                 base::JoinString(text_diff_query, base::UTF8ToUTF16(" "))});

  if (partial) {
-    return std::max(
-        {PartialRatio(intersection_string, query_rewritten,
-                      partial_match_penalty_rate, use_edit_distance),
-         PartialRatio(intersection_string, text_rewritten,
-                      partial_match_penalty_rate, use_edit_distance),
-         PartialRatio(query_rewritten, text_rewritten,
-                      partial_match_penalty_rate, use_edit_distance)});
+    return std::max({PartialRatio(intersection_string, query_rewritten,
+                                  partial_match_penalty_rate, use_edit_distance,
+                                  num_matching_blocks_penalty),
+                     PartialRatio(intersection_string, text_rewritten,
+                                  partial_match_penalty_rate, use_edit_distance,
+                                  num_matching_blocks_penalty),
+                     PartialRatio(query_rewritten, text_rewritten,
+                                  partial_match_penalty_rate, use_edit_distance,
+                                  num_matching_blocks_penalty)});
  }

  return std::max(
-      {SequenceMatcher(intersection_string, query_rewritten, use_edit_distance)
+      {SequenceMatcher(intersection_string, query_rewritten, use_edit_distance,
+                       num_matching_blocks_penalty)
           .Ratio(),
-       SequenceMatcher(intersection_string, text_rewritten, use_edit_distance)
+       SequenceMatcher(intersection_string, text_rewritten, use_edit_distance,
+                       num_matching_blocks_penalty)
           .Ratio(),
-       SequenceMatcher(query_rewritten, text_rewritten, use_edit_distance)
+       SequenceMatcher(query_rewritten, text_rewritten, use_edit_distance,
+                       num_matching_blocks_penalty)
           .Ratio()});
 }

@@ -150,7 +156,8 @@ double FuzzyTokenizedStringMatch::TokenSortRatio(
    const TokenizedString& text,
    bool partial,
    double partial_match_penalty_rate,
-    bool use_edit_distance) {
+    bool use_edit_distance,
+    double num_matching_blocks_penalty) {
  const base::string16 query_sorted =
      base::JoinString(ProcessAndSort(query), base::UTF8ToUTF16(" "));
  const base::string16 text_sorted =
@@ -158,16 +165,19 @@ double FuzzyTokenizedStringMatch::TokenSortRatio(

  if (partial) {
    return PartialRatio(query_sorted, text_sorted, partial_match_penalty_rate,
-                        use_edit_distance);
+                        use_edit_distance, num_matching_blocks_penalty);
  }
-  return SequenceMatcher(query_sorted, text_sorted, use_edit_distance).Ratio();
+  return SequenceMatcher(query_sorted, text_sorted, use_edit_distance,
+                         num_matching_blocks_penalty)
+      .Ratio();
 }

 double FuzzyTokenizedStringMatch::PartialRatio(
    const base::string16& query,
    const base::string16& text,
    double partial_match_penalty_rate,
-    bool use_edit_distance) {
+    bool use_edit_distance,
+    double num_matching_blocks_penalty) {
  if (query.empty() || text.empty()) {
    return kMinScore;
  }
@@ -180,7 +190,9 @@ double FuzzyTokenizedStringMatch::PartialRatio(
  }

  const auto matching_blocks =
-      SequenceMatcher(shorter, longer, use_edit_distance).GetMatchingBlocks();
+      SequenceMatcher(shorter, longer, use_edit_distance,
+                      num_matching_blocks_penalty)
+          .GetMatchingBlocks();
  double partial_ratio = 0;

  for (const auto& block : matching_blocks) {
@@ -203,7 +215,7 @@ double FuzzyTokenizedStringMatch::PartialRatio(
    partial_ratio = std::max(
        partial_ratio,
        SequenceMatcher(shorter, longer.substr(long_start, shorter.size()),
-                        use_edit_distance)
+                        use_edit_distance, num_matching_blocks_penalty)
                .Ratio() *
            penalty);

@@ -218,7 +230,8 @@ double FuzzyTokenizedStringMatch::WeightedRatio(
    const TokenizedString& query,
    const TokenizedString& text,
    double partial_match_penalty_rate,
-    bool use_edit_distance) {
+    bool use_edit_distance,
+    double num_matching_blocks_penalty) {
  const double unbase_scale = 0.95;
  // Since query.text() and text.text() is not normalized, we use query.tokens()
  // and text.tokens() instead.
@@ -227,7 +240,8 @@ double FuzzyTokenizedStringMatch::WeightedRatio(
  const base::string16 text_normalized(
      base::JoinString(text.tokens(), base::UTF8ToUTF16(" ")));
  double weighted_ratio =
-      SequenceMatcher(query_normalized, text_normalized, use_edit_distance)
+      SequenceMatcher(query_normalized, text_normalized, use_edit_distance,
+                      num_matching_blocks_penalty)
          .Ratio();
  const double length_ratio =
      static_cast<double>(
@@ -245,23 +259,25 @@ double FuzzyTokenizedStringMatch::WeightedRatio(
    weighted_ratio =
        std::max(weighted_ratio,
                 PartialRatio(query_normalized, text_normalized,
-                              partial_match_penalty_rate, use_edit_distance) *
+                              partial_match_penalty_rate, use_edit_distance,
+                              num_matching_blocks_penalty) *
                     partial_scale);
  }
  weighted_ratio =
      std::max(weighted_ratio,
               TokenSortRatio(query, text, use_partial /*partial*/,
-                              partial_match_penalty_rate, use_edit_distance) *
+                              partial_match_penalty_rate, use_edit_distance,
+                              num_matching_blocks_penalty) *
                   unbase_scale * partial_scale);

  // Do not use partial match for token set because the match between the
  // intersection string and query/text rewrites will always return an extremely
  // high value.
-  weighted_ratio =
-      std::max(weighted_ratio,
-               TokenSetRatio(query, text, false /*partial*/,
-                             partial_match_penalty_rate, use_edit_distance) *
-                   unbase_scale * partial_scale);
+  weighted_ratio = std::max(
+      weighted_ratio,
+      TokenSetRatio(query, text, false /*partial*/, partial_match_penalty_rate,
+                    use_edit_distance, num_matching_blocks_penalty) *
+          unbase_scale * partial_scale);
  return weighted_ratio;
 }

@@ -276,7 +292,8 @@ bool FuzzyTokenizedStringMatch::IsRelevant(const TokenizedString& query,
                                           bool use_prefix_only,
                                           bool use_weighted_ratio,
                                           bool use_edit_distance,
-                                           double partial_match_penalty_rate) {
+                                           double partial_match_penalty_rate,
+                                           double num_matching_blocks_penalty) {
  // If there is an exact match, relevance will be 1.0 and there is only 1 hit
  // that is the entire text/query.
  const auto& query_text = query.text();
@@ -292,7 +309,8 @@ bool FuzzyTokenizedStringMatch::IsRelevant(const TokenizedString& query,

  // Find |hits_| using SequenceMatcher on original query and text.
  for (const auto& match :
-       SequenceMatcher(query_text, text_text, use_edit_distance)
+       SequenceMatcher(query_text, text_text, use_edit_distance,
+                       num_matching_blocks_penalty)
           .GetMatchingBlocks()) {
    if (match.length > 0) {
      hits_.push_back(gfx::Range(match.pos_second_string,
@@ -317,15 +335,17 @@ bool FuzzyTokenizedStringMatch::IsRelevant(const TokenizedString& query,
  if (use_weighted_ratio) {
    // If WeightedRatio is used, |relevance_| is the average of WeightedRatio
    // and PrefixMatcher scores.
-    relevance_ = (WeightedRatio(query, text, partial_match_penalty_rate,
-                                use_edit_distance) +
-                  prefix_score) /
-                 2;
+    relevance_ =
+        (WeightedRatio(query, text, partial_match_penalty_rate,
+                       use_edit_distance, num_matching_blocks_penalty) +
+         prefix_score) /
+        2;
  } else {
    // Use simple algorithm to calculate match ratio.
    relevance_ =
        (SequenceMatcher(base::i18n::ToLower(query_text),
-                         base::i18n::ToLower(text_text), use_edit_distance)
+                         base::i18n::ToLower(text_text), use_edit_distance,
+                         num_matching_blocks_penalty)
             .Ratio() +
         prefix_score) /
        2;

--- a/chrome/common/string_matching/fuzzy_tokenized_string_match.h
+++ b/chrome/common/string_matching/fuzzy_tokenized_string_match.h
@@ -44,7 +44,8 @@ class FuzzyTokenizedStringMatch {
                              const TokenizedString& text,
                              bool partial,
                              double partial_match_penalty_rate,
-                              bool use_edit_distance);
+                              bool use_edit_distance,
+                              double num_matching_blocks_penalty);

  // TokenSortRatio takes two set of tokens, sorts them and find the similarity
  // between two sorted strings. This function assumes that TokenizedString is
@@ -53,7 +54,8 @@ class FuzzyTokenizedStringMatch {
                               const TokenizedString& text,
                               bool partial,
                               double partial_match_penalty_rate,
-                               bool use_edit_distance);
+                               bool use_edit_distance,
+                               double num_matching_blocks_penalty);

  // Finds the best ratio of shorter text with a part of longer text.
  // This function assumes that TokenizedString is already normalized (converted
@@ -61,7 +63,8 @@ class FuzzyTokenizedStringMatch {
  static double PartialRatio(const base::string16& query,
                             const base::string16& text,
                             double partial_match_penalty_rate,
-                             bool use_edit_distance);
+                             bool use_edit_distance,
+                             double num_matching_blocks_penalty);

  // Combines scores from different ratio functions. This function assumes that
  // TokenizedString is already normalized (converted to lower cases).
@@ -69,7 +72,8 @@ class FuzzyTokenizedStringMatch {
  static double WeightedRatio(const TokenizedString& query,
                              const TokenizedString& text,
                              double partial_match_penalty_rate,
-                              bool use_edit_distance);
+                              bool use_edit_distance,
+                              double num_matching_blocks_penalty);
  // Since prefix match should always be favored over other matches, this
  // function is dedicated to calculate a prefix match score in range of [0, 1].
  // This score has two components: first character match and whole prefix
@@ -85,7 +89,8 @@ class FuzzyTokenizedStringMatch {
                  bool use_prefix_only,
                  bool use_weighted_ratio,
                  bool use_edit_distance,
-                  double partial_match_penalty_rate);
+                  double partial_match_penalty_rate,
+                  double num_matching_blocks_penalty = 0.0);
  double relevance() const { return relevance_; }
  const Hits& hits() const { return hits_; }


--- a/chrome/common/string_matching/fuzzy_tokenized_string_match_unittest.cc
+++ b/chrome/common/string_matching/fuzzy_tokenized_string_match_unittest.cc
--- a/chrome/common/string_matching/sequence_matcher.cc
+++ b/chrome/common/string_matching/sequence_matcher.cc
@@ -28,9 +28,11 @@ SequenceMatcher::Match::Match(int pos_first, int pos_second, int len)

 SequenceMatcher::SequenceMatcher(const base::string16& first_string,
                                 const base::string16& second_string,
-                                 bool use_edit_distance)
+                                 bool use_edit_distance,
+                                 double num_matching_blocks_penalty)
    : first_string_(first_string),
      second_string_(second_string),
+      num_matching_blocks_penalty_(num_matching_blocks_penalty),
      dp_common_string_(second_string.size() + 1, 0) {
  DCHECK(!first_string_.empty() || !second_string_.empty());

@@ -189,10 +191,15 @@ double SequenceMatcher::Ratio() {
    int sum_match = 0;
    const int sum_length = first_string_.size() + second_string_.size();
    DCHECK_NE(sum_length, 0);
+    const int num_blocks = GetMatchingBlocks().size();
    for (const auto& match : GetMatchingBlocks()) {
      sum_match += match.length;
    }
-    block_matching_ratio_ = 2.0 * sum_match / sum_length;
+    // The last block is always empty, so there are |num_blocks - 1| real
+    // blocks; subtracting two leaves a single real block unpenalized.
+    block_matching_ratio_ =
+        2.0 * sum_match / sum_length *
+        exp(-(num_blocks - 2) * num_matching_blocks_penalty_);
  }
  return block_matching_ratio_;
 }
--- a/chrome/common/string_matching/sequence_matcher.h
+++ b/chrome/common/string_matching/sequence_matcher.h
@@ -30,9 +30,16 @@ class SequenceMatcher {
    // Length of the common substring.
    int length;
  };
+
+  // |num_matching_blocks_penalty| is used to penalize too many small matching
+  // blocks. For the same number of matching characters, we prefer fewer
+  // matching blocks. A value of 0 means no penalty. Values greater than 0
+  // mean a heavier penalty is applied to a larger number of blocks. This is
+  // only applied if |use_edit_distance| is false.
  SequenceMatcher(const base::string16& first_string,
                  const base::string16& second_string,
-                  bool use_edit_distance);
+                  bool use_edit_distance,
+                  double num_matching_blocks_penalty);

  ~SequenceMatcher() = default;

@@ -59,6 +66,7 @@ class SequenceMatcher {
 private:
  base::string16 first_string_;
  base::string16 second_string_;
+  double num_matching_blocks_penalty_ = 0.0;
  double edit_distance_ratio_ = -1.0;
  double block_matching_ratio_ = -1.0;
  std::vector<Match> matching_blocks_;

--- a/chrome/common/string_matching/sequence_matcher_unittest.cc
+++ b/chrome/common/string_matching/sequence_matcher_unittest.cc
@@ -22,54 +22,58 @@ class SequenceMatcherTest : public testing::Test {};

 TEST_F(SequenceMatcherTest, TestEditDistance) {
  // Transposition
-  ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("abcd"),
-                            base::UTF8ToUTF16("abdc"), kDefaultUseEditDistance)
-                .EditDistance(),
-            1);
+  ASSERT_EQ(
+      SequenceMatcher(base::UTF8ToUTF16("abcd"), base::UTF8ToUTF16("abdc"),
+                      kDefaultUseEditDistance, 0.0)
+          .EditDistance(),
+      1);

  // Deletion
-  ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("abcde"),
-                            base::UTF8ToUTF16("abcd"), kDefaultUseEditDistance)
-                .EditDistance(),
-            1);
+  ASSERT_EQ(
+      SequenceMatcher(base::UTF8ToUTF16("abcde"), base::UTF8ToUTF16("abcd"),
+                      kDefaultUseEditDistance, 0.0)
+          .EditDistance(),
+      1);
  ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("12"), base::UTF8ToUTF16(""),
-                            kDefaultUseEditDistance)
+                            kDefaultUseEditDistance, 0.0)
                .EditDistance(),
            2);

  // Insertion
-  ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("abc"),
-                            base::UTF8ToUTF16("abxbc"), kDefaultUseEditDistance)
-                .EditDistance(),
-            2);
+  ASSERT_EQ(
+      SequenceMatcher(base::UTF8ToUTF16("abc"), base::UTF8ToUTF16("abxbc"),
+                      kDefaultUseEditDistance, 0.0)
+          .EditDistance(),
+      2);
  ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16(""), base::UTF8ToUTF16("abxbc"),
-                            kDefaultUseEditDistance)
+                            kDefaultUseEditDistance, 0.0)
                .EditDistance(),
            5);

  // Substitution
-  ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("book"),
-                            base::UTF8ToUTF16("back"), kDefaultUseEditDistance)
-                .EditDistance(),
-            2);
-
-  // Combination
  ASSERT_EQ(
-      SequenceMatcher(base::UTF8ToUTF16("caclulation"),
-                      base::UTF8ToUTF16("calculator"), kDefaultUseEditDistance)
+      SequenceMatcher(base::UTF8ToUTF16("book"), base::UTF8ToUTF16("back"),
+                      kDefaultUseEditDistance, 0.0)
          .EditDistance(),
-      3);
-  ASSERT_EQ(
-      SequenceMatcher(base::UTF8ToUTF16("sunday"),
-                      base::UTF8ToUTF16("saturday"), kDefaultUseEditDistance)
-          .EditDistance(),
-      3);
+      2);
+
+  // Combination
+  ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("caclulation"),
+                            base::UTF8ToUTF16("calculator"),
+                            kDefaultUseEditDistance, 0.0)
+                .EditDistance(),
+            3);
+  ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("sunday"),
+                            base::UTF8ToUTF16("saturday"),
+                            kDefaultUseEditDistance, 0.0)
+                .EditDistance(),
+            3);
 }

 TEST_F(SequenceMatcherTest, TestFindLongestMatch) {
  SequenceMatcher sequence_match(base::UTF8ToUTF16("miscellanious"),
                                 base::UTF8ToUTF16("miscellaneous"),
-                                 kDefaultUseEditDistance);
+                                 kDefaultUseEditDistance, 0.0);
  ASSERT_TRUE(MatchEqual(sequence_match.FindLongestMatch(0, 13, 0, 13),
                         Match(0, 0, 9)));
  ASSERT_TRUE(MatchEqual(sequence_match.FindLongestMatch(7, 13, 7, 13),
@@ -77,17 +81,17 @@ TEST_F(SequenceMatcherTest, TestFindLongestMatch) {

  ASSERT_TRUE(MatchEqual(
      SequenceMatcher(base::UTF8ToUTF16(""), base::UTF8ToUTF16("abcd"),
-                      kDefaultUseEditDistance)
+                      kDefaultUseEditDistance, 0.0)
          .FindLongestMatch(0, 0, 0, 4),
      Match(0, 0, 0)));
-  ASSERT_TRUE(MatchEqual(
-      SequenceMatcher(base::UTF8ToUTF16("abababbababa"),
-                      base::UTF8ToUTF16("ababbaba"), kDefaultUseEditDistance)
-          .FindLongestMatch(0, 12, 0, 8),
-      Match(2, 0, 8)));
+  ASSERT_TRUE(MatchEqual(SequenceMatcher(base::UTF8ToUTF16("abababbababa"),
+                                         base::UTF8ToUTF16("ababbaba"),
+                                         kDefaultUseEditDistance, 0.0)
+                             .FindLongestMatch(0, 12, 0, 8),
+                         Match(2, 0, 8)));
  ASSERT_TRUE(MatchEqual(
      SequenceMatcher(base::UTF8ToUTF16("aaaaaa"), base::UTF8ToUTF16("aaaaa"),
-                      kDefaultUseEditDistance)
+                      kDefaultUseEditDistance, 0.0)
          .FindLongestMatch(0, 6, 0, 5),
      Match(0, 0, 5)));
 }
@@ -96,7 +100,7 @@ TEST_F(SequenceMatcherTest, TestGetMatchingBlocks) {
  SequenceMatcher sequence_match(
      base::UTF8ToUTF16("This is a demo sentence!!!"),
      base::UTF8ToUTF16("This demo sentence is good!!!"),
-      kDefaultUseEditDistance);
+      kDefaultUseEditDistance, 0.0);
  const std::vector<Match> true_matches = {Match(0, 0, 4), Match(9, 4, 14),
                                           Match(23, 26, 3), Match(26, 29, 0)};
  const std::vector<Match> matches = sequence_match.GetMatchingBlocks();
@@ -107,30 +111,76 @@ TEST_F(SequenceMatcherTest, TestGetMatchingBlocks) {
 }

 TEST_F(SequenceMatcherTest, TestSequenceMatcherRatio) {
-  ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("abcd"),
-                            base::UTF8ToUTF16("adbc"), kDefaultUseEditDistance)
-                .Ratio(),
-            0.75);
  ASSERT_EQ(
-      SequenceMatcher(base::UTF8ToUTF16("white cats"),
-                      base::UTF8ToUTF16("cats white"), kDefaultUseEditDistance)
+      SequenceMatcher(base::UTF8ToUTF16("abcd"), base::UTF8ToUTF16("adbc"),
+                      kDefaultUseEditDistance, 0.0)
+          .Ratio(),
+      0.75);
+  ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("white cats"),
+                            base::UTF8ToUTF16("cats white"),
+                            kDefaultUseEditDistance, 0.0)
+                .Ratio(),
+            0.5);
+}
+
+TEST_F(SequenceMatcherTest, TestSequenceMatcherRatioWithoutPenalty) {
+  // Two matching blocks, total matching blocks length is 4.
+  EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("word"),
+                              base::UTF8ToUTF16("hello world"),
+                              kDefaultUseEditDistance, 0.0)
+                  .Ratio(),
+              0.533, 0.001);
+
+  // One matching block, length is 4.
+  EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("worl"),
+                              base::UTF8ToUTF16("hello world"),
+                              kDefaultUseEditDistance, 0.0)
+                  .Ratio(),
+              0.533, 0.001);
+
+  // No matching block at all.
+  EXPECT_NEAR(
+      SequenceMatcher(base::UTF8ToUTF16("abcd"), base::UTF8ToUTF16("xyz"),
+                      kDefaultUseEditDistance, 0.0)
+          .Ratio(),
+      0.0, 0.001);
+}
+
+TEST_F(SequenceMatcherTest, TestSequenceMatcherRatioWithPenalty) {
+  // Two matching blocks, total matching blocks length is 4.
+  EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("word"),
+                              base::UTF8ToUTF16("hello world"),
+                              kDefaultUseEditDistance, 0.1)
+                  .Ratio(),
+              0.4825, 0.0001);
+  // One matching block, length is 4.
+  EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("worl"),
+                              base::UTF8ToUTF16("hello world"),
+                              kDefaultUseEditDistance, 0.1)
+                  .Ratio(),
+              0.533, 0.001);
+
+  // No matching block at all.
+  EXPECT_NEAR(
+      SequenceMatcher(base::UTF8ToUTF16("abcd"), base::UTF8ToUTF16("xyz"),
+                      kDefaultUseEditDistance, 0.1)
          .Ratio(),
-      0.5);
+      0.0, 0.001);
 }

 TEST_F(SequenceMatcherTest, TestEditDistanceRatio) {
  ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("abcd"),
-                            base::UTF8ToUTF16("adbc"), true)
+                            base::UTF8ToUTF16("adbc"), true, 0.0)
                .Ratio(),
            0.5);
  EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("white cats"),
-                              base::UTF8ToUTF16("cats white"), true)
+                              base::UTF8ToUTF16("cats white"), true, 0.0)
                  .Ratio(),
              0.2, 0.01);

  // Totally different
  EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("dog"),
-                              base::UTF8ToUTF16("elphant"), true)
+                              base::UTF8ToUTF16("elphant"), true, 0.0)
                  .Ratio(),
              0.0, 0.01);
 }