[Autofill] Fixed bug in subset merge strategy

Currently, it is assumed that a name consisting of multiple words has the same number of words after normalization. In fact, the normalization replaces a list of known separators with spaces and the space is used to divide a name into its word. Therefore, running the normalization first can yield a larger number of tokens. This CL fixes the root cause of an issue that lead to a stability issue. The issue was mitigated by disabling this merge strategy since it was only used unintentionally. With this fix, the strategy becomes workable for future usage. Change-Id: I7f987e7e81f6ce968cbf728b529b3d6fe58a9313 Bug: 1158810 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2625884Reviewed-by: Christoph Schwering <schwering@google.com> Commit-Queue: Matthias Körber <koerber@google.com> Cr-Commit-Position: refs/heads/master@{#844937}

[Autofill] Fixed bug in subset merge strategy
Currently, it is assumed that a name consisting of multiple words has the same number of words after normalization. In fact, the normalization replaces a list of known separators with spaces and the space is used to divide a name into its word. Therefore, running the normalization first can yield a larger number of tokens. This CL fixes the root cause of an issue that lead to a stability issue. The issue was mitigated by disabling this merge strategy since it was only used unintentionally. With this fix, the strategy becomes workable for future usage. Change-Id: I7f987e7e81f6ce968cbf728b529b3d6fe58a9313 Bug: 1158810 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2625884Reviewed-by: Christoph Schwering <schwering@google.com> Commit-Queue: Matthias Körber <koerber@google.com> Cr-Commit-Position: refs/heads/master@{#844937}
5052c7ae · Matthias Körber · Chromium LUCI CQ · c55ed6d9 · 5052c7ae · 5052c7ae
Commit 5052c7ae authored Jan 19, 2021 by Matthias Körber Committed by Chromium LUCI CQ Jan 19, 2021
3 changed files
--- a/components/autofill/core/browser/data_model/autofill_structured_address_component.cc
+++ b/components/autofill/core/browser/data_model/autofill_structured_address_component.cc
@@ -819,7 +819,13 @@ bool AddressComponent::IsMergeableWithComponent(
  if ((merge_mode_ & kRecursivelyMergeSingleTokenSubset) &&
      token_comparison_result.IsSingleTokenSuperset()) {
-    return true;
+    // This strategy is only applicable if also the unnormalized values have a
+    // single-token-superset relation.
+    SortedTokenComparisonResult unnormalized_token_comparison_result =
+        CompareSortedTokens(GetValue(), newer_component.GetValue());
+    if (unnormalized_token_comparison_result.IsSingleTokenSuperset()) {
+      return true;
+    }
  }
  if (merge_mode_ == kUseNewerIfDifferent)
@@ -924,9 +930,14 @@ bool AddressComponent::MergeWithComponent(
      token_comparison_result.IsSingleTokenSuperset()) {
    // For the merging of subset token, the tokenization must be done without
    // prior normalization of the values.
-    SortedTokenComparisonResult token_comparison_result =
+    SortedTokenComparisonResult unnormalized_token_comparison_result =
        CompareSortedTokens(GetValue(), newer_component.GetValue());
-    return MergeSubsetComponent(newer_component, token_comparison_result);
+    // The merging strategy can only be applied when the comparison of the
+    // unnormalized tokens still yields a single token superset.
+    if (unnormalized_token_comparison_result.IsSingleTokenSuperset()) {
+      return MergeSubsetComponent(newer_component,
+                                  unnormalized_token_comparison_result);
+    }
  }
  // Replace the older value with the newer one if the corresponding mode is

--- a/components/autofill/core/browser/data_model/autofill_structured_address_component.h
+++ b/components/autofill/core/browser/data_model/autofill_structured_address_component.h
@@ -384,6 +384,10 @@ class AddressComponent {
  // Sets the merge mode for testing purposes.
  void SetMergeModeForTesting(int merge_mode) { merge_mode_ = merge_mode; }
+  // Returns the value used for comparison for testing purposes.
+  base::string16 ValueForComparisonForTesting() const {
+    return ValueForComparison();
+  }
 #endif
 protected:

--- a/components/autofill/core/browser/data_model/autofill_structured_address_name_unittest.cc
+++ b/components/autofill/core/browser/data_model/autofill_structured_address_name_unittest.cc
@@ -731,6 +731,76 @@ TEST(AutofillStructuredName, MergeSubsetLastname) {
  VerifyTestValues(&name, name_values);
 }
+TEST(AutofillStructuredName, MergeSubsetLastname_WithNonSpaceSeparators) {
+  NameFull name;
+  NameFull subset_name;
+  name.SetMergeModeForTesting(kRecursivelyMergeSingleTokenSubset |
+                              kRecursivelyMergeTokenEquivalentValues);
+  AddressComponentTestValues name_values = {
+      {.type = NAME_FULL,
+       .value = "Thomas-Neo-Anderson",
+       .status = VerificationStatus::kUserVerified},
+      {.type = NAME_FIRST,
+       .value = "Thomas",
+       .status = VerificationStatus::kObserved},
+      {.type = NAME_MIDDLE,
+       .value = "Thomas",
+       .status = VerificationStatus::kObserved},
+      {.type = NAME_LAST,
+       .value = "Anderson",
+       .status = VerificationStatus::kObserved},
+  };
+  AddressComponentTestValues subset_name_values = {
+      {.type = NAME_FULL,
+       .value = "Thomas-Anderson",
+       .status = VerificationStatus::kObserved},
+      {.type = NAME_FIRST,
+       .value = "Thomas",
+       .status = VerificationStatus::kObserved},
+      {.type = NAME_LAST,
+       .value = "Anderson",
+       .status = VerificationStatus::kObserved},
+  };
+  AddressComponentTestValues expectation = {
+      {.type = NAME_FULL,
+       .value = "Thomas-Neo-Anderson",
+       .status = VerificationStatus::kUserVerified},
+      {.type = NAME_FIRST,
+       .value = "Thomas",
+       .status = VerificationStatus::kObserved},
+      {.type = NAME_MIDDLE,
+       .value = "Thomas",
+       .status = VerificationStatus::kObserved},
+      {.type = NAME_LAST,
+       .value = "Anderson",
+       .status = VerificationStatus::kObserved},
+  };
+  SetTestValues(&name, name_values);
+  SetTestValues(&subset_name, subset_name_values);
+  // After normalization, the two names should have a single-token-superset
+  // relation.
+  SortedTokenComparisonResult token_comparison_result =
+      CompareSortedTokens(name.ValueForComparisonForTesting(),
+                          subset_name.ValueForComparisonForTesting());
+  EXPECT_TRUE(token_comparison_result.IsSingleTokenSuperset());
+  // Without normalization, the two names should be considered distinct.
+  token_comparison_result =
+      CompareSortedTokens(name.GetValue(), subset_name.GetValue());
+  EXPECT_TRUE(token_comparison_result.status == DISTINCT);
+  // Verify that those two names are not considered mergeable.
+  EXPECT_FALSE(name.IsMergeableWithComponent(subset_name));
+  EXPECT_FALSE(name.MergeWithComponent(subset_name));
+  VerifyTestValues(&name, expectation);
+}
 TEST(AutofillStructuredName, MergeSubsetLastname2) {
  NameFull name;
  NameFull subset_name;