[Autofill][SlimShady] Component for structured names.

This CL adds the static address component tree for names. The tree supports two surnames, as they are common in Hispanic/Latinx names.
Change-Id: Icd54f986ab192101fbea5a8576c8abbabfffe636
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2264423
Commit-Queue: Matthias Körber <koerber@google.com>
Reviewed-by: Dominic Battré <battre@chromium.org>
Cr-Commit-Position: refs/heads/master@{#793110}

[Autofill][SlimShady] Component for structured names.
This CL adds the static address component tree for names. The tree supports two surnames, as they are common in Hispanic/Latinx names.
Change-Id: Icd54f986ab192101fbea5a8576c8abbabfffe636
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2264423
Commit-Queue: Matthias Körber <koerber@google.com>
Reviewed-by: Dominic Battré <battre@chromium.org>
Cr-Commit-Position: refs/heads/master@{#793110}
deea5c39 · Matthias Körber · Commit Bot · 73f0449d · deea5c39 · deea5c39
Commit deea5c39 authored Jul 30, 2020 by Matthias Körber Committed by Commit Bot Jul 30, 2020
12 changed files
--- a/components/autofill/core/browser/BUILD.gn
+++ b/components/autofill/core/browser/BUILD.gn
@@ -92,6 +92,8 @@ jumbo_static_library("browser") {
    "data_model/autofill_structured_address_component.h",
    "data_model/autofill_structured_address_constants.cc",
    "data_model/autofill_structured_address_constants.h",
+    "data_model/autofill_structured_address_name.cc",
+    "data_model/autofill_structured_address_name.h",
    "data_model/autofill_structured_address_regex_provider.cc",
    "data_model/autofill_structured_address_regex_provider.h",
    "data_model/autofill_structured_address_utils.cc",
@@ -578,6 +580,7 @@ source_set("unit_tests") {
    "data_model/autofill_profile_comparator_unittest.cc",
    "data_model/autofill_profile_unittest.cc",
    "data_model/autofill_structured_address_component_unittest.cc",
+    "data_model/autofill_structured_address_name_unittest.cc",
    "data_model/autofill_structured_address_regex_provider_unittest.cc",
    "data_model/autofill_structured_address_utils_unittest.cc",
    "data_model/contact_info_unittest.cc",

--- a/components/autofill/core/browser/autofill_data_util_unittest.cc
+++ b/components/autofill/core/browser/autofill_data_util_unittest.cc
@@ -19,9 +19,21 @@ using data_util::bit_field_type_groups::kName;
 using data_util::bit_field_type_groups::kPhone;
 TEST(AutofillDataUtilTest, DetermineGroupsForHomeNameAndAddress) {
-  const std::vector<ServerFieldType> field_types{
+  const std::vector<ServerFieldType> field_types{NAME_HONORIFIC_PREFIX,
-      NAME_FIRST,        NAME_LAST,          ADDRESS_HOME_LINE1,
+                                                 NAME_FULL,
-      ADDRESS_HOME_CITY, ADDRESS_HOME_STATE, ADDRESS_HOME_ZIP};
+                                                 NAME_FIRST,
+                                                 NAME_MIDDLE,
+                                                 NAME_MIDDLE_INITIAL,
+                                                 NAME_LAST,
+                                                 NAME_LAST_FIRST,
+                                                 NAME_LAST_CONJUNCTION,
+                                                 NAME_LAST_SECOND,
+                                                 NAME_FIRST,
+                                                 NAME_LAST,
+                                                 ADDRESS_HOME_LINE1,
+                                                 ADDRESS_HOME_CITY,
+                                                 ADDRESS_HOME_STATE,
+                                                 ADDRESS_HOME_ZIP};
  const uint32_t expected_group_bitmask = kName | kAddress;
  const uint32_t group_bitmask = data_util::DetermineGroups(field_types);
@@ -195,9 +207,8 @@ INSTANTIATE_TEST_SUITE_P(
                         "황목"},  // Korean name, Hangul
        // It occasionally happens that a full name is 2 characters, 1/1.
-        FullNameTestCase{"이도", "도", "", "이"},  // Korean name, Hangul
+        FullNameTestCase{"이도", "도", "", "이"},    // Korean name, Hangul
-        FullNameTestCase{"孫文", "文", "", "孫"}   // Chinese name, Unihan
+        FullNameTestCase{"孫文", "文", "", "孫"}));  // Chinese name, Unihan
-        ));
 class JoinNamePartsTest : public testing::TestWithParam<FullNameTestCase> {};
@@ -229,9 +240,7 @@ INSTANTIATE_TEST_SUITE_P(
        // These are no CJK names for us, they're just bogus.
        FullNameTestCase{"Homer シンプソン", "Homer", "", "シンプソン"},
        FullNameTestCase{"ホーマー Simpson", "ホーマー", "", "Simpson"},
-        FullNameTestCase{"반 기 문", "반", "기", "문"}
+        FullNameTestCase{"반 기 문", "반", "기", "문"}));
-        // Has a middle-name, too unusual
-        ));
 struct ValidCountryCodeTestCase {
  std::string country_code;

--- a/components/autofill/core/browser/data_model/autofill_structured_address_constants.cc
+++ b/components/autofill/core/browser/data_model/autofill_structured_address_constants.cc
@@ -7,7 +7,7 @@
 namespace autofill {
 namespace structured_address {
-const char kSingleWordRe[] = "(?:\\w+)";
+// Characters that delimit the tokens of a name: a space and a hyphen.
+const char kNameSeparators[] = " -";
 }  // namespace structured_address
 }  // namespace autofill
--- a/components/autofill/core/browser/data_model/autofill_structured_address_constants.h
+++ b/components/autofill/core/browser/data_model/autofill_structured_address_constants.h
@@ -8,8 +8,8 @@
 namespace autofill {
 namespace structured_address {
-// Regular expression pattern to match a single word.
+// Characters that delimit the individual tokens of a name.
-extern const char kSingleWordRe[];
+extern const char kNameSeparators[];
 }  // namespace structured_address
 }  // namespace autofill

--- a/components/autofill/core/browser/data_model/autofill_structured_address_name.cc
+++ b/components/autofill/core/browser/data_model/autofill_structured_address_name.cc
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+#include "components/autofill/core/browser/data_model/autofill_structured_address_name.h"
+#include <utility>
+#include "base/i18n/case_conversion.h"
+#include "base/strings/strcat.h"
+#include "base/strings/string_split.h"
+#include "base/strings/string_util.h"
+#include "base/strings/utf_string_conversions.h"
+#include "components/autofill/core/browser/autofill_type.h"
+#include "components/autofill/core/browser/data_model/autofill_structured_address_constants.h"
+#include "components/autofill/core/browser/data_model/autofill_structured_address_regex_provider.h"
+#include "components/autofill/core/browser/data_model/autofill_structured_address_utils.h"
+#include "components/autofill/core/browser/field_types.h"
+namespace autofill {
+namespace structured_address {
+// Collapses |value| into upper-case initials. |value| is split on the
+// characters in |kNameSeparators|; the leading element of every non-empty
+// token is collected and the collected string is upper-cased.
+// NOTE(review): only the first UTF-16 code unit of a token is taken, so a
+// token starting with a surrogate pair would presumably be truncated --
+// confirm whether that matters for supported inputs.
+base::string16 ReduceToInitials(const base::string16& value) {
+  // Nothing to split for an empty input.
+  if (value.empty())
+    return base::string16();
+  const std::vector<base::string16> tokens = base::SplitString(
+      value, base::ASCIIToUTF16(kNameSeparators),
+      base::WhitespaceHandling::TRIM_WHITESPACE,
+      base::SplitResult::SPLIT_WANT_NONEMPTY);
+  base::string16 initials;
+  // One element per token is appended below.
+  initials.reserve(tokens.size());
+  for (const base::string16& token : tokens) {
+    // SPLIT_WANT_NONEMPTY was requested, so an empty token is unexpected.
+    DCHECK(!token.empty());
+    initials.push_back(token.front());
+  }
+  return base::i18n::ToUpper(initials);
+}
+// Reports whether |name| shows at least one Hispanic/Latinx characteristic.
+// Either signal is sufficient on its own: a match of the common-name
+// expression, or a match of the last-name-conjunction expression. The second
+// expression is only evaluated if the first one does not match.
+bool HasHispanicLatinxNameCharaceristics(const std::string& name) {
+  return IsPartialMatch(name,
+                        RegEx::kMatchHispanicCommonNameCharacteristics) ||
+         IsPartialMatch(
+             name, RegEx::kMatchHispanicLastNameConjuctionCharacteristics);
+}
+// Reports whether |name| is partially matched by the expression registered as
+// RegEx::kMatchCjkNameCharacteristics.
+bool HasCjkNameCharacteristics(const std::string& name) {
+  const bool matches_cjk =
+      IsPartialMatch(name, RegEx::kMatchCjkNameCharacteristics);
+  return matches_cjk;
+}
+// Reports whether |middle_name| is partially matched by the expression
+// registered as RegEx::kMatchMiddleNameInitialsCharacteristics.
+bool HasMiddleNameInitialsCharacteristics(const std::string& middle_name) {
+  const bool matches_initials = IsPartialMatch(
+      middle_name, RegEx::kMatchMiddleNameInitialsCharacteristics);
+  return matches_initials;
+}
+// The default constructor delegates with a null parent. The parent-taking
+// constructor passes NAME_HONORIFIC_PREFIX and |parent| to the
+// AddressComponent base.
+NameHonorific::NameHonorific() : NameHonorific(nullptr) {}
+NameHonorific::NameHonorific(AddressComponent* parent)
+    : AddressComponent(NAME_HONORIFIC_PREFIX, parent) {}
+NameHonorific::~NameHonorific() = default;
+// The default constructor delegates with a null parent. The parent-taking
+// constructor passes NAME_FIRST and |parent| to the AddressComponent base.
+NameFirst::NameFirst() : NameFirst(nullptr) {}
+NameFirst::NameFirst(AddressComponent* parent)
+    : AddressComponent(NAME_FIRST, parent) {}
+NameFirst::~NameFirst() = default;
+// The default constructor delegates with a null parent. The parent-taking
+// constructor passes NAME_MIDDLE and |parent| to the AddressComponent base.
+NameMiddle::NameMiddle() : NameMiddle(nullptr) {}
+NameMiddle::NameMiddle(AddressComponent* parent)
+    : AddressComponent(NAME_MIDDLE, parent) {}
+NameMiddle::~NameMiddle() = default;
+// Adds NAME_MIDDLE_INITIAL to |supported_types|. |supported_types| is
+// dereferenced unconditionally and therefore must not be null.
+void NameMiddle::GetAdditionalSupportedFieldTypes(
+    ServerFieldTypeSet* supported_types) const {
+  supported_types->insert(NAME_MIDDLE_INITIAL);
+}
+// Serves the NAME_MIDDLE_INITIAL type from the stored middle name.
+// Returns false if |type_name| is not the name of NAME_MIDDLE_INITIAL and true
+// otherwise. |value| may be null, in which case nothing is written.
+bool NameMiddle::ConvertAndGetTheValueForAdditionalFieldTypeName(
+    const std::string& type_name,
+    base::string16* value) const {
+  if (type_name != AutofillType(NAME_MIDDLE_INITIAL).ToString())
+    return false;
+  // The type is supported, but there is no location to write a result to.
+  if (!value)
+    return true;
+  // A stored value that already has the characteristics of initials is handed
+  // out unchanged. Any other value is converted to a sequence of upper case
+  // letters, one for each space- or hyphen-separated token.
+  const bool already_initials =
+      HasMiddleNameInitialsCharacteristics(base::UTF16ToUTF8(GetValue()));
+  *value = already_initials ? GetValue() : ReduceToInitials(GetValue());
+  return true;
+}
+// Accepts a value for the NAME_MIDDLE_INITIAL type and stores it, unmodified,
+// as the value of this component together with |status|.
+// Returns false, without storing anything, for every other |type_name|.
+bool NameMiddle::ConvertAndSetValueForAdditionalFieldTypeName(
+    const std::string& type_name,
+    const base::string16& value,
+    const VerificationStatus& status) {
+  const bool is_middle_initial =
+      type_name == AutofillType(NAME_MIDDLE_INITIAL).ToString();
+  if (!is_middle_initial)
+    return false;
+  SetValue(value, status);
+  return true;
+}
+// The default constructor delegates with a null parent. The parent-taking
+// constructor passes NAME_LAST_FIRST and |parent| to the AddressComponent
+// base.
+NameLastFirst::NameLastFirst() : NameLastFirst(nullptr) {}
+NameLastFirst::NameLastFirst(AddressComponent* parent)
+    : AddressComponent(NAME_LAST_FIRST, parent) {}
+NameLastFirst::~NameLastFirst() = default;
+// The default constructor delegates with a null parent. The parent-taking
+// constructor passes NAME_LAST_CONJUNCTION and |parent| to the
+// AddressComponent base.
+NameLastConjunction::NameLastConjunction() : NameLastConjunction(nullptr) {}
+NameLastConjunction::NameLastConjunction(AddressComponent* parent)
+    : AddressComponent(NAME_LAST_CONJUNCTION, parent) {}
+NameLastConjunction::~NameLastConjunction() = default;
+// Returns the single expression used to parse the last name. A value with
+// Hispanic/Latinx characteristics selects RegEx::kParseHispanicLastName; every
+// other value selects RegEx::kParseLastNameIntoSecondLastName.
+std::vector<const RE2*> NameLast::GetParseRegularExpressionsByRelevance()
+    const {
+  auto* provider = StructuredAddressesRegExProvider::Instance();
+  DCHECK(provider);
+  const bool is_hispanic_latinx =
+      HasHispanicLatinxNameCharaceristics(base::UTF16ToUTF8(GetValue()));
+  const RegEx selected = is_hispanic_latinx
+                             ? RegEx::kParseHispanicLastName
+                             : RegEx::kParseLastNameIntoSecondLastName;
+  return {provider->GetRegEx(selected)};
+}
+// The default constructor delegates with a null parent. The parent-taking
+// constructor passes NAME_LAST_SECOND and |parent| to the AddressComponent
+// base.
+NameLastSecond::NameLastSecond() : NameLastSecond(nullptr) {}
+NameLastSecond::NameLastSecond(AddressComponent* parent)
+    : AddressComponent(NAME_LAST_SECOND, parent) {}
+NameLastSecond::~NameLastSecond() = default;
+// The default constructor delegates with a null parent. The parent-taking
+// constructor passes NAME_LAST, |parent| and pointers to the three member
+// components (|first_|, |conjunction_|, |second_|) to the AddressComponent
+// base.
+NameLast::NameLast() : NameLast(nullptr) {}
+NameLast::NameLast(AddressComponent* parent)
+    : AddressComponent(NAME_LAST, parent, {&first_, &conjunction_, &second_}) {}
+NameLast::~NameLast() = default;
+// Fallback parsing: the complete value of this component is assigned to
+// NAME_LAST_SECOND with the status VerificationStatus::kParsed.
+void NameLast::ParseValueAndAssignSubcomponentsByFallbackMethod() {
+  const VerificationStatus fallback_status = VerificationStatus::kParsed;
+  SetValueForTypeIfPossible(NAME_LAST_SECOND, GetValue(), fallback_status);
+}
+// The default constructor delegates with a null parent. The parent-taking
+// constructor passes NAME_FULL, |parent| and pointers to the four member
+// components (honorific, first, middle and last name) to the AddressComponent
+// base.
+NameFull::NameFull() : NameFull(nullptr) {}
+NameFull::NameFull(AddressComponent* parent)
+    : AddressComponent(
+          NAME_FULL,
+          parent,
+          {&name_honorific_, &name_first_, &name_middle_, &name_last_}) {}
+// Returns the expressions used to parse a full name, ordered by relevance.
+// The set depends on the characteristics of the stored value: CJK names,
+// Hispanic/Latinx names and all remaining names use different expressions.
+std::vector<const RE2*> NameFull::GetParseRegularExpressionsByRelevance()
+    const {
+  auto* provider = StructuredAddressesRegExProvider::Instance();
+  DCHECK(provider);
+  // The UTF-8 form of the value is needed by both characteristic checks.
+  const std::string full_name = base::UTF16ToUTF8(GetValue());
+  // For a CJK name, the expressions are tried in the following order:
+  //
+  // * CJK names that include a separator.
+  // With a separator present, dividing the name into first and last name is
+  // trivial.
+  //
+  // * Korean 4+ character names with two-character last names.
+  // Some of the two-character last names are ambiguous in the sense that they
+  // share a common prefix with single-character last names. For 4+ character
+  // names, it is more likely that the first two characters belong to the last
+  // name.
+  //
+  // * Known two-character CJK last names.
+  // This expression uses only non-ambiguous two-character last names.
+  //
+  // * Only the first character as the last name.
+  // This is the catch-all expression that uses only the first character for
+  // the last name and puts all other characters into the first name.
+  if (HasCjkNameCharacteristics(full_name)) {
+    return {provider->GetRegEx(RegEx::kParseSeparatedCjkName),
+            provider->GetRegEx(RegEx::kParseKoreanTwoCharacterLastName),
+            provider->GetRegEx(RegEx::kParseCommonCjkTwoCharacterLastName),
+            provider->GetRegEx(RegEx::kParseCjkSingleCharacterLastName)};
+  }
+  // Hispanic/Latinx names are parsed by a single dedicated expression.
+  if (HasHispanicLatinxNameCharaceristics(full_name))
+    return {provider->GetRegEx(RegEx::kParseHispanicFullName)};
+  // Every other name: last name only, then "last, first middle", then
+  // "first middle last".
+  return {provider->GetRegEx(RegEx::kParseOnlyLastName),
+          provider->GetRegEx(RegEx::kParseLastCommaFirstMiddleName),
+          provider->GetRegEx(RegEx::kParseFirstMiddleLastName)};
+}
+NameFull::~NameFull() = default;
+}  // namespace structured_address
+}  // namespace autofill
--- a/components/autofill/core/browser/data_model/autofill_structured_address_name.h
+++ b/components/autofill/core/browser/data_model/autofill_structured_address_name.h
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+#ifndef COMPONENTS_AUTOFILL_CORE_BROWSER_DATA_MODEL_AUTOFILL_STRUCTURED_ADDRESS_NAME_H_
+#define COMPONENTS_AUTOFILL_CORE_BROWSER_DATA_MODEL_AUTOFILL_STRUCTURED_ADDRESS_NAME_H_
+#include <string>
+#include <vector>
+#include "components/autofill/core/browser/data_model/autofill_structured_address_component.h"
+using autofill::structured_address::AddressComponent;
+namespace autofill {
+namespace structured_address {
+// Returns true if |name| has the characteristics of a Chinese, Japanese or
+// Korean name:
+// * It must only contain CJK characters with at most one separator in between.
+bool HasCjkNameCharacteristics(const std::string& name);
+// Returns true if |name| has one of the characteristics of a Hispanic/Latinx
+// name:
+// * Name contains a very common Hispanic/Latinx surname.
+// * Name uses a surname conjunction.
+bool HasHispanicLatinxNameCharaceristics(const std::string& name);
+// Returns true if |middle_name| has the characteristics of containing only
+// initials:
+// * The string contains only upper case letters that may be preceded by a
+// point.
+// * Between each letter, there can be a space or a hyphen.
+bool HasMiddleNameInitialsCharacteristics(const std::string& middle_name);
+// Reduces a name to its initials in upper case. The name is split at spaces
+// and hyphens.
+// Example: George walker -> GW, Hans-Peter -> HP
+base::string16 ReduceToInitials(const base::string16& value);
+// Atomic component that represents the honorific prefix of a name.
+class NameHonorific : public AddressComponent {
+ public:
+  // Creates the component without a parent.
+  NameHonorific();
+  // Creates the component as a child of |parent|.
+  explicit NameHonorific(AddressComponent* parent);
+  ~NameHonorific() override;
+};
+// Atomic component that represents the first name.
+class NameFirst : public AddressComponent {
+ public:
+  // Creates the component without a parent.
+  NameFirst();
+  // Creates the component as a child of |parent|.
+  explicit NameFirst(AddressComponent* parent);
+  ~NameFirst() override;
+};
+// Atomic component that represents the middle name. In addition to
+// |NAME_MIDDLE|, it supports the |NAME_MIDDLE_INITIAL| type.
+class NameMiddle : public AddressComponent {
+ public:
+  // Creates the component without a parent.
+  NameMiddle();
+  // Creates the component as a child of |parent|.
+  explicit NameMiddle(AddressComponent* parent);
+  ~NameMiddle() override;
+  // Adds |NAME_MIDDLE_INITIAL| to |supported_types|.
+  void GetAdditionalSupportedFieldTypes(
+      ServerFieldTypeSet* supported_types) const override;
+ protected:
+  // Implements support for getting the value for the |NAME_MIDDLE_INITIAL|
+  // type.
+  bool ConvertAndGetTheValueForAdditionalFieldTypeName(
+      const std::string& type_name,
+      base::string16* value) const override;
+  // Implements support for setting the |NAME_MIDDLE_INITIAL| type.
+  bool ConvertAndSetValueForAdditionalFieldTypeName(
+      const std::string& type_name,
+      const base::string16& value,
+      const VerificationStatus& status) override;
+};
+// Atomic component that represents the first part of a last name.
+class NameLastFirst : public AddressComponent {
+ public:
+  // Creates the component without a parent.
+  NameLastFirst();
+  // Creates the component as a child of |parent|.
+  explicit NameLastFirst(AddressComponent* parent);
+  ~NameLastFirst() override;
+};
+// Atomic component that represents the conjunction in a Hispanic/Latinx
+// surname.
+class NameLastConjunction : public AddressComponent {
+ public:
+  // Creates the component without a parent.
+  NameLastConjunction();
+  // Creates the component as a child of |parent|.
+  explicit NameLastConjunction(AddressComponent* parent);
+  ~NameLastConjunction() override;
+};
+// Atomic component that represents the second part of a surname.
+class NameLastSecond : public AddressComponent {
+ public:
+  // Creates the component without a parent.
+  NameLastSecond();
+  // Creates the component as a child of |parent|.
+  explicit NameLastSecond(AddressComponent* parent);
+  ~NameLastSecond() override;
+};
+// Compound that represents a last name. It contains a first and a second last
+// name and a conjunction, as used in Hispanic/Latinx names. Note that compound
+// family names like Miller-Smith are not supposed to be split up into two
+// components. If a name contains only a single component, the component is
+// stored in the second part by default.
+//
+//               +-------+
+//               | _LAST |
+//               +-------+
+//               /    |    \
+//             /      |      \
+//           /        |        \
+// +--------+ +-----------+ +---------+
+// | _FIRST | | _CONJUNC. | | _SECOND |
+// +--------+ +-----------+ +---------+
+//
+class NameLast : public AddressComponent {
+ public:
+  // Creates the component without a parent.
+  NameLast();
+  // Creates the component as a child of |parent|.
+  explicit NameLast(AddressComponent* parent);
+  ~NameLast() override;
+  // Returns the expressions used to parse the last name into its
+  // subcomponents.
+  std::vector<const RE2*> GetParseRegularExpressionsByRelevance()
+      const override;
+ private:
+  // As the fallback, write everything to the second last name.
+  void ParseValueAndAssignSubcomponentsByFallbackMethod() override;
+  // Subcomponents of the last name.
+  NameLastFirst first_;
+  NameLastConjunction conjunction_;
+  NameLastSecond second_;
+};
+// Compound that represents a full name. It contains an honorific prefix, a
+// first name, a middle name and a last name. The last name is a compound
+// itself.
+//
+//                     +----------+
+//                     | NAME_FULL|
+//                     +----------+
+//                    /  |      |  \
+//                  /    |      |    \
+//                /      |      |      \
+//              /        |      |        \
+// +------------+ +--------+ +---------+ +-------+
+// | _HONORIFIC | | _FIRST | | _MIDDLE | | _LAST |
+// +------------+ +--------+ +---------+ +-------+
+//                                        /   |   \
+//                                      /     |     \
+//                                    /       |       \
+//                                  /         |         \
+//                         +--------+ +-----------+ +---------+
+//                         | _FIRST | | _CONJUNC. | | _SECOND |
+//                         +--------+ +-----------+ +---------+
+//
+class NameFull : public AddressComponent {
+ public:
+  // Creates the component without a parent.
+  NameFull();
+  // Creates the component as a child of |parent|.
+  explicit NameFull(AddressComponent* parent);
+  ~NameFull() override;
+  // Returns the expressions used to parse the full name into its
+  // subcomponents, ordered by relevance.
+  std::vector<const RE2*> GetParseRegularExpressionsByRelevance()
+      const override;
+ private:
+  // Subcomponents of the full name.
+  NameHonorific name_honorific_;
+  NameFirst name_first_;
+  NameMiddle name_middle_;
+  NameLast name_last_;
+};
+}  // namespace structured_address
+}  // namespace autofill
+#endif  // COMPONENTS_AUTOFILL_CORE_BROWSER_DATA_MODEL_AUTOFILL_STRUCTURED_ADDRESS_NAME_H_
--- a/components/autofill/core/browser/data_model/autofill_structured_address_name_unittest.cc
+++ b/components/autofill/core/browser/data_model/autofill_structured_address_name_unittest.cc
--- a/components/autofill/core/browser/data_model/autofill_structured_address_regex_provider.cc
+++ b/components/autofill/core/browser/data_model/autofill_structured_address_regex_provider.cc
--- a/components/autofill/core/browser/data_model/autofill_structured_address_regex_provider.h
+++ b/components/autofill/core/browser/data_model/autofill_structured_address_regex_provider.h
@@ -20,7 +20,21 @@ namespace structured_address {
 // values in an AddressComponent tree.
 enum class RegEx {
  kSingleWord,
-  kLastRegEx = kSingleWord,
+  kParseSeparatedCjkName,
+  kParseCommonCjkTwoCharacterLastName,
+  kParseKoreanTwoCharacterLastName,
+  kParseCjkSingleCharacterLastName,
+  kMatchCjkNameCharacteristics,
+  kMatchHispanicCommonNameCharacteristics,
+  kMatchHispanicLastNameConjuctionCharacteristics,
+  kParseOnlyLastName,
+  kParseLastCommaFirstMiddleName,
+  kParseFirstMiddleLastName,
+  kParseHispanicLastName,
+  kParseHispanicFullName,
+  kParseLastNameIntoSecondLastName,
+  kMatchMiddleNameInitialsCharacteristics,
+  // Alias of the final enumerator. Keep it pointing at the last entry when
+  // new expressions are appended, otherwise the trailing entries fall outside
+  // of the [kSingleWord, kLastRegEx] range.
+  kLastRegEx = kMatchMiddleNameInitialsCharacteristics,
 };
 // This singleton class builds and caches the regular expressions for value
@@ -73,4 +87,5 @@ class StructuredAddressesRegExProvider {
 }  // namespace structured_address
 }  // namespace autofill
-#endif  // COMPONENTS_AUTOFILL_CORE_BROWSER_DATA_MODEL_AUTOFILL_STRUCTURED_ADDRESS_PATTERN_REGEX_H_
+#endif  // COMPONENTS_AUTOFILL_CORE_BROWSER_DATA_MODEL_AUTOFILL_STRUCTURED_ADDRESS_REGEX_PROVIDER_H_
--- a/components/autofill/core/browser/data_model/autofill_structured_address_utils.cc
+++ b/components/autofill/core/browser/data_model/autofill_structured_address_utils.cc
@@ -13,6 +13,7 @@
 #include "base/debug/alias.h"
 #include "base/debug/dump_without_crashing.h"
 #include "base/strings/strcat.h"
+#include "components/autofill/core/browser/data_model/autofill_structured_address_regex_provider.h"
 namespace autofill {
 namespace structured_address {
@@ -45,9 +46,13 @@ const RE2* Re2RegExCache::GetRegEx(const std::string& pattern) {
  return result.first->second.get();
 }
-std::unique_ptr<const RE2> BuildRegExFromPattern(std::string pattern) {
+std::unique_ptr<const RE2> BuildRegExFromPattern(const std::string& pattern) {
  RE2::Options opt;
-  opt.set_case_sensitive(false);
+  // By default, patterns are case sensitive.
+  // Note that the named-capture-group patterns built with
+  // |CaptureTypeWithPattern()| apply a flag to make the matching case
+  // insensitive.
+  opt.set_case_sensitive(true);
  auto regex = std::make_unique<const RE2>(pattern, opt);
@@ -108,12 +113,17 @@ bool ParseValueByRegularExpression(
  return true;
 }
+// Returns true if |value| is partially matched by the enumerated expression
+// |regex|. The compiled expression is obtained from
+// StructuredAddressesRegExProvider.
+bool IsPartialMatch(const std::string& value, RegEx regex) {
+  return IsPartialMatch(
+      value, StructuredAddressesRegExProvider::Instance()->GetRegEx(regex));
+}
 bool IsPartialMatch(const std::string& value, const std::string& pattern) {
-  const RE2* regex = Re2RegExCache::Instance()->GetRegEx(pattern);
+  return IsPartialMatch(value, Re2RegExCache::Instance()->GetRegEx(pattern));
-  if (!regex || !regex->ok())
+}
-    return false;
-  return RE2::PartialMatch(value, *regex);
+// Returns true if |value| is partially matched by the compiled |expression|.
+// All other overloads funnel into this one.
+bool IsPartialMatch(const std::string& value, const RE2* expression) {
+  // A missing or invalid expression never matches. This keeps the behavior of
+  // the previous implementation instead of dereferencing a null pointer.
+  if (!expression || !expression->ok())
+    return false;
+  return RE2::PartialMatch(value, *expression);
 }
 std::vector<std::string> GetAllPartialMatches(const std::string& value,
@@ -172,7 +182,8 @@ std::string CaptureTypeWithPattern(const ServerFieldType& type,
      quantifier = "";
  }
-  return base::StrCat({"(?:(?P<", AutofillType(type).ToString(), ">", pattern,
+  // By adding an "i" in the first group, the capturing is case insensitive.
+  return base::StrCat({"(?i:(?P<", AutofillType(type).ToString(), ">", pattern,
                       ")(?:", options.separator, "))", quantifier});
 }

--- a/components/autofill/core/browser/data_model/autofill_structured_address_utils.h
+++ b/components/autofill/core/browser/data_model/autofill_structured_address_utils.h
@@ -21,6 +21,8 @@
 namespace autofill {
 namespace structured_address {
+enum class RegEx;
 // Enum to express the few quantifiers needed to parse values.
 enum MatchQuantifier {
  // The capture group is required.
@@ -39,7 +41,7 @@ struct CaptureOptions {
  // By default, a group must be either followed by a space-like character (\s)
  // or it must be the last group in the line. The separator is allowed to be
  // empty.
-  std::string separator = "\\s|$";
+  std::string separator = "\\s+|$";
  // Indicates if the group is required, optional or even lazy optional.
  MatchQuantifier quantifier = MATCH_REQUIRED;
 };
@@ -55,7 +57,7 @@ class Re2RegExCache {
  static Re2RegExCache* Instance();
  // Returns a pointer to a constant compiled expression that matches |pattern|
-  // case-insensitively.
+  // case-sensitively.
  const RE2* GetRegEx(const std::string& pattern);
 #ifdef UNIT_TEST
@@ -96,12 +98,19 @@ bool ParseValueByRegularExpression(
    const RE2* regex,
    std::map<std::string, std::string>* result_map);
-// Returns a compiled case insensitive regular expression for |pattern|.
+// Returns a compiled case sensitive regular expression for |pattern|.
-std::unique_ptr<const RE2> BuildRegExFromPattern(std::string pattern);
+std::unique_ptr<const RE2> BuildRegExFromPattern(const std::string& pattern);
+// Returns true if |value| can be matched by the enumerated RegEx |regex|.
+bool IsPartialMatch(const std::string& value, RegEx regex);
 // Returns true if |value| can be matched with |pattern|.
 bool IsPartialMatch(const std::string& value, const std::string& pattern);
+// Same as above, but accepts a compiled regular expression instead of the
+// pattern.
+bool IsPartialMatch(const std::string& value, const RE2* expression);
 // Returns a vector that contains all partial matches of |pattern| in |value|;
 std::vector<std::string> GetAllPartialMatches(const std::string& value,
                                              const std::string& pattern);

--- a/components/autofill/core/browser/data_model/autofill_structured_address_utils_unittest.cc
+++ b/components/autofill/core/browser/data_model/autofill_structured_address_utils_unittest.cc
@@ -193,17 +193,17 @@ TEST(AutofillStructuredAddressUtils, TestGetPlaceholderToken) {
 }
 TEST(AutofillStructuredAddressUtils, CaptureTypeWithPattern) {
+  // Pins the exact pattern strings that CaptureTypeWithPattern() generates:
+  // the case-insensitive "(?i:" group, the default "\s+|$" separator, a custom
+  // separator, and the suffix for each quantifier.
-  EXPECT_EQ("(?:(?P<NAME_FULL>abs\\w)(?:\\s|$))?",
+  EXPECT_EQ("(?i:(?P<NAME_FULL>abs\\w)(?:\\s+|$))?",
            CaptureTypeWithPattern(NAME_FULL, {"abs", "\\w"},
                                   {.quantifier = MATCH_OPTIONAL}));
-  EXPECT_EQ("(?:(?P<NAME_FULL>abs\\w)(?:\\s|$))",
+  EXPECT_EQ("(?i:(?P<NAME_FULL>abs\\w)(?:\\s+|$))",
            CaptureTypeWithPattern(NAME_FULL, {"abs", "\\w"}));
-  EXPECT_EQ("(?:(?P<NAME_FULL>abs\\w)(?:\\s|$))??",
+  EXPECT_EQ("(?i:(?P<NAME_FULL>abs\\w)(?:\\s+|$))??",
            CaptureTypeWithPattern(NAME_FULL, "abs\\w",
                                   {.quantifier = MATCH_LAZY_OPTIONAL}));
-  EXPECT_EQ("(?:(?P<NAME_FULL>abs\\w)(?:\\s|$))",
+  EXPECT_EQ("(?i:(?P<NAME_FULL>abs\\w)(?:\\s+|$))",
            CaptureTypeWithPattern(NAME_FULL, "abs\\w"));
-  EXPECT_EQ("(?:(?P<NAME_FULL>abs\\w)(?:_))",
+  EXPECT_EQ("(?i:(?P<NAME_FULL>abs\\w)(?:_))",
            CaptureTypeWithPattern(NAME_FULL, "abs\\w", {.separator = "_"}));
 }