Commit 3738a022 authored by Matthias Körber's avatar Matthias Körber Committed by Commit Bot

[Autofill][Slimshady] merging of names that contain one token less

Change-Id: I90f75fd591537eeb1c6b23892a4703289af2872f
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2367612
Reviewed-by: Christoph Schwering <schwering@google.com>
Commit-Queue: Matthias Körber <koerber@google.com>
Cr-Commit-Position: refs/heads/master@{#800452}
parent 48c2e8ef
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include <string> #include <string>
#include <utility> #include <utility>
#include "base/feature_list.h"
#include "base/notreached.h" #include "base/notreached.h"
#include "base/strings/strcat.h" #include "base/strings/strcat.h"
#include "base/strings/string16.h" #include "base/strings/string16.h"
...@@ -20,6 +21,7 @@ ...@@ -20,6 +21,7 @@
#include "components/autofill/core/browser/data_model/autofill_structured_address_constants.h" #include "components/autofill/core/browser/data_model/autofill_structured_address_constants.h"
#include "components/autofill/core/browser/data_model/autofill_structured_address_utils.h" #include "components/autofill/core/browser/data_model/autofill_structured_address_utils.h"
#include "components/autofill/core/browser/field_types.h" #include "components/autofill/core/browser/field_types.h"
#include "components/autofill/core/common/autofill_features.h"
namespace autofill { namespace autofill {
...@@ -618,8 +620,19 @@ bool AddressComponent::IsMergeableWithComponent( ...@@ -618,8 +620,19 @@ bool AddressComponent::IsMergeableWithComponent(
if (*this == newer_component) if (*this == newer_component)
return true; return true;
return AreSortedTokensEqual(GetSortedTokens(), SortedTokenComparisonResult token_comparison_result =
newer_component.GetSortedTokens()); CompareSortedTokens(GetSortedTokens(), newer_component.GetSortedTokens());
if (token_comparison_result.status == MATCH)
return true;
if (base::FeatureList::IsEnabled(
features::kAutofillEnableSupportForMergingSubsetNames)) {
if (token_comparison_result.status == SINGLE_TOKEN_SUPERSET)
return true;
}
return false;
} }
bool AddressComponent::MergeWithComponent( bool AddressComponent::MergeWithComponent(
...@@ -628,19 +641,32 @@ bool AddressComponent::MergeWithComponent( ...@@ -628,19 +641,32 @@ bool AddressComponent::MergeWithComponent(
if (*this == newer_component) if (*this == newer_component)
return true; return true;
if (!IsMergeableWithComponent(newer_component)) SortedTokenComparisonResult token_comparison_result =
return false; CompareSortedTokens(GetSortedTokens(), newer_component.GetSortedTokens());
// Applies the merging strategy for two token-equivalent components. switch (token_comparison_result.status) {
if (AreSortedTokensEqual(GetSortedTokens(), case MATCH:
newer_component.GetSortedTokens())) {
return MergeTokenEquivalentComponent(newer_component); return MergeTokenEquivalentComponent(newer_component);
case SINGLE_TOKEN_SUPERSET:
if (base::FeatureList::IsEnabled(
features::kAutofillEnableSupportForMergingSubsetNames)) {
return MergeSubsetComponent(newer_component, token_comparison_result);
}
break;
default:
return false;
} }
return false; return false;
} }
bool AddressComponent::MergeTokenEquivalentComponent( bool AddressComponent::MergeTokenEquivalentComponent(
const AddressComponent& newer_component) { const AddressComponent& newer_component) {
if (!AreSortedTokensEqual(GetSortedTokens(),
newer_component.GetSortedTokens()))
return false;
// Assumption: // Assumption:
// The values of both components are a permutation of the same tokens. // The values of both components are a permutation of the same tokens.
// The componentization of the components can be different in terms of // The componentization of the components can be different in terms of
...@@ -690,7 +716,7 @@ bool AddressComponent::MergeTokenEquivalentComponent( ...@@ -690,7 +716,7 @@ bool AddressComponent::MergeTokenEquivalentComponent(
const std::vector<AddressComponent*> other_subcomponents = const std::vector<AddressComponent*> other_subcomponents =
newer_component.Subcomponents(); newer_component.Subcomponents();
DCHECK(subcomponents_.size() == newer_component.Subcomponents().size()); DCHECK(subcomponents_.size() == other_subcomponents.size());
int this_component_verification_score = 0; int this_component_verification_score = 0;
int newer_component_verification_score = 0; int newer_component_verification_score = 0;
...@@ -705,7 +731,8 @@ bool AddressComponent::MergeTokenEquivalentComponent( ...@@ -705,7 +731,8 @@ bool AddressComponent::MergeTokenEquivalentComponent(
// If the components can't be merged directly, store the unmerged index and // If the components can't be merged directly, store the unmerged index and
// sum the verification scores to decide which component's substructure to // sum the verification scores to decide which component's substructure to
// use. // use.
if (!subcomponents_[i]->MergeWithComponent(*other_subcomponents.at(i))) { if (!subcomponents_[i]->MergeTokenEquivalentComponent(
*other_subcomponents.at(i))) {
this_component_verification_score += this_component_verification_score +=
subcomponents_[i]->GetStructureVerificationScore(); subcomponents_[i]->GetStructureVerificationScore();
newer_component_verification_score += newer_component_verification_score +=
...@@ -725,6 +752,113 @@ bool AddressComponent::MergeTokenEquivalentComponent( ...@@ -725,6 +752,113 @@ bool AddressComponent::MergeTokenEquivalentComponent(
return true; return true;
} }
// Stores |token_value| in this component with the status
// VerificationStatus::kParsed.
//
// * Atomic node: the token becomes the value, or is appended to a non-empty
//   value separated by a single space.
// * Non-atomic node: the token is written into the first subcomponent whose
//   value is empty. If every subcomponent already has a value, the token is
//   appended to the value of the first subcomponent.
void AddressComponent::ConsumeAdditionalToken(
    const base::string16& token_value) {
  if (IsAtomic()) {
    if (GetValue().empty()) {
      SetValue(token_value, VerificationStatus::kParsed);
    } else {
      SetValue(base::StrCat({GetValue(), base::ASCIIToUTF16(" "), token_value}),
               VerificationStatus::kParsed);
    }
    return;
  }

  // Try the first free subcomponent.
  for (auto* subcomponent : subcomponents_) {
    if (subcomponent->GetValue().empty()) {
      subcomponent->SetValue(token_value, VerificationStatus::kParsed);
      return;
    }
  }

  // Otherwise append the token to the first subcomponent. The new value must
  // be built from that subcomponent's own value; using the value of this
  // (parent) node would copy the complete parent value into the child.
  AddressComponent* first_subcomponent = subcomponents_[0];
  first_subcomponent->SetValue(
      base::StrCat({first_subcomponent->GetValue(), base::ASCIIToUTF16(" "),
                    token_value}),
      VerificationStatus::kParsed);
}
bool AddressComponent::MergeSubsetComponent(
const AddressComponent& subset_component,
const SortedTokenComparisonResult& token_comparison_result) {
DCHECK(token_comparison_result.status == SINGLE_TOKEN_SUPERSET);
DCHECK(token_comparison_result.additional_tokens.size() == 1);
base::string16 token_to_consume =
token_comparison_result.additional_tokens.back().value;
int this_component_verification_score = 0;
int newer_component_verification_score = 0;
bool found_subset_component = false;
std::vector<int> unmerged_indices;
unmerged_indices.reserve(subcomponents_.size());
const std::vector<AddressComponent*>& subset_subcomponents =
subset_component.Subcomponents();
unmerged_indices.reserve(subcomponents_.size());
for (size_t i = 0; i < subcomponents_.size(); i++) {
DCHECK(subcomponents_[i]->GetStorageType() ==
subset_subcomponents.at(i)->GetStorageType());
AddressComponent* subcomponent = subcomponents_[i];
const AddressComponent* subset_subcomponent = subset_subcomponents.at(i);
base::string16 additional_token;
// If the additional token is the value of this token. Just leave it in.
if (!found_subset_component &&
subcomponent->GetValue() == token_to_consume &&
subset_subcomponent->GetValue().empty()) {
found_subset_component = true;
continue;
}
SortedTokenComparisonResult subtoken_comparison_result =
CompareSortedTokens(subcomponent->GetSortedTokens(),
subset_subcomponent->GetSortedTokens());
// Recursive case.
if (!found_subset_component &&
subtoken_comparison_result.status == SINGLE_TOKEN_SUPERSET) {
found_subset_component = true;
subcomponent->MergeSubsetComponent(*subset_subcomponent,
subtoken_comparison_result);
continue;
}
// If the tokens are the equivalent, they can directly be merged.
if (subtoken_comparison_result.status == MATCH) {
subcomponent->MergeTokenEquivalentComponent(*subset_subcomponent);
continue;
}
// Otherwise calculate the verification score.
this_component_verification_score +=
subcomponent->GetStructureVerificationScore();
newer_component_verification_score +=
subset_subcomponent->GetStructureVerificationScore();
unmerged_indices.emplace_back(i);
}
// If the total verification score of all unmerged components of the other
// component is equal or larger than the score of this component, use its
// subcomponents including their substructure for all unmerged components.
if (newer_component_verification_score >= this_component_verification_score) {
for (size_t i : unmerged_indices)
*subcomponents_[i] = *subset_subcomponents[i];
if (!found_subset_component)
this->ConsumeAdditionalToken(token_to_consume);
}
// In the current implementation it is always possible to merge.
// Once more tokens are supported this may change.
return true;
}
int AddressComponent::GetStructureVerificationScore() const { int AddressComponent::GetStructureVerificationScore() const {
int result = 0; int result = 0;
switch (GetVerificationStatus()) { switch (GetVerificationStatus()) {
......
...@@ -20,6 +20,9 @@ class RE2; ...@@ -20,6 +20,9 @@ class RE2;
namespace autofill { namespace autofill {
namespace structured_address { namespace structured_address {
struct AddressToken;
struct SortedTokenComparisonResult;
// Represents the validation status of value stored in the AutofillProfile. // Represents the validation status of value stored in the AutofillProfile.
// The associated integer values used to store the verification code in SQL and // The associated integer values used to store the verification code in SQL and
// should not be modified. // should not be modified.
...@@ -269,7 +272,7 @@ class AddressComponent { ...@@ -269,7 +272,7 @@ class AddressComponent {
// Returns a constant reference to the sorted canonicalized tokens of the // Returns a constant reference to the sorted canonicalized tokens of the
// value of the component. // value of the component.
const std::vector<base::string16>& GetSortedTokens() const { const std::vector<AddressToken>& GetSortedTokens() const {
return sorted_normalized_tokens_; return sorted_normalized_tokens_;
} }
...@@ -359,6 +362,17 @@ class AddressComponent { ...@@ -359,6 +362,17 @@ class AddressComponent {
// Clears all parsed and formatted values. // Clears all parsed and formatted values.
void ClearAllParsedAndFormattedValues(); void ClearAllParsedAndFormattedValues();
// Merges |subset_component|, a component that has exactly one token less
// than this component, into this component.
// |token_comparison_result| is expected to have the status
// SINGLE_TOKEN_SUPERSET and to contain exactly one additional token; the
// implementation DCHECKs both conditions.
// Returns true; in the current implementation merging always succeeds.
bool MergeSubsetComponent(
const AddressComponent& subset_component,
const SortedTokenComparisonResult& token_comparison_result);
// Consumes an additional token into the most appropriate subcomponent.
// Can be implemented by the specific node types.
// The fall-back solution uses the first empty node.
// If no empty node is available, it appends the value to the first node.
// The fall-back implementation stores every value it writes with the status
// VerificationStatus::kParsed; an atomic node takes the token as its own
// value, or appends it to an existing value separated by a space.
virtual void ConsumeAdditionalToken(const base::string16& token_value);
private: private:
// Returns a reference to the constant root node of the tree. // Returns a reference to the constant root node of the tree.
const AddressComponent& GetRootNode() const; const AddressComponent& GetRootNode() const;
...@@ -410,7 +424,7 @@ class AddressComponent { ...@@ -410,7 +424,7 @@ class AddressComponent {
// meaning that it was converted to lower case and diacritics have been // meaning that it was converted to lower case and diacritics have been
// removed. |value_| is tokenized by splitting the string by white spaces and // removed. |value_| is tokenized by splitting the string by white spaces and
// commas. It is calculated when |value_| is set. // commas. It is calculated when |value_| is set.
std::vector<base::string16> sorted_normalized_tokens_; std::vector<AddressToken> sorted_normalized_tokens_;
// A pointer to the parent node. It is set to nullptr if the node is the root // A pointer to the parent node. It is set to nullptr if the node is the root
// node of the AddressComponent tree. // node of the AddressComponent tree.
......
...@@ -9,8 +9,11 @@ ...@@ -9,8 +9,11 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "base/feature_list.h"
#include "base/strings/utf_string_conversions.h" #include "base/strings/utf_string_conversions.h"
#include "base/test/scoped_feature_list.h"
#include "components/autofill/core/browser/data_model/autofill_structured_address_utils.h" #include "components/autofill/core/browser/data_model/autofill_structured_address_utils.h"
#include "components/autofill/core/common/autofill_features.h"
#include "testing/gtest/include/gtest/gtest.h" #include "testing/gtest/include/gtest/gtest.h"
using base::ASCIIToUTF16; using base::ASCIIToUTF16;
...@@ -20,6 +23,38 @@ namespace structured_address { ...@@ -20,6 +23,38 @@ namespace structured_address {
namespace { namespace {
// A single (type, value, verification status) triple that is either written
// into an AddressComponent tree by SetTestValues() or compared against it by
// VerifyTestValues().
struct AddressComponentTestValue {
// The field type the value is set for or read from.
ServerFieldType type;
// The value as UTF-8; the helpers convert it to UTF-16 before use.
std::string value;
// The verification status that is set or expected for |type|.
VerificationStatus status;
};
// A list of test values; SetTestValues() and VerifyTestValues() process the
// entries in order.
struct AddressComponentTestValues {
std::vector<AddressComponentTestValue> values;
};
// Writes every entry of |test_values| into the tree below |component| using
// SetValueForTypeIfPossible(), converting the UTF-8 value to UTF-16.
// Unless |finalize| is false, the tree is completed afterwards via
// CompleteFullTree().
void SetTestValues(AddressComponent* component,
                   const AddressComponentTestValues& test_values,
                   bool finalize = true) {
  for (const auto& entry : test_values.values) {
    component->SetValueForTypeIfPossible(
        entry.type, base::UTF8ToUTF16(entry.value), entry.status);
  }

  // The caller asked for the raw, non-completed tree.
  if (!finalize)
    return;

  component->CompleteFullTree();
}
// Expects that, for every entry of |test_values|, the tree below |component|
// returns the entry's value (converted from UTF-8 to UTF-16) and the entry's
// verification status for the entry's type.
// |test_values| is taken by const reference, like in SetTestValues(), to
// avoid copying the vector on every call.
void VerifyTestValues(AddressComponent* component,
                      const AddressComponentTestValues& test_values) {
  for (const auto& test_value : test_values.values) {
    EXPECT_EQ(component->GetValueForType(test_value.type),
              base::UTF8ToUTF16(test_value.value));
    EXPECT_EQ(component->GetVerificationStatusForType(test_value.type),
              test_value.status);
  }
}
// A test record that contains all entries of the hybrid-structure name tree. // A test record that contains all entries of the hybrid-structure name tree.
struct NameParserTestRecord { struct NameParserTestRecord {
std::string full; std::string full;
...@@ -666,5 +701,133 @@ TEST(AutofillStructuredName, MigrationFromLegacyStructure_WithoutFullName) { ...@@ -666,5 +701,133 @@ TEST(AutofillStructuredName, MigrationFromLegacyStructure_WithoutFullName) {
VerificationStatus::kObserved); VerificationStatus::kObserved);
} }
// Merges a name whose last name is missing one token ("y") into a name that
// contains the complete last name "Anderson y Smith". The subset-merging
// feature is explicitly enabled for this test.
TEST(AutofillStructuredName, MergeSubsetLastname) {
base::test::ScopedFeatureList scoped_feature;
scoped_feature.InitAndEnableFeature(
features::kAutofillEnableSupportForMergingSubsetNames);
NameFull name;
NameFull subset_name;
// The superset name: the last name is only set as a whole (NAME_LAST).
AddressComponentTestValues name_values = {
.values =
{
{.type = NAME_FIRST,
.value = "Thomas",
.status = VerificationStatus::kObserved},
{.type = NAME_MIDDLE,
.value = "Neo",
.status = VerificationStatus::kObserved},
{.type = NAME_LAST,
.value = "Anderson y Smith",
.status = VerificationStatus::kObserved},
},
};
// The subset name: the two last names are set individually and the
// conjunction "y" is absent.
AddressComponentTestValues subset_name_values = {
.values =
{
{.type = NAME_FIRST,
.value = "Thomas",
.status = VerificationStatus::kObserved},
{.type = NAME_MIDDLE,
.value = "Neo",
.status = VerificationStatus::kObserved},
{.type = NAME_LAST_FIRST,
.value = "Anderson",
.status = VerificationStatus::kObserved},
{.type = NAME_LAST_SECOND,
.value = "Smith",
.status = VerificationStatus::kObserved},
},
};
// NOTE(review): |expectation| is built but never passed to
// VerifyTestValues(); the check at the end of the test uses |name_values|
// instead. Confirm whether the merged structure was meant to be verified
// against |expectation|.
AddressComponentTestValues expectation = {
.values =
{
{.type = NAME_FIRST,
.value = "Thomas",
.status = VerificationStatus::kObserved},
{.type = NAME_MIDDLE,
.value = "Neo",
.status = VerificationStatus::kObserved},
{.type = NAME_LAST_FIRST,
.value = "Anderson",
.status = VerificationStatus::kObserved},
{.type = NAME_LAST_CONJUNCTION,
.value = "y",
.status = VerificationStatus::kObserved},
{.type = NAME_LAST_SECOND,
.value = "Smith",
.status = VerificationStatus::kObserved},
},
};
SetTestValues(&name, name_values);
SetTestValues(&subset_name, subset_name_values);
EXPECT_TRUE(name.IsMergeableWithComponent(subset_name));
EXPECT_TRUE(name.MergeWithComponent(subset_name));
// Only checks that the values originally set on |name| are still reported
// after the merge.
VerifyTestValues(&name, name_values);
}
// Merges a name without a middle name into a name that has the additional
// middle name "Neo". The subset-merging feature is explicitly enabled for
// this test.
TEST(AutofillStructuredName, MergeSubsetLastname2) {
base::test::ScopedFeatureList scoped_feature;
scoped_feature.InitAndEnableFeature(
features::kAutofillEnableSupportForMergingSubsetNames);
NameFull name;
NameFull subset_name;
// The superset name with first, middle and last name.
AddressComponentTestValues name_values = {
.values =
{
{.type = NAME_FIRST,
.value = "Thomas",
.status = VerificationStatus::kObserved},
{.type = NAME_MIDDLE,
.value = "Neo",
.status = VerificationStatus::kObserved},
{.type = NAME_LAST,
.value = "Anderson",
.status = VerificationStatus::kObserved},
},
};
// The subset name: no middle name is set.
AddressComponentTestValues subset_name_values = {
.values =
{
{.type = NAME_FIRST,
.value = "Thomas",
.status = VerificationStatus::kObserved},
{.type = NAME_LAST,
.value = "Anderson",
.status = VerificationStatus::kObserved},
},
};
// NOTE(review): |expectation| is never used; it lists the same entries as
// |name_values|, which is what the final check verifies.
AddressComponentTestValues expectation = {
.values =
{
{.type = NAME_FIRST,
.value = "Thomas",
.status = VerificationStatus::kObserved},
{.type = NAME_MIDDLE,
.value = "Neo",
.status = VerificationStatus::kObserved},
{.type = NAME_LAST,
.value = "Anderson",
.status = VerificationStatus::kObserved},
},
};
SetTestValues(&name, name_values);
SetTestValues(&subset_name, subset_name_values);
EXPECT_TRUE(name.IsMergeableWithComponent(subset_name));
EXPECT_TRUE(name.MergeWithComponent(subset_name));
// The superset name is expected to report its original values after the
// merge.
VerifyTestValues(&name, name_values);
}
} // namespace structured_address } // namespace structured_address
} // namespace autofill } // namespace autofill
...@@ -26,6 +26,16 @@ ...@@ -26,6 +26,16 @@
namespace autofill { namespace autofill {
namespace structured_address { namespace structured_address {
// Out-of-line definitions of the special members of
// SortedTokenComparisonResult.

// Stores |status| and |additional_tokens| in the members of the same name.
// NOTE(review): |additional_tokens| is received by value and then copied into
// the member; it could be moved instead.
SortedTokenComparisonResult::SortedTokenComparisonResult(
SortedTokenComparisonStatus status,
std::vector<AddressToken> additional_tokens)
: status(status), additional_tokens(additional_tokens) {}
// Defaulted destructor.
SortedTokenComparisonResult::~SortedTokenComparisonResult() = default;
// Defaulted member-wise copy constructor.
SortedTokenComparisonResult::SortedTokenComparisonResult(
const SortedTokenComparisonResult& other) = default;
bool StructuredNamesEnabled() { bool StructuredNamesEnabled() {
return base::FeatureList::IsEnabled( return base::FeatureList::IsEnabled(
features::kAutofillEnableSupportForMoreStructureInNames); features::kAutofillEnableSupportForMoreStructureInNames);
...@@ -241,41 +251,87 @@ bool AreStringTokenEquivalent(const base::string16& one, ...@@ -241,41 +251,87 @@ bool AreStringTokenEquivalent(const base::string16& one,
return AreSortedTokensEqual(TokenizeValue(one), TokenizeValue(other)); return AreSortedTokensEqual(TokenizeValue(one), TokenizeValue(other));
} }
bool AreSortedTokensEqual(const std::vector<base::string16>& first, SortedTokenComparisonResult CompareSortedTokens(
const std::vector<base::string16>& second) { const std::vector<AddressToken>& first,
// It is assumed that the vectors are sorted. const std::vector<AddressToken>& second) {
DCHECK(std::is_sorted(first.begin(), first.end()) && // Lambda to compare the normalized values of two AddressTokens.
std::is_sorted(second.begin(), second.end())); auto cmp_normalized = [](const auto& a, const auto& b) {
// If there is a different number of tokens, it can't be a permutation. return a.normalized_value < b.normalized_value;
if (first.size() != second.size()) };
return false;
// Return true if both vectors are component-wise equal. // Verify that the two multi sets are sorted.
return std::equal(first.begin(), first.end(), second.begin()); DCHECK(std::is_sorted(first.begin(), first.end(), cmp_normalized) &&
std::is_sorted(second.begin(), second.end(), cmp_normalized));
bool is_supserset = std::includes(first.begin(), first.end(), second.begin(),
second.end(), cmp_normalized);
bool is_subset = std::includes(second.begin(), second.end(), first.begin(),
first.end(), cmp_normalized);
// If first is both a superset and a subset it is the same.
if (is_supserset && is_subset)
return SortedTokenComparisonResult(MATCH);
// If it is neither, both are distinct.
if (!is_supserset && !is_subset)
return SortedTokenComparisonResult(DISTINCT);
std::vector<AddressToken> additional_tokens;
// Collect the additional tokens from the superset.
// Note, that the superset property is already assured.
std::set_symmetric_difference(
first.begin(), first.end(), second.begin(), second.end(),
std::back_inserter(additional_tokens), cmp_normalized);
if (is_supserset) {
return SortedTokenComparisonResult(additional_tokens.size() == 1
? SINGLE_TOKEN_SUPERSET
: MULTI_TOKEN_SUPERSET,
additional_tokens);
}
return SortedTokenComparisonResult(
additional_tokens.size() == 1 ? SINGLE_TOKEN_SUBSET : MULTI_TOKEN_SUBSET,
additional_tokens);
}
bool AreSortedTokensEqual(const std::vector<AddressToken>& first,
const std::vector<AddressToken>& second) {
return CompareSortedTokens(first, second).status == MATCH;
} }
std::vector<base::string16> TokenizeValue(const base::string16 value) { std::vector<AddressToken> TokenizeValue(const base::string16 value) {
// Canonicalize the value. std::vector<AddressToken> tokens;
base::string16 cannonicalized_value = NormalizeValue(value); int index = 0;
// CJK names are a special case and are tokenized by character without the // CJK names are a special case and are tokenized by character without the
// separators. // separators.
std::vector<base::string16> tokens;
if (HasCjkNameCharacteristics(base::UTF16ToUTF8(value))) { if (HasCjkNameCharacteristics(base::UTF16ToUTF8(value))) {
tokens.reserve(value.size()); tokens.reserve(value.size());
for (size_t i = 0; i < value.size(); i++) { for (size_t i = 0; i < value.size(); i++) {
base::string16 cjk_separators = base::UTF8ToUTF16("・·  "); base::string16 cjk_separators = base::UTF8ToUTF16("・·  ");
if (cjk_separators.find(value.substr(i, 1)) == base::string16::npos) { if (cjk_separators.find(value.substr(i, 1)) == base::string16::npos) {
tokens.emplace_back(value.substr(i, 1)); tokens.emplace_back(AddressToken{.value = value.substr(i, 1),
.normalized_value = value.substr(i, 1),
.position = index++});
} }
} }
} else { } else {
// Split it by white spaces and commas into non-empty values. // Split it by white spaces and commas into non-empty values.
tokens = for (const auto& token :
base::SplitString(cannonicalized_value, base::ASCIIToUTF16(", "), base::SplitString(value, base::ASCIIToUTF16(", "),
base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY)) {
tokens.emplace_back(
AddressToken{.value = token,
.normalized_value = NormalizeValue(token),
.position = index++});
}
} }
// Sort the tokens lexicographically. // Sort the tokens lexicographically by their normalized value.
std::sort(tokens.begin(), tokens.end()); std::sort(tokens.begin(), tokens.end(), [](const auto& a, const auto& b) {
return a.normalized_value < b.normalized_value;
});
return tokens; return tokens;
} }
......
...@@ -22,6 +22,15 @@ ...@@ -22,6 +22,15 @@
namespace autofill { namespace autofill {
namespace structured_address { namespace structured_address {
// A single token of a tokenized value. TokenizeValue() creates one token per
// non-empty split result (or per non-separator character for CJK names) and
// sorts the tokens by |normalized_value|.
struct AddressToken {
  // The original value.
  base::string16 value;
  // The normalized value.
  base::string16 normalized_value;
  // The token position in the original string. Initialized so that a
  // default-constructed token never carries an indeterminate position.
  int position = 0;
};
enum class RegEx; enum class RegEx;
// Enum to express the few quantifiers needed to parse values. // Enum to express the few quantifiers needed to parse values.
...@@ -35,6 +44,40 @@ enum MatchQuantifier { ...@@ -35,6 +44,40 @@ enum MatchQuantifier {
MATCH_LAZY_OPTIONAL, MATCH_LAZY_OPTIONAL,
}; };
// The result status of comparing two sets of sorted tokens. "First" and
// "second" refer to the first and second argument of CompareSortedTokens().
enum SortedTokenComparisonStatus {
// The tokens are neither the same nor super/sub sets.
DISTINCT,
// The tokens exactly match.
MATCH,
// The first tokens are a superset of the second with only one additional
// element.
SINGLE_TOKEN_SUPERSET,
// The first tokens are a subset of the second, and the second contains only
// one additional element.
SINGLE_TOKEN_SUBSET,
// The first tokens are a superset of the second with multiple additional
// elements.
MULTI_TOKEN_SUPERSET,
// The first tokens are a subset of the second, and the second contains
// multiple additional elements.
MULTI_TOKEN_SUBSET
};
// The result from comparing two sets of sorted tokens containing the status and
// the additional tokens in the super/sub sets.
// Returned by CompareSortedTokens().
struct SortedTokenComparisonResult {
// Creates a result with the given |status|. |additional_tokens| defaults to
// an empty vector, which is what CompareSortedTokens() uses for the MATCH and
// DISTINCT results.
explicit SortedTokenComparisonResult(
SortedTokenComparisonStatus status,
std::vector<AddressToken> additional_tokens = {});
~SortedTokenComparisonResult();
SortedTokenComparisonResult(const SortedTokenComparisonResult& other);
// The status of the token comparison.
SortedTokenComparisonStatus status = DISTINCT;
// The additional elements in the super/subsets, i.e. the tokens that are
// contained in only one of the two compared token sets.
std::vector<AddressToken> additional_tokens{};
};
// Options for capturing a named group using the // Options for capturing a named group using the
// |CaptureTypeWithPattern(...)| functions. // |CaptureTypeWithPattern(...)| functions.
struct CaptureOptions { struct CaptureOptions {
...@@ -176,8 +219,8 @@ std::string CaptureTypeWithPattern(const ServerFieldType& type, ...@@ -176,8 +219,8 @@ std::string CaptureTypeWithPattern(const ServerFieldType& type,
base::string16 NormalizeValue(const base::string16& value); base::string16 NormalizeValue(const base::string16& value);
// Returns true if both vectors contain the same tokens in the same order. // Returns true if both vectors contain the same tokens in the same order.
bool AreSortedTokensEqual(const std::vector<base::string16>& first, bool AreSortedTokensEqual(const std::vector<AddressToken>& first,
const std::vector<base::string16>& second); const std::vector<AddressToken>& second);
// Returns true if both strings contain the same tokens after normalization. // Returns true if both strings contain the same tokens after normalization.
bool AreStringTokenEquivalent(const base::string16& one, bool AreStringTokenEquivalent(const base::string16& one,
...@@ -186,7 +229,13 @@ bool AreStringTokenEquivalent(const base::string16& one, ...@@ -186,7 +229,13 @@ bool AreStringTokenEquivalent(const base::string16& one,
// Returns a sorted vector containing the tokens of |value| after |value| was // Returns a sorted vector containing the tokens of |value| after |value| was
// canonicalized. |value| is tokenized by splitting it by white spaces and // canonicalized. |value| is tokenized by splitting it by white spaces and
// commas. // commas.
std::vector<base::string16> TokenizeValue(const base::string16 value); std::vector<AddressToken> TokenizeValue(const base::string16 value);
// Compares two vectors of sorted AddressTokens and returns the
// SortedTokenComparisonResult;
SortedTokenComparisonResult CompareSortedTokens(
const std::vector<AddressToken>& first,
const std::vector<AddressToken>& second);
} // namespace structured_address } // namespace structured_address
......
...@@ -17,6 +17,13 @@ ...@@ -17,6 +17,13 @@
namespace autofill { namespace autofill {
namespace structured_address { namespace structured_address {
// Equality for AddressToken: two tokens are equal only if the original value,
// the normalized value and the position all agree.
bool operator==(const AddressToken& lhs, const AddressToken& rhs) {
  if (lhs.value != rhs.value)
    return false;
  if (lhs.normalized_value != rhs.normalized_value)
    return false;
  return lhs.position == rhs.position;
}
// Regular expression with named capture groups for parsing US-style names. // Regular expression with named capture groups for parsing US-style names.
char kFirstMiddleLastRe[] = char kFirstMiddleLastRe[] =
"^(?P<NAME_FULL>((?P<NAME_FIRST>\\w+)\\s)?" "^(?P<NAME_FULL>((?P<NAME_FIRST>\\w+)\\s)?"
...@@ -210,16 +217,19 @@ TEST(AutofillStructuredAddressUtils, CaptureTypeWithPattern) { ...@@ -210,16 +217,19 @@ TEST(AutofillStructuredAddressUtils, CaptureTypeWithPattern) {
} }
TEST(AutofillStructuredAddressUtils, TokenizeValue) { TEST(AutofillStructuredAddressUtils, TokenizeValue) {
std::vector<base::string16> expected_tokens = { std::vector<AddressToken> expected_tokens = {
base::ASCIIToUTF16("and"), base::ASCIIToUTF16("anotherone"), {base::ASCIIToUTF16("AnD"), base::ASCIIToUTF16("and"), 1},
base::ASCIIToUTF16("value")}; {base::ASCIIToUTF16("anotherOne"), base::ASCIIToUTF16("anotherone"), 2},
{base::ASCIIToUTF16("valUe"), base::ASCIIToUTF16("value"), 0}};
EXPECT_EQ(TokenizeValue(base::ASCIIToUTF16(" valUe AnD anotherOne")), EXPECT_EQ(TokenizeValue(base::ASCIIToUTF16(" valUe AnD anotherOne")),
expected_tokens); expected_tokens);
std::vector<base::string16> expected_cjk_tokens = {base::UTF8ToUTF16("영"), std::vector<AddressToken> expected_cjk_tokens = {
base::UTF8ToUTF16("이"), {base::UTF8ToUTF16("영"), base::UTF8ToUTF16("영"), 1},
base::UTF8ToUTF16("호")}; {base::UTF8ToUTF16("이"), base::UTF8ToUTF16("이"), 0},
{base::UTF8ToUTF16("호"), base::UTF8ToUTF16("호"), 2}};
EXPECT_EQ(TokenizeValue(base::UTF8ToUTF16("이영 호")), expected_cjk_tokens); EXPECT_EQ(TokenizeValue(base::UTF8ToUTF16("이영 호")), expected_cjk_tokens);
EXPECT_EQ(TokenizeValue(base::UTF8ToUTF16("이・영호")), expected_cjk_tokens); EXPECT_EQ(TokenizeValue(base::UTF8ToUTF16("이・영호")), expected_cjk_tokens);
EXPECT_EQ(TokenizeValue(base::UTF8ToUTF16("이영 호")), expected_cjk_tokens); EXPECT_EQ(TokenizeValue(base::UTF8ToUTF16("이영 호")), expected_cjk_tokens);
...@@ -230,25 +240,5 @@ TEST(AutofillStructuredAddressUtils, NormalizeValue) { ...@@ -230,25 +240,5 @@ TEST(AutofillStructuredAddressUtils, NormalizeValue) {
base::UTF8ToUTF16("muller orber")); base::UTF8ToUTF16("muller orber"));
} }
TEST(AutofillStructuredAddressUtils, AreSortedTokensEqual) {
EXPECT_FALSE(AreSortedTokensEqual(
{base::ASCIIToUTF16("aaaa"), base::ASCIIToUTF16("bbb")},
{base::ASCIIToUTF16("aaa"), base::ASCIIToUTF16("bbb")}));
EXPECT_TRUE(AreSortedTokensEqual(
{base::ASCIIToUTF16("aaa"), base::ASCIIToUTF16("bbb")},
{base::ASCIIToUTF16("aaa"), base::ASCIIToUTF16("bbb")}));
EXPECT_FALSE(AreSortedTokensEqual(
{base::ASCIIToUTF16("aaa")},
{base::ASCIIToUTF16("aaa"), base::ASCIIToUTF16("bbb")}));
}
TEST(AutofillStructuredAddressUtils, AreStringTokenEquivalent) {
EXPECT_TRUE(AreStringTokenEquivalent(base::ASCIIToUTF16("A B C"),
base::ASCIIToUTF16("A C B")));
EXPECT_FALSE(AreStringTokenEquivalent(base::ASCIIToUTF16("A Bb C"),
base::ASCIIToUTF16("A C B")));
}
} // namespace structured_address } // namespace structured_address
} // namespace autofill } // namespace autofill
...@@ -97,6 +97,12 @@ const base::Feature kAutofillEnableSupportForMoreStructureInNames{ ...@@ -97,6 +97,12 @@ const base::Feature kAutofillEnableSupportForMoreStructureInNames{
"AutofillEnableSupportForMoreStructureInNames", "AutofillEnableSupportForMoreStructureInNames",
base::FEATURE_DISABLED_BY_DEFAULT}; base::FEATURE_DISABLED_BY_DEFAULT};
// Controls if Autofill supports merging subset names.
// Disabled by default. When enabled, AddressComponent treats a component as
// mergeable with, and merges it with, a component that has exactly one token
// less (comparison status SINGLE_TOKEN_SUPERSET).
// TODO(crbug.com/1098943): Remove once launched.
const base::Feature kAutofillEnableSupportForMergingSubsetNames{
"AutofillEnableSupportForMergingSubsetNames",
base::FEATURE_DISABLED_BY_DEFAULT};
// Controls whether or not a minimum number of fields is required before // Controls whether or not a minimum number of fields is required before
// heuristic field type prediction is run for a form. // heuristic field type prediction is run for a form.
const base::Feature kAutofillEnforceMinRequiredFieldsForHeuristics{ const base::Feature kAutofillEnforceMinRequiredFieldsForHeuristics{
......
...@@ -35,6 +35,7 @@ extern const base::Feature kAutofillEnableAugmentedPhoneCountryCode; ...@@ -35,6 +35,7 @@ extern const base::Feature kAutofillEnableAugmentedPhoneCountryCode;
extern const base::Feature kAutofillEnableCompanyName; extern const base::Feature kAutofillEnableCompanyName;
extern const base::Feature kAutofillEnableHideSuggestionsUI; extern const base::Feature kAutofillEnableHideSuggestionsUI;
extern const base::Feature kAutofillEnableSupportForMoreStructureInNames; extern const base::Feature kAutofillEnableSupportForMoreStructureInNames;
extern const base::Feature kAutofillEnableSupportForMergingSubsetNames;
extern const base::Feature kAutofillEnforceMinRequiredFieldsForHeuristics; extern const base::Feature kAutofillEnforceMinRequiredFieldsForHeuristics;
extern const base::Feature kAutofillEnforceMinRequiredFieldsForQuery; extern const base::Feature kAutofillEnforceMinRequiredFieldsForQuery;
extern const base::Feature kAutofillEnforceMinRequiredFieldsForUpload; extern const base::Feature kAutofillEnforceMinRequiredFieldsForUpload;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment