Commit d5a7000f authored by Matthias Körber, committed by Chromium LUCI CQ

[Autofill] Add ability to determine shared labels

This CL implements the ability to divide a label between a series of
fields to address the following scenario in a form:

Street Name / House Number: [             ][  ]

or

First Name & Last Name: [           ][           ]

([       ]: indicates a text input field in a form.)

Here, it is anticipated that the label is divided between the two
following fields.

For this to happen, the label is split by a list of separator characters
and words. If the field is followed by a series of unlabeled fields such
that the total number of fields exactly matches the number of non-empty
components of the first field's label after the split, the label is divided.

There are additional conditions for the label sharing:
* The maximum number of fields to share a label is limited to 3.
* The maximum length of the label to be shared is limited to 40.

To store the divided labels, a new field |parseable_label_| is added to
|AutofillField| which is only supposed to be used in the heuristic
type detection.

Note, this change only makes the |parseable_label_| available,
but does not utilize this yet. This will be subject to a subsequent CL.

Change-Id: If098b2aa5a0c3c29a28fff846a4c694c64b837f5
Bug: 1167702
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2623547
Reviewed-by: Christoph Schwering <schwering@google.com>
Commit-Queue: Matthias Körber <koerber@google.com>
Cr-Commit-Position: refs/heads/master@{#844596}
parent 4ce9f25c
...@@ -162,6 +162,8 @@ static_library("browser") { ...@@ -162,6 +162,8 @@ static_library("browser") {
"form_parsing/search_field.h", "form_parsing/search_field.h",
"form_parsing/travel_field.cc", "form_parsing/travel_field.cc",
"form_parsing/travel_field.h", "form_parsing/travel_field.h",
"form_processing/label_processing_util.cc",
"form_processing/label_processing_util.h",
"form_processing/name_processing_util.cc", "form_processing/name_processing_util.cc",
"form_processing/name_processing_util.h", "form_processing/name_processing_util.h",
"form_structure.cc", "form_structure.cc",
...@@ -660,6 +662,7 @@ source_set("unit_tests") { ...@@ -660,6 +662,7 @@ source_set("unit_tests") {
"form_parsing/phone_field_unittest.cc", "form_parsing/phone_field_unittest.cc",
"form_parsing/price_field_unittest.cc", "form_parsing/price_field_unittest.cc",
"form_parsing/search_field_unittest.cc", "form_parsing/search_field_unittest.cc",
"form_processing/label_processing_util_unittest.cc",
"form_processing/name_processing_util_unittest.cc", "form_processing/name_processing_util_unittest.cc",
"form_structure_unittest.cc", "form_structure_unittest.cc",
"geo/address_i18n_unittest.cc", "geo/address_i18n_unittest.cc",
......
...@@ -30,7 +30,8 @@ AutofillField::AutofillField(const FormFieldData& field, ...@@ -30,7 +30,8 @@ AutofillField::AutofillField(const FormFieldData& field,
const base::string16& unique_name) const base::string16& unique_name)
: FormFieldData(field), : FormFieldData(field),
unique_name_(unique_name), unique_name_(unique_name),
parseable_name_(field.name) { parseable_name_(field.name),
parseable_label_(field.label) {
field_signature_ = field_signature_ =
CalculateFieldSignatureByNameAndType(name, form_control_type); CalculateFieldSignatureByNameAndType(name, form_control_type);
} }
......
...@@ -74,6 +74,7 @@ class AutofillField : public FormFieldData { ...@@ -74,6 +74,7 @@ class AutofillField : public FormFieldData {
PhonePart phone_part() const { return phone_part_; } PhonePart phone_part() const { return phone_part_; }
bool previously_autofilled() const { return previously_autofilled_; } bool previously_autofilled() const { return previously_autofilled_; }
const base::string16& parseable_name() const { return parseable_name_; } const base::string16& parseable_name() const { return parseable_name_; }
// Returns the label that is meant to be used for parsing. It is initialized
// with the field's |label| and may hold only a part of it once the label has
// been divided between subsequent fields.
const base::string16& parseable_label() const { return parseable_label_; }
bool only_fill_when_focused() const { return only_fill_when_focused_; } bool only_fill_when_focused() const { return only_fill_when_focused_; }
// Setters for the detected types. // Setters for the detected types.
...@@ -107,6 +108,9 @@ class AutofillField : public FormFieldData { ...@@ -107,6 +108,9 @@ class AutofillField : public FormFieldData {
void set_parseable_name(const base::string16& parseable_name) { void set_parseable_name(const base::string16& parseable_name) {
parseable_name_ = parseable_name; parseable_name_ = parseable_name;
} }
// Replaces the label used for parsing with |parseable_label|. The original
// |label| of the field is left untouched.
void set_parseable_label(const base::string16& parseable_label) {
parseable_label_ = parseable_label;
}
void set_only_fill_when_focused(bool fill_when_focused) { void set_only_fill_when_focused(bool fill_when_focused) {
only_fill_when_focused_ = fill_when_focused; only_fill_when_focused_ = fill_when_focused;
...@@ -274,6 +278,10 @@ class AutofillField : public FormFieldData { ...@@ -274,6 +278,10 @@ class AutofillField : public FormFieldData {
// parsing. // parsing.
base::string16 parseable_name_; base::string16 parseable_name_;
// The label that is used for parsing. It starts out as a copy of the field's
// |label| and is potentially only a part of the original label when that
// label is divided between subsequent fields.
base::string16 parseable_label_;
// The type of password generation event, if it happened. // The type of password generation event, if it happened.
AutofillUploadContents::Field::PasswordGenerationType generation_type_ = AutofillUploadContents::Field::PasswordGenerationType generation_type_ =
AutofillUploadContents::Field::NO_GENERATION; AutofillUploadContents::Field::NO_GENERATION;
......
// Copyright 2021 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/autofill/core/browser/form_processing/label_processing_util.h"
#include "base/ranges/algorithm.h"
#include "base/strings/string_split.h"
#include "base/strings/utf_string_conversions.h"
namespace autofill {
using LabelPieces = std::vector<base::StringPiece16>;
// The maximum number of fields that can share a label. Declared as |size_t|
// because it is only ever compared against field counts of that type.
constexpr size_t kMaxNumberOfFieldsToShareALabel = 3;
// The maximum length of a label that can be shared among fields. Declared as
// |size_t| because it is only ever compared against string sizes.
constexpr size_t kMaxLengthOfShareableLabel = 40;
// Divides labels between adjacent fields where possible. A non-empty label
// that is followed by a run of empty labels is split into components; if the
// number of components equals the number of fields in the run, every field of
// the run receives one component. Returns the resulting labels aligned with
// |labels|, or |base::nullopt| if not a single label was divided.
base::Optional<std::vector<base::string16>> GetParseableLabels(
    const LabelPieces& labels) {
  // Work on a copy so that fields which do not take part in any sharing keep
  // their original label.
  LabelPieces parseable_pieces(labels);
  bool any_label_divided = false;

  // Splits |text| by the single-character separators first. As long as the
  // result does not consist of |wanted| components, the separating words are
  // tried one after another. The caller has to verify the size of the result
  // because the last attempt is returned even if it does not match.
  const auto split_label = [](base::StringPiece16 text, size_t wanted) {
    LabelPieces components = base::SplitStringPiece(
        text, base::ASCIIToUTF16("/,&-"), base::TRIM_WHITESPACE,
        base::SPLIT_WANT_NONEMPTY);
    for (const char* word : {" and ", " und ", " et ", " y "}) {
      if (components.size() == wanted)
        break;
      components = base::SplitStringPieceUsingSubstr(
          text, base::ASCIIToUTF16(word), base::TRIM_WHITESPACE,
          base::SPLIT_WANT_NONEMPTY);
    }
    return components;
  };

  const size_t label_count = labels.size();
  // |run_begin| is the index of the field that may share its label with the
  // fields that directly follow it.
  for (size_t run_begin = 0; run_begin < label_count;) {
    const base::StringPiece16 candidate = labels[run_begin];

    // Empty labels and labels longer than |kMaxLengthOfShareableLabel| are
    // never shared.
    const bool shareable =
        !candidate.empty() && candidate.size() <= kMaxLengthOfShareableLabel;
    if (!shareable) {
      ++run_begin;
      continue;
    }

    // Advance |run_end| to the first subsequent field with a non-empty label,
    // or to one past the last field.
    size_t run_end = run_begin + 1;
    while (run_end < label_count && labels[run_end].empty())
      ++run_end;

    // Remember where the run started and resume the outer scan at the next
    // non-empty label, no matter whether this run is divided or not.
    const size_t first_field = run_begin;
    const size_t fields_in_run = run_end - run_begin;
    run_begin = run_end;

    // A single field has nothing to share with, and too long runs are
    // excluded from sharing.
    if (fields_in_run < 2 || fields_in_run > kMaxNumberOfFieldsToShareALabel)
      continue;

    const LabelPieces components = split_label(candidate, fields_in_run);
    if (components.size() != fields_in_run)
      continue;

    // Hand out one component to each field of the run.
    any_label_divided = true;
    for (size_t offset = 0; offset < fields_in_run; ++offset)
      parseable_pieces[first_field + offset] = components[offset];
  }

  if (!any_label_divided)
    return base::nullopt;

  // The pieces only reference memory owned by the caller. Convert them into
  // owning strings before handing them out.
  std::vector<base::string16> result;
  result.reserve(parseable_pieces.size());
  base::ranges::transform(
      parseable_pieces, std::back_inserter(result),
      [](const base::StringPiece16& piece) { return base::string16(piece); });
  return base::make_optional(std::move(result));
}
} // namespace autofill
// Copyright 2021 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_AUTOFILL_CORE_BROWSER_FORM_PROCESSING_LABEL_PROCESSING_UTIL_H_
#define COMPONENTS_AUTOFILL_CORE_BROWSER_FORM_PROCESSING_LABEL_PROCESSING_UTIL_H_
#include <vector>
#include "base/optional.h"
#include "base/strings/string_piece.h"
namespace autofill {
// Tries to derive parseable labels from |labels| by splitting one label
// between multiple adjacent fields. |labels| contains one entry per field.
//
// A non-empty label that is followed by a run of empty labels is split at the
// separator characters "/", ",", "&" and "-" or, if that does not yield the
// right number of components, at one of the words " and ", " und ", " et " or
// " y ". If the number of non-empty components equals the number of fields in
// the run (the labeled field plus the unlabeled ones), the components are
// assigned to those fields in order. Labels with more than 40 characters and
// runs of more than 3 fields are never divided.
//
// If at least one label was divided, a vector of |base::string16| is returned
// that is aligned with |labels|; entries that were not divided are copies of
// the original labels. If there aren't any changes to the labels,
// |base::nullopt| is returned.
base::Optional<std::vector<base::string16>> GetParseableLabels(
const std::vector<base::StringPiece16>& labels);
} // namespace autofill
#endif // COMPONENTS_AUTOFILL_CORE_BROWSER_FORM_PROCESSING_LABEL_PROCESSING_UTIL_H_
// Copyright 2021 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/autofill/core/browser/form_processing/label_processing_util.h"
#include "base/feature_list.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/scoped_feature_list.h"
#include "components/autofill/core/common/autofill_features.h"
#include "testing/gtest/include/gtest/gtest.h"
using base::ASCIIToUTF16;
namespace {
// Creates one string piece per entry of |strings|. The pieces reference the
// memory of |strings|, which therefore has to outlive the returned vector.
std::vector<base::StringPiece16> StringsToStringPieces(
    const std::vector<base::string16>& strings) {
  return std::vector<base::StringPiece16>(strings.begin(), strings.end());
}
} // namespace
namespace autofill {
// A label with two components that is followed by exactly one unlabeled field
// is divided between the two fields.
TEST(LabelProcessingUtil, GetParseableNameStringPieces) {
  const std::vector<base::string16> input = {
      ASCIIToUTF16("City"), ASCIIToUTF16("Street & House Number"),
      ASCIIToUTF16(""), ASCIIToUTF16("Zip")};

  const auto expected = base::make_optional(std::vector<base::string16>{
      ASCIIToUTF16("City"), ASCIIToUTF16("Street"),
      ASCIIToUTF16("House Number"), ASCIIToUTF16("Zip")});

  EXPECT_EQ(GetParseableLabels(StringsToStringPieces(input)), expected);
}
// A label with three components that is followed by exactly two unlabeled
// fields is divided between the three fields.
TEST(LabelProcessingUtil, GetParseableNameStringPieces_ThreeComponents) {
  const std::vector<base::string16> input = {
      ASCIIToUTF16("City"), ASCIIToUTF16("Street & House Number & Floor"),
      ASCIIToUTF16(""), ASCIIToUTF16(""), ASCIIToUTF16("Zip")};

  const auto expected = base::make_optional(std::vector<base::string16>{
      ASCIIToUTF16("City"), ASCIIToUTF16("Street"),
      ASCIIToUTF16("House Number"), ASCIIToUTF16("Floor"),
      ASCIIToUTF16("Zip")});

  EXPECT_EQ(GetParseableLabels(StringsToStringPieces(input)), expected);
}
// Four fields exceed the maximum number of fields that may share a label, so
// nothing is divided even though the number of components matches.
TEST(LabelProcessingUtil, GetParseableNameStringPieces_TooManyComponents) {
  const std::vector<base::string16> input = {
      ASCIIToUTF16("City"),
      ASCIIToUTF16("Street & House Number & Floor & Stairs"),
      ASCIIToUTF16(""),
      ASCIIToUTF16(""),
      ASCIIToUTF16(""),
      ASCIIToUTF16("Zip")};

  const base::Optional<std::vector<base::string16>> expected = base::nullopt;

  EXPECT_EQ(GetParseableLabels(StringsToStringPieces(input)), expected);
}
// Three label components cannot be distributed over two fields, so nothing is
// divided.
TEST(LabelProcessingUtil, GetParseableNameStringPieces_UnmachtingComponents) {
  const std::vector<base::string16> input = {
      ASCIIToUTF16("City"), ASCIIToUTF16("Street & House Number & Floor"),
      ASCIIToUTF16(""), ASCIIToUTF16("Zip")};

  const base::Optional<std::vector<base::string16>> expected = base::nullopt;

  EXPECT_EQ(GetParseableLabels(StringsToStringPieces(input)), expected);
}
// A splittable label in the last position has no subsequent unlabeled fields
// to share with, and the single empty label after "City" does not match the
// one component of "City" either. Nothing is divided.
TEST(LabelProcessingUtil, GetParseableNameStringPieces_SplitableLabelAtEnd) {
  const std::vector<base::string16> input = {
      ASCIIToUTF16("City"), ASCIIToUTF16(""), ASCIIToUTF16("Zip"),
      ASCIIToUTF16("Street & House Number & Floor")};

  const base::Optional<std::vector<base::string16>> expected = base::nullopt;

  EXPECT_EQ(GetParseableLabels(StringsToStringPieces(input)), expected);
}
// A label that exceeds the maximum shareable length is not divided, even if
// the number of its components matches the number of fields.
TEST(LabelProcessingUtil, GetParseableNameStringPieces_TooLongLabel) {
  const std::vector<base::string16> input = {
      ASCIIToUTF16("City"),
      ASCIIToUTF16("Street & House Number with a lot of additional text that "
                   "exceeds 40 characters by far"),
      ASCIIToUTF16(""), ASCIIToUTF16("Zip")};

  const base::Optional<std::vector<base::string16>> expected = base::nullopt;

  EXPECT_EQ(GetParseableLabels(StringsToStringPieces(input)), expected);
}
} // namespace autofill
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include "components/autofill/core/browser/field_types.h" #include "components/autofill/core/browser/field_types.h"
#include "components/autofill/core/browser/form_parsing/field_candidates.h" #include "components/autofill/core/browser/form_parsing/field_candidates.h"
#include "components/autofill/core/browser/form_parsing/form_field.h" #include "components/autofill/core/browser/form_parsing/form_field.h"
#include "components/autofill/core/browser/form_processing/label_processing_util.h"
#include "components/autofill/core/browser/form_processing/name_processing_util.h" #include "components/autofill/core/browser/form_processing/name_processing_util.h"
#include "components/autofill/core/browser/logging/log_manager.h" #include "components/autofill/core/browser/logging/log_manager.h"
#include "components/autofill/core/browser/randomized_encoder.h" #include "components/autofill/core/browser/randomized_encoder.h"
...@@ -2385,11 +2386,47 @@ void FormStructure::ProcessExtractedFields() { ...@@ -2385,11 +2386,47 @@ void FormStructure::ProcessExtractedFields() {
// Extracts the |parseable_name_| by removing common affixes from the // Extracts the |parseable_name_| by removing common affixes from the
// field names. // field names.
ExtractParseableFieldNames(); ExtractParseableFieldNames();
// Extracts the |parseable_label_| for each field.
ExtractParseableFieldLabels();
}
void FormStructure::ExtractParseableFieldLabels() {
std::vector<base::StringPiece16> field_labels;
field_labels.reserve(field_count());
for (const auto& field : *this) {
// Skip fields that are not a text input or not visible.
if (!field->IsTextInputElement() || !field->IsVisible()) {
continue;
}
field_labels.push_back(field->label);
}
// Determine the parsable labels and write them back.
base::Optional<std::vector<base::string16>> parsable_labels =
GetParseableLabels(field_labels);
// If not single label was split, the function can return, because the
// |parsable_label_| is assigned to |label| by default.
if (!parsable_labels.has_value()) {
return;
}
size_t idx = 0;
for (auto& field : *this) {
if (!field->IsTextInputElement() || !field->IsVisible()) {
// For those fields, set the original label.
field->set_parseable_label(field->label);
continue;
}
DCHECK(idx < parsable_labels->size());
field->set_parseable_label(parsable_labels->at(idx++));
}
} }
void FormStructure::ExtractParseableFieldNames() { void FormStructure::ExtractParseableFieldNames() {
// Create a vector of string pieces containing the field names. // Create a vector of string pieces containing the field names.
std::vector<base::StringPiece16> names; std::vector<base::StringPiece16> names;
names.reserve(field_count());
for (const auto& field : *this) { for (const auto& field : *this) {
names.push_back(base::StringPiece16(field->name)); names.push_back(base::StringPiece16(field->name));
} }
......
...@@ -523,6 +523,10 @@ class FormStructure { ...@@ -523,6 +523,10 @@ class FormStructure {
// Extracts the parseable field name by removing a common affix. // Extracts the parseable field name by removing a common affix.
void ExtractParseableFieldNames(); void ExtractParseableFieldNames();
// Extract parseable field labels by potentially splitting labels between
// adjacent fields.
void ExtractParseableFieldLabels();
// The language detected for this form's page, before any translations // The language detected for this form's page, before any translations
// performed by Chrome. // performed by Chrome.
LanguageCode current_page_language_; LanguageCode current_page_language_;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment