Commit 07a7b5a5, authored by Vaclav Brozek, committed by Commit Bot

Username predictions into FormData

Username predictions list the text fields of a form in descending order
of likelihood that they are usernames. The predictions are obtained by
running a classifier (locally) on the DOM tree containing the form. They
are represented as a vector of the unique renderer IDs of the identified
fields.

This CL makes the username predictions part of FormData and teaches the
new FormData -> PasswordForm parser to use them.

FormData serves both address autofill and password autofill, yet
username predictions are only relevant to the latter. In this sense,
password autofill is polluting the struct for address autofill. However,
given the close relationship between the predictions and the actual
FormFieldData they refer to, and given the not-so-high overhead of
passing around an empty vector (compared to the overall size of
FormData), encapsulating the predictions with FormData was chosen as the
approach to go with.

Bug: 845426
Change-Id: I92ca2eb8d2a24d08541e4878a0732092a473c74f
Reviewed-on: https://chromium-review.googlesource.com/1101027
Commit-Queue: Vaclav Brozek <vabr@chromium.org>
Reviewed-by: Robert Sesek <rsesek@chromium.org>
Reviewed-by: Vadym Doroshenko <dvadym@chromium.org>
Cr-Commit-Position: refs/heads/master@{#568393}
parent 15cac611
...@@ -143,6 +143,7 @@ struct FormData { ...@@ -143,6 +143,7 @@ struct FormData {
bool is_formless_checkout; bool is_formless_checkout;
uint32 unique_renderer_id; uint32 unique_renderer_id;
array<FormFieldData> fields; array<FormFieldData> fields;
array<uint32> username_predictions;
}; };
// autofill::FormFieldDataPredictions // autofill::FormFieldDataPredictions
......
...@@ -615,6 +615,9 @@ bool StructTraits<autofill::mojom::FormDataDataView, autofill::FormData>::Read( ...@@ -615,6 +615,9 @@ bool StructTraits<autofill::mojom::FormDataDataView, autofill::FormData>::Read(
if (!data.ReadFields(&out->fields)) if (!data.ReadFields(&out->fields))
return false; return false;
if (!data.ReadUsernamePredictions(&out->username_predictions))
return false;
return true; return true;
} }
......
...@@ -264,6 +264,11 @@ struct StructTraits<autofill::mojom::FormDataDataView, autofill::FormData> { ...@@ -264,6 +264,11 @@ struct StructTraits<autofill::mojom::FormDataDataView, autofill::FormData> {
return r.fields; return r.fields;
} }
// Returns the |username_predictions| vector of |r| by const reference. Part of
// the StructTraits mapping between autofill::FormData and
// autofill::mojom::FormDataDataView.
static const std::vector<uint32_t>& username_predictions(
const autofill::FormData& r) {
return r.username_predictions;
}
static bool Read(autofill::mojom::FormDataDataView data, static bool Read(autofill::mojom::FormDataDataView data,
autofill::FormData* out); autofill::FormData* out);
}; };
......
...@@ -341,6 +341,7 @@ TEST_F(AutofillTypeTraitsTestImpl, PassFormFieldData) { ...@@ -341,6 +341,7 @@ TEST_F(AutofillTypeTraitsTestImpl, PassFormFieldData) {
TEST_F(AutofillTypeTraitsTestImpl, PassFormData) { TEST_F(AutofillTypeTraitsTestImpl, PassFormData) {
FormData input; FormData input;
test::CreateTestAddressFormData(&input); test::CreateTestAddressFormData(&input);
input.username_predictions = {1, 13, 2};
base::RunLoop loop; base::RunLoop loop;
mojom::TypeTraitsTestPtr proxy = GetTypeTraitsTestProxy(); mojom::TypeTraitsTestPtr proxy = GetTypeTraitsTestProxy();
......
...@@ -10,7 +10,6 @@ ...@@ -10,7 +10,6 @@
#include <set> #include <set>
#include <string> #include <string>
#include "base/containers/flat_set.h"
#include "base/i18n/case_conversion.h" #include "base/i18n/case_conversion.h"
#include "base/lazy_instance.h" #include "base/lazy_instance.h"
#include "base/macros.h" #include "base/macros.h"
...@@ -406,20 +405,15 @@ bool IsEnabledPasswordFieldPresent(const std::vector<FormFieldData>& fields) { ...@@ -406,20 +405,15 @@ bool IsEnabledPasswordFieldPresent(const std::vector<FormFieldData>& fields) {
// Find the first element in |username_predictions| (i.e. the most reliable // Find the first element in |username_predictions| (i.e. the most reliable
// prediction) that occurs in |possible_usernames|. // prediction) that occurs in |possible_usernames|.
const FormFieldData* FindUsernameInPredictions( const FormFieldData* FindUsernameInPredictions(
const std::vector<const FormFieldData*>& username_predictions, const std::vector<uint32_t>& username_predictions,
const std::vector<const FormFieldData*>& possible_usernames) { const std::vector<const FormFieldData*>& possible_usernames) {
// To speed-up the matching for-loop below, convert |possible_usernames| to a for (uint32_t predicted_id : username_predictions) {
// set. Creating is O(N log N) for N=possible_usernames.size(). Retrieval is auto iter =
// O(log N), so the whole for-loop is O(M log N) for std::find_if(possible_usernames.begin(), possible_usernames.end(),
// M=username_predictions.size(). Use flat_set, because of cache locality (the [predicted_id](const FormFieldData* field) {
// M and N are likely small, so this can make a difference) and less heap return field->unique_renderer_id == predicted_id;
// allocations. });
const base::flat_set<const FormFieldData*> usernames( if (iter != possible_usernames.end()) {
possible_usernames.begin(), possible_usernames.end());
for (const FormFieldData* prediction : username_predictions) {
auto iter = usernames.find(prediction);
if (iter != usernames.end()) {
return *iter; return *iter;
} }
} }
...@@ -430,11 +424,11 @@ const FormFieldData* FindUsernameInPredictions( ...@@ -430,11 +424,11 @@ const FormFieldData* FindUsernameInPredictions(
// elements of the form, |form_data| should be the already extracted FormData // elements of the form, |form_data| should be the already extracted FormData
// representation of that form. |username_detector_cache| is optional, and can // representation of that form. |username_detector_cache| is optional, and can
// be used to spare recomputation if called multiple times for the same form. // be used to spare recomputation if called multiple times for the same form.
std::vector<const FormFieldData*> GetUsernamePredictions( std::vector<uint32_t> GetUsernamePredictions(
const std::vector<blink::WebFormControlElement>& control_elements, const std::vector<blink::WebFormControlElement>& control_elements,
const FormData& form_data, const FormData& form_data,
UsernameDetectorCache* username_detector_cache) { UsernameDetectorCache* username_detector_cache) {
std::vector<const FormFieldData*> username_predictions; std::vector<uint32_t> username_predictions;
// Dummy cache stores the predictions in case no real cache was passed to // Dummy cache stores the predictions in case no real cache was passed to
// here. // here.
UsernameDetectorCache dummy_cache; UsernameDetectorCache dummy_cache;
...@@ -445,21 +439,8 @@ std::vector<const FormFieldData*> GetUsernamePredictions( ...@@ -445,21 +439,8 @@ std::vector<const FormFieldData*> GetUsernamePredictions(
GetPredictionsFieldBasedOnHtmlAttributes(control_elements, form_data, GetPredictionsFieldBasedOnHtmlAttributes(control_elements, form_data,
username_detector_cache); username_detector_cache);
username_predictions.reserve(username_predictions_dom.size()); username_predictions.reserve(username_predictions_dom.size());
// Convert the DOM elements to FormFieldData.
std::map<uint32_t, const FormFieldData*> id_to_fields;
for (const FormFieldData& field : form_data.fields) {
auto insert_result =
id_to_fields.insert({field.unique_renderer_id, &field});
DCHECK(insert_result.second) << "Unique ID is not unique.";
}
for (const WebInputElement& element : username_predictions_dom) { for (const WebInputElement& element : username_predictions_dom) {
std::map<uint32_t, const FormFieldData*>::const_iterator prediction_it = username_predictions.push_back(element.UniqueRendererFormControlId());
id_to_fields.find(element.UniqueRendererFormControlId());
// Note: some of the |element|s may not have an equivalent in
// |form_data.fields|, e.g., because those are not autofillable.
if (prediction_it != id_to_fields.end())
username_predictions.push_back(prediction_it->second);
} }
return username_predictions; return username_predictions;
} }
...@@ -483,11 +464,10 @@ bool GetPasswordForm( ...@@ -483,11 +464,10 @@ bool GetPasswordForm(
return false; return false;
// Evaluate the context of the fields. // Evaluate the context of the fields.
std::vector<const FormFieldData*> username_predictions;
if (base::FeatureList::IsEnabled( if (base::FeatureList::IsEnabled(
password_manager::features::kHtmlBasedUsernameDetector)) { password_manager::features::kHtmlBasedUsernameDetector)) {
username_predictions = GetUsernamePredictions(control_elements, form_data, password_form->form_data.username_predictions = GetUsernamePredictions(
username_detector_cache); control_elements, form_data, username_detector_cache);
} }
// Narrow the scope to enabled inputs. // Narrow the scope to enabled inputs.
...@@ -664,8 +644,8 @@ bool GetPasswordForm( ...@@ -664,8 +644,8 @@ bool GetPasswordForm(
// Use HTML based username detector only if neither server predictions nor // Use HTML based username detector only if neither server predictions nor
// autocomplete attributes were useful to detect the username. // autocomplete attributes were useful to detect the username.
if (!predicted_username_field && !username_by_attribute) { if (!predicted_username_field && !username_by_attribute) {
username_field_by_context = username_field_by_context = FindUsernameInPredictions(
FindUsernameInPredictions(username_predictions, plausible_usernames); form_data.username_predictions, plausible_usernames);
} }
} }
......
...@@ -78,7 +78,8 @@ FormData::FormData(const FormData& data) ...@@ -78,7 +78,8 @@ FormData::FormData(const FormData& data)
is_form_tag(data.is_form_tag), is_form_tag(data.is_form_tag),
is_formless_checkout(data.is_formless_checkout), is_formless_checkout(data.is_formless_checkout),
unique_renderer_id(data.unique_renderer_id), unique_renderer_id(data.unique_renderer_id),
fields(data.fields) {} fields(data.fields),
username_predictions(data.username_predictions) {}
FormData::~FormData() { FormData::~FormData() {
} }
...@@ -114,7 +115,8 @@ bool FormData::operator==(const FormData& form) const { ...@@ -114,7 +115,8 @@ bool FormData::operator==(const FormData& form) const {
unique_renderer_id == form.unique_renderer_id && unique_renderer_id == form.unique_renderer_id &&
is_form_tag == form.is_form_tag && is_form_tag == form.is_form_tag &&
is_formless_checkout == form.is_formless_checkout && is_formless_checkout == form.is_formless_checkout &&
fields == form.fields; fields == form.fields &&
username_predictions == form.username_predictions;
} }
bool FormData::operator!=(const FormData& form) const { bool FormData::operator!=(const FormData& form) const {
......
...@@ -61,6 +61,12 @@ struct FormData { ...@@ -61,6 +61,12 @@ struct FormData {
uint32_t unique_renderer_id = kNotSetFormRendererId; uint32_t unique_renderer_id = kNotSetFormRendererId;
// A vector of all the input fields in the form. // A vector of all the input fields in the form.
std::vector<FormFieldData> fields; std::vector<FormFieldData> fields;
// Contains unique renderer IDs of text elements which are predicted to be
// usernames. The order matters: elements are sorted in descending likelihood
// of being a username (the first one is the most likely username). Can
// contain IDs of elements which are not in |fields|. This is only used during
// parsing into PasswordForm, and hence not serialised for storage.
std::vector<uint32_t> username_predictions;
}; };
// For testing. // For testing.
......
...@@ -33,6 +33,7 @@ static_library("form_parsing") { ...@@ -33,6 +33,7 @@ static_library("form_parsing") {
"//components/autofill/core/browser", "//components/autofill/core/browser",
"//components/autofill/core/browser/proto", "//components/autofill/core/browser/proto",
"//components/autofill/core/common", "//components/autofill/core/common",
"//components/password_manager/core/common",
] ]
} }
......
...@@ -4,6 +4,8 @@ ...@@ -4,6 +4,8 @@
#include "components/password_manager/core/browser/form_parsing/form_parser.h" #include "components/password_manager/core/browser/form_parsing/form_parser.h"
#include <stdint.h>
#include <algorithm> #include <algorithm>
#include <iterator> #include <iterator>
#include <set> #include <set>
...@@ -14,6 +16,7 @@ ...@@ -14,6 +16,7 @@
#include "base/strings/string_split.h" #include "base/strings/string_split.h"
#include "components/autofill/core/common/form_data.h" #include "components/autofill/core/common/form_data.h"
#include "components/autofill/core/common/password_form.h" #include "components/autofill/core/common/password_form.h"
#include "components/password_manager/core/common/password_manager_features.h"
using autofill::FieldPropertiesFlags; using autofill::FieldPropertiesFlags;
using autofill::FormFieldData; using autofill::FormFieldData;
...@@ -376,9 +379,14 @@ const FormFieldData* FindUsernameFieldBaseHeuristics( ...@@ -376,9 +379,14 @@ const FormFieldData* FindUsernameFieldBaseHeuristics(
return focusable_username ? focusable_username : username; return focusable_username ? focusable_username : username;
} }
// Tries to find the username and password fields in |processed_fields| based on
// the structure (how the fields are ordered). If |mode| is SAVING, only
// considers non-empty fields. If |username_hint| is not null, it is used as the
// username, unless |mode| is SAVING and the hint's value is empty.
std::unique_ptr<ParseResult> ParseUsingBaseHeuristics( std::unique_ptr<ParseResult> ParseUsingBaseHeuristics(
const std::vector<ProcessedField>& processed_fields, const std::vector<ProcessedField>& processed_fields,
FormParsingMode mode) { FormParsingMode mode,
const FormFieldData* username_hint) {
// What is the best interactability among passwords? // What is the best interactability among passwords?
Interactability password_max = Interactability::kUnlikely; Interactability password_max = Interactability::kUnlikely;
for (const ProcessedField& processed_field : processed_fields) { for (const ProcessedField& processed_field : processed_fields) {
...@@ -402,6 +410,12 @@ std::unique_ptr<ParseResult> ParseUsingBaseHeuristics( ...@@ -402,6 +410,12 @@ std::unique_ptr<ParseResult> ParseUsingBaseHeuristics(
if (result->IsEmpty()) if (result->IsEmpty())
return nullptr; return nullptr;
if (username_hint &&
!(mode == FormParsingMode::SAVING && username_hint->value.empty())) {
result->username_field = username_hint;
return result;
}
// What is the best interactability among text fields preceding the passwords? // What is the best interactability among text fields preceding the passwords?
Interactability username_max = Interactability::kUnlikely; Interactability username_max = Interactability::kUnlikely;
for (auto it = processed_fields.begin(); it != first_relevant_password; for (auto it = processed_fields.begin(); it != first_relevant_password;
...@@ -504,6 +518,24 @@ std::vector<ProcessedField> ProcessFields( ...@@ -504,6 +518,24 @@ std::vector<ProcessedField> ProcessFields(
return result; return result;
} }
// Walks |username_predictions| from the most to the least reliable prediction
// and returns the field of the first entry in |processed_fields| whose
// |unique_renderer_id| equals the predicted ID. Predicted IDs without a
// matching processed field are skipped. Returns nullptr if no prediction
// matches any processed field.
const FormFieldData* FindUsernameInPredictions(
    const std::vector<uint32_t>& username_predictions,
    const std::vector<ProcessedField>& processed_fields) {
  for (const uint32_t candidate_id : username_predictions) {
    for (const ProcessedField& candidate : processed_fields) {
      if (candidate.field->unique_renderer_id == candidate_id)
        return candidate.field;
    }
  }
  return nullptr;
}
} // namespace } // namespace
std::unique_ptr<PasswordForm> ParseFormData( std::unique_ptr<PasswordForm> ParseFormData(
...@@ -546,9 +578,17 @@ std::unique_ptr<PasswordForm> ParseFormData( ...@@ -546,9 +578,17 @@ std::unique_ptr<PasswordForm> ParseFormData(
return result; return result;
} }
// Try to find the username based on the context of the fields.
const FormFieldData* username_field_by_context = nullptr;
if (base::FeatureList::IsEnabled(
password_manager::features::kHtmlBasedUsernameDetector)) {
username_field_by_context = FindUsernameInPredictions(
form_data.username_predictions, processed_fields);
}
// Try to parse with base heuristic. // Try to parse with base heuristic.
auto base_heuristics_parse_result = auto base_heuristics_parse_result = ParseUsingBaseHeuristics(
ParseUsingBaseHeuristics(processed_fields, mode); processed_fields, mode, username_field_by_context);
if (base_heuristics_parse_result) { if (base_heuristics_parse_result) {
SetFields(*base_heuristics_parse_result, result.get()); SetFields(*base_heuristics_parse_result, result.get());
return result; return result;
......
...@@ -66,6 +66,9 @@ struct FieldDataDescription { ...@@ -66,6 +66,9 @@ struct FieldDataDescription {
const char* name = kNonimportantValue; const char* name = kNonimportantValue;
const char* form_control_type = "text"; const char* form_control_type = "text";
PasswordFieldPrediction prediction = {.type = autofill::MAX_VALID_FIELD_TYPE}; PasswordFieldPrediction prediction = {.type = autofill::MAX_VALID_FIELD_TYPE};
// If not -1, indicates on which rank among predicted usernames this should
// be. Unused ranks will be padded with unique IDs (not found in any fields).
int predicted_username = -1;
}; };
// Describes a test case for the parser. // Describes a test case for the parser.
...@@ -81,7 +84,7 @@ struct FormParsingTestCase { ...@@ -81,7 +84,7 @@ struct FormParsingTestCase {
// Returns numbers which are distinct from each other within the scope of one // Returns numbers which are distinct from each other within the scope of one
// test. // test.
uint32_t GetUniqueId() { uint32_t GetUniqueId() {
static uint32_t counter = 0; static uint32_t counter = 10;
return counter++; return counter++;
} }
...@@ -191,6 +194,19 @@ FormData GetFormDataAndExpectation( ...@@ -191,6 +194,19 @@ FormData GetFormDataAndExpectation(
if (field_description.prediction.type != autofill::MAX_VALID_FIELD_TYPE) { if (field_description.prediction.type != autofill::MAX_VALID_FIELD_TYPE) {
(*predictions)[unique_id] = field_description.prediction; (*predictions)[unique_id] = field_description.prediction;
} }
if (field_description.predicted_username >= 0) {
size_t index = static_cast<size_t>(field_description.predicted_username);
if (form_data.username_predictions.size() <= index)
form_data.username_predictions.resize(index + 1);
form_data.username_predictions[index] = field.unique_renderer_id;
}
}
// Fill unused ranks in predictions with fresh IDs to check that those are
// correctly ignored. In real situation, this might correspond, e.g., to
// fields which were not fillable and hence dropped from the selection.
for (uint32_t& id : form_data.username_predictions) {
if (id == 0)
id = GetUniqueId();
} }
return form_data; return form_data;
} }
...@@ -1117,6 +1133,71 @@ TEST(FormParserTest, AllPossiblePasswords) { ...@@ -1117,6 +1133,71 @@ TEST(FormParserTest, AllPossiblePasswords) {
}); });
} }
// Exercises how the IDs listed in FormData::username_predictions affect which
// field is reported as the username. Each case below is a description string
// followed by the list of fields; |role| marks the expected role of a field and
// |predicted_username| the rank of that field among the username predictions.
// The cases are handed to CheckTestData() (defined elsewhere in this file).
TEST(FormParserTest, UsernamePredictions) {
CheckTestData({
{
"Username prediction overrides structure",
{
{.role = ElementRole::USERNAME,
.form_control_type = "text",
.predicted_username = 0},
{.form_control_type = "text"},
{.role = ElementRole::CURRENT_PASSWORD,
.form_control_type = "password"},
},
},
// The predicted field has an empty value, so it is expected as the
// username only for filling; for saving the second text field is expected.
// Rank 2 leaves ranks 0 and 1 unused, so those slots get padded with IDs
// that belong to no field.
{
"Username prediction does not override structure if empty and mode "
"is SAVING",
{
{.role = ElementRole::USERNAME_FILLING,
.form_control_type = "text",
.predicted_username = 2,
.value = ""},
{.role = ElementRole::USERNAME_SAVING,
.form_control_type = "text"},
{.role = ElementRole::CURRENT_PASSWORD,
.form_control_type = "password"},
},
},
{
"Username prediction does not override autocomplete analysis",
{
{.form_control_type = "text", .predicted_username = 0},
{.role = ElementRole::USERNAME,
.form_control_type = "text",
.autocomplete_attribute = "username"},
{.role = ElementRole::CURRENT_PASSWORD,
.form_control_type = "password",
.autocomplete_attribute = "current-password"},
},
},
// NOTE(review): in the password field below, |.prediction| is written
// before |.form_control_type|, while FieldDataDescription declares
// |form_control_type| first. Out-of-order designators are a compiler
// extension (C++20 requires declaration order) -- confirm this is intended.
{
"Username prediction does not override server hints",
{
{.role = ElementRole::USERNAME,
.form_control_type = "text",
.prediction = {.type = autofill::USERNAME_AND_EMAIL_ADDRESS}},
{.form_control_type = "text", .predicted_username = 0},
{.role = ElementRole::CURRENT_PASSWORD,
.prediction = {.type = autofill::PASSWORD},
.form_control_type = "password"},
},
},
// Two fields are predicted; the one with the lower rank (1 vs. 4) is the
// expected username.
{
"Username prediction order matters",
{
{.role = ElementRole::USERNAME,
.form_control_type = "text",
.predicted_username = 1},
{.form_control_type = "text", .predicted_username = 4},
{.role = ElementRole::CURRENT_PASSWORD,
.form_control_type = "password"},
},
},
});
}
} // namespace } // namespace
} // namespace password_manager } // namespace password_manager
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment