Commit 07a7b5a5 authored by Vaclav Brozek, committed by Commit Bot

Username predictions into FormData

Username predictions list text fields from a form in descending order of
likelihood that they are usernames. The predictions are obtained by
running a classifier (locally) on the DOM tree containing the form. They
are represented as a vector of unique renderer ids of the identified
fields.

This CL makes the username predictions part of FormData and teaches the
new FormData -> PasswordForm parser to use them.

FormData serves both address autofill and password autofill, yet
username predictions are only relevant to the latter. In this sense,
password autofill is polluting the struct for address autofill. However,
given the close relationship between the predictions and the actual
FormFieldData they refer to, and given the not-so-high overhead of
passing around an empty vector (compared to the overall size of
FormData), encapsulating the predictions with FormData was chosen as the
approach to go with.

Bug: 845426
Change-Id: I92ca2eb8d2a24d08541e4878a0732092a473c74f
Reviewed-on: https://chromium-review.googlesource.com/1101027
Commit-Queue: Vaclav Brozek <vabr@chromium.org>
Reviewed-by: Robert Sesek <rsesek@chromium.org>
Reviewed-by: Vadym Doroshenko <dvadym@chromium.org>
Cr-Commit-Position: refs/heads/master@{#568393}
parent 15cac611
......@@ -143,6 +143,7 @@ struct FormData {
bool is_formless_checkout;
uint32 unique_renderer_id;
array<FormFieldData> fields;
array<uint32> username_predictions;
};
// autofill::FormFieldDataPredictions
......
......@@ -615,6 +615,9 @@ bool StructTraits<autofill::mojom::FormDataDataView, autofill::FormData>::Read(
if (!data.ReadFields(&out->fields))
return false;
if (!data.ReadUsernamePredictions(&out->username_predictions))
return false;
return true;
}
......
......@@ -264,6 +264,11 @@ struct StructTraits<autofill::mojom::FormDataDataView, autofill::FormData> {
return r.fields;
}
// Getter for the |username_predictions| member of autofill::FormData: returns
// a const reference to the vector of predicted renderer IDs stored in |r|,
// without copying it.
static const std::vector<uint32_t>& username_predictions(
const autofill::FormData& r) {
return r.username_predictions;
}
static bool Read(autofill::mojom::FormDataDataView data,
autofill::FormData* out);
};
......
......@@ -341,6 +341,7 @@ TEST_F(AutofillTypeTraitsTestImpl, PassFormFieldData) {
TEST_F(AutofillTypeTraitsTestImpl, PassFormData) {
FormData input;
test::CreateTestAddressFormData(&input);
input.username_predictions = {1, 13, 2};
base::RunLoop loop;
mojom::TypeTraitsTestPtr proxy = GetTypeTraitsTestProxy();
......
......@@ -10,7 +10,6 @@
#include <set>
#include <string>
#include "base/containers/flat_set.h"
#include "base/i18n/case_conversion.h"
#include "base/lazy_instance.h"
#include "base/macros.h"
......@@ -406,20 +405,15 @@ bool IsEnabledPasswordFieldPresent(const std::vector<FormFieldData>& fields) {
// Find the first element in |username_predictions| (i.e. the most reliable
// prediction) that occurs in |possible_usernames|.
const FormFieldData* FindUsernameInPredictions(
const std::vector<const FormFieldData*>& username_predictions,
const std::vector<uint32_t>& username_predictions,
const std::vector<const FormFieldData*>& possible_usernames) {
// To speed-up the matching for-loop below, convert |possible_usernames| to a
// set. Creating is O(N log N) for N=possible_usernames.size(). Retrieval is
// O(log N), so the whole for-loop is O(M log N) for
// M=username_predictions.size(). Use flat_set, because of cache locality (the
// M and N are likely small, so this can make a difference) and less heap
// allocations.
const base::flat_set<const FormFieldData*> usernames(
possible_usernames.begin(), possible_usernames.end());
for (const FormFieldData* prediction : username_predictions) {
auto iter = usernames.find(prediction);
if (iter != usernames.end()) {
for (uint32_t predicted_id : username_predictions) {
auto iter =
std::find_if(possible_usernames.begin(), possible_usernames.end(),
[predicted_id](const FormFieldData* field) {
return field->unique_renderer_id == predicted_id;
});
if (iter != possible_usernames.end()) {
return *iter;
}
}
......@@ -430,11 +424,11 @@ const FormFieldData* FindUsernameInPredictions(
// elements of the form, |form_data| should be the already extracted FormData
// representation of that form. |username_detector_cache| is optional, and can
// be used to spare recomputation if called multiple times for the same form.
std::vector<const FormFieldData*> GetUsernamePredictions(
std::vector<uint32_t> GetUsernamePredictions(
const std::vector<blink::WebFormControlElement>& control_elements,
const FormData& form_data,
UsernameDetectorCache* username_detector_cache) {
std::vector<const FormFieldData*> username_predictions;
std::vector<uint32_t> username_predictions;
// Dummy cache stores the predictions in case no real cache was passed to
// here.
UsernameDetectorCache dummy_cache;
......@@ -445,21 +439,8 @@ std::vector<const FormFieldData*> GetUsernamePredictions(
GetPredictionsFieldBasedOnHtmlAttributes(control_elements, form_data,
username_detector_cache);
username_predictions.reserve(username_predictions_dom.size());
// Convert the DOM elements to FormFieldData.
std::map<uint32_t, const FormFieldData*> id_to_fields;
for (const FormFieldData& field : form_data.fields) {
auto insert_result =
id_to_fields.insert({field.unique_renderer_id, &field});
DCHECK(insert_result.second) << "Unique ID is not unique.";
}
for (const WebInputElement& element : username_predictions_dom) {
std::map<uint32_t, const FormFieldData*>::const_iterator prediction_it =
id_to_fields.find(element.UniqueRendererFormControlId());
// Note: some of the |element|s may not have an equivalent in
// |form_data.fields|, e.g., because those are not autofillable.
if (prediction_it != id_to_fields.end())
username_predictions.push_back(prediction_it->second);
username_predictions.push_back(element.UniqueRendererFormControlId());
}
return username_predictions;
}
......@@ -483,11 +464,10 @@ bool GetPasswordForm(
return false;
// Evaluate the context of the fields.
std::vector<const FormFieldData*> username_predictions;
if (base::FeatureList::IsEnabled(
password_manager::features::kHtmlBasedUsernameDetector)) {
username_predictions = GetUsernamePredictions(control_elements, form_data,
username_detector_cache);
password_form->form_data.username_predictions = GetUsernamePredictions(
control_elements, form_data, username_detector_cache);
}
// Narrow the scope to enabled inputs.
......@@ -664,8 +644,8 @@ bool GetPasswordForm(
// Use HTML based username detector only if neither server predictions nor
// autocomplete attributes were useful to detect the username.
if (!predicted_username_field && !username_by_attribute) {
username_field_by_context =
FindUsernameInPredictions(username_predictions, plausible_usernames);
username_field_by_context = FindUsernameInPredictions(
form_data.username_predictions, plausible_usernames);
}
}
......
......@@ -78,7 +78,8 @@ FormData::FormData(const FormData& data)
is_form_tag(data.is_form_tag),
is_formless_checkout(data.is_formless_checkout),
unique_renderer_id(data.unique_renderer_id),
fields(data.fields) {}
fields(data.fields),
username_predictions(data.username_predictions) {}
FormData::~FormData() {
}
......@@ -114,7 +115,8 @@ bool FormData::operator==(const FormData& form) const {
unique_renderer_id == form.unique_renderer_id &&
is_form_tag == form.is_form_tag &&
is_formless_checkout == form.is_formless_checkout &&
fields == form.fields;
fields == form.fields &&
username_predictions == form.username_predictions;
}
bool FormData::operator!=(const FormData& form) const {
......
......@@ -61,6 +61,12 @@ struct FormData {
uint32_t unique_renderer_id = kNotSetFormRendererId;
// A vector of all the input fields in the form.
std::vector<FormFieldData> fields;
// Contains unique renderer IDs of text elements which are predicted to be
// usernames. The order matters: elements are sorted in descending likelihood
// of being a username (the first one is the most likely username). Can
// contain IDs of elements which are not in |fields|. This is only used during
// parsing into PasswordForm, and hence not serialised for storage.
std::vector<uint32_t> username_predictions;
};
// For testing.
......
......@@ -33,6 +33,7 @@ static_library("form_parsing") {
"//components/autofill/core/browser",
"//components/autofill/core/browser/proto",
"//components/autofill/core/common",
"//components/password_manager/core/common",
]
}
......
......@@ -4,6 +4,8 @@
#include "components/password_manager/core/browser/form_parsing/form_parser.h"
#include <stdint.h>
#include <algorithm>
#include <iterator>
#include <set>
......@@ -14,6 +16,7 @@
#include "base/strings/string_split.h"
#include "components/autofill/core/common/form_data.h"
#include "components/autofill/core/common/password_form.h"
#include "components/password_manager/core/common/password_manager_features.h"
using autofill::FieldPropertiesFlags;
using autofill::FormFieldData;
......@@ -376,9 +379,14 @@ const FormFieldData* FindUsernameFieldBaseHeuristics(
return focusable_username ? focusable_username : username;
}
// Tries to find the username and password fields in |processed_fields| based on
// the structure (how the fields are ordered). If |mode| is SAVING, only
// considers non-empty fields. If |username_hint| is not null, it is used as the
// username, unless |mode| is SAVING and the hint's value is empty.
std::unique_ptr<ParseResult> ParseUsingBaseHeuristics(
const std::vector<ProcessedField>& processed_fields,
FormParsingMode mode) {
FormParsingMode mode,
const FormFieldData* username_hint) {
// What is the best interactability among passwords?
Interactability password_max = Interactability::kUnlikely;
for (const ProcessedField& processed_field : processed_fields) {
......@@ -402,6 +410,12 @@ std::unique_ptr<ParseResult> ParseUsingBaseHeuristics(
if (result->IsEmpty())
return nullptr;
if (username_hint &&
!(mode == FormParsingMode::SAVING && username_hint->value.empty())) {
result->username_field = username_hint;
return result;
}
// What is the best interactability among text fields preceding the passwords?
Interactability username_max = Interactability::kUnlikely;
for (auto it = processed_fields.begin(); it != first_relevant_password;
......@@ -504,6 +518,24 @@ std::vector<ProcessedField> ProcessFields(
return result;
}
// Walks |username_predictions| in order (the most reliable prediction comes
// first) and, for each predicted ID, looks for the first entry of
// |processed_fields| whose field has that unique_renderer_id. Returns the
// FormFieldData of the first such match. Predicted IDs that match no field are
// skipped. Returns nullptr if no prediction matches any processed field.
const FormFieldData* FindUsernameInPredictions(
    const std::vector<uint32_t>& username_predictions,
    const std::vector<ProcessedField>& processed_fields) {
  for (const uint32_t wanted_id : username_predictions) {
    for (const ProcessedField& candidate : processed_fields) {
      if (candidate.field->unique_renderer_id == wanted_id)
        return candidate.field;
    }
  }
  return nullptr;
}
} // namespace
std::unique_ptr<PasswordForm> ParseFormData(
......@@ -546,9 +578,17 @@ std::unique_ptr<PasswordForm> ParseFormData(
return result;
}
// Try to find the username based on the context of the fields.
const FormFieldData* username_field_by_context = nullptr;
if (base::FeatureList::IsEnabled(
password_manager::features::kHtmlBasedUsernameDetector)) {
username_field_by_context = FindUsernameInPredictions(
form_data.username_predictions, processed_fields);
}
// Try to parse with base heuristic.
auto base_heuristics_parse_result =
ParseUsingBaseHeuristics(processed_fields, mode);
auto base_heuristics_parse_result = ParseUsingBaseHeuristics(
processed_fields, mode, username_field_by_context);
if (base_heuristics_parse_result) {
SetFields(*base_heuristics_parse_result, result.get());
return result;
......
......@@ -66,6 +66,9 @@ struct FieldDataDescription {
const char* name = kNonimportantValue;
const char* form_control_type = "text";
PasswordFieldPrediction prediction = {.type = autofill::MAX_VALID_FIELD_TYPE};
// If not -1, indicates on which rank among predicted usernames this should
// be. Unused ranks will be padded with unique IDs (not found in any fields).
int predicted_username = -1;
};
// Describes a test case for the parser.
......@@ -81,7 +84,7 @@ struct FormParsingTestCase {
// Returns numbers which are distinct from each other within the scope of one
// test.
// Each call returns the current value of a function-local static counter and
// then increments it, so successive calls yield consecutive values.
// NOTE(review): two initialisers for |counter| are shown below (0 and 10);
// presumably this is the old and the new line of the diff, with 10 being the
// post-change start value — confirm. A start value above 0 keeps the returned
// IDs distinct from 0, which GetFormDataAndExpectation compares against when
// filling unused ranks of |username_predictions|.
uint32_t GetUniqueId() {
static uint32_t counter = 0;
static uint32_t counter = 10;
return counter++;
}
......@@ -191,6 +194,19 @@ FormData GetFormDataAndExpectation(
if (field_description.prediction.type != autofill::MAX_VALID_FIELD_TYPE) {
(*predictions)[unique_id] = field_description.prediction;
}
if (field_description.predicted_username >= 0) {
size_t index = static_cast<size_t>(field_description.predicted_username);
if (form_data.username_predictions.size() <= index)
form_data.username_predictions.resize(index + 1);
form_data.username_predictions[index] = field.unique_renderer_id;
}
}
// Fill unused ranks in predictions with fresh IDs to check that those are
// correctly ignored. In a real situation, this might correspond, e.g., to
// fields which were not fillable and hence dropped from the selection.
for (uint32_t& id : form_data.username_predictions) {
if (id == 0)
id = GetUniqueId();
}
return form_data;
}
......@@ -1117,6 +1133,71 @@ TEST(FormParserTest, AllPossiblePasswords) {
});
}
// Exercises the interaction between ranked username predictions
// (FieldDataDescription::predicted_username) and the other signals used to
// pick the username field. Each case is a description string followed by the
// list of fields of the form; the .role values presumably mark the parse
// result expected for that field — confirm against FieldDataDescription.
TEST(FormParserTest, UsernamePredictions) {
CheckTestData({
{
"Username prediction overrides structure",
{
// The rank-0 predicted field is expected to be the username even though
// another text field sits between it and the password field.
{.role = ElementRole::USERNAME,
.form_control_type = "text",
.predicted_username = 0},
{.form_control_type = "text"},
{.role = ElementRole::CURRENT_PASSWORD,
.form_control_type = "password"},
},
},
{
"Username prediction does not override structure if empty and mode "
"is SAVING",
{
// Rank 2 leaves ranks 0 and 1 unused; GetFormDataAndExpectation pads
// those with fresh IDs which match no field. The predicted field has an
// empty value, so it is only expected as the username when filling.
{.role = ElementRole::USERNAME_FILLING,
.form_control_type = "text",
.predicted_username = 2,
.value = ""},
{.role = ElementRole::USERNAME_SAVING,
.form_control_type = "text"},
{.role = ElementRole::CURRENT_PASSWORD,
.form_control_type = "password"},
},
},
{
"Username prediction does not override autocomplete analysis",
{
// The field carrying autocomplete="username" is expected to win over the
// rank-0 predicted field.
{.form_control_type = "text", .predicted_username = 0},
{.role = ElementRole::USERNAME,
.form_control_type = "text",
.autocomplete_attribute = "username"},
{.role = ElementRole::CURRENT_PASSWORD,
.form_control_type = "password",
.autocomplete_attribute = "current-password"},
},
},
{
"Username prediction does not override server hints",
{
// The field with a server prediction is expected to win over the rank-0
// predicted field.
{.role = ElementRole::USERNAME,
.form_control_type = "text",
.prediction = {.type = autofill::USERNAME_AND_EMAIL_ADDRESS}},
{.form_control_type = "text", .predicted_username = 0},
// NOTE(review): .prediction is listed before .form_control_type here,
// the reverse of the other entries and of the member order shown in
// FieldDataDescription — confirm the compiler accepts out-of-order
// designators.
{.role = ElementRole::CURRENT_PASSWORD,
.prediction = {.type = autofill::PASSWORD},
.form_control_type = "password"},
},
},
{
"Username prediction order matters",
{
// Two fields are predicted (ranks 1 and 4); the one with the lower rank,
// i.e. earlier in the prediction list, is expected to be the username.
{.role = ElementRole::USERNAME,
.form_control_type = "text",
.predicted_username = 1},
{.form_control_type = "text", .predicted_username = 4},
{.role = ElementRole::CURRENT_PASSWORD,
.form_control_type = "password"},
},
},
});
}
} // namespace
} // namespace password_manager
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment