Commit b0a64211 authored by Vaclav Brozek's avatar Vaclav Brozek Committed by Commit Bot

FormData parser tries to find missing username

When parsing FormData into PasswordForm, the new parser tries to keep
results from different types of analysis separate: if autocomplete
attributes or server hints only provide the password fields, the
parser currently does not try to figure out username with structural
analysis.

The server data currently lack username quite often, and also
autocomplete mark-up often lacks the username. As a result, the new
parser fails to find the username in many cases when the old parser
could.

Once server data improve, this issue will get much smaller (the
autocomplete data will also get replaced by server hints). However, it
is unlikely that this happens soon enough. Therefore, to keep the
effect of the change of parsers in M69 small, this CL adds the ability
to merge username from structural analysis / HTML classifier with the
results from server hints or autocomplete attributes.

The CL also renames ParseResult to SignificantFields, because both
"parse" and "result" are somewhat overloaded in this file. The CL
further does a few similar minor changes to naming, comments and
code structure, in an attempt to increase readability

Bug: 845426
Change-Id: Ie5a6297a2f7eb1a0d89421ea91a93bdd5a7112b5
Reviewed-on: https://chromium-review.googlesource.com/1117183
Commit-Queue: Vaclav Brozek <vabr@chromium.org>
Reviewed-by: default avatarVadym Doroshenko <dvadym@chromium.org>
Cr-Commit-Position: refs/heads/master@{#571937}
parent a1981d69
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "base/stl_util.h"
#include "base/strings/string_piece.h" #include "base/strings/string_piece.h"
#include "base/strings/string_split.h" #include "base/strings/string_split.h"
#include "components/autofill/core/common/form_data.h" #include "components/autofill/core/common/form_data.h"
...@@ -125,17 +126,20 @@ bool MatchesInteractability(const ProcessedField& processed_field, ...@@ -125,17 +126,20 @@ bool MatchesInteractability(const ProcessedField& processed_field,
FieldPropertiesFlags::AUTOFILLED)); FieldPropertiesFlags::AUTOFILLED));
} }
// Helper struct that is used to return results from the parsing function. // A helper struct that is used to capture significant fields to be used for
struct ParseResult { // the construction of a PasswordForm.
const FormFieldData* username_field = nullptr; struct SignificantFields {
const FormFieldData* password_field = nullptr; const FormFieldData* username = nullptr;
const FormFieldData* new_password_field = nullptr; const FormFieldData* password = nullptr;
const FormFieldData* confirmation_password_field = nullptr; const FormFieldData* new_password = nullptr;
const FormFieldData* confirmation_password = nullptr;
bool IsEmpty() {
DCHECK(!confirmation_password_field || new_password_field) // Returns true if some password field is present. This is the minimal
// requirement for a successful creation of a PasswordForm is present.
bool HasPasswords() const {
DCHECK(!confirmation_password || new_password)
<< "There is no password to confirm if there is no new password field."; << "There is no password to confirm if there is no new password field.";
return password_field == nullptr && new_password_field == nullptr; return password || new_password;
} }
}; };
...@@ -152,29 +156,29 @@ const FormFieldData* FindFieldWithUniqueRendererId( ...@@ -152,29 +156,29 @@ const FormFieldData* FindFieldWithUniqueRendererId(
} }
// Tries to parse |processed_fields| based on server |predictions|. // Tries to parse |processed_fields| based on server |predictions|.
std::unique_ptr<ParseResult> ParseUsingPredictions( std::unique_ptr<SignificantFields> ParseUsingPredictions(
const std::vector<ProcessedField>& processed_fields, const std::vector<ProcessedField>& processed_fields,
const FormPredictions& predictions) { const FormPredictions& predictions) {
auto result = std::make_unique<ParseResult>(); auto result = std::make_unique<SignificantFields>();
// Note: The code does not check whether there is at most 1 username, 1 // Note: The code does not check whether there is at most 1 username, 1
// current password and at most 2 new passwords. It is assumed that server // current password and at most 2 new passwords. It is assumed that server
// side predictions are sane. // side predictions are sane.
for (const auto& prediction : predictions) { for (const auto& prediction : predictions) {
switch (DeriveFromServerFieldType(prediction.second.type)) { switch (DeriveFromServerFieldType(prediction.second.type)) {
case CredentialFieldType::kUsername: case CredentialFieldType::kUsername:
result->username_field = result->username =
FindFieldWithUniqueRendererId(processed_fields, prediction.first); FindFieldWithUniqueRendererId(processed_fields, prediction.first);
break; break;
case CredentialFieldType::kCurrentPassword: case CredentialFieldType::kCurrentPassword:
result->password_field = result->password =
FindFieldWithUniqueRendererId(processed_fields, prediction.first); FindFieldWithUniqueRendererId(processed_fields, prediction.first);
break; break;
case CredentialFieldType::kNewPassword: case CredentialFieldType::kNewPassword:
result->new_password_field = result->new_password =
FindFieldWithUniqueRendererId(processed_fields, prediction.first); FindFieldWithUniqueRendererId(processed_fields, prediction.first);
break; break;
case CredentialFieldType::kConfirmationPassword: case CredentialFieldType::kConfirmationPassword:
result->confirmation_password_field = result->confirmation_password =
FindFieldWithUniqueRendererId(processed_fields, prediction.first); FindFieldWithUniqueRendererId(processed_fields, prediction.first);
break; break;
case CredentialFieldType::kNone: case CredentialFieldType::kNone:
...@@ -183,10 +187,10 @@ std::unique_ptr<ParseResult> ParseUsingPredictions( ...@@ -183,10 +187,10 @@ std::unique_ptr<ParseResult> ParseUsingPredictions(
} }
// If the server suggests there is a confirmation field but no new password, // If the server suggests there is a confirmation field but no new password,
// something went wrong. Sanitize the result. // something went wrong. Sanitize the result.
if (result->confirmation_password_field && !result->new_password_field) if (result->confirmation_password && !result->new_password)
result->confirmation_password_field = nullptr; result->confirmation_password = nullptr;
return result->IsEmpty() ? nullptr : std::move(result); return result->HasPasswords() ? std::move(result) : nullptr;
} }
// Tries to parse |processed_fields| based on autocomplete attributes. // Tries to parse |processed_fields| based on autocomplete attributes.
...@@ -199,30 +203,30 @@ std::unique_ptr<ParseResult> ParseUsingPredictions( ...@@ -199,30 +203,30 @@ std::unique_ptr<ParseResult> ParseUsingPredictions(
// Are these assumptions violated, or is there no password with an autocomplete // Are these assumptions violated, or is there no password with an autocomplete
// attribute, parsing is unsuccessful. Returns nullptr if parsing is // attribute, parsing is unsuccessful. Returns nullptr if parsing is
// unsuccessful. // unsuccessful.
std::unique_ptr<ParseResult> ParseUsingAutocomplete( std::unique_ptr<SignificantFields> ParseUsingAutocomplete(
const std::vector<ProcessedField>& processed_fields) { const std::vector<ProcessedField>& processed_fields) {
auto result = std::make_unique<ParseResult>(); auto result = std::make_unique<SignificantFields>();
for (const ProcessedField& processed_field : processed_fields) { for (const ProcessedField& processed_field : processed_fields) {
switch (processed_field.autocomplete_flag) { switch (processed_field.autocomplete_flag) {
case AutocompleteFlag::kUsername: case AutocompleteFlag::kUsername:
if (processed_field.is_password || result->username_field) if (processed_field.is_password || result->username)
return nullptr; return nullptr;
result->username_field = processed_field.field; result->username = processed_field.field;
break; break;
case AutocompleteFlag::kCurrentPassword: case AutocompleteFlag::kCurrentPassword:
if (!processed_field.is_password || result->password_field) if (!processed_field.is_password || result->password)
return nullptr; return nullptr;
result->password_field = processed_field.field; result->password = processed_field.field;
break; break;
case AutocompleteFlag::kNewPassword: case AutocompleteFlag::kNewPassword:
if (!processed_field.is_password) if (!processed_field.is_password)
return nullptr; return nullptr;
// The first field with autocomplete=new-password is considered to be // The first field with autocomplete=new-password is considered to be
// new_password_field and the second is confirmation_password_field. // new_password and the second is confirmation_password.
if (!result->new_password_field) if (!result->new_password)
result->new_password_field = processed_field.field; result->new_password = processed_field.field;
else if (!result->confirmation_password_field) else if (!result->confirmation_password)
result->confirmation_password_field = processed_field.field; result->confirmation_password = processed_field.field;
else else
return nullptr; return nullptr;
break; break;
...@@ -234,7 +238,7 @@ std::unique_ptr<ParseResult> ParseUsingAutocomplete( ...@@ -234,7 +238,7 @@ std::unique_ptr<ParseResult> ParseUsingAutocomplete(
} }
} }
return result->IsEmpty() ? nullptr : std::move(result); return result->HasPasswords() ? std::move(result) : nullptr;
} }
// Returns only relevant password fields from |processed_fields|. Namely, if // Returns only relevant password fields from |processed_fields|. Namely, if
...@@ -386,42 +390,70 @@ const FormFieldData* FindUsernameFieldBaseHeuristics( ...@@ -386,42 +390,70 @@ const FormFieldData* FindUsernameFieldBaseHeuristics(
return focusable_username ? focusable_username : username; return focusable_username ? focusable_username : username;
} }
// A helper to return a |field|'s unique_renderer_id or
// kNotSetFormControlRendererId if |field| is null.
uint32_t ExtractUniqueId(const FormFieldData* field) {
return field ? field->unique_renderer_id : FormFieldData::kNotSetFormControlRendererId;
}
// Tries to find the username and password fields in |processed_fields| based on // Tries to find the username and password fields in |processed_fields| based on
// the structure (how the fields are ordered). If |mode| is SAVING, only // the structure (how the fields are ordered). If |mode| is SAVING, only
// consideres non-empty fields. If |username_hint| is not null, it is returned // considers non-empty fields. The |found_fields| is both an input and output
// as the username. // argument: if some password field and the username are already present, the
std::unique_ptr<ParseResult> ParseUsingBaseHeuristics( // the function exits early. If something is missing, the function tries to
// complete it. The result is stored back in |found_fields|.
void ParseUsingBaseHeuristics(
const std::vector<ProcessedField>& processed_fields, const std::vector<ProcessedField>& processed_fields,
FormParsingMode mode, FormParsingMode mode,
const FormFieldData* username_hint) { SignificantFields* found_fields) {
// What is the best interactability among passwords? // If there is both the username and the minimal set of fields to build a
Interactability password_max = Interactability::kUnlikely; // PasswordForm, return early -- no more work to do.
for (const ProcessedField& processed_field : processed_fields) { if (found_fields->HasPasswords() && found_fields->username)
if (processed_field.is_password) return;
password_max = std::max(password_max, processed_field.interactability);
} // Will point to the password included in |found_field| which is first in the
// order of fields in |processed_fields|.
// Try to find password elements (current, new, confirmation) among those with
// best interactability.
std::vector<ProcessedField>::const_iterator first_relevant_password = std::vector<ProcessedField>::const_iterator first_relevant_password =
processed_fields.end(); processed_fields.end();
std::vector<const FormFieldData*> passwords = GetRelevantPasswords(
processed_fields, mode, password_max, &first_relevant_password);
if (passwords.empty())
return nullptr;
DCHECK(first_relevant_password != processed_fields.end());
auto result = std::make_unique<ParseResult>();
LocateSpecificPasswords(passwords, &result->password_field,
&result->new_password_field,
&result->confirmation_password_field);
if (result->IsEmpty())
return nullptr;
if (username_hint && if (!found_fields->HasPasswords()) {
!(mode == FormParsingMode::SAVING && username_hint->value.empty())) { // What is the best interactability among passwords?
result->username_field = username_hint; Interactability password_max = Interactability::kUnlikely;
return result; for (const ProcessedField& processed_field : processed_fields) {
if (processed_field.is_password)
password_max = std::max(password_max, processed_field.interactability);
}
// Try to find password elements (current, new, confirmation) among those
// with best interactability.
first_relevant_password = processed_fields.end();
std::vector<const FormFieldData*> passwords = GetRelevantPasswords(
processed_fields, mode, password_max, &first_relevant_password);
if (passwords.empty())
return;
LocateSpecificPasswords(passwords, &found_fields->password,
&found_fields->new_password,
&found_fields->confirmation_password);
if (!found_fields->HasPasswords())
return;
} else {
const uint32_t password_ids[] = {
ExtractUniqueId(found_fields->password),
ExtractUniqueId(found_fields->new_password),
ExtractUniqueId(found_fields->confirmation_password)};
for (auto it = processed_fields.begin(); it != processed_fields.end();
++it) {
if (it->is_password &&
base::ContainsValue(password_ids, it->field->unique_renderer_id)) {
first_relevant_password = it;
break;
}
}
} }
DCHECK(first_relevant_password != processed_fields.end());
if (found_fields->username)
return;
// What is the best interactability among text fields preceding the passwords? // What is the best interactability among text fields preceding the passwords?
Interactability username_max = Interactability::kUnlikely; Interactability username_max = Interactability::kUnlikely;
...@@ -431,37 +463,38 @@ std::unique_ptr<ParseResult> ParseUsingBaseHeuristics( ...@@ -431,37 +463,38 @@ std::unique_ptr<ParseResult> ParseUsingBaseHeuristics(
username_max = std::max(username_max, it->interactability); username_max = std::max(username_max, it->interactability);
} }
// If password elements are found then try to find a username. found_fields->username = FindUsernameFieldBaseHeuristics(
result->username_field = FindUsernameFieldBaseHeuristics(
processed_fields, first_relevant_password, mode, username_max); processed_fields, first_relevant_password, mode, username_max);
return result; return;
} }
// Set username and password fields from |parse_result| in |password_form|. // Set username and password fields in |password_form| based on
void SetFields(const ParseResult& parse_result, PasswordForm* password_form) { // |significant_fields| .
void SetFields(const SignificantFields& significant_fields,
PasswordForm* password_form) {
password_form->has_renderer_ids = true; password_form->has_renderer_ids = true;
if (parse_result.username_field) { if (significant_fields.username) {
password_form->username_element = parse_result.username_field->name; password_form->username_element = significant_fields.username->name;
password_form->username_value = parse_result.username_field->value; password_form->username_value = significant_fields.username->value;
password_form->username_element_renderer_id = password_form->username_element_renderer_id =
parse_result.username_field->unique_renderer_id; significant_fields.username->unique_renderer_id;
} }
if (parse_result.password_field) { if (significant_fields.password) {
password_form->password_element = parse_result.password_field->name; password_form->password_element = significant_fields.password->name;
password_form->password_value = parse_result.password_field->value; password_form->password_value = significant_fields.password->value;
password_form->password_element_renderer_id = password_form->password_element_renderer_id =
parse_result.password_field->unique_renderer_id; significant_fields.password->unique_renderer_id;
} }
if (parse_result.new_password_field) { if (significant_fields.new_password) {
password_form->new_password_element = parse_result.new_password_field->name; password_form->new_password_element = significant_fields.new_password->name;
password_form->new_password_value = parse_result.new_password_field->value; password_form->new_password_value = significant_fields.new_password->value;
} }
if (parse_result.confirmation_password_field) { if (significant_fields.confirmation_password) {
password_form->confirmation_password_element = password_form->confirmation_password_element =
parse_result.confirmation_password_field->name; significant_fields.confirmation_password->name;
} }
} }
...@@ -543,20 +576,19 @@ const FormFieldData* FindUsernameInPredictions( ...@@ -543,20 +576,19 @@ const FormFieldData* FindUsernameInPredictions(
return nullptr; return nullptr;
} }
} // namespace // Puts together a PasswordForm, the result of the parsing, based on the
// |form_data| description of the form metadata (e.g., action), the already
std::unique_ptr<PasswordForm> ParseFormData( // parsed information about what are the |significant_fields|, and the list
// |all_possible_passwords| of all non-empty password values and associated
// element names which occurred in the form.
std::unique_ptr<PasswordForm> AssemblePasswordForm(
const autofill::FormData& form_data, const autofill::FormData& form_data,
const FormPredictions* form_predictions, const SignificantFields* significant_fields,
FormParsingMode mode) { autofill::ValueElementVector all_possible_passwords) {
autofill::ValueElementVector all_possible_passwords; if (!significant_fields || !significant_fields->HasPasswords())
std::vector<ProcessedField> processed_fields =
ProcessFields(form_data.fields, &all_possible_passwords);
if (processed_fields.empty())
return nullptr; return nullptr;
// Create parse result and set non-field related information. // Create the PasswordForm and set data not related to specific fields.
auto result = std::make_unique<PasswordForm>(); auto result = std::make_unique<PasswordForm>();
result->origin = form_data.origin; result->origin = form_data.origin;
result->signon_realm = form_data.origin.GetOrigin().spec(); result->signon_realm = form_data.origin.GetOrigin().spec();
...@@ -568,40 +600,57 @@ std::unique_ptr<PasswordForm> ParseFormData( ...@@ -568,40 +600,57 @@ std::unique_ptr<PasswordForm> ParseFormData(
result->blacklisted_by_user = false; result->blacklisted_by_user = false;
result->type = PasswordForm::TYPE_MANUAL; result->type = PasswordForm::TYPE_MANUAL;
if (form_predictions) { // Set data related to specific fields.
// Try to parse with server predictions. SetFields(*significant_fields, result.get());
auto predictions_parse_result = return result;
}
} // namespace
std::unique_ptr<PasswordForm> ParseFormData(
const autofill::FormData& form_data,
const FormPredictions* form_predictions,
FormParsingMode mode) {
autofill::ValueElementVector all_possible_passwords;
std::vector<ProcessedField> processed_fields =
ProcessFields(form_data.fields, &all_possible_passwords);
if (processed_fields.empty())
return nullptr;
std::unique_ptr<SignificantFields> significant_fields;
// (1) First, try to parse with server predictions.
if (form_predictions)
significant_fields =
ParseUsingPredictions(processed_fields, *form_predictions); ParseUsingPredictions(processed_fields, *form_predictions);
if (predictions_parse_result) {
SetFields(*predictions_parse_result, result.get());
return result;
}
}
// Try to parse with autocomplete attributes. // (2) If that failed, try to parse with autocomplete attributes.
auto autocomplete_parse_result = ParseUsingAutocomplete(processed_fields); if (!significant_fields)
if (autocomplete_parse_result) { significant_fields = ParseUsingAutocomplete(processed_fields);
SetFields(*autocomplete_parse_result, result.get());
return result; // (3) Now try to fill the gaps.
} if (!significant_fields)
significant_fields = std::make_unique<SignificantFields>();
// Try to find the username based on the context of the fields. // Try to find the username based on the context of the fields.
const FormFieldData* username_field_by_context = nullptr; if (!significant_fields->username &&
if (base::FeatureList::IsEnabled( base::FeatureList::IsEnabled(
password_manager::features::kHtmlBasedUsernameDetector)) { password_manager::features::kHtmlBasedUsernameDetector)) {
username_field_by_context = FindUsernameInPredictions( const FormFieldData* username_field_by_context = FindUsernameInPredictions(
form_data.username_predictions, processed_fields); form_data.username_predictions, processed_fields);
if (username_field_by_context &&
!(mode == FormParsingMode::SAVING &&
username_field_by_context->value.empty())) {
significant_fields->username = username_field_by_context;
}
} }
// Try to parse with base heuristic. // Try to parse with base heuristic.
auto base_heuristics_parse_result = ParseUsingBaseHeuristics( ParseUsingBaseHeuristics(processed_fields, mode, significant_fields.get());
processed_fields, mode, username_field_by_context);
if (base_heuristics_parse_result) {
SetFields(*base_heuristics_parse_result, result.get());
return result;
}
return nullptr; return AssemblePasswordForm(form_data, significant_fields.get(),
std::move(all_possible_passwords));
} }
} // namespace password_manager } // namespace password_manager
...@@ -698,18 +698,6 @@ TEST(FormParserTest, TestAutocomplete) { ...@@ -698,18 +698,6 @@ TEST(FormParserTest, TestAutocomplete) {
.form_control_type = "password"}, .form_control_type = "password"},
}, },
}, },
{
"Partial autocomplete analysis is not complemented by basic "
"heuristics",
// Username not found because there was a valid autocomplete mark-up
// but it did not include the plain text field.
{
{.form_control_type = "text"},
{.role = ElementRole::CURRENT_PASSWORD,
.form_control_type = "password",
.autocomplete_attribute = "current-password"},
},
},
{ {
"Partial autocomplete analysis fails if no passwords are found", "Partial autocomplete analysis fails if no passwords are found",
// The attribute 'username' is ignored, because there was no password // The attribute 'username' is ignored, because there was no password
...@@ -1198,6 +1186,53 @@ TEST(FormParserTest, UsernamePredictions) { ...@@ -1198,6 +1186,53 @@ TEST(FormParserTest, UsernamePredictions) {
}); });
} }
// In some situations, server hints or autocomplete mark-up do not provide the
// username might be omitted. Sometimes this is a truthful signal (there might
// be no username despite the presence of plain text fields), but often this is
// just incomplete data. In the long term, the server hints should be complete
// and also cover cases when the autocomplete mark-up is lacking; at that point,
// the parser should just trust that the signal is truthful. Until then,
// however, the parser is trying to complement the signal with its structural
// heuristics.
TEST(FormParserTest, ComplementingResults) {
CheckTestData({
{
"Current password from autocomplete analysis, username from basic "
"heuristics",
{
{.role = ElementRole::USERNAME, .form_control_type = "text"},
{.role = ElementRole::CURRENT_PASSWORD,
.form_control_type = "password",
.autocomplete_attribute = "current-password"},
},
},
{
"New and confirmation passwords from server, username from basic "
"heuristics",
{
{.role = ElementRole::USERNAME, .form_control_type = "text"},
{.role = ElementRole::CONFIRMATION_PASSWORD,
.prediction = {.type = autofill::CONFIRMATION_PASSWORD},
.form_control_type = "password"},
{.form_control_type = "text"},
{.role = ElementRole::NEW_PASSWORD,
.prediction = {.type = autofill::NEW_PASSWORD},
.form_control_type = "password"},
},
},
{
"No password from server still means that serve hints are ignored.",
{
{.prediction = {.type = autofill::USERNAME_AND_EMAIL_ADDRESS},
.form_control_type = "text"},
{.role = ElementRole::USERNAME, .form_control_type = "text"},
{.role = ElementRole::CURRENT_PASSWORD,
.form_control_type = "password"},
},
},
});
}
} // namespace } // namespace
} // namespace password_manager } // namespace password_manager
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment