Commit 03551952 authored by Vaclav Brozek's avatar Vaclav Brozek Committed by Commit Bot

Improve visibility handling in FormData->PasswordForm parser

When the new FormData->PasswordForm parser looks for the username with
its basic heuristics, it only considers the most likely fields the
user would interact with (visible > with some value > the rest).

This filter is on purpose not applied to server hints and autocomplete
attribute analysis.

But it should be applied to the local HTML classifier.

This CL makes that happen by reordering the analysis steps to do basic
heuristics before the HTML classifier, because the heuristics need to
compute the bar for interactability. With that bar computed, the
parser additionally runs the HTML classifier on fields meeting that
bar.

Bug: 851808
Change-Id: I0e54b6c1c6387b6130d297131b0dcb83fe52e265
Reviewed-on: https://chromium-review.googlesource.com/1141873
Commit-Queue: Vaclav Brozek <vabr@chromium.org>
Reviewed-by: default avatarVadym Doroshenko <dvadym@chromium.org>
Cr-Commit-Position: refs/heads/master@{#576063}
parent 637d303d
......@@ -396,16 +396,20 @@ uint32_t ExtractUniqueId(const FormFieldData* field) {
return field ? field->unique_renderer_id : FormFieldData::kNotSetFormControlRendererId;
}
// Tries to find the username and password fields in |processed_fields| based on
// the structure (how the fields are ordered). If |mode| is SAVING, only
// Tries to find the username and password fields in |processed_fields| based
// on the structure (how the fields are ordered). If |mode| is SAVING, only
// considers non-empty fields. The |found_fields| is both an input and output
// argument: if some password field and the username are already present, the
// the function exits early. If something is missing, the function tries to
// complete it. The result is stored back in |found_fields|.
// complete it. The result is stored back in |found_fields|. The best
// interactability for usernames, which depends on position of the found
// passwords as well, is returned through |username_max| to be used in other
// kinds of analysis.
void ParseUsingBaseHeuristics(
const std::vector<ProcessedField>& processed_fields,
FormParsingMode mode,
SignificantFields* found_fields) {
SignificantFields* found_fields,
Interactability* username_max) {
// If there is both the username and the minimal set of fields to build a
// PasswordForm, return early -- no more work to do.
if (found_fields->HasPasswords() && found_fields->username)
......@@ -456,15 +460,15 @@ void ParseUsingBaseHeuristics(
return;
// What is the best interactability among text fields preceding the passwords?
Interactability username_max = Interactability::kUnlikely;
*username_max = Interactability::kUnlikely;
for (auto it = processed_fields.begin(); it != first_relevant_password;
++it) {
if (!it->is_password)
username_max = std::max(username_max, it->interactability);
*username_max = std::max(*username_max, it->interactability);
}
found_fields->username = FindUsernameFieldBaseHeuristics(
processed_fields, first_relevant_password, mode, username_max);
processed_fields, first_relevant_password, mode, *username_max);
return;
}
......@@ -559,15 +563,18 @@ std::vector<ProcessedField> ProcessFields(
}
// Find the first element in |username_predictions| (i.e. the most reliable
// prediction) that occurs in |processed_fields|.
// prediction) that occurs in |processed_fields| and has interactability level
// at least |username_max|.
const FormFieldData* FindUsernameInPredictions(
const std::vector<uint32_t>& username_predictions,
const std::vector<ProcessedField>& processed_fields) {
const std::vector<ProcessedField>& processed_fields,
Interactability username_max) {
for (uint32_t predicted_id : username_predictions) {
auto iter = std::find_if(
processed_fields.begin(), processed_fields.end(),
[predicted_id](const ProcessedField& processed_field) {
return processed_field.field->unique_renderer_id == predicted_id;
[predicted_id, username_max](const ProcessedField& processed_field) {
return processed_field.field->unique_renderer_id == predicted_id &&
MatchesInteractability(processed_field, username_max);
});
if (iter != processed_fields.end()) {
return iter->field;
......@@ -653,12 +660,22 @@ std::unique_ptr<PasswordForm> ParseFormData(
if (!significant_fields)
significant_fields = std::make_unique<SignificantFields>();
// Try to find the username based on the context of the fields.
if (!significant_fields->username &&
const bool username_found_before_heuristic = significant_fields->username;
// Try to parse with base heuristic.
Interactability username_max = Interactability::kUnlikely;
ParseUsingBaseHeuristics(processed_fields, mode, significant_fields.get(),
&username_max);
// Additionally, and based on the best interactability computed by base
// heuristics, try to improve the username based on the context of the
// fields, unless the username already came from more reliable types of
// analysis.
if (!username_found_before_heuristic &&
base::FeatureList::IsEnabled(
password_manager::features::kHtmlBasedUsernameDetector)) {
const FormFieldData* username_field_by_context = FindUsernameInPredictions(
form_data.username_predictions, processed_fields);
form_data.username_predictions, processed_fields, username_max);
if (username_field_by_context &&
!(mode == FormParsingMode::SAVING &&
username_field_by_context->value.empty())) {
......@@ -666,9 +683,6 @@ std::unique_ptr<PasswordForm> ParseFormData(
}
}
// Try to parse with base heuristic.
ParseUsingBaseHeuristics(processed_fields, mode, significant_fields.get());
return AssemblePasswordForm(form_data, significant_fields.get(),
std::move(all_possible_passwords),
form_predictions);
......
......@@ -1042,6 +1042,20 @@ TEST(FormParserTest, Interactability) {
{.form_control_type = "text", .is_focusable = true, .value = ""},
},
},
{
"Interactability also matters for HTML classifier.",
{
{.form_control_type = "text",
.is_focusable = false,
.predicted_username = 0},
{.role = ElementRole::USERNAME,
.form_control_type = "text",
.is_focusable = true},
{.role = ElementRole::CURRENT_PASSWORD,
.form_control_type = "password",
.is_focusable = true},
},
},
});
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment