Commit 357f5e2d authored by Vaclav Brozek, committed by Commit Bot

Pull FindUsernameInPredictions out of html_based_username_detector.cc

The HTML based username detector first computes an ordered list of
potential usernames out of all form fields, and then tries to identify the
front-most of that list which is also on a list of plausible username fields
computed by other parts of Chrome.

This bundling may become a source of trouble once most of the parsing code,
including the one which produces the plausible username fields, gets moved to
a different process (renderer -> browser) and will get run after the HTML
classifier.

To solve this problem, this CL keeps the first part within
html_based_username_detector.cc, but moves the latter to the callsite, the
password_form_conversion_utils.cc. This way the HTML based detector only
needs the list of all form fields and loses its dependency on the code
computing the plausible fields.

Bug: 833838
Change-Id: I66eef8098cd523c52fc54d02197a46382ada2fc6
Reviewed-on: https://chromium-review.googlesource.com/1032738
Reviewed-by: Vadym Doroshenko <dvadym@chromium.org>
Commit-Queue: Vaclav Brozek <vabr@chromium.org>
Cr-Commit-Position: refs/heads/master@{#559093}
parent 63139565
...@@ -65,23 +65,13 @@ struct CategoryOfWords { ...@@ -65,23 +65,13 @@ struct CategoryOfWords {
// Used only inside DCHECK. // Used only inside DCHECK.
bool AllElementsBelongsToSameForm( bool AllElementsBelongsToSameForm(
const std::vector<blink::WebFormControlElement>& all_control_elements, const std::vector<WebFormControlElement>& all_control_elements) {
const std::vector<blink::WebInputElement>& possible_usernames) { return std::adjacent_find(all_control_elements.begin(),
if (std::adjacent_find( all_control_elements.end(),
possible_usernames.begin(), possible_usernames.end(), [](const WebFormControlElement& a,
[](const blink::WebInputElement& a, const blink::WebInputElement& b) { const WebFormControlElement& b) {
return a.Form() != b.Form(); return a.Form() != b.Form();
}) != possible_usernames.end()) }) == all_control_elements.end();
return false;
if (std::adjacent_find(all_control_elements.begin(),
all_control_elements.end(),
[](const blink::WebFormControlElement& a,
const blink::WebFormControlElement& b) {
return a.Form() != b.Form();
}) != all_control_elements.end())
return false;
DCHECK(!all_control_elements.empty());
return all_control_elements[0].Form() == possible_usernames[0].Form();
} }
// 1. Removes delimiters from |raw_value| and appends the remainder to // 1. Removes delimiters from |raw_value| and appends the remainder to
...@@ -295,69 +285,38 @@ void FindUsernameFieldInternal( ...@@ -295,69 +285,38 @@ void FindUsernameFieldInternal(
} }
} }
// Find the first element in |username_predictions| (i.e. the most reliable
// prediction) that occurs in |possible_usernames|. If the element found, the
// method saves it to |username_element| and returns true.
bool FindUsernameInPredictions(
const std::vector<blink::WebInputElement>& username_predictions,
const std::vector<blink::WebInputElement>& possible_usernames,
WebInputElement* username_element) {
// To keep linear time complexity, convert |possible_usernames| to a set.
const base::flat_set<blink::WebInputElement> usernames(
possible_usernames.begin(), possible_usernames.end());
for (const blink::WebInputElement& prediction : username_predictions) {
auto iter = usernames.find(prediction);
if (iter != usernames.end()) {
*username_element = *iter;
return true;
}
}
return false;
}
} // namespace } // namespace
bool GetUsernameFieldBasedOnHtmlAttributes( const std::vector<WebInputElement>& GetPredictionsFieldBasedOnHtmlAttributes(
const std::vector<blink::WebFormControlElement>& all_control_elements, const std::vector<WebFormControlElement>& all_control_elements,
const std::vector<blink::WebInputElement>& possible_usernames,
const FormData& form_data, const FormData& form_data,
WebInputElement* username_element,
UsernameDetectorCache* username_detector_cache) { UsernameDetectorCache* username_detector_cache) {
DCHECK(username_element); // The cache will store the object referenced in the return value, so it must
// exist. It can be empty.
DCHECK(username_detector_cache);
if (possible_usernames.empty()) DCHECK(!all_control_elements.empty());
return false;
// All elements in |possible_usernames| and |all_control_elements| should have // All elements in |all_control_elements| should have the same |Form()|.
// the same |Form()|. DCHECK(AllElementsBelongsToSameForm(all_control_elements));
DCHECK(
AllElementsBelongsToSameForm(all_control_elements, possible_usernames));
const blink::WebFormElement form = possible_usernames[0].Form(); const WebFormElement form = all_control_elements[0].Form();
// True if the cache has no entry for |form|. // True if the cache has no entry for |form|.
bool cache_miss = true; bool cache_miss = true;
// Iterator pointing to the entry for |form| if the entry for |form| is found. // Iterator pointing to the entry for |form| if the entry for |form| is found.
UsernameDetectorCache::iterator form_position; UsernameDetectorCache::iterator form_position;
if (username_detector_cache) { std::tie(form_position, cache_miss) = username_detector_cache->insert(
std::tie(form_position, cache_miss) = username_detector_cache->insert( std::make_pair(form, std::vector<WebInputElement>()));
std::make_pair(form, std::vector<blink::WebInputElement>()));
}
if (!username_detector_cache || cache_miss) { if (cache_miss) {
std::vector<blink::WebInputElement> username_predictions; std::vector<WebInputElement> username_predictions;
FindUsernameFieldInternal(all_control_elements, form_data, FindUsernameFieldInternal(all_control_elements, form_data,
&username_predictions); &username_predictions);
bool result = FindUsernameInPredictions( if (!username_predictions.empty())
username_predictions, possible_usernames, username_element);
if (username_detector_cache && !username_predictions.empty())
form_position->second = std::move(username_predictions); form_position->second = std::move(username_predictions);
return result;
} }
return form_position->second;
return FindUsernameInPredictions(form_position->second, possible_usernames,
username_element);
} }
} // namespace autofill } // namespace autofill
...@@ -19,21 +19,20 @@ using UsernameDetectorCache = ...@@ -19,21 +19,20 @@ using UsernameDetectorCache =
std::map<blink::WebFormElement, std::vector<blink::WebInputElement>>; std::map<blink::WebFormElement, std::vector<blink::WebInputElement>>;
// Classifier for getting username field by analyzing HTML attribute values. // Classifier for getting username field by analyzing HTML attribute values.
// The algorithm looks for words that are likely to point to username field // The algorithm looks for words that are likely to point to username field (ex.
// (ex. "username", "loginid" etc.), in the attribute values. When the first // "username", "loginid" etc.), in the attribute values. When the first match is
// match is found, the currently analyzed field is saved in |username_element|, // found, the currently analyzed field is saved in |username_element|, and the
// and the algorithm ends. By searching for words in order of their probability // algorithm ends. By searching for words in order of their probability to be
// to be username words, it is sure that the first match will also be the best // username words, it is sure that the first match will also be the best one.
// one. The function returns true if username element was found.
// If detector's outcome for the given form is cached in // If detector's outcome for the given form is cached in
// |username_detector_cache|, then |username_element| is set based on the cached // |username_detector_cache|, then |username_element| is set based on the cached
// data. Otherwise, the detector will be run and the outcome will be saved to // data. Otherwise, the detector will be run and the outcome will be saved to
// the cache. |username_detector_cache| can be null. // the cache. The function returns a reference to the vector of predictions,
bool GetUsernameFieldBasedOnHtmlAttributes( // which is stored in the cache.
const std::vector<blink::WebInputElement>&
GetPredictionsFieldBasedOnHtmlAttributes(
const std::vector<blink::WebFormControlElement>& all_control_elements, const std::vector<blink::WebFormControlElement>& all_control_elements,
const std::vector<blink::WebInputElement>& possible_usernames,
const FormData& form_data, const FormData& form_data,
blink::WebInputElement* username_element,
UsernameDetectorCache* username_detector_cache); UsernameDetectorCache* username_detector_cache);
} // namespace autofill } // namespace autofill
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <set> #include <set>
#include <string> #include <string>
#include "base/containers/flat_set.h"
#include "base/i18n/case_conversion.h" #include "base/i18n/case_conversion.h"
#include "base/lazy_instance.h" #include "base/lazy_instance.h"
#include "base/macros.h" #include "base/macros.h"
...@@ -454,6 +455,32 @@ bool IsEnabledPasswordFieldPresent(const std::vector<FormFieldData>& fields) { ...@@ -454,6 +455,32 @@ bool IsEnabledPasswordFieldPresent(const std::vector<FormFieldData>& fields) {
}) != fields.end(); }) != fields.end();
} }
// Find the first element in |username_predictions| (i.e. the most reliable
// prediction) that occurs in |possible_usernames|. If the element is found, the
// method saves it to |username_element| and returns true.
bool FindUsernameInPredictions(
const std::vector<blink::WebInputElement>& username_predictions,
const std::vector<blink::WebInputElement>& possible_usernames,
WebInputElement* username_element) {
// To speed-up the matching for-loop below, convert |possible_usernames| to a
// set. Creating is O(N log N) for N=possible_usernames.size(). Retrieval is
// O(log N), so the whole for-loop is O(M log N) for
// M=username_predictions.size(). Use flat_set, because of cache locality (the
// M and N are likely small, so this can make a difference) and less heap
// allocations.
const base::flat_set<blink::WebInputElement> usernames(
possible_usernames.begin(), possible_usernames.end());
for (const blink::WebInputElement& prediction : username_predictions) {
auto iter = usernames.find(prediction);
if (iter != usernames.end()) {
*username_element = *iter;
return true;
}
}
return false;
}
// Get information about a login form encapsulated in a PasswordForm struct. // Get information about a login form encapsulated in a PasswordForm struct.
// If an element of |form| has an entry in |nonscript_modified_values|, the // If an element of |form| has an entry in |nonscript_modified_values|, the
// associated string is used instead of the element's value to create // associated string is used instead of the element's value to create
...@@ -464,6 +491,8 @@ bool GetPasswordForm( ...@@ -464,6 +491,8 @@ bool GetPasswordForm(
const FieldValueAndPropertiesMaskMap* field_value_and_properties_map, const FieldValueAndPropertiesMaskMap* field_value_and_properties_map,
const FormsPredictionsMap* form_predictions, const FormsPredictionsMap* form_predictions,
UsernameDetectorCache* username_detector_cache) { UsernameDetectorCache* username_detector_cache) {
DCHECK(!form.control_elements.empty());
// Early exit if no passwords to be typed into. // Early exit if no passwords to be typed into.
if (!IsEnabledPasswordFieldPresent(password_form->form_data.fields)) if (!IsEnabledPasswordFieldPresent(password_form->form_data.fields))
return false; return false;
...@@ -632,9 +661,21 @@ bool GetPasswordForm( ...@@ -632,9 +661,21 @@ bool GetPasswordForm(
// Call HTML based username detector only if neither server predictions nor // Call HTML based username detector only if neither server predictions nor
// autocomplete attributes were useful to detect the username. // autocomplete attributes were useful to detect the username.
if (predicted_username_element.IsNull() && username_by_attribute.IsNull()) { if (predicted_username_element.IsNull() && username_by_attribute.IsNull()) {
GetUsernameFieldBasedOnHtmlAttributes( // Dummy cache stores the predictions in case no real cache was passed to
form.control_elements, plausible_usernames, password_form->form_data, // here.
&username_element_by_context, username_detector_cache); UsernameDetectorCache dummy_cache;
if (!username_detector_cache)
username_detector_cache = &dummy_cache;
const std::vector<blink::WebInputElement>& username_predictions =
GetPredictionsFieldBasedOnHtmlAttributes(form.control_elements,
password_form->form_data,
username_detector_cache);
if (!FindUsernameInPredictions(username_predictions, plausible_usernames,
&username_element_by_context)) {
username_element_by_context.Reset();
}
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment