Commit 7284655c authored by Maxim Kolosovskiy, committed by Commit Bot

[Password Manager] Polishing HTML based Username Detector


Bug: 699530
Change-Id: I6325ed2be5868d519c5b0d0cb720d09e7b80da4d
Reviewed-on: https://chromium-review.googlesource.com/759792
Commit-Queue: Maxim Kolosovskiy <kolos@chromium.org>
Reviewed-by: Vaclav Brozek <vabr@chromium.org>
Cr-Commit-Position: refs/heads/master@{#521018}
parent e59817eb
...@@ -14,6 +14,8 @@ static_library("renderer") { ...@@ -14,6 +14,8 @@ static_library("renderer") {
"form_classifier.h", "form_classifier.h",
"html_based_username_detector.cc", "html_based_username_detector.cc",
"html_based_username_detector.h", "html_based_username_detector.h",
"html_based_username_detector_vocabulary.cc",
"html_based_username_detector_vocabulary.h",
"page_form_analyser_logger.cc", "page_form_analyser_logger.cc",
"page_form_analyser_logger.h", "page_form_analyser_logger.h",
"page_passwords_analyser.cc", "page_passwords_analyser.cc",
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#include <map>
#include "components/autofill/core/common/password_form.h" #include "components/autofill/core/common/password_form.h"
#include "third_party/WebKit/public/web/WebFormControlElement.h" #include "third_party/WebKit/public/web/WebFormControlElement.h"
#include "third_party/WebKit/public/web/WebInputElement.h" #include "third_party/WebKit/public/web/WebInputElement.h"
......
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
namespace autofill {
// Vocabulary used for username detection. This block only declares the word
// lists and their sizes; the definitions live in another translation unit
// (presumably html_based_username_detector_vocabulary.cc -- TODO confirm).
// NOTE(review): each k*Size constant presumably holds the number of entries in
// the array with the matching name; verify against the definitions.
// NOTE(review): the Latin/NonLatin suffixes presumably refer to the script the
// words are written in; confirm against the definitions.

// Words that certainly point to a non-username field.
// If field values contain at least one negative word, then the field is
// excluded from the list of possible usernames.
extern const char* const kNegativeLatin[];
extern const int kNegativeLatinSize;
extern const char* const kNegativeNonLatin[];
extern const int kNegativeNonLatinSize;
// Translations of "username".
extern const char* const kUsernameLatin[];
extern const int kUsernameLatinSize;
extern const char* const kUsernameNonLatin[];
extern const int kUsernameNonLatinSize;
// Translations of "user".
extern const char* const kUserLatin[];
extern const int kUserLatinSize;
extern const char* const kUserNonLatin[];
extern const int kUserNonLatinSize;
// Words that certainly point to a username field if they appear in a
// developer value. They are called technical words because they can only be
// used as variable names, and not as stand-alone words.
extern const char* const kTechnicalWords[];
extern const int kTechnicalWordsSize;
// Words that might point to a username field. They have the lowest priority
// in the heuristic, because there are also field attribute values that
// contain them but do not belong to username fields.
extern const char* const kWeakWords[];
extern const int kWeakWordsSize;
} // namespace autofill
...@@ -221,7 +221,8 @@ class MAYBE_PasswordFormConversionUtilsTest : public content::RenderViewTest { ...@@ -221,7 +221,8 @@ class MAYBE_PasswordFormConversionUtilsTest : public content::RenderViewTest {
} }
return CreatePasswordFormFromWebForm( return CreatePasswordFormFromWebForm(
form, with_user_input ? &user_input : nullptr, predictions, nullptr); form, with_user_input ? &user_input : nullptr, predictions,
&username_detector_cache_);
} }
// Iterates on the form generated by the |html| and adds the fields and type // Iterates on the form generated by the |html| and adds the fields and type
...@@ -270,6 +271,8 @@ class MAYBE_PasswordFormConversionUtilsTest : public content::RenderViewTest { ...@@ -270,6 +271,8 @@ class MAYBE_PasswordFormConversionUtilsTest : public content::RenderViewTest {
*form = forms[0]; *form = forms[0];
} }
UsernameDetectorCache username_detector_cache_;
private: private:
DISALLOW_COPY_AND_ASSIGN(MAYBE_PasswordFormConversionUtilsTest); DISALLOW_COPY_AND_ASSIGN(MAYBE_PasswordFormConversionUtilsTest);
}; };
...@@ -334,7 +337,7 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest, OnlyDisabledFields) { ...@@ -334,7 +337,7 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest, OnlyDisabledFields) {
} }
TEST_F(MAYBE_PasswordFormConversionUtilsTest, TEST_F(MAYBE_PasswordFormConversionUtilsTest,
IdentifyingUsernameFieldsFromDeveloperGroupWithHTMLDetector) { HTMLDetector_DeveloperGroupAttributes) {
base::test::ScopedFeatureList feature_list; base::test::ScopedFeatureList feature_list;
feature_list.InitAndEnableFeature( feature_list.InitAndEnableFeature(
password_manager::features::kEnableHtmlBasedUsernameDetector); password_manager::features::kEnableHtmlBasedUsernameDetector);
...@@ -408,17 +411,18 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest, ...@@ -408,17 +411,18 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest,
{"email", "", "js@google.com"}, {"email", "", "js@google.com"},
"email", "email",
"js@google.com"}, "js@google.com"},
// If word matches in maximum 2 fields, it is accepted. // If a word matches in maximum 2 fields, it is accepted.
// First encounter is selected as username. // First encounter is selected as username.
{{"loginusername", "", "johnsmith"}, {{"username", "", "johnsmith"},
{"loginemail", "", "js@google.com"}, {"repeat_username", "", "johnsmith"},
"loginusername", "username",
"johnsmith"}, "johnsmith"},
// Check treatment for short dictionary words. // A short word should be enclosed between delimiters. Otherwise, an
{{"identity_name", "", "johnsmith"}, // occurrence doesn't count.
{"email", "", "js@google.com"}, {{"identity_name", "idn", "johnsmith"},
"email", {"id", "id", "123"},
"js@google.com"}}; "id",
"123"}};
for (size_t i = 0; i < arraysize(cases); ++i) { for (size_t i = 0; i < arraysize(cases); ++i) {
SCOPED_TRACE(testing::Message() << "Iteration " << i); SCOPED_TRACE(testing::Message() << "Iteration " << i);
...@@ -436,6 +440,7 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest, ...@@ -436,6 +440,7 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest,
builder.AddSubmitButton("submit"); builder.AddSubmitButton("submit");
std::string html = builder.ProduceHTML(); std::string html = builder.ProduceHTML();
username_detector_cache_.clear();
std::unique_ptr<PasswordForm> password_form = std::unique_ptr<PasswordForm> password_form =
LoadHTMLAndConvertForm(html, nullptr, false); LoadHTMLAndConvertForm(html, nullptr, false);
...@@ -445,7 +450,19 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest, ...@@ -445,7 +450,19 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest,
password_form->username_element); password_form->username_element);
EXPECT_EQ(base::UTF8ToUTF16(cases[i].expected_username_value), EXPECT_EQ(base::UTF8ToUTF16(cases[i].expected_username_value),
password_form->username_value); password_form->username_value);
// Check that the username field was found by HTML detector.
ASSERT_EQ(1u, username_detector_cache_.size());
ASSERT_FALSE(username_detector_cache_.begin()->second.IsNull());
EXPECT_EQ(
cases[i].expected_username_element,
username_detector_cache_.begin()->second.NameForAutofill().Utf8());
} }
}
TEST_F(MAYBE_PasswordFormConversionUtilsTest, HTMLDetector_SeveralDetections) {
base::test::ScopedFeatureList feature_list;
feature_list.InitAndEnableFeature(
password_manager::features::kEnableHtmlBasedUsernameDetector);
// If word matches in more than 2 fields, we don't match on it. // If word matches in more than 2 fields, we don't match on it.
// We search for match with another word. // We search for match with another word.
...@@ -459,6 +476,7 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest, ...@@ -459,6 +476,7 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest,
builder.AddSubmitButton("submit"); builder.AddSubmitButton("submit");
std::string html = builder.ProduceHTML(); std::string html = builder.ProduceHTML();
DCHECK(username_detector_cache_.empty());
std::unique_ptr<PasswordForm> password_form = std::unique_ptr<PasswordForm> password_form =
LoadHTMLAndConvertForm(html, nullptr, false); LoadHTMLAndConvertForm(html, nullptr, false);
...@@ -466,10 +484,15 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest, ...@@ -466,10 +484,15 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest,
EXPECT_EQ(base::UTF8ToUTF16("loginid"), password_form->username_element); EXPECT_EQ(base::UTF8ToUTF16("loginid"), password_form->username_element);
EXPECT_EQ(base::UTF8ToUTF16("johnsmith"), password_form->username_value); EXPECT_EQ(base::UTF8ToUTF16("johnsmith"), password_form->username_value);
// Check that the username field was found by HTML detector.
ASSERT_EQ(1u, username_detector_cache_.size());
ASSERT_FALSE(username_detector_cache_.begin()->second.IsNull());
EXPECT_EQ("loginid",
username_detector_cache_.begin()->second.NameForAutofill().Utf8());
} }
TEST_F(MAYBE_PasswordFormConversionUtilsTest, TEST_F(MAYBE_PasswordFormConversionUtilsTest,
IdentifyingUsernameFieldsFromUserGroupWithHTMLDetector) { HTMLDetector_UserGroupAttributes) {
base::test::ScopedFeatureList feature_list; base::test::ScopedFeatureList feature_list;
feature_list.InitAndEnableFeature( feature_list.InitAndEnableFeature(
password_manager::features::kEnableHtmlBasedUsernameDetector); password_manager::features::kEnableHtmlBasedUsernameDetector);
...@@ -483,12 +506,12 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest, ...@@ -483,12 +506,12 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest,
struct TestCase { struct TestCase {
// Field parameters represent, in order of appearance, field name, field // Field parameters represent, in order of appearance, field name, field
// id, field value and field label or placeholder. // id, field value and field label or placeholder.
// Field name and field id don't contain any significant information.
const char* first_text_field_parameters[4]; const char* first_text_field_parameters[4];
const char* second_text_field_parameters[4]; const char* second_text_field_parameters[4];
const char* expected_username_element; const char* expected_username_element;
const char* expected_username_value; const char* expected_username_value;
} cases[] = { } cases[] = {
// Developer group does not contain any significant information.
// Label information will decide username. // Label information will decide username.
{{"name1", "id1", "johnsmith", "Username:"}, {{"name1", "id1", "johnsmith", "Username:"},
{"name2", "id2", "js@google.com", "Email:"}, {"name2", "id2", "js@google.com", "Email:"},
...@@ -545,9 +568,10 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest, ...@@ -545,9 +568,10 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest,
{"username", "", "johnsmith", "Email:"}, {"username", "", "johnsmith", "Email:"},
"email", "email",
"js@google.com"}, "js@google.com"},
// Check treatment for short dictionary words. // Check treatment for short dictionary words. "uid" has higher priority,
// but its occurrence is ignored because it is a part of another word.
{{"name1", "", "johnsmith", "Insert your id:"}, {{"name1", "", "johnsmith", "Insert your id:"},
{"name2", "", "js@google.com", "Insert something:"}, {"name2", "uidentical", "js@google.com", "Insert something:"},
"name1", "name1",
"johnsmith"}}; "johnsmith"}};
...@@ -569,6 +593,7 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest, ...@@ -569,6 +593,7 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest,
builder.AddSubmitButton("submit"); builder.AddSubmitButton("submit");
std::string html = builder.ProduceHTML(); std::string html = builder.ProduceHTML();
username_detector_cache_.clear();
std::unique_ptr<PasswordForm> password_form = std::unique_ptr<PasswordForm> password_form =
LoadHTMLAndConvertForm(html, nullptr, false); LoadHTMLAndConvertForm(html, nullptr, false);
...@@ -578,6 +603,12 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest, ...@@ -578,6 +603,12 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest,
password_form->username_element); password_form->username_element);
EXPECT_EQ(base::UTF8ToUTF16(cases[i].expected_username_value), EXPECT_EQ(base::UTF8ToUTF16(cases[i].expected_username_value),
password_form->username_value); password_form->username_value);
// Check that the username field was found by HTML detector.
ASSERT_EQ(1u, username_detector_cache_.size());
ASSERT_FALSE(username_detector_cache_.begin()->second.IsNull());
EXPECT_EQ(
cases[i].expected_username_element,
username_detector_cache_.begin()->second.NameForAutofill().Utf8());
} }
} }
...@@ -613,7 +644,7 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest, HTMLDetectorCache) { ...@@ -613,7 +644,7 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest, HTMLDetectorCache) {
// will be the same because it was cached in |username_detector_cache|. // will be the same because it was cached in |username_detector_cache|.
WebVector<WebFormControlElement> control_elements; WebVector<WebFormControlElement> control_elements;
form.GetFormControlElements(control_elements); form.GetFormControlElements(control_elements);
control_elements[0].SetAttribute("name", "login"); control_elements[0].SetAttribute("name", "id");
password_form = CreatePasswordFormFromWebForm(form, nullptr, nullptr, password_form = CreatePasswordFormFromWebForm(form, nullptr, nullptr,
&username_detector_cache); &username_detector_cache);
EXPECT_TRUE(password_form); EXPECT_TRUE(password_form);
...@@ -633,7 +664,7 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest, HTMLDetectorCache) { ...@@ -633,7 +664,7 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest, HTMLDetectorCache) {
ASSERT_EQ(1u, username_detector_cache.size()); ASSERT_EQ(1u, username_detector_cache.size());
EXPECT_EQ(form, username_detector_cache.begin()->first); EXPECT_EQ(form, username_detector_cache.begin()->first);
ASSERT_FALSE(username_detector_cache.begin()->second.IsNull()); ASSERT_FALSE(username_detector_cache.begin()->second.IsNull());
EXPECT_EQ("login", EXPECT_EQ("id",
username_detector_cache.begin()->second.NameForAutofill().Utf8()); username_detector_cache.begin()->second.NameForAutofill().Utf8());
EXPECT_THAT( EXPECT_THAT(
histogram_tester.GetAllSamples("PasswordManager.UsernameDetectionMethod"), histogram_tester.GetAllSamples("PasswordManager.UsernameDetectionMethod"),
...@@ -650,7 +681,7 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest, HTMLDetectorCache) { ...@@ -650,7 +681,7 @@ TEST_F(MAYBE_PasswordFormConversionUtilsTest, HTMLDetectorCache) {
ASSERT_EQ(1u, username_detector_cache.size()); ASSERT_EQ(1u, username_detector_cache.size());
EXPECT_EQ(form, username_detector_cache.begin()->first); EXPECT_EQ(form, username_detector_cache.begin()->first);
ASSERT_FALSE(username_detector_cache.begin()->second.IsNull()); ASSERT_FALSE(username_detector_cache.begin()->second.IsNull());
EXPECT_EQ("login", EXPECT_EQ("id",
username_detector_cache.begin()->second.NameForAutofill().Utf8()); username_detector_cache.begin()->second.NameForAutofill().Utf8());
EXPECT_THAT( EXPECT_THAT(
histogram_tester.GetAllSamples("PasswordManager.UsernameDetectionMethod"), histogram_tester.GetAllSamples("PasswordManager.UsernameDetectionMethod"),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment