Commit 0d069b50 authored by noelutz@google.com's avatar noelutz@google.com

Change the client-side phishing detection hashing function to

mimic what we're doing on the server.

BUG=
TEST=BrowserFeatureExtractorTest


Review URL: http://codereview.chromium.org/7793012

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@98848 0039d316-1c4b-4281-b951-d872f2087c98
parent 331f7d23
......@@ -9,6 +9,7 @@
#include "base/stl_util.h"
#include "base/stringprintf.h"
#include "base/string_util.h"
#include "base/task.h"
#include "base/time.h"
#include "chrome/common/safe_browsing/csd.pb.h"
......@@ -27,7 +28,7 @@
namespace safe_browsing {
const int BrowserFeatureExtractor::kSuffixPrefixHashLength = 5;
const int BrowserFeatureExtractor::kHashPrefixLength = 5;
BrowseInfo::BrowseInfo() {}
......@@ -453,9 +454,27 @@ void BrowserFeatureExtractor::ComputeURLHash(
&host, &path, &query);
DCHECK(!host.empty()) << request->url();
DCHECK(!path.empty()) << request->url();
request->set_suffix_prefix_hash(
crypto::SHA256HashString(host + path).substr(
0, kSuffixPrefixHashLength));
// Lowercase the URL. Note: canonicalization converts the URL to ASCII.
// Percent encoded characters will not be lowercased but this is consistent
// with what we're doing on the server side.
StringToLowerASCII(&host);
StringToLowerASCII(&path);
// Remove leading 'www.' from the host.
if (host.size() > 4 && host.substr(0, 4) == "www.") {
host.erase(0, 4);
}
// Remove everything after the last '/' to broaden the pattern.
if (path.size() > 1 && *(path.rbegin()) != '/') {
// The pattern never ends in foo.com/test? because we stripped CGI params.
// Remove everything that comes after the last '/'.
size_t last_path = path.rfind("/");
path.erase(last_path + 1);
}
request->set_hash_prefix(crypto::SHA256HashString(host + path).substr(
0, kHashPrefixLength));
}
}; // namespace safe_browsing
......@@ -74,9 +74,9 @@ class BrowserFeatureExtractor {
ClientPhishingRequest* request,
DoneCallback* callback);
// The size of hash prefix to use for
// ClientPhishingRequest.suffix_prefix_hash. Public for testing.
static const int kSuffixPrefixHashLength;
// The size of hash prefix to use for ClientPhishingRequest.hash_prefix.
// Public for testing.
static const int kHashPrefixLength;
private:
friend class DeleteTask<BrowserFeatureExtractor>;
......
......@@ -488,8 +488,8 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) {
EXPECT_TRUE(ExtractFeatures(&request));
EXPECT_EQ(crypto::SHA256HashString("host.com/").substr(
0, BrowserFeatureExtractor::kSuffixPrefixHashLength),
request.suffix_prefix_hash());
0, BrowserFeatureExtractor::kHashPrefixLength),
request.hash_prefix());
request.set_url("http://www.host.com/path/");
history_service()->AddPage(GURL("http://www.host.com/path/"),
......@@ -497,9 +497,9 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) {
contents()->NavigateAndCommit(GURL("http://www.host.com/path/"));
EXPECT_TRUE(ExtractFeatures(&request));
EXPECT_EQ(crypto::SHA256HashString("www.host.com/path/").substr(
0, BrowserFeatureExtractor::kSuffixPrefixHashLength),
request.suffix_prefix_hash());
EXPECT_EQ(crypto::SHA256HashString("host.com/path/").substr(
0, BrowserFeatureExtractor::kHashPrefixLength),
request.hash_prefix());
request.set_url("http://user@www.host.com:1111/path/123?args");
history_service()->AddPage(
......@@ -509,9 +509,9 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) {
GURL("http://user@www.host.com:1111/path/123?args"));
EXPECT_TRUE(ExtractFeatures(&request));
EXPECT_EQ(crypto::SHA256HashString("www.host.com/path/123").substr(
0, BrowserFeatureExtractor::kSuffixPrefixHashLength),
request.suffix_prefix_hash());
EXPECT_EQ(crypto::SHA256HashString("host.com/path/").substr(
0, BrowserFeatureExtractor::kHashPrefixLength),
request.hash_prefix());
// Check that escaping matches the SafeBrowsing specification.
request.set_url("http://www.host.com/A%21//B");
......@@ -520,8 +520,8 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) {
contents()->NavigateAndCommit(GURL("http://www.host.com/A%21//B"));
EXPECT_TRUE(ExtractFeatures(&request));
EXPECT_EQ(crypto::SHA256HashString("www.host.com/A!/B").substr(
0, BrowserFeatureExtractor::kSuffixPrefixHashLength),
request.suffix_prefix_hash());
EXPECT_EQ(crypto::SHA256HashString("host.com/a!/").substr(
0, BrowserFeatureExtractor::kHashPrefixLength),
request.hash_prefix());
}
} // namespace safe_browsing
......@@ -278,9 +278,8 @@ void ClientSideDetectionService::SanitizeRequestForPingback(
ClientPhishingRequest* sanitized_request) {
DCHECK(full_request.IsInitialized());
sanitized_request->Clear();
if (full_request.has_suffix_prefix_hash()) {
sanitized_request->set_suffix_prefix_hash(
full_request.suffix_prefix_hash());
if (full_request.has_hash_prefix()) {
sanitized_request->set_hash_prefix(full_request.hash_prefix());
}
sanitized_request->set_client_score(full_request.client_score());
if (full_request.has_is_phishing()) {
......
......@@ -666,7 +666,7 @@ TEST_F(ClientSideDetectionServiceTest, SetEnabled) {
TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) {
ClientPhishingRequest request;
request.set_url("http://www.us.host.com/blah");
request.set_suffix_prefix_hash("hash");
request.set_hash_prefix("hash");
request.set_client_score(0.8f);
request.set_is_phishing(true);
AddFeature(std::string(features::kUrlTldToken) + "com", 1.0, &request);
......@@ -709,7 +709,7 @@ TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) {
// For easier debugging, we'll check the output protobuf fields individually.
ClientPhishingRequest expected;
expected.set_suffix_prefix_hash(request.suffix_prefix_hash());
expected.set_hash_prefix(request.hash_prefix());
expected.set_client_score(request.client_score());
expected.set_is_phishing(request.is_phishing());
AddFeature(features::kUrlNumOtherHostTokensGTOne, 1.0, &expected);
......@@ -719,8 +719,7 @@ TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) {
AddNonModelFeature(features::kUrlHistoryVisitCount, 5.0, &expected);
EXPECT_FALSE(sanitized_request.has_url());
EXPECT_EQ(expected.suffix_prefix_hash(),
sanitized_request.suffix_prefix_hash());
EXPECT_EQ(expected.hash_prefix(), sanitized_request.hash_prefix());
EXPECT_FLOAT_EQ(expected.client_score(), sanitized_request.client_score());
EXPECT_EQ(expected.is_phishing(), sanitized_request.is_phishing());
......
......@@ -20,10 +20,11 @@ message ClientPhishingRequest {
// client. This field is ONLY set for UMA-enabled users.
optional string url = 1;
// A 5-byte SHA-256 hash prefix of the URL, in SafeBrowsing host sufffix/path
// prefix form with query parameters stripped (i.e. "www.example.com/1/2/").
// A 5-byte SHA-256 hash prefix of the URL. Before hashing the URL is
// canonicalized, converted to a suffix-prefix expression and broadened
// (www prefix is removed and everything past the last '/' is stripped).
// Unlike "url", this is sent for all users.
optional bytes suffix_prefix_hash = 10;
optional bytes hash_prefix = 10;
// Score that was computed on the client. Value is between 0.0 and 1.0.
// The larger the value the more likely the url is phishing.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment