Commit 0d069b50 authored by noelutz@google.com's avatar noelutz@google.com

Change the client-side phishing detection hashing function to

mimic what we're doing on the server.

BUG=
TEST=BrowserFeatureExtractorTest


Review URL: http://codereview.chromium.org/7793012

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@98848 0039d316-1c4b-4281-b951-d872f2087c98
parent 331f7d23
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include "base/stl_util.h" #include "base/stl_util.h"
#include "base/stringprintf.h" #include "base/stringprintf.h"
#include "base/string_util.h"
#include "base/task.h" #include "base/task.h"
#include "base/time.h" #include "base/time.h"
#include "chrome/common/safe_browsing/csd.pb.h" #include "chrome/common/safe_browsing/csd.pb.h"
...@@ -27,7 +28,7 @@ ...@@ -27,7 +28,7 @@
namespace safe_browsing { namespace safe_browsing {
const int BrowserFeatureExtractor::kSuffixPrefixHashLength = 5; const int BrowserFeatureExtractor::kHashPrefixLength = 5;
BrowseInfo::BrowseInfo() {} BrowseInfo::BrowseInfo() {}
...@@ -453,9 +454,27 @@ void BrowserFeatureExtractor::ComputeURLHash( ...@@ -453,9 +454,27 @@ void BrowserFeatureExtractor::ComputeURLHash(
&host, &path, &query); &host, &path, &query);
DCHECK(!host.empty()) << request->url(); DCHECK(!host.empty()) << request->url();
DCHECK(!path.empty()) << request->url(); DCHECK(!path.empty()) << request->url();
request->set_suffix_prefix_hash(
crypto::SHA256HashString(host + path).substr( // Lowercase the URL. Note: canonicalization converts the URL to ASCII.
0, kSuffixPrefixHashLength)); // Percent encoded characters will not be lowercased but this is consistent
// with what we're doing on the server side.
StringToLowerASCII(&host);
StringToLowerASCII(&path);
// Remove leading 'www.' from the host.
if (host.size() > 4 && host.substr(0, 4) == "www.") {
host.erase(0, 4);
}
// Remove everything after the last '/' to broaden the pattern.
if (path.size() > 1 && *(path.rbegin()) != '/') {
// The pattern never ends in foo.com/test? because we stripped CGI params.
// Remove everything that comes after the last '/'.
size_t last_path = path.rfind("/");
path.erase(last_path + 1);
}
request->set_hash_prefix(crypto::SHA256HashString(host + path).substr(
0, kHashPrefixLength));
} }
}; // namespace safe_browsing }; // namespace safe_browsing
...@@ -74,9 +74,9 @@ class BrowserFeatureExtractor { ...@@ -74,9 +74,9 @@ class BrowserFeatureExtractor {
ClientPhishingRequest* request, ClientPhishingRequest* request,
DoneCallback* callback); DoneCallback* callback);
// The size of hash prefix to use for // The size of hash prefix to use for ClientPhishingRequest.hash_prefix.
// ClientPhishingRequest.suffix_prefix_hash. Public for testing. // Public for testing.
static const int kSuffixPrefixHashLength; static const int kHashPrefixLength;
private: private:
friend class DeleteTask<BrowserFeatureExtractor>; friend class DeleteTask<BrowserFeatureExtractor>;
......
...@@ -488,8 +488,8 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) { ...@@ -488,8 +488,8 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) {
EXPECT_TRUE(ExtractFeatures(&request)); EXPECT_TRUE(ExtractFeatures(&request));
EXPECT_EQ(crypto::SHA256HashString("host.com/").substr( EXPECT_EQ(crypto::SHA256HashString("host.com/").substr(
0, BrowserFeatureExtractor::kSuffixPrefixHashLength), 0, BrowserFeatureExtractor::kHashPrefixLength),
request.suffix_prefix_hash()); request.hash_prefix());
request.set_url("http://www.host.com/path/"); request.set_url("http://www.host.com/path/");
history_service()->AddPage(GURL("http://www.host.com/path/"), history_service()->AddPage(GURL("http://www.host.com/path/"),
...@@ -497,9 +497,9 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) { ...@@ -497,9 +497,9 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) {
contents()->NavigateAndCommit(GURL("http://www.host.com/path/")); contents()->NavigateAndCommit(GURL("http://www.host.com/path/"));
EXPECT_TRUE(ExtractFeatures(&request)); EXPECT_TRUE(ExtractFeatures(&request));
EXPECT_EQ(crypto::SHA256HashString("www.host.com/path/").substr( EXPECT_EQ(crypto::SHA256HashString("host.com/path/").substr(
0, BrowserFeatureExtractor::kSuffixPrefixHashLength), 0, BrowserFeatureExtractor::kHashPrefixLength),
request.suffix_prefix_hash()); request.hash_prefix());
request.set_url("http://user@www.host.com:1111/path/123?args"); request.set_url("http://user@www.host.com:1111/path/123?args");
history_service()->AddPage( history_service()->AddPage(
...@@ -509,9 +509,9 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) { ...@@ -509,9 +509,9 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) {
GURL("http://user@www.host.com:1111/path/123?args")); GURL("http://user@www.host.com:1111/path/123?args"));
EXPECT_TRUE(ExtractFeatures(&request)); EXPECT_TRUE(ExtractFeatures(&request));
EXPECT_EQ(crypto::SHA256HashString("www.host.com/path/123").substr( EXPECT_EQ(crypto::SHA256HashString("host.com/path/").substr(
0, BrowserFeatureExtractor::kSuffixPrefixHashLength), 0, BrowserFeatureExtractor::kHashPrefixLength),
request.suffix_prefix_hash()); request.hash_prefix());
// Check that escaping matches the SafeBrowsing specification. // Check that escaping matches the SafeBrowsing specification.
request.set_url("http://www.host.com/A%21//B"); request.set_url("http://www.host.com/A%21//B");
...@@ -520,8 +520,8 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) { ...@@ -520,8 +520,8 @@ TEST_F(BrowserFeatureExtractorTest, URLHashes) {
contents()->NavigateAndCommit(GURL("http://www.host.com/A%21//B")); contents()->NavigateAndCommit(GURL("http://www.host.com/A%21//B"));
EXPECT_TRUE(ExtractFeatures(&request)); EXPECT_TRUE(ExtractFeatures(&request));
EXPECT_EQ(crypto::SHA256HashString("www.host.com/A!/B").substr( EXPECT_EQ(crypto::SHA256HashString("host.com/a!/").substr(
0, BrowserFeatureExtractor::kSuffixPrefixHashLength), 0, BrowserFeatureExtractor::kHashPrefixLength),
request.suffix_prefix_hash()); request.hash_prefix());
} }
} // namespace safe_browsing } // namespace safe_browsing
...@@ -278,9 +278,8 @@ void ClientSideDetectionService::SanitizeRequestForPingback( ...@@ -278,9 +278,8 @@ void ClientSideDetectionService::SanitizeRequestForPingback(
ClientPhishingRequest* sanitized_request) { ClientPhishingRequest* sanitized_request) {
DCHECK(full_request.IsInitialized()); DCHECK(full_request.IsInitialized());
sanitized_request->Clear(); sanitized_request->Clear();
if (full_request.has_suffix_prefix_hash()) { if (full_request.has_hash_prefix()) {
sanitized_request->set_suffix_prefix_hash( sanitized_request->set_hash_prefix(full_request.hash_prefix());
full_request.suffix_prefix_hash());
} }
sanitized_request->set_client_score(full_request.client_score()); sanitized_request->set_client_score(full_request.client_score());
if (full_request.has_is_phishing()) { if (full_request.has_is_phishing()) {
......
...@@ -666,7 +666,7 @@ TEST_F(ClientSideDetectionServiceTest, SetEnabled) { ...@@ -666,7 +666,7 @@ TEST_F(ClientSideDetectionServiceTest, SetEnabled) {
TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) { TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) {
ClientPhishingRequest request; ClientPhishingRequest request;
request.set_url("http://www.us.host.com/blah"); request.set_url("http://www.us.host.com/blah");
request.set_suffix_prefix_hash("hash"); request.set_hash_prefix("hash");
request.set_client_score(0.8f); request.set_client_score(0.8f);
request.set_is_phishing(true); request.set_is_phishing(true);
AddFeature(std::string(features::kUrlTldToken) + "com", 1.0, &request); AddFeature(std::string(features::kUrlTldToken) + "com", 1.0, &request);
...@@ -709,7 +709,7 @@ TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) { ...@@ -709,7 +709,7 @@ TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) {
// For easier debugging, we'll check the output protobuf fields individually. // For easier debugging, we'll check the output protobuf fields individually.
ClientPhishingRequest expected; ClientPhishingRequest expected;
expected.set_suffix_prefix_hash(request.suffix_prefix_hash()); expected.set_hash_prefix(request.hash_prefix());
expected.set_client_score(request.client_score()); expected.set_client_score(request.client_score());
expected.set_is_phishing(request.is_phishing()); expected.set_is_phishing(request.is_phishing());
AddFeature(features::kUrlNumOtherHostTokensGTOne, 1.0, &expected); AddFeature(features::kUrlNumOtherHostTokensGTOne, 1.0, &expected);
...@@ -719,8 +719,7 @@ TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) { ...@@ -719,8 +719,7 @@ TEST_F(ClientSideDetectionServiceTest, SanitizeRequestForPingback) {
AddNonModelFeature(features::kUrlHistoryVisitCount, 5.0, &expected); AddNonModelFeature(features::kUrlHistoryVisitCount, 5.0, &expected);
EXPECT_FALSE(sanitized_request.has_url()); EXPECT_FALSE(sanitized_request.has_url());
EXPECT_EQ(expected.suffix_prefix_hash(), EXPECT_EQ(expected.hash_prefix(), sanitized_request.hash_prefix());
sanitized_request.suffix_prefix_hash());
EXPECT_FLOAT_EQ(expected.client_score(), sanitized_request.client_score()); EXPECT_FLOAT_EQ(expected.client_score(), sanitized_request.client_score());
EXPECT_EQ(expected.is_phishing(), sanitized_request.is_phishing()); EXPECT_EQ(expected.is_phishing(), sanitized_request.is_phishing());
......
...@@ -20,10 +20,11 @@ message ClientPhishingRequest { ...@@ -20,10 +20,11 @@ message ClientPhishingRequest {
// client. This field is ONLY set for UMA-enabled users. // client. This field is ONLY set for UMA-enabled users.
optional string url = 1; optional string url = 1;
// A 5-byte SHA-256 hash prefix of the URL, in SafeBrowsing host sufffix/path // A 5-byte SHA-256 hash prefix of the URL. Before hashing the URL is
// prefix form with query parameters stripped (i.e. "www.example.com/1/2/"). // canonicalized, converted to a suffix-prefix expression and broadened
// (www prefix is removed and everything past the last '/' is stripped).
// Unlike "url", this is sent for all users. // Unlike "url", this is sent for all users.
optional bytes suffix_prefix_hash = 10; optional bytes hash_prefix = 10;
// Score that was computed on the client. Value is between 0.0 and 1.0. // Score that was computed on the client. Value is between 0.0 and 1.0.
// The larger the value the more likely the url is phishing. // The larger the value the more likely the url is phishing.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment