Commit 0f871357 authored by Daniel Rubery, committed by Commit Bot

Use IsVisualMatch in the PhishingClassifier

This CL adds code to the PhishingClassifier to extract a screenshot of
the current page. It also sends this screenshot to the Scorer to
determine if the visual model matches the given screenshot.

Bug: 1068617
Change-Id: I9e4cb1ba8888fa76b116405611bf64280a87def0
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2226974
Reviewed-by: vmpstr <vmpstr@chromium.org>
Reviewed-by: Bettina Dea <bdea@chromium.org>
Commit-Queue: Daniel Rubery <drubery@chromium.org>
Cr-Commit-Position: refs/heads/master@{#775835}
parent 0cc05978
......@@ -74,8 +74,8 @@ ClientSideDetectionService::ClientSideDetectionService(Profile* profile)
: nullptr) {
profile_ = profile;
// profile_ can be null in unit tests
if (!profile_)
// |profile_| and |url_loader_factory_| can be null in unit tests
if (!profile_ || !url_loader_factory_)
return;
pref_change_registrar_.Init(profile_->GetPrefs());
......@@ -83,6 +83,9 @@ ClientSideDetectionService::ClientSideDetectionService(Profile* profile)
prefs::kSafeBrowsingEnabled,
base::Bind(&ClientSideDetectionService::OnPrefsUpdated,
base::Unretained(this)));
// Do an initial check of the prefs.
OnPrefsUpdated();
}
ClientSideDetectionService::ClientSideDetectionService(
......
# Additional include paths permitted for the code governed by this DEPS file.
# Each entry starts with "+", which allows includes from that path.
# NOTE(review): "+cc/paint" and the visual_utils.h entry presumably back the
# cc/paint/skia_paint_canvas.h and visual_utils.h includes used by the
# phishing classifier and scorer -- confirm against the directory this DEPS
# file lives in.
include_rules = [
"+components/safe_browsing/content/renderer",
"+components/safe_browsing/content/password_protection/visual_utils.h",
"+components/safe_browsing/core/common",
"+components/safe_browsing/core/proto/csd.pb.h",
"+components/safe_browsing/core/proto/client_model.pb.h",
"+components/safe_browsing/core/features.h",
"+cc/paint",
"+third_party/smhasher",
]
......
......@@ -16,6 +16,7 @@
#include "base/single_thread_task_runner.h"
#include "base/strings/string_util.h"
#include "base/threading/thread_task_runner_handle.h"
#include "cc/paint/skia_paint_canvas.h"
#include "chrome/common/url_constants.h"
#include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
#include "chrome/renderer/safe_browsing/features.h"
......@@ -158,34 +159,67 @@ void PhishingClassifier::DOMExtractionFinished(bool success) {
// Callback run when term feature extraction is complete.
//
// On success this only starts visual feature extraction. The verdict is
// computed, and the done callback is run, from VisualExtractionFinished(), so
// the callback fires exactly once per classification. On failure the failure
// callback is run immediately.
void PhishingClassifier::TermExtractionFinished(bool success) {
  if (!success) {
    RunFailureCallback();
    return;
  }

  // Scoring is intentionally deferred: VisualExtractionFinished() hashes the
  // features, computes the score and folds in any visual-model match before
  // reporting the verdict.
  ExtractVisualFeatures();
}
void PhishingClassifier::ExtractVisualFeatures() {
blink::WebLocalFrame* frame = render_frame_->GetWebFrame();
gfx::Rect bounds = gfx::Rect(0, 0, frame->DocumentSize().width,
frame->DocumentSize().height);
bitmap_ = std::make_unique<SkBitmap>();
// Use the Rec. 2020 color space, in case the user input is wide-gamut.
sk_sp<SkColorSpace> rec2020 = SkColorSpace::MakeRGB(
{2.22222f, 0.909672f, 0.0903276f, 0.222222f, 0.0812429f, 0, 0},
SkNamedGamut::kRec2020);
SkImageInfo bitmap_info = SkImageInfo::Make(
bounds.width(), bounds.height(), SkColorType::kRGBA_8888_SkColorType,
SkAlphaType::kUnpremul_SkAlphaType, rec2020);
if (!bitmap_->tryAllocPixels(bitmap_info))
return VisualExtractionFinished(/*success=*/false);
SkCanvas sk_canvas(*bitmap_);
cc::SkiaPaintCanvas cc_canvas(&sk_canvas);
VisualExtractionFinished(frame->CapturePaintPreview(bounds, &cc_canvas));
}
void PhishingClassifier::VisualExtractionFinished(bool success) {
if (!success) {
RunFailureCallback();
return;
}
blink::WebLocalFrame* main_frame = render_frame_->GetWebFrame();
// Hash all of the features so that they match the model, then compute
// the score.
FeatureMap hashed_features;
ClientPhishingRequest verdict;
verdict.set_model_version(scorer_->model_version());
verdict.set_url(main_frame->GetDocument().Url().GetString().Utf8());
for (const auto& it : features_->features()) {
bool result = hashed_features.AddRealFeature(
crypto::SHA256HashString(it.first), it.second);
DCHECK(result);
ClientPhishingRequest::Feature* feature = verdict.add_feature_map();
feature->set_name(it.first);
feature->set_value(it.second);
}
for (const auto& it : *shingle_hashes_) {
verdict.add_shingle_hashes(it);
}
float score = static_cast<float>(scorer_->ComputeScore(hashed_features));
verdict.set_client_score(score);
verdict.set_is_phishing(score >= scorer_->threshold_probability());
if (scorer_->GetMatchingVisualTargets(*bitmap_, &verdict)) {
verdict.set_is_phishing(true);
}
RunCallback(verdict);
}
void PhishingClassifier::CheckNoPendingClassification() {
DCHECK(done_callback_.is_null());
DCHECK(!page_text_);
......@@ -215,6 +249,7 @@ void PhishingClassifier::Clear() {
done_callback_.Reset();
features_.reset(nullptr);
shingle_hashes_.reset(nullptr);
bitmap_.reset(nullptr);
}
} // namespace safe_browsing
......@@ -27,6 +27,7 @@
#include "base/macros.h"
#include "base/memory/weak_ptr.h"
#include "base/strings/string16.h"
#include "third_party/skia/include/core/SkBitmap.h"
namespace content {
class RenderFrame;
......@@ -110,10 +111,18 @@ class PhishingClassifier {
void DOMExtractionFinished(bool success);
// Callback to be run when term feature extraction is complete.
// If it was successful, begins visual feature extraction (see
// ExtractVisualFeatures()), otherwise runs the
// DoneCallback with a non-phishy verdict.
void TermExtractionFinished(bool success);
// Called to extract the visual features of the current page. Draws a paint
// preview of the frame's document into |bitmap_| and then calls
// VisualExtractionFinished() with whether that succeeded. A failure to
// allocate the bitmap's pixels is reported as an unsuccessful extraction.
void ExtractVisualFeatures();
// Callback when visual feature extraction is complete.
// If it was successful, computes a score and runs the DoneCallback. The
// verdict is also marked as phishing when the screenshot matches any target
// of the visual model.
// If extraction was unsuccessful, runs the DoneCallback with a
// non-phishy verdict.
void VisualExtractionFinished(bool success);
// Helper to verify that there is no pending phishing classification. Dies
// in debug builds if the state is not as expected. This is a no-op in
......@@ -141,6 +150,7 @@ class PhishingClassifier {
std::unique_ptr<FeatureMap> features_;
std::unique_ptr<std::set<uint32_t>> shingle_hashes_;
const base::string16* page_text_; // owned by the caller
std::unique_ptr<SkBitmap> bitmap_;
DoneCallback done_callback_;
// Used in scheduling BeginFeatureExtraction tasks.
......
......@@ -14,6 +14,7 @@
#include "base/metrics/histogram_macros.h"
#include "base/strings/string_piece.h"
#include "chrome/renderer/safe_browsing/features.h"
#include "components/safe_browsing/content/password_protection/visual_utils.h"
#include "components/safe_browsing/core/proto/client_model.pb.h"
namespace {
......@@ -86,6 +87,21 @@ double Scorer::ComputeScore(const FeatureMap& features) const {
return LogOdds2Prob(logodds);
}
// Compares |bitmap| against every target of the model's vision model. Each
// target that matches contributes one VisionMatchResult to |request|. Returns
// true if at least one target matched.
bool Scorer::GetMatchingVisualTargets(const SkBitmap& bitmap,
                                      ClientPhishingRequest* request) const {
  bool matched_any = false;
  for (const VisualTarget& candidate : model_.vision_model().targets()) {
    const base::Optional<VisionMatchResult> match =
        visual_utils::IsVisualMatch(bitmap, candidate);
    // An empty optional means this target did not match; nothing to record.
    if (!match)
      continue;

    *request->add_vision_match() = *match;
    matched_any = true;
  }
  return matched_any;
}
// Returns the version number stored in the loaded model (|model_|).
int Scorer::model_version() const {
return model_.version();
}
......
......@@ -23,6 +23,7 @@
#include "base/macros.h"
#include "base/strings/string_piece.h"
#include "components/safe_browsing/core/proto/client_model.pb.h"
#include "third_party/skia/include/core/SkBitmap.h"
namespace safe_browsing {
class FeatureMap;
......@@ -41,6 +42,11 @@ class Scorer {
// (range is inclusive on both ends).
virtual double ComputeScore(const FeatureMap& features) const;
// This method matches the given |bitmap| against every target of the visual
// model. It returns true if any visual target matches. For each matching
// target one VisionMatchResult is appended to |request|'s vision_match field;
// nothing is added to |request| when no target matches.
virtual bool GetMatchingVisualTargets(const SkBitmap& bitmap,
ClientPhishingRequest* request) const;
// Returns the version number of the loaded client model.
int model_version() const;
......
......@@ -59,9 +59,39 @@ class PhishingScorerTest : public ::testing::Test {
model_.set_murmur_hash_seed(12345U);
model_.set_max_shingles_per_page(10);
model_.set_shingle_size(3);
// The first target hash is all 1-bits, except the first 8.
std::vector<unsigned char> target_hash;
target_hash.push_back('\x30');
for (int i = 0; i < 288; i++)
target_hash.push_back('\xff');
target_hash[1] = '\x00';
VisualTarget* target1 = model_.mutable_vision_model()->add_targets();
target1->set_digest("target1");
target1->set_hash(target_hash.data(), target_hash.size());
target1->mutable_match_config()->add_match_rule()->set_hash_distance(8.0);
// The second target hash is all 1-bits, except the second 8.
target_hash[1] = '\xff';
target_hash[2] = '\x00';
VisualTarget* target2 = model_.mutable_vision_model()->add_targets();
target2->set_digest("target2");
target2->set_hash(target_hash.data(), target_hash.size());
target2->mutable_match_config()->add_match_rule()->set_hash_distance(8.0);
// Allocate a bitmap for testing visual scoring
sk_sp<SkColorSpace> rec2020 = SkColorSpace::MakeRGB(
{2.22222f, 0.909672f, 0.0903276f, 0.222222f, 0.0812429f, 0, 0},
SkNamedGamut::kRec2020);
SkImageInfo bitmap_info =
SkImageInfo::Make(1000, 1000, SkColorType::kRGBA_8888_SkColorType,
SkAlphaType::kUnpremul_SkAlphaType, rec2020);
ASSERT_TRUE(bitmap_.tryAllocPixels(bitmap_info));
}
ClientSideModel model_;
SkBitmap bitmap_;
};
TEST_F(PhishingScorerTest, HasValidModel) {
......@@ -145,4 +175,48 @@ TEST_F(PhishingScorerTest, ComputeScore) {
EXPECT_TRUE(features.AddBooleanFeature("feature2"));
EXPECT_DOUBLE_EQ(0.77729986117469119, scorer->ComputeScore(features));
}
// Checks that a screenshot whose hash has its first 8 bits cleared is
// reported as a match for "target1" only, and that the scorer's boolean
// return value agrees with the matches recorded in the request.
TEST_F(PhishingScorerTest, GetMatchingVisualTargetsMatchOne) {
  std::unique_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
  // Fail cleanly instead of dereferencing null if the model is rejected.
  ASSERT_TRUE(scorer);

  // Make the whole image white
  for (int x = 0; x < 1000; x++)
    for (int y = 0; y < 1000; y++)
      *bitmap_.getAddr32(x, y) = 0xffffffff;

  // Make the first 164 pixels of the top row black. This will make the first
  // 8 bits of the hash 0.
  for (int x = 0; x < 164; x++)
    *bitmap_.getAddr32(x, 0) = 0xff000000;

  ClientPhishingRequest request;
  // The return value is part of the contract: it must be true whenever a
  // match is written to |request|.
  EXPECT_TRUE(scorer->GetMatchingVisualTargets(bitmap_, &request));
  ASSERT_EQ(request.vision_match_size(), 1);
  EXPECT_EQ(request.vision_match(0).matched_target_digest(), "target1");
}
// Checks that a screenshot within the configured hash distance of both
// targets is reported as matching "target1" and "target2", in model order,
// and that the scorer's boolean return value agrees.
TEST_F(PhishingScorerTest, GetMatchingVisualTargetsMatchBoth) {
  std::unique_ptr<Scorer> scorer(Scorer::Create(model_.SerializeAsString()));
  // Fail cleanly instead of dereferencing null if the model is rejected.
  ASSERT_TRUE(scorer);

  // Make the whole image white
  for (int x = 0; x < 1000; x++)
    for (int y = 0; y < 1000; y++)
      *bitmap_.getAddr32(x, y) = 0xffffffff;

  // Create an alternating black/white pattern to match both targets. The
  // pattern is 84 black pixels, then 84 white, then 84 black, then 84 white.
  // This causes the hash to start 0F0F, for a distance of 8 from both targets.
  // NOTE(review): the second black run below covers x in [168, 248), i.e. 80
  // pixels rather than the 84 described above -- confirm whether an upper
  // bound of 252 was intended. The bounds are left unchanged here.
  for (int x = 0; x < 84; x++)
    *bitmap_.getAddr32(x, 0) = 0xff000000;
  for (int x = 168; x < 248; x++)
    *bitmap_.getAddr32(x, 0) = 0xff000000;

  ClientPhishingRequest request;
  // The return value is part of the contract: it must be true whenever a
  // match is written to |request|.
  EXPECT_TRUE(scorer->GetMatchingVisualTargets(bitmap_, &request));
  ASSERT_EQ(request.vision_match_size(), 2);
  EXPECT_EQ(request.vision_match(0).matched_target_digest(), "target1");
  EXPECT_EQ(request.vision_match(1).matched_target_digest(), "target2");
}
} // namespace safe_browsing
......@@ -304,17 +304,19 @@ std::unique_ptr<SkBitmap> BlockMeanAverage(const SkBitmap& image,
return target;
}
bool IsVisualMatch(const SkBitmap& image, const VisualTarget& target) {
base::Optional<VisionMatchResult> IsVisualMatch(const SkBitmap& image,
const VisualTarget& target) {
VisualFeatures::BlurredImage blurred_image;
if (!GetBlurredImage(image, &blurred_image))
return false;
return base::nullopt;
std::string hash = GetHashFromBlurredImage(blurred_image);
size_t hash_distance;
bool has_hash_distance = GetHashDistance(hash, target.hash(), &hash_distance);
VisualFeatures::ColorHistogram histogram;
if (!GetHistogramForImage(image, &histogram))
return false;
return base::nullopt;
opencv::PointDistribution point_distribution =
HistogramBinsToPointDistribution(histogram.bins());
base::Optional<double> color_distance = opencv::EMD(
......@@ -337,11 +339,17 @@ bool IsVisualMatch(const SkBitmap& image, const VisualTarget& target) {
}
if (is_match) {
return true;
VisionMatchResult result;
result.set_matched_target_digest(target.digest());
if (has_hash_distance)
result.set_vision_matched_phash_score(hash_distance);
if (color_distance.has_value())
result.set_vision_matched_emd_score(color_distance.value());
return result;
}
}
return false;
return base::nullopt;
}
} // namespace visual_utils
......
......@@ -7,6 +7,7 @@
#include <string>
#include "base/optional.h"
#include "components/safe_browsing/core/proto/client_model.pb.h"
#include "components/safe_browsing/core/proto/csd.pb.h"
#include "third_party/skia/include/core/SkBitmap.h"
......@@ -40,8 +41,10 @@ bool GetBlurredImage(const SkBitmap& image,
std::unique_ptr<SkBitmap> BlockMeanAverage(const SkBitmap& image,
int block_size);
// Checks whether the given |image| is a match for the |target|. Returns
// nullopt in the case of no match, and the VisionMatchResult if it is a match.
// nullopt is also returned when the blurred image or the color histogram
// cannot be computed for |image|. On a match, the result carries the digest
// of |target| and, when they could be computed, the hash distance and the
// color distance.
base::Optional<VisionMatchResult> IsVisualMatch(const SkBitmap& image,
                                                const VisualTarget& target);
} // namespace visual_utils
} // namespace safe_browsing
......
......@@ -267,7 +267,7 @@ TEST_F(VisualUtilsTest, IsVisualMatchHash) {
VisualTarget target;
target.set_hash(target_hash.data(), target_hash.size());
target.mutable_match_config()->add_match_rule()->set_hash_distance(0.0);
EXPECT_TRUE(IsVisualMatch(bitmap_, target));
EXPECT_TRUE(IsVisualMatch(bitmap_, target).has_value());
}
{
......@@ -287,7 +287,7 @@ TEST_F(VisualUtilsTest, IsVisualMatchHash) {
target.set_hash(target_hash.data(), target_hash.size());
target.mutable_match_config()->add_match_rule()->set_hash_distance(0.0);
EXPECT_TRUE(IsVisualMatch(bitmap_, target));
EXPECT_TRUE(IsVisualMatch(bitmap_, target).has_value());
}
}
......@@ -311,9 +311,9 @@ TEST_F(VisualUtilsTest, IsVisualMatchHashPartialMatch) {
VisualTarget target;
target.set_hash(target_hash.data(), target_hash.size());
target.mutable_match_config()->add_match_rule()->set_hash_distance(23.0);
EXPECT_FALSE(IsVisualMatch(bitmap_, target));
EXPECT_FALSE(IsVisualMatch(bitmap_, target).has_value());
target.mutable_match_config()->add_match_rule()->set_hash_distance(24.0);
EXPECT_TRUE(IsVisualMatch(bitmap_, target));
EXPECT_TRUE(IsVisualMatch(bitmap_, target).has_value());
}
TEST_F(VisualUtilsTest, IsVisualMatchHashStrideComparison) {
......@@ -329,11 +329,11 @@ TEST_F(VisualUtilsTest, IsVisualMatchHashStrideComparison) {
VisualTarget target;
target.set_hash(target_hash.data(), target_hash.size());
target.mutable_match_config()->add_match_rule()->set_hash_distance(0.0);
EXPECT_TRUE(IsVisualMatch(bitmap_, target));
EXPECT_TRUE(IsVisualMatch(bitmap_, target).has_value());
target_hash[0] = '\x00';
target.set_hash(target_hash.data(), target_hash.size());
EXPECT_FALSE(IsVisualMatch(bitmap_, target));
EXPECT_FALSE(IsVisualMatch(bitmap_, target).has_value());
}
TEST_F(VisualUtilsTest, IsVisualMatchHistogramOnly) {
......@@ -353,7 +353,7 @@ TEST_F(VisualUtilsTest, IsVisualMatchHistogramOnly) {
bin->set_quantized_b(7);
bin->set_weight(1.0);
target.mutable_match_config()->add_match_rule()->set_color_distance(0.0);
EXPECT_TRUE(IsVisualMatch(bitmap_, target));
EXPECT_TRUE(IsVisualMatch(bitmap_, target).has_value());
}
{
......@@ -370,10 +370,10 @@ TEST_F(VisualUtilsTest, IsVisualMatchHistogramOnly) {
MatchRule* match_rule = target.mutable_match_config()->add_match_rule();
match_rule->set_color_distance(0.5);
EXPECT_TRUE(IsVisualMatch(bitmap_, target));
EXPECT_TRUE(IsVisualMatch(bitmap_, target).has_value());
match_rule->set_color_distance(0.4);
EXPECT_FALSE(IsVisualMatch(bitmap_, target));
EXPECT_FALSE(IsVisualMatch(bitmap_, target).has_value());
}
{
......@@ -390,10 +390,10 @@ TEST_F(VisualUtilsTest, IsVisualMatchHistogramOnly) {
MatchRule* match_rule = target.mutable_match_config()->add_match_rule();
match_rule->set_color_distance(0.2);
EXPECT_TRUE(IsVisualMatch(bitmap_, target));
EXPECT_TRUE(IsVisualMatch(bitmap_, target).has_value());
match_rule->set_color_distance(0.1);
EXPECT_FALSE(IsVisualMatch(bitmap_, target));
EXPECT_FALSE(IsVisualMatch(bitmap_, target).has_value());
}
}
......@@ -413,21 +413,21 @@ TEST_F(VisualUtilsTest, IsVisualMatchColorRange) {
color_range->set_high(target_hue);
// Blue hue present
EXPECT_TRUE(IsVisualMatch(bitmap_, target));
EXPECT_TRUE(IsVisualMatch(bitmap_, target).has_value());
// Color range too high
color_range->set_low(target_hue + 1);
color_range->set_high(target_hue + 1);
EXPECT_FALSE(IsVisualMatch(bitmap_, target));
EXPECT_FALSE(IsVisualMatch(bitmap_, target).has_value());
// Color range too low
color_range->set_low(target_hue - 1);
color_range->set_high(target_hue - 1);
EXPECT_FALSE(IsVisualMatch(bitmap_, target));
EXPECT_FALSE(IsVisualMatch(bitmap_, target).has_value());
// No blue hue present
*bitmap_.getAddr32(0, 0) = kWhite;
EXPECT_FALSE(IsVisualMatch(bitmap_, target));
EXPECT_FALSE(IsVisualMatch(bitmap_, target).has_value());
}
TEST_F(VisualUtilsTest, IsVisualMatchMultipleColorRanges) {
......@@ -453,20 +453,20 @@ TEST_F(VisualUtilsTest, IsVisualMatchMultipleColorRanges) {
color_range->set_high(green_hue);
// Both hues present
EXPECT_TRUE(IsVisualMatch(bitmap_, target));
EXPECT_TRUE(IsVisualMatch(bitmap_, target).has_value());
// No blue hue present
*bitmap_.getAddr32(0, 0) = kWhite;
EXPECT_FALSE(IsVisualMatch(bitmap_, target));
EXPECT_FALSE(IsVisualMatch(bitmap_, target).has_value());
// No green hue present
*bitmap_.getAddr32(0, 0) = kBlue;
*bitmap_.getAddr32(1, 0) = kWhite;
EXPECT_FALSE(IsVisualMatch(bitmap_, target));
EXPECT_FALSE(IsVisualMatch(bitmap_, target).has_value());
// Neither hue present
*bitmap_.getAddr32(0, 0) = kWhite;
EXPECT_FALSE(IsVisualMatch(bitmap_, target));
EXPECT_FALSE(IsVisualMatch(bitmap_, target).has_value());
}
TEST_F(VisualUtilsTest, IsVisualMatchMultipleMatchRules) {
......@@ -495,20 +495,20 @@ TEST_F(VisualUtilsTest, IsVisualMatchMultipleMatchRules) {
color_range->set_high(green_hue);
// Both hues present
EXPECT_TRUE(IsVisualMatch(bitmap_, target));
EXPECT_TRUE(IsVisualMatch(bitmap_, target).has_value());
// No blue hue present
*bitmap_.getAddr32(0, 0) = kWhite;
EXPECT_TRUE(IsVisualMatch(bitmap_, target));
EXPECT_TRUE(IsVisualMatch(bitmap_, target).has_value());
// No green hue present
*bitmap_.getAddr32(0, 0) = kBlue;
*bitmap_.getAddr32(1, 0) = kWhite;
EXPECT_TRUE(IsVisualMatch(bitmap_, target));
EXPECT_TRUE(IsVisualMatch(bitmap_, target).has_value());
// Neither hue present
*bitmap_.getAddr32(0, 0) = kWhite;
EXPECT_FALSE(IsVisualMatch(bitmap_, target));
EXPECT_FALSE(IsVisualMatch(bitmap_, target).has_value());
}
} // namespace visual_utils
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment