Commit 80665dbd authored by Daniel Rubery, committed by Chromium LUCI CQ

Add field is_dom_match to ClientPhishingRequest

This CL adds a new boolean field is_dom_match to CSD pings. This makes
it easier to separate out the pings due to the DOM model from the ones
due to the visual model. In principle, this can be computed by comparing
the score in the ping to the model threshold, but this process is fairly
complex due to the potential for models to change on the server-side.

Change-Id: I8685f5795eec41dddb433fe8f6dec5a4f30bee0c
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2610951
Reviewed-by: Bettina Dea <bdea@chromium.org>
Commit-Queue: Daniel Rubery <drubery@chromium.org>
Cr-Commit-Position: refs/heads/master@{#840467}
parent e5b17137
...@@ -139,6 +139,7 @@ class PhishingClassifierTest : public ChromeRenderViewTest { ...@@ -139,6 +139,7 @@ class PhishingClassifierTest : public ChromeRenderViewTest {
verdict.feature_map(i).value()); verdict.feature_map(i).value());
} }
is_phishing_ = verdict.is_phishing(); is_phishing_ = verdict.is_phishing();
is_dom_match_ = verdict.is_dom_match();
screenshot_digest_ = verdict.screenshot_digest(); screenshot_digest_ = verdict.screenshot_digest();
screenshot_phash_ = verdict.screenshot_phash(); screenshot_phash_ = verdict.screenshot_phash();
phash_dimension_size_ = verdict.phash_dimension_size(); phash_dimension_size_ = verdict.phash_dimension_size();
...@@ -174,6 +175,7 @@ class PhishingClassifierTest : public ChromeRenderViewTest { ...@@ -174,6 +175,7 @@ class PhishingClassifierTest : public ChromeRenderViewTest {
std::string screenshot_digest_; std::string screenshot_digest_;
std::string screenshot_phash_; std::string screenshot_phash_;
int phash_dimension_size_; int phash_dimension_size_;
bool is_dom_match_;
// A DiscardableMemoryAllocator is needed for certain Skia operations. // A DiscardableMemoryAllocator is needed for certain Skia operations.
base::TestDiscardableMemoryAllocator test_allocator_; base::TestDiscardableMemoryAllocator test_allocator_;
...@@ -292,6 +294,25 @@ TEST_F(PhishingClassifierTest, TestSendsVisualDigest) { ...@@ -292,6 +294,25 @@ TEST_F(PhishingClassifierTest, TestSendsVisualDigest) {
} }
#endif #endif
// Loads a page whose only link points at phishing.com, runs the classifier,
// and expects the resulting verdict to be flagged both as phishing and as a
// DOM match.
// NOTE(review): presumably the fixture's test model scores this page above
// its threshold; the model setup is outside this hunk -- confirm.
TEST_F(PhishingClassifierTest, TestPhishingPagesAreDomMatches) {
  const GURL page_url("http://host.net");
  const char kPageHtml[] =
      "<html><body><a href=\"http://phishing.com/\">login</a></body></html>";

  LoadHtml(page_url, kPageHtml);
  RunPhishingClassifier(&page_text_);

  EXPECT_TRUE(is_phishing_);
  EXPECT_TRUE(is_dom_match_);
}
// Loads a page whose only link points at safe.com, runs the classifier, and
// expects the resulting verdict to be flagged neither as phishing nor as a
// DOM match.
// NOTE(review): presumably the fixture's test model scores this page below
// its threshold; the model setup is outside this hunk -- confirm.
TEST_F(PhishingClassifierTest, TestSafePagesAreNotDomMatches) {
  const GURL page_url("http://host.net");
  const char kPageHtml[] =
      "<html><body><a href=\"http://safe.com/\">login</a></body></html>";

  LoadHtml(page_url, kPageHtml);
  RunPhishingClassifier(&page_text_);

  EXPECT_FALSE(is_phishing_);
  EXPECT_FALSE(is_dom_match_);
}
// TODO(jialiul): Add test to verify that classification only starts on GET // TODO(jialiul): Add test to verify that classification only starts on GET
// method. It seems there is no easy way to simulate a HTTP POST in // method. It seems there is no easy way to simulate a HTTP POST in
// ChromeRenderViewTest. // ChromeRenderViewTest.
......
...@@ -231,7 +231,9 @@ void PhishingClassifier::VisualExtractionFinished(bool success) { ...@@ -231,7 +231,9 @@ void PhishingClassifier::VisualExtractionFinished(bool success) {
} }
float score = static_cast<float>(scorer_->ComputeScore(hashed_features)); float score = static_cast<float>(scorer_->ComputeScore(hashed_features));
verdict->set_client_score(score); verdict->set_client_score(score);
verdict->set_is_phishing(score >= scorer_->threshold_probability()); bool is_dom_match = (score >= scorer_->threshold_probability());
verdict->set_is_phishing(is_dom_match);
verdict->set_is_dom_match(is_dom_match);
#if BUILDFLAG(FULL_SAFE_BROWSING) #if BUILDFLAG(FULL_SAFE_BROWSING)
visual_matching_start_ = base::TimeTicks::Now(); visual_matching_start_ = base::TimeTicks::Now();
......
...@@ -147,6 +147,11 @@ message ClientPhishingRequest { ...@@ -147,6 +147,11 @@ message ClientPhishingRequest {
// The visual feature match results. // The visual feature match results.
// Allow multiple results if there are more than one matched target. // Allow multiple results if there are more than one matched target.
repeated VisionMatchResult vision_match = 20; repeated VisionMatchResult vision_match = 20;
// Indicates whether the request is due to a match on the DOM features.
optional bool is_dom_match = 21;
// next available tag number: 22.
} }
// Vision match result for one target image. // Vision match result for one target image.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment