Add field is_dom_match to ClientPhishingRequest

This CL adds a new boolean field is_dom_match to CSD pings. This makes it easier to separate out the pings due to the DOM model from the ones due to the visual model. In principle, this can be computed by comparing the score in the ping to the model threshold, but this process is fairly complex due to the potential for models to change on the server side. Change-Id: I8685f5795eec41dddb433fe8f6dec5a4f30bee0c Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2610951 Reviewed-by: Bettina Dea <bdea@chromium.org> Commit-Queue: Daniel Rubery <drubery@chromium.org> Cr-Commit-Position: refs/heads/master@{#840467}

Add field is_dom_match to ClientPhishingRequest
This CL adds a new boolean field is_dom_match to CSD pings. This makes it easier to separate out the pings due to the DOM model from the ones due to the visual model. In principle, this can be computed by comparing the score in the ping to the model threshold, but this process is fairly complex due to the potential for models to change on the server side. Change-Id: I8685f5795eec41dddb433fe8f6dec5a4f30bee0c Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2610951 Reviewed-by: Bettina Dea <bdea@chromium.org> Commit-Queue: Daniel Rubery <drubery@chromium.org> Cr-Commit-Position: refs/heads/master@{#840467}
80665dbd · Daniel Rubery · Chromium LUCI CQ · e5b17137 · 80665dbd · 80665dbd
Commit 80665dbd authored Jan 06, 2021 by Daniel Rubery Committed by Chromium LUCI CQ Jan 06, 2021
3 changed files
--- a/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc
+++ b/chrome/renderer/safe_browsing/phishing_classifier_browsertest.cc
@@ -139,6 +139,7 @@ class PhishingClassifierTest : public ChromeRenderViewTest {
                                  verdict.feature_map(i).value());
    }
    is_phishing_ = verdict.is_phishing();
+    is_dom_match_ = verdict.is_dom_match();
    screenshot_digest_ = verdict.screenshot_digest();
    screenshot_phash_ = verdict.screenshot_phash();
    phash_dimension_size_ = verdict.phash_dimension_size();
@@ -174,6 +175,7 @@ class PhishingClassifierTest : public ChromeRenderViewTest {
  std::string screenshot_digest_;
  std::string screenshot_phash_;
  int phash_dimension_size_;
+  bool is_dom_match_;
  // A DiscardableMemoryAllocator is needed for certain Skia operations.
  base::TestDiscardableMemoryAllocator test_allocator_;
@@ -292,6 +294,25 @@ TEST_F(PhishingClassifierTest, TestSendsVisualDigest) {
 }
 #endif
+TEST_F(PhishingClassifierTest, TestPhishingPagesAreDomMatches) {  // A page flagged as phishing must also be flagged as a DOM match.
+  LoadHtml(
+      GURL("http://host.net"),
+      "<html><body><a href=\"http://phishing.com/\">login</a></body></html>");
+  RunPhishingClassifier(&page_text_);  // NOTE(review): presumably the fixture's test model scores this link/text as phishing; confirm against the fixture setup.
+  EXPECT_TRUE(is_phishing_);  // Fixture member populated from verdict.is_phishing().
+  EXPECT_TRUE(is_dom_match_);  // Fixture member populated from verdict.is_dom_match().
+}
+TEST_F(PhishingClassifierTest, TestSafePagesAreNotDomMatches) {  // A page not flagged as phishing must not be flagged as a DOM match.
+  LoadHtml(GURL("http://host.net"),
+           "<html><body><a href=\"http://safe.com/\">login</a></body></html>");
+  RunPhishingClassifier(&page_text_);  // NOTE(review): presumably the fixture's test model scores this page below the threshold; confirm against the fixture setup.
+  EXPECT_FALSE(is_phishing_);  // Fixture member populated from verdict.is_phishing().
+  EXPECT_FALSE(is_dom_match_);  // Fixture member populated from verdict.is_dom_match().
+}
 // TODO(jialiul): Add test to verify that classification only starts on GET
 // method. It seems there is no easy way to simulate a HTTP POST in
 // ChromeRenderViewTest.

--- a/components/safe_browsing/content/renderer/phishing_classifier/phishing_classifier.cc
+++ b/components/safe_browsing/content/renderer/phishing_classifier/phishing_classifier.cc
@@ -231,7 +231,9 @@ void PhishingClassifier::VisualExtractionFinished(bool success) {
  }
  float score = static_cast<float>(scorer_->ComputeScore(hashed_features));
  verdict->set_client_score(score);
-  verdict->set_is_phishing(score >= scorer_->threshold_probability());
+  bool is_dom_match = (score >= scorer_->threshold_probability());
+  verdict->set_is_phishing(is_dom_match);
+  verdict->set_is_dom_match(is_dom_match);
 #if BUILDFLAG(FULL_SAFE_BROWSING)
  visual_matching_start_ = base::TimeTicks::Now();

--- a/components/safe_browsing/core/proto/csd.proto
+++ b/components/safe_browsing/core/proto/csd.proto
@@ -147,6 +147,11 @@ message ClientPhishingRequest {
  // The visual feature match results.
  // Allow multiple results if there are more than one matched target.
  repeated VisionMatchResult vision_match = 20;
+  // Indicates whether the request is due to a match on the DOM features.
+  optional bool is_dom_match = 21;
+  // next available tag number: 22.
 }
 // Vision match result for one target image.