Log cache false positive rate of real time URL check.

If a cache is hit during real time URL check, we'll fall back to hash-based check. We'd like to know how often that the cache is safe while the response from hash-based check is unsafe. Metrics added in this CL: SafeBrowsing.RT.GetCache.FalsePositive Bug: 1157331 Change-Id: Ib91e8c6ae2149645f31e69252d5f2f74594cb6de Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2594095Reviewed-by: Daniel Rubery <drubery@chromium.org> Reviewed-by: Ilya Sherman <isherman@chromium.org> Commit-Queue: Xinghui Lu <xinghuilu@chromium.org> Cr-Commit-Position: refs/heads/master@{#838188}

Log cache false positive rate of real time URL check.
If a cache is hit during real time URL check, we'll fall back to hash-based check. We'd like to know how often that the cache is safe while the response from hash-based check is unsafe. Metrics added in this CL: SafeBrowsing.RT.GetCache.FalsePositive Bug: 1157331 Change-Id: Ib91e8c6ae2149645f31e69252d5f2f74594cb6de Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2594095Reviewed-by: Daniel Rubery <drubery@chromium.org> Reviewed-by: Ilya Sherman <isherman@chromium.org> Commit-Queue: Xinghui Lu <xinghuilu@chromium.org> Cr-Commit-Position: refs/heads/master@{#838188}
31e5ea78 · Xinghui Lu · Chromium LUCI CQ · 9c240e22 · 31e5ea78 · 31e5ea78
Commit 31e5ea78 authored Dec 17, 2020 by Xinghui Lu Committed by Chromium LUCI CQ Dec 17, 2020
6 changed files
--- a/components/safe_browsing/core/browser/safe_browsing_url_checker_impl.cc
+++ b/components/safe_browsing/core/browser/safe_browsing_url_checker_impl.cc
@@ -13,6 +13,7 @@
 #include "components/safe_browsing/core/browser/url_checker_delegate.h"
 #include "components/safe_browsing/core/common/safebrowsing_constants.h"
 #include "components/safe_browsing/core/common/thread_utils.h"
+#include "components/safe_browsing/core/db/v4_protocol_manager_util.h"
 #include "components/safe_browsing/core/features.h"
 #include "components/safe_browsing/core/realtime/policy_engine.h"
 #include "components/safe_browsing/core/realtime/url_lookup_service_base.h"
@@ -87,8 +88,12 @@ void SafeBrowsingUrlCheckerImpl::Notifier::OnCompleteCheck(

 SafeBrowsingUrlCheckerImpl::UrlInfo::UrlInfo(const GURL& in_url,
                                             const std::string& in_method,
-                                             Notifier in_notifier)
-    : url(in_url), method(in_method), notifier(std::move(in_notifier)) {}
+                                             Notifier in_notifier,
+                                             bool in_is_cached_safe_url)
+    : url(in_url),
+      method(in_method),
+      notifier(std::move(in_notifier)),
+      is_cached_safe_url(in_is_cached_safe_url) {}

 SafeBrowsingUrlCheckerImpl::UrlInfo::UrlInfo(UrlInfo&& other) = default;

@@ -215,6 +220,10 @@ void SafeBrowsingUrlCheckerImpl::OnUrlResult(const GURL& url,

  timer_.Stop();
  RecordCheckUrlTimeout(/*timed_out=*/false);
+  if (urls_[next_index_].is_cached_safe_url) {
+    UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.RT.GetCache.FallbackThreatType",
+                              threat_type, SB_THREAT_TYPE_MAX + 1);
+  }

  TRACE_EVENT_ASYNC_END1("safe_browsing", "CheckUrl", this, "url", url.spec());

@@ -304,7 +313,8 @@ void SafeBrowsingUrlCheckerImpl::CheckUrlImpl(const GURL& url,
  DCHECK(CurrentlyOnThread(ThreadID::IO));

  DVLOG(1) << "SafeBrowsingUrlCheckerImpl checks URL: " << url;
-  urls_.emplace_back(url, method, std::move(notifier));
+  urls_.emplace_back(url, method, std::move(notifier),
+                     /*safe_from_real_time_cache=*/false);

  ProcessUrls();
 }
@@ -616,14 +626,12 @@ void SafeBrowsingUrlCheckerImpl::OnRTLookupResponse(
  if (response && (response->threat_info_size() > 0) &&
      response->threat_info(0).verdict_type() ==
          RTLookupResponse::ThreatInfo::DANGEROUS) {
-    // TODO(crbug.com/1033692): Only take the first threat info into account
-    // because threat infos are returned in decreasing order of severity.
-    // Consider extend it to support multiple threat types.
    sb_threat_type =
        RealTimeUrlLookupServiceBase::GetSBThreatTypeForRTThreatType(
            response->threat_info(0).threat_type());
  }
  if (is_cached_response && sb_threat_type == SB_THREAT_TYPE_SAFE) {
+    urls_[next_index_].is_cached_safe_url = true;
    PerformHashBasedCheck(url);
  } else {
    OnUrlResult(url, sb_threat_type, ThreatMetadata(),

--- a/components/safe_browsing/core/browser/safe_browsing_url_checker_impl.h
+++ b/components/safe_browsing/core/browser/safe_browsing_url_checker_impl.h
@@ -230,7 +230,10 @@ class SafeBrowsingUrlCheckerImpl : public mojom::SafeBrowsingUrlChecker,
  };

  struct UrlInfo {
-    UrlInfo(const GURL& url, const std::string& method, Notifier notifier);
+    UrlInfo(const GURL& url,
+            const std::string& method,
+            Notifier notifier,
+            bool is_cached_safe_url);
    UrlInfo(UrlInfo&& other);

    ~UrlInfo();
@@ -238,6 +241,9 @@ class SafeBrowsingUrlCheckerImpl : public mojom::SafeBrowsingUrlChecker,
    GURL url;
    std::string method;
    Notifier notifier;
+    // If the URL is classified as safe in cache manager during real time
+    // lookup.
+    bool is_cached_safe_url;
  };

  const net::HttpRequestHeaders headers_;

--- a/components/safe_browsing/core/browser/safe_browsing_url_checker_impl_unittest.cc
+++ b/components/safe_browsing/core/browser/safe_browsing_url_checker_impl_unittest.cc
@@ -7,12 +7,14 @@

 #include "base/run_loop.h"
 #include "base/task/post_task.h"
+#include "base/test/metrics/histogram_tester.h"
 #include "base/test/mock_callback.h"
 #include "base/test/task_environment.h"
 #include "components/safe_browsing/core/browser/url_checker_delegate.h"
 #include "components/safe_browsing/core/common/test_task_environment.h"
 #include "components/safe_browsing/core/common/thread_utils.h"
 #include "components/safe_browsing/core/db/test_database_manager.h"
+#include "components/safe_browsing/core/db/v4_protocol_manager_util.h"
 #include "components/safe_browsing/core/proto/csd.pb.h"
 #include "components/safe_browsing/core/realtime/url_lookup_service.h"
 #include "services/network/public/cpp/shared_url_loader_factory.h"
@@ -203,19 +205,24 @@ class MockRealTimeUrlLookupService : public RealTimeUrlLookupService {
    threat_info.set_threat_type(threat_type);
    threat_info.set_verdict_type(verdict_type);
    *new_threat_info = threat_info;
-    base::PostTask(
-        FROM_HERE, CreateTaskTraits(ThreadID::IO),
-        base::BindOnce(std::move(response_callback),
-                       /* is_rt_lookup_successful */ true,
-                       /* is_cached_response */ false, std::move(response)));
+    base::PostTask(FROM_HERE, CreateTaskTraits(ThreadID::IO),
+                   base::BindOnce(std::move(response_callback),
+                                  /* is_rt_lookup_successful */ true,
+                                  /* is_cached_response */ is_cached_response_,
+                                  std::move(response)));
  }

  void SetThreatTypeForUrl(const GURL& gurl, SBThreatType threat_type) {
    urls_threat_type_[gurl.spec()] = threat_type;
  }

+  void SetIsCachedResponse(bool is_cached_response) {
+    is_cached_response_ = is_cached_response;
+  }
+
 private:
  base::flat_map<std::string, SBThreatType> urls_threat_type_;
+  bool is_cached_response_ = false;
 };

 class SafeBrowsingUrlCheckerTest : public PlatformTest {
@@ -384,6 +391,7 @@ TEST_F(SafeBrowsingUrlCheckerTest, CheckUrl_RealTimeEnabledAllowlistMatch) {
 }

 TEST_F(SafeBrowsingUrlCheckerTest, CheckUrl_RealTimeEnabledSafeUrl) {
+  base::HistogramTester histograms;
  auto safe_browsing_url_checker = CreateSafeBrowsingUrlChecker(
      /*real_time_lookup_enabled=*/true, /*can_check_safe_browsing_db=*/true);

@@ -401,6 +409,67 @@ TEST_F(SafeBrowsingUrlCheckerTest, CheckUrl_RealTimeEnabledSafeUrl) {
  safe_browsing_url_checker->CheckUrl(url, "GET", callback.Get());

  task_environment_->RunUntilIdle();
+
+  // The false positive metric should not be logged, because the
+  // verdict is not from cache.
+  histograms.ExpectTotalCount("SafeBrowsing.RT.GetCache.FallbackThreatType",
+                              /* total_count */ 0);
+}
+
+TEST_F(SafeBrowsingUrlCheckerTest, CheckUrl_RealTimeEnabledSafeUrlFromCache) {
+  base::HistogramTester histograms;
+  auto safe_browsing_url_checker = CreateSafeBrowsingUrlChecker(
+      /*real_time_lookup_enabled=*/true, /*can_check_safe_browsing_db=*/true);
+
+  GURL url("https://example.test/");
+  database_manager_->SetAllowlistResultForUrl(url, AsyncMatch::NO_MATCH);
+  database_manager_->SetThreatTypeForUrl(url, SB_THREAT_TYPE_SAFE,
+                                         /*delayed_callback=*/false);
+  url_lookup_service_->SetThreatTypeForUrl(url, SB_THREAT_TYPE_SAFE);
+  url_lookup_service_->SetIsCachedResponse(true);
+
+  base::MockCallback<SafeBrowsingUrlCheckerImpl::NativeCheckUrlCallback>
+      callback;
+  EXPECT_CALL(callback,
+              Run(_, /*proceed=*/true, /*showed_interstitial=*/false));
+  EXPECT_CALL(*url_checker_delegate_,
+              StartDisplayingBlockingPageHelper(_, _, _, _, _))
+      .Times(0);
+  safe_browsing_url_checker->CheckUrl(url, "GET", callback.Get());
+
+  task_environment_->RunUntilIdle();
+
+  histograms.ExpectUniqueSample("SafeBrowsing.RT.GetCache.FallbackThreatType",
+                                /* sample */ SB_THREAT_TYPE_SAFE,
+                                /* expected_count */ 1);
+}
+
+TEST_F(SafeBrowsingUrlCheckerTest,
+       CheckUrl_RealTimeEnabledSafeUrlFromCacheFalsePositive) {
+  base::HistogramTester histograms;
+  auto safe_browsing_url_checker = CreateSafeBrowsingUrlChecker(
+      /*real_time_lookup_enabled=*/true, /*can_check_safe_browsing_db=*/true);
+
+  GURL url("https://example.test/");
+  database_manager_->SetAllowlistResultForUrl(url, AsyncMatch::NO_MATCH);
+  database_manager_->SetThreatTypeForUrl(url, SB_THREAT_TYPE_URL_PHISHING,
+                                         /*delayed_callback=*/false);
+  url_lookup_service_->SetThreatTypeForUrl(url, SB_THREAT_TYPE_SAFE);
+  url_lookup_service_->SetIsCachedResponse(true);
+
+  base::MockCallback<SafeBrowsingUrlCheckerImpl::NativeCheckUrlCallback>
+      callback;
+  EXPECT_CALL(*url_checker_delegate_,
+              StartDisplayingBlockingPageHelper(
+                  IsSameThreatSource(ThreatSource::UNKNOWN), _, _, _, _))
+      .Times(1);
+  safe_browsing_url_checker->CheckUrl(url, "GET", callback.Get());
+
+  task_environment_->RunUntilIdle();
+
+  histograms.ExpectUniqueSample("SafeBrowsing.RT.GetCache.FallbackThreatType",
+                                /* sample */ SB_THREAT_TYPE_URL_PHISHING,
+                                /* expected_count */ 1);
 }

 TEST_F(SafeBrowsingUrlCheckerTest,

--- a/components/safe_browsing/core/db/v4_protocol_manager_util.h
+++ b/components/safe_browsing/core/db/v4_protocol_manager_util.h
@@ -94,89 +94,93 @@ std::string GetReportUrl(

 // Different types of threats that SafeBrowsing protects against. This is the
 // type that's returned to the clients of SafeBrowsing in Chromium.
+// These values are persisted to logs. Entries should not be renumbered and
+// numeric values should never be reused.
 // GENERATED_JAVA_ENUM_PACKAGE: org.chromium.components.safe_browsing
 // GENERATED_JAVA_PREFIX_TO_STRIP: SB_THREAT_TYPE_
 enum SBThreatType {
  // This type can be used for lists that can be checked synchronously so a
  // client callback isn't required, or for whitelists.
-  SB_THREAT_TYPE_UNUSED,
+  SB_THREAT_TYPE_UNUSED = 0,

  // No threat at all.
-  SB_THREAT_TYPE_SAFE,
+  SB_THREAT_TYPE_SAFE = 1,

  // The URL is being used for phishing.
-  SB_THREAT_TYPE_URL_PHISHING,
+  SB_THREAT_TYPE_URL_PHISHING = 2,

  // The URL hosts malware.
-  SB_THREAT_TYPE_URL_MALWARE,
+  SB_THREAT_TYPE_URL_MALWARE = 3,

  // The URL hosts unwanted programs.
-  SB_THREAT_TYPE_URL_UNWANTED,
+  SB_THREAT_TYPE_URL_UNWANTED = 4,

  // The download URL is malware.
-  SB_THREAT_TYPE_URL_BINARY_MALWARE,
+  SB_THREAT_TYPE_URL_BINARY_MALWARE = 5,

  // Url detected by the client-side phishing model.  Note that unlike the
  // above values, this does not correspond to a downloaded list.
-  SB_THREAT_TYPE_URL_CLIENT_SIDE_PHISHING,
+  SB_THREAT_TYPE_URL_CLIENT_SIDE_PHISHING = 6,

  // The Chrome extension or app (given by its ID) is malware.
-  SB_THREAT_TYPE_EXTENSION,
+  SB_THREAT_TYPE_EXTENSION = 7,

  // Url detected by the client-side malware IP list. This IP list is part
  // of the client side detection model.
-  SB_THREAT_TYPE_URL_CLIENT_SIDE_MALWARE,
+  SB_THREAT_TYPE_URL_CLIENT_SIDE_MALWARE = 8,

  // Url leads to a blacklisted resource script. Note that no warnings should be
  // shown on this threat type, but an incident report might be sent.
-  SB_THREAT_TYPE_BLACKLISTED_RESOURCE,
+  SB_THREAT_TYPE_BLACKLISTED_RESOURCE = 9,

  // Url abuses a permission API.
-  SB_THREAT_TYPE_API_ABUSE,
+  SB_THREAT_TYPE_API_ABUSE = 10,

  // Activation patterns for the Subresource Filter.
-  SB_THREAT_TYPE_SUBRESOURCE_FILTER,
+  SB_THREAT_TYPE_SUBRESOURCE_FILTER = 11,

  // CSD Phishing whitelist.  This "threat" means a URL matched the whitelist.
-  SB_THREAT_TYPE_CSD_WHITELIST,
+  SB_THREAT_TYPE_CSD_WHITELIST = 12,

  // DEPRECATED. Url detected by password protection service.
-  DEPRECATED_SB_THREAT_TYPE_URL_PASSWORD_PROTECTION_PHISHING,
+  DEPRECATED_SB_THREAT_TYPE_URL_PASSWORD_PROTECTION_PHISHING = 13,

  // Saved password reuse detected on low reputation page,
-  SB_THREAT_TYPE_SAVED_PASSWORD_REUSE,
+  SB_THREAT_TYPE_SAVED_PASSWORD_REUSE = 14,

  // Chrome signed in and syncing gaia password reuse detected on low reputation
  // page,
-  SB_THREAT_TYPE_SIGNED_IN_SYNC_PASSWORD_REUSE,
+  SB_THREAT_TYPE_SIGNED_IN_SYNC_PASSWORD_REUSE = 15,

  // Chrome signed in non syncing gaia password reuse detected on low reputation
  // page,
-  SB_THREAT_TYPE_SIGNED_IN_NON_SYNC_PASSWORD_REUSE,
+  SB_THREAT_TYPE_SIGNED_IN_NON_SYNC_PASSWORD_REUSE = 16,

  // A Google ad that caused a blocked autoredirect was collected
-  SB_THREAT_TYPE_BLOCKED_AD_REDIRECT,
+  SB_THREAT_TYPE_BLOCKED_AD_REDIRECT = 17,

  // A sample of an ad was collected
-  SB_THREAT_TYPE_AD_SAMPLE,
+  SB_THREAT_TYPE_AD_SAMPLE = 18,

  // A report of Google ad that caused a blocked popup was collected.
-  SB_THREAT_TYPE_BLOCKED_AD_POPUP,
+  SB_THREAT_TYPE_BLOCKED_AD_POPUP = 19,

  // The page loaded a resource from the Suspicious Site list.
-  SB_THREAT_TYPE_SUSPICIOUS_SITE,
+  SB_THREAT_TYPE_SUSPICIOUS_SITE = 20,

  // Enterprise password reuse detected on low reputation page.
-  SB_THREAT_TYPE_ENTERPRISE_PASSWORD_REUSE,
+  SB_THREAT_TYPE_ENTERPRISE_PASSWORD_REUSE = 21,

  // Potential billing detected.
-  SB_THREAT_TYPE_BILLING,
+  SB_THREAT_TYPE_BILLING = 22,

  // Off-market APK file downloaded, which could be potentially dangerous.
-  SB_THREAT_TYPE_APK_DOWNLOAD,
+  SB_THREAT_TYPE_APK_DOWNLOAD = 23,

  // Match found in the local high-confidence allowlist.
-  SB_THREAT_TYPE_HIGH_CONFIDENCE_ALLOWLIST,
+  SB_THREAT_TYPE_HIGH_CONFIDENCE_ALLOWLIST = 24,
+
+  SB_THREAT_TYPE_MAX = SB_THREAT_TYPE_HIGH_CONFIDENCE_ALLOWLIST,
 };

 using SBThreatTypeSet = base::flat_set<SBThreatType>;

--- a/tools/metrics/histograms/enums.xml
+++ b/tools/metrics/histograms/enums.xml
@@ -66130,6 +66130,34 @@ https://www.dmtf.org/sites/default/files/standards/documents/DSP0134_2.7.1.pdf
  <int value="4" label="Phishing classifier was destructed"/>
 </enum>

+<enum name="SBThreatType">
+  <int value="0" label="Unused"/>
+  <int value="1" label="Safe"/>
+  <int value="2" label="URL phishing"/>
+  <int value="3" label="URL malware"/>
+  <int value="4" label="URL unwanted"/>
+  <int value="5" label="URL binary malware"/>
+  <int value="6" label="URL client side phishing"/>
+  <int value="7" label="Extension"/>
+  <int value="8" label="URL client side malware"/>
+  <int value="9" label="Blacklisted resource"/>
+  <int value="10" label="API abuse"/>
+  <int value="11" label="Subresource filter"/>
+  <int value="12" label="CSD whitelist"/>
+  <int value="13" label="URL password protection phishing"/>
+  <int value="14" label="Saved password reuse"/>
+  <int value="15" label="Signed in sync password reuse"/>
+  <int value="16" label="Signed in non sync password reuse"/>
+  <int value="17" label="Blocked ad redirect"/>
+  <int value="18" label="Ad sample"/>
+  <int value="19" label="Blocked ad popup"/>
+  <int value="20" label="Suspicious site"/>
+  <int value="21" label="Enterprise password reuse"/>
+  <int value="22" label="Billing"/>
+  <int value="23" label="Apk download"/>
+  <int value="24" label="High confidence allowlist"/>
+</enum>
+
 <enum name="ScanAppEntryPoint">
 <!-- This must be kept current with ScanAppEntryPoint in
     chromeos/components/scanning/scanning_uma.h.
--- a/tools/metrics/histograms/histograms_xml/safe_browsing/histograms.xml
+++ b/tools/metrics/histograms/histograms_xml/safe_browsing/histograms.xml
@@ -861,6 +861,19 @@ reviews. Googlers can read more about this at go/gwsq-gerrit.
  </summary>
 </histogram>

+<histogram name="SafeBrowsing.RT.GetCache.FallbackThreatType"
+    enum="SBThreatType" expires_after="2021-12-16">
+  <owner>xinghuilu@chromium.org</owner>
+  <owner>chrome-safebrowsing-alerts@google.com</owner>
+  <summary>
+    Logs the threat type of cached verdicts. If the threat type is not safe, it
+    means the cache is a false positive. False positive verdicts are safe
+    verdicts in cache manager but turns out to be dangerous in the Safe Browsing
+    database. Logged each time a URL is checked by real time lookup and the
+    response is safe from the cache manager.
+  </summary>
+</histogram>
+
 <histogram name="SafeBrowsing.RT.GetCache.Time" units="ms"
    expires_after="2021-04-25">
  <owner>xinghuilu@chromium.org</owner>