Commit 31e5ea78 authored by Xinghui Lu's avatar Xinghui Lu Committed by Chromium LUCI CQ

Log cache false positive rate of real time URL check.

If a cache is hit during real time URL check, we'll fall back to
hash-based check. We'd like to know how often that the cache is safe
while the response from hash-based check is unsafe.

Metrics added in this CL:
SafeBrowsing.RT.GetCache.FalsePositive

Bug: 1157331
Change-Id: Ib91e8c6ae2149645f31e69252d5f2f74594cb6de
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2594095Reviewed-by: default avatarDaniel Rubery <drubery@chromium.org>
Reviewed-by: default avatarIlya Sherman <isherman@chromium.org>
Commit-Queue: Xinghui Lu <xinghuilu@chromium.org>
Cr-Commit-Position: refs/heads/master@{#838188}
parent 9c240e22
......@@ -13,6 +13,7 @@
#include "components/safe_browsing/core/browser/url_checker_delegate.h"
#include "components/safe_browsing/core/common/safebrowsing_constants.h"
#include "components/safe_browsing/core/common/thread_utils.h"
#include "components/safe_browsing/core/db/v4_protocol_manager_util.h"
#include "components/safe_browsing/core/features.h"
#include "components/safe_browsing/core/realtime/policy_engine.h"
#include "components/safe_browsing/core/realtime/url_lookup_service_base.h"
......@@ -87,8 +88,12 @@ void SafeBrowsingUrlCheckerImpl::Notifier::OnCompleteCheck(
SafeBrowsingUrlCheckerImpl::UrlInfo::UrlInfo(const GURL& in_url,
const std::string& in_method,
Notifier in_notifier)
: url(in_url), method(in_method), notifier(std::move(in_notifier)) {}
Notifier in_notifier,
bool in_is_cached_safe_url)
: url(in_url),
method(in_method),
notifier(std::move(in_notifier)),
is_cached_safe_url(in_is_cached_safe_url) {}
SafeBrowsingUrlCheckerImpl::UrlInfo::UrlInfo(UrlInfo&& other) = default;
......@@ -215,6 +220,10 @@ void SafeBrowsingUrlCheckerImpl::OnUrlResult(const GURL& url,
timer_.Stop();
RecordCheckUrlTimeout(/*timed_out=*/false);
if (urls_[next_index_].is_cached_safe_url) {
UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.RT.GetCache.FallbackThreatType",
threat_type, SB_THREAT_TYPE_MAX + 1);
}
TRACE_EVENT_ASYNC_END1("safe_browsing", "CheckUrl", this, "url", url.spec());
......@@ -304,7 +313,8 @@ void SafeBrowsingUrlCheckerImpl::CheckUrlImpl(const GURL& url,
DCHECK(CurrentlyOnThread(ThreadID::IO));
DVLOG(1) << "SafeBrowsingUrlCheckerImpl checks URL: " << url;
urls_.emplace_back(url, method, std::move(notifier));
urls_.emplace_back(url, method, std::move(notifier),
/*safe_from_real_time_cache=*/false);
ProcessUrls();
}
......@@ -616,14 +626,12 @@ void SafeBrowsingUrlCheckerImpl::OnRTLookupResponse(
if (response && (response->threat_info_size() > 0) &&
response->threat_info(0).verdict_type() ==
RTLookupResponse::ThreatInfo::DANGEROUS) {
// TODO(crbug.com/1033692): Only take the first threat info into account
// because threat infos are returned in decreasing order of severity.
// Consider extend it to support multiple threat types.
sb_threat_type =
RealTimeUrlLookupServiceBase::GetSBThreatTypeForRTThreatType(
response->threat_info(0).threat_type());
}
if (is_cached_response && sb_threat_type == SB_THREAT_TYPE_SAFE) {
urls_[next_index_].is_cached_safe_url = true;
PerformHashBasedCheck(url);
} else {
OnUrlResult(url, sb_threat_type, ThreatMetadata(),
......
......@@ -230,7 +230,10 @@ class SafeBrowsingUrlCheckerImpl : public mojom::SafeBrowsingUrlChecker,
};
struct UrlInfo {
UrlInfo(const GURL& url, const std::string& method, Notifier notifier);
UrlInfo(const GURL& url,
const std::string& method,
Notifier notifier,
bool is_cached_safe_url);
UrlInfo(UrlInfo&& other);
~UrlInfo();
......@@ -238,6 +241,9 @@ class SafeBrowsingUrlCheckerImpl : public mojom::SafeBrowsingUrlChecker,
GURL url;
std::string method;
Notifier notifier;
// If the URL is classified as safe in cache manager during real time
// lookup.
bool is_cached_safe_url;
};
const net::HttpRequestHeaders headers_;
......
......@@ -7,12 +7,14 @@
#include "base/run_loop.h"
#include "base/task/post_task.h"
#include "base/test/metrics/histogram_tester.h"
#include "base/test/mock_callback.h"
#include "base/test/task_environment.h"
#include "components/safe_browsing/core/browser/url_checker_delegate.h"
#include "components/safe_browsing/core/common/test_task_environment.h"
#include "components/safe_browsing/core/common/thread_utils.h"
#include "components/safe_browsing/core/db/test_database_manager.h"
#include "components/safe_browsing/core/db/v4_protocol_manager_util.h"
#include "components/safe_browsing/core/proto/csd.pb.h"
#include "components/safe_browsing/core/realtime/url_lookup_service.h"
#include "services/network/public/cpp/shared_url_loader_factory.h"
......@@ -203,19 +205,24 @@ class MockRealTimeUrlLookupService : public RealTimeUrlLookupService {
threat_info.set_threat_type(threat_type);
threat_info.set_verdict_type(verdict_type);
*new_threat_info = threat_info;
base::PostTask(
FROM_HERE, CreateTaskTraits(ThreadID::IO),
base::BindOnce(std::move(response_callback),
/* is_rt_lookup_successful */ true,
/* is_cached_response */ false, std::move(response)));
base::PostTask(FROM_HERE, CreateTaskTraits(ThreadID::IO),
base::BindOnce(std::move(response_callback),
/* is_rt_lookup_successful */ true,
/* is_cached_response */ is_cached_response_,
std::move(response)));
}
void SetThreatTypeForUrl(const GURL& gurl, SBThreatType threat_type) {
urls_threat_type_[gurl.spec()] = threat_type;
}
void SetIsCachedResponse(bool is_cached_response) {
is_cached_response_ = is_cached_response;
}
private:
base::flat_map<std::string, SBThreatType> urls_threat_type_;
bool is_cached_response_ = false;
};
class SafeBrowsingUrlCheckerTest : public PlatformTest {
......@@ -384,6 +391,7 @@ TEST_F(SafeBrowsingUrlCheckerTest, CheckUrl_RealTimeEnabledAllowlistMatch) {
}
TEST_F(SafeBrowsingUrlCheckerTest, CheckUrl_RealTimeEnabledSafeUrl) {
base::HistogramTester histograms;
auto safe_browsing_url_checker = CreateSafeBrowsingUrlChecker(
/*real_time_lookup_enabled=*/true, /*can_check_safe_browsing_db=*/true);
......@@ -401,6 +409,67 @@ TEST_F(SafeBrowsingUrlCheckerTest, CheckUrl_RealTimeEnabledSafeUrl) {
safe_browsing_url_checker->CheckUrl(url, "GET", callback.Get());
task_environment_->RunUntilIdle();
// The false positive metric should not be logged, because the
// verdict is not from cache.
histograms.ExpectTotalCount("SafeBrowsing.RT.GetCache.FallbackThreatType",
/* total_count */ 0);
}
TEST_F(SafeBrowsingUrlCheckerTest, CheckUrl_RealTimeEnabledSafeUrlFromCache) {
base::HistogramTester histograms;
auto safe_browsing_url_checker = CreateSafeBrowsingUrlChecker(
/*real_time_lookup_enabled=*/true, /*can_check_safe_browsing_db=*/true);
GURL url("https://example.test/");
database_manager_->SetAllowlistResultForUrl(url, AsyncMatch::NO_MATCH);
database_manager_->SetThreatTypeForUrl(url, SB_THREAT_TYPE_SAFE,
/*delayed_callback=*/false);
url_lookup_service_->SetThreatTypeForUrl(url, SB_THREAT_TYPE_SAFE);
url_lookup_service_->SetIsCachedResponse(true);
base::MockCallback<SafeBrowsingUrlCheckerImpl::NativeCheckUrlCallback>
callback;
EXPECT_CALL(callback,
Run(_, /*proceed=*/true, /*showed_interstitial=*/false));
EXPECT_CALL(*url_checker_delegate_,
StartDisplayingBlockingPageHelper(_, _, _, _, _))
.Times(0);
safe_browsing_url_checker->CheckUrl(url, "GET", callback.Get());
task_environment_->RunUntilIdle();
histograms.ExpectUniqueSample("SafeBrowsing.RT.GetCache.FallbackThreatType",
/* sample */ SB_THREAT_TYPE_SAFE,
/* expected_count */ 1);
}
TEST_F(SafeBrowsingUrlCheckerTest,
CheckUrl_RealTimeEnabledSafeUrlFromCacheFalsePositive) {
base::HistogramTester histograms;
auto safe_browsing_url_checker = CreateSafeBrowsingUrlChecker(
/*real_time_lookup_enabled=*/true, /*can_check_safe_browsing_db=*/true);
GURL url("https://example.test/");
database_manager_->SetAllowlistResultForUrl(url, AsyncMatch::NO_MATCH);
database_manager_->SetThreatTypeForUrl(url, SB_THREAT_TYPE_URL_PHISHING,
/*delayed_callback=*/false);
url_lookup_service_->SetThreatTypeForUrl(url, SB_THREAT_TYPE_SAFE);
url_lookup_service_->SetIsCachedResponse(true);
base::MockCallback<SafeBrowsingUrlCheckerImpl::NativeCheckUrlCallback>
callback;
EXPECT_CALL(*url_checker_delegate_,
StartDisplayingBlockingPageHelper(
IsSameThreatSource(ThreatSource::UNKNOWN), _, _, _, _))
.Times(1);
safe_browsing_url_checker->CheckUrl(url, "GET", callback.Get());
task_environment_->RunUntilIdle();
histograms.ExpectUniqueSample("SafeBrowsing.RT.GetCache.FallbackThreatType",
/* sample */ SB_THREAT_TYPE_URL_PHISHING,
/* expected_count */ 1);
}
TEST_F(SafeBrowsingUrlCheckerTest,
......
......@@ -94,89 +94,93 @@ std::string GetReportUrl(
// Different types of threats that SafeBrowsing protects against. This is the
// type that's returned to the clients of SafeBrowsing in Chromium.
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
// GENERATED_JAVA_ENUM_PACKAGE: org.chromium.components.safe_browsing
// GENERATED_JAVA_PREFIX_TO_STRIP: SB_THREAT_TYPE_
enum SBThreatType {
// This type can be used for lists that can be checked synchronously so a
// client callback isn't required, or for whitelists.
SB_THREAT_TYPE_UNUSED,
SB_THREAT_TYPE_UNUSED = 0,
// No threat at all.
SB_THREAT_TYPE_SAFE,
SB_THREAT_TYPE_SAFE = 1,
// The URL is being used for phishing.
SB_THREAT_TYPE_URL_PHISHING,
SB_THREAT_TYPE_URL_PHISHING = 2,
// The URL hosts malware.
SB_THREAT_TYPE_URL_MALWARE,
SB_THREAT_TYPE_URL_MALWARE = 3,
// The URL hosts unwanted programs.
SB_THREAT_TYPE_URL_UNWANTED,
SB_THREAT_TYPE_URL_UNWANTED = 4,
// The download URL is malware.
SB_THREAT_TYPE_URL_BINARY_MALWARE,
SB_THREAT_TYPE_URL_BINARY_MALWARE = 5,
// Url detected by the client-side phishing model. Note that unlike the
// above values, this does not correspond to a downloaded list.
SB_THREAT_TYPE_URL_CLIENT_SIDE_PHISHING,
SB_THREAT_TYPE_URL_CLIENT_SIDE_PHISHING = 6,
// The Chrome extension or app (given by its ID) is malware.
SB_THREAT_TYPE_EXTENSION,
SB_THREAT_TYPE_EXTENSION = 7,
// Url detected by the client-side malware IP list. This IP list is part
// of the client side detection model.
SB_THREAT_TYPE_URL_CLIENT_SIDE_MALWARE,
SB_THREAT_TYPE_URL_CLIENT_SIDE_MALWARE = 8,
// Url leads to a blacklisted resource script. Note that no warnings should be
// shown on this threat type, but an incident report might be sent.
SB_THREAT_TYPE_BLACKLISTED_RESOURCE,
SB_THREAT_TYPE_BLACKLISTED_RESOURCE = 9,
// Url abuses a permission API.
SB_THREAT_TYPE_API_ABUSE,
SB_THREAT_TYPE_API_ABUSE = 10,
// Activation patterns for the Subresource Filter.
SB_THREAT_TYPE_SUBRESOURCE_FILTER,
SB_THREAT_TYPE_SUBRESOURCE_FILTER = 11,
// CSD Phishing whitelist. This "threat" means a URL matched the whitelist.
SB_THREAT_TYPE_CSD_WHITELIST,
SB_THREAT_TYPE_CSD_WHITELIST = 12,
// DEPRECATED. Url detected by password protection service.
DEPRECATED_SB_THREAT_TYPE_URL_PASSWORD_PROTECTION_PHISHING,
DEPRECATED_SB_THREAT_TYPE_URL_PASSWORD_PROTECTION_PHISHING = 13,
// Saved password reuse detected on low reputation page,
SB_THREAT_TYPE_SAVED_PASSWORD_REUSE,
SB_THREAT_TYPE_SAVED_PASSWORD_REUSE = 14,
// Chrome signed in and syncing gaia password reuse detected on low reputation
// page,
SB_THREAT_TYPE_SIGNED_IN_SYNC_PASSWORD_REUSE,
SB_THREAT_TYPE_SIGNED_IN_SYNC_PASSWORD_REUSE = 15,
// Chrome signed in non syncing gaia password reuse detected on low reputation
// page,
SB_THREAT_TYPE_SIGNED_IN_NON_SYNC_PASSWORD_REUSE,
SB_THREAT_TYPE_SIGNED_IN_NON_SYNC_PASSWORD_REUSE = 16,
// A Google ad that caused a blocked autoredirect was collected
SB_THREAT_TYPE_BLOCKED_AD_REDIRECT,
SB_THREAT_TYPE_BLOCKED_AD_REDIRECT = 17,
// A sample of an ad was collected
SB_THREAT_TYPE_AD_SAMPLE,
SB_THREAT_TYPE_AD_SAMPLE = 18,
// A report of Google ad that caused a blocked popup was collected.
SB_THREAT_TYPE_BLOCKED_AD_POPUP,
SB_THREAT_TYPE_BLOCKED_AD_POPUP = 19,
// The page loaded a resource from the Suspicious Site list.
SB_THREAT_TYPE_SUSPICIOUS_SITE,
SB_THREAT_TYPE_SUSPICIOUS_SITE = 20,
// Enterprise password reuse detected on low reputation page.
SB_THREAT_TYPE_ENTERPRISE_PASSWORD_REUSE,
SB_THREAT_TYPE_ENTERPRISE_PASSWORD_REUSE = 21,
// Potential billing detected.
SB_THREAT_TYPE_BILLING,
SB_THREAT_TYPE_BILLING = 22,
// Off-market APK file downloaded, which could be potentially dangerous.
SB_THREAT_TYPE_APK_DOWNLOAD,
SB_THREAT_TYPE_APK_DOWNLOAD = 23,
// Match found in the local high-confidence allowlist.
SB_THREAT_TYPE_HIGH_CONFIDENCE_ALLOWLIST,
SB_THREAT_TYPE_HIGH_CONFIDENCE_ALLOWLIST = 24,
SB_THREAT_TYPE_MAX = SB_THREAT_TYPE_HIGH_CONFIDENCE_ALLOWLIST,
};
using SBThreatTypeSet = base::flat_set<SBThreatType>;
......
......@@ -66130,6 +66130,34 @@ https://www.dmtf.org/sites/default/files/standards/documents/DSP0134_2.7.1.pdf
<int value="4" label="Phishing classifier was destructed"/>
</enum>
<enum name="SBThreatType">
<int value="0" label="Unused"/>
<int value="1" label="Safe"/>
<int value="2" label="URL phishing"/>
<int value="3" label="URL malware"/>
<int value="4" label="URL unwanted"/>
<int value="5" label="URL binary malware"/>
<int value="6" label="URL client side phishing"/>
<int value="7" label="Extension"/>
<int value="8" label="URL client side malware"/>
<int value="9" label="Blacklisted resource"/>
<int value="10" label="API abuse"/>
<int value="11" label="Subresource filter"/>
<int value="12" label="CSD whitelist"/>
<int value="13" label="URL password protection phishing"/>
<int value="14" label="Saved password reuse"/>
<int value="15" label="Signed in sync password reuse"/>
<int value="16" label="Signed in non sync password reuse"/>
<int value="17" label="Blocked ad redirect"/>
<int value="18" label="Ad sample"/>
<int value="19" label="Blocked ad popup"/>
<int value="20" label="Suspicious site"/>
<int value="21" label="Enterprise password reuse"/>
<int value="22" label="Billing"/>
<int value="23" label="Apk download"/>
<int value="24" label="High confidence allowlist"/>
</enum>
<enum name="ScanAppEntryPoint">
<!-- This must be kept current with ScanAppEntryPoint in
chromeos/components/scanning/scanning_uma.h.
......@@ -861,6 +861,19 @@ reviews. Googlers can read more about this at go/gwsq-gerrit.
</summary>
</histogram>
<histogram name="SafeBrowsing.RT.GetCache.FallbackThreatType"
enum="SBThreatType" expires_after="2021-12-16">
<owner>xinghuilu@chromium.org</owner>
<owner>chrome-safebrowsing-alerts@google.com</owner>
<summary>
Logs the threat type of cached verdicts. If the threat type is not safe, it
means the cache is a false positive. False positive verdicts are safe
verdicts in cache manager but turns out to be dangerous in the Safe Browsing
database. Logged each time a URL is checked by real time lookup and the
response is safe from the cache manager.
</summary>
</histogram>
<histogram name="SafeBrowsing.RT.GetCache.Time" units="ms"
expires_after="2021-04-25">
<owner>xinghuilu@chromium.org</owner>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment