Commit 21b12f09, authored by Xinghui Lu, committed by Commit Bot

Add match type support for RTLookupResponse.

Several changes in realtimeapi.proto are made in this CL:

1. The old cache_expression field is renamed to
cache_expression_covering_match. This avoids breaking
old clients that are using this field.

2. Add a new enum field named cache_expression_match_type.
If this field is not set, the entry is ignored; if it
is set to EXACT_MATCH, the entry is applicable for exact
URL matching; if it is set to COVERING_MATCH, the
entry is applicable for host-suffix/path-prefix matching.

3. Add a new cache_expression field. This field will be
used as the cache expression for new clients.

For details, see go/chrome-protego-cache-matching-field.
Server-side change: http://cl/292004484

Bug: 1041675
Change-Id: I5a40babdb9be0ebad8661361a93194daa445f479
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2008080
Reviewed-by: Varun Khaneja <vakh@chromium.org>
Commit-Queue: Xinghui Lu <xinghuilu@chromium.org>
Cr-Commit-Position: refs/heads/master@{#736972}
parent 3780cdac
......@@ -1015,8 +1015,9 @@ base::Value SerializeRTThreatInfo(
"cache_duration_sec",
base::Value(static_cast<double>(threat_info.cache_duration_sec())));
threat_info_dict.SetKey("cache_expression",
base::Value(threat_info.cache_expression()));
threat_info_dict.SetKey(
"cache_expression_covering_match",
base::Value(threat_info.cache_expression_covering_match()));
std::string verdict_type;
switch (threat_info.verdict_type()) {
......@@ -1032,6 +1033,23 @@ base::Value SerializeRTThreatInfo(
}
threat_info_dict.SetKey("verdict_type", base::Value(verdict_type));
std::string cache_expression_match_type;
switch (threat_info.cache_expression_match_type()) {
case RTLookupResponse::ThreatInfo::MATCH_TYPE_UNSPECIFIED:
cache_expression_match_type = "MATCH_TYPE_UNSPECIFIED";
break;
case RTLookupResponse::ThreatInfo::COVERING_MATCH:
cache_expression_match_type = "COVERING_MATCH";
break;
case RTLookupResponse::ThreatInfo::EXACT_MATCH:
cache_expression_match_type = "EXACT_MATCH";
break;
}
threat_info_dict.SetKey("cache_expression_match_type",
base::Value(cache_expression_match_type));
threat_info_dict.SetKey("cache_expression",
base::Value(threat_info.cache_expression()));
return std::move(threat_info_dict);
}
......
......@@ -56,8 +56,13 @@ message RTLookupResponse {
optional ThreatType threat_type = 1;
// TTL of the verdict in seconds.
optional int64 cache_duration_sec = 2;
// A host-suffix/path-prefix expression for caching the verdict
optional string cache_expression = 3;
// This field is only used by previous versions of Chrome(M81 Canary and
// Dev) that only support "COVERING_MATCH". This field is deprecated in
// favor of "cache_expression" below.
optional string cache_expression_covering_match = 3;
// Type of verdicts issued by the server. Different levels of verdicts from
// 1 to 100 can be added in future based on the confidence of the verdict.
// 1 being confidently safe to 100 being confidently dangerous.
......@@ -67,7 +72,34 @@ message RTLookupResponse {
DANGEROUS = 100;
}
optional VerdictType verdict_type = 4;
enum CacheExpressionMatchType {
MATCH_TYPE_UNSPECIFIED = 0;
// The returned cache expression applies to all URLs covered by it. See
// the following for how covering works:
// https://developers.google.com/safe-browsing/v4/urls-hashing e.g.
// "test.com/foo1" of type COVERING_MATCH will not apply to
// "test.com/foo2" or "test.com/", but will apply to "test.com/foo1/bar2"
// and "baz.test.com/foo1".
COVERING_MATCH = 1;
// The returned cache expression only applies to URLs with the same host
// and path after canonicalization. e.g. "test.com/foo1" of type
// EXACT_MATCH will not apply to "test.com/" or "test.com/foo1/bar2", but
// will apply to "test.com/foo1"
EXACT_MATCH = 2;
}
optional CacheExpressionMatchType cache_expression_match_type = 5;
// The new cache expression. "cache_expression_match_type" indicates how
// this expression should be used for matching on the client. If
// "cache_expression_match_type" is not set, it means this expression is not
// applicable for caching, and the entry should be ignored.
optional string cache_expression = 6;
}
// Each matching url can have multiple threats detected, if the response
// contains multiple threat_info messages, then they are in decreasing order
// of severity so that the client could choose first applicable threat_info
......
......@@ -25,6 +25,27 @@ const char kRealTimeThreatInfoProto[] = "rt_threat_info_proto";
const char kPasswordOnFocusCacheKey[] = "password_on_focus_cache_key";
const char kRealTimeUrlCacheKey[] = "real_time_url_cache_key";
// A helper struct that bundles all match params. It is used as a centralized
// place to determine if the current cache entry should be considered as a
// match.
struct MatchParams {
  MatchParams() = default;

  // Returns true if the current cache entry should be treated as a match:
  // either the entry is not restricted to exact matching, or both the host and
  // the path are exactly equal. Read-only, hence const.
  bool ShouldMatch() const {
    return !is_only_exact_match_allowed || (is_exact_host && is_exact_path);
  }

  // Indicates whether the current cache entry and the url have the same host.
  bool is_exact_host = false;
  // Indicates whether the current cache entry and the url have the same path.
  bool is_exact_path = false;
  // Indicates whether the current cache entry is only applicable for exact
  // match. Defaults to true, the most restrictive setting.
  bool is_only_exact_match_allowed = true;
};
// Given a URL of either http or https scheme, return its http://hostname.
// e.g., "https://www.foo.com:80/bar/test.cgi" -> "http://www.foo.com".
GURL GetHostNameWithHTTPScheme(const GURL& url) {
......@@ -70,8 +91,9 @@ std::unique_ptr<base::DictionaryValue> CreateDictionaryFromVerdict(
void GeneratePathVariantsWithoutQuery(const GURL& url,
std::vector<std::string>* paths) {
std::string canonical_path;
V4ProtocolManagerUtil::CanonicalizeUrl(url, nullptr, &canonical_path,
nullptr);
V4ProtocolManagerUtil::CanonicalizeUrl(
url, /*canonicalized_hostname=*/nullptr, &canonical_path,
/*canonicalized_query=*/nullptr);
V4ProtocolManagerUtil::GeneratePathVariantsToCheck(canonical_path,
std::string(), paths);
}
......@@ -181,6 +203,26 @@ std::string GetKeyOfTypeFromTriggerType(
password_type.account_type()));
}
// If the verdict doesn't have |cache_expression_match_type| field, always
// interpret it as exact match only.
template <typename T>
bool IsOnlyExactMatchAllowed(T verdict) {
NOTREACHED();
return true;
}
template <>
bool IsOnlyExactMatchAllowed<RTLookupResponse::ThreatInfo>(
RTLookupResponse::ThreatInfo verdict) {
return verdict.cache_expression_match_type() ==
RTLookupResponse::ThreatInfo::EXACT_MATCH;
}
// Always do fuzzy matching for password protection verdicts.
template <>
bool IsOnlyExactMatchAllowed<LoginReputationClientResponse>(
LoginReputationClientResponse verdict) {
return false;
}
template <class T>
typename T::VerdictType GetMostMatchingCachedVerdictWithPathMatching(
const GURL& url,
......@@ -188,7 +230,8 @@ typename T::VerdictType GetMostMatchingCachedVerdictWithPathMatching(
scoped_refptr<HostContentSettingsMap> content_settings,
const ContentSettingsType contents_setting_type,
const char* proto_name,
T* out_response) {
T* out_response,
MatchParams match_params) {
DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
GURL hostname = GetHostNameWithHTTPScheme(url);
......@@ -207,6 +250,12 @@ typename T::VerdictType GetMostMatchingCachedVerdictWithPathMatching(
std::vector<std::string> paths;
GeneratePathVariantsWithoutQuery(url, &paths);
std::string root_path;
V4ProtocolManagerUtil::CanonicalizeUrl(
url, /*canonicalized_hostname*/ nullptr, &root_path,
/*canonicalized_query*/ nullptr);
int max_path_depth = -1;
typename T::VerdictType most_matching_verdict_type =
T::VERDICT_TYPE_UNSPECIFIED;
......@@ -226,10 +275,13 @@ typename T::VerdictType GetMostMatchingCachedVerdictWithPathMatching(
std::string cache_expression_path =
GetCacheExpressionPath(verdict.cache_expression());
match_params.is_only_exact_match_allowed = IsOnlyExactMatchAllowed(verdict);
match_params.is_exact_path = (root_path == cache_expression_path);
// Finds the most specific match.
int path_depth = static_cast<int>(GetPathDepth(cache_expression_path));
if (path_depth > max_path_depth &&
PathVariantsMatchCacheExpression(paths, cache_expression_path)) {
PathVariantsMatchCacheExpression(paths, cache_expression_path) &&
match_params.ShouldMatch()) {
max_path_depth = path_depth;
// If the most matching verdict is expired, set the result to
// VERDICT_TYPE_UNSPECIFIED.
......@@ -251,20 +303,23 @@ typename T::VerdictType GetMostMatchingCachedVerdictWithHostAndPathMatching(
const ContentSettingsType contents_setting_type,
const char* proto_name,
T* out_response) {
DCHECK(proto_name == kVerdictProto);
DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
auto most_matching_verdict_type = T::VERDICT_TYPE_UNSPECIFIED;
MatchParams match_params;
std::string root_host, root_path;
V4ProtocolManagerUtil::CanonicalizeUrl(url, &root_host, &root_path, nullptr);
V4ProtocolManagerUtil::CanonicalizeUrl(url, &root_host, &root_path,
/*canonicalized_query*/ nullptr);
std::vector<std::string> host_variants;
V4ProtocolManagerUtil::GenerateHostVariantsToCheck(root_host, &host_variants);
int max_path_depth = -1;
for (auto host : host_variants) {
for (const auto& host : host_variants) {
int depth = static_cast<int>(GetHostDepth(host));
GURL url_to_check = GetUrlWithHostAndPath(host, root_path);
match_params.is_exact_host = (root_host == host);
auto verdict_type = GetMostMatchingCachedVerdictWithPathMatching<T>(
url_to_check, type_key, content_settings, contents_setting_type,
proto_name, out_response);
proto_name, out_response, match_params);
if (depth > max_path_depth && verdict_type != T::VERDICT_TYPE_UNSPECIFIED) {
max_path_depth = depth;
most_matching_verdict_type = verdict_type;
......@@ -355,7 +410,6 @@ VerdictCacheManager::GetCachedPhishGuardVerdict(
std::string type_key =
GetKeyOfTypeFromTriggerType(trigger_type, password_type);
return GetMostMatchingCachedVerdictWithHostAndPathMatching<
LoginReputationClientResponse>(url, type_key, content_settings_,
ContentSettingsType::PASSWORD_PROTECTION,
......@@ -413,6 +467,11 @@ void VerdictCacheManager::CacheRealTimeUrlVerdict(
std::vector<std::string> visited_cache_expressions;
for (const auto& threat_info : verdict.threat_info()) {
// If |cache_expression_match_type| is unspecified, ignore this entry.
if (threat_info.cache_expression_match_type() ==
RTLookupResponse::ThreatInfo::MATCH_TYPE_UNSPECIFIED) {
continue;
}
std::string cache_expression = threat_info.cache_expression();
// TODO(crbug.com/1033692): For the same cache_expression, threat_info is in
// decreasing order of severity. To avoid lower severity threat being
......@@ -462,7 +521,7 @@ RTLookupResponse::ThreatInfo::VerdictType
VerdictCacheManager::GetCachedRealTimeUrlVerdict(
const GURL& url,
RTLookupResponse::ThreatInfo* out_threat_info) {
return GetMostMatchingCachedVerdictWithPathMatching<
return GetMostMatchingCachedVerdictWithHostAndPathMatching<
RTLookupResponse::ThreatInfo>(
url, kRealTimeUrlCacheKey, content_settings_,
ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,
......
......@@ -57,12 +57,16 @@ class VerdictCacheManagerTest : public ::testing::Test {
RTLookupResponse::ThreatInfo::VerdictType verdict_type,
RTLookupResponse::ThreatInfo::ThreatType threat_type,
int cache_duration_sec,
const std::string& cache_expression) {
const std::string& cache_expression,
RTLookupResponse::ThreatInfo::CacheExpressionMatchType
cache_expression_match_type) {
RTLookupResponse::ThreatInfo* new_threat_info = response.add_threat_info();
new_threat_info->set_verdict_type(verdict_type);
new_threat_info->set_threat_type(threat_type);
new_threat_info->set_cache_duration_sec(cache_duration_sec);
new_threat_info->set_cache_expression(cache_expression);
new_threat_info->set_cache_expression_match_type(
cache_expression_match_type);
}
protected:
......@@ -324,10 +328,12 @@ TEST_F(VerdictCacheManagerTest, TestCleanUpExpiredVerdict) {
RTLookupResponse response;
AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
RTLookupResponse::ThreatInfo::SOCIAL_ENGINEERING, 0,
"www.example.com/");
"www.example.com/",
RTLookupResponse::ThreatInfo::EXACT_MATCH);
AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
RTLookupResponse::ThreatInfo::UNWANTED_SOFTWARE, 60,
"www.example.com/path");
"www.example.com/path",
RTLookupResponse::ThreatInfo::EXACT_MATCH);
cache_manager_->CacheRealTimeUrlVerdict(GURL("https://www.example.com/"),
response, base::Time::Now());
ASSERT_EQ(2, cache_manager_->GetStoredRealTimeUrlCheckVerdictCount());
......@@ -440,10 +446,12 @@ TEST_F(VerdictCacheManagerTest, TestCanRetrieveCachedRealTimeUrlCheckVerdict) {
RTLookupResponse response;
AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::SAFE,
RTLookupResponse::ThreatInfo::THREAT_TYPE_UNSPECIFIED,
60, "www.example.com/");
60, "www.example.com/",
RTLookupResponse::ThreatInfo::EXACT_MATCH);
AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
RTLookupResponse::ThreatInfo::SOCIAL_ENGINEERING, 60,
"www.example.com/path");
"www.example.com/path",
RTLookupResponse::ThreatInfo::EXACT_MATCH);
cache_manager_->CacheRealTimeUrlVerdict(url, response, base::Time::Now());
RTLookupResponse::ThreatInfo out_verdict;
......@@ -463,16 +471,20 @@ TEST_F(VerdictCacheManagerTest,
RTLookupResponse response;
AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
RTLookupResponse::ThreatInfo::SOCIAL_ENGINEERING, 60,
"www.example.com/");
"www.example.com/",
RTLookupResponse::ThreatInfo::EXACT_MATCH);
AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
RTLookupResponse::ThreatInfo::UNWANTED_SOFTWARE, 60,
"www.example.com/");
"www.example.com/",
RTLookupResponse::ThreatInfo::EXACT_MATCH);
AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
RTLookupResponse::ThreatInfo::UNWANTED_SOFTWARE, 60,
"www.example.com/path");
"www.example.com/path",
RTLookupResponse::ThreatInfo::EXACT_MATCH);
AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
RTLookupResponse::ThreatInfo::UNCLEAR_BILLING, 60,
"www.example.com/path");
"www.example.com/path",
RTLookupResponse::ThreatInfo::EXACT_MATCH);
cache_manager_->CacheRealTimeUrlVerdict(url2, response, base::Time::Now());
RTLookupResponse::ThreatInfo out_verdict;
......@@ -496,7 +508,8 @@ TEST_F(VerdictCacheManagerTest,
RTLookupResponse response;
AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
RTLookupResponse::ThreatInfo::SOCIAL_ENGINEERING, 0,
"www.example.com/path");
"www.example.com/path",
RTLookupResponse::ThreatInfo::EXACT_MATCH);
cache_manager_->CacheRealTimeUrlVerdict(url, response, base::Time::Now());
RTLookupResponse::ThreatInfo out_verdict;
......@@ -511,7 +524,8 @@ TEST_F(VerdictCacheManagerTest,
RTLookupResponse response;
AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
RTLookupResponse::ThreatInfo::SOCIAL_ENGINEERING, 60,
"www.example.com/path");
"www.example.com/path",
RTLookupResponse::ThreatInfo::EXACT_MATCH);
cache_manager_->CacheRealTimeUrlVerdict(url, response, base::Time::Now());
RTLookupResponse::ThreatInfo out_verdict;
EXPECT_EQ(RTLookupResponse::ThreatInfo::DANGEROUS,
......@@ -543,6 +557,18 @@ TEST_F(VerdictCacheManagerTest, TestHostSuffixMatching) {
GURL("https://b.example.test/path/path2"),
LoginReputationClientRequest::PASSWORD_REUSE_EVENT,
password_type, &cached_verdict));
// Real time url check verdict.
RTLookupResponse response;
AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
RTLookupResponse::ThreatInfo::SOCIAL_ENGINEERING, 60,
"example.test/path/",
RTLookupResponse::ThreatInfo::COVERING_MATCH);
cache_manager_->CacheRealTimeUrlVerdict(url, response, base::Time::Now());
RTLookupResponse::ThreatInfo out_verdict;
EXPECT_EQ(RTLookupResponse::ThreatInfo::DANGEROUS,
cache_manager_->GetCachedRealTimeUrlVerdict(
GURL("https://b.example.test/path/path2"), &out_verdict));
}
TEST_F(VerdictCacheManagerTest, TestHostSuffixMatchingMostExactMatching) {
......@@ -571,4 +597,50 @@ TEST_F(VerdictCacheManagerTest, TestHostSuffixMatchingMostExactMatching) {
password_type, &cached_verdict));
}
// Verifies that a real time URL verdict cached with EXACT_MATCH is returned
// only for a URL whose host and path equal the cache expression.
TEST_F(VerdictCacheManagerTest, TestExactMatching) {
RTLookupResponse response;
AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
RTLookupResponse::ThreatInfo::SOCIAL_ENGINEERING, 60,
"a.example.test/path1/",
RTLookupResponse::ThreatInfo::EXACT_MATCH);
cache_manager_->CacheRealTimeUrlVerdict(
GURL("https://a.example.test/path1/path2"), response, base::Time::Now());
RTLookupResponse::ThreatInfo out_verdict;
// The looked-up URL has the same host and path as the cache expression, so
// the cached verdict is found.
EXPECT_EQ(RTLookupResponse::ThreatInfo::DANGEROUS,
cache_manager_->GetCachedRealTimeUrlVerdict(
GURL("https://a.example.test/path1/"), &out_verdict));
// Since |cache_expression_match_type| is set to EXACT_MATCH, cache is not
// found for a URL that is merely covered by the cache expression.
EXPECT_EQ(RTLookupResponse::ThreatInfo::VERDICT_TYPE_UNSPECIFIED,
cache_manager_->GetCachedRealTimeUrlVerdict(
GURL("https://a.example.test/path1/path2"), &out_verdict));
}
// Verifies that a threat info without |cache_expression_match_type| yields no
// cached verdict, and that the same threat info yields one once the match
// type is set and the response is cached again.
TEST_F(VerdictCacheManagerTest, TestMatchingTypeNotSet) {
std::string cache_expression = "a.example.test/path1";
GURL url("https://a.example.test/path1");
RTLookupResponse response;
// Build the threat info by hand (instead of via AddThreatInfoToResponse) so
// that |cache_expression_match_type| is left unset.
RTLookupResponse::ThreatInfo* new_threat_info = response.add_threat_info();
new_threat_info->set_verdict_type(RTLookupResponse::ThreatInfo::DANGEROUS);
new_threat_info->set_threat_type(
RTLookupResponse::ThreatInfo::SOCIAL_ENGINEERING);
new_threat_info->set_cache_duration_sec(60);
new_threat_info->set_cache_expression(cache_expression);
cache_manager_->CacheRealTimeUrlVerdict(url, response, base::Time::Now());
RTLookupResponse::ThreatInfo out_verdict;
// If |cache_expression_match_type| is not set, ignore this cache.
EXPECT_EQ(RTLookupResponse::ThreatInfo::VERDICT_TYPE_UNSPECIFIED,
cache_manager_->GetCachedRealTimeUrlVerdict(url, &out_verdict));
// Set the match type on the same threat info and cache the response again.
new_threat_info->set_cache_expression_match_type(
RTLookupResponse::ThreatInfo::EXACT_MATCH);
cache_manager_->CacheRealTimeUrlVerdict(url, response, base::Time::Now());
// Should be able to get the cache if |cache_expression_match_type| is set.
EXPECT_EQ(RTLookupResponse::ThreatInfo::DANGEROUS,
cache_manager_->GetCachedRealTimeUrlVerdict(url, &out_verdict));
}
} // namespace safe_browsing
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment