Add match type support for RTLookupResponse.

Several changes in realtimeapi.proto are made in this CL: 1. The old cache_expression field is renamed to cache_expression_covering_match; this is to avoid breaking old clients that are using this field. 2. Add a new enum field named cache_expression_match_type: if this field is not set, ignore this entry; if this field is set to EXACT_MATCH, this entry is applicable for exact url matching; if this field is set to COVERING_MATCH, this entry is applicable for host-suffix path-prefix matching. 3. Add a new cache_expression field; this field will be used as the cache expression for new clients. For details, see go/chrome-protego-cache-matching-field. Server-side change: http://cl/292004484 Bug: 1041675 Change-Id: I5a40babdb9be0ebad8661361a93194daa445f479 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2008080 Reviewed-by: Varun Khaneja <vakh@chromium.org> Commit-Queue: Xinghui Lu <xinghuilu@chromium.org> Cr-Commit-Position: refs/heads/master@{#736972}

Add match type support for RTLookupResponse.
Several changes in realtimeapi.proto are made in this CL: 1. The old cache_expression field is renamed to cache_expression_covering_match; this is to avoid breaking old clients that are using this field. 2. Add a new enum field named cache_expression_match_type: if this field is not set, ignore this entry; if this field is set to EXACT_MATCH, this entry is applicable for exact url matching; if this field is set to COVERING_MATCH, this entry is applicable for host-suffix path-prefix matching. 3. Add a new cache_expression field; this field will be used as the cache expression for new clients. For details, see go/chrome-protego-cache-matching-field. Server-side change: http://cl/292004484 Bug: 1041675 Change-Id: I5a40babdb9be0ebad8661361a93194daa445f479 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2008080 Reviewed-by: Varun Khaneja <vakh@chromium.org> Commit-Queue: Xinghui Lu <xinghuilu@chromium.org> Cr-Commit-Position: refs/heads/master@{#736972}
21b12f09 · Xinghui Lu · Commit Bot · 3780cdac · 21b12f09 · 21b12f09
Commit 21b12f09 authored Jan 30, 2020 by Xinghui Lu Committed by Commit Bot Jan 30, 2020
4 changed files
--- a/components/safe_browsing/content/web_ui/safe_browsing_ui.cc
+++ b/components/safe_browsing/content/web_ui/safe_browsing_ui.cc
@@ -1015,8 +1015,9 @@ base::Value SerializeRTThreatInfo(
      "cache_duration_sec",
      base::Value(static_cast<double>(threat_info.cache_duration_sec())));

-  threat_info_dict.SetKey("cache_expression",
-                          base::Value(threat_info.cache_expression()));
+  threat_info_dict.SetKey(
+      "cache_expression_covering_match",
+      base::Value(threat_info.cache_expression_covering_match()));

  std::string verdict_type;
  switch (threat_info.verdict_type()) {
@@ -1032,6 +1033,23 @@ base::Value SerializeRTThreatInfo(
  }
  threat_info_dict.SetKey("verdict_type", base::Value(verdict_type));

+  std::string cache_expression_match_type;
+  switch (threat_info.cache_expression_match_type()) {
+    case RTLookupResponse::ThreatInfo::MATCH_TYPE_UNSPECIFIED:
+      cache_expression_match_type = "MATCH_TYPE_UNSPECIFIED";
+      break;
+    case RTLookupResponse::ThreatInfo::COVERING_MATCH:
+      cache_expression_match_type = "COVERING_MATCH";
+      break;
+    case RTLookupResponse::ThreatInfo::EXACT_MATCH:
+      cache_expression_match_type = "EXACT_MATCH";
+      break;
+  }
+
+  threat_info_dict.SetKey("cache_expression_match_type",
+                          base::Value(cache_expression_match_type));
+  threat_info_dict.SetKey("cache_expression",
+                          base::Value(threat_info.cache_expression()));
  return std::move(threat_info_dict);
 }


--- a/components/safe_browsing/core/proto/realtimeapi.proto
+++ b/components/safe_browsing/core/proto/realtimeapi.proto
@@ -56,8 +56,13 @@ message RTLookupResponse {
    optional ThreatType threat_type = 1;
    // TTL of the verdict in seconds.
    optional int64 cache_duration_sec = 2;
+
    // A host-suffix/path-prefix expression for caching the verdict
-    optional string cache_expression = 3;
+    // This field is only used by previous versions of Chrome (M81 Canary
+    // and Dev) that only support "COVERING_MATCH". This field is deprecated
+    // in favor of "cache_expression" below.
+    optional string cache_expression_covering_match = 3;
+
    // Type of verdicts issued by the server. Different levels of verdicts from
    // 1 to 100 can be added in future based on the confidence of the verdict.
    // 1 being confidently safe to 100 being confidently dangerous.
@@ -67,7 +72,34 @@ message RTLookupResponse {
      DANGEROUS = 100;
    }
    optional VerdictType verdict_type = 4;
+
+    enum CacheExpressionMatchType {
+      MATCH_TYPE_UNSPECIFIED = 0;
+
+      // The returned cache expression applies to all URLs covered by it. See
+      // the following for how covering works:
+      // https://developers.google.com/safe-browsing/v4/urls-hashing. E.g.,
+      // "test.com/foo1" of type COVERING_MATCH will not apply to
+      // "test.com/foo2" or "test.com/", but will apply to "test.com/foo1/bar2"
+      // and "baz.test.com/foo1".
+      COVERING_MATCH = 1;
+
+      // The returned cache expression only applies to URLs with the same host
+      // and path after canonicalization. E.g., "test.com/foo1" of type
+      // EXACT_MATCH will not apply to "test.com/" or "test.com/foo1/bar2", but
+      // will apply to "test.com/foo1".
+      EXACT_MATCH = 2;
+    }
+
+    optional CacheExpressionMatchType cache_expression_match_type = 5;
+
+    // The new cache expression. "cache_expression_match_type" indicates how
+    // this expression should be used for matching on the client. If
+    // "cache_expression_match_type" is not set, it means this expression is not
+    // applicable for caching, and the entry should be ignored.
+    optional string cache_expression = 6;
  }
+
  // Each matching url can have multiple threats detected, if the response
  // contains multiple threat_info messages, then they are in decreasing order
  // of severity so that the client could choose first applicable threat_info

--- a/components/safe_browsing/core/verdict_cache_manager.cc
+++ b/components/safe_browsing/core/verdict_cache_manager.cc
@@ -25,6 +25,27 @@ const char kRealTimeThreatInfoProto[] = "rt_threat_info_proto";
 const char kPasswordOnFocusCacheKey[] = "password_on_focus_cache_key";
 const char kRealTimeUrlCacheKey[] = "real_time_url_cache_key";

+// A helper struct bundling the flags that decide whether a cache entry counts
+// as a match for a URL. Defaults are the strictest setting: exact match only,
+// with neither host nor path known to be equal yet.
+struct MatchParams {
+  MatchParams()
+      : is_exact_host(false),
+        is_exact_path(false),
+        is_only_exact_match_allowed(true) {}
+
+  bool ShouldMatch() {
+    return !is_only_exact_match_allowed || (is_exact_host && is_exact_path);
+  }
+  // Whether the current cache entry and the url have exactly the same host.
+  bool is_exact_host;
+  // Whether the current cache entry and the url have exactly the same path.
+  bool is_exact_path;
+  // Whether the current cache entry may only be used for an exact host-and-path
+  // match; when false, ShouldMatch() always returns true.
+  bool is_only_exact_match_allowed;
+};
+
 // Given a URL of either http or https scheme, return its http://hostname.
 // e.g., "https://www.foo.com:80/bar/test.cgi" -> "http://www.foo.com".
 GURL GetHostNameWithHTTPScheme(const GURL& url) {
@@ -70,8 +91,9 @@ std::unique_ptr<base::DictionaryValue> CreateDictionaryFromVerdict(
 void GeneratePathVariantsWithoutQuery(const GURL& url,
                                      std::vector<std::string>* paths) {
  std::string canonical_path;
-  V4ProtocolManagerUtil::CanonicalizeUrl(url, nullptr, &canonical_path,
-                                         nullptr);
+  V4ProtocolManagerUtil::CanonicalizeUrl(
+      url, /*canonicalized_hostname=*/nullptr, &canonical_path,
+      /*canonicalized_query=*/nullptr);
  V4ProtocolManagerUtil::GeneratePathVariantsToCheck(canonical_path,
                                                     std::string(), paths);
 }
@@ -181,6 +203,26 @@ std::string GetKeyOfTypeFromTriggerType(
                       password_type.account_type()));
 }

+// Returns whether |verdict| may only be used for exact matching. The generic
+// version is not expected to run (NOTREACHED) and falls back to exact only.
+template <typename T>
+bool IsOnlyExactMatchAllowed(T verdict) {
+  NOTREACHED();
+  return true;
+}
+template <>
+bool IsOnlyExactMatchAllowed<RTLookupResponse::ThreatInfo>(
+    RTLookupResponse::ThreatInfo verdict) {
+  return verdict.cache_expression_match_type() ==
+         RTLookupResponse::ThreatInfo::EXACT_MATCH;
+}
+// Password protection verdicts always allow fuzzy (non-exact) matching.
+template <>
+bool IsOnlyExactMatchAllowed<LoginReputationClientResponse>(
+    LoginReputationClientResponse verdict) {
+  return false;
+}
+
 template <class T>
 typename T::VerdictType GetMostMatchingCachedVerdictWithPathMatching(
    const GURL& url,
@@ -188,7 +230,8 @@ typename T::VerdictType GetMostMatchingCachedVerdictWithPathMatching(
    scoped_refptr<HostContentSettingsMap> content_settings,
    const ContentSettingsType contents_setting_type,
    const char* proto_name,
-    T* out_response) {
+    T* out_response,
+    MatchParams match_params) {
  DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);

  GURL hostname = GetHostNameWithHTTPScheme(url);
@@ -207,6 +250,12 @@ typename T::VerdictType GetMostMatchingCachedVerdictWithPathMatching(

  std::vector<std::string> paths;
  GeneratePathVariantsWithoutQuery(url, &paths);
+
+  std::string root_path;
+  V4ProtocolManagerUtil::CanonicalizeUrl(
+      url, /*canonicalized_hostname*/ nullptr, &root_path,
+      /*canonicalized_query*/ nullptr);
+
  int max_path_depth = -1;
  typename T::VerdictType most_matching_verdict_type =
      T::VERDICT_TYPE_UNSPECIFIED;
@@ -226,10 +275,13 @@ typename T::VerdictType GetMostMatchingCachedVerdictWithPathMatching(
    std::string cache_expression_path =
        GetCacheExpressionPath(verdict.cache_expression());

+    match_params.is_only_exact_match_allowed = IsOnlyExactMatchAllowed(verdict);
+    match_params.is_exact_path = (root_path == cache_expression_path);
    // Finds the most specific match.
    int path_depth = static_cast<int>(GetPathDepth(cache_expression_path));
    if (path_depth > max_path_depth &&
-        PathVariantsMatchCacheExpression(paths, cache_expression_path)) {
+        PathVariantsMatchCacheExpression(paths, cache_expression_path) &&
+        match_params.ShouldMatch()) {
      max_path_depth = path_depth;
      // If the most matching verdict is expired, set the result to
      // VERDICT_TYPE_UNSPECIFIED.
@@ -251,20 +303,23 @@ typename T::VerdictType GetMostMatchingCachedVerdictWithHostAndPathMatching(
    const ContentSettingsType contents_setting_type,
    const char* proto_name,
    T* out_response) {
-  DCHECK(proto_name == kVerdictProto);
+  DCHECK(proto_name == kVerdictProto || proto_name == kRealTimeThreatInfoProto);
  auto most_matching_verdict_type = T::VERDICT_TYPE_UNSPECIFIED;
+  MatchParams match_params;

  std::string root_host, root_path;
-  V4ProtocolManagerUtil::CanonicalizeUrl(url, &root_host, &root_path, nullptr);
+  V4ProtocolManagerUtil::CanonicalizeUrl(url, &root_host, &root_path,
+                                         /*canonicalized_query*/ nullptr);
  std::vector<std::string> host_variants;
  V4ProtocolManagerUtil::GenerateHostVariantsToCheck(root_host, &host_variants);
  int max_path_depth = -1;
-  for (auto host : host_variants) {
+  for (const auto& host : host_variants) {
    int depth = static_cast<int>(GetHostDepth(host));
    GURL url_to_check = GetUrlWithHostAndPath(host, root_path);
+    match_params.is_exact_host = (root_host == host);
    auto verdict_type = GetMostMatchingCachedVerdictWithPathMatching<T>(
        url_to_check, type_key, content_settings, contents_setting_type,
-        proto_name, out_response);
+        proto_name, out_response, match_params);
    if (depth > max_path_depth && verdict_type != T::VERDICT_TYPE_UNSPECIFIED) {
      max_path_depth = depth;
      most_matching_verdict_type = verdict_type;
@@ -355,7 +410,6 @@ VerdictCacheManager::GetCachedPhishGuardVerdict(

  std::string type_key =
      GetKeyOfTypeFromTriggerType(trigger_type, password_type);
-
  return GetMostMatchingCachedVerdictWithHostAndPathMatching<
      LoginReputationClientResponse>(url, type_key, content_settings_,
                                     ContentSettingsType::PASSWORD_PROTECTION,
@@ -413,6 +467,11 @@ void VerdictCacheManager::CacheRealTimeUrlVerdict(

  std::vector<std::string> visited_cache_expressions;
  for (const auto& threat_info : verdict.threat_info()) {
+    // If |cache_expression_match_type| is unspecified, ignore this entry.
+    if (threat_info.cache_expression_match_type() ==
+        RTLookupResponse::ThreatInfo::MATCH_TYPE_UNSPECIFIED) {
+      continue;
+    }
    std::string cache_expression = threat_info.cache_expression();
    // TODO(crbug.com/1033692): For the same cache_expression, threat_info is in
    // decreasing order of severity. To avoid lower severity threat being
@@ -462,7 +521,7 @@ RTLookupResponse::ThreatInfo::VerdictType
 VerdictCacheManager::GetCachedRealTimeUrlVerdict(
    const GURL& url,
    RTLookupResponse::ThreatInfo* out_threat_info) {
-  return GetMostMatchingCachedVerdictWithPathMatching<
+  return GetMostMatchingCachedVerdictWithHostAndPathMatching<
      RTLookupResponse::ThreatInfo>(
      url, kRealTimeUrlCacheKey, content_settings_,
      ContentSettingsType::SAFE_BROWSING_URL_CHECK_DATA,

--- a/components/safe_browsing/core/verdict_cache_manager_unittest.cc
+++ b/components/safe_browsing/core/verdict_cache_manager_unittest.cc
@@ -57,12 +57,16 @@ class VerdictCacheManagerTest : public ::testing::Test {
      RTLookupResponse::ThreatInfo::VerdictType verdict_type,
      RTLookupResponse::ThreatInfo::ThreatType threat_type,
      int cache_duration_sec,
-      const std::string& cache_expression) {
+      const std::string& cache_expression,
+      RTLookupResponse::ThreatInfo::CacheExpressionMatchType
+          cache_expression_match_type) {
    RTLookupResponse::ThreatInfo* new_threat_info = response.add_threat_info();
    new_threat_info->set_verdict_type(verdict_type);
    new_threat_info->set_threat_type(threat_type);
    new_threat_info->set_cache_duration_sec(cache_duration_sec);
    new_threat_info->set_cache_expression(cache_expression);
+    new_threat_info->set_cache_expression_match_type(
+        cache_expression_match_type);
  }

 protected:
@@ -324,10 +328,12 @@ TEST_F(VerdictCacheManagerTest, TestCleanUpExpiredVerdict) {
  RTLookupResponse response;
  AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
                          RTLookupResponse::ThreatInfo::SOCIAL_ENGINEERING, 0,
-                          "www.example.com/");
+                          "www.example.com/",
+                          RTLookupResponse::ThreatInfo::EXACT_MATCH);
  AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
                          RTLookupResponse::ThreatInfo::UNWANTED_SOFTWARE, 60,
-                          "www.example.com/path");
+                          "www.example.com/path",
+                          RTLookupResponse::ThreatInfo::EXACT_MATCH);
  cache_manager_->CacheRealTimeUrlVerdict(GURL("https://www.example.com/"),
                                          response, base::Time::Now());
  ASSERT_EQ(2, cache_manager_->GetStoredRealTimeUrlCheckVerdictCount());
@@ -440,10 +446,12 @@ TEST_F(VerdictCacheManagerTest, TestCanRetrieveCachedRealTimeUrlCheckVerdict) {
  RTLookupResponse response;
  AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::SAFE,
                          RTLookupResponse::ThreatInfo::THREAT_TYPE_UNSPECIFIED,
-                          60, "www.example.com/");
+                          60, "www.example.com/",
+                          RTLookupResponse::ThreatInfo::EXACT_MATCH);
  AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
                          RTLookupResponse::ThreatInfo::SOCIAL_ENGINEERING, 60,
-                          "www.example.com/path");
+                          "www.example.com/path",
+                          RTLookupResponse::ThreatInfo::EXACT_MATCH);
  cache_manager_->CacheRealTimeUrlVerdict(url, response, base::Time::Now());

  RTLookupResponse::ThreatInfo out_verdict;
@@ -463,16 +471,20 @@ TEST_F(VerdictCacheManagerTest,
  RTLookupResponse response;
  AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
                          RTLookupResponse::ThreatInfo::SOCIAL_ENGINEERING, 60,
-                          "www.example.com/");
+                          "www.example.com/",
+                          RTLookupResponse::ThreatInfo::EXACT_MATCH);
  AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
                          RTLookupResponse::ThreatInfo::UNWANTED_SOFTWARE, 60,
-                          "www.example.com/");
+                          "www.example.com/",
+                          RTLookupResponse::ThreatInfo::EXACT_MATCH);
  AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
                          RTLookupResponse::ThreatInfo::UNWANTED_SOFTWARE, 60,
-                          "www.example.com/path");
+                          "www.example.com/path",
+                          RTLookupResponse::ThreatInfo::EXACT_MATCH);
  AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
                          RTLookupResponse::ThreatInfo::UNCLEAR_BILLING, 60,
-                          "www.example.com/path");
+                          "www.example.com/path",
+                          RTLookupResponse::ThreatInfo::EXACT_MATCH);
  cache_manager_->CacheRealTimeUrlVerdict(url2, response, base::Time::Now());

  RTLookupResponse::ThreatInfo out_verdict;
@@ -496,7 +508,8 @@ TEST_F(VerdictCacheManagerTest,
  RTLookupResponse response;
  AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
                          RTLookupResponse::ThreatInfo::SOCIAL_ENGINEERING, 0,
-                          "www.example.com/path");
+                          "www.example.com/path",
+                          RTLookupResponse::ThreatInfo::EXACT_MATCH);
  cache_manager_->CacheRealTimeUrlVerdict(url, response, base::Time::Now());

  RTLookupResponse::ThreatInfo out_verdict;
@@ -511,7 +524,8 @@ TEST_F(VerdictCacheManagerTest,
  RTLookupResponse response;
  AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
                          RTLookupResponse::ThreatInfo::SOCIAL_ENGINEERING, 60,
-                          "www.example.com/path");
+                          "www.example.com/path",
+                          RTLookupResponse::ThreatInfo::EXACT_MATCH);
  cache_manager_->CacheRealTimeUrlVerdict(url, response, base::Time::Now());
  RTLookupResponse::ThreatInfo out_verdict;
  EXPECT_EQ(RTLookupResponse::ThreatInfo::DANGEROUS,
@@ -543,6 +557,18 @@ TEST_F(VerdictCacheManagerTest, TestHostSuffixMatching) {
                GURL("https://b.example.test/path/path2"),
                LoginReputationClientRequest::PASSWORD_REUSE_EVENT,
                password_type, &cached_verdict));
+
+  // Real time url check verdict.
+  RTLookupResponse response;
+  AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
+                          RTLookupResponse::ThreatInfo::SOCIAL_ENGINEERING, 60,
+                          "example.test/path/",
+                          RTLookupResponse::ThreatInfo::COVERING_MATCH);
+  cache_manager_->CacheRealTimeUrlVerdict(url, response, base::Time::Now());
+  RTLookupResponse::ThreatInfo out_verdict;
+  EXPECT_EQ(RTLookupResponse::ThreatInfo::DANGEROUS,
+            cache_manager_->GetCachedRealTimeUrlVerdict(
+                GURL("https://b.example.test/path/path2"), &out_verdict));
 }

 TEST_F(VerdictCacheManagerTest, TestHostSuffixMatchingMostExactMatching) {
@@ -571,4 +597,50 @@ TEST_F(VerdictCacheManagerTest, TestHostSuffixMatchingMostExactMatching) {
                password_type, &cached_verdict));
 }

+TEST_F(VerdictCacheManagerTest, TestExactMatching) {
+  RTLookupResponse response;
+  AddThreatInfoToResponse(response, RTLookupResponse::ThreatInfo::DANGEROUS,
+                          RTLookupResponse::ThreatInfo::SOCIAL_ENGINEERING, 60,
+                          "a.example.test/path1/",
+                          RTLookupResponse::ThreatInfo::EXACT_MATCH);
+  cache_manager_->CacheRealTimeUrlVerdict(
+      GURL("https://a.example.test/path1/path2"), response, base::Time::Now());
+
+  RTLookupResponse::ThreatInfo out_verdict;
+  EXPECT_EQ(RTLookupResponse::ThreatInfo::DANGEROUS,
+            cache_manager_->GetCachedRealTimeUrlVerdict(
+                GURL("https://a.example.test/path1/"), &out_verdict));
+  // Since |cache_expression_match_type| is EXACT_MATCH, a longer path under the
+  // cached expression is not a match.
+  EXPECT_EQ(RTLookupResponse::ThreatInfo::VERDICT_TYPE_UNSPECIFIED,
+            cache_manager_->GetCachedRealTimeUrlVerdict(
+                GURL("https://a.example.test/path1/path2"), &out_verdict));
+}
+
+TEST_F(VerdictCacheManagerTest, TestMatchingTypeNotSet) {
+  std::string cache_expression = "a.example.test/path1";
+  GURL url("https://a.example.test/path1");
+
+  RTLookupResponse response;
+  RTLookupResponse::ThreatInfo* new_threat_info = response.add_threat_info();
+  new_threat_info->set_verdict_type(RTLookupResponse::ThreatInfo::DANGEROUS);
+  new_threat_info->set_threat_type(
+      RTLookupResponse::ThreatInfo::SOCIAL_ENGINEERING);
+  new_threat_info->set_cache_duration_sec(60);
+  new_threat_info->set_cache_expression(cache_expression);
+  cache_manager_->CacheRealTimeUrlVerdict(url, response, base::Time::Now());
+
+  RTLookupResponse::ThreatInfo out_verdict;
+  // |cache_expression_match_type| is not set, so no cached verdict is returned.
+  EXPECT_EQ(RTLookupResponse::ThreatInfo::VERDICT_TYPE_UNSPECIFIED,
+            cache_manager_->GetCachedRealTimeUrlVerdict(url, &out_verdict));
+
+  new_threat_info->set_cache_expression_match_type(
+      RTLookupResponse::ThreatInfo::EXACT_MATCH);
+  cache_manager_->CacheRealTimeUrlVerdict(url, response, base::Time::Now());
+  // Once |cache_expression_match_type| is set, the cached verdict is returned.
+  EXPECT_EQ(RTLookupResponse::ThreatInfo::DANGEROUS,
+            cache_manager_->GetCachedRealTimeUrlVerdict(url, &out_verdict));
+}
+
 }  // namespace safe_browsing