Commit 8055c5c0 authored by Joe DeBlasio, committed by Commit Bot

Safety Tips: add lookalike heuristic checking.

This CL adds lookalike heuristic checking to safety tips. A safety tip
is triggered only when a lookalike domain is identified and an
interstitial would NOT be shown for it. In practice, this means a safety
tip is triggered when:
 - there's an edit distance of 1 to a top or engaged domain, or
 - there's skeleton similarity to a non-top-500 top domain.

This CL also tidies the lookalike logic slightly. Chrome now
identifies lookalikes first and, as a separate step, decides
whether to display an interstitial. Both functions are used when
determining whether to trigger a safety tip, so as the interstitial
logic changes, safety tips will move out of the way appropriately.

 Bug: 984725, 996733

Change-Id: I590f6d707a5347d55fb48e71e1420434b14bccb7
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1771940
Commit-Queue: Joe DeBlasio <jdeblasio@chromium.org>
Reviewed-by: Emily Stark <estark@chromium.org>
Cr-Commit-Position: refs/heads/master@{#690940}
parent eb519b64
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include "base/memory/weak_ptr.h" #include "base/memory/weak_ptr.h"
#include "chrome/browser/engagement/site_engagement_details.mojom.h" #include "chrome/browser/engagement/site_engagement_details.mojom.h"
#include "chrome/browser/lookalikes/lookalike_url_interstitial_page.h" #include "chrome/browser/lookalikes/lookalike_url_interstitial_page.h"
#include "chrome/browser/lookalikes/safety_tips/reputation_service.h"
#include "components/url_formatter/url_formatter.h" #include "components/url_formatter/url_formatter.h"
#include "content/public/browser/navigation_throttle.h" #include "content/public/browser/navigation_throttle.h"
#include "services/metrics/public/cpp/ukm_source_id.h" #include "services/metrics/public/cpp/ukm_source_id.h"
...@@ -26,6 +27,9 @@ namespace lookalikes { ...@@ -26,6 +27,9 @@ namespace lookalikes {
struct DomainInfo; struct DomainInfo;
// Returns true if the domain given by |domain_info| is a top domain.
bool IsTopDomain(const DomainInfo& domain_info);
// Returns true if the Levenshtein distance between |str1| and |str2| is at most // Returns true if the Levenshtein distance between |str1| and |str2| is at most
// one. This has O(max(n,m)) complexity as opposed to O(n*m) of the usual edit // one. This has O(max(n,m)) complexity as opposed to O(n*m) of the usual edit
// distance computation. // distance computation.
...@@ -73,6 +77,21 @@ class LookalikeUrlNavigationThrottle : public content::NavigationThrottle { ...@@ -73,6 +77,21 @@ class LookalikeUrlNavigationThrottle : public content::NavigationThrottle {
static std::unique_ptr<LookalikeUrlNavigationThrottle> static std::unique_ptr<LookalikeUrlNavigationThrottle>
MaybeCreateNavigationThrottle(content::NavigationHandle* navigation_handle); MaybeCreateNavigationThrottle(content::NavigationHandle* navigation_handle);
static bool ShouldDisplayInterstitial(
LookalikeUrlInterstitialPage::MatchType match_type,
const DomainInfo& navigated_domain);
// Returns true if a domain is visually similar to the hostname of |url|. The
// matching domain can be a top domain or an engaged site. Similarity
// check is made using both visual skeleton and edit distance comparison. If
// this returns true, match details will be written into |matched_domain| and
// |match_type|. Pointer arguments can't be nullptr.
static bool GetMatchingDomain(
const DomainInfo& navigated_domain,
const std::vector<DomainInfo>& engaged_sites,
std::string* matched_domain,
LookalikeUrlInterstitialPage::MatchType* match_type);
private: private:
FRIEND_TEST_ALL_PREFIXES(LookalikeUrlNavigationThrottleTest, FRIEND_TEST_ALL_PREFIXES(LookalikeUrlNavigationThrottleTest,
IsEditDistanceAtMostOne); IsEditDistanceAtMostOne);
...@@ -96,22 +115,6 @@ class LookalikeUrlNavigationThrottle : public content::NavigationThrottle { ...@@ -96,22 +115,6 @@ class LookalikeUrlNavigationThrottle : public content::NavigationThrottle {
bool check_safe_redirect, bool check_safe_redirect,
const std::vector<DomainInfo>& engaged_sites); const std::vector<DomainInfo>& engaged_sites);
bool ShouldDisplayInterstitial(
LookalikeUrlInterstitialPage::MatchType match_type) const;
// Returns true if a domain is visually similar to the hostname of |url|. The
// matching domain can be a top 500 domain or an engaged site. Similarity
// check is made using both visual skeleton and edit distance comparison. If
// this returns true, match details will be written into |matched_domain| and
// |match_type|. They cannot be nullptr.
// |force_record_ukm| can be set to true even if the function returns false.
// In this case, the caller must record a UKM using |match_type|.
bool GetMatchingDomain(const DomainInfo& navigated_domain,
const std::vector<DomainInfo>& engaged_sites,
std::string* matched_domain,
LookalikeUrlInterstitialPage::MatchType* match_type,
bool* force_record_ukm);
ThrottleCheckResult ShowInterstitial( ThrottleCheckResult ShowInterstitial(
const GURL& safe_domain, const GURL& safe_domain,
const GURL& url, const GURL& url,
......
...@@ -10,10 +10,13 @@ ...@@ -10,10 +10,13 @@
#include "base/macros.h" #include "base/macros.h"
#include "base/memory/scoped_refptr.h" #include "base/memory/scoped_refptr.h"
#include "base/memory/singleton.h" #include "base/memory/singleton.h"
#include "chrome/browser/lookalikes/lookalike_url_interstitial_page.h"
#include "chrome/browser/lookalikes/lookalike_url_navigation_throttle.h"
#include "chrome/browser/lookalikes/lookalike_url_service.h" #include "chrome/browser/lookalikes/lookalike_url_service.h"
#include "chrome/browser/lookalikes/safety_tips/safety_tips_config.h" #include "chrome/browser/lookalikes/safety_tips/safety_tips_config.h"
#include "chrome/browser/profiles/incognito_helpers.h" #include "chrome/browser/profiles/incognito_helpers.h"
#include "chrome/browser/profiles/profile.h" #include "chrome/browser/profiles/profile.h"
#include "chrome/common/chrome_features.h"
#include "components/keyed_service/content/browser_context_dependency_manager.h" #include "components/keyed_service/content/browser_context_dependency_manager.h"
#include "components/keyed_service/content/browser_context_keyed_service_factory.h" #include "components/keyed_service/content/browser_context_keyed_service_factory.h"
#include "components/safe_browsing/db/v4_protocol_manager_util.h" #include "components/safe_browsing/db/v4_protocol_manager_util.h"
...@@ -22,10 +25,42 @@ namespace { ...@@ -22,10 +25,42 @@ namespace {
using chrome_browser_safety_tips::FlaggedPage; using chrome_browser_safety_tips::FlaggedPage;
using lookalikes::DomainInfo; using lookalikes::DomainInfo;
using lookalikes::LookalikeUrlNavigationThrottle;
using lookalikes::LookalikeUrlService; using lookalikes::LookalikeUrlService;
using LookalikeMatchType = LookalikeUrlInterstitialPage::MatchType;
using safe_browsing::V4ProtocolManagerUtil; using safe_browsing::V4ProtocolManagerUtil;
using safety_tips::ReputationService; using safety_tips::ReputationService;
// Boolean feature parameter named "editdistance" attached to
// features::kSafetyTipUI; defaults to false.
// ShouldTriggerSafetyTipFromLookalike() returns this value for edit-distance
// lookalike matches, so those matches only produce a safety tip when the
// parameter is turned on.
const base::FeatureParam<bool> kEnableLookalikeEditDistance{
&features::kSafetyTipUI, "editdistance", false};
bool ShouldTriggerSafetyTipFromLookalike(
const GURL& url,
const DomainInfo& navigated_domain,
const std::vector<DomainInfo>& engaged_sites) {
std::string matched_domain;
LookalikeMatchType match_type;
if (!LookalikeUrlNavigationThrottle::GetMatchingDomain(
navigated_domain, engaged_sites, &matched_domain, &match_type)) {
return false;
}
// If we're already displaying an interstitial, don't warn again.
if (LookalikeUrlNavigationThrottle::ShouldDisplayInterstitial(
match_type, navigated_domain)) {
return false;
}
// Edit distance has higher false positives, so it gets its own feature param
if (match_type == LookalikeMatchType::kEditDistance ||
match_type == LookalikeMatchType::kEditDistanceSiteEngagement) {
return kEnableLookalikeEditDistance.Get();
}
return true;
}
// This factory helps construct and find the singleton ReputationService linked // This factory helps construct and find the singleton ReputationService linked
// to a Profile. // to a Profile.
class ReputationServiceFactory : public BrowserContextKeyedServiceFactory { class ReputationServiceFactory : public BrowserContextKeyedServiceFactory {
...@@ -165,8 +200,9 @@ void ReputationService::GetReputationStatusWithEngagedSites( ...@@ -165,8 +200,9 @@ void ReputationService::GetReputationStatusWithEngagedSites(
return (navigated_domain.domain_and_registry == return (navigated_domain.domain_and_registry ==
engaged_domain.domain_and_registry); engaged_domain.domain_and_registry);
}); });
if (already_engaged != engaged_sites.end()) if (already_engaged != engaged_sites.end()) {
return; return;
}
// 2. Server-side blocklist check. // 2. Server-side blocklist check.
SafetyTipType type = GetUrlBlockType(url); SafetyTipType type = GetUrlBlockType(url);
...@@ -175,7 +211,21 @@ void ReputationService::GetReputationStatusWithEngagedSites( ...@@ -175,7 +211,21 @@ void ReputationService::GetReputationStatusWithEngagedSites(
return; return;
} }
// TODO(crbug/984725): 3. Client-side heuristics or lookalike check. // 3. Protect against bad false positives by allowing top domains.
// Empty domain_and_registry happens on private domains.
if (navigated_domain.domain_and_registry.empty() ||
lookalikes::IsTopDomain(navigated_domain)) {
return;
}
// 4. Lookalike heuristics.
if (ShouldTriggerSafetyTipFromLookalike(url, navigated_domain,
engaged_sites)) {
std::move(callback).Run(SafetyTipType::kLookalikeUrl, IsIgnored(url), url);
return;
}
// TODO(crbug/984725): 5. Additional client-side heuristics
} }
SafetyTipType GetUrlBlockType(const GURL& url) { SafetyTipType GetUrlBlockType(const GURL& url) {
......
...@@ -28,6 +28,7 @@ void LeaveSite(content::WebContents* web_contents) { ...@@ -28,6 +28,7 @@ void LeaveSite(content::WebContents* web_contents) {
int GetSafetyTipTitleId(SafetyTipType warning_type) { int GetSafetyTipTitleId(SafetyTipType warning_type) {
switch (warning_type) { switch (warning_type) {
case SafetyTipType::kBadReputation: case SafetyTipType::kBadReputation:
case SafetyTipType::kLookalikeUrl:
#if defined(OS_ANDROID) #if defined(OS_ANDROID)
return IDS_SAFETY_TIP_ANDROID_BAD_REPUTATION_TITLE; return IDS_SAFETY_TIP_ANDROID_BAD_REPUTATION_TITLE;
#else #else
...@@ -35,7 +36,6 @@ int GetSafetyTipTitleId(SafetyTipType warning_type) { ...@@ -35,7 +36,6 @@ int GetSafetyTipTitleId(SafetyTipType warning_type) {
#endif #endif
// These don't have strings yet, so they're just an error: // These don't have strings yet, so they're just an error:
case SafetyTipType::kUncommonDomain: case SafetyTipType::kUncommonDomain:
case SafetyTipType::kLookalikeUrl:
case SafetyTipType::kNone: case SafetyTipType::kNone:
NOTREACHED(); NOTREACHED();
} }
...@@ -51,9 +51,10 @@ int GetSafetyTipDescriptionId(SafetyTipType warning_type) { ...@@ -51,9 +51,10 @@ int GetSafetyTipDescriptionId(SafetyTipType warning_type) {
#else #else
return IDS_PAGE_INFO_SAFETY_TIP_BAD_REPUTATION_DESCRIPTION; return IDS_PAGE_INFO_SAFETY_TIP_BAD_REPUTATION_DESCRIPTION;
#endif #endif
case SafetyTipType::kLookalikeUrl:
return IDS_LOOKALIKE_URL_PRIMARY_PARAGRAPH;
// These don't have strings yet, so they're just an error: // These don't have strings yet, so they're just an error:
case SafetyTipType::kUncommonDomain: case SafetyTipType::kUncommonDomain:
case SafetyTipType::kLookalikeUrl:
case SafetyTipType::kNone: case SafetyTipType::kNone:
NOTREACHED(); NOTREACHED();
} }
......
...@@ -71,6 +71,8 @@ security_state::SafetyTipStatus GetSecurityStateSafetyTipType( ...@@ -71,6 +71,8 @@ security_state::SafetyTipStatus GetSecurityStateSafetyTipType(
return security_state::SafetyTipStatus::SAFETY_TIP_STATUS_NONE; return security_state::SafetyTipStatus::SAFETY_TIP_STATUS_NONE;
case safety_tips::SafetyTipType::kBadReputation: case safety_tips::SafetyTipType::kBadReputation:
return security_state::SafetyTipStatus::SAFETY_TIP_STATUS_BAD_REPUTATION; return security_state::SafetyTipStatus::SAFETY_TIP_STATUS_BAD_REPUTATION;
case safety_tips::SafetyTipType::kLookalikeUrl:
return security_state::SafetyTipStatus::SAFETY_TIP_STATUS_LOOKALIKE;
default: default:
NOTREACHED(); NOTREACHED();
return security_state::SafetyTipStatus::SAFETY_TIP_STATUS_NONE; return security_state::SafetyTipStatus::SAFETY_TIP_STATUS_NONE;
......
...@@ -291,11 +291,16 @@ PageInfoUI::GetSecurityDescription(const IdentityInfo& identity_info) const { ...@@ -291,11 +291,16 @@ PageInfoUI::GetSecurityDescription(const IdentityInfo& identity_info) const {
IDS_PAGE_INFO_BILLING_DETAILS); IDS_PAGE_INFO_BILLING_DETAILS);
} }
if (identity_info.safety_tip_status == switch (identity_info.safety_tip_status) {
security_state::SAFETY_TIP_STATUS_BAD_REPUTATION) { case security_state::SAFETY_TIP_STATUS_BAD_REPUTATION:
return CreateSecurityDescription( case security_state::SAFETY_TIP_STATUS_LOOKALIKE:
SecuritySummaryColor::RED, IDS_PAGE_INFO_SAFETY_TIP_SUMMARY, // TODO(jdeblasio): The BAD_REPUTATION string is generic enough to use for
IDS_PAGE_INFO_SAFETY_TIP_BAD_REPUTATION_DESCRIPTION); // lookalikes too, but it probably deserves its own string.
return CreateSecurityDescription(
SecuritySummaryColor::RED, IDS_PAGE_INFO_SAFETY_TIP_SUMMARY,
IDS_PAGE_INFO_SAFETY_TIP_BAD_REPUTATION_DESCRIPTION);
case security_state::SAFETY_TIP_STATUS_NONE:
break;
} }
switch (identity_info.identity_status) { switch (identity_info.identity_status) {
......
...@@ -102,6 +102,7 @@ enum MaliciousContentStatus { ...@@ -102,6 +102,7 @@ enum MaliciousContentStatus {
enum SafetyTipStatus { enum SafetyTipStatus {
SAFETY_TIP_STATUS_NONE, SAFETY_TIP_STATUS_NONE,
SAFETY_TIP_STATUS_BAD_REPUTATION, SAFETY_TIP_STATUS_BAD_REPUTATION,
SAFETY_TIP_STATUS_LOOKALIKE,
}; };
// Contains the security state relevant to computing the SecurityLevel // Contains the security state relevant to computing the SecurityLevel
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment