Improve activity log ad metrics

- Add metrics for ad injector install location.
- Add metrics for injected ad type.
- Ignore added elements that have a src of the current page.

BUG=357204

Review URL: https://codereview.chromium.org/292313006

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@273066 0039d316-1c4b-4281-b951-d872f2087c98
parent fb6705a1
...@@ -107,7 +107,7 @@ void ActiveScriptController::OnActiveTabPermissionGranted( ...@@ -107,7 +107,7 @@ void ActiveScriptController::OnActiveTabPermissionGranted(
} }
void ActiveScriptController::OnAdInjectionDetected( void ActiveScriptController::OnAdInjectionDetected(
const std::vector<std::string> ad_injectors) { const std::set<std::string> ad_injectors) {
// We're only interested in data if there are ad injectors detected. // We're only interested in data if there are ad injectors detected.
if (ad_injectors.empty()) if (ad_injectors.empty())
return; return;
......
...@@ -62,7 +62,7 @@ class ActiveScriptController : public LocationBarController::ActionProvider, ...@@ -62,7 +62,7 @@ class ActiveScriptController : public LocationBarController::ActionProvider,
void OnActiveTabPermissionGranted(const Extension* extension); void OnActiveTabPermissionGranted(const Extension* extension);
// Notifies the ActiveScriptController of detected ad injection. // Notifies the ActiveScriptController of detected ad injection.
void OnAdInjectionDetected(const std::vector<std::string> ad_injectors); void OnAdInjectionDetected(const std::set<std::string> ad_injectors);
// LocationBarControllerProvider implementation. // LocationBarControllerProvider implementation.
virtual ExtensionAction* GetActionForExtension( virtual ExtensionAction* GetActionForExtension(
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include "base/logging.h" #include "base/logging.h"
#include "base/macros.h" #include "base/macros.h"
#include "base/memory/singleton.h" #include "base/memory/singleton.h"
#include "base/metrics/histogram.h"
#include "base/strings/string_number_conversions.h" #include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h" #include "base/strings/string_util.h"
#include "base/strings/stringprintf.h" #include "base/strings/stringprintf.h"
...@@ -40,34 +41,18 @@ namespace keys = ad_injection_constants::keys; ...@@ -40,34 +41,18 @@ namespace keys = ad_injection_constants::keys;
// The list of APIs for which we upload the URL to RAPPOR. // The list of APIs for which we upload the URL to RAPPOR.
const char* kApisForRapporMetric[] = { const char* kApisForRapporMetric[] = {
"HTMLIFrameElement.src", ad_injection_constants::kHtmlIframeSrcApiName,
"HTMLEmbedElement.src", ad_injection_constants::kHtmlEmbedSrcApiName,
"HTMLAnchorElement.href", ad_injection_constants::kHtmlAnchorHrefApiName
}; };
const char* kExtensionAdInjectionRapporMetricName = const char* kExtensionAdInjectionRapporMetricName =
"Extensions.PossibleAdInjection"; "Extensions.PossibleAdInjection";
// The elements for which we check the 'src' attribute to look for ads. // The names of different types of HTML elements we check for ad injection.
const char* kSrcElements[] = { const char* kIframeElementType = "HTMLIFrameElement";
"HTMLIFrameElement", const char* kEmbedElementType = "HTMLEmbedElement";
"HTMLEmbedElement" const char* kAnchorElementType = "HTMLAnchorElement";
};
// The elements for which we check the 'href' attribute to look for ads.
const char* kHrefElements[] = {
"HTMLAnchorElement",
};
bool IsSrcElement(const std::string& str) {
static const char** end = kSrcElements + arraysize(kSrcElements);
return std::find(kSrcElements, end, str) != end;
}
bool IsHrefElement(const std::string& str) {
static const char** end = kHrefElements + arraysize(kHrefElements);
return std::find(kHrefElements, end, str) != end;
}
std::string Serialize(const base::Value* value) { std::string Serialize(const base::Value* value) {
std::string value_as_text; std::string value_as_text;
...@@ -80,52 +65,6 @@ std::string Serialize(const base::Value* value) { ...@@ -80,52 +65,6 @@ std::string Serialize(const base::Value* value) {
return value_as_text; return value_as_text;
} }
Action::InjectionType CheckDomObject(const base::DictionaryValue* object) {
std::string type;
object->GetString(keys::kType, &type);
std::string url_key;
if (IsSrcElement(type))
url_key = keys::kSrc;
else if (IsHrefElement(type))
url_key = keys::kHref;
if (!url_key.empty()) {
std::string url;
if (object->GetString(url_key, &url)) {
GURL gurl(url);
if (AdNetworkDatabase::Get()->IsAdNetwork(gurl))
return Action::INJECTION_NEW_AD;
// If the extension injected an URL which is not local to itself, there is
// a good chance it could be a new ad, and our database missed it.
// This could be noisier than other metrics, because there are perfectly
// acceptable uses for this, like "Show my mail".
if (gurl.is_valid() &&
!gurl.is_empty() &&
!gurl.SchemeIs(kExtensionScheme)) {
return Action::INJECTION_LIKELY_NEW_AD;
}
}
}
const base::ListValue* children = NULL;
if (object->GetList(keys::kChildren, &children)) {
const base::DictionaryValue* child = NULL;
for (size_t i = 0;
i < children->GetSize() &&
i < ad_injection_constants::kMaximumChildrenToCheck;
++i) {
if (children->GetDictionary(i, &child)) {
Action::InjectionType type = CheckDomObject(child);
if (type != Action::NO_AD_INJECTION)
return type;
}
}
}
return Action::NO_AD_INJECTION;
}
} // namespace } // namespace
using api::activity_log_private::ExtensionActivity; using api::activity_log_private::ExtensionActivity;
...@@ -169,22 +108,39 @@ Action::InjectionType Action::DidInjectAd( ...@@ -169,22 +108,39 @@ Action::InjectionType Action::DidInjectAd(
rappor::RapporService* rappor_service) const { rappor::RapporService* rappor_service) const {
MaybeUploadUrl(rappor_service); MaybeUploadUrl(rappor_service);
// Currently, we do not have the list of ad networks, so we exit immediately // We should always have an AdNetworkDatabase, but, on the offchance we don't,
// with NO_AD_INJECTION (unless the database has been set by a test). // don't crash in a release build.
if (!AdNetworkDatabase::Get()) if (!AdNetworkDatabase::Get()) {
NOTREACHED();
return NO_AD_INJECTION; return NO_AD_INJECTION;
}
AdType ad_type = AD_TYPE_NONE;
InjectionType injection_type = NO_AD_INJECTION;
if (api_name_ == ad_injection_constants::kHtmlIframeSrcApiName || if (EndsWith(api_name_,
api_name_ == ad_injection_constants::kHtmlEmbedSrcApiName || ad_injection_constants::kAppendChildApiSuffix,
api_name_ == ad_injection_constants::kHtmlAnchorHrefApiName) { true /* case senstive */)) {
return CheckSrcModification(); injection_type = CheckAppendChild(&ad_type);
} else if (EndsWith(api_name_, } else {
ad_injection_constants::kAppendChildApiSuffix, // Check if the action modified an element's src/href.
true /* case senstive */)) { if (api_name_ == ad_injection_constants::kHtmlIframeSrcApiName)
return CheckAppendChild(); ad_type = AD_TYPE_IFRAME;
else if (api_name_ == ad_injection_constants::kHtmlEmbedSrcApiName)
ad_type = AD_TYPE_EMBED;
else if (api_name_ == ad_injection_constants::kHtmlAnchorHrefApiName)
ad_type = AD_TYPE_ANCHOR;
if (ad_type != AD_TYPE_NONE)
injection_type = CheckSrcModification();
} }
return NO_AD_INJECTION; if (injection_type != NO_AD_INJECTION) {
UMA_HISTOGRAM_ENUMERATION(
"Extensions.AdInjection.Type", ad_type, Action::NUM_AD_TYPES);
}
return injection_type;
} }
void Action::set_args(scoped_ptr<base::ListValue> args) { void Action::set_args(scoped_ptr<base::ListValue> args) {
...@@ -403,16 +359,19 @@ std::string Action::PrintForDebug() const { ...@@ -403,16 +359,19 @@ std::string Action::PrintForDebug() const {
return result; return result;
} }
void Action::MaybeUploadUrl(rappor::RapporService* rappor_service) const { bool Action::UrlCouldBeAd(const GURL& url) const {
// If there's no given |rappor_service|, abort immediately. // Ads can only be valid urls that don't match the page's host (linking to the
if (!rappor_service) // current page should be considered valid use), and aren't local to the
return; // extension.
return url.is_valid() &&
!url.is_empty() &&
url.host() != page_url_.host() &&
!url.SchemeIs(kExtensionScheme);
}
// If the action has no url, or the url is empty, then return. void Action::MaybeUploadUrl(rappor::RapporService* rappor_service) const {
if (!arg_url_.is_valid() || arg_url_.is_empty()) // Don't bother recording if the url is innocuous (or no |rappor_service|).
return; if (!rappor_service || !UrlCouldBeAd(arg_url_))
std::string host = arg_url_.host();
if (host.empty())
return; return;
bool can_inject_ads = false; bool can_inject_ads = false;
...@@ -429,13 +388,13 @@ void Action::MaybeUploadUrl(rappor::RapporService* rappor_service) const { ...@@ -429,13 +388,13 @@ void Action::MaybeUploadUrl(rappor::RapporService* rappor_service) const {
// Record the URL - an ad *may* have been injected. // Record the URL - an ad *may* have been injected.
rappor_service->RecordSample(kExtensionAdInjectionRapporMetricName, rappor_service->RecordSample(kExtensionAdInjectionRapporMetricName,
rappor::ETLD_PLUS_ONE_RAPPOR_TYPE, rappor::ETLD_PLUS_ONE_RAPPOR_TYPE,
host); arg_url_.host());
} }
Action::InjectionType Action::CheckSrcModification() const { Action::InjectionType Action::CheckSrcModification() const {
const AdNetworkDatabase* database = AdNetworkDatabase::Get(); const AdNetworkDatabase* database = AdNetworkDatabase::Get();
bool arg_url_valid = arg_url_.is_valid() && !arg_url_.is_empty(); bool arg_url_could_be_ad = UrlCouldBeAd(arg_url_);
GURL prev_url; GURL prev_url;
std::string prev_url_string; std::string prev_url_string;
...@@ -444,7 +403,7 @@ Action::InjectionType Action::CheckSrcModification() const { ...@@ -444,7 +403,7 @@ Action::InjectionType Action::CheckSrcModification() const {
bool prev_url_valid = prev_url.is_valid() && !prev_url.is_empty(); bool prev_url_valid = prev_url.is_valid() && !prev_url.is_empty();
bool injected_ad = arg_url_valid && database->IsAdNetwork(arg_url_); bool injected_ad = arg_url_could_be_ad && database->IsAdNetwork(arg_url_);
bool replaced_ad = prev_url_valid && database->IsAdNetwork(prev_url); bool replaced_ad = prev_url_valid && database->IsAdNetwork(prev_url);
if (injected_ad && replaced_ad) if (injected_ad && replaced_ad)
...@@ -457,7 +416,7 @@ Action::InjectionType Action::CheckSrcModification() const { ...@@ -457,7 +416,7 @@ Action::InjectionType Action::CheckSrcModification() const {
// If the extension modified the URL with an external, valid URL then there's // If the extension modified the URL with an external, valid URL then there's
// a good chance it's ad injection. Log it as a likely one, which also helps // a good chance it's ad injection. Log it as a likely one, which also helps
// us determine the effectiveness of our IsAdNetwork() recognition. // us determine the effectiveness of our IsAdNetwork() recognition.
if (arg_url_valid && !arg_url_.SchemeIs(kExtensionScheme)) { if (arg_url_could_be_ad) {
if (prev_url_valid) if (prev_url_valid)
return INJECTION_LIKELY_REPLACED_AD; return INJECTION_LIKELY_REPLACED_AD;
return INJECTION_LIKELY_NEW_AD; return INJECTION_LIKELY_NEW_AD;
...@@ -466,12 +425,66 @@ Action::InjectionType Action::CheckSrcModification() const { ...@@ -466,12 +425,66 @@ Action::InjectionType Action::CheckSrcModification() const {
return NO_AD_INJECTION; return NO_AD_INJECTION;
} }
Action::InjectionType Action::CheckAppendChild() const { Action::InjectionType Action::CheckAppendChild(AdType* ad_type_out) const {
const base::DictionaryValue* child = NULL; const base::DictionaryValue* child = NULL;
if (!args_->GetDictionary(0u, &child)) if (!args_->GetDictionary(0u, &child))
return NO_AD_INJECTION; return NO_AD_INJECTION;
return CheckDomObject(child); return CheckDomObject(child, ad_type_out);
}
Action::InjectionType Action::CheckDomObject(
const base::DictionaryValue* object,
AdType* ad_type_out) const {
DCHECK(ad_type_out);
std::string type;
object->GetString(keys::kType, &type);
AdType ad_type = AD_TYPE_NONE;
std::string url_key;
if (type == kIframeElementType) {
ad_type = AD_TYPE_IFRAME;
url_key = keys::kSrc;
} else if (type == kEmbedElementType) {
ad_type = AD_TYPE_EMBED;
url_key = keys::kSrc;
} else if (type == kAnchorElementType) {
ad_type = AD_TYPE_ANCHOR;
url_key = keys::kHref;
}
if (!url_key.empty()) {
std::string url;
if (object->GetString(url_key, &url)) {
GURL gurl(url);
if (UrlCouldBeAd(gurl)) {
*ad_type_out = ad_type;
if (AdNetworkDatabase::Get()->IsAdNetwork(gurl))
return INJECTION_NEW_AD;
// If the extension injected an URL which is not local to itself or the
// page, there is a good chance it could be a new ad, and our database
// missed it.
return INJECTION_LIKELY_NEW_AD;
}
}
}
const base::ListValue* children = NULL;
if (object->GetList(keys::kChildren, &children)) {
const base::DictionaryValue* child = NULL;
for (size_t i = 0;
i < children->GetSize() &&
i < ad_injection_constants::kMaximumChildrenToCheck;
++i) {
if (children->GetDictionary(i, &child)) {
InjectionType type = CheckDomObject(child, ad_type_out);
if (type != NO_AD_INJECTION)
return type;
}
}
}
return NO_AD_INJECTION;
} }
bool ActionComparator::operator()( bool ActionComparator::operator()(
......
...@@ -45,7 +45,8 @@ class Action : public base::RefCountedThreadSafe<Action> { ...@@ -45,7 +45,8 @@ class Action : public base::RefCountedThreadSafe<Action> {
ACTION_ANY = 1001, // Used for lookups of unspecified type. ACTION_ANY = 1001, // Used for lookups of unspecified type.
}; };
// The type of ad injection an action performed. // The type of ad injection an action performed. Do not delete or reorder
// these metrics, as they are used in histogramming.
enum InjectionType { enum InjectionType {
// No ad injection occurred. // No ad injection occurred.
NO_AD_INJECTION = 0, NO_AD_INJECTION = 0,
...@@ -66,6 +67,17 @@ class Action : public base::RefCountedThreadSafe<Action> { ...@@ -66,6 +67,17 @@ class Action : public base::RefCountedThreadSafe<Action> {
NUM_INJECTION_TYPES NUM_INJECTION_TYPES
}; };
// The type of ad which was injected.
enum AdType {
AD_TYPE_NONE,
AD_TYPE_IFRAME,
AD_TYPE_EMBED,
AD_TYPE_ANCHOR,
// Place any new injection types above this entry.
NUM_AD_TYPES
};
// A useful shorthand for methods that take or return collections of Action // A useful shorthand for methods that take or return collections of Action
// objects. // objects.
typedef std::vector<scoped_refptr<Action> > ActionVector; typedef std::vector<scoped_refptr<Action> > ActionVector;
...@@ -166,14 +178,24 @@ class Action : public base::RefCountedThreadSafe<Action> { ...@@ -166,14 +178,24 @@ class Action : public base::RefCountedThreadSafe<Action> {
private: private:
friend class base::RefCountedThreadSafe<Action>; friend class base::RefCountedThreadSafe<Action>;
// Returns true if a given |url| could be an ad.
bool UrlCouldBeAd(const GURL& url) const;
// Uploads the URL to RAPPOR (preserving privacy) if this might have been an // Uploads the URL to RAPPOR (preserving privacy) if this might have been an
// ad injection. // ad injection.
void MaybeUploadUrl(rappor::RapporService* rappor_service) const; void MaybeUploadUrl(rappor::RapporService* rappor_service) const;
// Checks an action with the appendChild API for ad injection.
InjectionType CheckAppendChild() const;
// Checks an action that modified the src of an element for ad injection. // Checks an action that modified the src of an element for ad injection.
InjectionType CheckSrcModification() const; InjectionType CheckSrcModification() const;
// Checks an action with the appendChild API for ad injection.
// |ad_type_out| is populated with the type of ad which was injected, if there
// was an injection.
InjectionType CheckAppendChild(AdType* ad_type_out) const;
// Checks a DOM object (e.g. an appended child) for ad injection.
// |ad_type_out| is populated with the type of ad which was injected, if there
// was an injection.
InjectionType CheckDomObject(const base::DictionaryValue* object,
AdType* ad_type_out) const;
std::string extension_id_; std::string extension_id_;
base::Time time_; base::Time time_;
......
...@@ -17,7 +17,10 @@ ...@@ -17,7 +17,10 @@
#include "chrome/common/url_constants.h" #include "chrome/common/url_constants.h"
#include "content/public/browser/web_contents.h" #include "content/public/browser/web_contents.h"
#include "content/public/common/url_constants.h" #include "content/public/common/url_constants.h"
#include "extensions/browser/extension_registry.h"
#include "extensions/common/dom_action_types.h" #include "extensions/common/dom_action_types.h"
#include "extensions/common/extension.h"
#include "extensions/common/manifest.h"
namespace extensions { namespace extensions {
...@@ -194,7 +197,7 @@ void UmaPolicy::HistogramOnClose(const std::string& cleaned_url, ...@@ -194,7 +197,7 @@ void UmaPolicy::HistogramOnClose(const std::string& cleaned_url,
ActiveScriptController::GetForWebContents(web_contents); ActiveScriptController::GetForWebContents(web_contents);
SiteMap::iterator site_lookup = url_status_.find(cleaned_url); SiteMap::iterator site_lookup = url_status_.find(cleaned_url);
const ExtensionMap& exts = site_lookup->second; const ExtensionMap& exts = site_lookup->second;
std::vector<std::string> ad_injectors; std::set<std::string> ad_injectors;
for (ExtensionMap::const_iterator ext_iter = exts.begin(); for (ExtensionMap::const_iterator ext_iter = exts.begin();
ext_iter != exts.end(); ext_iter != exts.end();
++ext_iter) { ++ext_iter) {
...@@ -205,12 +208,25 @@ void UmaPolicy::HistogramOnClose(const std::string& cleaned_url, ...@@ -205,12 +208,25 @@ void UmaPolicy::HistogramOnClose(const std::string& cleaned_url,
statuses[i-1]++; statuses[i-1]++;
} }
if ((ext_iter->second & kAnyAdActivity) && active_script_controller) if (ext_iter->second & kAnyAdActivity)
ad_injectors.push_back(ext_iter->first); ad_injectors.insert(ext_iter->first);
} }
if (active_script_controller) if (active_script_controller)
active_script_controller->OnAdInjectionDetected(ad_injectors); active_script_controller->OnAdInjectionDetected(ad_injectors);
ExtensionRegistry* registry = ExtensionRegistry::Get(profile_);
for (std::set<std::string>::const_iterator iter = ad_injectors.begin();
iter != ad_injectors.end();
++iter) {
const Extension* extension =
registry->GetExtensionById(*iter, ExtensionRegistry::EVERYTHING);
if (extension) {
UMA_HISTOGRAM_ENUMERATION("Extensions.AdInjection.InstallLocation",
extension->location(),
Manifest::NUM_LOCATIONS);
}
}
std::string prefix = "ExtensionActivity."; std::string prefix = "ExtensionActivity.";
if (GURL(cleaned_url).host() != "www.google.com") { if (GURL(cleaned_url).host() != "www.google.com") {
UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CONTENT_SCRIPT), UMA_HISTOGRAM_COUNTS_100(prefix + GetHistogramName(CONTENT_SCRIPT),
......
...@@ -309,6 +309,24 @@ functions.push(function NewAnchorLikelyAd() { ...@@ -309,6 +309,24 @@ functions.push(function NewAnchorLikelyAd() {
return INJECTION_LIKELY_NEW_AD; return INJECTION_LIKELY_NEW_AD;
}); });
// Test that an extension adding an element is not considered likely ad
// injection if the element has a local resource.
functions.push(function LocalResourceNotConsideredAd() {
var anchor = document.createElement('a');
document.body.appendChild(anchor).href = chrome.extension.getURL('foo.html');
return NO_AD_INJECTION;
});
// Test that an extension adding an element with the same host as the current
// page is not considered ad injection.
functions.push(function SamePageUrlNotConsideredAd() {
var anchor = document.createElement('a');
// This source is something like 'http://127.0.0.1:49725/foo.html'.
document.body.appendChild(anchor).href = document.URL + 'foo.html';
return NO_AD_INJECTION;
});
functions.push(function ModifyExistingAnchorToAdNetwork() { functions.push(function ModifyExistingAnchorToAdNetwork() {
var anchor = $('non-ad-anchor'); var anchor = $('non-ad-anchor');
anchor.href = kAdNetwork; anchor.href = kAdNetwork;
......
...@@ -6036,6 +6036,22 @@ Therefore, the affected-histogram name has to have at least one dot in it. ...@@ -6036,6 +6036,22 @@ Therefore, the affected-histogram name has to have at least one dot in it.
</summary> </summary>
</histogram> </histogram>
<histogram name="Extensions.AdInjection.AdType" enum="InjectedAdType">
<owner>felt@chromium.org</owner>
<owner>rdevlin.cronin@chromium.org</owner>
<summary>The type of ad that was injected.</summary>
</histogram>
<histogram name="Extensions.AdInjection.InstallLocation"
enum="ExtensionLocation">
<owner>felt@chromium.org</owner>
<owner>rdevlin.cronin@chromium.org</owner>
<summary>
The install location of an ad-injecting extension. Recorded upon page close
for any extension that injected ads on that page.
</summary>
</histogram>
<histogram name="Extensions.AllocatePortIdPairOverflow"> <histogram name="Extensions.AllocatePortIdPairOverflow">
<owner>Please list the metric's owners. Add more owner tags as needed.</owner> <owner>Please list the metric's owners. Add more owner tags as needed.</owner>
<summary> <summary>
...@@ -36799,6 +36815,13 @@ Therefore, the affected-histogram name has to have at least one dot in it. ...@@ -36799,6 +36815,13 @@ Therefore, the affected-histogram name has to have at least one dot in it.
<int value="3" label="InfoBar dismissed by clicking the 'X'"/> <int value="3" label="InfoBar dismissed by clicking the 'X'"/>
</enum> </enum>
<enum name="InjectedAdType" type="int">
<int value="0" label="Invalid"/>
<int value="1" label="IFrame"/>
<int value="2" label="Embed"/>
<int value="3" label="Anchor"/>
</enum>
<enum name="InstantControllerEvent" type="int"> <enum name="InstantControllerEvent" type="int">
<int value="0" label="URL_ADDED_TO_BLACKLIST"/> <int value="0" label="URL_ADDED_TO_BLACKLIST"/>
<int value="1" label="URL_REMOVED_FROM_BLACKLIST"/> <int value="1" label="URL_REMOVED_FROM_BLACKLIST"/>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment