Commit b5547a84 authored by Archana Simha's avatar Archana Simha Committed by Commit Bot

Include inline JS and onclick content in Chrome RIND report.

Elements with the tag-name "a" or "img" with "onclick" attributes
will be included in the RIND report.

The inline JS is stored in the inner_html field of HTMLElement.
This string is not modified or inspected before the report is generated.

The capturing of inline JS and onclick content is disabled.

http://gpaste/5722548836761600

Bug: 959964
Change-Id: I8c59cfb83fdb211dd5bc8486d4d21eb0378037df
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1594034Reviewed-by: default avatarMustafa Emre Acer <meacer@chromium.org>
Reviewed-by: default avatarVarun Khaneja <vakh@chromium.org>
Reviewed-by: default avatarLuke Z <lpz@chromium.org>
Commit-Queue: Archana Simha <archanasimha@google.com>
Cr-Commit-Position: refs/heads/master@{#670221}
parent 7662f044
......@@ -1114,7 +1114,7 @@ IN_PROC_BROWSER_TEST_P(SafeBrowsingBlockingPageBrowserTest,
ASSERT_EQ(2, report.dom_size());
// Because the order of elements is not deterministic, we basically need to
// verify the relationship. Namely that there is an IFRAME element and that
// its has a DIV as its parent.
// it has a DIV as its parent.
int iframe_node_id = -1;
for (const HTMLElement& elem : report.dom()) {
if (elem.tag() == "IFRAME") {
......
......@@ -54,7 +54,7 @@ TEST_F(ThreatDOMDetailsTest, Everything) {
safe_browsing::ThreatDOMDetails::kMaxNodes = 50;
safe_browsing::ThreatDOMDetails::kMaxAttributes = 5;
const char urlprefix[] = "data:text/html;charset=utf-8,";
const char kUrlPrefix[] = "data:text/html;charset=utf-8,";
const char kMaxNodesExceededMetric[] =
"SafeBrowsing.ThreatReport.MaxNodesExceededInFrame";
{
......@@ -66,7 +66,7 @@ TEST_F(ThreatDOMDetailsTest, Everything) {
details->ExtractResources(&params);
ASSERT_EQ(1u, params.size());
auto* param = params[0].get();
EXPECT_EQ(GURL(urlprefix + net::EscapeQueryParamValue(html, false)),
EXPECT_EQ(GURL(kUrlPrefix + net::EscapeQueryParamValue(html, false)),
param->url);
EXPECT_EQ(0, param->node_id);
EXPECT_EQ(0, param->parent_node_id);
......@@ -84,7 +84,7 @@ TEST_F(ThreatDOMDetailsTest, Everything) {
std::string html = "<html><head><script src=\"" + script1_url.spec() +
"\"></script><script src=\"" + script2_url.spec() +
"\"></script></head></html>";
GURL url(urlprefix + net::EscapeQueryParamValue(html, false));
GURL url(kUrlPrefix + net::EscapeQueryParamValue(html, false));
LoadHTML(html.c_str());
std::vector<safe_browsing::mojom::ThreatDOMDetailsNodePtr> params;
......@@ -131,17 +131,17 @@ TEST_F(ThreatDOMDetailsTest, Everything) {
// divs with attribute bar. So div foo will be collected and contain iframe1
// and div baz as children.
std::string iframe2_html = "<html><body>iframe2</body></html>";
GURL iframe2_url(urlprefix + iframe2_html);
GURL iframe2_url(kUrlPrefix + iframe2_html);
std::string iframe1_html = "<iframe src=\"" +
net::EscapeForHTML(iframe2_url.spec()) +
"\"></iframe>";
GURL iframe1_url(urlprefix + iframe1_html);
GURL iframe1_url(kUrlPrefix + iframe1_html);
std::string html =
"<html><head><div foo=1 foo2=2><img foo=1><div bar=1><div baz=1></div>"
"<iframe src=\"" +
net::EscapeForHTML(iframe1_url.spec()) +
"\"></iframe></div></div></head></html>";
GURL url(urlprefix + net::EscapeQueryParamValue(html, false));
GURL url(kUrlPrefix + net::EscapeQueryParamValue(html, false));
LoadHTML(html.c_str());
std::vector<safe_browsing::mojom::ThreatDOMDetailsNodePtr> params;
......@@ -199,11 +199,11 @@ TEST_F(ThreatDOMDetailsTest, Everything) {
std::string html;
for (int i = 0; i < 55; ++i) {
// The iframe contents is just a number.
GURL iframe_url(base::StringPrintf("%s%d", urlprefix, i));
GURL iframe_url(base::StringPrintf("%s%d", kUrlPrefix, i));
html += "<iframe src=\"" + net::EscapeForHTML(iframe_url.spec()) +
"\"></iframe>";
}
GURL url(urlprefix + html);
GURL url(kUrlPrefix + html);
LoadHTML(html.c_str());
base::HistogramTester histograms;
......@@ -228,11 +228,11 @@ TEST_F(ThreatDOMDetailsTest, Everything) {
std::string html;
for (int i = 0; i < 55; ++i) {
// The iframe contents is just a number.
GURL script_url(base::StringPrintf("%s%d", urlprefix, i));
GURL script_url(base::StringPrintf("%s%d", kUrlPrefix, i));
html += "<script src=\"" + net::EscapeForHTML(script_url.spec()) +
"\"></script>";
}
GURL url(urlprefix + html);
GURL url(kUrlPrefix + html);
LoadHTML(html.c_str());
base::HistogramTester histograms;
......@@ -261,7 +261,7 @@ TEST_F(ThreatDOMDetailsTest, Everything) {
LoadHTML(html.c_str());
std::vector<safe_browsing::mojom::ThreatDOMDetailsNodePtr> params;
details->ExtractResources(&params);
GURL url = GURL(urlprefix + net::EscapeQueryParamValue(html, false));
GURL url = GURL(kUrlPrefix + net::EscapeQueryParamValue(html, false));
ASSERT_EQ(2u, params.size());
auto* param = params[0].get();
EXPECT_TRUE(param->url.is_empty());
......@@ -302,7 +302,7 @@ TEST_F(ThreatDOMDetailsTest, DefaultTagAndAttributesList) {
std::unique_ptr<safe_browsing::ThreatDOMDetails> details(
safe_browsing::ThreatDOMDetails::Create(view_->GetMainRenderFrame(),
registry_.get()));
const char urlprefix[] = "data:text/html;charset=utf-8,";
const char kUrlPrefix[] = "data:text/html;charset=utf-8,";
// A page with some divs containing an iframe which itself contains an
// iframe. Tag "img foo" exists to ensure we honour both the tag name and
......@@ -320,13 +320,13 @@ TEST_F(ThreatDOMDetailsTest, DefaultTagAndAttributesList) {
// since it is the direct child of the main frame, but it would not
// go inside of the iframe.
std::string iframe2_html = "<html><body>iframe2</body></html>";
GURL iframe2_url(urlprefix + iframe2_html);
GURL iframe2_url(kUrlPrefix + iframe2_html);
std::string html =
"<html><head><div data-google-query-id=foo><div id=bar>"
"<iframe id=baz><iframe src=\"" +
net::EscapeForHTML(iframe2_url.spec()) +
"\"></iframe></iframe></div></div></head></html>";
GURL url(urlprefix + net::EscapeQueryParamValue(html, false));
GURL url(kUrlPrefix + net::EscapeQueryParamValue(html, false));
LoadHTML(html.c_str());
std::vector<safe_browsing::mojom::ThreatDOMDetailsNodePtr> params;
......@@ -382,3 +382,92 @@ TEST_F(ThreatDOMDetailsTest, DefaultTagAndAttributesList) {
EXPECT_EQ(0, param->parent_node_id);
EXPECT_TRUE(param->child_node_ids.empty());
}
TEST_F(ThreatDOMDetailsTest, CheckTagAndAttributeListIsSorted) {
std::unique_ptr<base::test::ScopedFeatureList> scoped_list(
new base::test::ScopedFeatureList);
scoped_list->InitAndEnableFeature(
safe_browsing::kCaptureInlineJavascriptForGoogleAds);
std::unique_ptr<safe_browsing::ThreatDOMDetails> details(
safe_browsing::ThreatDOMDetails::Create(view_->GetMainRenderFrame(),
registry_.get()));
std::vector<safe_browsing::TagAndAttributesItem> tag_and_attr_list =
details->GetTagAndAttributesListForTest();
bool is_sorted;
std::vector<std::string> tag_names;
for (auto item : tag_and_attr_list) {
tag_names.push_back(item.tag_name);
// Check that list of attributes is sorted.
is_sorted = std::is_sorted(item.attributes.begin(), item.attributes.end());
EXPECT_TRUE(is_sorted);
}
// Check that the tags are sorted.
is_sorted = std::is_sorted(tag_names.begin(), tag_names.end());
EXPECT_TRUE(is_sorted);
}
TEST_F(ThreatDOMDetailsTest, CaptureInnerHtmlContent) {
std::unique_ptr<base::test::ScopedFeatureList> scoped_list(
new base::test::ScopedFeatureList);
scoped_list->InitAndEnableFeature(
safe_browsing::kCaptureInlineJavascriptForGoogleAds);
std::unique_ptr<safe_browsing::ThreatDOMDetails> details(
safe_browsing::ThreatDOMDetails::Create(view_->GetMainRenderFrame(),
registry_.get()));
const char kUrlPrefix[] = "data:text/html;charset=utf-8,";
const char kMaxNodesExceededMetric[] =
"SafeBrowsing.ThreatReport.MaxNodesExceededInFrame";
{
// A page with a html element without an onclick element, html element with
// an onclick element, an internal script. Html elements without onclick
// elements should not be recorded in ThreatDomDetails.
std::string html =
"<html><head><a></a><a onclick=\"var y = 2;\"></a><img onclick=\"var z "
"= 3;\"></img><script>var x = 1;</script></head></html>";
LoadHTML(html.c_str());
base::HistogramTester histograms;
std::vector<safe_browsing::mojom::ThreatDOMDetailsNodePtr> params;
details->ExtractResources(&params);
ASSERT_EQ(4u, params.size());
auto* param = params[0].get();
EXPECT_EQ(GURL(), param->url);
EXPECT_EQ("A", param->tag_name);
EXPECT_EQ(1, param->node_id);
EXPECT_EQ(0, param->parent_node_id);
EXPECT_TRUE(param->child_node_ids.empty());
EXPECT_EQ(1u, param->attributes.size());
EXPECT_EQ("onclick", param->attributes[0]->name);
EXPECT_EQ("var y = 2;", param->attributes[0]->value);
param = params[1].get();
EXPECT_EQ(GURL(), param->url);
EXPECT_EQ("IMG", param->tag_name);
EXPECT_EQ(2, param->node_id);
EXPECT_EQ(0, param->parent_node_id);
EXPECT_TRUE(param->child_node_ids.empty());
EXPECT_TRUE(param->child_node_ids.empty());
EXPECT_EQ(1u, param->attributes.size());
EXPECT_EQ("onclick", param->attributes[0]->name);
EXPECT_EQ("var z = 3;", param->attributes[0]->value);
param = params[2].get();
EXPECT_EQ(GURL(), param->url);
EXPECT_EQ("SCRIPT", param->tag_name);
EXPECT_EQ(3, param->node_id);
EXPECT_EQ(0, param->parent_node_id);
EXPECT_TRUE(param->child_node_ids.empty());
EXPECT_EQ("var x = 1;", param->inner_html);
param = params[3].get();
EXPECT_EQ(GURL(kUrlPrefix + net::EscapeQueryParamValue(html, false)),
param->url);
EXPECT_EQ(0, param->node_id);
EXPECT_EQ(0, param->parent_node_id);
EXPECT_TRUE(param->child_node_ids.empty());
histograms.ExpectBucketCount(kMaxNodesExceededMetric, false, 1);
}
}
......@@ -488,6 +488,7 @@ void ThreatDetails::AddDomElement(
const std::string& tagname,
const int parent_element_node_id,
const std::vector<mojom::AttributeNameValuePtr> attributes,
const std::string& inner_html,
const ClientSafeBrowsingReportRequest::Resource* resource) {
// Create the element. It should not exist already since this function should
// only be called once for each element.
......@@ -512,6 +513,10 @@ void ThreatDetails::AddDomElement(
}
}
if (!inner_html.empty()) {
cur_element->set_inner_html(inner_html);
}
if (resource) {
cur_element->set_resource_id(resource->id());
}
......@@ -718,7 +723,8 @@ void ThreatDetails::AddDOMDetails(
// Check for a tag_name to avoid adding the summary node to the DOM.
if (!node.tag_name.empty()) {
AddDomElement(frame_tree_node_id, node.node_id, node.tag_name,
node.parent_node_id, std::move(node.attributes), resource);
node.parent_node_id, std::move(node.attributes),
node.inner_html, resource);
}
}
}
......
......@@ -177,12 +177,14 @@ class ThreatDetails : public content::WebContentsObserver {
// ID of the element within the frame. |tag_name| is the tag of the element.
// |parent_element_node_id| is the unique ID of the parent element within the
// frame. |attributes| contains the names and values of the element's
// attributes. |resource| is set if this element is a resource.
// attributes. |inner_html| is set if the element contains inline JavaScript.
// |resource| is set if this element is a resource.
void AddDomElement(const int frame_tree_node_id,
const int element_node_id,
const std::string& tag_name,
const int parent_element_node_id,
const std::vector<mojom::AttributeNameValuePtr> attributes,
const std::string& inner_html,
const ClientSafeBrowsingReportRequest::Resource* resource);
// Populates the referrer chain data in |report_|. This may be skipped if the
......
......@@ -95,6 +95,8 @@ struct ThreatDOMDetailsNode {
// routing ID of the local or remote frame in this renderer process that is
// responsible for rendering the contents of this frame (to handle OOPIFs).
int32 child_frame_routing_id;
// This field holds inline JavaScript content and remotely hosted scripts.
string inner_html;
};
interface ThreatReporter {
......
......@@ -23,6 +23,9 @@ namespace safe_browsing {
const base::Feature kAdSamplerTriggerFeature{"SafeBrowsingAdSamplerTrigger",
base::FEATURE_DISABLED_BY_DEFAULT};
const base::Feature kCaptureInlineJavascriptForGoogleAds{
"CaptureInlineJavascriptForGoogleAds", base::FEATURE_DISABLED_BY_DEFAULT};
const base::Feature kCaptureSafetyNetId{"SafeBrowsingCaptureSafetyNetId",
base::FEATURE_DISABLED_BY_DEFAULT};
......@@ -75,6 +78,7 @@ constexpr struct {
bool show_state;
} kExperimentalFeatures[]{
{&kAdSamplerTriggerFeature, false},
{&kCaptureInlineJavascriptForGoogleAds, true},
{&kCaptureSafetyNetId, true},
{&kCheckByURLLoaderThrottle, true},
{&kCommittedSBInterstitials, true},
......
......@@ -21,6 +21,10 @@ namespace safe_browsing {
// Features list
extern const base::Feature kAdSamplerTriggerFeature;
// Controls whether we sample inline JavaScript for ads in RIND
// reports.
extern const base::Feature kCaptureInlineJavascriptForGoogleAds;
// Controls whether we try to get the SafetyNet ID of the device for use when
// a SBER user downloads an APK file.
extern const base::Feature kCaptureSafetyNetId;
......
......@@ -56,12 +56,19 @@ class TagNameIs {
void GetDefaultTagAndAttributeList(
std::vector<TagAndAttributesItem>* tag_and_attributes_list) {
tag_and_attributes_list->clear();
// These entries must be sorted by tag name.
bool should_capture_js =
base::FeatureList::IsEnabled(kCaptureInlineJavascriptForGoogleAds);
if (should_capture_js)
tag_and_attributes_list->push_back(TagAndAttributesItem("a", {"onclick"}));
// These entries must be sorted by tag name.
// These tags are related to identifying Google ads.
tag_and_attributes_list->push_back(
TagAndAttributesItem("div", {"data-google-query-id", "id"}));
tag_and_attributes_list->push_back(TagAndAttributesItem("iframe", {"id"}));
if (should_capture_js)
tag_and_attributes_list->push_back(
TagAndAttributesItem("img", {"onclick"}));
}
void ParseTagAndAttributeParams(
......@@ -153,7 +160,6 @@ void HandleElement(
child_node->url = child_url;
child_node->tag_name = element.TagName().Utf8();
child_node->parent = summary_node->url;
// The body of an iframe may be in a different renderer. Look up the routing
// ID of the local or remote frame and store it with the iframe node. If this
// element is not a frame then the result of the lookup will be null.
......@@ -162,7 +168,10 @@ void HandleElement(
child_node->child_frame_routing_id =
content::RenderFrame::GetRoutingIdForWebFrame(subframe);
}
if (base::FeatureList::IsEnabled(kCaptureInlineJavascriptForGoogleAds) &&
child_node->tag_name == "SCRIPT") {
child_node->inner_html = element.TextContent().Utf8();
}
// Populate the element's attributes, but only collect the ones that are
// configured in the finch study.
const auto& tag_attribute_iter = std::find_if(
......@@ -226,7 +235,10 @@ bool ShouldHandleElement(
element.HasAttribute("src")) {
return true;
}
if (base::FeatureList::IsEnabled(kCaptureInlineJavascriptForGoogleAds) &&
element.HasHTMLTagName("script")) {
return true;
}
std::string tag_name_lower = base::ToLowerASCII(element.TagName().Ascii());
const auto& tag_attribute_iter =
std::find_if(tag_and_attributes_list.begin(),
......
......@@ -60,6 +60,10 @@ class ThreatDOMDetails : public content::RenderFrameObserver,
// Exposed for testing.
void ExtractResources(std::vector<mojom::ThreatDOMDetailsNodePtr>* resources);
std::vector<TagAndAttributesItem> GetTagAndAttributesListForTest() {
return tag_and_attributes_list_;
}
private:
// Creates a ThreatDOMDetails for the specified RenderFrame.
// The ThreatDOMDetails should be destroyed prior to destroying
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment