Commit 74136aa0 authored by Luke Zielinski's avatar Luke Zielinski Committed by Commit Bot

Fix issue of DOM elements referring to missing resources.

Maintain a set of resource IDs that should be kept because they are
referred to by DOM elements that are being kept. This avoid trimming
away resources that are shared between "kept" and "trimmed" DOM elements.

Bug: b/118395372
Change-Id: I23489a400f4b9028daf3de50e78f403b816d6446
Reviewed-on: https://chromium-review.googlesource.com/c/1312934
Commit-Queue: Varun Khaneja <vakh@chromium.org>
Reviewed-by: default avatarVarun Khaneja <vakh@chromium.org>
Reviewed-by: default avatarDaniel Rubery <drubery@chromium.org>
Cr-Commit-Position: refs/heads/master@{#605362}
parent 071c08ce
......@@ -997,12 +997,17 @@ TEST_F(ThreatDetailsTest, ThreatDOMDetails_AmbiguousDOM) {
// This test uses the following structure.
// kDOMParentURL
// \- <div id=outer> # Trimmed
// \- <script id=shared-resource src=kFirstRedirectURL> # Trimmed
// \- <script id=outer-sibling src=kReferrerURL> # Reported (parent of ad ID)
// \- <script id=sibling src=kFirstRedirectURL> # Reported (sibling of ad ID)
// \- <div data-google-query-id=ad-tag> # Reported (ad ID)
// \- <iframe src=kDOMChildURL foo=bar> # Reported (child of ad ID)
// \- <div id=inner bar=baz/> # Reported (child of ad ID)
// \- <script src=kDOMChildURL2> # Reported (child of ad ID)
//
// *Note: the best way to match the inputs and expectations in the body of the
// test with the structure above, is to use URLs for resources, and the ID
// attributes for DOM elements.
TEST_F(ThreatDetailsTest, ThreatDOMDetails_TrimToAdTags) {
// Create a child renderer inside the main frame to house the inner iframe.
// Perform the navigation first in order to manipulate the frame tree.
......@@ -1022,12 +1027,22 @@ TEST_F(ThreatDetailsTest, ThreatDOMDetails_TrimToAdTags) {
mojom::AttributeNameValue::New("id", "outer"));
outer_params.push_back(std::move(outer_div));
mojom::ThreatDOMDetailsNodePtr shared_resource_script =
mojom::ThreatDOMDetailsNode::New();
shared_resource_script->node_id = 2;
shared_resource_script->tag_name = "script";
shared_resource_script->url = GURL(kFirstRedirectURL);
shared_resource_script->parent = GURL(kDOMParentURL);
shared_resource_script->attributes.push_back(
mojom::AttributeNameValue::New("id", "shared-resource"));
outer_params.push_back(std::move(shared_resource_script));
mojom::ThreatDOMDetailsNodePtr outer_sibling_script =
mojom::ThreatDOMDetailsNode::New();
outer_sibling_script->node_id = 2;
outer_sibling_script->node_id = 3;
outer_sibling_script->url = GURL(kReferrerURL);
outer_sibling_script->child_node_ids.push_back(3);
outer_sibling_script->child_node_ids.push_back(4);
outer_sibling_script->child_node_ids.push_back(5);
outer_sibling_script->tag_name = "script";
outer_sibling_script->parent = GURL(kDOMParentURL);
outer_sibling_script->attributes.push_back(
......@@ -1038,11 +1053,11 @@ TEST_F(ThreatDetailsTest, ThreatDOMDetails_TrimToAdTags) {
mojom::ThreatDOMDetailsNodePtr sibling_script =
mojom::ThreatDOMDetailsNode::New();
sibling_script->node_id = 3;
sibling_script->node_id = 4;
sibling_script->url = GURL(kFirstRedirectURL);
sibling_script->tag_name = "script";
sibling_script->parent = GURL(kDOMParentURL);
sibling_script->parent_node_id = 2;
sibling_script->parent_node_id = 3;
sibling_script->attributes.push_back(
mojom::AttributeNameValue::New("src", kFirstRedirectURL));
sibling_script->attributes.push_back(
......@@ -1051,9 +1066,9 @@ TEST_F(ThreatDetailsTest, ThreatDOMDetails_TrimToAdTags) {
mojom::ThreatDOMDetailsNodePtr outer_ad_tag_div =
mojom::ThreatDOMDetailsNode::New();
outer_ad_tag_div->node_id = 4;
outer_ad_tag_div->parent_node_id = 2;
outer_ad_tag_div->child_node_ids.push_back(5);
outer_ad_tag_div->node_id = 5;
outer_ad_tag_div->parent_node_id = 3;
outer_ad_tag_div->child_node_ids.push_back(6);
outer_ad_tag_div->tag_name = "div";
outer_ad_tag_div->parent = GURL(kDOMParentURL);
outer_ad_tag_div->attributes.push_back(
......@@ -1062,8 +1077,8 @@ TEST_F(ThreatDetailsTest, ThreatDOMDetails_TrimToAdTags) {
mojom::ThreatDOMDetailsNodePtr outer_child_iframe =
mojom::ThreatDOMDetailsNode::New();
outer_child_iframe->node_id = 5;
outer_child_iframe->parent_node_id = 4;
outer_child_iframe->node_id = 6;
outer_child_iframe->parent_node_id = 5;
outer_child_iframe->url = GURL(kDOMChildURL);
outer_child_iframe->tag_name = "iframe";
outer_child_iframe->parent = GURL(kDOMParentURL);
......@@ -1148,14 +1163,14 @@ TEST_F(ThreatDetailsTest, ThreatDOMDetails_TrimToAdTags) {
ClientSafeBrowsingReportRequest::Resource* res_ad_parent =
expected.add_resources();
res_ad_parent->set_id(4);
res_ad_parent->set_id(6);
res_ad_parent->set_url(kReferrerURL);
res_ad_parent->set_parent_id(5);
res_ad_parent->set_tag_name("script");
ClientSafeBrowsingReportRequest::Resource* res_sibling =
expected.add_resources();
res_sibling->set_id(6);
res_sibling->set_id(4);
res_sibling->set_url(kFirstRedirectURL);
res_sibling->set_parent_id(5);
res_sibling->set_tag_name("script");
......@@ -1165,22 +1180,22 @@ TEST_F(ThreatDetailsTest, ThreatDOMDetails_TrimToAdTags) {
res_dom_parent->set_id(5);
res_dom_parent->set_url(kDOMParentURL);
res_dom_parent->add_child_ids(3);
res_dom_parent->add_child_ids(4);
res_dom_parent->add_child_ids(6);
res_dom_parent->add_child_ids(4);
HTMLElement* elem_dom_parent_script = expected.add_dom();
elem_dom_parent_script->set_id(3);
elem_dom_parent_script->set_id(4);
elem_dom_parent_script->set_tag("SCRIPT");
elem_dom_parent_script->set_resource_id(res_ad_parent->id());
elem_dom_parent_script->add_attribute()->set_name("src");
elem_dom_parent_script->mutable_attribute(0)->set_value(kReferrerURL);
elem_dom_parent_script->add_attribute()->set_name("id");
elem_dom_parent_script->mutable_attribute(1)->set_value("outer-sibling");
elem_dom_parent_script->add_child_ids(4);
elem_dom_parent_script->add_child_ids(5);
elem_dom_parent_script->add_child_ids(6);
HTMLElement* elem_dom_sibling_script = expected.add_dom();
elem_dom_sibling_script->set_id(4);
elem_dom_sibling_script->set_id(5);
elem_dom_sibling_script->set_tag("SCRIPT");
elem_dom_sibling_script->set_resource_id(res_sibling->id());
elem_dom_sibling_script->add_attribute()->set_name("src");
......@@ -1189,14 +1204,14 @@ TEST_F(ThreatDetailsTest, ThreatDOMDetails_TrimToAdTags) {
elem_dom_sibling_script->mutable_attribute(1)->set_value("sibling");
HTMLElement* elem_dom_ad_tag_div = expected.add_dom();
elem_dom_ad_tag_div->set_id(5);
elem_dom_ad_tag_div->set_id(6);
elem_dom_ad_tag_div->set_tag("DIV");
elem_dom_ad_tag_div->add_attribute()->set_name("data-google-query-id");
elem_dom_ad_tag_div->mutable_attribute(0)->set_value("ad-tag");
elem_dom_ad_tag_div->add_child_ids(6);
elem_dom_ad_tag_div->add_child_ids(7);
HTMLElement* elem_dom_outer_iframe = expected.add_dom();
elem_dom_outer_iframe->set_id(6);
elem_dom_outer_iframe->set_id(7);
elem_dom_outer_iframe->set_tag("IFRAME");
elem_dom_outer_iframe->set_resource_id(res_dom_child->id());
elem_dom_outer_iframe->add_attribute()->set_name("src");
......
......@@ -211,10 +211,11 @@ void TrimElements(const std::set<int> target_ids,
// the immediate parent, the siblings, and the children of the target ids.
// By keeping the parent of the target and all of its children, this covers
// the target's siblings as well.
std::vector<int> ids_to_keep;
// Keep track of ids that were kept to avoid duplication. We still need the
// vector above for handling the children where it is used like a queue.
std::unordered_set<int> kept_ids;
std::vector<int> element_ids_to_keep;
// Resource IDs are also tracked so that we remember which resources are
// attached to elements that we are keeping. This avoids deleting resources
// that are shared between kept elements and trimmed elements.
std::vector<int> kept_resource_ids;
for (int target_id : target_ids) {
const int parent_id = element_id_to_parent_id[target_id];
if (parent_id == kElementIdNoParent) {
......@@ -227,36 +228,55 @@ void TrimElements(const std::set<int> target_ids,
// Otherwise, insert the parent ID into the list of ids to keep. This will
// capture the parent and siblings of the target element, as well as each of
// their children.
if (kept_ids.count(parent_id) == 0) {
ids_to_keep.push_back(parent_id);
kept_ids.insert(parent_id);
if (!base::ContainsValue(element_ids_to_keep, parent_id)) {
element_ids_to_keep.push_back(parent_id);
// Check if this element has a resource. If so, remember to also keep the
// resource.
const HTMLElement& elem = *elements_by_id[parent_id];
if (elem.has_resource_id()) {
kept_resource_ids.push_back(elem.resource_id());
}
}
}
// Walk through |ids_to_keep| and append the children of each of element to
// |ids_to_keep|. This is effectively a breadth-first traversal of the tree.
// The list will stop growing when we reach the leaf nodes that have no more
// children.
for (size_t index = 0; index < ids_to_keep.size(); ++index) {
int cur_element_id = ids_to_keep[index];
// Walk through |element_ids_to_keep| and append the children of each of
// element to |element_ids_to_keep|. This is effectively a breadth-first
// traversal of the tree. The list will stop growing when we reach the leaf
// nodes that have no more children.
for (size_t index = 0; index < element_ids_to_keep.size(); ++index) {
int cur_element_id = element_ids_to_keep[index];
const HTMLElement& element = *(elements_by_id[cur_element_id]);
if (element.has_resource_id()) {
kept_resource_ids.push_back(element.resource_id());
}
for (int child_id : element.child_ids()) {
ids_to_keep.push_back(child_id);
element_ids_to_keep.push_back(child_id);
// Check if each child element has a resource. If so, remember to also
// keep the resource.
const HTMLElement& child_element = *elements_by_id[child_id];
if (child_element.has_resource_id()) {
kept_resource_ids.push_back(child_element.resource_id());
}
}
}
// Sort the list for easier lookup below.
std::sort(ids_to_keep.begin(), ids_to_keep.end());
std::sort(element_ids_to_keep.begin(), element_ids_to_keep.end());
// Now we know which elements we want to keep, scan through |elements| and
// erase anything that we aren't keeping. If an erased element refers to a
// resource then remove it from |resources| as well.
// erase anything that we aren't keeping.
for (auto element_iter = elements->begin();
element_iter != elements->end();) {
const HTMLElement& element = *element_iter->second;
// Delete any elements that we do not want to keep.
if (!base::ContainsValue(ids_to_keep, element.id())) {
if (element.has_resource_id()) {
if (!base::ContainsValue(element_ids_to_keep, element.id())) {
// If this element has a resource then maybe delete the resouce too. Some
// resources may be shared between kept and trimmed elements, and those
// ones should not be deleted.
if (element.has_resource_id() &&
!base::ContainsValue(kept_resource_ids, element.resource_id())) {
const std::string& resource_url =
resource_id_to_url[element.resource_id()];
resources->erase(resource_url);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment