Commit f5e07b74 authored by jialiul's avatar jialiul Committed by Commit bot

In identifying download referrer chain, if the subframe url is not

available (e.g. empty), we should use main frame url to search for
previous navigation.

This will help us correctly identify referrer of downloads that
happen in sub-frame.
For example, page A has a link to page B, and page B has a
subframe C, and subframe C links to download D.

A --->  B
        \iframe C -----> D
If we only look at the frame D's in, we can only reach C in
attribution. But in fact it should be D-C-B-A.

BUG=639467

Review-Url: https://codereview.chromium.org/2612663002
Cr-Commit-Position: refs/heads/master@{#441454}
parent be820b8e
...@@ -1054,11 +1054,10 @@ IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest, ...@@ -1054,11 +1054,10 @@ IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest,
// Click a link in a subframe and start download. // Click a link in a subframe and start download.
IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest, IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest,
SubFrameDirectDownload) { SubFrameDirectDownload) {
ui_test_utils::NavigateToURL( GURL initial_url = embedded_test_server()->GetURL(kSingleFrameTestURL);
browser(), embedded_test_server()->GetURL(kMultiFrameTestURL)); ClickTestLink("sub_frame_download_attribution", 1, initial_url);
std::string test_name = std::string test_name =
base::StringPrintf("%s', '%s", "iframe1", "iframe_direct_download"); base::StringPrintf("%s', '%s", "iframe1", "iframe_direct_download");
GURL initial_url = embedded_test_server()->GetURL(kSingleFrameTestURL);
GURL multi_frame_test_url = GURL multi_frame_test_url =
embedded_test_server()->GetURL(kMultiFrameTestURL); embedded_test_server()->GetURL(kMultiFrameTestURL);
GURL iframe_url = embedded_test_server()->GetURL(kIframeDirectDownloadURL); GURL iframe_url = embedded_test_server()->GetURL(kIframeDirectDownloadURL);
...@@ -1083,8 +1082,8 @@ IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest, ...@@ -1083,8 +1082,8 @@ IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest,
true, // has_committed true, // has_committed
false, // has_server_redirect false, // has_server_redirect
nav_map->at(initial_url).at(0)); nav_map->at(initial_url).at(0));
VerifyNavigationEvent(GURL(), // source_url VerifyNavigationEvent(initial_url, // source_url
GURL(), // source_main_frame_url initial_url, // source_main_frame_url
multi_frame_test_url, // original_request_url multi_frame_test_url, // original_request_url
multi_frame_test_url, // destination_url multi_frame_test_url, // destination_url
true, // is_user_initiated, true, // is_user_initiated,
...@@ -1118,7 +1117,7 @@ IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest, ...@@ -1118,7 +1117,7 @@ IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest,
VerifyHostToIpMap(); VerifyHostToIpMap();
auto referrer_chain = IdentifyReferrerChain(GetDownload()); auto referrer_chain = IdentifyReferrerChain(GetDownload());
ASSERT_EQ(2U, referrer_chain.size()); ASSERT_EQ(4U, referrer_chain.size());
VerifyReferrerChainEntry(download_url, // url VerifyReferrerChainEntry(download_url, // url
ReferrerChainEntry::DOWNLOAD_URL, // type ReferrerChainEntry::DOWNLOAD_URL, // type
test_server_ip, // ip_address test_server_ip, // ip_address
...@@ -1133,16 +1132,29 @@ IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest, ...@@ -1133,16 +1132,29 @@ IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest,
multi_frame_test_url, // referrer_main_frame_url multi_frame_test_url, // referrer_main_frame_url
false, // is_retargeting false, // is_retargeting
referrer_chain[1]); referrer_chain[1]);
VerifyReferrerChainEntry(multi_frame_test_url, // url
ReferrerChainEntry::CLIENT_REDIRECT, // type
test_server_ip, // ip_address
initial_url, // referrer_url
initial_url, // referrer_main_frame_url
false, // is_retargeting
referrer_chain[2]);
VerifyReferrerChainEntry(initial_url, // url
ReferrerChainEntry::LANDING_REFERRER, // type
test_server_ip, // ip_address
GURL(), // referrer_url
GURL(), // referrer_main_frame_url
false, // is_retargeting
referrer_chain[3]);
} }
// Click a link in a subframe and open download in a new tab. // Click a link in a subframe and open download in a new tab.
IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest, IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest,
SubFrameNewTabDownload) { SubFrameNewTabDownload) {
ui_test_utils::NavigateToURL( GURL initial_url = embedded_test_server()->GetURL(kSingleFrameTestURL);
browser(), embedded_test_server()->GetURL(kMultiFrameTestURL)); ClickTestLink("sub_frame_download_attribution", 1, initial_url);
std::string test_name = std::string test_name =
base::StringPrintf("%s', '%s", "iframe2", "iframe_new_tab_download"); base::StringPrintf("%s', '%s", "iframe2", "iframe_new_tab_download");
GURL initial_url = embedded_test_server()->GetURL(kSingleFrameTestURL);
GURL multi_frame_test_url = GURL multi_frame_test_url =
embedded_test_server()->GetURL(kMultiFrameTestURL); embedded_test_server()->GetURL(kMultiFrameTestURL);
GURL iframe_url = embedded_test_server()->GetURL(kIframeDirectDownloadURL); GURL iframe_url = embedded_test_server()->GetURL(kIframeDirectDownloadURL);
...@@ -1154,7 +1166,7 @@ IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest, ...@@ -1154,7 +1166,7 @@ IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest,
std::string test_server_ip(embedded_test_server()->host_port_pair().host()); std::string test_server_ip(embedded_test_server()->host_port_pair().host());
auto nav_map = navigation_map(); auto nav_map = navigation_map();
ASSERT_TRUE(nav_map); ASSERT_TRUE(nav_map);
ASSERT_EQ(std::size_t(6), nav_map->size()); ASSERT_EQ(6U, nav_map->size());
ASSERT_EQ(1U, nav_map->at(multi_frame_test_url).size()); ASSERT_EQ(1U, nav_map->at(multi_frame_test_url).size());
ASSERT_EQ(1U, nav_map->at(iframe_url).size()); ASSERT_EQ(1U, nav_map->at(iframe_url).size());
ASSERT_EQ(1U, nav_map->at(iframe_retargeting_url).size()); ASSERT_EQ(1U, nav_map->at(iframe_retargeting_url).size());
...@@ -1169,8 +1181,8 @@ IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest, ...@@ -1169,8 +1181,8 @@ IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest,
true, // has_committed true, // has_committed
false, // has_server_redirect false, // has_server_redirect
nav_map->at(initial_url).at(0)); nav_map->at(initial_url).at(0));
VerifyNavigationEvent(GURL(), // source_url VerifyNavigationEvent(initial_url, // source_url
GURL(), // source_main_frame_url initial_url, // source_main_frame_url
multi_frame_test_url, // original_request_url multi_frame_test_url, // original_request_url
multi_frame_test_url, // destination_url multi_frame_test_url, // destination_url
true, // is_user_initiated, true, // is_user_initiated,
...@@ -1220,7 +1232,7 @@ IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest, ...@@ -1220,7 +1232,7 @@ IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest,
VerifyHostToIpMap(); VerifyHostToIpMap();
auto referrer_chain = IdentifyReferrerChain(GetDownload()); auto referrer_chain = IdentifyReferrerChain(GetDownload());
EXPECT_EQ(std::size_t(3), referrer_chain.size()); EXPECT_EQ(5U, referrer_chain.size());
VerifyReferrerChainEntry(download_url, // url VerifyReferrerChainEntry(download_url, // url
ReferrerChainEntry::DOWNLOAD_URL, // type ReferrerChainEntry::DOWNLOAD_URL, // type
test_server_ip, // ip_address test_server_ip, // ip_address
...@@ -1242,6 +1254,20 @@ IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest, ...@@ -1242,6 +1254,20 @@ IN_PROC_BROWSER_TEST_F(SBNavigationObserverBrowserTest,
multi_frame_test_url, // referrer_main_frame_url multi_frame_test_url, // referrer_main_frame_url
false, // is_retargeting false, // is_retargeting
referrer_chain[2]); referrer_chain[2]);
VerifyReferrerChainEntry(multi_frame_test_url, // url
ReferrerChainEntry::CLIENT_REDIRECT, // type
test_server_ip, // ip_address
initial_url, // referrer_url
initial_url, // referrer_main_frame_url
false, // is_retargeting
referrer_chain[3]);
VerifyReferrerChainEntry(initial_url, // url
ReferrerChainEntry::LANDING_REFERRER, // type
test_server_ip, // ip_address
GURL(), // referrer_url
GURL(), // referrer_main_frame_url
false, // is_retargeting
referrer_chain[4]);
} }
// Click a link which redirects to the landing page, and then click on the // Click a link which redirects to the landing page, and then click on the
......
...@@ -164,7 +164,10 @@ SafeBrowsingNavigationObserverManager::IdentifyReferrerChain( ...@@ -164,7 +164,10 @@ SafeBrowsingNavigationObserverManager::IdentifyReferrerChain(
if (!target_url.is_valid()) if (!target_url.is_valid())
return INVALID_URL; return INVALID_URL;
NavigationEvent* nav_event = FindNavigationEvent(target_url, target_tab_id); NavigationEvent* nav_event = FindNavigationEvent(
target_url,
GURL(),
target_tab_id);
if (!nav_event) { if (!nav_event) {
// We cannot find a single navigation event related to this download. // We cannot find a single navigation event related to this download.
return NAVIGATION_EVENT_NOT_FOUND; return NAVIGATION_EVENT_NOT_FOUND;
...@@ -178,7 +181,9 @@ SafeBrowsingNavigationObserverManager::IdentifyReferrerChain( ...@@ -178,7 +181,9 @@ SafeBrowsingNavigationObserverManager::IdentifyReferrerChain(
// Back trace to the next nav_event that was initiated by the user. // Back trace to the next nav_event that was initiated by the user.
while (!nav_event->is_user_initiated) { while (!nav_event->is_user_initiated) {
nav_event = nav_event =
FindNavigationEvent(nav_event->source_url, nav_event->source_tab_id); FindNavigationEvent(nav_event->source_url,
nav_event->source_main_frame_url,
nav_event->source_tab_id);
if (!nav_event) if (!nav_event)
return result; return result;
AddToReferrerChain(out_referrer_chain, nav_event, AddToReferrerChain(out_referrer_chain, nav_event,
...@@ -200,7 +205,9 @@ SafeBrowsingNavigationObserverManager::IdentifyReferrerChain( ...@@ -200,7 +205,9 @@ SafeBrowsingNavigationObserverManager::IdentifyReferrerChain(
} }
nav_event = nav_event =
FindNavigationEvent(nav_event->source_url, nav_event->source_tab_id); FindNavigationEvent(nav_event->source_url,
nav_event->source_main_frame_url,
nav_event->source_tab_id);
if (!nav_event) if (!nav_event)
return result; return result;
...@@ -344,8 +351,17 @@ void SafeBrowsingNavigationObserverManager::ScheduleNextCleanUpAfterInterval( ...@@ -344,8 +351,17 @@ void SafeBrowsingNavigationObserverManager::ScheduleNextCleanUpAfterInterval(
NavigationEvent* SafeBrowsingNavigationObserverManager::FindNavigationEvent( NavigationEvent* SafeBrowsingNavigationObserverManager::FindNavigationEvent(
const GURL& target_url, const GURL& target_url,
const GURL& target_main_frame_url,
int target_tab_id) { int target_tab_id) {
auto it = navigation_map_.find(target_url); if (target_url.is_empty() && target_main_frame_url.is_empty())
return nullptr;
// If target_url is empty, we should back trace navigation based on its
// main frame URL instead.
const GURL& search_url =
target_url.is_empty() ? target_main_frame_url : target_url;
auto it = navigation_map_.find(search_url);
if (it == navigation_map_.end()) { if (it == navigation_map_.end()) {
return nullptr; return nullptr;
} }
...@@ -353,7 +369,7 @@ NavigationEvent* SafeBrowsingNavigationObserverManager::FindNavigationEvent( ...@@ -353,7 +369,7 @@ NavigationEvent* SafeBrowsingNavigationObserverManager::FindNavigationEvent(
// the vector in reverse order to get the latest match. // the vector in reverse order to get the latest match.
for (auto rit = it->second.rbegin(); rit != it->second.rend(); ++rit) { for (auto rit = it->second.rbegin(); rit != it->second.rend(); ++rit) {
// If tab id is not valid, we only compare url, otherwise we compare both. // If tab id is not valid, we only compare url, otherwise we compare both.
if (rit->destination_url == target_url && if (rit->destination_url == search_url &&
(target_tab_id == -1 || rit->target_tab_id == target_tab_id)) { (target_tab_id == -1 || rit->target_tab_id == target_tab_id)) {
// If both source_url and source_main_frame_url are empty, and this // If both source_url and source_main_frame_url are empty, and this
// navigation is not triggered by user, a retargeting navigation probably // navigation is not triggered by user, a retargeting navigation probably
......
...@@ -139,9 +139,14 @@ class SafeBrowsingNavigationObserverManager ...@@ -139,9 +139,14 @@ class SafeBrowsingNavigationObserverManager
void ScheduleNextCleanUpAfterInterval(base::TimeDelta interval); void ScheduleNextCleanUpAfterInterval(base::TimeDelta interval);
// Find the most recent navigation event that navigated to |target_url| in the // Find the most recent navigation event that navigated to |target_url| and
// tab with ID |target_tab_id|. If |target_tab_id| is not available (-1), we // its associated |target_main_frame_url| in the tab with ID |target_tab_id|.
// look for all tabs for the most recent navigation to |target_url|. // If navigation happened in the main frame, |target_url| and |target_main_
// frame_url| are the same.
// If |target_url| is empty, we use its main frame url (a.k.a.
// |target_main_frame_url|) to search for navigation events.
// If |target_tab_id| is not available (-1), we look for all tabs for the most
// recent navigation to |target_url| or |target_main_frame_url|.
// For some cases, the most recent navigation to |target_url| may not be // For some cases, the most recent navigation to |target_url| may not be
// relevant. // relevant.
// For example, url1 in window A opens url2 in window B, url1 then opens an // For example, url1 in window A opens url2 in window B, url1 then opens an
...@@ -156,6 +161,7 @@ class SafeBrowsingNavigationObserverManager ...@@ -156,6 +161,7 @@ class SafeBrowsingNavigationObserverManager
// However, it does not prevent us to attribute url1 in Window A as the cause // However, it does not prevent us to attribute url1 in Window A as the cause
// of all these navigations. // of all these navigations.
NavigationEvent* FindNavigationEvent(const GURL& target_url, NavigationEvent* FindNavigationEvent(const GURL& target_url,
const GURL& target_main_frame_url,
int target_tab_id); int target_tab_id);
void AddToReferrerChain(std::vector<ReferrerChainEntry>* referrer_chain, void AddToReferrerChain(std::vector<ReferrerChainEntry>* referrer_chain,
...@@ -168,8 +174,8 @@ class SafeBrowsingNavigationObserverManager ...@@ -168,8 +174,8 @@ class SafeBrowsingNavigationObserverManager
// original target url. Since the same url can be requested multiple times // original target url. Since the same url can be requested multiple times
// across different tabs and frames, the value of this map is a vector of // across different tabs and frames, the value of this map is a vector of
// NavigationEvent ordered by navigation finish time. // NavigationEvent ordered by navigation finish time.
// TODO(jialiul): Entries in navigation_map_ will be removed if they are older // Entries in navigation_map_ will be removed if they are older than 2 minutes
// than 2 minutes since their corresponding navigations finish. // since their corresponding navigations finish.
NavigationMap navigation_map_; NavigationMap navigation_map_;
// user_gesture_map_ keeps track of the timestamp of last user gesture in // user_gesture_map_ keeps track of the timestamp of last user gesture in
......
...@@ -119,6 +119,10 @@ ...@@ -119,6 +119,10 @@
Download via HTML5 file system API Download via HTML5 file system API
</a><br> </a><br>
<a id="sub_frame_download_attribution" href="navigation_observer_multi_frame_tests.html">
Download in subframe.
</a><br>
<a id="complete_referrer_chain" href="redirect_to_landing.html"> <a id="complete_referrer_chain" href="redirect_to_landing.html">
Click on landing referrer and landing page then reach download Click on landing referrer and landing page then reach download
</a><br> </a><br>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment