Commit 5ca5f6a4 authored by horo's avatar horo Committed by Commit Bot

Move Google search related util methods to page_load_metrics_util

BUG=

Review-Url: https://codereview.chromium.org/2936543002
Cr-Commit-Position: refs/heads/master@{#478654}
parent c268151b
...@@ -316,135 +316,18 @@ bool WasAbortedBeforeInteraction( ...@@ -316,135 +316,18 @@ bool WasAbortedBeforeInteraction(
} // namespace } // namespace
// See
// https://docs.google.com/document/d/1jNPZ6Aeh0KV6umw1yZrrkfXRfxWNruwu7FELLx_cpOg/edit
// for additional details.
// static
bool FromGWSPageLoadMetricsLogger::IsGoogleSearchHostname(const GURL& url) {
base::Optional<std::string> result =
page_load_metrics::GetGoogleHostnamePrefix(url);
return result && result.value() == "www";
}
// Returns true when |url| is on a Google Search hostname, carries a 'q='
// parameter in its query string or fragment, and has one of the known search
// result paths.
// static
bool FromGWSPageLoadMetricsLogger::IsGoogleSearchResultUrl(const GURL& url) {
  if (!IsGoogleSearchHostname(url))
    return false;

  // NOTE: 'q=' is not required to be in the query string specifically, since
  // AJAXy search may store the search terms in the URL fragment instead.
  const bool has_search_terms =
      QueryContainsComponentPrefix(url.query_piece(), "q=") ||
      QueryContainsComponentPrefix(url.ref_piece(), "q=");
  if (!has_search_terms)
    return false;

  const base::StringPiece path = url.path_piece();
  if (path == "/search" || path == "/webhp")
    return true;
  return path == "/custom" || path == "/";
}
// static
bool FromGWSPageLoadMetricsLogger::IsGoogleSearchRedirectorUrl(
const GURL& url) {
if (!IsGoogleSearchHostname(url))
return false;
// The primary search redirector. Google search result redirects are
// differentiated from other general google redirects by 'source=web' in the
// query string.
if (url.path_piece() == "/url" && url.has_query() &&
QueryContainsComponent(url.query_piece(), "source=web")) {
return true;
}
// Intent-based navigations from search are redirected through a second
// redirector, which receives its redirect URL in the fragment/hash/ref
// portion of the URL (the portion after '#'). We don't check for the presence
// of certain params in the ref since this redirector is only used for
// redirects from search.
return url.path_piece() == "/searchurl/r.html" && url.has_ref();
}
// Exact-match flavour of the component search: forwards to
// QueryContainsComponentHelper with prefix matching switched off.
// static
bool FromGWSPageLoadMetricsLogger::QueryContainsComponent(
    const base::StringPiece query,
    const base::StringPiece component) {
  const bool match_as_prefix = false;
  return QueryContainsComponentHelper(query, component, match_as_prefix);
}
// Prefix-match flavour of the component search: forwards to
// QueryContainsComponentHelper with prefix matching switched on.
// static
bool FromGWSPageLoadMetricsLogger::QueryContainsComponentPrefix(
    const base::StringPiece query,
    const base::StringPiece component) {
  const bool match_as_prefix = true;
  return QueryContainsComponentHelper(query, component, match_as_prefix);
}
// Common helper for QueryContainsComponent and QueryContainsComponentPrefix.
//
// Returns true when |component| occurs in |query| at a component boundary:
// the occurrence must start at the beginning of |query| or directly after an
// '&'. Unless |component_is_prefix| is true, the occurrence must also end at
// the end of |query| or directly before an '&'. Returns false when either
// string is empty or |component| is longer than |query|.
//
// |query| must not include the leading '?' or '#'.
// static
bool FromGWSPageLoadMetricsLogger::QueryContainsComponentHelper(
    const base::StringPiece query,
    const base::StringPiece component,
    bool component_is_prefix) {
  if (query.empty() || component.empty() ||
      component.length() > query.length()) {
    return false;
  }

  // Verify that the provided query string does not include the query or
  // fragment start character, as the boundary checks below depend on this
  // character not being included.
  DCHECK(query[0] != '?' && query[0] != '#');

  // Every occurrence has to be examined, not just the first one, because an
  // occurrence may be a substring of a larger component. For the query string
  // 'ab=cd&b=c' and component 'b=c', the first occurrence (characters 1-3) is
  // part of 'ab=cd' and must be rejected; the second one (characters 6-8) is
  // the real match.
  //
  // The search resumes one character past a rejected occurrence rather than
  // past its end, so a valid occurrence that overlaps a rejected one is still
  // found (e.g. query 'za&a&a' with component 'a&a').
  for (size_t match_pos = query.find(component);
       match_pos != std::string::npos;
       match_pos = query.find(component, match_pos + 1)) {
    // The character before the occurrence must be a component separator,
    // unless the occurrence starts the query string.
    if (match_pos != 0 && query[match_pos - 1] != '&')
      continue;

    if (component_is_prefix)
      return true;

    // For a full match the occurrence must also be followed by a separator,
    // unless it ends the query string.
    const size_t after_offset = match_pos + component.length();
    if (after_offset == query.length() || query[after_offset] == '&')
      return true;
  }
  return false;
}
FromGWSPageLoadMetricsLogger::FromGWSPageLoadMetricsLogger() {} FromGWSPageLoadMetricsLogger::FromGWSPageLoadMetricsLogger() {}
void FromGWSPageLoadMetricsLogger::SetPreviouslyCommittedUrl(const GURL& url) { void FromGWSPageLoadMetricsLogger::SetPreviouslyCommittedUrl(const GURL& url) {
previously_committed_url_is_search_results_ = IsGoogleSearchResultUrl(url); previously_committed_url_is_search_results_ =
page_load_metrics::IsGoogleSearchResultUrl(url);
previously_committed_url_is_search_redirector_ = previously_committed_url_is_search_redirector_ =
IsGoogleSearchRedirectorUrl(url); page_load_metrics::IsGoogleSearchRedirectorUrl(url);
} }
void FromGWSPageLoadMetricsLogger::SetProvisionalUrl(const GURL& url) { void FromGWSPageLoadMetricsLogger::SetProvisionalUrl(const GURL& url) {
provisional_url_has_search_hostname_ = IsGoogleSearchHostname(url); provisional_url_has_search_hostname_ =
page_load_metrics::IsGoogleSearchHostname(url);
} }
FromGWSPageLoadMetricsObserver::FromGWSPageLoadMetricsObserver() {} FromGWSPageLoadMetricsObserver::FromGWSPageLoadMetricsObserver() {}
...@@ -601,7 +484,7 @@ void FromGWSPageLoadMetricsLogger::OnFailedProvisionalLoad( ...@@ -601,7 +484,7 @@ void FromGWSPageLoadMetricsLogger::OnFailedProvisionalLoad(
bool FromGWSPageLoadMetricsLogger::ShouldLogFailedProvisionalLoadMetrics() { bool FromGWSPageLoadMetricsLogger::ShouldLogFailedProvisionalLoadMetrics() {
// See comment in ShouldLogPostCommitMetrics above the call to // See comment in ShouldLogPostCommitMetrics above the call to
// IsGoogleSearchHostname for more info on this if test. // page_load_metrics::IsGoogleSearchHostname for more info on this if test.
if (provisional_url_has_search_hostname_) if (provisional_url_has_search_hostname_)
return false; return false;
...@@ -621,7 +504,7 @@ bool FromGWSPageLoadMetricsLogger::ShouldLogPostCommitMetrics(const GURL& url) { ...@@ -621,7 +504,7 @@ bool FromGWSPageLoadMetricsLogger::ShouldLogPostCommitMetrics(const GURL& url) {
// these cases are relatively uncommon, and we run the risk of logging metrics // these cases are relatively uncommon, and we run the risk of logging metrics
// for some search redirector URLs. Thus we choose the more conservative // for some search redirector URLs. Thus we choose the more conservative
// approach of ignoring all urls on known search hostnames. // approach of ignoring all urls on known search hostnames.
if (IsGoogleSearchHostname(url)) if (page_load_metrics::IsGoogleSearchHostname(url))
return false; return false;
// We're only interested in tracking navigations (e.g. clicks) initiated via // We're only interested in tracking navigations (e.g. clicks) initiated via
......
...@@ -96,30 +96,12 @@ class FromGWSPageLoadMetricsLogger { ...@@ -96,30 +96,12 @@ class FromGWSPageLoadMetricsLogger {
const page_load_metrics::PageLoadExtraInfo& extra_info); const page_load_metrics::PageLoadExtraInfo& extra_info);
// The methods below are public only for testing. // The methods below are public only for testing.
static bool IsGoogleSearchHostname(const GURL& url);
static bool IsGoogleSearchResultUrl(const GURL& url);
static bool IsGoogleSearchRedirectorUrl(const GURL& url);
bool ShouldLogFailedProvisionalLoadMetrics(); bool ShouldLogFailedProvisionalLoadMetrics();
bool ShouldLogPostCommitMetrics(const GURL& url); bool ShouldLogPostCommitMetrics(const GURL& url);
bool ShouldLogForegroundEventAfterCommit( bool ShouldLogForegroundEventAfterCommit(
const base::Optional<base::TimeDelta>& event, const base::Optional<base::TimeDelta>& event,
const page_load_metrics::PageLoadExtraInfo& info); const page_load_metrics::PageLoadExtraInfo& info);
// Whether the given query string contains the given component. The query
// parameter should contain the query string of a URL (the portion following
// the question mark, excluding the question mark). The component must fully
// match a component in the query string. For example, 'foo=bar' would match
// the query string 'a=b&foo=bar&c=d' but would not match 'a=b&zzzfoo=bar&c=d'
// since, though foo=bar appears in the query string, the key specified in the
// component 'foo' does not match the full key in the query string
// 'zzzfoo'. For QueryContainsComponent, the component should be of the form
// 'key=value'. For QueryContainsComponentPrefix, the component should be of
// the form 'key=' (where the value is not specified).
static bool QueryContainsComponent(const base::StringPiece query,
const base::StringPiece component);
static bool QueryContainsComponentPrefix(const base::StringPiece query,
const base::StringPiece component);
private: private:
bool previously_committed_url_is_search_results_ = false; bool previously_committed_url_is_search_results_ = false;
bool previously_committed_url_is_search_redirector_ = false; bool previously_committed_url_is_search_redirector_ = false;
...@@ -134,11 +116,6 @@ class FromGWSPageLoadMetricsLogger { ...@@ -134,11 +116,6 @@ class FromGWSPageLoadMetricsLogger {
// The time of first user interaction after paint from navigation start. // The time of first user interaction after paint from navigation start.
base::Optional<base::TimeDelta> first_user_interaction_after_paint_; base::Optional<base::TimeDelta> first_user_interaction_after_paint_;
// Common helper for QueryContainsComponent and QueryContainsComponentPrefix.
static bool QueryContainsComponentHelper(const base::StringPiece query,
const base::StringPiece component,
bool component_is_prefix);
DISALLOW_COPY_AND_ASSIGN(FromGWSPageLoadMetricsLogger); DISALLOW_COPY_AND_ASSIGN(FromGWSPageLoadMetricsLogger);
}; };
......
...@@ -732,161 +732,6 @@ TEST_F(FromGWSPageLoadMetricsObserverTest, ProvisionalIntent) { ...@@ -732,161 +732,6 @@ TEST_F(FromGWSPageLoadMetricsObserverTest, ProvisionalIntent) {
internal::kHistogramFromGWSAbortCloseBeforeCommit, 0); internal::kHistogramFromGWSAbortCloseBeforeCommit, 0);
} }
// Table-driven check of IsGoogleSearchHostname: each row pairs a URL with the
// result expected for it.
TEST_F(FromGWSPageLoadMetricsLoggerTest, IsGoogleSearchHostname) {
  const struct {
    bool expected_result;
    const char* url;
  } kTestCases[] = {
      {true, "https://www.google.com/"},
      {true, "https://www.google.co.uk/"},
      {true, "https://www.google.co.in/"},
      {false, "https://other.google.com/"},
      {false, "https://other.www.google.com/"},
      {false, "https://www.other.google.com/"},
      {false, "https://www.www.google.com/"},
      {false, "https://www.google.appspot.com/"},
      {false, "https://www.google.example.com/"},
      // Search results are not served from the bare google.com domain.
      {false, "https://google.com/"},
  };

  for (const auto& test_case : kTestCases) {
    EXPECT_EQ(test_case.expected_result,
              FromGWSPageLoadMetricsLogger::IsGoogleSearchHostname(
                  GURL(test_case.url)))
        << "for URL: " << test_case.url;
  }
}
// Table-driven check of IsGoogleSearchResultUrl: each row pairs a URL with the
// result expected for it.
TEST_F(FromGWSPageLoadMetricsLoggerTest, IsGoogleSearchResultUrl) {
  const struct {
    bool expected_result;
    const char* url;
  } kTestCases[] = {
      {true, "https://www.google.com/#q=test"},
      {true, "https://www.google.com/search#q=test"},
      {true, "https://www.google.com/search?q=test"},
      {true, "https://www.google.com/webhp#q=test"},
      {true, "https://www.google.com/webhp?q=test"},
      {true, "https://www.google.com/webhp?a=b&q=test"},
      {true, "https://www.google.com/webhp?a=b&q=test&c=d"},
      {true, "https://www.google.com/webhp#a=b&q=test&c=d"},
      {true, "https://www.google.com/webhp?#a=b&q=test&c=d"},
      {false, "https://www.google.com/"},
      {false, "https://www.google.com/about/"},
      {false, "https://other.google.com/"},
      {false, "https://other.google.com/webhp?q=test"},
      {false, kExampleUrl},
      {false, "https://www.example.com/webhp?q=test"},
      {false, "https://google.com/#q=test"},
  };

  for (const auto& test_case : kTestCases) {
    EXPECT_EQ(test_case.expected_result,
              FromGWSPageLoadMetricsLogger::IsGoogleSearchResultUrl(
                  GURL(test_case.url)))
        << "for URL: " << test_case.url;
  }
}
// Table-driven check of IsGoogleSearchRedirectorUrl: each row pairs a URL with
// the result expected for it. Each URL is listed once.
TEST_F(FromGWSPageLoadMetricsLoggerTest, IsGoogleSearchRedirectorUrl) {
  struct {
    bool expected_result;
    const char* url;
  } test_cases[] = {
      {true, "https://www.google.com/url?source=web"},
      {true, "https://www.google.com/url?source=web#foo"},
      {true, "https://www.google.com/searchurl/r.html#foo"},
      {true, "https://www.google.com/url?a=b&source=web&c=d"},
      {false, "https://www.google.com/?"},
      {false, "https://www.google.com/?url"},
      {false, "https://www.example.com/url?source=web"},
      {false, "https://google.com/url?"},
      {false, "https://www.google.com/?source=web"},
      {false, "https://www.google.com/source=web"},
      {false, "https://www.google.com/url?"},
      {false, "https://www.google.com/url?a=b"},
  };
  for (const auto& test : test_cases) {
    EXPECT_EQ(test.expected_result,
              FromGWSPageLoadMetricsLogger::IsGoogleSearchRedirectorUrl(
                  GURL(test.url)))
        << "for URL: " << test.url;
  }
}
// Table-driven check of QueryContainsComponent: each row holds a query string,
// the component searched for, and the expected result.
TEST_F(FromGWSPageLoadMetricsLoggerTest, QueryContainsComponent) {
  const struct {
    bool expected_result;
    const char* query;
    const char* component;
  } kTestCases[] = {
      {true, "a=b", "a=b"},
      {true, "a=b&c=d", "a=b"},
      {true, "a=b&c=d", "c=d"},
      {true, "a=b&c=d&e=f", "c=d"},
      {true, "za=b&a=b", "a=b"},
      {true, "a=bz&a=b", "a=b"},
      {true, "a=ba=b&a=b", "a=b"},
      {true, "a=a=a&a=a", "a=a"},
      {true, "source=web", "source=web"},
      {true, "a=b&source=web", "source=web"},
      {true, "a=b&source=web&c=d", "source=web"},
      {false, "a=a=a", "a=a"},
      {false, "", ""},
      {false, "a=b", ""},
      {false, "", "a=b"},
      {false, "za=b", "a=b"},
      {false, "za=bz", "a=b"},
      {false, "a=bz", "a=b"},
      {false, "za=b&c=d", "a=b"},
      {false, "a=b&c=dz", "c=d"},
      {false, "a=b&zc=d&e=f", "c=d"},
      {false, "a=b&c=dz&e=f", "c=d"},
      {false, "a=b&zc=dz&e=f", "c=d"},
      {false, "a=b&foosource=web&c=d", "source=web"},
      {false, "a=b&source=webbar&c=d", "source=web"},
      {false, "a=b&foosource=webbar&c=d", "source=web"},
  };

  for (const auto& test_case : kTestCases) {
    EXPECT_EQ(test_case.expected_result,
              FromGWSPageLoadMetricsLogger::QueryContainsComponent(
                  test_case.query, test_case.component))
        << "For query: " << test_case.query
        << " with component: " << test_case.component;
  }
}
// Table-driven check of QueryContainsComponentPrefix: each row holds a query
// string, the 'key=' prefix searched for, and the expected result.
TEST_F(FromGWSPageLoadMetricsLoggerTest, QueryContainsComponentPrefix) {
  const struct {
    bool expected_result;
    const char* query;
    const char* component;
  } kTestCases[] = {
      {true, "a=b", "a="},
      {true, "a=b&c=d", "a="},
      {true, "a=b&c=d", "c="},
      {true, "a=b&c=d&e=f", "c="},
      {true, "za=b&a=b", "a="},
      {true, "ba=a=b&a=b", "a="},
      {true, "q=test", "q="},
      {true, "a=b&q=test", "q="},
      {true, "q=test&c=d", "q="},
      {true, "a=b&q=test&c=d", "q="},
      {false, "", ""},
      {false, "za=b", "a="},
      {false, "za=b&c=d", "a="},
      {false, "a=b&zc=d", "c="},
      {false, "a=b&zc=d&e=f", "c="},
      {false, "a=b&zq=test&c=d", "q="},
      {false, "ba=a=b", "a="},
  };

  for (const auto& test_case : kTestCases) {
    EXPECT_EQ(test_case.expected_result,
              FromGWSPageLoadMetricsLogger::QueryContainsComponentPrefix(
                  test_case.query, test_case.component))
        << "For query: " << test_case.query
        << " with component: " << test_case.component;
  }
}
TEST_F(FromGWSPageLoadMetricsLoggerTest, Basic) { TEST_F(FromGWSPageLoadMetricsLoggerTest, Basic) {
FromGWSPageLoadMetricsLogger logger; FromGWSPageLoadMetricsLogger logger;
ASSERT_FALSE(logger.ShouldLogPostCommitMetrics(GURL(kExampleUrl))); ASSERT_FALSE(logger.ShouldLogPostCommitMetrics(GURL(kExampleUrl)));
......
...@@ -118,7 +118,7 @@ void ServiceWorkerPageLoadMetricsObserver::OnFirstContentfulPaintInPage( ...@@ -118,7 +118,7 @@ void ServiceWorkerPageLoadMetricsObserver::OnFirstContentfulPaintInPage(
if (!IsServiceWorkerControlled(info)) { if (!IsServiceWorkerControlled(info)) {
if (!WasStartedInForegroundOptionalEventInForeground( if (!WasStartedInForegroundOptionalEventInForeground(
timing.paint_timing->first_contentful_paint, info) || timing.paint_timing->first_contentful_paint, info) ||
!FromGWSPageLoadMetricsLogger::IsGoogleSearchResultUrl(info.url)) { !page_load_metrics::IsGoogleSearchResultUrl(info.url)) {
return; return;
} }
PAGE_LOAD_HISTOGRAM( PAGE_LOAD_HISTOGRAM(
...@@ -153,7 +153,7 @@ void ServiceWorkerPageLoadMetricsObserver::OnFirstContentfulPaintInPage( ...@@ -153,7 +153,7 @@ void ServiceWorkerPageLoadMetricsObserver::OnFirstContentfulPaintInPage(
internal::kHistogramServiceWorkerParseStartToFirstContentfulPaintInbox, internal::kHistogramServiceWorkerParseStartToFirstContentfulPaintInbox,
timing.paint_timing->first_contentful_paint.value() - timing.paint_timing->first_contentful_paint.value() -
timing.parse_timing->parse_start.value()); timing.parse_timing->parse_start.value());
} else if (FromGWSPageLoadMetricsLogger::IsGoogleSearchResultUrl(info.url)) { } else if (page_load_metrics::IsGoogleSearchResultUrl(info.url)) {
PAGE_LOAD_HISTOGRAM( PAGE_LOAD_HISTOGRAM(
internal::kHistogramServiceWorkerFirstContentfulPaintSearch, internal::kHistogramServiceWorkerFirstContentfulPaintSearch,
timing.paint_timing->first_contentful_paint.value()); timing.paint_timing->first_contentful_paint.value());
...@@ -173,7 +173,7 @@ void ServiceWorkerPageLoadMetricsObserver:: ...@@ -173,7 +173,7 @@ void ServiceWorkerPageLoadMetricsObserver::
return; return;
} }
if (!IsServiceWorkerControlled(info)) { if (!IsServiceWorkerControlled(info)) {
if (!FromGWSPageLoadMetricsLogger::IsGoogleSearchResultUrl(info.url)) if (!page_load_metrics::IsGoogleSearchResultUrl(info.url))
return; return;
PAGE_LOAD_HISTOGRAM( PAGE_LOAD_HISTOGRAM(
internal::kHistogramNoServiceWorkerFirstMeaningfulPaintSearch, internal::kHistogramNoServiceWorkerFirstMeaningfulPaintSearch,
...@@ -200,7 +200,7 @@ void ServiceWorkerPageLoadMetricsObserver:: ...@@ -200,7 +200,7 @@ void ServiceWorkerPageLoadMetricsObserver::
internal::kHistogramServiceWorkerParseStartToFirstMeaningfulPaintInbox, internal::kHistogramServiceWorkerParseStartToFirstMeaningfulPaintInbox,
timing.paint_timing->first_meaningful_paint.value() - timing.paint_timing->first_meaningful_paint.value() -
timing.parse_timing->parse_start.value()); timing.parse_timing->parse_start.value());
} else if (FromGWSPageLoadMetricsLogger::IsGoogleSearchResultUrl(info.url)) { } else if (page_load_metrics::IsGoogleSearchResultUrl(info.url)) {
PAGE_LOAD_HISTOGRAM( PAGE_LOAD_HISTOGRAM(
internal::kHistogramServiceWorkerFirstMeaningfulPaintSearch, internal::kHistogramServiceWorkerFirstMeaningfulPaintSearch,
timing.paint_timing->first_meaningful_paint.value()); timing.paint_timing->first_meaningful_paint.value());
...@@ -219,7 +219,7 @@ void ServiceWorkerPageLoadMetricsObserver::OnDomContentLoadedEventStart( ...@@ -219,7 +219,7 @@ void ServiceWorkerPageLoadMetricsObserver::OnDomContentLoadedEventStart(
return; return;
} }
if (!IsServiceWorkerControlled(info)) { if (!IsServiceWorkerControlled(info)) {
if (!FromGWSPageLoadMetricsLogger::IsGoogleSearchResultUrl(info.url)) if (!page_load_metrics::IsGoogleSearchResultUrl(info.url))
return; return;
PAGE_LOAD_HISTOGRAM( PAGE_LOAD_HISTOGRAM(
internal::kHistogramNoServiceWorkerDomContentLoadedSearch, internal::kHistogramNoServiceWorkerDomContentLoadedSearch,
...@@ -233,7 +233,7 @@ void ServiceWorkerPageLoadMetricsObserver::OnDomContentLoadedEventStart( ...@@ -233,7 +233,7 @@ void ServiceWorkerPageLoadMetricsObserver::OnDomContentLoadedEventStart(
PAGE_LOAD_HISTOGRAM( PAGE_LOAD_HISTOGRAM(
internal::kHistogramServiceWorkerDomContentLoadedInbox, internal::kHistogramServiceWorkerDomContentLoadedInbox,
timing.document_timing->dom_content_loaded_event_start.value()); timing.document_timing->dom_content_loaded_event_start.value());
} else if (FromGWSPageLoadMetricsLogger::IsGoogleSearchResultUrl(info.url)) { } else if (page_load_metrics::IsGoogleSearchResultUrl(info.url)) {
PAGE_LOAD_HISTOGRAM( PAGE_LOAD_HISTOGRAM(
internal::kHistogramServiceWorkerDomContentLoadedSearch, internal::kHistogramServiceWorkerDomContentLoadedSearch,
timing.document_timing->dom_content_loaded_event_start.value()); timing.document_timing->dom_content_loaded_event_start.value());
...@@ -247,7 +247,7 @@ void ServiceWorkerPageLoadMetricsObserver::OnLoadEventStart( ...@@ -247,7 +247,7 @@ void ServiceWorkerPageLoadMetricsObserver::OnLoadEventStart(
timing.document_timing->load_event_start, info)) timing.document_timing->load_event_start, info))
return; return;
if (!IsServiceWorkerControlled(info)) { if (!IsServiceWorkerControlled(info)) {
if (!FromGWSPageLoadMetricsLogger::IsGoogleSearchResultUrl(info.url)) if (!page_load_metrics::IsGoogleSearchResultUrl(info.url))
return; return;
PAGE_LOAD_HISTOGRAM(internal::kHistogramNoServiceWorkerLoadSearch, PAGE_LOAD_HISTOGRAM(internal::kHistogramNoServiceWorkerLoadSearch,
timing.document_timing->load_event_start.value()); timing.document_timing->load_event_start.value());
...@@ -258,7 +258,7 @@ void ServiceWorkerPageLoadMetricsObserver::OnLoadEventStart( ...@@ -258,7 +258,7 @@ void ServiceWorkerPageLoadMetricsObserver::OnLoadEventStart(
if (IsInboxSite(info.url)) { if (IsInboxSite(info.url)) {
PAGE_LOAD_HISTOGRAM(internal::kHistogramServiceWorkerLoadInbox, PAGE_LOAD_HISTOGRAM(internal::kHistogramServiceWorkerLoadInbox,
timing.document_timing->load_event_start.value()); timing.document_timing->load_event_start.value());
} else if (FromGWSPageLoadMetricsLogger::IsGoogleSearchResultUrl(info.url)) { } else if (page_load_metrics::IsGoogleSearchResultUrl(info.url)) {
PAGE_LOAD_HISTOGRAM(internal::kHistogramServiceWorkerLoadSearch, PAGE_LOAD_HISTOGRAM(internal::kHistogramServiceWorkerLoadSearch,
timing.document_timing->load_event_start.value()); timing.document_timing->load_event_start.value());
} }
......
...@@ -41,6 +41,59 @@ PageAbortReason GetAbortReasonForEndReason(PageEndReason end_reason) { ...@@ -41,6 +41,59 @@ PageAbortReason GetAbortReasonForEndReason(PageEndReason end_reason) {
} }
} }
// Common helper for QueryContainsComponent and QueryContainsComponentPrefix.
//
// Returns true when |component| occurs in |query| at a component boundary:
// the occurrence must start at the beginning of |query| or directly after an
// '&'. Unless |component_is_prefix| is true, the occurrence must also end at
// the end of |query| or directly before an '&'. Returns false when either
// string is empty or |component| is longer than |query|.
//
// |query| must not include the leading '?' or '#'.
bool QueryContainsComponentHelper(const base::StringPiece query,
                                  const base::StringPiece component,
                                  bool component_is_prefix) {
  if (query.empty() || component.empty() ||
      component.length() > query.length()) {
    return false;
  }

  // Verify that the provided query string does not include the query or
  // fragment start character, as the boundary checks below depend on this
  // character not being included.
  DCHECK(query[0] != '?' && query[0] != '#');

  // Every occurrence has to be examined, not just the first one, because an
  // occurrence may be a substring of a larger component. For the query string
  // 'ab=cd&b=c' and component 'b=c', the first occurrence (characters 1-3) is
  // part of 'ab=cd' and must be rejected; the second one (characters 6-8) is
  // the real match.
  //
  // The search resumes one character past a rejected occurrence rather than
  // past its end, so a valid occurrence that overlaps a rejected one is still
  // found (e.g. query 'za&a&a' with component 'a&a').
  for (size_t match_pos = query.find(component);
       match_pos != std::string::npos;
       match_pos = query.find(component, match_pos + 1)) {
    // The character before the occurrence must be a component separator,
    // unless the occurrence starts the query string.
    if (match_pos != 0 && query[match_pos - 1] != '&')
      continue;

    if (component_is_prefix)
      return true;

    // For a full match the occurrence must also be followed by a separator,
    // unless it ends the query string.
    const size_t after_offset = match_pos + component.length();
    if (after_offset == query.length() || query[after_offset] == '&')
      return true;
  }
  return false;
}
} // namespace } // namespace
bool WasStartedInForegroundOptionalEventInForeground( bool WasStartedInForegroundOptionalEventInForeground(
...@@ -110,4 +163,57 @@ bool DidObserveLoadingBehaviorInAnyFrame( ...@@ -110,4 +163,57 @@ bool DidObserveLoadingBehaviorInAnyFrame(
return (all_frame_loading_behavior_flags & behavior) != 0; return (all_frame_loading_behavior_flags & behavior) != 0;
} }
bool IsGoogleSearchHostname(const GURL& url) {
base::Optional<std::string> result =
page_load_metrics::GetGoogleHostnamePrefix(url);
return result && result.value() == "www";
}
// Returns true when |url| is on a Google Search hostname, carries a 'q='
// parameter in its query string or fragment, and has one of the known search
// result paths.
bool IsGoogleSearchResultUrl(const GURL& url) {
  if (!IsGoogleSearchHostname(url))
    return false;

  // NOTE: 'q=' is not required to be in the query string specifically, since
  // AJAXy search may store the search terms in the URL fragment instead.
  const bool has_search_terms =
      QueryContainsComponentPrefix(url.query_piece(), "q=") ||
      QueryContainsComponentPrefix(url.ref_piece(), "q=");
  if (!has_search_terms)
    return false;

  const base::StringPiece path = url.path_piece();
  if (path == "/search" || path == "/webhp")
    return true;
  return path == "/custom" || path == "/";
}
bool IsGoogleSearchRedirectorUrl(const GURL& url) {
if (!IsGoogleSearchHostname(url))
return false;
// The primary search redirector. Google search result redirects are
// differentiated from other general google redirects by 'source=web' in the
// query string.
if (url.path_piece() == "/url" && url.has_query() &&
QueryContainsComponent(url.query_piece(), "source=web")) {
return true;
}
// Intent-based navigations from search are redirected through a second
// redirector, which receives its redirect URL in the fragment/hash/ref
// portion of the URL (the portion after '#'). We don't check for the presence
// of certain params in the ref since this redirector is only used for
// redirects from search.
return url.path_piece() == "/searchurl/r.html" && url.has_ref();
}
// Exact-match flavour of the component search: forwards to
// QueryContainsComponentHelper with prefix matching switched off.
bool QueryContainsComponent(const base::StringPiece query,
                            const base::StringPiece component) {
  const bool match_as_prefix = false;
  return QueryContainsComponentHelper(query, component, match_as_prefix);
}
// Prefix-match flavour of the component search: forwards to
// QueryContainsComponentHelper with prefix matching switched on.
bool QueryContainsComponentPrefix(const base::StringPiece query,
                                  const base::StringPiece component) {
  const bool match_as_prefix = true;
  return QueryContainsComponentHelper(query, component, match_as_prefix);
}
} // namespace page_load_metrics } // namespace page_load_metrics
...@@ -134,6 +134,42 @@ bool DidObserveLoadingBehaviorInAnyFrame( ...@@ -134,6 +134,42 @@ bool DidObserveLoadingBehaviorInAnyFrame(
const page_load_metrics::PageLoadExtraInfo& info, const page_load_metrics::PageLoadExtraInfo& info,
blink::WebLoadingBehaviorFlag behavior); blink::WebLoadingBehaviorFlag behavior);
// Whether the given url has a Google Search hostname, i.e. whether
// GetGoogleHostnamePrefix() reports exactly "www" for it.
// Examples:
// https://www.google.com -> true
// https://www.google.co.jp -> true
// https://www.google.example.com -> false
// https://docs.google.com -> false
// https://google.com -> false (bare domain)
bool IsGoogleSearchHostname(const GURL& url);
// Whether the given url is for a Google Search results page. See
// https://docs.google.com/document/d/1jNPZ6Aeh0KV6umw1yZrrkfXRfxWNruwu7FELLx_cpOg/edit
// for additional details.
// The url must have a Google Search hostname, a path of "/search", "/webhp",
// "/custom" or "/", and a 'q=' parameter in either the query string or the
// fragment (the portion after '#').
// Examples:
// https://www.google.com/#q=test -> true
// https://www.google.com/search?q=test -> true
// https://www.google.com/webhp#a=b&q=test&c=d -> true
// https://www.google.com/ -> false
// https://www.google.com/about/ -> false
bool IsGoogleSearchResultUrl(const GURL& url);
// Whether the given url is a Google Search redirector URL. The url must have a
// Google Search hostname and be either "/url" with a 'source=web' component in
// its query string, or "/searchurl/r.html" with a fragment (the portion after
// '#').
// Examples:
// https://www.google.com/url?source=web -> true
// https://www.google.com/url?a=b&source=web&c=d -> true
// https://www.google.com/searchurl/r.html#foo -> true
// https://www.google.com/url?a=b -> false
// https://www.example.com/url?source=web -> false
bool IsGoogleSearchRedirectorUrl(const GURL& url);
// Whether the given query string contains the given component. The query
// parameter should contain the query string of a URL (the portion following
// the question mark, excluding the question mark). The component must fully
// match a component in the query string. For example, 'foo=bar' would match
// the query string 'a=b&foo=bar&c=d' but would not match 'a=b&zzzfoo=bar&c=d'
// since, though foo=bar appears in the query string, the key specified in the
// component 'foo' does not match the full key in the query string
// 'zzzfoo'. For QueryContainsComponent, the component should be of the form
// 'key=value'. For QueryContainsComponentPrefix, the component should be of
// the form 'key=' (where the value is not specified).
//
// A URL fragment (the portion following '#', excluding the '#') may be passed
// as the query in the same way. The query must not start with '?' or '#'.
// Both functions return false if the query or the component is empty.
bool QueryContainsComponent(const base::StringPiece query,
const base::StringPiece component);
bool QueryContainsComponentPrefix(const base::StringPiece query,
const base::StringPiece component);
} // namespace page_load_metrics } // namespace page_load_metrics
#endif // CHROME_BROWSER_PAGE_LOAD_METRICS_PAGE_LOAD_METRICS_UTIL_H_ #endif // CHROME_BROWSER_PAGE_LOAD_METRICS_PAGE_LOAD_METRICS_UTIL_H_
...@@ -78,3 +78,154 @@ TEST_F(PageLoadMetricsUtilTest, GetGoogleHostnamePrefix) { ...@@ -78,3 +78,154 @@ TEST_F(PageLoadMetricsUtilTest, GetGoogleHostnamePrefix) {
} }
} }
} }
// Table-driven check of page_load_metrics::IsGoogleSearchHostname: each row
// pairs a URL with the result expected for it.
TEST_F(PageLoadMetricsUtilTest, IsGoogleSearchHostname) {
  const struct {
    bool expected_result;
    const char* url;
  } kTestCases[] = {
      {true, "https://www.google.com/"},
      {true, "https://www.google.co.uk/"},
      {true, "https://www.google.co.in/"},
      {false, "https://other.google.com/"},
      {false, "https://other.www.google.com/"},
      {false, "https://www.other.google.com/"},
      {false, "https://www.www.google.com/"},
      {false, "https://www.google.appspot.com/"},
      {false, "https://www.google.example.com/"},
      // Search results are not served from the bare google.com domain.
      {false, "https://google.com/"},
  };

  for (const auto& test_case : kTestCases) {
    EXPECT_EQ(test_case.expected_result,
              page_load_metrics::IsGoogleSearchHostname(GURL(test_case.url)))
        << "for URL: " << test_case.url;
  }
}
// Table-driven check of page_load_metrics::IsGoogleSearchResultUrl: each row
// pairs a URL with the result expected for it.
TEST_F(PageLoadMetricsUtilTest, IsGoogleSearchResultUrl) {
  const struct {
    bool expected_result;
    const char* url;
  } kTestCases[] = {
      {true, "https://www.google.com/#q=test"},
      {true, "https://www.google.com/search#q=test"},
      {true, "https://www.google.com/search?q=test"},
      {true, "https://www.google.com/webhp#q=test"},
      {true, "https://www.google.com/webhp?q=test"},
      {true, "https://www.google.com/webhp?a=b&q=test"},
      {true, "https://www.google.com/webhp?a=b&q=test&c=d"},
      {true, "https://www.google.com/webhp#a=b&q=test&c=d"},
      {true, "https://www.google.com/webhp?#a=b&q=test&c=d"},
      {false, "https://www.google.com/"},
      {false, "https://www.google.com/about/"},
      {false, "https://other.google.com/"},
      {false, "https://other.google.com/webhp?q=test"},
      {false, "http://www.example.com/"},
      {false, "https://www.example.com/webhp?q=test"},
      {false, "https://google.com/#q=test"},
  };

  for (const auto& test_case : kTestCases) {
    EXPECT_EQ(test_case.expected_result,
              page_load_metrics::IsGoogleSearchResultUrl(GURL(test_case.url)))
        << "for URL: " << test_case.url;
  }
}
// Table-driven check of page_load_metrics::IsGoogleSearchRedirectorUrl: each
// row pairs a URL with the result expected for it. Each URL is listed once.
TEST_F(PageLoadMetricsUtilTest, IsGoogleSearchRedirectorUrl) {
  struct {
    bool expected_result;
    const char* url;
  } test_cases[] = {
      {true, "https://www.google.com/url?source=web"},
      {true, "https://www.google.com/url?source=web#foo"},
      {true, "https://www.google.com/searchurl/r.html#foo"},
      {true, "https://www.google.com/url?a=b&source=web&c=d"},
      {false, "https://www.google.com/?"},
      {false, "https://www.google.com/?url"},
      {false, "https://www.example.com/url?source=web"},
      {false, "https://google.com/url?"},
      {false, "https://www.google.com/?source=web"},
      {false, "https://www.google.com/source=web"},
      {false, "https://www.google.com/url?"},
      {false, "https://www.google.com/url?a=b"},
  };
  for (const auto& test : test_cases) {
    EXPECT_EQ(test.expected_result,
              page_load_metrics::IsGoogleSearchRedirectorUrl(GURL(test.url)))
        << "for URL: " << test.url;
  }
}
// Table-driven check of page_load_metrics::QueryContainsComponent: each row
// holds a query string, the component searched for, and the expected result.
TEST_F(PageLoadMetricsUtilTest, QueryContainsComponent) {
  const struct {
    bool expected_result;
    const char* query;
    const char* component;
  } kTestCases[] = {
      {true, "a=b", "a=b"},
      {true, "a=b&c=d", "a=b"},
      {true, "a=b&c=d", "c=d"},
      {true, "a=b&c=d&e=f", "c=d"},
      {true, "za=b&a=b", "a=b"},
      {true, "a=bz&a=b", "a=b"},
      {true, "a=ba=b&a=b", "a=b"},
      {true, "a=a=a&a=a", "a=a"},
      {true, "source=web", "source=web"},
      {true, "a=b&source=web", "source=web"},
      {true, "a=b&source=web&c=d", "source=web"},
      {false, "a=a=a", "a=a"},
      {false, "", ""},
      {false, "a=b", ""},
      {false, "", "a=b"},
      {false, "za=b", "a=b"},
      {false, "za=bz", "a=b"},
      {false, "a=bz", "a=b"},
      {false, "za=b&c=d", "a=b"},
      {false, "a=b&c=dz", "c=d"},
      {false, "a=b&zc=d&e=f", "c=d"},
      {false, "a=b&c=dz&e=f", "c=d"},
      {false, "a=b&zc=dz&e=f", "c=d"},
      {false, "a=b&foosource=web&c=d", "source=web"},
      {false, "a=b&source=webbar&c=d", "source=web"},
      {false, "a=b&foosource=webbar&c=d", "source=web"},
  };

  for (const auto& test_case : kTestCases) {
    EXPECT_EQ(test_case.expected_result,
              page_load_metrics::QueryContainsComponent(test_case.query,
                                                        test_case.component))
        << "For query: " << test_case.query
        << " with component: " << test_case.component;
  }
}
// Table-driven check of page_load_metrics::QueryContainsComponentPrefix: each
// row holds a query string, the 'key=' prefix searched for, and the expected
// result.
TEST_F(PageLoadMetricsUtilTest, QueryContainsComponentPrefix) {
  const struct {
    bool expected_result;
    const char* query;
    const char* component;
  } kTestCases[] = {
      {true, "a=b", "a="},
      {true, "a=b&c=d", "a="},
      {true, "a=b&c=d", "c="},
      {true, "a=b&c=d&e=f", "c="},
      {true, "za=b&a=b", "a="},
      {true, "ba=a=b&a=b", "a="},
      {true, "q=test", "q="},
      {true, "a=b&q=test", "q="},
      {true, "q=test&c=d", "q="},
      {true, "a=b&q=test&c=d", "q="},
      {false, "", ""},
      {false, "za=b", "a="},
      {false, "za=b&c=d", "a="},
      {false, "a=b&zc=d", "c="},
      {false, "a=b&zc=d&e=f", "c="},
      {false, "a=b&zq=test&c=d", "q="},
      {false, "ba=a=b", "a="},
  };

  for (const auto& test_case : kTestCases) {
    EXPECT_EQ(test_case.expected_result,
              page_load_metrics::QueryContainsComponentPrefix(
                  test_case.query, test_case.component))
        << "For query: " << test_case.query
        << " with component: " << test_case.component;
  }
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment