Commit 3f89d55f authored by stevet@chromium.org's avatar stevet@chromium.org

Add declarations and helpers for Google Search counting.

We want to count the number of searches from various access points in Chrome. To do this, we've added a new UMA enumerated histogram to track these values (GoogleSearch.AccessPoint). These are the necessary declarations for various search access points that will be added shortly.

We've also added an IsGoogleSearchUrl helper to google_util to identify search URLs.

BUG=
TEST=Ensure that all GoogleUtilTest unit tests succeed.


Review URL: http://codereview.chromium.org/9452039

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@124587 0039d316-1c4b-4281-b951-d872f2087c98
parent fa931bbb
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/google/google_search_metrics.h"
#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "chrome/browser/metrics/metrics_service.h"
// Nothing to set up: the class declares no data members.
GoogleSearchMetrics::GoogleSearchMetrics() {}
// Out-of-line definition of the virtual destructor; there is nothing to
// release.
GoogleSearchMetrics::~GoogleSearchMetrics() {}
// Records one Google search that originated from access point |ap| in the
// "GoogleSearch.AccessPoint" enumerated histogram.
//
// |ap| must be a real access point, i.e. strictly below AP_BOUNDARY, which
// only serves as the histogram's boundary value.
void GoogleSearchMetrics::RecordGoogleSearch(AccessPoint ap) const {
  // Reject the sentinel and anything past it. Checking only for inequality
  // with AP_BOUNDARY would let larger out-of-range values (e.g. from a bad
  // cast) through unnoticed.
  DCHECK_LT(ap, AP_BOUNDARY);
  UMA_HISTOGRAM_ENUMERATION("GoogleSearch.AccessPoint", ap, AP_BOUNDARY);
}
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_BROWSER_GOOGLE_GOOGLE_SEARCH_METRICS_H_
#define CHROME_BROWSER_GOOGLE_GOOGLE_SEARCH_METRICS_H_
// Lightweight helper for code that wants to report Google search metrics,
// chiefly counts of searches broken down by the access point they came from.
// Part of the reason this class exists is to make testing easier.
class GoogleSearchMetrics {
 public:
  // Google Search access points, intended for use with UMA enumeration
  // histograms. The numeric values are spelled out explicitly; AP_BOUNDARY is
  // not an access point but the boundary value passed to the histogram, so it
  // stays last.
  enum AccessPoint {
    AP_OMNIBOX = 0,
    AP_OMNIBOX_INSTANT = 1,
    AP_DIRECT_NAV = 2,
    AP_DIRECT_NAV_INSTANT = 3,
    AP_HOME_PAGE = 4,
    AP_HOME_PAGE_INSTANT = 5,
    AP_SEARCH_APP = 6,
    AP_SEARCH_APP_INSTANT = 7,
    AP_BOUNDARY = 8
  };

  GoogleSearchMetrics();
  virtual ~GoogleSearchMetrics();

  // Reports one Google search that came from access point |ap|.
  virtual void RecordGoogleSearch(AccessPoint ap) const;
};
#endif // CHROME_BROWSER_GOOGLE_GOOGLE_SEARCH_METRICS_H_
...@@ -5,9 +5,11 @@ ...@@ -5,9 +5,11 @@
#include "chrome/browser/google/google_util.h" #include "chrome/browser/google/google_util.h"
#include <string> #include <string>
#include <vector>
#include "base/command_line.h" #include "base/command_line.h"
#include "base/string16.h" #include "base/string16.h"
#include "base/string_split.h"
#include "base/string_util.h" #include "base/string_util.h"
#include "base/utf_string_conversions.h" #include "base/utf_string_conversions.h"
#include "chrome/browser/browser_process.h" #include "chrome/browser/browser_process.h"
...@@ -26,6 +28,51 @@ namespace { ...@@ -26,6 +28,51 @@ namespace {
const char* brand_for_testing = NULL; const char* brand_for_testing = NULL;
// Returns true when |str| holds a "q=" query parameter whose value is
// non-empty. |str| is an '&'-separated parameter list, either the URL
// parameters or the hash fragment, with the leading '?' or '#' already removed
// (as returned by GURL::query() or GURL::ref()).
bool HasQueryParameter(const std::string& str) {
  std::vector<std::string> pieces;
  base::SplitString(str, '&', &pieces);
  for (size_t i = 0; i < pieces.size(); ++i) {
    const std::string& piece = pieces[i];
    // A piece longer than the two-character "q=" prefix carries a value.
    if (piece.size() > 2 && StartsWithASCII(piece, "q=", false))
      return true;
  }
  return false;
}
// Returns true when |url| is a valid http or https URL, on the scheme's
// default port, whose host is "google.<TLD>" or "www.google.<TLD>".
bool IsGoogleDomainUrl(const GURL& url) {
  // Only well-formed http/https URLs qualify.
  const bool is_web_url =
      url.is_valid() && (url.SchemeIs("http") || url.SchemeIs("https"));
  if (!is_web_url)
    return false;

  // Require the default port for the scheme, i.e. no port component in |url|.
  if (!url.port().empty())
    return false;

  // The host has to end in a recognized TLD.
  const size_t registry_length =
      net::RegistryControlledDomainService::GetRegistryLength(url, false);
  if (registry_length == 0 || registry_length == std::string::npos)
    return false;

  // Whatever precedes the TLD must be exactly "www.google." or "google.".
  const std::string host(url.host());
  const std::string prefix(host, 0, host.length() - registry_length);
  return LowerCaseEqualsASCII(prefix, "www.google.") ||
         LowerCaseEqualsASCII(prefix, "google.");
}
} // anonymous namespace } // anonymous namespace
namespace google_util { namespace google_util {
...@@ -119,39 +166,49 @@ bool GetReactivationBrand(std::string* brand) { ...@@ -119,39 +166,49 @@ bool GetReactivationBrand(std::string* brand) {
bool IsGoogleHomePageUrl(const std::string& url) { bool IsGoogleHomePageUrl(const std::string& url) {
GURL original_url(url); GURL original_url(url);
if (!original_url.is_valid())
return false;
// Make sure the scheme is valid. // First check to see if this has a Google domain.
if (!original_url.SchemeIs("http") && !original_url.SchemeIs("https")) if (!IsGoogleDomainUrl(original_url))
return false; return false;
// Make sure port is default for the respective scheme. // Make sure the path is a known home page path.
if (!original_url.port().empty()) std::string path(original_url.path());
if (path != "/" && path != "/webhp" &&
!StartsWithASCII(path, "/ig", false)) {
return false; return false;
}
// Accept only valid TLD. return true;
size_t tld_length = net::RegistryControlledDomainService::GetRegistryLength( }
original_url, false);
if (tld_length == 0 || tld_length == std::string::npos)
return false;
// We only accept "www.google." in front of the TLD. bool IsGoogleSearchUrl(const std::string& url) {
std::string host = original_url.host(); GURL original_url(url);
host = host.substr(0, host.length() - tld_length);
if (!LowerCaseEqualsASCII(host, "www.google.") && // First check to see if this has a Google domain.
!LowerCaseEqualsASCII(host, "google.")) if (!IsGoogleDomainUrl(original_url))
return false; return false;
// Make sure the path is a known home page path. // Make sure the path is a known search path.
std::string path(original_url.path()); std::string path(original_url.path());
if (!LowerCaseEqualsASCII(path, "/") && bool has_valid_path = false;
!LowerCaseEqualsASCII(path, "/webhp") && bool is_home_page_base = false;
!StartsWithASCII(path, "/ig", false)) { if (path == "/search") {
return false; has_valid_path = true;
} else if (path == "/webhp" || path == "/") {
// Note that we allow both "/" and "" paths, but GURL spits them
// both out as just "/".
has_valid_path = true;
is_home_page_base = true;
} }
if (!has_valid_path)
return false;
return true; // Check for query parameter in URL parameter and hash fragment, depending on
// the path type.
std::string query(original_url.query());
std::string ref(original_url.ref());
return HasQueryParameter(ref) ||
(!is_home_page_base && HasQueryParameter(query));
} }
bool IsOrganic(const std::string& brand) { bool IsOrganic(const std::string& brand) {
......
// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
// //
...@@ -41,6 +41,9 @@ bool GetReactivationBrand(std::string* brand); ...@@ -41,6 +41,9 @@ bool GetReactivationBrand(std::string* brand);
// True if |url| represents a valid Google home page URL. // True if |url| represents a valid Google home page URL.
bool IsGoogleHomePageUrl(const std::string& url); bool IsGoogleHomePageUrl(const std::string& url);
// True if |url| represents a valid Google search URL.
bool IsGoogleSearchUrl(const std::string& url);
// True if a build is strictly organic, according to its brand code. // True if a build is strictly organic, according to its brand code.
bool IsOrganic(const std::string& brand); bool IsOrganic(const std::string& brand);
......
// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include "testing/gtest/include/gtest/gtest.h" #include "testing/gtest/include/gtest/gtest.h"
using google_util::IsGoogleHomePageUrl; using google_util::IsGoogleHomePageUrl;
using google_util::IsGoogleSearchUrl;
TEST(GoogleUtilTest, GoodHomePagesNonSecure) { TEST(GoogleUtilTest, GoodHomePagesNonSecure) {
// Valid home page hosts. // Valid home page hosts.
...@@ -77,4 +78,166 @@ TEST(GoogleUtilTest, BadHomePages) { ...@@ -77,4 +78,166 @@ TEST(GoogleUtilTest, BadHomePages) {
EXPECT_FALSE(IsGoogleHomePageUrl("http://www.google.com/webhp/abc")); EXPECT_FALSE(IsGoogleHomePageUrl("http://www.google.com/webhp/abc"));
EXPECT_FALSE(IsGoogleHomePageUrl("http://www.google.com/abcig")); EXPECT_FALSE(IsGoogleHomePageUrl("http://www.google.com/abcig"));
EXPECT_FALSE(IsGoogleHomePageUrl("http://www.google.com/webhp/ig")); EXPECT_FALSE(IsGoogleHomePageUrl("http://www.google.com/webhp/ig"));
// A search URL should not be identified as a home page URL.
EXPECT_FALSE(IsGoogleHomePageUrl("http://www.google.com/search?q=something"));
// Path is case sensitive.
EXPECT_FALSE(IsGoogleHomePageUrl("https://www.google.com/WEBHP"));
}
// Checks that IsGoogleSearchUrl() accepts well-formed http:// search URLs.
TEST(GoogleUtilTest, GoodSearchPagesNonSecure) {
  const char* const kSearchUrls[] = {
    // With path "/search" the query parameter must be present in either the
    // URL parameters or the hash fragment.
    "http://www.google.com/search?q=something",
    "http://www.google.com/search#q=something",
    "http://www.google.com/search?name=bob&q=something",
    "http://www.google.com/search?name=bob#q=something",
    "http://www.google.com/search?name=bob#age=24&q=something",
    "http://www.google.co.uk/search?q=something",
    // Having the query parameter in both places is valid as well.
    "http://www.google.com/search?q=something#q=other",
    // With path "/webhp", "/" or "" the query parameter must be in the hash
    // fragment.
    "http://www.google.com/webhp#q=something",
    "http://www.google.com/webhp#name=bob&q=something",
    "http://www.google.com/webhp?name=bob#q=something",
    "http://www.google.com/webhp?name=bob#age=24&q=something",
    "http://www.google.com/#q=something",
    "http://www.google.com/#name=bob&q=something",
    "http://www.google.com/?name=bob#q=something",
    "http://www.google.com/?name=bob#age=24&q=something",
    "http://www.google.com#q=something",
    "http://www.google.com#name=bob&q=something",
    "http://www.google.com?name=bob#q=something",
    "http://www.google.com?name=bob#age=24&q=something",
  };
  const size_t kUrlCount = sizeof(kSearchUrls) / sizeof(kSearchUrls[0]);
  for (size_t i = 0; i < kUrlCount; ++i) {
    // Stream the URL so a failure names the offending input.
    EXPECT_TRUE(IsGoogleSearchUrl(kSearchUrls[i])) << kSearchUrls[i];
  }
}
// Checks that IsGoogleSearchUrl() accepts well-formed https:// search URLs.
TEST(GoogleUtilTest, GoodSearchPagesSecure) {
  const char* const kSearchUrls[] = {
    // With path "/search" the query parameter must be present in either the
    // URL parameters or the hash fragment.
    "https://www.google.com/search?q=something",
    "https://www.google.com/search#q=something",
    "https://www.google.com/search?name=bob&q=something",
    "https://www.google.com/search?name=bob#q=something",
    "https://www.google.com/search?name=bob#age=24&q=something",
    "https://www.google.co.uk/search?q=something",
    // Having the query parameter in both places is valid as well.
    "https://www.google.com/search?q=something#q=other",
    // With path "/webhp", "/" or "" the query parameter must be in the hash
    // fragment.
    "https://www.google.com/webhp#q=something",
    "https://www.google.com/webhp#name=bob&q=something",
    "https://www.google.com/webhp?name=bob#q=something",
    "https://www.google.com/webhp?name=bob#age=24&q=something",
    "https://www.google.com/#q=something",
    "https://www.google.com/#name=bob&q=something",
    "https://www.google.com/?name=bob#q=something",
    "https://www.google.com/?name=bob#age=24&q=something",
    "https://www.google.com#q=something",
    "https://www.google.com#name=bob&q=something",
    "https://www.google.com?name=bob#q=something",
    "https://www.google.com?name=bob#age=24&q=something",
  };
  const size_t kUrlCount = sizeof(kSearchUrls) / sizeof(kSearchUrls[0]);
  for (size_t i = 0; i < kUrlCount; ++i) {
    // Stream the URL so a failure names the offending input.
    EXPECT_TRUE(IsGoogleSearchUrl(kSearchUrls[i])) << kSearchUrls[i];
  }
}
// Checks that IsGoogleSearchUrl() rejects URLs that are not Google searches:
// home pages, non-http(s) schemes, empty queries, home-page paths whose query
// is not in the hash fragment, unknown paths, and wrongly-cased paths.
TEST(GoogleUtilTest, BadSearches) {
// A home page URL should not be identified as a search URL.
EXPECT_FALSE(IsGoogleSearchUrl(GoogleURLTracker::kDefaultGoogleHomepage));
EXPECT_FALSE(IsGoogleSearchUrl("http://google.com"));
EXPECT_FALSE(IsGoogleSearchUrl("http://www.google.com"));
// The search path alone, with no query parameter, is not a search either.
EXPECT_FALSE(IsGoogleSearchUrl("http://www.google.com/search"));
EXPECT_FALSE(IsGoogleSearchUrl("http://www.google.com/search?"));
// Must be http or https
EXPECT_FALSE(IsGoogleSearchUrl(
"ftp://www.google.com/search?q=something"));
EXPECT_FALSE(IsGoogleSearchUrl(
"file://does/not/exist/search?q=something"));
EXPECT_FALSE(IsGoogleSearchUrl(
"bad://www.google.com/search?q=something"));
// No scheme at all.
EXPECT_FALSE(IsGoogleSearchUrl(
"www.google.com/search?q=something"));
// Can't have an empty query parameter.
EXPECT_FALSE(IsGoogleSearchUrl(
"http://www.google.com/search?q="));
EXPECT_FALSE(IsGoogleSearchUrl(
"http://www.google.com/search?name=bob&q="));
EXPECT_FALSE(IsGoogleSearchUrl(
"http://www.google.com/webhp#q="));
EXPECT_FALSE(IsGoogleSearchUrl(
"http://www.google.com/webhp#name=bob&q="));
// Home page searches without a hash fragment query parameter are invalid.
EXPECT_FALSE(IsGoogleSearchUrl(
"http://www.google.com/webhp?q=something"));
EXPECT_FALSE(IsGoogleSearchUrl(
"http://www.google.com/webhp?q=something#no=good"));
EXPECT_FALSE(IsGoogleSearchUrl(
"http://www.google.com/webhp?name=bob&q=something"));
EXPECT_FALSE(IsGoogleSearchUrl(
"http://www.google.com/?q=something"));
EXPECT_FALSE(IsGoogleSearchUrl(
"http://www.google.com?q=something"));
// Some paths are outright invalid as searches.
EXPECT_FALSE(IsGoogleSearchUrl(
"http://www.google.com/notreal?q=something"));
EXPECT_FALSE(IsGoogleSearchUrl(
"http://www.google.com/chrome?q=something"));
EXPECT_FALSE(IsGoogleSearchUrl(
"http://www.google.com/search/nogood?q=something"));
EXPECT_FALSE(IsGoogleSearchUrl(
"http://www.google.com/webhp/nogood#q=something"));
// The empty string is not a search URL.
EXPECT_FALSE(IsGoogleSearchUrl(""));
// Case sensitive paths.
EXPECT_FALSE(IsGoogleSearchUrl(
"http://www.google.com/SEARCH?q=something"));
EXPECT_FALSE(IsGoogleSearchUrl(
"http://www.google.com/WEBHP#q=something"));
} }
...@@ -1437,6 +1437,8 @@ ...@@ -1437,6 +1437,8 @@
'browser/geolocation/wifi_data_provider_chromeos.h', 'browser/geolocation/wifi_data_provider_chromeos.h',
'browser/global_keyboard_shortcuts_mac.h', 'browser/global_keyboard_shortcuts_mac.h',
'browser/global_keyboard_shortcuts_mac.mm', 'browser/global_keyboard_shortcuts_mac.mm',
'browser/google/google_search_metrics.cc',
'browser/google/google_search_metrics.h',
'browser/google/google_update.cc', 'browser/google/google_update.cc',
'browser/google/google_update.h', 'browser/google/google_update.h',
'browser/google/google_update_settings_posix.cc', 'browser/google/google_update_settings_posix.cc',
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment