Commit bb8ddd93 authored by Mingjing Zhang, committed by Commit Bot

Add HistoryService API for domain diversity metric

This CL adds HistoryService API to query the number of unique domains
visited during a given range of dates. This API is a prerequisite for
the domain diversity metric.

Bug: 1015494
Change-Id: Ic5f49adfeae5f8b9f18f23b97a7eab02ba89c6a2
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1869755
Commit-Queue: Mingjing Zhang <mjzhang@chromium.org>
Reviewed-by: Scott Violet <sky@chromium.org>
Reviewed-by: Mark Pearson <mpearson@chromium.org>
Cr-Commit-Position: refs/heads/master@{#723054}
parent a1c15f10
......@@ -28,6 +28,7 @@
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "base/timer/elapsed_timer.h"
#include "base/trace_event/trace_event.h"
#include "build/build_config.h"
#include "components/favicon_base/favicon_util.h"
......@@ -144,6 +145,19 @@ const int kMaxRedirectCount = 32;
// and is deleted.
const int kExpireDaysThreshold = 90;
// The maximum number of days for which domain visit metrics are computed
// each time HistoryBackend::GetDomainDiversity() is called. Requests for more
// days are clamped to this value.
constexpr int kDomainDiversityBacktrackMaxDays = 7;
// An offset that corrects possible error in date/time arithmetic caused by
// fluctuation of day length due to Daylight Saving Time (DST). For example,
// given midnight M, its next midnight can be computed as (M + 24 hour
// + offset).LocalMidnight(). In most modern DST systems, the DST shift is
// typically 1 hour. However, a larger value of 4 is chosen here to
// accommodate larger DST shifts that have been used historically and to
// avoid other potential issues. MidnightNDaysLater() adds this offset before
// snapping to local midnight.
constexpr int kDSTRoundingOffsetHours = 4;
bool IsFaviconBitmapExpired(base::Time last_updated) {
return (Time::Now() - last_updated) >
TimeDelta::FromDays(kFaviconRefetchDays);
......@@ -181,6 +195,12 @@ base::string16 FormatUrlForRedirectComparison(const GURL& url) {
net::UnescapeRule::NONE, nullptr, nullptr, nullptr);
}
// Returns the local midnight of the calendar day reached by moving |days|
// days forward (or backward, when |days| is negative) from the day that
// contains |time|.
base::Time MidnightNDaysLater(base::Time time, int days) {
  const base::Time start_of_day = time.LocalMidnight();

  // Deliberately overshoot the nominal target by kDSTRoundingOffsetHours so
  // that a day lengthened or shortened by a DST transition cannot land us on
  // the wrong calendar day; the final LocalMidnight() removes the overshoot.
  const base::Time past_target_midnight =
      start_of_day + base::TimeDelta::FromDays(days) +
      base::TimeDelta::FromHours(kDSTRoundingOffsetHours);

  return past_target_midnight.LocalMidnight();
}
QueuedHistoryDBTask::QueuedHistoryDBTask(
std::unique_ptr<HistoryDBTask> task,
scoped_refptr<base::SingleThreadTaskRunner> origin_loop,
......@@ -1201,6 +1221,59 @@ HistoryCountResult HistoryBackend::CountUniqueHostsVisitedLastMonth() {
return {!!db_, db_ ? db_->CountUniqueHostsVisitedLastMonth() : 0};
}
// Produces one DomainMetricSet per reported day, newest first. The first set
// ends at the local midnight of |report_time|; each following set ends one
// calendar day earlier. Only the metrics selected by |metric_type_bitmask|
// are filled in. Returns an empty collection when the database is
// unavailable.
DomainDiversityResults HistoryBackend::GetDomainDiversity(
    base::Time report_time,
    int number_of_days_to_report,
    DomainMetricBitmaskType metric_type_bitmask) {
  DCHECK_GE(number_of_days_to_report, 0);
  DCHECK_LE(number_of_days_to_report, kDomainDiversityBacktrackMaxDays);

  DomainDiversityResults metric_sets;
  if (!db_)
    return metric_sets;

  // The DCHECK above is compiled out of release builds, so the cap is
  // enforced here as well.
  const int days_to_report =
      std::min(number_of_days_to_report, kDomainDiversityBacktrackMaxDays);

  // End of the period currently being measured; moved back one midnight per
  // loop iteration.
  base::Time period_end = report_time.LocalMidnight();

  // Counts unique domains visited in the |span_in_days| calendar days that
  // end at |period_end|, and pairs that count with the start of the span.
  const auto count_domains_over = [this, &period_end](int span_in_days) {
    const base::Time period_begin =
        MidnightNDaysLater(period_end, -span_in_days);
    return DomainMetricCountType(
        db_->CountUniqueDomainsVisited(period_begin, period_end),
        period_begin);
  };

  base::ElapsedTimer query_timer;
  for (int remaining = days_to_report; remaining > 0; --remaining) {
    DomainMetricSet metrics_for_day;
    metrics_for_day.end_time = period_end;

    if (metric_type_bitmask & kEnableLast1DayMetric)
      metrics_for_day.one_day_metric = count_domains_over(1);

    if (metric_type_bitmask & kEnableLast7DayMetric)
      metrics_for_day.seven_day_metric = count_domains_over(7);

    if (metric_type_bitmask & kEnableLast28DayMetric)
      metrics_for_day.twenty_eight_day_metric = count_domains_over(28);

    metric_sets.push_back(metrics_for_day);
    period_end = MidnightNDaysLater(period_end, -1);
  }

  // Total time spent querying the database for all reported days.
  UMA_HISTOGRAM_COUNTS_10000("History.DomainCountQueryTime",
                             query_timer.Elapsed().InMilliseconds());
  return metric_sets;
}
HistoryLastVisitToHostResult HistoryBackend::GetLastVisitToHost(
const GURL& host,
base::Time begin_time,
......
......@@ -71,6 +71,11 @@ static const size_t kMaxFaviconBitmapsPerIconURL = 8;
// username/password, and any trivial subdomains (e.g., "www.", "m.") removed.
base::string16 FormatUrlForRedirectComparison(const GURL& url);
// Advances (if |days| >= 0) or backtracks (if |days| < 0) from |time| by
// abs(|days|) calendar days in local timezone and returns the midnight of the
// resulting day.
base::Time MidnightNDaysLater(base::Time time, int days);
// Keeps track of a queued HistoryDBTask. This class lives solely on the
// DB thread.
class QueuedHistoryDBTask {
......@@ -275,6 +280,35 @@ class HistoryBackend : public base::RefCountedThreadSafe<HistoryBackend>,
// Returns the number of hosts visited in the last month.
HistoryCountResult CountUniqueHostsVisitedLastMonth();
// Returns a collection of domain diversity metrics. Each metric is a count
// of the unique domains (effective top-level domain (eTLD) + 1, e.g.
// "foo.com", "bar.co.uk") visited within the 1-day, 7-day or 28-day span
// that ends at a midnight in local timezone.
//
// For each of the most recent |number_of_days_to_report| midnights before
// |report_time| (inclusive), this function computes a subset of
// {1-day, 7-day, 28-day} metrics whose spanning periods all end on that
// midnight. This subset of metrics to compute is specified by a bitmask
// |metric_type_bitmask|, which takes a bitwise combination of
// kEnableLast1DayMetric, kEnableLast7DayMetric and kEnableLast28DayMetric.
//
// All computed metrics are stored in DomainDiversityResults, which represents
// a collection of DomainMetricSet's. Each DomainMetricSet contains up to 3
// metrics ending at one unique midnight in the time range of
// |number_of_days_to_report| days before |report_time|. The collection of
// DomainMetricSet is sorted reverse chronologically by the ending midnight.
// The collection is empty if the history database is not available.
//
// For example, when |report_time| = 2019/11/01 00:01am,
// |number_of_days_to_report| = 3 and
// |metric_type_bitmask| = kEnableLast28DayMetric | kEnableLast1DayMetric,
// DomainDiversityResults will hold 3 DomainMetricSets, each containing 2
// metrics measuring domain visit counts spanning the following date ranges
// (all dates are inclusive):
// {{10/31, 10/4~10/31}, {10/30, 10/3~10/30}, {10/29, 10/2~10/29}}
DomainDiversityResults GetDomainDiversity(
base::Time report_time,
int number_of_days_to_report,
DomainMetricBitmaskType metric_type_bitmask);
// Gets the last time any webpage on the given host was visited within the
// time range [|begin_time|, |end_time|). If the given host has not been
// visited in the given time range, the result will have a null base::Time,
......
......@@ -22,6 +22,7 @@
#include "base/time/time.h"
#include "build/build_config.h"
#include "components/history/core/browser/url_utils.h"
#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
#include "sql/meta_table.h"
#include "sql/statement.h"
#include "sql/transaction.h"
......@@ -249,6 +250,39 @@ int HistoryDatabase::CountUniqueHostsVisitedLastMonth() {
return hosts.size();
}
// Returns the number of distinct domains among the URLs of qualifying visits
// whose visit time lies in [|begin_time|, |end_time|).
int HistoryDatabase::CountUniqueDomainsVisited(base::Time begin_time,
                                               base::Time end_time) {
  namespace rcd = net::registry_controlled_domains;

  sql::Statement visited_urls(db_.GetUniqueStatement(
      "SELECT urls.url FROM urls JOIN visits "
      "WHERE urls.id = visits.url "
      "AND (transition & ?) != 0 "              // CHAIN_END
      "AND (transition & ?) NOT IN (?, ?, ?) "  // NO SUBFRAME or
                                                // KEYWORD_GENERATED
      "AND hidden = 0 AND visit_time >= ? AND visit_time < ?"));

  // Parameters are bound in the order their placeholders appear above.
  int param = 0;
  visited_urls.BindInt(param++, ui::PAGE_TRANSITION_CHAIN_END);
  visited_urls.BindInt(param++, ui::PAGE_TRANSITION_CORE_MASK);
  visited_urls.BindInt(param++, ui::PAGE_TRANSITION_AUTO_SUBFRAME);
  visited_urls.BindInt(param++, ui::PAGE_TRANSITION_MANUAL_SUBFRAME);
  visited_urls.BindInt(param++, ui::PAGE_TRANSITION_KEYWORD_GENERATED);
  visited_urls.BindInt64(
      param++, begin_time.ToDeltaSinceWindowsEpoch().InMicroseconds());
  visited_urls.BindInt64(
      param++, end_time.ToDeltaSinceWindowsEpoch().InMicroseconds());

  std::set<std::string> unique_domains;
  while (visited_urls.Step()) {
    const GURL visited_url(visited_urls.ColumnString(0));
    const std::string registrable_domain = rcd::GetDomainAndRegistry(
        visited_url, rcd::EXCLUDE_PRIVATE_REGISTRIES);

    // IP addresses, empty URLs, and URLs with empty or unregistered TLDs
    // yield an empty domain and are not counted.
    if (registrable_domain.empty())
      continue;

    unique_domains.insert(registrable_domain);
  }

  return static_cast<int>(unique_domains.size());
}
void HistoryDatabase::BeginExclusiveMode() {
// We can't use set_exclusive_locking() since that only has an effect before
// the DB is opened.
......
......@@ -90,6 +90,10 @@ class HistoryDatabase : public DownloadDatabase,
// Counts the number of unique Hosts visited in the last month.
int CountUniqueHostsVisitedLastMonth();
// Counts the number of unique domains (eTLD+1) visited within
// [|begin_time|, |end_time|).
int CountUniqueDomainsVisited(base::Time begin_time, base::Time end_time);
// Call to set the mode on the database to exclusive. The default locking mode
// is "normal" but we want to run in exclusive mode for slightly better
// performance since we know nobody else is using the database. This is
......
......@@ -726,6 +726,23 @@ void HistoryService::CountUniqueHostsVisitedLastMonth(
std::move(callback));
}
// Forwards the query to HistoryBackend::GetDomainDiversity() on the backend
// task runner and hands the resulting DomainDiversityResults to |callback|.
// The request is posted through |tracker|.
void HistoryService::GetDomainDiversity(
    base::Time report_time,
    int number_of_days_to_report,
    DomainMetricBitmaskType metric_type_bitmask,
    DomainDiversityCallback callback,
    base::CancelableTaskTracker* tracker) {
  DCHECK(backend_task_runner_) << "History service being called after cleanup";
  DCHECK(thread_checker_.CalledOnValidThread());

  // The work to run on the backend sequence.
  auto backend_query = base::BindOnce(
      &HistoryBackend::GetDomainDiversity, history_backend_, report_time,
      number_of_days_to_report, metric_type_bitmask);

  tracker->PostTaskAndReplyWithResult(backend_task_runner_.get(), FROM_HERE,
                                      std::move(backend_query),
                                      std::move(callback));
}
base::CancelableTaskTracker::TaskId HistoryService::GetLastVisitToHost(
const GURL& host,
base::Time begin_time,
......
......@@ -319,6 +319,16 @@ class HistoryService : public KeyedService {
void CountUniqueHostsVisitedLastMonth(GetHistoryCountCallback callback,
base::CancelableTaskTracker* tracker);
// For each of the |number_of_days_to_report| consecutive midnights
// immediately preceding |report_time| (inclusive), reports (a subset of) the
// last 1-day, 7-day and 28-day domain visit counts ending at that midnight.
// The subset of metric types to report is specified by |metric_type_bitmask|.
// The results are computed by the history backend and passed to |callback|;
// the request is posted through |tracker|.
void GetDomainDiversity(base::Time report_time,
int number_of_days_to_report,
DomainMetricBitmaskType metric_type_bitmask,
DomainDiversityCallback callback,
base::CancelableTaskTracker* tracker);
using GetLastVisitToHostCallback =
base::OnceCallback<void(HistoryLastVisitToHostResult)>;
......
......@@ -275,6 +275,13 @@ HistoryAddPageArgs::HistoryAddPageArgs(const HistoryAddPageArgs& other) =
HistoryAddPageArgs::~HistoryAddPageArgs() {}
// DomainMetricSet ------------------------------------------------------------
// All special members are compiler-generated; they are defined out of line
// here rather than in the header.
DomainMetricSet::DomainMetricSet() = default;
DomainMetricSet::DomainMetricSet(const DomainMetricSet&) = default;
DomainMetricSet& DomainMetricSet::operator=(const DomainMetricSet&) = default;
DomainMetricSet::~DomainMetricSet() = default;
// IconMapping ----------------------------------------------------------------
IconMapping::IconMapping() {}
......
......@@ -432,6 +432,53 @@ struct HistoryCountResult {
int count = 0;
};
// DomainDiversity -----------------------------------------------------------
struct DomainMetricCountType {
DomainMetricCountType(const int metric_count,
const base::Time& metric_start_time)
: count(metric_count), start_time(metric_start_time) {}
int count;
base::Time start_time;
};
// DomainMetricSet represents a set of 1-day, 7-day and 28-day domain visit
// counts whose spanning periods all end at the same time. Each metric is
// optional; a metric that was not requested is left unset.
struct DomainMetricSet {
DomainMetricSet();
DomainMetricSet(const DomainMetricSet&);
~DomainMetricSet();
DomainMetricSet& operator=(const DomainMetricSet&);
// Unique-domain count for the 1-day span ending at |end_time|.
base::Optional<DomainMetricCountType> one_day_metric;
// Unique-domain count for the 7-day span ending at |end_time|.
base::Optional<DomainMetricCountType> seven_day_metric;
// Unique-domain count for the 28-day span ending at |end_time|.
base::Optional<DomainMetricCountType> twenty_eight_day_metric;
// The end time of the spanning periods. All 3 metrics should have the same
// end time.
base::Time end_time;
};
// DomainDiversityResults is a collection of DomainMetricSet's computed for
// a continuous range of end dates. Typically, each DomainMetricSet holds a
// metric set whose 1-day, 7-day and 28-day spanning periods all end at one
// unique midnight in that date range.
using DomainDiversityResults = std::vector<DomainMetricSet>;
// The callback that receives the computed DomainDiversityResults.
using DomainDiversityCallback =
base::OnceCallback<void(DomainDiversityResults)>;
// The bitmask to specify the types of metrics to compute in
// HistoryBackend::GetDomainDiversity(). It is a bitwise OR of
// DomainMetricType values.
using DomainMetricBitmaskType = uint32_t;
// Individual bits of DomainMetricBitmaskType; each bit enables one metric.
enum DomainMetricType : DomainMetricBitmaskType {
// No metric is selected.
kNoMetric = 0,
// Compute the metric covering the last 1 day.
kEnableLast1DayMetric = 1 << 0,
// Compute the metric covering the last 7 days.
kEnableLast7DayMetric = 1 << 1,
// Compute the metric covering the last 28 days.
kEnableLast28DayMetric = 1 << 2
};
// HistoryLastVisitToHostResult encapsulates the result of a call to
// HistoryBackend::GetLastVisitToHost().
struct HistoryLastVisitToHostResult {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment