Commit ee60735b authored by Chris Mumford's avatar Chris Mumford Committed by Commit Bot

Revert "Improve DoH server availability tracking"

This reverts commit f02f7639.

Reason for revert: Failed to build on Cast Audio Linux

https://ci.chromium.org/p/chromium/builders/ci/Cast%20Audio%20Linux/67917

Original change's description:
> Improve DoH server availability tracking
> 
> Now that server availability and stats are tracked per-context, DoH
> servers only need to be disabled for consecutive failures. And now that
> availability behaves much more similarly to normal server failure
> tracking, the availability has been mostly merged into the normal
> tracking instead of using a separate flag.  A DoH server is now
> considered "available" (eligible for use in automatic mode) if
> |ServerStats::last_failure_count| is less than
> |kAutomaticModeFailureLimit| and there has been at least one success
> recorded on the current connection.
> 
> Net effect is that DoH servers can be enabled or failure-tracking reset
> for any successful request or probe to that server on the same context,
> even if a failure disable has taken effect (eg a transaction started
> before the server was disabled, or a new probe when we restart probes in
> a future CL).
> 
> Also, previously the DoH probe logic reset availability when restarted.
> Because the probes were generally only restarted for connection change,
> this had the effect of resetting availability for connection change.
> Now, the ResolveContext handles that itself on connection change by
> resetting the flag that it has seen at least one success on the current
> connection.
> 
> Bug: 1022059
> Change-Id: I872057cbb9f7a0a41f580dff57d906b1e4e5d33c
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2062676
> Reviewed-by: Matt Menke <mmenke@chromium.org>
> Commit-Queue: Eric Orth <ericorth@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#745551}

TBR=mmenke@chromium.org,ericorth@chromium.org

Change-Id: I0005490d361cb8c86583ab1b0f3819eeee13c4cd
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: 1022059
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2080446Reviewed-by: default avatarChris Mumford <cmumford@google.com>
Commit-Queue: Chris Mumford <cmumford@google.com>
Cr-Commit-Position: refs/heads/master@{#745554}
parent 8aaf55c6
......@@ -147,8 +147,7 @@ TEST_F(DnsClientTest, CanUseSecureDnsTransactions_ProbeSuccess) {
EXPECT_TRUE(
client_->FallbackFromSecureTransactionPreferred(&resolve_context_));
resolve_context_.RecordServerSuccess(0u /* server_index */,
true /* is_doh_server */,
resolve_context_.SetProbeSuccess(0u, true /* success */,
client_->GetCurrentSession());
EXPECT_TRUE(client_->CanUseSecureDnsTransactions());
EXPECT_FALSE(
......@@ -177,8 +176,7 @@ TEST_F(DnsClientTest, AllAllowed) {
client_->SetInsecureEnabled(true);
client_->SetSystemConfig(ValidConfigWithDoh());
resolve_context_.InvalidateCaches(client_->GetCurrentSession());
resolve_context_.RecordServerSuccess(0u /* server_index */,
true /* is_doh_server */,
resolve_context_.SetProbeSuccess(0u, true /* success */,
client_->GetCurrentSession());
EXPECT_TRUE(client_->CanUseSecureDnsTransactions());
......
......@@ -910,6 +910,7 @@ class DnsOverHttpsProbeRunner : public DnsProbeRunner {
i++) {
// Clear the existing probe stats.
probe_stats_list_[i] = std::make_unique<ProbeStats>();
context_->SetProbeSuccess(i, false /* success */, session_.get());
ContinueProbe(i, probe_stats_list_[i]->weak_factory.GetWeakPtr(),
network_change,
base::TimeTicks::Now() /* sequence_start_time */);
......@@ -987,6 +988,8 @@ class DnsOverHttpsProbeRunner : public DnsProbeRunner {
context_->RecordRtt(doh_server_index, true /* is_doh_server */,
base::TimeTicks::Now() - query_start_time, rv,
session_.get());
context_->SetProbeSuccess(doh_server_index, true /* success */,
session_.get());
probe_stats_list_[doh_server_index] = nullptr;
success = true;
}
......
This diff is collapsed.
......@@ -5673,8 +5673,7 @@ TEST_F(HostResolverManagerDnsTest,
// Mark a DoH server successful only for |resolve_context2|. Note that this
// must come after the resolver's configuration is set because this relies on
// the specific configuration containing a DoH server.
resolve_context2.RecordServerSuccess(0u /* server_index */,
true /* is_doh_server */,
resolve_context2.SetProbeSuccess(0u, true /* success */,
dns_client_->GetCurrentSession());
// No available DoH servers for |resolve_context1|, so expect a non-secure
......@@ -7558,8 +7557,7 @@ TEST_F(HostResolverManagerDnsTest,
ChangeDnsConfig(CreateValidDnsConfig());
DnsSession* session_before = dns_client_->GetCurrentSession();
resolve_context_->RecordServerSuccess(
0u /* server_index */, true /* is_doh_server */, session_before);
resolve_context_->SetProbeSuccess(0u, true, session_before);
ASSERT_TRUE(resolve_context_->GetDohServerAvailability(0u, session_before));
// Flush data by triggering a DnsConfigOverrides change.
......@@ -7573,8 +7571,7 @@ TEST_F(HostResolverManagerDnsTest,
EXPECT_FALSE(resolve_context_->GetDohServerAvailability(0u, session_after));
// Confirm new session is in use.
resolve_context_->RecordServerSuccess(
0u /* server_index */, true /* is_doh_server */, session_after);
resolve_context_->SetProbeSuccess(0u, true, session_after);
EXPECT_TRUE(resolve_context_->GetDohServerAvailability(0u, session_after));
}
......@@ -7702,8 +7699,7 @@ TEST_F(HostResolverManagerDnsTest,
ChangeDnsConfig(original_config);
DnsSession* session_before = dns_client_->GetCurrentSession();
resolve_context_->RecordServerSuccess(
0u /* server_index */, true /* is_doh_server */, session_before);
resolve_context_->SetProbeSuccess(0u, true, session_before);
ASSERT_TRUE(resolve_context_->GetDohServerAvailability(0u, session_before));
// Flush data by triggering a config change.
......@@ -7717,8 +7713,7 @@ TEST_F(HostResolverManagerDnsTest,
EXPECT_FALSE(resolve_context_->GetDohServerAvailability(0u, session_after));
// Confirm new session is in use.
resolve_context_->RecordServerSuccess(
0u /* server_index */, true /* is_doh_server */, session_after);
resolve_context_->SetProbeSuccess(0u, true, session_after);
EXPECT_TRUE(resolve_context_->GetDohServerAvailability(0u, session_after));
}
......
......@@ -89,10 +89,6 @@ struct ResolveContext::ServerStats {
// Count of consecutive failures after last success.
int last_failure_count;
// True if any success has ever been recorded for this server for the current
// connection.
bool current_connection_success = false;
// Last time when server returned failure or timeout.
base::TimeTicks last_failure;
// Last time when server returned success.
......@@ -176,17 +172,17 @@ base::Optional<size_t> ResolveContext::DohServerIndexToUse(
if (!IsCurrentSession(session))
return base::nullopt;
CHECK_LT(starting_doh_server_index, doh_server_stats_.size());
CHECK_LT(starting_doh_server_index, doh_server_availability_.size());
size_t index = starting_doh_server_index;
base::TimeTicks oldest_server_failure;
base::Optional<size_t> oldest_available_server_failure_index;
do {
CHECK_LT(index, doh_server_stats_.size());
CHECK_LT(index, doh_server_availability_.size());
// For a server to be considered "available", the server must have a
// successful probe status if we are in AUTOMATIC mode.
if (secure_dns_mode == DnsConfig::SecureDnsMode::SECURE ||
GetDohServerAvailability(index, session)) {
doh_server_availability_[index]) {
// If number of failures on this server doesn't exceed |config_.attempts|,
// return its index. |config_.attempts| will generally be more restrictive
// than |kAutomaticModeFailureLimit|, although this is not guaranteed.
......@@ -213,21 +209,41 @@ base::Optional<size_t> ResolveContext::DohServerIndexToUse(
return oldest_available_server_failure_index;
}
size_t ResolveContext::NumAvailableDohServers(const DnsSession* session) const {
if (!IsCurrentSession(session))
return 0;
size_t count = 0;
for (const auto& probe_result : doh_server_availability_) {
if (probe_result)
count++;
}
return count;
}
bool ResolveContext::GetDohServerAvailability(size_t doh_server_index,
const DnsSession* session) const {
if (!IsCurrentSession(session))
return false;
CHECK_LT(doh_server_index, doh_server_stats_.size());
return ServerStatsToDohAvailability(doh_server_stats_[doh_server_index]);
CHECK_LT(doh_server_index, doh_server_availability_.size());
return doh_server_availability_[doh_server_index];
}
size_t ResolveContext::NumAvailableDohServers(const DnsSession* session) const {
void ResolveContext::SetProbeSuccess(size_t doh_server_index,
bool success,
const DnsSession* session) {
if (!IsCurrentSession(session))
return 0;
return;
return std::count_if(doh_server_stats_.cbegin(), doh_server_stats_.cend(),
&ServerStatsToDohAvailability);
bool doh_available_before = NumAvailableDohServers(session) > 0;
CHECK_LT(doh_server_index, doh_server_availability_.size());
doh_server_availability_[doh_server_index] = success;
// TODO(crbug.com/1022059): Consider figuring out some way to only for the
// first context enabling DoH or the last context disabling DoH.
if (doh_available_before != NumAvailableDohServers(session) > 0)
NetworkChangeNotifier::TriggerNonSystemDnsChange();
}
void ResolveContext::RecordServerFailure(size_t server_index,
......@@ -236,17 +252,14 @@ void ResolveContext::RecordServerFailure(size_t server_index,
if (!IsCurrentSession(session))
return;
bool doh_available_before = NumAvailableDohServers(session) > 0;
ServerStats* stats = GetServerStats(server_index, is_doh_server);
++(stats->last_failure_count);
stats->last_failure = base::TimeTicks::Now();
// TODO(crbug.com/1022059): Consider figuring out some way to only for the
// first context enabling DoH or the last context disabling DoH.
bool doh_available_now = NumAvailableDohServers(session) > 0;
if (doh_available_before != doh_available_now)
NetworkChangeNotifier::TriggerNonSystemDnsChange();
if (is_doh_server &&
stats->last_failure_count >= kAutomaticModeFailureLimit) {
SetProbeSuccess(server_index, false /* success */, session);
}
}
void ResolveContext::RecordServerSuccess(size_t server_index,
......@@ -255,19 +268,15 @@ void ResolveContext::RecordServerSuccess(size_t server_index,
if (!IsCurrentSession(session))
return;
bool doh_available_before = NumAvailableDohServers(session) > 0;
ServerStats* stats = GetServerStats(server_index, is_doh_server);
// DoH queries can be sent using more than one URLRequestContext. A success
// from one URLRequestContext shouldn't zero out failures that may be
// consistently occurring for another URLRequestContext.
if (!is_doh_server)
stats->last_failure_count = 0;
stats->current_connection_success = true;
stats->last_failure = base::TimeTicks();
stats->last_success = base::TimeTicks::Now();
// TODO(crbug.com/1022059): Consider figuring out some way to only for the
// first context enabling DoH or the last context disabling DoH.
bool doh_available_now = NumAvailableDohServers(session) > 0;
if (doh_available_before != doh_available_now)
NetworkChangeNotifier::TriggerNonSystemDnsChange();
}
void ResolveContext::RecordRtt(size_t server_index,
......@@ -278,7 +287,7 @@ void ResolveContext::RecordRtt(size_t server_index,
if (!IsCurrentSession(session))
return;
RecordRttForUma(server_index, is_doh_server, rtt, rv, session);
RecordRttForUma(server_index, is_doh_server, rtt, rv);
ServerStats* stats = GetServerStats(server_index, is_doh_server);
......@@ -327,6 +336,7 @@ void ResolveContext::InvalidateCaches(const DnsSession* new_session) {
current_session_.reset();
classic_server_stats_.clear();
doh_server_stats_.clear();
doh_server_availability_.clear();
initial_timeout_ = base::TimeDelta();
if (!new_session)
......@@ -343,11 +353,16 @@ void ResolveContext::InvalidateCaches(const DnsSession* new_session) {
++i) {
doh_server_stats_.emplace_back(initial_timeout_, GetRttBuckets());
}
doh_server_availability_.insert(
doh_server_availability_.begin(),
new_session->config().dns_over_https_servers.size(), false);
CHECK_EQ(new_session->config().nameservers.size(),
classic_server_stats_.size());
CHECK_EQ(new_session->config().dns_over_https_servers.size(),
doh_server_stats_.size());
CHECK_EQ(new_session->config().dns_over_https_servers.size(),
doh_server_availability_.size());
}
bool ResolveContext::IsCurrentSession(const DnsSession* session) const {
......@@ -357,6 +372,8 @@ bool ResolveContext::IsCurrentSession(const DnsSession* session) const {
classic_server_stats_.size());
CHECK_EQ(current_session_->config().dns_over_https_servers.size(),
doh_server_stats_.size());
CHECK_EQ(doh_server_availability_.size(),
current_session_->config().dns_over_https_servers.size());
return true;
}
......@@ -406,15 +423,15 @@ base::TimeDelta ResolveContext::NextTimeoutHelper(ServerStats* server_stats,
void ResolveContext::RecordRttForUma(size_t server_index,
bool is_doh_server,
base::TimeDelta rtt,
int rv,
const DnsSession* session) {
DCHECK(IsCurrentSession(session));
int rv) {
DCHECK(current_session_);
std::string query_type;
std::string provider_id;
if (is_doh_server) {
// Secure queries are validated if the DoH server state is available.
if (GetDohServerAvailability(server_index, session))
CHECK_LT(server_index, doh_server_availability_.size());
if (doh_server_availability_[server_index])
query_type = "SecureValidated";
else
query_type = "SecureNotValidated";
......@@ -446,31 +463,10 @@ void ResolveContext::RecordRttForUma(size_t server_index,
void ResolveContext::OnNetworkChanged(
NetworkChangeNotifier::ConnectionType type) {
// Reset per-connection state on CONNECTION_NONE notifications (which are
// always sent before the new connection type on changes). Resetting this
// state is a "destructive" activity (see
// NetworkChangeObserver::OnNetworkChanged()), and it's also important that
// this reset occurs before probes are restarted on non-CONNECTION_NONE
// notifications.
if (type != NetworkChangeNotifier::CONNECTION_NONE)
return;
if (current_session_) {
if (current_session_)
initial_timeout_ = GetDefaultTimeout(current_session_->config());
for (ServerStats& classic_server_stat : classic_server_stats_)
classic_server_stat.current_connection_success = false;
for (ServerStats& doh_server_stat : doh_server_stats_)
doh_server_stat.current_connection_success = false;
}
max_timeout_ = GetMaxTimeout();
}
// static
bool ResolveContext::ServerStatsToDohAvailability(
const ResolveContext::ServerStats& stats) {
return stats.last_failure_count < kAutomaticModeFailureLimit &&
stats.current_connection_success;
}
} // namespace net
......@@ -34,8 +34,9 @@ class NET_EXPORT_PRIVATE ResolveContext
// that have reached this limit.
//
// This limit is different from the failure limit that governs insecure async
// resolver bypass in multiple ways: NXDOMAIN responses are never counted as
// failures, and the outcome of fallback queries is not taken into account.
// resolver bypass in several ways: the failures need not be consecutive,
// NXDOMAIN responses are never counted as failures, and the outcome of
// fallback queries is not taken into account.
static const int kAutomaticModeFailureLimit = 10;
ResolveContext(URLRequestContext* url_request_context, bool enable_caching);
......@@ -57,8 +58,8 @@ class NET_EXPORT_PRIVATE ResolveContext
size_t FirstServerIndex(bool doh_server, const DnsSession* session);
// Find the index of a non-DoH server to use for this attempt. Starts from
// |starting_server| and finds the first server (wrapping around as necessary)
// below failure limits (|DnsConfig::attempts|), or if no servers are below
// |starting_server| and finds the first eligible server (wrapping around as
// necessary) below failure limits, or if no eligible servers are below
// failure limits, the one with the oldest last failure. If |session| is not
// the current session, assumes all servers are below failure limits and thus
// always returns |starting_server|.
......@@ -67,29 +68,30 @@ class NET_EXPORT_PRIVATE ResolveContext
// Find the index of a DoH server to use for this attempt. Starts from
// |starting_doh_server_index| and finds the first eligible server (wrapping
// around as necessary) below failure limits (|DnsConfig::attempts|), or of no
// eligible servers are below failure limits, the eligible one with the oldest
// last failure. Returns nullopt if there are no eligible DoH servers or
// |session| is not the current session.
//
// If in AUTOMATIC mode, DoH servers are only eligible if "available". See
// GetDohServerAvailability() for details.
// around as necessary) below failure limits, or of no eligible servers are
// below failure limits, the one with the oldest last failure. If in AUTOMATIC
// mode, a server is only eligible after a successful DoH probe. Returns
// nullopt if there are no eligible DoH servers or |session| is not the
// current session.
base::Optional<size_t> DohServerIndexToUse(
size_t starting_doh_server_index,
DnsConfig::SecureDnsMode secure_dns_mode,
const DnsSession* session);
// Returns whether |doh_server_index| is eligible for use in AUTOMATIC mode,
// that is that consecutive failures are less than kAutomaticModeFailureLimit
// and the server has had at least one successful query or probe. Always
// |false| if |session| is not the current session.
// Returns the number of DoH servers with successful probe states. Always 0 if
// |session| is not the current session.
size_t NumAvailableDohServers(const DnsSession* session) const;
// Returns whether |doh_server_index| is marked available. Always |false| if
// |session| is not the current session.
bool GetDohServerAvailability(size_t doh_server_index,
const DnsSession* session) const;
// Returns the number of DoH servers available for use in AUTOMATIC mode (see
// GetDohServerAvailability()). Always 0 if |session| is not the current
// Record the latest DoH probe state. Noop if |session| is not the current
// session.
size_t NumAvailableDohServers(const DnsSession* session) const;
void SetProbeSuccess(size_t doh_server_index,
bool success,
const DnsSession* session);
// Record that server failed to respond (due to SRV_FAIL or timeout). If
// |is_doh_server| and the number of failures has surpassed a threshold,
......@@ -158,14 +160,11 @@ class NET_EXPORT_PRIVATE ResolveContext
void RecordRttForUma(size_t server_index,
bool is_doh_server,
base::TimeDelta rtt,
int rv,
const DnsSession* session);
int rv);
// NetworkChangeNotifier::NetworkChangeObserver:
void OnNetworkChanged(NetworkChangeNotifier::ConnectionType type) override;
static bool ServerStatsToDohAvailability(const ServerStats& stats);
URLRequestContext* url_request_context_;
std::unique_ptr<HostCache> host_cache_;
......@@ -183,6 +182,7 @@ class NET_EXPORT_PRIVATE ResolveContext
// TODO(crbug.com/1022059): Make const DnsSession once server stats have been
// moved and no longer need to be read from DnsSession for availability logic.
base::WeakPtr<const DnsSession> current_session_;
std::vector<bool> doh_server_availability_;
// Current index into |config_.nameservers| to begin resolution with.
int classic_server_index_ = 0;
base::TimeDelta initial_timeout_;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment