Commit 433202f7 authored by Zhongyi Shi's avatar Zhongyi Shi Committed by Commit Bot

Unresponsive network detection: record connections closed due to

connectivity related errors.

Early data indicates that most connections have already been closed by the
time a network change happens, which is the main motivation for tracking
the count of connections closed due to connectivity-related errors.

Some connections may get closed early due to connectivity-related issues:
1. public reset by the peer post handshake is typically caused by NAT
   rebinding.
2. QUIC_PACKET_WRITE_ERROR by self is typically an early signal that the
   network is no longer usable.
3. QUIC_TOO_MANY_RTOS by self is typically caused by silent blackholing.

This change also moves metrics logging to QuicConnectivityMonitor.

Bug: 1090532
Change-Id: Ic0d05fc788aeb35b49ae853b0b8b652673c16ea2
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2293261
Reviewed-by: Ken Rockot <rockot@google.com>
Commit-Queue: Zhongyi Shi <zhongyi@chromium.org>
Cr-Commit-Position: refs/heads/master@{#788498}
parent 306180f9
...@@ -1109,6 +1109,7 @@ void QuicChromiumClientSession::RemoveHandle(Handle* handle) { ...@@ -1109,6 +1109,7 @@ void QuicChromiumClientSession::RemoveHandle(Handle* handle) {
void QuicChromiumClientSession::AddConnectivityObserver( void QuicChromiumClientSession::AddConnectivityObserver(
ConnectivityObserver* observer) { ConnectivityObserver* observer) {
connectivity_observer_list_.AddObserver(observer); connectivity_observer_list_.AddObserver(observer);
observer->OnSessionRegistered(this, GetCurrentNetwork());
} }
void QuicChromiumClientSession::RemoveConnectivityObserver( void QuicChromiumClientSession::RemoveConnectivityObserver(
...@@ -1674,6 +1675,12 @@ void QuicChromiumClientSession::OnConnectionClosed( ...@@ -1674,6 +1675,12 @@ void QuicChromiumClientSession::OnConnectionClosed(
RecordConnectionCloseErrorCode(frame, source, session_key_.host(), RecordConnectionCloseErrorCode(frame, source, session_key_.host(),
OneRttKeysAvailable()); OneRttKeysAvailable());
if (OneRttKeysAvailable()) {
NetworkChangeNotifier::NetworkHandle current_network = GetCurrentNetwork();
for (auto& observer : connectivity_observer_list_)
observer.OnSessionClosedAfterHandshake(this, current_network, source,
frame.quic_error_code);
}
const quic::QuicErrorCode error = frame.quic_error_code; const quic::QuicErrorCode error = frame.quic_error_code;
const std::string& error_details = frame.error_details; const std::string& error_details = frame.error_details;
......
...@@ -164,6 +164,20 @@ class NET_EXPORT_PRIVATE QuicChromiumClientSession ...@@ -164,6 +164,20 @@ class NET_EXPORT_PRIVATE QuicChromiumClientSession
NetworkChangeNotifier::NetworkHandle network, NetworkChangeNotifier::NetworkHandle network,
int error_code) = 0; int error_code) = 0;
// Called when |session| is closed by |source| with |error_code|
// and handshake has been confirmed.
// |network| is the network |session| was using at the time of the close.
// |source| tells whether the close was initiated by the peer or by this
// endpoint, and |error_code| is the QUIC error code carried by the
// connection close frame.
virtual void OnSessionClosedAfterHandshake(
QuicChromiumClientSession* session,
NetworkChangeNotifier::NetworkHandle network,
quic::ConnectionCloseSource source,
quic::QuicErrorCode error_code) = 0;
// Called when |this| is registered to monitor the connectivity of the
// |session|.
// |network| is the network |session| is using at registration time.
virtual void OnSessionRegistered(
QuicChromiumClientSession* session,
NetworkChangeNotifier::NetworkHandle network) = 0;
// Called when |session| is removed. // Called when |session| is removed.
virtual void OnSessionRemoved(QuicChromiumClientSession* session) = 0; virtual void OnSessionRemoved(QuicChromiumClientSession* session) = 0;
}; };
......
...@@ -14,6 +14,39 @@ QuicConnectivityMonitor::QuicConnectivityMonitor( ...@@ -14,6 +14,39 @@ QuicConnectivityMonitor::QuicConnectivityMonitor(
QuicConnectivityMonitor::~QuicConnectivityMonitor() = default; QuicConnectivityMonitor::~QuicConnectivityMonitor() = default;
void QuicConnectivityMonitor::RecordConnectivityStatsToHistograms(
const std::string& notification,
NetworkChangeNotifier::NetworkHandle affected_network) const {
if (notification == "OnNetworkSoonToDisconnect" ||
notification == "OnNetworkDisconnected") {
// If the disconnected network is not the default network, ignore
// stats collections.
if (affected_network != default_network_)
return;
}
// TODO(crbug.com/1090532): rename histograms prefix to
// Net.QuicConnectivityMonitor.
UMA_HISTOGRAM_COUNTS_100(
"Net.QuicStreamFactory.NumQuicSessionsAtNetworkChange",
active_sessions_.size());
// Skip degrading session collection if there are less than two sessions.
if (active_sessions_.size() < 2)
return;
size_t num_degrading_sessions = GetNumDegradingSessions();
const std::string raw_histogram_name =
"Net.QuicStreamFactory.NumDegradingSessions." + notification;
base::UmaHistogramExactLinear(raw_histogram_name, num_degrading_sessions,
101);
int percentage = num_degrading_sessions * 100 / active_sessions_.size();
const std::string percentage_histogram_name =
"Net.QuicStreamFactory.PercentageDegradingSessions." + notification;
base::UmaHistogramExactLinear(percentage_histogram_name, percentage, 101);
}
size_t QuicConnectivityMonitor::GetNumDegradingSessions() const { size_t QuicConnectivityMonitor::GetNumDegradingSessions() const {
return degrading_sessions_.size(); return degrading_sessions_.size();
} }
...@@ -51,16 +84,53 @@ void QuicConnectivityMonitor::OnSessionEncounteringWriteError( ...@@ -51,16 +84,53 @@ void QuicConnectivityMonitor::OnSessionEncounteringWriteError(
++write_error_map_[error_code]; ++write_error_map_[error_code];
} }
// Counts post-handshake connection closes on the default network whose error
// code suggests a connectivity problem.
void QuicConnectivityMonitor::OnSessionClosedAfterHandshake(
    QuicChromiumClientSession* session,
    NetworkChangeNotifier::NetworkHandle network,
    quic::ConnectionCloseSource source,
    quic::QuicErrorCode error_code) {
  // Closes on any network other than the default one are not tracked.
  if (network != default_network_)
    return;

  // Which error codes are counted depends on who closed the connection:
  //  - by the peer: PUBLIC RESET post handshake is most likely a NAT
  //    rebinding issue;
  //  - by self: PACKET_WRITE_ERROR or TOO_MANY_RTOS is likely a connectivity
  //    issue.
  const bool closed_by_peer =
      source == quic::ConnectionCloseSource::FROM_PEER;
  const bool is_connectivity_related =
      closed_by_peer ? error_code == quic::QUIC_PUBLIC_RESET
                     : (error_code == quic::QUIC_PACKET_WRITE_ERROR ||
                        error_code == quic::QUIC_TOO_MANY_RTOS);

  if (is_connectivity_related)
    ++quic_error_map_[error_code];
}
// Starts tracking |session| as active if it is on the default network.
void QuicConnectivityMonitor::OnSessionRegistered(
    QuicChromiumClientSession* session,
    NetworkChangeNotifier::NetworkHandle network) {
  // Sessions on any network other than the default one are not tracked.
  if (network != default_network_)
    return;

  active_sessions_.insert(session);
  ++total_num_sessions_tracked_;
}
void QuicConnectivityMonitor::OnSessionRemoved( void QuicConnectivityMonitor::OnSessionRemoved(
QuicChromiumClientSession* session) { QuicChromiumClientSession* session) {
degrading_sessions_.erase(session); degrading_sessions_.erase(session);
active_sessions_.erase(session);
} }
void QuicConnectivityMonitor::OnDefaultNetworkUpdated( void QuicConnectivityMonitor::OnDefaultNetworkUpdated(
NetworkChangeNotifier::NetworkHandle default_network) { NetworkChangeNotifier::NetworkHandle default_network) {
default_network_ = default_network; default_network_ = default_network;
active_sessions_.clear();
total_num_sessions_tracked_ = 0u;
degrading_sessions_.clear(); degrading_sessions_.clear();
write_error_map_.clear(); write_error_map_.clear();
quic_error_map_.clear();
} }
void QuicConnectivityMonitor::OnIPAddressChanged() { void QuicConnectivityMonitor::OnIPAddressChanged() {
......
...@@ -25,6 +25,11 @@ class NET_EXPORT_PRIVATE QuicConnectivityMonitor ...@@ -25,6 +25,11 @@ class NET_EXPORT_PRIVATE QuicConnectivityMonitor
~QuicConnectivityMonitor() override; ~QuicConnectivityMonitor() override;
// Records connectivity related stats to histograms.
// |platform_notification| is the name of the platform network notification
// that triggered the recording; it is appended to the names of the
// degrading-session histograms. |affected_network| is the network the
// notification is about. Nothing is recorded for "OnNetworkSoonToDisconnect"
// or "OnNetworkDisconnected" when |affected_network| is not the default
// network.
void RecordConnectivityStatsToHistograms(
const std::string& platform_notification,
NetworkChangeNotifier::NetworkHandle affected_network) const;
// Returns the number of sessions that are currently degrading on the default // Returns the number of sessions that are currently degrading on the default
// network interface. // network interface.
size_t GetNumDegradingSessions() const; size_t GetNumDegradingSessions() const;
...@@ -65,20 +70,40 @@ class NET_EXPORT_PRIVATE QuicConnectivityMonitor ...@@ -65,20 +70,40 @@ class NET_EXPORT_PRIVATE QuicConnectivityMonitor
NetworkChangeNotifier::NetworkHandle network, NetworkChangeNotifier::NetworkHandle network,
int error_code) override; int error_code) override;
void OnSessionClosedAfterHandshake(
QuicChromiumClientSession* session,
NetworkChangeNotifier::NetworkHandle network,
quic::ConnectionCloseSource source,
quic::QuicErrorCode error_code) override;
void OnSessionRegistered(
QuicChromiumClientSession* session,
NetworkChangeNotifier::NetworkHandle network) override;
void OnSessionRemoved(QuicChromiumClientSession* session) override; void OnSessionRemoved(QuicChromiumClientSession* session) override;
private: private:
// Size chosen per net.QuicSession.WriteError histogram. // Size chosen per net.QuicSession.WriteError histogram.
using WriteErrorMap = quic::QuicSmallMap<int, size_t, 20>; using WriteErrorMap = quic::QuicSmallMap<int, size_t, 20>;
// The QuicErrorCodes this monitor cares about are:
//   QUIC_PUBLIC_RESET when the connection is closed by the peer, or
//   QUIC_PACKET_WRITE_ERROR/QUIC_TOO_MANY_RTOS when it is closed by self.
using QuicErrorCodeMap = quic::QuicSmallMap<quic::QuicErrorCode, size_t, 5>;
// If NetworkHandle is not supported, always set to // If NetworkHandle is not supported, always set to
// NetworkChangeNotifier::kInvalidNetworkHandle. // NetworkChangeNotifier::kInvalidNetworkHandle.
NetworkChangeNotifier::NetworkHandle default_network_; NetworkChangeNotifier::NetworkHandle default_network_;
// Sessions that are currently degrading on the |default_network_|. // Sessions that are currently degrading on the |default_network_|.
quic::QuicHashSet<QuicChromiumClientSession*> degrading_sessions_; quic::QuicHashSet<QuicChromiumClientSession*> degrading_sessions_;
// Sessions that are currently active on the |default_network_|. A session is
// added when it is registered while on the default network and erased when
// it is removed; the set is cleared when the default network is updated.
quic::QuicHashSet<QuicChromiumClientSession*> active_sessions_;
// Total number of sessions that have been tracked on the current default
// network, including sessions that may since have been closed. Reset to zero
// when the default network is updated.
size_t total_num_sessions_tracked_{0u};
// Map from the write error code to the corresponding number of reports. // Map from the write error code to the corresponding number of reports.
WriteErrorMap write_error_map_; WriteErrorMap write_error_map_;
// Map from the QUIC error code to the number of post-handshake connection
// closes on the |default_network_| that were reported with that code.
QuicErrorCodeMap quic_error_map_;
base::WeakPtrFactory<QuicConnectivityMonitor> weak_factory_{this}; base::WeakPtrFactory<QuicConnectivityMonitor> weak_factory_{this};
DISALLOW_COPY_AND_ASSIGN(QuicConnectivityMonitor); DISALLOW_COPY_AND_ASSIGN(QuicConnectivityMonitor);
......
...@@ -2193,35 +2193,8 @@ void QuicStreamFactory::CollectDataOnPlatformNotification( ...@@ -2193,35 +2193,8 @@ void QuicStreamFactory::CollectDataOnPlatformNotification(
NetworkChangeNotifier::NetworkHandle affected_network) const { NetworkChangeNotifier::NetworkHandle affected_network) const {
UMA_HISTOGRAM_ENUMERATION("Net.QuicSession.PlatformNotification", UMA_HISTOGRAM_ENUMERATION("Net.QuicSession.PlatformNotification",
notification, NETWORK_NOTIFICATION_MAX); notification, NETWORK_NOTIFICATION_MAX);
if (notification == NETWORK_SOON_TO_DISCONNECT || connectivity_monitor_.RecordConnectivityStatsToHistograms(
notification == NETWORK_DISCONNECTED) { QuicPlatformNotificationToString(notification), affected_network);
// If the disconnected network is not the default network, ignore
// stats collections.
if (affected_network != default_network_)
return;
}
UMA_HISTOGRAM_COUNTS_100(
"Net.QuicStreamFactory.NumQuicSessionsAtNetworkChange",
all_sessions_.size());
// Skip degrading session collection if there are less than two sessions.
if (all_sessions_.size() < 2)
return;
size_t num_degrading_sessions =
connectivity_monitor_.GetNumDegradingSessions();
const std::string raw_histogram_name =
"Net.QuicStreamFactory.NumDegradingSessions." +
QuicPlatformNotificationToString(notification);
base::UmaHistogramExactLinear(raw_histogram_name, num_degrading_sessions,
101);
int percentage = num_degrading_sessions * 100 / all_sessions_.size();
const std::string percentage_histogram_name =
"Net.QuicStreamFactory.PercentageDegradingSessions." +
QuicPlatformNotificationToString(notification);
base::UmaHistogramExactLinear(percentage_histogram_name, percentage, 101);
} }
std::unique_ptr<QuicCryptoClientConfigHandle> std::unique_ptr<QuicCryptoClientConfigHandle>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment