HPACK optimal Huffman code instrumentation and UMA collection.

Adds support for aggregating observed character counts in literals emitted by
a per-origin HPACK encoder. This is intended to approximate the literal
encodings which would be used were the user agent speaking HTTP/2 to the
origin. Character counts sampled and aggregated across a population will be used
to design an optimal static Huffman compression table for the HTTP/2 standard.

See design doc:
https://docs.google.com/a/google.com/document/d/1Y6pnBf8u24AHNFcA6GADn2XgHzfIvZiIN39Kf11raIY/edit#

BUG=368301

Review URL: https://codereview.chromium.org/243153003

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@267131 0039d316-1c4b-4281-b951-d872f2087c98
parent 28fe9850
......@@ -24,6 +24,7 @@
#include "net/socket/client_socket_factory.h"
#include "net/socket/client_socket_pool_manager_impl.h"
#include "net/socket/next_proto.h"
#include "net/spdy/hpack_huffman_aggregator.h"
#include "net/spdy/spdy_session_pool.h"
namespace {
......@@ -142,6 +143,10 @@ HttpNetworkSession::HttpNetworkSession(const Params& params)
DCHECK(proxy_service_);
DCHECK(ssl_config_service_.get());
CHECK(http_server_properties_);
if (HpackHuffmanAggregator::UseAggregator()) {
huffman_aggregator_.reset(new HpackHuffmanAggregator());
}
}
HttpNetworkSession::~HttpNetworkSession() {
......
......@@ -31,6 +31,7 @@ class CertVerifier;
class ClientSocketFactory;
class ClientSocketPoolManager;
class HostResolver;
class HpackHuffmanAggregator;
class HttpAuthHandlerFactory;
class HttpNetworkSessionPeer;
class HttpProxyClientSocketPool;
......@@ -152,6 +153,9 @@ class NET_EXPORT HttpNetworkSession
NetLog* net_log() {
return net_log_;
}
HpackHuffmanAggregator* huffman_aggregator() {
return huffman_aggregator_.get();
}
// Creates a Value summary of the state of the socket pools. The caller is
// responsible for deleting the returned value.
......@@ -206,6 +210,9 @@ class NET_EXPORT HttpNetworkSession
scoped_ptr<HttpStreamFactory> http_stream_factory_for_websocket_;
std::set<HttpResponseBodyDrainer*> response_drainers_;
// TODO(jgraettinger): Remove when Huffman collection is complete.
scoped_ptr<HpackHuffmanAggregator> huffman_aggregator_;
Params params_;
};
......
......@@ -54,6 +54,7 @@
#include "net/socket/ssl_client_socket.h"
#include "net/socket/ssl_client_socket_pool.h"
#include "net/socket/transport_client_socket_pool.h"
#include "net/spdy/hpack_huffman_aggregator.h"
#include "net/spdy/spdy_http_stream.h"
#include "net/spdy/spdy_session.h"
#include "net/spdy/spdy_session_pool.h"
......@@ -1099,6 +1100,14 @@ int HttpNetworkTransaction::DoReadHeadersComplete(int result) {
stream_->GetSSLInfo(&response_.ssl_info);
headers_valid_ = true;
if (session_->huffman_aggregator()) {
session_->huffman_aggregator()->AggregateTransactionCharacterCounts(
*request_,
request_headers_,
proxy_info_.proxy_server(),
*response_.headers);
}
return OK;
}
......
......@@ -1020,6 +1020,8 @@
'spdy/hpack_entry.h',
'spdy/hpack_header_table.cc',
'spdy/hpack_header_table.h',
'spdy/hpack_huffman_aggregator.cc',
'spdy/hpack_huffman_aggregator.h',
'spdy/hpack_huffman_table.cc',
'spdy/hpack_huffman_table.h',
'spdy/hpack_input_stream.cc',
......@@ -1579,6 +1581,7 @@
'spdy/hpack_encoder_test.cc',
'spdy/hpack_entry_test.cc',
'spdy/hpack_header_table_test.cc',
'spdy/hpack_huffman_aggregator_test.cc',
'spdy/hpack_huffman_table_test.cc',
'spdy/hpack_input_stream_test.cc',
'spdy/hpack_output_stream_test.cc',
......
......@@ -31,7 +31,9 @@ const uint8 kReferencedThisEncoding = 3;
HpackEncoder::HpackEncoder(const HpackHuffmanTable& table)
: output_stream_(),
allow_huffman_compression_(true),
huffman_table_(table) {}
huffman_table_(table),
char_counts_(NULL),
total_char_counts_(NULL) {}
HpackEncoder::~HpackEncoder() {}
......@@ -178,6 +180,7 @@ void HpackEncoder::EmitString(StringPiece str) {
output_stream_.AppendUint32(str.size());
output_stream_.AppendBytes(str);
}
UpdateCharacterCounts(str);
}
// static
......@@ -237,6 +240,23 @@ HpackEncoder::Representations HpackEncoder::DetermineEncodingDelta(
return explicit_set;
}
void HpackEncoder::SetCharCountsStorage(std::vector<size_t>* char_counts,
                                        size_t* total_char_counts) {
  // The caller must supply one bucket per possible octet value; both
  // pointers remain externally owned and must outlive this encoder's use.
  CHECK_LE(256u, char_counts->size());
  total_char_counts_ = total_char_counts;
  char_counts_ = char_counts;
}
void HpackEncoder::UpdateCharacterCounts(base::StringPiece str) {
  // Aggregation is a no-op until SetCharCountsStorage() has provided
  // externally-owned storage.
  if (char_counts_ == NULL || total_char_counts_ == NULL) {
    return;
  }
  // Tally every octet of |str| into its 256-entry bucket.
  for (size_t i = 0; i != str.size(); ++i) {
    ++(*char_counts_)[static_cast<uint8>(str[i])];
  }
  *total_char_counts_ += str.size();
}
// static
void HpackEncoder::CookieToCrumbs(const Representation& cookie,
Representations* out) {
......
......@@ -56,6 +56,11 @@ class NET_EXPORT_PRIVATE HpackEncoder {
header_table_.SetSettingsHeaderTableSize(size_setting);
}
// Sets externally-owned storage for aggregating character counts of emitted
// literal representations.
void SetCharCountsStorage(std::vector<size_t>* char_counts,
size_t* total_char_counts);
private:
typedef std::pair<base::StringPiece, base::StringPiece> Representation;
typedef std::vector<Representation> Representations;
......@@ -72,6 +77,8 @@ class NET_EXPORT_PRIVATE HpackEncoder {
// Emits a Huffman or identity string (whichever is smaller).
void EmitString(base::StringPiece str);
void UpdateCharacterCounts(base::StringPiece str);
// Determines the representation delta required to encode |header_set| in
// the current header table context. Entries in the reference set are
// enumerated and marked with membership in the current |header_set|.
......@@ -88,6 +95,10 @@ class NET_EXPORT_PRIVATE HpackEncoder {
bool allow_huffman_compression_;
const HpackHuffmanTable& huffman_table_;
// Externally-owned, nullable storage for character counts of literals.
std::vector<size_t>* char_counts_;
size_t* total_char_counts_;
DISALLOW_COPY_AND_ASSIGN(HpackEncoder);
};
......
......@@ -57,6 +57,9 @@ class HpackEncoderPeer {
void TakeString(string* out) {
encoder_->output_stream_.TakeString(out);
}
void UpdateCharacterCounts(StringPiece str) {
encoder_->UpdateCharacterCounts(str);
}
static void CookieToCrumbs(StringPiece cookie,
std::vector<StringPiece>* out) {
Representations tmp;
......@@ -420,6 +423,27 @@ TEST_F(HpackEncoderTest, CookieToCrumbs) {
EXPECT_THAT(out, ElementsAre("foo", "bar", "baz", "bing", ""));
}
// Verifies that UpdateCharacterCounts() tallies each octet of the input
// (including non-printable and high-bit octets) into the caller-supplied
// 256-entry vector, and bumps the running total by the string length.
TEST_F(HpackEncoderTest, UpdateCharacterCounts) {
  std::vector<size_t> counts(256, 0);
  size_t total_counts = 0;
  encoder_.SetCharCountsStorage(&counts, &total_counts);

  // Contains NUL, 0x01 and 0xff; sized explicitly since it embeds '\0'.
  char kTestString[] = "foo\0\1\xff""boo";
  peer_.UpdateCharacterCounts(
      StringPiece(kTestString, arraysize(kTestString) - 1));

  std::vector<size_t> expect(256, 0);
  expect[static_cast<uint8>('f')] = 1;
  expect[static_cast<uint8>('o')] = 4;
  expect[static_cast<uint8>('\0')] = 1;
  expect[static_cast<uint8>('\1')] = 1;
  expect[static_cast<uint8>('\xff')] = 1;
  expect[static_cast<uint8>('b')] = 1;

  EXPECT_EQ(expect, counts);
  // Nine octets were processed in total.
  EXPECT_EQ(9u, total_counts);
}
} // namespace
} // namespace net
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "net/spdy/hpack_huffman_aggregator.h"
#include "base/metrics/bucket_ranges.h"
#include "base/metrics/field_trial.h"
#include "base/metrics/histogram.h"
#include "base/metrics/sample_vector.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "net/base/load_flags.h"
#include "net/http/http_request_headers.h"
#include "net/http/http_request_info.h"
#include "net/http/http_response_headers.h"
#include "net/spdy/hpack_encoder.h"
#include "net/spdy/spdy_http_utils.h"
namespace net {
namespace {
const char kHistogramName[] = "Net.SpdyHpackEncodedCharacterFrequency";
const size_t kTotalCountsPublishThreshold = 50000;
// Each encoder uses the default dynamic table size of 4096 total bytes.
const size_t kMaxEncoders = 20;
} // namespace
// Starts with zeroed per-octet counts and the default encoder-cache size.
HpackHuffmanAggregator::HpackHuffmanAggregator()
    : counts_(256, 0),
      total_counts_(0),
      max_encoders_(kMaxEncoders) {
}

HpackHuffmanAggregator::~HpackHuffmanAggregator() {
  // Encoders are heap-allocated in ObtainEncoder() and owned by this
  // aggregator; without this loop, entries still resident in the cache at
  // destruction time (i.e. all but evicted ones) would leak.
  for (OriginEncoders::iterator it = encoders_.begin();
       it != encoders_.end(); ++it) {
    delete it->second;
  }
  encoders_.clear();
}
// Runs one transaction's request and response headers through the HPACK
// encoder keyed on the transaction's origin, which in turn accumulates the
// character counts of emitted literals into |counts_| / |total_counts_|
// (wired up via SetCharCountsStorage in ObtainEncoder).
void HpackHuffmanAggregator::AggregateTransactionCharacterCounts(
    const HttpRequestInfo& request,
    const HttpRequestHeaders& request_headers,
    const ProxyServer& proxy,
    const HttpResponseHeaders& response_headers) {
  // Cross-origin sub-resource loads are excluded from sampling.
  if (IsCrossOrigin(request)) {
    return;
  }
  HostPortPair endpoint = HostPortPair(request.url.HostNoBrackets(),
                                       request.url.EffectiveIntPort());
  HpackEncoder* encoder = ObtainEncoder(
      SpdySessionKey(endpoint, proxy, request.privacy_mode));
  // Convert and encode the request and response header sets. The encoded
  // output is discarded; only the side-effect character counts are kept.
  {
    SpdyHeaderBlock headers;
    CreateSpdyHeadersFromHttpRequest(
        request, request_headers, &headers, SPDY4, false);
    std::string tmp_out;
    encoder->EncodeHeaderSet(headers, &tmp_out);
  }
  {
    SpdyHeaderBlock headers;
    CreateSpdyHeadersFromHttpResponse(response_headers, &headers);
    std::string tmp_out;
    encoder->EncodeHeaderSet(headers, &tmp_out);
  }
  // Periodically flush accumulated counts to UMA.
  if (total_counts_ >= kTotalCountsPublishThreshold) {
    PublishCounts();
  }
}
// static
bool HpackHuffmanAggregator::UseAggregator() {
  // Aggregation is gated on membership in the "Enabled" group of the
  // HpackHuffmanAggregator field trial.
  return base::FieldTrialList::FindFullName("HpackHuffmanAggregator") ==
      "Enabled";
}
// static
void HpackHuffmanAggregator::CreateSpdyHeadersFromHttpResponse(
const HttpResponseHeaders& headers,
SpdyHeaderBlock* headers_out) {
// Lower-case header names, and coalesce multiple values delimited by \0.
// Also add the fixed status header.
std::string name, value;
void* it = NULL;
while (headers.EnumerateHeaderLines(&it, &name, &value)) {
StringToLowerASCII(&name);
if (headers_out->find(name) == headers_out->end()) {
(*headers_out)[name] = value;
} else {
(*headers_out)[name] += std::string(1, '\0') + value;
}
}
(*headers_out)[":status"] = base::IntToString(headers.response_code());
}
// static
bool HpackHuffmanAggregator::IsCrossOrigin(const HttpRequestInfo& request) {
// Require that the request is top-level, or that it shares
// an origin with its referer.
HostPortPair endpoint = HostPortPair(request.url.HostNoBrackets(),
request.url.EffectiveIntPort());
if ((request.load_flags & LOAD_MAIN_FRAME) == 0) {
std::string referer_str;
if (!request.extra_headers.GetHeader(HttpRequestHeaders::kReferer,
&referer_str)) {
// Require a referer.
return true;
}
GURL referer(referer_str);
HostPortPair referer_endpoint = HostPortPair(referer.HostNoBrackets(),
referer.EffectiveIntPort());
if (!endpoint.Equals(referer_endpoint)) {
// Cross-origin request.
return true;
}
}
return false;
}
// Returns the encoder for |key|, creating one if needed. Encoders are kept
// in an MRU-ordered list capped at |max_encoders_| entries.
HpackEncoder* HpackHuffmanAggregator::ObtainEncoder(const SpdySessionKey& key) {
  // Cache hit: promote the entry to the front (most-recently-used) slot.
  for (OriginEncoders::iterator it = encoders_.begin();
       it != encoders_.end(); ++it) {
    if (!key.Equals(it->first))
      continue;
    encoders_.splice(encoders_.begin(), encoders_, it);
    return encoders_.front().second;
  }
  // Cache miss: create a new encoder at the MRU slot, evicting the
  // least-recently-used entry if the cache is over capacity.
  encoders_.push_front(std::make_pair(
      key, new HpackEncoder(ObtainHpackHuffmanTable())));
  if (encoders_.size() > max_encoders_) {
    delete encoders_.back().second;
    encoders_.pop_back();
  }
  // Route the new encoder's literal character counts into our aggregates.
  encoders_.front().second->SetCharCountsStorage(&counts_, &total_counts_);
  return encoders_.front().second;
}
// Flushes the aggregated character counts into the UMA histogram named by
// |kHistogramName|, then zeroes the internal counters.
void HpackHuffmanAggregator::PublishCounts() {
  // base::Histogram requires that values be 1-indexed, so character code
  // |i| is recorded as sample value |i + 1| (buckets are ASCII codes
  // offset by one; bucket 0 is the unused underflow bucket).
  const size_t kRangeMin = 1;
  const size_t kRangeMax = counts_.size() + 1;
  const size_t kBucketCount = kRangeMax + 1;

  // Build linear bucket boundaries [0, 1, 2, ...] matching the layout of
  // the LinearHistogram created below.
  base::BucketRanges ranges(kBucketCount + 1);
  for (size_t i = 0; i != ranges.size(); ++i) {
    ranges.set_range(i, i);
  }
  ranges.ResetChecksum();

  // Copy |counts_| into a SampleVector.
  base::SampleVector samples(&ranges);
  for (size_t i = 0; i != counts_.size(); ++i) {
    samples.Accumulate(i + 1, counts_[i]);
  }

  // Adds all samples in one shot; the histogram accumulates across
  // successive publishes.
  STATIC_HISTOGRAM_POINTER_BLOCK(
      kHistogramName,
      AddSamples(samples),
      base::LinearHistogram::FactoryGet(
          kHistogramName, kRangeMin, kRangeMax, kBucketCount,
          base::HistogramBase::kUmaTargetedHistogramFlag));

  // Clear counts.
  counts_.assign(counts_.size(), 0);
  total_counts_ = 0;
}
} // namespace net
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <list>
#include <vector>
#include "base/macros.h"
#include "net/base/net_export.h"
#include "net/spdy/spdy_header_block.h"
#include "net/spdy/spdy_protocol.h"
#include "net/spdy/spdy_session_key.h"
namespace net {
class HpackEncoder;
class HttpRequestHeaders;
struct HttpRequestInfo;
class HttpResponseHeaders;
class ProxyServer;
namespace test {
class HpackHuffmanAggregatorPeer;
} // namespace test
// Aggregates the character counts of header literals emitted by per-origin
// HPACK encoders, and periodically publishes them to a UMA histogram to
// inform the design of the HTTP/2 static Huffman table.
// TODO(jgraettinger): Remove when Huffman collection is complete.
class NET_EXPORT_PRIVATE HpackHuffmanAggregator {
 public:
  friend class test::HpackHuffmanAggregatorPeer;

  HpackHuffmanAggregator();
  ~HpackHuffmanAggregator();

  // Encodes the request and response headers of the transaction with an
  // HpackEncoder keyed on the transaction's SpdySessionKey. Literal headers
  // emitted by that encoder are aggregated into internal character counts,
  // which are periodically published to a UMA histogram.
  void AggregateTransactionCharacterCounts(
      const HttpRequestInfo& request,
      const HttpRequestHeaders& request_headers,
      const ProxyServer& proxy,
      const HttpResponseHeaders& response_headers);

  // Returns whether the aggregator is enabled for the session by a field trial.
  static bool UseAggregator();

 private:
  // An encoder together with the origin key it serves.
  typedef std::pair<SpdySessionKey, HpackEncoder*> OriginEncoder;
  // MRU-ordered cache of origin encoders.
  typedef std::list<OriginEncoder> OriginEncoders;

  // Returns true if the request is considered cross-origin,
  // and should not be aggregated.
  static bool IsCrossOrigin(const HttpRequestInfo& request);

  // Converts |headers| into SPDY headers block |headers_out|.
  static void CreateSpdyHeadersFromHttpResponse(
      const HttpResponseHeaders& headers,
      SpdyHeaderBlock* headers_out);

  // Creates or returns an encoder for the origin key.
  HpackEncoder* ObtainEncoder(const SpdySessionKey& key);

  // Publishes aggregated counts to a UMA histogram.
  void PublishCounts();

  // Per-octet counts (256 entries) and their running total.
  std::vector<size_t> counts_;
  size_t total_counts_;
  // MRU list of encoders, bounded by |max_encoders_|.
  OriginEncoders encoders_;
  size_t max_encoders_;

  DISALLOW_COPY_AND_ASSIGN(HpackHuffmanAggregator);
};
} // namespace net
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "net/spdy/hpack_huffman_aggregator.h"
#include "base/metrics/histogram.h"
#include "base/metrics/statistics_recorder.h"
#include "net/base/load_flags.h"
#include "net/http/http_request_headers.h"
#include "net/http/http_request_info.h"
#include "net/http/http_response_headers.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace net {
using ::testing::Each;
using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::Pair;
namespace {
const char kHistogramName[] = "Net.SpdyHpackEncodedCharacterFrequency";
} // namespace
namespace test {
// Test peer exposing HpackHuffmanAggregator's private state and methods.
class HpackHuffmanAggregatorPeer {
 public:
  explicit HpackHuffmanAggregatorPeer(HpackHuffmanAggregator* agg)
      : agg_(agg) {}

  // Direct access to the aggregator's per-octet counts.
  std::vector<size_t>* counts() {
    return &agg_->counts_;
  }
  // Direct access to the MRU encoder list.
  HpackHuffmanAggregator::OriginEncoders* encoders() {
    return &agg_->encoders_;
  }
  size_t total_counts() {
    return agg_->total_counts_;
  }
  void set_total_counts(size_t total_counts) {
    agg_->total_counts_ = total_counts;
  }
  void set_max_encoders(size_t max_encoders) {
    agg_->max_encoders_ = max_encoders;
  }
  static bool IsCrossOrigin(const HttpRequestInfo& request) {
    return HpackHuffmanAggregator::IsCrossOrigin(request);
  }
  static void CreateSpdyHeadersFromHttpResponse(
      const HttpResponseHeaders& headers,
      SpdyHeaderBlock* headers_out) {
    HpackHuffmanAggregator::CreateSpdyHeadersFromHttpResponse(
        headers, headers_out);
  }
  HpackEncoder* ObtainEncoder(const SpdySessionKey& key) {
    return agg_->ObtainEncoder(key);
  }
  void PublishCounts() {
    agg_->PublishCounts();
  }

 private:
  // Not owned.
  HpackHuffmanAggregator* agg_;
};
} // namespace test
// Fixture owning an aggregator plus a peer for poking its private state.
class HpackHuffmanAggregatorTest : public ::testing::Test {
 protected:
  HpackHuffmanAggregatorTest()
      : peer_(&agg_) {}

  HpackHuffmanAggregator agg_;
  test::HpackHuffmanAggregatorPeer peer_;
};
// Exercises IsCrossOrigin(): main-frame loads are always same-origin;
// other loads require a referer matching the request's host and
// effective port.
TEST_F(HpackHuffmanAggregatorTest, CrossOriginDetermination) {
  HttpRequestInfo request;
  request.url = GURL("https://www.foo.com/a/page");

  // Main load without referer.
  request.load_flags = LOAD_MAIN_FRAME;
  EXPECT_FALSE(peer_.IsCrossOrigin(request));

  // Non-main load without referer. Treated as cross-origin.
  request.load_flags = 0;
  EXPECT_TRUE(peer_.IsCrossOrigin(request));

  // Main load with different referer origin. Referer is ignored for
  // main-frame loads.
  request.load_flags = LOAD_MAIN_FRAME;
  request.extra_headers.SetHeader(HttpRequestHeaders::kReferer,
                                  "https://www.bar.com/other/page");
  EXPECT_FALSE(peer_.IsCrossOrigin(request));

  // Non-main load with different referer origin.
  request.load_flags = 0;
  EXPECT_TRUE(peer_.IsCrossOrigin(request));

  // Non-main load with same referer origin.
  request.extra_headers.SetHeader(HttpRequestHeaders::kReferer,
                                  "https://www.foo.com/other/page");
  EXPECT_FALSE(peer_.IsCrossOrigin(request));

  // Non-main load with same referer host but different schemes. The scheme
  // changes the effective port, so this is cross-origin.
  request.extra_headers.SetHeader(HttpRequestHeaders::kReferer,
                                  "http://www.foo.com/other/page");
  EXPECT_TRUE(peer_.IsCrossOrigin(request));
}
// Exercises ObtainEncoder()'s LRU behavior with a cache capacity of two.
TEST_F(HpackHuffmanAggregatorTest, EncoderLRUQueue) {
  peer_.set_max_encoders(2);

  SpdySessionKey key1(HostPortPair("one.com", 443), ProxyServer::Direct(),
                      PRIVACY_MODE_ENABLED);
  SpdySessionKey key2(HostPortPair("two.com", 443), ProxyServer::Direct(),
                      PRIVACY_MODE_ENABLED);
  SpdySessionKey key3(HostPortPair("three.com", 443), ProxyServer::Direct(),
                      PRIVACY_MODE_ENABLED);

  // Creates one.com.
  HpackEncoder* one = peer_.ObtainEncoder(key1);
  EXPECT_EQ(1u, peer_.encoders()->size());

  // Creates two.com. No evictions.
  HpackEncoder* two = peer_.ObtainEncoder(key2);
  EXPECT_EQ(2u, peer_.encoders()->size());
  EXPECT_NE(one, two);

  // Touch one.com, making it most-recently used.
  EXPECT_EQ(one, peer_.ObtainEncoder(key1));

  // Creates three.com. Evicts two.com, as it's least-recently used.
  HpackEncoder* three = peer_.ObtainEncoder(key3);
  EXPECT_EQ(one, peer_.ObtainEncoder(key1));
  EXPECT_NE(one, three);
  EXPECT_EQ(2u, peer_.encoders()->size());
}
// Exercises PublishCounts(): internal counters are flushed into the UMA
// histogram (sample value = character code + 1) and reset, and successive
// publishes accumulate in the histogram.
TEST_F(HpackHuffmanAggregatorTest, PublishCounts) {
  (*peer_.counts())[0] = 1;
  (*peer_.counts())[255] = 10;
  (*peer_.counts())[128] = 101;
  peer_.set_total_counts(112);

  peer_.PublishCounts();

  // Internal counts were reset after being published.
  EXPECT_THAT(*peer_.counts(), Each(Eq(0u)));
  EXPECT_EQ(0u, peer_.total_counts());

  // Verify histogram counts match the expectation. Character code |i| is
  // recorded at sample value |i + 1|.
  scoped_ptr<base::HistogramSamples> samples =
      base::StatisticsRecorder::FindHistogram(kHistogramName)
          ->SnapshotSamples();
  EXPECT_EQ(0, samples->GetCount(0));
  EXPECT_EQ(1, samples->GetCount(1));
  EXPECT_EQ(101, samples->GetCount(129));
  EXPECT_EQ(10, samples->GetCount(256));
  EXPECT_EQ(112, samples->TotalCount());

  // Publish a second round of counts.
  (*peer_.counts())[1] = 32;
  (*peer_.counts())[128] = 5;
  peer_.set_total_counts(37);
  peer_.PublishCounts();

  // Verify they've been aggregated into the previous counts.
  samples = base::StatisticsRecorder::FindHistogram(kHistogramName)
      ->SnapshotSamples();
  EXPECT_EQ(0, samples->GetCount(0));
  EXPECT_EQ(1, samples->GetCount(1));
  EXPECT_EQ(32, samples->GetCount(2));
  EXPECT_EQ(106, samples->GetCount(129));
  EXPECT_EQ(10, samples->GetCount(256));
  EXPECT_EQ(149, samples->TotalCount());
}
// Exercises CreateSpdyHeadersFromHttpResponse(): names are lower-cased,
// repeated headers are joined with '\0', and ":status" is added.
TEST_F(HpackHuffmanAggregatorTest, CreateSpdyResponseHeaders) {
  // Raw response headers use '\0' line delimiters, hence the explicit size.
  char kRawHeaders[] =
      "HTTP/1.1 202 Accepted \0"
      "Content-TYPE : text/html; charset=utf-8 \0"
      "Set-Cookie: foo=bar \0"
      "Set-Cookie: baz=bing \0"
      "Cache-Control: pragma=no-cache \0"
      "Cache-CONTROL: expires=12345 \0\0";

  scoped_refptr<HttpResponseHeaders> parsed_headers(new HttpResponseHeaders(
      std::string(kRawHeaders, arraysize(kRawHeaders) - 1)));

  SpdyHeaderBlock headers;
  peer_.CreateSpdyHeadersFromHttpResponse(*parsed_headers, &headers);
  EXPECT_THAT(headers, ElementsAre(
      Pair(":status", "202"),
      Pair("cache-control", std::string("pragma=no-cache\0expires=12345", 29)),
      Pair("content-type", "text/html; charset=utf-8"),
      Pair("set-cookie", std::string("foo=bar\0baz=bing", 16))));
}
} // namespace net
......@@ -13353,6 +13353,16 @@ Therefore, the affected-histogram name has to have at least one dot in it.
</summary>
</histogram>
<histogram name="Net.SpdyHpackEncodedCharacterFrequency" units="ASCII codes">
<owner>jgraettinger@chromium.org</owner>
<summary>
Frequencies of characters observed in request and response headers.
Temporarily being collected to inform the construction of an optimized
Huffman code for the HTTP/2 specification. Buckets are ASCII codes offset by
1.
</summary>
</histogram>
<histogram name="Net.SpdyIPPoolDomainMatch" enum="SpdyIPPoolDomainMatch"
units="count">
<owner>Please list the metric's owners. Add more owner tags as needed.</owner>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment