Commit 89be8ae4 authored by xunjieli's avatar xunjieli Committed by Commit bot

Add net::GzipSourceStream

This CL adds a net::GzipSourceStream which implements
net::FilterSourceStream to do gzip decoding.

This is a part of the efforts to convert net::Filter
into a pull-based interface. See the linked bug for
more details.

BUG=474859

Review-Url: https://codereview.chromium.org/2334773002
Cr-Commit-Position: refs/heads/master@{#421314}
parent fb4e32fc
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "net/filter/gzip_source_stream.h"
#include "base/bind.h"
#include "base/bit_cast.h"
#include "base/logging.h"
#include "net/base/io_buffer.h"
#include "third_party/zlib/zlib.h"
namespace net {
namespace {

// Human-readable names returned by GzipSourceStream::GetTypeAsString(), one
// for each SourceStream::SourceType this stream handles.
constexpr char kDeflate[] = "DEFLATE";
constexpr char kGzip[] = "GZIP";
constexpr char kGzipFallback[] = "GZIP_FALLBACK";

}  // namespace
// Calls inflateEnd() on the zlib control block, if one was allocated by
// Init().
GzipSourceStream::~GzipSourceStream() {
  z_stream* const stream = zlib_stream_.get();
  if (stream != nullptr)
    inflateEnd(stream);
}
// Factory for GzipSourceStream. Constructs a stream of the given |type| that
// reads from |upstream| and then runs Init(). Returns nullptr if Init()
// fails.
std::unique_ptr<GzipSourceStream> GzipSourceStream::Create(
    std::unique_ptr<SourceStream> upstream,
    SourceStream::SourceType type) {
  // The constructor is private, so the object is created with a plain |new|
  // and immediately handed to a unique_ptr.
  std::unique_ptr<GzipSourceStream> stream(
      new GzipSourceStream(std::move(upstream), type));
  if (stream->Init())
    return stream;
  return nullptr;
}
// Forwards |type| and |upstream| to the FilterSourceStream base and puts the
// decoder in STATE_START with no zlib header inserted and no gzip footer
// bytes pending. The zlib control block is set up separately by Init().
GzipSourceStream::GzipSourceStream(std::unique_ptr<SourceStream> upstream,
                                   SourceStream::SourceType type)
    : FilterSourceStream(type, std::move(upstream)),
      zlib_header_added_(false),
      gzip_footer_bytes_left_(0),
      input_state_(STATE_START) {}
bool GzipSourceStream::Init() {
zlib_stream_.reset(new z_stream);
if (!zlib_stream_)
return false;
memset(zlib_stream_.get(), 0, sizeof(z_stream));
int ret;
if (type() == TYPE_GZIP || type() == TYPE_GZIP_FALLBACK) {
ret = inflateInit2(zlib_stream_.get(), -MAX_WBITS);
} else {
ret = inflateInit(zlib_stream_.get());
}
DCHECK_NE(Z_VERSION_ERROR, ret);
return ret == Z_OK;
}
std::string GzipSourceStream::GetTypeAsString() const {
switch (type()) {
case TYPE_GZIP:
return kGzip;
case TYPE_GZIP_FALLBACK:
return kGzipFallback;
case TYPE_DEFLATE:
return kDeflate;
default:
NOTREACHED();
return "";
}
}
// Decodes data from |input_buffer| into |output_buffer|.
//
// The input is driven through the InputState machine; |input_state_| persists
// across calls, so a stream may be fed in arbitrary pieces:
//   STATE_START             -> picks the next state from type() and, for
//                              TYPE_GZIP_FALLBACK, from the first input byte.
//   STATE_GZIP_HEADER       -> feeds bytes to |gzip_header_| until the header
//                              is complete (or invalid).
//   STATE_COMPRESSED_BODY   -> runs zlib inflate() once per call.
//   STATE_GZIP_FOOTER       -> skips |gzip_footer_bytes_left_| bytes.
//   STATE_UNCOMPRESSED_BODY -> copies input to output verbatim.
//
// Parameters:
//   |output_buffer|, |output_buffer_size|: destination and its capacity.
//   |input_buffer|, |input_buffer_size|: encoded bytes to process.
//   |consumed_bytes|: receives the number of input bytes consumed. It is
//       zeroed on entry and stays 0 when an error is returned.
//   The final bool (upstream_end_reached) is unused.
//
// Returns the number of bytes written to |output_buffer|, or
// ERR_CONTENT_DECODING_FAILED on an invalid gzip header or an inflate error.
int GzipSourceStream::FilterData(IOBuffer* output_buffer,
                                 int output_buffer_size,
                                 IOBuffer* input_buffer,
                                 int input_buffer_size,
                                 int* consumed_bytes,
                                 bool /*upstream_end_reached*/) {
  *consumed_bytes = 0;
  char* input_data = input_buffer->data();
  int input_data_size = input_buffer_size;
  int bytes_out = 0;
  bool state_compressed_entered = false;
  z_stream* const stream = zlib_stream_.get();

  // Stop as soon as |output_buffer| is full, not only when the input runs
  // out. Without the second condition STATE_UNCOMPRESSED_BODY would copy zero
  // bytes per iteration while input remains and spin forever; that happens
  // when inflate() fills the output exactly as it reports Z_STREAM_END and
  // more bytes follow the footer. Unconsumed input is reported through
  // |consumed_bytes| and |input_state_| is kept for the next call.
  while (input_data_size > 0 && bytes_out < output_buffer_size) {
    InputState state = input_state_;
    switch (state) {
      case STATE_START: {
        if (type() == TYPE_DEFLATE) {
          input_state_ = STATE_COMPRESSED_BODY;
          break;
        }
        // If this stream is not really gzipped as detected by
        // ShouldFallbackToPlain, pretend that the zlib stream has ended.
        DCHECK_LT(0, input_data_size);
        if (ShouldFallbackToPlain(input_data[0])) {
          input_state_ = STATE_UNCOMPRESSED_BODY;
        } else {
          input_state_ = STATE_GZIP_HEADER;
        }
        break;
      }
      case STATE_GZIP_HEADER: {
        const size_t kGzipFooterBytes = 8;
        const char* end = nullptr;
        GZipHeader::Status status =
            gzip_header_.ReadMore(input_data, input_data_size, &end);
        if (status == GZipHeader::INCOMPLETE_HEADER) {
          // Everything seen so far belongs to the header; wait for more.
          input_data += input_data_size;
          input_data_size = 0;
        } else if (status == GZipHeader::COMPLETE_HEADER) {
          // If there is a valid header, there should also be a valid footer.
          gzip_footer_bytes_left_ = kGzipFooterBytes;
          int bytes_consumed = end - input_data;
          input_data += bytes_consumed;
          input_data_size -= bytes_consumed;
          input_state_ = STATE_COMPRESSED_BODY;
        } else if (status == GZipHeader::INVALID_HEADER) {
          return ERR_CONTENT_DECODING_FAILED;
        }
        break;
      }
      case STATE_COMPRESSED_BODY: {
        // inflate() runs at most once per FilterData() call: this case either
        // returns or moves on to the footer state.
        DCHECK(!state_compressed_entered);
        DCHECK_LE(0, input_data_size);
        state_compressed_entered = true;
        stream->next_in = bit_cast<Bytef*>(input_data);
        stream->avail_in = input_data_size;
        stream->next_out = bit_cast<Bytef*>(output_buffer->data());
        stream->avail_out = output_buffer_size;
        int ret = inflate(stream, Z_NO_FLUSH);
        // Sometimes misconfigured servers omit the zlib header, relying on
        // clients to splice it back in.
        // NOTE(review): this retry is not limited to TYPE_DEFLATE. For the
        // gzip types Init() uses negative window bits, where a spliced zlib
        // header presumably is not meaningful -- confirm whether the retry
        // should be restricted.
        if (ret < 0 && !zlib_header_added_) {
          // Set before the attempt so the workaround is tried at most once.
          zlib_header_added_ = true;
          if (!InsertZlibHeader())
            return ERR_CONTENT_DECODING_FAILED;
          // Replay the same input from the start of this call's buffer.
          stream->next_in = bit_cast<Bytef*>(input_data);
          stream->avail_in = input_data_size;
          stream->next_out = bit_cast<Bytef*>(output_buffer->data());
          stream->avail_out = output_buffer_size;
          ret = inflate(stream, Z_NO_FLUSH);
          // TODO(xunjieli): add a histogram to see how often this happens. The
          // original bug for this behavior was ancient and maybe it doesn't
          // happen in the wild any more? crbug.com/649339
        }
        if (ret != Z_STREAM_END && ret != Z_OK)
          return ERR_CONTENT_DECODING_FAILED;
        int bytes_used = input_data_size - stream->avail_in;
        bytes_out = output_buffer_size - stream->avail_out;
        input_data_size -= bytes_used;
        input_data += bytes_used;
        if (ret == Z_STREAM_END) {
          input_state_ = STATE_GZIP_FOOTER;
          break;
        }
        // Return early here since zlib has written as much data to
        // |output_buffer| as it could. There might still be some unconsumed
        // data in |input_buffer| if there is no space in |output_buffer|.
        DCHECK_EQ(Z_OK, ret);
        *consumed_bytes = input_buffer_size - input_data_size;
        return bytes_out;
      }
      case STATE_GZIP_FOOTER: {
        // The footer is skipped, not validated. |gzip_footer_bytes_left_| is
        // 0 when no gzip header was parsed, in which case this state falls
        // straight through to STATE_UNCOMPRESSED_BODY.
        size_t to_read = std::min(gzip_footer_bytes_left_,
                                  base::checked_cast<size_t>(input_data_size));
        gzip_footer_bytes_left_ -= to_read;
        input_data_size -= to_read;
        input_data += to_read;
        if (gzip_footer_bytes_left_ == 0)
          input_state_ = STATE_UNCOMPRESSED_BODY;
        break;
      }
      case STATE_UNCOMPRESSED_BODY: {
        // The loop condition guarantees there is room left in the output, so
        // |to_copy| is at least 1 here and the loop always makes progress.
        int to_copy = std::min(input_data_size, output_buffer_size - bytes_out);
        memcpy(output_buffer->data() + bytes_out, input_data, to_copy);
        input_data_size -= to_copy;
        input_data += to_copy;
        bytes_out += to_copy;
        break;
      }
    }
  }
  *consumed_bytes = input_buffer_size - input_data_size;
  return bytes_out;
}
bool GzipSourceStream::InsertZlibHeader() {
char dummy_header[] = {0x78, 0x01};
char dummy_output[4];
inflateReset(zlib_stream_.get());
zlib_stream_.get()->next_in = bit_cast<Bytef*>(&dummy_header[0]);
zlib_stream_.get()->avail_in = sizeof(dummy_header);
zlib_stream_.get()->next_out = bit_cast<Bytef*>(&dummy_output[0]);
zlib_stream_.get()->avail_out = sizeof(dummy_output);
int ret = inflate(zlib_stream_.get(), Z_NO_FLUSH);
return ret == Z_OK;
}
// Simple heuristic: per RFC 1952 2.3.1 a gzip file always begins with a
// two-byte magic value. When this stream is of type TYPE_GZIP_FALLBACK and
// |first_byte| is not the first magic byte (0x1f), the data is treated as
// plain and this returns true. Every other stream type never falls back.
bool GzipSourceStream::ShouldFallbackToPlain(char first_byte) {
  static const char kGzipFirstByte = 0x1f;
  return type() == TYPE_GZIP_FALLBACK && first_byte != kGzipFirstByte;
}
} // namespace net
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef NET_FILTER_GZIP_SOURCE_STREAM_H_
#define NET_FILTER_GZIP_SOURCE_STREAM_H_
#include <memory>
#include "base/macros.h"
#include "base/memory/ref_counted.h"
#include "net/base/io_buffer.h"
#include "net/base/net_export.h"
#include "net/filter/filter_source_stream.h"
#include "net/filter/gzip_header.h"
typedef struct z_stream_s z_stream;
namespace net {
class IOBuffer;
// GzipSourceStream applies gzip and deflate content decoding to a data
// stream. With gzip encoding the content is wrapped with a gzip header and
// followed by a gzip footer; the header is parsed by GZipHeader and the body
// is inflated as a raw stream. With deflate encoding the content is handed to
// zlib's default inflate; streams that arrive without a zlib header are
// handled by the InsertZlibHeader() workaround.
//
// Internally GzipSourceStream uses zlib inflate to do decoding.
//
class NET_EXPORT_PRIVATE GzipSourceStream : public FilterSourceStream {
public:
~GzipSourceStream() override;
// Creates a GzipSourceStream of the given |type| that reads from |previous|.
// |type| is expected to be TYPE_GZIP, TYPE_GZIP_FALLBACK or TYPE_DEFLATE.
// Returns nullptr if initialization fails.
static std::unique_ptr<GzipSourceStream> Create(
std::unique_ptr<SourceStream> previous,
SourceStream::SourceType type);
private:
// States of the input stream. The current state is kept in |input_state_|
// across FilterData() calls.
enum InputState {
// Starts processing the input stream. Checks whether the stream is valid
// and whether a fallback to plain data is needed.
STATE_START,
// Gzip header of the input stream is being processed.
STATE_GZIP_HEADER,
// The input stream is being decoded.
STATE_COMPRESSED_BODY,
// Gzip footer of the input stream is being processed.
STATE_GZIP_FOOTER,
// The input stream is being passed through undecoded.
STATE_UNCOMPRESSED_BODY,
};
// Private: instances are created through Create(), which also runs Init().
GzipSourceStream(std::unique_ptr<SourceStream> previous,
SourceStream::SourceType type);
// Returns true if initialization is successful, false otherwise.
// For instance, this method returns false if there is not enough memory or
// if there is a version mismatch.
bool Init();
// Overrides from the base classes.
std::string GetTypeAsString() const override;
// Decodes bytes from |input_buffer| into |output_buffer|. Returns the number
// of bytes written, or ERR_CONTENT_DECODING_FAILED on a decoding error, and
// stores the number of input bytes used in |consumed_bytes|.
int FilterData(IOBuffer* output_buffer,
int output_buffer_size,
IOBuffer* input_buffer,
int input_buffer_size,
int* consumed_bytes,
bool upstream_end_reached) override;
// Inserts a zlib header to the data stream before calling zlib inflate.
// This is used to work around server bugs. The function returns true on
// success.
bool InsertZlibHeader();
// Returns whether this stream looks like it could be plain text (i.e., not
// actually gzipped). Right now this uses an extremely simple heuristic; see
// the source for details. This method checks the first byte of the stream.
// Always returns false unless the stream type is TYPE_GZIP_FALLBACK.
bool ShouldFallbackToPlain(char first_byte);
// The control block of zlib which actually does the decoding.
// This data structure is initialized by Init and updated only by
// FilterData(), with InsertZlibHeader() being the exception as a workaround.
std::unique_ptr<z_stream> zlib_stream_;
// A flag used by FilterData() to record that inserting a zlib header has
// already been attempted, so that the workaround is tried at most once. It is
// set before the attempt, whether or not the insertion succeeds.
bool zlib_header_added_;
// Used to parse the gzip header in gzip stream.
// It is used when the stream type is TYPE_GZIP or TYPE_GZIP_FALLBACK.
GZipHeader gzip_header_;
// Tracks how many bytes of gzip footer are yet to be filtered.
size_t gzip_footer_bytes_left_;
// Tracks the state of the input stream.
InputState input_state_;
DISALLOW_COPY_AND_ASSIGN(GzipSourceStream);
};
} // namespace net
#endif // NET_FILTER_GZIP_SOURCE_STREAM_H_
This diff is collapsed.
...@@ -646,6 +646,8 @@ ...@@ -646,6 +646,8 @@
'filter/gzip_filter.h', 'filter/gzip_filter.h',
'filter/gzip_header.cc', 'filter/gzip_header.cc',
'filter/gzip_header.h', 'filter/gzip_header.h',
'filter/gzip_source_stream.cc',
'filter/gzip_source_stream.h',
'filter/sdch_filter.cc', 'filter/sdch_filter.cc',
'filter/sdch_filter.h', 'filter/sdch_filter.h',
'filter/source_stream.cc', 'filter/source_stream.cc',
...@@ -1515,6 +1517,7 @@ ...@@ -1515,6 +1517,7 @@
'filter/filter_source_stream_unittest.cc', 'filter/filter_source_stream_unittest.cc',
'filter/filter_unittest.cc', 'filter/filter_unittest.cc',
'filter/gzip_filter_unittest.cc', 'filter/gzip_filter_unittest.cc',
'filter/gzip_source_stream_unittest.cc',
'filter/mock_filter_context.cc', 'filter/mock_filter_context.cc',
'filter/mock_filter_context.h', 'filter/mock_filter_context.h',
'filter/mock_source_stream.cc', 'filter/mock_source_stream.cc',
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment