Commit 38d64f34 authored by Adam Rice, committed by Commit Bot

Make DecompressionStream error checking stricter

Error the stream if the input to DecompressionStream is truncated, or
there is junk after the end of the stream. This will help avoid
accidental loss of data.

Also add a thorough test of the behaviour when various fields of a
compressed stream are corrupted, and verify that errors are correctly
issued when the stream is truncated or has trailing junk.
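
For illustration, a minimal sketch of the new observable behaviour,
reusing the deflate bytes from the new test with the final byte dropped
(variable names and logging here are illustrative, not part of the change):

'use strict';
// DEFLATE-compressed 'expected output', truncated by one byte so the
// trailing ADLER32 checksum is incomplete.
const truncated = new Uint8Array([
    120, 156, 75, 173, 40, 72, 77, 46, 73, 77, 81, 200, 47, 45, 41, 40, 45,
    1, 0, 48, 173, 6]);
const ds = new DecompressionStream('deflate');
const reader = ds.readable.getReader();
const writer = ds.writable.getWriter();
writer.write(truncated).catch(() => {});
writer.close().catch(() => {});
(async () => {
  try {
    while (!(await reader.read()).done) {}
    console.log('stream completed (previous, lenient behaviour)');
  } catch (e) {
    console.log('stream errored (new behaviour):', e.name);  // "TypeError"
  }
})();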

BUG=1014422

Change-Id: I8aceeafd279b75d4fea76d7edca19024856417c0
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1928428
Reviewed-by: Yutaka Hirano <yhirano@chromium.org>
Commit-Queue: Adam Rice <ricea@chromium.org>
Cr-Commit-Position: refs/heads/master@{#718543}
parent a38a2ef2
@@ -19,6 +19,7 @@
 #include "third_party/blink/renderer/modules/compression/zlib_partition_alloc.h"
 #include "third_party/blink/renderer/platform/bindings/exception_state.h"
 #include "third_party/blink/renderer/platform/bindings/to_v8.h"
+#include "third_party/blink/renderer/platform/wtf/text/wtf_string.h"
 #include "v8/include/v8.h"

 namespace blink {
@@ -85,6 +86,10 @@ ScriptPromise InflateTransformer::Flush(
   was_flush_called_ = true;
   out_buffer_.clear();
+  if (!reached_end_) {
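+    // reached_end_ is only set once inflate() has returned Z_STREAM_END, so
+    // getting here means the compressed input ended prematurely.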
+    exception_state.ThrowTypeError("Compressed input was truncated.");
+  }
   return ScriptPromise::CastUndefined(script_state_);
 }
@@ -94,6 +99,13 @@ void InflateTransformer::Inflate(
     IsFinished finished,
     TransformStreamDefaultControllerInterface* controller,
     ExceptionState& exception_state) {
+  if (reached_end_ && length != 0) {
+    // zlib will ignore data after the end of the stream, so we have to
+    // explicitly throw an error.
+    exception_state.ThrowTypeError("Junk found after end of compressed data.");
+    return;
+  }
   stream_.avail_in = length;
   // Zlib treats this pointer as const, so this cast is safe.
   stream_.next_in = const_cast<uint8_t*>(start);
@@ -101,9 +113,15 @@ void InflateTransformer::Inflate(
   do {
     stream_.avail_out = out_buffer_.size();
     stream_.next_out = out_buffer_.data();
-    int err = inflate(&stream_, finished ? Z_FINISH : Z_NO_FLUSH);
+    const int err = inflate(&stream_, finished ? Z_FINISH : Z_NO_FLUSH);
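+    // Z_BUF_ERROR only means inflate() could not make progress with the
+    // buffers it was given; it is not fatal, so it is not treated as an
+    // error here.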
     if (err != Z_OK && err != Z_STREAM_END && err != Z_BUF_ERROR) {
-      exception_state.ThrowTypeError("The compressed data was not valid.");
+      DCHECK_NE(err, Z_STREAM_ERROR);
+      if (err == Z_DATA_ERROR) {
+        exception_state.ThrowTypeError(
+            String("The compressed data was not valid: ") + stream_.msg + ".");
+      } else {
+        exception_state.ThrowTypeError("The compressed data was not valid.");
+      }
       return;
     }
@@ -116,6 +134,15 @@ void InflateTransformer::Inflate(
         return;
       }
     }
+
+    if (err == Z_STREAM_END) {
+      reached_end_ = true;
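+      // inflate() stopped at the end of the zlib stream; any unconsumed
+      // bytes left in this chunk are trailing junk.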
+      if (stream_.next_in < start + length) {
+        exception_state.ThrowTypeError(
+            "Junk found after end of compressed data.");
+      }
+      return;
+    }
   } while (stream_.avail_out == 0);
 }
@@ -48,6 +48,7 @@ class InflateTransformer final : public TransformStreamTransformer {
   Vector<uint8_t> out_buffer_;
   bool was_flush_called_ = false;
+  bool reached_end_ = false;

   // This buffer size has been experimentally verified to be optimal.
   static constexpr wtf_size_t kBufferSize = 65536;
// META global=worker
// This test checks that DecompressionStream behaves according to the standard
// when the input is corrupted. To avoid a combinatorial explosion in the
// number of tests, we only mutate one field at a time, and we only test
// "interesting" values.
'use strict';
// The many different cases are summarised in this data structure.
const expectations = [
{
format: 'deflate',
// Decompresses to 'expected output'.
baseInput: [120, 156, 75, 173, 40, 72, 77, 46, 73, 77, 81, 200, 47, 45, 41,
40, 45, 1, 0, 48, 173, 6, 36],
// See RFC1950 for the definition of the various fields used by deflate:
// https://tools.ietf.org/html/rfc1950.
fields: [
{
// The function of this field. This matches the name used in the RFC.
name: 'CMF',
// The offset of the field in bytes from the start of the input.
offset: 0,
// The length of the field in bytes.
length: 1,
cases: [
{
// The value to set the field to. If the field contains multiple
// bytes, all the bytes will be set to this value.
value: 0,
// The expected result. 'success' means the input is decoded
// successfully. 'error' means that the stream will be errored.
result: 'error'
}
]
},
{
name: 'FLG',
offset: 1,
length: 1,
// FLG contains a 4-bit checksum (FCHECK) which is calculated in such a
// way that there are 4 valid values for this field.
cases: [
{
value: 218,
result: 'success'
},
{
value: 1,
result: 'success'
},
{
value: 94,
result: 'success'
},
{
// The remaining 252 values cause an error.
value: 157,
result: 'error'
}
]
},
{
name: 'DATA',
// In general, changing any bit of the data will trigger a checksum
// error; only the last byte behaves differently.
offset: 18,
length: 1,
cases: [
{
value: 4,
result: 'success'
},
{
value: 5,
result: 'error'
}
]
},
{
name: 'ADLER',
offset: -4,
length: 4,
cases: [
{
value: 255,
result: 'error'
}
]
}
]
},
{
format: 'gzip',
// Decompresses to 'expected output'.
baseInput: [31, 139, 8, 0, 0, 0, 0, 0, 0, 3, 75, 173, 40, 72, 77, 46, 73,
77, 81, 200, 47, 45, 41, 40, 45, 1, 0, 176, 1, 57, 179, 15, 0,
0, 0],
// See RFC1952 for the definition of the various fields used by gzip:
// https://tools.ietf.org/html/rfc1952.
fields: [
{
name: 'ID',
offset: 0,
length: 2,
cases: [
{
value: 255,
result: 'error'
}
]
},
{
name: 'CM',
offset: 2,
length: 1,
cases: [
{
value: 0,
result: 'error'
}
]
},
{
name: 'FLG',
offset: 3,
length: 1,
cases: [
{
value: 1, // FTEXT
result: 'success'
},
{
value: 2, // FHCRC
result: 'error'
}
]
},
{
name: 'MTIME',
offset: 4,
length: 4,
cases: [
{
// Any value is valid for this field.
value: 255,
result: 'success'
}
]
},
{
name: 'XFL',
offset: 8,
length: 1,
cases: [
{
// Any value is accepted.
value: 255,
result: 'success'
}
]
},
{
name: 'OS',
offset: 9,
length: 1,
cases: [
{
// Any value is accepted.
value: 128,
result: 'success'
}
]
},
{
name: 'DATA',
// The last byte of the data is the most interesting.
offset: 26,
length: 1,
cases: [
{
value: 3,
result: 'error'
},
{
value: 4,
result: 'success'
}
]
},
{
name: 'CRC',
offset: -8,
length: 4,
cases: [
{
// Any change will error the stream.
value: 0,
result: 'error'
}
]
},
{
name: 'ISIZE',
offset: -4,
length: 4,
cases: [
{
// A mismatch will error the stream.
value: 1,
result: 'error'
}
]
}
]
}
];
async function tryDecompress(input, format) {
const ds = new DecompressionStream(format);
const reader = ds.readable.getReader();
const writer = ds.writable.getWriter();
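  // Any decompression failure will surface via reader.read() below; these
  // catch() handlers just avoid unhandled rejection noise from the writer.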
writer.write(input).catch(() => {});
writer.close().catch(() => {});
let out = [];
while (true) {
try {
const { value, done } = await reader.read();
if (done) {
break;
}
out = out.concat(Array.from(value));
} catch (e) {
return { result: 'error' };
}
}
const expectedOutput = 'expected output';
if (out.length !== expectedOutput.length) {
return { result: 'corrupt' };
}
for (let i = 0; i < out.length; ++i) {
if (out[i] !== expectedOutput.charCodeAt(i)) {
return { result: 'corrupt' };
}
}
return { result: 'success' };
}
function corruptInput(input, offset, length, value) {
const output = new Uint8Array(input);
if (offset < 0) {
offset += input.length;
}
for (let i = offset; i < offset + length; ++i) {
output[i] = value;
}
return output;
}
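// A negative offset counts back from the end of the input: for example,
// corruptInput(baseInput, -4, 4, 255) overwrites the last four bytes of the
// deflate input (the ADLER field), which is expected to error the stream.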
for (const { format, baseInput, fields } of expectations) {
promise_test(async () => {
const { result } = await tryDecompress(new Uint8Array(baseInput), format);
assert_equals(result, 'success', 'decompression should succeed');
}, `the unchanged input for '${format}' should decompress successfully`);
promise_test(async () => {
const truncatedInput = new Uint8Array(baseInput.slice(0, -1));
const { result } = await tryDecompress(truncatedInput, format);
assert_equals(result, 'error', 'decompression should fail');
}, `truncating the input for '${format}' should give an error`);
promise_test(async () => {
const extendedInput = new Uint8Array(baseInput.concat([0]));
const { result } = await tryDecompress(extendedInput, format);
assert_equals(result, 'error', 'decompression should fail');
}, `trailing junk for '${format}' should give an error`);
for (const { name, offset, length, cases } of fields) {
for (const { value, result } of cases) {
promise_test(async () => {
const corruptedInput = corruptInput(baseInput, offset, length, value);
const { result: actual } =
await tryDecompress(corruptedInput, format);
assert_equals(actual, result, 'result should match');
}, `format '${format}' field ${name} should be ${result} for ${value}`);
}
}
}