Commit 36baddf2 authored by Vaclav Brozek, committed by Commit Bot

Introduce DataAccessor (FormData parser fuzzing support)

This is part of the effort to introduce a fuzzer for code parsing
FormData into PasswordForms.

As described in the design linked from https://crbug.com/827945#c2, in
order to create FormData from an arbitrary input string supplied by the
fuzzer framework, a DataAccessor class should be created to wrap turning
the input string into bits, numbers and strings.

This CL adds DataAccessor, including tests (because the contained logic
is not straightforward).

Bug: 827945
Change-Id: Ib7b2fe54d74bc096afa7e8bc8ff72ab68a8c5977
Reviewed-on: https://chromium-review.googlesource.com/992312
Commit-Queue: Vaclav Brozek <vabr@chromium.org>
Reviewed-by: Vasilii Sukhanov <vasilii@chromium.org>
Cr-Commit-Position: refs/heads/master@{#548772}
parent c9d03f9e
......@@ -396,6 +396,7 @@ source_set("unit_tests") {
"//components/autofill/core/common",
"//components/os_crypt:test_support",
"//components/password_manager/core/browser:proto",
"//components/password_manager/core/browser/form_parsing/fuzzer:unit_tests",
"//components/password_manager/core/common",
"//components/prefs:test_support",
"//components/security_state/core",
......
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# Library holding DataAccessor, the helper that turns a raw input string into
# bits, numbers and strings for the FormData parser fuzzing support.
static_library("fuzzer_support") {
sources = [
"data_accessor.cc",
"data_accessor.h",
]
# Needed for base/logging.h, base/macros.h and base/strings/string16.h, which
# the sources above include.
deps = [
"//base",
]
}
# Unit tests for DataAccessor. Marked testonly, so only other testonly targets
# may depend on it.
source_set("unit_tests") {
testonly = true
sources = [
"data_accessor_unittest.cc",
]
# :fuzzer_support provides the code under test; //base provides string16 and
# the UTF conversion helpers used by the tests; gtest is the test framework.
deps = [
":fuzzer_support",
"//base",
"//testing/gtest",
]
}
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/password_manager/core/browser/form_parsing/fuzzer/data_accessor.h"
#include <string.h>
#include <algorithm>
#include <bitset>
#include "base/logging.h"
namespace password_manager {
namespace {
// The maximum byte length of a string to be returned by |ConsumeString*|.
// ConsumeString and ConsumeString16 CHECK the requested length against this
// bound; for ConsumeString16 every character counts as two bytes, so it can
// return at most 127 characters.
constexpr size_t kMaxStringBytes = 254;
} // namespace
DataAccessor::DataAccessor(const uint8_t* data, size_t size)
: data_(data), bits_consumed_(0), size_(size) {
DCHECK(data_ || size_ == 0); // Enforce the first invariant for data members.
}
// Nothing to release: the accessor does not own the input it reads from.
DataAccessor::~DataAccessor() = default;
// Reads exactly one bit and reports whether it was set. Delegates to
// ConsumeNumber(), so an exhausted input yields false (zero padding).
bool DataAccessor::ConsumeBit() {
  const size_t single_bit = ConsumeNumber(1);
  return single_bit != 0;
}
// Returns the number encoded by the next |bit_length| bits of the input and
// advances the reading head past them.
//
// Bits are taken from each byte starting with the least significant unread
// one, and the first bit consumed becomes the least significant bit of the
// result. Bits requested beyond the end of the input are zero (padding).
// |bit_length| must not exceed the width of size_t; this is CHECKed.
//
// The result is accumulated chunk by chunk, and every shift is by fewer bits
// than the width of size_t, so no genuine input bit is lost even when
// |bit_length| equals the width of size_t and the read starts in the middle of
// a byte.
size_t DataAccessor::ConsumeNumber(size_t bit_length) {
  CHECK_LE(bit_length, sizeof(size_t) * 8);

  size_t result = 0;
  // Number of bits already stored in |result|; also the position at which the
  // next chunk has to be placed.
  size_t bits_collected = 0;

  // Stop when the request is satisfied or when only padding is left. In the
  // latter case the remaining (upper) bits of |result| simply stay zero.
  while (bit_length > 0 && size_ > 0) {
    const size_t bits_left_in_byte = 8 - bits_consumed_;
    const size_t chunk_length = std::min(bit_length, bits_left_in_byte);

    // Drop the already consumed low bits, then mask off anything above the
    // |chunk_length| bits of interest.
    const size_t chunk_mask = (static_cast<size_t>(1) << chunk_length) - 1;
    const size_t chunk =
        (static_cast<size_t>(*data_) >> bits_consumed_) & chunk_mask;

    // |bits_collected| < 8 * sizeof(size_t) here, because at least one more
    // bit is still outstanding, so the shift is well defined.
    result |= chunk << bits_collected;

    bits_collected += chunk_length;
    bit_length -= chunk_length;
    bits_consumed_ += chunk_length;

    // Move on to the next byte once the current one is used up. This also
    // keeps the invariants |bits_consumed_| < 8 and
    // (|size_| == 0 => |bits_consumed_| == 0).
    if (bits_consumed_ == 8) {
      bits_consumed_ = 0;
      ++data_;
      --size_;
    }
  }
  return result;
}
// Writes the next |length| whole bytes of the input into |string_buffer|,
// which must have room for at least |length| bytes. If the reading head is in
// the middle of a byte, the unread bits of that byte are discarded first. When
// fewer than |length| input bytes remain, the tail of |string_buffer| is filled
// with zero bytes and the input is marked as depleted.
void DataAccessor::ConsumeBytesToBuffer(size_t length, uint8_t* string_buffer) {
  // First of all, align to a whole byte for efficiency.
  if (size_ > 0 && bits_consumed_ != 0) {
    bits_consumed_ = 0;
    ++data_;
    --size_;
  }

  const size_t non_padded_length = std::min(length, size_);

  // std::copy_n and std::fill_n do nothing for a count of zero, regardless of
  // the pointer values. std::memcpy, in contrast, has undefined behaviour for a
  // null source even with a zero count, which is what a DataAccessor
  // constructed from (nullptr, 0) would pass.
  std::copy_n(data_, non_padded_length, string_buffer);
  // Pad with zeroes as needed.
  std::fill_n(string_buffer + non_padded_length, length - non_padded_length,
              static_cast<uint8_t>(0));

  // Either the request was fully served from the input (|non_padded_length| ==
  // |length|), or the input ran out (|non_padded_length| == |size_|, leaving
  // |size_| at 0). The same update covers both cases.
  data_ += non_padded_length;
  size_ -= non_padded_length;
}
// Returns a string made of the next |length| whole bytes of the input (zero
// padded past the end of the input). |length| must not exceed
// |kMaxStringBytes|; this is CHECKed.
std::string DataAccessor::ConsumeString(size_t length) {
  CHECK_LE(length, kMaxStringBytes);
  // Let the helper write straight into the storage of the returned string.
  std::string consumed(length, '\0');
  ConsumeBytesToBuffer(length, reinterpret_cast<uint8_t*>(&consumed[0]));
  return consumed;
}
// Returns a string of |length| 16-bit characters built from the next
// 2 * |length| whole bytes of the input (zero padded past the end of the
// input). The bytes are copied verbatim, so each character has the platform's
// native byte order. |length| must not exceed |kMaxStringBytes| / 2; this is
// CHECKed.
base::string16 DataAccessor::ConsumeString16(size_t length) {
  static_assert(sizeof(base::string16::value_type) == 2,
                "ConsumeString16 assumes two bytes per character");
  // Compare |length| itself rather than 2 * |length|: the product wraps around
  // for huge values and would let them slip past the check.
  CHECK_LE(length, kMaxStringBytes / 2);

  // Fill the storage of the returned string directly. It is suitably aligned
  // for 16-bit characters, and writing it byte-wise through uint8_t* is
  // permitted, whereas reading a uint8_t array through a 16-bit pointer is not.
  base::string16 consumed(length, base::string16::value_type());
  ConsumeBytesToBuffer(2 * length, reinterpret_cast<uint8_t*>(&consumed[0]));
  return consumed;
}
} // namespace password_manager
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_PASSWORD_MANAGER_CORE_BROWSER_FORM_PARSING_FUZZER_DATA_ACCESSOR_H_
#define COMPONENTS_PASSWORD_MANAGER_CORE_BROWSER_FORM_PARSING_FUZZER_DATA_ACCESSOR_H_
#include <stddef.h>
#include <stdint.h>
#include <string>
#include "base/macros.h"
#include "base/strings/string16.h"
namespace password_manager {
// DataAccessor is an encapsulation over the input string delivered by the
// fuzzer framework. It pads the input string with zeroes after its end and
// delivers data based on the consumed string. More details in the design doc
// linked from https://crbug.com/827945#c2.
//
// Bytes are consumed in increasing address order and, within a byte, bits are
// consumed starting from the least significant one. Running out of input is
// not an error: the Consume* methods then return zero padding.
class DataAccessor {
public:
// Wraps the input string of length |size| at address |data|. Does not own the
// input string. It sets the "reading head" to the start of the string and
// advances it with each Consume* operation to avoid generating data from the
// same part of input twice. |data| may be null only if |size| is 0.
DataAccessor(const uint8_t* data, size_t size);
~DataAccessor();
// Return the next bit and advance the "reading head" by one bit.
bool ConsumeBit();
// Return the number stored on the next |bit_length| bits and advance the
// "reading head" by |bit_length| bits. The first bit consumed is the least
// significant bit of the returned number. |bit_length| must not exceed the
// number of bits in size_t (enforced by a CHECK).
size_t ConsumeNumber(size_t bit_length);
// Advance the "reading head" to the next whole-byte boundary, if needed, then
// return the string stored in the next |length| characters, advancing the
// "reading head" to point past the read data. A "character" means byte for
// std::string and two bytes for base::string16. At most 254 bytes can be
// consumed at once, hence |length| is restricted as noted below. The
// restriction is enforced by a CHECK. Bits skipped while aligning to the byte
// boundary are discarded.
std::string ConsumeString(size_t length); // |length| <= 254
base::string16 ConsumeString16(size_t length); // |length| <= 127
private:
// Helper for |ConsumeString*|. It combines the |data_| and padding, if
// needed, into |string_buffer|, to provide |length| bytes for creating a new
// string. It also updates |data_|, |bits_consumed_| and |size_| accordingly.
// |string_buffer| must be able to hold |length| bytes.
void ConsumeBytesToBuffer(size_t length, uint8_t* string_buffer);
// The remaining portion of the input string (without padding) starts at
// |data_| without the least significant |bits_consumed_| bits, and lasts
// until |data_ + size_|, exclusively. If |size_| is 0, then there is not a
// single bit left and all available is just the 0-padding.
// Invariants:
// * |data_| is not null as long as |size_| > 0
// * |bits_consumed_| < 8
// * if |size_| == 0 then |bits_consumed_| == 0
// Current read position inside the input; not owned.
const uint8_t* data_;
// Number of bits of |*data_| that have already been handed out.
size_t bits_consumed_;
// Number of input bytes left, including a partially consumed one at |data_|.
size_t size_;
DISALLOW_COPY_AND_ASSIGN(DataAccessor);
};
} // namespace password_manager
#endif // COMPONENTS_PASSWORD_MANAGER_CORE_BROWSER_FORM_PARSING_FUZZER_DATA_ACCESSOR_H_
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/password_manager/core/browser/form_parsing/fuzzer/data_accessor.h"
#include "base/strings/string16.h"
#include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
using base::UTF8ToUTF16;
namespace password_manager {
namespace {
// With an empty (null) input, every Consume* call must return pure zero
// padding of the requested size.
TEST(DataAccessorTest, NullInput) {
DataAccessor accessor(nullptr, 0);
EXPECT_EQ(0u, accessor.ConsumeNumber(13));
EXPECT_EQ(false, accessor.ConsumeBit());
// Three padding bytes; the explicit length keeps the embedded NULs.
EXPECT_EQ(std::string("\0\0\0", 3), accessor.ConsumeString(3));
EXPECT_EQ(base::string16(), accessor.ConsumeString16(0));
}
// Bits of a byte are delivered least significant first, so 0b10110001 is read
// as 1, 0, 0, 0, 1, 1, 0, 1.
TEST(DataAccessorTest, Bit) {
const uint8_t x = 0b10110001;
DataAccessor accessor(&x, 1);
EXPECT_EQ(true, accessor.ConsumeBit());
EXPECT_EQ(false, accessor.ConsumeBit());
EXPECT_EQ(false, accessor.ConsumeBit());
EXPECT_EQ(false, accessor.ConsumeBit());
EXPECT_EQ(true, accessor.ConsumeBit());
EXPECT_EQ(true, accessor.ConsumeBit());
EXPECT_EQ(false, accessor.ConsumeBit());
EXPECT_EQ(true, accessor.ConsumeBit());
}
// Reads numbers of various widths from two bytes, including reads that start
// mid-byte, cross the byte boundary, and run into the zero padding.
TEST(DataAccessorTest, Number) {
const uint8_t xs[] = {0b01100110, 0b11100110};
DataAccessor accessor(xs, sizeof(xs));
accessor.ConsumeBit(); // Just skip the first bit for fun.
// Bits 1-3, then bit 4, then bits 5-6 of the first byte.
EXPECT_EQ(0b011u, accessor.ConsumeNumber(3));
EXPECT_EQ(0b0u, accessor.ConsumeNumber(1));
EXPECT_EQ(0b11u, accessor.ConsumeNumber(2));
// This read crosses the byte boundary: bit 7 of the first byte supplies the
// lowest result bit, bits 0-1 of the second byte the two above it.
// 10 (2nd byte) ++ 0 (1st byte):
EXPECT_EQ(0b100u, accessor.ConsumeNumber(3));
EXPECT_EQ(0u, accessor.ConsumeNumber(0)); // An empty string represents 0.
// Bits 2-6 of the second byte.
EXPECT_EQ(0b11001u, accessor.ConsumeNumber(5));
// The last input bit (bit 7 of the second byte) followed by one padding bit.
EXPECT_EQ(0b01u, accessor.ConsumeNumber(2)); // 1, also reaching padding
EXPECT_EQ(0b0000000u, accessor.ConsumeNumber(7)); // padding
}
// Reads 8-bit strings, checking re-alignment to a byte boundary after a
// partial-byte read and zero padding past the end of the input.
TEST(DataAccessorTest, String) {
const std::string str = "Test string 123.";
DataAccessor accessor(reinterpret_cast<const uint8_t*>(str.c_str()),
str.size());
EXPECT_EQ("Test", accessor.ConsumeString(4));
// The 3 bits come from the space after "Test"; the rest of that byte is
// dropped by the next string read.
accessor.ConsumeNumber(3); // Skip 3 bits to test re-alignment.
EXPECT_EQ("string 123", accessor.ConsumeString(10));
// A zero-length read returns an empty string.
EXPECT_EQ(std::string(), accessor.ConsumeString(0));
// Test also that padding is included.
// Only "." is left in the input, so two zero bytes are appended.
EXPECT_EQ(std::string(".\0\0", 3), accessor.ConsumeString(3));
}
// Same as the String test, but for 16-bit strings. The input is the raw memory
// of a native string16, so the expectations hold for any byte order.
TEST(DataAccessorTest, String16) {
const base::string16 str = UTF8ToUTF16("Test string 123.");
// The size is given in bytes: two per 16-bit character.
DataAccessor accessor(reinterpret_cast<const uint8_t*>(str.c_str()),
str.size() * 2);
EXPECT_EQ(UTF8ToUTF16("Test"), accessor.ConsumeString16(4));
// 13 bits are one whole byte plus 5 bits of the next one, i.e. they end
// inside the second byte of the space character. The next string read skips
// the remaining 3 bits and thus starts exactly at the following character.
accessor.ConsumeNumber(13); // Skip 13 bits to test re-alignment.
EXPECT_EQ(UTF8ToUTF16("string 123"), accessor.ConsumeString16(10));
EXPECT_EQ(base::string16(), accessor.ConsumeString16(0));
// Test also that padding is included.
// Only "." (two bytes) is left, so two zero characters are appended.
EXPECT_EQ(UTF8ToUTF16(std::string(".\0\0", 3)), accessor.ConsumeString16(3));
}
// Interleaves string, bit and number reads on a single input.
TEST(DataAccessorTest, Mix) {
const uint8_t xs[] = {'a', 'b', 0b11100101, 5, 9,
0b10000001, 'c', 'd', 'e', 0};
DataAccessor accessor(xs, sizeof(xs));
EXPECT_EQ("ab", accessor.ConsumeString(2));
// Lowest bit of 0b11100101, then its remaining 7 bits.
EXPECT_EQ(true, accessor.ConsumeBit());
EXPECT_EQ(0b1110010u, accessor.ConsumeNumber(7));
// A byte-aligned 8-bit read returns the byte itself.
EXPECT_EQ(5u, accessor.ConsumeNumber(8));
// 9 bits: the whole byte 9 plus the lowest bit of 0b10000001 as bit 8.
EXPECT_EQ(9u + (1u << 8), accessor.ConsumeNumber(9));
// Bit 1 of 0b10000001.
EXPECT_EQ(false, accessor.ConsumeBit());
// The string read drops the remaining bits of 0b10000001 first.
EXPECT_EQ("cd", accessor.ConsumeString(2));
// The bytes {'e', 0} are copied verbatim into one 16-bit character.
// NOTE(review): this equals "e" only on a little-endian platform - confirm
// that big-endian targets are out of scope.
EXPECT_EQ(UTF8ToUTF16("e"), accessor.ConsumeString16(1));
}
} // namespace
} // namespace password_manager
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment