Commit 9067d0cb, authored by Doug Arnett, committed by Commit Bot

Adds BloomFilter implementation for large scale blacklist support

This is needed to support a server provided blacklist (via Cacao) for
HTTPS Server Previews. Details are specified in
  https://docs.google.com/document/d/13Pj85rq3aUfVxxMe4Dad9uqz54n0GgD8iPqUowdsD-c/edit?usp=sharing

Bug: 864640
Change-Id: Ie9b4a56dd0edc97df48f2d6e066ad2a0b1104458
Reviewed-on: https://chromium-review.googlesource.com/1207574
Commit-Queue: Doug Arnett <dougarnett@chromium.org>
Reviewed-by: John Abd-El-Malek <jam@chromium.org>
Reviewed-by: Ryan Sturm <ryansturm@chromium.org>
Cr-Commit-Position: refs/heads/master@{#589216}
parent ca672c42
...@@ -4,6 +4,8 @@ ...@@ -4,6 +4,8 @@
static_library("core") { static_library("core") {
sources = [ sources = [
"bloom_filter.cc",
"bloom_filter.h",
"previews_black_list.cc", "previews_black_list.cc",
"previews_black_list.h", "previews_black_list.h",
"previews_decider.h", "previews_decider.h",
...@@ -26,6 +28,7 @@ static_library("core") { ...@@ -26,6 +28,7 @@ static_library("core") {
"//components/variations", "//components/variations",
"//net:net", "//net:net",
"//third_party/re2", "//third_party/re2",
"//third_party/smhasher:murmurhash3",
"//url:url", "//url:url",
] ]
} }
...@@ -51,6 +54,7 @@ static_library("test_support") { ...@@ -51,6 +54,7 @@ static_library("test_support") {
source_set("unit_tests") { source_set("unit_tests") {
testonly = true testonly = true
sources = [ sources = [
"bloom_filter_unittest.cc",
"previews_black_list_unittest.cc", "previews_black_list_unittest.cc",
"previews_experiments_unittest.cc", "previews_experiments_unittest.cc",
"previews_logger_unittest.cc", "previews_logger_unittest.cc",
......
...@@ -2,5 +2,6 @@ include_rules = [ ...@@ -2,5 +2,6 @@ include_rules = [
"+components/blacklist/opt_out_blacklist", "+components/blacklist/opt_out_blacklist",
"+components/variations", "+components/variations",
"+net", "+net",
"+third_party/re2" "+third_party/re2",
"+third_party/smhasher"
] ]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/previews/core/bloom_filter.h"
#include <stddef.h>
#include <stdint.h>
#include <utility>
#include "base/logging.h"
#include "third_party/smhasher/src/MurmurHash3.h"
namespace previews {
namespace {
// Hashes |str| with MurmurHash3_x64_128 using |seed| and returns the first
// 64-bit word of the 128-bit result.
//
// MurmurHash3 is used in coordination with the server because it is a fast
// hashing function with compatible public client and private server
// implementations. DO NOT CHANGE this hashing function without coordination
// and a migration plan with the server.
uint64_t MurmurHash3(const std::string& str, uint32_t seed) {
  uint64_t digest[2];
  MurmurHash3_x64_128(str.data(), str.size(), seed, &digest);
  // Only the first 64-bit word is used; the second word is discarded.
  const uint64_t first_word = digest[0];
  return first_word;
}
} // namespace
// Constructs a Bloom filter of |num_bits| bits whose initial contents are
// |filter_data|, probing |num_hash_functions| bits per string.
//
// |filter_data| must hold at least |num_bits| bits (i.e. its size in bytes
// times 8 must be >= |num_bits|); otherwise the CHECK below fails.
BloomFilter::BloomFilter(uint32_t num_bits,
                         ByteVector filter_data,
                         uint32_t num_hash_functions)
    : num_bits_(num_bits),
      // |filter_data| is received by value, so move it into the member
      // instead of copying the whole buffer a second time.
      bytes_(std::move(filter_data)),
      num_hash_functions_(num_hash_functions) {
  // |filter_data| has been moved from at this point, so validate the size of
  // the buffer this object now owns.
  CHECK_GE(bytes_.size() * 8, num_bits);
}
// Nothing to release explicitly; members clean up after themselves.
BloomFilter::~BloomFilter() = default;
// Returns true only if every bit selected for |str| is set. One bit is probed
// per hash function, using the hash function's index as the hash seed.
bool BloomFilter::Contains(const std::string& str) const {
  for (size_t seed = 0; seed < num_hash_functions_; ++seed) {
    // Position of the probed bit within the filter, in [0, num_bits_).
    const uint64_t position = MurmurHash3(str, seed) % num_bits_;
    // Bit |position| lives in byte (position / 8) at bit (position % 8).
    const uint8_t holder = bytes_[position / 8];
    const bool bit_is_set = (holder >> (position % 8)) & 1;
    if (!bit_is_set)
      return false;
  }
  return true;
}
// Sets every bit selected for |str|. One bit is set per hash function, using
// the hash function's index as the hash seed.
void BloomFilter::Add(const std::string& str) {
  for (size_t seed = 0; seed < num_hash_functions_; ++seed) {
    // Position of the bit to set within the filter, in [0, num_bits_).
    const uint64_t position = MurmurHash3(str, seed) % num_bits_;
    // Bit |position| lives in byte (position / 8) at bit (position % 8).
    uint8_t& target = bytes_[position / 8];
    target |= 1 << (position % 8);
  }
}
} // namespace previews
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_PREVIEWS_CORE_BLOOM_FILTER_H_
#define COMPONENTS_PREVIEWS_CORE_BLOOM_FILTER_H_
#include <stdint.h>
#include <string>
#include <vector>
#include "base/macros.h"
namespace previews {
// Growable sequence of bytes (unsigned 8-bit integers).
using ByteVector = std::vector<uint8_t>;
// BloomFilter is a simple Bloom filter for keeping track of a set of strings.
//
// Bit |n| of the filter is stored in bit (n % 8) of byte (n / 8) of the
// backing byte vector. A string that has been added is always reported as
// contained; a string that was never added may also be reported as contained
// when the bits it maps to were set by other strings (a false positive).
class BloomFilter {
 public:
  // Constructs a Bloom filter of |num_bits| size with data initialized from
  // the |filter_data| byte vector and using |num_hash_functions| per entry.
  // |filter_data| must provide at least |num_bits| bits; this is enforced
  // with a CHECK.
  BloomFilter(uint32_t num_bits,
              ByteVector filter_data,
              uint32_t num_hash_functions);
  ~BloomFilter();

  // Returns whether this Bloom filter contains |str|, i.e. whether every bit
  // that |str| maps to is set.
  bool Contains(const std::string& str) const;

  // Adds |str| to this Bloom filter by setting every bit that |str| maps to.
  void Add(const std::string& str);

  // Returns the bit array data of this Bloom filter as vector of bytes.
  const ByteVector& bytes() const { return bytes_; }

 private:
  // Number of bits in the filter.
  uint32_t num_bits_;

  // Byte data for the filter.
  ByteVector bytes_;

  // Number of bits to set for each added string.
  uint32_t num_hash_functions_;

  DISALLOW_COPY_AND_ASSIGN(BloomFilter);
};
} // namespace previews
#endif // COMPONENTS_PREVIEWS_CORE_BLOOM_FILTER_H_
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/previews/core/bloom_filter.h"
#include <stdint.h>
#include "build/build_config.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace previews {
namespace {
int CountBits(const ByteVector& vector) {
int bit_count = 0;
for (size_t i = 0; i < vector.size(); ++i) {
uint8_t byte = vector[i];
for (int j = 0; j < 8; ++j) {
if (byte & (1 << j))
bit_count++;
}
}
return bit_count;
}
} // namespace
// Exercises a 16-bit filter that sets a single bit per added string.
TEST(BloomFilterTest, SingleHash) {
  ByteVector zeroed_bytes(2, 0);
  BloomFilter bloom(16 /* num_bits */, zeroed_bytes,
                    1 /* num_hash_functions */);

  // A freshly built filter over zeroed data has no bits set and matches
  // nothing.
  EXPECT_EQ(2u, bloom.bytes().size());
  EXPECT_EQ(0, CountBits(bloom.bytes()));
  EXPECT_FALSE(bloom.Contains("Alfa"));
  EXPECT_FALSE(bloom.Contains("Bravo"));
  EXPECT_FALSE(bloom.Contains("Charlie"));

  // One string with one hash function sets exactly one bit.
  bloom.Add("Alfa");
  EXPECT_EQ(1, CountBits(bloom.bytes()));
  EXPECT_TRUE(bloom.Contains("Alfa"));
  EXPECT_FALSE(bloom.Contains("Bravo"));
  EXPECT_FALSE(bloom.Contains("Charlie"));

  // "Chuck" (not "Charlie") is added, so "Charlie" is still expected to be
  // absent afterwards.
  bloom.Add("Bravo");
  bloom.Add("Chuck");
  EXPECT_EQ(3, CountBits(bloom.bytes()));
  EXPECT_TRUE(bloom.Contains("Alfa"));
  EXPECT_TRUE(bloom.Contains("Bravo"));
  EXPECT_FALSE(bloom.Contains("Charlie"));
}
// With a single-bit filter every string maps to the same bit, so adding any
// one string makes every other string look present.
TEST(BloomFilterTest, FalsePositivesWithSingleBitFilterCollisions) {
  static const char* const kProbes[] = {"Alfa", "Bravo", "Charlie"};

  ByteVector single_byte(1, 0);
  BloomFilter bloom(1 /* num_bits */, single_byte,
                    1 /* num_hash_functions */);

  // Nothing matches while the only bit is clear.
  for (const char* probe : kProbes)
    EXPECT_FALSE(bloom.Contains(probe)) << probe;

  bloom.Add("Alfa");

  // The only bit is now set, so every probe collides with "Alfa".
  for (const char* probe : kProbes)
    EXPECT_TRUE(bloom.Contains(probe)) << probe;
}
// Exercises a 75-bit filter (backed by 10 bytes) that sets three bits per
// added string.
TEST(BloomFilterTest, MultiHash) {
  ByteVector zeroed_bytes(10, 0);
  BloomFilter bloom(75 /* num_bits */, zeroed_bytes,
                    3 /* num_hash_functions */);

  // A freshly built filter over zeroed data has no bits set and matches
  // nothing.
  EXPECT_EQ(10u, bloom.bytes().size());
  EXPECT_EQ(0, CountBits(bloom.bytes()));
  EXPECT_FALSE(bloom.Contains("Alfa"));
  EXPECT_FALSE(bloom.Contains("Bravo"));
  EXPECT_FALSE(bloom.Contains("Charlie"));

  // One string with three hash functions is expected to set three bits.
  bloom.Add("Alfa");
  EXPECT_EQ(3, CountBits(bloom.bytes()));
  EXPECT_TRUE(bloom.Contains("Alfa"));
  EXPECT_FALSE(bloom.Contains("Bravo"));
  EXPECT_FALSE(bloom.Contains("Charlie"));

  // "Chuck" (not "Charlie") is added, so "Charlie" is still expected to be
  // absent afterwards.
  bloom.Add("Bravo");
  bloom.Add("Chuck");
  EXPECT_EQ(9, CountBits(bloom.bytes()));
  EXPECT_TRUE(bloom.Contains("Alfa"));
  EXPECT_TRUE(bloom.Contains("Bravo"));
  EXPECT_FALSE(bloom.Contains("Charlie"));
}
// A filter whose backing bytes are all 0xff reports every string as present.
TEST(BloomFilterTest, EverythingMatches) {
  // Every bit of every byte is ON.
  ByteVector saturated_bytes(1024, 0xff);
  BloomFilter bloom(8191 /* num_bits */, saturated_bytes,
                    7 /* num_hash_functions */);

  static const char* const kProbes[] = {"Alfa", "Bravo", "Charlie", "Delta",
                                        "Echo"};
  for (const char* probe : kProbes)
    EXPECT_TRUE(bloom.Contains(probe)) << probe;
}
#if !defined(OS_IOS)
// 1023 bytes provide only 8184 bits, fewer than the 8191 bits requested, so
// construction is expected to die on the constructor's size CHECK.
TEST(BloomFilterTest, ByteVectorTooSmall) {
  ByteVector undersized_bytes(1023, 0xff);
  EXPECT_DEATH(
      { BloomFilter bloom(8191 /* num_bits */, undersized_bytes, 7); },
      "Check failed");
}
#endif
} // namespace previews
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment