Commit e179e7d1 authored by holte@chromium.org

Randomly select a single rappor sample when more than one is collected.

BUG=400553

Review URL: https://codereview.chromium.org/419683014

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@287698 0039d316-1c4b-4281-b951-d872f2087c98
parent 5b0baa74
......@@ -20,7 +20,10 @@ BloomFilter::BloomFilter(uint32_t bytes_size,
BloomFilter::~BloomFilter() {}
void BloomFilter::AddString(const std::string& str) {
void BloomFilter::SetString(const std::string& str) {
for (size_t i = 0; i < bytes_.size(); ++i) {
bytes_[i] = 0;
}
for (size_t i = 0; i < hash_function_count_; ++i) {
// Using CityHash here because we have support for it in Dremel. Many hash
// functions, such as MD5, SHA1, or Murmur, would probably also work.
......@@ -33,4 +36,11 @@ void BloomFilter::AddString(const std::string& str) {
}
}
// Test-only hook: overwrites the filter's byte array with the contents of
// |bytes|, one element at a time. |bytes| is expected to have exactly as many
// elements as the filter holds; this is checked with DCHECK_EQ.
void BloomFilter::SetBytesForTesting(const ByteVector& bytes) {
  DCHECK_EQ(bytes_.size(), bytes.size());
  const size_t byte_count = bytes_.size();
  size_t index = 0;
  while (index < byte_count) {
    bytes_[index] = bytes[index];
    ++index;
  }
}
} // namespace rappor
......@@ -25,12 +25,15 @@ class BloomFilter {
uint32_t hash_seed_offset);
~BloomFilter();
// Add a single string to the Bloom filter.
void AddString(const std::string& str);
// Sets the Bloom filter bits to contain a single string.
void SetString(const std::string& str);
// Returns the current value of the Bloom filter's bit array.
const ByteVector& bytes() const { return bytes_; };
// Sets bytes for testing purposes.
void SetBytesForTesting(const ByteVector& bytes);
private:
// Stores the byte array of the Bloom filter.
ByteVector bytes_;
......
......@@ -17,21 +17,21 @@ TEST(BloomFilterTest, TinyFilter) {
EXPECT_EQ(0x00, filter.bytes()[0]);
// "Test" has a self-collision, and only sets 3 bits.
filter.AddString("Test");
filter.SetString("Test");
EXPECT_EQ(0x2a, filter.bytes()[0]);
// Adding the same value shouldn't change anything.
filter.AddString("Test");
// Setting the same value shouldn't change anything.
filter.SetString("Test");
EXPECT_EQ(0x2a, filter.bytes()[0]);
BloomFilter filter2(1u, 4u, 0u);
EXPECT_EQ(0x00, filter2.bytes()[0]);
filter2.AddString("Bar");
filter2.SetString("Bar");
EXPECT_EQ(0xa8, filter2.bytes()[0]);
// Adding a colliding string should just set new bits.
filter.AddString("Bar");
EXPECT_EQ(0xaa, filter.bytes()[0]);
// The new string should replace the old one.
filter.SetString("Bar");
EXPECT_EQ(0xa8, filter.bytes()[0]);
}
TEST(BloomFilterTest, HugeFilter) {
......@@ -43,11 +43,11 @@ TEST(BloomFilterTest, HugeFilter) {
EXPECT_EQ(500u, filter.bytes().size());
EXPECT_EQ(0, CountBits(filter.bytes()));
filter.AddString("Bar");
filter.SetString("Bar");
EXPECT_EQ(1, CountBits(filter.bytes()));
// Adding the same value shouldn't change anything.
filter.AddString("Bar");
filter.SetString("Bar");
EXPECT_EQ(1, CountBits(filter.bytes()));
}
......
......@@ -5,6 +5,7 @@
#include "components/rappor/rappor_metric.h"
#include "base/logging.h"
#include "base/rand_util.h"
namespace rappor {
......@@ -13,6 +14,7 @@ RapporMetric::RapporMetric(const std::string& metric_name,
int32_t cohort_seed)
: metric_name_(metric_name),
parameters_(parameters),
sample_count_(0),
bloom_filter_(parameters.bloom_filter_size_bytes,
parameters.bloom_filter_hash_function_count,
(cohort_seed % parameters.num_cohorts) *
......@@ -24,7 +26,12 @@ RapporMetric::RapporMetric(const std::string& metric_name,
RapporMetric::~RapporMetric() {}
// Records |str| as a sample for this metric. Every call increments
// sample_count_; the Bloom filter is overwritten with |str| only when the
// random draw below comes up 0.
void RapporMetric::AddSample(const std::string& str) {
// NOTE(review): this page shows removed and added diff lines without +/-
// markers. The AddString call below looks like the pre-change line that
// SetString supersedes -- confirm it is absent from the committed file.
bloom_filter_.AddString(str);
// Count this sample before drawing, so the draw for the first sample uses
// sample_count_ == 1.
++sample_count_;
// Replace the previous sample with a 1 in sample_count_ chance so that each
// sample has equal probability of being reported.
if (base::RandGenerator(sample_count_) == 0) {
bloom_filter_.SetString(str);
}
}
ByteVector RapporMetric::GetReport(const std::string& secret) const {
......@@ -59,4 +66,8 @@ ByteVector RapporMetric::GetReport(const std::string& secret) const {
return *ByteVectorMerge(*fake_and_redacted_bits, zero_coins, &one_coins);
}
void RapporMetric::SetBytesForTesting(const ByteVector& bytes) {
bloom_filter_.SetBytesForTesting(bytes);
}
} // namespace rappor
......@@ -35,6 +35,8 @@ class RapporMetric {
~RapporMetric();
// Records an additional sample in the Bloom filter.
// A random sample will be used when reporting this metric when more than one
// sample is collected in the same reporting interval.
void AddSample(const std::string& str);
// Retrieves the current Bloom filter bits.
......@@ -48,9 +50,13 @@ class RapporMetric {
// final report bits.
ByteVector GetReport(const std::string& secret) const;
// Specify the bytes to generate a report from, for testing purposes.
void SetBytesForTesting(const ByteVector& bytes);
private:
const std::string metric_name_;
const RapporParameters parameters_;
uint32_t sample_count_;
BloomFilter bloom_filter_;
DISALLOW_COPY_AND_ASSIGN(RapporMetric);
......
......@@ -33,7 +33,6 @@ const RapporParameters kTestStatsRapporParameters = {
// Check for basic syntax and use.
// Constructs a metric, records samples and checks one byte of the resulting
// Bloom filter.
// NOTE(review): this page shows removed and added diff lines without +/-
// markers, and the hunk header (7 lines -> 6) indicates one line of this test
// was deleted -- presumably one of the two AddSample calls, since AddSample
// now keeps a single randomly chosen sample. Confirm against the committed
// file.
TEST(RapporMetricTest, BasicMetric) {
RapporMetric testMetric("MyRappor", kTestRapporParameters, 0);
testMetric.AddSample("Foo");
testMetric.AddSample("Bar");
EXPECT_EQ(0x80, testMetric.bytes()[1]);
}
......@@ -49,10 +48,12 @@ TEST(RapporMetricTest, GetReport) {
TEST(RapporMetricTest, GetReportStatistics) {
RapporMetric metric("MyStatsRappor", kTestStatsRapporParameters, 0);
for (char i = 0; i < 50; i++) {
metric.AddSample(base::StringPrintf("%d", i));
ByteVector real_bits(kTestStatsRapporParameters.bloom_filter_size_bytes);
// Set 152 bits (19 bytes)
for (char i = 0; i < 19; i++) {
real_bits[i] = 0xff;
}
const ByteVector real_bits = metric.bytes();
metric.SetBytesForTesting(real_bits);
const int real_bit_count = CountBits(real_bits);
EXPECT_EQ(real_bit_count, 152);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment