Commit 7c0efea0, authored by Paul Miller, committed by Commit Bot

Implement NormalizedMurmurHashEntropyProvider

Design document:
https://docs.google.com/document/d/1cPF5PruriWNP2Z5gSkq4MBTm0wSZqLyIJkUO9ekibeo

BUG=890413

Change-Id: Ib372a573b1a0f68467f785ce74ef7821c9d48614
Reviewed-on: https://chromium-review.googlesource.com/c/1322350
Reviewed-by: Grace Kloba <klobag@chromium.org>
Reviewed-by: Alexei Svitkine <asvitkine@chromium.org>
Commit-Queue: Paul Miller <paulmiller@chromium.org>
Cr-Commit-Position: refs/heads/master@{#607816}
parent 9165404a
...@@ -58,6 +58,8 @@ static_library("variations") { ...@@ -58,6 +58,8 @@ static_library("variations") {
"variations_http_header_provider.h", "variations_http_header_provider.h",
"variations_id_collection.cc", "variations_id_collection.cc",
"variations_id_collection.h", "variations_id_collection.h",
"variations_murmur_hash.cc",
"variations_murmur_hash.h",
"variations_request_scheduler.cc", "variations_request_scheduler.cc",
"variations_request_scheduler.h", "variations_request_scheduler.h",
"variations_seed_processor.cc", "variations_seed_processor.cc",
...@@ -147,6 +149,7 @@ source_set("unit_tests") { ...@@ -147,6 +149,7 @@ source_set("unit_tests") {
"variations_crash_keys_unittest.cc", "variations_crash_keys_unittest.cc",
"variations_http_header_provider_unittest.cc", "variations_http_header_provider_unittest.cc",
"variations_id_collection_unittest.cc", "variations_id_collection_unittest.cc",
"variations_murmur_hash_unittest.cc",
"variations_request_scheduler_unittest.cc", "variations_request_scheduler_unittest.cc",
"variations_seed_processor_unittest.cc", "variations_seed_processor_unittest.cc",
"variations_seed_simulator_unittest.cc", "variations_seed_simulator_unittest.cc",
...@@ -167,6 +170,7 @@ source_set("unit_tests") { ...@@ -167,6 +170,7 @@ source_set("unit_tests") {
"//components/variations/field_trial_config:field_trial_config", "//components/variations/field_trial_config:field_trial_config",
"//testing/gmock", "//testing/gmock",
"//testing/gtest", "//testing/gtest",
"//third_party/smhasher:murmurhash3",
"//third_party/zlib/google:compression_utils", "//third_party/zlib/google:compression_utils",
] ]
} }
......
...@@ -9,5 +9,6 @@ include_rules = [ ...@@ -9,5 +9,6 @@ include_rules = [
"+crypto", "+crypto",
"-net", "-net",
"+third_party/protobuf", "+third_party/protobuf",
"+third_party/smhasher",
"+third_party/zlib/google", "+third_party/zlib/google",
] ]
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "base/strings/string_number_conversions.h" #include "base/strings/string_number_conversions.h"
#include "base/sys_byteorder.h" #include "base/sys_byteorder.h"
#include "components/variations/hashing.h" #include "components/variations/hashing.h"
#include "components/variations/variations_murmur_hash.h"
namespace variations { namespace variations {
...@@ -132,4 +133,41 @@ uint16_t PermutedEntropyProvider::GetPermutedValue( ...@@ -132,4 +133,41 @@ uint16_t PermutedEntropyProvider::GetPermutedValue(
return mapping[low_entropy_source_]; return mapping[low_entropy_source_];
} }
// Stores the client's |low_entropy_source| and the number of possible source
// values, |low_entropy_source_max|. The source must lie in the range
// [0, |low_entropy_source_max|); both preconditions below are checked with
// DCHECKs only.
NormalizedMurmurHashEntropyProvider::NormalizedMurmurHashEntropyProvider(
uint16_t low_entropy_source,
size_t low_entropy_source_max)
: low_entropy_source_(low_entropy_source),
low_entropy_source_max_(low_entropy_source_max) {
// The source must be one of the |low_entropy_source_max| possible values.
DCHECK_LT(low_entropy_source, low_entropy_source_max);
// GetEntropyForTrial() hands every candidate value to Hash16() as a uint16_t,
// so the number of candidates has to fit in 16 bits.
DCHECK_LE(low_entropy_source_max, std::numeric_limits<uint16_t>::max());
}
// The destructor body is empty, so the defaulted definition is sufficient.
NormalizedMurmurHashEntropyProvider::~NormalizedMurmurHashEntropyProvider() =
    default;
// Returns the entropy value for |trial_name|, a double in [0, 1).
//
// The client's low entropy source is hashed together with a seed, and so is
// every other possible source value. The result is the rank of the client's
// hash among all of those hashes, divided by the number of possible values.
//
// If |randomization_seed| is 0, the seed is derived by hashing |trial_name|;
// otherwise |randomization_seed| is used as given.
double NormalizedMurmurHashEntropyProvider::GetEntropyForTrial(
    const std::string& trial_name,
    uint32_t randomization_seed) const {
  // A seed of 0 means "no custom seed": fall back to the trial name's hash.
  const uint32_t seed =
      randomization_seed != 0
          ? randomization_seed
          : internal::VariationsMurmurHash::Hash(
                internal::VariationsMurmurHash::StringToLE32(trial_name),
                trial_name.length());

  const uint32_t own_hash =
      internal::VariationsMurmurHash::Hash16(seed, low_entropy_source_);

  // Count how many candidate source values hash strictly below |own_hash|.
  int rank = 0;
  for (uint32_t candidate = 0; candidate < low_entropy_source_max_;
       ++candidate) {
    if (internal::VariationsMurmurHash::Hash16(seed, candidate) < own_hash)
      ++rank;
  }

  DCHECK_GE(rank, 0);
  // The iteration where |candidate| == |low_entropy_source_| produces a hash
  // equal to |own_hash| and therefore is never counted, so |rank| stays
  // strictly below |low_entropy_source_max_|.
  DCHECK_LT(static_cast<size_t>(rank), low_entropy_source_max_);
  return static_cast<double>(rank) / low_entropy_source_max_;
}
} // namespace variations } // namespace variations
...@@ -58,7 +58,7 @@ class SHA1EntropyProvider : public base::FieldTrial::EntropyProvider { ...@@ -58,7 +58,7 @@ class SHA1EntropyProvider : public base::FieldTrial::EntropyProvider {
uint32_t randomization_seed) const override; uint32_t randomization_seed) const override;
private: private:
std::string entropy_source_; const std::string entropy_source_;
DISALLOW_COPY_AND_ASSIGN(SHA1EntropyProvider); DISALLOW_COPY_AND_ASSIGN(SHA1EntropyProvider);
}; };
...@@ -86,12 +86,36 @@ class PermutedEntropyProvider : public base::FieldTrial::EntropyProvider { ...@@ -86,12 +86,36 @@ class PermutedEntropyProvider : public base::FieldTrial::EntropyProvider {
virtual uint16_t GetPermutedValue(uint32_t randomization_seed) const; virtual uint16_t GetPermutedValue(uint32_t randomization_seed) const;
private: private:
uint16_t low_entropy_source_; const uint16_t low_entropy_source_;
size_t low_entropy_source_max_; const size_t low_entropy_source_max_;
DISALLOW_COPY_AND_ASSIGN(PermutedEntropyProvider); DISALLOW_COPY_AND_ASSIGN(PermutedEntropyProvider);
}; };
// NormalizedMurmurHashEntropyProvider is an entropy provider suitable for low
// entropy sources (below 16 bits). It uses MurmurHash3_32 to hash the study
// name along with all possible low entropy sources. It finds the index where
// the actual low entropy source's hash would fall in the sorted list of all
// those hashes, and uses that as the final value. For more info, see:
// https://docs.google.com/document/d/1cPF5PruriWNP2Z5gSkq4MBTm0wSZqLyIJkUO9ekibeo
class NormalizedMurmurHashEntropyProvider
: public base::FieldTrial::EntropyProvider {
public:
// Creates a provider for |low_entropy_source|, which must be in the range
// [0, |low_entropy_source_max|). |low_entropy_source_max| must not exceed the
// largest uint16_t value.
NormalizedMurmurHashEntropyProvider(uint16_t low_entropy_source,
size_t low_entropy_source_max);
~NormalizedMurmurHashEntropyProvider() override;
// base::FieldTrial::EntropyProvider:
// Returns a value in [0, 1) for |trial_name|. A |randomization_seed| of 0
// means the seed is derived from |trial_name|; any other value is used as the
// seed directly.
double GetEntropyForTrial(const std::string& trial_name,
uint32_t randomization_seed) const override;
private:
// This client's low entropy source value.
const uint16_t low_entropy_source_;
// Number of possible low entropy source values.
const size_t low_entropy_source_max_;
DISALLOW_COPY_AND_ASSIGN(NormalizedMurmurHashEntropyProvider);
};
} // namespace variations } // namespace variations
#endif // COMPONENTS_VARIATIONS_ENTROPY_PROVIDER_H_ #endif // COMPONENTS_VARIATIONS_ENTROPY_PROVIDER_H_
...@@ -64,6 +64,15 @@ double GeneratePermutedEntropy(uint16_t entropy_source, ...@@ -64,6 +64,15 @@ double GeneratePermutedEntropy(uint16_t entropy_source,
return permuted_provider.GetEntropyForTrial(trial_name, 0); return permuted_provider.GetEntropyForTrial(trial_name, 0);
} }
// Returns the normalized MurmurHash-based entropy for |trial_name|, computed
// from |entropy_source|, which must be in the range [0, entropy_max). No
// custom randomization seed is supplied, so the provider derives the seed from
// |trial_name|.
double GenerateNormalizedMurmurHashEntropy(uint16_t entropy_source,
                                           size_t entropy_max,
                                           const std::string& trial_name) {
  // A randomization seed of 0 tells the provider to hash the trial name.
  constexpr uint32_t kNoCustomSeed = 0;
  return NormalizedMurmurHashEntropyProvider(entropy_source, entropy_max)
      .GetEntropyForTrial(trial_name, kNoCustomSeed);
}
// Make a vector of consecutive integers for shuffling. // Make a vector of consecutive integers for shuffling.
std::vector<uint16_t> MakeRange(size_t vector_size) { std::vector<uint16_t> MakeRange(size_t vector_size) {
std::vector<uint16_t> range(vector_size); std::vector<uint16_t> range(vector_size);
...@@ -105,7 +114,7 @@ class SHA1EntropyGenerator : public TrialEntropyGenerator { ...@@ -105,7 +114,7 @@ class SHA1EntropyGenerator : public TrialEntropyGenerator {
} }
private: private:
std::string trial_name_; const std::string trial_name_;
DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator); DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
}; };
...@@ -139,6 +148,28 @@ class PermutedEntropyGenerator : public TrialEntropyGenerator { ...@@ -139,6 +148,28 @@ class PermutedEntropyGenerator : public TrialEntropyGenerator {
DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator); DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);
}; };
// A TrialEntropyGenerator that uses the normalized MurmurHash entropy provider
// algorithm, using 13-bit low entropy source values.
class NormalizedMurmurHashEntropyGenerator : public TrialEntropyGenerator {
 public:
  // |trial_name| is the trial every generated entropy value is computed for.
  explicit NormalizedMurmurHashEntropyGenerator(const std::string& trial_name)
      : trial_name_(trial_name) {}
  ~NormalizedMurmurHashEntropyGenerator() override = default;

  // Picks a random low entropy source in [0, kMaxLowEntropySize) and returns
  // the normalized MurmurHash entropy it yields for |trial_name_|.
  double GenerateEntropyValue() const override {
    // Keep the value as uint16_t, the type the entropy helper expects, rather
    // than widening it to int and narrowing it again at the call.
    const uint16_t low_entropy_source =
        static_cast<uint16_t>(base::RandInt(0, kMaxLowEntropySize - 1));
    return GenerateNormalizedMurmurHashEntropy(low_entropy_source,
                                               kMaxLowEntropySize, trial_name_);
  }

 private:
  const std::string trial_name_;

  DISALLOW_COPY_AND_ASSIGN(NormalizedMurmurHashEntropyGenerator);
};
// Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness // Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness
// of Fit Test. // of Fit Test.
void PerformEntropyUniformityTest( void PerformEntropyUniformityTest(
...@@ -197,8 +228,8 @@ TEST(EntropyProviderTest, UseOneTimeRandomizationSHA1) { ...@@ -197,8 +228,8 @@ TEST(EntropyProviderTest, UseOneTimeRandomizationSHA1) {
// that have different names, normally generate different results. // that have different names, normally generate different results.
// //
// Note that depending on the one-time random initialization, they // Note that depending on the one-time random initialization, they
// _might_ actually give the same result, but we know that given // _might_ actually give the same result, but we know that given the
// the particular client_id we use for unit tests they won't. // particular client_id we use for unit tests they won't.
base::FieldTrialList field_trial_list( base::FieldTrialList field_trial_list(
std::make_unique<SHA1EntropyProvider>("client_id")); std::make_unique<SHA1EntropyProvider>("client_id"));
const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear; const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
...@@ -227,8 +258,8 @@ TEST(EntropyProviderTest, UseOneTimeRandomizationPermuted) { ...@@ -227,8 +258,8 @@ TEST(EntropyProviderTest, UseOneTimeRandomizationPermuted) {
// that have different names, normally generate different results. // that have different names, normally generate different results.
// //
// Note that depending on the one-time random initialization, they // Note that depending on the one-time random initialization, they
// _might_ actually give the same result, but we know that given // _might_ actually give the same result, but we know that given the
// the particular client_id we use for unit tests they won't. // particular low_entropy_source we use for unit tests they won't.
base::FieldTrialList field_trial_list( base::FieldTrialList field_trial_list(
std::make_unique<PermutedEntropyProvider>(1234, kMaxLowEntropySize)); std::make_unique<PermutedEntropyProvider>(1234, kMaxLowEntropySize));
const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear; const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
...@@ -252,6 +283,37 @@ TEST(EntropyProviderTest, UseOneTimeRandomizationPermuted) { ...@@ -252,6 +283,37 @@ TEST(EntropyProviderTest, UseOneTimeRandomizationPermuted) {
EXPECT_NE(trials[0]->group_name(), trials[1]->group_name()); EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
} }
TEST(EntropyProviderTest, UseOneTimeRandomizationNormalizedMurmurHash) {
  // Two one-time-randomized trials with different names should normally land
  // in different groups.
  //
  // In principle they could still collide, but for the fixed
  // low_entropy_source used here they are known not to.
  base::FieldTrialList field_trial_list(
      std::make_unique<NormalizedMurmurHashEntropyProvider>(
          1234, kMaxLowEntropySize));
  const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
  scoped_refptr<base::FieldTrial> trials[] = {
      base::FieldTrialList::FactoryGetFieldTrial(
          "one", 100, "default", kNoExpirationYear, 1, 1,
          base::FieldTrial::ONE_TIME_RANDOMIZED, nullptr),
      base::FieldTrialList::FactoryGetFieldTrial(
          "two", 100, "default", kNoExpirationYear, 1, 1,
          base::FieldTrial::ONE_TIME_RANDOMIZED, nullptr),
  };

  // Give every trial 100 equally weighted, unnamed groups.
  for (const scoped_refptr<base::FieldTrial>& trial : trials) {
    for (int group = 0; group < 100; ++group)
      trial->AppendGroup(std::string(), 1);
  }

  // Different names are expected to yield different assignments.
  EXPECT_NE(trials[0]->group(), trials[1]->group());
  EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
}
TEST(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedPermuted) { TEST(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedPermuted) {
// Ensures that two trials with different names but the same custom seed used // Ensures that two trials with different names but the same custom seed used
// for one time randomization produce the same group assignments. // for one time randomization produce the same group assignments.
...@@ -306,6 +368,35 @@ TEST(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedSHA1) { ...@@ -306,6 +368,35 @@ TEST(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedSHA1) {
EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name()); EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
} }
TEST(EntropyProviderTest,
     UseOneTimeRandomizationWithCustomSeedNormalizedMurmurHash) {
  // Two one-time-randomized trials that differ in name but share a custom
  // randomization seed must receive identical group assignments.
  base::FieldTrialList field_trial_list(
      std::make_unique<NormalizedMurmurHashEntropyProvider>(
          1234, kMaxLowEntropySize));
  const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear;
  const uint32_t kCustomSeed = 9001;
  scoped_refptr<base::FieldTrial> trials[] = {
      base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
          "one", 100, "default", kNoExpirationYear, 1, 1,
          base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, nullptr, nullptr),
      base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed(
          "two", 100, "default", kNoExpirationYear, 1, 1,
          base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, nullptr, nullptr),
  };

  // Give every trial 100 equally weighted, unnamed groups.
  for (const scoped_refptr<base::FieldTrial>& trial : trials) {
    for (int group = 0; group < 100; ++group)
      trial->AppendGroup(std::string(), 1);
  }

  // With distinct names the groups would normally differ; the shared custom
  // seed overrides that and forces the same assignment.
  EXPECT_EQ(trials[0]->group(), trials[1]->group());
  EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name());
}
TEST(EntropyProviderTest, SHA1Entropy) { TEST(EntropyProviderTest, SHA1Entropy) {
const double results[] = { GenerateSHA1Entropy("hi", "1"), const double results[] = { GenerateSHA1Entropy("hi", "1"),
GenerateSHA1Entropy("there", "1") }; GenerateSHA1Entropy("there", "1") };
...@@ -339,6 +430,25 @@ TEST(EntropyProviderTest, PermutedEntropy) { ...@@ -339,6 +430,25 @@ TEST(EntropyProviderTest, PermutedEntropy) {
GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else")); GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
} }
TEST(EntropyProviderTest, NormalizedMurmurHashEntropy) {
  // Same trial, different entropy sources: the values must differ and each
  // must lie in [0, 1).
  const double results[] = {
      GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "1"),
      GenerateNormalizedMurmurHashEntropy(4321, kMaxLowEntropySize, "1")};
  EXPECT_NE(results[0], results[1]);
  for (const double result : results) {
    EXPECT_LE(0.0, result);
    EXPECT_GT(1.0, result);
  }

  // Identical inputs must reproduce the identical value.
  const double first_run =
      GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "1");
  const double second_run =
      GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "1");
  EXPECT_EQ(first_run, second_run);

  // Same entropy source, different trial names: the values must differ.
  const double entropy_something = GenerateNormalizedMurmurHashEntropy(
      1234, kMaxLowEntropySize, "something");
  const double entropy_else =
      GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "else");
  EXPECT_NE(entropy_something, entropy_else);
}
TEST(EntropyProviderTest, PermutedEntropyProviderResults) { TEST(EntropyProviderTest, PermutedEntropyProviderResults) {
// Verifies that PermutedEntropyProvider produces expected results. This // Verifies that PermutedEntropyProvider produces expected results. This
// ensures that the results are the same between platforms and ensures that // ensures that the results are the same between platforms and ensures that
...@@ -352,6 +462,23 @@ TEST(EntropyProviderTest, PermutedEntropyProviderResults) { ...@@ -352,6 +462,23 @@ TEST(EntropyProviderTest, PermutedEntropyProviderResults) {
GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo")); GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
} }
TEST(EntropyProviderTest, NormalizedMurmurHashEntropyProviderResults) {
  // Pins NormalizedMurmurHashEntropyProvider to known outputs so that results
  // stay identical across platforms and implementation changes cannot alter
  // them unnoticed.
  const double entropy_range = static_cast<double>(kMaxLowEntropySize);

  EXPECT_DOUBLE_EQ(
      1612 / entropy_range,
      GenerateNormalizedMurmurHashEntropy(1234, kMaxLowEntropySize, "XYZ"));
  EXPECT_DOUBLE_EQ(
      7066 / entropy_range,
      GenerateNormalizedMurmurHashEntropy(1, kMaxLowEntropySize, "Test"));
  EXPECT_DOUBLE_EQ(
      5668 / entropy_range,
      GenerateNormalizedMurmurHashEntropy(5000, kMaxLowEntropySize, "Foo"));
}
TEST(EntropyProviderTest, SHA1EntropyIsUniform) { TEST(EntropyProviderTest, SHA1EntropyIsUniform) {
for (size_t i = 0; i < base::size(kTestTrialNames); ++i) { for (size_t i = 0; i < base::size(kTestTrialNames); ++i) {
SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]); SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
...@@ -366,6 +493,13 @@ TEST(EntropyProviderTest, PermutedEntropyIsUniform) { ...@@ -366,6 +493,13 @@ TEST(EntropyProviderTest, PermutedEntropyIsUniform) {
} }
} }
TEST(EntropyProviderTest, NormalizedMurmurHashEntropyIsUniform) {
  // Run the uniformity check once for every test trial name.
  for (const auto& trial_name : kTestTrialNames) {
    NormalizedMurmurHashEntropyGenerator entropy_generator(trial_name);
    PerformEntropyUniformityTest(trial_name, entropy_generator);
  }
}
TEST(EntropyProviderTest, PermutedEntropyConsistency) { TEST(EntropyProviderTest, PermutedEntropyConsistency) {
std::vector<uint16_t> to_shuffle = MakeRange(10); std::vector<uint16_t> to_shuffle = MakeRange(10);
std::vector<uint16_t> expected = {7, 6, 8, 3, 2, 0, 1, 4, 9, 5}; std::vector<uint16_t> expected = {7, 6, 8, 3, 2, 0, 1, 4, 9, 5};
......
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/variations/variations_murmur_hash.h"
#include "base/compiler_specific.h"
#include "base/sys_byteorder.h"
#include "build/build_config.h"
#if !(defined(ARCH_CPU_LITTLE_ENDIAN) || defined(ARCH_CPU_BIG_ENDIAN))
#error "unknown endianness"
#endif
namespace variations {
namespace internal {
// static
// Packs the bytes of |data| into 32-bit words, zero-padding the last word, so
// that the word values are the same on little- and big-endian platforms
// ("abcd" always becomes 0x64636261). Returns an empty vector for an empty
// string.
std::vector<uint32_t> VariationsMurmurHash::StringToLE32(
    const std::string& data) {
  const size_t data_size = data.size();
  const size_t word_num = (data_size + 3) / 4;  // data_size / 4, rounding up
  std::vector<uint32_t> words(word_num, 0);
  DCHECK_GE(words.size() * sizeof(uint32_t), data_size * sizeof(char));

  // For empty input |words| is empty and words.data() may be null. Passing a
  // null pointer to memcpy is undefined behavior even when the count is 0, so
  // skip the copy entirely in that case.
  if (data_size != 0)
    memcpy(words.data(), data.data(), data_size);

#if defined(ARCH_CPU_BIG_ENDIAN)
  // When packing chars into uint32_t, "abcd" may become 0x61626364 (big endian)
  // or 0x64636261 (little endian). If big endian, swap everything, so we get
  // the same values across platforms.
  for (uint32_t& word : words)
    word = base::ByteSwapToLE32(word);
#endif  // defined(ARCH_CPU_BIG_ENDIAN)

  return words;
}
// static
uint32_t VariationsMurmurHash::Hash(const std::vector<uint32_t>& data,
size_t length) {
DCHECK_LE(length, data.size() * sizeof(uint32_t));
uint32_t h1 = 0;
// body
size_t num_full_blocks = length / sizeof(uint32_t);
for (size_t i = 0; i < num_full_blocks; i++) {
uint32_t k1 = data[i];
k1 *= c1;
k1 = RotateLeft(k1, 15);
k1 *= c2;
h1 ^= k1;
h1 = RotateLeft(h1, 13);
h1 = h1 * 5 + 0xe6546b64;
}
// tail
uint32_t k1 = 0;
switch (length & 3) {
case 3:
k1 |= data[num_full_blocks] & 0xFF0000;
FALLTHROUGH;
case 2:
k1 |= data[num_full_blocks] & 0xFF00;
FALLTHROUGH;
case 1:
k1 |= data[num_full_blocks] & 0xFF;
}
k1 *= c1;
k1 = RotateLeft(k1, 15);
k1 *= c2;
h1 ^= k1;
// finalization
h1 ^= length;
h1 = FinalMix(h1);
return h1;
}
} // namespace internal
} // namespace variations
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_VARIATIONS_VARIATIONS_MURMUR_HASH_H_
#define COMPONENTS_VARIATIONS_VARIATIONS_MURMUR_HASH_H_
#include <cstdint>
#include <string>
#include <vector>
#include "base/compiler_specific.h"
namespace variations {
namespace internal {
// Hashing helpers used by NormalizedMurmurHashEntropyProvider. Background:
// https://docs.google.com/document/d/1cPF5PruriWNP2Z5gSkq4MBTm0wSZqLyIJkUO9ekibeo
class VariationsMurmurHash {
 public:
  // Converts |data| into the word form that Hash() consumes: the bytes are
  // packed into uint32_t values, zero-padded up to a multiple of 4 bytes, and
  // the resulting values do not depend on platform endianness ("abcd" is
  // always 0x64636261). Padding, if any, occupies the more-significant bytes
  // of the final word.
  static std::vector<uint32_t> StringToLE32(const std::string& data);

  // A portable reimplementation of MurmurHash3_x86_32 from
  // third_party/smhasher/. The reference function performs unaligned reads
  // when its input starts at an unaligned address (not generally safe on ARM)
  // and assumes a little-endian host. Hash() instead operates on uint32_t
  // values, so equal input words give equal results on every platform, and
  // those results match what MurmurHash3_x86_32 returns on a little-endian
  // machine.
  //
  // |length| is the number of bytes to hash and must not exceed
  // data.size() * 4. If |length| is not a multiple of 4, the
  // less-significant bytes of the last uint32_t are consumed first.
  //
  // The reference function accepts a seed (typically 0). Hash() fixes the seed
  // at 0 because NormalizedMurmurHashEntropyProvider never needs another one.
  static uint32_t Hash(const std::vector<uint32_t>& data, size_t length);

  // Hash() specialized for exactly 2 bytes of input, with a caller-supplied
  // |seed|. NormalizedMurmurHashEntropyProvider calls this once per possible
  // low entropy source value for each study, so it has to be fast.
  ALWAYS_INLINE static uint32_t Hash16(uint32_t seed, uint16_t data) {
    // Two bytes never fill a 4-byte block, so only the tail step applies.
    const uint32_t tail =
        RotateLeft(static_cast<uint32_t>(data) * c1, 15) * c2;

    // Finalization: fold in the input length (2 bytes), then mix.
    return FinalMix(seed ^ tail ^ 2u);
  }

 private:
  // Multiplicative constants used when mixing each block.
  static const uint32_t c1 = 0xcc9e2d51;
  static const uint32_t c2 = 0x1b873593;

  // Rotates |value| left by |bits|. Callers only pass rotation amounts
  // strictly between 0 and 32.
  ALWAYS_INLINE static uint32_t RotateLeft(uint32_t value, int bits) {
    const uint32_t shifted_up = value << bits;
    const uint32_t wrapped_around = value >> (32 - bits);
    return shifted_up | wrapped_around;
  }

  // Final mixing step applied to the accumulated hash state.
  ALWAYS_INLINE static uint32_t FinalMix(uint32_t hash) {
    hash = (hash ^ (hash >> 16)) * 0x85ebca6b;
    hash = (hash ^ (hash >> 13)) * 0xc2b2ae35;
    return hash ^ (hash >> 16);
  }
};
} // namespace internal
} // namespace variations
#endif // COMPONENTS_VARIATIONS_VARIATIONS_MURMUR_HASH_H_
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/variations/variations_murmur_hash.h"
#include <limits>
#include <vector>
#include "build/build_config.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/smhasher/src/MurmurHash3.h"
namespace variations {
namespace internal {
TEST(VariationsMurmurHashTest, StringToLE32) {
  using Words = std::vector<uint32_t>;

  // Empty input produces no words at all.
  EXPECT_EQ(Words(), VariationsMurmurHash::StringToLE32(""));

  // One to four characters fill a single word from its low byte upwards.
  EXPECT_EQ(Words({0x00000061}), VariationsMurmurHash::StringToLE32("a"));
  EXPECT_EQ(Words({0x00006261}), VariationsMurmurHash::StringToLE32("ab"));
  EXPECT_EQ(Words({0x00636261}), VariationsMurmurHash::StringToLE32("abc"));
  EXPECT_EQ(Words({0x64636261}), VariationsMurmurHash::StringToLE32("abcd"));

  // Additional characters spill into a second, zero-padded word.
  EXPECT_EQ(Words({0x64636261, 0x00000065}),
            VariationsMurmurHash::StringToLE32("abcde"));
  EXPECT_EQ(Words({0x64636261, 0x00006665}),
            VariationsMurmurHash::StringToLE32("abcdef"));
}
// The tests inside this #if compare VariationsMurmurHash to the reference
// implementation, MurmurHash3_x86_32, which only works on little-endian.
#if defined(ARCH_CPU_LITTLE_ENDIAN)
// Checks VariationsMurmurHash::Hash against MurmurHash3_x86_32 for every
// prefix length of the test words, from zero bytes up to the whole input.
TEST(VariationsMurmurHashTest, Hash) {
  // Random values generated manually and hard-coded for reproducibility.
  const std::vector<uint32_t> words({
      2704264845u, 2929902289u, 1679431515u, 1427187834u, 1300338468u,
      576307953u, 1209988079u, 1918627109u, 3926412991u, 74087765u});

  const size_t total_bytes = words.size() * sizeof(uint32_t);
  for (size_t prefix_bytes = 0; prefix_bytes <= total_bytes; ++prefix_bytes) {
    uint32_t reference_hash;
    MurmurHash3_x86_32(words.data(), prefix_bytes, /*seed=*/0,
                       &reference_hash);
    EXPECT_EQ(reference_hash, VariationsMurmurHash::Hash(words, prefix_bytes))
        << "size=" << prefix_bytes;
  }
}
// Checks VariationsMurmurHash::Hash16 against MurmurHash3_x86_32 over the
// cross product of boundary seeds and boundary 16-bit inputs.
TEST(VariationsMurmurHashTest, Hash16) {
  // Values at and next to the minimum, midpoint and maximum of each type.
  constexpr uint32_t kMax32 = std::numeric_limits<uint32_t>::max();
  const uint32_t kSeeds[] = {
      0, kMax32 / 2 - 1, kMax32 - 2,
      1, kMax32 / 2,     kMax32 - 1,
      2, kMax32 / 2 + 1, kMax32};

  constexpr uint16_t kMax16 = std::numeric_limits<uint16_t>::max();
  const uint16_t kInputs[] = {
      0, kMax16 / 2 - 1, kMax16 - 2,
      1, kMax16 / 2,     kMax16 - 1,
      2, kMax16 / 2 + 1, kMax16};

  for (const uint32_t seed : kSeeds) {
    for (const uint16_t datum : kInputs) {
      uint32_t reference_hash;
      MurmurHash3_x86_32(&datum, sizeof(datum), seed, &reference_hash);
      EXPECT_EQ(reference_hash, VariationsMurmurHash::Hash16(seed, datum))
          << "seed=" << seed << ", datum=" << datum;
    }
  }
}
#endif // defined(ARCH_CPU_LITTLE_ENDIAN)
} // namespace internal
} // namespace variations
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment