Commit b892c887 authored by Asanka Herath, committed by Commit Bot

[privacy_budget] Switch hash function.

The hash function originally picked was SuperFastHash(), but we are running into a few limitations.

Justification for the new function is explained in identifiability_metrics.cc.

Bug: 973801
Change-Id: I6743d72c5b18dc86cf9fe3d4a8d98ef49fbcd7b3
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2213403
Reviewed-by: Caleb Raitto <caraitto@chromium.org>
Commit-Queue: Asanka Herath <asanka@chromium.org>
Cr-Commit-Position: refs/heads/master@{#772077}
parent 230a1786
......@@ -6,14 +6,66 @@
#include <cstdint>
#include "base/hash/hash.h"
#include "base/containers/span.h"
#include "base/hash/legacy_hash.h"
namespace blink {
// Returns the 64-bit digest of the bytes in |in|, computed with
// base::legacy::CityHash64().
uint64_t IdentifiabilityDigestOfBytes(base::span<const uint8_t> in) {
  // NOTE: As documented at the point of declaration, the digest calculated here
  // should be stable once released.
  //
  // The chosen hash function satisfies the following requirements:
  //
  //  * Fast. These hashes will need to be calculated during performance
  //    critical code.
  //  * Suitable for fingerprinting. I.e. broad domain, good diffusion, low
  //    collision rate.
  //  * Resistant to hash flooding.
  //  * Able to use the entire 64-bit space we have at our disposal.
  //  * Either support iterative operation or be usable as a primitive for
  //    constructing one.
  //  * Remains stable for the duration of the identifiability study,
  //    O(months). This one is trivial. It just means that the hash is not in
  //    danger of imminent change.
  //  * Implemented, well tested, and usable by //content, //chrome, as well
  //    as //blink/common.
  //
  // It is not a requirement for the digest to be a cryptographic hash. I.e. it
  // is not necessary to deter second-preimage construction.
  //
  // base::PersistentHash(): (Rejected)
  //  - Based on SuperFastHash() which doesn't meet the fingerprinting
  //    requirement due to a high collision rate.
  //  - Digest is 32-bits.
  //  - No stateful implementation in //base. Blink's StringHasher is
  //    interestingly a stateful implementation of SuperFastHash but is not
  //    available in //blink/public/common.
  //
  // base::legacy::CityHash64{WithSeed}(): (Selected)
  //  - Based on Google's CityHash 1.0.3. Some known weaknesses, but still
  //    good enough.
  //  - No ready-to-use chaining implementation.
  //  + Digest is 64-bits.
  //  + Seeded variant is a useful primitive for a chained hash function.
  //    Would be better if it took two seeds, but one is also usable.
  //
  // Other hash functions were considered, but were rejected due to one or more
  // of the following reasons:
  //  - An implementation was not available.
  //  - The version available has significant known weaknesses.
  //
  // One in particular that would have been nice to have is FarmHash.
  //
  // CityHash is quite efficient for small buffers. Operation counts are
  // roughly as follows. For small buffers, fetches dominate:
  //
  //   Length │ Fetches │ Muls │ Shifts │
  //   ───────┼─────────┼──────┼────────┤
  //   1..16  │ 3       │ 3    │ 4      │
  //   ───────┼─────────┼──────┼────────┤
  //   17..32 │ 4       │ 3    │ 8      │
  //   ───────┼─────────┼──────┼────────┤
  //   33..64 │ 10      │ 4    │ 18     │
  //   ───────┴─────────┴──────┴────────┘
  //
  // Do not reintroduce an earlier return through base::PersistentHash() here:
  // it would make the CityHash64 call below unreachable and truncate the
  // digest to 32 bits.
  return base::legacy::CityHash64(in);
}
} // namespace blink
......@@ -18,7 +18,7 @@ TEST(IdentifiabilityMetricsTest, IdentifiabilityDigestOfBytes_Basic) {
// Due to our requirement that the digest be stable and persistable, this test
// should always pass once the code reaches the stable branch.
EXPECT_EQ(UINT64_C(238255523), digest);
EXPECT_EQ(UINT64_C(0x7cd845f1db5ad659), digest);
}
TEST(IdentifiabilityMetricsTest, IdentifiabilityDigestOfBytes_Padding) {
......@@ -26,8 +26,9 @@ TEST(IdentifiabilityMetricsTest, IdentifiabilityDigestOfBytes_Padding) {
const std::vector<uint8_t> kLong(16 * 1024, 'x');
// Ideally we should be using all 64-bits or at least the 56 LSBs.
EXPECT_EQ(UINT64_C(2790220116), IdentifiabilityDigestOfBytes(kTwoBytes));
EXPECT_EQ(UINT64_C(2857827930), IdentifiabilityDigestOfBytes(kLong));
EXPECT_EQ(UINT64_C(0xb74c74c9fcf0505a),
IdentifiabilityDigestOfBytes(kTwoBytes));
EXPECT_EQ(UINT64_C(0x76b3567105dc5253), IdentifiabilityDigestOfBytes(kLong));
}
TEST(IdentifiabilityMetricsTest, IdentifiabilityDigestOfBytes_EdgeCases) {
......@@ -35,8 +36,9 @@ TEST(IdentifiabilityMetricsTest, IdentifiabilityDigestOfBytes_EdgeCases) {
const uint8_t kOneByte[] = {1};
// As before, these tests should always pass.
EXPECT_EQ(0x0u, IdentifiabilityDigestOfBytes(kEmpty));
EXPECT_EQ(UINT64_C(0x9e76b331), IdentifiabilityDigestOfBytes(kOneByte));
EXPECT_EQ(UINT64_C(0x9ae16a3b2f90404f), IdentifiabilityDigestOfBytes(kEmpty));
EXPECT_EQ(UINT64_C(0x6209312a69a56947),
IdentifiabilityDigestOfBytes(kOneByte));
}
TEST(IdentifiabilityMetricsTest, PassInt) {
......@@ -103,17 +105,17 @@ TEST(IdentifiabilityMetricsTest, PassEnumClass64) {
// Pins the digest that IdentifiabilityDigestHelper() produces for a span over
// three ints. The expected value is the 64-bit one; the earlier 32-bit
// expectation belonged to the previous hash function.
TEST(IdentifiabilityMetricsTest, PassSpan) {
  const int data[] = {1, 2, 3};
  EXPECT_EQ(UINT64_C(0xb0dd8c7041b0a8bb),
            IdentifiabilityDigestHelper(base::make_span(data)));
}
// Pins the digest that IdentifiabilityDigestHelper() produces for a span over
// three doubles. The expected value is the 64-bit one; the earlier 32-bit
// expectation belonged to the previous hash function.
TEST(IdentifiabilityMetricsTest, PassSpanDouble) {
  const double data[] = {1.0, 2.0, 3.0};
  EXPECT_EQ(UINT64_C(0x95f52e9784f9582a),
            IdentifiabilityDigestHelper(base::make_span(data)));
}
// Expected digest referenced by the Combination test. Defined exactly once,
// with the full 64-bit value.
constexpr uint64_t kExpectedCombinationResult = UINT64_C(0xa5e30a57547cd49b);
TEST(IdentifiabilityMetricsTest, Combination) {
const int data[] = {1, 2, 3};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment