Commit 17809751 authored by Charles Harrison's avatar Charles Harrison Committed by Commit Bot

[safe-browsing] prefix matching improvements and perftest

This CL does a few things:
1. Replaces HashPrefixMatches which was implemented with a custom binary
   search with a custom iterator and std::binary_search.
2. Adds a perftest stress testing this method.

Running the perftest before and after the binary_search change shows that
using binary_search improves performance by 30-40%. This was measured
on Linux using a release (non-official) build.

It is also imo a code health win, as an iterator is easier to reason
about than a custom binsearch algorithm.

Bug: 787092
Change-Id: Ic8be26b11ef750c70ee241dfb2718a3c0855fc8c
Reviewed-on: https://chromium-review.googlesource.com/959623
Commit-Queue: Charlie Harrison <csharrison@chromium.org>
Reviewed-by: default avatarScott Violet <sky@chromium.org>
Reviewed-by: default avatarVarun Khaneja <vakh@chromium.org>
Cr-Commit-Position: refs/heads/master@{#543391}
parent b3513d72
......@@ -581,5 +581,15 @@ if (!is_ios) {
deps += [ "//v8:v8_external_startup_data_assets" ]
}
}
if (!is_android) {
sources += [ "safe_browsing/db/v4_store_perftest.cc" ]
deps += [
"//components/safe_browsing/db:v4_protocol_manager_util",
"//components/safe_browsing/db:v4_store",
"//components/safe_browsing/db:v4_test_util",
"//crypto",
]
}
}
}
......@@ -217,6 +217,17 @@ source_set("v4_protocol_manager_util") {
]
}
source_set("prefix_iterator") {
sources = [
"prefix_iterator.cc",
"prefix_iterator.h",
]
deps = [
":v4_protocol_manager_util",
"//base",
]
}
if (is_android) {
import("//build/config/android/rules.gni")
java_cpp_enum("sb_threat_values") {
......@@ -247,6 +258,7 @@ source_set("v4_store") {
":v4_store_proto",
]
deps = [
":prefix_iterator",
":v4_protocol_manager_util",
":v4_rice",
"//base",
......
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/safe_browsing/db/prefix_iterator.h"
namespace safe_browsing {
PrefixIterator::PrefixIterator(base::StringPiece prefixes,
size_t index,
size_t size)
: prefixes_(prefixes), index_(index), size_(size) {}
PrefixIterator::PrefixIterator(const PrefixIterator& rhs)
: prefixes_(rhs.prefixes_), index_(rhs.index_), size_(rhs.size_) {}
} // namespace safe_browsing
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_SAFE_BROWSING_DB_PREFIX_ITERATOR_H_
#define COMPONENTS_SAFE_BROWSING_DB_PREFIX_ITERATOR_H_
#include <cstddef>
#include <iterator>
#include "base/strings/string_piece.h"
#include "components/safe_browsing/db/v4_protocol_manager_util.h"
namespace safe_browsing {
// The prefix iterator is used to binary search within a |HashPrefixes|. It is
// essentially a random access iterator that steps |PrefixSize| steps within the
// underlying buffer.
class PrefixIterator
: public std::iterator<std::random_access_iterator_tag, base::StringPiece> {
public:
using difference_type =
typename std::iterator<std::random_access_iterator_tag,
base::StringPiece>::difference_type;
PrefixIterator(base::StringPiece prefixes, size_t index, size_t size);
PrefixIterator(const PrefixIterator& rhs);
base::StringPiece operator*() const { return GetPiece(index_); }
base::StringPiece operator[](const int& rhs) const {
return GetPiece(index_ + rhs);
}
PrefixIterator& operator=(const PrefixIterator& rhs) {
index_ = rhs.index_;
return *this;
}
PrefixIterator& operator+=(const int& rhs) {
index_ += rhs;
return *this;
}
PrefixIterator& operator-=(const int& rhs) {
index_ -= rhs;
return *this;
}
PrefixIterator& operator++() {
index_++;
return *this;
}
PrefixIterator& operator--() {
index_--;
return *this;
}
PrefixIterator operator+(const PrefixIterator& rhs) const {
return PrefixIterator(prefixes_, index_ + rhs.index_, size_);
}
difference_type operator-(const PrefixIterator& rhs) const {
return index_ - rhs.index_;
}
PrefixIterator operator+(const int& rhs) const {
return PrefixIterator(prefixes_, index_ + rhs, size_);
}
PrefixIterator operator-(const int& rhs) const {
return PrefixIterator(prefixes_, index_ - rhs, size_);
}
bool operator==(const PrefixIterator& rhs) const {
return index_ == rhs.index_;
}
bool operator!=(const PrefixIterator& rhs) const {
return index_ != rhs.index_;
}
bool operator>(const PrefixIterator& rhs) const {
return index_ > rhs.index_;
}
bool operator<(const PrefixIterator& rhs) const {
return index_ < rhs.index_;
}
bool operator>=(const PrefixIterator& rhs) const {
return index_ >= rhs.index_;
}
bool operator<=(const PrefixIterator& rhs) const {
return index_ <= rhs.index_;
}
private:
base::StringPiece GetPiece(size_t index) const {
return prefixes_.substr(index * size_, size_);
}
base::StringPiece prefixes_;
size_t index_;
size_t size_;
};
} // namespace safe_browsing
#endif // COMPONENTS_SAFE_BROWSING_DB_PREFIX_ITERATOR_H_
......@@ -4,6 +4,7 @@
#include "components/safe_browsing/db/v4_store.h"
#include <algorithm>
#include <utility>
#include "base/base64.h"
......@@ -13,6 +14,7 @@
#include "base/metrics/histogram_macros.h"
#include "base/stl_util.h"
#include "base/strings/stringprintf.h"
#include "components/safe_browsing/db/prefix_iterator.h"
#include "components/safe_browsing/db/v4_rice.h"
#include "components/safe_browsing/db/v4_store.pb.h"
#include "components/safe_browsing/proto/webui.pb.h"
......@@ -782,37 +784,20 @@ HashPrefix V4Store::GetMatchingHashPrefix(const FullHash& full_hash) {
checks_attempted_++;
for (const auto& pair : hash_prefix_map_) {
const PrefixSize& prefix_size = pair.first;
const HashPrefixes& hash_prefixes = pair.second;
HashPrefix hash_prefix = full_hash.substr(0, prefix_size);
if (HashPrefixMatches(hash_prefix, hash_prefixes.begin(),
hash_prefixes.end())) {
return hash_prefix;
}
base::StringPiece hash_prefix =
base::StringPiece(full_hash).substr(0, prefix_size);
if (HashPrefixMatches(hash_prefix, pair.second, prefix_size))
return hash_prefix.as_string();
}
return HashPrefix();
}
// static
bool V4Store::HashPrefixMatches(const HashPrefix& hash_prefix,
const HashPrefixes::const_iterator& begin,
const HashPrefixes::const_iterator& end) {
if (begin == end) {
return false;
}
size_t distance = std::distance(begin, end);
const PrefixSize prefix_size = hash_prefix.length();
DCHECK_EQ(0u, distance % prefix_size);
size_t mid_prefix_index = ((distance / prefix_size) / 2) * prefix_size;
HashPrefixes::const_iterator mid = begin + mid_prefix_index;
HashPrefix mid_prefix = HashPrefix(mid, mid + prefix_size);
int result = hash_prefix.compare(mid_prefix);
if (result == 0) {
return true;
} else if (result < 0) {
return HashPrefixMatches(hash_prefix, begin, mid);
} else {
return HashPrefixMatches(hash_prefix, mid + prefix_size, end);
}
bool V4Store::HashPrefixMatches(base::StringPiece prefix,
const HashPrefixes& prefixes,
const PrefixSize& size) {
return std::binary_search(
PrefixIterator(prefixes, 0, size),
PrefixIterator(prefixes, prefixes.size() / size, size), prefix);
}
bool V4Store::VerifyChecksum() {
......
......@@ -319,10 +319,11 @@ class V4Store {
const IteratorMap& iterator_map,
HashPrefix* smallest_hash_prefix);
// Returns true if |hash_prefix| exists between |begin| and |end| iterators.
static bool HashPrefixMatches(const HashPrefix& hash_prefix,
const HashPrefixes::const_iterator& begin,
const HashPrefixes::const_iterator& end);
// Returns true if |hash_prefix| with PrefixSize |size| exists in |prefixes|.
// This small method is exposed in the header so it can be tested separately.
static bool HashPrefixMatches(base::StringPiece prefix,
const HashPrefixes& prefixes,
const PrefixSize& size);
// For each key in |hash_prefix_map|, sets the iterator at that key
// |iterator_map| to hash_prefix_map[key].begin().
......
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "base/macros.h"
#include "base/memory/ref_counted.h"
#include "base/strings/stringprintf.h"
#include "base/test/test_simple_task_runner.h"
#include "base/timer/elapsed_timer.h"
#include "components/safe_browsing/db/v4_protocol_manager_util.h"
#include "components/safe_browsing/db/v4_test_util.h"
#include "crypto/sha2.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "testing/perf/perf_test.h"
namespace safe_browsing {
class V4StorePerftest : public testing::Test {};
TEST_F(V4StorePerftest, StressTest) {
const int kNumPrefixes = 2000000;
std::vector<std::string> prefixes;
std::vector<std::string> full_hashes;
for (size_t i = 0; i < kNumPrefixes; i++) {
std::string sha256 = crypto::SHA256HashString(base::StringPrintf("%zu", i));
DCHECK_EQ(crypto::kSHA256Length, kMaxHashPrefixLength);
full_hashes.push_back(sha256);
prefixes.push_back(sha256.substr(0, kMinHashPrefixLength));
}
auto store = std::make_unique<TestV4Store>(
base::MakeRefCounted<base::TestSimpleTaskRunner>(), base::FilePath());
store->SetPrefixes(std::move(prefixes), kMinHashPrefixLength);
size_t matches = 0;
base::ElapsedTimer timer;
for (const auto& full_hash : full_hashes) {
matches += !store->GetMatchingHashPrefix(full_hash).empty();
}
perf_test::PrintResult("GetMachingHashPrefix", "", "",
timer.Elapsed().InMillisecondsF(), "ms", true);
EXPECT_EQ(matches, full_hashes.size());
}
} // namespace safe_browsing
......@@ -593,43 +593,37 @@ TEST_F(V4StoreTest, TestReadFullResponseWithInvalidHashPrefixMap) {
TEST_F(V4StoreTest, TestHashPrefixExistsAtTheBeginning) {
HashPrefixes hash_prefixes = "abcdebbbbbccccc";
HashPrefix hash_prefix = "abcde";
EXPECT_TRUE(V4Store::HashPrefixMatches(hash_prefix, std::begin(hash_prefixes),
std::end(hash_prefixes)));
EXPECT_TRUE(V4Store::HashPrefixMatches(hash_prefix, hash_prefixes, 5));
}
TEST_F(V4StoreTest, TestHashPrefixExistsInTheMiddle) {
HashPrefixes hash_prefixes = "abcdebbbbbccccc";
HashPrefix hash_prefix = "bbbbb";
EXPECT_TRUE(V4Store::HashPrefixMatches(hash_prefix, std::begin(hash_prefixes),
std::end(hash_prefixes)));
EXPECT_TRUE(V4Store::HashPrefixMatches(hash_prefix, hash_prefixes, 5));
}
TEST_F(V4StoreTest, TestHashPrefixExistsAtTheEnd) {
HashPrefixes hash_prefixes = "abcdebbbbbccccc";
HashPrefix hash_prefix = "ccccc";
EXPECT_TRUE(V4Store::HashPrefixMatches(hash_prefix, std::begin(hash_prefixes),
std::end(hash_prefixes)));
EXPECT_TRUE(V4Store::HashPrefixMatches(hash_prefix, hash_prefixes, 5));
}
TEST_F(V4StoreTest, TestHashPrefixExistsAtTheBeginningOfEven) {
HashPrefixes hash_prefixes = "abcdebbbbb";
HashPrefix hash_prefix = "abcde";
EXPECT_TRUE(V4Store::HashPrefixMatches(hash_prefix, std::begin(hash_prefixes),
std::end(hash_prefixes)));
EXPECT_TRUE(V4Store::HashPrefixMatches(hash_prefix, hash_prefixes, 5));
}
TEST_F(V4StoreTest, TestHashPrefixExistsAtTheEndOfEven) {
HashPrefixes hash_prefixes = "abcdebbbbb";
HashPrefix hash_prefix = "bbbbb";
EXPECT_TRUE(V4Store::HashPrefixMatches(hash_prefix, std::begin(hash_prefixes),
std::end(hash_prefixes)));
EXPECT_TRUE(V4Store::HashPrefixMatches(hash_prefix, hash_prefixes, 5));
}
TEST_F(V4StoreTest, TestHashPrefixDoesNotExistInConcatenatedList) {
HashPrefixes hash_prefixes = "abcdebbbbb";
HashPrefix hash_prefix = "bbbbc";
EXPECT_FALSE(V4Store::HashPrefixMatches(
hash_prefix, std::begin(hash_prefixes), std::end(hash_prefixes)));
EXPECT_FALSE(V4Store::HashPrefixMatches(hash_prefix, hash_prefixes, 5));
}
TEST_F(V4StoreTest, TestFullHashExistsInMapWithSingleSize) {
......
......@@ -4,9 +4,11 @@
#include "components/safe_browsing/db/v4_test_util.h"
#include <algorithm>
#include <string>
#include <utility>
#include "base/strings/strcat.h"
#include "components/safe_browsing/db/util.h"
#include "crypto/sha2.h"
......@@ -47,7 +49,16 @@ bool TestV4Store::HasValidData() const {
}
void TestV4Store::MarkPrefixAsBad(HashPrefix prefix) {
hash_prefix_map_[prefix.size()] += prefix;
auto& vec = mock_prefixes_[prefix.size()];
vec.insert(std::upper_bound(vec.begin(), vec.end(), prefix), prefix);
hash_prefix_map_[prefix.size()] = base::StrCat(vec);
}
void TestV4Store::SetPrefixes(std::vector<HashPrefix> prefixes,
PrefixSize size) {
std::sort(prefixes.begin(), prefixes.end());
mock_prefixes_[size] = prefixes;
hash_prefix_map_[size] = base::StrCat(prefixes);
}
TestV4Database::TestV4Database(
......
......@@ -7,8 +7,10 @@
// Contains classes and methods useful for tests.
#include <map>
#include <memory>
#include <ostream>
#include <vector>
#include "components/safe_browsing/db/v4_database.h"
#include "components/safe_browsing/db/v4_get_hash_protocol_manager.h"
......@@ -31,6 +33,14 @@ class TestV4Store : public V4Store {
bool HasValidData() const override;
void MarkPrefixAsBad(HashPrefix prefix);
// |prefixes| does not need to be sorted.
void SetPrefixes(std::vector<HashPrefix> prefixes, PrefixSize size);
private:
// Holds mock prefixes from calls to MarkPrefixAsBad / SetPrefixes. Stored as
// a vector for simplicity.
std::map<PrefixSize, std::vector<HashPrefix>> mock_prefixes_;
};
class TestV4StoreFactory : public V4StoreFactory {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment