Commit 64d8c1e2 authored by Oleg Davydov, committed by Commit Bot

[CV] Move hashes computing to ComputedHashes

Historically we've computed extension resources' hashes in
ContentHash::CreateHashes, but logically it's a job for ComputedHashes
class, so now we do it in ComputedHashes::Compute method.

In the future this will allow us to reuse ComputedHashes::Compute for
other purposes (e.g. testing, or computing hashes upon installation for
self-hosted extensions).

Bug: 796395, 958794
Change-Id: Ie16c53ee682a4b8066dd2e929e13cbbc5be2d6ea
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1913415
Commit-Queue: Oleg Davydov <burunduk@chromium.org>
Reviewed-by: Nikita Podguzov <nikitapodguzov@chromium.org>
Reviewed-by: Istiaque Ahmed <lazyboy@chromium.org>
Cr-Commit-Position: refs/heads/master@{#721051}
parent 684f7d53
......@@ -5,9 +5,11 @@
#include "extensions/browser/computed_hashes.h"
#include <memory>
#include <set>
#include <utility>
#include "base/base64.h"
#include "base/files/file_enumerator.h"
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/json/json_reader.h"
......@@ -34,6 +36,8 @@ const int kVersion = 2;
namespace {
using SortedFilePathSet = std::set<base::FilePath>;
const char kUMAComputedHashesReadResult[] =
"Extensions.ContentVerification.ComputedHashesReadResult";
const char kUMAComputedHashesInitTime[] =
......@@ -41,16 +45,6 @@ const char kUMAComputedHashesInitTime[] =
} // namespace
ComputedHashes::Data::Data() = default;
ComputedHashes::Data::~Data() = default;
ComputedHashes::Data::Data(Data&&) = default;
ComputedHashes::Data& ComputedHashes::Data::operator=(Data&&) = default;
void ComputedHashes::Data::AddHashes(const base::FilePath& relative_path,
int block_size,
std::vector<std::string> hashes) {
data_[relative_path] = HashInfo(block_size, std::move(hashes));
}
ComputedHashes::ComputedHashes(Data&& data) : data_(std::move(data)) {}
ComputedHashes::~ComputedHashes() = default;
......@@ -122,20 +116,63 @@ base::Optional<ComputedHashes> ComputedHashes::CreateFromFile(
if (!base::Base64Decode(encoded, decoded))
return base::nullopt;
}
data.AddHashes(relative_path, *block_size, std::move(hashes));
data[relative_path] = HashInfo(*block_size, std::move(hashes));
}
uma_recorder.RecordSuccess();
return ComputedHashes(std::move(data));
}
// static
base::Optional<ComputedHashes::Data> ComputedHashes::Compute(
    const base::FilePath& extension_root,
    int block_size,
    const IsCancelledCallback& is_cancelled,
    const ShouldComputeHashesCallback& should_compute_hashes_for_resource) {
  // Enumerate every regular file under |extension_root| (recursively) and
  // collect the paths into a sorted set, so hashes are always produced in a
  // deterministic order regardless of enumeration order.
  SortedFilePathSet sorted_paths;
  base::FileEnumerator enumerator(extension_root, /*recursive=*/true,
                                  base::FileEnumerator::FILES);
  for (;;) {
    if (is_cancelled && is_cancelled.Run())
      return base::nullopt;
    const base::FilePath next_path = enumerator.Next();
    if (next_path.empty())
      break;
    sorted_paths.insert(next_path);
  }

  // Hash each discovered file, |block_size| bytes at a time, and record the
  // result keyed by its unix-style relative path.
  std::map<base::FilePath, HashInfo> hashes_data;
  for (const base::FilePath& full_path : sorted_paths) {
    if (is_cancelled && is_cancelled.Run())
      return base::nullopt;

    base::FilePath relative_unix_path;
    extension_root.AppendRelativePath(full_path, &relative_unix_path);
    relative_unix_path = relative_unix_path.NormalizePathSeparatorsTo('/');

    // Skip resources the caller does not want hashes for.
    if (!should_compute_hashes_for_resource.Run(relative_unix_path))
      continue;

    base::Optional<std::vector<std::string>> block_hashes =
        ComputeAndCheckResourceHash(full_path, relative_unix_path, block_size);
    if (block_hashes) {
      hashes_data[relative_unix_path] =
          HashInfo(block_size, std::move(block_hashes.value()));
    }
  }
  return hashes_data;
}
bool ComputedHashes::GetHashes(const base::FilePath& relative_path,
int* block_size,
std::vector<std::string>* hashes) const {
base::FilePath path = relative_path.NormalizePathSeparatorsTo('/');
const std::map<base::FilePath, HashInfo>& data = data_.data_;
auto find_data = [&](const base::FilePath& normalized_path) {
auto i = data.find(normalized_path);
if (i == data.end()) {
auto i = data_.find(normalized_path);
if (i == data_.end()) {
// If we didn't find the entry using exact match, it's possible the
// developer is using a path with some letters in the incorrect case,
// which happens to work on windows/osx. So try doing a linear scan to
......@@ -144,7 +181,7 @@ bool ComputedHashes::GetHashes(const base::FilePath& relative_path,
// not too big here. Also for crbug.com/29941 we plan to start warning
// developers when they are making this mistake, since their extension
// will be broken on linux/chromeos.
for (i = data.begin(); i != data.end(); ++i) {
for (i = data_.begin(); i != data_.end(); ++i) {
const base::FilePath& entry = i->first;
if (base::FilePath::CompareEqualIgnoreCase(entry.value(),
normalized_path.value())) {
......@@ -156,7 +193,7 @@ bool ComputedHashes::GetHashes(const base::FilePath& relative_path,
};
auto i = find_data(path);
#if defined(OS_WIN)
if (i == data.end()) {
if (i == data_.end()) {
base::FilePath::StringType trimmed_path_value;
// Also search for path with (.| )+ suffix trimmed as they are ignored in
// windows. This matches the canonicalization behavior of
......@@ -167,7 +204,7 @@ bool ComputedHashes::GetHashes(const base::FilePath& relative_path,
}
}
#endif // defined(OS_WIN)
if (i == data.end())
if (i == data_.end())
return false;
const HashInfo& info = i->second;
......@@ -177,8 +214,12 @@ bool ComputedHashes::GetHashes(const base::FilePath& relative_path,
}
bool ComputedHashes::WriteToFile(const base::FilePath& path) const {
// Make sure the directory exists.
if (!base::CreateDirectoryAndGetError(path.DirName(), nullptr))
return false;
base::Value file_list(base::Value::Type::LIST);
for (const auto& resource_info : data_.data_) {
for (const auto& resource_info : data_) {
const base::FilePath& relative_path = resource_info.first;
int block_size = resource_info.second.first;
const std::vector<std::string>& hashes = resource_info.second.second;
......@@ -249,4 +290,23 @@ std::vector<std::string> ComputedHashes::GetHashesForContent(
return hashes;
}
// static
base::Optional<std::vector<std::string>>
ComputedHashes::ComputeAndCheckResourceHash(
    const base::FilePath& full_path,
    const base::FilePath& relative_unix_path,
    int block_size) {
  // Read the whole resource into memory; hashing below operates on the full
  // contents, one |block_size|-sized block at a time.
  std::string contents;
  if (!base::ReadFileToString(full_path, &contents)) {
    LOG(ERROR) << "Could not read " << full_path.MaybeAsASCII();
    return base::nullopt;
  }

  // NOTE(review): |relative_unix_path| is not used in this body — presumably
  // kept for interface symmetry with callers; confirm before removing.
  return base::make_optional(GetHashesForContent(contents, block_size));
}
} // namespace extensions
......@@ -12,6 +12,7 @@
#include <string>
#include <vector>
#include "base/callback.h"
#include "base/optional.h"
namespace base {
......@@ -20,35 +21,16 @@ class FilePath;
namespace extensions {
using IsCancelledCallback = base::RepeatingCallback<bool(void)>;
using ShouldComputeHashesCallback =
base::RepeatingCallback<bool(const base::FilePath& relative_path)>;
// A class for storage and serialization of a set of SHA256 block hashes
// computed over the files inside an extension.
class ComputedHashes {
public:
using HashInfo = std::pair<int, std::vector<std::string>>;
// While |ComputedHashes| itself is a read-only view for the hashes, this is a
// subclass for modifying (eg. while computing hashes for the first time).
class Data {
public:
Data();
~Data();
Data(const Data&) = delete;
Data& operator=(const Data&) = delete;
Data(Data&&);
Data& operator=(Data&&);
// Adds hashes for |relative_path|. Should not be called more than once for
// a given |relative_path|.
void AddHashes(const base::FilePath& relative_path,
int block_size,
std::vector<std::string> hashes);
private:
// Map of relative path to hash info (block size, hashes).
std::map<base::FilePath, HashInfo> data_;
friend class ComputedHashes;
};
using Data = std::map<base::FilePath, HashInfo>;
explicit ComputedHashes(Data&& data);
ComputedHashes(const ComputedHashes&) = delete;
......@@ -62,6 +44,19 @@ class ComputedHashes {
static base::Optional<ComputedHashes> CreateFromFile(
const base::FilePath& path);
// Computes hashes for files in |extension_root|. Returns nullopt upon any
// failure. Callback |should_compute_hashes_for_resource| is used to determine
// whether we need hashes for a resource or not.
// TODO(https://crbug.com/796395#c24) To support per-file block size instead
// of passing |block_size| as an argument make callback
// |should_compute_hashes_for_resource| return optional<int>: nullopt if hashes
// are not needed for this file, block size for this file otherwise.
static base::Optional<ComputedHashes::Data> Compute(
const base::FilePath& extension_root,
int block_size,
const IsCancelledCallback& is_cancelled,
const ShouldComputeHashesCallback& should_compute_hashes_for_resource);
// Saves computed hashes to given file, returns false upon any failure (and
// true on success).
bool WriteToFile(const base::FilePath& path) const;
......@@ -79,6 +74,14 @@ class ComputedHashes {
size_t block_size);
private:
// Builds hashes for one resource by reading its contents and hashing them
// block by block. Returns nullopt (and logs an error) if the file could not
// be read, in which case nothing is added to computed_hashes.json for this
// resource.
static base::Optional<std::vector<std::string>> ComputeAndCheckResourceHash(
const base::FilePath& full_path,
const base::FilePath& relative_unix_path,
int block_size);
Data data_;
};
......
......@@ -46,8 +46,10 @@ testing::AssertionResult WriteThenReadComputedHashes(
base::FilePath computed_hashes_path =
scoped_dir.GetPath().AppendASCII("computed_hashes.json");
extensions::ComputedHashes::Data computed_hashes_data;
for (const auto& info : hash_infos)
computed_hashes_data.AddHashes(info.path, info.block_size, info.hashes);
for (const auto& info : hash_infos) {
computed_hashes_data[info.path] =
extensions::ComputedHashes::HashInfo(info.block_size, info.hashes);
}
if (!extensions::ComputedHashes(std::move(computed_hashes_data))
.WriteToFile(computed_hashes_path)) {
......
......@@ -4,8 +4,10 @@
#include "extensions/browser/content_verifier/content_hash.h"
#include <set>
#include "base/bind.h"
#include "base/files/file_enumerator.h"
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/json/json_reader.h"
#include "base/metrics/histogram_macros.h"
......@@ -26,8 +28,6 @@ namespace extensions {
namespace {
using SortedFilePathSet = std::set<base::FilePath>;
bool CreateDirAndWriteFile(const base::FilePath& destination,
const std::string& content) {
DCHECK(GetExtensionFileTaskRunner()->RunsTasksInCurrentSequence());
......@@ -194,10 +194,9 @@ void ContentHash::GetVerifiedContents(
}
// static
void ContentHash::FetchVerifiedContents(
ContentHash::FetchKey key,
const ContentHash::IsCancelledCallback& is_cancelled,
GetVerifiedContentsCallback callback) {
void ContentHash::FetchVerifiedContents(ContentHash::FetchKey key,
const IsCancelledCallback& is_cancelled,
GetVerifiedContentsCallback callback) {
// |fetcher| deletes itself when it's done.
internals::ContentHashFetcher* fetcher =
new internals::ContentHashFetcher(std::move(key));
......@@ -310,88 +309,70 @@ void ContentHash::RecordFetchResult(bool success) {
UMA_HISTOGRAM_BOOLEAN("Extensions.ContentVerification.FetchResult", success);
}
base::Optional<std::vector<std::string>>
ContentHash::ComputeAndCheckResourceHash(
const base::FilePath& full_path,
bool ContentHash::ShouldComputeHashesForResource(
const base::FilePath& relative_unix_path) {
DCHECK(source_type_ !=
ContentVerifierDelegate::VerifierSourceType::SIGNED_HASHES ||
verified_contents_);
if (source_type_ ==
ContentVerifierDelegate::VerifierSourceType::SIGNED_HASHES &&
!verified_contents_->HasTreeHashRoot(relative_unix_path)) {
return base::nullopt;
}
std::string contents;
if (!base::ReadFileToString(full_path, &contents)) {
LOG(ERROR) << "Could not read " << full_path.MaybeAsASCII();
return base::nullopt;
if (source_type_ !=
ContentVerifierDelegate::VerifierSourceType::SIGNED_HASHES) {
return true;
}
DCHECK(verified_contents_);
return verified_contents_->HasTreeHashRoot(relative_unix_path);
}
// Iterate through taking the hash of each block of size (block_size_) of
// the file.
std::vector<std::string> hashes =
ComputedHashes::GetHashesForContent(contents, block_size_);
std::set<base::FilePath> ContentHash::GetMismatchedComputedHashes(
ComputedHashes::Data* computed_hashes_data) {
DCHECK(computed_hashes_data);
if (source_type_ !=
ContentVerifierDelegate::VerifierSourceType::SIGNED_HASHES) {
return base::make_optional(std::move(hashes));
return std::set<base::FilePath>();
}
std::string root =
ComputeTreeHashRoot(hashes, block_size_ / crypto::kSHA256Length);
if (!verified_contents_->TreeHashRootEquals(relative_unix_path, root)) {
VLOG(1) << "content mismatch for " << relative_unix_path.AsUTF8Unsafe();
hash_mismatch_unix_paths_.insert(relative_unix_path);
return base::nullopt;
std::set<base::FilePath> mismatched_hashes;
for (const auto& resource_info : *computed_hashes_data) {
const base::FilePath& relative_unix_path = resource_info.first;
const std::vector<std::string>& hashes = resource_info.second.second;
std::string root =
ComputeTreeHashRoot(hashes, block_size_ / crypto::kSHA256Length);
if (!verified_contents_->TreeHashRootEquals(relative_unix_path, root))
mismatched_hashes.insert(relative_unix_path);
}
return base::make_optional(std::move(hashes));
return mismatched_hashes;
}
bool ContentHash::CreateHashes(const base::FilePath& hashes_file,
const IsCancelledCallback& is_cancelled) {
DCHECK_EQ(ContentVerifierDelegate::VerifierSourceType::SIGNED_HASHES,
source_type_);
base::ElapsedTimer timer;
did_attempt_creating_computed_hashes_ = true;
// Make sure the directory exists.
if (!base::CreateDirectoryAndGetError(hashes_file.DirName(), nullptr))
return false;
base::FileEnumerator enumerator(extension_root_, true, /* recursive */
base::FileEnumerator::FILES);
// First discover all the file paths and put them in a sorted set.
SortedFilePathSet paths;
for (;;) {
if (is_cancelled && is_cancelled.Run())
return false;
base::FilePath full_path = enumerator.Next();
if (full_path.empty())
break;
paths.insert(full_path);
base::Optional<ComputedHashes::Data> computed_hashes_data =
ComputedHashes::Compute(
extension_root_, block_size_, is_cancelled,
// Using base::Unretained is safe here as
// ShouldComputeHashesForResource is only called synchronously from
// ComputedHashes::Compute.
base::BindRepeating(&ContentHash::ShouldComputeHashesForResource,
base::Unretained(this)));
if (computed_hashes_data) {
std::set<base::FilePath> hashes_mismatch =
GetMismatchedComputedHashes(&computed_hashes_data.value());
for (const auto& relative_unix_path : hashes_mismatch) {
VLOG(1) << "content mismatch for " << relative_unix_path.AsUTF8Unsafe();
// Remove hash entry to keep computed_hashes.json file clear of mismatched
// hashes.
computed_hashes_data->erase(relative_unix_path);
}
hash_mismatch_unix_paths_ = std::move(hashes_mismatch);
}
// Now iterate over all the paths in sorted order and compute the block hashes
// for each one.
ComputedHashes::Data computed_hashes_data;
for (auto i = paths.begin(); i != paths.end(); ++i) {
if (is_cancelled && is_cancelled.Run())
return false;
const base::FilePath& full_path = *i;
base::FilePath relative_unix_path;
extension_root_.AppendRelativePath(full_path, &relative_unix_path);
relative_unix_path = relative_unix_path.NormalizePathSeparatorsTo('/');
base::Optional<std::vector<std::string>> hashes =
ComputeAndCheckResourceHash(full_path, relative_unix_path);
if (hashes)
computed_hashes_data.AddHashes(relative_unix_path, block_size_,
std::move(hashes.value()));
}
bool result =
ComputedHashes(std::move(computed_hashes_data)).WriteToFile(hashes_file);
bool result = computed_hashes_data &&
ComputedHashes(std::move(computed_hashes_data.value()))
.WriteToFile(hashes_file);
UMA_HISTOGRAM_TIMES("ExtensionContentHashFetcher.CreateHashesTime",
timer.Elapsed());
......
......@@ -94,8 +94,6 @@ class ContentHash : public base::RefCountedThreadSafe<ContentHash> {
HASH_MISMATCH
};
using IsCancelledCallback = base::RepeatingCallback<bool(void)>;
// Factory:
// Returns ContentHash through |created_callback|, the returned values are:
// - |hash| The content hash. This will never be nullptr, but
......@@ -209,19 +207,22 @@ class ContentHash : public base::RefCountedThreadSafe<ContentHash> {
bool CreateHashes(const base::FilePath& hashes_file,
const IsCancelledCallback& is_cancelled);
// Builds hashes for one resource and checks them against
// verified_contents.json if needed. Returns nullopt if nothing should be
// added to computed_hashes.json for this resource.
base::Optional<std::vector<std::string>> ComputeAndCheckResourceHash(
const base::FilePath& full_path,
const base::FilePath& relative_unix_path);
// Builds computed_hashes. Possibly after creating computed_hashes.json file
// if necessary.
void BuildComputedHashes(bool did_fetch_verified_contents,
bool force_build,
const IsCancelledCallback& is_cancelled);
// Helper callback for ComputedHashes::Compute. Checks whether we want the hash
// of the given resource to be in computed_hashes.json or not.
bool ShouldComputeHashesForResource(const base::FilePath& relative_unix_path);
// If needed (|source_type_| is SIGNED_HASHES) checks each hash from
// |computed_hashes| against data from verified_contents.json and returns the
// list of mismatches. If not needed, just returns an empty list.
std::set<base::FilePath> GetMismatchedComputedHashes(
ComputedHashes::Data* computed_hashes);
bool has_verified_contents() const { return verified_contents_ != nullptr; }
const ExtensionId extension_id_;
......
......@@ -59,8 +59,8 @@ class TestExtensionBuilder {
for (const auto& resource : extension_resources_) {
std::vector<std::string> hashes =
ComputedHashes::GetHashesForContent(resource.contents, block_size);
computed_hashes_data.AddHashes(resource.relative_path, block_size,
hashes);
computed_hashes_data[resource.relative_path] =
ComputedHashes::HashInfo(block_size, hashes);
}
ASSERT_TRUE(ComputedHashes(std::move(computed_hashes_data))
......
......@@ -259,8 +259,7 @@ void ContentHashWaiter::CreatedCallback(scoped_refptr<ContentHash> content_hash,
void ContentHashWaiter::CreateContentHash(
ContentHash::FetchKey key,
ContentVerifierDelegate::VerifierSourceType source_type) {
ContentHash::Create(std::move(key), source_type,
ContentHash::IsCancelledCallback(),
ContentHash::Create(std::move(key), source_type, IsCancelledCallback(),
base::BindOnce(&ContentHashWaiter::CreatedCallback,
base::Unretained(this)));
}
......
......@@ -76,7 +76,8 @@ void WriteComputedHashes(
for (const auto& resource : contents) {
std::vector<std::string> hashes =
ComputedHashes::GetHashesForContent(resource.second, block_size);
computed_hashes_data.AddHashes(resource.first, block_size, hashes);
computed_hashes_data[resource.first] =
ComputedHashes::HashInfo(block_size, hashes);
}
base::CreateDirectory(extension_root.Append(kMetadataFolder));
......@@ -364,7 +365,8 @@ void WriteIncorrectComputedHashes(const base::FilePath& extension_path,
const std::string kFakeContents = "fake contents";
std::vector<std::string> hashes =
ComputedHashes::GetHashesForContent(kFakeContents, block_size);
incorrect_computed_hashes_data.AddHashes(resource_path, block_size, hashes);
incorrect_computed_hashes_data[resource_path] =
ComputedHashes::HashInfo(block_size, hashes);
ASSERT_TRUE(
ComputedHashes(std::move(incorrect_computed_hashes_data))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment