Commit 64d8c1e2 authored by Oleg Davydov, committed by Commit Bot

[CV] Move hashes computing to ComputedHashes

Historically we've computed extension resources' hashes in
ContentHash::CreateHashes, but logically it's a job for ComputedHashes
class, so now we do it in ComputedHashes::Compute method.

In the future this will allow us to reuse ComputedHashes::Compute for
other purposes (e.g. testing, or computing hashes upon installation for
self-hosted extensions).

Bug: 796395, 958794
Change-Id: Ie16c53ee682a4b8066dd2e929e13cbbc5be2d6ea
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1913415
Commit-Queue: Oleg Davydov <burunduk@chromium.org>
Reviewed-by: Nikita Podguzov <nikitapodguzov@chromium.org>
Reviewed-by: Istiaque Ahmed <lazyboy@chromium.org>
Cr-Commit-Position: refs/heads/master@{#721051}
parent 684f7d53
......@@ -5,9 +5,11 @@
#include "extensions/browser/computed_hashes.h"
#include <memory>
#include <set>
#include <utility>
#include "base/base64.h"
#include "base/files/file_enumerator.h"
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/json/json_reader.h"
......@@ -34,6 +36,8 @@ const int kVersion = 2;
namespace {
using SortedFilePathSet = std::set<base::FilePath>;
const char kUMAComputedHashesReadResult[] =
"Extensions.ContentVerification.ComputedHashesReadResult";
const char kUMAComputedHashesInitTime[] =
......@@ -41,16 +45,6 @@ const char kUMAComputedHashesInitTime[] =
} // namespace
ComputedHashes::Data::Data() = default;
ComputedHashes::Data::~Data() = default;
ComputedHashes::Data::Data(Data&&) = default;
ComputedHashes::Data& ComputedHashes::Data::operator=(Data&&) = default;
void ComputedHashes::Data::AddHashes(const base::FilePath& relative_path,
int block_size,
std::vector<std::string> hashes) {
data_[relative_path] = HashInfo(block_size, std::move(hashes));
}
ComputedHashes::ComputedHashes(Data&& data) : data_(std::move(data)) {}
ComputedHashes::~ComputedHashes() = default;
......@@ -122,20 +116,63 @@ base::Optional<ComputedHashes> ComputedHashes::CreateFromFile(
if (!base::Base64Decode(encoded, decoded))
return base::nullopt;
}
data.AddHashes(relative_path, *block_size, std::move(hashes));
data[relative_path] = HashInfo(*block_size, std::move(hashes));
}
uma_recorder.RecordSuccess();
return ComputedHashes(std::move(data));
}
// static
base::Optional<ComputedHashes::Data> ComputedHashes::Compute(
    const base::FilePath& extension_root,
    int block_size,
    const IsCancelledCallback& is_cancelled,
    const ShouldComputeHashesCallback& should_compute_hashes_for_resource) {
  // Enumerate every regular file under |extension_root| (recursively) and
  // collect the paths into a sorted set, so hashes are always produced in a
  // deterministic order regardless of enumeration order.
  SortedFilePathSet sorted_paths;
  base::FileEnumerator enumerator(extension_root, /*recursive=*/true,
                                  base::FileEnumerator::FILES);
  for (;;) {
    if (is_cancelled && is_cancelled.Run())
      return base::nullopt;
    const base::FilePath next_path = enumerator.Next();
    if (next_path.empty())
      break;
    sorted_paths.insert(next_path);
  }

  // Hash each discovered file, |block_size| bytes at a time, and record the
  // result keyed by its unix-style relative path.
  std::map<base::FilePath, HashInfo> hashes_data;
  for (const base::FilePath& full_path : sorted_paths) {
    if (is_cancelled && is_cancelled.Run())
      return base::nullopt;

    base::FilePath relative_unix_path;
    extension_root.AppendRelativePath(full_path, &relative_unix_path);
    relative_unix_path = relative_unix_path.NormalizePathSeparatorsTo('/');

    // Skip resources the caller does not want hashes for.
    if (!should_compute_hashes_for_resource.Run(relative_unix_path))
      continue;

    base::Optional<std::vector<std::string>> block_hashes =
        ComputeAndCheckResourceHash(full_path, relative_unix_path, block_size);
    if (block_hashes) {
      hashes_data[relative_unix_path] =
          HashInfo(block_size, std::move(block_hashes.value()));
    }
  }
  return hashes_data;
}
bool ComputedHashes::GetHashes(const base::FilePath& relative_path,
int* block_size,
std::vector<std::string>* hashes) const {
base::FilePath path = relative_path.NormalizePathSeparatorsTo('/');
const std::map<base::FilePath, HashInfo>& data = data_.data_;
auto find_data = [&](const base::FilePath& normalized_path) {
auto i = data.find(normalized_path);
if (i == data.end()) {
auto i = data_.find(normalized_path);
if (i == data_.end()) {
// If we didn't find the entry using exact match, it's possible the
// developer is using a path with some letters in the incorrect case,
// which happens to work on windows/osx. So try doing a linear scan to
......@@ -144,7 +181,7 @@ bool ComputedHashes::GetHashes(const base::FilePath& relative_path,
// not too big here. Also for crbug.com/29941 we plan to start warning
// developers when they are making this mistake, since their extension
// will be broken on linux/chromeos.
for (i = data.begin(); i != data.end(); ++i) {
for (i = data_.begin(); i != data_.end(); ++i) {
const base::FilePath& entry = i->first;
if (base::FilePath::CompareEqualIgnoreCase(entry.value(),
normalized_path.value())) {
......@@ -156,7 +193,7 @@ bool ComputedHashes::GetHashes(const base::FilePath& relative_path,
};
auto i = find_data(path);
#if defined(OS_WIN)
if (i == data.end()) {
if (i == data_.end()) {
base::FilePath::StringType trimmed_path_value;
// Also search for path with (.| )+ suffix trimmed as they are ignored in
// windows. This matches the canonicalization behavior of
......@@ -167,7 +204,7 @@ bool ComputedHashes::GetHashes(const base::FilePath& relative_path,
}
}
#endif // defined(OS_WIN)
if (i == data.end())
if (i == data_.end())
return false;
const HashInfo& info = i->second;
......@@ -177,8 +214,12 @@ bool ComputedHashes::GetHashes(const base::FilePath& relative_path,
}
bool ComputedHashes::WriteToFile(const base::FilePath& path) const {
// Make sure the directory exists.
if (!base::CreateDirectoryAndGetError(path.DirName(), nullptr))
return false;
base::Value file_list(base::Value::Type::LIST);
for (const auto& resource_info : data_.data_) {
for (const auto& resource_info : data_) {
const base::FilePath& relative_path = resource_info.first;
int block_size = resource_info.second.first;
const std::vector<std::string>& hashes = resource_info.second.second;
......@@ -249,4 +290,23 @@ std::vector<std::string> ComputedHashes::GetHashesForContent(
return hashes;
}
// static
base::Optional<std::vector<std::string>>
ComputedHashes::ComputeAndCheckResourceHash(
    const base::FilePath& full_path,
    const base::FilePath& relative_unix_path,
    int block_size) {
  // Read the whole resource into memory; hashing below operates on the full
  // contents, one |block_size|-sized block at a time.
  std::string contents;
  if (!base::ReadFileToString(full_path, &contents)) {
    LOG(ERROR) << "Could not read " << full_path.MaybeAsASCII();
    return base::nullopt;
  }

  // NOTE(review): |relative_unix_path| is not used in this body — presumably
  // kept for interface symmetry with callers; confirm before removing.
  return base::make_optional(GetHashesForContent(contents, block_size));
}
} // namespace extensions
......@@ -12,6 +12,7 @@
#include <string>
#include <vector>
#include "base/callback.h"
#include "base/optional.h"
namespace base {
......@@ -20,35 +21,16 @@ class FilePath;
namespace extensions {
using IsCancelledCallback = base::RepeatingCallback<bool(void)>;
using ShouldComputeHashesCallback =
base::RepeatingCallback<bool(const base::FilePath& relative_path)>;
// A class for storage and serialization of a set of SHA256 block hashes
// computed over the files inside an extension.
class ComputedHashes {
public:
using HashInfo = std::pair<int, std::vector<std::string>>;
// While |ComputedHashes| itself is a read-only view for the hashes, this is a
// subclass for modifying (eg. while computing hashes for the first time).
class Data {
public:
Data();
~Data();
Data(const Data&) = delete;
Data& operator=(const Data&) = delete;
Data(Data&&);
Data& operator=(Data&&);
// Adds hashes for |relative_path|. Should not be called more than once for
// a given |relative_path|.
void AddHashes(const base::FilePath& relative_path,
int block_size,
std::vector<std::string> hashes);
private:
// Map of relative path to hash info (block size, hashes).
std::map<base::FilePath, HashInfo> data_;
friend class ComputedHashes;
};
using Data = std::map<base::FilePath, HashInfo>;
explicit ComputedHashes(Data&& data);
ComputedHashes(const ComputedHashes&) = delete;
......@@ -62,6 +44,19 @@ class ComputedHashes {
static base::Optional<ComputedHashes> CreateFromFile(
const base::FilePath& path);
// Computes hashes for files in |extension_root|. Returns nullopt upon any
// failure. Callback |should_compute_hashes_for_resource| is used to determine
// whether we need hashes for a resource or not.
// TODO(https://crbug.com/796395#c24) To support per-file block size instead
// of passing |block_size| as an argument make callback
// |should_compute_hashes_for_resource| return optional<int>: nullopt if hashes
// are not needed for this file, block size for this file otherwise.
static base::Optional<ComputedHashes::Data> Compute(
const base::FilePath& extension_root,
int block_size,
const IsCancelledCallback& is_cancelled,
const ShouldComputeHashesCallback& should_compute_hashes_for_resource);
// Saves computed hashes to given file, returns false upon any failure (and
// true on success).
bool WriteToFile(const base::FilePath& path) const;
......@@ -79,6 +74,14 @@ class ComputedHashes {
size_t block_size);
private:
// Builds hashes for one resource by reading its contents and hashing them
// block by block. Returns nullopt (and logs an error) if the file could not
// be read, in which case nothing is added to computed_hashes.json for this
// resource.
static base::Optional<std::vector<std::string>> ComputeAndCheckResourceHash(
const base::FilePath& full_path,
const base::FilePath& relative_unix_path,
int block_size);
Data data_;
};
......
......@@ -46,8 +46,10 @@ testing::AssertionResult WriteThenReadComputedHashes(
base::FilePath computed_hashes_path =
scoped_dir.GetPath().AppendASCII("computed_hashes.json");
extensions::ComputedHashes::Data computed_hashes_data;
for (const auto& info : hash_infos)
computed_hashes_data.AddHashes(info.path, info.block_size, info.hashes);
for (const auto& info : hash_infos) {
computed_hashes_data[info.path] =
extensions::ComputedHashes::HashInfo(info.block_size, info.hashes);
}
if (!extensions::ComputedHashes(std::move(computed_hashes_data))
.WriteToFile(computed_hashes_path)) {
......
......@@ -4,8 +4,10 @@
#include "extensions/browser/content_verifier/content_hash.h"
#include <set>
#include "base/bind.h"
#include "base/files/file_enumerator.h"
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/json/json_reader.h"
#include "base/metrics/histogram_macros.h"
......@@ -26,8 +28,6 @@ namespace extensions {
namespace {
using SortedFilePathSet = std::set<base::FilePath>;
bool CreateDirAndWriteFile(const base::FilePath& destination,
const std::string& content) {
DCHECK(GetExtensionFileTaskRunner()->RunsTasksInCurrentSequence());
......@@ -194,10 +194,9 @@ void ContentHash::GetVerifiedContents(
}
// static
void ContentHash::FetchVerifiedContents(
ContentHash::FetchKey key,
const ContentHash::IsCancelledCallback& is_cancelled,
GetVerifiedContentsCallback callback) {
void ContentHash::FetchVerifiedContents(ContentHash::FetchKey key,
const IsCancelledCallback& is_cancelled,
GetVerifiedContentsCallback callback) {
// |fetcher| deletes itself when it's done.
internals::ContentHashFetcher* fetcher =
new internals::ContentHashFetcher(std::move(key));
......@@ -310,88 +309,70 @@ void ContentHash::RecordFetchResult(bool success) {
UMA_HISTOGRAM_BOOLEAN("Extensions.ContentVerification.FetchResult", success);
}
base::Optional<std::vector<std::string>>
ContentHash::ComputeAndCheckResourceHash(
const base::FilePath& full_path,
bool ContentHash::ShouldComputeHashesForResource(
const base::FilePath& relative_unix_path) {
DCHECK(source_type_ !=
ContentVerifierDelegate::VerifierSourceType::SIGNED_HASHES ||
verified_contents_);
if (source_type_ ==
ContentVerifierDelegate::VerifierSourceType::SIGNED_HASHES &&
!verified_contents_->HasTreeHashRoot(relative_unix_path)) {
return base::nullopt;
}
std::string contents;
if (!base::ReadFileToString(full_path, &contents)) {
LOG(ERROR) << "Could not read " << full_path.MaybeAsASCII();
return base::nullopt;
if (source_type_ !=
ContentVerifierDelegate::VerifierSourceType::SIGNED_HASHES) {
return true;
}
DCHECK(verified_contents_);
return verified_contents_->HasTreeHashRoot(relative_unix_path);
}
// Iterate through taking the hash of each block of size (block_size_) of
// the file.
std::vector<std::string> hashes =
ComputedHashes::GetHashesForContent(contents, block_size_);
std::set<base::FilePath> ContentHash::GetMismatchedComputedHashes(
ComputedHashes::Data* computed_hashes_data) {
DCHECK(computed_hashes_data);
if (source_type_ !=
ContentVerifierDelegate::VerifierSourceType::SIGNED_HASHES) {
return base::make_optional(std::move(hashes));
return std::set<base::FilePath>();
}
std::string root =
ComputeTreeHashRoot(hashes, block_size_ / crypto::kSHA256Length);
if (!verified_contents_->TreeHashRootEquals(relative_unix_path, root)) {
VLOG(1) << "content mismatch for " << relative_unix_path.AsUTF8Unsafe();
hash_mismatch_unix_paths_.insert(relative_unix_path);
return base::nullopt;
std::set<base::FilePath> mismatched_hashes;
for (const auto& resource_info : *computed_hashes_data) {
const base::FilePath& relative_unix_path = resource_info.first;
const std::vector<std::string>& hashes = resource_info.second.second;
std::string root =
ComputeTreeHashRoot(hashes, block_size_ / crypto::kSHA256Length);
if (!verified_contents_->TreeHashRootEquals(relative_unix_path, root))
mismatched_hashes.insert(relative_unix_path);
}
return base::make_optional(std::move(hashes));
return mismatched_hashes;
}
bool ContentHash::CreateHashes(const base::FilePath& hashes_file,
const IsCancelledCallback& is_cancelled) {
DCHECK_EQ(ContentVerifierDelegate::VerifierSourceType::SIGNED_HASHES,
source_type_);
base::ElapsedTimer timer;
did_attempt_creating_computed_hashes_ = true;
// Make sure the directory exists.
if (!base::CreateDirectoryAndGetError(hashes_file.DirName(), nullptr))
return false;
base::FileEnumerator enumerator(extension_root_, true, /* recursive */
base::FileEnumerator::FILES);
// First discover all the file paths and put them in a sorted set.
SortedFilePathSet paths;
for (;;) {
if (is_cancelled && is_cancelled.Run())
return false;
base::FilePath full_path = enumerator.Next();
if (full_path.empty())
break;
paths.insert(full_path);
base::Optional<ComputedHashes::Data> computed_hashes_data =
ComputedHashes::Compute(
extension_root_, block_size_, is_cancelled,
// Using base::Unretained is safe here as
// ShouldComputeHashesForResource is only called synchronously from
// ComputedHashes::Compute.
base::BindRepeating(&ContentHash::ShouldComputeHashesForResource,
base::Unretained(this)));
if (computed_hashes_data) {
std::set<base::FilePath> hashes_mismatch =
GetMismatchedComputedHashes(&computed_hashes_data.value());
for (const auto& relative_unix_path : hashes_mismatch) {
VLOG(1) << "content mismatch for " << relative_unix_path.AsUTF8Unsafe();
// Remove hash entry to keep computed_hashes.json file clear of mismatched
// hashes.
computed_hashes_data->erase(relative_unix_path);
}
hash_mismatch_unix_paths_ = std::move(hashes_mismatch);
}
// Now iterate over all the paths in sorted order and compute the block hashes
// for each one.
ComputedHashes::Data computed_hashes_data;
for (auto i = paths.begin(); i != paths.end(); ++i) {
if (is_cancelled && is_cancelled.Run())
return false;
const base::FilePath& full_path = *i;
base::FilePath relative_unix_path;
extension_root_.AppendRelativePath(full_path, &relative_unix_path);
relative_unix_path = relative_unix_path.NormalizePathSeparatorsTo('/');
base::Optional<std::vector<std::string>> hashes =
ComputeAndCheckResourceHash(full_path, relative_unix_path);
if (hashes)
computed_hashes_data.AddHashes(relative_unix_path, block_size_,
std::move(hashes.value()));
}
bool result =
ComputedHashes(std::move(computed_hashes_data)).WriteToFile(hashes_file);
bool result = computed_hashes_data &&
ComputedHashes(std::move(computed_hashes_data.value()))
.WriteToFile(hashes_file);
UMA_HISTOGRAM_TIMES("ExtensionContentHashFetcher.CreateHashesTime",
timer.Elapsed());
......
......@@ -94,8 +94,6 @@ class ContentHash : public base::RefCountedThreadSafe<ContentHash> {
HASH_MISMATCH
};
using IsCancelledCallback = base::RepeatingCallback<bool(void)>;
// Factory:
// Returns ContentHash through |created_callback|, the returned values are:
// - |hash| The content hash. This will never be nullptr, but
......@@ -209,19 +207,22 @@ class ContentHash : public base::RefCountedThreadSafe<ContentHash> {
bool CreateHashes(const base::FilePath& hashes_file,
const IsCancelledCallback& is_cancelled);
// Builds hashes for one resource and checks them against
// verified_contents.json if needed. Returns nullopt if nothing should be
// added to computed_hashes.json for this resource.
base::Optional<std::vector<std::string>> ComputeAndCheckResourceHash(
const base::FilePath& full_path,
const base::FilePath& relative_unix_path);
// Builds computed_hashes. Possibly after creating computed_hashes.json file
// if necessary.
void BuildComputedHashes(bool did_fetch_verified_contents,
bool force_build,
const IsCancelledCallback& is_cancelled);
// Helper callback for ComputedHashes::Compute. Checks whether we want the hash
// of the given resource to be in computed_hashes.json or not.
bool ShouldComputeHashesForResource(const base::FilePath& relative_unix_path);
// If needed (|source_type_| is SIGNED_HASHES) checks each hash from
// |computed_hashes| against data from verified_contents.json and returns the
// list of mismatches. If not needed, just returns an empty list.
std::set<base::FilePath> GetMismatchedComputedHashes(
ComputedHashes::Data* computed_hashes);
bool has_verified_contents() const { return verified_contents_ != nullptr; }
const ExtensionId extension_id_;
......
......@@ -59,8 +59,8 @@ class TestExtensionBuilder {
for (const auto& resource : extension_resources_) {
std::vector<std::string> hashes =
ComputedHashes::GetHashesForContent(resource.contents, block_size);
computed_hashes_data.AddHashes(resource.relative_path, block_size,
hashes);
computed_hashes_data[resource.relative_path] =
ComputedHashes::HashInfo(block_size, hashes);
}
ASSERT_TRUE(ComputedHashes(std::move(computed_hashes_data))
......
......@@ -259,8 +259,7 @@ void ContentHashWaiter::CreatedCallback(scoped_refptr<ContentHash> content_hash,
void ContentHashWaiter::CreateContentHash(
ContentHash::FetchKey key,
ContentVerifierDelegate::VerifierSourceType source_type) {
ContentHash::Create(std::move(key), source_type,
ContentHash::IsCancelledCallback(),
ContentHash::Create(std::move(key), source_type, IsCancelledCallback(),
base::BindOnce(&ContentHashWaiter::CreatedCallback,
base::Unretained(this)));
}
......
......@@ -76,7 +76,8 @@ void WriteComputedHashes(
for (const auto& resource : contents) {
std::vector<std::string> hashes =
ComputedHashes::GetHashesForContent(resource.second, block_size);
computed_hashes_data.AddHashes(resource.first, block_size, hashes);
computed_hashes_data[resource.first] =
ComputedHashes::HashInfo(block_size, hashes);
}
base::CreateDirectory(extension_root.Append(kMetadataFolder));
......@@ -364,7 +365,8 @@ void WriteIncorrectComputedHashes(const base::FilePath& extension_path,
const std::string kFakeContents = "fake contents";
std::vector<std::string> hashes =
ComputedHashes::GetHashesForContent(kFakeContents, block_size);
incorrect_computed_hashes_data.AddHashes(resource_path, block_size, hashes);
incorrect_computed_hashes_data[resource_path] =
ComputedHashes::HashInfo(block_size, hashes);
ASSERT_TRUE(
ComputedHashes(std::move(incorrect_computed_hashes_data))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment