Add FilePath canonicalization logic to ComputedHashes.

Move ComputedHashes's underlying data into its own class ComputedHashes::Data, that wraps the canonicalization logic in it. Adding items to Data would canonicalize paths, obviating the need for a) the linear search in case-insensitive OS. b) additional dot-space trimming treatment on windows. Bug: 796395 Change-Id: I1517f6daa8253562b731cf62a5c54fb95f2bd10e Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2003261 Commit-Queue: Istiaque Ahmed <lazyboy@chromium.org> Reviewed-by: Oleg Davydov <burunduk@chromium.org> Cr-Commit-Position: refs/heads/master@{#733821}

Add FilePath canonicalization logic to ComputedHashes.
Move ComputedHashes's underlying data into its own class ComputedHashes::Data, that wraps the canonicalization logic in it. Adding items to Data would canonicalize paths, obviating the need for a) the linear search in case-insensitive OS. b) additional dot-space trimming treatment on windows. Bug: 796395 Change-Id: I1517f6daa8253562b731cf62a5c54fb95f2bd10e Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2003261 Commit-Queue: Istiaque Ahmed <lazyboy@chromium.org> Reviewed-by: Oleg Davydov <burunduk@chromium.org> Cr-Commit-Position: refs/heads/master@{#733821}
e8bdcf9d · Istiaque Ahmed · Commit Bot · 01e754b5 · e8bdcf9d · e8bdcf9d
Commit e8bdcf9d authored Jan 22, 2020 by Istiaque Ahmed Committed by Commit Bot Jan 22, 2020
10 changed files
--- a/extensions/browser/computed_hashes.cc
+++ b/extensions/browser/computed_hashes.cc
@@ -45,6 +45,54 @@ const char kUMAComputedHashesInitTime[] =
 }  // namespace
+ComputedHashes::Data::Data() = default;
+ComputedHashes::Data::~Data() = default;
+ComputedHashes::Data::Data(ComputedHashes::Data&& data) = default;
+ComputedHashes::Data& ComputedHashes::Data::operator=(
+    ComputedHashes::Data&& data) = default;
+ComputedHashes::Data::HashInfo::HashInfo(int block_size,
+                                         std::vector<std::string> hashes,
+                                         base::FilePath relative_unix_path)
+    : block_size(block_size),
+      hashes(std::move(hashes)),
+      relative_unix_path(std::move(relative_unix_path)) {}
+ComputedHashes::Data::HashInfo::~HashInfo() = default;
+ComputedHashes::Data::HashInfo::HashInfo(ComputedHashes::Data::HashInfo&&) =
+    default;
+ComputedHashes::Data::HashInfo& ComputedHashes::Data::HashInfo::operator=(
+    ComputedHashes::Data::HashInfo&&) = default;
+const ComputedHashes::Data::HashInfo* ComputedHashes::Data::GetItem(
+    const base::FilePath& relative_path) const {
+  base::FilePath::StringType canonicalized_path =
+      content_verifier_utils::CanonicalizeRelativePath(relative_path);
+  auto iter = items_.find(canonicalized_path);
+  return iter == items_.end() ? nullptr : &iter->second;
+}
+void ComputedHashes::Data::Add(const base::FilePath& relative_path,
+                               int block_size,
+                               std::vector<std::string> hashes) {
+  base::FilePath::StringType canonicalized_path =
+      content_verifier_utils::CanonicalizeRelativePath(relative_path);
+  items_.insert(
+      std::make_pair(canonicalized_path,
+                     HashInfo(block_size, std::move(hashes),
+                              relative_path.NormalizePathSeparatorsTo('/'))));
+}
+void ComputedHashes::Data::Remove(const base::FilePath& relative_path) {
+  base::FilePath::StringType canonicalized_path =
+      content_verifier_utils::CanonicalizeRelativePath(relative_path);
+  items_.erase(canonicalized_path);
+}
+const std::map<base::FilePath::StringType, ComputedHashes::Data::HashInfo>&
+ComputedHashes::Data::items() const {
+  return items_;
+}
 ComputedHashes::ComputedHashes(Data&& data) : data_(std::move(data)) {}
 ComputedHashes::~ComputedHashes() = default;
@@ -102,8 +150,6 @@ base::Optional<ComputedHashes> ComputedHashes::CreateFromFile(
    base::FilePath relative_path =
        base::FilePath::FromUTF8Unsafe(*relative_path_utf8);
-    relative_path = relative_path.NormalizePathSeparatorsTo('/');
    std::vector<std::string> hashes;
    for (const base::Value& value : block_hashes->GetList()) {
@@ -116,7 +162,7 @@ base::Optional<ComputedHashes> ComputedHashes::CreateFromFile(
      if (!base::Base64Decode(encoded, decoded))
        return base::nullopt;
    }
-    data[relative_path] = HashInfo(*block_size, std::move(hashes));
+    data.Add(relative_path, *block_size, std::move(hashes));
  }
  uma_recorder.RecordSuccess();
  return ComputedHashes(std::move(data));
@@ -144,23 +190,21 @@ base::Optional<ComputedHashes::Data> ComputedHashes::Compute(
  // Now iterate over all the paths in sorted order and compute the block hashes
  // for each one.
-  std::map<base::FilePath, HashInfo> data;
+  Data data;
  for (const auto& full_path : paths) {
    if (is_cancelled && is_cancelled.Run())
      return base::nullopt;
-    base::FilePath relative_unix_path;
+    base::FilePath relative_path;
-    extension_root.AppendRelativePath(full_path, &relative_unix_path);
+    extension_root.AppendRelativePath(full_path, &relative_path);
-    relative_unix_path = relative_unix_path.NormalizePathSeparatorsTo('/');
-    if (!should_compute_hashes_for_resource.Run(relative_unix_path))
+    if (!should_compute_hashes_for_resource.Run(relative_path))
      continue;
    base::Optional<std::vector<std::string>> hashes =
-        ComputeAndCheckResourceHash(full_path, relative_unix_path, block_size);
+        ComputeAndCheckResourceHash(full_path, block_size);
    if (hashes)
-      data[relative_unix_path] =
+      data.Add(relative_path, block_size, std::move(hashes.value()));
-          HashInfo(block_size, std::move(hashes.value()));
  }
  return data;
@@ -169,50 +213,12 @@ base::Optional<ComputedHashes::Data> ComputedHashes::Compute(
 bool ComputedHashes::GetHashes(const base::FilePath& relative_path,
                               int* block_size,
                               std::vector<std::string>* hashes) const {
-  base::FilePath path = relative_path.NormalizePathSeparatorsTo('/');
+  const Data::HashInfo* hash_info = data_.GetItem(relative_path);
-  // TODO(lazyboy): Align treatment of |data_| with that of
+  if (!hash_info)
-  // VerifiedContents::root_hashes_, so that we don't have to perform the linear
-  // lookup below.
-  auto find_data = [&](const base::FilePath& normalized_path) {
-    auto i = data_.find(normalized_path);
-    if (i == data_.end() &&
-        !content_verifier_utils::IsFileAccessCaseSensitive()) {
-      // If we didn't find the entry using exact match, it's possible the
-      // developer is using a path with some letters in the incorrect case,
-      // which happens to work on windows/osx. So try doing a linear scan to
-      // look for a case-insensitive match. In practice most extensions don't
-      // have that big a list of files so the performance penalty is probably
-      // not too big here. Also for crbug.com/29941 we plan to start warning
-      // developers when they are making this mistake, since their extension
-      // will be broken on linux/chromeos.
-      for (i = data_.begin(); i != data_.end(); ++i) {
-        const base::FilePath& entry = i->first;
-        if (base::FilePath::CompareEqualIgnoreCase(entry.value(),
-                                                   normalized_path.value())) {
-          break;
-        }
-      }
-    }
-    return i;
-  };
-  auto i = find_data(path);
-  if (i == data_.end() &&
-      content_verifier_utils::IsDotSpaceFilenameSuffixIgnored()) {
-    base::FilePath::StringType trimmed_path_value;
-    // Also search for path with (.| )+ suffix trimmed as they are ignored in
-    // windows. This matches the canonicalization behavior of
-    // VerifiedContents::Create.
-    if (content_verifier_utils::TrimDotSpaceSuffix(path.value(),
-                                                   &trimmed_path_value)) {
-      i = find_data(base::FilePath(trimmed_path_value));
-    }
-  }
-  if (i == data_.end())
    return false;
-  const HashInfo& info = i->second;
+  *block_size = hash_info->block_size;
-  *block_size = info.first;
+  *hashes = hash_info->hashes;
-  *hashes = info.second;
  return true;
 }
@@ -222,10 +228,10 @@ bool ComputedHashes::WriteToFile(const base::FilePath& path) const {
    return false;
  base::Value file_list(base::Value::Type::LIST);
-  for (const auto& resource_info : data_) {
+  for (const auto& resource_info : data_.items()) {
-    const base::FilePath& relative_path = resource_info.first;
+    const Data::HashInfo& hash_info = resource_info.second;
-    int block_size = resource_info.second.first;
+    int block_size = hash_info.block_size;
-    const std::vector<std::string>& hashes = resource_info.second.second;
+    const std::vector<std::string>& hashes = hash_info.hashes;
    base::Value::ListStorage block_hashes;
    block_hashes.reserve(hashes.size());
@@ -236,9 +242,8 @@ bool ComputedHashes::WriteToFile(const base::FilePath& path) const {
    }
    base::Value dict(base::Value::Type::DICTIONARY);
-    dict.SetStringKey(
+    dict.SetStringKey(computed_hashes::kPathKey,
-        computed_hashes::kPathKey,
+                      hash_info.relative_unix_path.AsUTF8Unsafe());
-        relative_path.NormalizePathSeparatorsTo('/').AsUTF8Unsafe());
    dict.SetIntKey(computed_hashes::kBlockSizeKey, block_size);
    dict.SetKey(computed_hashes::kBlockHashesKey,
                base::Value(std::move(block_hashes)));
@@ -298,7 +303,6 @@ std::vector<std::string> ComputedHashes::GetHashesForContent(
 base::Optional<std::vector<std::string>>
 ComputedHashes::ComputeAndCheckResourceHash(
    const base::FilePath& full_path,
-    const base::FilePath& relative_unix_path,
    int block_size) {
  std::string contents;
  if (!base::ReadFileToString(full_path, &contents)) {

--- a/extensions/browser/computed_hashes.h
+++ b/extensions/browser/computed_hashes.h
@@ -13,12 +13,9 @@
 #include <vector>
 #include "base/callback.h"
+#include "base/files/file_path.h"
 #include "base/optional.h"
-namespace base {
-class FilePath;
-}
 namespace extensions {
 using IsCancelledCallback = base::RepeatingCallback<bool(void)>;
@@ -29,8 +26,60 @@ using ShouldComputeHashesCallback =
 // computed over the files inside an extension.
 class ComputedHashes {
 public:
-  using HashInfo = std::pair<int, std::vector<std::string>>;
+  // Hashes data for relative paths.
-  using Data = std::map<base::FilePath, HashInfo>;
+  // System specific path canonicalization is taken care of inside this class.
+  class Data {
+   public:
+    struct HashInfo {
+      int block_size;
+      std::vector<std::string> hashes;
+      // The relative unix style path.
+      // Note that we use canonicalized paths as keys to HashInfo's container
+      // |items_|.
+      //
+      // TODO(http://crbug.com/796395#c28): Consider removing this once
+      // ContentVerifier::ShouldVerifyAnyPaths works with canonicalized relative
+      // paths.
+      base::FilePath relative_unix_path;
+      HashInfo(int block_size,
+               std::vector<std::string> hashes,
+               base::FilePath relative_unix_path);
+      ~HashInfo();
+      HashInfo(const HashInfo&) = delete;
+      HashInfo& operator=(const HashInfo&) = delete;
+      HashInfo(HashInfo&&);
+      HashInfo& operator=(HashInfo&&);
+    };
+    using Items = std::map<base::FilePath::StringType, HashInfo>;
+    Data();
+    ~Data();
+    Data(const Data&) = delete;
+    Data& operator=(const Data&) = delete;
+    Data(Data&&);
+    Data& operator=(Data&&);
+    // For |relative_path|, adds hash information with |block_size| and
+    // |hashes|.
+    // Note that |relative_path| will be canonicalized.
+    void Add(const base::FilePath& relative_path,
+             int block_size,
+             std::vector<std::string> hashes);
+    // Removes the item that corresponds to |relative_path|.
+    void Remove(const base::FilePath& relative_path);
+    // Returns HashInfo* for |relative_path| or nullptr if not found.
+    const HashInfo* GetItem(const base::FilePath& relative_path) const;
+    const Items& items() const;
+   private:
+    // All items, stored by canonicalized FilePath::StringType key.
+    Items items_;
+  };
  explicit ComputedHashes(Data&& data);
  ComputedHashes(const ComputedHashes&) = delete;
@@ -79,7 +128,6 @@ class ComputedHashes {
  // added to computed_hashes.json for this resource.
  static base::Optional<std::vector<std::string>> ComputeAndCheckResourceHash(
      const base::FilePath& full_path,
-      const base::FilePath& relative_unix_path,
      int block_size);
  Data data_;

--- a/extensions/browser/computed_hashes_unittest.cc
+++ b/extensions/browser/computed_hashes_unittest.cc
@@ -43,10 +43,8 @@ testing::AssertionResult WriteThenReadComputedHashes(
  base::FilePath computed_hashes_path =
      scoped_dir.GetPath().AppendASCII("computed_hashes.json");
  extensions::ComputedHashes::Data computed_hashes_data;
-  for (const auto& info : hash_infos) {
+  for (const auto& info : hash_infos)
-    computed_hashes_data[info.path] =
+    computed_hashes_data.Add(info.path, info.block_size, info.hashes);
-        extensions::ComputedHashes::HashInfo(info.block_size, info.hashes);
-  }
  if (!extensions::ComputedHashes(std::move(computed_hashes_data))
           .WriteToFile(computed_hashes_path)) {

--- a/extensions/browser/content_verifier.h
+++ b/extensions/browser/content_verifier.h
@@ -39,6 +39,25 @@ class Extension;
 // Used for managing overall content verification - both fetching content
 // hashes as needed, and supplying job objects to verify file contents as they
 // are read.
+//
+// Some notes about extension resource paths:
+// An extension resource path is a path relative to it's extension root
+// directory. For the purposes of content verification system, there can be
+// several transformations of the relative path:
+//   1. Relative path: Relative path as is. This is base::FilePath that simply
+//      is the relative path of the resource.
+//   2. Relative unix path: Some underlying parts of content-verification
+//      require uniform separator, we use '/' as separator so it is effectively
+//      unix style. Note that this is a reversible transformation.
+//   3. Canonicalized relative_path: Canonicalized paths are used as keys of
+//      maps within VerifiedContents and ComputedHashes. This takes care of OS
+//      specific file access issues:
+//      - windows/mac is case insensitive while accessing files.
+//      - windows ignores (.| )+ suffixes in filename while accessing a file.
+//      Canonicalization consists of normalizing the separators, lower casing
+//      the filepath in case-insensitive systems and trimming ignored suffixes
+//      if appropriate.
+//      See content_verifier_utils::CanonicalizeRelativePath() for details.
 class ContentVerifier : public base::RefCountedThreadSafe<ContentVerifier>,
                        public ExtensionRegistryObserver {
 public:

--- a/extensions/browser/content_verifier/content_hash.cc
+++ b/extensions/browser/content_verifier/content_hash.cc
@@ -329,14 +329,15 @@ std::set<base::FilePath> ContentHash::GetMismatchedComputedHashes(
  std::set<base::FilePath> mismatched_hashes;
-  for (const auto& resource_info : *computed_hashes_data) {
+  for (const auto& resource_info : computed_hashes_data->items()) {
-    const base::FilePath& relative_unix_path = resource_info.first;
+    const ComputedHashes::Data::HashInfo& hash_info = resource_info.second;
-    const std::vector<std::string>& hashes = resource_info.second.second;
+    std::string root = ComputeTreeHashRoot(hash_info.hashes,
-    std::string root =
+                                           block_size_ / crypto::kSHA256Length);
-        ComputeTreeHashRoot(hashes, block_size_ / crypto::kSHA256Length);
+    if (!verified_contents_->TreeHashRootEquals(hash_info.relative_unix_path,
-    if (!verified_contents_->TreeHashRootEquals(relative_unix_path, root))
+                                                root)) {
-      mismatched_hashes.insert(relative_unix_path);
+      mismatched_hashes.insert(hash_info.relative_unix_path);
+    }
  }
  return mismatched_hashes;
@@ -365,7 +366,7 @@ bool ContentHash::CreateHashes(const base::FilePath& hashes_file,
      VLOG(1) << "content mismatch for " << relative_unix_path.AsUTF8Unsafe();
      // Remove hash entry to keep computed_hashes.json file clear of mismatched
      // hashes.
-      computed_hashes_data->erase(relative_unix_path);
+      computed_hashes_data->Remove(relative_unix_path);
    }
    hash_mismatch_unix_paths_ = std::move(hashes_mismatch);
  }

--- a/extensions/browser/content_verifier/content_hash_unittest.cc
+++ b/extensions/browser/content_verifier/content_hash_unittest.cc
@@ -59,8 +59,7 @@ class TestExtensionBuilder {
    for (const auto& resource : extension_resources_) {
      std::vector<std::string> hashes =
          ComputedHashes::GetHashesForContent(resource.contents, block_size);
-      computed_hashes_data[resource.relative_path] =
+      computed_hashes_data.Add(resource.relative_path, block_size, hashes);
-          ComputedHashes::HashInfo(block_size, hashes);
    }
    ASSERT_TRUE(ComputedHashes(std::move(computed_hashes_data))

--- a/extensions/browser/content_verifier/content_verifier_utils.cc
+++ b/extensions/browser/content_verifier/content_verifier_utils.cc
@@ -22,9 +22,10 @@ bool TrimDotSpaceSuffix(const base::FilePath::StringType& path,
  return true;
 }
-base::FilePath::StringType CanonicalizeFilePath(const base::FilePath& path) {
+base::FilePath::StringType CanonicalizeRelativePath(
+    const base::FilePath& relative_path) {
  base::FilePath::StringType canonicalized_path =
-      path.NormalizePathSeparatorsTo('/').value();
+      relative_path.NormalizePathSeparatorsTo('/').value();
  if (!IsFileAccessCaseSensitive())
    canonicalized_path = base::ToLowerASCII(canonicalized_path);
  if (IsDotSpaceFilenameSuffixIgnored())

--- a/extensions/browser/content_verifier/content_verifier_utils.h
+++ b/extensions/browser/content_verifier/content_verifier_utils.h
@@ -37,9 +37,10 @@ constexpr bool IsDotSpaceFilenameSuffixIgnored() {
 #endif
 }
-// Returns platform specific canonicalized version of |path| for content
+// Returns platform specific canonicalized version of |relative_path| for
-// verification system.
+// content verification system.
-base::FilePath::StringType CanonicalizeFilePath(const base::FilePath& path);
+base::FilePath::StringType CanonicalizeRelativePath(
+    const base::FilePath& relative_path);
 }  // namespace content_verifier_utils
 }  // namespace extensions

--- a/extensions/browser/content_verify_job_unittest.cc
+++ b/extensions/browser/content_verify_job_unittest.cc
@@ -76,8 +76,7 @@ void WriteComputedHashes(
  for (const auto& resource : contents) {
    std::vector<std::string> hashes =
        ComputedHashes::GetHashesForContent(resource.second, block_size);
-    computed_hashes_data[resource.first] =
+    computed_hashes_data.Add(resource.first, block_size, std::move(hashes));
-        ComputedHashes::HashInfo(block_size, hashes);
  }
  base::CreateDirectory(extension_root.Append(kMetadataFolder));
@@ -365,8 +364,8 @@ void WriteIncorrectComputedHashes(const base::FilePath& extension_path,
  const std::string kFakeContents = "fake contents";
  std::vector<std::string> hashes =
      ComputedHashes::GetHashesForContent(kFakeContents, block_size);
-  incorrect_computed_hashes_data[resource_path] =
+  incorrect_computed_hashes_data.Add(resource_path, block_size,
-      ComputedHashes::HashInfo(block_size, hashes);
+                                     std::move(hashes));
  ASSERT_TRUE(
      ComputedHashes(std::move(incorrect_computed_hashes_data))

--- a/extensions/browser/verified_contents.cc
+++ b/extensions/browser/verified_contents.cc
@@ -172,7 +172,7 @@ std::unique_ptr<VerifiedContents> VerifiedContents::Create(
      }
      base::FilePath::StringType canonicalized_path =
-          content_verifier_utils::CanonicalizeFilePath(
+          content_verifier_utils::CanonicalizeRelativePath(
              base::FilePath::FromUTF8Unsafe(*file_path_string));
      auto i = verified_contents->root_hashes_.insert(
          std::make_pair(canonicalized_path, std::string()));
@@ -189,13 +189,14 @@ bool VerifiedContents::HasTreeHashRoot(
    const base::FilePath& relative_path) const {
  return base::Contains(
      root_hashes_,
-      content_verifier_utils::CanonicalizeFilePath(relative_path));
+      content_verifier_utils::CanonicalizeRelativePath(relative_path));
 }
 bool VerifiedContents::TreeHashRootEquals(const base::FilePath& relative_path,
                                          const std::string& expected) const {
  return TreeHashRootEqualsImpl(
-      content_verifier_utils::CanonicalizeFilePath(relative_path), expected);
+      content_verifier_utils::CanonicalizeRelativePath(relative_path),
+      expected);
 }
 // We're loosely following the "JSON Web Signature" draft spec for signing