Commit de00aeb7 authored by asargent@chromium.org

Fix for 0-length file problem in extension content verification

The problem was that we generated zero block hashes when there
were no input bytes, when we should have generated one (the hash
of the empty string).

To make this fix a little easier to test, I moved the code for
generating hashes of content into a utility function in
computed_hashes.{h,cc}. Also, to fix things for any profiles that
might already have incorrect computed_hashes.json files, I changed
the file format to include a version number. When the format or
version isn't recognized, Init returns an error, and the rest of
the content verification code will automatically try to recreate
the file.

BUG=399251

Review URL: https://codereview.chromium.org/436563004

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@287740 0039d316-1c4b-4281-b951-d872f2087c98
parent 75e9e841
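
For illustration only (this example is not part of the change itself), a
computed_hashes.json written in the new version-2 format looks roughly like
the sketch below. The key names and the version number come from the
constants in the diff; the path and hash string are placeholders:

    {
      "version": 2,
      "file_hashes": [
        {
          "path": "background.js",
          "block_size": 4096,
          "block_hashes": ["uU0nuZNNPgilLlLX2n2r+sSE7+N6U4DukIj3rOLvzek="]
        }
      ]
    }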
--- a/extensions/browser/computed_hashes.cc
+++ b/extensions/browser/computed_hashes.cc
@@ -9,17 +9,25 @@
 #include "base/files/file_path.h"
 #include "base/json/json_reader.h"
 #include "base/json/json_writer.h"
+#include "base/stl_util.h"
 #include "base/values.h"
+#include "crypto/secure_hash.h"
+#include "crypto/sha2.h"
 
 namespace {
-const char kPathKey[] = "path";
-const char kBlockSizeKey[] = "block_size";
 const char kBlockHashesKey[] = "block_hashes";
-}
+const char kBlockSizeKey[] = "block_size";
+const char kFileHashesKey[] = "file_hashes";
+const char kPathKey[] = "path";
+const char kVersionKey[] = "version";
+const int kVersion = 2;
+}  // namespace
 
 namespace extensions {
 
 ComputedHashes::Reader::Reader() {
 }
 ComputedHashes::Reader::~Reader() {
 }
@@ -28,9 +36,19 @@ bool ComputedHashes::Reader::InitFromFile(const base::FilePath& path) {
   if (!base::ReadFileToString(path, &contents))
     return false;
 
-  base::ListValue* all_hashes = NULL;
+  base::DictionaryValue* top_dictionary = NULL;
   scoped_ptr<base::Value> value(base::JSONReader::Read(contents));
-  if (!value.get() || !value->GetAsList(&all_hashes))
+  if (!value.get() || !value->GetAsDictionary(&top_dictionary))
     return false;
 
+  // For now we don't support forwards or backwards compatibility in the
+  // format, so we return false on version mismatch.
+  int version = 0;
+  if (!top_dictionary->GetInteger(kVersionKey, &version) || version != kVersion)
+    return false;
+
+  base::ListValue* all_hashes = NULL;
+  if (!top_dictionary->GetList(kFileHashesKey, &all_hashes))
+    return false;
+
   for (size_t i = 0; i < all_hashes->GetSize(); i++) {
@@ -91,8 +109,9 @@ bool ComputedHashes::Reader::GetHashes(const base::FilePath& relative_path,
   return true;
 }
 
-ComputedHashes::Writer::Writer() {
+ComputedHashes::Writer::Writer() : file_list_(new base::ListValue) {
 }
+
 ComputedHashes::Writer::~Writer() {
 }
@@ -101,7 +120,7 @@ void ComputedHashes::Writer::AddHashes(const base::FilePath& relative_path,
                                        const std::vector<std::string>& hashes) {
   base::DictionaryValue* dict = new base::DictionaryValue();
   base::ListValue* block_hashes = new base::ListValue();
-  file_list_.Append(dict);
+  file_list_->Append(dict);
   dict->SetString(kPathKey,
                   relative_path.NormalizePathSeparatorsTo('/').AsUTF8Unsafe());
   dict->SetInteger(kBlockSizeKey, block_size);
@@ -118,15 +137,46 @@ void ComputedHashes::Writer::AddHashes(const base::FilePath& relative_path,
 bool ComputedHashes::Writer::WriteToFile(const base::FilePath& path) {
   std::string json;
-  if (!base::JSONWriter::Write(&file_list_, &json))
+  base::DictionaryValue top_dictionary;
+  top_dictionary.SetInteger(kVersionKey, kVersion);
+  top_dictionary.Set(kFileHashesKey, file_list_.release());
+
+  if (!base::JSONWriter::Write(&top_dictionary, &json))
     return false;
+
   int written = base::WriteFile(path, json.data(), json.size());
   if (static_cast<unsigned>(written) != json.size()) {
-    LOG(ERROR) << "Error writing " << path.MaybeAsASCII()
+    LOG(ERROR) << "Error writing " << path.AsUTF8Unsafe()
                << " ; write result:" << written << " expected:" << json.size();
     return false;
   }
   return true;
 }
 
+void ComputedHashes::ComputeHashesForContent(const std::string& contents,
+                                             size_t block_size,
+                                             std::vector<std::string>* hashes) {
+  size_t offset = 0;
+  // Even when |contents| is empty, we want to output at least one hash
+  // block (the hash of the empty string).
+  do {
+    const char* block_start = contents.data() + offset;
+    DCHECK(offset <= contents.size());
+    size_t bytes_to_read = std::min(contents.size() - offset, block_size);
+    scoped_ptr<crypto::SecureHash> hash(
+        crypto::SecureHash::Create(crypto::SecureHash::SHA256));
+    hash->Update(block_start, bytes_to_read);
+    hashes->push_back(std::string());
+    std::string* buffer = &(hashes->back());
+    buffer->resize(crypto::kSHA256Length);
+    hash->Finish(string_as_array(buffer), buffer->size());
+
+    // If |contents| is empty, then we want to just exit here.
+    if (bytes_to_read == 0)
+      break;
+
+    offset += bytes_to_read;
+  } while (offset < contents.size());
+}
+
 }  // namespace extensions
--- a/extensions/browser/computed_hashes.h
+++ b/extensions/browser/computed_hashes.h
@@ -9,10 +9,11 @@
 #include <string>
 #include <vector>
 
-#include "base/values.h"
+#include "base/memory/scoped_ptr.h"
 
 namespace base {
 class FilePath;
+class ListValue;
 }
 
 namespace extensions {
@@ -54,9 +55,16 @@ class ComputedHashes {
     bool WriteToFile(const base::FilePath& path);
 
    private:
-    // The top-level object that will be serialized as JSON.
-    base::ListValue file_list_;
+    // Each element of this list contains the path and block hashes for one
+    // file.
+    scoped_ptr<base::ListValue> file_list_;
   };
+
+  // Computes the SHA256 hash of each |block_size| chunk in |contents|, placing
+  // the results into |hashes|.
+  static void ComputeHashesForContent(const std::string& contents,
+                                      size_t block_size,
+                                      std::vector<std::string>* hashes);
 };
 
 }  // namespace extensions
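
As a rough usage sketch (not part of this CL), the new static helper can be
combined with the existing Writer to produce a computed_hashes.json for a
single file. The int block_size parameter of AddHashes is assumed from the
kBlockSizeKey handling in the .cc diff above; error handling is omitted:

    // Sketch only: hash one file's contents and write them out in the
    // versioned format introduced by this change.
    #include <string>
    #include <vector>

    #include "base/files/file_path.h"
    #include "extensions/browser/computed_hashes.h"

    bool WriteHashesForFile(const std::string& contents,
                            const base::FilePath& relative_path,
                            const base::FilePath& hashes_file) {
      const int kBlockSize = 4096;  // Same block size the unit test uses.
      std::vector<std::string> hashes;
      // Always produces at least one hash, even for zero-length contents.
      extensions::ComputedHashes::ComputeHashesForContent(contents, kBlockSize,
                                                          &hashes);
      extensions::ComputedHashes::Writer writer;
      writer.AddHashes(relative_path, kBlockSize, hashes);
      return writer.WriteToFile(hashes_file);  // Emits the version-2 JSON.
    }

The Reader side does the inverse: InitFromFile() rejects any file whose
version is not 2, which is what forces stale computed_hashes.json files to
be regenerated.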
--- a/extensions/browser/computed_hashes_unittest.cc
+++ b/extensions/browser/computed_hashes_unittest.cc
@@ -2,12 +2,24 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
+#include "base/base64.h"
 #include "base/files/file_path.h"
 #include "base/files/scoped_temp_dir.h"
 #include "crypto/sha2.h"
 #include "extensions/browser/computed_hashes.h"
 #include "testing/gtest/include/gtest/gtest.h"
 
+namespace {
+
+// Helper to return the base64 encoding of |data| by value.
+std::string Base64Encode(const std::string& data) {
+  std::string result;
+  base::Base64Encode(data, &result);
+  return result;
+}
+
+}  // namespace
+
 namespace extensions {
 
 TEST(ComputedHashes, ComputedHashes) {
@@ -59,4 +71,47 @@ TEST(ComputedHashes, ComputedHashes) {
   EXPECT_EQ(hashes2, read_hashes2);
 }
 
+// Note: the expected hashes used in this test were generated using Linux
+// command-line tools. E.g., from a bash prompt:
+//   $ printf "hello world" | openssl dgst -sha256 -binary | base64
+//
+// The expected hashes for the multiple-block file were generated by doing:
+//   $ for i in `seq 500` ; do printf "hello world" ; done > hello.txt
+//   $ dd if=hello.txt bs=4096 count=1 | openssl dgst -sha256 -binary | base64
+//   $ dd if=hello.txt skip=1 bs=4096 count=1 | \
+//     openssl dgst -sha256 -binary | base64
+TEST(ComputedHashes, ComputeHashesForContent) {
+  const int block_size = 4096;
+
+  // Simple short input.
+  std::string content1 = "hello world";
+  std::string content1_expected_hash =
+      "uU0nuZNNPgilLlLX2n2r+sSE7+N6U4DukIj3rOLvzek=";
+  std::vector<std::string> hashes1;
+  ComputedHashes::ComputeHashesForContent(content1, block_size, &hashes1);
+  ASSERT_EQ(1u, hashes1.size());
+  EXPECT_EQ(content1_expected_hash, Base64Encode(hashes1[0]));
+
+  // Multiple-block input.
+  std::string content2;
+  for (int i = 0; i < 500; i++)
+    content2 += "hello world";
+  const char* content2_expected_hashes[] = {
+      "bvtt5hXo8xvHrlzGAhhoqPL/r+4zJXHx+6wAvkv15V8=",
+      "lTD45F7P6I/HOdi8u7FLRA4qzAYL+7xSNVeusG6MJI0="};
+  std::vector<std::string> hashes2;
+  ComputedHashes::ComputeHashesForContent(content2, block_size, &hashes2);
+  ASSERT_EQ(2u, hashes2.size());
+  EXPECT_EQ(content2_expected_hashes[0], Base64Encode(hashes2[0]));
+  EXPECT_EQ(content2_expected_hashes[1], Base64Encode(hashes2[1]));
+
+  // Now an empty input.
+  std::string content3;
+  std::vector<std::string> hashes3;
+  ComputedHashes::ComputeHashesForContent(content3, block_size, &hashes3);
+  ASSERT_EQ(1u, hashes3.size());
+  ASSERT_EQ(std::string("47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="),
+            Base64Encode(hashes3[0]));
+}
+
 }  // namespace extensions
--- a/extensions/browser/content_hash_fetcher.cc
+++ b/extensions/browser/content_hash_fetcher.cc
@@ -12,14 +12,12 @@
 #include "base/json/json_reader.h"
 #include "base/memory/ref_counted.h"
 #include "base/metrics/histogram.h"
-#include "base/stl_util.h"
 #include "base/synchronization/lock.h"
 #include "base/task_runner_util.h"
 #include "base/timer/elapsed_timer.h"
 #include "base/version.h"
 #include "content/public/browser/browser_context.h"
 #include "content/public/browser/browser_thread.h"
-#include "crypto/secure_hash.h"
 #include "crypto/sha2.h"
 #include "extensions/browser/computed_hashes.h"
 #include "extensions/browser/content_hash_tree.h"
@@ -367,26 +365,7 @@ bool ContentHashFetcherJob::CreateHashes(const base::FilePath& hashes_file) {
   // Iterate through taking the hash of each block of size (block_size_) of
   // the file.
   std::vector<std::string> hashes;
-  size_t offset = 0;
-  while (offset < contents.size()) {
-    if (IsCancelled())
-      return false;
-    const char* block_start = contents.data() + offset;
-    size_t bytes_to_read =
-        std::min(contents.size() - offset, static_cast<size_t>(block_size_));
-    DCHECK(bytes_to_read > 0);
-    scoped_ptr<crypto::SecureHash> hash(
-        crypto::SecureHash::Create(crypto::SecureHash::SHA256));
-    hash->Update(block_start, bytes_to_read);
-    hashes.push_back(std::string());
-    std::string* buffer = &hashes.back();
-    buffer->resize(crypto::kSHA256Length);
-    hash->Finish(string_as_array(buffer), buffer->size());
-
-    // Get ready for next iteration.
-    offset += bytes_to_read;
-  }
-
+  ComputedHashes::ComputeHashesForContent(contents, block_size_, &hashes);
   std::string root =
       ComputeTreeHashRoot(hashes, block_size_ / crypto::kSHA256Length);
   if (expected_root && *expected_root != root) {