Commit 5078bb0c authored by tony@chromium.org

Remove the length field from the index entries in data pack files.

After the last index entry, we put an extra entry (id of 0, offset
pointing to where the next entry would start) so we can compute
the size of the last index entry.
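
A minimal reader sketch of the new layout (illustrative only; read_pack_v3 and
its constants are not Chromium code, the real readers are in the diff below).
Each index entry is a little-endian uint16 id plus a uint32 offset, and an
item's length is simply the next entry's offset minus its own:

    import struct

    HEADER_LENGTH = 2 * 4     # two uint32s: file version, number of entries
    INDEX_ENTRY_SIZE = 2 + 4  # uint16 resource id + uint32 file offset

    def read_pack_v3(path):
      data = open(path, "rb").read()
      version, num_entries = struct.unpack("<II", data[:HEADER_LENGTH])
      if version != 3:
        raise ValueError("unexpected pak file version")
      resources = {}
      for i in range(num_entries):
        pos = HEADER_LENGTH + i * INDEX_ENTRY_SIZE
        res_id, offset = struct.unpack("<HI", data[pos:pos + INDEX_ENTRY_SIZE])
        # The following entry (possibly the id-0 terminator) marks where this
        # item ends, so no per-entry length field is needed.
        _, next_offset = struct.unpack(
            "<HI", data[pos + INDEX_ENTRY_SIZE:pos + 2 * INDEX_ENTRY_SIZE])
        resources[res_id] = data[offset:next_offset]
      return resources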

This saves 4 bytes per item in data pack files, which is about 10k per
locale pak file.  When compressed, this saves about 237k.

BUG=76285

Review URL: http://codereview.chromium.org/7604012

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@96415 0039d316-1c4b-4281-b951-d872f2087c98
parent 81ac4a3f
@@ -29,7 +29,7 @@ namespace {
 // Version number of the current theme pack. We just throw out and rebuild
 // theme packs that aren't int-equal to this.
-const int kThemePackVersion = 16;
+const int kThemePackVersion = 17;
 // IDs that are in the DataPack won't clash with the positive integer
 // uint16. kHeaderID should always have the maximum value because we want the
......
@@ -393,7 +393,7 @@ TEST_F(BrowserThemePackTest, TestNonExistantImages) {
 TEST_F(BrowserThemePackTest, CanBuildAndReadPack) {
   ScopedTempDir dir;
   ASSERT_TRUE(dir.CreateUniqueTempDir());
-  FilePath file = dir.path().Append(FILE_PATH_LITERAL("data.pak"));
+  FilePath file = dir.path().AppendASCII("data.pak");
   // Part 1: Build the pack from an extension.
   {
......
@@ -9,7 +9,7 @@ See base/pack_file* for details.
 import struct
-FILE_FORMAT_VERSION = 2
+FILE_FORMAT_VERSION = 3
 HEADER_LENGTH = 2 * 4  # Two uint32s. (file version and number of entries)
 class WrongFileVersion(Exception):
@@ -26,13 +26,17 @@ def ReadDataPack(input_file):
     raise WrongFileVersion
   resources = {}
+  if num_entries == 0:
+    return resources
   # Read the index and data.
   data = data[HEADER_LENGTH:]
-  kIndexEntrySize = 2 + 2 * 4  # Each entry is 1 uint16 and 2 uint32s.
+  kIndexEntrySize = 2 + 4  # Each entry is a uint16 and a uint32.
   for _ in range(num_entries):
-    id, offset, length = struct.unpack("<HII", data[:kIndexEntrySize])
+    id, offset = struct.unpack("<HI", data[:kIndexEntrySize])
     data = data[kIndexEntrySize:]
-    resources[id] = original_data[offset:offset + length]
+    next_id, next_offset = struct.unpack("<HI", data[:kIndexEntrySize])
+    resources[id] = original_data[offset:next_offset]
   return resources
@@ -44,15 +48,18 @@ def WriteDataPack(resources, output_file):
   # Write file header.
   file.write(struct.pack("<II", FILE_FORMAT_VERSION, len(ids)))
-  # Each entry is 1 uint16 and 2 uint32s.
-  index_length = len(ids) * (2 + 2 * 4)
+  # Each entry is a uint16 and a uint32. We have one extra entry for the last
+  # item.
+  index_length = (len(ids) + 1) * (2 + 4)
   # Write index.
   data_offset = HEADER_LENGTH + index_length
   for id in ids:
-    file.write(struct.pack("<HII", id, data_offset, len(resources[id])))
+    file.write(struct.pack("<HI", id, data_offset))
     data_offset += len(resources[id])
+  file.write(struct.pack("<HI", 0, data_offset))
   # Write data.
   for id in ids:
     file.write(resources[id])
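
As a quick round-trip illustration of the two helpers above (a sketch, not part
of the change; it assumes output_file is a filename that WriteDataPack opens
itself, which is not visible in this hunk):

    pack = {1: "", 4: "this is id 4", 6: "this is id 6", 10: ""}
    WriteDataPack(pack, "test.pak")
    assert ReadDataPack("test.pak") == pack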
......
@@ -15,7 +15,7 @@ from grit.node import message
 from grit.node import misc
-PACK_FILE_VERSION = 2
+PACK_FILE_VERSION = 3
 class DataPack(interface.ItemFormatter):
@@ -60,15 +60,18 @@ class DataPack(interface.ItemFormatter):
     ret.append(struct.pack("<II", PACK_FILE_VERSION, len(ids)))
     HEADER_LENGTH = 2 * 4  # Two uint32s.
-    # Each entry is 1 uint16 + 2 uint32s.
-    index_length = len(ids) * (2 + 2 * 4)
+    # Each entry is a uint16 + a uint32. We have one extra entry for the last
+    # item.
+    index_length = (len(ids) + 1) * (2 + 4)
     # Write index.
     data_offset = HEADER_LENGTH + index_length
     for id in ids:
-      ret.append(struct.pack("<HII", id, data_offset, len(resources[id])))
+      ret.append(struct.pack("<HI", id, data_offset))
       data_offset += len(resources[id])
+    ret.append(struct.pack("<HI", 0, data_offset))
     # Write data.
     for id in ids:
       ret.append(resources[id])
......
@@ -15,10 +15,14 @@ from grit.format import data_pack
 class FormatDataPackUnittest(unittest.TestCase):
   def testWriteDataPack(self):
-    expected = ('\x02\x00\x00\x00\x04\x00\x00\x00\x01\x000\x00\x00'
-                '\x00\x00\x00\x00\x00\x04\x000\x00\x00\x00\x0c\x00\x00\x00'
-                '\x06\x00<\x00\x00\x00\x0c\x00\x00\x00\n\x00H\x00'
-                '\x00\x00\x00\x00\x00\x00this is id 4this is id 6')
+    expected = (
+        '\x03\x00\x00\x00\x04\x00\x00\x00'  # header (version, no. entries)
+        '\x01\x00\x26\x00\x00\x00'  # index entry 1
+        '\x04\x00\x26\x00\x00\x00'  # index entry 4
+        '\x06\x00\x32\x00\x00\x00'  # index entry 6
+        '\x0a\x00\x3e\x00\x00\x00'  # index entry 10
+        '\x00\x00\x3e\x00\x00\x00'  # extra entry for the size of the last item
+        'this is id 4this is id 6')  # data
     input = { 1: "", 4: "this is id 4", 6: "this is id 6", 10: "" }
     output = data_pack.DataPack.WriteDataPack(input)
     self.failUnless(output == expected)
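
The offsets in the new expected string follow directly from the format: the
header is 8 bytes and the index holds five 6-byte entries (four resources plus
the terminator), so the data section starts at 8 + 5 * 6 = 38 = 0x26. A quick
sanity check of the values used above:

    assert 2 * 4 + (4 + 1) * (2 + 4) == 0x26   # ids 1 and 4 (id 1 is empty)
    assert 0x26 + len("this is id 4") == 0x32  # id 6
    assert 0x32 + len("this is id 6") == 0x3e  # id 10 and the terminator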
......
@@ -17,7 +17,7 @@
 namespace {
-static const uint32 kFileFormatVersion = 2;
+static const uint32 kFileFormatVersion = 3;
 // Length of file header: version and entry count.
 static const size_t kHeaderLength = 2 * sizeof(uint32);
@@ -25,7 +25,6 @@ static const size_t kHeaderLength = 2 * sizeof(uint32);
 struct DataPackEntry {
   uint16 resource_id;
   uint32 file_offset;
-  uint32 length;
   static int CompareById(const void* void_key, const void* void_entry) {
     uint16 key = *reinterpret_cast<const uint16*>(void_key);
@@ -42,7 +41,7 @@ struct DataPackEntry {
 };
 #pragma pack(pop)
-COMPILE_ASSERT(sizeof(DataPackEntry) == 10, size_of_header_must_be_ten);
+COMPILE_ASSERT(sizeof(DataPackEntry) == 6, size_of_entry_must_be_six);
 // We're crashing when trying to load a pak file on Windows. Add some error
 // codes for logging.
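
The new assert value follows from the packed layout: with the #pragma pack
above keeping the struct tight, an entry is a uint16 (2 bytes) plus a uint32
(4 bytes), i.e. 6 bytes, which matches the "<HI" entries the Python tools
write. The same arithmetic, as a one-line Python check:

    import struct
    assert struct.calcsize("<HI") == 6  # uint16 id + uint32 offset, no padding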
@@ -101,11 +100,12 @@ bool DataPack::Load(const FilePath& path) {
     mmap_.reset();
     return false;
   }
-  // 2) Verify the entries are within the appropriate bounds.
-  for (size_t i = 0; i < resource_count_; ++i) {
+  // 2) Verify the entries are within the appropriate bounds. There's an extra
+  // entry after the last item which gives us the length of the last item.
+  for (size_t i = 0; i < resource_count_ + 1; ++i) {
     const DataPackEntry* entry = reinterpret_cast<const DataPackEntry*>(
         mmap_->data() + kHeaderLength + (i * sizeof(DataPackEntry)));
-    if (entry->file_offset + entry->length > mmap_->length()) {
+    if (entry->file_offset > mmap_->length()) {
       LOG(ERROR) << "Entry #" << i << " in data pack points off end of file. "
                  << "Was the file corrupted?";
       UMA_HISTOGRAM_ENUMERATION("DataPack.Load", ENTRY_NOT_FOUND,
@@ -131,14 +131,17 @@ bool DataPack::GetStringPiece(uint16 resource_id,
 #error DataPack assumes little endian
 #endif
-  DataPackEntry* target = reinterpret_cast<DataPackEntry*>(
+  const DataPackEntry* target = reinterpret_cast<const DataPackEntry*>(
       bsearch(&resource_id, mmap_->data() + kHeaderLength, resource_count_,
               sizeof(DataPackEntry), DataPackEntry::CompareById));
   if (!target) {
     return false;
   }
-  data->set(mmap_->data() + target->file_offset, target->length);
+  const DataPackEntry* next_entry = target + 1;
+  size_t length = next_entry->file_offset - target->file_offset;
+  data->set(mmap_->data() + target->file_offset, length);
   return true;
 }
@@ -173,8 +176,9 @@ bool DataPack::WritePack(const FilePath& path,
     return false;
   }
-  // Each entry is 1 uint16 + 2 uint32s.
-  uint32 index_length = entry_count * sizeof(DataPackEntry);
+  // Each entry is a uint16 + a uint32. We have an extra entry after the last
+  // item so we can compute the size of the last item.
+  uint32 index_length = (entry_count + 1) * sizeof(DataPackEntry);
   uint32 data_offset = kHeaderLength + index_length;
   for (std::map<uint16, base::StringPiece>::const_iterator it =
            resources.begin();
@@ -192,14 +196,22 @@ bool DataPack::WritePack(const FilePath& path,
       return false;
     }
-    uint32 len = it->second.length();
-    if (fwrite(&len, sizeof(len), 1, file) != 1) {
-      LOG(ERROR) << "Failed to write length for " << resource_id;
-      file_util::CloseFile(file);
-      return false;
-    }
-    data_offset += len;
+    data_offset += it->second.length();
   }
+  // We place an extra entry after the last item that allows us to read the
+  // size of the last item.
+  uint16 resource_id = 0;
+  if (fwrite(&resource_id, sizeof(resource_id), 1, file) != 1) {
+    LOG(ERROR) << "Failed to write extra resource id.";
+    file_util::CloseFile(file);
+    return false;
+  }
+  if (fwrite(&data_offset, sizeof(data_offset), 1, file) != 1) {
+    LOG(ERROR) << "Failed to write extra offset.";
+    file_util::CloseFile(file);
+    return false;
+  }
   for (std::map<uint16, base::StringPiece>::const_iterator it =
......