Commit 5078bb0c authored by tony@chromium.org

Remove the length field from the index entries in data pack files.

After the last index entry, we put an extra entry (id of 0, offset pointing
to where the next item's data would start) so we can compute the size of the
last item.

This saves 4 bytes per item in data pack files, which is about 10k per
locale pak file.  When compressed, this saves about 237k.
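
For illustration, a minimal sketch of the offset-delta trick in Python (the
index data below is hypothetical; the entry sizes match the diffs that
follow):

    # v3 index layout: (num_entries + 1) rows of (uint16 id, uint32 offset);
    # the final row is the sentinel (id 0) marking the end of the last item.
    def item_length(index, i):
        # An item's length is the next row's offset minus its own offset.
        return index[i + 1][1] - index[i][1]

    # Hypothetical index for three items whose data starts right after an
    # 8-byte header and a 4-row index (8 + 4 * 6 = 32 bytes):
    index = [(1, 32), (4, 32), (6, 44), (0, 56)]
    assert item_length(index, 0) == 0    # id 1 is empty
    assert item_length(index, 1) == 12   # id 4 holds 12 bytes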

BUG=76285

Review URL: http://codereview.chromium.org/7604012

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@96415 0039d316-1c4b-4281-b951-d872f2087c98
parent 81ac4a3f
@@ -29,7 +29,7 @@ namespace {
 // Version number of the current theme pack. We just throw out and rebuild
 // theme packs that aren't int-equal to this.
-const int kThemePackVersion = 16;
+const int kThemePackVersion = 17;
 
 // IDs that are in the DataPack won't clash with the positive integer
 // uint16. kHeaderID should always have the maximum value because we want the
@@ -393,7 +393,7 @@ TEST_F(BrowserThemePackTest, TestNonExistantImages) {
 TEST_F(BrowserThemePackTest, CanBuildAndReadPack) {
   ScopedTempDir dir;
   ASSERT_TRUE(dir.CreateUniqueTempDir());
-  FilePath file = dir.path().Append(FILE_PATH_LITERAL("data.pak"));
+  FilePath file = dir.path().AppendASCII("data.pak");
 
   // Part 1: Build the pack from an extension.
   {
@@ -9,7 +9,7 @@ See base/pack_file* for details.
 import struct
 
-FILE_FORMAT_VERSION = 2
+FILE_FORMAT_VERSION = 3
 HEADER_LENGTH = 2 * 4  # Two uint32s. (file version and number of entries)
 
 class WrongFileVersion(Exception):
@@ -26,13 +26,17 @@ def ReadDataPack(input_file):
     raise WrongFileVersion
 
   resources = {}
+  if num_entries == 0:
+    return resources
+
   # Read the index and data.
   data = data[HEADER_LENGTH:]
-  kIndexEntrySize = 2 + 2 * 4  # Each entry is 1 uint16 and 2 uint32s.
+  kIndexEntrySize = 2 + 4  # Each entry is a uint16 and a uint32.
   for _ in range(num_entries):
-    id, offset, length = struct.unpack("<HII", data[:kIndexEntrySize])
+    id, offset = struct.unpack("<HI", data[:kIndexEntrySize])
     data = data[kIndexEntrySize:]
-    resources[id] = original_data[offset:offset + length]
+    next_id, next_offset = struct.unpack("<HI", data[:kIndexEntrySize])
+    resources[id] = original_data[offset:next_offset]
   return resources
@@ -44,15 +48,18 @@ def WriteDataPack(resources, output_file):
   # Write file header.
   file.write(struct.pack("<II", FILE_FORMAT_VERSION, len(ids)))
 
-  # Each entry is 1 uint16 and 2 uint32s.
-  index_length = len(ids) * (2 + 2 * 4)
+  # Each entry is a uint16 and a uint32. We have one extra entry for the last
+  # item.
+  index_length = (len(ids) + 1) * (2 + 4)
 
   # Write index.
   data_offset = HEADER_LENGTH + index_length
   for id in ids:
-    file.write(struct.pack("<HII", id, data_offset, len(resources[id])))
+    file.write(struct.pack("<HI", id, data_offset))
     data_offset += len(resources[id])
+  file.write(struct.pack("<HI", 0, data_offset))
 
   # Write data.
   for id in ids:
     file.write(resources[id])
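
A hedged round-trip sketch of the two helpers above, assuming WriteDataPack
accepts a resource dict plus an output path and ReadDataPack returns the same
dict (the temp-file path is illustrative):

    resources = {1: "", 4: "this is id 4", 6: "this is id 6"}
    WriteDataPack(resources, "/tmp/example.pak")  # v3 header, index + sentinel, data
    assert ReadDataPack("/tmp/example.pak") == resources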
@@ -15,7 +15,7 @@ from grit.node import message
 from grit.node import misc
 
-PACK_FILE_VERSION = 2
+PACK_FILE_VERSION = 3
 
 class DataPack(interface.ItemFormatter):
@@ -60,15 +60,18 @@ class DataPack(interface.ItemFormatter):
     ret.append(struct.pack("<II", PACK_FILE_VERSION, len(ids)))
 
     HEADER_LENGTH = 2 * 4  # Two uint32s.
-    # Each entry is 1 uint16 + 2 uint32s.
-    index_length = len(ids) * (2 + 2 * 4)
+    # Each entry is a uint16 + a uint32. We have one extra entry for the last
+    # item.
+    index_length = (len(ids) + 1) * (2 + 4)
 
     # Write index.
     data_offset = HEADER_LENGTH + index_length
     for id in ids:
-      ret.append(struct.pack("<HII", id, data_offset, len(resources[id])))
+      ret.append(struct.pack("<HI", id, data_offset))
       data_offset += len(resources[id])
+    ret.append(struct.pack("<HI", 0, data_offset))
 
     # Write data.
     for id in ids:
       ret.append(resources[id])
@@ -15,10 +15,14 @@ from grit.format import data_pack
 class FormatDataPackUnittest(unittest.TestCase):
   def testWriteDataPack(self):
-    expected = ('\x02\x00\x00\x00\x04\x00\x00\x00\x01\x000\x00\x00'
-                '\x00\x00\x00\x00\x00\x04\x000\x00\x00\x00\x0c\x00\x00\x00'
-                '\x06\x00<\x00\x00\x00\x0c\x00\x00\x00\n\x00H\x00'
-                '\x00\x00\x00\x00\x00\x00this is id 4this is id 6')
+    expected = (
+        '\x03\x00\x00\x00\x04\x00\x00\x00'  # header (version, no. entries)
+        '\x01\x00\x26\x00\x00\x00'          # index entry 1
+        '\x04\x00\x26\x00\x00\x00'          # index entry 4
+        '\x06\x00\x32\x00\x00\x00'          # index entry 6
+        '\x0a\x00\x3e\x00\x00\x00'          # index entry 10
+        '\x00\x00\x3e\x00\x00\x00'          # extra entry (size of last item)
+        'this is id 4this is id 6')         # data
     input = { 1: "", 4: "this is id 4", 6: "this is id 6", 10: "" }
     output = data_pack.DataPack.WriteDataPack(input)
     self.failUnless(output == expected)
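
The magic offsets in expected can be checked by hand; a quick sanity check
under the v3 sizes used above:

    HEADER_LENGTH = 2 * 4     # version + entry count
    INDEX_ENTRY_SIZE = 2 + 4  # uint16 id + uint32 offset
    first = HEADER_LENGTH + (4 + 1) * INDEX_ENTRY_SIZE  # four entries + sentinel
    assert first == 0x26                          # ids 1 (empty) and 4 start here
    assert first + len("this is id 4") == 0x32    # id 6
    assert 0x32 + len("this is id 6") == 0x3e     # id 10 (empty) and the sentinel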
@@ -17,7 +17,7 @@
 namespace {
 
-static const uint32 kFileFormatVersion = 2;
+static const uint32 kFileFormatVersion = 3;
 
 // Length of file header: version and entry count.
 static const size_t kHeaderLength = 2 * sizeof(uint32);
@@ -25,7 +25,6 @@ static const size_t kHeaderLength = 2 * sizeof(uint32);
 struct DataPackEntry {
   uint16 resource_id;
   uint32 file_offset;
-  uint32 length;
 
   static int CompareById(const void* void_key, const void* void_entry) {
     uint16 key = *reinterpret_cast<const uint16*>(void_key);
@@ -42,7 +41,7 @@ struct DataPackEntry {
 };
 #pragma pack(pop)
 
-COMPILE_ASSERT(sizeof(DataPackEntry) == 10, size_of_header_must_be_ten);
+COMPILE_ASSERT(sizeof(DataPackEntry) == 6, size_of_entry_must_be_six);
 
 // We're crashing when trying to load a pak file on Windows. Add some error
 // codes for logging.
@@ -101,11 +100,12 @@ bool DataPack::Load(const FilePath& path) {
     mmap_.reset();
     return false;
   }
 
-  // 2) Verify the entries are within the appropriate bounds.
-  for (size_t i = 0; i < resource_count_; ++i) {
+  // 2) Verify the entries are within the appropriate bounds. There's an extra
+  // entry after the last item which gives us the length of the last item.
+  for (size_t i = 0; i < resource_count_ + 1; ++i) {
     const DataPackEntry* entry = reinterpret_cast<const DataPackEntry*>(
         mmap_->data() + kHeaderLength + (i * sizeof(DataPackEntry)));
-    if (entry->file_offset + entry->length > mmap_->length()) {
+    if (entry->file_offset > mmap_->length()) {
       LOG(ERROR) << "Entry #" << i << " in data pack points off end of file. "
                  << "Was the file corrupted?";
       UMA_HISTOGRAM_ENUMERATION("DataPack.Load", ENTRY_NOT_FOUND,
@@ -131,14 +131,17 @@ bool DataPack::GetStringPiece(uint16 resource_id,
 #error DataPack assumes little endian
 #endif
 
-  DataPackEntry* target = reinterpret_cast<DataPackEntry*>(
+  const DataPackEntry* target = reinterpret_cast<const DataPackEntry*>(
       bsearch(&resource_id, mmap_->data() + kHeaderLength, resource_count_,
               sizeof(DataPackEntry), DataPackEntry::CompareById));
   if (!target) {
     return false;
   }
 
-  data->set(mmap_->data() + target->file_offset, target->length);
+  const DataPackEntry* next_entry = target + 1;
+  size_t length = next_entry->file_offset - target->file_offset;
+
+  data->set(mmap_->data() + target->file_offset, length);
   return true;
 }
@@ -173,8 +176,9 @@ bool DataPack::WritePack(const FilePath& path,
     return false;
   }
 
-  // Each entry is 1 uint16 + 2 uint32s.
-  uint32 index_length = entry_count * sizeof(DataPackEntry);
+  // Each entry is a uint16 + a uint32. We have an extra entry after the last
+  // item so we can compute the size of the last item.
+  uint32 index_length = (entry_count + 1) * sizeof(DataPackEntry);
   uint32 data_offset = kHeaderLength + index_length;
   for (std::map<uint16, base::StringPiece>::const_iterator it =
            resources.begin();
@@ -192,14 +196,22 @@ bool DataPack::WritePack(const FilePath& path,
       return false;
     }
 
-    uint32 len = it->second.length();
-    if (fwrite(&len, sizeof(len), 1, file) != 1) {
-      LOG(ERROR) << "Failed to write length for " << resource_id;
-      file_util::CloseFile(file);
-      return false;
-    }
-
-    data_offset += len;
+    data_offset += it->second.length();
+  }
+
+  // We place an extra entry after the last item that allows us to read the
+  // size of the last item.
+  uint16 resource_id = 0;
+  if (fwrite(&resource_id, sizeof(resource_id), 1, file) != 1) {
+    LOG(ERROR) << "Failed to write extra resource id.";
+    file_util::CloseFile(file);
+    return false;
+  }
+  if (fwrite(&data_offset, sizeof(data_offset), 1, file) != 1) {
+    LOG(ERROR) << "Failed to write extra offset.";
+    file_util::CloseFile(file);
+    return false;
   }
 
   for (std::map<uint16, base::StringPiece>::const_iterator it =
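
For comparison, a hypothetical Python mirror of the new GetStringPiece read
path (a linear scan stands in for the bsearch; all names here are
illustrative):

    import struct

    HEADER_LENGTH = 2 * 4
    INDEX_ENTRY_SIZE = 2 + 4

    def get_resource(pak_bytes, resource_id):
        _, num_entries = struct.unpack_from("<II", pak_bytes, 0)
        for i in range(num_entries):
            base = HEADER_LENGTH + i * INDEX_ENTRY_SIZE
            rid, offset = struct.unpack_from("<HI", pak_bytes, base)
            if rid == resource_id:
                # The next row (possibly the sentinel) bounds this item's data.
                _, next_offset = struct.unpack_from("<HI", pak_bytes,
                                                    base + INDEX_ENTRY_SIZE)
                return pak_bytes[offset:next_offset]
        return None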