Commit 18d614e2 authored by Andrew Grieve, committed by Commit Bot

Add size breakdown to //tools/grit/pak_util.py print

Example output for: pak_util.py print locales/zh-CN.pak
version: 5
encoding: utf-8
num_resources: 3120
num_aliases: 413
size_header: 12
size_id_table: 16248
size_alias_table: 1652
size_data: 99490
total_size: 117402
Entry(id=423, canonical_id=423, size=271, sha1=bd30bee710): ...
...
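
The numbers are self-consistent with the v5 layout parsed below. num_resources
counts aliased entries, so the id table holds 3120 - 413 = 2707 canonical
entries plus one end-of-table sentinel at 6 bytes each, and each alias entry
is 4 bytes:

  size_id_table    = (2707 + 1) * 6 = 16248
  size_alias_table = 413 * 4 = 1652
  total_size       = 12 + 16248 + 1652 + 99490 = 117402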

Bug: 749521
Change-Id: Id0c3e788c83d9e97d67b7eb6a4090ca68cb97431
Reviewed-on: https://chromium-review.googlesource.com/794340
Commit-Queue: agrieve <agrieve@chromium.org>
Reviewed-by: Robert Flack <flackr@chromium.org>
Cr-Commit-Position: refs/heads/master@{#520138}
parent 603dfcf4
@@ -33,8 +33,42 @@ class CorruptDataPack(Exception):
   pass


-DataPackContents = collections.namedtuple(
-    'DataPackContents', 'resources encoding')
+class DataPackSizes(object):
+  def __init__(self, header, id_table, alias_table, data):
+    self.header = header
+    self.id_table = id_table
+    self.alias_table = alias_table
+    self.data = data
+
+  @property
+  def total(self):
+    return sum(v for v in self.__dict__.itervalues())
+
+  def __iter__(self):
+    yield ('header', self.header)
+    yield ('id_table', self.id_table)
+    yield ('alias_table', self.alias_table)
+    yield ('data', self.data)
+
+  def __eq__(self, other):
+    return self.__dict__ == other.__dict__
+
+  def __repr__(self):
+    return self.__class__.__name__ + repr(self.__dict__)
+
+
+class DataPackContents(object):
+  def __init__(self, resources, encoding, version, aliases, sizes):
+    # Map of resource_id -> str.
+    self.resources = resources
+    # Encoding (int).
+    self.encoding = encoding
+    # Version (int).
+    self.version = version
+    # Map of resource_id -> canonical_resource_id.
+    self.aliases = aliases
+    # DataPackSizes instance.
+    self.sizes = sizes


 def Format(root, lang='en', output_dir='.'):
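
Since DataPackSizes iterates as (name, value) pairs and exposes a computed
total, a caller can format the whole breakdown in one line. A minimal sketch
of the intended use, mirroring the _PrintMain change further down (numbers
taken from the example output above):

  sizes = DataPackSizes(12, 16248, 1652, 99490)
  breakdown = ', '.join('{}: {}'.format(*x) for x in sizes)
  print 'total_size: {} ({})'.format(sizes.total, breakdown)
  # total_size: 117402 (header: 12, id_table: 16248, alias_table: 1652, data: 99490)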
@@ -59,45 +93,51 @@ def ReadDataPack(input_file):

 def ReadDataPackFromString(data):
   """Reads a data pack file and returns a dictionary."""
-  original_data = data
-
+  # Read the header.
   version = struct.unpack('<I', data[:4])[0]
   if version == 4:
     resource_count, encoding = struct.unpack('<IB', data[4:9])
     alias_count = 0
-    data = data[9:]
+    header_size = 9
   elif version == 5:
     encoding, resource_count, alias_count = struct.unpack('<BxxxHH', data[4:12])
-    data = data[12:]
+    header_size = 12
   else:
     raise WrongFileVersion('Found version: ' + str(version))

   resources = {}
   kIndexEntrySize = 2 + 4  # Each entry is a uint16 and a uint32.
   def entry_at_index(idx):
-    offset = idx * kIndexEntrySize
+    offset = header_size + idx * kIndexEntrySize
     return struct.unpack('<HI', data[offset:offset + kIndexEntrySize])

   prev_resource_id, prev_offset = entry_at_index(0)
   for i in xrange(1, resource_count + 1):
     resource_id, offset = entry_at_index(i)
-    resources[prev_resource_id] = original_data[prev_offset:offset]
+    resources[prev_resource_id] = data[prev_offset:offset]
     prev_resource_id, prev_offset = resource_id, offset
+  id_table_size = (resource_count + 1) * kIndexEntrySize

   # Read the alias table.
-  alias_data = data[(resource_count + 1) * kIndexEntrySize:]
   kAliasEntrySize = 2 + 2  # uint16, uint16
   def alias_at_index(idx):
-    offset = idx * kAliasEntrySize
-    return struct.unpack('<HH', alias_data[offset:offset + kAliasEntrySize])
+    offset = header_size + id_table_size + idx * kAliasEntrySize
+    return struct.unpack('<HH', data[offset:offset + kAliasEntrySize])

   aliases = {}
   for i in xrange(alias_count):
     resource_id, index = alias_at_index(i)
     aliased_id = entry_at_index(index)[0]
     aliases[resource_id] = aliased_id
     resources[resource_id] = resources[aliased_id]

-  return DataPackContents(resources, encoding)
+  alias_table_size = kAliasEntrySize * alias_count
+  sizes = DataPackSizes(
+      header_size, id_table_size, alias_table_size,
+      len(data) - header_size - id_table_size - alias_table_size)
+  assert sizes.total == len(data), 'original={} computed={}'.format(
+      len(data), sizes.total)
+  return DataPackContents(resources, encoding, version, aliases, sizes)


 def WriteDataPackToString(resources, encoding):
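
For reference, the byte layout these offsets walk, as implied by the struct
calls above (v5 shown; v4 differs only in its 9-byte header and the absence
of an alias table):

  bytes 0-3:   uint32 version
  bytes 4-7:   uint8 encoding + 3 bytes padding
  bytes 8-11:  uint16 resource_count, uint16 alias_count
  then:        (resource_count + 1) id-table entries of <uint16 id, uint32 offset>,
               where offsets count from the start of the file and the extra
               entry marks the end of the last resource
  then:        alias_count entries of <uint16 id, uint16 id-table index>
  then:        resource data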
@@ -181,8 +221,9 @@ def RePack(output_file, input_files, whitelist_file=None,
   if whitelist_file:
     whitelist = util.ReadFile(whitelist_file, util.RAW_TEXT).strip().split('\n')
     whitelist = set(map(int, whitelist))
+  inputs = [(p.resources, p.encoding) for p in input_data_packs]
   resources, encoding = RePackFromDataPackStrings(
-      input_data_packs, whitelist, suppress_removed_key_output)
+      inputs, whitelist, suppress_removed_key_output)
   WriteDataPack(resources, output_file, encoding)
   with open(output_file + '.info', 'w') as output_info_file:
     for filename in input_info_files:
@@ -192,17 +233,16 @@ def RePack(output_file, input_files, whitelist_file=None,

 def RePackFromDataPackStrings(inputs, whitelist,
                               suppress_removed_key_output=False):
-  """Returns a data pack string that combines the resources from inputs.
+  """Combines all inputs into one.

   Args:
-    inputs: a list of data pack strings that need to be combined.
+    inputs: a list of (resources_by_id, encoding) tuples to be combined.
     whitelist: a list of resource IDs that should be kept in the output string
                or None to include all resources.
     suppress_removed_key_output: Do not print removed keys.

   Returns:
-    DataPackContents: a tuple containing the new combined data pack and its
-    encoding.
+    Returns (resources_by_id, encoding).

   Raises:
     KeyError: if there are duplicate keys or resource encoding is
@@ -210,36 +250,36 @@ def RePackFromDataPackStrings(inputs, whitelist,
   """
   resources = {}
   encoding = None
-  for content in inputs:
+  for input_resources, input_encoding in inputs:
     # Make sure we have no dups.
-    duplicate_keys = set(content.resources.keys()) & set(resources.keys())
+    duplicate_keys = set(input_resources.keys()) & set(resources.keys())
     if duplicate_keys:
       raise exceptions.KeyError('Duplicate keys: ' + str(list(duplicate_keys)))
     # Make sure encoding is consistent.
     if encoding in (None, BINARY):
-      encoding = content.encoding
-    elif content.encoding not in (BINARY, encoding):
+      encoding = input_encoding
+    elif input_encoding not in (BINARY, encoding):
       raise exceptions.KeyError('Inconsistent encodings: ' + str(encoding) +
-                                ' vs ' + str(content.encoding))
+                                ' vs ' + str(input_encoding))

     if whitelist:
-      whitelisted_resources = dict([(key, content.resources[key])
-                                    for key in content.resources.keys()
+      whitelisted_resources = dict([(key, input_resources[key])
+                                    for key in input_resources.keys()
                                     if key in whitelist])
       resources.update(whitelisted_resources)
-      removed_keys = [key for key in content.resources.keys()
+      removed_keys = [key for key in input_resources.keys()
                       if key not in whitelist]
       if not suppress_removed_key_output:
         for key in removed_keys:
           print 'RePackFromDataPackStrings Removed Key:', key
     else:
-      resources.update(content.resources)
+      resources.update(input_resources)

   # Encoding is 0 for BINARY, 1 for UTF8 and 2 for UTF16
   if encoding is None:
     encoding = BINARY
-  return DataPackContents(resources, encoding)
+  return resources, encoding


 def main():
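With the tuple-based signature, RePackFromDataPackStrings no longer needs
DataPackContents instances. A minimal sketch of the new contract (resource
ids and strings invented for illustration):

  resources, encoding = RePackFromDataPackStrings(
      [({1: 'foo'}, UTF8), ({2: 'bar'}, UTF8)], None)
  # resources == {1: 'foo', 2: 'bar'}; encoding == UTF8
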
@@ -28,16 +28,15 @@ class FormatDataPackUnittest(unittest.TestCase):
         '\x0a\x00\x3f\x00\x00\x00'  # index entry 10
         '\x00\x00\x3f\x00\x00\x00'  # extra entry for the size of last
         'this is id 4this is id 6')  # data
-    expected_resources = {
-        1: '',
-        4: 'this is id 4',
-        6: 'this is id 6',
-        10: '',
-    }
-    expected_data_pack = data_pack.DataPackContents(
-        expected_resources, data_pack.UTF8)
+    expected_data_pack = data_pack.DataPackContents(
+        {
+            1: '',
+            4: 'this is id 4',
+            6: 'this is id 6',
+            10: '',
+        }, data_pack.UTF8, 4, {}, data_pack.DataPackSizes(9, 30, 0, 24))
     loaded = data_pack.ReadDataPackFromString(expected_data)
-    self.assertEquals(loaded, expected_data_pack)
+    self.assertDictEqual(expected_data_pack.__dict__, loaded.__dict__)

   def testReadWriteDataPackV5(self):
     expected_data = (
@@ -51,19 +50,24 @@ class FormatDataPackUnittest(unittest.TestCase):
         '\x00\x00\x40\x00\x00\x00'  # extra entry for the size of last
         '\x0a\x00\x01\x00'  # alias table
         'this is id 4this is id 6')  # data
-    expected_resources = {
+    input_resources = {
         1: '',
         4: 'this is id 4',
         6: 'this is id 6',
         10: 'this is id 4',
     }
-    data = data_pack.WriteDataPackToString(expected_resources, data_pack.UTF8)
+    data = data_pack.WriteDataPackToString(input_resources, data_pack.UTF8)
     self.assertEquals(data, expected_data)

     expected_data_pack = data_pack.DataPackContents(
-        expected_resources, data_pack.UTF8)
+        {
+            1: '',
+            4: input_resources[4],
+            6: input_resources[6],
+            10: input_resources[4],
+        }, data_pack.UTF8, 5, {10: 4}, data_pack.DataPackSizes(12, 24, 4, 24))
     loaded = data_pack.ReadDataPackFromString(expected_data)
-    self.assertEquals(loaded, expected_data_pack)
+    self.assertDictEqual(expected_data_pack.__dict__, loaded.__dict__)

   def testRePackUnittest(self):
     expected_with_whitelist = {
@@ -79,8 +83,7 @@ class FormatDataPackUnittest(unittest.TestCase):
               {40: 'Never', 50: 'gonna run around and', 60: 'desert you'},
               {65: 'Close', 70: 'Awww, snap!'}]
     whitelist = [1, 10, 20, 30, 40, 50, 60]
-    inputs = [data_pack.DataPackContents(input, data_pack.UTF8) for input
-              in inputs]
+    inputs = [(i, data_pack.UTF8) for i in inputs]

     # RePack using whitelist
     output, _ = data_pack.RePackFromDataPackStrings(
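The DataPackSizes values in the two tests above check out by hand against
the layout: for the v4 pack, header = 9, id_table = (4 + 1) * 6 = 30, no
alias table, and data = 2 * len('this is id 4') = 24; for the v5 pack,
header = 12, id_table = (3 + 1) * 6 = 24 (id 10 lives in the alias table
rather than the id table), alias_table = 1 * 4 = 4, and data = 24.
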
@@ -33,7 +33,7 @@ def _ExtractMain(args):

 def _PrintMain(args):
   pak = data_pack.ReadDataPack(args.pak_file)
-  id_map = {id(v): k for k, v in sorted(pak.resources.items(), reverse=True)}
+  output = args.output
   encoding = 'binary'
   if pak.encoding == 1:
     encoding = 'utf-8'
@@ -41,14 +41,21 @@ def _PrintMain(args):
     encoding = 'utf-16'
   else:
     encoding = '?' + str(pak.encoding)
-  print 'Encoding:', encoding
-  try_decode = encoding.startswith('utf')
+  output.write('version: {}\n'.format(pak.version))
+  output.write('encoding: {}\n'.format(encoding))
+  output.write('num_resources: {}\n'.format(len(pak.resources)))
+  output.write('num_aliases: {}\n'.format(len(pak.aliases)))
+  breakdown = ', '.join('{}: {}'.format(*x) for x in pak.sizes)
+  output.write('total_size: {} ({})\n'.format(pak.sizes.total, breakdown))

+  try_decode = args.decode and encoding.startswith('utf')
   # Print IDs in ascending order, since that's the order in which they appear in
   # the file (order is lost by Python dict).
   for resource_id in sorted(pak.resources):
     data = pak.resources[resource_id]
-    desc = '<binary>'
+    canonical_id = pak.aliases.get(resource_id, resource_id)
+    desc = '<data>'
     if try_decode:
       try:
         desc = unicode(data, encoding)
@@ -58,19 +65,14 @@ def _PrintMain(args):
       except UnicodeDecodeError:
         pass
     sha1 = hashlib.sha1(data).hexdigest()[:10]
-    canonical_id = id_map[id(data)]
-    if resource_id == canonical_id:
-      line = u'Entry(id={}, len={}, sha1={}): {}'.format(
-          resource_id, len(data), sha1, desc)
-    else:
-      line = u'Entry(id={}, alias_of={}): {}'.format(
-          resource_id, canonical_id, desc)
-    print line.encode('utf-8')
+    output.write(
+        u'Entry(id={}, canonical_id={}, size={}, sha1={}): {}\n'.format(
+            resource_id, canonical_id, len(data), sha1, desc).encode('utf-8'))


 def _ListMain(args):
-  resources, _ = data_pack.ReadDataPack(args.pak_file)
-  for resource_id in sorted(resources.keys()):
+  pak = data_pack.ReadDataPack(args.pak_file)
+  for resource_id in sorted(pak.resources):
     args.output.write('%d\n' % resource_id)
@@ -99,8 +101,13 @@ def main():
   sub_parser = sub_parsers.add_parser('print',
       help='Prints all pak IDs and contents. Useful for diffing.')
   sub_parser.add_argument('pak_file')
+  sub_parser.add_argument('--output', type=argparse.FileType('w'),
+                          default=sys.stdout,
+                          help='The resource list path to write (default stdout)')
+  sub_parser.add_argument('--no-decode', dest='decode', action='store_false',
+                          default=True, help='Do not print entry data.')
   sub_parser.set_defaults(func=_PrintMain)

   sub_parser = sub_parsers.add_parser('list-id',
       help='Outputs all resource IDs to a file.')
   sub_parser.add_argument('pak_file')
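The new flags compose with the print command from the commit message, e.g.
to write the listing to a file without decoding entry data (the output
filename here is just an example):

  pak_util.py print --output=zh-CN.txt --no-decode locales/zh-CN.pak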