Commit 719d8624 authored by lizeb's avatar lizeb Committed by Commit bot

Refactor the symbol parsing, and move to NDK's nm.

First part of the orderfile generating script refactor. The aim here is to make
sure that all scripts parse the object files the same way, using the same
tool. This commit extracts the parsing logic from patch_orderfile.py. It uses nm
from the Android NDK.

BUG=452879

Review URL: https://codereview.chromium.org/884113002

Cr-Commit-Position: refs/heads/master@{#313528}
parent 20fc9d44
......@@ -26,17 +26,15 @@ The general pipeline is:
import collections
import logging
import subprocess
import sys
import symbol_extractor
# Prefixes for the symbols. We strip them from the incoming symbols, and add
# them back in the output file.
_PREFIXES = ('.text.startup.', '.text.hot.', '.text.unlikely.', '.text.')
SymbolInfo = collections.namedtuple('SymbolInfo', ['offset', 'size', 'name'])
def _RemoveClone(name):
"""Return name up to the ".clone." marker."""
clone_index = name.find('.clone.')
......@@ -45,61 +43,41 @@ def _RemoveClone(name):
return name
def _GetSymbolInfosFromStream(nm_lines):
"""Parses the output of nm, and get all the symbols from a binary.
def _GroupSymbolInfos(symbol_infos):
"""Group the symbol infos by name and offset.
Args:
nm_lines: An iterable of lines
symbol_infos: an iterable of SymbolInfo
Returns:
The same output as GetSymbolsFromBinary.
The same output as _GroupSymbolInfosFromBinary.
"""
# TODO(lizeb): Consider switching to objdump to simplify parsing.
symbol_infos = []
for line in nm_lines:
# We are interested in two types of lines:
# This:
# 00210d59 00000002 t _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev
# offset size <symbol_type> symbol_name
# And that:
# 0070ee8c T WebRtcSpl_ComplexBitReverse
# In the second case we don't have a size, so use -1 as a sentinel
parts = line.split()
if len(parts) == 4:
symbol_infos.append(SymbolInfo(
offset=int(parts[0], 16), size=int(parts[1], 16), name=parts[3]))
elif len(parts) == 3:
symbol_infos.append(SymbolInfo(
offset=int(parts[0], 16), size=-1, name=parts[2]))
# Map the addresses to symbols.
offset_to_symbol_infos = collections.defaultdict(list)
name_to_symbol_infos = collections.defaultdict(list)
for symbol in symbol_infos:
symbol = SymbolInfo(symbol[0], symbol[1], _RemoveClone(symbol[2]))
symbol = symbol_extractor.SymbolInfo(name=_RemoveClone(symbol.name),
offset=symbol.offset,
size=symbol.size)
offset_to_symbol_infos[symbol.offset].append(symbol)
name_to_symbol_infos[symbol.name].append(symbol)
return (offset_to_symbol_infos, name_to_symbol_infos)
return (dict(offset_to_symbol_infos), dict(name_to_symbol_infos))
def _GetSymbolInfosFromBinary(binary_filename):
"""Runs nm to get all the symbols from a binary.
def _GroupSymbolInfosFromBinary(binary_filename):
"""Group all the symbols from a binary by name and offset.
Args:
binary_filename: path to the binary.
Returns:
A tuple of collection.defaultdict:
A tuple of dict:
(offset_to_symbol_infos, name_to_symbol_infos):
- offset_to_symbol_infos: {offset: [symbol_info1, ...]}
- name_to_symbol_infos: {name: [symbol_info1, ...]}
"""
command = 'nm -S -n %s | egrep "( t )|( W )|( T )"' % binary_filename
p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
try:
result = _GetSymbolInfosFromStream(p.stdout)
return result
finally:
p.wait()
symbol_infos = symbol_extractor.SymbolInfosFromBinary(binary_filename)
return _GroupSymbolInfos(symbol_infos)
def _StripPrefix(line):
......@@ -230,7 +208,7 @@ def main(argv):
return 1
orderfile_filename = argv[1]
binary_filename = argv[2]
(offset_to_symbol_infos, name_to_symbol_infos) = _GetSymbolInfosFromBinary(
(offset_to_symbol_infos, name_to_symbol_infos) = _GroupSymbolInfosFromBinary(
binary_filename)
profiled_symbols = _GetSymbolsFromOrderfile(orderfile_filename)
expanded_symbols = _ExpandSymbols(
......
......@@ -3,9 +3,11 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import patch_orderfile
import unittest
import patch_orderfile
import symbol_extractor
class TestPatchOrderFile(unittest.TestCase):
def testRemoveClone(self):
......@@ -15,78 +17,39 @@ class TestPatchOrderFile(unittest.TestCase):
self.assertEquals(
"this.does.contain", patch_orderfile._RemoveClone(with_clone))
def testGetSymbolInfosFromStreamWithSize(self):
lines = [
"00210d59 00000002 t _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev"]
test_name = "_ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev"
test_offset = 0x210d59
def testAliasClonedSymbols(self):
symbol_infos = [
symbol_extractor.SymbolInfo(name='aSymbol', offset=0x42, size=0x12),
symbol_extractor.SymbolInfo(name='aSymbol.clone.', offset=8, size=1)]
(offset_to_symbol_infos, name_to_symbol_infos) = \
patch_orderfile._GetSymbolInfosFromStream(lines)
self.assertEquals(len(offset_to_symbol_infos), 1)
patch_orderfile._GroupSymbolInfos(symbol_infos)
self.assertEquals(len(offset_to_symbol_infos), 2)
for i in range(2):
s = symbol_infos[i]
matching = offset_to_symbol_infos[s.offset][0]
self.assertEquals(matching.offset, s.offset)
self.assertEquals(matching.size, s.size)
self.assertEquals(len(name_to_symbol_infos), 1)
self.assertIn(test_name, name_to_symbol_infos)
self.assertIn(test_offset, offset_to_symbol_infos)
self.assertEquals(len(name_to_symbol_infos['aSymbol']), 2)
self.assertEquals(len(name_to_symbol_infos[test_name]), 1)
s = name_to_symbol_infos[test_name][0]
self.assertEquals(test_offset, s.offset)
self.assertEquals(2, s.size)
self.assertEquals(test_name, s.name)
self.assertEquals(len(offset_to_symbol_infos[test_offset]), 1)
s = offset_to_symbol_infos[test_offset][0]
self.assertEquals(test_offset, s.offset)
self.assertEquals(2, s.size)
self.assertEquals(test_name, s.name)
def testGetSymbolInfosFromStreamWithoutSize(self):
lines = [
"0070ee8c T WebRtcSpl_ComplexBitReverse"]
test_name = "WebRtcSpl_ComplexBitReverse"
test_offset = 0x70ee8c
(offset_to_symbol_infos, name_to_symbol_infos) = \
patch_orderfile._GetSymbolInfosFromStream(lines)
def testGroupSymbolsByOffset(self):
symbol_infos = (
symbol_extractor.SymbolInfo(name='aSymbol', offset=0x42, size=0x12),
symbol_extractor.SymbolInfo(name='anotherSymbol', offset=0x42, size=1))
(offset_to_symbol_infos, _) = \
patch_orderfile._GroupSymbolInfos(symbol_infos)
self.assertEquals(len(offset_to_symbol_infos), 1)
self.assertEquals(len(name_to_symbol_infos), 1)
self.assertIn(test_name, name_to_symbol_infos)
self.assertIn(test_offset, offset_to_symbol_infos)
self.assertEquals(len(name_to_symbol_infos[test_name]), 1)
s = name_to_symbol_infos[test_name][0]
self.assertEquals(test_offset, s.offset)
self.assertEquals(-1, s.size)
self.assertEquals(test_name, s.name)
self.assertEquals(len(offset_to_symbol_infos[test_offset]), 1)
s = offset_to_symbol_infos[test_offset][0]
self.assertEquals(test_offset, s.offset)
self.assertEquals(-1, s.size)
self.assertEquals(test_name, s.name)
def testGetSymbolsFromStream(self):
lines = [".text.startup.",
".text.with.a.prefix",
"",
"_ZN2v88internal33HEnvironmentLivenessAnalysisPhase3RunEv",
".text",
".text.*"]
names = patch_orderfile._GetSymbolsFromStream(lines)
self.assertEquals(len(names), 2)
self.assertEquals(
names[0], "with.a.prefix")
self.assertEquals(
names[1], "_ZN2v88internal33HEnvironmentLivenessAnalysisPhase3RunEv")
self.assertEquals(tuple(offset_to_symbol_infos[0x42]), symbol_infos)
def testExpandSymbols(self):
symbol_name = "dummySymbol"
symbol_name2 = "other"
profiled_symbol_names = [symbol_name, "symbolThatShouldntMatch"]
name_to_symbol_infos = {symbol_name: [
patch_orderfile.SymbolInfo(0x42, 0x12, symbol_name)]}
symbol_extractor.SymbolInfo(symbol_name, 0x42, 0x12)]}
offset_to_symbol_infos = {
0x42: [patch_orderfile.SymbolInfo(0x42, 0x12, symbol_name),
patch_orderfile.SymbolInfo(0x42, 0x12, symbol_name2)]}
0x42: [symbol_extractor.SymbolInfo(symbol_name, 0x42, 0x12),
symbol_extractor.SymbolInfo(symbol_name2, 0x42, 0x12)]}
symbol_names = patch_orderfile._ExpandSymbols(
profiled_symbol_names, name_to_symbol_infos, offset_to_symbol_infos)
self.assertEquals(len(symbol_names), 3)
......
#!/usr/bin/python
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utilities to get and manipulate symbols from a binary."""
import collections
import os
import re
import subprocess
import sys
sys.path.insert(
0, os.path.join(sys.path[0], '..', '..', 'third_party', 'android_platform',
'development', 'scripts'))
import symbol
# TODO(lizeb): Change symbol.ARCH to the proper value when "arm" is no longer
# the only possible value.
_NM_BINARY = symbol.ToolPath('nm')
SymbolInfo = collections.namedtuple('SymbolInfo', ('name', 'offset', 'size'))

# Symbol-type markers (third or second column of nm output) that are kept.
# Per the nm documentation, t/T are text-section symbols and W is a weak symbol.
_KEPT_SYMBOL_TYPES = frozenset(('t', 'W', 'T'))


def FromNmLine(line):
  """Create a SymbolInfo by parsing a properly formatted nm output line.

  Two layouts are recognized:
    00210d59 00000002 t _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev
    offset   size     <symbol_type> symbol_name
  and, when no size is reported:
    0070ee8c T WebRtcSpl_ComplexBitReverse
  In the second case -1 is used as a sentinel for the size.

  Args:
    line: line from nm

  Returns:
    An instance of SymbolInfo if the line represents a symbol, None otherwise.
    None is returned when the line does not have 3 or 4 whitespace-separated
    fields, when the symbol type field is not one of t, W or T, or when the
    offset/size fields are not hexadecimal numbers.
  """
  parts = line.split()
  if len(parts) == 4:
    (offset, size, symbol_type, name) = parts
  elif len(parts) == 3:
    (offset, symbol_type, name) = parts
    size = None
  else:
    return None
  # Look at the type column itself: a marker-looking token elsewhere on the
  # line must not make an unrelated line pass the filter.
  if symbol_type not in _KEPT_SYMBOL_TYPES:
    return None
  try:
    return SymbolInfo(
        name=name,
        offset=int(offset, 16),
        size=-1 if size is None else int(size, 16))
  except ValueError:
    # Non-hexadecimal offset or size: this is not a symbol line.
    return None
def SymbolInfosFromStream(nm_lines):
  """Collect the symbols described by a sequence of nm output lines.

  Every line is handed to FromNmLine; lines for which it returns None are
  dropped.

  Args:
    nm_lines: An iterable of lines

  Returns:
    A list of SymbolInfo, in the order the lines were read.
  """
  # TODO(lizeb): Consider switching to objdump to simplify parsing.
  parsed_lines = (FromNmLine(nm_line) for nm_line in nm_lines)
  return [parsed for parsed in parsed_lines if parsed is not None]
def SymbolInfosFromBinary(binary_filename):
  """Extract the symbols of a binary by running nm on it.

  nm is invoked with the -S and -n flags, and its standard output is parsed
  with SymbolInfosFromStream.

  Args:
    binary_filename: path to the binary.

  Returns:
    A list of SymbolInfo from the binary.
  """
  nm_process = subprocess.Popen(
      (_NM_BINARY, '-S', '-n', binary_filename),
      shell=False, stdout=subprocess.PIPE)
  try:
    return SymbolInfosFromStream(nm_process.stdout)
  finally:
    # Wait for nm to exit whether or not parsing succeeded.
    nm_process.wait()
def GroupSymbolInfosByOffset(symbol_infos):
  """Bucket symbols by their offset.

  Several symbols can live at the same offset, so each offset maps to a list
  of symbols, kept in the order they were encountered.

  Args:
    symbol_infos: iterable of SymbolInfo instances

  Returns:
    a dict {offset: [symbol_info1, ...], ...}
  """
  grouped = {}
  for info in symbol_infos:
    grouped.setdefault(info.offset, []).append(info)
  return grouped
def CreateNameToSymbolInfo(symbol_infos):
  """Index symbols by name.

  When several symbols share a name, the last one in iteration order is the
  one kept.

  Args:
    symbol_infos: iterable of SymbolInfo instances

  Returns:
    a dict {name: symbol_info, ...}
  """
  name_to_symbol_info = {}
  for info in symbol_infos:
    name_to_symbol_info[info.name] = info
  return name_to_symbol_info
#!/usr/bin/python
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import symbol_extractor
import unittest
class TestSymbolInfo(unittest.TestCase):
  """Tests for symbol_extractor.FromNmLine."""

  def testIgnoresBlankLine(self):
    symbol_info = symbol_extractor.FromNmLine('')
    self.assertIsNone(symbol_info)

  def testIgnoresMalformedLine(self):
    # Too many fields
    line = ('00210d59 00000002 t _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev '
            'too many fields')
    symbol_info = symbol_extractor.FromNmLine(line)
    self.assertIsNone(symbol_info)
    # Wrong marker
    line = '00210d59 00000002 A _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev'
    symbol_info = symbol_extractor.FromNmLine(line)
    self.assertIsNone(symbol_info)
    # Too short
    line = '00210d59 t'
    symbol_info = symbol_extractor.FromNmLine(line)
    self.assertIsNone(symbol_info)

  def testSymbolInfoWithSize(self):
    line = '00210d59 00000002 t _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev'
    test_name = '_ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev'
    test_offset = 0x210d59
    test_size = 2
    symbol_info = symbol_extractor.FromNmLine(line)
    self.assertIsNotNone(symbol_info)
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(test_offset, symbol_info.offset)
    self.assertEqual(test_size, symbol_info.size)
    self.assertEqual(test_name, symbol_info.name)

  def testSymbolInfoWithoutSize(self):
    line = '0070ee8c T WebRtcSpl_ComplexBitReverse'
    test_name = 'WebRtcSpl_ComplexBitReverse'
    test_offset = 0x70ee8c
    symbol_info = symbol_extractor.FromNmLine(line)
    self.assertIsNotNone(symbol_info)
    self.assertEqual(test_offset, symbol_info.offset)
    # A line without a size column is reported with the -1 sentinel.
    self.assertEqual(-1, symbol_info.size)
    self.assertEqual(test_name, symbol_info.name)
class TestSymbolInfosFromStream(unittest.TestCase):
  """Tests for symbol_extractor.SymbolInfosFromStream."""

  def testSymbolInfosFromStream(self):
    # Only the fourth and the last lines are expected to yield a symbol.
    lines = ['Garbage',
             '',
             ('00210d59 00000002 t _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev '
              'too many fields'),
             '00210d59 00000002 t _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev',
             '00210d59 00000002 A _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev',
             '0070ee8c T WebRtcSpl_ComplexBitReverse']
    symbol_infos = symbol_extractor.SymbolInfosFromStream(lines)
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(len(symbol_infos), 2)
    first = symbol_extractor.SymbolInfo(
        '_ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev', 0x00210d59, 2)
    self.assertEqual(first, symbol_infos[0])
    second = symbol_extractor.SymbolInfo(
        'WebRtcSpl_ComplexBitReverse', 0x0070ee8c, -1)
    self.assertEqual(second, symbol_infos[1])
class TestSymbolInfoMappings(unittest.TestCase):
  """Tests for the offset and name mappings built by symbol_extractor."""

  def setUp(self):
    # Two symbols share offset 0x42, the third one is alone at 0x64.
    self.symbol_infos = [
        symbol_extractor.SymbolInfo('firstNameAtOffset', 0x42, 42),
        symbol_extractor.SymbolInfo('secondNameAtOffset', 0x42, 42),
        symbol_extractor.SymbolInfo('thirdSymbol', 0x64, 20)]

  def testGroupSymbolInfosByOffset(self):
    offset_to_symbol_info = symbol_extractor.GroupSymbolInfosByOffset(
        self.symbol_infos)
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(len(offset_to_symbol_info), 2)
    self.assertIn(0x42, offset_to_symbol_info)
    self.assertEqual(offset_to_symbol_info[0x42][0], self.symbol_infos[0])
    self.assertEqual(offset_to_symbol_info[0x42][1], self.symbol_infos[1])
    self.assertIn(0x64, offset_to_symbol_info)
    self.assertEqual(offset_to_symbol_info[0x64][0], self.symbol_infos[2])

  def testCreateNameToSymbolInfos(self):
    name_to_symbol_info = symbol_extractor.CreateNameToSymbolInfo(
        self.symbol_infos)
    self.assertEqual(len(name_to_symbol_info), 3)
    for symbol_info in self.symbol_infos:
      self.assertIn(symbol_info.name, name_to_symbol_info)
      self.assertEqual(symbol_info, name_to_symbol_info[symbol_info.name])
# Run the test cases of this file when it is executed as a script.
if __name__ == '__main__':
  unittest.main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment