Commit 719d8624 authored by lizeb's avatar lizeb Committed by Commit bot

Refactor the symbol parsing, and move to NDK's nm.

First part of the orderfile generating script refactor. The aim here is to make
sure that all scripts parse the object files the same way, using the same
tool. This commit extracts the parsing logic from patch_orderfile.py. It uses nm
from the Android NDK.

BUG=452879

Review URL: https://codereview.chromium.org/884113002

Cr-Commit-Position: refs/heads/master@{#313528}
parent 20fc9d44
...@@ -26,17 +26,15 @@ The general pipeline is: ...@@ -26,17 +26,15 @@ The general pipeline is:
import collections import collections
import logging import logging
import subprocess
import sys import sys
import symbol_extractor
# Prefixes for the symbols. We strip them from the incoming symbols, and add # Prefixes for the symbols. We strip them from the incoming symbols, and add
# them back in the output file. # them back in the output file.
_PREFIXES = ('.text.startup.', '.text.hot.', '.text.unlikely.', '.text.') _PREFIXES = ('.text.startup.', '.text.hot.', '.text.unlikely.', '.text.')
SymbolInfo = collections.namedtuple('SymbolInfo', ['offset', 'size', 'name'])
def _RemoveClone(name): def _RemoveClone(name):
"""Return name up to the ".clone." marker.""" """Return name up to the ".clone." marker."""
clone_index = name.find('.clone.') clone_index = name.find('.clone.')
...@@ -45,61 +43,41 @@ def _RemoveClone(name): ...@@ -45,61 +43,41 @@ def _RemoveClone(name):
return name return name
def _GetSymbolInfosFromStream(nm_lines): def _GroupSymbolInfos(symbol_infos):
"""Parses the output of nm, and get all the symbols from a binary. """Group the symbol infos by name and offset.
Args: Args:
nm_lines: An iterable of lines symbol_infos: an iterable of SymbolInfo
Returns: Returns:
The same output as GetSymbolsFromBinary. The same output as _GroupSymbolInfosFromBinary.
""" """
# TODO(lizeb): Consider switching to objdump to simplify parsing.
symbol_infos = []
for line in nm_lines:
# We are interested in two types of lines:
# This:
# 00210d59 00000002 t _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev
# offset size <symbol_type> symbol_name
# And that:
# 0070ee8c T WebRtcSpl_ComplexBitReverse
# In the second case we don't have a size, so use -1 as a sentinel
parts = line.split()
if len(parts) == 4:
symbol_infos.append(SymbolInfo(
offset=int(parts[0], 16), size=int(parts[1], 16), name=parts[3]))
elif len(parts) == 3:
symbol_infos.append(SymbolInfo(
offset=int(parts[0], 16), size=-1, name=parts[2]))
# Map the addresses to symbols. # Map the addresses to symbols.
offset_to_symbol_infos = collections.defaultdict(list) offset_to_symbol_infos = collections.defaultdict(list)
name_to_symbol_infos = collections.defaultdict(list) name_to_symbol_infos = collections.defaultdict(list)
for symbol in symbol_infos: for symbol in symbol_infos:
symbol = SymbolInfo(symbol[0], symbol[1], _RemoveClone(symbol[2])) symbol = symbol_extractor.SymbolInfo(name=_RemoveClone(symbol.name),
offset=symbol.offset,
size=symbol.size)
offset_to_symbol_infos[symbol.offset].append(symbol) offset_to_symbol_infos[symbol.offset].append(symbol)
name_to_symbol_infos[symbol.name].append(symbol) name_to_symbol_infos[symbol.name].append(symbol)
return (offset_to_symbol_infos, name_to_symbol_infos) return (dict(offset_to_symbol_infos), dict(name_to_symbol_infos))
def _GetSymbolInfosFromBinary(binary_filename): def _GroupSymbolInfosFromBinary(binary_filename):
"""Runs nm to get all the symbols from a binary. """Group all the symbols from a binary by name and offset.
Args: Args:
binary_filename: path to the binary. binary_filename: path to the binary.
Returns: Returns:
A tuple of collection.defaultdict: A tuple of dict:
(offset_to_symbol_infos, name_to_symbol_infos): (offset_to_symbol_infos, name_to_symbol_infos):
- offset_to_symbol_infos: {offset: [symbol_info1, ...]} - offset_to_symbol_infos: {offset: [symbol_info1, ...]}
- name_to_symbol_infos: {name: [symbol_info1, ...]} - name_to_symbol_infos: {name: [symbol_info1, ...]}
""" """
command = 'nm -S -n %s | egrep "( t )|( W )|( T )"' % binary_filename symbol_infos = symbol_extractor.SymbolInfosFromBinary(binary_filename)
p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE) return _GroupSymbolInfos(symbol_infos)
try:
result = _GetSymbolInfosFromStream(p.stdout)
return result
finally:
p.wait()
def _StripPrefix(line): def _StripPrefix(line):
...@@ -230,7 +208,7 @@ def main(argv): ...@@ -230,7 +208,7 @@ def main(argv):
return 1 return 1
orderfile_filename = argv[1] orderfile_filename = argv[1]
binary_filename = argv[2] binary_filename = argv[2]
(offset_to_symbol_infos, name_to_symbol_infos) = _GetSymbolInfosFromBinary( (offset_to_symbol_infos, name_to_symbol_infos) = _GroupSymbolInfosFromBinary(
binary_filename) binary_filename)
profiled_symbols = _GetSymbolsFromOrderfile(orderfile_filename) profiled_symbols = _GetSymbolsFromOrderfile(orderfile_filename)
expanded_symbols = _ExpandSymbols( expanded_symbols = _ExpandSymbols(
......
...@@ -3,9 +3,11 @@ ...@@ -3,9 +3,11 @@
# Use of this source code is governed by a BSD-style license that can be # Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file. # found in the LICENSE file.
import patch_orderfile
import unittest import unittest
import patch_orderfile
import symbol_extractor
class TestPatchOrderFile(unittest.TestCase): class TestPatchOrderFile(unittest.TestCase):
def testRemoveClone(self): def testRemoveClone(self):
...@@ -15,78 +17,39 @@ class TestPatchOrderFile(unittest.TestCase): ...@@ -15,78 +17,39 @@ class TestPatchOrderFile(unittest.TestCase):
self.assertEquals( self.assertEquals(
"this.does.contain", patch_orderfile._RemoveClone(with_clone)) "this.does.contain", patch_orderfile._RemoveClone(with_clone))
def testGetSymbolInfosFromStreamWithSize(self): def testAliasClonedSymbols(self):
lines = [ symbol_infos = [
"00210d59 00000002 t _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev"] symbol_extractor.SymbolInfo(name='aSymbol', offset=0x42, size=0x12),
test_name = "_ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev" symbol_extractor.SymbolInfo(name='aSymbol.clone.', offset=8, size=1)]
test_offset = 0x210d59
(offset_to_symbol_infos, name_to_symbol_infos) = \ (offset_to_symbol_infos, name_to_symbol_infos) = \
patch_orderfile._GetSymbolInfosFromStream(lines) patch_orderfile._GroupSymbolInfos(symbol_infos)
self.assertEquals(len(offset_to_symbol_infos), 1) self.assertEquals(len(offset_to_symbol_infos), 2)
for i in range(2):
s = symbol_infos[i]
matching = offset_to_symbol_infos[s.offset][0]
self.assertEquals(matching.offset, s.offset)
self.assertEquals(matching.size, s.size)
self.assertEquals(len(name_to_symbol_infos), 1) self.assertEquals(len(name_to_symbol_infos), 1)
self.assertIn(test_name, name_to_symbol_infos) self.assertEquals(len(name_to_symbol_infos['aSymbol']), 2)
self.assertIn(test_offset, offset_to_symbol_infos)
self.assertEquals(len(name_to_symbol_infos[test_name]), 1) def testGroupSymbolsByOffset(self):
s = name_to_symbol_infos[test_name][0] symbol_infos = (
self.assertEquals(test_offset, s.offset) symbol_extractor.SymbolInfo(name='aSymbol', offset=0x42, size=0x12),
self.assertEquals(2, s.size) symbol_extractor.SymbolInfo(name='anotherSymbol', offset=0x42, size=1))
self.assertEquals(test_name, s.name) (offset_to_symbol_infos, _) = \
patch_orderfile._GroupSymbolInfos(symbol_infos)
self.assertEquals(len(offset_to_symbol_infos[test_offset]), 1)
s = offset_to_symbol_infos[test_offset][0]
self.assertEquals(test_offset, s.offset)
self.assertEquals(2, s.size)
self.assertEquals(test_name, s.name)
def testGetSymbolInfosFromStreamWithoutSize(self):
lines = [
"0070ee8c T WebRtcSpl_ComplexBitReverse"]
test_name = "WebRtcSpl_ComplexBitReverse"
test_offset = 0x70ee8c
(offset_to_symbol_infos, name_to_symbol_infos) = \
patch_orderfile._GetSymbolInfosFromStream(lines)
self.assertEquals(len(offset_to_symbol_infos), 1) self.assertEquals(len(offset_to_symbol_infos), 1)
self.assertEquals(len(name_to_symbol_infos), 1) self.assertEquals(tuple(offset_to_symbol_infos[0x42]), symbol_infos)
self.assertIn(test_name, name_to_symbol_infos)
self.assertIn(test_offset, offset_to_symbol_infos)
self.assertEquals(len(name_to_symbol_infos[test_name]), 1)
s = name_to_symbol_infos[test_name][0]
self.assertEquals(test_offset, s.offset)
self.assertEquals(-1, s.size)
self.assertEquals(test_name, s.name)
self.assertEquals(len(offset_to_symbol_infos[test_offset]), 1)
s = offset_to_symbol_infos[test_offset][0]
self.assertEquals(test_offset, s.offset)
self.assertEquals(-1, s.size)
self.assertEquals(test_name, s.name)
def testGetSymbolsFromStream(self):
lines = [".text.startup.",
".text.with.a.prefix",
"",
"_ZN2v88internal33HEnvironmentLivenessAnalysisPhase3RunEv",
".text",
".text.*"]
names = patch_orderfile._GetSymbolsFromStream(lines)
self.assertEquals(len(names), 2)
self.assertEquals(
names[0], "with.a.prefix")
self.assertEquals(
names[1], "_ZN2v88internal33HEnvironmentLivenessAnalysisPhase3RunEv")
def testExpandSymbols(self): def testExpandSymbols(self):
symbol_name = "dummySymbol" symbol_name = "dummySymbol"
symbol_name2 = "other" symbol_name2 = "other"
profiled_symbol_names = [symbol_name, "symbolThatShouldntMatch"] profiled_symbol_names = [symbol_name, "symbolThatShouldntMatch"]
name_to_symbol_infos = {symbol_name: [ name_to_symbol_infos = {symbol_name: [
patch_orderfile.SymbolInfo(0x42, 0x12, symbol_name)]} symbol_extractor.SymbolInfo(symbol_name, 0x42, 0x12)]}
offset_to_symbol_infos = { offset_to_symbol_infos = {
0x42: [patch_orderfile.SymbolInfo(0x42, 0x12, symbol_name), 0x42: [symbol_extractor.SymbolInfo(symbol_name, 0x42, 0x12),
patch_orderfile.SymbolInfo(0x42, 0x12, symbol_name2)]} symbol_extractor.SymbolInfo(symbol_name2, 0x42, 0x12)]}
symbol_names = patch_orderfile._ExpandSymbols( symbol_names = patch_orderfile._ExpandSymbols(
profiled_symbol_names, name_to_symbol_infos, offset_to_symbol_infos) profiled_symbol_names, name_to_symbol_infos, offset_to_symbol_infos)
self.assertEquals(len(symbol_names), 3) self.assertEquals(len(symbol_names), 3)
......
#!/usr/bin/python
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utilities to get and manipulate symbols from a binary."""
import collections
import os
import re
import subprocess
import sys
sys.path.insert(
0, os.path.join(sys.path[0], '..', '..', 'third_party', 'android_platform',
'development', 'scripts'))
import symbol
# TODO(lizeb): Change symbol.ARCH to the proper value when "arm" is no longer
# the only possible value.
_NM_BINARY = symbol.ToolPath('nm')
SymbolInfo = collections.namedtuple('SymbolInfo', ('name', 'offset', 'size'))

# nm symbol types that are kept: 't', 'T' and 'W'.
_KEPT_SYMBOL_TYPES = frozenset(('t', 'T', 'W'))


def FromNmLine(line):
  """Create a SymbolInfo by parsing a properly formatted nm output line.

  Two line layouts are recognized. This one:
    00210d59 00000002 t _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev
    offset size <symbol_type> symbol_name
  And that one:
    0070ee8c T WebRtcSpl_ComplexBitReverse
    offset <symbol_type> symbol_name
  In the second layout there is no size, so -1 is used as a sentinel.

  Args:
    line: line from nm

  Returns:
    An instance of SymbolInfo if the line represents a symbol of type 't',
    'T' or 'W', None otherwise (blank line, wrong number of fields, other
    symbol type, or offset/size fields that are not hexadecimal).
  """
  parts = line.split()
  if len(parts) not in (3, 4):
    return None
  # The symbol type is always the field right before the name. Checking that
  # exact column avoids accepting a line only because a 't', 'T' or 'W' token
  # shows up somewhere else (for instance as the symbol name).
  if parts[-2] not in _KEPT_SYMBOL_TYPES:
    return None
  try:
    offset = int(parts[0], 16)
    size = int(parts[1], 16) if len(parts) == 4 else -1
  except ValueError:
    # Garbage that happens to have the right shape is not a symbol.
    return None
  return SymbolInfo(name=parts[-1], offset=offset, size=size)
def SymbolInfosFromStream(nm_lines):
  """Collect the symbols described by a stream of nm output lines.

  Every line is handed to FromNmLine; lines for which it returns None are
  dropped, and the remaining results keep the order of the input.

  Args:
    nm_lines: An iterable of lines

  Returns:
    A list of SymbolInfo.
  """
  # TODO(lizeb): Consider switching to objdump to simplify parsing.
  parsed = (FromNmLine(nm_line) for nm_line in nm_lines)
  return [info for info in parsed if info is not None]
def SymbolInfosFromBinary(binary_filename):
  """Runs nm to get all the symbols from a binary.

  nm is invoked with -S and -n, and its standard output is parsed by
  SymbolInfosFromStream.

  Args:
    binary_filename: path to the binary.

  Returns:
    A list of SymbolInfo from the binary.
  """
  command = (_NM_BINARY, '-S', '-n', binary_filename)
  p = subprocess.Popen(command, shell=False, stdout=subprocess.PIPE)
  try:
    return SymbolInfosFromStream(p.stdout)
  finally:
    # Close our end of the pipe before waiting. If parsing stopped early
    # (an exception was raised), nm may still be blocked writing to a full
    # pipe, and waiting on it without closing would never return.
    p.stdout.close()
    p.wait()
def GroupSymbolInfosByOffset(symbol_infos):
  """Bucket symbols by their offset.

  Several symbols may live at the same offset, so each offset maps to a list
  holding every symbol found there, in the order they were encountered.

  Args:
    symbol_infos: iterable of SymbolInfo instances

  Returns:
    a dict {offset: [symbol_info1, ...], ...}
  """
  grouped = {}
  for info in symbol_infos:
    grouped.setdefault(info.offset, []).append(info)
  return grouped
def CreateNameToSymbolInfo(symbol_infos):
  """Index symbols by name.

  When several symbols share a name, the one seen last is the one kept.

  Args:
    symbol_infos: iterable of SymbolInfo instances

  Returns:
    a dict {name: symbol_info, ...}
  """
  name_to_symbol_info = {}
  for info in symbol_infos:
    name_to_symbol_info[info.name] = info
  return name_to_symbol_info
#!/usr/bin/python
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import symbol_extractor
import unittest
class TestSymbolInfo(unittest.TestCase):
  """Tests for symbol_extractor.FromNmLine on single nm output lines."""

  def testIgnoresBlankLine(self):
    symbol_info = symbol_extractor.FromNmLine('')
    self.assertIsNone(symbol_info)

  def testIgnoresMalformedLine(self):
    # Too many fields
    line = ('00210d59 00000002 t _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev '
            'too many fields')
    symbol_info = symbol_extractor.FromNmLine(line)
    self.assertIsNone(symbol_info)
    # Wrong marker
    line = '00210d59 00000002 A _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev'
    symbol_info = symbol_extractor.FromNmLine(line)
    self.assertIsNone(symbol_info)
    # Too short
    line = '00210d59 t'
    symbol_info = symbol_extractor.FromNmLine(line)
    self.assertIsNone(symbol_info)

  def testSymbolInfoWithSize(self):
    line = '00210d59 00000002 t _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev'
    test_name = '_ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev'
    test_offset = 0x210d59
    test_size = 2
    symbol_info = symbol_extractor.FromNmLine(line)
    self.assertIsNotNone(symbol_info)
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(test_offset, symbol_info.offset)
    self.assertEqual(test_size, symbol_info.size)
    self.assertEqual(test_name, symbol_info.name)

  def testSymbolInfoWithoutSize(self):
    line = '0070ee8c T WebRtcSpl_ComplexBitReverse'
    test_name = 'WebRtcSpl_ComplexBitReverse'
    test_offset = 0x70ee8c
    symbol_info = symbol_extractor.FromNmLine(line)
    self.assertIsNotNone(symbol_info)
    self.assertEqual(test_offset, symbol_info.offset)
    # A line without a size field is reported with the -1 sentinel.
    self.assertEqual(-1, symbol_info.size)
    self.assertEqual(test_name, symbol_info.name)
class TestSymbolInfosFromStream(unittest.TestCase):
  """Tests for symbol_extractor.SymbolInfosFromStream."""

  def testSymbolInfosFromStream(self):
    # Only the fourth and sixth lines are well-formed 't'/'T' symbol lines.
    lines = ['Garbage',
             '',
             ('00210d59 00000002 t _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev '
              'too many fields'),
             '00210d59 00000002 t _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev',
             '00210d59 00000002 A _ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev',
             '0070ee8c T WebRtcSpl_ComplexBitReverse']
    symbol_infos = symbol_extractor.SymbolInfosFromStream(lines)
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(len(symbol_infos), 2)
    first = symbol_extractor.SymbolInfo(
        '_ZN34BrowserPluginHostMsg_Attach_ParamsD2Ev', 0x00210d59, 2)
    self.assertEqual(first, symbol_infos[0])
    second = symbol_extractor.SymbolInfo(
        'WebRtcSpl_ComplexBitReverse', 0x0070ee8c, -1)
    self.assertEqual(second, symbol_infos[1])
class TestSymbolInfoMappings(unittest.TestCase):
  """Tests for the offset and name mappings built from SymbolInfo lists."""

  def setUp(self):
    # Two symbols share offset 0x42; the third one is alone at 0x64.
    self.symbol_infos = [
        symbol_extractor.SymbolInfo('firstNameAtOffset', 0x42, 42),
        symbol_extractor.SymbolInfo('secondNameAtOffset', 0x42, 42),
        symbol_extractor.SymbolInfo('thirdSymbol', 0x64, 20)]

  def testGroupSymbolInfosByOffset(self):
    offset_to_symbol_info = symbol_extractor.GroupSymbolInfosByOffset(
        self.symbol_infos)
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(len(offset_to_symbol_info), 2)
    self.assertIn(0x42, offset_to_symbol_info)
    self.assertEqual(offset_to_symbol_info[0x42][0], self.symbol_infos[0])
    self.assertEqual(offset_to_symbol_info[0x42][1], self.symbol_infos[1])
    self.assertIn(0x64, offset_to_symbol_info)
    self.assertEqual(offset_to_symbol_info[0x64][0], self.symbol_infos[2])

  def testCreateNameToSymbolInfos(self):
    name_to_symbol_info = symbol_extractor.CreateNameToSymbolInfo(
        self.symbol_infos)
    self.assertEqual(len(name_to_symbol_info), 3)
    for symbol_info in self.symbol_infos:
      name = symbol_info.name
      self.assertIn(name, name_to_symbol_info)
      self.assertEqual(symbol_info, name_to_symbol_info[name])
if __name__ == '__main__':
unittest.main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment