Commit 7819ba6a authored by Siddhartha's avatar Siddhartha Committed by Commit Bot

Extract CFI table info from breakpad symbol file for unwinding

The extract_unwind_tables script dumps the breakpad symbol file and
extracts cfi unwind table from it.
The output format is discussed in this doc:
https://docs.google.com/document/d/1TLuUZ1HaMO6Rv0Q9Y1-w4a-9wcyia1VygQbs4Osb7Oo
This is just a basic format and will be changed in subsequent cl for
size efficiency and performance.

BUG=819888

Change-Id: I987aa2e4ef37f86b6553410d7ee73c15f655495b
Reviewed-on: https://chromium-review.googlesource.com/956971
Commit-Queue: Siddhartha S <ssid@chromium.org>
Reviewed-by: default avataragrieve <agrieve@chromium.org>
Cr-Commit-Position: refs/heads/master@{#542736}
parent 2639b101
......@@ -3,36 +3,50 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Extracts the unwind tables in ARM EHABI format.
The ARM EHABI format requires 2 sections for unwinding, ARM.exidx and ARM.extab.
This script copies these sections from unstripped binary into an output file.
"""Extracts the unwind tables from breakpad symbol files.
Runs dump_syms on the given binary file and extracts the CFI data into the
given output file.
The output file is a binary file containing CFI rows ordered based on function
address. The output file only contains rows that match the most popular rule
type in CFI table, to reduce the output size and specify data in compact format.
See doc https://github.com/google/breakpad/blob/master/docs/symbol_files.md.
1. The CFA rules should be of postfix form "SP <val> +".
2. The RA rules should be of postfix form "CFA -<val> + ^".
Note: breakpad represents dereferencing address with '^' operator.
The output rows are all 64 bits. We have 2 types of rows, FUNCTION and CFI.
Each function with CFI info has a single FUNCTION row, followed by one or more
CFI rows. All the addresses of the CFI rows will be within the function.
1. FUNCTION. Bits in order of high to low represent:
31 bits: specifies function address, without the last bit (always 0).
1 bit : always 1. Specifies the row type is FUNCTION.
32 bits: length of the current function.
2. CFI. Bits in order of high to low represent:
31 bits: instruction address in the current function.
1 bit : always 0. Specifies the row type is CFI.
30 bits: CFA offset / 4.
2 bits: RA offset / 4.
Usage:
extract_unwind_tables.py --input_path [root path to unstripped chrome.so]
--output_path [output path]
--output_path [output path] --dump_syms_path [path to dump_syms binary]
"""
import argparse
import re
import struct
import subprocess
import sys
import tempfile
def _GetArmSectionsFromObjdump(input_path):
  """Collects size and file offset of every '.ARM' section of a binary.

  Runs `objdump -h` on input_path and inspects each output line that mentions
  '.ARM'.

  Args:
    input_path: Path of the binary handed to objdump.

  Returns:
    A dict keyed by the second column of the matching line (the section name).
    Each value is a dict with integer 'size' (third column, parsed as hex) and
    'offset' (sixth column, parsed as hex) entries.
  """
  header_lines = subprocess.check_output(
      ['objdump', '-h', input_path]).splitlines()
  arm_sections = {}
  for header_line in header_lines:
    if '.ARM' in header_line:
      columns = header_line.split()
      arm_sections[columns[1]] = {
          'size': int(columns[2], 16),
          'offset': int(columns[5], 16),
      }
  return arm_sections
_CFA_REG = '.cfa'
_RA_REG = '.ra'
_ADDR_ENTRY = 0
_LENGTH_ENTRY = 1
def _Write4Bytes(output_file, val):
......@@ -40,15 +54,113 @@ def _Write4Bytes(output_file, val):
output_file.write(struct.pack('<L', val));
def _AddSectionToOutput(input_path, output_file, section):
"""Copies the specified section from input file to output."""
_Write4Bytes(output_file, section['size'])
_Write4Bytes(output_file, section['offset'])
def _FindRuleForRegister(cfi_row, reg):
"""Returns the postfix expression as string for a given register.
Breakpad CFI row format specifies rules for unwinding each register in postfix
expression form separated by space. Each rule starts with register name and a
colon. Eg: "CFI R1: <rule> R2: <rule>".
"""
out = []
found_register = False
for part in cfi_row:
if found_register:
if part[-1] == ':':
break
out.append(part)
elif part == reg + ':':
found_register = True
return ' '.join(out)
def _GetCfaAndRaOffset(cfi_row):
  """Returns a tuple with 2 numbers (cfa_offset, ra_offset).

  Extracts the .cfa and .ra rules from the row and decodes them when they have
  exactly the supported shape:
    * CFA rule: postfix form "sp <val> +"; cfa_offset is <val>.
    * RA rule: postfix form ".cfa -<val> + ^"; ra_offset is <val> (positive).

  Args:
    cfi_row: List of whitespace-separated tokens of a breakpad "STACK CFI"
      line.

  Returns:
    (cfa_offset, ra_offset). cfa_offset is 0 when the CFA rule is missing or
    has another shape. ra_offset is 0 when the row has no RA rule. (0, 0) is
    returned when an RA rule is present but has another shape.
  """
  cfa_offset = 0
  ra_offset = 0
  cfa_rule = _FindRuleForRegister(cfi_row, _CFA_REG)
  ra_rule = _FindRuleForRegister(cfi_row, _RA_REG)
  # re.match only anchors at the start, so the patterns end with '$' to reject
  # longer expressions (e.g. "sp 16 + 8 +") instead of decoding their prefix.
  if cfa_rule and re.match(r'sp [0-9]+ \+$', cfa_rule):
    cfa_offset = int(cfa_rule.split()[1], 10)
  if ra_rule:
    # The leading dot is escaped so only the literal ".cfa" register matches.
    if not re.match(r'\.cfa -[0-9]+ \+ \^$', ra_rule):
      return (0, 0)
    # The rule stores a negative offset; the output format wants it positive.
    ra_offset = -1 * int(ra_rule.split()[1], 10)
  return (cfa_offset, ra_offset)
def _GetAllCfiRows(symbol_file):
"""Returns parsed CFI data from given symbol_file.
Each entry in the cfi data dictionary returned is a map from function start
address to array of function rows, starting with FUNCTION type, followed by
one or more CFI rows.
Args:
symbol_file: Iterable of text lines; only lines containing 'STACK CFI' are
used.
Returns:
Dict keyed by function start address. Each value is a list of row dicts: the
first holds _ADDR_ENTRY and _LENGTH_ENTRY (from the INIT line), the others
hold _ADDR_ENTRY, _CFA_REG and _RA_REG. Functions without any accepted CFI
row are left out.
"""
# Maps function start address -> list of rows collected for that function.
cfi_data = {}
# Rows of the function currently being parsed; emptied whenever the function
# is rejected or a new INIT line starts.
current_func = []
for line in symbol_file:
if 'STACK CFI' not in line:
continue
parts = line.split()
data = {}
if parts[2] == 'INIT':
# Add the previous function to the output
# (only when it has at least one row besides its INIT row).
if len(current_func) > 1:
cfi_data[current_func[0][_ADDR_ENTRY]] = current_func
current_func = []
# The function line is of format "STACK CFI INIT <addr> <length> ..."
data[_ADDR_ENTRY] = int(parts[3], 16)
data[_LENGTH_ENTRY] = int(parts[4], 16)
if data[_LENGTH_ENTRY] == 0:
continue # Skip the current function.
else:
# The current function is skipped.
if len(current_func) == 0:
continue
# The CFI row is of format "STACK CFI <addr> .cfa: <expr> .ra: <expr> ..."
data[_ADDR_ENTRY] = int(parts[2], 16)
(data[_CFA_REG], data[_RA_REG]) = _GetCfaAndRaOffset(parts)
# A CFA offset of 0 or an RA offset of 16 or more rejects the whole function:
# rows collected so far are dropped. Presumably 16 is the limit because
# RA offset / 4 has to fit in the 2 bits of the output format - confirm.
if (data[_CFA_REG]) == 0 or data[_RA_REG] >= 16:
current_func = []
continue
assert data[_CFA_REG] % 4 == 0
if data[_ADDR_ENTRY] == 0:
# Skip current function, delete all previous entries.
current_func = []
continue
assert data[_ADDR_ENTRY] % 2 == 0
current_func.append(data)
# NOTE(review): the next four lines use input_path, section and output_file,
# none of which are defined in this function. They look like lines of the
# removed _AddSectionToOutput body interleaved here by the diff view - confirm.
with open(input_path, 'rb') as f:
f.seek(section['offset'])
data = f.read(section['size'])
output_file.write(data)
# Flush the last function; no later INIT line exists to trigger the flush
# performed inside the loop.
if len(current_func) > 1:
cfi_data[current_func[0][_ADDR_ENTRY]] = current_func
return cfi_data
def _WriteCfiData(cfi_data, out_file):
  """Writes the CFI data in defined format to out_file.

  Functions are emitted in increasing start-address order. Each function is
  written as a FUNCTION row (start address with the lowest bit set, then the
  function length) followed by one CFI row per remaining entry (instruction
  address, then the CFA offset OR-ed with RA offset / 4). Every value is
  written through _Write4Bytes.

  Args:
    cfi_data: Dict mapping function start address to a list of row dicts, the
      first being the FUNCTION row and the rest CFI rows.
    out_file: File object the rows are written to.
  """
  # items() and '//' give the same result as iteritems() and integer '/' on
  # Python 2, and keep working on Python 3 where iteritems() is gone and '/'
  # would produce a float that cannot be OR-ed with an int.
  for addr, function in sorted(cfi_data.items()):
    assert len(function) > 1
    _Write4Bytes(out_file, addr | 1)
    _Write4Bytes(out_file, function[0][_LENGTH_ENTRY])
    for row in function[1:]:
      _Write4Bytes(out_file, row[_ADDR_ENTRY])
      _Write4Bytes(out_file, row[_CFA_REG] | (row[_RA_REG] // 4))
def _ParseCfiData(sym_file, output_path):
  """Reads CFI rows from a symbol file and writes them to the output file.

  Args:
    sym_file: Path of the symbol file to read; opened in text mode.
    output_path: Path of the file to write; opened in binary mode.
  """
  with open(sym_file, 'r') as symbols:
    parsed_rows = _GetAllCfiRows(symbols)
  with open(output_path, 'wb') as table_file:
    _WriteCfiData(parsed_rows, table_file)
def main():
......@@ -59,17 +171,19 @@ def main():
parser.add_argument(
'--output_path', required=True,
help='The path of the output file')
args = parser.parse_args()
parser.add_argument(
'--dump_syms_path', required=True,
help='The path of the dump_syms binary')
sections = _GetArmSectionsFromObjdump(args.input_path)
exidx = sections.get('.ARM.exidx')
extab = sections.get('.ARM.extab')
if not exidx or not extab:
raise Exception('No arm32 exception section found.')
with open(args.output_path, 'wb') as outputFile:
_AddSectionToOutput(args.input_path, outputFile, exidx)
_AddSectionToOutput(args.input_path, outputFile, extab)
args = parser.parse_args()
sym_file = tempfile.NamedTemporaryFile()
with tempfile.NamedTemporaryFile() as sym_file:
out = subprocess.call(
['./' +args.dump_syms_path, args.input_path], stdout=sym_file)
assert not out
sym_file.flush()
_ParseCfiData(sym_file.name, args.output_path)
return 0
if __name__ == '__main__':
......
#!/usr/bin/env python
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Tests for extract_unwind_tables.py
This test suite contains various tests for extracting CFI tables from breakpad
symbol files.
"""
import optparse
import os
import struct
import sys
import tempfile
import unittest
import extract_unwind_tables
sys.path.append(os.path.join(os.path.dirname(__file__), "gyp"))
from util import build_utils
class TestExtractUnwindTables(unittest.TestCase):
  """Checks the binary CFI table produced from a breakpad symbol file."""

  def testExtractCfi(self):
    """Parses a small symbol file and compares every 32-bit output word.

    The input mixes functions that must be kept with ones that must be
    dropped (zero address, zero length, unsupported CFA/RA rules).
    """
    with tempfile.NamedTemporaryFile() as input_file, \
         tempfile.NamedTemporaryFile() as output_file:
      input_file.write("""
MODULE Linux arm CDE12FE1DF2B37A9C6560B4CBEE056420 lib_chrome.so
INFO CODE_ID E12FE1CD2BDFA937C6560B4CBEE05642
FILE 0 ../../base/allocator/allocator_check.cc
FILE 1 ../../base/allocator/allocator_extension.cc
FILE 2 ../../base/allocator/allocator_shim.cc
FUNC 1adcb60 54 0 i2d_name_canon
1adcb60 1a 509 17054
3b94c70 2 69 40
PUBLIC e17001 0 assist_ranker::(anonymous namespace)::FakePredict::Initialize()
PUBLIC e17005 0 (anonymous namespace)::FileDeleter(base::File)
STACK CFI INIT e17000 4 .cfa: sp 0 + .ra: lr
STACK CFI INIT 0 4 .cfa: sp 0 + .ra: lr
STACK CFI 2 .cfa: sp 4 +
STACK CFI 4 .cfa: sp 12 + .ra: .cfa -8 + ^ r7: .cfa -12 + ^
STACK CFI 6 .cfa: sp 16 +
STACK CFI INIT e1a96e 20 .cfa: sp 0 + .ra: lr
STACK CFI e1a970 .cfa: sp 4 +
STACK CFI e1a972 .cfa: sp 12 + .ra: .cfa -8 + ^ r7: .cfa -12 + ^
STACK CFI e1a974 .cfa: sp 16 +
STACK CFI INIT e1a1e4 b0 .cfa: sp 0 + .ra: lr
STACK CFI e1a1e6 .cfa: sp 16 + .ra: .cfa -4 + ^ r4: .cfa -16 + ^ r5: .cfa -12 +
STACK CFI e1a1e8 .cfa: sp 80 +
STACK CFI INIT 0 4 .cfa: sp 0 + .ra: lr
STACK CFI INIT 3b92e24 3c .cfa: sp 0 + .ra: lr
STACK CFI 3b92e4c .cfa: sp 16 + .ra: .cfa -12 + ^
STACK CFI INIT e17004 0 .cfa: sp 0 + .ra: lr
STACK CFI e17004 2 .cfa: sp 0 + .ra: lr
STACK CFI INIT 3b92e70 38 .cfa: sp 0 + .ra: lr
STACK CFI 3b92e74 .cfa: sp 8 + .ra: .cfa -4 + ^ r4: .cfa -8 + ^
STACK CFI 3b92e90 .cfa: sp 0 + .ra: .ra r4: r4
STACK CFI INIT 3b93114 6c .cfa: sp 0 + .ra: lr
STACK CFI 3b93118 .cfa: r7 16 + .ra: .cfa -4 + ^
""")
      input_file.flush()
      extract_unwind_tables._ParseCfiData(input_file.name, output_file.name)

      # Pairs of 32-bit words: FUNCTION rows are (address | 1, length), CFI
      # rows are (address, CFA offset + RA offset / 4). Floor division keeps
      # the expected values integers on every Python version.
      expected_output_rows = [
          0xe1a1e4 | 1, 0xb0,
          0xe1a1e6, 16 + 4 // 4,
          0xe1a1e8, 80 + 0,
          0xe1a96e | 1, 0x20,
          0xe1a970, 4 + 0,
          0xe1a972, 12 + 8 // 4,
          0xe1a974, 16 + 0,
          0x3b92e24 | 1, 0x3c,
          0x3b92e4c, 16 + 12 // 4,
      ]
      actual_output = []
      with open(output_file.name, 'rb') as f:
        while True:
          read = f.read(4)
          if not read:
            break
          # Decode with the same little-endian unsigned format the script
          # uses when writing, rather than the host's native signed int.
          actual_output.append(struct.unpack('<L', read)[0])
      self.assertEqual(expected_output_rows, actual_output)
if __name__ == '__main__':
unittest.main()
......@@ -141,6 +141,7 @@ PYTHON_UNIT_TEST_SUITES = {
'test_modules': [
'java_cpp_enum_tests',
'java_google_api_keys_tests',
'extract_unwind_tables_tests',
]
},
}
......
......@@ -22,13 +22,18 @@ template("unwind_table_asset") {
root_build_dir),
"--output_path",
rebase_path(_asset_path, root_build_dir),
"--dump_syms_path",
rebase_path("$root_out_dir/dump_syms", root_build_dir),
]
deps = [
":${invoker.library_target}",
"//third_party/breakpad:dump_syms",
]
}
android_assets(target_name) {
testonly = invoker.testonly
if (defined(invoker.testonly)) {
testonly = invoker.testonly
}
sources = [
_asset_path,
]
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment