Extract CFI table info from breakpad symbol file for unwinding

The extract_unwind_tables script dumps the breakpad symbol file and extracts cfi unwind table from it. The output format is discussed in this doc: https://docs.google.com/document/d/1TLuUZ1HaMO6Rv0Q9Y1-w4a-9wcyia1VygQbs4Osb7Oo This is just a basic format and will be changed in subsequent cl for size efficiency and performance. BUG=819888 Change-Id: I987aa2e4ef37f86b6553410d7ee73c15f655495b Reviewed-on: https://chromium-review.googlesource.com/956971 Commit-Queue: Siddhartha S <ssid@chromium.org> Reviewed-by: agrieve <agrieve@chromium.org> Cr-Commit-Position: refs/heads/master@{#542736}

Extract CFI table info from breakpad symbol file for unwinding
The extract_unwind_tables script dumps the breakpad symbol file and extracts cfi unwind table from it. The output format is discussed in this doc: https://docs.google.com/document/d/1TLuUZ1HaMO6Rv0Q9Y1-w4a-9wcyia1VygQbs4Osb7Oo This is just a basic format and will be changed in subsequent cl for size efficiency and performance. BUG=819888 Change-Id: I987aa2e4ef37f86b6553410d7ee73c15f655495b Reviewed-on: https://chromium-review.googlesource.com/956971 Commit-Queue: Siddhartha S <ssid@chromium.org> Reviewed-by: agrieve <agrieve@chromium.org> Cr-Commit-Position: refs/heads/master@{#542736}
7819ba6a · Siddhartha · Commit Bot · 2639b101 · 7819ba6a · 7819ba6a
Commit 7819ba6a authored Mar 13, 2018 by Siddhartha Committed by Commit Bot Mar 13, 2018
4 changed files
--- a/build/android/gyp/extract_unwind_tables.py
+++ b/build/android/gyp/extract_unwind_tables.py
@@ -3,36 +3,50 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

-"""Extracts the unwind tables in ARM EHABI format.
-
-The ARM EHABI format requires 2 sections for unwinding, ARM.exidx and ARM.extab.
-This script copies these sections from unstripped binary into an output file.
+"""Extracts the unwind tables from breakpad symbol files.
+
+Runs dump_syms on the given binary file and extracts the CFI data into the
+given output file.
+The output file is a binary file containing CFI rows ordered based on function
+address. The output file only contains rows that match the most popular rule
+type in CFI table, to reduce the output size and specify data in compact format.
+See doc https://github.com/google/breakpad/blob/master/docs/symbol_files.md.
+1. The CFA rules should be of postfix form "SP <val> +".
+2. The RA rules should be of postfix form "CFA -<val> + ^".
+Note: breakpad represents dereferencing address with '^' operator.
+
+The output rows are all 64 bits. We have 2 types of rows, FUNCTION and CFI.
+Each function with CFI info has a single FUNCTION row, followed by one or more
+CFI rows. All the addresses of the CFI rows will be within the function.
+1. FUNCTION. Bits in order of high to low represent:
+    31 bits: specifies function address, without the last bit (always 0).
+     1 bit : always 1. Specifies the row type is FUNCTION.
+    32 bits: length of the current function.
+
+2. CFI. Bits in order of high to low represent:
+    31 bits: instruction address in the current function.
+     1 bit : always 0. Specifies the row type is CFI.
+    30 bits: CFA offset / 4.
+     2 bits: RA offset / 4.

 Usage:
  extract_unwind_tables.py --input_path [root path to unstripped chrome.so]
-      --output_path [output path]
-
+      --output_path [output path] --dump_syms_path [path to dump_syms binary]
 """

 import argparse
+import re
 import struct
 import subprocess
 import sys
+import tempfile


-def _GetArmSectionsFromObjdump(input_path):
-  """Parses the objdump of the binary and returns the relevant sections."""
-  objdump = subprocess.check_output(['objdump', '-h', input_path]).splitlines()
-  sections = {}
-  for line in objdump:
-    if '.ARM' not in line:
-      continue
-    parts = line.split()
-    section = {}
-    section['size'] = int(parts[2], 16)
-    section['offset'] = int(parts[5], 16)
-    sections[parts[1]] = section
-  return sections
+_CFA_REG = '.cfa'
+_RA_REG = '.ra'
+
+_ADDR_ENTRY = 0
+_LENGTH_ENTRY = 1


 def _Write4Bytes(output_file, val):
@@ -40,15 +54,113 @@ def _Write4Bytes(output_file, val):
  output_file.write(struct.pack('<L', val));


-def _AddSectionToOutput(input_path, output_file, section):
-  """Copies the specified section from input file to output."""
-  _Write4Bytes(output_file, section['size'])
-  _Write4Bytes(output_file, section['offset'])
+def _FindRuleForRegister(cfi_row, reg):
+  """Returns the postfix expression as string for a given register.
+
+  Breakpad CFI row format specifies rules for unwinding each register in postfix
+  expression form separated by space. Each rule starts with register name and a
+  colon. Eg: "CFI R1: <rule> R2: <rule>".
+  """
+  out = []
+  found_register = False
+  for part in cfi_row:
+    if found_register:
+      if part[-1] == ':':
+        break
+      out.append(part)
+    elif part == reg + ':':
+      found_register = True
+  return ' '.join(out)
+
+
+def _GetCfaAndRaOffset(cfi_row):
+  """Returns a tuple with 2 numbers (cfa_offset, ra_offset).
+
+  The CFA rule must be of postfix form "SP <val> +", else cfa_offset is 0. The
+  RA rule, if present, must be of postfix form "CFA -<val> + ^", else (0, 0)
+  is returned; ra_offset is 0 when the row has no RA rule.
+  """
+  cfa_offset = 0
+  ra_offset = 0
+  cfa_rule = _FindRuleForRegister(cfi_row, _CFA_REG)
+  ra_rule = _FindRuleForRegister(cfi_row, _RA_REG)
+  if cfa_rule and re.match(r'sp [0-9]+ \+', cfa_rule):
+    cfa_offset = int(cfa_rule.split()[1], 10)
+  if ra_rule:
+    if not re.match(r'.cfa -[0-9]+ \+ \^', ra_rule):
+      return (0, 0)
+    ra_offset = -1 * int(ra_rule.split()[1], 10)
+  return (cfa_offset, ra_offset)
+
+
+def _GetAllCfiRows(symbol_file):
+  """Returns parsed CFI data from given symbol_file.
+
+  Each entry in the cfi data dictionary returned is a map from function start
+  address to array of function rows, starting with FUNCTION type, followed by
+  one or more CFI rows.
+  """
+  cfi_data = {}
+  current_func = []
+  for line in symbol_file:
+    if 'STACK CFI' not in line:
+      continue
+
+    parts = line.split()
+    data = {}
+    if parts[2] == 'INIT':
+      # Add the previous function to the output
+      if len(current_func) > 1:
+        cfi_data[current_func[0][_ADDR_ENTRY]] = current_func
+      current_func = []
+
+      # The function line is of format "STACK CFI INIT <addr> <length> ..."
+      data[_ADDR_ENTRY] = int(parts[3], 16)
+      data[_LENGTH_ENTRY] = int(parts[4], 16)
+      if data[_LENGTH_ENTRY] == 0:
+        continue  # Skip the current function.
+    else:
+      # The current function is skipped.
+      if len(current_func) == 0:
+        continue
+
+      # The CFI row is of format "STACK CFI <addr> .cfa: <expr> .ra: <expr> ..."
+      data[_ADDR_ENTRY] = int(parts[2], 16)
+      (data[_CFA_REG], data[_RA_REG]) = _GetCfaAndRaOffset(parts)
+      if (data[_CFA_REG]) == 0 or data[_RA_REG] >= 16:
+        current_func = []
+        continue
+      assert data[_CFA_REG] % 4 == 0
+
+    if data[_ADDR_ENTRY] == 0:
+      # Skip current function, delete all previous entries.
+      current_func = []
+      continue
+    assert data[_ADDR_ENTRY] % 2 == 0
+    current_func.append(data)

-  with open(input_path, 'rb') as f:
-    f.seek(section['offset'])
-    data = f.read(section['size'])
-    output_file.write(data)
+  if len(current_func) > 1:
+    cfi_data[current_func[0][_ADDR_ENTRY]] = current_func
+  return cfi_data
+
+
+def _WriteCfiData(cfi_data, out_file):
+  """Writes the CFI data in defined format to out_file."""
+  for addr, function in sorted(cfi_data.iteritems()):
+    assert len(function) > 1
+    _Write4Bytes(out_file, addr | 1)
+    _Write4Bytes(out_file, function[0][_LENGTH_ENTRY])
+    for row in function[1:]:
+      _Write4Bytes(out_file, row[_ADDR_ENTRY])
+      _Write4Bytes(out_file, (row[_CFA_REG]) | (row[_RA_REG] / 4))
+
+
+def _ParseCfiData(sym_file, output_path):
+  with open(sym_file, 'r') as f:
+    cfi_data =  _GetAllCfiRows(f)
+
+  with open(output_path, 'wb') as out_file:
+    _WriteCfiData(cfi_data, out_file)


 def main():
@@ -59,17 +171,19 @@ def main():
  parser.add_argument(
      '--output_path', required=True,
      help='The path of the output file')
-  args = parser.parse_args()
+  parser.add_argument(
+      '--dump_syms_path', required=True,
+      help='The path of the dump_syms binary')

-  sections = _GetArmSectionsFromObjdump(args.input_path)
-  exidx = sections.get('.ARM.exidx')
-  extab = sections.get('.ARM.extab')
-  if not exidx or not extab:
-    raise Exception('No arm32 exception section found.')
-  with open(args.output_path, 'wb') as outputFile:
-    _AddSectionToOutput(args.input_path, outputFile, exidx)
-    _AddSectionToOutput(args.input_path, outputFile, extab)
+  args = parser.parse_args()

+  sym_file = tempfile.NamedTemporaryFile()
+  with tempfile.NamedTemporaryFile() as sym_file:
+    out = subprocess.call(
+        ['./' +args.dump_syms_path, args.input_path], stdout=sym_file)
+    assert not out
+    sym_file.flush()
+    _ParseCfiData(sym_file.name, args.output_path)
  return 0

 if __name__ == '__main__':

--- a/build/android/gyp/extract_unwind_tables_tests.py
+++ b/build/android/gyp/extract_unwind_tables_tests.py
+#!/usr/bin/env python
+# Copyright 2018 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Tests for extract_unwind_tables.py
+
+This test suite contains various tests for extracting CFI tables from breakpad
+symbol files.
+"""
+
+import optparse
+import os
+import struct
+import sys
+import tempfile
+import unittest
+
+import extract_unwind_tables
+
+sys.path.append(os.path.join(os.path.dirname(__file__), "gyp"))
+from util import build_utils
+
+
+class TestExtractUnwindTables(unittest.TestCase):
+  def testExtractCfi(self):
+    with tempfile.NamedTemporaryFile() as input_file, \
+        tempfile.NamedTemporaryFile() as output_file:
+      input_file.write("""
+MODULE Linux arm CDE12FE1DF2B37A9C6560B4CBEE056420 lib_chrome.so
+INFO CODE_ID E12FE1CD2BDFA937C6560B4CBEE05642
+FILE 0 ../../base/allocator/allocator_check.cc
+FILE 1 ../../base/allocator/allocator_extension.cc
+FILE 2 ../../base/allocator/allocator_shim.cc
+FUNC 1adcb60 54 0 i2d_name_canon
+1adcb60 1a 509 17054
+3b94c70 2 69 40
+PUBLIC e17001 0 assist_ranker::(anonymous namespace)::FakePredict::Initialize()
+PUBLIC e17005 0 (anonymous namespace)::FileDeleter(base::File)
+STACK CFI INIT e17000 4 .cfa: sp 0 + .ra: lr
+STACK CFI INIT 0 4 .cfa: sp 0 + .ra: lr
+STACK CFI 2 .cfa: sp 4 +
+STACK CFI 4 .cfa: sp 12 + .ra: .cfa -8 + ^ r7: .cfa -12 + ^
+STACK CFI 6 .cfa: sp 16 +
+STACK CFI INIT e1a96e 20 .cfa: sp 0 + .ra: lr
+STACK CFI e1a970 .cfa: sp 4 +
+STACK CFI e1a972 .cfa: sp 12 + .ra: .cfa -8 + ^ r7: .cfa -12 + ^
+STACK CFI e1a974 .cfa: sp 16 +
+STACK CFI INIT e1a1e4 b0 .cfa: sp 0 + .ra: lr
+STACK CFI e1a1e6 .cfa: sp 16 + .ra: .cfa -4 + ^ r4: .cfa -16 + ^ r5: .cfa -12 +
+STACK CFI e1a1e8 .cfa: sp 80 +
+STACK CFI INIT 0 4 .cfa: sp 0 + .ra: lr
+STACK CFI INIT 3b92e24 3c .cfa: sp 0 + .ra: lr
+STACK CFI 3b92e4c .cfa: sp 16 + .ra: .cfa -12 + ^
+STACK CFI INIT e17004 0 .cfa: sp 0 + .ra: lr
+STACK CFI e17004 2 .cfa: sp 0 + .ra: lr
+STACK CFI INIT 3b92e70 38 .cfa: sp 0 + .ra: lr
+STACK CFI 3b92e74 .cfa: sp 8 + .ra: .cfa -4 + ^ r4: .cfa -8 + ^
+STACK CFI 3b92e90 .cfa: sp 0 + .ra: .ra r4: r4
+STACK CFI INIT 3b93114 6c .cfa: sp 0 + .ra: lr
+STACK CFI 3b93118 .cfa: r7 16 + .ra: .cfa -4 + ^
+""")
+      input_file.flush()
+      extract_unwind_tables._ParseCfiData(input_file.name, output_file.name)
+
+      expected_output_rows = [
+        0xe1a1e4 | 1, 0xb0,
+        0xe1a1e6    , 16 + 4 / 4,
+        0xe1a1e8    , 80 + 0,
+
+        0xe1a96e | 1, 0x20,
+        0xe1a970    , 4 + 0,
+        0xe1a972    , 12 + 8 / 4,
+        0xe1a974    , 16 + 0,
+
+        0x3b92e24 | 1, 0x3c,
+        0x3b92e4c    , 16 + 12 / 4
+      ]
+      actual_output = []
+      with open(output_file.name, 'rb') as f:
+        while True:
+          read = f.read(4)
+          if not read:
+            break
+          actual_output.append(struct.unpack('i', read)[0])
+      self.assertEqual(expected_output_rows, actual_output)
+
+
+if __name__ == '__main__':
+  unittest.main()
--- a/build/android/pylib/constants/__init__.py
+++ b/build/android/pylib/constants/__init__.py
@@ -141,6 +141,7 @@ PYTHON_UNIT_TEST_SUITES = {
    'test_modules': [
      'java_cpp_enum_tests',
      'java_google_api_keys_tests',
+      'extract_unwind_tables_tests',
    ]
  },
 }

--- a/build/config/android/extract_unwind_tables.gni
+++ b/build/config/android/extract_unwind_tables.gni
@@ -22,13 +22,18 @@ template("unwind_table_asset") {
          root_build_dir),
      "--output_path",
      rebase_path(_asset_path, root_build_dir),
+      "--dump_syms_path",
+      rebase_path("$root_out_dir/dump_syms", root_build_dir),
    ]
    deps = [
      ":${invoker.library_target}",
+      "//third_party/breakpad:dump_syms",
    ]
  }
  android_assets(target_name) {
-    testonly = invoker.testonly
+    if (defined(invoker.testonly)) {
+      testonly = invoker.testonly
+    }
    sources = [
      _asset_path,
    ]