Skip to content

Commit 7fd1614

Browse files
committed
Add name section and object symbol table support to emsymbolizer
With llvm/llvm-project#82083, llvm-symbolizer works correctly with name sections, so emsymbolizer can use it. Also do the same for object files with symbol tables.
1 parent 71c3d00 commit 7fd1614

File tree

2 files changed

+54
-20
lines changed

2 files changed

+54
-20
lines changed

emsymbolizer.py

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@
88
# line/column number, potentially including inlining.
99
# If the wasm has separate DWARF info, do the above with the side file
1010
# If there is a source map, we can parse it to get file and line number.
11-
# If there is an emscripten symbol map, we can parse that to get the symbol name
12-
# If there is a name section or symbol table, llvm-nm can show the symbol name.
11+
# If there is an emscripten symbol map, we can use that to get the symbol name
12+
# If there is a name section or symbol table, llvm-symbolizer can show the
13+
# symbol name.
14+
# Note: of the sources listed above, separate DWARF side files and emscripten
# symbol maps are not supported yet.
1315

1416
import argparse
1517
import json
@@ -50,21 +52,27 @@ def get_codesec_offset(module):
5052

5153

5254
# Reports whether `module` has a '.debug_line' section. In this hunk the lines
# marked '-' (a manual scan over module.sections()) are replaced by the line
# marked '+' (a single get_custom_section() lookup compared against None).
def has_debug_line_section(module):
53-
for sec in module.sections():
54-
if sec.name == ".debug_line":
55-
return True
56-
return False
55+
return module.get_custom_section('.debug_line') is not None
56+
57+
58+
def has_name_section(module):
    """Return True when `module` has a custom section called 'name'."""
    name_section = module.get_custom_section('name')
    return name_section is not None
60+
5761

62+
def has_linking_section(module):
    """Return True when `module` has a custom section called 'linking'."""
    linking_section = module.get_custom_section('linking')
    return linking_section is not None
5864

59-
def symbolize_address_dwarf(module, address):
60-
vma_adjust = get_codesec_offset(module)
65+
66+
def symbolize_address_symbolizer(module, address, is_dwarf=False):
67+
vma_adjust = get_codesec_offset(module) if is_dwarf else 0
6168
cmd = [LLVM_SYMBOLIZER, '-e', module.filename, f'--adjust-vma={vma_adjust}',
6269
str(address)]
6370
out = shared.run_process(cmd, stdout=subprocess.PIPE).stdout.strip()
6471
out_lines = out.splitlines()
72+
6573
# Source location regex, e.g., /abc/def.c:3:5
6674
SOURCE_LOC_RE = re.compile(r'(.+):(\d+):(\d+)$')
67-
# llvm-dwarfdump prints two lines per location. The first line contains a
75+
# llvm-symbolizer prints two lines per location. The first line contains a
6876
# function name, and the second contains a source location like
6977
# '/abc/def.c:3:5'. If the function or source info is not available, it will
7078
# be printed as '??', in which case we store None. If the line and column info
@@ -210,30 +218,32 @@ def main(args):
210218
with webassembly.Module(args.wasm_file) as module:
211219
base = 16 if args.address.lower().startswith('0x') else 10
212220
address = int(args.address, base)
213-
symbolized = 0
214221

215222
if args.addrtype == 'code':
216223
address += get_codesec_offset(module)
217224

218225
if ((has_debug_line_section(module) and not args.source) or
219-
'dwarf' in args.source):
220-
symbolize_address_dwarf(module, address)
221-
symbolized += 1
222-
223-
if ((get_sourceMappingURL_section(module) and not args.source) or
224-
'sourcemap' in args.source):
226+
'dwarf' in args.source):
227+
symbolize_address_symbolizer(module, address, is_dwarf=True)
228+
elif ((get_sourceMappingURL_section(module) and not args.source) or
229+
'sourcemap' in args.source):
225230
symbolize_address_sourcemap(module, address, args.file)
226-
symbolized += 1
227-
228-
if not symbolized:
231+
elif ((has_name_section(module) and not args.source) or
232+
'names' in args.source):
233+
symbolize_address_symbolizer(module, address, is_dwarf=False)
234+
elif ((has_linking_section(module) and not args.source) or
235+
'symtab' in args.source):
236+
symbolize_address_symbolizer(module, address, is_dwarf=False)
237+
else:
229238
raise Error('No .debug_line or sourceMappingURL section found in '
230239
f'{module.filename}.'
231240
" I don't know how to symbolize this file yet")
232241

233242

234243
def get_args():
235244
parser = argparse.ArgumentParser()
236-
parser.add_argument('-s', '--source', choices=['dwarf', 'sourcemap'],
245+
parser.add_argument('-s', '--source', choices=['dwarf', 'sourcemap',
246+
'names', 'symtab'],
237247
help='Force debug info source type', default=())
238248
parser.add_argument('-f', '--file', action='store',
239249
help='Force debug info source file')

test/test_other.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9753,6 +9753,12 @@ def check_source_map_loc_info(address, loc):
97539753
stdout=PIPE).stdout
97549754
self.assertIn(loc, out)
97559755

9756+
def check_func_info(address, func, filename='test_dwarf.wasm'):
    """Assert that emsymbolizer's output for `address` mentions `func`.

    Args:
      address: Address string passed to emsymbolizer on the command line.
      func: Function name expected to appear in emsymbolizer's stdout.
      filename: Binary to symbolize. Defaults to 'test_dwarf.wasm' so existing
        two-argument calls keep working; pass e.g. 'test_dwarf.o' to check an
        object file instead of re-checking the linked wasm.
    """
    out = self.run_process(
        [emsymbolizer, filename, address], stdout=PIPE).stdout
    # No debug print here: assertIn already includes `out` in its failure
    # message, so printing it only adds noise to passing runs.
    self.assertIn(func, out)
9761+
97569762
# Runs llvm-objdump to get the address of the first occurrence of the
97579763
# specified line within the given function. llvm-objdump's output format
97589764
# example is as follows:
@@ -9825,6 +9831,24 @@ def get_addr(text):
98259831
out_to_js_call_loc)
98269832
check_dwarf_loc_info(unreachable_addr, unreachable_func, unreachable_loc)
98279833

9834+
# 4. Test name section only
9835+
self.run_process([emstrip, '--strip-debug', 'test_dwarf.wasm'])
9836+
with webassembly.Module('test_dwarf.wasm') as wasm:
9837+
self.assertTrue(wasm.has_name_section())
9838+
self.assertIsNone(wasm.get_custom_section('.debug_info'))
9839+
check_func_info(out_to_js_call_addr, out_to_js_call_func[0])
9840+
# The name section will not reflect bar being inlined into main
9841+
check_func_info(unreachable_addr, '__original_main')
9842+
9843+
# 5. Test an object file with a symbol table
9844+
self.run_process([EMCC, test_file('core/test_dwarf.c'),
9845+
'-O1', '-c', '-o', 'test_dwarf.o'])
9846+
# The code addresses will be different in the object file (section offsets)
9847+
out_to_js_call_addr_obj = get_addr('call\t0')
9848+
unreachable_addr_obj = get_addr('unreachable')
9849+
check_func_info(out_to_js_call_addr_obj, out_to_js_call_func[0])
9850+
check_func_info(unreachable_addr_obj, '__original_main')
9851+
98289852
def test_separate_dwarf(self):
98299853
self.run_process([EMCC, test_file('hello_world.c'), '-g'])
98309854
self.assertExists('a.out.wasm')

0 commit comments

Comments
 (0)