|
| 1 | +""" |
| 2 | +## Call graph walk |
| 3 | +
|
| 4 | +TODO this is currently broken. I half gave up on it when I noticed that |
| 5 | +the bottleneck was filtering functions by directory, which is harder on this walk implementation |
| 6 | +because we have to break at the callq instructions. |
| 7 | +I'm keeping this file as some of the walking code might be reused. |
| 8 | +
|
| 9 | +This implementation disassembles every function it stops at, |
| 10 | +and adds a breakpoint at each of that function's callq instructions. |
| 11 | +
|
| 12 | +This is slower at runtime than an initial `rbreak .`, |
| 13 | +but typically faster as we move locally. |
| 14 | +
|
| 15 | +- http://stackoverflow.com/questions/9549693/gdb-list-of-all-function-calls-made-in-an-application |
| 16 | +- http://stackoverflow.com/questions/311948/make-gdb-print-control-flow-of-functions-as-they-are-called |
| 17 | +""" |
| 18 | + |
| 19 | +import re |
| 20 | + |
# Only frames whose source file path matches this regex get their call
# instructions instrumented and are printed.
path_filter_re = re.compile(r'/gcc/')

# Load the program, set its arguments, and run it up to the temporary
# breakpoint that `start` places at the beginning of the main procedure.
gdb.execute('file cc1', to_string=True)
gdb.execute('set args hello_world.c', to_string=True)
gdb.execute('start', to_string=True)
# Indentation unit, repeated once per stack frame when printing.
depth_string = 4 * ' '
thread = gdb.inferiors()[0].threads()[0]
# Function addresses that have already been disassembled.
disassembled_functions = set()
# Function addresses whose path does not match the filter regex.
# TODO: nothing adds to this set yet, so no call target is ever skipped.
function_blacklist = set()

# NOTE(review): the nesting below was reconstructed from syntax because the
# indentation was lost in the copy this was recovered from -- in particular,
# confirm that the trace line is meant to be printed only for frames that
# pass the path filter.
while True:
    frame = gdb.selected_frame()

    # Count the frames from the selected one out to the outermost one.
    stack_depth = 0
    f = frame
    while f is not None:
        stack_depth += 1
        f = f.older()

    # Not present for files without debug symbols.
    source_path = '???'
    symtab = frame.find_sal().symtab
    if symtab:
        source_path = symtab.filename

    # The filter has to run after source_path is computed for *this* frame;
    # evaluating it first raised NameError on the very first iteration.
    path_filter_re_matches = path_filter_re.search(source_path)

    # TODO: frames that do not match the filter are not handled specially yet.

    # Not present for files without debug symbols.
    args = '???'
    block = None
    try:
        block = frame.block()
    except RuntimeError:
        # GDB could not locate a block for this frame; keep the placeholder.
        pass
    if block:
        args = ', '.join(
            '{} = {}'.format(symbol.name, symbol.value(frame))
            for symbol in block
            if symbol.is_argument
        )

    if path_filter_re_matches:
        # Put a breakpoint on every call instruction of this function, but
        # only the first time the function is entered. Without a block there
        # is no address range to disassemble, so that step is skipped.
        if block is not None and block.start not in disassembled_functions:
            start = block.start
            disassembled_functions.add(start)
            arch = frame.architecture()
            # block.end is one past the last address of the block, while
            # disassemble() takes an inclusive end address.
            for instruction in arch.disassemble(start, block.end - 1):
                # This is UGLY. I wish there was a disassembly Python
                # interface to GDB, like https://github.com/aquynh/capstone,
                # which allows extracting the opcode without parsing.
                instruction_parts = instruction['asm'].split()
                if not instruction_parts or instruction_parts[0] != 'callq':
                    continue
                # Parsing fails for operands that start with `*`: basically
                # rip (could be resolved to an address statically) or rax
                # (must be broken upon every time, since we don't know where
                # it will jump to).
                try:
                    target_address = int(instruction_parts[1], 16)
                except (IndexError, ValueError):
                    target_address = None
                if target_address is None or target_address not in function_blacklist:
                    gdb.Breakpoint('*{}'.format(instruction['addr']), internal=True)

        print('{}{} : {} : {}'.format(
            stack_depth * depth_string,
            source_path,
            frame.name(),
            args
        ))

    # Run until the next breakpoint, which is on a call instruction.
    gdb.execute('continue', to_string=True)
    if thread.is_valid():
        # Step over the call: we land on the first instruction of the callee.
        gdb.execute('stepi', to_string=True)
    else:
        # The thread is gone, so the program has finished.
        break
0 commit comments