From fe9c31f7373b998e71dd5f39ead87dbf1c392dc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciro=20Santilli=20=E5=85=AD=E5=9B=9B=E4=BA=8B=E4=BB=B6=20?= =?UTF-8?q?=E6=B3=95=E8=BD=AE=E5=8A=9F?= Date: Sun, 12 May 2019 00:00:00 +0000 Subject: [PATCH] fix run-toolchain, qemu-monitor, trace-boot, trace2line, bisect-linux-boot-gem5. Fixes part of #63 I'm sad no one reported qemu-monitor break, that one is kind of important. count.out arguments broke it as an init program, since the kernel adds trash parameters to every init. Is anyone using this repo, I wonder? Keep pushing, keep pushing. One day it gets good enough, and the whole world will see. --- README.adoc | 46 +++++++++++++++++--------- bisect-linux-boot-gem5 | 45 +++++++------------------ build-docker | 1 - cli_function.py | 7 ++-- common.py | 9 ++++- qemu-monitor | 65 +++++++++++++++++++----------------- run | 6 ++-- run-gdb-user | 60 ++++++++++++++++++--------------- run-toolchain | 75 +++++++++++++++++++++++++----------------- shell_helpers.py | 24 +++++++------- trace-boot | 6 ++-- trace2line | 43 +++++++++++++++--------- userland/posix/count.c | 11 ++----- 13 files changed, 214 insertions(+), 184 deletions(-) diff --git a/README.adoc b/README.adoc index 4587ae8..55d5436 100644 --- a/README.adoc +++ b/README.adoc @@ -1891,13 +1891,13 @@ For executables from the link:userland/[] directory such as link:userland/posix/ * Shell 2: + .... -./run-gdb-user count main +./run-gdb-user userland/posix/count.c main .... + Alternatively, we could also pass the full path to the executable: + .... -./run-gdb-user "$(./getvar userland_build_dir)/sleep_forever.out" main +./run-gdb-user "$(./getvar userland_build_dir)/posix/count.out" main .... + Path resolution is analogous to <>. @@ -1952,7 +1952,7 @@ Non-init process: * Shell 2: + .... -./run-gdb-user linux/myinsmod main +./run-gdb-user userland/linux/myinsmod.c main .... * Shell 1 after the boot finishes: + @@ -1982,7 +1982,7 @@ We have also double checked the address with: .... 
./run-toolchain --arch arm readelf -- \ - -s "$(./getvar --arch arm kernel_modules_build_subdir)/fops.ko" | \ + -s "$(./getvar --arch arm userland_build_dir)/linux/myinsmod.out" | \ grep main .... @@ -2520,16 +2520,16 @@ Source: link:rootfs_overlay/lkmc/gdbserver.sh[]. And on host: .... -./run-gdbserver linux/myinsmod +./run-gdbserver userland/linux/myinsmod.c .... -or alternatively with the full path: +or alternatively with the path to the executable itself: .... ./run-gdbserver "$(./getvar userland_build_dir)/linux/myinsmod.out" .... -https://reverseengineering.stackexchange.com/questions/8829/cross-debugging-for-arm-mips-elf-with-qemu-toolchain/16214#16214 +Bibliography: https://reverseengineering.stackexchange.com/questions/8829/cross-debugging-for-arm-mips-elf-with-qemu-toolchain/16214#16214 === gdbserver BusyBox @@ -3832,7 +3832,7 @@ gem5 user mode: make \ -B \ -C "$(./getvar --arch arm buildroot_build_build_dir)/dhrystone-2" \ - CC="$(./run-toolchain --arch arm --dry gcc)" \ + CC="$(./run-toolchain --arch arm --print-tool gcc)" \ CFLAGS=-static \ ; time \ @@ -5733,7 +5733,7 @@ vermagic: 4.17.0 SMP mod_unload modversions Module information is stored in a special `.modinfo` section of the ELF file: .... -./run-toolchain readelf -- -SW "$(./getvar target_dir)/module_info.ko" +./run-toolchain readelf -- -SW "$(./getvar kernel_modules_build_subdir)/module_info.ko" .... contains: @@ -5745,7 +5745,7 @@ contains: and: .... -./run-toolchain readelf -- -x .modinfo "$(./getvar buildroot_build_build_dir)/module_info.ko" +./run-toolchain readelf -- -x .modinfo "$(./getvar kernel_modules_build_subdir)/module_info.ko" .... 
gives: @@ -9169,9 +9169,9 @@ http://gedare-csphd.blogspot.co.uk/2013/02/adding-simple-io-device-to-gem5.html === QEMU monitor -The QEMU monitor is a terminal that allows you to send text commands to the QEMU VM: https://en.wikibooks.org/wiki/QEMU/Monitor +The QEMU monitor is a magic terminal that allows you to send text commands to the QEMU VM itself: https://en.wikibooks.org/wiki/QEMU/Monitor -On another terminal, run: +While QEMU is running, on another terminal, run: .... ./qemu-monitor @@ -9193,7 +9193,7 @@ Source: link:qemu-monitor[] `qemu-monitor` uses the `-monitor` QEMU command line option, which makes the monitor listen from a socket. -Alternatively, from text mode: +Alternatively, we can also enter the QEMU monitor from inside `-nographic` <> with: .... Ctrl-A C @@ -9208,7 +9208,7 @@ Ctrl-A C * http://stackoverflow.com/questions/14165158/how-to-switch-to-qemu-monitor-console-when-running-with-curses * https://superuser.com/questions/488263/how-to-switch-to-the-qemu-control-panel-with-nographics -And in graphic mode from the GUI: +When in graphic mode, we can do it from the GUI: .... Ctrl-Alt ? @@ -9216,6 +9216,20 @@ Ctrl-Alt ? where `?` is a digit `1`, or `2`, or, `3`, etc. depending on what else is available on the GUI: serial, parallel and frame buffer. +Finally, we can also access QEMU monitor commands directly from <> with the `monitor` command: + +.... +./run-gdb +.... + +then inside that shell: + +.... +monitor info qtree +.... + +This way you can use both QEMU monitor and GDB commands to inspect the guest from inside a single shell! Pretty awesome. + In general, `./qemu-monitor` is the best option, as it: * works on both modes @@ -9439,6 +9453,8 @@ We can further use Binutils' `addr2line` to get the line that corresponds to eac less "$(./getvar --arch x86_64 run_dir)/trace-lines.txt" .... +The last command takes several seconds. + The format is as follows: .... 
@@ -11350,7 +11366,7 @@ Note that dots cannot be used as in `1.5G`, so just use Megs as in `1500M` inste Unfortunately, TODO we don't have a perfect way to find the right value for `BR2_TARGET_ROOTFS_EXT2_SIZE`. One good heuristic is: .... -du -hsx "$(./getvar --arch arm target_dir)" +du -hsx "$(./getvar --arch arm buildroot_target_dir)" .... Some promising ways to overcome this problem include: diff --git a/bisect-linux-boot-gem5 b/bisect-linux-boot-gem5 index f438c62..55fe176 100755 --- a/bisect-linux-boot-gem5 +++ b/bisect-linux-boot-gem5 @@ -1,34 +1,11 @@ -#!/usr/bin/env python3 - -import os -import shutil -import sys - -import common - -build_linux = common.import_path_relative_root('build-linux') -run = common.import_path_relative_root('run') - -parser = self.get_argparse( - argparse_args={ - 'description': '''Bisect the Linux kernel on gem5 boots. - -More information at: https://github.com/cirosantilli/linux-kernel-module-cheat#bisection -'''}, - default_args={ - 'emulators': ['gem5'], - 'linux_build_id': 'bisect', - }, -) -args = self.setup(parser) -# We need a clean rebuild because rebuilds at different revisions: -# - may fail -# - may not actually rebuild all files, e.g. on header changes -self.rmrf(kwargs['linux_build_dir']) -build_linux.LinuxComponent().do_build(args) -status = run.main(args, { - 'eval': 'm5 exit', -}) -if status == 125 or status == 127: - status = 1 -sys.exit(status) +#!/usr/bin/env bash +set -eu +./build-linux --clean "$@" +./build-linux "$@" +set +e +./run --eval 'm5 exit' "$@" || status=$? 
+# https://stackoverflow.com/questions/4713088/how-to-use-git-bisect/22592593#22592593 +if [ "$status" -eq 125 ] || [ "$status" -gt 127 ]; then + status=1 +fi +exit "$status" diff --git a/build-docker b/build-docker index 6c2ef2c..02a31cd 100755 --- a/build-docker +++ b/build-docker @@ -7,7 +7,6 @@ import tarfile import common from shell_helpers import LF - class DockerComponent(self.Component): def get_argparse_args(self): return { diff --git a/cli_function.py b/cli_function.py index e7747f8..ddddbf5 100755 --- a/cli_function.py +++ b/cli_function.py @@ -114,8 +114,7 @@ class _Argument: class CliFunction: ''' - Represent a function that can be called either from Python code, or - from the command line. + A function that can be called either from Python code, or from the command line. Features: @@ -135,6 +134,10 @@ class CliFunction: * that decorator API is insane * CLI + Python for single functions was wontfixed: https://github.com/pallets/click/issues/40 + + + Oh, and I commented on that issue pointing to this alternative and they deleted my comment: + https://github.com/pallets/click/issues/40#event-2088718624 Lol. It could have been useful + for other Googlers and as an implementation reference. ''' def __call__(self, **kwargs): ''' diff --git a/common.py b/common.py index c1a355c..3941605 100644 --- a/common.py +++ b/common.py @@ -170,7 +170,14 @@ class LkmcCliFunction(cli_function.CliFunction): Common functionality shared across our CLI functions: * command timing - * some common flags, e.g.: --arch, --dry-run, --quiet, --verbose + * a lot of common flags, e.g.: --arch, --dry-run, --quiet, --verbose + * a lot of helpers that depend on self.env + + + self.env contains the command line arguments + a ton of values derived from those. + + + It would be beautiful to do this evaluation in a lazy way, e.g. 
with functions + + cache decorators: + https://stackoverflow.com/questions/815110/is-there-a-decorator-to-simply-cache-function-return-values ''' def __init__( self, diff --git a/qemu-monitor b/qemu-monitor index 50e8f51..7da27b7 100755 --- a/qemu-monitor +++ b/qemu-monitor @@ -5,41 +5,44 @@ import sys import telnetlib import common -from shell_helpers import LF -prompt = b'\n(qemu) ' - -parser = self.get_argparse({ - 'description': '''\ +class Main(common.LkmcCliFunction): + def __init__(self): + super().__init__( + description='''\ Run a command on the QEMU monitor of a running QEMU instance If the stdin is a terminal, open an interact shell. Otherwise, run commands from stdin and quit. -''' -}) -parser.add_argument( - 'command', - help='If given, run this command and quit', - nargs='*', -) -args = self.setup(parser) +''', + ) + self.add_argument( + 'command', + help='If given, run this command and quit', + nargs='*', + ) -def write_and_read(tn, cmd, prompt): - tn.write(cmd.encode('utf-8')) - return '\n'.join(tn.read_until(prompt).decode('utf-8').splitlines()[1:])[:-len(prompt)] + def timed_main(self): + def write_and_read(tn, cmd, prompt): + tn.write(cmd.encode('utf-8')) + return '\n'.join(tn.read_until(prompt).decode('utf-8').splitlines()[1:])[:-len(prompt)] -with telnetlib.Telnet('localhost', kwargs['qemu_monitor_port']) as tn: - # Couldn't disable server echo, so just removing the write for now. 
- # https://stackoverflow.com/questions/12421799/how-to-disable-telnet-echo-in-python-telnetlib - # sock = tn.get_socket() - # sock.send(telnetlib.IAC + telnetlib.WILL + telnetlib.ECHO) - if os.isatty(sys.stdin.fileno()): - if kwargs['command'] == []: - print(tn.read_until(prompt).decode('utf-8'), end='') - tn.interact() - else: - tn.read_until(prompt) - print(write_and_read(tn, ' '.join(kwargs['command']) + '\n', prompt)) - else: - tn.read_until(prompt) - print(write_and_read(tn, sys.stdin.read() + '\n', prompt)) + with telnetlib.Telnet('localhost', self.env['qemu_monitor_port']) as tn: + prompt = b'\n(qemu) ' + # Couldn't disable server echo, so just removing the write for now. + # https://stackoverflow.com/questions/12421799/how-to-disable-telnet-echo-in-python-telnetlib + # sock = tn.get_socket() + # sock.send(telnetlib.IAC + telnetlib.WILL + telnetlib.ECHO) + if os.isatty(sys.stdin.fileno()): + if self.env['command'] == []: + print(tn.read_until(prompt).decode('utf-8'), end='') + tn.interact() + else: + tn.read_until(prompt) + print(write_and_read(tn, ' '.join(self.env['command']) + '\n', prompt)) + else: + tn.read_until(prompt) + print(write_and_read(tn, sys.stdin.read() + '\n', prompt)) + +if __name__ == '__main__': + Main().cli() diff --git a/run b/run index 2447ecd..635c7a9 100755 --- a/run +++ b/run @@ -56,7 +56,7 @@ which is what you usually want. '-E', '--eval', help='''\ -Replace the normal init with a minimal init that just evals the given string. +Replace the normal init with a minimal init that just evals the given sh string. See: https://github.com/cirosantilli/linux-kernel-module-cheat#replace-init chdir into lkmc_home before running the command: https://github.com/cirosantilli/linux-kernel-module-cheat#lkmc_home @@ -66,8 +66,8 @@ https://github.com/cirosantilli/linux-kernel-module-cheat#lkmc_home '-F', '--eval-after', help='''\ -Pass a base64 encoded command line parameter that gets evalled at the end of -the normal init. 
+Similar to --eval, but the string gets evaled at the last init script, +after the normal init finished. See: https://github.com/cirosantilli/linux-kernel-module-cheat#init-busybox ''' ) diff --git a/run-gdb-user b/run-gdb-user index a7f71c6..d697f80 100755 --- a/run-gdb-user +++ b/run-gdb-user @@ -1,36 +1,42 @@ #!/usr/bin/env python3 import os -import sys import common -rungdb = common.import_path_relative_root('run-gdb') -parser = self.get_argparse(argparse_args={ - 'description': '''GDB step debug guest userland processes without gdbserver. +class Main(common.LkmcCliFunction): + def __init__(self): + super().__init__( + description='''GDB step debug guest userland processes without gdbserver. More information at: https://github.com/cirosantilli/linux-kernel-module-cheat#gdb-step-debug-userland-processes ''' -}) -parser.add_argument( - 'executable', - help='Path to the executable to be debugged relative to the Buildroot build directory.' -) -parser.add_argument( - 'break_at', - default=None, - help='Break at this point, e.g. main.', - nargs='?' -) -args = self.setup(parser) -executable = self.resolve_userland_executable(kwargs['executable']) -addr = self.get_elf_entry(os.path.join(kwargs['buildroot_build_build_dir'], executable)) -extra_args = {} -extra_args['before'] = '-ex \"add-symbol-file {} {}\"'.format(executable, hex(addr)) -# Or else lx-symbols throws for arm: -# gdb.MemoryError: Cannot access memory at address 0xbf0040cc -# TODO understand better. -# Also, lx-symbols overrides the add-symbol-file commands. -extra_args['no_lxsymbols'] = True -extra_args['break_at'] = kwargs['break_at'] -sys.exit(rungdb.main(args, extra_args)) + ) + self.add_argument( + 'executable', + help='Path to the executable to be debugged relative to the Buildroot build directory.' + ) + self.add_argument( + 'break_at', + default=None, + help='Break at this point, e.g. main.', + nargs='?' 
+ ) + + def timed_main(self): + raise Exception("This is known to be broken, but fixing shouldn't be too hard! Keyword: get_argparse. See also: https://github.com/cirosantilli/linux-kernel-module-cheat/issues/63") + executable = self.env['image'] + addr = self.get_elf_entry(os.path.join(self.env['buildroot_build_build_dir'], executable)) + args = {} + args['before'] = '-ex \"add-symbol-file {} {}\"'.format(executable, hex(addr)) + # Or else lx-symbols throws for arm: + # gdb.MemoryError: Cannot access memory at address 0xbf0040cc + # TODO understand better. + # Also, lx-symbols overrides the add-symbol-file commands. + args['no_lxsymbols'] = True + args['break_at'] = self.env['break_at'] + rungdb = common.import_path_main('run-gdb') + return rungdb(**args) + +if __name__ == '__main__': + Main().cli() diff --git a/run-toolchain b/run-toolchain index 3672979..259654a 100755 --- a/run-toolchain +++ b/run-toolchain @@ -1,13 +1,17 @@ #!/usr/bin/env python3 import os -import sys import common from shell_helpers import LF -parser = self.get_argparse(argparse_args={ - 'description': '''Run a Buildroot ToolChain tool like readelf or objdump. +class Main(common.LkmcCliFunction): + def __init__(self): + super().__init__( + defaults = { + 'show_time': False, + }, + description='''Run a Buildroot ToolChain tool like readelf or objdump. For example, to get some information about the arm vmlinux: @@ -20,31 +24,40 @@ Get the list of available tools with: .... ls "$(./getvar -a arm buildroot_host_bin_dir)" .... 
-''' -}) -parser.add_argument( - '--dry', - help='Just output the tool path to stdout but actually run it', -) -parser.add_argument('tool', help='Which tool to run.') -parser.add_argument( - 'extra_args', - default=[], - help='Extra arguments for the tool.', - metavar='extra-args', - nargs='*' -) -args = self.setup(parser) -if kwargs['baremetal'] is None: - image = kwargs['vmlinux'] -else: - image = kwargs['image'] -tool= self.get_toolchain_tool(kwargs['tool']) -if kwargs['dry']: - print(tool) -else: - sys.exit(self.sh.run_cmd( - [tool, LF] - + self.sh.add_newlines(kwargs['extra_args']), - cmd_file=os.path.join(kwargs['run_dir'], 'run-toolchain.sh'), - )) +''', + ) + self.add_argument( + '--print-tool', + default=False, + help=''' +Just output print tool path to stdout but don't actually run it. +Suitable for programmatic consumption by other shell programs. +''', + ) + self.add_argument('tool', help='Which tool to run.') + self.add_argument( + 'extra_args', + default=[], + help='Extra arguments for the tool.', + metavar='extra-args', + nargs='*' + ) + + def timed_main(self): + if self.env['baremetal'] is None: + image = self.env['vmlinux'] + else: + image = self.env['image'] + tool = self.get_toolchain_tool(self.env['tool']) + if self.env['print_tool']: + print(tool) + return 0 + else: + return self.sh.run_cmd( + [tool, LF] + + self.sh.add_newlines(self.env['extra_args']), + cmd_file=os.path.join(self.env['run_dir'], 'run-toolchain.sh'), + ) + +if __name__ == '__main__': + Main().cli() diff --git a/shell_helpers.py b/shell_helpers.py index dcbe968..40e84de 100644 --- a/shell_helpers.py +++ b/shell_helpers.py @@ -189,18 +189,18 @@ class ShellHelpers: os.unlink(path) def run_cmd( - self, - cmd, - cmd_file=None, - out_file=None, - show_stdout=True, - show_cmd=True, - extra_env=None, - extra_paths=None, - delete_env=None, - raise_on_failure=True, - **kwargs - ): + self, + cmd, + cmd_file=None, + out_file=None, + show_stdout=True, + show_cmd=True, + extra_env=None, + 
extra_paths=None, + delete_env=None, + raise_on_failure=True, + **kwargs + ): ''' Run a command. Write the command to stdout before running it. diff --git a/trace-boot b/trace-boot index 9d442d8..bea5688 100755 --- a/trace-boot +++ b/trace-boot @@ -17,14 +17,14 @@ More information at: https://github.com/cirosantilli/linux-kernel-module-cheat#t run = common.import_path_main('run') if self.env['emulator'] == 'gem5': args['trace'] = 'Exec,-ExecSymbol,-ExecMicro' - run.main(**args) + run(**args) elif self.env['emulator'] == 'qemu': run_args = args.copy() run_args['trace'] = 'exec_tb' run_args['quit_after_boot'] = True - run.main(**run_args) + run(**run_args) qemu_trace2txt = common.import_path_main('qemu-trace2txt') - qemu_trace2txt.main(**args) + qemu_trace2txt(**args) # Instruction count. # We could put this on a separate script, but it just adds more arch boilerplate to a new script. # So let's just leave it here for now since it did not add a significant processing time. diff --git a/trace2line b/trace2line index cd53d98..4a27a1e 100755 --- a/trace2line +++ b/trace2line @@ -8,27 +8,38 @@ now... ''' import os -import re -import subprocess -import sys import common from shell_helpers import LF -parser = self.get_argparse(argparse_args={ - 'description': 'Convert an execution trace containing PC values into the Linux kernel linex executed' -}) -args = self.setup(parser) -sys.exit(subprocess.Popen([ - os.path.join(kwargs['root_dir'], 'trace2line.sh'), - 'true' if kwargs['emulator'] == 'gem5' else 'false', - kwargs['trace_txt_file'], - self.get_toolchain_tool('addr2line'), - kwargs['vmlinux'], - kwargs['run_dir'], -]).wait()) +class Main(common.LkmcCliFunction): + def __init__(self): + super().__init__( + defaults = { + 'show_time': False, + }, + description='''\ +Convert an execution trace containing PC values into the Linux kernel lines executed. +''', + ) -# This was the full conversion attempt. 
+ def timed_main(self): + self.sh.run_cmd([ + os.path.join(self.env['root_dir'], 'trace2line.sh'), LF, + 'true' if self.env['emulator'] == 'gem5' else 'false', LF, + self.env['trace_txt_file'], LF, + self.get_toolchain_tool('addr2line'), LF, + self.env['vmlinux'], LF, + self.env['run_dir'], LF, + ]) + +if __name__ == '__main__': + Main().cli() + +# This was the old full Python port attempt that was failing: + +# import subprocess +# import sys # if kwargs['emulator'] == 'gem5': # def get_pc(line): diff --git a/userland/posix/count.c b/userland/posix/count.c index d48e6ab..5aacc16 100644 --- a/userland/posix/count.c +++ b/userland/posix/count.c @@ -8,14 +8,9 @@ #include #include -int main(int argc, char **argv) { - unsigned long i = 0, max; - if (argc > 1) { - max = strtoul(argv[1], NULL, 10); - } else { - max = ULONG_MAX; - } - while (i < max) { +int main(void) { + unsigned long i = 0; + while (1) { printf("%lu\n", i); i++; sleep(1);