From d334a0db538db1a82db62fd5f74a169ca9e21f23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciro=20Santilli=20=E5=85=AD=E5=9B=9B=E4=BA=8B=E4=BB=B6=20?= =?UTF-8?q?=E6=B3=95=E8=BD=AE=E5=8A=9F?= Date: Sat, 13 Jul 2019 00:00:00 +0000 Subject: [PATCH] gem5: fix --gem5-readfile Was completely broken due to confusion between --gem5-readfile and a common variable with the same name which referred to the file path... OMG. Conclusion: no one has ever used this tutorial! Improve ./gem5.sh documentation. Also fix ./gem5-bench-dhrystone. --- README.adoc | 87 +++++++++++++++++++++++++++++++++++--------- common.py | 2 +- gem5-bench-dhrystone | 6 +-- run | 6 +-- 4 files changed, 77 insertions(+), 24 deletions(-) diff --git a/README.adoc b/README.adoc index 08c9dd4..8a7e770 100644 --- a/README.adoc +++ b/README.adoc @@ -4541,7 +4541,8 @@ Outcome: <> [[image-x11]] .X11 Buildroot graphical user interface screenshot -image::x11.png[image] +[link=x11.png] +image::x11.png[] We don't build X11 by default because it takes a considerable amount of time (about 20%), and is not expected to be used by most users: you need to pass the `-x` flag to enable it. @@ -9941,13 +9942,65 @@ OK, this is why we used gem5 in the first place, performance measurements! Let's see how many cycles https://en.wikipedia.org/wiki/Dhrystone[Dhrystone], which Buildroot provides, takes for a few different input parameters. -First build Dhrystone into the root filesystem: +We will do that for various input parameters on full system by taking a checkpoint after a fast atomic CPU boot finishes, and then we will restore in a more detailed mode and run the benchmark: .... ./build-buildroot --config 'BR2_PACKAGE_DHRYSTONE=y' +# Boot fast, take checkpoint, and exit. +./run --arch aarch64 --emulator gem5 --eval-after './gem5.sh' + +# Restore the checkpoint after boot, and benchmark with input 1000. 
+./run \ + --arch aarch64 \ + --emulator gem5 \ + --eval-after './gem5.sh' \ + --gem5-readfile 'm5 resetstats;dhrystone 1000;m5 dumpstats' \ + --gem5-restore 1 \ + -- \ + --cpu-type=HPI \ + --restore-with-cpu=HPI \ + --caches \ + --l2cache \ + --l1d_size=64kB \ + --l1i_size=64kB \ + --l2_size=256kB \ +; +# Get the value for number of cycles. +# head because there are two lines: our dumpstats and the +# automatic dumpstats at the end which we don't care about. +./gem5-stat --arch aarch64 | head -n 1 + +# Now for input 10000. +./run \ + --arch aarch64 \ + --emulator gem5 \ + --eval-after './gem5.sh' \ + --gem5-readfile 'm5 resetstats;dhrystone 10000;m5 dumpstats' \ + --gem5-restore 1 \ + -- \ + --cpu-type=HPI \ + --restore-with-cpu=HPI \ + --caches \ + --l2cache \ + --l1d_size=64kB \ + --l1i_size=64kB \ + --l2_size=256kB \ +; +./gem5-stat --arch aarch64 | head -n 1 .... -Then, a flexible setup is demonstrated at: +If you ever need a shell to quickly inspect the system state after boot, you can just use: + +.... +./run \ + --arch aarch64 \ + --emulator gem5 \ + --eval-after './gem5.sh' \ + --gem5-readfile 'sh' \ + --gem5-restore 1 \ +.... + +This procedure is further automated and DRYed up at: .... ./gem5-bench-dhrystone @@ -9956,28 +10009,26 @@ cat out/gem5-bench-dhrystone.txt Source: link:gem5-bench-dhrystone[] -Sample output: +Output at 2438410c25e200d9766c8c65773ee7469b599e4a + 1: .... n cycles -1000 12898577 -10000 23441629 -100000 128428617 +1000 13665219 +10000 20559002 +100000 85977065 .... so as expected, the Dhrystone run with a larger input parameter `100000` took more cycles than the ones with smaller input parameters. The `gem5-stats` commands output the approximate number of CPU cycles it took Dhrystone to run. -Another interesting example can be found at: link:gem5-bench-cache[]. - A more naive and simpler to understand approach would be a direct: .... ./run --arch aarch64 --emulator gem5 --eval 'm5 checkpoint;m5 resetstats;dhrystone 10000;m5 exit' .... 
-but the problem is that this method does not allow to easily run a different script without running the boot again, see: <>. +but the problem is that this method does not allow to easily run a different script without running the boot again. The `./gem5.sh` script works around that by using <> as explained further at: <>. Now you can play a fun little game with your friends: @@ -10713,27 +10764,29 @@ So we can do it like: .... # Boot, checkpoint and exit. -printf 'echo "setup run";m5 exit' > "$(./getvar gem5_readfile)" +printf 'echo "setup run";m5 exit' > "$(./getvar gem5_readfile_file)" ./run --emulator gem5 --eval 'm5 checkpoint;m5 readfile > a.sh;sh a.sh' # Restore and run the first benchmark. -printf 'echo "first benchmark";m5 exit' > "$(./getvar gem5_readfile)" +printf 'echo "first benchmark";m5 exit' > "$(./getvar gem5_readfile_file)" ./run --emulator gem5 --gem5-restore 1 # Restore and run the second benchmark. -printf 'echo "second benchmark";m5 exit' > "$(./getvar gem5_readfile)" +printf 'echo "second benchmark";m5 exit' > "$(./getvar gem5_readfile_file)" ./run --emulator gem5 --gem5-restore 1 # If something weird happened, create an interactive shell to examine the system. -printf 'sh' > "$(./getvar gem5_readfile)" +printf 'sh' > "$(./getvar gem5_readfile_file)" ./run --emulator gem5 --gem5-restore 1 .... -Since this is such a common setup, we provide some helpers for it as described at <>: +Since this is such a common setup, we provide the following helpers for this operation: * link:rootfs_overlay/lkmc/gem5.sh[]. This script is analogous to gem5's in-tree link:https://github.com/gem5/gem5/blob/2b4b94d0556c2d03172ebff63f7fc502c3c26ff8/configs/boot/hack_back_ckpt.rcS[hack_back_ckpt.rcS], but with less noise. * `./run --gem5-readfile` is a convenient way to set the `m5 readfile` +Their usage is exemplified at <>. 
+ Other loophole possibilities include: * <<9p>> @@ -10913,7 +10966,7 @@ https://stackoverflow.com/questions/49516399/how-to-use-m5-readfile-and-m5-execf Host: .... -date > "$(./getvar gem5_readfile)" +date > "$(./getvar gem5_readfile_file)" .... Guest: @@ -10952,7 +11005,7 @@ Host: .... printf '#!/bin/sh echo asdf -' > "$(./getvar gem5_readfile)" +' > "$(./getvar gem5_readfile_file)" .... Guest: diff --git a/common.py b/common.py index aaefa70..1c1ff23 100644 --- a/common.py +++ b/common.py @@ -811,7 +811,7 @@ Incompatible archs are skipped. env['stats_file'] = join(env['m5out_dir'], 'stats.txt') env['gem5_trace_txt_file'] = join(env['m5out_dir'], 'trace.txt') env['gem5_guest_terminal_file'] = join(env['m5out_dir'], 'system.terminal') - env['gem5_readfile'] = join(env['gem5_run_dir'], 'readfile') + env['gem5_readfile_file'] = join(env['gem5_run_dir'], 'readfile') env['gem5_termout_file'] = join(env['gem5_run_dir'], 'termout.txt') env['qemu_run_dir'] = join(env['run_dir_base'], 'qemu', env['arch'], str(env['run_id'])) env['qemu_termout_file'] = join(env['qemu_run_dir'], 'termout.txt') diff --git a/gem5-bench-dhrystone b/gem5-bench-dhrystone index 3cb956c..d2cf872 100755 --- a/gem5-bench-dhrystone +++ b/gem5-bench-dhrystone @@ -6,7 +6,7 @@ set -eu root_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)" outfile="${root_dir}/out/gem5-bench-dhrystone.txt" arch=aarch64 -cmd="./run --arch '$arch' --emulator gem5 --eval-busybox '/gem5.sh'" +cmd="./run --arch '$arch' --emulator gem5 --eval-after './gem5.sh'" # These cache sizes roughly match the ARM Cortex A75 # https://en.wikipedia.org/wiki/ARM_Cortex-A75 @@ -20,7 +20,7 @@ printf 'n cycles\n' > "$outfile" for n in 1000 10000 100000; do # Restore the most recent checkpoint taken with the more detailed and slower HPI CPU, # and run the benchmark with different parameters. We skip the boot completely, saving time! 
- eval "${cmd} --gem5-readfile 'dhrystone ${n}' ${restore}" &>/dev/null + eval "${cmd} --gem5-readfile 'm5 resetstats;dhrystone ${n};m5 dumpstats' ${restore}" &>/dev/null printf "${n} " >> "$outfile" - ./gem5-stat -a "$arch" >> "$outfile" + ./gem5-stat --arch "$arch" | head -n 1 >> "$outfile" done diff --git a/run b/run index 87c36fb..26e5eb8 100755 --- a/run +++ b/run @@ -432,8 +432,8 @@ Extra options to append at the end of the emulator command line. if not os.path.exists(self.env['linux_image']): raise_image_not_found(self.env['image']) self.sh.run_cmd([os.path.join(self.env['extract_vmlinux'], self.env['linux_image'])]) - os.makedirs(os.path.dirname(self.env['gem5_readfile']), exist_ok=True) - self.sh.write_string_to_file(self.env['gem5_readfile'], self.env['gem5_readfile']) + os.makedirs(os.path.dirname(self.env['gem5_readfile_file']), exist_ok=True) + self.sh.write_string_to_file(self.env['gem5_readfile_file'], self.env['gem5_readfile']) memory = '{}B'.format(self.env['memory']) gem5_exe_args = self.sh.shlex_split(self.env['gem5_exe_args']) if do_trace: @@ -476,7 +476,7 @@ Extra options to append at the end of the emulator command line. '--kernel', self.env['image'], LF, '--mem-size', memory, LF, '--num-cpus', str(self.env['cpus']), LF, - '--script', self.env['gem5_readfile'], LF, + '--script', self.env['gem5_readfile_file'], LF, ]) if self.env['arch'] == 'x86_64': if self.env['kvm']: