From e38a1dea9223bf4658384cbd93aaaae748d728de Mon Sep 17 00:00:00 2001 From: Ciro Santilli Date: Wed, 4 Apr 2018 10:09:17 +0100 Subject: [PATCH 1/4] Linux v4.16 --- README.adoc | 19 ++++++++++--------- linux | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/README.adoc b/README.adoc index 8aefe3b..b054900 100644 --- a/README.adoc +++ b/README.adoc @@ -9,7 +9,7 @@ :toclevels: 6 :toc-title: -Run one command, get a QEMU or gem5 Buildroot BusyBox virtual machine built from source with several minimal Linux kernel 4.15 module development example tutorials with GDB and KGDB step debugging and minimal educational hardware models. "Tested" in x86, ARM and MIPS guests, Ubuntu 17.10 host. +Run one command, get a QEMU or gem5 Buildroot BusyBox virtual machine built from source with several minimal Linux kernel 4.16 module development example tutorials with GDB and KGDB step debugging and minimal educational hardware models. "Tested" in x86, ARM and MIPS guests, Ubuntu 17.10 host. toc::[] @@ -1492,30 +1492,31 @@ git log | grep -E ' Linux [0-9]+\.' | head .... # Last point before out patches. -last_mainline_revision=v4.14 -next_mainline_revision=v4.15 +last_mainline_revision=v4.15 +next_mainline_revision=v4.16 cd linux -# Create a branch before the rebase. -git branch "lkmc-${last_mainline_revision}" +# Create a branch before the rebase in case things go wrong. +git checkout -b "lkmc-${last_mainline_revision}" git remote set-url origin git@github.com:cirosantilli/linux.git git push git remote add up git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git git fetch up git rebase --onto "$next_mainline_revision" "$last_mainline_revision" -./build -l -# Manually fix our kernel modules if necessary. cd .. +./build -lk +# Manually fix broken kernel modules if necessary. git branch "buildroot-2017.08-linux-${last_mainline_revision}" git add . +# And update the README to show off. git commit -m "Linux ${next_mainline_revision}" +# Test the heck out of it, especially kernel modules and GDB. +./run git push .... -and update the README! - During update all you kernel modules may break since the kernel API is not stable. They are usually trivial breaks of things moving around headers or to sub-structs. diff --git a/linux b/linux index 225d02d..4378981 160000 --- a/linux +++ b/linux @@ -1 +1 @@ -Subproject commit 225d02dc63dd537b3c84abcbdcad2d81b8ec7f03 +Subproject commit 43789817a25e242ce8eb974eb1c01df31321b001 From 32920fd25d6473934a89c48c56f4e12d03abde85 Mon Sep 17 00:00:00 2001 From: Ciro Santilli Date: Thu, 5 Apr 2018 08:52:30 +0100 Subject: [PATCH 2/4] Reorganize the benchmark section. Automate Linux kernel measures and move them into that new section. 
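
A rough usage sketch of the helper scripts added here (flags and paths as
introduced by this patch):

    # Run the gem5 Dhrystone cache size sweep and inspect the log.
    ./gem5-bench-cache -a aarch64
    cat out/aarch64/gem5/bench-cache.txt

    # Print a single gem5 stat, system.cpu.numCycles by default.
    ./gem5-stat -a aarch64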
--- README.adoc | 298 ++++++++++++++++++------------ common | 11 +- eeval | 4 + gem5-bench-cache | 63 +++++++ gem5-stat | 25 +++ parsec-benchmark/parsec-benchmark | 2 +- 6 files changed, 277 insertions(+), 126 deletions(-) create mode 100755 eeval create mode 100755 gem5-bench-cache create mode 100755 gem5-stat diff --git a/README.adoc b/README.adoc index b054900..9f247cb 100644 --- a/README.adoc +++ b/README.adoc @@ -1682,11 +1682,32 @@ Results (boot not excluded): [options="header"] |=== |Commit |Arch |Simulator |Instruction count -|7228f75ac74c896417fb8c5ba3d375a14ed4d36b |arm |QEMU |680k -|7228f75ac74c896417fb8c5ba3d375a14ed4d36b |arm |gem5 AtomicSimpleCPU |160M -|7228f75ac74c896417fb8c5ba3d375a14ed4d36b |arm |gem5 HPI |155M -|7228f75ac74c896417fb8c5ba3d375a14ed4d36b |x86_64 |QEMU |3M -|7228f75ac74c896417fb8c5ba3d375a14ed4d36b |x86_64 |gem5 AtomicSimpleCPU |528M + +|7228f75ac74c896417fb8c5ba3d375a14ed4d36b +|arm +|QEMU +|680k + +|7228f75ac74c896417fb8c5ba3d375a14ed4d36b +|arm +|gem5 AtomicSimpleCPU +|160M + +|7228f75ac74c896417fb8c5ba3d375a14ed4d36b +|arm +|gem5 HPI +|155M + +|7228f75ac74c896417fb8c5ba3d375a14ed4d36b +|x86_64 +|QEMU +|3M + +|7228f75ac74c896417fb8c5ba3d375a14ed4d36b +|x86_64 +|gem5 AtomicSimpleCPU +|528M + |=== QEMU: @@ -2309,10 +2330,10 @@ Using text mode is another workaround if you don't need GUI features. gem5 is a system simulator, much <>: http://gem5.org/ -For the most part, just add the `-g` option to the QEMU commands and everything should magically work: +For the most part, just add the `-g` option to all commands and everything should magically work: .... -./configure -g && ./build -a arm -g && ./run -a arm -g +./configure -g && ./build -a aarch64 -g && ./run -a aarch64 -g .... On another shell: @@ -2366,95 +2387,6 @@ This suits chip makers that want to distribute forks with secret IP to their cus + On the other hand, the chip makers tend to upstream less, and the project becomes more crappy in average :-) -==== gem5 vs QEMU performance - -We have benchmarked a Linux kernel boot with the commands: - -.... -# Try to manually hit Ctrl + C as soon as system shutdown message appears. -time ./run -a arm -e 'init=/poweroff.out' -time ./run -a arm -E 'm5 exit' -g -time ./run -a arm -E 'm5 exit' -g -- --caches --cpu-type=HPI -time ./run -a x86_64 -e 'init=/poweroff.out' -time ./run -a x86_64 -e 'init=/poweroff.out' -- -enable-kvm -time ./run -a x86_64 -e 'init=/poweroff.out' -g -.... - -and the results were: - -[options="header"] -|=== -|Arch |Emulator |Subtype |Time |N times slower than QEMU |Instruction count |Commit - -|arm -|QEMU -| -|6 seconds -|1 -| -|da79d6c6cde0fbe5473ce868c9be4771160a003b - -|arm -|gem5 -|AtomicSimpleCPU -|1 minute 40 seconds -|17 -| -|da79d6c6cde0fbe5473ce868c9be4771160a003b - -|arm -|gem5 -|HPI -|10 minutes -|100 -| -|da79d6c6cde0fbe5473ce868c9be4771160a003b - -|aarch64 -|QEMU -| -|1.3 seconds -|1 -|170k -|b6e8a7d1d1cb8a1d10d57aa92ae66cec9bfb2d01 - -|aarch64 -|gem5 -|AtomicSimpleCPU -|1 minute -|43 -|110M -|b6e8a7d1d1cb8a1d10d57aa92ae66cec9bfb2d01 - -|x86_64 -|QEMU -| -|3.8 seconds -|1 -|1.8M -|4cb8a543eeaf7322d2e4493f689735cb5bfd48df - -|x86_64 -|QEMU -|KVM -|1.3 seconds -|0.3 -| -|4cb8a543eeaf7322d2e4493f689735cb5bfd48df - -|x86_64 -|gem5 -|AtomicSimpleCPU -|6 minutes 30 seconds -|102 -|630M -|4cb8a543eeaf7322d2e4493f689735cb5bfd48df -|=== - -tested on the <>. - -One methodology problem is that gem5 and QEMU were run with different kernel configs, due to <>. 
This could have been improved if we normalized by instruction counts, but we didn't think of that previously. - === gem5 run benchmark OK, this is why we used gem5 in the first place, performance measurements! @@ -2464,25 +2396,29 @@ Let's benchmark https://en.wikipedia.org/wiki/Dhrystone[Dhrystone] which Buildro The most flexible way is to do: .... +arch=aarch64 + # Generate a checkpoint after Linux boots. # The boot takes a while, be patient young Padawan. printf 'm5 exit' >readfile.gitignore -./run -a aarch64 -g -E 'm5 checkpoint;m5 readfile > a.sh;sh a.sh' +./run -a "$arch" -g -E 'm5 checkpoint;m5 readfile > a.sh;sh a.sh' # Restore the checkpoint, and run the benchmark with parameter 1.000. # We skip the boot completely, saving time! printf 'm5 resetstats;dhrystone 1000;m5 exit' >readfile.gitignore -./run -a aarch64 -g -- -r 1 -./gem5-ncycles -a aarch64 +./run -a "$arch" -g -- -r 1 +./gem5-stat -a "$arch" # Now with another parameter 10.000. printf 'm5 resetstats;dhrystone 10000;m5 exit' >readfile.gitignore -./run -a aarch64 -g -- -r 1 -./gem5-ncycles -a aarch64 +./run -a "$arch" -g -- -r 1 +./gem5-stat -a "$arch" .... These commands output the approximate number of CPU cycles it took Dhrystone to run. +For more serious tests, you will likely want to automate logging the commands ran and results to files, a good example is: link:gem5-bench-cache[]. + A more naive and simpler to understand approach would be a direct: .... @@ -2580,6 +2516,7 @@ But keep in mind that it only affects benchmark performance of the most detailed |ARM |`HPI` |yes + |=== {empty}*: couldn't test because of: @@ -3705,8 +3642,12 @@ I put an `echo f` in `check_bin_arch`, and it just loops forever, does not stop In this section document how fast the build and clone are, and how to investigate them. +This is to give an idea to people of what they should expect. + Send a pull request if you try it out on something significantly different. +Ideally, we should setup an automated build server that benchmarks those things continuously for us. + === Find which packages are making the build slow .... @@ -3737,27 +3678,135 @@ We do our best to reduce the instruction and feature count to the bare minimum n + One possibility we could play with is to build loadable modules instead of built-in modules to reduce runtime, but make it easier to get started with the modules. -=== Benchmark machines +=== Benchmark this repo benchmarks -The build times are calculated after doing link:https://buildroot.org/downloads/manual/manual.html#_offline_builds[`make source`], which downloads the sources, and basically benchmarks the Internet. +==== Benchmark Linux kernel boot -https://stackoverflow.com/questions/47997565/gem5-system-requirements-for-decent-performance/48941793#48941793 +.... +./bench-boot +.... -==== P51 +Output: -Lenovo ThinkPad link:https://www3.lenovo.com/gb/en/laptops/thinkpad/p-series/P51/p/22TP2WPWP51[P51 laptop]: +.... +cmd ./run -a arm -E '/poweroff.out' +time 6.77 +cmd ./run -a arm -E 'm5 exit' -g +time 146.96 +insts 230209017 +cmd ./run -a arm -E 'm5 exit' -g -- --caches --cpu-type=HPI +time > 3600 +insts > 373227765 +cmd ./run -a aarch64 -E '/poweroff.out' +time 1.28 +cmd ./run -a aarch64 -E 'm5 exit' -g +time 57.77 +insts 111512915 +cmd ./run -a aarch64 -E 'm5 exit' -g -- --caches --cpu-type=HPI +time 360.90 +insts 111655309 +cmd ./run -a x86_64 -E '/poweroff.out' +time 3.50 +cmd ./run -a x86_64 -E '/poweroff.out' -- -enable-kvm +time 1.30 +cmd ./run -a x86_64 -E 'm5 exit' -g +time 376.03 +insts 634548425 +.... 
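+
+To reproduce a single gem5 line of that output by hand, a minimal sketch
+(assuming `sim_insts` is the gem5 stat that holds the executed instruction
+count) is:
+
+....
+# Time the boot itself.
+time ./run -a aarch64 -E 'm5 exit' -g
+# Then read the instruction count from m5out/stats.txt with the helper.
+./gem5-stat -a aarch64 sim_insts
+....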
-* 2500 USD in 2018 (high end) -* Intel Core i7-7820HQ Processor (8MB Cache, up to 3.90GHz) (4 cores 8 threads) -* 32GB(16+16) DDR4 2400MHz SODIMM -* 512GB SSD PCIe TLC OPAL2 -* Ubuntu 17.10 +For ARM `arm` QEMU, we just try to manually hit Ctrl + C as soon as system shutdown message appears: <>. + +The results on the <> were: + +[options="header"] +|=== +|Arch |Emulator |Subtype |Time |N times slower than QEMU |Instruction count |Commit + +|arm +|QEMU +| +|6 seconds +|1 +| +|da79d6c6cde0fbe5473ce868c9be4771160a003b + +|arm +|gem5 +|AtomicSimpleCPU +|1 minute 40 seconds +|17 +| +|da79d6c6cde0fbe5473ce868c9be4771160a003b + +|arm +|gem5 +|HPI +|10 minutes +|100 +| +|da79d6c6cde0fbe5473ce868c9be4771160a003b + +|aarch64 +|QEMU +| +|1.3 seconds +|1 +|170k +|b6e8a7d1d1cb8a1d10d57aa92ae66cec9bfb2d01 + +|aarch64 +|gem5 +|AtomicSimpleCPU +|1 minute +|43 +|110M +|b6e8a7d1d1cb8a1d10d57aa92ae66cec9bfb2d01 + +|aarch64 +|gem5 +|HPI +|6 minutes 4 seconds +| +|534.812.447 +|f482f24f87e2b1814ea9ed74f2c87ab30a4cb019 + +|x86_64 +|QEMU +| +|3.8 seconds +|1 +|1.8M +|4cb8a543eeaf7322d2e4493f689735cb5bfd48df + +|x86_64 +|QEMU +|KVM +|1.3 seconds +|0.3 +| +|4cb8a543eeaf7322d2e4493f689735cb5bfd48df + +|x86_64 +|gem5 +|AtomicSimpleCPU +|6 minutes 30 seconds +|102 +|630M +|4cb8a543eeaf7322d2e4493f689735cb5bfd48df + +|=== + +One methodology problem is that some gem5 and QEMU were run with different kernel configs, due to <>. This could have been improved if we normalized by instruction counts, but to do that we would have to enable tracing which makes QEMU run much slower. + +==== Benchmark initial build + +The build times are calculated after doing `./configure` and link:https://buildroot.org/downloads/manual/manual.html#_offline_builds[`make source`], which downloads the sources, and basically benchmarks the Internet. Build time at 2c12b21b304178a81c9912817b782ead0286d282: 28 minutes, 15 with full ccache hits. Breakdown: 19% GCC, 13% Linux kernel, 7% uclibc, 6% host-python, 5% host-qemu, 5% host-gdb, 2% host-binutils Single file change on `./build kernel_module-reconfigure`: 7 seconds. -===== P51 baseline benchmarks +==== Benchmark Buildroot build baseline This is the minimal build we could expect to get away with. @@ -3786,17 +3835,23 @@ This is consistent with the fact that ccache reduces the build time only partial The instructions counts varied very little between the baseline and LKMC, so runtime overhead is not a big deal apparently. -==== P51 gem5 +==== Benchmark gem5 build -How long it takes to build gem5 itself: +How long it takes to build gem5 itself on <> * x86 at 68af229490fc811aebddf68b3e2e09e63a5fa475: 9m40s -==== T430 +=== Benchmark machines -Build time: 2 hours. +==== P51 -TODO specs, SHA. +Lenovo ThinkPad link:https://www3.lenovo.com/gb/en/laptops/thinkpad/p-series/P51/p/22TP2WPWP51[P51 laptop]: + +* 2500 USD in 2018 (high end) +* Intel Core i7-7820HQ Processor (8MB Cache, up to 3.90GHz) (4 cores 8 threads) +* 32GB(16+16) DDR4 2400MHz SODIMM +* 512GB SSD PCIe TLC OPAL2 +* Ubuntu 17.10 === Benchmark Internets @@ -3809,6 +3864,15 @@ TODO specs, SHA. 
Google M-lab speed test: 36.4Mbps +=== Benchmark this repo bibliography + +gem5: + +* link:https://www.mail-archive.com/gem5-users@gem5.org/msg15262.html[] which parts of the gem5 code make it slow +* what are the minimum system requirements: +** https://stackoverflow.com/questions/47997565/gem5-system-requirements-for-decent-performance/48941793#48941793 +** https://github.com/gem5/gem5/issues/25 + == Conversation === kmod diff --git a/common b/common index 1afc470..f406690 100644 --- a/common +++ b/common @@ -1,19 +1,12 @@ #!/usr/bin/env bash -eeval() ( - cmd="$1" - echo "$cmd" | tee -a "${2:-/dev/null}" - eval "$cmd" -) set_common_vars() { arch="$1" gem5="$2" - root_dir="$(pwd)" buildroot_dir="${root_dir}/buildroot" arch_dir="$arch" if "$gem5" && [ ! "$arch" = aarch64 ]; then arch_dir="${arch}-gem5" fi - out_dir="${root_dir}/out" out_arch_dir="${out_dir}/${arch_dir}" buildroot_out_dir="${out_arch_dir}/buildroot" build_dir="${buildroot_out_dir}/build" @@ -21,8 +14,10 @@ set_common_vars() { gem5_out_dir="${out_arch_dir}/gem5" m5out_dir="${gem5_out_dir}/m5out" qemu_out_dir="${out_arch_dir}/qemu" - common_dir="${out_dir}/common" } +root_dir="$(pwd)" +out_dir="${root_dir}/out" +common_dir="${out_dir}/common" f=cli.gitignore if [ -f "$f" ]; then . "$f" diff --git a/eeval b/eeval new file mode 100755 index 0000000..7cef72b --- /dev/null +++ b/eeval @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +cmd="$1" +echo "$cmd" | tee -a "${2:-/dev/null}" +eval "$cmd" diff --git a/gem5-bench-cache b/gem5-bench-cache new file mode 100755 index 0000000..330fda1 --- /dev/null +++ b/gem5-bench-cache @@ -0,0 +1,63 @@ +#!/usr/bin/env bash +set -eu +. common +while getopts a:gh OPT; do + case "$OPT" in + a) + arch="$OPTARG" + ;; + esac +done +shift "$(($OPTIND - 1))" + +# Vars +set_common_vars "$arch" true +cmd="./run -a $arch -g" +cpt="-E 'm5 checkpoint;m5 readfile > a.sh;sh a.sh'" +cache_small='--caches --l2cache --l1d_size=1024 --l1i_size=1024 --l2_size=1024 --l3_size=1024' +cache_large='--caches --l2cache --l1d_size=1024kB --l1i_size=1024kB --l2_size=1024kB --l3_size=1024kB' +result_file="${gem5_out_dir}/bench-cache.txt" + +bench() ( + cmd="$1" + eeval "$cmd" "$result_file" + ./gem5-ncycles -a "$arch" >> "$result_file" +) + +bench-all() ( + bench "$cmd -- -r 1" + bench "$cmd -- -r 2 $cache_small" + bench "$cmd -- -r 3 $cache_large" + bench "$cmd -- -r 4 $cache_small --cpu-type=HPI" + bench "$cmd -- -r 5 $cache_large --cpu-type=HPI" +) + +# Files. +rm -rf \ + "$result_file" \ + "${m5out_dir}/cpt.*" \ +; + +# Create the checkpoints after the kernel boot. +printf 'm5 exit' >readfile.gitignore +eeval "$cmd $cpt" +eeval "$cmd $cpt -- $cache_small" +eeval "$cmd $cpt -- $cache_large" +eeval "$cmd $cpt -- $cache_small --cpu-type=HPI" +eeval "$cmd $cpt -- $cache_large --cpu-type=HPI" + +# dhrystone 1.000 +printf '#!/bin/sh +m5 resetstats +dhrystone 1000 +m5 exit +' >readfile.gitignore +bench-all + +# dhrystone 10.000 +sed -Ei 's/^dhrystone .*/dhrystone 10000/' readfile.gitignore +bench-all + +# dhrystone 100.000 +sed -Ei 's/^dhrystone .*/dhrystone 100000/' readfile.gitignore +bench-all diff --git a/gem5-stat b/gem5-stat new file mode 100755 index 0000000..70ec419 --- /dev/null +++ b/gem5-stat @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +set -eu +. common +while getopts a:hs: OPT; do + case "$OPT" in + a) + arch="$OPTARG" + ;; + h) + printf "\ +usage: $0 [-a arch] [stat=system.cpu.numCycles] +Get the value for a gem5 stat from the stats.txt file. 
+" 1>&2 + exit + ;; + esac +done +shift "$(($OPTIND - 1))" +if [ $# -gt 0 ]; then + stat="$1" +else + stat=system.cpu.numCycles +fi +set_common_vars "$arch" true +awk "/^$stat /{ print \$2 }" "${m5out_dir}/stats.txt" diff --git a/parsec-benchmark/parsec-benchmark b/parsec-benchmark/parsec-benchmark index 05c650d..f1b8a70 160000 --- a/parsec-benchmark/parsec-benchmark +++ b/parsec-benchmark/parsec-benchmark @@ -1 +1 @@ -Subproject commit 05c650df71d6aba890421b23374477abf7a392e8 +Subproject commit f1b8a70c7930fdd150649dfe43f0ea3b27f7937b From 8a6e4bcd35fc3eee92e21e11f12d2261536f09db Mon Sep 17 00:00:00 2001 From: Ciro Santilli Date: Thu, 5 Apr 2018 09:59:45 +0100 Subject: [PATCH 3/4] readme: document gem5 kvm failure error message --- README.adoc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.adoc b/README.adoc index b054900..9b9cc57 100644 --- a/README.adoc +++ b/README.adoc @@ -1210,7 +1210,11 @@ You can make QEMU or gem5 <> by passing ena ./run -K .... -but it was broken in gem5 with pending patches: https://www.mail-archive.com/gem5-users@gem5.org/msg15046.html +but it was broken in gem5 with pending patches: https://www.mail-archive.com/gem5-users@gem5.org/msg15046.html It fails immediately on: + +.... +panic: KVM: Failed to enter virtualized mode (hw reason: 0x80000021) +.... KVM uses the link:https://en.wikipedia.org/wiki/Kernel-based_Virtual_Machine[KVM Linux kernel feature] of the host to run most instructions natively. From 7d9102373d60bd159920abfe96d636420afedd67 Mon Sep 17 00:00:00 2001 From: Ciro Santilli Date: Thu, 5 Apr 2018 11:57:25 +0100 Subject: [PATCH 4/4] gem5-bench-cache: allow bench without regenerating checkpoints Print Dhrystone size on the output as well. --- README.adoc | 36 +++++++++++++++++- gem5-bench-cache | 97 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 99 insertions(+), 34 deletions(-) diff --git a/README.adoc b/README.adoc index 8417af6..396689c 100644 --- a/README.adoc +++ b/README.adoc @@ -2553,7 +2553,39 @@ So we take a performance measurement approach instead: cat out/aarch64/gem5/bench-cache.txt .... -TODO: sort out HPI, and then paste results here, why the `--cpu-type=HPI` there always generates a `switch_cpu`, even if the original run was also on HPI? +which gives: + +[options="header",cols="3*>"] +|=== +|test size +|cache size +|cycle count + +|1000 +|1k +|52.432.956 + +|1000 +|1M +|6.328.325 + +|10000 +|1k +|141.637.834 + +|10000 +|1M +|16.969.057 + +|100000 +|1k +|1.034.500.724 + +|100000 +|1M +|121.728.035 + +|=== ===== gem5 memory latency @@ -3027,7 +3059,7 @@ Internals: * <> is a guest utility present inside the gem5 tree which we cross-compiled and installed into the guest [[gem5-restore-new-scrip]] -===== gem5 checkpoint restore and run a different script +==== gem5 checkpoint restore and run a different script You want to automate running several tests from a single pristine post-boot state. diff --git a/gem5-bench-cache b/gem5-bench-cache index 330fda1..4d26ce5 100755 --- a/gem5-bench-cache +++ b/gem5-bench-cache @@ -1,11 +1,15 @@ #!/usr/bin/env bash set -eu . 
common -while getopts a:gh OPT; do +generate_checkpoints=true +while getopts a:C OPT; do case "$OPT" in a) arch="$OPTARG" ;; + C) + generate_checkpoints=false + ;; esac done shift "$(($OPTIND - 1))" @@ -13,51 +17,80 @@ shift "$(($OPTIND - 1))" # Vars set_common_vars "$arch" true cmd="./run -a $arch -g" -cpt="-E 'm5 checkpoint;m5 readfile > a.sh;sh a.sh'" -cache_small='--caches --l2cache --l1d_size=1024 --l1i_size=1024 --l2_size=1024 --l3_size=1024' +cache_small='--caches --l2cache --l1d_size=1024 --l1i_size=1024 --l2_size=1024 --l3_size=1024 ' cache_large='--caches --l2cache --l1d_size=1024kB --l1i_size=1024kB --l2_size=1024kB --l3_size=1024kB' result_file="${gem5_out_dir}/bench-cache.txt" bench() ( cmd="$1" + printf 'cmd ' >> "$result_file" eeval "$cmd" "$result_file" - ./gem5-ncycles -a "$arch" >> "$result_file" + { + printf 'cycles ' + ./gem5-stat -a "$arch" + # RESTORE_INVESTIGATION + #cycles_switch="$(./gem5-stat -a "$arch" system.switch_cpus.numCycles)" + #if [ -n "$cycles_switch" ]; then + # printf "cycles_switch ${cycles_switch}\n" + #fi + } >> "$result_file" ) bench-all() ( - bench "$cmd -- -r 1" - bench "$cmd -- -r 2 $cache_small" - bench "$cmd -- -r 3 $cache_large" - bench "$cmd -- -r 4 $cache_small --cpu-type=HPI" - bench "$cmd -- -r 5 $cache_large --cpu-type=HPI" + bench "${cmd} -- -r 1 ${cache_small} --cpu-type=HPI --restore-with-cpu=HPI" + bench "${cmd} -- -r 1 ${cache_large} --cpu-type=HPI --restore-with-cpu=HPI" + # RESTORE_INVESTIGATION + # These were mostly to investigate what happens on restore: + # https://stackoverflow.com/questions/49011096/how-to-switch-cpu-models-in-gem5-after-restoring-a-checkpoint-and-then-observe-t + #bench "$cmd -- -r 1" + #bench "$cmd -- -r 1 $cache_small" + #bench "$cmd -- -r 1 $cache_large" + #bench "$cmd -- -r 2 $cache_small" + #bench "$cmd -- -r 3 $cache_large" + #bench "$cmd -- -r 4 $cache_small --cpu-type=HPI" + #bench "$cmd -- -r 5 $cache_large --cpu-type=HPI" + ## Restore from AtomicSimpleCPU to HPI. + #bench "$cmd -- -r 2 $cache_small --cpu-type=HPI --restore-with-cpu=HPI" + #bench "$cmd -- -r 3 $cache_large --cpu-type=HPI --restore-with-cpu=HPI" + #bench "$cmd -- -r 2 $cache_small --restore-with-cpu=HPI" + #bench "$cmd -- -r 3 $cache_large --restore-with-cpu=HPI" + #bench "$cmd -- -r 2 $cache_small --cpu-type=HPI" + #bench "$cmd -- -r 3 $cache_large --cpu-type=HPI" + ## Restore HPI with different cache sizes and see if it is used. + #bench "$cmd -- -r 4 $cache_large --cpu-type=HPI" + #bench "$cmd -- -r 5 $cache_small --cpu-type=HPI" + #bench "$cmd -- -r 2 $cache_large --cpu-type=HPI" + #bench "$cmd -- -r 3 $cache_small --cpu-type=HPI" ) -# Files. -rm -rf \ - "$result_file" \ - "${m5out_dir}/cpt.*" \ -; -# Create the checkpoints after the kernel boot. -printf 'm5 exit' >readfile.gitignore -eeval "$cmd $cpt" -eeval "$cmd $cpt -- $cache_small" -eeval "$cmd $cpt -- $cache_large" -eeval "$cmd $cpt -- $cache_small --cpu-type=HPI" -eeval "$cmd $cpt -- $cache_large --cpu-type=HPI" +if "$generate_checkpoints"; then + # Create the checkpoints after the kernel boot. + rm -rf "${m5out_dir}/cpt.*"; + printf 'm5 exit' >readfile.gitignore + cpt_cmd="-E 'm5 checkpoint;m5 readfile > a.sh;sh a.sh'" + # 1 + eeval "$cmd $cpt_cmd" + # RESTORE_INVESTIGATION + ## 2 + #eeval "$cmd $cpt_cmd -- $cache_small" + ## 3 + #eeval "$cmd $cpt_cmd -- $cache_large" + ## 4 + #eeval "$cmd $cpt_cmd -- $cache_small --cpu-type=HPI" + ## 5 + #eeval "$cmd $cpt_cmd -- $cache_large --cpu-type=HPI" +fi -# dhrystone 1.000 +# Restore and run benchmarks. 
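+# For each Dhrystone size: record the size in the result file, patch it into
+# readfile.gitignore with sed, then bench-all restores checkpoint 1 with the
+# small and the large cache configuration under the HPI CPU.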
+rm -f "$result_file" printf '#!/bin/sh m5 resetstats -dhrystone 1000 +dhrystone XXX m5 exit ' >readfile.gitignore -bench-all - -# dhrystone 10.000 -sed -Ei 's/^dhrystone .*/dhrystone 10000/' readfile.gitignore -bench-all - -# dhrystone 100.000 -sed -Ei 's/^dhrystone .*/dhrystone 100000/' readfile.gitignore -bench-all +for n in 1000 10000 100000; do + printf "n ${n}\n" >> "$result_file" + sed -Ei "s/^dhrystone .*/dhrystone ${n}/" readfile.gitignore + bench-all +done
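
A usage sketch for the updated script (the -C flag added above skips
checkpoint generation, so later runs can reuse the checkpoint left by a
previous one):

    # First run: boot, generate the checkpoint, then sweep the Dhrystone sizes.
    ./gem5-bench-cache -a aarch64
    # Later runs: reuse the existing checkpoint.
    ./gem5-bench-cache -a aarch64 -C
    cat out/aarch64/gem5/bench-cache.txt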