trace-boot: automate instruction count on arm

Clean old trace boot results with new automated results.
This commit is contained in:
Ciro Santilli
2018-04-10 03:38:24 +01:00
parent fb317f4778
commit b6dc20cb74
5 changed files with 86 additions and 121 deletions

View File

@@ -379,7 +379,7 @@ Our workaround is:
./qemumonitor
....
+
`sendkey sendkey ctrl-c` does not work on the text terminal either.
`sendkey ctrl-c` does not work on the text terminal either.
+
This is however fortunate when running QEMU with GDB, as the `Ctrl-C` reaches GDB and breaks.
* Not everything generates interrupts, only the final enter.
@@ -3925,120 +3925,57 @@ One possibility we could play with is to build loadable modules instead of built
....
./bench-boot
cat out/bench-boot.txt
....
Output:
Output on <<p51>> fb317f4778633692b91c9174224dccc6a3a02893:
....
cmd ./run -a arm -E '/poweroff.out'
time 6.77
cmd ./run -a arm -E 'm5 exit' -g
time 146.96
insts 230209017
cmd ./run -a arm -E 'm5 exit' -g -- --caches --cpu-type=HPI
time > 3600
insts > 373227765
cmd ./run -a aarch64 -E '/poweroff.out'
time 1.28
cmd ./run -a aarch64 -E 'm5 exit' -g
time 57.77
insts 111512915
cmd ./run -a aarch64 -E 'm5 exit' -g -- --caches --cpu-type=HPI
time 360.90
insts 111655309
cmd ./run -a x86_64 -E '/poweroff.out'
time 3.50
time 3.64
cmd ./run -a x86_64 -E '/poweroff.out' -- -enable-kvm
time 1.30
cmd ./run -a x86_64 -E '/poweroff.out' -T exec_tb
time 4.00
instructions 2437556
cmd ./run -a arm -E '/poweroff.out'
time 6.78
cmd ./run -a arm -E '/poweroff.out' -T exec_tb
time 7.08
instructions 760143
cmd ./run -a x86_64 -E 'm5 exit' -g
time 376.03
insts 634548425
time 411.16
instructions 637326092
cmd ./run -a arm -E 'm5 exit' -g
time 160.96
instructions 230209017
cmd ./run -a aarch64 -E '/poweroff.out'
time 1.32
cmd ./run -a aarch64 -E '/poweroff.out' -T exec_tb
time 1.35
instructions 178990
cmd ./run -a aarch64 -E 'm5 exit' -g
time 59.69
instructions 115759248
cmd ./run -a aarch64 -E 'm5 exit' -g -- --caches --cpu-type=HPI
time 371.64
instructions 115898484
....
For ARM `arm` QEMU, we just try to manually hit `Ctrl-C` as soon as system shutdown message appears: <<arm-shutdown>>.
The results on the <<p51>> were:
TODO the following takes more than 1 hour to finish:
[options="header"]
|===
|Arch |Emulator |Subtype |Time |N times slower than QEMU |Instruction count |Commit
....
cmd ./run -a arm -E 'm5 exit' -g -- --caches --cpu-type=HPI
....
|arm
|QEMU
|
|6 seconds
|1
|
|da79d6c6cde0fbe5473ce868c9be4771160a003b
|arm
|gem5
|AtomicSimpleCPU
|1 minute 40 seconds
|17
|
|da79d6c6cde0fbe5473ce868c9be4771160a003b
|arm
|gem5
|HPI
|10 minutes
|100
|
|da79d6c6cde0fbe5473ce868c9be4771160a003b
|aarch64
|QEMU
|
|1.3 seconds
|1
|170k
|b6e8a7d1d1cb8a1d10d57aa92ae66cec9bfb2d01
|aarch64
|gem5
|AtomicSimpleCPU
|1 minute
|43
|110M
|b6e8a7d1d1cb8a1d10d57aa92ae66cec9bfb2d01
|aarch64
|gem5
|HPI
|6 minutes 4 seconds
|
|534.812.447
|f482f24f87e2b1814ea9ed74f2c87ab30a4cb019
|x86_64
|QEMU
|
|3.8 seconds
|1
|1.8M
|4cb8a543eeaf7322d2e4493f689735cb5bfd48df
|x86_64
|QEMU
|KVM
|1.3 seconds
|0.3
|
|4cb8a543eeaf7322d2e4493f689735cb5bfd48df
|x86_64
|gem5
|AtomicSimpleCPU
|6 minutes 30 seconds
|102
|630M
|4cb8a543eeaf7322d2e4493f689735cb5bfd48df
|===
Why so long? I had previously documented on README 10 minutes at: fb317f4778633692b91c9174224dccc6a3a02893 to be confirmed and bisected.
One methodology problem is that some gem5 and QEMU were run with different kernel configs, due to <<gem5-qemu-config>>. This could have been improved if we normalized by instruction counts, but to do that we would have to enable tracing which makes QEMU run much slower.
But TODO: on aarch64 both use the same kernel build alrady, and the gem5 instruction count is much higher, why?
==== Benchmark initial build
The build times are calculated after doing `./configure` and link:https://buildroot.org/downloads/manual/manual.html#_offline_builds[`make source`], which downloads the sources, and basically benchmarks the Internet.

View File

@@ -6,29 +6,40 @@ bench() (
printf 'cmd ' >> "$results_file"
env time --append -f 'time %e' --output="$results_file" ./eeval "./run -a $1" "$results_file"
)
insts() (
{ printf 'instructions '; ./gem5-stat -a "$1" sim_insts; } >> "$results_file"
gem5_insts() (
printf "instructions $(./gem5-stat -a "$1" sim_insts)\n" >> "$results_file"
)
qemu_insts() (
arch="$1"
./qemu-trace2txt -a "$arch"
set_common_vars "$arch"
printf "instructions $(wc -l "${qemu_trace_txt_file}" | cut -d' ' -f1)\n" >> "$results_file"
)
rm -f "${results_file}"
arch=arm
# Manual Ctrl+C required.
#bench "$arch -E '/poweroff.out'"
bench "$arch -E 'm5 exit' -g"
insts "$arch"
# TODO takes more than 1 hours to finish. Why so long?
#bench "$arch -E 'm5 exit' -g -- --caches --cpu-type=HPI"
#insts "$arch"
arch=aarch64
bench "$arch -E '/poweroff.out'"
bench "$arch -E 'm5 exit' -g"
insts "$arch"
bench "$arch -E 'm5 exit' -g -- --caches --cpu-type=HPI"
insts "$arch"
arch=x86_64
bench "$arch -E '/poweroff.out'"
bench "$arch -E '/poweroff.out' -- -enable-kvm"
bench "$arch -E '/poweroff.out' -T exec_tb"
qemu_insts "$arch"
bench "$arch -E 'm5 exit' -g"
insts "$arch"
gem5_insts "$arch"
arch=arm
# Manual Ctrl+C required.
#bench "$arch -E '/poweroff.out'"
#bench "$arch -E '/poweroff.out' -T exec_tb"
#qemu_insts "$arch"
bench "$arch -E 'm5 exit' -g"
gem5_insts "$arch"
#bench "$arch -E 'm5 exit' -g -- --caches --cpu-type=HPI"
#gem5_insts "$arch"
arch=aarch64
bench "$arch -E '/poweroff.out'"
bench "$arch -E '/poweroff.out' -T exec_tb"
qemu_insts "$arch"
bench "$arch -E 'm5 exit' -g"
gem5_insts "$arch"
bench "$arch -E 'm5 exit' -g -- --caches --cpu-type=HPI"
gem5_insts "$arch"

3
common
View File

@@ -1,7 +1,7 @@
#!/usr/bin/env bash
set_common_vars() {
arch="$1"
gem5="$2"
gem5="${2:-false}"
buildroot_dir="${root_dir}/buildroot"
arch_dir="$arch"
if "$gem5" && [ ! "$arch" = aarch64 ]; then
@@ -14,6 +14,7 @@ set_common_vars() {
gem5_out_dir="${out_arch_dir}/gem5"
m5out_dir="${gem5_out_dir}/m5out"
qemu_out_dir="${out_arch_dir}/qemu"
qemu_trace_txt_file="${qemu_out_dir}/trace.txt"
}
root_dir="$(pwd)"
out_dir="${root_dir}/out"

16
qemu-trace2txt Executable file
View File

@@ -0,0 +1,16 @@
#!/usr/bin/env bash
set -eu
. common
set -- ${cli_qemu_trace2txt:-} "$@"
while getopts a: OPT; do
case "$OPT" in
a)
arch="$OPTARG"
;;
esac
done
set_common_vars "$arch" false
./qemu/scripts/simpletrace.py \
"${build_dir}/host-qemu-custom/trace-events-all" \
"${qemu_out_dir}/trace.bin" \
> "${qemu_trace_txt_file}"

View File

@@ -11,13 +11,13 @@ while getopts a: OPT; do
done
set_common_vars "$arch" false
time ./run -a "$arch" -e 'init=/poweroff.out' -T exec_tb
time ./qemu/scripts/simpletrace.py "${build_dir}/host-qemu-custom/trace-events-all" "${qemu_out_dir}/trace.bin" >"${qemu_out_dir}/trace.txt"
time ./qemu-trace2txt -a "$arch"
# Instruction count.
# We could put this on a separate script, but it just adds more arch boilerplate to a new script.
# So let's just leave it here for now since it did not add a significant processing time.
echo "instructions $(wc -l "${qemu_out_dir}/trace.txt" | cut -d' ' -f1)"
echo "instructions $(wc -l "${qemu_trace_txt_file}" | cut -d' ' -f1)"
entry_addr=$("${host_dir}"/bin/*-buildroot-*-readelf -h "${build_dir}/linux-custom/vmlinux" | grep 'Entry point address' | sed -E 's/.*: *//')
echo "entry_address ${entry_addr}"
sed "/${entry_addr}/q" "${qemu_out_dir}/trace.txt" >"${qemu_out_dir}/trace-boot.txt"
sed "/${entry_addr}/q" "${qemu_trace_txt_file}" >"${qemu_out_dir}/trace-boot.txt"
echo "instructions_firmware $(wc -l "${qemu_out_dir}/trace-boot.txt" | cut -d' ' -f1)"