From 6f73a9eb3020e327488534eb4e625ac94202f9a3 Mon Sep 17 00:00:00 2001 From: Ciro Santilli Date: Sun, 9 Sep 2018 17:03:06 +0100 Subject: [PATCH] bench-boot looks fine --- README.adoc | 76 ++++++++++++++++----------------- bench-all | 36 +++++++--------- bench-boot | 82 +++++++++++++++++++++++------------- build | 2 +- build-all | 10 ++--- build-gem5 | 118 ++++++++++++++++++++++++++++------------------------ 6 files changed, 177 insertions(+), 147 deletions(-) diff --git a/README.adoc b/README.adoc index 2c8fb8d..3d382e1 100644 --- a/README.adoc +++ b/README.adoc @@ -69,10 +69,11 @@ If you don't know which one to go for, start with <> The trade-offs are basically a balance between: -* how long and how much disk space does the build take +* how long and how much disk space does the build and run take * visibility: can you GDB step debug everything and read source code? * modifiability: can you modify the source code and rebuild a modified version? * how portable the setup is: does it work on Windows? Could it ever? +* accuracy: how accurately does the simulation represent real hardware? === QEMU Buildroot setup @@ -9355,6 +9356,8 @@ We tried to automate it on Travis with link:.travis.yml[] but it hits the curren ==== Benchmark Linux kernel boot +Benchmark all: + .... ./build-all ./bench-boot @@ -9365,45 +9368,45 @@ Sample results at 2bddcc2891b7e5ac38c10d509bdfc1c8fe347b94: .... 
cmd ./run --arch x86_64 --eval '/poweroff.out' -time 3.58 +time 7.46 exit_status 0 + cmd ./run --arch x86_64 --eval '/poweroff.out' --kvm -time 0.89 +time 7.61 exit_status 0 + cmd ./run --arch x86_64 --eval '/poweroff.out' --trace exec_tb -time 4.12 +time 8.04 exit_status 0 -instructions 2343768 +instructions 1665023 + cmd ./run --arch x86_64 --eval 'm5 exit' --gem5 -time 451.10 +time 254.32 exit_status 0 -instructions 706187020 +instructions 380799337 + cmd ./run --arch arm --eval '/poweroff.out' -time 1.85 +time 5.56 exit_status 0 + cmd ./run --arch arm --eval '/poweroff.out' --trace exec_tb -time 1.92 +time 5.78 exit_status 0 -instructions 681000 -cmd ./run --arch arm --eval 'm5 exit' --gem5 -time 94.85 -exit_status 0 -instructions 139895210 +instructions 742319 + cmd ./run --arch aarch64 --eval '/poweroff.out' -time 1.36 +time 4.85 exit_status 0 + cmd ./run --arch aarch64 --eval '/poweroff.out' --trace exec_tb -time 1.37 +time 4.91 exit_status 0 -instructions 178879 +instructions 245471 + cmd ./run --arch aarch64 --eval 'm5 exit' --gem5 -time 72.50 +time 68.71 exit_status 0 -instructions 115754212 -cmd ./run --arch aarch64 --eval 'm5 exit' --gem5 -- --cpu-type=HPI --caches --l2cache --l1d_size=1024kB --l1i_size=1024kB --l2_size=1024kB --l3_size=1024kB -time 369.13 -exit_status 0 -instructions 115774177 +instructions 120555566 .... TODO: aarch64 gem5 and QEMU use the same kernel, so why is the gem5 instruction count so much much higher? @@ -9455,32 +9458,29 @@ Or to conveniently do a clean build without affecting your current one: cat ../linux-kernel-module-cheat-regression/*/build-time.log .... -===== Find which packages are making the build slow +===== Find which packages are making the build slow and big .... 
-cd "$(./getvar buildroot_out_dir) -make graph-build graph-depends -xdg-open graphs/build.pie-packages.pdf -xdg-open graphs/graph-depends.pdf +./build --skip-configure -- graph-build graph-size graph-depends +cd "$(./getvar buildroot_out_dir)/graphs" +xdg-open build.pie-packages.pdf +xdg-open graph-depends.pdf +xdg-open graph-size.pdf .... Our philosophy is: -* if something adds little to the build time, build it in by default -* otherwise, make it optional -* try to keep the toolchain (GCC, Binutils) unchanged, otherwise a full rebuild is required. -+ -So we generally just enable all toolchain options by default, even though this adds a bit of time to the build. -* if something is very valuable, we just add it by default even if it increases the Build time, notably GDB and QEMU -* runtime is sacred. -+ -We do our best to reduce the instruction and feature count to the bare minimum needed, to make the system: +* keep the root filesystem as tiny as possible to make prebuilts small. It is easy to add new packages once you have the toolchain. +* enable every feature possible on the toolchain (GCC, Binutils), because changes imply Buildroot rebuilds +* runtime is sacred. Faster systems are: + -- ** easier to understand -** run faster, specially for <> +** run faster, which is especially important for <>, which is slow -- + +Runtime basically just comes down to how we configure the Linux kernel, since in the root filesystem all that matters is `init=`, and that is easy to control. ++ One possibility we could play with is to build loadable modules instead of built-in modules to reduce runtime, but make it easier to get started with the modules. 
[[prebuilt-toolchain]] diff --git a/bench-all b/bench-all index a5502e5..81629ab 100755 --- a/bench-all +++ b/bench-all @@ -85,31 +85,27 @@ BR2_TARGET_ROOTFS_EXT2=n fi if "$bench_gem5_build"; then - arches='x86_64 arm' - for common_arch in $arches; do - common_gem5_out_dir="$("$getvar" --arch "$common_arch" gem5_out_dir)" - common_gem5_src_dir="$("$getvar" --arch "$common_arch" gem5_src_dir)" - common_out_dir="$("$getvar" --arch "$common_arch" out_dir)" - cd "${common_gem5_src_dir}" - git clean -xdf - results_file="${common_gem5_out_dir}/bench-build.txt" - gem5_outdir="${common_out_dir}/bench_build" - rm -fr "$results_file" "${gem5_outdir}" - # TODO understand better: --foreground required otherwise we cannot - # kill the build with Ctrl+C if something goes wrong, can be minimized to: - # bash -c "eval 'timeout 5 sleep 3'" - "${root_dir}/bench-cmd" "timeout --foreground 900 ../build -a '$common_arch' -o '${gem5_outdir}'" "$results_file" - cp "$results_file" "${new_dir}/gem5-bench-build-${common_arch}.txt" - cd "${root_dir}/gem5/gem5" - git clean -xdf - rm -fr "${gem5_outdir}" - done + common_arch="$default_arch" + gem5_build_id=bench-build + common_gem5_out_dir="$("$getvar" --arch "$common_arch" --gem5-build-id "$gem5_build_id" gem5_out_dir)" + common_gem5_src_dir="$("$getvar" --arch "$common_arch" --gem5-build-id "$gem5_build_id" gem5_src_dir)" + results_file="${common_gem5_out_dir}/bench-build.txt" + git -C "${common_gem5_src_dir}" clean -xdf + rm -f "$results_file" + "${root_dir}/build-gem5" --arch "$common_arch" --clean --gem5-build-id "$gem5_build_id" + # TODO understand better: --foreground required otherwise we cannot + # kill the build with Ctrl+C if something goes wrong, can be minimized to: + # bash -c "eval 'timeout 5 sleep 3'" + "${root_dir}/bench-cmd" "timeout --foreground 900 ./build-gem5 --arch '$common_arch' --gem5-build-id '$gem5_build_id'" "$results_file" + cp "$results_file" "${new_dir}/gem5-bench-build-${common_arch}.txt" + git -C 
"${common_gem5_src_dir}" clean -xdf + "${root_dir}/build-gem5" --arch "$common_arch" --clean --gem5-build-id "$gem5_build_id" fi if "$bench_linux_boot"; then cd "${root_dir}" ./build-all - ./bench-boot + ./bench-boot -t 3 cp "$(${root_dir}/getvar bench_boot)" "$new_dir" fi diff --git a/bench-boot b/bench-boot index c47eef8..3c04ac4 100755 --- a/bench-boot +++ b/bench-boot @@ -17,57 +17,83 @@ while getopts t: OPT; do esac done shift "$(($OPTIND - 1))" -extra_args="$*" -common_bench_boot="$(${root_dir}/getvar bench_boot)" - +if [ $# -gt 1 ]; then + extra_args=" $*" +else + extra_args= +fi +getvar="${root_dir}/getvar" +common_bench_boot="$("$getvar" bench_boot)" caches='--caches --l2cache --l1d_size=1024kB --l1i_size=1024kB --l2_size=1024kB --l3_size=1024kB' + bench() ( - "${root_dir}/bench-cmd" "./run -a ${1} ${extra_args}" "$common_bench_boot" - echo >> "$common_bench_boot" + "${root_dir}/bench-cmd" "./run --arch ${1}${extra_args}" "$common_bench_boot" ) + gem5_insts() ( - printf "instructions $(./gem5-stat -a "$1" sim_insts)\n" >> "$common_bench_boot" + printf "instructions $(./gem5-stat --arch "$1" sim_insts)\n" >> "$common_bench_boot" ) + qemu_insts() ( common_arch="$1" - ./qemu-trace2txt -a "$common_arch" - common_setup - printf "instructions $(wc -l "${common_trace_txt_file}" | cut -d' ' -f1)\n" >> "$common_bench_boot" + ./qemu-trace2txt --arch "$common_arch" + common_qemu_trace_txt_file="$("$getvar" --arch "$common_arch" qemu_trace_txt_file)" + printf "instructions $(wc -l "${common_qemu_trace_txt_file}" | cut -d' ' -f1)\n" >> "$common_bench_boot" ) + +newline() ( + echo >> "$common_bench_boot" +) + rm -f "${common_bench_boot}" arch=x86_64 -bench "$arch -E '/poweroff.out'" -bench "$arch -E '/poweroff.out' -K" +bench "${arch} --eval '/poweroff.out'" +newline +bench "${arch} --eval '/poweroff.out' --kvm" +newline if [ "$test_size" -ge 2 ]; then - bench "$arch -E '/poweroff.out' -T exec_tb" + bench "${arch} --eval '/poweroff.out' --trace exec_tb" qemu_insts 
"$arch" -fi -if [ "$test_size" -ge 2 ]; then - bench "$arch -E 'm5 exit' -g" + newline + bench "$arch --eval 'm5 exit' --gem5" gem5_insts "$arch" + newline fi -#bench "$arch -E 'm5 exit' -g -- --cpu-type=DerivO3CPU ${caches}" +#bench "$arch --eval 'm5 exit' --gem5 -- --cpu-type=DerivO3CPU ${caches}" #gem5_insts "$arch" +#newline arch=arm -bench "$arch -E '/poweroff.out'" +bench "$arch --eval '/poweroff.out'" +newline if [ "$test_size" -ge 2 ]; then - bench "$arch -E '/poweroff.out' -T exec_tb" + bench "$arch --eval '/poweroff.out' --trace exec_tb" qemu_insts "$arch" + newline + #bench "$arch --eval 'm5 exit' --gem5" + #gem5_insts "$arch" + #newline fi -#bench "$arch -E 'm5 exit' -g" -#gem5_insts "$arch" -#bench "$arch -E 'm5 exit' -g -- --cpu-type=HPI ${caches}" -#gem5_insts "$arch" +#if [ "$test_size" -ge 3 ]; then +# bench "$arch --eval 'm5 exit' --gem5 -- --cpu-type=HPI ${caches}" +# gem5_insts "$arch" +# newline +#fi arch=aarch64 -bench "$arch -E '/poweroff.out'" +bench "$arch --eval '/poweroff.out'" +newline if [ "$test_size" -ge 2 ]; then - bench "$arch -E '/poweroff.out' -T exec_tb" + bench "$arch --eval '/poweroff.out' --trace exec_tb" qemu_insts "$arch" + newline + bench "$arch --eval 'm5 exit' --gem5" + gem5_insts "$arch" + newline +fi +if [ "$test_size" -ge 3 ]; then + bench "$arch --eval 'm5 exit' --gem5 -- --cpu-type=HPI ${caches}" + gem5_insts "$arch" + newline fi -#bench "$arch -E 'm5 exit' -g" -#gem5_insts "$arch" -#bench "$arch -E 'm5 exit' -g -- --cpu-type=HPI ${caches}" -#gem5_insts "$arch" diff --git a/build b/build index f9f2d80..0082dde 100755 --- a/build +++ b/build @@ -283,7 +283,7 @@ https://stackoverflow.com/questions/49260466/why-when-i-change-br2-linux-kernel- help='Do a verbose build' ) parser.add_argument( - 'extra-make-args', default=defaults['extra_make_args'], nargs='*' + 'extra_make_args', default=defaults['extra_make_args'], metavar='extra-make-args', nargs='*' ) return parser diff --git a/build-all b/build-all index 
77d85a3..f361bb1 100755 --- a/build-all +++ b/build-all @@ -1,22 +1,22 @@ #!/usr/bin/env bash set -eu archs='x86_64 arm aarch64' -gem5=-g +gem5=true while getopts A:G OPT; do case "$OPT" in A) archs="$OPTARG" ;; G) - gem5= + gem5=false ;; esac done shift "$(($OPTIND - 1))" for arch in $archs; do - ./build -a "$arch" -k -l "$@" - ./build-qemu -a "$arch" + ./build --arch "$arch" --kernel-modules -l "$@" + ./build-qemu --arch "$arch" if "$gem5"; then - ./build-gem5 -a "$arch" + ./build-gem5 --arch "$arch" fi done diff --git a/build-gem5 b/build-gem5 index b7c8c75..abcb779 100755 --- a/build-gem5 +++ b/build-gem5 @@ -10,6 +10,11 @@ import subprocess import common parser = common.get_argparse() +parser.add_argument( + '-c', '--clean', + help='Clean the build instead of building.', + action='store_true', +) parser.add_argument( 'extra_scons_args', default=[], @@ -19,61 +24,64 @@ parser.add_argument( args = common.setup(parser) binaries_dir = os.path.join(common.gem5_system_dir, 'binaries') disks_dir = os.path.join(common.gem5_system_dir, 'disks') -os.makedirs(binaries_dir, exist_ok=True) -os.makedirs(disks_dir, exist_ok=True) -if not os.path.exists(os.path.join(common.gem5_src_dir, '.git')): - subprocess.check_call([ - 'git', - '-C', common.gem5_default_src_dir, - 'worktree', 'add', - '-b', os.path.join('wt', args.gem5_build_id), - common.gem5_src_dir - ]) -if args.arch == 'x86_64': - dummy_img_path = os.path.join(disks_dir, 'linux-bigswap2.img') - with open(dummy_img_path, 'wb') as dummy_img_file: - zeroes = b'\x00' * (2 ** 16) - for i in range(2 ** 10): - dummy_img_file.write(zeroes) - subprocess.check_call(['mkswap', dummy_img_path]) - with open(os.path.join(binaries_dir, 'x86_64-vmlinux-2.6.22.9'), 'w'): - # This file must always be present, despite --kernel overriding that default and selecting the kernel. - # I'm not even joking. No one has ever built x86 gem5 without the magic dist dir present. 
- pass -elif args.arch == 'arm' or args.arch == 'aarch64': - gem5_system_src_dir = os.path.join(common.gem5_src_dir, 'system') +if args.clean: + shutil.rmtree(common.gem5_out_dir) +else: + os.makedirs(binaries_dir, exist_ok=True) + os.makedirs(disks_dir, exist_ok=True) + if not os.path.exists(os.path.join(common.gem5_src_dir, '.git')): + subprocess.check_call([ + 'git', + '-C', common.gem5_default_src_dir, + 'worktree', 'add', + '-b', os.path.join('wt', args.gem5_build_id), + common.gem5_src_dir + ]) + if args.arch == 'x86_64': + dummy_img_path = os.path.join(disks_dir, 'linux-bigswap2.img') + with open(dummy_img_path, 'wb') as dummy_img_file: + zeroes = b'\x00' * (2 ** 16) + for i in range(2 ** 10): + dummy_img_file.write(zeroes) + subprocess.check_call(['mkswap', dummy_img_path]) + with open(os.path.join(binaries_dir, 'x86_64-vmlinux-2.6.22.9'), 'w'): + # This file must always be present, despite --kernel overriding that default and selecting the kernel. + # I'm not even joking. No one has ever built x86 gem5 without the magic dist dir present. + pass + elif args.arch == 'arm' or args.arch == 'aarch64': + gem5_system_src_dir = os.path.join(common.gem5_src_dir, 'system') - # dtb - dt_src_dir = os.path.join(gem5_system_src_dir, 'arm', 'dt') - dt_build_dir = os.path.join(common.gem5_system_dir, 'arm', 'dt') - subprocess.check_call(['make', '-C', dt_src_dir]) - os.makedirs(dt_build_dir, exist_ok=True) - for dt in glob.glob(os.path.join(dt_src_dir, '*.dtb')): - shutil.copy2(dt, dt_build_dir) + # dtb + dt_src_dir = os.path.join(gem5_system_src_dir, 'arm', 'dt') + dt_build_dir = os.path.join(common.gem5_system_dir, 'arm', 'dt') + subprocess.check_call(['make', '-C', dt_src_dir]) + os.makedirs(dt_build_dir, exist_ok=True) + for dt in glob.glob(os.path.join(dt_src_dir, '*.dtb')): + shutil.copy2(dt, dt_build_dir) - # Bootloader 32. 
- bootloader32_dir = os.path.join(gem5_system_src_dir, 'arm', 'simple_bootloader') - # TODO use the buildroot cross compiler here, and remove the dependencies from configure. - subprocess.check_call(['make', '-C', bootloader32_dir]) - # bootloader - shutil.copy2(os.path.join(bootloader32_dir, 'boot_emm.arm'), binaries_dir) + # Bootloader 32. + bootloader32_dir = os.path.join(gem5_system_src_dir, 'arm', 'simple_bootloader') + # TODO use the buildroot cross compiler here, and remove the dependencies from configure. + subprocess.check_call(['make', '-C', bootloader32_dir]) + # bootloader + shutil.copy2(os.path.join(bootloader32_dir, 'boot_emm.arm'), binaries_dir) - # Bootloader 64. - bootloader64_dir = os.path.join(gem5_system_src_dir, 'arm', 'aarch64_bootloader') - # TODO cross_compile is ignored because the make does not use CC... - subprocess.check_call(['make', '-C', bootloader64_dir]) - shutil.copy2(os.path.join(bootloader64_dir, 'boot_emm.arm64'), binaries_dir) -assert common.run_cmd([ - 'scons', - # TODO factor with build. - '-j', str(multiprocessing.cpu_count()), - '--ignore-style', - common.gem5_executable - ] + - args.extra_scons_args, - cwd=common.gem5_src_dir, - extra_env={'PATH': '/usr/lib/ccache:' + os.environ['PATH']}, -) == 0 -term_src_dir = os.path.join(common.gem5_src_dir, 'util/term') -subprocess.check_call(['make', '-C', term_src_dir]) -shutil.copy2(os.path.join(term_src_dir, 'm5term'), common.gem5_m5term) + # Bootloader 64. + bootloader64_dir = os.path.join(gem5_system_src_dir, 'arm', 'aarch64_bootloader') + # TODO cross_compile is ignored because the make does not use CC... + subprocess.check_call(['make', '-C', bootloader64_dir]) + shutil.copy2(os.path.join(bootloader64_dir, 'boot_emm.arm64'), binaries_dir) + assert common.run_cmd([ + 'scons', + # TODO factor with build. 
+ '-j', str(multiprocessing.cpu_count()), + '--ignore-style', + common.gem5_executable + ] + + args.extra_scons_args, + cwd=common.gem5_src_dir, + extra_env={'PATH': '/usr/lib/ccache:' + os.environ['PATH']}, + ) == 0 + term_src_dir = os.path.join(common.gem5_src_dir, 'util/term') + subprocess.check_call(['make', '-C', term_src_dir]) + shutil.copy2(os.path.join(term_src_dir, 'm5term'), common.gem5_m5term)