From efb9c5458e121e90c79f55a1d05798c5a542a099 Mon Sep 17 00:00:00 2001 From: Ciro Santilli Date: Fri, 9 Mar 2018 19:06:17 +0000 Subject: [PATCH] parsec: more details, fix errors, more tests --- .gitignore | 1 + README.adoc | 156 ++++++++++++++++++++++--------- build | 9 +- buildroot_config_fragment_parsec | 7 +- kernel_config_fragment | 11 +++ kernel_module/external.mk | 16 ++-- parsec-benchmark/Config.in | 32 +++++-- parsec-benchmark/external.mk | 38 ++++---- parsec-benchmark/test.sh | 41 +++++++- 9 files changed, 224 insertions(+), 87 deletions(-) diff --git a/.gitignore b/.gitignore index e9104ca..cc680e8 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ *.tmp *~ .tmp_versions +/buildroot_config_fragment_cli /rootfs_overlay/etc/init.d/S99 /rootfs_overlay/ignore.sh /9p diff --git a/README.adoc b/README.adoc index daf88c4..89a0261 100644 --- a/README.adoc +++ b/README.adoc @@ -1473,10 +1473,16 @@ Some QEMU specific features to play with and limitations to cry over. === 9P -https://superuser.com/questions/628169/how-to-share-a-directory-with-the-host-without-networking-in-qemu +This protocol allows sharing a mountable filesystem between guest and host. With networking, it's boring, we can just use any of the old tools like sshfs and NFS. +https://superuser.com/questions/628169/how-to-share-a-directory-with-the-host-without-networking-in-qemu + +One advantage of this method over NFS is that it can run without `sudo` on host, or having to pass host credentials on guest for sshfs. + +TODO performance compared to NFS. + As usual, we have already set everything up for you. On host: .... @@ -1521,6 +1527,8 @@ writes from guest failed due to user mismatch problems: https://serverfault.com/ The feature is documented at: https://wiki.qemu.org/Documentation/9psetup +==== 9P arm + TODO: not working on `arm`, manual mount failed with: .... 
@@ -1537,6 +1545,13 @@ A few hits: * https://superuser.com/questions/502205/libvirt-9p-kvm-mount-in-fstab-fails-to-mount-at-boot-time +==== 9P gem5 + +Seems possible! Let's do it: + +* http://gem5.org/wiki/images/b/b8/Summit2017_wa_devlib.pdf +* http://gem5.org/WA-gem5 + === QEMU user mode This has nothing to do with the Linux kernel, but it is cool: @@ -1958,16 +1973,107 @@ External open source benchmarks. We will try to create Buildroot packages for th ===== PARSEC benchmark -We have ported parts of the link:http://parsec.cs.princeton.edu[PARSEC benchmark] for cross compilation at: https://github.com/cirosantilli/parsec-benchmark See the documentation on that repo to find out which benchmarks have been ported. +We have ported parts of the link:http://parsec.cs.princeton.edu[PARSEC benchmark] for cross compilation at: https://github.com/cirosantilli/parsec-benchmark See the documentation on that repo to find out which benchmarks have been ported. Furthermore, some of the benchmarks are segfaulting, see link:parsec-benchmark/test.sh[] -This repo makes it trivial to get started with it: +There are two ways to run PARSEC with this repo: + +* <<parsec-benchmark-without-parsecmgmt>>, most likely what you want +* <<parsec-benchmark-with-parsecmgmt>> + +====== PARSEC benchmark without parsecmgmt .... configure -gpq && ./build -a arm -g -i buildroot_config_fragment_parsec ./run -a arm -g .... -Once inside the guest, we could in theory launch PARSEC exactly as you would launch it on the host: +Once inside the guest, launch one of the `test` input sized benchmarks manually as in: + +.... +cd /parsec/ext/splash2x/apps/fmm/run +../inst/arm-linux.gcc/bin/fmm 1 < input_1 +.... + +To find out how to run many of the benchmarks, you can either: + +* have a look at: link:parsec-benchmark/test.sh[] +* do a search on the build stdout on your terminal for a line of type: ++ +.... +Running /parsec/ext/splash2x/apps/fmm/inst/arm-linux.gcc/bin/fmm 1 < input_1: +.... ++ +Yes, we do run the benchmarks on host just to unpack / generate inputs... 
and they almost always fail to run since they were built for the guest instead of host. That is fortunate, since we don't want to wait for them to finish anyways. +* have a quick peek at the package sources, usually `src/run.sh` and `parsec/*.runconf`. + +PARSEC simply wasn't designed with non native machines in mind. + +Running a benchmark of a different size requires a rebuild with: + +.... +./build \ + -a arm \ + -c 'BR2_PACKAGE_PARSEC_BENCHMARK_INPUT_SIZE="simsmall"' \ + -c 'BR2_TARGET_ROOTFS_EXT2_SIZE="500M"' \ + -g \ + -i buildroot_config_fragment_parsec \ + -- parsec-benchmark-reconfigure \ +; +.... + +and then try running the benchmarks as before. + +The rebuild is required because only one input size is available on the guest for a given build. + +Separating input sizes also allows creating smaller images when only running the smaller benchmarks. + +We don't have a perfect way to find the right value for `BR2_TARGET_ROOTFS_EXT2_SIZE`, one good heuristic is: + +.... +du -hsx buildroot/output.arm-gem5~/target/parsec +.... + +Also dots cannot be used as in `1.5G`, so just use Megs as in `1500M` instead. + +If you don't set it high enough, you will get the message: + +.... +Maybe you need to increase the filesystem size (BR2_TARGET_ROOTFS_EXT2_SIZE) +.... + +https://stackoverflow.com/questions/49211241/is-there-a-way-to-automatically-detect-the-minimum-required-br2-target-rootfs-ex + +TODO: mount the benchmarks from host instead of installing them on guest. <<9p>> would be perfect for this, but we need to get it working on gem5 and arm first. 
+ +====== PARSEC benchmark with parsecmgmt + +Most users won't want to use this method because: + +* running the `parsecmgmt` Bash scripts takes forever before it ever starts running the actual benchmarks on gem5 ++ +Running on QEMU is feasible, but not the main use case, since QEMU cannot be used for performance measurements +* it requires putting the full `.tar` inputs on the guest, which makes the image twice as large (1x for the `.tar`, 1x for the unpacked input files) + +It would be awesome if it were possible to use this method, since this is what Parsec supports officially, and so: + +* you don't have to dig into what raw command to run +* there is an easy way to run all the benchmarks in one go to test them out +* you can just run any of the benchmarks that you want + +but it simply is not feasible in gem5 because it takes too long. + +If you still want to run this, try it out with: + +.... +./build -a arm \ + -c 'BR2_TARGET_ROOTFS_EXT2_SIZE="3G"' \ + -g \ + -i buildroot_config_fragment_parsec \ + -- parsec-benchmark-reconfigure \ +; +.... + +And then you can run it just as you would on the host: .... cd /parsec/ @@ -1976,48 +2082,6 @@ bash parsecmgmt -a run -p splash2x.fmm -i test .... -TODO: `splash2x.barnes` segfaults on `arsecmgmt -a run -p splash2x.fmm -i simsmall` inside QEMU. Why? Other benchmarks ran fine. - -.... -[PARSEC] [---------- Beginning of output ----------] -Generating input file input_1... -Running /parsec/ext/splash2x/apps/barnes/inst/arm-linux.gcc/bin/barnes 1 < input_1: -reading input file : - -Segmentation fault -.... - -However, while this is fine inside QEMU, it is not practical in gem5, since the `parsecmgmt` Bash scripts just takes too long to run in that case! - -So instead, you must find out the raw executable command, and run it manually yourself. - -This command can be found from the `Running` line that `parsecmgmt` outputs when running the programs. 
- -"Luckily", we run the run scripts while creating the image to extract the inputs, so you can just do a find in your shell history to find the run command and find a line of type: - -.... -Running /parsec/ext/splash2x/apps/fmm/inst/arm-linux.gcc/bin/fmm 1 < input_1: -.... - -which teaches you that you can run `fmm` as: - -.... -cd /parsec/ext/splash2x/apps/fmm/run -../inst/arm-linux.gcc/bin/fmm 1 < input_1 -.... - -We are also collecting more raw commands for testing at: link:parsec-benchmark/test.sh[] - -And so inside of `gem5`, you likely want to do: - -.... -cd /parsec/ext/splash2x/apps/fmm/run -m5 checkpoint -m5 resetstats && /parsec/ext/splash2x/apps/fmm/inst/arm-linux.gcc/bin/fmm 1 < input_1 && m5 dumpstats -.... - -You will always want to `cd` into the `run` directory first, which is where the input is located. - ====== PARSEC change the input size One limitation is that only one input size is available on the guest for a given build. diff --git a/build b/build index 39b1240..8d35b6a 100755 --- a/build +++ b/build @@ -1,19 +1,24 @@ #!/usr/bin/env bash set -eu arch=x86_64 +rm -f buildroot_config_fragment_cli +touch buildroot_config_fragment_cli configure=true -config_fragments=buildroot_config_fragment +config_fragments='buildroot_config_fragment buildroot_config_fragment_cli' extra_make_args='' gem5=false j="$(($(nproc) - 2))" post_script_args='' qemu_sdl='--enable-sdl --with-sdlabi=2.0' v=0 -while getopts 'a:Cgj:i:klp:qS:v' OPT; do +while getopts 'a:c:Cgj:i:klp:qS:v' OPT; do case "$OPT" in a) arch="$OPTARG" ;; + c) + echo "$OPTARG" >> buildroot_config_fragment_cli + ;; C) configure=false ;; diff --git a/buildroot_config_fragment_parsec b/buildroot_config_fragment_parsec index 896066e..fbde2e1 100644 --- a/buildroot_config_fragment_parsec +++ b/buildroot_config_fragment_parsec @@ -1,8 +1,13 @@ BR2_PACKAGE_PARSEC_BENCHMARK=y -#BR2_PACKAGE_PARSEC_BENCHMARK_BUILD_LIST="splash2x" + +#BR2_PACKAGE_PARSEC_BENCHMARK_BUILD_LIST="splash2x.fmm" 
#BR2_PACKAGE_PARSEC_BENCHMARK_INPUT_SIZE="simsmall" + # Because PARSEC + its data are huge. TODO: can't we automate calculating the size? # Problems will arise if someone tries to use two such benchmarks. # Cannot be selected automatically from Kconfig: # https://stackoverflow.com/questions/40309054/how-to-select-the-value-of-a-string-option-from-another-option-in-kbuild-kconfig/49096538#49096538 BR2_TARGET_ROOTFS_EXT2_SIZE="128M" + +#BR2_PACKAGE_PARSEC_BENCHMARK_PARSECMGMT=y +#BR2_TARGET_ROOTFS_EXT2_SIZE="1500M" diff --git a/kernel_config_fragment b/kernel_config_fragment index 4bb891d..422ac9c 100644 --- a/kernel_config_fragment +++ b/kernel_config_fragment @@ -63,10 +63,21 @@ CONFIG_TRACER_SNAPSHOT=y # 9P CONFIG_9P_FS=y CONFIG_9P_FS_POSIX_ACL=y +CONFIG_9P_FS_SECURITY=y +CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NET_9P=y CONFIG_NET_9P_DEBUG=y CONFIG_NET_9P_VIRTIO=y +## Virtio. TODO: aarch64 hangs before boot if I do all of these. +#CONFIG_RPMSG_VIRTIO=y +#CONFIG_VIRTIO_BALLOON=y +#CONFIG_VIRTIO_BLK=y +#CONFIG_VIRTIO_BLK_SCSI=y +#CONFIG_VIRTIO_INPUT=y +#CONFIG_VIRTIO_PCI=y +#CONFIG_VIRTIO_VSOCKETS=y + ## Networking # Will everything blow up? diff --git a/kernel_module/external.mk b/kernel_module/external.mk index 22b1c3a..0db8306 100644 --- a/kernel_module/external.mk +++ b/kernel_module/external.mk @@ -9,17 +9,17 @@ KERNEL_MODULE_SITE = $(BR2_EXTERNAL_KERNEL_MODULE_PATH) KERNEL_MODULE_SITE_METHOD = local define KERNEL_MODULE_BUILD_CMDS - $(MAKE) -C '$(@D)/user' CC="$(TARGET_CC)" LD="$(TARGET_LD)" + $(MAKE) -C '$(@D)/user' CC="$(TARGET_CC)" LD="$(TARGET_LD)" endef define KERNEL_MODULE_INSTALL_TARGET_CMDS - # The modules are already installed by the kernel-module package type - # under /lib/modules/**, but let's also copy the modules to the root - # for insmod convenience. - # - # Modules can be still be easily inserted with "modprobe module" however. 
- $(INSTALL) -D -m 0655 $(@D)/*.ko '$(TARGET_DIR)' - $(INSTALL) -D -m 0755 $(@D)/user/*.out '$(TARGET_DIR)' + # The modules are already installed by the kernel-module package type + # under /lib/modules/**, but let's also copy the modules to the root + # for insmod convenience. + # + # Modules can be still be easily inserted with "modprobe module" however. + $(INSTALL) -D -m 0655 $(@D)/*.ko '$(TARGET_DIR)' + $(INSTALL) -D -m 0755 $(@D)/user/*.out '$(TARGET_DIR)' endef $(eval $(kernel-module)) diff --git a/parsec-benchmark/Config.in b/parsec-benchmark/Config.in index 139c4c2..f8731f1 100644 --- a/parsec-benchmark/Config.in +++ b/parsec-benchmark/Config.in @@ -1,12 +1,5 @@ config BR2_PACKAGE_PARSEC_BENCHMARK bool "PARSEC_BENCHMARK" -# Parsec shell scripts use a hardcoded /bin/bash -# One option would be to try and use /bin/sh. -# But symlinking fails because of BusyBox' symlink mechanism. -# The other option would be to patch Parsec to use /bin/sh and be POSIX compliant. -# But let's take the path of smallest resistance for now. - select BR2_PACKAGE_BUSYBOX_SHOW_OTHERS - select BR2_PACKAGE_BASH help Parsec system benchmark. @@ -20,6 +13,31 @@ config BR2_PACKAGE_PARSEC_BENCHMARK_BUILD_LIST help Space separated list of parsec packages to build. +config BR2_PACKAGE_PARSEC_BENCHMARK_PARSECMGMT + bool "parsecmgmt" + select BR2_PACKAGE_BUSYBOX_SHOW_OTHERS + select BR2_PACKAGE_BASH + help +# Needed if you want to run the full parsec benchmark inside the guest. +# +# Parsec shell scripts use a hardcoded /bin/bash +# +# One option would be to try and use /bin/sh. +# But symlinking fails because of BusyBox' symlink mechanism. +# The other option would be to patch Parsec to use /bin/sh and be POSIX compliant. +# But let's take the path of smallest resistance for now. +# +# This is a bit coarse and makes the image larger with useless source code. +# +# But according to du, the source accounts for only 1/5 of the total size, +# so benchmarks dominate, and it doesn't matter much. 
+# +# Also it is not so critical for simulators anyways unlike real embedded systems. +# +# One possibility to make this better may be to install only the 'inst/' and 'input/' +# folders for each package + toplevel '/bin/' and '/config/', but of course we won't +# know if this works until time consuming testing is done :-) + config BR2_PACKAGE_PARSEC_BENCHMARK_INPUT_SIZE string "input_size" default test diff --git a/parsec-benchmark/external.mk b/parsec-benchmark/external.mk index 30dd9f4..ab80e64 100644 --- a/parsec-benchmark/external.mk +++ b/parsec-benchmark/external.mk @@ -23,33 +23,29 @@ define PARSEC_BENCHMARK_BUILD_CMDS export TARGET_CROSS='$(TARGET_CROSS)'; \ export HOSTTYPE='$(BR2_ARCH)'; \ parsecmgmt -a build -p $$pkg; \ - parsecmgmt -a run -p $$pkg -i $(BR2_PACKAGE_PARSEC_BENCHMARK_INPUT_SIZE); \ + if [ ! '$(BR2_PACKAGE_PARSEC_BENCHMARK_PARSECMGMT)' = y ]; then \ + parsecmgmt -a run -p $$pkg -i $(BR2_PACKAGE_PARSEC_BENCHMARK_INPUT_SIZE); \ + fi \ done endef define PARSEC_BENCHMARK_INSTALL_TARGET_CMDS - # This is a bit coarse and makes the image larger with useless source code. - # - # But according to du, the source accounts for only 1/5 of the total size, - # so benchmarks dominate, and it doesn't matter much. - # - # Also it is not so critical for simulators anyways unlike real embedded systems. 
- # - # One possibility to make this better may be to install only the 'inst/' and 'input/' - # folders for each package + toplevel '/bin/' and '/config/', but of course we won't - # know if this works until time consuming testing is done :-) mkdir -p '$(TARGET_DIR)/parsec/' - $(INSTALL) -D -m 0755 '$(PARSEC_BENCHMARK_PKGDIR)/test.sh' '$(TARGET_DIR)/parsec/' - - rsync -am $(if $(filter $(V),1),-v,) --include '*/' \ - --include '/bin/***' \ - --include '/config/***' \ - --include '/env.sh' \ - --include 'inst/***' \ - --include 'run/***' \ - --exclude '*' '$(@D)/' '$(TARGET_DIR)/parsec/' \ - ; + if [ '$(BR2_PACKAGE_PARSEC_BENCHMARK_PARSECMGMT)' = y ]; then \ + rsync -am $(if $(filter $(V),1),-v,) \ + --exclude '**/obj/' \ + --exclude '**/run/' \ + --exclude '**/src/' \ + '$(@D)/' '$(TARGET_DIR)/parsec/' \ + ; \ + else \ + rsync -am $(if $(filter $(V),1),-v,) --include '*/' \ + --include 'inst/***' \ + --include 'run/***' \ + --exclude '*' '$(@D)/' '$(TARGET_DIR)/parsec/' \ + ; \ + fi # rsync finished. endef diff --git a/parsec-benchmark/test.sh b/parsec-benchmark/test.sh index a6483d7..9250c59 100755 --- a/parsec-benchmark/test.sh +++ b/parsec-benchmark/test.sh @@ -8,19 +8,56 @@ set -ex -ncpus=1 +ncpus="${1:-1}" + +## apps + +# TODO segfaulting. +#cd /parsec/ext/splash2x/apps/barnes/run +#../inst/*/bin/barnes 1 < input_1 cd /parsec/ext/splash2x/apps/fmm/run ../inst/*/bin/fmm "$ncpus" < input_1 +cd /parsec/ext/splash2x/apps/ocean_cp/run +../inst/*/bin/ocean_cp -n258 -p"$ncpus" -e1e-07 -r20000 -t28800 + +# TODO segfaulting. 
+# cd /parsec/ext/splash2x/apps/ocean_ncp/run +# ../inst/*/bin/ocean_ncp -n258 -p"$ncpus" -e1e-07 -r20000 -t28800 + +cd /parsec/ext/splash2x/apps/radiosity/run +../inst/*/bin/radiosity -bf 1.5e-1 -batch -room -p "$ncpus" + cd /parsec/ext/splash2x/apps/raytrace/run ../inst/*/bin/raytrace -s -p"$ncpus" -a4 teapot.env cd /parsec/ext/splash2x/apps/volrend/run -../inst/*/bin/volrend "$ncpus" head-scaleddown4 4 < input_1 +../inst/*/bin/volrend "$ncpus" head-scaleddown4 4 cd /parsec/ext/splash2x/apps/water_nsquared/run ../inst/*/bin/water_nsquared "$ncpus" < input_1 cd /parsec/ext/splash2x/apps/water_spatial/run ../inst/*/bin/water_spatial "$ncpus" < input_1 + +## kernels + +cd /parsec/ext/splash2x/kernels/cholesky/run +../inst/*/bin/cholesky -p"$ncpus" < tk14.O + +cd /parsec/ext/splash2x/kernels/cholesky/run +../inst/*/bin/cholesky "$ncpus" test + +cd /parsec/ext/splash2x/kernels/fft/run +../inst/*/bin/fft -m18 -p"$ncpus" + +cd /parsec/ext/splash2x/kernels/lu_cb/run +../inst/*/bin/lu_cb -p"$ncpus" -n512 -b16 + +cd /parsec/ext/splash2x/kernels/lu_ncb/run +../inst/*/bin/lu_ncb -p"$ncpus" -n512 -b16 + +# TODO ERROR: Cannot malloc enough memory for global +# cd /parsec/ext/splash2x/kernels/radix/run +# ../inst/*/bin/radix -p"$ncpus" -r4096 -n262144 -m524288