diff --git a/.gitmodules b/.gitmodules index 834b2a4..c577874 100644 --- a/.gitmodules +++ b/.gitmodules @@ -39,3 +39,6 @@ [submodule "submodules/xen"] path = submodules/xen url = https://github.com/cirosantilli/xen +[submodule "submodules/stream-benchmark"] + path = submodules/stream-benchmark + url = https://github.com/cirosantilli/stream-benchmark diff --git a/README.adoc b/README.adoc index a539e56..0a895a9 100644 --- a/README.adoc +++ b/README.adoc @@ -12115,6 +12115,8 @@ Indirect leak of 1346 byte(s) in 2 object(s) allocated from: From the message, this appears however to be a Python / pyenv11 bug however and not in gem5 specifically. I think it worked when I tried it in the past in an older gem5 / Ubuntu. +`--without-tcmalloc` is needed / a good idea when using `--with-asan`: https://stackoverflow.com/questions/42712555/address-sanitizer-fsanitize-address-works-with-tcmalloc since both do more or less similar jobs, see also <>. + ==== gem5 Ruby build Ruby is a system that includes the SLICC domain specific language to describe memory systems: http://gem5.org/Ruby @@ -12666,6 +12668,8 @@ Tested at b4879ae5b0b6644e6836b0881e4da05c64a6550d. ===== gem5 event queue TimingSimpleCPU syscall emulation freestanding example analysis +TODO: analyze better what each of the memory event mean. For now, we have just collected a bunch of data there, but needs interpreting. The CPU specifics in this section are already insightful however. + <> should be the second simplest CPU to analyze, so let's give it a try: .... @@ -14506,20 +14510,26 @@ Build and run on QEMU <>: .... git submodule update --init submodules/dhrystone -./build-dhrystone --mode userland +./build-dhrystone --optimization-level 3 ./run --userland "$(./getvar userland_build_dir)/submodules/dhrystone/dhrystone" .... +Increase the number of loops to try and reach more meaningful results: + +.... +./run --userland "$(./getvar userland_build_dir)/submodules/dhrystone/dhrystone" --userland-args 100000000 +.... + Build and run on gem5 user mode: .... -./build-dhrystone --mode userland --static --force-rebuild -./run --emulator gem5 --userland "$(./getvar userland_build_dir)/submodules/dhrystone/dhrystone" +./build-dhrystone --optimization-level 3 --static +./run --emulator gem5 --userland "$(./getvar --static userland_build_dir)/submodules/dhrystone/dhrystone" .... TODO automate run more nicely. -Build for <> execution and run it in baremetal QEMU: +Build for <> execution and run it in baremetal QEMU. TODO: fix the build, just need to factor out all run arguments from link:build-baremetal[] into link:common.py[] and it should just work, no missing syscalls. .... # Build our Newlib stubs. @@ -14528,8 +14538,6 @@ Build for <> execution and run it in baremetal QEMU: ./run --arch aarch64 --baremetal "$(./getvar baremetal_build_dir)/submodules/dhrystone/dhrystone" .... -TODO: fix the build, just need to factor out all run arguments from link:build-baremetal[] into link:common.py[] and it should just work, no missing syscalls. - If you really want the Buildroot package for some reason, build it with: .... @@ -14542,6 +14550,68 @@ and run inside the guest from `PATH` with: dhrystone .... +==== STREAM benchmark + +http://www.cs.virginia.edu/stream/ref.html + +Very simple memory width benchmark with one C and one Fortran version, originally published in 1991, and the latest version at the time of writing is from 2013. + +Its operation is very simple: fork one thread for each CPU in the system (using OpenMP) and do the following four array operations (4 separate loops of individual operations): + +.... +/* Copy. */ +times[0 * ntimes + k] = mysecond(); +#pragma omp parallel for +for (j=0; j>. Build and run on QEMU <>: + +.... +git submodule update --init submodules/stream-benchmark +./build-stream --optimization-level 3 +./run --userland "$(./getvar userland_build_dir)/submodules/stream-benchmark/stream_c.exe" +.... + +Decrease the benchmark size and the retry count to finish simulation faster, but possibly have a less representative result: + +.... +./run --userland "$(./getvar userland_build_dir)/submodules/stream-benchmark/stream_c.exe" --userland-args '100 2' +.... + +Build and run on gem5 user mode: + +.... +./build-stream --optimization-level 3 --static +./run --emulator gem5 --userland "$(./getvar --static userland_build_dir)/submodules/stream-benchmark/stream_c.exe" --userland-args '1000 2' +.... + ==== PARSEC benchmark We have ported parts of the http://parsec.cs.princeton.edu[PARSEC benchmark] for cross compilation at: https://github.com/cirosantilli/parsec-benchmark See the documentation on that repo to find out which benchmarks have been ported. Some of the benchmarks were are segfaulting, they are documented in that repo. diff --git a/build-dhrystone b/build-dhrystone index c32f8e9..9273b65 100755 --- a/build-dhrystone +++ b/build-dhrystone @@ -42,13 +42,13 @@ https://cirosantilli.com/linux-kernel-module-cheat#dhrystone '-j', str(self.env['nproc']), LF, '-C', os.path.join(self.env['submodules_dir'], 'dhrystone'), LF, 'CC={}'.format(self.env['gcc_path']), LF, - 'CFLAGS={}'.format(' '.join(cflags)), LF, + 'CFLAGS_EXTRA={}'.format(' '.join(cflags)), LF, 'EXTRA_OBJS={}'.format(' '.join(extra_objs)), LF, 'OUT_DIR={}'.format(build_dir), LF, ] + extra_flags ) - if ret == 0 and env['copy_overlay']: + if ret == 0 and self.env['copy_overlay']: self.sh.copy_file_if_update( os.path.join(build_dir, 'dhrystone'), os.path.join(self.env['out_rootfs_overlay_lkmc_dir'], self.root_relpath, 'dhrystone'), diff --git a/build-stream b/build-stream new file mode 100755 index 0000000..72a5ff9 --- /dev/null +++ b/build-stream @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 + +import os +import shutil + +import common +import shlex +from shell_helpers import LF + +class Main(common.BuildCliFunction): + def __init__(self): + super().__init__( + description='''\ +https://cirosantilli.com/linux-kernel-module-cheat#stream-benchmark +''' + ) + self._add_argument('--ccflags') + self._add_argument('--force-rebuild') + self._add_argument('--optimization-level') + + def setup(self, env): + self.root_relpath = os.path.join('submodules', 'stream-benchmark') + + def build(self): + build_dir = self.get_build_dir() + cflags = ['-O{}'.format(self.env['optimization_level'])] + extra_flags = [] + if self.env['static']: + cflags.extend(['-static']) + if self.env['force_rebuild']: + extra_flags.extend(['-B', LF]) + if self.env['mode'] == 'baremetal': + extra_objs = [ + self.env['baremetal_syscalls_obj'], + self.env['baremetal_syscalls_asm_obj'] + ] + else: + extra_objs = [] + ret = self.sh.run_cmd( + [ + 'make', LF, + '-j', str(self.env['nproc']), LF, + '-C', os.path.join(self.env['submodules_dir'], 'stream-benchmark'), LF, + 'CC={}'.format(self.env['gcc_path']), LF, + 'CFLAGS_EXTRA={}'.format(' '.join(cflags)), LF, + 'EXTRA_OBJS={}'.format(' '.join(extra_objs)), LF, + 'FC={}'.format(self.env['gfortran_path']), LF, + 'OUT_DIR={}'.format(build_dir), LF, + ] + + extra_flags + ) + if ret == 0 and self.env['copy_overlay']: + self.sh.copy_file_if_update( + os.path.join(build_dir, 'stream_c.exe'), + os.path.join(self.env['out_rootfs_overlay_lkmc_dir'], self.root_relpath, 'stream-benchmark'), + ) + return ret + + def get_build_dir(self): + return os.path.join(self.env['build_dir'], self.root_relpath) + +if __name__ == '__main__': + Main().cli() diff --git a/common.py b/common.py index 93f3f3b..c3ffc77 100644 --- a/common.py +++ b/common.py @@ -1011,7 +1011,7 @@ Incompatible archs are skipped. ) if env['mode'] == 'baremetal': env['build_dir'] = env['baremetal_build_dir'] - elif env['mode'] == 'userland': + else: env['build_dir'] = env['userland_build_dir'] # Docker @@ -1119,6 +1119,7 @@ lunch aosp_{}-eng env['toolchain_prefix_dash'] = '' else: env['toolchain_prefix_dash'] = '{}-'.format(env['toolchain_prefix']) + env['gfortran_path'] = self.get_toolchain_tool('gfortran') env['gcc_path'] = self.get_toolchain_tool('gcc') env['gxx_path'] = self.get_toolchain_tool('g++') env['ld_path'] = self.get_toolchain_tool('ld') diff --git a/submodules/stream-benchmark b/submodules/stream-benchmark new file mode 160000 index 0000000..eaa0a90 --- /dev/null +++ b/submodules/stream-benchmark @@ -0,0 +1 @@ +Subproject commit eaa0a90ded985a1cb738f1b811ba0531bfa46a03 diff --git a/userland/c/multidimentional_array.c b/userland/c/multidimentional_array.c new file mode 100644 index 0000000..b784c86 --- /dev/null +++ b/userland/c/multidimentional_array.c @@ -0,0 +1,34 @@ +/* https://cirosantilli.com/linux-kernel-module-cheat#c + * + * Multidimentional arrays are generally a bad idea that confuses + * everyone, use single dimentional arrays + indexing if possible. + * But here goes nothing. + */ + +#include +#include +#include + +int main(void) { + /* Initialized in the code. */ + { + /* We can skip the first dimension as it is inferred. */ + int is[][3] = { + {1, 2, 3,}, + {4, 5, 5,}, + }; + assert(is[0][0] == 1); + assert(is[0][1] == 2); + assert(is[1][0] == 4); + + /* We can get the total sizes of either the entire array, + * or of just on row. */ + assert(sizeof(is) == 6 * sizeof(is[0][0])); + assert(sizeof(is[0]) == 3 * sizeof(is[0][0])); + + /* Multi dimentional arrays are contiguous and row major. */ + assert(&is[0][1] - &is[0][0] == 1); + assert(&is[1][0] - &is[0][0] == 3); + } + return EXIT_SUCCESS; +}