From 3b3f073e2c6067baa1a5b1461329c79bceeb963e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciro=20Santilli=20=E5=85=AD=E5=9B=9B=E4=BA=8B=E4=BB=B6=20?= =?UTF-8?q?=E6=B3=95=E8=BD=AE=E5=8A=9F?= Date: Fri, 10 Jan 2020 00:00:00 +0000 Subject: [PATCH] userland/arch/aarch64/freestanding/linux/gem5_exit.S to benchmark immediate gem5 exit ldxr_stxr does an ldxr on thread 0 to fall in the ISA case where event is certain. Also add ldxr_str which tests yet another case where event is certain: non conditional write. --- README.adoc | 63 ++++++++++++++++--- path_properties.py | 1 + .../aarch64/freestanding/linux/gem5_exit.S | 10 +++ .../arch/aarch64/inline_asm/wfe_ldxr_str.cpp | 35 +++++++++++ .../arch/aarch64/inline_asm/wfe_ldxr_stxr.cpp | 2 +- 5 files changed, 102 insertions(+), 9 deletions(-) create mode 100644 userland/arch/aarch64/freestanding/linux/gem5_exit.S create mode 100644 userland/arch/aarch64/inline_asm/wfe_ldxr_str.cpp diff --git a/README.adoc b/README.adoc index f328abe..dc3e426 100644 --- a/README.adoc +++ b/README.adoc @@ -9951,7 +9951,9 @@ The `--verbose` is optional, but shows clearly each GCC build command so that yo The build outputs are automatically stored in a different directories for optimized and debug builds, which prevents `debug` files from overwriting `opt` ones. Therefore, `--gem5-build-id` is not required. -The price to pay for debuggability is high however: a Linux kernel boot was about 3x slower in QEMU and 14 times slower in gem5 debug compared to opt, see benchmarks at: xref:benchmark-linux-kernel-boot[xrefstyle=full] +The price to pay for debuggability is high however: a Linux kernel boot was about 3x slower in QEMU and 14 times slower in gem5 debug compared to opt, see benchmarks at: xref:benchmark-linux-kernel-boot[xrefstyle=full]. + +Similar slowdowns can be observed at: xref:benchmark-emulators-on-userland-executables[xrefstyle=full]. 
When in <>, using `--debug-vm` makes Ctrl-C not get passed to the QEMU guest anymore: it is instead captured by GDB itself, so allow breaking. So e.g. you won't be able to easily quit from a guest program like: @@ -14069,7 +14071,9 @@ Programs under link:userland/cpp/[] are examples of https://en.wikipedia.org/wik * random ** link:userland/cpp/random.cpp[] * containers -** link:userland/cpp/set.cpp[]: `std::set` contains unique keys +** associative +*** <> contains a benchmark comparison of different C++ containers +*** link:userland/cpp/set.cpp[]: `std::set` contains unique keys [[cpp-multithreading]] ==== C++ multithreading @@ -18715,7 +18719,10 @@ For how userland spinlocks and mutexes are implemented see <> at LKMC a18f28e263c91362519ef55 |1.10018162 * 10^8 |1 +|a18f28e263c91362519ef550150b5c9d75fa3679 + 1 +|link:userland/gcc/busy_loop.c[] `-O0` +|`gem5 --arch aarch64 --gem5-build-type debug` +|10^5 +|32 +|2.528728 * 10^6 +|0.08 + |a18f28e263c91362519ef550150b5c9d75fa3679 + 1 |link:userland/gcc/busy_loop.c[] `-O0` |`+gem5 --arch aarch64 -- --cpu-type MinorCPU --caches+` @@ -19755,7 +19770,7 @@ Summary of manually collected results on <> at LKMC a18f28e263c91362519ef55 |1.6 |ab6f7331406b22f8ab6e2df5f8b8e464fb35b611 -|link:userland/c/m5ops.c[] `-O0` +|glibc C pre-main link:userland/c/m5ops.c[] `-O0` |`gem5 --arch aarch64 --userland-args e` |1 |2 @@ -19763,13 +19778,45 @@ Summary of manually collected results on <> at LKMC a18f28e263c91362519ef55 |0.05 |ab6f7331406b22f8ab6e2df5f8b8e464fb35b611 -|link:userland/cpp/m5ops.cpp[] `-O0` +|glibc C pre-main link:userland/c/m5ops.c[] `-O0` +|`gem5 --arch aarch64 --userland-args e --gem5-build-type debug` +|1 +|2 +|1.26479 * 10^5 +|0.05 + +|ab6f7331406b22f8ab6e2df5f8b8e464fb35b611 +|glibc C++ pre-main link:userland/cpp/m5ops.cpp[] `-O0` |`gem5 --arch aarch64 --userland-args e` |1 |2 |2.385012 * 10^6 |1 +|ab6f7331406b22f8ab6e2df5f8b8e464fb35b611 +|glibc C++ pre-main link:userland/cpp/m5ops.cpp[] `-O0` +|`gem5 --arch aarch64
--userland-args e --gem5-build-type debug` +|1 +|25 +|2.385012 * 10^6 +|0.1 + +|ab6f7331406b22f8ab6e2df5f8b8e464fb35b611 +|immediate exit link:userland/arch/aarch64/freestanding/linux/gem5_exit.S[] `-O0` +|`gem5 --arch aarch64` +|1 +|1 +| +| + +|ab6f7331406b22f8ab6e2df5f8b8e464fb35b611 +|immediate exit link:userland/arch/aarch64/freestanding/linux/gem5_exit.S[] `-O0` +|`gem5 --arch aarch64 --gem5-build-type debug` +|1 +|1 +| +| + |=== The first step is to determine a number of loops that will run long enough to have meaningful results, but not too long that we will get bored, so about 1 minute. diff --git a/path_properties.py b/path_properties.py index 2acd4b5..7eacec9 100644 --- a/path_properties.py +++ b/path_properties.py @@ -542,6 +542,7 @@ path_properties_tuples = ( 'linux': ( {}, { + 'gem5_exit.S': {'allowed_emulators': {'gem5'}}, 'wfe.S': {'more_than_1s': True}, 'wfe_wfe.S': {'more_than_1s': True}, } diff --git a/userland/arch/aarch64/freestanding/linux/gem5_exit.S b/userland/arch/aarch64/freestanding/linux/gem5_exit.S new file mode 100644 index 0000000..ba4842d --- /dev/null +++ b/userland/arch/aarch64/freestanding/linux/gem5_exit.S @@ -0,0 +1,10 @@ +/* https://cirosantilli.com/linux-kernel-module-cheat#benchmark-emulators-on-userland-executables */ + +#define LKMC_M5OPS_ENABLE 1 +#include "lkmc/m5ops.h" + +.text +.global _start +_start: +asm_main_after_prologue: + LKMC_M5OPS_EXIT_ASM /* only instruction of the program; presumably expands to the gem5 exit m5op from lkmc/m5ops.h (macro body not shown here) - TODO confirm */ diff --git a/userland/arch/aarch64/inline_asm/wfe_ldxr_str.cpp b/userland/arch/aarch64/inline_asm/wfe_ldxr_str.cpp new file mode 100644 index 0000000..3e1bee9 --- /dev/null +++ b/userland/arch/aarch64/inline_asm/wfe_ldxr_str.cpp @@ -0,0 +1,35 @@ +// https://cirosantilli.com/linux-kernel-module-cheat#arm-wfe-global-monitor-events + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include +#include +#include +#include + +#include + +std::atomic_ulong done; +int futex = 1; + +void myfunc() { /* Body of the spawned thread: exclusive-load futex into scratch register x0, then execute wfe twice. */ + __asm__ __volatile__ ("ldxr x0, [%0];wfe;wfe" : : "r" (&futex) : "x0"); +
done.store(futex); +} + +int main(int argc, char **argv) { /* Spawns myfunc, then keeps storing 1 to futex until the thread sets done; a first argument starting with '0' disables the stores. */ + bool do_sev = true; + if (argc > 1) { + do_sev = (argv[1][0] != '0'); + } + done.store(0); + std::thread thread; + thread = std::thread(myfunc); + while (!done.load()) { + if (do_sev) { + __asm__ __volatile__ ("mov x0, 1;str x0, [%0]" : : "r" (&futex) : "x0", "memory"); /* "memory": the asm writes futex behind the compiler's back; x1 is never touched so it is not clobbered */ + } + } + thread.join(); +} diff --git a/userland/arch/aarch64/inline_asm/wfe_ldxr_stxr.cpp b/userland/arch/aarch64/inline_asm/wfe_ldxr_stxr.cpp index 4366d20..a2d63c6 100644 --- a/userland/arch/aarch64/inline_asm/wfe_ldxr_stxr.cpp +++ b/userland/arch/aarch64/inline_asm/wfe_ldxr_stxr.cpp @@ -28,7 +28,7 @@ int main(int argc, char **argv) { thread = std::thread(myfunc); while (!done.load()) { if (do_sev) { - __asm__ __volatile__ ("mov x0, 1;stxr w1, x0, [%0]" : : "r" (&futex) : "x0", "x1"); + __asm__ __volatile__ ("ldxr x0, [%0];mov x0, 1;stxr w1, x0, [%0]" : : "r" (&futex) : "x0", "x1", "memory"); /* ldxr first so the stxr can succeed; mov after it so x0 still holds 1 when stored */ } } thread.join();