mirror of
https://github.com/cirosantilli/linux-kernel-module-cheat.git
synced 2026-01-23 02:05:57 +01:00
Add userland/arch/aarch64/freestanding/linux/gem5_exit.S to benchmark immediate gem5 exit
ldxr_stxr now does an ldxr on thread 0 so that it falls into the ISA case where the event is certain. Also add ldxr_str, which tests yet another case where the event is certain: a non-conditional write.
This commit is contained in:
63
README.adoc
63
README.adoc
@@ -9951,7 +9951,9 @@ The `--verbose` is optional, but shows clearly each GCC build command so that yo
|
||||
|
||||
The build outputs are automatically stored in different directories for optimized and debug builds, which prevents `debug` files from overwriting `opt` ones. Therefore, `--gem5-build-id` is not required.
|
||||
|
||||
The price to pay for debuggability is high however: a Linux kernel boot was about 3x slower in QEMU and 14 times slower in gem5 debug compared to opt, see benchmarks at: xref:benchmark-linux-kernel-boot[xrefstyle=full]
|
||||
The price to pay for debuggability is high however: a Linux kernel boot was about 3x slower in QEMU and 14 times slower in gem5 debug compared to opt, see benchmarks at: xref:benchmark-linux-kernel-boot[xrefstyle=full].
|
||||
|
||||
Similar slowdowns can be observed at: xref:benchmark-emulators-on-userland-executables[xrefstyle=full].
|
||||
|
||||
When in <<qemu-text-mode>>, using `--debug-vm` makes Ctrl-C not get passed to the QEMU guest anymore: it is instead captured by GDB itself, to allow breaking. So e.g. you won't be able to easily quit from a guest program like:
|
||||
|
||||
@@ -14069,7 +14071,9 @@ Programs under link:userland/cpp/[] are examples of https://en.wikipedia.org/wik
|
||||
* random
|
||||
** link:userland/cpp/random.cpp[]
|
||||
* containers
|
||||
** link:userland/cpp/set.cpp[]: `std::set` contains unique keys
|
||||
** associative
|
||||
*** <<algorithms>> contains a benchmark comparison of different C++ containers
|
||||
*** link:userland/cpp/set.cpp[]: `std::set` contains unique keys
|
||||
|
||||
[[cpp-multithreading]]
|
||||
==== C++ multithreading
|
||||
@@ -18715,7 +18719,10 @@ For how userland spinlocks and mutexes are implemented see <<userland-mutex-impl
|
||||
|
||||
====== ARM WFE global monitor events
|
||||
|
||||
link:userland/arch/aarch64/inline_asm/wfe_ldxr_stxr.cpp[]
|
||||
Examples:
|
||||
|
||||
* link:userland/arch/aarch64/inline_asm/wfe_ldxr_stxr.cpp[]
|
||||
* link:userland/arch/aarch64/inline_asm/wfe_ldxr_str.cpp[]
|
||||
|
||||
SEV is not the only thing that can wake up a WFE, it is only an explicit software way to do it.
|
||||
|
||||
@@ -19544,9 +19551,9 @@ cd -
|
||||
./bench-all -A
|
||||
....
|
||||
|
||||
=== Continuous integraion
|
||||
=== Continuous integration
|
||||
|
||||
We have exploreed a few Continuous integration solutions.
|
||||
We have explored a few Continuous integration solutions.
|
||||
|
||||
We haven't set up any of them yet.
|
||||
|
||||
@@ -19556,7 +19563,7 @@ We tried to automate it on Travis with link:.travis.yml[] but it hits the curren
|
||||
|
||||
==== CircleCI
|
||||
|
||||
This setup sucessfully built gem5 on every commit: link:.circleci/config.yml[]
|
||||
This setup successfully built gem5 on every commit: link:.circleci/config.yml[]
|
||||
|
||||
Enabling it is however blocked on: https://github.com/cirosantilli/linux-kernel-module-cheat/issues/79 so we disabled the builds on the web UI.
|
||||
|
||||
@@ -19714,6 +19721,14 @@ Summary of manually collected results on <<p51>> at LKMC a18f28e263c91362519ef55
|
||||
|1.10018162 * 10^8
|
||||
|1
|
||||
|
||||
|a18f28e263c91362519ef550150b5c9d75fa3679 + 1
|
||||
|link:userland/gcc/busy_loop.c[] `-O0`
|
||||
|`gem5 --arch aarch64 --gem5-build-id debug`
|
||||
|10^5
|
||||
|32
|
||||
|2.528728 * 10^6
|
||||
|0.08
|
||||
|
||||
|a18f28e263c91362519ef550150b5c9d75fa3679 + 1
|
||||
|link:userland/gcc/busy_loop.c[] `-O0`
|
||||
|`+gem5 --arch aarch64 -- --cpu-type MinorCPU --caches+`
|
||||
@@ -19755,7 +19770,7 @@ Summary of manually collected results on <<p51>> at LKMC a18f28e263c91362519ef55
|
||||
|1.6
|
||||
|
||||
|ab6f7331406b22f8ab6e2df5f8b8e464fb35b611
|
||||
|link:userland/c/m5ops.c[] `-O0`
|
||||
|glibc C pre-main link:userland/c/m5ops.c[] `-O0`
|
||||
|`gem5 --arch aarch64 --userland-args e`
|
||||
|1
|
||||
|2
|
||||
@@ -19763,13 +19778,45 @@ Summary of manually collected results on <<p51>> at LKMC a18f28e263c91362519ef55
|
||||
|0.05
|
||||
|
||||
|ab6f7331406b22f8ab6e2df5f8b8e464fb35b611
|
||||
|link:userland/cpp/m5ops.cpp[] `-O0`
|
||||
|glibc C pre-main link:userland/c/m5ops.c[] `-O0`
|
||||
|`gem5 --arch aarch64 --userland-args e --gem5-build-type debug`
|
||||
|1
|
||||
|2
|
||||
|1.26479 * 10^5
|
||||
|0.05
|
||||
|
||||
|ab6f7331406b22f8ab6e2df5f8b8e464fb35b611
|
||||
|glibc C++ pre-main link:userland/cpp/m5ops.cpp[] `-O0`
|
||||
|`gem5 --arch aarch64 --userland-args e`
|
||||
|1
|
||||
|2
|
||||
|2.385012 * 10^6
|
||||
|1
|
||||
|
||||
|ab6f7331406b22f8ab6e2df5f8b8e464fb35b611
|
||||
|glibc C++ pre-main link:userland/cpp/m5ops.cpp[] `-O0`
|
||||
|`gem5 --arch aarch64 --userland-args e --gem5-build-type debug`
|
||||
|1
|
||||
|25
|
||||
|2.385012 * 10^6
|
||||
|0.1
|
||||
|
||||
|ab6f7331406b22f8ab6e2df5f8b8e464fb35b611
|
||||
|immediate exit link:userland/arch/aarch64/freestanding/linux/gem5_exit.S[] `-O0`
|
||||
|`gem5 --arch aarch64`
|
||||
|1
|
||||
|1
|
||||
|
|
||||
|
|
||||
|
||||
|ab6f7331406b22f8ab6e2df5f8b8e464fb35b611
|
||||
|immediate exit link:userland/arch/aarch64/freestanding/linux/gem5_exit.S[] `-O0`
|
||||
|`gem5 --arch aarch64 --gem5-build-type debug`
|
||||
|1
|
||||
|1
|
||||
|
|
||||
|
|
||||
|
||||
|===
|
||||
|
||||
The first step is to determine a number of loops that will run long enough to have meaningful results, but not so long that we get bored, so about 1 minute.
|
||||
|
||||
@@ -542,6 +542,7 @@ path_properties_tuples = (
|
||||
'linux': (
|
||||
{},
|
||||
{
|
||||
'gem5_exit.S': {'allowed_emulators': {'gem5'}},
|
||||
'wfe.S': {'more_than_1s': True},
|
||||
'wfe_wfe.S': {'more_than_1s': True},
|
||||
}
|
||||
|
||||
10
userland/arch/aarch64/freestanding/linux/gem5_exit.S
Normal file
10
userland/arch/aarch64/freestanding/linux/gem5_exit.S
Normal file
@@ -0,0 +1,10 @@
|
||||
/* https://cirosantilli.com/linux-kernel-module-cheat#benchmark-emulators-on-userland-executables */
/* Freestanding program (defines its own _start) whose entire body is the
 * LKMC_M5OPS_EXIT_ASM macro. It serves as the "immediate exit" baseline
 * when benchmarking gem5 on userland executables. */
|
||||
|
||||
/* NOTE(review): presumably this must be defined before including
 * lkmc/m5ops.h so that the m5ops macros expand to real instructions;
 * confirm against that header. */
#define LKMC_M5OPS_ENABLE 1
|
||||
#include "lkmc/m5ops.h"
|
||||
|
||||
.text
|
||||
.global _start
|
||||
_start:
|
||||
asm_main_after_prologue:
|
||||
/* Nothing follows this macro: the program relies on it to end the
 * simulation. */
LKMC_M5OPS_EXIT_ASM
|
||||
35
userland/arch/aarch64/inline_asm/wfe_ldxr_str.cpp
Normal file
35
userland/arch/aarch64/inline_asm/wfe_ldxr_str.cpp
Normal file
@@ -0,0 +1,35 @@
|
||||
// https://cirosantilli.com/linux-kernel-module-cheat#arm-wfe-global-monitor-events
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
|
||||
#include <lkmc/futex.h>
|
||||
|
||||
std::atomic_ulong done;
|
||||
int futex = 1;
|
||||
|
||||
/**
 * Worker thread body: performs an exclusive load on `futex`, then waits on
 * WFE until an event wakes it, and finally publishes the observed value of
 * `futex` through `done` so that the main thread can stop looping.
 *
 * The exclusive load marks `futex` in the exclusive monitor; the write the
 * main thread does to the same address is what is expected to generate the
 * wake-up event.
 */
void myfunc() {
    // `futex` is a 32-bit `int`, so the 32-bit form `ldxr w0` is used.
    // A 64-bit `ldxr x0` would read 4 bytes past the object and requires
    // 8-byte alignment, which `int` does not guarantee.
    //
    // NOTE(review): presumably the first `wfe` is there to consume any event
    // that is already pending, and the second one is the actual wait.
    //
    // The "memory" clobber keeps the compiler from moving the read of
    // `futex` below to before the wait.
    __asm__ __volatile__ (
        "ldxr w0, [%0];wfe;wfe"
        :
        : "r" (&futex)
        : "x0", "memory"
    );
    done.store(futex);
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
bool do_sev = true;
|
||||
if (argc > 1) {
|
||||
do_sev = (argv[1][0] != '0');
|
||||
}
|
||||
done.store(0);
|
||||
std::thread thread;
|
||||
thread = std::thread(myfunc);
|
||||
while (!done.load()) {
|
||||
if (do_sev) {
|
||||
__asm__ __volatile__ ("mov x0, 1;str x0, [%0]" : : "r" (&futex) : "x0", "x1");
|
||||
}
|
||||
}
|
||||
thread.join();
|
||||
}
|
||||
@@ -28,7 +28,7 @@ int main(int argc, char **argv) {
|
||||
thread = std::thread(myfunc);
|
||||
while (!done.load()) {
|
||||
if (do_sev) {
|
||||
__asm__ __volatile__ ("mov x0, 1;stxr w1, x0, [%0]" : : "r" (&futex) : "x0", "x1");
|
||||
__asm__ __volatile__ ("mov x0, 1;ldxr x0, [%0];stxr w1, x0, [%0]" : : "r" (&futex) : "x0", "x1");
|
||||
}
|
||||
}
|
||||
thread.join();
|
||||
|
||||
Reference in New Issue
Block a user