mirror of https://github.com/cirosantilli/linux-kernel-module-cheat.git, synced 2026-01-23 02:05:57 +01:00

userland/arch/aarch64/inline_asm/reg_var.c: use 64 bit variables
The 32-bit variables were likely a copy-paste error carried over from arm v7. Also create userland/arch/aarch64/inline_asm/inc_32.c and userland/cpp/atomic/aarch64_ldaxr_stlxr.cpp, and start documenting LDAXR and STLXR.
README.adoc: 63 changed lines

@@ -12056,7 +12056,7 @@ The exact same can be achieved with the older hardcoded `--maxinsts` mechanism p
Other related fs.py options are:

-* `--abs-max-tick`: set the maximum number of ticks rather than instructions
+* `--abs-max-tick`: set the maximum guest simulation time. The same scale as the ExecAll trace is used. E.g., for the above example with 3 instructions, the same trace would be achieved with a value of 3000.

The message also shows on <<user-mode-simulation>> deadlocks, for example in link:userland/posix/pthread_deadlock.c[]:

@@ -13988,6 +13988,7 @@ In this set of examples, we exemplify various synchronization mechanisms, includ
* link:userland/cpp/atomic/main.hpp[]: contains all the code which is then specialized in separated `.cpp` files with macros
* link:userland/cpp/atomic/aarch64_add.cpp[]: non synchronized aarch64 inline assembly
+* link:userland/cpp/atomic/aarch64_ldaxr_stlxr.cpp[]: see: <<arm-ldxr-and-stxr-instructions>>
* link:userland/cpp/atomic/aarch64_ldadd.cpp[]: synchronized aarch64 inline assembly with the <<arm-lse>> LDADD instruction
* link:userland/cpp/atomic/fail.cpp[]: non synchronized C++ operator `++`
* link:userland/cpp/atomic/mutex.cpp[]: synchronized `std::mutex`

@@ -14287,7 +14288,9 @@ The following sections are related to multithreading in userland:
** <<pthreads>>
* ISA topics:
** <<x86-thread-synchronization-primitives>>
-** <<arm-lse>>
+** <<arm-thread-synchronization-primitives>>
+*** <<arm-ldxr-stxr>>
+*** <<arm-lse>>
* emulator topics:
** <<qemu-user-mode-multithreading>>
** <<gem5-syscall-emulation-multithreading>>

@@ -15338,6 +15341,7 @@ Examples under `arch/<arch>/c/` directories show how to use inline assembly from
* aarch64
** link:userland/arch/aarch64/inline_asm/earlyclobber.c[]
** link:userland/arch/aarch64/inline_asm/inc.c[]
+** link:userland/arch/aarch64/inline_asm/inc_32.c[]: how to use 32-bit `w` registers in aarch64. We have to add `w` to the `%` as in `%w[io]` instead of `%[io]`
** link:userland/arch/aarch64/inline_asm/multiline.cpp[]

==== GCC inline assembly register variables

@@ -17468,25 +17472,54 @@ We then download the zip from: https://developer.arm.com/docs/ddi0584/latest/arm

That document then describes the SVE instructions and registers.

+=== ARM thread synchronization primitives
+
+Parent section: <<userland-multithreading>>.
+
+==== ARM LDXR and STXR instructions
+
+Parent section: <<atomic-cpp>>
+
+link:userland/cpp/atomic/aarch64_ldaxr_stlxr.cpp[]
+
+LDXR and STXR vs LDAXR and STLXR: https://stackoverflow.com/questions/21535058/arm64-ldxr-stxr-vs-ldaxr-stlxr TODO understand better and make an example.
+
+LDXR and STXR implement a so-called "load-link/store-conditional" (LLSC) pattern: https://en.wikipedia.org/wiki/Load-link/store-conditional which appears in many RISC ISAs.
+
+Basically, this pattern works as follows:
+
+* LDXR loads a value and marks its address for exclusive access by the current CPU
+* STXR:
+** marks the address as no longer exclusive for other CPUs that may have done an LDXR on it before
+** stores fine if the address is still marked as exclusive for the current CPU, and writes 0 to a third status register to indicate success
+** fails to store if the address is not, and writes 1 to the status register to indicate failure
+
+In case of failure, we just have to loop back to just before the LDXR and try again.
+
+This is therefore basically a spinlock, and it should only be used to cover very short critical sections such as atomic increments.
+
+C++ `std::atomic` uses this for increments before the v8.1 <<arm-lse>>: https://stackoverflow.com/questions/56810/how-do-i-start-threads-in-plain-c/52453291#52453291
+
+[[arm-lse]]
+==== ARM Large System Extensions (LSE)
+
+Set of atomic and synchronization primitives added in the <<armv8-1-architecture-extension>>.
+
+Documented at <<armarm8-db>> "ARMv8.1-LSE, ARMv8.1 Large System Extensions".
+
+* LDADD: link:userland/cpp/atomic/aarch64_ldadd.cpp[], see also: <<atomic-cpp>>. Kernel inspiration: https://github.com/torvalds/linux/blob/v5.4/arch/arm64/include/asm/atomic_lse.h#L56
+
+Bibliography:
+
+* https://preshing.com/20120710/memory-barriers-are-like-source-control-operations/

=== ARMv8 architecture extensions

==== ARMv8.1 architecture extension

<<armarm8-db>> A1.7.3 "The ARMv8.1 architecture extension"

-[[arm-lse]]
+* <<arm-lse>>
-===== ARM Large System Extensions (LSE)
-
-Parent section: <<arm-baremetal-multicore>>.
-
-<<armarm8-db>> "ARMv8.1-LSE, ARMv8.1 Large System Extensions"
-
-* LDADD: link:userland/cpp/atomic/aarch64_ldadd.cpp[], see also: <<atomic-cpp>>. Kernel inspiration: https://github.com/torvalds/linux/blob/v5.4/arch/arm64/include/asm/atomic_lse.h#L56
-
-Bibliography:
-
-* https://stackoverflow.com/questions/21535058/arm64-ldxr-stxr-vs-ldaxr-stlxr
-* https://preshing.com/20120710/memory-barriers-are-like-source-control-operations/

=== ARM assembly bibliography

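The retry loop that the new LDXR/STXR section above describes can be sketched as a small standalone program. This sketch is not part of the commit: it assumes an aarch64 target, uses the plain LDXR/STXR variants, and runs a single thread just to show the loop structure. The commit's own version, in the userland/cpp/atomic/main.hpp hunk further down, uses LDAXR/STLXR instead, which add acquire/release ordering on top of the same exclusive-monitor mechanism.

[source,c]
----
/* Hypothetical minimal sketch of the LDXR/STXR retry loop described above.
 * Single-threaded on purpose: it only illustrates the loop structure. */
#include <assert.h>
#include <inttypes.h>

uint64_t global = 0;

int main(void) {
    uint64_t tmp;    /* holds the loaded value */
    uint32_t status; /* 0 on STXR success, 1 on failure */
    __asm__ __volatile__ (
        "1:"
        /* Load *addr and mark the address for exclusive access. */
        "ldxr %[tmp], [%[addr]];"
        "add %[tmp], %[tmp], 1;"
        /* Store back only if the address is still exclusive;
         * write 0 to status on success, 1 on failure. */
        "stxr %w[status], %[tmp], [%[addr]];"
        /* On failure, loop back to the LDXR and try again. */
        "cbnz %w[status], 1b;"
        : "=m" (global), /* indicate that global is modified */
          [tmp] "=&r" (tmp),
          [status] "=&r" (status)
        : [addr] "r" (&global)
        :
    );
    assert(global == 1);
}
----
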
userland/arch/aarch64/inline_asm/inc_32.c: 15 changed lines (new file)

@@ -0,0 +1,15 @@
+/* https://cirosantilli.com/linux-kernel-module-cheat#gcc-inline-assembly */
+
+#include <assert.h>
+#include <inttypes.h>
+
+int main(void) {
+    uint32_t io = 1;
+    __asm__ (
+        "add %w[io], %w[io], 1;"
+        : [io] "+r" (io)
+        :
+        :
+    );
+    assert(io == 2);
+}

userland/arch/aarch64/inline_asm/reg_var.c

@@ -4,10 +4,10 @@
#include <inttypes.h>

int main(void) {
-    register uint32_t x0 __asm__ ("x0");
+    register uint64_t x0 __asm__ ("x0");
-    register uint32_t x1 __asm__ ("x1");
+    register uint64_t x1 __asm__ ("x1");
-    uint32_t new_x0;
+    uint64_t new_x0;
-    uint32_t new_x1;
+    uint64_t new_x1;
    {
        x0 = 1;
        x1 = 2;

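For context, the hunk above only shows the declarations. A complete register-variable example along these lines could look as follows; this is a hedged sketch, not the actual contents of reg_var.c: the ADD instruction, the constraints, and the assert are illustrative assumptions, and only the 64-bit declarations come from the commit.

[source,c]
----
/* Hypothetical sketch of a GCC register-variable example after the 64-bit
 * change; only the declarations are taken from the commit. */
#include <assert.h>
#include <inttypes.h>

int main(void) {
    /* Ask GCC to keep these variables in the given aarch64 registers. */
    register uint64_t x0 __asm__ ("x0");
    register uint64_t x1 __asm__ ("x1");
    uint64_t new_x0;
    x0 = 1;
    x1 = 2;
    __asm__ (
        "add x0, x0, x1;"
        : "+r" (x0) /* x0 is both read and written */
        : "r" (x1)  /* x1 is only read */
        :
    );
    new_x0 = x0;
    assert(new_x0 == 3);
}
----
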
userland/cpp/atomic/aarch64_ldaxr_stlxr.cpp: 2 changed lines (new file)

@@ -0,0 +1,2 @@
+#define LKMC_USERLAND_ATOMIC_LDAXR_STLXR 1
+#include "main.hpp"

userland/cpp/atomic/main.hpp

@@ -51,11 +51,28 @@ void threadMain() {
            :
            :
        );
+#elif LKMC_USERLAND_ATOMIC_LDAXR_STLXR
+        // Was used by std::atomic before LDADD was added.
+        uint64_t scratch64;
+        uint64_t scratch32;
+        __asm__ __volatile__ (
+            "1:"
+            "ldaxr %[scratch64], [%[addr]];"
+            "add %[scratch64], %[scratch64], 1;"
+            "stlxr %w[scratch32], %[scratch64], [%[addr]];"
+            "cbnz %w[scratch32], 1b;"
+            : "=m" (global), // indicate that global is modified
+              "+g" (i), // to prevent loop unrolling
+              [scratch64] "=&r" (scratch64),
+              [scratch32] "=&r" (scratch32)
+            : [addr] "r" (&global)
+            :
+        );
#elif LKMC_USERLAND_ATOMIC_AARCH64_LDADD
        // https://cirosantilli.com/linux-kernel-module-cheat#arm-lse
        __asm__ __volatile__ (
            "ldadd %[inc], xzr, [%[addr]];"
-            : "=m" (global),
+            : "=m" (global), // indicate that global is modified
              "+g" (i) // to prevent loop unrolling
            : [inc] "r" (1),
              [addr] "r" (&global)
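
For comparison with the inline assembly variants above, the portable way to do the same increment in C++ is `std::atomic::fetch_add`. Per the README text added in this commit, that lowers to an LDXR/STXR-style retry loop before ARMv8.1 and can become a single LDADD with LSE. A minimal sketch, not part of the commit (the repository's own `std::atomic` variant, if it exists, may differ):

[source,cpp]
----
// Minimal sketch: portable C++ equivalent of the inline assembly increments
// above, using std::atomic::fetch_add.
#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <thread>

std::atomic<std::uint64_t> global{0};

void threadMain(std::size_t niters) {
    for (std::size_t i = 0; i < niters; ++i)
        // Without LSE this lowers to an exclusive load/store retry loop; with
        // -march=armv8.1-a the compiler may emit a single LDADD instead.
        global.fetch_add(1, std::memory_order_relaxed);
}

int main() {
    std::size_t niters = 100000;
    std::thread t1(threadMain, niters);
    std::thread t2(threadMain, niters);
    t1.join();
    t2.join();
    assert(global.load() == 2 * niters);
    return 0;
}
----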