From ce3d546ac82201e2f346bc3eb20429e6af4f7e28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciro=20Santilli=20=E5=85=AD=E5=9B=9B=E4=BA=8B=E4=BB=B6=20?= =?UTF-8?q?=E6=B3=95=E8=BD=AE=E5=8A=9F?= Date: Wed, 26 Jun 2019 00:00:00 +0000 Subject: [PATCH] x86 asm: move exchange instructions from x86-assembly-cheat --- README.adoc | 48 +++++++++++++++++++++++++++++++++- userland/arch/x86_64/cmpxchg.S | 28 ++++++++++++++++++++ userland/arch/x86_64/xadd.S | 11 ++++++++ userland/arch/x86_64/xchg.S | 16 ++++++++++++ userland/cpp/atomic.cpp | 22 +++++++++++----- 5 files changed, 117 insertions(+), 8 deletions(-) create mode 100644 userland/arch/x86_64/cmpxchg.S create mode 100644 userland/arch/x86_64/xadd.S create mode 100644 userland/arch/x86_64/xchg.S diff --git a/README.adoc b/README.adoc index c4c9f16..34d3357 100644 --- a/README.adoc +++ b/README.adoc @@ -11793,9 +11793,20 @@ Programs under link:userland/cpp/[] are examples of link:https://en.wikipedia.or * link:userland/cpp/empty.cpp[] * link:userland/cpp/hello.cpp[] -* `` 32 "Atomic operations library" +* ``: <> 32 "Atomic operations library" ** link:userland/cpp/atomic.cpp[] +==== C++ standards + +Like for C, you have to pay for the standards... insane. So we just use the closest free drafts instead. + +https://stackoverflow.com/questions/81656/where-do-i-find-the-current-c-or-c-standard-documents + +[[cpp17]] +===== C++17 N4659 standards draft + +http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/n4659.pdf + === POSIX Programs under link:userland/posix/[] are examples of POSIX C programming. @@ -12538,6 +12549,33 @@ Bibliography: * link:userland/arch/x86_64/bswap.S[]: BSWAP: convert between little endian and big endian * link:userland/arch/x86_64/pushf.S[] PUSHF: <> the <> to / from the stack +==== x86 exchange instructions + +<> 7.3.1.2 "Exchange Instructions": + +* link:userland/arch/x86_64/xadd.S[] XADD: exchange and add. This is how C++ ``'s' `++` is implemented in GCC 5.1. TODO: why is the exchange part needed? 
+* link:userland/arch/x86_64/xchg.S[] XCHG: exchange two values + +TODO: concrete multi-thread <> examples of how all those instructions are normally used as synchronization primitives. + +===== x86 CMPXCHG instruction + +link:userland/arch/x86_64/cmpxchg.S[] + +CMPXCHG: compare and exchange. `cmpxchg a, b` does: + +.... +if (RAX == b) { + ZF = 1 + b = a +} else { + ZF = 0 + RAX = b +} +.... + +TODO application: https://stackoverflow.com/questions/6935442/x86-spinlock-using-cmpxchg + ==== x86 PUSH and POP instructions link:userland/arch/x86_64/push.S[] @@ -13086,6 +13124,14 @@ TODO We didn't manage to find a working ARM analogue to < * https://stackoverflow.com/questions/31620375/arm-cortex-a7-returning-pmccntr-0-in-kernel-mode-and-illegal-instruction-in-u/31649809#31649809 * https://blog.regehr.org/archives/794 +=== x86 LOCK prefix + +Ensures that memory modifications are visible across all CPUs, which is fundamental for thread synchronization. + +Inline assembly example at: link:userland/cpp/atomic.cpp[] + +Apparently already automatically implied by some of the <> + === x86 assembly bibliography ==== x86 official bibliography diff --git a/userland/arch/x86_64/cmpxchg.S b/userland/arch/x86_64/cmpxchg.S new file mode 100644 index 0000000..595b928 --- /dev/null +++ b/userland/arch/x86_64/cmpxchg.S @@ -0,0 +1,28 @@ +/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-cmpxchg-instruction */ + +#include + +LKMC_PROLOGUE + /* rax != r13 */ + mov $0, %rax + mov $1, %r13 + mov $2, %r14 + cmpxchg %r14, %r13 + mov %rax, %r12 + LKMC_ASSERT(jnz) + LKMC_ASSERT_EQ(%rax, $1) + LKMC_ASSERT_EQ(%r13, $1) + LKMC_ASSERT_EQ(%r14, $2) + + /* rax == r13 */ + mov $0, %rax + mov $0, %r13 + mov $2, %r14 + cmpxchg %r14, %r13 + mov %rax, %r12 + LKMC_ASSERT(jz) + LKMC_ASSERT_EQ(%rax, $0) + LKMC_ASSERT_EQ(%r13, $2) + LKMC_ASSERT_EQ(%r14, $2) + +LKMC_EPILOGUE diff --git a/userland/arch/x86_64/xadd.S b/userland/arch/x86_64/xadd.S new file mode 100644 index 0000000..1227eac --- 
/dev/null +++ b/userland/arch/x86_64/xadd.S @@ -0,0 +1,11 @@ +/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-exchange-instructions */ + +#include + +LKMC_PROLOGUE + mov $1, %rax + mov $2, %rbx + xadd %rbx, %rax + LKMC_ASSERT_EQ(%rax, $3) + LKMC_ASSERT_EQ(%rbx, $1) +LKMC_EPILOGUE diff --git a/userland/arch/x86_64/xchg.S b/userland/arch/x86_64/xchg.S new file mode 100644 index 0000000..a1b753f --- /dev/null +++ b/userland/arch/x86_64/xchg.S @@ -0,0 +1,16 @@ +/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-exchange-instructions */ + +#include + +LKMC_PROLOGUE + mov $0, %rax + mov $1, %rbx + + xchg %rbx, %rax + LKMC_ASSERT_EQ(%rax, $1) + LKMC_ASSERT_EQ(%rbx, $0) + + xchg %rbx, %rax + LKMC_ASSERT_EQ(%rax, $0) + LKMC_ASSERT_EQ(%rbx, $1) +LKMC_EPILOGUE diff --git a/userland/cpp/atomic.cpp b/userland/cpp/atomic.cpp index 34e9aab..cb5635b 100644 --- a/userland/cpp/atomic.cpp +++ b/userland/cpp/atomic.cpp @@ -1,19 +1,27 @@ -// https://github.com/cirosantilli/linux-kernel-module-cheat#atomic +// https://github.com/cirosantilli/linux-kernel-module-cheat#cpp +// https://github.com/cirosantilli/linux-kernel-module-cheat#x86-lock-prefix // -// More restricted than mutex as it can only protect a few operations on integers. +// The non-atomic counters have undefined values which get printed: +// they are extremely likely to be less than the correct value due to +// race conditions on the data read and update of the ++. // -// But if that is the use case, may be more efficient. +// The atomic counters have defined values, and are asserted. // -// On GCC 4.8 x86-64, using atomic is a huge peformance improvement -// over the same program with mutexes (5x). +// Atomic operations are more restricted than mutex as they can +// only protect a few operations on integers. +// +// But when they can be used, they can be much more efficient than mutexes. 
+// +// On GCC 4.8 x86-64, using atomic offered a 5x performance improvement +// over the same program with mutexes. + +#if __cplusplus >= 201103L #include #include #include #include #include - -#if __cplusplus >= 201103L std::atomic_ulong my_atomic_ulong(0); unsigned long my_non_atomic_ulong = 0; #if defined(__x86_64__)