x86 asm: move exchange instructions from x86-assembly-cheat

This commit is contained in:
Ciro Santilli 六四事件 法轮功
2019-06-26 00:00:00 +00:00
parent 88a1c914c9
commit ce3d546ac8
5 changed files with 117 additions and 8 deletions

View File

@@ -11793,9 +11793,20 @@ Programs under link:userland/cpp/[] are examples of link:https://en.wikipedia.or
* link:userland/cpp/empty.cpp[]
* link:userland/cpp/hello.cpp[]
* `<atomic>` 32 "Atomic operations library"
* `<atomic>`: <<cpp17>> 32 "Atomic operations library"
** link:userland/cpp/atomic.cpp[]
==== C++ standards
Like for C, you have to pay for the standards... insane. So we just use the closest free drafts instead.
https://stackoverflow.com/questions/81656/where-do-i-find-the-current-c-or-c-standard-documents
[[cpp17]]
===== C++17 N4659 standards draft
http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/n4659.pdf
=== POSIX
Programs under link:userland/posix/[] are examples of POSIX C programming.
@@ -12538,6 +12549,33 @@ Bibliography:
* link:userland/arch/x86_64/bswap.S[]: BSWAP: convert between little endian and big endian
* link:userland/arch/x86_64/pushf.S[] PUSHF: <<x86-push-and-pop-instructions,push and pop>> the <<x86-flags-registers>> to / from the stack
==== x86 exchange instructions
<<intel-manual-1>> 7.3.1.2 "Exchange Instructions":
* link:userland/arch/x86_64/xadd.S[] XADD: exchange and add. This is how C++ `<atomic>`'s `++` is implemented in GCC 5.1. TODO: why is the exchange part needed?
* link:userland/arch/x86_64/xchg.S[] XCHG: exchange two values
TODO: concrete multi-thread <<gcc-inline-assembly>> examples of how all those instructions are normally used as synchronization primitives.
===== x86 CMPXCHG instruction
link:userland/arch/x86_64/cmpxchg.S[]
CMPXCHG: compare and exchange. `cmpxchg a, b` does:
....
if (RAX == b) {
ZF = 1
b = a
} else {
ZF = 0
RAX = b
}
....
TODO application: https://stackoverflow.com/questions/6935442/x86-spinlock-using-cmpxchg
==== x86 PUSH and POP instructions
link:userland/arch/x86_64/push.S[]
@@ -13086,6 +13124,14 @@ TODO We didn't manage to find a working ARM analogue to <<x86-rdtsc-instruction>
* https://stackoverflow.com/questions/31620375/arm-cortex-a7-returning-pmccntr-0-in-kernel-mode-and-illegal-instruction-in-u/31649809#31649809
* https://blog.regehr.org/archives/794
=== x86 LOCK prefix
Makes the memory read-modify-write done by the prefixed instruction atomic, so that the modification is seen consistently across all CPUs, which is fundamental for thread synchronization.
Inline assembly example at: link:userland/cpp/atomic.cpp[]
Apparently the lock is already implied automatically by some of the <<x86-exchange-instructions>>: XCHG with a memory operand, for instance, is always locked even without the prefix.
=== x86 assembly bibliography
==== x86 official bibliography

View File

@@ -0,0 +1,28 @@
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-cmpxchg-instruction
 *
 * CMPXCHG: compare and exchange. In AT&T syntax, `cmpxchg src, dst` does:
 *
 *     if (RAX == dst) { ZF = 1; dst = src; }
 *     else            { ZF = 0; RAX = dst; }
 *
 * Both outcomes are exercised below with register operands.
 */

#include <lkmc.h>

LKMC_PROLOGUE

    /* rax != r13: nothing is stored to r13, ZF is cleared,
     * and rax receives the current value of r13. */
    mov $0, %rax
    mov $1, %r13
    mov $2, %r14
    cmpxchg %r14, %r13
    /* Keep the resulting rax in r12 and assert on that copy, so the check
     * does not depend on what the assertion macros do to rax.
     * mov does not modify the flags, so ZF is still valid for the jnz below. */
    mov %rax, %r12
    LKMC_ASSERT(jnz)
    LKMC_ASSERT_EQ(%r12, $1)
    LKMC_ASSERT_EQ(%r13, $1)
    LKMC_ASSERT_EQ(%r14, $2)

    /* rax == r13: r14 is stored into r13, ZF is set,
     * and rax is left unchanged. */
    mov $0, %rax
    mov $0, %r13
    mov $2, %r14
    cmpxchg %r14, %r13
    /* Same as above: assert on the saved copy of rax. */
    mov %rax, %r12
    LKMC_ASSERT(jz)
    LKMC_ASSERT_EQ(%r12, $0)
    LKMC_ASSERT_EQ(%r13, $2)
    LKMC_ASSERT_EQ(%r14, $2)

LKMC_EPILOGUE

View File

@@ -0,0 +1,11 @@
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-exchange-instructions
 *
 * XADD: exchange and add. In AT&T syntax, `xadd src, dst` leaves the sum
 * of both operands in dst and the previous value of dst in src.
 */

#include <lkmc.h>

LKMC_PROLOGUE

    /* Operands: rax (destination) = 1, rbx (source) = 2. */
    movq $1, %rax
    movq $2, %rbx

    xaddq %rbx, %rax

    /* Destination holds the sum 1 + 2. */
    LKMC_ASSERT_EQ(%rax, $3)
    /* Source holds what the destination contained before the add. */
    LKMC_ASSERT_EQ(%rbx, $1)

LKMC_EPILOGUE

View File

@@ -0,0 +1,16 @@
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-exchange-instructions
 *
 * XCHG: exchange two values. Swapping twice restores the initial state.
 */

#include <lkmc.h>

LKMC_PROLOGUE

    /* Initial state: rax = 0, rbx = 1. */
    movq $0, %rax
    movq $1, %rbx

    /* First swap: the two values trade places. */
    xchgq %rbx, %rax
    LKMC_ASSERT_EQ(%rax, $1)
    LKMC_ASSERT_EQ(%rbx, $0)

    /* Second swap: back to the initial state.
     * NOTE(review): this reads rax after the assertion macros above have run,
     * so it presumably relies on them leaving rax intact; confirm in lkmc.h. */
    xchgq %rbx, %rax
    LKMC_ASSERT_EQ(%rax, $0)
    LKMC_ASSERT_EQ(%rbx, $1)

LKMC_EPILOGUE

View File

@@ -1,19 +1,27 @@
// https://github.com/cirosantilli/linux-kernel-module-cheat#atomic
// https://github.com/cirosantilli/linux-kernel-module-cheat#cpp
// https://github.com/cirosantilli/linux-kernel-module-cheat#x86-lock-prefix
//
// More restricted than mutex as it can only protect a few operations on integers.
// The non-atomic counters have undefined values which get printed:
// they are extremely likely to be less than the correct value due to
// race conditions on the data read and update of the ++.
//
// But if that is the use case, may be more efficient.
// The atomic counters have defined values, and are asserted
//
// On GCC 4.8 x86-64, using atomic is a huge performance improvement
// over the same program with mutexes (5x).
// Atomic operations are more restricted than mutex as they can
// only protect a few operations on integers.
//
// But when they can be used, they can be much more efficient than mutexes.
//
// On GCC 4.8 x86-64, using atomic offered a 5x performance improvement
// over the same program with mutexes.
#if __cplusplus >= 201103L
#include <atomic>
#include <cassert>
#include <iostream>
#include <thread>
#include <vector>
#if __cplusplus >= 201103L
std::atomic_ulong my_atomic_ulong(0);
unsigned long my_non_atomic_ulong = 0;
#if defined(__x86_64__)