mirror of
https://github.com/cirosantilli/linux-kernel-module-cheat.git
synced 2026-01-23 02:05:57 +01:00
Rationalize -mcpu for emulators, compilers and assemblers on ARM
Move SVE example in from arm-assembly-cheat. atomic.cpp aarch64 add LSE ldadd placeholder, not compiling yet
This commit is contained in:
49
userland/arch/aarch64/sve.S
Normal file
49
userland/arch/aarch64/sve.S
Normal file
@@ -0,0 +1,49 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#arm-sve */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
.data
|
||||
x: .double 1.5, 2.5, 3.5, 4.5 /* Input vector X (4 doubles). */
|
||||
y: .double 5.0, 6.0, 7.0, 8.0 /* Vector Y; daxpy stores its result back here. */
|
||||
y_expect: .double 8.0, 11.0, 14.0, 17.0 /* Reference result: y[i] + 2.0 * x[i] for the values above. */
|
||||
a: .double 2.0 /* Scalar multiplier. */
|
||||
n: .word 4 /* Element count; daxpy reads it as a signed 32-bit word. */
|
||||
|
||||
LKMC_PROLOGUE
|
||||
adr x0, x /* 1st argument: address of x. */
|
||||
|
||||
adr x1, y /* 2nd argument: address of y. */
|
||||
|
||||
adr x2, a /* 3rd argument: address of the scalar a. */
|
||||
|
||||
adr x3, n /* 4th argument: address of the element count n. */
|
||||
|
||||
bl daxpy /* Compute y += a * x in place. */
|
||||
|
||||
LKMC_ASSERT_MEMCMP(y, y_expect, =0x20) /* 0x20 = 32 bytes = 4 doubles; presumably fails the test when the buffers differ (confirm in lkmc.h). */
|
||||
LKMC_EPILOGUE
|
||||
|
||||
/* Multiply by a scalar and add.
|
||||
*
|
||||
* Operation:
|
||||
*
|
||||
* Y += a * X
|
||||
*
|
||||
* C signature:
|
||||
*
|
||||
* void daxpy(double *x, double *y, double *a, int *n)
|
||||
*
|
||||
* The name "daxpy" comes from BLAS level 1 (documented with the LAPACK reference):
|
||||
* http://www.netlib.org/lapack/explore-html/de/da4/group__double__blas__level1_ga8f99d6a644d3396aa32db472e0cfc91c.html
|
||||
*
|
||||
* Adapted from: https://alastairreid.github.io/papers/sve-ieee-micro-2017.pdf
|
||||
*/
|
||||
daxpy:
|
||||
ldrsw x3, [x3] /* x3 = *n, sign-extended from 32 to 64 bits. */
|
||||
mov x4, 0 /* x4 = i, index of the first element handled by the current iteration. */
|
||||
whilelt p0.d, x4, x3 /* p0 lane k is active while i + k < n. */
|
||||
ld1rd z0.d, p0/z, [x2] /* Load the scalar *a once and replicate it into the active lanes of z0. */
|
||||
.loop: /* Body runs at least once; if no lane is active, the predicated loads and store access no memory. */
|
||||
ld1d z1.d, p0/z, [x0, x4, lsl 3] /* z1 = x[i...], active lanes only (byte offset is i * 8). */
|
||||
ld1d z2.d, p0/z, [x1, x4, lsl 3] /* z2 = y[i...], active lanes only. */
|
||||
fmla z2.d, p0/m, z1.d, z0.d /* z2 += z1 * z0 on active lanes, i.e. y + x * a. */
|
||||
st1d z2.d, p0, [x1, x4, lsl 3] /* Write the active lanes back to y[i...]. */
|
||||
incd x4 /* i += number of 64-bit lanes in one SVE vector (depends on the vector length). */
|
||||
whilelt p0.d, x4, x3 /* Recompute the predicate for the next chunk; also sets the condition flags. */
|
||||
b.first .loop /* Loop again while the first lane is still active, i.e. i < n. */
|
||||
ret
|
||||
@@ -1,4 +1,4 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#cmpxchg-instruction */
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-cmpxchg-instruction */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
@@ -24,5 +24,4 @@ LKMC_PROLOGUE
|
||||
LKMC_ASSERT_EQ(%rax, $0)
|
||||
LKMC_ASSERT_EQ(%r13, $2)
|
||||
LKMC_ASSERT_EQ(%r14, $2)
|
||||
|
||||
LKMC_EPILOGUE
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
// https://github.com/cirosantilli/linux-kernel-module-cheat#cpp
|
||||
// https://github.com/cirosantilli/linux-kernel-module-cheat#x86-lock-prefix
|
||||
//
|
||||
// The non-atomic counters have undefined values which get printed:
|
||||
// they are extremely likely to be less than the correct value due to
|
||||
@@ -15,7 +14,6 @@
|
||||
// On GCC 4.8 x86-64, using atomic offered a 5x performance improvement
|
||||
// over the same program with mutexes.
|
||||
|
||||
|
||||
#if __cplusplus >= 201103L
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
@@ -24,7 +22,7 @@
|
||||
#include <vector>
|
||||
std::atomic_ulong my_atomic_ulong(0);
|
||||
unsigned long my_non_atomic_ulong = 0;
|
||||
#if defined(__x86_64__)
|
||||
#if defined(__x86_64__) || defined(__aarch64__)
|
||||
unsigned long my_arch_atomic_ulong = 0;
|
||||
unsigned long my_arch_non_atomic_ulong = 0;
|
||||
#endif
|
||||
@@ -41,6 +39,7 @@ void threadMain() {
|
||||
:
|
||||
:
|
||||
);
|
||||
// https://github.com/cirosantilli/linux-kernel-module-cheat#x86-lock-prefix
|
||||
__asm__ __volatile__ (
|
||||
"lock;"
|
||||
"incq %0;"
|
||||
@@ -48,6 +47,21 @@ void threadMain() {
|
||||
:
|
||||
:
|
||||
);
|
||||
#elif defined(__aarch64__)
|
||||
__asm__ __volatile__ (
|
||||
"add %0, %0, 1;"
|
||||
: "+r" (my_arch_non_atomic_ulong)
|
||||
:
|
||||
:
|
||||
);
|
||||
// https://github.com/cirosantilli/linux-kernel-module-cheat#arm-lse
|
||||
__asm__ __volatile__ (
|
||||
"ldadd %[inc], xzr, [%[addr]];"
|
||||
: "=m" (my_arch_atomic_ulong)
|
||||
: [inc] "r" (1),
|
||||
[addr] "r" (&my_arch_atomic_ulong)
|
||||
:
|
||||
);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@@ -75,7 +89,7 @@ int main(int argc, char **argv) {
|
||||
// We can also use the atomics directly through `operator T` conversion.
|
||||
assert(my_atomic_ulong == my_atomic_ulong.load());
|
||||
std::cout << "my_non_atomic_ulong " << my_non_atomic_ulong << std::endl;
|
||||
#if defined(__x86_64__)
|
||||
#if defined(__x86_64__) || defined(__aarch64__)
|
||||
assert(my_arch_atomic_ulong == nthreads * niters);
|
||||
std::cout << "my_arch_non_atomic_ulong " << my_arch_non_atomic_ulong << std::endl;
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user