diff --git a/userland/cpp/atomic.hpp b/userland/cpp/atomic.hpp new file mode 100644 index 0000000..26606e5 --- /dev/null +++ b/userland/cpp/atomic.hpp @@ -0,0 +1,101 @@ +// https://cirosantilli.com/linux-kernel-module-cheat#cpp-multithreading +// +// The non-atomic counters have undefined values which get printed: +// they are extremely likely to be less than the correct value due to +// race conditions on the data read and update of the ++. +// +// The atomic counters have defined values, and are asserted +// +// Atomic operations are more restricted than mutex as they can +// only protect a few operations on integers. +// +// But when they can be used, they can be much more efficient than mutees. +// +// On GCC 4.8 x86-64, using atomic offered a 5x peformance improvement +// over the same program with mutexes. + +#if __cplusplus >= 201103L +#include +#include +#include +#include +#include + +std::atomic_ulong my_atomic_ulong(0); +unsigned long my_non_atomic_ulong = 0; +#if defined(__x86_64__) || defined(__aarch64__) +unsigned long my_arch_atomic_ulong = 0; +unsigned long my_arch_non_atomic_ulong = 0; +#endif +size_t niters; + +void threadMain() { + for (size_t i = 0; i < niters; ++i) { +#if defined(LKMC_USERLAND_CPP_ATOMIC_FAIL) + my_atomic_ulong++; +#elif defined(LKMC_USERLAND_CPP_ATOMIC_STD_ATOMIC) + my_non_atomic_ulong++; +#endif +#if defined(__x86_64__) + __asm__ __volatile__ ( + "incq %0;" + : "+m" (my_arch_non_atomic_ulong) + : + : + ); + // https://cirosantilli.com/linux-kernel-module-cheat#x86-lock-prefix + __asm__ __volatile__ ( + "lock;" + "incq %0;" + : "+m" (my_arch_atomic_ulong) + : + : + ); +#elif defined(__aarch64__) + __asm__ __volatile__ ( + "add %0, %0, 1;" + : "+r" (my_arch_non_atomic_ulong) + : + : + ); + // https://cirosantilli.com/linux-kernel-module-cheat#arm-lse + __asm__ __volatile__ ( + "ldadd %[inc], xzr, [%[addr]];" + : "=m" (my_arch_atomic_ulong) + : [inc] "r" (1), + [addr] "r" (&my_arch_atomic_ulong) + : + ); +#endif + } +} +#endif + +int main(int argc, char **argv) { +#if __cplusplus >= 201103L + size_t nthreads; + if (argc > 1) { + nthreads = std::stoull(argv[1], NULL, 0); + } else { + nthreads = 2; + } + if (argc > 2) { + niters = std::stoull(argv[2], NULL, 0); + } else { + niters = 10; + } + std::vector threads(nthreads); + for (size_t i = 0; i < nthreads; ++i) + threads[i] = std::thread(threadMain); + for (size_t i = 0; i < nthreads; ++i) + threads[i].join(); + assert(my_atomic_ulong.load() == nthreads * niters); + // We can also use the atomics direclty through `operator T` conversion. + assert(my_atomic_ulong == my_atomic_ulong.load()); + std::cout << "my_non_atomic_ulong " << my_non_atomic_ulong << std::endl; +#if defined(__x86_64__) || defined(__aarch64__) + assert(my_arch_atomic_ulong == nthreads * niters); + std::cout << "my_arch_non_atomic_ulong " << my_arch_non_atomic_ulong << std::endl; +#endif +#endif +} diff --git a/userland/cpp/atomic/build b/userland/cpp/atomic/build new file mode 120000 index 0000000..ab18017 --- /dev/null +++ b/userland/cpp/atomic/build @@ -0,0 +1 @@ +../build \ No newline at end of file diff --git a/userland/cpp/atomic/fail.cpp b/userland/cpp/atomic/fail.cpp new file mode 100644 index 0000000..1d9d023 --- /dev/null +++ b/userland/cpp/atomic/fail.cpp @@ -0,0 +1,2 @@ +#define LKMC_USERLAND_ATOMIC_FAIL +#include "main.hpp" diff --git a/userland/cpp/atomic/main.hpp b/userland/cpp/atomic/main.hpp new file mode 100644 index 0000000..b03cc0b --- /dev/null +++ b/userland/cpp/atomic/main.hpp @@ -0,0 +1,128 @@ +// https://cirosantilli.com/linux-kernel-module-cheat#cpp-multithreading +// +// The non-atomic counters have undefined values which get printed: +// they are extremely likely to be less than the correct value due to +// race conditions on the data read and update of the ++. +// +// The atomic counters have defined values, and are asserted +// +// Atomic operations are more restricted than mutex as they can +// only protect a few operations on integers. +// +// But when they can be used, they can be much more efficient than mutees. +// +// On GCC 4.8 x86-64, using atomic offered a 5x peformance improvement +// over the same program with mutexes. + +#if __cplusplus >= 201103L +#include +#include +#include +#include +#include +#include + +size_t niters; + +#if defined(LKMC_USERLAND_ATOMIC_STD_ATOMIC) +std::atomic_ulong global(0); +#else +unsigned long global = 0; +#endif + +#if defined(LKMC_USERLAND_ATOMIC_MUTEX) +std::mutex mutex; +#endif + +#if defined(__x86_64__) || defined(__aarch64__) +unsigned long my_arch_atomic_ulong = 0; +unsigned long my_arch_non_atomic_ulong = 0; +#endif + +void threadMain() { + for (size_t i = 0; i < niters; ++i) { +#if defined(LKMC_USERLAND_ATOMIC_MUTEX) + mutex.lock(); +#endif +#if defined(LKMC_USERLAND_ATOMIC_FAIL) || \ + defined(LKMC_USERLAND_ATOMIC_STD_ATOMIC) || \ + defined(LKMC_USERLAND_ATOMIC_MUTEX) + global++; +#endif +#if defined(LKMC_USERLAND_ATOMIC_MUTEX) + mutex.unlock(); +#endif +#if 0 +#if defined(__x86_64__) + __asm__ __volatile__ ( + "incq %0;" + : "+m" (my_arch_non_atomic_ulong) + : + : + ); + // https://cirosantilli.com/linux-kernel-module-cheat#x86-lock-prefix + __asm__ __volatile__ ( + "lock;" + "incq %0;" + : "+m" (my_arch_atomic_ulong) + : + : + ); +#elif defined(__aarch64__) + __asm__ __volatile__ ( + "add %0, %0, 1;" + : "+r" (my_arch_non_atomic_ulong) + : + : + ); + // https://cirosantilli.com/linux-kernel-module-cheat#arm-lse + __asm__ __volatile__ ( + "ldadd %[inc], xzr, [%[addr]];" + : "=m" (my_arch_atomic_ulong) + : [inc] "r" (1), + [addr] "r" (&my_arch_atomic_ulong) + : + ); +#endif +#endif + } +} +#endif + +int main(int argc, char **argv) { +#if __cplusplus >= 201103L + size_t nthreads; + if (argc > 1) { + nthreads = std::stoull(argv[1], NULL, 0); + } else { + nthreads = 2; + } + if (argc > 2) { + niters = std::stoull(argv[2], NULL, 0); + } else { + niters = 10; + } + std::vector threads(nthreads); + for (size_t i = 0; i < nthreads; ++i) + threads[i] = std::thread(threadMain); + for (size_t i = 0; i < nthreads; ++i) + threads[i].join(); + unsigned long expect = nthreads * niters; +#if defined(LKMC_USERLAND_ATOMIC_FAIL) + std::cout << "expect " << expect << std::endl; + std::cout << "global " << global << std::endl; +#elif defined(LKMC_USERLAND_ATOMIC_STD_ATOMIC) + //assert(global.load() == expect); + assert(global == expect); +#else + // We can also use the atomics direclty through `operator T` conversion. + assert(global == expect); +#endif +#if 0 +#if defined(__x86_64__) || defined(__aarch64__) + assert(my_arch_atomic_ulong == nthreads * niters); + std::cout << "my_arch_non_atomic_ulong " << my_arch_non_atomic_ulong << std::endl; +#endif +#endif +#endif +} diff --git a/userland/cpp/atomic/mutex.cpp b/userland/cpp/atomic/mutex.cpp new file mode 100644 index 0000000..27c2946 --- /dev/null +++ b/userland/cpp/atomic/mutex.cpp @@ -0,0 +1,2 @@ +#define LKMC_USERLAND_ATOMIC_MUTEX +#include "main.hpp" diff --git a/userland/cpp/atomic/std_atomic.cpp b/userland/cpp/atomic/std_atomic.cpp new file mode 100644 index 0000000..0569abc --- /dev/null +++ b/userland/cpp/atomic/std_atomic.cpp @@ -0,0 +1,2 @@ +#define LKMC_USERLAND_ATOMIC_STD_ATOMIC +#include "main.hpp" diff --git a/userland/cpp/atomic/test b/userland/cpp/atomic/test new file mode 120000 index 0000000..419df4f --- /dev/null +++ b/userland/cpp/atomic/test @@ -0,0 +1 @@ +../test \ No newline at end of file