diff --git a/README.adoc b/README.adoc index 73886da..0e8417e 100644 --- a/README.adoc +++ b/README.adoc @@ -373,7 +373,7 @@ Have you ever felt that a single `inc` instruction was not enough? Really? Me to So let's hack the <>, which is part of link:https://en.wikipedia.org/wiki/GNU_Binutils[GNU Binutils], to add a new shiny version of `inc` called... `myinc`! -GCC uses GNU GAS as its backend, so we will test out new mnemonic with an <> test program: link:userland/arch/x86_64/c/binutils_hack.c[], which is just a copy of link:userland/arch/x86_64/c/binutils_nohack.c[] but with `myinc` instead of `inc`. +GCC uses GNU GAS as its backend, so we will test out new mnemonic with an <> test program: link:userland/arch/x86_64/binutils_hack.c[], which is just a copy of link:userland/arch/x86_64/binutils_nohack.c[] but with `myinc` instead of `inc`. The inline assembly is disabled with an `#ifdef`, so first modify the source to enable that. @@ -2742,7 +2742,7 @@ Sources: * link:kernel_modules/ring0.c[] * link:lkmc/ring0.h[] -* link:userland/arch/x86_64/c/ring0.c[] +* link:userland/arch/x86_64/ring0.c[] In both cases, we attempt to run the exact same code which is shared on the `ring0.h` header file. @@ -8611,9 +8611,24 @@ If we pass to QEMU the xen image directly instead of the boot wrapper one: -kernel ../xen/xen/xen .... -then Xen messages do show up, so it seems that the configuration failure lies in the boot wrapper itself rather than Xen. +then Xen messages do show up! So it seems that the configuration failure lies in the boot wrapper itself rather than Xen. -Bibliography: this attempt was based on: https://wiki.xenproject.org/wiki/Xen_ARM_with_Virtualization_Extensions/FastModels which is the documentation for the ARM Fast Models closed source simulators. +Maybe it is also possible to run Xen directly like this: QEMU can already load multiple images at different memory locations with the generic loader: https://github.com/qemu/qemu/blob/master/docs/generic-loader.txt which looks something along: + +.... +-kernel file1.elf -device loader,file=file2.elf +.... + +so as long as we craft the correct DTB and feed it into Xen so that it can see the kernel, it should work. TODO does QEMU support patching the auto-generated DTB with pre-generated options? In the worst case we can just dump it hand hack it up though with `-machine dumpdtb`: <>. + +Bibliography: + +* this attempt was based on: https://wiki.xenproject.org/wiki/Xen_ARM_with_Virtualization_Extensions/FastModels which is the documentation for the ARM Fast Models closed source simulators. +* https://wiki.xenproject.org/wiki/Xen_ARM_with_Virtualization_Extensions/qemu-system-aarch64 this is the only QEMU aarch64 Xen page on the web. It uses the Ubuntu aarc64 image, which has EDK2. ++ +I however see no joy on blobs. Buildroot does not seem to support EDK 2. + +Link on readme https://stackoverflow.com/questions/49348453/xen-on-qemu-with-arm64-architecture == QEMU @@ -11878,25 +11893,89 @@ You are now left on the very first instruction of our tiny executable! Examples under `arch//c/` directories show to how use inline assembly from higher level languages such as C: * x86_64 -** link:userland/arch/x86_64/c/inc.c[] -** link:userland/arch/x86_64/c/add.c[] +** link:userland/arch/x86_64/inline_asm/inc.c[] +** link:userland/arch/x86_64/inline_asm/add.c[] * arm -** link:userland/arch/arm/c/inc.c[] -** link:userland/arch/arm/c/inc_memory.c[] -** link:userland/arch/arm/c/inc_memory_global.c[] -** link:userland/arch/arm/c/add.c[] +** link:userland/arch/arm/inline_asm/inc.c[] +** link:userland/arch/arm/inline_asm/inc_memory.c[] +** link:userland/arch/arm/inline_asm/inc_memory_global.c[] +** link:userland/arch/arm/inline_asm/add.c[] * aarch64 -** link:userland/arch/aarch64/c/earlyclobber.c[] -** link:userland/arch/aarch64/c/inc.c[] -** link:userland/arch/aarch64/c/multiline.cpp[] +** link:userland/arch/aarch64/inline_asm/earlyclobber.c[] +** link:userland/arch/aarch64/inline_asm/inc.c[] +** link:userland/arch/aarch64/inline_asm/multiline.cpp[] + +==== GCC intrinsics + +Pre-existing C wrappers using inline assembly, this is what production programs should use instead of inline assembly for SIMD: + +* x86_64 +** link:userland/arch/x86_64/intrinsics/paddq.c[]. Intrinsics version of link:userland/arch/x86_64/paddq.S[] +** link:userland/arch/x86_64/intrinsics/addpd.c[]. Intrinsics version of link:userland/arch/x86_64/addpd.S[] + +===== GCC x86 intrinsics + +Good official cheatsheet with all intrinsics and what they expand to: https://software.intel.com/sites/landingpage/IntrinsicsGuide + +The functions use the the following naming convention: + +.... +__ +.... + +where: + +* ``: +** `mm`: 128-bit vectors (SSE) +** `mm256`: 256-bit vectors (AVX and AVX2) +** `mm512`: 512-bit vectors (AVX512) +* ``: operation of the intrinsic function, e.g. add, sub, mul, etc. +* ``: data type: +** `ps`: 4 floats (Packed Single) +** `pd`: 2 doubles (Packed Double) +** `ss`: 1 float (Single Single) +** `sd`: 1 double (Single Double) +** `ep` integer types, e.g.: +*** `epi32`: 32 bit signed integers +*** `epu16`: 16 bit unsigned integers + +Data types: + +* `__m128`: four floats +* `__m128d`: two doubles +* `__m128i`: integers: 8 x 16-bit, 4 x 32-bit, 2 x 64-bit + +The headers to include are clarified at: https://stackoverflow.com/questions/11228855/header-files-for-x86-simd-intrinsics + +.... +x86intrin.h everything +mmintrin.h MMX +xmmintrin.h SSE +emmintrin.h SSE2 +pmmintrin.h SSE3 +tmmintrin.h SSSE3 +smmintrin.h SSE4.1 +nmmintrin.h SSE4.2 +ammintrin.h SSE4A +wmmintrin.h AES +immintrin.h AVX +zmmintrin.h AVX512 +.... + +Present in `gcc-7_3_0-release` tree at: `gcc/config/i386/x86intrin.h`. + +Bibliography: + +* https://www.cs.virginia.edu/~cr4bd/3330/S2018/simdref.html +* https://www.cs.virginia.edu/~cr4bd/3330/S2018/simdref.html ==== GCC inline assembly register variables Used notably in some of the <> setups: -* link:userland/arch/arm/c/reg_var.c[] -* link:userland/arch/aarch64/c/reg_var.c[] -* link:userland/arch/aarch64/c/reg_var_float.c[] +* link:userland/arch/arm/inline_asm/reg_var.c[] +* link:userland/arch/aarch64/inline_asm/reg_var.c[] +* link:userland/arch/aarch64/inline_asm/reg_var_float.c[] In x86, makes it possible to access variables not exposed with the one letter register constraints. @@ -11911,8 +11990,8 @@ Documentation: https://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Explicit-Reg-Vars.ht How to use temporary registers in inline assembly: * x86_64 -** link:userland/arch/x86_64/c/scratch.c[] -** link:userland/arch/x86_64/c/scratch_hardcode.c[] +** link:userland/arch/x86_64/inline_asm/scratch.c[] +** link:userland/arch/x86_64/inline_asm/scratch_hardcode.c[] Bibliography: https://stackoverflow.com/questions/6682733/gcc-prohibit-use-of-some-registers/54963829#54963829 @@ -11928,8 +12007,8 @@ The assertion may fail without it. It actually does fail in GCC 8.2.0. Not documented as of GCC 8.2, but possible: https://stackoverflow.com/questions/53960240/armv8-floating-point-output-inline-assembly -* link:userland/arch/arm/c/inc_float.c[] -* link:userland/arch/aarch64/c/inc_float.c[] +* link:userland/arch/arm/inline_asm/inc_float.c[] +* link:userland/arch/aarch64/inline_asm/inc_float.c[] === Linux system calls @@ -11937,15 +12016,15 @@ The following <> programs illustrate how to make system calls: * x86_64 ** link:userland/arch/x86_64/freestanding/linux/hello.S[] -** link:userland/arch/x86_64/c/freestanding/linux/hello.c[] -** link:userland/arch/x86_64/c/freestanding/linux/hello_regvar.c[] +** link:userland/arch/x86_64/inline_asm/freestanding/linux/hello.c[] +** link:userland/arch/x86_64/inline_asm/freestanding/linux/hello_regvar.c[] * arm ** link:userland/arch/arm/freestanding/linux/hello.S[] -** link:userland/arch/arm/c/freestanding/linux/hello.c[] +** link:userland/arch/arm/inline_asm/freestanding/linux/hello.c[] * aarch64 ** link:userland/arch/aarch64/freestanding/linux/hello.S[] -** link:userland/arch/aarch64/c/freestanding/linux/hello.c[] -** link:userland/arch/aarch64/c/freestanding/linux/hello_clobbers.c[] +** link:userland/arch/aarch64/inline_asm/freestanding/linux/hello.c[] +** link:userland/arch/aarch64/inline_asm/freestanding/linux/hello_clobbers.c[] Determining the ARM syscall numbers: @@ -12010,7 +12089,7 @@ Call C standard library functions from assembly and vice versa. ** link:userland/arch/arm/linux/c_from_asm.S[] * aarch64 ** link:lkmc/aarch64.h[] `ENTRY` and `EXIT` -** link:userland/arch/aarch64/c/linux/asm_from_c.c[] +** link:userland/arch/aarch64/inline_asm/linux/asm_from_c.c[] ARM Architecture Procedure Call Standard (AAPCS) is the name that ARM Holdings gives to the calling convention. @@ -12153,12 +12232,12 @@ Once those are done, everything else left on userland is just to learn a huge li History: -* link:https://en.wikipedia.org/wiki/MMX_(instruction_set)[MMX]: 1997 -* link:https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions[SSE]: Streaming SIMD Extensions. 1999. 128-bit XMM registers. +* link:https://en.wikipedia.org/wiki/MMX_(instruction_set)[MMX]: MultiMedia eXtension (unofficial name). 1997. MM0-MM7 64-bit registers. +* link:https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions[SSE]: Streaming SIMD Extensions. 1999. XMM0-XMM7 128-bit registers, XMM0-XMM15 for AMD in 64-bit mode. * link:https://en.wikipedia.org/wiki/SSE2[SSE2]: 2004 * link:https://en.wikipedia.org/wiki/SSE3[SSE3]: 2006 * link:https://en.wikipedia.org/wiki/SSE4[SSE4]: 2006 -* link:https://en.wikipedia.org/wiki/Advanced_Vector_Extensions[AVX]: Advanced Vector Extensions. 2011. 256-bit YMM registers. Extension of XMM. +* link:https://en.wikipedia.org/wiki/Advanced_Vector_Extensions[AVX]: Advanced Vector Extensions. 2011. YMM0–YMM15 256-bit registers in 64-bit mode. Extension of XMM. * AVX2:2013 * AVX-512: 2016. 512-bit ZMM registers. Extension of YMM. @@ -12183,12 +12262,12 @@ TODO: review this section, make a more controlled userland experiment with <> `system.cpu.numCycles` cycle count with the link:https://en.wikipedia.org/wiki/Time_Stamp_Counter[x86 `rdtsc` instruction] that is supposed to do the same thing: .... -./build-userland --static userland/arch/x86_64/c/rdtsc.c +./build-userland --static userland/arch/x86_64/inline_asm/rdtsc.c ./run --eval './arch/x86_64/c/rdtsc.out;m5 exit;' --emulator gem5 ./gem5-stat .... -Source: link:userland/arch/x86_64/c/rdtsc.c[] +Source: link:userland/arch/x86_64/rdtsc.c[] `rdtsc` outputs a cycle count which we compare with gem5's `gem5-stat`: @@ -13307,7 +13386,7 @@ To test it out, I first hack link:common.py[] to enable `C++`: consts['baremetal_build_in_exts'] = consts['build_in_exts'] .... -and then I hack link:userland/arch/aarch64/c/multiline.cpp[] to consist only of an empty main: +and then I hack link:userland/arch/aarch64/inline_asm/multiline.cpp[] to consist only of an empty main: .... int main() {} @@ -13317,7 +13396,7 @@ then for example: .... ./build-baremetal --arch aarch64 -./run --arch aarch64 --baremetal userland/arch/aarch64/c/multiline.cpp +./run --arch aarch64 --baremetal userland/arch/aarch64/inline_asm/multiline.cpp .... fails with: @@ -13330,7 +13409,7 @@ qemu-system-aarch64: rom check and register reset failed and the gem5 build fails completely: .... -./build-baremetal --arch aarch64 --emulator gem5 userland/arch/aarch64/c/multiline.cpp +./build-baremetal --arch aarch64 --emulator gem5 userland/arch/aarch64/inline_asm/multiline.cpp .... fails with: diff --git a/path_properties.py b/path_properties.py index 7e2e00c..4dad4a0 100644 --- a/path_properties.py +++ b/path_properties.py @@ -348,7 +348,7 @@ path_properties_tuples = ( ] }, { - 'c': ( + 'inline_asm': ( { }, { @@ -371,7 +371,7 @@ path_properties_tuples = ( 'aarch64': ( {'allowed_archs': {'aarch64'}}, { - 'c': ( + 'inline_asm': ( { }, { @@ -393,19 +393,19 @@ path_properties_tuples = ( 'x86_64': ( {'allowed_archs': {'x86_64'}}, { - 'c': ( + 'inline_asm': ( { }, { 'freestanding': freestanding_properties, - 'ring0.c': { - 'signal_received': signal.Signals.SIGSEGV - } } ), 'freestanding': freestanding_properties, 'lkmc_assert_eq_fail.S': {'signal_received': signal.Signals.SIGABRT}, 'lkmc_assert_memcmp_fail.S': {'signal_received': signal.Signals.SIGABRT}, + 'ring0.c': { + 'signal_received': signal.Signals.SIGSEGV, + } } ), } diff --git a/userland/arch/aarch64/c/build b/userland/arch/aarch64/inline_asm/build similarity index 100% rename from userland/arch/aarch64/c/build rename to userland/arch/aarch64/inline_asm/build diff --git a/userland/arch/aarch64/c/earlyclobber.c b/userland/arch/aarch64/inline_asm/earlyclobber.c similarity index 100% rename from userland/arch/aarch64/c/earlyclobber.c rename to userland/arch/aarch64/inline_asm/earlyclobber.c diff --git a/userland/arch/aarch64/c/freestanding/build b/userland/arch/aarch64/inline_asm/freestanding/build similarity index 100% rename from userland/arch/aarch64/c/freestanding/build rename to userland/arch/aarch64/inline_asm/freestanding/build diff --git a/userland/arch/aarch64/c/freestanding/linux/build b/userland/arch/aarch64/inline_asm/freestanding/linux/build similarity index 100% rename from userland/arch/aarch64/c/freestanding/linux/build rename to userland/arch/aarch64/inline_asm/freestanding/linux/build diff --git a/userland/arch/aarch64/c/freestanding/linux/hello.c b/userland/arch/aarch64/inline_asm/freestanding/linux/hello.c similarity index 100% rename from userland/arch/aarch64/c/freestanding/linux/hello.c rename to userland/arch/aarch64/inline_asm/freestanding/linux/hello.c diff --git a/userland/arch/aarch64/c/freestanding/linux/hello_clobbers.c b/userland/arch/aarch64/inline_asm/freestanding/linux/hello_clobbers.c similarity index 100% rename from userland/arch/aarch64/c/freestanding/linux/hello_clobbers.c rename to userland/arch/aarch64/inline_asm/freestanding/linux/hello_clobbers.c diff --git a/userland/arch/aarch64/c/freestanding/linux/test b/userland/arch/aarch64/inline_asm/freestanding/linux/test similarity index 100% rename from userland/arch/aarch64/c/freestanding/linux/test rename to userland/arch/aarch64/inline_asm/freestanding/linux/test diff --git a/userland/arch/aarch64/c/freestanding/test b/userland/arch/aarch64/inline_asm/freestanding/test similarity index 100% rename from userland/arch/aarch64/c/freestanding/test rename to userland/arch/aarch64/inline_asm/freestanding/test diff --git a/userland/arch/aarch64/c/inc.c b/userland/arch/aarch64/inline_asm/inc.c similarity index 100% rename from userland/arch/aarch64/c/inc.c rename to userland/arch/aarch64/inline_asm/inc.c diff --git a/userland/arch/aarch64/c/inc_float.c b/userland/arch/aarch64/inline_asm/inc_float.c similarity index 100% rename from userland/arch/aarch64/c/inc_float.c rename to userland/arch/aarch64/inline_asm/inc_float.c diff --git a/userland/arch/aarch64/c/linux/asm_from_c.c b/userland/arch/aarch64/inline_asm/linux/asm_from_c.c similarity index 100% rename from userland/arch/aarch64/c/linux/asm_from_c.c rename to userland/arch/aarch64/inline_asm/linux/asm_from_c.c diff --git a/userland/arch/aarch64/c/linux/build b/userland/arch/aarch64/inline_asm/linux/build similarity index 100% rename from userland/arch/aarch64/c/linux/build rename to userland/arch/aarch64/inline_asm/linux/build diff --git a/userland/arch/aarch64/c/linux/test b/userland/arch/aarch64/inline_asm/linux/test similarity index 100% rename from userland/arch/aarch64/c/linux/test rename to userland/arch/aarch64/inline_asm/linux/test diff --git a/userland/arch/aarch64/c/multiline.cpp b/userland/arch/aarch64/inline_asm/multiline.cpp similarity index 100% rename from userland/arch/aarch64/c/multiline.cpp rename to userland/arch/aarch64/inline_asm/multiline.cpp diff --git a/userland/arch/aarch64/c/reg_var.c b/userland/arch/aarch64/inline_asm/reg_var.c similarity index 100% rename from userland/arch/aarch64/c/reg_var.c rename to userland/arch/aarch64/inline_asm/reg_var.c diff --git a/userland/arch/aarch64/c/reg_var_float.c b/userland/arch/aarch64/inline_asm/reg_var_float.c similarity index 100% rename from userland/arch/aarch64/c/reg_var_float.c rename to userland/arch/aarch64/inline_asm/reg_var_float.c diff --git a/userland/arch/aarch64/c/test b/userland/arch/aarch64/inline_asm/test similarity index 100% rename from userland/arch/aarch64/c/test rename to userland/arch/aarch64/inline_asm/test diff --git a/userland/arch/arm/c/add.c b/userland/arch/arm/inline_asm/add.c similarity index 100% rename from userland/arch/arm/c/add.c rename to userland/arch/arm/inline_asm/add.c diff --git a/userland/arch/arm/c/build b/userland/arch/arm/inline_asm/build similarity index 100% rename from userland/arch/arm/c/build rename to userland/arch/arm/inline_asm/build diff --git a/userland/arch/arm/c/freestanding/build b/userland/arch/arm/inline_asm/freestanding/build similarity index 100% rename from userland/arch/arm/c/freestanding/build rename to userland/arch/arm/inline_asm/freestanding/build diff --git a/userland/arch/arm/c/freestanding/linux/build b/userland/arch/arm/inline_asm/freestanding/linux/build similarity index 100% rename from userland/arch/arm/c/freestanding/linux/build rename to userland/arch/arm/inline_asm/freestanding/linux/build diff --git a/userland/arch/arm/c/freestanding/linux/hello.c b/userland/arch/arm/inline_asm/freestanding/linux/hello.c similarity index 100% rename from userland/arch/arm/c/freestanding/linux/hello.c rename to userland/arch/arm/inline_asm/freestanding/linux/hello.c diff --git a/userland/arch/arm/c/freestanding/linux/test b/userland/arch/arm/inline_asm/freestanding/linux/test similarity index 100% rename from userland/arch/arm/c/freestanding/linux/test rename to userland/arch/arm/inline_asm/freestanding/linux/test diff --git a/userland/arch/arm/c/freestanding/test b/userland/arch/arm/inline_asm/freestanding/test similarity index 100% rename from userland/arch/arm/c/freestanding/test rename to userland/arch/arm/inline_asm/freestanding/test diff --git a/userland/arch/arm/c/inc.c b/userland/arch/arm/inline_asm/inc.c similarity index 100% rename from userland/arch/arm/c/inc.c rename to userland/arch/arm/inline_asm/inc.c diff --git a/userland/arch/arm/c/inc_float.c b/userland/arch/arm/inline_asm/inc_float.c similarity index 100% rename from userland/arch/arm/c/inc_float.c rename to userland/arch/arm/inline_asm/inc_float.c diff --git a/userland/arch/arm/c/inc_memory.c b/userland/arch/arm/inline_asm/inc_memory.c similarity index 100% rename from userland/arch/arm/c/inc_memory.c rename to userland/arch/arm/inline_asm/inc_memory.c diff --git a/userland/arch/arm/c/inc_memory_global.c b/userland/arch/arm/inline_asm/inc_memory_global.c similarity index 100% rename from userland/arch/arm/c/inc_memory_global.c rename to userland/arch/arm/inline_asm/inc_memory_global.c diff --git a/userland/arch/arm/c/reg_var.c b/userland/arch/arm/inline_asm/reg_var.c similarity index 100% rename from userland/arch/arm/c/reg_var.c rename to userland/arch/arm/inline_asm/reg_var.c diff --git a/userland/arch/arm/c/test b/userland/arch/arm/inline_asm/test similarity index 100% rename from userland/arch/arm/c/test rename to userland/arch/arm/inline_asm/test diff --git a/userland/arch/x86_64/addpd.S b/userland/arch/x86_64/addpd.S index e217680..9822248 100644 --- a/userland/arch/x86_64/addpd.S +++ b/userland/arch/x86_64/addpd.S @@ -1,6 +1,6 @@ /* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-addpq-instruction * - * Add a bunch of floating point numbers in one go. + * Add a few floating point numbers in one go (P == packaged) */ #include diff --git a/userland/arch/x86_64/c/binutils_hack.c b/userland/arch/x86_64/binutils_hack.c similarity index 100% rename from userland/arch/x86_64/c/binutils_hack.c rename to userland/arch/x86_64/binutils_hack.c diff --git a/userland/arch/x86_64/c/binutils_nohack.c b/userland/arch/x86_64/binutils_nohack.c similarity index 100% rename from userland/arch/x86_64/c/binutils_nohack.c rename to userland/arch/x86_64/binutils_nohack.c diff --git a/userland/arch/x86_64/c/add.c b/userland/arch/x86_64/inline_asm/add.c similarity index 100% rename from userland/arch/x86_64/c/add.c rename to userland/arch/x86_64/inline_asm/add.c diff --git a/userland/arch/x86_64/c/build b/userland/arch/x86_64/inline_asm/build similarity index 100% rename from userland/arch/x86_64/c/build rename to userland/arch/x86_64/inline_asm/build diff --git a/userland/arch/x86_64/c/freestanding/build b/userland/arch/x86_64/inline_asm/freestanding/build similarity index 100% rename from userland/arch/x86_64/c/freestanding/build rename to userland/arch/x86_64/inline_asm/freestanding/build diff --git a/userland/arch/x86_64/c/freestanding/linux/build b/userland/arch/x86_64/inline_asm/freestanding/linux/build similarity index 100% rename from userland/arch/x86_64/c/freestanding/linux/build rename to userland/arch/x86_64/inline_asm/freestanding/linux/build diff --git a/userland/arch/x86_64/c/freestanding/linux/hello.c b/userland/arch/x86_64/inline_asm/freestanding/linux/hello.c similarity index 100% rename from userland/arch/x86_64/c/freestanding/linux/hello.c rename to userland/arch/x86_64/inline_asm/freestanding/linux/hello.c diff --git a/userland/arch/x86_64/c/freestanding/linux/hello_regvar.c b/userland/arch/x86_64/inline_asm/freestanding/linux/hello_regvar.c similarity index 100% rename from userland/arch/x86_64/c/freestanding/linux/hello_regvar.c rename to userland/arch/x86_64/inline_asm/freestanding/linux/hello_regvar.c diff --git a/userland/arch/x86_64/c/freestanding/linux/test b/userland/arch/x86_64/inline_asm/freestanding/linux/test similarity index 100% rename from userland/arch/x86_64/c/freestanding/linux/test rename to userland/arch/x86_64/inline_asm/freestanding/linux/test diff --git a/userland/arch/x86_64/c/freestanding/test b/userland/arch/x86_64/inline_asm/freestanding/test similarity index 100% rename from userland/arch/x86_64/c/freestanding/test rename to userland/arch/x86_64/inline_asm/freestanding/test diff --git a/userland/arch/x86_64/c/inc.c b/userland/arch/x86_64/inline_asm/inc.c similarity index 100% rename from userland/arch/x86_64/c/inc.c rename to userland/arch/x86_64/inline_asm/inc.c diff --git a/userland/arch/x86_64/c/scratch.c b/userland/arch/x86_64/inline_asm/scratch.c similarity index 100% rename from userland/arch/x86_64/c/scratch.c rename to userland/arch/x86_64/inline_asm/scratch.c diff --git a/userland/arch/x86_64/c/scratch_hardcode.c b/userland/arch/x86_64/inline_asm/scratch_hardcode.c similarity index 100% rename from userland/arch/x86_64/c/scratch_hardcode.c rename to userland/arch/x86_64/inline_asm/scratch_hardcode.c diff --git a/userland/arch/x86_64/c/test b/userland/arch/x86_64/inline_asm/test similarity index 100% rename from userland/arch/x86_64/c/test rename to userland/arch/x86_64/inline_asm/test diff --git a/userland/arch/x86_64/intrinsics/README.adoc b/userland/arch/x86_64/intrinsics/README.adoc new file mode 100644 index 0000000..0451faa --- /dev/null +++ b/userland/arch/x86_64/intrinsics/README.adoc @@ -0,0 +1 @@ +https://github.com/cirosantilli/linux-kernel-module-cheat#gcc-intrinsics diff --git a/userland/arch/x86_64/intrinsics/addpd.c b/userland/arch/x86_64/intrinsics/addpd.c new file mode 100644 index 0000000..e22c035 --- /dev/null +++ b/userland/arch/x86_64/intrinsics/addpd.c @@ -0,0 +1,46 @@ +/* https://github.com/cirosantilli/linux-kernel-module-cheat#gcc-intrinsics */ + +#include + +#include + +int main(void) { + + /* 32-bit add (addps). */ + { + __m128 input0 = _mm_set_ps(1.5f, 2.5f, 3.5f, 4.5f); + __m128 input1 = _mm_set_ps(5.5f, 6.5f, 7.5f, 8.5f); + __m128 output = _mm_add_ps(input0, input1); + /* _mm_extract_ps returns int instead of float: + * * https://stackoverflow.com/questions/5526658/intel-sse-why-does-mm-extract-ps-return-int-instead-of-float + * * https://stackoverflow.com/questions/3130169/how-to-convert-a-hex-float-to-a-float-in-c-c-using-mm-extract-ps-sse-gcc-inst + * so we must use instead: _MM_EXTRACT_FLOAT + */ + float f; + _MM_EXTRACT_FLOAT(f, output, 3); + assert(f == 7.0f); + _MM_EXTRACT_FLOAT(f, output, 2); + assert(f == 9.0f); + _MM_EXTRACT_FLOAT(f, output, 1); + assert(f == 11.0f); + _MM_EXTRACT_FLOAT(f, output, 0); + assert(f == 13.0f); + } + + /* 64-bit add (addpd). */ + { + __m128d input0 = _mm_set_pd(1.5, 2.5); + __m128d input1 = _mm_set_pd(5.5, 6.5); + __m128d output = _mm_add_pd(input0, input1); + double d; + /* TODO: there is no _MM_EXTRACT_DOUBLE, and the asserts below fail. */ +#if 0 + _MM_EXTRACT_FLOAT(d, output, 1); + assert(d == 7.0); + _MM_EXTRACT_FLOAT(d, output, 0); + assert(d == 9.0); +#endif + } + + return 0; +} diff --git a/userland/arch/x86_64/intrinsics/build b/userland/arch/x86_64/intrinsics/build new file mode 120000 index 0000000..ab18017 --- /dev/null +++ b/userland/arch/x86_64/intrinsics/build @@ -0,0 +1 @@ +../build \ No newline at end of file diff --git a/userland/arch/x86_64/intrinsics/paddq.c b/userland/arch/x86_64/intrinsics/paddq.c new file mode 100644 index 0000000..2e86203 --- /dev/null +++ b/userland/arch/x86_64/intrinsics/paddq.c @@ -0,0 +1,56 @@ +/* https://github.com/cirosantilli/linux-kernel-module-cheat#gcc-intrinsics */ + +#include + +#include + +int main(void) { + + /* 32-bit add hello world. */ + { + __m128i input0 = _mm_set_epi32(1, 2, 3, 4); + __m128i input1 = _mm_set_epi32(5, 6, 7, 8); + __m128i output = _mm_add_epi32(input0, input1); + assert(_mm_extract_epi32(output, 3) == 6); + assert(_mm_extract_epi32(output, 2) == 8); + assert(_mm_extract_epi32(output, 1) == 10); + assert(_mm_extract_epi32(output, 0) == 12); + } + + /* Now a bunch of other sizes. */ + { + __m128i input0 = _mm_set_epi32(0xF1F1F1F1, 0xF2F2F2F2, 0xF3F3F3F3, 0xF4F4F4F4); + __m128i input1 = _mm_set_epi32(0x12121212, 0x13131313, 0x14141414, 0x15151515); + __m128i output; + + /* 8-bit integers (paddb) */ + output = _mm_add_epi8(input0, input1); + assert(_mm_extract_epi32(output, 3) == 0x03030303); + assert(_mm_extract_epi32(output, 2) == 0x05050505); + assert(_mm_extract_epi32(output, 1) == 0x07070707); + assert(_mm_extract_epi32(output, 0) == 0x09090909); + + /* 32-bit integers (paddw) */ + output = _mm_add_epi16(input0, input1); + assert(_mm_extract_epi32(output, 3) == 0x04030403); + assert(_mm_extract_epi32(output, 2) == 0x06050605); + assert(_mm_extract_epi32(output, 1) == 0x08070807); + assert(_mm_extract_epi32(output, 0) == 0x0A090A09); + + /* 32-bit integers (paddd) */ + output = _mm_add_epi32(input0, input1); + assert(_mm_extract_epi32(output, 3) == 0x04040403); + assert(_mm_extract_epi32(output, 2) == 0x06060605); + assert(_mm_extract_epi32(output, 1) == 0x08080807); + assert(_mm_extract_epi32(output, 0) == 0x0A0A0A09); + + /* 64-bit integers (paddq) */ + output = _mm_add_epi64(input0, input1); + assert(_mm_extract_epi32(output, 3) == 0x04040404); + assert(_mm_extract_epi32(output, 2) == 0x06060605); + assert(_mm_extract_epi32(output, 1) == 0x08080808); + assert(_mm_extract_epi32(output, 0) == 0x0A0A0A09); + } + + return 0; +} diff --git a/userland/arch/x86_64/intrinsics/test b/userland/arch/x86_64/intrinsics/test new file mode 120000 index 0000000..419df4f --- /dev/null +++ b/userland/arch/x86_64/intrinsics/test @@ -0,0 +1 @@ +../test \ No newline at end of file diff --git a/userland/arch/x86_64/paddq.S b/userland/arch/x86_64/paddq.S index f89cff8..50c8ef3 100644 --- a/userland/arch/x86_64/paddq.S +++ b/userland/arch/x86_64/paddq.S @@ -1,8 +1,9 @@ /* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-paddq-instruction * - * Add a bunch of integers in one go. + * Add several integers in one go. * - * The different variants basically determine if carries get forwarded or not. + * The different variants basically determine integer size, which basically + * determines if carries get forwarded or not. */ #include diff --git a/userland/arch/x86_64/c/rdtsc.c b/userland/arch/x86_64/rdtsc.c similarity index 100% rename from userland/arch/x86_64/c/rdtsc.c rename to userland/arch/x86_64/rdtsc.c diff --git a/userland/arch/x86_64/c/ring0.c b/userland/arch/x86_64/ring0.c similarity index 100% rename from userland/arch/x86_64/c/ring0.c rename to userland/arch/x86_64/ring0.c