x86 asm: move the rest of SIMD from x86-assembly-cheat

2026-01-23 02:05:57 +01:00 · 2019-06-23 00:00:02 +00:00
parent dcd866260c
commit d62070d934
7 changed files with 154 additions and 8 deletions
--- a/userland/arch/x86_64/addpd.S
+++ b/userland/arch/x86_64/addpd.S
@@ -1,6 +1,6 @@
-/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-addpd-instruction
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-sse-packed-arithmetic-instructions
 *
- * Add a few floating point numbers in one go (P == packaged)
+ * Add a few floating point numbers in one go (P == packed).
 */

 #include <lkmc.h>
--- a/userland/arch/x86_64/cvttss2si.S
+++ b/userland/arch/x86_64/cvttss2si.S
@@ -0,0 +1,20 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-sse-packed-arithmetic-instructions */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+.data
+    .align 16
+    input_2_5: .float 2.5
+    input_minus_2_5: .float -2.5
+.text
+    /* Positive input: 2.5 is truncated toward zero, giving 2. */
+    movss input_2_5, %xmm0
+    cvttss2si %xmm0, %eax /* Low single-precision lane of xmm0 -> signed 32-bit integer in eax. */
+    LKMC_ASSERT_EQ_32(%eax, $2)
+
+    /* Negative input: -2.5 is also truncated toward zero, giving -2 rather than -3. */
+    movss input_minus_2_5, %xmm0
+    cvttss2si %xmm0, %eax /* Same conversion; the result is not rounded down to -3. */
+    LKMC_ASSERT_EQ_32(%eax, $-2)
+LKMC_EPILOGUE
--- a/userland/arch/x86_64/movaps.S
+++ b/userland/arch/x86_64/movaps.S
@@ -0,0 +1,18 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-sse-data-transfer-instructions */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+.data
+    /* MOVAPS faults on a memory operand that is not 16-byte aligned, so align the source. */
+    .align 16
+    input: .float 1.5, 2.5, 3.5, 4.5 /* 4 x 4 bytes = 16 bytes, one full XMM register. */
+.bss
+    .align 16
+    output: .skip 16
+.text
+    movaps input, %xmm0 /* Memory -> register: load all four floats at once. */
+    movaps %xmm0, %xmm1 /* Register -> register. */
+    movaps %xmm1, output /* Register -> memory: the destination must be aligned as well. */
+    LKMC_ASSERT_MEMCMP(input, output, $16)
+LKMC_EPILOGUE
--- a/userland/arch/x86_64/movss.S
+++ b/userland/arch/x86_64/movss.S
@@ -0,0 +1,14 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-sse-data-transfer-instructions */
+
+#include <lkmc.h>
+
+.data
+    input:  .float 1.5 /* One 4-byte single-precision value. */
+.bss
+    output: .skip 4 /* Room for exactly one float. */
+LKMC_PROLOGUE /* NOTE(review): no explicit .text follows .bss here; presumably the macro selects the code section -- confirm in lkmc.h. */
+    movss input, %xmm0 /* Memory -> low 32 bits of xmm0. */
+    movss %xmm0, %xmm1 /* Register -> register, low 32 bits only. */
+    movss %xmm1, output /* Low 32 bits of xmm1 -> memory. */
+    LKMC_ASSERT_MEMCMP(input, output, $4)
+LKMC_EPILOGUE
--- a/userland/arch/x86_64/movups.S
+++ b/userland/arch/x86_64/movups.S
@@ -0,0 +1,16 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-sse-data-transfer-instructions */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+.data
+    /* Unlike MOVAPS, MOVUPS accepts unaligned memory operands, so no .align directive is needed here. */
+    input: .float 1.5, 2.5, 3.5, 4.5 /* 4 x 4 bytes = 16 bytes, one full XMM register. */
+.bss
+    output: .skip 16
+.text
+    movups input, %xmm0 /* Memory -> register: load all four floats at once. */
+    movups %xmm0, %xmm1 /* Register -> register. */
+    movups %xmm1, output /* Register -> memory. */
+    LKMC_ASSERT_MEMCMP(input, output, $16)
+LKMC_EPILOGUE
--- a/userland/arch/x86_64/vfmadd132pd.S
+++ b/userland/arch/x86_64/vfmadd132pd.S
@@ -0,0 +1,23 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-fma */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+.data
+    .align 16
+    input0: .double 1.5,  2.5
+    input1: .double 2.0,  4.0
+    input2: .double 2.5,  3.5
+    expect: .double 6.5, 16.5 /* input0 + (input1 * input2), lane by lane. */
+.bss
+    .align 16
+    output: .skip 16
+.text
+    movaps input1, %xmm0 /* Multiplier. */
+    movaps input0, %xmm1 /* Addend. */
+    movaps input2, %xmm2 /* Multiplicand; this register is overwritten with the result. */
+    /* Per 64-bit lane: xmm2 = xmm1 + (xmm0 * xmm2), i.e. 1.5 + 2.0 * 2.5 = 6.5 and 2.5 + 4.0 * 3.5 = 16.5. */
+    vfmadd132pd %xmm0, %xmm1, %xmm2
+    movaps %xmm2, output
+    LKMC_ASSERT_MEMCMP(output, expect, $0x10)
+LKMC_EPILOGUE