x86 assembly: addpd

2026-01-25 19:21:35 +01:00 · 2019-05-12 00:00:01 +00:00
parent fe9c31f737
commit 7cf3c20a40
4 changed files with 47 additions and 7 deletions
--- a/README.adoc
+++ b/README.adoc
@@ -11578,7 +11578,12 @@ Sources:
 * link:userland/arch/arm/add.S[]
 * link:userland/arch/aarch64/add.S[]

-This verifies that the venerable `add` instruction and our setup are working.
+These examples use the venerable ADD instruction to:
+
+* introduce the basics of how a given assembly language works: how many inputs / outputs an instruction takes, which operands are inputs and which are outputs, whether it can use memory or just registers, etc.
++
+Most other data processing instructions are then just a big copy paste of this.
+* verify that the `add` instruction and our assertions are working

 Then, modify that program to make the assertion fail:

@@ -11849,7 +11854,8 @@ History:
 * link:https://en.wikipedia.org/wiki/MMX_(instruction_set)[MMX]: 1997
 * link:https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions[SSE]: Streaming SIMD Extensions. 1999. 128-bit XMM registers.
 * link:https://en.wikipedia.org/wiki/SSE2[SSE2]: 2004
-** link:userland/arch/x86_64/paddq.S[]
+** link:userland/arch/x86_64/addpd.S[]: `ADDPS`, `ADDPD`
+** link:userland/arch/x86_64/paddq.S[]: `PADDQ`, `PADDD`, `PADDW`, `PADDB`
 * link:https://en.wikipedia.org/wiki/SSE3[SSE3]: 2006
 * link:https://en.wikipedia.org/wiki/SSE4[SSE4]: 2006
 * link:https://en.wikipedia.org/wiki/Advanced_Vector_Extensions[AVX]: Advanced Vector Extensions. 2011. 256-bit YMM registers. Extension of XMM.
--- a/userland/arch/aarch64/simd.S
+++ b/userland/arch/aarch64/simd.S
@@ -35,7 +35,7 @@ ENTRY
    u64_1:          .quad 0x1555555515555555, 0x1666666616666666
    u64_sum_expect: .quad 0x0666666706666666, 0x0888888908888888
 .bss
-    u64_sum: .skip 16
+    u64_sum:        .skip 16
 .text
    adr x0, u64_0
    ld1 {v0.2d}, [x0]
@@ -56,7 +56,7 @@ ENTRY
    f32_1:          .float 5.5, 6.5,  7.5,  8.5
    f32_sum_expect: .float 7.0, 9.0, 11.0, 13.0
 .bss
-    f32_sum: .skip 16
+    f32_sum:        .skip 16
 .text
    adr x0, f32_0
    ld1 {v0.4s}, [x0]
--- a/userland/arch/x86_64/addpd.S
+++ b/userland/arch/x86_64/addpd.S
@@ -0,0 +1,32 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-simd
+ *
+ * Add a bunch of floating point numbers in one go.
+ */
+
+#include "common.h"
+
+ENTRY
+.bss
+    /* Scratch buffer: each TEST stores its 16-byte XMM result here. */
+    output: .skip 16
+.data
+    addps_input0: .float 1.5, 2.5,  3.5,  4.5
+    addps_input1: .float 5.5, 6.5,  7.5,  8.5
+    addps_expect: .float 7.0, 9.0, 11.0, 13.0
+    addpd_input0: .double 1.5, 2.5
+    addpd_input1: .double 5.5, 6.5
+    addpd_expect: .double 7.0, 9.0
+.text
+/* TEST(size): load addp<size>_input0 / _input1 into xmm0 / xmm1, add them with
+ * addp<size>, store xmm0 to output and assert it matches addp<size>_expect. */
+#define TEST(size) \
+    movups addp ## size ## _input0, %xmm0; \
+    movups addp ## size ## _input1, %xmm1; \
+    addp ## size %xmm1, %xmm0; \
+    movups %xmm0, output; \
+    ASSERT_MEMCMP(output, addp ## size ## _expect, $0x10)
+
+    TEST(s) /* ADDPS: the four .float lanes. */
+    TEST(d) /* ADDPD: the two .double lanes. */
+#undef TEST
+EXIT
--- a/userland/arch/x86_64/paddq.S
+++ b/userland/arch/x86_64/paddq.S
@@ -1,6 +1,8 @@
 /* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-simd
 *
 * Add a bunch of integers in one go.
+ *
+ * The variants differ only in element width, which determines where carries stop propagating between adjacent elements.
 */

 #include "common.h"
@@ -14,14 +16,14 @@ ENTRY
    paddd_expect: .long 0x04040403, 0x06060605, 0x08080807, 0x0A0A0A09
    paddq_expect: .long 0x04040403, 0x06060606, 0x08080807, 0x0A0A0A0A
 .bss
-    result: .skip 16
+    output:       .skip 16
 .text
    movups input1, %xmm1
 #define TEST(size) \
    movups input0, %xmm0; \
    padd ## size %xmm1, %xmm0; \
-    movups %xmm0, result; \
-    ASSERT_MEMCMP(result, padd ## size ## _expect, $0x10)
+    movups %xmm0, output; \
+    ASSERT_MEMCMP(output, padd ## size ## _expect, $0x10)

    TEST(b)
    TEST(w)