mirror of
https://github.com/cirosantilli/linux-kernel-module-cheat.git
synced 2026-01-25 19:21:35 +01:00
x86 assembly: addpd
This commit is contained in:
10
README.adoc
10
README.adoc
@@ -11578,7 +11578,12 @@ Sources:
|
||||
* link:userland/arch/arm/add.S[]
|
||||
* link:userland/arch/aarch64/add.S[]
|
||||
|
||||
This verifies that the venerable `add` instruction and our setup are working.
|
||||
These examples use the venerable ADD instruction to:
|
||||
|
||||
* introduce the basics of how a given assembly language works: how many inputs / outputs an instruction has, which operands are inputs and which are outputs, whether it can use memory or just registers, etc.
|
||||
+
|
||||
It is then a big copy paste for most other data instructions.
|
||||
* verify that the venerable `add` instruction and our assertions are working
|
||||
|
||||
Then, modify that program to make the assertion fail:
|
||||
|
||||
@@ -11849,7 +11854,8 @@ History:
|
||||
* link:https://en.wikipedia.org/wiki/MMX_(instruction_set)[MMX]: 1997
|
||||
* link:https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions[SSE]: Streaming SIMD Extensions. 1999. 128-bit XMM registers.
|
||||
* link:https://en.wikipedia.org/wiki/SSE2[SSE2]: 2000
|
||||
** link:userland/arch/x86_64/paddq.S[]
|
||||
** link:userland/arch/x86_64/addpd.S[]: `ADDPS` (already available since SSE), `ADDPD`
|
||||
** link:userland/arch/x86_64/paddq.S[]: `PADDQ`, `PADDD`, `PADDW`, `PADDB`
|
||||
* link:https://en.wikipedia.org/wiki/SSE3[SSE3]: 2004
|
||||
* link:https://en.wikipedia.org/wiki/SSE4[SSE4]: 2006
|
||||
* link:https://en.wikipedia.org/wiki/Advanced_Vector_Extensions[AVX]: Advanced Vector Extensions. 2011. 256-bit YMM registers. Extension of XMM.
|
||||
|
||||
@@ -35,7 +35,7 @@ ENTRY
|
||||
u64_1: .quad 0x1555555515555555, 0x1666666616666666
|
||||
u64_sum_expect: .quad 0x0666666706666666, 0x0888888908888888
|
||||
.bss
|
||||
u64_sum: .skip 16
|
||||
u64_sum: .skip 16
|
||||
.text
|
||||
adr x0, u64_0
|
||||
ld1 {v0.2d}, [x0]
|
||||
@@ -56,7 +56,7 @@ ENTRY
|
||||
f32_1: .float 5.5, 6.5, 7.5, 8.5
|
||||
f32_sum_expect: .float 7.0, 9.0, 11.0, 13.0
|
||||
.bss
|
||||
f32_sum: .skip 16
|
||||
f32_sum: .skip 16
|
||||
.text
|
||||
adr x0, f32_0
|
||||
ld1 {v0.4s}, [x0]
|
||||
|
||||
32
userland/arch/x86_64/addpd.S
Normal file
32
userland/arch/x86_64/addpd.S
Normal file
@@ -0,0 +1,32 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-simd
 *
 * Add a bunch of floating point numbers in one go.
 *
 * Two variants are exercised on 16-byte operands:
 *
 * * addps: operands declared below as four .float values each
 * * addpd: operands declared below as two .double values each
 *
 * Syntax: GNU as, AT&T operand order (source, destination), run through
 * the C preprocessor. ENTRY, EXIT and ASSERT_MEMCMP come from common.h.
 */

#include "common.h"

ENTRY
.bss
    /* Receives the 16 bytes of xmm0 after each addition. */
    output: .skip 16

.data
    addps_input0: .float 1.5, 2.5, 3.5, 4.5
    addps_input1: .float 5.5, 6.5, 7.5, 8.5
    addps_expect: .float 7.0, 9.0, 11.0, 13.0
    addpd_input0: .double 1.5, 2.5
    addpd_input1: .double 5.5, 6.5
    addpd_expect: .double 7.0, 9.0

.text
/* TEST(size): size is the mnemonic suffix, `s` or `d`.
 *
 * Loads addp<size>_input0 into xmm0 and addp<size>_input1 into xmm1,
 * executes addp<size> with xmm1 as source and xmm0 as destination,
 * stores xmm0 to output, then hands output, addp<size>_expect and the
 * length 0x10 to ASSERT_MEMCMP.
 *
 * NOTE(review): ASSERT_MEMCMP is defined in common.h, not visible here;
 * presumably it fails the program when the two buffers differ.
 *
 * movups is used for every transfer, so none of the labels above needs
 * to be 16-byte aligned.
 */
#define TEST(size) \
    movups addp ## size ## _input0, %xmm0; \
    movups addp ## size ## _input1, %xmm1; \
    addp ## size %xmm1, %xmm0; \
    movups %xmm0, output; \
    ASSERT_MEMCMP(output, addp ## size ## _expect, $0x10)

    TEST(s)
    TEST(d)
#undef TEST
EXIT
|
||||
@@ -1,6 +1,8 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-simd
|
||||
*
|
||||
* Add a bunch of integers in one go.
|
||||
*
|
||||
* The different variants basically determine if carries get forwarded or not.
|
||||
*/
|
||||
|
||||
#include "common.h"
|
||||
@@ -14,14 +16,14 @@ ENTRY
|
||||
paddd_expect: .long 0x04040403, 0x06060605, 0x08080807, 0x0A0A0A09
|
||||
paddq_expect: .long 0x04040403, 0x06060606, 0x08080807, 0x0A0A0A0A
|
||||
.bss
|
||||
result: .skip 16
|
||||
output: .skip 16
|
||||
.text
|
||||
movups input1, %xmm1
|
||||
#define TEST(size) \
|
||||
movups input0, %xmm0; \
|
||||
padd ## size %xmm1, %xmm0; \
|
||||
movups %xmm0, result; \
|
||||
ASSERT_MEMCMP(result, padd ## size ## _expect, $0x10)
|
||||
movups %xmm0, output; \
|
||||
ASSERT_MEMCMP(output, padd ## size ## _expect, $0x10)
|
||||
|
||||
TEST(b)
|
||||
TEST(w)
|
||||
|
||||
Reference in New Issue
Block a user