mirror of
https://github.com/cirosantilli/linux-kernel-module-cheat.git
synced 2026-01-28 04:24:26 +01:00
x86 assembly: addpd
This commit is contained in:
10
README.adoc
10
README.adoc
@@ -11578,7 +11578,12 @@ Sources:
|
|||||||
* link:userland/arch/arm/add.S[]
|
* link:userland/arch/arm/add.S[]
|
||||||
* link:userland/arch/aarch64/add.S[]
|
* link:userland/arch/aarch64/add.S[]
|
||||||
|
|
||||||
This verifies that the venerable `add` instruction and our setup are working.
|
These examples use the venerable ADD instruction to:
|
||||||
|
|
||||||
|
* introduce the basics of how a given assembly works: how many inputs / outputs, which operands are inputs and which are outputs, whether it can use memory or just registers, etc.
|
||||||
|
+
|
||||||
|
It is then a big copy paste for most other data instructions.
|
||||||
|
* verify that the venerable `add` instruction and our assertions are working
|
||||||
|
|
||||||
Then, modify that program to make the assertion fail:
|
Then, modify that program to make the assertion fail:
|
||||||
|
|
||||||
@@ -11849,7 +11854,8 @@ History:
|
|||||||
* link:https://en.wikipedia.org/wiki/MMX_(instruction_set)[MMX]: 1997
|
* link:https://en.wikipedia.org/wiki/MMX_(instruction_set)[MMX]: 1997
|
||||||
* link:https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions[SSE]: Streaming SIMD Extensions. 1999. 128-bit XMM registers.
|
* link:https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions[SSE]: Streaming SIMD Extensions. 1999. 128-bit XMM registers.
|
||||||
* link:https://en.wikipedia.org/wiki/SSE2[SSE2]: 2004
|
* link:https://en.wikipedia.org/wiki/SSE2[SSE2]: 2004
|
||||||
** link:userland/arch/x86_64/paddq.S[]
|
** link:userland/arch/x86_64/addpd.S[]: `ADDPS`, `ADDPD`
|
||||||
|
** link:userland/arch/x86_64/paddq.S[]: `PADDQ`, `PADDD`, `PADDW`, `PADDB`
|
||||||
* link:https://en.wikipedia.org/wiki/SSE3[SSE3]: 2006
|
* link:https://en.wikipedia.org/wiki/SSE3[SSE3]: 2006
|
||||||
* link:https://en.wikipedia.org/wiki/SSE4[SSE4]: 2006
|
* link:https://en.wikipedia.org/wiki/SSE4[SSE4]: 2006
|
||||||
* link:https://en.wikipedia.org/wiki/Advanced_Vector_Extensions[AVX]: Advanced Vector Extensions. 2011. 256-bit YMM registers. Extension of XMM.
|
* link:https://en.wikipedia.org/wiki/Advanced_Vector_Extensions[AVX]: Advanced Vector Extensions. 2011. 256-bit YMM registers. Extension of XMM.
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ ENTRY
|
|||||||
u64_1: .quad 0x1555555515555555, 0x1666666616666666
|
u64_1: .quad 0x1555555515555555, 0x1666666616666666
|
||||||
u64_sum_expect: .quad 0x0666666706666666, 0x0888888908888888
|
u64_sum_expect: .quad 0x0666666706666666, 0x0888888908888888
|
||||||
.bss
|
.bss
|
||||||
u64_sum: .skip 16
|
u64_sum: .skip 16
|
||||||
.text
|
.text
|
||||||
adr x0, u64_0
|
adr x0, u64_0
|
||||||
ld1 {v0.2d}, [x0]
|
ld1 {v0.2d}, [x0]
|
||||||
@@ -56,7 +56,7 @@ ENTRY
|
|||||||
f32_1: .float 5.5, 6.5, 7.5, 8.5
|
f32_1: .float 5.5, 6.5, 7.5, 8.5
|
||||||
f32_sum_expect: .float 7.0, 9.0, 11.0, 13.0
|
f32_sum_expect: .float 7.0, 9.0, 11.0, 13.0
|
||||||
.bss
|
.bss
|
||||||
f32_sum: .skip 16
|
f32_sum: .skip 16
|
||||||
.text
|
.text
|
||||||
adr x0, f32_0
|
adr x0, f32_0
|
||||||
ld1 {v0.4s}, [x0]
|
ld1 {v0.4s}, [x0]
|
||||||
|
|||||||
32
userland/arch/x86_64/addpd.S
Normal file
32
userland/arch/x86_64/addpd.S
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-simd
 *
 * Add a bunch of floating point numbers in one go.
 *
 * Two variants are exercised, each on one 16-byte operand:
 *
 * * ADDPS: four packed 32-bit floats
 * * ADDPD: two packed 64-bit doubles
 *
 * Syntax: GNU as, AT&T operand order (source, destination), run through
 * the C preprocessor.
 */

#include "common.h"

ENTRY
.bss
    /* 16 bytes: holds the full contents of one XMM register. */
    output: .skip 16

.data
    /* Four single precision lanes per operand. */
    addps_input0: .float 1.5, 2.5, 3.5, 4.5
    addps_input1: .float 5.5, 6.5, 7.5, 8.5
    addps_expect: .float 7.0, 9.0, 11.0, 13.0
    /* Two double precision lanes per operand. */
    addpd_input0: .double 1.5, 2.5
    addpd_input1: .double 5.5, 6.5
    addpd_expect: .double 7.0, 9.0
.text
/* TEST(size): size is the instruction suffix, s (ADDPS) or d (ADDPD).
 *
 * Loads addp<size>_input0 into xmm0 and addp<size>_input1 into xmm1,
 * adds xmm1 into xmm0 lane by lane, stores xmm0 to output, and then
 * checks the 0x10 bytes of output against addp<size>_expect.
 *
 * movups (unaligned move) is used for every transfer because none of
 * the data labels above carries an alignment directive.
 *
 * ASSERT_MEMCMP is provided by common.h, which is not visible here;
 * presumably it makes the program fail when the two buffers differ.
 *
 * Clobbers: xmm0, xmm1, output, plus whatever ASSERT_MEMCMP clobbers.
 */
#define TEST(size) \
    movups addp ## size ## _input0, %xmm0; \
    movups addp ## size ## _input1, %xmm1; \
    addp ## size %xmm1, %xmm0; \
    movups %xmm0, output; \
    ASSERT_MEMCMP(output, addp ## size ## _expect, $0x10)

    TEST(s)
    TEST(d)
#undef TEST
EXIT
|
||||||
@@ -1,6 +1,8 @@
|
|||||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-simd
|
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-simd
|
||||||
*
|
*
|
||||||
* Add a bunch of integers in one go.
|
* Add a bunch of integers in one go.
|
||||||
|
*
|
||||||
|
* The different variants basically determine if carries get forwarded or not.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
@@ -14,14 +16,14 @@ ENTRY
|
|||||||
paddd_expect: .long 0x04040403, 0x06060605, 0x08080807, 0x0A0A0A09
|
paddd_expect: .long 0x04040403, 0x06060605, 0x08080807, 0x0A0A0A09
|
||||||
paddq_expect: .long 0x04040403, 0x06060606, 0x08080807, 0x0A0A0A0A
|
paddq_expect: .long 0x04040403, 0x06060606, 0x08080807, 0x0A0A0A0A
|
||||||
.bss
|
.bss
|
||||||
result: .skip 16
|
output: .skip 16
|
||||||
.text
|
.text
|
||||||
movups input1, %xmm1
|
movups input1, %xmm1
|
||||||
#define TEST(size) \
|
#define TEST(size) \
|
||||||
movups input0, %xmm0; \
|
movups input0, %xmm0; \
|
||||||
padd ## size %xmm1, %xmm0; \
|
padd ## size %xmm1, %xmm0; \
|
||||||
movups %xmm0, result; \
|
movups %xmm0, output; \
|
||||||
ASSERT_MEMCMP(result, padd ## size ## _expect, $0x10)
|
ASSERT_MEMCMP(output, padd ## size ## _expect, $0x10)
|
||||||
|
|
||||||
TEST(b)
|
TEST(b)
|
||||||
TEST(w)
|
TEST(w)
|
||||||
|
|||||||
Reference in New Issue
Block a user