mirror of
https://github.com/cirosantilli/linux-kernel-module-cheat.git
synced 2026-01-23 02:05:57 +01:00
x86 asm: move the rest of SIMD from x86-assembly-cheat
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-addpd-instruction
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-sse-packed-arithmetic-instructions
|
||||
*
|
||||
* Add a few floating point numbers in one go (P == packaged)
|
||||
* Add a few floating point numbers in one go (P == packed).
|
||||
*/
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
20
userland/arch/x86_64/cvttss2si.S
Normal file
20
userland/arch/x86_64/cvttss2si.S
Normal file
@@ -0,0 +1,20 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-sse-packed-arithmetic-instructions */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
LKMC_PROLOGUE
.data
    /* Two scalar single-precision test values, one of each sign. */
    .p2align 4
    float_pos: .float 2.5
    float_neg: .float -2.5
.text
    /* 2.5 -> 2: the fractional part is discarded. */
    movss float_pos, %xmm0
    cvttss2si %xmm0, %eax
    LKMC_ASSERT_EQ_32(%eax, $2)

    /* -2.5 -> -2: the conversion truncates toward zero,
     * so the result is not rounded down to -3. */
    movss float_neg, %xmm0
    cvttss2si %xmm0, %eax
    LKMC_ASSERT_EQ_32(%eax, $-2)
LKMC_EPILOGUE
|
||||
18
userland/arch/x86_64/movaps.S
Normal file
18
userland/arch/x86_64/movaps.S
Normal file
@@ -0,0 +1,18 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-sse-data-transfer-instructions */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
LKMC_PROLOGUE
.data
    /* MOVAPS memory operands must sit on a 16-byte boundary. */
    .p2align 4
    src: .float 1.5, 2.5, 3.5, 4.5
.bss
    .p2align 4
    dst: .skip 16
.text
    /* Round trip of 4 packed floats:
     * memory -> register -> register -> memory. */
    movaps src, %xmm0
    movaps %xmm0, %xmm1
    movaps %xmm1, dst

    /* All 16 bytes must arrive unchanged. */
    LKMC_ASSERT_MEMCMP(src, dst, $16)
LKMC_EPILOGUE
|
||||
14
userland/arch/x86_64/movss.S
Normal file
14
userland/arch/x86_64/movss.S
Normal file
@@ -0,0 +1,14 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-sse-data-transfer-instructions */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
LKMC_PROLOGUE
.data
    /* A single scalar float: MOVSS only transfers these 4 bytes,
     * so no particular alignment is requested. */
    input: .float 1.5
.bss
    output: .skip 4
/* Select the code section explicitly, as the other SSE examples do, so the
 * instructions below never depend on which section the data definitions
 * left active. */
.text
    /* Round trip: memory -> register -> register -> memory. */
    movss input, %xmm0
    movss %xmm0, %xmm1
    movss %xmm1, output

    /* The 4 bytes must arrive unchanged. */
    LKMC_ASSERT_MEMCMP(input, output, $4)
LKMC_EPILOGUE
|
||||
16
userland/arch/x86_64/movups.S
Normal file
16
userland/arch/x86_64/movups.S
Normal file
@@ -0,0 +1,16 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-sse-data-transfer-instructions */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
LKMC_PROLOGUE
.data
    /* Unlike MOVAPS, we don't need to align memory here.
     *
     * To actually exercise that, force a misaligned address: align to 16
     * and then skip one byte, so that input sits at 16n + 1. Without this
     * the buffer could happen to land on a 16-byte boundary and the test
     * would pass without ever touching unaligned memory.
     */
    .align 16
    .skip 1
    input: .float 1.5, 2.5, 3.5, 4.5
.bss
    /* Same trick for the destination buffer. */
    .align 16
    .skip 1
    output: .skip 16
.text
    /* Round trip of 4 packed floats:
     * unaligned memory -> register -> register -> unaligned memory. */
    movups input, %xmm0
    movups %xmm0, %xmm1
    movups %xmm1, output

    /* All 16 bytes must arrive unchanged. */
    LKMC_ASSERT_MEMCMP(input, output, $16)
LKMC_EPILOGUE
|
||||
23
userland/arch/x86_64/vfmadd132pd.S
Normal file
23
userland/arch/x86_64/vfmadd132pd.S
Normal file
@@ -0,0 +1,23 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-fma */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
LKMC_PROLOGUE
.data
    /* Each operand is 2 packed doubles == 16 bytes, so a single alignment
     * directive keeps all four labels 16-byte aligned. */
    .align 16
    input0: .double 1.5, 2.5
    input1: .double 2.0, 4.0
    input2: .double 2.5, 3.5
    /* input0 + (input1 * input2), lane by lane:
     * 1.5 + 2.0 * 2.5 == 6.5
     * 2.5 + 4.0 * 3.5 == 16.5
     * All values are exactly representable, so a byte comparison is safe. */
    expect: .double 6.5, 16.5
.bss
    .align 16
    output: .skip 16
.text
    /* Move the packed doubles with the double-precision, VEX-encoded form
     * so that the whole sequence matches the data type and the encoding
     * of the FMA instruction itself. */
    vmovapd input1, %xmm0
    vmovapd input0, %xmm1
    vmovapd input2, %xmm2

    /* xmm2 = xmm1 + (xmm0 * xmm2) */
    vfmadd132pd %xmm0, %xmm1, %xmm2

    vmovapd %xmm2, output
    LKMC_ASSERT_MEMCMP(output, expect, $16)
LKMC_EPILOGUE
|
||||
Reference in New Issue
Block a user