mirror of
https://github.com/cirosantilli/linux-kernel-module-cheat.git
synced 2026-01-27 12:04:27 +01:00
x86 asm: intrinsics from memory
This commit is contained in:
@@ -11933,8 +11933,9 @@ where:
|
|||||||
* `<suffix>`: data type:
|
* `<suffix>`: data type:
|
||||||
** `ps`: 4 floats (Packed Single)
|
** `ps`: 4 floats (Packed Single)
|
||||||
** `pd`: 2 doubles (Packed Double)
|
** `pd`: 2 doubles (Packed Double)
|
||||||
** `ss`: 1 float (Scalar Single)
|
** `ss`: 1 float (Scalar Single), often the lowest order one
|
||||||
** `sd`: 1 double (Scalar Double)
|
** `sd`: 1 double (Scalar Double)
|
||||||
|
** `si128`: 128 bits of integers of any size
|
||||||
** `ep<int_type>` integer types, e.g.:
|
** `ep<int_type>` integer types, e.g.:
|
||||||
*** `epi32`: 32 bit signed integers
|
*** `epi32`: 32 bit signed integers
|
||||||
*** `epu16`: 16 bit unsigned integers
|
*** `epu16`: 16 bit unsigned integers
|
||||||
@@ -11967,7 +11968,7 @@ Present in `gcc-7_3_0-release` tree at: `gcc/config/i386/x86intrin.h`.
|
|||||||
Bibliography:
|
Bibliography:
|
||||||
|
|
||||||
* https://www.cs.virginia.edu/~cr4bd/3330/S2018/simdref.html
|
* https://www.cs.virginia.edu/~cr4bd/3330/S2018/simdref.html
|
||||||
* https://www.cs.virginia.edu/~cr4bd/3330/S2018/simdref.html
|
* https://software.intel.com/en-us/articles/how-to-use-intrinsics
|
||||||
|
|
||||||
==== GCC inline assembly register variables
|
==== GCC inline assembly register variables
|
||||||
|
|
||||||
|
|||||||
@@ -6,15 +6,17 @@
|
|||||||
#include <lkmc.h>
|
#include <lkmc.h>
|
||||||
|
|
||||||
LKMC_PROLOGUE
|
LKMC_PROLOGUE
|
||||||
.bss
|
|
||||||
output: .skip 16
|
|
||||||
.data
|
.data
|
||||||
|
.align 16
|
||||||
addps_input0: .float 1.5, 2.5, 3.5, 4.5
|
addps_input0: .float 1.5, 2.5, 3.5, 4.5
|
||||||
addps_input1: .float 5.5, 6.5, 7.5, 8.5
|
addps_input1: .float 5.5, 6.5, 7.5, 8.5
|
||||||
addps_expect: .float 7.0, 9.0, 11.0, 13.0
|
addps_expect: .float 7.0, 9.0, 11.0, 13.0
|
||||||
addpd_input0: .double 1.5, 2.5
|
addpd_input0: .double 1.5, 2.5
|
||||||
addpd_input1: .double 5.5, 6.5
|
addpd_input1: .double 5.5, 6.5
|
||||||
addpd_expect: .double 7.0, 9.0
|
addpd_expect: .double 7.0, 9.0
|
||||||
|
.bss
|
||||||
|
.align 16
|
||||||
|
output: .skip 16
|
||||||
.text
|
.text
|
||||||
#define TEST(size) \
|
#define TEST(size) \
|
||||||
movups addp ## size ## _input0, %xmm0; \
|
movups addp ## size ## _input0, %xmm0; \
|
||||||
|
|||||||
@@ -1,11 +1,16 @@
|
|||||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#gcc-intrinsics */
|
/* https://github.com/cirosantilli/linux-kernel-module-cheat#gcc-intrinsics */
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
#include <x86intrin.h>
|
#include <x86intrin.h>
|
||||||
|
|
||||||
int main(void) {
|
float global_input0[] __attribute__((aligned(16))) = {1.5f, 2.5f, 3.5f, 4.5f};
|
||||||
|
float global_input1[] __attribute__((aligned(16))) = {5.5f, 6.5f, 7.5f, 8.5f};
|
||||||
|
float global_output[4] __attribute__((aligned(16)));
|
||||||
|
float global_expected[] __attribute__((aligned(16))) = {7.0f, 9.0f, 11.0f, 13.0f};
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
/* 32-bit add (addps). */
|
/* 32-bit add (addps). */
|
||||||
{
|
{
|
||||||
__m128 input0 = _mm_set_ps(1.5f, 2.5f, 3.5f, 4.5f);
|
__m128 input0 = _mm_set_ps(1.5f, 2.5f, 3.5f, 4.5f);
|
||||||
@@ -33,6 +38,14 @@ int main(void) {
|
|||||||
assert(_mm_cvtss_f32(_mm_shuffle_ps(output, output, 3)) == 7.0f);
|
assert(_mm_cvtss_f32(_mm_shuffle_ps(output, output, 3)) == 7.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Now from memory. */
|
||||||
|
{
|
||||||
|
__m128 *input0 = (__m128 *)global_input0;
|
||||||
|
__m128 *input1 = (__m128 *)global_input1;
|
||||||
|
_mm_store_ps(global_output, _mm_add_ps(*input0, *input1));
|
||||||
|
assert(!memcmp(global_output, global_expected, sizeof(global_output)));
|
||||||
|
}
|
||||||
|
|
||||||
/* 64-bit add (addpd). */
|
/* 64-bit add (addpd). */
|
||||||
{
|
{
|
||||||
__m128d input0 = _mm_set_pd(1.5, 2.5);
|
__m128d input0 = _mm_set_pd(1.5, 2.5);
|
||||||
|
|||||||
@@ -1,9 +1,16 @@
|
|||||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#gcc-intrinsics */
|
/* https://github.com/cirosantilli/linux-kernel-module-cheat#gcc-intrinsics */
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
#include <x86intrin.h>
|
#include <x86intrin.h>
|
||||||
|
|
||||||
|
uint32_t global_input0[] __attribute__((aligned(16))) = {1, 2, 3, 4};
|
||||||
|
uint32_t global_input1[] __attribute__((aligned(16))) = {5, 6, 7, 8};
|
||||||
|
uint32_t global_output[4] __attribute__((aligned(16)));
|
||||||
|
uint32_t global_expected[] __attribute__((aligned(16))) = {6, 8, 10, 12};
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
|
|
||||||
/* 32-bit add hello world. */
|
/* 32-bit add hello world. */
|
||||||
@@ -19,6 +26,14 @@ int main(void) {
|
|||||||
assert(_mm_extract_epi32(output, 0) == 12);
|
assert(_mm_extract_epi32(output, 0) == 12);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Now from memory. */
|
||||||
|
{
|
||||||
|
__m128i *input0 = (__m128i *)global_input0;
|
||||||
|
__m128i *input1 = (__m128i *)global_input1;
|
||||||
|
_mm_store_si128((__m128i *)global_output, _mm_add_epi32(*input0, *input1));
|
||||||
|
assert(!memcmp(global_output, global_expected, sizeof(global_output)));
|
||||||
|
}
|
||||||
|
|
||||||
/* Now a bunch of other sizes. */
|
/* Now a bunch of other sizes. */
|
||||||
{
|
{
|
||||||
__m128i input0 = _mm_set_epi32(0xF1F1F1F1, 0xF2F2F2F2, 0xF3F3F3F3, 0xF4F4F4F4);
|
__m128i input0 = _mm_set_epi32(0xF1F1F1F1, 0xF2F2F2F2, 0xF3F3F3F3, 0xF4F4F4F4);
|
||||||
|
|||||||
@@ -10,6 +10,7 @@
|
|||||||
|
|
||||||
LKMC_PROLOGUE
|
LKMC_PROLOGUE
|
||||||
.data
|
.data
|
||||||
|
.align 16
|
||||||
input0: .long 0xF1F1F1F1, 0xF2F2F2F2, 0xF3F3F3F3, 0xF4F4F4F4
|
input0: .long 0xF1F1F1F1, 0xF2F2F2F2, 0xF3F3F3F3, 0xF4F4F4F4
|
||||||
input1: .long 0x12121212, 0x13131313, 0x14141414, 0x15151515
|
input1: .long 0x12121212, 0x13131313, 0x14141414, 0x15151515
|
||||||
paddb_expect: .long 0x03030303, 0x05050505, 0x07070707, 0x09090909
|
paddb_expect: .long 0x03030303, 0x05050505, 0x07070707, 0x09090909
|
||||||
@@ -17,13 +18,14 @@ LKMC_PROLOGUE
|
|||||||
paddd_expect: .long 0x04040403, 0x06060605, 0x08080807, 0x0A0A0A09
|
paddd_expect: .long 0x04040403, 0x06060605, 0x08080807, 0x0A0A0A09
|
||||||
paddq_expect: .long 0x04040403, 0x06060606, 0x08080807, 0x0A0A0A0A
|
paddq_expect: .long 0x04040403, 0x06060606, 0x08080807, 0x0A0A0A0A
|
||||||
.bss
|
.bss
|
||||||
|
.align 16
|
||||||
output: .skip 16
|
output: .skip 16
|
||||||
.text
|
.text
|
||||||
movups input1, %xmm1
|
movaps input1, %xmm1
|
||||||
#define TEST(size) \
|
#define TEST(size) \
|
||||||
movups input0, %xmm0; \
|
movaps input0, %xmm0; \
|
||||||
padd ## size %xmm1, %xmm0; \
|
padd ## size %xmm1, %xmm0; \
|
||||||
movups %xmm0, output; \
|
movaps %xmm0, output; \
|
||||||
LKMC_ASSERT_MEMCMP(output, padd ## size ## _expect, $0x10)
|
LKMC_ASSERT_MEMCMP(output, padd ## size ## _expect, $0x10)
|
||||||
|
|
||||||
/* 16x 8-bit */
|
/* 16x 8-bit */
|
||||||
|
|||||||
Reference in New Issue
Block a user