From 16ecab4af03c534347976434d010bd62458809ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciro=20Santilli=20=E5=85=AD=E5=9B=9B=E4=BA=8B=E4=BB=B6=20?= =?UTF-8?q?=E6=B3=95=E8=BD=AE=E5=8A=9F?= Date: Fri, 10 May 2019 00:00:00 +0000 Subject: [PATCH] x86: paddq --- README.adoc | 25 ++++++++++++++----------- userland/arch/aarch64/simd.S | 2 +- userland/arch/main.c | 2 +- userland/arch/x86_64/paddq.S | 21 +++++++++++++++++++++ 4 files changed, 37 insertions(+), 13 deletions(-) create mode 100644 userland/arch/x86_64/paddq.S diff --git a/README.adoc b/README.adoc index b43ccd7..5cfd548 100644 --- a/README.adoc +++ b/README.adoc @@ -11493,7 +11493,7 @@ One "downside" of glibc is that it exercises much more kernel functionality on i == C -Programs under link:userland/c/[] are examples of link:https://en.wikipedia.org/wiki/ANSI_C[ANSI C] programming. +Programs under link:userland/c/[] are examples of link:https://en.wikipedia.org/wiki/ANSI_C[ANSI C] programming: * Standard library ** assert.h @@ -11541,19 +11541,19 @@ What is POSIX: == Userland assembly -Programs under `userland/arch//` are examples of userland assembly programming: +Programs under `userland/arch//` are examples of userland assembly programming. -* link:userland/arch/x86_64/[] moved from: https://github.com/cirosantilli/x86-assembly-cheat -* link:userland/arch/arm/[] moved from: https://github.com/cirosantilli/arm-assembly-cheat -* link:userland/arch/aarch64/[] moved from: https://github.com/cirosantilli/arm-assembly-cheat +This section will document ISA agnostic concepts. + +ISA specifics are covered at: + +* <> under link:userland/arch/x86_64/[], originally migrated from: https://github.com/cirosantilli/x86-assembly-cheat +* <> under originally migrated from https://github.com/cirosantilli/arm-assembly-cheat under: +** link:userland/arch/arm/[] +** link:userland/arch/aarch64/[] Like other userland programs, these programs can be run as explained at: <>. -This section will document ISA generic ideas. 
ISA specifics are documented on the following sections: - -* <> -* <> - The first example that you want to run for each arch is: .... @@ -11836,7 +11836,10 @@ Once those are done, everything else left on userland is just to learn a huge li === x86 userland assembly instructions -TODO +==== x86 SIMD + +* SSE2 +** link:userland/arch/x86_64/paddq.S[] === rdtsc diff --git a/userland/arch/aarch64/simd.S b/userland/arch/aarch64/simd.S index 516cff1..4ff9b39 100644 --- a/userland/arch/aarch64/simd.S +++ b/userland/arch/aarch64/simd.S @@ -13,7 +13,7 @@ ENTRY .data u32_0: .word 0xF111F111, 0xF222F222, 0xF333F333, 0xF444F444 u32_1: .word 0x15551555, 0x16661666, 0x17771777, 0x18881888 - u32_sum_expect: .word 0x06670666, 0x08890888, 0x0AAB0AAA, 0x0CCD0CCC + u32_sum_expect: .word 0x06670666, 0x08890889, 0x0AAB0AAA, 0x0CCD0CCC .bss u32_sum: .skip 16 .text diff --git a/userland/arch/main.c b/userland/arch/main.c index 7e51702..eddcbcf 100644 --- a/userland/arch/main.c +++ b/userland/arch/main.c @@ -39,7 +39,7 @@ int assert_memcmp(const void *s1, const void *s2, size_t n) { printf( "%s failed: " "byte1, byte2, index: " - "0x%02" PRIX8 " 0x%02" PRIX8 " 0x%zx\n", + "0x%02" PRIX8 " 0x%02" PRIX8 " 0x%zX\n", __func__, b1, b2, diff --git a/userland/arch/x86_64/paddq.S b/userland/arch/x86_64/paddq.S new file mode 100644 index 0000000..49ef7d1 --- /dev/null +++ b/userland/arch/x86_64/paddq.S @@ -0,0 +1,21 @@ +/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-simd + * + * Add 2 64-bit integers in one go: paddq adds packed quadwords, so each pair of .long values below forms one lane and carries cross the 32-bit boundary. + */ + +#include "common.h" + +ENTRY +.data + u32_0: .long 0xF111F111, 0xF222F222, 0xF333F333, 0xF444F444 + u32_1: .long 0x15551555, 0x16661666, 0x17771777, 0x18881888 + u32_expect: .long 0x06670666, 0x08890889, 0x0AAB0AAA, 0x0CCD0CCD +.bss + u32_result: .skip 16 +.text + movups u32_0, %xmm0 + movups u32_1, %xmm1 + paddq %xmm1, %xmm0 + movups %xmm0, u32_result + ASSERT_MEMCMP(u32_result, u32_expect, $0x10) +EXIT