mirror of
https://github.com/cirosantilli/linux-kernel-module-cheat.git
synced 2026-01-23 02:05:57 +01:00
x86 asm: move x87 FPU instructions from x86-assembly-cheat
This commit is contained in:
52
README.adoc
52
README.adoc
@@ -11939,6 +11939,23 @@ Then it is just a huge copy paste of infinite boring details:
|
||||
* <<x86-simd>>
|
||||
* <<arm-simd>>
|
||||
|
||||
To debug these instructions, you can see the register values in GDB with:
|
||||
|
||||
....
|
||||
info registers float
|
||||
....
|
||||
|
||||
or alternatively with register names (here the ARMv8 V0 register):
|
||||
|
||||
....
|
||||
print $v0
|
||||
....
|
||||
|
||||
as mentioned at:
|
||||
|
||||
* https://stackoverflow.com/questions/5429137/how-to-print-register-values-in-gdb/38036152#38036152
|
||||
* https://reverseengineering.stackexchange.com/questions/8992/floating-point-registers-on-arm/20623#20623
|
||||
|
||||
Bibliography: https://stackoverflow.com/questions/1389712/getting-started-with-intel-x86-sse-simd-instructions/56409539#56409539
|
||||
|
||||
=== User vs system assembly
|
||||
@@ -11995,6 +12012,7 @@ Examples under `arch/<arch>/c/` directories show to how use inline assembly from
|
||||
* x86_64
|
||||
** link:userland/arch/x86_64/inline_asm/inc.c[]
|
||||
** link:userland/arch/x86_64/inline_asm/add.c[]
|
||||
** link:userland/arch/x86_64/inline_asm/sqrt_x87.c[] Shows how to use the <<x86-x87-fpu-instructions>> from inline assembly. Bibliography: https://stackoverflow.com/questions/6514537/how-do-i-specify-immediate-floating-point-numbers-with-inline-assembly/52906126#52906126
|
||||
* arm
|
||||
** link:userland/arch/arm/inline_asm/inc.c[]
|
||||
** link:userland/arch/arm/inline_asm/inc_memory.c[]
|
||||
@@ -12395,6 +12413,7 @@ Common combo with idiv 32-bit, which takes the input from `edx:eax`: so you need
|
||||
|
||||
Has some Intel vs AT&T name overload hell:
|
||||
|
||||
* https://stackoverflow.com/questions/6555094/what-does-cltq-do-in-assembly/45386217#45386217
|
||||
* https://stackoverflow.com/questions/17170388/trying-to-understand-the-assembly-instruction-cltd-on-x86/50315201#50315201
|
||||
* https://sourceware.org/binutils/docs/as/i386_002dMnemonics.html
|
||||
|
||||
@@ -12703,6 +12722,39 @@ There is also the `cpuinfo` command line tool that parses the CPUID instruction
|
||||
|
||||
Old floating point unit that you should likely not use anymore, prefer instead the newer <<x86-simd>> instructions.
|
||||
|
||||
* FPU basic examples, start here
|
||||
** link:userland/arch/x86_64/fadd.S[] FADD. The x87 FPU works on a stack of floating point numbers.
|
||||
** link:userland/arch/x86_64/faddp.S[] FADDP. Instructions with the P suffix also Pop the stack. This is often what you want for most computations, where the intermediate results don't matter.
|
||||
** link:userland/arch/x86_64/fldl_literal.S[] FLDL literal. It does not seem possible to either https://stackoverflow.com/questions/6514537/how-do-i-specify-immediate-floating-point-numbers-with-inline-assembly
|
||||
*** load floating point immediates into x86 x87 FPU registers
|
||||
*** encode floating point literals in x86 instructions, including MOV
|
||||
* Bulk instructions
|
||||
** link:userland/arch/x86_64/fabs.S[] FABS: absolute value: `ST0 = |ST0|`
|
||||
** link:userland/arch/x86_64/fchs.S[] FCHS: change sign: `ST0 = -ST0`
|
||||
** link:userland/arch/x86_64/fild.S[] FILD: Integer Load. Convert integer to float.
|
||||
** link:userland/arch/x86_64/fld1.S[] FLD1: Push 1.0 to ST0. CISC!
|
||||
** link:userland/arch/x86_64/fldz.S[] FLDZ: Push 0.0 to ST0.
|
||||
** link:userland/arch/x86_64/fscale.S[] FSCALE: `ST0 = ST0 * 2 ^ RoundTowardZero(ST1)`
|
||||
** link:userland/arch/x86_64/fsqrt.S[] FSQRT: square root
|
||||
** link:userland/arch/x86_64/fxch.S[] FXCH: swap ST0 and another register
|
||||
|
||||
==== x86 x87 FPU vs SIMD
|
||||
|
||||
http://stackoverflow.com/questions/1844669/benefits-of-x87-over-sse
|
||||
|
||||
Modern x86 has two main ways of doing floating point operations:
|
||||
|
||||
* <<x86-x87-fpu-instructions>>
|
||||
* <<x86-simd>>
|
||||
|
||||
Advantages of FPU:
|
||||
|
||||
* present in old CPUs, while SSE2 is only required in x86-64
|
||||
* contains some instructions not present in SSE, e.g. trigonometric
|
||||
* higher precision: FPU holds 80 bit Intel extension, while SSE2 only does up to 64 bit operations despite having the 128-bit register
|
||||
|
||||
In GCC, you can choose between them with `-mfpmath=`.
|
||||
|
||||
=== x86 SIMD
|
||||
|
||||
History:
|
||||
|
||||
24
userland/arch/x86_64/fabs.S
Normal file
24
userland/arch/x86_64/fabs.S
Normal file
@@ -0,0 +1,24 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-x87-fpu-instructions */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
.data
|
||||
double_1_0: .double 1.0
|
||||
double_minus_1_0: .double -1.0
|
||||
LKMC_PROLOGUE
|
||||
/* |-1| == 1 */
|
||||
fldl double_minus_1_0
|
||||
fabs
|
||||
fldl double_1_0
|
||||
fcomip %st(1)
|
||||
LKMC_ASSERT(je)
|
||||
finit
|
||||
|
||||
/* |1| == 1 */
|
||||
fldl double_1_0
|
||||
fabs
|
||||
fldl double_1_0
|
||||
fcomip %st(1)
|
||||
LKMC_ASSERT(je)
|
||||
finit
|
||||
LKMC_EPILOGUE
|
||||
40
userland/arch/x86_64/fadd.S
Normal file
40
userland/arch/x86_64/fadd.S
Normal file
@@ -0,0 +1,40 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-x87-fpu-instructions */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
.data
|
||||
double_1_5: .double 1.5
|
||||
double_2_5: .double 2.5
|
||||
double_4_0: .double 4.0
|
||||
LKMC_PROLOGUE
|
||||
/* Load to the FPU stack.
|
||||
* Push value from memory to the FPU stack. */
|
||||
fldl double_1_5
|
||||
/* FPU stack after operation:
|
||||
* ST0 == 1.5 */
|
||||
|
||||
fldl double_2_5
|
||||
/* FPU stack after operation:
|
||||
* ST0 == 2.5
|
||||
* ST1 == 1.5 */
|
||||
|
||||
/* ST1 = ST1 + ST0 (AT&T operand order is source, destination) */
|
||||
fadd %st, %st(1)
|
||||
/* FPU stack after operation:
|
||||
* ST0 == 2.5
|
||||
* ST1 == 4.0 */
|
||||
|
||||
fldl double_4_0
|
||||
/* FPU stack after operation:
|
||||
* ST0 == 4.0
|
||||
* ST1 == 2.5
|
||||
* ST2 == 4.0 */
|
||||
|
||||
/* Compare ST0 == ST2 */
|
||||
fcomi %st(2)
|
||||
/* FPU stack after operation:
|
||||
* ST0 == 4.0
|
||||
* ST1 == 2.5
|
||||
* ST2 == 4.0 */
|
||||
LKMC_ASSERT(je)
|
||||
LKMC_EPILOGUE
|
||||
36
userland/arch/x86_64/faddp.S
Normal file
36
userland/arch/x86_64/faddp.S
Normal file
@@ -0,0 +1,36 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-x87-fpu-instructions */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
.data
|
||||
double_1_5: .double 1.5
|
||||
double_2_5: .double 2.5
|
||||
double_4_0: .double 4.0
|
||||
LKMC_PROLOGUE
|
||||
fldl double_1_5
|
||||
/* FPU stack after operation:
|
||||
* ST0 == 1.5 */
|
||||
|
||||
fldl double_2_5
|
||||
/* FPU stack after operation:
|
||||
* ST0 == 2.5
|
||||
* ST1 == 1.5 */
|
||||
|
||||
/* ST1 = ST1 + ST0
|
||||
* Pop ST0, so the sum becomes the new ST0. */
|
||||
faddp %st, %st(1)
|
||||
/* FPU stack after operation:
|
||||
* ST0 == 4.0 */
|
||||
|
||||
fldl double_4_0
|
||||
/* FPU stack after operation:
|
||||
* ST0 == 4.0
|
||||
* ST1 == 4.0 */
|
||||
|
||||
/* Compare ST0 == ST1
|
||||
* Pop ST0. */
|
||||
fcomip %st(1)
|
||||
/* FPU stack after operation:
|
||||
* ST0 == 4.0 */
|
||||
LKMC_ASSERT(je)
|
||||
LKMC_EPILOGUE
|
||||
24
userland/arch/x86_64/fchs.S
Normal file
24
userland/arch/x86_64/fchs.S
Normal file
@@ -0,0 +1,24 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-x87-fpu-instructions */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
.data
|
||||
double_1: .double 1.0
|
||||
double_minus_1: .double -1.0
|
||||
LKMC_PROLOGUE
|
||||
/* -(1) == -1 */
|
||||
fldl double_1
|
||||
fchs
|
||||
fldl double_minus_1
|
||||
fcomip %st(1)
|
||||
LKMC_ASSERT(je)
|
||||
finit
|
||||
|
||||
/* -(-1) == 1 */
|
||||
fldl double_minus_1
|
||||
fchs
|
||||
fldl double_1
|
||||
fcomip %st(1)
|
||||
LKMC_ASSERT(je)
|
||||
finit
|
||||
LKMC_EPILOGUE
|
||||
16
userland/arch/x86_64/fild.S
Normal file
16
userland/arch/x86_64/fild.S
Normal file
@@ -0,0 +1,16 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-x87-fpu-instructions */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
.data
|
||||
double_10_0: .double 10.0
|
||||
.bss
|
||||
double_10_0_2: .skip 8
|
||||
LKMC_PROLOGUE
|
||||
movl $10, double_10_0_2
|
||||
fildl double_10_0_2
|
||||
fldl double_10_0
|
||||
fcomip %st(1)
|
||||
LKMC_ASSERT(je)
|
||||
finit
|
||||
LKMC_EPILOGUE
|
||||
12
userland/arch/x86_64/fld1.S
Normal file
12
userland/arch/x86_64/fld1.S
Normal file
@@ -0,0 +1,12 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-x87-fpu-instructions */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
.data
|
||||
double_1_0: .double 1.0
|
||||
LKMC_PROLOGUE
|
||||
fld1
|
||||
fldl double_1_0
|
||||
fcomip %st(1)
|
||||
LKMC_ASSERT(je)
|
||||
LKMC_EPILOGUE
|
||||
18
userland/arch/x86_64/fldl_literal.S
Normal file
18
userland/arch/x86_64/fldl_literal.S
Normal file
@@ -0,0 +1,18 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-x87-fpu-instructions */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
.data
|
||||
double_1_5: .double 1.5
|
||||
.bss
|
||||
double_1_5_2: .skip 8
|
||||
LKMC_PROLOGUE
|
||||
#if 0
|
||||
/* Error: junk `.5' after expression */
|
||||
movq $1.5, double_1_5_2
|
||||
fldl double_1_5
|
||||
fldl double_1_5_2
|
||||
fcomi %st(1)
|
||||
LKMC_ASSERT(je)
|
||||
#endif
|
||||
LKMC_EPILOGUE
|
||||
12
userland/arch/x86_64/fldz.S
Normal file
12
userland/arch/x86_64/fldz.S
Normal file
@@ -0,0 +1,12 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-x87-fpu-instructions */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
.data
|
||||
double_0_0: .double 0.0
|
||||
LKMC_PROLOGUE
|
||||
fldz
|
||||
fldl double_0_0
|
||||
fcomip %st(1)
|
||||
LKMC_ASSERT(je)
|
||||
LKMC_EPILOGUE
|
||||
34
userland/arch/x86_64/fscale.S
Normal file
34
userland/arch/x86_64/fscale.S
Normal file
@@ -0,0 +1,34 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-x87-fpu-instructions */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
.data
|
||||
double_1_0: .double 1.0
|
||||
double_2_5: .double 2.5
|
||||
double_4_0: .double 4.0
|
||||
LKMC_PROLOGUE
|
||||
fldl double_4_0
|
||||
# ST0 = 4.0
|
||||
|
||||
fldl double_2_5
|
||||
# ST0 = 2.5
|
||||
# ST1 = 4.0
|
||||
|
||||
fldl double_1_0
|
||||
# ST0 = 1.0
|
||||
# ST1 = 2.5
|
||||
# ST2 = 4.0
|
||||
|
||||
# ST0 = 1 * 2 ^ (RoundTowardZero(2.5))
|
||||
# = 1 * 2 ^ 2
|
||||
# = 4
|
||||
fscale
|
||||
# ST0 = 4.0
|
||||
# ST1 = 2.5
|
||||
# ST2 = 4.0
|
||||
|
||||
fcomip %st(2)
|
||||
# ST0 = 2.5
|
||||
# ST1 = 4.0
|
||||
LKMC_ASSERT(je)
|
||||
LKMC_EPILOGUE
|
||||
26
userland/arch/x86_64/fsqrt.S
Normal file
26
userland/arch/x86_64/fsqrt.S
Normal file
@@ -0,0 +1,26 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-x87-fpu-instructions */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
.data
|
||||
double_1_41: .double 1.41
|
||||
double_1_42: .double 1.42
|
||||
double_2_0: .double 2.0
|
||||
double_4_0: .double 4.0
|
||||
LKMC_PROLOGUE
|
||||
/* sqrt(4) == 2 */
|
||||
fldl double_4_0
|
||||
fsqrt
|
||||
fldl double_2_0
|
||||
fcomip %st(1)
|
||||
LKMC_ASSERT(je)
|
||||
|
||||
/* 1.41 < sqrt(2) < 1.42 */
|
||||
fsqrt
|
||||
fldl double_1_41
|
||||
fcomip %st(1)
|
||||
LKMC_ASSERT(jbe)
|
||||
fldl double_1_42
|
||||
fcomip %st(1)
|
||||
LKMC_ASSERT(jae)
|
||||
LKMC_EPILOGUE
|
||||
45
userland/arch/x86_64/fxch.S
Normal file
45
userland/arch/x86_64/fxch.S
Normal file
@@ -0,0 +1,45 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-x87-fpu-instructions */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
.data
|
||||
double_0_0: .double 0.0
|
||||
double_1_0: .double 1.0
|
||||
LKMC_PROLOGUE
|
||||
fldz
|
||||
# ST0 = 0.0
|
||||
|
||||
fld1
|
||||
# ST0 = 1.0
|
||||
# ST1 = 0.0
|
||||
|
||||
# Swap ST0 and ST1.
|
||||
fxch %st(1)
|
||||
# ST0 = 0.0
|
||||
# ST1 = 1.0
|
||||
|
||||
fldz
|
||||
# ST0 = 0.0
|
||||
# ST1 = 0.0
|
||||
# ST2 = 1.0
|
||||
|
||||
fcomip %st(1)
|
||||
# ST0 = 0.0
|
||||
# ST1 = 1.0
|
||||
LKMC_ASSERT(je)
|
||||
|
||||
# Swap ST0 and ST1.
|
||||
fxch %st(1)
|
||||
# ST0 = 1.0
|
||||
# ST1 = 0.0
|
||||
|
||||
fld1
|
||||
# ST0 = 1.0
|
||||
# ST1 = 1.0
|
||||
# ST2 = 0.0
|
||||
|
||||
fcomip %st(1)
|
||||
# ST0 = 1.0
|
||||
# ST1 = 0.0
|
||||
LKMC_ASSERT(je)
|
||||
LKMC_EPILOGUE
|
||||
Reference in New Issue
Block a user