mirror of
https://github.com/cirosantilli/linux-kernel-module-cheat.git
synced 2026-01-23 02:05:57 +01:00
x86 asm: move binary arithmetic instructions from x86-assembly-cheat except cmp
This commit is contained in:
23
README.adoc
23
README.adoc
@@ -10208,13 +10208,13 @@ There are not yet enabled, but it should be easy to so, see: <<add-new-buildroot
|
|||||||
|
|
||||||
https://stackoverflow.com/questions/6147242/heap-vs-binary-search-tree-bst/29548834#29548834
|
https://stackoverflow.com/questions/6147242/heap-vs-binary-search-tree-bst/29548834#29548834
|
||||||
|
|
||||||
Usage:
|
First we build it with <<m5ops-instructions>> enabled, and then we extract the stats:
|
||||||
|
|
||||||
....
|
....
|
||||||
./build-userland \
|
./build-userland \
|
||||||
--arch aarch64 \
|
--arch aarch64 \
|
||||||
--ccflags='-DLKMC_M5OPS_ENABLE=1' \
|
--ccflags='-DLKMC_M5OPS_ENABLE=1' \
|
||||||
--force-build cpp/bst_vs_heap \
|
--force-rebuild cpp/bst_vs_heap \
|
||||||
--static \
|
--static \
|
||||||
;
|
;
|
||||||
./run \
|
./run \
|
||||||
@@ -10910,7 +10910,7 @@ To use that file, first rebuild `m5ops.out` with the m5ops instructions enabled
|
|||||||
./build-userland \
|
./build-userland \
|
||||||
--arch aarch64 \
|
--arch aarch64 \
|
||||||
--ccflags='-DLKMC_M5OPS_ENABLE=1' \
|
--ccflags='-DLKMC_M5OPS_ENABLE=1' \
|
||||||
--force-build c/m5ops \
|
--force-rebuild c/m5ops \
|
||||||
--static \
|
--static \
|
||||||
;
|
;
|
||||||
./build-buildroot --arch aarch64
|
./build-buildroot --arch aarch64
|
||||||
@@ -10941,7 +10941,7 @@ In theory, the cleanest way to add m5ops to your benchmarks would be to do exact
|
|||||||
|
|
||||||
However, I think it is usually not worth the trouble of hacking up the build system of the benchmark to do this, and I recommend just hardcoding in a few raw instructions here and there, and managing it with version control + `sed`.
|
However, I think it is usually not worth the trouble of hacking up the build system of the benchmark to do this, and I recommend just hardcoding in a few raw instructions here and there, and managing it with version control + `sed`.
|
||||||
|
|
||||||
Bibliography:x
|
Bibliography:
|
||||||
|
|
||||||
* https://stackoverflow.com/questions/56506154/how-to-analyze-only-interest-area-in-source-code-by-using-gem5/56506419#56506419
|
* https://stackoverflow.com/questions/56506154/how-to-analyze-only-interest-area-in-source-code-by-using-gem5/56506419#56506419
|
||||||
* https://www.mail-archive.com/gem5-users@gem5.org/msg15418.html
|
* https://www.mail-archive.com/gem5-users@gem5.org/msg15418.html
|
||||||
@@ -12352,9 +12352,18 @@ Bibliography:
|
|||||||
<<intel-manual-1>> 5.1.2 "Binary Arithmetic Instructions":
|
<<intel-manual-1>> 5.1.2 "Binary Arithmetic Instructions":
|
||||||
|
|
||||||
* link:userland/arch/x86_64/add.S[ADD]
|
* link:userland/arch/x86_64/add.S[ADD]
|
||||||
* link:userland/arch/x86_64/dec.S[DEC]
|
** link:userland/arch/x86_64/inc.S[INC]
|
||||||
* link:userland/arch/x86_64/inc.S[INC]
|
** link:userland/arch/x86_64/adc.S[ADC]
|
||||||
* link:userland/arch/x86_64/sub.S[SUB]
|
* link:userland/arch/x86_64/sub.S[SUB]
|
||||||
|
** link:userland/arch/x86_64/dec.S[DEC]
|
||||||
|
** link:userland/arch/x86_64/sbb.S[SBB]
|
||||||
|
* link:userland/arch/x86_64/mul.S[MUL]
|
||||||
|
** link:userland/arch/x86_64/neg.S[NEG]
|
||||||
|
** link:userland/arch/x86_64/imul.S[IMUL]
|
||||||
|
* link:userland/arch/x86_64/div.S[DIV]
|
||||||
|
** link:userland/arch/x86_64/div_overflow.S[DIV overflow]
|
||||||
|
** link:userland/arch/x86_64/div_zero.S[DIV zero]
|
||||||
|
** link:userland/arch/x86_64/idiv.S[IDIV]
|
||||||
|
|
||||||
=== x86 SIMD
|
=== x86 SIMD
|
||||||
|
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ https://github.com/cirosantilli/linux-kernel-module-cheat-regression#gem5-unit-t
|
|||||||
'git', LF,
|
'git', LF,
|
||||||
'-C', self.env['gem5_default_source_dir'], LF,
|
'-C', self.env['gem5_default_source_dir'], LF,
|
||||||
'worktree', 'add', LF,
|
'worktree', 'add', LF,
|
||||||
'-b', os.path.join('wt', self.env['gem5_build_id']), LF,
|
'-b', os.path.join('wt', self.env['gem5_worktree']), LF,
|
||||||
self.env['gem5_source_dir'], LF,
|
self.env['gem5_source_dir'], LF,
|
||||||
])
|
])
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -406,6 +406,8 @@ path_properties_tuples = (
|
|||||||
'freestanding': freestanding_properties,
|
'freestanding': freestanding_properties,
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
|
'div_overflow.S': {'signal_received': signal.Signals.SIGFPE},
|
||||||
|
'div_zero.S': {'signal_received': signal.Signals.SIGFPE},
|
||||||
'freestanding': freestanding_properties,
|
'freestanding': freestanding_properties,
|
||||||
'lkmc_assert_eq_fail.S': {'signal_received': signal.Signals.SIGABRT},
|
'lkmc_assert_eq_fail.S': {'signal_received': signal.Signals.SIGABRT},
|
||||||
'lkmc_assert_memcmp_fail.S': {'signal_received': signal.Signals.SIGABRT},
|
'lkmc_assert_memcmp_fail.S': {'signal_received': signal.Signals.SIGABRT},
|
||||||
|
|||||||
25
userland/arch/x86_64/adc.S
Normal file
25
userland/arch/x86_64/adc.S
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
|
||||||
|
*
|
||||||
|
* Add with Carry. Like add, but if the carry flag is set, add 1 to the addition.
|
||||||
|
*
|
||||||
|
* This allows implementing arbitrary precision arithmetic.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <lkmc.h>
|
||||||
|
|
||||||
|
LKMC_PROLOGUE
|
||||||
|
/* rax : rbx += rcx : rdx
|
||||||
|
* 1 : 0x8000000000000001 += 0x10 : 0x8000000000000010
|
||||||
|
* 0x12 : 0x11
|
||||||
|
*/
|
||||||
|
mov $0x1, %rax
|
||||||
|
mov $0x8000000000000001, %rbx
|
||||||
|
mov $0x10, %rcx
|
||||||
|
mov $0x8000000000000010, %rdx
|
||||||
|
add %rdx, %rbx
|
||||||
|
adc %rcx, %rax
|
||||||
|
mov %rax, %r12
|
||||||
|
mov %rbx, %r13
|
||||||
|
LKMC_ASSERT_EQ(%r12, $0x12)
|
||||||
|
LKMC_ASSERT_EQ(%r13, $0x11)
|
||||||
|
LKMC_EPILOGUE
|
||||||
40
userland/arch/x86_64/div.S
Normal file
40
userland/arch/x86_64/div.S
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
|
||||||
|
*
|
||||||
|
* Unsigned integer division, interface similar to MUL:
|
||||||
|
*
|
||||||
|
* ....
|
||||||
|
* rax = rdx:rax / SRC
|
||||||
|
* rdx = rdx:rax % SRC
|
||||||
|
* ....
|
||||||
|
*
|
||||||
|
* DIV can be used to calculate modulus, but GCC does not use it because it is slow,
|
||||||
|
* and chooses alternative techniques instead:
|
||||||
|
* http://stackoverflow.com/questions/4361979/how-does-the-gcc-implementation-of-module-work-and-why-does-it-not-use-the
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <lkmc.h>
|
||||||
|
|
||||||
|
LKMC_PROLOGUE
|
||||||
|
/* 64-bit hello world:
|
||||||
|
*
|
||||||
|
* 5 / 2 = 2 with leftover of 1.
|
||||||
|
*/
|
||||||
|
mov $0, %rdx
|
||||||
|
mov $5, %rax
|
||||||
|
mov $2, %rbx
|
||||||
|
div %rbx
|
||||||
|
mov %rax, %r12
|
||||||
|
mov %rdx, %r13
|
||||||
|
LKMC_ASSERT_EQ(%r12, $2)
|
||||||
|
LKMC_ASSERT_EQ(%r13, $1)
|
||||||
|
|
||||||
|
/* Now with a simple carry. */
|
||||||
|
mov $1, %rdx
|
||||||
|
mov $2, %rax
|
||||||
|
mov $2, %rbx
|
||||||
|
div %rbx
|
||||||
|
mov %rax, %r12
|
||||||
|
mov %rdx, %r13
|
||||||
|
LKMC_ASSERT_EQ(%r12, $0x8000000000000001)
|
||||||
|
LKMC_ASSERT_EQ(%r13, $0)
|
||||||
|
LKMC_EPILOGUE
|
||||||
14
userland/arch/x86_64/div_overflow.S
Normal file
14
userland/arch/x86_64/div_overflow.S
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
|
||||||
|
*
|
||||||
|
* If the result of div does not fit into the output register rax, then we get SIGFPE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <lkmc.h>
|
||||||
|
|
||||||
|
LKMC_PROLOGUE
|
||||||
|
/* rdx:rax / 2 == 2:0 / 2 == 1:0 */
|
||||||
|
mov $2, %rdx
|
||||||
|
mov $0, %rax
|
||||||
|
mov $2, %rbx
|
||||||
|
div %rbx
|
||||||
|
LKMC_EPILOGUE
|
||||||
17
userland/arch/x86_64/div_zero.S
Normal file
17
userland/arch/x86_64/div_zero.S
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
|
||||||
|
*
|
||||||
|
* SIGFPE :-)
|
||||||
|
*
|
||||||
|
* Signal handling discussed at:
|
||||||
|
* https://stackoverflow.com/questions/39431879/c-handle-signal-sigfpe-and-continue-execution/39431923#39431923
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <lkmc.h>
|
||||||
|
|
||||||
|
LKMC_PROLOGUE
|
||||||
|
/* rdx:rax / 0 */
|
||||||
|
mov $0, %rdx
|
||||||
|
mov $1, %rax
|
||||||
|
mov $0, %rbx
|
||||||
|
div %rbx
|
||||||
|
LKMC_EPILOGUE
|
||||||
34
userland/arch/x86_64/idiv.S
Normal file
34
userland/arch/x86_64/idiv.S
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
|
||||||
|
*
|
||||||
|
* Signed integer division.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <lkmc.h>
|
||||||
|
|
||||||
|
LKMC_PROLOGUE
|
||||||
|
/* With a single explicit operand, it works like DIV.
|
||||||
|
* -5 = (2 * -2) + (-1)
|
||||||
|
*/
|
||||||
|
mov $-5, %rax
|
||||||
|
/* Sign extend rax into rdx:rax
|
||||||
|
* https://stackoverflow.com/questions/17170388/trying-to-understand-the-assembly-instruction-cltd-on-x86/50315201#50315201
|
||||||
|
*/
|
||||||
|
cqo
|
||||||
|
mov $2, %rbx
|
||||||
|
idiv %rbx
|
||||||
|
mov %rax, %r12
|
||||||
|
mov %rdx, %r13
|
||||||
|
LKMC_ASSERT_EQ(%r12, $-2)
|
||||||
|
LKMC_ASSERT_EQ(%r13, $-1)
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
/* Unlike IMUL vs MUL, IDIV does not have a multi operand interface.
|
||||||
|
* Likely because it needs 2 output registers unlike IMUL.
|
||||||
|
*
|
||||||
|
* ....
|
||||||
|
* Error: number of operands mismatch for `idiv'
|
||||||
|
* ....
|
||||||
|
*/
|
||||||
|
idiv %rax, $2, %rbx
|
||||||
|
#endif
|
||||||
|
LKMC_EPILOGUE
|
||||||
42
userland/arch/x86_64/imul.S
Normal file
42
userland/arch/x86_64/imul.S
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
|
||||||
|
*
|
||||||
|
* Signed multiply.
|
||||||
|
*
|
||||||
|
* Has many more forms than MUL including immediate and up to three arguments.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <lkmc.h>
|
||||||
|
|
||||||
|
LKMC_PROLOGUE
|
||||||
|
|
||||||
|
/* The single register forms are just like MUL, and
|
||||||
|
* extend over rdx:rax.
|
||||||
|
*
|
||||||
|
* rdx : rax = rax * rbx
|
||||||
|
* = -1 * 2
|
||||||
|
* = -2
|
||||||
|
* = 0xFFFFFFFFFFFFFFFF : 0xFFFFFFFFFFFFFFFE
|
||||||
|
*/
|
||||||
|
mov $-1, %rax
|
||||||
|
mov $2, %rbx
|
||||||
|
mov $42, %rdx
|
||||||
|
imul %rbx
|
||||||
|
mov %rax, %r12
|
||||||
|
mov %rdx, %r13
|
||||||
|
LKMC_ASSERT_EQ(%r12, $0xFFFFFFFFFFFFFFFE)
|
||||||
|
LKMC_ASSERT_EQ(%r13, $0xFFFFFFFFFFFFFFFF)
|
||||||
|
|
||||||
|
/* The multi-argument forms don't extend over rdx, but
|
||||||
|
* are more convenient in many cases.
|
||||||
|
*
|
||||||
|
* rax = rbx * 3
|
||||||
|
*/
|
||||||
|
mov $42, %rax
|
||||||
|
mov $-2, %rbx
|
||||||
|
mov $42, %rdx
|
||||||
|
imul $3, %rbx, %rax
|
||||||
|
mov %rax, %r12
|
||||||
|
mov %rdx, %r13
|
||||||
|
LKMC_ASSERT_EQ(%r12, $-6)
|
||||||
|
LKMC_ASSERT_EQ(%r13, $42)
|
||||||
|
LKMC_EPILOGUE
|
||||||
98
userland/arch/x86_64/mul.S
Normal file
98
userland/arch/x86_64/mul.S
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
|
||||||
|
*
|
||||||
|
* Unsigned multiply.
|
||||||
|
*
|
||||||
|
* The result is spread across rdx:rax for 64-bit operands (edx:eax for 32-bit, dx:ax for 16-bit, ah:al for 8-bit).
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <lkmc.h>
|
||||||
|
|
||||||
|
LKMC_PROLOGUE
|
||||||
|
|
||||||
|
/* 64-bit hello world:
|
||||||
|
*
|
||||||
|
* rdx : rax = rax * rbx
|
||||||
|
* 0x0 : 4 = 2 * 2
|
||||||
|
*/
|
||||||
|
mov $2, %rax
|
||||||
|
mov $2, %rbx
|
||||||
|
mul %rbx
|
||||||
|
/* Move to callee saved registers to persist after our asserts. */
|
||||||
|
mov %rax, %r12
|
||||||
|
mov %rdx, %r13
|
||||||
|
mov %rbx, %r14
|
||||||
|
LKMC_ASSERT_EQ(%r12, $4)
|
||||||
|
LKMC_ASSERT_EQ(%r13, $0)
|
||||||
|
/* rbx is untouched. */
|
||||||
|
LKMC_ASSERT_EQ(%r14, $2)
|
||||||
|
|
||||||
|
/* 64-bit with a carry:
|
||||||
|
*
|
||||||
|
* rdx : rax = rax * rbx
|
||||||
|
* 0x1 : 0x0000000000000002 = 0x8000000000000001 * 2
|
||||||
|
*/
|
||||||
|
mov $0x8000000000000001, %rax
|
||||||
|
mov $2, %rbx
|
||||||
|
mul %rbx
|
||||||
|
mov %rax, %r12
|
||||||
|
mov %rdx, %r13
|
||||||
|
LKMC_ASSERT_EQ(%r12, $2)
|
||||||
|
LKMC_ASSERT_EQ(%r13, $1)
|
||||||
|
|
||||||
|
/* 8-bit is special: does not use dx for output:
|
||||||
|
*
|
||||||
|
* ah : al = al * bl
|
||||||
|
* 0x1 : 0x00 = 2 * 0x80
|
||||||
|
*/
|
||||||
|
mov $0, %eax
|
||||||
|
mov $2, %al
|
||||||
|
mov $0x80, %bl
|
||||||
|
mov $0, %dl
|
||||||
|
mul %bl
|
||||||
|
LKMC_ASSERT_EQ_32(%eax, $0x100)
|
||||||
|
|
||||||
|
/* 16-bit
|
||||||
|
*
|
||||||
|
* dx : ax = ax * bx
|
||||||
|
* 0x1 : 0x0000 = 2 * 0x8000
|
||||||
|
*/
|
||||||
|
mov $0, %eax
|
||||||
|
mov $0, %edx
|
||||||
|
mov $2, %ax
|
||||||
|
mov $0x8000, %bx
|
||||||
|
mov $0, %dx
|
||||||
|
mul %bx
|
||||||
|
mov %eax, %r12d
|
||||||
|
mov %edx, %r13d
|
||||||
|
LKMC_ASSERT_EQ_32(%r12d, $0)
|
||||||
|
LKMC_ASSERT_EQ_32(%r13d, $1)
|
||||||
|
|
||||||
|
/* 32-bit */
|
||||||
|
mov $2, %eax
|
||||||
|
mov $0x80000000, %ebx
|
||||||
|
mov $0, %edx
|
||||||
|
mul %ebx
|
||||||
|
mov %eax, %r12d
|
||||||
|
mov %edx, %r13d
|
||||||
|
LKMC_ASSERT_EQ_32(%r12d, $0)
|
||||||
|
LKMC_ASSERT_EQ_32(%r13d, $1)
|
||||||
|
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
/* No immediate form, although imul has one:
|
||||||
|
* http://stackoverflow.com/questions/20499141/is-it-possible-to-multiply-by-and-immediate-with-mul-in-x86-assembly/33202309#33202309
|
||||||
|
*
|
||||||
|
* Error: operand type mismatch for `mul'
|
||||||
|
*/
|
||||||
|
mul $2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Memory version */
|
||||||
|
.data
|
||||||
|
mylong: .long 0x11111111
|
||||||
|
.text
|
||||||
|
movl $2, %eax
|
||||||
|
mull mylong
|
||||||
|
LKMC_ASSERT_EQ_32(%eax, $0x22222222)
|
||||||
|
|
||||||
|
LKMC_EPILOGUE
|
||||||
14
userland/arch/x86_64/neg.S
Normal file
14
userland/arch/x86_64/neg.S
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
|
||||||
|
*
|
||||||
|
* Negate: i *= -1.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <lkmc.h>
|
||||||
|
|
||||||
|
LKMC_PROLOGUE
|
||||||
|
mov $2, %r12 /* r12 is callee saved, so the value persists across the asserts. */
|
||||||
|
neg %r12
|
||||||
|
LKMC_ASSERT_EQ(%r12, $-2)
|
||||||
|
neg %r12 /* Full 64-bit negate: -2 becomes 2 again. */
|
||||||
|
LKMC_ASSERT_EQ(%r12, $2)
|
||||||
|
LKMC_EPILOGUE
|
||||||
23
userland/arch/x86_64/sbb.S
Normal file
23
userland/arch/x86_64/sbb.S
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
|
||||||
|
*
|
||||||
|
* Subtract with Borrow. Like ADC is for ADD, but for subtraction.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <lkmc.h>
|
||||||
|
|
||||||
|
LKMC_PROLOGUE
|
||||||
|
/* rax : rbx -= rcx : rdx
|
||||||
|
* 1 : 0 -= 0 : 0x8000000000000000
|
||||||
|
* 0 : 0x8000000000000000
|
||||||
|
*/
|
||||||
|
mov $0x1, %rax
|
||||||
|
mov $0x0, %rbx
|
||||||
|
mov $0x0, %rcx
|
||||||
|
mov $0x8000000000000000, %rdx
|
||||||
|
sub %rdx, %rbx
|
||||||
|
sbb %rcx, %rax
|
||||||
|
mov %rax, %r12
|
||||||
|
mov %rbx, %r13
|
||||||
|
LKMC_ASSERT_EQ(%r12, $0x0)
|
||||||
|
LKMC_ASSERT_EQ(%r13, $0x8000000000000000)
|
||||||
|
LKMC_EPILOGUE
|
||||||
Reference in New Issue
Block a user