x86 asm: move binary arithmetic instructions from x86-assembly-cheat

This commit is contained in:
Ciro Santilli 六四事件 法轮功
2019-06-11 00:00:00 +00:00
parent 90925e7e06
commit 20990604fb
9 changed files with 324 additions and 6 deletions

View File

@@ -10208,13 +10208,13 @@ There are not yet enabled, but it should be easy to so, see: <<add-new-buildroot
https://stackoverflow.com/questions/6147242/heap-vs-binary-search-tree-bst/29548834#29548834
Usage:
First we build it with <<m5ops-instructions>> enabled, and then we extract the stats:
....
./build-userland \
--arch aarch64 \
--ccflagg='-DLKMC_M5OPS_ENABLE=1' \
--force-build cpp/bst_vs_heap \
--ccflags='-DLKMC_M5OPS_ENABLE=1' \
--force-rebuild cpp/bst_vs_heap \
--static \
;
./run \
@@ -10910,7 +10910,7 @@ To use that file, first rebuild `m5ops.out` with the m5ops instructions enabled
./build-userland \
--arch aarch64 \
--ccflags='-DLKMC_M5OPS_ENABLE=1' \
--force-build c/m5ops \
--force-rebuild c/m5ops \
--static \
;
./build-buildroot --arch aarch64
@@ -10941,7 +10941,7 @@ In theory, the cleanest way to add m5ops to your benchmarks would be to do exact
However, I think it is usually not worth the trouble of hacking up the build system of the benchmark to do this, and I recommend just hardcoding in a few raw instructions here and there, and managing it with version control + `sed`.
Bibliography:x
Bibliography:
* https://stackoverflow.com/questions/56506154/how-to-analyze-only-interest-area-in-source-code-by-using-gem5/56506419#56506419
* https://www.mail-archive.com/gem5-users@gem5.org/msg15418.html
@@ -12353,7 +12353,10 @@ Bibliography:
* link:userland/arch/x86_64/add.S[ADD]
* link:userland/arch/x86_64/dec.S[DEC]
* link:userland/arch/x86_64/imul.S[IMUL]
* link:userland/arch/x86_64/inc.S[INC]
* link:userland/arch/x86_64/mul.S[MUL]
* link:userland/arch/x86_64/neg.S[NEG]
* link:userland/arch/x86_64/sub.S[SUB]
=== x86 SIMD

View File

@@ -60,7 +60,7 @@ https://github.com/cirosantilli/linux-kernel-module-cheat-regression#gem5-unit-t
'git', LF,
'-C', self.env['gem5_default_source_dir'], LF,
'worktree', 'add', LF,
'-b', os.path.join('wt', self.env['gem5_build_id']), LF,
'-b', os.path.join('wt', self.env['gem5_worktree']), LF,
self.env['gem5_source_dir'], LF,
])
else:

View File

@@ -0,0 +1,16 @@
/* Add with carry.
 *
 * edx:eax += ebx:ecx
 *
 * ADD computes the low dword and sets CF on unsigned overflow. ADC then adds
 * the high dwords plus that CF, giving a 64-bit addition built out of 32-bit
 * registers.
 */

#include <lkmc.h>

LKMC_PROLOGUE
    /* edx:eax = 0x0:0x80000000 and ebx:ecx = 0x0:0x80000000. */
    movl $0x80000000, %eax
    movl $0x80000000, %ecx
    movl $0, %ebx
    movl $0, %edx

    /* Low halves: 0x80000000 + 0x80000000 wraps to 0 and sets CF. */
    addl %ecx, %eax
    /* High halves: 0 + 0 + CF = 1. */
    adcl %ebx, %edx

    /* Move to callee saved registers to persist after our asserts:
     * the assert macros presumably do not preserve eax and edx, so the
     * second assert must not read edx directly.
     */
    movl %eax, %r12d
    movl %edx, %r13d
    LKMC_ASSERT_EQ_32(%r12d, $0)
    LKMC_ASSERT_EQ_32(%r13d, $1)
LKMC_EPILOGUE

View File

@@ -0,0 +1,92 @@
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
 *
 * Unsigned integer division, interface similar to MUL:
 *
 * ....
 * rax = rdx:rax / SRC
 * rdx = rdx:rax % SRC
 * ....
 *
 * DIV can be used to calculate modulus, but GCC does not use it because it is slow,
 * and chooses alternative techniques instead
 * http://stackoverflow.com/questions/4361979/how-does-the-gcc-implementation-of-module-work-and-why-does-it-not-use-the
 */

#include <lkmc.h>

LKMC_PROLOGUE
    /* 64-bit hello world:
     *
     * 5 / 2 = 2 with leftover of 1.
     */
    mov $0, %rdx
    mov $5, %rax
    mov $2, %rbx
    div %rbx
    /* Move to callee saved registers to persist after our asserts. */
    mov %rax, %r12
    mov %rdx, %r13
    LKMC_ASSERT_EQ(%r12, $2)
    LKMC_ASSERT_EQ(%r13, $1)

    /* Now with a simple carry:
     *
     * rdx:rax = 0x1:0x2 = 2^64 + 2
     * (2^64 + 2) / 2 = 2^63 + 1 = 0x8000000000000001 with leftover of 0.
     */
    mov $1, %rdx
    mov $2, %rax
    mov $2, %rbx
    div %rbx
    mov %rax, %r12
    mov %rdx, %r13
    LKMC_ASSERT_EQ(%r12, $0x8000000000000001)
    LKMC_ASSERT_EQ(%r13, $0)

#if 0
    /* TODO SIGFPE example: the quotient does not fit into rax.
     *
     * rdx:rax = 0x2:0x0 = 2 * 2^64, and 2 * 2^64 / 2 = 2^64 needs 65 bits,
     * so DIV raises a divide error. This must stay disabled: if it runs,
     * the process is killed before reaching the epilogue.
     */
    mov $2, %rdx
    mov $0, %rax
    mov $2, %rbx
    div %rbx
#endif

#if 0
    /* 32 bit:
     *
     * edx:eax = 0x1:0x1 = 2^32 + 1
     * (2^32 + 1) / 2 = 0x80000000 with leftover of 1.
     */
    mov $1, %eax
    mov $1, %edx
    mov $2, %ecx
    div %ecx
    /* Move to callee saved registers to persist after our asserts. */
    mov %eax, %r12d
    mov %edx, %r13d
    LKMC_ASSERT_EQ_32(%r12d, $0x80000000)
    LKMC_ASSERT_EQ_32(%r13d, $1)

    /* Division by zero and division overflow
     *
     * If either:
     *
     * - divisor == 0
     * - result > output register size
     *
     * a divide error exception occurs.
     *
     * It then gets handled by the interrupt service 0.
     *
     * Both 0 division and overflow are treated exactly the same!
     *
     * Linux treats this by sending a signal to the process and killing it.
     *
     * Minimal 16-bit example of handling the interrupt:
     * https://github.com/cirosantilli/x86-bare-metal-examples/blob/9e58c1dc656dab54aa69daa38f84eb8c0aa6151e/idt_zero_divide.S
     *
     * Output does not fit into eax:
     *
     * ....
     * mov $0, %eax
     * mov $1, %edx
     * mov $1, %ecx
     * div %ecx
     * ....
     *
     * Division by zero:
     *
     * ....
     * mov $1, %eax
     * mov $0, %edx
     * mov $0, %ecx
     * div %ecx
     * ....
     *
     * There is no immediate version:
     * http://stackoverflow.com/questions/4529260/mul-instruction-doesnt-support-an-immediate-value
     */
#endif
LKMC_EPILOGUE

View File

@@ -0,0 +1,34 @@
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
 *
 * Signed integer division.
 *
 * Much like MUL vs IMUL.
 */

#include <lkmc.h>

LKMC_PROLOGUE
    /* -5 / -2 = 2 with leftover of -1:
     * the quotient truncates towards zero and the remainder takes the
     * sign of the dividend.
     */
    movl $-5, %eax
    /* Don't forget this! CLTD sign extends eax into edx, which IDIV
     * reads as the high half of the dividend edx:eax.
     */
    cltd
    movl $-2, %ecx
    idivl %ecx
    /* Move to callee saved registers to persist after our asserts:
     * the assert macros presumably do not preserve eax and edx.
     */
    movl %eax, %r12d
    movl %edx, %r13d
    LKMC_ASSERT_EQ_32(%r12d, $2)
    LKMC_ASSERT_EQ_32(%r13d, $-1)

    /* edx:eax = 0x1:0x1 = 2^32 + 1
     * (2^32 + 1) / 4 = 0x40000000 with leftover of 1.
     */
    movl $1, %eax
    movl $1, %edx
    movl $4, %ecx
    idivl %ecx
    movl %eax, %r12d
    movl %edx, %r13d
    LKMC_ASSERT_EQ_32(%r12d, $0x40000000)
    LKMC_ASSERT_EQ_32(%r13d, $1)

    /* RUNTIME ERROR: result must fit into signed dword.
     * (2^32 + 1) / 2 = 0x80000000, which is above the signed 32-bit maximum:
     *
     * ....
     * movl $1, %eax
     * movl $1, %edx
     * movl $2, %ecx
     * idivl %ecx
     * ....
     */

    /* TODO division by zero */
LKMC_EPILOGUE

View File

@@ -0,0 +1,42 @@
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
 *
 * Signed multiply.
 *
 * Unlike MUL, IMUL also comes in immediate, two operand and three operand forms.
 */

#include <lkmc.h>

LKMC_PROLOGUE
    /* One operand form: works like MUL and writes the full product
     * to rdx:rax.
     *
     * rdx : rax = rax * rbx
     *           = -1 * 2
     *           = -2
     *           = 0xFFFFFFFFFFFFFFFF : 0xFFFFFFFFFFFFFFFE
     */
    /* Seed rdx with a dummy value: the product overwrites it. */
    movq $42, %rdx
    movq $2, %rbx
    movq $-1, %rax
    imulq %rbx
    /* Keep the results in callee saved registers across the asserts. */
    movq %rdx, %r13
    movq %rax, %r12
    LKMC_ASSERT_EQ(%r12, $0xFFFFFFFFFFFFFFFE)
    LKMC_ASSERT_EQ(%r13, $0xFFFFFFFFFFFFFFFF)

    /* The multi-argument forms do not write to rdx, but they are
     * more convenient in many cases.
     *
     * rax = rbx * 3
     */
    /* Dummy values again: rax gets replaced, rdx must stay at 42. */
    movq $42, %rdx
    movq $42, %rax
    movq $-2, %rbx
    imulq $3, %rbx, %rax
    movq %rdx, %r13
    movq %rax, %r12
    LKMC_ASSERT_EQ(%r12, $-6)
    LKMC_ASSERT_EQ(%r13, $42)
LKMC_EPILOGUE

View File

@@ -0,0 +1,98 @@
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
 *
 * Unsigned multiply.
 *
 * The result is spread across two registers, e.g. edx:eax for the 32-bit form.
 */

#include <lkmc.h>

LKMC_PROLOGUE
    /* 64-bit hello world:
     *
     * rdx : rax = rax * rbx
     * 0x0 : 4   = 2   * 2
     */
    mov $2, %rax
    mov $2, %rbx
    mul %rbx
    /* Move to callee saved registers to persist after our asserts. */
    mov %rax, %r12
    mov %rdx, %r13
    mov %rbx, %r14
    LKMC_ASSERT_EQ(%r12, $4)
    LKMC_ASSERT_EQ(%r13, $0)
    /* rbx is untouched. */
    LKMC_ASSERT_EQ(%r14, $2)

    /* 64-bit with a carry:
     *
     * rdx : rax                = rax                * rbx
     * 0x1 : 0x0000000000000002 = 0x8000000000000001 * 2
     */
    mov $0x8000000000000001, %rax
    mov $2, %rbx
    mul %rbx
    mov %rax, %r12
    mov %rdx, %r13
    LKMC_ASSERT_EQ(%r12, $2)
    LKMC_ASSERT_EQ(%r13, $1)

    /* 8-bit is special: does not use dx for output:
     *
     * ah   : al   = al * bl
     * 0x01 : 0x00 = 2  * 0x80
     *
     * so ax, and here also eax since it was zeroed first, is 0x100.
     */
    mov $0, %eax
    mov $2, %al
    mov $0x80, %bl
    mov $0, %dl
    mul %bl
    /* Single assert directly on eax: nothing ran in between that could clobber it. */
    LKMC_ASSERT_EQ_32(%eax, $0x100)

    /* 16-bit
     *
     * dx  : ax     = ax * bx
     * 0x1 : 0x0000 = 2  * 0x8000
     */
    /* Clear the full registers first: 16-bit writes leave the upper bits
     * untouched, and the asserts below compare the 32-bit values.
     */
    mov $0, %eax
    mov $0, %edx
    mov $2, %ax
    mov $0x8000, %bx
    mov $0, %dx
    mul %bx
    mov %eax, %r12d
    mov %edx, %r13d
    LKMC_ASSERT_EQ_32(%r12d, $0)
    LKMC_ASSERT_EQ_32(%r13d, $1)

    /* 32-bit
     *
     * edx : eax        = eax * ebx
     * 0x1 : 0x00000000 = 2   * 0x80000000
     */
    mov $2, %eax
    mov $0x80000000, %ebx
    mov $0, %edx
    mul %ebx
    mov %eax, %r12d
    mov %edx, %r13d
    LKMC_ASSERT_EQ_32(%r12d, $0)
    LKMC_ASSERT_EQ_32(%r13d, $1)

#if 0
    /* No immediate form, although imul has one:
     * http://stackoverflow.com/questions/20499141/is-it-possible-to-multiply-by-and-immediate-with-mul-in-x86-assembly/33202309#33202309
     *
     * Error: operand type mismatch for `mul'
     */
    mul $2
#endif

    /* Memory version.
     *
     * The operand is addressed RIP-relative so that the file also links
     * as a position independent executable.
     */
.data
    mylong: .long 0x11111111
.text
    movl $2, %eax
    mull mylong(%rip)
    LKMC_ASSERT_EQ_32(%eax, $0x22222222)
LKMC_EPILOGUE

View File

@@ -0,0 +1,14 @@
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
 *
 * Negate: i *= -1.
 */

#include <lkmc.h>

LKMC_PROLOGUE
    /* Work on a callee saved register so that the value persists after
     * our asserts: the assert macros presumably do not preserve rax, and
     * the second NEG depends on the result of the first one.
     */
    mov $2, %r12
    neg %r12
    LKMC_ASSERT_EQ(%r12, $-2)
    /* Negating again gives back the original value. The 64-bit form is
     * used on purpose: a 32-bit NEG would only produce 2 here because
     * 32-bit writes zero the upper half of the register.
     */
    neg %r12
    LKMC_ASSERT_EQ(%r12, $2)
LKMC_EPILOGUE

View File

@@ -0,0 +1,19 @@
/* Subtract with borrow:
 *
 * edx:eax -= ebx:ecx
 *
 * SUB computes the low dword and sets CF when it has to borrow. SBB then
 * subtracts the high dwords and that CF, giving a 64-bit subtraction built
 * out of 32-bit registers.
 */

#include <lkmc.h>

LKMC_PROLOGUE
    /* edx:eax = 0x1:0x00000000 and ebx:ecx = 0x0:0x80000000. */
    movl $0, %eax
    movl $0, %ebx
    movl $0x80000000, %ecx
    movl $1, %edx

    /* Low halves: 0 - 0x80000000 wraps to 0x80000000 and sets CF. */
    subl %ecx, %eax
    /* High halves: 1 - 0 - CF = 0. */
    sbbl %ebx, %edx

    /* Move to callee saved registers to persist after our asserts:
     * the assert macros presumably do not preserve eax and edx, so the
     * second assert must not read edx directly.
     */
    movl %eax, %r12d
    movl %edx, %r13d
    LKMC_ASSERT_EQ_32(%r12d, $0x80000000)
    LKMC_ASSERT_EQ_32(%r13d, $0)
LKMC_EPILOGUE