From 0028ff0ebdd46c2499056c2985c8ce2af248bb5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciro=20Santilli=20=E5=85=AD=E5=9B=9B=E4=BA=8B=E4=BB=B6=20?= =?UTF-8?q?=E6=B3=95=E8=BD=AE=E5=8A=9F?= Date: Wed, 12 Jun 2019 00:00:00 +0000 Subject: [PATCH] x86 asm: move binary arithmetic instructions from x86-assembly-cheat except cmp --- README.adoc | 23 ++++--- build-gem5 | 2 +- path_properties.py | 2 + userland/arch/x86_64/adc.S | 25 ++++++++ userland/arch/x86_64/div.S | 40 ++++++++++++ userland/arch/x86_64/div_overflow.S | 14 +++++ userland/arch/x86_64/div_zero.S | 17 +++++ userland/arch/x86_64/idiv.S | 34 ++++++++++ userland/arch/x86_64/imul.S | 42 +++++++++++++ userland/arch/x86_64/mul.S | 98 +++++++++++++++++++++++++++++ userland/arch/x86_64/neg.S | 14 +++++ userland/arch/x86_64/sbb.S | 23 +++++++ 12 files changed, 326 insertions(+), 8 deletions(-) create mode 100644 userland/arch/x86_64/adc.S create mode 100644 userland/arch/x86_64/div.S create mode 100644 userland/arch/x86_64/div_overflow.S create mode 100644 userland/arch/x86_64/div_zero.S create mode 100644 userland/arch/x86_64/idiv.S create mode 100644 userland/arch/x86_64/imul.S create mode 100644 userland/arch/x86_64/mul.S create mode 100644 userland/arch/x86_64/neg.S create mode 100644 userland/arch/x86_64/sbb.S diff --git a/README.adoc b/README.adoc index db2bac3..d55d16f 100644 --- a/README.adoc +++ b/README.adoc @@ -10208,13 +10208,13 @@ There are not yet enabled, but it should be easy to so, see: <> enabled, and then we extract the stats: .... 
./build-userland \ --arch aarch64 \ - --ccflagg='-DLKMC_M5OPS_ENABLE=1' \ - --force-build cpp/bst_vs_heap \ + --ccflags='-DLKMC_M5OPS_ENABLE=1' \ + --force-rebuild cpp/bst_vs_heap \ --static \ ; ./run \ @@ -10910,7 +10910,7 @@ To use that file, first rebuild `m5ops.out` with the m5ops instructions enabled ./build-userland \ --arch aarch64 \ --ccflags='-DLKMC_M5OPS_ENABLE=1' \ - --force-build c/m5ops \ + --force-rebuild c/m5ops \ --static \ ; ./build-buildroot --arch aarch64 @@ -10941,7 +10941,7 @@ In theory, the cleanest way to add m5ops to your benchmarks would be to do exact However, I think it is usually not worth the trouble of hacking up the build system of the benchmark to do this, and I recommend just hardcoding in a few raw instructions here and there, and managing it with version control + `sed`. -Bibliography:x +Bibliography: * https://stackoverflow.com/questions/56506154/how-to-analyze-only-interest-area-in-source-code-by-using-gem5/56506419#56506419 * https://www.mail-archive.com/gem5-users@gem5.org/msg15418.html @@ -12352,9 +12352,18 @@ Bibliography: <> 5.1.2 "Binary Arithmetic Instructions": * link:userland/arch/x86_64/add.S[ADD] -* link:userland/arch/x86_64/dec.S[DEC] -* link:userland/arch/x86_64/inc.S[INC] +** link:userland/arch/x86_64/inc.S[INC] +** link:userland/arch/x86_64/adc.S[ADC] * link:userland/arch/x86_64/sub.S[SUB] +** link:userland/arch/x86_64/dec.S[DEC] +** link:userland/arch/x86_64/sbb.S[SBB] +* link:userland/arch/x86_64/mul.S[MUL] +** link:userland/arch/x86_64/neg.S[NEG] +** link:userland/arch/x86_64/imul.S[IMUL] +* link:userland/arch/x86_64/div.S[DIV] +** link:userland/arch/x86_64/div_overflow.S[DIV overflow] +** link:userland/arch/x86_64/div_zero.S[DIV zero] +** link:userland/arch/x86_64/idiv.S[IDIV] === x86 SIMD diff --git a/build-gem5 b/build-gem5 index 5fd9f66..968a4a7 100755 --- a/build-gem5 +++ b/build-gem5 @@ -60,7 +60,7 @@ https://github.com/cirosantilli/linux-kernel-module-cheat-regression#gem5-unit-t 'git', LF, '-C', 
self.env['gem5_default_source_dir'], LF, 'worktree', 'add', LF, - '-b', os.path.join('wt', self.env['gem5_build_id']), LF, + '-b', os.path.join('wt', self.env['gem5_worktree']), LF, self.env['gem5_source_dir'], LF, ]) else: diff --git a/path_properties.py b/path_properties.py index b201ba7..2302590 100644 --- a/path_properties.py +++ b/path_properties.py @@ -406,6 +406,8 @@ path_properties_tuples = ( 'freestanding': freestanding_properties, } ), + 'div_overflow.S': {'signal_received': signal.Signals.SIGFPE}, + 'div_zero.S': {'signal_received': signal.Signals.SIGFPE}, 'freestanding': freestanding_properties, 'lkmc_assert_eq_fail.S': {'signal_received': signal.Signals.SIGABRT}, 'lkmc_assert_memcmp_fail.S': {'signal_received': signal.Signals.SIGABRT}, diff --git a/userland/arch/x86_64/adc.S b/userland/arch/x86_64/adc.S new file mode 100644 index 0000000..02fcc13 --- /dev/null +++ b/userland/arch/x86_64/adc.S @@ -0,0 +1,25 @@ +/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions + * + * Add with Carry. Like add, but if the carry flag is set, add 1 to the addition. + * + * This allows implementing arbitrary precision arithmetic. + */ + +#include + +LKMC_PROLOGUE + /* rax : rbx += rcx : rdx + * 1 : 0x8000000000000001 += 0x10 : 0x8000000000000010 + * 0x12 : 0x11 + */ + mov $0x1, %rax + mov $0x8000000000000001, %rbx + mov $0x10, %rcx + mov $0x8000000000000010, %rdx + add %rdx, %rbx + adc %rcx, %rax + mov %rax, %r12 + mov %rbx, %r13 + LKMC_ASSERT_EQ(%r12, $0x12) + LKMC_ASSERT_EQ(%r13, $0x11) +LKMC_EPILOGUE diff --git a/userland/arch/x86_64/div.S b/userland/arch/x86_64/div.S new file mode 100644 index 0000000..2d980b2 --- /dev/null +++ b/userland/arch/x86_64/div.S @@ -0,0 +1,40 @@ +/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions + * + * Unsigned integer division, interface similar to MUL: + * + * .... + * rax = rdx:rax / SRC + * rdx = rdx:rax % SRC + * .... 
+ * + * DIV can be used to calculate modulus, but GCC does not use it because it is slow, + * and chooses alternative techniques instead + * http://stackoverflow.com/questions/4361979/how-does-the-gcc-implementation-of-module-work-and-why-does-it-not-use-the + */ + +#include + +LKMC_PROLOGUE + /* 64-bit hello world: + * + * 5 / 2 = 2 with leftover of 1. + */ + mov $0, %rdx + mov $5, %rax + mov $2, %rbx + div %rbx + mov %rax, %r12 + mov %rdx, %r13 + LKMC_ASSERT_EQ(%r12, $2) + LKMC_ASSERT_EQ(%r13, $1) + + /* Now with a simple carry. */ + mov $1, %rdx + mov $2, %rax + mov $2, %rbx + div %rbx + mov %rax, %r12 + mov %rdx, %r13 + LKMC_ASSERT_EQ(%r12, $0x8000000000000001) + LKMC_ASSERT_EQ(%r13, $0) +LKMC_EPILOGUE diff --git a/userland/arch/x86_64/div_overflow.S b/userland/arch/x86_64/div_overflow.S new file mode 100644 index 0000000..44ffb30 --- /dev/null +++ b/userland/arch/x86_64/div_overflow.S @@ -0,0 +1,14 @@ +/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions + * + * If the result of div does not fit into the output register rax, then we get SIGFPE. 
+ */ + +#include + +LKMC_PROLOGUE + /* rdx:rax / 2 == 2:0 / 2 == 1:0 */ + mov $2, %rdx + mov $0, %rax + mov $2, %rbx + div %rbx +LKMC_EPILOGUE diff --git a/userland/arch/x86_64/div_zero.S b/userland/arch/x86_64/div_zero.S new file mode 100644 index 0000000..108ca5d --- /dev/null +++ b/userland/arch/x86_64/div_zero.S @@ -0,0 +1,17 @@ +/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions + * + * SIGFPE :-) + * + * Signal handling discussed at: + * https://stackoverflow.com/questions/39431879/c-handle-signal-sigfpe-and-continue-execution/39431923#39431923 + */ + +#include + +LKMC_PROLOGUE + /* rdx:rax / 0 */ + mov $0, %rdx + mov $1, %rax + mov $0, %rbx + div %rbx +LKMC_EPILOGUE diff --git a/userland/arch/x86_64/idiv.S b/userland/arch/x86_64/idiv.S new file mode 100644 index 0000000..4d6b7b3 --- /dev/null +++ b/userland/arch/x86_64/idiv.S @@ -0,0 +1,34 @@ +/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions + * + * Signed integer division. + */ + +#include + +LKMC_PROLOGUE + /* With a single operand, it works like DIV. + * -5 = (2 * -2) + (-1) + */ + mov $-5, %rax + /* Sign extend rax into rdx:rax + * https://stackoverflow.com/questions/17170388/trying-to-understand-the-assembly-instruction-cltd-on-x86/50315201#50315201 + */ + cqo + mov $2, %rbx + idiv %rbx + mov %rax, %r12 + mov %rdx, %r13 + LKMC_ASSERT_EQ(%r12, $-2) + LKMC_ASSERT_EQ(%r13, $-1) + +#if 0 + /* Unlike IMUL vs MUL, IDIV does not have a multi operand interface. + * Likely because it needs 2 output registers unlike IMUL. + * + * .... + * Error: number of operands mismatch for `idiv' + * .... 
+ */ + idiv %rax, $2, %rbx +#endif +LKMC_EPILOGUE diff --git a/userland/arch/x86_64/imul.S b/userland/arch/x86_64/imul.S new file mode 100644 index 0000000..9c30694 --- /dev/null +++ b/userland/arch/x86_64/imul.S @@ -0,0 +1,42 @@ +/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions + * + * Signed multiply. + * + * Has many more forms than MUL including immediate and up to three arguments. + */ + +#include + +LKMC_PROLOGUE + + /* The single register forms are just like MUL, and + * extend over rdx:rax. + * + * rdx : rax = rax * rbx + * = -1 * 2 + * = -2 + * = 0xFFFFFFFFFFFFFFFF : 0xFFFFFFFFFFFFFFFE + */ + mov $-1, %rax + mov $2, %rbx + mov $42, %rdx + imul %rbx + mov %rax, %r12 + mov %rdx, %r13 + LKMC_ASSERT_EQ(%r12, $0xFFFFFFFFFFFFFFFE) + LKMC_ASSERT_EQ(%r13, $0xFFFFFFFFFFFFFFFF) + + /* The multi-argument forms don't extend over rdx, but + * are more convenient in many cases. + * + * rax = rbx * 3 + */ + mov $42, %rax + mov $-2, %rbx + mov $42, %rdx + imul $3, %rbx, %rax + mov %rax, %r12 + mov %rdx, %r13 + LKMC_ASSERT_EQ(%r12, $-6) + LKMC_ASSERT_EQ(%r13, $42) +LKMC_EPILOGUE diff --git a/userland/arch/x86_64/mul.S b/userland/arch/x86_64/mul.S new file mode 100644 index 0000000..9b343c3 --- /dev/null +++ b/userland/arch/x86_64/mul.S @@ -0,0 +1,98 @@ +/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions + * + * Unsigned multiply. + * + * For 64-bit operands, the result is spread across rdx:rax. + */ + +#include + +LKMC_PROLOGUE + + /* 64-bit hello world: + * + * rdx : rax = rax * rbx + * 0x0 : 4 = 2 * 2 + */ + mov $2, %rax + mov $2, %rbx + mul %rbx + /* Move to callee saved registers to persist after our asserts. */ + mov %rax, %r12 + mov %rdx, %r13 + mov %rbx, %r14 + LKMC_ASSERT_EQ(%r12, $4) + LKMC_ASSERT_EQ(%r13, $0) + /* rbx is untouched. 
 */ + LKMC_ASSERT_EQ(%r14, $2) + + /* 64-bit with a carry: + * + * rdx : rax = rax * rbx + * 0x1 : 0x0000000000000002 = 0x8000000000000001 * 2 + */ + mov $0x8000000000000001, %rax + mov $2, %rbx + mul %rbx + mov %rax, %r12 + mov %rdx, %r13 + LKMC_ASSERT_EQ(%r12, $2) + LKMC_ASSERT_EQ(%r13, $1) + + /* 8-bit is special: does not use dx for output: + * + * ah : al = al * bl + * 0x01 : 0x00 = 2 * 0x80 + */ + mov $0, %eax + mov $2, %al + mov $0x80, %bl + mov $0, %dl + mul %bl + LKMC_ASSERT_EQ_32(%eax, $0x100) + + /* 16-bit + * + * dx : ax = ax * bx + * 0x1 : 0x0000 = 2 * 0x8000 + */ + mov $0, %eax + mov $0, %edx + mov $2, %ax + mov $0x8000, %bx + mov $0, %dx + mul %bx + mov %eax, %r12d + mov %edx, %r13d + LKMC_ASSERT_EQ_32(%r12d, $0) + LKMC_ASSERT_EQ_32(%r13d, $1) + + /* 32-bit */ + mov $2, %eax + mov $0x80000000, %ebx + mov $0, %edx + mul %ebx + mov %eax, %r12d + mov %edx, %r13d + LKMC_ASSERT_EQ_32(%r12d, $0) + LKMC_ASSERT_EQ_32(%r13d, $1) + + +#if 0 + /* No immediate form, although imul has one: + * http://stackoverflow.com/questions/20499141/is-it-possible-to-multiply-by-and-immediate-with-mul-in-x86-assembly/33202309#33202309 + * + * Error: operand type mismatch for `mul' + */ + mul $2 +#endif + + /* Memory version, RIP-relative so that it also links as PIE. */ +.data + mylong: .long 0x11111111 +.text + movl $2, %eax + mull mylong(%rip) + LKMC_ASSERT_EQ_32(%eax, $0x22222222) + +LKMC_EPILOGUE diff --git a/userland/arch/x86_64/neg.S b/userland/arch/x86_64/neg.S new file mode 100644 index 0000000..8a7529b --- /dev/null +++ b/userland/arch/x86_64/neg.S @@ -0,0 +1,14 @@ +/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions + * + * Negate: i *= -1. The value is kept in callee saved r12 so that it persists after our asserts. 
+ */ + +#include + +LKMC_PROLOGUE + mov $2, %r12 + neg %r12 + LKMC_ASSERT_EQ(%r12, $-2) + neg %r12 + LKMC_ASSERT_EQ(%r12, $2) +LKMC_EPILOGUE diff --git a/userland/arch/x86_64/sbb.S b/userland/arch/x86_64/sbb.S new file mode 100644 index 0000000..d993348 --- /dev/null +++ b/userland/arch/x86_64/sbb.S @@ -0,0 +1,23 @@ +/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions + * + * Subtract with Borrow. Like ADC is for ADD, but for subtraction. + */ + +#include + +LKMC_PROLOGUE + /* rax : rbx -= rcx : rdx + * 1 : 0 -= 0 : 0x8000000000000000 + * 0 : 0x8000000000000000 + */ + mov $0x1, %rax + mov $0x0, %rbx + mov $0x0, %rcx + mov $0x8000000000000000, %rdx + sub %rdx, %rbx + sbb %rcx, %rax + mov %rax, %r12 + mov %rbx, %r13 + LKMC_ASSERT_EQ(%r12, $0x0) + LKMC_ASSERT_EQ(%r13, $0x8000000000000000) +LKMC_EPILOGUE