From 0028ff0ebdd46c2499056c2985c8ce2af248bb5e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ciro=20Santilli=20=E5=85=AD=E5=9B=9B=E4=BA=8B=E4=BB=B6=20?=
 =?UTF-8?q?=E6=B3=95=E8=BD=AE=E5=8A=9F?= <ciro.santilli@gmail.com>
Date: Wed, 12 Jun 2019 00:00:00 +0000
Subject: [PATCH] x86 asm: move binary arithmetic instructions from
 x86-assembly-cheat except cmp

---
 README.adoc                         | 23 ++++---
 build-gem5                          |  2 +-
 path_properties.py                  |  2 +
 userland/arch/x86_64/adc.S          | 25 ++++++++
 userland/arch/x86_64/div.S          | 40 ++++++++++++
 userland/arch/x86_64/div_overflow.S | 14 +++++
 userland/arch/x86_64/div_zero.S     | 17 +++++
 userland/arch/x86_64/idiv.S         | 34 ++++++++++
 userland/arch/x86_64/imul.S         | 42 +++++++++++++
 userland/arch/x86_64/mul.S          | 98 +++++++++++++++++++++++++++++
 userland/arch/x86_64/neg.S          | 14 +++++
 userland/arch/x86_64/sbb.S          | 23 +++++++
 12 files changed, 326 insertions(+), 8 deletions(-)
 create mode 100644 userland/arch/x86_64/adc.S
 create mode 100644 userland/arch/x86_64/div.S
 create mode 100644 userland/arch/x86_64/div_overflow.S
 create mode 100644 userland/arch/x86_64/div_zero.S
 create mode 100644 userland/arch/x86_64/idiv.S
 create mode 100644 userland/arch/x86_64/imul.S
 create mode 100644 userland/arch/x86_64/mul.S
 create mode 100644 userland/arch/x86_64/neg.S
 create mode 100644 userland/arch/x86_64/sbb.S
diff --git a/README.adoc b/README.adoc
index db2bac3..d55d16f 100644
--- a/README.adoc
+++ b/README.adoc
@@ -10208,13 +10208,13 @@ There are not yet enabled, but it should be easy to so, see: <<add-new-buildroot
 
 https://stackoverflow.com/questions/6147242/heap-vs-binary-search-tree-bst/29548834#29548834
 
-Usage:
+First we build it with <<m5ops-instructions>> enabled, and then we extract the stats:
 
 ....
 ./build-userland \
   --arch aarch64 \
-  --ccflagg='-DLKMC_M5OPS_ENABLE=1' \
-  --force-build cpp/bst_vs_heap \
+  --ccflags='-DLKMC_M5OPS_ENABLE=1' \
+  --force-rebuild cpp/bst_vs_heap \
   --static \
 ;
 ./run \
@@ -10910,7 +10910,7 @@ To use that file, first rebuild `m5ops.out` with the m5ops instructions enabled
 ./build-userland \
   --arch aarch64 \
   --ccflags='-DLKMC_M5OPS_ENABLE=1' \
-  --force-build c/m5ops \
+  --force-rebuild c/m5ops \
   --static \
 ;
 ./build-buildroot --arch aarch64
@@ -10941,7 +10941,7 @@ In theory, the cleanest way to add m5ops to your benchmarks would be to do exact
 
 However, I think it is usually not worth the trouble of hacking up the build system of the benchmark to do this, and I recommend just hardcoding in a few raw instructions here and there, and managing it with version control + `sed`.
 
-Bibliography:x
+Bibliography:
 
 * https://stackoverflow.com/questions/56506154/how-to-analyze-only-interest-area-in-source-code-by-using-gem5/56506419#56506419
 * https://www.mail-archive.com/gem5-users@gem5.org/msg15418.html
@@ -12352,9 +12352,18 @@ Bibliography:
 <<intel-manual-1>> 5.1.2 "Binary Arithmetic Instructions":
 
 * link:userland/arch/x86_64/add.S[ADD]
-* link:userland/arch/x86_64/dec.S[DEC]
-* link:userland/arch/x86_64/inc.S[INC]
+** link:userland/arch/x86_64/inc.S[INC]
+** link:userland/arch/x86_64/adc.S[ADC]
 * link:userland/arch/x86_64/sub.S[SUB]
+** link:userland/arch/x86_64/dec.S[DEC]
+** link:userland/arch/x86_64/sbb.S[SBB]
+* link:userland/arch/x86_64/mul.S[MUL]
+** link:userland/arch/x86_64/neg.S[NEG]
+** link:userland/arch/x86_64/imul.S[IMUL]
+* link:userland/arch/x86_64/div.S[DIV]
+** link:userland/arch/x86_64/div_overflow.S[DIV overflow]
+** link:userland/arch/x86_64/div_zero.S[DIV zero]
+** link:userland/arch/x86_64/idiv.S[IDIV]
 
 === x86 SIMD
 
diff --git a/build-gem5 b/build-gem5
index 5fd9f66..968a4a7 100755
--- a/build-gem5
+++ b/build-gem5
@@ -60,7 +60,7 @@ https://github.com/cirosantilli/linux-kernel-module-cheat-regression#gem5-unit-t
                     'git', LF,
                     '-C', self.env['gem5_default_source_dir'], LF,
                     'worktree', 'add', LF,
-                    '-b', os.path.join('wt', self.env['gem5_build_id']), LF,
+                    '-b', os.path.join('wt', self.env['gem5_worktree']), LF,
                     self.env['gem5_source_dir'], LF,
                 ])
             else:
diff --git a/path_properties.py b/path_properties.py
index b201ba7..2302590 100644
--- a/path_properties.py
+++ b/path_properties.py
@@ -406,6 +406,8 @@ path_properties_tuples = (
                                         'freestanding': freestanding_properties,
                                     }
                                 ),
+                                'div_overflow.S': {'signal_received': signal.Signals.SIGFPE},
+                                'div_zero.S': {'signal_received': signal.Signals.SIGFPE},
                                 'freestanding': freestanding_properties,
                                 'lkmc_assert_eq_fail.S': {'signal_received': signal.Signals.SIGABRT},
                                 'lkmc_assert_memcmp_fail.S': {'signal_received': signal.Signals.SIGABRT},
diff --git a/userland/arch/x86_64/adc.S b/userland/arch/x86_64/adc.S
new file mode 100644
index 0000000..02fcc13
--- /dev/null
+++ b/userland/arch/x86_64/adc.S
@@ -0,0 +1,25 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
+ *
+ * Add with Carry. Like add, but if the carry flag is set, add 1 to the addition.
+ *
+ * This allows implementing arbitrary precision arithmetic.
+ */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+    /*  rax :                rbx +=  rcx :                rdx
+     *    1 : 0x8000000000000001 += 0x10 : 0x8000000000000010
+     * 0x12 :               0x11    (128-bit values written as high : low)
+     */
+    mov $0x1, %rax
+    mov $0x8000000000000001, %rbx
+    mov $0x10, %rcx
+    mov $0x8000000000000010, %rdx
+    add %rdx, %rbx /* Low halves: the sum wraps to 0x11 and sets CF. */
+    adc %rcx, %rax /* High halves: 0x1 + 0x10 + CF = 0x12. */
+    mov %rax, %r12
+    mov %rbx, %r13
+    LKMC_ASSERT_EQ(%r12, $0x12)
+    LKMC_ASSERT_EQ(%r13, $0x11)
+LKMC_EPILOGUE
diff --git a/userland/arch/x86_64/div.S b/userland/arch/x86_64/div.S
new file mode 100644
index 0000000..2d980b2
--- /dev/null
+++ b/userland/arch/x86_64/div.S
@@ -0,0 +1,40 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
+ *
+ * Unsigned integer division, interface similar to MUL:
+ *
+ * ....
+ * rax = rdx:rax / SRC
+ * rdx = rdx:rax % SRC
+ * ....
+ *
+ * DIV can be used to calculate modulus, but GCC does not use it because it is slow,
+ * and chooses alternative techniques instead
+ * http://stackoverflow.com/questions/4361979/how-does-the-gcc-implementation-of-module-work-and-why-does-it-not-use-the
+ */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+    /* 64-bit hello world:
+     *
+     * 5 / 2 = 2 with leftover of 1.
+     */
+    mov $0, %rdx /* High half of the dividend rdx:rax. */
+    mov $5, %rax
+    mov $2, %rbx
+    div %rbx
+    mov %rax, %r12
+    mov %rdx, %r13
+    LKMC_ASSERT_EQ(%r12, $2)
+    LKMC_ASSERT_EQ(%r13, $1)
+
+    /* Non-zero high half: rdx:rax = 1:2 = 2^64 + 2, and (2^64 + 2) / 2 = 2^63 + 1. */
+    mov $1, %rdx
+    mov $2, %rax
+    mov $2, %rbx
+    div %rbx
+    mov %rax, %r12
+    mov %rdx, %r13
+    LKMC_ASSERT_EQ(%r12, $0x8000000000000001)
+    LKMC_ASSERT_EQ(%r13, $0)
+LKMC_EPILOGUE
diff --git a/userland/arch/x86_64/div_overflow.S b/userland/arch/x86_64/div_overflow.S
new file mode 100644
index 0000000..44ffb30
--- /dev/null
+++ b/userland/arch/x86_64/div_overflow.S
@@ -0,0 +1,14 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
+ *
+ * If the result of div does not fit into the output register rax, then we get SIGFPE.
+ */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+    /* rdx:rax / rbx == 2:0 / 2 == 1:0, and the quotient 1:0 (2^64) does not fit into rax. */
+    mov $2, %rdx
+    mov $0, %rax
+    mov $2, %rbx
+    div %rbx
+LKMC_EPILOGUE
diff --git a/userland/arch/x86_64/div_zero.S b/userland/arch/x86_64/div_zero.S
new file mode 100644
index 0000000..108ca5d
--- /dev/null
+++ b/userland/arch/x86_64/div_zero.S
@@ -0,0 +1,17 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
+ *
+ * SIGFPE :-)
+ *
+ * Signal handling discussed at:
+ * https://stackoverflow.com/questions/39431879/c-handle-signal-sigfpe-and-continue-execution/39431923#39431923
+ */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+    /* rdx:rax / rbx == 0:1 / 0 */
+    mov $0, %rdx
+    mov $1, %rax
+    mov $0, %rbx /* The zero divisor. */
+    div %rbx
+LKMC_EPILOGUE
diff --git a/userland/arch/x86_64/idiv.S b/userland/arch/x86_64/idiv.S
new file mode 100644
index 0000000..4d6b7b3
--- /dev/null
+++ b/userland/arch/x86_64/idiv.S
@@ -0,0 +1,34 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
+ *
+ * Signed integer division.
+ */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+    /* With a single operand, it has the same rdx:rax / SRC interface as DIV.
+     * -5 = (2 * -2) + (-1): the quotient -2 is truncated towards zero, the remainder is -1.
+     */
+    mov $-5, %rax
+    /* Sign extend rax into rdx:rax
+     * https://stackoverflow.com/questions/17170388/trying-to-understand-the-assembly-instruction-cltd-on-x86/50315201#50315201
+     */
+    cqo
+    mov $2, %rbx
+    idiv %rbx
+    mov %rax, %r12
+    mov %rdx, %r13
+    LKMC_ASSERT_EQ(%r12, $-2)
+    LKMC_ASSERT_EQ(%r13, $-1)
+
+#if 0
+    /* Unlike IMUL vs MUL, IDIV does not have a multi operand interface.
+     * Likely because it needs 2 output registers unlike IMUL.
+     *
+     * ....
+     * Error: number of operands mismatch for `idiv'
+     * ....
+     */
+    idiv %rax, $2, %rbx
+#endif
+LKMC_EPILOGUE
diff --git a/userland/arch/x86_64/imul.S b/userland/arch/x86_64/imul.S
new file mode 100644
index 0000000..9c30694
--- /dev/null
+++ b/userland/arch/x86_64/imul.S
@@ -0,0 +1,42 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
+ *
+ * Signed multiply.
+ *
+ * Has many more forms than MUL including immediate and up to three arguments.
+ */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+
+    /* The single operand forms are just like MUL, and
+     * extend over rdx:rax.
+     *
+     * rdx : rax =  rax * rbx
+     *           =   -1 *   2
+     *           =  -2
+     *           =  0xFFFFFFFFFFFFFFFF : 0xFFFFFFFFFFFFFFFE
+     */
+    mov $-1, %rax
+    mov $2, %rbx
+    mov $42, %rdx /* Junk: the single operand form must overwrite it. */
+    imul %rbx
+    mov %rax, %r12
+    mov %rdx, %r13
+    LKMC_ASSERT_EQ(%r12, $0xFFFFFFFFFFFFFFFE)
+    LKMC_ASSERT_EQ(%r13, $0xFFFFFFFFFFFFFFFF)
+
+    /* The multi-argument forms don't extend over rdx, but
+     * are more convenient in many cases.
+     *
+     * rax = rbx * 3
+     */
+    mov $42, %rax /* Junk: rax is a pure output here. */
+    mov $-2, %rbx
+    mov $42, %rdx /* Sentinel: must still be 42 after the multiply. */
+    imul $3, %rbx, %rax
+    mov %rax, %r12
+    mov %rdx, %r13
+    LKMC_ASSERT_EQ(%r12, $-6)
+    LKMC_ASSERT_EQ(%r13, $42)
+LKMC_EPILOGUE
diff --git a/userland/arch/x86_64/mul.S b/userland/arch/x86_64/mul.S
new file mode 100644
index 0000000..9b343c3
--- /dev/null
+++ b/userland/arch/x86_64/mul.S
@@ -0,0 +1,98 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
+ *
+ * Unsigned multiply.
+ *
+ * The result is spread across rdx:rax (edx:eax, dx:ax or ah:al for narrower operands).
+ */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+
+    /* 64-bit hello world:
+     *
+     * rdx : rax = rax * rbx
+     * 0x0 : 4   =   2 *   2
+     */
+    mov $2, %rax
+    mov $2, %rbx
+    mul %rbx
+    /* Move to callee saved registers to persist after our asserts. */
+    mov %rax, %r12
+    mov %rdx, %r13
+    mov %rbx, %r14
+    LKMC_ASSERT_EQ(%r12, $4)
+    LKMC_ASSERT_EQ(%r13, $0)
+    /* rbx is untouched. */
+    LKMC_ASSERT_EQ(%r14, $2)
+
+    /* 64-bit with a carry:
+     *
+     * rdx :                rax = rax                * rbx
+     * 0x1 : 0x0000000000000002 = 0x8000000000000001 *   2
+     */
+    mov $0x8000000000000001, %rax
+    mov $2, %rbx
+    mul %rbx
+    mov %rax, %r12
+    mov %rdx, %r13
+    LKMC_ASSERT_EQ(%r12, $2)
+    LKMC_ASSERT_EQ(%r13, $1)
+
+    /* 8-bit is special: does not use dx for output:
+     *
+     *   ah : al = al *   bl
+     * 0x01 :  0 =  2 * 0x80
+     */
+    mov $0, %eax
+    mov $2, %al
+    mov $0x80, %bl
+    mov $0, %dl
+    mul %bl
+    LKMC_ASSERT_EQ_32(%eax, $0x100)
+
+    /* 16-bit
+     *
+     *  dx :     ax = ax *     bx
+     * 0x1 : 0x0000 =  2 * 0x8000
+     */
+    mov $0, %eax
+    mov $0, %edx
+    mov $2, %ax
+    mov $0x8000, %bx
+    mov $0, %dx
+    mul %bx
+    mov %eax, %r12d
+    mov %edx, %r13d
+    LKMC_ASSERT_EQ_32(%r12d, $0)
+    LKMC_ASSERT_EQ_32(%r13d, $1)
+
+    /* 32-bit: edx : eax = eax * ebx, 0x1 : 0x00000000 = 2 * 0x80000000 */
+    mov $2, %eax
+    mov $0x80000000, %ebx
+    mov $0, %edx
+    mul %ebx
+    mov %eax, %r12d
+    mov %edx, %r13d
+    LKMC_ASSERT_EQ_32(%r12d, $0)
+    LKMC_ASSERT_EQ_32(%r13d, $1)
+
+
+#if 0
+    /* No immediate form, although imul has one:
+     * http://stackoverflow.com/questions/20499141/is-it-possible-to-multiply-by-and-immediate-with-mul-in-x86-assembly/33202309#33202309
+     *
+     * Error: operand type mismatch for `mul'
+     */
+    mul $2
+#endif
+
+    /* Memory operand: the l suffix selects 32 bits; RIP-relative so that it also links as PIE. */
+.data
+    mylong: .long 0x11111111
+.text
+    movl $2, %eax
+    mull mylong(%rip)
+    LKMC_ASSERT_EQ_32(%eax, $0x22222222)
+
+LKMC_EPILOGUE
diff --git a/userland/arch/x86_64/neg.S b/userland/arch/x86_64/neg.S
new file mode 100644
index 0000000..8a7529b
--- /dev/null
+++ b/userland/arch/x86_64/neg.S
@@ -0,0 +1,14 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
+ *
+ * Negate: i *= -1.
+ */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+    mov $2, %r12 /* Callee saved register, so that the value persists after our asserts. */
+    neg %r12
+    LKMC_ASSERT_EQ(%r12, $-2)
+    neg %r12 /* Negating twice gives back the original value. */
+    LKMC_ASSERT_EQ(%r12, $2)
+LKMC_EPILOGUE
diff --git a/userland/arch/x86_64/sbb.S b/userland/arch/x86_64/sbb.S
new file mode 100644
index 0000000..d993348
--- /dev/null
+++ b/userland/arch/x86_64/sbb.S
@@ -0,0 +1,23 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-binary-arithmetic-instructions
+ *
+ * Subtract with Borrow. Like ADC is for ADD, but for subtraction.
+ */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+    /*  rax :                rbx -=  rcx :                rdx
+     *    1 :                  0 -=    0 : 0x8000000000000000
+     *    0 : 0x8000000000000000    (128-bit values written as high : low)
+     */
+    mov $0x1, %rax
+    mov $0x0, %rbx
+    mov $0x0, %rcx
+    mov $0x8000000000000000, %rdx
+    sub %rdx, %rbx /* Low halves: 0 - 0x8000000000000000 wraps and sets CF (borrow). */
+    sbb %rcx, %rax /* High halves: 1 - 0 - CF = 0. */
+    mov %rax, %r12
+    mov %rbx, %r13
+    LKMC_ASSERT_EQ(%r12, $0x0)
+    LKMC_ASSERT_EQ(%r13, $0x8000000000000000)
+LKMC_EPILOGUE