From f470d474a63531773e498b9d12c5d482fc8a2576 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ciro=20Santilli=20=E5=85=AD=E5=9B=9B=E4=BA=8B=E4=BB=B6=20?=
 =?UTF-8?q?=E6=B3=95=E8=BD=AE=E5=8A=9F?= <ciro.santilli@gmail.com>
Date: Fri, 21 Jun 2019 00:00:01 +0000
Subject: [PATCH] x86 asm: move most data transfer instructions from
 x86-assembly-cheat

---
 README.adoc                   | 96 +++++++++++++++++++++++++++++++++++
 userland/arch/x86_64/cltq.S   | 27 ++++++++++
 userland/arch/x86_64/cmovcc.S | 35 +++++++++++++
 userland/arch/x86_64/cqto.S   | 54 ++++++++++++++++++++
 userland/arch/x86_64/lea.S    | 24 +++++++++
 userland/arch/x86_64/movsx.S  | 18 +++++++
 userland/arch/x86_64/movzx.S  | 33 ++++++++++++
 7 files changed, 287 insertions(+)
 create mode 100644 userland/arch/x86_64/cltq.S
 create mode 100644 userland/arch/x86_64/cmovcc.S
 create mode 100644 userland/arch/x86_64/cqto.S
 create mode 100644 userland/arch/x86_64/lea.S
 create mode 100644 userland/arch/x86_64/movsx.S
 create mode 100644 userland/arch/x86_64/movzx.S
diff --git a/README.adoc b/README.adoc
index 6796e79..ad7c466 100644
--- a/README.adoc
+++ b/README.adoc
@@ -12361,6 +12361,102 @@ Bibliography:
 * <<intel-manual-1>> 3.7.5 "Specifying an Offset"
 * https://sourceware.org/binutils/docs-2.18/as/i386_002dMemory.html
 
+=== x86 data transfer instructions
+
+<<intel-manual-1>> 5.1.1 "Data Transfer Instructions":
+
+* link:userland/arch/x86_64/lea.S[]: LEA
+* Integer typecasts
+** link:userland/arch/x86_64/movzx.S[]: MOVZX
+** link:userland/arch/x86_64/movsx.S[]: MOVSX
+
+==== x86 CQTO and CLTQ instructions
+
+Examples:
+
+* link:userland/arch/x86_64/cqto.S[] CQTO
+* link:userland/arch/x86_64/cltq.S[] CLTQ
+
+Intel mnemonics without E suffix (CWD, CDQ, CQO): sign extend RAX into RDX:RAX, or the smaller equivalents AX into DX:AX and EAX into EDX:EAX.
+
+Intel mnemonics with E suffix (CWDE, CDQE), and also CBW: sign extend within RAX itself.
+
+Commonly used right before a 32-bit IDIV, which takes its dividend from `edx:eax`: `edx` must therefore be set up from the sign of `eax` before dividing.
+
+Has some Intel vs AT&T name overload hell:
+
+* https://stackoverflow.com/questions/17170388/trying-to-understand-the-assembly-instruction-cltd-on-x86/50315201#50315201
+* https://sourceware.org/binutils/docs/as/i386_002dMnemonics.html
+
+GNU GAS accepts both syntaxes:
+
+[options="header"]
+|===
+|Intel |AT&T |From |To
+
+|CBW
+|CBTW
+|AL
+|AX
+
+|CWDE
+|CWTL
+|AX
+|EAX
+
+|CWD
+|CWTD
+|AX
+|DX:AX
+
+|CDQ
+|CLTD
+|EAX
+|EDX:EAX
+
+|CDQE
+|CLTQ
+|EAX
+|RAX
+
+|CQO
+|CQTO
+|RAX
+|RDX:RAX
+
+|===
+
+==== x86 CMOVcc instructions
+
+* link:userland/arch/x86_64/cmovcc.S[]: CMOVcc
+
+mov if a condition is met:
+
+....
+CMOVcc a, b
+....
+
+Equals:
+
+....
+if(flag) a = b
+....
+
+where `cc` are the same flags as Jcc.
+
+Vs jmp:
+
+* http://stackoverflow.com/questions/14131096/why-is-a-conditional-move-not-vulnerable-for-branch-prediction-failure
+* http://stackoverflow.com/questions/27136961/what-is-it-about-cmov-which-improves-cpu-pipeline-performance
+* http://stackoverflow.com/questions/26154488/difference-between-conditional-instructions-cmov-and-jump-instructions
+* http://stackoverflow.com/questions/6754454/speed-difference-between-if-else-and-ternary-operator-in-c?lq=1#comment8007791_6754495
+
+CMOVcc is not necessarily faster than a conditional jump, because branch prediction can make a well predicted jump very cheap.
+
+This is partly why the ternary `?` C operator exists: http://stackoverflow.com/questions/3565368/ternary-operator-vs-if-else
+
+It is interesting to compare this with ARMv7 conditional execution, which is available for all instructions: <<arm-conditional-execution>>
+
 === x86 binary arithmetic instructions
 
 <<intel-manual-1>> 5.1.2 "Binary Arithmetic Instructions":
diff --git a/userland/arch/x86_64/cltq.S b/userland/arch/x86_64/cltq.S
new file mode 100644
index 0000000..8cb2061
--- /dev/null
+++ b/userland/arch/x86_64/cltq.S
@@ -0,0 +1,27 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-cqto-and-cltq-instructions */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+    /* CLTQ: sign extend EAX into RAX. Bit 31 is zero: extend with zeroes. */
+    mov $0x123456787FFFFFFF, %rax
+    cltq
+    LKMC_ASSERT_EQ(%rax, $0x000000007FFFFFFF)
+
+    /* CLTQ: bit 31 is one: extend with ones. */
+    mov $0x1234567880000000, %rax
+    cltq
+    LKMC_ASSERT_EQ(%rax, $0xFFFFFFFF80000000)
+
+    /* CWTL: sign extend AX into EAX.
+     * As a 32-bit write, it also zeroes the top 32 bits of RAX. */
+    mov $0x123456789ABC8EF0, %rax
+    cwtl
+    LKMC_ASSERT_EQ(%rax, $0xFFFF8EF0)
+
+    /* CBTW: sign extend AL into AX.
+     * As a 16-bit write, it leaves the top 48 bits of RAX unchanged. */
+    mov $0x123456789ABCDE80, %rax
+    cbtw
+    LKMC_ASSERT_EQ(%rax, $0x123456789ABCFF80)
+LKMC_EPILOGUE
diff --git a/userland/arch/x86_64/cmovcc.S b/userland/arch/x86_64/cmovcc.S
new file mode 100644
index 0000000..ed6d31a
--- /dev/null
+++ b/userland/arch/x86_64/cmovcc.S
@@ -0,0 +1,35 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-cmovcc-instructions */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+    /* Destination starts at 0, source holds 1. */
+    mov $1, %rbx
+    mov $0, %rax
+    /* CF = 0: the CMOVC condition is false, so RAX is left untouched. */
+    clc
+    cmovc %rbx, %rax
+    LKMC_ASSERT_EQ(%rax, $0)
+
+    /* Same operands as before. */
+    mov $1, %rbx
+    mov $0, %rax
+    /* CF = 0: the CMOVNC condition is true, so RBX is copied into RAX. */
+    clc
+    cmovnc %rbx, %rax
+    LKMC_ASSERT_EQ(%rax, $1)
+
+    /* Same operands once more. */
+    mov $1, %rbx
+    mov $0, %rax
+    /* CF = 1: the CMOVC condition is true, so RBX is copied into RAX. */
+    stc
+    cmovc %rbx, %rax
+    LKMC_ASSERT_EQ(%rax, $1)
+
+#if 0
+    /* The source cannot be an immediate, the assembler fails with:
+     * Error: operand type mismatch for `cmovc' */
+    cmovc $1, %rax
+#endif
+LKMC_EPILOGUE
diff --git a/userland/arch/x86_64/cqto.S b/userland/arch/x86_64/cqto.S
new file mode 100644
index 0000000..86c16ee
--- /dev/null
+++ b/userland/arch/x86_64/cqto.S
@@ -0,0 +1,54 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-cqto-and-cltq-instructions */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+    /* CQTO, Quad to Octo: sign bit of RAX is 0. RDX is preloaded with a dummy value. */
+    mov $0x123456789ABCDEF0, %rdx
+    mov $0x7FFFFFFFFFFFFFFF, %rax
+    cqto
+    mov %rdx, %r13
+    mov %rax, %r12
+    /* RAX comes out unchanged. */
+    LKMC_ASSERT_EQ(%r12, $0x7FFFFFFFFFFFFFFF)
+    /* The dummy value in RDX was overwritten with zeroes. */
+    LKMC_ASSERT_EQ(%r13, $0)
+
+    /* CQTO: sign bit of RAX is 1, so RDX gets filled with ones. */
+    mov $0x123456789ABCDEF0, %rdx
+    mov $0x8000000000000000, %rax
+    cqto
+    mov %rdx, %r13
+    mov %rax, %r12
+    LKMC_ASSERT_EQ(%r12, $0x8000000000000000)
+    LKMC_ASSERT_EQ(%r13, $0xFFFFFFFFFFFFFFFF)
+
+    /* CQO: the equivalent Intel mnemonic, also accepted by GNU GAS. */
+    mov $0x123456789ABCDEF0, %rdx
+    mov $0x7FFFFFFFFFFFFFFF, %rax
+    cqo
+    mov %rdx, %r13
+    mov %rax, %r12
+    LKMC_ASSERT_EQ(%r12, $0x7FFFFFFFFFFFFFFF)
+    LKMC_ASSERT_EQ(%r13, $0)
+
+    /* CLTD, a smaller size example: Double to Quad, EAX into EDX:EAX.
+     * It also zeroes the top 32 bits of RDX, like many 32 to 64 operations. */
+    mov $0x123456789ABCDEF0, %rdx
+    mov $0xFFFFFFFF7FFFFFFF, %rax
+    cltd
+    mov %rdx, %r13
+    mov %rax, %r12
+    LKMC_ASSERT_EQ(%r12, $0xFFFFFFFF7FFFFFFF)
+    LKMC_ASSERT_EQ(%r13, $0)
+
+    /* CWTD, an even smaller size example: Word to Doubleword, AX into DX:AX.
+     * Unlike the 32-bit one, it leaves the top 48 bits of RDX unchanged. */
+    mov $0x123456789ABCDEF0, %rdx
+    mov $0xFFFFFFFFFFFF7FFF, %rax
+    cwtd
+    mov %rdx, %r13
+    mov %rax, %r12
+    LKMC_ASSERT_EQ(%r12, $0xFFFFFFFFFFFF7FFF)
+    LKMC_ASSERT_EQ(%r13, $0x123456789ABC0000)
+LKMC_EPILOGUE
diff --git a/userland/arch/x86_64/lea.S b/userland/arch/x86_64/lea.S
new file mode 100644
index 0000000..b18bb42
--- /dev/null
+++ b/userland/arch/x86_64/lea.S
@@ -0,0 +1,24 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-data-transfer-instructions
+ *
+ * Load Effective Address.
+ *
+ * Like MOV, but load the address instead of the value.
+ *
+ * Useful in particular for RIP relative addressing.
+ */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+    /* RIP relative operand: RAX receives the address of my_label. */
+    leaq my_label(%rip), %rax
+    LKMC_ASSERT_EQ(%rax, $my_label)
+
+    /* Usual base + index * scale + displacement form: my_label + 2 * 2 + 4 == my_label + 8. */
+    mov $2, %rbx
+    mov $my_label, %rax
+    leaq 4(%rax,%rbx,2), %rdx
+    LKMC_ASSERT_EQ(%rdx, $my_label_2)
+LKMC_EPILOGUE
+my_label: .skip 8
+my_label_2:
diff --git a/userland/arch/x86_64/movsx.S b/userland/arch/x86_64/movsx.S
new file mode 100644
index 0000000..ad07302
--- /dev/null
+++ b/userland/arch/x86_64/movsx.S
@@ -0,0 +1,18 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-data-transfer-instructions
+ *
+ * Like MOVZX but sign extend.
+ */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+    /* Bit 7 of AL is 0: AH gets filled with zeroes, top 16 bits of EAX are kept. */
+    movl $0x1234567F, %eax
+    movsx %al, %ax
+    LKMC_ASSERT_EQ_32(%eax, $0x1234007F)
+
+    /* Bit 7 of AL is 1: AH gets filled with ones, top 16 bits of EAX are kept. */
+    movl $0x12345680, %eax
+    movsx %al, %ax
+    LKMC_ASSERT_EQ_32(%eax, $0x1234FF80)
+LKMC_EPILOGUE
diff --git a/userland/arch/x86_64/movzx.S b/userland/arch/x86_64/movzx.S
new file mode 100644
index 0000000..1a10ddc
--- /dev/null
+++ b/userland/arch/x86_64/movzx.S
@@ -0,0 +1,33 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-data-transfer-instructions
+ *
+ * mov and zero extend
+ *
+ * unsigned typecast to larger types in 2's complement.
+ *
+ * MOV does this automatically from 32 to 64 bits:
+ * https://stackoverflow.com/questions/11177137/why-do-x86-64-instructions-on-32-bit-registers-zero-the-upper-part-of-the-full-6
+ */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+    /* Bit 7 of AL is 0: AH gets filled with zeroes. */
+    movl $0x1234567F, %eax
+    movzx %al, %ax
+    LKMC_ASSERT_EQ_32(%eax, $0x1234007F)
+
+    /* Bit 7 of AL is 1: makes no difference, AH still gets filled with zeroes. */
+    movl $0x1234568F, %eax
+    movzx %al, %ax
+    LKMC_ASSERT_EQ_32(%eax, $0x1234008F)
+
+#if 0
+    /* The source cannot be an immediate, since nothing would tell how to size it:
+     * Error: unsupported syntax for `movzx' */
+    movzx $0, %eax
+
+    /* Operands have the same size: the destination must be larger than the source.
+     * Error: unsupported syntax for `movzx' */
+    movzx %al, %al
+#endif
+LKMC_EPILOGUE