From 82129820ca959a03deebee5618de6ea43bd6183a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciro=20Santilli=20=E5=85=AD=E5=9B=9B=E4=BA=8B=E4=BB=B6=20?= =?UTF-8?q?=E6=B3=95=E8=BD=AE=E5=8A=9F?= Date: Thu, 6 Jun 2019 00:00:00 +0000 Subject: [PATCH] x86 asm: address modes, LKMC_ASSET_EQ_32 and intel manuals --- README.adoc | 180 +++++++++++++++++++-------- lkmc/x86_64.h | 9 ++ userland/arch/x86_64/address_modes.S | 95 ++++++++++++++ 3 files changed, 230 insertions(+), 54 deletions(-) create mode 100644 userland/arch/x86_64/address_modes.S diff --git a/README.adoc b/README.adoc index cbfe67b..b83fc17 100644 --- a/README.adoc +++ b/README.adoc @@ -11737,34 +11737,34 @@ Like other userland programs, these programs can be run as explained at: <> if your host can run the examples, e.g. x86 example on an x86 host +* <> if your host can run the examples, e.g. x86 example on an x86 host: * <> otherwise However, as usual, it is saner to build your toolchain as explained at: <>. -The first examples that you want to run for each arch are: +The first examples you should look into are: -* how to move data between registers and memory -* how to add two numbers! - -These examples are located at: - -* x86 +* add ** link:userland/arch/x86_64/add.S[] -** link:userland/arch/x86_64/mov.S[] -* arm -** <> ** link:userland/arch/arm/add.S[] ** link:userland/arch/aarch64/add.S[] +* mov between register and memory +** link:userland/arch/x86_64/mov.S[] +** <> +** <> +* addressing modes +** <> +** <> +* registers: <> -These examples use the venerable ADD instruction to: +The add examples in particular: * introduce the basics of how a given assembly works: how many inputs / outputs, who is input and output, can it use memory or just registers, etc. + It is then a big copy paste for most other data instructions. 
* verify that the venerable `add` instruction and our assertions are working -Then, modify that program to make the assertion fail: +Now try to modify the x86_64 add program to see the assertion fail: .... LKMC_ASSERT_EQ(%rax, $4) @@ -11969,6 +11969,47 @@ Examples under `arch//c/` directories show to how use inline assembly from ** link:userland/arch/aarch64/inline_asm/inc.c[] ** link:userland/arch/aarch64/inline_asm/multiline.cpp[] +==== GCC inline assembly register variables + +Used notably in some of the <> setups: + +* link:userland/arch/arm/inline_asm/reg_var.c[] +* link:userland/arch/aarch64/inline_asm/reg_var.c[] +* link:userland/arch/aarch64/inline_asm/reg_var_float.c[] + +In x86, makes it possible to access variables not exposed with the one letter register constraints. + +In arm, it is the only way to achieve this effect: https://stackoverflow.com/questions/10831792/how-to-use-specific-register-in-arm-inline-assembler + +This feature is notably useful for making system calls from C, see: <>. + +Documentation: https://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Explicit-Reg-Vars.html + +==== GCC inline assembly scratch registers + +How to use temporary registers in inline assembly: + +* x86_64 +** link:userland/arch/x86_64/inline_asm/scratch.c[] +** link:userland/arch/x86_64/inline_asm/scratch_hardcode.c[] + +Bibliography: https://stackoverflow.com/questions/6682733/gcc-prohibit-use-of-some-registers/54963829#54963829 + +==== GCC inline assembly early-clobbers + +An example of using the `&` early-clobber modifier: link:userland/arch/aarch64/earlyclobber.c + +More details at: https://stackoverflow.com/questions/15819794/when-to-use-earlyclobber-constraint-in-extended-gcc-inline-assembly/54853663#54853663 + +The assertion may fail without it. It actually does fail in GCC 8.2.0. 
+ +==== GCC inline assembly floating point ARM + +Not documented as of GCC 8.2, but possible: https://stackoverflow.com/questions/53960240/armv8-floating-point-output-inline-assembly + +* link:userland/arch/arm/inline_asm/inc_float.c[] +* link:userland/arch/aarch64/inline_asm/inc_float.c[] + ==== GCC intrinsics Pre-existing C wrappers using inline assembly, this is what production programs should use instead of inline assembly for SIMD: @@ -12034,47 +12075,6 @@ Bibliography: * https://www.cs.virginia.edu/~cr4bd/3330/S2018/simdref.html * https://software.intel.com/en-us/articles/how-to-use-intrinsics -==== GCC inline assembly register variables - -Used notably in some of the <> setups: - -* link:userland/arch/arm/inline_asm/reg_var.c[] -* link:userland/arch/aarch64/inline_asm/reg_var.c[] -* link:userland/arch/aarch64/inline_asm/reg_var_float.c[] - -In x86, makes it possible to access variables not exposed with the one letter register constraints. - -In arm, it is the only way to achieve this effect: https://stackoverflow.com/questions/10831792/how-to-use-specific-register-in-arm-inline-assembler - -This feature notably useful for making system calls from C, see: <>. - -Documentation: https://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Explicit-Reg-Vars.html - -==== GCC inline assembly scratch registers - -How to use temporary registers in inline assembly: - -* x86_64 -** link:userland/arch/x86_64/inline_asm/scratch.c[] -** link:userland/arch/x86_64/inline_asm/scratch_hardcode.c[] - -Bibliography: https://stackoverflow.com/questions/6682733/gcc-prohibit-use-of-some-registers/54963829#54963829 - -==== GCC inline assembly early-clobbers - -An example of using the `&` early-clobber modifier: link:userland/arch/aarch64/earlyclobber.c - -More details at: https://stackoverflow.com/questions/15819794/when-to-use-earlyclobber-constraint-in-extended-gcc-inline-assembly/54853663#54853663 - -The assertion may fail without it. It actually does fail in GCC 8.2.0. 
- -==== GCC inline assembly floating point ARM - -Not documented as of GCC 8.2, but possible: https://stackoverflow.com/questions/53960240/armv8-floating-point-output-inline-assembly - -* link:userland/arch/arm/inline_asm/inc_float.c[] -* link:userland/arch/aarch64/inline_asm/inc_float.c[] - === Linux system calls The following <> programs illustrate how to make system calls: @@ -12285,6 +12285,41 @@ Bibliography: https://stackoverflow.com/questions/27147043/n-suffix-to-branch-in Arch agnostic infrastructure getting started at: <>. +=== x86 addressing modes + +Example: link:userland/arch/x86_64/address_modes.S[] + +Several x86 instructions can calculate addresses of a complex form: + +.... +s:a(b, c, d) +.... + +which expands to: + +.... +a + b + c * d +.... + +Where the instruction encoding allows for: + +* `a`: displacement: an immediate constant, encoded in 8 or 32 bits +* `b`: base: any general purpose register +* `c`: index: any general purpose register except the stack pointer (ESP / RSP) +* `d`: scale: 1, 2, 4 or 8 (encoded in 2 SIB bits) +* `s`: a segment register. Cannot be tested simply from userland, so we won't talk about them here. See: https://github.com/cirosantilli/x86-bare-metal-examples/blob/6606a2647d44bc14e6fd695c0ea2b6b7a5f04ca3/segment_registers_real_mode.S + +The common compiler usage is: + +* `b`: base pointer, e.g. the start of an array +* `c` and `d`: array index and element size +* `a`: constant offset, e.g. of a struct field + +Bibliography: + +* <<intel-manual-1>> 3.7.5 "Specifying an Offset" +* https://sourceware.org/binutils/docs-2.18/as/i386_002dMemory.html + === x86 SIMD History: @@ -12348,6 +12383,43 @@ TODO We didn't manage to find a working ARM analogue to < * https://stackoverflow.com/questions/31620375/arm-cortex-a7-returning-pmccntr-0-in-kernel-mode-and-illegal-instruction-in-u/31649809#31649809 * https://blog.regehr.org/archives/794 +=== x86 assembly bibliography + +==== x86 official bibliography + +[[intel-manual]] +===== Intel 64 and IA-32 Architectures Software Developer's Manuals + +We are using the May 2019 version unless otherwise noted. 
+ +There are a few download forms at: https://software.intel.com/en-us/articles/intel-sdm + +The single PDF one is useless however because it does not have a unified ToC nor inter Volume links, so I just download the 4-part one. + +The Volumes are well split, so it is usually easy to guess where you should look. + +Also I can't find older versions on the website easily, so I just web archive everything. + +[[intel-manual-1]] +====== Intel 64 and IA-32 Architectures Software Developer's Manuals Volume 1 + +Userland basics: http://web.archive.org/web/20190606075544/https://software.intel.com/sites/default/files/managed/a4/60/253665-sdm-vol-1.pdf + +[[intel-manual-2]] +====== Intel 64 and IA-32 Architectures Software Developer's Manuals Volume 2 + +Instruction list: http://web.archive.org/web/20190606075330/https://software.intel.com/sites/default/files/managed/a4/60/325383-sdm-vol-2abcd.pdf + +[[intel-manual-3]] +====== Intel 64 and IA-32 Architectures Software Developer's Manuals Volume 3 + +Kernel land: http://web.archive.org/web/20190606075534/https://software.intel.com/sites/default/files/managed/a4/60/325384-sdm-vol-3abcd.pdf + +[[intel-manual-4]] +====== Intel 64 and IA-32 Architectures Software Developer's Manuals Volume 4 + +Model specific extensions: http://web.archive.org/web/20190606075325/https://software.intel.com/sites/default/files/managed/22/0d/335592-sdm-vol-4.pdf + == ARM userland assembly Arch general getting started at: <>. 
diff --git a/lkmc/x86_64.h b/lkmc/x86_64.h
index de4cce0..96067a0 100644
--- a/lkmc/x86_64.h
+++ b/lkmc/x86_64.h
@@ -12,6 +12,15 @@
     call lkmc_assert_eq_64; \
 ;
 
+#define LKMC_ASSERT_EQ_32(general1, general2) /* Call lkmc_assert_eq_32(general1, general2, __LINE__) on 32-bit operands. */ \
+    mov general2, %edi; /* general2 is read first, so general1 must not be %edi: it would already be overwritten. */ \
+    push %rdi; /* Park general2 on the stack; rsp-relative operands are off by 8 until the pop below. */ \
+    mov general1, %edi; /* 1st argument: general1. */ \
+    pop %rsi; /* 2nd argument: general2. */ \
+    mov $__LINE__, %edx; /* 3rd argument: line where the macro is used. */ \
+    call lkmc_assert_eq_32; \
+;
+
 #define LKMC_ASSERT_FAIL \
     mov $__LINE__, %edi; \
     call lkmc_assert_fail; \
diff --git a/userland/arch/x86_64/address_modes.S b/userland/arch/x86_64/address_modes.S
new file mode 100644
index 0000000..8556349
--- /dev/null
+++ b/userland/arch/x86_64/address_modes.S
@@ -0,0 +1,95 @@
+/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-addressing-modes */
+
+#include <lkmc.h>
+
+LKMC_PROLOGUE
+
+    /* First we play around with lea, which only computes the address, so the result is easier to assert. */
+
+    /* Full form with immediates: a(b, c, d) == a + b + c * d
+     *
+     * 4 + rbx + rcx * 2 =
+     * 4 + 3 + 4 * 2 =
+     * 4 + 3 + 8 =
+     * 7 + 8 =
+     * 15
+     */
+    mov $0, %rax
+    mov $3, %rbx
+    mov $4, %rcx
+    /* GAS 2.24 Warning: segment override on `lea' is ineffectual. */
+    /*lea %ds:4(%rbx, %rcx, 2), %rax*/
+    lea 4(%rbx, %rcx, 2), %rax
+    LKMC_ASSERT_EQ(%rax, $15)
+
+    /* Omit the multiplier d.
+     * a(b,c) == a(b,c,1)
+     */
+    mov $0, %rax
+    mov $3, %rbx
+    mov $4, %rcx
+    lea 2(%rbx, %rcx), %rax
+    LKMC_ASSERT_EQ(%rax, $9)
+
+    /* Omit c and d: a(b) == a + b. */
+    mov $0, %rax
+    mov $1, %rbx
+    lea 2(%rbx), %rax
+    LKMC_ASSERT_EQ(%rax, $3)
+
+    /* Register only address: a is omitted as well, and no commas are needed. */
+    mov $0, %rax
+    mov $1, %rbx
+    lea (%rbx), %rax
+    LKMC_ASSERT_EQ(%rax, $1)
+
+    /* TODO What is this syntax for? The GAS i386 memory reference docs appear to list `foo(,1)` as Intel `[foo]`. Compare to the next example. */
+    mov $0, %rax
+    lea 2(,1), %rax
+    LKMC_ASSERT_EQ(%rax, $2)
+
+    mov $0, %rax
+    lea 2, %rax /* Displacement only: no base, index or scale. */
+    LKMC_ASSERT_EQ(%rax, $2)
+
+    /* TODO What is this syntax for? Presumably just the displacement 2 as a parenthesized expression. Compare to the previous example. */
+    mov $0, %rax
+    lea (2), %rax
+    LKMC_ASSERT_EQ(%rax, $2)
+
+    mov $0, %rax
+    mov $3, %rbx
+    lea 2(,%rbx,4), %rax /* Omit b only: 2 + 3 * 4 == 14. */
+    LKMC_ASSERT_EQ(%rax, $14)
+
+    /* Expressions like (1 + 1) or more commonly (label + 1)
+     * can be used here like anywhere else: the assembler / linker
+     * resolves them for us.
+     */
+    mov $1, %rax
+    lea (1 + 1)(%rax), %rax
+    LKMC_ASSERT_EQ(%rax, $3)
+
+    /* Now some examples with the label and actual memory movs just for concreteness. */
+.data
+    myint: .long 0x12345678
+.text
+
+    /* Pointer dereference: plain `myint` is the data, to get the actual address instead use `$myint`: */
+    mov $myint, %rbx
+    mov (%rbx), %eax
+    LKMC_ASSERT_EQ_32(%eax, myint)
+
+    /* Regular memory IO is just a subcase of the full addressing mode syntax! */
+    mov $0, %rax
+    movl $0x9ABCDEF0, myint
+    mov myint, %eax /* 32-bit load: myint is a 4-byte .long, a %rax destination would read 8 bytes. */
+    LKMC_ASSERT_EQ_32(%eax, $0x9ABCDEF0)
+
+    /* Other instructions like add can also use the addressing. */
+    movl $1, myint
+    mov $myint, %rbx
+    addl $2, (%rbx)
+    LKMC_ASSERT_EQ_32(myint, $3)
+
+LKMC_EPILOGUE