mirror of
https://github.com/cirosantilli/linux-kernel-module-cheat.git
synced 2026-01-23 02:05:57 +01:00
x86 asm: move string instructions from x86-assembly-cheat
This commit is contained in:
131
README.adoc
131
README.adoc
@@ -12311,6 +12311,17 @@ When reading disassembly, many instructions have either a `.n` or `.w` suffix.
|
||||
|
||||
Bibliography: https://stackoverflow.com/questions/27147043/n-suffix-to-branch-instruction
|
||||
|
||||
=== NOP instructions
|
||||
|
||||
* x86: link:userland/arch/x86_64/nop.S[NOP]
|
||||
* ARM: <<arm-nop-instruction>>
|
||||
|
||||
No OPeration.
|
||||
|
||||
Does nothing except take up one processor cycle and occupy some instruction memory.
|
||||
|
||||
Applications: http://stackoverflow.com/questions/234906/whats-the-purpose-of-the-nop-opcode
|
||||
|
||||
== x86 userland assembly
|
||||
|
||||
Arch agnostic infrastructure getting started at: <<userland-assembly>>.
|
||||
@@ -12354,29 +12365,29 @@ Bibliography:
|
||||
|
||||
<<intel-manual-1>> 5.1.2 "Binary Arithmetic Instructions":
|
||||
|
||||
* link:userland/arch/x86_64/add.S[ADD]
|
||||
** link:userland/arch/x86_64/inc.S[INC]
|
||||
** link:userland/arch/x86_64/adc.S[ADC]
|
||||
* link:userland/arch/x86_64/sub.S[SUB]
|
||||
** link:userland/arch/x86_64/dec.S[DEC]
|
||||
** link:userland/arch/x86_64/sbb.S[SBB]
|
||||
* link:userland/arch/x86_64/mul.S[MUL]
|
||||
** link:userland/arch/x86_64/neg.S[NEG]
|
||||
** link:userland/arch/x86_64/imul.S[IMUL]
|
||||
* link:userland/arch/x86_64/div.S[DIV]
|
||||
** link:userland/arch/x86_64/div_overflow.S[DIV overflow]
|
||||
** link:userland/arch/x86_64/div_zero.S[DIV zero]
|
||||
** link:userland/arch/x86_64/idiv.S[IDIV]
|
||||
* link:userland/arch/x86_64/cmp.S[CMP]
|
||||
* link:userland/arch/x86_64/add.S[]: ADD
|
||||
** link:userland/arch/x86_64/inc.S[]: INC
|
||||
** link:userland/arch/x86_64/adc.S[]: ADC
|
||||
* link:userland/arch/x86_64/sub.S[]: SUB
|
||||
** link:userland/arch/x86_64/dec.S[]: DEC
|
||||
** link:userland/arch/x86_64/sbb.S[]: SBB
|
||||
* link:userland/arch/x86_64/mul.S[]: MUL
|
||||
** link:userland/arch/x86_64/neg.S[]: NEG
|
||||
** link:userland/arch/x86_64/imul.S[]: IMUL
|
||||
* link:userland/arch/x86_64/div.S[]: DIV
|
||||
** link:userland/arch/x86_64/div_overflow.S[]: DIV overflow
|
||||
** link:userland/arch/x86_64/div_zero.S[]: DIV zero
|
||||
** link:userland/arch/x86_64/idiv.S[]: IDIV
|
||||
* link:userland/arch/x86_64/cmp.S[]: CMP
|
||||
|
||||
=== x86 logical instructions
|
||||
|
||||
<<intel-manual-1>> 5.1.4 "Logical Instructions"
|
||||
|
||||
* link:userland/arch/x86_64/and.S[AND]
|
||||
* link:userland/arch/x86_64/not.S[NOT]
|
||||
* link:userland/arch/x86_64/or.S[OR]
|
||||
* link:userland/arch/x86_64/xor.S[XOR]
|
||||
* link:userland/arch/x86_64/and.S[]: AND
|
||||
* link:userland/arch/x86_64/not.S[]: NOT
|
||||
* link:userland/arch/x86_64/or.S[]: OR
|
||||
* link:userland/arch/x86_64/xor.S[]: XOR
|
||||
|
||||
=== x86 shift and rotate instructions
|
||||
|
||||
@@ -12400,10 +12411,10 @@ Keeps the same sign on right shift.
|
||||
Not directly exposed in C, for which right shift of a negative signed value is implementation-defined behavior, but does exist in Java via the `>>` operator (`>>>` is the logical shift). C compilers can emit it however.
|
||||
+
|
||||
SHL and SAL are exactly the same and have the same encoding: https://stackoverflow.com/questions/8373415/difference-between-shl-and-sal-in-80x86/56621271#56621271
|
||||
* link:userland/arch/x86_64/rol.S[ROL and ROR]
|
||||
* link:userland/arch/x86_64/rol.S[]: ROL and ROR
|
||||
+
|
||||
Rotates the bit that is going out around to the other side.
|
||||
* link:userland/arch/x86_64/rol.S[RCL and RCR]
|
||||
* link:userland/arch/x86_64/rol.S[]: RCL and RCR
|
||||
+
|
||||
Like ROL and ROR, but insert the carry bit instead, which effectively generates a rotation of 8 + 1 bits. TODO application.
|
||||
|
||||
@@ -12411,26 +12422,28 @@ Like ROL and ROR, but insert the carry bit instead, which effectively generates
|
||||
|
||||
<<intel-manual-1>> 5.1.6 "Bit and Byte Instructions"
|
||||
|
||||
* link:userland/arch/x86_64/bt.S[BT]
|
||||
* link:userland/arch/x86_64/bt.S[]: BT
|
||||
+
|
||||
Bit test: test if the Nth bit of a register is set and store the result in the CF flag.
|
||||
+
|
||||
....
|
||||
CF = reg[N]
|
||||
....
|
||||
* link:userland/arch/x86_64/btr.S[BTR]
|
||||
* link:userland/arch/x86_64/btr.S[]: BTR
|
||||
+
|
||||
Do a BT and then set the bit to 0.
|
||||
* link:userland/arch/x86_64/btc.S[BTC]
|
||||
* link:userland/arch/x86_64/btc.S[]: BTC
|
||||
+
|
||||
Do a BT and then swap the value of the tested bit.
|
||||
* link:userland/arch/x86_64/setcc.S[SETcc]
|
||||
* link:userland/arch/x86_64/setcc.S[]: SETcc
|
||||
+
|
||||
Set a a byte of a register to 0 or 1 depending on the cc condition.
|
||||
* link:userland/arch/x86_64/popcnt.S[POPCNT]
|
||||
Set a byte of a register to 0 or 1 depending on the cc condition.
|
||||
+
|
||||
Bibliography: https://stackoverflow.com/questions/1406783/how-to-read-and-write-x86-flags-registers-directly/30952577#30952577
|
||||
* link:userland/arch/x86_64/popcnt.S[]: POPCNT
|
||||
+
|
||||
Count the number of 1 bits.
|
||||
* link:userland/arch/x86_64/test.S[TEST]
|
||||
* link:userland/arch/x86_64/test.S[]: TEST
|
||||
+
|
||||
Like <<x86-binary-arithmetic-instructions,CMP>> but does AND instead of SUB:
|
||||
+
|
||||
@@ -12442,12 +12455,12 @@ ZF = (!(X && Y)) ? 1 : 0
|
||||
|
||||
<<intel-manual-1>> 5.1.7 "Control Transfer Instructions"
|
||||
|
||||
* link:userland/arch/x86_64/jmp.S[JMP]
|
||||
** link:userland/arch/x86_64/jmp_indirect.S[JMP indirect]
|
||||
* link:userland/arch/x86_64/jmp.S[]: JMP
|
||||
** link:userland/arch/x86_64/jmp_indirect.S[]: JMP indirect
|
||||
|
||||
==== x86 Jcc instructions
|
||||
|
||||
link:userland/arch/x86_64/jcc.S[Jcc]
|
||||
link:userland/arch/x86_64/jcc.S[]
|
||||
|
||||
Jump if certain conditions of the flags register are met.
|
||||
|
||||
@@ -12472,29 +12485,61 @@ JG vs JA and JL vs JB:
|
||||
|
||||
==== x86 LOOP instruction
|
||||
|
||||
link:userland/arch/x86_64/loop.S[LOOP]
|
||||
link:userland/arch/x86_64/loop.S[]
|
||||
|
||||
Vs <<x86-jcc-instructions,Jcc>>: https://stackoverflow.com/questions/6805692/x86-assembly-programming-loops-with-ecx-and-loop-instruction-versus-jmp-jcond Holy CISC!
|
||||
|
||||
==== x86 string instructions
|
||||
|
||||
<<intel-manual-1>> 5.1.8 "String Instructions"
|
||||
|
||||
These instructions do some operation on an array item, and automatically update the index to the next item:
|
||||
|
||||
* First example explained in more detail
|
||||
** link:userland/arch/x86_64/stos.S[]: STOS: STOre String: store register to memory. STOSD is called STOSL in GNU GAS as usual: https://stackoverflow.com/questions/6211629/gcc-inline-assembly-error-no-such-instruction-stosd
|
||||
* Further examples
|
||||
** link:userland/arch/x86_64/cmps.S[]: CMPS: CoMPare Strings: compare two values in memory with addresses given by RSI and RDI. Could be used to implement `memcmp`. Stores the result in ZF as usual.
|
||||
** link:userland/arch/x86_64/lods.S[]: LODS: LOaD String: load from memory to register.
|
||||
** link:userland/arch/x86_64/movs.S[]: MOVS: MOV String: move from one memory to another with addresses given by RSI and RDI. Could be used to implement `memmove`.
|
||||
** link:userland/arch/x86_64/scas.S[]: SCAS: SCan String: compare memory to the value in a register. Could be used to implement `strchr`.
|
||||
|
||||
The RSI and RDI registers are actually named after these instructions! S is the source of string instructions, D is the destination of string instructions.
|
||||
|
||||
The direction of the index increment depends on the direction flag of the FLAGS register: 0 means forward and 1 means backward: https://stackoverflow.com/questions/9636691/what-are-cld-and-std-for-in-x86-assembly-language-what-does-df-do
|
||||
|
||||
These instructions were originally developed to speed up "string" operations such as those present in the `<string.h>` header of the C standard library.
|
||||
|
||||
However, as computer architecture evolved, those instructions might not offer considerable speedups anymore, and modern glibc such as 2.29 just uses <<x86-simd>> operations instead, see also: https://stackoverflow.com/questions/33480999/how-can-the-rep-stosb-instruction-execute-faster-than-the-equivalent-loop
|
||||
|
||||
===== x86 REP prefix
|
||||
|
||||
Example: link:userland/arch/x86_64/rep.S[]
|
||||
|
||||
Repeat a string instruction RCX times.
|
||||
|
||||
As the repetitions happen:
|
||||
|
||||
* RCX decreases, until it reaches 0
|
||||
* RDI and RSI increase, or decrease if the direction flag is set
|
||||
|
||||
The variants: REPZ, REPNZ (alias REPE, REPNE) repeat a given instruction until something happens.
|
||||
|
||||
REPZ and REPNZ additionally stop early depending on the comparison they repeat: REPZ stops at the first mismatch (ZF = 0) and REPNZ stops at the first match (ZF = 1).
|
||||
|
||||
* REP: INS, OUTS, MOVS, LODS, and STOS
|
||||
* REPZ: CMPS and SCAS
|
||||
|
||||
=== x86 miscellaneous instructions
|
||||
|
||||
<<intel-manual-1>> 5.1.13 "Miscellaneous Instructions"
|
||||
|
||||
==== x86 NOP instruction
|
||||
|
||||
link:userland/arch/x86_64/nop.S[NOP]
|
||||
|
||||
No OPeration.
|
||||
|
||||
Does nothing except take up one processor cycle and occupy some instruction memory.
|
||||
|
||||
Applications: http://stackoverflow.com/questions/234906/whats-the-purpose-of-the-nop-opcode
|
||||
NOP: <<nop-instructions>>
|
||||
|
||||
=== x86 random number generator instructions
|
||||
|
||||
<<intel-manual-1>> 5.1.15 Random Number Generator Instructions
|
||||
|
||||
Example: link:userland/arch/x86_64/rdrand.S[RDRAND]
|
||||
Example: link:userland/arch/x86_64/rdrand.S[]: RDRAND
|
||||
|
||||
If you run that executable multiple times, it prints a random number every time to stdout.
|
||||
|
||||
@@ -12508,7 +12553,7 @@ RDRAND sets the carry flag when data is ready so we must loop if the carry flag
|
||||
|
||||
==== x86 CPUID instruction
|
||||
|
||||
Example: link:userland/arch/x86_64/cpuid.S[CPUID]
|
||||
Example: link:userland/arch/x86_64/cpuid.S[]
|
||||
|
||||
Fills EAX, EBX, ECX and EDX with CPU information.
|
||||
|
||||
@@ -13299,6 +13344,8 @@ See: <<arm-adr-instruction>>.
|
||||
|
||||
==== ARM NOP instruction
|
||||
|
||||
Parent section: <<nop-instructions>>
|
||||
|
||||
There are a few different ways to encode NOP, notably MOV a register into itself, and a dedicated miscellaneous instruction.
|
||||
|
||||
Example: link:userland/arch/arm/nop.S[]
|
||||
|
||||
24
userland/arch/x86_64/cmps.S
Normal file
24
userland/arch/x86_64/cmps.S
Normal file
@@ -0,0 +1,24 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-string-instructions */

/* CMPSQ: compare two quad arrays one element at a time.
 *
 * Each CMPSQ compares the quad at the address in RSI with the quad at
 * the address in RDI, and the SETZ that follows records whether that
 * pair was equal.
 */

#include <lkmc.h>

    .section .rodata
my_quad_array_1:
    .quad 1, 2
my_quad_array_2:
    .quad 1, 3

LKMC_PROLOGUE
    /* Clear the direction flag: walk forward through both arrays. */
    cld
    leaq my_quad_array_1(%rip), %rsi
    leaq my_quad_array_2(%rip), %rdi

    /* SETZ only writes the low byte, so zero the full registers first. */
    movq $0, %r12
    movq $0, %r13

    /* Element 0 of each array. */
    cmpsq
    setz %r12b
    /* Element 1 of each array. */
    cmpsq
    setz %r13b

    /* 1 == 1 */
    LKMC_ASSERT_EQ(%r12, $1)
    /* 2 != 3 */
    LKMC_ASSERT_EQ(%r13, $0)
LKMC_EPILOGUE
|
||||
16
userland/arch/x86_64/lods.S
Normal file
16
userland/arch/x86_64/lods.S
Normal file
@@ -0,0 +1,16 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-string-instructions */

/* LODSQ: load consecutive quads from the address in RSI into RAX. */

#include <lkmc.h>

    .section .rodata
my_quad_array:
    .quad 1, 2

LKMC_PROLOGUE
    /* Clear the direction flag: walk forward through the array. */
    cld
    /* RSI holds the address to load from. */
    leaq my_quad_array(%rip), %rsi

    /* First element: copy RAX away before the next load overwrites it. */
    lodsq
    movq %rax, %r12
    /* Second element. */
    lodsq
    movq %rax, %r13

    LKMC_ASSERT_EQ(%r12, $1)
    LKMC_ASSERT_EQ(%r13, $2)
LKMC_EPILOGUE
|
||||
22
userland/arch/x86_64/movs.S
Normal file
22
userland/arch/x86_64/movs.S
Normal file
@@ -0,0 +1,22 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-string-instructions */

/* MOVSQ: copy one array into another, one quad per instruction.
 *
 * The input is pointed to by RSI and the output by RDI.
 */

#include <lkmc.h>

    .section .rodata
src:
    .quad 1, 2

    .bss
dest:
    .skip 16

LKMC_PROLOGUE
    leaq src(%rip), %rsi
    leaq dest(%rip), %rdi
    /* Clear the direction flag: walk forward through both arrays. */
    cld

    /* Copy both elements. */
    movsq
    movsq

    LKMC_ASSERT_EQ(dest + 0, $1)
    LKMC_ASSERT_EQ(dest + 8, $2)
LKMC_EPILOGUE
|
||||
@@ -1,4 +1,4 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-nop-instruction */
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#nop-instructions */
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
|
||||
73
userland/arch/x86_64/rep.S
Normal file
73
userland/arch/x86_64/rep.S
Normal file
@@ -0,0 +1,73 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-rep-prefix */

/* REP / REPZ prefixes: repeat a string instruction RCX times.
 *
 * Three scenarios are exercised on the 16-byte src and dst buffers:
 *
 * - memset: REP STOSQ
 * - memcpy: REP MOVSQ
 * - memcmp: REPZ CMPSL, once on equal and once on different buffers
 */

#include <lkmc.h>

    .bss
src: .skip 16
dst: .skip 16

LKMC_PROLOGUE

    /* memset: REP STOSQ */
    cld
    lea dst(%rip), %rdi
    /* 2 elements. */
    mov $2, %rcx
    /* Set every element to 42. */
    mov $0x2A, %rax
    rep stosq
    /* RCX was decremented down to zero. */
    LKMC_ASSERT_EQ(%rcx, $0)
    /* And the memory was set. */
    LKMC_ASSERT_EQ(dst + 0, $0x2A)
    LKMC_ASSERT_EQ(dst + 8, $0x2A)

    /* memcpy: REP MOVSQ */
    cld
    movq $2, src + 0
    movq $3, src + 8
    lea src(%rip), %rsi
    lea dst(%rip), %rdi
    mov $2, %rcx
    rep movsq
    LKMC_ASSERT_EQ(dst + 0, $2)
    LKMC_ASSERT_EQ(dst + 8, $3)

    /* memcmp: REPZ CMPSL */

    /* Setup src as four 32-bit elements: 2, 3, 4, 5.
     * The elements are 4 bytes apart, so the last one lives at offset
     * 0xC: storing it at 0xA would overlap the element at 0x8.
     */
    movl $2, src + 0x0
    movl $3, src + 0x4
    movl $4, src + 0x8
    movl $5, src + 0xC

    /* Equal. */
    movl $2, dst + 0x0
    movl $3, dst + 0x4
    movl $4, dst + 0x8
    movl $5, dst + 0xC
    cld
    lea src(%rip), %rsi
    lea dst(%rip), %rdi
    mov $4, %rcx
    repz cmpsl
    /* MOV does not modify the flags, so ZF still reflects the last CMPSL. */
    mov %rcx, %r12
    /* Last flag was equal. */
    LKMC_ASSERT(jz)
    /* RCX was decreased all the way to zero. */
    LKMC_ASSERT_EQ(%r12, $0)

    /* Different: the element at index 2 is 2 instead of 4. */
    movl $2, dst + 0x0
    movl $3, dst + 0x4
    movl $2, dst + 0x8
    movl $5, dst + 0xC
    lea src(%rip), %rsi
    lea dst(%rip), %rdi
    mov $4, %rcx
    repz cmpsl
    mov %rcx, %r12
    LKMC_ASSERT(jnz)
    /* We stopped half-way with 1 comparison missing. */
    LKMC_ASSERT_EQ(%r12, $1)

LKMC_EPILOGUE
|
||||
25
userland/arch/x86_64/scas.S
Normal file
25
userland/arch/x86_64/scas.S
Normal file
@@ -0,0 +1,25 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-string-instructions */

/* SCASQ: compare RAX against consecutive quads at the address in RDI.
 *
 * The SETZ after each SCASQ records whether the values were equal.
 */

#include <lkmc.h>

    .section .rodata
my_quad_array:
    .quad 1, 2

LKMC_PROLOGUE
    /* Clear the direction flag: walk forward through the array. */
    cld
    /* RDI holds the address. */
    leaq my_quad_array(%rip), %rdi

    /* SETZ only writes the low byte, so zero the full registers first. */
    movq $0, %r12
    movq $0, %r13

    /* Compare RAX to *RDI (1 == 1) */
    movq $1, %rax
    scasq
    setz %r12b

    /* Compare RAX to *RDI (3 == 2) */
    movq $3, %rax
    scasq
    setz %r13b

    /* 1 == 1 */
    LKMC_ASSERT_EQ(%r12, $1)
    /* 2 != 3 */
    LKMC_ASSERT_EQ(%r13, $0)
LKMC_EPILOGUE
|
||||
62
userland/arch/x86_64/stos.S
Normal file
62
userland/arch/x86_64/stos.S
Normal file
@@ -0,0 +1,62 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-string-instructions */

/* STOSQ: store RAX to the address in RDI, then move RDI to the next
 * element. The direction flag selects forward (CLD) or backward (STD)
 * movement, and both directions are exercised here.
 */

#include <lkmc.h>

    .data
my_quad_array: .quad 0, 0
my_quad_array_expect_forward: .quad 1, 2
my_quad_array_expect_backwards: .quad 4, 3

LKMC_PROLOGUE

    /* Clear the direction flag: move forward. */
    cld

    /* The target address is stored in RDI. */
    lea my_quad_array(%rip), %rdi

    /* my_quad_array[0] = 1 */
    mov $1, %rax
    /* RAX is automatically used as the source. */
    stosq

    /* my_quad_array[1] = 2 */
    mov $2, %rax
    stosq

    /* RDI moved 2x 8 bytes forward. */
    sub $my_quad_array, %rdi
    LKMC_ASSERT_EQ(%rdi, $0x10)

    /* The memory was modified. */
    LKMC_ASSERT_MEMCMP(
        my_quad_array,
        my_quad_array_expect_forward,
        $0x10
    )

    /* Now with backwards direction. */
    std

    /* The target address is stored in RDI: start at the last element. */
    lea (my_quad_array + 8)(%rip), %rdi

    /* my_quad_array[1] = 3 */
    mov $3, %rax
    stosq

    /* my_quad_array[0] = 4 */
    mov $4, %rax
    stosq

    /* Restore the forward direction as soon as the backward stores are
     * done: the System V AMD64 ABI requires the direction flag to be
     * clear on function call and return, so it must not stay set across
     * the assertions and the epilogue below.
     */
    cld

    /* RDI moved 2x 8 bytes backwards. */
    sub $my_quad_array, %rdi
    LKMC_ASSERT_EQ(%rdi, $-0x8)

    /* The memory was modified. */
    LKMC_ASSERT_MEMCMP(
        my_quad_array,
        my_quad_array_expect_backwards,
        $0x10
    )

LKMC_EPILOGUE
|
||||
Reference in New Issue
Block a user