mirror of
https://github.com/cirosantilli/linux-kernel-module-cheat.git
synced 2026-01-26 03:31:36 +01:00
userland: add assembly support
Move arm assembly cheat here, and start some work on x86 cheat as well.
This commit is contained in:
9
userland/arch/aarch64/add.S
Normal file
9
userland/arch/aarch64/add.S
Normal file
@@ -0,0 +1,9 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#data-processing-instructions */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
mov x0, 1
|
||||
add x1, x0, 2
|
||||
ASSERT_EQ(x1, 3)
|
||||
EXIT
|
||||
21
userland/arch/aarch64/adr.S
Normal file
21
userland/arch/aarch64/adr.S
Normal file
@@ -0,0 +1,21 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#adr */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
.data
|
||||
data_label:
|
||||
.word 0x1234678
|
||||
ENTRY
|
||||
/* This is not possible in v7 because the label is in another section.
|
||||
* objdump says that this generates a R_AARCH64_ADR_PREL_LO21 relocation,
|
||||
* which looks specific to ADR, and therefore makes it more likely
|
||||
* that there was no such relocation in v7.
|
||||
*
|
||||
* This relocation is particularly important because str does not have a
|
||||
* pc-relative mode in ARMv8.
|
||||
*/
|
||||
adr x0, data_label
|
||||
ldr x1, =data_label
|
||||
label:
|
||||
ASSERT_EQ_REG(x0, x1)
|
||||
EXIT
|
||||
13
userland/arch/aarch64/adrp.S
Normal file
13
userland/arch/aarch64/adrp.S
Normal file
@@ -0,0 +1,13 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#adr */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
adrp x0, label
|
||||
adr x1, label
|
||||
label:
|
||||
/* Clear the lower 12 bits. */
|
||||
bic x1, x1, 0xFF
|
||||
bic x1, x1, 0xF00
|
||||
ASSERT_EQ_REG(x0, x1)
|
||||
EXIT
|
||||
@@ -1,13 +0,0 @@
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
int main(void) {
|
||||
uint32_t myvar = 1;
|
||||
__asm__ (
|
||||
"add %[myvar], %[myvar], 1;"
|
||||
: [myvar] "=r" (myvar)
|
||||
:
|
||||
:
|
||||
);
|
||||
assert(myvar == 2);
|
||||
}
|
||||
33
userland/arch/aarch64/beq.S
Normal file
33
userland/arch/aarch64/beq.S
Normal file
@@ -0,0 +1,33 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#cbz */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
/* cbz == 0 */
|
||||
mov x0, 0
|
||||
cbz x0, 1f
|
||||
FAIL
|
||||
1:
|
||||
|
||||
/* cbz != 0 */
|
||||
mov x0, 1
|
||||
cbz x0, 1f
|
||||
b 2f
|
||||
1:
|
||||
FAIL
|
||||
2:
|
||||
|
||||
/* cbnz != 0 */
|
||||
mov x0, 1
|
||||
cbnz x0, 1f
|
||||
FAIL
|
||||
1:
|
||||
|
||||
/* cbnz == 0 */
|
||||
mov x0, 0
|
||||
cbnz x0, 1f
|
||||
b 2f
|
||||
1:
|
||||
FAIL
|
||||
2:
|
||||
EXIT
|
||||
11
userland/arch/aarch64/bfi.S
Normal file
11
userland/arch/aarch64/bfi.S
Normal file
@@ -0,0 +1,11 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#bfi */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
ldr x0, =0x1122334455667788
|
||||
|
||||
ldr x1, =0xFFFFFFFFFFFFFFFF
|
||||
bfi x1, x0, 16, 32
|
||||
ASSERT_EQ(x1, 0xFFFF55667788FFFF)
|
||||
EXIT
|
||||
39
userland/arch/aarch64/c/asm_from_c.c
Normal file
39
userland/arch/aarch64/c/asm_from_c.c
Normal file
@@ -0,0 +1,39 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#calling-convention */
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
uint64_t my_asm_func(void);
|
||||
/* { return 42; } */
|
||||
__asm__(
|
||||
".global my_asm_func;"
|
||||
"my_asm_func:"
|
||||
"mov x0, 42;"
|
||||
"ret;"
|
||||
);
|
||||
|
||||
/* Now a more complex example that also calls a C function.
|
||||
* We have to save the return address held in x30 (the link register) for later because bl overwrites it.
|
||||
* https://stackoverflow.com/questions/27941220/push-lr-and-pop-lr-in-arm-arch64/34504752#34504752
|
||||
* We are not modifying any other callee saved register in this function,
|
||||
* since my_c_func is not either (unless GCC has a bug ;-)), so everything else is fine.
|
||||
*/
|
||||
uint64_t my_asm_func_2(void);
|
||||
/* { return my_c_func(); } */
|
||||
__asm__(
|
||||
".global my_asm_func_2;"
|
||||
"my_asm_func_2:"
|
||||
"str x30, [sp, -16]!;"
|
||||
"bl my_c_func;"
|
||||
"ldr x30, [sp], 16;"
|
||||
"ret;"
|
||||
);
|
||||
|
||||
uint64_t my_c_func(void) {
|
||||
return 42;
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
assert(my_asm_func() == 42);
|
||||
assert(my_asm_func_2() == 42);
|
||||
}
|
||||
1
userland/arch/aarch64/c/build
Symbolic link
1
userland/arch/aarch64/c/build
Symbolic link
@@ -0,0 +1 @@
|
||||
../build
|
||||
21
userland/arch/aarch64/c/earlyclobber.c
Normal file
21
userland/arch/aarch64/c/earlyclobber.c
Normal file
@@ -0,0 +1,21 @@
|
||||
/* An example of using the '&' earlyclobber modifier.
|
||||
* https://stackoverflow.com/questions/15819794/when-to-use-earlyclobber-constraint-in-extended-gcc-inline-assembly/54853663#54853663
|
||||
* The assertion may fail without it. It actually does fail in GCC 8.2.0 at
|
||||
* 34017bcd0bc96a3cf77f6acba4d58350e67c2694 + 1.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
int main(void) {
|
||||
uint64_t in = 1;
|
||||
uint64_t out;
|
||||
__asm__ (
|
||||
"add %[out], %[in], 1;"
|
||||
"add %[out], %[in], 1;"
|
||||
: [out] "=&r" (out)
|
||||
: [in] "r" (in)
|
||||
:
|
||||
);
|
||||
assert(out == 2);
|
||||
}
|
||||
1
userland/arch/aarch64/c/freestanding/build
Symbolic link
1
userland/arch/aarch64/c/freestanding/build
Symbolic link
@@ -0,0 +1 @@
|
||||
../build
|
||||
37
userland/arch/aarch64/c/freestanding/hello.c
Normal file
37
userland/arch/aarch64/c/freestanding/hello.c
Normal file
@@ -0,0 +1,37 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#freestanding-linux-inline-assembly-system-calls */
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
void _start(void) {
|
||||
uint64_t exit_status;
|
||||
|
||||
/* write */
|
||||
{
|
||||
char msg[] = "hello\n";
|
||||
uint64_t syscall_return;
|
||||
register uint64_t x0 __asm__ ("x0") = 1; /* stdout */
|
||||
register char *x1 __asm__ ("x1") = msg;
|
||||
register uint64_t x2 __asm__ ("x2") = sizeof(msg);
|
||||
register uint64_t x8 __asm__ ("x8") = 64; /* syscall number */
|
||||
__asm__ __volatile__ (
|
||||
"svc 0;"
|
||||
: "+r" (x0)
|
||||
: "r" (x1), "r" (x2), "r" (x8)
|
||||
: "memory"
|
||||
);
|
||||
syscall_return = x0;
|
||||
exit_status = (syscall_return != sizeof(msg));
|
||||
}
|
||||
|
||||
/* exit */
|
||||
{
|
||||
register uint64_t x0 __asm__ ("x0") = exit_status;
|
||||
register uint64_t x8 __asm__ ("x8") = 93;
|
||||
__asm__ __volatile__ (
|
||||
"svc 0;"
|
||||
: "+r" (x0)
|
||||
: "r" (x8)
|
||||
:
|
||||
);
|
||||
}
|
||||
}
|
||||
40
userland/arch/aarch64/c/freestanding/hello_clobbers.c
Normal file
40
userland/arch/aarch64/c/freestanding/hello_clobbers.c
Normal file
@@ -0,0 +1,40 @@
|
||||
/* Like hello.c trying to do it without named register variables.
|
||||
* The code is more complicated, and I was not able to get as efficient,
|
||||
* so better just stick to named register variables.
|
||||
*/
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
void _start(void) {
|
||||
uint64_t exit_status;
|
||||
|
||||
/* write */
|
||||
{
|
||||
char msg[] = "hello\n";
|
||||
uint64_t syscall_return;
|
||||
__asm__ (
|
||||
"mov x0, 1;" /* stdout */
|
||||
"mov x1, %[msg];"
|
||||
"mov x2, %[len];"
|
||||
"mov x8, 64;" /* syscall number */
|
||||
"svc 0;"
|
||||
"mov %[syscall_return], x0;"
|
||||
: [syscall_return] "=r" (syscall_return)
|
||||
: [msg] "p" (msg),
|
||||
[len] "i" (sizeof(msg))
|
||||
: "x0", "x1", "x2", "x8", "memory"
|
||||
);
|
||||
exit_status = (syscall_return != sizeof(msg));
|
||||
}
|
||||
|
||||
/* exit */
|
||||
__asm__ (
|
||||
"mov x0, %[exit_status];"
|
||||
"mov x8, 93;" /* syscall number */
|
||||
"svc 0;"
|
||||
:
|
||||
: [exit_status] "r" (exit_status)
|
||||
: "x0", "x8"
|
||||
);
|
||||
}
|
||||
|
||||
13
userland/arch/aarch64/c/inc.c
Normal file
13
userland/arch/aarch64/c/inc.c
Normal file
@@ -0,0 +1,13 @@
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
int main(void) {
|
||||
uint64_t io = 1;
|
||||
__asm__ (
|
||||
"add %[io], %[io], 1;"
|
||||
: [io] "+r" (io)
|
||||
:
|
||||
:
|
||||
);
|
||||
assert(io == 2);
|
||||
}
|
||||
28
userland/arch/aarch64/c/inc_float.c
Normal file
28
userland/arch/aarch64/c/inc_float.c
Normal file
@@ -0,0 +1,28 @@
|
||||
/* https://stackoverflow.com/questions/53960240/armv8-floating-point-output-inline-assembly
|
||||
*
|
||||
* We use the undocumented %s and %d modifiers!
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
int main(void) {
|
||||
float my_float = 1.5;
|
||||
__asm__ (
|
||||
"fmov s0, 1.0;"
|
||||
"fadd %s[my_float], %s[my_float], s0;"
|
||||
: [my_float] "+w" (my_float)
|
||||
:
|
||||
: "s0"
|
||||
);
|
||||
assert(my_float == 2.5);
|
||||
|
||||
double my_double = 1.5;
|
||||
__asm__ (
|
||||
"fmov d0, 1.0;"
|
||||
"fadd %d[my_double], %d[my_double], d0;"
|
||||
: [my_double] "+w" (my_double)
|
||||
:
|
||||
: "d0"
|
||||
);
|
||||
assert(my_double == 2.5);
|
||||
}
|
||||
18
userland/arch/aarch64/c/multiline.cpp
Normal file
18
userland/arch/aarch64/c/multiline.cpp
Normal file
@@ -0,0 +1,18 @@
|
||||
// https://stackoverflow.com/questions/3666013/how-to-write-multiline-inline-assembly-code-in-gcc-c/54575948#54575948
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
int main(void) {
|
||||
uint64_t io = 0;
|
||||
__asm__ (
|
||||
R"(
|
||||
add %[io], %[io], #1
|
||||
add %[io], %[io], #1
|
||||
)"
|
||||
: [io] "+r" (io)
|
||||
:
|
||||
:
|
||||
);
|
||||
assert(io == 2);
|
||||
}
|
||||
27
userland/arch/aarch64/c/reg_var.c
Normal file
27
userland/arch/aarch64/c/reg_var.c
Normal file
@@ -0,0 +1,27 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#register-variables */
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
int main(void) {
|
||||
register uint32_t x0 __asm__ ("x0");
|
||||
register uint32_t x1 __asm__ ("x1");
|
||||
uint32_t new_x0;
|
||||
uint32_t new_x1;
|
||||
{
|
||||
x0 = 1;
|
||||
x1 = 2;
|
||||
__asm__ (
|
||||
"add %[x0], x0, #1;"
|
||||
"add %[x1], x1, #1;"
|
||||
: [x0] "+r" (x0),
|
||||
[x1] "+r" (x1)
|
||||
:
|
||||
:
|
||||
);
|
||||
new_x0 = x0;
|
||||
new_x1 = x1;
|
||||
}
|
||||
assert(new_x0 == 2);
|
||||
assert(new_x1 == 3);
|
||||
}
|
||||
28
userland/arch/aarch64/c/reg_var_float.c
Normal file
28
userland/arch/aarch64/c/reg_var_float.c
Normal file
@@ -0,0 +1,28 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#register-variables */
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
int main(void) {
|
||||
register double d0 __asm__ ("d0");
|
||||
register double d1 __asm__ ("d1");
|
||||
double new_d0;
|
||||
double new_d1;
|
||||
{
|
||||
d0 = 1.5;
|
||||
d1 = 2.5;
|
||||
__asm__ (
|
||||
"fmov d2, 1.5;"
|
||||
"fadd %d[d0], d0, d2;"
|
||||
"fadd %d[d1], d1, d2;"
|
||||
: [d0] "+w" (d0),
|
||||
[d1] "+w" (d1)
|
||||
:
|
||||
: "d2"
|
||||
);
|
||||
new_d0 = d0;
|
||||
new_d1 = d1;
|
||||
}
|
||||
assert(new_d0 == 3.0);
|
||||
assert(new_d1 == 4.0);
|
||||
}
|
||||
19
userland/arch/aarch64/cbz.S
Normal file
19
userland/arch/aarch64/cbz.S
Normal file
@@ -0,0 +1,19 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#cbz */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
|
||||
/* Branch. */
|
||||
mov x0, 0x0
|
||||
cbz x0, ok
|
||||
FAIL
|
||||
ok:
|
||||
|
||||
/* Don't branch. */
|
||||
mov x0, 0x1
|
||||
cbz x0, ko
|
||||
|
||||
EXIT
|
||||
ko:
|
||||
FAIL
|
||||
17
userland/arch/aarch64/comments.S
Normal file
17
userland/arch/aarch64/comments.S
Normal file
@@ -0,0 +1,17 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#comments */
|
||||
|
||||
#include "common.h"
|
||||
ENTRY
|
||||
# mycomment
|
||||
|
||||
/* ARMv8 uses // instead of @ for comments. */
|
||||
// mycomment
|
||||
nop // mycomment
|
||||
|
||||
/* All these fail. Lol, different than v7, no consistency. */
|
||||
#if 0
|
||||
nop # mycomment
|
||||
@ mycomment
|
||||
nop @ mycomment
|
||||
#endif
|
||||
EXIT
|
||||
64
userland/arch/aarch64/common_arch.h
Normal file
64
userland/arch/aarch64/common_arch.h
Normal file
@@ -0,0 +1,64 @@
|
||||
#ifndef COMMON_ARCH_H
|
||||
#define COMMON_ARCH_H
|
||||
|
||||
#define ASSERT_EQ(reg, const) \
|
||||
ldr x11, =const; \
|
||||
cmp reg, x11; \
|
||||
ASSERT(beq); \
|
||||
;
|
||||
|
||||
#define ASSERT_MEMCMP(s1, s2, n) \
|
||||
MEMCMP(s1, s2, n); \
|
||||
ASSERT_EQ(x0, 0); \
|
||||
;
|
||||
|
||||
#define ENTRY \
|
||||
.text; \
|
||||
.global asm_main; \
|
||||
asm_main: \
|
||||
sub sp, sp, 0xA0; \
|
||||
stp x29, x30, [sp]; \
|
||||
stp x27, x28, [sp, 0x10]; \
|
||||
stp x25, x26, [sp, 0x20]; \
|
||||
stp x23, x24, [sp, 0x30]; \
|
||||
stp x21, x22, [sp, 0x40]; \
|
||||
stp x19, x20, [sp, 0x50]; \
|
||||
stp x6, x7, [sp, 0x60]; \
|
||||
stp x4, x5, [sp, 0x70]; \
|
||||
stp x2, x3, [sp, 0x80]; \
|
||||
stp x0, x1, [sp, 0x90]; \
|
||||
asm_main_after_prologue: \
|
||||
;
|
||||
|
||||
#define EXIT \
|
||||
mov w0, 0; \
|
||||
mov w1, 0; \
|
||||
b pass; \
|
||||
fail: \
|
||||
ldr x1, [sp, 0x90]; \
|
||||
str w0, [x1]; \
|
||||
mov w0, 1; \
|
||||
pass: \
|
||||
ldp x19, x20, [sp, 0x50]; \
|
||||
ldp x21, x22, [sp, 0x40]; \
|
||||
ldp x23, x24, [sp, 0x30]; \
|
||||
ldp x25, x26, [sp, 0x20]; \
|
||||
ldp x27, x28, [sp, 0x10]; \
|
||||
ldp x29, x30, [sp]; \
|
||||
add sp, sp, 0xA0; \
|
||||
ret; \
|
||||
;
|
||||
|
||||
#define FAIL \
|
||||
ldr w0, =__LINE__; \
|
||||
b fail; \
|
||||
;
|
||||
|
||||
#define MEMCMP(s1, s2, n) \
|
||||
adr x0, s1; \
|
||||
adr x1, s2; \
|
||||
ldr x2, =n; \
|
||||
bl memcmp; \
|
||||
;
|
||||
|
||||
#endif
|
||||
28
userland/arch/aarch64/cset.S
Normal file
28
userland/arch/aarch64/cset.S
Normal file
@@ -0,0 +1,28 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#cset */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
/* Test values. */
|
||||
mov x0, 0
|
||||
mov x1, 1
|
||||
|
||||
/* eq is true, set x2 = 1. */
|
||||
cmp x0, x0
|
||||
cset x2, eq
|
||||
ASSERT_EQ(x2, 1)
|
||||
|
||||
/* eq is false, set x2 = 0. */
|
||||
cmp x0, x1
|
||||
cset x2, eq
|
||||
ASSERT_EQ(x2, 0)
|
||||
|
||||
/* Same for ne. */
|
||||
cmp x0, x0
|
||||
cset x2, ne
|
||||
ASSERT_EQ(x2, 0)
|
||||
|
||||
cmp x0, x1
|
||||
cset x2, ne
|
||||
ASSERT_EQ(x2, 1)
|
||||
EXIT
|
||||
1
userland/arch/aarch64/empty.S
Symbolic link
1
userland/arch/aarch64/empty.S
Symbolic link
@@ -0,0 +1 @@
|
||||
../empty.S
|
||||
1
userland/arch/aarch64/fail.S
Symbolic link
1
userland/arch/aarch64/fail.S
Symbolic link
@@ -0,0 +1 @@
|
||||
../fail.S
|
||||
60
userland/arch/aarch64/floating_point.S
Normal file
60
userland/arch/aarch64/floating_point.S
Normal file
@@ -0,0 +1,60 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#advanced-simd-instructions */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
/* 1.5 + 2.5 == 4.0
|
||||
* using 64-bit double immediates.
|
||||
*/
|
||||
fmov d0, 1.5
|
||||
fmov d1, 2.5
|
||||
fadd d2, d0, d1
|
||||
fmov d3, 4.0
|
||||
/* Unlike VFP vcmp, this stores the status
|
||||
* automatically in the main condition flags (NZCV).
|
||||
*/
|
||||
fcmp d2, d3
|
||||
ASSERT(beq)
|
||||
|
||||
/* Now with a memory stored value. */
|
||||
.data
|
||||
my_double_0:
|
||||
.double 1.5
|
||||
my_double_1:
|
||||
.double 2.5
|
||||
my_double_sum_expect:
|
||||
.double 4.0
|
||||
.text
|
||||
ldr d0, my_double_0
|
||||
ldr d1, my_double_1
|
||||
fadd d2, d0, d1
|
||||
ldr d3, my_double_sum_expect
|
||||
fcmp d2, d3
|
||||
ASSERT(beq)
|
||||
|
||||
/* Now in 32-bit. */
|
||||
fmov s0, 1.5
|
||||
fmov s1, 2.5
|
||||
fadd s2, s0, s1
|
||||
fmov s3, 4.0
|
||||
fcmp s2, s3
|
||||
ASSERT(beq)
|
||||
|
||||
/* TODO why? What's the point of q then?
|
||||
* Error: operand mismatch -- `fmov q0,1.5'
|
||||
*/
|
||||
#if 0
|
||||
fmov q0, 1.5
|
||||
#endif
|
||||
|
||||
/* Much like integers, immediates are constrained to
|
||||
* fit in 32-bit instructions. TODO exact rules.
|
||||
*
|
||||
* Assembly here would fail with:
|
||||
*
|
||||
* Error: invalid floating-point constant at operand 2
|
||||
*/
|
||||
#if 0
|
||||
fmov d0, 1.23456798
|
||||
#endif
|
||||
EXIT
|
||||
1
userland/arch/aarch64/freestanding/build
Symbolic link
1
userland/arch/aarch64/freestanding/build
Symbolic link
@@ -0,0 +1 @@
|
||||
../build
|
||||
20
userland/arch/aarch64/freestanding/hello.S
Normal file
20
userland/arch/aarch64/freestanding/hello.S
Normal file
@@ -0,0 +1,20 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#linux-system-calls */
|
||||
|
||||
.text
|
||||
.global _start
|
||||
_start:
|
||||
asm_main_after_prologue:
|
||||
/* write */
|
||||
mov x0, 1 /* stdout */
|
||||
adr x1, msg /* buffer */
|
||||
ldr x2, =len /* len */
|
||||
mov x8, 64 /* syscall number */
|
||||
svc 0
|
||||
|
||||
/* exit */
|
||||
mov x0, 0 /* exit status */
|
||||
mov x8, 93 /* syscall number */
|
||||
svc 0
|
||||
msg:
|
||||
.ascii "hello\n"
|
||||
len = . - msg
|
||||
6
userland/arch/aarch64/hello_driver.S
Normal file
6
userland/arch/aarch64/hello_driver.S
Normal file
@@ -0,0 +1,6 @@
|
||||
.text
|
||||
.global asm_main
|
||||
asm_main:
|
||||
asm_main_after_prologue:
|
||||
mov w0, 0
|
||||
ret
|
||||
9
userland/arch/aarch64/immediates.S
Normal file
9
userland/arch/aarch64/immediates.S
Normal file
@@ -0,0 +1,9 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#immediates */
|
||||
|
||||
#include "common.h"
|
||||
ENTRY
|
||||
mov x0, 1
|
||||
mov x0, 0x1
|
||||
mov x0, 1
|
||||
mov x0, 0x1
|
||||
EXIT
|
||||
26
userland/arch/aarch64/movk.S
Normal file
26
userland/arch/aarch64/movk.S
Normal file
@@ -0,0 +1,26 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#movk */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
movk x0, 0x4444, lsl 0
|
||||
movk x0, 0x3333, lsl 16
|
||||
movk x0, 0x2222, lsl 32
|
||||
movk x0, 0x1111, lsl 48
|
||||
ASSERT_EQ(x0, 0x1111222233334444)
|
||||
|
||||
/* Set a label (addresses are 48-bit) with immediates:
|
||||
*
|
||||
* * https://stackoverflow.com/questions/38570495/aarch64-relocation-prefixes
|
||||
* * https://sourceware.org/binutils/docs-2.26/as/AArch64_002dRelocations.html
|
||||
*
|
||||
* This could be used if the label is too far away for
|
||||
* adr relative addressing.
|
||||
*/
|
||||
movz x0, :abs_g2:label /* bits 32-47, overflow check */
|
||||
movk x0, :abs_g1_nc:label /* bits 16-31, no overflow check */
|
||||
movk x0, :abs_g0_nc:label /* bits 0-15, no overflow check */
|
||||
adr x1, label
|
||||
label:
|
||||
ASSERT_EQ_REG(x0, x1)
|
||||
EXIT
|
||||
9
userland/arch/aarch64/movn.S
Normal file
9
userland/arch/aarch64/movn.S
Normal file
@@ -0,0 +1,9 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#movn */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
ldr x0, =0x123456789ABCDEF0
|
||||
movn x0, 0x8888, lsl 16
|
||||
ASSERT_EQ(x0, 0xFFFFFFFF7777FFFF)
|
||||
EXIT
|
||||
78
userland/arch/aarch64/pc.S
Normal file
78
userland/arch/aarch64/pc.S
Normal file
@@ -0,0 +1,78 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#registers */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
#if 0
|
||||
/* Unlike v7, we can't use PC like any other register in ARMv8,
|
||||
* since it is not a general purpose register anymore.
|
||||
*
|
||||
* Only branch instructions can modify the PC.
|
||||
*
|
||||
* B1.2.1 "Registers in AArch64 state" says:
|
||||
*
|
||||
* Software cannot write directly to the PC. It
|
||||
* can only be updated on a branch, exception entry or
|
||||
* exception return.
|
||||
*/
|
||||
ldr pc, =10f
|
||||
FAIL
|
||||
10:
|
||||
#endif
|
||||
#if 0
|
||||
mov x0, pc
|
||||
#endif
|
||||
|
||||
/* LDR PC-relative loads exist in ARMv8, but they have a separate encoding
|
||||
* "LDR (literal)" instead of "LDR (immediate)":
|
||||
* https://stackoverflow.com/questions/28638981/howto-write-pc-relative-adressing-on-arm-asm/54480999#54480999
|
||||
*/
|
||||
ldr x0, pc_relative_ldr
|
||||
b 1f
|
||||
pc_relative_ldr:
|
||||
.quad 0x123456789ABCDEF0
|
||||
1:
|
||||
ASSERT_EQ(x0, 0x123456789ABCDEF0)
|
||||
|
||||
/* Just for fun, we can also use relative numbers instead of labels.
|
||||
* https://reverseengineering.stackexchange.com/questions/17666/how-does-the-ldr-instruction-work-on-arm/20567#20567
|
||||
*/
|
||||
ldr x0, 0x8
|
||||
b 1f
|
||||
.quad 0x123456789ABCDEF0
|
||||
1:
|
||||
ASSERT_EQ(x0, 0x123456789ABCDEF0)
|
||||
|
||||
/* Analogous for b with PC. */
|
||||
mov x0, 0
|
||||
/* Jumps over mov to ASSERT_EQ. */
|
||||
b 8
|
||||
mov x0, 1
|
||||
ASSERT_EQ(x0, 0)
|
||||
|
||||
/* Trying to use the old "LDR (immediate)" PC-relative
|
||||
* syntax does not work.
|
||||
*/
|
||||
#if 0
|
||||
/* 64-bit integer or SP register expected at operand 2 -- `ldr x0,[pc]' */
|
||||
ldr x0, [pc]
|
||||
#endif
|
||||
|
||||
/* There is however no analogue for str. TODO rationale? */
|
||||
#if 0
|
||||
/* Error: invalid addressing mode at operand 2 -- `str x0,pc_relative_str' */
|
||||
str x0, pc_relative_str
|
||||
#endif
|
||||
|
||||
/* You just have to use adr + "STR (register)". */
|
||||
ldr x0, pc_relative_str
|
||||
ASSERT_EQ(x0, 0x0)
|
||||
adr x1, pc_relative_str
|
||||
ldr x0, pc_relative_ldr
|
||||
str x0, [x1]
|
||||
ldr x0, pc_relative_str
|
||||
ASSERT_EQ(x0, 0x123456789ABCDEF0)
|
||||
EXIT
|
||||
.data
|
||||
pc_relative_str:
|
||||
.quad 0x0000000000000000
|
||||
47
userland/arch/aarch64/regs.S
Normal file
47
userland/arch/aarch64/regs.S
Normal file
@@ -0,0 +1,47 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#armv8-registers */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
|
||||
/* 31 64-bit eXtended general purpose registers. */
|
||||
mov x0, 0
|
||||
mov x1, 1
|
||||
mov x2, 2
|
||||
mov x3, 3
|
||||
mov x4, 4
|
||||
mov x5, 5
|
||||
mov x6, 6
|
||||
mov x7, 7
|
||||
mov x8, 8
|
||||
mov x9, 9
|
||||
mov x10, 10
|
||||
mov x11, 11
|
||||
mov x12, 12
|
||||
mov x13, 13
|
||||
mov x14, 14
|
||||
mov x15, 15
|
||||
mov x16, 16
|
||||
mov x17, 17
|
||||
mov x18, 18
|
||||
mov x19, 19
|
||||
mov x20, 20
|
||||
mov x21, 21
|
||||
mov x22, 22
|
||||
mov x23, 23
|
||||
mov x24, 24
|
||||
mov x25, 25
|
||||
mov x26, 26
|
||||
mov x27, 27
|
||||
mov x28, 28
|
||||
mov x29, 29
|
||||
|
||||
/* x30 is the link register. BL stores the return address here. */
|
||||
/*mov x30, 30*/
|
||||
|
||||
/* W form addresses the lower 4 bytes word, and zeroes the top. */
|
||||
ldr x0, =0x1111222233334444
|
||||
ldr x1, =0x5555666677778888
|
||||
mov w0, w1
|
||||
ASSERT_EQ(x0, 0x0000000077778888)
|
||||
EXIT
|
||||
28
userland/arch/aarch64/ret.S
Normal file
28
userland/arch/aarch64/ret.S
Normal file
@@ -0,0 +1,28 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#bl */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
mov x0, 1
|
||||
bl inc
|
||||
ASSERT_EQ(x0, 2)
|
||||
bl inc2
|
||||
ASSERT_EQ(x0, 3)
|
||||
bl inc3
|
||||
ASSERT_EQ(x0, 4)
|
||||
EXIT
|
||||
|
||||
/* uint64_t inc(uint64_t i) { return i + 1; } */
|
||||
inc:
|
||||
add x0, x0, 1
|
||||
ret
|
||||
|
||||
/* Same but explicit return register. */
|
||||
inc2:
|
||||
add x0, x0, 1
|
||||
ret x30
|
||||
|
||||
/* Same but with br. */
|
||||
inc3:
|
||||
add x0, x0, 1
|
||||
br x30
|
||||
86
userland/arch/aarch64/simd.S
Normal file
86
userland/arch/aarch64/simd.S
Normal file
@@ -0,0 +1,86 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#advanced-simd-instructions */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
/* 4x 32-bit integer add.
|
||||
*
|
||||
* s stands for single == 32 bits.
|
||||
*
|
||||
* 1 in ld1 means to load just one register, see:
|
||||
* https://github.com/cirosantilli/arm-assembly-cheat#simd-interleaving
|
||||
*/
|
||||
.data
|
||||
u32_0: .word 0xF111F111, 0xF222F222, 0xF333F333, 0xF444F444
|
||||
u32_1: .word 0x15551555, 0x16661666, 0x17771777, 0x18881888
|
||||
u32_sum_expect: .word 0x06670666, 0x08890888, 0x0AAB0AAA, 0x0CCD0CCC
|
||||
.bss
|
||||
u32_sum: .skip 16
|
||||
.text
|
||||
adr x0, u32_0
|
||||
ld1 {v0.4s}, [x0]
|
||||
adr x1, u32_1
|
||||
ld1 {v1.4s}, [x1]
|
||||
add v2.4s, v0.4s, v1.4s
|
||||
adr x0, u32_sum
|
||||
st1 {v2.4s}, [x0]
|
||||
ASSERT_MEMCMP(u32_sum, u32_sum_expect, 0x10)
|
||||
|
||||
/* 2x 64-bit integer add.
|
||||
*
|
||||
* d stands for double == 64 bits.
|
||||
*/
|
||||
.data
|
||||
u64_0: .quad 0xF1111111F1111111, 0xF2222222F2222222
|
||||
u64_1: .quad 0x1555555515555555, 0x1666666616666666
|
||||
u64_sum_expect: .quad 0x0666666706666666, 0x0888888908888888
|
||||
.bss
|
||||
u64_sum: .skip 16
|
||||
.text
|
||||
adr x0, u64_0
|
||||
ld1 {v0.2d}, [x0]
|
||||
adr x1, u64_1
|
||||
ld1 {v1.2d}, [x1]
|
||||
add v2.2d, v0.2d, v1.2d
|
||||
adr x0, u64_sum
|
||||
st1 {v2.2d}, [x0]
|
||||
ASSERT_MEMCMP(u64_sum, u64_sum_expect, 0x10)
|
||||
|
||||
/* 4x 32-bit float add.
|
||||
*
|
||||
* The only difference from the integer version
|
||||
* is that we use fadd instead of add.
|
||||
*/
|
||||
.data
|
||||
f32_0: .float 1.5, 2.5, 3.5, 4.5
|
||||
f32_1: .float 5.5, 6.5, 7.5, 8.5
|
||||
f32_sum_expect: .float 7.0, 9.0, 11.0, 13.0
|
||||
.bss
|
||||
f32_sum: .skip 16
|
||||
.text
|
||||
adr x0, f32_0
|
||||
ld1 {v0.4s}, [x0]
|
||||
adr x1, f32_1
|
||||
ld1 {v1.4s}, [x1]
|
||||
fadd v2.4s, v0.4s, v1.4s
|
||||
adr x0, f32_sum
|
||||
st1 {v2.4s}, [x0]
|
||||
ASSERT_MEMCMP(f32_sum, f32_sum_expect, 0x10)
|
||||
|
||||
/* 2x 64-bit float add. */
|
||||
.data
|
||||
f64_0: .double 1.5, 2.5
|
||||
f64_1: .double 5.5, 6.5
|
||||
f64_sum_expect: .double 7.0, 9.0
|
||||
.bss
|
||||
f64_sum: .skip 16
|
||||
.text
|
||||
adr x0, f64_0
|
||||
ld1 {v0.2d}, [x0]
|
||||
adr x1, f64_1
|
||||
ld1 {v1.2d}, [x1]
|
||||
fadd v2.2d, v0.2d, v1.2d
|
||||
adr x0, f64_sum
|
||||
st1 {v2.2d}, [x0]
|
||||
ASSERT_MEMCMP(f64_sum, f64_sum_expect, 0x10)
|
||||
EXIT
|
||||
26
userland/arch/aarch64/simd_interleave.S
Normal file
26
userland/arch/aarch64/simd_interleave.S
Normal file
@@ -0,0 +1,26 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#simd-interleaving */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
.data
|
||||
u32_interleave: .word \
|
||||
0x11111111, 0x55555555, \
|
||||
0x22222222, 0x66666666, \
|
||||
0x33333333, 0x77777777, \
|
||||
0x44444444, 0x88888888
|
||||
u32_interleave_sum_expect: .word \
|
||||
0x66666666, \
|
||||
0x88888888, \
|
||||
0xAAAAAAAA, \
|
||||
0xCCCCCCCC
|
||||
.bss
|
||||
u32_interleave_sum: .skip 16
|
||||
.text
|
||||
adr x0, u32_interleave
|
||||
ld2 {v0.4s, v1.4s}, [x0]
|
||||
add v2.4s, v0.4s, v1.4s
|
||||
adr x0, u32_interleave_sum
|
||||
st1 {v2.4s}, [x0]
|
||||
ASSERT_MEMCMP(u32_interleave_sum, u32_interleave_sum_expect, 0x10)
|
||||
EXIT
|
||||
13
userland/arch/aarch64/str.S
Normal file
13
userland/arch/aarch64/str.S
Normal file
@@ -0,0 +1,13 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#armv8-str */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
ldr x0, myvar
|
||||
ASSERT_EQ(x0, 0x12346789ABCDEF0)
|
||||
#if 0
|
||||
/* Error: invalid addressing mode at operand 2 -- `str x0,myvar' */
|
||||
str x0, myvar
|
||||
#endif
|
||||
EXIT
|
||||
myvar: .quad 0x12346789ABCDEF0
|
||||
17
userland/arch/aarch64/ubfm.S
Normal file
17
userland/arch/aarch64/ubfm.S
Normal file
@@ -0,0 +1,17 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#ubfm */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
ldr x0, =0x1122334455667788
|
||||
|
||||
// lsr alias: imms == 63
|
||||
|
||||
ldr x1, =0xFFFFFFFFFFFFFFFF
|
||||
ubfm x1, x0, 16, 63
|
||||
ASSERT_EQ(x1, 0x0000112233445566)
|
||||
|
||||
ldr x1, =0xFFFFFFFFFFFFFFFF
|
||||
ubfm x1, x0, 32, 63
|
||||
ASSERT_EQ(x1, 0x0000000011223344)
|
||||
EXIT
|
||||
15
userland/arch/aarch64/ubfx.S
Normal file
15
userland/arch/aarch64/ubfx.S
Normal file
@@ -0,0 +1,15 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#ubfx */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
ldr x0, =0x1122334455667788
|
||||
|
||||
ldr x1, =0xFFFFFFFFFFFFFFFF
|
||||
ubfx x1, x0, 8, 16
|
||||
ASSERT_EQ(x1, 0x0000000000006677)
|
||||
|
||||
ldr x1, =0xFFFFFFFFFFFFFFFF
|
||||
ubfx x1, x0, 8, 32
|
||||
ASSERT_EQ(x1, 0x0000000044556677)
|
||||
EXIT
|
||||
51
userland/arch/aarch64/x31.S
Normal file
51
userland/arch/aarch64/x31.S
Normal file
@@ -0,0 +1,51 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#x31 */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
/* ERROR: can never use the name x31. */
|
||||
#if 0
|
||||
mov x31, 31
|
||||
#endif
|
||||
|
||||
/* mov (register) is an alias for ORR, which accepts xzr. */
|
||||
mov x0, 1
|
||||
mov x0, xzr
|
||||
ASSERT_EQ(x0, 0)
|
||||
|
||||
/* Same encoding as the mov version. */
|
||||
mov x0, 1
|
||||
orr x0, xzr, xzr
|
||||
ASSERT_EQ(x0, 0)
|
||||
|
||||
/* So, orr, which is not an alias, can only take xzr, not sp. */
|
||||
#if 0
|
||||
orr sp, sp, sp
|
||||
#endif
|
||||
|
||||
/* Zero register discards result if written to. */
|
||||
mov x0, 1
|
||||
orr xzr, x0, x0
|
||||
ASSERT_EQ(xzr, 0)
|
||||
|
||||
/* MOV (to/from SP) is an alias for ADD (immediate). */
|
||||
mov x0, sp
|
||||
mov sp, 1
|
||||
/* Alias to add. */
|
||||
mov x1, sp
|
||||
/* Exact same encoding as above. */
|
||||
add x1, sp, 0
|
||||
ASSERT_EQ(x1, 1)
|
||||
mov sp, x0
|
||||
|
||||
/* So, ADD (immediate), which is not an alias, can only take sp, not xzr. */
|
||||
#if 0
|
||||
/* Error: integer register expected in the extended/shifted operand register at operand 3 -- `add xzr,xzr,1' */
|
||||
add xzr, xzr, 1
|
||||
#endif
|
||||
|
||||
/* Note however that ADD (register), unlike ADD (immediate),
|
||||
* does not say anything about SP, and so does accept xzr just fine.
|
||||
*/
|
||||
add xzr, xzr, xzr
|
||||
EXIT
|
||||
58
userland/arch/arm/add.S
Normal file
58
userland/arch/arm/add.S
Normal file
@@ -0,0 +1,58 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#data-processing-instructions */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
|
||||
/* Immediate encoding.
|
||||
*
|
||||
* r1 = r0 + 2
|
||||
*/
|
||||
mov r0, 1
|
||||
/* r1 = r0 + 2 */
|
||||
add r1, r0, 2
|
||||
ASSERT_EQ(r1, 3)
|
||||
|
||||
/* If src == dest, we can omit one of them.
|
||||
*
|
||||
* r0 = r0 + 2
|
||||
*/
|
||||
mov r0, 1
|
||||
add r0, 2
|
||||
ASSERT_EQ(r0, 3)
|
||||
|
||||
/* Same as above but explicit. */
|
||||
mov r0, 1
|
||||
add r0, r0, 2
|
||||
ASSERT_EQ(r0, 3)
|
||||
|
||||
#if 0
|
||||
/* But we cannot omit the register if there is a shift when using .syntax unified:
|
||||
* https://github.com/cirosantilli/arm-assembly-cheat#shift-suffixes
|
||||
*/
|
||||
.syntax unified
|
||||
/* Error: garbage following instruction */
|
||||
add r0, r1, lsl 1
|
||||
/* OK */
|
||||
add r0, r0, r1, lsl 1
|
||||
#endif
|
||||
|
||||
/* Register encoding.
|
||||
*
|
||||
* r2 = r0 + r1
|
||||
*/
|
||||
mov r0, 1
|
||||
mov r1, 2
|
||||
add r2, r0, r1
|
||||
ASSERT_EQ(r2, 3)
|
||||
|
||||
/* Register encoding, omit implicit register.
|
||||
*
|
||||
* r1 = r1 + r0
|
||||
*/
|
||||
mov r0, 1
|
||||
mov r1, 2
|
||||
add r1, r0
|
||||
ASSERT_EQ(r1, 3)
|
||||
|
||||
EXIT
|
||||
51
userland/arch/arm/address_modes.S
Normal file
51
userland/arch/arm/address_modes.S
Normal file
@@ -0,0 +1,51 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#addressing-modes */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
|
||||
/* Offset mode with immediate. Add 4 to the address register, which ends up
|
||||
* reading myvar2 instead of myvar.
|
||||
*/
|
||||
adr r0, myvar
|
||||
ldr r1, [r0, 4]
|
||||
ASSERT_EQ(r1, 0x9ABCDEF0)
|
||||
/* r0 was not modified. */
|
||||
ASSERT_EQ(r0, myvar)
|
||||
|
||||
/* Pre-indexed mode */
|
||||
adr r0, myvar
|
||||
ldr r1, [r0, 4]!
|
||||
ASSERT_EQ(r1, 0x9ABCDEF0)
|
||||
/* r0 was modified. */
|
||||
ASSERT_EQ(r0, myvar2)
|
||||
|
||||
/* Post-indexed mode */
|
||||
adr r0, myvar
|
||||
ldr r1, [r0], 4
|
||||
ASSERT_EQ(r1, 0x12345678)
|
||||
/* r0 was modified. */
|
||||
ASSERT_EQ(r0, myvar2)
|
||||
|
||||
/* Offset in register. */
|
||||
adr r0, myvar
|
||||
mov r1, 4
|
||||
ldr r2, [r0, r1]
|
||||
ASSERT_EQ(r2, 0x9ABCDEF0)
|
||||
|
||||
/* Offset in shifted register:
|
||||
* r2 =
|
||||
* *(r0 + (r1 << 1))
|
||||
* == *(myvar + (2 << 1))
|
||||
* == *(myvar + 4)
|
||||
*/
|
||||
adr r0, myvar
|
||||
mov r1, 2
|
||||
ldr r2, [r0, r1, lsl 1]
|
||||
ASSERT_EQ(r2, 0x9ABCDEF0)
|
||||
|
||||
EXIT
|
||||
myvar:
|
||||
.word 0x12345678
|
||||
myvar2:
|
||||
.word 0x9ABCDEF0
|
||||
33
userland/arch/arm/adr.S
Normal file
33
userland/arch/arm/adr.S
Normal file
@@ -0,0 +1,33 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#adr */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
.data
|
||||
data_label:
|
||||
.word 0x1234678
|
||||
ENTRY
|
||||
adr r0, label
|
||||
/* objdump tells us that this uses the literal pool,
|
||||
* it does not get converted to adr, which is the better
|
||||
* alternative here.
|
||||
*/
|
||||
ldr r1, =label
|
||||
adrl r2, label
|
||||
label:
|
||||
ASSERT_EQ_REG(r0, r1)
|
||||
ASSERT_EQ_REG(r0, r2)
|
||||
|
||||
#if 0
|
||||
/* Error: symbol .data is in a different section.
|
||||
*
|
||||
* It works however in ARMv8.
|
||||
* I think this means that there is no relocation type
|
||||
* that takes care of this encoding in ARMv7, but there
|
||||
* is one in ARMv8.
|
||||
*
|
||||
* If you have no idea what I'm talking about, read this:
|
||||
* https://stackoverflow.com/questions/3322911/what-do-linkers-do/33690144#33690144
|
||||
*/
|
||||
adr r1, data_label
|
||||
#endif
|
||||
EXIT
|
||||
27
userland/arch/arm/and.S
Normal file
27
userland/arch/arm/and.S
Normal file
@@ -0,0 +1,27 @@
|
||||
/* Bitwise AND. */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
|
||||
/* 0x00 & 0xFF == 0x00 */
|
||||
mov r0, 0x00
|
||||
and r0, 0xFF
|
||||
ASSERT_EQ(r0, 0x00)
|
||||
|
||||
/* 0x0F & 0xF0 == 0x00 */
|
||||
mov r0, 0x0F
|
||||
and r0, 0xF0
|
||||
ASSERT_EQ(r0, 0x00)
|
||||
|
||||
/* 0x0F & 0xFF == 0x0F */
|
||||
mov r0, 0x0F
|
||||
and r0, 0xFF
|
||||
ASSERT_EQ(r0, 0x0F)
|
||||
|
||||
/* 0xF0 & 0xFF == 0xF0 */
|
||||
mov r0, 0xF0
|
||||
and r0, 0xFF
|
||||
ASSERT_EQ(r0, 0xF0)
|
||||
|
||||
EXIT
|
||||
9
userland/arch/arm/b.S
Normal file
9
userland/arch/arm/b.S
Normal file
@@ -0,0 +1,9 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#b */
|
||||
|
||||
#include "common.h"
|
||||
ENTRY
|
||||
/* Jump over the fail. 26-bit PC-relative. */
|
||||
b ok
|
||||
FAIL
|
||||
ok:
|
||||
EXIT
|
||||
28
userland/arch/arm/beq.S
Normal file
28
userland/arch/arm/beq.S
Normal file
@@ -0,0 +1,28 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#beq */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
|
||||
/* Smaller*/
|
||||
mov r0, 1
|
||||
cmp r0, 2
|
||||
ASSERT(ble)
|
||||
ASSERT(blt)
|
||||
ASSERT(bne)
|
||||
|
||||
/* Equal. */
|
||||
mov r1, 0
|
||||
cmp r1, 0
|
||||
ASSERT(beq)
|
||||
ASSERT(bge)
|
||||
ASSERT(ble)
|
||||
|
||||
/* Greater. */
|
||||
mov r0, 2
|
||||
cmp r0, 1
|
||||
ASSERT(bge)
|
||||
ASSERT(bgt)
|
||||
ASSERT(bne)
|
||||
|
||||
EXIT
|
||||
10
userland/arch/arm/bfi.S
Normal file
10
userland/arch/arm/bfi.S
Normal file
@@ -0,0 +1,10 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#bfi */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
ldr r0, =0x11223344
|
||||
ldr r1, =0xFFFFFFFF
|
||||
bfi r1, r0, 8, 16
|
||||
ASSERT_EQ(r1, 0xFF3344FF)
|
||||
EXIT
|
||||
10
userland/arch/arm/bic.S
Normal file
10
userland/arch/arm/bic.S
Normal file
@@ -0,0 +1,10 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#bic */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
/* 0x0F & ~0x55 == 0x0F & 0xAA == 0x0A */
|
||||
mov r0, 0x0F
|
||||
bic r0, 0x55
|
||||
ASSERT_EQ(r0, 0x0A)
|
||||
EXIT
|
||||
14
userland/arch/arm/bl.S
Normal file
14
userland/arch/arm/bl.S
Normal file
@@ -0,0 +1,14 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#bl */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
mov r0, 1
|
||||
bl inc
|
||||
ASSERT_EQ(r0, 2)
|
||||
EXIT
|
||||
|
||||
/* int inc(int i) { return i + 1; } */
|
||||
inc:
|
||||
add r0, 1
|
||||
bx lr
|
||||
1
userland/arch/arm/build
Symbolic link
1
userland/arch/arm/build
Symbolic link
@@ -0,0 +1 @@
|
||||
../build
|
||||
17
userland/arch/arm/c/add.c
Normal file
17
userland/arch/arm/c/add.c
Normal file
@@ -0,0 +1,17 @@
|
||||
/* 1 + 2 == 3 */
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
int main(void) {
|
||||
uint32_t in0 = 1, in1 = 2, out;
|
||||
__asm__ (
|
||||
"add %[out], %[in0], %[in1];"
|
||||
: [out] "=r" (out)
|
||||
: [in0] "r" (in0),
|
||||
[in1] "r" (in1)
|
||||
);
|
||||
assert(in0 == 1);
|
||||
assert(in1 == 2);
|
||||
assert(out == 3);
|
||||
}
|
||||
1
userland/arch/arm/c/build
Symbolic link
1
userland/arch/arm/c/build
Symbolic link
@@ -0,0 +1 @@
|
||||
../build
|
||||
1
userland/arch/arm/c/freestanding/build
Symbolic link
1
userland/arch/arm/c/freestanding/build
Symbolic link
@@ -0,0 +1 @@
|
||||
../build
|
||||
35
userland/arch/arm/c/freestanding/hello.c
Normal file
35
userland/arch/arm/c/freestanding/hello.c
Normal file
@@ -0,0 +1,35 @@
|
||||
#include <inttypes.h>
|
||||
|
||||
void _start(void) {
|
||||
uint32_t exit_status;
|
||||
|
||||
/* write */
|
||||
{
|
||||
char msg[] = "hello\n";
|
||||
uint32_t syscall_return;
|
||||
register uint32_t r0 __asm__ ("r0") = 1; /* stdout */
|
||||
register char *r1 __asm__ ("r1") = msg;
|
||||
register uint32_t r2 __asm__ ("r2") = sizeof(msg);
|
||||
register uint32_t r8 __asm__ ("r7") = 4; /* syscall number */
|
||||
__asm__ __volatile__ (
|
||||
"svc 0;"
|
||||
: "+r" (r0)
|
||||
: "r" (r1), "r" (r2), "r" (r8)
|
||||
: "memory"
|
||||
);
|
||||
syscall_return = r0;
|
||||
exit_status = (syscall_return != sizeof(msg));
|
||||
}
|
||||
|
||||
/* exit */
|
||||
{
|
||||
register uint32_t r0 __asm__ ("r0") = exit_status;
|
||||
register uint32_t r7 __asm__ ("r7") = 1;
|
||||
__asm__ __volatile__ (
|
||||
"svc 0;"
|
||||
: "+r" (r0)
|
||||
: "r" (r7)
|
||||
:
|
||||
);
|
||||
}
|
||||
}
|
||||
15
userland/arch/arm/c/inc.c
Normal file
15
userland/arch/arm/c/inc.c
Normal file
@@ -0,0 +1,15 @@
|
||||
/* Increment a variable in inline assembly. */
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
int main(void) {
|
||||
uint32_t my_local_var = 1;
|
||||
__asm__ (
|
||||
"add %[my_local_var], %[my_local_var], #1;"
|
||||
: [my_local_var] "+r" (my_local_var)
|
||||
:
|
||||
:
|
||||
);
|
||||
assert(my_local_var == 2);
|
||||
}
|
||||
28
userland/arch/arm/c/inc_float.c
Normal file
28
userland/arch/arm/c/inc_float.c
Normal file
@@ -0,0 +1,28 @@
|
||||
/* https://stackoverflow.com/questions/53960240/armv8-floating-point-output-inline-assembly */
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
int main(void) {
|
||||
float my_float = 1.5;
|
||||
__asm__ (
|
||||
"vmov s0, 1.0;"
|
||||
"vadd.f32 %[my_float], %[my_float], s0;"
|
||||
: [my_float] "+t" (my_float)
|
||||
:
|
||||
: "s0"
|
||||
);
|
||||
assert(my_float == 2.5);
|
||||
|
||||
/* Undocumented %P
|
||||
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89482
|
||||
*/
|
||||
double my_double = 1.5;
|
||||
__asm__ (
|
||||
"vmov.f64 d0, 1.0;"
|
||||
"vadd.f64 %P[my_double], %P[my_double], d0;"
|
||||
: [my_double] "+w" (my_double)
|
||||
:
|
||||
: "d0"
|
||||
);
|
||||
assert(my_double == 2.5);
|
||||
}
|
||||
32
userland/arch/arm/c/inc_memory.c
Normal file
32
userland/arch/arm/c/inc_memory.c
Normal file
@@ -0,0 +1,32 @@
|
||||
/* Like inc.c but less good since we do more work ourselves.
|
||||
*
|
||||
* Just doing this to test out the "m" memory constraint.
|
||||
*
|
||||
* GCC 8.2.0 -O0 assembles ldr line to:
|
||||
*
|
||||
* ....
|
||||
* ldr r0, [fp, #-12]
|
||||
* ....
|
||||
*
|
||||
* and `-O3` assembles to:
|
||||
*
|
||||
* ....
|
||||
* ldr r0, [sp]
|
||||
* ....
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
int main(void) {
|
||||
uint32_t my_local_var = 1;
|
||||
__asm__ (
|
||||
"ldr r0, %[my_local_var];"
|
||||
"add r0, r0, #1;"
|
||||
"str r0, %[my_local_var];"
|
||||
: [my_local_var] "+m" (my_local_var)
|
||||
:
|
||||
: "r0"
|
||||
);
|
||||
assert(my_local_var == 2);
|
||||
}
|
||||
25
userland/arch/arm/c/inc_memory_global.c
Normal file
25
userland/arch/arm/c/inc_memory_global.c
Normal file
@@ -0,0 +1,25 @@
|
||||
/* GCC 8.2.0 -O0 and -O3 assembles ldr line to:
|
||||
*
|
||||
* ....
|
||||
* movw r3, #<lower address part>
|
||||
* movt r3, #<higher address part>
|
||||
* ldr r0, [r3]
|
||||
* ....
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
uint32_t my_global_var = 1;
|
||||
|
||||
int main(void) {
|
||||
__asm__ (
|
||||
"ldr r0, %[my_global_var];"
|
||||
"add r0, r0, #1;"
|
||||
"str r0, %[my_global_var];"
|
||||
: [my_global_var] "+m" (my_global_var)
|
||||
:
|
||||
: "r0"
|
||||
);
|
||||
assert(my_global_var == 2);
|
||||
}
|
||||
38
userland/arch/arm/c/reg_var.c
Normal file
38
userland/arch/arm/c/reg_var.c
Normal file
@@ -0,0 +1,38 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#register-variables */
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
int main(void) {
|
||||
register uint32_t r0 __asm__ ("r0");
|
||||
register uint32_t r1 __asm__ ("r1");
|
||||
uint32_t new_r0;
|
||||
uint32_t new_r1;
|
||||
{
|
||||
/* We must set the registers immediately before calling,
|
||||
* without making any function calls in between.
|
||||
*/
|
||||
r0 = 1;
|
||||
r1 = 2;
|
||||
__asm__ (
|
||||
/* We intentionally use an explicit r0 and r1 here,
|
||||
* just to illustrate that we are certain that the
|
||||
* r0 variable will go in r0. Real code would never do this.
|
||||
*/
|
||||
"add %[r0], r0, #1;"
|
||||
"add %[r1], r1, #1;"
|
||||
/* We have to specify r0 in the constraints.*/
|
||||
: [r0] "+r" (r0),
|
||||
[r1] "+r" (r1)
|
||||
:
|
||||
:
|
||||
);
|
||||
/* When we are done, we must immediately assign
|
||||
* the register variables to regular variables.
|
||||
*/
|
||||
new_r0 = r0;
|
||||
new_r1 = r1;
|
||||
}
|
||||
assert(new_r0 == 2);
|
||||
assert(new_r1 == 3);
|
||||
}
|
||||
59
userland/arch/arm/c_from_asm.S
Normal file
59
userland/arch/arm/c_from_asm.S
Normal file
@@ -0,0 +1,59 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#calling-convention */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
.data
|
||||
puts_s:
|
||||
.asciz "hello puts"
|
||||
printf_format:
|
||||
.asciz "hello printf %x\n"
|
||||
my_array_0:
|
||||
.word 0x11111111, 0x22222222, 0x33333333, 0x44444444
|
||||
my_array_1:
|
||||
.word 0x55555555, 0x66666666, 0x77777777, 0x88888888
|
||||
|
||||
ENTRY
|
||||
/* puts("hello puts") */
|
||||
/* r0 is first argument. */
|
||||
ldr r0, =puts_s
|
||||
bl puts
|
||||
/* Check return value >= 0 for success. */
|
||||
cmp r0, 0
|
||||
ASSERT(bge)
|
||||
|
||||
/* printf */
|
||||
ldr r0, =printf_format
|
||||
ldr r1, =0x12345678
|
||||
bl printf
|
||||
cmp r0, 0
|
||||
ASSERT(bge)
|
||||
|
||||
/* memcpy and memcmp. */
|
||||
|
||||
/* Smaller. */
|
||||
ldr r0, =my_array_0
|
||||
ldr r1, =my_array_1
|
||||
ldr r2, =0x10
|
||||
bl memcmp
|
||||
cmp r0, 0
|
||||
ASSERT(blt)
|
||||
|
||||
/* Copy. */
|
||||
ldr r0, =my_array_0
|
||||
ldr r1, =my_array_1
|
||||
ldr r2, =0x10
|
||||
bl memcpy
|
||||
|
||||
/* Equal. */
|
||||
ldr r0, =my_array_0
|
||||
ldr r1, =my_array_1
|
||||
ldr r2, =0x10
|
||||
bl memcmp
|
||||
ASSERT_EQ(r0, 0)
|
||||
|
||||
/* exit(0) */
|
||||
mov r0, 0
|
||||
bl exit
|
||||
|
||||
/* Never reached, just for the fail symbol. */
|
||||
EXIT
|
||||
17
userland/arch/arm/clz.S
Normal file
17
userland/arch/arm/clz.S
Normal file
@@ -0,0 +1,17 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#data-processing-instructions */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
ldr r0, =0x7FFFFFFF
|
||||
clz r1, r0
|
||||
ASSERT_EQ(r1, 1)
|
||||
|
||||
ldr r0, =0x3FFFFFFF
|
||||
clz r1, r0
|
||||
ASSERT_EQ(r1, 2)
|
||||
|
||||
ldr r0, =0x1FFFFFFF
|
||||
clz r1, r0
|
||||
ASSERT_EQ(r1, 3)
|
||||
EXIT
|
||||
14
userland/arch/arm/comments.S
Normal file
14
userland/arch/arm/comments.S
Normal file
@@ -0,0 +1,14 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#comments */
|
||||
|
||||
#include "common.h"
|
||||
ENTRY
|
||||
# mycomment
|
||||
@ mycomment
|
||||
/* # only works at the beginning of the line.
|
||||
* Error: garbage following instruction -- `nop #comment'
|
||||
*/
|
||||
#if 0
|
||||
nop # mycomment
|
||||
#endif
|
||||
nop @ mycomment
|
||||
EXIT
|
||||
71
userland/arch/arm/common_arch.h
Normal file
71
userland/arch/arm/common_arch.h
Normal file
@@ -0,0 +1,71 @@
|
||||
#ifndef COMMON_ARCH_H
|
||||
#define COMMON_ARCH_H
|
||||
|
||||
.syntax unified
|
||||
|
||||
/* Assert that a register equals a constant.
|
||||
* * reg: the register to check. Can be r0-r10, but not r11. r11 is overwritten.
|
||||
* * const: the constant to compare to. Only works for literals or labels, not for registers.
|
||||
* For register / register comparison, use ASSERT_EQ_REG.
|
||||
*/
|
||||
#define ASSERT_EQ(reg, const) \
|
||||
ldr r11, =const; \
|
||||
cmp reg, r11; \
|
||||
ASSERT(beq); \
|
||||
;
|
||||
|
||||
/* Assert that two arrays are the same. */
|
||||
#define ASSERT_MEMCMP(s1, s2, n) \
|
||||
MEMCMP(s1, s2, n); \
|
||||
ASSERT_EQ(r0, 0); \
|
||||
;
|
||||
|
||||
/* Store all callee saved registers, and LR in case we make further BL calls.
|
||||
*
|
||||
* Also save the input arguments r0-r3 on the stack, so we can access them later on,
|
||||
* despite those registers being overwritten.
|
||||
*/
|
||||
#define ENTRY \
|
||||
.text; \
|
||||
.global asm_main; \
|
||||
asm_main: \
|
||||
stmdb sp!, {r0-r12, lr}; \
|
||||
asm_main_after_prologue: \
|
||||
;
|
||||
|
||||
/* Meant to be called at the end of ENTRY.
|
||||
*
|
||||
* Branching to "fail" makes tests fail with exit status 1.
|
||||
*
|
||||
* If EXIT is reached, the program ends successfully.
|
||||
*
|
||||
* Restore LR and bx jump to it to return from asm_main.
|
||||
*/
|
||||
#define EXIT \
|
||||
mov r0, 0; \
|
||||
mov r1, 0; \
|
||||
b pass; \
|
||||
fail: \
|
||||
ldr r1, [sp]; \
|
||||
str r0, [r1]; \
|
||||
mov r0, 1; \
|
||||
pass: \
|
||||
add sp, 16; \
|
||||
ldmia sp!, {r4-r12, lr}; \
|
||||
bx lr; \
|
||||
;
|
||||
|
||||
/* Always fail. */
|
||||
#define FAIL \
|
||||
ldr r0, =__LINE__; \
|
||||
b fail; \
|
||||
;
|
||||
|
||||
#define MEMCMP(s1, s2, n) \
|
||||
ldr r0, =s1; \
|
||||
ldr r1, =s2; \
|
||||
ldr r2, =n; \
|
||||
bl memcmp; \
|
||||
;
|
||||
|
||||
#endif
|
||||
16
userland/arch/arm/cond.S
Normal file
16
userland/arch/arm/cond.S
Normal file
@@ -0,0 +1,16 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#conditional-execution */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
mov r0, 0
|
||||
mov r1, 1
|
||||
cmp r0, 1
|
||||
/* Previous cmp failed, skip this operation. */
|
||||
addeq r1, 1
|
||||
ASSERT_EQ(r1, 1)
|
||||
cmp r0, 0
|
||||
/* Previous passed, do this operation. */
|
||||
addeq r1, 1
|
||||
ASSERT_EQ(r1, 2)
|
||||
EXIT
|
||||
1
userland/arch/arm/empty.S
Symbolic link
1
userland/arch/arm/empty.S
Symbolic link
@@ -0,0 +1 @@
|
||||
../empty.S
|
||||
1
userland/arch/arm/fail.S
Symbolic link
1
userland/arch/arm/fail.S
Symbolic link
@@ -0,0 +1 @@
|
||||
../fail.S
|
||||
1
userland/arch/arm/freestanding/build
Symbolic link
1
userland/arch/arm/freestanding/build
Symbolic link
@@ -0,0 +1 @@
|
||||
../build
|
||||
21
userland/arch/arm/freestanding/hello.S
Normal file
21
userland/arch/arm/freestanding/hello.S
Normal file
@@ -0,0 +1,21 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#linux-system-calls */
|
||||
|
||||
.syntax unified
|
||||
.text
|
||||
.global _start
|
||||
_start:
|
||||
asm_main_after_prologue:
|
||||
/* write */
|
||||
mov r0, 1 /* stdout */
|
||||
adr r1, msg /* buffer */
|
||||
ldr r2, =len /* len */
|
||||
mov r7, 4 /* syscall number */
|
||||
svc 0
|
||||
|
||||
/* exit */
|
||||
mov r0, 0 /* exit status */
|
||||
mov r7, 1 /* syscall number */
|
||||
svc 0
|
||||
msg:
|
||||
.ascii "hello\n"
|
||||
len = . - msg
|
||||
23
userland/arch/arm/hello_driver.S
Normal file
23
userland/arch/arm/hello_driver.S
Normal file
@@ -0,0 +1,23 @@
|
||||
/* Minimal example using driver.
|
||||
*
|
||||
* Controls the exit status of the program.
|
||||
*/
|
||||
|
||||
.syntax unified
|
||||
.text
|
||||
.global asm_main
|
||||
asm_main:
|
||||
asm_main_after_prologue:
|
||||
|
||||
/* Set the return value according to the ARM calling convention. */
|
||||
mov r0, 0
|
||||
|
||||
/* Try some whacky value to see tests break. */
|
||||
/*mov r0, 77*/
|
||||
|
||||
/* Branch to the address at register lr.
|
||||
* That is the return address which was put there by the C driver (likely with a bl).
|
||||
*
|
||||
* X means eXchange encoding from thumb back to ARM, which is what the driver uses.
|
||||
*/
|
||||
bx lr
|
||||
24
userland/arch/arm/immediates.S
Normal file
24
userland/arch/arm/immediates.S
Normal file
@@ -0,0 +1,24 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#immediates */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
/* This is the default. We hack it in common.h however. */
|
||||
.syntax divided
|
||||
/* These fail. */
|
||||
#if 0
|
||||
mov r0, 1
|
||||
mov r0, 0x1
|
||||
#endif
|
||||
mov r0, #1
|
||||
mov r0, #0x1
|
||||
mov r0, $1
|
||||
mov r0, $0x1
|
||||
.syntax unified
|
||||
mov r0, 1
|
||||
mov r0, 0x1
|
||||
mov r0, #1
|
||||
mov r0, #0x1
|
||||
mov r0, $1
|
||||
mov r0, $0x1
|
||||
EXIT
|
||||
27
userland/arch/arm/inc_array.S
Normal file
27
userland/arch/arm/inc_array.S
Normal file
@@ -0,0 +1,27 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#loop-over-array */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#define NELEM 4
|
||||
#define ELEM_SIZE 4
|
||||
|
||||
.data;
|
||||
my_array:
|
||||
.word 0x11111111, 0x22222222, 0x33333333, 0x44444444
|
||||
my_array_expect:
|
||||
.word 0x11111112, 0x22222223, 0x33333334, 0x44444445
|
||||
|
||||
ENTRY
|
||||
/* Increment. */
|
||||
ldr r0, =my_array
|
||||
mov r1, NELEM
|
||||
increment:
|
||||
ldr r2, [r0]
|
||||
add r2, 1
|
||||
/* Post index usage. */
|
||||
str r2, [r0], ELEM_SIZE
|
||||
sub r1, 1
|
||||
cmp r1, 0
|
||||
bne increment
|
||||
ASSERT_MEMCMP(my_array, my_array_expect, 0x10)
|
||||
EXIT
|
||||
62
userland/arch/arm/ldmia.S
Normal file
62
userland/arch/arm/ldmia.S
Normal file
@@ -0,0 +1,62 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#loop-over-array */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#define NELEM 4
|
||||
#define ELEM_SIZE 4
|
||||
|
||||
.data;
|
||||
my_array_0:
|
||||
.word 0x11111111, 0x22222222, 0x33333333, 0x44444444
|
||||
my_array_1:
|
||||
.word 0x55555555, 0x66666666, 0x77777777, 0x88888888
|
||||
|
||||
ENTRY
|
||||
|
||||
/* Load r1, r2, r3 and r4 starting from the address in r0. Don't change r0 */
|
||||
ldr r0, =my_array_0
|
||||
ldr r1, =0
|
||||
ldr r2, =0
|
||||
ldr r3, =0
|
||||
ldr r4, =0
|
||||
ldmia r0, {r1-r4}
|
||||
ASSERT_EQ(r0, my_array_0)
|
||||
ASSERT_EQ(r1, 0x11111111)
|
||||
ASSERT_EQ(r2, 0x22222222)
|
||||
ASSERT_EQ(r3, 0x33333333)
|
||||
ASSERT_EQ(r4, 0x44444444)
|
||||
|
||||
/* Swapping the order of r1 and r2 on the mnemonic makes no difference to load order.
|
||||
*
|
||||
* But it gives an assembler warning, so we won't do it by default:
|
||||
*
|
||||
* ldmia.S: Assembler messages:
|
||||
* ldmia.S:32: Warning: register range not in ascending order
|
||||
*/
|
||||
#if 0
|
||||
ldr r0, =my_array_0
|
||||
ldr r1, =0
|
||||
ldr r2, =0
|
||||
ldmia r0, {r2,r1}
|
||||
ASSERT_EQ(r1, 0x11111111)
|
||||
ASSERT_EQ(r2, 0x22222222)
|
||||
#endif
|
||||
|
||||
/* Modify the array */
|
||||
ldr r0, =my_array_1
|
||||
ldr r1, =0x55555555
|
||||
ldr r2, =0x66666666
|
||||
ldr r3, =0x77777777
|
||||
ldr r4, =0x88888888
|
||||
stmdb r0, {r1-r4}
|
||||
|
||||
/* Verify that my_array_0 changed and is equal to my_array_1. */
|
||||
MEMCMP(my_array_0, my_array_1, 0x10)
|
||||
ASSERT_EQ(r0, 0)
|
||||
|
||||
/* Load registers and increment r0. */
|
||||
ldr r0, =my_array_0
|
||||
ldmia r0!, {r1-r4}
|
||||
ASSERT_EQ(r0, my_array_1)
|
||||
|
||||
EXIT
|
||||
65
userland/arch/arm/ldr_pseudo.S
Normal file
65
userland/arch/arm/ldr_pseudo.S
Normal file
@@ -0,0 +1,65 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#ldr-pseudo-instruction */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
|
||||
/* Mnemonic for a PC relative load:
|
||||
*
|
||||
* ....
|
||||
* ldr r0, [pc, offset]
|
||||
* r0 = myvar
|
||||
* ....
|
||||
*/
|
||||
ldr r0, myvar
|
||||
ASSERT_EQ(r0, 0x12345678)
|
||||
|
||||
/* Mnemonic PC relative load with an offset.
|
||||
* Load myvar2 instead of myvar.
|
||||
*/
|
||||
ldr r0, myvar + 4
|
||||
ASSERT_EQ(r0, 0x9ABCDEF0)
|
||||
|
||||
/* First store the address in r0 using a magic =myvar, which creates
|
||||
* a new variable containing the address and PC-relative addresses it
|
||||
* https://stackoverflow.com/questions/17214962/what-is-the-difference-between-label-equals-sign-and-label-brackets-in-ar
|
||||
*
|
||||
* Use the adr instruction would likely be better for this application however.
|
||||
*
|
||||
* ....
|
||||
* r0 = &myvar
|
||||
* r1 = *r0
|
||||
* ....
|
||||
*/
|
||||
ldr r0, =myvar
|
||||
ldr r1, [r0]
|
||||
ASSERT_EQ(r1, 0x12345678)
|
||||
|
||||
/* More efficiently, use r0 as the address to read, and write to r0 itself. */
|
||||
ldr r0, =myvar
|
||||
ldr r0, [r0]
|
||||
ASSERT_EQ(r0, 0x12345678)
|
||||
|
||||
/* Same as =myvar but store a constant to a register.
|
||||
* Can also be done with movw and movt. */
|
||||
ldr r0, =0x11112222
|
||||
ASSERT_EQ(r0, 0x11112222)
|
||||
|
||||
/* We can also use the GAS :lower16: and :upper16: operators with movw and movt
|
||||
* to load the address of myvar into r0 with two immediates.
|
||||
*
|
||||
* This results in one extra 4 byte instruction read from memory,
|
||||
* and one less data read, so it is likely more cache efficient.
|
||||
*
|
||||
* https://sourceware.org/binutils/docs-2.19/as/ARM_002dRelocations.html
|
||||
*/
|
||||
movw r0, #:lower16:myvar
|
||||
movt r0, #:upper16:myvar
|
||||
ldr r1, [r0]
|
||||
ASSERT_EQ(r1, 0x12345678)
|
||||
|
||||
EXIT
|
||||
myvar:
|
||||
.word 0x12345678
|
||||
myvar2:
|
||||
.word 0x9ABCDEF0
|
||||
12
userland/arch/arm/ldrb.S
Normal file
12
userland/arch/arm/ldrb.S
Normal file
@@ -0,0 +1,12 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#ldrh-and-ldrb */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
ldr r0, =myvar
|
||||
mov r1, 0x0
|
||||
ldrb r1, [r0]
|
||||
ASSERT_EQ(r1, 0x00000078)
|
||||
EXIT
|
||||
myvar:
|
||||
.word 0x12345678
|
||||
12
userland/arch/arm/ldrh.S
Normal file
12
userland/arch/arm/ldrh.S
Normal file
@@ -0,0 +1,12 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#ldrh-and-ldrb */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
ldr r0, =myvar
|
||||
mov r1, 0x0
|
||||
ldrh r1, [r0]
|
||||
ASSERT_EQ(r1, 0x00005678)
|
||||
EXIT
|
||||
myvar:
|
||||
.word 0x12345678
|
||||
19
userland/arch/arm/mov.S
Normal file
19
userland/arch/arm/mov.S
Normal file
@@ -0,0 +1,19 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#mov */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
|
||||
/* Immediate. */
|
||||
mov r0, 0
|
||||
ASSERT_EQ(r0, 0)
|
||||
mov r0, 1
|
||||
ASSERT_EQ(r0, 1)
|
||||
|
||||
/* Register. */
|
||||
mov r0, 0
|
||||
mov r1, 1
|
||||
mov r1, r0
|
||||
ASSERT_EQ(r1, 0)
|
||||
|
||||
EXIT
|
||||
27
userland/arch/arm/movw.S
Normal file
27
userland/arch/arm/movw.S
Normal file
@@ -0,0 +1,27 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#movw-and-movt */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
|
||||
/* movt (top) and movw (TODO what is w) set the higher
|
||||
* and lower 16 bits of the register.
|
||||
*/
|
||||
movw r0, 0xFFFF
|
||||
movt r0, 0x1234
|
||||
add r0, 1
|
||||
ASSERT_EQ(r0, 0x12350000)
|
||||
|
||||
/* movw also zeroes out the top bits, allowing small 16-bit
|
||||
* C constants to be assigned in a single instruction.
|
||||
*
|
||||
* It differs from mov because mov can only encode 8 bits
|
||||
* at a time, while movw can encode 16.
|
||||
*
|
||||
* movt does not modify the lower bits however.
|
||||
*/
|
||||
ldr r0, =0x12345678
|
||||
movw r0, 0x1111
|
||||
ASSERT_EQ(r0, 0x00001111)
|
||||
|
||||
EXIT
|
||||
12
userland/arch/arm/mul.S
Normal file
12
userland/arch/arm/mul.S
Normal file
@@ -0,0 +1,12 @@
|
||||
/* Multiplication. */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
/* 2 * 3 = 6 */
|
||||
mov r0, 0
|
||||
mov r1, 2
|
||||
mov r2, 3
|
||||
mul r1, r2
|
||||
ASSERT_EQ(r1, 6)
|
||||
EXIT
|
||||
32
userland/arch/arm/nop.S
Normal file
32
userland/arch/arm/nop.S
Normal file
@@ -0,0 +1,32 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#nop */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
/* Disassembles as:
|
||||
*
|
||||
* ....
|
||||
* nop {0}
|
||||
* ....
|
||||
*
|
||||
* TODO what is the `{0}`?
|
||||
*/
|
||||
nop
|
||||
|
||||
/* Disassembles as:
|
||||
*
|
||||
* ....
|
||||
* nop ; (mov r0, r0)
|
||||
* ....
|
||||
*/
|
||||
mov r0, r0
|
||||
|
||||
/* Disassemble as mov. TODO Why not as nop as in `mov r0, r0`?
|
||||
* Do they have any effect?
|
||||
*/
|
||||
mov r1, r1
|
||||
mov r8, r8
|
||||
|
||||
/* And there are other nops as well? Disassembles as `and`. */
|
||||
and r0, r0, r0
|
||||
EXIT
|
||||
31
userland/arch/arm/push.S
Normal file
31
userland/arch/arm/push.S
Normal file
@@ -0,0 +1,31 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#ldmia */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
|
||||
/* Save sp before push. */
|
||||
mov r0, sp
|
||||
|
||||
/* Push. */
|
||||
mov r1, 1
|
||||
mov r2, 2
|
||||
push {r1, r2}
|
||||
|
||||
/* Save sp after push. */
|
||||
mov r1, sp
|
||||
|
||||
/* Restore. */
|
||||
mov r3, 0
|
||||
mov r4, 0
|
||||
pop {r3, r4}
|
||||
ASSERT_EQ(r3, 1)
|
||||
ASSERT_EQ(r4, 2)
|
||||
|
||||
/* Check that stack pointer moved down by 8 bytes
|
||||
* (2 registers x 4 bytes each).
|
||||
*/
|
||||
sub r0, r1
|
||||
ASSERT_EQ(r0, 8)
|
||||
|
||||
EXIT
|
||||
9
userland/arch/arm/rbit.S
Normal file
9
userland/arch/arm/rbit.S
Normal file
@@ -0,0 +1,9 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#rbit */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
ldr r0, =0b00000001001000110100010101100101
|
||||
rbit r1, r0
|
||||
ASSERT_EQ(r1, 0b10100110101000101100010010000000)
|
||||
EXIT
|
||||
69
userland/arch/arm/regs.S
Normal file
69
userland/arch/arm/regs.S
Normal file
@@ -0,0 +1,69 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#registers */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
|
||||
/* 13 general purpose registers. */
|
||||
mov r0, 0
|
||||
mov r1, 1
|
||||
mov r2, 2
|
||||
mov r3, 3
|
||||
mov r4, 4
|
||||
mov r5, 5
|
||||
mov r6, 6
|
||||
mov r7, 7
|
||||
mov r8, 8
|
||||
mov r9, 9
|
||||
mov r10, 10
|
||||
mov r11, 11
|
||||
mov r12, 12
|
||||
|
||||
/* * r11: aliased to FP (frame pointer, debug stack trace usage only)
|
||||
* +
|
||||
* I think FP is only a convention with no instruction impact, but TODO:
|
||||
* not mentioned on AAPCS. aarch64 AAPCS mentions it though.
|
||||
* * r13: aliased to SP (stack pointer), what push / pop use
|
||||
* * r14: aliased to LR (link register), what bl writes the return address to
|
||||
* * r15: aliased to PC (program counter), contains the current instruction address
|
||||
*
|
||||
* In ARMv8, SP and PC have dedicated registers in addition to
|
||||
* the 31 general purpose ones. LR is still general purpose as before.
|
||||
*
|
||||
* In ARMv7, however, it is possible to use those registers in any place
|
||||
* other registers may be used.
|
||||
*
|
||||
* This is not possible in ARMv8 anymore.
|
||||
*
|
||||
* For example, we can load an address into PC, which is very similar to what B / BX does:
|
||||
* https://stackoverflow.com/questions/32304646/arm-assembly-branch-to-address-inside-register-or-memory/54145818#54145818
|
||||
*/
|
||||
ldr pc, =10f
|
||||
FAIL
|
||||
10:
|
||||
|
||||
/* Same with r15, which is the same as pc. */
|
||||
ldr r15, =10f
|
||||
FAIL
|
||||
10:
|
||||
|
||||
/* Another example with mov reading from pc. */
|
||||
pc_addr:
|
||||
mov r0, pc
|
||||
/* Why sub 8:
|
||||
* https://stackoverflow.com/questions/24091566/why-does-the-arm-pc-register-point-to-the-instruction-after-the-next-one-to-be-e
|
||||
*/
|
||||
sub r0, r0, 8
|
||||
|
||||
/* pc-relative loads also work just like loads from any other register. */
|
||||
ldr r0, [pc]
|
||||
b 1f
|
||||
.word 0x12345678
|
||||
1:
|
||||
ASSERT_EQ(r0, 0x12345678)
|
||||
|
||||
/* We can also use fp in GNU GAS assembly. */
|
||||
mov r11, 0
|
||||
mov fp, 1
|
||||
/* ASSERT_EQ loads its constant into r11 before comparing, so checking
 * r11 directly would always pass. Compare through a copy instead.
 */
mov r0, r11
ASSERT_EQ(r0, 1)
|
||||
EXIT
|
||||
15
userland/arch/arm/rev.S
Normal file
15
userland/arch/arm/rev.S
Normal file
@@ -0,0 +1,15 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#data-processing-instructions */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
/* All bytes in register. */
|
||||
ldr r0, =0x11223344
|
||||
rev r1, r0
|
||||
ASSERT_EQ(r1, 0x44332211)
|
||||
|
||||
/* Groups of 16-bits. */
|
||||
ldr r0, =0x11223344
|
||||
rev16 r1, r0
|
||||
ASSERT_EQ(r1, 0x22114433)
|
||||
EXIT
|
||||
35
userland/arch/arm/s_suffix.S
Normal file
35
userland/arch/arm/s_suffix.S
Normal file
@@ -0,0 +1,35 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#s-suffix */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
|
||||
/* Result is 0, set beq. */
|
||||
movs r0, 0
|
||||
ASSERT(beq)
|
||||
|
||||
/* The opposite. */
|
||||
movs r0, 1
|
||||
ASSERT(bne)
|
||||
|
||||
/* mov without s does not set the status. */
|
||||
movs r0, 0
|
||||
mov r0, 1
|
||||
ASSERT(beq)
|
||||
|
||||
/* movs still moves... */
|
||||
mov r0, 0
|
||||
movs r0, 1
|
||||
ASSERT_EQ(r0, 1)
|
||||
|
||||
/* add: the result is 0. */
|
||||
mov r0, 1
|
||||
adds r0, -1
|
||||
ASSERT(beq)
|
||||
|
||||
/* add: result non 0. */
|
||||
mov r0, 1
|
||||
adds r0, 1
|
||||
ASSERT(bne)
|
||||
|
||||
EXIT
|
||||
79
userland/arch/arm/shift.S
Normal file
79
userland/arch/arm/shift.S
Normal file
@@ -0,0 +1,79 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#shift-suffixes */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
|
||||
/* lsl */
|
||||
ldr r0, =0xFFF00FFF
|
||||
mov r1, r0, lsl 8
|
||||
ldr r2, =0xF00FFF00
|
||||
ASSERT_EQ_REG(r1, r2)
|
||||
|
||||
/* lsr */
|
||||
ldr r0, =0xFFF00FFF
|
||||
mov r1, r0, lsr 8
|
||||
ldr r2, =0x00FFF00F
|
||||
ASSERT_EQ_REG(r1, r2)
|
||||
|
||||
/* ror */
|
||||
ldr r0, =0xFFF00FFF
|
||||
mov r1, r0, ror 8
|
||||
ldr r2, =0xFFFFF00F
|
||||
ASSERT_EQ_REG(r1, r2)
|
||||
|
||||
/* asr negative */
|
||||
ldr r0, =0x80000008
|
||||
mov r1, r0, asr 1
|
||||
ldr r2, =0xC0000004
|
||||
ASSERT_EQ_REG(r1, r2)
|
||||
|
||||
/* asr positive */
|
||||
ldr r0, =0x40000008
|
||||
mov r1, r0, asr 1
|
||||
ldr r2, =0x20000004
|
||||
ASSERT_EQ_REG(r1, r2)
|
||||
|
||||
/* There are also direct shift mnemonics for the mov shifts.
|
||||
*
|
||||
* They assemble to the exact same bytes as the mov version.
|
||||
*/
|
||||
ldr r0, =0xFFF00FFF
|
||||
lsl r1, r0, 8
|
||||
ldr r2, =0xF00FFF00
|
||||
ASSERT_EQ_REG(r1, r2)
|
||||
|
||||
/* If used with the `mov` instruction, it results in a pure shift,
|
||||
* but the suffixes also exist for all the other data processing instructions.
|
||||
*
|
||||
* Here we illustrate a shifted add instruction which calculates:
|
||||
*
|
||||
* ....
|
||||
* r1 = r1 + (r0 << 1)
|
||||
* ....
|
||||
*/
|
||||
ldr r0, =0x10
|
||||
ldr r1, =0x100
|
||||
add r1, r1, r0, lsl 1
|
||||
ldr r2, =0x00000120
|
||||
ASSERT_EQ_REG(r1, r2)
|
||||
|
||||
/* The shift takes up the same encoding slot as the immediate,
|
||||
* therefore it is not possible to both use an immediate and shift.
|
||||
*
|
||||
* Error: shift expression expected -- `add r1,r0,1,lsl#1'
|
||||
*/
|
||||
#if 0
|
||||
add r1, r0, 1, lsl 1
|
||||
#endif
|
||||
|
||||
/* However, you can still encode shifted bitmasks of
|
||||
* limited width in immediates, so why not just use the
|
||||
* assembler pre-processing for it?
|
||||
*/
|
||||
ldr r1, =0x100
|
||||
add r1, r1, (0x10 << 1)
|
||||
ldr r2, =0x00000120
|
||||
ASSERT_EQ_REG(r1, r2)
|
||||
|
||||
EXIT
|
||||
113
userland/arch/arm/simd.S
Normal file
113
userland/arch/arm/simd.S
Normal file
@@ -0,0 +1,113 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#advanced-simd-instructions */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
/* vadd.u32
|
||||
*
|
||||
* Add 4x 32-bit unsigned integers in one go.
|
||||
*
|
||||
* q means 128-bits.
|
||||
*
|
||||
* u32 means that we treat memory as uint32_t types.
|
||||
*
|
||||
* 4 is deduced: in 128 bits you can fit 4 u32.
|
||||
*
|
||||
* Observe how the carry is propagated within u32 integers,
|
||||
* but not across them.
|
||||
*/
|
||||
.data
|
||||
u32_0: .word 0xF111F111, 0xF222F222, 0xF333F333, 0xF444F444
|
||||
u32_1: .word 0x15551555, 0x16661666, 0x17771777, 0x18881888
|
||||
u32_sum_expect: .word 0x06670666, 0x08890888, 0x0AAB0AAA, 0x0CCD0CCC
|
||||
.bss
|
||||
u32_sum: .skip 0x10
|
||||
.text
|
||||
ldr r0, =u32_0
|
||||
vld1.32 {q0}, [r0]
|
||||
ldr r0, =u32_1
|
||||
vld1.32 {q1}, [r0]
|
||||
vadd.u32 q2, q0, q1
|
||||
ldr r0, =u32_sum
|
||||
vst1.u32 {q2}, [r0]
|
||||
ASSERT_MEMCMP(u32_sum, u32_sum_expect, 0x10)
|
||||
|
||||
/* vadd.u64: 2x 64-bit unsigned integer add. */
|
||||
.data
|
||||
u64_0: .quad 0xF1111111F1111111, 0xF2222222F2222222
|
||||
u64_1: .quad 0x1555555515555555, 0x1666666616666666
|
||||
u64_sum_expect: .quad 0x0666666706666666, 0x0888888908888888
|
||||
.bss
|
||||
u64_sum: .skip 0x10
|
||||
.text
|
||||
ldr r0, =u64_0
|
||||
vld1.64 {q0}, [r0]
|
||||
ldr r0, =u64_1
|
||||
vld1.64 {q1}, [r0]
|
||||
vadd.u64 q2, q0, q1
|
||||
ldr r0, =u64_sum
|
||||
vst1.u64 {q2}, [r0]
|
||||
ASSERT_MEMCMP(u64_sum, u64_sum_expect, 0x10)
|
||||
|
||||
/* vadd.s64: 2x 64-bit signed integer add. TODO: how to differentiate
|
||||
* it from unsigned? I think signed and unsigned addition are identical
|
||||
* in two's complement, the only difference is overflow / carry detection
|
||||
* flags. But how do flags work when there are many values being added
|
||||
* at once?
|
||||
*/
|
||||
.data
|
||||
s64_0: .quad -1, -2
|
||||
s64_1: .quad -1, -2
|
||||
s64_sum_expect: .quad -2, -4
|
||||
.bss
|
||||
s64_sum: .skip 0x10
|
||||
.text
|
||||
ldr r0, =s64_0
|
||||
vld1.64 {q0}, [r0]
|
||||
ldr r0, =s64_1
|
||||
vld1.64 {q1}, [r0]
|
||||
vadd.s64 q2, q0, q1
|
||||
ldr r0, =s64_sum
|
||||
vst1.s64 {q2}, [r0]
|
||||
ASSERT_MEMCMP(s64_sum, s64_sum_expect, 0x10)
|
||||
|
||||
/* vadd.f32: 4x 32-bit float add. */
|
||||
.data
|
||||
f32_0: .float 1.5, 2.5, 3.5, 4.5
|
||||
f32_1: .float 5.5, 6.5, 7.5, 8.5
|
||||
f32_sum_expect: .float 7.0, 9.0, 11.0, 13.0
|
||||
.bss
|
||||
f32_sum: .skip 0x10
|
||||
.text
|
||||
ldr r0, =f32_0
|
||||
vld1.32 {q0}, [r0]
|
||||
ldr r0, =f32_1
|
||||
vld1.32 {q1}, [r0]
|
||||
vadd.f32 q2, q0, q1
|
||||
ldr r0, =f32_sum
|
||||
vst1.32 {q2}, [r0]
|
||||
ASSERT_MEMCMP(f32_sum, f32_sum_expect, 0x10)
|
||||
|
||||
/* vadd.f64: 2x 64-bit float add: appears not possible.
|
||||
*
|
||||
* https://stackoverflow.com/questions/36052564/does-arm-support-simd-operations-for-64-bit-floating-point-numbers
|
||||
*/
|
||||
.data
|
||||
f64_0: .double 1.5, 2.5
|
||||
f64_1: .double 5.5, 6.5
|
||||
f64_sum_expect: .double 7.0, 9.0
|
||||
.bss
|
||||
f64_sum: .skip 0x10
|
||||
.text
|
||||
ldr r0, =f64_0
|
||||
vld1.64 {q0}, [r0]
|
||||
ldr r0, =f64_1
|
||||
vld1.64 {q1}, [r0]
|
||||
#if 0
|
||||
/* bad type in Neon instruction -- `vadd.f64 q2,q0,q1' */
|
||||
vadd.f64 q2, q0, q1
|
||||
ldr r0, =f64_sum
|
||||
vst1.64 {q2}, [r0]
|
||||
ASSERT_MEMCMP(f64_sum, f64_sum_expect, 0x10)
|
||||
#endif
|
||||
EXIT
|
||||
60
userland/arch/arm/str.S
Normal file
60
userland/arch/arm/str.S
Normal file
@@ -0,0 +1,60 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#load-and-store-instructions */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
.data;
|
||||
/* Must be in the .data section, since we want to modify it. */
|
||||
myvar:
|
||||
.word 0x12345678
|
||||
|
||||
ENTRY
|
||||
/* r0 will contain the address. */
|
||||
ldr r0, =myvar
|
||||
|
||||
/* Sanity check. */
|
||||
ldr r1, [r0]
|
||||
movw r2, 0x5678
|
||||
movt r2, 0x1234
|
||||
ASSERT_EQ_REG(r1, r2)
|
||||
|
||||
/* Modify the value. */
|
||||
movw r1, 0xDEF0
|
||||
movt r1, 0x9ABC
|
||||
str r1, [r0]
|
||||
|
||||
/* Check that it changed. */
|
||||
ldr r1, [r0]
|
||||
movw r2, 0xDEF0
|
||||
movt r2, 0x9ABC
|
||||
ASSERT_EQ_REG(r1, r2)
|
||||
|
||||
/* Cannot use PC relative addressing to a different segment,
|
||||
* or else it fails with:
|
||||
*
|
||||
* ....
|
||||
* Error: internal_relocation (type: OFFSET_IMM) not fixed up
|
||||
* ....
|
||||
*
|
||||
* https://stackoverflow.com/questions/10094282/internal-relocation-not-fixed-up
|
||||
*/
|
||||
/*ldr r0, myvar*/
|
||||
|
||||
#if 0
|
||||
/* We could in theory write this to set the address of myvar,
|
||||
* but it will always segfault under Linux because the text segment is read-only.
|
||||
* This is however useful in baremetal programming.
|
||||
* This construct is not possible in ARMv8 for str:
|
||||
* https://github.com/cirosantilli/arm-assembly-cheat#armv8-str
|
||||
*/
|
||||
str r1, var_in_same_section
|
||||
var_in_same_section:
|
||||
#endif
|
||||
|
||||
/* = sign just doesn't make sense for str, you can't set the
|
||||
* address of a variable.
|
||||
*/
|
||||
#if 0
|
||||
str r1, =myvar
|
||||
#endif
|
||||
|
||||
EXIT
|
||||
11
userland/arch/arm/sub.S
Normal file
11
userland/arch/arm/sub.S
Normal file
@@ -0,0 +1,11 @@
|
||||
/* Subtraction. */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
/* 3 - 2 == 1 , register version.*/
|
||||
mov r0, 3
|
||||
mov r1, 2
|
||||
sub r0, r0, r1
|
||||
ASSERT_EQ(r0, 1)
|
||||
EXIT
|
||||
17
userland/arch/arm/thumb.S
Normal file
17
userland/arch/arm/thumb.S
Normal file
@@ -0,0 +1,17 @@
|
||||
/* Illustrates features that are only available in thumb. */
|
||||
|
||||
.syntax unified
|
||||
.text
|
||||
.thumb_func
|
||||
.global asm_main
|
||||
asm_main:
|
||||
asm_main_after_prologue:
|
||||
|
||||
/* CBZ: cmp and branch if zero instruction. Equivalent to CMP + BEQ.
|
||||
* TODO create an interesting assertion here.
|
||||
*/
|
||||
cbz r1, 1f
|
||||
1:
|
||||
|
||||
mov r0, 0
|
||||
bx lr
|
||||
19
userland/arch/arm/tst.S
Normal file
19
userland/arch/arm/tst.S
Normal file
@@ -0,0 +1,19 @@
|
||||
/* Test. Same as ands, but don't store the result, just update flags. */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
|
||||
/* 0x0F & 0xF0 == 0x00, so beq. */
|
||||
mov r0, 0x0F
|
||||
tst r0, 0xF0
|
||||
ASSERT(beq)
|
||||
|
||||
/* bne */
|
||||
mov r0, 0xFF
|
||||
tst r0, 0x0F
|
||||
ASSERT(bne)
|
||||
/* r0 was not modified. */
|
||||
ASSERT_EQ(r0, 0xFF)
|
||||
|
||||
EXIT
|
||||
90
userland/arch/arm/vcvt.S
Normal file
90
userland/arch/arm/vcvt.S
Normal file
@@ -0,0 +1,90 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#vcvt */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
/* SIMD positive. */
|
||||
.data
|
||||
vcvt_positive_0: .float 1.25, 2.5, 3.75, 4.0
|
||||
vcvt_positive_expect: .word 1, 2, 3, 4
|
||||
.bss
|
||||
vcvt_positive_result: .skip 0x10
|
||||
.text
|
||||
ldr r0, =vcvt_positive_0
|
||||
vld1.32 {q0}, [r0]
|
||||
vcvt.u32.f32 q1, q0
|
||||
ldr r0, =vcvt_positive_result
|
||||
vst1.32 {q1}, [r0]
|
||||
ASSERT_MEMCMP(vcvt_positive_result, vcvt_positive_expect, 0x10)
|
||||
|
||||
/* SIMD negative. */
|
||||
.data
|
||||
vcvt_negative_0: .float -1.25, -2.5, -3.75, -4.0
|
||||
vcvt_negative_expect: .word -1, -2, -3, -4
|
||||
.bss
|
||||
vcvt_negative_result: .skip 0x10
|
||||
.text
|
||||
ldr r0, =vcvt_negative_0
|
||||
vld1.32 {q0}, [r0]
|
||||
vcvt.s32.f32 q1, q0
|
||||
ldr r0, =vcvt_negative_result
|
||||
vst1.32 {q1}, [r0]
|
||||
ASSERT_MEMCMP(vcvt_negative_result, vcvt_negative_expect, 0x10)
|
||||
|
||||
/* Floating point. */
|
||||
.data
|
||||
vcvt_positive_float_0: .float 1.5, 2.5
|
||||
vcvt_positive_float_expect: .word 1
|
||||
.float 2.5
|
||||
.bss
|
||||
vcvt_positive_float_result: .skip 0x8
|
||||
.text
|
||||
ldr r0, =vcvt_positive_float_0
|
||||
vld1.32 {d0}, [r0]
|
||||
vcvt.u32.f32 s0, s0
|
||||
ldr r0, =vcvt_positive_float_result
|
||||
vst1.32 {d0}, [r0]
|
||||
ASSERT_MEMCMP(vcvt_positive_float_result, vcvt_positive_float_expect, 0x8)
|
||||
|
||||
/* Floating point but with immediates.
|
||||
*
|
||||
* You have to worry of course about representability of
|
||||
* the immediate in 4 bytes, which is even more fun for
|
||||
* floating point numbers :-)
|
||||
*
|
||||
* Doing this mostly to illustrate the joys of vmov.i32.
|
||||
*
|
||||
* For some reason, there is no vmov.i32 sn, only dn.
|
||||
* If you try to use sn, it does the same as .f32 and
|
||||
* stores a float instead. Horrible!
|
||||
*/
|
||||
vmov.f32 d0, 1.5
|
||||
vcvt.u32.f32 s0, s0
|
||||
vmov.i32 d1, 1
|
||||
vcmp.f32 s0, s2
|
||||
vmrs apsr_nzcv, fpscr
|
||||
ASSERT(beq)
|
||||
/* Check that s1 wasn't modified by vcvt. */
|
||||
vmov.f32 s2, 1.5
|
||||
vcmp.f32 s1, s2
|
||||
vmrs apsr_nzcv, fpscr
|
||||
ASSERT(beq)
|
||||
|
||||
/* Floating point double precision. */
|
||||
.data
|
||||
vcvt_positive_double_0: .double 1.5
|
||||
vcvt_positive_double_expect: .word 1
|
||||
.bss
|
||||
vcvt_positive_double_result: .skip 0x8
|
||||
.text
|
||||
ldr r0, =vcvt_positive_double_0
|
||||
vld1.64 {d0}, [r0]
|
||||
vcvt.u32.f64 s0, d0
|
||||
ldr r0, =vcvt_positive_double_result
|
||||
vst1.32 {d0}, [r0]
|
||||
ASSERT_MEMCMP(
|
||||
vcvt_positive_double_result,
|
||||
vcvt_positive_double_expect,
|
||||
0x4
|
||||
)
|
||||
EXIT
|
||||
41
userland/arch/arm/vcvta.S
Normal file
41
userland/arch/arm/vcvta.S
Normal file
@@ -0,0 +1,41 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#vcvta */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
/* SIMD positive. */
|
||||
.data
|
||||
vcvta_positive_0: .float 1.25, 2.5, 3.75, 4.0
|
||||
vcvta_positive_expect: .word 1, 3, 4, 4
|
||||
.bss
|
||||
vcvta_positive_result: .skip 0x10
|
||||
.text
|
||||
ldr r0, =vcvta_positive_0
|
||||
vld1.32 {q0}, [r0]
|
||||
vcvta.u32.f32 q1, q0
|
||||
ldr r0, =vcvta_positive_result
|
||||
vst1.32 {q1}, [r0]
|
||||
ASSERT_MEMCMP(
|
||||
vcvta_positive_result,
|
||||
vcvta_positive_expect,
|
||||
0x10
|
||||
)
|
||||
|
||||
/* SIMD negative. */
|
||||
.data
|
||||
vcvta_negative_0: .float -1.25, -2.5, -3.75, -4.0
|
||||
vcvta_negative_expect: .word -1, -3, -4, -4
|
||||
.bss
|
||||
vcvta_negative_result: .skip 0x10
|
||||
.text
|
||||
ldr r0, =vcvta_negative_0
|
||||
vld1.32 {q0}, [r0]
|
||||
vcvta.s32.f32 q1, q0
|
||||
ldr r0, =vcvta_negative_result
|
||||
vst1.32 {q1}, [r0]
|
||||
ASSERT_MEMCMP(
|
||||
vcvta_negative_result,
|
||||
vcvta_negative_expect,
|
||||
0x10
|
||||
)
|
||||
EXIT
|
||||
46
userland/arch/arm/vcvtr.S
Normal file
46
userland/arch/arm/vcvtr.S
Normal file
@@ -0,0 +1,46 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#vcvtr */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
.data
|
||||
vcvtr_0: .float 1.25, 2.5, 3.75, 4.0
|
||||
vcvtr_expect_zero: .word 1, 2, 3, 4
|
||||
vcvtr_expect_plus_infinity: .word 2, 3, 4, 4
|
||||
.bss
|
||||
vcvtr_result_zero: .skip 0x10
|
||||
vcvtr_result_plus_infinity: .skip 0x10
|
||||
.text
|
||||
ldr r0, =vcvtr_0
|
||||
vld1.32 {q0}, [r0]
|
||||
|
||||
/* zero */
|
||||
vmrs r0, fpscr
|
||||
orr r0, r0, (3 << 22)
|
||||
vmsr fpscr, r0
|
||||
vcvtr.u32.f32 q1, q0
|
||||
ldr r0, =vcvtr_result_zero
|
||||
vst1.32 {q1}, [r0]
|
||||
ASSERT_MEMCMP(
|
||||
vcvtr_result_zero,
|
||||
vcvtr_expect_zero,
|
||||
0x10
|
||||
)
|
||||
|
||||
#if 0
|
||||
/* TODO why is this not working? Rounds to zero still. */
|
||||
/* plus infinity */
|
||||
vmrs r0, fpscr
|
||||
mov r1, 1
|
||||
bfi r0, r1, 22, 2
|
||||
vmsr fpscr, r0
|
||||
vcvtr.u32.f32 q1, q0
|
||||
ldr r0, =vcvtr_result_plus_infinity
|
||||
vst1.32 {q1}, [r0]
|
||||
ASSERT_MEMCMP(
|
||||
vcvtr_result_plus_infinity,
|
||||
vcvtr_expect_plus_infinity,
|
||||
0x10
|
||||
)
|
||||
#endif
|
||||
EXIT
|
||||
152
userland/arch/arm/vfp.S
Normal file
152
userland/arch/arm/vfp.S
Normal file
@@ -0,0 +1,152 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#vfp
|
||||
* Adapted from: https://mindplusplus.wordpress.com/2013/06/27/arm-vfp-vector-programming-part-2-examples/ */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
.data;
|
||||
a1:
|
||||
.float 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5
|
||||
a2:
|
||||
.float 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5
|
||||
sum:
|
||||
.skip 32
|
||||
sum_expect:
|
||||
.float 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0
|
||||
|
||||
ENTRY
|
||||
/* Minimal single precision floating point example.
|
||||
* TODO: floating point representation constraints due to 4-byte instruction?
|
||||
*/
|
||||
vmov s0, 1.5
|
||||
vmov s1, 2.5
|
||||
vadd.f32 s2, s0, s1
|
||||
vmov s3, 4.0
|
||||
/* Compare two floating point registers. Stores results in fpscr:
|
||||
* (floating point status and control register).
|
||||
*/
|
||||
vcmp.f32 s2, s3
|
||||
/* Move the nzcv bits from fpscr to apsr */
|
||||
vmrs apsr_nzcv, fpscr
|
||||
/* This branch uses the Z bit of apsr, which was set accordingly. */
|
||||
ASSERT(beq)
|
||||
|
||||
/* Now the same from memory with vldr and vstr. */
|
||||
.data
|
||||
my_float_0:
|
||||
.float 1.5
|
||||
my_float_1:
|
||||
.float 2.5
|
||||
my_float_sum_expect:
|
||||
.float 4.0
|
||||
.bss
|
||||
my_float_sum:
|
||||
.skip 4
|
||||
.text
|
||||
ldr r0, =my_float_0
|
||||
vldr s0, [r0]
|
||||
ldr r0, =my_float_1
|
||||
vldr s1, [r0]
|
||||
vadd.f32 s2, s0, s1
|
||||
ldr r0, =my_float_sum
|
||||
vstr.f32 s2, [r0]
|
||||
ASSERT_MEMCMP(my_float_sum, my_float_sum_expect, 4)
|
||||
|
||||
#if 0
|
||||
/* We can't do pseudo vldr as for ldr, fails with:
|
||||
* Error: cannot represent CP_OFF_IMM relocation in this object file format
|
||||
* It works on ARMv8 however, so the relocation must have been added.
|
||||
*/
|
||||
vldr s0, my_float_0
|
||||
#endif
|
||||
|
||||
/* Minimal double precision floating point example. */
|
||||
vmov.f64 d0, 1.5
|
||||
vmov.f64 d1, 2.5
|
||||
vadd.f64 d2, d0, d1
|
||||
vmov.f64 d3, 4.0
|
||||
vcmp.f64 d2, d3
|
||||
vmrs apsr_nzcv, fpscr
|
||||
ASSERT(beq)
|
||||
|
||||
/* vmov can also move to general purpose registers.
|
||||
*
|
||||
* Just remember that we can't use float immediates with general purpose registers:
|
||||
* https://stackoverflow.com/questions/6514537/how-do-i-specify-immediate-floating-point-numbers-with-inline-assembly/52906126#52906126
|
||||
*/
|
||||
mov r1, 2
|
||||
mov r0, 1
|
||||
vmov s0, r0
|
||||
vmov s1, s0
|
||||
vmov r1, s1
|
||||
ASSERT_EQ_REG(r0, r1)
|
||||
|
||||
/* Now a more complex test function. */
|
||||
ldr r0, =sum
|
||||
ldr r1, =a1
|
||||
ldr r2, =a2
|
||||
mov r3, 8
|
||||
bl vec_sum
|
||||
/* The assert works easily because all floats used
|
||||
* have exact base-2 representation.
|
||||
*/
|
||||
ASSERT_MEMCMP(sum, sum_expect, 0x20)
|
||||
EXIT
|
||||
|
||||
/* void vec_sum(float *sum, float *a1, float *a2, int length) {
|
||||
* int i;
|
||||
* for (i=0; i < length; i++)
|
||||
* *(sum+i) = *(a1+i) + *(a2+i);
|
||||
* }
|
||||
*/
|
||||
vec_sum:
|
||||
/* Setup */
|
||||
push {r0, r1, r4, lr}
|
||||
push {r0, r1}
|
||||
mov r0, 1
|
||||
mov r1, 8
|
||||
bl reconfig
|
||||
pop {r0, r1}
|
||||
asr r3, 3
|
||||
|
||||
/* Do the sum. */
|
||||
1:
|
||||
fldmias r1!, {s8-s15}
|
||||
fldmias r2!, {s16-s23}
|
||||
vadd.f32 s24, s8, s16
|
||||
fstmias r0!, {s24-s31}
|
||||
subs r3, r3, 1
|
||||
bne 1b
|
||||
|
||||
/* Teardown. */
|
||||
bl deconfig
|
||||
pop {r0, r1, r4, pc}
|
||||
|
||||
/* Set the vector stride and vector length fields of FPSCR.
 *
 * inputs:
 *   r0: desired vector stride (1 or 2)
 *   r1: desired vector length (min. 1, max. 8)
 * outputs: (none)
 * modified: FPSCR only. r0-r2 are saved on entry and restored on exit.
 * notes:
 *   r0 and r1 are masked to the width of their FPSCR fields before use:
 *   2 bits starting at bit 20 for the stride, 3 bits starting at bit 16
 *   for the length.
 */
reconfig:
    push {r0-r2}

    /* Stride field value: (stride & 3) ^ 1. */
    and r0, r0, 3
    eor r0, r0, 1

    /* Length field value: (length - 1) & 7. */
    sub r1, r1, 1
    and r1, r1, 7

    /* r0 = new stride and length bits, placed at their FPSCR positions. */
    mov r0, r0, lsl 20
    orr r0, r0, r1, lsl 16

    /* Read-modify-write: clear the old stride (bits 21:20) and length
     * (bits 18:16) fields in the current FPSCR value, then merge in the
     * new ones. 55*65536 == 0x370000 is the mask of exactly those bits.
     */
    vmrs r2, fpscr
    bic r2, 55*65536
    orr r2, r2, r0

    /* Write back the merged value held in r2. Writing r0 here instead
     * would throw away the merge above and zero every other FPSCR field.
     */
    vmsr fpscr, r2

    pop {r0-r2}
    bx lr
|
||||
|
||||
deconfig:
|
||||
push {r0, r1, lr}
|
||||
mov r0, 1
|
||||
mov r1, 1
|
||||
bl reconfig
|
||||
pop {r0, r1, pc}
|
||||
28
userland/arch/common.h
Normal file
28
userland/arch/common.h
Normal file
@@ -0,0 +1,28 @@
|
||||
#ifndef COMMON_H
|
||||
#define COMMON_H
|
||||
|
||||
/* We define in this header only macros that are the same on all archs. */
|
||||
|
||||
/* common_arch.h contains arch specific macros. */
|
||||
#include "common_arch.h"
|
||||
|
||||
.extern \
|
||||
exit, \
|
||||
printf, \
|
||||
puts \
|
||||
;
|
||||
|
||||
/* Assert that the given branch instruction is taken. */
|
||||
#define ASSERT(branch_if_pass) \
|
||||
branch_if_pass 1f; \
|
||||
FAIL; \
|
||||
1: \
|
||||
;
|
||||
|
||||
/* Assert that a register equals another register. */
|
||||
#define ASSERT_EQ_REG(reg1, reg2) \
|
||||
cmp reg1, reg2; \
|
||||
ASSERT(beq); \
|
||||
;
|
||||
|
||||
#endif
|
||||
6
userland/arch/empty.S
Normal file
6
userland/arch/empty.S
Normal file
@@ -0,0 +1,6 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#about */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
EXIT
|
||||
10
userland/arch/fail.S
Normal file
10
userland/arch/fail.S
Normal file
@@ -0,0 +1,10 @@
|
||||
/* See what happens on test failure. */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
ENTRY
|
||||
#if 0
|
||||
/* Uncomment this to see it fail. */
|
||||
FAIL
|
||||
#endif
|
||||
EXIT
|
||||
17
userland/arch/main.c
Normal file
17
userland/arch/main.c
Normal file
@@ -0,0 +1,17 @@
|
||||
/* This is the main entrypoint for all .S examples. */
|
||||
|
||||
#include "stdio.h"
|
||||
#include "stdint.h"
|
||||
|
||||
#include "lkmc.h"
|
||||
|
||||
int asm_main(uint32_t *line);
|
||||
|
||||
/* Run the assembly example through asm_main and report a failure, if any.
 *
 * A non-zero return value from asm_main is treated as an error: it is
 * printed together with the value asm_main stored through its `line`
 * argument (presumably the source line of the failed assertion -- confirm
 * against the FAIL macro in common_arch.h), and is then used as the
 * process exit status.
 */
int main(void) {
    /* Start `line` at 0 so that the error report never reads an
     * indeterminate value if asm_main fails without storing to it.
     */
    uint32_t ret, line = 0;

    ret = asm_main(&line);
    if (ret) {
        /* uint32_t has no guaranteed match among the plain printf
         * conversions, so convert explicitly to unsigned long, which is
         * at least 32 bits wide, and print with %lu.
         */
        printf("error %lu at line %lu\n",
               (unsigned long)ret, (unsigned long)line);
    }
    return ret;
}
|
||||
16
userland/arch/x86_64/c/add.c
Normal file
16
userland/arch/x86_64/c/add.c
Normal file
@@ -0,0 +1,16 @@
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
int main(void) {
|
||||
uint64_t in1 = 0xFFFFFFFF;
|
||||
uint64_t in2 = 0x1;
|
||||
uint64_t out;
|
||||
__asm__ (
|
||||
"lea (%[in1], %[in2]), %[out];"
|
||||
: [out] "=r" (out)
|
||||
: [in1] "r" (in1),
|
||||
[in2] "r" (in2)
|
||||
:
|
||||
);
|
||||
assert(out == 0x100000000);
|
||||
}
|
||||
@@ -1,3 +1,5 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#your-first-binutils-hack */
|
||||
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user