userland: add assembly support

Move arm assembly cheat here, and start some work on x86 cheat as well.
This commit is contained in:
Ciro Santilli 六四事件 法轮功
2019-03-22 00:00:00 +00:00
parent 4943c9ed2e
commit 287c83f3f9
117 changed files with 3870 additions and 547 deletions

58
userland/arch/arm/add.S Normal file
View File

@@ -0,0 +1,58 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#data-processing-instructions */
#include "common.h"
ENTRY
/* Immediate encoding.
*
* r1 = r0 + 2
*/
mov r0, 1
/* r1 = r0 + 2 */
add r1, r0, 2
ASSERT_EQ(r1, 3)
/* If src == dest, we can omit one of them.
*
* r0 = r0 + 2
*/
mov r0, 1
add r0, 2
ASSERT_EQ(r0, 3)
/* Same as above but explicit. */
mov r0, 1
add r0, r0, 2
ASSERT_EQ(r0, 3)
#if 0
/* But we cannot omit the register if there is a shift when using .syntx unified:
* https://github.com/cirosantilli/arm-assembly-cheat#shift-suffixes
*/
.syntax unified
/* Error: garbage following instruction */
add r0, r1, lsl 1
/* OK */
add r0, r0, r1, lsl 1
#endif
/* Register encoding.
*
* r2 = r0 + r1
*/
mov r0, 1
mov r1, 2
add r2, r0, r1
ASSERT_EQ(r2, 3)
/* Register encoding, omit implicit register.
*
* r1 = r1 + r0
*/
mov r0, 1
mov r1, 2
add r1, r0
ASSERT_EQ(r1, 3)
EXIT

View File

@@ -0,0 +1,51 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#addressing-modes */
#include "common.h"
ENTRY
/* Offset mode with immediate. Add 4 to the address register, which ends up
* reading myvar2 instead of myvar.
*/
adr r0, myvar
ldr r1, [r0, 4]
ASSERT_EQ(r1, 0x9ABCDEF0)
/* r0 was not modified. */
ASSERT_EQ(r0, myvar)
/* Pre-indexed mode */
adr r0, myvar
ldr r1, [r0, 4]!
ASSERT_EQ(r1, 0x9ABCDEF0)
/* r0 was modified. */
ASSERT_EQ(r0, myvar2)
/* Post-indexed mode */
adr r0, myvar
ldr r1, [r0], 4
ASSERT_EQ(r1, 0x12345678)
/* r0 was modified. */
ASSERT_EQ(r0, myvar2)
/* Offset in register. */
adr r0, myvar
mov r1, 4
ldr r2, [r0, r1]
ASSERT_EQ(r2, 0x9ABCDEF0)
/* Offset in shifted register:
* r2 =
* (r0 + (r1 << 1))
* == *(myvar + (2 << 1))
* == *(myvar + 4)
*/
adr r0, myvar
mov r1, 2
ldr r2, [r0, r1, lsl 1]
ASSERT_EQ(r2, 0x9ABCDEF0)
EXIT
myvar:
.word 0x12345678
myvar2:
.word 0x9ABCDEF0

33
userland/arch/arm/adr.S Normal file
View File

@@ -0,0 +1,33 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#adr */
#include "common.h"
.data
data_label:
/* Fixed typo: was 0x1234678 (7 hex digits). */
.word 0x12345678
ENTRY
/* adr: PC-relative address calculation, single add/sub on pc. */
adr r0, label
/* objdump tells us that this uses the literal pool,
 * it does not get converted to adr, which is the better
 * alternative here. (The instruction must be ldr =, not adr,
 * for this comment to apply.)
 */
ldr r1, =label
adrl r2, label
label:
ASSERT_EQ_REG(r0, r1)
ASSERT_EQ_REG(r0, r2)
#if 0
/* Error: symbol .data is in a different section.
 *
 * It works however in ARMv8.
 * I think this means that there is no relocation type
 * that takes care of this encoding in ARMv7 (this file),
 * but there is one in ARMv8.
 *
 * If you have no idea what I'm talking about, read this:
 * https://stackoverflow.com/questions/3322911/what-do-linkers-do/33690144#33690144
 */
adr r1, data_label
#endif
EXIT

27
userland/arch/arm/and.S Normal file
View File

@@ -0,0 +1,27 @@
/* Bitwise AND. (Comments use "&": bitwise, not logical "&&".) */
#include "common.h"
ENTRY
/* 0x00 & 0xFF == 0x00 */
mov r0, 0x00
and r0, 0xFF
ASSERT_EQ(r0, 0x00)
/* 0x0F & 0xF0 == 0x00 */
mov r0, 0x0F
and r0, 0xF0
ASSERT_EQ(r0, 0x00)
/* 0x0F & 0xFF == 0x0F */
mov r0, 0x0F
and r0, 0xFF
ASSERT_EQ(r0, 0x0F)
/* 0xF0 & 0xFF == 0xF0 */
mov r0, 0xF0
and r0, 0xFF
ASSERT_EQ(r0, 0xF0)
EXIT

9
userland/arch/arm/b.S Normal file
View File

@@ -0,0 +1,9 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#b */
#include "common.h"
ENTRY
/* Jump over the fail. 26-bit PC-relative. */
b ok
FAIL
ok:
EXIT

28
userland/arch/arm/beq.S Normal file
View File

@@ -0,0 +1,28 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#beq */
#include "common.h"
ENTRY
/* Smaller: 1 < 2, so the less-than / not-equal conditions hold. */
mov r0, 1
cmp r0, 2
ASSERT(ble)
ASSERT(blt)
ASSERT(bne)
/* Equal. */
mov r1, 0
cmp r1, 0
ASSERT(beq)
ASSERT(bge)
ASSERT(ble)
/* Greater. */
mov r0, 2
cmp r0, 1
ASSERT(bge)
ASSERT(bgt)
ASSERT(bne)
EXIT

10
userland/arch/arm/bfi.S Normal file
View File

@@ -0,0 +1,10 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#bfi */
#include "common.h"
ENTRY
ldr r0, =0x11223344
ldr r1, =0xFFFFFFFF
bfi r1, r0, 8, 16
ASSERT_EQ(r1, 0xFF3344FF)
EXIT

10
userland/arch/arm/bic.S Normal file
View File

@@ -0,0 +1,10 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#bic */
#include "common.h"
ENTRY
/* 0x0F & ~0x55 == 0x0F & 0xAA == 0x0A */
mov r0, 0x0F
bic r0, 0x55
ASSERT_EQ(r0, 0x0A)
EXIT

14
userland/arch/arm/bl.S Normal file
View File

@@ -0,0 +1,14 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#bl */
#include "common.h"
ENTRY
mov r0, 1
/* bl writes the return address to lr and branches to inc. */
bl inc
ASSERT_EQ(r0, 2)
EXIT
/* C equivalent: int inc(int i) { return i + 1; }
 * Argument and return value are both passed by value in r0.
 */
inc:
add r0, 1
bx lr

1
userland/arch/arm/build Symbolic link
View File

@@ -0,0 +1 @@
../build

17
userland/arch/arm/c/add.c Normal file
View File

@@ -0,0 +1,17 @@
/* 1 + 2 == 3 */
#include <assert.h>
#include <inttypes.h>
int main(void) {
uint32_t in0 = 1, in1 = 2, out;
__asm__ (
"add %[out], %[in0], %[in1];"
: [out] "=r" (out)
: [in0] "r" (in0),
[in1] "r" (in1)
);
assert(in0 == 1);
assert(in1 == 2);
assert(out == 3);
}

1
userland/arch/arm/c/build Symbolic link
View File

@@ -0,0 +1 @@
../build

View File

@@ -0,0 +1 @@
../build

View File

@@ -0,0 +1,35 @@
#include <inttypes.h>
/* Freestanding ARM Linux hello world: raw write(2) then exit(2)
 * via svc, using GCC register variables to pin each value to the
 * register the EABI syscall convention expects (r7 = syscall number,
 * r0-r2 = arguments, r0 = return value).
 */
void _start(void) {
    uint32_t exit_status;
    /* write(1, msg, sizeof(msg)) */
    {
        char msg[] = "hello\n";
        uint32_t syscall_return;
        register uint32_t r0 __asm__ ("r0") = 1; /* stdout */
        register char *r1 __asm__ ("r1") = msg;
        register uint32_t r2 __asm__ ("r2") = sizeof(msg);
        /* Renamed from the misleading "r8": this variable is bound to
         * register r7, which holds the syscall number on ARM EABI Linux. */
        register uint32_t r7 __asm__ ("r7") = 4; /* syscall number: write */
        __asm__ __volatile__ (
            "svc 0;"
            : "+r" (r0)
            : "r" (r1), "r" (r2), "r" (r7)
            : "memory"
        );
        syscall_return = r0;
        /* Exit status 0 only if every byte (including the trailing NUL,
         * as in the original) was written. */
        exit_status = (syscall_return != sizeof(msg));
    }
    /* exit(exit_status) */
    {
        register uint32_t r0 __asm__ ("r0") = exit_status;
        register uint32_t r7 __asm__ ("r7") = 1; /* syscall number: exit */
        __asm__ __volatile__ (
            "svc 0;"
            : "+r" (r0)
            : "r" (r7)
            :
        );
    }
}

15
userland/arch/arm/c/inc.c Normal file
View File

@@ -0,0 +1,15 @@
/* Increment a variable in inline assembly. */
#include <assert.h>
#include <inttypes.h>
int main(void) {
uint32_t my_local_var = 1;
__asm__ (
"add %[my_local_var], %[my_local_var], #1;"
: [my_local_var] "+r" (my_local_var)
:
:
);
assert(my_local_var == 2);
}

View File

@@ -0,0 +1,28 @@
/* https://stackoverflow.com/questions/53960240/armv8-floating-point-output-inline-assembly */
#include <assert.h>
int main(void) {
float my_float = 1.5;
__asm__ (
"vmov s0, 1.0;"
"vadd.f32 %[my_float], %[my_float], s0;"
: [my_float] "+t" (my_float)
:
: "s0"
);
assert(my_float == 2.5);
/* Undocumented %P
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89482
*/
double my_double = 1.5;
__asm__ (
"vmov.f64 d0, 1.0;"
"vadd.f64 %P[my_double], %P[my_double], d0;"
: [my_double] "+w" (my_double)
:
: "d0"
);
assert(my_double == 2.5);
}

View File

@@ -0,0 +1,32 @@
/* Like inc.c but less good since we do more work ourselves.
*
* Just doing this to test out the "m" memory constraint.
*
* GCC 8.2.0 -O0 assembles ldr line to:
*
* ....
* ldr r0, [fp, #-12]
* ....
*
* and `-O3` assembles to:
*
* ....
* ldr r0, [sp]
* ....
*/
#include <assert.h>
#include <inttypes.h>
int main(void) {
uint32_t my_local_var = 1;
__asm__ (
"ldr r0, %[my_local_var];"
"add r0, r0, #1;"
"str r0, %[my_local_var];"
: [my_local_var] "+m" (my_local_var)
:
: "r0"
);
assert(my_local_var == 2);
}

View File

@@ -0,0 +1,25 @@
/* GCC 8.2.0 -O0 and -O3 assembles ldr line to:
*
* ....
* movw r3, #<lower address part>
* movt r3, #<higher address part>
* ldr r0, [r3]
* ....
*/
#include <assert.h>
#include <inttypes.h>
uint32_t my_global_var = 1;
int main(void) {
__asm__ (
"ldr r0, %[my_global_var];"
"add r0, r0, #1;"
"str r0, %[my_global_var];"
: [my_global_var] "+m" (my_global_var)
:
: "r0"
);
assert(my_global_var == 2);
}

View File

@@ -0,0 +1,38 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#register-variables */
#include <assert.h>
#include <inttypes.h>
int main(void) {
register uint32_t r0 __asm__ ("r0");
register uint32_t r1 __asm__ ("r1");
uint32_t new_r0;
uint32_t new_r1;
{
/* We must set the registers immediately before calling,
* without making any function calls in between.
*/
r0 = 1;
r1 = 2;
__asm__ (
/* We intentionally use an explicit r0 and r1 here,
* just to illustrate that we are certain that the
* r0 variable will go in r0. Real code would never do this.
*/
"add %[r0], r0, #1;"
"add %[r1], r1, #1;"
/* We have to specify r0 in the constraints.*/
: [r0] "+r" (r0),
[r1] "+r" (r1)
:
:
);
/* When we are done, we must immediatly assign
* the register variables to regular variables.
*/
new_r0 = r0;
new_r1 = r1;
}
assert(new_r0 == 2);
assert(new_r1 == 3);
}

View File

@@ -0,0 +1,59 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#calling-convention */
#include "common.h"
.data
puts_s:
.asciz "hello puts"
printf_format:
.asciz "hello printf %x\n"
my_array_0:
.word 0x11111111, 0x22222222, 0x33333333, 0x44444444
my_array_1:
.word 0x55555555, 0x66666666, 0x77777777, 0x88888888
ENTRY
/* puts("hello puts") */
/* r0 is first argument. */
ldr r0, =puts_s
bl puts
/* Check return status >= 0 for success. */
cmp r0, 0
ASSERT(bge)
/* printf(printf_format, 0x12345678): r1 is the second argument. */
ldr r0, =printf_format
ldr r1, =0x12345678
bl printf
cmp r0, 0
ASSERT(bge)
/* memcpy and memcmp. */
/* Smaller: my_array_0 < my_array_1 byte-wise, so memcmp < 0. */
ldr r0, =my_array_0
ldr r1, =my_array_1
ldr r2, =0x10
bl memcmp
cmp r0, 0
ASSERT(blt)
/* Copy: memcpy(my_array_0, my_array_1, 0x10) -- dst is the first arg. */
ldr r0, =my_array_0
ldr r1, =my_array_1
ldr r2, =0x10
bl memcpy
/* Equal after the copy. */
ldr r0, =my_array_0
ldr r1, =my_array_1
ldr r2, =0x10
bl memcmp
ASSERT_EQ(r0, 0)
/* exit(0) */
mov r0, 0
bl exit
/* Never reached, just for the fail symbol. */
EXIT

17
userland/arch/arm/clz.S Normal file
View File

@@ -0,0 +1,17 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#data-processing-instructions */
#include "common.h"
ENTRY
ldr r0, =0x7FFFFFFF
clz r1, r0
ASSERT_EQ(r1, 1)
ldr r0, =0x3FFFFFFF
clz r1, r0
ASSERT_EQ(r1, 2)
ldr r0, =0x1FFFFFFF
clz r1, r0
ASSERT_EQ(r1, 3)
EXIT

View File

@@ -0,0 +1,14 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#comments */
#include "common.h"
ENTRY
# mycomment
@ mycomment
/* # only works at the beginning of the line.
* Error: garbage following instruction -- `nop #comment'
*/
#if 0
nop # mycomment
#endif
nop @ mycomment
EXIT

View File

@@ -0,0 +1,71 @@
#ifndef COMMON_ARCH_H
#define COMMON_ARCH_H
.syntax unified
/* Assert that a register equals a constant.
* * reg: the register to check. Can be r0-r10, but not r11. r11 is overwritten.
* * const: the constant to compare to. Only works for literals or labels, not for registers.
* For register / register comparison, use ASSERT_EQ_REG.
*/
#define ASSERT_EQ(reg, const) \
ldr r11, =const; \
cmp reg, r11; \
ASSERT(beq); \
;
/* Assert that two arrays are the same.
* Clobbers at least r0-r2 and lr through the memcmp call in MEMCMP.
*/
#define ASSERT_MEMCMP(s1, s2, n) \
MEMCMP(s1, s2, n); \
ASSERT_EQ(r0, 0); \
;
/* Open the asm_main function that the C test driver calls.
*
* Store all callee saved registers, and LR in case we make further BL calls.
*
* Also save the input arguments r0-r3 on the stack, so we can access them later on,
* despite those registers being overwritten.
*/
#define ENTRY \
.text; \
.global asm_main; \
asm_main: \
stmdb sp!, {r0-r12, lr}; \
asm_main_after_prologue: \
;
/* Meant to be used once at the end of the asm_main opened by ENTRY.
*
* Branching to "fail" makes tests fail with exit status 1.
* The fail path stores the failing line number (put in r0 by FAIL) through
* the pointer found at [sp], which is the saved first argument (r0) that the
* C driver passed to asm_main -- presumably an output slot for the failing
* line; confirm against the driver.
*
* If EXIT is reached, the program ends successfully.
*
* "add sp, 16" discards the saved r0-r3 so that the following ldmia restores
* the r4-r12 and lr pushed by ENTRY; then bx jumps to LR to return from asm_main.
*/
#define EXIT \
mov r0, 0; \
mov r1, 0; \
b pass; \
fail: \
ldr r1, [sp]; \
str r0, [r1]; \
mov r0, 1; \
pass: \
add sp, 16; \
ldmia sp!, {r4-r12, lr}; \
bx lr; \
;
/* Always fail, recording the current source line number in r0. */
#define FAIL \
ldr r0, =__LINE__; \
b fail; \
;
/* memcmp(s1, s2, n): result left in r0. Clobbers r0-r2 and lr;
* memcmp may clobber other caller-saved registers per the AAPCS.
*/
#define MEMCMP(s1, s2, n) \
ldr r0, =s1; \
ldr r1, =s2; \
ldr r2, =n; \
bl memcmp; \
;
#endif

16
userland/arch/arm/cond.S Normal file
View File

@@ -0,0 +1,16 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#conditional-execution */
#include "common.h"
ENTRY
mov r0, 0
mov r1, 1
cmp r0, 1
/* Previous cmp failed, skip this operation. */
addeq r1, 1
ASSERT_EQ(r1, 1)
cmp r0, 0
/* Previous passed, do this operation. */
addeq r1, 1
ASSERT_EQ(r1, 2)
EXIT

1
userland/arch/arm/empty.S Symbolic link
View File

@@ -0,0 +1 @@
../empty.S

1
userland/arch/arm/fail.S Symbolic link
View File

@@ -0,0 +1 @@
../fail.S

View File

@@ -0,0 +1 @@
../build

View File

@@ -0,0 +1,21 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#linux-system-calls */
.syntax unified
.text
.global _start
_start:
asm_main_after_prologue:
/* write */
mov r0, 1 /* stdout */
adr r1, msg /* buffer */
ldr r2, =len /* len */
mov r7, 4 /* syscall number */
svc 0
/* exit */
mov r0, 0 /* exit status */
mov r7, 1 /* syscall number */
svc 0
msg:
.ascii "hello\n"
len = . - msg

View File

@@ -0,0 +1,23 @@
/* Minimal example using driver.
*
* Controls the exit status of the program.
*/
.syntax unified
.text
.global asm_main
asm_main:
asm_main_after_prologue:
/* Set the return value according to the ARM calling convention. */
mov r0, 0
/* Try some whacky value to see tests break. */
/*mov r0, 77*/
/* Branch to the address at register lr.
* That is the return value which was put there by the C driver (likely with a bl).
*
* X means eXchange encoding from thumb back to ARM, which is what the driver uses.
*/
bx lr

View File

@@ -0,0 +1,24 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#immediates */
#include "common.h"
ENTRY
/* This is the default. We hack it in common.h however. */
.syntax divided
/* These fail: divided syntax requires a # or $ prefix on immediates. */
#if 0
mov r0, 1
mov r0, 0x1
#endif
mov r0, #1
mov r0, #0x1
mov r0, $1
mov r0, $0x1
/* In unified syntax the # and $ prefixes become optional:
 * all six forms below assemble.
 */
.syntax unified
mov r0, 1
mov r0, 0x1
mov r0, #1
mov r0, #0x1
mov r0, $1
mov r0, $0x1
EXIT

View File

@@ -0,0 +1,27 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#loop-over-array */
#include "common.h"
#define NELEM 4
#define ELEM_SIZE 4
.data;
my_array:
.word 0x11111111, 0x22222222, 0x33333333, 0x44444444
my_array_expect:
.word 0x11111112, 0x22222223, 0x33333334, 0x44444445
ENTRY
/* Increment. */
ldr r0, =my_array
mov r1, NELEM
increment:
ldr r2, [r0]
add r2, 1
/* Post index usage. */
str r2, [r0], ELEM_SIZE
sub r1, 1
cmp r1, 0
bne increment
ASSERT_MEMCMP(my_array, my_array_expect, 0x10)
EXIT

62
userland/arch/arm/ldmia.S Normal file
View File

@@ -0,0 +1,62 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#loop-over-array */
#include "common.h"
#define NELEM 4
#define ELEM_SIZE 4
.data;
my_array_0:
.word 0x11111111, 0x22222222, 0x33333333, 0x44444444
my_array_1:
.word 0x55555555, 0x66666666, 0x77777777, 0x88888888
ENTRY
/* Load r1, r2, r3 and r4 starting from the address in r0. Don't change r0 */
ldr r0, =my_array_0
ldr r1, =0
ldr r2, =0
ldr r3, =0
ldr r4, =0
ldmia r0, {r1-r4}
ASSERT_EQ(r0, my_array_0)
ASSERT_EQ(r1, 0x11111111)
ASSERT_EQ(r2, 0x22222222)
ASSERT_EQ(r3, 0x33333333)
ASSERT_EQ(r4, 0x44444444)
/* Swapping the order of r1 and r2 on the mnemonic makes no difference to load order.
*
* But it gives an assembler warning, so we won't do it by default:
*
* ldmia.S: Assembler messages:
* ldmia.S:32: Warning: register range not in ascending order
*/
#if 0
ldr r0, =my_array_0
ldr r1, =0
ldr r2, =0
ldmia r0, {r2,r1}
ASSERT_EQ(r1, 0x11111111)
ASSERT_EQ(r2, 0x22222222)
#endif
/* Modify the array */
ldr r0, =my_array_1
ldr r1, =0x55555555
ldr r2, =0x66666666
ldr r3, =0x77777777
ldr r4, =0x88888888
stmdb r0, {r1-r4}
/* Verify that my_array_0 changed and is equal to my_array_1. */
MEMCMP(my_array_0, my_array_1, 0x10)
ASSERT_EQ(r0, 0)
/* Load registers and increment r0. */
ldr r0, =my_array_0
ldmia r0!, {r1-r4}
ASSERT_EQ(r0, my_array_1)
EXIT

View File

@@ -0,0 +1,65 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#ldr-pseudo-instruction */
#include "common.h"
ENTRY
/* Mnemonic for a PC relative load:
*
* ....
* ldr r0, [pc, offset]
* r0 = myvar
* ....
*/
ldr r0, myvar
ASSERT_EQ(r0, 0x12345678)
/* Mnemonic PC relative load with an offset.
* Load myvar2 instead of myvar.
*/
ldr r0, myvar + 4
ASSERT_EQ(r0, 0x9ABCDEF0)
/* First store the address in r0 using a magic =myvar, which creates
* a literal pool entry containing the address and PC-relative loads it:
* https://stackoverflow.com/questions/17214962/what-is-the-difference-between-label-equals-sign-and-label-brackets-in-ar
*
* Using the adr instruction would likely be better for this application however.
*
* ....
* r0 = &myvar
* r1 = *r0
* ....
*/
ldr r0, =myvar
ldr r1, [r0]
ASSERT_EQ(r1, 0x12345678)
/* More efficiently, use r0 as the address to read, and write to r0 itself. */
ldr r0, =myvar
ldr r0, [r0]
ASSERT_EQ(r0, 0x12345678)
/* Same as =myvar but store a constant to a register.
* Can also be done with movw and movt. */
ldr r0, =0x11112222
ASSERT_EQ(r0, 0x11112222)
/* We can also use the GAS :lower16: and :upper16: relocation operators
* with movw and movt to load the address of myvar into r0 with two immediates.
*
* This results in one extra 4 byte instruction read from memory,
* and one less data read, so it is likely more cache efficient.
*
* https://sourceware.org/binutils/docs-2.19/as/ARM_002dRelocations.html
*/
movw r0, #:lower16:myvar
movt r0, #:upper16:myvar
ldr r1, [r0]
ASSERT_EQ(r1, 0x12345678)
EXIT
myvar:
.word 0x12345678
myvar2:
.word 0x9ABCDEF0

12
userland/arch/arm/ldrb.S Normal file
View File

@@ -0,0 +1,12 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#ldrh-and-ldrb */
#include "common.h"
ENTRY
ldr r0, =myvar
mov r1, 0x0
ldrb r1, [r0]
ASSERT_EQ(r1, 0x00000078)
EXIT
myvar:
.word 0x12345678

12
userland/arch/arm/ldrh.S Normal file
View File

@@ -0,0 +1,12 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#ldrh-and-ldrb */
#include "common.h"
ENTRY
ldr r0, =myvar
mov r1, 0x0
ldrh r1, [r0]
ASSERT_EQ(r1, 0x00005678)
EXIT
myvar:
.word 0x12345678

19
userland/arch/arm/mov.S Normal file
View File

@@ -0,0 +1,19 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#mov */
#include "common.h"
ENTRY
/* Immediate. */
mov r0, 0
ASSERT_EQ(r0, 0)
mov r0, 1
ASSERT_EQ(r0, 1)
/* Register. */
mov r0, 0
mov r1, 1
mov r1, r0
ASSERT_EQ(r1, 0)
EXIT

27
userland/arch/arm/movw.S Normal file
View File

@@ -0,0 +1,27 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#movw-and-movt */
#include "common.h"
ENTRY
/* movt ("move top") and movw ("move wide", i.e. a 16-bit wide
* immediate move) set the higher and lower 16 bits of the register.
*/
movw r0, 0xFFFF
movt r0, 0x1234
add r0, 1
ASSERT_EQ(r0, 0x12350000)
/* movw also zeroes out the top bits, allowing small 16-bit
* C constants to be assigned in a single instruction.
*
* It differs from mov because mov can only encode 8 bits
* at a time, while movw can encode 16.
*
* movt does not modify the lower bits however.
*/
ldr r0, =0x12345678
movw r0, 0x1111
ASSERT_EQ(r0, 0x00001111)
EXIT

12
userland/arch/arm/mul.S Normal file
View File

@@ -0,0 +1,12 @@
/* Multiplication. */
#include "common.h"
ENTRY
/* 2 * 3 = 6 */
mov r0, 0
mov r1, 2
mov r2, 3
mul r1, r2
ASSERT_EQ(r1, 6)
EXIT

32
userland/arch/arm/nop.S Normal file
View File

@@ -0,0 +1,32 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#nop */
#include "common.h"
ENTRY
/* Disassembles as:
*
* ....
* nop {0}
* ....
*
* TODO what is the `{0}`?
*/
nop
/* Disassembles as:
*
* ....
* nop ; (mov r0, r0)
* ....
*/
mov r0, r0
/* Disassemble as mov. TODO Why not as nop as in `mov r0, r0`?
* Do they have any effect?
*/
mov r1, r1
mov r8, r8
/* And there are other nops as well? Disassembles as `and`. */
and r0, r0, r0
EXIT

31
userland/arch/arm/push.S Normal file
View File

@@ -0,0 +1,31 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#ldmia */
#include "common.h"
ENTRY
/* Save sp before push. */
mov r0, sp
/* Push. */
mov r1, 1
mov r2, 2
push {r1, r2}
/* Save sp after push. */
mov r1, sp
/* Restore. */
mov r3, 0
mov r4, 0
pop {r3, r4}
ASSERT_EQ(r3, 1)
ASSERT_EQ(r4, 2)
/* Check that stack pointer moved down by 8 bytes
* (2 registers x 4 bytes each).
*/
sub r0, r1
ASSERT_EQ(r0, 8)
EXIT

9
userland/arch/arm/rbit.S Normal file
View File

@@ -0,0 +1,9 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#rbit */
#include "common.h"
ENTRY
ldr r0, =0b00000001001000110100010101100101
rbit r1, r0
ASSERT_EQ(r1, 0b10100110101000101100010010000000)
EXIT

69
userland/arch/arm/regs.S Normal file
View File

@@ -0,0 +1,69 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#registers */
#include "common.h"
ENTRY
/* 13 general purpose registers: r0-r12. */
mov r0, 0
mov r1, 1
mov r2, 2
mov r3, 3
mov r4, 4
mov r5, 5
mov r6, 6
mov r7, 7
mov r8, 8
mov r9, 9
mov r10, 10
mov r11, 11
mov r12, 12
/* * r11: aliased to FP (frame pointer, debug stack trace usage only)
*
* I think FP is only a convention with no instruction impact, but TODO:
* not mentioned on AAPCS. aarch64 AAPCS mentions it though.
* * r13: aliased to SP (stack pointer), what push / pop use
* * r14: aliased to LR (link register), what bl writes the return address to
* * r15: aliased to PC (program counter), contains the current instruction address
*
* In ARMv7 (this file), SP, LR and PC are plain general purpose
* registers (r13-r15) with only a conventional role.
*
* Therefore, it is possible to use those registers in any place
* other registers may be used.
*
* In ARMv8, SP and PC have dedicated registers separate from the
* 31 general purpose ones; LR is still general purpose as before.
* So this is not possible in ARMv8 anymore.
*
* For example, we can load an address into PC, which is very similar to what B / BX does:
* https://stackoverflow.com/questions/32304646/arm-assembly-branch-to-address-inside-register-or-memory/54145818#54145818
*/
ldr pc, =10f
FAIL
10:
/* Same with r15, which is the same as pc. */
ldr r15, =10f
FAIL
10:
/* Another example with mov reading from pc. */
pc_addr:
mov r0, pc
/* Why sub 8:
* https://stackoverflow.com/questions/24091566/why-does-the-arm-pc-register-point-to-the-instruction-after-the-next-one-to-be-e
*/
sub r0, r0, 8
/* PC-relative loads also work just like with any other register.
* In ARM mode pc reads as the current instruction + 8 (see the sub 8
* link above), so [pc] here addresses the .word placed after the branch.
*/
ldr r0, [pc]
b 1f
.word 0x12345678
1:
ASSERT_EQ(r0, 0x12345678)
/* We can also use the fp alias for r11 in GNU GAS assembly. */
mov r11, 0
mov fp, 1
ASSERT_EQ(r11, 1)
EXIT

15
userland/arch/arm/rev.S Normal file
View File

@@ -0,0 +1,15 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#data-processing-instructions */
#include "common.h"
ENTRY
/* All bytes in register. */
ldr r0, =0x11223344
rev r1, r0
ASSERT_EQ(r1, 0x44332211)
/* Groups of 16-bits. */
ldr r0, =0x11223344
rev16 r1, r0
ASSERT_EQ(r1, 0x22114433)
EXIT

View File

@@ -0,0 +1,35 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#s-suffix */
#include "common.h"
ENTRY
/* Result is 0, set beq. */
movs r0, 0
ASSERT(beq)
/* The opposite. */
movs r0, 1
ASSERT(bne)
/* mov without s does not set the status. */
movs r0, 0
mov r0, 1
ASSERT(beq)
/* movs still moves... */
mov r0, 0
movs r0, 1
ASSERT_EQ(r0, 1)
/* add: the result is 0. */
mov r0, 1
adds r0, -1
ASSERT(beq)
/* add: result non 0. */
mov r0, 1
adds r0, 1
ASSERT(bne)
EXIT

79
userland/arch/arm/shift.S Normal file
View File

@@ -0,0 +1,79 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#shift-suffixes */
#include "common.h"
ENTRY
/* lsl (comment was swapped with lsr below) */
ldr r0, =0xFFF00FFF
mov r1, r0, lsl 8
ldr r2, =0xF00FFF00
ASSERT_EQ_REG(r1, r2)
/* lsr */
ldr r0, =0xFFF00FFF
mov r1, r0, lsr 8
ldr r2, =0x00FFF00F
ASSERT_EQ_REG(r1, r2)
/* ror */
ldr r0, =0xFFF00FFF
mov r1, r0, ror 8
ldr r2, =0xFFFFF00F
ASSERT_EQ_REG(r1, r2)
/* asr negative: sign bit is replicated. */
ldr r0, =0x80000008
mov r1, r0, asr 1
ldr r2, =0xC0000004
ASSERT_EQ_REG(r1, r2)
/* asr positive */
ldr r0, =0x40000008
mov r1, r0, asr 1
ldr r2, =0x20000004
ASSERT_EQ_REG(r1, r2)
/* There are also direct shift mnemonics for the mov shifts.
*
* They assemble to the exact same bytes as the mov version.
*/
ldr r0, =0xFFF00FFF
lsl r1, r0, 8
ldr r2, =0xF00FFF00
ASSERT_EQ_REG(r1, r2)
/* If used with the `mov` instruction, it results in a pure shift,
* but the suffixes also exist for all the other data processing instructions.
*
* Here we illustrate a shifted add instruction which calculates:
*
* ....
* r1 = r1 + (r0 << 1)
* ....
*/
ldr r0, =0x10
ldr r1, =0x100
add r1, r1, r0, lsl 1
ldr r2, =0x00000120
ASSERT_EQ_REG(r1, r2)
/* The shift takes up the same encoding slot as the immediate,
* therefore it is not possible to both use an immediate and shift.
*
* Error: shift expression expected -- `add r1,r0,1,lsl#1'
*/
#if 0
add r1, r0, 1, lsl 1
#endif
/* However, you can still encode shifted bitmasks of
* limited width in immediates, so why not just use the
* assembler pre-processing for it?
*/
ldr r1, =0x100
add r1, r1, (0x10 << 1)
ldr r2, =0x00000120
ASSERT_EQ_REG(r1, r2)
EXIT

113
userland/arch/arm/simd.S Normal file
View File

@@ -0,0 +1,113 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#advanced-simd-instructions */
#include "common.h"
ENTRY
/* vadd.u32
*
* Add 4x 32-bit unsigned integers in one go.
*
* q means 128-bits.
*
* u32 means that we treat memory as uint32_t types.
*
* 4 is deduced: in 128 bits you can fit 4 u32.
*
* Observe how the carry is propagated within u32 integers,
* but not across them.
*/
.data
u32_0: .word 0xF111F111, 0xF222F222, 0xF333F333, 0xF444F444
u32_1: .word 0x15551555, 0x16661666, 0x17771777, 0x18881888
u32_sum_expect: .word 0x06670666, 0x08890888, 0x0AAB0AAA, 0x0CCD0CCC
.bss
u32_sum: .skip 0x10
.text
ldr r0, =u32_0
vld1.32 {q0}, [r0]
ldr r0, =u32_1
vld1.32 {q1}, [r0]
vadd.u32 q2, q0, q1
ldr r0, =u32_sum
vst1.u32 {q2}, [r0]
ASSERT_MEMCMP(u32_sum, u32_sum_expect, 0x10)
/* vadd.u64: 2x 64-bit unsigned integer add. */
.data
u64_0: .quad 0xF1111111F1111111, 0xF2222222F2222222
u64_1: .quad 0x1555555515555555, 0x1666666616666666
u64_sum_expect: .quad 0x0666666706666666, 0x0888888908888888
.bss
u64_sum: .skip 0x10
.text
ldr r0, =u64_0
vld1.64 {q0}, [r0]
ldr r0, =u64_1
vld1.64 {q1}, [r0]
vadd.u64 q2, q0, q1
ldr r0, =u64_sum
vst1.u64 {q2}, [r0]
ASSERT_MEMCMP(u64_sum, u64_sum_expect, 0x10)
/* vadd.s64: 2x 64-bit signed integer add. TODO: how to differentiate
* it from signed? I think signed and unsigned addition are identical
* in two's complement, the only difference is overflow / carry detection
* flags. But how do flags work when there are many values being added
* at once?
*/
.data
s64_0: .quad -1, -2
s64_1: .quad -1, -2
s64_sum_expect: .quad -2, -4
.bss
s64_sum: .skip 0x10
.text
ldr r0, =s64_0
vld1.64 {q0}, [r0]
ldr r0, =s64_1
vld1.64 {q1}, [r0]
vadd.s64 q2, q0, q1
ldr r0, =s64_sum
vst1.s64 {q2}, [r0]
ASSERT_MEMCMP(s64_sum, s64_sum_expect, 0x10)
/* vadd.f32: 4x 32-bit float add. */
.data
f32_0: .float 1.5, 2.5, 3.5, 4.5
f32_1: .float 5.5, 6.5, 7.5, 8.5
f32_sum_expect: .float 7.0, 9.0, 11.0, 13.0
.bss
f32_sum: .skip 0x10
.text
ldr r0, =f32_0
vld1.32 {q0}, [r0]
ldr r0, =f32_1
vld1.32 {q1}, [r0]
vadd.f32 q2, q0, q1
ldr r0, =f32_sum
vst1.32 {q2}, [r0]
ASSERT_MEMCMP(f32_sum, f32_sum_expect, 0x10)
/* vadd.f64: 2x 64-bit float add: appears not possible.
*
* https://stackoverflow.com/questions/36052564/does-arm-support-simd-operations-for-64-bit-floating-point-numbers
*/
.data
f64_0: .double 1.5, 2.5
f64_1: .double 5.5, 6.5
f64_sum_expect: .double 7.0, 9.0
.bss
f64_sum: .skip 0x10
.text
ldr r0, =f64_0
vld1.64 {q0}, [r0]
ldr r0, =f64_1
vld1.64 {q1}, [r0]
#if 0
/* bad type in Neon instruction -- `vadd.f64 q2,q0,q1' */
vadd.f64 q2, q0, q1
ldr r0, =f64_sum
vst1.64 {q2}, [r0]
ASSERT_MEMCMP(f64_sum, f64_sum_expect, 0x10)
#endif
EXIT

60
userland/arch/arm/str.S Normal file
View File

@@ -0,0 +1,60 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#load-and-store-instructions */
#include "common.h"
.data;
/* Must be in the .data section, since we want to modify it. */
myvar:
.word 0x12345678
ENTRY
/* r0 will contain the address. */
ldr r0, =myvar
/* Sanity check. */
ldr r1, [r0]
movw r2, 0x5678
movt r2, 0x1234
ASSERT_EQ_REG(r1, r2)
/* Modify the value. */
movw r1, 0xDEF0
movt r1, 0x9ABC
str r1, [r0]
/* Check that it changed. */
ldr r1, [r0]
movw r2, 0xDEF0
movt r2, 0x9ABC
ASSERT_EQ_REG(r1, r2)
/* Cannot use PC relative addressing to a different segment,
* or else it fails with:
*
* ....
* Error: internal_relocation (type: OFFSET_IMM) not fixed up
* ....
*
* https://stackoverflow.com/questions/10094282/internal-relocation-not-fixed-up
*/
/*ldr r0, myvar*/
#if 0
/* We could in theory write this to set the address of myvar,
* but it will always segfault under Linux because the text segment is read-only.
* This is however useful in baremetal programming.
* This construct is not possible in ARMv8 for str:
* https://github.com/cirosantilli/arm-assembly-cheat#armv8-str
*/
str r1, var_in_same_section
var_in_same_section:
#endif
/* = sign just doesn't make sense for str, you can't set the
* address of a variable.
*/
#if 0
str r1, =myvar
#endif
EXIT

11
userland/arch/arm/sub.S Normal file
View File

@@ -0,0 +1,11 @@
/* Subtraction. */
#include "common.h"
ENTRY
/* 3 - 2 == 1 , register version.*/
mov r0, 3
mov r1, 2
sub r0, r0, r1
ASSERT_EQ(r0, 1)
EXIT

17
userland/arch/arm/thumb.S Normal file
View File

@@ -0,0 +1,17 @@
/* Illustrates features that are only available in thumb. */
.syntax unified
.text
.thumb_func
.global asm_main
asm_main:
asm_main_after_prologue:
/* CBZ: cmp and branch if zero instruction. Equivalent to CMP + BEQ.
* TODO create an interesting assertion here.
*/
cbz r1, 1f
1:
mov r0, 0
bx lr

19
userland/arch/arm/tst.S Normal file
View File

@@ -0,0 +1,19 @@
/* Test. Same as ands, but don't store the result, just update flags. */
#include "common.h"
ENTRY
/* 0x0F & 0xF0 == 0x00 (bitwise), so beq. */
mov r0, 0x0F
tst r0, 0xF0
ASSERT(beq)
/* 0xFF & 0x0F == 0x0F != 0, so bne. */
mov r0, 0xFF
tst r0, 0x0F
ASSERT(bne)
/* r0 was not modified. */
ASSERT_EQ(r0, 0xFF)
EXIT

90
userland/arch/arm/vcvt.S Normal file
View File

@@ -0,0 +1,90 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#vcvt */
#include "common.h"
ENTRY
/* SIMD positive. */
.data
vcvt_positive_0: .float 1.25, 2.5, 3.75, 4.0
vcvt_positive_expect: .word 1, 2, 3, 4
.bss
vcvt_positive_result: .skip 0x10
.text
ldr r0, =vcvt_positive_0
vld1.32 {q0}, [r0]
vcvt.u32.f32 q1, q0
ldr r0, =vcvt_positive_result
vst1.32 {q1}, [r0]
ASSERT_MEMCMP(vcvt_positive_result, vcvt_positive_expect, 0x10)
/* SIMD negative. */
.data
vcvt_negative_0: .float -1.25, -2.5, -3.75, -4.0
vcvt_negative_expect: .word -1, -2, -3, -4
.bss
vcvt_negative_result: .skip 0x10
.text
ldr r0, =vcvt_negative_0
vld1.32 {q0}, [r0]
vcvt.s32.f32 q1, q0
ldr r0, =vcvt_negative_result
vst1.32 {q1}, [r0]
ASSERT_MEMCMP(vcvt_negative_result, vcvt_negative_expect, 0x10)
/* Floating point. */
.data
vcvt_positive_float_0: .float 1.5, 2.5
vcvt_positive_float_expect: .word 1
.float 2.5
.bss
vcvt_positive_float_result: .skip 0x8
.text
ldr r0, =vcvt_positive_float_0
vld1.32 {d0}, [r0]
vcvt.u32.f32 s0, s0
ldr r0, =vcvt_positive_float_result
vst1.32 {d0}, [r0]
ASSERT_MEMCMP(vcvt_positive_float_result, vcvt_positive_float_expect, 0x8)
/* Floating point but with immediates.
*
* You have to worry of course about representability of
* the immediate in 4 bytes, which is even more fun for
* floating point numbers :-)
*
* Doing this mostly to illustrate the joys of vmov.i32.
*
* For some reason, there is no vmov.i32 sn, only dn.
* If you try to use sn, it does the same as .f32 and
* stores a float instead. Horrible!
*/
vmov.f32 d0, 1.5
vcvt.u32.f32 s0, s0
vmov.i32 d1, 1
vcmp.f32 s0, s2
vmrs apsr_nzcv, fpscr
ASSERT(beq)
/* Check that s1 wasn't modified by vcvt. */
vmov.f32 s2, 1.5
vcmp.f32 s1, s2
vmrs apsr_nzcv, fpscr
ASSERT(beq)
/* Floating point double precision. */
.data
vcvt_positive_double_0: .double 1.5
vcvt_positive_double_expect: .word 1
.bss
vcvt_positive_double_result: .skip 0x8
.text
ldr r0, =vcvt_positive_double_0
vld1.64 {d0}, [r0]
vcvt.u32.f64 s0, d0
ldr r0, =vcvt_positive_double_result
vst1.32 {d0}, [r0]
ASSERT_MEMCMP(
vcvt_positive_double_result,
vcvt_positive_double_expect,
0x4
)
EXIT

41
userland/arch/arm/vcvta.S Normal file
View File

@@ -0,0 +1,41 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#vcvta */
#include "common.h"
ENTRY
/* SIMD positive. */
.data
vcvta_positive_0: .float 1.25, 2.5, 3.75, 4.0
vcvta_positive_expect: .word 1, 3, 4, 4
.bss
vcvta_positive_result: .skip 0x10
.text
ldr r0, =vcvta_positive_0
vld1.32 {q0}, [r0]
vcvta.u32.f32 q1, q0
ldr r0, =vcvta_positive_result
vst1.32 {q1}, [r0]
ASSERT_MEMCMP(
vcvta_positive_result,
vcvta_positive_expect,
0x10
)
/* SIMD negative. */
.data
vcvta_negative_0: .float -1.25, -2.5, -3.75, -4.0
vcvta_negative_expect: .word -1, -3, -4, -4
.bss
vcvta_negative_result: .skip 0x10
.text
ldr r0, =vcvta_negative_0
vld1.32 {q0}, [r0]
vcvta.s32.f32 q1, q0
ldr r0, =vcvta_negative_result
vst1.32 {q1}, [r0]
ASSERT_MEMCMP(
vcvta_negative_result,
vcvta_negative_expect,
0x10
)
EXIT

46
userland/arch/arm/vcvtr.S Normal file
View File

@@ -0,0 +1,46 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#vcvtrr
 *
 * vcvtr: float to integer conversion that rounds according to the
 * rounding mode currently selected in FPSCR.RMode (bits 23:22).
 */
#include "common.h"
ENTRY
.data
vcvtr_0: .float 1.25, 2.5, 3.75, 4.0
/* Round towards zero: plain truncation. */
vcvtr_expect_zero: .word 1, 2, 3, 4
/* Round towards plus infinity: ceiling. */
vcvtr_expect_plus_infinity: .word 2, 3, 4, 4
.bss
vcvtr_result_zero: .skip 0x10
vcvtr_result_plus_infinity: .skip 0x10
.text
ldr r0, =vcvtr_0
vld1.32 {q0}, [r0]
/* zero: set FPSCR.RMode = 0b11 (round towards zero). */
vmrs r0, fpscr
orr r0, r0, (3 << 22)
vmsr fpscr, r0
/* NOTE(review): vcvtr is documented as a scalar VFP instruction
 * (S registers only); with q operands this presumably assembles to a
 * NEON vcvt encoding instead -- confirm with a disassembly.
 */
vcvtr.u32.f32 q1, q0
ldr r0, =vcvtr_result_zero
vst1.32 {q1}, [r0]
ASSERT_MEMCMP(
vcvtr_result_zero,
vcvtr_expect_zero,
0x10
)
#if 0
/* TODO why is this not working? Rounds to zero still.
 *
 * NOTE(review): likely because the NEON (Advanced SIMD) vcvt encoding
 * always rounds towards zero and ignores FPSCR.RMode -- which would
 * also explain why the "zero" case above passes. A scalar
 * vcvtr.u32.f32 s1, s0 should honour RMode; confirm in the ARM ARM.
 */
/* plus infinity: set FPSCR.RMode = 0b01 (round towards plus infinity)
 * by inserting r1 = 1 into bits 23:22 of the fpscr copy.
 */
vmrs r0, fpscr
mov r1, 1
bfi r0, r1, 22, 2
vmsr fpscr, r0
vcvtr.u32.f32 q1, q0
ldr r0, =vcvtr_result_plus_infinity
vst1.32 {q1}, [r0]
ASSERT_MEMCMP(
vcvtr_result_plus_infinity,
vcvtr_expect_plus_infinity,
0x10
)
#endif
EXIT

152
userland/arch/arm/vfp.S Normal file
View File

@@ -0,0 +1,152 @@
/* https://github.com/cirosantilli/arm-assembly-cheat#vfp
 * Adapted from: https://mindplusplus.wordpress.com/2013/06/27/arm-vfp-vector-programming-part-2-examples/ */
#include "common.h"
.data;
/* Two input vectors of 8 floats each for the vec_sum test below. */
a1:
.float 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5
a2:
.float 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5
/* Output buffer: 8 floats * 4 bytes. */
sum:
.skip 32
/* Expected element-wise sums a1[i] + a2[i]. */
sum_expect:
.float 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0
ENTRY
/* Minimal single precision floating point example.
 * TODO: floating point representation constraints due to 4-byte instruction?
 */
vmov s0, 1.5
vmov s1, 2.5
/* s2 = s0 + s1 = 4.0 */
vadd.f32 s2, s0, s1
vmov s3, 4.0
/* Compare two floating point registers. Stores results in fpscr:
 * (floating point status and control register).
 */
vcmp.f32 s2, s3
/* Move the nzcv bits from fpscr to apsr */
vmrs apsr_nzcv, fpscr
/* This branch uses the Z bit of apsr, which was set accordingly. */
ASSERT(beq)
/* Now the same from memory with vldr and vstr. */
.data
my_float_0:
.float 1.5
my_float_1:
.float 2.5
my_float_sum_expect:
.float 4.0
.bss
my_float_sum:
.skip 4
.text
ldr r0, =my_float_0
vldr s0, [r0]
ldr r0, =my_float_1
vldr s1, [r0]
vadd.f32 s2, s0, s1
ldr r0, =my_float_sum
/* Store the result and compare it against the in-memory expect value. */
vstr.f32 s2, [r0]
ASSERT_MEMCMP(my_float_sum, my_float_sum_expect, 4)
#if 0
/* We can't do pseudo vldr as for ldr, fails with:
 * Error: cannot represent CP_OFF_IMM relocation in this object file format
 * It works on ARMv8 however, so the relocation must have been added.
 */
vldr s0, my_float_0
#endif
/* Minimal double precision floating point example. */
vmov.f64 d0, 1.5
vmov.f64 d1, 2.5
vadd.f64 d2, d0, d1
vmov.f64 d3, 4.0
vcmp.f64 d2, d3
vmrs apsr_nzcv, fpscr
ASSERT(beq)
/* vmov can also move to general purpose registers.
 *
 * Just remember that we can't use float immediates with general purpose registers:
 * https://stackoverflow.com/questions/6514537/how-do-i-specify-immediate-floating-point-numbers-with-inline-assembly/52906126#52906126
 */
/* r1 starts out different from r0 so the final compare is meaningful. */
mov r1, 2
mov r0, 1
/* Round trip the integer bit pattern: r0 -> s0 -> s1 -> r1. */
vmov s0, r0
vmov s1, s0
vmov r1, s1
ASSERT_EQ_REG(r0, r1)
/* Now a more complex test function. */
/* Arguments: r0 = output buffer, r1/r2 = inputs, r3 = element count. */
ldr r0, =sum
ldr r1, =a1
ldr r2, =a2
mov r3, 8
bl vec_sum
/* The assert works easily because all floats used
 * have exact base-2 representation.
 */
ASSERT_MEMCMP(sum, sum_expect, 0x20)
EXIT
/* void vec_sum(float *sum, float *a1, float *a2, int length) {
 * int i;
 * for (i=0; i < length; i++)
 * *(sum+i) = *(a1+i) + *(a2+i);
 * }
 *
 * Uses the deprecated VFP short vector mode: reconfig sets FPSCR LEN
 * to 8, so each vadd.f32 below operates on banks of 8 registers at
 * once. Assumes length is a multiple of 8 (asr r3, 3 truncates).
 */
vec_sum:
/* Setup */
/* NOTE(review): r4 is saved but never used by this function. */
push {r0, r1, r4, lr}
/* Preserve the argument registers across the reconfig call. */
push {r0, r1}
/* reconfig(stride = 1, length = 8) */
mov r0, 1
mov r1, 8
bl reconfig
pop {r0, r1}
/* 8 elements per iteration: iteration count = length / 8. */
asr r3, 3
/* Do the sum. */
1:
/* Load 8 floats from each input, post-incrementing the pointers. */
fldmias r1!, {s8-s15}
fldmias r2!, {s16-s23}
/* Short vector mode: sums s8-s15 + s16-s23 into s24-s31. */
vadd.f32 s24, s8, s16
fstmias r0!, {s24-s31}
subs r3, r3, 1
bne 1b
/* Teardown. */
bl deconfig
pop {r0, r1, r4, pc}
/* Configure VFP short vector mode via the FPSCR STRIDE and LEN fields.
 *
 * inputs:
 * r0: desired vector stride (1 or 2)
 * r1: desired vector length (min. 1, max. 8)
 * outputs: (none)
 * modified: FPSCR only (r0-r2 are saved and restored)
 * notes:
 * r0 and r1 will be truncated before fitting into FPSCR:
 * STRIDE (bits 21:20) encodes stride 1 as 0b00 and stride 2 as 0b11,
 * LEN (bits 18:16) encodes the length as (length - 1).
 */
reconfig:
push {r0-r2}
/* STRIDE field: (stride & 3) ^ 1 maps 1 -> 0b00 and 2 -> 0b11. */
and r0, r0, 3
eor r0, r0, 1
/* LEN field: length - 1, truncated to 3 bits. */
sub r1, r1, 1
and r1, r1, 7
/* Assemble both fields at their FPSCR bit positions in r0. */
mov r0, r0, lsl 20
orr r0, r0, r1, lsl 16
/* Read-modify-write FPSCR, clearing only STRIDE and LEN
 * (55*65536 == 0x370000, the mask covering both fields).
 */
vmrs r2, fpscr
bic r2, 55*65536
orr r2, r2, r0
/* BUGFIX: write back the merged value r2. The original wrote r0 here,
 * which discarded every other FPSCR bit (rounding mode, cumulative
 * exception flags, ...) instead of only updating STRIDE and LEN, and
 * made the masking work above dead code.
 */
vmsr fpscr, r2
pop {r0-r2}
bx lr
/* Restore scalar mode: reconfig(stride = 1, length = 1).
 * Preserves r0 and r1.
 */
deconfig:
push {r0, r1, lr}
mov r0, 1
mov r1, 1
bl reconfig
pop {r0, r1, pc}