mirror of
https://github.com/cirosantilli/linux-kernel-module-cheat.git
synced 2026-01-28 12:34:26 +01:00
userland: add assembly support
Move arm assembly cheat here, and start some work on x86 cheat as well.
This commit is contained in:
152
userland/arch/arm/vfp.S
Normal file
152
userland/arch/arm/vfp.S
Normal file
@@ -0,0 +1,152 @@
|
||||
/* https://github.com/cirosantilli/arm-assembly-cheat#vfp
|
||||
* Adapted from: https://mindplusplus.wordpress.com/2013/06/27/arm-vfp-vector-programming-part-2-examples/ */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/* Test vectors for the vec_sum call made from the main program:
 * two 8-element single precision inputs, a 32-byte output buffer,
 * and the expected element-wise sum of the inputs.
 */
.data
a1:
    .single 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5
a2:
    .single 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5
sum:
    /* 8 floats * 4 bytes, written by vec_sum. */
    .space 32
sum_expect:
    .single 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0
|
||||
|
||||
ENTRY
    /* Single precision: check that 1.5 + 2.5 == 4.0 using only registers.
     * TODO: which float immediates can vmov encode, given the fixed
     * 4-byte instruction size?
     */
    vmov s3, 4.0
    vmov s1, 2.5
    vmov s0, 1.5
    vadd.f32 s2, s0, s1
    /* vcmp leaves its result in the flags of fpscr
     * (floating point status and control register).
     */
    vcmp.f32 s2, s3
    /* Copy the nzcv flags from fpscr into apsr so that an
     * ordinary conditional branch can test them.
     */
    vmrs apsr_nzcv, fpscr
    /* beq tests the Z flag of apsr, which now holds the comparison result. */
    ASSERT(beq)

    /* Same addition, but with operands loaded from memory with vldr
     * and the result stored back with vstr.
     */
.data
my_float_0:
    .single 1.5
my_float_1:
    .single 2.5
my_float_sum_expect:
    .single 4.0
.bss
my_float_sum:
    .space 4
.text
    ldr r0, =my_float_1
    vldr s1, [r0]
    ldr r0, =my_float_0
    vldr s0, [r0]
    vadd.f32 s2, s0, s1
    ldr r0, =my_float_sum
    vstr.f32 s2, [r0]
    ASSERT_MEMCMP(my_float_sum, my_float_sum_expect, 4)

#if 0
    /* Unlike ldr, vldr cannot take a label directly here, it fails with:
     * Error: cannot represent CP_OFF_IMM relocation in this object file format
     * It works on ARMv8 however, so the relocation must have been added.
     */
    vldr s0, my_float_0
#endif

    /* Double precision: the same 1.5 + 2.5 == 4.0 check on d registers. */
    vmov.f64 d3, 4.0
    vmov.f64 d1, 2.5
    vmov.f64 d0, 1.5
    vadd.f64 d2, d0, d1
    vcmp.f64 d2, d3
    vmrs apsr_nzcv, fpscr
    ASSERT(beq)

    /* vmov also copies between general purpose and floating point registers.
     *
     * Float immediates cannot be used with general purpose registers though:
     * https://stackoverflow.com/questions/6514537/how-do-i-specify-immediate-floating-point-numbers-with-inline-assembly/52906126#52906126
     *
     * r1 starts out different from r0, so the assert only passes if the
     * value really made the trip r0 -> s0 -> s1 -> r1.
     */
    mov r0, 1
    mov r1, 2
    vmov s0, r0
    vmov s1, s0
    vmov r1, s1
    ASSERT_EQ_REG(r0, r1)

    /* Larger test: sum[i] = a1[i] + a2[i] for 8 floats through vec_sum. */
    mov r3, 8
    ldr r2, =a2
    ldr r1, =a1
    ldr r0, =sum
    bl vec_sum
    /* A byte-wise compare is enough because every float involved
     * is exactly representable in base 2.
     */
    ASSERT_MEMCMP(sum, sum_expect, 0x20)
EXIT
|
||||
|
||||
/* void vec_sum(float *sum, float *a1, float *a2, int length) {
 *   int i;
 *   for (i=0; i < length; i++)
 *     *(sum+i) = *(a1+i) + *(a2+i);
 * }
 *
 * inputs:
 *   r0: sum, destination buffer
 *   r1: a1, first source buffer
 *   r2: a2, second source buffer
 *   r3: length, number of floats
 * outputs: (none in registers), sum[] is written
 * modified: r2, r3, s8-s15, flags
 *   r0, r1 and r4 are restored from the stack on return.
 *   s16-s31 are used as scratch, but are saved and restored here as
 *   d8-d15 because the AAPCS makes them callee-saved.
 * notes:
 *   Elements are processed in groups of 8 with the VFP vector length
 *   set to 8 through reconfig. length is divided by 8 rounding down,
 *   so a trailing partial group is not summed, and a length below 8
 *   returns without touching memory.
 */
vec_sum:
    /* Setup. r4 is not used, it only keeps the push 8-byte sized. */
    push {r0, r1, r4, lr}
    /* r3 = number of 8-float groups. Leave early when there is none,
     * otherwise the subs/bne loop below would wrap around from 0.
     */
    asr r3, r3, 3
    cmp r3, 0
    ble 2f
    vpush {d8-d15}
    /* r0 and r1 carry the reconfig arguments, so park the pointers. */
    push {r0, r1}
    mov r0, 1
    mov r1, 8
    bl reconfig
    pop {r0, r1}

    /* Do the sum, one group of 8 floats per iteration. */
1:
    vldmia r1!, {s8-s15}
    vldmia r2!, {s16-s23}
    /* With vector length 8 this single instruction adds
     * s8-s15 to s16-s23 and puts the results in s24-s31.
     */
    vadd.f32 s24, s8, s16
    vstmia r0!, {s24-s31}
    subs r3, r3, 1
    bne 1b

    /* Teardown: back to scalar mode, then restore the saved registers. */
    bl deconfig
    vpop {d8-d15}
2:
    pop {r0, r1, r4, pc}
|
||||
|
||||
/* Set the VFP vector stride and vector length fields of FPSCR.
 *
 * inputs:
 *   r0: desired vector stride (1 or 2)
 *   r1: desired vector length (min. 1, max. 8)
 * outputs: (none)
 * modified: FPSCR only, r0-r2 are saved and restored
 * notes:
 *   r0 and r1 will be truncated before fitting into FPSCR:
 *   the stride field is ((r0 & 3) ^ 1) at bit 20, which maps
 *   1 -> 0b00 and 2 -> 0b11, and the length field is
 *   ((r1 - 1) & 7) at bit 16.
 *   All other FPSCR bits are left as they were.
 */
reconfig:
    push {r0-r2}
    /* r0 = stride field, already shifted into place. */
    and r0, r0, 3
    eor r0, r0, 1
    mov r0, r0, lsl 20
    /* r0 |= length field. */
    sub r1, r1, 1
    and r1, r1, 7
    orr r0, r0, r1, lsl 16
    /* Read-modify-write: clear bits 16-18 and 20-21 (0x370000),
     * then insert the new fields.
     */
    vmrs r2, fpscr
    bic r2, r2, 0x370000
    orr r2, r2, r0
    /* Write back the merged value in r2. Writing r0 here would
     * zero every other FPSCR field.
     */
    vmsr fpscr, r2
    pop {r0-r2}
    bx lr
|
||||
|
||||
/* Undo a previous reconfig by calling reconfig with stride 1 and length 1.
 *
 * inputs: (none)
 * outputs: (none)
 * r0 and r1 are saved here because they are overwritten with the
 * reconfig arguments, and restored on return.
 */
deconfig:
    push {r0, r1, lr}
    mov r1, 1
    mov r0, 1
    bl reconfig
    /* Popping the saved lr into pc returns to the caller. */
    pop {r0, r1, pc}
|
||||
Reference in New Issue
Block a user