/* Files
 * linux-kernel-module-cheat/userland/arch/arm/vfp.S
 * Ciro Santilli 六四事件 法轮功 64855767b4 arm assembly: move some more in
 * 2019-05-12 00:00:06 +00:00
 *
 * 153 lines
 * 3.5 KiB
 * ArmAsm
 */

/* https://github.com/cirosantilli/linux-kernel-module-cheat#vfp
 * Adapted from: https://mindplusplus.wordpress.com/2013/06/27/arm-vfp-vector-programming-part-2-examples/ */
#include "common.h"
.data;
/* First 8-element single-precision input vector for vec_sum. */
a1:
.float 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5
/* Second 8-element single-precision input vector for vec_sum. */
a2:
.float 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5
/* Output buffer: 8 floats * 4 bytes = 32 bytes, filled by vec_sum. */
sum:
.skip 32
/* Expected result: element-wise a1[i] + a2[i]. All values have exact
 * base-2 representations, so a bytewise memcmp is a valid check. */
sum_expect:
.float 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0
ENTRY
/* Minimal single precision floating point example.
 * TODO: floating point representation constraints due to 4-byte instruction?
 */
vmov s0, 1.5
vmov s1, 2.5
/* s2 = 1.5 + 2.5 = 4.0 */
vadd.f32 s2, s0, s1
vmov s3, 4.0
/* Compare two floating point registers. Stores results in fpscr:
 * (floating point status and control register).
 */
vcmp.f32 s2, s3
/* Move the nzcv bits from fpscr to apsr */
vmrs apsr_nzcv, fpscr
/* This branch uses the Z bit of apsr, which was set accordingly. */
ASSERT(beq)
/* Now the same from memory with vldr and vstr. */
.data
my_float_0:
.float 1.5
my_float_1:
.float 2.5
my_float_sum_expect:
.float 4.0
.bss
my_float_sum:
.skip 4
.text
/* vldr needs the address in a register, so first materialize the
 * label address with the ldr= literal-pool pseudo-instruction. */
ldr r0, =my_float_0
vldr s0, [r0]
ldr r0, =my_float_1
vldr s1, [r0]
vadd.f32 s2, s0, s1
ldr r0, =my_float_sum
/* Store the 4-byte result so it can be memcmp'd against the expected value. */
vstr.f32 s2, [r0]
ASSERT_MEMCMP(my_float_sum, my_float_sum_expect, 4)
#if 0
/* We can't do pseudo vldr as for ldr, fails with:
 * Error: cannot represent CP_OFF_IMM relocation in this object file format
 * It works on ARMv8 however, so the relocation must have been added.
 */
vldr s0, my_float_0
#endif
/* Minimal double precision floating point example.
 * Same pattern as the single-precision one above, on d registers. */
vmov.f64 d0, 1.5
vmov.f64 d1, 2.5
vadd.f64 d2, d0, d1
vmov.f64 d3, 4.0
vcmp.f64 d2, d3
vmrs apsr_nzcv, fpscr
ASSERT(beq)
/* vmov can also move to general purpose registers.
 *
 * Just remember that we can't use float immediates with general purpose registers:
 * https://stackoverflow.com/questions/6514537/how-do-i-specify-immediate-floating-point-numbers-with-inline-assembly/52906126#52906126
 */
mov r1, 2
mov r0, 1
/* Round trip the raw bits r0 -> s0 -> s1 -> r1 and check nothing changed. */
vmov s0, r0
vmov s1, s0
vmov r1, s1
ASSERT_EQ_REG(r0, r1)
/* Now a more complex test function.
 * Arguments per the C prototype below vec_sum:
 * r0 = output, r1 = first input, r2 = second input, r3 = element count. */
ldr r0, =sum
ldr r1, =a1
ldr r2, =a2
mov r3, 8
bl vec_sum
/* The assert works easily because all floats used
 * have exact base-2 representation.
 */
ASSERT_MEMCMP(sum, sum_expect, 0x20)
EXIT
/* void vec_sum(float *sum, float *a1, float *a2, int length) {
 * int i;
 * for (i=0; i < length; i++)
 * *(sum+i) = *(a1+i) + *(a2+i);
 * }
 *
 * Uses the pre-ARMv8 VFP "short vector" mode: after reconfig sets
 * FPSCR.LEN to 8, a single vadd.f32 on the banked registers s8-s31
 * operates on 8 elements at once.
 * NOTE(review): assumes length is a positive multiple of 8 — the
 * asr below drops any remainder, and a length < 8 would loop with
 * r3 == 0 underflowing; confirm callers only pass multiples of 8.
 */
vec_sum:
/* Setup */
push {r0, r1, r4, lr}
/* reconfig takes its arguments in r0/r1, so save the pointer args. */
push {r0, r1}
/* stride = 1 */
mov r0, 1
/* vector length = 8 */
mov r1, 8
bl reconfig
pop {r0, r1}
/* r3 = iteration count = length / 8 (8 elements per pass). */
asr r3, 3
/* Do the sum. */
1:
/* Load 8 floats from each input, post-incrementing the pointers. */
fldmias r1!, {s8-s15}
fldmias r2!, {s16-s23}
/* With FPSCR.LEN == 8 this single add computes s24-s31 = s8-s15 + s16-s23. */
vadd.f32 s24, s8, s16
/* Store the 8 results and advance the output pointer. */
fstmias r0!, {s24-s31}
subs r3, r3, 1
bne 1b
/* Teardown. */
bl deconfig
/* r4 is saved/restored but not otherwise used here — presumably kept
 * for stack alignment or historical reasons; TODO confirm. */
pop {r0, r1, r4, pc}
/* inputs:
 * r0: desired vector stride (1 or 2)
 * r1: desired vector length (min. 1, max. 8)
 * outputs: (none)
 * modified: FPSCR (r0-r2 are saved and restored)
 * notes:
 * r0 and r1 will be truncated before fitting into FPSCR
 */
reconfig:
push {r0-r2}
/* Encode the stride: FPSCR.STRIDE is 0b00 for stride 1 and 0b11 for
 * stride 2, so mask to 2 bits and flip bit 0 (1 -> 0b00, 2 -> 0b11). */
and r0, r0, 3
eor r0, r0, 1
/* Encode the length: FPSCR.LEN holds length - 1 (0..7). */
sub r1, r1, 1
and r1, r1, 7
/* Place STRIDE at bits [21:20] and LEN at bits [18:16]. */
mov r0, r0, lsl 20
orr r0, r0, r1, lsl 16
/* Read-modify-write FPSCR so unrelated bits (NZCV flags, rounding
 * mode, exception status) are preserved. 55*65536 == 0x00370000
 * masks exactly the STRIDE and LEN fields. */
vmrs r2, fpscr
bic r2, 55*65536
orr r2, r2, r0
/* Bug fix: write the merged value in r2. The previous code wrote r0,
 * which discarded the read-modify-write and zeroed every other
 * FPSCR bit. */
vmsr fpscr, r2
pop {r0-r2}
bx lr
/* Restore the default scalar VFP configuration: stride 1, length 1,
 * by delegating to reconfig. Saves and restores the caller's r0/r1. */
deconfig:
stmfd sp!, {r0, r1, lr}
mov r0, 1
mov r1, r0
bl reconfig
ldmfd sp!, {r0, r1, pc}