/* https://github.com/cirosantilli/arm-assembly-cheat#vfp
 * Adapted from: https://mindplusplus.wordpress.com/2013/06/27/arm-vfp-vector-programming-part-2-examples/
 */

#include "common.h"

.data
a1: .float 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5
a2: .float 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5
sum: .skip 32
sum_expect: .float 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0
ENTRY
    /* Minimal single precision floating point example.
     * TODO: floating point representation constraints due to 4-byte instruction?
     */
    vmov s0, 1.5
    vmov s1, 2.5
    vadd.f32 s2, s0, s1
    vmov s3, 4.0
    /* Compare two floating point registers. The result is stored in the FPSCR
     * (floating point status and control register).
     */
    vcmp.f32 s2, s3
    /* Move the NZCV bits from the FPSCR to the APSR. */
    vmrs apsr_nzcv, fpscr
    /* This branch uses the Z bit of the APSR, which was set accordingly. */
    ASSERT(beq)

    /* Now the same from memory with vldr and vstr. */
.data
my_float_0: .float 1.5
my_float_1: .float 2.5
my_float_sum_expect: .float 4.0
.bss
my_float_sum: .skip 4
.text
    ldr r0, =my_float_0
    vldr s0, [r0]
    ldr r0, =my_float_1
    vldr s1, [r0]
    vadd.f32 s2, s0, s1
    ldr r0, =my_float_sum
    vstr.f32 s2, [r0]
    ASSERT_MEMCMP(my_float_sum, my_float_sum_expect, 4)
#if 0
    /* We can't use a literal pseudo-load with vldr as we can with ldr:
     * it fails with:
     * Error: cannot represent CP_OFF_IMM relocation in this object file format
     * It works on ARMv8 however, so the relocation must have been added there.
     */
    vldr s0, my_float_0
#endif

    /* Minimal double precision floating point example. */
    vmov.f64 d0, 1.5
    vmov.f64 d1, 2.5
    vadd.f64 d2, d0, d1
    vmov.f64 d3, 4.0
    vcmp.f64 d2, d3
    vmrs apsr_nzcv, fpscr
    ASSERT(beq)

    /* vmov can also move values between VFP and general purpose registers.
     * Note that such moves copy the raw bit pattern: there is no
     * integer-to-float conversion.
     *
     * Just remember that we can't use float immediates with general purpose registers:
     * https://stackoverflow.com/questions/6514537/how-do-i-specify-immediate-floating-point-numbers-with-inline-assembly/52906126#52906126
     */
    mov r1, 2
    mov r0, 1
    vmov s0, r0
    vmov s1, s0
    vmov r1, s1
    ASSERT_EQ_REG(r0, r1)

    /* Now a more complex test function. */
    ldr r0, =sum
    ldr r1, =a1
    ldr r2, =a2
    mov r3, 8
    bl vec_sum
    /* A byte-for-byte memory compare works here because all the floats
     * involved have exact base-2 representations.
     */
    ASSERT_MEMCMP(sum, sum_expect, 0x20)
EXIT

/* void vec_sum(float *sum, float *a1, float *a2, int length) {
 *     int i;
 *     for (i = 0; i < length; i++)
 *         sum[i] = a1[i] + a2[i];
 * }
 */
vec_sum:
    /* Setup. */
    push {r0, r1, r4, lr}
    push {r0, r1}
    /* Enter vector mode: stride 1, vector length 8. */
    mov r0, 1
    mov r1, 8
    bl reconfig
    pop {r0, r1}
    /* Each iteration processes 8 floats, so loop length / 8 times. */
    asr r3, 3
    /* Do the sum. */
1:
    /* fldmias / fstmias are the pre-UAL mnemonics for vldmia / vstmia on
     * single precision registers: load or store a register range and
     * post-increment the base pointer. */
    fldmias r1!, {s8-s15}
    fldmias r2!, {s16-s23}
    /* With LEN = 8 in the FPSCR, this single instruction adds the whole
     * register banks: s24-s31 = s8-s15 + s16-s23. */
    vadd.f32 s24, s8, s16
    fstmias r0!, {s24-s31}
    subs r3, r3, 1
    bne 1b
    /* Teardown. */
    bl deconfig
    pop {r0, r1, r4, pc}

/* Set the FPSCR vector STRIDE and LEN fields.
 * inputs:
 *     r0: desired vector stride (1 or 2)
 *     r1: desired vector length (min. 1, max. 8)
 * outputs: (none)
 * modified: FPSCR (r0-r2 are saved and restored)
 * notes:
 *     the stride and length values are truncated to fit their FPSCR fields
 */
reconfig:
    push {r0-r2}
    /* Encode the stride: 1 -> 0b00, 2 -> 0b11. */
    and r0, r0, 3
    eor r0, r0, 1
    /* The LEN field stores length - 1. */
    sub r1, r1, 1
    and r1, r1, 7
    /* STRIDE goes in bits [21:20], LEN in bits [18:16]. */
    mov r0, r0, lsl 20
    orr r0, r0, r1, lsl 16
    vmrs r2, fpscr
    /* Clear the old STRIDE and LEN: 55 * 65536 == 0x370000. */
    bic r2, 55 * 65536
    orr r2, r2, r0
    vmsr fpscr, r2
    pop {r0-r2}
    bx lr

/* Return to scalar mode: stride 1, vector length 1. */
deconfig:
    push {r0, r1, lr}
    mov r0, 1
    mov r1, 1
    bl reconfig
    pop {r0, r1, pc}
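
/* For comparison, a minimal sketch of the same loop written with plain
 * scalar VFP operations, without touching the FPSCR STRIDE/LEN fields.
 * The name vec_sum_scalar and its register usage are our own additions,
 * not part of the original example. Because the destination register s2
 * is in the first bank (s0-s7), the vadd below executes as a scalar
 * operation even if a vector length happens to be configured.
 * inputs: same as vec_sum (r0: sum, r1: a1, r2: a2, r3: length)
 * modified: r0-r3, s0-s2
 */
vec_sum_scalar:
2:
    vldmia r1!, {s0}
    vldmia r2!, {s1}
    vadd.f32 s2, s0, s1
    vstmia r0!, {s2}
    subs r3, r3, 1
    bne 2b
    bx lr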
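
/* A small helper to read the configured vector length back out of the
 * FPSCR, assuming the bit layout documented at reconfig above (LEN is
 * stored as length - 1 in bits [18:16]). The name get_vec_len is our own
 * assumption, not from the original example.
 * outputs: r0: current vector length (1 to 8)
 * modified: r0
 */
get_vec_len:
    vmrs r0, fpscr
    mov r0, r0, lsr 16
    and r0, r0, 7
    add r0, r0, 1
    bx lr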
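
#if 0
    /* Untested usage sketch, our own addition: requesting a stride of 2
     * via reconfig above. With STRIDE = 2, vector operations step through
     * the register banks two registers at a time, e.g. {s8, s10, s12, s14}
     * instead of {s8, s9, s10, s11}. */
    mov r0, 2 /* vector stride 2 */
    mov r1, 4 /* vector length 4 */
    bl reconfig
#endif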