userland: add assembly support

Move arm assembly cheat here, and start some work on x86 cheat as well.
2026-01-28 12:34:26 +01:00 · 2019-03-22 00:00:00 +00:00
parent 4943c9ed2e
commit 287c83f3f9
117 changed files with 3870 additions and 547 deletions
--- a/userland/arch/arm/vfp.S
+++ b/userland/arch/arm/vfp.S
@@ -0,0 +1,152 @@
+/* https://github.com/cirosantilli/arm-assembly-cheat#vfp
+ * Adapted from: https://mindplusplus.wordpress.com/2013/06/27/arm-vfp-vector-programming-part-2-examples/ */
+
+#include "common.h"
+
+/* Test vectors for vec_sum: 8 single precision floats each (32 bytes). */
+.data
+/* First addend. */
+a1:
+    .float 0.0, 0.5, 1.0, 1.5
+    .float 2.0, 2.5, 3.0, 3.5
+/* Second addend. */
+a2:
+    .float 5.0, 5.5, 6.0, 6.5
+    .float 7.0, 7.5, 8.0, 8.5
+/* Output buffer: room for 8 floats of 4 bytes each. */
+sum:
+    .skip 8 * 4
+/* Expected element-wise a1 + a2. */
+sum_expect:
+    .float 5.0, 6.0, 7.0, 8.0
+    .float 9.0, 10.0, 11.0, 12.0
+
+/* Test body. ENTRY, EXIT and the ASSERT* macros are not defined in this
+ * file; presumably they come from common.h.
+ */
+ENTRY
+    /* Minimal single precision floating point example: 1.5 + 2.5 == 4.0.
+     * TODO: floating point representation constraints due to 4-byte instruction?
+     */
+    vmov s0, 1.5
+    vmov s1, 2.5
+    vadd.f32 s2, s0, s1
+    vmov s3, 4.0
+    /* Compare two floating point registers. Stores results in fpscr:
+     * (floating point status and control register).
+     */
+    vcmp.f32 s2, s3
+    /* Move the nzcv bits from fpscr to apsr */
+    vmrs apsr_nzcv, fpscr
+    /* This branch uses the Z bit of apsr, which was set accordingly. */
+    ASSERT(beq)
+
+    /* Now the same from memory with vldr and vstr.
+     * The operands are defined inline by switching to .data / .bss,
+     * then we switch back to .text to continue with the code.
+     */
+.data
+my_float_0:
+    .float 1.5
+my_float_1:
+    .float 2.5
+my_float_sum_expect:
+    .float 4.0
+.bss
+my_float_sum:
+    .skip 4
+.text
+    /* vldr / vstr take a register base address, so load each address into r0 first. */
+    ldr r0, =my_float_0
+    vldr s0, [r0]
+    ldr r0, =my_float_1
+    vldr s1, [r0]
+    vadd.f32 s2, s0, s1
+    ldr r0, =my_float_sum
+    vstr.f32 s2, [r0]
+    /* 4 == size of one single precision float. */
+    ASSERT_MEMCMP(my_float_sum, my_float_sum_expect, 4)
+
+#if 0
+    /* We can't do pseudo vldr as for ldr, fails with:
+     * Error: cannot represent CP_OFF_IMM relocation in this object file format
+     * It works on ARMv8 however, so the relocation must have been added.
+     */
+    vldr s0, my_float_0
+#endif
+
+    /* Minimal double precision floating point example: same sum with d registers. */
+    vmov.f64 d0, 1.5
+    vmov.f64 d1, 2.5
+    vadd.f64 d2, d0, d1
+    vmov.f64 d3, 4.0
+    vcmp.f64 d2, d3
+    vmrs apsr_nzcv, fpscr
+    ASSERT(beq)
+
+    /* vmov can also move to general purpose registers.
+     *
+     * Just remember that we can't use float immediates with general purpose registers:
+     * https://stackoverflow.com/questions/6514537/how-do-i-specify-immediate-floating-point-numbers-with-inline-assembly/52906126#52906126
+     */
+    /* r1 starts out different from r0, so the assert below can only pass
+     * if the value really travelled r0 -> s0 -> s1 -> r1.
+     */
+    mov r1, 2
+    mov r0, 1
+    vmov s0, r0
+    vmov s1, s0
+    vmov r1, s1
+    ASSERT_EQ_REG(r0, r1)
+
+    /* Now a more complex test function.
+     * Arguments follow the C prototype documented at vec_sum:
+     * r0 = sum, r1 = a1, r2 = a2, r3 = length (8 floats).
+     */
+    ldr r0, =sum
+    ldr r1, =a1
+    ldr r2, =a2
+    mov r3, 8
+    bl vec_sum
+    /* The assert works easily because all floats used
+     * have exact base-2 representation.
+     * 0x20 == 8 floats * 4 bytes.
+     */
+    ASSERT_MEMCMP(sum, sum_expect, 0x20)
+EXIT
+
+/* void vec_sum(float *sum, float *a1, float *a2, int length) {
+ *   int i;
+ *   for (i=0; i < length; i++)
+ *     *(sum+i) = *(a1+i) + *(a2+i);
+ * }
+ *
+ * inputs:
+ * r0: sum, destination array
+ * r1: a1, first source array
+ * r2: a2, second source array
+ * r3: length, number of floats
+ * outputs: (none in registers), sum[] is written
+ * modified: r2, r3, s8-s15, flags, FPSCR (through reconfig / deconfig)
+ * notes:
+ * Elements are processed 8 at a time, so only the first (length & ~7)
+ * elements are summed. If length < 8 nothing is written.
+ * r0 and r1 are restored on return. s16-s31 are callee-saved under the
+ * AAPCS, so they are saved and restored around their use here.
+ */
+vec_sum:
+    /* Setup */
+    /* r4 is not used below; presumably it is pushed only to keep the
+     * stack 8-byte aligned.
+     */
+    push {r0, r1, r4, lr}
+    vpush {s16-s31}
+    /* No full chunk of 8 floats (this also covers negative lengths):
+     * skip everything. Without this check the chunk counter would start
+     * at <= 0 and the subs / bne loop below would run past the arrays.
+     */
+    cmp r3, 8
+    blt 2f
+    /* reconfig takes its arguments in r0 / r1, which currently hold the
+     * sum and a1 pointers, so park those on the stack around the call.
+     */
+    push {r0, r1}
+    mov r0, 1
+    mov r1, 8
+    bl reconfig
+    pop {r0, r1}
+    /* r3 = number of 8-float chunks. */
+    asr r3, 3
+
+    /* Do the sum. */
+1:
+    fldmias r1!, {s8-s15}
+    fldmias r2!, {s16-s23}
+    /* Relies on the vector length of 8 configured above: this single
+     * instruction is expected to produce all of s24-s31.
+     */
+    vadd.f32 s24, s8, s16
+    fstmias r0!, {s24-s31}
+    subs r3, r3, 1
+    bne 1b
+
+    /* Teardown. */
+    bl deconfig
+2:
+    vpop {s16-s31}
+    pop {r0, r1, r4, pc}
+
+/* Set the vector stride and vector length fields of FPSCR,
+ * leaving every other FPSCR bit as it was.
+ *
+ * inputs:
+ * r0: desired vector stride (1 or 2)
+ * r1: desired vector length (min. 1, max. 8)
+ * outputs: (none)
+ * modified: FPSCR (r0-r2 are saved and restored)
+ * notes:
+ * r0 and r1 will be truncated before fitting into FPSCR
+ */
+reconfig:
+    push {r0-r2}
+    /* Stride field value: 1 -> 0b00, 2 -> 0b11. */
+    and r0, r0, 3
+    eor r0, r0, 1
+    /* Length field value: length - 1, kept to 3 bits. */
+    sub r1, r1, 1
+    and r1, r1, 7
+    /* r0 = stride at bits 21:20 | length at bits 18:16. */
+    mov r0, r0, lsl 20
+    orr r0, r0, r1, lsl 16
+    /* Read-modify-write: 55*65536 == 0x37 << 16 clears exactly the two
+     * fields built above (bits 21:20 and 18:16) in the current FPSCR value.
+     */
+    vmrs r2, fpscr
+    bic r2, r2, 55*65536
+    orr r2, r2, r0
+    /* Write back the merged value in r2. Writing r0 here would zero all
+     * the other FPSCR bits.
+     */
+    vmsr fpscr, r2
+    pop {r0-r2}
+    bx lr
+
+/* Put the VFP back into scalar mode by calling reconfig with
+ * stride 1 and length 1.
+ *
+ * inputs: (none)
+ * outputs: (none)
+ * modified: FPSCR (through reconfig); r0 and r1 are saved and restored
+ */
+deconfig:
+    stmfd sp!, {r0, r1, lr}
+    /* reconfig arguments: r1 = length, r0 = stride. */
+    mov r1, 1
+    mov r0, 1
+    bl reconfig
+    /* Restores r0 / r1 and returns by loading the saved lr into pc. */
+    ldmfd sp!, {r0, r1, pc}