From 87e846fc1f9c57840e143513ebd69c638bd37aa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciro=20Santilli=20=E5=85=AD=E5=9B=9B=E4=BA=8B=E4=BB=B6=20?= =?UTF-8?q?=E6=B3=95=E8=BD=AE=E5=8A=9F?= Date: Thu, 25 Jul 2019 00:00:00 +0000 Subject: [PATCH] arm sve: enable on baremetal by setting missing bits CPACR_EL1.ZEN --- README.adoc | 14 +++++++++----- baremetal/lib/aarch64.S | 3 +++ userland/arch/arm/and.S | 8 ++++---- userland/arch/arm/orr.S | 27 +++++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 9 deletions(-) create mode 100644 userland/arch/arm/orr.S diff --git a/README.adoc b/README.adoc index 1bd5fa8..a9a6033 100644 --- a/README.adoc +++ b/README.adoc @@ -14317,9 +14317,9 @@ ARMv8-only, likely because in ARMv8 you can't have conditional suffixes for ever ==== ARM bitwise instructions -* link:userland/arch/arm/and.S[] +* link:userland/arch/arm/and.S[]: AND * EOR: exclusive OR -* ORR: OR +* link:userland/arch/arm/orr.S[]: OR * link:userland/arch/arm/clz.S[]: count leading zeroes ===== ARM BIC instruction @@ -14741,6 +14741,8 @@ Official spec: https://developer.arm.com/docs/100891/latest/sve-overview/introdu SVE support is indicated by `ID_AA64PFR0_EL1.SVE` which is dumped from link:baremetal/arch/aarch64/dump_regs.c[]. +Using SVE normally requires setting the CPACR_EL1.FPEN and ZEN bits, which as of lkmc 29fd625f3fda79f5e0ee6cac43517ba74340d513 + 1 we also enable in our <>, see also: <>. + ===== SVE bibliography * https://www.rico.cat/files/ICS18-gem5-sve-tutorial.pdf step by step of a complete code execution examples, the best initial tutorial so far @@ -15803,15 +15805,17 @@ msr cpacr_el1, x1 isb .... -`cpacr_el1` is documented at <> D10.2.29 "CPACR_EL1, Architectural Feature Access Control Register". +CPACR_EL1 is documented at <> D10.2.29 "CPACR_EL1, Architectural Feature Access Control Register". 
-Here we touch the FPEN bits to 3, which enable floating point operations: +Here we set the CPACR_EL1.FPEN bits to 3, which enables floating point operations: ____ 11 This control does not cause any instructions to be trapped. ____ -Without that, the `printf`: +We later also added an enable for the CPACR_EL1.ZEN bits, which are needed for <>. + +Without CPACR_EL1.FPEN, the `printf`: .... printf("got: %c\n", c); diff --git a/baremetal/lib/aarch64.S b/baremetal/lib/aarch64.S index eb73ed4..351ddef 100644 --- a/baremetal/lib/aarch64.S +++ b/baremetal/lib/aarch64.S @@ -7,7 +7,10 @@ lkmc_start: msr vbar_el1, x0 /* https://cirosantilli.com/linux-kernel-module-cheat#aarch64-baremetal-neon-setup */ + /* CPACR_EL1.FPEN */ mov x1, 0x3 << 20 + /* CPACR_EL1.ZEN */ + orr x1, x1, 0x3 << 16 msr cpacr_el1, x1 isb diff --git a/userland/arch/arm/and.S b/userland/arch/arm/and.S index 234bd39..4ba8e5e 100644 --- a/userland/arch/arm/and.S +++ b/userland/arch/arm/and.S @@ -4,22 +4,22 @@ LKMC_PROLOGUE - /* 0x00 && 0xFF == 0x00 */ + /* 0x00 & 0xFF == 0x00 */ mov r0, 0x00 and r0, 0xFF LKMC_ASSERT_EQ(r0, =0x00) - /* 0x0F && 0xF0 == 0x00 */ + /* 0x0F & 0xF0 == 0x00 */ mov r0, 0x0F and r0, 0xF0 LKMC_ASSERT_EQ(r0, =0x00) - /* 0x0F && 0xFF == 0x0F */ + /* 0x0F & 0xFF == 0x0F */ mov r0, 0x0F and r0, 0xFF LKMC_ASSERT_EQ(r0, =0x0F) - /* 0xF0 && 0xFF == 0xF0 */ + /* 0xF0 & 0xFF == 0xF0 */ mov r0, 0xF0 and r0, 0xFF LKMC_ASSERT_EQ(r0, =0xF0) diff --git a/userland/arch/arm/orr.S b/userland/arch/arm/orr.S new file mode 100644 index 0000000..0e34feb --- /dev/null +++ b/userland/arch/arm/orr.S @@ -0,0 +1,27 @@ +/* https://cirosantilli.com/linux-kernel-module-cheat#arm-bitwise-instructions */ + +#include + +LKMC_PROLOGUE + + /* 0x00 | 0xFF == 0xFF */ + mov r0, 0x00 + orr r0, 0xFF + LKMC_ASSERT_EQ(r0, =0xFF) + + /* 0x0F | 0xF0 == 0xFF */ + mov r0, 0x0F + orr r0, 0xF0 + LKMC_ASSERT_EQ(r0, =0xFF) + + /* 0x0F | 0x0F == 0x0F */ + mov r0, 0x0F + orr r0, 0x0F + LKMC_ASSERT_EQ(r0, =0x0F) + + /* 0xF0 | 0xF0 == 
0xF0 */ + mov r0, 0xF0 + orr r0, 0xF0 + LKMC_ASSERT_EQ(r0, =0xF0) + +LKMC_EPILOGUE