From 287c83f3f99db8c1ff9bbc85a79576da6a78e986 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciro=20Santilli=20=E5=85=AD=E5=9B=9B=E4=BA=8B=E4=BB=B6=20?= =?UTF-8?q?=E6=B3=95=E8=BD=AE=E5=8A=9F?= Date: Fri, 22 Mar 2019 00:00:00 +0000 Subject: [PATCH] userland: add assembly support Move arm assembly cheat here, and start some work on x86 cheat as well. --- README.adoc | 762 ++++++++++-------- build-userland | 375 +++++---- build-userland-in-tree | 7 +- common.py | 6 +- shell_helpers.py | 16 +- thread_pool.py | 221 +++++ userland/arch/aarch64/add.S | 9 + userland/arch/aarch64/adr.S | 21 + userland/arch/aarch64/adrp.S | 13 + userland/arch/aarch64/asm_hello.c | 13 - userland/arch/aarch64/beq.S | 33 + userland/arch/aarch64/bfi.S | 11 + userland/arch/aarch64/c/asm_from_c.c | 39 + userland/arch/aarch64/c/build | 1 + userland/arch/aarch64/c/earlyclobber.c | 21 + userland/arch/aarch64/c/freestanding/build | 1 + userland/arch/aarch64/c/freestanding/hello.c | 37 + .../aarch64/c/freestanding/hello_clobbers.c | 40 + userland/arch/aarch64/c/inc.c | 13 + userland/arch/aarch64/c/inc_float.c | 28 + userland/arch/aarch64/c/multiline.cpp | 18 + userland/arch/aarch64/c/reg_var.c | 27 + userland/arch/aarch64/c/reg_var_float.c | 28 + userland/arch/aarch64/cbz.S | 19 + userland/arch/aarch64/comments.S | 17 + userland/arch/aarch64/common_arch.h | 64 ++ userland/arch/aarch64/cset.S | 28 + userland/arch/aarch64/empty.S | 1 + userland/arch/aarch64/fail.S | 1 + userland/arch/aarch64/floating_point.S | 60 ++ userland/arch/aarch64/freestanding/build | 1 + userland/arch/aarch64/freestanding/hello.S | 20 + userland/arch/aarch64/hello_driver.S | 6 + userland/arch/aarch64/immediates.S | 9 + userland/arch/aarch64/movk.S | 26 + userland/arch/aarch64/movn.S | 9 + userland/arch/aarch64/pc.S | 78 ++ userland/arch/aarch64/regs.S | 47 ++ userland/arch/aarch64/ret.S | 28 + userland/arch/aarch64/simd.S | 86 ++ userland/arch/aarch64/simd_interleave.S | 26 + userland/arch/aarch64/str.S | 13 + 
userland/arch/aarch64/ubfm.S | 17 + userland/arch/aarch64/ubfx.S | 15 + userland/arch/aarch64/x31.S | 51 ++ userland/arch/arm/add.S | 58 ++ userland/arch/arm/address_modes.S | 51 ++ userland/arch/arm/adr.S | 33 + userland/arch/arm/and.S | 27 + userland/arch/arm/b.S | 9 + userland/arch/arm/beq.S | 28 + userland/arch/arm/bfi.S | 10 + userland/arch/arm/bic.S | 10 + userland/arch/arm/bl.S | 14 + userland/arch/arm/build | 1 + userland/arch/arm/c/add.c | 17 + userland/arch/arm/c/build | 1 + userland/arch/arm/c/freestanding/build | 1 + userland/arch/arm/c/freestanding/hello.c | 35 + userland/arch/arm/c/inc.c | 15 + userland/arch/arm/c/inc_float.c | 28 + userland/arch/arm/c/inc_memory.c | 32 + userland/arch/arm/c/inc_memory_global.c | 25 + userland/arch/arm/c/reg_var.c | 38 + userland/arch/arm/c_from_asm.S | 59 ++ userland/arch/arm/clz.S | 17 + userland/arch/arm/comments.S | 14 + userland/arch/arm/common_arch.h | 71 ++ userland/arch/arm/cond.S | 16 + userland/arch/arm/empty.S | 1 + userland/arch/arm/fail.S | 1 + userland/arch/arm/freestanding/build | 1 + userland/arch/arm/freestanding/hello.S | 21 + userland/arch/arm/hello_driver.S | 23 + userland/arch/arm/immediates.S | 24 + userland/arch/arm/inc_array.S | 27 + userland/arch/arm/ldmia.S | 62 ++ userland/arch/arm/ldr_pseudo.S | 65 ++ userland/arch/arm/ldrb.S | 12 + userland/arch/arm/ldrh.S | 12 + userland/arch/arm/mov.S | 19 + userland/arch/arm/movw.S | 27 + userland/arch/arm/mul.S | 12 + userland/arch/arm/nop.S | 32 + userland/arch/arm/push.S | 31 + userland/arch/arm/rbit.S | 9 + userland/arch/arm/regs.S | 69 ++ userland/arch/arm/rev.S | 15 + userland/arch/arm/s_suffix.S | 35 + userland/arch/arm/shift.S | 79 ++ userland/arch/arm/simd.S | 113 +++ userland/arch/arm/str.S | 60 ++ userland/arch/arm/sub.S | 11 + userland/arch/arm/thumb.S | 17 + userland/arch/arm/tst.S | 19 + userland/arch/arm/vcvt.S | 90 +++ userland/arch/arm/vcvta.S | 41 + userland/arch/arm/vcvtr.S | 46 ++ userland/arch/arm/vfp.S | 152 ++++ 
userland/arch/common.h | 28 + userland/arch/empty.S | 6 + userland/arch/fail.S | 10 + userland/arch/main.c | 17 + userland/arch/x86_64/c/add.c | 16 + userland/arch/x86_64/{ => c}/binutils_hack.c | 0 .../{asm_hello.c => c/binutils_nohack.c} | 2 + userland/arch/x86_64/c/build | 1 + userland/arch/x86_64/c/freestanding/build | 1 + userland/arch/x86_64/c/freestanding/hello.c | 31 + .../arch/x86_64/c/freestanding/hello_regvar.c | 37 + userland/arch/x86_64/c/inc.c | 13 + userland/arch/x86_64/c/scratch.c | 22 + userland/arch/x86_64/c/scratch_hardcode.c | 20 + userland/arch/x86_64/common_arch.h | 84 ++ userland/arch/x86_64/empty.S | 1 + userland/arch/x86_64/fail.S | 1 + userland/arch/x86_64/freestanding/hello.S | 29 +- 117 files changed, 3870 insertions(+), 547 deletions(-) create mode 100644 thread_pool.py create mode 100644 userland/arch/aarch64/add.S create mode 100644 userland/arch/aarch64/adr.S create mode 100644 userland/arch/aarch64/adrp.S delete mode 100644 userland/arch/aarch64/asm_hello.c create mode 100644 userland/arch/aarch64/beq.S create mode 100644 userland/arch/aarch64/bfi.S create mode 100644 userland/arch/aarch64/c/asm_from_c.c create mode 120000 userland/arch/aarch64/c/build create mode 100644 userland/arch/aarch64/c/earlyclobber.c create mode 120000 userland/arch/aarch64/c/freestanding/build create mode 100644 userland/arch/aarch64/c/freestanding/hello.c create mode 100644 userland/arch/aarch64/c/freestanding/hello_clobbers.c create mode 100644 userland/arch/aarch64/c/inc.c create mode 100644 userland/arch/aarch64/c/inc_float.c create mode 100644 userland/arch/aarch64/c/multiline.cpp create mode 100644 userland/arch/aarch64/c/reg_var.c create mode 100644 userland/arch/aarch64/c/reg_var_float.c create mode 100644 userland/arch/aarch64/cbz.S create mode 100644 userland/arch/aarch64/comments.S create mode 100644 userland/arch/aarch64/common_arch.h create mode 100644 userland/arch/aarch64/cset.S create mode 120000 userland/arch/aarch64/empty.S create mode 
120000 userland/arch/aarch64/fail.S create mode 100644 userland/arch/aarch64/floating_point.S create mode 120000 userland/arch/aarch64/freestanding/build create mode 100644 userland/arch/aarch64/freestanding/hello.S create mode 100644 userland/arch/aarch64/hello_driver.S create mode 100644 userland/arch/aarch64/immediates.S create mode 100644 userland/arch/aarch64/movk.S create mode 100644 userland/arch/aarch64/movn.S create mode 100644 userland/arch/aarch64/pc.S create mode 100644 userland/arch/aarch64/regs.S create mode 100644 userland/arch/aarch64/ret.S create mode 100644 userland/arch/aarch64/simd.S create mode 100644 userland/arch/aarch64/simd_interleave.S create mode 100644 userland/arch/aarch64/str.S create mode 100644 userland/arch/aarch64/ubfm.S create mode 100644 userland/arch/aarch64/ubfx.S create mode 100644 userland/arch/aarch64/x31.S create mode 100644 userland/arch/arm/add.S create mode 100644 userland/arch/arm/address_modes.S create mode 100644 userland/arch/arm/adr.S create mode 100644 userland/arch/arm/and.S create mode 100644 userland/arch/arm/b.S create mode 100644 userland/arch/arm/beq.S create mode 100644 userland/arch/arm/bfi.S create mode 100644 userland/arch/arm/bic.S create mode 100644 userland/arch/arm/bl.S create mode 120000 userland/arch/arm/build create mode 100644 userland/arch/arm/c/add.c create mode 120000 userland/arch/arm/c/build create mode 120000 userland/arch/arm/c/freestanding/build create mode 100644 userland/arch/arm/c/freestanding/hello.c create mode 100644 userland/arch/arm/c/inc.c create mode 100644 userland/arch/arm/c/inc_float.c create mode 100644 userland/arch/arm/c/inc_memory.c create mode 100644 userland/arch/arm/c/inc_memory_global.c create mode 100644 userland/arch/arm/c/reg_var.c create mode 100644 userland/arch/arm/c_from_asm.S create mode 100644 userland/arch/arm/clz.S create mode 100644 userland/arch/arm/comments.S create mode 100644 userland/arch/arm/common_arch.h create mode 100644 userland/arch/arm/cond.S 
create mode 120000 userland/arch/arm/empty.S create mode 120000 userland/arch/arm/fail.S create mode 120000 userland/arch/arm/freestanding/build create mode 100644 userland/arch/arm/freestanding/hello.S create mode 100644 userland/arch/arm/hello_driver.S create mode 100644 userland/arch/arm/immediates.S create mode 100644 userland/arch/arm/inc_array.S create mode 100644 userland/arch/arm/ldmia.S create mode 100644 userland/arch/arm/ldr_pseudo.S create mode 100644 userland/arch/arm/ldrb.S create mode 100644 userland/arch/arm/ldrh.S create mode 100644 userland/arch/arm/mov.S create mode 100644 userland/arch/arm/movw.S create mode 100644 userland/arch/arm/mul.S create mode 100644 userland/arch/arm/nop.S create mode 100644 userland/arch/arm/push.S create mode 100644 userland/arch/arm/rbit.S create mode 100644 userland/arch/arm/regs.S create mode 100644 userland/arch/arm/rev.S create mode 100644 userland/arch/arm/s_suffix.S create mode 100644 userland/arch/arm/shift.S create mode 100644 userland/arch/arm/simd.S create mode 100644 userland/arch/arm/str.S create mode 100644 userland/arch/arm/sub.S create mode 100644 userland/arch/arm/thumb.S create mode 100644 userland/arch/arm/tst.S create mode 100644 userland/arch/arm/vcvt.S create mode 100644 userland/arch/arm/vcvta.S create mode 100644 userland/arch/arm/vcvtr.S create mode 100644 userland/arch/arm/vfp.S create mode 100644 userland/arch/common.h create mode 100644 userland/arch/empty.S create mode 100644 userland/arch/fail.S create mode 100644 userland/arch/main.c create mode 100644 userland/arch/x86_64/c/add.c rename userland/arch/x86_64/{ => c}/binutils_hack.c (100%) rename userland/arch/x86_64/{asm_hello.c => c/binutils_nohack.c} (73%) create mode 120000 userland/arch/x86_64/c/build create mode 120000 userland/arch/x86_64/c/freestanding/build create mode 100644 userland/arch/x86_64/c/freestanding/hello.c create mode 100644 userland/arch/x86_64/c/freestanding/hello_regvar.c create mode 100644 
userland/arch/x86_64/c/inc.c create mode 100644 userland/arch/x86_64/c/scratch.c create mode 100644 userland/arch/x86_64/c/scratch_hardcode.c create mode 100644 userland/arch/x86_64/common_arch.h create mode 120000 userland/arch/x86_64/empty.S create mode 120000 userland/arch/x86_64/fail.S diff --git a/README.adoc b/README.adoc index 0a73786..2748542 100644 --- a/README.adoc +++ b/README.adoc @@ -422,7 +422,7 @@ index af583ce578..3cc341f303 100644 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, .... -Finally, rebuild Binutils, userland and test our program with <>: +Finally, rebuild Binutils, userland and test our program with <>: .... ./build-buildroot -- host-binutils-rebuild @@ -438,7 +438,7 @@ Tested on b60784d59bee993bf0de5cde6c6380dd69420dda + 1. OK, now time to hack GCC. -For convenience, let's use the <>. +For convenience, let's use the <>. If we run the program link:userland/gcc_hack.c[]: @@ -929,6 +929,115 @@ sudo rmmod hello.ko dmesg .... +=== Userland setup + +==== About the userland setup + +In order to test the kernel and emulators, userland content in the form of executables and scripts is of course required, and we store it mostly under: + +* link:userland/[] +* <> +* <> + +When we started this repository, it only contained content that interacted very closely with the kernel, or that had required performance analysis. + +However, we soon started to notice that this had an increasing overlap with other userland test repositories: we were duplicating build and test infrastructure and even some examples. + +Therefore, we decided to consolidate other userland tutorials that we had scattered around into this repository. 
+ +Notable userland content included / moving into this repository includes: + +* <> +* <> +* <> +* <> +* <> +* https://github.com/cirosantilli/algorithm-cheat will be good to move here for performance analysis + +==== Userland setup getting started + +There are several ways to run our userland content, notably: + +* natively on the host as shown at: <> ++ +Can only run examples compatible with your host architecture and OS, but has the fastest setup and runtimes. +* from user mode simulation as shown at: <> ++ +Can run most examples, with the notable exception of examples that rely on kernel modules. +* from full system simulation as shown at: <>. ++ +This is the most reproducible and controlled environment, and all examples work there. But also the slowest one to set up. + +===== Userland setup getting started natively + +With this setup, we will use the host toolchain and execute executables directly on the host. + +No installation or toolchain build is required, so you can just jump straight into it. + +Build, run an example, and clean it in-tree with: + +.... +cd userland +./build +./c/hello.out +./build --clean +.... + +Source: link:userland/c/hello.c[]. + +Or build just one directory: + +.... +./build c +.... + +or just one executable: + +.... +./build c/hello +.... + +Do a cleaner out-of-tree build and run the program instead: + +.... +./build-userland --gcc-which host --userland-build-id host +"$(./getvar --userland-build-id host userland_build_dir)/hello.out" +.... + +===== Userland setup getting started full system + +First ensure that <> is working. + +After doing that setup, you can already execute your userland programs from inside QEMU: the only missing step is how to rebuild executables and run them. + +And the answer is exactly analogous to what is shown at: <> + +For example, if we modify link:userland/c/hello.c[] to print out something different, we can just rebuild it with: + +.... +./build-userland +.... + +Source: link:build-userland[]. 
`./build` calls that script automatically for us when doing the initial full build. + +Now, either run the program without rebooting by using the <<9p>> mount: + +.... +/mnt/9p/out_rootfs_overlay/c/hello.out +.... + +or shut down QEMU, add the executable to the root filesystem: + +.... +./build-buildroot +.... + +reboot and use the root filesystem as usual: + +.... +/c/hello.out +.... + === Baremetal setup ==== About the baremetal setup @@ -1076,8 +1185,8 @@ But just stick to newer and better `VExpress_GEM5_V1` unless you have a good rea When doing bare metal programming, it is likely that you will want to learn assembly language basics. Have a look at these tutorials for the userland part: -* https://github.com/cirosantilli/x86-assembly-cheat -* https://github.com/cirosantilli/arm-assembly-cheat +* <> +* <> For more information on baremetal, see the section: <>. @@ -1086,14 +1195,6 @@ The following subjects are particularly important: * <> * <> -=== User mode setup - -Much like <>, this is another fun setup that does not require Buildroot or the Linux kernel. - -Getting started at: <>. - -Introduction at: <>. - [[gdb]] == GDB step debug @@ -1700,7 +1801,7 @@ since GDB does not know that libc is loaded. This is the userland debug setup most likely to work, since at init time there is only one userland executable running. -For executables from the <> such as link:userland/count.c[]: +For executables from the link:userland/[] directory such as link:userland/count.c[]: * Shell 1: + @@ -3288,7 +3389,7 @@ qw er `./run --userland` path resolution is analogous to <>. -`./build user-mode-qemu` first builds Buildroot, and then runs `./build-userland`, which is further documented at: <>. It also builds QEMU. If you ahve already done a <> previously, this will be very fast. +`./build user-mode-qemu` first builds Buildroot, and then runs `./build-userland`, which is further documented at: <>. It also builds QEMU. If you have already done a <> previously, this will be very fast. 
If you modify the userland programs, rebuild simply with: @@ -12033,6 +12134,295 @@ make CROSS_COMPILE_DIR=/usr/bin ; .... +== C + +Programs under link:userland/c/[] are examples of link:https://en.wikipedia.org/wiki/ANSI_C[ANSI C] programming. + +[[cpp]] +== C++ + +Programs under link:userland/cpp/[] are examples of link:https://en.wikipedia.org/wiki/C%2B%2B#Standardization[ISO C++] programming. + +== POSIX + +Programs under link:userland/posix/[] are examples of POSIX C programming. + +What is POSIX: + +* https://stackoverflow.com/questions/1780599/what-is-the-meaning-of-posix/31865755#31865755 +* https://unix.stackexchange.com/questions/11983/what-exactly-is-posix/220877#220877 + +== x86 userland + +Programs under link:userland/arch/x86_64/[] are examples of x86 userland assembly programming. + +Those examples are progressively being moved out of: https://github.com/cirosantilli/x86-assembly-cheat + +== arm userland + +Programs under: + +* link:userland/arch/arm/[] +* link:userland/arch/aarch64/[] + +are examples of ARM userland assembly programming. + +== Android + +Remember: Android AOSP is a huge undocumented piece of bloatware. Its integration into this repo will likely never be super good. + +Verbose setup description: https://stackoverflow.com/questions/1809774/how-to-compile-the-android-aosp-kernel-and-test-it-with-the-android-emulator/48310014#48310014 + +Download, build and run with the prebuilt AOSP QEMU emulator and the AOSP kernel: + +.... +./build-android \ + --android-base-dir /path/to/your/hd \ + --android-version 8.1.0_r60 \ + download \ + build \ +; +./run-android \ + --android-base-dir /path/to/your/hd \ + --android-version 8.1.0_r60 \ +; +.... + +Sources: + +* link:build-android[] +* link:run-android[] + +TODO how to hack the AOSP kernel, userland and emulator? + +Other archs work as usual with the `--arch` parameter. However, running in non-x86 is very slow due to the lack of KVM. + +Tested on: `8.1.0_r60`. 
+ +=== Android image structure + +https://source.android.com/devices/bootloader/partitions-images + +The messy AOSP generates a ton of images instead of just one. + +When the emulator launches, we can see them through QEMU `-drive` arguments: + +.... +emulator: argv[21] = "-initrd" +emulator: argv[22] = "/data/aosp/8.1.0_r60/out/target/product/generic_x86_64/ramdisk.img" +emulator: argv[23] = "-drive" +emulator: argv[24] = "if=none,index=0,id=system,file=/path/to/aosp/8.1.0_r60/out/target/product/generic_x86_64/system-qemu.img,read-only" +emulator: argv[25] = "-device" +emulator: argv[26] = "virtio-blk-pci,drive=system,iothread=disk-iothread,modern-pio-notify" +emulator: argv[27] = "-drive" +emulator: argv[28] = "if=none,index=1,id=cache,file=/path/to/aosp/8.1.0_r60/out/target/product/generic_x86_64/cache.img.qcow2,overlap-check=none,cache=unsafe,l2-cache-size=1048576" +emulator: argv[29] = "-device" +emulator: argv[30] = "virtio-blk-pci,drive=cache,iothread=disk-iothread,modern-pio-notify" +emulator: argv[31] = "-drive" +emulator: argv[32] = "if=none,index=2,id=userdata,file=/path/to/aosp/8.1.0_r60/out/target/product/generic_x86_64/userdata-qemu.img.qcow2,overlap-check=none,cache=unsafe,l2-cache-size=1048576" +emulator: argv[33] = "-device" +emulator: argv[34] = "virtio-blk-pci,drive=userdata,iothread=disk-iothread,modern-pio-notify" +emulator: argv[35] = "-drive" +emulator: argv[36] = "if=none,index=3,id=encrypt,file=/path/to/aosp/8.1.0_r60/out/target/product/generic_x86_64/encryptionkey.img.qcow2,overlap-check=none,cache=unsafe,l2-cache-size=1048576" +emulator: argv[37] = "-device" +emulator: argv[38] = "virtio-blk-pci,drive=encrypt,iothread=disk-iothread,modern-pio-notify" +emulator: argv[39] = "-drive" +emulator: argv[40] = "if=none,index=4,id=vendor,file=/path/to/aosp/8.1.0_r60/out/target/product/generic_x86_64/vendor-qemu.img,read-only" +emulator: argv[41] = "-device" +emulator: argv[42] = 
"virtio-blk-pci,drive=vendor,iothread=disk-iothread,modern-pio-notify" +.... + +The root directory is the <> given on the QEMU CLI, which `/proc/mounts` reports as: + +.... +rootfs on / type rootfs (ro,seclabel,size=886392k,nr_inodes=221598) +.... + +This contains the <>, which through `.rc` must be mounting the drives into the right places TODO find exact point. + +The drive order is: + +.... +system +cache +userdata +encryptionkey +vendor-qemu +.... + +Then, on the terminal: + +.... +mount | grep vd +.... + +gives: + +.... +/dev/block/vda1 on /system type ext4 (ro,seclabel,relatime,data=ordered) +/dev/block/vde1 on /vendor type ext4 (ro,seclabel,relatime,data=ordered) +/dev/block/vdb on /cache type ext4 (rw,seclabel,nosuid,nodev,noatime,errors=panic,data=ordered) +.... + +and we see that the order of `vda`, `vdb`, etc. matches that in which `-drive` were given to QEMU. + +Tested on: `8.1.0_r60`. + +==== Android images read-only + +From `mount`, we can see that some of the mounted images are `ro`. + +Basically, every image that was given to QEMU as qcow2 is writable, and that qcow2 is an overlay over the actual original image. + +In order to make `/system` and `/vendor` writable by using qcow2 for them as well, we must use the `-writable-system` option: + +.... +./run-android -- -writable-system +.... + +* https://android.stackexchange.com/questions/110927/how-to-mount-system-rewritable-or-read-only-rw-ro/207200#207200 +* https://stackoverflow.com/questions/13089694/adb-remount-permission-denied-but-able-to-access-super-user-in-shell-android/43163693#43163693 + +then: + +.... +su +mount -o rw,remount /system +date >/system/a +.... + +Now reboot, and relaunch with `-writable-system` once again to pick up the modified qcow2 images: + +.... +./run-android -- -writable-system +.... + +and the newly created file is still there: + +.... +date >/system/a +.... + +`/system` and `/vendor` can be nuked quickly with: + +.... 
+./build-android --extra-args snod +./build-android --extra-args vnod +.... + +as mentioned at: https://stackoverflow.com/questions/29023406/how-to-just-build-android-system-image and on: + +.... +./build-android --extra-args help +.... + +Tested on: `8.1.0_r60`. + +==== Android /data partition + +When I install an app like F-Droid, it goes under `/data` according to: + +.... +find / -iname '*fdroid*' +.... + +and it <>. + +`/data` is behind a RW LVM device: + +.... +/dev/block/dm-0 on /data type ext4 (rw,seclabel,nosuid,nodev,noatime,errors=panic,data=ordered) +.... + +but TODO I can't find where it comes from since I don't have the CLI tools mentioned at: + +* https://superuser.com/questions/131519/what-is-this-dm-0-device +* https://unix.stackexchange.com/questions/185057/where-does-lvm-store-its-configuration + +However, by looking at: + +.... +./run-android -- -help +.... + +we see: + +.... +-data data image (default /userdata-qemu.img +.... + +which confirms the suspicion that this data goes in `userdata-qemu.img`. + +To reset images to their original state, just remove the qcow2 overlay and regenerate it: https://stackoverflow.com/questions/54446680/how-to-reset-the-userdata-image-when-building-android-aosp-and-running-it-on-the + +Tested on: `8.1.0_r60`. 
+ +=== Install Android apps + +I don't know how to download files from the web on Vanilla android, the default browser does not download anything, and there is no `wget`: + +* https://android.stackexchange.com/questions/6984/how-to-download-files-from-the-web-in-the-android-browser +* https://stackoverflow.com/questions/26775079/wget-in-android-terminal + +Installing with `adb install` does however work: https://stackoverflow.com/questions/7076240/install-an-apk-file-from-command-prompt + +link:https://f-droid.org[F-Droid] installed fine like that, however it does not have permission to install apps: https://www.maketecheasier.com/install-apps-from-unknown-sources-android/ + +And the `Settings` app crashes so I can't change it, logcat contains: + +.... +No service published for: wifip2p +.... + +which is mentioned at: https://stackoverflow.com/questions/47839955/android-8-settings-app-crashes-on-emulator-with-clean-aosp-build + +We also tried to enable it from the command line with: + +.... +settings put secure install_non_market_apps 1 +.... + +as mentioned at: https://android.stackexchange.com/questions/77280/allow-unknown-sources-from-terminal-without-going-to-settings-app but it didn't work either. + +No person alive seems to know how to pre-install apps on AOSP: https://stackoverflow.com/questions/6249458/pre-installing-android-application + +Tested on: `8.1.0_r60`. + +=== Android init + +For Linux in general, see: <>. + +The `/init` executable interprets the `/init.rc` files, which is in a custom Android init system language: https://android.googlesource.com/platform/system/core/+/ee0e63f71d90537bb0570e77aa8a699cc222cfaf/init/README.md + +The top of that file then sources other `.rc` files present on the root directory: + +.... +import /init.environ.rc +import /init.usb.rc +import /init.${ro.hardware}.rc +import /vendor/etc/init/hw/init.${ro.hardware}.rc +import /init.usb.configfs.rc +import /init.${ro.zygote}.rc +.... 
+ +TODO: how is `ro.hardware` determined? https://stackoverflow.com/questions/20572781/android-boot-where-is-the-init-hardware-rc-read-in-init-c-where-are-servic It is a system property and can be obtained with: + +.... +getprop ro.hardware +.... + +This gives: + +.... +ranchu +.... + +which is the codename for the QEMU virtual platform we are running on: https://www.oreilly.com/library/view/android-system-programming/9781787125360/9736a97c-cd09-40c3-b14d-955717648302.xhtml + +TODO: is it possible to add a custom `.rc` file without modifying the initrd that <>? https://stackoverflow.com/questions/9768103/make-persistent-changes-to-init-rc + +Tested on: `8.1.0_r60`. + == Benchmark this repo TODO: didn't fully port during refactor after 3b0a343647bed577586989fb702b760bd280844a. Reimplementing should not be hard. @@ -12316,266 +12706,6 @@ gem5: ** https://stackoverflow.com/questions/47997565/gem5-system-requirements-for-decent-performance/48941793#48941793 ** https://github.com/gem5/gem5/issues/25 -== WIP - -Big new features that are not yet working. - -=== Android - -Remember: Android AOSP is a huge undocumented piece of bloatware. It's integration into this repo will likely never be super good. - -Verbose setup description: https://stackoverflow.com/questions/1809774/how-to-compile-the-android-aosp-kernel-and-test-it-with-the-android-emulator/48310014#48310014 - -Download, build and run with the prebuilt AOSP QEMU emulator and the AOSP kernel: - -.... -./build-android \ - --android-base-dir /path/to/your/hd \ - --android-version 8.1.0_r60 \ - download \ - build \ -; -./run-android \ - --android-base-dir /path/to/your/hd \ - --android-version 8.1.0_r60 \ -; -.... - -Sources: - -* link:build-android[] -* link:run-android[] - -TODO how to hack the AOSP kernel, userland and emulator? - -Other archs work as well as usual with `--arch` parameter. However, running in non-x86 is very slow due to the lack of KVM. - -Tested on: `8.1.0_r60`. 
- -==== Android image structure - -https://source.android.com/devices/bootloader/partitions-images - -The messy AOSP generates a ton of images instead of just one. - -When the emulator launches, we can see them through QEMU `-drive` arguments: - -.... -emulator: argv[21] = "-initrd" -emulator: argv[22] = "/data/aosp/8.1.0_r60/out/target/product/generic_x86_64/ramdisk.img" -emulator: argv[23] = "-drive" -emulator: argv[24] = "if=none,index=0,id=system,file=/path/to/aosp/8.1.0_r60/out/target/product/generic_x86_64/system-qemu.img,read-only" -emulator: argv[25] = "-device" -emulator: argv[26] = "virtio-blk-pci,drive=system,iothread=disk-iothread,modern-pio-notify" -emulator: argv[27] = "-drive" -emulator: argv[28] = "if=none,index=1,id=cache,file=/path/to/aosp/8.1.0_r60/out/target/product/generic_x86_64/cache.img.qcow2,overlap-check=none,cache=unsafe,l2-cache-size=1048576" -emulator: argv[29] = "-device" -emulator: argv[30] = "virtio-blk-pci,drive=cache,iothread=disk-iothread,modern-pio-notify" -emulator: argv[31] = "-drive" -emulator: argv[32] = "if=none,index=2,id=userdata,file=/path/to/aosp/8.1.0_r60/out/target/product/generic_x86_64/userdata-qemu.img.qcow2,overlap-check=none,cache=unsafe,l2-cache-size=1048576" -emulator: argv[33] = "-device" -emulator: argv[34] = "virtio-blk-pci,drive=userdata,iothread=disk-iothread,modern-pio-notify" -emulator: argv[35] = "-drive" -emulator: argv[36] = "if=none,index=3,id=encrypt,file=/path/to/aosp/8.1.0_r60/out/target/product/generic_x86_64/encryptionkey.img.qcow2,overlap-check=none,cache=unsafe,l2-cache-size=1048576" -emulator: argv[37] = "-device" -emulator: argv[38] = "virtio-blk-pci,drive=encrypt,iothread=disk-iothread,modern-pio-notify" -emulator: argv[39] = "-drive" -emulator: argv[40] = "if=none,index=4,id=vendor,file=/path/to/aosp/8.1.0_r60/out/target/product/generic_x86_64/vendor-qemu.img,read-only" -emulator: argv[41] = "-device" -emulator: argv[42] = 
"virtio-blk-pci,drive=vendor,iothread=disk-iothread,modern-pio-notify" -.... - -The root directory is the <> given on the QEMU CLI, which `/proc/mounts` reports at: - -.... -rootfs on / type rootfs (ro,seclabel,size=886392k,nr_inodes=221598) -.... - -This contains the <>, which through `.rc` must be mounting mounts the drives int o the right places TODO find exact point. - -The drive order is: - -.... -system -cache -userdata -encryptionkey -vendor-qemu -.... - -Then, on the terminal: - -.... -mount | grep vd -.... - -gives: - -.... -/dev/block/vda1 on /system type ext4 (ro,seclabel,relatime,data=ordered) -/dev/block/vde1 on /vendor type ext4 (ro,seclabel,relatime,data=ordered) -/dev/block/vdb on /cache type ext4 (rw,seclabel,nosuid,nodev,noatime,errors=panic,data=ordered) -.... - -and we see that the order of `vda`, `vdb`, etc. matches that in which `-drive` were given to QEMU. - -Tested on: `8.1.0_r60`. - -===== Android images read-only - -From `mount`, we can see that some of the mounted images are `ro`. - -Basically, every image that was given to QEMU as qcow2 is writable, and that qcow2 is an overlay over the actual original image. - -In order to make `/system` and `/vendor` writable by using qcow2 for them as well, we must use the `-writable-system` option: - -.... -./run-android -- -writable-system -.... - -* https://android.stackexchange.com/questions/110927/how-to-mount-system-rewritable-or-read-only-rw-ro/207200#207200 -* https://stackoverflow.com/questions/13089694/adb-remount-permission-denied-but-able-to-access-super-user-in-shell-android/43163693#43163693 - -then: - -.... -su -mount -o rw,remount /system -date >/system/a -.... - -Now reboot, and relaunch with `-writable-system` once again to pick up the modified qcow2 images: - -.... -./run-android -- -writable-system -.... - -and the newly created file is still there: - -.... -date >/system/a -.... - -`/system` and `/vendor` can be nuked quickly with: - -.... 
-./build-android --extra-args snod -./build-android --extra-args vnod -.... - -as mentioned at: https://stackoverflow.com/questions/29023406/how-to-just-build-android-system-image and on: - -.... -./build-android --extra-args help -.... - -Tested on: `8.1.0_r60`. - -===== Android /data partition - -When I install an app like F-Droid, it goes under `/data` according to: - -.... -find / -iname '*fdroid*' -.... - -and it <>. - -`/data` is behind a RW LVM device: - -.... -/dev/block/dm-0 on /data type ext4 (rw,seclabel,nosuid,nodev,noatime,errors=panic,data=ordered) -.... - -but TODO I can't find where it comes from since I don't have the CLI tools mentioned at: - -* https://superuser.com/questions/131519/what-is-this-dm-0-device -* https://unix.stackexchange.com/questions/185057/where-does-lvm-store-its-configuration - -However, by looking at: - -.... -./run-android -- -help -.... - -we see: - -.... --data data image (default /userdata-qemu.img -.... - -which confirms the suspicion that this data goes in `userdata-qemu.img`. - -To reset images to their original state, just remove the qcow2 overlay and regenerate it: https://stackoverflow.com/questions/54446680/how-to-reset-the-userdata-image-when-building-android-aosp-and-running-it-on-the - -Tested on: `8.1.0_r60`. 
- -==== Install Android apps - -I don't know how to download files from the web on Vanilla android, the default browser does not download anything, and there is no `wget`: - -* https://android.stackexchange.com/questions/6984/how-to-download-files-from-the-web-in-the-android-browser -* https://stackoverflow.com/questions/26775079/wget-in-android-terminal - -Installing with `adb install` does however work: https://stackoverflow.com/questions/7076240/install-an-apk-file-from-command-prompt - -link:https://f-droid.org[F-Droid] installed fine like that, however it does not have permission to install apps: https://www.maketecheasier.com/install-apps-from-unknown-sources-android/ - -And the `Settings` app crashes so I can't change it, logcat contains: - -.... -No service published for: wifip2p -.... - -which is mentioned at: https://stackoverflow.com/questions/47839955/android-8-settings-app-crashes-on-emulator-with-clean-aosp-build - -We also tried to enable it from the command line with: - -.... -settings put secure install_non_market_apps 1 -.... - -as mentioned at: https://android.stackexchange.com/questions/77280/allow-unknown-sources-from-terminal-without-going-to-settings-app but it didn't work either. - -No person alive seems to know how to pre-install apps on AOSP: https://stackoverflow.com/questions/6249458/pre-installing-android-application - -Tested on: `8.1.0_r60`. - -=== Android init - -For Linux in general, see: <>. - -The `/init` executable interprets the `/init.rc` files, which is in a custom Android init system language: https://android.googlesource.com/platform/system/core/+/ee0e63f71d90537bb0570e77aa8a699cc222cfaf/init/README.md - -The top of that file then sources other `.rc` files present on the root directory: - -.... -import /init.environ.rc -import /init.usb.rc -import /init.${ro.hardware}.rc -import /vendor/etc/init/hw/init.${ro.hardware}.rc -import /init.usb.configfs.rc -import /init.${ro.zygote}.rc -.... 
- -TODO: how is `ro.hardware` determined? https://stackoverflow.com/questions/20572781/android-boot-where-is-the-init-hardware-rc-read-in-init-c-where-are-servic It is a system property and can be obtained with: - -.... -getprop ro.hardware -.... - -This gives: - -.... -ranchu -.... - -which is the codename for the QEMU virtual platform we are running on: https://www.oreilly.com/library/view/android-system-programming/9781787125360/9736a97c-cd09-40c3-b14d-955717648302.xhtml - -TODO: is it possible to add a custom `.rc` file without modifying the initrd that <>? https://stackoverflow.com/questions/9768103/make-persistent-changes-to-init-rc - -Tested on: `8.1.0_r60`. - == About this repo === Supported hosts @@ -13057,88 +13187,6 @@ link:include/[] contains headers that are shared across both kernel modules and They contain data structs and magic constant for kernel to userland communication. -==== userland directory - -Userland test programs. They can be used in the following ways: - -* inside a full system simulation, e.g.: <> -* inside <> -* directly on the host: <> - -For usage inside full system simulation, first ensure that Buildroot has been built for the toolchain, and then build the examples with: - -.... -./build-userland -.... - -Source: link:build-userland[]. - -This makes them visible immediately on the <<9p>> mount of a running simulator. - -In order to place them in the root filesystem image itself, you must also run: - -.... -./build-buildroot -.... - -===== userland directory host build - -It is possible to build and run some of the userland examples directly on your host: - -.... -cd userland -make -./hello.out -make clean -.... - -or more cleanly out of tree: - -.... -./build-userland --gcc-which host --userland-build-id host -"$(./getvar --userland-build-id host userland_build_dir)/hello.out" -.... - -Extra make flags may be passed as: - -.... 
-./build-userland --gcc-which host --userland-build-id host-static --make-args='-B CFLAGS_EXTRA=-static' -"$(./getvar --userland-build-id host-static userland_build_dir)/hello.out" -.... - -This for example would both force a rebuild due to `-B` and link statically due to `CFLAGS_EXTRA=-static`. - -TODO: OpenMP does not like `-static`: - -.... -/usr/lib/gcc/x86_64-linux-gnu/5/libgomp.a(target.o): In function `gomp_target_init': -(.text+0xba): warning: Using 'dlopen' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking -.... - -See: https://stackoverflow.com/questions/23869981/linking-openmp-statically-with-gcc - -===== userland cheats - -We have accumulated considerable material in the following userland subjects. - -====== C - -Programs under link:userland/c/[] are examples of link:https://en.wikipedia.org/wiki/ANSI_C[ANSI C] programming. - -[[cpp]] -====== C++ - -Programs under link:userland/cpp/[] are examples of link:https://en.wikipedia.org/wiki/C%2B%2B#Standardization[ISO C] programming. - -====== POSIX - -Programs under link:userland/posix/[] are examples of POSIX C programming. - -What is POSIX: - -* https://stackoverflow.com/questions/1780599/what-is-the-meaning-of-posix/31865755#31865755 -* https://unix.stackexchange.com/questions/11983/what-exactly-is-posix/220877#220877 - ==== buildroot_packages directory Source: link:buildroot_packages/[] @@ -13171,7 +13219,7 @@ You can force a rebuild with: ./build-buildroot --config 'BR2_PACKAGE_SAMPLE_PACKAGE=y' -- sample_package-reconfigure .... -Buildroot packages are convenient, but in general, if a package if very important to you, but not really mergeable back to Buildroot, you might want to just use a custom build script for it, and point it to the Buildroot toolchain, and then use `BR2_ROOTFS_OVERLAY`, much like we do for <>. 
+Buildroot packages are convenient, but in general, if a package is very important to you, but not really mergeable back to Buildroot, you might want to just use a custom build script for it, and point it to the Buildroot toolchain, and then use `BR2_ROOTFS_OVERLAY`, much like we do for <>. A custom build script can give you more flexibility: e.g. the package can be made work with other root filesystems more easily, have better <<9p>> support, and rebuild faster as it evades some Buildroot boilerplate. diff --git a/build-userland b/build-userland index a7bfed3..f01db01 100755 --- a/build-userland +++ b/build-userland @@ -2,13 +2,12 @@ import os import shlex - -import common -import threading import subprocess -from shell_helpers import LF +import threading -error = False +from shell_helpers import LF +import common +from thread_pool import ThreadPool class Main(common.BuildCliFunction): def __init__(self): @@ -32,10 +31,15 @@ allows us to build examples that rely on it. '--in-tree', default=False, help='''\ -Magic build mode tailored to build from within the source tree: - -* place build output inside soure tree to conveniently run it -* if not targets are given, build use the current working directory +Place build output inside soure tree to conveniently run it, especially when +building with the host toolchain. +''', + ) + self.add_argument( + '--target-cwd', + default=False, + help='''\ +Treat targets as relative to the current working directory.
''', ) self.add_argument( @@ -63,85 +67,76 @@ Default: build all examples that have their package dependencies met, e.g.: extra_deps=None, extra_objs=None, link=True, - raise_on_failure=True, - thread_limiter=None, ): - try: - if extra_deps is None: - extra_deps = [] - if extra_objs is None: - extra_objs = [] - if ccflags_after is None: - ccflags_after = [] - ret = 0 - if self.need_rebuild([in_path] + extra_objs + extra_deps, out_path): - ccflags = ccflags.copy() - if not link: - ccflags.extend(['-c', LF]) - in_ext = os.path.splitext(in_path)[1] - do_compile = True - if in_ext == self.env['c_ext']: - cc = self.env['gcc'] - if cstd is None: - std = self.default_cstd - else: - std = cstd - ccflags.extend([ - '-fopenmp', LF, - ]) - elif in_ext == self.env['cxx_ext']: - cc = self.env['gxx'] - if cxxstd is None: - std = self.default_cxxstd - else: - std = cxxstd + if extra_deps is None: + extra_deps = [] + if extra_objs is None: + extra_objs = [] + if ccflags_after is None: + ccflags_after = [] + ret = 0 + if self.need_rebuild([in_path] + extra_objs + extra_deps, out_path): + ccflags = ccflags.copy() + if not link: + ccflags.extend(['-c', LF]) + in_ext = os.path.splitext(in_path)[1] + do_compile = True + if in_ext in (self.env['c_ext'], self.env['asm_ext']): + cc = self.env['gcc'] + if cstd is None: + std = self.default_cstd else: - do_compile = False - if do_compile: - ret = self.sh.run_cmd( - ( - [ - cc, LF, - ] + - ccflags + - [ - '-std={}'.format(std), LF, - '-o', out_path, LF, - in_path, LF, - ] + - self.sh.add_newlines(extra_objs) + - [ - '-lm', LF, - '-pthread', LF, - ] + - ccflags_after - ), - extra_paths=[self.env['ccache_dir']], - raise_on_failure=raise_on_failure, - ) - finally: - if thread_limiter is not None: - thread_limiter.release() - if ret != 0: - self.error = True + std = cstd + ccflags.extend([ + '-fopenmp', LF, + ]) + elif in_ext == self.env['cxx_ext']: + cc = self.env['gxx'] + if cxxstd is None: + std = self.default_cxxstd + else: + std = cxxstd + 
else: + do_compile = False + if do_compile: + os.makedirs(os.path.dirname(out_path), exist_ok=True) + ret = self.sh.run_cmd( + ( + [ + cc, LF, + ] + + ccflags + + [ + '-std={}'.format(std), LF, + '-o', out_path, LF, + in_path, LF, + ] + + self.sh.add_newlines(extra_objs) + + [ + '-lm', LF, + '-pthread', LF, + ] + + ccflags_after + ), + extra_paths=[self.env['ccache_dir']], + ) return ret def _get_targets(self): if self.env['_args_given']['targets']: targets = self.env['targets'] - if self.env['in_tree']: + if self.env['target_cwd']: cwd = os.getcwd() targets = [os.path.join(cwd, target) for target in targets] return targets else: - if self.env['in_tree']: + if self.env['target_cwd']: return [os.getcwd()] else: return [self.env['userland_source_dir']] def build(self): build_dir = self.get_build_dir() - os.makedirs(build_dir, exist_ok=True) has_packages = set(self.env['has_package']) ccflags = [ '-I', self.env['root_dir'], LF, @@ -166,6 +161,25 @@ Default: build all examples that have their package dependencies met, e.g.: extra_deps=[self.env['common_h']], link=False, ) + common_obj_asm = os.path.join( + build_dir, + 'arch', + 'main' + self.env['obj_ext'] + ) + common_obj_asm_relpath = os.path.join( + 'arch', + 'main' + self.env['c_ext'] + ) + self._build_one( + in_path=os.path.join( + self.env['userland_source_dir'], + common_obj_asm_relpath + ), + out_path=common_obj_asm, + ccflags=ccflags, + extra_deps=[self.env['common_h']], + link=False, + ) pkgs = { 'eigen': { # TODO: was failing with: @@ -189,84 +203,136 @@ Default: build all examples that have their package dependencies met, e.g.: 'openblas': {}, } rootdir_abs_len = len(self.env['userland_source_dir']) - thread_limiter = threading.BoundedSemaphore(self.env['nproc']) - self.error = False - for target in self._get_targets(): - target = self.resolve_userland_source(target) - for path, in_dirnames, in_filenames in self.sh.walk(target): - in_dirnames.sort() - path_abs = os.path.abspath(path) - 
dirpath_relative_root = path_abs[rootdir_abs_len + 1:] - dirpath_relative_root_components = dirpath_relative_root.split(os.sep) - if ( - len(dirpath_relative_root_components) < 2 or - dirpath_relative_root_components[0] != 'arch' or - dirpath_relative_root_components[1] == self.env['arch'] - ): - out_dir = os.path.join( - build_dir, - dirpath_relative_root - ) - os.makedirs(out_dir, exist_ok=True) - ccflags_dir = ccflags.copy() - if dirpath_relative_root_components == ['gcc']: - cstd = 'gnu11' - cxxstd = 'gnu++17' - else: - cstd = self.default_cstd - cxxstd = self.default_cxxstd - # -pedantic complains even if we use -std=gnu11. - ccflags_dir.extend(['-pedantic', LF]) - for in_filename in in_filenames: - in_path = os.path.join(path, in_filename) - in_name, in_ext = os.path.splitext(in_filename) - out_path = os.path.join( - out_dir, - in_name + self.env['userland_build_ext'] + thread_pool = ThreadPool( + self._build_one, + nthreads=self.env['nproc'], + ) + class ExitLoop(Exception): pass + try: + for target in self._get_targets(): + target = self.resolve_userland_source(target) + for path, in_dirnames, in_filenames in self.sh.walk(target): + in_dirnames.sort() + in_filenames.sort() + path_abs = os.path.abspath(path) + dirpath_relative_root = path_abs[rootdir_abs_len + 1:] + dirpath_relative_root_components = dirpath_relative_root.split(os.sep) + dirpath_relative_root_components_len = len(dirpath_relative_root_components) + do_build_dir = True + in_arch = False + if dirpath_relative_root_components_len > 0: + if dirpath_relative_root_components[0] == 'arch': + if dirpath_relative_root_components_len > 1: + if dirpath_relative_root_components[1] == self.env['arch']: + in_arch = True + else: + do_build_dir = False + else: + do_build_dir = False + if do_build_dir: + out_dir = os.path.join( + build_dir, + dirpath_relative_root ) - pkg_key = in_name.split('_')[0] - ccflags_file = ccflags_dir.copy() - ccflags_after = [] - if pkg_key in pkgs: - if pkg_key not in 
has_packages: + common_objs_dir = [common_obj] + ccflags_dir = ccflags.copy() + if dirpath_relative_root_components == ['gcc']: + cstd = 'gnu11' + cxxstd = 'gnu++17' + else: + cstd = self.default_cstd + cxxstd = self.default_cxxstd + # -pedantic complains even if we use -std=gnu11. + ccflags_dir.extend(['-pedantic', LF]) + if in_arch: + ccflags_dir.extend([ + '-I', os.path.join(self.env['userland_source_arch_arch_dir']), LF, + '-I', os.path.join(self.env['userland_source_arch_dir']), LF, + '-fno-pie', LF, + '-no-pie', LF, + ]) + if 'freestanding' in dirpath_relative_root_components: + common_objs_dir = [] + ccflags_dir.extend([ + '-ffreestanding', LF, + '-nostdlib', LF, + '-static', LF, + ]) + else: + if 'c' in dirpath_relative_root_components: + common_objs_dir = [] + else: + common_objs_dir = [common_obj_asm] + if self.env['arch'] == 'arm': + ccflags_dir.extend([ + '-Xassembler', '-mcpu=cortex-a72', LF, + # To prevent: + # > vfp.S: Error: selected processor does not support in ARM mode + # https://stackoverflow.com/questions/41131432/cross-compiling-error-selected-processor-does-not-support-fmrx-r3-fpexc-in/52875732#52875732 + # We aim to take the most extended mode currently available that works on QEMU. + '-Xassembler', '-mfpu=crypto-neon-fp-armv8.1', LF, + '-Xassembler', '-meabi=5', LF, + # Treat inline assembly as arm instead of thumb + # The opposite of -mthumb. + '-marm', LF, + # Make gcc generate .syntax unified for inline assembly. + # However, it gets ignored if -marm is given, which is a GCC bug that was recently fixed: + # https://stackoverflow.com/questions/54078112/how-to-write-syntax-unified-ual-armv7-inline-assembly-in-gcc/54132097#54132097 + # So we just write divided inline assembly for now.
+ '-masm-syntax-unified', LF, + ]) + for in_filename in in_filenames: + path_relative_root = os.path.join(dirpath_relative_root, in_filename) + if path_relative_root == common_obj_asm_relpath: continue - pkg = pkgs[pkg_key] - if 'ccflags' in pkg: - ccflags_file.extend(pkg['ccflags']) - else: - pkg_config_output = subprocess.check_output([ - self.env['buildroot_pkg_config'], - '--cflags', - pkg_key - ]).decode() - ccflags_file.extend(self.sh.shlex_split(pkg_config_output)) - if 'ccflags_after' in pkg: - ccflags_file.extend(pkg['ccflags_after']) - else: - pkg_config_output = subprocess.check_output([ - self.env['buildroot_pkg_config'], - '--libs', - pkg_key - ]).decode() - ccflags_after.extend(self.sh.shlex_split(pkg_config_output)) - thread_limiter.acquire() - if self.error: - return 1 - thread = threading.Thread( - target=self._build_one, - kwargs={ - 'in_path': in_path, - 'out_path': out_path, - 'ccflags': ccflags_file, - 'cstd': cstd, - 'cxxstd': cxxstd, - 'extra_objs': [common_obj], - 'ccflags_after': ccflags_after, - 'raise_on_failure': False, - 'thread_limiter': thread_limiter, - } - ) - thread.start() + in_path = os.path.join(path, in_filename) + in_name, in_ext = os.path.splitext(in_filename) + out_path = os.path.join( + out_dir, + in_name + self.env['userland_build_ext'] + ) + pkg_key = in_name.split('_')[0] + ccflags_file = ccflags_dir.copy() + ccflags_after = [] + if pkg_key in pkgs: + if pkg_key not in has_packages: + continue + pkg = pkgs[pkg_key] + if 'ccflags' in pkg: + ccflags_file.extend(pkg['ccflags']) + else: + pkg_config_output = subprocess.check_output([ + self.env['buildroot_pkg_config'], + '--cflags', + pkg_key + ]).decode() + ccflags_file.extend(self.sh.shlex_split(pkg_config_output)) + if 'ccflags_after' in pkg: + ccflags_file.extend(pkg['ccflags_after']) + else: + pkg_config_output = subprocess.check_output([ + self.env['buildroot_pkg_config'], + '--libs', + pkg_key + ]).decode() + 
ccflags_after.extend(self.sh.shlex_split(pkg_config_output)) + error = thread_pool.submit({ + 'in_path': in_path, + 'out_path': out_path, + 'ccflags': ccflags_file, + 'cstd': cstd, + 'cxxstd': cxxstd, + 'extra_objs': common_objs_dir, + 'ccflags_after': ccflags_after, + }) + if error is not None: + raise ExitLoop() + except ExitLoop: + pass + error = thread_pool.join() + if error is not None: + print(error) + return 1 self.sh.copy_dir_if_update( srcdir=build_dir, destdir=self.env['out_rootfs_overlay_dir'], @@ -277,12 +343,15 @@ Default: build all examples that have their package dependencies met, e.g.: def clean(self): if self.env['in_tree']: for target in self._get_targets(): - for path, dirnames, filenames in os.walk(target): - filenames.sort() - dirnames.sort() - for filename in filenames: - if os.path.splitext(filename)[1] in self.env['userland_out_exts']: - self.sh.rmrf(os.path.join(path, filename)) + if os.path.exists(target): + for path, dirnames, filenames in os.walk(target): + filenames.sort() + dirnames.sort() + for filename in filenames: + if os.path.splitext(filename)[1] in self.env['userland_out_exts']: + self.sh.rmrf(os.path.join(path, filename)) + else: + raise Exception('Path does not exist: ' + target) else: self.sh.rmrf(self.get_build_dir()) diff --git a/build-userland-in-tree b/build-userland-in-tree index 08d43af..b3af05d 100755 --- a/build-userland-in-tree +++ b/build-userland-in-tree @@ -1,2 +1,7 @@ #!/usr/bin/env bash -"$(git rev-parse --show-toplevel)/build-userland" --gcc-which host --in-tree "$@" +"$(git rev-parse --show-toplevel)/build-userland" \ + --gcc-which host \ + --in-tree \ + --target-cwd \ + "$@" \ +; diff --git a/common.py b/common.py index 27d8c6c..604f57c 100644 --- a/common.py +++ b/common.py @@ -56,6 +56,7 @@ consts['kernel_modules_subdir'] = 'kernel_modules' consts['kernel_modules_source_dir'] = os.path.join(consts['root_dir'], consts['kernel_modules_subdir']) consts['userland_subdir'] = 'userland' 
consts['userland_source_dir'] = os.path.join(consts['root_dir'], consts['userland_subdir']) +consts['userland_source_arch_dir'] = os.path.join(consts['userland_source_dir'], 'arch') consts['userland_build_ext'] = '.out' consts['include_subdir'] = 'include' consts['include_source_dir'] = os.path.join(consts['root_dir'], consts['include_subdir']) @@ -714,12 +715,15 @@ Valid emulators: {} env['initarg'] = 'init' env['quit_init'] = '{}={}'.format(env['initarg'], env['userland_quit_cmd']) + # Userland + env['userland_source_arch_arch_dir'] = join(env['userland_source_arch_dir'], env['arch']) + env['userland_build_dir'] = join(env['out_dir'], 'userland', env['userland_build_id'], env['arch']) + # Kernel modules. env['kernel_modules_build_dir'] = join(env['kernel_modules_build_base_dir'], env['arch']) env['kernel_modules_build_subdir'] = join(env['kernel_modules_build_dir'], env['kernel_modules_subdir']) env['kernel_modules_build_host_dir'] = join(env['kernel_modules_build_base_dir'], 'host') env['kernel_modules_build_host_subdir'] = join(env['kernel_modules_build_host_dir'], env['kernel_modules_subdir']) - env['userland_build_dir'] = join(env['out_dir'], 'userland', env['userland_build_id'], env['arch']) env['out_rootfs_overlay_dir'] = join(env['out_dir'], 'rootfs_overlay', env['arch']) env['out_rootfs_overlay_bin_dir'] = join(env['out_rootfs_overlay_dir'], 'bin') diff --git a/shell_helpers.py b/shell_helpers.py index 8a57f19..727dd0f 100644 --- a/shell_helpers.py +++ b/shell_helpers.py @@ -126,12 +126,12 @@ class ShellHelpers: src = os.path.join(srcdir, basename) if os.path.isfile(src): noext, ext = os.path.splitext(basename) - if filter_ext is not None and ext == filter_ext: - distutils.file_util.copy_file( - src, - os.path.join(destdir, basename), - update=1, - ) + dest = os.path.join(destdir, basename) + if ( + (filter_ext is not None and ext == filter_ext) and + (os.path.exists(dest) and os.path.getmtime(src) > os.path.getmtime(dest)) + ): + self.cp(src, dest) def 
copy_dir_if_update(self, srcdir, destdir, filter_ext=None): self.copy_dir_if_update_non_recursive(srcdir, destdir, filter_ext) @@ -283,7 +283,9 @@ class ShellHelpers: #signal.signal(signal.SIGPIPE, sigpipe_old) returncode = proc.returncode if returncode != 0 and raise_on_failure: - raise Exception('Command exited with status: {}'.format(returncode)) + e = Exception('Command exited with status: {}'.format(returncode)) + e.returncode = returncode + raise e return returncode else: return 0 diff --git a/thread_pool.py b/thread_pool.py new file mode 100644 index 0000000..2f4ba25 --- /dev/null +++ b/thread_pool.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python3 + +from typing import Any, Callable, Dict, Iterable, Union +import multiprocessing +import queue +import sys +import threading +import time + +class ThreadPool: + ''' + Start a pool of a limited number of threads to do some work. + + This is similar to the stdlib concurrent, but I could not find + how to reach all my design goals with that implementation: + + - the input function does not need to be modified + - limit the number of threads + - queue sizes closely follow number of threads + - if an exception happens, optionally stop soon afterwards + + Functional form and further discussion at: + https://stackoverflow.com/questions/19369724/the-right-way-to-limit-maximum-number-of-threads-running-at-once/55263676#55263676 + + This class form allows to use your own while loops with submit(). + + Quick test with: + + ./thread_pool.py 2 -10 20 0 + ./thread_pool.py 2 -10 20 1 + ./thread_pool.py 2 -10 20 2 + ./thread_pool.py 2 -10 20 3 + + These ensure that execution stops neatly on error. + ''' + def __init__( + self, + func: Callable, + handle_output: Union[Callable[[Any,Any,Exception],Any],None] = None, + nthreads: Union[int,None] = None + ): + ''' + Start in a thread pool immediately. + + join() must be called afterwards at some point. + + :param func: main work function to be evaluated.
+ :param handle_output: called on func return values as they + are returned. + + Signature is: handle_output(input, output, exception) where: + + - input: input given to func + - output: return value of func + - exception: the exception that func raised, or None otherwise + + If this function returns non-None or raises, stop feeding + new input and exit ASAP when all currently running threads + have finished. + + Default: a handler that does nothing and just exits on exception. + :param nthreads: number of threads to use. Default: nproc. + ''' + self.func = func + if handle_output is None: + handle_output = lambda input, output, exception: exception + self.handle_output = handle_output + if nthreads is None: + nthreads = multiprocessing.cpu_count() + self.nthreads = nthreads + self.error_output = None + self.error_output_lock = threading.Lock() + self.in_queue = queue.Queue(maxsize=nthreads) + self.threads = [] + for i in range(self.nthreads): + thread = threading.Thread( + target=self._func_runner, + ) + self.threads.append(thread) + thread.start() + + def submit(self, work): + ''' + Submit work. Block if there is already enough work scheduled (~nthreads). + + :return: if an error occurred in some previously executed thread, the error. + Otherwise, None. This allows the caller to stop submitting further + work if desired. + ''' + self.in_queue.put(work) + return self.error_output + + def join(self): + ''' + Request all threads to stop after they finish currently submitted work. 
+ + :return: same as submit() + ''' + for thread in range(self.nthreads): + self.in_queue.put(None) + for thread in self.threads: + thread.join() + return self.error_output + + def _func_runner(self): + while True: + work = self.in_queue.get(block=True) + if work is None: + break + try: + exception = None + out = self.func(**work) + except Exception as e: + exception = e + try: + handle_output_return = self.handle_output(work, out, exception) + except Exception as e: + self.error_output_lock.acquire() + self.error_output = (work, out, e) + self.error_output_lock.release() + else: + if handle_output_return is not None: + self.error_output_lock.acquire() + self.error_output = handle_output_return + self.error_output_lock.release() + finally: + self.in_queue.task_done() + +if __name__ == '__main__': + def my_func(i): + ''' + The main function that will be evaluated. + + It sleeps to simulate an IO operation. + ''' + time.sleep((abs(i) % 4) / 10.0) + return 10.0 / i + + def get_work(min_, max_): + ''' + Generate simple range work for my_func. + ''' + for i in range(min_, max_): + yield {'i': i} + + def handle_output_print(input, output, exception): + ''' + Print outputs and exit immediately on failure. + ''' + print('{!r} {!r} {!r}'.format(input, output, exception)) + return exception + + def handle_output_print_no_exit(input, output, exception): + ''' + Print outputs, don't exit on failure. + ''' + print('{!r} {!r} {!r}'.format(input, output, exception)) + + out_queue = queue.Queue() + def handle_output_queue(input, output, exception): + ''' + Store outputs in a queue for later usage. + ''' + global out_queue + out_queue.put((input, output, exception)) + return exception + + def handle_output_raise(input, output, exception): + ''' + Raise if input == 10, to test that execution + stops nicely if this raises. + ''' + print('{!r} {!r} {!r}'.format(input, output, exception)) + if input['i'] == 10: + raise Exception + + # CLI arguments. 
+ argv_len = len(sys.argv) + if argv_len > 1: + nthreads = int(sys.argv[1]) + if nthreads == 0: + nthreads = None + else: + nthreads = None + if argv_len > 2: + min_ = int(sys.argv[2]) + else: + min_ = 1 + if argv_len > 3: + max_ = int(sys.argv[3]) + else: + max_ = 100 + if argv_len > 4: + c = sys.argv[4][0] + else: + c = '0' + if c == '1': + handle_output = handle_output_print_no_exit + elif c == '2': + handle_output = handle_output_queue + elif c == '3': + handle_output = handle_output_raise + else: + handle_output = handle_output_print + + # Action. + thread_pool = ThreadPool( + my_func, + handle_output, + nthreads + ) + for work in get_work(min_, max_): + error = thread_pool.submit(work) + if error is not None: + break + error = thread_pool.join() + if error is not None: + print('error: {!r}'.format(error)) + if handle_output == handle_output_queue: + while not out_queue.empty(): + print(out_queue.get()) diff --git a/userland/arch/aarch64/add.S b/userland/arch/aarch64/add.S new file mode 100644 index 0000000..7aff26d --- /dev/null +++ b/userland/arch/aarch64/add.S @@ -0,0 +1,9 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#data-processing-instructions */ + +#include "common.h" + +ENTRY + mov x0, 1 + add x1, x0, 2 + ASSERT_EQ(x1, 3) +EXIT diff --git a/userland/arch/aarch64/adr.S b/userland/arch/aarch64/adr.S new file mode 100644 index 0000000..5f2de64 --- /dev/null +++ b/userland/arch/aarch64/adr.S @@ -0,0 +1,21 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#adr */ + +#include "common.h" + +.data +data_label: + .word 0x1234678 +ENTRY + /* This is not possible in v7 because the label is in another section. + * objdump says that this generates a R_AARCH64_ADR_PRE relocation. + * which looks specific to ADR, and therefore makes it more likely + * that there was no such relocation in v7. + * + * This relocation is particularly important because str does not have a + * pc-relative mode in ARMv8. 
+ */ + adr x0, data_label + ldr x1, =data_label +label: + ASSERT_EQ_REG(x0, x1) +EXIT diff --git a/userland/arch/aarch64/adrp.S b/userland/arch/aarch64/adrp.S new file mode 100644 index 0000000..ffea71e --- /dev/null +++ b/userland/arch/aarch64/adrp.S @@ -0,0 +1,13 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#adr */ + +#include "common.h" + +ENTRY + adrp x0, label + adr x1, label +label: + /* Clear the lower 12 bits. */ + bic x1, x1, 0xFF + bic x1, x1, 0xF00 + ASSERT_EQ_REG(x0, x1) +EXIT diff --git a/userland/arch/aarch64/asm_hello.c b/userland/arch/aarch64/asm_hello.c deleted file mode 100644 index 8ca733a..0000000 --- a/userland/arch/aarch64/asm_hello.c +++ /dev/null @@ -1,13 +0,0 @@ -#include -#include - -int main(void) { - uint32_t myvar = 1; - __asm__ ( - "add %[myvar], %[myvar], 1;" - : [myvar] "=r" (myvar) - : - : - ); - assert(myvar == 2); -} diff --git a/userland/arch/aarch64/beq.S b/userland/arch/aarch64/beq.S new file mode 100644 index 0000000..fa35a2e --- /dev/null +++ b/userland/arch/aarch64/beq.S @@ -0,0 +1,33 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#cbz */ + +#include "common.h" + +ENTRY + /* cbz == 0 */ + mov x0, 0 + cbz x0, 1f + FAIL +1: + + /* cbz != 0 */ + mov x0, 1 + cbz x0, 1f + b 2f +1: + FAIL +2: + + /* cbnz != 0 */ + mov x0, 1 + cbnz x0, 1f + FAIL +1: + + /* cbnz == 0 */ + mov x0, 0 + cbnz x0, 1f + b 2f +1: + FAIL +2: +EXIT diff --git a/userland/arch/aarch64/bfi.S b/userland/arch/aarch64/bfi.S new file mode 100644 index 0000000..44e7125 --- /dev/null +++ b/userland/arch/aarch64/bfi.S @@ -0,0 +1,11 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#bfi */ + +#include "common.h" + +ENTRY + ldr x0, =0x1122334455667788 + + ldr x1, =0xFFFFFFFFFFFFFFFF + bfi x1, x0, 16, 32 + ASSERT_EQ(x1, 0xFFFF55667788FFFF) +EXIT diff --git a/userland/arch/aarch64/c/asm_from_c.c b/userland/arch/aarch64/c/asm_from_c.c new file mode 100644 index 0000000..7712771 --- /dev/null +++ b/userland/arch/aarch64/c/asm_from_c.c @@ -0,0 
+1,39 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#calling-convention */ + +#include +#include + +uint64_t my_asm_func(void); +/* { return 42; } */ +__asm__( + ".global my_asm_func;" + "my_asm_func:" + "mov x0, 42;" + "ret;" +); + +/* Now a more complex example that also calls a C function. + * We have to store the return address x30 for later because bl modifies it. + * https://stackoverflow.com/questions/27941220/push-lr-and-pop-lr-in-arm-arch64/34504752#34504752 + * We are not modifying any other callee saved register in this function, + * since my_c_func is not either (unless GCC has a bug ;-)), so everything else is fine. + */ +uint64_t my_asm_func_2(void); +/* { return my_c_func(); } */ +__asm__( + ".global my_asm_func_2;" + "my_asm_func_2:" + "str x30, [sp, -16]!;" + "bl my_c_func;" + "ldr x30, [sp], 16;" + "ret;" +); + +uint64_t my_c_func(void) { + return 42; +} + +int main(void) { + assert(my_asm_func() == 42); + assert(my_asm_func_2() == 42); +} diff --git a/userland/arch/aarch64/c/build b/userland/arch/aarch64/c/build new file mode 120000 index 0000000..ab18017 --- /dev/null +++ b/userland/arch/aarch64/c/build @@ -0,0 +1 @@ +../build \ No newline at end of file diff --git a/userland/arch/aarch64/c/earlyclobber.c b/userland/arch/aarch64/c/earlyclobber.c new file mode 100644 index 0000000..4501488 --- /dev/null +++ b/userland/arch/aarch64/c/earlyclobber.c @@ -0,0 +1,21 @@ +/* An example of using the '&' earlyclobber modifier. + * https://stackoverflow.com/questions/15819794/when-to-use-earlyclobber-constraint-in-extended-gcc-inline-assembly/54853663#54853663 + * The assertion may fail without it. It actually does fail in GCC 8.2.0 at + * 34017bcd0bc96a3cf77f6acba4d58350e67c2694 + 1.
+ */ + +#include +#include + +int main(void) { + uint64_t in = 1; + uint64_t out; + __asm__ ( + "add %[out], %[in], 1;" + "add %[out], %[in], 1;" + : [out] "=&r" (out) + : [in] "r" (in) + : + ); + assert(out == 2); +} diff --git a/userland/arch/aarch64/c/freestanding/build b/userland/arch/aarch64/c/freestanding/build new file mode 120000 index 0000000..ab18017 --- /dev/null +++ b/userland/arch/aarch64/c/freestanding/build @@ -0,0 +1 @@ +../build \ No newline at end of file diff --git a/userland/arch/aarch64/c/freestanding/hello.c b/userland/arch/aarch64/c/freestanding/hello.c new file mode 100644 index 0000000..55d6b31 --- /dev/null +++ b/userland/arch/aarch64/c/freestanding/hello.c @@ -0,0 +1,37 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#freestanding-linux-inline-assembly-system-calls */ + +#include + +void _start(void) { + uint64_t exit_status; + + /* write */ + { + char msg[] = "hello\n"; + uint64_t syscall_return; + register uint64_t x0 __asm__ ("x0") = 1; /* stdout */ + register char *x1 __asm__ ("x1") = msg; + register uint64_t x2 __asm__ ("x2") = sizeof(msg); + register uint64_t x8 __asm__ ("x8") = 64; /* syscall number */ + __asm__ __volatile__ ( + "svc 0;" + : "+r" (x0) + : "r" (x1), "r" (x2), "r" (x8) + : "memory" + ); + syscall_return = x0; + exit_status = (syscall_return != sizeof(msg)); + } + + /* exit */ + { + register uint64_t x0 __asm__ ("x0") = exit_status; + register uint64_t x8 __asm__ ("x8") = 93; + __asm__ __volatile__ ( + "svc 0;" + : "+r" (x0) + : "r" (x8) + : + ); + } +} diff --git a/userland/arch/aarch64/c/freestanding/hello_clobbers.c b/userland/arch/aarch64/c/freestanding/hello_clobbers.c new file mode 100644 index 0000000..48dd355 --- /dev/null +++ b/userland/arch/aarch64/c/freestanding/hello_clobbers.c @@ -0,0 +1,40 @@ +/* Like hello.c trying to do it without named register variables. + * The code is more complicated, and I was not able to get as efficient, + * so better just stick to named register variables. 
+ */ + +#include + +void _start(void) { + uint64_t exit_status; + + /* write */ + { + char msg[] = "hello\n"; + uint64_t syscall_return; + __asm__ ( + "mov x0, 1;" /* stdout */ + "mov x1, %[msg];" + "mov x2, %[len];" + "mov x8, 64;" /* syscall number */ + "svc 0;" + "mov %[syscall_return], x0;" + : [syscall_return] "=r" (syscall_return) + : [msg] "p" (msg), + [len] "i" (sizeof(msg)) + : "x0", "x1", "x2", "x8", "memory" + ); + exit_status = (syscall_return != sizeof(msg)); + } + + /* exit */ + __asm__ ( + "mov x0, %[exit_status];" + "mov x8, 93;" /* syscall number */ + "svc 0;" + : + : [exit_status] "r" (exit_status) + : "x0", "x8" + ); +} + diff --git a/userland/arch/aarch64/c/inc.c b/userland/arch/aarch64/c/inc.c new file mode 100644 index 0000000..3728bc2 --- /dev/null +++ b/userland/arch/aarch64/c/inc.c @@ -0,0 +1,13 @@ +#include +#include + +int main(void) { + uint64_t io = 1; + __asm__ ( + "add %[io], %[io], 1;" + : [io] "+r" (io) + : + : + ); + assert(io == 2); +} diff --git a/userland/arch/aarch64/c/inc_float.c b/userland/arch/aarch64/c/inc_float.c new file mode 100644 index 0000000..32883cd --- /dev/null +++ b/userland/arch/aarch64/c/inc_float.c @@ -0,0 +1,28 @@ +/* https://stackoverflow.com/questions/53960240/armv8-floating-point-output-inline-assembly + * + * We use the undocumented %s and %d modifiers! 
+ */ + +#include + +int main(void) { + float my_float = 1.5; + __asm__ ( + "fmov s0, 1.0;" + "fadd %s[my_float], %s[my_float], s0;" + : [my_float] "+w" (my_float) + : + : "s0" + ); + assert(my_float == 2.5); + + double my_double = 1.5; + __asm__ ( + "fmov d0, 1.0;" + "fadd %d[my_double], %d[my_double], d0;" + : [my_double] "+w" (my_double) + : + : "d0" + ); + assert(my_double == 2.5); +} diff --git a/userland/arch/aarch64/c/multiline.cpp b/userland/arch/aarch64/c/multiline.cpp new file mode 100644 index 0000000..f618db6 --- /dev/null +++ b/userland/arch/aarch64/c/multiline.cpp @@ -0,0 +1,18 @@ +// https://stackoverflow.com/questions/3666013/how-to-write-multiline-inline-assembly-code-in-gcc-c/54575948#54575948 + +#include +#include + +int main(void) { + uint64_t io = 0; + __asm__ ( + R"( +add %[io], %[io], #1 +add %[io], %[io], #1 +)" + : [io] "+r" (io) + : + : + ); + assert(io == 2); +} diff --git a/userland/arch/aarch64/c/reg_var.c b/userland/arch/aarch64/c/reg_var.c new file mode 100644 index 0000000..1fd5750 --- /dev/null +++ b/userland/arch/aarch64/c/reg_var.c @@ -0,0 +1,27 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#register-variables */ + +#include +#include + +int main(void) { + register uint32_t x0 __asm__ ("x0"); + register uint32_t x1 __asm__ ("x1"); + uint32_t new_x0; + uint32_t new_x1; + { + x0 = 1; + x1 = 2; + __asm__ ( + "add %[x0], x0, #1;" + "add %[x1], x1, #1;" + : [x0] "+r" (x0), + [x1] "+r" (x1) + : + : + ); + new_x0 = x0; + new_x1 = x1; + } + assert(new_x0 == 2); + assert(new_x1 == 3); +} diff --git a/userland/arch/aarch64/c/reg_var_float.c b/userland/arch/aarch64/c/reg_var_float.c new file mode 100644 index 0000000..4e79f26 --- /dev/null +++ b/userland/arch/aarch64/c/reg_var_float.c @@ -0,0 +1,28 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#register-variables */ + +#include +#include + +int main(void) { + register double d0 __asm__ ("d0"); + register double d1 __asm__ ("d1"); + double new_d0; + double new_d1; + { + 
d0 = 1.5; + d1 = 2.5; + __asm__ ( + "fmov d2, 1.5;" + "fadd %d[d0], d0, d2;" + "fadd %d[d1], d1, d2;" + : [d0] "+w" (d0), + [d1] "+w" (d1) + : + : "d2" + ); + new_d0 = d0; + new_d1 = d1; + } + assert(new_d0 == 3.0); + assert(new_d1 == 4.0); +} diff --git a/userland/arch/aarch64/cbz.S b/userland/arch/aarch64/cbz.S new file mode 100644 index 0000000..7db0479 --- /dev/null +++ b/userland/arch/aarch64/cbz.S @@ -0,0 +1,19 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#cbz */ + +#include "common.h" + +ENTRY + + /* Branch. */ + mov x0, 0x0 + cbz x0, ok + FAIL +ok: + + /* Don't branch. */ + mov x0, 0x1 + cbz x0, ko + +EXIT +ko: + FAIL diff --git a/userland/arch/aarch64/comments.S b/userland/arch/aarch64/comments.S new file mode 100644 index 0000000..11c9d2c --- /dev/null +++ b/userland/arch/aarch64/comments.S @@ -0,0 +1,17 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#comments */ + +#include "common.h" +ENTRY + # mycomment + + /* ARMv8 uses // instead of @ for comments. */ + // mycomment + nop // mycomment + + /* All these fail. Lol, different than v7, no consistency. 
*/ +#if 0 + nop # mycomment + @ mycomment + nop @ mycomment +#endif +EXIT diff --git a/userland/arch/aarch64/common_arch.h b/userland/arch/aarch64/common_arch.h new file mode 100644 index 0000000..c3f47ec --- /dev/null +++ b/userland/arch/aarch64/common_arch.h @@ -0,0 +1,64 @@ +#ifndef COMMON_ARCH_H +#define COMMON_ARCH_H + +#define ASSERT_EQ(reg, const) \ + ldr x11, =const; \ + cmp reg, x11; \ + ASSERT(beq); \ +; + +#define ASSERT_MEMCMP(s1, s2, n) \ + MEMCMP(s1, s2, n); \ + ASSERT_EQ(x0, 0); \ +; + +#define ENTRY \ +.text; \ +.global asm_main; \ +asm_main: \ + sub sp, sp, 0xA0; \ + stp x29, x30, [sp]; \ + stp x27, x28, [sp, 0x10]; \ + stp x25, x26, [sp, 0x20]; \ + stp x23, x24, [sp, 0x30]; \ + stp x21, x22, [sp, 0x40]; \ + stp x19, x20, [sp, 0x50]; \ + stp x6, x7, [sp, 0x60]; \ + stp x4, x5, [sp, 0x70]; \ + stp x2, x3, [sp, 0x80]; \ + stp x0, x1, [sp, 0x90]; \ +asm_main_after_prologue: \ +; + +#define EXIT \ + mov w0, 0; \ + mov w1, 0; \ + b pass; \ +fail: \ + ldr x1, [sp, 0x90]; \ + str w0, [x1]; \ + mov w0, 1; \ +pass: \ + ldp x19, x20, [sp, 0x50]; \ + ldp x21, x22, [sp, 0x40]; \ + ldp x23, x24, [sp, 0x30]; \ + ldp x25, x26, [sp, 0x20]; \ + ldp x27, x28, [sp, 0x10]; \ + ldp x29, x30, [sp]; \ + add sp, sp, 0xA0; \ + ret; \ +; + +#define FAIL \ + ldr w0, =__LINE__; \ + b fail; \ +; + +#define MEMCMP(s1, s2, n) \ + adr x0, s1; \ + adr x1, s2; \ + ldr x2, =n; \ + bl memcmp; \ +; + +#endif diff --git a/userland/arch/aarch64/cset.S b/userland/arch/aarch64/cset.S new file mode 100644 index 0000000..0213183 --- /dev/null +++ b/userland/arch/aarch64/cset.S @@ -0,0 +1,28 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#cset */ + +#include "common.h" + +ENTRY + /* Test values. */ + mov x0, 0 + mov x1, 1 + + /* eq is true, set x2 = 1. */ + cmp x0, x0 + cset x2, eq + ASSERT_EQ(x2, 1) + + /* eq is false, set x2 = 0. */ + cmp x0, x1 + cset x2, eq + ASSERT_EQ(x2, 0) + + /* Same for ne. 
*/ + cmp x0, x0 + cset x2, ne + ASSERT_EQ(x2, 0) + + cmp x0, x1 + cset x2, ne + ASSERT_EQ(x2, 1) +EXIT diff --git a/userland/arch/aarch64/empty.S b/userland/arch/aarch64/empty.S new file mode 120000 index 0000000..6bdf9f9 --- /dev/null +++ b/userland/arch/aarch64/empty.S @@ -0,0 +1 @@ +../empty.S \ No newline at end of file diff --git a/userland/arch/aarch64/fail.S b/userland/arch/aarch64/fail.S new file mode 120000 index 0000000..d5427d8 --- /dev/null +++ b/userland/arch/aarch64/fail.S @@ -0,0 +1 @@ +../fail.S \ No newline at end of file diff --git a/userland/arch/aarch64/floating_point.S b/userland/arch/aarch64/floating_point.S new file mode 100644 index 0000000..e6e59c3 --- /dev/null +++ b/userland/arch/aarch64/floating_point.S @@ -0,0 +1,60 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#advanced-simd-instructions */ + +#include "common.h" + +ENTRY + /* 1.5 + 2.5 == 4.0 + * using 64-bit double immediates. + */ + fmov d0, 1.5 + fmov d1, 2.5 + fadd d2, d0, d1 + fmov d3, 4.0 + /* Unlike VFP vcmp, this stores the status + * automatically in the main CPSR. + */ + fcmp d2, d3 + ASSERT(beq) + + /* Now with a memory stored value. */ +.data +my_double_0: + .double 1.5 +my_double_1: + .double 2.5 +my_double_sum_expect: + .double 4.0 +.text + ldr d0, my_double_0 + ldr d1, my_double_1 + fadd d2, d0, d1 + ldr d3, my_double_sum_expect + fcmp d2, d3 + ASSERT(beq) + + /* Now in 32-bit. */ + fmov s0, 1.5 + fmov s1, 2.5 + fadd s2, s0, s1 + fmov s3, 4.0 + fcmp s2, s3 + ASSERT(beq) + + /* TODO why? What's the point of q then? + * Error: operand mismatch -- `fmov q0,1.5' + */ +#if 0 + fmov q0, 1.5 +#endif + + /* Much like integers, immediates are constrained to + * fit in 32-bit instructions. TODO exact rules. 
+ * + * Assembly here would fail with: + * + * Error: invalid floating-point constant at operand 2 + */ +#if 0 + fmov d0, 1.23456798 +#endif +EXIT diff --git a/userland/arch/aarch64/freestanding/build b/userland/arch/aarch64/freestanding/build new file mode 120000 index 0000000..ab18017 --- /dev/null +++ b/userland/arch/aarch64/freestanding/build @@ -0,0 +1 @@ +../build \ No newline at end of file diff --git a/userland/arch/aarch64/freestanding/hello.S b/userland/arch/aarch64/freestanding/hello.S new file mode 100644 index 0000000..d5c193a --- /dev/null +++ b/userland/arch/aarch64/freestanding/hello.S @@ -0,0 +1,20 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#linux-system-calls */ + +.text +.global _start +_start: +asm_main_after_prologue: + /* write */ + mov x0, 1 /* stdout */ + adr x1, msg /* buffer */ + ldr x2, =len /* len */ + mov x8, 64 /* syscall number */ + svc 0 + + /* exit */ + mov x0, 0 /* exit status */ + mov x8, 93 /* syscall number */ + svc 0 +msg: + .ascii "hello\n" +len = . 
- msg diff --git a/userland/arch/aarch64/hello_driver.S b/userland/arch/aarch64/hello_driver.S new file mode 100644 index 0000000..aeabd44 --- /dev/null +++ b/userland/arch/aarch64/hello_driver.S @@ -0,0 +1,6 @@ +.text +.global asm_main +asm_main: +asm_main_after_prologue: + mov w0, 0 + ret diff --git a/userland/arch/aarch64/immediates.S b/userland/arch/aarch64/immediates.S new file mode 100644 index 0000000..d803773 --- /dev/null +++ b/userland/arch/aarch64/immediates.S @@ -0,0 +1,9 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#immediates */ + +#include "common.h" +ENTRY + mov x0, 1 + mov x0, 0x1 + mov x0, 1 + mov x0, 0x1 +EXIT diff --git a/userland/arch/aarch64/movk.S b/userland/arch/aarch64/movk.S new file mode 100644 index 0000000..fa9151f --- /dev/null +++ b/userland/arch/aarch64/movk.S @@ -0,0 +1,26 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#movk */ + +#include "common.h" + +ENTRY + movk x0, 0x4444, lsl 0 + movk x0, 0x3333, lsl 16 + movk x0, 0x2222, lsl 32 + movk x0, 0x1111, lsl 48 + ASSERT_EQ(x0, 0x1111222233334444) + + /* Set a label (addresses are 48-bit) with immediates: + * + * * https://stackoverflow.com/questions/38570495/aarch64-relocation-prefixes + * * https://sourceware.org/binutils/docs-2.26/as/AArch64_002dRelocations.html + * + * This could be used if the label is too far away for + * adr relative addressing. 
+ */ + movz x0, :abs_g2:label /* bits 32-47, overflow check */ + movk x0, :abs_g1_nc:label /* bits 16-31, no overflow check */ + movk x0, :abs_g0_nc:label /* bits 0-15, no overflow check */ + adr x1, label +label: + ASSERT_EQ_REG(x0, x1) +EXIT diff --git a/userland/arch/aarch64/movn.S b/userland/arch/aarch64/movn.S new file mode 100644 index 0000000..e7955e3 --- /dev/null +++ b/userland/arch/aarch64/movn.S @@ -0,0 +1,9 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#movn */ + +#include "common.h" + +ENTRY + ldr x0, =0x123456789ABCDEF0 + movn x0, 0x8888, lsl 16 + ASSERT_EQ(x0, 0xFFFFFFFF7777FFFF) +EXIT diff --git a/userland/arch/aarch64/pc.S b/userland/arch/aarch64/pc.S new file mode 100644 index 0000000..fe1b8a3 --- /dev/null +++ b/userland/arch/aarch64/pc.S @@ -0,0 +1,78 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#registers */ + +#include "common.h" + +ENTRY +#if 0 + /* Unlike v7, we can't use PC like any other register in ARMv8, + * since it is not a general purpose register anymore. + * + * Only branch instructions can modify the PC. + * + * B1.2.1 "Registers in AArch64 state" says: + * + * Software cannot write directly to the PC. It + * can only be updated on a branch, exception entry or + * exception return. + */ + ldr pc, =10f + FAIL +10: +#endif +#if 0 + mov x0, pc +#endif + + /* LDR PC-relative loads exist in ARMv8, but they have a separate encoding + * "LDR (literal)" instead of "LDR (immediate)": + * https://stackoverflow.com/questions/28638981/howto-write-pc-relative-adressing-on-arm-asm/54480999#54480999 + */ + ldr x0, pc_relative_ldr + b 1f +pc_relative_ldr: + .quad 0x123456789ABCDEF0 +1: + ASSERT_EQ(x0, 0x123456789ABCDEF0) + + /* Just for fun, we can also use relative numbers instead of labels. 
+ * https://reverseengineering.stackexchange.com/questions/17666/how-does-the-ldr-instruction-work-on-arm/20567#20567 + */ + ldr x0, 0x8 + b 1f + .quad 0x123456789ABCDEF0 +1: + ASSERT_EQ(x0, 0x123456789ABCDEF0) + + /* Analogous for b with PC. */ + mov x0, 0 + /* Jumps over mov to ASSERT_EQ. */ + b 8 + mov x0, 1 + ASSERT_EQ(x0, 0) + + /* Trying to use the old "LDR (immediate)" PC-relative + * syntax does not work. + */ +#if 0 + /* 64-bit integer or SP register expected at operand 2 -- `ldr x0,[pc]' */ + ldr x0, [pc] +#endif + + /* There is however no analogue for str. TODO rationale? */ +#if 0 + /* Error: invalid addressing mode at operand 2 -- `str x0,pc_relative_str' */ + str x0, pc_relative_str +#endif + + /* You just have to use adr + "STR (register)". */ + ldr x0, pc_relative_str + ASSERT_EQ(x0, 0x0) + adr x1, pc_relative_str + ldr x0, pc_relative_ldr + str x0, [x1] + ldr x0, pc_relative_str + ASSERT_EQ(x0, 0x123456789ABCDEF0) +EXIT +.data +pc_relative_str: + .quad 0x0000000000000000 diff --git a/userland/arch/aarch64/regs.S b/userland/arch/aarch64/regs.S new file mode 100644 index 0000000..7334bda --- /dev/null +++ b/userland/arch/aarch64/regs.S @@ -0,0 +1,47 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#armv8-registers */ + +#include "common.h" + +ENTRY + + /* 31 64-bit eXtended general purpose registers. */ + mov x0, 0 + mov x1, 1 + mov x2, 2 + mov x3, 3 + mov x4, 4 + mov x5, 5 + mov x6, 6 + mov x7, 7 + mov x8, 8 + mov x9, 9 + mov x10, 10 + mov x11, 11 + mov x12, 12 + mov x13, 13 + mov x14, 14 + mov x15, 15 + mov x16, 16 + mov x17, 17 + mov x18, 18 + mov x19, 19 + mov x20, 20 + mov x21, 21 + mov x22, 22 + mov x23, 23 + mov x24, 24 + mov x25, 25 + mov x26, 26 + mov x27, 27 + mov x28, 28 + mov x29, 29 + + /* x30 is the link register. BL stores the return address here. */ + /*mov x30, 30*/ + + /* W form addresses the lower 4 bytes word, and zeroes the top. 
*/ + ldr x0, =0x1111222233334444 + ldr x1, =0x5555666677778888 + mov w0, w1 + ASSERT_EQ(x0, 0x0000000077778888) +EXIT diff --git a/userland/arch/aarch64/ret.S b/userland/arch/aarch64/ret.S new file mode 100644 index 0000000..ab77e24 --- /dev/null +++ b/userland/arch/aarch64/ret.S @@ -0,0 +1,28 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#bl */ + +#include "common.h" + +ENTRY + mov x0, 1 + bl inc + ASSERT_EQ(x0, 2) + bl inc2 + ASSERT_EQ(x0, 3) + bl inc3 + ASSERT_EQ(x0, 4) +EXIT + +/* void inc(uint64_t *i) { (*i)++ } */ +inc: + add x0, x0, 1 + ret + +/* Same but explicit return register. */ +inc2: + add x0, x0, 1 + ret x30 + +/* Same but with br. */ +inc3: + add x0, x0, 1 + br x30 diff --git a/userland/arch/aarch64/simd.S b/userland/arch/aarch64/simd.S new file mode 100644 index 0000000..516cff1 --- /dev/null +++ b/userland/arch/aarch64/simd.S @@ -0,0 +1,86 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#advanced-simd-instructions */ + +#include "common.h" + +ENTRY + /* 4x 32-bit integer add. + * + * s stands for single == 32 bits. + * + * 1 in ld1 means to load just one register, see: + * https://github.com/cirosantilli/arm-assembly-cheat#simd-interleaving + */ +.data + u32_0: .word 0xF111F111, 0xF222F222, 0xF333F333, 0xF444F444 + u32_1: .word 0x15551555, 0x16661666, 0x17771777, 0x18881888 + u32_sum_expect: .word 0x06670666, 0x08890888, 0x0AAB0AAA, 0x0CCD0CCC +.bss + u32_sum: .skip 16 +.text + adr x0, u32_0 + ld1 {v0.4s}, [x0] + adr x1, u32_1 + ld1 {v1.4s}, [x1] + add v2.4s, v0.4s, v1.4s + adr x0, u32_sum + st1 {v2.4s}, [x0] + ASSERT_MEMCMP(u32_sum, u32_sum_expect, 0x10) + + /* 2x 64-bit integer add. + * + * d stands for double == 64 bits. 
+ */ +.data + u64_0: .quad 0xF1111111F1111111, 0xF2222222F2222222 + u64_1: .quad 0x1555555515555555, 0x1666666616666666 + u64_sum_expect: .quad 0x0666666706666666, 0x0888888908888888 +.bss + u64_sum: .skip 16 +.text + adr x0, u64_0 + ld1 {v0.2d}, [x0] + adr x1, u64_1 + ld1 {v1.2d}, [x1] + add v2.2d, v0.2d, v1.2d + adr x0, u64_sum + st1 {v2.2d}, [x0] + ASSERT_MEMCMP(u64_sum, u64_sum_expect, 0x10) + + /* 4x 32-bit float add. + * + * The only difference between the integer point version + * is that we use fadd instead of add. + */ +.data + f32_0: .float 1.5, 2.5, 3.5, 4.5 + f32_1: .float 5.5, 6.5, 7.5, 8.5 + f32_sum_expect: .float 7.0, 9.0, 11.0, 13.0 +.bss + f32_sum: .skip 16 +.text + adr x0, f32_0 + ld1 {v0.4s}, [x0] + adr x1, f32_1 + ld1 {v1.4s}, [x1] + fadd v2.4s, v0.4s, v1.4s + adr x0, f32_sum + st1 {v2.4s}, [x0] + ASSERT_MEMCMP(f32_sum, f32_sum_expect, 0x10) + + /* 2x 64-bit float add. */ +.data + f64_0: .double 1.5, 2.5 + f64_1: .double 5.5, 6.5 + f64_sum_expect: .double 7.0, 9.0 +.bss + f64_sum: .skip 16 +.text + adr x0, f64_0 + ld1 {v0.2d}, [x0] + adr x1, f64_1 + ld1 {v1.2d}, [x1] + fadd v2.2d, v0.2d, v1.2d + adr x0, f64_sum + st1 {v2.2d}, [x0] + ASSERT_MEMCMP(f64_sum, f64_sum_expect, 0x10) +EXIT diff --git a/userland/arch/aarch64/simd_interleave.S b/userland/arch/aarch64/simd_interleave.S new file mode 100644 index 0000000..7ebc157 --- /dev/null +++ b/userland/arch/aarch64/simd_interleave.S @@ -0,0 +1,26 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#simd-interleaving */ + +#include "common.h" + +ENTRY +.data + u32_interleave: .word \ + 0x11111111, 0x55555555, \ + 0x22222222, 0x66666666, \ + 0x33333333, 0x77777777, \ + 0x44444444, 0x88888888 + u32_interleave_sum_expect: .word \ + 0x66666666, \ + 0x88888888, \ + 0xAAAAAAAA, \ + 0xCCCCCCCC +.bss + u32_interleave_sum: .skip 16 +.text + adr x0, u32_interleave + ld2 {v0.4s, v1.4s}, [x0] + add v2.4s, v0.4s, v1.4s + adr x0, u32_interleave_sum + st1 {v2.4s}, [x0] + ASSERT_MEMCMP(u32_interleave_sum, 
u32_interleave_sum_expect, 0x10) +EXIT diff --git a/userland/arch/aarch64/str.S b/userland/arch/aarch64/str.S new file mode 100644 index 0000000..ebcdf49 --- /dev/null +++ b/userland/arch/aarch64/str.S @@ -0,0 +1,13 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#armv8-str */ + +#include "common.h" + +ENTRY + ldr x0, myvar + ASSERT_EQ(x0, 0x12346789ABCDEF0) +#if 0 + /* Error: invalid addressing mode at operand 2 -- `str x0,myvar' */ + str x0, myvar +#endif +EXIT + myvar: .quad 0x12346789ABCDEF0 diff --git a/userland/arch/aarch64/ubfm.S b/userland/arch/aarch64/ubfm.S new file mode 100644 index 0000000..4821f7a --- /dev/null +++ b/userland/arch/aarch64/ubfm.S @@ -0,0 +1,17 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#ubfm */ + +#include "common.h" + +ENTRY + ldr x0, =0x1122334455667788 + + // lsr alias: imms == 63 + + ldr x1, =0xFFFFFFFFFFFFFFFF + ubfm x1, x0, 16, 63 + ASSERT_EQ(x1, 0x0000112233445566) + + ldr x1, =0xFFFFFFFFFFFFFFFF + ubfm x1, x0, 32, 63 + ASSERT_EQ(x1, 0x0000000011223344) +EXIT diff --git a/userland/arch/aarch64/ubfx.S b/userland/arch/aarch64/ubfx.S new file mode 100644 index 0000000..7a1735b --- /dev/null +++ b/userland/arch/aarch64/ubfx.S @@ -0,0 +1,15 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#ubfx */ + +#include "common.h" + +ENTRY + ldr x0, =0x1122334455667788 + + ldr x1, =0xFFFFFFFFFFFFFFFF + ubfx x1, x0, 8, 16 + ASSERT_EQ(x1, 0x0000000000006677) + + ldr x1, =0xFFFFFFFFFFFFFFFF + ubfx x1, x0, 8, 32 + ASSERT_EQ(x1, 0x0000000044556677) +EXIT diff --git a/userland/arch/aarch64/x31.S b/userland/arch/aarch64/x31.S new file mode 100644 index 0000000..c53f148 --- /dev/null +++ b/userland/arch/aarch64/x31.S @@ -0,0 +1,51 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#x31 */ + +#include "common.h" + +ENTRY + /* ERROR: can never use the name x31. */ +#if 0 + mov x31, 31 +#endif + + /* mov (register) is an alias for ORR, which accepts xzr. 
*/ + mov x0, 1 + mov x0, xzr + ASSERT_EQ(x0, 0) + + /* Same encoding as the mov version. */ + mov x0, 1 + orr x0, xzr, xzr + ASSERT_EQ(x0, 0) + + /* So, orr, which is not an alias, can only take xzr, not sp. */ +#if 0 + orr sp, sp, sp +#endif + + /* Zero register discards result if written to. */ + mov x0, 1 + orr xzr, x0, x0 + ASSERT_EQ(xzr, 0) + + /* MOV (to/from SP) is an alias for ADD (immediate). */ + mov x0, sp + mov sp, 1 + /* Alias to add. */ + mov x1, sp + /* Exact same encoding as above. */ + add x1, sp, 0 + ASSERT_EQ(x1, 1) + mov sp, x0 + + /* So, ADD (immediate), which is not an alias, can only take sp, not xzr. */ +#if 0 + /* Error: integer register expected in the extended/shifted operand register at operand 3 -- `add xzr,xzr,1' */ + add xzr, xzr, 1 +#endif + + /* Note however that ADD (register), unlike ADD (immediate), + * does not say anything about SP, and so does accept xzr just fine. + */ + add xzr, xzr, xzr +EXIT diff --git a/userland/arch/arm/add.S b/userland/arch/arm/add.S new file mode 100644 index 0000000..1735a4e --- /dev/null +++ b/userland/arch/arm/add.S @@ -0,0 +1,58 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#data-processing-instructions */ + +#include "common.h" + +ENTRY + + /* Immediate encoding. + * + * r1 = r0 + 2 + */ + mov r0, 1 + /* r1 = r0 + 2 */ + add r1, r0, 2 + ASSERT_EQ(r1, 3) + + /* If src == dest, we can omit one of them. + * + * r0 = r0 + 2 + */ + mov r0, 1 + add r0, 2 + ASSERT_EQ(r0, 3) + + /* Same as above but explicit. */ + mov r0, 1 + add r0, r0, 2 + ASSERT_EQ(r0, 3) + +#if 0 + /* But we cannot omit the register if there is a shift when using .syntax unified: + * https://github.com/cirosantilli/arm-assembly-cheat#shift-suffixes + */ + .syntax unified + /* Error: garbage following instruction */ + add r0, r1, lsl 1 + /* OK */ + add r0, r0, r1, lsl 1 +#endif + + /* Register encoding. 
+ * + * r2 = r0 + r1 + */ + mov r0, 1 + mov r1, 2 + add r2, r0, r1 + ASSERT_EQ(r2, 3) + + /* Register encoding, omit implicit register. + * + * r1 = r1 + r0 + */ + mov r0, 1 + mov r1, 2 + add r1, r0 + ASSERT_EQ(r1, 3) + +EXIT diff --git a/userland/arch/arm/address_modes.S b/userland/arch/arm/address_modes.S new file mode 100644 index 0000000..d263d9d --- /dev/null +++ b/userland/arch/arm/address_modes.S @@ -0,0 +1,51 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#addressing-modes */ + +#include "common.h" + +ENTRY + + /* Offset mode with immediate. Add 4 to the address register, which ends up + * reading myvar2 instead of myvar. + */ + adr r0, myvar + ldr r1, [r0, 4] + ASSERT_EQ(r1, 0x9ABCDEF0) + /* r0 was not modified. */ + ASSERT_EQ(r0, myvar) + + /* Pre-indexed mode */ + adr r0, myvar + ldr r1, [r0, 4]! + ASSERT_EQ(r1, 0x9ABCDEF0) + /* r0 was modified. */ + ASSERT_EQ(r0, myvar2) + + /* Post-indexed mode */ + adr r0, myvar + ldr r1, [r0], 4 + ASSERT_EQ(r1, 0x12345678) + /* r0 was modified. */ + ASSERT_EQ(r0, myvar2) + + /* Offset in register. */ + adr r0, myvar + mov r1, 4 + ldr r2, [r0, r1] + ASSERT_EQ(r2, 0x9ABCDEF0) + + /* Offset in shifted register: + * r2 = + * (r0 + (r1 << 1)) + * == *(myvar + (2 << 1)) + * == *(myvar + 4) + */ + adr r0, myvar + mov r1, 2 + ldr r2, [r0, r1, lsl 1] + ASSERT_EQ(r2, 0x9ABCDEF0) + +EXIT +myvar: + .word 0x12345678 +myvar2: + .word 0x9ABCDEF0 diff --git a/userland/arch/arm/adr.S b/userland/arch/arm/adr.S new file mode 100644 index 0000000..510f97f --- /dev/null +++ b/userland/arch/arm/adr.S @@ -0,0 +1,33 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#adr */ + +#include "common.h" + +.data +data_label: + .word 0x1234678 +ENTRY + adr r0, label + /* objdump tells us that this uses the literal pool, + * it does not get converted to adr, which is the better + * alternative here. 
+ */ + adr r1, label + adrl r2, label +label: + ASSERT_EQ_REG(r0, r1) + ASSERT_EQ_REG(r0, r2) + +#if 0 + /* Error: symbol .data is in a different section. + * + * It works however in ARMv8. + * I think this means that there is no relocation type + * that takes care of this encoding in ARMv7, but there + * is one in ARMv8. + * + * If you have no idea what I'm talking about, read this: + * https://stackoverflow.com/questions/3322911/what-do-linkers-do/33690144#33690144 + */ + adr r1, data_label +#endif +EXIT diff --git a/userland/arch/arm/and.S b/userland/arch/arm/and.S new file mode 100644 index 0000000..8d4caca --- /dev/null +++ b/userland/arch/arm/and.S @@ -0,0 +1,27 @@ +/* Bitwise AND. */ + +#include "common.h" + +ENTRY + + /* 0x00 & 0xFF == 0x00 */ + mov r0, 0x00 + and r0, 0xFF + ASSERT_EQ(r0, 0x00) + + /* 0x0F & 0xF0 == 0x00 */ + mov r0, 0x0F + and r0, 0xF0 + ASSERT_EQ(r0, 0x00) + + /* 0x0F & 0xFF == 0x0F */ + mov r0, 0x0F + and r0, 0xFF + ASSERT_EQ(r0, 0x0F) + + /* 0xF0 & 0xFF == 0xF0 */ + mov r0, 0xF0 + and r0, 0xFF + ASSERT_EQ(r0, 0xF0) + +EXIT diff --git a/userland/arch/arm/b.S b/userland/arch/arm/b.S new file mode 100644 index 0000000..cc3af56 --- /dev/null +++ b/userland/arch/arm/b.S @@ -0,0 +1,9 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#b */ + +#include "common.h" +ENTRY + /* Jump over the fail. 26-bit PC-relative. */ + b ok + FAIL +ok: +EXIT diff --git a/userland/arch/arm/beq.S b/userland/arch/arm/beq.S new file mode 100644 index 0000000..16435f1 --- /dev/null +++ b/userland/arch/arm/beq.S @@ -0,0 +1,28 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#beq */ + +#include "common.h" + +ENTRY + + /* Smaller. */ + mov r0, 1 + cmp r0, 2 + ASSERT(ble) + ASSERT(blt) + ASSERT(bne) + + /* Equal. */ + mov r1, 0 + cmp r1, 0 + ASSERT(beq) + ASSERT(bge) + ASSERT(ble) + + /* Greater. 
*/ + mov r0, 2 + cmp r0, 1 + ASSERT(bge) + ASSERT(bgt) + ASSERT(bne) + +EXIT diff --git a/userland/arch/arm/bfi.S b/userland/arch/arm/bfi.S new file mode 100644 index 0000000..d2ff08c --- /dev/null +++ b/userland/arch/arm/bfi.S @@ -0,0 +1,10 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#bfi */ + +#include "common.h" + +ENTRY + ldr r0, =0x11223344 + ldr r1, =0xFFFFFFFF + bfi r1, r0, 8, 16 + ASSERT_EQ(r1, 0xFF3344FF) +EXIT diff --git a/userland/arch/arm/bic.S b/userland/arch/arm/bic.S new file mode 100644 index 0000000..a605434 --- /dev/null +++ b/userland/arch/arm/bic.S @@ -0,0 +1,10 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#bic */ + +#include "common.h" + +ENTRY + /* 0x0F & ~0x55 == 0x0F & 0xAA == 0x0A */ + mov r0, 0x0F + bic r0, 0x55 + ASSERT_EQ(r0, 0x0A) +EXIT diff --git a/userland/arch/arm/bl.S b/userland/arch/arm/bl.S new file mode 100644 index 0000000..e8fd4d5 --- /dev/null +++ b/userland/arch/arm/bl.S @@ -0,0 +1,14 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#bl */ + +#include "common.h" + +ENTRY + mov r0, 1 + bl inc + ASSERT_EQ(r0, 2) +EXIT + +/* void inc(int *i) { (*i)++ } */ +inc: + add r0, 1 + bx lr diff --git a/userland/arch/arm/build b/userland/arch/arm/build new file mode 120000 index 0000000..ab18017 --- /dev/null +++ b/userland/arch/arm/build @@ -0,0 +1 @@ +../build \ No newline at end of file diff --git a/userland/arch/arm/c/add.c b/userland/arch/arm/c/add.c new file mode 100644 index 0000000..af52de6 --- /dev/null +++ b/userland/arch/arm/c/add.c @@ -0,0 +1,17 @@ +/* 1 + 2 == 3 */ + +#include +#include + +int main(void) { + uint32_t in0 = 1, in1 = 2, out; + __asm__ ( + "add %[out], %[in0], %[in1];" + : [out] "=r" (out) + : [in0] "r" (in0), + [in1] "r" (in1) + ); + assert(in0 == 1); + assert(in1 == 2); + assert(out == 3); +} diff --git a/userland/arch/arm/c/build b/userland/arch/arm/c/build new file mode 120000 index 0000000..ab18017 --- /dev/null +++ b/userland/arch/arm/c/build @@ -0,0 +1 @@ +../build \ No 
newline at end of file diff --git a/userland/arch/arm/c/freestanding/build b/userland/arch/arm/c/freestanding/build new file mode 120000 index 0000000..ab18017 --- /dev/null +++ b/userland/arch/arm/c/freestanding/build @@ -0,0 +1 @@ +../build \ No newline at end of file diff --git a/userland/arch/arm/c/freestanding/hello.c b/userland/arch/arm/c/freestanding/hello.c new file mode 100644 index 0000000..5d24a18 --- /dev/null +++ b/userland/arch/arm/c/freestanding/hello.c @@ -0,0 +1,35 @@ +#include + +void _start(void) { + uint32_t exit_status; + + /* write */ + { + char msg[] = "hello\n"; + uint32_t syscall_return; + register uint32_t r0 __asm__ ("r0") = 1; /* stdout */ + register char *r1 __asm__ ("r1") = msg; + register uint32_t r2 __asm__ ("r2") = sizeof(msg); + register uint32_t r8 __asm__ ("r7") = 4; /* syscall number */ + __asm__ __volatile__ ( + "svc 0;" + : "+r" (r0) + : "r" (r1), "r" (r2), "r" (r8) + : "memory" + ); + syscall_return = r0; + exit_status = (syscall_return != sizeof(msg)); + } + + /* exit */ + { + register uint32_t r0 __asm__ ("r0") = exit_status; + register uint32_t r7 __asm__ ("r7") = 1; + __asm__ __volatile__ ( + "svc 0;" + : "+r" (r0) + : "r" (r7) + : + ); + } +} diff --git a/userland/arch/arm/c/inc.c b/userland/arch/arm/c/inc.c new file mode 100644 index 0000000..0339da8 --- /dev/null +++ b/userland/arch/arm/c/inc.c @@ -0,0 +1,15 @@ +/* Increment a variable in inline assembly. 
*/ + +#include +#include + +int main(void) { + uint32_t my_local_var = 1; + __asm__ ( + "add %[my_local_var], %[my_local_var], #1;" + : [my_local_var] "+r" (my_local_var) + : + : + ); + assert(my_local_var == 2); +} diff --git a/userland/arch/arm/c/inc_float.c b/userland/arch/arm/c/inc_float.c new file mode 100644 index 0000000..b50253d --- /dev/null +++ b/userland/arch/arm/c/inc_float.c @@ -0,0 +1,28 @@ +/* https://stackoverflow.com/questions/53960240/armv8-floating-point-output-inline-assembly */ + +#include + +int main(void) { + float my_float = 1.5; + __asm__ ( + "vmov s0, 1.0;" + "vadd.f32 %[my_float], %[my_float], s0;" + : [my_float] "+t" (my_float) + : + : "s0" + ); + assert(my_float == 2.5); + + /* Undocumented %P + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89482 + */ + double my_double = 1.5; + __asm__ ( + "vmov.f64 d0, 1.0;" + "vadd.f64 %P[my_double], %P[my_double], d0;" + : [my_double] "+w" (my_double) + : + : "d0" + ); + assert(my_double == 2.5); +} diff --git a/userland/arch/arm/c/inc_memory.c b/userland/arch/arm/c/inc_memory.c new file mode 100644 index 0000000..aa3c64a --- /dev/null +++ b/userland/arch/arm/c/inc_memory.c @@ -0,0 +1,32 @@ +/* Like inc.c but less good since we do more work ourselves. + * + * Just doing this to test out the "m" memory constraint. + * + * GCC 8.2.0 -O0 assembles ldr line to: + * + * .... + * ldr r0, [fp, #-12] + * .... + * + * and `-O3` assembles to: + * + * .... + * ldr r0, [sp] + * .... + */ + +#include +#include + +int main(void) { + uint32_t my_local_var = 1; + __asm__ ( + "ldr r0, %[my_local_var];" + "add r0, r0, #1;" + "str r0, %[my_local_var];" + : [my_local_var] "+m" (my_local_var) + : + : "r0" + ); + assert(my_local_var == 2); +} diff --git a/userland/arch/arm/c/inc_memory_global.c b/userland/arch/arm/c/inc_memory_global.c new file mode 100644 index 0000000..14a1ab1 --- /dev/null +++ b/userland/arch/arm/c/inc_memory_global.c @@ -0,0 +1,25 @@ +/* GCC 8.2.0 -O0 and -O3 assembles ldr line to: + * + * .... 
+ * movw r3, # + * movt r3, # + * ldr r0, [r3] + * .... + */ + +#include +#include + +uint32_t my_global_var = 1; + +int main(void) { + __asm__ ( + "ldr r0, %[my_global_var];" + "add r0, r0, #1;" + "str r0, %[my_global_var];" + : [my_global_var] "+m" (my_global_var) + : + : "r0" + ); + assert(my_global_var == 2); +} diff --git a/userland/arch/arm/c/reg_var.c b/userland/arch/arm/c/reg_var.c new file mode 100644 index 0000000..a2367b2 --- /dev/null +++ b/userland/arch/arm/c/reg_var.c @@ -0,0 +1,38 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#register-variables */ + +#include +#include + +int main(void) { + register uint32_t r0 __asm__ ("r0"); + register uint32_t r1 __asm__ ("r1"); + uint32_t new_r0; + uint32_t new_r1; + { + /* We must set the registers immediately before calling, + * without making any function calls in between. + */ + r0 = 1; + r1 = 2; + __asm__ ( + /* We intentionally use an explicit r0 and r1 here, + * just to illustrate that we are certain that the + * r0 variable will go in r0. Real code would never do this. + */ + "add %[r0], r0, #1;" + "add %[r1], r1, #1;" + /* We have to specify r0 in the constraints.*/ + : [r0] "+r" (r0), + [r1] "+r" (r1) + : + : + ); + /* When we are done, we must immediately assign + * the register variables to regular variables. + */ + new_r0 = r0; + new_r1 = r1; + } + assert(new_r0 == 2); + assert(new_r1 == 3); +} diff --git a/userland/arch/arm/c_from_asm.S b/userland/arch/arm/c_from_asm.S new file mode 100644 index 0000000..6f415b4 --- /dev/null +++ b/userland/arch/arm/c_from_asm.S @@ -0,0 +1,59 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#calling-convention */ + +#include "common.h" + +.data +puts_s: + .asciz "hello puts" +printf_format: + .asciz "hello printf %x\n" +my_array_0: + .word 0x11111111, 0x22222222, 0x33333333, 0x44444444 +my_array_1: + .word 0x55555555, 0x66666666, 0x77777777, 0x88888888 + +ENTRY + /* puts("hello puts") */ + /* r0 is first argument. 
*/ + ldr r0, =puts_s + bl puts + /* Check return value >= 0 for success. */ + cmp r0, 0 + ASSERT(bge) + + /* printf */ + ldr r0, =printf_format + ldr r1, =0x12345678 + bl printf + cmp r0, 0 + ASSERT(bge) + + /* memcpy and memcmp. */ + + /* Smaller. */ + ldr r0, =my_array_0 + ldr r1, =my_array_1 + ldr r2, =0x10 + bl memcmp + cmp r0, 0 + ASSERT(blt) + + /* Copy. */ + ldr r0, =my_array_0 + ldr r1, =my_array_1 + ldr r2, =0x10 + bl memcpy + + /* Equal. */ + ldr r0, =my_array_0 + ldr r1, =my_array_1 + ldr r2, =0x10 + bl memcmp + ASSERT_EQ(r0, 0) + + /* exit(0) */ + mov r0, 0 + bl exit + + /* Never reached, just for the fail symbol. */ +EXIT diff --git a/userland/arch/arm/clz.S b/userland/arch/arm/clz.S new file mode 100644 index 0000000..9ac5903 --- /dev/null +++ b/userland/arch/arm/clz.S @@ -0,0 +1,17 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#data-processing-instructions */ + +#include "common.h" + +ENTRY + ldr r0, =0x7FFFFFFF + clz r1, r0 + ASSERT_EQ(r1, 1) + + ldr r0, =0x3FFFFFFF + clz r1, r0 + ASSERT_EQ(r1, 2) + + ldr r0, =0x1FFFFFFF + clz r1, r0 + ASSERT_EQ(r1, 3) +EXIT diff --git a/userland/arch/arm/comments.S b/userland/arch/arm/comments.S new file mode 100644 index 0000000..2d3169b --- /dev/null +++ b/userland/arch/arm/comments.S @@ -0,0 +1,14 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#comments */ + +#include "common.h" +ENTRY + # mycomment + @ mycomment + /* # only works at the beginning of the line. + * Error: garbage following instruction -- `nop #comment' + */ +#if 0 + nop # mycomment +#endif + nop @ mycomment +EXIT diff --git a/userland/arch/arm/common_arch.h b/userland/arch/arm/common_arch.h new file mode 100644 index 0000000..c61fa57 --- /dev/null +++ b/userland/arch/arm/common_arch.h @@ -0,0 +1,71 @@ +#ifndef COMMON_ARCH_H +#define COMMON_ARCH_H + +.syntax unified + +/* Assert that a register equals a constant. + * * reg: the register to check. Can be r0-r10, but not r11. r11 is overwritten. 
+ * * const: the constant to compare to. Only works for literals or labels, not for registers. + * For register / register comparison, use ASSERT_EQ_REG. + */ +#define ASSERT_EQ(reg, const) \ + ldr r11, =const; \ + cmp reg, r11; \ + ASSERT(beq); \ +; + +/* Assert that two arrays are the same. */ +#define ASSERT_MEMCMP(s1, s2, n) \ + MEMCMP(s1, s2, n); \ + ASSERT_EQ(r0, 0); \ +; + +/* Store all callee saved registers, and LR in case we make further BL calls. + * + * Also save the input arguments r0-r3 on the stack, so we can access them later on, + * despite those registers being overwritten. + */ +#define ENTRY \ +.text; \ +.global asm_main; \ +asm_main: \ + stmdb sp!, {r0-r12, lr}; \ +asm_main_after_prologue: \ +; + +/* Meant to be called at the end of ENTRY. + * + * Branching to "fail" makes tests fail with exit status 1. + * + * If EXIT is reached, the program ends successfully. + * + * Restore LR and jump to it with bx to return from asm_main. + */ +#define EXIT \ + mov r0, 0; \ + mov r1, 0; \ + b pass; \ +fail: \ + ldr r1, [sp]; \ + str r0, [r1]; \ + mov r0, 1; \ +pass: \ + add sp, 16; \ + ldmia sp!, {r4-r12, lr}; \ + bx lr; \ +; + +/* Always fail. */ +#define FAIL \ + ldr r0, =__LINE__; \ + b fail; \ +; + +#define MEMCMP(s1, s2, n) \ + ldr r0, =s1; \ + ldr r1, =s2; \ + ldr r2, =n; \ + bl memcmp; \ +; + +#endif diff --git a/userland/arch/arm/cond.S b/userland/arch/arm/cond.S new file mode 100644 index 0000000..b45ba7b --- /dev/null +++ b/userland/arch/arm/cond.S @@ -0,0 +1,16 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#conditional-execution */ + +#include "common.h" + +ENTRY + mov r0, 0 + mov r1, 1 + cmp r0, 1 + /* Previous cmp failed, skip this operation. */ + addeq r1, 1 + ASSERT_EQ(r1, 1) + cmp r0, 0 + /* Previous passed, do this operation.
*/ + addeq r1, 1 + ASSERT_EQ(r1, 2) +EXIT diff --git a/userland/arch/arm/empty.S b/userland/arch/arm/empty.S new file mode 120000 index 0000000..6bdf9f9 --- /dev/null +++ b/userland/arch/arm/empty.S @@ -0,0 +1 @@ +../empty.S \ No newline at end of file diff --git a/userland/arch/arm/fail.S b/userland/arch/arm/fail.S new file mode 120000 index 0000000..d5427d8 --- /dev/null +++ b/userland/arch/arm/fail.S @@ -0,0 +1 @@ +../fail.S \ No newline at end of file diff --git a/userland/arch/arm/freestanding/build b/userland/arch/arm/freestanding/build new file mode 120000 index 0000000..ab18017 --- /dev/null +++ b/userland/arch/arm/freestanding/build @@ -0,0 +1 @@ +../build \ No newline at end of file diff --git a/userland/arch/arm/freestanding/hello.S b/userland/arch/arm/freestanding/hello.S new file mode 100644 index 0000000..e53750c --- /dev/null +++ b/userland/arch/arm/freestanding/hello.S @@ -0,0 +1,21 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#linux-system-calls */ + +.syntax unified +.text +.global _start +_start: +asm_main_after_prologue: + /* write */ + mov r0, 1 /* stdout */ + adr r1, msg /* buffer */ + ldr r2, =len /* len */ + mov r7, 4 /* syscall number */ + svc 0 + + /* exit */ + mov r0, 0 /* exit status */ + mov r7, 1 /* syscall number */ + svc 0 +msg: + .ascii "hello\n" +len = . - msg diff --git a/userland/arch/arm/hello_driver.S b/userland/arch/arm/hello_driver.S new file mode 100644 index 0000000..2bd778d --- /dev/null +++ b/userland/arch/arm/hello_driver.S @@ -0,0 +1,23 @@ +/* Minimal example using driver. + * + * Controls the exit status of the program. + */ + +.syntax unified +.text +.global asm_main +asm_main: +asm_main_after_prologue: + + /* Set the return value according to the ARM calling convention. */ + mov r0, 0 + + /* Try some whacky value to see tests break. */ + /*mov r0, 77*/ + + /* Branch to the address at register lr. + * That is the return value which was put there by the C driver (likely with a bl). 
+ * + * X means eXchange encoding from thumb back to ARM, which is what the driver uses. + */ + bx lr diff --git a/userland/arch/arm/immediates.S b/userland/arch/arm/immediates.S new file mode 100644 index 0000000..6abcff6 --- /dev/null +++ b/userland/arch/arm/immediates.S @@ -0,0 +1,24 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#immediates */ + +#include "common.h" + +ENTRY + /* This is the default. We hack it in common.h however. */ +.syntax divided + /* These fail. */ +#if 0 + mov r0, 1 + mov r0, 0x1 +#endif + mov r0, #1 + mov r0, #0x1 + mov r0, $1 + mov r0, $0x1 +.syntax unified + mov r0, 1 + mov r0, 0x1 + mov r0, 1 + mov r0, 0x1 + mov r0, $1 + mov r0, $0x1 +EXIT diff --git a/userland/arch/arm/inc_array.S b/userland/arch/arm/inc_array.S new file mode 100644 index 0000000..ca56cb1 --- /dev/null +++ b/userland/arch/arm/inc_array.S @@ -0,0 +1,27 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#loop-over-array */ + +#include "common.h" + +#define NELEM 4 +#define ELEM_SIZE 4 + +.data; +my_array: + .word 0x11111111, 0x22222222, 0x33333333, 0x44444444 +my_array_expect: + .word 0x11111112, 0x22222223, 0x33333334, 0x44444445 + +ENTRY + /* Increment. */ + ldr r0, =my_array + mov r1, NELEM +increment: + ldr r2, [r0] + add r2, 1 + /* Post index usage. */ + str r2, [r0], ELEM_SIZE + sub r1, 1 + cmp r1, 0 + bne increment + ASSERT_MEMCMP(my_array, my_array_expect, 0x10) +EXIT diff --git a/userland/arch/arm/ldmia.S b/userland/arch/arm/ldmia.S new file mode 100644 index 0000000..3f0c38f --- /dev/null +++ b/userland/arch/arm/ldmia.S @@ -0,0 +1,62 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#loop-over-array */ + +#include "common.h" + +#define NELEM 4 +#define ELEM_SIZE 4 + +.data; +my_array_0: + .word 0x11111111, 0x22222222, 0x33333333, 0x44444444 +my_array_1: + .word 0x55555555, 0x66666666, 0x77777777, 0x88888888 + +ENTRY + + /* Load r1, r2, r3 and r4 starting from the address in r0. 
Don't change r0 */ + ldr r0, =my_array_0 + ldr r1, =0 + ldr r2, =0 + ldr r3, =0 + ldr r4, =0 + ldmia r0, {r1-r4} + ASSERT_EQ(r0, my_array_0) + ASSERT_EQ(r1, 0x11111111) + ASSERT_EQ(r2, 0x22222222) + ASSERT_EQ(r3, 0x33333333) + ASSERT_EQ(r4, 0x44444444) + + /* Swapping the order of r1 and r2 on the mnemonic makes no difference to load order. + * + * But it gives an assembler warning, so we won't do it by default: + * + * ldmia.S: Assembler messages: + * ldmia.S:32: Warning: register range not in ascending order + */ +#if 0 + ldr r0, =my_array_0 + ldr r1, =0 + ldr r2, =0 + ldmia r0, {r2,r1} + ASSERT_EQ(r1, 0x11111111) + ASSERT_EQ(r2, 0x22222222) +#endif + + /* Modify the array */ + ldr r0, =my_array_1 + ldr r1, =0x55555555 + ldr r2, =0x66666666 + ldr r3, =0x77777777 + ldr r4, =0x88888888 + stmdb r0, {r1-r4} + + /* Verify that my_array_0 changed and is equal to my_array_1. */ + MEMCMP(my_array_0, my_array_1, 0x10) + ASSERT_EQ(r0, 0) + + /* Load registers and increment r0. */ + ldr r0, =my_array_0 + ldmia r0!, {r1-r4} + ASSERT_EQ(r0, my_array_1) + +EXIT diff --git a/userland/arch/arm/ldr_pseudo.S b/userland/arch/arm/ldr_pseudo.S new file mode 100644 index 0000000..eec5880 --- /dev/null +++ b/userland/arch/arm/ldr_pseudo.S @@ -0,0 +1,65 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#ldr-pseudo-instruction */ + +#include "common.h" + +ENTRY + + /* Mnemonic for a PC relative load: + * + * .... + * ldr r0, [pc, offset] + * r0 = myvar + * .... + */ + ldr r0, myvar + ASSERT_EQ(r0, 0x12345678) + + /* Mnemonic PC relative load with an offset. + * Load myvar2 instead of myvar. 
+ */ + ldr r0, myvar + 4 + ASSERT_EQ(r0, 0x9ABCDEF0) + + /* First store the address in r0 using a magic =myvar, which creates + * a new variable containing the address and PC-relative addresses it + * https://stackoverflow.com/questions/17214962/what-is-the-difference-between-label-equals-sign-and-label-brackets-in-ar + * + * Using the adr instruction would likely be better for this application however. + * + * .... + * r0 = &myvar + * r1 = *r0 + * .... + */ + ldr r0, =myvar + ldr r1, [r0] + ASSERT_EQ(r1, 0x12345678) + + /* More efficiently, use r0 as the address to read, and write to r0 itself. */ + ldr r0, =myvar + ldr r0, [r0] + ASSERT_EQ(r0, 0x12345678) + + /* Same as =myvar but store a constant to a register. + * Can also be done with movw and movt. */ + ldr r0, =0x11112222 + ASSERT_EQ(r0, 0x11112222) + + /* We can also use GAS :lower16: and :upper16: with movw and movt + * to load the address of myvar into r0 with two immediates. + * + * This results in one extra 4 byte instruction read from memory, + * and one less data read, so it is likely more cache efficient.
+ * + * https://sourceware.org/binutils/docs-2.19/as/ARM_002dRelocations.html + */ + movw r0, #:lower16:myvar + movt r0, #:upper16:myvar + ldr r1, [r0] + ASSERT_EQ(r1, 0x12345678) + +EXIT +myvar: + .word 0x12345678 +myvar2: + .word 0x9ABCDEF0 diff --git a/userland/arch/arm/ldrb.S b/userland/arch/arm/ldrb.S new file mode 100644 index 0000000..390981a --- /dev/null +++ b/userland/arch/arm/ldrb.S @@ -0,0 +1,12 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#ldrh-and-ldrb */ + +#include "common.h" + +ENTRY + ldr r0, =myvar + mov r1, 0x0 + ldrb r1, [r0] + ASSERT_EQ(r1, 0x00000078) +EXIT +myvar: + .word 0x12345678 diff --git a/userland/arch/arm/ldrh.S b/userland/arch/arm/ldrh.S new file mode 100644 index 0000000..386bab9 --- /dev/null +++ b/userland/arch/arm/ldrh.S @@ -0,0 +1,12 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#ldrh-and-ldrb */ + +#include "common.h" + +ENTRY + ldr r0, =myvar + mov r1, 0x0 + ldrh r1, [r0] + ASSERT_EQ(r1, 0x00005678) +EXIT +myvar: + .word 0x12345678 diff --git a/userland/arch/arm/mov.S b/userland/arch/arm/mov.S new file mode 100644 index 0000000..6ef0b69 --- /dev/null +++ b/userland/arch/arm/mov.S @@ -0,0 +1,19 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#mov */ + +#include "common.h" + +ENTRY + + /* Immediate. */ + mov r0, 0 + ASSERT_EQ(r0, 0) + mov r0, 1 + ASSERT_EQ(r0, 1) + + /* Register. */ + mov r0, 0 + mov r1, 1 + mov r1, r0 + ASSERT_EQ(r1, 0) + +EXIT diff --git a/userland/arch/arm/movw.S b/userland/arch/arm/movw.S new file mode 100644 index 0000000..212dc57 --- /dev/null +++ b/userland/arch/arm/movw.S @@ -0,0 +1,27 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#movw-and-movt */ + +#include "common.h" + +ENTRY + + /* movt (top) and movw (TODO what is w) set the higher + * and lower 16 bits of the register. 
+ */ + movw r0, 0xFFFF + movt r0, 0x1234 + add r0, 1 + ASSERT_EQ(r0, 0x12350000) + + /* movw also zeroes out the top bits, allowing small 16-bit + * C constants to be assigned in a single instruction. + * + * It differs from mov because mov can only encode 8 bits + * at a time, while movw can encode 16. + * + * movt does not modify the lower bits however. + */ + ldr r0, =0x12345678 + movw r0, 0x1111 + ASSERT_EQ(r0, 0x00001111) + +EXIT diff --git a/userland/arch/arm/mul.S b/userland/arch/arm/mul.S new file mode 100644 index 0000000..7dcb82c --- /dev/null +++ b/userland/arch/arm/mul.S @@ -0,0 +1,12 @@ +/* Multiplication. */ + +#include "common.h" + +ENTRY + /* 2 * 3 = 6 */ + mov r0, 0 + mov r1, 2 + mov r2, 3 + mul r1, r2 + ASSERT_EQ(r1, 6) +EXIT diff --git a/userland/arch/arm/nop.S b/userland/arch/arm/nop.S new file mode 100644 index 0000000..ace3c57 --- /dev/null +++ b/userland/arch/arm/nop.S @@ -0,0 +1,32 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#nop */ + +#include "common.h" + +ENTRY + /* Disassembles as: + * + * .... + * nop {0} + * .... + * + * TODO what is the `{0}`? + */ + nop + + /* Disassembles as: + * + * .... + * nop ; (mov r0, r0) + * .... + */ + mov r0, r0 + + /* Disassemble as mov. TODO Why not as nop as in `mov r0, r0`? + * Do they have any effect? + */ + mov r1, r1 + mov r8, r8 + + /* And there are other nops as well? Disassembles as `and`. */ + and r0, r0, r0 +EXIT diff --git a/userland/arch/arm/push.S b/userland/arch/arm/push.S new file mode 100644 index 0000000..d336a7d --- /dev/null +++ b/userland/arch/arm/push.S @@ -0,0 +1,31 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#ldmia */ + +#include "common.h" + +ENTRY + + /* Save sp before push. */ + mov r0, sp + + /* Push. */ + mov r1, 1 + mov r2, 2 + push {r1, r2} + + /* Save sp after push. */ + mov r1, sp + + /* Restore. 
*/ + mov r3, 0 + mov r4, 0 + pop {r3, r4} + ASSERT_EQ(r3, 1) + ASSERT_EQ(r4, 2) + + /* Check that stack pointer moved down by 8 bytes + * (2 registers x 4 bytes each). + */ + sub r0, r1 + ASSERT_EQ(r0, 8) + +EXIT diff --git a/userland/arch/arm/rbit.S b/userland/arch/arm/rbit.S new file mode 100644 index 0000000..d6cf245 --- /dev/null +++ b/userland/arch/arm/rbit.S @@ -0,0 +1,9 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#rbit */ + +#include "common.h" + +ENTRY + ldr r0, =0b00000001001000110100010101100101 + rbit r1, r0 + ASSERT_EQ(r1, 0b10100110101000101100010010000000) +EXIT diff --git a/userland/arch/arm/regs.S b/userland/arch/arm/regs.S new file mode 100644 index 0000000..2a15b84 --- /dev/null +++ b/userland/arch/arm/regs.S @@ -0,0 +1,69 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#registers */ + +#include "common.h" + +ENTRY + + /* 13 general purpose registers. */ + mov r0, 0 + mov r1, 1 + mov r2, 2 + mov r3, 3 + mov r4, 4 + mov r5, 5 + mov r6, 6 + mov r7, 7 + mov r8, 8 + mov r9, 9 + mov r10, 10 + mov r11, 11 + mov r12, 12 + + /* * r11: aliased to FP (frame pointer, debug stack trace usage only) + * + + * I think FP is only a convention with no instruction impact, but TODO: + * not mentioned on AAPCS. aarch64 AAPCS mentions it though. + * * r13: aliased to SP (stack pointer), what push / pop use + * * r14: aliased to LR (link register), what bl writes the return address to + * * r15: aliased to PC (program counter), contains the current instruction address + * + * In ARMv8, SP and PC have dedicated registers in addition to + * the 32-general purpose ones. LR is still general purpose as before. + * + * Therefore, it is possible to use those registers in any place + * other registers may be used. + * + * This is not possible in ARMv8 anymore. 
+ * + * For example, we can load an address into PC, which is very similar to what B / BX does: + * https://stackoverflow.com/questions/32304646/arm-assembly-branch-to-address-inside-register-or-memory/54145818#54145818 + */ + ldr pc, =10f + FAIL +10: + + /* Same with r15, which is the same as pc. */ + ldr r15, =10f + FAIL +10: + + /* Another example with mov reading from pc. */ +pc_addr: + mov r0, pc + /* Why sub 8: + * https://stackoverflow.com/questions/24091566/why-does-the-arm-pc-register-point-to-the-instruction-after-the-next-one-to-be-e + */ + sub r0, r0, 8 + + /* pc-relative load also just work just like any other register. */ + ldr r0, [pc] + b 1f + .word 0x12345678 +1: + ASSERT_EQ(r0, 0x12345678) + + /* We can also use fp in GNU GAS assembly. */ + mov r11, 0 + mov fp, 1 + ASSERT_EQ(r11, 1) +EXIT diff --git a/userland/arch/arm/rev.S b/userland/arch/arm/rev.S new file mode 100644 index 0000000..ee491e4 --- /dev/null +++ b/userland/arch/arm/rev.S @@ -0,0 +1,15 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#data-processing-instructions */ + +#include "common.h" + +ENTRY + /* All bytes in register. */ + ldr r0, =0x11223344 + rev r1, r0 + ASSERT_EQ(r1, 0x44332211) + + /* Groups of 16-bits. */ + ldr r0, =0x11223344 + rev16 r1, r0 + ASSERT_EQ(r1, 0x22114433) +EXIT diff --git a/userland/arch/arm/s_suffix.S b/userland/arch/arm/s_suffix.S new file mode 100644 index 0000000..bc2cc13 --- /dev/null +++ b/userland/arch/arm/s_suffix.S @@ -0,0 +1,35 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#s-suffix */ + +#include "common.h" + +ENTRY + + /* Result is 0, set beq. */ + movs r0, 0 + ASSERT(beq) + + /* The opposite. */ + movs r0, 1 + ASSERT(bne) + + /* mov without s does not set the status. */ + movs r0, 0 + mov r0, 1 + ASSERT(beq) + + /* movs still moves... */ + mov r0, 0 + movs r0, 1 + ASSERT_EQ(r0, 1) + + /* add: the result is 0. */ + mov r0, 1 + adds r0, -1 + ASSERT(beq) + + /* add: result non 0. 
*/ + mov r0, 1 + adds r0, 1 + ASSERT(bne) + +EXIT diff --git a/userland/arch/arm/shift.S b/userland/arch/arm/shift.S new file mode 100644 index 0000000..d85e727 --- /dev/null +++ b/userland/arch/arm/shift.S @@ -0,0 +1,79 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#shift-suffixes */ + +#include "common.h" + +ENTRY + + /* lsl */ + ldr r0, =0xFFF00FFF + mov r1, r0, lsl 8 + ldr r2, =0xF00FFF00 + ASSERT_EQ_REG(r1, r2) + + /* lsr */ + ldr r0, =0xFFF00FFF + mov r1, r0, lsr 8 + ldr r2, =0x00FFF00F + ASSERT_EQ_REG(r1, r2) + + /* ror */ + ldr r0, =0xFFF00FFF + mov r1, r0, ror 8 + ldr r2, =0xFFFFF00F + ASSERT_EQ_REG(r1, r2) + + /* asr negative */ + ldr r0, =0x80000008 + mov r1, r0, asr 1 + ldr r2, =0xC0000004 + ASSERT_EQ_REG(r1, r2) + + /* asr positive */ + ldr r0, =0x40000008 + mov r1, r0, asr 1 + ldr r2, =0x20000004 + ASSERT_EQ_REG(r1, r2) + + /* There are also direct shift mnemonics for the mov shifts. + * + * They assemble to the exact same bytes as the mov version. + */ + ldr r0, =0xFFF00FFF + lsl r1, r0, 8 + ldr r2, =0xF00FFF00 + ASSERT_EQ_REG(r1, r2) + + /* If used with the `mov` instruction, it results in a pure shift, + * but the suffixes also exist for all the other data processing instructions. + * + * Here we illustrate a shifted add instruction which calculates: + * + * .... + * r1 = r1 + (r0 << 1) + * .... + */ + ldr r0, =0x10 + ldr r1, =0x100 + add r1, r1, r0, lsl 1 + ldr r2, =0x00000120 + ASSERT_EQ_REG(r1, r2) + + /* The shift takes up the same encoding slot as the immediate, + * therefore it is not possible to both use an immediate and shift. + * + * Error: shift expression expected -- `add r1,r0,1,lsl#1' + */ +#if 0 + add r1, r0, 1, lsl 1 +#endif + + /* However, you can still encode shifted bitmasks of + * limited width in immediates, so why not just use the + * assembler pre-processing for it?
+ */ + ldr r1, =0x100 + add r1, r1, (0x10 << 1) + ldr r2, =0x00000120 + ASSERT_EQ_REG(r1, r2) + +EXIT diff --git a/userland/arch/arm/simd.S b/userland/arch/arm/simd.S new file mode 100644 index 0000000..ddec03d --- /dev/null +++ b/userland/arch/arm/simd.S @@ -0,0 +1,113 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#advanced-simd-instructions */ + +#include "common.h" + +ENTRY + /* vadd.u32 + * + * Add 4x 32-bit unsigned integers in one go. + * + * q means 128-bits. + * + * u32 means that we treat memory as uint32_t types. + * + * 4 is deduced: in 128 bits you can fit 4 u32. + * + * Observe how the carry is propagated within u32 integers, + * but not across them. + */ +.data + u32_0: .word 0xF111F111, 0xF222F222, 0xF333F333, 0xF444F444 + u32_1: .word 0x15551555, 0x16661666, 0x17771777, 0x18881888 + u32_sum_expect: .word 0x06670666, 0x08890888, 0x0AAB0AAA, 0x0CCD0CCC +.bss + u32_sum: .skip 0x10 +.text + ldr r0, =u32_0 + vld1.32 {q0}, [r0] + ldr r0, =u32_1 + vld1.32 {q1}, [r0] + vadd.u32 q2, q0, q1 + ldr r0, =u32_sum + vst1.u32 {q2}, [r0] + ASSERT_MEMCMP(u32_sum, u32_sum_expect, 0x10) + + /* vadd.u64: 2x 64-bit unsigned integer add. */ +.data + u64_0: .quad 0xF1111111F1111111, 0xF2222222F2222222 + u64_1: .quad 0x1555555515555555, 0x1666666616666666 + u64_sum_expect: .quad 0x0666666706666666, 0x0888888908888888 +.bss + u64_sum: .skip 0x10 +.text + ldr r0, =u64_0 + vld1.64 {q0}, [r0] + ldr r0, =u64_1 + vld1.64 {q1}, [r0] + vadd.u64 q2, q0, q1 + ldr r0, =u64_sum + vst1.u64 {q2}, [r0] + ASSERT_MEMCMP(u64_sum, u64_sum_expect, 0x10) + + /* vadd.s64: 2x 64-bit signed integer add. TODO: how to differentiate + * it from unsigned? I think signed and unsigned addition are identical + * in two's complement, the only difference is overflow / carry detection + * flags. But how do flags work when there are many values being added + * at once?
+ */ +.data + s64_0: .quad -1, -2 + s64_1: .quad -1, -2 + s64_sum_expect: .quad -2, -4 +.bss + s64_sum: .skip 0x10 +.text + ldr r0, =s64_0 + vld1.64 {q0}, [r0] + ldr r0, =s64_1 + vld1.64 {q1}, [r0] + vadd.s64 q2, q0, q1 + ldr r0, =s64_sum + vst1.s64 {q2}, [r0] + ASSERT_MEMCMP(s64_sum, s64_sum_expect, 0x10) + + /* vadd.f32: 4x 32-bit float add. */ +.data + f32_0: .float 1.5, 2.5, 3.5, 4.5 + f32_1: .float 5.5, 6.5, 7.5, 8.5 + f32_sum_expect: .float 7.0, 9.0, 11.0, 13.0 +.bss + f32_sum: .skip 0x10 +.text + ldr r0, =f32_0 + vld1.32 {q0}, [r0] + ldr r0, =f32_1 + vld1.32 {q1}, [r0] + vadd.f32 q2, q0, q1 + ldr r0, =f32_sum + vst1.32 {q2}, [r0] + ASSERT_MEMCMP(f32_sum, f32_sum_expect, 0x10) + + /* vadd.f64: 2x 64-bit float add: appears not possible. + * + * https://stackoverflow.com/questions/36052564/does-arm-support-simd-operations-for-64-bit-floating-point-numbers + */ +.data + f64_0: .double 1.5, 2.5 + f64_1: .double 5.5, 6.5 + f64_sum_expect: .double 7.0, 9.0 +.bss + f64_sum: .skip 0x10 +.text + ldr r0, =f64_0 + vld1.64 {q0}, [r0] + ldr r0, =f64_1 + vld1.64 {q1}, [r0] +#if 0 + /* bad type in Neon instruction -- `vadd.f64 q2,q0,q1' */ + vadd.f64 q2, q0, q1 + ldr r0, =f64_sum + vst1.64 {q2}, [r0] + ASSERT_MEMCMP(f64_sum, f64_sum_expect, 0x10) +#endif +EXIT diff --git a/userland/arch/arm/str.S b/userland/arch/arm/str.S new file mode 100644 index 0000000..0543292 --- /dev/null +++ b/userland/arch/arm/str.S @@ -0,0 +1,60 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#load-and-store-instructions */ + +#include "common.h" + +.data; + /* Must be in the .data section, since we want to modify it. */ +myvar: + .word 0x12345678 + +ENTRY + /* r0 will contain the address. */ + ldr r0, =myvar + + /* Sanity check. */ + ldr r1, [r0] + movw r2, 0x5678 + movt r2, 0x1234 + ASSERT_EQ_REG(r1, r2) + + /* Modify the value. */ + movw r1, 0xDEF0 + movt r1, 0x9ABC + str r1, [r0] + + /* Check that it changed. 
*/ + ldr r1, [r0] + movw r2, 0xDEF0 + movt r2, 0x9ABC + ASSERT_EQ_REG(r1, r2) + + /* Cannot use PC relative addressing to a different segment, + * or else it fails with: + * + * .... + * Error: internal_relocation (type: OFFSET_IMM) not fixed up + * .... + * + * https://stackoverflow.com/questions/10094282/internal-relocation-not-fixed-up + */ + /*ldr r0, myvar*/ + +#if 0 + /* We could in theory write this to set the address of myvar, + * but it will always segfault under Linux because the text segment is read-only. + * This is however useful in baremetal programming. + * This construct is not possible in ARMv8 for str: + * https://github.com/cirosantilli/arm-assembly-cheat#armv8-str + */ + str r1, var_in_same_section +var_in_same_section: +#endif + + /* = sign just doesn't make sense for str, you can't set the + * address of a variable. + */ +#if 0 + str r1, =myvar +#endif + +EXIT diff --git a/userland/arch/arm/sub.S b/userland/arch/arm/sub.S new file mode 100644 index 0000000..e3fb48c --- /dev/null +++ b/userland/arch/arm/sub.S @@ -0,0 +1,11 @@ +/* Subtraction. */ + +#include "common.h" + +ENTRY + /* 3 - 2 == 1 , register version.*/ + mov r0, 3 + mov r1, 2 + sub r0, r0, r1 + ASSERT_EQ(r0, 1) +EXIT diff --git a/userland/arch/arm/thumb.S b/userland/arch/arm/thumb.S new file mode 100644 index 0000000..e055eda --- /dev/null +++ b/userland/arch/arm/thumb.S @@ -0,0 +1,17 @@ +/* Illustrates features that are only available in thumb. */ + +.syntax unified +.text +.thumb_func +.global asm_main +asm_main: +asm_main_after_prologue: + + /* CBZ: cmp and branch if zero instruction. Equivalent to CMP + BEQ. + * TODO create an interesting assertion here. + */ + cbz r1, 1f + 1: + + mov r0, 0 + bx lr diff --git a/userland/arch/arm/tst.S b/userland/arch/arm/tst.S new file mode 100644 index 0000000..831243f --- /dev/null +++ b/userland/arch/arm/tst.S @@ -0,0 +1,19 @@ +/* Test. Same as ands, but don't store the result, just update flags. 
*/ + +#include "common.h" + +ENTRY + + /* 0x0F & 0xF0 == 0x00, so beq. */ + mov r0, 0x0F + tst r0, 0xF0 + ASSERT(beq) + + /* bne */ + mov r0, 0xFF + tst r0, 0x0F + ASSERT(bne) + # r0 was not modified. + ASSERT_EQ(r0, 0xFF) + +EXIT diff --git a/userland/arch/arm/vcvt.S b/userland/arch/arm/vcvt.S new file mode 100644 index 0000000..248069d --- /dev/null +++ b/userland/arch/arm/vcvt.S @@ -0,0 +1,90 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#vcvt */ + +#include "common.h" + +ENTRY + /* SIMD positive. */ +.data + vcvt_positive_0: .float 1.25, 2.5, 3.75, 4.0 + vcvt_positive_expect: .word 1, 2, 3, 4 +.bss + vcvt_positive_result: .skip 0x10 +.text + ldr r0, =vcvt_positive_0 + vld1.32 {q0}, [r0] + vcvt.u32.f32 q1, q0 + ldr r0, =vcvt_positive_result + vst1.32 {q1}, [r0] + ASSERT_MEMCMP(vcvt_positive_result, vcvt_positive_expect, 0x10) + + /* SIMD negative. */ +.data + vcvt_negative_0: .float -1.25, -2.5, -3.75, -4.0 + vcvt_negative_expect: .word -1, -2, -3, -4 +.bss + vcvt_negative_result: .skip 0x10 +.text + ldr r0, =vcvt_negative_0 + vld1.32 {q0}, [r0] + vcvt.s32.f32 q1, q0 + ldr r0, =vcvt_negative_result + vst1.32 {q1}, [r0] + ASSERT_MEMCMP(vcvt_negative_result, vcvt_negative_expect, 0x10) + + /* Floating point. */ +.data + vcvt_positive_float_0: .float 1.5, 2.5 + vcvt_positive_float_expect: .word 1 + .float 2.5 +.bss + vcvt_positive_float_result: .skip 0x8 +.text + ldr r0, =vcvt_positive_float_0 + vld1.32 {d0}, [r0] + vcvt.u32.f32 s0, s0 + ldr r0, =vcvt_positive_float_result + vst1.32 {d0}, [r0] + ASSERT_MEMCMP(vcvt_positive_float_result, vcvt_positive_float_expect, 0x8) + + /* Floating point but with immediates. + * + * You have to worry of course about representability of + * the immediate in 4 bytes, which is even more fun for + * floating point numbers :-) + * + * Doing this mostly to illustrate the joys of vmov.i32. + * + * For some reason, there is no vmov.i32 sn, only dn.
+ * If you try to use sn, it does the same as .f32 and + * stores a float instead. Horrible! + */ + vmov.f32 d0, 1.5 + vcvt.u32.f32 s0, s0 + vmov.i32 d1, 1 + vcmp.f32 s0, s2 + vmrs apsr_nzcv, fpscr + ASSERT(beq) + /* Check that s1 wasn't modified by vcvt. */ + vmov.f32 s2, 1.5 + vcmp.f32 s1, s2 + vmrs apsr_nzcv, fpscr + ASSERT(beq) + + /* Floating point double precision. */ +.data + vcvt_positive_double_0: .double 1.5 + vcvt_positive_double_expect: .word 1 +.bss + vcvt_positive_double_result: .skip 0x8 +.text + ldr r0, =vcvt_positive_double_0 + vld1.64 {d0}, [r0] + vcvt.u32.f64 s0, d0 + ldr r0, =vcvt_positive_double_result + vst1.32 {d0}, [r0] + ASSERT_MEMCMP( + vcvt_positive_double_result, + vcvt_positive_double_expect, + 0x4 + ) +EXIT diff --git a/userland/arch/arm/vcvta.S b/userland/arch/arm/vcvta.S new file mode 100644 index 0000000..26705a3 --- /dev/null +++ b/userland/arch/arm/vcvta.S @@ -0,0 +1,41 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#vcvta */ + +#include "common.h" + +ENTRY + /* SIMD positive. */ +.data + vcvta_positive_0: .float 1.25, 2.5, 3.75, 4.0 + vcvta_positive_expect: .word 1, 3, 4, 4 +.bss + vcvta_positive_result: .skip 0x10 +.text + ldr r0, =vcvta_positive_0 + vld1.32 {q0}, [r0] + vcvta.u32.f32 q1, q0 + ldr r0, =vcvta_positive_result + vst1.32 {q1}, [r0] + ASSERT_MEMCMP( + vcvta_positive_result, + vcvta_positive_expect, + 0x10 + ) + + /* SIMD negative. 
*/ +.data + vcvta_negative_0: .float -1.25, -2.5, -3.75, -4.0 + vcvta_negative_expect: .word -1, -3, -4, -4 +.bss + vcvta_negative_result: .skip 0x10 +.text + ldr r0, =vcvta_negative_0 + vld1.32 {q0}, [r0] + vcvta.s32.f32 q1, q0 + ldr r0, =vcvta_negative_result + vst1.32 {q1}, [r0] + ASSERT_MEMCMP( + vcvta_negative_result, + vcvta_negative_expect, + 0x10 + ) +EXIT diff --git a/userland/arch/arm/vcvtr.S b/userland/arch/arm/vcvtr.S new file mode 100644 index 0000000..b590d39 --- /dev/null +++ b/userland/arch/arm/vcvtr.S @@ -0,0 +1,46 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#vcvtrr */ + +#include "common.h" + +ENTRY +.data + vcvtr_0: .float 1.25, 2.5, 3.75, 4.0 + vcvtr_expect_zero: .word 1, 2, 3, 4 + vcvtr_expect_plus_infinity: .word 2, 3, 4, 4 +.bss + vcvtr_result_zero: .skip 0x10 + vcvtr_result_plus_infinity: .skip 0x10 +.text + ldr r0, =vcvtr_0 + vld1.32 {q0}, [r0] + + /* zero */ + vmrs r0, fpscr + orr r0, r0, (3 << 22) + vmsr fpscr, r0 + vcvtr.u32.f32 q1, q0 + ldr r0, =vcvtr_result_zero + vst1.32 {q1}, [r0] + ASSERT_MEMCMP( + vcvtr_result_zero, + vcvtr_expect_zero, + 0x10 + ) + +#if 0 + /* TODO why is this not working? Rounds to zero still. 
*/ + /* plus infinity */ + vmrs r0, fpscr + mov r1, 1 + bfi r0, r1, 22, 2 + vmsr fpscr, r0 + vcvtr.u32.f32 q1, q0 + ldr r0, =vcvtr_result_plus_infinity + vst1.32 {q1}, [r0] + ASSERT_MEMCMP( + vcvtr_result_plus_infinity, + vcvtr_expect_plus_infinity, + 0x10 + ) +#endif +EXIT diff --git a/userland/arch/arm/vfp.S b/userland/arch/arm/vfp.S new file mode 100644 index 0000000..4371c16 --- /dev/null +++ b/userland/arch/arm/vfp.S @@ -0,0 +1,152 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#vfp + * Adapted from: https://mindplusplus.wordpress.com/2013/06/27/arm-vfp-vector-programming-part-2-examples/ */ + +#include "common.h" + +.data; +a1: + .float 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5 +a2: + .float 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5 +sum: + .skip 32 +sum_expect: + .float 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 + +ENTRY + /* Minimal single precision floating point example. + * TODO: floating point representation constraints due to 4-byte instruction? + */ + vmov s0, 1.5 + vmov s1, 2.5 + vadd.f32 s2, s0, s1 + vmov s3, 4.0 + /* Compare two floating point registers. Stores results in fpscr: + * (floating point status and control register). + */ + vcmp.f32 s2, s3 + /* Move the nzcv bits from fpscr to apsr */ + vmrs apsr_nzcv, fpscr + /* This branch uses the Z bit of apsr, which was set accordingly. */ + ASSERT(beq) + + /* Now the same from memory with vldr and vstr. */ +.data +my_float_0: + .float 1.5 +my_float_1: + .float 2.5 +my_float_sum_expect: + .float 4.0 +.bss +my_float_sum: + .skip 4 +.text + ldr r0, =my_float_0 + vldr s0, [r0] + ldr r0, =my_float_1 + vldr s1, [r0] + vadd.f32 s2, s0, s1 + ldr r0, =my_float_sum + vstr.f32 s2, [r0] + ASSERT_MEMCMP(my_float_sum, my_float_sum_expect, 4) + +#if 0 + /* We can't do pseudo vldr as for ldr, fails with: + * Error: cannot represent CP_OFF_IMM relocation in this object file format + * It works on ARMv8 however, so the relocation must have been added. 
+ */ + vldr s0, my_float_0 +#endif + + /* Minimal double precision floating point example. */ + vmov.f64 d0, 1.5 + vmov.f64 d1, 2.5 + vadd.f64 d2, d0, d1 + vmov.f64 d3, 4.0 + vcmp.f64 d2, d3 + vmrs apsr_nzcv, fpscr + ASSERT(beq) + + /* vmov can also move to general purpose registers. + * + * Just remember that we can't use float immediates with general purpose registers: + * https://stackoverflow.com/questions/6514537/how-do-i-specify-immediate-floating-point-numbers-with-inline-assembly/52906126#52906126 + */ + mov r1, 2 + mov r0, 1 + vmov s0, r0 + vmov s1, s0 + vmov r1, s1 + ASSERT_EQ_REG(r0, r1) + + /* Now a more complex test function. */ + ldr r0, =sum + ldr r1, =a1 + ldr r2, =a2 + mov r3, 8 + bl vec_sum + /* The assert works easily because all floats used + * have exact base-2 representation. + */ + ASSERT_MEMCMP(sum, sum_expect, 0x20) +EXIT + +/* void vec_sum(float *sum, float *a1, float *a2, int length) { + * int i; + * for (i=0; i < length; i++) + * *(sum+i) = *(a1+i) + *(a2+i); + * } + */ +vec_sum: + /* Setup */ + push {r0, r1, r4, lr} + push {r0, r1} + mov r0, 1 + mov r1, 8 + bl reconfig + pop {r0, r1} + asr r3, 3 + + /* Do the sum. */ +1: + fldmias r1!, {s8-s15} + fldmias r2!, {s16-s23} + vadd.f32 s24, s8, s16 + fstmias r0!, {s24-s31} + subs r3, r3, 1 + bne 1b + + /* Teardown. */ + bl deconfig + pop {r0, r1, r4, pc} + +/* inputs: + * r0: desired vector stride (1 or 2) + * r1: desired vector length (min. 1, max. 
8) + * outputs: (none) + * modified: FPSCR (r0-r2 are saved and restored) + * notes: + * r0 and r1 will be truncated before fitting into FPSCR + */ +reconfig: + push {r0-r2} + and r0, r0, 3 + eor r0, r0, 1 + sub r1, r1, 1 + and r1, r1, 7 + mov r0, r0, lsl 20 + orr r0, r0, r1, lsl 16 + vmrs r2, fpscr + bic r2, 55*65536 + orr r2, r2, r0 + vmsr fpscr, r2 + pop {r0-r2} + bx lr + +deconfig: + push {r0, r1, lr} + mov r0, 1 + mov r1, 1 + bl reconfig + pop {r0, r1, pc} diff --git a/userland/arch/common.h b/userland/arch/common.h new file mode 100644 index 0000000..8104752 --- /dev/null +++ b/userland/arch/common.h @@ -0,0 +1,28 @@ +#ifndef COMMON_H +#define COMMON_H + +/* We define in this header only macros that are the same on all archs. */ + +/* common_arch.h contains arch specific macros. */ +#include "common_arch.h" + +.extern \ + exit, \ + printf, \ + puts \ +; + +/* Assert that the given branch instruction is taken. */ +#define ASSERT(branch_if_pass) \ + branch_if_pass 1f; \ + FAIL; \ +1: \ +; + +/* Assert that a register equals another register. */ +#define ASSERT_EQ_REG(reg1, reg2) \ + cmp reg1, reg2; \ + ASSERT(beq); \ +; + +#endif diff --git a/userland/arch/empty.S b/userland/arch/empty.S new file mode 100644 index 0000000..704d9a9 --- /dev/null +++ b/userland/arch/empty.S @@ -0,0 +1,6 @@ +/* https://github.com/cirosantilli/arm-assembly-cheat#about */ + +#include "common.h" + +ENTRY +EXIT diff --git a/userland/arch/fail.S b/userland/arch/fail.S new file mode 100644 index 0000000..6b6522a --- /dev/null +++ b/userland/arch/fail.S @@ -0,0 +1,10 @@ +/* See what happens on test failure. */ + +#include "common.h" + +ENTRY +#if 0 + /* Uncomment this to see it fail. */ + FAIL +#endif +EXIT diff --git a/userland/arch/main.c b/userland/arch/main.c new file mode 100644 index 0000000..3c7c769 --- /dev/null +++ b/userland/arch/main.c @@ -0,0 +1,17 @@ +/* This is the main entrypoint for all .S examples.
*/ + +#include "stdio.h" +#include "inttypes.h" + +#include "lkmc.h" + +int asm_main(uint32_t *line); + +int main(void) { + uint32_t ret, line; + ret = asm_main(&line); /* non-zero means failure, and line is what the error message reports */ + if (ret) { + printf("error %" PRIu32 " at line %" PRIu32 "\n", ret, line); + } + return ret; +} diff --git a/userland/arch/x86_64/c/add.c b/userland/arch/x86_64/c/add.c new file mode 100644 index 0000000..a56dd2b --- /dev/null +++ b/userland/arch/x86_64/c/add.c @@ -0,0 +1,16 @@ +#include "assert.h" +#include "inttypes.h" + +int main(void) { + uint64_t in1 = 0xFFFFFFFF; + uint64_t in2 = 0x1; + uint64_t out; + __asm__ ( + "lea (%[in1], %[in2]), %[out];" /* out = in1 + in2 */ + : [out] "=r" (out) + : [in1] "r" (in1), + [in2] "r" (in2) + : + ); + assert(out == 0x100000000); +} diff --git a/userland/arch/x86_64/binutils_hack.c b/userland/arch/x86_64/c/binutils_hack.c similarity index 100% rename from userland/arch/x86_64/binutils_hack.c rename to userland/arch/x86_64/c/binutils_hack.c diff --git a/userland/arch/x86_64/asm_hello.c b/userland/arch/x86_64/c/binutils_nohack.c similarity index 73% rename from userland/arch/x86_64/asm_hello.c rename to userland/arch/x86_64/c/binutils_nohack.c index 0158aa9..efa3249 100644 --- a/userland/arch/x86_64/asm_hello.c +++ b/userland/arch/x86_64/c/binutils_nohack.c @@ -1,3 +1,5 @@ +/* https://github.com/cirosantilli/linux-kernel-module-cheat#your-first-binutils-hack */ + #include #include diff --git a/userland/arch/x86_64/c/build b/userland/arch/x86_64/c/build new file mode 120000 index 0000000..ab18017 --- /dev/null +++ b/userland/arch/x86_64/c/build @@ -0,0 +1 @@ +../build \ No newline at end of file diff --git a/userland/arch/x86_64/c/freestanding/build b/userland/arch/x86_64/c/freestanding/build new file mode 120000 index 0000000..ab18017 --- /dev/null +++ b/userland/arch/x86_64/c/freestanding/build @@ -0,0 +1 @@ +../build \ No newline at end of file diff --git a/userland/arch/x86_64/c/freestanding/hello.c b/userland/arch/x86_64/c/freestanding/hello.c new file mode 100644 index 0000000..969f401 --- /dev/null +++ 
b/userland/arch/x86_64/c/freestanding/hello.c @@ -0,0 +1,31 @@ +/* Linux freestanding hello world with inline assembly. */ + +#define _XOPEN_SOURCE 700 +#include "inttypes.h" +#include "sys/types.h" + +ssize_t my_write(int fd, const void *buf, size_t size) { + ssize_t ret; + __asm__ __volatile__ ( + "syscall" + : "=a" (ret) + : "0" (1), "D" (fd), "S" (buf), "d" (size) /* rax = 1 selects the syscall, arguments go in rdi, rsi, rdx */ + : "cc", "rcx", "r11", "memory" /* the syscall instruction overwrites rcx and r11 */ + ); + return ret; +} + +void my_exit(int exit_status) { + ssize_t ret; /* only there to mark rax as overwritten */ + __asm__ __volatile__ ( + "syscall" + : "=a" (ret) + : "0" (60), "D" (exit_status) /* rax = 60 selects the syscall, the status goes in rdi */ + : "cc", "rcx", "r11", "memory" + ); +} + +void _start(void) { + char msg[] = "hello\n"; + my_exit(my_write(1, msg, sizeof(msg) - 1) != sizeof(msg) - 1); /* - 1: do not write the terminating NUL */ +} diff --git a/userland/arch/x86_64/c/freestanding/hello_regvar.c b/userland/arch/x86_64/c/freestanding/hello_regvar.c new file mode 100644 index 0000000..1038b4b --- /dev/null +++ b/userland/arch/x86_64/c/freestanding/hello_regvar.c @@ -0,0 +1,37 @@ +/* Same as hello.c, but with explicit register variables, see: + * https://stackoverflow.com/questions/9506353/how-to-invoke-a-system-call-via-sysenter-in-inline-assembly/54956854#54956854 + */ + +#define _XOPEN_SOURCE 700 +#include "inttypes.h" +#include "sys/types.h" + +ssize_t my_write(int fd, const void *buf, size_t size) { + register int64_t rax __asm__ ("rax") = 1; + register int rdi __asm__ ("rdi") = fd; + register const void *rsi __asm__ ("rsi") = buf; + register size_t rdx __asm__ ("rdx") = size; + __asm__ __volatile__ ( + "syscall" + : "+r" (rax) /* syscall number in, return value out */ + : "r" (rdi), "r" (rsi), "r" (rdx) + : "cc", "rcx", "r11", "memory" /* the syscall instruction overwrites rcx and r11 */ + ); + return rax; +} + +void my_exit(int exit_status) { + register int64_t rax __asm__ ("rax") = 60; + register int rdi __asm__ ("rdi") = exit_status; + __asm__ __volatile__ ( + "syscall" + : "+r" (rax) + : "r" (rdi) + : "cc", "rcx", "r11", "memory" + ); +} + +void _start(void) { + char msg[] = "hello\n"; + my_exit(my_write(1, msg, sizeof(msg) - 1) != sizeof(msg) - 1); /* - 1: do not write the terminating NUL */ +} diff --git a/userland/arch/x86_64/c/inc.c b/userland/arch/x86_64/c/inc.c new file mode 100644 
index 0000000..056c984 --- /dev/null +++ b/userland/arch/x86_64/c/inc.c @@ -0,0 +1,13 @@ +#include "assert.h" +#include "inttypes.h" + +int main(void) { + uint64_t io = 1; + __asm__ ( + "lea 1(%[io]), %[io];" /* io = io + 1 */ + : [io] "+r" (io) + : + : + ); + assert(io == 2); +} diff --git a/userland/arch/x86_64/c/scratch.c b/userland/arch/x86_64/c/scratch.c new file mode 100644 index 0000000..a5d78e6 --- /dev/null +++ b/userland/arch/x86_64/c/scratch.c @@ -0,0 +1,22 @@ +/* https://stackoverflow.com/questions/6682733/gcc-prohibit-use-of-some-registers/54963829#54963829 */ + +#include "assert.h" +#include "inttypes.h" + +int main(void) { + uint64_t in1 = 0xFFFFFFFF; + uint64_t in2 = 1; + uint64_t out; + uint64_t scratch; + __asm__ ( + "mov %[in2], %[scratch];" /* scratch = in2 */ + "add %[in1], %[scratch];" /* scratch += in1 */ + "mov %[scratch], %[out];" /* out = scratch */ + : [scratch] "=&r" (scratch), /* early clobber: written before in1 is read */ + [out] "=r" (out) + : [in1] "r" (in1), + [in2] "r" (in2) + : + ); + assert(out == 0x100000000); +} diff --git a/userland/arch/x86_64/c/scratch_hardcode.c b/userland/arch/x86_64/c/scratch_hardcode.c new file mode 100644 index 0000000..7a02d39 --- /dev/null +++ b/userland/arch/x86_64/c/scratch_hardcode.c @@ -0,0 +1,20 @@ +/* This is a worse version of scratch.c with hardcoded scratch. 
*/ + +#include "assert.h" +#include "inttypes.h" + +int main(void) { + uint64_t in1 = 0xFFFFFFFF; + uint64_t in2 = 1; + uint64_t out; + __asm__ ( + "mov %[in2], %%rax;" /* scratch = in2 */ + "add %[in1], %%rax;" /* scratch += in1 */ + "mov %%rax, %[out];" /* out = scratch */ + : [out] "=r" (out) + : [in1] "r" (in1), + [in2] "r" (in2) + : "rax" /* the hardcoded scratch register is overwritten */ + ); + assert(out == 0x100000000); +} diff --git a/userland/arch/x86_64/common_arch.h b/userland/arch/x86_64/common_arch.h new file mode 100644 index 0000000..51217ef --- /dev/null +++ b/userland/arch/x86_64/common_arch.h @@ -0,0 +1,84 @@ +#ifndef COMMON_ARCH_H +#define COMMON_ARCH_H + +#define ASSERT_EQ(reg, const) \ + push %rax; /* rax and rbx are temporaries, restored below when the assert passes */ \ + push %rbx; \ + mov reg, %rax; \ + mov const, %rbx; \ + cmp %rax, %rbx; \ + ASSERT(je); \ + pop %rbx; \ + pop %rax; \ +; + +# TODO +##define ASSERT_MEMCMP(s1, s2, n) \ +# MEMCMP(s1, s2, n); \ +# ASSERT_EQ(x0, 0); \ +#; + +/* Program entry point. + * + * Return with EXIT. + * + * Basically implements an x86_64 prologue: + * + * - save callee saved registers + * x86_64 explained at: https://stackoverflow.com/questions/18024672/what-registers-are-preserved-through-a-linux-x86-64-function-call/55207335#55207335 + * - save register arguments for later usage + */ +#define ENTRY \ +.text; \ +.global asm_main; \ +asm_main: \ + push %rbp; \ + mov %rsp, %rbp; \ + push %r15; /* saved at -0x08(%rbp) */ \ + push %r14; /* saved at -0x10(%rbp) */ \ + push %r13; /* saved at -0x18(%rbp) */ \ + push %r12; /* saved at -0x20(%rbp) */ \ + push %rbx; /* saved at -0x28(%rbp) */ \ + push %rdi; /* first argument of asm_main, saved at -0x30(%rbp) */ \ + sub $8, %rsp; /* 8 more bytes below the saved registers */ \ +asm_main_after_prologue: \ +; + +/* Meant to be called at the end of ENTRY. + * + * Branching to "fail" makes tests fail with exit status 1. + * + * If EXIT is reached, the program ends successfully. 
+ */ +#define EXIT \ + mov $0, %rax; \ + jmp pass; \ +fail: \ + mov -0x30(%rbp), %rbx; /* line pointer that ENTRY saved with push %rdi */ \ + movl %eax, (%rbx); /* FAIL left the line number in eax */ \ + mov $1, %rax; \ +pass: \ + lea -0x28(%rbp), %rsp; /* point rsp at the saved rbx, whatever is still on the stack */ \ + pop %rbx; \ + pop %r12; \ + pop %r13; \ + pop %r14; \ + pop %r15; \ + pop %rbp; \ + ret; \ +; + +#define FAIL \ + mov $__LINE__, %eax; /* source line of the failing check */ \ + jmp fail; \ +; + +# TODO +##define MEMCMP(s1, s2, n) \ +# adr x0, s1; \ +# adr x1, s2; \ +# ldr x2, =n; \ +# bl memcmp; \ +#; + +#endif diff --git a/userland/arch/x86_64/empty.S b/userland/arch/x86_64/empty.S new file mode 120000 index 0000000..6bdf9f9 --- /dev/null +++ b/userland/arch/x86_64/empty.S @@ -0,0 +1 @@ +../empty.S \ No newline at end of file diff --git a/userland/arch/x86_64/fail.S b/userland/arch/x86_64/fail.S new file mode 120000 index 0000000..d5427d8 --- /dev/null +++ b/userland/arch/x86_64/fail.S @@ -0,0 +1 @@ +../fail.S \ No newline at end of file diff --git a/userland/arch/x86_64/freestanding/hello.S b/userland/arch/x86_64/freestanding/hello.S index f58f967..6de9c05 100644 --- a/userland/arch/x86_64/freestanding/hello.S +++ b/userland/arch/x86_64/freestanding/hello.S @@ -1,19 +1,18 @@ -.data - s: - .ascii "hello\n" - len = . - s .text - .global _start - _start: - - /* Write. */ - mov $1, %rax - mov $1, %rdi - mov $s, %rsi - mov $len, %rdx +.global _start +_start: +asm_main_after_prologue: + /* write */ + mov $1, %rax /* syscall number */ + mov $1, %rdi /* stdout */ + mov $msg, %rsi /* buffer */ + mov $len, %rdx /* len */ syscall - /* Exit. */ - mov $60, %rax - mov $0, %rdi + /* exit */ + mov $60, %rax /* syscall number */ + mov $0, %rdi /* exit status */ syscall +msg: + .ascii "hello\n" +len = . - msg