diff --git a/index.html b/index.html index 70fe219..777125c 100644 --- a/index.html +++ b/index.html @@ -1808,68 +1808,70 @@ body.book #toc,body.book #preamble,body.book h1.sect0,body.book .sect1>h2{page-b
  • 28. Android @@ -3743,7 +3745,7 @@ cd userland --qemu-which host \ --userland-build-id host \ --userland userland/c/command_line_arguments.c \ - --userland-args 'asdf "qw er"' \ + --cli-args 'asdf "qw er"' \ ; @@ -3791,7 +3793,7 @@ cd userland --qemu-which host \ --userland-build-id host \ --userland userland/c/command_line_arguments.c \ - --userland-args 'asdf "qw er"' \ + --cli-args 'asdf "qw er"' \ ; @@ -4011,7 +4013,7 @@ error: simulation error detected by parsing logs
    -

    TODO: the carriage returns are a bit different than in QEMU, see: Section 27.4, “gem5 baremetal carriage return”.

    +

    TODO: the carriage returns are a bit different than in QEMU, see: Section 27.6, “gem5 baremetal carriage return”.

    Note that ./build-baremetal requires the --emulator gem5 option, and generates separate executable images for both, as can be seen from:

    @@ -5360,7 +5362,7 @@ Breakpoint 3 at 0xffffffff811615e3: fdget_pos. (9 locations)

    2.9. GDB step debug multicore userland

    -

    For a more minimal baremetal multicore setup, see: Section 27.8.3, “ARM baremetal multicore”.

    +

    For a more minimal baremetal multicore setup, see: Section 27.10.3, “ARM baremetal multicore”.

    We can set and get which cores the Linux kernel allows a program to run on with sched_getaffinity and sched_setaffinity:

    @@ -7442,7 +7444,7 @@ sudo ./setup -y
    ./build user-mode-qemu
     ./run \
       --userland userland/c/command_line_arguments.c \
    -  --userland-args='asdf "qw er"' \
    +  --cli-args='asdf "qw er"' \
     ;
    @@ -7481,7 +7483,7 @@ qw er --arch aarch64 \ --gdb-wait \ --userland userland/c/command_line_arguments.c \ - --userland-args 'asdf "qw er"' \ + --cli-args 'asdf "qw er"' \ ;
    @@ -7506,7 +7508,7 @@ qw er --arch aarch64 \ --gdb \ --userland userland/c/command_line_arguments.c \ - --userland-args 'asdf "qw er"' \ + --cli-args 'asdf "qw er"' \ ; @@ -7576,7 +7578,7 @@ qw er
    ./run \
       --userland "$(./getvar buildroot_target_dir)/bin/echo" \
    -  --userland-args='asdf' \
    +  --cli-args='asdf' \
     ;
    @@ -7600,7 +7602,7 @@ qw er
    ./run \
       --arch aarch64 \
       --userland "$(./getvar --arch aarch64 buildroot_target_dir)/bin/sh" \
    -  --userland-args='-c "uname -a && pwd"' \
    +  --cli-args='-c "uname -a && pwd"' \
     ;
    @@ -7727,7 +7729,7 @@ qemu: uncaught target signal 6 (Aborted) - core dumped --arch aarch64 \ --static \ --userland userland/c/command_line_arguments.c \ - --userland-args 'asdf "qw er"' \ + --cli-args 'asdf "qw er"' \ ; @@ -7931,7 +7933,7 @@ qemu-x86_64: /path/to/linux-kernel-module-cheat/submodules/qemu/accel/tcg/cpu-ex --arch aarch64 \ --emulator gem5 \ --userland userland/c/command_line_arguments.c \ - --userland-args 'asdf "qw er"' \ + --cli-args 'asdf "qw er"' \ ; @@ -7948,7 +7950,7 @@ qemu-x86_64: /path/to/linux-kernel-module-cheat/submodules/qemu/accel/tcg/cpu-ex --emulator gem5 \ --gdb-wait \ --userland userland/c/command_line_arguments.c \ - --userland-args 'asdf "qw er"' \ + --cli-args 'asdf "qw er"' \ ; ./run-gdb \ --arch aarch64 \ @@ -8053,7 +8055,7 @@ hello
    -
    ./run --userland userland/posix/count_to.c --userland-args 3
    +
    ./run --userland userland/posix/count_to.c --cli-args 3
    @@ -17712,7 +17714,7 @@ extern SimpleFlag ExecEnable;

    25007500: time count in some unit. Note how the microops execute at further timestamps.

  • -

    system.cpu: distinguishes between CPUs when there are more than one. For example, running Section 27.8.3, “ARM baremetal multicore” with two cores produces system.cpu0 and system.cpu1

    +

    system.cpu: distinguishes between CPUs when there are more than one. For example, running Section 27.10.3, “ARM baremetal multicore” with two cores produces system.cpu0 and system.cpu1

  • T0: thread number. TODO: hyperthread? How to play with it?

    @@ -18005,7 +18007,7 @@ root

    runs are deterministic by default, unlike QEMU which has a special QEMU record and replay mode, that requires first playing the content once and then replaying

  • -

    gem5 ARM at least appears to implement more low level CPU functionality than QEMU, e.g. QEMU only added EL2 in 2018: https://stackoverflow.com/questions/42824706/qemu-system-aarch64-entering-el1-when-emulating-a53-power-up See also: Section 27.8.1, “ARM exception levels”

    +

    gem5 ARM at least appears to implement more low level CPU functionality than QEMU, e.g. QEMU only added EL2 in 2018: https://stackoverflow.com/questions/42824706/qemu-system-aarch64-entering-el1-when-emulating-a53-power-up See also: Section 27.10.1, “ARM exception levels”

  • gem5 offers more advanced logging, even for non micro architectural things which QEMU models in some way, e.g. QEMU trace memory accesses, because QEMU’s binary translation optimizations reduce visibility

    @@ -18309,7 +18311,7 @@ getconf _NPROCESSORS_CONF
    -
    ./run --userland userland/posix/pthread_count.c --userland-args 4
    +
    ./run --userland userland/posix/pthread_count.c --cli-args 4
     ps Haux | grep qemu | wc
    @@ -18333,7 +18335,7 @@ ps Haux | grep qemu | wc
    -
    ./run --cpus 1 --emulator gem5 --userland userland/posix/pthread_self.c --userland-args 1
    +
    ./run --cpus 1 --emulator gem5 --userland userland/posix/pthread_self.c --cli-args 1
    @@ -18349,7 +18351,7 @@ ps Haux | grep qemu | wc
    -
    ./run --cpus 2 --emulator gem5 --userland userland/posix/pthread_self.c --userland-args 1
    +
    ./run --cpus 2 --emulator gem5 --userland userland/posix/pthread_self.c --cli-args 1
    @@ -18357,7 +18359,7 @@ ps Haux | grep qemu | wc
    -
    ./run --cpus 2 --emulator gem5 --userland userland/posix/pthread_self.c --userland-args '1 2'
    +
    ./run --cpus 2 --emulator gem5 --userland userland/posix/pthread_self.c --cli-args '1 2'
    @@ -18947,14 +18949,14 @@ m5 checkpoint

    19.5.1. gem5 checkpoint userland minimal example

    -

    In order to debug checkpoint restore bugs, this minimal setup using userland/freestanding/gem5_checkpoint_restore.S can be handy:

    +

    In order to debug checkpoint restore bugs, this minimal setup using userland/freestanding/gem5_checkpoint.S can be handy:

    ./build-userland --arch aarch64 --static
    -./run --arch aarch64 --emulator gem5 --static --userland userland/freestanding/gem5_checkpoint_restore.S --trace-insts-stdout
    -./run --arch aarch64 --emulator gem5 --static --userland userland/freestanding/gem5_checkpoint_restore.S --trace-insts-stdout --gem5-restore 1
    -./run --arch aarch64 --emulator gem5 --static --userland userland/freestanding/gem5_checkpoint_restore.S --trace-insts-stdout --gem5-restore 1 -- --cpu-type=DerivO3CPU --restore-with-cpu=DerivO3CPU --caches
    +./run --arch aarch64 --emulator gem5 --static --userland userland/freestanding/gem5_checkpoint.S --trace-insts-stdout +./run --arch aarch64 --emulator gem5 --static --userland userland/freestanding/gem5_checkpoint.S --trace-insts-stdout --gem5-restore 1 +./run --arch aarch64 --emulator gem5 --static --userland userland/freestanding/gem5_checkpoint.S --trace-insts-stdout --gem5-restore 1 -- --cpu-type=DerivO3CPU --restore-with-cpu=DerivO3CPU --caches
    @@ -19209,7 +19211,7 @@ m5 exit

    And now you will notice that everything happens much slower in the guest terminal!

    -

    One even more direct and minimal way to observe this is with userland/freestanding/gem5_checkpoint_restore.S which was mentioned at gem5 checkpoint userland minimal example plus some logging:

    +

    One even more direct and minimal way to observe this is with userland/freestanding/gem5_checkpoint.S which was mentioned at gem5 checkpoint userland minimal example plus some logging:

    @@ -19218,7 +19220,7 @@ m5 exit --emulator gem5 \ --static \ --trace ExecAll,FmtFlag,O3CPU,SimpleCPU \ - --userland userland/freestanding/gem5_checkpoint_restore.S \ + --userland userland/freestanding/gem5_checkpoint.S \ ; cat "$(./getvar --arch aarch64 --emulator gem5 trace_txt_file)" ./run \ @@ -19227,7 +19229,7 @@ cat "$(./getvar --arch aarch64 --emulator gem5 trace_txt_file)" --gem5-restore 1 \ --static \ --trace ExecAll,FmtFlag,O3CPU,SimpleCPU \ - --userland userland/freestanding/gem5_checkpoint_restore.S \ + --userland userland/freestanding/gem5_checkpoint.S \ -- \ --caches \ --cpu-type DerivO3CPU \ @@ -19303,7 +19305,7 @@ cat "$(./getvar --arch aarch64 --emulator gem5 trace_txt_file)"
    -

    But let’s give it a try anyways with userland/freestanding/gem5_checkpoint_restore.S which was mentioned at gem5 checkpoint userland minimal example

    +

    But let’s give it a try anyways with userland/freestanding/gem5_checkpoint.S which was mentioned at gem5 checkpoint userland minimal example

    @@ -19312,7 +19314,7 @@ cat "$(./getvar --arch aarch64 --emulator gem5 trace_txt_file)" --emulator gem5 \ --static \ --trace ExecAll,FmtFlag,O3CPU,SimpleCPU \ - --userland userland/freestanding/gem5_checkpoint_restore.S \ + --userland userland/freestanding/gem5_checkpoint.S \ -- \ --caches --cpu-type DerivO3CPU \ @@ -19532,7 +19534,7 @@ FullO3CPU: Ticking main, FullO3CPU.
    -
    ./run --arch aarch64 --emulator gem5 --userland "$(./getvar --arch aarch64 out_rootfs_overlay_bin_dir)/m5" --userland-args dumpstats
    +
    ./run --arch aarch64 --emulator gem5 --userland "$(./getvar --arch aarch64 out_rootfs_overlay_bin_dir)/m5" --cli-args dumpstats
    @@ -20257,7 +20259,7 @@ system.cpu.dtb.inst_hits
    -
    ./run --arch aarch64 --emulator gem5 --userland userland/c/m5ops.c --userland-args 'd 1000'
    +
    ./run --arch aarch64 --emulator gem5 --userland userland/c/m5ops.c --cli-args 'd 1000'
    @@ -20748,7 +20750,7 @@ Exiting @ tick 3000 because all threads reached the max instruction count
    ./run \
       --emulator gem5 \
       --userland userland/posix/pthread_deadlock.c \
    -  --userland-args 1 \
    +  --cli-args 1 \
     ;
    @@ -25909,7 +25911,7 @@ There are no non-locking atomic types or atomic primitives in POSIX:
    -
    ./run --userland "$(./getvar buildroot_target_dir)/usr/bin/python3" --userland-args rootfs_overlay/lkmc/python/hello.py
    +
    ./run --userland "$(./getvar buildroot_target_dir)/usr/bin/python3" --cli-args rootfs_overlay/lkmc/python/hello.py
    @@ -25923,7 +25925,7 @@ There are no non-locking atomic types or atomic primitives in POSIX:
    @@ -25947,7 +25949,7 @@ There are no non-locking atomic types or atomic primitives in POSIX: @@ -26372,7 +26374,7 @@ xdg-open bst_vs_heap_vs_hashmap.tmp.png --arch x86_64 \ --emulator gem5 \ --userland userland/cpp/bst_vs_heap_vs_hashmap.cpp \ - --userland-args='100000 1 0' \ + --cli-args='100000 1 0' \ -- \ --cpu-type=DerivO3CPU \ --caches \ @@ -26591,7 +26593,7 @@ cblas_dgemm( CblasColMajor, CblasNoTrans, CblasTrans,3,3,2 ,1, A,3, B,
    -
    ./run --userland "$(./getvar userland_build_dir)/submodules/dhrystone/dhrystone" --userland-args 100000000
    +
    ./run --userland "$(./getvar userland_build_dir)/submodules/dhrystone/dhrystone" --cli-args 100000000
    @@ -26613,14 +26615,25 @@ cblas_dgemm( CblasColMajor, CblasNoTrans, CblasTrans,3,3,2 ,1, A,3, B,
    -

    Build for Baremetal execution and run it in baremetal QEMU. TODO: fix the build, just need to factor out all run arguments from build-baremetal into common.py and it should just work, no missing syscalls.

    +

    Build Dhrystone for Baremetal and run it on QEMU:

    # Build our Newlib stubs.
     ./build-baremetal --arch aarch64
     ./build-dhrystone --arch aarch64 --mode baremetal
    -./run --arch aarch64 --baremetal "$(./getvar baremetal_build_dir)/submodules/dhrystone/dhrystone"
    +./run --arch aarch64 --baremetal "$(./getvar --arch aarch64 baremetal_build_dir)/submodules/dhrystone/dhrystone" --cli-args 10000 +
    +
    +
    +

    or with gem5:

    +
    +
    +
    +
    # Build our Newlib stubs.
    +./build-baremetal --arch aarch64
    +./build-dhrystone --arch aarch64 --emulator gem5 --mode baremetal
    +./run --arch aarch64 --baremetal "$(./getvar --arch aarch64 --emulator gem5 baremetal_build_dir)/submodules/dhrystone/dhrystone" --cli-args 10000 --emulator gem5
    @@ -26701,7 +26714,7 @@ times[3 * ntimes + k] = mysecond() - times[3 * ntimes + k];
    -
    ./run --userland "$(./getvar userland_build_dir)/submodules/stream-benchmark/stream_c.exe" --userland-args '100 2'
    +
    ./run --userland "$(./getvar userland_build_dir)/submodules/stream-benchmark/stream_c.exe" --cli-args '100 2'
    @@ -26710,7 +26723,7 @@ times[3 * ntimes + k] = mysecond() - times[3 * ntimes + k];
    ./build-stream --optimization-level 3
    -./run --emulator gem5 --userland "$(./getvar userland_build_dir)/submodules/stream-benchmark/stream_c.exe" --userland-args '1000 2'
    +./run --emulator gem5 --userland "$(./getvar userland_build_dir)/submodules/stream-benchmark/stream_c.exe" --cli-args '1000 2'
    @@ -27666,6 +27679,9 @@ When instructions do not interpret this operand encoding as the zero register, u
    -

    27.3. Semihosting

    +

    27.3. Baremetal linker script

    -

    Semihosting is a publicly documented interface specified by ARM Holdings that allows us to do some magic operations very useful in development.

    +

    For things to work in baremetal, we often have to layout memory in specific ways.

    -

    Semihosting is implemented both on some real devices and on simulators such as QEMU and gem5 semihosting.

    +

    Notably, since we start with paging disabled, there are more constraints on where memory can or cannot go.

    +
    +
    +

    Especially for C programs, this memory layout is specified by a "linker script", which is present at: baremetal/link.ld

    +
    +
    +

    Note how our linker script also exposes some symbols to C:

    +
    +
    +
    +
    lkmc_heap_low = .;
    +lkmc_heap_top = .;
    +
    +
    +
    +

    Those for example are required to implement malloc in Newlib. We can play with those variables more explicitly with baremetal/linker_variables.c:

    +
    +
    +
    +
    ./run --arch aarch64 --baremetal baremetal/linker_variables.c
    +
    +
    +
    +
    +

    27.4. Baremetal command line arguments

    +
    +

    QEMU and gem5 currently support baremetal CLI arguments!

    +
    +
    +

    You can see them in action e.g. with:

    +
    +
    +
    +
    ./run --arch aarch64 --baremetal userland/c/command_line_arguments.c --cli-args 'aa bb cc'
    +./run --arch aarch64 --userland userland/c/command_line_arguments.c --cli-args 'aa bb cc'
    +
    +
    +
    +

    both of which output the exact same thing:

    +
    +
    +
    +
    aa
    +bb
    +cc
    +
    +
    +
    +

    This is implemented by parsing the command line arguments and placing them into memory where the code will find them.

    +
    +
    +

    This works by:

    +
    +
    + +
    +
    +

    It is worth noting that e.g. ARM has a Semihosting mechanism for loading CLI arguments through SYS_GET_CMDLINE, but our mechanism works in principle for any ISA.

    +
    +
    +
    +

    27.5. Semihosting

    +
    +

    Semihosting is a publicly documented interface specified by ARM Holdings that allows us to do some magic operations very useful in development, such as writing to the terminal or reading and writing host files.

    It is documented at: https://developer.arm.com/docs/100863/latest/introduction

    @@ -32463,7 +32555,7 @@ svc 0x00123456
    -

    27.3.1. gem5 semihosting

    +

    27.5.1. gem5 semihosting

    For gem5, you need:

    @@ -32478,7 +32570,7 @@ svc 0x00123456
    -

    27.4. gem5 baremetal carriage return

    +

    27.6. gem5 baremetal carriage return

    TODO: our example is printing newlines without automatic carriage return \r as in:

    @@ -32501,7 +32593,7 @@ svc 0x00123456
    -

    27.5. Baremetal host packaged toolchain

    +

    27.7. Baremetal host packaged toolchain

    For arm, some baremetal examples compile fine with:

    @@ -32537,13 +32629,13 @@ collect2: error: ld returned 1 exit status
    -

    27.6. Baremetal C++

    +

    27.8. Baremetal C++

    -

    27.7. GDB builtin CPU simulator

    +

    27.9. GDB builtin CPU simulator

    It is incredible, but GDB also has a CPU simulator inside of it as documented at: https://sourceware.org/gdb/onlinedocs/gdb/Target-Commands.html

    @@ -32603,7 +32695,7 @@ starti
    -

    27.7.1. GDB builtin CPU simulator userland

    +

    27.9.1. GDB builtin CPU simulator userland

    Since I had this compiled, I also decided to try it out on userland.

    @@ -32638,7 +32730,7 @@ starti
    -

    27.8. ARM baremetal

    +

    27.10. ARM baremetal

    In this section we will focus on learning ARM architecture concepts that can only be learnt on baremetal setups.

    @@ -32646,7 +32738,7 @@ starti

    Userland information can be found at: https://github.com/cirosantilli/arm-assembly-cheat

    -

    27.8.1. ARM exception levels

    +

    27.10.1. ARM exception levels

    ARM exception levels are analogous to x86 rings.

    @@ -32775,13 +32867,13 @@ CurrentEL.EL 0x3

    According to ARMv7 architecture reference manual, access to that register is controlled by other registers NSACR.{CP11, CP10} and HCPTR so those must be turned off, but I’m lazy to investigate now, even just trying to dump those registers in userland/arch/arm/dump_regs.c also leads to exceptions…​

    -
    27.8.1.1. ARM change exception level
    +
    27.10.1.1. ARM change exception level

    TODO. Create a minimal runnable example of going into EL0 and jumping to EL1.

    -
    27.8.1.2. ARM SP0 vs SPx
    +
    27.10.1.2. ARM SP0 vs SPx

    See ARMv8 architecture reference manual db D1.6.2 "The stack pointer registers".

    @@ -32794,7 +32886,7 @@ CurrentEL.EL 0x3
    -

    27.8.2. ARM SVC instruction

    +

    27.10.2. ARM SVC instruction

    This is the most basic example of exception handling we have.

    @@ -33143,7 +33235,7 @@ IN: main
    -
    27.8.2.1. ARMv8 exception vector table format
    +
    27.10.2.1. ARMv8 exception vector table format

    The vector table format is described on ARMv8 architecture reference manual Table D1-7 "Vector offsets from vector table base address".

    @@ -33283,29 +33375,29 @@ IN: main
    -
    27.8.2.2. ARM ESR register
    +
    27.10.2.2. ARM ESR register

    Exception Syndrome Register.

    Documentation: ARMv8 architecture reference manual db D12.2.36 "ESR_EL1, Exception Syndrome Register (EL1)".

    -
    27.8.2.3. ARM ELR register
    +
    27.10.2.3. ARM ELR register

    Exception Link Register.

    -

    27.8.3. ARM baremetal multicore

    +

    27.10.3. ARM baremetal multicore

    Examples:

    @@ -33384,7 +33476,7 @@ IN: main

    Bibliography: https://stackoverflow.com/questions/980999/what-does-multicore-assembly-language-look-like/33651438#33651438

    -
    27.8.3.1. ARM WFE and SEV instructions
    +
    27.10.3.1. ARM WFE and SEV instructions

    The WFE and SEV instructions are just hints: a compliant implementation can treat them as NOPs.

    @@ -33537,7 +33629,7 @@ IN: main

    For how userland spinlocks and mutexes are implemented see Userland mutex implementation.

    -
    27.8.3.1.1. ARM WFE global monitor events
    +
    27.10.3.1.1. ARM WFE global monitor events

    Examples:

    @@ -33568,7 +33660,7 @@ IN: main
    -
    27.8.3.1.2. WFE from userland
    +
    27.10.3.1.2. WFE from userland

    WFE and SEV are usable from userland, and are part of an efficient spinlock implementation (which userland should arguably stay away from and rather use the futex system call which allows for non busy sleep instead), which maybe is not something that userland should ever do and just stick to mutexes?

    @@ -33675,7 +33767,7 @@ IN: main
    -
    27.8.3.1.3. ARMv8 spinlock pattern
    +
    27.10.3.1.3. ARMv8 spinlock pattern
    @@ -33694,7 +33786,7 @@ IN: main
    -
    27.8.3.1.4. gem5 ARM WFE
    +
    27.10.3.1.4. gem5 ARM WFE

    gem5 390a74f59934b85d91489f8a563450d8321b602d does not sleep on the first WFE on either syscall emulation or full system, because the code does:

    @@ -33736,14 +33828,14 @@ IN: main
    -
    27.8.3.1.5. ARM YIELD instruction
    +
    27.10.3.1.5. ARM YIELD instruction
    -
    27.8.3.2. ARM LDAXR and STLXR instructions
    +
    27.10.3.2. ARM LDAXR and STLXR instructions

    Can be used to implement atomic variables, see also:

    @@ -33762,7 +33854,7 @@ IN: main
    -
    27.8.3.3. ARM PSCI
    +
    27.10.3.3. ARM PSCI

    In QEMU, CPU 1 starts in a halted state. This can be observed from GDB, where:

    @@ -33812,14 +33904,14 @@ IN: main
    -
    27.8.3.4. ARM DMB instruction
    +
    27.10.3.4. ARM DMB instruction

    TODO: create and study a minimal example in gem5 where the DMB instruction leads to fewer cycles: https://stackoverflow.com/questions/15491751/real-life-use-cases-of-barriers-dsb-dmb-isb-in-arm

    -

    27.8.4. ARM timer

    +

    27.10.4. ARM timer

    The ARM timer is the simplest way to generate hardware interrupts periodically, and therefore serves as the simplest example of ARM GIC usage.

    @@ -33972,7 +34064,7 @@ cntvct_el0 0x3CF516F
    -

    27.8.5. ARM GIC

    +

    27.10.5. ARM GIC

    Generic Interrupt Controller.

    @@ -34014,7 +34106,7 @@ cntvct_el0 0x3CF516F
    -

    27.8.6. ARM paging

    +

    27.10.6. ARM paging

    TODO create a minimal working aarch64 example analogous to the x86 one at: https://github.com/cirosantilli/x86-bare-metal-examples/blob/6dc9a73830fc05358d8d66128f740ef9906f7677/paging.S

    @@ -34044,7 +34136,7 @@ cntvct_el0 0x3CF516F
    -

    27.8.7. ARM baremetal bibliography

    +

    27.10.7. ARM baremetal bibliography

    First, also consider the userland bibliography: Section 24.9, “ARM assembly bibliography”.

    @@ -34071,7 +34163,7 @@ cntvct_el0 0x3CF516F
    -
    27.8.7.1. NienfengYao/armv8-bare-metal
    +
    27.10.7.1. NienfengYao/armv8-bare-metal
    @@ -34130,7 +34222,7 @@ cntvct_el0 0x3CF516F
    -
    27.8.7.2. tukl-msd/gem5.bare-metal
    +
    27.10.7.2. tukl-msd/gem5.bare-metal
    @@ -34172,7 +34264,7 @@ make CROSS_COMPILE_DIR=/usr/bin
    -

    27.9. How we got some baremetal stuff to work

    +

    27.11. How we got some baremetal stuff to work

    It is nice when things just work.

    @@ -34180,7 +34272,7 @@ make CROSS_COMPILE_DIR=/usr/bin

    But you can also learn a thing or two from how I actually made them work in the first place.

    -

    27.9.1. Find the UART address

    +

    27.11.1. Find the UART address

    Enter the QEMU console:

    @@ -34216,7 +34308,7 @@ make CROSS_COMPILE_DIR=/usr/bin
    -

    27.9.2. aarch64 baremetal NEON setup

    +

    27.11.2. aarch64 baremetal NEON setup

    Inside baremetal/lib/aarch64.S there is a chunk of code that enables floating point operations:

    @@ -34340,7 +34432,7 @@ ISB
    -

    27.10. Baremetal tests

    +

    27.12. Baremetal tests

    Baremetal tests work exactly like User mode tests, except that you have to add the --mode baremetal option, for example:

    @@ -34995,7 +35087,7 @@ instructions 124346081

    gem5 busy loop

    a18f28e263c91362519ef550150b5c9d75fa3679 + 1

    userland/gcc/busy_loop.c -O0

    -

    ./run --arch aarch64 --emulator gem5 --static --userland userland/gcc/busy_loop.c --userland-args 1000000

    +

    ./run --arch aarch64 --emulator gem5 --static --userland userland/gcc/busy_loop.c --cli-args 1000000

    10^6

    18

    2.4005699 * 10^7

    @@ -35005,7 +35097,7 @@ instructions 124346081

    gem5 busy loop for a debug build

    a18f28e263c91362519ef550150b5c9d75fa3679 + 1

    userland/gcc/busy_loop.c -O0

    -

    ./run --arch aarch64 --emulator gem5 --gem5-build-type debug --static --userland userland/gcc/busy_loop.c --userland-args 100000

    +

    ./run --arch aarch64 --emulator gem5 --gem5-build-type debug --static --userland userland/gcc/busy_loop.c --cli-args 100000

    10^5

    33

    2.405682 * 10^6

    @@ -35015,7 +35107,7 @@ instructions 124346081

    gem5 busy loop for a fast build

    0d5a41a3f88fcd7ed40fc19474fe5aed0463663f + 1

    userland/gcc/busy_loop.c -O0 -static

    -

    ./run --arch aarch64 --emulator gem5 --gem5-build-type fast --static --userland userland/gcc/busy_loop.c --userland-args 1000000

    +

    ./run --arch aarch64 --emulator gem5 --gem5-build-type fast --static --userland userland/gcc/busy_loop.c --cli-args 1000000

    10^6

    15

    2.4005699 * 10^7

    @@ -35025,7 +35117,7 @@ instructions 124346081

    gem5 busy loop for a TimingSimpleCPU

    a18f28e263c91362519ef550150b5c9d75fa3679 + 1

    userland/gcc/busy_loop.c -O0

    -

    ./run --arch aarch64 --emulator gem5 --arch aarch64 --static --userland userland/gcc/busy_loop.c --userland-args 1000000 -- --cpu-type TimingSimpleCPU --caches

    +

    ./run --arch aarch64 --emulator gem5 --arch aarch64 --static --userland userland/gcc/busy_loop.c --cli-args 1000000 -- --cpu-type TimingSimpleCPU --caches

    10^6

    26

    2.4005699 * 10^7

    @@ -35035,7 +35127,7 @@ instructions 124346081

    gem5 busy loop for a MinorCPU

    a18f28e263c91362519ef550150b5c9d75fa3679 + 1

    userland/gcc/busy_loop.c -O0

    -

    ./run --arch aarch64 --emulator gem5 --arch aarch64 --userland userland/gcc/busy_loop.c --userland-args 1000000 -- --cpu-type MinorCPU --caches

    +

    ./run --arch aarch64 --emulator gem5 --arch aarch64 --userland userland/gcc/busy_loop.c --cli-args 1000000 -- --cpu-type MinorCPU --caches

    10^6

    31

    1.1018152 * 10^7

    @@ -35075,7 +35167,7 @@ instructions 124346081

    5d233f2664a78789f9907d27e2a40e86cefad595

    STREAM benchmark -O3

    -

    ./run --arch aarch64 --emulator gem5 --userland userland/gcc/busy_loop.c --userland-args 1000000 --trace ExecAll

    +

    ./run --arch aarch64 --emulator gem5 --userland userland/gcc/busy_loop.c --cli-args 1000000 --trace ExecAll

    3 * 10^5 * 2

    64

    9.9674773 * 10^7

    @@ -35085,7 +35177,7 @@ instructions 124346081

    glibc C pre-main effects

    ab6f7331406b22f8ab6e2df5f8b8e464fb35b611

    userland/c/m5ops.c -O0

    -

    gem5 --arch aarch64 --userland-args e

    +

    gem5 --arch aarch64 --cli-args e

    1

    2

    1.26479 * 10^5

    @@ -35095,7 +35187,7 @@ instructions 124346081

    ab6f7331406b22f8ab6e2df5f8b8e464fb35b611

    glibc C pre-main userland/c/m5ops.c -O0

    -

    gem5 --arch aarch64 --userland-args e --gem5-build-type debug

    +

    gem5 --arch aarch64 --cli-args e --gem5-build-type debug

    1

    2

    1.26479 * 10^5

    @@ -35105,7 +35197,7 @@ instructions 124346081

    ab6f7331406b22f8ab6e2df5f8b8e464fb35b611

    glibc C++ pre-main userland/cpp/m5ops.cpp -O0

    -

    gem5 --arch aarch64 --userland-args e

    +

    gem5 --arch aarch64 --cli-args e

    1

    2

    2.385012 * 10^6

    @@ -35115,7 +35207,7 @@ instructions 124346081

    ab6f7331406b22f8ab6e2df5f8b8e464fb35b611

    glibc C++ pre-main userland/cpp/m5ops.cpp -O0

    -

    gem5 --arch aarch64 --userland-args e --gem5-build-type debug

    +

    gem5 --arch aarch64 --cli-args e --gem5-build-type debug

    1

    25

    2.385012 * 10^6

    @@ -35145,7 +35237,7 @@ instructions 124346081

    Check the effect of an ExecAll log (log every instruction) on execution time, compare to analogous run without it. trace.txt size: 3.5GB. 5x slowdown observed with output to a hard disk.

    d29a07ddad499f273cc90dd66e40f8474b5dfc40

    userland/gcc/busy_loop.c -O0

    -

    ./run --arch aarch64 --emulator gem5 --userland userland/gcc/busy_loop.c --userland-args 1000000 --gem5-worktree master --trace ExecAll

    +

    ./run --arch aarch64 --emulator gem5 --userland userland/gcc/busy_loop.c --cli-args 1000000 --gem5-worktree master --trace ExecAll

    10^6

    2.4106774 * 10^7

    136

    @@ -35155,7 +35247,7 @@ instructions 124346081

    Same as above but with run command manually hacked to output to a ramfs. Slightly faster, but the bulk was still just in log format operations!

    d29a07ddad499f273cc90dd66e40f8474b5dfc40

    userland/gcc/busy_loop.c -O0

    -

    ./run --arch aarch64 --emulator gem5 --userland userland/gcc/busy_loop.c --userland-args 1000000 --gem5-worktree master --trace ExecAll

    +

    ./run --arch aarch64 --emulator gem5 --userland userland/gcc/busy_loop.c --cli-args 1000000 --gem5-worktree master --trace ExecAll

    10^6

    2.4106774 * 10^7

    107

    @@ -35171,7 +35263,7 @@ instructions 124346081
    -
    ./run --arch aarch64 --emulator gem5 --userland userland/gcc/busy_loop.c --userland-args '1 10000000'
    +
    ./run --arch aarch64 --emulator gem5 --userland userland/gcc/busy_loop.c --cli-args '1 10000000'
     ./gem5-stat --arch aarch64 sim_insts
    @@ -35298,7 +35390,7 @@ time \ --arch arm \ --emulator gem5 \ --userland "$(./getvar --arch arm buildroot_build_build_dir)/dhrystone-2/dhrystone" \ - --userland-args 'asdf qwer' \ + --cli-args 'asdf qwer' \ ;