mirror of
https://github.com/cirosantilli/linux-kernel-module-cheat.git
synced 2026-01-23 02:05:57 +01:00
gem5: fix arm multicore with system.auto_reset_addr = True
baremetal: fix aarch64/no_bootloader/semihost_exit.S which was wrong because was using unset sp for register block. Tests needed urgently!!
This commit is contained in:
102
README.adoc
102
README.adoc
@@ -10560,9 +10560,14 @@ output:
|
|||||||
....
|
....
|
||||||
./run --arch aarch64 --baremetal arch/aarch64/multicore --cpus 2
|
./run --arch aarch64 --baremetal arch/aarch64/multicore --cpus 2
|
||||||
./run --arch aarch64 --baremetal arch/aarch64/multicore --cpus 2 --gem5
|
./run --arch aarch64 --baremetal arch/aarch64/multicore --cpus 2 --gem5
|
||||||
|
./run --arch arm --baremetal arch/arm/multicore --cpus 2
|
||||||
|
./run --arch arm --baremetal arch/arm/multicore --cpus 2 --gem5
|
||||||
....
|
....
|
||||||
|
|
||||||
Source: link:baremetal/arch/aarch64/multicore.S[]
|
Sources:
|
||||||
|
|
||||||
|
* link:baremetal/arch/aarch64/multicore.S[]
|
||||||
|
* link:baremetal/arch/arm/multicore.S[]
|
||||||
|
|
||||||
CPU 0 of this program enters a spinlock loop: it repeatedly checks if a given memory address is `1`.
|
CPU 0 of this program enters a spinlock loop: it repeatedly checks if a given memory address is `1`.
|
||||||
|
|
||||||
@@ -10576,6 +10581,26 @@ Don't believe me? Then try:
|
|||||||
|
|
||||||
and watch it hang forever.
|
and watch it hang forever.
|
||||||
|
|
||||||
|
Note that if you try the same thing on gem5:
|
||||||
|
|
||||||
|
....
|
||||||
|
./run --arch aarch64 --baremetal arch/aarch64/multicore --cpus 1 --gem5
|
||||||
|
....
|
||||||
|
|
||||||
|
then gem5 actually exits, but with a different message:
|
||||||
|
|
||||||
|
....
|
||||||
|
Exiting @ tick 18446744073709551615 because simulate() limit reached
|
||||||
|
....
|
||||||
|
|
||||||
|
as opposed to the expected:
|
||||||
|
|
||||||
|
....
|
||||||
|
Exiting @ tick 36500 because m5_exit instruction encountered
|
||||||
|
....
|
||||||
|
|
||||||
|
since gem5 is able to detect when nothing will ever happen, and exits.
|
||||||
|
|
||||||
When GDB step debugging, switch between cores with the usual `thread` commands, see also: <<gdb-step-debug-multicore-userland>>.
|
When GDB step debugging, switch between cores with the usual `thread` commands, see also: <<gdb-step-debug-multicore-userland>>.
|
||||||
|
|
||||||
Bibliography:
|
Bibliography:
|
||||||
@@ -10594,6 +10619,81 @@ However, likely no implementation likely does (TODO confirm), since:
|
|||||||
|
|
||||||
and power consumption is key in ARM applications.
|
and power consumption is key in ARM applications.
|
||||||
|
|
||||||
|
In QEMU 3.0.0, `SEV` is a NOP, and `WFE` might be, but I'm not sure, see: https://github.com/qemu/qemu/blob/v3.0.0/target/arm/translate-a64.c#L1423
|
||||||
|
|
||||||
|
....
|
||||||
|
case 2: /* WFE */
|
||||||
|
if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
|
||||||
|
s->base.is_jmp = DISAS_WFE;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
case 4: /* SEV */
|
||||||
|
case 5: /* SEVL */
|
||||||
|
/* we treat all as NOP at least for now */
|
||||||
|
return;
|
||||||
|
....
|
||||||
|
|
||||||
|
TODO: what does the WFE code do? How can it not be a NOP if SEV is a NOP? https://github.com/qemu/qemu/blob/v3.0.0/target/arm/translate.c#L4609 might explain why, but it is Chinese to me (I only understand 30% ;-)):
|
||||||
|
|
||||||
|
....
|
||||||
|
* For WFI we will halt the vCPU until an IRQ. For WFE and YIELD we
|
||||||
|
* only call the helper when running single threaded TCG code to ensure
|
||||||
|
* the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
|
||||||
|
* just skip this instruction. Currently the SEV/SEVL instructions
|
||||||
|
* which are *one* of many ways to wake the CPU from WFE are not
|
||||||
|
* implemented so we can't sleep like WFI does.
|
||||||
|
*/
|
||||||
|
....
|
||||||
|
|
||||||
|
For gem5 however, if we comment out the `SEV` instruction, then it actually exits with `simulate() limit reached`, so the CPU truly never wakes up, which is a more realistic behaviour.
|
||||||
|
|
||||||
|
The following Raspberry Pi bibliography helped us get this sample up and running:
|
||||||
|
|
||||||
|
* https://github.com/bztsrc/raspi3-tutorial/tree/a3f069b794aeebef633dbe1af3610784d55a0efa/02_multicorec
|
||||||
|
* https://github.com/dwelch67/raspberrypi/tree/a09771a1d5a0b53d8e7a461948dc226c5467aeec/multi00
|
||||||
|
* https://github.com/LdB-ECM/Raspberry-Pi/blob/3b628a2c113b3997ffdb408db03093b2953e4961/Multicore/SmartStart64.S
|
||||||
|
* https://github.com/LdB-ECM/Raspberry-Pi/blob/3b628a2c113b3997ffdb408db03093b2953e4961/Multicore/SmartStart32.S
|
||||||
|
|
||||||
|
===== PSCI
|
||||||
|
|
||||||
|
In QEMU, CPU 1 starts in a halted state. This can be observed from GDB, where:
|
||||||
|
|
||||||
|
....
|
||||||
|
info threads
|
||||||
|
....
|
||||||
|
|
||||||
|
shows something like:
|
||||||
|
|
||||||
|
....
|
||||||
|
* 1 Thread 1 (CPU#0 [running]) mystart
|
||||||
|
2 Thread 2 (CPU#1 [halted ]) mystart
|
||||||
|
....
|
||||||
|
|
||||||
|
To wake up CPU 1 on QEMU, we must use the Power State Coordination Interface (PSCI) which is documented at: link:https://developer.arm.com/docs/den0022/latest/arm-power-state-coordination-interface-platform-design-document[].
|
||||||
|
|
||||||
|
This interface uses `HVC` calls, and the calling convention is documented at "SMC CALLING CONVENTION" link:https://developer.arm.com/docs/den0028/latest[].
|
||||||
|
|
||||||
|
If we boot the Linux kernel on QEMU and <<get-device-tree-from-a-running-kernel,dump the auto-generated device tree>>, we observe that it contains the function identifier of the PSCI CPU_ON call:
|
||||||
|
|
||||||
|
....
|
||||||
|
psci {
|
||||||
|
method = "hvc";
|
||||||
|
compatible = "arm,psci-0.2", "arm,psci";
|
||||||
|
cpu_on = <0xc4000003>;
|
||||||
|
migrate = <0xc4000005>;
|
||||||
|
cpu_suspend = <0xc4000001>;
|
||||||
|
cpu_off = <0x84000002>;
|
||||||
|
};
|
||||||
|
....
|
||||||
|
|
||||||
|
The Linux kernel wakes up the secondary cores in this exact same way at: https://github.com/torvalds/linux/blob/v4.19/drivers/firmware/psci.c#L122 We first actually got it working here by grepping the kernel and step debugging that call :-)
|
||||||
|
|
||||||
|
In gem5, CPU 1 starts woken up from the start, so PSCI is not needed. TODO gem5 actually blows up if we try to do the `hvc` call, understand why.
|
||||||
|
|
||||||
|
===== DMB
|
||||||
|
|
||||||
|
TODO: create and study a minimal example in gem5 where the `DMB` instruction leads to fewer cycles: https://stackoverflow.com/questions/15491751/real-life-use-cases-of-barriers-dsb-dmb-isb-in-arm
|
||||||
|
|
||||||
=== How we got some baremetal stuff to work
|
=== How we got some baremetal stuff to work
|
||||||
|
|
||||||
It is nice when things just work.
|
It is nice when things just work.
|
||||||
|
|||||||
@@ -7,10 +7,12 @@ main:
|
|||||||
ldr x1, =spinlock
|
ldr x1, =spinlock
|
||||||
str x0, [x1]
|
str x0, [x1]
|
||||||
|
|
||||||
/* Read cpu id into x1. */
|
/* Read cpu id into x1.
|
||||||
|
* TODO: cores beyond 4th?
|
||||||
|
*/
|
||||||
mrs x1, mpidr_el1
|
mrs x1, mpidr_el1
|
||||||
and x1, x1, 3
|
ands x1, x1, 3
|
||||||
cbz x1, cpu0_only
|
beq cpu0_only
|
||||||
cpu1_only:
|
cpu1_only:
|
||||||
/* Only CPU 1 reaches this point and sets the spinlock. */
|
/* Only CPU 1 reaches this point and sets the spinlock. */
|
||||||
mov x0, 1
|
mov x0, 1
|
||||||
@@ -35,8 +37,7 @@ cpu0_only:
|
|||||||
|
|
||||||
#if !defined(GEM5)
|
#if !defined(GEM5)
|
||||||
/* Wake up CPU 1 from initial sleep!
|
/* Wake up CPU 1 from initial sleep!
|
||||||
* In gem5, CPU 1 starts woken up from the start,
|
* See:https://github.com/cirosantilli/linux-kernel-module-cheat#psci
|
||||||
* so this is not needed.
|
|
||||||
*/
|
*/
|
||||||
/* Function identifier: PSCI CPU_ON. */
|
/* Function identifier: PSCI CPU_ON. */
|
||||||
ldr w0, =0xc4000003
|
ldr w0, =0xc4000003
|
||||||
|
|||||||
@@ -2,11 +2,14 @@
|
|||||||
|
|
||||||
.global mystart
|
.global mystart
|
||||||
mystart:
|
mystart:
|
||||||
mov x1, #0x26
|
mov x1, 0x26
|
||||||
movk x1, #2, lsl #16
|
movk x1, 2, lsl 16
|
||||||
str x1, [sp,#0]
|
ldr x2, =semihost_args
|
||||||
|
str x1, [x2, 0]
|
||||||
mov x0, #0
|
mov x0, #0
|
||||||
str x0, [sp,#8]
|
str x0, [x2, 8]
|
||||||
mov x1, sp
|
mov x1, x2
|
||||||
mov w0, #0x18
|
mov w0, 0x18
|
||||||
hlt 0xf000
|
hlt 0xf000
|
||||||
|
semihost_args:
|
||||||
|
.skip 16
|
||||||
|
|||||||
@@ -1,20 +1,21 @@
|
|||||||
.global main
|
.global main
|
||||||
main:
|
main:
|
||||||
/* 0x20026 == ADP_Stopped_ApplicationExit */
|
/* 0x20026 == ADP_Stopped_ApplicationExit */
|
||||||
mov x1, #0x26
|
mov x1, 0x26
|
||||||
movk x1, #2, lsl #16
|
movk x1, 2, lsl 16
|
||||||
str x1, [sp,#0]
|
str x1, [sp, 0]
|
||||||
|
|
||||||
/* Exit status code. Host QEMU process exits with that status. */
|
/* Exit status code. Host QEMU process exits with that status. */
|
||||||
mov x0, #0
|
mov x0, 0
|
||||||
str x0, [sp,#8]
|
str x0, [sp, 8]
|
||||||
|
|
||||||
/* x1 contains the address of parameter block.
|
/* x1 contains the address of parameter block.
|
||||||
* Any memory address could be used. */
|
* Any memory address could be used.
|
||||||
|
*/
|
||||||
mov x1, sp
|
mov x1, sp
|
||||||
|
|
||||||
/* SYS_EXIT */
|
/* SYS_EXIT */
|
||||||
mov w0, #0x18
|
mov w0, 0x18
|
||||||
|
|
||||||
/* Do the semihosting call on A64. */
|
/* Do the semihosting call on A64. */
|
||||||
hlt 0xf000
|
hlt 0xf000
|
||||||
|
|||||||
37
baremetal/arch/arm/multicore.S
Normal file
37
baremetal/arch/arm/multicore.S
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
/* https://github.com/cirosantilli/linux-kernel-module-cheat#arm-multicore */
|
||||||
|
|
||||||
|
.global main
|
||||||
|
main:
|
||||||
|
mov r0, #0
|
||||||
|
ldr r1, =spinlock
|
||||||
|
str r0, [r1]
|
||||||
|
/* Get CPU ID. */
|
||||||
|
mrc p15, 0, r1, c0, c0, 5
|
||||||
|
ands r1, r1, #3
|
||||||
|
beq cpu0_only
|
||||||
|
cpu1_only:
|
||||||
|
mov r0, #1
|
||||||
|
ldr r1, =spinlock
|
||||||
|
str r0, [r1]
|
||||||
|
dmb sy
|
||||||
|
sev
|
||||||
|
cpu1_sleep_forever:
|
||||||
|
wfe
|
||||||
|
b cpu1_sleep_forever
|
||||||
|
cpu0_only:
|
||||||
|
#if !defined(GEM5)
|
||||||
|
/* PSCI CPU_ON. */
|
||||||
|
ldr r0, =0x84000003
|
||||||
|
mov r1, #1
|
||||||
|
ldr r2, =cpu1_only
|
||||||
|
mov r3, #0
|
||||||
|
hvc 0
|
||||||
|
#endif
|
||||||
|
spinlock_start:
|
||||||
|
ldr r0, spinlock
|
||||||
|
wfe
|
||||||
|
cmp r0, #0
|
||||||
|
beq spinlock_start
|
||||||
|
bx lr
|
||||||
|
spinlock:
|
||||||
|
.skip 4
|
||||||
@@ -931,7 +931,7 @@ def setup(parser):
|
|||||||
common.qcow2_file = common.buildroot_qcow2_file
|
common.qcow2_file = common.buildroot_qcow2_file
|
||||||
|
|
||||||
# Image.
|
# Image.
|
||||||
if args.baremetal is None:
|
if common.baremetal is None:
|
||||||
if common.emulator == 'gem5':
|
if common.emulator == 'gem5':
|
||||||
common.image = common.vmlinux
|
common.image = common.vmlinux
|
||||||
common.disk_image = common.rootfs_raw_file
|
common.disk_image = common.rootfs_raw_file
|
||||||
@@ -940,11 +940,11 @@ def setup(parser):
|
|||||||
common.disk_image = common.qcow2_file
|
common.disk_image = common.qcow2_file
|
||||||
else:
|
else:
|
||||||
common.disk_image = common.gem5_fake_iso
|
common.disk_image = common.gem5_fake_iso
|
||||||
if args.baremetal == 'all':
|
if common.baremetal == 'all':
|
||||||
path = args.baremetal
|
path = common.baremetal
|
||||||
else:
|
else:
|
||||||
path = common.resolve_executable(
|
path = common.resolve_executable(
|
||||||
args.baremetal,
|
common.baremetal,
|
||||||
common.baremetal_src_dir,
|
common.baremetal_src_dir,
|
||||||
common.baremetal_build_dir,
|
common.baremetal_build_dir,
|
||||||
common.baremetal_build_ext,
|
common.baremetal_build_ext,
|
||||||
|
|||||||
20
run
20
run
@@ -128,7 +128,7 @@ def main(args, extra_args=None):
|
|||||||
raise Exception('Baremetal ELF file not found. Tried:\n' + '\n'.join(paths))
|
raise Exception('Baremetal ELF file not found. Tried:\n' + '\n'.join(paths))
|
||||||
cmd = debug_vm.copy()
|
cmd = debug_vm.copy()
|
||||||
if common.emulator == 'gem5':
|
if common.emulator == 'gem5':
|
||||||
if args.baremetal is None:
|
if common.baremetal is None:
|
||||||
if not os.path.exists(common.rootfs_raw_file):
|
if not os.path.exists(common.rootfs_raw_file):
|
||||||
if not os.path.exists(common.qcow2_file):
|
if not os.path.exists(common.qcow2_file):
|
||||||
raise_rootfs_not_found()
|
raise_rootfs_not_found()
|
||||||
@@ -139,7 +139,7 @@ def main(args, extra_args=None):
|
|||||||
common.write_string_to_file(common.gem5_fake_iso, 'a' * 512)
|
common.write_string_to_file(common.gem5_fake_iso, 'a' * 512)
|
||||||
if not os.path.exists(common.image):
|
if not os.path.exists(common.image):
|
||||||
# This is to run gem5 from a prebuilt download.
|
# This is to run gem5 from a prebuilt download.
|
||||||
if (not args.baremetal is None) or (not os.path.exists(common.linux_image)):
|
if (not common.baremetal is None) or (not os.path.exists(common.linux_image)):
|
||||||
raise_image_not_found()
|
raise_image_not_found()
|
||||||
common.run_cmd([os.path.join(common.extract_vmlinux, common.linux_image)])
|
common.run_cmd([os.path.join(common.extract_vmlinux, common.linux_image)])
|
||||||
os.makedirs(os.path.dirname(common.gem5_readfile), exist_ok=True)
|
os.makedirs(os.path.dirname(common.gem5_readfile), exist_ok=True)
|
||||||
@@ -194,15 +194,17 @@ def main(args, extra_args=None):
|
|||||||
'--dtb-filename', os.path.join(common.gem5_system_dir, 'arm', 'dt', 'armv{}_gem5_v1_{}cpu.dtb'.format(common.armv, args.cpus)), common.Newline,
|
'--dtb-filename', os.path.join(common.gem5_system_dir, 'arm', 'dt', 'armv{}_gem5_v1_{}cpu.dtb'.format(common.armv, args.cpus)), common.Newline,
|
||||||
'--machine-type', common.machine, common.Newline,
|
'--machine-type', common.machine, common.Newline,
|
||||||
])
|
])
|
||||||
if args.baremetal is None:
|
if common.baremetal is None:
|
||||||
cmd.extend([
|
cmd.extend([
|
||||||
'--param', 'system.panic_on_panic = True', common.Newline])
|
'--param', 'system.panic_on_panic = True', common.Newline])
|
||||||
else:
|
else:
|
||||||
cmd.extend(['--bare-metal', common.Newline])
|
cmd.extend([
|
||||||
|
'--bare-metal', common.Newline,
|
||||||
|
'--param', 'system.auto_reset_addr = True', common.Newline,
|
||||||
|
])
|
||||||
if args.arch == 'aarch64':
|
if args.arch == 'aarch64':
|
||||||
# https://stackoverflow.com/questions/43682311/uart-communication-in-gem5-with-arm-bare-metal/50983650#50983650
|
# https://stackoverflow.com/questions/43682311/uart-communication-in-gem5-with-arm-bare-metal/50983650#50983650
|
||||||
cmd.extend(['--param', 'system.highest_el_is_64 = True', common.Newline])
|
cmd.extend(['--param', 'system.highest_el_is_64 = True', common.Newline])
|
||||||
cmd.extend(['--param', 'system.auto_reset_addr = True', common.Newline])
|
|
||||||
elif args.gem5_script == 'biglittle':
|
elif args.gem5_script == 'biglittle':
|
||||||
if args.kvm:
|
if args.kvm:
|
||||||
cpu_type = 'kvm'
|
cpu_type = 'kvm'
|
||||||
@@ -319,7 +321,7 @@ def main(args, extra_args=None):
|
|||||||
root = 'root=/dev/vda'
|
root = 'root=/dev/vda'
|
||||||
rrid = ''
|
rrid = ''
|
||||||
snapshot = ',snapshot'
|
snapshot = ',snapshot'
|
||||||
if args.baremetal is None:
|
if common.baremetal is None:
|
||||||
if not os.path.exists(common.qcow2_file):
|
if not os.path.exists(common.qcow2_file):
|
||||||
if not os.path.exists(common.rootfs_raw_file):
|
if not os.path.exists(common.rootfs_raw_file):
|
||||||
raise_rootfs_not_found()
|
raise_rootfs_not_found()
|
||||||
@@ -364,7 +366,7 @@ def main(args, extra_args=None):
|
|||||||
] +
|
] +
|
||||||
virtio_gpu_pci
|
virtio_gpu_pci
|
||||||
)
|
)
|
||||||
if args.baremetal is None:
|
if common.baremetal is None:
|
||||||
cmd.extend(append)
|
cmd.extend(append)
|
||||||
if args.tmux is not None:
|
if args.tmux is not None:
|
||||||
tmux_args = '--run-id {}'.format(args.run_id)
|
tmux_args = '--run-id {}'.format(args.run_id)
|
||||||
@@ -381,8 +383,8 @@ def main(args, extra_args=None):
|
|||||||
args.linux_build_id,
|
args.linux_build_id,
|
||||||
args.run_id,
|
args.run_id,
|
||||||
)
|
)
|
||||||
if args.baremetal:
|
if common.baremetal:
|
||||||
tmux_args += " --baremetal '{}'".format(args.baremetal)
|
tmux_args += " --baremetal '{}'".format(common.baremetal)
|
||||||
if args.userland:
|
if args.userland:
|
||||||
tmux_args += " --userland '{}'".format(args.userland)
|
tmux_args += " --userland '{}'".format(args.userland)
|
||||||
tmux_args += ' {}'.format(args.tmux)
|
tmux_args += ' {}'.format(args.tmux)
|
||||||
|
|||||||
6
run-gdb
6
run-gdb
@@ -120,15 +120,15 @@ def main(args, extra_args=None):
|
|||||||
break_at = ['-ex', 'break {}'.format(args.break_at), common.Newline]
|
break_at = ['-ex', 'break {}'.format(args.break_at), common.Newline]
|
||||||
else:
|
else:
|
||||||
break_at = []
|
break_at = []
|
||||||
linux_full_system = (args.baremetal is None and args.userland is None)
|
linux_full_system = (common.baremetal is None and args.userland is None)
|
||||||
if args.userland:
|
if args.userland:
|
||||||
image = common.resolve_userland(args.userland)
|
image = common.resolve_userland(args.userland)
|
||||||
elif args.baremetal:
|
elif common.baremetal:
|
||||||
image = common.image
|
image = common.image
|
||||||
test_script_path = os.path.splitext(common.source_path)[0] + '.py'
|
test_script_path = os.path.splitext(common.source_path)[0] + '.py'
|
||||||
else:
|
else:
|
||||||
image = common.vmlinux
|
image = common.vmlinux
|
||||||
if args.baremetal:
|
if common.baremetal:
|
||||||
allowed_toolchains = ['crosstool-ng', 'buildroot', 'host']
|
allowed_toolchains = ['crosstool-ng', 'buildroot', 'host']
|
||||||
else:
|
else:
|
||||||
allowed_toolchains = ['buildroot', 'crosstool-ng', 'host']
|
allowed_toolchains = ['buildroot', 'crosstool-ng', 'host']
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ parser.add_argument(
|
|||||||
nargs='*'
|
nargs='*'
|
||||||
)
|
)
|
||||||
args = common.setup(parser)
|
args = common.setup(parser)
|
||||||
if args.baremetal is None:
|
if common.baremetal is None:
|
||||||
image = common.vmlinux
|
image = common.vmlinux
|
||||||
else:
|
else:
|
||||||
image = common.image
|
image = common.image
|
||||||
|
|||||||
Reference in New Issue
Block a user