./run --arch aarch64 --emulator gem5 --userland userland/posix/getpid.c --cpus 2
+you won’t get the latest version of this repository. Our Travis attempt to automate builds failed, and storing a release for every commit would likely make GitHub mad at us anyway.
gem5 is not currently supported. The major blocking point is how to avoid distributing the kernel images twice: once for gem5 which uses vmlinux, and once for QEMU which uses arch/* images, see also:
so we see that two lines were added for each syscall, showing the syscall inputs and exit status, just like a mini strace!
This is not currently nicely exposed in LKMC, but gem5 syscall emulation does allow you to run multiple executables "at once".
--cmd takes a semicolon-separated list, so we could do:
./run --arch aarch64 --emulator gem5 --userland userland/posix/getpid.c --cpus 2
and then hack the produced command by replacing:
--cmd /home/ciro/bak/git/linux-kernel-module-cheat/out/userland/default/aarch64/posix/getpid.out \
--param 'system.cpu[0].workload[:].release = "5.4.3"' \
with:
--cmd '/home/ciro/bak/git/linux-kernel-module-cheat/out/userland/default/aarch64/posix/getpid.out;/home/ciro/bak/git/linux-kernel-module-cheat/out/userland/default/aarch64/posix/getpid.out' \
--param 'system.cpu[:].workload[:].release = "5.4.3"' \
The outcome of this is that we see two different pid messages printed to stdout:
pid=101
pid=100
since from gem5 Process we can see that se.py sets up a different PID for each executable, starting at 100:
    workloads = options.cmd.split(';')
    idx = 0
    for wrkld in workloads:
        process = Process(pid = 100 + idx)
This basically starts running one process per CPU, much as if they had been forked.
We can also see that these processes are running concurrently with gem5 tracing by hacking:
--debug-flags ExecAll \
--debug-file cout \
which starts with:
0: system.cpu1: A0 T0 : @__end__+274873647040 : add x0, sp, #0 : IntAlu : D=0x0000007ffffefde0 flags=(IsInteger)
0: system.cpu0: A0 T0 : @__end__+274873647040 : add x0, sp, #0 : IntAlu : D=0x0000007ffffefde0 flags=(IsInteger)
500: system.cpu0: A0 T0 : @__end__+274873647044 : bl <__end__+274873649648> : IntAlu : D=0x0000004000001008 flags=(IsInteger|IsControl|IsDirectControl|IsUncondControl|IsCall)
500: system.cpu1: A0 T0 : @__end__+274873647044 : bl <__end__+274873649648> : IntAlu : D=0x0000004000001008 flags=(IsInteger|IsControl|IsDirectControl|IsUncondControl|IsCall)
and therefore shows one instruction running on each CPU for each process at the same time.
Well, not that AOSP is that much better anyway.
gem5 user mode multithreading has been particularly flaky compared to QEMU’s, but work is being put into improving it.
In gem5 syscall simulation, the fork syscall checks if there is a free CPU, and if there is one, the new thread runs on that CPU.
Otherwise, the fork call, and therefore higher level interfaces to fork such as pthread_create, fails and returns a failure status in the guest.
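The failure is visible from the guest by checking the return value of pthread_create. A minimal hedged sketch of that check (not one of the LKMC examples) would be:

// Hedged sketch (not LKMC code): under gem5 syscall emulation,
// pthread_create fails when no free CPU is available, so check its return.
#include <cstdio>
#include <cstring>
#include <pthread.h>

static void *thread_main(void *) {
    std::puts("hello from the new thread");
    return nullptr;
}

int main() {
    pthread_t tid;
    int err = pthread_create(&tid, nullptr, thread_main, nullptr);
    if (err != 0) {
        // Expected e.g. with --cpus 1, since the only CPU is already taken.
        std::fprintf(stderr, "pthread_create failed: %s\n", std::strerror(err));
        return 1;
    }
    pthread_join(tid, nullptr);
    return 0;
}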
For example, if we use just one CPU for userland/posix/pthread_self.c which spawns one thread besides main:
outputs 1 and 2 respectively.
This can also be clearly seen by running sched_getcpu:
./run \
  --arch aarch64 \
  --cli-args 4 \
  --cpus 8 \
  --emulator gem5 \
  --userland userland/linux/sched_getcpu.c \
;
which necessarily produces an output containing the CPU numbers from 1 to 4 and no higher:
1
3
4
2
TODO why does the 2 come at the end here? Would be good to do a detailed assembly run analysis.
But let’s give it a try anyway with userland/freestanding/gem5_checkpoint.S, which was mentioned at gem5 checkpoint userland minimal example.
when you want to call the instructions from inside interest points of your benchmark. Otherwise you add the syscall overhead to the benchmark, which is more intrusive and might affect results.
Why not just hardcode some m5ops instructions as in our example instead, since you are going to modify the source of the benchmark anyway?
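For reference, annotating a region of interest might look roughly like the following sketch, assuming gem5's include/gem5/m5ops.h header and the libm5 library from util/m5 (not verbatim LKMC code):

// Hedged sketch (not LKMC code): reset and dump stats around the region of
// interest, assuming gem5's m5ops C interface is available and linked in.
#include <gem5/m5ops.h>

static void region_of_interest(void) {
    // Placeholder workload standing in for the interesting part of a benchmark.
    volatile unsigned long sum = 0;
    for (unsigned long i = 0; i < 1000000; i++)
        sum += i;
}

int main(void) {
    // Setup and warmup before this point are excluded from the stats.
    m5_reset_stats(0, 0);  // zero stats right at the start of the region
    region_of_interest();
    m5_dump_stats(0, 0);   // dump stats covering only the region of interest
    return 0;
}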
Crossbar, or XBar in the code, is the default CPU interconnect that gets used by fs.py if --ruby is not given.
One simple example of its operation can be seen at: Section 19.20.4.2, “gem5 event queue TimingSimpleCPU syscall emulation freestanding example analysis”.
See also: https://en.wikipedia.org/wiki/Crossbar_switch
Both of those can be checked with git log and git blame.
All CPU types inherit from the BaseCPU class, and looking at the class hierarchy in Eclipse gives a good overview of what we have:
BaseCPU
    BaseKvmCPU
    BaseSimpleCPU
        AtomicSimpleCPU
        TimingSimpleCPU
    MinorCPU
    BaseO3CPU
        FullO3CPU
BaseSimpleCPU
Simple abstract CPU without a pipeline.
Certain ISAs like ARM have bootloaders that are automatically run before the main image to setup basic system state.
We cross compile those bootloaders from source automatically during ./build-gem5.
As of gem5 bcf041f257623e5c9e77d35b7531bae59edc0423, the source code of the bootloaders can be found under:
system/arm/
and their selection can be seen under: src/dev/arm/RealView.py, e.g.:
def setupBootLoader(self, cur_sys, loc):
    if not cur_sys.boot_loader:
        cur_sys.boot_loader = [ loc('boot_emm.arm64'), loc('boot_emm.arm') ]
Internals under other sections:
In order to develop complex C++ software such as gem5, a good IDE setup is fundamental.
The best setup I’ve reached is with Eclipse. It is not perfect, and there is a learning curve, but it is worth it.
Notably, it is very hard to get perfect due to: Why are all C++ symlinked into the gem5 build dir?.
I recommend the following settings, tested in Eclipse 2019.09, Ubuntu 18.04:
use spaces instead of tabs: Window, Preferences, Code Style, C/C++, Formatter, New, Edit, Tab Policy, Spaces Only
either
create the project in the gem5 build directory! Files are moved around there and symlinked, and this gives the best chances of success
add to the include search path:
To run and GDB step debug the executable, just copy the full command line from the output of ./run, and configure it into Eclipse.
The interaction uses the Python C extension interface https://docs.python.org/2/extending/extending.html through the pybind11 helper library: https://github.com/pybind/pybind11
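For reference on the mechanism itself, a generic pybind11 hello world module (not gem5's actual bindings) looks like:

// Generic pybind11 example module (not gem5 code): exposes add() to Python.
#include <pybind11/pybind11.h>

namespace py = pybind11;

int add(int a, int b) {
    return a + b;
}

PYBIND11_MODULE(example, m) {
    m.doc() = "minimal pybind11 example";
    m.def("add", &add, "Add two integers");
}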
The main is at: src/sim/main.cc. It calls:
gem5 is an event based simulator, and as such the event queue is one of the crucial elements in the system.
Then, once we had that, the most perfect thing ever would be to make the full event graph containing which events schedule which events!
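To keep a mental model handy for the walkthroughs below, here is a hedged toy sketch of what a discrete event queue boils down to; gem5's real EventQueue is of course much more involved:

// Toy discrete event queue (not gem5 code): an event is a (tick, action)
// pair, and an action may schedule further events, which is exactly the
// "which event schedules which event" relation discussed above.
#include <cstdint>
#include <functional>
#include <iostream>
#include <queue>
#include <vector>

struct Event {
    uint64_t tick;
    std::function<void()> action;
};

struct Later {
    bool operator()(const Event &a, const Event &b) const {
        return a.tick > b.tick;  // smallest tick first
    }
};

int main() {
    std::priority_queue<Event, std::vector<Event>, Later> queue;

    // Initial event at tick 0 schedules a follow-up event at tick 500,
    // loosely mimicking a fetch that later triggers an execute.
    queue.push({0, [&queue]() {
        std::cout << "fetch at tick 0\n";
        queue.push({500, []() { std::cout << "execute at tick 500\n"; }});
    }});

    // Main simulation loop: pop the earliest event and run its action,
    // which may push new events onto the queue.
    while (!queue.empty()) {
        Event e = queue.top();
        queue.pop();
        e.action();
    }
    return 0;
}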
Let’s now analyze every single event of a minimal gem5 syscall emulation run on the simplest CPU that we have:
Tested in gem5 12c917de54145d2d50260035ba7fa614e25317a3.
Let’s have a closer look at the initial magically scheduled events of the simulation.
Inside AtomicSimpleCPU::tick() we saw previously that the reschedule happens at:
It will be interesting to see on GDB how AtomicSimpleCPU makes memory accesses, and to compare that with TimingSimpleCPU.
Happens on EmulationPageTable, and seems to happen atomically without making any extra memory requests.
Now, let’s move on to TimingSimpleCPU, which is just like AtomicSimpleCPU internally, but now the memory requests don’t actually finish immediately: gem5 CPU types!
Schedules TimingSimpleCPU::fetch through:
Backtrace:
This is just the startup of the second rank, see: TimingSimpleCPU analysis #1.
From the timing we know what that one is: the end of time exit event, like for AtomicSimpleCPU.
Executes TimingSimpleCPU::fetch().
Schedules DRAMCtrl::processNextReqEvent through:
Schedules BaseXBar::Layer::releaseLayer through:
Executes DRAMCtrl::processNextReqEvent.
Schedules DRAMCtrl::Rank::processActivateEvent through:
Schedules DRAMCtrl::processRespondEvent through:
Schedules DRAMCtrl::processNextReqEvent through:
Executes DRAMCtrl::Rank::processActivateEvent.
Schedules DRAMCtrl::Rank::processPowerEvent through:
Executes DRAMCtrl::Rank::processPowerEvent.
Executes BaseXBar::Layer<SrcType, DstType>::releaseLayer.
Executes DRAMCtrl::processNextReqEvent().
Executes DRAMCtrl::processRespondEvent().
Schedules PacketQueue::processSendEvent() through:
Executes PacketQueue::processSendEvent().
Schedules PacketQueue::processSendEvent through:
Schedules BaseXBar::Layer<SrcType, DstType>::releaseLayer through:
Executes BaseXBar::Layer<SrcType, DstType>::releaseLayer.
Executes PacketQueue::processSendEvent.
Schedules TimingSimpleCPU::IcachePort::ITickEvent::process() through:
Executes TimingSimpleCPU::IcachePort::ITickEvent::process().
Schedules DRAMCtrl::processNextReqEvent through:
Schedules BaseXBar::Layer<SrcType, DstType>::releaseLayer through:
Executes DRAMCtrl::processNextReqEvent.
Schedules DRAMCtrl::processRespondEvent().
One important thing we want to check now is how memory reads are going to make the processor stall in the middle of an instruction.
Let’s just add --caches to see if things go any faster:
TODO is this the minimal setup that allows us to see the gem5 crossbar interconnect? Can we see anything in AtomicSimpleCPU?
The events for the Atomic CPU were pretty simple: basically just ticks.
TODO: like gem5 event queue MinorCPU syscall emulation freestanding example analysis but even more complex!
ThreadContext vs ThreadState vs ExecContext vs Process
These classes get used everywhere, and they have a somewhat convoluted relation with one another, so let’s figure out this mess.
None of those objects are SimObjects, so they must all belong to some higher SimObject.
This section and all children tested at gem5 b1623cb2087873f64197e503ab8894b5e4d4c7b4.
ThreadContext
As we delve into more details below, we will reach the following conclusion: a ThreadContext represents one thread of a CPU with multiple Hardware threads.
We can therefore have multiple ThreadContexts for each BaseCPU.
ThreadContext is what gets passed in syscalls, e.g.:
src/sim/syscall_emul.hh
template <class OS>
SyscallReturn
readFunc(SyscallDesc *desc, ThreadContext *tc,
        int tgt_fd, Addr buf_ptr, int nbytes)
The class hierarchy for ThreadContext looks like:
ThreadContext
    O3ThreadContext
    SimpleThread
where the gem5 MinorCPU also uses SimpleThread:
/** Minor will use the SimpleThread state for now */
typedef SimpleThread MinorThread;
It is a bit confusing: things would be much clearer if SimpleThread were instead called SimpleThreadContext!
readIntReg and other register access methods are some notable methods implemented in descendants, e.g. SimpleThread::readIntReg.
Essentially all methods of the base ThreadContext are pure virtual.
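To summarize the pattern with a toy sketch (not gem5 code): the base class is just an interface, and a concrete descendant such as SimpleThread decides where the register data actually lives:

// Toy sketch of the ThreadContext pattern (not gem5 code): pure virtual
// interface, with register storage living in the concrete descendant.
#include <array>
#include <cstdint>
#include <iostream>

struct ToyThreadContext {
    virtual ~ToyThreadContext() = default;
    virtual uint64_t readIntReg(int idx) const = 0;
    virtual void setIntReg(int idx, uint64_t val) = 0;
};

// Analogous to SimpleThread: owns the register file directly.
struct ToySimpleThread : ToyThreadContext {
    std::array<uint64_t, 32> intRegs{};
    uint64_t readIntReg(int idx) const override { return intRegs[idx]; }
    void setIntReg(int idx, uint64_t val) override { intRegs[idx] = val; }
};

int main() {
    ToySimpleThread thread;
    ToyThreadContext &tc = thread;  // consumers only see the interface
    tc.setIntReg(0, 42);
    std::cout << tc.readIntReg(0) << "\n";  // 42
}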
SimpleThread
SimpleThread storage is defined on BaseSimpleCPU for simple CPUs like AtomicSimpleCPU:
    for (unsigned i = 0; i < numThreads; i++) {
        if (FullSystem) {
            thread = new SimpleThread(this, i, p->system,
                                      p->itb, p->dtb, p->isa[i]);
        } else {
            thread = new SimpleThread(this, i, p->system, p->workload[i],
                                      p->itb, p->dtb, p->isa[i]);
        }
        threadInfo.push_back(new SimpleExecContext(this, thread));
        ThreadContext *tc = thread->getTC();
        threadContexts.push_back(tc);
    }
and on MinorCPU for Minor:
MinorCPU::MinorCPU(MinorCPUParams *params) :
    BaseCPU(params),
    threadPolicy(params->threadPolicy)
{
    /* This is only written for one thread at the moment */
    Minor::MinorThread *thread;

    for (ThreadID i = 0; i < numThreads; i++) {
        if (FullSystem) {
            thread = new Minor::MinorThread(this, i, params->system,
                    params->itb, params->dtb, params->isa[i]);
            thread->setStatus(ThreadContext::Halted);
        } else {
            thread = new Minor::MinorThread(this, i, params->system,
                    params->workload[i], params->itb, params->dtb,
                    params->isa[i]);
        }

        threads.push_back(thread);
        ThreadContext *tc = thread->getTC();
        threadContexts.push_back(tc);
    }
Those are used from gem5 ExecContext.
From this we see that one CPU can have multiple threads, and that this is controlled from the Python:
BaseCPU::BaseCPU(Params *p, bool is_checker)
    : numThreads(p->numThreads)
and since SimpleThread contains its registers, this must represent Hardware threads.
If we analyse SimpleThread::readIntReg, we see that the actual register data is contained inside ThreadContext descendants, e.g. in SimpleThread:
    RegVal
    readIntReg(RegIndex reg_idx) const override
    {
        int flatIndex = isa->flattenIntIndex(reg_idx);
        assert(flatIndex < TheISA::NumIntRegs);
        uint64_t regVal(readIntRegFlat(flatIndex));
        DPRINTF(IntRegs, "Reading int reg %d (%d) as %#x.\n",
                reg_idx, flatIndex, regVal);
        return regVal;
    }

    RegVal readIntRegFlat(RegIndex idx) const override { return intRegs[idx]; }
    void
    setIntRegFlat(RegIndex idx, RegVal val) override
    {
        intRegs[idx] = val;
    }

    std::array<RegVal, TheISA::NumIntRegs> intRegs;
Another notable type of method contained in ThreadContext is the kind that forwards to gem5 ThreadState.
O3ThreadContext
Instantiation happens in the FullO3CPU constructor:
FullO3CPU<Impl>::FullO3CPU(DerivO3CPUParams *params)

    for (ThreadID tid = 0; tid < this->numThreads; ++tid) {
        if (FullSystem) {
            // SMT is not supported in FS mode yet.
            assert(this->numThreads == 1);
            this->thread[tid] = new Thread(this, 0, NULL);

            // Setup the TC that will serve as the interface to the threads/CPU.
            O3ThreadContext<Impl> *o3_tc = new O3ThreadContext<Impl>;
and the SimObject DerivO3CPU is just a FullO3CPU instantiation:
class DerivO3CPU : public FullO3CPU<O3CPUImpl>
O3ThreadContext is a template class:
template <class Impl>
class O3ThreadContext : public ThreadContext
The only Impl used appears to be O3CPUImpl? This is explicitly instantiated in the source:
template class O3ThreadContext<O3CPUImpl>;
Unlike in SimpleThread however, O3ThreadContext does not contain the register data itself, e.g. O3ThreadContext::readIntRegFlat instead forwards to cpu:
template <class Impl>
RegVal
O3ThreadContext<Impl>::readIntRegFlat(RegIndex reg_idx) const
{
    return cpu->readArchIntReg(reg_idx, thread->threadId());
}
where:
    typedef typename Impl::O3CPU O3CPU;

    /** Pointer to the CPU. */
    O3CPU *cpu;
and:
struct O3CPUImpl
{
    /** The O3CPU type to be used. */
    typedef FullO3CPU<O3CPUImpl> O3CPU;
and at long last FullO3CPU contains the register values:
template <class Impl>
RegVal
FullO3CPU<Impl>::readArchIntReg(int reg_idx, ThreadID tid)
{
    intRegfileReads++;
    PhysRegIdPtr phys_reg = commitRenameMap[tid].lookup(
            RegId(IntRegClass, reg_idx));

    return regFile.readIntReg(phys_reg);
}
So we guess that this difference from SimpleThread is due to the register renaming of the out-of-order implementation.
ThreadState
Owned one per ThreadContext.
Many ThreadContext methods simply forward to ThreadState implementations.
SimpleThread inherits from ThreadState, and forwards to it on several methods e.g.:
    int cpuId() const override { return ThreadState::cpuId(); }
    uint32_t socketId() const override { return ThreadState::socketId(); }
    int threadId() const override { return ThreadState::threadId(); }
    void setThreadId(int id) override { ThreadState::setThreadId(id); }
    ContextID contextId() const override { return ThreadState::contextId(); }
    void setContextId(ContextID id) override { ThreadState::setContextId(id); }
O3ThreadContext on the other hand contains an O3ThreadState:
template <class Impl>
struct O3ThreadState : public ThreadState
at:
template <class Impl>
class O3ThreadContext : public ThreadContext
{
    O3ThreadState<Impl> *thread

    ContextID contextId() const override { return thread->contextId(); }

    void setContextId(ContextID id) override { thread->setContextId(id); }
ExecContext
ExecContext gets used in instruction definitions, e.g.:
build/ARM/arch/arm/generated/exec-ns.cc.inc
Fault Mul::execute(
        ExecContext *xc, Trace::InstRecord *traceData) const
It contains methods to allow interacting with CPU state from inside instruction execution, notably reading and writing from/to registers.
For example, the ARM mul instruction uses ExecContext to read the input operands, multiply them, and write to the output:
Fault Mul::execute(
        ExecContext *xc, Trace::InstRecord *traceData) const
{
    Fault fault = NoFault;
    uint64_t resTemp = 0;
    resTemp = resTemp;
    uint32_t OptCondCodesNZ = 0;
    uint32_t OptCondCodesC = 0;
    uint32_t OptCondCodesV = 0;
    uint32_t Reg0 = 0;
    uint32_t Reg1 = 0;
    uint32_t Reg2 = 0;

    OptCondCodesNZ = xc->readCCRegOperand(this, 0);
    OptCondCodesC = xc->readCCRegOperand(this, 1);
    OptCondCodesV = xc->readCCRegOperand(this, 2);
    Reg1 =
        ((reg1 == PCReg) ? readPC(xc) : xc->readIntRegOperand(this, 3));
    Reg2 =
        ((reg2 == PCReg) ? readPC(xc) : xc->readIntRegOperand(this, 4));

    if (testPredicate(OptCondCodesNZ, OptCondCodesC, OptCondCodesV, condCode)/*auto*/)
    {
        Reg0 = resTemp = Reg1 * Reg2;;
        if (fault == NoFault) {
            {
                uint32_t final_val = Reg0;
                ((reg0 == PCReg) ? setNextPC(xc, Reg0) : xc->setIntRegOperand(this, 0, Reg0));
                if (traceData) { traceData->setData(final_val); }
            };
        }
    } else {
        xc->setPredicate(false);
    }

    return fault;
}
ExecContext is however basically just a wrapper that forwards to other classes that actually contain the data in a microarchitecture-neutral manner. For example, in SimpleExecContext:
    /** Reads an integer register. */
    RegVal
    readIntRegOperand(const StaticInst *si, int idx) override
    {
        numIntRegReads++;
        const RegId& reg = si->srcRegIdx(idx);
        assert(reg.isIntReg());
        return thread->readIntReg(reg.index());
    }
So we see that this just does some register position bookkeeping needed for instruction execution, but the actual data comes from SimpleThread::readIntReg, which is a specialization of gem5 ThreadContext.
ExecContext is a fully virtual class. The hierarchy is:
ExecContext
    SimpleExecContext
    Minor::MinorExecContext
    BaseDynInst
        BaseO3DynInst
If we follow SimpleExecContext creation for example, we see:
class BaseSimpleCPU : public BaseCPU
{
    std::vector<SimpleExecContext*> threadInfo;
and:
BaseSimpleCPU::BaseSimpleCPU(BaseSimpleCPUParams *p)
    : BaseCPU(p),
      curThread(0),
      branchPred(p->branchPred),
      traceData(NULL),
      inst(),
      _status(Idle)
{
    SimpleThread *thread;

    for (unsigned i = 0; i < numThreads; i++) {
        if (FullSystem) {
            thread = new SimpleThread(this, i, p->system,
                                      p->itb, p->dtb, p->isa[i]);
        } else {
            thread = new SimpleThread(this, i, p->system, p->workload[i],
                                      p->itb, p->dtb, p->isa[i]);
        }
        threadInfo.push_back(new SimpleExecContext(this, thread));
        ThreadContext *tc = thread->getTC();
        threadContexts.push_back(tc);
    }
therefore there is one ExecContext for each ThreadContext, and each ExecContext knows about its own ThreadContext.
This makes sense, since each ThreadContext represents one CPU register set, and therefore needs a separate ExecContext which allows instruction implementations to access those registers.
Process
The Process class is used only for gem5 syscall emulation mode, and it represents a process like a Linux userland process, in addition to any further gem5-specific data needed to represent the process.
The first thing most syscall implementations do is to actually pull Process out of gem5 ThreadContext, e.g.:
template <class OS>
SyscallReturn
readFunc(SyscallDesc *desc, ThreadContext *tc,
        int tgt_fd, Addr buf_ptr, int nbytes)
{
    auto p = tc->getProcessPtr();
For example, we can readily see from its interface that it contains several accessors for common process fields:
    inline uint64_t uid() { return _uid; }
    inline uint64_t euid() { return _euid; }
    inline uint64_t gid() { return _gid; }
    inline uint64_t egid() { return _egid; }
Process is a SimObject, and therefore produced directly in e.g. se.py.
se.py produces one Process per executable given:
    workloads = options.cmd.split(';')
    idx = 0
    for wrkld in workloads:
        process = Process(pid = 100 + idx)
and those are placed in the workload property:
    for i in range(np):
        if options.smt:
            system.cpu[i].workload = multiprocesses
        elif len(multiprocesses) == 1:
            system.cpu[i].workload = multiprocesses[0]
        else:
            system.cpu[i].workload = multiprocesses[i]
and finally each thread of a CPU gets assigned to a different such workload:
BaseSimpleCPU::BaseSimpleCPU(BaseSimpleCPUParams *p)
    : BaseCPU(p),
      curThread(0),
      branchPred(p->branchPred),
      traceData(NULL),
      inst(),
      _status(Idle)
{
    SimpleThread *thread;

    for (unsigned i = 0; i < numThreads; i++) {
        if (FullSystem) {
            thread = new SimpleThread(this, i, p->system,
                                      p->itb, p->dtb, p->isa[i]);
        } else {
            thread = new SimpleThread(this, i, p->system, p->workload[i],
                                      p->itb, p->dtb, p->isa[i]);
        }
        threadInfo.push_back(new SimpleExecContext(this, thread));
        ThreadContext *tc = thread->getTC();
        threadContexts.push_back(tc);
    }
gem5 uses a ton of code generation, which makes the project horrendous:
But it has been widely overused to insanity. It likely also exists partly because when the project started in 2003 C++ compilers weren’t that good, so you couldn’t rely on features like templates that much.
Generated code at: build/<ISA>/config/the_isa.hh which e.g. for ARM contains:
#ifndef __CONFIG_THE_ISA_HH__
#define __CONFIG_THE_ISA_HH__

#define ARM_ISA 1
#define MIPS_ISA 2
#define NULL_ISA 3
#define POWER_ISA 4
#define RISCV_ISA 5
#define SPARC_ISA 6
#define X86_ISA 7

enum class Arch {
    ArmISA = ARM_ISA,
    MipsISA = MIPS_ISA,
    NullISA = NULL_ISA,
    PowerISA = POWER_ISA,
    RiscvISA = RISCV_ISA,
    SparcISA = SPARC_ISA,
    X86ISA = X86_ISA
};

#define THE_ISA ARM_ISA
#define TheISA ArmISA
#define THE_ISA_STR "arm"

#endif // __CONFIG_THE_ISA_HH__
Generation code: src/SConscript at def makeTheISA.
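As a hedged illustration of how such a generated header might get consumed (the macro names come from the generated file above, but the usage site is made up):

// Illustrative only: dispatch at compile time on the generated THE_ISA macro.
// THE_ISA / ARM_ISA / X86_ISA / THE_ISA_STR come from config/the_isa.hh.
#include "config/the_isa.hh"

#include <iostream>

int main() {
#if THE_ISA == ARM_ISA
    std::cout << "built for ARM, TheISA expands to ArmISA\n";
#elif THE_ISA == X86_ISA
    std::cout << "built for x86\n";
#else
    std::cout << "built for " << THE_ISA_STR << "\n";
#endif
    return 0;
}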
Tested on gem5 b1623cb2087873f64197e503ab8894b5e4d4c7b4.
Bibliography: https://www.mail-archive.com/gem5-users@gem5.org/msg16989.html
gem5 moves a bit slowly, and if your host compiler is very new, the gem5 build might be broken for it, e.g. this was the case for Ubuntu 19.10 with GCC 9 and gem5 62d75e7105fe172eb906d4f80f360ff8591d4178 from Dec 2019.
E.g. src/cpu/decode_cache.hh includes:
Upstream request: https://gem5.atlassian.net/browse/GEM5-469
Some scons madness.
it is basically impossible to set up an IDE properly with gem5: gem5 Eclipse configuration
It is likely preventing ccache hits when building to different output paths, because it makes the -I includes point to different paths. This is especially important for the gem5 Ruby build, which could have the exact same source files as the non-Ruby builds: https://stackoverflow.com/questions/60340271/can-ccache-handle-symlinks-to-the-same-input-source-file-as-hits
Read the source comments and understand everything that is going on!
The minimal interesting example is to use fork and observe different PIDs.
A more minimal test-like example without forking can be seen at: userland/posix/getpid.c.
This example can for example be used to play with: gem5 syscall emulation multiple executables.
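As a hedged sketch of what the fork variant boils down to (the actual LKMC source may differ):

// Hedged sketch (the actual LKMC example may differ): fork and print the
// PID of both parent and child, which will be different in the output.
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <cstdio>

int main() {
    pid_t pid = fork();
    if (pid == -1) {
        std::perror("fork");
        return 1;
    }
    if (pid == 0) {
        std::printf("child  pid=%ld\n", (long)getpid());
    } else {
        std::printf("parent pid=%ld\n", (long)getpid());
        wait(nullptr);  // reap the child before exiting
    }
    return 0;
}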
getcpu system call and the sched_getaffinity glibc wrapper
Example: userland/linux/sched_getcpu.c
Returns the CPU that the process/thread is currently running on:
So when running a multicore program, we may see that each thread can be running on a different core.
The cores in which the process runs can be fixed with sched_setaffinity as shown at: userland/linux/sched_getaffinity.c.
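A hedged sketch combining both calls (the actual LKMC examples may differ): pin the calling thread to CPU 1 with sched_setaffinity, then confirm it with sched_getcpu:

// Hedged sketch (not the LKMC sources): restrict the affinity mask to CPU 1
// and then ask the kernel which CPU we are actually running on.
#define _GNU_SOURCE 1  // may be needed for sched_getcpu/CPU_SET with glibc
#include <sched.h>

#include <cstdio>

int main() {
    cpu_set_t set;
    CPU_ZERO(&set);
    CPU_SET(1, &set);  // only allow CPU 1
    if (sched_setaffinity(0, sizeof(set), &set) == -1) {
        std::perror("sched_setaffinity");
        return 1;
    }
    std::printf("running on CPU %d\n", sched_getcpu());  // expected: 1
    return 0;
}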
So when I run it with the main thread + 4 threads on a multicore host:
./userland/linux/sched_getcpu.out 4
I see random outputs like:
7
2
1
5
and:
5
0
2
1
Due to the way that gem5 syscall emulation multithreading works however, the output is more deterministic in that case; see that section for further details.
GCC time increased 1.5x with our bloat, but its percentage of the total was greatly reduced, due to new packages being introduced.
make graph-depends shows that most new dependencies come from QEMU and GDB, which we can’t get rid of anyway.
Intel name: "Hyperthreading"
gem5 appears to have attempted to implement hardware threads in gem5 syscall emulation mode: https://github.com/cirosantilli/linux-kernel-module-cheat/issues/104 when using gem5 syscall emulation multiple executables.
On fs.py it is not exposed in any in-tree config however, and as pointed out in the above issue, O3 FS has an assert that prevents it in src/cpu/o3/cpu.cc:
// SMT is not supported in FS mode yet.
assert(this->numThreads == 1);
TODO why only in fs.py? Is there much difference between fs and se from a hyperthreading point of view? Maybe the message is there because, as concluded in gem5 O3ThreadContext, registers for DerivO3CPU are stored in DerivO3CPU itself (FullO3CPU), and therefore there is no way to currently represent multiple register sets per CPU.
Other CPUs just appear to fail non-gracefully, e.g.:
./run --arch aarch64 --emulator gem5 -- --param 'system.cpu[0].numThreads = 2'
fails with:
fatal: fatal condition interrupts.size() != numThreads occurred: CPU system.cpu has 1 interrupt controllers, but is expecting one per thread (2)
Note that cache coherency only applies to memory read/write instructions that explicitly make coherency requirements.
In most ISAs, this tends to be the minority of instructions, and is only used when something is going to modify memory that is known to be shared across threads. For example, an x86 LOCK prefix would be used to increment atomic counters that get incremented across several threads. Outside of those cases, cache coherency is not guaranteed, and behaviour is undefined.
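For example, an atomic counter incremented across several threads, which is the kind of access that compiles down to such locked/exclusive instructions, could look like this generic sketch (not an LKMC example):

// Generic sketch of an atomic counter incremented from several threads;
// on x86 the fetch_add typically compiles to a LOCK-prefixed instruction.
#include <atomic>
#include <iostream>
#include <thread>
#include <vector>

int main() {
    std::atomic<long> counter{0};
    std::vector<std::thread> threads;
    for (int i = 0; i < 4; i++) {
        threads.emplace_back([&counter]() {
            for (int j = 0; j < 100000; j++)
                counter.fetch_add(1, std::memory_order_relaxed);
        });
    }
    for (auto &t : threads)
        t.join();
    std::cout << counter.load() << "\n";  // always 400000
    return 0;
}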
Either they can snoop only control, or both control and data can be snooped.
Mentioned at:
TODO gem5 concrete example.
TODO understand well why those are needed.
https://en.wikipedia.org/wiki/MOSI_protocol The critical MSI vs MOSI section was a bit bogus though, we have to edit it: https://en.wikipedia.org/w/index.php?title=MOSI_protocol&oldid=895443023