From 4680b76b6ec1d4f23d07ea1e111811b2c858da7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciro=20Santilli=20=E5=85=AD=E5=9B=9B=E4=BA=8B=E4=BB=B6=20?= =?UTF-8?q?=E6=B3=95=E8=BD=AE=E5=8A=9F?= Date: Thu, 16 Jan 2020 00:00:00 +0000 Subject: [PATCH] d29a07ddad499f273cc90dd66e40f8474b5dfc40 --- index.html | 266 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 225 insertions(+), 41 deletions(-) diff --git a/index.html b/index.html index d31cc33..21df1d2 100644 --- a/index.html +++ b/index.html @@ -1353,17 +1353,19 @@ body.book #toc,body.book #preamble,body.book h1.sect0,body.book .sect1>h2{page-b
  • 21.8. Benchmarks
  • 21.9. Userland content bibliography
  • @@ -1668,7 +1670,7 @@ body.book #toc,body.book #preamble,body.book h1.sect0,body.book .sect1>h2{page-b
  • 24.6.3.2.1. ARM FADD vs VADD
  • -
  • 24.6.3.3. ARMv8 aarch64 ld2 instruction
  • +
  • 24.6.3.3. ARMv8 aarch64 LD2 instruction
  • 24.6.4. ARM SIMD bibliography
  • @@ -1755,9 +1757,11 @@ body.book #toc,body.book #preamble,body.book h1.sect0,body.book .sect1>h2{page-b
    @@ -24220,7 +24265,23 @@ cblas_dgemm( CblasColMajor, CblasNoTrans, CblasTrans,3,3,2 ,1, A,3, B,
    -

    21.8.1. Dhrystone

    +

    21.8.1. Boost

    + + + +
    +
    +

    21.8.2. Dhrystone

    @@ -24317,7 +24378,7 @@ cblas_dgemm( CblasColMajor, CblasNoTrans, CblasTrans,3,3,2 ,1, A,3, B,
    -

    21.8.2. STREAM benchmark

    +

    21.8.3. STREAM benchmark

    @@ -24391,7 +24452,7 @@ times[3 * ntimes + k] = mysecond() - times[3 * ntimes + k];
    -

    21.8.3. PARSEC benchmark

    +

    21.8.4. PARSEC benchmark

    We have ported parts of the PARSEC benchmark for cross compilation at: https://github.com/cirosantilli/parsec-benchmark See the documentation on that repo to find out which benchmarks have been ported. Some of the benchmarks are segfaulting; they are documented in that repo.

    @@ -24409,7 +24470,7 @@ times[3 * ntimes + k] = mysecond() - times[3 * ntimes + k];
    -
    21.8.3.1. PARSEC benchmark without parsecmgmt
    +
    21.8.4.1. PARSEC benchmark without parsecmgmt
    ./build --arch arm --download-dependencies gem5-buildroot parsec-benchmark
    @@ -24443,7 +24504,7 @@ times[3 * ntimes + k] = mysecond() - times[3 * ntimes + k];
     
    -
    21.8.3.2. PARSEC change the input size
    +
    21.8.4.2. PARSEC change the input size

    Running a benchmark of a size different than test, e.g. simsmall, requires a rebuild with:

    @@ -24507,7 +24568,7 @@ times[3 * ntimes + k] = mysecond() - times[3 * ntimes + k];
    -
    21.8.3.3. PARSEC benchmark with parsecmgmt
    +
    21.8.4.3. PARSEC benchmark with parsecmgmt

    Most users won’t want to use this method because:

    @@ -24570,7 +24631,7 @@ parsecmgmt -a run -p splash2x.fmm -i test
    -
    21.8.3.4. PARSEC uninstall
    +
    21.8.4.4. PARSEC uninstall

    If you want to remove PARSEC later, Buildroot doesn’t provide an automated package removal mechanism as mentioned at: Section 20.6, “Remove Buildroot packages”, but the following procedure should be satisfactory:

    @@ -24588,7 +24649,7 @@ parsecmgmt -a run -p splash2x.fmm -i test
    -
    21.8.3.5. PARSEC benchmark hacking
    +
    21.8.4.5. PARSEC benchmark hacking

    If you end up going inside submodules/parsec-benchmark to hack up the benchmark (you will!), these tips will be helpful.

    @@ -24640,6 +24701,21 @@ git clean -xdf .
    +
    +

    21.8.5. userland/libs directory

    +
    +

    Tests under userland/libs require certain optional libraries to be installed on the target, and are not built or tested by default; you must enable them with either:

    +
    +
    +
    +
    --package <package>
    +--package-all
    +
    +
    +
    +

    See for example BLAS.

    +
    +

    21.9. Userland content bibliography

    @@ -29308,7 +29384,7 @@ AArch64, see Procedure Call Standard for the ARM 64-bit Architecture.

    -
    24.6.3.3. ARMv8 aarch64 ld2 instruction
    +
    24.6.3.3. ARMv8 aarch64 LD2 instruction
    @@ -31048,6 +31124,9 @@ IN: main

    userland/arch/aarch64/freestanding/linux/wfe.S

  • +

    userland/arch/aarch64/freestanding/linux/sevl_wfe.S

    +
  • +
  • userland/arch/aarch64/freestanding/linux/wfe_wfe.S: run WFE twice, because gem5 390a74f59934b85d91489f8a563450d8321b602d does not sleep on the first, see also: gem5 ARM WFE

  • @@ -31091,9 +31170,6 @@ IN: main

    and power consumption is key in ARM applications.

  • -

    SEV is not the only thing that can wake up a WFE, it is only an explicit software way to do it. Notably, global monitor operations on memory accesses of regions marked by LDAXR and STLXR instructions can also wake up a WFE sleeping core. This is done to allow spinlocks opens to automatically wake up WFE sleeping cores at free time without the need for a explicit SEV.

    -
    -

    Quotes for the above ARMv8 architecture reference manual db G1.18.1 "Wait For Event and Send Event":

    @@ -31185,7 +31261,38 @@ IN: main

    For how userland spinlocks and mutexes are implemented see Userland mutex implementation.

    -
    27.8.3.1.1. WFE from userland
    +
    27.8.3.1.1. ARM WFE global monitor events
    +
    +

    Examples:

    +
    + +
    +

    SEV is not the only thing that can wake up a WFE, it is only an explicit software way to do it.

    +
    +
    +

    Notably, global monitor operations on memory accesses of regions marked by LDAXR and STLXR instructions can also wake up a WFE sleeping core.

    +
    +
    +

    This is done to allow spinlock unlocks to automatically wake up WFE sleeping cores when the lock is freed, without the need for an explicit SEV.

    +
    +
    +

    This is shown in the wfe_ldxr_stxr.cpp example, which can only terminate in gem5 user mode simulation due to this event.

    +
    +
    +

    Note that that program still terminates when running on top of the Linux kernel as explained at: WFE from userland.

    +
    +
    +
    +
    27.8.3.1.2. WFE from userland

    WFE and SEV are usable from userland, and are part of an efficient spinlock implementation (which userland should arguably stay away from and rather use the futex system call, which allows for non-busy sleep instead), which maybe is not something that userland should ever do and just stick to mutexes?

    @@ -31272,14 +31379,46 @@ IN: main
  • after a few interrupt handler instructions, the first ERET instruction exits the handler and comes back directly to the instruction after the WFE at PC 0x400080 == 0x40007c + 4

  • +
  • +

    the execution of the interrupt handler woke up the core that was in WFE, and it now continues normal execution past the WFE

    +
  • Therefore, a WFE in userland is treated much like a busy loop by the Linux kernel: the kernel does not seem to try and explicitly make room for other processes as would happen on a futex.

    +
    +

    The following test checks that SEV events don’t wake up futexes, running forever in case of success. In gem5 syscall emulation multithreading, this is crucial to prevent deadlocks:

    +
    +
    + +
    -
    27.8.3.1.2. gem5 ARM WFE
    +
    27.8.3.1.3. ARMv8 spinlock pattern
    + +
    +
    +
           sev
    +   1:  wfe
    +   2:  ldaxr  w1, [w0]
    +       cbnz   w1, %1b
    +       stxr   w1, w2, [w0]
    +       cbnz   w1, %2b
    +
    +
    +
    +

    It is the STXR from the unlock on another core that automatically wakes up the spinlock afterwards: https://stackoverflow.com/questions/32276313/how-is-a-spin-lock-woken-up-in-linux-arm64

    +
    +
    +
    +
    27.8.3.1.4. gem5 ARM WFE

    gem5 390a74f59934b85d91489f8a563450d8321b602d does not sleep on the first WFE on either syscall emulation or full system, because the code does:

    @@ -31321,7 +31460,7 @@ IN: main
    -
    27.8.3.1.3. ARM YIELD instruction
    +
    27.8.3.1.5. ARM YIELD instruction
    @@ -32338,9 +32477,9 @@ cd -
    -

    29.1. Continuous integraion

    +

    29.1. Continuous integration

    -

    We have exploreed a few Continuous integration solutions.

    +

    We have explored a few Continuous integration solutions.

    We haven’t set up any of them yet.

    @@ -32354,7 +32493,7 @@ cd -

    29.1.2. CircleCI

    -

    This setup sucessfully built gem5 on every commit: .circleci/config.yml

    +

    This setup successfully built gem5 on every commit: .circleci/config.yml

    Enabling it is however blocked on: https://github.com/cirosantilli/linux-kernel-module-cheat/issues/79 so we disabled the builds on the web UI.

    @@ -32570,6 +32709,15 @@ instructions 124346081

    a18f28e263c91362519ef550150b5c9d75fa3679 + 1

    userland/gcc/busy_loop.c -O0

    +

    gem5 --arch aarch64 --gem5-build-id debug

    +

    10^5

    +

    32

    +

    2.528728 * 10^6

    +

    0.08

    + + +

    a18f28e263c91362519ef550150b5c9d75fa3679 + 1

    +

    userland/gcc/busy_loop.c -O0

    gem5 --arch aarch64 -- --cpu-type MinorCPU --caches

    10^6

    31

    @@ -32614,7 +32762,7 @@ instructions 124346081

    ab6f7331406b22f8ab6e2df5f8b8e464fb35b611

    -

    userland/c/m5ops.c -O0

    +

    glibc C pre-main userland/c/m5ops.c -O0

    gem5 --arch aarch64 --userland-args e

    1

    2

    @@ -32623,13 +32771,49 @@ instructions 124346081

    ab6f7331406b22f8ab6e2df5f8b8e464fb35b611

    -

    userland/cpp/m5ops.cpp -O0

    +

    glibc C pre-main userland/c/m5ops.c -O0

    +

    gem5 --arch aarch64 --userland-args e --gem5-build-type debug

    +

    1

    +

    2

    +

    1.26479 * 10^5

    +

    0.05

    + + +

    ab6f7331406b22f8ab6e2df5f8b8e464fb35b611

    +

    glibc C++ pre-main userland/cpp/m5ops.cpp -O0

    gem5 --arch aarch64 --userland-args e

    1

    2

    2.385012 * 10^6

    1

    + +

    ab6f7331406b22f8ab6e2df5f8b8e464fb35b611

    +

    glibc C++ pre-main userland/cpp/m5ops.cpp -O0

    +

    gem5 --arch aarch64 --userland-args e --gem5-build-type debug

    +

    1

    +

    25

    +

    2.385012 * 10^6

    +

    0.1

    + + +

    ab6f7331406b22f8ab6e2df5f8b8e464fb35b611

    +

    immediate exit userland/arch/aarch64/freestanding/linux/gem5_exit.S -O0

    +

    gem5 --arch aarch64

    +

    1

    +

    1

    + + + + +

    ab6f7331406b22f8ab6e2df5f8b8e464fb35b611

    +

    immediate exit userland/arch/aarch64/freestanding/linux/gem5_exit.S -O0

    +

    gem5 --arch aarch64 --gem5-build-type debug

    +

    1

    +

    1

    + + +
    @@ -32748,7 +32932,7 @@ instructions 124346081

    First we build Dhrystone manually statically since dynamic linking is broken in gem5 as explained at: Section 10.7, “gem5 syscall emulation mode”.

    -

    TODO: move this section to our new custom dhrystone setup: Section 21.8.1, “Dhrystone”.

    +

    TODO: move this section to our new custom dhrystone setup: Section 21.8.2, “Dhrystone”.

    gem5 user mode: