diff --git a/README.adoc b/README.adoc index 3a11446..1ed9883 100644 --- a/README.adoc +++ b/README.adoc @@ -3954,10 +3954,10 @@ Result on <> at bad30f513c46c1b0995d3a10c0d9bc2a33dc4fa0: At 8d8307ac0710164701f6e14c99a69ee172ccbb70 + 1, I noticed that if you run link:userland/posix/count.c[]: .... -./run --userland userland/posix/count.c --userland-args 3 +./run --userland userland/posix/count_to.c --userland-args 3 .... -it first waits for 3 seconds, and then dumps all the output at once, instead of counting once every second as expected. +it first waits for 3 seconds, then the program exits, and then it dumps all the stdout at once, instead of counting once every second as expected. The same can be reproduced by copying the raw QEMU command and piping it through `tee`, so I don't think it is a bug in our setup: @@ -10078,7 +10078,75 @@ cat /proc/cpuinfo getconf _NPROCESSORS_CONF .... -====== gem5 arm more than 8 cores +====== Number of cores in QEMU user mode + +TODO: why does QEMU in <> always show the number of cores of the host? E.g., both of the following output the same as `nproc` on the host: + +.... +nproc +./run --userland userland/cpp/thread_hardware_concurrency.cpp +./run --cpus 2 --userland userland/cpp/thread_hardware_concurrency.cpp +.... + +This random page suggests that QEMU spawns one host thread per guest thread, and thus presumably delegates context switching to the host kernel: https://qemu.weilnetz.de/w64/2012/2012-12-04/qemu-tech.html#User-emulation-specific-details + +We can confirm that with: + +.... +./run --userland userland/posix/pthread_count.c --userland-args 4 +ps Haux | grep qemu | wc +.... + +Remember <> though. + +At 369a47fc6e5c2f4a7f911c1c058b6088f8824463 + 1 QEMU appears to spawn 3 host threads plus one for every new guest thread created. Remember that link:userland/posix/pthread_count.c[] spawns N + 1 total threads if you count the `main` thread. 
+ +====== Number of cores in gem5 user mode + +gem5 user mode multi core has been particularly flaky compared to <>. + +You have the limitation that you must have at least one core per guest thread, otherwise `pthread_create` fails. For example: + +.... +./run --cpus 1 --emulator gem5 --static --userland userland/posix/pthread_self.c --userland-args 1 +.... + +fails because that process has a total of 2 threads: one for `main` and one extra thread spawned: link:userland/posix/pthread_self.c[]. The error message is: + +.... +pthread_create: Resource temporarily unavailable +.... + +It works however if we add one extra CPU: + +.... +./run --cpus 2 --emulator gem5 --static --userland userland/posix/pthread_self.c --userland-args 1 +.... + +This has to do with the fact that gem5 has a more simplistic thread implementation that does not spawn one host thread per guest thread CPU. Maybe this is required to achieve reproducible runs? What is the task switch algorithm then? + +gem5 threading does however show the expected number of cores, e.g.: + +.... +./run --cpus 1 --userland userland/cpp/thread_hardware_concurrency.cpp --emulator gem5 --static +./run --cpus 2 --userland userland/cpp/thread_hardware_concurrency.cpp --emulator gem5 --static +.... + +outputs `1` and `2` respectively. + +TODO: aarch64 seems to be failing to spawn more than 2 threads at 369a47fc6e5c2f4a7f911c1c058b6088f8824463 + 1: + +.... +./run --arch aarch64 --cpus 3 --emulator gem5 --static --userland userland/posix/pthread_self.c --userland-args 2 +.... + +fails with: + +.... +Exiting @ tick 18446744073709551615 because simulate() limit reached +.... 
+ +====== gem5 ARM full system with more than 8 cores https://stackoverflow.com/questions/50248067/how-to-run-a-gem5-arm-aarch64-full-system-simulation-with-fs-py-with-more-than-8 @@ -11203,7 +11271,7 @@ because glibc was built to expect a newer Linux kernel: <> It is obviously not possible to understand what they actually do from their commit message, so let's explain them one by one here as we understand them: * `drm: Add component-aware simple encoder` allows you to see images through VNC: <> -* `gem5: Add support for gem5's extended GIC mode` adds support for more than 8 cores: <> +* `gem5: Add support for gem5's extended GIC mode` adds support for more than 8 cores: <> Tested on 649d06d6758cefd080d04dc47fd6a5a26a620874 + 1. @@ -11881,9 +11949,13 @@ Programs under link:userland/cpp/[] are examples of https://en.wikipedia.org/wik [[cpp-multithreading]] ==== C++ multithreading -* ``: <> 32 "Atomic operations library" +* https://en.cppreference.com/w/cpp/header/thread[`<thread>`] +** link:userland/cpp/count.cpp[] Exemplifies: `std::this_thread::sleep_for` +** link:userland/cpp/thread_hardware_concurrency.cpp[] Exemplifies: `std::thread::hardware_concurrency` +* https://en.cppreference.com/w/cpp/header/atomic[`<atomic>`]: <> 32 "Atomic operations library" ** link:userland/cpp/atomic.cpp[] +[[cpp-standards]] ==== C++ standards Like for C, you have to pay for the standards... insane. So we just use the closest free drafts instead. @@ -11899,11 +11971,23 @@ http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/n4659.pdf Programs under link:userland/posix/[] are examples of POSIX C programming. 
-What is POSIX: +These links provide a clear overview of what POSIX is: * https://stackoverflow.com/questions/1780599/what-is-the-meaning-of-posix/31865755#31865755 * https://unix.stackexchange.com/questions/11983/what-exactly-is-posix/220877#220877 +==== unistd.h + +* link:userland/posix/count.c[] illustrates `sleep()` +* link:userland/posix/count_to.c[] minor variation of link:userland/posix/count.c[] + +==== pthreads + +POSIX' multithreading API. This was for a looong time the only "portable" multithreading alternative, until <>, thus also extending the portability to Windows. + +* link:userland/posix/pthread_count.c[] +* link:userland/posix/pthread_self.c[] + ==== sysconf https://pubs.opengroup.org/onlinepubs/9699919799/functions/sysconf.html @@ -11927,9 +12011,15 @@ getconf -a The following sections are related to multithreading in userland: -* -* <> -* <> +* language topics: +** <> +** <> +* ISA topics: +** <> +** <> +* emulator topics: +** <> +** <> == Userland assembly diff --git a/path_properties.py b/path_properties.py index 97dfff7..404a393 100644 --- a/path_properties.py +++ b/path_properties.py @@ -481,10 +481,13 @@ path_properties_tuples = ( {}, { 'atomic.cpp': { - 'test_run_args': {'cpus': 2}, + 'test_run_args': {'cpus': 3}, # LDADD from LSE 'gem5_unimplemented_instruction': True, }, + 'sleep_for.cpp': { + 'more_than_1s': True, + }, }, ), 'gcc': ( @@ -529,6 +532,13 @@ path_properties_tuples = ( 'baremetal': True, 'signal_received': signal.Signals.SIGHUP, }, + 'pthread_count.c': { + 'more_than_1s': True, + 'test_run_args': {'cpus': 2}, + }, + 'pthread_self.c': { + 'test_run_args': {'cpus': 2}, + }, 'sleep_forever.c': {'more_than_1s': True}, 'virt_to_phys_test.c': {'more_than_1s': True}, } diff --git a/run b/run index f708485..6a41af6 100755 --- a/run +++ b/run @@ -809,7 +809,6 @@ Extra options to append at the end of the emulator command line. match = re.search(b'Simulated exit code not 0! 
Exit code is (\d+)', last_line) if match is not None: exit_status = int(match.group(1)) - print(repr(last_line)) if re.search(b'Exiting @ tick \d+ because simulate\(\) limit reached', last_line) is not None: exit_status = 1 if not self.env['userland']: diff --git a/userland/cpp/atomic.cpp b/userland/cpp/atomic.cpp index 545487f..39792fe 100644 --- a/userland/cpp/atomic.cpp +++ b/userland/cpp/atomic.cpp @@ -1,4 +1,4 @@ -// https://cirosantilli.com/linux-kernel-module-cheat#cpp +// https://github.com/cirosantilli/linux-kernel-module-cheat#cpp-multithreading // // The non-atomic counters have undefined values which get printed: // they are extremely likely to be less than the correct value due to diff --git a/userland/cpp/count.cpp b/userland/cpp/count.cpp new file mode 100644 index 0000000..d63038f --- /dev/null +++ b/userland/cpp/count.cpp @@ -0,0 +1,16 @@ +// Count to infinity sleeping one second per number. +// +// https://github.com/cirosantilli/linux-kernel-module-cheat#cpp-multithreading + +#include +#include +#include + +int main() { + int i = 0; + while (1) { + std::cout << i << std::endl; + std::this_thread::sleep_for(std::chrono::seconds(1)); + i++; + } +} diff --git a/userland/cpp/thread_hardware_concurrency.cpp b/userland/cpp/thread_hardware_concurrency.cpp new file mode 100644 index 0000000..4b4afda --- /dev/null +++ b/userland/cpp/thread_hardware_concurrency.cpp @@ -0,0 +1,10 @@ +// http://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine +// +// Not affected by taskset: https://stackoverflow.com/questions/1006289/how-to-find-out-the-number-of-cpus-using-python/55423170#55423170 + +#include +#include + +int main() { + std::cout << std::thread::hardware_concurrency() << std::endl; +} diff --git a/userland/posix/count.c b/userland/posix/count.c index 1bbb454..df4380d 100644 --- a/userland/posix/count.c +++ b/userland/posix/count.c @@ -1,4 +1,7 @@ /* Count to infinity with 1 second sleep between each increment. 
+ * + * https://github.com/cirosantilli/linux-kernel-module-cheat#unistd-h + * * Sample application: https://cirosantilli.com/linux-kernel-module-cheat#gdb-step-debug-userland-custom-init */ diff --git a/userland/posix/count_to.c b/userland/posix/count_to.c new file mode 100644 index 0000000..2238a6f --- /dev/null +++ b/userland/posix/count_to.c @@ -0,0 +1,29 @@ +/* Count up to a given number, with a 1 second sleep between each increment. + * + * https://github.com/cirosantilli/linux-kernel-module-cheat#unistd-h + * + * We need a separate program for this from count.c because count.c + * is also usable as an init process, where we can't control the CLI + * arguments very well. + */ + +#define _XOPEN_SOURCE 700 +#include +#include +#include +#include + +int main(int argc, char **argv) { + unsigned long i, max; + if (argc > 1) { + max = strtoul(argv[1], NULL, 0); + } else { + max = 1; + } + i = 0; + while (i < max) { + printf("%lu\n", i); + i++; + sleep(1); + } +} diff --git a/userland/posix/pthread_count.c b/userland/posix/pthread_count.c new file mode 100644 index 0000000..e7e41dc --- /dev/null +++ b/userland/posix/pthread_count.c @@ -0,0 +1,90 @@ +/* Count to infinity in n threads. + * + * Useful if you need to keep several threads around + * to test something. + * + * Usage: + * + * .... + * ./pthread_count.out 3 + * .... + * + * Sample output: + * + * .... + * 0 0 + * 1 0 + * 2 0 + * 1 1 + * 2 1 + * 0 1 + * 1 2 + * 0 2 + * 2 2 + * .... + * + * Initial motivation: confirm that: + * + * .... + * ./pthread_count.out 4 & + * cat /proc/$!/status | grep -E '^Threads:' + * kill $! + * .... + * + * shows the right thread count: + * + * .... + * Threads: 5 + * .... + * + * which is 1 main thread + 4 we spawned! 
+ */ + +#define _XOPEN_SOURCE 700 +#include +#include +#include +#include +#include +#include +#include +#include + +void* main_thread(void *arg) { + unsigned long i = 0; + unsigned int thread_id; + thread_id = *((unsigned int*)arg); + while (1) { + printf("%u %lu\n", thread_id, i); + i++; + sleep(1); + } + return NULL; +} + +int main(int argc, char **argv) { + pthread_t *threads; + unsigned int nthreads, i, *thread_args; + if (argc > 1) { + nthreads = strtoll(argv[1], NULL, 0); + } else { + nthreads = 1; + } + threads = malloc(nthreads * sizeof(*threads)); + thread_args = malloc(nthreads * sizeof(*thread_args)); + for (i = 0; i < nthreads; ++i) { + thread_args[i] = i; + /* Not inside assert(): NDEBUG would compile the call out. */ + int rc = pthread_create( + &threads[i], NULL, main_thread, &thread_args[i] + ); + assert(rc == 0); + (void)rc; + } + for (i = 0; i < nthreads; ++i) { + pthread_join(threads[i], NULL); + } + free(thread_args); + free(threads); + return EXIT_SUCCESS; +} diff --git a/userland/posix/pthread_self.c b/userland/posix/pthread_self.c new file mode 100644 index 0000000..a5e08b5 --- /dev/null +++ b/userland/posix/pthread_self.c @@ -0,0 +1,105 @@ +/* Spawn N threads that print their TID with pthread_self and other + * ID-like information for multiple threads. + * + * https://github.com/cirosantilli/linux-kernel-module-cheat#pthreads + * + * Sample usage: + * + * .... + * ./pthread_self.out 4 + * .... + * + * Sample output: + * + * .... + * 0 tid: 139852943714048 + * tid, getpid(), pthread_self() = 0, 13709, 139852943714048 + * tid, getpid(), pthread_self() = 1, 13709, 139852935321344 + * 1 tid: 139852935321344 + * 2 tid: 139852926928640 + * tid, getpid(), pthread_self() = 2, 13709, 139852926928640 + * 3 tid: 139852918535936 + * tid, getpid(), pthread_self() = 3, 13709, 139852918535936 + * .... + * + * Note how the PID is the same for all threads. 
+ */ + +#define _XOPEN_SOURCE 700 +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void* main_thread(void *arg) { + unsigned int argument; + argument = *((unsigned int*)arg); + printf( + "tid, getpid(), pthread_self() = " + "%u, %ju, %ju\n", + argument, + (uintmax_t)getpid(), + (uintmax_t)pthread_self() + ); + return NULL; +} + +int main(int argc, char**argv) { + pthread_t *threads; + unsigned int nthreads, i, *thread_args; + int rc; + + /* CLI arguments. */ + if (argc > 1) { + nthreads = strtoll(argv[1], NULL, 0); + } else { + nthreads = 1; + } + threads = malloc(nthreads * sizeof(*threads)); + thread_args = malloc(nthreads * sizeof(*thread_args)); + + /* main thread for comparison. */ + printf( + "tid, getpid(), pthread_self() = " + "main, %ju, %ju\n", + (uintmax_t)getpid(), + (uintmax_t)pthread_self() + ); + + /* Create all threads */ + for (i = 0; i < nthreads; ++i) { + thread_args[i] = i; + rc = pthread_create( + &threads[i], + NULL, + main_thread, + (void*)&thread_args[i] + ); + if (rc != 0) { + errno = rc; + perror("pthread_create"); + exit(EXIT_FAILURE); + } + assert(rc == 0); + printf("%u tid: %ju\n", i, (uintmax_t)threads[i]); + } + + /* Wait for all threads to complete */ + for (i = 0; i < nthreads; ++i) { + rc = pthread_join(threads[i], NULL); + if (rc != 0) { + fprintf(stderr, "pthread_join: %s\n", strerror(rc)); + exit(EXIT_FAILURE); + } + } + + /* Cleanup. */ + free(thread_args); + free(threads); + return EXIT_SUCCESS; +}