mirror of
https://github.com/cirosantilli/linux-kernel-module-cheat.git
synced 2026-01-23 02:05:57 +01:00
userland: move some multithreaded examples from cpp-cheat
Using them mostly to evaluate how well the emulators are handling user mode multithreading.
This commit is contained in:
108
README.adoc
108
README.adoc
@@ -3954,10 +3954,10 @@ Result on <<p51>> at bad30f513c46c1b0995d3a10c0d9bc2a33dc4fa0:
|
||||
At 8d8307ac0710164701f6e14c99a69ee172ccbb70 + 1, I noticed that if you run link:userland/posix/count.c[]:
|
||||
|
||||
....
|
||||
./run --userland userland/posix/count.c --userland-args 3
|
||||
./run --userland userland/posix/count_to.c --userland-args 3
|
||||
....
|
||||
|
||||
it first waits for 3 seconds, and then dumps all the output at once, instead of counting once every second as expected.
|
||||
it first waits for 3 seconds, then the program exits, and then it dumps all the stdout at once, instead of counting once every second as expected.
|
||||
|
||||
The same can be reproduced by copying the raw QEMU command and piping it through `tee`, so I don't think it is a bug in our setup:
|
||||
|
||||
@@ -10078,7 +10078,75 @@ cat /proc/cpuinfo
|
||||
getconf _NPROCESSORS_CONF
|
||||
....
|
||||
|
||||
====== gem5 arm more than 8 cores
|
||||
====== Number of cores in QEMU user mode
|
||||
|
||||
TODO why in <<user-mode-simulation>> QEMU always shows the number of cores of the host. E.g., both of the following output the same as `nproc` on the host:
|
||||
|
||||
....
|
||||
nproc
|
||||
./run --userland userland/cpp/thread_hardware_concurrency.cpp
|
||||
./run --cpus 2 --userland userland/cpp/thread_hardware_concurrency.cpp
|
||||
....
|
||||
|
||||
This random page suggests that QEMU spawns one host thread per guest thread, and thus presumably delegates context switching to the host kernel: https://qemu.weilnetz.de/w64/2012/2012-12-04/qemu-tech.html#User-emulation-specific-details
|
||||
|
||||
We can confirm that with:
|
||||
|
||||
....
|
||||
./run --userland userland/posix/pthread_count.c --userland-args 4
|
||||
ps Haux | grep qemu | wc
|
||||
....
|
||||
|
||||
Remember <<qemu-user-mode-does-not-show-stdout-immediately>> though.
|
||||
|
||||
At 369a47fc6e5c2f4a7f911c1c058b6088f8824463 + 1 QEMU appears to spawn 3 host threads plus one for every new guest thread created. Remember that link:userland/posix/pthread_count.c[] spawns N + 1 total threads if you count the `main` thread.
|
||||
|
||||
====== Number of cores in gem5 user mode
|
||||
|
||||
gem5 user mode multi core has been particularly flaky compared <<number-of-cores-in-qemu-user-mode,to QEMU's>>.
|
||||
|
||||
You have the limitation that you must have at least one core per guest thread, otherwise `pthread_create` fails. For example:
|
||||
|
||||
....
|
||||
./run --cpus 1 --emulator gem5 --static --userland userland/posix/pthread_self.c --userland-args 1
|
||||
....
|
||||
|
||||
fails because that process has a total of 2 threads: one for `main` and one extra thread spawned, see link:userland/posix/pthread_self.c[]. The error message is:
|
||||
|
||||
....
|
||||
pthread_create: Resource temporarily unavailable
|
||||
....
|
||||
|
||||
It works however if we add one extra CPU:
|
||||
|
||||
....
|
||||
./run --cpus 2 --emulator gem5 --static --userland userland/posix/pthread_self.c --userland-args 1
|
||||
....
|
||||
|
||||
This has to do with the fact that gem5 has a more simplistic thread implementation that does not spawn one host thread per guest thread CPU. Maybe this is required to achieve reproducible runs? What is the task switch algorithm then?
|
||||
|
||||
gem5 threading does however show the expected number of cores, e.g.:
|
||||
|
||||
....
|
||||
./run --cpus 1 --userland userland/cpp/thread_hardware_concurrency.cpp --emulator gem5 --static
|
||||
./run --cpus 2 --userland userland/cpp/thread_hardware_concurrency.cpp --emulator gem5 --static
|
||||
....
|
||||
|
||||
outputs `1` and `2` respectively.
|
||||
|
||||
TODO: aarch64 seems to be failing to spawn more than 2 threads at 369a47fc6e5c2f4a7f911c1c058b6088f8824463 + 1:
|
||||
|
||||
....
|
||||
./run --arch aarch64 --cpus 3 --emulator gem5 --static --userland userland/posix/pthread_self.c --userland-args 2
|
||||
....
|
||||
|
||||
fails with:
|
||||
|
||||
....
|
||||
Exiting @ tick 18446744073709551615 because simulate() limit reached
|
||||
....
|
||||
|
||||
====== gem5 ARM full system with more than 8 cores
|
||||
|
||||
https://stackoverflow.com/questions/50248067/how-to-run-a-gem5-arm-aarch64-full-system-simulation-with-fs-py-with-more-than-8
|
||||
|
||||
@@ -11203,7 +11271,7 @@ because glibc was built to expect a newer Linux kernel: <<fatal-kernel-too-old>>
|
||||
It is obviously not possible to understand what they actually do from their commit message, so let's explain them one by one here as we understand them:
|
||||
|
||||
* `drm: Add component-aware simple encoder` allows you to see images through VNC: <<gem5-graphic-mode>>
|
||||
* `gem5: Add support for gem5's extended GIC mode` adds support for more than 8 cores: <<gem5-arm-more-than-8-cores>>
|
||||
* `gem5: Add support for gem5's extended GIC mode` adds support for more than 8 cores: <<gem5-arm-full-system-with-more-than-8-cores>>
|
||||
|
||||
Tested on 649d06d6758cefd080d04dc47fd6a5a26a620874 + 1.
|
||||
|
||||
@@ -11881,9 +11949,13 @@ Programs under link:userland/cpp/[] are examples of https://en.wikipedia.org/wik
|
||||
[[cpp-multithreading]]
|
||||
==== C++ multithreading
|
||||
|
||||
* `<atomic>`: <<cpp17>> 32 "Atomic operations library"
|
||||
* https://en.cppreference.com/w/cpp/header/thread[`<thread>`]
|
||||
** link:userland/cpp/count.cpp[] Exemplifies: `std::this_thread::sleep_for`
|
||||
** link:userland/cpp/thread_hardware_concurrency.cpp[] `std::thread::hardware_concurrency`
|
||||
* https://en.cppreference.com/w/cpp/header/atomic[`<atomic>`]: <<cpp17>> 32 "Atomic operations library"
|
||||
** link:userland/cpp/atomic.cpp[]
|
||||
|
||||
[[cpp-standards]]
|
||||
==== C++ standards
|
||||
|
||||
Like for C, you have to pay for the standards... insane. So we just use the closest free drafts instead.
|
||||
@@ -11899,11 +11971,23 @@ http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/n4659.pdf
|
||||
|
||||
Programs under link:userland/posix/[] are examples of POSIX C programming.
|
||||
|
||||
What is POSIX:
|
||||
These links provide a clear overview of what POSIX is:
|
||||
|
||||
* https://stackoverflow.com/questions/1780599/what-is-the-meaning-of-posix/31865755#31865755
|
||||
* https://unix.stackexchange.com/questions/11983/what-exactly-is-posix/220877#220877
|
||||
|
||||
==== unistd.h
|
||||
|
||||
* link:userland/posix/count.c[] illustrates `sleep()`
|
||||
* link:userland/posix/count_to.c[] minor variation of link:userland/posix/count.c[]
|
||||
|
||||
==== pthreads
|
||||
|
||||
POSIX' multithreading API. This was for a looong time the only "portable" multithreading alternative, until <<cpp-multithreading,C++11 finally added threads>>, thus also extending the portability to Windows.
|
||||
|
||||
* link:userland/posix/pthread_count.c[]
|
||||
* link:userland/posix/pthread_self.c[]
|
||||
|
||||
==== sysconf
|
||||
|
||||
https://pubs.opengroup.org/onlinepubs/9699919799/functions/sysconf.html
|
||||
@@ -11927,9 +12011,15 @@ getconf -a
|
||||
|
||||
The following sections are related to multithreading in userland:
|
||||
|
||||
* <cpp-multithreading>
|
||||
* <<x86-thread-synchronization-primitives>>
|
||||
* <<arm-lse>>
|
||||
* language topics:
|
||||
** <<cpp-multithreading>>
|
||||
** <<pthreads>>
|
||||
* ISA topics:
|
||||
** <<x86-thread-synchronization-primitives>>
|
||||
** <<arm-lse>>
|
||||
* emulator topics:
|
||||
** <<number-of-cores-in-qemu-user-mode>>
|
||||
** <<number-of-cores-in-gem5-user-mode>>
|
||||
|
||||
== Userland assembly
|
||||
|
||||
|
||||
@@ -481,10 +481,13 @@ path_properties_tuples = (
|
||||
{},
|
||||
{
|
||||
'atomic.cpp': {
|
||||
'test_run_args': {'cpus': 2},
|
||||
'test_run_args': {'cpus': 3},
|
||||
# LDADD from LSE
|
||||
'gem5_unimplemented_instruction': True,
|
||||
},
|
||||
'sleep_for.cpp': {
|
||||
'more_than_1s': True,
|
||||
},
|
||||
},
|
||||
),
|
||||
'gcc': (
|
||||
@@ -529,6 +532,13 @@ path_properties_tuples = (
|
||||
'baremetal': True,
|
||||
'signal_received': signal.Signals.SIGHUP,
|
||||
},
|
||||
'pthread_count.c': {
|
||||
'more_than_1s': True,
|
||||
'test_run_args': {'cpus': 2},
|
||||
},
|
||||
'pthread_self.c': {
|
||||
'test_run_args': {'cpus': 2},
|
||||
},
|
||||
'sleep_forever.c': {'more_than_1s': True},
|
||||
'virt_to_phys_test.c': {'more_than_1s': True},
|
||||
}
|
||||
|
||||
1
run
1
run
@@ -809,7 +809,6 @@ Extra options to append at the end of the emulator command line.
|
||||
match = re.search(b'Simulated exit code not 0! Exit code is (\d+)', last_line)
|
||||
if match is not None:
|
||||
exit_status = int(match.group(1))
|
||||
print(repr(last_line))
|
||||
if re.search(b'Exiting @ tick \d+ because simulate\(\) limit reached', last_line) is not None:
|
||||
exit_status = 1
|
||||
if not self.env['userland']:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// https://cirosantilli.com/linux-kernel-module-cheat#cpp
|
||||
// https://github.com/cirosantilli/linux-kernel-module-cheat#cpp-multithreading
|
||||
//
|
||||
// The non-atomic counters have undefined values which get printed:
|
||||
// they are extremely likely to be less than the correct value due to
|
||||
|
||||
16
userland/cpp/count.cpp
Normal file
16
userland/cpp/count.cpp
Normal file
@@ -0,0 +1,16 @@
|
||||
// Count to infinity sleeping one second per number.
|
||||
//
|
||||
// https://github.com/cirosantilli/linux-kernel-module-cheat#cpp-multithreading
|
||||
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
#include <iostream>
|
||||
|
||||
int main() {
    // Print the current value, then pause for one second, forever.
    for (int n = 0;; ++n) {
        std::cout << n << std::endl;
        std::this_thread::sleep_for(std::chrono::seconds(1));
    }
}
|
||||
10
userland/cpp/thread_hardware_concurrency.cpp
Normal file
10
userland/cpp/thread_hardware_concurrency.cpp
Normal file
@@ -0,0 +1,10 @@
|
||||
// http://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine
|
||||
//
|
||||
// Not affected by taskset: https://stackoverflow.com/questions/1006289/how-to-find-out-the-number-of-cpus-using-python/55423170#55423170
|
||||
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
||||
int main() {
    // Print the value reported by std::thread::hardware_concurrency(),
    // followed by a newline.
    const unsigned int ncores = std::thread::hardware_concurrency();
    std::cout << ncores << std::endl;
}
|
||||
@@ -1,4 +1,7 @@
|
||||
/* Count to infinity with 1 second sleep between each increment.
|
||||
*
|
||||
* https://github.com/cirosantilli/linux-kernel-module-cheat#unistd-h
|
||||
*
|
||||
* Sample application: https://cirosantilli.com/linux-kernel-module-cheat#gdb-step-debug-userland-custom-init
|
||||
*/
|
||||
|
||||
|
||||
29
userland/posix/count_to.c
Normal file
29
userland/posix/count_to.c
Normal file
@@ -0,0 +1,29 @@
|
||||
/* Count up to a given number with a 1 second sleep between each increment.
|
||||
*
|
||||
* https://github.com/cirosantilli/linux-kernel-module-cheat#unistd-h
|
||||
*
|
||||
* We need a separate program for this from count.c because count.c
|
||||
* is also usable as an init process, where we can't control the CLI
|
||||
* arguments very well.
|
||||
*/
|
||||
|
||||
#define _XOPEN_SOURCE 700
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/* Print 0, 1, ..., max - 1 on stdout, one number per line, sleeping
 * one second after each number.
 *
 * argv[1], if present, is the upper bound max. It is parsed with base
 * auto-detection (base argument 0 of strtoul). Without arguments max is 1.
 *
 * Returns EXIT_SUCCESS once the count is done, or EXIT_FAILURE if
 * argv[1] is not entirely a valid number.
 */
int main(int argc, char **argv) {
    unsigned long i, max;
    char *end;

    if (argc > 1) {
        /* strtoul returns unsigned long, which matches the type of max. */
        max = strtoul(argv[1], &end, 0);
        /* Reject empty input and trailing garbage instead of silently
         * counting up to 0. */
        if (end == argv[1] || *end != '\0') {
            fprintf(stderr, "usage: %s [max]\n", argv[0]);
            return EXIT_FAILURE;
        }
    } else {
        max = 1;
    }
    for (i = 0; i < max; i++) {
        printf("%lu\n", i);
        sleep(1);
    }
    return EXIT_SUCCESS;
}
|
||||
90
userland/posix/pthread_count.c
Normal file
90
userland/posix/pthread_count.c
Normal file
@@ -0,0 +1,90 @@
|
||||
/* count to infinity in n threads.
|
||||
*
|
||||
* Useful if you need to keep several threads around
|
||||
* to test something.
|
||||
*
|
||||
* Usage:
|
||||
*
|
||||
* ....
|
||||
* ./pthread_count.out 3
|
||||
* ....
|
||||
*
|
||||
* Sample output:
|
||||
*
|
||||
* ....
|
||||
* 0 0
|
||||
* 1 0
|
||||
* 2 0
|
||||
* 1 1
|
||||
* 2 1
|
||||
* 0 1
|
||||
* 1 2
|
||||
* 0 2
|
||||
* 2 2
|
||||
* ....
|
||||
*
|
||||
* Initial motivation: confirm that:
|
||||
*
|
||||
* ....
|
||||
* ./pthread_count.out 4 &
|
||||
* cat /proc/$!/status | grep -E '^Threads:'
|
||||
* kill $!
|
||||
* ....
|
||||
*
|
||||
* shows the right thread count:
|
||||
*
|
||||
* ....
|
||||
* Threads: 5
|
||||
* ....
|
||||
*
|
||||
* which is 1 main thread + 4 we spawned!
|
||||
*/
|
||||
|
||||
#define _XOPEN_SOURCE 700
|
||||
#include <assert.h>
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/* Thread entry point: print "<thread id> <counter>" once per second, forever.
 *
 * arg points to an unsigned int holding the id that this thread prints.
 * The loop never terminates, so the trailing return is never reached.
 */
void* main_thread(void *arg) {
    const unsigned int id = *(unsigned int *)arg;
    unsigned long count;

    for (count = 0;; count++) {
        printf("%u %lu\n", id, count);
        sleep(1);
    }
    return NULL;
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
pthread_t *threads;
|
||||
unsigned int nthreads, i, *thread_args;
|
||||
if (argc > 1) {
|
||||
nthreads = strtoll(argv[1], NULL, 0);
|
||||
} else {
|
||||
nthreads = 1;
|
||||
}
|
||||
threads = malloc(nthreads * sizeof(*threads));
|
||||
thread_args = malloc(nthreads * sizeof(*thread_args));
|
||||
for (i = 0; i < nthreads; ++i) {
|
||||
thread_args[i] = i;
|
||||
assert(pthread_create(
|
||||
&threads[i],
|
||||
NULL,
|
||||
main_thread,
|
||||
(void*)&thread_args[i]
|
||||
) == 0);
|
||||
}
|
||||
for (i = 0; i < nthreads; ++i) {
|
||||
pthread_join(threads[i], NULL);
|
||||
}
|
||||
free(thread_args);
|
||||
free(threads);
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
105
userland/posix/pthread_self.c
Normal file
105
userland/posix/pthread_self.c
Normal file
@@ -0,0 +1,105 @@
|
||||
/* Spawn N threads that print their TID with pthread_self and other
|
||||
* ID-like information for multiple threads.
|
||||
*
|
||||
* https://github.com/cirosantilli/linux-kernel-module-cheat#pthreads
|
||||
*
|
||||
* Sample usage:
|
||||
*
|
||||
* ....
|
||||
* ./pthread_self.out 4
|
||||
* ....
|
||||
*
|
||||
* Sample output:
|
||||
*
|
||||
* ....
|
||||
* 0 tid: 139852943714048
|
||||
* tid, getpid(), pthread_self() = 0, 13709, 139852943714048
|
||||
* tid, getpid(), pthread_self() = 1, 13709, 139852935321344
|
||||
* 1 tid: 139852935321344
|
||||
* 2 tid: 139852926928640
|
||||
* tid, getpid(), pthread_self() = 2, 13709, 139852926928640
|
||||
* 3 tid: 139852918535936
|
||||
* tid, getpid(), pthread_self() = 3, 13709, 139852918535936
|
||||
* ....
|
||||
*
|
||||
* Note how the PID is the same for all threads.
|
||||
*/
|
||||
|
||||
#define _XOPEN_SOURCE 700
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/* Thread entry point: print the thread's index, the process ID and the
 * value of pthread_self() on one line of stdout.
 *
 * arg points to the unsigned int index assigned to this thread by main.
 * Always returns NULL.
 */
void* main_thread(void *arg) {
    unsigned int argument;

    /* main stores the index as unsigned int, so read and print it
     * with the same type. */
    argument = *((unsigned int*)arg);
    printf(
        "tid, getpid(), pthread_self() = "
        "%u, %ju, %ju\n",
        argument,
        (uintmax_t)getpid(),
        (uintmax_t)pthread_self()
    );
    return NULL;
}
|
||||
|
||||
int main(int argc, char**argv) {
|
||||
pthread_t *threads;
|
||||
unsigned int nthreads, i, *thread_args;
|
||||
int rc;
|
||||
|
||||
/* CLI arguments. */
|
||||
if (argc > 1) {
|
||||
nthreads = strtoll(argv[1], NULL, 0);
|
||||
} else {
|
||||
nthreads = 1;
|
||||
}
|
||||
threads = malloc(nthreads * sizeof(*threads));
|
||||
thread_args = malloc(nthreads * sizeof(*thread_args));
|
||||
|
||||
/* main thread for comparison. */
|
||||
printf(
|
||||
"tid, getpid(), pthread_self() = "
|
||||
"main, %ju, %ju\n",
|
||||
(uintmax_t)getpid(),
|
||||
(uintmax_t)pthread_self()
|
||||
);
|
||||
|
||||
/* Create all threads */
|
||||
for (i = 0; i < nthreads; ++i) {
|
||||
thread_args[i] = i;
|
||||
rc = pthread_create(
|
||||
&threads[i],
|
||||
NULL,
|
||||
main_thread,
|
||||
(void*)&thread_args[i]
|
||||
);
|
||||
if (rc != 0) {
|
||||
errno = rc;
|
||||
perror("pthread_create");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
assert(rc == 0);
|
||||
printf("%d tid: %ju\n", i, (uintmax_t)threads[i]);
|
||||
}
|
||||
|
||||
/* Wait for all threads to complete */
|
||||
for (i = 0; i < nthreads; ++i) {
|
||||
rc = pthread_join(threads[i], NULL);
|
||||
if (rc != 0) {
|
||||
printf("%s\n", strerror(rc));
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
/* Cleanup. */
|
||||
free(thread_args);
|
||||
free(threads);
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
Reference in New Issue
Block a user