From ef4fa33ef7d8b67b36da4cbc9f155725fe191746 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciro=20Santilli=20=E5=85=AD=E5=9B=9B=E4=BA=8B=E4=BB=B6=20?= =?UTF-8?q?=E6=B3=95=E8=BD=AE=E5=8A=9F?= Date: Sun, 16 Jun 2019 00:00:04 +0000 Subject: [PATCH] x86 asm: move RDTSC from x86-assembly-cheat, create RDTSCP --- README.adoc | 115 ++++++++++++------ lkmc.c | 6 +- userland/arch/x86_64/{ => intrinsics}/rdtsc.c | 5 +- userland/arch/x86_64/intrinsics/rdtscp.c | 15 +++ userland/arch/x86_64/rdtsc.S | 12 ++ userland/arch/x86_64/rdtscp.S | 20 +++ 6 files changed, 133 insertions(+), 40 deletions(-) rename userland/arch/x86_64/{ => intrinsics}/rdtsc.c (73%) create mode 100644 userland/arch/x86_64/intrinsics/rdtscp.c create mode 100644 userland/arch/x86_64/rdtsc.S create mode 100644 userland/arch/x86_64/rdtscp.S diff --git a/README.adoc b/README.adoc index 48ec621..acadc49 100644 --- a/README.adoc +++ b/README.adoc @@ -12489,6 +12489,85 @@ Generated some polemic when kernel devs wanted to use it as part of `/dev/random RDRAND sets the carry flag when data is ready so we must loop if the carry flag isn't set. +=== x86 system instructions + +<> 5.20 "SYSTEM INSTRUCTIONS" + +==== x86 RDTSC instruction + +Sources: + +* link:userland/arch/x86_64/rdtsc.S[] +* link:userland/arch/x86_64/intrinsics/rdtsc.c[] + +Try running the programs multiple times, and watch the value increase, and then try to correlate it with `/proc/cpuinfo` frequency! + +.... +while true; do sleep 1 && ./userland/arch/x86_64/rdtsc.out; done +.... + +RDTSC stores its output to EDX:EAX, even in 64-bit mode, top bits are zeroed out. + +TODO: review this section, make a more controlled userland experiment with <> instrumentation. + +Let's have some fun and try to correlate the gem5 <> `system.cpu.numCycles` cycle count with the link:https://en.wikipedia.org/wiki/Time_Stamp_Counter[x86 RDTSC instruction] that is supposed to do the same thing: + +.... 
+./build-userland --static userland/arch/x86_64/rdtsc.S +./run --eval './arch/x86_64/rdtsc.out;m5 exit;' --emulator gem5 +./gem5-stat +.... + +RDTSC outputs a cycle count which we compare with gem5's `gem5-stat`: + +* `3828578153`: RDTSC +* `3830832635`: `gem5-stat` + +which gives pretty close results, and serves as a nice sanity check that the cycle counter is coherent. + +It is also nice to see that RDTSC is a bit smaller than the `stats.txt` value, since the latter also includes the exec syscall for `m5`. + +Bibliography: + +* https://en.wikipedia.org/wiki/Time_Stamp_Counter +* https://stackoverflow.com/questions/9887839/clock-cycle-count-wth-gcc/9887979 + +===== x86 RDTSCP instruction + +RDTSCP is like RDTSC, but it also stores the CPU ID into ECX: this is convenient because the value of RDTSC depends on which core we are currently on, so you often also want the core ID when you want the RDTSC. + +Sources: + +* link:userland/arch/x86_64/rdtscp.S[] +* link:userland/arch/x86_64/intrinsics/rdtscp.c[] + +We can observe its operation with the good old `taskset`, for example: + +.... +taskset -c 0 ./userland/arch/x86_64/rdtscp.out | tail -n 1 +taskset -c 1 ./userland/arch/x86_64/rdtscp.out | tail -n 1 +.... + +produces: + +.... +0x00000000 +0x00000001 +.... + + +There is also the RDPID instruction that reads just the processor ID, but it appears to be very new for QEMU 4.0.0 or <>, as it fails with SIGILL on both. 
+ +Bibliography: https://stackoverflow.com/questions/22310028/is-there-an-x86-instruction-to-tell-which-core-the-instruction-is-being-run-on/56622112#56622112 + +===== ARM PMCCNTR register + +TODO We didn't manage to find a working ARM analogue to <>: link:kernel_modules/pmccntr.c[] is oopsing, and even if it weren't, it likely won't give the cycle count since boot since it needs to be activated before it starts counting anything: + +* https://stackoverflow.com/questions/40454157/is-there-an-equivalent-instruction-to-rdtsc-in-arm +* https://stackoverflow.com/questions/31620375/arm-cortex-a7-returning-pmccntr-0-in-kernel-mode-and-illegal-instruction-in-u/31649809#31649809 +* https://blog.regehr.org/archives/794 + === x86 SIMD History: @@ -12516,42 +12595,6 @@ link:userland/arch/x86_64/paddq.S[]: PADDQ, PADDL, PADDW, PADDB Good first instruction to learn SIMD: <> -=== x86 RDTSC instruction - -TODO: review this section, make a more controlled userland experiment with <> instrumentation. - -Let's have some fun and try to correlate the gem5 <> `system.cpu.numCycles` cycle count with the link:https://en.wikipedia.org/wiki/Time_Stamp_Counter[x86 RDTSC instruction] that is supposed to do the same thing: - -.... -./build-userland --static userland/arch/x86_64/inline_asm/rdtsc.c -./run --eval './arch/x86_64/c/rdtsc.out;m5 exit;' --emulator gem5 -./gem5-stat -.... - -Source: link:userland/arch/x86_64/rdtsc.c[] - -RDTSC outputs a cycle count which we compare with gem5's `gem5-stat`: - -* `3828578153`: RDTSC -* `3830832635`: `gem5-stat` - -which gives pretty close results, and serve as a nice sanity check that the cycle counter is coherent. - -It is also nice to see that RDTSC is a bit smaller than the `stats.txt` value, since the latter also includes the exec syscall for `m5`. 
- -Bibliography: - -* https://en.wikipedia.org/wiki/Time_Stamp_Counter -* https://stackoverflow.com/questions/9887839/clock-cycle-count-wth-gcc/9887979 - -==== ARM PMCCNTR register - -TODO We didn't manage to find a working ARM analogue to <>: link:kernel_modules/pmccntr.c[] is oopsing, and even it if weren't, it likely won't give the cycle count since boot since it needs to be activate before it starts counting anything: - -* https://stackoverflow.com/questions/40454157/is-there-an-equivalent-instruction-to-rdtsc-in-arm -* https://stackoverflow.com/questions/31620375/arm-cortex-a7-returning-pmccntr-0-in-kernel-mode-and-illegal-instruction-in-u/31649809#31649809 -* https://blog.regehr.org/archives/794 - === x86 assembly bibliography ==== x86 official bibliography diff --git a/lkmc.c b/lkmc.c index 6e00437..ac12f78 100644 --- a/lkmc.c +++ b/lkmc.c @@ -56,8 +56,12 @@ void lkmc_assert_memcmp( } } +void lkmc_print_hex_32(uint32_t x) { + printf("0x%08" PRIX32, x); +} + void lkmc_print_hex_64(uint64_t x) { - printf("0x%016" PRIx64, x); + printf("0x%016" PRIX64, x); } void lkmc_print_newline() { diff --git a/userland/arch/x86_64/rdtsc.c b/userland/arch/x86_64/intrinsics/rdtsc.c similarity index 73% rename from userland/arch/x86_64/rdtsc.c rename to userland/arch/x86_64/intrinsics/rdtsc.c index a4c2143..153e5be 100644 --- a/userland/arch/x86_64/rdtsc.c +++ b/userland/arch/x86_64/intrinsics/rdtsc.c @@ -1,5 +1,6 @@ /* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-rdtsc-instruction */ +#include #include #include #include @@ -7,8 +8,6 @@ #include int main(void) { - uintmax_t val; - val = __rdtsc(); - printf("%ju\n", val); + printf("0x%016" PRIX64 "\n", (uint64_t)__rdtsc()); return EXIT_SUCCESS; } diff --git a/userland/arch/x86_64/intrinsics/rdtscp.c b/userland/arch/x86_64/intrinsics/rdtscp.c new file mode 100644 index 0000000..0925028 --- /dev/null +++ b/userland/arch/x86_64/intrinsics/rdtscp.c @@ -0,0 +1,15 @@ +/* 
https://github.com/cirosantilli/linux-kernel-module-cheat#x86-rdtscp-instruction */ + +#include +#include +#include +#include + +#include + +int main(void) { + uint32_t pid; + printf("0x%016" PRIX64 "\n", (uint64_t)__rdtscp(&pid)); + printf("0x%08" PRIX32 "\n", pid); + return EXIT_SUCCESS; +} diff --git a/userland/arch/x86_64/rdtsc.S b/userland/arch/x86_64/rdtsc.S new file mode 100644 index 0000000..42a453b --- /dev/null +++ b/userland/arch/x86_64/rdtsc.S @@ -0,0 +1,12 @@ +/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-rdtsc-instruction */ + +#include + +LKMC_PROLOGUE + rdtsc + mov %edx, %edi + shl $32, %rdi + add %rax, %rdi + call lkmc_print_hex_64 + call lkmc_print_newline +LKMC_EPILOGUE diff --git a/userland/arch/x86_64/rdtscp.S b/userland/arch/x86_64/rdtscp.S new file mode 100644 index 0000000..a33b277 --- /dev/null +++ b/userland/arch/x86_64/rdtscp.S @@ -0,0 +1,20 @@ +/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-rdtscp-instruction */ + +#include + +LKMC_PROLOGUE + rdtscp + mov %edx, %edi + shl $32, %rdi + add %rax, %rdi + mov %ecx, %r12d + + /* Print RDTSC. */ + call lkmc_print_hex_64 + call lkmc_print_newline + + /* Print PID. */ + mov %r12d, %edi + call lkmc_print_hex_32 + call lkmc_print_newline +LKMC_EPILOGUE