mirror of
https://github.com/cirosantilli/linux-kernel-module-cheat.git
synced 2026-01-26 19:51:35 +01:00
asm: start intrinsics examples
Split userland/arch/<arch>/c/ into inline_asm and intrinsics, and move programs that don't match either up.
This commit is contained in:
84
README.adoc
84
README.adoc
@@ -373,7 +373,7 @@ Have you ever felt that a single `inc` instruction was not enough? Really? Me to
|
||||
|
||||
So let's hack the <<gnu-gas-assembler>>, which is part of link:https://en.wikipedia.org/wiki/GNU_Binutils[GNU Binutils], to add a new shiny version of `inc` called... `myinc`!
|
||||
|
||||
GCC uses GNU GAS as its backend, so we will test out new mnemonic with an <<gcc-inline-assembly>> test program: link:userland/arch/x86_64/c/binutils_hack.c[], which is just a copy of link:userland/arch/x86_64/c/binutils_nohack.c[] but with `myinc` instead of `inc`.
|
||||
GCC uses GNU GAS as its backend, so we will test out our new mnemonic with an <<gcc-inline-assembly>> test program: link:userland/arch/x86_64/binutils_hack.c[], which is just a copy of link:userland/arch/x86_64/binutils_nohack.c[] but with `myinc` instead of `inc`.
|
||||
|
||||
The inline assembly is disabled with an `#ifdef`, so first modify the source to enable that.
|
||||
|
||||
@@ -2742,7 +2742,7 @@ Sources:
|
||||
|
||||
* link:kernel_modules/ring0.c[]
|
||||
* link:lkmc/ring0.h[]
|
||||
* link:userland/arch/x86_64/c/ring0.c[]
|
||||
* link:userland/arch/x86_64/ring0.c[]
|
||||
|
||||
In both cases, we attempt to run the exact same code which is shared on the `ring0.h` header file.
|
||||
|
||||
@@ -8611,9 +8611,24 @@ If we pass to QEMU the xen image directly instead of the boot wrapper one:
|
||||
-kernel ../xen/xen/xen
|
||||
....
|
||||
|
||||
then Xen messages do show up, so it seems that the configuration failure lies in the boot wrapper itself rather than Xen.
|
||||
then Xen messages do show up! So it seems that the configuration failure lies in the boot wrapper itself rather than Xen.
|
||||
|
||||
Bibliography: this attempt was based on: https://wiki.xenproject.org/wiki/Xen_ARM_with_Virtualization_Extensions/FastModels which is the documentation for the ARM Fast Models closed source simulators.
|
||||
Maybe it is also possible to run Xen directly like this: QEMU can already load multiple images at different memory locations with the generic loader: https://github.com/qemu/qemu/blob/master/docs/generic-loader.txt which looks something like:
|
||||
|
||||
....
|
||||
-kernel file1.elf -device loader,file=file2.elf
|
||||
....
|
||||
|
||||
so as long as we craft the correct DTB and feed it into Xen so that it can see the kernel, it should work. TODO does QEMU support patching the auto-generated DTB with pre-generated options? In the worst case we can just dump it with `-machine dumpdtb` and hack it up by hand though: <<device-tree-emulator-generation>>.
|
||||
|
||||
Bibliography:
|
||||
|
||||
* this attempt was based on: https://wiki.xenproject.org/wiki/Xen_ARM_with_Virtualization_Extensions/FastModels which is the documentation for the ARM Fast Models closed source simulators.
|
||||
* https://wiki.xenproject.org/wiki/Xen_ARM_with_Virtualization_Extensions/qemu-system-aarch64 this is the only QEMU aarch64 Xen page on the web. It uses the Ubuntu aarch64 image, which has EDK2.
|
||||
+
|
||||
I however see no joy on blobs. Buildroot does not seem to support EDK 2.
|
||||
|
||||
Link on readme https://stackoverflow.com/questions/49348453/xen-on-qemu-with-arm64-architecture
|
||||
|
||||
== QEMU
|
||||
|
||||
@@ -11878,25 +11893,32 @@ You are now left on the very first instruction of our tiny executable!
|
||||
Examples under `arch/<arch>/c/` directories show how to use inline assembly from higher level languages such as C:
|
||||
|
||||
* x86_64
|
||||
** link:userland/arch/x86_64/c/inc.c[]
|
||||
** link:userland/arch/x86_64/c/add.c[]
|
||||
** link:userland/arch/x86_64/inline_asm/inc.c[]
|
||||
** link:userland/arch/x86_64/inline_asm/add.c[]
|
||||
* arm
|
||||
** link:userland/arch/arm/c/inc.c[]
|
||||
** link:userland/arch/arm/c/inc_memory.c[]
|
||||
** link:userland/arch/arm/c/inc_memory_global.c[]
|
||||
** link:userland/arch/arm/c/add.c[]
|
||||
** link:userland/arch/arm/inline_asm/inc.c[]
|
||||
** link:userland/arch/arm/inline_asm/inc_memory.c[]
|
||||
** link:userland/arch/arm/inline_asm/inc_memory_global.c[]
|
||||
** link:userland/arch/arm/inline_asm/add.c[]
|
||||
* aarch64
|
||||
** link:userland/arch/aarch64/c/earlyclobber.c[]
|
||||
** link:userland/arch/aarch64/c/inc.c[]
|
||||
** link:userland/arch/aarch64/c/multiline.cpp[]
|
||||
** link:userland/arch/aarch64/inline_asm/earlyclobber.c[]
|
||||
** link:userland/arch/aarch64/inline_asm/inc.c[]
|
||||
** link:userland/arch/aarch64/inline_asm/multiline.cpp[]
|
||||
|
||||
==== GCC intrinsics
|
||||
|
||||
Pre-existing C wrappers on top of inline assembly, this is what production programs should use instead of inline assembly:
|
||||
|
||||
* x86_64
|
||||
** link:userland/arch/x86_64/intrinsics/paddq.c[]. Intrinsics version of link:userland/arch/x86_64/paddq.S[]
|
||||
|
||||
==== GCC inline assembly register variables
|
||||
|
||||
Used notably in some of the <<linux-system-calls>> setups:
|
||||
|
||||
* link:userland/arch/arm/c/reg_var.c[]
|
||||
* link:userland/arch/aarch64/c/reg_var.c[]
|
||||
* link:userland/arch/aarch64/c/reg_var_float.c[]
|
||||
* link:userland/arch/arm/inline_asm/reg_var.c[]
|
||||
* link:userland/arch/aarch64/inline_asm/reg_var.c[]
|
||||
* link:userland/arch/aarch64/inline_asm/reg_var_float.c[]
|
||||
|
||||
In x86, makes it possible to access variables not exposed with the one letter register constraints.
|
||||
|
||||
@@ -11911,8 +11933,8 @@ Documentation: https://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Explicit-Reg-Vars.ht
|
||||
How to use temporary registers in inline assembly:
|
||||
|
||||
* x86_64
|
||||
** link:userland/arch/x86_64/c/scratch.c[]
|
||||
** link:userland/arch/x86_64/c/scratch_hardcode.c[]
|
||||
** link:userland/arch/x86_64/inline_asm/scratch.c[]
|
||||
** link:userland/arch/x86_64/inline_asm/scratch_hardcode.c[]
|
||||
|
||||
Bibliography: https://stackoverflow.com/questions/6682733/gcc-prohibit-use-of-some-registers/54963829#54963829
|
||||
|
||||
@@ -11928,8 +11950,8 @@ The assertion may fail without it. It actually does fail in GCC 8.2.0.
|
||||
|
||||
Not documented as of GCC 8.2, but possible: https://stackoverflow.com/questions/53960240/armv8-floating-point-output-inline-assembly
|
||||
|
||||
* link:userland/arch/arm/c/inc_float.c[]
|
||||
* link:userland/arch/aarch64/c/inc_float.c[]
|
||||
* link:userland/arch/arm/inline_asm/inc_float.c[]
|
||||
* link:userland/arch/aarch64/inline_asm/inc_float.c[]
|
||||
|
||||
=== Linux system calls
|
||||
|
||||
@@ -11937,15 +11959,15 @@ The following <<userland-setup>> programs illustrate how to make system calls:
|
||||
|
||||
* x86_64
|
||||
** link:userland/arch/x86_64/freestanding/linux/hello.S[]
|
||||
** link:userland/arch/x86_64/c/freestanding/linux/hello.c[]
|
||||
** link:userland/arch/x86_64/c/freestanding/linux/hello_regvar.c[]
|
||||
** link:userland/arch/x86_64/inline_asm/freestanding/linux/hello.c[]
|
||||
** link:userland/arch/x86_64/inline_asm/freestanding/linux/hello_regvar.c[]
|
||||
* arm
|
||||
** link:userland/arch/arm/freestanding/linux/hello.S[]
|
||||
** link:userland/arch/arm/c/freestanding/linux/hello.c[]
|
||||
** link:userland/arch/arm/inline_asm/freestanding/linux/hello.c[]
|
||||
* aarch64
|
||||
** link:userland/arch/aarch64/freestanding/linux/hello.S[]
|
||||
** link:userland/arch/aarch64/c/freestanding/linux/hello.c[]
|
||||
** link:userland/arch/aarch64/c/freestanding/linux/hello_clobbers.c[]
|
||||
** link:userland/arch/aarch64/inline_asm/freestanding/linux/hello.c[]
|
||||
** link:userland/arch/aarch64/inline_asm/freestanding/linux/hello_clobbers.c[]
|
||||
|
||||
Determining the ARM syscall numbers:
|
||||
|
||||
@@ -12010,7 +12032,7 @@ Call C standard library functions from assembly and vice versa.
|
||||
** link:userland/arch/arm/linux/c_from_asm.S[]
|
||||
* aarch64
|
||||
** link:lkmc/aarch64.h[] `ENTRY` and `EXIT`
|
||||
** link:userland/arch/aarch64/c/linux/asm_from_c.c[]
|
||||
** link:userland/arch/aarch64/inline_asm/linux/asm_from_c.c[]
|
||||
|
||||
ARM Architecture Procedure Call Standard (AAPCS) is the name that ARM Holdings gives to the calling convention.
|
||||
|
||||
@@ -12183,12 +12205,12 @@ TODO: review this section, make a more controlled userland experiment with <<m5o
|
||||
Let's have some fun and try to correlate the gem5 <<stats-txt>> `system.cpu.numCycles` cycle count with the link:https://en.wikipedia.org/wiki/Time_Stamp_Counter[x86 `rdtsc` instruction] that is supposed to do the same thing:
|
||||
|
||||
....
|
||||
./build-userland --static userland/arch/x86_64/c/rdtsc.c
|
||||
./build-userland --static userland/arch/x86_64/inline_asm/rdtsc.c
|
||||
./run --eval './arch/x86_64/c/rdtsc.out;m5 exit;' --emulator gem5
|
||||
./gem5-stat
|
||||
....
|
||||
|
||||
Source: link:userland/arch/x86_64/c/rdtsc.c[]
|
||||
Source: link:userland/arch/x86_64/rdtsc.c[]
|
||||
|
||||
`rdtsc` outputs a cycle count which we compare with gem5's `gem5-stat`:
|
||||
|
||||
@@ -13264,7 +13286,7 @@ To test it out, I first hack link:common.py[] to enable `C++`:
|
||||
consts['baremetal_build_in_exts'] = consts['build_in_exts']
|
||||
....
|
||||
|
||||
and then I hack link:userland/arch/aarch64/c/multiline.cpp[] to consist only of an empty main:
|
||||
and then I hack link:userland/arch/aarch64/inline_asm/multiline.cpp[] to consist only of an empty main:
|
||||
|
||||
....
|
||||
int main() {}
|
||||
@@ -13274,7 +13296,7 @@ then for example:
|
||||
|
||||
....
|
||||
./build-baremetal --arch aarch64
|
||||
./run --arch aarch64 --baremetal userland/arch/aarch64/c/multiline.cpp
|
||||
./run --arch aarch64 --baremetal userland/arch/aarch64/inline_asm/multiline.cpp
|
||||
....
|
||||
|
||||
fails with:
|
||||
@@ -13287,7 +13309,7 @@ qemu-system-aarch64: rom check and register reset failed
|
||||
and the gem5 build fails completely:
|
||||
|
||||
....
|
||||
./build-baremetal --arch aarch64 --emulator gem5 userland/arch/aarch64/c/multiline.cpp
|
||||
./build-baremetal --arch aarch64 --emulator gem5 userland/arch/aarch64/inline_asm/multiline.cpp
|
||||
....
|
||||
|
||||
fails with:
|
||||
|
||||
@@ -348,7 +348,7 @@ path_properties_tuples = (
|
||||
]
|
||||
},
|
||||
{
|
||||
'c': (
|
||||
'inline_asm': (
|
||||
{
|
||||
},
|
||||
{
|
||||
@@ -371,7 +371,7 @@ path_properties_tuples = (
|
||||
'aarch64': (
|
||||
{'allowed_archs': {'aarch64'}},
|
||||
{
|
||||
'c': (
|
||||
'inline_asm': (
|
||||
{
|
||||
},
|
||||
{
|
||||
@@ -393,19 +393,19 @@ path_properties_tuples = (
|
||||
'x86_64': (
|
||||
{'allowed_archs': {'x86_64'}},
|
||||
{
|
||||
'c': (
|
||||
'inline_asm': (
|
||||
{
|
||||
},
|
||||
{
|
||||
'freestanding': freestanding_properties,
|
||||
'ring0.c': {
|
||||
'signal_received': signal.Signals.SIGSEGV
|
||||
}
|
||||
}
|
||||
),
|
||||
'freestanding': freestanding_properties,
|
||||
'lkmc_assert_eq_fail.S': {'signal_received': signal.Signals.SIGABRT},
|
||||
'lkmc_assert_memcmp_fail.S': {'signal_received': signal.Signals.SIGABRT},
|
||||
'ring0.c': {
|
||||
'signal_received': signal.Signals.SIGSEGV,
|
||||
}
|
||||
}
|
||||
),
|
||||
}
|
||||
|
||||
1
userland/arch/x86_64/intrinsics/README.adoc
Normal file
1
userland/arch/x86_64/intrinsics/README.adoc
Normal file
@@ -0,0 +1 @@
|
||||
https://github.com/cirosantilli/linux-kernel-module-cheat#gcc-intrinsics
|
||||
18
userland/arch/x86_64/intrinsics/paddq.c
Normal file
18
userland/arch/x86_64/intrinsics/paddq.c
Normal file
@@ -0,0 +1,18 @@
|
||||
/* https://github.com/cirosantilli/linux-kernel-module-cheat#gcc-intrinsics */
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
/* Add two vectors of four 32-bit integers with the _mm_add_epi32 intrinsic
 * and check every lane of the result.
 *
 * _mm_set_epi32 takes its arguments from the highest lane down to the lowest,
 * so lane 0 holds the last argument: lane 0 is 4 + 8 and lane 3 is 1 + 5.
 *
 * Returns 0 on success; a failed assert aborts the program.
 */
int main(void) {
    __m128i input0 = _mm_set_epi32(1, 2, 3, 4);
    __m128i input1 = _mm_set_epi32(5, 6, 7, 8);
    __m128i output = _mm_add_epi32(input0, input1);
    /* GCC defines __m128i as a vector of two long long, so subscripting it
     * directly reads 64-bit elements and indices 2 and 3 are out of bounds.
     * Copy the register to memory to read the four 32-bit lanes instead. */
    int lanes[4];

    _mm_storeu_si128((__m128i *)lanes, output);
    printf("%d\n", lanes[3]);
    assert(lanes[0] == 12);
    assert(lanes[1] == 10);
    assert(lanes[2] == 8);
    assert(lanes[3] == 6);
    return 0;
}
|
||||
Reference in New Issue
Block a user