mirror of
https://github.com/cirosantilli/linux-kernel-module-cheat.git
synced 2026-01-28 04:24:26 +01:00
asm: start intrinsics examples
Split userland/arch/<arch>/c/ into inline_asm and intrinsics, and move programs that don't match either up.
This commit is contained in:
84
README.adoc
84
README.adoc
@@ -373,7 +373,7 @@ Have you ever felt that a single `inc` instruction was not enough? Really? Me to
|
|||||||
|
|
||||||
So let's hack the <<gnu-gas-assembler>>, which is part of link:https://en.wikipedia.org/wiki/GNU_Binutils[GNU Binutils], to add a new shiny version of `inc` called... `myinc`!
|
So let's hack the <<gnu-gas-assembler>>, which is part of link:https://en.wikipedia.org/wiki/GNU_Binutils[GNU Binutils], to add a new shiny version of `inc` called... `myinc`!
|
||||||
|
|
||||||
GCC uses GNU GAS as its backend, so we will test out new mnemonic with an <<gcc-inline-assembly>> test program: link:userland/arch/x86_64/c/binutils_hack.c[], which is just a copy of link:userland/arch/x86_64/c/binutils_nohack.c[] but with `myinc` instead of `inc`.
|
GCC uses GNU GAS as its backend, so we will test out our new mnemonic with an <<gcc-inline-assembly>> test program: link:userland/arch/x86_64/binutils_hack.c[], which is just a copy of link:userland/arch/x86_64/binutils_nohack.c[] but with `myinc` instead of `inc`.
|
||||||
|
|
||||||
The inline assembly is disabled with an `#ifdef`, so first modify the source to enable that.
|
The inline assembly is disabled with an `#ifdef`, so first modify the source to enable that.
|
||||||
|
|
||||||
@@ -2742,7 +2742,7 @@ Sources:
|
|||||||
|
|
||||||
* link:kernel_modules/ring0.c[]
|
* link:kernel_modules/ring0.c[]
|
||||||
* link:lkmc/ring0.h[]
|
* link:lkmc/ring0.h[]
|
||||||
* link:userland/arch/x86_64/c/ring0.c[]
|
* link:userland/arch/x86_64/ring0.c[]
|
||||||
|
|
||||||
In both cases, we attempt to run the exact same code which is shared on the `ring0.h` header file.
|
In both cases, we attempt to run the exact same code which is shared on the `ring0.h` header file.
|
||||||
|
|
||||||
@@ -8611,9 +8611,24 @@ If we pass to QEMU the xen image directly instead of the boot wrapper one:
|
|||||||
-kernel ../xen/xen/xen
|
-kernel ../xen/xen/xen
|
||||||
....
|
....
|
||||||
|
|
||||||
then Xen messages do show up, so it seems that the configuration failure lies in the boot wrapper itself rather than Xen.
|
then Xen messages do show up! So it seems that the configuration failure lies in the boot wrapper itself rather than Xen.
|
||||||
|
|
||||||
Bibliography: this attempt was based on: https://wiki.xenproject.org/wiki/Xen_ARM_with_Virtualization_Extensions/FastModels which is the documentation for the ARM Fast Models closed source simulators.
|
Maybe it is also possible to run Xen directly like this: QEMU can already load multiple images at different memory locations with the generic loader: https://github.com/qemu/qemu/blob/master/docs/generic-loader.txt which looks something like:
|
||||||
|
|
||||||
|
....
|
||||||
|
-kernel file1.elf -device loader,file=file2.elf
|
||||||
|
....
|
||||||
|
|
||||||
|
so as long as we craft the correct DTB and feed it into Xen so that it can see the kernel, it should work. TODO does QEMU support patching the auto-generated DTB with pre-generated options? In the worst case we can just dump it with `-machine dumpdtb` and hack it up by hand: <<device-tree-emulator-generation>>.
|
||||||
|
|
||||||
|
Bibliography:
|
||||||
|
|
||||||
|
* this attempt was based on: https://wiki.xenproject.org/wiki/Xen_ARM_with_Virtualization_Extensions/FastModels which is the documentation for the ARM Fast Models closed source simulators.
|
||||||
|
* https://wiki.xenproject.org/wiki/Xen_ARM_with_Virtualization_Extensions/qemu-system-aarch64 this is the only QEMU aarch64 Xen page on the web. It uses the Ubuntu aarch64 image, which has EDK2.
|
||||||
|
+
|
||||||
|
I however see no joy on blobs. Buildroot does not seem to support EDK 2.
|
||||||
|
|
||||||
|
Link on readme https://stackoverflow.com/questions/49348453/xen-on-qemu-with-arm64-architecture
|
||||||
|
|
||||||
== QEMU
|
== QEMU
|
||||||
|
|
||||||
@@ -11878,25 +11893,32 @@ You are now left on the very first instruction of our tiny executable!
|
|||||||
Examples under `arch/<arch>/c/` directories show how to use inline assembly from higher level languages such as C:
|
Examples under `arch/<arch>/c/` directories show how to use inline assembly from higher level languages such as C:
|
||||||
|
|
||||||
* x86_64
|
* x86_64
|
||||||
** link:userland/arch/x86_64/c/inc.c[]
|
** link:userland/arch/x86_64/inline_asm/inc.c[]
|
||||||
** link:userland/arch/x86_64/c/add.c[]
|
** link:userland/arch/x86_64/inline_asm/add.c[]
|
||||||
* arm
|
* arm
|
||||||
** link:userland/arch/arm/c/inc.c[]
|
** link:userland/arch/arm/inline_asm/inc.c[]
|
||||||
** link:userland/arch/arm/c/inc_memory.c[]
|
** link:userland/arch/arm/inline_asm/inc_memory.c[]
|
||||||
** link:userland/arch/arm/c/inc_memory_global.c[]
|
** link:userland/arch/arm/inline_asm/inc_memory_global.c[]
|
||||||
** link:userland/arch/arm/c/add.c[]
|
** link:userland/arch/arm/inline_asm/add.c[]
|
||||||
* aarch64
|
* aarch64
|
||||||
** link:userland/arch/aarch64/c/earlyclobber.c[]
|
** link:userland/arch/aarch64/inline_asm/earlyclobber.c[]
|
||||||
** link:userland/arch/aarch64/c/inc.c[]
|
** link:userland/arch/aarch64/inline_asm/inc.c[]
|
||||||
** link:userland/arch/aarch64/c/multiline.cpp[]
|
** link:userland/arch/aarch64/inline_asm/multiline.cpp[]
|
||||||
|
|
||||||
|
==== GCC intrinsics
|
||||||
|
|
||||||
|
Pre-existing C wrappers on top of inline assembly. This is what production programs should use instead of inline assembly:
|
||||||
|
|
||||||
|
* x86_64
|
||||||
|
** link:userland/arch/x86_64/intrinsics/paddq.c[]. Intrinsics version of link:userland/arch/x86_64/paddq.S[]
|
||||||
|
|
||||||
==== GCC inline assembly register variables
|
==== GCC inline assembly register variables
|
||||||
|
|
||||||
Used notably in some of the <<linux-system-calls>> setups:
|
Used notably in some of the <<linux-system-calls>> setups:
|
||||||
|
|
||||||
* link:userland/arch/arm/c/reg_var.c[]
|
* link:userland/arch/arm/inline_asm/reg_var.c[]
|
||||||
* link:userland/arch/aarch64/c/reg_var.c[]
|
* link:userland/arch/aarch64/inline_asm/reg_var.c[]
|
||||||
* link:userland/arch/aarch64/c/reg_var_float.c[]
|
* link:userland/arch/aarch64/inline_asm/reg_var_float.c[]
|
||||||
|
|
||||||
In x86, makes it possible to access variables not exposed with the one letter register constraints.
|
In x86, makes it possible to access variables not exposed with the one letter register constraints.
|
||||||
|
|
||||||
@@ -11911,8 +11933,8 @@ Documentation: https://gcc.gnu.org/onlinedocs/gcc-4.4.2/gcc/Explicit-Reg-Vars.ht
|
|||||||
How to use temporary registers in inline assembly:
|
How to use temporary registers in inline assembly:
|
||||||
|
|
||||||
* x86_64
|
* x86_64
|
||||||
** link:userland/arch/x86_64/c/scratch.c[]
|
** link:userland/arch/x86_64/inline_asm/scratch.c[]
|
||||||
** link:userland/arch/x86_64/c/scratch_hardcode.c[]
|
** link:userland/arch/x86_64/inline_asm/scratch_hardcode.c[]
|
||||||
|
|
||||||
Bibliography: https://stackoverflow.com/questions/6682733/gcc-prohibit-use-of-some-registers/54963829#54963829
|
Bibliography: https://stackoverflow.com/questions/6682733/gcc-prohibit-use-of-some-registers/54963829#54963829
|
||||||
|
|
||||||
@@ -11928,8 +11950,8 @@ The assertion may fail without it. It actually does fail in GCC 8.2.0.
|
|||||||
|
|
||||||
Not documented as of GCC 8.2, but possible: https://stackoverflow.com/questions/53960240/armv8-floating-point-output-inline-assembly
|
Not documented as of GCC 8.2, but possible: https://stackoverflow.com/questions/53960240/armv8-floating-point-output-inline-assembly
|
||||||
|
|
||||||
* link:userland/arch/arm/c/inc_float.c[]
|
* link:userland/arch/arm/inline_asm/inc_float.c[]
|
||||||
* link:userland/arch/aarch64/c/inc_float.c[]
|
* link:userland/arch/aarch64/inline_asm/inc_float.c[]
|
||||||
|
|
||||||
=== Linux system calls
|
=== Linux system calls
|
||||||
|
|
||||||
@@ -11937,15 +11959,15 @@ The following <<userland-setup>> programs illustrate how to make system calls:
|
|||||||
|
|
||||||
* x86_64
|
* x86_64
|
||||||
** link:userland/arch/x86_64/freestanding/linux/hello.S[]
|
** link:userland/arch/x86_64/freestanding/linux/hello.S[]
|
||||||
** link:userland/arch/x86_64/c/freestanding/linux/hello.c[]
|
** link:userland/arch/x86_64/inline_asm/freestanding/linux/hello.c[]
|
||||||
** link:userland/arch/x86_64/c/freestanding/linux/hello_regvar.c[]
|
** link:userland/arch/x86_64/inline_asm/freestanding/linux/hello_regvar.c[]
|
||||||
* arm
|
* arm
|
||||||
** link:userland/arch/arm/freestanding/linux/hello.S[]
|
** link:userland/arch/arm/freestanding/linux/hello.S[]
|
||||||
** link:userland/arch/arm/c/freestanding/linux/hello.c[]
|
** link:userland/arch/arm/inline_asm/freestanding/linux/hello.c[]
|
||||||
* aarch64
|
* aarch64
|
||||||
** link:userland/arch/aarch64/freestanding/linux/hello.S[]
|
** link:userland/arch/aarch64/freestanding/linux/hello.S[]
|
||||||
** link:userland/arch/aarch64/c/freestanding/linux/hello.c[]
|
** link:userland/arch/aarch64/inline_asm/freestanding/linux/hello.c[]
|
||||||
** link:userland/arch/aarch64/c/freestanding/linux/hello_clobbers.c[]
|
** link:userland/arch/aarch64/inline_asm/freestanding/linux/hello_clobbers.c[]
|
||||||
|
|
||||||
Determining the ARM syscall numbers:
|
Determining the ARM syscall numbers:
|
||||||
|
|
||||||
@@ -12010,7 +12032,7 @@ Call C standard library functions from assembly and vice versa.
|
|||||||
** link:userland/arch/arm/linux/c_from_asm.S[]
|
** link:userland/arch/arm/linux/c_from_asm.S[]
|
||||||
* aarch64
|
* aarch64
|
||||||
** link:lkmc/aarch64.h[] `ENTRY` and `EXIT`
|
** link:lkmc/aarch64.h[] `ENTRY` and `EXIT`
|
||||||
** link:userland/arch/aarch64/c/linux/asm_from_c.c[]
|
** link:userland/arch/aarch64/inline_asm/linux/asm_from_c.c[]
|
||||||
|
|
||||||
ARM Architecture Procedure Call Standard (AAPCS) is the name that ARM Holdings gives to the calling convention.
|
ARM Architecture Procedure Call Standard (AAPCS) is the name that ARM Holdings gives to the calling convention.
|
||||||
|
|
||||||
@@ -12183,12 +12205,12 @@ TODO: review this section, make a more controlled userland experiment with <<m5o
|
|||||||
Let's have some fun and try to correlate the gem5 <<stats-txt>> `system.cpu.numCycles` cycle count with the link:https://en.wikipedia.org/wiki/Time_Stamp_Counter[x86 `rdtsc` instruction] that is supposed to do the same thing:
|
Let's have some fun and try to correlate the gem5 <<stats-txt>> `system.cpu.numCycles` cycle count with the link:https://en.wikipedia.org/wiki/Time_Stamp_Counter[x86 `rdtsc` instruction] that is supposed to do the same thing:
|
||||||
|
|
||||||
....
|
....
|
||||||
./build-userland --static userland/arch/x86_64/c/rdtsc.c
|
./build-userland --static userland/arch/x86_64/inline_asm/rdtsc.c
|
||||||
./run --eval './arch/x86_64/c/rdtsc.out;m5 exit;' --emulator gem5
|
./run --eval './arch/x86_64/c/rdtsc.out;m5 exit;' --emulator gem5
|
||||||
./gem5-stat
|
./gem5-stat
|
||||||
....
|
....
|
||||||
|
|
||||||
Source: link:userland/arch/x86_64/c/rdtsc.c[]
|
Source: link:userland/arch/x86_64/rdtsc.c[]
|
||||||
|
|
||||||
`rdtsc` outputs a cycle count which we compare with gem5's `gem5-stat`:
|
`rdtsc` outputs a cycle count which we compare with gem5's `gem5-stat`:
|
||||||
|
|
||||||
@@ -13264,7 +13286,7 @@ To test it out, I first hack link:common.py[] to enable `C++`:
|
|||||||
consts['baremetal_build_in_exts'] = consts['build_in_exts']
|
consts['baremetal_build_in_exts'] = consts['build_in_exts']
|
||||||
....
|
....
|
||||||
|
|
||||||
and then I hack link:userland/arch/aarch64/c/multiline.cpp[] to consist only of an empty main:
|
and then I hack link:userland/arch/aarch64/inline_asm/multiline.cpp[] to consist only of an empty main:
|
||||||
|
|
||||||
....
|
....
|
||||||
int main() {}
|
int main() {}
|
||||||
@@ -13274,7 +13296,7 @@ then for example:
|
|||||||
|
|
||||||
....
|
....
|
||||||
./build-baremetal --arch aarch64
|
./build-baremetal --arch aarch64
|
||||||
./run --arch aarch64 --baremetal userland/arch/aarch64/c/multiline.cpp
|
./run --arch aarch64 --baremetal userland/arch/aarch64/inline_asm/multiline.cpp
|
||||||
....
|
....
|
||||||
|
|
||||||
fails with:
|
fails with:
|
||||||
@@ -13287,7 +13309,7 @@ qemu-system-aarch64: rom check and register reset failed
|
|||||||
and the gem5 build fails completely:
|
and the gem5 build fails completely:
|
||||||
|
|
||||||
....
|
....
|
||||||
./build-baremetal --arch aarch64 --emulator gem5 userland/arch/aarch64/c/multiline.cpp
|
./build-baremetal --arch aarch64 --emulator gem5 userland/arch/aarch64/inline_asm/multiline.cpp
|
||||||
....
|
....
|
||||||
|
|
||||||
fails with:
|
fails with:
|
||||||
|
|||||||
@@ -348,7 +348,7 @@ path_properties_tuples = (
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'c': (
|
'inline_asm': (
|
||||||
{
|
{
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -371,7 +371,7 @@ path_properties_tuples = (
|
|||||||
'aarch64': (
|
'aarch64': (
|
||||||
{'allowed_archs': {'aarch64'}},
|
{'allowed_archs': {'aarch64'}},
|
||||||
{
|
{
|
||||||
'c': (
|
'inline_asm': (
|
||||||
{
|
{
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -393,19 +393,19 @@ path_properties_tuples = (
|
|||||||
'x86_64': (
|
'x86_64': (
|
||||||
{'allowed_archs': {'x86_64'}},
|
{'allowed_archs': {'x86_64'}},
|
||||||
{
|
{
|
||||||
'c': (
|
'inline_asm': (
|
||||||
{
|
{
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'freestanding': freestanding_properties,
|
'freestanding': freestanding_properties,
|
||||||
'ring0.c': {
|
|
||||||
'signal_received': signal.Signals.SIGSEGV
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
'freestanding': freestanding_properties,
|
'freestanding': freestanding_properties,
|
||||||
'lkmc_assert_eq_fail.S': {'signal_received': signal.Signals.SIGABRT},
|
'lkmc_assert_eq_fail.S': {'signal_received': signal.Signals.SIGABRT},
|
||||||
'lkmc_assert_memcmp_fail.S': {'signal_received': signal.Signals.SIGABRT},
|
'lkmc_assert_memcmp_fail.S': {'signal_received': signal.Signals.SIGABRT},
|
||||||
|
'ring0.c': {
|
||||||
|
'signal_received': signal.Signals.SIGSEGV,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|||||||
1
userland/arch/x86_64/intrinsics/README.adoc
Normal file
1
userland/arch/x86_64/intrinsics/README.adoc
Normal file
@@ -0,0 +1 @@
|
|||||||
|
https://github.com/cirosantilli/linux-kernel-module-cheat#gcc-intrinsics
|
||||||
18
userland/arch/x86_64/intrinsics/paddq.c
Normal file
18
userland/arch/x86_64/intrinsics/paddq.c
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
/* https://github.com/cirosantilli/linux-kernel-module-cheat#gcc-intrinsics */
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include <x86intrin.h>
|
||||||
|
|
||||||
|
int main(void) {
    /*
     * Add two vectors of four packed 32-bit integers lane by lane and
     * check every lane of the result.
     *
     * _mm_setr_epi32 takes its arguments starting from the lowest lane,
     * so lane i of input0 holds i + 1 and lane i of input1 holds i + 5.
     * (_mm_set_epi32 takes them in the opposite order, which would
     * reverse the expected sums below.)
     */
    __m128i input0 = _mm_setr_epi32(1, 2, 3, 4);
    __m128i input1 = _mm_setr_epi32(5, 6, 7, 8);
    __m128i output = _mm_add_epi32(input0, input1);

    /*
     * Do not subscript the __m128i directly: under GCC it is a vector of
     * two 64-bit elements, so output[0] would fuse two 32-bit sums and
     * output[2] / output[3] would be out of bounds. Copy the 128 bits to
     * a plain int array and read the four 32-bit lanes from there.
     */
    int lanes[4];
    _mm_storeu_si128((__m128i *)lanes, output);

    printf("%d\n", lanes[3]);
    assert(lanes[0] == 6);
    assert(lanes[1] == 8);
    assert(lanes[2] == 10);
    assert(lanes[3] == 12);
    return 0;
}
|
||||||
Reference in New Issue
Block a user