diff --git a/README.adoc b/README.adoc index 3719ca5..4ceba8e 100644 --- a/README.adoc +++ b/README.adoc @@ -7126,7 +7126,9 @@ Bibliography: https://stackoverflow.com/questions/5970595/how-to-create-a-device ==== File operations -File operations are the main method of userland driver communication. `struct file_operations` determines what the kernel will do on filesystem system calls of <>. +File operations are the main method of userland driver communication. + +`struct file_operations` determines what the kernel will do on filesystem system calls of <>. This example illustrates the most basic system calls: `open`, `read`, `write`, `close` and `lseek`: @@ -7224,26 +7226,78 @@ cd ==== poll -The poll system call allows an user process to do a non-busy wait on a kernel event: - -.... -./poll.sh -.... - -Outcome: `jiffies` gets printed to stdout every second from userland. +The poll system call allows a user process to do a non-busy wait on a kernel event. Sources: * link:kernel_modules/poll.c[] * link:rootfs_overlay/lkmc/poll.sh[] +Example: + +.... +./poll.sh +.... + +Outcome: `jiffies` gets printed to stdout every second from userland, e.g.: + +.... +poll +<6>[ 4.275305] poll +<6>[ 4.275580] return POLLIN +revents = 1 +POLLIN n=10 buf=4294893337 +poll +<6>[ 4.276627] poll +<6>[ 4.276911] return 0 +<6>[ 5.271193] wake_up +<6>[ 5.272326] poll +<6>[ 5.273207] return POLLIN +revents = 1 +POLLIN n=10 buf=4294893588 +poll +<6>[ 5.276367] poll +<6>[ 5.276618] return 0 +<6>[ 6.275178] wake_up +<6>[ 6.276370] poll +<6>[ 6.277269] return POLLIN +revents = 1 +POLLIN n=10 buf=4294893839 +.... + +Force the poll <> to return 0 to see what happens more clearly: + +.... +./poll.sh ret0=1 +.... + +Sample output: + +.... +poll +<6>[ 85.674801] poll +<6>[ 85.675788] return 0 +<6>[ 86.675182] wake_up +<6>[ 86.676431] poll +<6>[ 86.677373] return 0 +<6>[ 87.679198] wake_up +<6>[ 87.680515] poll +<6>[ 87.681564] return 0 +<6>[ 88.683198] wake_up +.... 
+ +From this we see that control is not returned to userland: the kernel just keeps calling the poll `file_operation` again and again. + Typically, we are waiting for some hardware to make some piece of data available available to the kernel. The hardware notifies the kernel that the data is ready with an interrupt. To simplify this example, we just fake the hardware interrupts with a <> that sleeps for a second in an infinite loop. -Bibliography: https://stackoverflow.com/questions/30035776/how-to-add-poll-function-to-the-kernel-module-code/44645336#44645336 +Bibliography: + +* https://stackoverflow.com/questions/30035776/how-to-add-poll-function-to-the-kernel-module-code/44645336#44645336 +* https://stackoverflow.com/questions/30234496/why-do-we-need-to-call-poll-wait-in-poll/44645480#44645480 ==== ioctl diff --git a/kernel_modules/kprobe_example.c b/kernel_modules/kprobe_example.c index bacf77b..f205b20 100644 --- a/kernel_modules/kprobe_example.c +++ b/kernel_modules/kprobe_example.c @@ -3,8 +3,7 @@ * Adapted from: https://github.com/torvalds/linux/blob/v4.17/samples/kprobes/kprobe_example.c */ -/* - * NOTE: This example is works on x86 and powerpc. +/* NOTE: This example works on x86 and powerpc. * Here's a sample kernel module showing the use of kprobes to dump a * stack trace and selected registers when _do_fork() is called. 
* diff --git a/kernel_modules/params.c b/kernel_modules/params.c index f371727..ad114ed 100644 --- a/kernel_modules/params.c +++ b/kernel_modules/params.c @@ -7,8 +7,8 @@ #include /* seq_read, seq_lseek, single_release */ #include /* S_IRUSR | S_IWUSR */ -static u32 i = 0; -static u32 j = 0; +static int i = 0; +static int j = 0; module_param(i, int, S_IRUSR | S_IWUSR); module_param(j, int, S_IRUSR | S_IWUSR); MODULE_PARM_DESC(i, "my favorite int"); diff --git a/kernel_modules/poll.c b/kernel_modules/poll.c index ee5adcf..746805f 100644 --- a/kernel_modules/poll.c +++ b/kernel_modules/poll.c @@ -14,6 +14,10 @@ #include /* wait_queue_head_t, wait_event_interruptible, wake_up_interruptible */ #include /* S_IRUSR */ +static int ret0 = 0; +module_param(ret0, int, S_IRUSR | S_IWUSR); +MODULE_PARM_DESC(i, "if 1, always return 0 from poll"); + static char readbuf[1024]; static size_t readbuflen; static struct dentry *debugfs_file; @@ -34,24 +38,33 @@ static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off return ret; } -/* If you return 0 here, then the kernel will sleep until an event happens in the queue. - * - * This gets called again every time an event happens in the wait queue. - */ +/* If you return 0 here, then the kernel will sleep until an event + * happens in the queue, and then call this again, because of the call to poll_wait. */ unsigned int poll(struct file *filp, struct poll_table_struct *wait) { + pr_info("poll\n"); + /* This doesn't sleep. It just makes the kernel call poll again if we return 0. 
*/ poll_wait(filp, &waitqueue, wait); - if (readbuflen) + if (readbuflen && !ret0) { + pr_info("return POLLIN\n"); return POLLIN; - else + } else { + pr_info("return 0\n"); return 0; + } } static int kthread_func(void *data) { while (!kthread_should_stop()) { - readbuflen = snprintf(readbuf, sizeof(readbuf), "%llu", (unsigned long long)jiffies); + readbuflen = snprintf( + readbuf, + sizeof(readbuf), + "%llu", + (unsigned long long)jiffies + ); usleep_range(1000000, 1000001); + pr_info("wake_up\n"); wake_up(&waitqueue); } return 0; diff --git a/rootfs_overlay/lkmc/poll.sh b/rootfs_overlay/lkmc/poll.sh index 9ec28f5..67ce3d5 100755 --- a/rootfs_overlay/lkmc/poll.sh +++ b/rootfs_overlay/lkmc/poll.sh @@ -1,6 +1,7 @@ #!/bin/sh # https://cirosantilli.com/linux-kernel-module-cheat#poll set -e -insmod poll.ko +insmod poll.ko "$@" ./kernel_modules/poll.out /sys/kernel/debug/lkmc_poll +# TODO capture Ctrl+C and do this automatically. #rmmod poll diff --git a/userland/kernel_modules/poll.c b/userland/kernel_modules/poll.c index c5d40fd..3e0ff09 100644 --- a/userland/kernel_modules/poll.c +++ b/userland/kernel_modules/poll.c @@ -26,13 +26,14 @@ int main(int argc, char **argv) { pfd.fd = fd; pfd.events = POLLIN; while (1) { - puts("loop"); + puts("poll"); i = poll(&pfd, 1, -1); if (i == -1) { perror("poll"); assert(0); } revents = pfd.revents; + printf("revents = %d\n", revents); if (revents & POLLIN) { n = read(pfd.fd, buf, sizeof(buf)); printf("POLLIN n=%d buf=%.*s\n", n, n, buf);