diff --git a/README.md b/README.md index db8c9d5..e3fe3be 100644 --- a/README.md +++ b/README.md @@ -318,28 +318,4 @@ But TODO I don't think you can see where you are in the kernel source code and l 1. [Bibliography](bibliography.md) 1. Examples 1. [Host](host/) - 1. Buildroot - 1. [module_init](kernel_module/module_init.c) - 1. Debugging - 1. [hello](kernel_module/hello.c) - 1. [hello2](kernel_module/hello2.c) - 1. [debugfs](kernel_module/debugfs.c) - 1. [panic](kernel_module/panic.c) - 1. [params](kernel_module/params.c) - 1. [fops](kernel_module/fops.c) - 1. [ioctl](kernel_module/ioctl.c) - 1. [poll](kernel_module/poll.c) - 1. [anonymous_inode](kernel_module/anonymous_inode.c) - 1. Asynchronous - 1. [workqueue](kernel_module/workqueue.c) - 1. [sleep](kernel_module/sleep.c) - 1. [kthread](kernel_module/kthread.c) - 1. [kthreads](kernel_module/kthreads.c) - 1. [schedule](kernel_module/schedule.c) - 1. [timer](kernel_module/timer.c) - 1. [work_from_work](kernel_module/work_from_work.c) - 1. [irq](kernel_module/irq.c) - 1. Module dependencies - 1. [dep.c](kernel_module/dep.c) - 1. [dep2.c](kernel_module/dep2.c) - 1. [character_device](kernel_module/character_device.c) + 1. [QEMU Buildroot](kernel_module/) diff --git a/bibliography.md b/bibliography.md index a6e64f2..6173bb6 100644 --- a/bibliography.md +++ b/bibliography.md @@ -1,7 +1,14 @@ # Bibliography -- the best, but always outdated, book. Updated source: +Runnable stuff: + +- the best book, but outdated. Updated source: But examples non-minimal and take too much brain power to understand. +- manual build process without Buildroot, very few and simple kernel modules +- Buildroot based, no kernel modules? - + +Theory: + - - - you will fall here a lot when the hard Google queries start popping diff --git a/kernel_module/README.md b/kernel_module/README.md new file mode 100644 index 0000000..459f408 --- /dev/null +++ b/kernel_module/README.md @@ -0,0 +1,28 @@ +# Kernel module + +1. Modules + 1. 
[module_init](module_init.c) + 1. Debugging + 1. [hello](hello.c) + 1. [hello2](hello2.c) + 1. [debugfs](debugfs.c) + 1. [panic](panic.c) + 1. [params](params.c) + 1. [fops](fops.c) + 1. [ioctl](ioctl.c) + 1. [poll](poll.c) + 1. [anonymous_inode](anonymous_inode.c) + 1. Asynchronous + 1. [workqueue](workqueue.c) + 1. [sleep](sleep.c) + 1. [kthread](kthread.c) + 1. [kthreads](kthreads.c) + 1. [schedule](schedule.c) + 1. [timer](timer.c) + 1. [work_from_work](work_from_work.c) + 1. [irq](irq.c) + 1. Module dependencies + 1. [dep.c](dep.c) + 1. [dep2.c](dep2.c) + 1. [character_device](character_device.c) +1. [user](user/) diff --git a/kernel_module/fops.c b/kernel_module/fops.c index c309df9..beb01d9 100644 --- a/kernel_module/fops.c +++ b/kernel_module/fops.c @@ -33,7 +33,7 @@ static char data[] = {'a', 'b', 'c', 'd'}; static int open(struct inode *inode, struct file *filp) { - printk(KERN_INFO "open\n"); + pr_info("open\n"); return 0; } @@ -45,9 +45,10 @@ static int open(struct inode *inode, struct file *filp) static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off) { ssize_t ret; - printk(KERN_INFO "read\n"); - printk(KERN_INFO "len = %zu\n", len); - printk(KERN_INFO "off = %lld\n", (long long)*off); + + pr_info("read\n"); + pr_info("len = %zu\n", len); + pr_info("off = %lld\n", (long long)*off); if (sizeof(data) <= *off) { ret = 0; } else { @@ -58,8 +59,8 @@ static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off *off += ret; } } - printk(KERN_INFO "buf = %.*s\n", (int)len, buf); - printk(KERN_INFO "ret = %lld\n", (long long)ret); + pr_info("buf = %.*s\n", (int)len, buf); + pr_info("ret = %lld\n", (long long)ret); return ret; 
} @@ -70,9 +71,10 @@ static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off static ssize_t write(struct file *filp, const char __user *buf, size_t len, loff_t *off) { ssize_t ret; - printk(KERN_INFO "write\n"); - printk(KERN_INFO "len = %zu\n", len); - printk(KERN_INFO "off = %lld\n", (long long)*off); + + pr_info("write\n"); + pr_info("len = %zu\n", len); + pr_info("off = %lld\n", (long long)*off); if (sizeof(data) <= *off) { ret = 0; } else { @@ -83,12 +85,12 @@ static ssize_t write(struct file *filp, const char __user *buf, size_t len, loff ret = -EFAULT; } else { ret = len; - printk(KERN_INFO "buf = %.*s\n", (int)len, data + *off); + pr_info("buf = %.*s\n", (int)len, data + *off); *off += ret; } } } - printk(KERN_INFO "ret = %lld\n", (long long)ret); + pr_info("ret = %lld\n", (long long)ret); return ret; } @@ -98,17 +100,17 @@ http://stackoverflow.com/questions/11393674/why-is-the-close-function-is-called- */ static int release(struct inode *inode, struct file *filp) { - printk(KERN_INFO "release\n"); + pr_info("release\n"); return 0; } static loff_t llseek(struct file *filp, loff_t off, int whence) { loff_t newpos; - printk(KERN_INFO "llseek\n"); - printk(KERN_INFO "off = %lld\n", (long long)off); - printk(KERN_INFO "whence = %lld\n", (long long)whence); + pr_info("llseek\n"); + pr_info("off = %lld\n", (long long)off); + pr_info("whence = %lld\n", (long long)whence); switch(whence) { case SEEK_SET: newpos = off; @@ -124,7 +126,7 @@ static loff_t llseek(struct file *filp, loff_t off, int whence) } if (newpos < 0) return -EINVAL; filp->f_pos = newpos; - printk(KERN_INFO "newpos = %lld\n", (long long)newpos); + pr_info("newpos = %lld\n", (long long)newpos); return newpos; } diff --git a/kernel_module/pci.c b/kernel_module/pci.c index ad75786..22ec432 100644 --- a/kernel_module/pci.c +++ b/kernel_module/pci.c @@ -1,266 +1,211 @@ /* -http://www.zarb.org/~trem/kernel/pci/pci-driver.c -http://nairobi-embedded.org/linux_pci_device_driver.html 
+Like every other hardware, we could interact with PCI on x86 +using only IO instructions and memory operations. + +But PCI is a complex communication protocol that the Linux kernel +implements beautifully for us, so let's use the kernel API. + +This example relies on the QEMU "edu" educational device. +Grep QEMU source for the device description, and keep it open at all times! + +- http://www.zarb.org/~trem/kernel/pci/pci-driver.c inb outb runnable example (no device) +- LDD3 PCI chapter +- another QEMU device + module, but using a custom QEMU device: + - https://github.com/levex/kernel-qemu-pci/blob/31fc9355161b87cea8946b49857447ddd34c7aa6/module/levpci.c + - https://github.com/levex/kernel-qemu-pci/blob/31fc9355161b87cea8946b49857447ddd34c7aa6/qemu/hw/char/lev-pci.c +- https://is.muni.cz/el/1433/podzim2016/PB173/um/65218991/ course given by the creator of the edu device. + In Czech, and only describes API +- http://nairobi-embedded.org/linux_pci_device_driver.html */ -#include +#include /* put_user */ +#include /* cdev_ */ +#include #include +#include #include #include -#include -#include /* cdev_ */ -#include /* put_user */ -#define MAX_DEVICE 1 -#define DEVICE_NAME "virtual_pci" -#define BAR_IO 0 -#define BAR_MEM 3 +/* Each PCI device has 6 BAR IOs (base address register) as per the PCI spec. + * + * Each BAR corresponds to an address range that can be used to communicate with the PCI device. + * + * Each BAR is of one of the two types: + * + * - IORESOURCE_IO: must be accessed with inX and outX + * - IORESOURCE_MEM: must be accessed with ioreadX and iowriteX + * This is the saner method apparently, and what the edu device uses. + * + * The length of each region is defined BY THE HARDWARE, and communicated to software + * via the configuration registers. + * + * The Linux kernel automatically parses the 64 bytes of standardized configuration registers for us. 
+ * + * QEMU devices register those regions with: + * + * memory_region_init_io(&edu->mmio, OBJECT(edu), &edu_mmio_ops, edu, + * "edu-mmio", 1 << 20); + * pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &edu->mmio); + * */ +#define BAR 0 +#define CDEV_NAME "lkmc_pci" MODULE_LICENSE("GPL"); +/** + * 0x1234: QEMU vendor ID + * 0x11e8: edu device ID + */ static struct pci_device_id pci_ids[] = { { PCI_DEVICE(0x1234, 0x11e8), }, { 0, } }; - MODULE_DEVICE_TABLE(pci, pci_ids); -static dev_t devno; static int major; +static struct pci_dev *pdev; +static void __iomem *mmio; -struct pci_cdev { - int minor; - struct pci_dev *pci_dev; - struct cdev *cdev; -}; - -static struct pci_cdev pci_cdev[MAX_DEVICE]; - -static void pci_cdev_del(struct pci_cdev pci_cdev[], int size, struct pci_dev *pdev) +static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off) { - int i; + ssize_t ret; + u32 kbuf; - for (i=0; if_pos = off; + return off; } -/** - * -1 => not found - * others => found - */ -static int pci_cdev_search_minor(struct pci_cdev pci_cdev[], - int size, struct pci_dev *pdev) -{ - int i, minor = -1; - - for (i=0; iprivate_data = (void *)pci_cdev_search_pci_dev(pci_cdev, MAX_DEVICE, minor); - return 0; -} - -static ssize_t pci_read(struct file *file, - char *buffer, - size_t length, - loff_t * offset) -{ - int byte_read = 0; - unsigned char value; - struct pci_dev *pdev = (struct pci_dev *)file->private_data; - unsigned long pci_io_addr = 0; - - pci_io_addr = pci_resource_start(pdev,BAR_IO); - while (byte_read < length) { - value = inb(pci_io_addr + 1); - put_user(value, &buffer[byte_read]); - byte_read++; - } - return byte_read; -} - -static ssize_t pci_write(struct file *filp, const char *buffer, size_t len, loff_t * off) { - int i; - unsigned char value; - struct pci_dev *pdev = (struct pci_dev *)filp->private_data; - unsigned long pci_io_addr = 0; - - pci_io_addr = pci_resource_start(pdev,BAR_IO); - for (i=0; i this driver doesn't handle this 
device - * 1 => this driver handles this device + * Called just after insmod if the hardware device is connected, + * not called otherwise. + * + * 0: all good + * 1: failed */ static int pci_probe(struct pci_dev *dev, const struct pci_device_id *id) { - int i, ret, minor; - struct cdev *cdev; - dev_t devno; - pr_info("pci_probe\n"); + major = register_chrdev(0, CDEV_NAME, &fops); + pdev = dev; + if (pci_enable_device(dev) < 0) { + dev_err(&(pdev->dev), "pci_enable_device\n"); + goto error; + } + if (pci_request_region(dev, BAR, "myregion0")) { + dev_err(&(pdev->dev), "pci_request_region\n"); + goto error; + } + mmio = pci_iomap(pdev, BAR, pci_resource_len(pdev, BAR)); - minor = -1; - for (i=0; idev), "pci_resource_flags\n"); + goto error; + } + + /* 1Mb, as defined by the "1 << 20" in QEMU's memory_region_init_io. Same as pci_resource_len. */ + resource_size_t start = pci_resource_start(pdev, BAR); + resource_size_t end = pci_resource_end(pdev, BAR); + pr_info("length %llx\n", (unsigned long long)(end + 1 - start)); + + /* The PCI standardized 64 bytes of the configuration space, see LDD3. */ + for (i = 0; i < 64u; ++i) { + pci_read_config_byte(pdev, i, &val); + pr_info("config %x %x\n", i, val); + } + + /* Initial value of the IO memory. 
*/ + for (i = 0; i < 0x28; i += 4) { + pr_info("io %x %x\n", i, ioread32((void*)(mmio + i))); } } - if (minor < 0) { - dev_info(&(dev->dev), "error pci_cdev_add"); - goto error; - } - - devno = MKDEV(major, minor); - cdev = cdev_alloc(); - cdev_init(cdev, &pci_ops); - cdev->owner = THIS_MODULE; - - /* register cdev */ - ret = cdev_add(cdev, devno, 1); - if (ret < 0) { - dev_err(&(dev->dev), "Can't register character device\n"); - goto error; - } - pci_cdev[minor].cdev = cdev; - - dev_info(&(dev->dev), "%s The major device number is %d (%d).\n", - "Registeration is a success", MAJOR(devno), MINOR(devno)); - dev_info(&(dev->dev), "If you want to talk to the device driver,\n"); - dev_info(&(dev->dev), "you'll have to create a device file. \n"); - dev_info(&(dev->dev), "We suggest you use:\n"); - dev_info(&(dev->dev), "mknod %s c %d %d\n", DEVICE_NAME, MAJOR(devno), MINOR(devno)); - dev_info(&(dev->dev), "The device file name is important, because\n"); - dev_info(&(dev->dev), "the ioctl program assumes that's the\n"); - dev_info(&(dev->dev), "file you'll use.\n"); - - /* enable the device */ - pci_enable_device(dev); - - /* 'alloc' IO to talk with the card */ - if (pci_request_region(dev, BAR_IO, "IO-pci")) { - dev_err(&(dev->dev), "Can't request BAR0\n"); - cdev_del(cdev); - goto error; - } - - /* TODO */ - /* check that BAR_IO is *really* IO region */ - /*if ((pci_resource_flags(dev, BAR_IO) & IORESOURCE_IO) != IORESOURCE_IO) {*/ - /*dev_err(&(dev->dev), "BAR2 isn't an IO region\n");*/ - /*cdev_del(cdev);*/ - /*goto error;*/ - /*}*/ return 0; - error: return 1; } static void pci_remove(struct pci_dev *dev) { - int minor; - struct cdev *cdev; - - minor = pci_cdev_search_minor(pci_cdev, MAX_DEVICE, dev); - cdev = pci_cdev_search_cdev(pci_cdev, MAX_DEVICE, minor); - if (cdev != NULL) - cdev_del(cdev); - pci_cdev_del(pci_cdev, MAX_DEVICE, dev); - pci_release_region(dev, BAR_IO); + pr_info("pci_remove\n"); + pci_release_region(dev, BAR); + unregister_chrdev(major, 
CDEV_NAME); } static struct pci_driver pci_driver = { - .name = "pci", + .name = "lkmc_pci", .id_table = pci_ids, .probe = pci_probe, .remove = pci_remove, }; -static int __init pci_init_module(void) +static int myinit(void) { - int i, first_minor, ret; - - ret = alloc_chrdev_region(&devno, 0, MAX_DEVICE, "lkmc_pci"); - major = MAJOR(devno); - first_minor = MINOR(devno); - for (i=0; i < MAX_DEVICE; i++) { - pci_cdev[i].minor = first_minor++; - pci_cdev[i].pci_dev = NULL; - pci_cdev[i].cdev = NULL; + if (pci_register_driver(&pci_driver) < 0) { + return 1; } - ret = pci_register_driver(&pci_driver); return 0; } -static void pci_exit_module(void) +static void myexit(void) { - int i; - pci_unregister_driver(&pci_driver); - for(i=0; i< MAX_DEVICE; i++) { - if (pci_cdev[i].pci_dev != NULL) { - cdev_del(pci_cdev[i].cdev); - } - } - unregister_chrdev_region(devno, MAX_DEVICE); } -module_init(pci_init_module); -module_exit(pci_exit_module); +module_init(myinit); +module_exit(myexit); diff --git a/rootfs_overlay/README.md b/rootfs_overlay/README.md new file mode 100644 index 0000000..1bdc972 --- /dev/null +++ b/rootfs_overlay/README.md @@ -0,0 +1,8 @@ +# rootfs_overlay + +This directory is copied into the target filesystem. 
+ +We use it for things like: + +- customized configuration files +- userland module test scripts diff --git a/rootfs_overlay/character_device.sh b/rootfs_overlay/character_device.sh index 8205855..cb693c9 100755 --- a/rootfs_overlay/character_device.sh +++ b/rootfs_overlay/character_device.sh @@ -1,7 +1,7 @@ #!/bin/sh insmod /character_device.ko -major="$(grep lkmc_character_device /proc/devices | cut -d ' ' -f 1)" -mknod /character_device.dev c $major 0 -cat /character_device.dev +/mknoddev.sh character_device +cat /dev/lkmc_character_device # => abcd +rm /dev/lkmc_character_device rmmod character_device diff --git a/rootfs_overlay/mknoddev.sh b/rootfs_overlay/mknoddev.sh new file mode 100755 index 0000000..65d3899 --- /dev/null +++ b/rootfs_overlay/mknoddev.sh @@ -0,0 +1,4 @@ +#!/bin/sh +dev="lkmc_$1" +major="$(grep "$dev" /proc/devices | cut -d ' ' -f 1)" +mknod "/dev/$dev" c "$major" 0 diff --git a/rootfs_overlay/pci.sh b/rootfs_overlay/pci.sh new file mode 100755 index 0000000..c61961c --- /dev/null +++ b/rootfs_overlay/pci.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +set -ex + +# Setup. +insmod /pci.ko +/mknoddev.sh pci + +# Identification. +dd bs=4 status=none if=/dev/lkmc_pci count=1 skip=0 | od -An -t x1 +# => 010000ed + +# Negator. +dd bs=4 status=none if=/dev/lkmc_pci count=1 skip=1 | od -An -t x1 +printf '\xF0\xF0\xF0\xF0' | dd bs=4 status=none of=/dev/lkmc_pci count=1 seek=1 +dd bs=4 status=none if=/dev/lkmc_pci count=1 skip=1 | od -An -t x1 +# => 0F0F0F0F + +# Factorial calculator. +# factorial(0xC) = 0x1c8cfc00 +printf '\x00\x00\x00\x0C' | dd bs=4 status=none of=/dev/lkmc_pci count=1 seek=2 +printf '\x00\x00\x00\x00' | dd bs=4 status=none of=/dev/lkmc_pci count=1 seek=8 +sleep 1 +dd bs=4 status=none if=/dev/lkmc_pci count=1 skip=2 | od -An -t x1 +dd bs=4 status=none if=/dev/lkmc_pci count=1 skip=8 | od -An -t x1 +# => 1c8cfc00 + +# Teardown. +rm /dev/lkmc_pci +rmmod pci