diff --git a/README.md b/README.md
index db8c9d5..e3fe3be 100644
--- a/README.md
+++ b/README.md
@@ -318,28 +318,4 @@ But TODO I don't think you can see where you are in the kernel source code and l
1. [Bibliography](bibliography.md)
1. Examples
1. [Host](host/)
- 1. Buildroot
- 1. [module_init](kernel_module/module_init.c)
- 1. Debugging
- 1. [hello](kernel_module/hello.c)
- 1. [hello2](kernel_module/hello2.c)
- 1. [debugfs](kernel_module/debugfs.c)
- 1. [panic](kernel_module/panic.c)
- 1. [params](kernel_module/params.c)
- 1. [fops](kernel_module/fops.c)
- 1. [ioctl](kernel_module/ioctl.c)
- 1. [poll](kernel_module/poll.c)
- 1. [anonymous_inode](kernel_module/anonymous_inode.c)
- 1. Asynchronous
- 1. [workqueue](kernel_module/workqueue.c)
- 1. [sleep](kernel_module/sleep.c)
- 1. [kthread](kernel_module/kthread.c)
- 1. [kthreads](kernel_module/kthreads.c)
- 1. [schedule](kernel_module/schedule.c)
- 1. [timer](kernel_module/timer.c)
- 1. [work_from_work](kernel_module/work_from_work.c)
- 1. [irq](kernel_module/irq.c)
- 1. Module dependencies
- 1. [dep.c](kernel_module/dep.c)
- 1. [dep2.c](kernel_module/dep2.c)
- 1. [character_device](kernel_module/character_device.c)
+ 1. [QEMU Buildroot](kernel_module/)
diff --git a/bibliography.md b/bibliography.md
index a6e64f2..6173bb6 100644
--- a/bibliography.md
+++ b/bibliography.md
@@ -1,7 +1,14 @@
# Bibliography
-- the best, but always outdated, book. Updated source:
+Runnable stuff:
+
+- the best book, but outdated. Updated source: But the examples are non-minimal and take too much brain power to understand.
+- manual build process without Buildroot, very few and simple kernel modules
+- Buildroot based, no kernel modules?
-
+
+Theory:
+
-
-
- you will fall here a lot when the hard Google queries start popping
diff --git a/kernel_module/README.md b/kernel_module/README.md
new file mode 100644
index 0000000..459f408
--- /dev/null
+++ b/kernel_module/README.md
@@ -0,0 +1,28 @@
+# Kernel module
+
+1. Modules
+ 1. [module_init](module_init.c)
+ 1. Debugging
+ 1. [hello](hello.c)
+ 1. [hello2](hello2.c)
+ 1. [debugfs](debugfs.c)
+ 1. [panic](panic.c)
+ 1. [params](params.c)
+ 1. [fops](fops.c)
+ 1. [ioctl](ioctl.c)
+ 1. [poll](poll.c)
+ 1. [anonymous_inode](anonymous_inode.c)
+ 1. Asynchronous
+ 1. [workqueue](workqueue.c)
+ 1. [sleep](sleep.c)
+ 1. [kthread](kthread.c)
+ 1. [kthreads](kthreads.c)
+ 1. [schedule](schedule.c)
+ 1. [timer](timer.c)
+ 1. [work_from_work](work_from_work.c)
+ 1. [irq](irq.c)
+ 1. Module dependencies
+ 1. [dep.c](dep.c)
+ 1. [dep2.c](dep2.c)
+ 1. [character_device](character_device.c)
+1. [user](user/)
diff --git a/kernel_module/fops.c b/kernel_module/fops.c
index c309df9..beb01d9 100644
--- a/kernel_module/fops.c
+++ b/kernel_module/fops.c
@@ -33,7 +33,7 @@ static char data[] = {'a', 'b', 'c', 'd'};
static int open(struct inode *inode, struct file *filp)
{
- printk(KERN_INFO "open\n");
+ pr_info("open\n");
return 0;
}
@@ -45,9 +45,10 @@ static int open(struct inode *inode, struct file *filp)
static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off)
{
ssize_t ret;
- printk(KERN_INFO "read\n");
- printk(KERN_INFO "len = %zu\n", len);
- printk(KERN_INFO "off = %lld\n", (long long)*off);
+
+ pr_info("read\n");
+ pr_info("len = %zu\n", len);
+ pr_info("off = %lld\n", (long long)*off);
if (sizeof(data) <= *off) {
ret = 0;
} else {
@@ -58,8 +59,8 @@ static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off
*off += ret;
}
}
- printk(KERN_INFO "buf = %.*s\n", (int)len, buf);
- printk(KERN_INFO "ret = %lld\n", (long long)ret);
+ pr_info("buf = %.*s\n", (int)len, buf);
+ pr_info("ret = %lld\n", (long long)ret);
return ret;
}
@@ -70,9 +71,10 @@ static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off
static ssize_t write(struct file *filp, const char __user *buf, size_t len, loff_t *off)
{
ssize_t ret;
- printk(KERN_INFO "write\n");
- printk(KERN_INFO "len = %zu\n", len);
- printk(KERN_INFO "off = %lld\n", (long long)*off);
+
+ pr_info("write\n");
+ pr_info("len = %zu\n", len);
+ pr_info("off = %lld\n", (long long)*off);
if (sizeof(data) <= *off) {
ret = 0;
} else {
@@ -83,12 +85,12 @@ static ssize_t write(struct file *filp, const char __user *buf, size_t len, loff
ret = -EFAULT;
} else {
ret = len;
- printk(KERN_INFO "buf = %.*s\n", (int)len, data + *off);
+ pr_info("buf = %.*s\n", (int)len, data + *off);
*off += ret;
}
}
}
- printk(KERN_INFO "ret = %lld\n", (long long)ret);
+ pr_info("ret = %lld\n", (long long)ret);
return ret;
}
@@ -98,17 +100,17 @@ http://stackoverflow.com/questions/11393674/why-is-the-close-function-is-called-
*/
static int release(struct inode *inode, struct file *filp)
{
- printk(KERN_INFO "release\n");
+ pr_info("release\n");
return 0;
}
static loff_t llseek(struct file *filp, loff_t off, int whence)
{
loff_t newpos;
- printk(KERN_INFO "llseek\n");
- printk(KERN_INFO "off = %lld\n", (long long)off);
- printk(KERN_INFO "whence = %lld\n", (long long)whence);
+ pr_info("llseek\n");
+ pr_info("off = %lld\n", (long long)off);
+ pr_info("whence = %lld\n", (long long)whence);
switch(whence) {
case SEEK_SET:
newpos = off;
@@ -124,7 +126,7 @@ static loff_t llseek(struct file *filp, loff_t off, int whence)
}
if (newpos < 0) return -EINVAL;
filp->f_pos = newpos;
- printk(KERN_INFO "newpos = %lld\n", (long long)newpos);
+ pr_info("newpos = %lld\n", (long long)newpos);
return newpos;
}
diff --git a/kernel_module/pci.c b/kernel_module/pci.c
index ad75786..22ec432 100644
--- a/kernel_module/pci.c
+++ b/kernel_module/pci.c
@@ -1,266 +1,211 @@
/*
-http://www.zarb.org/~trem/kernel/pci/pci-driver.c
-http://nairobi-embedded.org/linux_pci_device_driver.html
+Like every other hardware, we could interact with PCI on x86
+using only IO instructions and memory operations.
+
+But PCI is a complex communication protocol that the Linux kernel
+implements beautifully for us, so let's use the kernel API.
+
+This example relies on the QEMU "edu" educational device.
+Grep QEMU source for the device description, and keep it open at all times!
+
+- http://www.zarb.org/~trem/kernel/pci/pci-driver.c inb outb runnable example (no device)
+- LDD3 PCI chapter
+- another QEMU device + module, but using a custom QEMU device:
+ - https://github.com/levex/kernel-qemu-pci/blob/31fc9355161b87cea8946b49857447ddd34c7aa6/module/levpci.c
+ - https://github.com/levex/kernel-qemu-pci/blob/31fc9355161b87cea8946b49857447ddd34c7aa6/qemu/hw/char/lev-pci.c
+- https://is.muni.cz/el/1433/podzim2016/PB173/um/65218991/ course given by the creator of the edu device.
+ In Czech, and only describes API
+- http://nairobi-embedded.org/linux_pci_device_driver.html
*/
-#include
+#include /* put_user */
+#include /* cdev_ */
+#include
#include
+#include
#include
#include
-#include
-#include /* cdev_ */
-#include /* put_user */
-#define MAX_DEVICE 1
-#define DEVICE_NAME "virtual_pci"
-#define BAR_IO 0
-#define BAR_MEM 3
+/* Each PCI device has 6 BAR IOs (base address register) as per the PCI spec.
+ *
+ * Each BAR corresponds to an address range that can be used to communicate with the PCI.
+ *
+ * Each BAR is one of two types:
+ *
+ * - IORESOURCE_IO: must be accessed with inX and outX
+ * - IORESOURCE_MEM: must be accessed with ioreadX and iowriteX
+ * This is the saner method apparently, and what the edu device uses.
+ *
+ * The length of each region is defined BY THE HARDWARE, and communicated to software
+ * via the configuration registers.
+ *
+ * The Linux kernel automatically parses the 64 bytes of standardized configuration registers for us.
+ *
+ * QEMU devices register those regions with:
+ *
+ * memory_region_init_io(&edu->mmio, OBJECT(edu), &edu_mmio_ops, edu,
+ * "edu-mmio", 1 << 20);
+ * pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &edu->mmio);
+ * */
+#define BAR 0
+#define CDEV_NAME "lkmc_pci"
MODULE_LICENSE("GPL");
+/**
+ * 0x1234: QEMU vendor ID
+ * 0x11e8: edu device ID
+ */
static struct pci_device_id pci_ids[] = {
{ PCI_DEVICE(0x1234, 0x11e8), },
{ 0, }
};
-
MODULE_DEVICE_TABLE(pci, pci_ids);
-static dev_t devno;
static int major;
+static struct pci_dev *pdev;
+static void __iomem *mmio;
-struct pci_cdev {
- int minor;
- struct pci_dev *pci_dev;
- struct cdev *cdev;
-};
-
-static struct pci_cdev pci_cdev[MAX_DEVICE];
-
-static void pci_cdev_del(struct pci_cdev pci_cdev[], int size, struct pci_dev *pdev)
+static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off)
{
- int i;
+ ssize_t ret;
+ u32 kbuf;
- for (i=0; if_pos = off;
+ return off;
}
-/**
- * -1 => not found
- * others => found
- */
-static int pci_cdev_search_minor(struct pci_cdev pci_cdev[],
- int size, struct pci_dev *pdev)
-{
- int i, minor = -1;
-
- for (i=0; iprivate_data = (void *)pci_cdev_search_pci_dev(pci_cdev, MAX_DEVICE, minor);
- return 0;
-}
-
-static ssize_t pci_read(struct file *file,
- char *buffer,
- size_t length,
- loff_t * offset)
-{
- int byte_read = 0;
- unsigned char value;
- struct pci_dev *pdev = (struct pci_dev *)file->private_data;
- unsigned long pci_io_addr = 0;
-
- pci_io_addr = pci_resource_start(pdev,BAR_IO);
- while (byte_read < length) {
- value = inb(pci_io_addr + 1);
- put_user(value, &buffer[byte_read]);
- byte_read++;
- }
- return byte_read;
-}
-
-static ssize_t pci_write(struct file *filp, const char *buffer, size_t len, loff_t * off) {
- int i;
- unsigned char value;
- struct pci_dev *pdev = (struct pci_dev *)filp->private_data;
- unsigned long pci_io_addr = 0;
-
- pci_io_addr = pci_resource_start(pdev,BAR_IO);
- for (i=0; i this driver doesn't handle this device
- * 1 => this driver handles this device
+ * Called just after insmod if the hardware device is connected,
+ * not called otherwise.
+ *
+ * 0: all good
+ * 1: failed
*/
static int pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
- int i, ret, minor;
- struct cdev *cdev;
- dev_t devno;
-
pr_info("pci_probe\n");
+ major = register_chrdev(0, CDEV_NAME, &fops);
+ pdev = dev;
+ if (pci_enable_device(dev) < 0) {
+ dev_err(&(pdev->dev), "pci_enable_device\n");
+ goto error;
+ }
+ if (pci_request_region(dev, BAR, "myregion0")) {
+ dev_err(&(pdev->dev), "pci_request_region\n");
+ goto error;
+ }
+ mmio = pci_iomap(pdev, BAR, pci_resource_len(pdev, BAR));
- minor = -1;
- for (i=0; idev), "pci_resource_flags\n");
+ goto error;
+ }
+
+ /* 1Mb, as defined by the "1 << 20" in QEMU's memory_region_init_io. Same as pci_resource_len. */
+ resource_size_t start = pci_resource_start(pdev, BAR);
+ resource_size_t end = pci_resource_end(pdev, BAR);
+ pr_info("length %llx\n", (unsigned long long)(end + 1 - start));
+
+ /* The PCI standardized 64 bytes of the configuration space, see LDD3. */
+ for (i = 0; i < 64u; ++i) {
+ pci_read_config_byte(pdev, i, &val);
+ pr_info("config %x %x\n", i, val);
+ }
+
+ /* Initial value of the IO memory. */
+ for (i = 0; i < 0x28; i += 4) {
+ pr_info("io %x %x\n", i, ioread32((void*)(mmio + i)));
}
}
- if (minor < 0) {
- dev_info(&(dev->dev), "error pci_cdev_add");
- goto error;
- }
-
- devno = MKDEV(major, minor);
- cdev = cdev_alloc();
- cdev_init(cdev, &pci_ops);
- cdev->owner = THIS_MODULE;
-
- /* register cdev */
- ret = cdev_add(cdev, devno, 1);
- if (ret < 0) {
- dev_err(&(dev->dev), "Can't register character device\n");
- goto error;
- }
- pci_cdev[minor].cdev = cdev;
-
- dev_info(&(dev->dev), "%s The major device number is %d (%d).\n",
- "Registeration is a success", MAJOR(devno), MINOR(devno));
- dev_info(&(dev->dev), "If you want to talk to the device driver,\n");
- dev_info(&(dev->dev), "you'll have to create a device file. \n");
- dev_info(&(dev->dev), "We suggest you use:\n");
- dev_info(&(dev->dev), "mknod %s c %d %d\n", DEVICE_NAME, MAJOR(devno), MINOR(devno));
- dev_info(&(dev->dev), "The device file name is important, because\n");
- dev_info(&(dev->dev), "the ioctl program assumes that's the\n");
- dev_info(&(dev->dev), "file you'll use.\n");
-
- /* enable the device */
- pci_enable_device(dev);
-
- /* 'alloc' IO to talk with the card */
- if (pci_request_region(dev, BAR_IO, "IO-pci")) {
- dev_err(&(dev->dev), "Can't request BAR0\n");
- cdev_del(cdev);
- goto error;
- }
-
- /* TODO */
- /* check that BAR_IO is *really* IO region */
- /*if ((pci_resource_flags(dev, BAR_IO) & IORESOURCE_IO) != IORESOURCE_IO) {*/
- /*dev_err(&(dev->dev), "BAR2 isn't an IO region\n");*/
- /*cdev_del(cdev);*/
- /*goto error;*/
- /*}*/
return 0;
-
error:
return 1;
}
static void pci_remove(struct pci_dev *dev)
{
- int minor;
- struct cdev *cdev;
-
- minor = pci_cdev_search_minor(pci_cdev, MAX_DEVICE, dev);
- cdev = pci_cdev_search_cdev(pci_cdev, MAX_DEVICE, minor);
- if (cdev != NULL)
- cdev_del(cdev);
- pci_cdev_del(pci_cdev, MAX_DEVICE, dev);
- pci_release_region(dev, BAR_IO);
+ pr_info("pci_remove\n");
+ pci_release_region(dev, BAR);
+ unregister_chrdev(major, CDEV_NAME);
}
static struct pci_driver pci_driver = {
- .name = "pci",
+ .name = "lkmc_pci",
.id_table = pci_ids,
.probe = pci_probe,
.remove = pci_remove,
};
-static int __init pci_init_module(void)
+static int myinit(void)
{
- int i, first_minor, ret;
-
- ret = alloc_chrdev_region(&devno, 0, MAX_DEVICE, "lkmc_pci");
- major = MAJOR(devno);
- first_minor = MINOR(devno);
- for (i=0; i < MAX_DEVICE; i++) {
- pci_cdev[i].minor = first_minor++;
- pci_cdev[i].pci_dev = NULL;
- pci_cdev[i].cdev = NULL;
+ if (pci_register_driver(&pci_driver) < 0) {
+ return 1;
}
- ret = pci_register_driver(&pci_driver);
return 0;
}
-static void pci_exit_module(void)
+static void myexit(void)
{
- int i;
-
pci_unregister_driver(&pci_driver);
- for(i=0; i< MAX_DEVICE; i++) {
- if (pci_cdev[i].pci_dev != NULL) {
- cdev_del(pci_cdev[i].cdev);
- }
- }
- unregister_chrdev_region(devno, MAX_DEVICE);
}
-module_init(pci_init_module);
-module_exit(pci_exit_module);
+module_init(myinit);
+module_exit(myexit);
diff --git a/rootfs_overlay/README.md b/rootfs_overlay/README.md
new file mode 100644
index 0000000..1bdc972
--- /dev/null
+++ b/rootfs_overlay/README.md
@@ -0,0 +1,8 @@
+# rootfs_overlay
+
+This directory is copied into the target filesystem.
+
+We use it for things like:
+
+- customized configuration files
+- userland module test scripts
diff --git a/rootfs_overlay/character_device.sh b/rootfs_overlay/character_device.sh
index 8205855..cb693c9 100755
--- a/rootfs_overlay/character_device.sh
+++ b/rootfs_overlay/character_device.sh
@@ -1,7 +1,7 @@
#!/bin/sh
insmod /character_device.ko
-major="$(grep lkmc_character_device /proc/devices | cut -d ' ' -f 1)"
-mknod /character_device.dev c $major 0
-cat /character_device.dev
+/mknoddev.sh character_device
+cat /dev/lkmc_character_device
# => abcd
+rm /dev/lkmc_character_device
rmmod character_device
diff --git a/rootfs_overlay/mknoddev.sh b/rootfs_overlay/mknoddev.sh
new file mode 100755
index 0000000..65d3899
--- /dev/null
+++ b/rootfs_overlay/mknoddev.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+dev="lkmc_$1"
+major="$(grep "$dev" /proc/devices | cut -d ' ' -f 1)"
+mknod "/dev/$dev" c "$major" 0
diff --git a/rootfs_overlay/pci.sh b/rootfs_overlay/pci.sh
new file mode 100755
index 0000000..c61961c
--- /dev/null
+++ b/rootfs_overlay/pci.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+set -ex
+
+# Setup.
+insmod /pci.ko
+/mknoddev.sh pci
+
+# Identification.
+dd bs=4 status=none if=/dev/lkmc_pci count=1 skip=0 | od -An -t x1
+# => 010000ed
+
+# Negator.
+dd bs=4 status=none if=/dev/lkmc_pci count=1 skip=1 | od -An -t x1
+printf '\xF0\xF0\xF0\xF0' | dd bs=4 status=none of=/dev/lkmc_pci count=1 seek=1
+dd bs=4 status=none if=/dev/lkmc_pci count=1 skip=1 | od -An -t x1
+# => 0F0F0F0F
+
+# Factorial calculator.
+# factorial(0xC) = 0x1c8cfc00
+printf '\x00\x00\x00\x0C' | dd bs=4 status=none of=/dev/lkmc_pci count=1 seek=2
+printf '\x00\x00\x00\x00' | dd bs=4 status=none of=/dev/lkmc_pci count=1 seek=8
+sleep 1
+dd bs=4 status=none if=/dev/lkmc_pci count=1 skip=2 | od -An -t x1
+dd bs=4 status=none if=/dev/lkmc_pci count=1 skip=8 | od -An -t x1
+# => 1c8cfc00
+
+# Teardown.
+rm /dev/lkmc_pci
+rmmod pci