From 0111ca406bdfa6fd65a2605d353583b4c4051781 Mon Sep 17 00:00:00 2001 From: Ciro Santilli Date: Mon, 16 Apr 2018 13:09:30 +0100 Subject: [PATCH] CONFIG_PROC_EVENTS --- README.adoc | 48 +++++++-- kernel_config_fragment | 4 + kernel_module/user/proc_events.c | 180 +++++++++++++++++++++++++++++++ 3 files changed, 226 insertions(+), 6 deletions(-) create mode 100644 kernel_module/user/proc_events.c diff --git a/README.adoc b/README.adoc index e189926..3c57870 100644 --- a/README.adoc +++ b/README.adoc @@ -2357,8 +2357,7 @@ This did not work on `arm` due to <> so we nee * <> * <> post-mortem method -[[dump_stack]] -==== dump_stack kernel module +==== dump_stack The `dump_stack` function produces a stack trace much like panic and oops, but causes no problems and we return to the normal control flow, and can cleanly remove the module afterwards: @@ -2366,7 +2365,7 @@ The `dump_stack` function produces a stack trace much like panic and oops, but c insmod /dump_stack.ko .... -==== warn_on kernel module +==== WARN_ON The `WARN_ON` macro basically just calls <>. @@ -2404,7 +2403,44 @@ TODO: font and keymap. Mentioned at: https://cmcenroe.me/2017/05/05/linux-consol * https://unix.stackexchange.com/questions/177024/remap-keyboard-on-the-linux-console * https://superuser.com/questions/194202/remapping-keys-system-wide-in-linux-not-just-in-x -=== ftrace +=== Linux kernel tracing + +==== CONFIG_PROC_EVENTS + +Logs proc events such as process creation to a link:https://en.wikipedia.org/wiki/Netlink[netlink socket]. + +We then have a userland program that listens to the events and prints them out: + +.... +# /proc_events.out & +# set mcast listen ok +# sleep 2 & sleep 1 +fork: parent tid=48 pid=48 -> child tid=79 pid=79 +fork: parent tid=48 pid=48 -> child tid=80 pid=80 +exec: tid=80 pid=80 +exec: tid=79 pid=79 +# exit: tid=80 pid=80 exit_code=0 +exit: tid=79 pid=79 exit_code=0 +echo a +a +# +.... + +TODO: why does `exit: tid=79` show after `exit: tid=80`? 
+ +Note how `echo a` is a Bash built-in, and therefore does not spawn a new process. + +TODO: why does this produce no output? + +.... +/proc_events.out >f & +.... + +* https://stackoverflow.com/questions/6075013/detect-launching-of-programs-on-linux-platform/8255487#8255487 +* https://serverfault.com/questions/199654/does-anyone-know-a-simple-way-to-monitor-root-process-spawn +* https://unix.stackexchange.com/questions/260162/how-to-track-newly-created-processes + +==== ftrace Trace a single function: @@ -2499,7 +2535,7 @@ TODO: what do `+` and `!` mean? Each `enable` under the `events/` tree enables a certain set of functions, the higher the `enable` more functions are enabled. -=== Count boot instructions +==== Count boot instructions * https://www.quora.com/How-many-instructions-does-a-typical-Linux-kernel-boot-take * https://github.com/cirosantilli/chat/issues/31 @@ -4290,7 +4326,7 @@ See also: * https://en.wikipedia.org/wiki/Time_Stamp_Counter * https://stackoverflow.com/questions/9887839/clock-cycle-count-wth-gcc/9887979 -===== pmccntr kernel module +===== pmccntr Unfortunately we didn't manage to find an ARM analogue: link:kernel_module/pmccntr.c[] is oopsing, and even it if weren't, it likely won't give the cycle count since boot since it needs to be activate before it starts counting anything: diff --git a/kernel_config_fragment b/kernel_config_fragment index 285604d..f79ea49 100644 --- a/kernel_config_fragment +++ b/kernel_config_fragment @@ -61,6 +61,10 @@ CONFIG_SCHED_TRACER=y CONFIG_STACK_TRACER=y CONFIG_TRACER_SNAPSHOT=y +# Process tracing. 
+CONFIG_CONNECTOR=y
+CONFIG_PROC_EVENTS=y
+
 # 9P
 CONFIG_9P_FS=y
 CONFIG_9P_FS_POSIX_ACL=y
diff --git a/kernel_module/user/proc_events.c b/kernel_module/user/proc_events.c
new file mode 100644
index 0000000..8cc05d9
--- /dev/null
+++ b/kernel_module/user/proc_events.c
@@ -0,0 +1,216 @@
+/*
+This file is licensed under the GPL v2 (http://www.gnu.org/licenses/gpl2.txt) (some parts were originally borrowed from the proc events example)
+
+https://stackoverflow.com/questions/6075013/detect-launching-of-programs-on-linux-platform/8255487#8255487
+*/
+
+#define _XOPEN_SOURCE 700
+#include <sys/socket.h>
+#include <linux/netlink.h>
+#include <linux/connector.h>
+#include <linux/cn_proc.h>
+#include <signal.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+/*
+* set by the SIGINT handler to ask the receive loop to stop
+* sig_atomic_t is a type that may be written from a signal handler
+*/
+static volatile sig_atomic_t need_exit = 0;
+
+/*
+* connect to netlink
+* opens a NETLINK_CONNECTOR socket and binds it to the CN_IDX_PROC group
+* returns netlink socket, or -1 on error
+*/
+static int nl_connect(void)
+{
+    int nl_sock;
+    struct sockaddr_nl sa_nl;
+
+    nl_sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+    if (nl_sock == -1) {
+        perror("socket");
+        return -1;
+    }
+    /* zero the whole address first so that nl_pad is not left uninitialized */
+    memset(&sa_nl, 0, sizeof(sa_nl));
+    sa_nl.nl_family = AF_NETLINK;
+    sa_nl.nl_groups = CN_IDX_PROC;
+    sa_nl.nl_pid = getpid();
+    if (bind(nl_sock, (struct sockaddr *)&sa_nl, sizeof(sa_nl)) == -1) {
+        perror("bind");
+        close(nl_sock);
+        return -1;
+    }
+    return nl_sock;
+}
+
+/*
+* subscribe on proc events (process notifications)
+* enable: true sends PROC_CN_MCAST_LISTEN, false sends PROC_CN_MCAST_IGNORE
+* returns 0 on success, or -1 on error
+*/
+static int set_proc_ev_listen(int nl_sock, bool enable)
+{
+    struct __attribute__ ((aligned(NLMSG_ALIGNTO))) {
+        struct nlmsghdr nl_hdr;
+        struct __attribute__ ((__packed__)) {
+            struct cn_msg cn_msg;
+            enum proc_cn_mcast_op cn_mcast;
+        };
+    } nlcn_msg;
+
+    memset(&nlcn_msg, 0, sizeof(nlcn_msg));
+    nlcn_msg.nl_hdr.nlmsg_len = sizeof(nlcn_msg);
+    nlcn_msg.nl_hdr.nlmsg_pid = getpid();
+    nlcn_msg.nl_hdr.nlmsg_type = NLMSG_DONE;
+
+    nlcn_msg.cn_msg.id.idx = CN_IDX_PROC;
+    nlcn_msg.cn_msg.id.val = CN_VAL_PROC;
+    nlcn_msg.cn_msg.len = sizeof(enum proc_cn_mcast_op);
+
+    nlcn_msg.cn_mcast = enable ? PROC_CN_MCAST_LISTEN : PROC_CN_MCAST_IGNORE;
+
+    if (send(nl_sock, &nlcn_msg, sizeof(nlcn_msg), 0) == -1) {
+        perror("netlink send");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+* receive process events and print one line per event
+* loops until need_exit is set or recv returns 0
+* returns 0 on a clean stop, or -1 on a recv error
+*/
+static int handle_proc_ev(int nl_sock)
+{
+    ssize_t len;
+    struct __attribute__ ((aligned(NLMSG_ALIGNTO))) {
+        struct nlmsghdr nl_hdr;
+        struct __attribute__ ((__packed__)) {
+            struct cn_msg cn_msg;
+            struct proc_event proc_ev;
+        };
+    } nlcn_msg;
+    /* bytes that must arrive before proc_ev.what can be trusted */
+    const size_t min_len = sizeof(nlcn_msg.nl_hdr) + sizeof(nlcn_msg.cn_msg) +
+        offsetof(struct proc_event, event_data);
+
+    while (!need_exit) {
+        len = recv(nl_sock, &nlcn_msg, sizeof(nlcn_msg), 0);
+        if (len == 0) {
+            /* shutdown? */
+            return 0;
+        } else if (len == -1) {
+            /* interrupted by a signal: go back and re-check need_exit */
+            if (errno == EINTR) continue;
+            perror("netlink recv");
+            return -1;
+        }
+        if ((size_t)len < min_len) {
+            /* too short to hold an event header: do not parse garbage */
+            fprintf(stderr, "netlink recv: short message ignored\n");
+            continue;
+        }
+        switch (nlcn_msg.proc_ev.what) {
+            case PROC_EVENT_NONE:
+                printf("set mcast listen ok\n");
+                break;
+            case PROC_EVENT_FORK:
+                printf("fork: parent tid=%d pid=%d -> child tid=%d pid=%d\n",
+                        nlcn_msg.proc_ev.event_data.fork.parent_pid,
+                        nlcn_msg.proc_ev.event_data.fork.parent_tgid,
+                        nlcn_msg.proc_ev.event_data.fork.child_pid,
+                        nlcn_msg.proc_ev.event_data.fork.child_tgid);
+                break;
+            case PROC_EVENT_EXEC:
+                printf("exec: tid=%d pid=%d\n",
+                        nlcn_msg.proc_ev.event_data.exec.process_pid,
+                        nlcn_msg.proc_ev.event_data.exec.process_tgid);
+                break;
+            case PROC_EVENT_UID:
+                /* NOTE(review): r.ruid / e.euid look like real and effective ids, not old and new values - confirm the wording */
+                printf("uid change: tid=%d pid=%d from %u to %u\n",
+                        nlcn_msg.proc_ev.event_data.id.process_pid,
+                        nlcn_msg.proc_ev.event_data.id.process_tgid,
+                        nlcn_msg.proc_ev.event_data.id.r.ruid,
+                        nlcn_msg.proc_ev.event_data.id.e.euid);
+                break;
+            case PROC_EVENT_GID:
+                printf("gid change: tid=%d pid=%d from %u to %u\n",
+                        nlcn_msg.proc_ev.event_data.id.process_pid,
+                        nlcn_msg.proc_ev.event_data.id.process_tgid,
+                        nlcn_msg.proc_ev.event_data.id.r.rgid,
+                        nlcn_msg.proc_ev.event_data.id.e.egid);
+                break;
+            case PROC_EVENT_EXIT:
+                printf("exit: tid=%d pid=%d exit_code=%u\n",
+                        nlcn_msg.proc_ev.event_data.exit.process_pid,
+                        nlcn_msg.proc_ev.event_data.exit.process_tgid,
+                        nlcn_msg.proc_ev.event_data.exit.exit_code);
+                break;
+            default:
+                printf("unhandled proc event\n");
+                break;
+        }
+        /*
+        * stdout is fully buffered when it is not a terminal, so flush after
+        * each event to make the output visible when redirected to a file
+        */
+        fflush(stdout);
+    }
+
+    return 0;
+}
+
+/*
+* SIGINT handler: only records that the receive loop should stop
+*/
+static void on_sigint(int signum)
+{
+    (void)signum;
+    need_exit = 1;
+}
+
+int main(void)
+{
+    int nl_sock;
+    int rc = EXIT_SUCCESS;
+    struct sigaction sa;
+
+    /*
+    * sigaction without SA_RESTART makes a blocked recv fail with EINTR
+    * on SIGINT, which is what signal + the obsolescent siginterrupt did
+    */
+    memset(&sa, 0, sizeof(sa));
+    sa.sa_handler = on_sigint;
+    sigemptyset(&sa.sa_mask);
+    if (sigaction(SIGINT, &sa, NULL) == -1) {
+        perror("sigaction");
+        exit(EXIT_FAILURE);
+    }
+    nl_sock = nl_connect();
+    if (nl_sock == -1)
+        exit(EXIT_FAILURE);
+    if (set_proc_ev_listen(nl_sock, true) == -1) {
+        rc = EXIT_FAILURE;
+        goto out;
+    }
+    if (handle_proc_ev(nl_sock) == -1)
+        rc = EXIT_FAILURE;
+    /* undo the subscription even when the receive loop failed */
+    if (set_proc_ev_listen(nl_sock, false) == -1)
+        rc = EXIT_FAILURE;
+out:
+    close(nl_sock);
+    exit(rc);
+}