How often do processes wake up due to timeouts?
Problem
(Transcribed from http://dev.laptop.org/ticket/110#comment:4)
Based on http://blogs.gnome.org/view/ryanl/2006/09/17/0, this SystemTap script aims to reveal how often processes wake up as a result of setting timers for themselves. The intention is to make it extremely easy to spot poorly behaved applications.
Scripts
# pstimeouts
# Copyright (C) 2006 Red Hat, Inc., Eugene Teo <eteo@redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# Based on http://blogs.gnome.org/view/ryanl/2006/09/17/0
#
# Tested on:
# [eteo@kerndev ~]$ date
# Mon Oct 30 21:12:40 SGT 2006
# [eteo@kerndev ~]$ uname -a
# Linux kerndev.sin.redhat.com 2.6.18-1.2798.fc6 #1 SMP Mon Oct 16 14:37:32 EDT 2006 i686 i686 i386 GNU/Linux
#
uid=`id -u`; alluid=0; detail=0
while getopts ":auh" options; do
case $options in
a) alluid=1;;
u) detail=1;;
h|?) cat >&2 <<-EOF
pstimeouts [-au] [pids..]
-a show processes belonging to all users
-u detailed listing
EOF
exit 1;;
esac
done
/usr/bin/env stap -g -e '
global alluid, detail, uid
global process, trace_poll, trace_select, trace_epoll, total_timeouts
global poll_timeouts, epoll_timeouts, select_timeouts, itimer_timeouts, schedule_timeouts
/* FIXME: remove this if it is already committed into CVS */
function kernel_long:long(addr:long) %{ /* pure */
THIS->__retvalue = deref (sizeof(long), (uintptr_t)(THIS->addr));
goto success;
deref_fault: /* branched to from deref() */
{
static char errmsg[40];
snprintf (errmsg, 40, "kernel long copy fault at 0x%p",
(void *) (uintptr_t) THIS->addr);
CONTEXT->last_error = errmsg;
}
success: ;
%}
probe begin {
alluid = '$alluid'
detail = '$detail'
uid = '$uid'
}
probe kernel.statement("*@kernel/fork.c:175") {
p = pid()
poll_timeouts[p] = epoll_timeouts[p] = select_timeouts[p] = itimer_timeouts[p] = schedule_timeouts[p] = 0
}
probe kernel.function("do_sys_poll") {
trace_poll[pid()] = 1
}
probe kernel.function("do_sys_poll").return {
trace_poll[pid()] = 0
}
/* do_sys_poll@fs/select.c */
/* Could not access do_poll directly because of a gcc bug:
http://sources.redhat.com/bugzilla/show_bug.cgi?id=1155 */
probe kernel.statement("*@fs/select.c:717") {
/* assume !*timeout if count == 0,
see fs/select.c:622 */
if ($fdcount == 0)
poll_timeouts[pid()]++
}
probe kernel.function("do_select") {
trace_select[pid()] = 1
}
probe kernel.function("do_select").return {
trace_select[pid()] = 0
}
/* do_select@fs/select.c */
probe kernel.statement("*@fs/select.c:289") {
if (!kernel_long($timeout))
select_timeouts[pid()]++
}
/* ep_poll@fs/eventpoll.c */
probe kernel.function("*@fs/eventpoll.c:1495") {
trace_epoll[pid()] = 1
}
/* it_real_fn@kernel/itimer.c */
probe kernel.function("it_real_fn") {
itimer_timeouts[pid()]++
}
/* schedule_timeout@kernel/timer.c */
probe kernel.function("schedule_timeout").return {
p = pid()
process[p, uid()] = execname()
if ($return <= 0)
schedule_timeouts[p]++
if (trace_poll[p] && !$return)
schedule_timeouts[p]--
else if (trace_select[p] && !$return)
schedule_timeouts[p]--
else if (trace_epoll[p] && !$return) {
epoll_timeouts[p]++
schedule_timeouts[p]--
trace_epoll[p] = 0
}
}
probe kernel.function("do_exit") {
p = pid() u = uid()
if (process[p, u] == execname())
delete process[p, u]
}
probe timer.ms(1000) {
printf("\033[2J\033[1;1H") /* clear screen */
if (detail) {
printf (" uid pid | poll select epoll itimer other| process\n");
foreach ([p+, u] in process) {
if (!alluid && u != uid) continue
printf ("%5d %7d |%8d %8d %8d %8d %8d| %-.38s\n", u, p,
poll_timeouts[p], select_timeouts[p],
epoll_timeouts[p], itimer_timeouts[p],
schedule_timeouts[p], process[p, u])
}
} else {
printf (" pid timeouts process\n");
foreach ([p+, u] in process) {
if (!alluid && u != uid) continue
total_timeouts = poll_timeouts[p] + select_timeouts[p] +
epoll_timeouts[p] + itimer_timeouts[p] + schedule_timeouts[p]
printf ("%5d %8d %-.38s\n", p, total_timeouts, process[p, u])
}
}
}
'
Output
[eteo@kerndev ~]$ ./pstimeouts -h
pstimeouts [-au] [pids..]
-a show processes belonging to all users
-u detailed listing
[eteo@kerndev ~]$ ./pstimeouts
pid timeouts process
3485 20 gnome-power-man
3530 16 scim-panel-gtk
3532 42 scim-launcher
...
[eteo@kerndev ~]$ ./pstimeouts -a
pid timeouts process
2148 0 syslogd
2311 4 pcscd
2342 1 automount
...
[eteo@kerndev ~]$ ./pstimeouts -u
uid pid | poll select epoll itimer other| process
500 3463 | 116 0 0 0 0| gnome-settings-
500 3467 | 0 0 0 0 0| scim-launcher
500 3482 | 220 0 0 2 0| metacity
...
[eteo@kerndev ~]$ ./pstimeouts -ua
uid pid | poll select epoll itimer other| process
0 2311 | 0 6 0 0 0| pcscd
0 2342 | 0 0 0 0 2| automount
38 2404 | 0 0 0 0 0| ntpd
...
Lessons
SystemTap makes it so much easier to gather statistics from the kernel and display it at the same time.
