/*
 * latencytop.c: Latency display infrastructure
 *
 * (C) Copyright 2008 Intel Corporation
 * Author: Arjan van de Ven <arjan@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

/*
 * CONFIG_LATENCYTOP enables a kernel latency tracking infrastructure that is
 * used by the "latencytop" userspace tool. The latency that is tracked is not
 * the 'traditional' interrupt latency (which is primarily caused by something
 * else consuming CPU), but instead, it is the latency an application encounters
 * because the kernel sleeps on its behalf for various reasons.
 *
 * This code tracks 2 levels of statistics:
 * 1) System level latency
 * 2) Per process latency
 *
 * The latency is stored in fixed sized data structures in an accumulated form;
 * if the "same" latency cause is hit twice, this will be tracked as one entry
 * in the data structure. The count, the total accumulated latency and the
 * maximum latency are all tracked in this data structure. When the fixed size
 * structure is full, no new causes are tracked until the buffer is flushed by
 * writing to the /proc file; the userspace tool does this on a regular basis.
 *
 * A latency cause is identified by a stringified backtrace at the point that
 * the scheduler gets invoked. The userland tool will use this string to
 * identify the cause of the latency in human readable form.
 *
 * The information is exported via /proc/latency_stats and /proc/<pid>/latency.
 * These files look like this:
 *
 * Latency Top version : v0.1
 * 70 59433 4897 i915_irq_wait drm_ioctl vfs_ioctl do_vfs_ioctl sys_ioctl
 * |    |    |    |
 * |    |    |    +----> the stringified backtrace
 * |    |    +---------> The maximum latency for this entry in microseconds
 * |    +--------------> The accumulated latency for this entry (microseconds)
 * +-------------------> The number of times this entry is hit
 *
 * (note: the average latency is the accumulated latency divided by the number
 * of times)
 */

#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/spinlock.h>
#include <linux/proc_fs.h>
#include <linux/latencytop.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/stat.h>
#include <linux/list.h>
#include <linux/stacktrace.h>

63
static DEFINE_RAW_SPINLOCK(latency_lock);
Arjan van de Ven's avatar
Arjan van de Ven committed
64 65 66 67 68 69 70 71 72 73 74 75 76

#define MAXLR 128
static struct latency_record latency_record[MAXLR];

int latencytop_enabled;

void clear_all_latency_tracing(struct task_struct *p)
{
	unsigned long flags;

	if (!latencytop_enabled)
		return;

77
	raw_spin_lock_irqsave(&latency_lock, flags);
Arjan van de Ven's avatar
Arjan van de Ven committed
78 79
	memset(&p->latency_record, 0, sizeof(p->latency_record));
	p->latency_record_count = 0;
80
	raw_spin_unlock_irqrestore(&latency_lock, flags);
Arjan van de Ven's avatar
Arjan van de Ven committed
81 82 83 84 85 86
}

static void clear_global_latency_tracing(void)
{
	unsigned long flags;

87
	raw_spin_lock_irqsave(&latency_lock, flags);
Arjan van de Ven's avatar
Arjan van de Ven committed
88
	memset(&latency_record, 0, sizeof(latency_record));
89
	raw_spin_unlock_irqrestore(&latency_lock, flags);
Arjan van de Ven's avatar
Arjan van de Ven committed
90 91 92
}

static void __sched
93 94
account_global_scheduler_latency(struct task_struct *tsk,
				 struct latency_record *lat)
Arjan van de Ven's avatar
Arjan van de Ven committed
95 96 97 98 99 100 101 102 103 104 105 106
{
	int firstnonnull = MAXLR + 1;
	int i;

	if (!latencytop_enabled)
		return;

	/* skip kernel threads for now */
	if (!tsk->mm)
		return;

	for (i = 0; i < MAXLR; i++) {
107 108
		int q, same = 1;

Arjan van de Ven's avatar
Arjan van de Ven committed
109 110 111 112 113 114
		/* Nothing stored: */
		if (!latency_record[i].backtrace[0]) {
			if (firstnonnull > i)
				firstnonnull = i;
			continue;
		}
115
		for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
116 117 118
			unsigned long record = lat->backtrace[q];

			if (latency_record[i].backtrace[q] != record) {
Arjan van de Ven's avatar
Arjan van de Ven committed
119 120
				same = 0;
				break;
121 122 123 124
			}

			/* 0 and ULONG_MAX entries mean end of backtrace: */
			if (record == 0 || record == ULONG_MAX)
Arjan van de Ven's avatar
Arjan van de Ven committed
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
				break;
		}
		if (same) {
			latency_record[i].count++;
			latency_record[i].time += lat->time;
			if (lat->time > latency_record[i].max)
				latency_record[i].max = lat->time;
			return;
		}
	}

	i = firstnonnull;
	if (i >= MAXLR - 1)
		return;

	/* Allocted a new one: */
	memcpy(&latency_record[i], lat, sizeof(struct latency_record));
}

144 145 146 147 148
/*
 * Iterator to store a backtrace into a latency record entry
 */
static inline void store_stacktrace(struct task_struct *tsk,
					struct latency_record *lat)
Arjan van de Ven's avatar
Arjan van de Ven committed
149 150 151 152 153 154 155 156 157
{
	struct stack_trace trace;

	memset(&trace, 0, sizeof(trace));
	trace.max_entries = LT_BACKTRACEDEPTH;
	trace.entries = &lat->backtrace[0];
	save_stack_trace_tsk(tsk, &trace);
}

158
/**
Lucas De Marchi's avatar
Lucas De Marchi committed
159
 * __account_scheduler_latency - record an occurred latency
160 161 162 163 164 165 166 167 168 169 170 171 172 173
 * @tsk - the task struct of the task hitting the latency
 * @usecs - the duration of the latency in microseconds
 * @inter - 1 if the sleep was interruptible, 0 if uninterruptible
 *
 * This function is the main entry point for recording latency entries
 * as called by the scheduler.
 *
 * This function has a few special cases to deal with normal 'non-latency'
 * sleeps: specifically, interruptible sleep longer than 5 msec is skipped
 * since this usually is caused by waiting for events via select() and co.
 *
 * Negative latencies (caused by time going backwards) are also explicitly
 * skipped.
 */
Arjan van de Ven's avatar
Arjan van de Ven committed
174
void __sched
175
__account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
Arjan van de Ven's avatar
Arjan van de Ven committed
176 177 178 179 180 181 182 183 184
{
	unsigned long flags;
	int i, q;
	struct latency_record lat;

	/* Long interruptible waits are generally user requested... */
	if (inter && usecs > 5000)
		return;

185 186 187 188 189
	/* Negative sleeps are time going backwards */
	/* Zero-time sleeps are non-interesting */
	if (usecs <= 0)
		return;

Arjan van de Ven's avatar
Arjan van de Ven committed
190 191 192 193 194 195
	memset(&lat, 0, sizeof(lat));
	lat.count = 1;
	lat.time = usecs;
	lat.max = usecs;
	store_stacktrace(tsk, &lat);

196
	raw_spin_lock_irqsave(&latency_lock, flags);
Arjan van de Ven's avatar
Arjan van de Ven committed
197 198 199

	account_global_scheduler_latency(tsk, &lat);

200
	for (i = 0; i < tsk->latency_record_count; i++) {
Arjan van de Ven's avatar
Arjan van de Ven committed
201 202
		struct latency_record *mylat;
		int same = 1;
203

Arjan van de Ven's avatar
Arjan van de Ven committed
204
		mylat = &tsk->latency_record[i];
205
		for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
206 207 208
			unsigned long record = lat.backtrace[q];

			if (mylat->backtrace[q] != record) {
Arjan van de Ven's avatar
Arjan van de Ven committed
209 210
				same = 0;
				break;
211 212 213 214
			}

			/* 0 and ULONG_MAX entries mean end of backtrace: */
			if (record == 0 || record == ULONG_MAX)
Arjan van de Ven's avatar
Arjan van de Ven committed
215 216 217 218 219 220 221 222 223 224 225
				break;
		}
		if (same) {
			mylat->count++;
			mylat->time += lat.time;
			if (lat.time > mylat->max)
				mylat->max = lat.time;
			goto out_unlock;
		}
	}

226 227 228 229 230 231
	/*
	 * short term hack; if we're > 32 we stop; future we recycle:
	 */
	if (tsk->latency_record_count >= LT_SAVECOUNT)
		goto out_unlock;

Arjan van de Ven's avatar
Arjan van de Ven committed
232
	/* Allocated a new one: */
233
	i = tsk->latency_record_count++;
Arjan van de Ven's avatar
Arjan van de Ven committed
234 235 236
	memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));

out_unlock:
237
	raw_spin_unlock_irqrestore(&latency_lock, flags);
Arjan van de Ven's avatar
Arjan van de Ven committed
238 239 240 241 242 243 244 245 246
}

static int lstats_show(struct seq_file *m, void *v)
{
	int i;

	seq_puts(m, "Latency Top version : v0.1\n");

	for (i = 0; i < MAXLR; i++) {
247 248 249
		struct latency_record *lr = &latency_record[i];

		if (lr->backtrace[0]) {
Arjan van de Ven's avatar
Arjan van de Ven committed
250
			int q;
251 252
			seq_printf(m, "%i %lu %lu",
				   lr->count, lr->time, lr->max);
Arjan van de Ven's avatar
Arjan van de Ven committed
253
			for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
254 255
				unsigned long bt = lr->backtrace[q];
				if (!bt)
Arjan van de Ven's avatar
Arjan van de Ven committed
256
					break;
257
				if (bt == ULONG_MAX)
Arjan van de Ven's avatar
Arjan van de Ven committed
258
					break;
259
				seq_printf(m, " %ps", (void *)bt);
Arjan van de Ven's avatar
Arjan van de Ven committed
260
			}
261
			seq_puts(m, "\n");
Arjan van de Ven's avatar
Arjan van de Ven committed
262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280
		}
	}
	return 0;
}

/*
 * Write handler: any write, whatever its content, clears the global latency
 * table. The full @count is reported as consumed.
 */
static ssize_t lstats_write(struct file *file, const char __user *buf,
			    size_t count, loff_t *offs)
{
	clear_global_latency_tracing();
	return count;
}

/* Open handler: attach lstats_show() to the file through single_open(). */
static int
lstats_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, lstats_show, NULL);
}

281
static const struct file_operations lstats_fops = {
Arjan van de Ven's avatar
Arjan van de Ven committed
282 283 284 285 286 287 288 289 290
	.open		= lstats_open,
	.read		= seq_read,
	.write		= lstats_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};

/*
 * Create the "latency_stats" proc entry (mode 0644) backed by lstats_fops.
 * The result of proc_create() is not checked; this always returns 0.
 */
static int __init init_lstats_procfs(void)
{
	proc_create("latency_stats", 0644, NULL, &lstats_fops);

	return 0;
}

/*
 * sysctl handler: let proc_dointvec() perform the read or write, then call
 * force_schedstat_enabled() whenever latencytop_enabled is non-zero.
 * Returns whatever proc_dointvec() returned.
 */
int sysctl_latencytop(struct ctl_table *table, int write,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec(table, write, buffer, lenp, ppos);

	if (latencytop_enabled)
		force_schedstat_enabled();

	return ret;
}
/* Register init_lstats_procfs() as a device-level initcall. */
device_initcall(init_lstats_procfs);