/* getdelays.c
 *
 * Utility to get per-pid and per-tgid delay accounting statistics
 * Also illustrates usage of the taskstats interface
 *
 * Copyright (C) Shailabh Nagar, IBM Corp. 2005
 * Copyright (C) Balbir Singh, IBM Corp. 2006
 * Copyright (c) Jay Lan, SGI. 2006
 *
 * Compile with
 *	gcc -I/usr/src/linux/include getdelays.c -o getdelays
 */

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <poll.h>
#include <string.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
24
#include <sys/wait.h>
25 26 27 28
#include <signal.h>

#include <linux/genetlink.h>
#include <linux/taskstats.h>
29
#include <linux/cgroupstats.h>
30 31 32 33 34 35 36 37 38 39 40

/*
 * Generic macros for dealing with netlink sockets. Might be duplicated
 * elsewhere. It is recommended that commercial grade applications use
 * libnl or libnetlink and use the interfaces provided by the library
 *
 * GENLMSG_DATA(glh)	- address just past the generic netlink header
 * GENLMSG_PAYLOAD(glh)	- netlink payload length minus the genetlink header
 * NLA_DATA(na)		- address of an attribute's payload
 * NLA_PAYLOAD(len)	- payload length of an attribute whose nla_len is len
 *
 * Address computations go through char * so they do not rely on arithmetic
 * on void *, and macro arguments are parenthesized so that any expression
 * can be passed.
 */
#define GENLMSG_DATA(glh)	((void *)((char *)NLMSG_DATA(glh) + GENL_HDRLEN))
#define GENLMSG_PAYLOAD(glh)	(NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
#define NLA_DATA(na)		((void *)((char *)(na) + NLA_HDRLEN))
#define NLA_PAYLOAD(len)	((len) - NLA_HDRLEN)

41 42 43 44 45 46 47 48 49 50 51
/*
 * Print a printf-style message on stderr, then terminate the process with
 * exit status `code`.
 */
#define err(code, fmt, ...)				\
	do {						\
		fprintf(stderr, fmt, ##__VA_ARGS__);	\
		exit(code);				\
	} while (0)

/* Reset to 0 by main() each time it starts walking an aggregate attribute */
int done;
/* Socket receive buffer size requested with -r; 0 keeps the default */
int rcvbufsz;
/* Holds the genetlink family name sent by get_family_id() */
char name[100];
/* Non-zero enables PRINTF() debug output (-v) */
int dbg;
/* Non-zero: print delay accounting statistics (-d) */
int print_delays;
/* Non-zero: print IO accounting (-i) */
int print_io_accounting;
/* Non-zero: print voluntary/nonvoluntary context switch counts (-q) */
int print_task_context_switch_counts;
54

55 56 57 58 59 60 61
/*
 * Debug printf: forwards to printf() only when the global `dbg` is non-zero.
 *
 * The body is wrapped in do { } while (0) so that `PRINTF(...);` is a single
 * statement and can be used as the unbraced branch of an if/else.
 */
#define PRINTF(fmt, arg...)			\
	do {					\
		if (dbg)			\
			printf(fmt, ##arg);	\
	} while (0)

/* Maximum size of response requested or message sent */
#define MAX_MSG_SIZE	1024
/* Maximum number of cpus expected to be specified in a cpumask */
#define MAX_CPUS	32

/*
 * Buffer for one generic netlink message as sent by send_cmd() and received
 * in main(): netlink header, generic netlink header, then up to
 * MAX_MSG_SIZE bytes of attribute data.
 */
struct msgtemplate {
	struct nlmsghdr n;
	struct genlmsghdr g;
	char buf[MAX_MSG_SIZE];
};

/*
 * Cpumask string given with -m; sent as the payload of the register and
 * deregister cpumask commands.
 */
char cpumask[100+6*MAX_CPUS];
73

74 75 76 77 78 79 80 81
/*
 * Print a summary of the command line options on stderr.
 *
 * Lists every flag accepted by the getopt() string in main(), including
 * -q and -c.
 */
static void usage(void)
{
	fprintf(stderr, "getdelays [-dilqv] [-w logfile] [-r bufsize] "
			"[-m cpumask] [-t tgid] [-p pid] [-C path] "
			"[-c cmd [args]]\n");
	fprintf(stderr, "  -d: print delayacct stats\n");
	fprintf(stderr, "  -i: print IO accounting (works only with -p)\n");
	fprintf(stderr, "  -l: listen forever\n");
	fprintf(stderr, "  -q: print task/process context switch counts\n");
	fprintf(stderr, "  -v: debug on\n");
	fprintf(stderr, "  -C: container path\n");
	fprintf(stderr, "  -c: run cmd and report its stats once it exits "
			"(must be the last option)\n");
}

85 86 87
/*
 * Create a raw netlink socket and bind
 */
88
static int create_nl_socket(int protocol)
89
{
90 91 92 93 94 95 96 97 98 99
	int fd;
	struct sockaddr_nl local;

	fd = socket(AF_NETLINK, SOCK_RAW, protocol);
	if (fd < 0)
		return -1;

	if (rcvbufsz)
		if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
				&rcvbufsz, sizeof(rcvbufsz)) < 0) {
100
			fprintf(stderr, "Unable to set socket rcv buf size to %d\n",
101
				rcvbufsz);
102
			goto error;
103
		}
104

105 106
	memset(&local, 0, sizeof(local));
	local.nl_family = AF_NETLINK;
107

108 109
	if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0)
		goto error;
110

111 112 113 114
	return fd;
error:
	close(fd);
	return -1;
115 116
}

117

118
static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
119 120
	     __u8 genl_cmd, __u16 nla_type,
	     void *nla_data, int nla_len)
121
{
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
	struct nlattr *na;
	struct sockaddr_nl nladdr;
	int r, buflen;
	char *buf;

	struct msgtemplate msg;

	msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
	msg.n.nlmsg_type = nlmsg_type;
	msg.n.nlmsg_flags = NLM_F_REQUEST;
	msg.n.nlmsg_seq = 0;
	msg.n.nlmsg_pid = nlmsg_pid;
	msg.g.cmd = genl_cmd;
	msg.g.version = 0x1;
	na = (struct nlattr *) GENLMSG_DATA(&msg);
	na->nla_type = nla_type;
	na->nla_len = nla_len + 1 + NLA_HDRLEN;
	memcpy(NLA_DATA(na), nla_data, nla_len);
	msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);

	buf = (char *) &msg;
	buflen = msg.n.nlmsg_len ;
	memset(&nladdr, 0, sizeof(nladdr));
	nladdr.nl_family = AF_NETLINK;
	while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
			   sizeof(nladdr))) < buflen) {
		if (r > 0) {
			buf += r;
			buflen -= r;
		} else if (errno != EAGAIN)
			return -1;
	}
	return 0;
155 156
}

157

158 159 160 161
/*
 * Probe the controller in genetlink to find the family id
 * for the TASKSTATS family
 */
162
static int get_family_id(int sd)
163
{
164 165 166 167 168 169
	struct {
		struct nlmsghdr n;
		struct genlmsghdr g;
		char buf[256];
	} ans;

Randy Dunlap's avatar
Randy Dunlap committed
170
	int id = 0, rc;
171 172 173 174 175 176 177
	struct nlattr *na;
	int rep_len;

	strcpy(name, TASKSTATS_GENL_NAME);
	rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
			CTRL_ATTR_FAMILY_NAME, (void *)name,
			strlen(TASKSTATS_GENL_NAME)+1);
178 179
	if (rc < 0)
		return 0;	/* sendto() failure? */
180 181 182 183 184

	rep_len = recv(sd, &ans, sizeof(ans), 0);
	if (ans.n.nlmsg_type == NLMSG_ERROR ||
	    (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len))
		return 0;
185

186 187 188 189 190 191
	na = (struct nlattr *) GENLMSG_DATA(&ans);
	na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
	if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
		id = *(__u16 *) NLA_DATA(na);
	}
	return id;
192 193
}

194 195
/*
 * Average of total `t` over `c` events, with `t` scaled down by 1000000
 * (callers print the result as ms). A zero count is treated as 1 so the
 * division is always defined. Arguments are parenthesized so expressions
 * may be passed; note that `c` is evaluated twice.
 */
#define average_ms(t, c) ((t) / 1000000ULL / ((c) ? (c) : 1))

196
static void print_delayacct(struct taskstats *t)
197
{
198 199 200 201 202 203 204 205 206 207
	printf("\n\nCPU   %15s%15s%15s%15s%15s\n"
	       "      %15llu%15llu%15llu%15llu%15.3fms\n"
	       "IO    %15s%15s%15s\n"
	       "      %15llu%15llu%15llums\n"
	       "SWAP  %15s%15s%15s\n"
	       "      %15llu%15llu%15llums\n"
	       "RECLAIM  %12s%15s%15s\n"
	       "      %15llu%15llu%15llums\n",
	       "count", "real total", "virtual total",
	       "delay total", "delay average",
208 209 210 211
	       (unsigned long long)t->cpu_count,
	       (unsigned long long)t->cpu_run_real_total,
	       (unsigned long long)t->cpu_run_virtual_total,
	       (unsigned long long)t->cpu_delay_total,
212 213
	       average_ms((double)t->cpu_delay_total, t->cpu_count),
	       "count", "delay total", "delay average",
214 215
	       (unsigned long long)t->blkio_count,
	       (unsigned long long)t->blkio_delay_total,
216 217
	       average_ms(t->blkio_delay_total, t->blkio_count),
	       "count", "delay total", "delay average",
218 219
	       (unsigned long long)t->swapin_count,
	       (unsigned long long)t->swapin_delay_total,
220 221
	       average_ms(t->swapin_delay_total, t->swapin_count),
	       "count", "delay total", "delay average",
222
	       (unsigned long long)t->freepages_count,
223 224
	       (unsigned long long)t->freepages_delay_total,
	       average_ms(t->freepages_delay_total, t->freepages_count));
225 226
}

227
static void task_context_switch_counts(struct taskstats *t)
228 229
{
	printf("\n\nTask   %15s%15s\n"
Randy Dunlap's avatar
Randy Dunlap committed
230
	       "       %15llu%15llu\n",
231
	       "voluntary", "nonvoluntary",
232
	       (unsigned long long)t->nvcsw, (unsigned long long)t->nivcsw);
233 234
}

235
static void print_cgroupstats(struct cgroupstats *c)
236 237
{
	printf("sleeping %llu, blocked %llu, running %llu, stopped %llu, "
238 239 240 241 242
		"uninterruptible %llu\n", (unsigned long long)c->nr_sleeping,
		(unsigned long long)c->nr_io_wait,
		(unsigned long long)c->nr_running,
		(unsigned long long)c->nr_stopped,
		(unsigned long long)c->nr_uninterruptible);
243 244 245
}


246
static void print_ioacct(struct taskstats *t)
247 248 249 250 251 252 253 254
{
	printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
		t->ac_comm,
		(unsigned long long)t->read_bytes,
		(unsigned long long)t->write_bytes,
		(unsigned long long)t->cancelled_write_bytes);
}

255 256
int main(int argc, char *argv[])
{
257 258
	int c, rc, rep_len, aggr_len, len2;
	int cmd_type = TASKSTATS_CMD_ATTR_UNSPEC;
259 260 261 262 263 264 265 266 267 268 269 270 271
	__u16 id;
	__u32 mypid;

	struct nlattr *na;
	int nl_sd = -1;
	int len = 0;
	pid_t tid = 0;
	pid_t rtid = 0;

	int fd = 0;
	int count = 0;
	int write_file = 0;
	int maskset = 0;
Scott Wiersdorf's avatar
Scott Wiersdorf committed
272
	char *logfile = NULL;
273
	int loop = 0;
274
	int containerset = 0;
275
	char *containerpath = NULL;
276
	int cfd = 0;
277 278
	int forking = 0;
	sigset_t sigset;
279 280 281

	struct msgtemplate msg;

282 283
	while (!forking) {
		c = getopt(argc, argv, "qdiw:r:m:t:p:vlC:c:");
284 285
		if (c < 0)
			break;
286

287 288 289 290 291
		switch (c) {
		case 'd':
			printf("print delayacct stats ON\n");
			print_delays = 1;
			break;
292 293 294 295
		case 'i':
			printf("printing IO accounting\n");
			print_io_accounting = 1;
			break;
296 297 298 299
		case 'q':
			printf("printing task/process context switch rates\n");
			print_task_context_switch_counts = 1;
			break;
300 301
		case 'C':
			containerset = 1;
302
			containerpath = optarg;
303
			break;
304
		case 'w':
Scott Wiersdorf's avatar
Scott Wiersdorf committed
305
			logfile = strdup(optarg);
306 307 308 309 310 311 312 313 314 315 316
			printf("write to file %s\n", logfile);
			write_file = 1;
			break;
		case 'r':
			rcvbufsz = atoi(optarg);
			printf("receive buf size %d\n", rcvbufsz);
			if (rcvbufsz < 0)
				err(1, "Invalid rcv buf size\n");
			break;
		case 'm':
			strncpy(cpumask, optarg, sizeof(cpumask));
317
			cpumask[sizeof(cpumask) - 1] = '\0';
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332
			maskset = 1;
			printf("cpumask %s maskset %d\n", cpumask, maskset);
			break;
		case 't':
			tid = atoi(optarg);
			if (!tid)
				err(1, "Invalid tgid\n");
			cmd_type = TASKSTATS_CMD_ATTR_TGID;
			break;
		case 'p':
			tid = atoi(optarg);
			if (!tid)
				err(1, "Invalid pid\n");
			cmd_type = TASKSTATS_CMD_ATTR_PID;
			break;
333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
		case 'c':

			/* Block SIGCHLD for sigwait() later */
			if (sigemptyset(&sigset) == -1)
				err(1, "Failed to empty sigset");
			if (sigaddset(&sigset, SIGCHLD))
				err(1, "Failed to set sigchld in sigset");
			sigprocmask(SIG_BLOCK, &sigset, NULL);

			/* fork/exec a child */
			tid = fork();
			if (tid < 0)
				err(1, "Fork failed\n");
			if (tid == 0)
				if (execvp(argv[optind - 1],
				    &argv[optind - 1]) < 0)
					exit(-1);

			/* Set the command type and avoid further processing */
			cmd_type = TASKSTATS_CMD_ATTR_PID;
			forking = 1;
			break;
355 356 357 358 359 360 361 362 363
		case 'v':
			printf("debug on\n");
			dbg = 1;
			break;
		case 'l':
			printf("listen forever\n");
			loop = 1;
			break;
		default:
364
			usage();
365
			exit(-1);
366 367 368
		}
	}

369 370 371 372 373 374 375 376
	if (write_file) {
		fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC,
			  S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
		if (fd == -1) {
			perror("Cannot open output file\n");
			exit(1);
		}
	}
377

378 379
	nl_sd = create_nl_socket(NETLINK_GENERIC);
	if (nl_sd < 0)
380
		err(1, "error creating Netlink socket\n");
381 382


383 384 385
	mypid = getpid();
	id = get_family_id(nl_sd);
	if (!id) {
386
		fprintf(stderr, "Error getting family id, errno %d\n", errno);
387
		goto err;
388
	}
389 390 391 392 393
	PRINTF("family id %d\n", id);

	if (maskset) {
		rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
			      TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
394
			      &cpumask, strlen(cpumask) + 1);
395 396
		PRINTF("Sent register cpumask, retval %d\n", rc);
		if (rc < 0) {
397
			fprintf(stderr, "error sending register cpumask\n");
398 399
			goto err;
		}
400 401
	}

402 403 404 405 406
	if (tid && containerset) {
		fprintf(stderr, "Select either -t or -C, not both\n");
		goto err;
	}

407 408 409 410 411 412 413 414 415
	/*
	 * If we forked a child, wait for it to exit. Cannot use waitpid()
	 * as all the delicious data would be reaped as part of the wait
	 */
	if (tid && forking) {
		int sig_received;
		sigwait(&sigset, &sig_received);
	}

416 417 418 419 420
	if (tid) {
		rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
			      cmd_type, &tid, sizeof(__u32));
		PRINTF("Sent pid/tgid, retval %d\n", rc);
		if (rc < 0) {
421
			fprintf(stderr, "error sending tid/tgid cmd\n");
422 423
			goto done;
		}
424 425
	}

426 427 428 429 430 431 432 433 434 435 436 437 438
	if (containerset) {
		cfd = open(containerpath, O_RDONLY);
		if (cfd < 0) {
			perror("error opening container file");
			goto err;
		}
		rc = send_cmd(nl_sd, id, mypid, CGROUPSTATS_CMD_GET,
			      CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32));
		if (rc < 0) {
			perror("error sending cgroupstats command");
			goto err;
		}
	}
439 440 441 442
	if (!maskset && !tid && !containerset) {
		usage();
		goto err;
	}
443

444 445 446
	do {
		rep_len = recv(nl_sd, &msg, sizeof(msg), 0);
		PRINTF("received %d bytes\n", rep_len);
447

448
		if (rep_len < 0) {
449 450
			fprintf(stderr, "nonfatal reply error: errno %d\n",
				errno);
451 452 453 454
			continue;
		}
		if (msg.n.nlmsg_type == NLMSG_ERROR ||
		    !NLMSG_OK((&msg.n), rep_len)) {
455
			struct nlmsgerr *err = NLMSG_DATA(&msg);
456 457
			fprintf(stderr, "fatal reply error,  errno %d\n",
				err->error);
458 459 460
			goto done;
		}

Randy Dunlap's avatar
Randy Dunlap committed
461
		PRINTF("nlmsghdr size=%zu, nlmsg_len=%d, rep_len=%d\n",
462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495
		       sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len);


		rep_len = GENLMSG_PAYLOAD(&msg.n);

		na = (struct nlattr *) GENLMSG_DATA(&msg);
		len = 0;
		while (len < rep_len) {
			len += NLA_ALIGN(na->nla_len);
			switch (na->nla_type) {
			case TASKSTATS_TYPE_AGGR_TGID:
				/* Fall through */
			case TASKSTATS_TYPE_AGGR_PID:
				aggr_len = NLA_PAYLOAD(na->nla_len);
				len2 = 0;
				/* For nested attributes, na follows */
				na = (struct nlattr *) NLA_DATA(na);
				done = 0;
				while (len2 < aggr_len) {
					switch (na->nla_type) {
					case TASKSTATS_TYPE_PID:
						rtid = *(int *) NLA_DATA(na);
						if (print_delays)
							printf("PID\t%d\n", rtid);
						break;
					case TASKSTATS_TYPE_TGID:
						rtid = *(int *) NLA_DATA(na);
						if (print_delays)
							printf("TGID\t%d\n", rtid);
						break;
					case TASKSTATS_TYPE_STATS:
						count++;
						if (print_delays)
							print_delayacct((struct taskstats *) NLA_DATA(na));
496 497
						if (print_io_accounting)
							print_ioacct((struct taskstats *) NLA_DATA(na));
498 499
						if (print_task_context_switch_counts)
							task_context_switch_counts((struct taskstats *) NLA_DATA(na));
500 501 502 503 504 505 506 507
						if (fd) {
							if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
								err(1,"write error\n");
							}
						}
						if (!loop)
							goto done;
						break;
508 509
					case TASKSTATS_TYPE_NULL:
						break;
510
					default:
511 512 513
						fprintf(stderr, "Unknown nested"
							" nla_type %d\n",
							na->nla_type);
514 515 516
						break;
					}
					len2 += NLA_ALIGN(na->nla_len);
517 518
					na = (struct nlattr *)((char *)na +
							       NLA_ALIGN(na->nla_len));
519 520 521
				}
				break;

522 523 524
			case CGROUPSTATS_TYPE_CGROUP_STATS:
				print_cgroupstats(NLA_DATA(na));
				break;
525
			default:
526 527
				fprintf(stderr, "Unknown nla_type %d\n",
					na->nla_type);
528
			case TASKSTATS_TYPE_NULL:
529
				break;
530
			}
531
			na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
532
		}
533 534 535 536 537
	} while (loop);
done:
	if (maskset) {
		rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
			      TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
538
			      &cpumask, strlen(cpumask) + 1);
539 540 541
		printf("Sent deregister mask, retval %d\n", rc);
		if (rc < 0)
			err(rc, "error sending deregister cpumask\n");
542
	}
543 544 545 546
err:
	close(nl_sd);
	if (fd)
		close(fd);
547 548
	if (cfd)
		close(cfd);
549
	return 0;
550
}