trace_functions_graph.c 38 KB
Newer Older
1 2 3
/*
 *
 * Function graph tracer.
4
 * Copyright (c) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
5 6 7 8 9 10
 * Mostly borrowed from function tracer which
 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
 *
 */
#include <linux/uaccess.h>
#include <linux/ftrace.h>
11
#include <linux/interrupt.h>
12
#include <linux/slab.h>
13 14 15
#include <linux/fs.h>

#include "trace.h"
16
#include "trace_output.h"
17

18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
static bool kill_ftrace_graph;

/**
 * ftrace_graph_is_dead - returns true if ftrace_graph_stop() was called
 *
 * ftrace_graph_stop() is called when a severe error is detected in
 * the function graph tracing. This function is called by the critical
 * paths of function graph to keep those paths from doing any more harm.
 */
bool ftrace_graph_is_dead(void)
{
	return kill_ftrace_graph;
}

/**
 * ftrace_graph_stop - set to permanently disable function graph tracincg
 *
 * In case of an error int function graph tracing, this is called
 * to try to keep function graph tracing from causing any more harm.
 * Usually this is pretty severe and this is called to try to at least
 * get a warning out to the user.
 */
void ftrace_graph_stop(void)
{
	kill_ftrace_graph = true;
}

45 46 47
/* When set, irq functions will be ignored */
static int ftrace_graph_skip_irqs;

48
struct fgraph_cpu_data {
49 50
	pid_t		last_pid;
	int		depth;
51
	int		depth_irq;
52
	int		ignore;
53
	unsigned long	enter_funcs[FTRACE_RETFUNC_DEPTH];
54 55 56
};

struct fgraph_data {
57
	struct fgraph_cpu_data __percpu *cpu_data;
58 59 60 61 62 63

	/* Place to preserve last processed entry. */
	struct ftrace_graph_ent_entry	ent;
	struct ftrace_graph_ret_entry	ret;
	int				failed;
	int				cpu;
64 65
};

66
#define TRACE_GRAPH_INDENT	2
67

68 69
static unsigned int max_depth;

70
static struct tracer_opt trace_opts[] = {
71
	/* Display overruns? (for self-debug purpose) */
72 73 74 75 76
	{ TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
	/* Display CPU ? */
	{ TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
	/* Display Overhead ? */
	{ TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
77 78
	/* Display proc name/pid */
	{ TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) },
79 80 81 82
	/* Display duration of execution */
	{ TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
	/* Display absolute time of an entry */
	{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
83 84
	/* Display interrupts */
	{ TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
85 86
	/* Display function name after trailing } */
	{ TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) },
87 88 89 90
	/* Include sleep time (scheduled out) between entry and return */
	{ TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
	/* Include time within nested functions */
	{ TRACER_OPT(graph-time, TRACE_GRAPH_GRAPH_TIME) },
91 92 93 94
	{ } /* Empty entry */
};

static struct tracer_flags tracer_flags = {
95
	/* Don't display overruns, proc, or tail by default */
96
	.val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
97 98
	       TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS |
	       TRACE_GRAPH_SLEEP_TIME | TRACE_GRAPH_GRAPH_TIME,
99 100 101
	.opts = trace_opts
};

102
static struct trace_array *graph_array;
103

104 105 106 107 108 109
/*
 * DURATION column is being also used to display IRQ signs,
 * following values are used by print_graph_irq and others
 * to fill in space into DURATION column.
 */
enum {
110 111 112
	FLAGS_FILL_FULL  = 1 << TRACE_GRAPH_PRINT_FILL_SHIFT,
	FLAGS_FILL_START = 2 << TRACE_GRAPH_PRINT_FILL_SHIFT,
	FLAGS_FILL_END   = 3 << TRACE_GRAPH_PRINT_FILL_SHIFT,
113 114
};

115
static void
116 117
print_graph_duration(struct trace_array *tr, unsigned long long duration,
		     struct trace_seq *s, u32 flags);
118

119 120
/* Add a function return address to the trace stack on thread info.*/
int
121
ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
122
			 unsigned long frame_pointer, unsigned long *retp)
123
{
124
	unsigned long long calltime;
125 126
	int index;

127 128 129
	if (unlikely(ftrace_graph_is_dead()))
		return -EBUSY;

130 131 132
	if (!current->ret_stack)
		return -EBUSY;

133 134 135 136 137 138
	/*
	 * We must make sure the ret_stack is tested before we read
	 * anything else.
	 */
	smp_rmb();

139 140 141 142 143 144
	/* The return trace stack is full */
	if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
		atomic_inc(&current->trace_overrun);
		return -EBUSY;
	}

145 146 147 148 149 150 151 152 153 154 155 156 157 158
	/*
	 * The curr_ret_stack is an index to ftrace return stack of
	 * current task.  Its value should be in [0, FTRACE_RETFUNC_
	 * DEPTH) when the function graph tracer is used.  To support
	 * filtering out specific functions, it makes the index
	 * negative by subtracting huge value (FTRACE_NOTRACE_DEPTH)
	 * so when it sees a negative index the ftrace will ignore
	 * the record.  And the index gets recovered when returning
	 * from the filtered function by adding the FTRACE_NOTRACE_
	 * DEPTH and then it'll continue to record functions normally.
	 *
	 * The curr_ret_stack is initialized to -1 and get increased
	 * in this function.  So it can be less than -1 only if it was
	 * filtered out via ftrace_graph_notrace_addr() which can be
159
	 * set from set_graph_notrace file in tracefs by user.
160 161 162 163
	 */
	if (current->curr_ret_stack < -1)
		return -EBUSY;

164 165
	calltime = trace_clock_local();

166
	index = ++current->curr_ret_stack;
167 168
	if (ftrace_graph_notrace_addr(func))
		current->curr_ret_stack -= FTRACE_NOTRACE_DEPTH;
169 170 171
	barrier();
	current->ret_stack[index].ret = ret;
	current->ret_stack[index].func = func;
172
	current->ret_stack[index].calltime = calltime;
173
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
174
	current->ret_stack[index].fp = frame_pointer;
175 176 177
#endif
#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
	current->ret_stack[index].retp = retp;
178
#endif
179
	*depth = current->curr_ret_stack;
180 181 182 183 184

	return 0;
}

/* Retrieve a function return address to the trace stack on thread info.*/
185
static void
186 187
ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
			unsigned long frame_pointer)
188 189 190 191 192
{
	int index;

	index = current->curr_ret_stack;

193 194 195 196 197 198 199 200 201 202 203
	/*
	 * A negative index here means that it's just returned from a
	 * notrace'd function.  Recover index to get an original
	 * return address.  See ftrace_push_return_trace().
	 *
	 * TODO: Need to check whether the stack gets corrupted.
	 */
	if (index < 0)
		index += FTRACE_NOTRACE_DEPTH;

	if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {
204 205 206 207 208 209 210
		ftrace_graph_stop();
		WARN_ON(1);
		/* Might as well panic, otherwise we have no where to go */
		*ret = (unsigned long)panic;
		return;
	}

211
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
212 213 214 215 216 217 218 219 220 221
	/*
	 * The arch may choose to record the frame pointer used
	 * and check it here to make sure that it is what we expect it
	 * to be. If gcc does not set the place holder of the return
	 * address in the frame pointer, and does a copy instead, then
	 * the function graph trace will fail. This test detects this
	 * case.
	 *
	 * Currently, x86_32 with optimize for size (-Os) makes the latest
	 * gcc do the above.
222 223 224
	 *
	 * Note, -mfentry does not use frame pointers, and this test
	 *  is not needed if CC_USING_FENTRY is set.
225 226 227 228
	 */
	if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
		ftrace_graph_stop();
		WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
229
		     "  from func %ps return to %lx\n",
230 231 232 233 234 235 236 237 238
		     current->ret_stack[index].fp,
		     frame_pointer,
		     (void *)current->ret_stack[index].func,
		     current->ret_stack[index].ret);
		*ret = (unsigned long)panic;
		return;
	}
#endif

239 240 241 242 243 244 245 246 247 248 249
	*ret = current->ret_stack[index].ret;
	trace->func = current->ret_stack[index].func;
	trace->calltime = current->ret_stack[index].calltime;
	trace->overrun = atomic_read(&current->trace_overrun);
	trace->depth = index;
}

/*
 * Send the trace to the ring-buffer.
 * @return the original return address.
 */
250
unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
251 252 253 254
{
	struct ftrace_graph_ret trace;
	unsigned long ret;

255
	ftrace_pop_return_trace(&trace, &ret, frame_pointer);
256
	trace.rettime = trace_clock_local();
257 258
	barrier();
	current->curr_ret_stack--;
259 260 261 262 263 264 265 266 267
	/*
	 * The curr_ret_stack can be less than -1 only if it was
	 * filtered out and it's about to return from the function.
	 * Recover the index and continue to trace normal functions.
	 */
	if (current->curr_ret_stack < -1) {
		current->curr_ret_stack += FTRACE_NOTRACE_DEPTH;
		return ret;
	}
268

269 270 271 272 273 274 275
	/*
	 * The trace should run after decrementing the ret counter
	 * in case an interrupt were to come in. We don't want to
	 * lose the interrupt if max_depth is set.
	 */
	ftrace_graph_return(&trace);

276 277 278 279 280 281 282 283 284 285
	if (unlikely(!ret)) {
		ftrace_graph_stop();
		WARN_ON(1);
		/* Might as well panic. What else to do? */
		ret = (unsigned long)panic;
	}

	return ret;
}

286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343
/**
 * ftrace_graph_ret_addr - convert a potentially modified stack return address
 *			   to its original value
 *
 * This function can be called by stack unwinding code to convert a found stack
 * return address ('ret') to its original value, in case the function graph
 * tracer has modified it to be 'return_to_handler'.  If the address hasn't
 * been modified, the unchanged value of 'ret' is returned.
 *
 * 'idx' is a state variable which should be initialized by the caller to zero
 * before the first call.
 *
 * 'retp' is a pointer to the return address on the stack.  It's ignored if
 * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined.
 */
#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
				    unsigned long ret, unsigned long *retp)
{
	int index = task->curr_ret_stack;
	int i;

	if (ret != (unsigned long)return_to_handler)
		return ret;

	if (index < -1)
		index += FTRACE_NOTRACE_DEPTH;

	if (index < 0)
		return ret;

	for (i = 0; i <= index; i++)
		if (task->ret_stack[i].retp == retp)
			return task->ret_stack[i].ret;

	return ret;
}
#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
				    unsigned long ret, unsigned long *retp)
{
	int task_idx;

	if (ret != (unsigned long)return_to_handler)
		return ret;

	task_idx = task->curr_ret_stack;

	if (!task->ret_stack || task_idx < *idx)
		return ret;

	task_idx -= *idx;
	(*idx)++;

	return task->ret_stack[task_idx].ret;
}
#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */

344
int __trace_graph_entry(struct trace_array *tr,
345 346 347 348
				struct ftrace_graph_ent *trace,
				unsigned long flags,
				int pc)
{
349
	struct trace_event_call *call = &event_funcgraph_entry;
350
	struct ring_buffer_event *event;
351
	struct ring_buffer *buffer = tr->trace_buffer.buffer;
352 353
	struct ftrace_graph_ent_entry *entry;

354
	event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
355 356 357 358 359
					  sizeof(*entry), flags, pc);
	if (!event)
		return 0;
	entry	= ring_buffer_event_data(event);
	entry->graph_ent			= *trace;
360
	if (!call_filter_check_discard(call, entry, buffer, event))
361
		__buffer_unlock_commit(buffer, event);
362 363 364 365

	return 1;
}

366 367
static inline int ftrace_graph_ignore_irqs(void)
{
368
	if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT))
369 370 371 372 373
		return 0;

	return in_irq();
}

374 375 376 377 378 379 380 381 382 383
int trace_graph_entry(struct ftrace_graph_ent *trace)
{
	struct trace_array *tr = graph_array;
	struct trace_array_cpu *data;
	unsigned long flags;
	long disabled;
	int ret;
	int cpu;
	int pc;

384
	if (!ftrace_trace_task(tr))
385 386
		return 0;

387
	/* trace it when it is-nested-in or is a function enabled. */
388
	if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
389
	     ftrace_graph_ignore_irqs()) || (trace->depth < 0) ||
390
	    (max_depth && trace->depth >= max_depth))
391 392
		return 0;

393 394 395 396 397 398 399 400 401 402
	/*
	 * Do not trace a function if it's filtered by set_graph_notrace.
	 * Make the index of ret stack negative to indicate that it should
	 * ignore further functions.  But it needs its own ret stack entry
	 * to recover the original index in order to continue tracing after
	 * returning from the function.
	 */
	if (ftrace_graph_notrace_addr(trace->func))
		return 1;

403 404 405 406 407 408 409
	/*
	 * Stop here if tracing_threshold is set. We only write function return
	 * events to the ring buffer.
	 */
	if (tracing_thresh)
		return 1;

410 411
	local_irq_save(flags);
	cpu = raw_smp_processor_id();
412
	data = per_cpu_ptr(tr->trace_buffer.data, cpu);
413 414 415 416 417 418 419 420 421 422 423 424 425 426
	disabled = atomic_inc_return(&data->disabled);
	if (likely(disabled == 1)) {
		pc = preempt_count();
		ret = __trace_graph_entry(tr, trace, flags, pc);
	} else {
		ret = 0;
	}

	atomic_dec(&data->disabled);
	local_irq_restore(flags);

	return ret;
}

427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454
static void
__trace_graph_function(struct trace_array *tr,
		unsigned long ip, unsigned long flags, int pc)
{
	u64 time = trace_clock_local();
	struct ftrace_graph_ent ent = {
		.func  = ip,
		.depth = 0,
	};
	struct ftrace_graph_ret ret = {
		.func     = ip,
		.depth    = 0,
		.calltime = time,
		.rettime  = time,
	};

	__trace_graph_entry(tr, &ent, flags, pc);
	__trace_graph_return(tr, &ret, flags, pc);
}

void
trace_graph_function(struct trace_array *tr,
		unsigned long ip, unsigned long parent_ip,
		unsigned long flags, int pc)
{
	__trace_graph_function(tr, ip, flags, pc);
}

455
void __trace_graph_return(struct trace_array *tr,
456 457 458 459
				struct ftrace_graph_ret *trace,
				unsigned long flags,
				int pc)
{
460
	struct trace_event_call *call = &event_funcgraph_exit;
461
	struct ring_buffer_event *event;
462
	struct ring_buffer *buffer = tr->trace_buffer.buffer;
463 464
	struct ftrace_graph_ret_entry *entry;

465
	event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
466 467 468 469 470
					  sizeof(*entry), flags, pc);
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
	entry->ret				= *trace;
471
	if (!call_filter_check_discard(call, entry, buffer, event))
472
		__buffer_unlock_commit(buffer, event);
473 474 475 476 477 478 479 480 481 482 483 484 485
}

void trace_graph_return(struct ftrace_graph_ret *trace)
{
	struct trace_array *tr = graph_array;
	struct trace_array_cpu *data;
	unsigned long flags;
	long disabled;
	int cpu;
	int pc;

	local_irq_save(flags);
	cpu = raw_smp_processor_id();
486
	data = per_cpu_ptr(tr->trace_buffer.data, cpu);
487 488 489 490 491 492 493 494 495
	disabled = atomic_inc_return(&data->disabled);
	if (likely(disabled == 1)) {
		pc = preempt_count();
		__trace_graph_return(tr, trace, flags, pc);
	}
	atomic_dec(&data->disabled);
	local_irq_restore(flags);
}

496 497 498 499 500 501 502 503 504
void set_graph_array(struct trace_array *tr)
{
	graph_array = tr;

	/* Make graph_array visible before we start tracing */

	smp_mb();
}

505
static void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
506 507 508 509 510 511 512 513
{
	if (tracing_thresh &&
	    (trace->rettime - trace->calltime < tracing_thresh))
		return;
	else
		trace_graph_return(trace);
}

514 515
static int graph_trace_init(struct trace_array *tr)
{
516 517
	int ret;

518
	set_graph_array(tr);
519 520
	if (tracing_thresh)
		ret = register_ftrace_graph(&trace_graph_thresh_return,
521
					    &trace_graph_entry);
522 523 524
	else
		ret = register_ftrace_graph(&trace_graph_return,
					    &trace_graph_entry);
525 526 527 528 529
	if (ret)
		return ret;
	tracing_start_cmdline_record();

	return 0;
530 531 532 533
}

static void graph_trace_reset(struct trace_array *tr)
{
534 535
	tracing_stop_cmdline_record();
	unregister_ftrace_graph();
536 537
}

538
static int graph_trace_update_thresh(struct trace_array *tr)
539 540 541 542 543
{
	graph_trace_reset(tr);
	return graph_trace_init(tr);
}

544
static int max_bytes_for_cpu;
545

546
static void print_graph_cpu(struct trace_seq *s, int cpu)
547
{
548 549 550 551 552
	/*
	 * Start with a space character - to make it stand out
	 * to the right a bit when trace output is pasted into
	 * email:
	 */
553
	trace_seq_printf(s, " %*d) ", max_bytes_for_cpu, cpu);
554 555
}

556 557
#define TRACE_GRAPH_PROCINFO_LENGTH	14

558
static void print_graph_proc(struct trace_seq *s, pid_t pid)
559
{
560
	char comm[TASK_COMM_LEN];
561 562
	/* sign + log10(MAX_INT) + '\0' */
	char pid_str[11];
563 564 565
	int spaces = 0;
	int len;
	int i;
566

567
	trace_find_cmdline(pid, comm);
568 569 570 571 572 573 574 575 576 577
	comm[7] = '\0';
	sprintf(pid_str, "%d", pid);

	/* 1 stands for the "-" character */
	len = strlen(comm) + strlen(pid_str) + 1;

	if (len < TRACE_GRAPH_PROCINFO_LENGTH)
		spaces = TRACE_GRAPH_PROCINFO_LENGTH - len;

	/* First spaces to align center */
578 579
	for (i = 0; i < spaces / 2; i++)
		trace_seq_putc(s, ' ');
580

581
	trace_seq_printf(s, "%s-%s", comm, pid_str);
582 583

	/* Last spaces to align center */
584 585
	for (i = 0; i < spaces - (spaces / 2); i++)
		trace_seq_putc(s, ' ');
586 587
}

588

589
static void print_graph_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
590
{
591 592
	trace_seq_putc(s, ' ');
	trace_print_lat_fmt(s, entry);
593 594
}

595
/* If the pid changed since the last trace, output this event */
596
static void
597
verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
598
{
599
	pid_t prev_pid;
600
	pid_t *last_pid;
601

602
	if (!data)
603
		return;
604

605
	last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
606 607

	if (*last_pid == pid)
608
		return;
609

610 611
	prev_pid = *last_pid;
	*last_pid = pid;
612

613
	if (prev_pid == -1)
614
		return;
615 616 617 618 619 620 621 622
/*
 * Context-switch trace line:

 ------------------------------------------
 | 1)  migration/0--1  =>  sshd-1755
 ------------------------------------------

 */
623 624 625 626 627 628
	trace_seq_puts(s, " ------------------------------------------\n");
	print_graph_cpu(s, cpu);
	print_graph_proc(s, prev_pid);
	trace_seq_puts(s, " => ");
	print_graph_proc(s, pid);
	trace_seq_puts(s, "\n ------------------------------------------\n\n");
629 630
}

631 632
static struct ftrace_graph_ret_entry *
get_return_for_leaf(struct trace_iterator *iter,
633 634
		struct ftrace_graph_ent_entry *curr)
{
635 636
	struct fgraph_data *data = iter->private;
	struct ring_buffer_iter *ring_iter = NULL;
637 638 639
	struct ring_buffer_event *event;
	struct ftrace_graph_ret_entry *next;

640 641 642 643 644 645 646 647
	/*
	 * If the previous output failed to write to the seq buffer,
	 * then we just reuse the data from before.
	 */
	if (data && data->failed) {
		curr = &data->ent;
		next = &data->ret;
	} else {
648

649
		ring_iter = trace_buffer_iter(iter, iter->cpu);
650 651 652 653 654 655 656 657 658

		/* First peek to compare current entry and the next one */
		if (ring_iter)
			event = ring_buffer_iter_peek(ring_iter, NULL);
		else {
			/*
			 * We need to consume the current entry to see
			 * the next one.
			 */
659
			ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu,
660
					    NULL, NULL);
661
			event = ring_buffer_peek(iter->trace_buffer->buffer, iter->cpu,
662
						 NULL, NULL);
663
		}
664

665 666 667 668
		if (!event)
			return NULL;

		next = ring_buffer_event_data(event);
669

670 671 672 673 674 675
		if (data) {
			/*
			 * Save current and next entries for later reference
			 * if the output fails.
			 */
			data->ent = *curr;
676 677 678 679 680 681 682 683 684
			/*
			 * If the next event is not a return type, then
			 * we only care about what type it is. Otherwise we can
			 * safely copy the entire event.
			 */
			if (next->ent.type == TRACE_GRAPH_RET)
				data->ret = *next;
			else
				data->ret.ent.type = next->ent.type;
685 686
		}
	}
687 688

	if (next->ent.type != TRACE_GRAPH_RET)
689
		return NULL;
690 691 692

	if (curr->ent.pid != next->ent.pid ||
			curr->graph_ent.func != next->ret.func)
693
		return NULL;
694

695 696 697 698 699
	/* this is a leaf, now advance the iterator */
	if (ring_iter)
		ring_buffer_read(ring_iter, NULL);

	return next;
700 701
}

702
static void print_graph_abs_time(u64 t, struct trace_seq *s)
703 704 705 706 707 708
{
	unsigned long usecs_rem;

	usecs_rem = do_div(t, NSEC_PER_SEC);
	usecs_rem /= 1000;

709 710
	trace_seq_printf(s, "%5lu.%06lu |  ",
			 (unsigned long)t, usecs_rem);
711 712
}

713
static void
714
print_graph_irq(struct trace_iterator *iter, unsigned long addr,
715
		enum trace_type type, int cpu, pid_t pid, u32 flags)
716
{
717
	struct trace_array *tr = iter->tr;
718
	struct trace_seq *s = &iter->seq;
719
	struct trace_entry *ent = iter->ent;
720 721 722

	if (addr < (unsigned long)__irqentry_text_start ||
		addr >= (unsigned long)__irqentry_text_end)
723
		return;
724

725
	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
726
		/* Absolute time */
727 728
		if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
			print_graph_abs_time(iter->ts, s);
729

730
		/* Cpu */
731 732
		if (flags & TRACE_GRAPH_PRINT_CPU)
			print_graph_cpu(s, cpu);
733

734 735
		/* Proc */
		if (flags & TRACE_GRAPH_PRINT_PROC) {
736 737
			print_graph_proc(s, pid);
			trace_seq_puts(s, " | ");
738
		}
739 740

		/* Latency format */
741
		if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
742
			print_graph_lat_fmt(s, ent);
743
	}
744

745
	/* No overhead */
746
	print_graph_duration(tr, 0, s, flags | FLAGS_FILL_START);
747

748
	if (type == TRACE_GRAPH_ENT)
749
		trace_seq_puts(s, "==========>");
750
	else
751
		trace_seq_puts(s, "<==========");
752

753
	print_graph_duration(tr, 0, s, flags | FLAGS_FILL_END);
754
	trace_seq_putc(s, '\n');
755
}
756

757
void
758
trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
759 760
{
	unsigned long nsecs_rem = do_div(duration, 1000);
761
	/* log10(ULONG_MAX) + '\0' */
762
	char usecs_str[21];
763
	char nsecs_str[5];
764
	int len;
765 766
	int i;

767
	sprintf(usecs_str, "%lu", (unsigned long) duration);
768 769

	/* Print msecs */
770
	trace_seq_printf(s, "%s", usecs_str);
771

772
	len = strlen(usecs_str);
773 774 775

	/* Print nsecs (we don't want to exceed 7 numbers) */
	if (len < 7) {
776 777 778
		size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len);

		snprintf(nsecs_str, slen, "%03lu", nsecs_rem);
779
		trace_seq_printf(s, ".%s", nsecs_str);
780
		len += strlen(nsecs_str) + 1;
781 782
	}

783
	trace_seq_puts(s, " us ");
784 785

	/* Print remaining spaces to fit the row's width */
786
	for (i = len; i < 8; i++)
787
		trace_seq_putc(s, ' ');
788 789
}

790
static void
791 792
print_graph_duration(struct trace_array *tr, unsigned long long duration,
		     struct trace_seq *s, u32 flags)
793
{
794
	if (!(flags & TRACE_GRAPH_PRINT_DURATION) ||
795
	    !(tr->trace_flags & TRACE_ITER_CONTEXT_INFO))
796
		return;
797 798

	/* No real adata, just filling the column with spaces */
799 800
	switch (flags & TRACE_GRAPH_PRINT_FILL_MASK) {
	case FLAGS_FILL_FULL:
801 802
		trace_seq_puts(s, "              |  ");
		return;
803
	case FLAGS_FILL_START:
804 805
		trace_seq_puts(s, "  ");
		return;
806
	case FLAGS_FILL_END:
807 808
		trace_seq_puts(s, " |");
		return;
809 810 811
	}

	/* Signal a overhead of time execution to the output */
812 813 814
	if (flags & TRACE_GRAPH_PRINT_OVERHEAD)
		trace_seq_printf(s, "%c ", trace_find_mark(duration));
	else
815
		trace_seq_puts(s, "  ");
816

817 818
	trace_print_graph_duration(duration, s);
	trace_seq_puts(s, "|  ");
819 820 821
}

/* Case of a leaf function on its call entry */
822
static enum print_line_t
823
print_graph_entry_leaf(struct trace_iterator *iter,
824
		struct ftrace_graph_ent_entry *entry,
825 826
		struct ftrace_graph_ret_entry *ret_entry,
		struct trace_seq *s, u32 flags)
827
{
828
	struct fgraph_data *data = iter->private;
829
	struct trace_array *tr = iter->tr;
830 831 832
	struct ftrace_graph_ret *graph_ret;
	struct ftrace_graph_ent *call;
	unsigned long long duration;
833
	int i;
834

835 836 837 838
	graph_ret = &ret_entry->ret;
	call = &entry->graph_ent;
	duration = graph_ret->rettime - graph_ret->calltime;

839
	if (data) {
840
		struct fgraph_cpu_data *cpu_data;
841
		int cpu = iter->cpu;
842 843

		cpu_data = per_cpu_ptr(data->cpu_data, cpu);
844 845 846 847 848 849

		/*
		 * Comments display at + 1 to depth. Since
		 * this is a leaf function, keep the comments
		 * equal to this depth.
		 */
850 851 852 853 854
		cpu_data->depth = call->depth - 1;

		/* No need to keep this function around for this depth */
		if (call->depth < FTRACE_RETFUNC_DEPTH)
			cpu_data->enter_funcs[call->depth] = 0;
855 856
	}

857
	/* Overhead and duration */
858
	print_graph_duration(tr, duration, s, flags);
859

860
	/* Function */
861 862
	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++)
		trace_seq_putc(s, ' ');
863

864
	trace_seq_printf(s, "%ps();\n", (void *)call->func);
865

866
	return trace_handle_return(s);
867 868 869
}

static enum print_line_t
870 871
print_graph_entry_nested(struct trace_iterator *iter,
			 struct ftrace_graph_ent_entry *entry,
872
			 struct trace_seq *s, int cpu, u32 flags)
873 874
{
	struct ftrace_graph_ent *call = &entry->graph_ent;
875
	struct fgraph_data *data = iter->private;
876
	struct trace_array *tr = iter->tr;
877 878 879
	int i;

	if (data) {
880
		struct fgraph_cpu_data *cpu_data;
881 882
		int cpu = iter->cpu;

883 884 885 886 887 888
		cpu_data = per_cpu_ptr(data->cpu_data, cpu);
		cpu_data->depth = call->depth;

		/* Save this function pointer to see if the exit matches */
		if (call->depth < FTRACE_RETFUNC_DEPTH)
			cpu_data->enter_funcs[call->depth] = call->func;
889
	}
890

891
	/* No time */
892
	print_graph_duration(tr, 0, s, flags | FLAGS_FILL_FULL);
893

894
	/* Function */
895 896 897 898
	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++)
		trace_seq_putc(s, ' ');

	trace_seq_printf(s, "%ps() {\n", (void *)call->func);
899

900
	if (trace_seq_has_overflowed(s))
901 902
		return TRACE_TYPE_PARTIAL_LINE;

903 904 905 906 907
	/*
	 * we already consumed the current entry to check the next one
	 * and see if this is a leaf.
	 */
	return TRACE_TYPE_NO_CONSUME;
908 909
}

910
static void
911
print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
912
		     int type, unsigned long addr, u32 flags)
913
{
914
	struct fgraph_data *data = iter->private;
915
	struct trace_entry *ent = iter->ent;
916
	struct trace_array *tr = iter->tr;
917
	int cpu = iter->cpu;
918

919
	/* Pid */
920
	verif_pid(s, ent->pid, cpu, data);
921

922
	if (type)
923
		/* Interrupt */
924
		print_graph_irq(iter, addr, type, cpu, ent->pid, flags);
925

926
	if (!(tr->trace_flags & TRACE_ITER_CONTEXT_INFO))
927
		return;
928

929
	/* Absolute time */
930 931
	if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
		print_graph_abs_time(iter->ts, s);
932

933
	/* Cpu */
934 935
	if (flags & TRACE_GRAPH_PRINT_CPU)
		print_graph_cpu(s, cpu);
936 937

	/* Proc */
938
	if (flags & TRACE_GRAPH_PRINT_PROC) {
939 940
		print_graph_proc(s, ent->pid);
		trace_seq_puts(s, " | ");
941
	}
942

943
	/* Latency format */
944
	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
945
		print_graph_lat_fmt(s, ent);
946

947
	return;
948 949
}

950 951 952 953 954
/*
 * Entry check for irq code
 *
 * returns 1 if
 *  - we are inside irq code
Lucas De Marchi's avatar
Lucas De Marchi committed
955
 *  - we just entered irq code
956 957 958 959 960 961 962 963 964 965
 *
 * retunns 0 if
 *  - funcgraph-interrupts option is set
 *  - we are not inside irq code
 */
static int
check_irq_entry(struct trace_iterator *iter, u32 flags,
		unsigned long addr, int depth)
{
	int cpu = iter->cpu;
966
	int *depth_irq;
967 968
	struct fgraph_data *data = iter->private;

969 970 971 972 973 974 975
	/*
	 * If we are either displaying irqs, or we got called as
	 * a graph event and private data does not exist,
	 * then we bypass the irq check.
	 */
	if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
	    (!data))
976 977
		return 0;

978 979
	depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);

980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011
	/*
	 * We are inside the irq code
	 */
	if (*depth_irq >= 0)
		return 1;

	if ((addr < (unsigned long)__irqentry_text_start) ||
	    (addr >= (unsigned long)__irqentry_text_end))
		return 0;

	/*
	 * We are entering irq code.
	 */
	*depth_irq = depth;
	return 1;
}

/*
 * Return check for irq code
 *
 * returns 1 if
 *  - we are inside irq code
 *  - we just left irq code
 *
 * returns 0 if
 *  - funcgraph-interrupts option is set
 *  - we are not inside irq code
 */
static int
check_irq_return(struct trace_iterator *iter, u32 flags, int depth)
{
	int cpu = iter->cpu;
1012
	int *depth_irq;
1013 1014
	struct fgraph_data *data = iter->private;

1015 1016 1017 1018 1019 1020 1021
	/*
	 * If we are either displaying irqs, or we got called as
	 * a graph event and private data does not exist,
	 * then we bypass the irq check.
	 */
	if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
	    (!data))
1022 1023
		return 0;

1024 1025
	depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq);

1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051
	/*
	 * We are not inside the irq code.
	 */
	if (*depth_irq == -1)
		return 0;

	/*
	 * We are inside the irq code, and this is returning entry.
	 * Let's not trace it and clear the entry depth, since
	 * we are out of irq code.
	 *
	 * This condition ensures that we 'leave the irq code' once
	 * we are out of the entry depth. Thus protecting us from
	 * the RETURN entry loss.
	 */
	if (*depth_irq >= depth) {
		*depth_irq = -1;
		return 1;
	}

	/*
	 * We are inside the irq code, and this is not the entry.
	 */
	return 1;
}

1052 1053
static enum print_line_t
print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
1054
			struct trace_iterator *iter, u32 flags)
1055
{
1056
	struct fgraph_data *data = iter->private;
1057 1058
	struct ftrace_graph_ent *call = &field->graph_ent;
	struct ftrace_graph_ret_entry *leaf_ret;
1059 1060
	static enum print_line_t ret;
	int cpu = iter->cpu;
1061

1062 1063 1064
	if (check_irq_entry(iter, flags, call->func, call->depth))
		return TRACE_TYPE_HANDLED;

1065
	print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags);
1066

1067 1068
	leaf_ret = get_return_for_leaf(iter, field);
	if (leaf_ret)
1069
		ret = print_graph_entry_leaf(iter, field, leaf_ret, s, flags);
1070
	else
1071
		ret = print_graph_entry_nested(iter, field, s, cpu, flags);
1072

1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085
	if (data) {
		/*
		 * If we failed to write our output, then we need to make
		 * note of it. Because we already consumed our entry.
		 */
		if (s->full) {
			data->failed = 1;
			data->cpu = cpu;
		} else
			data->failed = 0;
	}

	return ret;
1086 1087
}

1088 1089
static enum print_line_t
print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
1090 1091
		   struct trace_entry *ent, struct trace_iterator *iter,
		   u32 flags)
1092
{
1093
	unsigned long long duration = trace->rettime - trace->calltime;
1094
	struct fgraph_data *data = iter->private;
1095
	struct trace_array *tr = iter->tr;
1096 1097
	pid_t pid = ent->pid;
	int cpu = iter->cpu;
1098
	int func_match = 1;
1099 1100
	int i;

1101 1102 1103
	if (check_irq_return(iter, flags, trace->depth))
		return TRACE_TYPE_HANDLED;

1104
	if (data) {
1105 1106 1107 1108
		struct fgraph_cpu_data *cpu_data;
		int cpu = iter->cpu;

		cpu_data = per_cpu_ptr(data->cpu_data, cpu);
1109 1110 1111 1112 1113 1114

		/*
		 * Comments display at + 1 to depth. This is the
		 * return from a function, we now want the comments
		 * to display at the same level of the bracket.
		 */
1115 1116 1117 1118 1119 1120 1121
		cpu_data->depth = trace->depth - 1;

		if (trace->depth < FTRACE_RETFUNC_DEPTH) {
			if (cpu_data->enter_funcs[trace->depth] != trace->func)
				func_match = 0;
			cpu_data->enter_funcs[trace->depth] = 0;
		}
1122
	}
1123

1124
	print_graph_prologue(iter, s, 0, 0, flags);
1125

1126
	/* Overhead and duration */
1127
	print_graph_duration(tr, duration, s, flags);
1128

1129
	/* Closing brace */
1130 1131
	for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++)
		trace_seq_putc(s, ' ');
1132