trace_functions_graph.c 38.4 KB
Newer Older
1 2 3
/*
 *
 * Function graph tracer.
4
 * Copyright (c) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
5 6 7 8 9 10
 * Mostly borrowed from function tracer which
 * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
 *
 */
#include <linux/uaccess.h>
#include <linux/ftrace.h>
11
#include <linux/interrupt.h>
12
#include <linux/slab.h>
13 14 15
#include <linux/fs.h>

#include "trace.h"
16
#include "trace_output.h"
17

18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
static bool kill_ftrace_graph;

/**
 * ftrace_graph_is_dead - returns true if ftrace_graph_stop() was called
 *
 * ftrace_graph_stop() is called when a severe error is detected in
 * the function graph tracing. This function is called by the critical
 * paths of function graph to keep those paths from doing any more harm.
 */
bool ftrace_graph_is_dead(void)
{
	return kill_ftrace_graph;
}

/**
 * ftrace_graph_stop - set to permanently disable function graph tracincg
 *
 * In case of an error int function graph tracing, this is called
 * to try to keep function graph tracing from causing any more harm.
 * Usually this is pretty severe and this is called to try to at least
 * get a warning out to the user.
 */
void ftrace_graph_stop(void)
{
	kill_ftrace_graph = true;
}

45 46 47
/* When set, irq functions will be ignored */
static int ftrace_graph_skip_irqs;

48
struct fgraph_cpu_data {
49 50
	pid_t		last_pid;
	int		depth;
51
	int		depth_irq;
52
	int		ignore;
53
	unsigned long	enter_funcs[FTRACE_RETFUNC_DEPTH];
54 55 56
};

struct fgraph_data {
57
	struct fgraph_cpu_data __percpu *cpu_data;
58 59 60 61 62 63

	/* Place to preserve last processed entry. */
	struct ftrace_graph_ent_entry	ent;
	struct ftrace_graph_ret_entry	ret;
	int				failed;
	int				cpu;
64 65
};

66
#define TRACE_GRAPH_INDENT	2
67

68 69
static unsigned int max_depth;

70
static struct tracer_opt trace_opts[] = {
71
	/* Display overruns? (for self-debug purpose) */
72 73 74 75 76
	{ TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
	/* Display CPU ? */
	{ TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
	/* Display Overhead ? */
	{ TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
77 78
	/* Display proc name/pid */
	{ TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) },
79 80 81 82
	/* Display duration of execution */
	{ TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
	/* Display absolute time of an entry */
	{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
83 84
	/* Display interrupts */
	{ TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
85 86
	/* Display function name after trailing } */
	{ TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) },
87 88 89 90
	/* Include sleep time (scheduled out) between entry and return */
	{ TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
	/* Include time within nested functions */
	{ TRACER_OPT(graph-time, TRACE_GRAPH_GRAPH_TIME) },
91 92 93 94
	{ } /* Empty entry */
};

static struct tracer_flags tracer_flags = {
95
	/* Don't display overruns, proc, or tail by default */
96
	.val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
97 98
	       TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS |
	       TRACE_GRAPH_SLEEP_TIME | TRACE_GRAPH_GRAPH_TIME,
99 100 101
	.opts = trace_opts
};

102
static struct trace_array *graph_array;
103

104 105 106 107 108 109
/*
 * DURATION column is being also used to display IRQ signs,
 * following values are used by print_graph_irq and others
 * to fill in space into DURATION column.
 */
enum {
110 111 112
	FLAGS_FILL_FULL  = 1 << TRACE_GRAPH_PRINT_FILL_SHIFT,
	FLAGS_FILL_START = 2 << TRACE_GRAPH_PRINT_FILL_SHIFT,
	FLAGS_FILL_END   = 3 << TRACE_GRAPH_PRINT_FILL_SHIFT,
113 114
};

115
static void
116 117
print_graph_duration(struct trace_array *tr, unsigned long long duration,
		     struct trace_seq *s, u32 flags);
118

119 120
/* Add a function return address to the trace stack on thread info.*/
int
121
ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
122
			 unsigned long frame_pointer, unsigned long *retp)
123
{
124
	unsigned long long calltime;
125 126
	int index;

127 128 129
	if (unlikely(ftrace_graph_is_dead()))
		return -EBUSY;

130 131 132
	if (!current->ret_stack)
		return -EBUSY;

133 134 135 136 137 138
	/*
	 * We must make sure the ret_stack is tested before we read
	 * anything else.
	 */
	smp_rmb();

139 140 141 142 143 144
	/* The return trace stack is full */
	if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
		atomic_inc(&current->trace_overrun);
		return -EBUSY;
	}

145 146 147 148 149 150 151 152 153 154 155 156 157 158
	/*
	 * The curr_ret_stack is an index to ftrace return stack of
	 * current task.  Its value should be in [0, FTRACE_RETFUNC_
	 * DEPTH) when the function graph tracer is used.  To support
	 * filtering out specific functions, it makes the index
	 * negative by subtracting huge value (FTRACE_NOTRACE_DEPTH)
	 * so when it sees a negative index the ftrace will ignore
	 * the record.  And the index gets recovered when returning
	 * from the filtered function by adding the FTRACE_NOTRACE_
	 * DEPTH and then it'll continue to record functions normally.
	 *
	 * The curr_ret_stack is initialized to -1 and get increased
	 * in this function.  So it can be less than -1 only if it was
	 * filtered out via ftrace_graph_notrace_addr() which can be
159
	 * set from set_graph_notrace file in tracefs by user.
160 161 162 163
	 */
	if (current->curr_ret_stack < -1)
		return -EBUSY;

164 165
	calltime = trace_clock_local();

166
	index = ++current->curr_ret_stack;
167 168
	if (ftrace_graph_notrace_addr(func))
		current->curr_ret_stack -= FTRACE_NOTRACE_DEPTH;
169 170 171
	barrier();
	current->ret_stack[index].ret = ret;
	current->ret_stack[index].func = func;
172
	current->ret_stack[index].calltime = calltime;
173
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
174
	current->ret_stack[index].fp = frame_pointer;
175 176 177
#endif
#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
	current->ret_stack[index].retp = retp;
178
#endif
179
	*depth = current->curr_ret_stack;
180 181 182 183 184

	return 0;
}

/* Retrieve a function return address to the trace stack on thread info.*/
185
static void
186 187
ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
			unsigned long frame_pointer)
188 189 190 191 192
{
	int index;

	index = current->curr_ret_stack;

193 194 195 196 197 198 199 200 201 202 203
	/*
	 * A negative index here means that it's just returned from a
	 * notrace'd function.  Recover index to get an original
	 * return address.  See ftrace_push_return_trace().
	 *
	 * TODO: Need to check whether the stack gets corrupted.
	 */
	if (index < 0)
		index += FTRACE_NOTRACE_DEPTH;

	if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {
204 205 206 207 208 209 210
		ftrace_graph_stop();
		WARN_ON(1);
		/* Might as well panic, otherwise we have no where to go */
		*ret = (unsigned long)panic;
		return;
	}

211
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
212 213 214 215 216 217 218 219 220 221
	/*
	 * The arch may choose to record the frame pointer used
	 * and check it here to make sure that it is what we expect it
	 * to be. If gcc does not set the place holder of the return
	 * address in the frame pointer, and does a copy instead, then
	 * the function graph trace will fail. This test detects this
	 * case.
	 *
	 * Currently, x86_32 with optimize for size (-Os) makes the latest
	 * gcc do the above.
222 223 224
	 *
	 * Note, -mfentry does not use frame pointers, and this test
	 *  is not needed if CC_USING_FENTRY is set.
225 226 227 228
	 */
	if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
		ftrace_graph_stop();
		WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
229
		     "  from func %ps return to %lx\n",
230 231 232 233 234 235 236 237 238
		     current->ret_stack[index].fp,
		     frame_pointer,
		     (void *)current->ret_stack[index].func,
		     current->ret_stack[index].ret);
		*ret = (unsigned long)panic;
		return;
	}
#endif

239 240 241 242 243 244 245 246 247 248 249
	*ret = current->ret_stack[index].ret;
	trace->func = current->ret_stack[index].func;
	trace->calltime = current->ret_stack[index].calltime;
	trace->overrun = atomic_read(&current->trace_overrun);
	trace->depth = index;
}

/*
 * Send the trace to the ring-buffer.
 * @return the original return address.
 */
250
unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
251 252 253 254
{
	struct ftrace_graph_ret trace;
	unsigned long ret;

255
	ftrace_pop_return_trace(&trace, &ret, frame_pointer);
256
	trace.rettime = trace_clock_local();
257 258
	barrier();
	current->curr_ret_stack--;
259 260 261 262 263 264 265 266 267
	/*
	 * The curr_ret_stack can be less than -1 only if it was
	 * filtered out and it's about to return from the function.
	 * Recover the index and continue to trace normal functions.
	 */
	if (current->curr_ret_stack < -1) {
		current->curr_ret_stack += FTRACE_NOTRACE_DEPTH;
		return ret;
	}
268

269 270 271 272 273 274 275
	/*
	 * The trace should run after decrementing the ret counter
	 * in case an interrupt were to come in. We don't want to
	 * lose the interrupt if max_depth is set.
	 */
	ftrace_graph_return(&trace);

276 277 278 279 280 281 282 283 284 285
	if (unlikely(!ret)) {
		ftrace_graph_stop();
		WARN_ON(1);
		/* Might as well panic. What else to do? */
		ret = (unsigned long)panic;
	}

	return ret;
}

286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343
/**
 * ftrace_graph_ret_addr - convert a potentially modified stack return address
 *			   to its original value
 *
 * This function can be called by stack unwinding code to convert a found stack
 * return address ('ret') to its original value, in case the function graph
 * tracer has modified it to be 'return_to_handler'.  If the address hasn't
 * been modified, the unchanged value of 'ret' is returned.
 *
 * 'idx' is a state variable which should be initialized by the caller to zero
 * before the first call.
 *
 * 'retp' is a pointer to the return address on the stack.  It's ignored if
 * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined.
 */
#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
				    unsigned long ret, unsigned long *retp)
{
	int index = task->curr_ret_stack;
	int i;

	if (ret != (unsigned long)return_to_handler)
		return ret;

	if (index < -1)
		index += FTRACE_NOTRACE_DEPTH;

	if (index < 0)
		return ret;

	for (i = 0; i <= index; i++)
		if (task->ret_stack[i].retp == retp)
			return task->ret_stack[i].ret;

	return ret;
}
#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
				    unsigned long ret, unsigned long *retp)
{
	int task_idx;

	if (ret != (unsigned long)return_to_handler)
		return ret;

	task_idx = task->curr_ret_stack;

	if (!task->ret_stack || task_idx < *idx)
		return ret;

	task_idx -= *idx;
	(*idx)++;

	return task->ret_stack[task_idx].ret;
}
#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */

344
int __trace_graph_entry(struct trace_array *tr,
345 346 347 348
				struct ftrace_graph_ent *trace,
				unsigned long flags,
				int pc)
{
349
	struct trace_event_call *call = &event_funcgraph_entry;
350
	struct ring_buffer_event *event;
351
	struct ring_buffer *buffer = tr->trace_buffer.buffer;
352 353
	struct ftrace_graph_ent_entry *entry;

354
	event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT,
355 356 357 358 359
					  sizeof(*entry), flags, pc);
	if (!event)
		return 0;
	entry	= ring_buffer_event_data(event);
	entry->graph_ent			= *trace;
360
	if (!call_filter_check_discard(call, entry, buffer, event))
361
		trace_buffer_unlock_commit_nostack(buffer, event);
362 363 364 365

	return 1;
}

366 367
static inline int ftrace_graph_ignore_irqs(void)
{
368
	if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT))
369 370 371 372 373
		return 0;

	return in_irq();
}

374 375 376 377 378 379 380 381 382 383
int trace_graph_entry(struct ftrace_graph_ent *trace)
{
	struct trace_array *tr = graph_array;
	struct trace_array_cpu *data;
	unsigned long flags;
	long disabled;
	int ret;
	int cpu;
	int pc;

384
	if (!ftrace_trace_task(tr))
385 386
		return 0;

387
	/* trace it when it is-nested-in or is a function enabled. */
388
	if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
389
	     ftrace_graph_ignore_irqs()) || (trace->depth < 0) ||
390
	    (max_depth && trace->depth >= max_depth))
391 392
		return 0;

393 394 395 396 397 398 399 400 401 402
	/*
	 * Do not trace a function if it's filtered by set_graph_notrace.
	 * Make the index of ret stack negative to indicate that it should
	 * ignore further functions.  But it needs its own ret stack entry
	 * to recover the original index in order to continue tracing after
	 * returning from the function.
	 */
	if (ftrace_graph_notrace_addr(trace->func))
		return 1;

403 404 405 406 407 408 409
	/*
	 * Stop here if tracing_threshold is set. We only write function return
	 * events to the ring buffer.
	 */
	if (tracing_thresh)
		return 1;

410 411
	local_irq_save(flags);
	cpu = raw_smp_processor_id();
412
	data = per_cpu_ptr(tr->trace_buffer.data, cpu);
413 414 415 416 417 418 419 420 421 422 423 424 425 426
	disabled = atomic_inc_return(&data->disabled);
	if (likely(disabled == 1)) {
		pc = preempt_count();
		ret = __trace_graph_entry(tr, trace, flags, pc);
	} else {
		ret = 0;
	}

	atomic_dec(&data->disabled);
	local_irq_restore(flags);

	return ret;
}

427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454
static void
__trace_graph_function(struct trace_array *tr,
		unsigned long ip, unsigned long flags, int pc)
{
	u64 time = trace_clock_local();
	struct ftrace_graph_ent ent = {
		.func  = ip,
		.depth = 0,
	};
	struct ftrace_graph_ret ret = {
		.func     = ip,
		.depth    = 0,
		.calltime = time,
		.rettime  = time,
	};

	__trace_graph_entry(tr, &ent, flags, pc);
	__trace_graph_return(tr, &ret, flags, pc);
}

void
trace_graph_function(struct trace_array *tr,
		unsigned long ip, unsigned long parent_ip,
		unsigned long flags, int pc)
{
	__trace_graph_function(tr, ip, flags, pc);
}

455
void __trace_graph_return(struct trace_array *tr,
456 457 458 459
				struct ftrace_graph_ret *trace,
				unsigned long flags,
				int pc)
{
460
	struct trace_event_call *call = &event_funcgraph_exit;
461
	struct ring_buffer_event *event;
462
	struct ring_buffer *buffer = tr->trace_buffer.buffer;
463 464
	struct ftrace_graph_ret_entry *entry;

465
	event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET,
466 467 468 469 470
					  sizeof(*entry), flags, pc);
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
	entry->ret				= *trace;
471
	if (!call_filter_check_discard(call, entry, buffer, event))
472
		trace_buffer_unlock_commit_nostack(buffer, event);
473 474 475 476 477 478 479 480 481 482 483 484 485
}

void trace_graph_return(struct ftrace_graph_ret *trace)
{
	struct trace_array *tr = graph_array;
	struct trace_array_cpu *data;
	unsigned long flags;
	long disabled;
	int cpu;
	int pc;

	local_irq_save(flags);
	cpu = raw_smp_processor_id();
486
	data = per_cpu_ptr(tr->trace_buffer.data, cpu);
487 488 489 490 491 492 493 494 495
	disabled = atomic_inc_return(&data->disabled);
	if (likely(disabled == 1)) {
		pc = preempt_count();
		__trace_graph_return(tr, trace, flags, pc);
	}
	atomic_dec(&data->disabled);
	local_irq_restore(flags);
}

496 497 498 499 500 501 502 503 504
void set_graph_array(struct trace_array *tr)
{
	graph_array = tr;

	/* Make graph_array visible before we start tracing */

	smp_mb();
}

505
static void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
506 507 508 509 510 511 512 513
{
	if (tracing_thresh &&
	    (trace->rettime - trace->calltime < tracing_thresh))
		return;
	else
		trace_graph_return(trace);
}

514 515
static int graph_trace_init(struct trace_array *tr)
{
516 517
	int ret;

518
	set_graph_array(tr);
519 520
	if (tracing_thresh)
		ret = register_ftrace_graph(&trace_graph_thresh_return,
521
					    &trace_graph_entry);
522 523 524
	else
		ret = register_ftrace_graph(&trace_graph_return,
					    &trace_graph_entry);
525 526 527 528 529
	if (ret)
		return ret;
	tracing_start_cmdline_record();

	return 0;
530 531 532 533
}

static void graph_trace_reset(struct trace_array *tr)
{
534 535
	tracing_stop_cmdline_record();
	unregister_ftrace_graph();
536 537
}

538
static int graph_trace_update_thresh(struct trace_array *tr)
539 540 541 542 543
{
	graph_trace_reset(tr);
	return graph_trace_init(tr);
}

544
static int max_bytes_for_cpu;
545

546
static void print_graph_cpu(struct trace_seq *s, int cpu)
547
{
548 549 550 551 552
	/*
	 * Start with a space character - to make it stand out
	 * to the right a bit when trace output is pasted into
	 * email:
	 */
553
	trace_seq_printf(s, " %*d) ", max_bytes_for_cpu, cpu);
554 555
}

556 557
#define TRACE_GRAPH_PROCINFO_LENGTH	14

558
static void print_graph_proc(struct trace_seq *s, pid_t pid)
559
{
560
	char comm[TASK_COMM_LEN];
561 562
	/* sign + log10(MAX_INT) + '\0' */
	char pid_str[11];
563 564 565
	int spaces = 0;
	int len;
	int i;
566

567
	trace_find_cmdline(pid, comm);
568 569 570 571 572 573 574 575 576 577
	comm[7] = '\0';
	sprintf(pid_str, "%d", pid);

	/* 1 stands for the "-" character */
	len = strlen(comm) + strlen(pid_str) + 1;

	if (len < TRACE_GRAPH_PROCINFO_LENGTH)
		spaces = TRACE_GRAPH_PROCINFO_LENGTH - len;

	/* First spaces to align center */
578 579
	for (i = 0; i < spaces / 2; i++)
		trace_seq_putc(s, ' ');
580

581
	trace_seq_printf(s, "%s-%s", comm, pid_str);
582 583

	/* Last spaces to align center */
584 585
	for (i = 0; i < spaces - (spaces / 2); i++)
		trace_seq_putc(s, ' ');
586 587
}

588

589
static void print_graph_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
590
{
591 592
	trace_seq_putc(s, ' ');
	trace_print_lat_fmt(s, entry);
593 594
}

595
/* If the pid changed since the last trace, output this event */
596
static void
597
verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
598
{
599
	pid_t prev_pid;
600
	pid_t *last_pid;
601

602
	if (!data)
603
		return;
604

605
	last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid);
606 607

	if (*last_pid == pid)
608
		return;
609

610 611
	prev_pid = *last_pid;
	*last_pid = pid;
612

613
	if (prev_pid == -1)
614
		return;
615 616 617 618 619 620 621 622
/*
 * Context-switch trace line:

 ------------------------------------------
 | 1)  migration/0--1  =>  sshd-1755
 ------------------------------------------

 */
623 624 625 626 627 628
	trace_seq_puts(s, " ------------------------------------------\n");
	print_graph_cpu(s, cpu);
	print_graph_proc(s, prev_pid);
	trace_seq_puts(s, " => ");
	print_graph_proc(s, pid);
	trace_seq_puts(s, "\n ------------------------------------------\n\n");
629 630
}

631 632
static struct ftrace_graph_ret_entry *
get_return_for_leaf(struct trace_iterator *iter,
633 634
		struct ftrace_graph_ent_entry *curr)
{
635 636
	struct fgraph_data *data = iter->private;
	struct ring_buffer_iter *ring_iter = NULL;
637 638 639
	struct ring_buffer_event *event;
	struct ftrace_graph_ret_entry *next;

640 641 642 643 644 645 646 647
	/*
	 * If the previous output failed to write to the seq buffer,
	 * then we just reuse the data from before.
	 */
	if (data && data->failed) {
		curr = &data->ent;
		next = &data->ret;
	} else {
648

649
		ring_iter = trace_buffer_iter(iter, iter->cpu);
650 651 652 653 654 655 656 657 658

		/* First peek to compare current entry and the next one */
		if (ring_iter)
			event = ring_buffer_iter_peek(ring_iter, NULL);
		else {
			/*
			 * We need to consume the current entry to see
			 * the next one.
			 */
659
			ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu,
660
					    NULL, NULL);
661
			event = ring_buffer_peek(iter->trace_buffer->buffer, iter->cpu,
662
						 NULL, NULL);
663
		}
664

665 666 667 668
		if (!event)
			return NULL;

		next = ring_buffer_event_data(event);
669

670 671 672 673 674 675
		if (data) {
			/*
			 * Save current and next entries for later reference
			 * if the output fails.
			 */
			data->ent = *curr;
676 677 678 679 680 681 682 683 684
			/*
			 * If the next event is not a return type, then
			 * we only care about what type it is. Otherwise we can
			 * safely copy the entire event.
			 */
			if (next->ent.type == TRACE_GRAPH_RET)
				data->ret = *next;
			else
				data->ret.ent.type = next->ent.type;
685 686
		}
	}
687 688

	if (next->ent.type != TRACE_GRAPH_RET)
689
		return NULL;
690 691 692

	if (curr->ent.pid != next->ent.pid ||
			curr->graph_ent.func != next->ret.func)
693
		return NULL;
694

695 696 697 698 699
	/* this is a leaf, now advance the iterator */
	if (ring_iter)
		ring_buffer_read(ring_iter, NULL);

	return next;
700 701
}

702
static void print_graph_abs_time(u64 t, struct trace_seq *s)
703 704 705 706 707 708
{
	unsigned long usecs_rem;

	usecs_rem = do_div(t, NSEC_PER_SEC);
	usecs_rem /= 1000;

709 710
	trace_seq_printf(s, "%5lu.%06lu |  ",
			 (unsigned long)t, usecs_rem);
711 712
}

713
static void
714
print_graph_irq(struct trace_iterator *iter, unsigned long addr,
715
		enum trace_type type, int cpu, pid_t pid, u32 flags)
716
{
717
	struct trace_array *tr = iter->tr;
718
	struct trace_seq *s = &iter->seq;
719
	struct trace_entry *ent = iter->ent;
720 721 722

	if (addr < (unsigned long)__irqentry_text_start ||
		addr >= (unsigned long)__irqentry_text_end)
723
		return;
724

725
	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
726
		/* Absolute time */
727 728
		if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
			print_graph_abs_time(iter->ts, s);
729

730
		/* Cpu */
731 732
		if (flags & TRACE_GRAPH_PRINT_CPU)
			print_graph_cpu(s, cpu);
733

734 735
		/* Proc */
		if (flags & TRACE_GRAPH_PRINT_PROC) {
736 737
			print_graph_proc(s, pid);
			trace_seq_puts(s, " | ");
738
		}
739 740

		/* Latency format */
741
		if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
742
			print_graph_lat_fmt(s, ent);
743
	}
744

745
	/* No overhead */
746
	print_graph_duration(tr, 0, s, flags | FLAGS_FILL_START);
747

748
	if (type == TRACE_GRAPH_ENT)
749
		trace_seq_puts(s, "==========>");
750
	else
751
		trace_seq_puts(s, "<==========");
752

753
	print_graph_duration(tr, 0, s, flags | FLAGS_FILL_END);
754
	trace_seq_putc(s, '\n');
755
}
756

757
void
758
trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
759 760
{
	unsigned long nsecs_rem = do_div(duration, 1000);
761
	/* log10(ULONG_MAX) + '\0' */
762
	char usecs_str[21];
763
	char nsecs_str[5];
764
	int len;
765 766
	int i;

767
	sprintf(usecs_str, "%lu", (unsigned long) duration);
768 769

	/* Print msecs */
770
	trace_seq_printf(s, "%s", usecs_str);
771

772
	len = strlen(usecs_str);
773 774 775

	/* Print nsecs (we don't want to exceed 7 numbers) */
	if (len < 7) {
776 777 778
		size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len);

		snprintf(nsecs_str, slen, "%03lu", nsecs_rem);
779
		trace_seq_printf(s, ".%s", nsecs_str);
780
		len += strlen(nsecs_str) + 1;
781 782
	}

783
	trace_seq_puts(s, " us ");
784 785

	/* Print remaining spaces to fit the row's width */
786
	for (i = len; i < 8; i++)
787
		trace_seq_putc(s, ' ');
788 789
}

790
static void
791 792
print_graph_duration(struct trace_array *tr, unsigned long long duration,
		     struct trace_seq *s, u32 flags)
793
{
794
	if (!(flags & TRACE_GRAPH_PRINT_DURATION) ||
795
	    !(tr->trace_flags & TRACE_ITER_CONTEXT_INFO))
796
		return;
797 798

	/* No real adata, just filling the column with spaces */
799 800
	switch (flags & TRACE_GRAPH_PRINT_FILL_MASK) {
	case FLAGS_FILL_FULL:
801 802
		trace_seq_puts(s, "              |  ");
		return;
803
	case FLAGS_FILL_START:
804 805
		trace_seq_puts(s, "  ");
		return;
806
	case FLAGS_FILL_END:
807 808
		trace_seq_puts(s, " |");
		return;
809 810 811
	}

	/* Signal a overhead of time execution to the output */
812 813 814
	if (flags & TRACE_GRAPH_PRINT_OVERHEAD)
		trace_seq_printf(s, "%c ", trace_find_mark(duration));
	else
815
		trace_seq_puts(s, "  ");
816

817 818
	trace_print_graph_duration(duration, s);
	trace_seq_puts(s, "|  ");
819 820 821
}

/* Case of a leaf function on its call entry */
822
static enum print_line_t
823
print_graph_entry_leaf(struct trace_iterator *iter,
824
		struct ftrace_graph_ent_entry *entry,
825 826
		struct ftrace_graph_ret_entry *ret_entry,
		struct trace_seq *s, u32 flags)
827
{
828
	struct fgraph_data *data = iter->private;
829
	struct trace_array *tr = iter->tr;
830 831 832
	struct ftrace_graph_ret *graph_ret;
	struct ftrace_graph_ent *call;
	unsigned long long duration;
833
	int i;
834

835 836 837 838
	graph_ret = &ret_entry->ret;
	call = &entry->graph_ent;
	duration = graph_ret->rettime - graph_ret->calltime;

839
	if (data) {
840
		struct fgraph_cpu_data *cpu_data;
841
		int cpu = iter->cpu;
842 843

		cpu_data = per_cpu_ptr(data->cpu_data, cpu);
844

845 846 847 848
		/* If a graph tracer ignored set_graph_notrace */
		if (call->depth < -1)
			call->depth += FTRACE_NOTRACE_DEPTH;

849 850 851 852 853
		/*
		 * Comments display at + 1 to depth. Since
		 * this is a leaf function, keep the comments
		 * equal to this depth.
		 */
854 855 856
		cpu_data->depth = call->depth - 1;

		/* No need to keep this function around for this depth */
857 858
		if (call->depth < FTRACE_RETFUNC_DEPTH &&
		    !WARN_ON_ONCE(call->depth < 0))
859
			cpu_data->enter_funcs[call->depth] = 0;
860 861
	}

862
	/* Overhead and duration */
863
	print_graph_duration(tr, duration, s, flags);
864

865
	/* Function */
866 867
	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++)
		trace_seq_putc(s, ' ');
868

869
	trace_seq_printf(s, "%ps();\n", (void *)call->func);
870

871
	return trace_handle_return(s);
872 873 874
}

static enum print_line_t
875 876
print_graph_entry_nested(struct trace_iterator *iter,
			 struct ftrace_graph_ent_entry *entry,
877
			 struct trace_seq *s, int cpu, u32 flags)
878 879
{
	struct ftrace_graph_ent *call = &entry->graph_ent;
880
	struct fgraph_data *data = iter->private;
881
	struct trace_array *tr = iter->tr;
882 883 884
	int i;

	if (data) {
885
		struct fgraph_cpu_data *cpu_data;
886 887
		int cpu = iter->cpu;

888 889 890 891
		/* If a graph tracer ignored set_graph_notrace */
		if (call->depth < -1)
			call->depth += FTRACE_NOTRACE_DEPTH;

892 893 894 895
		cpu_data = per_cpu_ptr(data->cpu_data, cpu);
		cpu_data->depth = call->depth;

		/* Save this function pointer to see if the exit matches */
896 897
		if (call->depth < FTRACE_RETFUNC_DEPTH &&
		    !WARN_ON_ONCE(call->depth < 0))
898
			cpu_data->enter_funcs[call->depth] = call->func;
899
	}
900

901
	/* No time */
902
	print_graph_duration(tr, 0, s, flags | FLAGS_FILL_FULL);
903

904
	/* Function */
905 906 907 908
	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++)
		trace_seq_putc(s, ' ');

	trace_seq_printf(s, "%ps() {\n", (void *)call->func);
909

910
	if (trace_seq_has_overflowed(s))
911 912
		return TRACE_TYPE_PARTIAL_LINE;

913 914 915 916 917
	/*
	 * we already consumed the current entry to check the next one
	 * and see if this is a leaf.
	 */
	return TRACE_TYPE_NO_CONSUME;
918 919
}

920
static void
921
print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
922
		     int type, unsigned long addr, u32 flags)
923
{
924
	struct fgraph_data *data = iter->private;
925
	struct trace_entry *ent = iter->ent;
926
	struct trace_array *tr = iter->tr;
927
	int cpu = iter->cpu;
928

929
	/* Pid */
930
	verif_pid(s, ent->pid, cpu, data);
931

932
	if (type)
933
		/* Interrupt */
934
		print_graph_irq(iter, addr, type, cpu, ent->pid, flags);
935

936
	if (!(tr->trace_flags & TRACE_ITER_CONTEXT_INFO))
937
		return;
938

939
	/* Absolute time */
940 941
	if (flags & TRACE_GRAPH_PRINT_ABS_TIME)
		print_graph_abs_time(iter->ts, s);
942

943
	/* Cpu */
944 945
	if (flags & TRACE_GRAPH_PRINT_CPU)
		print_graph_cpu(s, cpu);
946 947

	/* Proc */
948
	if (flags & TRACE_GRAPH_PRINT_PROC) {
949 950
		print_graph_proc(s, ent->pid);
		trace_seq_puts(s, " | ");
951
	}
952

953
	/* Latency format */
954
	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
955
		print_graph_lat_fmt(s, ent);
956

957
	return;
958 959
}

960 961 962 963 964
/*
 * Entry check for irq code
 *
 * returns 1 if
 *  - we are inside irq code
Lucas De Marchi's avatar
Lucas De Marchi committed
965
 *  - we just entered irq code
966 967 968 969 970 971 972 973 974 975
 *
 * retunns 0 if
 *  - funcgraph-interrupts option is set
 *  - we are not inside irq code
 */
static int
check_irq_entry(struct trace_iterator *iter, u32 flags,
		unsigned long addr, int depth)
{
	int cpu = iter->cpu;
976
	int *depth_irq;
977 978
	struct fgraph_data *data = iter->private;

979 980 981 982 983 984 985
	/*
	 * If we are either displaying irqs, or we got called as
	 * a graph event and private data does not exist,
	 * then we bypass the irq check.
	 */
	if ((flags & TRACE_GRAPH_PRINT_IRQS) ||
	    (!data))