trace_syscalls.c 19.7 KB
Newer Older
1
#include <trace/syscall.h>
2
#include <trace/events/syscalls.h>
3
#include <linux/syscalls.h>
4
#include <linux/slab.h>
5
#include <linux/kernel.h>
6
#include <linux/module.h>	/* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
7
#include <linux/ftrace.h>
8
#include <linux/perf_event.h>
9 10 11 12 13
#include <asm/syscall.h>

#include "trace_output.h"
#include "trace.h"

14
static DEFINE_MUTEX(syscall_trace_lock);
15

16
static int syscall_enter_register(struct ftrace_event_call *event,
17
				 enum trace_reg type, void *data);
18
static int syscall_exit_register(struct ftrace_event_call *event,
19
				 enum trace_reg type, void *data);
20

21 22 23 24 25 26 27 28
/*
 * Return the list head holding the field descriptions of a syscall
 * enter event. The list lives in the syscall metadata that @call->data
 * points to.
 */
static struct list_head *
syscall_get_enter_fields(struct ftrace_event_call *call)
{
	struct syscall_metadata *meta = call->data;
	struct list_head *fields = &meta->enter_fields;

	return fields;
}

29 30
extern struct syscall_metadata *__start_syscalls_metadata[];
extern struct syscall_metadata *__stop_syscalls_metadata[];
31 32 33

static struct syscall_metadata **syscalls_metadata;

34 35 36 37 38 39
#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
/*
 * Default symbol/name matcher, used unless the architecture provides its
 * own by defining ARCH_HAS_SYSCALL_MATCH_SYM_NAME.
 *
 * The first three characters of both strings are skipped, so that symbol
 * aliases created by syscall wrappers (for example "SyS_foo" for
 * "sys_foo") still match the metadata name. Both strings must therefore
 * be at least three characters long.
 *
 * Returns true when the remainders are equal.
 */
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
{
	const char *sym_tail = sym + 3;
	const char *name_tail = name + 3;

	return strcmp(sym_tail, name_tail) == 0;
}
#endif

47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
#ifdef ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
/*
 * On some architectures (x86 for instance) a 32bit task running on a
 * 64bit kernel does not use the same syscall mapping as a 64bit task.
 * Rather than reporting the wrong syscall for such tasks, they are
 * simply ignored.
 *
 * An arch opts in by defining ARCH_TRACE_IGNORE_COMPAT_SYSCALLS and by
 * providing arch_trace_is_compat_syscall(), which tells the tracing
 * code whether the syscall described by @regs has to be skipped.
 *
 * Returns -1 for a compat syscall, otherwise whatever syscall_get_nr()
 * reports for @task.
 */
static int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	if (likely(!arch_trace_is_compat_syscall(regs)))
		return syscall_get_nr(task, regs);

	return -1;
}
#else
/* No compat filtering: hand back the number reported by syscall_get_nr(). */
static inline int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	int nr = syscall_get_nr(task, regs);

	return nr;
}
#endif /* ARCH_TRACE_IGNORE_COMPAT_SYSCALLS */

79 80
static __init struct syscall_metadata *
find_syscall_meta(unsigned long syscall)
81
{
82 83
	struct syscall_metadata **start;
	struct syscall_metadata **stop;
84 85 86
	char str[KSYM_SYMBOL_LEN];


87 88
	start = __start_syscalls_metadata;
	stop = __stop_syscalls_metadata;
89 90
	kallsyms_lookup(syscall, NULL, NULL, NULL, str);

91 92 93
	if (arch_syscall_match_sym_name(str, "sys_ni_syscall"))
		return NULL;

94
	for ( ; start < stop; start++) {
95
		if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name))
96
			return *start;
97 98 99 100 101 102 103 104 105 106 107 108
	}
	return NULL;
}

/*
 * Map a syscall number to its metadata.
 *
 * Returns NULL when the lookup table has not been allocated, when @nr is
 * outside [0, NR_syscalls) or when the slot for @nr is empty.
 */
static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
	if (!syscalls_metadata)
		return NULL;

	if (nr < 0 || nr >= NR_syscalls)
		return NULL;

	return syscalls_metadata[nr];
}

109
static enum print_line_t
110 111
print_syscall_enter(struct trace_iterator *iter, int flags,
		    struct trace_event *event)
112 113 114 115 116 117 118
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_enter *trace;
	struct syscall_metadata *entry;
	int i, ret, syscall;

119
	trace = (typeof(trace))ent;
120 121
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);
122

123 124 125
	if (!entry)
		goto end;

126
	if (entry->enter_event->event.type != ent->type) {
127 128 129 130
		WARN_ON_ONCE(1);
		goto end;
	}

131 132 133 134 135 136
	ret = trace_seq_printf(s, "%s(", entry->name);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	for (i = 0; i < entry->nb_args; i++) {
		/* parameter types */
137
		if (trace_flags & TRACE_ITER_VERBOSE) {
138 139 140 141 142
			ret = trace_seq_printf(s, "%s ", entry->types[i]);
			if (!ret)
				return TRACE_TYPE_PARTIAL_LINE;
		}
		/* parameter values */
143
		ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
144
				       trace->args[i],
145
				       i == entry->nb_args - 1 ? "" : ", ");
146 147 148 149
		if (!ret)
			return TRACE_TYPE_PARTIAL_LINE;
	}

150 151 152 153
	ret = trace_seq_putc(s, ')');
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

154
end:
155 156 157 158
	ret =  trace_seq_putc(s, '\n');
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

159 160 161
	return TRACE_TYPE_HANDLED;
}

162
static enum print_line_t
163 164
print_syscall_exit(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
165 166 167 168 169 170 171 172
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_exit *trace;
	int syscall;
	struct syscall_metadata *entry;
	int ret;

173
	trace = (typeof(trace))ent;
174 175
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);
176

177
	if (!entry) {
178
		trace_seq_putc(s, '\n');
179 180 181
		return TRACE_TYPE_HANDLED;
	}

182
	if (entry->exit_event->event.type != ent->type) {
183 184 185 186
		WARN_ON_ONCE(1);
		return TRACE_TYPE_UNHANDLED;
	}

187 188 189 190 191 192 193 194
	ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
				trace->ret);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}

195 196 197 198 199
/*
 * Only declared here. NOTE(review): presumably never defined, so that a
 * size mismatch caught by SYSCALL_FIELD() fails at link time - confirm.
 */
extern char *__bad_type_size(void);

/*
 * Expands to the comma separated argument list
 *
 *	type string, name string, offset, size, signedness
 *
 * describing member @name of a local variable called "trace", which the
 * user of the macro has to declare. When sizeof(@type) differs from the
 * size of the member, the type string is replaced by a call to
 * __bad_type_size().
 */
#define SYSCALL_FIELD(type, name)					\
	sizeof(type) != sizeof(trace.name) ?				\
		__bad_type_size() :					\
		#type, #name, offsetof(typeof(trace), name),		\
		sizeof(trace.name), is_signed_type(type)
202

203 204
/*
 * Build the print_fmt string of a syscall enter event:
 *
 *	"arg: 0x%0<N>lx, ...", ((unsigned long)(REC->arg)), ...
 *
 * where <N> is sizeof(unsigned long).
 *
 * @entry: metadata providing the argument names and count
 * @buf:   destination buffer, may be NULL when @len is 0
 * @len:   size of @buf; 0 means "only compute the length"
 *
 * Returns the length of the complete string, without the terminating
 * NUL byte.
 */
static int __init
__set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
{
	int pos = 0;
	int arg;

	/* With len == 0 nothing is written, only the length is accumulated */
#define SPACE_LEFT (len ? len - pos : 0)

	/* Quoted part: one conversion per argument */
	pos += snprintf(buf + pos, SPACE_LEFT, "\"");
	for (arg = 0; arg < entry->nb_args; arg++) {
		const char *sep = arg == entry->nb_args - 1 ? "" : ", ";

		pos += snprintf(buf + pos, SPACE_LEFT, "%s: 0x%%0%zulx%s",
				entry->args[arg], sizeof(unsigned long), sep);
	}
	pos += snprintf(buf + pos, SPACE_LEFT, "\"");

	/* Value part: one REC-> reference per argument */
	for (arg = 0; arg < entry->nb_args; arg++)
		pos += snprintf(buf + pos, SPACE_LEFT,
				", ((unsigned long)(REC->%s))",
				entry->args[arg]);

#undef SPACE_LEFT

	return pos;
}

231
/*
 * Install the print_fmt of a syscall event.
 *
 * Exit events (any call that is not the metadata's enter_event) share a
 * constant format string. Enter events get a string allocated with
 * kmalloc() and filled in by __set_enter_print_fmt().
 *
 * Returns 0 on success, -ENOMEM when the allocation fails.
 */
static int __init set_syscall_print_fmt(struct ftrace_event_call *call)
{
	struct syscall_metadata *meta = call->data;
	char *fmt;
	int size;

	if (meta->enter_event != call) {
		call->print_fmt = "\"0x%lx\", REC->ret";
		return 0;
	}

	/* Dry run with a zero length yields the string length; add the NUL */
	size = __set_enter_print_fmt(meta, NULL, 0) + 1;

	fmt = kmalloc(size, GFP_KERNEL);
	if (!fmt)
		return -ENOMEM;

	/* Real run: fill the freshly allocated buffer */
	__set_enter_print_fmt(meta, fmt, size);
	call->print_fmt = fmt;

	return 0;
}

256
/*
 * Release the print_fmt installed by set_syscall_print_fmt(). Only enter
 * events own an allocated string; for any other call nothing is freed.
 */
static void __init free_syscall_print_fmt(struct ftrace_event_call *call)
{
	struct syscall_metadata *meta = call->data;

	if (meta->enter_event != call)
		return;

	kfree(call->print_fmt);
}

264
/*
 * Describe the fields of a syscall enter record: the syscall number
 * followed by one unsigned long sized field per syscall argument, laid
 * out back to back starting at the "args" member.
 *
 * Returns 0 on success or the first error reported by
 * trace_define_field().
 */
static int __init syscall_enter_define_fields(struct ftrace_event_call *call)
{
	/* Never instantiated for real: SYSCALL_FIELD() needs it for sizeof/offsetof */
	struct syscall_trace_enter trace;
	struct syscall_metadata *meta = call->data;
	int ret;
	int i;
	int offset = offsetof(typeof(trace), args);

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
	if (ret)
		return ret;

	for (i = 0; i < meta->nb_args; i++) {
		ret = trace_define_field(call, meta->types[i],
					 meta->args[i], offset,
					 sizeof(unsigned long), 0,
					 FILTER_OTHER);
		/*
		 * Stop at the first failure. Carrying on would let a later
		 * success overwrite the error and leave the event with an
		 * incomplete field list.
		 */
		if (ret)
			break;
		offset += sizeof(unsigned long);
	}

	return ret;
}

287
/*
 * Describe the fields of a syscall exit record: the syscall number and
 * the return value.
 *
 * Returns 0 on success or the error reported by trace_define_field().
 */
static int __init syscall_exit_define_fields(struct ftrace_event_call *call)
{
	/* Never instantiated for real: SYSCALL_FIELD() needs it for sizeof/offsetof */
	struct syscall_trace_exit trace;
	int err;

	err = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
	if (!err)
		err = trace_define_field(call, SYSCALL_FIELD(long, ret),
					 FILTER_OTHER);

	return err;
}

302
static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
303
{
304
	struct trace_array *tr = data;
305
	struct ftrace_event_file *ftrace_file;
306 307 308
	struct syscall_trace_enter *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
309
	enum event_trigger_type tt = ETT_NONE;
310
	struct ring_buffer *buffer;
311
	unsigned long irq_flags;
312
	unsigned long eflags;
313
	int pc;
314
	int syscall_nr;
315
	int size;
316

317
	syscall_nr = trace_get_syscall_nr(current, regs);
318 319
	if (syscall_nr < 0)
		return;
320 321 322 323 324 325

	/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
	ftrace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
	if (!ftrace_file)
		return;

326 327 328 329 330 331 332 333
	eflags = ftrace_file->flags;

	if (!(eflags & FTRACE_EVENT_FL_TRIGGER_COND)) {
		if (eflags & FTRACE_EVENT_FL_TRIGGER_MODE)
			event_triggers_call(ftrace_file, NULL);
		if (eflags & FTRACE_EVENT_FL_SOFT_DISABLED)
			return;
	}
334

335 336 337 338 339 340
	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

341 342 343
	local_save_flags(irq_flags);
	pc = preempt_count();

344
	buffer = tr->trace_buffer.buffer;
345
	event = trace_buffer_lock_reserve(buffer,
346
			sys_data->enter_event->event.type, size, irq_flags, pc);
347 348 349 350 351 352 353
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

354
	if (eflags & FTRACE_EVENT_FL_TRIGGER_COND)
355
		tt = event_triggers_call(ftrace_file, entry);
356 357 358 359

	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
		ring_buffer_discard_commit(buffer, event);
	else if (!filter_check_discard(ftrace_file, entry, buffer, event))
360 361
		trace_current_buffer_unlock_commit(buffer, event,
						   irq_flags, pc);
362 363
	if (tt)
		event_triggers_post_call(ftrace_file, tt);
364 365
}

366
static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
367
{
368
	struct trace_array *tr = data;
369
	struct ftrace_event_file *ftrace_file;
370 371 372
	struct syscall_trace_exit *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
373
	enum event_trigger_type tt = ETT_NONE;
374
	struct ring_buffer *buffer;
375
	unsigned long irq_flags;
376
	unsigned long eflags;
377
	int pc;
378 379
	int syscall_nr;

380
	syscall_nr = trace_get_syscall_nr(current, regs);
381 382
	if (syscall_nr < 0)
		return;
383 384 385 386 387 388

	/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
	ftrace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
	if (!ftrace_file)
		return;

389 390 391 392 393 394 395 396
	eflags = ftrace_file->flags;

	if (!(eflags & FTRACE_EVENT_FL_TRIGGER_COND)) {
		if (eflags & FTRACE_EVENT_FL_TRIGGER_MODE)
			event_triggers_call(ftrace_file, NULL);
		if (eflags & FTRACE_EVENT_FL_SOFT_DISABLED)
			return;
	}
397

398 399 400 401
	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

402 403 404
	local_save_flags(irq_flags);
	pc = preempt_count();

405
	buffer = tr->trace_buffer.buffer;
406
	event = trace_buffer_lock_reserve(buffer,
407 408
			sys_data->exit_event->event.type, sizeof(*entry),
			irq_flags, pc);
409 410 411 412 413 414 415
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	entry->ret = syscall_get_return_value(current, regs);

416
	if (eflags & FTRACE_EVENT_FL_TRIGGER_COND)
417
		tt = event_triggers_call(ftrace_file, entry);
418 419 420 421

	if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
		ring_buffer_discard_commit(buffer, event);
	else if (!filter_check_discard(ftrace_file, entry, buffer, event))
422 423
		trace_current_buffer_unlock_commit(buffer, event,
						   irq_flags, pc);
424 425
	if (tt)
		event_triggers_post_call(ftrace_file, tt);
426 427
}

428 429
static int reg_event_syscall_enter(struct ftrace_event_file *file,
				   struct ftrace_event_call *call)
430
{
431
	struct trace_array *tr = file->tr;
432 433 434
	int ret = 0;
	int num;

435
	num = ((struct syscall_metadata *)call->data)->syscall_nr;
436
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
437 438
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
439 440
	if (!tr->sys_refcount_enter)
		ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
441
	if (!ret) {
442
		rcu_assign_pointer(tr->enter_syscall_files[num], file);
443
		tr->sys_refcount_enter++;
444 445 446
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
447 448
}

449 450
static void unreg_event_syscall_enter(struct ftrace_event_file *file,
				      struct ftrace_event_call *call)
451
{
452
	struct trace_array *tr = file->tr;
453
	int num;
454

455
	num = ((struct syscall_metadata *)call->data)->syscall_nr;
456
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
457 458
		return;
	mutex_lock(&syscall_trace_lock);
459
	tr->sys_refcount_enter--;
460
	rcu_assign_pointer(tr->enter_syscall_files[num], NULL);
461 462
	if (!tr->sys_refcount_enter)
		unregister_trace_sys_enter(ftrace_syscall_enter, tr);
463 464
	mutex_unlock(&syscall_trace_lock);
}
465

466 467
static int reg_event_syscall_exit(struct ftrace_event_file *file,
				  struct ftrace_event_call *call)
468
{
469
	struct trace_array *tr = file->tr;
470 471 472
	int ret = 0;
	int num;

473
	num = ((struct syscall_metadata *)call->data)->syscall_nr;
474
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
475 476
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
477 478
	if (!tr->sys_refcount_exit)
		ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
479
	if (!ret) {
480
		rcu_assign_pointer(tr->exit_syscall_files[num], file);
481
		tr->sys_refcount_exit++;
482
	}
483 484 485
	mutex_unlock(&syscall_trace_lock);
	return ret;
}
486

487 488
static void unreg_event_syscall_exit(struct ftrace_event_file *file,
				     struct ftrace_event_call *call)
489
{
490
	struct trace_array *tr = file->tr;
491
	int num;
492

493
	num = ((struct syscall_metadata *)call->data)->syscall_nr;
494
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
495 496
		return;
	mutex_lock(&syscall_trace_lock);
497
	tr->sys_refcount_exit--;
498
	rcu_assign_pointer(tr->exit_syscall_files[num], NULL);
499 500
	if (!tr->sys_refcount_exit)
		unregister_trace_sys_exit(ftrace_syscall_exit, tr);
501
	mutex_unlock(&syscall_trace_lock);
502
}
503

504
/*
 * raw_init callback shared by the syscall enter and exit event classes.
 *
 * Rejects events whose metadata carries a syscall number outside
 * [0, NR_syscalls), sets up the print format and hands the event to
 * trace_event_raw_init().
 *
 * Returns the value of trace_event_raw_init() (negative on failure),
 * -ENOSYS for an unmapped syscall or -ENOMEM when the print format
 * cannot be set up.
 */
static int __init init_syscall_trace(struct ftrace_event_call *call)
{
	struct syscall_metadata *meta = call->data;
	int nr = meta->syscall_nr;
	int id;

	if (nr < 0 || nr >= NR_syscalls) {
		pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
			 meta->name);
		return -ENOSYS;
	}

	if (set_syscall_print_fmt(call) < 0)
		return -ENOMEM;

	id = trace_event_raw_init(call);
	/* On failure, give back the print format set up above */
	if (id < 0)
		free_syscall_print_fmt(call);

	return id;
}

529 530 531 532 533 534 535 536
/* Output callbacks for syscall enter events: text output only. */
struct trace_event_functions enter_syscall_print_funcs = {
	.trace		= print_syscall_enter,
};

/* Output callbacks for syscall exit events: text output only. */
struct trace_event_functions exit_syscall_print_funcs = {
	.trace		= print_syscall_exit,
};

537
/*
 * Event class shared by all syscall enter events. The field list is not
 * stored in the class; it is looked up per event through get_fields.
 */
struct ftrace_event_class __refdata event_class_syscall_enter = {
	.system = "syscalls",
	.reg = syscall_enter_register,
	.define_fields = syscall_enter_define_fields,
	.get_fields = syscall_get_enter_fields,
	.raw_init = init_syscall_trace,
};

545
/*
 * Event class shared by all syscall exit events. The field list is held
 * in the class itself, via its statically initialised fields member.
 */
struct ftrace_event_class __refdata event_class_syscall_exit = {
	.system = "syscalls",
	.reg = syscall_exit_register,
	.define_fields = syscall_exit_define_fields,
	.fields = LIST_HEAD_INIT(event_class_syscall_exit.fields),
	.raw_init = init_syscall_trace,
};

553
/*
 * Default (weak) lookup of the handler address of syscall @nr, read
 * straight from sys_call_table. @nr is not range checked here.
 */
unsigned long __init __weak arch_syscall_addr(int nr)
{
	unsigned long addr = (unsigned long)sys_call_table[nr];

	return addr;
}

558
/*
 * Build the syscall number -> metadata lookup table.
 *
 * For every syscall number the handler address is resolved to its
 * metadata entry. Numbers without metadata keep a NULL slot. The number
 * is also stored back into the metadata.
 *
 * Returns 0 on success, -ENOMEM when the table cannot be allocated.
 */
static int __init init_ftrace_syscalls(void)
{
	int nr;

	syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata),
				    GFP_KERNEL);
	if (WARN_ON(!syscalls_metadata))
		return -ENOMEM;

	for (nr = 0; nr < NR_syscalls; nr++) {
		unsigned long addr = arch_syscall_addr(nr);
		struct syscall_metadata *meta = find_syscall_meta(addr);

		if (meta) {
			meta->syscall_nr = nr;
			syscalls_metadata[nr] = meta;
		}
	}

	return 0;
}
583
early_initcall(init_ftrace_syscalls);
584

585
#ifdef CONFIG_PERF_EVENTS
586

587 588 589 590
/*
 * One bit per syscall number: set while perf has the corresponding
 * enter/exit event enabled, tested by the perf probes below.
 */
static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
/*
 * Number of perf-enabled enter/exit events; the tracepoint probe is
 * registered on the first one and unregistered with the last one.
 * Modified with syscall_trace_lock held.
 */
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;
591

592
static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
593 594
{
	struct syscall_metadata *sys_data;
595
	struct syscall_trace_enter *rec;
596
	struct hlist_head *head;
597
	int syscall_nr;
598
	int rctx;
599
	int size;
600

601
	syscall_nr = trace_get_syscall_nr(current, regs);
602 603
	if (syscall_nr < 0)
		return;
604
	if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
605 606 607 608 609 610
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

611 612 613 614
	head = this_cpu_ptr(sys_data->enter_event->perf_events);
	if (hlist_empty(head))
		return;

615 616 617 618 619
	/* get the size after alignment with the u32 buffer size field */
	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
	size = ALIGN(size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

620
	rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
621
				sys_data->enter_event->event.type, regs, &rctx);
622 623
	if (!rec)
		return;
624 625 626 627

	rec->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
			       (unsigned long *)&rec->args);
628
	perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
629 630
}

631
static int perf_sysenter_enable(struct ftrace_event_call *call)
632 633 634 635
{
	int ret = 0;
	int num;

636
	num = ((struct syscall_metadata *)call->data)->syscall_nr;
637 638

	mutex_lock(&syscall_trace_lock);
639
	if (!sys_perf_refcount_enter)
640
		ret = register_trace_sys_enter(perf_syscall_enter, NULL);
641 642 643 644
	if (ret) {
		pr_info("event trace: Could not activate"
				"syscall entry trace point");
	} else {
645 646
		set_bit(num, enabled_perf_enter_syscalls);
		sys_perf_refcount_enter++;
647 648 649 650 651
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

652
static void perf_sysenter_disable(struct ftrace_event_call *call)
653 654 655
{
	int num;

656
	num = ((struct syscall_metadata *)call->data)->syscall_nr;
657 658

	mutex_lock(&syscall_trace_lock);
659 660 661
	sys_perf_refcount_enter--;
	clear_bit(num, enabled_perf_enter_syscalls);
	if (!sys_perf_refcount_enter)
662
		unregister_trace_sys_enter(perf_syscall_enter, NULL);
663 664 665
	mutex_unlock(&syscall_trace_lock);
}

666
static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
667 668
{
	struct syscall_metadata *sys_data;
669
	struct syscall_trace_exit *rec;
670
	struct hlist_head *head;
671
	int syscall_nr;
672
	int rctx;
673
	int size;
674

675
	syscall_nr = trace_get_syscall_nr(current, regs);
676 677
	if (syscall_nr < 0)
		return;
678
	if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
679 680 681 682 683 684
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

685 686 687 688
	head = this_cpu_ptr(sys_data->exit_event->perf_events);
	if (hlist_empty(head))
		return;

689 690 691
	/* We can probably do that at build time */
	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);
692

693
	rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
694
				sys_data->exit_event->event.type, regs, &rctx);
695 696
	if (!rec)
		return;
697 698 699

	rec->nr = syscall_nr;
	rec->ret = syscall_get_return_value(current, regs);
700
	perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
701 702
}

703
static int perf_sysexit_enable(struct ftrace_event_call *call)
704 705 706 707
{
	int ret = 0;
	int num;

708
	num = ((struct syscall_metadata *)call->data)->syscall_nr;
709 710

	mutex_lock(&syscall_trace_lock);
711
	if (!sys_perf_refcount_exit)
712
		ret = register_trace_sys_exit(perf_syscall_exit, NULL);
713 714
	if (ret) {
		pr_info("event trace: Could not activate"
715
				"syscall exit trace point");
716
	} else {
717 718
		set_bit(num, enabled_perf_exit_syscalls);
		sys_perf_refcount_exit++;
719 720 721 722 723
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

724
static void perf_sysexit_disable(struct ftrace_event_call *call)
725 726 727
{
	int num;

728
	num = ((struct syscall_metadata *)call->data)->syscall_nr;
729 730

	mutex_lock(&syscall_trace_lock);
731 732 733
	sys_perf_refcount_exit--;
	clear_bit(num, enabled_perf_exit_syscalls);
	if (!sys_perf_refcount_exit)
734
		unregister_trace_sys_exit(perf_syscall_exit, NULL);
735 736 737
	mutex_unlock(&syscall_trace_lock);
}

738
#endif /* CONFIG_PERF_EVENTS */
739

740
static int syscall_enter_register(struct ftrace_event_call *event,
741
				 enum trace_reg type, void *data)
742
{
743 744
	struct ftrace_event_file *file = data;

745 746
	switch (type) {
	case TRACE_REG_REGISTER:
747
		return reg_event_syscall_enter(file, event);
748
	case TRACE_REG_UNREGISTER:
749
		unreg_event_syscall_enter(file, event);
750 751 752 753 754 755 756 757
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return perf_sysenter_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		perf_sysenter_disable(event);
		return 0;
758 759
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
760 761
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
762
		return 0;
763 764 765 766 767 768
#endif
	}
	return 0;
}

static int syscall_exit_register(struct ftrace_event_call *event,
769
				 enum trace_reg type, void *data)
770
{
771 772
	struct ftrace_event_file *file = data;

773 774
	switch (type) {
	case TRACE_REG_REGISTER:
775
		return reg_event_syscall_exit(file, event);
776
	case TRACE_REG_UNREGISTER:
777
		unreg_event_syscall_exit(file, event);
778 779 780 781 782 783 784 785
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return perf_sysexit_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		perf_sysexit_disable(event);
		return 0;
786 787
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
788 789
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
790
		return 0;
791 792 793 794
#endif
	}
	return 0;
}