#include <trace/syscall.h>
#include <trace/events/syscalls.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/module.h>	/* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <asm/syscall.h>

#include "trace_output.h"
#include "trace.h"

static DEFINE_MUTEX(syscall_trace_lock);

static int syscall_enter_register(struct trace_event_call *event,
				 enum trace_reg type, void *data);
static int syscall_exit_register(struct trace_event_call *event,
				 enum trace_reg type, void *data);

static struct list_head *
syscall_get_enter_fields(struct trace_event_call *call)
{
	struct syscall_metadata *entry = call->data;

	return &entry->enter_fields;
}

extern struct syscall_metadata *__start_syscalls_metadata[];
extern struct syscall_metadata *__stop_syscalls_metadata[];
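
/*
 * The two symbols above are emitted by the linker script and bound the
 * __syscalls_metadata section that SYSCALL_METADATA() populates, one
 * record per defined syscall.
 */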

static struct syscall_metadata **syscalls_metadata;

#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
{
	/*
	 * Only compare after the "sys" prefix. Archs that use
	 * syscall wrappers may have syscall symbol aliases prefixed
	 * with ".SyS" or ".sys" instead of "sys", leading to an unwanted
	 * mismatch.
	 */
	return !strcmp(sym + 3, name + 3);
}
#endif
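
/*
 * Example: with syscall wrappers, kallsyms may report the symbol for
 * sys_read() as "SyS_read"; comparing past the three-byte prefix makes
 * "SyS_read" and "sys_read" match.
 */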

#ifdef ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
/*
 * Some architectures that allow for 32bit applications
 * to run on a 64bit kernel do not map the syscalls for
 * the 32bit tasks the same as they do for 64bit tasks.
 *
 *     *cough*x86*cough*
 *
 * In such a case, instead of reporting the wrong syscalls,
 * simply ignore them.
 *
 * For an arch to ignore the compat syscalls it needs to
 * define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS as well as
 * define the function arch_trace_is_compat_syscall() to let
 * the tracing system know that it should ignore it.
 */
static int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	if (unlikely(arch_trace_is_compat_syscall(regs)))
		return -1;

	return syscall_get_nr(task, regs);
}
#else
static inline int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	return syscall_get_nr(task, regs);
}
#endif /* ARCH_TRACE_IGNORE_COMPAT_SYSCALLS */
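
/*
 * Illustrative sketch only (this lives in arch code, not here): an arch
 * that opts in would provide something like
 *
 *	#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS 1
 *
 *	static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
 *	{
 *		return in_compat_syscall();
 *	}
 *
 * in its asm headers; the actual compat test is arch-specific.
 */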

static __init struct syscall_metadata *
find_syscall_meta(unsigned long syscall)
{
	struct syscall_metadata **start;
	struct syscall_metadata **stop;
	char str[KSYM_SYMBOL_LEN];

	start = __start_syscalls_metadata;
	stop = __stop_syscalls_metadata;
	kallsyms_lookup(syscall, NULL, NULL, NULL, str);

	if (arch_syscall_match_sym_name(str, "sys_ni_syscall"))
		return NULL;

	for ( ; start < stop; start++) {
		if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name))
			return *start;
	}
	return NULL;
}

static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
	if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
		return NULL;

	return syscalls_metadata[nr];
}

const char *get_syscall_name(int syscall)
{
	struct syscall_metadata *entry;

	entry = syscall_nr_to_meta(syscall);
	if (!entry)
		return NULL;

	return entry->name;
}
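
/*
 * With made-up values, print_syscall_enter() below renders an entry as:
 *
 *	sys_read(fd: 3, buf: 7ffd4002f000, count: 2000)
 *
 * (argument values in hex; types are prepended when the trace "verbose"
 * option is set).
 */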

static enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags,
		    struct trace_event *event)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_enter *trace;
	struct syscall_metadata *entry;
	int i, syscall;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	if (!entry)
		goto end;

	if (entry->enter_event->event.type != ent->type) {
		WARN_ON_ONCE(1);
		goto end;
	}

	trace_seq_printf(s, "%s(", entry->name);

	for (i = 0; i < entry->nb_args; i++) {

		if (trace_seq_has_overflowed(s))
			goto end;

		/* parameter types */
		if (tr->trace_flags & TRACE_ITER_VERBOSE)
			trace_seq_printf(s, "%s ", entry->types[i]);

		/* parameter values */
		trace_seq_printf(s, "%s: %lx%s", entry->args[i],
				 trace->args[i],
				 i == entry->nb_args - 1 ? "" : ", ");
	}

	trace_seq_putc(s, ')');
end:
	trace_seq_putc(s, '\n');

	return trace_handle_return(s);
}

static enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_exit *trace;
	int syscall;
	struct syscall_metadata *entry;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	if (!entry) {
		trace_seq_putc(s, '\n');
		goto out;
	}

	if (entry->exit_event->event.type != ent->type) {
		WARN_ON_ONCE(1);
		return TRACE_TYPE_UNHANDLED;
	}

	trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
				trace->ret);

 out:
	return trace_handle_return(s);
}

extern char *__bad_type_size(void);

#define SYSCALL_FIELD(type, field, name)				\
	sizeof(type) != sizeof(trace.field) ?				\
		__bad_type_size() :					\
		#type, #name, offsetof(typeof(trace), field),		\
		sizeof(trace.field), is_signed_type(type)
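
/*
 * Note that SYSCALL_FIELD() expands to *several* of trace_define_field()'s
 * arguments. E.g. SYSCALL_FIELD(int, nr, __syscall_nr) becomes
 *
 *	"int", "__syscall_nr", offsetof(typeof(trace), nr),
 *	sizeof(trace.nr), is_signed_type(int)
 *
 * and a size mismatch turns into a link error via the undefined
 * __bad_type_size().
 */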

static int __init
__set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
{
	int i;
	int pos = 0;

	/* When len=0, we just calculate the needed length */
#define LEN_OR_ZERO (len ? len - pos : 0)

	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
	for (i = 0; i < entry->nb_args; i++) {
		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
				entry->args[i], sizeof(unsigned long),
				i == entry->nb_args - 1 ? "" : ", ");
	}
	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");

	for (i = 0; i < entry->nb_args; i++) {
		pos += snprintf(buf + pos, LEN_OR_ZERO,
				", ((unsigned long)(REC->%s))", entry->args[i]);
	}

#undef LEN_OR_ZERO

	/* return the length of print_fmt */
	return pos;
}
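
/*
 * On a 64-bit kernel, a hypothetical three-argument syscall would get a
 * print_fmt along the lines of:
 *
 *	"fd: 0x%08lx, buf: 0x%08lx, count: 0x%08lx",
 *	((unsigned long)(REC->fd)), ((unsigned long)(REC->buf)),
 *	((unsigned long)(REC->count))
 */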

static int __init set_syscall_print_fmt(struct trace_event_call *call)
{
	char *print_fmt;
	int len;
	struct syscall_metadata *entry = call->data;

	if (entry->enter_event != call) {
		call->print_fmt = "\"0x%lx\", REC->ret";
		return 0;
	}

	/* First: called with 0 length to calculate the needed length */
	len = __set_enter_print_fmt(entry, NULL, 0);

	print_fmt = kmalloc(len + 1, GFP_KERNEL);
	if (!print_fmt)
		return -ENOMEM;

	/* Second: actually write the @print_fmt */
	__set_enter_print_fmt(entry, print_fmt, len + 1);
	call->print_fmt = print_fmt;

	return 0;
}

static void __init free_syscall_print_fmt(struct trace_event_call *call)
{
	struct syscall_metadata *entry = call->data;

	if (entry->enter_event == call)
		kfree(call->print_fmt);
}

static int __init syscall_enter_define_fields(struct trace_event_call *call)
{
	struct syscall_trace_enter trace;
	struct syscall_metadata *meta = call->data;
	int ret;
	int i;
	int offset = offsetof(typeof(trace), args);

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
				 FILTER_OTHER);
	if (ret)
		return ret;

	for (i = 0; i < meta->nb_args; i++) {
		ret = trace_define_field(call, meta->types[i],
					 meta->args[i], offset,
					 sizeof(unsigned long), 0,
					 FILTER_OTHER);
		offset += sizeof(unsigned long);
	}

	return ret;
}

static int __init syscall_exit_define_fields(struct trace_event_call *call)
{
	struct syscall_trace_exit trace;
	int ret;

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
				 FILTER_OTHER);
	if (ret)
		return ret;

	ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret),
				 FILTER_OTHER);

	return ret;
}

static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
{
	struct trace_array *tr = data;
	struct trace_event_file *trace_file;
	struct syscall_trace_enter *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	unsigned long irq_flags;
	int pc;
	int syscall_nr;
	int size;

	syscall_nr = trace_get_syscall_nr(current, regs);
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
		return;

	/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
	trace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
	if (!trace_file)
		return;

	if (trace_trigger_soft_disabled(trace_file))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

	local_save_flags(irq_flags);
	pc = preempt_count();

	buffer = tr->trace_buffer.buffer;
	event = trace_buffer_lock_reserve(buffer,
			sys_data->enter_event->event.type, size, irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

	event_trigger_unlock_commit(trace_file, buffer, event, entry,
				    irq_flags, pc);
}

static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
{
	struct trace_array *tr = data;
	struct trace_event_file *trace_file;
	struct syscall_trace_exit *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	unsigned long irq_flags;
	int pc;
	int syscall_nr;

	syscall_nr = trace_get_syscall_nr(current, regs);
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
		return;

	/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
	trace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
	if (!trace_file)
		return;

	if (trace_trigger_soft_disabled(trace_file))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	local_save_flags(irq_flags);
	pc = preempt_count();

	buffer = tr->trace_buffer.buffer;
	event = trace_buffer_lock_reserve(buffer,
			sys_data->exit_event->event.type, sizeof(*entry),
			irq_flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	entry->ret = syscall_get_return_value(current, regs);

	event_trigger_unlock_commit(trace_file, buffer, event, entry,
				    irq_flags, pc);
}

static int reg_event_syscall_enter(struct trace_event_file *file,
				   struct trace_event_call *call)
{
	struct trace_array *tr = file->tr;
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!tr->sys_refcount_enter)
		ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
	if (!ret) {
		rcu_assign_pointer(tr->enter_syscall_files[num], file);
		tr->sys_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

static void unreg_event_syscall_enter(struct trace_event_file *file,
				      struct trace_event_call *call)
{
	struct trace_array *tr = file->tr;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
		return;
	mutex_lock(&syscall_trace_lock);
	tr->sys_refcount_enter--;
	RCU_INIT_POINTER(tr->enter_syscall_files[num], NULL);
	if (!tr->sys_refcount_enter)
		unregister_trace_sys_enter(ftrace_syscall_enter, tr);
	mutex_unlock(&syscall_trace_lock);
}

static int reg_event_syscall_exit(struct trace_event_file *file,
				  struct trace_event_call *call)
{
	struct trace_array *tr = file->tr;
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!tr->sys_refcount_exit)
		ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
	if (!ret) {
		rcu_assign_pointer(tr->exit_syscall_files[num], file);
		tr->sys_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

static void unreg_event_syscall_exit(struct trace_event_file *file,
				     struct trace_event_call *call)
{
	struct trace_array *tr = file->tr;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
		return;
	mutex_lock(&syscall_trace_lock);
	tr->sys_refcount_exit--;
	RCU_INIT_POINTER(tr->exit_syscall_files[num], NULL);
	if (!tr->sys_refcount_exit)
		unregister_trace_sys_exit(ftrace_syscall_exit, tr);
	mutex_unlock(&syscall_trace_lock);
}

static int __init init_syscall_trace(struct trace_event_call *call)
{
	int id;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (num < 0 || num >= NR_syscalls) {
		pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
				((struct syscall_metadata *)call->data)->name);
		return -ENOSYS;
	}

	if (set_syscall_print_fmt(call) < 0)
		return -ENOMEM;

	id = trace_event_raw_init(call);

	if (id < 0) {
		free_syscall_print_fmt(call);
		return id;
	}

	return id;
}

struct trace_event_functions enter_syscall_print_funcs = {
	.trace		= print_syscall_enter,
};

struct trace_event_functions exit_syscall_print_funcs = {
	.trace		= print_syscall_exit,
};

struct trace_event_class __refdata event_class_syscall_enter = {
	.system		= "syscalls",
	.reg		= syscall_enter_register,
	.define_fields	= syscall_enter_define_fields,
	.get_fields	= syscall_get_enter_fields,
	.raw_init	= init_syscall_trace,
};

struct trace_event_class __refdata event_class_syscall_exit = {
	.system		= "syscalls",
	.reg		= syscall_exit_register,
	.define_fields	= syscall_exit_define_fields,
	.fields		= LIST_HEAD_INIT(event_class_syscall_exit.fields),
	.raw_init	= init_syscall_trace,
};
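
/*
 * Note the asymmetry: enter events need per-syscall field lists (hence
 * ->get_fields() returning the metadata's enter_fields), while every exit
 * event has the same shape (__syscall_nr plus ret), so the exit class can
 * share this one static ->fields list.
 */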

unsigned long __init __weak arch_syscall_addr(int nr)
{
	return (unsigned long)sys_call_table[nr];
}

void __init init_ftrace_syscalls(void)
{
	struct syscall_metadata *meta;
	unsigned long addr;
	int i;

	syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata),
				    GFP_KERNEL);
	if (!syscalls_metadata) {
		WARN_ON(1);
		return;
	}

	for (i = 0; i < NR_syscalls; i++) {
		addr = arch_syscall_addr(i);
		meta = find_syscall_meta(addr);
		if (!meta)
			continue;

		meta->syscall_nr = i;
		syscalls_metadata[i] = meta;
	}
}
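
/*
 * From here on, syscalls_metadata[nr] (if non-NULL) points at the record
 * that SYSCALL_METADATA()/SYSCALL_DEFINEx() emitted for syscall nr, e.g.
 * the one describing "sys_read" for __NR_read.
 */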

#ifdef CONFIG_PERF_EVENTS

static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;
static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs,
			      struct syscall_metadata *sys_data,
			      struct syscall_trace_enter *rec)
{
	struct syscall_tp_t {
		unsigned long long regs;
		unsigned long syscall_nr;
		unsigned long args[SYSCALL_DEFINE_MAXARGS];
	} param;
	int i;
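
	/*
	 * Stash the pt_regs pointer in the first 64-bit slot of the on-stack
	 * record; the BPF program sees this as the start of its context.
	 */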

	*(struct pt_regs **)&param = regs;
	param.syscall_nr = rec->nr;
	for (i = 0; i < sys_data->nb_args; i++)
		param.args[i] = rec->args[i];
	return trace_call_bpf(prog, &param);
}

static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_enter *rec;
	struct hlist_head *head;
	struct bpf_prog *prog;
	int syscall_nr;
	int rctx;
	int size;

	syscall_nr = trace_get_syscall_nr(current, regs);
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
		return;
	if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	prog = READ_ONCE(sys_data->enter_event->prog);
	head = this_cpu_ptr(sys_data->enter_event->perf_events);
	if (!prog && hlist_empty(head))
		return;

	/* get the size after alignment with the u32 buffer size field */
	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
	size = ALIGN(size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	rec = perf_trace_buf_alloc(size, NULL, &rctx);
	if (!rec)
		return;

	rec->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
			       (unsigned long *)&rec->args);

	if ((prog && !perf_call_bpf_enter(prog, regs, sys_data, rec)) ||
	    hlist_empty(head)) {
		perf_swevent_put_recursion_context(rctx);
		return;
	}

	perf_trace_buf_submit(rec, size, rctx,
			      sys_data->enter_event->event.type, 1, regs,
			      head, NULL, NULL);
}

static int perf_sysenter_enable(struct trace_event_call *call)
{
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	if (!sys_perf_refcount_enter)
		ret = register_trace_sys_enter(perf_syscall_enter, NULL);
	if (ret) {
		pr_info("event trace: Could not activate syscall entry trace point\n");
	} else {
		set_bit(num, enabled_perf_enter_syscalls);
		sys_perf_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

static void perf_sysenter_disable(struct trace_event_call *call)
{
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	sys_perf_refcount_enter--;
	clear_bit(num, enabled_perf_enter_syscalls);
	if (!sys_perf_refcount_enter)
		unregister_trace_sys_enter(perf_syscall_enter, NULL);
	mutex_unlock(&syscall_trace_lock);
}

static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs,
			      struct syscall_trace_exit *rec)
{
	struct syscall_tp_t {
		unsigned long long regs;
		unsigned long syscall_nr;
		unsigned long ret;
	} param;
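
	/* As in perf_call_bpf_enter(): the first slot carries the regs pointer. */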

	*(struct pt_regs **)&param = regs;
	param.syscall_nr = rec->nr;
	param.ret = rec->ret;
	return trace_call_bpf(prog, &param);
}

static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_exit *rec;
	struct hlist_head *head;
	struct bpf_prog *prog;
	int syscall_nr;
	int rctx;
	int size;

	syscall_nr = trace_get_syscall_nr(current, regs);
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
		return;
	if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	prog = READ_ONCE(sys_data->exit_event->prog);
	head = this_cpu_ptr(sys_data->exit_event->perf_events);
	if (!prog && hlist_empty(head))
		return;

	/* We can probably do that at build time */
	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	rec = perf_trace_buf_alloc(size, NULL, &rctx);
	if (!rec)
		return;

	rec->nr = syscall_nr;
	rec->ret = syscall_get_return_value(current, regs);

	if ((prog && !perf_call_bpf_exit(prog, regs, rec)) ||
	    hlist_empty(head)) {
		perf_swevent_put_recursion_context(rctx);
		return;
	}

	perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type,
			      1, regs, head, NULL, NULL);
}

static int perf_sysexit_enable(struct trace_event_call *call)
{
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	if (!sys_perf_refcount_exit)
		ret = register_trace_sys_exit(perf_syscall_exit, NULL);
	if (ret) {
		pr_info("event trace: Could not activate syscall exit trace point\n");
	} else {
		set_bit(num, enabled_perf_exit_syscalls);
		sys_perf_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

static void perf_sysexit_disable(struct trace_event_call *call)
{
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	sys_perf_refcount_exit--;
	clear_bit(num, enabled_perf_exit_syscalls);
	if (!sys_perf_refcount_exit)
		unregister_trace_sys_exit(perf_syscall_exit, NULL);
	mutex_unlock(&syscall_trace_lock);
}

#endif /* CONFIG_PERF_EVENTS */

static int syscall_enter_register(struct trace_event_call *event,
				 enum trace_reg type, void *data)
{
	struct trace_event_file *file = data;

	switch (type) {
	case TRACE_REG_REGISTER:
		return reg_event_syscall_enter(file, event);
	case TRACE_REG_UNREGISTER:
		unreg_event_syscall_enter(file, event);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return perf_sysenter_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		perf_sysenter_disable(event);
		return 0;
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		return 0;
#endif
	}
	return 0;
}

static int syscall_exit_register(struct trace_event_call *event,
				 enum trace_reg type, void *data)
{
	struct trace_event_file *file = data;

	switch (type) {
	case TRACE_REG_REGISTER:
		return reg_event_syscall_exit(file, event);
	case TRACE_REG_UNREGISTER:
		unreg_event_syscall_exit(file, event);
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return perf_sysexit_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		perf_sysexit_disable(event);
		return 0;
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
		return 0;
#endif
	}
	return 0;
}