trace_syscalls.c 19.9 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
2
#include <trace/syscall.h>
3
#include <trace/events/syscalls.h>
4
#include <linux/syscalls.h>
5
#include <linux/slab.h>
6
#include <linux/kernel.h>
7
#include <linux/module.h>	/* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */
8
#include <linux/ftrace.h>
9
#include <linux/perf_event.h>
10 11 12 13 14
#include <asm/syscall.h>

#include "trace_output.h"
#include "trace.h"

15
/*
 * Serializes enabling/disabling of syscall events in this file: it is held
 * while the sys_refcount_* / sys_perf_refcount_* counters are updated and
 * while the sys_enter/sys_exit tracepoint probes are (un)registered.
 */
static DEFINE_MUTEX(syscall_trace_lock);
16

17
static int syscall_enter_register(struct trace_event_call *event,
18
				 enum trace_reg type, void *data);
19
static int syscall_exit_register(struct trace_event_call *event,
20
				 enum trace_reg type, void *data);
21

22
static struct list_head *
23
syscall_get_enter_fields(struct trace_event_call *call)
24 25 26 27 28 29
{
	struct syscall_metadata *entry = call->data;

	return &entry->enter_fields;
}

30 31
/*
 * Bounds of the array of syscall metadata pointers walked by
 * find_syscall_meta(): [__start_syscalls_metadata, __stop_syscalls_metadata).
 * NOTE(review): both symbols are defined outside this file (presumably by
 * the linker script) - confirm.
 */
extern struct syscall_metadata *__start_syscalls_metadata[];
extern struct syscall_metadata *__stop_syscalls_metadata[];
32 33 34

/*
 * Table of NR_syscalls metadata pointers indexed by syscall number.
 * Allocated and filled by init_ftrace_syscalls(); stays NULL until then
 * (or if that allocation fails), and individual slots may be NULL.
 */
static struct syscall_metadata **syscalls_metadata;

35 36 37 38 39 40
#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
/*
 * Default symbol/name matcher: the first three characters of both strings
 * are skipped and only the remainders are compared.
 *
 * The prefix is ignored because architectures that use syscall wrappers may
 * expose syscall symbol aliases whose prefix is not "sys" (the cases cited
 * are ".SyS" and ".sys"), which would otherwise cause an unwanted mismatch.
 *
 * Both @sym and @name must be at least three characters long.
 *
 * Returns true when the remainders are equal.
 */
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
{
	const char *sym_tail = sym + 3;
	const char *name_tail = name + 3;

	return strcmp(sym_tail, name_tail) == 0;
}
#endif

48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
#ifdef ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
/*
 * Some architectures that allow for 32bit applications
 * to run on a 64bit kernel, do not map the syscalls for
 * the 32bit tasks the same as they do for 64bit tasks.
 *
 *     *cough*x86*cough*
 *
 * In such a case, instead of reporting the wrong syscalls,
 * simply ignore them.
 *
 * For an arch to ignore the compat syscalls it needs to
 * define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS as well as
 * define the function arch_trace_is_compat_syscall() to let
 * the tracing system know that it should ignore it.
 */
/*
 * Syscall number for @task as reported by syscall_get_nr(), or -1 when
 * arch_trace_is_compat_syscall() says @regs belong to a compat syscall.
 */
static int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	return unlikely(arch_trace_is_compat_syscall(regs)) ?
		-1 : syscall_get_nr(task, regs);
}
#else
/* No compat filtering configured: report syscall_get_nr()'s answer as-is. */
static inline int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	int nr = syscall_get_nr(task, regs);

	return nr;
}
#endif /* ARCH_TRACE_IGNORE_COMPAT_SYSCALLS */

80 81
static __init struct syscall_metadata *
find_syscall_meta(unsigned long syscall)
82
{
83 84
	struct syscall_metadata **start;
	struct syscall_metadata **stop;
85 86 87
	char str[KSYM_SYMBOL_LEN];


88 89
	start = __start_syscalls_metadata;
	stop = __stop_syscalls_metadata;
90 91
	kallsyms_lookup(syscall, NULL, NULL, NULL, str);

92 93 94
	if (arch_syscall_match_sym_name(str, "sys_ni_syscall"))
		return NULL;

95
	for ( ; start < stop; start++) {
96
		if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name))
97
			return *start;
98 99 100 101 102 103 104 105 106 107 108 109
	}
	return NULL;
}

/*
 * Map syscall number @nr to its metadata.
 *
 * Returns NULL when the table has not been allocated, when @nr is outside
 * [0, NR_syscalls), or when the slot itself holds NULL.
 */
static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
	if (syscalls_metadata && nr >= 0 && nr < NR_syscalls)
		return syscalls_metadata[nr];

	return NULL;
}

110 111 112 113 114 115 116 117 118 119 120
/*
 * Return the name recorded in the metadata for syscall number @syscall,
 * or NULL when syscall_nr_to_meta() has no metadata for it.
 */
const char *get_syscall_name(int syscall)
{
	struct syscall_metadata *meta = syscall_nr_to_meta(syscall);

	return meta ? meta->name : NULL;
}

121
static enum print_line_t
122 123
print_syscall_enter(struct trace_iterator *iter, int flags,
		    struct trace_event *event)
124
{
125
	struct trace_array *tr = iter->tr;
126 127 128 129
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_enter *trace;
	struct syscall_metadata *entry;
130
	int i, syscall;
131

132
	trace = (typeof(trace))ent;
133 134
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);
135

136 137 138
	if (!entry)
		goto end;

139
	if (entry->enter_event->event.type != ent->type) {
140 141 142 143
		WARN_ON_ONCE(1);
		goto end;
	}

144
	trace_seq_printf(s, "%s(", entry->name);
145 146

	for (i = 0; i < entry->nb_args; i++) {
147 148 149 150

		if (trace_seq_has_overflowed(s))
			goto end;

151
		/* parameter types */
152
		if (tr->trace_flags & TRACE_ITER_VERBOSE)
153 154
			trace_seq_printf(s, "%s ", entry->types[i]);

155
		/* parameter values */
156 157 158
		trace_seq_printf(s, "%s: %lx%s", entry->args[i],
				 trace->args[i],
				 i == entry->nb_args - 1 ? "" : ", ");
159 160
	}

161
	trace_seq_putc(s, ')');
162
end:
163
	trace_seq_putc(s, '\n');
164

165
	return trace_handle_return(s);
166 167
}

168
static enum print_line_t
169 170
print_syscall_exit(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
171 172 173 174 175 176 177
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_exit *trace;
	int syscall;
	struct syscall_metadata *entry;

178
	trace = (typeof(trace))ent;
179 180
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);
181

182
	if (!entry) {
183
		trace_seq_putc(s, '\n');
184
		goto out;
185 186
	}

187
	if (entry->exit_event->event.type != ent->type) {
188 189 190 191
		WARN_ON_ONCE(1);
		return TRACE_TYPE_UNHANDLED;
	}

192
	trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
193 194
				trace->ret);

195 196
 out:
	return trace_handle_return(s);
197 198
}

199 200
/*
 * Referenced by SYSCALL_FIELD() on the branch taken when the declared type's
 * size differs from the record member's size.
 * NOTE(review): no definition is visible in this file; presumably it is left
 * undefined so that a size mismatch fails at link time - confirm.
 */
extern char *__bad_type_size(void);

201 202
/*
 * Expand to the run of arguments that describes one fixed field of a syscall
 * record when passed to trace_define_field():
 *
 *   type string, name string, offset, size, signedness
 *
 * The expansion site must have a local variable named `trace` of the record
 * type in scope; it is only used inside sizeof/offsetof/typeof.
 *
 * The first expanded argument is a conditional: if sizeof(@type) differs
 * from the size of trace.@field it evaluates to __bad_type_size(), otherwise
 * to the stringified @type. The expansion is deliberately not parenthesized
 * because it must supply several comma-separated arguments.
 */
#define SYSCALL_FIELD(type, field, name)				\
	sizeof(type) != sizeof(trace.field) ? __bad_type_size() : #type, \
	#name,								\
	offsetof(typeof(trace), field),					\
	sizeof(trace.field),						\
	is_signed_type(type)
206

207 208
/*
 * Build the print_fmt string of a syscall-enter event.
 *
 * The result has the shape
 *   "a: 0x%0<N>lx, b: 0x%0<N>lx", ((unsigned long)(REC->a)), ((unsigned long)(REC->b))
 * with one entry per argument in @entry->args, where <N> is
 * sizeof(unsigned long).
 *
 * @entry: syscall metadata supplying nb_args and the argument names.
 * @buf:   destination buffer; may be NULL when @len is 0.
 * @len:   size of @buf, or 0 to only compute the required length.
 *
 * Returns the length of the formatted string, not counting the terminating
 * NUL (the sum of the snprintf() return values).
 */
static int __init
__set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
{
	const int nargs = entry->nb_args;
	int written = 0;
	int arg;

	/* Space left in @buf, or 0 in length-only mode (len == 0). */
#define LEN_OR_ZERO (len ? len - written : 0)

	/* Quoted format part: one "name: 0x%0Nlx" per argument. */
	written += snprintf(buf + written, LEN_OR_ZERO, "\"");
	for (arg = 0; arg < nargs; arg++) {
		const char *sep = (arg == nargs - 1) ? "" : ", ";

		written += snprintf(buf + written, LEN_OR_ZERO,
				    "%s: 0x%%0%zulx%s", entry->args[arg],
				    sizeof(unsigned long), sep);
	}
	written += snprintf(buf + written, LEN_OR_ZERO, "\"");

	/* Value part: one REC-> reference per argument. */
	for (arg = 0; arg < nargs; arg++)
		written += snprintf(buf + written, LEN_OR_ZERO,
				    ", ((unsigned long)(REC->%s))",
				    entry->args[arg]);

#undef LEN_OR_ZERO

	return written;
}

235
/*
 * Install call->print_fmt for a syscall event.
 *
 * A call that is not its metadata's enter event gets a fixed string literal
 * describing the return value. The enter event gets a kmalloc()ed string
 * built by __set_enter_print_fmt(), which is called twice: first to measure,
 * then to fill the buffer.
 *
 * Returns 0 on success or -ENOMEM if the buffer cannot be allocated.
 */
static int __init set_syscall_print_fmt(struct trace_event_call *call)
{
	struct syscall_metadata *meta = call->data;
	char *fmt;
	int fmt_len;

	if (meta->enter_event != call) {
		call->print_fmt = "\"0x%lx\", REC->ret";
		return 0;
	}

	/* Pass 1: zero length, just learn how many characters are needed. */
	fmt_len = __set_enter_print_fmt(meta, NULL, 0);

	fmt = kmalloc(fmt_len + 1, GFP_KERNEL);
	if (!fmt)
		return -ENOMEM;

	/* Pass 2: format into the buffer (+1 for the terminating NUL). */
	__set_enter_print_fmt(meta, fmt, fmt_len + 1);
	call->print_fmt = fmt;

	return 0;
}

260
/*
 * Release call->print_fmt, but only for the metadata's enter event: that is
 * the only case in which set_syscall_print_fmt() allocates the string.
 */
static void __init free_syscall_print_fmt(struct trace_event_call *call)
{
	struct syscall_metadata *meta = call->data;

	if (meta->enter_event != call)
		return;

	kfree(call->print_fmt);
}

268
/*
 * Define the fields of a syscall-enter event: the fixed "__syscall_nr"
 * field followed by one unsigned-long-sized field per syscall argument,
 * laid out consecutively starting at the offset of the record's args[].
 *
 * Returns 0 on success or the first error reported by
 * trace_define_field(). Field definition stops at the first failure so the
 * error is not overwritten by a later, successful call.
 */
static int __init syscall_enter_define_fields(struct trace_event_call *call)
{
	/* Only used for sizeof/offsetof/typeof, including by SYSCALL_FIELD(). */
	struct syscall_trace_enter trace;
	struct syscall_metadata *meta = call->data;
	int ret;
	int i;
	int offset = offsetof(typeof(trace), args);

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
				 FILTER_OTHER);
	if (ret)
		return ret;

	for (i = 0; i < meta->nb_args; i++) {
		ret = trace_define_field(call, meta->types[i],
					 meta->args[i], offset,
					 sizeof(unsigned long), 0,
					 FILTER_OTHER);
		if (ret)
			break;
		offset += sizeof(unsigned long);
	}

	return ret;
}

292
/*
 * Define the two fields of a syscall-exit event: "__syscall_nr" (int) and
 * "ret" (long).
 *
 * Returns 0 on success or the error from the failing trace_define_field().
 */
static int __init syscall_exit_define_fields(struct trace_event_call *call)
{
	/* Only used for sizeof/offsetof/typeof by SYSCALL_FIELD(). */
	struct syscall_trace_exit trace;
	int err;

	err = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
				 FILTER_OTHER);
	if (err)
		return err;

	return trace_define_field(call, SYSCALL_FIELD(long, ret, ret),
				  FILTER_OTHER);
}

308
static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
309
{
310
	struct trace_array *tr = data;
311
	struct trace_event_file *trace_file;
312 313 314
	struct syscall_trace_enter *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
315
	struct ring_buffer *buffer;
316 317
	unsigned long irq_flags;
	int pc;
318
	int syscall_nr;
319
	int size;
320

321
	syscall_nr = trace_get_syscall_nr(current, regs);
322
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
323
		return;
324 325

	/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
326 327
	trace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
	if (!trace_file)
328 329
		return;

330
	if (trace_trigger_soft_disabled(trace_file))
331
		return;
332

333 334 335 336 337 338
	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

339 340 341
	local_save_flags(irq_flags);
	pc = preempt_count();

342
	buffer = tr->trace_buffer.buffer;
343
	event = trace_buffer_lock_reserve(buffer,
344
			sys_data->enter_event->event.type, size, irq_flags, pc);
345 346 347 348 349 350 351
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

352
	event_trigger_unlock_commit(trace_file, buffer, event, entry,
353
				    irq_flags, pc);
354 355
}

356
static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
357
{
358
	struct trace_array *tr = data;
359
	struct trace_event_file *trace_file;
360 361 362
	struct syscall_trace_exit *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
363
	struct ring_buffer *buffer;
364 365
	unsigned long irq_flags;
	int pc;
366 367
	int syscall_nr;

368
	syscall_nr = trace_get_syscall_nr(current, regs);
369
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
370
		return;
371 372

	/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
373 374
	trace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
	if (!trace_file)
375 376
		return;

377
	if (trace_trigger_soft_disabled(trace_file))
378
		return;
379

380 381 382 383
	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

384 385 386
	local_save_flags(irq_flags);
	pc = preempt_count();

387
	buffer = tr->trace_buffer.buffer;
388
	event = trace_buffer_lock_reserve(buffer,
389 390
			sys_data->exit_event->event.type, sizeof(*entry),
			irq_flags, pc);
391 392 393 394 395 396 397
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	entry->ret = syscall_get_return_value(current, regs);

398
	event_trigger_unlock_commit(trace_file, buffer, event, entry,
399
				    irq_flags, pc);
400 401
}

402
static int reg_event_syscall_enter(struct trace_event_file *file,
403
				   struct trace_event_call *call)
404
{
405
	struct trace_array *tr = file->tr;
406 407 408
	int ret = 0;
	int num;

409
	num = ((struct syscall_metadata *)call->data)->syscall_nr;
410
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
411 412
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
413 414
	if (!tr->sys_refcount_enter)
		ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
415
	if (!ret) {
416
		rcu_assign_pointer(tr->enter_syscall_files[num], file);
417
		tr->sys_refcount_enter++;
418 419 420
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
421 422
}

423
static void unreg_event_syscall_enter(struct trace_event_file *file,
424
				      struct trace_event_call *call)
425
{
426
	struct trace_array *tr = file->tr;
427
	int num;
428

429
	num = ((struct syscall_metadata *)call->data)->syscall_nr;
430
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
431 432
		return;
	mutex_lock(&syscall_trace_lock);
433
	tr->sys_refcount_enter--;
434
	RCU_INIT_POINTER(tr->enter_syscall_files[num], NULL);
435 436
	if (!tr->sys_refcount_enter)
		unregister_trace_sys_enter(ftrace_syscall_enter, tr);
437 438
	mutex_unlock(&syscall_trace_lock);
}
439

440
static int reg_event_syscall_exit(struct trace_event_file *file,
441
				  struct trace_event_call *call)
442
{
443
	struct trace_array *tr = file->tr;
444 445 446
	int ret = 0;
	int num;

447
	num = ((struct syscall_metadata *)call->data)->syscall_nr;
448
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
449 450
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
451 452
	if (!tr->sys_refcount_exit)
		ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
453
	if (!ret) {
454
		rcu_assign_pointer(tr->exit_syscall_files[num], file);
455
		tr->sys_refcount_exit++;
456
	}
457 458 459
	mutex_unlock(&syscall_trace_lock);
	return ret;
}
460

461
static void unreg_event_syscall_exit(struct trace_event_file *file,
462
				     struct trace_event_call *call)
463
{
464
	struct trace_array *tr = file->tr;
465
	int num;
466

467
	num = ((struct syscall_metadata *)call->data)->syscall_nr;
468
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
469 470
		return;
	mutex_lock(&syscall_trace_lock);
471
	tr->sys_refcount_exit--;
472
	RCU_INIT_POINTER(tr->exit_syscall_files[num], NULL);
473 474
	if (!tr->sys_refcount_exit)
		unregister_trace_sys_exit(ftrace_syscall_exit, tr);
475
	mutex_unlock(&syscall_trace_lock);
476
}
477

478
/*
 * raw_init callback shared by the syscall enter and exit event classes.
 *
 * Rejects events whose metadata carries an out-of-range syscall number,
 * sets up print_fmt and then hands over to trace_event_raw_init().
 *
 * Returns the value of trace_event_raw_init() (negative on failure, in
 * which case the print_fmt is released again), -ENOSYS for an unmapped
 * syscall, or -ENOMEM if print_fmt setup fails.
 */
static int __init init_syscall_trace(struct trace_event_call *call)
{
	struct syscall_metadata *meta = call->data;
	int nr = meta->syscall_nr;
	int id;

	if (nr < 0 || nr >= NR_syscalls) {
		pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
			 meta->name);
		return -ENOSYS;
	}

	if (set_syscall_print_fmt(call) < 0)
		return -ENOMEM;

	id = trace_event_raw_init(call);
	if (id < 0)
		free_syscall_print_fmt(call);

	return id;
}

503 504 505 506 507 508 509 510
/* Output callbacks for syscall-enter events: .trace is print_syscall_enter(). */
struct trace_event_functions enter_syscall_print_funcs = {
	.trace		= print_syscall_enter,
};

/* Output callbacks for syscall-exit events: .trace is print_syscall_exit(). */
struct trace_event_functions exit_syscall_print_funcs = {
	.trace		= print_syscall_exit,
};

511
/*
 * Event class shared by all syscall-enter events in the "syscalls" system.
 * The field list is obtained per event through .get_fields (it lives in the
 * syscall metadata) rather than from a class-wide list.
 */
struct trace_event_class __refdata event_class_syscall_enter = {
	.system		= "syscalls",
	.raw_init	= init_syscall_trace,
	.define_fields	= syscall_enter_define_fields,
	.get_fields	= syscall_get_enter_fields,
	.reg		= syscall_enter_register,
};

519
/*
 * Event class shared by all syscall-exit events in the "syscalls" system.
 * Unlike the enter class it carries its own, statically initialized field
 * list head.
 */
struct trace_event_class __refdata event_class_syscall_exit = {
	.system		= "syscalls",
	.raw_init	= init_syscall_trace,
	.define_fields	= syscall_exit_define_fields,
	.fields		= LIST_HEAD_INIT(event_class_syscall_exit.fields),
	.reg		= syscall_exit_register,
};

527
/*
 * Weak default: the address of syscall @nr is its sys_call_table[] entry.
 * @nr is used as an index without any range check.
 */
unsigned long __init __weak arch_syscall_addr(int nr)
{
	unsigned long addr = (unsigned long)sys_call_table[nr];

	return addr;
}

532
/*
 * Allocate the syscalls_metadata table and fill it in.
 *
 * For every syscall number the handler address from arch_syscall_addr() is
 * resolved to a metadata entry with find_syscall_meta(); when one is found
 * its syscall_nr is set and it is stored in the table. Slots without
 * metadata stay NULL. On allocation failure a warning is raised and the
 * table pointer is left NULL.
 */
void __init init_ftrace_syscalls(void)
{
	int nr;

	syscalls_metadata = kcalloc(NR_syscalls, sizeof(*syscalls_metadata),
				    GFP_KERNEL);
	if (WARN_ON(!syscalls_metadata))
		return;

	for (nr = 0; nr < NR_syscalls; nr++) {
		struct syscall_metadata *meta;

		meta = find_syscall_meta(arch_syscall_addr(nr));
		if (meta) {
			meta->syscall_nr = nr;
			syscalls_metadata[nr] = meta;
		}
	}
}

556
#ifdef CONFIG_PERF_EVENTS
557

558 559 560 561
/*
 * perf state, one bit per syscall number: a bit is set by
 * perf_sysenter_enable()/perf_sysexit_enable(), cleared by the matching
 * *_disable(), and tested by the perf probes before recording anything.
 */
static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
/*
 * Number of currently enabled perf enter/exit syscall events. The perf
 * tracepoint probe is registered when a count is 0 at enable time and
 * unregistered when it drops back to 0. Updated under syscall_trace_lock.
 */
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;
562

563 564 565 566 567 568
static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs,
			      struct syscall_metadata *sys_data,
			      struct syscall_trace_enter *rec) {
	struct syscall_tp_t {
		unsigned long long regs;
		unsigned long syscall_nr;
569
		unsigned long args[SYSCALL_DEFINE_MAXARGS];
570 571 572 573 574 575 576 577 578 579
	} param;
	int i;

	*(struct pt_regs **)&param = regs;
	param.syscall_nr = rec->nr;
	for (i = 0; i < sys_data->nb_args; i++)
		param.args[i] = rec->args[i];
	return trace_call_bpf(prog, &param);
}

580
static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
581 582
{
	struct syscall_metadata *sys_data;
583
	struct syscall_trace_enter *rec;
584
	struct hlist_head *head;
585
	struct bpf_prog *prog;
586
	int syscall_nr;
587
	int rctx;
588
	int size;
589

590
	syscall_nr = trace_get_syscall_nr(current, regs);
591
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
592
		return;
593
	if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
594 595 596 597 598 599
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

600
	prog = READ_ONCE(sys_data->enter_event->prog);
601
	head = this_cpu_ptr(sys_data->enter_event->perf_events);
602
	if (!prog && hlist_empty(head))
603 604
		return;

605 606 607 608 609
	/* get the size after alignment with the u32 buffer size field */
	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
	size = ALIGN(size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

610
	rec = perf_trace_buf_alloc(size, NULL, &rctx);
611 612
	if (!rec)
		return;
613 614 615 616

	rec->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
			       (unsigned long *)&rec->args);
617 618 619 620 621 622 623

	if ((prog && !perf_call_bpf_enter(prog, regs, sys_data, rec)) ||
	    hlist_empty(head)) {
		perf_swevent_put_recursion_context(rctx);
		return;
	}

624 625
	perf_trace_buf_submit(rec, size, rctx,
			      sys_data->enter_event->event.type, 1, regs,
626
			      head, NULL, NULL);
627 628
}

629
static int perf_sysenter_enable(struct trace_event_call *call)
630 631 632 633
{
	int ret = 0;
	int num;

634
	num = ((struct syscall_metadata *)call->data)->syscall_nr;
635 636

	mutex_lock(&syscall_trace_lock);
637
	if (!sys_perf_refcount_enter)
638
		ret = register_trace_sys_enter(perf_syscall_enter, NULL);
639
	if (ret) {
640
		pr_info("event trace: Could not activate syscall entry trace point");
641
	} else {
642 643
		set_bit(num, enabled_perf_enter_syscalls);
		sys_perf_refcount_enter++;
644 645 646 647 648
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

649
static void perf_sysenter_disable(struct trace_event_call *call)
650 651 652
{
	int num;

653
	num = ((struct syscall_metadata *)call->data)->syscall_nr;
654 655

	mutex_lock(&syscall_trace_lock);
656 657 658
	sys_perf_refcount_enter--;
	clear_bit(num, enabled_perf_enter_syscalls);
	if (!sys_perf_refcount_enter)
659
		unregister_trace_sys_enter(perf_syscall_enter, NULL);
660 661 662
	mutex_unlock(&syscall_trace_lock);
}

663 664 665 666 667 668 669 670 671 672 673 674 675 676
/*
 * Run BPF program @prog for a syscall-exit record.
 *
 * A context is built on the stack holding the pt_regs pointer (written over
 * the start of the structure, i.e. its 'regs' member), the syscall number
 * and the return value taken from @rec.
 *
 * Returns the value of trace_call_bpf().
 */
static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs,
			      struct syscall_trace_exit *rec)
{
	struct syscall_tp_t {
		unsigned long long regs;
		unsigned long syscall_nr;
		unsigned long ret;
	} param;

	*(struct pt_regs **)&param = regs;
	param.ret = rec->ret;
	param.syscall_nr = rec->nr;

	return trace_call_bpf(prog, &param);
}

677
static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
678 679
{
	struct syscall_metadata *sys_data;
680
	struct syscall_trace_exit *rec;
681
	struct hlist_head *head;
682
	struct bpf_prog *prog;
683
	int syscall_nr;
684
	int rctx;
685
	int size;
686

687
	syscall_nr = trace_get_syscall_nr(current, regs);
688
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
689
		return;
690
	if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
691 692 693 694 695 696
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

697
	prog = READ_ONCE(sys_data->exit_event->prog);
698
	head = this_cpu_ptr(sys_data->exit_event->perf_events);
699
	if (!prog && hlist_empty(head))
700 701
		return;

702 703 704
	/* We can probably do that at build time */
	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);
705

706
	rec = perf_trace_buf_alloc(size, NULL, &rctx);
707 708
	if (!rec)
		return;
709 710 711

	rec->nr = syscall_nr;
	rec->ret = syscall_get_return_value(current, regs);
712 713 714 715 716 717 718

	if ((prog && !perf_call_bpf_exit(prog, regs, rec)) ||
	    hlist_empty(head)) {
		perf_swevent_put_recursion_context(rctx);
		return;
	}

719
	perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type,
720
			      1, regs, head, NULL, NULL);
721 722
}

723
static int perf_sysexit_enable(struct trace_event_call *call)
724 725 726 727
{
	int ret = 0;
	int num;

728
	num = ((struct syscall_metadata *)call->data)->syscall_nr;
729 730

	mutex_lock(&syscall_trace_lock);
731
	if (!sys_perf_refcount_exit)
732
		ret = register_trace_sys_exit(perf_syscall_exit, NULL);
733
	if (ret) {
734
		pr_info("event trace: Could not activate syscall exit trace point");
735
	} else {
736 737
		set_bit(num, enabled_perf_exit_syscalls);
		sys_perf_refcount_exit++;
738 739 740 741 742
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

743
static void perf_sysexit_disable(struct trace_event_call *call)
744 745 746
{
	int num;

747
	num = ((struct syscall_metadata *)call->data)->syscall_nr;
748 749

	mutex_lock(&syscall_trace_lock);
750 751 752
	sys_perf_refcount_exit--;
	clear_bit(num, enabled_perf_exit_syscalls);
	if (!sys_perf_refcount_exit)
753
		unregister_trace_sys_exit(perf_syscall_exit, NULL);
754 755 756
	mutex_unlock(&syscall_trace_lock);
}

757
#endif /* CONFIG_PERF_EVENTS */
758

759
static int syscall_enter_register(struct trace_event_call *event,
760
				 enum trace_reg type, void *data)
761
{
762
	struct trace_event_file *file = data;
763

764 765
	switch (type) {
	case TRACE_REG_REGISTER:
766
		return reg_event_syscall_enter(file, event);
767
	case TRACE_REG_UNREGISTER:
768
		unreg_event_syscall_enter(file, event);
769 770 771 772 773 774 775 776
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return perf_sysenter_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		perf_sysenter_disable(event);
		return 0;
777 778
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
779 780
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
781
		return 0;
782 783 784 785 786
#endif
	}
	return 0;
}

787
static int syscall_exit_register(struct trace_event_call *event,
788
				 enum trace_reg type, void *data)
789
{
790
	struct trace_event_file *file = data;
791

792 793
	switch (type) {
	case TRACE_REG_REGISTER:
794
		return reg_event_syscall_exit(file, event);
795
	case TRACE_REG_UNREGISTER:
796
		unreg_event_syscall_exit(file, event);
797 798 799 800 801 802 803 804
		return 0;

#ifdef CONFIG_PERF_EVENTS
	case TRACE_REG_PERF_REGISTER:
		return perf_sysexit_enable(event);
	case TRACE_REG_PERF_UNREGISTER:
		perf_sysexit_disable(event);
		return 0;
805 806
	case TRACE_REG_PERF_OPEN:
	case TRACE_REG_PERF_CLOSE:
807 808
	case TRACE_REG_PERF_ADD:
	case TRACE_REG_PERF_DEL:
809
		return 0;
810 811 812 813
#endif
	}
	return 0;
}