/*
 *  Kernel Probes (KProbes)
 *  kernel/kprobes.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 *
 * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
 *		Probes initial implementation (includes suggestions from
 *		Rusty Russell).
 * 2004-Aug	Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
 *		hlists and exceptions notifier as suggested by Andi Kleen.
 * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
 *		interface to access function arguments.
 * 2004-Sep	Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
 *		exceptions notifier to be first on the priority list.
 * 2005-May	Hien Nguyen <hien@us.ibm.com>, Jim Keniston
 *		<jkenisto@us.ibm.com> and Prasanna S Panchamukhi
 *		<prasanna@in.ibm.com> added function-return probes.
 */
#include <linux/kprobes.h>
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/export.h>
#include <linux/moduleloader.h>
#include <linux/kallsyms.h>
#include <linux/freezer.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/sysctl.h>
#include <linux/kdebug.h>
#include <linux/memory.h>
#include <linux/ftrace.h>
#include <linux/cpu.h>
#include <linux/jump_label.h>

#include <asm/sections.h>
#include <asm/cacheflush.h>
#include <asm/errno.h>
#include <linux/uaccess.h>

#define KPROBE_HASH_BITS 6
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
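
/*
 * Illustrative usage sketch (not part of this file; the symbol name and
 * handler below are placeholders only): a typical kprobes client does
 *
 *	static struct kprobe kp = {
 *		.symbol_name	= "do_sys_open",
 *		.pre_handler	= my_pre_handler,
 *	};
 *	register_kprobe(&kp);
 *	...
 *	unregister_kprobe(&kp);
 */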

static int kprobes_initialized;
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];

/* NOTE: change this value only with kprobe_mutex held */
static bool kprobes_all_disarmed;

/* This protects kprobe_table and optimizing_list */
static DEFINE_MUTEX(kprobe_mutex);
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
static struct {
	raw_spinlock_t lock ____cacheline_aligned_in_smp;
} kretprobe_table_locks[KPROBE_TABLE_SIZE];
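
/*
 * The weak kprobe_lookup_name() below resolves a probe symbol address via
 * kallsyms; architectures that need special symbol handling (e.g. function
 * descriptors) can override it.
 */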

kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
					unsigned int __unused)
{
	return ((kprobe_opcode_t *)(kallsyms_lookup_name(name)));
}

static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
{
	return &(kretprobe_table_locks[hash].lock);
}

/* Blacklist -- list of struct kprobe_blacklist_entry */
static LIST_HEAD(kprobe_blacklist);

#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
/*
 * kprobe->ainsn.insn points to the copy of the instruction to be
 * single-stepped. x86_64, POWER4 and above have no-exec support and
 * stepping on the instruction on a vmalloced/kmalloced/data page
 * is a recipe for disaster
 */
struct kprobe_insn_page {
	struct list_head list;
	kprobe_opcode_t *insns;		/* Page of instruction slots */
	struct kprobe_insn_cache *cache;
	int nused;
	int ngarbage;
	char slot_used[];
};

#define KPROBE_INSN_PAGE_SIZE(slots)			\
	(offsetof(struct kprobe_insn_page, slot_used) +	\
	 (sizeof(char) * (slots)))

static int slots_per_page(struct kprobe_insn_cache *c)
{
	return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t));
}
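
/*
 * For example (illustrative figures): on x86, sizeof(kprobe_opcode_t) is 1
 * and kprobe_insn_slots.insn_size is MAX_INSN_SIZE (16), so a 4096-byte
 * page yields 4096 / 16 = 256 instruction slots.
 */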

enum kprobe_slot_state {
	SLOT_CLEAN = 0,
	SLOT_DIRTY = 1,
	SLOT_USED = 2,
};

static void *alloc_insn_page(void)
{
	return module_alloc(PAGE_SIZE);
}

void __weak free_insn_page(void *page)
{
	module_memfree(page);
}

struct kprobe_insn_cache kprobe_insn_slots = {
	.mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
	.alloc = alloc_insn_page,
	.free = free_insn_page,
	.pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
	.insn_size = MAX_INSN_SIZE,
	.nr_garbage = 0,
};
static int collect_garbage_slots(struct kprobe_insn_cache *c);

/**
 * __get_insn_slot() - Find a slot on an executable page for an instruction.
 * We allocate an executable page if there's no room on existing ones.
 */
kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
{
	struct kprobe_insn_page *kip;
	kprobe_opcode_t *slot = NULL;

	/* Since the slot array is not protected by rcu, we need a mutex */
	mutex_lock(&c->mutex);
 retry:
	rcu_read_lock();
	list_for_each_entry_rcu(kip, &c->pages, list) {
		if (kip->nused < slots_per_page(c)) {
			int i;
			for (i = 0; i < slots_per_page(c); i++) {
				if (kip->slot_used[i] == SLOT_CLEAN) {
					kip->slot_used[i] = SLOT_USED;
					kip->nused++;
					slot = kip->insns + (i * c->insn_size);
					rcu_read_unlock();
					goto out;
				}
			}
			/* kip->nused is broken. Fix it. */
			kip->nused = slots_per_page(c);
			WARN_ON(1);
		}
	}
	rcu_read_unlock();

	/* If there are any garbage slots, collect them and try again. */
	if (c->nr_garbage && collect_garbage_slots(c) == 0)
		goto retry;

	/* All out of space.  Need to allocate a new page. */
	kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL);
	if (!kip)
		goto out;

	/*
	 * Use module_alloc so this page is within +/- 2GB of where the
	 * kernel image and loaded module images reside. This is required
	 * so x86_64 can correctly handle the %rip-relative fixups.
	 */
	kip->insns = c->alloc();
	if (!kip->insns) {
		kfree(kip);
		goto out;
	}
	INIT_LIST_HEAD(&kip->list);
	memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));
	kip->slot_used[0] = SLOT_USED;
	kip->nused = 1;
	kip->ngarbage = 0;
	kip->cache = c;
	list_add_rcu(&kip->list, &c->pages);
	slot = kip->insns;
out:
	mutex_unlock(&c->mutex);
	return slot;
}

/*
 * Return 1 if the slot's page became empty (the page is freed unless it is
 * the last one), otherwise 0.
 */
static int collect_one_slot(struct kprobe_insn_page *kip, int idx)
{
	kip->slot_used[idx] = SLOT_CLEAN;
	kip->nused--;
	if (kip->nused == 0) {
		/*
		 * Page is no longer in use.  Free it unless
		 * it's the last one.  We keep the last one
		 * so as not to have to set it up again the
		 * next time somebody inserts a probe.
		 */
		if (!list_is_singular(&kip->list)) {
			list_del_rcu(&kip->list);
			synchronize_rcu();
			kip->cache->free(kip->insns);
			kfree(kip);
		}
		return 1;
	}
	return 0;
}

static int collect_garbage_slots(struct kprobe_insn_cache *c)
{
	struct kprobe_insn_page *kip, *next;

	/* Ensure no one is still running on the garbage slots */
	synchronize_sched();

	list_for_each_entry_safe(kip, next, &c->pages, list) {
		int i;
		if (kip->ngarbage == 0)
			continue;
		kip->ngarbage = 0;	/* we will collect all garbage slots */
		for (i = 0; i < slots_per_page(c); i++) {
			if (kip->slot_used[i] == SLOT_DIRTY && collect_one_slot(kip, i))
				break;
		}
	}
	c->nr_garbage = 0;
	return 0;
}

void __free_insn_slot(struct kprobe_insn_cache *c,
		      kprobe_opcode_t *slot, int dirty)
{
	struct kprobe_insn_page *kip;
	long idx;

	mutex_lock(&c->mutex);
	rcu_read_lock();
	list_for_each_entry_rcu(kip, &c->pages, list) {
		idx = ((long)slot - (long)kip->insns) /
			(c->insn_size * sizeof(kprobe_opcode_t));
		if (idx >= 0 && idx < slots_per_page(c))
			goto out;
	}
	/* Could not find this slot. */
	WARN_ON(1);
	kip = NULL;
out:
	rcu_read_unlock();
	/* Mark and sweep: this may sleep */
	if (kip) {
		/* Check double free */
		WARN_ON(kip->slot_used[idx] != SLOT_USED);
		if (dirty) {
			kip->slot_used[idx] = SLOT_DIRTY;
			kip->ngarbage++;
			if (++c->nr_garbage > slots_per_page(c))
				collect_garbage_slots(c);
		} else {
			collect_one_slot(kip, idx);
		}
	}
	mutex_unlock(&c->mutex);
}

/*
 * Check whether the given address is on a page of kprobe instruction slots.
 * This is used for checking whether an address on a stack is in a text
 * area or not.
 */
bool __is_insn_slot_addr(struct kprobe_insn_cache *c, unsigned long addr)
{
	struct kprobe_insn_page *kip;
	bool ret = false;

	rcu_read_lock();
	list_for_each_entry_rcu(kip, &c->pages, list) {
		if (addr >= (unsigned long)kip->insns &&
		    addr < (unsigned long)kip->insns + PAGE_SIZE) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();

	return ret;
}

#ifdef CONFIG_OPTPROBES
/* For optimized_kprobe buffer */
struct kprobe_insn_cache kprobe_optinsn_slots = {
	.mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
	.alloc = alloc_insn_page,
	.free = free_insn_page,
	.pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
	/* .insn_size is initialized later */
	.nr_garbage = 0,
};
#endif
#endif

/* We have preemption disabled, so it is safe to use __ versions */
static inline void set_kprobe_instance(struct kprobe *kp)
{
	__this_cpu_write(kprobe_instance, kp);
}

static inline void reset_kprobe_instance(void)
{
	__this_cpu_write(kprobe_instance, NULL);
}

/*
 * This routine is called either:
 * 	- under the kprobe_mutex - during kprobe_[un]register()
 * 				OR
 * 	- with preemption disabled - from arch/xxx/kernel/kprobes.c
 */
struct kprobe *get_kprobe(void *addr)
{
	struct hlist_head *head;
	struct kprobe *p;

	head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
	hlist_for_each_entry_rcu(p, head, hlist) {
		if (p->addr == addr)
			return p;
	}

	return NULL;
}
NOKPROBE_SYMBOL(get_kprobe);

static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);

/* Return true if the kprobe is an aggregator */
static inline int kprobe_aggrprobe(struct kprobe *p)
{
	return p->pre_handler == aggr_pre_handler;
}

/* Return true(!0) if the kprobe is unused */
static inline int kprobe_unused(struct kprobe *p)
{
	return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
	       list_empty(&p->list);
}

/*
 * Keep all fields in the kprobe consistent
 */
static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
{
	memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
	memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));
}

#ifdef CONFIG_OPTPROBES
/* NOTE: change this value only with kprobe_mutex held */
static bool kprobes_allow_optimization;

/*
 * Call all pre_handlers on the list, but ignore their return values.
 * This must be called from the arch-dep optimized caller.
 */
void opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe *kp;

	list_for_each_entry_rcu(kp, &p->list, list) {
		if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
			set_kprobe_instance(kp);
			kp->pre_handler(kp, regs);
		}
		reset_kprobe_instance();
	}
}
NOKPROBE_SYMBOL(opt_pre_handler);

/* Free optimized instructions and optimized_kprobe */
static void free_aggr_kprobe(struct kprobe *p)
{
	struct optimized_kprobe *op;

	op = container_of(p, struct optimized_kprobe, kp);
	arch_remove_optimized_kprobe(op);
	arch_remove_kprobe(p);
	kfree(op);
}

/* Return true(!0) if the kprobe is ready for optimization. */
static inline int kprobe_optready(struct kprobe *p)
{
	struct optimized_kprobe *op;

	if (kprobe_aggrprobe(p)) {
		op = container_of(p, struct optimized_kprobe, kp);
		return arch_prepared_optinsn(&op->optinsn);
	}

	return 0;
}

/* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */
static inline int kprobe_disarmed(struct kprobe *p)
{
	struct optimized_kprobe *op;

	/* If kprobe is not an aggr/opt probe, just return whether it is disabled */
	if (!kprobe_aggrprobe(p))
		return kprobe_disabled(p);

	op = container_of(p, struct optimized_kprobe, kp);

	return kprobe_disabled(p) && list_empty(&op->list);
}

/* Return true(!0) if the probe is queued on (un)optimizing lists */
static int kprobe_queued(struct kprobe *p)
{
	struct optimized_kprobe *op;

	if (kprobe_aggrprobe(p)) {
		op = container_of(p, struct optimized_kprobe, kp);
		if (!list_empty(&op->list))
			return 1;
	}
	return 0;
}

/*
 * Return an optimized kprobe whose optimizing code replaces
 * instructions including addr (exclude breakpoint).
 */
static struct kprobe *get_optimized_kprobe(unsigned long addr)
{
	int i;
	struct kprobe *p = NULL;
	struct optimized_kprobe *op;

	/* Don't check i == 0, since that is a breakpoint case. */
	for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH; i++)
		p = get_kprobe((void *)(addr - i));

	if (p && kprobe_optready(p)) {
		op = container_of(p, struct optimized_kprobe, kp);
		if (arch_within_optimized_kprobe(op, addr))
			return p;
	}

	return NULL;
}

/* Optimization staging list, protected by kprobe_mutex */
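/*
 * (optimizing_list feeds do_optimize_kprobes(), unoptimizing_list feeds
 * do_unoptimize_kprobes(), and freeing_list holds unused aggregators until
 * do_free_cleaned_kprobes() reclaims them.)
 */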
static LIST_HEAD(optimizing_list);
static LIST_HEAD(unoptimizing_list);
static LIST_HEAD(freeing_list);

static void kprobe_optimizer(struct work_struct *work);
static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
#define OPTIMIZE_DELAY 5

/*
 * Optimize (replace a breakpoint with a jump) kprobes listed on
 * optimizing_list.
 */
static void do_optimize_kprobes(void)
{
	/*
	 * The optimization/unoptimization refers to online_cpus via
	 * stop_machine(), while cpu-hotplug modifies online_cpus. At the
	 * same time, text_mutex is held both here and in the cpu-hotplug
	 * path. This combination can cause a deadlock (cpu-hotplug tries
	 * to lock text_mutex but stop_machine() cannot run because
	 * online_cpus has been changed).
	 * To avoid this deadlock, the caller must have locked cpu hotplug,
	 * which prevents cpu-hotplug from running outside of text_mutex
	 * locking.
	 */
	lockdep_assert_cpus_held();

	/* Optimization is never done when disarmed */
	if (kprobes_all_disarmed || !kprobes_allow_optimization ||
	    list_empty(&optimizing_list))
		return;

	mutex_lock(&text_mutex);
	arch_optimize_kprobes(&optimizing_list);
	mutex_unlock(&text_mutex);

508 509 510 511
/*
 * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
 * if need) kprobes listed on unoptimizing_list.
 */
512
static void do_unoptimize_kprobes(void)
513 514 515
{
	struct optimized_kprobe *op, *tmp;

516 517 518
	/* See comment in do_optimize_kprobes() */
	lockdep_assert_cpus_held();

519 520 521 522 523
	/* Unoptimization must be done anytime */
	if (list_empty(&unoptimizing_list))
		return;

	mutex_lock(&text_mutex);
524
	arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
525
	/* Loop free_list for disarming */
526
	list_for_each_entry_safe(op, tmp, &freeing_list, list) {
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543
		/* Disarm probes if marked disabled */
		if (kprobe_disabled(&op->kp))
			arch_disarm_kprobe(&op->kp);
		if (kprobe_unused(&op->kp)) {
			/*
			 * Remove unused probes from hash list. After waiting
			 * for synchronization, these probes are reclaimed.
			 * (reclaiming is done by do_free_cleaned_kprobes.)
			 */
			hlist_del_rcu(&op->kp.hlist);
		} else
			list_del_init(&op->list);
	}
	mutex_unlock(&text_mutex);
}

/* Reclaim all kprobes on the freeing_list */
static void do_free_cleaned_kprobes(void)
{
	struct optimized_kprobe *op, *tmp;

	list_for_each_entry_safe(op, tmp, &freeing_list, list) {
		BUG_ON(!kprobe_unused(&op->kp));
		list_del_init(&op->list);
		free_aggr_kprobe(&op->kp);
	}
}

/* Start optimizer after OPTIMIZE_DELAY passed */
static void kick_kprobe_optimizer(void)
{
	schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
}

/* Kprobe jump optimizer */
static void kprobe_optimizer(struct work_struct *work)
{
	mutex_lock(&kprobe_mutex);
	cpus_read_lock();
	/* Lock modules while optimizing kprobes */
	mutex_lock(&module_mutex);

	/*
	 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
	 * kprobes before waiting for the quiescence period.
	 */
	do_unoptimize_kprobes();

	/*
	 * Step 2: Wait for the quiescence period to ensure that all
	 * potentially preempted tasks have scheduled normally. Because an
	 * optprobe may modify multiple instructions, there is a chance that
	 * a task was preempted on the Nth instruction and could return to
	 * the 2nd-Nth byte of the jump instruction. This wait avoids that.
	 * Note that on a non-preemptive kernel, this is transparently
	 * converted to synchronize_sched() to wait for all interrupts to
	 * have completed.
	 */
	synchronize_rcu_tasks();

	/* Step 3: Optimize kprobes after the quiescence period */
	do_optimize_kprobes();

	/* Step 4: Free cleaned kprobes after the quiescence period */
	do_free_cleaned_kprobes();

	mutex_unlock(&module_mutex);
	cpus_read_unlock();
	mutex_unlock(&kprobe_mutex);

	/* Step 5: Kick optimizer again if needed */
	if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
		kick_kprobe_optimizer();
}

/* Wait for completing optimization and unoptimization */
void wait_for_kprobe_optimizer(void)
{
	mutex_lock(&kprobe_mutex);

	while (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) {
		mutex_unlock(&kprobe_mutex);

		/* this will also make optimizing_work execute immediately */
		flush_delayed_work(&optimizing_work);
		/* @optimizing_work might not have been queued yet, relax */
		cpu_relax();

		mutex_lock(&kprobe_mutex);
	}

	mutex_unlock(&kprobe_mutex);
}

/* Optimize kprobe if p is ready to be optimized */
static void optimize_kprobe(struct kprobe *p)
{
	struct optimized_kprobe *op;

	/* Check if the kprobe is disabled or not ready for optimization. */
	if (!kprobe_optready(p) || !kprobes_allow_optimization ||
	    (kprobe_disabled(p) || kprobes_all_disarmed))
		return;

	/* Neither break_handler nor post_handler is supported. */
	if (p->break_handler || p->post_handler)
		return;

	op = container_of(p, struct optimized_kprobe, kp);

	/* Check that there are no other kprobes at the optimized instructions */
	if (arch_check_optimized_kprobe(op) < 0)
		return;

	/* Check if it is already optimized. */
	if (op->kp.flags & KPROBE_FLAG_OPTIMIZED)
		return;
	op->kp.flags |= KPROBE_FLAG_OPTIMIZED;

	if (!list_empty(&op->list))
		/* This is under unoptimizing. Just dequeue the probe */
		list_del_init(&op->list);
	else {
		list_add(&op->list, &optimizing_list);
		kick_kprobe_optimizer();
	}
}

/* Short cut to direct unoptimizing */
static void force_unoptimize_kprobe(struct optimized_kprobe *op)
{
	lockdep_assert_cpus_held();
	arch_unoptimize_kprobe(op);
	if (kprobe_disabled(&op->kp))
		arch_disarm_kprobe(&op->kp);
}

/* Unoptimize a kprobe if p is optimized */
static void unoptimize_kprobe(struct kprobe *p, bool force)
{
	struct optimized_kprobe *op;

	if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
		return; /* This is not an optprobe nor optimized */

	op = container_of(p, struct optimized_kprobe, kp);
	if (!kprobe_optimized(p)) {
		/* Unoptimized or unoptimizing case */
		if (force && !list_empty(&op->list)) {
			/*
			 * Only if this is unoptimizing kprobe and forced,
			 * forcibly unoptimize it. (No need to unoptimize
			 * unoptimized kprobe again :)
			 */
			list_del_init(&op->list);
			force_unoptimize_kprobe(op);
		}
		return;
	}

	op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
	if (!list_empty(&op->list)) {
		/* Dequeue from the optimization queue */
		list_del_init(&op->list);
		return;
	}
	/* Optimized kprobe case */
	if (force)
		/* Forcibly update the code: this is a special case */
		force_unoptimize_kprobe(op);
	else {
		list_add(&op->list, &unoptimizing_list);
		kick_kprobe_optimizer();
	}
}

/* Cancel unoptimizing for reusing */
static void reuse_unused_kprobe(struct kprobe *ap)
{
	struct optimized_kprobe *op;

	BUG_ON(!kprobe_unused(ap));
	/*
	 * Unused kprobe MUST be on the way of delayed unoptimizing (means
	 * there is still a relative jump) and disabled.
	 */
	op = container_of(ap, struct optimized_kprobe, kp);
	if (unlikely(list_empty(&op->list)))
		printk(KERN_WARNING "Warning: found a stray unused "
			"aggrprobe@%p\n", ap->addr);
	/* Enable the probe again */
	ap->flags &= ~KPROBE_FLAG_DISABLED;
	/* Optimize it again (remove from op->list) */
	BUG_ON(!kprobe_optready(ap));
	optimize_kprobe(ap);
}

/* Remove optimized instructions */
static void kill_optimized_kprobe(struct kprobe *p)
{
	struct optimized_kprobe *op;

	op = container_of(p, struct optimized_kprobe, kp);
	if (!list_empty(&op->list))
		/* Dequeue from the (un)optimization queue */
		list_del_init(&op->list);
	op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;

	if (kprobe_unused(p)) {
		/* Enqueue if it is unused */
		list_add(&op->list, &freeing_list);
		/*
		 * Remove unused probes from the hash list. After waiting
		 * for synchronization, this probe is reclaimed.
		 * (reclaiming is done by do_free_cleaned_kprobes().)
		 */
		hlist_del_rcu(&op->kp.hlist);
	}

	/* Don't touch the code, because it is already freed. */
	arch_remove_optimized_kprobe(op);
}

static inline
void __prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
{
	if (!kprobe_ftrace(p))
		arch_prepare_optimized_kprobe(op, p);
}

/* Try to prepare optimized instructions */
static void prepare_optimized_kprobe(struct kprobe *p)
{
	struct optimized_kprobe *op;

	op = container_of(p, struct optimized_kprobe, kp);
	__prepare_optimized_kprobe(op, p);
}

/* Allocate new optimized_kprobe and try to prepare optimized instructions */
static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
{
	struct optimized_kprobe *op;

	op = kzalloc(sizeof(struct optimized_kprobe), GFP_KERNEL);
	if (!op)
		return NULL;

	INIT_LIST_HEAD(&op->list);
	op->kp.addr = p->addr;
	__prepare_optimized_kprobe(op, p);

	return &op->kp;
}

static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);

/*
 * Prepare an optimized_kprobe and optimize it
 * NOTE: p must be a normal registered kprobe
 */
static void try_to_optimize_kprobe(struct kprobe *p)
{
	struct kprobe *ap;
	struct optimized_kprobe *op;

	/* Impossible to optimize ftrace-based kprobe */
	if (kprobe_ftrace(p))
		return;

	/* For preparing optimization, jump_label_text_reserved() is called */
	cpus_read_lock();
	jump_label_lock();
	mutex_lock(&text_mutex);

	ap = alloc_aggr_kprobe(p);
	if (!ap)
		goto out;

	op = container_of(ap, struct optimized_kprobe, kp);
	if (!arch_prepared_optinsn(&op->optinsn)) {
		/* If failed to setup optimizing, fallback to kprobe */
		arch_remove_optimized_kprobe(op);
		kfree(op);
		goto out;
	}

	init_aggr_kprobe(ap, p);
	optimize_kprobe(ap);	/* This just kicks optimizer thread */

out:
	mutex_unlock(&text_mutex);
	jump_label_unlock();
	cpus_read_unlock();
}

#ifdef CONFIG_SYSCTL
static void optimize_all_kprobes(void)
{
	struct hlist_head *head;
	struct kprobe *p;
	unsigned int i;

	mutex_lock(&kprobe_mutex);
	/* If optimization is already allowed, just return */
	if (kprobes_allow_optimization)
		goto out;

	cpus_read_lock();
	kprobes_allow_optimization = true;
	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		head = &kprobe_table[i];
		hlist_for_each_entry_rcu(p, head, hlist)
			if (!kprobe_disabled(p))
				optimize_kprobe(p);
	}
	cpus_read_unlock();
	printk(KERN_INFO "Kprobes globally optimized\n");
out:
	mutex_unlock(&kprobe_mutex);
}

static void unoptimize_all_kprobes(void)
{
	struct hlist_head *head;
	struct kprobe *p;
	unsigned int i;

	mutex_lock(&kprobe_mutex);
	/* If optimization is already prohibited, just return */
	if (!kprobes_allow_optimization) {
		mutex_unlock(&kprobe_mutex);
		return;
	}

	cpus_read_lock();
	kprobes_allow_optimization = false;
	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
		head = &kprobe_table[i];
		hlist_for_each_entry_rcu(p, head, hlist) {
			if (!kprobe_disabled(p))
				unoptimize_kprobe(p, false);
		}
	}
	cpus_read_unlock();
	mutex_unlock(&kprobe_mutex);

	/* Wait for unoptimizing completion */
	wait_for_kprobe_optimizer();
	printk(KERN_INFO "Kprobes globally unoptimized\n");
}

static DEFINE_MUTEX(kprobe_sysctl_mutex);
int sysctl_kprobes_optimization;
int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
				      void __user *buffer, size_t *length,
				      loff_t *ppos)
{
	int ret;

	mutex_lock(&kprobe_sysctl_mutex);
	sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0;
	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);

	if (sysctl_kprobes_optimization)
		optimize_all_kprobes();
	else
		unoptimize_all_kprobes();
	mutex_unlock(&kprobe_sysctl_mutex);

	return ret;
}
#endif /* CONFIG_SYSCTL */

/* Put a breakpoint for a probe. Must be called with text_mutex locked */
static void __arm_kprobe(struct kprobe *p)
{
	struct kprobe *_p;

	/* Check collision with other optimized kprobes */
	_p = get_optimized_kprobe((unsigned long)p->addr);
	if (unlikely(_p))
		/* Fallback to unoptimized kprobe */
		unoptimize_kprobe(_p, true);

	arch_arm_kprobe(p);
	optimize_kprobe(p);	/* Try to optimize (add kprobe to a list) */
}

/* Remove the breakpoint of a probe. Must be called with text_mutex locked */
static void __disarm_kprobe(struct kprobe *p, bool reopt)
{
	struct kprobe *_p;

	/* Try to unoptimize */
	unoptimize_kprobe(p, kprobes_all_disarmed);

	if (!kprobe_queued(p)) {
		arch_disarm_kprobe(p);
		/* If another kprobe was blocked, optimize it. */
		_p = get_optimized_kprobe((unsigned long)p->addr);
		if (unlikely(_p) && reopt)
			optimize_kprobe(_p);
	}
	/* TODO: reoptimize others after unoptimized this probe */
}

#else /* !CONFIG_OPTPROBES */

#define optimize_kprobe(p)			do {} while (0)
#define unoptimize_kprobe(p, f)			do {} while (0)
#define kill_optimized_kprobe(p)		do {} while (0)
#define prepare_optimized_kprobe(p)		do {} while (0)
#define try_to_optimize_kprobe(p)		do {} while (0)
#define __arm_kprobe(p)				arch_arm_kprobe(p)
#define __disarm_kprobe(p, o)			arch_disarm_kprobe(p)
#define kprobe_disarmed(p)			kprobe_disabled(p)
#define wait_for_kprobe_optimizer()		do {} while (0)

/* Without optimization, there should be no unused kprobes to be reused */
static void reuse_unused_kprobe(struct kprobe *ap)
{
	printk(KERN_ERR "Error: There should be no unused kprobe here.\n");
	BUG_ON(kprobe_unused(ap));
}

static void free_aggr_kprobe(struct kprobe *p)
{
	arch_remove_kprobe(p);
	kfree(p);
}

static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
{
	return kzalloc(sizeof(struct kprobe), GFP_KERNEL);
}
#endif /* CONFIG_OPTPROBES */

#ifdef CONFIG_KPROBES_ON_FTRACE
static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
	.func = kprobe_ftrace_handler,
	.flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
};
static int kprobe_ftrace_enabled;

/* Must ensure p->addr is really on ftrace */
static int prepare_kprobe(struct kprobe *p)
{
	if (!kprobe_ftrace(p))
		return arch_prepare_kprobe(p);

	return arch_prepare_kprobe_ftrace(p);
}

/* Caller must lock kprobe_mutex */
static void arm_kprobe_ftrace(struct kprobe *p)
{
	int ret;

	ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
				   (unsigned long)p->addr, 0, 0);
	WARN(ret < 0, "Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret);
	kprobe_ftrace_enabled++;
	if (kprobe_ftrace_enabled == 1) {
		ret = register_ftrace_function(&kprobe_ftrace_ops);
		WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
	}
}

/* Caller must lock kprobe_mutex */
static void disarm_kprobe_ftrace(struct kprobe *p)
{
	int ret;

	kprobe_ftrace_enabled--;
	if (kprobe_ftrace_enabled == 0) {
		ret = unregister_ftrace_function(&kprobe_ftrace_ops);
		WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret);
	}
	ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
			   (unsigned long)p->addr, 1, 0);
	WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
}
#else	/* !CONFIG_KPROBES_ON_FTRACE */
#define prepare_kprobe(p)	arch_prepare_kprobe(p)
#define arm_kprobe_ftrace(p)	do {} while (0)
#define disarm_kprobe_ftrace(p)	do {} while (0)
#endif

/* Arm a kprobe with text_mutex */
static void arm_kprobe(struct kprobe *kp)
{
	if (unlikely(kprobe_ftrace(kp))) {
		arm_kprobe_ftrace(kp);
		return;
	}
	cpus_read_lock();
	mutex_lock(&text_mutex);
	__arm_kprobe(kp);
	mutex_unlock(&text_mutex);
	cpus_read_unlock();
}

/* Disarm a kprobe with text_mutex */
static void disarm_kprobe(struct kprobe *kp, bool reopt)
{
	if (unlikely(kprobe_ftrace(kp))) {
		disarm_kprobe_ftrace(kp);
		return;
	}

	cpus_read_lock();
	mutex_lock(&text_mutex);
	__disarm_kprobe(kp, reopt);
	mutex_unlock(&text_mutex);
	cpus_read_unlock();
}

/*
 * Aggregate handlers for multiple kprobes support - these handlers
 * take care of invoking the individual kprobe handlers on p->list
 */
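/*
 * The per-cpu kprobe_instance records which child probe is currently
 * running, so that the fault and break handlers below can route the event
 * to that probe's own handler.
 */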
static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe *kp;

	list_for_each_entry_rcu(kp, &p->list, list) {
		if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
			set_kprobe_instance(kp);
			if (kp->pre_handler(kp, regs))
				return 1;
		}
		reset_kprobe_instance();
	}
	return 0;
}
NOKPROBE_SYMBOL(aggr_pre_handler);

static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
			      unsigned long flags)
{
	struct kprobe *kp;

	list_for_each_entry_rcu(kp, &p->list, list) {
		if (kp->post_handler && likely(!kprobe_disabled(kp))) {
			set_kprobe_instance(kp);
			kp->post_handler(kp, regs, flags);
			reset_kprobe_instance();
		}
	}
}
NOKPROBE_SYMBOL(aggr_post_handler);

static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
			      int trapnr)
{
	struct kprobe *cur = __this_cpu_read(kprobe_instance);

	/*
	 * if we faulted "during" the execution of a user specified
	 * probe handler, invoke just that probe's fault handler
	 */
	if (cur && cur->fault_handler) {
		if (cur->fault_handler(cur, regs, trapnr))
			return 1;
	}
	return 0;
}
NOKPROBE_SYMBOL(aggr_fault_handler);

static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
{
	struct kprobe *cur = __this_cpu_read(kprobe_instance);
	int ret = 0;

	if (cur && cur->break_handler) {
		if (cur->break_handler(cur, regs))
			ret = 1;
	}
	reset_kprobe_instance();
	return ret;
}
NOKPROBE_SYMBOL(aggr_break_handler);

/* Walks the list and increments nmissed count for multiprobe case */
void kprobes_inc_nmissed_count(struct kprobe *p)
{
	struct kprobe *kp;
	if (!kprobe_aggrprobe(p)) {
		p->nmissed++;
	} else {
		list_for_each_entry_rcu(kp, &p->list, list)
			kp->nmissed++;
	}
	return;
}
NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);

void recycle_rp_inst(struct kretprobe_instance *ri,
		     struct hlist_head *head)
{
	struct kretprobe *rp = ri->rp;

	/* remove rp inst off the kretprobe_inst_table */
	hlist_del(&ri->hlist);