/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licenced under the GPL.
 */
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched/signal.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/task.h>
#include <linux/sched/smt.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/oom.h>
#include <linux/rcupdate.h>
#include <linux/export.h>
#include <linux/bug.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
#include <linux/mutex.h>
#include <linux/gfp.h>
#include <linux/suspend.h>
#include <linux/lockdep.h>
#include <linux/tick.h>
#include <linux/irq.h>
#include <linux/nmi.h>
#include <linux/smpboot.h>
#include <linux/relay.h>
#include <linux/slab.h>
#include <linux/percpu-rwsem.h>

#include <trace/events/power.h>
#define CREATE_TRACE_POINTS
#include <trace/events/cpuhp.h>

#include "smpboot.h"

/**
 * cpuhp_cpu_state - Per cpu hotplug state storage
 * @state:	The current cpu state
 * @target:	The target state
 * @thread:	Pointer to the hotplug thread
 * @should_run:	Thread should execute
 * @rollback:	Perform a rollback
 * @single:	Single callback invocation
 * @bringup:	Single callback bringup or teardown selector
 * @cb_state:	The state for a single callback (install/uninstall)
 * @result:	Result of the operation
 * @done_up:	Signal completion to the issuer of the task for cpu-up
 * @done_down:	Signal completion to the issuer of the task for cpu-down
 */
struct cpuhp_cpu_state {
	enum cpuhp_state	state;
	enum cpuhp_state	target;
	enum cpuhp_state	fail;
#ifdef CONFIG_SMP
	struct task_struct	*thread;
	bool			should_run;
	bool			rollback;
	bool			single;
	bool			bringup;
	bool			booted_once;
	struct hlist_node	*node;
	struct hlist_node	*last;
	enum cpuhp_state	cb_state;
	int			result;
	struct completion	done_up;
	struct completion	done_down;
#endif
};

static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
	.fail = CPUHP_INVALID,
};
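
/*
 * Usage sketch: the hotplug code below reaches this per-cpu state either
 * for a specific CPU or for the current one:
 *
 *	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 *	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
 *
 * st->state records how far the state machine has progressed, st->target
 * is where the current operation wants to end up, and st->fail can be
 * preset to a state to make that state's callback report an error (see
 * cpuhp_invoke_callback() below).
 */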

#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
static struct lockdep_map cpuhp_state_up_map =
	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
static struct lockdep_map cpuhp_state_down_map =
	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);


static inline void cpuhp_lock_acquire(bool bringup)
{
	lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
}

static inline void cpuhp_lock_release(bool bringup)
{
	lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
}
#else

static inline void cpuhp_lock_acquire(bool bringup) { }
static inline void cpuhp_lock_release(bool bringup) { }

#endif
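
/*
 * Sketch of how the maps above are used: a hotplug operation brackets its
 * callback invocations with
 *
 *	cpuhp_lock_acquire(bringup);
 *	...invoke state callbacks...
 *	cpuhp_lock_release(bringup);
 *
 * so that lockdep can connect the lock chains seen during bringup and
 * teardown (see cpuhp_thread_fun() and cpuhp_kick_ap_work() below).
 */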

/**
 * cpuhp_step - Hotplug state machine step
 * @name:	Name of the step
 * @startup:	Startup function of the step
 * @teardown:	Teardown function of the step
 * @skip_onerr:	Do not invoke the functions on error rollback
 *		Will go away once the notifiers are gone
 * @cant_stop:	Bringup/teardown can't be stopped at this step
 */
struct cpuhp_step {
	const char		*name;
	union {
		int		(*single)(unsigned int cpu);
		int		(*multi)(unsigned int cpu,
					 struct hlist_node *node);
	} startup;
	union {
		int		(*single)(unsigned int cpu);
		int		(*multi)(unsigned int cpu,
					 struct hlist_node *node);
	} teardown;
	struct hlist_head	list;
	bool			skip_onerr;
	bool			cant_stop;
	bool			multi_instance;
};

static DEFINE_MUTEX(cpuhp_state_mutex);
static struct cpuhp_step cpuhp_bp_states[];
static struct cpuhp_step cpuhp_ap_states[];

static bool cpuhp_is_ap_state(enum cpuhp_state state)
{
	/*
	 * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
	 * purposes as that state is handled explicitly in cpu_down.
	 */
	return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
}

static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
{
	struct cpuhp_step *sp;

	sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
	return sp + state;
}
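
/*
 * Example: given a state, the corresponding step descriptor and its name
 * can be looked up with
 *
 *	struct cpuhp_step *step = cpuhp_get_step(st->state);
 *	pr_debug("step %s\n", step->name);
 *
 * (the pr_debug() line is purely illustrative; the callers below use the
 * step to pick the startup or teardown callback.)
 */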

/**
 * cpuhp_invoke_callback - Invoke the callbacks for a given state
 * @cpu:	The cpu for which the callback should be invoked
 * @state:	The state to do callbacks for
 * @bringup:	True if the bringup callback should be invoked
 * @node:	For multi-instance, do a single entry callback for install/remove
 * @lastp:	For multi-instance rollback, remember how far we got
 *
 * Called from cpu hotplug and from the state register machinery.
 */
static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
				 bool bringup, struct hlist_node *node,
				 struct hlist_node **lastp)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	struct cpuhp_step *step = cpuhp_get_step(state);
	int (*cbm)(unsigned int cpu, struct hlist_node *node);
	int (*cb)(unsigned int cpu);
	int ret, cnt;

	if (st->fail == state) {
		st->fail = CPUHP_INVALID;

		if (!(bringup ? step->startup.single : step->teardown.single))
			return 0;

		return -EAGAIN;
	}

	if (!step->multi_instance) {
		WARN_ON_ONCE(lastp && *lastp);
		cb = bringup ? step->startup.single : step->teardown.single;
		if (!cb)
			return 0;
		trace_cpuhp_enter(cpu, st->target, state, cb);
		ret = cb(cpu);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		return ret;
	}
	cbm = bringup ? step->startup.multi : step->teardown.multi;
	if (!cbm)
		return 0;

	/* Single invocation for instance add/remove */
	if (node) {
		WARN_ON_ONCE(lastp && *lastp);
		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
		ret = cbm(cpu, node);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		return ret;
	}

	/* State transition. Invoke on all instances */
	cnt = 0;
	hlist_for_each(node, &step->list) {
		if (lastp && node == *lastp)
			break;

		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
		ret = cbm(cpu, node);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		if (ret) {
			if (!lastp)
				goto err;

			*lastp = node;
			return ret;
		}
		cnt++;
	}
	if (lastp)
		*lastp = NULL;
	return 0;
err:
	/* Rollback the instances if one failed */
	cbm = !bringup ? step->startup.multi : step->teardown.multi;
	if (!cbm)
		return ret;

	hlist_for_each(node, &step->list) {
		if (!cnt--)
			break;

		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
		ret = cbm(cpu, node);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		/*
		 * Rollback must not fail.
		 */
		WARN_ON_ONCE(ret);
	}
	return ret;
}

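/*
 * Summary sketch of the dispatch above: a non-multi-instance state runs a
 * single per-cpu callback; a multi-instance state either runs the callback
 * for one instance node (install/remove) or walks the whole instance list
 * for a state transition, remembering in *lastp how far it got so that a
 * failed bringup/teardown can be rolled back instance by instance.
 */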
#ifdef CONFIG_SMP
static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
{
	struct completion *done = bringup ? &st->done_up : &st->done_down;
	wait_for_completion(done);
}

static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
{
	struct completion *done = bringup ? &st->done_up : &st->done_down;
	complete(done);
}

/*
 * The former STARTING/DYING states run with IRQs disabled and must not fail.
 */
static bool cpuhp_is_atomic_state(enum cpuhp_state state)
{
	return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
}

/* Serializes the updates to cpu_online_mask, cpu_present_mask */
static DEFINE_MUTEX(cpu_add_remove_lock);
bool cpuhp_tasks_frozen;
EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);

/*
 * The following two APIs (cpu_maps_update_begin/done) must be used when
 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
 */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}

void cpu_maps_update_done(void)
{
	mutex_unlock(&cpu_add_remove_lock);
}

/*
 * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
 * Should always be manipulated under cpu_add_remove_lock
 */
static int cpu_hotplug_disabled;

#ifdef CONFIG_HOTPLUG_CPU

DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);

void cpus_read_lock(void)
{
	percpu_down_read(&cpu_hotplug_lock);
}
EXPORT_SYMBOL_GPL(cpus_read_lock);

void cpus_read_unlock(void)
{
	percpu_up_read(&cpu_hotplug_lock);
}
EXPORT_SYMBOL_GPL(cpus_read_unlock);

void cpus_write_lock(void)
{
	percpu_down_write(&cpu_hotplug_lock);
}

void cpus_write_unlock(void)
{
	percpu_up_write(&cpu_hotplug_lock);
}

void lockdep_assert_cpus_held(void)
{
	/*
	 * We can't have hotplug operations before userspace starts running,
	 * and some init codepaths will knowingly not take the hotplug lock.
	 * This is all valid, so mute lockdep until it makes sense to report
	 * unheld locks.
	 */
	if (system_state < SYSTEM_RUNNING)
		return;

	percpu_rwsem_assert_held(&cpu_hotplug_lock);
}
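
/*
 * Typical usage sketch for the hotplug read side (do_something() is a
 * placeholder):
 *
 *	cpus_read_lock();
 *	for_each_online_cpu(cpu)
 *		do_something(cpu);
 *	cpus_read_unlock();
 *
 * This keeps cpu_online_mask stable for the duration; writers serialize
 * through cpus_write_lock()/cpus_write_unlock() as done in _cpu_up() and
 * _cpu_down() below.
 */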

/*
 * Wait for currently running CPU hotplug operations to complete (if any) and
 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
 * hotplug path before performing hotplug operations. So acquiring that lock
 * guarantees mutual exclusion from any currently running hotplug operations.
 */
void cpu_hotplug_disable(void)
{
	cpu_maps_update_begin();
	cpu_hotplug_disabled++;
	cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_disable);

static void __cpu_hotplug_enable(void)
{
	if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
		return;
	cpu_hotplug_disabled--;
}
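
/*
 * cpu_hotplug_disabled is a count, so disable/enable calls may nest, e.g.
 * (illustrative):
 *
 *	cpu_hotplug_disable();
 *	...section that must not race with CPU hotplug...
 *	cpu_hotplug_enable();
 *
 * An enable without a matching disable triggers the WARN_ONCE() above.
 */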

void cpu_hotplug_enable(void)
{
	cpu_maps_update_begin();
	__cpu_hotplug_enable();
	cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
#endif	/* CONFIG_HOTPLUG_CPU */

/*
 * Architectures that need SMT-specific errata handling during SMT hotplug
 * should override this.
 */
void __weak arch_smt_update(void) { }

#ifdef CONFIG_HOTPLUG_SMT
enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;

void __init cpu_smt_disable(bool force)
{
	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
		cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
		return;

	if (force) {
		pr_info("SMT: Force disabled\n");
		cpu_smt_control = CPU_SMT_FORCE_DISABLED;
	} else {
		cpu_smt_control = CPU_SMT_DISABLED;
	}
}

/*
 * The decision whether SMT is supported can only be done after the full
 * CPU identification. Called from architecture code.
 */
void __init cpu_smt_check_topology(void)
{
	if (!topology_smt_supported())
		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
}

static int __init smt_cmdline_disable(char *str)
{
	cpu_smt_disable(str && !strcmp(str, "force"));
	return 0;
}
early_param("nosmt", smt_cmdline_disable);

static inline bool cpu_smt_allowed(unsigned int cpu)
{
	if (cpu_smt_control == CPU_SMT_ENABLED)
		return true;

	if (topology_is_primary_thread(cpu))
		return true;
		return true;

	/*
	 * On x86 it's required to boot all logical CPUs at least once so
	 * that the init code can get a chance to set CR4.MCE on each
	 * CPU. Otherwise, a broadcast MCE observing CR4.MCE=0b on any
	 * core will shutdown the machine.
	 */
	return !per_cpu(cpuhp_state, cpu).booted_once;
}
#else
static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
#endif
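
/*
 * Illustrative note: "nosmt" on the kernel command line ends up in
 * cpu_smt_disable(false) and "nosmt=force" in cpu_smt_disable(true); the
 * resulting cpu_smt_control value is consulted by cpu_smt_allowed(), which
 * gates secondary SMT siblings in bringup_wait_for_ap() and do_cpu_up().
 */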

static inline enum cpuhp_state
cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;

	st->rollback = false;
	st->last = NULL;

	st->target = target;
	st->single = false;
	st->bringup = st->state < target;

	return prev_state;
}

static inline void
cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
{
	st->rollback = true;

	/*
	 * If we have st->last we need to undo partial multi_instance of this
	 * state first. Otherwise start undo at the previous state.
	 */
	if (!st->last) {
		if (st->bringup)
			st->state--;
		else
			st->state++;
	}

	st->target = prev_state;
	st->bringup = !st->bringup;
}

/* Regular hotplug invocation of the AP hotplug thread */
static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
{
	if (!st->single && st->state == st->target)
		return;

	st->result = 0;
	/*
	 * Make sure the above stores are visible before should_run becomes
	 * true. Paired with the mb() above in cpuhp_thread_fun()
	 */
	smp_mb();
	st->should_run = true;
	wake_up_process(st->thread);
	wait_for_ap_thread(st, st->bringup);
}

static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
{
	enum cpuhp_state prev_state;
	int ret;

	prev_state = cpuhp_set_state(st, target);
	__cpuhp_kick_ap(st);
	if ((ret = st->result)) {
		cpuhp_reset_state(st, prev_state);
		__cpuhp_kick_ap(st);
	}

	return ret;
}
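
/*
 * Sketch of the control flow above: cpuhp_kick_ap() records the target
 * state, wakes the per-cpu hotplug thread via __cpuhp_kick_ap() and waits
 * for its completion; if the thread reports an error, the state is reset
 * to the previous target and the thread is kicked once more to roll back.
 */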

static int bringup_wait_for_ap(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

	/* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
	wait_for_ap_thread(st, true);
	if (WARN_ON_ONCE((!cpu_online(cpu))))
		return -ECANCELED;

	/* Unpark the stopper thread and the hotplug thread of the target cpu */
	stop_machine_unpark(cpu);
	kthread_unpark(st->thread);

	/*
	 * SMT soft disabling on X86 requires bringing the CPU out of the
	 * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit.  The
	 * CPU marked itself as booted_once in notify_cpu_starting() so the
	 * cpu_smt_allowed() check will now return false if this is not the
	 * primary sibling.
	 */
	if (!cpu_smt_allowed(cpu))
		return -ECANCELED;

	if (st->target <= CPUHP_AP_ONLINE_IDLE)
		return 0;

	return cpuhp_kick_ap(st, st->target);
}

static int bringup_cpu(unsigned int cpu)
{
	struct task_struct *idle = idle_thread_get(cpu);
	int ret;

	/*
	 * Some architectures have to walk the irq descriptors to
	 * setup the vector space for the cpu which comes online.
	 * Prevent irq alloc/free across the bringup.
	 */
	irq_lock_sparse();

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu, idle);
	irq_unlock_sparse();
	if (ret)
		return ret;
	return bringup_wait_for_ap(cpu);
}

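/*
 * Sketch of the bringup sequence above: __cpu_up() (architecture code)
 * starts the new CPU, which runs notify_cpu_starting() and enters the idle
 * loop; bringup_wait_for_ap() then waits for CPUHP_AP_ONLINE_IDLE, unparks
 * the stopper and hotplug threads and, if the target lies beyond
 * CPUHP_AP_ONLINE_IDLE, hands the rest of the bringup to the AP thread.
 */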
/*
 * Hotplug state machine related functions
 */

static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	for (st->state--; st->state > st->target; st->state--) {
		struct cpuhp_step *step = cpuhp_get_step(st->state);

		if (!step->skip_onerr)
			cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
	}
}

static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
{
	if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
		return true;
	/*
	 * When CPU hotplug is disabled, then taking the CPU down is not
	 * possible because takedown_cpu() and the architecture and
	 * subsystem specific mechanisms are not available. So the CPU
	 * which would be completely unplugged again needs to stay around
	 * in the current state.
	 */
	return st->state <= CPUHP_BRINGUP_CPU;
}

static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
			      enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;
	int ret = 0;

	while (st->state < target) {
		st->state++;
		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
		if (ret) {
			if (can_rollback_cpu(st)) {
				st->target = prev_state;
				undo_cpu_up(cpu, st);
			}
			break;
		}
	}
	return ret;
}

/*
 * The cpu hotplug threads manage the bringup and teardown of the cpus
 */
static void cpuhp_create(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

	init_completion(&st->done_up);
	init_completion(&st->done_down);
}

static int cpuhp_should_run(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	return st->should_run;
}

/*
 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
 * callbacks when a state gets [un]installed at runtime.
 *
 * Each invocation of this function by the smpboot thread does a single AP
 * state callback.
 *
 * It has 3 modes of operation:
 *  - single: runs st->cb_state
 *  - up:     runs ++st->state, while st->state < st->target
 *  - down:   runs st->state--, while st->state > st->target
 *
 * When complete or on error, should_run is cleared and the completion is fired.
 */
static void cpuhp_thread_fun(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	bool bringup = st->bringup;
	enum cpuhp_state state;

	if (WARN_ON_ONCE(!st->should_run))
		return;

	/*
	 * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
	 * that if we see ->should_run we also see the rest of the state.
	 */
	smp_mb();

	cpuhp_lock_acquire(bringup);

	if (st->single) {
		state = st->cb_state;
		st->should_run = false;
	} else {
		if (bringup) {
			st->state++;
			state = st->state;
			st->should_run = (st->state < st->target);
			WARN_ON_ONCE(st->state > st->target);
		} else {
			state = st->state;
			st->state--;
			st->should_run = (st->state > st->target);
			WARN_ON_ONCE(st->state < st->target);
		}
	}

	WARN_ON_ONCE(!cpuhp_is_ap_state(state));

	if (st->rollback) {
		struct cpuhp_step *step = cpuhp_get_step(state);
		if (step->skip_onerr)
			goto next;
	}

	if (cpuhp_is_atomic_state(state)) {
		local_irq_disable();
		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
		local_irq_enable();

		/*
		 * STARTING/DYING must not fail!
		 */
		WARN_ON_ONCE(st->result);
	} else {
		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
	}

	if (st->result) {
		/*
		 * If we fail on a rollback, we're up a creek without a
		 * paddle, no way forward, no way back. We lose, thanks for
		 * playing.
		 */
		WARN_ON_ONCE(st->rollback);
		st->should_run = false;
	}

next:
	cpuhp_lock_release(bringup);

	if (!st->should_run)
		complete_ap_thread(st, bringup);
}

/* Invoke a single callback on a remote cpu */
static int
cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
			 struct hlist_node *node)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int ret;

	if (!cpu_online(cpu))
		return 0;

	cpuhp_lock_acquire(false);
	cpuhp_lock_release(false);

	cpuhp_lock_acquire(true);
	cpuhp_lock_release(true);

	/*
	 * If we are up and running, use the hotplug thread. For early calls
	 * we invoke the thread function directly.
	 */
	if (!st->thread)
		return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);

	st->rollback = false;
	st->last = NULL;

	st->node = node;
	st->bringup = bringup;
	st->cb_state = state;
	st->single = true;

	__cpuhp_kick_ap(st);

	/*
	 * If we failed and did a partial, do a rollback.
	 */
	if ((ret = st->result) && st->last) {
		st->rollback = true;
		st->bringup = !bringup;

		__cpuhp_kick_ap(st);
	}

	/*
	 * Clean up the leftovers so the next hotplug operation won't use stale
	 * data.
	 */
	st->node = st->last = NULL;
	return ret;
}

static int cpuhp_kick_ap_work(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	enum cpuhp_state prev_state = st->state;
	int ret;

	cpuhp_lock_acquire(false);
	cpuhp_lock_release(false);

	cpuhp_lock_acquire(true);
	cpuhp_lock_release(true);

	trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
	ret = cpuhp_kick_ap(st, st->target);
	trace_cpuhp_exit(cpu, st->state, prev_state, ret);

	return ret;
}

static struct smp_hotplug_thread cpuhp_threads = {
	.store			= &cpuhp_state.thread,
	.create			= &cpuhp_create,
	.thread_should_run	= cpuhp_should_run,
	.thread_fn		= cpuhp_thread_fun,
	.thread_comm		= "cpuhp/%u",
	.selfparking		= true,
};

void __init cpuhp_threads_init(void)
{
	BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
	kthread_unpark(this_cpu_read(cpuhp_state.thread));
}
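
/*
 * Note (sketch): smpboot_register_percpu_thread() sets up the per-cpu
 * "cpuhp/%u" threads that run cpuhp_thread_fun(); they are created parked
 * (.selfparking) and the boot CPU's thread is unparked here, while a
 * secondary CPU's thread is unparked in bringup_wait_for_ap().
 */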

#ifdef CONFIG_HOTPLUG_CPU
/**
 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
 * @cpu: a CPU id
 *
 * This function walks all processes, finds a valid mm struct for each one and
 * then clears a corresponding bit in mm's cpumask.  While this all sounds
 * trivial, there are various non-obvious corner cases, which this function
 * tries to solve in a safe manner.
 *
 * Also note that the function uses a somewhat relaxed locking scheme, so it may
 * be called only for an already offlined CPU.
 */
void clear_tasks_mm_cpumask(int cpu)
{
	struct task_struct *p;

	/*
	 * This function is called after the cpu is taken down and marked
	 * offline, so it's not like new tasks will ever get this cpu set in
	 * their mm mask. -- Peter Zijlstra
	 * Thus, we may use rcu_read_lock() here, instead of grabbing
	 * full-fledged tasklist_lock.
	 */
	WARN_ON(cpu_online(cpu));
	rcu_read_lock();
	for_each_process(p) {
		struct task_struct *t;

		/*
		 * Main thread might exit, but other threads may still have
		 * a valid mm. Find one.
		 */
		t = find_lock_task_mm(p);
		if (!t)
			continue;
		cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
		task_unlock(t);
	}
	rcu_read_unlock();
}

/* Take this CPU down. */
static int take_cpu_down(void *_param)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
	int err, cpu = smp_processor_id();
	int ret;

	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
		return err;

	/*
	 * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
	 * do this step again.
	 */
	WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
	st->state--;
	/* Invoke the former CPU_DYING callbacks */
	for (; st->state > target; st->state--) {
		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
		/*
		 * DYING must not fail!
		 */
		WARN_ON_ONCE(ret);
	}

	/* Give up timekeeping duties */
	tick_handover_do_timer();
	/* Park the stopper thread */
	stop_machine_park(cpu);
	return 0;
}

static int takedown_cpu(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int err;

	/* Park the smpboot threads */
	kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);

	/*
	 * Prevent irq alloc/free while the dying cpu reorganizes the
	 * interrupt affinities.
	 */
	irq_lock_sparse();

	/*
	 * So now all preempt/rcu users must observe !cpu_active().
	 */
	err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
	if (err) {
		/* CPU refused to die */
		irq_unlock_sparse();
		/* Unpark the hotplug thread so we can rollback there */
		kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
		return err;
	}
	BUG_ON(cpu_online(cpu));

	/*
	 * The CPUHP_AP_SCHED_MIGRATE_DYING callback will have removed all
	 * runnable tasks from the cpu, there's only the idle task left now
	 * that the migration thread is done doing the stop_machine thing.
	 *
	 * Wait for the stop thread to go away.
	 */
	wait_for_ap_thread(st, false);
	BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);

	/* Interrupts are moved away from the dying cpu, reenable alloc/free */
	irq_unlock_sparse();

	hotplug_cpu__broadcast_tick_pull(cpu);
	/* This actually kills the CPU. */
	__cpu_die(cpu);

	tick_cleanup_dead_cpu(cpu);
	rcutree_migrate_callbacks(cpu);
	return 0;
}
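
/*
 * Sketch of the teardown sequence above: the hotplug and smpboot threads
 * are parked, take_cpu_down() runs on the dying CPU under stop_machine to
 * disable it and run the DYING callbacks, the idle task then reports
 * CPUHP_AP_IDLE_DEAD via cpuhp_report_idle_dead(), and finally __cpu_die()
 * plus the tick/RCU cleanups finish the removal.
 */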

static void cpuhp_complete_idle_dead(void *arg)
{
	struct cpuhp_cpu_state *st = arg;

	complete_ap_thread(st, false);
}

void cpuhp_report_idle_dead(void)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	BUG_ON(st->state != CPUHP_AP_OFFLINE);
	rcu_report_dead(smp_processor_id());
	st->state = CPUHP_AP_IDLE_DEAD;
	/*
	 * We cannot call complete after rcu_report_dead() so we delegate it
	 * to an online cpu.
	 */
	smp_call_function_single(cpumask_first(cpu_online_mask),
				 cpuhp_complete_idle_dead, st, 0);
}

static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	for (st->state++; st->state < st->target; st->state++) {
		struct cpuhp_step *step = cpuhp_get_step(st->state);

		if (!step->skip_onerr)
			cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
	}
}

static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
				enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;
	int ret = 0;

	for (; st->state > target; st->state--) {
		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
		if (ret) {
			st->target = prev_state;
			if (st->state < prev_state)
				undo_cpu_down(cpu, st);
			break;
		}
	}
	return ret;
}

/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
			   enum cpuhp_state target)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int prev_state, ret = 0;

	if (num_online_cpus() == 1)
		return -EBUSY;

	if (!cpu_present(cpu))
		return -EINVAL;

	cpus_write_lock();

	cpuhp_tasks_frozen = tasks_frozen;

	prev_state = cpuhp_set_state(st, target);
	/*
	 * If the current CPU state is in the range of the AP hotplug thread,
	 * then we need to kick the thread.
	 */
	if (st->state > CPUHP_TEARDOWN_CPU) {
		st->target = max((int)target, CPUHP_TEARDOWN_CPU);
		ret = cpuhp_kick_ap_work(cpu);
		/*
		 * The AP side has done the error rollback already. Just
		 * return the error code..
		 */
		if (ret)
			goto out;

		/*
		 * We might have stopped still in the range of the AP hotplug
		 * thread. Nothing to do anymore.
		 */
		if (st->state > CPUHP_TEARDOWN_CPU)
			goto out;

		st->target = target;
	}
	/*
	 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
	 * to do the further cleanups.
	 */
	ret = cpuhp_down_callbacks(cpu, st, target);
	if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
		cpuhp_reset_state(st, prev_state);
		__cpuhp_kick_ap(st);
	}

out:
	cpus_write_unlock();
	/*
	 * Do post unplug cleanup. This is still protected against
	 * concurrent CPU hotplug via cpu_add_remove_lock.
	 */
	lockup_detector_cleanup();
	arch_smt_update();
	return ret;
}

static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
{
	if (cpu_hotplug_disabled)
		return -EBUSY;
	return _cpu_down(cpu, 0, target);
}
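
/*
 * Illustrative usage: cpu_down(cpu) offlines a CPU all the way to
 * CPUHP_OFFLINE, cpu_up(cpu) brings it back to CPUHP_ONLINE; both funnel
 * through _cpu_down()/_cpu_up() with cpu_add_remove_lock held and respect
 * cpu_hotplug_disabled.
 */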

static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
{
	int err;

	cpu_maps_update_begin();
	err = cpu_down_maps_locked(cpu, target);
	cpu_maps_update_done();
	return err;
}

int cpu_down(unsigned int cpu)
{
	return do_cpu_down(cpu, CPUHP_OFFLINE);
}
EXPORT_SYMBOL(cpu_down);

#else
#define takedown_cpu		NULL
#endif /*CONFIG_HOTPLUG_CPU*/

/**
 * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
 * @cpu: cpu that just started
 *
 * It must be called by the arch code on the new cpu, before the new cpu
 * enables interrupts and before the "boot" cpu returns from __cpu_up().
 */
void notify_cpu_starting(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
	int ret;

	rcu_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
	st->booted_once = true;
	while (st->state < target) {
		st->state++;
		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
		/*
		 * STARTING must not fail!
		 */
		WARN_ON_ONCE(ret);
	}
}
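
/*
 * Note (sketch): the STARTING callbacks invoked above run on the incoming
 * CPU with interrupts still disabled, which is why a failure can only be
 * WARNed about rather than rolled back; compare cpuhp_is_atomic_state().
 */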

/*
 * Called from the idle task. Wake up the controlling task which brings the
 * stopper and the hotplug thread of the upcoming CPU up and then delegates
 * the rest of the online bringup to the hotplug thread.
 */
void cpuhp_online_idle(enum cpuhp_state state)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	/* Happens for the boot cpu */
	if (state != CPUHP_AP_ONLINE_IDLE)
		return;

	st->state = CPUHP_AP_ONLINE_IDLE;
	complete_ap_thread(st, true);
}

/* Requires cpu_add_remove_lock to be held */
static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	struct task_struct *idle;
	int ret = 0;

	cpus_write_lock();

	if (!cpu_present(cpu)) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * The caller of do_cpu_up might have raced with another
	 * caller. Ignore it for now.
	 */
	if (st->state >= target)
		goto out;

	if (st->state == CPUHP_OFFLINE) {
		/* Let it fail before we try to bring the cpu up */
		idle = idle_thread_get(cpu);
		if (IS_ERR(idle)) {
			ret = PTR_ERR(idle);
			goto out;
		}
	}

	cpuhp_tasks_frozen = tasks_frozen;

	cpuhp_set_state(st, target);
	/*
	 * If the current CPU state is in the range of the AP hotplug thread,
	 * then we need to kick the thread once more.
	 */
	if (st->state > CPUHP_BRINGUP_CPU) {
		ret = cpuhp_kick_ap_work(cpu);
		/*
		 * The AP side has done the error rollback already. Just
		 * return the error code..
		 */
		if (ret)
			goto out;
	}

	/*
	 * Try to reach the target state. We max out on the BP at
	 * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
	 * responsible for bringing it up to the target state.
	 */
	target = min((int)target, CPUHP_BRINGUP_CPU);
	ret = cpuhp_up_callbacks(cpu, st, target);
out:
	cpus_write_unlock();
	arch_smt_update();
	return ret;
}

static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
{
	int err = 0;

	if (!cpu_possible(cpu)) {
		pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
		       cpu);
#if defined(CONFIG_IA64)
		pr_err("please check additional_cpus= boot parameter\n");
#endif
		return -EINVAL;
	}

	err = try_online_node(cpu_to_node(cpu));
	if (err)
		return err;

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}
	if (!cpu_smt_allowed(cpu)) {
		err = -EPERM;
		goto out;
	}

	err = _cpu_up(cpu, 0, target);
out:
	cpu_maps_update_done();
	return err;
}

int cpu_up(unsigned int cpu)
{
	return do_cpu_up(cpu, CPUHP_ONLINE);
}
EXPORT_SYMBOL_GPL(cpu_up);

#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;

int freeze_secondary_cpus(int primary)
{
	int cpu, error = 0;