/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licensed under the GPL.
 */
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched/signal.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/task.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/oom.h>
#include <linux/rcupdate.h>
#include <linux/export.h>
#include <linux/bug.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
#include <linux/mutex.h>
#include <linux/gfp.h>
#include <linux/suspend.h>
#include <linux/lockdep.h>
#include <linux/tick.h>
#include <linux/irq.h>
#include <linux/nmi.h>
#include <linux/smpboot.h>
#include <linux/relay.h>
#include <linux/slab.h>
#include <linux/percpu-rwsem.h>

#include <trace/events/power.h>
#define CREATE_TRACE_POINTS
#include <trace/events/cpuhp.h>

#include "smpboot.h"

/**
 * cpuhp_cpu_state - Per cpu hotplug state storage
 * @state:	The current cpu state
 * @target:	The target state
 * @thread:	Pointer to the hotplug thread
 * @should_run:	Thread should execute
 * @rollback:	Perform a rollback
 * @single:	Single callback invocation
 * @bringup:	Single callback bringup or teardown selector
 * @cb_state:	The state for a single callback (install/uninstall)
 * @result:	Result of the operation
 * @done_up:	Signal completion to the issuer of the task for cpu-up
 * @done_down:	Signal completion to the issuer of the task for cpu-down
 */
struct cpuhp_cpu_state {
	enum cpuhp_state	state;
	enum cpuhp_state	target;
	enum cpuhp_state	fail;
#ifdef CONFIG_SMP
	struct task_struct	*thread;
	bool			should_run;
	bool			rollback;
	bool			single;
	bool			bringup;
	bool			booted_once;
	struct hlist_node	*node;
	struct hlist_node	*last;
	enum cpuhp_state	cb_state;
	int			result;
	struct completion	done_up;
	struct completion	done_down;
#endif
};

static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
	.fail = CPUHP_INVALID,
};

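/*
 * cpuhp_lock_acquire()/cpuhp_lock_release() take no real lock.  They feed a
 * fake dependency to lockdep so that waiting for the AP hotplug thread shows
 * up in the dependency graph; separate maps for the up and down direction
 * keep the two from producing false positive reports against each other.
 */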
#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
static struct lockdep_map cpuhp_state_up_map =
	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
static struct lockdep_map cpuhp_state_down_map =
	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);


static inline void cpuhp_lock_acquire(bool bringup)
{
	lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
}

static inline void cpuhp_lock_release(bool bringup)
{
	lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
}
#else

static inline void cpuhp_lock_acquire(bool bringup) { }
static inline void cpuhp_lock_release(bool bringup) { }

#endif

/**
 * cpuhp_step - Hotplug state machine step
 * @name:	Name of the step
 * @startup:	Startup function of the step
 * @teardown:	Teardown function of the step
 * @skip_onerr:	Do not invoke the functions on error rollback
 *		Will go away once the notifiers are gone
 * @cant_stop:	Bringup/teardown can't be stopped at this step
 */
struct cpuhp_step {
	const char		*name;
	union {
		int		(*single)(unsigned int cpu);
		int		(*multi)(unsigned int cpu,
					 struct hlist_node *node);
	} startup;
	union {
		int		(*single)(unsigned int cpu);
		int		(*multi)(unsigned int cpu,
					 struct hlist_node *node);
	} teardown;
	struct hlist_head	list;
	bool			skip_onerr;
	bool			cant_stop;
	bool			multi_instance;
};

static DEFINE_MUTEX(cpuhp_state_mutex);
static struct cpuhp_step cpuhp_hp_states[];

static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
{
	return cpuhp_hp_states + state;
}

/**
 * cpuhp_invoke_callback - Invoke the callbacks for a given state
 * @cpu:	The cpu for which the callback should be invoked
 * @state:	The state to do callbacks for
 * @bringup:	True if the bringup callback should be invoked
 * @node:	For multi-instance, do a single entry callback for install/remove
 * @lastp:	For multi-instance rollback, remember how far we got
 *
 * Called from cpu hotplug and from the state register machinery.
 */
static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
				 bool bringup, struct hlist_node *node,
				 struct hlist_node **lastp)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	struct cpuhp_step *step = cpuhp_get_step(state);
	int (*cbm)(unsigned int cpu, struct hlist_node *node);
	int (*cb)(unsigned int cpu);
	int ret, cnt;

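	/*
	 * Error injection: if st->fail was set to this state (via the hotplug
	 * "fail" debug interface), pretend the callback failed so the error
	 * and rollback paths can be exercised.
	 */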
	if (st->fail == state) {
		st->fail = CPUHP_INVALID;

		if (!(bringup ? step->startup.single : step->teardown.single))
			return 0;

		return -EAGAIN;
	}

	if (!step->multi_instance) {
		WARN_ON_ONCE(lastp && *lastp);
		cb = bringup ? step->startup.single : step->teardown.single;
		if (!cb)
			return 0;
		trace_cpuhp_enter(cpu, st->target, state, cb);
		ret = cb(cpu);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		return ret;
	}
	cbm = bringup ? step->startup.multi : step->teardown.multi;
	if (!cbm)
		return 0;

	/* Single invocation for instance add/remove */
	if (node) {
		WARN_ON_ONCE(lastp && *lastp);
		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
		ret = cbm(cpu, node);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		return ret;
	}

	/* State transition. Invoke on all instances */
	cnt = 0;
	hlist_for_each(node, &step->list) {
		if (lastp && node == *lastp)
			break;

		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
		ret = cbm(cpu, node);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		if (ret) {
			if (!lastp)
				goto err;

			*lastp = node;
			return ret;
		}
		cnt++;
	}
	if (lastp)
		*lastp = NULL;
	return 0;
err:
	/* Rollback the instances if one failed */
	cbm = !bringup ? step->startup.multi : step->teardown.multi;
	if (!cbm)
		return ret;

	hlist_for_each(node, &step->list) {
		if (!cnt--)
			break;

		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
		ret = cbm(cpu, node);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		/*
		 * Rollback must not fail!
		 */
		WARN_ON_ONCE(ret);
	}
	return ret;
}

#ifdef CONFIG_SMP
static bool cpuhp_is_ap_state(enum cpuhp_state state)
{
	/*
	 * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
	 * purposes as that state is handled explicitly in cpu_down.
	 */
	return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
}

static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
{
	struct completion *done = bringup ? &st->done_up : &st->done_down;
	wait_for_completion(done);
}

static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
{
	struct completion *done = bringup ? &st->done_up : &st->done_down;
	complete(done);
}

/*
 * The former STARTING/DYING states, run with IRQs disabled, must not fail.
 */
static bool cpuhp_is_atomic_state(enum cpuhp_state state)
{
	return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
}

/* Serializes the updates to cpu_online_mask, cpu_present_mask */
static DEFINE_MUTEX(cpu_add_remove_lock);
bool cpuhp_tasks_frozen;
EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);

/*
 * The following two APIs (cpu_maps_update_begin/done) must be used when
 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
 */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}

void cpu_maps_update_done(void)
{
	mutex_unlock(&cpu_add_remove_lock);
}

/*
 * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
 * Should always be manipulated under cpu_add_remove_lock
 */
static int cpu_hotplug_disabled;

#ifdef CONFIG_HOTPLUG_CPU

DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);

void cpus_read_lock(void)
{
	percpu_down_read(&cpu_hotplug_lock);
}
EXPORT_SYMBOL_GPL(cpus_read_lock);

void cpus_read_unlock(void)
{
	percpu_up_read(&cpu_hotplug_lock);
}
EXPORT_SYMBOL_GPL(cpus_read_unlock);

void cpus_write_lock(void)
{
	percpu_down_write(&cpu_hotplug_lock);
}

void cpus_write_unlock(void)
{
	percpu_up_write(&cpu_hotplug_lock);
}

void lockdep_assert_cpus_held(void)
{
	percpu_rwsem_assert_held(&cpu_hotplug_lock);
}

/*
 * Wait for currently running CPU hotplug operations to complete (if any) and
 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
 * hotplug path before performing hotplug operations. So acquiring that lock
 * guarantees mutual exclusion from any currently running hotplug operations.
 */
void cpu_hotplug_disable(void)
{
	cpu_maps_update_begin();
	cpu_hotplug_disabled++;
	cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_disable);

static void __cpu_hotplug_enable(void)
{
	if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
		return;
	cpu_hotplug_disabled--;
}

void cpu_hotplug_enable(void)
{
	cpu_maps_update_begin();
	__cpu_hotplug_enable();
	cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
#endif	/* CONFIG_HOTPLUG_CPU */

#ifdef CONFIG_HOTPLUG_SMT
enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
EXPORT_SYMBOL_GPL(cpu_smt_control);

static bool cpu_smt_available __read_mostly;

void __init cpu_smt_disable(bool force)
{
	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
		cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
		return;

	if (force) {
		pr_info("SMT: Force disabled\n");
		cpu_smt_control = CPU_SMT_FORCE_DISABLED;
	} else {
		cpu_smt_control = CPU_SMT_DISABLED;
	}
}

/*
 * The decision whether SMT is supported can only be done after the full
 * CPU identification. Called from architecture code before non boot CPUs
 * are brought up.
 */
void __init cpu_smt_check_topology_early(void)
{
	if (!topology_smt_supported())
		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
}

/*
 * If SMT was disabled by BIOS, detect it here, after the CPUs have been
 * brought online. This ensures the smt/l1tf sysfs entries are consistent
 * with reality. cpu_smt_available is set to true during the bringup of non
 * boot CPUs when a SMT sibling is detected. Note, this may overwrite
 * cpu_smt_control's previous setting.
 */
void __init cpu_smt_check_topology(void)
{
	if (!cpu_smt_available)
		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
}

static int __init smt_cmdline_disable(char *str)
{
	cpu_smt_disable(str && !strcmp(str, "force"));
	return 0;
}
early_param("nosmt", smt_cmdline_disable);

static inline bool cpu_smt_allowed(unsigned int cpu)
{
	if (topology_is_primary_thread(cpu))
		return true;

	/*
	 * If the CPU is not a 'primary' thread and the booted_once bit is
	 * set then the processor has SMT support. Store this information
	 * for the late check of SMT support in cpu_smt_check_topology().
	 */
	if (per_cpu(cpuhp_state, cpu).booted_once)
		cpu_smt_available = true;

	if (cpu_smt_control == CPU_SMT_ENABLED)
		return true;

	/*
	 * On x86 it's required to boot all logical CPUs at least once so
	 * that the init code can get a chance to set CR4.MCE on each
	 * CPU. Otherwise, a broadcast MCE observing CR4.MCE=0b on any
	 * core will shutdown the machine.
	 */
	return !per_cpu(cpuhp_state, cpu).booted_once;
}
#else
static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
#endif

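/*
 * cpuhp_set_state() records the requested target state and derives the
 * direction (bringup vs. teardown) from the current state.  cpuhp_reset_state()
 * flips that direction so a failed transition can be rolled back to the state
 * it started from.
 */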
static inline enum cpuhp_state
cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;

	st->rollback = false;
	st->last = NULL;

	st->target = target;
	st->single = false;
	st->bringup = st->state < target;

	return prev_state;
}

static inline void
cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
{
	st->rollback = true;

	/*
	 * If we have st->last we need to undo partial multi_instance of this
	 * state first. Otherwise start undo at the previous state.
	 */
	if (!st->last) {
		if (st->bringup)
			st->state--;
		else
			st->state++;
	}

	st->target = prev_state;
	st->bringup = !st->bringup;
}

/* Regular hotplug invocation of the AP hotplug thread */
static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
{
	if (!st->single && st->state == st->target)
		return;

	st->result = 0;
	/*
	 * Make sure the above stores are visible before should_run becomes
	 * true. Paired with the mb() above in cpuhp_thread_fun()
	 */
	smp_mb();
	st->should_run = true;
	wake_up_process(st->thread);
	wait_for_ap_thread(st, st->bringup);
}

static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
{
	enum cpuhp_state prev_state;
	int ret;

	prev_state = cpuhp_set_state(st, target);
	__cpuhp_kick_ap(st);
	if ((ret = st->result)) {
		cpuhp_reset_state(st, prev_state);
		__cpuhp_kick_ap(st);
	}

	return ret;
}

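/*
 * Control CPU side of the bringup: wait for the freshly booted AP to reach
 * CPUHP_AP_ONLINE_IDLE, unpark its stopper and hotplug threads and, if the
 * target lies beyond the idle state, hand the rest of the bringup to the
 * AP's hotplug thread.
 */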
static int bringup_wait_for_ap(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

	/* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
	wait_for_ap_thread(st, true);
	if (WARN_ON_ONCE((!cpu_online(cpu))))
		return -ECANCELED;

	/* Unpark the stopper thread and the hotplug thread of the target cpu */
	stop_machine_unpark(cpu);
	kthread_unpark(st->thread);

	/*
	 * SMT soft disabling on X86 requires to bring the CPU out of the
	 * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit.  The
	 * CPU marked itself as booted_once in notify_cpu_starting() so the
	 * cpu_smt_allowed() check will now return false if this is not the
	 * primary sibling.
	 */
	if (!cpu_smt_allowed(cpu))
		return -ECANCELED;

	if (st->target <= CPUHP_AP_ONLINE_IDLE)
		return 0;

	return cpuhp_kick_ap(st, st->target);
}

static int bringup_cpu(unsigned int cpu)
{
	struct task_struct *idle = idle_thread_get(cpu);
	int ret;

	/*
	 * Some architectures have to walk the irq descriptors to
	 * setup the vector space for the cpu which comes online.
	 * Prevent irq alloc/free across the bringup.
	 */
	irq_lock_sparse();

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu, idle);
	irq_unlock_sparse();
	if (ret)
		return ret;
	return bringup_wait_for_ap(cpu);
}

/*
 * Hotplug state machine related functions
 */

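/* Undo the bringup steps done so far by running the teardown callbacks. */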
static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	for (st->state--; st->state > st->target; st->state--) {
		struct cpuhp_step *step = cpuhp_get_step(st->state);

		if (!step->skip_onerr)
			cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
	}
}

static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
			      enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;
	int ret = 0;

	while (st->state < target) {
		st->state++;
		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
		if (ret) {
			st->target = prev_state;
			undo_cpu_up(cpu, st);
			break;
		}
	}
	return ret;
}

/*
 * The cpu hotplug threads manage the bringup and teardown of the cpus
 */
static void cpuhp_create(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

	init_completion(&st->done_up);
	init_completion(&st->done_down);
}

static int cpuhp_should_run(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	return st->should_run;
}

/*
 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
 * callbacks when a state gets [un]installed at runtime.
 *
 * Each invocation of this function by the smpboot thread does a single AP
 * state callback.
 *
 * It has 3 modes of operation:
 *  - single: runs st->cb_state
 *  - up:     runs ++st->state, while st->state < st->target
 *  - down:   runs st->state--, while st->state > st->target
 *
 * When complete or on error, should_run is cleared and the completion is fired.
 */
static void cpuhp_thread_fun(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	bool bringup = st->bringup;
	enum cpuhp_state state;

	if (WARN_ON_ONCE(!st->should_run))
		return;

	/*
	 * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
	 * that if we see ->should_run we also see the rest of the state.
	 */
	smp_mb();

	cpuhp_lock_acquire(bringup);

	if (st->single) {
		state = st->cb_state;
		st->should_run = false;
	} else {
		if (bringup) {
			st->state++;
			state = st->state;
			st->should_run = (st->state < st->target);
			WARN_ON_ONCE(st->state > st->target);
		} else {
			state = st->state;
			st->state--;
			st->should_run = (st->state > st->target);
			WARN_ON_ONCE(st->state < st->target);
		}
	}

	WARN_ON_ONCE(!cpuhp_is_ap_state(state));

	if (st->rollback) {
		struct cpuhp_step *step = cpuhp_get_step(state);
		if (step->skip_onerr)
			goto next;
	}

	if (cpuhp_is_atomic_state(state)) {
		local_irq_disable();
		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
		local_irq_enable();

		/*
		 * STARTING/DYING must not fail!
		 */
		WARN_ON_ONCE(st->result);
	} else {
		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
	}

	if (st->result) {
		/*
		 * If we fail on a rollback, we're up a creek without a
		 * paddle, no way forward, no way back. We lose, thanks for
		 * playing.
		 */
		WARN_ON_ONCE(st->rollback);
		st->should_run = false;
	}

next:
	cpuhp_lock_release(bringup);

	if (!st->should_run)
		complete_ap_thread(st, bringup);
}

/* Invoke a single callback on a remote cpu */
static int
cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
			 struct hlist_node *node)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int ret;

	if (!cpu_online(cpu))
		return 0;

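	/*
	 * Empty lock/unlock cycles: nothing is actually locked here, this only
	 * records the dependency on the AP hotplug thread for lockdep.
	 */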
	cpuhp_lock_acquire(false);
	cpuhp_lock_release(false);

	cpuhp_lock_acquire(true);
	cpuhp_lock_release(true);

	/*
	 * If we are up and running, use the hotplug thread. For early calls
	 * we invoke the thread function directly.
	 */
	if (!st->thread)
		return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);

	st->rollback = false;
	st->last = NULL;

	st->node = node;
	st->bringup = bringup;
	st->cb_state = state;
	st->single = true;

	__cpuhp_kick_ap(st);

	/*
	 * If we failed and did a partial, do a rollback.
	 */
	if ((ret = st->result) && st->last) {
		st->rollback = true;
		st->bringup = !bringup;

		__cpuhp_kick_ap(st);
	}

	/*
	 * Clean up the leftovers so the next hotplug operation won't use stale
	 * data.
	 */
	st->node = st->last = NULL;
	return ret;
}

static int cpuhp_kick_ap_work(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	enum cpuhp_state prev_state = st->state;
	int ret;

	cpuhp_lock_acquire(false);
	cpuhp_lock_release(false);

	cpuhp_lock_acquire(true);
	cpuhp_lock_release(true);

	trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
	ret = cpuhp_kick_ap(st, st->target);
	trace_cpuhp_exit(cpu, st->state, prev_state, ret);

	return ret;
}

static struct smp_hotplug_thread cpuhp_threads = {
	.store			= &cpuhp_state.thread,
	.create			= &cpuhp_create,
	.thread_should_run	= cpuhp_should_run,
	.thread_fn		= cpuhp_thread_fun,
	.thread_comm		= "cpuhp/%u",
	.selfparking		= true,
};

void __init cpuhp_threads_init(void)
{
	BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
	kthread_unpark(this_cpu_read(cpuhp_state.thread));
}

#ifdef CONFIG_HOTPLUG_CPU
/**
 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
 * @cpu: a CPU id
 *
 * This function walks all processes, finds a valid mm struct for each one and
 * then clears a corresponding bit in mm's cpumask.  While this all sounds
 * trivial, there are various non-obvious corner cases, which this function
 * tries to solve in a safe manner.
 *
 * Also note that the function uses a somewhat relaxed locking scheme, so it may
 * be called only for an already offlined CPU.
 */
void clear_tasks_mm_cpumask(int cpu)
{
	struct task_struct *p;

	/*
	 * This function is called after the cpu is taken down and marked
	 * offline, so it's not like new tasks will ever get this cpu set in
	 * their mm mask. -- Peter Zijlstra
	 * Thus, we may use rcu_read_lock() here, instead of grabbing
	 * full-fledged tasklist_lock.
	 */
	WARN_ON(cpu_online(cpu));
	rcu_read_lock();
	for_each_process(p) {
		struct task_struct *t;

		/*
		 * Main thread might exit, but other threads may still have
		 * a valid mm. Find one.
		 */
		t = find_lock_task_mm(p);
		if (!t)
			continue;
		cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
		task_unlock(t);
	}
	rcu_read_unlock();
}

/* Take this CPU down. */
static int take_cpu_down(void *_param)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
	int err, cpu = smp_processor_id();
	int ret;

	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
		return err;

	/*
	 * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
	 * do this step again.
	 */
	WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
	st->state--;
	/* Invoke the former CPU_DYING callbacks */
	for (; st->state > target; st->state--) {
		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
		/*
		 * DYING must not fail!
		 */
		WARN_ON_ONCE(ret);
	}

	/* Give up timekeeping duties */
	tick_handover_do_timer();
	/* Park the stopper thread */
	stop_machine_park(cpu);
	return 0;
}

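/*
 * Control CPU side of the teardown: park the smpboot threads, run
 * take_cpu_down() on the dying CPU via stop_machine() and wait until it has
 * reached CPUHP_AP_IDLE_DEAD before letting the architecture bury it.
 */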
static int takedown_cpu(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int err;

	/* Park the smpboot threads */
	kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);

	/*
	 * Prevent irq alloc/free while the dying cpu reorganizes the
	 * interrupt affinities.
	 */
	irq_lock_sparse();

	/*
	 * So now all preempt/rcu users must observe !cpu_active().
	 */
	err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
	if (err) {
		/* CPU refused to die */
		irq_unlock_sparse();
		/* Unpark the hotplug thread so we can rollback there */
		kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
		return err;
	}
	BUG_ON(cpu_online(cpu));

	/*
	 * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
	 * all runnable tasks from the CPU, there's only the idle task left now
	 * that the migration thread is done doing the stop_machine thing.
	 *
	 * Wait for the stop thread to go away.
	 */
	wait_for_ap_thread(st, false);
	BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);

	/* Interrupts are moved away from the dying cpu, reenable alloc/free */
	irq_unlock_sparse();

	hotplug_cpu__broadcast_tick_pull(cpu);
	/* This actually kills the CPU. */
	__cpu_die(cpu);

	tick_cleanup_dead_cpu(cpu);
	rcutree_migrate_callbacks(cpu);
	return 0;
}

static void cpuhp_complete_idle_dead(void *arg)
{
	struct cpuhp_cpu_state *st = arg;

	complete_ap_thread(st, false);
}

void cpuhp_report_idle_dead(void)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	BUG_ON(st->state != CPUHP_AP_OFFLINE);
	rcu_report_dead(smp_processor_id());
	st->state = CPUHP_AP_IDLE_DEAD;
	/*
	 * We cannot call complete after rcu_report_dead() so we delegate it
	 * to an online cpu.
	 */
	smp_call_function_single(cpumask_first(cpu_online_mask),
				 cpuhp_complete_idle_dead, st, 0);
}

static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	for (st->state++; st->state < st->target; st->state++) {
		struct cpuhp_step *step = cpuhp_get_step(st->state);

		if (!step->skip_onerr)
			cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
	}
}

static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
				enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;
	int ret = 0;

	for (; st->state > target; st->state--) {
		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
		if (ret) {
			st->target = prev_state;
			if (st->state < prev_state)
				undo_cpu_down(cpu, st);
			break;
		}
	}
	return ret;
}

/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
			   enum cpuhp_state target)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int prev_state, ret = 0;

	if (num_online_cpus() == 1)
		return -EBUSY;

	if (!cpu_present(cpu))
		return -EINVAL;

	cpus_write_lock();

	cpuhp_tasks_frozen = tasks_frozen;

	prev_state = cpuhp_set_state(st, target);
	/*
	 * If the current CPU state is in the range of the AP hotplug thread,
	 * then we need to kick the thread.
	 */
	if (st->state > CPUHP_TEARDOWN_CPU) {
		st->target = max((int)target, CPUHP_TEARDOWN_CPU);
		ret = cpuhp_kick_ap_work(cpu);
		/*
		 * The AP side has done the error rollback already. Just
		 * return the error code.
		 */
		if (ret)
			goto out;

		/*
		 * We might have stopped still in the range of the AP hotplug
		 * thread. Nothing to do anymore.
		 */
		if (st->state > CPUHP_TEARDOWN_CPU)
			goto out;

		st->target = target;
	}
	/*
	 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
	 * to do the further cleanups.
	 */
	ret = cpuhp_down_callbacks(cpu, st, target);
	if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
		cpuhp_reset_state(st, prev_state);
		__cpuhp_kick_ap(st);
	}

out:
	cpus_write_unlock();
	/*
	 * Do post unplug cleanup. This is still protected against
	 * concurrent CPU hotplug via cpu_add_remove_lock.
	 */
	lockup_detector_cleanup();
	return ret;
}

static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
{
	if (cpu_hotplug_disabled)
		return -EBUSY;
	return _cpu_down(cpu, 0, target);
}

static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
{
	int err;

	cpu_maps_update_begin();
	err = cpu_down_maps_locked(cpu, target);
	cpu_maps_update_done();
	return err;
}

int cpu_down(unsigned int cpu)
{
	return do_cpu_down(cpu, CPUHP_OFFLINE);
}
EXPORT_SYMBOL(cpu_down);

#else
#define takedown_cpu		NULL
#endif /*CONFIG_HOTPLUG_CPU*/

/**
 * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
 * @cpu: cpu that just started
 *
 * It must be called by the arch code on the new cpu, before the new cpu
 * enables interrupts and before the "boot" cpu returns from __cpu_up().
 */
void notify_cpu_starting(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
	int ret;

	rcu_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
	st->booted_once = true;
	while (st->state < target) {
		st->state++;
		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
		/*
		 * STARTING must not fail!
		 */
		WARN_ON_ONCE(ret);
	}
}

/*
 * Called from the idle task. Wake up the controlling task which brings the
 * stopper and the hotplug thread of the upcoming CPU up and then delegates
 * the rest of the online bringup to the hotplug thread.
 */
void cpuhp_online_idle(enum cpuhp_state state)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	/* Happens for the boot cpu */
	if (state != CPUHP_AP_ONLINE_IDLE)
		return;

	st->state = CPUHP_AP_ONLINE_IDLE;
	complete_ap_thread(st, true);
}

/* Requires cpu_add_remove_lock to be held */
static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	struct task_struct *idle;
	int ret = 0;

	cpus_write_lock();

	if (!cpu_present(cpu)) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * The caller of do_cpu_up might have raced with another
	 * caller. Ignore it for now.
	 */
	if (st->state >= target)
		goto out;

	if (st->state == CPUHP_OFFLINE) {
		/* Let it fail before we try to bring the cpu up */
		idle = idle_thread_get(cpu);
		if (IS_ERR(idle)) {
			ret = PTR_ERR(idle);
			goto out;
		}
	}

	cpuhp_tasks_frozen = tasks_frozen;

	cpuhp_set_state(st, target);
	/*
	 * If the current CPU state is in the range of the AP hotplug thread,
	 * then we need to kick the thread once more.
	 */
	if (st->state > CPUHP_BRINGUP_CPU) {
		ret = cpuhp_kick_ap_work(cpu);
		/*
		 * The AP side has done the error rollback already. Just
		 * return the error code.
		 */
		if (ret)
			goto out;
	}

	/*
	 * Try to reach the target state. We max out on the BP at
	 * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
	 * responsible for bringing it up to the target state.
	 */
	target = min((int)target, CPUHP_BRINGUP_CPU);
	ret = cpuhp_up_callbacks(cpu, st, target);
out:
	cpus_write_unlock();
	return ret;
}

static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
{
	int err = 0;

	if (!cpu_possible(cpu)) {
		pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
		       cpu);
#if defined(CONFIG_IA64)
		pr_err("please check additional_cpus= boot parameter\n");
#endif
		return -EINVAL;
	}

	err = try_online_node(cpu_to_node(cpu));
	if (err)
		return err;

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}
	if (!cpu_smt_allowed(cpu)) {
		err = -EPERM;
		goto out;
	}

	err = _cpu_up(cpu, 0, target);
out:
	cpu_maps_update_done();
	return err;
}

int cpu_up(unsigned int cpu)
{
	return do_cpu_up(cpu, CPUHP_ONLINE);
}
EXPORT_SYMBOL_GPL(cpu_up);

#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;

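/*
 * Take all online CPUs except @primary down for suspend/hibernation.  The
 * CPUs that were actually taken down are recorded in frozen_cpus so that
 * enable_nonboot_cpus() can bring exactly those back up on resume.
 */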
int freeze_secondary_cpus(int primary)
{
	int cpu, error = 0;

	cpu_maps_update_begin();
	if (!cpu_online(primary))
		primary = cpumask_first(cpu_online_mask);
	/*
	 * We take down all of the non-boot CPUs in one shot to avoid races
	 * with the userspace trying to use the CPU hotplug at the same time
	 */
	cpumask_clear(frozen_cpus);

	pr_info("Disabling non-boot CPUs ...\n");
	for_each_online_cpu(cpu) {
		if (cpu == primary)
			continue;
		trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
		error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
		trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
		if (!error)
			cpumask_set_cpu(cpu, frozen_cpus);
		else {
			pr_err("Error taking CPU%d down: %d\n", cpu, error);
			break;
		}
	}

	if (!error)
		BUG_ON(num_online_cpus() > 1);
	else
		pr_err("Non-boot CPUs are not disabled\n");

	/*
	 * Make sure the CPUs won't be enabled by someone else. We need to do
	 * this even in case of failure as all disable_nonboot_cpus() users are
	 * supposed to do enable_nonboot_cpus() on the failure path.
	 */
	cpu_hotplug_disabled++;

	cpu_maps_update_done();
	return error;
}

void __weak arch_enable_nonboot_cpus_begin(void)
{
}

void __weak arch_enable_nonboot_cpus_end(void)
{
}

void enable_nonboot_cpus(void)
{
	int cpu, error;

	/* Allow everyone to use the CPU hotplug again */
	cpu_maps_update_begin();
	__cpu_hotplug_enable();
	if (cpumask_empty(frozen_cpus))
		goto out;

	pr_info("Enabling non-boot CPUs ...\n");

	arch_enable_nonboot_cpus_begin();

	for_each_cpu(cpu, frozen_cpus) {
		trace_suspend_resume(TPS("CPU_ON"), cpu, true);
		error = _cpu_up(cpu, 1, CPUHP_ONLINE);
		trace_suspend_resume(TPS("CPU_ON"), cpu, false);
		if (!error) {
			pr_info("CPU%d is up\n", cpu);
			continue;
		}
		pr_warn("Error taking CPU%d up: %d\n", cpu, error);
	}

	arch_enable_nonboot_cpus_end();

	cpumask_clear(frozen_cpus);
out:
	cpu_maps_update_done();
}

static int __init alloc_frozen_cpus(void)
{
	if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
		return -ENOMEM;
	return 0;
}
core_initcall(alloc_frozen_cpus);

/*
 * When callbacks for CPU hotplug notifications are being executed, we must
 * ensure that the state of the system with respect to the tasks being frozen
 * or not, as reported by the notification, remains unchanged *throughout the
 * duration* of the execution of the callbacks.
 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
 *
 * This synchronization is implemented by mutually excluding regular CPU
 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
 * Hibernate notifications.
 */
static int
cpu_hotplug_pm_callback(struct notifier_block *nb,
			unsigned long action, void *ptr)
{
	switch (action) {

	case PM_SUSPEND_PREPARE:
	case PM_HIBERNATION_PREPARE:
		cpu_hotplug_disable();
		break;

	case PM_POST_SUSPEND:
	case PM_POST_HIBERNATION:
		cpu_hotplug_enable();
		break;

	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_OK;
}


static int __init cpu_hotplug_pm_sync_init(void)
{
	/*
	 * cpu_hotplug_pm_callback has higher priority than x86
	 * bsp_pm_callback which depends on cpu_hotplug_pm_callback
	 * to disable cpu hotplug to avoid cpu hotplug race.
	 */
	pm_notifier(cpu_hotplug_pm_callback, 0);
	return 0;
}
core_initcall(cpu_hotplug_pm_sync_init);

#endif /* CONFIG_PM_SLEEP_SMP */

int __boot_cpu_id;

#endif /* CONFIG_SMP */

/* Boot processor state steps */
static struct cpuhp_step cpuhp_hp_states[] = {
	[CPUHP_OFFLINE] = {
		.name			= "offline",
		.startup.single		= NULL,
		.teardown.single	= NULL,
	},
#ifdef CONFIG_SMP
	[CPUHP_CREATE_THREADS]= {