// SPDX-License-Identifier: GPL-2.0
/*
 *  Copyright (C) 1994  Linus Torvalds
 *
 *  Cyrix stuff, June 1998 by:
 *	- Rafael R. Reilova (moved everything from head.S),
 *        <rreilova@ececs.uc.edu>
 *	- Channing Corn (tests & fixes),
 *	- Andrew D. Balsa (code cleanup).
 */
#include <linux/init.h>
#include <linux/utsname.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/nospec.h>
#include <linux/prctl.h>
#include <linux/sched/smt.h>

#include <asm/spec-ctrl.h>
#include <asm/cmdline.h>
#include <asm/bugs.h>
#include <asm/processor.h>
#include <asm/processor-flags.h>
#include <asm/fpu/internal.h>
#include <asm/msr.h>
#include <asm/vmx.h>
#include <asm/paravirt.h>
#include <asm/alternative.h>
#include <asm/pgtable.h>
#include <asm/set_memory.h>
#include <asm/intel-family.h>
#include <asm/e820/api.h>

static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);

/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
u64 x86_spec_ctrl_base;
EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
static DEFINE_MUTEX(spec_ctrl_mutex);

/*
 * The vendor and possibly platform specific bits which can be modified in
 * x86_spec_ctrl_base.
 */
static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;

/*
 * AMD specific MSR info for Speculative Store Bypass control.
 * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu().
 */
u64 __ro_after_init x86_amd_ls_cfg_base;
u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;

void __init check_bugs(void)
{
	identify_boot_cpu();

	/*
	 * identify_boot_cpu() initialized SMT support information, let the
	 * core code know.
	 */
	cpu_smt_check_topology_early();

	if (!IS_ENABLED(CONFIG_SMP)) {
		pr_info("CPU: ");
		print_cpu_info(&boot_cpu_data);
	}

	/*
	 * Read the SPEC_CTRL MSR to account for reserved bits which may
	 * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
	 * init code as it is not enumerated and depends on the family.
	 */
	if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
		rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);

	/* Allow STIBP in MSR_SPEC_CTRL if supported */
	if (boot_cpu_has(X86_FEATURE_STIBP))
		x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;

	/* Select the proper spectre mitigation before patching alternatives */
	spectre_v2_select_mitigation();

	/*
	 * Select proper mitigation for any exposure to the Speculative Store
	 * Bypass vulnerability.
	 */
	ssb_select_mitigation();

	l1tf_select_mitigation();

#ifdef CONFIG_X86_32
	/*
	 * Check whether we are able to run this kernel safely on SMP.
	 *
	 * - i386 is no longer supported.
	 * - In order to run on anything without a TSC, we need to be
	 *   compiled for a i486.
	 */
	if (boot_cpu_data.x86 < 4)
		panic("Kernel requires i486+ for 'invlpg' and other features");

	init_utsname()->machine[1] =
		'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
	alternative_instructions();

	fpu__init_check_bugs();
#else /* CONFIG_X86_64 */
	alternative_instructions();

	/*
	 * Make sure the first 2MB area is not mapped by huge pages
	 * There are typically fixed size MTRRs in there and overlapping
	 * MTRRs into large pages causes slow downs.
	 *
	 * Right now we don't do that with gbpages because there seems
	 * very little benefit for that case.
	 */
	if (!direct_gbpages)
		set_memory_4k((unsigned long)__va(0), 1);
#endif
}

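/*
 * x86_virt_spec_ctrl - update speculation control MSRs around guest entry/exit
 * @guest_spec_ctrl:      the guest's value of MSR_SPEC_CTRL
 * @guest_virt_spec_ctrl: the guest's controlled bits of MSR_VIRT_SPEC_CTRL
 * @setguest:             true when switching to the guest value, false when
 *                        restoring the host value
 */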
void
x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
{
	u64 msrval, guestval, hostval = x86_spec_ctrl_base;
	struct thread_info *ti = current_thread_info();

	/* Is MSR_SPEC_CTRL implemented ? */
	if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
		/*
		 * Restrict guest_spec_ctrl to supported values. Clear the
		 * modifiable bits in the host base value and or the
		 * modifiable bits from the guest value.
		 */
		guestval = hostval & ~x86_spec_ctrl_mask;
		guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;

		/* SSBD controlled in MSR_SPEC_CTRL */
		if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
		    static_cpu_has(X86_FEATURE_AMD_SSBD))
			hostval |= ssbd_tif_to_spec_ctrl(ti->flags);

		if (hostval != guestval) {
			msrval = setguest ? guestval : hostval;
			wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
		}
	}

	/*
	 * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
	 * MSR_AMD64_LS_CFG or MSR_VIRT_SPEC_CTRL if supported.
	 */
	if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
	    !static_cpu_has(X86_FEATURE_VIRT_SSBD))
		return;

	/*
	 * If the host has SSBD mitigation enabled, force it in the host's
	 * virtual MSR value. If it's not permanently enabled, evaluate
	 * current's TIF_SSBD thread flag.
	 */
	if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE))
		hostval = SPEC_CTRL_SSBD;
	else
		hostval = ssbd_tif_to_spec_ctrl(ti->flags);

	/* Sanitize the guest value */
	guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD;

	if (hostval != guestval) {
		unsigned long tif;

		tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
				 ssbd_spec_ctrl_to_tif(hostval);

		speculation_ctrl_update(tif);
	}
}
EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);

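/* Engage SSBD on AMD CPUs which do not control it via MSR_SPEC_CTRL. */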
static void x86_amd_ssb_disable(void)
{
	u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask;

	if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
		wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD);
	else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
		wrmsrl(MSR_AMD64_LS_CFG, msrval);
}

#undef pr_fmt
#define pr_fmt(fmt)     "Spectre V2 : " fmt

static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
	SPECTRE_V2_NONE;

#ifdef RETPOLINE
static bool spectre_v2_bad_module;

bool retpoline_module_ok(bool has_retpoline)
{
	if (spectre_v2_enabled == SPECTRE_V2_NONE || has_retpoline)
		return true;

	pr_err("System may be vulnerable to spectre v2\n");
	spectre_v2_bad_module = true;
	return false;
}

static inline const char *spectre_v2_module_string(void)
{
	return spectre_v2_bad_module ? " - vulnerable module loaded" : "";
}
#else
static inline const char *spectre_v2_module_string(void) { return ""; }
#endif

static inline bool match_option(const char *arg, int arglen, const char *opt)
{
	int len = strlen(opt);

	return len == arglen && !strncmp(arg, opt, len);
}

/* The kernel command line selection for spectre v2 */
enum spectre_v2_mitigation_cmd {
	SPECTRE_V2_CMD_NONE,
	SPECTRE_V2_CMD_AUTO,
	SPECTRE_V2_CMD_FORCE,
	SPECTRE_V2_CMD_RETPOLINE,
	SPECTRE_V2_CMD_RETPOLINE_GENERIC,
	SPECTRE_V2_CMD_RETPOLINE_AMD,
};

static const char * const spectre_v2_strings[] = {
	[SPECTRE_V2_NONE]			= "Vulnerable",
	[SPECTRE_V2_RETPOLINE_GENERIC]		= "Mitigation: Full generic retpoline",
	[SPECTRE_V2_RETPOLINE_AMD]		= "Mitigation: Full AMD retpoline",
	[SPECTRE_V2_IBRS_ENHANCED]		= "Mitigation: Enhanced IBRS",
};

static const struct {
	const char *option;
	enum spectre_v2_mitigation_cmd cmd;
	bool secure;
} mitigation_options[] __initdata = {
	{ "off",		SPECTRE_V2_CMD_NONE,		  false },
	{ "on",			SPECTRE_V2_CMD_FORCE,		  true  },
	{ "retpoline",		SPECTRE_V2_CMD_RETPOLINE,	  false },
	{ "retpoline,amd",	SPECTRE_V2_CMD_RETPOLINE_AMD,	  false },
	{ "retpoline,generic",	SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
	{ "auto",		SPECTRE_V2_CMD_AUTO,		  false },
};

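/*
 * Print the command line selection only when it is notable: an insecure
 * choice on an affected CPU, or a forced-on choice on an unaffected one.
 */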
static void __init spec_v2_print_cond(const char *reason, bool secure)
{
	if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
		pr_info("%s selected on command line.\n", reason);
}

static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
{
	enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
	char arg[20];
	int ret, i;

	if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
		return SPECTRE_V2_CMD_NONE;

	ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
	if (ret < 0)
		return SPECTRE_V2_CMD_AUTO;

	for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
		if (!match_option(arg, ret, mitigation_options[i].option))
			continue;
		cmd = mitigation_options[i].cmd;
		break;
	}

	if (i >= ARRAY_SIZE(mitigation_options)) {
		pr_err("unknown option (%s). Switching to AUTO select\n", arg);
		return SPECTRE_V2_CMD_AUTO;
	}

	if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
	     cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
	     cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
	    !IS_ENABLED(CONFIG_RETPOLINE)) {
		pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
		return SPECTRE_V2_CMD_AUTO;
	}

	if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
		pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
		return SPECTRE_V2_CMD_AUTO;
	}

	spec_v2_print_cond(mitigation_options[i].option,
			   mitigation_options[i].secure);
	return cmd;
}

static void __init spectre_v2_select_mitigation(void)
{
	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
	enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;

	/*
	 * If the CPU is not affected and the command line mode is NONE or AUTO
	 * then nothing to do.
	 */
	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
	    (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
		return;

	switch (cmd) {
	case SPECTRE_V2_CMD_NONE:
		return;

	case SPECTRE_V2_CMD_FORCE:
	case SPECTRE_V2_CMD_AUTO:
		if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) {
			mode = SPECTRE_V2_IBRS_ENHANCED;
			/* Force it so VMEXIT will restore correctly */
			x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
			wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
			goto specv2_set_mode;
		}
		if (IS_ENABLED(CONFIG_RETPOLINE))
			goto retpoline_auto;
		break;
	case SPECTRE_V2_CMD_RETPOLINE_AMD:
		if (IS_ENABLED(CONFIG_RETPOLINE))
			goto retpoline_amd;
		break;
	case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
		if (IS_ENABLED(CONFIG_RETPOLINE))
			goto retpoline_generic;
		break;
	case SPECTRE_V2_CMD_RETPOLINE:
		if (IS_ENABLED(CONFIG_RETPOLINE))
			goto retpoline_auto;
		break;
	}
	pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!");
	return;

retpoline_auto:
	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
	retpoline_amd:
		if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
			pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
			goto retpoline_generic;
		}
		mode = SPECTRE_V2_RETPOLINE_AMD;
		setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
	} else {
	retpoline_generic:
		mode = SPECTRE_V2_RETPOLINE_GENERIC;
		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
	}

specv2_set_mode:
	spectre_v2_enabled = mode;
	pr_info("%s\n", spectre_v2_strings[mode]);

	/*
	 * If spectre v2 protection has been enabled, unconditionally fill
	 * RSB during a context switch; this protects against two independent
	 * issues:
	 *
	 *	- RSB underflow (and switch to BTB) on Skylake+
	 *	- SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs
	 */
	setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
	pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");

	/* Initialize Indirect Branch Prediction Barrier if supported */
	if (boot_cpu_has(X86_FEATURE_IBPB)) {
		setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
		pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
	}

	/*
	 * Retpoline means the kernel is safe because it has no indirect
	 * branches. Enhanced IBRS protects firmware too, so, enable restricted
	 * speculation around firmware calls only when Enhanced IBRS isn't
	 * supported.
	 *
	 * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
	 * the user might select retpoline on the kernel command line and if
	 * the CPU supports Enhanced IBRS, kernel might un-intentionally not
	 * enable IBRS around firmware calls.
	 */
	if (boot_cpu_has(X86_FEATURE_IBRS) && mode != SPECTRE_V2_IBRS_ENHANCED) {
		setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
		pr_info("Enabling Restricted Speculation for firmware calls\n");
	}

	/* Enable STIBP if appropriate */
	arch_smt_update();
}

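/*
 * STIBP is only needed when a Spectre v2 mitigation other than enhanced IBRS
 * is in use and the CPU supports it. arch_smt_update() toggles the STIBP bit
 * in x86_spec_ctrl_base when the SMT state changes and writes the new value
 * on all online CPUs via update_stibp_msr().
 */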
static bool stibp_needed(void)
{
	if (spectre_v2_enabled == SPECTRE_V2_NONE)
		return false;

	/* Enhanced IBRS makes using STIBP unnecessary. */
	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
		return false;

	if (!boot_cpu_has(X86_FEATURE_STIBP))
		return false;

	return true;
}

static void update_stibp_msr(void *info)
{
	wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
}

void arch_smt_update(void)
{
	u64 mask;

	if (!stibp_needed())
		return;

	mutex_lock(&spec_ctrl_mutex);

	mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP;
	if (sched_smt_active())
		mask |= SPEC_CTRL_STIBP;

	if (mask != x86_spec_ctrl_base) {
		pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n",
			mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling");
		x86_spec_ctrl_base = mask;
		on_each_cpu(update_stibp_msr, NULL, 1);
	}
	mutex_unlock(&spec_ctrl_mutex);
}

#undef pr_fmt
#define pr_fmt(fmt)	"Speculative Store Bypass: " fmt

static enum ssb_mitigation ssb_mode __ro_after_init = SPEC_STORE_BYPASS_NONE;

/* The kernel command line selection */
enum ssb_mitigation_cmd {
	SPEC_STORE_BYPASS_CMD_NONE,
	SPEC_STORE_BYPASS_CMD_AUTO,
	SPEC_STORE_BYPASS_CMD_ON,
463
	SPEC_STORE_BYPASS_CMD_PRCTL,
464
	SPEC_STORE_BYPASS_CMD_SECCOMP,
465 466
};

467
static const char * const ssb_strings[] = {
468
	[SPEC_STORE_BYPASS_NONE]	= "Vulnerable",
469
	[SPEC_STORE_BYPASS_DISABLE]	= "Mitigation: Speculative Store Bypass disabled",
470 471
	[SPEC_STORE_BYPASS_PRCTL]	= "Mitigation: Speculative Store Bypass disabled via prctl",
	[SPEC_STORE_BYPASS_SECCOMP]	= "Mitigation: Speculative Store Bypass disabled via prctl and seccomp",
472 473 474 475 476
};

static const struct {
	const char *option;
	enum ssb_mitigation_cmd cmd;
477
} ssb_mitigation_options[]  __initdata = {
478 479 480 481 482
	{ "auto",	SPEC_STORE_BYPASS_CMD_AUTO },    /* Platform decides */
	{ "on",		SPEC_STORE_BYPASS_CMD_ON },      /* Disable Speculative Store Bypass */
	{ "off",	SPEC_STORE_BYPASS_CMD_NONE },    /* Don't touch Speculative Store Bypass */
	{ "prctl",	SPEC_STORE_BYPASS_CMD_PRCTL },   /* Disable Speculative Store Bypass via prctl */
	{ "seccomp",	SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */
483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515
};

static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
{
	enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO;
	char arg[20];
	int ret, i;

	if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) {
		return SPEC_STORE_BYPASS_CMD_NONE;
	} else {
		ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
					  arg, sizeof(arg));
		if (ret < 0)
			return SPEC_STORE_BYPASS_CMD_AUTO;

		for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) {
			if (!match_option(arg, ret, ssb_mitigation_options[i].option))
				continue;

			cmd = ssb_mitigation_options[i].cmd;
			break;
		}

		if (i >= ARRAY_SIZE(ssb_mitigation_options)) {
			pr_err("unknown option (%s). Switching to AUTO select\n", arg);
			return SPEC_STORE_BYPASS_CMD_AUTO;
		}
	}

	return cmd;
}

static enum ssb_mitigation __init __ssb_select_mitigation(void)
{
	enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE;
	enum ssb_mitigation_cmd cmd;

	if (!boot_cpu_has(X86_FEATURE_SSBD))
		return mode;

	cmd = ssb_parse_cmdline();
	if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) &&
	    (cmd == SPEC_STORE_BYPASS_CMD_NONE ||
	     cmd == SPEC_STORE_BYPASS_CMD_AUTO))
		return mode;

	switch (cmd) {
	case SPEC_STORE_BYPASS_CMD_AUTO:
	case SPEC_STORE_BYPASS_CMD_SECCOMP:
		/*
		 * Choose prctl+seccomp as the default mode if seccomp is
		 * enabled.
		 */
		if (IS_ENABLED(CONFIG_SECCOMP))
			mode = SPEC_STORE_BYPASS_SECCOMP;
		else
			mode = SPEC_STORE_BYPASS_PRCTL;
		break;
	case SPEC_STORE_BYPASS_CMD_ON:
		mode = SPEC_STORE_BYPASS_DISABLE;
		break;
	case SPEC_STORE_BYPASS_CMD_PRCTL:
		mode = SPEC_STORE_BYPASS_PRCTL;
		break;
	case SPEC_STORE_BYPASS_CMD_NONE:
		break;
	}

	/*
	 * We have three CPU feature flags that are in play here:
	 *  - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
	 *  - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
	 *  - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation
	 */
	if (mode == SPEC_STORE_BYPASS_DISABLE) {
		setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
		/*
		 * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may
		 * use a completely different MSR and bit dependent on family.
		 */
		if (!static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) &&
		    !static_cpu_has(X86_FEATURE_AMD_SSBD)) {
			x86_amd_ssb_disable();
		} else {
			x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
			x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
			wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
		}
	}

	return mode;
}

static void ssb_select_mitigation(void)
{
	ssb_mode = __ssb_select_mitigation();

	if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
		pr_info("%s\n", ssb_strings[ssb_mode]);
}

#undef pr_fmt
#define pr_fmt(fmt)     "Speculation prctl: " fmt

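/* prctl(PR_SET_SPECULATION_CTRL) handler for PR_SPEC_STORE_BYPASS. */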
static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
{
	bool update;

	if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
	    ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
		return -ENXIO;

	switch (ctrl) {
	case PR_SPEC_ENABLE:
		/* If speculation is force disabled, enable is not allowed */
		if (task_spec_ssb_force_disable(task))
			return -EPERM;
		task_clear_spec_ssb_disable(task);
		update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
		break;
	case PR_SPEC_DISABLE:
		task_set_spec_ssb_disable(task);
		update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
		break;
	case PR_SPEC_FORCE_DISABLE:
		task_set_spec_ssb_disable(task);
		task_set_spec_ssb_force_disable(task);
		update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
		break;
	default:
		return -ERANGE;
	}

	/*
	 * If being set on non-current task, delay setting the CPU
	 * mitigation until it is next scheduled.
	 */
	if (task == current && update)
		speculation_ctrl_update_current();

	return 0;
}

int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
			     unsigned long ctrl)
{
	switch (which) {
	case PR_SPEC_STORE_BYPASS:
		return ssb_prctl_set(task, ctrl);
	default:
		return -ENODEV;
	}
}

#ifdef CONFIG_SECCOMP
void arch_seccomp_spec_mitigate(struct task_struct *task)
{
	if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
		ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
}
#endif

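/* prctl(PR_GET_SPECULATION_CTRL) handler for PR_SPEC_STORE_BYPASS. */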
static int ssb_prctl_get(struct task_struct *task)
{
	switch (ssb_mode) {
	case SPEC_STORE_BYPASS_DISABLE:
		return PR_SPEC_DISABLE;
	case SPEC_STORE_BYPASS_SECCOMP:
	case SPEC_STORE_BYPASS_PRCTL:
		if (task_spec_ssb_force_disable(task))
			return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
		if (task_spec_ssb_disable(task))
			return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
		return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
	default:
		if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
			return PR_SPEC_ENABLE;
		return PR_SPEC_NOT_AFFECTED;
	}
}

int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
{
	switch (which) {
	case PR_SPEC_STORE_BYPASS:
		return ssb_prctl_get(task);
	default:
		return -ENODEV;
	}
}

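/* Mirror the boot CPU's speculation control setup on APs during bringup. */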
void x86_spec_ctrl_setup_ap(void)
{
	if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
		wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);

	if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
		x86_amd_ssb_disable();
}

#undef pr_fmt
#define pr_fmt(fmt)	"L1TF: " fmt

/* Default mitigation for L1TF-affected CPUs */
enum l1tf_mitigations l1tf_mitigation __ro_after_init = L1TF_MITIGATION_FLUSH;
#if IS_ENABLED(CONFIG_KVM_INTEL)
EXPORT_SYMBOL_GPL(l1tf_mitigation);
#endif
enum vmx_l1d_flush_state l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
EXPORT_SYMBOL_GPL(l1tf_vmx_mitigation);

/*
 * These CPUs all support 44bits physical address space internally in the
 * cache but CPUID can report a smaller number of physical address bits.
 *
 * The L1TF mitigation uses the top most address bit for the inversion of
 * non present PTEs. When the installed memory reaches into the top most
 * address bit due to memory holes, which has been observed on machines
 * which report 36bits physical address bits and have 32G RAM installed,
 * then the mitigation range check in l1tf_select_mitigation() triggers.
 * This is a false positive because the mitigation is still possible due to
 * the fact that the cache uses 44bit internally. Use the cache bits
 * instead of the reported physical bits and adjust them on the affected
 * machines to 44bit if the reported bits are less than 44.
 */
static void override_cache_bits(struct cpuinfo_x86 *c)
{
	if (c->x86 != 6)
		return;

	switch (c->x86_model) {
	case INTEL_FAM6_NEHALEM:
	case INTEL_FAM6_WESTMERE:
	case INTEL_FAM6_SANDYBRIDGE:
	case INTEL_FAM6_IVYBRIDGE:
	case INTEL_FAM6_HASWELL_CORE:
	case INTEL_FAM6_HASWELL_ULT:
	case INTEL_FAM6_HASWELL_GT3E:
	case INTEL_FAM6_BROADWELL_CORE:
	case INTEL_FAM6_BROADWELL_GT3E:
	case INTEL_FAM6_SKYLAKE_MOBILE:
	case INTEL_FAM6_SKYLAKE_DESKTOP:
	case INTEL_FAM6_KABYLAKE_MOBILE:
	case INTEL_FAM6_KABYLAKE_DESKTOP:
		if (c->x86_cache_bits < 44)
			c->x86_cache_bits = 44;
		break;
	}
}

static void __init l1tf_select_mitigation(void)
{
	u64 half_pa;

	if (!boot_cpu_has_bug(X86_BUG_L1TF))
		return;

	override_cache_bits(&boot_cpu_data);

	switch (l1tf_mitigation) {
	case L1TF_MITIGATION_OFF:
	case L1TF_MITIGATION_FLUSH_NOWARN:
	case L1TF_MITIGATION_FLUSH:
		break;
	case L1TF_MITIGATION_FLUSH_NOSMT:
	case L1TF_MITIGATION_FULL:
		cpu_smt_disable(false);
		break;
	case L1TF_MITIGATION_FULL_FORCE:
		cpu_smt_disable(true);
		break;
	}

#if CONFIG_PGTABLE_LEVELS == 2
	pr_warn("Kernel not compiled for PAE. No mitigation for L1TF\n");
	return;
#endif

	half_pa = (u64)l1tf_pfn_limit() << PAGE_SHIFT;
	if (e820__mapped_any(half_pa, ULLONG_MAX - half_pa, E820_TYPE_RAM)) {
		pr_warn("System has more than MAX_PA/2 memory. L1TF mitigation not effective.\n");
		pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n",
				half_pa);
		pr_info("However, doing so will make a part of your RAM unusable.\n");
		pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html might help you decide.\n");
		return;
	}

	setup_force_cpu_cap(X86_FEATURE_L1TF_PTEINV);
}

static int __init l1tf_cmdline(char *str)
{
	if (!boot_cpu_has_bug(X86_BUG_L1TF))
		return 0;

	if (!str)
		return -EINVAL;

	if (!strcmp(str, "off"))
		l1tf_mitigation = L1TF_MITIGATION_OFF;
	else if (!strcmp(str, "flush,nowarn"))
		l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOWARN;
	else if (!strcmp(str, "flush"))
		l1tf_mitigation = L1TF_MITIGATION_FLUSH;
	else if (!strcmp(str, "flush,nosmt"))
		l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT;
	else if (!strcmp(str, "full"))
		l1tf_mitigation = L1TF_MITIGATION_FULL;
	else if (!strcmp(str, "full,force"))
		l1tf_mitigation = L1TF_MITIGATION_FULL_FORCE;

	return 0;
}
early_param("l1tf", l1tf_cmdline);

#undef pr_fmt

#ifdef CONFIG_SYSFS

#define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion"

#if IS_ENABLED(CONFIG_KVM_INTEL)
static const char * const l1tf_vmx_states[] = {
	[VMENTER_L1D_FLUSH_AUTO]		= "auto",
	[VMENTER_L1D_FLUSH_NEVER]		= "vulnerable",
	[VMENTER_L1D_FLUSH_COND]		= "conditional cache flushes",
	[VMENTER_L1D_FLUSH_ALWAYS]		= "cache flushes",
	[VMENTER_L1D_FLUSH_EPT_DISABLED]	= "EPT disabled",
	[VMENTER_L1D_FLUSH_NOT_REQUIRED]	= "flush not necessary"
};

static ssize_t l1tf_show_state(char *buf)
{
	if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO)
		return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);

	if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED ||
	    (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER &&
	     sched_smt_active())) {
		return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG,
			       l1tf_vmx_states[l1tf_vmx_mitigation]);
	}

	return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG,
		       l1tf_vmx_states[l1tf_vmx_mitigation],
		       sched_smt_active() ? "vulnerable" : "disabled");
}
#else
static ssize_t l1tf_show_state(char *buf)
{
	return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
}
#endif

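/* Helpers formatting the STIBP/IBPB parts of the spectre_v2 sysfs output. */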
static char *stibp_state(void)
{
	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
		return "";

	if (x86_spec_ctrl_base & SPEC_CTRL_STIBP)
		return ", STIBP";
	else
		return "";
}

static char *ibpb_state(void)
{
	if (boot_cpu_has(X86_FEATURE_USE_IBPB))
		return ", IBPB";
	else
		return "";
}

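/* Common backend for the sysfs vulnerabilities files. */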
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
			       char *buf, unsigned int bug)
{
	if (!boot_cpu_has_bug(bug))
		return sprintf(buf, "Not affected\n");

	switch (bug) {
	case X86_BUG_CPU_MELTDOWN:
		if (boot_cpu_has(X86_FEATURE_PTI))
			return sprintf(buf, "Mitigation: PTI\n");

		break;

	case X86_BUG_SPECTRE_V1:
		return sprintf(buf, "Mitigation: __user pointer sanitization\n");

	case X86_BUG_SPECTRE_V2:
		return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
			       ibpb_state(),
			       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
			       stibp_state(),
			       boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
			       spectre_v2_module_string());

	case X86_BUG_SPEC_STORE_BYPASS:
		return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);

	case X86_BUG_L1TF:
		if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV))
			return l1tf_show_state(buf);
		break;
	default:
		break;
	}

	return sprintf(buf, "Vulnerable\n");
}

ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
{
	return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN);
}

ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
{
	return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1);
}

ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
{
	return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2);
}

ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
{
	return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
}

ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf)
{
	return cpu_show_common(dev, attr, buf, X86_BUG_L1TF);
}
#endif