/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "irq.h"
#include "mmu.h"
#include "cpuid.h"

#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/moduleparam.h>
#include <linux/mod_devicetable.h>
#include <linux/trace_events.h>
#include <linux/slab.h>
#include <linux/tboot.h>
#include <linux/hrtimer.h>
#include <linux/nospec.h>
#include "kvm_cache_regs.h"
#include "x86.h"

#include <asm/cpu.h>
#include <asm/io.h>
#include <asm/desc.h>
#include <asm/vmx.h>
#include <asm/virtext.h>
#include <asm/mce.h>
#include <asm/fpu/internal.h>
#include <asm/perf_event.h>
#include <asm/debugreg.h>
#include <asm/kexec.h>
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include <asm/microcode.h>
#include <asm/spec-ctrl.h>

#include "trace.h"
#include "pmu.h"

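/*
 * Wrap a VMX instruction with an exception fixup so that a fault taken while
 * VMX is being torn down (e.g. on emergency reboot) is handled gracefully;
 * the __ex_clear() variant additionally zeroes the given register on the
 * fault path so failed reads return 0.
 */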
#define __ex(x) __kvm_handle_fault_on_reboot(x)
#define __ex_clear(x, reg) \
	____kvm_handle_fault_on_reboot(x, "xor " reg " , " reg)

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

static const struct x86_cpu_id vmx_cpu_id[] = {
	X86_FEATURE_MATCH(X86_FEATURE_VMX),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);

static bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);

static bool __read_mostly flexpriority_enabled = 1;
module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);

static bool __read_mostly enable_ept = 1;
module_param_named(ept, enable_ept, bool, S_IRUGO);

static bool __read_mostly enable_unrestricted_guest = 1;
module_param_named(unrestricted_guest,
			enable_unrestricted_guest, bool, S_IRUGO);

static bool __read_mostly enable_ept_ad_bits = 1;
module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);

static bool __read_mostly emulate_invalid_guest_state = true;
module_param(emulate_invalid_guest_state, bool, S_IRUGO);

static bool __read_mostly vmm_exclusive = 1;
module_param(vmm_exclusive, bool, S_IRUGO);

static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);

static bool __read_mostly enable_apicv = 1;
module_param(enable_apicv, bool, S_IRUGO);

static bool __read_mostly enable_shadow_vmcs = 1;
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
/*
 * If nested=1, nested virtualization is supported, i.e., guests may use
 * VMX and act as hypervisors for their own guests. If nested=0, guests may not
 * use VMX instructions.
 */
static bool __read_mostly nested = 0;
module_param(nested, bool, S_IRUGO);

static u64 __read_mostly host_xss;

static bool __read_mostly enable_pml = 1;
module_param_named(pml, enable_pml, bool, S_IRUGO);

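/*
 * MSR_TYPE_* select which accesses (read and/or write) an MSR bitmap update
 * applies to; the MSR_BITMAP_MODE_* flags are cached in
 * vcpu_vmx.msr_bitmap_mode to track how the per-vCPU MSR bitmap is currently
 * configured.
 */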
#define MSR_TYPE_R	1
#define MSR_TYPE_W	2
#define MSR_TYPE_RW	3

#define MSR_BITMAP_MODE_X2APIC		1
#define MSR_BITMAP_MODE_X2APIC_APICV	2
#define MSR_BITMAP_MODE_LM		4

#define KVM_VMX_TSC_MULTIPLIER_MAX     0xffffffffffffffffULL

#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
#define KVM_VM_CR0_ALWAYS_ON						\
	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
#define KVM_CR4_GUEST_OWNED_BITS				      \
	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR      \
	 | X86_CR4_OSXMMEXCPT | X86_CR4_TSD)

#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)

#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))

#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5

#define VMX_VPID_EXTENT_SUPPORTED_MASK		\
	(VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT |	\
	VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |	\
	VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT |	\
	VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)

/*
 * These 2 parameters are used to configure the controls for Pause-Loop Exiting:
 * ple_gap:    upper bound on the amount of time between two successive
 *             executions of PAUSE in a loop. Also indicates whether PLE is
 *             enabled.
 *             According to tests, this time is usually smaller than 128 cycles.
 * ple_window: upper bound on the amount of time a guest is allowed to execute
 *             in a PAUSE loop. Tests indicate that most spinlocks are held for
 *             less than 2^12 cycles
 * Time is measured based on a counter that runs at the same rate as the TSC,
 * refer to SDM volume 3b, sections 21.6.13 & 22.1.3.
 */
#define KVM_VMX_DEFAULT_PLE_GAP           128
#define KVM_VMX_DEFAULT_PLE_WINDOW        4096
#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW   2
#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX    \
		INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW

static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
module_param(ple_gap, int, S_IRUGO);

static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
module_param(ple_window, int, S_IRUGO);

/* Default doubles per-vcpu window every exit. */
static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
module_param(ple_window_grow, int, S_IRUGO);

/* Default resets per-vcpu window every exit to ple_window. */
static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
module_param(ple_window_shrink, int, S_IRUGO);

/* Default is to compute the maximum so we can never overflow. */
static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
static int ple_window_max        = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
module_param(ple_window_max, int, S_IRUGO);

extern const ulong vmx_return;

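/* Size of the guest/host MSR auto-load/store arrays in struct vcpu_vmx. */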
#define NR_AUTOLOAD_MSRS 8

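/*
 * Layout of a hardware VMCS region: the architected revision identifier and
 * VMX-abort indicator, followed by implementation-specific data.
 */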
struct vmcs {
	u32 revision_id;
	u32 abort;
	char data[0];
};

/*
 * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
 * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
 * loaded on this CPU (so we can clear them if the CPU goes down).
 */
struct loaded_vmcs {
	struct vmcs *vmcs;
	int cpu;
	int launched;
	unsigned long *msr_bitmap;
	struct list_head loaded_vmcss_on_cpu_link;
};

struct shared_msr_entry {
	unsigned index;
	u64 data;
	u64 mask;
};

/*
 * struct vmcs12 describes the state that our guest hypervisor (L1) keeps for a
 * single nested guest (L2), hence the name vmcs12. Any VMX implementation has
 * a VMCS structure, and vmcs12 is our emulated VMX's VMCS. This structure is
 * stored in guest memory specified by VMPTRLD, but is opaque to the guest,
 * which must access it using VMREAD/VMWRITE/VMCLEAR instructions.
 * More than one of these structures may exist, if L1 runs multiple L2 guests.
 * nested_vmx_run() will use the data here to build the vmcs02: a VMCS for the
 * underlying hardware which will be used to run L2.
 * This structure is packed to ensure that its layout is identical across
 * machines (necessary for live migration).
 * If there are changes in this struct, VMCS12_REVISION must be changed.
 */
typedef u64 natural_width;
struct __packed vmcs12 {
	/* According to the Intel spec, a VMCS region must start with the
	 * following two fields. Then follow implementation-specific data.
	 */
	u32 revision_id;
	u32 abort;

	u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
	u32 padding[7]; /* room for future expansion */

	u64 io_bitmap_a;
	u64 io_bitmap_b;
	u64 msr_bitmap;
	u64 vm_exit_msr_store_addr;
	u64 vm_exit_msr_load_addr;
	u64 vm_entry_msr_load_addr;
	u64 tsc_offset;
	u64 virtual_apic_page_addr;
	u64 apic_access_addr;
	u64 posted_intr_desc_addr;
	u64 ept_pointer;
	u64 eoi_exit_bitmap0;
	u64 eoi_exit_bitmap1;
	u64 eoi_exit_bitmap2;
	u64 eoi_exit_bitmap3;
	u64 xss_exit_bitmap;
	u64 guest_physical_address;
	u64 vmcs_link_pointer;
	u64 guest_ia32_debugctl;
	u64 guest_ia32_pat;
	u64 guest_ia32_efer;
	u64 guest_ia32_perf_global_ctrl;
	u64 guest_pdptr0;
	u64 guest_pdptr1;
	u64 guest_pdptr2;
	u64 guest_pdptr3;
	u64 guest_bndcfgs;
	u64 host_ia32_pat;
	u64 host_ia32_efer;
	u64 host_ia32_perf_global_ctrl;
	u64 padding64[8]; /* room for future expansion */
	/*
	 * To allow migration of L1 (complete with its L2 guests) between
	 * machines of different natural widths (32 or 64 bit), we cannot have
	 * unsigned long fields with no explicit size. We use u64 (aliased
	 * natural_width) instead. Luckily, x86 is little-endian.
	 */
	natural_width cr0_guest_host_mask;
	natural_width cr4_guest_host_mask;
	natural_width cr0_read_shadow;
	natural_width cr4_read_shadow;
	natural_width cr3_target_value0;
	natural_width cr3_target_value1;
	natural_width cr3_target_value2;
	natural_width cr3_target_value3;
	natural_width exit_qualification;
	natural_width guest_linear_address;
	natural_width guest_cr0;
	natural_width guest_cr3;
	natural_width guest_cr4;
	natural_width guest_es_base;
	natural_width guest_cs_base;
	natural_width guest_ss_base;
	natural_width guest_ds_base;
	natural_width guest_fs_base;
	natural_width guest_gs_base;
	natural_width guest_ldtr_base;
	natural_width guest_tr_base;
	natural_width guest_gdtr_base;
	natural_width guest_idtr_base;
	natural_width guest_dr7;
	natural_width guest_rsp;
	natural_width guest_rip;
	natural_width guest_rflags;
	natural_width guest_pending_dbg_exceptions;
	natural_width guest_sysenter_esp;
	natural_width guest_sysenter_eip;
	natural_width host_cr0;
	natural_width host_cr3;
	natural_width host_cr4;
	natural_width host_fs_base;
	natural_width host_gs_base;
	natural_width host_tr_base;
	natural_width host_gdtr_base;
	natural_width host_idtr_base;
	natural_width host_ia32_sysenter_esp;
	natural_width host_ia32_sysenter_eip;
	natural_width host_rsp;
	natural_width host_rip;
	natural_width paddingl[8]; /* room for future expansion */
	u32 pin_based_vm_exec_control;
	u32 cpu_based_vm_exec_control;
	u32 exception_bitmap;
	u32 page_fault_error_code_mask;
	u32 page_fault_error_code_match;
	u32 cr3_target_count;
	u32 vm_exit_controls;
	u32 vm_exit_msr_store_count;
	u32 vm_exit_msr_load_count;
	u32 vm_entry_controls;
	u32 vm_entry_msr_load_count;
	u32 vm_entry_intr_info_field;
	u32 vm_entry_exception_error_code;
	u32 vm_entry_instruction_len;
	u32 tpr_threshold;
	u32 secondary_vm_exec_control;
	u32 vm_instruction_error;
	u32 vm_exit_reason;
	u32 vm_exit_intr_info;
	u32 vm_exit_intr_error_code;
	u32 idt_vectoring_info_field;
	u32 idt_vectoring_error_code;
	u32 vm_exit_instruction_len;
	u32 vmx_instruction_info;
	u32 guest_es_limit;
	u32 guest_cs_limit;
	u32 guest_ss_limit;
	u32 guest_ds_limit;
	u32 guest_fs_limit;
	u32 guest_gs_limit;
	u32 guest_ldtr_limit;
	u32 guest_tr_limit;
	u32 guest_gdtr_limit;
	u32 guest_idtr_limit;
	u32 guest_es_ar_bytes;
	u32 guest_cs_ar_bytes;
	u32 guest_ss_ar_bytes;
	u32 guest_ds_ar_bytes;
	u32 guest_fs_ar_bytes;
	u32 guest_gs_ar_bytes;
	u32 guest_ldtr_ar_bytes;
	u32 guest_tr_ar_bytes;
	u32 guest_interruptibility_info;
	u32 guest_activity_state;
	u32 guest_sysenter_cs;
	u32 host_ia32_sysenter_cs;
	u32 vmx_preemption_timer_value;
	u32 padding32[7]; /* room for future expansion */
	u16 virtual_processor_id;
	u16 posted_intr_nv;
	u16 guest_es_selector;
	u16 guest_cs_selector;
	u16 guest_ss_selector;
	u16 guest_ds_selector;
	u16 guest_fs_selector;
	u16 guest_gs_selector;
	u16 guest_ldtr_selector;
	u16 guest_tr_selector;
	u16 guest_intr_status;
	u16 host_es_selector;
	u16 host_cs_selector;
	u16 host_ss_selector;
	u16 host_ds_selector;
	u16 host_fs_selector;
	u16 host_gs_selector;
	u16 host_tr_selector;
};

/*
 * VMCS12_REVISION is an arbitrary id that should be changed if the content or
 * layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and
 * VMPTRLD verifies that the VMCS region that L1 is loading contains this id.
 */
#define VMCS12_REVISION 0x11e57ed0

/*
 * VMCS12_SIZE is the number of bytes L1 should allocate for the VMXON region
 * and any VMCS region. Although only sizeof(struct vmcs12) bytes are used by
 * the current implementation, 4K is reserved to avoid future complications.
 */
#define VMCS12_SIZE 0x1000

/*
 * The nested_vmx structure is part of vcpu_vmx, and holds information we need
 * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
 */
struct nested_vmx {
	/* Has the level1 guest done vmxon? */
	bool vmxon;
	gpa_t vmxon_ptr;

	/* The guest-physical address of the current VMCS L1 keeps for L2 */
	gpa_t current_vmptr;
	/* The host-usable pointer to the above */
	struct page *current_vmcs12_page;
	struct vmcs12 *current_vmcs12;
	struct vmcs *current_shadow_vmcs;
	/*
	 * Indicates if the shadow vmcs must be updated with the
	 * data held by vmcs12
	 */
	bool sync_shadow_vmcs;

	u64 vmcs01_tsc_offset;
	bool change_vmcs01_virtual_x2apic_mode;
	/* L2 must run next, and mustn't decide to exit to L1. */
	bool nested_run_pending;

	struct loaded_vmcs vmcs02;

	/*
	 * Guest pages referred to in the vmcs02 with host-physical
	 * pointers, so we must keep them pinned while L2 runs.
	 */
	struct page *apic_access_page;
	struct page *virtual_apic_page;
	struct page *pi_desc_page;
	struct pi_desc *pi_desc;
	bool pi_pending;
	u16 posted_intr_nv;
	u64 msr_ia32_feature_control;

	struct hrtimer preemption_timer;
	bool preemption_timer_expired;

	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
	u64 vmcs01_debugctl;

	u16 vpid02;
	u16 last_vpid;

	u32 nested_vmx_procbased_ctls_low;
	u32 nested_vmx_procbased_ctls_high;
	u32 nested_vmx_true_procbased_ctls_low;
	u32 nested_vmx_secondary_ctls_low;
	u32 nested_vmx_secondary_ctls_high;
	u32 nested_vmx_pinbased_ctls_low;
	u32 nested_vmx_pinbased_ctls_high;
	u32 nested_vmx_exit_ctls_low;
	u32 nested_vmx_exit_ctls_high;
	u32 nested_vmx_true_exit_ctls_low;
	u32 nested_vmx_entry_ctls_low;
	u32 nested_vmx_entry_ctls_high;
	u32 nested_vmx_true_entry_ctls_low;
	u32 nested_vmx_misc_low;
	u32 nested_vmx_misc_high;
	u32 nested_vmx_ept_caps;
	u32 nested_vmx_vpid_caps;
};

#define POSTED_INTR_ON  0
#define POSTED_INTR_SN  1

/* Posted-Interrupt Descriptor */
struct pi_desc {
	u32 pir[8];     /* Posted interrupt requested */
	union {
		struct {
				/* bit 256 - Outstanding Notification */
			u16	on	: 1,
				/* bit 257 - Suppress Notification */
				sn	: 1,
				/* bit 271:258 - Reserved */
				rsvd_1	: 14;
				/* bit 279:272 - Notification Vector */
			u8	nv;
				/* bit 287:280 - Reserved */
			u8	rsvd_2;
				/* bit 319:288 - Notification Destination */
			u32	ndst;
		};
		u64 control;
	};
	u32 rsvd[6];
} __aligned(64);

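/* Atomic helpers for the ON/SN bits and the PIR of a posted-interrupt descriptor. */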
static bool pi_test_and_set_on(struct pi_desc *pi_desc)
{
	return test_and_set_bit(POSTED_INTR_ON,
			(unsigned long *)&pi_desc->control);
}

static bool pi_test_and_clear_on(struct pi_desc *pi_desc)
{
	return test_and_clear_bit(POSTED_INTR_ON,
			(unsigned long *)&pi_desc->control);
}

static int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
{
	return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
}

static inline void pi_clear_sn(struct pi_desc *pi_desc)
{
	return clear_bit(POSTED_INTR_SN,
			(unsigned long *)&pi_desc->control);
}

static inline void pi_set_sn(struct pi_desc *pi_desc)
{
	return set_bit(POSTED_INTR_SN,
			(unsigned long *)&pi_desc->control);
}

static inline int pi_test_on(struct pi_desc *pi_desc)
{
	return test_bit(POSTED_INTR_ON,
			(unsigned long *)&pi_desc->control);
}

static inline int pi_test_sn(struct pi_desc *pi_desc)
{
	return test_bit(POSTED_INTR_SN,
			(unsigned long *)&pi_desc->control);
}

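/* Per-vCPU VMX state, wrapping the architecture-neutral struct kvm_vcpu. */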
struct vcpu_vmx {
	struct kvm_vcpu       vcpu;
	unsigned long         host_rsp;
	u8                    fail;
	bool                  nmi_known_unmasked;
	u8		      msr_bitmap_mode;
	u32                   exit_intr_info;
	u32                   idt_vectoring_info;
	ulong                 rflags;
	struct shared_msr_entry *guest_msrs;
	int                   nmsrs;
	int                   save_nmsrs;
	unsigned long	      host_idt_base;
#ifdef CONFIG_X86_64
	u64 		      msr_host_kernel_gs_base;
	u64 		      msr_guest_kernel_gs_base;
#endif

	u64 		      arch_capabilities;
	u64 		      spec_ctrl;

	u32 vm_entry_controls_shadow;
	u32 vm_exit_controls_shadow;
	/*
	 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
	 * non-nested (L1) guest, it always points to vmcs01. For a nested
	 * guest (L2), it points to a different VMCS.
	 */
	struct loaded_vmcs    vmcs01;
	struct loaded_vmcs   *loaded_vmcs;
	bool                  __launched; /* temporary, used in vmx_vcpu_run */
	struct msr_autoload {
		unsigned nr;
		struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
		struct vmx_msr_entry host[NR_AUTOLOAD_MSRS];
	} msr_autoload;
	struct {
		int           loaded;
		u16           fs_sel, gs_sel, ldt_sel;
#ifdef CONFIG_X86_64
		u16           ds_sel, es_sel;
#endif
		int           gs_ldt_reload_needed;
		int           fs_reload_needed;
		u64           msr_host_bndcfgs;
		unsigned long vmcs_host_cr4;	/* May not match real cr4 */
	} host_state;
	struct {
		int vm86_active;
		ulong save_rflags;
		struct kvm_segment segs[8];
	} rmode;
	struct {
		u32 bitmask; /* 4 bits per segment (1 bit per field) */
		struct kvm_save_segment {
			u16 selector;
			unsigned long base;
			u32 limit;
			u32 ar;
		} seg[8];
	} segment_cache;
	int vpid;
	bool emulation_required;

	/* Support for vnmi-less CPUs */
	int soft_vnmi_blocked;
	ktime_t entry_time;
	s64 vnmi_blocked_time;
	u32 exit_reason;

	/* Posted interrupt descriptor */
	struct pi_desc pi_desc;

	/* Support for a guest hypervisor (nested VMX) */
	struct nested_vmx nested;

	/* Dynamic PLE window. */
	int ple_window;
	bool ple_window_dirty;

	/* Support for PML */
#define PML_ENTITY_NUM		512
	struct page *pml_pg;

	u64 current_tsc_ratio;
};

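/*
 * Indices of the cached fields of a segment register; segment_cache.bitmask
 * tracks which of them are currently valid.
 */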
enum segment_cache_field {
	SEG_FIELD_SEL = 0,
	SEG_FIELD_BASE = 1,
	SEG_FIELD_LIMIT = 2,
	SEG_FIELD_AR = 3,

	SEG_FIELD_NR = 4
};

static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
{
	return container_of(vcpu, struct vcpu_vmx, vcpu);
}

static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
{
	return &(to_vmx(vcpu)->pi_desc);
}

#define VMCS12_OFFSET(x) offsetof(struct vmcs12, x)
#define FIELD(number, name)	[number] = VMCS12_OFFSET(name)
#define FIELD64(number, name)	[number] = VMCS12_OFFSET(name), \
				[number##_HIGH] = VMCS12_OFFSET(name)+4


static unsigned long shadow_read_only_fields[] = {
	/*
	 * We do NOT shadow fields that are modified when L0
	 * traps and emulates any vmx instruction (e.g. VMPTRLD,
	 * VMXON...) executed by L1.
	 * For example, VM_INSTRUCTION_ERROR is read
	 * by L1 if a vmx instruction fails (part of the error path).
	 * Note the code assumes this logic. If for some reason
	 * we start shadowing these fields then we need to
	 * force a shadow sync when L0 emulates vmx instructions
	 * (e.g. force a sync if VM_INSTRUCTION_ERROR is modified
	 * by nested_vmx_failValid)
	 */
	VM_EXIT_REASON,
	VM_EXIT_INTR_INFO,
	VM_EXIT_INSTRUCTION_LEN,
	IDT_VECTORING_INFO_FIELD,
	IDT_VECTORING_ERROR_CODE,
	VM_EXIT_INTR_ERROR_CODE,
	EXIT_QUALIFICATION,
	GUEST_LINEAR_ADDRESS,
	GUEST_PHYSICAL_ADDRESS
};
static int max_shadow_read_only_fields =
	ARRAY_SIZE(shadow_read_only_fields);

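/*
 * Fields that L1 is expected to read and write frequently; shadowing them
 * lets those VMREAD/VMWRITE instructions complete without a VM exit.
 */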
static unsigned long shadow_read_write_fields[] = {
	TPR_THRESHOLD,
	GUEST_RIP,
	GUEST_RSP,
	GUEST_CR0,
	GUEST_CR3,
	GUEST_CR4,
	GUEST_INTERRUPTIBILITY_INFO,
	GUEST_RFLAGS,
	GUEST_CS_SELECTOR,
	GUEST_CS_AR_BYTES,
	GUEST_CS_LIMIT,
	GUEST_CS_BASE,
	GUEST_ES_BASE,
	GUEST_BNDCFGS,
	CR0_GUEST_HOST_MASK,
	CR0_READ_SHADOW,
	CR4_READ_SHADOW,
	TSC_OFFSET,
	EXCEPTION_BITMAP,
	CPU_BASED_VM_EXEC_CONTROL,
	VM_ENTRY_EXCEPTION_ERROR_CODE,
	VM_ENTRY_INTR_INFO_FIELD,
	VM_ENTRY_INSTRUCTION_LEN,
	VM_ENTRY_EXCEPTION_ERROR_CODE,
	HOST_FS_BASE,
	HOST_GS_BASE,
	HOST_FS_SELECTOR,
	HOST_GS_SELECTOR
};
static int max_shadow_read_write_fields =
	ARRAY_SIZE(shadow_read_write_fields);

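/* Map VMCS field encodings to byte offsets within struct vmcs12. */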
static const unsigned short vmcs_field_to_offset_table[] = {
	FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
	FIELD(POSTED_INTR_NV, posted_intr_nv),
	FIELD(GUEST_ES_SELECTOR, guest_es_selector),
	FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
	FIELD(GUEST_SS_SELECTOR, guest_ss_selector),
	FIELD(GUEST_DS_SELECTOR, guest_ds_selector),
	FIELD(GUEST_FS_SELECTOR, guest_fs_selector),
	FIELD(GUEST_GS_SELECTOR, guest_gs_selector),
	FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
	FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
	FIELD(GUEST_INTR_STATUS, guest_intr_status),
	FIELD(HOST_ES_SELECTOR, host_es_selector),
	FIELD(HOST_CS_SELECTOR, host_cs_selector),
	FIELD(HOST_SS_SELECTOR, host_ss_selector),
	FIELD(HOST_DS_SELECTOR, host_ds_selector),
	FIELD(HOST_FS_SELECTOR, host_fs_selector),
	FIELD(HOST_GS_SELECTOR, host_gs_selector),
	FIELD(HOST_TR_SELECTOR, host_tr_selector),
	FIELD64(IO_BITMAP_A, io_bitmap_a),
	FIELD64(IO_BITMAP_B, io_bitmap_b),
	FIELD64(MSR_BITMAP, msr_bitmap),
	FIELD64(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr),
	FIELD64(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr),
	FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr),
	FIELD64(TSC_OFFSET, tsc_offset),
	FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
	FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
	FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
	FIELD64(EPT_POINTER, ept_pointer),
	FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0),
	FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
	FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
	FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
	FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
	FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
	FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
	FIELD64(GUEST_IA32_PERF_GLOBAL_CTRL, guest_ia32_perf_global_ctrl),
	FIELD64(GUEST_PDPTR0, guest_pdptr0),
	FIELD64(GUEST_PDPTR1, guest_pdptr1),
	FIELD64(GUEST_PDPTR2, guest_pdptr2),
	FIELD64(GUEST_PDPTR3, guest_pdptr3),
	FIELD64(GUEST_BNDCFGS, guest_bndcfgs),
	FIELD64(HOST_IA32_PAT, host_ia32_pat),
	FIELD64(HOST_IA32_EFER, host_ia32_efer),
	FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
	FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control),
	FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control),
	FIELD(EXCEPTION_BITMAP, exception_bitmap),
	FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask),
	FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match),
	FIELD(CR3_TARGET_COUNT, cr3_target_count),
	FIELD(VM_EXIT_CONTROLS, vm_exit_controls),
	FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count),
	FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count),
	FIELD(VM_ENTRY_CONTROLS, vm_entry_controls),
	FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count),
	FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field),
	FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code),
	FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len),
	FIELD(TPR_THRESHOLD, tpr_threshold),
	FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control),
	FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error),
	FIELD(VM_EXIT_REASON, vm_exit_reason),
	FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info),
	FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code),
	FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field),
	FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code),
	FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len),
	FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info),
	FIELD(GUEST_ES_LIMIT, guest_es_limit),
	FIELD(GUEST_CS_LIMIT, guest_cs_limit),
	FIELD(GUEST_SS_LIMIT, guest_ss_limit),
	FIELD(GUEST_DS_LIMIT, guest_ds_limit),
	FIELD(GUEST_FS_LIMIT, guest_fs_limit),
	FIELD(GUEST_GS_LIMIT, guest_gs_limit),
	FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit),
	FIELD(GUEST_TR_LIMIT, guest_tr_limit),
	FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit),
	FIELD(GUEST_IDTR_LIMIT, guest_idtr_limit),
	FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes),
	FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes),
	FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes),
	FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes),
	FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes),
	FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes),
	FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes),
	FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes),
	FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info),
	FIELD(GUEST_ACTIVITY_STATE, guest_activity_state),
	FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs),
	FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs),
	FIELD(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value),
	FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask),
	FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask),
	FIELD(CR0_READ_SHADOW, cr0_read_shadow),
	FIELD(CR4_READ_SHADOW, cr4_read_shadow),
	FIELD(CR3_TARGET_VALUE0, cr3_target_value0),
	FIELD(CR3_TARGET_VALUE1, cr3_target_value1),
	FIELD(CR3_TARGET_VALUE2, cr3_target_value2),
	FIELD(CR3_TARGET_VALUE3, cr3_target_value3),
	FIELD(EXIT_QUALIFICATION, exit_qualification),
	FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address),
	FIELD(GUEST_CR0, guest_cr0),
	FIELD(GUEST_CR3, guest_cr3),
	FIELD(GUEST_CR4, guest_cr4),
	FIELD(GUEST_ES_BASE, guest_es_base),
	FIELD(GUEST_CS_BASE, guest_cs_base),
	FIELD(GUEST_SS_BASE, guest_ss_base),
	FIELD(GUEST_DS_BASE, guest_ds_base),
	FIELD(GUEST_FS_BASE, guest_fs_base),
	FIELD(GUEST_GS_BASE, guest_gs_base),
	FIELD(GUEST_LDTR_BASE, guest_ldtr_base),
	FIELD(GUEST_TR_BASE, guest_tr_base),
	FIELD(GUEST_GDTR_BASE, guest_gdtr_base),
	FIELD(GUEST_IDTR_BASE, guest_idtr_base),
	FIELD(GUEST_DR7, guest_dr7),
	FIELD(GUEST_RSP, guest_rsp),
	FIELD(GUEST_RIP, guest_rip),
	FIELD(GUEST_RFLAGS, guest_rflags),
	FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions),
	FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp),
	FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip),
	FIELD(HOST_CR0, host_cr0),
	FIELD(HOST_CR3, host_cr3),
	FIELD(HOST_CR4, host_cr4),
	FIELD(HOST_FS_BASE, host_fs_base),
	FIELD(HOST_GS_BASE, host_gs_base),
	FIELD(HOST_TR_BASE, host_tr_base),
	FIELD(HOST_GDTR_BASE, host_gdtr_base),
	FIELD(HOST_IDTR_BASE, host_idtr_base),
	FIELD(HOST_IA32_SYSENTER_ESP, host_ia32_sysenter_esp),
	FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip),
	FIELD(HOST_RSP, host_rsp),
	FIELD(HOST_RIP, host_rip),
};

static inline short vmcs_field_to_offset(unsigned long field)
{
	const size_t size = ARRAY_SIZE(vmcs_field_to_offset_table);
	unsigned short offset;

	BUILD_BUG_ON(size > SHRT_MAX);
	if (field >= size)
		return -ENOENT;

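	/* Sanitize the index under speculation (Spectre v1 hardening). */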
	field = array_index_nospec(field, size);
	offset = vmcs_field_to_offset_table[field];
	if (offset == 0)
		return -ENOENT;
	return offset;
}

static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
{
	return to_vmx(vcpu)->nested.current_vmcs12;
}

static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr)
{
	struct page *page = kvm_vcpu_gfn_to_page(vcpu, addr >> PAGE_SHIFT);
	if (is_error_page(page))
		return NULL;

	return page;
}

static void nested_release_page(struct page *page)
{
	kvm_release_page_dirty(page);
}

static void nested_release_page_clean(struct page *page)
{
	kvm_release_page_clean(page);
}

static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
static u64 construct_eptp(unsigned long root_hpa);
static void kvm_cpu_vmxon(u64 addr);
static void kvm_cpu_vmxoff(void);
static bool vmx_xsaves_supported(void);
static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu);
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
static void vmx_set_segment(struct kvm_vcpu *vcpu,
			    struct kvm_segment *var, int seg);
static void vmx_get_segment(struct kvm_vcpu *vcpu,
			    struct kvm_segment *var, int seg);
static bool guest_state_valid(struct kvm_vcpu *vcpu);
static u32 vmx_segment_access_rights(struct kvm_segment *var);
static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
static int alloc_identity_pagetable(struct kvm *kvm);
static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
							  u32 msr, int type);

static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
/*
 * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
 * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
 */
static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
static DEFINE_PER_CPU(struct desc_ptr, host_gdt);

/*
 * We maintain a per-CPU linked-list of vCPUs, so in wakeup_handler() we
 * can find which vCPU should be woken up.
 */
static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);

static unsigned long *vmx_io_bitmap_a;
static unsigned long *vmx_io_bitmap_b;
static unsigned long *vmx_vmread_bitmap;
static unsigned long *vmx_vmwrite_bitmap;

static bool cpu_has_load_ia32_efer;
static bool cpu_has_load_perf_global_ctrl;

static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
static DEFINE_SPINLOCK(vmx_vpid_lock);

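/*
 * Host VMX configuration, read once from the VMX capability MSRs and used to
 * program every VMCS.
 */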
static struct vmcs_config {
	int size;
	int order;
	u32 revision_id;
	u32 pin_based_exec_ctrl;
	u32 cpu_based_exec_ctrl;
	u32 cpu_based_2nd_exec_ctrl;
	u32 vmexit_ctrl;
	u32 vmentry_ctrl;
} vmcs_config;

static struct vmx_capability {
	u32 ept;
	u32 vpid;
} vmx_capability;

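/* VMCS field encodings (selector/base/limit/AR bytes) for each guest segment. */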
#define VMX_SEGMENT_FIELD(seg)					\
	[VCPU_SREG_##seg] = {                                   \
		.selector = GUEST_##seg##_SELECTOR,		\
		.base = GUEST_##seg##_BASE,		   	\
		.limit = GUEST_##seg##_LIMIT,		   	\
		.ar_bytes = GUEST_##seg##_AR_BYTES,	   	\
	}

static const struct kvm_vmx_segment_field {
	unsigned selector;
	unsigned base;
	unsigned limit;
	unsigned ar_bytes;
} kvm_vmx_segment_fields[] = {
	VMX_SEGMENT_FIELD(CS),
	VMX_SEGMENT_FIELD(DS),
	VMX_SEGMENT_FIELD(ES),
	VMX_SEGMENT_FIELD(FS),
	VMX_SEGMENT_FIELD(GS),
	VMX_SEGMENT_FIELD(SS),
	VMX_SEGMENT_FIELD(TR),
	VMX_SEGMENT_FIELD(LDTR),
};

static u64 host_efer;

static void ept_save_pdptrs(struct kvm_vcpu *vcpu);

/*
 * Keep MSR_STAR at the end, as setup_msrs() will try to optimize it
 * away by decrementing the array size.
 */
static const u32 vmx_msr_index[] = {
#ifdef CONFIG_X86_64
	MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
#endif
	MSR_EFER, MSR_TSC_AUX, MSR_STAR,
};

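/* Helpers that decode the VM-exit interruption-information field. */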
static inline bool is_page_fault(u32 intr_info)
{
	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
			     INTR_INFO_VALID_MASK)) ==
		(INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
}

static inline bool is_no_device(u32 intr_info)
{
	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
			     INTR_INFO_VALID_MASK)) ==
		(INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
}

static inline bool is_invalid_opcode(u32 intr_info)
{
	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
			     INTR_INFO_VALID_MASK)) ==
		(INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
}

static inline bool is_external_interrupt(u32 intr_info)
{
	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
		== (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
}

static inline bool is_machine_check(u32 intr_info)
{
	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
			     INTR_INFO_VALID_MASK)) ==
		(INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
}

/* Undocumented: icebp/int1 */
static inline bool is_icebp(u32 intr_info)
{
	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
		== (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK);
}

static inline bool cpu_has_vmx_msr_bitmap(void)
{
	return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
}

static inline bool cpu_has_vmx_tpr_shadow(void)
{
	return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW;
}

static inline bool cpu_need_tpr_shadow(struct kvm_vcpu *vcpu)
{
	return cpu_has_vmx_tpr_shadow() && lapic_in_kernel(vcpu);
}

static inline bool cpu_has_secondary_exec_ctrls(void)
{
	return vmcs_config.cpu_based_exec_ctrl &
		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
}

static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
}

static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
}

static inline bool cpu_has_vmx_apic_register_virt(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_APIC_REGISTER_VIRT;
}

static inline bool cpu_has_vmx_virtual_intr_delivery(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
}

static inline bool cpu_has_vmx_posted_intr(void)
{
	return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
		vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
}

static inline bool cpu_has_vmx_apicv(void)
{
	return cpu_has_vmx_apic_register_virt() &&
		cpu_has_vmx_virtual_intr_delivery() &&
		cpu_has_vmx_posted_intr();
}

static inline bool cpu_has_vmx_flexpriority(void)
{
	return cpu_has_vmx_tpr_shadow() &&
		cpu_has_vmx_virtualize_apic_accesses();
1088 1089
}

1090 1091
static inline bool cpu_has_vmx_ept_execute_only(void)
{
1092
	return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT;
1093 1094 1095 1096
}

static inline bool cpu_has_vmx_ept_2m_page(void)
{
1097
	return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT;
1098 1099
}

1100 1101
static inline bool cpu_has_vmx_ept_1g_page(void)
{
1102
	return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT;
1103 1104
}

1105 1106 1107 1108 1109
static inline bool cpu_has_vmx_ept_4levels(void)
{
	return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
}

static inline bool cpu_has_vmx_ept_ad_bits(void)
{
	return vmx_capability.ept & VMX_EPT_AD_BIT;
}

static inline bool cpu_has_vmx_invept_context(void)
{
	return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
}

static inline bool cpu_has_vmx_invept_global(void)
{
	return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;
}

static inline bool cpu_has_vmx_invvpid_single(void)
{
	return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT;
}

static inline bool cpu_has_vmx_invvpid_global(void)
{
	return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
}

static inline bool cpu_has_vmx_invvpid(void)
{
	return vmx_capability.vpid & VMX_VPID_INVVPID_BIT;
}

static inline bool cpu_has_vmx_ept(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_ENABLE_EPT;
}

static inline bool cpu_has_vmx_unrestricted_guest(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_UNRESTRICTED_GUEST;
}

static inline bool cpu_has_vmx_ple(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_PAUSE_LOOP_EXITING;
}

static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
{
	return flexpriority_enabled && lapic_in_kernel(vcpu);
}

static inline bool cpu_has_vmx_vpid(void)
{
	return vmcs_config.cpu_based_2nd_exec_ctrl &
		SECONDARY_EXEC_ENABLE_VPID;
}

static inline bool cpu_has_vmx_rdtscp(void)