Commit 9e0600f5 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:

 - x86 bugfixes: APIC, nested virtualization, IOAPIC

 - PPC bugfix: HPT guests on a POWER9 radix host

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (26 commits)
  KVM: Let KVM_SET_SIGNAL_MASK work as advertised
  KVM: VMX: Fix vmx->nested freeing when no SMI handler
  KVM: VMX: Fix rflags cache during vCPU reset
  KVM: X86: Fix softlockup when get the current kvmclock
  KVM: lapic: Fixup LDR on load in x2apic
  KVM: lapic: Split out x2apic ldr calculation
  KVM: PPC: Book3S HV: Fix migration and HPT resizing of HPT guests on radix hosts
  KVM: vmx: use X86_CR4_UMIP and X86_FEATURE_UMIP
  KVM: x86: Fix CPUID function for word 6 (80000001_ECX)
  KVM: nVMX: Fix vmx_check_nested_events() return value in case an event was reinjected to L2
  KVM: x86: ioapic: Preserve read-only values in the redirection table
  KVM: x86: ioapic: Clear Remote IRR when entry is switched to edge-triggered
  KVM: x86: ioapic: Remove redundant check for Remote IRR in ioapic_set_irq
  KVM: x86: ioapic: Don't fire level irq when Remote IRR set
  KVM: x86: ioapic: Fix level-triggered EOI and IOAPIC reconfigure race
  KVM: x86: inject exceptions produced by x86_decode_insn
  KVM: x86: Allow suppressing prints on RDMSR/WRMSR of unhandled MSRs
  KVM: x86: fix em_fxstor() sleeping while in atomic
  KVM: nVMX: Fix mmu context after VMLAUNCH/VMRESUME failure
  KVM: nVMX: Validate the IA32_BNDCFGS on nested VM-entry
  ...
parents 22985bf5 a63dd748
......@@ -445,10 +445,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
int r = -EINTR;
sigset_t sigsaved;
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
kvm_sigset_activate(vcpu);
if (vcpu->mmio_needed) {
if (!vcpu->mmio_is_write)
......@@ -480,8 +478,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
local_irq_enable();
out:
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
kvm_sigset_deactivate(vcpu);
return r;
}
......
......@@ -180,6 +180,7 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
struct iommu_group *grp);
extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);
extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);
extern void kvmppc_setup_partition_table(struct kvm *kvm);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce_64 *args);
......
......@@ -1238,8 +1238,9 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
unsigned long vpte, rpte, guest_rpte;
int ret;
struct revmap_entry *rev;
unsigned long apsize, psize, avpn, pteg, hash;
unsigned long apsize, avpn, pteg, hash;
unsigned long new_idx, new_pteg, replace_vpte;
int pshift;
hptep = (__be64 *)(old->virt + (idx << 4));
......@@ -1298,8 +1299,8 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
goto out;
rpte = be64_to_cpu(hptep[1]);
psize = hpte_base_page_size(vpte, rpte);
avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23);
pshift = kvmppc_hpte_base_page_shift(vpte, rpte);
avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23);
pteg = idx / HPTES_PER_GROUP;
if (vpte & HPTE_V_SECONDARY)
pteg = ~pteg;
......@@ -1311,20 +1312,20 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
offset = (avpn & 0x1f) << 23;
vsid = avpn >> 5;
/* We can find more bits from the pteg value */
if (psize < (1ULL << 23))
offset |= ((vsid ^ pteg) & old_hash_mask) * psize;
if (pshift < 23)
offset |= ((vsid ^ pteg) & old_hash_mask) << pshift;
hash = vsid ^ (offset / psize);
hash = vsid ^ (offset >> pshift);
} else {
unsigned long offset, vsid;
/* We only have 40 - 23 bits of seg_off in avpn */
offset = (avpn & 0x1ffff) << 23;
vsid = avpn >> 17;
if (psize < (1ULL << 23))
offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) * psize;
if (pshift < 23)
offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) << pshift;
hash = vsid ^ (vsid << 25) ^ (offset / psize);
hash = vsid ^ (vsid << 25) ^ (offset >> pshift);
}
new_pteg = hash & new_hash_mask;
......@@ -1801,6 +1802,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
ssize_t nb;
long int err, ret;
int mmu_ready;
int pshift;
if (!access_ok(VERIFY_READ, buf, count))
return -EFAULT;
......@@ -1855,6 +1857,9 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
err = -EINVAL;
if (!(v & HPTE_V_VALID))
goto out;
pshift = kvmppc_hpte_base_page_shift(v, r);
if (pshift <= 0)
goto out;
lbuf += 2;
nb += HPTE_SIZE;
......@@ -1869,14 +1874,18 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
goto out;
}
if (!mmu_ready && is_vrma_hpte(v)) {
unsigned long psize = hpte_base_page_size(v, r);
unsigned long senc = slb_pgsize_encoding(psize);
unsigned long lpcr;
unsigned long senc, lpcr;
senc = slb_pgsize_encoding(1ul << pshift);
kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
(VRMA_VSID << SLB_VSID_SHIFT_1T);
lpcr = senc << (LPCR_VRMASD_SH - 4);
kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
lpcr = senc << (LPCR_VRMASD_SH - 4);
kvmppc_update_lpcr(kvm, lpcr,
LPCR_VRMASD);
} else {
kvmppc_setup_partition_table(kvm);
}
mmu_ready = 1;
}
++i;
......
......@@ -120,7 +120,6 @@ MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
static void kvmppc_setup_partition_table(struct kvm *kvm);
static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
int *ip)
......@@ -3574,7 +3573,7 @@ static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
return;
}
static void kvmppc_setup_partition_table(struct kvm *kvm)
void kvmppc_setup_partition_table(struct kvm *kvm)
{
unsigned long dw0, dw1;
......
......@@ -1407,7 +1407,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
int r;
sigset_t sigsaved;
if (vcpu->mmio_needed) {
vcpu->mmio_needed = 0;
......@@ -1448,16 +1447,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
#endif
}
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
kvm_sigset_activate(vcpu);
if (run->immediate_exit)
r = -EINTR;
else
r = kvmppc_vcpu_run(run, vcpu);
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
kvm_sigset_deactivate(vcpu);
return r;
}
......
......@@ -3372,7 +3372,6 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int rc;
sigset_t sigsaved;
if (kvm_run->immediate_exit)
return -EINTR;
......@@ -3382,8 +3381,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 0;
}
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
kvm_sigset_activate(vcpu);
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
kvm_s390_vcpu_start(vcpu);
......@@ -3417,8 +3415,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
disable_cpu_timer_accounting(vcpu);
store_regs(vcpu, kvm_run);
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
kvm_sigset_deactivate(vcpu);
vcpu->stat.exit_userspace++;
return rc;
......
......@@ -1161,7 +1161,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
static inline int emulate_instruction(struct kvm_vcpu *vcpu,
int emulation_type)
{
return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
return x86_emulate_instruction(vcpu, 0,
emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0);
}
void kvm_enable_efer_bits(u64);
......
......@@ -44,7 +44,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX},
[CPUID_1_ECX] = { 1, 0, CPUID_ECX},
[CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX},
[CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX},
[CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX},
[CPUID_7_0_EBX] = { 7, 0, CPUID_EBX},
[CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX},
[CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX},
......
......@@ -4014,6 +4014,26 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
fxstate_size(ctxt));
}
/*
* FXRSTOR might restore XMM registers not provided by the guest. Fill
* in the host registers (via FXSAVE) instead, so they won't be modified.
* (preemption has to stay disabled until FXRSTOR).
*
* Use noinline to keep the stack for other functions called by callers small.
*/
static noinline int fxregs_fixup(struct fxregs_state *fx_state,
const size_t used_size)
{
struct fxregs_state fx_tmp;
int rc;
rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
__fxstate_size(16) - used_size);
return rc;
}
static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
{
struct fxregs_state fx_state;
......@@ -4024,19 +4044,19 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
size = fxstate_size(ctxt);
rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
if (rc != X86EMUL_CONTINUE)
return rc;
ctxt->ops->get_fpu(ctxt);
size = fxstate_size(ctxt);
if (size < __fxstate_size(16)) {
rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
rc = fxregs_fixup(&fx_state, size);
if (rc != X86EMUL_CONTINUE)
goto out;
}
rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
if (rc != X86EMUL_CONTINUE)
goto out;
if (fx_state.mxcsr >> 16) {
rc = emulate_gp(ctxt, 0);
goto out;
......@@ -5000,6 +5020,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
bool op_prefix = false;
bool has_seg_override = false;
struct opcode opcode;
u16 dummy;
struct desc_struct desc;
ctxt->memop.type = OP_NONE;
ctxt->memopp = NULL;
......@@ -5018,6 +5040,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
switch (mode) {
case X86EMUL_MODE_REAL:
case X86EMUL_MODE_VM86:
def_op_bytes = def_ad_bytes = 2;
ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
if (desc.d)
def_op_bytes = def_ad_bytes = 4;
break;
case X86EMUL_MODE_PROT16:
def_op_bytes = def_ad_bytes = 2;
break;
......
......@@ -209,12 +209,12 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
old_irr = ioapic->irr;
ioapic->irr |= mask;
if (edge)
if (edge) {
ioapic->irr_delivered &= ~mask;
if ((edge && old_irr == ioapic->irr) ||
(!edge && entry.fields.remote_irr)) {
ret = 0;
goto out;
if (old_irr == ioapic->irr) {
ret = 0;
goto out;
}
}
ret = ioapic_service(ioapic, irq, line_status);
......@@ -257,8 +257,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
index == RTC_GSI) {
if (kvm_apic_match_dest(vcpu, NULL, 0,
e->fields.dest_id, e->fields.dest_mode) ||
(e->fields.trig_mode == IOAPIC_EDGE_TRIG &&
kvm_apic_pending_eoi(vcpu, e->fields.vector)))
kvm_apic_pending_eoi(vcpu, e->fields.vector))
__set_bit(e->fields.vector,
ioapic_handled_vectors);
}
......@@ -277,6 +276,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
{
unsigned index;
bool mask_before, mask_after;
int old_remote_irr, old_delivery_status;
union kvm_ioapic_redirect_entry *e;
switch (ioapic->ioregsel) {
......@@ -299,14 +299,28 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
return;
e = &ioapic->redirtbl[index];
mask_before = e->fields.mask;
/* Preserve read-only fields */
old_remote_irr = e->fields.remote_irr;
old_delivery_status = e->fields.delivery_status;
if (ioapic->ioregsel & 1) {
e->bits &= 0xffffffff;
e->bits |= (u64) val << 32;
} else {
e->bits &= ~0xffffffffULL;
e->bits |= (u32) val;
e->fields.remote_irr = 0;
}
e->fields.remote_irr = old_remote_irr;
e->fields.delivery_status = old_delivery_status;
/*
* Some OSes (Linux, Xen) assume that Remote IRR bit will
* be cleared by IOAPIC hardware when the entry is configured
* as edge-triggered. This behavior is used to simulate an
* explicit EOI on IOAPICs that don't have the EOI register.
*/
if (e->fields.trig_mode == IOAPIC_EDGE_TRIG)
e->fields.remote_irr = 0;
mask_after = e->fields.mask;
if (mask_before != mask_after)
kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
......@@ -324,7 +338,9 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
struct kvm_lapic_irq irqe;
int ret;
if (entry->fields.mask)
if (entry->fields.mask ||
(entry->fields.trig_mode == IOAPIC_LEVEL_TRIG &&
entry->fields.remote_irr))
return -1;
ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
......
......@@ -266,9 +266,14 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
recalculate_apic_map(apic->vcpu->kvm);
}
static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
{
return ((id >> 4) << 16) | (1 << (id & 0xf));
}
static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
{
u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
u32 ldr = kvm_apic_calc_x2apic_ldr(id);
WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
......@@ -2245,6 +2250,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
{
if (apic_x2apic_mode(vcpu->arch.apic)) {
u32 *id = (u32 *)(s->regs + APIC_ID);
u32 *ldr = (u32 *)(s->regs + APIC_LDR);
if (vcpu->kvm->arch.x2apic_format) {
if (*id != vcpu->vcpu_id)
......@@ -2255,6 +2261,10 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
else
*id <<= 24;
}
/* In x2APIC mode, the LDR is fixed and based on the id */
if (set)
*ldr = kvm_apic_calc_x2apic_ldr(*id);
}
return 0;
......
......@@ -361,6 +361,7 @@ static void recalc_intercepts(struct vcpu_svm *svm)
{
struct vmcb_control_area *c, *h;
struct nested_state *g;
u32 h_intercept_exceptions;
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
......@@ -371,9 +372,14 @@ static void recalc_intercepts(struct vcpu_svm *svm)
h = &svm->nested.hsave->control;
g = &svm->nested;
/* No need to intercept #UD if L1 doesn't intercept it */
h_intercept_exceptions =
h->intercept_exceptions & ~(1U << UD_VECTOR);
c->intercept_cr = h->intercept_cr | g->intercept_cr;
c->intercept_dr = h->intercept_dr | g->intercept_dr;
c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
c->intercept_exceptions =
h_intercept_exceptions | g->intercept_exceptions;
c->intercept = h->intercept | g->intercept;
}
......@@ -2196,7 +2202,10 @@ static int ud_interception(struct vcpu_svm *svm)
{
int er;
WARN_ON_ONCE(is_guest_mode(&svm->vcpu));
er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
if (er == EMULATE_USER_EXIT)
return 0;
if (er != EMULATE_DONE)
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
......
......@@ -1887,7 +1887,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
{
u32 eb;
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) |
(1u << DB_VECTOR) | (1u << AC_VECTOR);
if ((vcpu->guest_debug &
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
......@@ -1905,6 +1905,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
*/
if (is_guest_mode(vcpu))
eb |= get_vmcs12(vcpu)->exception_bitmap;
else
eb |= 1u << UD_VECTOR;
vmcs_write32(EXCEPTION_BITMAP, eb);
}
......@@ -5600,7 +5602,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
}
vmcs_writel(GUEST_RFLAGS, 0x02);
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0xfff0);
vmcs_writel(GUEST_GDTR_BASE, 0);
......@@ -5915,11 +5917,10 @@ static int handle_exception(struct kvm_vcpu *vcpu)
return 1; /* already handled by vmx_vcpu_run() */
if (is_invalid_opcode(intr_info)) {
if (is_guest_mode(vcpu)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
WARN_ON_ONCE(is_guest_mode(vcpu));
er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
if (er == EMULATE_USER_EXIT)
return 0;
if (er != EMULATE_DONE)
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
......@@ -6602,7 +6603,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (kvm_test_request(KVM_REQ_EVENT, vcpu))
return 1;
err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
err = emulate_instruction(vcpu, 0);
if (err == EMULATE_USER_EXIT) {
++vcpu->stat.mmio_exits;
......@@ -7414,10 +7415,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
*/
static void free_nested(struct vcpu_vmx *vmx)
{
if (!vmx->nested.vmxon)
if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
return;
vmx->nested.vmxon = false;
vmx->nested.smm.vmxon = false;
free_vpid(vmx->nested.vpid02);
vmx->nested.posted_intr_nv = -1;
vmx->nested.current_vmptr = -1ull;
......@@ -9800,8 +9802,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP));
cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
/* TODO: Use X86_CR4_UMIP and X86_FEATURE_UMIP macros */
cr4_fixed1_update(bit(11), ecx, bit(2));
cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP));
#undef cr4_fixed1_update
}
......@@ -10875,6 +10876,11 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
return 1;
}
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
(vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
return 1;
return 0;
}
......@@ -11099,13 +11105,12 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long exit_qual;
if (kvm_event_needs_reinjection(vcpu))
return -EBUSY;
bool block_nested_events =
vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
if (vcpu->arch.exception.pending &&
nested_vmx_check_exception(vcpu, &exit_qual)) {
if (vmx->nested.nested_run_pending)
if (block_nested_events)
return -EBUSY;
nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
vcpu->arch.exception.pending = false;
......@@ -11114,14 +11119,14 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
vmx->nested.preemption_timer_expired) {
if (vmx->nested.nested_run_pending)
if (block_nested_events)
return -EBUSY;
nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
return 0;
}
if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
if (vmx->nested.nested_run_pending)
if (block_nested_events)
return -EBUSY;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
NMI_VECTOR | INTR_TYPE_NMI_INTR |
......@@ -11137,7 +11142,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
nested_exit_on_intr(vcpu)) {
if (vmx->nested.nested_run_pending)
if (block_nested_events)
return -EBUSY;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
return 0;
......@@ -11324,6 +11329,24 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
kvm_clear_interrupt_queue(vcpu);
}
static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
u32 entry_failure_code;
nested_ept_uninit_mmu_context(vcpu);
/*
* Only PDPTE load can fail as the value of cr3 was checked on entry and
* couldn't have changed.
*/
if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
if (!enable_ept)
vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
}
/*
* A part of what we need to when the nested L2 guest exits and we want to
* run its L1 parent, is to reset L1's guest state to the host state specified
......@@ -11337,7 +11360,6 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
struct kvm_segment seg;
u32 entry_failure_code;
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
vcpu->arch.efer = vmcs12->host_ia32_efer;
......@@ -11364,17 +11386,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
vmx_set_cr4(vcpu, vmcs12->host_cr4);
nested_ept_uninit_mmu_context(vcpu);
/*
* Only PDPTE load can fail as the value of cr3 was checked on entry and
* couldn't have changed.
*/
if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
if (!enable_ept)
vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
load_vmcs12_mmu_host_state(vcpu, vmcs12);
if (enable_vpid) {
/*
......@@ -11604,6 +11616,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
* accordingly.
*/
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
load_vmcs12_mmu_host_state(vcpu, vmcs12);
/*
* The emulated instruction was already skipped in
* nested_vmx_run, but the updated RIP was never
......
......@@ -107,6 +107,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
static bool __read_mostly ignore_msrs = 0;
module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
static bool __read_mostly report_ignored_msrs = true;
module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
unsigned int min_timer_period_us = 500;
module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
......@@ -1795,10 +1798,13 @@ u64 get_kvmclock_ns(struct kvm *kvm)
/* both __this_cpu_read() and rdtsc() should be on the same cpu */
get_cpu();
kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
&hv_clock.tsc_shift,
&hv_clock.tsc_to_system_mul);
ret = __pvclock_read_cycles(&hv_clock, rdtsc());
if (__this_cpu_read(cpu_tsc_khz)) {
kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
&hv_clock.tsc_shift,
&hv_clock.tsc_to_system_mul);
ret = __pvclock_read_cycles(&hv_clock, rdtsc());
} else
ret = ktime_get_boot_ns() + ka->kvmclock_offset;
put_cpu();
......@@ -1830,6 +1836,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
*/
BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
if (guest_hv_clock.version & 1)
++guest_hv_clock.version; /* first time write, random junk */
vcpu->hv_clock.version = guest_hv_clock.version + 1;
kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
&vcpu->hv_clock,
......@@ -2322,7 +2331,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* Drop writes to this legacy MSR -- see rdmsr
* counterpart for further detail.
*/
vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data);
if (report_ignored_msrs)
vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
msr, data);
break;
case MSR_AMD64_OSVW_ID_LENGTH:
if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
......@@ -2359,8 +2370,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr, data);
return 1;
} else {
vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
msr, data);
if (report_ignored_msrs)
vcpu_unimpl(vcpu,
"ignored wrmsr: 0x%x data 0x%llx\n",
msr, data);
break;
}
}
......@@ -2578,7 +2591,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->index);
return 1;
} else {
vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index);
if (report_ignored_msrs)
vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n",
msr_info->index);
msr_info->data = 0;
}
break;
......@@ -5430,7 +5445,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
r = EMULATE_FAIL;
r = EMULATE_USER_EXIT;
}
kvm_queue_exception(vcpu, UD_VECTOR);
......@@ -5722,6 +5737,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
emulation_type))
return EMULATE_DONE;
if (ctxt->have_exception && inject_emulated_exception(vcpu))
return EMULATE_DONE;
if (emulation_type & EMULTYPE_SKIP)
return EMULATE_FAIL;
return handle_emulation_failure(vcpu);
......@@ -7250,12 +7267,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
struct fpu *fpu = &current->thread.fpu;
int r;
sigset_t sigsaved;
fpu__initialize(fpu);
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
kvm_sigset_activate(vcpu);
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
if (kvm_run->immediate_exit) {
......@@ -7298,8 +7313,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
out:
post_kvm_run_save(vcpu);
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
kvm_sigset_deactivate(vcpu);
return r;
}
......
......@@ -715,6 +715,9 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,