Commit 6819af75, authored by Sean Christopherson, committed by Paolo Bonzini
Browse files

KVM: x86: Clean up and document nested #PF workaround



Replace the per-vendor hack-a-fix for KVM's #PF => #PF => #DF workaround
with an explicit, common workaround in kvm_inject_emulated_page_fault().
Aside from being a hack, the current approach is brittle and incomplete,
e.g. nSVM's KVM_SET_NESTED_STATE fails to set ->inject_page_fault(),
and nVMX fails to apply the workaround when VMX is intercepting #PF due
to allow_smaller_maxphyaddr=1.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 25cc0565
Loading
Loading
Loading
Loading
+2 −0
Original line number Original line Diff line number Diff line
@@ -1516,6 +1516,8 @@ struct kvm_x86_ops {
struct kvm_x86_nested_ops {
struct kvm_x86_nested_ops {
	void (*leave_nested)(struct kvm_vcpu *vcpu);
	void (*leave_nested)(struct kvm_vcpu *vcpu);
	int (*check_events)(struct kvm_vcpu *vcpu);
	int (*check_events)(struct kvm_vcpu *vcpu);
	bool (*handle_page_fault_workaround)(struct kvm_vcpu *vcpu,
					     struct x86_exception *fault);
	bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
	bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
	void (*triple_fault)(struct kvm_vcpu *vcpu);
	void (*triple_fault)(struct kvm_vcpu *vcpu);
	int (*get_state)(struct kvm_vcpu *vcpu,
	int (*get_state)(struct kvm_vcpu *vcpu,
+9 −9
Original line number Original line Diff line number Diff line
@@ -55,7 +55,8 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
	nested_svm_vmexit(svm);
	nested_svm_vmexit(svm);
}
}


static void svm_inject_page_fault_nested(struct kvm_vcpu *vcpu, struct x86_exception *fault)
static bool nested_svm_handle_page_fault_workaround(struct kvm_vcpu *vcpu,
						    struct x86_exception *fault)
{
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb *vmcb = svm->vmcb;
	struct vmcb *vmcb = svm->vmcb;
@@ -64,15 +65,16 @@ static void svm_inject_page_fault_nested(struct kvm_vcpu *vcpu, struct x86_excep


	if (vmcb12_is_intercept(&svm->nested.ctl,
	if (vmcb12_is_intercept(&svm->nested.ctl,
				INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
				INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
				!svm->nested.nested_run_pending) {
	    !WARN_ON_ONCE(svm->nested.nested_run_pending)) {
	     	vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
	     	vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
		vmcb->control.exit_code_hi = 0;
		vmcb->control.exit_code_hi = 0;
		vmcb->control.exit_info_1 = fault->error_code;
		vmcb->control.exit_info_1 = fault->error_code;
		vmcb->control.exit_info_2 = fault->address;
		vmcb->control.exit_info_2 = fault->address;
		nested_svm_vmexit(svm);
		nested_svm_vmexit(svm);
	} else {
		return true;
		kvm_inject_page_fault(vcpu, fault);
	}
	}

	return false;
}
}


static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
@@ -751,9 +753,6 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
	if (ret)
	if (ret)
		return ret;
		return ret;


	if (!npt_enabled)
		vcpu->arch.mmu->inject_page_fault = svm_inject_page_fault_nested;

	if (!from_vmrun)
	if (!from_vmrun)
		kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
		kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);


@@ -1659,6 +1658,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
struct kvm_x86_nested_ops svm_nested_ops = {
struct kvm_x86_nested_ops svm_nested_ops = {
	.leave_nested = svm_leave_nested,
	.leave_nested = svm_leave_nested,
	.check_events = svm_check_nested_events,
	.check_events = svm_check_nested_events,
	.handle_page_fault_workaround = nested_svm_handle_page_fault_workaround,
	.triple_fault = nested_svm_triple_fault,
	.triple_fault = nested_svm_triple_fault,
	.get_nested_state_pages = svm_get_nested_state_pages,
	.get_nested_state_pages = svm_get_nested_state_pages,
	.get_state = svm_get_nested_state,
	.get_state = svm_get_nested_state,
+6 −9
Original line number Original line Diff line number Diff line
@@ -476,8 +476,7 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
	return 0;
	return 0;
}
}



static bool nested_vmx_handle_page_fault_workaround(struct kvm_vcpu *vcpu,
static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
						    struct x86_exception *fault)
						    struct x86_exception *fault)
{
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -485,15 +484,15 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
	WARN_ON(!is_guest_mode(vcpu));
	WARN_ON(!is_guest_mode(vcpu));


	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
	if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
		!to_vmx(vcpu)->nested.nested_run_pending) {
	    !WARN_ON_ONCE(to_vmx(vcpu)->nested.nested_run_pending)) {
		vmcs12->vm_exit_intr_error_code = fault->error_code;
		vmcs12->vm_exit_intr_error_code = fault->error_code;
		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
				  PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
				  PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
				  INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
				  INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
				  fault->address);
				  fault->address);
	} else {
		return true;
		kvm_inject_page_fault(vcpu, fault);
	}
	}
	return false;
}
}


static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
@@ -2614,9 +2613,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
		vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
		vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
	}
	}


	if (!enable_ept)
		vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;

	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
	    WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
	    WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
				     vmcs12->guest_ia32_perf_global_ctrl))) {
				     vmcs12->guest_ia32_perf_global_ctrl))) {
@@ -6830,6 +6826,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
struct kvm_x86_nested_ops vmx_nested_ops = {
struct kvm_x86_nested_ops vmx_nested_ops = {
	.leave_nested = vmx_leave_nested,
	.leave_nested = vmx_leave_nested,
	.check_events = vmx_check_nested_events,
	.check_events = vmx_check_nested_events,
	.handle_page_fault_workaround = nested_vmx_handle_page_fault_workaround,
	.hv_timer_pending = nested_vmx_preemption_timer_pending,
	.hv_timer_pending = nested_vmx_preemption_timer_pending,
	.triple_fault = nested_vmx_triple_fault,
	.triple_fault = nested_vmx_triple_fault,
	.get_state = vmx_get_nested_state,
	.get_state = vmx_get_nested_state,
+20 −1
Original line number Original line Diff line number Diff line
@@ -748,6 +748,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
}
}
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);


/* Returns true if the page fault was immediately morphed into a VM-Exit. */
bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
				    struct x86_exception *fault)
				    struct x86_exception *fault)
{
{
@@ -766,8 +767,26 @@ bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
		kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
		kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
				       fault_mmu->root.hpa);
				       fault_mmu->root.hpa);


	/*
	 * A workaround for KVM's bad exception handling.  If KVM injected an
	 * exception into L2, and L2 encountered a #PF while vectoring the
	 * injected exception, manually check to see if L1 wants to intercept
	 * #PF, otherwise queuing the #PF will lead to #DF or a lost exception.
	 * In all other cases, defer the check to nested_ops->check_events(),
	 * which will correctly handle priority (this does not).  Note, other
	 * exceptions, e.g. #GP, are theoretically affected, #PF is simply the
	 * most problematic, e.g. when L0 and L1 are both intercepting #PF for
	 * shadow paging.
	 *
	 * TODO: Rewrite exception handling to track injected and pending
	 *       (VM-Exit) exceptions separately.
	 */
	if (unlikely(vcpu->arch.exception.injected && is_guest_mode(vcpu)) &&
	    kvm_x86_ops.nested_ops->handle_page_fault_workaround(vcpu, fault))
		return true;

	fault_mmu->inject_page_fault(vcpu, fault);
	fault_mmu->inject_page_fault(vcpu, fault);
	return fault->nested_page_fault;
	return false;
}
}
EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);
EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);