Commit 27b025eb authored by Paolo Bonzini's avatar Paolo Bonzini
Browse files

Merge tag 'kvm-x86-vmx-6.3' of https://github.com/kvm-x86/linux into HEAD

KVM VMX changes for 6.3:

 - Handle NMI VM-Exits before leaving the noinstr region

 - A few trivial cleanups in the VM-Enter flows

 - Stop enabling VMFUNC for L1 purely to document that KVM doesn't support
   EPTP switching (or any other VM function) for L1

 - Fix a crash when using eVMCS's enlightened MSR bitmaps
parents 4bc6dcaa 93827a0a
Loading
Loading
Loading
Loading
+6 −10
Original line number Diff line number Diff line
@@ -582,18 +582,14 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_MC, xenpv_exc_machine_check);

/* NMI */

#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
#if IS_ENABLED(CONFIG_KVM_INTEL)
/*
 * Special NOIST entry point for VMX which invokes this on the kernel
 * stack. asm_exc_nmi() requires an IST to work correctly vs. the NMI
 * 'executing' marker.
 *
 * On 32bit this just uses the regular NMI entry point because 32-bit does
 * not have ISTs.
 * Special entry point for VMX which invokes this on the kernel stack, even for
 * 64-bit, i.e. without using an IST.  asm_exc_nmi() requires an IST to work
 * correctly vs. the NMI 'executing' marker.  Used for 32-bit kernels as well
 * to avoid more ifdeffery.
 */
DECLARE_IDTENTRY(X86_TRAP_NMI,		exc_nmi_noist);
#else
#define asm_exc_nmi_noist		asm_exc_nmi
DECLARE_IDTENTRY(X86_TRAP_NMI,		exc_nmi_kvm_vmx);
#endif

DECLARE_IDTENTRY_NMI(X86_TRAP_NMI,	exc_nmi);
+4 −4
Original line number Diff line number Diff line
@@ -527,14 +527,14 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
		mds_user_clear_cpu_buffers();
}

#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
DEFINE_IDTENTRY_RAW(exc_nmi_noist)
#if IS_ENABLED(CONFIG_KVM_INTEL)
DEFINE_IDTENTRY_RAW(exc_nmi_kvm_vmx)
{
	exc_nmi(regs);
}
#endif
#if IS_MODULE(CONFIG_KVM_INTEL)
EXPORT_SYMBOL_GPL(asm_exc_nmi_noist);
EXPORT_SYMBOL_GPL(asm_exc_nmi_kvm_vmx);
#endif
#endif

void stop_nmi(void)
+12 −0
Original line number Diff line number Diff line
@@ -75,6 +75,18 @@ static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
}

/*
 * kvm_register_test_and_mark_available() is a special snowflake that uses an
 * arch bitop directly to avoid the explicit instrumentation that comes with
 * the generic bitops.  This allows code that cannot be instrumented (noinstr
 * functions), e.g. the low level VM-Enter/VM-Exit paths, to cache registers.
 *
 * @vcpu: vCPU whose arch.regs_avail bitmap is updated.
 * @reg:  register index, used as the bit number within arch.regs_avail.
 *
 * Returns whatever arch___test_and_set_bit() returns for that bit; presumably
 * the bit's previous value, i.e. true if the register was already marked
 * available -- confirm against the arch bitop definition.
 *
 * Marked __always_inline so that no out-of-line copy of this helper is
 * emitted.
 */
static __always_inline bool kvm_register_test_and_mark_available(struct kvm_vcpu *vcpu,
								 enum kvm_reg reg)
{
	return arch___test_and_set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
}

/*
 * The "raw" register helpers are only for cases where the full 64 bits of a
 * register are read/written irrespective of current vCPU mode.  In other words,
+10 −21
Original line number Diff line number Diff line
@@ -196,7 +196,7 @@ static __always_inline void evmcs_write64(unsigned long field, u64 value)
	current_evmcs->hv_clean_fields &= ~clean_field;
}

static inline void evmcs_write32(unsigned long field, u32 value)
static __always_inline void evmcs_write32(unsigned long field, u32 value)
{
	u16 clean_field;
	int offset = get_evmcs_offset(field, &clean_field);
@@ -208,7 +208,7 @@ static inline void evmcs_write32(unsigned long field, u32 value)
	current_evmcs->hv_clean_fields &= ~clean_field;
}

static inline void evmcs_write16(unsigned long field, u16 value)
static __always_inline void evmcs_write16(unsigned long field, u16 value)
{
	u16 clean_field;
	int offset = get_evmcs_offset(field, &clean_field);
@@ -220,7 +220,7 @@ static inline void evmcs_write16(unsigned long field, u16 value)
	current_evmcs->hv_clean_fields &= ~clean_field;
}

static inline u64 evmcs_read64(unsigned long field)
static __always_inline u64 evmcs_read64(unsigned long field)
{
	int offset = get_evmcs_offset(field, NULL);

@@ -230,7 +230,7 @@ static inline u64 evmcs_read64(unsigned long field)
	return *(u64 *)((char *)current_evmcs + offset);
}

static inline u32 evmcs_read32(unsigned long field)
static __always_inline u32 evmcs_read32(unsigned long field)
{
	int offset = get_evmcs_offset(field, NULL);

@@ -240,7 +240,7 @@ static inline u32 evmcs_read32(unsigned long field)
	return *(u32 *)((char *)current_evmcs + offset);
}

static inline u16 evmcs_read16(unsigned long field)
static __always_inline u16 evmcs_read16(unsigned long field)
{
	int offset = get_evmcs_offset(field, NULL);

@@ -250,16 +250,6 @@ static inline u16 evmcs_read16(unsigned long field)
	return *(u16 *)((char *)current_evmcs + offset);
}

static inline void evmcs_touch_msr_bitmap(void)
{
	if (unlikely(!current_evmcs))
		return;

	if (current_evmcs->hv_enlightenments_control.msr_bitmap)
		current_evmcs->hv_clean_fields &=
			~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
}

static inline void evmcs_load(u64 phys_addr)
{
	struct hv_vp_assist_page *vp_ap =
@@ -274,13 +264,12 @@ static inline void evmcs_load(u64 phys_addr)
void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf);
#else /* !IS_ENABLED(CONFIG_HYPERV) */
static __always_inline void evmcs_write64(unsigned long field, u64 value) {}
static inline void evmcs_write32(unsigned long field, u32 value) {}
static inline void evmcs_write16(unsigned long field, u16 value) {}
static inline u64 evmcs_read64(unsigned long field) { return 0; }
static inline u32 evmcs_read32(unsigned long field) { return 0; }
static inline u16 evmcs_read16(unsigned long field) { return 0; }
static __always_inline void evmcs_write32(unsigned long field, u32 value) {}
static __always_inline void evmcs_write16(unsigned long field, u16 value) {}
static __always_inline u64 evmcs_read64(unsigned long field) { return 0; }
static __always_inline u32 evmcs_read32(unsigned long field) { return 0; }
static __always_inline u16 evmcs_read16(unsigned long field) { return 0; }
static inline void evmcs_load(u64 phys_addr) {}
static inline void evmcs_touch_msr_bitmap(void) {}
#endif /* IS_ENABLED(CONFIG_HYPERV) */

#define EVMPTR_INVALID (-1ULL)
+8 −13
Original line number Diff line number Diff line
@@ -5864,11 +5864,10 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
	u32 function = kvm_rax_read(vcpu);

	/*
	 * VMFUNC is only supported for nested guests, but we always enable the
	 * secondary control for simplicity; for non-nested mode, fake that we
	 * didn't by injecting #UD.
	 * VMFUNC should never execute cleanly while L1 is active; KVM supports
	 * VMFUNC for nested VMs, but not for L1.
	 */
	if (!is_guest_mode(vcpu)) {
	if (WARN_ON_ONCE(!is_guest_mode(vcpu))) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}
@@ -6881,6 +6880,7 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps)
		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
		SECONDARY_EXEC_RDRAND_EXITING |
		SECONDARY_EXEC_ENABLE_INVPCID |
		SECONDARY_EXEC_ENABLE_VMFUNC |
		SECONDARY_EXEC_RDSEED_EXITING |
		SECONDARY_EXEC_XSAVES |
		SECONDARY_EXEC_TSC_SCALING |
@@ -6913,18 +6913,13 @@ void nested_vmx_setup_ctls_msrs(struct vmcs_config *vmcs_conf, u32 ept_caps)
				SECONDARY_EXEC_ENABLE_PML;
			msrs->ept_caps |= VMX_EPT_AD_BIT;
		}
	}

	if (cpu_has_vmx_vmfunc()) {
		msrs->secondary_ctls_high |=
			SECONDARY_EXEC_ENABLE_VMFUNC;
		/*
		 * Advertise EPTP switching unconditionally
		 * since we emulate it
		 * Advertise EPTP switching irrespective of hardware support,
		 * KVM emulates it in software so long as VMFUNC is supported.
		 */
		if (enable_ept)
			msrs->vmfunc_controls =
				VMX_VMFUNC_EPTP_SWITCHING;
		if (cpu_has_vmx_vmfunc())
			msrs->vmfunc_controls = VMX_VMFUNC_EPTP_SWITCHING;
	}

	/*
Loading