Commit 3c1a4322 authored by Nicholas Piggin, committed by Michael Ellerman
Browse files

KVM: PPC: Book3S HV: Change dec_expires to be relative to guest timebase



Change dec_expires to be relative to the guest timebase, and allow
it to be moved into low level P9 guest entry functions, to improve
SPR access scheduling.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20211123095231.1036501-23-npiggin@gmail.com
parent cf99dedb
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -406,6 +406,12 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
	return vcpu->arch.fault_dar;
}

/*
 * Return the vcpu's DEC expiry time expressed against the host timebase.
 *
 * vcpu->arch.dec_expires is kept relative to the guest timebase, so the
 * vcore's tb_offset is subtracted to obtain the host-relative value.
 */
static inline u64 kvmppc_dec_expires_host_tb(struct kvm_vcpu *vcpu)
{
	u64 guest_tb_expiry = vcpu->arch.dec_expires;

	return guest_tb_expiry - vcpu->arch.vcore->tb_offset;
}

static inline bool is_kvmppc_resume_guest(int r)
{
	return (r == RESUME_GUEST || r == RESUME_GUEST_NV);
+1 −1
Original line number Diff line number Diff line
@@ -741,7 +741,7 @@ struct kvm_vcpu_arch {

	struct hrtimer dec_timer;
	u64 dec_jiffies;
	u64 dec_expires;
	u64 dec_expires;	/* Relative to guest timebase. */
	unsigned long pending_exceptions;
	u8 ceded;
	u8 prodded;
+30 −28
Original line number Diff line number Diff line
@@ -2261,8 +2261,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
		*val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
		break;
	case KVM_REG_PPC_DEC_EXPIRY:
		*val = get_reg_val(id, vcpu->arch.dec_expires +
				   vcpu->arch.vcore->tb_offset);
		*val = get_reg_val(id, vcpu->arch.dec_expires);
		break;
	case KVM_REG_PPC_ONLINE:
		*val = get_reg_val(id, vcpu->arch.online);
@@ -2514,8 +2513,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
		r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
		break;
	case KVM_REG_PPC_DEC_EXPIRY:
		vcpu->arch.dec_expires = set_reg_val(id, *val) -
			vcpu->arch.vcore->tb_offset;
		vcpu->arch.dec_expires = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_ONLINE:
		i = set_reg_val(id, *val);
@@ -2902,13 +2900,13 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
	unsigned long dec_nsec, now;

	now = get_tb();
	if (now > vcpu->arch.dec_expires) {
	if (now > kvmppc_dec_expires_host_tb(vcpu)) {
		/* decrementer has already gone negative */
		kvmppc_core_queue_dec(vcpu);
		kvmppc_core_prepare_to_enter(vcpu);
		return;
	}
	dec_nsec = tb_to_ns(vcpu->arch.dec_expires - now);
	dec_nsec = tb_to_ns(kvmppc_dec_expires_host_tb(vcpu) - now);
	hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL);
	vcpu->arch.timer_running = 1;
}
@@ -3380,7 +3378,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
		 */
		spin_unlock(&vc->lock);
		/* cancel pending dec exception if dec is positive */
		if (now < vcpu->arch.dec_expires &&
		if (now < kvmppc_dec_expires_host_tb(vcpu) &&
		    kvmppc_core_pending_dec(vcpu))
			kvmppc_core_dequeue_dec(vcpu);

@@ -4224,20 +4222,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,

	load_spr_state(vcpu);

	/*
	 * When setting DEC, we must always deal with irq_work_raise via NMI vs
	 * setting DEC. The problem occurs right as we switch into guest mode
	 * if a NMI hits and sets pending work and sets DEC, then that will
	 * apply to the guest and not bring us back to the host.
	 *
	 * irq_work_raise could check a flag (or possibly LPCR[HDICE] for
	 * example) and set HDEC to 1? That wouldn't solve the nested hv
	 * case which needs to abort the hcall or zero the time limit.
	 *
	 * XXX: Another day's problem.
	 */
	mtspr(SPRN_DEC, vcpu->arch.dec_expires - tb);

	if (kvmhv_on_pseries()) {
		/*
		 * We need to save and restore the guest visible part of the
@@ -4263,6 +4247,23 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
			hvregs.vcpu_token = vcpu->vcpu_id;
		}
		hvregs.hdec_expiry = time_limit;

		/*
		 * When setting DEC, we must always deal with irq_work_raise
		 * via NMI vs setting DEC. The problem occurs right as we
		 * switch into guest mode if a NMI hits and sets pending work
		 * and sets DEC, then that will apply to the guest and not
		 * bring us back to the host.
		 *
		 * irq_work_raise could check a flag (or possibly LPCR[HDICE]
		 * for example) and set HDEC to 1? That wouldn't solve the
		 * nested hv case which needs to abort the hcall or zero the
		 * time limit.
		 *
		 * XXX: Another day's problem.
		 */
		mtspr(SPRN_DEC, kvmppc_dec_expires_host_tb(vcpu) - tb);

		mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
		mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
		trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
@@ -4274,6 +4275,12 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
		vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
		mtspr(SPRN_PSSCR_PR, host_psscr);

		dec = mfspr(SPRN_DEC);
		if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
			dec = (s32) dec;
		tb = mftb();
		vcpu->arch.dec_expires = dec + (tb + vc->tb_offset);

		/* H_CEDE has to be handled now, not later */
		if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
		    kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
@@ -4281,6 +4288,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
			kvmppc_set_gpr(vcpu, 3, 0);
			trap = 0;
		}

	} else {
		kvmppc_xive_push_vcpu(vcpu);
		trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr);
@@ -4312,12 +4320,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
			vcpu->arch.slb_max = 0;
	}

	dec = mfspr(SPRN_DEC);
	if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
		dec = (s32) dec;
	tb = mftb();
	vcpu->arch.dec_expires = dec + tb;

	store_spr_state(vcpu);

	restore_p9_host_os_sprs(vcpu, &host_os_sprs);
@@ -4827,7 +4829,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
	 * by L2 and the L1 decrementer is provided in hdec_expires
	 */
	if (kvmppc_core_pending_dec(vcpu) &&
			((get_tb() < vcpu->arch.dec_expires) ||
			((get_tb() < kvmppc_dec_expires_host_tb(vcpu)) ||
			 (trap == BOOK3S_INTERRUPT_SYSCALL &&
			  kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED)))
		kvmppc_core_dequeue_dec(vcpu);
+3 −0
Original line number Diff line number Diff line
@@ -358,6 +358,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
	/* convert TB values/offsets to host (L0) values */
	hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
	vc->tb_offset += l2_hv.tb_offset;
	vcpu->arch.dec_expires += l2_hv.tb_offset;

	/* set L1 state to L2 state */
	vcpu->arch.nested = l2;
@@ -399,6 +400,8 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
	if (l2_regs.msr & MSR_TS_MASK)
		vcpu->arch.shregs.msr |= MSR_TS_S;
	vc->tb_offset = saved_l1_hv.tb_offset;
	/* XXX: is this always the same delta as saved_l1_hv.tb_offset? */
	vcpu->arch.dec_expires -= l2_hv.tb_offset;
	restore_hv_regs(vcpu, &saved_l1_hv);
	vcpu->arch.purr += delta_purr;
	vcpu->arch.spurr += delta_spurr;
+9 −1
Original line number Diff line number Diff line
@@ -188,7 +188,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *nested = vcpu->arch.nested;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	s64 hdec;
	s64 hdec, dec;
	u64 tb, purr, spurr;
	u64 *exsave;
	bool ri_set;
@@ -317,6 +317,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
	 */
	mtspr(SPRN_HDEC, hdec);

	mtspr(SPRN_DEC, vcpu->arch.dec_expires - tb);

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
tm_return_to_guest:
#endif
@@ -461,6 +463,12 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
	vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
	vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);

	dec = mfspr(SPRN_DEC);
	if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
		dec = (s32) dec;
	tb = mftb();
	vcpu->arch.dec_expires = dec + tb;

	/* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
	mtspr(SPRN_PSSCR, host_psscr |
	      (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
Loading