Commit bb4f196b authored by Ruijing Dong, committed by Alex Deucher
Browse files

drm/amdgpu/vcn: support unified queue only in vcn4



- remove multiple queue support
- add unified queue related functions

Acked-by: Leo Liu <leo.liu@amd.com>
Signed-off-by: Ruijing Dong <ruijing.dong@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 4ed49c95
Loading
Loading
Loading
Loading
+140 −423
Original line number Diff line number Diff line
@@ -29,7 +29,6 @@
#include "soc15d.h"
#include "soc15_hw_ip.h"
#include "vcn_v2_0.h"
#include "vcn_sw_ring.h"

#include "vcn/vcn_4_0_0_offset.h"
#include "vcn/vcn_4_0_0_sh_mask.h"
@@ -45,15 +44,12 @@
#define VCN_VID_SOC_ADDRESS_2_0							0x1fb00
#define VCN1_VID_SOC_ADDRESS_3_0						0x48300

bool unifiedQ_enabled = false;

static int amdgpu_ih_clientid_vcns[] = {
	SOC15_IH_CLIENTID_VCN,
	SOC15_IH_CLIENTID_VCN1
};

static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev);
static int vcn_v4_0_set_powergating_state(void *handle,
        enum amd_powergating_state state);
@@ -71,36 +67,15 @@ static int vcn_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (unifiedQ_enabled) {
		adev->vcn.num_vcn_inst = 1;
	/* re-use enc ring as unified ring */
	adev->vcn.num_enc_rings = 1;
	} else {
		adev->vcn.num_enc_rings = 2;
	}

	if (!unifiedQ_enabled)
		vcn_v4_0_set_dec_ring_funcs(adev);

	vcn_v4_0_set_enc_ring_funcs(adev);
	vcn_v4_0_set_unified_ring_funcs(adev);
	vcn_v4_0_set_irq_funcs(adev);

	return 0;
}

/**
 * amdgpu_vcn_setup_unified_queue_ucode - register VCN firmware for PSP loading
 *
 * @adev: amdgpu_device pointer
 *
 * When the firmware load type is AMDGPU_FW_LOAD_PSP, record adev->vcn.fw in
 * the AMDGPU_UCODE_ID_VCN slot of adev->firmware.ucode and grow
 * adev->firmware.fw_size by the page-aligned ucode size. For any other load
 * type this function does nothing.
 */
static void amdgpu_vcn_setup_unified_queue_ucode(struct amdgpu_device *adev)
{
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;

		/* the common firmware header is read from the start of the firmware data */
		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
		/* account for the ucode size (little-endian in the header), rounded up to a page */
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		DRM_INFO("PSP loading VCN firmware\n");
	}
}

/**
 * vcn_v4_0_sw_init - sw init for VCN block
 *
@@ -111,16 +86,13 @@ static void amdgpu_vcn_setup_unified_queue_ucode(struct amdgpu_device *adev)
static int vcn_v4_0_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	int i, j, r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i, r;

	r = amdgpu_vcn_sw_init(adev);
	if (r)
		return r;

	if (unifiedQ_enabled)
		amdgpu_vcn_setup_unified_queue_ucode(adev);
	else
	amdgpu_vcn_setup_ucode(adev);

	r = amdgpu_vcn_resume(adev);
@@ -129,81 +101,40 @@ static int vcn_v4_0_sw_init(void *handle)

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		volatile struct amdgpu_vcn4_fw_shared *fw_shared;

		if (adev->vcn.harvest_config & (1 << i))
			continue;
		/* VCN DEC TRAP */
		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
				VCN_4_0__SRCID__UVD_TRAP, &adev->vcn.inst[i].irq);
		if (r)
			return r;

		atomic_set(&adev->vcn.inst[i].sched_score, 0);
		if (!unifiedQ_enabled) {
			ring = &adev->vcn.inst[i].ring_dec;
			ring->use_doorbell = true;

			/* VCN4 doorbell layout
			 * 1: VCN_JPEG_DB_CTRL UVD_JRBC_RB_WPTR; (jpeg)
			 * 2: VCN_RB1_DB_CTRL  UVD_RB_WPTR; (decode/encode for unified queue)
			 * 3: VCN_RB2_DB_CTRL  UVD_RB_WPTR2; (encode only for swqueue)
			 * 4: VCN_RB3_DB_CTRL  UVD_RB_WPTR3; (Reserved)
			 * 5: VCN_RB4_DB_CTRL  UVD_RB_WPTR4; (decode only for swqueue)
			 */

			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1)
						+ 5 + 8 * i;

			sprintf(ring->name, "vcn_dec_%d", i);
			r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
					     AMDGPU_RING_PRIO_DEFAULT,
					     &adev->vcn.inst[i].sched_score);
			if (r)
				return r;
		}
		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
			/* VCN ENC TRAP */
		/* VCN UNIFIED TRAP */
		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
				j + VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
				VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
		if (r)
			return r;

			ring = &adev->vcn.inst[i].ring_enc[j];
		ring = &adev->vcn.inst[i].ring_enc[0];
		ring->use_doorbell = true;
		ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i;

			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;

			if (unifiedQ_enabled) {
				sprintf(ring->name, "vcn_unified%d", i);
				r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0,
				     AMDGPU_RING_PRIO_DEFAULT, NULL);
			} else {
				enum amdgpu_ring_priority_level hw_prio;
		sprintf(ring->name, "vcn_unified_%d", i);

				hw_prio = amdgpu_vcn_get_enc_ring_prio(j);
				sprintf(ring->name, "vcn_enc_%d.%d", i, j);
		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
					     hw_prio, &adev->vcn.inst[i].sched_score);
			}
				  AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score);
		if (r)
			return r;
		}

		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
		fw_shared->present_flag_0 = 0;

		if (unifiedQ_enabled) {
			fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
		fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
		fw_shared->sq.is_enabled = 1;
		}

		if (amdgpu_vcnfw_log)
			amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
	}

	if (!unifiedQ_enabled) {
	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
		adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode;
	}

	return 0;
}

@@ -254,15 +185,13 @@ static int vcn_v4_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int i, j, r;
	int i, r;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (unifiedQ_enabled)

		ring = &adev->vcn.inst[i].ring_enc[0];
		else
			ring = &adev->vcn.inst[i].ring_dec;

		adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
				((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i);
@@ -270,13 +199,6 @@ static int vcn_v4_0_hw_init(void *handle)
		r = amdgpu_ring_test_helper(ring);
		if (r)
			goto done;

		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
			ring = &adev->vcn.inst[i].ring_enc[j];
			r = amdgpu_ring_test_helper(ring);
			if (r)
				goto done;
		}
	}

done:
@@ -464,7 +386,6 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0),
			AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);

	}

	if (!indirect)
@@ -888,7 +809,6 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
	volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
	struct amdgpu_ring *ring;
	uint32_t tmp;
	int i;

	/* disable register anti-hang mechanism */
	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1,
@@ -974,74 +894,32 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
			(uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
				(uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));

	if (unifiedQ_enabled) {
	ring = &adev->vcn.inst[inst_idx].ring_enc[0];
		fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
	} else
		ring = &adev->vcn.inst[inst_idx].ring_dec;

	WREG32_SOC15(VCN, inst_idx, regVCN_RB4_DB_CTRL,
		ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT |
		VCN_RB4_DB_CTRL__EN_MASK);

	/* program the RB_BASE for ring buffer */
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO4,
		lower_32_bits(ring->gpu_addr));
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI4,
		upper_32_bits(ring->gpu_addr));

	WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE4, ring->ring_size / sizeof(uint32_t));

	/* resetting ring, fw should not check RB ring */
	tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
	tmp &= ~(VCN_RB_ENABLE__RB4_EN_MASK);
	WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);

	/* Initialize the ring buffer's read and write pointers */
	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR4);
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4, tmp);
	ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4);
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr);
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4);

	tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
	tmp |= VCN_RB_ENABLE__RB4_EN_MASK;
	tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
	WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
	fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0);
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0);

	WREG32_SOC15(VCN, inst_idx, regUVD_SCRATCH2, 0);

	if (unifiedQ_enabled)
		fw_shared->sq.queue_mode &= ~FW_QUEUE_RING_RESET;

	for (i = 0; i < adev->vcn.num_enc_rings; i++) {
		ring = &adev->vcn.inst[inst_idx].ring_enc[i];

		if (i) {
			ring = &adev->vcn.inst[inst_idx].ring_enc[1];

			WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO2, ring->gpu_addr);
			WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
			WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE2, ring->ring_size / 4);
			tmp= RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR2);
			WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2, tmp);
			ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2);

			WREG32_SOC15(VCN, inst_idx, regVCN_RB2_DB_CTRL,
				ring->doorbell_index << VCN_RB2_DB_CTRL__OFFSET__SHIFT |
				VCN_RB2_DB_CTRL__EN_MASK);
		} else {
			ring = &adev->vcn.inst[inst_idx].ring_enc[0];

			WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr);
			WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
			WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4);
	tmp= RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR);
	WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp);
	ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);

	tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE);
	tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
	WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp);
	fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);

	WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL,
			ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
			VCN_RB1_DB_CTRL__EN_MASK);
		}
	}

	return 0;
}

@@ -1064,6 +942,8 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
		amdgpu_dpm_enable_uvd(adev, true);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;

		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			r = vcn_v4_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
			continue;
@@ -1166,7 +1046,7 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
				if (status & 2)
					break;

				dev_err(adev->dev, "VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i);
				dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i);
				WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL),
						UVD_VCPU_CNTL__BLK_RST_MASK,
						~UVD_VCPU_CNTL__BLK_RST_MASK);
@@ -1180,7 +1060,7 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
		}

		if (r) {
			dev_err(adev->dev, "VCN[%d] decode not responding, giving up!!!\n", i);
			dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i);
			return r;
		}

@@ -1193,65 +1073,30 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
		WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0,
				~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));

		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
		if (unifiedQ_enabled) {
		ring = &adev->vcn.inst[i].ring_enc[0];
			fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
		} else {
			ring = &adev->vcn.inst[i].ring_dec;

			WREG32_SOC15(VCN, i, regVCN_RB4_DB_CTRL,
				ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT |
				VCN_RB4_DB_CTRL__EN_MASK);

			/* program the RB_BASE for ring buffer */
			WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO4,
				lower_32_bits(ring->gpu_addr));
			WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI4,
				upper_32_bits(ring->gpu_addr));

			WREG32_SOC15(VCN, i, regUVD_RB_SIZE4, ring->ring_size / sizeof(uint32_t));

			/* resetting ring, fw should not check RB ring */
			tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
			tmp &= ~(VCN_RB_ENABLE__RB4_EN_MASK);
			WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
		WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
				ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
				VCN_RB1_DB_CTRL__EN_MASK);

			/* Initialize the ring buffer's read and write pointers */
			tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR4);
			WREG32_SOC15(VCN, i, regUVD_RB_WPTR4, tmp);
			ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR4);
		WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
		WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
		WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);

		tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
			tmp |= VCN_RB_ENABLE__RB4_EN_MASK;
		tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK);
		WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
		fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;
		WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
		WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);

			ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_RPTR4);
		}
		ring = &adev->vcn.inst[i].ring_enc[0];
		WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
			ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
			VCN_RB1_DB_CTRL__EN_MASK);
		tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR);
		WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp);
		ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);
		WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
		WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
		WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);
		if (unifiedQ_enabled)

		tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
		tmp |= VCN_RB_ENABLE__RB1_EN_MASK;
		WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
		fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
		else {
			ring = &adev->vcn.inst[i].ring_enc[1];
			WREG32_SOC15(VCN, i, regVCN_RB2_DB_CTRL,
				ring->doorbell_index << VCN_RB2_DB_CTRL__OFFSET__SHIFT |
				VCN_RB2_DB_CTRL__EN_MASK);
			tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR2);
			WREG32_SOC15(VCN, i, regUVD_RB_WPTR2, tmp);
			ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR2);
			WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO2, ring->gpu_addr);
			WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
			WREG32_SOC15(VCN, i, regUVD_RB_SIZE2, ring->ring_size / 4);
		}
	}

	return 0;
@@ -1277,12 +1122,6 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR);
	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF);

	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2);
	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR2, tmp, 0xFFFFFFFF);

	tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4);
	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR4, tmp, 0xFFFFFFFF);

	SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1,
		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);

@@ -1301,10 +1140,14 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
 */
static int vcn_v4_0_stop(struct amdgpu_device *adev)
{
	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
	uint32_t tmp;
	int i, r = 0;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
		fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;

		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			r = vcn_v4_0_stop_dpg_mode(adev, i);
			continue;
@@ -1414,8 +1257,6 @@ static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
			/* unpause dpg, no need to wait */
			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
			WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data);
			SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1,
				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
		}
		adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
	}
@@ -1424,165 +1265,72 @@ static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx,
}

/**
 * vcn_v4_0_dec_ring_get_rptr - get read pointer
 * vcn_v4_0_unified_ring_get_rptr - get unified read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 * Returns the current hardware unified read pointer
 */
static uint64_t vcn_v4_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
static uint64_t vcn_v4_0_unified_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR4);
}
	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
		DRM_ERROR("wrong ring id is identified in %s", __func__);

/**
 * vcn_v4_0_dec_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current write pointer: the value stored at ring->wptr_cpu_addr
 * when the ring uses a doorbell, otherwise the regUVD_RB_WPTR4 register of
 * VCN instance ring->me.
 */
static uint64_t vcn_v4_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* doorbell rings keep the write pointer in CPU-visible memory, so no register read is done */
	if (ring->use_doorbell)
		return *ring->wptr_cpu_addr;
	else
		return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR4);
}

/**
 * vcn_v4_0_dec_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the lower 32 bits of ring->wptr to the hardware. With
 * AMD_PG_SUPPORT_VCN_DPG set the value is first written to regUVD_SCRATCH2.
 * It is then published either through ring->wptr_cpu_addr plus the ring's
 * doorbell, or, when no doorbell is used, through regUVD_RB_WPTR4.
 */
static void vcn_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* in DPG mode the write pointer is additionally mirrored into SCRATCH2 */
	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
		WREG32_SOC15(VCN, ring->me, regUVD_SCRATCH2,
			lower_32_bits(ring->wptr));
	}

	if (ring->use_doorbell) {
		/* update the CPU-visible copy before ringing the doorbell */
		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR4, lower_32_bits(ring->wptr));
	}
}

/*
 * Ring callback table for the VCN 4.0 decode software ring
 * (AMDGPU_RING_TYPE_VCN_DEC on AMDGPU_MMHUB_0). Pointer access uses the
 * vcn_v4_0_dec_ring_* helpers; command emission uses the vcn_dec_sw_ring_*
 * helpers.
 */
static const struct amdgpu_ring_funcs vcn_v4_0_dec_sw_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_DEC,
	.align_mask = 0x3f,
	.nop = VCN_DEC_SW_CMD_NO_OP,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vcn_v4_0_dec_ring_get_rptr,
	.get_wptr = vcn_v4_0_dec_ring_get_wptr,
	.set_wptr = vcn_v4_0_dec_ring_set_wptr,
	/* frame size: TLB-flush register writes and waits plus the software-ring frame overhead */
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		VCN_SW_RING_EMIT_FRAME_SIZE,
	.emit_ib_size = 5, /* vcn_dec_sw_ring_emit_ib */
	.emit_ib = vcn_dec_sw_ring_emit_ib,
	.emit_fence = vcn_dec_sw_ring_emit_fence,
	.emit_vm_flush = vcn_dec_sw_ring_emit_vm_flush,
	.test_ring = amdgpu_vcn_dec_sw_ring_test_ring,
	.test_ib = amdgpu_vcn_dec_sw_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vcn_dec_sw_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vcn_ring_begin_use,
	.end_use = amdgpu_vcn_ring_end_use,
	.emit_wreg = vcn_dec_sw_ring_emit_wreg,
	.emit_reg_wait = vcn_dec_sw_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

/**
 * vcn_v4_0_enc_ring_get_rptr - get enc read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware enc read pointer
 */
static uint64_t vcn_v4_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
	return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR);
	else
		return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR2);
}

/**
 * vcn_v4_0_enc_ring_get_wptr - get enc write pointer
 * vcn_v4_0_unified_ring_get_wptr - get unified write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware enc write pointer
 * Returns the current hardware unified write pointer
 */
static uint64_t vcn_v4_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
static uint64_t vcn_v4_0_unified_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
		DRM_ERROR("wrong ring id is identified in %s", __func__);

	if (ring->use_doorbell)
		return *ring->wptr_cpu_addr;
	else
		return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR);
	} else {
		if (ring->use_doorbell)
			return *ring->wptr_cpu_addr;
		else
			return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR2);
	}
}

/**
 * vcn_v4_0_enc_ring_set_wptr - set enc write pointer
 * vcn_v4_0_unified_ring_set_wptr - set unified write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the enc write pointer to the hardware
 */
static void vcn_v4_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
	if (ring != &adev->vcn.inst[ring->me].ring_enc[0])
		DRM_ERROR("wrong ring id is identified in %s", __func__);

	if (ring->use_doorbell) {
		*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr));
	}
	} else {
		if (ring->use_doorbell) {
			*ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		} else {
			WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR2, lower_32_bits(ring->wptr));
		}
	}
}

static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = {
static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_ENC,
	.align_mask = 0x3f,
	.nop = VCN_ENC_CMD_NO_OP,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vcn_v4_0_enc_ring_get_rptr,
	.get_wptr = vcn_v4_0_enc_ring_get_wptr,
	.set_wptr = vcn_v4_0_enc_ring_set_wptr,
	.get_rptr = vcn_v4_0_unified_ring_get_rptr,
	.get_wptr = vcn_v4_0_unified_ring_get_wptr,
	.set_wptr = vcn_v4_0_unified_ring_set_wptr,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
@@ -1594,7 +1342,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = {
	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
	.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
	.test_ring = amdgpu_vcn_enc_ring_test_ring,
	.test_ib = amdgpu_vcn_enc_ring_test_ib,
	.test_ib = amdgpu_vcn_unified_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vcn_v2_0_enc_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
@@ -1606,13 +1354,13 @@ static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = {
};

/**
 * vcn_v4_0_set_dec_ring_funcs - set dec ring functions
 * vcn_v4_0_set_unified_ring_funcs - set unified ring functions
 *
 * @adev: amdgpu_device pointer
 *
 * Set decode ring functions
 * Set unified ring functions
 */
static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev)
{
	int i;

@@ -1620,32 +1368,10 @@ static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev)
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		adev->vcn.inst[i].ring_dec.funcs = &vcn_v4_0_dec_sw_ring_vm_funcs;
		adev->vcn.inst[i].ring_dec.me = i;
		DRM_INFO("VCN(%d) decode software ring is enabled in VM mode\n", i);
	}
}

/**
 * vcn_v4_0_set_enc_ring_funcs - set enc ring functions
 *
 * @adev: amdgpu_device pointer
 *
 * Set encode ring functions
 */
static void vcn_v4_0_set_enc_ring_funcs(struct amdgpu_device *adev)
{
	int i, j;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_unified_ring_vm_funcs;
		adev->vcn.inst[i].ring_enc[0].me = i;

		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
			adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v4_0_enc_ring_vm_funcs;
			adev->vcn.inst[i].ring_enc[j].me = i;
		}
		DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i);
		DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i);
	}
}

@@ -1798,18 +1524,9 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_
	DRM_DEBUG("IH: VCN TRAP\n");

	switch (entry->src_id) {
	case VCN_4_0__SRCID__UVD_TRAP:
		if (!unifiedQ_enabled) {
			amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
			break;
		}
		break;
	case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
		break;
	case VCN_4_0__SRCID__UVD_ENC_LOW_LATENCY:
		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);