Loading drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +140 −423 Original line number Diff line number Diff line Loading @@ -29,7 +29,6 @@ #include "soc15d.h" #include "soc15_hw_ip.h" #include "vcn_v2_0.h" #include "vcn_sw_ring.h" #include "vcn/vcn_4_0_0_offset.h" #include "vcn/vcn_4_0_0_sh_mask.h" Loading @@ -45,15 +44,12 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 bool unifiedQ_enabled = false; static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 }; static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state); Loading @@ -71,36 +67,15 @@ static int vcn_v4_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (unifiedQ_enabled) { adev->vcn.num_vcn_inst = 1; /* re-use enc ring as unified ring */ adev->vcn.num_enc_rings = 1; } else { adev->vcn.num_enc_rings = 2; } if (!unifiedQ_enabled) vcn_v4_0_set_dec_ring_funcs(adev); vcn_v4_0_set_enc_ring_funcs(adev); vcn_v4_0_set_unified_ring_funcs(adev); vcn_v4_0_set_irq_funcs(adev); return 0; } static void amdgpu_vcn_setup_unified_queue_ucode(struct amdgpu_device *adev) { if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { const struct common_firmware_header *hdr; hdr = (const struct common_firmware_header *)adev->vcn.fw->data; adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); DRM_INFO("PSP loading VCN firmware\n"); } } /** * vcn_v4_0_sw_init - sw init for VCN block * Loading @@ -111,16 +86,13 @@ static void amdgpu_vcn_setup_unified_queue_ucode(struct amdgpu_device *adev) static int vcn_v4_0_sw_init(void *handle) { struct amdgpu_ring *ring; int i, j, r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; r = amdgpu_vcn_sw_init(adev); if (r) return r; if (unifiedQ_enabled) amdgpu_vcn_setup_unified_queue_ucode(adev); else amdgpu_vcn_setup_ucode(adev); r = amdgpu_vcn_resume(adev); Loading @@ -129,81 +101,40 @@ static int vcn_v4_0_sw_init(void *handle) for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; /* VCN DEC TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], VCN_4_0__SRCID__UVD_TRAP, &adev->vcn.inst[i].irq); if (r) return r; atomic_set(&adev->vcn.inst[i].sched_score, 0); if (!unifiedQ_enabled) { ring = &adev->vcn.inst[i].ring_dec; ring->use_doorbell = true; /* VCN4 doorbell layout * 1: VCN_JPEG_DB_CTRL UVD_JRBC_RB_WPTR; (jpeg) * 2: VCN_RB1_DB_CTRL UVD_RB_WPTR; (decode/encode for unified queue) * 3: VCN_RB2_DB_CTRL UVD_RB_WPTR2; (encode only for swqueue) * 4: VCN_RB3_DB_CTRL UVD_RB_WPTR3; (Reserved) * 5: VCN_RB4_DB_CTRL UVD_RB_WPTR4; (decode only for swqueue) */ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 5 + 8 * i; sprintf(ring->name, "vcn_dec_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, AMDGPU_RING_PRIO_DEFAULT, &adev->vcn.inst[i].sched_score); if (r) return r; } for (j = 0; j < adev->vcn.num_enc_rings; ++j) { /* VCN ENC TRAP */ /* VCN UNIFIED TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], j + VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); if (r) return r; ring = &adev->vcn.inst[i].ring_enc[j]; ring = &adev->vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i; if (unifiedQ_enabled) { sprintf(ring->name, "vcn_unified%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); } else { enum amdgpu_ring_priority_level hw_prio; sprintf(ring->name, "vcn_unified_%d", i); hw_prio = amdgpu_vcn_get_enc_ring_prio(j); sprintf(ring->name, "vcn_enc_%d.%d", i, j); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, hw_prio, &adev->vcn.inst[i].sched_score); } AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score); if (r) return r; } fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->present_flag_0 = 0; if (unifiedQ_enabled) { fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); fw_shared->sq.is_enabled = 1; } if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); } if (!unifiedQ_enabled) { if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode; } return 0; } Loading Loading @@ -254,15 +185,13 @@ static int vcn_v4_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; int i, j, r; int i, r; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; if (unifiedQ_enabled) ring = &adev->vcn.inst[i].ring_enc[0]; else ring = &adev->vcn.inst[i].ring_dec; adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i); Loading @@ -270,13 +199,6 @@ static int vcn_v4_0_hw_init(void *handle) r = amdgpu_ring_test_helper(ring); if (r) goto done; for (j = 0; j < adev->vcn.num_enc_rings; ++j) { ring = &adev->vcn.inst[i].ring_enc[j]; r = amdgpu_ring_test_helper(ring); if (r) goto done; } } done: Loading Loading @@ -464,7 +386,6 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); } if (!indirect) Loading Loading @@ -888,7 +809,6 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; int i; /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1, Loading Loading @@ -974,74 +894,32 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); if (unifiedQ_enabled) { ring = &adev->vcn.inst[inst_idx].ring_enc[0]; fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; } else ring = &adev->vcn.inst[inst_idx].ring_dec; WREG32_SOC15(VCN, inst_idx, regVCN_RB4_DB_CTRL, ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT | VCN_RB4_DB_CTRL__EN_MASK); /* program the RB_BASE for ring buffer */ WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO4, lower_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI4, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE4, ring->ring_size / sizeof(uint32_t)); /* reseting ring, fw should not check RB ring */ tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); tmp &= ~(VCN_RB_ENABLE__RB4_EN_MASK); WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); /* Initialize the ring buffer's read and write pointers */ tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR4); WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4, tmp); ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4); WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4); tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); tmp |= VCN_RB_ENABLE__RB4_EN_MASK; tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0); WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0); WREG32_SOC15(VCN, inst_idx, regUVD_SCRATCH2, 0); if (unifiedQ_enabled) fw_shared->sq.queue_mode &= ~FW_QUEUE_RING_RESET; for (i = 0; i < adev->vcn.num_enc_rings; i++) { ring = &adev->vcn.inst[inst_idx].ring_enc[i]; if (i) { ring = &adev->vcn.inst[inst_idx].ring_enc[1]; WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE2, ring->ring_size / 4); tmp= RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR2); WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2, tmp); ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2); WREG32_SOC15(VCN, inst_idx, regVCN_RB2_DB_CTRL, ring->doorbell_index << VCN_RB2_DB_CTRL__OFFSET__SHIFT | VCN_RB2_DB_CTRL__EN_MASK); } else { ring = &adev->vcn.inst[inst_idx].ring_enc[0]; WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4); tmp= RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR); WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp); ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); tmp |= VCN_RB_ENABLE__RB1_EN_MASK; WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL, ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | VCN_RB1_DB_CTRL__EN_MASK); } } return 0; } Loading @@ -1064,6 +942,8 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { r = vcn_v4_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram); continue; Loading Loading @@ -1166,7 +1046,7 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) if (status & 2) break; dev_err(adev->dev, "VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i); dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i); WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), UVD_VCPU_CNTL__BLK_RST_MASK, ~UVD_VCPU_CNTL__BLK_RST_MASK); Loading @@ -1180,7 +1060,7 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) } if (r) { dev_err(adev->dev, "VCN[%d] decode not responding, giving up!!!\n", i); dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); return r; } Loading @@ -1193,65 +1073,30 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0, ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (unifiedQ_enabled) { ring = &adev->vcn.inst[i].ring_enc[0]; fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; } else { ring = &adev->vcn.inst[i].ring_dec; WREG32_SOC15(VCN, i, regVCN_RB4_DB_CTRL, ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT | VCN_RB4_DB_CTRL__EN_MASK); /* program the RB_BASE for ring buffer */ WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO4, lower_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI4, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, i, regUVD_RB_SIZE4, ring->ring_size / sizeof(uint32_t)); /* resetting ring, fw should not check RB ring */ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); tmp &= ~(VCN_RB_ENABLE__RB4_EN_MASK); WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | VCN_RB1_DB_CTRL__EN_MASK); /* Initialize the ring buffer's read and write pointers */ tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR4); WREG32_SOC15(VCN, i, regUVD_RB_WPTR4, tmp); ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR4); WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); tmp |= VCN_RB_ENABLE__RB4_EN_MASK; tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0); WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0); ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_RPTR4); } ring = &adev->vcn.inst[i].ring_enc[0]; WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | VCN_RB1_DB_CTRL__EN_MASK); tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR); WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp); ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR); WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); if (unifiedQ_enabled) tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); tmp |= VCN_RB_ENABLE__RB1_EN_MASK; WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); else { ring = &adev->vcn.inst[i].ring_enc[1]; WREG32_SOC15(VCN, i, regVCN_RB2_DB_CTRL, ring->doorbell_index << VCN_RB2_DB_CTRL__OFFSET__SHIFT | VCN_RB2_DB_CTRL__EN_MASK); tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR2); WREG32_SOC15(VCN, i, regUVD_RB_WPTR2, tmp); ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR2); WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, i, regUVD_RB_SIZE2, ring->ring_size / 4); } } return 0; Loading @@ -1277,12 +1122,6 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF); tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2); SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR2, tmp, 0xFFFFFFFF); tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4); SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR4, tmp, 0xFFFFFFFF); SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); Loading @@ -1301,10 +1140,14 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) */ static int vcn_v4_0_stop(struct amdgpu_device *adev) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; uint32_t tmp; int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { r = vcn_v4_0_stop_dpg_mode(adev, i); continue; Loading Loading @@ -1414,8 +1257,6 @@ static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, /* unpause dpg, no need to wait */ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data); SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); } adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; } Loading @@ -1424,165 +1265,72 @@ static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, } /** * vcn_v4_0_dec_ring_get_rptr - get read pointer * vcn_v4_0_unified_ring_get_rptr - get unified read pointer * * @ring: amdgpu_ring pointer * * Returns the current hardware read pointer * Returns the current hardware unified read pointer */ static uint64_t vcn_v4_0_dec_ring_get_rptr(struct amdgpu_ring *ring) static uint64_t vcn_v4_0_unified_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR4); } if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) DRM_ERROR("wrong ring id is identified in %s", __func__); /** * vcn_v4_0_dec_ring_get_wptr - get write pointer * * @ring: amdgpu_ring pointer * * Returns the current hardware write pointer */ static uint64_t vcn_v4_0_dec_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; if (ring->use_doorbell) return *ring->wptr_cpu_addr; else return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR4); } /** * vcn_v4_0_dec_ring_set_wptr - set write pointer * * @ring: amdgpu_ring pointer * * Commits the write pointer to the hardware */ static void vcn_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { WREG32_SOC15(VCN, ring->me, regUVD_SCRATCH2, lower_32_bits(ring->wptr)); } if (ring->use_doorbell) { *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR4, lower_32_bits(ring->wptr)); } } static const struct amdgpu_ring_funcs vcn_v4_0_dec_sw_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0x3f, .nop = VCN_DEC_SW_CMD_NO_OP, .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v4_0_dec_ring_get_rptr, .get_wptr = vcn_v4_0_dec_ring_get_wptr, .set_wptr = vcn_v4_0_dec_ring_set_wptr, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + VCN_SW_RING_EMIT_FRAME_SIZE, .emit_ib_size = 5, /* vcn_dec_sw_ring_emit_ib */ .emit_ib = vcn_dec_sw_ring_emit_ib, .emit_fence = vcn_dec_sw_ring_emit_fence, .emit_vm_flush = vcn_dec_sw_ring_emit_vm_flush, .test_ring = amdgpu_vcn_dec_sw_ring_test_ring, .test_ib = amdgpu_vcn_dec_sw_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .insert_end = vcn_dec_sw_ring_insert_end, .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_vcn_ring_begin_use, .end_use = amdgpu_vcn_ring_end_use, .emit_wreg = vcn_dec_sw_ring_emit_wreg, .emit_reg_wait = vcn_dec_sw_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; /** * vcn_v4_0_enc_ring_get_rptr - get enc read pointer * * @ring: amdgpu_ring pointer * * Returns the current hardware enc read pointer */ static uint64_t vcn_v4_0_enc_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR); else return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR2); } /** * vcn_v4_0_enc_ring_get_wptr - get enc write pointer * vcn_v4_0_unified_ring_get_wptr - get unified write pointer * * @ring: amdgpu_ring pointer * * Returns the current hardware enc write pointer * Returns the current hardware unified write pointer */ static uint64_t vcn_v4_0_enc_ring_get_wptr(struct amdgpu_ring *ring) static uint64_t vcn_v4_0_unified_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) DRM_ERROR("wrong ring id is identified in %s", __func__); if (ring->use_doorbell) return *ring->wptr_cpu_addr; else return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR); } else { if (ring->use_doorbell) return *ring->wptr_cpu_addr; else return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR2); } } /** * vcn_v4_0_enc_ring_set_wptr - set enc write pointer * vcn_v4_0_unified_ring_set_wptr - set enc write pointer * * @ring: amdgpu_ring pointer * * Commits the enc write pointer to the hardware */ static void vcn_v4_0_enc_ring_set_wptr(struct amdgpu_ring *ring) static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) DRM_ERROR("wrong ring id is identified in %s", __func__); if (ring->use_doorbell) { *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr)); } } else { if (ring->use_doorbell) { *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR2, lower_32_bits(ring->wptr)); } } } static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = { static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v4_0_enc_ring_get_rptr, .get_wptr = vcn_v4_0_enc_ring_get_wptr, .set_wptr = vcn_v4_0_enc_ring_set_wptr, .get_rptr = vcn_v4_0_unified_ring_get_rptr, .get_wptr = vcn_v4_0_unified_ring_get_wptr, .set_wptr = vcn_v4_0_unified_ring_set_wptr, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + Loading @@ -1594,7 +1342,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = { .emit_fence = vcn_v2_0_enc_ring_emit_fence, .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, .test_ring = amdgpu_vcn_enc_ring_test_ring, .test_ib = amdgpu_vcn_enc_ring_test_ib, .test_ib = amdgpu_vcn_unified_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .insert_end = vcn_v2_0_enc_ring_insert_end, .pad_ib = amdgpu_ring_generic_pad_ib, Loading @@ -1606,13 +1354,13 @@ static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = { }; /** * vcn_v4_0_set_dec_ring_funcs - set dec ring functions * vcn_v4_0_set_unified_ring_funcs - set unified ring functions * * @adev: amdgpu_device pointer * * Set decode ring functions * Set unified ring functions */ static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev) static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev) { int i; Loading @@ -1620,32 +1368,10 @@ static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; adev->vcn.inst[i].ring_dec.funcs = &vcn_v4_0_dec_sw_ring_vm_funcs; adev->vcn.inst[i].ring_dec.me = i; DRM_INFO("VCN(%d) decode software ring is enabled in VM mode\n", i); } } /** * vcn_v4_0_set_enc_ring_funcs - set enc ring functions * * @adev: amdgpu_device pointer * * Set encode ring functions */ static void vcn_v4_0_set_enc_ring_funcs(struct amdgpu_device *adev) { int i, j; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_unified_ring_vm_funcs; adev->vcn.inst[i].ring_enc[0].me = i; for (j = 0; j < adev->vcn.num_enc_rings; ++j) { adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v4_0_enc_ring_vm_funcs; adev->vcn.inst[i].ring_enc[j].me = i; } DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i); DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i); } } Loading Loading @@ -1798,18 +1524,9 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_ DRM_DEBUG("IH: VCN TRAP\n"); switch (entry->src_id) { case VCN_4_0__SRCID__UVD_TRAP: if (!unifiedQ_enabled) { amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec); break; } break; case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE: amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); break; case VCN_4_0__SRCID__UVD_ENC_LOW_LATENCY: amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); Loading Loading
drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +140 −423 Original line number Diff line number Diff line Loading @@ -29,7 +29,6 @@ #include "soc15d.h" #include "soc15_hw_ip.h" #include "vcn_v2_0.h" #include "vcn_sw_ring.h" #include "vcn/vcn_4_0_0_offset.h" #include "vcn/vcn_4_0_0_sh_mask.h" Loading @@ -45,15 +44,12 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 bool unifiedQ_enabled = false; static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN1 }; static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state); Loading @@ -71,36 +67,15 @@ static int vcn_v4_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (unifiedQ_enabled) { adev->vcn.num_vcn_inst = 1; /* re-use enc ring as unified ring */ adev->vcn.num_enc_rings = 1; } else { adev->vcn.num_enc_rings = 2; } if (!unifiedQ_enabled) vcn_v4_0_set_dec_ring_funcs(adev); vcn_v4_0_set_enc_ring_funcs(adev); vcn_v4_0_set_unified_ring_funcs(adev); vcn_v4_0_set_irq_funcs(adev); return 0; } static void amdgpu_vcn_setup_unified_queue_ucode(struct amdgpu_device *adev) { if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { const struct common_firmware_header *hdr; hdr = (const struct common_firmware_header *)adev->vcn.fw->data; adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); DRM_INFO("PSP loading VCN firmware\n"); } } /** * vcn_v4_0_sw_init - sw init for VCN block * Loading @@ -111,16 +86,13 @@ static void amdgpu_vcn_setup_unified_queue_ucode(struct amdgpu_device *adev) static int vcn_v4_0_sw_init(void *handle) { struct amdgpu_ring *ring; int i, j, r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; r = amdgpu_vcn_sw_init(adev); if (r) return r; if (unifiedQ_enabled) amdgpu_vcn_setup_unified_queue_ucode(adev); else amdgpu_vcn_setup_ucode(adev); r = amdgpu_vcn_resume(adev); Loading @@ -129,81 +101,40 @@ static int vcn_v4_0_sw_init(void *handle) for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; /* VCN DEC TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], VCN_4_0__SRCID__UVD_TRAP, &adev->vcn.inst[i].irq); if (r) return r; atomic_set(&adev->vcn.inst[i].sched_score, 0); if (!unifiedQ_enabled) { ring = &adev->vcn.inst[i].ring_dec; ring->use_doorbell = true; /* VCN4 doorbell layout * 1: VCN_JPEG_DB_CTRL UVD_JRBC_RB_WPTR; (jpeg) * 2: VCN_RB1_DB_CTRL UVD_RB_WPTR; (decode/encode for unified queue) * 3: VCN_RB2_DB_CTRL UVD_RB_WPTR2; (encode only for swqueue) * 4: VCN_RB3_DB_CTRL UVD_RB_WPTR3; (Reserved) * 5: VCN_RB4_DB_CTRL UVD_RB_WPTR4; (decode only for swqueue) */ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 5 + 8 * i; sprintf(ring->name, "vcn_dec_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, AMDGPU_RING_PRIO_DEFAULT, &adev->vcn.inst[i].sched_score); if (r) return r; } for (j = 0; j < adev->vcn.num_enc_rings; ++j) { /* VCN ENC TRAP */ /* VCN UNIFIED TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], j + VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); if (r) return r; ring = &adev->vcn.inst[i].ring_enc[j]; ring = &adev->vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i; if (unifiedQ_enabled) { sprintf(ring->name, "vcn_unified%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); } else { enum amdgpu_ring_priority_level hw_prio; sprintf(ring->name, "vcn_unified_%d", i); hw_prio = amdgpu_vcn_get_enc_ring_prio(j); sprintf(ring->name, "vcn_enc_%d.%d", i, j); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, hw_prio, &adev->vcn.inst[i].sched_score); } AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score); if (r) return r; } fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->present_flag_0 = 0; if (unifiedQ_enabled) { fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); fw_shared->sq.is_enabled = 1; } if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); } if (!unifiedQ_enabled) { if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode; } return 0; } Loading Loading @@ -254,15 +185,13 @@ static int vcn_v4_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; int i, j, r; int i, r; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; if (unifiedQ_enabled) ring = &adev->vcn.inst[i].ring_enc[0]; else ring = &adev->vcn.inst[i].ring_dec; adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i); Loading @@ -270,13 +199,6 @@ static int vcn_v4_0_hw_init(void *handle) r = amdgpu_ring_test_helper(ring); if (r) goto done; for (j = 0; j < adev->vcn.num_enc_rings; ++j) { ring = &adev->vcn.inst[i].ring_enc[j]; r = amdgpu_ring_test_helper(ring); if (r) goto done; } } done: Loading Loading @@ -464,7 +386,6 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); } if (!indirect) Loading Loading @@ -888,7 +809,6 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; int i; /* disable register anti-hang mechanism */ WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1, Loading Loading @@ -974,74 +894,32 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); if (unifiedQ_enabled) { ring = &adev->vcn.inst[inst_idx].ring_enc[0]; fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; } else ring = &adev->vcn.inst[inst_idx].ring_dec; WREG32_SOC15(VCN, inst_idx, regVCN_RB4_DB_CTRL, ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT | VCN_RB4_DB_CTRL__EN_MASK); /* program the RB_BASE for ring buffer */ WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO4, lower_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI4, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE4, ring->ring_size / sizeof(uint32_t)); /* reseting ring, fw should not check RB ring */ tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); tmp &= ~(VCN_RB_ENABLE__RB4_EN_MASK); WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); /* Initialize the ring buffer's read and write pointers */ tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR4); WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4, tmp); ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4); WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4); tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); tmp |= VCN_RB_ENABLE__RB4_EN_MASK; tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0); WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0); WREG32_SOC15(VCN, inst_idx, regUVD_SCRATCH2, 0); if (unifiedQ_enabled) fw_shared->sq.queue_mode &= ~FW_QUEUE_RING_RESET; for (i = 0; i < adev->vcn.num_enc_rings; i++) { ring = &adev->vcn.inst[inst_idx].ring_enc[i]; if (i) { ring = &adev->vcn.inst[inst_idx].ring_enc[1]; WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE2, ring->ring_size / 4); tmp= RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR2); WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2, tmp); ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2); WREG32_SOC15(VCN, inst_idx, regVCN_RB2_DB_CTRL, ring->doorbell_index << VCN_RB2_DB_CTRL__OFFSET__SHIFT | VCN_RB2_DB_CTRL__EN_MASK); } else { ring = &adev->vcn.inst[inst_idx].ring_enc[0]; WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4); tmp= RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR); WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp); ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); tmp |= VCN_RB_ENABLE__RB1_EN_MASK; WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL, ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | VCN_RB1_DB_CTRL__EN_MASK); } } return 0; } Loading @@ -1064,6 +942,8 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { r = vcn_v4_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram); continue; Loading Loading @@ -1166,7 +1046,7 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) if (status & 2) break; dev_err(adev->dev, "VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i); dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i); WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), UVD_VCPU_CNTL__BLK_RST_MASK, ~UVD_VCPU_CNTL__BLK_RST_MASK); Loading @@ -1180,7 +1060,7 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) } if (r) { dev_err(adev->dev, "VCN[%d] decode not responding, giving up!!!\n", i); dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); return r; } Loading @@ -1193,65 +1073,30 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0, ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (unifiedQ_enabled) { ring = &adev->vcn.inst[i].ring_enc[0]; fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; } else { ring = &adev->vcn.inst[i].ring_dec; WREG32_SOC15(VCN, i, regVCN_RB4_DB_CTRL, ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT | VCN_RB4_DB_CTRL__EN_MASK); /* program the RB_BASE for ring buffer */ WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO4, lower_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI4, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, i, regUVD_RB_SIZE4, ring->ring_size / sizeof(uint32_t)); /* resetting ring, fw should not check RB ring */ tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); tmp &= ~(VCN_RB_ENABLE__RB4_EN_MASK); WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | VCN_RB1_DB_CTRL__EN_MASK); /* Initialize the ring buffer's read and write pointers */ tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR4); WREG32_SOC15(VCN, i, regUVD_RB_WPTR4, tmp); ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR4); WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); tmp |= VCN_RB_ENABLE__RB4_EN_MASK; tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0); WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0); ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_RPTR4); } ring = &adev->vcn.inst[i].ring_enc[0]; WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | VCN_RB1_DB_CTRL__EN_MASK); tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR); WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp); ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR); WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); if (unifiedQ_enabled) tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); tmp |= VCN_RB_ENABLE__RB1_EN_MASK; WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); else { ring = &adev->vcn.inst[i].ring_enc[1]; WREG32_SOC15(VCN, i, regVCN_RB2_DB_CTRL, ring->doorbell_index << VCN_RB2_DB_CTRL__OFFSET__SHIFT | VCN_RB2_DB_CTRL__EN_MASK); tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR2); WREG32_SOC15(VCN, i, regUVD_RB_WPTR2, tmp); ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR2); WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, i, regUVD_RB_SIZE2, ring->ring_size / 4); } } return 0; Loading @@ -1277,12 +1122,6 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF); tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR2); SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR2, tmp, 0xFFFFFFFF); tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR4); SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR4, tmp, 0xFFFFFFFF); SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); Loading @@ -1301,10 +1140,14 @@ static int vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) */ static int vcn_v4_0_stop(struct amdgpu_device *adev) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; uint32_t tmp; int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { r = vcn_v4_0_stop_dpg_mode(adev, i); continue; Loading Loading @@ -1414,8 +1257,6 @@ static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, /* unpause dpg, no need to wait */ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data); SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); } adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; } Loading @@ -1424,165 +1265,72 @@ static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, } /** * vcn_v4_0_dec_ring_get_rptr - get read pointer * vcn_v4_0_unified_ring_get_rptr - get unified read pointer * * @ring: amdgpu_ring pointer * * Returns the current hardware read pointer * Returns the current hardware unified read pointer */ static uint64_t vcn_v4_0_dec_ring_get_rptr(struct amdgpu_ring *ring) static uint64_t vcn_v4_0_unified_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR4); } if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) DRM_ERROR("wrong ring id is identified in %s", __func__); /** * vcn_v4_0_dec_ring_get_wptr - get write pointer * * @ring: amdgpu_ring pointer * * Returns the current hardware write pointer */ static uint64_t vcn_v4_0_dec_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; if (ring->use_doorbell) return *ring->wptr_cpu_addr; else return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR4); } /** * vcn_v4_0_dec_ring_set_wptr - set write pointer * * @ring: amdgpu_ring pointer * * Commits the write pointer to the hardware */ static void vcn_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { WREG32_SOC15(VCN, ring->me, regUVD_SCRATCH2, lower_32_bits(ring->wptr)); } if (ring->use_doorbell) { *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR4, lower_32_bits(ring->wptr)); } } static const struct amdgpu_ring_funcs vcn_v4_0_dec_sw_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0x3f, .nop = VCN_DEC_SW_CMD_NO_OP, .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v4_0_dec_ring_get_rptr, .get_wptr = vcn_v4_0_dec_ring_get_wptr, .set_wptr = vcn_v4_0_dec_ring_set_wptr, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + VCN_SW_RING_EMIT_FRAME_SIZE, .emit_ib_size = 5, /* vcn_dec_sw_ring_emit_ib */ .emit_ib = vcn_dec_sw_ring_emit_ib, .emit_fence = vcn_dec_sw_ring_emit_fence, .emit_vm_flush = vcn_dec_sw_ring_emit_vm_flush, .test_ring = amdgpu_vcn_dec_sw_ring_test_ring, .test_ib = amdgpu_vcn_dec_sw_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .insert_end = vcn_dec_sw_ring_insert_end, .pad_ib = amdgpu_ring_generic_pad_ib, .begin_use = amdgpu_vcn_ring_begin_use, .end_use = amdgpu_vcn_ring_end_use, .emit_wreg = vcn_dec_sw_ring_emit_wreg, .emit_reg_wait = vcn_dec_sw_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; /** * vcn_v4_0_enc_ring_get_rptr - get enc read pointer * * @ring: amdgpu_ring pointer * * Returns the current hardware enc read pointer */ static uint64_t vcn_v4_0_enc_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR); else return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR2); } /** * vcn_v4_0_enc_ring_get_wptr - get enc write pointer * vcn_v4_0_unified_ring_get_wptr - get unified write pointer * * @ring: amdgpu_ring pointer * * Returns the current hardware enc write pointer * Returns the current hardware unified write pointer */ static uint64_t vcn_v4_0_enc_ring_get_wptr(struct amdgpu_ring *ring) static uint64_t vcn_v4_0_unified_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) DRM_ERROR("wrong ring id is identified in %s", __func__); if (ring->use_doorbell) return *ring->wptr_cpu_addr; else return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR); } else { if (ring->use_doorbell) return *ring->wptr_cpu_addr; else return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR2); } } /** * vcn_v4_0_enc_ring_set_wptr - set enc write pointer * vcn_v4_0_unified_ring_set_wptr - set enc write pointer * * @ring: amdgpu_ring pointer * * Commits the enc write pointer to the hardware */ static void vcn_v4_0_enc_ring_set_wptr(struct amdgpu_ring *ring) static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) DRM_ERROR("wrong ring id is identified in %s", __func__); if (ring->use_doorbell) { *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr)); } } else { if (ring->use_doorbell) { *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR2, lower_32_bits(ring->wptr)); } } } static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = { static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v4_0_enc_ring_get_rptr, .get_wptr = vcn_v4_0_enc_ring_get_wptr, .set_wptr = vcn_v4_0_enc_ring_set_wptr, .get_rptr = vcn_v4_0_unified_ring_get_rptr, .get_wptr = vcn_v4_0_unified_ring_get_wptr, .set_wptr = vcn_v4_0_unified_ring_set_wptr, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + Loading @@ -1594,7 +1342,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = { .emit_fence = vcn_v2_0_enc_ring_emit_fence, .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, .test_ring = amdgpu_vcn_enc_ring_test_ring, .test_ib = amdgpu_vcn_enc_ring_test_ib, .test_ib = amdgpu_vcn_unified_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .insert_end = vcn_v2_0_enc_ring_insert_end, .pad_ib = amdgpu_ring_generic_pad_ib, Loading @@ -1606,13 +1354,13 @@ static const struct amdgpu_ring_funcs vcn_v4_0_enc_ring_vm_funcs = { }; /** * vcn_v4_0_set_dec_ring_funcs - set dec ring functions * vcn_v4_0_set_unified_ring_funcs - set unified ring functions * * @adev: amdgpu_device pointer * * Set decode ring functions * Set unified ring functions */ static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev) static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev) { int i; Loading @@ -1620,32 +1368,10 @@ static void vcn_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; adev->vcn.inst[i].ring_dec.funcs = &vcn_v4_0_dec_sw_ring_vm_funcs; adev->vcn.inst[i].ring_dec.me = i; DRM_INFO("VCN(%d) decode software ring is enabled in VM mode\n", i); } } /** * vcn_v4_0_set_enc_ring_funcs - set enc ring functions * * @adev: amdgpu_device pointer * * Set encode ring functions */ static void vcn_v4_0_set_enc_ring_funcs(struct amdgpu_device *adev) { int i, j; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_unified_ring_vm_funcs; adev->vcn.inst[i].ring_enc[0].me = i; for (j = 0; j < adev->vcn.num_enc_rings; ++j) { adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v4_0_enc_ring_vm_funcs; adev->vcn.inst[i].ring_enc[j].me = i; } DRM_INFO("VCN(%d) encode is enabled in VM mode\n", i); DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i); } } Loading Loading @@ -1798,18 +1524,9 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_ DRM_DEBUG("IH: VCN TRAP\n"); switch (entry->src_id) { case VCN_4_0__SRCID__UVD_TRAP: if (!unifiedQ_enabled) { amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec); break; } break; case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE: amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); break; case VCN_4_0__SRCID__UVD_ENC_LOW_LATENCY: amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); Loading