drm/amdgpu: load balance VCN3 decode as well v8 (87cc7f9e) · Commits · jan.koester / Linux

drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c

+130 −2

Original line number	Diff line number	Diff line
		@@ -50,6 +50,9 @@
		#define VCN_INSTANCES_SIENNA_CICHLID 2
		#define DEC_SW_RING_ENABLED FALSE

		/* Message type: vcn_v3_0_dec_msg() compares dword 3 of a decode message against this */
		#define RDECODE_MSG_CREATE 0x00000000
		/* Buffer entry type: vcn_v3_0_dec_msg() compares dword 0 of each buffer table entry against this */
		#define RDECODE_MESSAGE_CREATE 0x00000001

		static int amdgpu_ih_clientid_vcns[] = {
		SOC15_IH_CLIENTID_VCN,
		SOC15_IH_CLIENTID_VCN1
		@@ -208,8 +211,6 @@ static int vcn_v3_0_sw_init(void *handle)
		} else {
		ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
		}
		if (adev->asic_type == CHIP_SIENNA_CICHLID && i != 0)
		ring->no_scheduler = true;
		sprintf(ring->name, "vcn_dec_%d", i);
		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
		AMDGPU_RING_PRIO_DEFAULT,
		@@ -1847,6 +1848,132 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = {
		.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
		};

		/*
		 * vcn_v3_0_limit_sched - restrict the submitting entity to one scheduler
		 * @p: command submission parser whose entity is restricted
		 *
		 * Hands the entity the VCN decode, default priority scheduler list with a
		 * count of 1, so only the first scheduler in that list remains selectable.
		 *
		 * Returns 0 on success, -EINVAL when the entity's fence sequence counter is
		 * already non-zero.
		 */
		static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p)
		{
			struct drm_gpu_scheduler **first_sched;

			/* The create msg must be in the first IB submitted */
			if (atomic_read(&p->entity->fence_seq) != 0)
				return -EINVAL;

			first_sched = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]
				[AMDGPU_RING_PRIO_DEFAULT].sched;
			drm_sched_entity_modify_sched(p->entity, first_sched, 1);

			return 0;
		}

		/*
		 * vcn_v3_0_dec_msg - inspect a decode message referenced from an IB
		 * @p: command submission parser the IB belongs to
		 * @addr: GPU virtual address of the message
		 *
		 * Looks up the BO mapped at @addr, validates and CPU-maps it, then reads the
		 * message. Messages whose dword 3 is not RDECODE_MSG_CREATE are accepted
		 * without further inspection. For create messages the buffer table (starting
		 * at dword 6, four dwords per entry) is walked; every RDECODE_MESSAGE_CREATE
		 * entry whose payload's first dword is not one of the "runs on any instance"
		 * codec ids causes vcn_v3_0_limit_sched() to be called.
		 *
		 * All offsets and counts come from the message itself, so each one is checked
		 * against the remaining size of the VA mapping before it is dereferenced.
		 *
		 * Returns 0 on success, -EINVAL for a misaligned or malformed message, or the
		 * error code of the failing helper.
		 */
		static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
		{
			/* Message layout as used below, in dwords */
			enum {
				VCN_MSG_MIN_DW = 4,	/* dwords 0..3 are always read */
				VCN_MSG_HDR_DW = 6,	/* buffer table starts at dword 6 */
				VCN_MSG_ENTRY_DW = 4	/* size of one buffer table entry */
			};
			struct ttm_operation_ctx ctx = { false, false };
			struct amdgpu_bo_va_mapping *map;
			uint32_t *msg, num_buffers;
			struct amdgpu_bo *bo;
			uint64_t start, end, avail;
			unsigned int i;
			void *ptr;
			int r;

			addr &= AMDGPU_GMC_HOLE_MASK;
			r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
			if (r) {
				DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
				return r;
			}

			start = map->start * AMDGPU_GPU_PAGE_SIZE;
			end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
			if (addr & 0x7) {
				DRM_ERROR("VCN messages must be 8 byte aligned!\n");
				return -EINVAL;
			}

			/* The message is read by the CPU, so the BO must be CPU accessible */
			bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
			amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
			if (r) {
				DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
				return r;
			}

			r = amdgpu_bo_kmap(bo, &ptr);
			if (r) {
				DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
				return r;
			}

			msg = ptr + addr - start;

			/*
			 * Bytes between the message start and the end of the mapping.
			 * NOTE(review): assumes amdgpu_cs_find_mapping() returned a mapping
			 * that contains addr (start <= addr < end) - confirm.
			 */
			avail = end - addr;

			/* Dwords 0..3 are read unconditionally below */
			if (avail < VCN_MSG_MIN_DW * sizeof(*msg)) {
				r = -EINVAL;
				goto out;
			}

			/* Check length */
			if (msg[1] > avail) {
				r = -EINVAL;
				goto out;
			}

			if (msg[3] != RDECODE_MSG_CREATE)
				goto out;

			num_buffers = msg[2];

			/* The whole buffer table must lie inside the mapping before we walk it */
			if (num_buffers &&
			    VCN_MSG_HDR_DW * sizeof(*msg) +
			    (uint64_t)num_buffers * VCN_MSG_ENTRY_DW * sizeof(*msg) > avail) {
				r = -EINVAL;
				goto out;
			}

			for (i = 0, msg = &msg[VCN_MSG_HDR_DW]; i < num_buffers;
			     ++i, msg += VCN_MSG_ENTRY_DW) {
				uint32_t offset, size, *create;

				if (msg[0] != RDECODE_MESSAGE_CREATE)
					continue;

				offset = msg[1];
				size = msg[2];

				/*
				 * offset is relative to the message start, so compare against
				 * the remaining mapping size; written without additions so
				 * nothing can wrap. Also make sure the dword read below fits.
				 */
				if (offset > avail || size > avail - offset ||
				    avail - offset < sizeof(*create)) {
					r = -EINVAL;
					goto out;
				}

				create = ptr + addr + offset - start;

				/* H264, HEVC and VP9 can run on any instance */
				if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
					continue;

				r = vcn_v3_0_limit_sched(p);
				if (r)
					goto out;
			}

		out:
			amdgpu_bo_kunmap(bo);
			return r;
		}

		/*
		 * vcn_v3_0_ring_patch_cs_in_place - scan an IB for decode messages
		 * @p: command submission parser
		 * @ib_idx: index of the IB inside the parser's job
		 *
		 * Does nothing when the entity currently sits on a ring whose "me" field is
		 * zero. Otherwise the IB is read as (register, value) dword pairs: writes to
		 * the internal data0/data1 registers are remembered as the low/high half of
		 * a message address, and a write of 0 to the internal cmd register passes
		 * that address to vcn_v3_0_dec_msg().
		 *
		 * Returns 0 on success or the first error reported by vcn_v3_0_dec_msg().
		 */
		static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
							   uint32_t ib_idx)
		{
			struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
			struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
			uint32_t addr_lo = 0, addr_hi = 0;
			unsigned int dw;

			/* The first instance can decode anything */
			if (ring->me == 0)
				return 0;

			for (dw = 0; dw < ib->length_dw; dw += 2) {
				uint32_t reg = amdgpu_get_ib_value(p, ib_idx, dw);
				uint32_t val = amdgpu_get_ib_value(p, ib_idx, dw + 1);
				int r;

				if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) {
					addr_lo = val;
					continue;
				}

				if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) {
					addr_hi = val;
					continue;
				}

				/* Only a cmd register write with value 0 triggers a message check */
				if (reg != PACKET0(p->adev->vcn.internal.cmd, 0) || val != 0)
					continue;

				r = vcn_v3_0_dec_msg(p, ((u64)addr_hi << 32) | addr_lo);
				if (r)
					return r;
			}

			return 0;
		}

		static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = {
		.type = AMDGPU_RING_TYPE_VCN_DEC,
		.align_mask = 0xf,
		@@ -1854,6 +1981,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = {
		.get_rptr = vcn_v3_0_dec_ring_get_rptr,
		.get_wptr = vcn_v3_0_dec_ring_get_wptr,
		.set_wptr = vcn_v3_0_dec_ring_set_wptr,
		.patch_cs_in_place = vcn_v3_0_ring_patch_cs_in_place,
		.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +