Commit 1411796f authored by Mikko Perttunen, committed by Thierry Reding
Browse files

gpu: host1x: Rewrite job opcode sequence



For new (Tegra186+) SoCs, use a new ('full-featured') job opcode
sequence that is compatible with virtualization. In particular,
the Host1x hardware in Tegra234 is more strict regarding the sequence,
requiring ACQUIRE_MLOCK-SETCLASS-SETSTREAMID opcodes to occur in
that sequence without gaps (except for SETPAYLOAD), so let's do it
properly in one go now.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
parent 9abdd497
Loading
Loading
Loading
Loading
+85 −59
Original line number Diff line number Diff line
@@ -47,10 +47,41 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
	}
}

static void submit_wait(struct host1x_cdma *cdma, u32 id, u32 threshold,
static void submit_wait(struct host1x_job *job, u32 id, u32 threshold,
			u32 next_class)
{
#if HOST1X_HW >= 2
	struct host1x_cdma *cdma = &job->channel->cdma;

#if HOST1X_HW >= 6
	u32 stream_id;

	/*
	 * If a memory context has been set, use it. Otherwise
	 * (if context isolation is disabled) use the engine's
	 * firmware stream ID.
	 */
	if (job->memory_context)
		stream_id = job->memory_context->stream_id;
	else
		stream_id = job->engine_fallback_streamid;

	host1x_cdma_push_wide(cdma,
		host1x_opcode_setclass(
			HOST1X_CLASS_HOST1X,
			HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32,
			/* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */
			BIT(0) | BIT(2)
		),
		threshold,
		id,
		HOST1X_OPCODE_NOP
	);
	host1x_cdma_push_wide(&job->channel->cdma,
		host1x_opcode_setclass(job->class, 0, 0),
		host1x_opcode_setpayload(stream_id),
		host1x_opcode_setstreamid(job->engine_streamid_offset / 4),
		HOST1X_OPCODE_NOP);
#elif HOST1X_HW >= 2
	host1x_cdma_push_wide(cdma,
		host1x_opcode_setclass(
			HOST1X_CLASS_HOST1X,
@@ -97,7 +128,7 @@ static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base)
			else
				threshold = cmd->wait.threshold;

			submit_wait(cdma, cmd->wait.id, threshold, cmd->wait.next_class);
			submit_wait(job, cmd->wait.id, threshold, cmd->wait.next_class);
		} else {
			struct host1x_job_gather *g = &cmd->gather;

@@ -180,42 +211,70 @@ static void host1x_enable_gather_filter(struct host1x_channel *ch)
#endif
}

static void host1x_channel_program_engine_streamid(struct host1x_job *job)
static void channel_program_cdma(struct host1x_job *job)
{
	struct host1x_cdma *cdma = &job->channel->cdma;
	struct host1x_syncpt *sp = job->syncpt;

#if HOST1X_HW >= 6
	u32 fence;

	if (!job->memory_context)
		return;

	fence = host1x_syncpt_incr_max(job->syncpt, 1);

	/* First, increment a syncpoint on OP_DONE condition.. */
	/* Enter engine class with invalid stream ID. */
	host1x_cdma_push_wide(cdma,
		host1x_opcode_acquire_mlock(job->class),
		host1x_opcode_setclass(job->class, 0, 0),
		host1x_opcode_setpayload(0),
		host1x_opcode_setstreamid(job->engine_streamid_offset / 4));

	/* Before switching stream ID to real stream ID, ensure engine is idle. */
	fence = host1x_syncpt_incr_max(sp, 1);
	host1x_cdma_push(&job->channel->cdma,
		host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1),
		HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) |
			HOST1X_UCLASS_INCR_SYNCPT_COND_F(1));
			HOST1X_UCLASS_INCR_SYNCPT_COND_F(4));
	submit_wait(job, job->syncpt->id, fence, job->class);

	/* Wait for syncpoint to increment */
	/* Submit work. */
	job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs);
	submit_gathers(job, job->syncpt_end - job->syncpt_incrs);

	/* Before releasing MLOCK, ensure engine is idle again. */
	fence = host1x_syncpt_incr_max(sp, 1);
	host1x_cdma_push(&job->channel->cdma,
		host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
			host1x_uclass_wait_syncpt_r(), 1),
		host1x_class_host_wait_syncpt(job->syncpt->id, fence));
		host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1),
		HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) |
			HOST1X_UCLASS_INCR_SYNCPT_COND_F(4));
	submit_wait(job, job->syncpt->id, fence, job->class);

	/* Release MLOCK. */
	host1x_cdma_push(cdma,
		HOST1X_OPCODE_NOP, host1x_opcode_release_mlock(job->class));
#else
	if (job->serialize) {
		/*
	 * Now that we know the engine is idle, return to class and
	 * change stream ID.
		 * Force serialization by inserting a host wait for the
		 * previous job to finish before this one can commence.
		 */
		host1x_cdma_push(cdma,
				 host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
					host1x_uclass_wait_syncpt_r(), 1),
				 host1x_class_host_wait_syncpt(job->syncpt->id,
					host1x_syncpt_read_max(sp)));
	}

	host1x_cdma_push(&job->channel->cdma,
	/* Synchronize base register to allow using it for relative waiting */
	if (sp->base)
		synchronize_syncpt_base(job);

	/* add a setclass for modules that require it */
	if (job->class)
		host1x_cdma_push(cdma,
				 host1x_opcode_setclass(job->class, 0, 0),
				 HOST1X_OPCODE_NOP);

	host1x_cdma_push(&job->channel->cdma,
		host1x_opcode_setpayload(job->memory_context->stream_id),
		host1x_opcode_setstreamid(job->engine_streamid_offset / 4));
	job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs);

	submit_gathers(job, job->syncpt_end - job->syncpt_incrs);
#endif
}

@@ -223,7 +282,6 @@ static int channel_submit(struct host1x_job *job)
{
	struct host1x_channel *ch = job->channel;
	struct host1x_syncpt *sp = job->syncpt;
	u32 user_syncpt_incrs = job->syncpt_incrs;
	u32 prev_max = 0;
	u32 syncval;
	int err;
@@ -251,6 +309,7 @@ static int channel_submit(struct host1x_job *job)

	host1x_channel_set_streamid(ch);
	host1x_enable_gather_filter(ch);
	host1x_hw_syncpt_assign_to_channel(host, sp, ch);

	/* begin a CDMA submit */
	err = host1x_cdma_begin(&ch->cdma, job);
@@ -259,40 +318,7 @@ static int channel_submit(struct host1x_job *job)
		goto error;
	}

	if (job->serialize) {
		/*
		 * Force serialization by inserting a host wait for the
		 * previous job to finish before this one can commence.
		 */
		host1x_cdma_push(&ch->cdma,
				 host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
					host1x_uclass_wait_syncpt_r(), 1),
				 host1x_class_host_wait_syncpt(job->syncpt->id,
					host1x_syncpt_read_max(sp)));
	}

	/* Synchronize base register to allow using it for relative waiting */
	if (sp->base)
		synchronize_syncpt_base(job);

	host1x_hw_syncpt_assign_to_channel(host, sp, ch);

	/* add a setclass for modules that require it */
	if (job->class)
		host1x_cdma_push(&ch->cdma,
				 host1x_opcode_setclass(job->class, 0, 0),
				 HOST1X_OPCODE_NOP);

	/*
	 * Ensure engine DMA is idle and set new stream ID. May increment
	 * syncpt max.
	 */
	host1x_channel_program_engine_streamid(job);

	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
	job->syncpt_end = syncval;

	submit_gathers(job, syncval - user_syncpt_incrs);
	channel_program_cdma(job);

	/* end CDMA submit & stash pinned hMems into sync queue */
	host1x_cdma_end(&ch->cdma, job);