Commit b0222053 authored by Ohad Sharabi, committed by Oded Gabbay
Browse files

habanalabs: wait again for multi-CS if no CS completed



The original multi-CS design assumed that stream masters are used
exclusively (i.e. a multi-CS with a set of stream master QIDs will not
get completed by a CS that is not in the multi-CS set). This assumption
is inaccurate.

Thus, the multi-CS behavior is now modified so that such a case is not
treated as an error.

Instead, if we get a multi-CS completion but detect that no CS from
the list has actually completed, we do another multi-CS wait (with a
modified timeout).

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent 5b90e59d
Loading
Loading
Loading
Loading
+48 −49
Original line number Diff line number Diff line
@@ -545,13 +545,6 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
			 * mcs fences.
			 */
			fence->mcs_handling_done = true;
			/*
			 * Since CS (and its related fence) can be associated with only one
			 * multi CS context, once it triggered multi CS completion no need to
			 * continue checking other multi CS contexts.
			 */
			spin_unlock(&mcs_compl->lock);
			break;
		}

		spin_unlock(&mcs_compl->lock);
@@ -2498,6 +2491,21 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
	return rc;
}

/*
 * hl_usecs64_to_jiffies - convert a 64-bit microseconds value to jiffies
 *
 * @usecs: duration in microseconds
 *
 * Values that do not exceed U32_MAX are handed to usecs_to_jiffies().
 * Larger values are scaled to nanoseconds and handed to nsecs_to_jiffies();
 * if the scaled value would not fit in 64 bits, U64_MAX nanoseconds is
 * used instead.
 *
 * @return the duration in jiffies
 */
static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
{
	u64 nsecs;

	if (usecs > U32_MAX) {
		/* clamp the nanoseconds value when the multiplication would overflow */
		if (usecs < ((u64)(U64_MAX / NSEC_PER_USEC)))
			nsecs = usecs * NSEC_PER_USEC;
		else
			nsecs = U64_MAX;

		return nsecs_to_jiffies(nsecs);
	}

	return usecs_to_jiffies(usecs);
}

/*
 * hl_wait_multi_cs_completion_init - init completion structure
 *
@@ -2534,8 +2542,7 @@ static struct multi_cs_completion *hl_wait_multi_cs_completion_init(
	}

	if (i == MULTI_CS_MAX_USER_CTX) {
		dev_err(hdev->dev,
				"no available multi-CS completion structure\n");
		dev_err(hdev->dev, "no available multi-CS completion structure\n");
		return ERR_PTR(-ENOMEM);
	}
	return mcs_compl;
@@ -2566,27 +2573,18 @@ static void hl_wait_multi_cs_completion_fini(
 *
 * @return 0 on success, otherwise non 0 error code
 */
static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data)
static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data,
						struct multi_cs_completion *mcs_compl)
{
	struct hl_device *hdev = mcs_data->ctx->hdev;
	struct multi_cs_completion *mcs_compl;
	long completion_rc;

	mcs_compl = hl_wait_multi_cs_completion_init(hdev,
					mcs_data->stream_master_qid_map);
	if (IS_ERR(mcs_compl))
		return PTR_ERR(mcs_compl);

	completion_rc = wait_for_completion_interruptible_timeout(
					&mcs_compl->completion,
					usecs_to_jiffies(mcs_data->timeout_us));
	completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion,
									mcs_data->timeout_jiffies);

	/* update timestamp */
	if (completion_rc > 0)
		mcs_data->timestamp = mcs_compl->timestamp;

	hl_wait_multi_cs_completion_fini(mcs_compl);

	mcs_data->wait_status = completion_rc;

	return 0;
@@ -2619,6 +2617,7 @@ void hl_multi_cs_completion_init(struct hl_device *hdev)
 */
static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct multi_cs_completion *mcs_compl;
	struct hl_device *hdev = hpriv->hdev;
	struct multi_cs_data mcs_data = {0};
	union hl_wait_cs_args *args = data;
@@ -2686,12 +2685,19 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
		goto put_ctx;

	/* wait (with timeout) for the first CS to be completed */
	mcs_data.timeout_us = args->in.timeout_us;
	rc = hl_wait_multi_cs_completion(&mcs_data);
	if (rc)
	mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us);

	mcs_compl = hl_wait_multi_cs_completion_init(hdev, mcs_data.stream_master_qid_map);
	if (IS_ERR(mcs_compl)) {
		rc = PTR_ERR(mcs_compl);
		goto put_ctx;
	}

	while (true) {
		rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl);
		if (rc || (mcs_data.wait_status == 0))
			break;

	if (mcs_data.wait_status > 0) {
		/*
		 * poll fences once again to update the CS map.
		 * no timestamp should be updated this time.
@@ -2699,18 +2705,26 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
		mcs_data.update_ts = false;
		rc = hl_cs_poll_fences(&mcs_data);

		if (mcs_data.completion_bitmap)
			break;

		/*
		 * if hl_wait_multi_cs_completion returned before timeout (i.e.
		 * it got a completion) we expect to see at least one CS
		 * completed after the poll function.
		 */
		if (!mcs_data.completion_bitmap) {
			dev_warn_ratelimited(hdev->dev,
				"Multi-CS got completion on wait but no CS completed\n");
			rc = -EFAULT;
		}
		 * it got a completion) it either got completed by CS in the multi CS list
		 * (in which case the indication will be non empty completion_bitmap) or it
		 * got completed by CS submitted to one of the shared stream master but
		 * not in the multi CS list (in which case we should wait again but reinit
		 * the completion, modify the timeout and set timestamp as zero to let a CS
		 * related to the current multi-CS set a new, relevant, timestamp)
		 */
		/* wait again with modified timeout */
		mcs_data.timeout_jiffies = mcs_data.wait_status;
		reinit_completion(&mcs_compl->completion);
		mcs_compl->timestamp = 0;
	}

	hl_wait_multi_cs_completion_fini(mcs_compl);

put_ctx:
	hl_ctx_put(ctx);
	kfree(fence_arr);
@@ -2741,7 +2755,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
		}

		/* update if some CS was gone */
		if (mcs_data.timestamp)
		if (!mcs_data.timestamp)
			args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
	} else {
		args->out.status = HL_WAIT_CS_STATUS_BUSY;
@@ -2807,21 +2821,6 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
	return 0;
}

/*
 * hl_usecs64_to_jiffies - convert a 64-bit microseconds value to jiffies
 *
 * @usecs: duration in microseconds
 *
 * Values that do not exceed U32_MAX are handed to usecs_to_jiffies().
 * Larger values are scaled to nanoseconds and handed to nsecs_to_jiffies();
 * if the scaled value would not fit in 64 bits, U64_MAX nanoseconds is
 * used instead.
 *
 * @return the duration in jiffies
 */
static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
{
	u64 nsecs;

	if (usecs > U32_MAX) {
		/* clamp the nanoseconds value when the multiplication would overflow */
		if (usecs < ((u64)(U64_MAX / NSEC_PER_USEC)))
			nsecs = usecs * NSEC_PER_USEC;
		else
			nsecs = U64_MAX;

		return nsecs_to_jiffies(nsecs);
	}

	return usecs_to_jiffies(usecs);
}

static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				u64 timeout_us, u64 user_address,
				u64 target_value, struct hl_user_interrupt *interrupt,
+2 −2
Original line number Diff line number Diff line
@@ -2362,7 +2362,7 @@ struct multi_cs_completion {
 * @ctx: pointer to the context structure
 * @fence_arr: array of fences of all CSs
 * @seq_arr: array of CS sequence numbers
 * @timeout_us: timeout in usec for waiting for CS to complete
 * @timeout_jiffies: timeout in jiffies for waiting for CS to complete
 * @timestamp: timestamp of first completed CS
 * @wait_status: wait for CS status
 * @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0)
@@ -2376,7 +2376,7 @@ struct multi_cs_data {
	struct hl_ctx	*ctx;
	struct hl_fence	**fence_arr;
	u64		*seq_arr;
	s64		timeout_us;
	s64		timeout_jiffies;
	s64		timestamp;
	long		wait_status;
	u32		completion_bitmap;