Commit aa87e317 authored by Greg Kroah-Hartman's avatar Greg Kroah-Hartman
Browse files

Merge tag 'misc-habanalabs-next-2021-04-10' of...

Merge tag 'misc-habanalabs-next-2021-04-10' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

This tag contains habanalabs driver changes for v5.13:

- Add support to reset device after the user closes the file descriptor.
  Because we support a single user, we can reset the device (if needed)
  after a user closes its file descriptor to make sure the device is in
  an idle and clean state for the next user.

- Add a new feature to allow the user to wait on an interrupt. This is
  needed for future ASICs.

- Replace GFP_ATOMIC with GFP_KERNEL wherever possible and add code to
  support failure of allocating with GFP_ATOMIC.

- Update code to support the latest firmware image:
  - More security features are done in the firmware
  - Remove hard-coded assumptions and replace them with values that are
    sent to the firmware on loading.
  - Print device unusable error
  - Reset device in case the communication between driver and firmware
    gets out of sync.
  - Support new PCI device ids for secured GAUDI.

- Expose current power draw through the INFO IOCTL.

- Support resetting the device upon a request from the BMC (through F/W).

- Always use only a single MSI in GAUDI, due to H/W limitation.

- Improve data-path code by taking out code from spinlock protection.

- Allow user to specify custom timeout per Command Submission.

- Some enhancements to debugfs.

- Various minor changes and improvements.

* tag 'misc-habanalabs-next-2021-04-10' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux: (41 commits)
  habanalabs: print f/w boot unknown error
  habanalabs: update to latest F/W communication header
  habanalabs/gaudi: skip iATU if F/W security is enabled
  habanalabs/gaudi: derive security status from pci id
  habanalabs: move dram scrub to free sequence
  habanalabs: send dynamic msi-x indexes to f/w
  habanalabs/gaudi: clear QM errors only if not in stop_on_err mode
  habanalabs: support DEVICE_UNUSABLE error indication from FW
  habanalabs: use strscpy instead of sprintf and strlcpy
  habanalabs: remove the store jobs array from CS IOCTL
  habanalabs/gaudi: add debugfs to DMA from the device
  habanalabs/gaudi: sync stream add protection to SOB reset flow
  habanalabs: add custom timeout flag per cs
  habanalabs: improve utilization calculation
  habanalabs: support legacy and new pll indexes
  habanalabs: move relevant datapath work outside cs lock
  habanalabs: avoid soft lockup bug upon mapping error
  habanalabs/gaudi: Update async events header
  habanalabs/gaudi: unsecure TPC cfg status registers
  habanalabs/gaudi: always use single-msi mode
  ...
parents 19ab2339 b575a767
Loading
Loading
Loading
Loading
+54 −16
Original line number Diff line number Diff line
@@ -82,6 +82,24 @@ Description: Allows the root user to read or write 64 bit data directly
                If the IOMMU is disabled, it also allows the root user to read
                or write from the host a device VA of a host mapped memory

What:           /sys/kernel/debug/habanalabs/hl<n>/data_dma
Date:           Apr 2021
KernelVersion:  5.13
Contact:        ogabbay@kernel.org
Description:    Allows the root user to read from the device's internal
                memory (DRAM/SRAM) through a DMA engine.
                This property is a binary blob that contains the result of the
                DMA transfer.
                This custom interface is needed (instead of using the generic
                Linux user-space PCI mapping) because the amount of internal
                memory is huge (>32GB) and reading it via the PCI bar will take
                a very long time.
                This interface doesn't support concurrency in the same device.
                In GAUDI and GOYA, this action can cause undefined behavior
                in case it is done while the device is executing user
                workloads.
                Only supported on GAUDI at this stage.

What:           /sys/kernel/debug/habanalabs/hl<n>/device
Date:           Jan 2019
KernelVersion:  5.1
@@ -90,6 +108,24 @@ Description: Enables the root user to set the device to specific state.
                Valid values are "disable", "enable", "suspend", "resume".
                User can read this property to see the valid values

What:           /sys/kernel/debug/habanalabs/hl<n>/dma_size
Date:           Apr 2021
KernelVersion:  5.13
Contact:        ogabbay@kernel.org
Description:    Specify the size of the DMA transaction when using DMA to read
                from the device's internal memory. The value can not be larger
                than 128MB. Writing to this value initiates the DMA transfer.
                When the write is finished, the user can read the "data_dma"
                blob

What:           /sys/kernel/debug/habanalabs/hl<n>/dump_security_violations
Date:           Jan 2021
KernelVersion:  5.12
Contact:        ogabbay@kernel.org
Description:    Dumps all security violations to dmesg. This will also ack
                all security violations, meaning those violations will not be
                dumped the next time the user calls this API

What:           /sys/kernel/debug/habanalabs/hl<n>/engines
Date:           Jul 2019
KernelVersion:  5.3
@@ -154,6 +190,16 @@ Description: Displays the hop values and physical address for a given ASID
                e.g. to display info about VA 0x1000 for ASID 1 you need to do:
                echo "1 0x1000" > /sys/kernel/debug/habanalabs/hl0/mmu

What:           /sys/kernel/debug/habanalabs/hl<n>/mmu_error
Date:           Mar 2021
KernelVersion:  5.12
Contact:        fkassabri@habana.ai
Description:    Check and display page fault or access violation mmu errors for
                all MMUs specified in mmu_cap_mask.
                e.g. to display error info for MMU hw cap bit 9, you need to do:
                echo "0x200" > /sys/kernel/debug/habanalabs/hl0/mmu_error
                cat /sys/kernel/debug/habanalabs/hl0/mmu_error

What:           /sys/kernel/debug/habanalabs/hl<n>/set_power_state
Date:           Jan 2019
KernelVersion:  5.1
@@ -161,6 +207,13 @@ Contact: ogabbay@kernel.org
Description:    Sets the PCI power state. Valid values are "1" for D0 and "2"
                for D3Hot

What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
Date:           Mar 2020
KernelVersion:  5.6
Contact:        ogabbay@kernel.org
Description:    Sets the stop-on-error option for the device engines. Value of
                "0" is for disable, otherwise enable.

What:           /sys/kernel/debug/habanalabs/hl<n>/userptr
Date:           Jan 2019
KernelVersion:  5.1
@@ -174,19 +227,4 @@ Date: Jan 2019
KernelVersion:  5.1
Contact:        ogabbay@kernel.org
Description:    Displays a list with information about all the active virtual
                address mappings per ASID

What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
Date:           Mar 2020
KernelVersion:  5.6
Contact:        ogabbay@kernel.org
Description:    Sets the stop-on_error option for the device engines. Value of
                "0" is for disable, otherwise enable.

What:           /sys/kernel/debug/habanalabs/hl<n>/dump_security_violations
Date:           Jan 2021
KernelVersion:  5.12
Contact:        ogabbay@kernel.org
Description:    Dumps all security violations to dmesg. This will also ack
                all security violations meanings those violations will not be
                dumped next time user calls this API
                address mappings per ASID and all user mappings of HW blocks
+9 −3
Original line number Diff line number Diff line
@@ -181,7 +181,7 @@ static void cb_release(struct kref *ref)
static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
					int ctx_id, bool internal_cb)
{
	struct hl_cb *cb;
	struct hl_cb *cb = NULL;
	u32 cb_offset;
	void *p;

@@ -193,9 +193,10 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
	 * the kernel's copy. Hence, we must never sleep in this code section
	 * and must use GFP_ATOMIC for all memory allocations.
	 */
	if (ctx_id == HL_KERNEL_ASID_ID)
	if (ctx_id == HL_KERNEL_ASID_ID && !hdev->disabled)
		cb = kzalloc(sizeof(*cb), GFP_ATOMIC);
	else

	if (!cb)
		cb = kzalloc(sizeof(*cb), GFP_KERNEL);

	if (!cb)
@@ -214,6 +215,9 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
	} else if (ctx_id == HL_KERNEL_ASID_ID) {
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
						&cb->bus_address, GFP_ATOMIC);
		if (!p)
			p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					cb_size, &cb->bus_address, GFP_KERNEL);
	} else {
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
						&cb->bus_address,
@@ -310,6 +314,8 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,

	spin_lock(&mgr->cb_lock);
	rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
	if (rc < 0)
		rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_KERNEL);
	spin_unlock(&mgr->cb_lock);

	if (rc < 0) {
+302 −66
Original line number Diff line number Diff line
@@ -84,31 +84,12 @@ int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
	return 0;
}

static void hl_fence_release(struct kref *kref)
static void sob_reset_work(struct work_struct *work)
{
	struct hl_fence *fence =
		container_of(kref, struct hl_fence, refcount);
	struct hl_cs_compl *hl_cs_cmpl =
		container_of(fence, struct hl_cs_compl, base_fence);
		container_of(work, struct hl_cs_compl, sob_reset_work);
	struct hl_device *hdev = hl_cs_cmpl->hdev;

	/* EBUSY means the CS was never submitted and hence we don't have
	 * an attached hw_sob object that we should handle here
	 */
	if (fence->error == -EBUSY)
		goto free;

	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
		(hl_cs_cmpl->type == CS_TYPE_WAIT) ||
		(hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) {

		dev_dbg(hdev->dev,
			"CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
			hl_cs_cmpl->cs_seq,
			hl_cs_cmpl->type,
			hl_cs_cmpl->hw_sob->sob_id,
			hl_cs_cmpl->sob_val);

	/*
	 * A signal CS can get completion while the corresponding wait
	 * for signal CS is on its way to the PQ. The wait for signal CS
@@ -131,6 +112,38 @@ static void hl_fence_release(struct kref *kref)
	if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
		hdev->asic_funcs->reset_sob_group(hdev,
				hl_cs_cmpl->sob_group);

	kfree(hl_cs_cmpl);
}

static void hl_fence_release(struct kref *kref)
{
	struct hl_fence *fence =
		container_of(kref, struct hl_fence, refcount);
	struct hl_cs_compl *hl_cs_cmpl =
		container_of(fence, struct hl_cs_compl, base_fence);
	struct hl_device *hdev = hl_cs_cmpl->hdev;

	/* EBUSY means the CS was never submitted and hence we don't have
	 * an attached hw_sob object that we should handle here
	 */
	if (fence->error == -EBUSY)
		goto free;

	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
		(hl_cs_cmpl->type == CS_TYPE_WAIT) ||
		(hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) {

		dev_dbg(hdev->dev,
			"CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
			hl_cs_cmpl->cs_seq,
			hl_cs_cmpl->type,
			hl_cs_cmpl->hw_sob->sob_id,
			hl_cs_cmpl->sob_val);

		queue_work(hdev->sob_reset_wq, &hl_cs_cmpl->sob_reset_work);

		return;
	}

free:
@@ -454,8 +467,7 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)

	if (next_entry_found && !next->tdr_active) {
		next->tdr_active = true;
		schedule_delayed_work(&next->work_tdr,
					hdev->timeout_jiffies);
		schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
	}

	spin_unlock(&hdev->cs_mirror_lock);
@@ -492,24 +504,6 @@ static void cs_do_release(struct kref *ref)
		goto out;
	}

	hdev->asic_funcs->hw_queues_lock(hdev);

	hdev->cs_active_cnt--;
	if (!hdev->cs_active_cnt) {
		struct hl_device_idle_busy_ts *ts;

		ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
		ts->busy_to_idle_ts = ktime_get();

		if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
			hdev->idle_busy_ts_idx = 0;
	} else if (hdev->cs_active_cnt < 0) {
		dev_crit(hdev->dev, "CS active cnt %d is negative\n",
			hdev->cs_active_cnt);
	}

	hdev->asic_funcs->hw_queues_unlock(hdev);

	/* Need to update CI for all queue jobs that does not get completion */
	hl_hw_queue_update_ci(cs);

@@ -620,14 +614,14 @@ static void cs_timedout(struct work_struct *work)
	cs_put(cs);

	if (hdev->reset_on_lockup)
		hl_device_reset(hdev, false, false);
		hl_device_reset(hdev, 0);
	else
		hdev->needs_reset = true;
}

static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
			enum hl_cs_type cs_type, u64 user_sequence,
			struct hl_cs **cs_new)
			struct hl_cs **cs_new, u32 flags, u32 timeout)
{
	struct hl_cs_counters_atomic *cntr;
	struct hl_fence *other = NULL;
@@ -638,6 +632,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
	cntr = &hdev->aggregated_cs_counters;

	cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
	if (!cs)
		cs = kzalloc(sizeof(*cs), GFP_KERNEL);

	if (!cs) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
@@ -651,12 +648,17 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
	cs->submitted = false;
	cs->completed = false;
	cs->type = cs_type;
	cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
	cs->timeout_jiffies = timeout;
	INIT_LIST_HEAD(&cs->job_list);
	INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
	kref_init(&cs->refcount);
	spin_lock_init(&cs->job_lock);

	cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
	if (!cs_cmpl)
		cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_KERNEL);

	if (!cs_cmpl) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
@@ -664,9 +666,23 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
		goto free_cs;
	}

	cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
			sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
	if (!cs->jobs_in_queue_cnt)
		cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
				sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);

	if (!cs->jobs_in_queue_cnt) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		rc = -ENOMEM;
		goto free_cs_cmpl;
	}

	cs_cmpl->hdev = hdev;
	cs_cmpl->type = cs->type;
	spin_lock_init(&cs_cmpl->lock);
	INIT_WORK(&cs_cmpl->sob_reset_work, sob_reset_work);
	cs->fence = &cs_cmpl->base_fence;

	spin_lock(&ctx->cs_lock);
@@ -696,15 +712,6 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
		goto free_fence;
	}

	cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
			sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
	if (!cs->jobs_in_queue_cnt) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		rc = -ENOMEM;
		goto free_fence;
	}

	/* init hl_fence */
	hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);

@@ -727,6 +734,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,

free_fence:
	spin_unlock(&ctx->cs_lock);
	kfree(cs->jobs_in_queue_cnt);
free_cs_cmpl:
	kfree(cs_cmpl);
free_cs:
	kfree(cs);
@@ -749,6 +758,8 @@ void hl_cs_rollback_all(struct hl_device *hdev)
	int i;
	struct hl_cs *cs, *tmp;

	flush_workqueue(hdev->sob_reset_wq);

	/* flush all completions before iterating over the CS mirror list in
	 * order to avoid a race with the release functions
	 */
@@ -778,6 +789,44 @@ void hl_pending_cb_list_flush(struct hl_ctx *ctx)
	}
}

/*
 * wake_pending_user_interrupt_threads - abort every waiter queued on one
 * user interrupt.
 * @interrupt: user interrupt whose wait list should be drained of sleepers.
 *
 * Each entry on the wait list gets its fence error set to -EIO and its
 * completion signalled with complete_all(). The whole walk runs under the
 * interrupt's wait_list_lock. Entries are only woken here, not unlinked or
 * freed.
 */
static void
wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
{
	struct list_head *waiters = &interrupt->wait_list_head;
	struct hl_user_pending_interrupt *waiter;

	spin_lock(&interrupt->wait_list_lock);

	list_for_each_entry(waiter, waiters, wait_list_node) {
		/* Record the failure before waking the sleeping thread */
		waiter->fence.error = -EIO;
		complete_all(&waiter->fence.completion);
	}

	spin_unlock(&interrupt->wait_list_lock);
}

/*
 * hl_release_pending_user_interrupts - wake all threads that are waiting on
 * any user interrupt of the device.
 * @hdev: habanalabs device structure.
 *
 * Does nothing when the ASIC properties report no user interrupts. Otherwise
 * wakes the waiters of every per-index user interrupt and then those of the
 * common user interrupt.
 */
void hl_release_pending_user_interrupts(struct hl_device *hdev)
{
	struct asic_fixed_properties *props = &hdev->asic_prop;
	int idx;

	if (props->user_interrupt_count == 0)
		return;

	/* Each wait list is walked under its own wait_list_lock. A woken
	 * waiter takes that same lock before it unlinks and frees its list
	 * entry, so the entries remain valid for the duration of the walk.
	 */
	for (idx = 0 ; idx < props->user_interrupt_count ; idx++)
		wake_pending_user_interrupt_threads(&hdev->user_interrupt[idx]);

	wake_pending_user_interrupt_threads(&hdev->common_user_interrupt);
}

static void job_wq_completion(struct work_struct *work)
{
	struct hl_cs_job *job = container_of(work, struct hl_cs_job,
@@ -889,6 +938,9 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
	struct hl_cs_job *job;

	job = kzalloc(sizeof(*job), GFP_ATOMIC);
	if (!job)
		job = kzalloc(sizeof(*job), GFP_KERNEL);

	if (!job)
		return NULL;

@@ -991,6 +1043,9 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev,

	*cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
					GFP_ATOMIC);
	if (!*cs_chunk_array)
		*cs_chunk_array = kmalloc_array(num_chunks,
					sizeof(**cs_chunk_array), GFP_KERNEL);
	if (!*cs_chunk_array) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
@@ -1038,7 +1093,8 @@ static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
}

static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
				u32 num_chunks, u64 *cs_seq, u32 flags)
				u32 num_chunks, u64 *cs_seq, u32 flags,
				u32 timeout)
{
	bool staged_mid, int_queues_only = true;
	struct hl_device *hdev = hpriv->hdev;
@@ -1067,11 +1123,11 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
		staged_mid = false;

	rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
			staged_mid ? user_sequence : ULLONG_MAX, &cs);
			staged_mid ? user_sequence : ULLONG_MAX, &cs, flags,
			timeout);
	if (rc)
		goto free_cs_chunk_array;

	cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
	*cs_seq = cs->sequence;

	hl_debugfs_add_cs(cs);
@@ -1269,7 +1325,8 @@ static int hl_submit_pending_cb(struct hl_fpriv *hpriv)
		list_move_tail(&pending_cb->cb_node, &local_cb_list);
	spin_unlock(&ctx->pending_cb_lock);

	rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs);
	rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs, 0,
				hdev->timeout_jiffies);
	if (rc)
		goto add_list_elements;

@@ -1370,7 +1427,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
			rc = 0;
		} else {
			rc = cs_ioctl_default(hpriv, chunks, num_chunks,
								cs_seq, 0);
					cs_seq, 0, hdev->timeout_jiffies);
		}

		mutex_unlock(&hpriv->restore_phase_mutex);
@@ -1419,7 +1476,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,

out:
	if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
		hl_device_reset(hdev, false, false);
		hl_device_reset(hdev, 0);

	return rc;
}
@@ -1445,6 +1502,10 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
	signal_seq_arr = kmalloc_array(signal_seq_arr_len,
					sizeof(*signal_seq_arr),
					GFP_ATOMIC);
	if (!signal_seq_arr)
		signal_seq_arr = kmalloc_array(signal_seq_arr_len,
					sizeof(*signal_seq_arr),
					GFP_KERNEL);
	if (!signal_seq_arr) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt);
@@ -1536,7 +1597,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,

static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
				void __user *chunks, u32 num_chunks,
				u64 *cs_seq, bool timestamp)
				u64 *cs_seq, u32 flags, u32 timeout)
{
	struct hl_cs_chunk *cs_chunk_array, *chunk;
	struct hw_queue_properties *hw_queue_prop;
@@ -1642,7 +1703,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
		}
	}

	rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs);
	rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
	if (rc) {
		if (cs_type == CS_TYPE_WAIT ||
			cs_type == CS_TYPE_COLLECTIVE_WAIT)
@@ -1650,8 +1711,6 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
		goto free_cs_chunk_array;
	}

	cs->timestamp = !!timestamp;

	/*
	 * Save the signal CS fence for later initialization right before
	 * hanging the wait CS on the queue.
@@ -1709,7 +1768,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
	enum hl_cs_type cs_type;
	u64 cs_seq = ULONG_MAX;
	void __user *chunks;
	u32 num_chunks, flags;
	u32 num_chunks, flags, timeout;
	int rc;

	rc = hl_cs_sanity_checks(hpriv, args);
@@ -1735,16 +1794,20 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
			!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
		cs_seq = args->in.seq;

	timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
			? msecs_to_jiffies(args->in.timeout * 1000)
			: hpriv->hdev->timeout_jiffies;

	switch (cs_type) {
	case CS_TYPE_SIGNAL:
	case CS_TYPE_WAIT:
	case CS_TYPE_COLLECTIVE_WAIT:
		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
			&cs_seq, args->in.cs_flags & HL_CS_FLAGS_TIMESTAMP);
					&cs_seq, args->in.cs_flags, timeout);
		break;
	default:
		rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
							args->in.cs_flags);
						args->in.cs_flags, timeout);
		break;
	}

@@ -1818,7 +1881,7 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
	return rc;
}

int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_wait_cs_args *args = data;
@@ -1873,3 +1936,176 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)

	return 0;
}

/*
 * _hl_interrupt_wait_ioctl - wait until a 32-bit value in user memory reaches
 * a target value, re-checking it each time the user interrupt is signalled.
 * @hdev: habanalabs device structure.
 * @ctx: context of the calling process; a reference is held for the call.
 * @timeout_us: maximum wait in microseconds. 0 means "poll once, do not
 *              wait". U32_MAX is passed through unconverted as the jiffies
 *              timeout.
 * @user_address: user-space address of the 32-bit value to compare.
 * @target_value: the wait is satisfied once the value is >= this target.
 * @interrupt_offset: index into hdev->user_interrupt[], or
 *                    HL_COMMON_USER_INTERRUPT_ID for the common interrupt.
 * @status: on success, set to CS_WAIT_STATUS_COMPLETED or
 *          CS_WAIT_STATUS_BUSY.
 *
 * Return: 0 on success (including timeout or an interrupted wait, which are
 * reported as CS_WAIT_STATUS_BUSY), -ENOMEM if the wait object cannot be
 * allocated, -EPERM if the device is not operational, -EFAULT if the user
 * value cannot be read, or the error stored in the fence when the wait was
 * aborted (-EIO from wake_pending_user_interrupt_threads()).
 */
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				u32 timeout_us, u64 user_address,
				u32 target_value, u16 interrupt_offset,
				enum hl_cs_wait_status *status)
{
	struct hl_user_pending_interrupt *pend;
	struct hl_user_interrupt *interrupt;
	unsigned long timeout;
	long completion_rc;
	u32 completion_value;
	int rc = 0;

	if (timeout_us == U32_MAX)
		timeout = timeout_us;
	else
		timeout = usecs_to_jiffies(timeout_us);

	hl_ctx_get(hdev, ctx);

	pend = kmalloc(sizeof(*pend), GFP_KERNEL);
	if (!pend) {
		hl_ctx_put(ctx);
		return -ENOMEM;
	}

	hl_fence_init(&pend->fence, ULONG_MAX);

	if (interrupt_offset == HL_COMMON_USER_INTERRUPT_ID)
		interrupt = &hdev->common_user_interrupt;
	else
		interrupt = &hdev->user_interrupt[interrupt_offset];

	/* The operational check and the list insertion happen under the same
	 * lock that wake_pending_user_interrupt_threads() holds while it
	 * aborts the waiters.
	 */
	spin_lock(&interrupt->wait_list_lock);
	if (!hl_device_operational(hdev, NULL)) {
		rc = -EPERM;
		goto unlock_and_free_fence;
	}

	/* Add pending user interrupt to relevant list for the interrupt
	 * handler to monitor. This is done before the first read of the user
	 * value so that a signal arriving right after the read is not missed.
	 */
	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
	spin_unlock(&interrupt->wait_list_lock);

	/* copy_from_user() may fault and sleep, so it must never be called
	 * while the spinlock is held.
	 */
	if (copy_from_user(&completion_value, u64_to_user_ptr(user_address),
				sizeof(completion_value))) {
		dev_err(hdev->dev,
			"Failed to copy completion value from user\n");
		rc = -EFAULT;
		goto remove_pending_user_interrupt;
	}

	if (completion_value >= target_value)
		*status = CS_WAIT_STATUS_COMPLETED;
	else
		*status = CS_WAIT_STATUS_BUSY;

	if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
		goto remove_pending_user_interrupt;

wait_again:
	/* Wait for interrupt handler to signal completion */
	completion_rc =
		wait_for_completion_interruptible_timeout(
				&pend->fence.completion, timeout);

	/* If timeout did not expire we need to perform the comparison.
	 * If comparison fails, keep waiting until timeout expires
	 */
	if (completion_rc > 0) {
		/* Re-arm the completion before reading the user value.
		 * complete_all() leaves the completion permanently signalled,
		 * so without this the next wait would return immediately and
		 * the loop would spin. The lock serializes the re-arm with the
		 * complete_all() done under the same lock.
		 */
		spin_lock(&interrupt->wait_list_lock);
		reinit_completion(&pend->fence.completion);
		spin_unlock(&interrupt->wait_list_lock);

		if (copy_from_user(&completion_value,
				u64_to_user_ptr(user_address),
				sizeof(completion_value))) {
			dev_err(hdev->dev,
				"Failed to copy completion value from user\n");
			rc = -EFAULT;
			goto remove_pending_user_interrupt;
		}

		if (completion_value >= target_value) {
			*status = CS_WAIT_STATUS_COMPLETED;
		} else if (pend->fence.error) {
			/* The wait was aborted rather than signalled by the
			 * interrupt; report the recorded error instead of
			 * waiting for a value that may never arrive.
			 * NOTE(review): relies on hl_fence_init() clearing
			 * fence.error - confirm.
			 */
			rc = pend->fence.error;
		} else {
			/* completion_rc is the number of jiffies left */
			timeout = completion_rc;
			goto wait_again;
		}
	} else {
		/* Timeout expired (0) or the wait was interrupted (< 0) */
		*status = CS_WAIT_STATUS_BUSY;
	}

remove_pending_user_interrupt:
	spin_lock(&interrupt->wait_list_lock);
	list_del(&pend->wait_list_node);

unlock_and_free_fence:
	spin_unlock(&interrupt->wait_list_lock);
	kfree(pend);
	hl_ctx_put(ctx);

	return rc;
}

/*
 * hl_interrupt_wait_ioctl - handle the "wait on user interrupt" flavor of the
 * wait IOCTL.
 * @hpriv: private data of the calling file descriptor.
 * @data: IOCTL argument, a union hl_wait_cs_args. The input fields are read
 *        first, then the whole union is zeroed and the output status is
 *        written into it.
 *
 * The interrupt ID is taken from the flags field and must either lie in the
 * range [first_available_user_msix_interrupt,
 * first_available_user_msix_interrupt + user_interrupt_count - 1] or be
 * HL_COMMON_USER_INTERRUPT_ID.
 *
 * Return: 0 on success with args->out.status set, -EPERM if the ASIC has no
 * user interrupts, -EINVAL for an interrupt ID outside the allowed set, or
 * the error returned by _hl_interrupt_wait_ioctl().
 */
static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	u16 interrupt_id, interrupt_offset, first_interrupt, last_interrupt;
	struct hl_device *hdev = hpriv->hdev;
	struct asic_fixed_properties *prop;
	union hl_wait_cs_args *args = data;
	enum hl_cs_wait_status status;
	int rc;

	prop = &hdev->asic_prop;

	if (!prop->user_interrupt_count) {
		dev_err(hdev->dev, "no user interrupts allowed\n");
		return -EPERM;
	}

	interrupt_id =
		FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);

	first_interrupt = prop->first_available_user_msix_interrupt;
	last_interrupt = prop->first_available_user_msix_interrupt +
						prop->user_interrupt_count - 1;

	if ((interrupt_id < first_interrupt || interrupt_id > last_interrupt) &&
			interrupt_id != HL_COMMON_USER_INTERRUPT_ID) {
		dev_err(hdev->dev, "invalid user interrupt %u\n", interrupt_id);
		return -EINVAL;
	}

	/* Per-index interrupts are addressed relative to the first user
	 * interrupt; the common interrupt keeps its dedicated ID.
	 */
	if (interrupt_id == HL_COMMON_USER_INTERRUPT_ID)
		interrupt_offset = HL_COMMON_USER_INTERRUPT_ID;
	else
		interrupt_offset = interrupt_id - first_interrupt;

	rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx,
				args->in.interrupt_timeout_us, args->in.addr,
				args->in.target, interrupt_offset, &status);

	/* Clear the whole union before the output fields are filled in */
	memset(args, 0, sizeof(*args));

	if (rc) {
		dev_err_ratelimited(hdev->dev,
			"interrupt_wait_ioctl failed (%d)\n", rc);

		return rc;
	}

	switch (status) {
	case CS_WAIT_STATUS_COMPLETED:
		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
		break;
	case CS_WAIT_STATUS_BUSY:
	default:
		args->out.status = HL_WAIT_CS_STATUS_BUSY;
		break;
	}

	return 0;
}

/*
 * hl_wait_ioctl - entry point of the wait IOCTL.
 * @hpriv: private data of the calling file descriptor.
 * @data: IOCTL argument, a union hl_wait_cs_args.
 *
 * Dispatches to the interrupt wait handler when the
 * HL_WAIT_CS_FLAGS_INTERRUPT flag is set in the input flags, and to the
 * command submission wait handler otherwise.
 *
 * Return: the return value of the selected handler.
 */
int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_wait_cs_args *args = data;
	u32 wait_flags = args->in.flags;

	if (wait_flags & HL_WAIT_CS_FLAGS_INTERRUPT)
		return hl_interrupt_wait_ioctl(hpriv, data);

	return hl_cs_wait_ioctl(hpriv, data);
}
+11 −3
Original line number Diff line number Diff line
@@ -20,6 +20,11 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
	 */
	hl_pending_cb_list_flush(ctx);

	/* Release all allocated HW block mapped list entries and destroy
	 * the mutex.
	 */
	hl_hw_block_mem_fini(ctx);

	/*
	 * If we arrived here, there are no jobs waiting for this context
	 * on its queues so we can safely remove it.
@@ -160,13 +165,15 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
	if (!ctx->cs_pending)
		return -ENOMEM;

	hl_hw_block_mem_init(ctx);

	if (is_kernel_ctx) {
		ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
		rc = hl_vm_ctx_init(ctx);
		if (rc) {
			dev_err(hdev->dev, "Failed to init mem ctx module\n");
			rc = -ENOMEM;
			goto err_free_cs_pending;
			goto err_hw_block_mem_fini;
		}

		rc = hdev->asic_funcs->ctx_init(ctx);
@@ -179,7 +186,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
		if (!ctx->asid) {
			dev_err(hdev->dev, "No free ASID, failed to create context\n");
			rc = -ENOMEM;
			goto err_free_cs_pending;
			goto err_hw_block_mem_fini;
		}

		rc = hl_vm_ctx_init(ctx);
@@ -214,7 +221,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
err_asid_free:
	if (ctx->asid != HL_KERNEL_ASID_ID)
		hl_asid_free(hdev, ctx->asid);
err_free_cs_pending:
err_hw_block_mem_fini:
	hl_hw_block_mem_fini(ctx);
	kfree(ctx->cs_pending);

	return rc;
+201 −23

File changed.

Preview size limit exceeded, changes collapsed.

Loading