Commit af5e675f, authored by Koby Elbaz, committed by Oded Gabbay
Browse files

accel/habanalabs: return tlb inv error code upon failure



Now that CQ-completion based jobs do not trigger a reset upon failure,
failure of such jobs (e.g., MMU cache invalidation) should be handled
by the caller itself depending on the error code returned to it.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent 60d7bbb5
Loading
Loading
Loading
Loading
+15 −9
Original line number Diff line number Diff line
@@ -3725,7 +3725,7 @@ static int gaudi_mmu_init(struct hl_device *hdev)
		if (rc) {
			dev_err(hdev->dev,
				"failed to set hop0 addr for asid %d\n", i);
			goto err;
			return rc;
		}
	}

@@ -3736,7 +3736,9 @@ static int gaudi_mmu_init(struct hl_device *hdev)
	/* mem cache invalidation */
	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);

	hl_mmu_invalidate_cache(hdev, true, 0);
	rc = hl_mmu_invalidate_cache(hdev, true, 0);
	if (rc)
		return rc;

	WREG32(mmMMU_UP_MMU_ENABLE, 1);
	WREG32(mmMMU_UP_SPI_MASK, 0xF);
@@ -3752,9 +3754,6 @@ static int gaudi_mmu_init(struct hl_device *hdev)
	gaudi->hw_cap_initialized |= HW_CAP_MMU;

	return 0;

err:
	return rc;
}

static int gaudi_load_firmware_to_device(struct hl_device *hdev)
@@ -8420,19 +8419,26 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
	}

	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
			hdev->internal_cb_pool_dma_addr,
			HOST_SPACE_INTERNAL_CB_SZ);

	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	if (rc)
		goto unreserve_internal_cb_pool;

	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
	if (rc)
		goto unmap_internal_cb_pool;

	mutex_unlock(&hdev->mmu_lock);

	return 0;

unmap_internal_cb_pool:
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
unreserve_internal_cb_pool:
	mutex_unlock(&hdev->mmu_lock);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
+28 −9
Original line number Diff line number Diff line
@@ -10239,16 +10239,23 @@ static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, v

	/* Create mapping on asic side */
	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
	hl_mmu_invalidate_cache_range(hdev, false,
	if (rc) {
		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
		goto unreserve_va;
	}

	rc = hl_mmu_invalidate_cache_range(hdev, false,
				      MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
				      ctx->asid, reserved_va_base, SZ_2M);
	mutex_unlock(&hdev->mmu_lock);
	if (rc) {
		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
		hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
		goto unreserve_va;
	}

	mutex_unlock(&hdev->mmu_lock);

	/* Enable MMU on KDMA */
	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);

@@ -10277,11 +10284,16 @@ static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, v
	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
	hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,

	rc = hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
	if (rc)
		goto unreserve_va;

	rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
				      ctx->asid, reserved_va_base, SZ_2M);
	mutex_unlock(&hdev->mmu_lock);

unreserve_va:
	mutex_unlock(&hdev->mmu_lock);
	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
free_data_buffer:
	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
@@ -10334,17 +10346,24 @@ static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *c
	}

	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
					HOST_SPACE_INTERNAL_CB_SZ);
	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	if (rc)
		goto unreserve_internal_cb_pool;

	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
	if (rc)
		goto unmap_internal_cb_pool;

	mutex_unlock(&hdev->mmu_lock);

	return 0;

unmap_internal_cb_pool:
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
unreserve_internal_cb_pool:
	mutex_unlock(&hdev->mmu_lock);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);