Commit 86b74d84 authored by Dafna Hirschfeld, committed by Oded Gabbay
Browse files

accel/habanalabs: assert return value of hw_fini



Since hw_fini returns an error code to indicate failure, we should
check its return value. Currently it can only fail on a soft reset
from hl_device_reset. A later patch will add an hw_fini failure in
case of a polling timeout during hard reset.

Signed-off-by: Dafna Hirschfeld <dhirschfeld@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent d85f0531
Loading
Loading
Loading
Loading
+9 −3
Original line number Diff line number Diff line
@@ -1472,7 +1472,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
		schedule_hard_reset = false, delay_reset, from_dev_release, from_watchdog_thread;
	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
	struct hl_ctx *ctx;
	int i, rc;
	int i, rc, hw_fini_rc;

	if (!hdev->init_done) {
		dev_err(hdev->dev, "Can't reset before initialization is done\n");
@@ -1634,7 +1634,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
	}

	/* Reset the H/W. It will be in idle state after this returns */
	hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);
	hw_fini_rc = hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);

	if (hard_reset) {
		hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;
@@ -1661,6 +1661,10 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
		hl_ctx_put(ctx);
	}

	if (hw_fini_rc) {
		rc = hw_fini_rc;
		goto out_err;
	}
	/* Finished tear-down, starting to re-initialize */

	if (hard_reset) {
@@ -2416,7 +2420,9 @@ void hl_device_fini(struct hl_device *hdev)
	hl_cb_pool_fini(hdev);

	/* Reset the H/W. It will be in idle state after this returns */
	hdev->asic_funcs->hw_fini(hdev, true, false);
	rc = hdev->asic_funcs->hw_fini(hdev, true, false);
	if (rc)
		dev_err(hdev->dev, "hw_fini failed in device fini while removing device %d\n", rc);

	hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;

+6 −1
Original line number Diff line number Diff line
@@ -868,13 +868,18 @@ static int gaudi_early_init(struct hl_device *hdev)
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true, false);
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	return 0;
+6 −1
Original line number Diff line number Diff line
@@ -2886,13 +2886,18 @@ static int gaudi2_early_init(struct hl_device *hdev)
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true, false);
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW during early init (%d)\n", rc);
			goto pci_fini;
		}
	}

	return 0;
+6 −1
Original line number Diff line number Diff line
@@ -669,13 +669,18 @@ static int goya_early_init(struct hl_device *hdev)
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		hdev->asic_funcs->hw_fini(hdev, true, false);
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	if (!hdev->pldm) {