Commit 60e0431f authored by Oded Gabbay
Browse files

habanalabs: fix soft reset accounting



A reset upon device release is not a soft-reset from the user's or the
system's point of view. As such, we shouldn't count that reset in the
statistics we gather and expose to the monitoring applications.

We also shouldn't print "soft-reset" in the log messages when doing the
reset upon device release.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent d8eb50f3
Loading
Loading
Loading
Loading
+25 −25
Original line number Diff line number Diff line
@@ -962,13 +962,13 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
 */
int hl_device_reset(struct hl_device *hdev, u32 flags)
{
	bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false;
	bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false,
								reset_upon_device_release = false;
	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
	int i, rc;

	if (!hdev->init_done) {
		dev_err(hdev->dev,
			"Can't reset before initialization is done\n");
		dev_err(hdev->dev, "Can't reset before initialization is done\n");
		return 0;
	}

@@ -988,6 +988,8 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
			return -EINVAL;
		}

		reset_upon_device_release = true;

		goto do_reset;
	}

@@ -1024,12 +1026,10 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)

		if (hard_reset)
			dev_info(hdev->dev, "Going to reset device\n");
		else if (flags & HL_DRV_RESET_DEV_RELEASE)
			dev_info(hdev->dev,
				"Going to reset device after it was released by user\n");
		else if (reset_upon_device_release)
			dev_info(hdev->dev, "Going to reset device after release by user\n");
		else
			dev_info(hdev->dev,
				"Going to reset compute engines of inference device\n");
			dev_info(hdev->dev, "Going to reset engines of inference device\n");
	}

again:
@@ -1174,16 +1174,14 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)

	rc = hdev->asic_funcs->hw_init(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"failed to initialize the H/W after reset\n");
		dev_err(hdev->dev, "failed to initialize the H/W after reset\n");
		goto out_err;
	}

	/* If device is not idle fail the reset process */
	if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
			HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
		dev_err(hdev->dev,
			"device is not idle (mask 0x%llx_%llx) after reset\n",
		dev_err(hdev->dev, "device is not idle (mask 0x%llx_%llx) after reset\n",
			idle_mask[1], idle_mask[0]);
		rc = -EIO;
		goto out_err;
@@ -1192,23 +1190,20 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
	/* Check that the communication with the device is working */
	rc = hdev->asic_funcs->test_queues(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to detect if device is alive after reset\n");
		dev_err(hdev->dev, "Failed to detect if device is alive after reset\n");
		goto out_err;
	}

	if (hard_reset) {
		rc = device_late_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed late init after hard reset\n");
			dev_err(hdev->dev, "Failed late init after hard reset\n");
			goto out_err;
		}

		rc = hl_vm_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to init memory module after hard reset\n");
			dev_err(hdev->dev, "Failed to init memory module after hard reset\n");
			goto out_err;
		}

@@ -1216,8 +1211,11 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
	} else {
		rc = hdev->asic_funcs->soft_reset_late_init(hdev);
		if (rc) {
			if (reset_upon_device_release)
				dev_err(hdev->dev,
				"Failed late init after soft reset\n");
					"Failed late init in reset after device release\n");
			else
				dev_err(hdev->dev, "Failed late init after soft reset\n");
			goto out_err;
		}
	}
@@ -1236,7 +1234,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
		 * the device will be operational although it shouldn't be
		 */
		hdev->asic_funcs->enable_events_from_fw(hdev);
	} else {
	} else if (!reset_upon_device_release) {
		hdev->soft_reset_cnt++;
	}

@@ -1246,12 +1244,14 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
	hdev->disabled = true;

	if (hard_reset) {
		dev_err(hdev->dev,
			"Failed to reset! Device is NOT usable\n");
		dev_err(hdev->dev, "Failed to reset! Device is NOT usable\n");
		hdev->hard_reset_cnt++;
	} else if (reset_upon_device_release) {
		dev_err(hdev->dev, "Failed to reset device after user release\n");
		hard_reset = true;
		goto again;
	} else {
		dev_err(hdev->dev,
			"Failed to do soft-reset, trying hard reset\n");
		dev_err(hdev->dev, "Failed to do soft-reset\n");
		hdev->soft_reset_cnt++;
		hard_reset = true;
		goto again;