Commit fdc94d3a authored by Hawking Zhang's avatar Hawking Zhang Committed by Alex Deucher
Browse files

drm/amdgpu: Rework pcie_bif ras sw_init



The pcie_bif ras block needs to be initialized as early
as possible to handle fatal errors detected in the hw_init
phase. Also align the pcie_bif ras sw_init with the other
ras blocks.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Stanley Yang <Stanley.Yang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent da9d669e
Loading
Loading
Loading
Loading
+23 −0
Original line number Diff line number Diff line
@@ -22,6 +22,29 @@
#include "amdgpu.h"
#include "amdgpu_ras.h"

/**
 * amdgpu_nbio_ras_sw_init - software init of the pcie_bif ras block
 * @adev: amdgpu device whose nbio ras block is set up
 *
 * Registers adev->nbio.ras->ras_block through
 * amdgpu_ras_register_ras_block(), then fills in the common ras
 * interface (name "pcie_bif", block id, error type) and stores a
 * pointer to it in adev->nbio.ras_if.
 *
 * Return: 0 when adev->nbio.ras is not set or on success, otherwise the
 * error code returned by amdgpu_ras_register_ras_block().
 */
int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev)
{
	struct amdgpu_nbio_ras *nbio_ras = adev->nbio.ras;
	int ret;

	/* no nbio ras assigned to this device: nothing to set up */
	if (nbio_ras == NULL)
		return 0;

	ret = amdgpu_ras_register_ras_block(adev, &nbio_ras->ras_block);
	if (ret != 0) {
		dev_err(adev->dev, "Failed to register pcie_bif ras block!\n");
		return ret;
	}

	/* describe the block in its common ras interface */
	nbio_ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
	nbio_ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__PCIE_BIF;
	strcpy(nbio_ras->ras_block.ras_comm.name, "pcie_bif");

	/* store the pointer only after the interface is filled in */
	adev->nbio.ras_if = &nbio_ras->ras_block.ras_comm;

	return 0;
}

int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
	int r;
+1 −0
Original line number Diff line number Diff line
@@ -106,5 +106,6 @@ struct amdgpu_nbio {
	struct amdgpu_nbio_ras  *ras;
};

/*
 * Register the pcie_bif ras block and set adev->nbio.ras_if.
 * Returns 0 when adev->nbio.ras is not set or on success, otherwise the
 * error from amdgpu_ras_register_ras_block().
 */
int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
#endif
+11 −8
Original line number Diff line number Diff line
@@ -2554,21 +2554,24 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
	/* initialize nbio ras function ahead of any other
	 * ras functions so hardware fatal error interrupt
	 * can be enabled as early as possible */
	switch (adev->asic_type) {
	case CHIP_VEGA20:
	case CHIP_ARCTURUS:
	case CHIP_ALDEBARAN:
		if (!adev->gmc.xgmi.connected_to_cpu) {
	switch (adev->ip_versions[NBIO_HWIP][0]) {
	case IP_VERSION(7, 4, 0):
	case IP_VERSION(7, 4, 1):
	case IP_VERSION(7, 4, 4):
		if (!adev->gmc.xgmi.connected_to_cpu)
			adev->nbio.ras = &nbio_v7_4_ras;
			amdgpu_ras_register_ras_block(adev, &adev->nbio.ras->ras_block);
			adev->nbio.ras_if = &adev->nbio.ras->ras_block.ras_comm;
		}
		break;
	default:
		/* nbio ras is not available */
		break;
	}

	/* nbio ras block needs to be enabled ahead of other ras blocks
	 * to handle fatal error */
	r = amdgpu_nbio_ras_sw_init(adev);
	if (r)
		return r;

	if (adev->nbio.ras &&
	    adev->nbio.ras->init_ras_controller_interrupt) {
		r = adev->nbio.ras->init_ras_controller_interrupt(adev);