Skip to content

Commit d59fcfb

Browse files
candicelicy authored and alexdeucher committed
drm/amdgpu: Identify data parity error corrected in replay mode
Use ErrorCodeExt field to identify data parity error in replay mode.

Signed-off-by: Candice Li <candice.li@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Yang Wang <kevinyang.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent f7a17b2 commit d59fcfb

File tree

1 file changed

+23
-9
lines changed

1 file changed

+23
-9
lines changed

drivers/gpu/drm/amd/amdgpu/umc_v12_0.c

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,27 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev)
8888
umc_v12_0_reset_error_count_per_channel, NULL);
8989
}
9090

91+
static bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status)
92+
{
93+
return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
94+
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
95+
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
96+
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
97+
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1));
98+
}
99+
100+
static bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status)
101+
{
102+
return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
103+
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 ||
104+
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 &&
105+
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 0) ||
106+
/* Identify data parity error in replay mode */
107+
((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0x5 ||
108+
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0xb) &&
109+
!(umc_v12_0_is_uncorrectable_error(mc_umc_status)))));
110+
}
111+
91112
static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
92113
uint64_t umc_reg_offset,
93114
unsigned long *error_count)
@@ -104,10 +125,7 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
104125
mc_umc_status =
105126
RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
106127

107-
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
108-
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 ||
109-
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 &&
110-
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 0)))
128+
if (umc_v12_0_is_correctable_error(mc_umc_status))
111129
*error_count += 1;
112130
}
113131

@@ -125,11 +143,7 @@ static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev
125143
mc_umc_status =
126144
RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4);
127145

128-
if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
129-
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
130-
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
131-
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
132-
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
146+
if (umc_v12_0_is_uncorrectable_error(mc_umc_status))
133147
*error_count += 1;
134148
}
135149

0 commit comments

Comments
 (0)