@@ -88,6 +88,27 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev)
88
88
umc_v12_0_reset_error_count_per_channel , NULL );
89
89
}
90
90
91
+ static bool umc_v12_0_is_uncorrectable_error (uint64_t mc_umc_status )
92
+ {
93
+ return ((REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , Val ) == 1 ) &&
94
+ (REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , Deferred ) == 1 ||
95
+ REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , PCC ) == 1 ||
96
+ REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , UC ) == 1 ||
97
+ REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , TCC ) == 1 ));
98
+ }
99
+
100
+ static bool umc_v12_0_is_correctable_error (uint64_t mc_umc_status )
101
+ {
102
+ return (REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , Val ) == 1 &&
103
+ (REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , CECC ) == 1 ||
104
+ (REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , UECC ) == 1 &&
105
+ REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , UC ) == 0 ) ||
106
+ /* Identify data parity error in replay mode */
107
+ ((REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , ErrorCodeExt ) == 0x5 ||
108
+ REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , ErrorCodeExt ) == 0xb ) &&
109
+ !(umc_v12_0_is_uncorrectable_error (mc_umc_status )))));
110
+ }
111
+
91
112
static void umc_v12_0_query_correctable_error_count (struct amdgpu_device * adev ,
92
113
uint64_t umc_reg_offset ,
93
114
unsigned long * error_count )
@@ -104,10 +125,7 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev,
104
125
mc_umc_status =
105
126
RREG64_PCIE_EXT ((mc_umc_status_addr + umc_reg_offset ) * 4 );
106
127
107
- if (REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , Val ) == 1 &&
108
- (REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , CECC ) == 1 ||
109
- (REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , UECC ) == 1 &&
110
- REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , UC ) == 0 )))
128
+ if (umc_v12_0_is_correctable_error (mc_umc_status ))
111
129
* error_count += 1 ;
112
130
}
113
131
@@ -125,11 +143,7 @@ static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev
125
143
mc_umc_status =
126
144
RREG64_PCIE_EXT ((mc_umc_status_addr + umc_reg_offset ) * 4 );
127
145
128
- if ((REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , Val ) == 1 ) &&
129
- (REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , Deferred ) == 1 ||
130
- REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , PCC ) == 1 ||
131
- REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , UC ) == 1 ||
132
- REG_GET_FIELD (mc_umc_status , MCA_UMC_UMC0_MCUMC_STATUST0 , TCC ) == 1 ))
146
+ if (umc_v12_0_is_uncorrectable_error (mc_umc_status ))
133
147
* error_count += 1 ;
134
148
}
135
149
0 commit comments