51
51
#include <linux/edac.h>
52
52
53
53
#include <linux/io-64-nonatomic-lo-hi.h>
54
+ #include <asm/mce.h>
54
55
#include "edac_module.h"
55
56
56
57
#define EDAC_MOD_STR "ie31200_edac"
@@ -123,6 +124,7 @@ static int ie31200_registered = 1;
123
124
124
125
struct res_config {
125
126
enum mem_type mtype ;
127
+ bool cmci ;
126
128
int imc_num ;
127
129
/* Host MMIO configuration register */
128
130
u64 reg_mchbar_mask ;
@@ -172,6 +174,7 @@ struct ie31200_error_info {
172
174
u16 errsts ;
173
175
u16 errsts2 ;
174
176
u64 eccerrlog [IE31200_CHANNELS ];
177
+ u64 erraddr ;
175
178
};
176
179
177
180
static const struct ie31200_dev_info ie31200_devs [] = {
@@ -327,13 +330,13 @@ static void ie31200_process_error_info(struct mem_ctl_info *mci,
327
330
log = info -> eccerrlog [channel ];
328
331
if (log & cfg -> reg_eccerrlog_ue_mask ) {
329
332
edac_mc_handle_error (HW_EVENT_ERR_UNCORRECTED , mci , 1 ,
330
- 0 , 0 , 0 ,
333
+ info -> erraddr >> PAGE_SHIFT , 0 , 0 ,
331
334
field_get (cfg -> reg_eccerrlog_rank_mask , log ),
332
335
channel , -1 ,
333
336
"ie31200 UE" , "" );
334
337
} else if (log & cfg -> reg_eccerrlog_ce_mask ) {
335
338
edac_mc_handle_error (HW_EVENT_ERR_CORRECTED , mci , 1 ,
336
- 0 , 0 ,
339
+ info -> erraddr >> PAGE_SHIFT , 0 ,
337
340
field_get (cfg -> reg_eccerrlog_syndrome_mask , log ),
338
341
field_get (cfg -> reg_eccerrlog_rank_mask , log ),
339
342
channel , -1 ,
@@ -342,14 +345,20 @@ static void ie31200_process_error_info(struct mem_ctl_info *mci,
342
345
}
343
346
}
344
347
345
- static void ie31200_check (struct mem_ctl_info * mci )
348
+ static void __ie31200_check (struct mem_ctl_info * mci , struct mce * mce )
346
349
{
347
350
struct ie31200_error_info info ;
348
351
352
+ info .erraddr = mce ? mce -> addr : 0 ;
349
353
ie31200_get_and_clear_error_info (mci , & info );
350
354
ie31200_process_error_info (mci , & info );
351
355
}
352
356
357
+ static void ie31200_check (struct mem_ctl_info * mci )
358
+ {
359
+ __ie31200_check (mci , NULL );
360
+ }
361
+
353
362
static void __iomem * ie31200_map_mchbar (struct pci_dev * pdev , struct res_config * cfg , int mc )
354
363
{
355
364
union {
@@ -459,7 +468,7 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, in
459
468
mci -> mod_name = EDAC_MOD_STR ;
460
469
mci -> ctl_name = ie31200_devs [mc ].ctl_name ;
461
470
mci -> dev_name = pci_name (pdev );
462
- mci -> edac_check = ie31200_check ;
471
+ mci -> edac_check = cfg -> cmci ? NULL : ie31200_check ;
463
472
mci -> ctl_page_to_phys = NULL ;
464
473
priv = mci -> pvt_info ;
465
474
priv -> window = window ;
@@ -499,6 +508,58 @@ static int ie31200_register_mci(struct pci_dev *pdev, struct res_config *cfg, in
499
508
return ret ;
500
509
}
501
510
511
+ static void mce_check (struct mce * mce )
512
+ {
513
+ struct ie31200_priv * priv ;
514
+ int i ;
515
+
516
+ for (i = 0 ; i < IE31200_IMC_NUM ; i ++ ) {
517
+ priv = ie31200_pvt .priv [i ];
518
+ if (!priv )
519
+ continue ;
520
+
521
+ __ie31200_check (priv -> mci , mce );
522
+ }
523
+ }
524
+
525
+ static int mce_handler (struct notifier_block * nb , unsigned long val , void * data )
526
+ {
527
+ struct mce * mce = (struct mce * )data ;
528
+ char * type ;
529
+
530
+ if (mce -> kflags & MCE_HANDLED_CEC )
531
+ return NOTIFY_DONE ;
532
+
533
+ /*
534
+ * Ignore unless this is a memory related error.
535
+ * Don't check MCI_STATUS_ADDRV since it's not set on some CPUs.
536
+ */
537
+ if ((mce -> status & 0xefff ) >> 7 != 1 )
538
+ return NOTIFY_DONE ;
539
+
540
+ type = mce -> mcgstatus & MCG_STATUS_MCIP ? "Exception" : "Event" ;
541
+
542
+ edac_dbg (0 , "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n" ,
543
+ mce -> extcpu , type , mce -> mcgstatus ,
544
+ mce -> bank , mce -> status );
545
+ edac_dbg (0 , "TSC 0x%llx\n" , mce -> tsc );
546
+ edac_dbg (0 , "ADDR 0x%llx\n" , mce -> addr );
547
+ edac_dbg (0 , "MISC 0x%llx\n" , mce -> misc );
548
+ edac_dbg (0 , "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n" ,
549
+ mce -> cpuvendor , mce -> cpuid , mce -> time ,
550
+ mce -> socketid , mce -> apicid );
551
+
552
+ mce_check (mce );
553
+ mce -> kflags |= MCE_HANDLED_EDAC ;
554
+
555
+ return NOTIFY_DONE ;
556
+ }
557
+
558
+ static struct notifier_block ie31200_mce_dec = {
559
+ .notifier_call = mce_handler ,
560
+ .priority = MCE_PRIO_EDAC ,
561
+ };
562
+
502
563
static void ie31200_unregister_mcis (void )
503
564
{
504
565
struct ie31200_priv * priv ;
@@ -534,6 +595,13 @@ static int ie31200_probe1(struct pci_dev *pdev, struct res_config *cfg)
534
595
goto fail_register ;
535
596
}
536
597
598
+ if (cfg -> cmci ) {
599
+ mce_register_decode_chain (& ie31200_mce_dec );
600
+ edac_op_state = EDAC_OPSTATE_INT ;
601
+ } else {
602
+ edac_op_state = EDAC_OPSTATE_POLL ;
603
+ }
604
+
537
605
/* get this far and it's successful. */
538
606
edac_dbg (3 , "MC: success\n" );
539
607
return 0 ;
@@ -560,9 +628,13 @@ static int ie31200_init_one(struct pci_dev *pdev,
560
628
561
629
static void ie31200_remove_one (struct pci_dev * pdev )
562
630
{
631
+ struct ie31200_priv * priv = ie31200_pvt .priv [0 ];
632
+
563
633
edac_dbg (0 , "\n" );
564
634
pci_dev_put (mci_pdev );
565
635
mci_pdev = NULL ;
636
+ if (priv -> cfg -> cmci )
637
+ mce_unregister_decode_chain (& ie31200_mce_dec );
566
638
ie31200_unregister_mcis ();
567
639
}
568
640
@@ -612,6 +684,7 @@ static struct res_config skl_cfg = {
612
684
613
685
struct res_config rpl_s_cfg = {
614
686
.mtype = MEM_DDR5 ,
687
+ .cmci = true,
615
688
.imc_num = 2 ,
616
689
.reg_mchbar_mask = GENMASK_ULL (41 , 17 ),
617
690
.reg_mchbar_window_size = BIT_ULL (16 ),
@@ -677,8 +750,6 @@ static int __init ie31200_init(void)
677
750
int pci_rc , i ;
678
751
679
752
edac_dbg (3 , "MC:\n" );
680
- /* Ensure that the OPSTATE is set correctly for POLL or NMI */
681
- opstate_init ();
682
753
683
754
pci_rc = pci_register_driver (& ie31200_driver );
684
755
if (pci_rc < 0 )
0 commit comments