Skip to content

Commit 4c6e20e

Browse files
shijujose4 authored and davejiang committed
cxl/events: Update Memory Module Event Record to CXL spec rev 3.1
In CXL spec 3.1 section 8.2.9.2.1.3, Table 8-47, the Memory Module Event Record has been updated with the following new fields, and with new information for the Device Event Type and Device Health Information fields:
1. Validity Flags
2. Component Identifier
3. Device Event Sub-Type

Update the Memory Module event record and the Memory Module trace event for the above spec changes. The new fields are inserted in logical places.

Example trace print of the cxl_memory_module trace event:

cxl_memory_module: memdev=mem3 host=0000:0f:00.0 serial=3 log=Fatal : \
time=371709344709 uuid=fe927475-dd59-4339-a586-79bab113b774 len=128 \
flags='0x1' handle=2 related_handle=0 maint_op_class=0 \
maint_op_sub_class=0 : event_type='Temperature Change' \
event_sub_type='Unsupported Config Data' \
health_status='MAINTENANCE_NEEDED|REPLACEMENT_NEEDED' \
media_status='All Data Loss in Event of Power Loss' as_life_used=0x3 \
as_dev_temp=Normal as_cor_vol_err_cnt=Normal as_cor_per_err_cnt=Normal \
life_used=8 device_temp=3 dirty_shutdown_cnt=33 cor_vol_err_cnt=25 \
cor_per_err_cnt=45 validity_flags='COMPONENT|COMPONENT PLDM FORMAT' \
comp_id=03 74 c5 08 9a 1a 0b fc d2 7e 2f 31 9b 3c 81 4d \
comp_id_pldm_valid_flags='Resource ID' \
pldm_entity_id=0x00 pldm_resource_id=fc d2 7e 2f

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
Link: https://patch.msgid.link/20250111091756.1682-6-shiju.jose@huawei.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
1 parent 24ec41f commit 4c6e20e

File tree

2 files changed

+60
-11
lines changed

2 files changed

+60
-11
lines changed

drivers/cxl/core/trace.h

Lines changed: 54 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -631,35 +631,43 @@ TRACE_EVENT(cxl_dram,
631631
/*
632632
* Memory Module Event Record - MMER
633633
*
634-
* CXL res 3.0 section 8.2.9.2.1.3; Table 8-45
634+
* CXL rev 3.1 section 8.2.9.2.1.3; Table 8-47
635635
*/
636636
#define CXL_MMER_HEALTH_STATUS_CHANGE 0x00
637637
#define CXL_MMER_MEDIA_STATUS_CHANGE 0x01
638638
#define CXL_MMER_LIFE_USED_CHANGE 0x02
639639
#define CXL_MMER_TEMP_CHANGE 0x03
640640
#define CXL_MMER_DATA_PATH_ERROR 0x04
641641
#define CXL_MMER_LSA_ERROR 0x05
642+
#define CXL_MMER_UNRECOV_SIDEBAND_BUS_ERROR 0x06
643+
#define CXL_MMER_MEMORY_MEDIA_FRU_ERROR 0x07
644+
#define CXL_MMER_POWER_MANAGEMENT_FAULT 0x08
642645
#define show_dev_evt_type(type) __print_symbolic(type, \
643646
{ CXL_MMER_HEALTH_STATUS_CHANGE, "Health Status Change" }, \
644647
{ CXL_MMER_MEDIA_STATUS_CHANGE, "Media Status Change" }, \
645648
{ CXL_MMER_LIFE_USED_CHANGE, "Life Used Change" }, \
646649
{ CXL_MMER_TEMP_CHANGE, "Temperature Change" }, \
647650
{ CXL_MMER_DATA_PATH_ERROR, "Data Path Error" }, \
648-
{ CXL_MMER_LSA_ERROR, "LSA Error" } \
651+
{ CXL_MMER_LSA_ERROR, "LSA Error" }, \
652+
{ CXL_MMER_UNRECOV_SIDEBAND_BUS_ERROR, "Unrecoverable Internal Sideband Bus Error" }, \
653+
{ CXL_MMER_MEMORY_MEDIA_FRU_ERROR, "Memory Media FRU Error" }, \
654+
{ CXL_MMER_POWER_MANAGEMENT_FAULT, "Power Management Fault" } \
649655
)
650656

651657
/*
652658
* Device Health Information - DHI
653659
*
654-
* CXL res 3.0 section 8.2.9.8.3.1; Table 8-100
660+
* CXL rev 3.1 section 8.2.9.9.3.1; Table 8-133
655661
*/
656662
#define CXL_DHI_HS_MAINTENANCE_NEEDED BIT(0)
657663
#define CXL_DHI_HS_PERFORMANCE_DEGRADED BIT(1)
658664
#define CXL_DHI_HS_HW_REPLACEMENT_NEEDED BIT(2)
665+
#define CXL_DHI_HS_MEM_CAPACITY_DEGRADED BIT(3)
659666
#define show_health_status_flags(flags) __print_flags(flags, "|", \
660667
{ CXL_DHI_HS_MAINTENANCE_NEEDED, "MAINTENANCE_NEEDED" }, \
661668
{ CXL_DHI_HS_PERFORMANCE_DEGRADED, "PERFORMANCE_DEGRADED" }, \
662-
{ CXL_DHI_HS_HW_REPLACEMENT_NEEDED, "REPLACEMENT_NEEDED" } \
669+
{ CXL_DHI_HS_HW_REPLACEMENT_NEEDED, "REPLACEMENT_NEEDED" }, \
670+
{ CXL_DHI_HS_MEM_CAPACITY_DEGRADED, "MEM_CAPACITY_DEGRADED" } \
663671
)
664672

665673
#define CXL_DHI_MS_NORMAL 0x00
@@ -713,6 +721,26 @@ TRACE_EVENT(cxl_dram,
713721
#define CXL_DHI_AS_COR_VOL_ERR_CNT(as) ((as & 0x10) >> 4)
714722
#define CXL_DHI_AS_COR_PER_ERR_CNT(as) ((as & 0x20) >> 5)
715723

724+
#define CXL_MMER_VALID_COMPONENT BIT(0)
725+
#define CXL_MMER_VALID_COMPONENT_ID_FORMAT BIT(1)
726+
#define show_mem_module_valid_flags(flags) __print_flags(flags, "|", \
727+
{ CXL_MMER_VALID_COMPONENT, "COMPONENT" }, \
728+
{ CXL_MMER_VALID_COMPONENT_ID_FORMAT, "COMPONENT PLDM FORMAT" } \
729+
)
730+
#define CXL_MMER_DEV_EVT_SUB_TYPE_NOT_REPORTED 0x00
731+
#define CXL_MMER_DEV_EVT_SUB_TYPE_INVALID_CONFIG_DATA 0x01
732+
#define CXL_MMER_DEV_EVT_SUB_TYPE_UNSUPP_CONFIG_DATA 0x02
733+
#define CXL_MMER_DEV_EVT_SUB_TYPE_UNSUPP_MEM_MEDIA_FRU 0x03
734+
#define show_dev_event_sub_type(sub_type) __print_symbolic(sub_type, \
735+
{ CXL_MMER_DEV_EVT_SUB_TYPE_NOT_REPORTED, "Not Reported" }, \
736+
{ CXL_MMER_DEV_EVT_SUB_TYPE_INVALID_CONFIG_DATA, "Invalid Config Data" }, \
737+
{ CXL_MMER_DEV_EVT_SUB_TYPE_UNSUPP_CONFIG_DATA, "Unsupported Config Data" }, \
738+
{ \
739+
CXL_MMER_DEV_EVT_SUB_TYPE_UNSUPP_MEM_MEDIA_FRU, \
740+
"Unsupported Memory Media FRU" \
741+
} \
742+
)
743+
716744
TRACE_EVENT(cxl_memory_module,
717745

718746
TP_PROTO(const struct cxl_memdev *cxlmd, enum cxl_event_log_type log,
@@ -735,6 +763,9 @@ TRACE_EVENT(cxl_memory_module,
735763
__field(u32, cor_per_err_cnt)
736764
__field(s16, device_temp)
737765
__field(u8, add_status)
766+
__field(u8, event_sub_type)
767+
__array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE)
768+
__field(u16, validity_flags)
738769
),
739770

740771
TP_fast_assign(
@@ -743,6 +774,7 @@ TRACE_EVENT(cxl_memory_module,
743774

744775
/* Memory Module Event */
745776
__entry->event_type = rec->event_type;
777+
__entry->event_sub_type = rec->event_sub_type;
746778

747779
/* Device Health Info */
748780
__entry->health_status = rec->info.health_status;
@@ -753,13 +785,20 @@ TRACE_EVENT(cxl_memory_module,
753785
__entry->cor_per_err_cnt = get_unaligned_le32(rec->info.cor_per_err_cnt);
754786
__entry->device_temp = get_unaligned_le16(rec->info.device_temp);
755787
__entry->add_status = rec->info.add_status;
788+
__entry->validity_flags = get_unaligned_le16(rec->validity_flags);
789+
memcpy(__entry->comp_id, &rec->component_id,
790+
CXL_EVENT_GEN_MED_COMP_ID_SIZE);
756791
),
757792

758-
CXL_EVT_TP_printk("event_type='%s' health_status='%s' media_status='%s' " \
759-
"as_life_used=%s as_dev_temp=%s as_cor_vol_err_cnt=%s " \
793+
CXL_EVT_TP_printk("event_type='%s' event_sub_type='%s' health_status='%s' " \
794+
"media_status='%s' as_life_used=%s as_dev_temp=%s as_cor_vol_err_cnt=%s " \
760795
"as_cor_per_err_cnt=%s life_used=%u device_temp=%d " \
761-
"dirty_shutdown_cnt=%u cor_vol_err_cnt=%u cor_per_err_cnt=%u",
796+
"dirty_shutdown_cnt=%u cor_vol_err_cnt=%u cor_per_err_cnt=%u " \
797+
"validity_flags='%s' " \
798+
"comp_id=%s comp_id_pldm_valid_flags='%s' " \
799+
"pldm_entity_id=%s pldm_resource_id=%s",
762800
show_dev_evt_type(__entry->event_type),
801+
show_dev_event_sub_type(__entry->event_sub_type),
763802
show_health_status_flags(__entry->health_status),
764803
show_media_status(__entry->media_status),
765804
show_two_bit_status(CXL_DHI_AS_LIFE_USED(__entry->add_status)),
@@ -768,7 +807,14 @@ TRACE_EVENT(cxl_memory_module,
768807
show_one_bit_status(CXL_DHI_AS_COR_PER_ERR_CNT(__entry->add_status)),
769808
__entry->life_used, __entry->device_temp,
770809
__entry->dirty_shutdown_cnt, __entry->cor_vol_err_cnt,
771-
__entry->cor_per_err_cnt
810+
__entry->cor_per_err_cnt,
811+
show_mem_module_valid_flags(__entry->validity_flags),
812+
__print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE),
813+
show_comp_id_pldm_flags(__entry->comp_id[0]),
814+
show_pldm_entity_id(__entry->validity_flags, CXL_MMER_VALID_COMPONENT,
815+
CXL_MMER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id),
816+
show_pldm_resource_id(__entry->validity_flags, CXL_MMER_VALID_COMPONENT,
817+
CXL_MMER_VALID_COMPONENT_ID_FORMAT, __entry->comp_id)
772818
)
773819
);
774820

include/cxl/event.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ struct cxl_event_dram {
8181

8282
/*
8383
* Get Health Info Record
84-
* CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100
84+
* CXL rev 3.1 section 8.2.9.9.3.1; Table 8-133
8585
*/
8686
struct cxl_get_health_info {
8787
u8 health_status;
@@ -96,13 +96,16 @@ struct cxl_get_health_info {
9696

9797
/*
9898
* Memory Module Event Record
99-
* CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
99+
* CXL rev 3.1 section 8.2.9.2.1.3; Table 8-47
100100
*/
101101
struct cxl_event_mem_module {
102102
struct cxl_event_record_hdr hdr;
103103
u8 event_type;
104104
struct cxl_get_health_info info;
105-
u8 reserved[0x3d];
105+
u8 validity_flags[2];
106+
u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
107+
u8 event_sub_type;
108+
u8 reserved[0x2a];
106109
} __packed;
107110

108111
union cxl_event {

0 commit comments

Comments
 (0)