Skip to content

Commit 02f4f01

Browse files
skoralah authored and davejiang committed
cxl/pci: Add trace logging for CXL PCIe Port RAS errors
The CXL drivers use kernel trace functions for logging endpoint and Restricted CXL host (RCH) Downstream Port RAS errors. Similar functionality is required for CXL Root Ports, CXL Downstream Switch Ports, and CXL Upstream Switch Ports.

Introduce trace logging functions for both RAS correctable and uncorrectable errors specific to CXL PCIe Ports. Use them to trace FW-First Protocol errors.

Co-developed-by: Terry Bowman <terry.bowman@amd.com>
Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Li Ming <ming.li@zohomail.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://patch.msgid.link/20250310223839.31342-3-Smita.KoralahalliChannabasappa@amd.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
1 parent 36f257e commit 02f4f01

File tree

2 files changed

+84
-0
lines changed

2 files changed

+84
-0
lines changed

drivers/cxl/core/ras.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,30 @@
77
#include <cxlmem.h>
88
#include "trace.h"
99

10+
static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev,
11+
struct cxl_ras_capability_regs ras_cap)
12+
{
13+
u32 status = ras_cap.cor_status & ~ras_cap.cor_mask;
14+
15+
trace_cxl_port_aer_correctable_error(&pdev->dev, status);
16+
}
17+
18+
static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev,
19+
struct cxl_ras_capability_regs ras_cap)
20+
{
21+
u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask;
22+
u32 fe;
23+
24+
if (hweight32(status) > 1)
25+
fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
26+
ras_cap.cap_control));
27+
else
28+
fe = status;
29+
30+
trace_cxl_port_aer_uncorrectable_error(&pdev->dev, status, fe,
31+
ras_cap.header_log);
32+
}
33+
1034
static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev,
1135
struct cxl_ras_capability_regs ras_cap)
1236
{
@@ -49,12 +73,25 @@ static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
4973
pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment,
5074
data->prot_err.agent_addr.bus,
5175
devfn);
76+
int port_type;
5277

5378
if (!pdev)
5479
return;
5580

5681
guard(device)(&pdev->dev);
5782

83+
port_type = pci_pcie_type(pdev);
84+
if (port_type == PCI_EXP_TYPE_ROOT_PORT ||
85+
port_type == PCI_EXP_TYPE_DOWNSTREAM ||
86+
port_type == PCI_EXP_TYPE_UPSTREAM) {
87+
if (data->severity == AER_CORRECTABLE)
88+
cxl_cper_trace_corr_port_prot_err(pdev, data->ras_cap);
89+
else
90+
cxl_cper_trace_uncorr_port_prot_err(pdev, data->ras_cap);
91+
92+
return;
93+
}
94+
5895
if (data->severity == AER_CORRECTABLE)
5996
cxl_cper_trace_corr_prot_err(pdev, data->ras_cap);
6097
else

drivers/cxl/core/trace.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,34 @@
4848
{ CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \
4949
)
5050

51+
TRACE_EVENT(cxl_port_aer_uncorrectable_error,
52+
TP_PROTO(struct device *dev, u32 status, u32 fe, u32 *hl),
53+
TP_ARGS(dev, status, fe, hl),
54+
TP_STRUCT__entry(
55+
__string(device, dev_name(dev))
56+
__string(host, dev_name(dev->parent))
57+
__field(u32, status)
58+
__field(u32, first_error)
59+
__array(u32, header_log, CXL_HEADERLOG_SIZE_U32)
60+
),
61+
TP_fast_assign(
62+
__assign_str(device);
63+
__assign_str(host);
64+
__entry->status = status;
65+
__entry->first_error = fe;
66+
/*
67+
* Embed the 512B headerlog data for user app retrieval and
68+
* parsing, but no need to print this in the trace buffer.
69+
*/
70+
memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE);
71+
),
72+
TP_printk("device=%s host=%s status: '%s' first_error: '%s'",
73+
__get_str(device), __get_str(host),
74+
show_uc_errs(__entry->status),
75+
show_uc_errs(__entry->first_error)
76+
)
77+
);
78+
5179
TRACE_EVENT(cxl_aer_uncorrectable_error,
5280
TP_PROTO(const struct cxl_memdev *cxlmd, u32 status, u32 fe, u32 *hl),
5381
TP_ARGS(cxlmd, status, fe, hl),
@@ -96,6 +124,25 @@ TRACE_EVENT(cxl_aer_uncorrectable_error,
96124
{ CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \
97125
)
98126

127+
TRACE_EVENT(cxl_port_aer_correctable_error,
128+
TP_PROTO(struct device *dev, u32 status),
129+
TP_ARGS(dev, status),
130+
TP_STRUCT__entry(
131+
__string(device, dev_name(dev))
132+
__string(host, dev_name(dev->parent))
133+
__field(u32, status)
134+
),
135+
TP_fast_assign(
136+
__assign_str(device);
137+
__assign_str(host);
138+
__entry->status = status;
139+
),
140+
TP_printk("device=%s host=%s status='%s'",
141+
__get_str(device), __get_str(host),
142+
show_ce_errs(__entry->status)
143+
)
144+
);
145+
99146
TRACE_EVENT(cxl_aer_correctable_error,
100147
TP_PROTO(const struct cxl_memdev *cxlmd, u32 status),
101148
TP_ARGS(cxlmd, status),

0 commit comments

Comments
 (0)