Replies: 1 comment 1 reply
-
Was this fixed in #11?
Beta Was this translation helpful? Give feedback.
1 reply
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.
-
We had a device where the LMT feature would not work after the first reading.
This boiled down to a missing "go to normal settings" step before assessing the LMT capabilities of an LMT-capable device.
Most devices where LMT was tested did not seem to need this, but one switch did.
Below are the steps to reproduce this using version 1.2.0, followed by the proposed fix.
We should also attempt to return to normal settings after the margin assessment.
HOST=HOSTNAME
git clone https://github.com/opencomputeproject/ocp-diag-pci_lmt.git
#cd ocp-diag-pci_lmt; git checkout main; cd ..
scp -r ocp-diag-pci_lmt root@$HOST:
# Test config; referenced on the host as ./ocp-diag-pci_lmt/configs/test.JSON
echo """{
"platform_name": "FB_SWITCH_SWITCH",
"lmt_groups": [
{
"name": "SWITCH_RX",
"receiver_number": 1,
"bdf_list": [
"0000:e2:04.0",
"0000:c2:03.0"
],
"margin_type": "TIMING",
"margin_direction": "left",
"margin_steps": [
0,2
]
}
]
}""" > test.JSON
scp test.JSON root@$HOST:
ssh root@$HOST
Currently we are testing with python3.9
dnf -y install python39
python3.9 -m ocp-diag-pci_lmt.src.pci_lmt_bin.main ./ocp-diag-pci_lmt/configs/test.JSON -o csv
[root@twshared30621.01.cln6 ~]# python3.9 -m ocp-diag-pci_lmt.src.pci_lmt_bin.main ./ocp-diag-pci_lmt/configs/test.JSON -o csv
test_info.run_id,test_info.timestamp,test_info.host_id,test_info.hostname,test_info.model_name,test_info.dwell_time_secs,test_info.elapsed_time_secs,test_info.error_count_limit,test_info.test_version,test_info.annotation,device_info.bdf,device_info.speed,device_info.width,device_info.lmt_capable,device_info.ind_error_sampler,device_info.sample_reporting_method,device_info.ind_left_right_timing,device_info.ind_up_down_voltage,device_info.voltage_supported,device_info.num_voltage_steps,device_info.num_timing_steps,device_info.max_timing_offset,device_info.max_voltage_offset,device_info.sampling_rate_voltage,device_info.sampling_rate_timing,device_info.max_lanes,device_info.reserved,lane,receiver_number,margin_type,step,sample_count,sample_count_bits,error_count,ber,error,error_msg
43716534661591451691390801274462684509,1696645718,,twshared30621.01.cln6.facebook.com,,5,5.003610610961914,63,1.2.0,SWITCH_RX,0000:e2:04.0,32GT/s,16,False,False,0,False,False,False,0,0,0,0,0,0,0,0,0,1,timing_left,0,-1,-1,-1,-1.0,True,ERROR: FetchMarginControlCapabilities - Timedout
43716534661591451691390801274462684509,1696645718,,twshared30621.01.cln6.facebook.com,,5,5.003610610961914,63,1.2.0,SWITCH_RX,0000:e2:04.0,32GT/s,16,False,False,0,False,False,False,0,0,0,0,0,0,0,0,1,1,timing_left,0,-1,-1,-1,-1.0,True,ERROR: FetchMarginControlCapabilities - Timedout
...
1031540232177008814616933348871881381525,1696645724,,twshared30621.01.cln6.facebook.com,,5,5.005248069763184,63,1.2.0,SWITCH_RX,0000:c2:03.0,32GT/s,16,False,False,0,False,False,False,0,0,0,0,0,0,0,0,12,1,timing_left,2,-1,-1,-1,-1.0,True,ERROR: FetchMarginControlCapabilities - Timedout
1031540232177008814616933348871881381525,1696645724,,twshared30621.01.cln6.facebook.com,,5,5.005248069763184,63,1.2.0,SWITCH_RX,0000:c2:03.0,32GT/s,16,False,False,0,False,False,False,0,0,0,0,0,0,0,0,13,1,timing_left,2,-1,-1,-1,-1.0,True,ERROR: FetchMarginControlCapabilities - Timedout
1031540232177008814616933348871881381525,1696645724,,twshared30621.01.cln6.facebook.com,,5,5.005248069763184,63,1.2.0,SWITCH_RX,0000:c2:03.0,32GT/s,16,False,False,0,False,False,False,0,0,0,0,0,0,0,0,14,1,timing_left,2,-1,-1,-1,-1.0,True,ERROR: FetchMarginControlCapabilities - Timedout
1031540232177008814616933348871881381525,1696645724,,twshared30621.01.cln6.facebook.com,,5,5.005248069763184,63,1.2.0,SWITCH_RX,0000:c2:03.0,32GT/s,16,False,False,0,False,False,False,0,0,0,0,0,0,0,0,15,1,timing_left,2,-1,-1,-1,-1.0,True,ERROR: FetchMarginControlCapabilities - Timedout
vi ./ocp-diag-pci_lmt/src/pci_lmt/collector.py
def info_lane_margin_on_device_list(self):
for dev in self.devices:
for lane in [0]:
*** ADD: *** ret = dev.goto_normal_settings(lane=lane, receiver_number=self.receiver_number)
ret = dev.fetch_margin_control_capabilities(lane=lane, receiver_number=self.receiver_number)
if ret["error"] is None:
dev.primed = True
logger.info(
"Device %s ReceiverNum %d PRIMED: %s",
dev.device_info.bdf,
self.receiver_number,
dev.device_info,
)
continue
[root@twshared30621.01.cln6 ~]# python3.9 -m ocp-diag-pci_lmt.src.pci_lmt_bin.main ./ocp-diag-pci_lmt/configs/test.JSON -o csv
test_info.run_id,test_info.timestamp,test_info.host_id,test_info.hostname,test_info.model_name,test_info.dwell_time_secs,test_info.elapsed_time_secs,test_info.error_count_limit,test_info.test_version,test_info.annotation,device_info.bdf,device_info.speed,device_info.width,device_info.lmt_capable,device_info.ind_error_sampler,device_info.sample_reporting_method,device_info.ind_left_right_timing,device_info.ind_up_down_voltage,device_info.voltage_supported,device_info.num_voltage_steps,device_info.num_timing_steps,device_info.max_timing_offset,device_info.max_voltage_offset,device_info.sampling_rate_voltage,device_info.sampling_rate_timing,device_info.max_lanes,device_info.reserved,lane,receiver_number,margin_type,step,sample_count,sample_count_bits,error_count,ber,error,error_msg
38654274535780517444756117265885691932,1696646554,,twshared30621.01.cln6.facebook.com,,5,5.750913143157959,63,1.2.0,SWITCH_RX,0000:e2:04.0,32GT/s,16,True,1,0,1,1,1,127,31,32,49,0,0,15,0,0,1,timing_left,0,79,84551870,0,0.0,False,
38654274535780517444756117265885691932,1696646554,,twshared30621.01.cln6.facebook.com,,5,5.750913143157959,63,1.2.0,SWITCH_RX,0000:e2:04.0,32GT/s,16,True,1,0,1,1,1,127,31,32,49,0,0,15,0,1,1,timing_left,0,79,84551870,0,0.0,False,
...
897017214416983682810574165152126079142,1696646562,,twshared30621.01.cln6.facebook.com,,5,5.719206809997559,63,1.2.0,SWITCH_RX,0000:c2:03.0,32GT/s,16,True,1,0,1,1,1,127,31,32,49,0,0,15,0,12,1,timing_left,2,79,84551870,0,0.0,False,
897017214416983682810574165152126079142,1696646562,,twshared30621.01.cln6.facebook.com,,5,5.719206809997559,63,1.2.0,SWITCH_RX,0000:c2:03.0,32GT/s,16,True,1,0,1,1,1,127,31,32,49,0,0,15,0,13,1,timing_left,2,79,84551870,0,0.0,False,
897017214416983682810574165152126079142,1696646562,,twshared30621.01.cln6.facebook.com,,5,5.719206809997559,63,1.2.0,SWITCH_RX,0000:c2:03.0,32GT/s,16,True,1,0,1,1,1,127,31,32,49,0,0,15,0,14,1,timing_left,2,79,84551870,0,0.0,False,
897017214416983682810574165152126079142,1696646562,,twshared30621.01.cln6.facebook.com,,5,5.719206809997559,63,1.2.0,SWITCH_RX,0000:c2:03.0,32GT/s,16,True,1,0,1,1,1,127,31,32,49,0,0,15,0,15,1,timing_left,2,78,67108864,0,0.0,False,
Beta Was this translation helpful? Give feedback.
All reactions