Skip to content

Commit e855869

Browse files
Tomer Tayar authored and ogabbay committed
accel/habanalabs: fix glbl error cause handling
The glbl error cause handling has a wrong assumption that all error bits are consecutive. Fix the handling to check all relevant error bits per ASIC.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Carl Vanderlip <quic_carlv@quicinc.com>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
1 parent c1e89ae commit e855869

File tree

5 files changed

+35
-18
lines changed

5 files changed

+35
-18
lines changed

drivers/accel/habanalabs/common/habanalabs.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -647,7 +647,7 @@ struct hl_hints_range {
647647
* @num_engine_cores: number of engine cpu cores.
648648
* @max_num_of_engines: maximum number of all engines in the ASIC.
649649
* @num_of_special_blocks: special_blocks array size.
650-
* @glbl_err_cause_num: global err cause number.
650+
* @glbl_err_max_cause_num: global err max cause number.
651651
* @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is
652652
* not supported.
653653
* @reserved_fw_mem_size: size in MB of dram memory reserved for FW.
@@ -779,7 +779,7 @@ struct asic_fixed_properties {
779779
u32 num_engine_cores;
780780
u32 max_num_of_engines;
781781
u32 num_of_special_blocks;
782-
u32 glbl_err_cause_num;
782+
u32 glbl_err_max_cause_num;
783783
u32 hbw_flush_reg;
784784
u32 reserved_fw_mem_size;
785785
u16 collective_first_sob;

drivers/accel/habanalabs/common/security.c

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,31 @@
77

88
#include "habanalabs.h"
99

10-
static const char * const hl_glbl_error_cause[HL_MAX_NUM_OF_GLBL_ERR_CAUSE] = {
10+
static const char * const hl_glbl_error_cause[] = {
1111
"Error due to un-priv read",
1212
"Error due to un-secure read",
1313
"Error due to read from unmapped reg",
1414
"Error due to un-priv write",
1515
"Error due to un-secure write",
1616
"Error due to write to unmapped reg",
17+
"N/A",
18+
"N/A",
19+
"N/A",
20+
"N/A",
21+
"N/A",
22+
"N/A",
23+
"N/A",
24+
"N/A",
25+
"N/A",
26+
"N/A",
1727
"External I/F write sec violation",
1828
"External I/F write to un-mapped reg",
29+
"N/A",
30+
"N/A",
31+
"N/A",
32+
"N/A",
33+
"N/A",
34+
"N/A",
1935
"Read to write only",
2036
"Write to read only"
2137
};
@@ -671,10 +687,11 @@ static bool hl_check_block_range_exclusion(struct hl_device *hdev,
671687
static int hl_read_glbl_errors(struct hl_device *hdev,
672688
u32 blk_idx, u32 major, u32 minor, u32 sub_minor, void *data)
673689
{
674-
struct hl_special_block_info *special_blocks = hdev->asic_prop.special_blocks;
690+
struct asic_fixed_properties *prop = &hdev->asic_prop;
691+
struct hl_special_block_info *special_blocks = prop->special_blocks;
675692
struct hl_special_block_info *current_block = &special_blocks[blk_idx];
676693
u32 glbl_err_addr, glbl_err_cause, addr_val, cause_val, block_base,
677-
base = current_block->base_addr - lower_32_bits(hdev->asic_prop.cfg_base_address);
694+
base = current_block->base_addr - lower_32_bits(prop->cfg_base_address);
678695
int i;
679696

680697
block_base = base + major * current_block->major_offset +
@@ -689,13 +706,13 @@ static int hl_read_glbl_errors(struct hl_device *hdev,
689706
glbl_err_addr = block_base + HL_GLBL_ERR_ADDR_OFFSET;
690707
addr_val = RREG32(glbl_err_addr);
691708

692-
for (i = 0 ; i < hdev->asic_prop.glbl_err_cause_num ; i++) {
709+
for (i = 0 ; i <= prop->glbl_err_max_cause_num ; i++) {
693710
if (cause_val & BIT(i))
694711
dev_err_ratelimited(hdev->dev,
695-
"%s, addr %#llx\n",
696-
hl_glbl_error_cause[i],
697-
hdev->asic_prop.cfg_base_address + block_base +
698-
FIELD_GET(HL_GLBL_ERR_ADDRESS_MASK, addr_val));
712+
"%s, addr %#llx\n",
713+
hl_glbl_error_cause[i],
714+
prop->cfg_base_address + block_base +
715+
FIELD_GET(HL_GLBL_ERR_ADDRESS_MASK, addr_val));
699716
}
700717

701718
WREG32(glbl_err_cause, cause_val);

drivers/accel/habanalabs/common/security.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@
1313
struct hl_device;
1414

1515
/* special blocks */
16-
#define HL_MAX_NUM_OF_GLBL_ERR_CAUSE 10
17-
#define HL_GLBL_ERR_ADDRESS_MASK GENMASK(11, 0)
16+
#define HL_GLBL_ERR_ADDRESS_MASK GENMASK(11, 0)
1817
/* GLBL_ERR_ADDR register offset from the start of the block */
1918
#define HL_GLBL_ERR_ADDR_OFFSET 0xF44
2019
/* GLBL_ERR_CAUSE register offset from the start of the block */

drivers/accel/habanalabs/gaudi2/gaudi2.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,13 @@
158158
#define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \
159159
(RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh))
160160

161-
#define PSOC_RAZWI_ENG_STR_SIZE 128
162-
#define PSOC_RAZWI_MAX_ENG_PER_RTR 5
161+
#define PSOC_RAZWI_ENG_STR_SIZE 128
162+
#define PSOC_RAZWI_MAX_ENG_PER_RTR 5
163163

164164
/* HW scrambles only bits 0-25 */
165-
#define HW_UNSCRAMBLED_BITS_MASK GENMASK_ULL(63, 26)
165+
#define HW_UNSCRAMBLED_BITS_MASK GENMASK_ULL(63, 26)
166+
167+
#define GAUDI2_GLBL_ERR_MAX_CAUSE_NUM 17
166168

167169
struct gaudi2_razwi_info {
168170
u32 axuser_xy;
@@ -3587,7 +3589,7 @@ static int gaudi2_special_blocks_config(struct hl_device *hdev)
35873589
int i, rc;
35883590

35893591
/* Configure Special blocks */
3590-
prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE;
3592+
prop->glbl_err_max_cause_num = GAUDI2_GLBL_ERR_MAX_CAUSE_NUM;
35913593
prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
35923594
prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
35933595
sizeof(*prop->special_blocks), GFP_KERNEL);

drivers/accel/habanalabs/gaudi2/gaudi2P.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,9 +237,8 @@
237237
#define GAUDI2_SOB_INCREMENT_BY_ONE (FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1) | \
238238
FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1))
239239

240-
#define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0)
240+
#define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0)
241241

242-
#define GAUDI2_NUM_OF_GLBL_ERR_CAUSE 8
243242

244243
enum gaudi2_reserved_sob_id {
245244
GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST,

0 commit comments

Comments
 (0)