Skip to content

Commit 98d187a

Browse files
Fenghua Yu authored and vinodkoul committed
dmaengine: idxd: Enable Function Level Reset (FLR) for halt
When a DSA/IAA device hits a fatal error, the device enters a halt state. The driver can reset the device depending on the Reset Type required by hardware to recover the device. Supported Reset Types are: 0: Reset Device command; 1: Function Level Reset (FLR); 2: Warm reset; 3: Cold reset. Currently, the driver only supports Reset Type 0. This patch adds support for FLR recovery (Reset Type 1). Before issuing a PCIe FLR command, the IDXD device and WQ states are saved. After the FLR command executes, the device is restored to its previous state, allowing the user to continue using the device. Signed-off-by: Fenghua Yu <fenghua.yu@intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Link: https://lore.kernel.org/r/20241122233028.2762809-6-fenghua.yu@intel.com Signed-off-by: Vinod Koul <vkoul@kernel.org>
1 parent 3e114fa commit 98d187a

File tree

2 files changed

+148
-3
lines changed

2 files changed

+148
-3
lines changed

drivers/dma/idxd/init.c

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -983,6 +983,118 @@ static void idxd_device_config_restore(struct idxd_device *idxd,
983983
kfree(idxd_saved->saved_wqs);
984984
}
985985

986+
static void idxd_reset_prepare(struct pci_dev *pdev)
987+
{
988+
struct idxd_device *idxd = pci_get_drvdata(pdev);
989+
struct device *dev = &idxd->pdev->dev;
990+
const char *idxd_name;
991+
int rc;
992+
993+
dev = &idxd->pdev->dev;
994+
idxd_name = dev_name(idxd_confdev(idxd));
995+
996+
struct idxd_saved_states *idxd_saved __free(kfree) =
997+
kzalloc_node(sizeof(*idxd_saved), GFP_KERNEL,
998+
dev_to_node(&pdev->dev));
999+
if (!idxd_saved) {
1000+
dev_err(dev, "HALT: no memory\n");
1001+
1002+
return;
1003+
}
1004+
1005+
/* Save IDXD configurations. */
1006+
rc = idxd_device_config_save(idxd, idxd_saved);
1007+
if (rc < 0) {
1008+
dev_err(dev, "HALT: cannot save %s configs\n", idxd_name);
1009+
1010+
return;
1011+
}
1012+
1013+
idxd->idxd_saved = no_free_ptr(idxd_saved);
1014+
1015+
/* Save PCI device state. */
1016+
pci_save_state(idxd->pdev);
1017+
}
1018+
1019+
static void idxd_reset_done(struct pci_dev *pdev)
1020+
{
1021+
struct idxd_device *idxd = pci_get_drvdata(pdev);
1022+
const char *idxd_name;
1023+
struct device *dev;
1024+
int rc, i;
1025+
1026+
if (!idxd->idxd_saved)
1027+
return;
1028+
1029+
dev = &idxd->pdev->dev;
1030+
idxd_name = dev_name(idxd_confdev(idxd));
1031+
1032+
/* Restore PCI device state. */
1033+
pci_restore_state(idxd->pdev);
1034+
1035+
/* Unbind idxd device from driver. */
1036+
idxd_unbind(&idxd_drv.drv, idxd_name);
1037+
1038+
/*
1039+
* Probe PCI device without allocating or changing
1040+
* idxd software data which keeps the same as before FLR.
1041+
*/
1042+
idxd_pci_probe_alloc(idxd, NULL, NULL);
1043+
1044+
/* Restore IDXD configurations. */
1045+
idxd_device_config_restore(idxd, idxd->idxd_saved);
1046+
1047+
/* Re-configure IDXD device if allowed. */
1048+
if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
1049+
rc = idxd_device_config(idxd);
1050+
if (rc < 0) {
1051+
dev_err(dev, "HALT: %s config fails\n", idxd_name);
1052+
goto out;
1053+
}
1054+
}
1055+
1056+
/* Bind IDXD device to driver. */
1057+
rc = idxd_bind(&idxd_drv.drv, idxd_name);
1058+
if (rc < 0) {
1059+
dev_err(dev, "HALT: binding %s to driver fails\n", idxd_name);
1060+
goto out;
1061+
}
1062+
1063+
/* Bind enabled wq in the IDXD device to driver. */
1064+
for (i = 0; i < idxd->max_wqs; i++) {
1065+
if (test_bit(i, idxd->wq_enable_map)) {
1066+
struct idxd_wq *wq = idxd->wqs[i];
1067+
char wq_name[32];
1068+
1069+
wq->state = IDXD_WQ_DISABLED;
1070+
sprintf(wq_name, "wq%d.%d", idxd->id, wq->id);
1071+
/*
1072+
* Bind to user driver depending on wq type.
1073+
*
1074+
* Currently only support user type WQ. Will support
1075+
* kernel type WQ in the future.
1076+
*/
1077+
if (wq->type == IDXD_WQT_USER)
1078+
rc = idxd_bind(&idxd_user_drv.drv, wq_name);
1079+
else
1080+
rc = -EINVAL;
1081+
if (rc < 0) {
1082+
clear_bit(i, idxd->wq_enable_map);
1083+
dev_err(dev,
1084+
"HALT: unable to re-enable wq %s\n",
1085+
dev_name(wq_confdev(wq)));
1086+
}
1087+
}
1088+
}
1089+
out:
1090+
kfree(idxd->idxd_saved);
1091+
}
1092+
1093+
static const struct pci_error_handlers idxd_error_handler = {
1094+
.reset_prepare = idxd_reset_prepare,
1095+
.reset_done = idxd_reset_done,
1096+
};
1097+
9861098
/*
9871099
* Probe idxd PCI device.
9881100
* If idxd is not given, need to allocate idxd and set up its data.
@@ -1056,6 +1168,16 @@ int idxd_pci_probe_alloc(struct idxd_device *idxd, struct pci_dev *pdev,
10561168
dev_warn(dev, "IDXD debugfs failed to setup\n");
10571169
}
10581170

1171+
if (!alloc_idxd) {
1172+
/* Release interrupts in the IDXD device. */
1173+
idxd_cleanup_interrupts(idxd);
1174+
1175+
/* Re-enable interrupts in the IDXD device. */
1176+
rc = idxd_setup_interrupts(idxd);
1177+
if (rc)
1178+
dev_warn(dev, "IDXD interrupts failed to setup\n");
1179+
}
1180+
10591181
dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n",
10601182
idxd->hw.version);
10611183

@@ -1146,6 +1268,7 @@ static struct pci_driver idxd_pci_driver = {
11461268
.probe = idxd_pci_probe,
11471269
.remove = idxd_remove,
11481270
.shutdown = idxd_shutdown,
1271+
.err_handler = &idxd_error_handler,
11491272
};
11501273

11511274
static int __init idxd_init_module(void)

drivers/dma/idxd/irq.c

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,20 @@ static void process_evl_entries(struct idxd_device *idxd)
383383
mutex_unlock(&evl->lock);
384384
}
385385

386+
static void idxd_device_flr(struct work_struct *work)
387+
{
388+
struct idxd_device *idxd = container_of(work, struct idxd_device, work);
389+
int rc;
390+
391+
/*
392+
* IDXD device requires a Function Level Reset (FLR).
393+
* pci_reset_function() will reset the device with FLR.
394+
*/
395+
rc = pci_reset_function(idxd->pdev);
396+
if (rc)
397+
dev_err(&idxd->pdev->dev, "FLR failed\n");
398+
}
399+
386400
static irqreturn_t idxd_halt(struct idxd_device *idxd)
387401
{
388402
union gensts_reg gensts;
@@ -398,15 +412,23 @@ static irqreturn_t idxd_halt(struct idxd_device *idxd)
398412
*/
399413
INIT_WORK(&idxd->work, idxd_device_reinit);
400414
queue_work(idxd->wq, &idxd->work);
415+
} else if (gensts.reset_type == IDXD_DEVICE_RESET_FLR) {
416+
idxd->state = IDXD_DEV_HALTED;
417+
idxd_mask_error_interrupts(idxd);
418+
dev_dbg(&idxd->pdev->dev,
419+
"idxd halted, doing FLR. After FLR, configs are restored\n");
420+
INIT_WORK(&idxd->work, idxd_device_flr);
421+
queue_work(idxd->wq, &idxd->work);
422+
401423
} else {
402424
idxd->state = IDXD_DEV_HALTED;
403425
idxd_wqs_quiesce(idxd);
404426
idxd_wqs_unmap_portal(idxd);
405427
idxd_device_clear_state(idxd);
406428
dev_err(&idxd->pdev->dev,
407-
"idxd halted, need %s.\n",
408-
gensts.reset_type == IDXD_DEVICE_RESET_FLR ?
409-
"FLR" : "system reset");
429+
"idxd halted, need system reset");
430+
431+
return -ENXIO;
410432
}
411433
}
412434

0 commit comments

Comments
 (0)