Skip to content

Commit caf2f15

Browse files
committed
drm/xe: Move survivability back to xe
Commit d40f275 ("drm/xe: Move survivability entirely to xe_pci") moved the survivability handling to be done entirely in the xe_pci layer. However, there are some issues with that approach:

1) Survivability mode needs at least the mmio initialized, otherwise it can't really read a register to decide if it should enter that state
2) SR-IOV mode should be initialized, otherwise it's not possible to check if it's a VF

Besides, as pointed out by Riana, the check for xe_survivability_mode_enable() was wrong in xe_pci_probe() since it's not a bool return.

Fix that by moving the initialization to be entirely in the xe_device layer, with the correct dependencies handled: only after mmio and sriov initialization, and not triggering it on error from wait_for_lmem_ready(). This restores the trigger behavior before that commit. The xe_pci layer now only checks for "is it enabled?", like it's doing in xe_pci_suspend()/xe_pci_remove(), etc.

Cc: Riana Tauro <riana.tauro@intel.com>
Fixes: d40f275 ("drm/xe: Move survivability entirely to xe_pci")
Reviewed-by: Riana Tauro <riana.tauro@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250314-fix-survivability-v5-1-fdb3559ea965@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
(cherry picked from commit 86b5e0d)
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
1 parent d2de441 commit caf2f15

File tree

4 files changed

+34
-19
lines changed

4 files changed

+34
-19
lines changed

drivers/gpu/drm/xe/xe_device.c

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
#include "xe_pxp.h"
5454
#include "xe_query.h"
5555
#include "xe_shrinker.h"
56+
#include "xe_survivability_mode.h"
5657
#include "xe_sriov.h"
5758
#include "xe_tile.h"
5859
#include "xe_ttm_stolen_mgr.h"
@@ -705,8 +706,20 @@ int xe_device_probe_early(struct xe_device *xe)
705706
sriov_update_device_info(xe);
706707

707708
err = xe_pcode_probe_early(xe);
708-
if (err)
709-
return err;
709+
if (err) {
710+
int save_err = err;
711+
712+
/*
713+
* Try to leave device in survivability mode if that is
714+
* possible, but still return the previous error for error
715+
* propagation
716+
*/
717+
err = xe_survivability_mode_enable(xe);
718+
if (err)
719+
return err;
720+
721+
return save_err;
722+
}
710723

711724
err = wait_for_lmem_ready(xe);
712725
if (err)

drivers/gpu/drm/xe/xe_pci.c

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -803,16 +803,14 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
803803
return err;
804804

805805
err = xe_device_probe_early(xe);
806-
807-
/*
808-
* In Boot Survivability mode, no drm card is exposed and driver is
809-
* loaded with bare minimum to allow for firmware to be flashed through
810-
* mei. If early probe fails, check if survivability mode is flagged by
811-
* HW to be enabled. In that case enable it and return success.
812-
*/
813806
if (err) {
814-
if (xe_survivability_mode_required(xe) &&
815-
xe_survivability_mode_enable(xe))
807+
/*
808+
* In Boot Survivability mode, no drm card is exposed and driver
809+
* is loaded with bare minimum to allow for firmware to be
810+
* flashed through mei. If early probe failed, but it managed to
811+
* enable survivability mode, return success.
812+
*/
813+
if (xe_survivability_mode_is_enabled(xe))
816814
return 0;
817815

818816
return err;

drivers/gpu/drm/xe/xe_survivability_mode.c

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -178,15 +178,16 @@ bool xe_survivability_mode_is_enabled(struct xe_device *xe)
178178
return xe->survivability.mode;
179179
}
180180

181-
/**
182-
* xe_survivability_mode_required - checks if survivability mode is required
183-
* @xe: xe device instance
181+
/*
182+
* survivability_mode_requested - check if it's possible to enable
183+
* survivability mode and that was requested by firmware
184184
*
185-
* This function reads the boot status from Pcode
185+
* This function reads the boot status from Pcode.
186186
*
187-
* Return: true if boot status indicates failure, false otherwise
187+
* Return: true if platform support is available and boot status indicates
188+
* failure, false otherwise.
188189
*/
189-
bool xe_survivability_mode_required(struct xe_device *xe)
190+
static bool survivability_mode_requested(struct xe_device *xe)
190191
{
191192
struct xe_survivability *survivability = &xe->survivability;
192193
struct xe_mmio *mmio = xe_root_tile_mmio(xe);
@@ -208,14 +209,18 @@ bool xe_survivability_mode_required(struct xe_device *xe)
208209
*
209210
* Initialize survivability information and enable survivability mode
210211
*
211-
* Return: 0 for success, negative error code otherwise.
212+
* Return: 0 if survivability mode is enabled or not requested; negative error
213+
* code otherwise.
212214
*/
213215
int xe_survivability_mode_enable(struct xe_device *xe)
214216
{
215217
struct xe_survivability *survivability = &xe->survivability;
216218
struct xe_survivability_info *info;
217219
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
218220

221+
if (!survivability_mode_requested(xe))
222+
return 0;
223+
219224
survivability->size = MAX_SCRATCH_MMIO;
220225

221226
info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info),

drivers/gpu/drm/xe/xe_survivability_mode.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,5 @@ struct xe_device;
1212

1313
int xe_survivability_mode_enable(struct xe_device *xe);
1414
bool xe_survivability_mode_is_enabled(struct xe_device *xe);
15-
bool xe_survivability_mode_required(struct xe_device *xe);
1615

1716
#endif /* _XE_SURVIVABILITY_MODE_H_ */

0 commit comments

Comments
 (0)