Skip to content

Commit eacabb5

Browse files
committed
nouveau: push event block/allowing out of the fence context
There is a deadlock between the irq and fctx locks: the irq handling takes the irq lock and then the fctx lock, while the fence signalling takes the fctx lock and then the irq lock. This splits the fence signalling path so that the code that hits the irq lock is done in a separate work queue. This seems to fix crashes/hangs when using nouveau gsp with i915 primary GPU. Signed-off-by: Dave Airlie <airlied@redhat.com> Link: https://patchwork.freedesktop.org/patch/msgid/20231222043308.3090089-11-airlied@gmail.com
1 parent 9c9dd22 commit eacabb5

File tree

2 files changed

+27
-6
lines changed

2 files changed

+27
-6
lines changed

drivers/gpu/drm/nouveau/nouveau_fence.c

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ nouveau_fence_signal(struct nouveau_fence *fence)
6262
if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) {
6363
struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
6464

65-
if (!--fctx->notify_ref)
65+
if (atomic_dec_and_test(&fctx->notify_ref))
6666
drop = 1;
6767
}
6868

@@ -103,6 +103,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error)
103103
void
104104
nouveau_fence_context_del(struct nouveau_fence_chan *fctx)
105105
{
106+
cancel_work_sync(&fctx->allow_block_work);
106107
nouveau_fence_context_kill(fctx, 0);
107108
nvif_event_dtor(&fctx->event);
108109
fctx->dead = 1;
@@ -167,6 +168,18 @@ nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc
167168
return ret;
168169
}
169170

171+
static void
172+
nouveau_fence_work_allow_block(struct work_struct *work)
173+
{
174+
struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan,
175+
allow_block_work);
176+
177+
if (atomic_read(&fctx->notify_ref) == 0)
178+
nvif_event_block(&fctx->event);
179+
else
180+
nvif_event_allow(&fctx->event);
181+
}
182+
170183
void
171184
nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx)
172185
{
@@ -178,6 +191,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha
178191
} args;
179192
int ret;
180193

194+
INIT_WORK(&fctx->allow_block_work, nouveau_fence_work_allow_block);
181195
INIT_LIST_HEAD(&fctx->flip);
182196
INIT_LIST_HEAD(&fctx->pending);
183197
spin_lock_init(&fctx->lock);
@@ -521,15 +535,19 @@ static bool nouveau_fence_enable_signaling(struct dma_fence *f)
521535
struct nouveau_fence *fence = from_fence(f);
522536
struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
523537
bool ret;
538+
bool do_work;
524539

525-
if (!fctx->notify_ref++)
526-
nvif_event_allow(&fctx->event);
540+
if (atomic_inc_return(&fctx->notify_ref) == 0)
541+
do_work = true;
527542

528543
ret = nouveau_fence_no_signaling(f);
529544
if (ret)
530545
set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags);
531-
else if (!--fctx->notify_ref)
532-
nvif_event_block(&fctx->event);
546+
else if (atomic_dec_and_test(&fctx->notify_ref))
547+
do_work = true;
548+
549+
if (do_work)
550+
schedule_work(&fctx->allow_block_work);
533551

534552
return ret;
535553
}

drivers/gpu/drm/nouveau/nouveau_fence.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#define __NOUVEAU_FENCE_H__
44

55
#include <linux/dma-fence.h>
6+
#include <linux/workqueue.h>
67
#include <nvif/event.h>
78

89
struct nouveau_drm;
@@ -45,7 +46,9 @@ struct nouveau_fence_chan {
4546
char name[32];
4647

4748
struct nvif_event event;
48-
int notify_ref, dead, killed;
49+
struct work_struct allow_block_work;
50+
atomic_t notify_ref;
51+
int dead, killed;
4952
};
5053

5154
struct nouveau_fence_priv {

0 commit comments

Comments
 (0)