Skip to content

Commit 55b7285

Browse files
committed
Merge tag 'drm-intel-gt-next-2023-10-19' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
Driver Changes:

Fixes/improvements/new stuff:

- Retry gtt fault when out of fence registers (Ville Syrjälä)
- Determine context valid in OA reports [perf] (Umesh Nerlige Ramappa)

Future platform enablement:

- GuC based TLB invalidation for Meteorlake (Jonathan Cavitt, Prathap Kumar Valsan)
- Don't set PIPE_CONTROL_FLUSH_L3 [mtl] (Vinay Belgaumkar)

Miscellaneous:

- Clean up zero initializers [guc,pxp] (Ville Syrjälä)
- Prevent potential null-ptr-deref in engine_init_common (Nirmoy Das)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/ZTFDFSbd/U7YP+hI@tursulin-desk
2 parents 3ac5fa3 + 7eeaedf commit 55b7285

21 files changed

+407
-30
lines changed

drivers/gpu/drm/i915/gem/i915_gem_mman.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ static vm_fault_t i915_error_to_vmf_fault(int err)
235235
case 0:
236236
case -EAGAIN:
237237
case -ENOSPC: /* transient failure to evict? */
238+
case -ENOBUFS: /* temporarily out of fences? */
238239
case -ERESTARTSYS:
239240
case -EINTR:
240241
case -EBUSY:

drivers/gpu/drm/i915/gt/gen8_engine_cs.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,8 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
278278
* deals with Protected Memory which is not needed for
279279
* AUX CCS invalidation and lead to unwanted side effects.
280280
*/
281-
if (mode & EMIT_FLUSH)
281+
if ((mode & EMIT_FLUSH) &&
282+
GRAPHICS_VER_FULL(rq->i915) < IP_VER(12, 70))
282283
bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
283284

284285
bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
@@ -812,12 +813,14 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
812813
u32 flags = (PIPE_CONTROL_CS_STALL |
813814
PIPE_CONTROL_TLB_INVALIDATE |
814815
PIPE_CONTROL_TILE_CACHE_FLUSH |
815-
PIPE_CONTROL_FLUSH_L3 |
816816
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
817817
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
818818
PIPE_CONTROL_DC_FLUSH_ENABLE |
819819
PIPE_CONTROL_FLUSH_ENABLE);
820820

821+
if (GRAPHICS_VER_FULL(rq->i915) < IP_VER(12, 70))
822+
flags |= PIPE_CONTROL_FLUSH_L3;
823+
821824
/* Wa_14016712196 */
822825
if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915))
823826
/* dummy PIPE_CONTROL + depth flush */

drivers/gpu/drm/i915/gt/intel_engine_cs.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1491,7 +1491,8 @@ static int engine_init_common(struct intel_engine_cs *engine)
14911491
return 0;
14921492

14931493
err_bce_context:
1494-
intel_engine_destroy_pinned_context(bce);
1494+
if (bce)
1495+
intel_engine_destroy_pinned_context(bce);
14951496
err_ce_context:
14961497
intel_engine_destroy_pinned_context(ce);
14971498
return ret;

drivers/gpu/drm/i915/gt/intel_ggtt.c

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -206,22 +206,36 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
206206
intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
207207
}
208208

209+
static void guc_ggtt_ct_invalidate(struct intel_gt *gt)
210+
{
211+
struct intel_uncore *uncore = gt->uncore;
212+
intel_wakeref_t wakeref;
213+
214+
with_intel_runtime_pm_if_active(uncore->rpm, wakeref) {
215+
struct intel_guc *guc = &gt->uc.guc;
216+
217+
intel_guc_invalidate_tlb_guc(guc);
218+
}
219+
}
220+
209221
static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
210222
{
211223
struct drm_i915_private *i915 = ggtt->vm.i915;
224+
struct intel_gt *gt;
212225

213226
gen8_ggtt_invalidate(ggtt);
214227

215-
if (GRAPHICS_VER(i915) >= 12) {
216-
struct intel_gt *gt;
217-
218-
list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
228+
list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
229+
if (intel_guc_tlb_invalidation_is_available(&gt->uc.guc)) {
230+
guc_ggtt_ct_invalidate(gt);
231+
} else if (GRAPHICS_VER(i915) >= 12) {
219232
intel_uncore_write_fw(gt->uncore,
220233
GEN12_GUC_TLB_INV_CR,
221234
GEN12_GUC_TLB_INV_CR_INVALIDATE);
222-
} else {
223-
intel_uncore_write_fw(ggtt->vm.gt->uncore,
224-
GEN8_GTCR, GEN8_GTCR_INVALIDATE);
235+
} else {
236+
intel_uncore_write_fw(gt->uncore,
237+
GEN8_GTCR, GEN8_GTCR_INVALIDATE);
238+
}
225239
}
226240
}
227241

@@ -1243,7 +1257,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
12431257
ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
12441258
}
12451259

1246-
if (intel_uc_wants_guc(&ggtt->vm.gt->uc))
1260+
if (intel_uc_wants_guc_submission(&ggtt->vm.gt->uc))
12471261
ggtt->invalidate = guc_ggtt_invalidate;
12481262
else
12491263
ggtt->invalidate = gen8_ggtt_invalidate;

drivers/gpu/drm/i915/gt/intel_tlb.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "intel_gt_print.h"
1313
#include "intel_gt_regs.h"
1414
#include "intel_tlb.h"
15+
#include "uc/intel_guc.h"
1516

1617
/*
1718
* HW architecture suggests a typical invalidation time of 40us,
@@ -131,11 +132,24 @@ void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
131132
return;
132133

133134
with_intel_gt_pm_if_awake(gt, wakeref) {
135+
struct intel_guc *guc = &gt->uc.guc;
136+
134137
mutex_lock(&gt->tlb.invalidate_lock);
135138
if (tlb_seqno_passed(gt, seqno))
136139
goto unlock;
137140

138-
mmio_invalidate_full(gt);
141+
if (HAS_GUC_TLB_INVALIDATION(gt->i915)) {
142+
/*
143+
* Only perform GuC TLB invalidation if GuC is ready.
144+
* The only time GuC could not be ready is on GT reset,
145+
* which would clobber all the TLBs anyways, making
146+
* any TLB invalidation path here unnecessary.
147+
*/
148+
if (intel_guc_is_ready(guc))
149+
intel_guc_invalidate_tlb_engines(guc);
150+
} else {
151+
mmio_invalidate_full(gt);
152+
}
139153

140154
write_seqcount_invalidate(&gt->tlb.seqno);
141155
unlock:

drivers/gpu/drm/i915/gt/selftest_tlb.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,15 @@ pte_tlbinv(struct intel_context *ce,
136136
i915_request_get(rq);
137137
i915_request_add(rq);
138138

139-
/* Short sleep to sanitycheck the batch is spinning before we begin */
140-
msleep(10);
139+
/*
140+
* Short sleep to sanity-check that the batch is spinning before we begin.
141+
* FIXME: Why is GSC so slow?
142+
*/
143+
if (ce->engine->class == OTHER_CLASS)
144+
msleep(200);
145+
else
146+
msleep(10);
147+
141148
if (va == vb) {
142149
if (!i915_request_completed(rq)) {
143150
pr_err("%s(%s): Semaphore sanitycheck failed %llx, with alignment %llx, using PTE size %x (phys %x, sg %x)\n",

drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ enum intel_guc_action {
138138
INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
139139
INTEL_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
140140
INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
141+
INTEL_GUC_ACTION_TLB_INVALIDATION = 0x7000,
142+
INTEL_GUC_ACTION_TLB_INVALIDATION_DONE = 0x7001,
141143
INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002,
142144
INTEL_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
143145
INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
@@ -181,4 +183,35 @@ enum intel_guc_state_capture_event_status {
181183

182184
#define INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK 0x000000FF
183185

186+
#define INTEL_GUC_TLB_INVAL_TYPE_MASK REG_GENMASK(7, 0)
187+
#define INTEL_GUC_TLB_INVAL_MODE_MASK REG_GENMASK(11, 8)
188+
#define INTEL_GUC_TLB_INVAL_FLUSH_CACHE REG_BIT(31)
189+
190+
enum intel_guc_tlb_invalidation_type {
191+
INTEL_GUC_TLB_INVAL_ENGINES = 0x0,
192+
INTEL_GUC_TLB_INVAL_GUC = 0x3,
193+
};
194+
195+
/*
196+
* 0: Heavy mode of Invalidation:
197+
* The pipeline of the engine(s) for which the invalidation is targeted to is
198+
* blocked, and all the in-flight transactions are guaranteed to be Globally
199+
* Observed before completing the TLB invalidation
200+
* 1: Lite mode of Invalidation:
201+
* TLBs of the targeted engine(s) are immediately invalidated.
202+
* In-flight transactions are NOT guaranteed to be Globally Observed before
203+
* completing TLB invalidation.
204+
* Light Invalidation Mode is to be used only when
205+
* it can be guaranteed (by SW) that the address translations remain invariant
206+
* for the in-flight transactions across the TLB invalidation. In other words,
207+
* this mode can be used when the TLB invalidation is intended to clear out the
208+
* stale cached translations that are no longer in use. Light Invalidation Mode
209+
* is much faster than the Heavy Invalidation Mode, as it does not wait for the
210+
* in-flight transactions to be Globally Observed.
211+
*/
212+
enum intel_guc_tlb_inval_mode {
213+
INTEL_GUC_TLB_INVAL_MODE_HEAVY = 0x0,
214+
INTEL_GUC_TLB_INVAL_MODE_LITE = 0x1,
215+
};
216+
184217
#endif /* _ABI_GUC_ACTIONS_ABI_H */

drivers/gpu/drm/i915/gt/uc/intel_guc.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,18 @@ struct intel_guc {
7979
*/
8080
atomic_t outstanding_submission_g2h;
8181

82+
/** @tlb_lookup: xarray to store all pending TLB invalidation requests */
83+
struct xarray tlb_lookup;
84+
85+
/**
86+
* @serial_slot: id to the initial waiter created in tlb_lookup,
87+
* which is used only when allocating a new waiter fails.
88+
*/
89+
u32 serial_slot;
90+
91+
/** @next_seqno: the next id (sequence number) to allocate. */
92+
u32 next_seqno;
93+
8294
/** @interrupts: pointers to GuC interrupt-managing functions. */
8395
struct {
8496
bool enabled;
@@ -288,6 +300,11 @@ struct intel_guc {
288300
#endif
289301
};
290302

303+
struct intel_guc_tlb_wait {
304+
struct wait_queue_head wq;
305+
bool busy;
306+
};
307+
291308
/*
292309
* GuC version number components are only 8-bit, so converting to a 32bit 8.8.8
293310
* integer works.
@@ -515,4 +532,10 @@ void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p);
515532

516533
int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc);
517534

535+
bool intel_guc_tlb_invalidation_is_available(struct intel_guc *guc);
536+
int intel_guc_invalidate_tlb_engines(struct intel_guc *guc);
537+
int intel_guc_invalidate_tlb_guc(struct intel_guc *guc);
538+
int intel_guc_tlb_invalidation_done(struct intel_guc *guc,
539+
const u32 *payload, u32 len);
540+
void wake_up_all_tlb_invalidate(struct intel_guc *guc);
518541
#endif

drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1101,8 +1101,8 @@ guc_capture_create_prealloc_nodes(struct intel_guc *guc)
11011101
static int
11021102
guc_capture_extract_reglists(struct intel_guc *guc, struct __guc_capture_bufstate *buf)
11031103
{
1104-
struct guc_state_capture_group_header_t ghdr = {0};
1105-
struct guc_state_capture_header_t hdr = {0};
1104+
struct guc_state_capture_group_header_t ghdr = {};
1105+
struct guc_state_capture_header_t hdr = {};
11061106
struct __guc_capture_parsed_output *node = NULL;
11071107
struct guc_mmio_reg *regs = NULL;
11081108
int i, numlists, numregs, ret = 0;

drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,33 @@ enum { CTB_SEND = 0, CTB_RECV = 1 };
103103

104104
enum { CTB_OWNER_HOST = 0 };
105105

106+
/*
107+
* Some H2G commands involve a synchronous response that the driver needs
108+
* to wait for. In such cases, a timeout is required to prevent the driver
109+
* from waiting forever in the case of an error (either no error response
110+
* is defined in the protocol or something has died and requires a reset).
111+
* The specific command may be defined as having a time bound response but
112+
* the CT is a queue and that time guarantee only starts from the point
113+
* when the command reaches the head of the queue and is processed by GuC.
114+
*
115+
* Ideally there would be a helper to report the progress of a given
116+
* command through the CT. However, that would require a significant
117+
* amount of work in the CT layer. In the meantime, provide a reasonable
118+
* estimation of the worst case latency it should take for the entire
119+
* queue to drain. And therefore, how long a caller should wait before
120+
* giving up on their request. The current estimate is based on empirical
121+
* measurement of a test that fills the buffer with context creation and
122+
* destruction requests as they seem to be the slowest operation.
123+
*/
124+
long intel_guc_ct_max_queue_time_jiffies(void)
125+
{
126+
/*
127+
* A 4KB buffer full of context destroy commands takes a little
128+
* over a second to process so bump that to 2s to be super safe.
129+
*/
130+
return (CTB_H2G_BUFFER_SIZE * HZ) / SZ_2K;
131+
}
132+
106133
static void ct_receive_tasklet_func(struct tasklet_struct *t);
107134
static void ct_incoming_request_worker_func(struct work_struct *w);
108135

@@ -1115,6 +1142,9 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
11151142
case INTEL_GUC_ACTION_NOTIFY_EXCEPTION:
11161143
ret = intel_guc_crash_process_msg(guc, action);
11171144
break;
1145+
case INTEL_GUC_ACTION_TLB_INVALIDATION_DONE:
1146+
ret = intel_guc_tlb_invalidation_done(guc, payload, len);
1147+
break;
11181148
default:
11191149
ret = -EOPNOTSUPP;
11201150
break;
@@ -1186,9 +1216,17 @@ static int ct_handle_event(struct intel_guc_ct *ct, struct ct_incoming_msg *requ
11861216
switch (action) {
11871217
case INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
11881218
case INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
1219+
case INTEL_GUC_ACTION_TLB_INVALIDATION_DONE:
11891220
g2h_release_space(ct, request->size);
11901221
}
11911222

1223+
/*
1224+
* TLB invalidation responses must be handled immediately as processing
1225+
* of other G2H notifications may be blocked by an invalidation request.
1226+
*/
1227+
if (action == INTEL_GUC_ACTION_TLB_INVALIDATION_DONE)
1228+
return ct_process_request(ct, request);
1229+
11921230
spin_lock_irqsave(&ct->requests.lock, flags);
11931231
list_add_tail(&request->link, &ct->requests.incoming);
11941232
spin_unlock_irqrestore(&ct->requests.lock, flags);

0 commit comments

Comments
 (0)