@@ -165,14 +165,60 @@ static u32 preparser_disable(bool state)
165
165
return MI_ARB_CHECK | 1 << 8 | state ;
166
166
}
167
167
168
- u32 * gen12_emit_aux_table_inv (struct intel_gt * gt , u32 * cs , const i915_reg_t inv_reg )
168
+ static i915_reg_t gen12_get_aux_inv_reg (struct intel_engine_cs * engine )
169
169
{
170
- u32 gsi_offset = gt -> uncore -> gsi_offset ;
170
+ switch (engine -> id ) {
171
+ case RCS0 :
172
+ return GEN12_CCS_AUX_INV ;
173
+ case BCS0 :
174
+ return GEN12_BCS0_AUX_INV ;
175
+ case VCS0 :
176
+ return GEN12_VD0_AUX_INV ;
177
+ case VCS2 :
178
+ return GEN12_VD2_AUX_INV ;
179
+ case VECS0 :
180
+ return GEN12_VE0_AUX_INV ;
181
+ case CCS0 :
182
+ return GEN12_CCS0_AUX_INV ;
183
+ default :
184
+ return INVALID_MMIO_REG ;
185
+ }
186
+ }
187
+
188
+ static bool gen12_needs_ccs_aux_inv (struct intel_engine_cs * engine )
189
+ {
190
+ i915_reg_t reg = gen12_get_aux_inv_reg (engine );
191
+
192
+ if (IS_PONTEVECCHIO (engine -> i915 ))
193
+ return false;
194
+
195
+ /*
196
+ * So far platforms supported by i915 having flat ccs do not require
197
+ * AUX invalidation. Check also whether the engine requires it.
198
+ */
199
+ return i915_mmio_reg_valid (reg ) && !HAS_FLAT_CCS (engine -> i915 );
200
+ }
201
+
202
+ u32 * gen12_emit_aux_table_inv (struct intel_engine_cs * engine , u32 * cs )
203
+ {
204
+ i915_reg_t inv_reg = gen12_get_aux_inv_reg (engine );
205
+ u32 gsi_offset = engine -> gt -> uncore -> gsi_offset ;
206
+
207
+ if (!gen12_needs_ccs_aux_inv (engine ))
208
+ return cs ;
171
209
172
210
* cs ++ = MI_LOAD_REGISTER_IMM (1 ) | MI_LRI_MMIO_REMAP_EN ;
173
211
* cs ++ = i915_mmio_reg_offset (inv_reg ) + gsi_offset ;
174
212
* cs ++ = AUX_INV ;
175
- * cs ++ = MI_NOOP ;
213
+
214
+ * cs ++ = MI_SEMAPHORE_WAIT_TOKEN |
215
+ MI_SEMAPHORE_REGISTER_POLL |
216
+ MI_SEMAPHORE_POLL |
217
+ MI_SEMAPHORE_SAD_EQ_SDD ;
218
+ * cs ++ = 0 ;
219
+ * cs ++ = i915_mmio_reg_offset (inv_reg ) + gsi_offset ;
220
+ * cs ++ = 0 ;
221
+ * cs ++ = 0 ;
176
222
177
223
return cs ;
178
224
}
@@ -202,41 +248,54 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
202
248
{
203
249
struct intel_engine_cs * engine = rq -> engine ;
204
250
205
- if (mode & EMIT_FLUSH ) {
206
- u32 flags = 0 ;
251
+ /*
252
+ * On Aux CCS platforms the invalidation of the Aux
253
+ * table requires quiescing memory traffic beforehand
254
+ */
255
+ if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv (engine )) {
256
+ u32 bit_group_0 = 0 ;
257
+ u32 bit_group_1 = 0 ;
207
258
int err ;
208
259
u32 * cs ;
209
260
210
261
err = mtl_dummy_pipe_control (rq );
211
262
if (err )
212
263
return err ;
213
264
214
- flags |= PIPE_CONTROL_TILE_CACHE_FLUSH ;
215
- flags |= PIPE_CONTROL_FLUSH_L3 ;
216
- flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH ;
217
- flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH ;
265
+ bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH ;
266
+
267
+ /*
268
+ * When required, in MTL and beyond platforms we
269
+ * need to set the CCS_FLUSH bit in the pipe control
270
+ */
271
+ if (GRAPHICS_VER_FULL (rq -> i915 ) >= IP_VER (12 , 70 ))
272
+ bit_group_0 |= PIPE_CONTROL_CCS_FLUSH ;
273
+
274
+ bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH ;
275
+ bit_group_1 |= PIPE_CONTROL_FLUSH_L3 ;
276
+ bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH ;
277
+ bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH ;
218
278
/* Wa_1409600907:tgl,adl-p */
219
- flags |= PIPE_CONTROL_DEPTH_STALL ;
220
- flags |= PIPE_CONTROL_DC_FLUSH_ENABLE ;
221
- flags |= PIPE_CONTROL_FLUSH_ENABLE ;
279
+ bit_group_1 |= PIPE_CONTROL_DEPTH_STALL ;
280
+ bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE ;
281
+ bit_group_1 |= PIPE_CONTROL_FLUSH_ENABLE ;
222
282
223
- flags |= PIPE_CONTROL_STORE_DATA_INDEX ;
224
- flags |= PIPE_CONTROL_QW_WRITE ;
283
+ bit_group_1 |= PIPE_CONTROL_STORE_DATA_INDEX ;
284
+ bit_group_1 |= PIPE_CONTROL_QW_WRITE ;
225
285
226
- flags |= PIPE_CONTROL_CS_STALL ;
286
+ bit_group_1 |= PIPE_CONTROL_CS_STALL ;
227
287
228
288
if (!HAS_3D_PIPELINE (engine -> i915 ))
229
- flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS ;
289
+ bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS ;
230
290
else if (engine -> class == COMPUTE_CLASS )
231
- flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS ;
291
+ bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS ;
232
292
233
293
cs = intel_ring_begin (rq , 6 );
234
294
if (IS_ERR (cs ))
235
295
return PTR_ERR (cs );
236
296
237
- cs = gen12_emit_pipe_control (cs ,
238
- PIPE_CONTROL0_HDC_PIPELINE_FLUSH ,
239
- flags , LRC_PPHWSP_SCRATCH_ADDR );
297
+ cs = gen12_emit_pipe_control (cs , bit_group_0 , bit_group_1 ,
298
+ LRC_PPHWSP_SCRATCH_ADDR );
240
299
intel_ring_advance (rq , cs );
241
300
}
242
301
@@ -267,10 +326,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
267
326
else if (engine -> class == COMPUTE_CLASS )
268
327
flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS ;
269
328
270
- if (!HAS_FLAT_CCS (rq -> engine -> i915 ))
271
- count = 8 + 4 ;
272
- else
273
- count = 8 ;
329
+ count = 8 ;
330
+ if (gen12_needs_ccs_aux_inv (rq -> engine ))
331
+ count += 8 ;
274
332
275
333
cs = intel_ring_begin (rq , count );
276
334
if (IS_ERR (cs ))
@@ -285,11 +343,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
285
343
286
344
cs = gen8_emit_pipe_control (cs , flags , LRC_PPHWSP_SCRATCH_ADDR );
287
345
288
- if (!HAS_FLAT_CCS (rq -> engine -> i915 )) {
289
- /* hsdes: 1809175790 */
290
- cs = gen12_emit_aux_table_inv (rq -> engine -> gt ,
291
- cs , GEN12_GFX_CCS_AUX_NV );
292
- }
346
+ cs = gen12_emit_aux_table_inv (engine , cs );
293
347
294
348
* cs ++ = preparser_disable (false);
295
349
intel_ring_advance (rq , cs );
@@ -300,21 +354,14 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
300
354
301
355
int gen12_emit_flush_xcs (struct i915_request * rq , u32 mode )
302
356
{
303
- intel_engine_mask_t aux_inv = 0 ;
304
- u32 cmd , * cs ;
357
+ u32 cmd = 4 ;
358
+ u32 * cs ;
305
359
306
- cmd = 4 ;
307
360
if (mode & EMIT_INVALIDATE ) {
308
361
cmd += 2 ;
309
362
310
- if (!HAS_FLAT_CCS (rq -> engine -> i915 ) &&
311
- (rq -> engine -> class == VIDEO_DECODE_CLASS ||
312
- rq -> engine -> class == VIDEO_ENHANCEMENT_CLASS )) {
313
- aux_inv = rq -> engine -> mask &
314
- ~GENMASK (_BCS (I915_MAX_BCS - 1 ), BCS0 );
315
- if (aux_inv )
316
- cmd += 4 ;
317
- }
363
+ if (gen12_needs_ccs_aux_inv (rq -> engine ))
364
+ cmd += 8 ;
318
365
}
319
366
320
367
cs = intel_ring_begin (rq , cmd );
@@ -338,21 +385,18 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
338
385
cmd |= MI_INVALIDATE_TLB ;
339
386
if (rq -> engine -> class == VIDEO_DECODE_CLASS )
340
387
cmd |= MI_INVALIDATE_BSD ;
388
+
389
+ if (gen12_needs_ccs_aux_inv (rq -> engine ) &&
390
+ rq -> engine -> class == COPY_ENGINE_CLASS )
391
+ cmd |= MI_FLUSH_DW_CCS ;
341
392
}
342
393
343
394
* cs ++ = cmd ;
344
395
* cs ++ = LRC_PPHWSP_SCRATCH_ADDR ;
345
396
* cs ++ = 0 ; /* upper addr */
346
397
* cs ++ = 0 ; /* value */
347
398
348
- if (aux_inv ) { /* hsdes: 1809175790 */
349
- if (rq -> engine -> class == VIDEO_DECODE_CLASS )
350
- cs = gen12_emit_aux_table_inv (rq -> engine -> gt ,
351
- cs , GEN12_VD0_AUX_NV );
352
- else
353
- cs = gen12_emit_aux_table_inv (rq -> engine -> gt ,
354
- cs , GEN12_VE0_AUX_NV );
355
- }
399
+ cs = gen12_emit_aux_table_inv (rq -> engine , cs );
356
400
357
401
if (mode & EMIT_INVALIDATE )
358
402
* cs ++ = preparser_disable (false);
0 commit comments