Skip to content

Commit 6dd751c

Browse files
6by9popcornmix
authored and committed
drm/vc4: Cache LBM allocations to avoid double-buffering
LBM is only relevant for each active dlist, so there is no need to double-buffer the allocations. Cache the allocations per plane so that we can ensure the allocations are possible. Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
1 parent 3ad8f6c commit 6dd751c

File tree

4 files changed

+128
-20
lines changed

4 files changed

+128
-20
lines changed

drivers/gpu/drm/vc4/tests/vc4_test_lbm_size.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ static void drm_vc4_test_vc4_lbm_size(struct kunit *test)
188188
struct drm_framebuffer *fb;
189189
struct drm_plane *plane;
190190
struct drm_crtc *crtc;
191+
struct vc4_dev *vc4;
191192
unsigned int i;
192193
int ret;
193194

@@ -248,7 +249,12 @@ static void drm_vc4_test_vc4_lbm_size(struct kunit *test)
248249
ret = drm_atomic_check_only(state);
249250
KUNIT_ASSERT_EQ(test, ret, 0);
250251

251-
KUNIT_EXPECT_EQ(test, vc4_plane_state->lbm.size, params->expected_lbm_size);
252+
vc4 = to_vc4_dev(state->dev);
253+
KUNIT_ASSERT_NOT_NULL(test, vc4);
254+
KUNIT_ASSERT_NOT_NULL(test, vc4->hvs);
255+
KUNIT_EXPECT_EQ(test,
256+
vc4->hvs->lbm_refcounts[vc4_plane_state->lbm_handle].size,
257+
params->expected_lbm_size);
252258

253259
for (i = 0; i < 2; i++) {
254260
KUNIT_EXPECT_EQ(test,

drivers/gpu/drm/vc4/vc4_drv.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,19 @@ struct vc4_v3d {
322322
struct debugfs_regset32 regset;
323323
};
324324

325+
#define VC4_NUM_LBM_HANDLES 64
326+
struct vc4_lbm_refcounts {
327+
refcount_t refcount;
328+
329+
/* Allocation size */
330+
size_t size;
331+
/* Our allocation in LBM. */
332+
struct drm_mm_node lbm;
333+
334+
/* Pointer back to the HVS structure */
335+
struct vc4_hvs *hvs;
336+
};
337+
325338
#define VC4_NUM_UPM_HANDLES 32
326339
struct vc4_upm_refcounts {
327340
refcount_t refcount;
@@ -363,6 +376,8 @@ struct vc4_hvs {
363376

364377
/* Memory manager for the LBM memory used by HVS scaling. */
365378
struct drm_mm lbm_mm;
379+
struct ida lbm_handles;
380+
struct vc4_lbm_refcounts lbm_refcounts[VC4_NUM_LBM_HANDLES + 1];
366381

367382
/* Memory manager for the UPM memory used for prefetching. */
368383
struct drm_mm upm_mm;
@@ -462,7 +477,7 @@ struct vc4_plane_state {
462477
bool is_yuv;
463478

464479
/* Our allocation in LBM for temporary storage during scaling. */
465-
struct drm_mm_node lbm;
480+
unsigned int lbm_handle;
466481

467482
/* The Unified Pre-Fetcher Handle */
468483
unsigned int upm_handle[DRM_FORMAT_MAX_PLANES];

drivers/gpu/drm/vc4/vc4_hvs.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,27 @@ static int vc4_hvs_debugfs_dlist_allocs(struct seq_file *m, void *data)
379379
return 0;
380380
}
381381

382+
static int vc4_hvs_debugfs_lbm_allocs(struct seq_file *m, void *data)
383+
{
384+
struct drm_debugfs_entry *entry = m->private;
385+
struct drm_device *dev = entry->dev;
386+
struct vc4_dev *vc4 = to_vc4_dev(dev);
387+
struct vc4_hvs *hvs = vc4->hvs;
388+
struct drm_printer p = drm_seq_file_printer(m);
389+
struct vc4_lbm_refcounts *refcount;
390+
unsigned int i;
391+
392+
drm_printf(&p, "LBM Handles:\n");
393+
for (i = 0; i < VC4_NUM_LBM_HANDLES; i++) {
394+
refcount = &hvs->lbm_refcounts[i];
395+
drm_printf(&p, "handle %u: refcount %u, size %zu [%08llx + %08llx]\n",
396+
i, refcount_read(&refcount->refcount), refcount->size,
397+
refcount->lbm.start, refcount->lbm.size);
398+
}
399+
400+
return 0;
401+
}
402+
382403
/* The filter kernel is composed of dwords each containing 3 9-bit
383404
* signed integers packed next to each other.
384405
*/
@@ -1511,6 +1532,8 @@ int vc4_hvs_debugfs_init(struct drm_minor *minor)
15111532
drm_debugfs_add_file(drm, "hvs_dlists", vc4_hvs_debugfs_dlist, NULL);
15121533
}
15131534

1535+
drm_debugfs_add_file(drm, "hvs_lbm", vc4_hvs_debugfs_lbm_allocs, NULL);
1536+
15141537
drm_debugfs_add_file(drm, "hvs_underrun", vc4_hvs_debugfs_underrun, NULL);
15151538

15161539
drm_debugfs_add_file(drm, "hvs_dlist_allocs", vc4_hvs_debugfs_dlist_allocs, NULL);
@@ -1627,6 +1650,7 @@ struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4,
16271650
}
16281651

16291652
drm_mm_init(&hvs->lbm_mm, 0, lbm_size);
1653+
ida_init(&hvs->lbm_handles);
16301654

16311655
if (vc4->gen >= VC4_GEN_6_C) {
16321656
ida_init(&hvs->upm_handles);

drivers/gpu/drm/vc4/vc4_plane.c

Lines changed: 81 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -294,12 +294,12 @@ struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
294294
if (!vc4_state)
295295
return NULL;
296296

297-
memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
298-
299297
for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
300298
if (vc4_state->upm_handle[i])
301299
refcount_inc(&hvs->upm_refcounts[vc4_state->upm_handle[i]].refcount);
302300
}
301+
if (vc4_state->lbm_handle)
302+
refcount_inc(&hvs->lbm_refcounts[vc4_state->lbm_handle].refcount);
303303

304304
vc4_state->dlist_initialized = 0;
305305

@@ -319,6 +319,21 @@ struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
319319
return &vc4_state->base;
320320
}
321321

322+
static void vc4_plane_release_lbm_ida(struct vc4_hvs *hvs, unsigned int lbm_handle)
323+
{
324+
struct vc4_lbm_refcounts *refcount = &hvs->lbm_refcounts[lbm_handle];
325+
unsigned long irqflags;
326+
327+
spin_lock_irqsave(&hvs->mm_lock, irqflags);
328+
drm_mm_remove_node(&refcount->lbm);
329+
spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
330+
refcount->lbm.start = 0;
331+
refcount->lbm.size = 0;
332+
refcount->size = 0;
333+
334+
ida_free(&hvs->lbm_handles, lbm_handle);
335+
}
336+
322337
static void vc4_plane_release_upm_ida(struct vc4_hvs *hvs, unsigned int upm_handle)
323338
{
324339
struct vc4_upm_refcounts *refcount = &hvs->upm_refcounts[upm_handle];
@@ -342,12 +357,13 @@ void vc4_plane_destroy_state(struct drm_plane *plane,
342357
struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
343358
unsigned int i;
344359

345-
if (drm_mm_node_allocated(&vc4_state->lbm)) {
346-
unsigned long irqflags;
360+
if (vc4_state->lbm_handle) {
361+
struct vc4_lbm_refcounts *refcount;
347362

348-
spin_lock_irqsave(&hvs->mm_lock, irqflags);
349-
drm_mm_remove_node(&vc4_state->lbm);
350-
spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
363+
refcount = &hvs->lbm_refcounts[vc4_state->lbm_handle];
364+
365+
if (refcount_dec_and_test(&refcount->refcount))
366+
vc4_plane_release_lbm_ida(hvs, vc4_state->lbm_handle);
351367
}
352368

353369
for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) {
@@ -939,10 +955,14 @@ static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
939955
{
940956
struct drm_device *drm = state->plane->dev;
941957
struct vc4_dev *vc4 = to_vc4_dev(drm);
958+
struct vc4_hvs *hvs = vc4->hvs;
942959
struct drm_plane *plane = state->plane;
943960
struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
961+
struct vc4_lbm_refcounts *refcount;
944962
unsigned long irqflags;
963+
int lbm_handle;
945964
u32 lbm_size;
965+
int ret;
946966

947967
lbm_size = vc4_lbm_size(state);
948968
if (!lbm_size)
@@ -966,29 +986,71 @@ static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
966986
/* Allocate the LBM memory that the HVS will use for temporary
967987
* storage due to our scaling/format conversion.
968988
*/
969-
if (!drm_mm_node_allocated(&vc4_state->lbm)) {
970-
int ret;
989+
lbm_handle = vc4_state->lbm_handle;
990+
if (lbm_handle &&
991+
hvs->lbm_refcounts[lbm_handle].size == lbm_size) {
992+
/* Allocation is the same size as the previous user of
993+
* the plane. Keep the allocation.
994+
*/
995+
vc4_state->lbm_handle = lbm_handle;
996+
} else {
997+
if (lbm_handle &&
998+
refcount_dec_and_test(&hvs->lbm_refcounts[lbm_handle].refcount)) {
999+
vc4_plane_release_lbm_ida(hvs, lbm_handle);
1000+
vc4_state->lbm_handle = 0;
1001+
}
1002+
1003+
lbm_handle = ida_alloc_range(&hvs->lbm_handles, 1,
1004+
VC4_NUM_LBM_HANDLES,
1005+
GFP_KERNEL);
1006+
if (lbm_handle < 0) {
1007+
drm_err(drm, "Out of lbm_handles\n");
1008+
return lbm_handle;
1009+
}
1010+
vc4_state->lbm_handle = lbm_handle;
1011+
1012+
refcount = &hvs->lbm_refcounts[lbm_handle];
1013+
refcount_set(&refcount->refcount, 1);
1014+
refcount->size = lbm_size;
9711015

972-
spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
1016+
spin_lock_irqsave(&hvs->mm_lock, irqflags);
9731017
ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
974-
&vc4_state->lbm,
1018+
&refcount->lbm,
9751019
lbm_size, 1,
9761020
0, 0);
977-
spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
1021+
spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
9781022

9791023
if (ret) {
9801024
drm_err(drm, "Failed to allocate LBM entry: %d\n", ret);
1025+
refcount_set(&refcount->refcount, 0);
1026+
ida_free(&hvs->lbm_handles, lbm_handle);
1027+
vc4_state->lbm_handle = 0;
9811028
return ret;
9821029
}
983-
} else {
984-
WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
9851030
}
9861031

987-
vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;
1032+
vc4_state->dlist[vc4_state->lbm_offset] = hvs->lbm_refcounts[lbm_handle].lbm.start;
9881033

9891034
return 0;
9901035
}
9911036

1037+
static void vc4_plane_free_lbm(struct drm_plane_state *state)
1038+
{
1039+
struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1040+
struct drm_device *drm = state->plane->dev;
1041+
struct vc4_dev *vc4 = to_vc4_dev(drm);
1042+
struct vc4_hvs *hvs = vc4->hvs;
1043+
unsigned int lbm_handle;
1044+
1045+
lbm_handle = vc4_state->lbm_handle;
1046+
if (!lbm_handle)
1047+
return;
1048+
1049+
if (refcount_dec_and_test(&hvs->lbm_refcounts[lbm_handle].refcount))
1050+
vc4_plane_release_lbm_ida(hvs, lbm_handle);
1051+
vc4_state->lbm_handle = 0;
1052+
}
1053+
9921054
static int vc6_plane_allocate_upm(struct drm_plane_state *state)
9931055
{
9941056
const struct drm_format_info *info = state->fb->format;
@@ -2174,9 +2236,10 @@ int vc4_plane_atomic_check(struct drm_plane *plane,
21742236
struct drm_plane_state *old_plane_state =
21752237
drm_atomic_get_old_plane_state(state, plane);
21762238

2177-
if (vc4->gen >= VC4_GEN_6_C && old_plane_state &&
2178-
plane_enabled(old_plane_state)) {
2179-
vc6_plane_free_upm(new_plane_state);
2239+
if (old_plane_state && plane_enabled(old_plane_state)) {
2240+
if (vc4->gen >= VC4_GEN_6_C)
2241+
vc6_plane_free_upm(new_plane_state);
2242+
vc4_plane_free_lbm(new_plane_state);
21802243
}
21812244
return 0;
21822245
}

0 commit comments

Comments
 (0)