Skip to content

Commit 712274e

Browse files
committed
hevc_d: Put command FIFO into coherent memory
Change command FIFO allocation to be from DMA coherent memory rather than from generic kernel memory that we map and unmap. This removes a potential issue with unmap in an IRQ tail. In general it should also be faster as it avoids the overhead inherent in map and unmap and most streams only create short command streams. Signed-off-by: John Cox <jc@kynesim.co.uk>
1 parent e57003a commit 712274e

File tree

1 file changed

+42
-65
lines changed

1 file changed

+42
-65
lines changed

drivers/media/platform/raspberrypi/hevc_dec/hevc_d_h265.c

Lines changed: 42 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,15 @@
2121
#include "hevc_d_hw.h"
2222
#include "hevc_d_video.h"
2323

24+
/* Maximum length of command buffer (in commands) before we treat it as an error */
25+
#define CMD_BUFFER_SIZE_MAX 0x100000
26+
27+
/* Initial size of command FIFO in commands.
28+
* The FIFO will be extended if this value is exceeded but 8192 seems to
29+
* deal with all streams found in the wild.
30+
*/
31+
#define CMD_BUFFER_SIZE_INIT 8192
32+
2433
enum hevc_slice_type {
2534
HEVC_SLICE_B = 0,
2635
HEVC_SLICE_P = 1,
@@ -92,6 +101,24 @@ static int gptr_realloc_new(struct hevc_d_dev * const dev,
92101
return 0;
93102
}
94103

104+
/* Realloc with copy */
105+
static int gptr_realloc_copy(struct hevc_d_dev * const dev,
106+
struct hevc_d_gptr * const gptr, size_t newsize)
107+
{
108+
struct hevc_d_gptr gnew;
109+
110+
if (newsize <= gptr->size)
111+
return 0;
112+
113+
if (gptr_alloc(dev, &gnew, newsize, gptr->attrs))
114+
return -ENOMEM;
115+
116+
memcpy(gnew.ptr, gptr->ptr, gptr->size);
117+
gptr_free(dev, gptr);
118+
*gptr = gnew;
119+
return 0;
120+
}
121+
95122
static size_t next_size(const size_t x)
96123
{
97124
return hevc_d_round_up_size(x + 1);
@@ -106,11 +133,6 @@ static size_t next_size(const size_t x)
106133

107134
#define HEVC_MAX_REFS V4L2_HEVC_DPB_ENTRIES_NUM_MAX
108135

109-
struct rpi_cmd {
110-
u32 addr;
111-
u32 data;
112-
} __packed;
113-
114136
struct hevc_d_q_aux {
115137
unsigned int refcount;
116138
unsigned int q_index;
@@ -133,8 +155,9 @@ struct hevc_d_dec_env {
133155
unsigned int decode_order;
134156
int p1_status; /* P1 status - what to realloc */
135157

136-
struct rpi_cmd *cmd_fifo;
137-
unsigned int cmd_len, cmd_max;
158+
struct hevc_d_gptr cmd;
159+
unsigned int cmd_len;
160+
unsigned int cmd_max;
138161
unsigned int num_slice_msgs;
139162
unsigned int pic_width_in_ctbs_y;
140163
unsigned int pic_height_in_ctbs_y;
@@ -167,9 +190,6 @@ struct hevc_d_dec_env {
167190
struct hevc_d_q_aux *frame_aux;
168191
struct hevc_d_q_aux *col_aux;
169192

170-
dma_addr_t cmd_addr;
171-
size_t cmd_size;
172-
173193
dma_addr_t pu_base_vc;
174194
dma_addr_t coeff_base_vc;
175195
u32 pu_stride;
@@ -234,23 +254,19 @@ static inline int clip_int(const int x, const int lo, const int hi)
234254
/* Phase 1 command and bit FIFOs */
235255
static int cmds_check_space(struct hevc_d_dec_env *const de, unsigned int n)
236256
{
237-
struct rpi_cmd *a;
238257
unsigned int newmax;
239258

240-
if (n > 0x100000) {
241-
v4l2_err(&de->ctx->dev->v4l2_dev,
242-
"%s: n %u implausible\n", __func__, n);
243-
return -ENOMEM;
244-
}
245-
246259
if (de->cmd_len + n <= de->cmd_max)
247260
return 0;
248261

249262
newmax = roundup_pow_of_two(de->cmd_len + n);
263+
if (newmax > CMD_BUFFER_SIZE_MAX) {
264+
v4l2_err(&de->ctx->dev->v4l2_dev,
265+
"%s: n %u implausible\n", __func__, newmax);
266+
return -ENOMEM;
267+
}
250268

251-
a = krealloc(de->cmd_fifo, newmax * sizeof(struct rpi_cmd),
252-
GFP_KERNEL);
253-
if (!a) {
269+
if (gptr_realloc_copy(de->ctx->dev, &de->cmd, newmax * sizeof(u64))) {
254270
v4l2_err(&de->ctx->dev->v4l2_dev,
255271
"Failed cmd buffer realloc from %u to %u\n",
256272
de->cmd_max, newmax);
@@ -259,7 +275,6 @@ static int cmds_check_space(struct hevc_d_dec_env *const de, unsigned int n)
259275
v4l2_info(&de->ctx->dev->v4l2_dev,
260276
"cmd buffer realloc from %u to %u\n", de->cmd_max, newmax);
261277

262-
de->cmd_fifo = a;
263278
de->cmd_max = newmax;
264279
return 0;
265280
}
@@ -268,15 +283,7 @@ static int cmds_check_space(struct hevc_d_dec_env *const de, unsigned int n)
268283
static void p1_apb_write(struct hevc_d_dec_env *const de, const u16 addr,
269284
const u32 data)
270285
{
271-
if (de->cmd_len >= de->cmd_max) {
272-
v4l2_err(&de->ctx->dev->v4l2_dev,
273-
"%s: Overflow @ %d\n", __func__, de->cmd_len);
274-
return;
275-
}
276-
277-
de->cmd_fifo[de->cmd_len].addr = addr;
278-
de->cmd_fifo[de->cmd_len].data = data;
279-
286+
WRITE_ONCE(((u64 *)de->cmd.ptr)[de->cmd_len], addr | ((u64)data << 32));
280287
de->cmd_len++;
281288
}
282289

@@ -1411,24 +1418,6 @@ static int updated_ps(struct hevc_d_dec_state *const s)
14111418
return -ENOMEM;
14121419
}
14131420

1414-
static int write_cmd_buffer(struct hevc_d_dev *const dev,
1415-
struct hevc_d_dec_env *const de,
1416-
const struct hevc_d_dec_state *const s)
1417-
{
1418-
const size_t cmd_size = ALIGN(de->cmd_len * sizeof(de->cmd_fifo[0]),
1419-
dev->cache_align);
1420-
1421-
de->cmd_addr = dma_map_single(dev->dev, de->cmd_fifo,
1422-
cmd_size, DMA_TO_DEVICE);
1423-
if (dma_mapping_error(dev->dev, de->cmd_addr)) {
1424-
v4l2_err(&dev->v4l2_dev,
1425-
"Map cmd buffer (%zu): FAILED\n", cmd_size);
1426-
return -ENOMEM;
1427-
}
1428-
de->cmd_size = cmd_size;
1429-
return 0;
1430-
}
1431-
14321421
static void setup_colmv(struct hevc_d_ctx *const ctx, struct hevc_d_run *run,
14331422
struct hevc_d_dec_state *const s)
14341423
{
@@ -1461,12 +1450,6 @@ static void dec_env_delete(struct hevc_d_dec_env *const de)
14611450
struct hevc_d_ctx * const ctx = de->ctx;
14621451
unsigned long lock_flags;
14631452

1464-
if (de->cmd_size) {
1465-
dma_unmap_single(ctx->dev->dev, de->cmd_addr, de->cmd_size,
1466-
DMA_TO_DEVICE);
1467-
de->cmd_size = 0;
1468-
}
1469-
14701453
aux_q_release(ctx, &de->frame_aux);
14711454
aux_q_release(ctx, &de->col_aux);
14721455

@@ -1486,8 +1469,7 @@ static void dec_env_uninit(struct hevc_d_ctx *const ctx)
14861469
if (ctx->dec_pool) {
14871470
for (i = 0; i != HEVC_D_DEC_ENV_COUNT; ++i) {
14881471
struct hevc_d_dec_env *const de = ctx->dec_pool + i;
1489-
1490-
kfree(de->cmd_fifo);
1472+
gptr_free(ctx->dev, &de->cmd);
14911473
}
14921474

14931475
kfree(ctx->dec_pool);
@@ -1517,11 +1499,9 @@ static int dec_env_init(struct hevc_d_ctx *const ctx)
15171499

15181500
de->ctx = ctx;
15191501
de->decode_order = i;
1520-
de->cmd_max = 8096;
1521-
de->cmd_fifo = kmalloc_array(de->cmd_max,
1522-
sizeof(struct rpi_cmd),
1523-
GFP_KERNEL);
1524-
if (!de->cmd_fifo)
1502+
de->cmd_max = CMD_BUFFER_SIZE_INIT;
1503+
if (gptr_alloc(ctx->dev, &de->cmd,
1504+
de->cmd_max * sizeof(u64), 0))
15251505
goto fail;
15261506
}
15271507

@@ -1871,9 +1851,6 @@ void hevc_d_h265_setup(struct hevc_d_ctx *ctx, struct hevc_d_run *run)
18711851
goto fail;
18721852
}
18731853

1874-
if (write_cmd_buffer(dev, de, s))
1875-
goto fail;
1876-
18771854
for (i = 0; i < dec->num_active_dpb_entries; ++i) {
18781855
struct vb2_buffer *buf = vb2_find_buffer(vq, dec->dpb[i].timestamp);
18791856

@@ -2205,7 +2182,7 @@ static void phase1_claimed(struct hevc_d_dev *const dev, void *v)
22052182
hevc_d_hw_irq_active1_irq(dev, &de->irq_ent, phase1_cb, de);
22062183

22072184
/* Start the h/w */
2208-
apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd_addr);
2185+
apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd.addr);
22092186

22102187
return;
22112188

0 commit comments

Comments
 (0)