Skip to content

Commit e2e5ca2

Browse files
committed
hevc_d: Put command FIFO into coherent memory
Change command FIFO allocation to be from DMA-coherent memory rather than from generic kernel memory that we map and unmap. This removes a potential issue with unmap in an IRQ tail. In general it should also be faster, as it avoids the overhead inherent in map and unmap, and most streams only create short command streams. Signed-off-by: John Cox <jc@kynesim.co.uk>
1 parent e57003a commit e2e5ca2

File tree

1 file changed

+43
-65
lines changed

1 file changed

+43
-65
lines changed

drivers/media/platform/raspberrypi/hevc_dec/hevc_d_h265.c

Lines changed: 43 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,15 @@
2121
#include "hevc_d_hw.h"
2222
#include "hevc_d_video.h"
2323

24+
/* Maximum length of command buffer before we rate it an error */
25+
#define CMD_BUFFER_SIZE_MAX 0x100000
26+
27+
/* Initial size of command FIFO in commands.
28+
* The FIFO will be extended if this value is exceeded but 8192 seems to
29+
* deal with all streams found in the wild.
30+
*/
31+
#define CMD_BUFFER_SIZE_INIT 8192
32+
2433
enum hevc_slice_type {
2534
HEVC_SLICE_B = 0,
2635
HEVC_SLICE_P = 1,
@@ -92,6 +101,24 @@ static int gptr_realloc_new(struct hevc_d_dev * const dev,
92101
return 0;
93102
}
94103

104+
/*
 * Realloc with copy.
 *
 * Grow the buffer described by @gptr to at least @newsize bytes, keeping
 * its current contents.
 *
 * If @newsize is not larger than the current gptr->size nothing is done.
 * Otherwise a new buffer is obtained from gptr_alloc() using the same
 * attrs as the old one, the first gptr->size bytes are copied across, the
 * old buffer is released with gptr_free() and *gptr is overwritten with
 * the new descriptor. Anything previously read out of *gptr (e.g. its
 * ptr) must therefore be re-read after a successful grow.
 *
 * Returns 0 on success (including the no-op case) or -ENOMEM if the new
 * allocation fails, in which case *gptr is left untouched.
 */
105+
static int gptr_realloc_copy(struct hevc_d_dev * const dev,
106+
struct hevc_d_gptr * const gptr, size_t newsize)
107+
{
108+
struct hevc_d_gptr gnew;
109+
110+
if (newsize <= gptr->size)
111+
return 0;
112+
113+
if (gptr_alloc(dev, &gnew, newsize, gptr->attrs))
114+
return -ENOMEM;
115+
116+
/* Copy the whole of the old buffer before it is freed */
memcpy(gnew.ptr, gptr->ptr, gptr->size);
117+
gptr_free(dev, gptr);
118+
*gptr = gnew;
119+
return 0;
120+
}
121+
95122
static size_t next_size(const size_t x)
96123
{
97124
return hevc_d_round_up_size(x + 1);
@@ -106,11 +133,6 @@ static size_t next_size(const size_t x)
106133

107134
#define HEVC_MAX_REFS V4L2_HEVC_DPB_ENTRIES_NUM_MAX
108135

109-
struct rpi_cmd {
110-
u32 addr;
111-
u32 data;
112-
} __packed;
113-
114136
struct hevc_d_q_aux {
115137
unsigned int refcount;
116138
unsigned int q_index;
@@ -133,8 +155,9 @@ struct hevc_d_dec_env {
133155
unsigned int decode_order;
134156
int p1_status; /* P1 status - what to realloc */
135157

136-
struct rpi_cmd *cmd_fifo;
137-
unsigned int cmd_len, cmd_max;
158+
struct hevc_d_gptr cmd;
159+
unsigned int cmd_len;
160+
unsigned int cmd_max;
138161
unsigned int num_slice_msgs;
139162
unsigned int pic_width_in_ctbs_y;
140163
unsigned int pic_height_in_ctbs_y;
@@ -167,9 +190,6 @@ struct hevc_d_dec_env {
167190
struct hevc_d_q_aux *frame_aux;
168191
struct hevc_d_q_aux *col_aux;
169192

170-
dma_addr_t cmd_addr;
171-
size_t cmd_size;
172-
173193
dma_addr_t pu_base_vc;
174194
dma_addr_t coeff_base_vc;
175195
u32 pu_stride;
@@ -234,23 +254,20 @@ static inline int clip_int(const int x, const int lo, const int hi)
234254
/* Phase 1 command and bit FIFOs */
235255
static int cmds_check_space(struct hevc_d_dec_env *const de, unsigned int n)
236256
{
237-
struct rpi_cmd *a;
238257
unsigned int newmax;
239258

240-
if (n > 0x100000) {
241-
v4l2_err(&de->ctx->dev->v4l2_dev,
242-
"%s: n %u implausible\n", __func__, n);
243-
return -ENOMEM;
244-
}
245-
246259
if (de->cmd_len + n <= de->cmd_max)
247260
return 0;
248261

249262
newmax = roundup_pow_of_two(de->cmd_len + n);
263+
if (newmax > CMD_BUFFER_SIZE_MAX) {
264+
v4l2_err(&de->ctx->dev->v4l2_dev,
265+
"%s: n %u implausible\n", __func__, newmax);
266+
return -ENOMEM;
267+
}
250268

251-
a = krealloc(de->cmd_fifo, newmax * sizeof(struct rpi_cmd),
252-
GFP_KERNEL);
253-
if (!a) {
269+
if (gptr_realloc_copy(de->ctx->dev, &de->cmd, newmax * sizeof(u64)))
270+
{
254271
v4l2_err(&de->ctx->dev->v4l2_dev,
255272
"Failed cmd buffer realloc from %u to %u\n",
256273
de->cmd_max, newmax);
@@ -259,7 +276,6 @@ static int cmds_check_space(struct hevc_d_dec_env *const de, unsigned int n)
259276
v4l2_info(&de->ctx->dev->v4l2_dev,
260277
"cmd buffer realloc from %u to %u\n", de->cmd_max, newmax);
261278

262-
de->cmd_fifo = a;
263279
de->cmd_max = newmax;
264280
return 0;
265281
}
@@ -268,15 +284,7 @@ static int cmds_check_space(struct hevc_d_dec_env *const de, unsigned int n)
268284
static void p1_apb_write(struct hevc_d_dec_env *const de, const u16 addr,
269285
const u32 data)
270286
{
271-
if (de->cmd_len >= de->cmd_max) {
272-
v4l2_err(&de->ctx->dev->v4l2_dev,
273-
"%s: Overflow @ %d\n", __func__, de->cmd_len);
274-
return;
275-
}
276-
277-
de->cmd_fifo[de->cmd_len].addr = addr;
278-
de->cmd_fifo[de->cmd_len].data = data;
279-
287+
WRITE_ONCE(((u64*)de->cmd.ptr)[de->cmd_len], addr | ((u64)data << 32));
280288
de->cmd_len++;
281289
}
282290

@@ -1411,24 +1419,6 @@ static int updated_ps(struct hevc_d_dec_state *const s)
14111419
return -ENOMEM;
14121420
}
14131421

1414-
static int write_cmd_buffer(struct hevc_d_dev *const dev,
1415-
struct hevc_d_dec_env *const de,
1416-
const struct hevc_d_dec_state *const s)
1417-
{
1418-
const size_t cmd_size = ALIGN(de->cmd_len * sizeof(de->cmd_fifo[0]),
1419-
dev->cache_align);
1420-
1421-
de->cmd_addr = dma_map_single(dev->dev, de->cmd_fifo,
1422-
cmd_size, DMA_TO_DEVICE);
1423-
if (dma_mapping_error(dev->dev, de->cmd_addr)) {
1424-
v4l2_err(&dev->v4l2_dev,
1425-
"Map cmd buffer (%zu): FAILED\n", cmd_size);
1426-
return -ENOMEM;
1427-
}
1428-
de->cmd_size = cmd_size;
1429-
return 0;
1430-
}
1431-
14321422
static void setup_colmv(struct hevc_d_ctx *const ctx, struct hevc_d_run *run,
14331423
struct hevc_d_dec_state *const s)
14341424
{
@@ -1461,12 +1451,6 @@ static void dec_env_delete(struct hevc_d_dec_env *const de)
14611451
struct hevc_d_ctx * const ctx = de->ctx;
14621452
unsigned long lock_flags;
14631453

1464-
if (de->cmd_size) {
1465-
dma_unmap_single(ctx->dev->dev, de->cmd_addr, de->cmd_size,
1466-
DMA_TO_DEVICE);
1467-
de->cmd_size = 0;
1468-
}
1469-
14701454
aux_q_release(ctx, &de->frame_aux);
14711455
aux_q_release(ctx, &de->col_aux);
14721456

@@ -1486,8 +1470,7 @@ static void dec_env_uninit(struct hevc_d_ctx *const ctx)
14861470
if (ctx->dec_pool) {
14871471
for (i = 0; i != HEVC_D_DEC_ENV_COUNT; ++i) {
14881472
struct hevc_d_dec_env *const de = ctx->dec_pool + i;
1489-
1490-
kfree(de->cmd_fifo);
1473+
gptr_free(ctx->dev, &de->cmd);
14911474
}
14921475

14931476
kfree(ctx->dec_pool);
@@ -1517,11 +1500,9 @@ static int dec_env_init(struct hevc_d_ctx *const ctx)
15171500

15181501
de->ctx = ctx;
15191502
de->decode_order = i;
1520-
de->cmd_max = 8096;
1521-
de->cmd_fifo = kmalloc_array(de->cmd_max,
1522-
sizeof(struct rpi_cmd),
1523-
GFP_KERNEL);
1524-
if (!de->cmd_fifo)
1503+
de->cmd_max = CMD_BUFFER_SIZE_INIT;
1504+
if (gptr_alloc(ctx->dev, &de->cmd,
1505+
de->cmd_max * sizeof(u64), 0))
15251506
goto fail;
15261507
}
15271508

@@ -1871,9 +1852,6 @@ void hevc_d_h265_setup(struct hevc_d_ctx *ctx, struct hevc_d_run *run)
18711852
goto fail;
18721853
}
18731854

1874-
if (write_cmd_buffer(dev, de, s))
1875-
goto fail;
1876-
18771855
for (i = 0; i < dec->num_active_dpb_entries; ++i) {
18781856
struct vb2_buffer *buf = vb2_find_buffer(vq, dec->dpb[i].timestamp);
18791857

@@ -2205,7 +2183,7 @@ static void phase1_claimed(struct hevc_d_dev *const dev, void *v)
22052183
hevc_d_hw_irq_active1_irq(dev, &de->irq_ent, phase1_cb, de);
22062184

22072185
/* Start the h/w */
2208-
apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd_addr);
2186+
apb_write_vc_addr_final(dev, RPI_CFBASE, de->cmd.addr);
22092187

22102188
return;
22112189

0 commit comments

Comments
 (0)