1/*
2 * Coda multi-standard codec IP - BIT processor functions
3 *
4 * Copyright (C) 2012 Vista Silicon S.L.
5 *    Javier Martin, <javier.martin@vista-silicon.com>
6 *    Xavier Duret
7 * Copyright (C) 2012-2014 Philipp Zabel, Pengutronix
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 */
14
15#include <linux/clk.h>
16#include <linux/irqreturn.h>
17#include <linux/kernel.h>
18#include <linux/log2.h>
19#include <linux/platform_device.h>
20#include <linux/reset.h>
21#include <linux/slab.h>
22#include <linux/videodev2.h>
23
24#include <media/v4l2-common.h>
25#include <media/v4l2-ctrls.h>
26#include <media/v4l2-fh.h>
27#include <media/v4l2-mem2mem.h>
28#include <media/videobuf2-v4l2.h>
29#include <media/videobuf2-dma-contig.h>
30#include <media/videobuf2-vmalloc.h>
31
32#include "coda.h"
33#define CREATE_TRACE_POINTS
34#include "trace.h"
35
/* Size in bytes of the per-context parameter buffer ("parabuf") */
#define CODA_PARA_BUF_SIZE	(10 * 1024)
/* Size in bytes of the "psbuf" context buffer allocated on CODA7541 */
#define CODA7_PS_BUF_SIZE	(160 * 1024)	/* 0x28000 */
/* Extra work buffer space added on CODA960 when the format is H.264 */
#define CODA9_PS_SAVE_SIZE	(512 * 1024)

/* Default rate control gamma values used when starting the encoder */
#define CODA_DEFAULT_GAMMA	4096
#define CODA9_DEFAULT_GAMMA	(3 * 32768 / 4)	/* 0.75 * 32768 = 24576 */
42
43static void coda_free_bitstream_buffer(struct coda_ctx *ctx);
44
45static inline int coda_is_initialized(struct coda_dev *dev)
46{
47	return coda_read(dev, CODA_REG_BIT_CUR_PC) != 0;
48}
49
50static inline unsigned long coda_isbusy(struct coda_dev *dev)
51{
52	return coda_read(dev, CODA_REG_BIT_BUSY);
53}
54
55static int coda_wait_timeout(struct coda_dev *dev)
56{
57	unsigned long timeout = jiffies + msecs_to_jiffies(1000);
58
59	while (coda_isbusy(dev)) {
60		if (time_after(jiffies, timeout))
61			return -ETIMEDOUT;
62	}
63	return 0;
64}
65
66static void coda_command_async(struct coda_ctx *ctx, int cmd)
67{
68	struct coda_dev *dev = ctx->dev;
69
70	if (dev->devtype->product == CODA_960 ||
71	    dev->devtype->product == CODA_7541) {
72		/* Restore context related registers to CODA */
73		coda_write(dev, ctx->bit_stream_param,
74				CODA_REG_BIT_BIT_STREAM_PARAM);
75		coda_write(dev, ctx->frm_dis_flg,
76				CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
77		coda_write(dev, ctx->frame_mem_ctrl,
78				CODA_REG_BIT_FRAME_MEM_CTRL);
79		coda_write(dev, ctx->workbuf.paddr, CODA_REG_BIT_WORK_BUF_ADDR);
80	}
81
82	if (dev->devtype->product == CODA_960) {
83		coda_write(dev, 1, CODA9_GDI_WPROT_ERR_CLR);
84		coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
85	}
86
87	coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
88
89	coda_write(dev, ctx->idx, CODA_REG_BIT_RUN_INDEX);
90	coda_write(dev, ctx->params.codec_mode, CODA_REG_BIT_RUN_COD_STD);
91	coda_write(dev, ctx->params.codec_mode_aux, CODA7_REG_BIT_RUN_AUX_STD);
92
93	trace_coda_bit_run(ctx, cmd);
94
95	coda_write(dev, cmd, CODA_REG_BIT_RUN_COMMAND);
96}
97
98static int coda_command_sync(struct coda_ctx *ctx, int cmd)
99{
100	struct coda_dev *dev = ctx->dev;
101	int ret;
102
103	coda_command_async(ctx, cmd);
104	ret = coda_wait_timeout(dev);
105	trace_coda_bit_done(ctx);
106
107	return ret;
108}
109
110int coda_hw_reset(struct coda_ctx *ctx)
111{
112	struct coda_dev *dev = ctx->dev;
113	unsigned long timeout;
114	unsigned int idx;
115	int ret;
116
117	if (!dev->rstc)
118		return -ENOENT;
119
120	idx = coda_read(dev, CODA_REG_BIT_RUN_INDEX);
121
122	if (dev->devtype->product == CODA_960) {
123		timeout = jiffies + msecs_to_jiffies(100);
124		coda_write(dev, 0x11, CODA9_GDI_BUS_CTRL);
125		while (coda_read(dev, CODA9_GDI_BUS_STATUS) != 0x77) {
126			if (time_after(jiffies, timeout))
127				return -ETIME;
128			cpu_relax();
129		}
130	}
131
132	ret = reset_control_reset(dev->rstc);
133	if (ret < 0)
134		return ret;
135
136	if (dev->devtype->product == CODA_960)
137		coda_write(dev, 0x00, CODA9_GDI_BUS_CTRL);
138	coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
139	coda_write(dev, CODA_REG_RUN_ENABLE, CODA_REG_BIT_CODE_RUN);
140	ret = coda_wait_timeout(dev);
141	coda_write(dev, idx, CODA_REG_BIT_RUN_INDEX);
142
143	return ret;
144}
145
146static void coda_kfifo_sync_from_device(struct coda_ctx *ctx)
147{
148	struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
149	struct coda_dev *dev = ctx->dev;
150	u32 rd_ptr;
151
152	rd_ptr = coda_read(dev, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
153	kfifo->out = (kfifo->in & ~kfifo->mask) |
154		      (rd_ptr - ctx->bitstream.paddr);
155	if (kfifo->out > kfifo->in)
156		kfifo->out -= kfifo->mask + 1;
157}
158
159static void coda_kfifo_sync_to_device_full(struct coda_ctx *ctx)
160{
161	struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
162	struct coda_dev *dev = ctx->dev;
163	u32 rd_ptr, wr_ptr;
164
165	rd_ptr = ctx->bitstream.paddr + (kfifo->out & kfifo->mask);
166	coda_write(dev, rd_ptr, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
167	wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
168	coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
169}
170
171static void coda_kfifo_sync_to_device_write(struct coda_ctx *ctx)
172{
173	struct __kfifo *kfifo = &ctx->bitstream_fifo.kfifo;
174	struct coda_dev *dev = ctx->dev;
175	u32 wr_ptr;
176
177	wr_ptr = ctx->bitstream.paddr + (kfifo->in & kfifo->mask);
178	coda_write(dev, wr_ptr, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
179}
180
181static int coda_bitstream_queue(struct coda_ctx *ctx,
182				struct vb2_v4l2_buffer *src_buf)
183{
184	u32 src_size = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
185	u32 n;
186
187	n = kfifo_in(&ctx->bitstream_fifo,
188			vb2_plane_vaddr(&src_buf->vb2_buf, 0), src_size);
189	if (n < src_size)
190		return -ENOSPC;
191
192	src_buf->sequence = ctx->qsequence++;
193
194	return 0;
195}
196
197static bool coda_bitstream_try_queue(struct coda_ctx *ctx,
198				     struct vb2_v4l2_buffer *src_buf)
199{
200	int ret;
201
202	if (coda_get_bitstream_payload(ctx) +
203	    vb2_get_plane_payload(&src_buf->vb2_buf, 0) + 512 >=
204	    ctx->bitstream.size)
205		return false;
206
207	if (vb2_plane_vaddr(&src_buf->vb2_buf, 0) == NULL) {
208		v4l2_err(&ctx->dev->v4l2_dev, "trying to queue empty buffer\n");
209		return true;
210	}
211
212	ret = coda_bitstream_queue(ctx, src_buf);
213	if (ret < 0) {
214		v4l2_err(&ctx->dev->v4l2_dev, "bitstream buffer overflow\n");
215		return false;
216	}
217	/* Sync read pointer to device */
218	if (ctx == v4l2_m2m_get_curr_priv(ctx->dev->m2m_dev))
219		coda_kfifo_sync_to_device_write(ctx);
220
221	ctx->hold = false;
222
223	return true;
224}
225
226void coda_fill_bitstream(struct coda_ctx *ctx, bool streaming)
227{
228	struct vb2_v4l2_buffer *src_buf;
229	struct coda_buffer_meta *meta;
230	unsigned long flags;
231	u32 start;
232
233	if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG)
234		return;
235
236	while (v4l2_m2m_num_src_bufs_ready(ctx->fh.m2m_ctx) > 0) {
237		/*
238		 * Only queue a single JPEG into the bitstream buffer, except
239		 * to increase payload over 512 bytes or if in hold state.
240		 */
241		if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
242		    (coda_get_bitstream_payload(ctx) >= 512) && !ctx->hold)
243			break;
244
245		src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
246
247		/* Drop frames that do not start/end with a SOI/EOI markers */
248		if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG &&
249		    !coda_jpeg_check_buffer(ctx, src_buf)) {
250			v4l2_err(&ctx->dev->v4l2_dev,
251				 "dropping invalid JPEG frame %d\n",
252				 ctx->qsequence);
253			src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
254			v4l2_m2m_buf_done(src_buf, streaming ?
255					  VB2_BUF_STATE_ERROR :
256					  VB2_BUF_STATE_QUEUED);
257			continue;
258		}
259
260		/* Dump empty buffers */
261		if (!vb2_get_plane_payload(&src_buf->vb2_buf, 0)) {
262			src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
263			v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
264			continue;
265		}
266
267		/* Buffer start position */
268		start = ctx->bitstream_fifo.kfifo.in &
269			ctx->bitstream_fifo.kfifo.mask;
270
271		if (coda_bitstream_try_queue(ctx, src_buf)) {
272			/*
273			 * Source buffer is queued in the bitstream ringbuffer;
274			 * queue the timestamp and mark source buffer as done
275			 */
276			src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
277
278			meta = kmalloc(sizeof(*meta), GFP_KERNEL);
279			if (meta) {
280				meta->sequence = src_buf->sequence;
281				meta->timecode = src_buf->timecode;
282				meta->timestamp = src_buf->timestamp;
283				meta->start = start;
284				meta->end = ctx->bitstream_fifo.kfifo.in &
285					    ctx->bitstream_fifo.kfifo.mask;
286				spin_lock_irqsave(&ctx->buffer_meta_lock,
287						  flags);
288				list_add_tail(&meta->list,
289					      &ctx->buffer_meta_list);
290				ctx->num_metas++;
291				spin_unlock_irqrestore(&ctx->buffer_meta_lock,
292						       flags);
293
294				trace_coda_bit_queue(ctx, src_buf, meta);
295			}
296
297			v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
298		} else {
299			break;
300		}
301	}
302}
303
304void coda_bit_stream_end_flag(struct coda_ctx *ctx)
305{
306	struct coda_dev *dev = ctx->dev;
307
308	ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
309
310	/* If this context is currently running, update the hardware flag */
311	if ((dev->devtype->product == CODA_960) &&
312	    coda_isbusy(dev) &&
313	    (ctx->idx == coda_read(dev, CODA_REG_BIT_RUN_INDEX))) {
314		coda_write(dev, ctx->bit_stream_param,
315			   CODA_REG_BIT_BIT_STREAM_PARAM);
316	}
317}
318
319static void coda_parabuf_write(struct coda_ctx *ctx, int index, u32 value)
320{
321	struct coda_dev *dev = ctx->dev;
322	u32 *p = ctx->parabuf.vaddr;
323
324	if (dev->devtype->product == CODA_DX6)
325		p[index] = value;
326	else
327		p[index ^ 1] = value;
328}
329
330static inline int coda_alloc_context_buf(struct coda_ctx *ctx,
331					 struct coda_aux_buf *buf, size_t size,
332					 const char *name)
333{
334	return coda_alloc_aux_buf(ctx->dev, buf, size, name, ctx->debugfs_entry);
335}
336
337
338static void coda_free_framebuffers(struct coda_ctx *ctx)
339{
340	int i;
341
342	for (i = 0; i < CODA_MAX_FRAMEBUFFERS; i++)
343		coda_free_aux_buf(ctx->dev, &ctx->internal_frames[i]);
344}
345
346static int coda_alloc_framebuffers(struct coda_ctx *ctx,
347				   struct coda_q_data *q_data, u32 fourcc)
348{
349	struct coda_dev *dev = ctx->dev;
350	int width, height;
351	int ysize;
352	int ret;
353	int i;
354
355	if (ctx->codec && (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 ||
356	     ctx->codec->dst_fourcc == V4L2_PIX_FMT_H264)) {
357		width = round_up(q_data->width, 16);
358		height = round_up(q_data->height, 16);
359	} else {
360		width = round_up(q_data->width, 8);
361		height = q_data->height;
362	}
363	ysize = width * height;
364
365	/* Allocate frame buffers */
366	for (i = 0; i < ctx->num_internal_frames; i++) {
367		size_t size;
368		char *name;
369
370		if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
371			size = round_up(ysize, 4096) + ysize / 2;
372		else
373			size = ysize + ysize / 2;
374		if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 &&
375		    dev->devtype->product != CODA_DX6)
376			size += ysize / 4;
377		name = kasprintf(GFP_KERNEL, "fb%d", i);
378		ret = coda_alloc_context_buf(ctx, &ctx->internal_frames[i],
379					     size, name);
380		kfree(name);
381		if (ret < 0) {
382			coda_free_framebuffers(ctx);
383			return ret;
384		}
385	}
386
387	/* Register frame buffers in the parameter buffer */
388	for (i = 0; i < ctx->num_internal_frames; i++) {
389		u32 y, cb, cr;
390
391		/* Start addresses of Y, Cb, Cr planes */
392		y = ctx->internal_frames[i].paddr;
393		cb = y + ysize;
394		cr = y + ysize + ysize/4;
395		if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP) {
396			cb = round_up(cb, 4096);
397			cr = 0;
398			/* Packed 20-bit MSB of base addresses */
399			/* YYYYYCCC, CCyyyyyc, cccc.... */
400			y = (y & 0xfffff000) | cb >> 20;
401			cb = (cb & 0x000ff000) << 12;
402		}
403		coda_parabuf_write(ctx, i * 3 + 0, y);
404		coda_parabuf_write(ctx, i * 3 + 1, cb);
405		coda_parabuf_write(ctx, i * 3 + 2, cr);
406
407		/* mvcol buffer for h.264 */
408		if (ctx->codec->src_fourcc == V4L2_PIX_FMT_H264 &&
409		    dev->devtype->product != CODA_DX6)
410			coda_parabuf_write(ctx, 96 + i,
411					   ctx->internal_frames[i].paddr +
412					   ysize + ysize/4 + ysize/4);
413	}
414
415	/* mvcol buffer for mpeg4 */
416	if ((dev->devtype->product != CODA_DX6) &&
417	    (ctx->codec->src_fourcc == V4L2_PIX_FMT_MPEG4))
418		coda_parabuf_write(ctx, 97, ctx->internal_frames[0].paddr +
419					    ysize + ysize/4 + ysize/4);
420
421	return 0;
422}
423
424static void coda_free_context_buffers(struct coda_ctx *ctx)
425{
426	struct coda_dev *dev = ctx->dev;
427
428	coda_free_aux_buf(dev, &ctx->slicebuf);
429	coda_free_aux_buf(dev, &ctx->psbuf);
430	if (dev->devtype->product != CODA_DX6)
431		coda_free_aux_buf(dev, &ctx->workbuf);
432	coda_free_aux_buf(dev, &ctx->parabuf);
433}
434
435static int coda_alloc_context_buffers(struct coda_ctx *ctx,
436				      struct coda_q_data *q_data)
437{
438	struct coda_dev *dev = ctx->dev;
439	size_t size;
440	int ret;
441
442	if (!ctx->parabuf.vaddr) {
443		ret = coda_alloc_context_buf(ctx, &ctx->parabuf,
444					     CODA_PARA_BUF_SIZE, "parabuf");
445		if (ret < 0)
446			return ret;
447	}
448
449	if (dev->devtype->product == CODA_DX6)
450		return 0;
451
452	if (!ctx->slicebuf.vaddr && q_data->fourcc == V4L2_PIX_FMT_H264) {
453		/* worst case slice size */
454		size = (DIV_ROUND_UP(q_data->width, 16) *
455			DIV_ROUND_UP(q_data->height, 16)) * 3200 / 8 + 512;
456		ret = coda_alloc_context_buf(ctx, &ctx->slicebuf, size,
457					     "slicebuf");
458		if (ret < 0)
459			goto err;
460	}
461
462	if (!ctx->psbuf.vaddr && dev->devtype->product == CODA_7541) {
463		ret = coda_alloc_context_buf(ctx, &ctx->psbuf,
464					     CODA7_PS_BUF_SIZE, "psbuf");
465		if (ret < 0)
466			goto err;
467	}
468
469	if (!ctx->workbuf.vaddr) {
470		size = dev->devtype->workbuf_size;
471		if (dev->devtype->product == CODA_960 &&
472		    q_data->fourcc == V4L2_PIX_FMT_H264)
473			size += CODA9_PS_SAVE_SIZE;
474		ret = coda_alloc_context_buf(ctx, &ctx->workbuf, size,
475					     "workbuf");
476		if (ret < 0)
477			goto err;
478	}
479
480	return 0;
481
482err:
483	coda_free_context_buffers(ctx);
484	return ret;
485}
486
487static int coda_encode_header(struct coda_ctx *ctx, struct vb2_v4l2_buffer *buf,
488			      int header_code, u8 *header, int *size)
489{
490	struct vb2_buffer *vb = &buf->vb2_buf;
491	struct coda_dev *dev = ctx->dev;
492	size_t bufsize;
493	int ret;
494	int i;
495
496	if (dev->devtype->product == CODA_960)
497		memset(vb2_plane_vaddr(vb, 0), 0, 64);
498
499	coda_write(dev, vb2_dma_contig_plane_dma_addr(vb, 0),
500		   CODA_CMD_ENC_HEADER_BB_START);
501	bufsize = vb2_plane_size(vb, 0);
502	if (dev->devtype->product == CODA_960)
503		bufsize /= 1024;
504	coda_write(dev, bufsize, CODA_CMD_ENC_HEADER_BB_SIZE);
505	coda_write(dev, header_code, CODA_CMD_ENC_HEADER_CODE);
506	ret = coda_command_sync(ctx, CODA_COMMAND_ENCODE_HEADER);
507	if (ret < 0) {
508		v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_ENCODE_HEADER timeout\n");
509		return ret;
510	}
511
512	if (dev->devtype->product == CODA_960) {
513		for (i = 63; i > 0; i--)
514			if (((char *)vb2_plane_vaddr(vb, 0))[i] != 0)
515				break;
516		*size = i + 1;
517	} else {
518		*size = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx)) -
519			coda_read(dev, CODA_CMD_ENC_HEADER_BB_START);
520	}
521	memcpy(header, vb2_plane_vaddr(vb, 0), *size);
522
523	return 0;
524}
525
526static phys_addr_t coda_iram_alloc(struct coda_iram_info *iram, size_t size)
527{
528	phys_addr_t ret;
529
530	size = round_up(size, 1024);
531	if (size > iram->remaining)
532		return 0;
533	iram->remaining -= size;
534
535	ret = iram->next_paddr;
536	iram->next_paddr += size;
537
538	return ret;
539}
540
541static void coda_setup_iram(struct coda_ctx *ctx)
542{
543	struct coda_iram_info *iram_info = &ctx->iram_info;
544	struct coda_dev *dev = ctx->dev;
545	int w64, w128;
546	int mb_width;
547	int dbk_bits;
548	int bit_bits;
549	int ip_bits;
550
551	memset(iram_info, 0, sizeof(*iram_info));
552	iram_info->next_paddr = dev->iram.paddr;
553	iram_info->remaining = dev->iram.size;
554
555	if (!dev->iram.vaddr)
556		return;
557
558	switch (dev->devtype->product) {
559	case CODA_7541:
560		dbk_bits = CODA7_USE_HOST_DBK_ENABLE | CODA7_USE_DBK_ENABLE;
561		bit_bits = CODA7_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
562		ip_bits = CODA7_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
563		break;
564	case CODA_960:
565		dbk_bits = CODA9_USE_HOST_DBK_ENABLE | CODA9_USE_DBK_ENABLE;
566		bit_bits = CODA9_USE_HOST_BIT_ENABLE | CODA7_USE_BIT_ENABLE;
567		ip_bits = CODA9_USE_HOST_IP_ENABLE | CODA7_USE_IP_ENABLE;
568		break;
569	default: /* CODA_DX6 */
570		return;
571	}
572
573	if (ctx->inst_type == CODA_INST_ENCODER) {
574		struct coda_q_data *q_data_src;
575
576		q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
577		mb_width = DIV_ROUND_UP(q_data_src->width, 16);
578		w128 = mb_width * 128;
579		w64 = mb_width * 64;
580
581		/* Prioritize in case IRAM is too small for everything */
582		if (dev->devtype->product == CODA_7541) {
583			iram_info->search_ram_size = round_up(mb_width * 16 *
584							      36 + 2048, 1024);
585			iram_info->search_ram_paddr = coda_iram_alloc(iram_info,
586						iram_info->search_ram_size);
587			if (!iram_info->search_ram_paddr) {
588				pr_err("IRAM is smaller than the search ram size\n");
589				goto out;
590			}
591			iram_info->axi_sram_use |= CODA7_USE_HOST_ME_ENABLE |
592						   CODA7_USE_ME_ENABLE;
593		}
594
595		/* Only H.264BP and H.263P3 are considered */
596		iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w64);
597		iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w64);
598		if (!iram_info->buf_dbk_c_use)
599			goto out;
600		iram_info->axi_sram_use |= dbk_bits;
601
602		iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
603		if (!iram_info->buf_bit_use)
604			goto out;
605		iram_info->axi_sram_use |= bit_bits;
606
607		iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
608		if (!iram_info->buf_ip_ac_dc_use)
609			goto out;
610		iram_info->axi_sram_use |= ip_bits;
611
612		/* OVL and BTP disabled for encoder */
613	} else if (ctx->inst_type == CODA_INST_DECODER) {
614		struct coda_q_data *q_data_dst;
615
616		q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
617		mb_width = DIV_ROUND_UP(q_data_dst->width, 16);
618		w128 = mb_width * 128;
619
620		iram_info->buf_dbk_y_use = coda_iram_alloc(iram_info, w128);
621		iram_info->buf_dbk_c_use = coda_iram_alloc(iram_info, w128);
622		if (!iram_info->buf_dbk_c_use)
623			goto out;
624		iram_info->axi_sram_use |= dbk_bits;
625
626		iram_info->buf_bit_use = coda_iram_alloc(iram_info, w128);
627		if (!iram_info->buf_bit_use)
628			goto out;
629		iram_info->axi_sram_use |= bit_bits;
630
631		iram_info->buf_ip_ac_dc_use = coda_iram_alloc(iram_info, w128);
632		if (!iram_info->buf_ip_ac_dc_use)
633			goto out;
634		iram_info->axi_sram_use |= ip_bits;
635
636		/* OVL and BTP unused as there is no VC1 support yet */
637	}
638
639out:
640	if (!(iram_info->axi_sram_use & CODA7_USE_HOST_IP_ENABLE))
641		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
642			 "IRAM smaller than needed\n");
643
644	if (dev->devtype->product == CODA_7541) {
645		/* TODO - Enabling these causes picture errors on CODA7541 */
646		if (ctx->inst_type == CODA_INST_DECODER) {
647			/* fw 1.4.50 */
648			iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
649						     CODA7_USE_IP_ENABLE);
650		} else {
651			/* fw 13.4.29 */
652			iram_info->axi_sram_use &= ~(CODA7_USE_HOST_IP_ENABLE |
653						     CODA7_USE_HOST_DBK_ENABLE |
654						     CODA7_USE_IP_ENABLE |
655						     CODA7_USE_DBK_ENABLE);
656		}
657	}
658}
659
660static u32 coda_supported_firmwares[] = {
661	CODA_FIRMWARE_VERNUM(CODA_DX6, 2, 2, 5),
662	CODA_FIRMWARE_VERNUM(CODA_7541, 1, 4, 50),
663	CODA_FIRMWARE_VERNUM(CODA_960, 2, 1, 5),
664};
665
666static bool coda_firmware_supported(u32 vernum)
667{
668	int i;
669
670	for (i = 0; i < ARRAY_SIZE(coda_supported_firmwares); i++)
671		if (vernum == coda_supported_firmwares[i])
672			return true;
673	return false;
674}
675
676int coda_check_firmware(struct coda_dev *dev)
677{
678	u16 product, major, minor, release;
679	u32 data;
680	int ret;
681
682	ret = clk_prepare_enable(dev->clk_per);
683	if (ret)
684		goto err_clk_per;
685
686	ret = clk_prepare_enable(dev->clk_ahb);
687	if (ret)
688		goto err_clk_ahb;
689
690	coda_write(dev, 0, CODA_CMD_FIRMWARE_VERNUM);
691	coda_write(dev, CODA_REG_BIT_BUSY_FLAG, CODA_REG_BIT_BUSY);
692	coda_write(dev, 0, CODA_REG_BIT_RUN_INDEX);
693	coda_write(dev, 0, CODA_REG_BIT_RUN_COD_STD);
694	coda_write(dev, CODA_COMMAND_FIRMWARE_GET, CODA_REG_BIT_RUN_COMMAND);
695	if (coda_wait_timeout(dev)) {
696		v4l2_err(&dev->v4l2_dev, "firmware get command error\n");
697		ret = -EIO;
698		goto err_run_cmd;
699	}
700
701	if (dev->devtype->product == CODA_960) {
702		data = coda_read(dev, CODA9_CMD_FIRMWARE_CODE_REV);
703		v4l2_info(&dev->v4l2_dev, "Firmware code revision: %d\n",
704			  data);
705	}
706
707	/* Check we are compatible with the loaded firmware */
708	data = coda_read(dev, CODA_CMD_FIRMWARE_VERNUM);
709	product = CODA_FIRMWARE_PRODUCT(data);
710	major = CODA_FIRMWARE_MAJOR(data);
711	minor = CODA_FIRMWARE_MINOR(data);
712	release = CODA_FIRMWARE_RELEASE(data);
713
714	clk_disable_unprepare(dev->clk_per);
715	clk_disable_unprepare(dev->clk_ahb);
716
717	if (product != dev->devtype->product) {
718		v4l2_err(&dev->v4l2_dev,
719			 "Wrong firmware. Hw: %s, Fw: %s, Version: %u.%u.%u\n",
720			 coda_product_name(dev->devtype->product),
721			 coda_product_name(product), major, minor, release);
722		return -EINVAL;
723	}
724
725	v4l2_info(&dev->v4l2_dev, "Initialized %s.\n",
726		  coda_product_name(product));
727
728	if (coda_firmware_supported(data)) {
729		v4l2_info(&dev->v4l2_dev, "Firmware version: %u.%u.%u\n",
730			  major, minor, release);
731	} else {
732		v4l2_warn(&dev->v4l2_dev,
733			  "Unsupported firmware version: %u.%u.%u\n",
734			  major, minor, release);
735	}
736
737	return 0;
738
739err_run_cmd:
740	clk_disable_unprepare(dev->clk_ahb);
741err_clk_ahb:
742	clk_disable_unprepare(dev->clk_per);
743err_clk_per:
744	return ret;
745}
746
747static void coda9_set_frame_cache(struct coda_ctx *ctx, u32 fourcc)
748{
749	u32 cache_size, cache_config;
750
751	if (ctx->tiled_map_type == GDI_LINEAR_FRAME_MAP) {
752		/* Luma 2x0 page, 2x6 cache, chroma 2x0 page, 2x4 cache size */
753		cache_size = 0x20262024;
754		cache_config = 2 << CODA9_CACHE_PAGEMERGE_OFFSET;
755	} else {
756		/* Luma 0x2 page, 4x4 cache, chroma 0x2 page, 4x3 cache size */
757		cache_size = 0x02440243;
758		cache_config = 1 << CODA9_CACHE_PAGEMERGE_OFFSET;
759	}
760	coda_write(ctx->dev, cache_size, CODA9_CMD_SET_FRAME_CACHE_SIZE);
761	if (fourcc == V4L2_PIX_FMT_NV12) {
762		cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET |
763				16 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET |
764				0 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET;
765	} else {
766		cache_config |= 32 << CODA9_CACHE_LUMA_BUFFER_SIZE_OFFSET |
767				8 << CODA9_CACHE_CR_BUFFER_SIZE_OFFSET |
768				8 << CODA9_CACHE_CB_BUFFER_SIZE_OFFSET;
769	}
770	coda_write(ctx->dev, cache_config, CODA9_CMD_SET_FRAME_CACHE_CONFIG);
771}
772
773/*
774 * Encoder context operations
775 */
776
777static int coda_encoder_reqbufs(struct coda_ctx *ctx,
778				struct v4l2_requestbuffers *rb)
779{
780	struct coda_q_data *q_data_src;
781	int ret;
782
783	if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
784		return 0;
785
786	if (rb->count) {
787		q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
788		ret = coda_alloc_context_buffers(ctx, q_data_src);
789		if (ret < 0)
790			return ret;
791	} else {
792		coda_free_context_buffers(ctx);
793	}
794
795	return 0;
796}
797
798static int coda_start_encoding(struct coda_ctx *ctx)
799{
800	struct coda_dev *dev = ctx->dev;
801	struct v4l2_device *v4l2_dev = &dev->v4l2_dev;
802	struct coda_q_data *q_data_src, *q_data_dst;
803	u32 bitstream_buf, bitstream_size;
804	struct vb2_v4l2_buffer *buf;
805	int gamma, ret, value;
806	u32 dst_fourcc;
807	int num_fb;
808	u32 stride;
809
810	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
811	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
812	dst_fourcc = q_data_dst->fourcc;
813
814	buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
815	bitstream_buf = vb2_dma_contig_plane_dma_addr(&buf->vb2_buf, 0);
816	bitstream_size = q_data_dst->sizeimage;
817
818	if (!coda_is_initialized(dev)) {
819		v4l2_err(v4l2_dev, "coda is not initialized.\n");
820		return -EFAULT;
821	}
822
823	if (dst_fourcc == V4L2_PIX_FMT_JPEG) {
824		if (!ctx->params.jpeg_qmat_tab[0])
825			ctx->params.jpeg_qmat_tab[0] = kmalloc(64, GFP_KERNEL);
826		if (!ctx->params.jpeg_qmat_tab[1])
827			ctx->params.jpeg_qmat_tab[1] = kmalloc(64, GFP_KERNEL);
828		coda_set_jpeg_compression_quality(ctx, ctx->params.jpeg_quality);
829	}
830
831	mutex_lock(&dev->coda_mutex);
832
833	coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);
834	coda_write(dev, bitstream_buf, CODA_REG_BIT_RD_PTR(ctx->reg_idx));
835	coda_write(dev, bitstream_buf, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
836	switch (dev->devtype->product) {
837	case CODA_DX6:
838		coda_write(dev, CODADX6_STREAM_BUF_DYNALLOC_EN |
839			CODADX6_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
840		break;
841	case CODA_960:
842		coda_write(dev, 0, CODA9_GDI_WPROT_RGN_EN);
843		/* fallthrough */
844	case CODA_7541:
845		coda_write(dev, CODA7_STREAM_BUF_DYNALLOC_EN |
846			CODA7_STREAM_BUF_PIC_RESET, CODA_REG_BIT_STREAM_CTRL);
847		break;
848	}
849
850	ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
851				 CODA9_FRAME_TILED2LINEAR);
852	if (q_data_src->fourcc == V4L2_PIX_FMT_NV12)
853		ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
854	if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
855		ctx->frame_mem_ctrl |= (0x3 << 9) | CODA9_FRAME_TILED2LINEAR;
856	coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);
857
858	if (dev->devtype->product == CODA_DX6) {
859		/* Configure the coda */
860		coda_write(dev, dev->iram.paddr,
861			   CODADX6_REG_BIT_SEARCH_RAM_BASE_ADDR);
862	}
863
864	/* Could set rotation here if needed */
865	value = 0;
866	switch (dev->devtype->product) {
867	case CODA_DX6:
868		value = (q_data_src->width & CODADX6_PICWIDTH_MASK)
869			<< CODADX6_PICWIDTH_OFFSET;
870		value |= (q_data_src->height & CODADX6_PICHEIGHT_MASK)
871			 << CODA_PICHEIGHT_OFFSET;
872		break;
873	case CODA_7541:
874		if (dst_fourcc == V4L2_PIX_FMT_H264) {
875			value = (round_up(q_data_src->width, 16) &
876				 CODA7_PICWIDTH_MASK) << CODA7_PICWIDTH_OFFSET;
877			value |= (round_up(q_data_src->height, 16) &
878				 CODA7_PICHEIGHT_MASK) << CODA_PICHEIGHT_OFFSET;
879			break;
880		}
881		/* fallthrough */
882	case CODA_960:
883		value = (q_data_src->width & CODA7_PICWIDTH_MASK)
884			<< CODA7_PICWIDTH_OFFSET;
885		value |= (q_data_src->height & CODA7_PICHEIGHT_MASK)
886			 << CODA_PICHEIGHT_OFFSET;
887	}
888	coda_write(dev, value, CODA_CMD_ENC_SEQ_SRC_SIZE);
889	if (dst_fourcc == V4L2_PIX_FMT_JPEG)
890		ctx->params.framerate = 0;
891	coda_write(dev, ctx->params.framerate,
892		   CODA_CMD_ENC_SEQ_SRC_F_RATE);
893
894	ctx->params.codec_mode = ctx->codec->mode;
895	switch (dst_fourcc) {
896	case V4L2_PIX_FMT_MPEG4:
897		if (dev->devtype->product == CODA_960)
898			coda_write(dev, CODA9_STD_MPEG4,
899				   CODA_CMD_ENC_SEQ_COD_STD);
900		else
901			coda_write(dev, CODA_STD_MPEG4,
902				   CODA_CMD_ENC_SEQ_COD_STD);
903		coda_write(dev, 0, CODA_CMD_ENC_SEQ_MP4_PARA);
904		break;
905	case V4L2_PIX_FMT_H264:
906		if (dev->devtype->product == CODA_960)
907			coda_write(dev, CODA9_STD_H264,
908				   CODA_CMD_ENC_SEQ_COD_STD);
909		else
910			coda_write(dev, CODA_STD_H264,
911				   CODA_CMD_ENC_SEQ_COD_STD);
912		if (ctx->params.h264_deblk_enabled) {
913			value = ((ctx->params.h264_deblk_alpha &
914				  CODA_264PARAM_DEBLKFILTEROFFSETALPHA_MASK) <<
915				 CODA_264PARAM_DEBLKFILTEROFFSETALPHA_OFFSET) |
916				((ctx->params.h264_deblk_beta &
917				  CODA_264PARAM_DEBLKFILTEROFFSETBETA_MASK) <<
918				 CODA_264PARAM_DEBLKFILTEROFFSETBETA_OFFSET);
919		} else {
920			value = 1 << CODA_264PARAM_DISABLEDEBLK_OFFSET;
921		}
922		coda_write(dev, value, CODA_CMD_ENC_SEQ_264_PARA);
923		break;
924	case V4L2_PIX_FMT_JPEG:
925		coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_PARA);
926		coda_write(dev, ctx->params.jpeg_restart_interval,
927				CODA_CMD_ENC_SEQ_JPG_RST_INTERVAL);
928		coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_EN);
929		coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_SIZE);
930		coda_write(dev, 0, CODA_CMD_ENC_SEQ_JPG_THUMB_OFFSET);
931
932		coda_jpeg_write_tables(ctx);
933		break;
934	default:
935		v4l2_err(v4l2_dev,
936			 "dst format (0x%08x) invalid.\n", dst_fourcc);
937		ret = -EINVAL;
938		goto out;
939	}
940
941	/*
942	 * slice mode and GOP size registers are used for thumb size/offset
943	 * in JPEG mode
944	 */
945	if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
946		switch (ctx->params.slice_mode) {
947		case V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE:
948			value = 0;
949			break;
950		case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_MB:
951			value  = (ctx->params.slice_max_mb &
952				  CODA_SLICING_SIZE_MASK)
953				 << CODA_SLICING_SIZE_OFFSET;
954			value |= (1 & CODA_SLICING_UNIT_MASK)
955				 << CODA_SLICING_UNIT_OFFSET;
956			value |=  1 & CODA_SLICING_MODE_MASK;
957			break;
958		case V4L2_MPEG_VIDEO_MULTI_SICE_MODE_MAX_BYTES:
959			value  = (ctx->params.slice_max_bits &
960				  CODA_SLICING_SIZE_MASK)
961				 << CODA_SLICING_SIZE_OFFSET;
962			value |= (0 & CODA_SLICING_UNIT_MASK)
963				 << CODA_SLICING_UNIT_OFFSET;
964			value |=  1 & CODA_SLICING_MODE_MASK;
965			break;
966		}
967		coda_write(dev, value, CODA_CMD_ENC_SEQ_SLICE_MODE);
968		value = ctx->params.gop_size & CODA_GOP_SIZE_MASK;
969		coda_write(dev, value, CODA_CMD_ENC_SEQ_GOP_SIZE);
970	}
971
972	if (ctx->params.bitrate) {
973		/* Rate control enabled */
974		value = (ctx->params.bitrate & CODA_RATECONTROL_BITRATE_MASK)
975			<< CODA_RATECONTROL_BITRATE_OFFSET;
976		value |=  1 & CODA_RATECONTROL_ENABLE_MASK;
977		value |= (ctx->params.vbv_delay &
978			  CODA_RATECONTROL_INITIALDELAY_MASK)
979			 << CODA_RATECONTROL_INITIALDELAY_OFFSET;
980		if (dev->devtype->product == CODA_960)
981			value |= BIT(31); /* disable autoskip */
982	} else {
983		value = 0;
984	}
985	coda_write(dev, value, CODA_CMD_ENC_SEQ_RC_PARA);
986
987	coda_write(dev, ctx->params.vbv_size, CODA_CMD_ENC_SEQ_RC_BUF_SIZE);
988	coda_write(dev, ctx->params.intra_refresh,
989		   CODA_CMD_ENC_SEQ_INTRA_REFRESH);
990
991	coda_write(dev, bitstream_buf, CODA_CMD_ENC_SEQ_BB_START);
992	coda_write(dev, bitstream_size / 1024, CODA_CMD_ENC_SEQ_BB_SIZE);
993
994
995	value = 0;
996	if (dev->devtype->product == CODA_960)
997		gamma = CODA9_DEFAULT_GAMMA;
998	else
999		gamma = CODA_DEFAULT_GAMMA;
1000	if (gamma > 0) {
1001		coda_write(dev, (gamma & CODA_GAMMA_MASK) << CODA_GAMMA_OFFSET,
1002			   CODA_CMD_ENC_SEQ_RC_GAMMA);
1003	}
1004
1005	if (ctx->params.h264_min_qp || ctx->params.h264_max_qp) {
1006		coda_write(dev,
1007			   ctx->params.h264_min_qp << CODA_QPMIN_OFFSET |
1008			   ctx->params.h264_max_qp << CODA_QPMAX_OFFSET,
1009			   CODA_CMD_ENC_SEQ_RC_QP_MIN_MAX);
1010	}
1011	if (dev->devtype->product == CODA_960) {
1012		if (ctx->params.h264_max_qp)
1013			value |= 1 << CODA9_OPTION_RCQPMAX_OFFSET;
1014		if (CODA_DEFAULT_GAMMA > 0)
1015			value |= 1 << CODA9_OPTION_GAMMA_OFFSET;
1016	} else {
1017		if (CODA_DEFAULT_GAMMA > 0) {
1018			if (dev->devtype->product == CODA_DX6)
1019				value |= 1 << CODADX6_OPTION_GAMMA_OFFSET;
1020			else
1021				value |= 1 << CODA7_OPTION_GAMMA_OFFSET;
1022		}
1023		if (ctx->params.h264_min_qp)
1024			value |= 1 << CODA7_OPTION_RCQPMIN_OFFSET;
1025		if (ctx->params.h264_max_qp)
1026			value |= 1 << CODA7_OPTION_RCQPMAX_OFFSET;
1027	}
1028	coda_write(dev, value, CODA_CMD_ENC_SEQ_OPTION);
1029
1030	coda_write(dev, 0, CODA_CMD_ENC_SEQ_RC_INTERVAL_MODE);
1031
1032	coda_setup_iram(ctx);
1033
1034	if (dst_fourcc == V4L2_PIX_FMT_H264) {
1035		switch (dev->devtype->product) {
1036		case CODA_DX6:
1037			value = FMO_SLICE_SAVE_BUF_SIZE << 7;
1038			coda_write(dev, value, CODADX6_CMD_ENC_SEQ_FMO);
1039			break;
1040		case CODA_7541:
1041			coda_write(dev, ctx->iram_info.search_ram_paddr,
1042					CODA7_CMD_ENC_SEQ_SEARCH_BASE);
1043			coda_write(dev, ctx->iram_info.search_ram_size,
1044					CODA7_CMD_ENC_SEQ_SEARCH_SIZE);
1045			break;
1046		case CODA_960:
1047			coda_write(dev, 0, CODA9_CMD_ENC_SEQ_ME_OPTION);
1048			coda_write(dev, 0, CODA9_CMD_ENC_SEQ_INTRA_WEIGHT);
1049		}
1050	}
1051
1052	ret = coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT);
1053	if (ret < 0) {
1054		v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
1055		goto out;
1056	}
1057
1058	if (coda_read(dev, CODA_RET_ENC_SEQ_SUCCESS) == 0) {
1059		v4l2_err(v4l2_dev, "CODA_COMMAND_SEQ_INIT failed\n");
1060		ret = -EFAULT;
1061		goto out;
1062	}
1063	ctx->initialized = 1;
1064
1065	if (dst_fourcc != V4L2_PIX_FMT_JPEG) {
1066		if (dev->devtype->product == CODA_960)
1067			ctx->num_internal_frames = 4;
1068		else
1069			ctx->num_internal_frames = 2;
1070		ret = coda_alloc_framebuffers(ctx, q_data_src, dst_fourcc);
1071		if (ret < 0) {
1072			v4l2_err(v4l2_dev, "failed to allocate framebuffers\n");
1073			goto out;
1074		}
1075		num_fb = 2;
1076		stride = q_data_src->bytesperline;
1077	} else {
1078		ctx->num_internal_frames = 0;
1079		num_fb = 0;
1080		stride = 0;
1081	}
1082	coda_write(dev, num_fb, CODA_CMD_SET_FRAME_BUF_NUM);
1083	coda_write(dev, stride, CODA_CMD_SET_FRAME_BUF_STRIDE);
1084
1085	if (dev->devtype->product == CODA_7541) {
1086		coda_write(dev, q_data_src->bytesperline,
1087				CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE);
1088	}
1089	if (dev->devtype->product != CODA_DX6) {
1090		coda_write(dev, ctx->iram_info.buf_bit_use,
1091				CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
1092		coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
1093				CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
1094		coda_write(dev, ctx->iram_info.buf_dbk_y_use,
1095				CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
1096		coda_write(dev, ctx->iram_info.buf_dbk_c_use,
1097				CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
1098		coda_write(dev, ctx->iram_info.buf_ovl_use,
1099				CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
1100		if (dev->devtype->product == CODA_960) {
1101			coda_write(dev, ctx->iram_info.buf_btp_use,
1102					CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);
1103
1104			coda9_set_frame_cache(ctx, q_data_src->fourcc);
1105
1106			/* FIXME */
1107			coda_write(dev, ctx->internal_frames[2].paddr,
1108				   CODA9_CMD_SET_FRAME_SUBSAMP_A);
1109			coda_write(dev, ctx->internal_frames[3].paddr,
1110				   CODA9_CMD_SET_FRAME_SUBSAMP_B);
1111		}
1112	}
1113
1114	ret = coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF);
1115	if (ret < 0) {
1116		v4l2_err(v4l2_dev, "CODA_COMMAND_SET_FRAME_BUF timeout\n");
1117		goto out;
1118	}
1119
1120	/* Save stream headers */
1121	buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1122	switch (dst_fourcc) {
1123	case V4L2_PIX_FMT_H264:
1124		/*
1125		 * Get SPS in the first frame and copy it to an
1126		 * intermediate buffer.
1127		 */
1128		ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_SPS,
1129					 &ctx->vpu_header[0][0],
1130					 &ctx->vpu_header_size[0]);
1131		if (ret < 0)
1132			goto out;
1133
1134		/*
1135		 * Get PPS in the first frame and copy it to an
1136		 * intermediate buffer.
1137		 */
1138		ret = coda_encode_header(ctx, buf, CODA_HEADER_H264_PPS,
1139					 &ctx->vpu_header[1][0],
1140					 &ctx->vpu_header_size[1]);
1141		if (ret < 0)
1142			goto out;
1143
1144		/*
1145		 * Length of H.264 headers is variable and thus it might not be
1146		 * aligned for the coda to append the encoded frame. In that is
1147		 * the case a filler NAL must be added to header 2.
1148		 */
1149		ctx->vpu_header_size[2] = coda_h264_padding(
1150					(ctx->vpu_header_size[0] +
1151					 ctx->vpu_header_size[1]),
1152					 ctx->vpu_header[2]);
1153		break;
1154	case V4L2_PIX_FMT_MPEG4:
1155		/*
1156		 * Get VOS in the first frame and copy it to an
1157		 * intermediate buffer
1158		 */
1159		ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOS,
1160					 &ctx->vpu_header[0][0],
1161					 &ctx->vpu_header_size[0]);
1162		if (ret < 0)
1163			goto out;
1164
1165		ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VIS,
1166					 &ctx->vpu_header[1][0],
1167					 &ctx->vpu_header_size[1]);
1168		if (ret < 0)
1169			goto out;
1170
1171		ret = coda_encode_header(ctx, buf, CODA_HEADER_MP4V_VOL,
1172					 &ctx->vpu_header[2][0],
1173					 &ctx->vpu_header_size[2]);
1174		if (ret < 0)
1175			goto out;
1176		break;
1177	default:
1178		/* No more formats need to save headers at the moment */
1179		break;
1180	}
1181
1182out:
1183	mutex_unlock(&dev->coda_mutex);
1184	return ret;
1185}
1186
1187static int coda_prepare_encode(struct coda_ctx *ctx)
1188{
1189	struct coda_q_data *q_data_src, *q_data_dst;
1190	struct vb2_v4l2_buffer *src_buf, *dst_buf;
1191	struct coda_dev *dev = ctx->dev;
1192	int force_ipicture;
1193	int quant_param = 0;
1194	u32 pic_stream_buffer_addr, pic_stream_buffer_size;
1195	u32 rot_mode = 0;
1196	u32 dst_fourcc;
1197	u32 reg;
1198
1199	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
1200	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1201	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1202	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1203	dst_fourcc = q_data_dst->fourcc;
1204
1205	src_buf->sequence = ctx->osequence;
1206	dst_buf->sequence = ctx->osequence;
1207	ctx->osequence++;
1208
1209	/*
1210	 * Workaround coda firmware BUG that only marks the first
1211	 * frame as IDR. This is a problem for some decoders that can't
1212	 * recover when a frame is lost.
1213	 */
1214	if (src_buf->sequence % ctx->params.gop_size) {
1215		src_buf->flags |= V4L2_BUF_FLAG_PFRAME;
1216		src_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1217	} else {
1218		src_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1219		src_buf->flags &= ~V4L2_BUF_FLAG_PFRAME;
1220	}
1221
1222	if (dev->devtype->product == CODA_960)
1223		coda_set_gdi_regs(ctx);
1224
1225	/*
1226	 * Copy headers at the beginning of the first frame for H.264 only.
1227	 * In MPEG4 they are already copied by the coda.
1228	 */
1229	if (src_buf->sequence == 0) {
1230		pic_stream_buffer_addr =
1231			vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0) +
1232			ctx->vpu_header_size[0] +
1233			ctx->vpu_header_size[1] +
1234			ctx->vpu_header_size[2];
1235		pic_stream_buffer_size = q_data_dst->sizeimage -
1236			ctx->vpu_header_size[0] -
1237			ctx->vpu_header_size[1] -
1238			ctx->vpu_header_size[2];
1239		memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0),
1240		       &ctx->vpu_header[0][0], ctx->vpu_header_size[0]);
1241		memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0)
1242			+ ctx->vpu_header_size[0], &ctx->vpu_header[1][0],
1243			ctx->vpu_header_size[1]);
1244		memcpy(vb2_plane_vaddr(&dst_buf->vb2_buf, 0)
1245			+ ctx->vpu_header_size[0] + ctx->vpu_header_size[1],
1246			&ctx->vpu_header[2][0], ctx->vpu_header_size[2]);
1247	} else {
1248		pic_stream_buffer_addr =
1249			vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
1250		pic_stream_buffer_size = q_data_dst->sizeimage;
1251	}
1252
1253	if (src_buf->flags & V4L2_BUF_FLAG_KEYFRAME) {
1254		force_ipicture = 1;
1255		switch (dst_fourcc) {
1256		case V4L2_PIX_FMT_H264:
1257			quant_param = ctx->params.h264_intra_qp;
1258			break;
1259		case V4L2_PIX_FMT_MPEG4:
1260			quant_param = ctx->params.mpeg4_intra_qp;
1261			break;
1262		case V4L2_PIX_FMT_JPEG:
1263			quant_param = 30;
1264			break;
1265		default:
1266			v4l2_warn(&ctx->dev->v4l2_dev,
1267				"cannot set intra qp, fmt not supported\n");
1268			break;
1269		}
1270	} else {
1271		force_ipicture = 0;
1272		switch (dst_fourcc) {
1273		case V4L2_PIX_FMT_H264:
1274			quant_param = ctx->params.h264_inter_qp;
1275			break;
1276		case V4L2_PIX_FMT_MPEG4:
1277			quant_param = ctx->params.mpeg4_inter_qp;
1278			break;
1279		default:
1280			v4l2_warn(&ctx->dev->v4l2_dev,
1281				"cannot set inter qp, fmt not supported\n");
1282			break;
1283		}
1284	}
1285
1286	/* submit */
1287	if (ctx->params.rot_mode)
1288		rot_mode = CODA_ROT_MIR_ENABLE | ctx->params.rot_mode;
1289	coda_write(dev, rot_mode, CODA_CMD_ENC_PIC_ROT_MODE);
1290	coda_write(dev, quant_param, CODA_CMD_ENC_PIC_QS);
1291
1292	if (dev->devtype->product == CODA_960) {
1293		coda_write(dev, 4/*FIXME: 0*/, CODA9_CMD_ENC_PIC_SRC_INDEX);
1294		coda_write(dev, q_data_src->width, CODA9_CMD_ENC_PIC_SRC_STRIDE);
1295		coda_write(dev, 0, CODA9_CMD_ENC_PIC_SUB_FRAME_SYNC);
1296
1297		reg = CODA9_CMD_ENC_PIC_SRC_ADDR_Y;
1298	} else {
1299		reg = CODA_CMD_ENC_PIC_SRC_ADDR_Y;
1300	}
1301	coda_write_base(ctx, q_data_src, src_buf, reg);
1302
1303	coda_write(dev, force_ipicture << 1 & 0x2,
1304		   CODA_CMD_ENC_PIC_OPTION);
1305
1306	coda_write(dev, pic_stream_buffer_addr, CODA_CMD_ENC_PIC_BB_START);
1307	coda_write(dev, pic_stream_buffer_size / 1024,
1308		   CODA_CMD_ENC_PIC_BB_SIZE);
1309
1310	if (!ctx->streamon_out) {
1311		/* After streamoff on the output side, set stream end flag */
1312		ctx->bit_stream_param |= CODA_BIT_STREAM_END_FLAG;
1313		coda_write(dev, ctx->bit_stream_param,
1314			   CODA_REG_BIT_BIT_STREAM_PARAM);
1315	}
1316
1317	if (dev->devtype->product != CODA_DX6)
1318		coda_write(dev, ctx->iram_info.axi_sram_use,
1319				CODA7_REG_BIT_AXI_SRAM_USE);
1320
1321	trace_coda_enc_pic_run(ctx, src_buf);
1322
1323	coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
1324
1325	return 0;
1326}
1327
1328static void coda_finish_encode(struct coda_ctx *ctx)
1329{
1330	struct vb2_v4l2_buffer *src_buf, *dst_buf;
1331	struct coda_dev *dev = ctx->dev;
1332	u32 wr_ptr, start_ptr;
1333
1334	src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
1335	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1336
1337	trace_coda_enc_pic_done(ctx, dst_buf);
1338
1339	/* Get results from the coda */
1340	start_ptr = coda_read(dev, CODA_CMD_ENC_PIC_BB_START);
1341	wr_ptr = coda_read(dev, CODA_REG_BIT_WR_PTR(ctx->reg_idx));
1342
1343	/* Calculate bytesused field */
1344	if (dst_buf->sequence == 0) {
1345		vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr +
1346					ctx->vpu_header_size[0] +
1347					ctx->vpu_header_size[1] +
1348					ctx->vpu_header_size[2]);
1349	} else {
1350		vb2_set_plane_payload(&dst_buf->vb2_buf, 0, wr_ptr - start_ptr);
1351	}
1352
1353	v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev, "frame size = %u\n",
1354		 wr_ptr - start_ptr);
1355
1356	coda_read(dev, CODA_RET_ENC_PIC_SLICE_NUM);
1357	coda_read(dev, CODA_RET_ENC_PIC_FLAG);
1358
1359	if (coda_read(dev, CODA_RET_ENC_PIC_TYPE) == 0) {
1360		dst_buf->flags |= V4L2_BUF_FLAG_KEYFRAME;
1361		dst_buf->flags &= ~V4L2_BUF_FLAG_PFRAME;
1362	} else {
1363		dst_buf->flags |= V4L2_BUF_FLAG_PFRAME;
1364		dst_buf->flags &= ~V4L2_BUF_FLAG_KEYFRAME;
1365	}
1366
1367	dst_buf->timestamp = src_buf->timestamp;
1368	dst_buf->flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1369	dst_buf->flags |=
1370		src_buf->flags & V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
1371	dst_buf->timecode = src_buf->timecode;
1372
1373	v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
1374
1375	dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
1376	coda_m2m_buf_done(ctx, dst_buf, VB2_BUF_STATE_DONE);
1377
1378	ctx->gopcounter--;
1379	if (ctx->gopcounter < 0)
1380		ctx->gopcounter = ctx->params.gop_size - 1;
1381
1382	v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1383		"job finished: encoding frame (%d) (%s)\n",
1384		dst_buf->sequence,
1385		(dst_buf->flags & V4L2_BUF_FLAG_KEYFRAME) ?
1386		"KEYFRAME" : "PFRAME");
1387}
1388
1389static void coda_seq_end_work(struct work_struct *work)
1390{
1391	struct coda_ctx *ctx = container_of(work, struct coda_ctx, seq_end_work);
1392	struct coda_dev *dev = ctx->dev;
1393
1394	mutex_lock(&ctx->buffer_mutex);
1395	mutex_lock(&dev->coda_mutex);
1396
1397	if (ctx->initialized == 0)
1398		goto out;
1399
1400	v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1401		 "%d: %s: sent command 'SEQ_END' to coda\n", ctx->idx,
1402		 __func__);
1403	if (coda_command_sync(ctx, CODA_COMMAND_SEQ_END)) {
1404		v4l2_err(&dev->v4l2_dev,
1405			 "CODA_COMMAND_SEQ_END failed\n");
1406	}
1407
1408	/*
1409	 * FIXME: Sometimes h.264 encoding fails with 8-byte sequences missing
1410	 * from the output stream after the h.264 decoder has run. Resetting the
1411	 * hardware after the decoder has finished seems to help.
1412	 */
1413	if (dev->devtype->product == CODA_960)
1414		coda_hw_reset(ctx);
1415
1416	kfifo_init(&ctx->bitstream_fifo,
1417		ctx->bitstream.vaddr, ctx->bitstream.size);
1418
1419	coda_free_framebuffers(ctx);
1420
1421	ctx->initialized = 0;
1422
1423out:
1424	mutex_unlock(&dev->coda_mutex);
1425	mutex_unlock(&ctx->buffer_mutex);
1426}
1427
1428static void coda_bit_release(struct coda_ctx *ctx)
1429{
1430	mutex_lock(&ctx->buffer_mutex);
1431	coda_free_framebuffers(ctx);
1432	coda_free_context_buffers(ctx);
1433	coda_free_bitstream_buffer(ctx);
1434	mutex_unlock(&ctx->buffer_mutex);
1435}
1436
/*
 * Context operations for BIT processor encoder instances. Each member
 * points at the encoder implementation of the corresponding callback;
 * seq_end_work and release are the generic BIT processor versions
 * defined in this file.
 */
const struct coda_context_ops coda_bit_encode_ops = {
	.queue_init = coda_encoder_queue_init,
	.reqbufs = coda_encoder_reqbufs,
	.start_streaming = coda_start_encoding,
	/* programs the registers and starts PIC_RUN for one frame */
	.prepare_run = coda_prepare_encode,
	/* collects the results of that PIC_RUN and completes the buffers */
	.finish_run = coda_finish_encode,
	.seq_end_work = coda_seq_end_work,
	.release = coda_bit_release,
};
1446
1447/*
1448 * Decoder context operations
1449 */
1450
1451static int coda_alloc_bitstream_buffer(struct coda_ctx *ctx,
1452				       struct coda_q_data *q_data)
1453{
1454	if (ctx->bitstream.vaddr)
1455		return 0;
1456
1457	ctx->bitstream.size = roundup_pow_of_two(q_data->sizeimage * 2);
1458	ctx->bitstream.vaddr = dma_alloc_writecombine(
1459			&ctx->dev->plat_dev->dev, ctx->bitstream.size,
1460			&ctx->bitstream.paddr, GFP_KERNEL);
1461	if (!ctx->bitstream.vaddr) {
1462		v4l2_err(&ctx->dev->v4l2_dev,
1463			 "failed to allocate bitstream ringbuffer");
1464		return -ENOMEM;
1465	}
1466	kfifo_init(&ctx->bitstream_fifo,
1467		   ctx->bitstream.vaddr, ctx->bitstream.size);
1468
1469	return 0;
1470}
1471
1472static void coda_free_bitstream_buffer(struct coda_ctx *ctx)
1473{
1474	if (ctx->bitstream.vaddr == NULL)
1475		return;
1476
1477	dma_free_writecombine(&ctx->dev->plat_dev->dev, ctx->bitstream.size,
1478			      ctx->bitstream.vaddr, ctx->bitstream.paddr);
1479	ctx->bitstream.vaddr = NULL;
1480	kfifo_init(&ctx->bitstream_fifo, NULL, 0);
1481}
1482
1483static int coda_decoder_reqbufs(struct coda_ctx *ctx,
1484				struct v4l2_requestbuffers *rb)
1485{
1486	struct coda_q_data *q_data_src;
1487	int ret;
1488
1489	if (rb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
1490		return 0;
1491
1492	if (rb->count) {
1493		q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1494		ret = coda_alloc_context_buffers(ctx, q_data_src);
1495		if (ret < 0)
1496			return ret;
1497		ret = coda_alloc_bitstream_buffer(ctx, q_data_src);
1498		if (ret < 0) {
1499			coda_free_context_buffers(ctx);
1500			return ret;
1501		}
1502	} else {
1503		coda_free_bitstream_buffer(ctx);
1504		coda_free_context_buffers(ctx);
1505	}
1506
1507	return 0;
1508}
1509
/*
 * Initialize a decoding sequence on the BIT processor.
 *
 * Issues SEQ_INIT on the data currently in the bitstream ringbuffer,
 * reads back the stream dimensions, crop rectangle (H.264 only) and the
 * number of frame buffers the firmware asks for, allocates the internal
 * framebuffers and registers them with SET_FRAME_BUF.
 *
 * Returns 0 on success, -ETIMEDOUT if SEQ_INIT or SET_FRAME_BUF does not
 * complete, -EAGAIN if SEQ_INIT completes but reports failure, -EINVAL if
 * the stream does not fit the capture format or needs more than
 * CODA_MAX_FRAMEBUFFERS frame buffers, or the error returned by
 * coda_alloc_framebuffers().
 *
 * This function does not take dev->coda_mutex itself;
 * coda_start_decoding() wraps it with that mutex held.
 * NOTE(review): coda_prepare_decode() calls it directly - presumably its
 * caller already holds the mutex; confirm.
 */
static int __coda_start_decoding(struct coda_ctx *ctx)
{
	struct coda_q_data *q_data_src, *q_data_dst;
	u32 bitstream_buf, bitstream_size;
	struct coda_dev *dev = ctx->dev;
	int width, height;
	u32 src_fourcc, dst_fourcc;
	u32 val;
	int ret;

	/* Start decoding */
	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
	bitstream_buf = ctx->bitstream.paddr;
	bitstream_size = ctx->bitstream.size;
	src_fourcc = q_data_src->fourcc;
	dst_fourcc = q_data_dst->fourcc;

	coda_write(dev, ctx->parabuf.paddr, CODA_REG_BIT_PARA_BUF_ADDR);

	/* Update coda bitstream read and write pointers from kfifo */
	coda_kfifo_sync_to_device_full(ctx);

	/*
	 * Rebuild the frame memory control bits from scratch: chroma
	 * interleaving is enabled for NV12 output, and the tiled-to-linear
	 * bits are set for the MB raster tiled map type.
	 */
	ctx->frame_mem_ctrl &= ~(CODA_FRAME_CHROMA_INTERLEAVE | (0x3 << 9) |
				 CODA9_FRAME_TILED2LINEAR);
	if (dst_fourcc == V4L2_PIX_FMT_NV12)
		ctx->frame_mem_ctrl |= CODA_FRAME_CHROMA_INTERLEAVE;
	if (ctx->tiled_map_type == GDI_TILED_FRAME_MB_RASTER_MAP)
		ctx->frame_mem_ctrl |= (0x3 << 9) | CODA9_FRAME_TILED2LINEAR;
	coda_write(dev, ctx->frame_mem_ctrl, CODA_REG_BIT_FRAME_MEM_CTRL);

	/* No frame is displayed yet and no frame is held for display */
	ctx->display_idx = -1;
	ctx->frm_dis_flg = 0;
	coda_write(dev, 0, CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));

	/* Cleared again below once SEQ_INIT has finished or timed out */
	coda_write(dev, CODA_BIT_DEC_SEQ_INIT_ESCAPE,
			CODA_REG_BIT_BIT_STREAM_PARAM);

	/* The bitstream buffer size is programmed in units of 1 KiB */
	coda_write(dev, bitstream_buf, CODA_CMD_DEC_SEQ_BB_START);
	coda_write(dev, bitstream_size / 1024, CODA_CMD_DEC_SEQ_BB_SIZE);
	val = 0;
	if ((dev->devtype->product == CODA_7541) ||
	    (dev->devtype->product == CODA_960))
		val |= CODA_REORDER_ENABLE;
	if (ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG)
		val |= CODA_NO_INT_ENABLE;
	coda_write(dev, val, CODA_CMD_DEC_SEQ_OPTION);

	ctx->params.codec_mode = ctx->codec->mode;
	if (dev->devtype->product == CODA_960 &&
	    src_fourcc == V4L2_PIX_FMT_MPEG4)
		ctx->params.codec_mode_aux = CODA_MP4_AUX_MPEG4;
	else
		ctx->params.codec_mode_aux = 0;
	/* H.264 needs extra, product specific SEQ_INIT parameters */
	if (src_fourcc == V4L2_PIX_FMT_H264) {
		if (dev->devtype->product == CODA_7541) {
			coda_write(dev, ctx->psbuf.paddr,
					CODA_CMD_DEC_SEQ_PS_BB_START);
			coda_write(dev, (CODA7_PS_BUF_SIZE / 1024),
					CODA_CMD_DEC_SEQ_PS_BB_SIZE);
		}
		if (dev->devtype->product == CODA_960) {
			coda_write(dev, 0, CODA_CMD_DEC_SEQ_X264_MV_EN);
			coda_write(dev, 512, CODA_CMD_DEC_SEQ_SPP_CHUNK_SIZE);
		}
	}
	if (dev->devtype->product != CODA_960)
		coda_write(dev, 0, CODA_CMD_DEC_SEQ_SRC_SIZE);

	if (coda_command_sync(ctx, CODA_COMMAND_SEQ_INIT)) {
		v4l2_err(&dev->v4l2_dev, "CODA_COMMAND_SEQ_INIT timeout\n");
		coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM);
		return -ETIMEDOUT;
	}
	/*
	 * Note: this is set as soon as the command has completed, before
	 * the success check below, so it stays set even if SEQ_INIT
	 * reports failure.
	 */
	ctx->initialized = 1;

	/* Update kfifo out pointer from coda bitstream read pointer */
	coda_kfifo_sync_from_device(ctx);

	coda_write(dev, 0, CODA_REG_BIT_BIT_STREAM_PARAM);

	if (coda_read(dev, CODA_RET_DEC_SEQ_SUCCESS) == 0) {
		v4l2_err(&dev->v4l2_dev,
			"CODA_COMMAND_SEQ_INIT failed, error code = %d\n",
			coda_read(dev, CODA_RET_DEC_SEQ_ERR_REASON));
		return -EAGAIN;
	}

	/* Width and height are packed into one register, layout per product */
	val = coda_read(dev, CODA_RET_DEC_SEQ_SRC_SIZE);
	if (dev->devtype->product == CODA_DX6) {
		width = (val >> CODADX6_PICWIDTH_OFFSET) & CODADX6_PICWIDTH_MASK;
		height = val & CODADX6_PICHEIGHT_MASK;
	} else {
		width = (val >> CODA7_PICWIDTH_OFFSET) & CODA7_PICWIDTH_MASK;
		height = val & CODA7_PICHEIGHT_MASK;
	}

	/*
	 * Reject streams larger than the configured capture format.
	 * NOTE(review): the stream width is compared against bytesperline,
	 * not against a width field - confirm this is intended.
	 */
	if (width > q_data_dst->bytesperline || height > q_data_dst->height) {
		v4l2_err(&dev->v4l2_dev, "stream is %dx%d, not %dx%d\n",
			 width, height, q_data_dst->bytesperline,
			 q_data_dst->height);
		return -EINVAL;
	}

	/* From here on work with dimensions rounded up to multiples of 16 */
	width = round_up(width, 16);
	height = round_up(height, 16);

	v4l2_dbg(1, coda_debug, &dev->v4l2_dev, "%s instance %d now: %dx%d\n",
		 __func__, ctx->idx, width, height);

	/* The firmware reports how many frame buffers it needs */
	ctx->num_internal_frames = coda_read(dev, CODA_RET_DEC_SEQ_FRAME_NEED);
	if (ctx->num_internal_frames > CODA_MAX_FRAMEBUFFERS) {
		v4l2_err(&dev->v4l2_dev,
			 "not enough framebuffers to decode (%d < %d)\n",
			 CODA_MAX_FRAMEBUFFERS, ctx->num_internal_frames);
		return -EINVAL;
	}

	if (src_fourcc == V4L2_PIX_FMT_H264) {
		u32 left_right;
		u32 top_bottom;

		left_right = coda_read(dev, CODA_RET_DEC_SEQ_CROP_LEFT_RIGHT);
		top_bottom = coda_read(dev, CODA_RET_DEC_SEQ_CROP_TOP_BOTTOM);

		/*
		 * Each register packs two 10-bit values: bits 19:10 give the
		 * left/top offset, bits 9:0 are subtracted from the rounded
		 * width/height (presumably the right/bottom crop).
		 */
		q_data_dst->rect.left = (left_right >> 10) & 0x3ff;
		q_data_dst->rect.top = (top_bottom >> 10) & 0x3ff;
		q_data_dst->rect.width = width - q_data_dst->rect.left -
					 (left_right & 0x3ff);
		q_data_dst->rect.height = height - q_data_dst->rect.top -
					  (top_bottom & 0x3ff);
	}

	ret = coda_alloc_framebuffers(ctx, q_data_dst, src_fourcc);
	if (ret < 0) {
		v4l2_err(&dev->v4l2_dev, "failed to allocate framebuffers\n");
		return ret;
	}

	/* Tell the decoder how many frame buffers we allocated. */
	coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM);
	/* The rounded-up width is used as frame buffer stride */
	coda_write(dev, width, CODA_CMD_SET_FRAME_BUF_STRIDE);

	if (dev->devtype->product != CODA_DX6) {
		/* Set secondary AXI IRAM */
		coda_setup_iram(ctx);

		coda_write(dev, ctx->iram_info.buf_bit_use,
				CODA7_CMD_SET_FRAME_AXI_BIT_ADDR);
		coda_write(dev, ctx->iram_info.buf_ip_ac_dc_use,
				CODA7_CMD_SET_FRAME_AXI_IPACDC_ADDR);
		coda_write(dev, ctx->iram_info.buf_dbk_y_use,
				CODA7_CMD_SET_FRAME_AXI_DBKY_ADDR);
		coda_write(dev, ctx->iram_info.buf_dbk_c_use,
				CODA7_CMD_SET_FRAME_AXI_DBKC_ADDR);
		coda_write(dev, ctx->iram_info.buf_ovl_use,
				CODA7_CMD_SET_FRAME_AXI_OVL_ADDR);
		if (dev->devtype->product == CODA_960) {
			coda_write(dev, ctx->iram_info.buf_btp_use,
					CODA9_CMD_SET_FRAME_AXI_BTP_ADDR);

			coda_write(dev, -1, CODA9_CMD_SET_FRAME_DELAY);
			coda9_set_frame_cache(ctx, dst_fourcc);
		}
	}

	/* H.264 additionally needs the slice buffer (size in units of 1 KiB) */
	if (src_fourcc == V4L2_PIX_FMT_H264) {
		coda_write(dev, ctx->slicebuf.paddr,
				CODA_CMD_SET_FRAME_SLICE_BB_START);
		coda_write(dev, ctx->slicebuf.size / 1024,
				CODA_CMD_SET_FRAME_SLICE_BB_SIZE);
	}

	/*
	 * Program the maximum decode size in macroblocks, based on
	 * 1920x1088 with 16x16 macroblocks. CODA7541 and CODA960 take the
	 * same value, but in different registers.
	 */
	if (dev->devtype->product == CODA_7541) {
		int max_mb_x = 1920 / 16;
		int max_mb_y = 1088 / 16;
		int max_mb_num = max_mb_x * max_mb_y;

		coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
				CODA7_CMD_SET_FRAME_MAX_DEC_SIZE);
	} else if (dev->devtype->product == CODA_960) {
		int max_mb_x = 1920 / 16;
		int max_mb_y = 1088 / 16;
		int max_mb_num = max_mb_x * max_mb_y;

		coda_write(dev, max_mb_num << 16 | max_mb_x << 8 | max_mb_y,
				CODA9_CMD_SET_FRAME_MAX_DEC_SIZE);
	}

	if (coda_command_sync(ctx, CODA_COMMAND_SET_FRAME_BUF)) {
		v4l2_err(&ctx->dev->v4l2_dev,
			 "CODA_COMMAND_SET_FRAME_BUF timeout\n");
		return -ETIMEDOUT;
	}

	return 0;
}
1707
1708static int coda_start_decoding(struct coda_ctx *ctx)
1709{
1710	struct coda_dev *dev = ctx->dev;
1711	int ret;
1712
1713	mutex_lock(&dev->coda_mutex);
1714	ret = __coda_start_decoding(ctx);
1715	mutex_unlock(&dev->coda_mutex);
1716
1717	return ret;
1718}
1719
1720static int coda_prepare_decode(struct coda_ctx *ctx)
1721{
1722	struct vb2_v4l2_buffer *dst_buf;
1723	struct coda_dev *dev = ctx->dev;
1724	struct coda_q_data *q_data_dst;
1725	struct coda_buffer_meta *meta;
1726	unsigned long flags;
1727	u32 reg_addr, reg_stride;
1728
1729	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
1730	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1731
1732	/* Try to copy source buffer contents into the bitstream ringbuffer */
1733	mutex_lock(&ctx->bitstream_mutex);
1734	coda_fill_bitstream(ctx, true);
1735	mutex_unlock(&ctx->bitstream_mutex);
1736
1737	if (coda_get_bitstream_payload(ctx) < 512 &&
1738	    (!(ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG))) {
1739		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1740			 "bitstream payload: %d, skipping\n",
1741			 coda_get_bitstream_payload(ctx));
1742		v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1743		return -EAGAIN;
1744	}
1745
1746	/* Run coda_start_decoding (again) if not yet initialized */
1747	if (!ctx->initialized) {
1748		int ret = __coda_start_decoding(ctx);
1749
1750		if (ret < 0) {
1751			v4l2_err(&dev->v4l2_dev, "failed to start decoding\n");
1752			v4l2_m2m_job_finish(ctx->dev->m2m_dev, ctx->fh.m2m_ctx);
1753			return -EAGAIN;
1754		} else {
1755			ctx->initialized = 1;
1756		}
1757	}
1758
1759	if (dev->devtype->product == CODA_960)
1760		coda_set_gdi_regs(ctx);
1761
1762	if (dev->devtype->product == CODA_960) {
1763		/*
1764		 * The CODA960 seems to have an internal list of buffers with
1765		 * 64 entries that includes the registered frame buffers as
1766		 * well as the rotator buffer output.
1767		 * ROT_INDEX needs to be < 0x40, but > ctx->num_internal_frames.
1768		 */
1769		coda_write(dev, CODA_MAX_FRAMEBUFFERS + dst_buf->vb2_buf.index,
1770				CODA9_CMD_DEC_PIC_ROT_INDEX);
1771
1772		reg_addr = CODA9_CMD_DEC_PIC_ROT_ADDR_Y;
1773		reg_stride = CODA9_CMD_DEC_PIC_ROT_STRIDE;
1774	} else {
1775		reg_addr = CODA_CMD_DEC_PIC_ROT_ADDR_Y;
1776		reg_stride = CODA_CMD_DEC_PIC_ROT_STRIDE;
1777	}
1778	coda_write_base(ctx, q_data_dst, dst_buf, reg_addr);
1779	coda_write(dev, q_data_dst->bytesperline, reg_stride);
1780
1781	coda_write(dev, CODA_ROT_MIR_ENABLE | ctx->params.rot_mode,
1782			CODA_CMD_DEC_PIC_ROT_MODE);
1783
1784	switch (dev->devtype->product) {
1785	case CODA_DX6:
1786		/* TBD */
1787	case CODA_7541:
1788		coda_write(dev, CODA_PRE_SCAN_EN, CODA_CMD_DEC_PIC_OPTION);
1789		break;
1790	case CODA_960:
1791		/* 'hardcode to use interrupt disable mode'? */
1792		coda_write(dev, (1 << 10), CODA_CMD_DEC_PIC_OPTION);
1793		break;
1794	}
1795
1796	coda_write(dev, 0, CODA_CMD_DEC_PIC_SKIP_NUM);
1797
1798	coda_write(dev, 0, CODA_CMD_DEC_PIC_BB_START);
1799	coda_write(dev, 0, CODA_CMD_DEC_PIC_START_BYTE);
1800
1801	if (dev->devtype->product != CODA_DX6)
1802		coda_write(dev, ctx->iram_info.axi_sram_use,
1803				CODA7_REG_BIT_AXI_SRAM_USE);
1804
1805	spin_lock_irqsave(&ctx->buffer_meta_lock, flags);
1806	meta = list_first_entry_or_null(&ctx->buffer_meta_list,
1807					struct coda_buffer_meta, list);
1808
1809	if (meta && ctx->codec->src_fourcc == V4L2_PIX_FMT_JPEG) {
1810
1811		/* If this is the last buffer in the bitstream, add padding */
1812		if (meta->end == (ctx->bitstream_fifo.kfifo.in &
1813				  ctx->bitstream_fifo.kfifo.mask)) {
1814			static unsigned char buf[512];
1815			unsigned int pad;
1816
1817			/* Pad to multiple of 256 and then add 256 more */
1818			pad = ((0 - meta->end) & 0xff) + 256;
1819
1820			memset(buf, 0xff, sizeof(buf));
1821
1822			kfifo_in(&ctx->bitstream_fifo, buf, pad);
1823		}
1824	}
1825	spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
1826
1827	coda_kfifo_sync_to_device_full(ctx);
1828
1829	/* Clear decode success flag */
1830	coda_write(dev, 0, CODA_RET_DEC_PIC_SUCCESS);
1831
1832	trace_coda_dec_pic_run(ctx, meta);
1833
1834	coda_command_async(ctx, CODA_COMMAND_PIC_RUN);
1835
1836	return 0;
1837}
1838
1839static void coda_finish_decode(struct coda_ctx *ctx)
1840{
1841	struct coda_dev *dev = ctx->dev;
1842	struct coda_q_data *q_data_src;
1843	struct coda_q_data *q_data_dst;
1844	struct vb2_v4l2_buffer *dst_buf;
1845	struct coda_buffer_meta *meta;
1846	unsigned long payload;
1847	unsigned long flags;
1848	int width, height;
1849	int decoded_idx;
1850	int display_idx;
1851	u32 src_fourcc;
1852	int success;
1853	u32 err_mb;
1854	u32 val;
1855
1856	/* Update kfifo out pointer from coda bitstream read pointer */
1857	coda_kfifo_sync_from_device(ctx);
1858
1859	/*
1860	 * in stream-end mode, the read pointer can overshoot the write pointer
1861	 * by up to 512 bytes
1862	 */
1863	if (ctx->bit_stream_param & CODA_BIT_STREAM_END_FLAG) {
1864		if (coda_get_bitstream_payload(ctx) >= ctx->bitstream.size - 512)
1865			kfifo_init(&ctx->bitstream_fifo,
1866				ctx->bitstream.vaddr, ctx->bitstream.size);
1867	}
1868
1869	q_data_src = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
1870	src_fourcc = q_data_src->fourcc;
1871
1872	val = coda_read(dev, CODA_RET_DEC_PIC_SUCCESS);
1873	if (val != 1)
1874		pr_err("DEC_PIC_SUCCESS = %d\n", val);
1875
1876	success = val & 0x1;
1877	if (!success)
1878		v4l2_err(&dev->v4l2_dev, "decode failed\n");
1879
1880	if (src_fourcc == V4L2_PIX_FMT_H264) {
1881		if (val & (1 << 3))
1882			v4l2_err(&dev->v4l2_dev,
1883				 "insufficient PS buffer space (%d bytes)\n",
1884				 ctx->psbuf.size);
1885		if (val & (1 << 2))
1886			v4l2_err(&dev->v4l2_dev,
1887				 "insufficient slice buffer space (%d bytes)\n",
1888				 ctx->slicebuf.size);
1889	}
1890
1891	val = coda_read(dev, CODA_RET_DEC_PIC_SIZE);
1892	width = (val >> 16) & 0xffff;
1893	height = val & 0xffff;
1894
1895	q_data_dst = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
1896
1897	/* frame crop information */
1898	if (src_fourcc == V4L2_PIX_FMT_H264) {
1899		u32 left_right;
1900		u32 top_bottom;
1901
1902		left_right = coda_read(dev, CODA_RET_DEC_PIC_CROP_LEFT_RIGHT);
1903		top_bottom = coda_read(dev, CODA_RET_DEC_PIC_CROP_TOP_BOTTOM);
1904
1905		if (left_right == 0xffffffff && top_bottom == 0xffffffff) {
1906			/* Keep current crop information */
1907		} else {
1908			struct v4l2_rect *rect = &q_data_dst->rect;
1909
1910			rect->left = left_right >> 16 & 0xffff;
1911			rect->top = top_bottom >> 16 & 0xffff;
1912			rect->width = width - rect->left -
1913				      (left_right & 0xffff);
1914			rect->height = height - rect->top -
1915				       (top_bottom & 0xffff);
1916		}
1917	} else {
1918		/* no cropping */
1919	}
1920
1921	err_mb = coda_read(dev, CODA_RET_DEC_PIC_ERR_MB);
1922	if (err_mb > 0)
1923		v4l2_err(&dev->v4l2_dev,
1924			 "errors in %d macroblocks\n", err_mb);
1925
1926	if (dev->devtype->product == CODA_7541) {
1927		val = coda_read(dev, CODA_RET_DEC_PIC_OPTION);
1928		if (val == 0) {
1929			/* not enough bitstream data */
1930			v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
1931				 "prescan failed: %d\n", val);
1932			ctx->hold = true;
1933			return;
1934		}
1935	}
1936
1937	ctx->frm_dis_flg = coda_read(dev,
1938				     CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
1939
1940	/*
1941	 * The previous display frame was copied out by the rotator,
1942	 * now it can be overwritten again
1943	 */
1944	if (ctx->display_idx >= 0 &&
1945	    ctx->display_idx < ctx->num_internal_frames) {
1946		ctx->frm_dis_flg &= ~(1 << ctx->display_idx);
1947		coda_write(dev, ctx->frm_dis_flg,
1948				CODA_REG_BIT_FRM_DIS_FLG(ctx->reg_idx));
1949	}
1950
1951	/*
1952	 * The index of the last decoded frame, not necessarily in
1953	 * display order, and the index of the next display frame.
1954	 * The latter could have been decoded in a previous run.
1955	 */
1956	decoded_idx = coda_read(dev, CODA_RET_DEC_PIC_CUR_IDX);
1957	display_idx = coda_read(dev, CODA_RET_DEC_PIC_FRAME_IDX);
1958
1959	if (decoded_idx == -1) {
1960		/* no frame was decoded, but we might have a display frame */
1961		if (display_idx >= 0 && display_idx < ctx->num_internal_frames)
1962			ctx->sequence_offset++;
1963		else if (ctx->display_idx < 0)
1964			ctx->hold = true;
1965	} else if (decoded_idx == -2) {
1966		/* no frame was decoded, we still return remaining buffers */
1967	} else if (decoded_idx < 0 || decoded_idx >= ctx->num_internal_frames) {
1968		v4l2_err(&dev->v4l2_dev,
1969			 "decoded frame index out of range: %d\n", decoded_idx);
1970	} else {
1971		val = coda_read(dev, CODA_RET_DEC_PIC_FRAME_NUM) - 1;
1972		val -= ctx->sequence_offset;
1973		spin_lock_irqsave(&ctx->buffer_meta_lock, flags);
1974		if (!list_empty(&ctx->buffer_meta_list)) {
1975			meta = list_first_entry(&ctx->buffer_meta_list,
1976					      struct coda_buffer_meta, list);
1977			list_del(&meta->list);
1978			ctx->num_metas--;
1979			spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
1980			/*
1981			 * Clamp counters to 16 bits for comparison, as the HW
1982			 * counter rolls over at this point for h.264. This
1983			 * may be different for other formats, but using 16 bits
1984			 * should be enough to detect most errors and saves us
1985			 * from doing different things based on the format.
1986			 */
1987			if ((val & 0xffff) != (meta->sequence & 0xffff)) {
1988				v4l2_err(&dev->v4l2_dev,
1989					 "sequence number mismatch (%d(%d) != %d)\n",
1990					 val, ctx->sequence_offset,
1991					 meta->sequence);
1992			}
1993			ctx->frame_metas[decoded_idx] = *meta;
1994			kfree(meta);
1995		} else {
1996			spin_unlock_irqrestore(&ctx->buffer_meta_lock, flags);
1997			v4l2_err(&dev->v4l2_dev, "empty timestamp list!\n");
1998			memset(&ctx->frame_metas[decoded_idx], 0,
1999			       sizeof(struct coda_buffer_meta));
2000			ctx->frame_metas[decoded_idx].sequence = val;
2001			ctx->sequence_offset++;
2002		}
2003
2004		trace_coda_dec_pic_done(ctx, &ctx->frame_metas[decoded_idx]);
2005
2006		val = coda_read(dev, CODA_RET_DEC_PIC_TYPE) & 0x7;
2007		if (val == 0)
2008			ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_KEYFRAME;
2009		else if (val == 1)
2010			ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_PFRAME;
2011		else
2012			ctx->frame_types[decoded_idx] = V4L2_BUF_FLAG_BFRAME;
2013
2014		ctx->frame_errors[decoded_idx] = err_mb;
2015	}
2016
2017	if (display_idx == -1) {
2018		/*
2019		 * no more frames to be decoded, but there could still
2020		 * be rotator output to dequeue
2021		 */
2022		ctx->hold = true;
2023	} else if (display_idx == -3) {
2024		/* possibly prescan failure */
2025	} else if (display_idx < 0 || display_idx >= ctx->num_internal_frames) {
2026		v4l2_err(&dev->v4l2_dev,
2027			 "presentation frame index out of range: %d\n",
2028			 display_idx);
2029	}
2030
2031	/* If a frame was copied out, return it */
2032	if (ctx->display_idx >= 0 &&
2033	    ctx->display_idx < ctx->num_internal_frames) {
2034		dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
2035		dst_buf->sequence = ctx->osequence++;
2036
2037		dst_buf->flags &= ~(V4L2_BUF_FLAG_KEYFRAME |
2038					     V4L2_BUF_FLAG_PFRAME |
2039					     V4L2_BUF_FLAG_BFRAME);
2040		dst_buf->flags |= ctx->frame_types[ctx->display_idx];
2041		meta = &ctx->frame_metas[ctx->display_idx];
2042		dst_buf->timecode = meta->timecode;
2043		dst_buf->timestamp = meta->timestamp;
2044
2045		trace_coda_dec_rot_done(ctx, dst_buf, meta);
2046
2047		switch (q_data_dst->fourcc) {
2048		case V4L2_PIX_FMT_YUV420:
2049		case V4L2_PIX_FMT_YVU420:
2050		case V4L2_PIX_FMT_NV12:
2051		default:
2052			payload = width * height * 3 / 2;
2053			break;
2054		case V4L2_PIX_FMT_YUV422P:
2055			payload = width * height * 2;
2056			break;
2057		}
2058		vb2_set_plane_payload(&dst_buf->vb2_buf, 0, payload);
2059
2060		coda_m2m_buf_done(ctx, dst_buf, ctx->frame_errors[display_idx] ?
2061				  VB2_BUF_STATE_ERROR : VB2_BUF_STATE_DONE);
2062
2063		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
2064			"job finished: decoding frame (%d) (%s)\n",
2065			dst_buf->sequence,
2066			(dst_buf->flags & V4L2_BUF_FLAG_KEYFRAME) ?
2067			"KEYFRAME" : "PFRAME");
2068	} else {
2069		v4l2_dbg(1, coda_debug, &dev->v4l2_dev,
2070			"job finished: no frame decoded\n");
2071	}
2072
2073	/* The rotator will copy the current display frame next time */
2074	ctx->display_idx = display_idx;
2075}
2076
2077const struct coda_context_ops coda_bit_decode_ops = {
2078	.queue_init = coda_decoder_queue_init,
2079	.reqbufs = coda_decoder_reqbufs,
2080	.start_streaming = coda_start_decoding,
2081	.prepare_run = coda_prepare_decode,
2082	.finish_run = coda_finish_decode,
2083	.seq_end_work = coda_seq_end_work,
2084	.release = coda_bit_release,
2085};
2086
2087irqreturn_t coda_irq_handler(int irq, void *data)
2088{
2089	struct coda_dev *dev = data;
2090	struct coda_ctx *ctx;
2091
2092	/* read status register to attend the IRQ */
2093	coda_read(dev, CODA_REG_BIT_INT_STATUS);
2094	coda_write(dev, CODA_REG_BIT_INT_CLEAR_SET,
2095		      CODA_REG_BIT_INT_CLEAR);
2096
2097	ctx = v4l2_m2m_get_curr_priv(dev->m2m_dev);
2098	if (ctx == NULL) {
2099		v4l2_err(&dev->v4l2_dev,
2100			 "Instance released before the end of transaction\n");
2101		mutex_unlock(&dev->coda_mutex);
2102		return IRQ_HANDLED;
2103	}
2104
2105	trace_coda_bit_done(ctx);
2106
2107	if (ctx->aborting) {
2108		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
2109			 "task has been aborted\n");
2110	}
2111
2112	if (coda_isbusy(ctx->dev)) {
2113		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
2114			 "coda is still busy!!!!\n");
2115		return IRQ_NONE;
2116	}
2117
2118	complete(&ctx->completion);
2119
2120	return IRQ_HANDLED;
2121}
2122