/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <drm/drmP.h>
#include "radeon.h"
#include "evergreend.h"
#include "evergreen_reg_safe.h"
#include "cayman_reg_safe.h"

#define MAX(a,b)                   (((a)>(b))?(a):(b))
#define MIN(a,b)                   (((a)<(b))?(a):(b))

int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
			   struct radeon_bo_list **cs_reloc);
struct evergreen_cs_track {
	u32			group_size;
	u32			nbanks;
	u32			npipes;
	u32			row_size;
	/* value we track */
	u32			nsamples;		/* unused */
	struct radeon_bo	*cb_color_bo[12];
	u32			cb_color_bo_offset[12];
	struct radeon_bo	*cb_color_fmask_bo[8];	/* unused */
	struct radeon_bo	*cb_color_cmask_bo[8];	/* unused */
	u32			cb_color_info[12];
	u32			cb_color_view[12];
	u32			cb_color_pitch[12];
	u32			cb_color_slice[12];
	u32			cb_color_slice_idx[12];
	u32			cb_color_attrib[12];
	u32			cb_color_cmask_slice[8];/* unused */
	u32			cb_color_fmask_slice[8];/* unused */
	u32			cb_target_mask;
	u32			cb_shader_mask; /* unused */
	u32			vgt_strmout_config;
	u32			vgt_strmout_buffer_config;
	struct radeon_bo	*vgt_strmout_bo[4];
	u32			vgt_strmout_bo_offset[4];
	u32			vgt_strmout_size[4];
	u32			db_depth_control;
	u32			db_depth_view;
	u32			db_depth_slice;
	u32			db_depth_size;
	u32			db_z_info;
	u32			db_z_read_offset;
	u32			db_z_write_offset;
	struct radeon_bo	*db_z_read_bo;
	struct radeon_bo	*db_z_write_bo;
	u32			db_s_info;
	u32			db_s_read_offset;
	u32			db_s_write_offset;
	struct radeon_bo	*db_s_read_bo;
	struct radeon_bo	*db_s_write_bo;
	bool			sx_misc_kill_all_prims;
	bool			cb_dirty;
	bool			db_dirty;
	bool			streamout_dirty;
	u32			htile_offset;
	u32			htile_surface;
	struct radeon_bo	*htile_bo;
	unsigned long		indirect_draw_buffer_size;
};

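/* Translate the RADEON_TILING_* flags of a relocation into the hardware
 * ARRAY_MODE value: macro tiling -> 2D thin, micro tiling -> 1D thin,
 * otherwise linear general.
 */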
static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
{
	if (tiling_flags & RADEON_TILING_MACRO)
		return ARRAY_2D_TILED_THIN1;
	else if (tiling_flags & RADEON_TILING_MICRO)
		return ARRAY_1D_TILED_THIN1;
	else
		return ARRAY_LINEAR_GENERAL;
}

static u32 evergreen_cs_get_num_banks(u32 nbanks)
{
	switch (nbanks) {
	case 2:
		return ADDR_SURF_2_BANK;
	case 4:
		return ADDR_SURF_4_BANK;
	case 8:
	default:
		return ADDR_SURF_8_BANK;
	case 16:
		return ADDR_SURF_16_BANK;
	}
}

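/* Reset the CS tracker to "unprogrammed" defaults so that any state the
 * command stream fails to set is caught by the later validation passes.
 */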
static void evergreen_cs_track_init(struct evergreen_cs_track *track)
{
	int i;

	for (i = 0; i < 8; i++) {
		track->cb_color_fmask_bo[i] = NULL;
		track->cb_color_cmask_bo[i] = NULL;
		track->cb_color_cmask_slice[i] = 0;
		track->cb_color_fmask_slice[i] = 0;
	}

	for (i = 0; i < 12; i++) {
		track->cb_color_bo[i] = NULL;
		track->cb_color_bo_offset[i] = 0xFFFFFFFF;
		track->cb_color_info[i] = 0;
		track->cb_color_view[i] = 0xFFFFFFFF;
		track->cb_color_pitch[i] = 0;
		track->cb_color_slice[i] = 0xfffffff;
		track->cb_color_slice_idx[i] = 0;
	}
	track->cb_target_mask = 0xFFFFFFFF;
	track->cb_shader_mask = 0xFFFFFFFF;
	track->cb_dirty = true;

	track->db_depth_slice = 0xffffffff;
	track->db_depth_view = 0xFFFFC000;
	track->db_depth_size = 0xFFFFFFFF;
	track->db_depth_control = 0xFFFFFFFF;
	track->db_z_info = 0xFFFFFFFF;
	track->db_z_read_offset = 0xFFFFFFFF;
	track->db_z_write_offset = 0xFFFFFFFF;
	track->db_z_read_bo = NULL;
	track->db_z_write_bo = NULL;
	track->db_s_info = 0xFFFFFFFF;
	track->db_s_read_offset = 0xFFFFFFFF;
	track->db_s_write_offset = 0xFFFFFFFF;
	track->db_s_read_bo = NULL;
	track->db_s_write_bo = NULL;
	track->db_dirty = true;
	track->htile_bo = NULL;
	track->htile_offset = 0xFFFFFFFF;
	track->htile_surface = 0;

	for (i = 0; i < 4; i++) {
		track->vgt_strmout_size[i] = 0;
		track->vgt_strmout_bo[i] = NULL;
		track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
	}
	track->streamout_dirty = true;
	track->sx_misc_kill_all_prims = false;
}

struct eg_surface {
	/* value gathered from cs */
	unsigned	nbx;
	unsigned	nby;
	unsigned	format;
	unsigned	mode;
	unsigned	nbanks;
	unsigned	bankw;
	unsigned	bankh;
	unsigned	tsplit;
	unsigned	mtilea;
	unsigned	nsamples;
	/* output value */
	unsigned	bpe;
	unsigned	layer_size;
	unsigned	palign;
	unsigned	halign;
	unsigned long	base_align;
};

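/* Per-array-mode surface checkers: each one derives layer_size, base_align
 * and the pitch/height alignments (palign/halign) for the surface and,
 * where the mode requires it, verifies that nbx/nby respect them.
 */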
static int evergreen_surface_check_linear(struct radeon_cs_parser *p,
					  struct eg_surface *surf,
					  const char *prefix)
{
	surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
	surf->base_align = surf->bpe;
	surf->palign = 1;
	surf->halign = 1;
	return 0;
}

static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p,
						  struct eg_surface *surf,
						  const char *prefix)
{
	struct evergreen_cs_track *track = p->track;
	unsigned palign;

	palign = MAX(64, track->group_size / surf->bpe);
	surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
	surf->base_align = track->group_size;
	surf->palign = palign;
	surf->halign = 1;
	if (surf->nbx & (palign - 1)) {
		if (prefix) {
			dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
				 __func__, __LINE__, prefix, surf->nbx, palign);
		}
		return -EINVAL;
	}
	return 0;
}

static int evergreen_surface_check_1d(struct radeon_cs_parser *p,
				      struct eg_surface *surf,
				      const char *prefix)
{
	struct evergreen_cs_track *track = p->track;
	unsigned palign;

	palign = track->group_size / (8 * surf->bpe * surf->nsamples);
	palign = MAX(8, palign);
	surf->layer_size = surf->nbx * surf->nby * surf->bpe;
	surf->base_align = track->group_size;
	surf->palign = palign;
	surf->halign = 8;
	if ((surf->nbx & (palign - 1))) {
		if (prefix) {
			dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n",
				 __func__, __LINE__, prefix, surf->nbx, palign,
				 track->group_size, surf->bpe, surf->nsamples);
		}
		return -EINVAL;
	}
	if ((surf->nby & (8 - 1))) {
		if (prefix) {
			dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n",
				 __func__, __LINE__, prefix, surf->nby);
		}
		return -EINVAL;
	}
	return 0;
}

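/* 2D tiled surfaces: compute the macro tile width/height from bank width,
 * bank height, number of pipes/banks and macro tile aspect, then derive
 * the layer size and base alignment from the macro tile size.
 */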
static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
				      struct eg_surface *surf,
				      const char *prefix)
{
	struct evergreen_cs_track *track = p->track;
	unsigned palign, halign, tileb, slice_pt;
	unsigned mtile_pr, mtile_ps, mtileb;

	tileb = 64 * surf->bpe * surf->nsamples;
	slice_pt = 1;
	if (tileb > surf->tsplit) {
		slice_pt = tileb / surf->tsplit;
	}
	tileb = tileb / slice_pt;
	/* macro tile width & height */
	palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
	halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
	mtileb = (palign / 8) * (halign / 8) * tileb;
	mtile_pr = surf->nbx / palign;
	mtile_ps = (mtile_pr * surf->nby) / halign;
	surf->layer_size = mtile_ps * mtileb * slice_pt;
	surf->base_align = (palign / 8) * (halign / 8) * tileb;
	surf->palign = palign;
	surf->halign = halign;

	if ((surf->nbx & (palign - 1))) {
		if (prefix) {
			dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
				 __func__, __LINE__, prefix, surf->nbx, palign);
		}
		return -EINVAL;
	}
	if ((surf->nby & (halign - 1))) {
		if (prefix) {
			dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n",
				 __func__, __LINE__, prefix, surf->nby, halign);
		}
		return -EINVAL;
	}

	return 0;
}

static int evergreen_surface_check(struct radeon_cs_parser *p,
				   struct eg_surface *surf,
				   const char *prefix)
{
	/* some common value computed here */
	surf->bpe = r600_fmt_get_blocksize(surf->format);

	switch (surf->mode) {
	case ARRAY_LINEAR_GENERAL:
		return evergreen_surface_check_linear(p, surf, prefix);
	case ARRAY_LINEAR_ALIGNED:
		return evergreen_surface_check_linear_aligned(p, surf, prefix);
	case ARRAY_1D_TILED_THIN1:
		return evergreen_surface_check_1d(p, surf, prefix);
	case ARRAY_2D_TILED_THIN1:
		return evergreen_surface_check_2d(p, surf, prefix);
	default:
		dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
				__func__, __LINE__, prefix, surf->mode);
		return -EINVAL;
	}
	return -EINVAL;
}

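/* Convert the raw register field encodings (number of banks, bank width,
 * bank height, macro tile aspect, tile split) into their actual values;
 * only 2D tiled surfaces need the conversion.
 */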
static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p,
					      struct eg_surface *surf,
					      const char *prefix)
{
	switch (surf->mode) {
	case ARRAY_2D_TILED_THIN1:
		break;
	case ARRAY_LINEAR_GENERAL:
	case ARRAY_LINEAR_ALIGNED:
	case ARRAY_1D_TILED_THIN1:
		return 0;
	default:
		dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
				__func__, __LINE__, prefix, surf->mode);
		return -EINVAL;
	}

	switch (surf->nbanks) {
	case 0: surf->nbanks = 2; break;
	case 1: surf->nbanks = 4; break;
	case 2: surf->nbanks = 8; break;
	case 3: surf->nbanks = 16; break;
	default:
		dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n",
			 __func__, __LINE__, prefix, surf->nbanks);
		return -EINVAL;
	}
	switch (surf->bankw) {
	case 0: surf->bankw = 1; break;
	case 1: surf->bankw = 2; break;
	case 2: surf->bankw = 4; break;
	case 3: surf->bankw = 8; break;
	default:
		dev_warn(p->dev, "%s:%d %s invalid bankw %d\n",
			 __func__, __LINE__, prefix, surf->bankw);
		return -EINVAL;
	}
	switch (surf->bankh) {
	case 0: surf->bankh = 1; break;
	case 1: surf->bankh = 2; break;
	case 2: surf->bankh = 4; break;
	case 3: surf->bankh = 8; break;
	default:
		dev_warn(p->dev, "%s:%d %s invalid bankh %d\n",
			 __func__, __LINE__, prefix, surf->bankh);
		return -EINVAL;
	}
	switch (surf->mtilea) {
	case 0: surf->mtilea = 1; break;
	case 1: surf->mtilea = 2; break;
	case 2: surf->mtilea = 4; break;
	case 3: surf->mtilea = 8; break;
	default:
		dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n",
			 __func__, __LINE__, prefix, surf->mtilea);
		return -EINVAL;
	}
	switch (surf->tsplit) {
	case 0: surf->tsplit = 64; break;
	case 1: surf->tsplit = 128; break;
	case 2: surf->tsplit = 256; break;
	case 3: surf->tsplit = 512; break;
	case 4: surf->tsplit = 1024; break;
	case 5: surf->tsplit = 2048; break;
	case 6: surf->tsplit = 4096; break;
	default:
		dev_warn(p->dev, "%s:%d %s invalid tile split %d\n",
			 __func__, __LINE__, prefix, surf->tsplit);
		return -EINVAL;
	}
	return 0;
}

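/* Validate color buffer @id: rebuild an eg_surface from the tracked
 * CB_COLOR* state, check its alignment, and make sure all slices fit
 * inside the bound buffer object (with a fixup for old ddx slice
 * programming).
 */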
static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id)
{
	struct evergreen_cs_track *track = p->track;
	struct eg_surface surf;
	unsigned pitch, slice, mslice;
	unsigned long offset;
	int r;

	mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1;
	pitch = track->cb_color_pitch[id];
	slice = track->cb_color_slice[id];
	surf.nbx = (pitch + 1) * 8;
	surf.nby = ((slice + 1) * 64) / surf.nbx;
	surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]);
	surf.format = G_028C70_FORMAT(track->cb_color_info[id]);
	surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]);
	surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]);
	surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]);
	surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]);
	surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]);
	surf.nsamples = 1;

	if (!r600_fmt_is_valid_color(surf.format)) {
		dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n",
			 __func__, __LINE__, surf.format,
			id, track->cb_color_info[id]);
		return -EINVAL;
	}

	r = evergreen_surface_value_conv_check(p, &surf, "cb");
	if (r) {
		return r;
	}

	r = evergreen_surface_check(p, &surf, "cb");
	if (r) {
		dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
			 __func__, __LINE__, id, track->cb_color_pitch[id],
			 track->cb_color_slice[id], track->cb_color_attrib[id],
			 track->cb_color_info[id]);
		return r;
	}

	offset = track->cb_color_bo_offset[id] << 8;
	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, id, offset, surf.base_align);
		return -EINVAL;
	}

	offset += surf.layer_size * mslice;
	if (offset > radeon_bo_size(track->cb_color_bo[id])) {
		/* Old ddx versions are broken: they allocate the bo with
		 * w*h*bpp but program the slice with ALIGN(h, 8). Catch
		 * this and patch the command stream.
		 */
		if (!surf.mode) {
			volatile u32 *ib = p->ib.ptr;
			unsigned long tmp, nby, bsize, size, min = 0;

			/* find the height the ddx wants */
			if (surf.nby > 8) {
				min = surf.nby - 8;
			}
			bsize = radeon_bo_size(track->cb_color_bo[id]);
			tmp = track->cb_color_bo_offset[id] << 8;
			for (nby = surf.nby; nby > min; nby--) {
				size = nby * surf.nbx * surf.bpe * surf.nsamples;
				if ((tmp + size * mslice) <= bsize) {
					break;
				}
			}
			if (nby > min) {
				surf.nby = nby;
				slice = ((nby * surf.nbx) / 64) - 1;
				if (!evergreen_surface_check(p, &surf, "cb")) {
					/* check if this one works */
					tmp += surf.layer_size * mslice;
					if (tmp <= bsize) {
						ib[track->cb_color_slice_idx[id]] = slice;
						goto old_ddx_ok;
					}
				}
			}
		}
		dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, "
			 "offset %d, max layer %d, bo size %ld, slice %d)\n",
			 __func__, __LINE__, id, surf.layer_size,
			track->cb_color_bo_offset[id] << 8, mslice,
			radeon_bo_size(track->cb_color_bo[id]), slice);
		dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
			 __func__, __LINE__, surf.nbx, surf.nby,
			surf.mode, surf.bpe, surf.nsamples,
			surf.bankw, surf.bankh,
			surf.tsplit, surf.mtilea);
		return -EINVAL;
	}
old_ddx_ok:

	return 0;
}

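/* Validate the htile (hierarchical Z) buffer for a depth surface of
 * nbx x nby pixels, taking the linear/tiled htile layout and the number
 * of pipes into account.
 */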
static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
						unsigned nbx, unsigned nby)
{
	struct evergreen_cs_track *track = p->track;
	unsigned long size;

	if (track->htile_bo == NULL) {
		dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
				__func__, __LINE__, track->db_z_info);
		return -EINVAL;
	}

	if (G_028ABC_LINEAR(track->htile_surface)) {
		/* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */
		nbx = round_up(nbx, 16 * 8);
		/* height is npipes htiles aligned == npipes * 8 pixel aligned */
		nby = round_up(nby, track->npipes * 8);
	} else {
		/* always assume 8x8 htile */
		/* alignment is htile align * 8; the htile alignment varies
		 * according to the number of pipes, the tile width and nby
		 */
		switch (track->npipes) {
		case 8:
			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
			nbx = round_up(nbx, 64 * 8);
			nby = round_up(nby, 64 * 8);
			break;
		case 4:
			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
			nbx = round_up(nbx, 64 * 8);
			nby = round_up(nby, 32 * 8);
			break;
		case 2:
			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
			nbx = round_up(nbx, 32 * 8);
			nby = round_up(nby, 32 * 8);
			break;
		case 1:
			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
			nbx = round_up(nbx, 32 * 8);
			nby = round_up(nby, 16 * 8);
			break;
		default:
			dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
					__func__, __LINE__, track->npipes);
			return -EINVAL;
		}
	}
	/* compute number of htile */
	nbx = nbx >> 3;
	nby = nby >> 3;
	/* size must be aligned on npipes * 2K boundary */
	size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
	size += track->htile_offset;

	if (size > radeon_bo_size(track->htile_bo)) {
		dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
				__func__, __LINE__, radeon_bo_size(track->htile_bo),
				size, nbx, nby);
		return -EINVAL;
	}
	return 0;
}

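/* Validate the stencil buffer: the stencil surface is checked as an 8-bit
 * color surface, using the depth size/slice registers for its dimensions,
 * for both the read and write base addresses.
 */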
static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p)
{
	struct evergreen_cs_track *track = p->track;
	struct eg_surface surf;
	unsigned pitch, slice, mslice;
	unsigned long offset;
	int r;

	mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
	pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
	slice = track->db_depth_slice;
	surf.nbx = (pitch + 1) * 8;
	surf.nby = ((slice + 1) * 64) / surf.nbx;
	surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
	surf.format = G_028044_FORMAT(track->db_s_info);
	surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info);
	surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
	surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
	surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
	surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
	surf.nsamples = 1;

	if (surf.format != 1) {
		dev_warn(p->dev, "%s:%d stencil invalid format %d\n",
			 __func__, __LINE__, surf.format);
		return -EINVAL;
	}
	/* replace by color format so we can use same code */
	surf.format = V_028C70_COLOR_8;

	r = evergreen_surface_value_conv_check(p, &surf, "stencil");
	if (r) {
		return r;
	}

	r = evergreen_surface_check(p, &surf, NULL);
	if (r) {
		/* Old userspace doesn't compute proper depth/stencil alignment.
		 * Check the alignment against a larger bytes-per-element value
		 * and only report an error if that alignment is wrong too.
		 */
		surf.format = V_028C70_COLOR_8_8_8_8;
		r = evergreen_surface_check(p, &surf, "stencil");
		if (r) {
			dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
				 __func__, __LINE__, track->db_depth_size,
				 track->db_depth_slice, track->db_s_info, track->db_z_info);
		}
		return r;
	}

	offset = track->db_s_read_offset << 8;
	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, offset, surf.base_align);
		return -EINVAL;
	}
	offset += surf.layer_size * mslice;
	if (offset > radeon_bo_size(track->db_s_read_bo)) {
		dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, "
			 "offset %ld, max layer %d, bo size %ld)\n",
			 __func__, __LINE__, surf.layer_size,
			(unsigned long)track->db_s_read_offset << 8, mslice,
			radeon_bo_size(track->db_s_read_bo));
		dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
			 __func__, __LINE__, track->db_depth_size,
			 track->db_depth_slice, track->db_s_info, track->db_z_info);
		return -EINVAL;
	}

	offset = track->db_s_write_offset << 8;
	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, offset, surf.base_align);
		return -EINVAL;
	}
	offset += surf.layer_size * mslice;
	if (offset > radeon_bo_size(track->db_s_write_bo)) {
		dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, "
			 "offset %ld, max layer %d, bo size %ld)\n",
			 __func__, __LINE__, surf.layer_size,
			(unsigned long)track->db_s_write_offset << 8, mslice,
			radeon_bo_size(track->db_s_write_bo));
		return -EINVAL;
	}

	/* hyperz */
	if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
		r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
		if (r) {
			return r;
		}
	}

	return 0;
}

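/* Validate the depth buffer: map the Z format onto an equivalent color
 * format, check surface alignment and make sure the read and write bos
 * are large enough; htile is validated when TILE_SURFACE_ENABLE is set.
 */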
static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p)
{
	struct evergreen_cs_track *track = p->track;
	struct eg_surface surf;
	unsigned pitch, slice, mslice;
	unsigned long offset;
	int r;

	mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
	pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
	slice = track->db_depth_slice;
	surf.nbx = (pitch + 1) * 8;
	surf.nby = ((slice + 1) * 64) / surf.nbx;
	surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
	surf.format = G_028040_FORMAT(track->db_z_info);
	surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info);
	surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
	surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
	surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
	surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
	surf.nsamples = 1;

	switch (surf.format) {
	case V_028040_Z_16:
		surf.format = V_028C70_COLOR_16;
		break;
	case V_028040_Z_24:
	case V_028040_Z_32_FLOAT:
		surf.format = V_028C70_COLOR_8_8_8_8;
		break;
	default:
		dev_warn(p->dev, "%s:%d depth invalid format %d\n",
			 __func__, __LINE__, surf.format);
		return -EINVAL;
	}

	r = evergreen_surface_value_conv_check(p, &surf, "depth");
	if (r) {
		dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
			 __func__, __LINE__, track->db_depth_size,
			 track->db_depth_slice, track->db_z_info);
		return r;
	}

	r = evergreen_surface_check(p, &surf, "depth");
	if (r) {
		dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
			 __func__, __LINE__, track->db_depth_size,
			 track->db_depth_slice, track->db_z_info);
		return r;
	}

	offset = track->db_z_read_offset << 8;
	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d depth read bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, offset, surf.base_align);
		return -EINVAL;
	}
	offset += surf.layer_size * mslice;
	if (offset > radeon_bo_size(track->db_z_read_bo)) {
		dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, "
			 "offset %ld, max layer %d, bo size %ld)\n",
			 __func__, __LINE__, surf.layer_size,
			(unsigned long)track->db_z_read_offset << 8, mslice,
			radeon_bo_size(track->db_z_read_bo));
		return -EINVAL;
	}

	offset = track->db_z_write_offset << 8;
	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d depth write bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, offset, surf.base_align);
		return -EINVAL;
	}
	offset += surf.layer_size * mslice;
	if (offset > radeon_bo_size(track->db_z_write_bo)) {
		dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, "
			 "offset %ld, max layer %d, bo size %ld)\n",
			 __func__, __LINE__, surf.layer_size,
			(unsigned long)track->db_z_write_offset << 8, mslice,
			radeon_bo_size(track->db_z_write_bo));
		return -EINVAL;
	}

	/* hyperz */
	if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
		r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
		if (r) {
			return r;
		}
	}

	return 0;
}

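/* Validate a texture resource: decode the 8 resource dwords, check the
 * base level surface against the texture bo and then walk the mipmap
 * chain, checking each level against the mipmap bo.
 */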
static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p,
					       struct radeon_bo *texture,
					       struct radeon_bo *mipmap,
					       unsigned idx)
{
	struct eg_surface surf;
	unsigned long toffset, moffset;
	unsigned dim, llevel, mslice, width, height, depth, i;
	u32 texdw[8];
	int r;

	texdw[0] = radeon_get_ib_value(p, idx + 0);
	texdw[1] = radeon_get_ib_value(p, idx + 1);
	texdw[2] = radeon_get_ib_value(p, idx + 2);
	texdw[3] = radeon_get_ib_value(p, idx + 3);
	texdw[4] = radeon_get_ib_value(p, idx + 4);
	texdw[5] = radeon_get_ib_value(p, idx + 5);
	texdw[6] = radeon_get_ib_value(p, idx + 6);
	texdw[7] = radeon_get_ib_value(p, idx + 7);
	dim = G_030000_DIM(texdw[0]);
	llevel = G_030014_LAST_LEVEL(texdw[5]);
	mslice = G_030014_LAST_ARRAY(texdw[5]) + 1;
	width = G_030000_TEX_WIDTH(texdw[0]) + 1;
	height =  G_030004_TEX_HEIGHT(texdw[1]) + 1;
	depth = G_030004_TEX_DEPTH(texdw[1]) + 1;
	surf.format = G_03001C_DATA_FORMAT(texdw[7]);
	surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8;
	surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx);
	surf.nby = r600_fmt_get_nblocksy(surf.format, height);
	surf.mode = G_030004_ARRAY_MODE(texdw[1]);
	surf.tsplit = G_030018_TILE_SPLIT(texdw[6]);
	surf.nbanks = G_03001C_NUM_BANKS(texdw[7]);
	surf.bankw = G_03001C_BANK_WIDTH(texdw[7]);
	surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]);
	surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]);
	surf.nsamples = 1;
	toffset = texdw[2] << 8;
	moffset = texdw[3] << 8;

	if (!r600_fmt_is_valid_texture(surf.format, p->family)) {
		dev_warn(p->dev, "%s:%d texture invalid format %d\n",
			 __func__, __LINE__, surf.format);
		return -EINVAL;
	}
	switch (dim) {
	case V_030000_SQ_TEX_DIM_1D:
	case V_030000_SQ_TEX_DIM_2D:
	case V_030000_SQ_TEX_DIM_CUBEMAP:
	case V_030000_SQ_TEX_DIM_1D_ARRAY:
	case V_030000_SQ_TEX_DIM_2D_ARRAY:
		depth = 1;
		break;
	case V_030000_SQ_TEX_DIM_2D_MSAA:
	case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA:
		surf.nsamples = 1 << llevel;
		llevel = 0;
		depth = 1;
		break;
	case V_030000_SQ_TEX_DIM_3D:
		break;
	default:
		dev_warn(p->dev, "%s:%d texture invalid dimension %d\n",
			 __func__, __LINE__, dim);
		return -EINVAL;
	}

	r = evergreen_surface_value_conv_check(p, &surf, "texture");
	if (r) {
		return r;
	}

	/* align height */
	evergreen_surface_check(p, &surf, NULL);
	surf.nby = ALIGN(surf.nby, surf.halign);

	r = evergreen_surface_check(p, &surf, "texture");
	if (r) {
		dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
			 __func__, __LINE__, texdw[0], texdw[1], texdw[4],
			 texdw[5], texdw[6], texdw[7]);
		return r;
	}

	/* check texture size */
	if (toffset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, toffset, surf.base_align);
		return -EINVAL;
	}
	if (surf.nsamples <= 1 && moffset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, moffset, surf.base_align);
		return -EINVAL;
	}
	if (dim == SQ_TEX_DIM_3D) {
		toffset += surf.layer_size * depth;
	} else {
		toffset += surf.layer_size * mslice;
	}
	if (toffset > radeon_bo_size(texture)) {
		dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, "
			 "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n",
			 __func__, __LINE__, surf.layer_size,
			(unsigned long)texdw[2] << 8, mslice,
			depth, radeon_bo_size(texture),
			surf.nbx, surf.nby);
		return -EINVAL;
	}

	if (!mipmap) {
		if (llevel) {
			dev_warn(p->dev, "%s:%i got NULL MIP_ADDRESS relocation\n",
				 __func__, __LINE__);
			return -EINVAL;
		} else {
			return 0; /* everything's ok */
		}
	}

	/* check mipmap size */
	for (i = 1; i <= llevel; i++) {
		unsigned w, h, d;

		w = r600_mip_minify(width, i);
		h = r600_mip_minify(height, i);
		d = r600_mip_minify(depth, i);
		surf.nbx = r600_fmt_get_nblocksx(surf.format, w);
		surf.nby = r600_fmt_get_nblocksy(surf.format, h);

		switch (surf.mode) {
		case ARRAY_2D_TILED_THIN1:
			if (surf.nbx < surf.palign || surf.nby < surf.halign) {
				surf.mode = ARRAY_1D_TILED_THIN1;
			}
			/* recompute alignment */
			evergreen_surface_check(p, &surf, NULL);
			break;
		case ARRAY_LINEAR_GENERAL:
		case ARRAY_LINEAR_ALIGNED:
		case ARRAY_1D_TILED_THIN1:
			break;
		default:
			dev_warn(p->dev, "%s:%d invalid array mode %d\n",
				 __func__, __LINE__, surf.mode);
			return -EINVAL;
		}
		surf.nbx = ALIGN(surf.nbx, surf.palign);
		surf.nby = ALIGN(surf.nby, surf.halign);

		r = evergreen_surface_check(p, &surf, "mipmap");
		if (r) {
			return r;
		}

		if (dim == SQ_TEX_DIM_3D) {
			moffset += surf.layer_size * d;
		} else {
			moffset += surf.layer_size * mslice;
		}
		if (moffset > radeon_bo_size(mipmap)) {
			dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, "
					"offset %ld, coffset %ld, max layer %d, depth %d, "
					"bo size %ld) level0 (%d %d %d)\n",
					__func__, __LINE__, i, surf.layer_size,
					(unsigned long)texdw[3] << 8, moffset, mslice,
					d, radeon_bo_size(mipmap),
					width, height, depth);
			dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
				 __func__, __LINE__, surf.nbx, surf.nby,
				surf.mode, surf.bpe, surf.nsamples,
				surf.bankw, surf.bankh,
				surf.tsplit, surf.mtilea);
			return -EINVAL;
		}
	}

	return 0;
}

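/* Top level state check, run before a draw is allowed: validates the
 * enabled streamout buffers, every enabled color target and the
 * depth/stencil buffers, using the dirty flags to skip unchanged state.
 */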
static int evergreen_cs_track_check(struct radeon_cs_parser *p)
{
	struct evergreen_cs_track *track = p->track;
	unsigned tmp, i;
	int r;
	unsigned buffer_mask = 0;

	/* check streamout */
	if (track->streamout_dirty && track->vgt_strmout_config) {
		for (i = 0; i < 4; i++) {
			if (track->vgt_strmout_config & (1 << i)) {
				buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf;
			}
		}

		for (i = 0; i < 4; i++) {
			if (buffer_mask & (1 << i)) {
				if (track->vgt_strmout_bo[i]) {
					u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
							(u64)track->vgt_strmout_size[i];
					if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
						DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
							  i, offset,
							  radeon_bo_size(track->vgt_strmout_bo[i]));
						return -EINVAL;
					}
				} else {
					dev_warn(p->dev, "No buffer for streamout %d\n", i);
					return -EINVAL;
				}
			}
		}
		track->streamout_dirty = false;
	}

	if (track->sx_misc_kill_all_prims)
		return 0;

	/* check that we have a cb for each enabled target
	 */
	if (track->cb_dirty) {
		tmp = track->cb_target_mask;
		for (i = 0; i < 8; i++) {
			u32 format = G_028C70_FORMAT(track->cb_color_info[i]);

			if (format != V_028C70_COLOR_INVALID &&
			    (tmp >> (i * 4)) & 0xF) {
				/* at least one component is enabled */
				if (track->cb_color_bo[i] == NULL) {
					dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
						__func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
					return -EINVAL;
				}
				/* check cb */
				r = evergreen_cs_track_validate_cb(p, i);
				if (r) {
					return r;
				}
			}
		}
		track->cb_dirty = false;
	}

	if (track->db_dirty) {
		/* Check stencil buffer */
		if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID &&
		    G_028800_STENCIL_ENABLE(track->db_depth_control)) {
			r = evergreen_cs_track_validate_stencil(p);
			if (r)
				return r;
		}
		/* Check depth buffer */
		if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID &&
		    G_028800_Z_ENABLE(track->db_depth_control)) {
			r = evergreen_cs_track_validate_depth(p);
			if (r)
				return r;
		}
		track->db_dirty = false;
	}

	return 0;
}

/**
 * evergreen_cs_packet_parse_vline() - parse userspace VLINE packet
 * @p:		parser structure holding parsing context.
 *
 * This is an Evergreen(+)-specific function for parsing VLINE packets.
 * The real work is done by the r600_cs_common_vline_parse() function.
 * Here we just set up the ASIC-specific register tables and call
 * the common implementation function.
 */
static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser *p)
{

	static uint32_t vline_start_end[6] = {
		EVERGREEN_VLINE_START_END + EVERGREEN_CRTC0_REGISTER_OFFSET,
		EVERGREEN_VLINE_START_END + EVERGREEN_CRTC1_REGISTER_OFFSET,
		EVERGREEN_VLINE_START_END + EVERGREEN_CRTC2_REGISTER_OFFSET,
		EVERGREEN_VLINE_START_END + EVERGREEN_CRTC3_REGISTER_OFFSET,
		EVERGREEN_VLINE_START_END + EVERGREEN_CRTC4_REGISTER_OFFSET,
		EVERGREEN_VLINE_START_END + EVERGREEN_CRTC5_REGISTER_OFFSET
	};
	static uint32_t vline_status[6] = {
		EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
		EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
		EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
		EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
		EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
		EVERGREEN_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET
	};

	return r600_cs_common_vline_parse(p, vline_start_end, vline_status);
}

static int evergreen_packet0_check(struct radeon_cs_parser *p,
				   struct radeon_cs_packet *pkt,
				   unsigned idx, unsigned reg)
{
	int r;

	switch (reg) {
	case EVERGREEN_VLINE_START_END:
		r = evergreen_cs_packet_parse_vline(p);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
					idx, reg);
			return r;
		}
		break;
	default:
		printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
		       reg, idx);
		return -EINVAL;
	}
	return 0;
}

static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
				      struct radeon_cs_packet *pkt)
{
	unsigned reg, i;
	unsigned idx;
	int r;

	idx = pkt->idx + 1;
	reg = pkt->reg;
	for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
		r = evergreen_packet0_check(p, pkt, idx, reg);
		if (r) {
			return r;
		}
	}
	return 0;
}

/**
 * evergreen_cs_check_reg() - check if a register is authorized or not
 * @p: parser structure holding parsing context
 * @reg: register we are testing
 * @idx: index into the cs buffer
 *
 * This function will test against evergreen_reg_safe_bm and return 0
 * if the register is safe. If the register is not flagged as safe this
 * function will test it against a list of registers needing special
 * handling.
 */
static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
{
	struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
	struct radeon_bo_list *reloc;
	u32 last_reg;
	u32 m, i, tmp, *ib;
	int r;

	if (p->rdev->family >= CHIP_CAYMAN)
		last_reg = ARRAY_SIZE(cayman_reg_safe_bm);
	else
		last_reg = ARRAY_SIZE(evergreen_reg_safe_bm);

	i = (reg >> 7);
	if (i >= last_reg) {
		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
		return -EINVAL;
	}
	m = 1 << ((reg >> 2) & 31);
	if (p->rdev->family >= CHIP_CAYMAN) {
		if (!(cayman_reg_safe_bm[i] & m))
			return 0;
	} else {
		if (!(evergreen_reg_safe_bm[i] & m))
			return 0;
	}
	ib = p->ib.ptr;
	switch (reg) {
	/* Force the following regs to 0 in an attempt to disable the out
	 * buffer; we would need to better understand how it works in order
	 * to perform a proper security check on it. (Jerome)
	 */
	case SQ_ESGS_RING_SIZE:
	case SQ_GSVS_RING_SIZE:
	case SQ_ESTMP_RING_SIZE:
	case SQ_GSTMP_RING_SIZE:
	case SQ_HSTMP_RING_SIZE:
	case SQ_LSTMP_RING_SIZE:
	case SQ_PSTMP_RING_SIZE:
	case SQ_VSTMP_RING_SIZE:
	case SQ_ESGS_RING_ITEMSIZE:
	case SQ_ESTMP_RING_ITEMSIZE:
	case SQ_GSTMP_RING_ITEMSIZE:
	case SQ_GSVS_RING_ITEMSIZE:
	case SQ_GS_VERT_ITEMSIZE:
	case SQ_GS_VERT_ITEMSIZE_1:
	case SQ_GS_VERT_ITEMSIZE_2:
	case SQ_GS_VERT_ITEMSIZE_3:
	case SQ_GSVS_RING_OFFSET_1:
	case SQ_GSVS_RING_OFFSET_2:
	case SQ_GSVS_RING_OFFSET_3:
	case SQ_HSTMP_RING_ITEMSIZE:
	case SQ_LSTMP_RING_ITEMSIZE:
	case SQ_PSTMP_RING_ITEMSIZE:
	case SQ_VSTMP_RING_ITEMSIZE:
	case VGT_TF_RING_SIZE:
		/* get value to populate the IB don't remove */
		/*tmp =radeon_get_ib_value(p, idx);
		  ib[idx] = 0;*/
		break;
	case SQ_ESGS_RING_BASE:
	case SQ_GSVS_RING_BASE:
	case SQ_ESTMP_RING_BASE:
	case SQ_GSTMP_RING_BASE:
	case SQ_HSTMP_RING_BASE:
	case SQ_LSTMP_RING_BASE:
	case SQ_PSTMP_RING_BASE:
	case SQ_VSTMP_RING_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		break;
	case DB_DEPTH_CONTROL:
		track->db_depth_control = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case CAYMAN_DB_EQAA:
		if (p->rdev->family < CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		break;
	case CAYMAN_DB_DEPTH_INFO:
		if (p->rdev->family < CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		break;
	case DB_Z_INFO:
		track->db_z_info = radeon_get_ib_value(p, idx);
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
			if (r) {
				dev_warn(p->dev, "bad SET_CONTEXT_REG "
						"0x%04X\n", reg);
				return -EINVAL;
			}
			ib[idx] &= ~Z_ARRAY_MODE(0xf);
			track->db_z_info &= ~Z_ARRAY_MODE(0xf);
			ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
			track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
			if (reloc->tiling_flags & RADEON_TILING_MACRO) {
				unsigned bankw, bankh, mtaspect, tile_split;

				evergreen_tiling_fields(reloc->tiling_flags,
							&bankw, &bankh, &mtaspect,
							&tile_split);
				ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
				ib[idx] |= DB_TILE_SPLIT(tile_split) |
						DB_BANK_WIDTH(bankw) |
						DB_BANK_HEIGHT(bankh) |
						DB_MACRO_TILE_ASPECT(mtaspect);
			}
		}
		track->db_dirty = true;
		break;
	case DB_STENCIL_INFO:
		track->db_s_info = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case DB_DEPTH_VIEW:
		track->db_depth_view = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case DB_DEPTH_SIZE:
		track->db_depth_size = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case R_02805C_DB_DEPTH_SLICE:
		track->db_depth_slice = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case DB_Z_READ_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->db_z_read_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->db_z_read_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case DB_Z_WRITE_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->db_z_write_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->db_z_write_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case DB_STENCIL_READ_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->db_s_read_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->db_s_read_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case DB_STENCIL_WRITE_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->db_s_write_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->db_s_write_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case VGT_STRMOUT_CONFIG:
		track->vgt_strmout_config = radeon_get_ib_value(p, idx);
		track->streamout_dirty = true;
		break;
	case VGT_STRMOUT_BUFFER_CONFIG:
		track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx);
		track->streamout_dirty = true;
		break;
	case VGT_STRMOUT_BUFFER_BASE_0:
	case VGT_STRMOUT_BUFFER_BASE_1:
	case VGT_STRMOUT_BUFFER_BASE_2:
	case VGT_STRMOUT_BUFFER_BASE_3:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
		track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->vgt_strmout_bo[tmp] = reloc->robj;
		track->streamout_dirty = true;
		break;
	case VGT_STRMOUT_BUFFER_SIZE_0:
	case VGT_STRMOUT_BUFFER_SIZE_1:
	case VGT_STRMOUT_BUFFER_SIZE_2:
	case VGT_STRMOUT_BUFFER_SIZE_3:
		tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
		/* size in register is DWs, convert to bytes */
		track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
		track->streamout_dirty = true;
		break;
	case CP_COHER_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		break;
	case CB_TARGET_MASK:
		track->cb_target_mask = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case CB_SHADER_MASK:
		track->cb_shader_mask = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case PA_SC_AA_CONFIG:
		if (p->rdev->family >= CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK;
		track->nsamples = 1 << tmp;
		break;
	case CAYMAN_PA_SC_AA_CONFIG:
		if (p->rdev->family < CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK;
		track->nsamples = 1 << tmp;
		break;
	case CB_COLOR0_VIEW:
	case CB_COLOR1_VIEW:
	case CB_COLOR2_VIEW:
	case CB_COLOR3_VIEW:
	case CB_COLOR4_VIEW:
	case CB_COLOR5_VIEW:
	case CB_COLOR6_VIEW:
	case CB_COLOR7_VIEW:
		tmp = (reg - CB_COLOR0_VIEW) / 0x3c;
		track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case CB_COLOR8_VIEW:
	case CB_COLOR9_VIEW:
	case CB_COLOR10_VIEW:
	case CB_COLOR11_VIEW:
		tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8;
		track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case CB_COLOR0_INFO:
	case CB_COLOR1_INFO:
	case CB_COLOR2_INFO:
	case CB_COLOR3_INFO:
	case CB_COLOR4_INFO:
	case CB_COLOR5_INFO:
	case CB_COLOR6_INFO:
	case CB_COLOR7_INFO:
		tmp = (reg - CB_COLOR0_INFO) / 0x3c;
		track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
			if (r) {
				dev_warn(p->dev, "bad SET_CONTEXT_REG "
						"0x%04X\n", reg);
				return -EINVAL;
			}
			ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
			track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
		}
		track->cb_dirty = true;
		break;
	case CB_COLOR8_INFO:
	case CB_COLOR9_INFO:
	case CB_COLOR10_INFO:
	case CB_COLOR11_INFO:
		tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8;
		track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
			if (r) {
				dev_warn(p->dev, "bad SET_CONTEXT_REG "
						"0x%04X\n", reg);
				return -EINVAL;
			}
			ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
			track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
		}
		track->cb_dirty = true;
		break;
	case CB_COLOR0_PITCH:
	case CB_COLOR1_PITCH:
	case CB_COLOR2_PITCH:
	case CB_COLOR3_PITCH:
	case CB_COLOR4_PITCH:
	case CB_COLOR5_PITCH:
	case CB_COLOR6_PITCH:
	case CB_COLOR7_PITCH:
		tmp = (reg - CB_COLOR0_PITCH) / 0x3c;
		track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case CB_COLOR8_PITCH:
	case CB_COLOR9_PITCH:
	case CB_COLOR10_PITCH:
	case CB_COLOR11_PITCH:
		tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8;
		track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case CB_COLOR0_SLICE:
	case CB_COLOR1_SLICE:
	case CB_COLOR2_SLICE:
	case CB_COLOR3_SLICE:
	case CB_COLOR4_SLICE:
	case CB_COLOR5_SLICE:
	case CB_COLOR6_SLICE:
	case CB_COLOR7_SLICE:
		tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
		track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
		track->cb_color_slice_idx[tmp] = idx;
		track->cb_dirty = true;
		break;
	case CB_COLOR8_SLICE:
	case CB_COLOR9_SLICE:
	case CB_COLOR10_SLICE:
	case CB_COLOR11_SLICE:
		tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
		track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
		track->cb_color_slice_idx[tmp] = idx;
		track->cb_dirty = true;
		break;
	case CB_COLOR0_ATTRIB:
	case CB_COLOR1_ATTRIB:
	case CB_COLOR2_ATTRIB:
	case CB_COLOR3_ATTRIB:
	case CB_COLOR4_ATTRIB:
	case CB_COLOR5_ATTRIB:
	case CB_COLOR6_ATTRIB:
	case CB_COLOR7_ATTRIB:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			if (reloc->tiling_flags & RADEON_TILING_MACRO) {
				unsigned bankw, bankh, mtaspect, tile_split;

				evergreen_tiling_fields(reloc->tiling_flags,
							&bankw, &bankh, &mtaspect,
							&tile_split);
				ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
				ib[idx] |= CB_TILE_SPLIT(tile_split) |
					   CB_BANK_WIDTH(bankw) |
					   CB_BANK_HEIGHT(bankh) |
					   CB_MACRO_TILE_ASPECT(mtaspect);
			}
		}
		tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c);
		track->cb_color_attrib[tmp] = ib[idx];
		track->cb_dirty = true;
		break;
	case CB_COLOR8_ATTRIB:
	case CB_COLOR9_ATTRIB:
	case CB_COLOR10_ATTRIB:
	case CB_COLOR11_ATTRIB:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			if (reloc->tiling_flags & RADEON_TILING_MACRO) {
				unsigned bankw, bankh, mtaspect, tile_split;

				evergreen_tiling_fields(reloc->tiling_flags,
							&bankw, &bankh, &mtaspect,
							&tile_split);
				ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
				ib[idx] |= CB_TILE_SPLIT(tile_split) |
					   CB_BANK_WIDTH(bankw) |
					   CB_BANK_HEIGHT(bankh) |
					   CB_MACRO_TILE_ASPECT(mtaspect);
			}
		}
		tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8;
		track->cb_color_attrib[tmp] = ib[idx];
		track->cb_dirty = true;
		break;
	case CB_COLOR0_FMASK:
	case CB_COLOR1_FMASK:
	case CB_COLOR2_FMASK:
	case CB_COLOR3_FMASK:
	case CB_COLOR4_FMASK:
	case CB_COLOR5_FMASK:
	case CB_COLOR6_FMASK:
	case CB_COLOR7_FMASK:
		tmp = (reg - CB_COLOR0_FMASK) / 0x3c;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->cb_color_fmask_bo[tmp] = reloc->robj;
		break;
	case CB_COLOR0_CMASK:
	case CB_COLOR1_CMASK:
	case CB_COLOR2_CMASK:
	case CB_COLOR3_CMASK:
	case CB_COLOR4_CMASK:
	case CB_COLOR5_CMASK:
	case CB_COLOR6_CMASK:
	case CB_COLOR7_CMASK:
		tmp = (reg - CB_COLOR0_CMASK) / 0x3c;
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->cb_color_cmask_bo[tmp] = reloc->robj;
		break;
	case CB_COLOR0_FMASK_SLICE:
	case CB_COLOR1_FMASK_SLICE:
	case CB_COLOR2_FMASK_SLICE:
	case CB_COLOR3_FMASK_SLICE:
	case CB_COLOR4_FMASK_SLICE:
	case CB_COLOR5_FMASK_SLICE:
	case CB_COLOR6_FMASK_SLICE:
	case CB_COLOR7_FMASK_SLICE:
		tmp = (reg - CB_COLOR0_FMASK_SLICE) / 0x3c;
		track->cb_color_fmask_slice[tmp] = radeon_get_ib_value(p, idx);
		break;
	case CB_COLOR0_CMASK_SLICE:
	case CB_COLOR1_CMASK_SLICE:
	case CB_COLOR2_CMASK_SLICE:
	case CB_COLOR3_CMASK_SLICE:
	case CB_COLOR4_CMASK_SLICE:
	case CB_COLOR5_CMASK_SLICE:
	case CB_COLOR6_CMASK_SLICE:
	case CB_COLOR7_CMASK_SLICE:
		tmp = (reg - CB_COLOR0_CMASK_SLICE) / 0x3c;
		track->cb_color_cmask_slice[tmp] = radeon_get_ib_value(p, idx);
		break;
	case CB_COLOR0_BASE:
	case CB_COLOR1_BASE:
	case CB_COLOR2_BASE:
	case CB_COLOR3_BASE:
	case CB_COLOR4_BASE:
	case CB_COLOR5_BASE:
	case CB_COLOR6_BASE:
	case CB_COLOR7_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = (reg - CB_COLOR0_BASE) / 0x3c;
		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->cb_color_bo[tmp] = reloc->robj;
		track->cb_dirty = true;
		break;
	case CB_COLOR8_BASE:
	case CB_COLOR9_BASE:
	case CB_COLOR10_BASE:
	case CB_COLOR11_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8;
		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->cb_color_bo[tmp] = reloc->robj;
		track->cb_dirty = true;
		break;
	case DB_HTILE_DATA_BASE:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->htile_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		track->htile_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case DB_HTILE_SURFACE:
		/* 8x8 only */
		track->htile_surface = radeon_get_ib_value(p, idx);
		/* force 8x8 htile width and height */
		ib[idx] |= 3;
		track->db_dirty = true;
		break;
	case CB_IMMED0_BASE:
	case CB_IMMED1_BASE:
	case CB_IMMED2_BASE:
	case CB_IMMED3_BASE:
	case CB_IMMED4_BASE:
	case CB_IMMED5_BASE:
	case CB_IMMED6_BASE:
	case CB_IMMED7_BASE:
	case CB_IMMED8_BASE:
	case CB_IMMED9_BASE:
	case CB_IMMED10_BASE:
	case CB_IMMED11_BASE:
	case SQ_PGM_START_FS:
	case SQ_PGM_START_ES:
	case SQ_PGM_START_VS:
	case SQ_PGM_START_GS:
	case SQ_PGM_START_PS:
	case SQ_PGM_START_HS:
	case SQ_PGM_START_LS:
	case SQ_CONST_MEM_BASE:
	case SQ_ALU_CONST_CACHE_GS_0:
	case SQ_ALU_CONST_CACHE_GS_1:
	case SQ_ALU_CONST_CACHE_GS_2:
	case SQ_ALU_CONST_CACHE_GS_3:
	case SQ_ALU_CONST_CACHE_GS_4:
	case SQ_ALU_CONST_CACHE_GS_5:
	case SQ_ALU_CONST_CACHE_GS_6:
	case SQ_ALU_CONST_CACHE_GS_7:
	case SQ_ALU_CONST_CACHE_GS_8:
	case SQ_ALU_CONST_CACHE_GS_9:
	case SQ_ALU_CONST_CACHE_GS_10:
	case SQ_ALU_CONST_CACHE_GS_11:
	case SQ_ALU_CONST_CACHE_GS_12:
	case SQ_ALU_CONST_CACHE_GS_13:
	case SQ_ALU_CONST_CACHE_GS_14:
	case SQ_ALU_CONST_CACHE_GS_15:
	case SQ_ALU_CONST_CACHE_PS_0:
	case SQ_ALU_CONST_CACHE_PS_1:
	case SQ_ALU_CONST_CACHE_PS_2:
	case SQ_ALU_CONST_CACHE_PS_3:
	case SQ_ALU_CONST_CACHE_PS_4:
	case SQ_ALU_CONST_CACHE_PS_5:
	case SQ_ALU_CONST_CACHE_PS_6:
	case SQ_ALU_CONST_CACHE_PS_7:
	case SQ_ALU_CONST_CACHE_PS_8:
	case SQ_ALU_CONST_CACHE_PS_9:
	case SQ_ALU_CONST_CACHE_PS_10:
	case SQ_ALU_CONST_CACHE_PS_11:
	case SQ_ALU_CONST_CACHE_PS_12:
	case SQ_ALU_CONST_CACHE_PS_13:
	case SQ_ALU_CONST_CACHE_PS_14:
	case SQ_ALU_CONST_CACHE_PS_15:
	case SQ_ALU_CONST_CACHE_VS_0:
	case SQ_ALU_CONST_CACHE_VS_1:
	case SQ_ALU_CONST_CACHE_VS_2:
	case SQ_ALU_CONST_CACHE_VS_3:
	case SQ_ALU_CONST_CACHE_VS_4:
	case SQ_ALU_CONST_CACHE_VS_5:
	case SQ_ALU_CONST_CACHE_VS_6:
	case SQ_ALU_CONST_CACHE_VS_7:
	case SQ_ALU_CONST_CACHE_VS_8:
	case SQ_ALU_CONST_CACHE_VS_9:
	case SQ_ALU_CONST_CACHE_VS_10:
	case SQ_ALU_CONST_CACHE_VS_11:
	case SQ_ALU_CONST_CACHE_VS_12:
	case SQ_ALU_CONST_CACHE_VS_13:
	case SQ_ALU_CONST_CACHE_VS_14:
	case SQ_ALU_CONST_CACHE_VS_15:
	case SQ_ALU_CONST_CACHE_HS_0:
	case SQ_ALU_CONST_CACHE_HS_1:
	case SQ_ALU_CONST_CACHE_HS_2:
	case SQ_ALU_CONST_CACHE_HS_3:
	case SQ_ALU_CONST_CACHE_HS_4:
	case SQ_ALU_CONST_CACHE_HS_5:
	case SQ_ALU_CONST_CACHE_HS_6:
	case SQ_ALU_CONST_CACHE_HS_7:
	case SQ_ALU_CONST_CACHE_HS_8:
	case SQ_ALU_CONST_CACHE_HS_9:
	case SQ_ALU_CONST_CACHE_HS_10:
	case SQ_ALU_CONST_CACHE_HS_11:
	case SQ_ALU_CONST_CACHE_HS_12:
	case SQ_ALU_CONST_CACHE_HS_13:
	case SQ_ALU_CONST_CACHE_HS_14:
	case SQ_ALU_CONST_CACHE_HS_15:
	case SQ_ALU_CONST_CACHE_LS_0:
	case SQ_ALU_CONST_CACHE_LS_1:
	case SQ_ALU_CONST_CACHE_LS_2:
	case SQ_ALU_CONST_CACHE_LS_3:
	case SQ_ALU_CONST_CACHE_LS_4:
	case SQ_ALU_CONST_CACHE_LS_5:
	case SQ_ALU_CONST_CACHE_LS_6:
	case SQ_ALU_CONST_CACHE_LS_7:
	case SQ_ALU_CONST_CACHE_LS_8:
	case SQ_ALU_CONST_CACHE_LS_9:
	case SQ_ALU_CONST_CACHE_LS_10:
	case SQ_ALU_CONST_CACHE_LS_11:
	case SQ_ALU_CONST_CACHE_LS_12:
	case SQ_ALU_CONST_CACHE_LS_13:
	case SQ_ALU_CONST_CACHE_LS_14:
	case SQ_ALU_CONST_CACHE_LS_15:
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		break;
	case SX_MEMORY_EXPORT_BASE:
		if (p->rdev->family >= CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONFIG_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONFIG_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		break;
	case CAYMAN_SX_SCATTER_EXPORT_BASE:
		if (p->rdev->family < CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
		break;
	case SX_MISC:
		track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
		break;
	default:
		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
		return -EINVAL;
	}
	return 0;
}

1767static bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1768{
1769	u32 last_reg, m, i;
1770
1771	if (p->rdev->family >= CHIP_CAYMAN)
1772		last_reg = ARRAY_SIZE(cayman_reg_safe_bm);
1773	else
1774		last_reg = ARRAY_SIZE(evergreen_reg_safe_bm);
1775
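	/* Each bitmap word covers 32 registers of 4 bytes (128 bytes of
	 * register space): word index is reg >> 7, bit index is
	 * (reg >> 2) & 31.  A set bit marks the register as forbidden.
	 */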
1776	i = (reg >> 7);
1777	if (i >= last_reg) {
1778		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1779		return false;
1780	}
1781	m = 1 << ((reg >> 2) & 31);
1782	if (p->rdev->family >= CHIP_CAYMAN) {
1783		if (!(cayman_reg_safe_bm[i] & m))
1784			return true;
1785	} else {
1786		if (!(evergreen_reg_safe_bm[i] & m))
1787			return true;
1788	}
1789	dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1790	return false;
1791}
1792
1793static int evergreen_packet3_check(struct radeon_cs_parser *p,
1794				   struct radeon_cs_packet *pkt)
1795{
1796	struct radeon_bo_list *reloc;
1797	struct evergreen_cs_track *track;
1798	volatile u32 *ib;
1799	unsigned idx;
1800	unsigned i;
1801	unsigned start_reg, end_reg, reg;
1802	int r;
1803	u32 idx_value;
1804
1805	track = (struct evergreen_cs_track *)p->track;
1806	ib = p->ib.ptr;
1807	idx = pkt->idx + 1;
1808	idx_value = radeon_get_ib_value(p, idx);
1809
1810	switch (pkt->opcode) {
1811	case PACKET3_SET_PREDICATION:
1812	{
1813		int pred_op;
1814		int tmp;
1815		uint64_t offset;
1816
1817		if (pkt->count != 1) {
1818			DRM_ERROR("bad SET PREDICATION\n");
1819			return -EINVAL;
1820		}
1821
1822		tmp = radeon_get_ib_value(p, idx + 1);
1823		pred_op = (tmp >> 16) & 0x7;
1824
1825		/* for the clear predicate operation */
1826		if (pred_op == 0)
1827			return 0;
1828
1829		if (pred_op > 2) {
1830			DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op);
1831			return -EINVAL;
1832		}
1833
1834		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1835		if (r) {
1836			DRM_ERROR("bad SET PREDICATION\n");
1837			return -EINVAL;
1838		}
1839
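		/* Rebuild the 64-bit compare address: the low DW carries bits
		 * [31:4] (16-byte aligned) and the low byte of the next DW
		 * carries bits [39:32].
		 */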
1840		offset = reloc->gpu_offset +
1841		         (idx_value & 0xfffffff0) +
1842		         ((u64)(tmp & 0xff) << 32);
1843
1844		ib[idx + 0] = offset;
1845		ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
1846	}
1847	break;
1848	case PACKET3_CONTEXT_CONTROL:
1849		if (pkt->count != 1) {
1850			DRM_ERROR("bad CONTEXT_CONTROL\n");
1851			return -EINVAL;
1852		}
1853		break;
1854	case PACKET3_INDEX_TYPE:
1855	case PACKET3_NUM_INSTANCES:
1856	case PACKET3_CLEAR_STATE:
1857		if (pkt->count) {
1858			DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
1859			return -EINVAL;
1860		}
1861		break;
1862	case CAYMAN_PACKET3_DEALLOC_STATE:
1863		if (p->rdev->family < CHIP_CAYMAN) {
1864			DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
1865			return -EINVAL;
1866		}
1867		if (pkt->count) {
1868			DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
1869			return -EINVAL;
1870		}
1871		break;
1872	case PACKET3_INDEX_BASE:
1873	{
1874		uint64_t offset;
1875
1876		if (pkt->count != 1) {
1877			DRM_ERROR("bad INDEX_BASE\n");
1878			return -EINVAL;
1879		}
1880		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1881		if (r) {
1882			DRM_ERROR("bad INDEX_BASE\n");
1883			return -EINVAL;
1884		}
1885
1886		offset = reloc->gpu_offset +
1887		         idx_value +
1888		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1889
1890		ib[idx+0] = offset;
1891		ib[idx+1] = upper_32_bits(offset) & 0xff;
1892
1893		r = evergreen_cs_track_check(p);
1894		if (r) {
1895			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1896			return r;
1897		}
1898		break;
1899	}
1900	case PACKET3_INDEX_BUFFER_SIZE:
1901	{
1902		if (pkt->count != 0) {
1903			DRM_ERROR("bad INDEX_BUFFER_SIZE\n");
1904			return -EINVAL;
1905		}
1906		break;
1907	}
1908	case PACKET3_DRAW_INDEX:
1909	{
1910		uint64_t offset;
1911		if (pkt->count != 3) {
1912			DRM_ERROR("bad DRAW_INDEX\n");
1913			return -EINVAL;
1914		}
1915		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1916		if (r) {
1917			DRM_ERROR("bad DRAW_INDEX\n");
1918			return -EINVAL;
1919		}
1920
1921		offset = reloc->gpu_offset +
1922		         idx_value +
1923		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
1924
1925		ib[idx+0] = offset;
1926		ib[idx+1] = upper_32_bits(offset) & 0xff;
1927
1928		r = evergreen_cs_track_check(p);
1929		if (r) {
1930			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1931			return r;
1932		}
1933		break;
1934	}
1935	case PACKET3_DRAW_INDEX_2:
1936	{
1937		uint64_t offset;
1938
1939		if (pkt->count != 4) {
1940			DRM_ERROR("bad DRAW_INDEX_2\n");
1941			return -EINVAL;
1942		}
1943		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
1944		if (r) {
1945			DRM_ERROR("bad DRAW_INDEX_2\n");
1946			return -EINVAL;
1947		}
1948
1949		offset = reloc->gpu_offset +
1950		         radeon_get_ib_value(p, idx+1) +
1951		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
1952
1953		ib[idx+1] = offset;
1954		ib[idx+2] = upper_32_bits(offset) & 0xff;
1955
1956		r = evergreen_cs_track_check(p);
1957		if (r) {
1958			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1959			return r;
1960		}
1961		break;
1962	}
1963	case PACKET3_DRAW_INDEX_AUTO:
1964		if (pkt->count != 1) {
1965			DRM_ERROR("bad DRAW_INDEX_AUTO\n");
1966			return -EINVAL;
1967		}
1968		r = evergreen_cs_track_check(p);
1969		if (r) {
1970			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1971			return r;
1972		}
1973		break;
1974	case PACKET3_DRAW_INDEX_MULTI_AUTO:
1975		if (pkt->count != 2) {
1976			DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
1977			return -EINVAL;
1978		}
1979		r = evergreen_cs_track_check(p);
1980		if (r) {
1981			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
1982			return r;
1983		}
1984		break;
1985	case PACKET3_DRAW_INDEX_IMMD:
1986		if (pkt->count < 2) {
1987			DRM_ERROR("bad DRAW_INDEX_IMMD\n");
1988			return -EINVAL;
1989		}
1990		r = evergreen_cs_track_check(p);
1991		if (r) {
1992			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
1993			return r;
1994		}
1995		break;
1996	case PACKET3_DRAW_INDEX_OFFSET:
1997		if (pkt->count != 2) {
1998			DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
1999			return -EINVAL;
2000		}
2001		r = evergreen_cs_track_check(p);
2002		if (r) {
2003			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2004			return r;
2005		}
2006		break;
2007	case PACKET3_DRAW_INDEX_OFFSET_2:
2008		if (pkt->count != 3) {
2009			DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
2010			return -EINVAL;
2011		}
2012		r = evergreen_cs_track_check(p);
2013		if (r) {
2014			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2015			return r;
2016		}
2017		break;
2018	case PACKET3_SET_BASE:
2019	{
2020		/*
2021		DW 1 HEADER Header of the packet. Shader_Type in bit 1 of the Header will correspond to the shader type of the Load, see Type-3 Packet.
2022		   2 BASE_INDEX Bits [3:0] BASE_INDEX - Base Index specifies which base address is specified in the last two DWs.
2023		     0001: DX11 Draw_Index_Indirect Patch Table Base: Base address for Draw_Index_Indirect data.
2024		   3 ADDRESS_LO Bits [31:3] - Lower bits of QWORD-Aligned Address. Bits [2:0] - Reserved
2025		   4 ADDRESS_HI Bits [31:8] - Reserved. Bits [7:0] - Upper bits of Address [47:32]
2026		*/
2027		if (pkt->count != 2) {
2028			DRM_ERROR("bad SET_BASE\n");
2029			return -EINVAL;
2030		}
2031
2032		/* currently only supporting setting indirect draw buffer base address */
2033		if (idx_value != 1) {
2034			DRM_ERROR("bad SET_BASE\n");
2035			return -EINVAL;
2036		}
2037
2038		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2039		if (r) {
2040			DRM_ERROR("bad SET_BASE\n");
2041			return -EINVAL;
2042		}
2043
2044		track->indirect_draw_buffer_size = radeon_bo_size(reloc->robj);
2045
2046		ib[idx+1] = reloc->gpu_offset;
2047		ib[idx+2] = upper_32_bits(reloc->gpu_offset) & 0xff;
2048
2049		break;
2050	}
2051	case PACKET3_DRAW_INDIRECT:
2052	case PACKET3_DRAW_INDEX_INDIRECT:
2053	{
2054		u64 size = pkt->opcode == PACKET3_DRAW_INDIRECT ? 16 : 20;
2055
2056		/*
2057		DW 1 HEADER
2058		   2 DATA_OFFSET Bits [31:0] + byte aligned offset where the required data structure starts. Bits 1:0 are zero
2059		   3 DRAW_INITIATOR Draw Initiator Register. Written to the VGT_DRAW_INITIATOR register for the assigned context
2060		*/
2061		if (pkt->count != 1) {
2062			DRM_ERROR("bad DRAW_INDIRECT\n");
2063			return -EINVAL;
2064		}
2065
2066		if (idx_value + size > track->indirect_draw_buffer_size) {
2067			dev_warn(p->dev, "DRAW_INDIRECT buffer too small %u + %llu > %lu\n",
2068				idx_value, size, track->indirect_draw_buffer_size);
2069			return -EINVAL;
2070		}
2071
2072		r = evergreen_cs_track_check(p);
2073		if (r) {
2074			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2075			return r;
2076		}
2077		break;
2078	}
2079	case PACKET3_DISPATCH_DIRECT:
2080		if (pkt->count != 3) {
2081			DRM_ERROR("bad DISPATCH_DIRECT\n");
2082			return -EINVAL;
2083		}
2084		r = evergreen_cs_track_check(p);
2085		if (r) {
2086			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
2087			return r;
2088		}
2089		break;
2090	case PACKET3_DISPATCH_INDIRECT:
2091		if (pkt->count != 1) {
2092			DRM_ERROR("bad DISPATCH_INDIRECT\n");
2093			return -EINVAL;
2094		}
2095		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2096		if (r) {
2097			DRM_ERROR("bad DISPATCH_INDIRECT\n");
2098			return -EINVAL;
2099		}
2100		ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff);
2101		r = evergreen_cs_track_check(p);
2102		if (r) {
2103			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
2104			return r;
2105		}
2106		break;
2107	case PACKET3_WAIT_REG_MEM:
2108		if (pkt->count != 5) {
2109			DRM_ERROR("bad WAIT_REG_MEM\n");
2110			return -EINVAL;
2111		}
2112		/* bit 4 is reg (0) or mem (1) */
2113		if (idx_value & 0x10) {
2114			uint64_t offset;
2115
2116			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2117			if (r) {
2118				DRM_ERROR("bad WAIT_REG_MEM\n");
2119				return -EINVAL;
2120			}
2121
2122			offset = reloc->gpu_offset +
2123			         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2124			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2125
2126			ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
2127			ib[idx+2] = upper_32_bits(offset) & 0xff;
2128		} else if (idx_value & 0x100) {
2129			DRM_ERROR("cannot use PFP on REG wait\n");
2130			return -EINVAL;
2131		}
2132		break;
2133	case PACKET3_CP_DMA:
2134	{
2135		u32 command, size, info;
2136		u64 offset, tmp;
2137		if (pkt->count != 4) {
2138			DRM_ERROR("bad CP DMA\n");
2139			return -EINVAL;
2140		}
2141		command = radeon_get_ib_value(p, idx+4);
2142		size = command & 0x1fffff;
2143		info = radeon_get_ib_value(p, idx+1);
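		/* info holds SRC_SEL in bits [30:29] and DST_SEL in bits [21:20]
		 * (0 = memory, 1 = GDS, 2 = DATA for the source).  The checks
		 * below reject register transfers and require a dw aligned size
		 * for anything that is not a plain mem-to-mem copy.
		 */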
2144		if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
2145		    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
2146		    ((((info & 0x00300000) >> 20) == 0) &&
2147		     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
2148		    ((((info & 0x60000000) >> 29) == 0) &&
2149		     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
2150			/* non mem to mem copies require a dw aligned count */
2151			if (size % 4) {
2152				DRM_ERROR("CP DMA command requires dw count alignment\n");
2153				return -EINVAL;
2154			}
2155		}
2156		if (command & PACKET3_CP_DMA_CMD_SAS) {
2157			/* src address space is register */
2158			/* GDS is ok */
2159			if (((info & 0x60000000) >> 29) != 1) {
2160				DRM_ERROR("CP DMA SAS not supported\n");
2161				return -EINVAL;
2162			}
2163		} else {
2164			if (command & PACKET3_CP_DMA_CMD_SAIC) {
2165				DRM_ERROR("CP DMA SAIC only supported for registers\n");
2166				return -EINVAL;
2167			}
2168			/* src address space is memory */
2169			if (((info & 0x60000000) >> 29) == 0) {
2170				r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2171				if (r) {
2172					DRM_ERROR("bad CP DMA SRC\n");
2173					return -EINVAL;
2174				}
2175
2176				tmp = radeon_get_ib_value(p, idx) +
2177					((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2178
2179				offset = reloc->gpu_offset + tmp;
2180
2181				if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2182					dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
2183						 tmp + size, radeon_bo_size(reloc->robj));
2184					return -EINVAL;
2185				}
2186
2187				ib[idx] = offset;
2188				ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2189			} else if (((info & 0x60000000) >> 29) != 2) {
2190				DRM_ERROR("bad CP DMA SRC_SEL\n");
2191				return -EINVAL;
2192			}
2193		}
2194		if (command & PACKET3_CP_DMA_CMD_DAS) {
2195			/* dst address space is register */
2196			/* GDS is ok */
2197			if (((info & 0x00300000) >> 20) != 1) {
2198				DRM_ERROR("CP DMA DAS not supported\n");
2199				return -EINVAL;
2200			}
2201		} else {
2202			/* dst address space is memory */
2203			if (command & PACKET3_CP_DMA_CMD_DAIC) {
2204				DRM_ERROR("CP DMA DAIC only supported for registers\n");
2205				return -EINVAL;
2206			}
2207			if (((info & 0x00300000) >> 20) == 0) {
2208				r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2209				if (r) {
2210					DRM_ERROR("bad CP DMA DST\n");
2211					return -EINVAL;
2212				}
2213
2214				tmp = radeon_get_ib_value(p, idx+2) +
2215					((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
2216
2217				offset = reloc->gpu_offset + tmp;
2218
2219				if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2220					dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
2221						 tmp + size, radeon_bo_size(reloc->robj));
2222					return -EINVAL;
2223				}
2224
2225				ib[idx+2] = offset;
2226				ib[idx+3] = upper_32_bits(offset) & 0xff;
2227			} else {
2228				DRM_ERROR("bad CP DMA DST_SEL\n");
2229				return -EINVAL;
2230			}
2231		}
2232		break;
2233	}
2234	case PACKET3_SURFACE_SYNC:
2235		if (pkt->count != 3) {
2236			DRM_ERROR("bad SURFACE_SYNC\n");
2237			return -EINVAL;
2238		}
2239		/* 0xffffffff/0x0 is flush all cache flag */
2240		if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
2241		    radeon_get_ib_value(p, idx + 2) != 0) {
2242			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2243			if (r) {
2244				DRM_ERROR("bad SURFACE_SYNC\n");
2245				return -EINVAL;
2246			}
2247			ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2248		}
2249		break;
2250	case PACKET3_EVENT_WRITE:
2251		if (pkt->count != 2 && pkt->count != 0) {
2252			DRM_ERROR("bad EVENT_WRITE\n");
2253			return -EINVAL;
2254		}
2255		if (pkt->count) {
2256			uint64_t offset;
2257
2258			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2259			if (r) {
2260				DRM_ERROR("bad EVENT_WRITE\n");
2261				return -EINVAL;
2262			}
2263			offset = reloc->gpu_offset +
2264			         (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
2265			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2266
2267			ib[idx+1] = offset & 0xfffffff8;
2268			ib[idx+2] = upper_32_bits(offset) & 0xff;
2269		}
2270		break;
2271	case PACKET3_EVENT_WRITE_EOP:
2272	{
2273		uint64_t offset;
2274
2275		if (pkt->count != 4) {
2276			DRM_ERROR("bad EVENT_WRITE_EOP\n");
2277			return -EINVAL;
2278		}
2279		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2280		if (r) {
2281			DRM_ERROR("bad EVENT_WRITE_EOP\n");
2282			return -EINVAL;
2283		}
2284
2285		offset = reloc->gpu_offset +
2286		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2287		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2288
2289		ib[idx+1] = offset & 0xfffffffc;
2290		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2291		break;
2292	}
2293	case PACKET3_EVENT_WRITE_EOS:
2294	{
2295		uint64_t offset;
2296
2297		if (pkt->count != 3) {
2298			DRM_ERROR("bad EVENT_WRITE_EOS\n");
2299			return -EINVAL;
2300		}
2301		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2302		if (r) {
2303			DRM_ERROR("bad EVENT_WRITE_EOS\n");
2304			return -EINVAL;
2305		}
2306
2307		offset = reloc->gpu_offset +
2308		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
2309		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);
2310
2311		ib[idx+1] = offset & 0xfffffffc;
2312		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2313		break;
2314	}
2315	case PACKET3_SET_CONFIG_REG:
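		/* The first data DW is a DWORD offset from the start of the
		 * config register range; every register written by the packet
		 * must pass evergreen_cs_check_reg().
		 */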
2316		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
2317		end_reg = 4 * pkt->count + start_reg - 4;
2318		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
2319		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
2320		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
2321			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
2322			return -EINVAL;
2323		}
2324		for (i = 0; i < pkt->count; i++) {
2325			reg = start_reg + (4 * i);
2326			r = evergreen_cs_check_reg(p, reg, idx+1+i);
2327			if (r)
2328				return r;
2329		}
2330		break;
2331	case PACKET3_SET_CONTEXT_REG:
2332		start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START;
2333		end_reg = 4 * pkt->count + start_reg - 4;
2334		if ((start_reg < PACKET3_SET_CONTEXT_REG_START) ||
2335		    (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
2336		    (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
2337			DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
2338			return -EINVAL;
2339		}
2340		for (i = 0; i < pkt->count; i++) {
2341			reg = start_reg + (4 * i);
2342			r = evergreen_cs_check_reg(p, reg, idx+1+i);
2343			if (r)
2344				return r;
2345		}
2346		break;
2347	case PACKET3_SET_RESOURCE:
2348		if (pkt->count % 8) {
2349			DRM_ERROR("bad SET_RESOURCE\n");
2350			return -EINVAL;
2351		}
2352		start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START;
2353		end_reg = 4 * pkt->count + start_reg - 4;
2354		if ((start_reg < PACKET3_SET_RESOURCE_START) ||
2355		    (start_reg >= PACKET3_SET_RESOURCE_END) ||
2356		    (end_reg >= PACKET3_SET_RESOURCE_END)) {
2357			DRM_ERROR("bad SET_RESOURCE\n");
2358			return -EINVAL;
2359		}
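		/* Each resource descriptor is 8 DWs; word 7 carries the SQ
		 * constant type, which selects texture or vertex-buffer
		 * validation below.
		 */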
2360		for (i = 0; i < (pkt->count / 8); i++) {
2361			struct radeon_bo *texture, *mipmap;
2362			u32 toffset, moffset;
2363			u32 size, offset, mip_address, tex_dim;
2364
2365			switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) {
2366			case SQ_TEX_VTX_VALID_TEXTURE:
2367				/* tex base */
2368				r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2369				if (r) {
2370					DRM_ERROR("bad SET_RESOURCE (tex)\n");
2371					return -EINVAL;
2372				}
2373				if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
2374					ib[idx+1+(i*8)+1] |=
2375						TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
2376					if (reloc->tiling_flags & RADEON_TILING_MACRO) {
2377						unsigned bankw, bankh, mtaspect, tile_split;
2378
2379						evergreen_tiling_fields(reloc->tiling_flags,
2380									&bankw, &bankh, &mtaspect,
2381									&tile_split);
2382						ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split);
2383						ib[idx+1+(i*8)+7] |=
2384							TEX_BANK_WIDTH(bankw) |
2385							TEX_BANK_HEIGHT(bankh) |
2386							MACRO_TILE_ASPECT(mtaspect) |
2387							TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
2388					}
2389				}
2390				texture = reloc->robj;
2391				toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2392
2393				/* tex mip base */
2394				tex_dim = ib[idx+1+(i*8)+0] & 0x7;
2395				mip_address = ib[idx+1+(i*8)+3];
2396
2397				if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) &&
2398				    !mip_address &&
2399				    !radeon_cs_packet_next_is_pkt3_nop(p)) {
2400					/* MIP_ADDRESS should point to FMASK for an MSAA texture.
2401					 * It should be 0 if FMASK is disabled. */
2402					moffset = 0;
2403					mipmap = NULL;
2404				} else {
2405					r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2406					if (r) {
2407						DRM_ERROR("bad SET_RESOURCE (tex)\n");
2408						return -EINVAL;
2409					}
2410					moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
2411					mipmap = reloc->robj;
2412				}
2413
2414				r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8));
2415				if (r)
2416					return r;
2417				ib[idx+1+(i*8)+2] += toffset;
2418				ib[idx+1+(i*8)+3] += moffset;
2419				break;
2420			case SQ_TEX_VTX_VALID_BUFFER:
2421			{
2422				uint64_t offset64;
2423				/* vtx base */
2424				r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2425				if (r) {
2426					DRM_ERROR("bad SET_RESOURCE (vtx)\n");
2427					return -EINVAL;
2428				}
2429				offset = radeon_get_ib_value(p, idx+1+(i*8)+0);
2430				size = radeon_get_ib_value(p, idx+1+(i*8)+1);
2431				if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
2432					/* force size to size of the buffer */
2433					dev_warn(p->dev, "vbo resource seems too big for the bo\n");
2434					ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
2435				}
2436
2437				offset64 = reloc->gpu_offset + offset;
2438				ib[idx+1+(i*8)+0] = offset64;
2439				ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
2440						    (upper_32_bits(offset64) & 0xff);
2441				break;
2442			}
2443			case SQ_TEX_VTX_INVALID_TEXTURE:
2444			case SQ_TEX_VTX_INVALID_BUFFER:
2445			default:
2446				DRM_ERROR("bad SET_RESOURCE\n");
2447				return -EINVAL;
2448			}
2449		}
2450		break;
2451	case PACKET3_SET_ALU_CONST:
2452		/* XXX fix me ALU const buffers only */
2453		break;
2454	case PACKET3_SET_BOOL_CONST:
2455		start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START;
2456		end_reg = 4 * pkt->count + start_reg - 4;
2457		if ((start_reg < PACKET3_SET_BOOL_CONST_START) ||
2458		    (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
2459		    (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
2460			DRM_ERROR("bad SET_BOOL_CONST\n");
2461			return -EINVAL;
2462		}
2463		break;
2464	case PACKET3_SET_LOOP_CONST:
2465		start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START;
2466		end_reg = 4 * pkt->count + start_reg - 4;
2467		if ((start_reg < PACKET3_SET_LOOP_CONST_START) ||
2468		    (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
2469		    (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
2470			DRM_ERROR("bad SET_LOOP_CONST\n");
2471			return -EINVAL;
2472		}
2473		break;
2474	case PACKET3_SET_CTL_CONST:
2475		start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START;
2476		end_reg = 4 * pkt->count + start_reg - 4;
2477		if ((start_reg < PACKET3_SET_CTL_CONST_START) ||
2478		    (start_reg >= PACKET3_SET_CTL_CONST_END) ||
2479		    (end_reg >= PACKET3_SET_CTL_CONST_END)) {
2480			DRM_ERROR("bad SET_CTL_CONST\n");
2481			return -EINVAL;
2482		}
2483		break;
2484	case PACKET3_SET_SAMPLER:
2485		if (pkt->count % 3) {
2486			DRM_ERROR("bad SET_SAMPLER\n");
2487			return -EINVAL;
2488		}
2489		start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START;
2490		end_reg = 4 * pkt->count + start_reg - 4;
2491		if ((start_reg < PACKET3_SET_SAMPLER_START) ||
2492		    (start_reg >= PACKET3_SET_SAMPLER_END) ||
2493		    (end_reg >= PACKET3_SET_SAMPLER_END)) {
2494			DRM_ERROR("bad SET_SAMPLER\n");
2495			return -EINVAL;
2496		}
2497		break;
2498	case PACKET3_STRMOUT_BUFFER_UPDATE:
2499		if (pkt->count != 4) {
2500			DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
2501			return -EINVAL;
2502		}
2503		/* Updating memory at DST_ADDRESS. */
2504		if (idx_value & 0x1) {
2505			u64 offset;
2506			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2507			if (r) {
2508				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
2509				return -EINVAL;
2510			}
2511			offset = radeon_get_ib_value(p, idx+1);
2512			offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2513			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2514				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n",
2515					  offset + 4, radeon_bo_size(reloc->robj));
2516				return -EINVAL;
2517			}
2518			offset += reloc->gpu_offset;
2519			ib[idx+1] = offset;
2520			ib[idx+2] = upper_32_bits(offset) & 0xff;
2521		}
2522		/* Reading data from SRC_ADDRESS. */
2523		if (((idx_value >> 1) & 0x3) == 2) {
2524			u64 offset;
2525			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2526			if (r) {
2527				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
2528				return -EINVAL;
2529			}
2530			offset = radeon_get_ib_value(p, idx+3);
2531			offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2532			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2533				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n",
2534					  offset + 4, radeon_bo_size(reloc->robj));
2535				return -EINVAL;
2536			}
2537			offset += reloc->gpu_offset;
2538			ib[idx+3] = offset;
2539			ib[idx+4] = upper_32_bits(offset) & 0xff;
2540		}
2541		break;
2542	case PACKET3_MEM_WRITE:
2543	{
2544		u64 offset;
2545
2546		if (pkt->count != 3) {
2547			DRM_ERROR("bad MEM_WRITE (invalid count)\n");
2548			return -EINVAL;
2549		}
2550		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2551		if (r) {
2552			DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
2553			return -EINVAL;
2554		}
2555		offset = radeon_get_ib_value(p, idx+0);
2556		offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
2557		if (offset & 0x7) {
2558			DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
2559			return -EINVAL;
2560		}
2561		if ((offset + 8) > radeon_bo_size(reloc->robj)) {
2562			DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
2563				  offset + 8, radeon_bo_size(reloc->robj));
2564			return -EINVAL;
2565		}
2566		offset += reloc->gpu_offset;
2567		ib[idx+0] = offset;
2568		ib[idx+1] = upper_32_bits(offset) & 0xff;
2569		break;
2570	}
2571	case PACKET3_COPY_DW:
2572		if (pkt->count != 4) {
2573			DRM_ERROR("bad COPY_DW (invalid count)\n");
2574			return -EINVAL;
2575		}
2576		if (idx_value & 0x1) {
2577			u64 offset;
2578			/* SRC is memory. */
2579			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2580			if (r) {
2581				DRM_ERROR("bad COPY_DW (missing src reloc)\n");
2582				return -EINVAL;
2583			}
2584			offset = radeon_get_ib_value(p, idx+1);
2585			offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2586			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2587				DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n",
2588					  offset + 4, radeon_bo_size(reloc->robj));
2589				return -EINVAL;
2590			}
2591			offset += reloc->gpu_offset;
2592			ib[idx+1] = offset;
2593			ib[idx+2] = upper_32_bits(offset) & 0xff;
2594		} else {
2595			/* SRC is a reg. */
2596			reg = radeon_get_ib_value(p, idx+1) << 2;
2597			if (!evergreen_is_safe_reg(p, reg, idx+1))
2598				return -EINVAL;
2599		}
2600		if (idx_value & 0x2) {
2601			u64 offset;
2602			/* DST is memory. */
2603			r = radeon_cs_packet_next_reloc(p, &reloc, 0);
2604			if (r) {
2605				DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
2606				return -EINVAL;
2607			}
2608			offset = radeon_get_ib_value(p, idx+3);
2609			offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2610			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
2611				DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n",
2612					  offset + 4, radeon_bo_size(reloc->robj));
2613				return -EINVAL;
2614			}
2615			offset += reloc->gpu_offset;
2616			ib[idx+3] = offset;
2617			ib[idx+4] = upper_32_bits(offset) & 0xff;
2618		} else {
2619			/* DST is a reg. */
2620			reg = radeon_get_ib_value(p, idx+3) << 2;
2621			if (!evergreen_is_safe_reg(p, reg, idx+3))
2622				return -EINVAL;
2623		}
2624		break;
2625	case PACKET3_NOP:
2626		break;
2627	default:
2628		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
2629		return -EINVAL;
2630	}
2631	return 0;
2632}
2633
2634int evergreen_cs_parse(struct radeon_cs_parser *p)
2635{
2636	struct radeon_cs_packet pkt;
2637	struct evergreen_cs_track *track;
2638	u32 tmp;
2639	int r;
2640
2641	if (p->track == NULL) {
2642		/* initialize tracker, we are in kms */
2643		track = kzalloc(sizeof(*track), GFP_KERNEL);
2644		if (track == NULL)
2645			return -ENOMEM;
2646		evergreen_cs_track_init(track);
2647		if (p->rdev->family >= CHIP_CAYMAN)
2648			tmp = p->rdev->config.cayman.tile_config;
2649		else
2650			tmp = p->rdev->config.evergreen.tile_config;
2651
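		/* Decode tile_config: bits [3:0] pipe count, [7:4] bank count,
		 * [11:8] group size, [15:12] row size.
		 */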
2652		switch (tmp & 0xf) {
2653		case 0:
2654			track->npipes = 1;
2655			break;
2656		case 1:
2657		default:
2658			track->npipes = 2;
2659			break;
2660		case 2:
2661			track->npipes = 4;
2662			break;
2663		case 3:
2664			track->npipes = 8;
2665			break;
2666		}
2667
2668		switch ((tmp & 0xf0) >> 4) {
2669		case 0:
2670			track->nbanks = 4;
2671			break;
2672		case 1:
2673		default:
2674			track->nbanks = 8;
2675			break;
2676		case 2:
2677			track->nbanks = 16;
2678			break;
2679		}
2680
2681		switch ((tmp & 0xf00) >> 8) {
2682		case 0:
2683			track->group_size = 256;
2684			break;
2685		case 1:
2686		default:
2687			track->group_size = 512;
2688			break;
2689		}
2690
2691		switch ((tmp & 0xf000) >> 12) {
2692		case 0:
2693			track->row_size = 1;
2694			break;
2695		case 1:
2696		default:
2697			track->row_size = 2;
2698			break;
2699		case 2:
2700			track->row_size = 4;
2701			break;
2702		}
2703
2704		p->track = track;
2705	}
2706	do {
2707		r = radeon_cs_packet_parse(p, &pkt, p->idx);
2708		if (r) {
2709			kfree(p->track);
2710			p->track = NULL;
2711			return r;
2712		}
2713		p->idx += pkt.count + 2;
2714		switch (pkt.type) {
2715		case RADEON_PACKET_TYPE0:
2716			r = evergreen_cs_parse_packet0(p, &pkt);
2717			break;
2718		case RADEON_PACKET_TYPE2:
2719			break;
2720		case RADEON_PACKET_TYPE3:
2721			r = evergreen_packet3_check(p, &pkt);
2722			break;
2723		default:
2724			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
2725			kfree(p->track);
2726			p->track = NULL;
2727			return -EINVAL;
2728		}
2729		if (r) {
2730			kfree(p->track);
2731			p->track = NULL;
2732			return r;
2733		}
2734	} while (p->idx < p->chunk_ib->length_dw);
2735#if 0
2736	for (r = 0; r < p->ib.length_dw; r++) {
2737		printk(KERN_INFO "%05d  0x%08X\n", r, p->ib.ptr[r]);
2738		mdelay(1);
2739	}
2740#endif
2741	kfree(p->track);
2742	p->track = NULL;
2743	return 0;
2744}
2745
2746/**
2747 * evergreen_dma_cs_parse() - parse the DMA IB
2748 * @p:		parser structure holding parsing context.
2749 *
2750 * Parses the DMA IB from the CS ioctl and updates
2751 * the GPU addresses based on the reloc information and
2752 * checks for errors. (Evergreen-Cayman)
2753 * Returns 0 for success and an error on failure.
2754 **/
2755int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2756{
2757	struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
2758	struct radeon_bo_list *src_reloc, *dst_reloc, *dst2_reloc;
2759	u32 header, cmd, count, sub_cmd;
2760	volatile u32 *ib = p->ib.ptr;
2761	u32 idx;
2762	u64 src_offset, dst_offset, dst2_offset;
2763	int r;
2764
2765	do {
2766		if (p->idx >= ib_chunk->length_dw) {
2767			DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
2768				  p->idx, ib_chunk->length_dw);
2769			return -EINVAL;
2770		}
2771		idx = p->idx;
2772		header = radeon_get_ib_value(p, idx);
2773		cmd = GET_DMA_CMD(header);
2774		count = GET_DMA_COUNT(header);
2775		sub_cmd = GET_DMA_SUB_CMD(header);
2776
2777		switch (cmd) {
2778		case DMA_PACKET_WRITE:
2779			r = r600_dma_cs_next_reloc(p, &dst_reloc);
2780			if (r) {
2781				DRM_ERROR("bad DMA_PACKET_WRITE\n");
2782				return -EINVAL;
2783			}
2784			switch (sub_cmd) {
2785			/* tiled */
2786			case 8:
2787				dst_offset = radeon_get_ib_value(p, idx+1);
2788				dst_offset <<= 8;
2789
2790				ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2791				p->idx += count + 7;
2792				break;
2793			/* linear */
2794			case 0:
2795				dst_offset = radeon_get_ib_value(p, idx+1);
2796				dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
2797
2798				ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2799				ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2800				p->idx += count + 3;
2801				break;
2802			default:
2803				DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, header);
2804				return -EINVAL;
2805			}
2806			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2807				dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
2808					 dst_offset, radeon_bo_size(dst_reloc->robj));
2809				return -EINVAL;
2810			}
2811			break;
2812		case DMA_PACKET_COPY:
2813			r = r600_dma_cs_next_reloc(p, &src_reloc);
2814			if (r) {
2815				DRM_ERROR("bad DMA_PACKET_COPY\n");
2816				return -EINVAL;
2817			}
2818			r = r600_dma_cs_next_reloc(p, &dst_reloc);
2819			if (r) {
2820				DRM_ERROR("bad DMA_PACKET_COPY\n");
2821				return -EINVAL;
2822			}
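			/* The sub command selects the copy flavour (linear, tiled,
			 * broadcast, partial); each case checks the bounds it can and
			 * folds the relocation offsets into the addresses.
			 */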
2823			switch (sub_cmd) {
2824			/* Copy L2L, DW aligned */
2825			case 0x00:
2826				/* L2L, dw */
2827				src_offset = radeon_get_ib_value(p, idx+2);
2828				src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2829				dst_offset = radeon_get_ib_value(p, idx+1);
2830				dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2831				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2832					dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
2833							src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2834					return -EINVAL;
2835				}
2836				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2837					dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
2838							dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2839					return -EINVAL;
2840				}
2841				ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2842				ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2843				ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2844				ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2845				p->idx += 5;
2846				break;
2847			/* Copy L2T/T2L */
2848			case 0x08:
2849				/* detile bit */
2850				if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2851					/* tiled src, linear dst */
2852					src_offset = radeon_get_ib_value(p, idx+1);
2853					src_offset <<= 8;
2854					ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
2855
2856					dst_offset = radeon_get_ib_value(p, idx + 7);
2857					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2858					ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2859					ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2860				} else {
2861					/* linear src, tiled dst */
2862					src_offset = radeon_get_ib_value(p, idx+7);
2863					src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
2864					ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2865					ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2866
2867					dst_offset = radeon_get_ib_value(p, idx+1);
2868					dst_offset <<= 8;
2869					ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2870				}
2871				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2872					dev_warn(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n",
2873							src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2874					return -EINVAL;
2875				}
2876				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2877					dev_warn(p->dev, "DMA L2T, dst buffer too small (%llu %lu)\n",
2878							dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2879					return -EINVAL;
2880				}
2881				p->idx += 9;
2882				break;
2883			/* Copy L2L, byte aligned */
2884			case 0x40:
2885				/* L2L, byte */
2886				src_offset = radeon_get_ib_value(p, idx+2);
2887				src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2888				dst_offset = radeon_get_ib_value(p, idx+1);
2889				dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;
2890				if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
2891					dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
2892							src_offset + count, radeon_bo_size(src_reloc->robj));
2893					return -EINVAL;
2894				}
2895				if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
2896					dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
2897							dst_offset + count, radeon_bo_size(dst_reloc->robj));
2898					return -EINVAL;
2899				}
2900				ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2901				ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2902				ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2903				ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2904				p->idx += 5;
2905				break;
2906			/* Copy L2L, partial */
2907			case 0x41:
2908				/* L2L, partial */
2909				if (p->family < CHIP_CAYMAN) {
2910					DRM_ERROR("L2L Partial is cayman only !\n");
2911					return -EINVAL;
2912				}
2913				ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff);
2914				ib[idx+2] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2915				ib[idx+4] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
2916				ib[idx+5] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2917
2918				p->idx += 9;
2919				break;
2920			/* Copy L2L, DW aligned, broadcast */
2921			case 0x44:
2922				/* L2L, dw, broadcast */
2923				r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2924				if (r) {
2925					DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
2926					return -EINVAL;
2927				}
2928				dst_offset = radeon_get_ib_value(p, idx+1);
2929				dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
2930				dst2_offset = radeon_get_ib_value(p, idx+2);
2931				dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32;
2932				src_offset = radeon_get_ib_value(p, idx+3);
2933				src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
2934				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2935					dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
2936							src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2937					return -EINVAL;
2938				}
2939				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2940					dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
2941							dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2942					return -EINVAL;
2943				}
2944				if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2945					dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
2946							dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2947					return -EINVAL;
2948				}
2949				ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
2950				ib[idx+2] += (u32)(dst2_reloc->gpu_offset & 0xfffffffc);
2951				ib[idx+3] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2952				ib[idx+4] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
2953				ib[idx+5] += upper_32_bits(dst2_reloc->gpu_offset) & 0xff;
2954				ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2955				p->idx += 7;
2956				break;
2957			/* Copy L2T Frame to Field */
2958			case 0x48:
2959				if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
2960					DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2961					return -EINVAL;
2962				}
2963				r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2964				if (r) {
2965					DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2966					return -EINVAL;
2967				}
2968				dst_offset = radeon_get_ib_value(p, idx+1);
2969				dst_offset <<= 8;
2970				dst2_offset = radeon_get_ib_value(p, idx+2);
2971				dst2_offset <<= 8;
2972				src_offset = radeon_get_ib_value(p, idx+8);
2973				src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
2974				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2975					dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
2976							src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2977					return -EINVAL;
2978				}
2979				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2980					dev_warn(p->dev, "DMA L2T, frame to fields dst buffer too small (%llu %lu)\n",
2981							dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2982					return -EINVAL;
2983				}
2984				if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2985					dev_warn(p->dev, "DMA L2T, frame to fields dst2 buffer too small (%llu %lu)\n",
2986							dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2987					return -EINVAL;
2988				}
2989				ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
2990				ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
2991				ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
2992				ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
2993				p->idx += 10;
2994				break;
2995			/* Copy L2T/T2L, partial */
2996			case 0x49:
2997				/* L2T, T2L partial */
2998				if (p->family < CHIP_CAYMAN) {
2999					DRM_ERROR("L2T, T2L Partial is cayman only !\n");
3000					return -EINVAL;
3001				}
3002				/* detile bit */
3003				if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3004					/* tiled src, linear dst */
3005					ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3006
3007					ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3008					ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
3009				} else {
3010					/* linear src, tiled dst */
3011					ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3012					ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3013
3014					ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3015				}
3016				p->idx += 12;
3017				break;
3018			/* Copy L2T broadcast */
3019			case 0x4b:
3020				/* L2T, broadcast */
3021				if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3022					DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3023					return -EINVAL;
3024				}
3025				r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3026				if (r) {
3027					DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3028					return -EINVAL;
3029				}
3030				dst_offset = radeon_get_ib_value(p, idx+1);
3031				dst_offset <<= 8;
3032				dst2_offset = radeon_get_ib_value(p, idx+2);
3033				dst2_offset <<= 8;
3034				src_offset = radeon_get_ib_value(p, idx+8);
3035				src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3036				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3037					dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3038							src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3039					return -EINVAL;
3040				}
3041				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3042					dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3043							dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3044					return -EINVAL;
3045				}
3046				if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3047					dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3048							dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3049					return -EINVAL;
3050				}
3051				ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3052				ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3053				ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3054				ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3055				p->idx += 10;
3056				break;
3057			/* Copy L2T/T2L (tile units) */
3058			case 0x4c:
3059				/* L2T, T2L */
3060				/* detile bit */
3061				if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3062					/* tiled src, linear dst */
3063					src_offset = radeon_get_ib_value(p, idx+1);
3064					src_offset <<= 8;
3065					ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3066
3067					dst_offset = radeon_get_ib_value(p, idx+7);
3068					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3069					ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3070					ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
3071				} else {
3072					/* linear src, tiled dst */
3073					src_offset = radeon_get_ib_value(p, idx+7);
3074					src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
3075					ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3076					ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3077
3078					dst_offset = radeon_get_ib_value(p, idx+1);
3079					dst_offset <<= 8;
3080					ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3081				}
3082				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3083					dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
3084							src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3085					return -EINVAL;
3086				}
3087				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3088					dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
3089							dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3090					return -EINVAL;
3091				}
3092				p->idx += 9;
3093				break;
3094			/* Copy T2T, partial (tile units) */
3095			case 0x4d:
3096				/* T2T partial */
3097				if (p->family < CHIP_CAYMAN) {
3098					DRM_ERROR("T2T Partial is cayman only !\n");
3099					return -EINVAL;
3100				}
3101				ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
3102				ib[idx+4] += (u32)(dst_reloc->gpu_offset >> 8);
3103				p->idx += 13;
3104				break;
3105			/* Copy L2T broadcast (tile units) */
3106			case 0x4f:
3107				/* L2T, broadcast */
3108				if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
3109					DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3110					return -EINVAL;
3111				}
3112				r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3113				if (r) {
3114					DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3115					return -EINVAL;
3116				}
3117				dst_offset = radeon_get_ib_value(p, idx+1);
3118				dst_offset <<= 8;
3119				dst2_offset = radeon_get_ib_value(p, idx+2);
3120				dst2_offset <<= 8;
3121				src_offset = radeon_get_ib_value(p, idx+8);
3122				src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32;
3123				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3124					dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3125							src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3126					return -EINVAL;
3127				}
3128				if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3129					dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3130							dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3131					return -EINVAL;
3132				}
3133				if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3134					dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3135							dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3136					return -EINVAL;
3137				}
3138				ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
3139				ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
3140				ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
3141				ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
3142				p->idx += 10;
3143				break;
3144			default:
3145				DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, header);
3146				return -EINVAL;
3147			}
3148			break;
3149		case DMA_PACKET_CONSTANT_FILL:
3150			r = r600_dma_cs_next_reloc(p, &dst_reloc);
3151			if (r) {
3152				DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3153				return -EINVAL;
3154			}
3155			dst_offset = radeon_get_ib_value(p, idx+1);
3156			dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16;
3157			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3158				dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
3159					 dst_offset, radeon_bo_size(dst_reloc->robj));
3160				return -EINVAL;
3161			}
3162			ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
3163			ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000;
3164			p->idx += 4;
3165			break;
3166		case DMA_PACKET_NOP:
3167			p->idx += 1;
3168			break;
3169		default:
3170			DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3171			return -EINVAL;
3172		}
3173	} while (p->idx < p->chunk_ib->length_dw);
3174#if 0
3175	for (r = 0; r < p->ib.length_dw; r++) {
3176		printk(KERN_INFO "%05d  0x%08X\n", r, p->ib.ptr[r]);
3177		mdelay(1);
3178	}
3179#endif
3180	return 0;
3181}
3182
3183/* vm parser */
3184static bool evergreen_vm_reg_valid(u32 reg)
3185{
3186	/* context regs are fine */
3187	if (reg >= 0x28000)
3188		return true;
3189
3190	/* check config regs */
3191	switch (reg) {
3192	case WAIT_UNTIL:
3193	case GRBM_GFX_INDEX:
3194	case CP_STRMOUT_CNTL:
3195	case CP_COHER_CNTL:
3196	case CP_COHER_SIZE:
3197	case VGT_VTX_VECT_EJECT_REG:
3198	case VGT_CACHE_INVALIDATION:
3199	case VGT_GS_VERTEX_REUSE:
3200	case VGT_PRIMITIVE_TYPE:
3201	case VGT_INDEX_TYPE:
3202	case VGT_NUM_INDICES:
3203	case VGT_NUM_INSTANCES:
3204	case VGT_COMPUTE_DIM_X:
3205	case VGT_COMPUTE_DIM_Y:
3206	case VGT_COMPUTE_DIM_Z:
3207	case VGT_COMPUTE_START_X:
3208	case VGT_COMPUTE_START_Y:
3209	case VGT_COMPUTE_START_Z:
3210	case VGT_COMPUTE_INDEX:
3211	case VGT_COMPUTE_THREAD_GROUP_SIZE:
3212	case VGT_HS_OFFCHIP_PARAM:
3213	case PA_CL_ENHANCE:
3214	case PA_SU_LINE_STIPPLE_VALUE:
3215	case PA_SC_LINE_STIPPLE_STATE:
3216	case PA_SC_ENHANCE:
3217	case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ:
3218	case SQ_DYN_GPR_SIMD_LOCK_EN:
3219	case SQ_CONFIG:
3220	case SQ_GPR_RESOURCE_MGMT_1:
3221	case SQ_GLOBAL_GPR_RESOURCE_MGMT_1:
3222	case SQ_GLOBAL_GPR_RESOURCE_MGMT_2:
3223	case SQ_CONST_MEM_BASE:
3224	case SQ_STATIC_THREAD_MGMT_1:
3225	case SQ_STATIC_THREAD_MGMT_2:
3226	case SQ_STATIC_THREAD_MGMT_3:
3227	case SPI_CONFIG_CNTL:
3228	case SPI_CONFIG_CNTL_1:
3229	case TA_CNTL_AUX:
3230	case DB_DEBUG:
3231	case DB_DEBUG2:
3232	case DB_DEBUG3:
3233	case DB_DEBUG4:
3234	case DB_WATERMARKS:
3235	case TD_PS_BORDER_COLOR_INDEX:
3236	case TD_PS_BORDER_COLOR_RED:
3237	case TD_PS_BORDER_COLOR_GREEN:
3238	case TD_PS_BORDER_COLOR_BLUE:
3239	case TD_PS_BORDER_COLOR_ALPHA:
3240	case TD_VS_BORDER_COLOR_INDEX:
3241	case TD_VS_BORDER_COLOR_RED:
3242	case TD_VS_BORDER_COLOR_GREEN:
3243	case TD_VS_BORDER_COLOR_BLUE:
3244	case TD_VS_BORDER_COLOR_ALPHA:
3245	case TD_GS_BORDER_COLOR_INDEX:
3246	case TD_GS_BORDER_COLOR_RED:
3247	case TD_GS_BORDER_COLOR_GREEN:
3248	case TD_GS_BORDER_COLOR_BLUE:
3249	case TD_GS_BORDER_COLOR_ALPHA:
3250	case TD_HS_BORDER_COLOR_INDEX:
3251	case TD_HS_BORDER_COLOR_RED:
3252	case TD_HS_BORDER_COLOR_GREEN:
3253	case TD_HS_BORDER_COLOR_BLUE:
3254	case TD_HS_BORDER_COLOR_ALPHA:
3255	case TD_LS_BORDER_COLOR_INDEX:
3256	case TD_LS_BORDER_COLOR_RED:
3257	case TD_LS_BORDER_COLOR_GREEN:
3258	case TD_LS_BORDER_COLOR_BLUE:
3259	case TD_LS_BORDER_COLOR_ALPHA:
3260	case TD_CS_BORDER_COLOR_INDEX:
3261	case TD_CS_BORDER_COLOR_RED:
3262	case TD_CS_BORDER_COLOR_GREEN:
3263	case TD_CS_BORDER_COLOR_BLUE:
3264	case TD_CS_BORDER_COLOR_ALPHA:
3265	case SQ_ESGS_RING_SIZE:
3266	case SQ_GSVS_RING_SIZE:
3267	case SQ_ESTMP_RING_SIZE:
3268	case SQ_GSTMP_RING_SIZE:
3269	case SQ_HSTMP_RING_SIZE:
3270	case SQ_LSTMP_RING_SIZE:
3271	case SQ_PSTMP_RING_SIZE:
3272	case SQ_VSTMP_RING_SIZE:
3273	case SQ_ESGS_RING_ITEMSIZE:
3274	case SQ_ESTMP_RING_ITEMSIZE:
3275	case SQ_GSTMP_RING_ITEMSIZE:
3276	case SQ_GSVS_RING_ITEMSIZE:
3277	case SQ_GS_VERT_ITEMSIZE:
3278	case SQ_GS_VERT_ITEMSIZE_1:
3279	case SQ_GS_VERT_ITEMSIZE_2:
3280	case SQ_GS_VERT_ITEMSIZE_3:
3281	case SQ_GSVS_RING_OFFSET_1:
3282	case SQ_GSVS_RING_OFFSET_2:
3283	case SQ_GSVS_RING_OFFSET_3:
3284	case SQ_HSTMP_RING_ITEMSIZE:
3285	case SQ_LSTMP_RING_ITEMSIZE:
3286	case SQ_PSTMP_RING_ITEMSIZE:
3287	case SQ_VSTMP_RING_ITEMSIZE:
3288	case VGT_TF_RING_SIZE:
3289	case SQ_ESGS_RING_BASE:
3290	case SQ_GSVS_RING_BASE:
3291	case SQ_ESTMP_RING_BASE:
3292	case SQ_GSTMP_RING_BASE:
3293	case SQ_HSTMP_RING_BASE:
3294	case SQ_LSTMP_RING_BASE:
3295	case SQ_PSTMP_RING_BASE:
3296	case SQ_VSTMP_RING_BASE:
3297	case CAYMAN_VGT_OFFCHIP_LDS_BASE:
3298	case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS:
3299		return true;
3300	default:
3301		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
3302		return false;
3303	}
3304}
3305
3306static int evergreen_vm_packet3_check(struct radeon_device *rdev,
3307				      u32 *ib, struct radeon_cs_packet *pkt)
3308{
3309	u32 idx = pkt->idx + 1;
3310	u32 idx_value = ib[idx];
3311	u32 start_reg, end_reg, reg, i;
3312	u32 command, info;
3313
3314	switch (pkt->opcode) {
3315	case PACKET3_NOP:
3316		break;
3317	case PACKET3_SET_BASE:
3318		if (idx_value != 1) {
3319			DRM_ERROR("bad SET_BASE\n");
3320			return -EINVAL;
3321		}
3322		break;
3323	case PACKET3_CLEAR_STATE:
3324	case PACKET3_INDEX_BUFFER_SIZE:
3325	case PACKET3_DISPATCH_DIRECT:
3326	case PACKET3_DISPATCH_INDIRECT:
3327	case PACKET3_MODE_CONTROL:
3328	case PACKET3_SET_PREDICATION:
3329	case PACKET3_COND_EXEC:
3330	case PACKET3_PRED_EXEC:
3331	case PACKET3_DRAW_INDIRECT:
3332	case PACKET3_DRAW_INDEX_INDIRECT:
3333	case PACKET3_INDEX_BASE:
3334	case PACKET3_DRAW_INDEX_2:
3335	case PACKET3_CONTEXT_CONTROL:
3336	case PACKET3_DRAW_INDEX_OFFSET:
3337	case PACKET3_INDEX_TYPE:
3338	case PACKET3_DRAW_INDEX:
3339	case PACKET3_DRAW_INDEX_AUTO:
3340	case PACKET3_DRAW_INDEX_IMMD:
3341	case PACKET3_NUM_INSTANCES:
3342	case PACKET3_DRAW_INDEX_MULTI_AUTO:
3343	case PACKET3_STRMOUT_BUFFER_UPDATE:
3344	case PACKET3_DRAW_INDEX_OFFSET_2:
3345	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
3346	case PACKET3_MPEG_INDEX:
3347	case PACKET3_WAIT_REG_MEM:
3348	case PACKET3_MEM_WRITE:
3349	case PACKET3_SURFACE_SYNC:
3350	case PACKET3_EVENT_WRITE:
3351	case PACKET3_EVENT_WRITE_EOP:
3352	case PACKET3_EVENT_WRITE_EOS:
3353	case PACKET3_SET_CONTEXT_REG:
3354	case PACKET3_SET_BOOL_CONST:
3355	case PACKET3_SET_LOOP_CONST:
3356	case PACKET3_SET_RESOURCE:
3357	case PACKET3_SET_SAMPLER:
3358	case PACKET3_SET_CTL_CONST:
3359	case PACKET3_SET_RESOURCE_OFFSET:
3360	case PACKET3_SET_CONTEXT_REG_INDIRECT:
3361	case PACKET3_SET_RESOURCE_INDIRECT:
3362	case CAYMAN_PACKET3_DEALLOC_STATE:
3363		break;
3364	case PACKET3_COND_WRITE:
3365		if (idx_value & 0x100) {
3366			reg = ib[idx + 5] * 4;
3367			if (!evergreen_vm_reg_valid(reg))
3368				return -EINVAL;
3369		}
3370		break;
3371	case PACKET3_COPY_DW:
3372		if (idx_value & 0x2) {
3373			reg = ib[idx + 3] * 4;
3374			if (!evergreen_vm_reg_valid(reg))
3375				return -EINVAL;
3376		}
3377		break;
3378	case PACKET3_SET_CONFIG_REG:
3379		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
3380		end_reg = 4 * pkt->count + start_reg - 4;
3381		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
3382		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
3383		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
3384			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
3385			return -EINVAL;
3386		}
3387		for (i = 0; i < pkt->count; i++) {
3388			reg = start_reg + (4 * i);
3389			if (!evergreen_vm_reg_valid(reg))
3390				return -EINVAL;
3391		}
3392		break;
3393	case PACKET3_CP_DMA:
3394		command = ib[idx + 4];
3395		info = ib[idx + 1];
3396		if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
3397		    (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
3398		    ((((info & 0x00300000) >> 20) == 0) &&
3399		     (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
3400		    ((((info & 0x60000000) >> 29) == 0) &&
3401		     (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
3402			/* non mem to mem copies require a dw aligned count */
3403			if ((command & 0x1fffff) % 4) {
3404				DRM_ERROR("CP DMA command requires dw count alignment\n");
3405				return -EINVAL;
3406			}
3407		}
		if (command & PACKET3_CP_DMA_CMD_SAS) {
			/* src address space is register */
			if (((info & 0x60000000) >> 29) == 0) {
				start_reg = idx_value << 2;
				if (command & PACKET3_CP_DMA_CMD_SAIC) {
					reg = start_reg;
					if (!evergreen_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad SRC register\n");
						return -EINVAL;
					}
				} else {
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!evergreen_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad SRC register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		if (command & PACKET3_CP_DMA_CMD_DAS) {
			/* dst address space is register */
			if (((info & 0x00300000) >> 20) == 0) {
				start_reg = ib[idx + 2];
				if (command & PACKET3_CP_DMA_CMD_DAIC) {
					reg = start_reg;
					if (!evergreen_vm_reg_valid(reg)) {
						DRM_ERROR("CP DMA Bad DST register\n");
						return -EINVAL;
					}
				} else {
					for (i = 0; i < (command & 0x1fffff); i++) {
						reg = start_reg + (4 * i);
						if (!evergreen_vm_reg_valid(reg)) {
							DRM_ERROR("CP DMA Bad DST register\n");
							return -EINVAL;
						}
					}
				}
			}
		}
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

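/**
 * evergreen_ib_parse() - parse the CP IB for VM
 * @rdev: radeon_device pointer
 * @ib:	radeon_ib pointer
 *
 * Walks the IB packet by packet and validates each type-3 packet
 * with evergreen_vm_packet3_check(); type-0 packets are rejected.
 * Returns 0 for success and an error on failure.
 **/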
int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
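			/* type-2 packets are single dword padding/NOPs */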
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt);
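			/* skip the header dword plus (count + 1) data dwords */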
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}

/**
 * evergreen_dma_ib_parse() - parse the DMA IB for VM
 * @rdev: radeon_device pointer
 * @ib:	radeon_ib pointer
 *
 * Parses the DMA IB from the VM CS ioctl and
 * checks for errors. (Cayman-SI)
 * Returns 0 for success and an error on failure.
 **/
int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	u32 idx = 0;
	u32 header, cmd, count, sub_cmd;

	do {
		header = ib->ptr[idx];
		cmd = GET_DMA_CMD(header);
		count = GET_DMA_COUNT(header);
		sub_cmd = GET_DMA_SUB_CMD(header);

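		/* each command/sub-command implies a fixed packet length; advance idx past it */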
		switch (cmd) {
		case DMA_PACKET_WRITE:
			switch (sub_cmd) {
			/* tiled */
			case 8:
				idx += count + 7;
				break;
			/* linear */
			case 0:
				idx += count + 3;
				break;
			default:
				DRM_ERROR("bad DMA_PACKET_WRITE [%6d] 0x%08x sub cmd is not 0 or 8\n", idx, ib->ptr[idx]);
				return -EINVAL;
			}
			break;
		case DMA_PACKET_COPY:
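			/* the sub-command selects the copy variant, each with a fixed length */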
			switch (sub_cmd) {
			/* Copy L2L, DW aligned */
			case 0x00:
				idx += 5;
				break;
			/* Copy L2T/T2L */
			case 0x08:
				idx += 9;
				break;
			/* Copy L2L, byte aligned */
			case 0x40:
				idx += 5;
				break;
			/* Copy L2L, partial */
			case 0x41:
				idx += 9;
				break;
			/* Copy L2L, DW aligned, broadcast */
			case 0x44:
				idx += 7;
				break;
			/* Copy L2T Frame to Field */
			case 0x48:
				idx += 10;
				break;
			/* Copy L2T/T2L, partial */
			case 0x49:
				idx += 12;
				break;
			/* Copy L2T broadcast */
			case 0x4b:
				idx += 10;
				break;
			/* Copy L2T/T2L (tile units) */
			case 0x4c:
				idx += 9;
				break;
			/* Copy T2T, partial (tile units) */
			case 0x4d:
				idx += 13;
				break;
			/* Copy L2T broadcast (tile units) */
			case 0x4f:
				idx += 10;
				break;
			default:
				DRM_ERROR("bad DMA_PACKET_COPY [%6d] 0x%08x invalid sub cmd\n", idx, ib->ptr[idx]);
				return -EINVAL;
			}
			break;
		case DMA_PACKET_CONSTANT_FILL:
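			/* 4 dword packet */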
			idx += 4;
			break;
		case DMA_PACKET_NOP:
			idx += 1;
			break;
		default:
			DRM_ERROR("Unknown packet type %d at %d!\n", cmd, idx);
			return -EINVAL;
		}
	} while (idx < ib->length_dw);

	return 0;
}
