/*
 * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include "gf100.h"
#include "ctxgf100.h"

#include <subdev/timer.h>

#include <nvif/class.h>

static void
gk20a_gr_init_dtor(struct gf100_gr_pack *pack)
{
	vfree(pack);
}

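/*
 * Address/value pair as found in the "sw_nonctx", "sw_bundle_init" and
 * "sw_method_init" firmware images.
 */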
struct gk20a_fw_av
{
	u32 addr;
	u32 data;
};

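/*
 * Convert a firmware list of address/value pairs into the gf100_gr_pack/
 * gf100_gr_init lists consumed by the gf100 GR code (e.g. gf100_gr_mmio()):
 * a single pack with one init entry per pair (count and pitch of 1), the
 * second pack and final init entry left zeroed as terminators.
 */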
static struct gf100_gr_pack *
gk20a_gr_av_to_init(struct gf100_gr_fuc *fuc)
{
	struct gf100_gr_init *init;
	struct gf100_gr_pack *pack;
	const int nent = (fuc->size / sizeof(struct gk20a_fw_av));
	int i;

	pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1)));
	if (!pack)
		return ERR_PTR(-ENOMEM);

	init = (void *)(pack + 2);

	pack[0].init = init;

	for (i = 0; i < nent; i++) {
		struct gf100_gr_init *ent = &init[i];
		struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc->data)[i];

		ent->addr = av->addr;
		ent->data = av->data;
		ent->count = 1;
		ent->pitch = 1;
	}

	return pack;
}

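/*
 * Address/index/value triplet as found in the "sw_ctx" firmware image.
 */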
struct gk20a_fw_aiv
{
	u32 addr;
	u32 index;
	u32 data;
};

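/*
 * Same conversion as gk20a_gr_av_to_init(), but for the address/index/value
 * format; the index field is not used when building the init list.
 */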
static struct gf100_gr_pack *
gk20a_gr_aiv_to_init(struct gf100_gr_fuc *fuc)
{
	struct gf100_gr_init *init;
	struct gf100_gr_pack *pack;
	const int nent = (fuc->size / sizeof(struct gk20a_fw_aiv));
	int i;

	pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1)));
	if (!pack)
		return ERR_PTR(-ENOMEM);

	init = (void *)(pack + 2);

	pack[0].init = init;

	for (i = 0; i < nent; i++) {
		struct gf100_gr_init *ent = &init[i];
		struct gk20a_fw_aiv *av = &((struct gk20a_fw_aiv *)fuc->data)[i];

		ent->addr = av->addr;
		ent->data = av->data;
		ent->count = 1;
		ent->pitch = 1;
	}

	return pack;
}

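/*
 * Convert the "sw_method_init" address/value list into per-class packs.
 * Each entry carries the class in its low 16 bits and the method offset
 * (in units of 4 bytes) in its high 16 bits; a new pack is started every
 * time the class changes, up to a maximum of max_classes packs.
 */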
static struct gf100_gr_pack *
gk20a_gr_av_to_method(struct gf100_gr_fuc *fuc)
{
	struct gf100_gr_init *init;
	struct gf100_gr_pack *pack;
	/* We don't expect to initialize more than 16 classes here... */
	static const unsigned int max_classes = 16;
	const int nent = (fuc->size / sizeof(struct gk20a_fw_av));
	int i, classidx = 0;
	u32 prevclass = 0;

	pack = vzalloc((sizeof(*pack) * max_classes) +
		       (sizeof(*init) * (nent + 1)));
	if (!pack)
		return ERR_PTR(-ENOMEM);

	init = (void *)(pack + max_classes);

	for (i = 0; i < nent; i++) {
		struct gf100_gr_init *ent = &init[i];
		struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc->data)[i];
		u32 class = av->addr & 0xffff;
		u32 addr = (av->addr & 0xffff0000) >> 14;

		if (prevclass != class) {
			pack[classidx].init = ent;
			pack[classidx].type = class;
			prevclass = class;
			if (++classidx >= max_classes) {
				vfree(pack);
				return ERR_PTR(-ENOSPC);
			}
		}

		ent->addr = addr;
		ent->data = av->data;
		ent->count = 1;
		ent->pitch = 1;
	}

	return pack;
}

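/* Wait for FECS and GPCCS memory scrubbing to complete. */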
static int
gk20a_gr_wait_mem_scrubbing(struct gf100_gr *gr)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;

	if (nvkm_msec(device, 2000,
		if (!(nvkm_rd32(device, 0x40910c) & 0x00000006))
			break;
	) < 0) {
		nvkm_error(subdev, "FECS mem scrubbing timeout\n");
		return -ETIMEDOUT;
	}

	if (nvkm_msec(device, 2000,
		if (!(nvkm_rd32(device, 0x41a10c) & 0x00000006))
			break;
	) < 0) {
		nvkm_error(subdev, "GPCCS mem scrubbing timeout\n");
		return -ETIMEDOUT;
	}

	return 0;
}

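/* Set the mask of hardware warning (HWW) exceptions that get reported. */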
static void
gk20a_gr_set_hww_esr_report_mask(struct gf100_gr *gr)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	nvkm_wr32(device, 0x419e44, 0x1ffffe);
	nvkm_wr32(device, 0x419e4c, 0x7f);
}

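/*
 * Bring up PGRAPH: load the non-context register settings, program the
 * zcull/TPC mapping, enable interrupts and exceptions, then start the
 * context-switch ucode via gf100_gr_init_ctxctl().
 */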
int
gk20a_gr_init(struct gf100_gr *gr)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
	u32 data[TPC_MAX / 8] = {};
	u8  tpcnr[GPC_MAX];
	int gpc, tpc;
	int ret, i;

	/* Clear SCC RAM */
	nvkm_wr32(device, 0x40802c, 0x1);

	gf100_gr_mmio(gr, gr->fuc_sw_nonctx);

	ret = gk20a_gr_wait_mem_scrubbing(gr);
	if (ret)
		return ret;

	ret = gf100_gr_wait_idle(gr);
	if (ret)
		return ret;

	/* MMU debug buffer */
	nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(gr->unk4188b4) >> 8);
	nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(gr->unk4188b8) >> 8);

	if (gr->func->init_gpc_mmu)
		gr->func->init_gpc_mmu(gr);

	/* Set the PE as stream master */
	nvkm_mask(device, 0x503018, 0x1, 0x1);

	/* Zcull init */
	memset(data, 0x00, sizeof(data));
	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
		do {
			gpc = (gpc + 1) % gr->gpc_nr;
		} while (!tpcnr[gpc]);
		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;

		data[i / 8] |= tpc << ((i % 8) * 4);
	}

	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);

	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
			  gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
			  gr->tpc_total);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
	}

	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);

	/* Enable FIFO access */
	nvkm_wr32(device, 0x400500, 0x00010001);

	/* Enable interrupts */
	nvkm_wr32(device, 0x400100, 0xffffffff);
	nvkm_wr32(device, 0x40013c, 0xffffffff);

	/* Enable FECS error interrupts */
	nvkm_wr32(device, 0x409c24, 0x000f0000);

	/* Enable hardware warning exceptions */
	nvkm_wr32(device, 0x404000, 0xc0000000);
	nvkm_wr32(device, 0x404600, 0xc0000000);

	if (gr->func->set_hww_esr_report_mask)
		gr->func->set_hww_esr_report_mask(gr);

	/* Enable TPC exceptions per GPC */
	nvkm_wr32(device, 0x419d0c, 0x2);
	nvkm_wr32(device, 0x41ac94, (((1 << gr->tpc_total) - 1) & 0xff) << 16);

	/* Reset and enable all exceptions */
	nvkm_wr32(device, 0x400108, 0xffffffff);
	nvkm_wr32(device, 0x400138, 0xffffffff);
	nvkm_wr32(device, 0x400118, 0xffffffff);
	nvkm_wr32(device, 0x400130, 0xffffffff);
	nvkm_wr32(device, 0x40011c, 0xffffffff);
	nvkm_wr32(device, 0x400134, 0xffffffff);

	gf100_gr_zbc_init(gr);

	return gf100_gr_init_ctxctl(gr);
}

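/* Free the register/method lists built from the firmware images. */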
void
gk20a_gr_dtor(struct gf100_gr *gr)
{
	gk20a_gr_init_dtor(gr->fuc_method);
	gk20a_gr_init_dtor(gr->fuc_bundle);
	gk20a_gr_init_dtor(gr->fuc_sw_ctx);
	gk20a_gr_init_dtor(gr->fuc_sw_nonctx);
}

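/*
 * Common constructor for gk20a-style GR: load the "sw_nonctx", "sw_ctx",
 * "sw_bundle_init" and "sw_method_init" firmware images and convert them
 * into the register/method lists used at init time.
 */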
int
gk20a_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
	      int index, struct nvkm_gr **pgr)
{
	struct gf100_gr_fuc fuc;
	struct gf100_gr *gr;
	int ret;

	if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL)))
		return -ENOMEM;
	*pgr = &gr->base;

	ret = gf100_gr_ctor(func, device, index, gr);
	if (ret)
		return ret;

	ret = gf100_gr_ctor_fw(gr, "sw_nonctx", &fuc);
	if (ret)
		return ret;
	gr->fuc_sw_nonctx = gk20a_gr_av_to_init(&fuc);
	gf100_gr_dtor_fw(&fuc);
	if (IS_ERR(gr->fuc_sw_nonctx))
		return PTR_ERR(gr->fuc_sw_nonctx);

	ret = gf100_gr_ctor_fw(gr, "sw_ctx", &fuc);
	if (ret)
		return ret;
	gr->fuc_sw_ctx = gk20a_gr_aiv_to_init(&fuc);
	gf100_gr_dtor_fw(&fuc);
	if (IS_ERR(gr->fuc_sw_ctx))
		return PTR_ERR(gr->fuc_sw_ctx);

	ret = gf100_gr_ctor_fw(gr, "sw_bundle_init", &fuc);
	if (ret)
		return ret;
	gr->fuc_bundle = gk20a_gr_av_to_init(&fuc);
	gf100_gr_dtor_fw(&fuc);
	if (IS_ERR(gr->fuc_bundle))
		return PTR_ERR(gr->fuc_bundle);

	ret = gf100_gr_ctor_fw(gr, "sw_method_init", &fuc);
	if (ret)
		return ret;
	gr->fuc_method = gk20a_gr_av_to_method(&fuc);
	gf100_gr_dtor_fw(&fuc);
	if (IS_ERR(gr->fuc_method))
		return PTR_ERR(gr->fuc_method);

	return 0;
}

static const struct gf100_gr_func
gk20a_gr = {
	.dtor = gk20a_gr_dtor,
	.init = gk20a_gr_init,
	.set_hww_esr_report_mask = gk20a_gr_set_hww_esr_report_mask,
	.ppc_nr = 1,
	.grctx = &gk20a_grctx,
	.sclass = {
		{ -1, -1, FERMI_TWOD_A },
		{ -1, -1, KEPLER_INLINE_TO_MEMORY_A },
		{ -1, -1, KEPLER_C, &gf100_fermi },
		{ -1, -1, KEPLER_COMPUTE_A },
		{}
	}
};

int
gk20a_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
{
	return gk20a_gr_new_(&gk20a_gr, device, index, pgr);
}