/* fuc microcode for gf100 PGRAPH/GPC
 *
 * Copyright 2011 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs
 */

/* TODO
 * - bracket certain functions with scratch writes, useful for debugging
 * - watchdog timer around ctx operations
 */

#ifdef INCLUDE_DATA
// Data segment.
//
// Each *_mmio_list_head/*_mmio_list_tail pair is loaded by init and
// ctx_xfer and handed to mmctx_size/mmctx_xfer as the bounds of a list.
// Every pointer emitted here references mmio_list_base.
//
// NOTE(review): gpc_mmio_list_tail and tpc_mmio_list_tail emit no data
// of their own, so each one labels the same word as the *_head that
// follows it.  Presumably intentional (the tail of one list doubles as
// the head of the next) - confirm against whatever fills these in.
gpc_mmio_list_head:	.b32 #mmio_list_base
gpc_mmio_list_tail:
tpc_mmio_list_head:	.b32 #mmio_list_base
tpc_mmio_list_tail:
unk_mmio_list_head:	.b32 #mmio_list_base
unk_mmio_list_tail:	.b32 #mmio_list_base

// index of this GPC, stored by init from GPCCS_MYINDEX
gpc_id:			.b32 0

// TPC count (GPCCS_UNITS & 0x1f) and the matching (1 << count) - 1 mask,
// both stored by init
tpc_count:		.b32 0
tpc_mask:		.b32 0

#if NV_PGRAPH_GPCX_UNK__SIZE > 0
// number of UNK units that init found present, and a bitmask of which
unk_count:		.b32 0
unk_mask:		.b32 0
#endif

// command queue: filled by ih (queue_put), drained by main (queue_get)
cmd_queue:		queue_init

// initial target of all the list pointers above
mmio_list_base:
#endif

#ifdef INCLUDE_CODE
// Write the value held in `reg` to MMIO address `addr` through nv_wr32,
// with NV_PGRAPH_GPCX_GPCCS_MMIO_CTRL_BASE_ENABLE or'd into the address.
//
// In:  addr  MMIO address (immediate)
//      reg   register holding the value to write; passing $r15 is fine
//
// Clobbers: $r14 (address), $r15 (value), plus whatever nv_wr32 clobbers
//
// NOTE(review): BASE_ENABLE presumably makes the access relative to the
// GPCCS_MMIO_BASE programmed in init - confirm.
//
#define gpc_wr32(addr,reg)                                                    /*
*/	mov b32 $r15 reg                                                      /*
*/	imm32($r14, addr)                                                     /*
*/	or $r14 NV_PGRAPH_GPCX_GPCCS_MMIO_CTRL_BASE_ENABLE                    /*
*/	call(nv_wr32)

// reports an exception to the host
//
// Writes the error code to FECS CC_SCRATCH[5], then writes 1 to
// FECS INTR_UP_SET.
//
// In: $r15 error code (see os.h)
//
// $r14 is saved and restored around the register writes; $r15 is not
// preserved (it is overwritten with 1 before the second write).
//
error:
	push $r14
	nv_wr32(NV_PGRAPH_FECS_CC_SCRATCH_VAL(5), $r15)
	mov $r15 1
	nv_wr32(NV_PGRAPH_FECS_INTR_UP_SET, $r15)
	pop $r14
	ret

#if CHIPSET >= GM107
// Spins until NV_PGRAPH_GPCX_GPCCS_TPC_STATUS reads back as zero.
// The wait is bracketed by trace_set/trace_clr(T_STRTPC).
//
// $r9 is used as scratch and preserved across the call.
//
tpc_strand_wait:
	push $r9
	trace_set(T_STRTPC)
	tpc_strand_busy:
		nv_iord($r9, NV_PGRAPH_GPCX_GPCCS_TPC_STATUS, 0)
		bra b32 $r9 0x0 ne #tpc_strand_busy
	trace_clr(T_STRTPC)
	pop $r9
	ret

// Helper macros for driving the TPC strands.  All of them go through
// gpc_wr32 and therefore clobber $r14 and $r15.
//
// Only the function-like form tpc_strand_wait() is a macro; the bare
// name inside call() still refers to the routine above.
#define tpc_strand_wait() call(tpc_strand_wait)

// Issue CMD_ENABLE to STRAND_CMD and wait for TPC_STATUS to clear.
#define tpc_strand_enable()                                                   /*
*/	mov $r15 NV_PGRAPH_GPC0_TPCX_STRAND_CMD_ENABLE                        /*
*/	gpc_wr32(NV_PGRAPH_GPC0_TPCX_STRAND_CMD, $r15)                        /*
*/	tpc_strand_wait()

// Issue CMD_DISABLE to STRAND_CMD and wait for TPC_STATUS to clear.
#define tpc_strand_disable()                                                  /*
*/	mov $r15 NV_PGRAPH_GPC0_TPCX_STRAND_CMD_DISABLE                       /*
*/	gpc_wr32(NV_PGRAPH_GPC0_TPCX_STRAND_CMD, $r15)                        /*
*/	tpc_strand_wait()

// Select INDEX_ALL, write `p` to STRAND_SELECT, issue CMD_SEEK and wait
// for TPC_STATUS to clear.
//
// The CMD_SEEK write has to happen inside this macro: callers are free
// to reuse $r15 straight afterwards, so leaving the command sitting in
// $r15 for a later macro to write would silently drop the seek.
#define tpc_strand_seek(p)                                                    /*
*/	mov $r15 NV_PGRAPH_GPC0_TPCX_STRAND_INDEX_ALL                         /*
*/	gpc_wr32(NV_PGRAPH_GPC0_TPCX_STRAND_INDEX, $r15)                      /*
*/	mov $r15 p                                                            /*
*/	gpc_wr32(NV_PGRAPH_GPC0_TPCX_STRAND_SELECT, $r15)                     /*
*/	mov $r15 NV_PGRAPH_GPC0_TPCX_STRAND_CMD_SEEK                          /*
*/	gpc_wr32(NV_PGRAPH_GPC0_TPCX_STRAND_CMD, $r15)                        /*
*/	tpc_strand_wait()

// Write `m` to STRAND_DATA, issue CMD_GET_INFO and wait for TPC_STATUS
// to clear.  Does not depend on the incoming value of $r15.
#define tpc_strand_info(m)                                                    /*
*/	mov $r15 m                                                            /*
*/	gpc_wr32(NV_PGRAPH_GPC0_TPCX_STRAND_DATA, $r15)                       /*
*/	mov $r15 NV_PGRAPH_GPC0_TPCX_STRAND_CMD_GET_INFO                      /*
*/	gpc_wr32(NV_PGRAPH_GPC0_TPCX_STRAND_CMD, $r15)                        /*
*/	tpc_strand_wait()
#endif


// GPC fuc initialisation, executed by triggering ucode start, will
// fall through to main loop after completion.
//
// Input:
//   CC_SCRATCH[1]: context base
//
// Output:
//   CC_SCRATCH[0]:
//	     31:31: set to signal completion
//   CC_SCRATCH[1]:
//	      31:0: GPC context size
//
// Register use:
//   $r0: cleared here and used as the zero base for every D[] access
//   $r2: running context offset, starts at the context base
//   $r3: running GPC context size, starts at zero
//
init:
	clear b32 $r0

	// setup stack
	nv_iord($r1, NV_PGRAPH_GPCX_GPCCS_CAPS, 0)
	extr $r1 $r1 9:17
	shl b32 $r1 8
	mov $sp $r1

	// enable fifo access
	mov $r2 NV_PGRAPH_GPCX_GPCCS_ACCESS_FIFO
	nv_iowr(NV_PGRAPH_GPCX_GPCCS_ACCESS, 0, $r2)

	// setup i0 handler, and route all interrupts to it
	mov $r1 #ih
	mov $iv0 $r1
	nv_iowr(NV_PGRAPH_GPCX_GPCCS_INTR_ROUTE, 0, $r0)

	// enable fifo interrupt
	mov $r2 NV_PGRAPH_GPCX_GPCCS_INTR_EN_SET_FIFO
	nv_iowr(NV_PGRAPH_GPCX_GPCCS_INTR_EN_SET, 0, $r2)

	// enable interrupts
	bset $flags ie0

	// how many TPCs do we have?
	// tpc_count = UNITS & 0x1f, tpc_mask = (1 << tpc_count) - 1
	nv_iord($r2, NV_PGRAPH_GPCX_GPCCS_UNITS, 0)
	mov $r3 1
	and $r2 0x1f
	shl b32 $r3 $r2
	sub b32 $r3 1
	st b32 D[$r0 + #tpc_count] $r2
	st b32 D[$r0 + #tpc_mask] $r3

	// determine which GPC we are, setup (optional) mmio access offset
	nv_iord($r2, NV_PGRAPH_GPCX_GPCCS_MYINDEX, 0)
	st b32 D[$r0 + #gpc_id] $r2
	shl b32 $r2 15
	nv_iowr(NV_PGRAPH_GPCX_GPCCS_MMIO_BASE, 0, $r2)

#if NV_PGRAPH_GPCX_UNK__SIZE > 0
	// figure out which, and how many, UNKs are actually present
	//
	// $r14: register address being probed, advanced by 4 per unit
	// $r2:  unit index, $r3: units found, $r4: bitmask of units found
	imm32($r14, 0x500c30)
	clear b32 $r2
	clear b32 $r3
	clear b32 $r4
	init_unk_loop:
		call(nv_rd32)
		// a zero readback is treated as "unit not present"
		cmp b32 $r15 0
		bra z #init_unk_next
			mov $r15 1
			shl b32 $r15 $r2
			or $r4 $r15
			add b32 $r3 1
		init_unk_next:
		add b32 $r2 1
		add b32 $r14 4
		cmp b32 $r2 NV_PGRAPH_GPCX_UNK__SIZE
		bra ne #init_unk_loop
	init_unk_done:
	st b32 D[$r0 + #unk_count] $r3
	st b32 D[$r0 + #unk_mask] $r4
#endif

	// initialise context base, and size tracking
	nv_iord($r2, NV_PGRAPH_GPCX_GPCCS_CC_SCRATCH_VAL(1), 0)
	clear b32 $r3		// track GPC context size here

	// set mmctx base addresses now so we don't have to do it later,
	// they don't currently ever change
	shr b32 $r5 $r2 8
	nv_iowr(NV_PGRAPH_GPCX_GPCCS_MMCTX_SAVE_SWBASE, 0, $r5)
	nv_iowr(NV_PGRAPH_GPCX_GPCCS_MMCTX_LOAD_SWBASE, 0, $r5)

	// calculate GPC mmio context size
	ld b32 $r14 D[$r0 + #gpc_mmio_list_head]
	ld b32 $r15 D[$r0 + #gpc_mmio_list_tail]
	call(mmctx_size)
	add b32 $r2 $r15
	add b32 $r3 $r15

	// calculate per-TPC mmio context size
	// (size of one TPC's list multiplied by tpc_count)
	ld b32 $r14 D[$r0 + #tpc_mmio_list_head]
	ld b32 $r15 D[$r0 + #tpc_mmio_list_tail]
	call(mmctx_size)
	ld b32 $r14 D[$r0 + #tpc_count]
	mulu $r14 $r15
	add b32 $r2 $r14
	add b32 $r3 $r14

#if NV_PGRAPH_GPCX_UNK__SIZE > 0
	// calculate per-UNK mmio context size
	// (size of one UNK's list multiplied by unk_count)
	ld b32 $r14 D[$r0 + #unk_mmio_list_head]
	ld b32 $r15 D[$r0 + #unk_mmio_list_tail]
	call(mmctx_size)
	ld b32 $r14 D[$r0 + #unk_count]
	mulu $r14 $r15
	add b32 $r2 $r14
	add b32 $r3 $r14
#endif

	// round up base/size to 256 byte boundary (for strand SWBASE)
	//
	// $r3 is divided by 4 first because MMCTX_LOAD_COUNT is written
	// with size / 4; the following shift by 6 then completes the
	// divide by 256.  Note this always advances to the *next*
	// boundary, even when the value was already aligned.
	shr b32 $r3 2
	nv_iowr(NV_PGRAPH_GPCX_GPCCS_MMCTX_LOAD_COUNT, 0, $r3) // wtf for?!
	shr b32 $r2 8
	shr b32 $r3 6
	add b32 $r2 1
	add b32 $r3 1
	shl b32 $r2 8
	shl b32 $r3 8

	// calculate size of strand context data
	mov b32 $r15 $r2
	call(strand_ctx_init)
	add b32 $r2 $r15
	add b32 $r3 $r15

#if CHIPSET >= GM107
	// calculate size of tpc strand context data
	mov $r15 NV_PGRAPH_GPC0_TPCX_STRAND_INDEX_ALL
	gpc_wr32(NV_PGRAPH_GPC0_TPCX_STRAND_INDEX, $r15)
	tpc_strand_enable();
	tpc_strand_seek(0);
	tpc_strand_info(-1);

	// $r4: TPCs left to visit
	// $r5: mmio base of the current TPC (TPC0 base + gpc_id << 15)
	// $r6: strands left to visit in the current TPC
	// $r7: index of the current strand
	ld b32 $r4 D[$r0 + #tpc_count]
	mov $r5 NV_PGRAPH_GPC0_TPC0
	ld b32 $r6 D[$r0 + #gpc_id]
	shl b32 $r6 15
	add b32 $r5 $r6
	tpc_strand_init_tpc_loop:
		add b32 $r14 $r5 NV_TPC_STRAND_CNT
		call(nv_rd32)
		mov b32 $r6 $r15
		// a TPC reporting zero strands has nothing to set up; without
		// this check the count-down at the bottom of the inner loop
		// would wrap below zero and keep iterating
		cmp b32 $r6 0
		bra z #tpc_strand_init_tpc_next
		clear b32 $r7
		tpc_strand_init_idx_loop:
			// select the strand, then point both its save and load
			// SWBASE at the current context offset (in 256 byte units)
			add b32 $r14 $r5 NV_TPC_STRAND_INDEX
			mov b32 $r15 $r7
			call(nv_wr32)
			add b32 $r14 $r5 NV_TPC_STRAND_SAVE_SWBASE
			shr b32 $r15 $r2 8
			call(nv_wr32)
			add b32 $r14 $r5 NV_TPC_STRAND_LOAD_SWBASE
			shr b32 $r15 $r2 8
			call(nv_wr32)
			// reserve ((words >> 6) + 1) << 8 bytes for this strand
			add b32 $r14 $r5 NV_TPC_STRAND_WORDS
			call(nv_rd32)
			shr b32 $r15 6
			add b32 $r15 1
			shl b32 $r15 8
			add b32 $r2 $r15
			add b32 $r3 $r15
			add b32 $r7 1
			sub b32 $r6 1
			bra nz #tpc_strand_init_idx_loop
		tpc_strand_init_tpc_next:
		add b32 $r5 NV_PGRAPH_GPC0_TPC0__SIZE
		sub b32 $r4 1
		bra nz #tpc_strand_init_tpc_loop

	mov $r15 NV_PGRAPH_GPC0_TPCX_STRAND_INDEX_ALL
	gpc_wr32(NV_PGRAPH_GPC0_TPCX_STRAND_INDEX, $r15)
	tpc_strand_disable();
#endif

	// save context size, and tell HUB we're done
	nv_iowr(NV_PGRAPH_GPCX_GPCCS_CC_SCRATCH_VAL(1), 0, $r3)
	clear b32 $r2
	bset $r2 31
	nv_iowr(NV_PGRAPH_GPCX_GPCCS_CC_SCRATCH_SET(0), 0, $r2)

// Main program loop, very simple, sleeps until woken up by the interrupt
// handler, pulls a command from the queue and executes its handler
//
// Entered by falling through from init; never returns.
//
// NOTE(review): the code below assumes queue_get hands back the command
// in $r14 and its data in $r15, and sets $p1 when there was nothing to
// fetch - confirm against queue_get.
//
main:
	// $p0 is cleared again by ih just before it returns
	bset $flags $p0
	sleep $p0
	mov $r13 #cmd_queue
	call(queue_get)
	bra $p1 #main

	// 0x0000-0x0003 are all context transfers
	cmpu b32 $r14 0x04
	bra nc #main_not_ctx_xfer
		// fetch $flags and mask off $p1/$p2
		mov $r1 $flags
		mov $r2 0x0006
		not b32 $r2
		and $r1 $r2
		// set $p1/$p2 according to transfer type
		// (command bit 0 lands in $p1, command bit 1 in $p2)
		shl b32 $r14 1
		or $r1 $r14
		mov $flags $r1
		// transfer context data
		// ($r15 is passed through untouched as ctx_xfer's input)
		call(ctx_xfer)
		bra #main

	// anything else is reported as (command << 16) | E_BAD_COMMAND
	main_not_ctx_xfer:
	shl b32 $r15 $r14 16
	or $r15 E_BAD_COMMAND
	call(error)
	bra #main

// interrupt handler
//
// Installed in $iv0 by init.  If the FIFO interrupt is pending, the
// incoming command/data pair is pushed onto cmd_queue and the FIFO is
// acked; every pending interrupt bit is then acked, and $p0 is cleared
// so that main's "sleep $p0" stops sleeping.
//
// Saves and restores $flags, $r8-$r11 and $r13-$r15.
// NOTE(review): $r12 is not saved - confirm that neither the
// nv_iord/nv_iowr macros nor queue_put touch it.
//
ih:
	push $r8
	mov $r8 $flags
	push $r8
	push $r9
	push $r10
	push $r11
	push $r13
	push $r14
	push $r15
	// $r0 is not saved: the rest of the code keeps it at zero anyway
	clear b32 $r0

	// incoming fifo command?
	nv_iord($r10, NV_PGRAPH_GPCX_GPCCS_INTR, 0)
	and $r11 $r10 NV_PGRAPH_GPCX_GPCCS_INTR_FIFO
	bra e #ih_no_fifo
		// queue incoming fifo command for later processing
		mov $r13 #cmd_queue
		nv_iord($r14, NV_PGRAPH_GPCX_GPCCS_FIFO_CMD, 0)
		nv_iord($r15, NV_PGRAPH_GPCX_GPCCS_FIFO_DATA, 0)
		call(queue_put)
		mov $r14 1
		nv_iowr(NV_PGRAPH_GPCX_GPCCS_FIFO_ACK, 0, $r14)

	// ack, and wake up main()
	// ($r10 still holds the interrupt bits read above)
	ih_no_fifo:
	nv_iowr(NV_PGRAPH_GPCX_GPCCS_INTR_ACK, 0, $r10)

	pop $r15
	pop $r14
	pop $r13
	pop $r11
	pop $r10
	pop $r9
	pop $r8
	mov $flags $r8
	pop $r8
	// must come after $flags is restored, or the restore would undo it
	bclr $flags $p0
	iret

// Set this GPC's bit in HUB_BAR, used to signal completion of various
// activities to the HUB fuc
//
// Writes (1 << gpc_id) to HUB_BAR_SET.
//
// Expects $r0 to be zero (it is the base for the gpc_id load).
// Clobbers $r14 and $r15.
//
hub_barrier_done:
	mov $r15 1
	ld b32 $r14 D[$r0 + #gpc_id]
	shl b32 $r15 $r14
	nv_wr32(0x409418, $r15)	// 0x409418 - HUB_BAR_SET
	ret

// Disables various things, waits a bit, and re-enables them..
//
// Not sure how exactly this helps, perhaps "ENABLE" is not such a
// good description for the bits we turn off?  Anyways, without this,
// funny things happen.
//
// Sequence: write RED_SWITCH with only POWER set, spin through a short
// count-down loop, then write POWER | UNK11 | ENABLE.
//
// Clobbers $r14 and $r15.
//
ctx_redswitch:
	mov $r15 NV_PGRAPH_GPCX_GPCCS_RED_SWITCH_POWER
	nv_iowr(NV_PGRAPH_GPCX_GPCCS_RED_SWITCH, 0, $r15)
	// short busy-wait, 8 passes through the loop
	mov $r14 8
	ctx_redswitch_delay:
		sub b32 $r14 1
		bra ne #ctx_redswitch_delay
	// $r15 still holds POWER, add the remaining bits back in
	or $r15 NV_PGRAPH_GPCX_GPCCS_RED_SWITCH_UNK11
	or $r15 NV_PGRAPH_GPCX_GPCCS_RED_SWITCH_ENABLE
	nv_iowr(NV_PGRAPH_GPCX_GPCCS_RED_SWITCH, 0, $r15)
	ret

// Transfer GPC context data between GPU and storage area
//
// In: $r15 context base address
//     $p1 clear on save, set on load
//     $p2 set if opposite direction done/will be done, so:
//		on save it means: "a load will follow this save"
//		on load it means: "a save preceded this load"
//
// Order of operations: program the context base, kick off the strand
// transfer, run the GPC / per-TPC / (optional) per-UNK mmio lists
// through mmctx_xfer, wait for the strands, then signal the HUB.
//
// Expects $r0 to be zero (base for the D[] loads).
//
ctx_xfer:
	// set context base address
	nv_iowr(NV_PGRAPH_GPCX_GPCCS_MEM_BASE, 0, $r15)
#if CHIPSET >= GM107
	gpc_wr32(NV_PGRAPH_GPC0_TPCX_STRAND_MEM_BASE, $r15)
#endif
	// the redswitch sequence is only run ahead of a load
	bra not $p1 #ctx_xfer_not_load
		call(ctx_redswitch)
	ctx_xfer_not_load:

	// strands
	// (command written is CMD_SAVE + $p1, i.e. CMD_SAVE + 1 on load)
	call(strand_pre)
	clear b32 $r2
	nv_iowr(NV_PGRAPH_GPCX_GPCCS_STRAND_SELECT, 0x3f, $r2)
	xbit $r2 $flags $p1	// SAVE/LOAD
	add b32 $r2 NV_PGRAPH_GPCX_GPCCS_STRAND_CMD_SAVE
	nv_iowr(NV_PGRAPH_GPCX_GPCCS_STRAND_CMD, 0x3f, $r2)

#if CHIPSET >= GM107
	// same again for the TPC strands; note $r15 is reused for the
	// command immediately after the seek
	tpc_strand_enable();
	tpc_strand_seek(0);
	xbit $r15 $flags $p1	// SAVE/LOAD
	add b32 $r15 NV_PGRAPH_GPC0_TPCX_STRAND_CMD_SAVE
	gpc_wr32(NV_PGRAPH_GPC0_TPCX_STRAND_CMD, $r15)
#endif

	// mmio context
	//
	// mmctx_xfer arguments as set up below:
	//   $r10 flags (bit 0 direction, 2 = first, 4 = last)
	//   $r11 mmio base, $r12/$r13 list head/tail
	//   $r14 stride (0 = not multi), $r15 unit mask
	xbit $r10 $flags $p1	// direction
	or $r10 2		// first
	imm32($r11,0x500000)
	ld b32 $r12 D[$r0 + #gpc_id]
	shl b32 $r12 15
	add b32 $r11 $r12	// base = NV_PGRAPH_GPCn
	ld b32 $r12 D[$r0 + #gpc_mmio_list_head]
	ld b32 $r13 D[$r0 + #gpc_mmio_list_tail]
	mov $r14 0		// not multi
	call(mmctx_xfer)

	// per-TPC mmio context
	xbit $r10 $flags $p1	// direction
#if !NV_PGRAPH_GPCX_UNK__SIZE
	// only the final mmctx_xfer call carries the "last" flag
	or $r10 4		// last
#endif
	imm32($r11, 0x504000)
	ld b32 $r12 D[$r0 + #gpc_id]
	shl b32 $r12 15
	add b32 $r11 $r12	// base = NV_PGRAPH_GPCn_TPC0
	ld b32 $r12 D[$r0 + #tpc_mmio_list_head]
	ld b32 $r13 D[$r0 + #tpc_mmio_list_tail]
	ld b32 $r15 D[$r0 + #tpc_mask]
	mov $r14 0x800		// stride = 0x800
	call(mmctx_xfer)

#if NV_PGRAPH_GPCX_UNK__SIZE > 0
	// per-UNK mmio context
	xbit $r10 $flags $p1	// direction
	or $r10 4		// last
	imm32($r11, 0x503000)
	ld b32 $r12 D[$r0 + #gpc_id]
	shl b32 $r12 15
	add b32 $r11 $r12	// base = NV_PGRAPH_GPCn_UNK0
	ld b32 $r12 D[$r0 + #unk_mmio_list_head]
	ld b32 $r13 D[$r0 + #unk_mmio_list_tail]
	ld b32 $r15 D[$r0 + #unk_mask]
	mov $r14 0x200		// stride = 0x200
	call(mmctx_xfer)
#endif

	// wait for strands to finish
	call(strand_wait)
#if CHIPSET >= GM107
	tpc_strand_wait()
#endif

	// if load, or a save without a load following, do some
	// unknown stuff that's done after finishing a block of
	// strand commands
	//
	// NOTE(review): as written, the branches run the post step on a
	// load ($p1 set) or on a save with $p2 *set*, and skip it on a
	// save with $p2 clear.  Going by the $p2 description in the
	// header that is "a save WITH a load following", the opposite of
	// the sentence above.  One of the two comments is presumably
	// stale - confirm before relying on either.
	bra $p1 #ctx_xfer_post
	bra not $p2 #ctx_xfer_done
	ctx_xfer_post:
		call(strand_post)
#if CHIPSET >= GM107
		tpc_strand_disable()
#endif

	// mark completion in HUB's barrier
	ctx_xfer_done:
	call(hub_barrier_done)
	ret
#endif
