1/*
2 *  fuc microcode for g98 sec engine
3 *  Copyright (C) 2010  Marcin Kościelnicki
4 *
5 *  This program is free software; you can redistribute it and/or modify
6 *  it under the terms of the GNU General Public License as published by
7 *  the Free Software Foundation; either version 2 of the License, or
8 *  (at your option) any later version.
9 *
10 *  This program is distributed in the hope that it will be useful,
11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 *  GNU General Public License for more details.
14 *
15 *  You should have received a copy of the GNU General Public License
16 *  along with this program; if not, write to the Free Software
17 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18 */
19
// Data segment: per-channel context variables.
// The interrupt handler (#ih) saves/restores a 128-byte block on context
// switch; presumably that block is this group of variables (everything up to
// the 0x80-aligned #swap buffer) - TODO confirm the section starts at data
// address 0.
20.section #g98_sec_data
21
// DMA object words, one per DMA_* method (method index 0x60 + n); #ctx_dma
// names the start of the array, #dma_count its length.
22ctx_dma:
23ctx_dma_query:		.b32 0
24ctx_dma_src:		.b32 0
25ctx_dma_dst:		.b32 0
26.equ #dma_count 3
// Plain state words written directly by the "set context" entries of the
// command dispatch tables.
27ctx_query_address_high:	.b32 0
28ctx_query_address_low:	.b32 0
29ctx_query_counter:	.b32 0
30ctx_cond_address_high:	.b32 0
31ctx_cond_address_low:	.b32 0
// Written by cmd_cond_mode; when bit 0 is 1 the dispatcher skips commands
// whose table type is 1 (see dtable_cmd).
32ctx_cond_off:		.b32 0
33ctx_src_address_high:	.b32 0
34ctx_src_address_low:	.b32 0
35ctx_dst_address_high:	.b32 0
36ctx_dst_address_low:	.b32 0
// Index into #sec_dtable, validated (< 0xf) by sec_cmd_mode.
37ctx_mode:		.b32 0
// Key and IV are moved as 16-byte units by sec_cmd_length, hence the alignment.
38.align 16
39ctx_key:		.skip 16
40ctx_iv:			.skip 16
41
// Scratch buffer used as the local end of DMA transfers (query records,
// condition checks, data blocks). 32 bytes: cmd_cond_mode places a second
// 16-byte record at #swap + 0x10.
42.align 0x80
43swap:
44.skip 32
45
// Dispatch table for method indices 0x80 .. #common_cmd_max - 1
// (method index = byte offset / 4, so index 0x80 is mthd 0x200).
// Each entry is 8 bytes, as consumed by dtable_cmd:
//   +0 (b16): data address of a context word, or code address of a handler
//   +2 (b16): type - 2: store the method data to the context word;
//                    0: always call the handler;
//                    1: call the handler unless ctx_cond_off is 1
//   +4 (b32): bits that must be zero in the method data (written here as the
//             complement of the allowed bits); a violation raises
//             invalid_bitfield.
46.align 8
47common_cmd_dtable:
48.b32 #ctx_query_address_high + 0x20000 ~0xff		// index 0x80
49.b32 #ctx_query_address_low + 0x20000 ~0xfffffff0	// index 0x81
50.b32 #ctx_query_counter + 0x20000 ~0xffffffff		// index 0x82
51.b32 #cmd_query_get + 0x00000 ~1			// index 0x83
52.b32 #ctx_cond_address_high + 0x20000 ~0xff		// index 0x84
53.b32 #ctx_cond_address_low + 0x20000 ~0xfffffff0	// index 0x85
54.b32 #cmd_cond_mode + 0x00000 ~7			// index 0x86
55.b32 #cmd_wrcache_flush + 0x00000 ~0			// index 0x87
// One past the last valid index of this table (0x80 + 8 entries).
56.equ #common_cmd_max 0x88
57
58
// Dispatch table for the engine-specific method indices 0xc0 ..
// #engine_cmd_max - 1 (index 0xc0 is mthd 0x300). Same 8-byte entry format
// as read by dtable_cmd: b16 address, b16 type (2 = store data to context
// word, 0 = always call, 1 = call unless ctx_cond_off is 1), b32 mask of
// data bits that must be zero.
59.align 8
60engine_cmd_dtable:
// indices 0xc0-0xc3: the four words of the key
61.b32 #ctx_key + 0x0 + 0x20000 ~0xffffffff
62.b32 #ctx_key + 0x4 + 0x20000 ~0xffffffff
63.b32 #ctx_key + 0x8 + 0x20000 ~0xffffffff
64.b32 #ctx_key + 0xc + 0x20000 ~0xffffffff
// indices 0xc4-0xc7: the four words of the IV
65.b32 #ctx_iv + 0x0 + 0x20000 ~0xffffffff
66.b32 #ctx_iv + 0x4 + 0x20000 ~0xffffffff
67.b32 #ctx_iv + 0x8 + 0x20000 ~0xffffffff
68.b32 #ctx_iv + 0xc + 0x20000 ~0xffffffff
// indices 0xc8-0xcb: source / destination addresses (low words 16-byte aligned)
69.b32 #ctx_src_address_high + 0x20000 ~0xff
70.b32 #ctx_src_address_low + 0x20000 ~0xfffffff0
71.b32 #ctx_dst_address_high + 0x20000 ~0xff
72.b32 #ctx_dst_address_low + 0x20000 ~0xfffffff0
// index 0xcc: mode select (handler rejects values >= 0xf)
73.b32 #sec_cmd_mode + 0x00000 ~0xf
// index 0xcd: length, which starts the operation; type 1, so it is skipped
// while ctx_cond_off is 1. Length must be a multiple of 16.
74.b32 #sec_cmd_length + 0x10000 ~0x0ffffff0
// One past the last valid index of this table (0xc0 + 14 entries).
75.equ #engine_cmd_max 0xce
76
// Per-mode routine table, indexed by ctx_mode (4 bytes per entry).
// sec_cmd_length calls the first routine (script preparation) and then the
// second one (the data-moving loop: in+out, out only, or in only).
// The 15 entries match the "< 0xf" range check in sec_cmd_mode.
77.align 4
78sec_dtable:
79.b16 #sec_copy_prep #sec_do_inout			// mode 0
80.b16 #sec_store_prep #sec_do_out			// mode 1
81.b16 #sec_ecb_e_prep #sec_do_inout			// mode 2
82.b16 #sec_ecb_d_prep #sec_do_inout			// mode 3
83.b16 #sec_cbc_e_prep #sec_do_inout			// mode 4
84.b16 #sec_cbc_d_prep #sec_do_inout			// mode 5
85.b16 #sec_pcbc_e_prep #sec_do_inout			// mode 6
86.b16 #sec_pcbc_d_prep #sec_do_inout			// mode 7
87.b16 #sec_cfb_e_prep #sec_do_inout			// mode 8
88.b16 #sec_cfb_d_prep #sec_do_inout			// mode 9
89.b16 #sec_ofb_prep #sec_do_inout			// mode 10
90.b16 #sec_ctr_prep #sec_do_inout			// mode 11
91.b16 #sec_cbc_mac_prep #sec_do_in			// mode 12
92.b16 #sec_cmac_finish_complete_prep #sec_do_in	// mode 13
93.b16 #sec_cmac_finish_partial_prep #sec_do_in		// mode 14
94
// Pad the data segment to a multiple of 0x100 bytes.
95.align 0x100
96
// Start of the code segment: one-time initialisation followed by an idle
// loop. All command processing happens in the i0 interrupt handler (#ih).
97.section #g98_sec_code
98
99	// $r0 is always set to 0 in our code - this allows some space savings.
100	clear b32 $r0
101
102	// set up the interrupt handler
103	mov $r1 #ih
104	mov $iv0 $r1
105
106	// init stack pointer
107	mov $sp $r0
108
109	// set interrupt dispatch - route timer, fifo, ctxswitch to i0, others to host
	// (value 0x0000fff0 written to I[0x400 + 0x300])
110	movw $r1 0xfff0
111	sethi $r1 0
112	mov $r2 0x400
113	iowr I[$r2 + 0x300] $r1
114
115	// enable the interrupts
	// ($r1 still holds the dispatch value; OR in bits 2 and 3, the two
	// sources #ih services, and write the result to I[0x400])
116	or $r1 0xc
117	iowr I[$r2] $r1
118
119	// enable fifo access and context switching
120	mov $r1 3
121	mov $r2 0x1200
122	iowr I[$r2] $r1
123
124	// enable i0 delivery
125	bset $flags ie0
126
127	// sleep forever, waking only for interrupts.
	// $p0 is set once here and never cleared anywhere in this file.
128	bset $flags $p0
129	spin:
130	sleep $p0
131	bra #spin
132
133// i0 handler
// Reads the pending-interrupt word from I[0x200] into $r1 and services two
// sources: bit 3 (0x8, context switch request) and bit 2 (0x4, incoming FIFO
// command). Both bits are acknowledged through I[0x100] just before iret.
//
// Register use in the command path:
//   $r2 = method address word << 16; error codes are OR-ed into the low bits
//   $r3 = method data
//   $r4 = method index (address word & 0x7ff; index * 4 = method offset)
// Handlers reached through "call $r5" report an error or interrupt request by
// returning with $p1 set (and a code OR-ed into $r2).
134ih:
135	// see which interrupts we got
136	iord $r1 I[$r0 + 0x200]
137
138	and $r2 $r1 0x8
139	cmpu b32 $r2 0
140	bra e #noctx
141
142		// context switch... prepare the regs for xfer
143		mov $r2 0x7700
144		mov $xtargets $r2
145		mov $xdbase $r0
146		// 128-byte context.
		// $r2 becomes the transfer descriptor: data address 0 in the low
		// half, size code 5 in the high half.
147		mov $r2 0
148		sethi $r2 0x50000
149
150		// read current channel
151		mov $r3 0x1400
152		iord $r4 I[$r3]
153		// if bit 30 set, it's active, so we have to unload it first.
154		shl b32 $r5 $r4 1
155		cmps b32 $r5 0
156		bra nc #ctxload
157
158			// unload the current channel - save the context
159			xdst $r0 $r2
160			xdwait
161			// and clear bit 30, then write back
162			bclr $r4 0x1e
163			iowr I[$r3] $r4
164			// tell PFIFO we unloaded
165			mov $r4 1
166			iowr I[$r3 + 0x200] $r4
167
168		bra #noctx
169
170		ctxload:
171			// no channel loaded - perhaps we're requested to load one
172			iord $r4 I[$r3 + 0x100]
173			shl b32 $r15 $r4 1
174			cmps b32 $r15 0
175			// if bit 30 of next channel not set, probably PFIFO is just
176			// killing a context. do a faux load, without the active bit.
177			bra nc #dummyload
178
179				// ok, do a real context load.
180				xdld $r0 $r2
181				xdwait
				// re-program the DMA objects from the restored context:
				// ctx_dma[i] is written to I[(0x180 + i) << 8] for
				// i = dma_count - 1 down to 0 (the subtract borrows
				// once i passes 0, which ends the loop).
182				mov $r5 #ctx_dma
183				mov $r6 #dma_count - 1
184				ctxload_dma_loop:
185					ld b32 $r7 D[$r5 + $r6 * 4]
186					add b32 $r8 $r6 0x180
187					shl b32 $r8 8
188					iowr I[$r8] $r7
189					sub b32 $r6 1
190				bra nc #ctxload_dma_loop
191
192			dummyload:
193			// tell PFIFO we're done
194			mov $r5 2
195			iowr I[$r3 + 0x200] $r5
196
197	noctx:
198	and $r2 $r1 0x4
199	cmpu b32 $r2 0
200	bra e #nocmd
201
202		// incoming fifo command.
		// $r2 = address word from I[0x1a00], $r3 = data word from I[0x1900]
203		mov $r3 0x1900
204		iord $r2 I[$r3 + 0x100]
205		iord $r3 I[$r3]
206		// extract the method
207		and $r4 $r2 0x7ff
208		// shift the addr to proper position if we need to interrupt later
209		shl b32 $r2 0x10
210
211		// mthd 0 and 0x100 [NAME, NOP]: ignore
		// (mask 0x7bf drops bit 6, so indices 0 and 0x40 both compare as 0)
212		and $r5 $r4 0x7bf
213		cmpu b32 $r5 0
214		bra e #cmddone
215
		// Pick the dispatch table by range. $r5 is the table base biased
		// by the first index of the range (entries are 8 bytes), $r6 the
		// exclusive upper bound checked in dtable_cmd.
216		mov $r5 #engine_cmd_dtable - 0xc0 * 8
217		mov $r6 #engine_cmd_max
218		cmpu b32 $r4 0xc0
219		bra nc #dtable_cmd
220		mov $r5 #common_cmd_dtable - 0x80 * 8
221		mov $r6 #common_cmd_max
222		cmpu b32 $r4 0x80
223		bra nc #dtable_cmd
224		cmpu b32 $r4 0x60
225		bra nc #dma_cmd
226		cmpu b32 $r4 0x50
227		bra ne #illegal_mthd
228
229			// mthd 0x140: PM_TRIGGER
230			mov $r2 0x2200
231			clear b32 $r3
232			sethi $r3 0x20000
233			iowr I[$r2] $r3
234			bra #cmddone
235
236		dma_cmd:
237			// mthd 0x180...: DMA_*
			// valid indices are 0x60 .. 0x60 + dma_count - 1
238			cmpu b32 $r4 0x60+#dma_count
239			bra nc #illegal_mthd
			// $r5 = address of ctx_dma[index - 0x60]
			// NOTE(review): the bias is truncated to 16 bits; this
			// relies on how the immediate / data address wraps -
			// confirm against the ISA docs.
240			shl b32 $r5 $r4 2
241			add b32 $r5 ((#ctx_dma - 0x60 * 4) & 0xffff)
			// set bit 30 in the data, remember it in the context and
			// program it into I[(0x180 + n) << 8]
242			bset $r3 0x1e
243			st b32 D[$r5] $r3
244			add b32 $r4 0x180 - 0x60
245			shl b32 $r4 8
246			iowr I[$r4] $r3
247			bra #cmddone
248
249		dtable_cmd:
			// range check against the table's exclusive upper bound
250			cmpu b32 $r4 $r6
251			bra nc #illegal_mthd
			// $r4 = address of the 8-byte table entry
252			shl b32 $r4 3
253			add b32 $r4 $r5
			// entry word 1: bits that must be zero in the data
254			ld b32 $r5 D[$r4 + 4]
255			and $r5 $r3
256			cmpu b32 $r5 0
257			bra ne #invalid_bitfield
			// entry word 0: $r5 = address (low half), $r6 = type (high half)
258			ld b16 $r5 D[$r4]
259			ld b16 $r6 D[$r4 + 2]
			// type 2: plain store of the data into a context word
260			cmpu b32 $r6 2
261			bra e #cmd_setctx
			// type & ctx_cond_off == 1 only for type 1 entries while
			// ctx_cond_off is 1: such commands are silently dropped
262			ld b32 $r7 D[$r0 + #ctx_cond_off]
263			and $r6 $r7
264			cmpu b32 $r6 1
265			bra e #cmddone
			// run the handler; $p1 set on return means "raise interrupt"
266			call $r5
267			bra $p1 #dispatch_error
268			bra #cmddone
269
270		cmd_setctx:
271			st b32 D[$r5] $r3
272			bra #cmddone
273
274
275		invalid_bitfield:
			// error code 1 in the low bits of the address word
276			or $r2 1
277		dispatch_error:
278		illegal_mthd:
			// publish the offending address/code word and data word in
			// I[0x1000] / I[0x1100], then write 0x40 to I[0]
			// (presumably raising interrupt bit 6 towards the host -
			// TODO confirm)
279			mov $r4 0x1000
280			iowr I[$r4] $r2
281			iowr I[$r4 + 0x100] $r3
282			mov $r4 0x40
283			iowr I[$r0] $r4
284
			// busy-wait until bit 6 of the interrupt status reads 0
285			im_loop:
286				iord $r4 I[$r0 + 0x200]
287				and $r4 0x40
288				cmpu b32 $r4 0
289			bra ne #im_loop
290
291		cmddone:
292		// remove the command from FIFO
293		mov $r3 0x1d00
294		mov $r4 1
295		iowr I[$r3] $r4
296
297	nocmd:
298	// ack the processed interrupts
	// (only bits 2 and 3 of the status word read at entry)
299	and $r1 $r1 0xc
300	iowr I[$r0 + 0x100] $r1
301iret
302
// Handler for common_cmd_dtable index 0x83, called from dtable_cmd.
// In:  $r3 = method data, $r2 = method address word << 16, $r0 = 0.
// Out: $p1 = bit 0 of $r3 (the dispatcher takes its interrupt path when set),
//      $r2 |= 3.
// Builds a 16-byte record in #swap - {ctx_query_counter, 0, I[0xb00],
// I[0xc00]} - and stores it through DMA target 0 at the query address.
// Clobbers $r4-$r7, $xtargets, $xdbase.
303cmd_query_get:
304	// if bit 0 of param set, trigger interrupt afterwards.
305	setp $p1 $r3
306	or $r2 3
307
308	// read PTIMER, beware of races...
	// I[0xc00] is sampled before and after I[0xb00]; retry until both
	// samples agree (presumably 0xc00 is the high word - TODO confirm).
309	mov $r4 0xb00
310	ptimer_retry:
311		iord $r6 I[$r4 + 0x100]
312		iord $r5 I[$r4]
313		iord $r7 I[$r4 + 0x100]
314		cmpu b32 $r6 $r7
315	bra ne #ptimer_retry
316
317	// prepare the query structure
318	ld b32 $r4 D[$r0 + #ctx_query_counter]
319	st b32 D[$r0 + #swap + 0x0] $r4
320	st b32 D[$r0 + #swap + 0x4] $r0
321	st b32 D[$r0 + #swap + 0x8] $r5
322	st b32 D[$r0 + #swap + 0xc] $r6
323
324	// will use target 0, DMA_QUERY.
325	mov $xtargets $r0
326
	// transfer base = address high word << 24; the full low word is passed
	// as the transfer offset below
327	ld b32 $r4 D[$r0 + #ctx_query_address_high]
328	shl b32 $r4 0x18
329	mov $xdbase $r4
330
	// $r5 = transfer descriptor: local address #swap, size code 2
331	ld b32 $r4 D[$r0 + #ctx_query_address_low]
332	mov $r5 #swap
333	sethi $r5 0x20000
334	xdst $r4 $r5
335	xdwait
336
337	ret
338
// Handler for common_cmd_dtable index 0x86, called from dtable_cmd.
// In:  $r3 = requested mode, $r2 = method address word << 16, $r0 = 0.
// Out: $r2 |= 2 on every path; $p1 set only when the mode is rejected
//      (>= 5). Otherwise ctx_cond_off is updated:
//   mode 0: 1
//   mode 1: 0
//   mode 2: 1 if the second word of the record at the cond address is 0, else 0
//   mode 3: 0 if both the first and second words of the two consecutive
//           16-byte records at the cond address are equal, else 1
//   mode 4: the inverse of mode 3
// Clobbers $r3-$r6, $xtargets, $xdbase and the #swap buffer.
339cmd_cond_mode:
340	// if >= 5, INVALID_ENUM
341	bset $flags $p1
342	or $r2 2
343	cmpu b32 $r3 5
344	bra nc #return
345
346	// otherwise, no error.
347	bclr $flags $p1
348
349	// if < 2, no QUERY object is involved
350	cmpu b32 $r3 2
351	bra nc #cmd_cond_mode_queryful
352
		// modes 0/1 map to ctx_cond_off 1/0
353		xor $r3 1
354		st b32 D[$r0 + #ctx_cond_off] $r3
355	return:
356		ret
357
358	cmd_cond_mode_queryful:
359	// ok, will need to pull a QUERY object, prepare offsets
	// base = high << 24 | low >> 8, offset ($r6) = low & 0xff
360	ld b32 $r4 D[$r0 + #ctx_cond_address_high]
361	ld b32 $r5 D[$r0 + #ctx_cond_address_low]
362	and $r6 $r5 0xff
363	shr b32 $r5 8
364	shl b32 $r4 0x18
365	or $r4 $r5
366	mov $xdbase $r4
367	mov $xtargets $r0
368
369	// pull the first one
370	mov $r5 #swap
371	sethi $r5 0x20000
372	xdld $r6 $r5
373
374	// if == 2, only a single QUERY is involved...
375	cmpu b32 $r3 2
376	bra ne #cmd_cond_mode_double
377
378		xdwait
		// ctx_cond_off = z flag of (second word == 0)
379		ld b32 $r4 D[$r0 + #swap + 4]
380		cmpu b32 $r4 0
381		xbit $r4 $flags z
382		st b32 D[$r0 + #ctx_cond_off] $r4
383		ret
384
385	// ok, we'll need to pull second one too
386	cmd_cond_mode_double:
	// next 16 bytes of the source go to the second half of #swap
387	add b32 $r6 0x10
388	add b32 $r5 0x10
389	xdld $r6 $r5
390	xdwait
391
392	// compare COUNTERs
393	ld b32 $r5 D[$r0 + #swap + 0x00]
394	ld b32 $r6 D[$r0 + #swap + 0x10]
395	cmpu b32 $r5 $r6
396	xbit $r4 $flags z
397
398	// compare RESen
	// (the second word of each record); $r4 = 1 only if both pairs match
399	ld b32 $r5 D[$r0 + #swap + 0x04]
400	ld b32 $r6 D[$r0 + #swap + 0x14]
401	cmpu b32 $r5 $r6
402	xbit $r5 $flags z
403	and $r4 $r5
404
405	// and negate or not, depending on mode
	// ($r5 = 1 for mode 3, 0 for mode 4)
406	cmpu b32 $r3 3
407	xbit $r5 $flags z
408	xor $r4 $r5
409	st b32 D[$r0 + #ctx_cond_off] $r4
410	ret
411
// Handler for common_cmd_dtable index 0x87, called from dtable_cmd.
// Writes 0x00010000 to I[0x2200] (the same register the PM_TRIGGER path in
// #ih writes 0x00020000 to) and returns with $p1 clear, i.e. no error.
// Clobbers $r2 and $r3.
412cmd_wrcache_flush:
413	bclr $flags $p1
414	mov $r2 0x2200
415	clear b32 $r3
416	sethi $r3 0x10000
417	iowr I[$r2] $r3
418	ret
419
// Handler for engine_cmd_dtable index 0xcc, called from dtable_cmd.
// In:  $r3 = requested mode, $r2 = method address word << 16, $r0 = 0.
// Out: $r2 |= 2 on every path. If $r3 >= 0xf (the number of #sec_dtable
//      entries) returns with $p1 set; otherwise stores $r3 to ctx_mode and
//      returns with $p1 clear.
// The sec_cmd_mode_return label is also the early-exit target of
// sec_cmd_length.
420sec_cmd_mode:
421	// if >= 0xf, INVALID_ENUM
422	bset $flags $p1
423	or $r2 2
424	cmpu b32 $r3 0xf
425	bra nc #sec_cmd_mode_return
426
427		bclr $flags $p1
428		st b32 D[$r0 + #ctx_mode] $r3
429
430	sec_cmd_mode_return:
431	ret
432
// Handler for engine_cmd_dtable index 0xcd, called from dtable_cmd; starts
// the operation selected by ctx_mode over $r3 bytes.
// In:  $r3 = length in bytes (the dispatch table mask forces a multiple of
//      16), $r0 = 0.
// Out: $p1 clear (this handler has no error path); ctx_src/dst addresses
//      advanced by what the data loop consumed; ctx_iv refreshed from the
//      crypto unit.
// Clobbers $r3-$r9, $xtargets, $xdbase and the #swap buffer.
//
// Register contract with the prep/do routines from #sec_dtable:
//   $r4 = source transfer base       $r5 = source offset
//   $r6 = destination transfer base  $r7 = destination offset
//   $r8 = ctx_mode * 4               $r9 = scratch
433sec_cmd_length:
	// The dispatcher branches on $p1 right after the call, and neither this
	// handler nor the routines it calls write $p1. Clear it explicitly, as
	// the other handlers do, so a value left over from an earlier command
	// cannot be reported as an error for this one.
	bclr $flags $p1

434	// nop if length == 0
435	cmpu b32 $r3 0
436	bra e #sec_cmd_mode_return
437
438	// init key, IV
	// NOTE(review): cxset presumably turns the following transfers into
	// crypto-unit transfers, with the high half of the descriptor (7 / 6)
	// selecting $c7 / $c6 - confirm against the falcon crypto docs.
439	cxset 3
440	mov $r4 #ctx_key
441	sethi $r4 0x70000
442	xdst $r0 $r4
443	mov $r4 #ctx_iv
444	sethi $r4 0x60000
445	xdst $r0 $r4
446	xdwait
447	ckeyreg $c7
448
449	// prepare the targets
450	mov $r4 0x2100
451	mov $xtargets $r4
452
453	// prepare src address
	// $r4 = high << 24 | low >> 8 (transfer base), $r5 = low & 0xff (offset)
454	ld b32 $r4 D[$r0 + #ctx_src_address_high]
455	ld b32 $r5 D[$r0 + #ctx_src_address_low]
456	shr b32 $r8 $r5 8
457	shl b32 $r4 0x18
458	or $r4 $r8
459	and $r5 $r5 0xff
460
461	// prepare dst address
	// same split: $r6 = base, $r7 = offset
462	ld b32 $r6 D[$r0 + #ctx_dst_address_high]
463	ld b32 $r7 D[$r0 + #ctx_dst_address_low]
464	shr b32 $r8 $r7 8
465	shl b32 $r6 0x18
466	or $r6 $r8
467	and $r7 $r7 0xff
468
469	// find the proper prep & do functions
	// (#sec_dtable entries are 4 bytes: two b16 code addresses)
470	ld b32 $r8 D[$r0 + #ctx_mode]
471	shl b32 $r8 2
472
473	// run prep
474	ld b16 $r9 D[$r8 + #sec_dtable]
475	call $r9
476
477	// do it
478	ld b16 $r9 D[$r8 + #sec_dtable + 2]
479	call $r9
	// NOTE(review): presumably drains the outstanding crypto transfers
	// before the registers are read back - exact cxset semantics to be
	// confirmed.
480	cxset 1
481	xdwait
482	cxset 0x61
483	xdwait
484	xdwait
485
486	// update src address
	// rebuild the 40-bit address from base and the advanced offset; the
	// offset may have grown past 8 bits, so carry into the high word
487	shr b32 $r8 $r4 0x18
488	shl b32 $r9 $r4 8
489	add b32 $r9 $r5
490	adc b32 $r8 0
491	st b32 D[$r0 + #ctx_src_address_high] $r8
492	st b32 D[$r0 + #ctx_src_address_low] $r9
493
494	// update dst address
495	shr b32 $r8 $r6 0x18
496	shl b32 $r9 $r6 8
497	add b32 $r9 $r7
498	adc b32 $r8 0
499	st b32 D[$r0 + #ctx_dst_address_high] $r8
500	st b32 D[$r0 + #ctx_dst_address_low] $r9
501
502	// pull updated IV
503	cxset 2
504	mov $r4 #ctx_iv
505	sethi $r4 0x60000
506	xdld $r0 $r4
507	xdwait
508
509	ret
510
511
// Prep routine for mode 0 (#sec_dtable entry 0, paired with sec_do_inout).
// Records a 2-instruction crypto script 0 (the cs0begin operand equals the
// number of crypto instructions that follow); the do routine runs it once
// per 16-byte block via cs0exec.
// NOTE(review): cx* semantics are inferred from the mnemonics - confirm
// against the falcon crypto docs.
512sec_copy_prep:
513	cs0begin 2
514		cxsin $c0	// $c0 = next input block
515		cxsout $c0	// emit it unchanged
516	ret
517
// Prep routine for mode 1 (#sec_dtable entry 1, paired with sec_do_out).
// Records a 1-instruction crypto script 0 that only produces output.
// $c6 is the register sec_cmd_length fills from ctx_iv, so each block
// written is presumably that value - TODO confirm.
518sec_store_prep:
519	cs0begin 1
520		cxsout $c6	// emit $c6
521	ret
522
// Prep routine for mode 2 (#sec_dtable entry 2, paired with sec_do_inout).
// Records a 3-instruction crypto script 0, run once per block by the do
// routine. Operands read destination-first, as elsewhere in this file.
// NOTE(review): cx*/cenc semantics are inferred from the mnemonics - confirm
// against the falcon crypto docs.
523sec_ecb_e_prep:
524	cs0begin 3
525		cxsin $c0	// $c0 = input block
526		cenc $c0 $c0	// $c0 = encrypt($c0)
527		cxsout $c0	// emit $c0
528	ret
529
// Prep routine for mode 3 (#sec_dtable entry 3, paired with sec_do_inout).
// Runs ckexp on $c7 (the key register set up by sec_cmd_length) before
// recording the script - presumably deriving the key form needed for
// decryption; TODO confirm. Then records a 3-instruction crypto script 0.
// Operands read destination-first, as elsewhere in this file.
530sec_ecb_d_prep:
531	ckexp $c7 $c7
532	cs0begin 3
533		cxsin $c0	// $c0 = input block
534		cdec $c0 $c0	// $c0 = decrypt($c0)
535		cxsout $c0	// emit $c0
536	ret
537
// Prep routine for mode 4 (#sec_dtable entry 4, paired with sec_do_inout).
// Records a 4-instruction crypto script 0. $c6 is the register
// sec_cmd_length fills from ctx_iv and reads back afterwards; here it
// carries the chaining value from block to block.
// Operands read destination-first; cx*/cenc semantics inferred from the
// mnemonics - confirm against the falcon crypto docs.
538sec_cbc_e_prep:
539	cs0begin 4
540		cxsin $c0	// $c0 = input block
541		cxor $c6 $c0	// $c6 ^= $c0
542		cenc $c6 $c6	// $c6 = encrypt($c6)
543		cxsout $c6	// emit $c6 (also the next chaining value)
544	ret
545
// Prep routine for mode 5 (#sec_dtable entry 5, paired with sec_do_inout).
// Runs ckexp on the key register $c7 first (presumably preparing the
// decryption key - TODO confirm), then records a 5-instruction crypto
// script 0. $c6 holds the chaining value (initially loaded from ctx_iv by
// sec_cmd_length).
// Operands read destination-first; semantics inferred from the mnemonics.
546sec_cbc_d_prep:
547	ckexp $c7 $c7
548	cs0begin 5
549		cmov $c2 $c6	// $c2 = previous chaining value
550		cxsin $c6	// $c6 = input block (becomes the next chaining value)
551		cdec $c0 $c6	// $c0 = decrypt($c6)
552		cxor $c0 $c2	// $c0 ^= previous chaining value
553		cxsout $c0	// emit $c0
554	ret
555
// Prep routine for mode 6 (#sec_dtable entry 6, paired with sec_do_inout).
// Records a 5-instruction crypto script 0. $c6 holds the chaining value
// (initially loaded from ctx_iv by sec_cmd_length); after each block it ends
// up as output XOR input.
// Operands read destination-first; semantics inferred from the mnemonics -
// confirm against the falcon crypto docs.
556sec_pcbc_e_prep:
557	cs0begin 5
558		cxsin $c0	// $c0 = input block
559		cxor $c6 $c0	// $c6 ^= $c0
560		cenc $c6 $c6	// $c6 = encrypt($c6)
561		cxsout $c6	// emit $c6
562		cxor $c6 $c0	// $c6 ^= input block -> next chaining value
563	ret
564
// Prep routine for mode 7 (#sec_dtable entry 7, paired with sec_do_inout).
// Runs ckexp on the key register $c7 first (presumably preparing the
// decryption key - TODO confirm), then records a 5-instruction crypto
// script 0. $c6 holds the chaining value (initially loaded from ctx_iv by
// sec_cmd_length); after each block it ends up as output XOR input.
// Operands read destination-first; semantics inferred from the mnemonics.
565sec_pcbc_d_prep:
566	ckexp $c7 $c7
567	cs0begin 5
568		cxsin $c0	// $c0 = input block
569		cdec $c1 $c0	// $c1 = decrypt($c0)
570		cxor $c6 $c1	// $c6 ^= $c1
571		cxsout $c6	// emit $c6
572		cxor $c6 $c0	// $c6 ^= input block -> next chaining value
573	ret
574
// Prep routine for mode 8 (#sec_dtable entry 8, paired with sec_do_inout).
// Records a 4-instruction crypto script 0. $c6 holds the feedback value
// (initially loaded from ctx_iv by sec_cmd_length); the emitted block stays
// in $c6 as the next feedback value.
// Operands read destination-first; semantics inferred from the mnemonics -
// confirm against the falcon crypto docs.
575sec_cfb_e_prep:
576	cs0begin 4
577		cenc $c6 $c6	// $c6 = encrypt($c6)
578		cxsin $c0	// $c0 = input block
579		cxor $c6 $c0	// $c6 ^= $c0
580		cxsout $c6	// emit $c6
581	ret
582
// Prep routine for mode 9 (#sec_dtable entry 9, paired with sec_do_inout).
// Records a 4-instruction crypto script 0. Only cenc is used (no ckexp /
// cdec). $c6 holds the feedback value (initially loaded from ctx_iv by
// sec_cmd_length) and is replaced by each input block.
// Operands read destination-first; semantics inferred from the mnemonics -
// confirm against the falcon crypto docs.
583sec_cfb_d_prep:
584	cs0begin 4
585		cenc $c0 $c6	// $c0 = encrypt($c6)
586		cxsin $c6	// $c6 = input block (next feedback value)
587		cxor $c0 $c6	// $c0 ^= $c6
588		cxsout $c0	// emit $c0
589	ret
590
// Prep routine for mode 10 (#sec_dtable entry 10, paired with
// sec_do_inout). Records a 4-instruction crypto script 0. $c6 (initially
// loaded from ctx_iv by sec_cmd_length) is re-encrypted for every block and
// never mixed with the data.
// Operands read destination-first; semantics inferred from the mnemonics -
// confirm against the falcon crypto docs.
591sec_ofb_prep:
592	cs0begin 4
593		cenc $c6 $c6	// $c6 = encrypt($c6)
594		cxsin $c0	// $c0 = input block
595		cxor $c0 $c6	// $c0 ^= $c6
596		cxsout $c0	// emit $c0
597	ret
598
// Prep routine for mode 11 (#sec_dtable entry 11, paired with
// sec_do_inout). Records a 5-instruction crypto script 0. $c6 (initially
// loaded from ctx_iv by sec_cmd_length) acts as the counter and is advanced
// by 1 per block.
// Operands read destination-first; semantics inferred from the mnemonics -
// confirm against the falcon crypto docs.
599sec_ctr_prep:
600	cs0begin 5
601		cenc $c1 $c6	// $c1 = encrypt($c6)
602		cadd $c6 1	// $c6 += 1
603		cxsin $c0	// $c0 = input block
604		cxor $c0 $c1	// $c0 ^= $c1
605		cxsout $c0	// emit $c0
606	ret
607
// Prep routine for mode 12 (#sec_dtable entry 12, paired with sec_do_in).
// Records a 3-instruction crypto script 0 with no output instruction: the
// result accumulates in $c6, which sec_cmd_length loads from ctx_iv before
// and stores back to ctx_iv after the run.
// Operands read destination-first; semantics inferred from the mnemonics -
// confirm against the falcon crypto docs.
608sec_cbc_mac_prep:
609	cs0begin 3
610		cxsin $c0	// $c0 = input block
611		cxor $c6 $c0	// $c6 ^= $c0
612		cenc $c6 $c6	// $c6 = encrypt($c6)
613	ret
614
// Prep routine for mode 13 (#sec_dtable entry 13, paired with sec_do_in).
// Records a 7-instruction crypto script 0 with no output instruction; the
// result is left in $c6 (read back into ctx_iv by sec_cmd_length).
// Differs from sec_cmac_finish_partial_prep only in applying cprecmac once
// instead of twice.
// NOTE(review): presumably the single cprecmac derives the first CMAC subkey
// from encrypt(0) - confirm against the falcon crypto docs.
// Operands read destination-first.
615sec_cmac_finish_complete_prep:
616	cs0begin 7
617		cxsin $c0	// $c0 = input block
618		cxor $c6 $c0	// $c6 ^= $c0
619		cxor $c0 $c0	// $c0 = 0
620		cenc $c0 $c0	// $c0 = encrypt(0)
621		cprecmac $c0 $c0
622		cxor $c6 $c0	// $c6 ^= derived value
623		cenc $c6 $c6	// $c6 = encrypt($c6)
624	ret
625
// Prep routine for mode 14 (#sec_dtable entry 14, paired with sec_do_in).
// Records an 8-instruction crypto script 0 with no output instruction; the
// result is left in $c6 (read back into ctx_iv by sec_cmd_length).
// Same as sec_cmac_finish_complete_prep except that cprecmac is applied
// twice.
// NOTE(review): presumably the double cprecmac derives the second CMAC
// subkey from encrypt(0) - confirm against the falcon crypto docs.
// Operands read destination-first.
626sec_cmac_finish_partial_prep:
627	cs0begin 8
628		cxsin $c0	// $c0 = input block
629		cxor $c6 $c0	// $c6 ^= $c0
630		cxor $c0 $c0	// $c0 = 0
631		cenc $c0 $c0	// $c0 = encrypt(0)
632		cprecmac $c0 $c0
633		cprecmac $c0 $c0
634		cxor $c6 $c0	// $c6 ^= derived value
635		cenc $c6 $c6	// $c6 = encrypt($c6)
636	ret
637
638// TODO
// Data loop for the input-only modes (12-14 in #sec_dtable).
// In:  $r3 = length in bytes, $r4 = source transfer base, $r5 = source
//      offset, $r0 = 0; crypto script 0 already recorded by the prep routine.
// Out: $r5 advanced by the length; $r3 = end offset; $r9 clobbered.
// For every 16-byte block: load it from the source into #swap, hand #swap to
// the crypto unit, and run script 0 once.
// The loop exits on equality only, so the length must be a non-zero multiple
// of 16 (sec_cmd_length and the dispatch-table mask ensure this).
// NOTE(review): the meaning of the cxset operands is not visible here -
// confirm against the falcon crypto docs.
639sec_do_in:
	// $r3 = offset at which to stop
640	add b32 $r3 $r5
641	mov $xdbase $r4
	// $r9 = transfer descriptor: local address #swap, size code 2
642	mov $r9 #swap
643	sethi $r9 0x20000
644	sec_do_in_loop:
		// fetch one block from the source into #swap
645		xdld $r5 $r9
646		xdwait
		// pass #swap on to the crypto unit and run the script once
647		cxset 0x22
648		xdst $r0 $r9
649		cs0exec 1
650		xdwait
651		add b32 $r5 0x10
652		cmpu b32 $r5 $r3
653	bra ne #sec_do_in_loop
654	cxset 1
655	xdwait
656	ret
657
// Data loop for the output-only mode (mode 1 in #sec_dtable).
// In:  $r3 = length in bytes, $r6 = destination transfer base, $r7 =
//      destination offset; crypto script 0 already recorded by the prep
//      routine.
// Out: $r7 advanced by the length; $r3 = end offset; $r9 clobbered.
// For every 16-byte block: run script 0 once, then move its output through
// #swap to the destination.
// The loop exits on equality only, so the length must be a non-zero multiple
// of 16 (sec_cmd_length and the dispatch-table mask ensure this).
// NOTE(review): the meaning of the cxset operands, and why an xdld/xdst pair
// is issued under cxset 0x61, is not visible here - confirm against the
// falcon crypto docs.
658sec_do_out:
	// $r3 = offset at which to stop
659	add b32 $r3 $r7
660	mov $xdbase $r6
	// $r9 = transfer descriptor: local address #swap, size code 2
661	mov $r9 #swap
662	sethi $r9 0x20000
663	sec_do_out_loop:
664		cs0exec 1
665		cxset 0x61
666		xdld $r7 $r9
667		xdst $r7 $r9
668		cxset 1
669		xdwait
670		add b32 $r7 0x10
671		cmpu b32 $r7 $r3
672	bra ne #sec_do_out_loop
673	ret
674
// Data loop for the modes that both read and write (modes 0, 2-11 in
// #sec_dtable).
// In:  $r3 = length in bytes, $r4 / $r5 = source transfer base / offset,
//      $r6 / $r7 = destination transfer base / offset, $r0 = 0; crypto
//      script 0 already recorded by the prep routine.
// Out: $r5 and $r7 advanced by the length; $r3 = source end offset; $r9
//      clobbered.
// For every 16-byte block: load it from the source into #swap, hand it to
// the crypto unit, run script 0 once, then move the output through #swap to
// the destination. $xdbase is switched between source and destination inside
// the loop because both directions share it.
// The loop exits on equality only, so the length must be a non-zero multiple
// of 16 (sec_cmd_length and the dispatch-table mask ensure this).
// NOTE(review): the meaning of the cxset operands is not visible here -
// confirm against the falcon crypto docs.
675sec_do_inout:
	// $r3 = source offset at which to stop
676	add b32 $r3 $r5
	// $r9 = transfer descriptor: local address #swap, size code 2
677	mov $r9 #swap
678	sethi $r9 0x20000
679	sec_do_inout_loop:
		// input half: source -> #swap -> crypto unit
680		mov $xdbase $r4
681		xdld $r5 $r9
682		xdwait
683		cxset 0x21
684		xdst $r0 $r9
685		cs0exec 1
		// output half: crypto unit -> #swap -> destination
686		cxset 0x61
687		mov $xdbase $r6
688		xdld $r7 $r9
689		xdst $r7 $r9
690		cxset 1
691		xdwait
692		add b32 $r5 0x10
693		add b32 $r7 0x10
694		cmpu b32 $r5 $r3
695	bra ne #sec_do_inout_loop
696	ret
697
// Pad the code segment to a multiple of 0x100 bytes.
698.align 0x100
699