1/*
2 *  linux/arch/arm/vfp/vfpdouble.c
3 *
4 * This code is derived in part from John R. Housers softfloat library, which
5 * carries the following notice:
6 *
7 * ===========================================================================
8 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
9 * Arithmetic Package, Release 2.
10 *
11 * Written by John R. Hauser.  This work was made possible in part by the
12 * International Computer Science Institute, located at Suite 600, 1947 Center
13 * Street, Berkeley, California 94704.  Funding was partially provided by the
14 * National Science Foundation under grant MIP-9311980.  The original version
15 * of this code was written as part of a project to build a fixed-point vector
16 * processor in collaboration with the University of California at Berkeley,
17 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
18 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
19 * arithmetic/softfloat.html'.
20 *
21 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
22 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
23 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
24 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
25 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
26 *
27 * Derivative works are acceptable, even for commercial purposes, so long as
28 * (1) they include prominent notice that the work is derivative, and (2) they
29 * include prominent notice akin to these three paragraphs for those parts of
30 * this code that are retained.
31 * ===========================================================================
32 */
33#include <linux/kernel.h>
34#include <linux/bitops.h>
35
36#include <asm/div64.h>
37#include <asm/vfp.h>
38
39#include "vfpinstr.h"
40#include "vfp.h"
41
42static struct vfp_double vfp_double_default_qnan = {
43	.exponent	= 2047,
44	.sign		= 0,
45	.significand	= VFP_DOUBLE_SIGNIFICAND_QNAN,
46};
47
48static void vfp_double_dump(const char *str, struct vfp_double *d)
49{
50	pr_debug("VFP: %s: sign=%d exponent=%d significand=%016llx\n",
51		 str, d->sign != 0, d->exponent, d->significand);
52}
53
54static void vfp_double_normalise_denormal(struct vfp_double *vd)
55{
56	int bits = 31 - fls(vd->significand >> 32);
57	if (bits == 31)
58		bits = 63 - fls(vd->significand);
59
60	vfp_double_dump("normalise_denormal: in", vd);
61
62	if (bits) {
63		vd->exponent -= bits - 1;
64		vd->significand <<= bits;
65	}
66
67	vfp_double_dump("normalise_denormal: out", vd);
68}
69
70u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func)
71{
72	u64 significand, incr;
73	int exponent, shift, underflow;
74	u32 rmode;
75
76	vfp_double_dump("pack: in", vd);
77
78	/*
79	 * Infinities and NaNs are a special case.
80	 */
81	if (vd->exponent == 2047 && (vd->significand == 0 || exceptions))
82		goto pack;
83
84	/*
85	 * Special-case zero.
86	 */
87	if (vd->significand == 0) {
88		vd->exponent = 0;
89		goto pack;
90	}
91
92	exponent = vd->exponent;
93	significand = vd->significand;
94
95	shift = 32 - fls(significand >> 32);
96	if (shift == 32)
97		shift = 64 - fls(significand);
98	if (shift) {
99		exponent -= shift;
100		significand <<= shift;
101	}
102
103#ifdef DEBUG
104	vd->exponent = exponent;
105	vd->significand = significand;
106	vfp_double_dump("pack: normalised", vd);
107#endif
108
109	/*
110	 * Tiny number?
111	 */
112	underflow = exponent < 0;
113	if (underflow) {
114		significand = vfp_shiftright64jamming(significand, -exponent);
115		exponent = 0;
116#ifdef DEBUG
117		vd->exponent = exponent;
118		vd->significand = significand;
119		vfp_double_dump("pack: tiny number", vd);
120#endif
121		if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1)))
122			underflow = 0;
123	}
124
125	/*
126	 * Select rounding increment.
127	 */
128	incr = 0;
129	rmode = fpscr & FPSCR_RMODE_MASK;
130
131	if (rmode == FPSCR_ROUND_NEAREST) {
132		incr = 1ULL << VFP_DOUBLE_LOW_BITS;
133		if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0)
134			incr -= 1;
135	} else if (rmode == FPSCR_ROUND_TOZERO) {
136		incr = 0;
137	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0))
138		incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1;
139
140	pr_debug("VFP: rounding increment = 0x%08llx\n", incr);
141
142	/*
143	 * Is our rounding going to overflow?
144	 */
145	if ((significand + incr) < significand) {
146		exponent += 1;
147		significand = (significand >> 1) | (significand & 1);
148		incr >>= 1;
149#ifdef DEBUG
150		vd->exponent = exponent;
151		vd->significand = significand;
152		vfp_double_dump("pack: overflow", vd);
153#endif
154	}
155
156	/*
157	 * If any of the low bits (which will be shifted out of the
158	 * number) are non-zero, the result is inexact.
159	 */
160	if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1))
161		exceptions |= FPSCR_IXC;
162
163	/*
164	 * Do our rounding.
165	 */
166	significand += incr;
167
168	/*
169	 * Infinity?
170	 */
171	if (exponent >= 2046) {
172		exceptions |= FPSCR_OFC | FPSCR_IXC;
173		if (incr == 0) {
174			vd->exponent = 2045;
175			vd->significand = 0x7fffffffffffffffULL;
176		} else {
177			vd->exponent = 2047;		/* infinity */
178			vd->significand = 0;
179		}
180	} else {
181		if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0)
182			exponent = 0;
183		if (exponent || significand > 0x8000000000000000ULL)
184			underflow = 0;
185		if (underflow)
186			exceptions |= FPSCR_UFC;
187		vd->exponent = exponent;
188		vd->significand = significand >> 1;
189	}
190
191 pack:
192	vfp_double_dump("pack: final", vd);
193	{
194		s64 d = vfp_double_pack(vd);
195		pr_debug("VFP: %s: d(d%d)=%016llx exceptions=%08x\n", func,
196			 dd, d, exceptions);
197		vfp_put_double(d, dd);
198	}
199	return exceptions;
200}
201
202/*
203 * Propagate the NaN, setting exceptions if it is signalling.
204 * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
205 */
206static u32
207vfp_propagate_nan(struct vfp_double *vdd, struct vfp_double *vdn,
208		  struct vfp_double *vdm, u32 fpscr)
209{
210	struct vfp_double *nan;
211	int tn, tm = 0;
212
213	tn = vfp_double_type(vdn);
214
215	if (vdm)
216		tm = vfp_double_type(vdm);
217
218	if (fpscr & FPSCR_DEFAULT_NAN)
219		/*
220		 * Default NaN mode - always returns a quiet NaN
221		 */
222		nan = &vfp_double_default_qnan;
223	else {
224		/*
225		 * Contemporary mode - select the first signalling
226		 * NAN, or if neither are signalling, the first
227		 * quiet NAN.
228		 */
229		if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
230			nan = vdn;
231		else
232			nan = vdm;
233		/*
234		 * Make the NaN quiet.
235		 */
236		nan->significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
237	}
238
239	*vdd = *nan;
240
241	/*
242	 * If one was a signalling NAN, raise invalid operation.
243	 */
244	return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
245}
246
247/*
248 * Extended operations
249 */
250static u32 vfp_double_fabs(int dd, int unused, int dm, u32 fpscr)
251{
252	vfp_put_double(vfp_double_packed_abs(vfp_get_double(dm)), dd);
253	return 0;
254}
255
256static u32 vfp_double_fcpy(int dd, int unused, int dm, u32 fpscr)
257{
258	vfp_put_double(vfp_get_double(dm), dd);
259	return 0;
260}
261
262static u32 vfp_double_fneg(int dd, int unused, int dm, u32 fpscr)
263{
264	vfp_put_double(vfp_double_packed_negate(vfp_get_double(dm)), dd);
265	return 0;
266}
267
268static u32 vfp_double_fsqrt(int dd, int unused, int dm, u32 fpscr)
269{
270	struct vfp_double vdm, vdd;
271	int ret, tm;
272
273	vfp_double_unpack(&vdm, vfp_get_double(dm));
274	tm = vfp_double_type(&vdm);
275	if (tm & (VFP_NAN|VFP_INFINITY)) {
276		struct vfp_double *vdp = &vdd;
277
278		if (tm & VFP_NAN)
279			ret = vfp_propagate_nan(vdp, &vdm, NULL, fpscr);
280		else if (vdm.sign == 0) {
281 sqrt_copy:
282			vdp = &vdm;
283			ret = 0;
284		} else {
285 sqrt_invalid:
286			vdp = &vfp_double_default_qnan;
287			ret = FPSCR_IOC;
288		}
289		vfp_put_double(vfp_double_pack(vdp), dd);
290		return ret;
291	}
292
293	/*
294	 * sqrt(+/- 0) == +/- 0
295	 */
296	if (tm & VFP_ZERO)
297		goto sqrt_copy;
298
299	/*
300	 * Normalise a denormalised number
301	 */
302	if (tm & VFP_DENORMAL)
303		vfp_double_normalise_denormal(&vdm);
304
305	/*
306	 * sqrt(<0) = invalid
307	 */
308	if (vdm.sign)
309		goto sqrt_invalid;
310
311	vfp_double_dump("sqrt", &vdm);
312
313	/*
314	 * Estimate the square root.
315	 */
316	vdd.sign = 0;
317	vdd.exponent = ((vdm.exponent - 1023) >> 1) + 1023;
318	vdd.significand = (u64)vfp_estimate_sqrt_significand(vdm.exponent, vdm.significand >> 32) << 31;
319
320	vfp_double_dump("sqrt estimate1", &vdd);
321
322	vdm.significand >>= 1 + (vdm.exponent & 1);
323	vdd.significand += 2 + vfp_estimate_div128to64(vdm.significand, 0, vdd.significand);
324
325	vfp_double_dump("sqrt estimate2", &vdd);
326
327	/*
328	 * And now adjust.
329	 */
330	if ((vdd.significand & VFP_DOUBLE_LOW_BITS_MASK) <= 5) {
331		if (vdd.significand < 2) {
332			vdd.significand = ~0ULL;
333		} else {
334			u64 termh, terml, remh, reml;
335			vdm.significand <<= 2;
336			mul64to128(&termh, &terml, vdd.significand, vdd.significand);
337			sub128(&remh, &reml, vdm.significand, 0, termh, terml);
338			while ((s64)remh < 0) {
339				vdd.significand -= 1;
340				shift64left(&termh, &terml, vdd.significand);
341				terml |= 1;
342				add128(&remh, &reml, remh, reml, termh, terml);
343			}
344			vdd.significand |= (remh | reml) != 0;
345		}
346	}
347	vdd.significand = vfp_shiftright64jamming(vdd.significand, 1);
348
349	return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fsqrt");
350}
351
352/*
353 * Equal	:= ZC
354 * Less than	:= N
355 * Greater than	:= C
356 * Unordered	:= CV
357 */
358static u32 vfp_compare(int dd, int signal_on_qnan, int dm, u32 fpscr)
359{
360	s64 d, m;
361	u32 ret = 0;
362
363	m = vfp_get_double(dm);
364	if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) {
365		ret |= FPSCR_C | FPSCR_V;
366		if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
367			/*
368			 * Signalling NaN, or signalling on quiet NaN
369			 */
370			ret |= FPSCR_IOC;
371	}
372
373	d = vfp_get_double(dd);
374	if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) {
375		ret |= FPSCR_C | FPSCR_V;
376		if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1))))
377			/*
378			 * Signalling NaN, or signalling on quiet NaN
379			 */
380			ret |= FPSCR_IOC;
381	}
382
383	if (ret == 0) {
384		if (d == m || vfp_double_packed_abs(d | m) == 0) {
385			/*
386			 * equal
387			 */
388			ret |= FPSCR_Z | FPSCR_C;
389		} else if (vfp_double_packed_sign(d ^ m)) {
390			/*
391			 * different signs
392			 */
393			if (vfp_double_packed_sign(d))
394				/*
395				 * d is negative, so d < m
396				 */
397				ret |= FPSCR_N;
398			else
399				/*
400				 * d is positive, so d > m
401				 */
402				ret |= FPSCR_C;
403		} else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) {
404			/*
405			 * d < m
406			 */
407			ret |= FPSCR_N;
408		} else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) {
409			/*
410			 * d > m
411			 */
412			ret |= FPSCR_C;
413		}
414	}
415
416	return ret;
417}
418
419static u32 vfp_double_fcmp(int dd, int unused, int dm, u32 fpscr)
420{
421	return vfp_compare(dd, 0, dm, fpscr);
422}
423
424static u32 vfp_double_fcmpe(int dd, int unused, int dm, u32 fpscr)
425{
426	return vfp_compare(dd, 1, dm, fpscr);
427}
428
429static u32 vfp_double_fcmpz(int dd, int unused, int dm, u32 fpscr)
430{
431	return vfp_compare(dd, 0, VFP_REG_ZERO, fpscr);
432}
433
434static u32 vfp_double_fcmpez(int dd, int unused, int dm, u32 fpscr)
435{
436	return vfp_compare(dd, 1, VFP_REG_ZERO, fpscr);
437}
438
439static u32 vfp_double_fcvts(int sd, int unused, int dm, u32 fpscr)
440{
441	struct vfp_double vdm;
442	struct vfp_single vsd;
443	int tm;
444	u32 exceptions = 0;
445
446	vfp_double_unpack(&vdm, vfp_get_double(dm));
447
448	tm = vfp_double_type(&vdm);
449
450	/*
451	 * If we have a signalling NaN, signal invalid operation.
452	 */
453	if (tm == VFP_SNAN)
454		exceptions = FPSCR_IOC;
455
456	if (tm & VFP_DENORMAL)
457		vfp_double_normalise_denormal(&vdm);
458
459	vsd.sign = vdm.sign;
460	vsd.significand = vfp_hi64to32jamming(vdm.significand);
461
462	/*
463	 * If we have an infinity or a NaN, the exponent must be 255
464	 */
465	if (tm & (VFP_INFINITY|VFP_NAN)) {
466		vsd.exponent = 255;
467		if (tm == VFP_QNAN)
468			vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
469		goto pack_nan;
470	} else if (tm & VFP_ZERO)
471		vsd.exponent = 0;
472	else
473		vsd.exponent = vdm.exponent - (1023 - 127);
474
475	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fcvts");
476
477 pack_nan:
478	vfp_put_float(vfp_single_pack(&vsd), sd);
479	return exceptions;
480}
481
482static u32 vfp_double_fuito(int dd, int unused, int dm, u32 fpscr)
483{
484	struct vfp_double vdm;
485	u32 m = vfp_get_float(dm);
486
487	vdm.sign = 0;
488	vdm.exponent = 1023 + 63 - 1;
489	vdm.significand = (u64)m;
490
491	return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fuito");
492}
493
494static u32 vfp_double_fsito(int dd, int unused, int dm, u32 fpscr)
495{
496	struct vfp_double vdm;
497	u32 m = vfp_get_float(dm);
498
499	vdm.sign = (m & 0x80000000) >> 16;
500	vdm.exponent = 1023 + 63 - 1;
501	vdm.significand = vdm.sign ? -m : m;
502
503	return vfp_double_normaliseround(dd, &vdm, fpscr, 0, "fsito");
504}
505
506static u32 vfp_double_ftoui(int sd, int unused, int dm, u32 fpscr)
507{
508	struct vfp_double vdm;
509	u32 d, exceptions = 0;
510	int rmode = fpscr & FPSCR_RMODE_MASK;
511	int tm;
512
513	vfp_double_unpack(&vdm, vfp_get_double(dm));
514
515	/*
516	 * Do we have a denormalised number?
517	 */
518	tm = vfp_double_type(&vdm);
519	if (tm & VFP_DENORMAL)
520		exceptions |= FPSCR_IDC;
521
522	if (tm & VFP_NAN)
523		vdm.sign = 0;
524
525	if (vdm.exponent >= 1023 + 32) {
526		d = vdm.sign ? 0 : 0xffffffff;
527		exceptions = FPSCR_IOC;
528	} else if (vdm.exponent >= 1023 - 1) {
529		int shift = 1023 + 63 - vdm.exponent;
530		u64 rem, incr = 0;
531
532		/*
533		 * 2^0 <= m < 2^32-2^8
534		 */
535		d = (vdm.significand << 1) >> shift;
536		rem = vdm.significand << (65 - shift);
537
538		if (rmode == FPSCR_ROUND_NEAREST) {
539			incr = 0x8000000000000000ULL;
540			if ((d & 1) == 0)
541				incr -= 1;
542		} else if (rmode == FPSCR_ROUND_TOZERO) {
543			incr = 0;
544		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
545			incr = ~0ULL;
546		}
547
548		if ((rem + incr) < rem) {
549			if (d < 0xffffffff)
550				d += 1;
551			else
552				exceptions |= FPSCR_IOC;
553		}
554
555		if (d && vdm.sign) {
556			d = 0;
557			exceptions |= FPSCR_IOC;
558		} else if (rem)
559			exceptions |= FPSCR_IXC;
560	} else {
561		d = 0;
562		if (vdm.exponent | vdm.significand) {
563			exceptions |= FPSCR_IXC;
564			if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
565				d = 1;
566			else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign) {
567				d = 0;
568				exceptions |= FPSCR_IOC;
569			}
570		}
571	}
572
573	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
574
575	vfp_put_float(d, sd);
576
577	return exceptions;
578}
579
580static u32 vfp_double_ftouiz(int sd, int unused, int dm, u32 fpscr)
581{
582	return vfp_double_ftoui(sd, unused, dm, FPSCR_ROUND_TOZERO);
583}
584
585static u32 vfp_double_ftosi(int sd, int unused, int dm, u32 fpscr)
586{
587	struct vfp_double vdm;
588	u32 d, exceptions = 0;
589	int rmode = fpscr & FPSCR_RMODE_MASK;
590	int tm;
591
592	vfp_double_unpack(&vdm, vfp_get_double(dm));
593	vfp_double_dump("VDM", &vdm);
594
595	/*
596	 * Do we have denormalised number?
597	 */
598	tm = vfp_double_type(&vdm);
599	if (tm & VFP_DENORMAL)
600		exceptions |= FPSCR_IDC;
601
602	if (tm & VFP_NAN) {
603		d = 0;
604		exceptions |= FPSCR_IOC;
605	} else if (vdm.exponent >= 1023 + 32) {
606		d = 0x7fffffff;
607		if (vdm.sign)
608			d = ~d;
609		exceptions |= FPSCR_IOC;
610	} else if (vdm.exponent >= 1023 - 1) {
611		int shift = 1023 + 63 - vdm.exponent;	/* 58 */
612		u64 rem, incr = 0;
613
614		d = (vdm.significand << 1) >> shift;
615		rem = vdm.significand << (65 - shift);
616
617		if (rmode == FPSCR_ROUND_NEAREST) {
618			incr = 0x8000000000000000ULL;
619			if ((d & 1) == 0)
620				incr -= 1;
621		} else if (rmode == FPSCR_ROUND_TOZERO) {
622			incr = 0;
623		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vdm.sign != 0)) {
624			incr = ~0ULL;
625		}
626
627		if ((rem + incr) < rem && d < 0xffffffff)
628			d += 1;
629		if (d > 0x7fffffff + (vdm.sign != 0)) {
630			d = 0x7fffffff + (vdm.sign != 0);
631			exceptions |= FPSCR_IOC;
632		} else if (rem)
633			exceptions |= FPSCR_IXC;
634
635		if (vdm.sign)
636			d = -d;
637	} else {
638		d = 0;
639		if (vdm.exponent | vdm.significand) {
640			exceptions |= FPSCR_IXC;
641			if (rmode == FPSCR_ROUND_PLUSINF && vdm.sign == 0)
642				d = 1;
643			else if (rmode == FPSCR_ROUND_MINUSINF && vdm.sign)
644				d = -1;
645		}
646	}
647
648	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
649
650	vfp_put_float((s32)d, sd);
651
652	return exceptions;
653}
654
655static u32 vfp_double_ftosiz(int dd, int unused, int dm, u32 fpscr)
656{
657	return vfp_double_ftosi(dd, unused, dm, FPSCR_ROUND_TOZERO);
658}
659
660
661static struct op fops_ext[32] = {
662	[FEXT_TO_IDX(FEXT_FCPY)]	= { vfp_double_fcpy,   0 },
663	[FEXT_TO_IDX(FEXT_FABS)]	= { vfp_double_fabs,   0 },
664	[FEXT_TO_IDX(FEXT_FNEG)]	= { vfp_double_fneg,   0 },
665	[FEXT_TO_IDX(FEXT_FSQRT)]	= { vfp_double_fsqrt,  0 },
666	[FEXT_TO_IDX(FEXT_FCMP)]	= { vfp_double_fcmp,   OP_SCALAR },
667	[FEXT_TO_IDX(FEXT_FCMPE)]	= { vfp_double_fcmpe,  OP_SCALAR },
668	[FEXT_TO_IDX(FEXT_FCMPZ)]	= { vfp_double_fcmpz,  OP_SCALAR },
669	[FEXT_TO_IDX(FEXT_FCMPEZ)]	= { vfp_double_fcmpez, OP_SCALAR },
670	[FEXT_TO_IDX(FEXT_FCVT)]	= { vfp_double_fcvts,  OP_SCALAR|OP_SD },
671	[FEXT_TO_IDX(FEXT_FUITO)]	= { vfp_double_fuito,  OP_SCALAR|OP_SM },
672	[FEXT_TO_IDX(FEXT_FSITO)]	= { vfp_double_fsito,  OP_SCALAR|OP_SM },
673	[FEXT_TO_IDX(FEXT_FTOUI)]	= { vfp_double_ftoui,  OP_SCALAR|OP_SD },
674	[FEXT_TO_IDX(FEXT_FTOUIZ)]	= { vfp_double_ftouiz, OP_SCALAR|OP_SD },
675	[FEXT_TO_IDX(FEXT_FTOSI)]	= { vfp_double_ftosi,  OP_SCALAR|OP_SD },
676	[FEXT_TO_IDX(FEXT_FTOSIZ)]	= { vfp_double_ftosiz, OP_SCALAR|OP_SD },
677};
678
679
680
681
682static u32
683vfp_double_fadd_nonnumber(struct vfp_double *vdd, struct vfp_double *vdn,
684			  struct vfp_double *vdm, u32 fpscr)
685{
686	struct vfp_double *vdp;
687	u32 exceptions = 0;
688	int tn, tm;
689
690	tn = vfp_double_type(vdn);
691	tm = vfp_double_type(vdm);
692
693	if (tn & tm & VFP_INFINITY) {
694		/*
695		 * Two infinities.  Are they different signs?
696		 */
697		if (vdn->sign ^ vdm->sign) {
698			/*
699			 * different signs -> invalid
700			 */
701			exceptions = FPSCR_IOC;
702			vdp = &vfp_double_default_qnan;
703		} else {
704			/*
705			 * same signs -> valid
706			 */
707			vdp = vdn;
708		}
709	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
710		/*
711		 * One infinity and one number -> infinity
712		 */
713		vdp = vdn;
714	} else {
715		/*
716		 * 'n' is a NaN of some type
717		 */
718		return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
719	}
720	*vdd = *vdp;
721	return exceptions;
722}
723
724static u32
725vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,
726	       struct vfp_double *vdm, u32 fpscr)
727{
728	u32 exp_diff;
729	u64 m_sig;
730
731	if (vdn->significand & (1ULL << 63) ||
732	    vdm->significand & (1ULL << 63)) {
733		pr_info("VFP: bad FP values in %s\n", __func__);
734		vfp_double_dump("VDN", vdn);
735		vfp_double_dump("VDM", vdm);
736	}
737
738	/*
739	 * Ensure that 'n' is the largest magnitude number.  Note that
740	 * if 'n' and 'm' have equal exponents, we do not swap them.
741	 * This ensures that NaN propagation works correctly.
742	 */
743	if (vdn->exponent < vdm->exponent) {
744		struct vfp_double *t = vdn;
745		vdn = vdm;
746		vdm = t;
747	}
748
749	/*
750	 * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
751	 * infinity or a NaN here.
752	 */
753	if (vdn->exponent == 2047)
754		return vfp_double_fadd_nonnumber(vdd, vdn, vdm, fpscr);
755
756	/*
757	 * We have two proper numbers, where 'vdn' is the larger magnitude.
758	 *
759	 * Copy 'n' to 'd' before doing the arithmetic.
760	 */
761	*vdd = *vdn;
762
763	/*
764	 * Align 'm' with the result.
765	 */
766	exp_diff = vdn->exponent - vdm->exponent;
767	m_sig = vfp_shiftright64jamming(vdm->significand, exp_diff);
768
769	/*
770	 * If the signs are different, we are really subtracting.
771	 */
772	if (vdn->sign ^ vdm->sign) {
773		m_sig = vdn->significand - m_sig;
774		if ((s64)m_sig < 0) {
775			vdd->sign = vfp_sign_negate(vdd->sign);
776			m_sig = -m_sig;
777		} else if (m_sig == 0) {
778			vdd->sign = (fpscr & FPSCR_RMODE_MASK) ==
779				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
780		}
781	} else {
782		m_sig += vdn->significand;
783	}
784	vdd->significand = m_sig;
785
786	return 0;
787}
788
789static u32
790vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn,
791		    struct vfp_double *vdm, u32 fpscr)
792{
793	vfp_double_dump("VDN", vdn);
794	vfp_double_dump("VDM", vdm);
795
796	/*
797	 * Ensure that 'n' is the largest magnitude number.  Note that
798	 * if 'n' and 'm' have equal exponents, we do not swap them.
799	 * This ensures that NaN propagation works correctly.
800	 */
801	if (vdn->exponent < vdm->exponent) {
802		struct vfp_double *t = vdn;
803		vdn = vdm;
804		vdm = t;
805		pr_debug("VFP: swapping M <-> N\n");
806	}
807
808	vdd->sign = vdn->sign ^ vdm->sign;
809
810	/*
811	 * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
812	 */
813	if (vdn->exponent == 2047) {
814		if (vdn->significand || (vdm->exponent == 2047 && vdm->significand))
815			return vfp_propagate_nan(vdd, vdn, vdm, fpscr);
816		if ((vdm->exponent | vdm->significand) == 0) {
817			*vdd = vfp_double_default_qnan;
818			return FPSCR_IOC;
819		}
820		vdd->exponent = vdn->exponent;
821		vdd->significand = 0;
822		return 0;
823	}
824
825	/*
826	 * If 'm' is zero, the result is always zero.  In this case,
827	 * 'n' may be zero or a number, but it doesn't matter which.
828	 */
829	if ((vdm->exponent | vdm->significand) == 0) {
830		vdd->exponent = 0;
831		vdd->significand = 0;
832		return 0;
833	}
834
835	/*
836	 * We add 2 to the destination exponent for the same reason
837	 * as the addition case - though this time we have +1 from
838	 * each input operand.
839	 */
840	vdd->exponent = vdn->exponent + vdm->exponent - 1023 + 2;
841	vdd->significand = vfp_hi64multiply64(vdn->significand, vdm->significand);
842
843	vfp_double_dump("VDD", vdd);
844	return 0;
845}
846
847#define NEG_MULTIPLY	(1 << 0)
848#define NEG_SUBTRACT	(1 << 1)
849
850static u32
851vfp_double_multiply_accumulate(int dd, int dn, int dm, u32 fpscr, u32 negate, char *func)
852{
853	struct vfp_double vdd, vdp, vdn, vdm;
854	u32 exceptions;
855
856	vfp_double_unpack(&vdn, vfp_get_double(dn));
857	if (vdn.exponent == 0 && vdn.significand)
858		vfp_double_normalise_denormal(&vdn);
859
860	vfp_double_unpack(&vdm, vfp_get_double(dm));
861	if (vdm.exponent == 0 && vdm.significand)
862		vfp_double_normalise_denormal(&vdm);
863
864	exceptions = vfp_double_multiply(&vdp, &vdn, &vdm, fpscr);
865	if (negate & NEG_MULTIPLY)
866		vdp.sign = vfp_sign_negate(vdp.sign);
867
868	vfp_double_unpack(&vdn, vfp_get_double(dd));
869	if (vdn.exponent == 0 && vdn.significand)
870		vfp_double_normalise_denormal(&vdn);
871	if (negate & NEG_SUBTRACT)
872		vdn.sign = vfp_sign_negate(vdn.sign);
873
874	exceptions |= vfp_double_add(&vdd, &vdn, &vdp, fpscr);
875
876	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, func);
877}
878
879/*
880 * Standard operations
881 */
882
883/*
884 * sd = sd + (sn * sm)
885 */
886static u32 vfp_double_fmac(int dd, int dn, int dm, u32 fpscr)
887{
888	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, 0, "fmac");
889}
890
891/*
892 * sd = sd - (sn * sm)
893 */
894static u32 vfp_double_fnmac(int dd, int dn, int dm, u32 fpscr)
895{
896	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_MULTIPLY, "fnmac");
897}
898
899/*
900 * sd = -sd + (sn * sm)
901 */
902static u32 vfp_double_fmsc(int dd, int dn, int dm, u32 fpscr)
903{
904	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT, "fmsc");
905}
906
907/*
908 * sd = -sd - (sn * sm)
909 */
910static u32 vfp_double_fnmsc(int dd, int dn, int dm, u32 fpscr)
911{
912	return vfp_double_multiply_accumulate(dd, dn, dm, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
913}
914
915/*
916 * sd = sn * sm
917 */
918static u32 vfp_double_fmul(int dd, int dn, int dm, u32 fpscr)
919{
920	struct vfp_double vdd, vdn, vdm;
921	u32 exceptions;
922
923	vfp_double_unpack(&vdn, vfp_get_double(dn));
924	if (vdn.exponent == 0 && vdn.significand)
925		vfp_double_normalise_denormal(&vdn);
926
927	vfp_double_unpack(&vdm, vfp_get_double(dm));
928	if (vdm.exponent == 0 && vdm.significand)
929		vfp_double_normalise_denormal(&vdm);
930
931	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
932	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fmul");
933}
934
935/*
936 * sd = -(sn * sm)
937 */
938static u32 vfp_double_fnmul(int dd, int dn, int dm, u32 fpscr)
939{
940	struct vfp_double vdd, vdn, vdm;
941	u32 exceptions;
942
943	vfp_double_unpack(&vdn, vfp_get_double(dn));
944	if (vdn.exponent == 0 && vdn.significand)
945		vfp_double_normalise_denormal(&vdn);
946
947	vfp_double_unpack(&vdm, vfp_get_double(dm));
948	if (vdm.exponent == 0 && vdm.significand)
949		vfp_double_normalise_denormal(&vdm);
950
951	exceptions = vfp_double_multiply(&vdd, &vdn, &vdm, fpscr);
952	vdd.sign = vfp_sign_negate(vdd.sign);
953
954	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fnmul");
955}
956
957/*
958 * sd = sn + sm
959 */
960static u32 vfp_double_fadd(int dd, int dn, int dm, u32 fpscr)
961{
962	struct vfp_double vdd, vdn, vdm;
963	u32 exceptions;
964
965	vfp_double_unpack(&vdn, vfp_get_double(dn));
966	if (vdn.exponent == 0 && vdn.significand)
967		vfp_double_normalise_denormal(&vdn);
968
969	vfp_double_unpack(&vdm, vfp_get_double(dm));
970	if (vdm.exponent == 0 && vdm.significand)
971		vfp_double_normalise_denormal(&vdm);
972
973	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);
974
975	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fadd");
976}
977
978/*
979 * sd = sn - sm
980 */
981static u32 vfp_double_fsub(int dd, int dn, int dm, u32 fpscr)
982{
983	struct vfp_double vdd, vdn, vdm;
984	u32 exceptions;
985
986	vfp_double_unpack(&vdn, vfp_get_double(dn));
987	if (vdn.exponent == 0 && vdn.significand)
988		vfp_double_normalise_denormal(&vdn);
989
990	vfp_double_unpack(&vdm, vfp_get_double(dm));
991	if (vdm.exponent == 0 && vdm.significand)
992		vfp_double_normalise_denormal(&vdm);
993
994	/*
995	 * Subtraction is like addition, but with a negated operand.
996	 */
997	vdm.sign = vfp_sign_negate(vdm.sign);
998
999	exceptions = vfp_double_add(&vdd, &vdn, &vdm, fpscr);
1000
1001	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fsub");
1002}
1003
1004/*
1005 * sd = sn / sm
1006 */
1007static u32 vfp_double_fdiv(int dd, int dn, int dm, u32 fpscr)
1008{
1009	struct vfp_double vdd, vdn, vdm;
1010	u32 exceptions = 0;
1011	int tm, tn;
1012
1013	vfp_double_unpack(&vdn, vfp_get_double(dn));
1014	vfp_double_unpack(&vdm, vfp_get_double(dm));
1015
1016	vdd.sign = vdn.sign ^ vdm.sign;
1017
1018	tn = vfp_double_type(&vdn);
1019	tm = vfp_double_type(&vdm);
1020
1021	/*
1022	 * Is n a NAN?
1023	 */
1024	if (tn & VFP_NAN)
1025		goto vdn_nan;
1026
1027	/*
1028	 * Is m a NAN?
1029	 */
1030	if (tm & VFP_NAN)
1031		goto vdm_nan;
1032
1033	/*
1034	 * If n and m are infinity, the result is invalid
1035	 * If n and m are zero, the result is invalid
1036	 */
1037	if (tm & tn & (VFP_INFINITY|VFP_ZERO))
1038		goto invalid;
1039
1040	/*
1041	 * If n is infinity, the result is infinity
1042	 */
1043	if (tn & VFP_INFINITY)
1044		goto infinity;
1045
1046	/*
1047	 * If m is zero, raise div0 exceptions
1048	 */
1049	if (tm & VFP_ZERO)
1050		goto divzero;
1051
1052	/*
1053	 * If m is infinity, or n is zero, the result is zero
1054	 */
1055	if (tm & VFP_INFINITY || tn & VFP_ZERO)
1056		goto zero;
1057
1058	if (tn & VFP_DENORMAL)
1059		vfp_double_normalise_denormal(&vdn);
1060	if (tm & VFP_DENORMAL)
1061		vfp_double_normalise_denormal(&vdm);
1062
1063	/*
1064	 * Ok, we have two numbers, we can perform division.
1065	 */
1066	vdd.exponent = vdn.exponent - vdm.exponent + 1023 - 1;
1067	vdm.significand <<= 1;
1068	if (vdm.significand <= (2 * vdn.significand)) {
1069		vdn.significand >>= 1;
1070		vdd.exponent++;
1071	}
1072	vdd.significand = vfp_estimate_div128to64(vdn.significand, 0, vdm.significand);
1073	if ((vdd.significand & 0x1ff) <= 2) {
1074		u64 termh, terml, remh, reml;
1075		mul64to128(&termh, &terml, vdm.significand, vdd.significand);
1076		sub128(&remh, &reml, vdn.significand, 0, termh, terml);
1077		while ((s64)remh < 0) {
1078			vdd.significand -= 1;
1079			add128(&remh, &reml, remh, reml, 0, vdm.significand);
1080		}
1081		vdd.significand |= (reml != 0);
1082	}
1083	return vfp_double_normaliseround(dd, &vdd, fpscr, 0, "fdiv");
1084
1085 vdn_nan:
1086	exceptions = vfp_propagate_nan(&vdd, &vdn, &vdm, fpscr);
1087 pack:
1088	vfp_put_double(vfp_double_pack(&vdd), dd);
1089	return exceptions;
1090
1091 vdm_nan:
1092	exceptions = vfp_propagate_nan(&vdd, &vdm, &vdn, fpscr);
1093	goto pack;
1094
1095 zero:
1096	vdd.exponent = 0;
1097	vdd.significand = 0;
1098	goto pack;
1099
1100 divzero:
1101	exceptions = FPSCR_DZC;
1102 infinity:
1103	vdd.exponent = 2047;
1104	vdd.significand = 0;
1105	goto pack;
1106
1107 invalid:
1108	vfp_put_double(vfp_double_pack(&vfp_double_default_qnan), dd);
1109	return FPSCR_IOC;
1110}
1111
1112static struct op fops[16] = {
1113	[FOP_TO_IDX(FOP_FMAC)]	= { vfp_double_fmac,  0 },
1114	[FOP_TO_IDX(FOP_FNMAC)]	= { vfp_double_fnmac, 0 },
1115	[FOP_TO_IDX(FOP_FMSC)]	= { vfp_double_fmsc,  0 },
1116	[FOP_TO_IDX(FOP_FNMSC)]	= { vfp_double_fnmsc, 0 },
1117	[FOP_TO_IDX(FOP_FMUL)]	= { vfp_double_fmul,  0 },
1118	[FOP_TO_IDX(FOP_FNMUL)]	= { vfp_double_fnmul, 0 },
1119	[FOP_TO_IDX(FOP_FADD)]	= { vfp_double_fadd,  0 },
1120	[FOP_TO_IDX(FOP_FSUB)]	= { vfp_double_fsub,  0 },
1121	[FOP_TO_IDX(FOP_FDIV)]	= { vfp_double_fdiv,  0 },
1122};
1123
1124#define FREG_BANK(x)	((x) & 0x0c)
1125#define FREG_IDX(x)	((x) & 3)
1126
1127u32 vfp_double_cpdo(u32 inst, u32 fpscr)
1128{
1129	u32 op = inst & FOP_MASK;
1130	u32 exceptions = 0;
1131	unsigned int dest;
1132	unsigned int dn = vfp_get_dn(inst);
1133	unsigned int dm;
1134	unsigned int vecitr, veclen, vecstride;
1135	struct op *fop;
1136
1137	vecstride = (1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK));
1138
1139	fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];
1140
1141	/*
1142	 * fcvtds takes an sN register number as destination, not dN.
1143	 * It also always operates on scalars.
1144	 */
1145	if (fop->flags & OP_SD)
1146		dest = vfp_get_sd(inst);
1147	else
1148		dest = vfp_get_dd(inst);
1149
1150	/*
1151	 * f[us]ito takes a sN operand, not a dN operand.
1152	 */
1153	if (fop->flags & OP_SM)
1154		dm = vfp_get_sm(inst);
1155	else
1156		dm = vfp_get_dm(inst);
1157
1158	/*
1159	 * If destination bank is zero, vector length is always '1'.
1160	 * ARM DDI0100F C5.1.3, C5.3.2.
1161	 */
1162	if ((fop->flags & OP_SCALAR) || (FREG_BANK(dest) == 0))
1163		veclen = 0;
1164	else
1165		veclen = fpscr & FPSCR_LENGTH_MASK;
1166
1167	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
1168		 (veclen >> FPSCR_LENGTH_BIT) + 1);
1169
1170	if (!fop->fn)
1171		goto invalid;
1172
1173	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
1174		u32 except;
1175		char type;
1176
1177		type = fop->flags & OP_SD ? 's' : 'd';
1178		if (op == FOP_EXT)
1179			pr_debug("VFP: itr%d (%c%u) = op[%u] (d%u)\n",
1180				 vecitr >> FPSCR_LENGTH_BIT,
1181				 type, dest, dn, dm);
1182		else
1183			pr_debug("VFP: itr%d (%c%u) = (d%u) op[%u] (d%u)\n",
1184				 vecitr >> FPSCR_LENGTH_BIT,
1185				 type, dest, dn, FOP_TO_IDX(op), dm);
1186
1187		except = fop->fn(dest, dn, dm, fpscr);
1188		pr_debug("VFP: itr%d: exceptions=%08x\n",
1189			 vecitr >> FPSCR_LENGTH_BIT, except);
1190
1191		exceptions |= except;
1192
1193		/*
1194		 * CHECK: It appears to be undefined whether we stop when
1195		 * we encounter an exception.  We continue.
1196		 */
1197		dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 3);
1198		dn = FREG_BANK(dn) + ((FREG_IDX(dn) + vecstride) & 3);
1199		if (FREG_BANK(dm) != 0)
1200			dm = FREG_BANK(dm) + ((FREG_IDX(dm) + vecstride) & 3);
1201	}
1202	return exceptions;
1203
1204 invalid:
1205	return ~0;
1206}
1207