1/*
2 * IEEE754 floating point arithmetic
3 * single precision: MSUB.f (Fused Multiply Subtract)
4 * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft])
5 *
6 * MIPS floating point support
7 * Copyright (C) 2015 Imagination Technologies, Ltd.
8 * Author: Markos Chandras <markos.chandras@imgtec.com>
9 *
10 *  This program is free software; you can distribute it and/or modify it
11 *  under the terms of the GNU General Public License as published by the
12 *  Free Software Foundation; version 2 of the License.
13 */
14
15#include "ieee754sp.h"
16
17union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
18				union ieee754sp y)
19{
20	int re;
21	int rs;
22	unsigned rm;
23	unsigned short lxm;
24	unsigned short hxm;
25	unsigned short lym;
26	unsigned short hym;
27	unsigned lrm;
28	unsigned hrm;
29	unsigned t;
30	unsigned at;
31	int s;
32
33	COMPXSP;
34	COMPYSP;
35	u32 zm; int ze; int zs __maybe_unused; int zc;
36
37	EXPLODEXSP;
38	EXPLODEYSP;
39	EXPLODESP(z, zc, zs, ze, zm)
40
41	FLUSHXSP;
42	FLUSHYSP;
43	FLUSHSP(z, zc, zs, ze, zm);
44
45	ieee754_clearcx();
46
47	switch (zc) {
48	case IEEE754_CLASS_SNAN:
49		ieee754_setcx(IEEE754_INVALID_OPERATION);
50		return ieee754sp_nanxcpt(z);
51	case IEEE754_CLASS_DNORM:
52		SPDNORMx(zm, ze);
53	/* QNAN is handled separately below */
54	}
55
56	switch (CLPAIR(xc, yc)) {
57	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
58	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
59	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
60	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
61	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
62		return ieee754sp_nanxcpt(y);
63
64	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
65	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
66	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
67	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
68	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
69	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
70		return ieee754sp_nanxcpt(x);
71
72	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
73	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
74	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
75	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
76		return y;
77
78	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
79	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
80	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
81	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
82	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
83		return x;
84
85	/*
86	 * Infinity handling
87	 */
88	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
89	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
90		if (zc == IEEE754_CLASS_QNAN)
91			return z;
92		ieee754_setcx(IEEE754_INVALID_OPERATION);
93		return ieee754sp_indef();
94
95	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
96	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
97	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
98	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
99	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
100		if (zc == IEEE754_CLASS_QNAN)
101			return z;
102		return ieee754sp_inf(xs ^ ys);
103
104	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
105	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
106	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
107	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
108	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
109		if (zc == IEEE754_CLASS_INF)
110			return ieee754sp_inf(zs);
111		/* Multiplication is 0 so just return z */
112		return z;
113
114	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
115		SPDNORMX;
116
117	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
118		if (zc == IEEE754_CLASS_QNAN)
119			return z;
120		else if (zc == IEEE754_CLASS_INF)
121			return ieee754sp_inf(zs);
122		SPDNORMY;
123		break;
124
125	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
126		if (zc == IEEE754_CLASS_QNAN)
127			return z;
128		else if (zc == IEEE754_CLASS_INF)
129			return ieee754sp_inf(zs);
130		SPDNORMX;
131		break;
132
133	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
134		if (zc == IEEE754_CLASS_QNAN)
135			return z;
136		else if (zc == IEEE754_CLASS_INF)
137			return ieee754sp_inf(zs);
138		/* fall through to real compuation */
139	}
140
141	/* Finally get to do some computation */
142
143	/*
144	 * Do the multiplication bit first
145	 *
146	 * rm = xm * ym, re = xe + ye basically
147	 *
148	 * At this point xm and ym should have been normalized.
149	 */
150
151	/* rm = xm * ym, re = xe+ye basically */
152	assert(xm & SP_HIDDEN_BIT);
153	assert(ym & SP_HIDDEN_BIT);
154
155	re = xe + ye;
156	rs = xs ^ ys;
157
158	/* shunt to top of word */
159	xm <<= 32 - (SP_FBITS + 1);
160	ym <<= 32 - (SP_FBITS + 1);
161
162	/*
163	 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
164	 */
165	lxm = xm & 0xffff;
166	hxm = xm >> 16;
167	lym = ym & 0xffff;
168	hym = ym >> 16;
169
170	lrm = lxm * lym;	/* 16 * 16 => 32 */
171	hrm = hxm * hym;	/* 16 * 16 => 32 */
172
173	t = lxm * hym; /* 16 * 16 => 32 */
174	at = lrm + (t << 16);
175	hrm += at < lrm;
176	lrm = at;
177	hrm = hrm + (t >> 16);
178
179	t = hxm * lym; /* 16 * 16 => 32 */
180	at = lrm + (t << 16);
181	hrm += at < lrm;
182	lrm = at;
183	hrm = hrm + (t >> 16);
184
185	rm = hrm | (lrm != 0);
186
187	/*
188	 * Sticky shift down to normal rounding precision.
189	 */
190	if ((int) rm < 0) {
191		rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
192		    ((rm << (SP_FBITS + 1 + 3)) != 0);
193		re++;
194	} else {
195		rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
196		     ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
197	}
198	assert(rm & (SP_HIDDEN_BIT << 3));
199
200	/* And now the subtraction */
201
202	/* Flip sign of r and handle as add */
203	rs ^= 1;
204
205	assert(zm & SP_HIDDEN_BIT);
206
207	/*
208	 * Provide guard,round and stick bit space.
209	 */
210	zm <<= 3;
211
212	if (ze > re) {
213		/*
214		 * Have to shift y fraction right to align.
215		 */
216		s = ze - re;
217		SPXSRSYn(s);
218	} else if (re > ze) {
219		/*
220		 * Have to shift x fraction right to align.
221		 */
222		s = re - ze;
223		SPXSRSYn(s);
224	}
225	assert(ze == re);
226	assert(ze <= SP_EMAX);
227
228	if (zs == rs) {
229		/*
230		 * Generate 28 bit result of adding two 27 bit numbers
231		 * leaving result in zm, zs and ze.
232		 */
233		zm = zm + rm;
234
235		if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
236			SPXSRSX1(); /* shift preserving sticky */
237		}
238	} else {
239		if (zm >= rm) {
240			zm = zm - rm;
241		} else {
242			zm = rm - zm;
243			zs = rs;
244		}
245		if (zm == 0)
246			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
247
248		/*
249		 * Normalize in extended single precision
250		 */
251		while ((zm >> (SP_MBITS + 3)) == 0) {
252			zm <<= 1;
253			ze--;
254		}
255
256	}
257	return ieee754sp_format(zs, ze, zm);
258}
259