1/* 2 * IEEE754 floating point arithmetic 3 * single precision: MSUB.f (Fused Multiply Subtract) 4 * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft]) 5 * 6 * MIPS floating point support 7 * Copyright (C) 2015 Imagination Technologies, Ltd. 8 * Author: Markos Chandras <markos.chandras@imgtec.com> 9 * 10 * This program is free software; you can distribute it and/or modify it 11 * under the terms of the GNU General Public License as published by the 12 * Free Software Foundation; version 2 of the License. 13 */ 14 15#include "ieee754sp.h" 16 17union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, 18 union ieee754sp y) 19{ 20 int re; 21 int rs; 22 unsigned rm; 23 unsigned short lxm; 24 unsigned short hxm; 25 unsigned short lym; 26 unsigned short hym; 27 unsigned lrm; 28 unsigned hrm; 29 unsigned t; 30 unsigned at; 31 int s; 32 33 COMPXSP; 34 COMPYSP; 35 u32 zm; int ze; int zs __maybe_unused; int zc; 36 37 EXPLODEXSP; 38 EXPLODEYSP; 39 EXPLODESP(z, zc, zs, ze, zm) 40 41 FLUSHXSP; 42 FLUSHYSP; 43 FLUSHSP(z, zc, zs, ze, zm); 44 45 ieee754_clearcx(); 46 47 switch (zc) { 48 case IEEE754_CLASS_SNAN: 49 ieee754_setcx(IEEE754_INVALID_OPERATION); 50 return ieee754sp_nanxcpt(z); 51 case IEEE754_CLASS_DNORM: 52 SPDNORMx(zm, ze); 53 /* QNAN is handled separately below */ 54 } 55 56 switch (CLPAIR(xc, yc)) { 57 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): 58 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): 59 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): 60 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): 61 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): 62 return ieee754sp_nanxcpt(y); 63 64 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): 65 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): 66 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): 67 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): 68 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): 69 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): 70 return ieee754sp_nanxcpt(x); 71 72 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): 73 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): 74 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): 75 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): 76 return y; 77 78 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): 79 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): 80 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): 81 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): 82 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): 83 return x; 84 85 /* 86 * Infinity handling 87 */ 88 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): 89 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): 90 if (zc == IEEE754_CLASS_QNAN) 91 return z; 92 ieee754_setcx(IEEE754_INVALID_OPERATION); 93 return ieee754sp_indef(); 94 95 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): 96 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): 97 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): 98 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): 99 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): 100 if (zc == IEEE754_CLASS_QNAN) 101 return z; 102 return ieee754sp_inf(xs ^ ys); 103 104 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): 105 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): 106 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): 107 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): 108 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): 109 if (zc == IEEE754_CLASS_INF) 110 return ieee754sp_inf(zs); 111 /* Multiplication is 0 so just return z */ 112 return z; 113 114 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): 115 SPDNORMX; 116 117 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): 118 if (zc == IEEE754_CLASS_QNAN) 119 return z; 120 else if (zc == IEEE754_CLASS_INF) 121 return ieee754sp_inf(zs); 122 SPDNORMY; 123 break; 124 125 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): 126 if (zc == IEEE754_CLASS_QNAN) 127 return z; 128 else if (zc == IEEE754_CLASS_INF) 129 return ieee754sp_inf(zs); 130 SPDNORMX; 131 break; 132 133 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): 134 if (zc == IEEE754_CLASS_QNAN) 135 return z; 136 else if (zc == IEEE754_CLASS_INF) 137 return ieee754sp_inf(zs); 138 /* fall through to real compuation */ 139 } 140 141 /* Finally get to do some computation */ 142 143 /* 144 * Do the multiplication bit first 145 * 146 * rm = xm * ym, re = xe + ye basically 147 * 148 * At this point xm and ym should have been normalized. 149 */ 150 151 /* rm = xm * ym, re = xe+ye basically */ 152 assert(xm & SP_HIDDEN_BIT); 153 assert(ym & SP_HIDDEN_BIT); 154 155 re = xe + ye; 156 rs = xs ^ ys; 157 158 /* shunt to top of word */ 159 xm <<= 32 - (SP_FBITS + 1); 160 ym <<= 32 - (SP_FBITS + 1); 161 162 /* 163 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. 164 */ 165 lxm = xm & 0xffff; 166 hxm = xm >> 16; 167 lym = ym & 0xffff; 168 hym = ym >> 16; 169 170 lrm = lxm * lym; /* 16 * 16 => 32 */ 171 hrm = hxm * hym; /* 16 * 16 => 32 */ 172 173 t = lxm * hym; /* 16 * 16 => 32 */ 174 at = lrm + (t << 16); 175 hrm += at < lrm; 176 lrm = at; 177 hrm = hrm + (t >> 16); 178 179 t = hxm * lym; /* 16 * 16 => 32 */ 180 at = lrm + (t << 16); 181 hrm += at < lrm; 182 lrm = at; 183 hrm = hrm + (t >> 16); 184 185 rm = hrm | (lrm != 0); 186 187 /* 188 * Sticky shift down to normal rounding precision. 189 */ 190 if ((int) rm < 0) { 191 rm = (rm >> (32 - (SP_FBITS + 1 + 3))) | 192 ((rm << (SP_FBITS + 1 + 3)) != 0); 193 re++; 194 } else { 195 rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) | 196 ((rm << (SP_FBITS + 1 + 3 + 1)) != 0); 197 } 198 assert(rm & (SP_HIDDEN_BIT << 3)); 199 200 /* And now the subtraction */ 201 202 /* Flip sign of r and handle as add */ 203 rs ^= 1; 204 205 assert(zm & SP_HIDDEN_BIT); 206 207 /* 208 * Provide guard,round and stick bit space. 209 */ 210 zm <<= 3; 211 212 if (ze > re) { 213 /* 214 * Have to shift y fraction right to align. 215 */ 216 s = ze - re; 217 SPXSRSYn(s); 218 } else if (re > ze) { 219 /* 220 * Have to shift x fraction right to align. 221 */ 222 s = re - ze; 223 SPXSRSYn(s); 224 } 225 assert(ze == re); 226 assert(ze <= SP_EMAX); 227 228 if (zs == rs) { 229 /* 230 * Generate 28 bit result of adding two 27 bit numbers 231 * leaving result in zm, zs and ze. 232 */ 233 zm = zm + rm; 234 235 if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */ 236 SPXSRSX1(); /* shift preserving sticky */ 237 } 238 } else { 239 if (zm >= rm) { 240 zm = zm - rm; 241 } else { 242 zm = rm - zm; 243 zs = rs; 244 } 245 if (zm == 0) 246 return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); 247 248 /* 249 * Normalize in extended single precision 250 */ 251 while ((zm >> (SP_MBITS + 3)) == 0) { 252 zm <<= 1; 253 ze--; 254 } 255 256 } 257 return ieee754sp_format(zs, ze, zm); 258} 259