1/* 2 * IEEE754 floating point arithmetic 3 * double precision: MADDF.f (Fused Multiply Add) 4 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) 5 * 6 * MIPS floating point support 7 * Copyright (C) 2015 Imagination Technologies, Ltd. 8 * Author: Markos Chandras <markos.chandras@imgtec.com> 9 * 10 * This program is free software; you can distribute it and/or modify it 11 * under the terms of the GNU General Public License as published by the 12 * Free Software Foundation; version 2 of the License. 13 */ 14 15#include "ieee754dp.h" 16 17union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, 18 union ieee754dp y) 19{ 20 int re; 21 int rs; 22 u64 rm; 23 unsigned lxm; 24 unsigned hxm; 25 unsigned lym; 26 unsigned hym; 27 u64 lrm; 28 u64 hrm; 29 u64 t; 30 u64 at; 31 int s; 32 33 COMPXDP; 34 COMPYDP; 35 36 u64 zm; int ze; int zs __maybe_unused; int zc; 37 38 EXPLODEXDP; 39 EXPLODEYDP; 40 EXPLODEDP(z, zc, zs, ze, zm) 41 42 FLUSHXDP; 43 FLUSHYDP; 44 FLUSHDP(z, zc, zs, ze, zm); 45 46 ieee754_clearcx(); 47 48 switch (zc) { 49 case IEEE754_CLASS_SNAN: 50 ieee754_setcx(IEEE754_INVALID_OPERATION); 51 return ieee754dp_nanxcpt(z); 52 case IEEE754_CLASS_DNORM: 53 DPDNORMx(zm, ze); 54 /* QNAN is handled separately below */ 55 } 56 57 switch (CLPAIR(xc, yc)) { 58 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): 59 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): 60 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): 61 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): 62 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): 63 return ieee754dp_nanxcpt(y); 64 65 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): 66 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): 67 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): 68 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): 69 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): 70 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): 71 return ieee754dp_nanxcpt(x); 72 73 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): 74 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): 75 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): 76 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): 77 return y; 78 79 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): 80 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): 81 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): 82 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): 83 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): 84 return x; 85 86 87 /* 88 * Infinity handling 89 */ 90 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): 91 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): 92 if (zc == IEEE754_CLASS_QNAN) 93 return z; 94 ieee754_setcx(IEEE754_INVALID_OPERATION); 95 return ieee754dp_indef(); 96 97 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): 98 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): 99 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): 100 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): 101 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): 102 if (zc == IEEE754_CLASS_QNAN) 103 return z; 104 return ieee754dp_inf(xs ^ ys); 105 106 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): 107 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): 108 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): 109 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): 110 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): 111 if (zc == IEEE754_CLASS_INF) 112 return ieee754dp_inf(zs); 113 /* Multiplication is 0 so just return z */ 114 return z; 115 116 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): 117 DPDNORMX; 118 119 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): 120 if (zc == IEEE754_CLASS_QNAN) 121 return z; 122 else if (zc == IEEE754_CLASS_INF) 123 return ieee754dp_inf(zs); 124 DPDNORMY; 125 break; 126 127 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): 128 if (zc == IEEE754_CLASS_QNAN) 129 return z; 130 else if (zc == IEEE754_CLASS_INF) 131 return ieee754dp_inf(zs); 132 DPDNORMX; 133 break; 134 135 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): 136 if (zc == IEEE754_CLASS_QNAN) 137 return z; 138 else if (zc == IEEE754_CLASS_INF) 139 return ieee754dp_inf(zs); 140 /* fall through to real computations */ 141 } 142 143 /* Finally get to do some computation */ 144 145 /* 146 * Do the multiplication bit first 147 * 148 * rm = xm * ym, re = xe + ye basically 149 * 150 * At this point xm and ym should have been normalized. 151 */ 152 assert(xm & DP_HIDDEN_BIT); 153 assert(ym & DP_HIDDEN_BIT); 154 155 re = xe + ye; 156 rs = xs ^ ys; 157 158 /* shunt to top of word */ 159 xm <<= 64 - (DP_FBITS + 1); 160 ym <<= 64 - (DP_FBITS + 1); 161 162 /* 163 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. 164 */ 165 166 /* 32 * 32 => 64 */ 167#define DPXMULT(x, y) ((u64)(x) * (u64)y) 168 169 lxm = xm; 170 hxm = xm >> 32; 171 lym = ym; 172 hym = ym >> 32; 173 174 lrm = DPXMULT(lxm, lym); 175 hrm = DPXMULT(hxm, hym); 176 177 t = DPXMULT(lxm, hym); 178 179 at = lrm + (t << 32); 180 hrm += at < lrm; 181 lrm = at; 182 183 hrm = hrm + (t >> 32); 184 185 t = DPXMULT(hxm, lym); 186 187 at = lrm + (t << 32); 188 hrm += at < lrm; 189 lrm = at; 190 191 hrm = hrm + (t >> 32); 192 193 rm = hrm | (lrm != 0); 194 195 /* 196 * Sticky shift down to normal rounding precision. 197 */ 198 if ((s64) rm < 0) { 199 rm = (rm >> (64 - (DP_FBITS + 1 + 3))) | 200 ((rm << (DP_FBITS + 1 + 3)) != 0); 201 re++; 202 } else { 203 rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) | 204 ((rm << (DP_FBITS + 1 + 3 + 1)) != 0); 205 } 206 assert(rm & (DP_HIDDEN_BIT << 3)); 207 208 /* And now the addition */ 209 assert(zm & DP_HIDDEN_BIT); 210 211 /* 212 * Provide guard,round and stick bit space. 213 */ 214 zm <<= 3; 215 216 if (ze > re) { 217 /* 218 * Have to shift y fraction right to align. 219 */ 220 s = ze - re; 221 rm = XDPSRS(rm, s); 222 re += s; 223 } else if (re > ze) { 224 /* 225 * Have to shift x fraction right to align. 226 */ 227 s = re - ze; 228 zm = XDPSRS(zm, s); 229 ze += s; 230 } 231 assert(ze == re); 232 assert(ze <= DP_EMAX); 233 234 if (zs == rs) { 235 /* 236 * Generate 28 bit result of adding two 27 bit numbers 237 * leaving result in xm, xs and xe. 238 */ 239 zm = zm + rm; 240 241 if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */ 242 zm = XDPSRS1(zm); 243 ze++; 244 } 245 } else { 246 if (zm >= rm) { 247 zm = zm - rm; 248 } else { 249 zm = rm - zm; 250 zs = rs; 251 } 252 if (zm == 0) 253 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); 254 255 /* 256 * Normalize to rounding precision. 257 */ 258 while ((zm >> (DP_FBITS + 3)) == 0) { 259 zm <<= 1; 260 ze--; 261 } 262 } 263 264 return ieee754dp_format(zs, ze, zm); 265} 266