root/arch/sh/kernel/cpu/sh4/softfloat.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. extractFloat64Frac
  2. extractFloat64Sign
  3. extractFloat64Exp
  4. extractFloat32Exp
  5. extractFloat32Sign
  6. extractFloat32Frac
  7. packFloat64
  8. shift64RightJamming
  9. countLeadingZeros32
  10. countLeadingZeros64
  11. normalizeRoundAndPackFloat64
  12. subFloat64Sigs
  13. addFloat64Sigs
  14. packFloat32
  15. shift32RightJamming
  16. roundAndPackFloat32
  17. normalizeRoundAndPackFloat32
  18. roundAndPackFloat64
  19. subFloat32Sigs
  20. addFloat32Sigs
  21. float64_sub
  22. float32_sub
  23. float32_add
  24. float64_add
  25. normalizeFloat64Subnormal
  26. add128
  27. sub128
  28. estimateDiv128To64
  29. mul64To128
  30. normalizeFloat32Subnormal
  31. float64_div
  32. float32_div
  33. float32_mul
  34. float64_mul
  35. float64_to_float32

   1 /*
   2  * Floating point emulation support for subnormalised numbers on SH4
   3  * architecture This file is derived from the SoftFloat IEC/IEEE
   4  * Floating-point Arithmetic Package, Release 2 the original license of
   5  * which is reproduced below.
   6  *
   7  * ========================================================================
   8  *
   9  * This C source file is part of the SoftFloat IEC/IEEE Floating-point
  10  * Arithmetic Package, Release 2.
  11  *
  12  * Written by John R. Hauser.  This work was made possible in part by the
  13  * International Computer Science Institute, located at Suite 600, 1947 Center
  14  * Street, Berkeley, California 94704.  Funding was partially provided by the
  15  * National Science Foundation under grant MIP-9311980.  The original version
  16  * of this code was written as part of a project to build a fixed-point vector
  17  * processor in collaboration with the University of California at Berkeley,
  18  * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  19  * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  20  * arithmetic/softfloat.html'.
  21  *
  22  * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  23  * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  24  * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  25  * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  26  * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  27  *
  28  * Derivative works are acceptable, even for commercial purposes, so long as
  29  * (1) they include prominent notice that the work is derivative, and (2) they
  30  * include prominent notice akin to these three paragraphs for those parts of
  31  * this code that are retained.
  32  *
  33  * ========================================================================
  34  *
  35  * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com>
  36  * and Kamel Khelifi <kamel.khelifi@st.com>
  37  */
  38 #include <linux/kernel.h>
  39 #include <cpu/fpu.h>
  40 #include <asm/div64.h>
  41 
  42 #define LIT64( a ) a##LL
  43 
  44 typedef char flag;
  45 typedef unsigned char uint8;
  46 typedef signed char int8;
  47 typedef int uint16;
  48 typedef int int16;
  49 typedef unsigned int uint32;
  50 typedef signed int int32;
  51 
  52 typedef unsigned long long int bits64;
  53 typedef signed long long int sbits64;
  54 
  55 typedef unsigned char bits8;
  56 typedef signed char sbits8;
  57 typedef unsigned short int bits16;
  58 typedef signed short int sbits16;
  59 typedef unsigned int bits32;
  60 typedef signed int sbits32;
  61 
  62 typedef unsigned long long int uint64;
  63 typedef signed long long int int64;
  64 
  65 typedef unsigned long int float32;
  66 typedef unsigned long long float64;
  67 
  68 extern void float_raise(unsigned int flags);    /* in fpu.c */
  69 extern int float_rounding_mode(void);   /* in fpu.c */
  70 
  71 bits64 extractFloat64Frac(float64 a);
  72 flag extractFloat64Sign(float64 a);
  73 int16 extractFloat64Exp(float64 a);
  74 int16 extractFloat32Exp(float32 a);
  75 flag extractFloat32Sign(float32 a);
  76 bits32 extractFloat32Frac(float32 a);
  77 float64 packFloat64(flag zSign, int16 zExp, bits64 zSig);
  78 void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr);
  79 float32 packFloat32(flag zSign, int16 zExp, bits32 zSig);
  80 void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr);
  81 float64 float64_sub(float64 a, float64 b);
  82 float32 float32_sub(float32 a, float32 b);
  83 float32 float32_add(float32 a, float32 b);
  84 float64 float64_add(float64 a, float64 b);
  85 float64 float64_div(float64 a, float64 b);
  86 float32 float32_div(float32 a, float32 b);
  87 float32 float32_mul(float32 a, float32 b);
  88 float64 float64_mul(float64 a, float64 b);
  89 float32 float64_to_float32(float64 a);
  90 void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
  91                    bits64 * z1Ptr);
  92 void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
  93                    bits64 * z1Ptr);
  94 void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr);
  95 
  96 static int8 countLeadingZeros32(bits32 a);
  97 static int8 countLeadingZeros64(bits64 a);
  98 static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp,
  99                                             bits64 zSig);
 100 static float64 subFloat64Sigs(float64 a, float64 b, flag zSign);
 101 static float64 addFloat64Sigs(float64 a, float64 b, flag zSign);
 102 static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig);
 103 static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp,
 104                                             bits32 zSig);
 105 static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig);
 106 static float32 subFloat32Sigs(float32 a, float32 b, flag zSign);
 107 static float32 addFloat32Sigs(float32 a, float32 b, flag zSign);
 108 static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr,
 109                                       bits64 * zSigPtr);
 110 static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b);
 111 static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
 112                                       bits32 * zSigPtr);
 113 
 114 bits64 extractFloat64Frac(float64 a)
 115 {
 116         return a & LIT64(0x000FFFFFFFFFFFFF);
 117 }
 118 
 119 flag extractFloat64Sign(float64 a)
 120 {
 121         return a >> 63;
 122 }
 123 
 124 int16 extractFloat64Exp(float64 a)
 125 {
 126         return (a >> 52) & 0x7FF;
 127 }
 128 
 129 int16 extractFloat32Exp(float32 a)
 130 {
 131         return (a >> 23) & 0xFF;
 132 }
 133 
 134 flag extractFloat32Sign(float32 a)
 135 {
 136         return a >> 31;
 137 }
 138 
 139 bits32 extractFloat32Frac(float32 a)
 140 {
 141         return a & 0x007FFFFF;
 142 }
 143 
 144 float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
 145 {
 146         return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
 147 }
 148 
 149 void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr)
 150 {
 151         bits64 z;
 152 
 153         if (count == 0) {
 154                 z = a;
 155         } else if (count < 64) {
 156                 z = (a >> count) | ((a << ((-count) & 63)) != 0);
 157         } else {
 158                 z = (a != 0);
 159         }
 160         *zPtr = z;
 161 }
 162 
 163 static int8 countLeadingZeros32(bits32 a)
 164 {
 165         static const int8 countLeadingZerosHigh[] = {
 166                 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
 167                 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 168                 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 169                 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 170                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 171                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 172                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 173                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 174                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 175                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 176                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 177                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 178                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 179                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 180                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 181                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 182         };
 183         int8 shiftCount;
 184 
 185         shiftCount = 0;
 186         if (a < 0x10000) {
 187                 shiftCount += 16;
 188                 a <<= 16;
 189         }
 190         if (a < 0x1000000) {
 191                 shiftCount += 8;
 192                 a <<= 8;
 193         }
 194         shiftCount += countLeadingZerosHigh[a >> 24];
 195         return shiftCount;
 196 
 197 }
 198 
 199 static int8 countLeadingZeros64(bits64 a)
 200 {
 201         int8 shiftCount;
 202 
 203         shiftCount = 0;
 204         if (a < ((bits64) 1) << 32) {
 205                 shiftCount += 32;
 206         } else {
 207                 a >>= 32;
 208         }
 209         shiftCount += countLeadingZeros32(a);
 210         return shiftCount;
 211 
 212 }
 213 
 214 static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
 215 {
 216         int8 shiftCount;
 217 
 218         shiftCount = countLeadingZeros64(zSig) - 1;
 219         return roundAndPackFloat64(zSign, zExp - shiftCount,
 220                                    zSig << shiftCount);
 221 
 222 }
 223 
 224 static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
 225 {
 226         int16 aExp, bExp, zExp;
 227         bits64 aSig, bSig, zSig;
 228         int16 expDiff;
 229 
 230         aSig = extractFloat64Frac(a);
 231         aExp = extractFloat64Exp(a);
 232         bSig = extractFloat64Frac(b);
 233         bExp = extractFloat64Exp(b);
 234         expDiff = aExp - bExp;
 235         aSig <<= 10;
 236         bSig <<= 10;
 237         if (0 < expDiff)
 238                 goto aExpBigger;
 239         if (expDiff < 0)
 240                 goto bExpBigger;
 241         if (aExp == 0) {
 242                 aExp = 1;
 243                 bExp = 1;
 244         }
 245         if (bSig < aSig)
 246                 goto aBigger;
 247         if (aSig < bSig)
 248                 goto bBigger;
 249         return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
 250       bExpBigger:
 251         if (bExp == 0x7FF) {
 252                 return packFloat64(zSign ^ 1, 0x7FF, 0);
 253         }
 254         if (aExp == 0) {
 255                 ++expDiff;
 256         } else {
 257                 aSig |= LIT64(0x4000000000000000);
 258         }
 259         shift64RightJamming(aSig, -expDiff, &aSig);
 260         bSig |= LIT64(0x4000000000000000);
 261       bBigger:
 262         zSig = bSig - aSig;
 263         zExp = bExp;
 264         zSign ^= 1;
 265         goto normalizeRoundAndPack;
 266       aExpBigger:
 267         if (aExp == 0x7FF) {
 268                 return a;
 269         }
 270         if (bExp == 0) {
 271                 --expDiff;
 272         } else {
 273                 bSig |= LIT64(0x4000000000000000);
 274         }
 275         shift64RightJamming(bSig, expDiff, &bSig);
 276         aSig |= LIT64(0x4000000000000000);
 277       aBigger:
 278         zSig = aSig - bSig;
 279         zExp = aExp;
 280       normalizeRoundAndPack:
 281         --zExp;
 282         return normalizeRoundAndPackFloat64(zSign, zExp, zSig);
 283 
 284 }
 285 static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
 286 {
 287         int16 aExp, bExp, zExp;
 288         bits64 aSig, bSig, zSig;
 289         int16 expDiff;
 290 
 291         aSig = extractFloat64Frac(a);
 292         aExp = extractFloat64Exp(a);
 293         bSig = extractFloat64Frac(b);
 294         bExp = extractFloat64Exp(b);
 295         expDiff = aExp - bExp;
 296         aSig <<= 9;
 297         bSig <<= 9;
 298         if (0 < expDiff) {
 299                 if (aExp == 0x7FF) {
 300                         return a;
 301                 }
 302                 if (bExp == 0) {
 303                         --expDiff;
 304                 } else {
 305                         bSig |= LIT64(0x2000000000000000);
 306                 }
 307                 shift64RightJamming(bSig, expDiff, &bSig);
 308                 zExp = aExp;
 309         } else if (expDiff < 0) {
 310                 if (bExp == 0x7FF) {
 311                         return packFloat64(zSign, 0x7FF, 0);
 312                 }
 313                 if (aExp == 0) {
 314                         ++expDiff;
 315                 } else {
 316                         aSig |= LIT64(0x2000000000000000);
 317                 }
 318                 shift64RightJamming(aSig, -expDiff, &aSig);
 319                 zExp = bExp;
 320         } else {
 321                 if (aExp == 0x7FF) {
 322                         return a;
 323                 }
 324                 if (aExp == 0)
 325                         return packFloat64(zSign, 0, (aSig + bSig) >> 9);
 326                 zSig = LIT64(0x4000000000000000) + aSig + bSig;
 327                 zExp = aExp;
 328                 goto roundAndPack;
 329         }
 330         aSig |= LIT64(0x2000000000000000);
 331         zSig = (aSig + bSig) << 1;
 332         --zExp;
 333         if ((sbits64) zSig < 0) {
 334                 zSig = aSig + bSig;
 335                 ++zExp;
 336         }
 337       roundAndPack:
 338         return roundAndPackFloat64(zSign, zExp, zSig);
 339 
 340 }
 341 
 342 float32 packFloat32(flag zSign, int16 zExp, bits32 zSig)
 343 {
 344         return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig;
 345 }
 346 
 347 void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr)
 348 {
 349         bits32 z;
 350         if (count == 0) {
 351                 z = a;
 352         } else if (count < 32) {
 353                 z = (a >> count) | ((a << ((-count) & 31)) != 0);
 354         } else {
 355                 z = (a != 0);
 356         }
 357         *zPtr = z;
 358 }
 359 
 360 static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
 361 {
 362         flag roundNearestEven;
 363         int8 roundIncrement, roundBits;
 364         flag isTiny;
 365 
 366         /* SH4 has only 2 rounding modes - round to nearest and round to zero */
 367         roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
 368         roundIncrement = 0x40;
 369         if (!roundNearestEven) {
 370                 roundIncrement = 0;
 371         }
 372         roundBits = zSig & 0x7F;
 373         if (0xFD <= (bits16) zExp) {
 374                 if ((0xFD < zExp)
 375                     || ((zExp == 0xFD)
 376                         && ((sbits32) (zSig + roundIncrement) < 0))
 377                     ) {
 378                         float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
 379                         return packFloat32(zSign, 0xFF,
 380                                            0) - (roundIncrement == 0);
 381                 }
 382                 if (zExp < 0) {
 383                         isTiny = (zExp < -1)
 384                             || (zSig + roundIncrement < 0x80000000);
 385                         shift32RightJamming(zSig, -zExp, &zSig);
 386                         zExp = 0;
 387                         roundBits = zSig & 0x7F;
 388                         if (isTiny && roundBits)
 389                                 float_raise(FPSCR_CAUSE_UNDERFLOW);
 390                 }
 391         }
 392         if (roundBits)
 393                 float_raise(FPSCR_CAUSE_INEXACT);
 394         zSig = (zSig + roundIncrement) >> 7;
 395         zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
 396         if (zSig == 0)
 397                 zExp = 0;
 398         return packFloat32(zSign, zExp, zSig);
 399 
 400 }
 401 
 402 static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
 403 {
 404         int8 shiftCount;
 405 
 406         shiftCount = countLeadingZeros32(zSig) - 1;
 407         return roundAndPackFloat32(zSign, zExp - shiftCount,
 408                                    zSig << shiftCount);
 409 }
 410 
 411 static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
 412 {
 413         flag roundNearestEven;
 414         int16 roundIncrement, roundBits;
 415         flag isTiny;
 416 
 417         /* SH4 has only 2 rounding modes - round to nearest and round to zero */
 418         roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
 419         roundIncrement = 0x200;
 420         if (!roundNearestEven) {
 421                 roundIncrement = 0;
 422         }
 423         roundBits = zSig & 0x3FF;
 424         if (0x7FD <= (bits16) zExp) {
 425                 if ((0x7FD < zExp)
 426                     || ((zExp == 0x7FD)
 427                         && ((sbits64) (zSig + roundIncrement) < 0))
 428                     ) {
 429                         float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
 430                         return packFloat64(zSign, 0x7FF,
 431                                            0) - (roundIncrement == 0);
 432                 }
 433                 if (zExp < 0) {
 434                         isTiny = (zExp < -1)
 435                             || (zSig + roundIncrement <
 436                                 LIT64(0x8000000000000000));
 437                         shift64RightJamming(zSig, -zExp, &zSig);
 438                         zExp = 0;
 439                         roundBits = zSig & 0x3FF;
 440                         if (isTiny && roundBits)
 441                                 float_raise(FPSCR_CAUSE_UNDERFLOW);
 442                 }
 443         }
 444         if (roundBits)
 445                 float_raise(FPSCR_CAUSE_INEXACT);
 446         zSig = (zSig + roundIncrement) >> 10;
 447         zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
 448         if (zSig == 0)
 449                 zExp = 0;
 450         return packFloat64(zSign, zExp, zSig);
 451 
 452 }
 453 
 454 static float32 subFloat32Sigs(float32 a, float32 b, flag zSign)
 455 {
 456         int16 aExp, bExp, zExp;
 457         bits32 aSig, bSig, zSig;
 458         int16 expDiff;
 459 
 460         aSig = extractFloat32Frac(a);
 461         aExp = extractFloat32Exp(a);
 462         bSig = extractFloat32Frac(b);
 463         bExp = extractFloat32Exp(b);
 464         expDiff = aExp - bExp;
 465         aSig <<= 7;
 466         bSig <<= 7;
 467         if (0 < expDiff)
 468                 goto aExpBigger;
 469         if (expDiff < 0)
 470                 goto bExpBigger;
 471         if (aExp == 0) {
 472                 aExp = 1;
 473                 bExp = 1;
 474         }
 475         if (bSig < aSig)
 476                 goto aBigger;
 477         if (aSig < bSig)
 478                 goto bBigger;
 479         return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
 480       bExpBigger:
 481         if (bExp == 0xFF) {
 482                 return packFloat32(zSign ^ 1, 0xFF, 0);
 483         }
 484         if (aExp == 0) {
 485                 ++expDiff;
 486         } else {
 487                 aSig |= 0x40000000;
 488         }
 489         shift32RightJamming(aSig, -expDiff, &aSig);
 490         bSig |= 0x40000000;
 491       bBigger:
 492         zSig = bSig - aSig;
 493         zExp = bExp;
 494         zSign ^= 1;
 495         goto normalizeRoundAndPack;
 496       aExpBigger:
 497         if (aExp == 0xFF) {
 498                 return a;
 499         }
 500         if (bExp == 0) {
 501                 --expDiff;
 502         } else {
 503                 bSig |= 0x40000000;
 504         }
 505         shift32RightJamming(bSig, expDiff, &bSig);
 506         aSig |= 0x40000000;
 507       aBigger:
 508         zSig = aSig - bSig;
 509         zExp = aExp;
 510       normalizeRoundAndPack:
 511         --zExp;
 512         return normalizeRoundAndPackFloat32(zSign, zExp, zSig);
 513 
 514 }
 515 
 516 static float32 addFloat32Sigs(float32 a, float32 b, flag zSign)
 517 {
 518         int16 aExp, bExp, zExp;
 519         bits32 aSig, bSig, zSig;
 520         int16 expDiff;
 521 
 522         aSig = extractFloat32Frac(a);
 523         aExp = extractFloat32Exp(a);
 524         bSig = extractFloat32Frac(b);
 525         bExp = extractFloat32Exp(b);
 526         expDiff = aExp - bExp;
 527         aSig <<= 6;
 528         bSig <<= 6;
 529         if (0 < expDiff) {
 530                 if (aExp == 0xFF) {
 531                         return a;
 532                 }
 533                 if (bExp == 0) {
 534                         --expDiff;
 535                 } else {
 536                         bSig |= 0x20000000;
 537                 }
 538                 shift32RightJamming(bSig, expDiff, &bSig);
 539                 zExp = aExp;
 540         } else if (expDiff < 0) {
 541                 if (bExp == 0xFF) {
 542                         return packFloat32(zSign, 0xFF, 0);
 543                 }
 544                 if (aExp == 0) {
 545                         ++expDiff;
 546                 } else {
 547                         aSig |= 0x20000000;
 548                 }
 549                 shift32RightJamming(aSig, -expDiff, &aSig);
 550                 zExp = bExp;
 551         } else {
 552                 if (aExp == 0xFF) {
 553                         return a;
 554                 }
 555                 if (aExp == 0)
 556                         return packFloat32(zSign, 0, (aSig + bSig) >> 6);
 557                 zSig = 0x40000000 + aSig + bSig;
 558                 zExp = aExp;
 559                 goto roundAndPack;
 560         }
 561         aSig |= 0x20000000;
 562         zSig = (aSig + bSig) << 1;
 563         --zExp;
 564         if ((sbits32) zSig < 0) {
 565                 zSig = aSig + bSig;
 566                 ++zExp;
 567         }
 568       roundAndPack:
 569         return roundAndPackFloat32(zSign, zExp, zSig);
 570 
 571 }
 572 
 573 float64 float64_sub(float64 a, float64 b)
 574 {
 575         flag aSign, bSign;
 576 
 577         aSign = extractFloat64Sign(a);
 578         bSign = extractFloat64Sign(b);
 579         if (aSign == bSign) {
 580                 return subFloat64Sigs(a, b, aSign);
 581         } else {
 582                 return addFloat64Sigs(a, b, aSign);
 583         }
 584 
 585 }
 586 
 587 float32 float32_sub(float32 a, float32 b)
 588 {
 589         flag aSign, bSign;
 590 
 591         aSign = extractFloat32Sign(a);
 592         bSign = extractFloat32Sign(b);
 593         if (aSign == bSign) {
 594                 return subFloat32Sigs(a, b, aSign);
 595         } else {
 596                 return addFloat32Sigs(a, b, aSign);
 597         }
 598 
 599 }
 600 
 601 float32 float32_add(float32 a, float32 b)
 602 {
 603         flag aSign, bSign;
 604 
 605         aSign = extractFloat32Sign(a);
 606         bSign = extractFloat32Sign(b);
 607         if (aSign == bSign) {
 608                 return addFloat32Sigs(a, b, aSign);
 609         } else {
 610                 return subFloat32Sigs(a, b, aSign);
 611         }
 612 
 613 }
 614 
 615 float64 float64_add(float64 a, float64 b)
 616 {
 617         flag aSign, bSign;
 618 
 619         aSign = extractFloat64Sign(a);
 620         bSign = extractFloat64Sign(b);
 621         if (aSign == bSign) {
 622                 return addFloat64Sigs(a, b, aSign);
 623         } else {
 624                 return subFloat64Sigs(a, b, aSign);
 625         }
 626 }
 627 
 628 static void
 629 normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr)
 630 {
 631         int8 shiftCount;
 632 
 633         shiftCount = countLeadingZeros64(aSig) - 11;
 634         *zSigPtr = aSig << shiftCount;
 635         *zExpPtr = 1 - shiftCount;
 636 }
 637 
 638 void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
 639                    bits64 * z1Ptr)
 640 {
 641         bits64 z1;
 642 
 643         z1 = a1 + b1;
 644         *z1Ptr = z1;
 645         *z0Ptr = a0 + b0 + (z1 < a1);
 646 }
 647 
 648 void
 649 sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
 650        bits64 * z1Ptr)
 651 {
 652         *z1Ptr = a1 - b1;
 653         *z0Ptr = a0 - b0 - (a1 < b1);
 654 }
 655 
 656 static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b)
 657 {
 658         bits64 b0, b1;
 659         bits64 rem0, rem1, term0, term1;
 660         bits64 z, tmp;
 661         if (b <= a0)
 662                 return LIT64(0xFFFFFFFFFFFFFFFF);
 663         b0 = b >> 32;
 664         tmp = a0;
 665         do_div(tmp, b0);
 666 
 667         z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : tmp << 32;
 668         mul64To128(b, z, &term0, &term1);
 669         sub128(a0, a1, term0, term1, &rem0, &rem1);
 670         while (((sbits64) rem0) < 0) {
 671                 z -= LIT64(0x100000000);
 672                 b1 = b << 32;
 673                 add128(rem0, rem1, b0, b1, &rem0, &rem1);
 674         }
 675         rem0 = (rem0 << 32) | (rem1 >> 32);
 676         tmp = rem0;
 677         do_div(tmp, b0);
 678         z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : tmp;
 679         return z;
 680 }
 681 
 682 void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr)
 683 {
 684         bits32 aHigh, aLow, bHigh, bLow;
 685         bits64 z0, zMiddleA, zMiddleB, z1;
 686 
 687         aLow = a;
 688         aHigh = a >> 32;
 689         bLow = b;
 690         bHigh = b >> 32;
 691         z1 = ((bits64) aLow) * bLow;
 692         zMiddleA = ((bits64) aLow) * bHigh;
 693         zMiddleB = ((bits64) aHigh) * bLow;
 694         z0 = ((bits64) aHigh) * bHigh;
 695         zMiddleA += zMiddleB;
 696         z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32);
 697         zMiddleA <<= 32;
 698         z1 += zMiddleA;
 699         z0 += (z1 < zMiddleA);
 700         *z1Ptr = z1;
 701         *z0Ptr = z0;
 702 
 703 }
 704 
 705 static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
 706                                       bits32 * zSigPtr)
 707 {
 708         int8 shiftCount;
 709 
 710         shiftCount = countLeadingZeros32(aSig) - 8;
 711         *zSigPtr = aSig << shiftCount;
 712         *zExpPtr = 1 - shiftCount;
 713 
 714 }
 715 
 716 float64 float64_div(float64 a, float64 b)
 717 {
 718         flag aSign, bSign, zSign;
 719         int16 aExp, bExp, zExp;
 720         bits64 aSig, bSig, zSig;
 721         bits64 rem0, rem1;
 722         bits64 term0, term1;
 723 
 724         aSig = extractFloat64Frac(a);
 725         aExp = extractFloat64Exp(a);
 726         aSign = extractFloat64Sign(a);
 727         bSig = extractFloat64Frac(b);
 728         bExp = extractFloat64Exp(b);
 729         bSign = extractFloat64Sign(b);
 730         zSign = aSign ^ bSign;
 731         if (aExp == 0x7FF) {
 732                 if (bExp == 0x7FF) {
 733                 }
 734                 return packFloat64(zSign, 0x7FF, 0);
 735         }
 736         if (bExp == 0x7FF) {
 737                 return packFloat64(zSign, 0, 0);
 738         }
 739         if (bExp == 0) {
 740                 if (bSig == 0) {
 741                         if ((aExp | aSig) == 0) {
 742                                 float_raise(FPSCR_CAUSE_INVALID);
 743                         }
 744                         return packFloat64(zSign, 0x7FF, 0);
 745                 }
 746                 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
 747         }
 748         if (aExp == 0) {
 749                 if (aSig == 0)
 750                         return packFloat64(zSign, 0, 0);
 751                 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
 752         }
 753         zExp = aExp - bExp + 0x3FD;
 754         aSig = (aSig | LIT64(0x0010000000000000)) << 10;
 755         bSig = (bSig | LIT64(0x0010000000000000)) << 11;
 756         if (bSig <= (aSig + aSig)) {
 757                 aSig >>= 1;
 758                 ++zExp;
 759         }
 760         zSig = estimateDiv128To64(aSig, 0, bSig);
 761         if ((zSig & 0x1FF) <= 2) {
 762                 mul64To128(bSig, zSig, &term0, &term1);
 763                 sub128(aSig, 0, term0, term1, &rem0, &rem1);
 764                 while ((sbits64) rem0 < 0) {
 765                         --zSig;
 766                         add128(rem0, rem1, 0, bSig, &rem0, &rem1);
 767                 }
 768                 zSig |= (rem1 != 0);
 769         }
 770         return roundAndPackFloat64(zSign, zExp, zSig);
 771 
 772 }
 773 
 774 float32 float32_div(float32 a, float32 b)
 775 {
 776         flag aSign, bSign, zSign;
 777         int16 aExp, bExp, zExp;
 778         bits32 aSig, bSig;
 779         uint64_t zSig;
 780 
 781         aSig = extractFloat32Frac(a);
 782         aExp = extractFloat32Exp(a);
 783         aSign = extractFloat32Sign(a);
 784         bSig = extractFloat32Frac(b);
 785         bExp = extractFloat32Exp(b);
 786         bSign = extractFloat32Sign(b);
 787         zSign = aSign ^ bSign;
 788         if (aExp == 0xFF) {
 789                 if (bExp == 0xFF) {
 790                 }
 791                 return packFloat32(zSign, 0xFF, 0);
 792         }
 793         if (bExp == 0xFF) {
 794                 return packFloat32(zSign, 0, 0);
 795         }
 796         if (bExp == 0) {
 797                 if (bSig == 0) {
 798                         return packFloat32(zSign, 0xFF, 0);
 799                 }
 800                 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
 801         }
 802         if (aExp == 0) {
 803                 if (aSig == 0)
 804                         return packFloat32(zSign, 0, 0);
 805                 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
 806         }
 807         zExp = aExp - bExp + 0x7D;
 808         aSig = (aSig | 0x00800000) << 7;
 809         bSig = (bSig | 0x00800000) << 8;
 810         if (bSig <= (aSig + aSig)) {
 811                 aSig >>= 1;
 812                 ++zExp;
 813         }
 814         zSig = (((bits64) aSig) << 32);
 815         do_div(zSig, bSig);
 816 
 817         if ((zSig & 0x3F) == 0) {
 818                 zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32);
 819         }
 820         return roundAndPackFloat32(zSign, zExp, (bits32)zSig);
 821 
 822 }
 823 
 824 float32 float32_mul(float32 a, float32 b)
 825 {
 826         char aSign, bSign, zSign;
 827         int aExp, bExp, zExp;
 828         unsigned int aSig, bSig;
 829         unsigned long long zSig64;
 830         unsigned int zSig;
 831 
 832         aSig = extractFloat32Frac(a);
 833         aExp = extractFloat32Exp(a);
 834         aSign = extractFloat32Sign(a);
 835         bSig = extractFloat32Frac(b);
 836         bExp = extractFloat32Exp(b);
 837         bSign = extractFloat32Sign(b);
 838         zSign = aSign ^ bSign;
 839         if (aExp == 0) {
 840                 if (aSig == 0)
 841                         return packFloat32(zSign, 0, 0);
 842                 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
 843         }
 844         if (bExp == 0) {
 845                 if (bSig == 0)
 846                         return packFloat32(zSign, 0, 0);
 847                 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
 848         }
 849         if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0))
 850                 return roundAndPackFloat32(zSign, 0xff, 0);
 851 
 852         zExp = aExp + bExp - 0x7F;
 853         aSig = (aSig | 0x00800000) << 7;
 854         bSig = (bSig | 0x00800000) << 8;
 855         shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64);
 856         zSig = zSig64;
 857         if (0 <= (signed int)(zSig << 1)) {
 858                 zSig <<= 1;
 859                 --zExp;
 860         }
 861         return roundAndPackFloat32(zSign, zExp, zSig);
 862 
 863 }
 864 
 865 float64 float64_mul(float64 a, float64 b)
 866 {
 867         char aSign, bSign, zSign;
 868         int aExp, bExp, zExp;
 869         unsigned long long int aSig, bSig, zSig0, zSig1;
 870 
 871         aSig = extractFloat64Frac(a);
 872         aExp = extractFloat64Exp(a);
 873         aSign = extractFloat64Sign(a);
 874         bSig = extractFloat64Frac(b);
 875         bExp = extractFloat64Exp(b);
 876         bSign = extractFloat64Sign(b);
 877         zSign = aSign ^ bSign;
 878 
 879         if (aExp == 0) {
 880                 if (aSig == 0)
 881                         return packFloat64(zSign, 0, 0);
 882                 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
 883         }
 884         if (bExp == 0) {
 885                 if (bSig == 0)
 886                         return packFloat64(zSign, 0, 0);
 887                 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
 888         }
 889         if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0))
 890                 return roundAndPackFloat64(zSign, 0x7ff, 0);
 891 
 892         zExp = aExp + bExp - 0x3FF;
 893         aSig = (aSig | 0x0010000000000000LL) << 10;
 894         bSig = (bSig | 0x0010000000000000LL) << 11;
 895         mul64To128(aSig, bSig, &zSig0, &zSig1);
 896         zSig0 |= (zSig1 != 0);
 897         if (0 <= (signed long long int)(zSig0 << 1)) {
 898                 zSig0 <<= 1;
 899                 --zExp;
 900         }
 901         return roundAndPackFloat64(zSign, zExp, zSig0);
 902 }
 903 
 904 /*
 905  * -------------------------------------------------------------------------------
 906  *  Returns the result of converting the double-precision floating-point value
 907  *  `a' to the single-precision floating-point format.  The conversion is
 908  *  performed according to the IEC/IEEE Standard for Binary Floating-point
 909  *  Arithmetic.
 910  *  -------------------------------------------------------------------------------
 911  *  */
 912 float32 float64_to_float32(float64 a)
 913 {
 914     flag aSign;
 915     int16 aExp;
 916     bits64 aSig;
 917     bits32 zSig;
 918 
 919     aSig = extractFloat64Frac( a );
 920     aExp = extractFloat64Exp( a );
 921     aSign = extractFloat64Sign( a );
 922 
 923     shift64RightJamming( aSig, 22, &aSig );
 924     zSig = aSig;
 925     if ( aExp || zSig ) {
 926         zSig |= 0x40000000;
 927         aExp -= 0x381;
 928     }
 929     return roundAndPackFloat32(aSign, aExp, zSig);
 930 }

/* [<][>][^][v][top][bottom][index][help] */