root/arch/m68k/fpsp040/stwotox.S

/* [<][>][^][v][top][bottom][index][help] */
   1 |
   2 |       stwotox.sa 3.1 12/10/90
   3 |
   4 |       stwotox  --- 2**X
   5 |       stwotoxd --- 2**X for denormalized X
   6 |       stentox  --- 10**X
   7 |       stentoxd --- 10**X for denormalized X
   8 |
   9 |       Input: Double-extended number X in location pointed to
  10 |               by address register a0.
  11 |
  12 |       Output: The function values are returned in Fp0.
  13 |
  14 |       Accuracy and Monotonicity: The returned result is within 2 ulps in
  15 |               64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
  16 |               result is subsequently rounded to double precision. The
  17 |               result is provably monotonic in double precision.
  18 |
  19 |       Speed: The program stwotox takes approximately 190 cycles and the
  20 |               program stentox takes approximately 200 cycles.
  21 |
  22 |       Algorithm:
  23 |
  24 |       twotox
  25 |       1. If |X| > 16480, go to ExpBig.
  26 |
  27 |       2. If |X| < 2**(-70), go to ExpSm.
  28 |
  29 |       3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
  30 |               decompose N as
  31 |                N = 64(M + M') + j,  j = 0,1,2,...,63.
  32 |
  33 |       4. Overwrite r := r * log2. Then
  34 |               2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
  35 |               Go to expr to compute that expression.
  36 |
  37 |       tentox
  38 |       1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
  39 |
  40 |       2. If |X| < 2**(-70), go to ExpSm.
  41 |
  42 |       3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
  43 |               N := round-to-int(y). Decompose N as
  44 |                N = 64(M + M') + j,  j = 0,1,2,...,63.
  45 |
  46 |       4. Define r as
  47 |               r := ((X - N*L1)-N*L2) * L10
  48 |               where L1, L2 are the leading and trailing parts of log_10(2)/64
  49 |               and L10 is the natural log of 10. Then
  50 |               10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
  51 |               Go to expr to compute that expression.
  52 |
  53 |       expr
  54 |       1. Fetch 2**(j/64) from table as Fact1 and Fact2.
  55 |
  56 |       2. Overwrite Fact1 and Fact2 by
  57 |               Fact1 := 2**(M) * Fact1
  58 |               Fact2 := 2**(M) * Fact2
  59 |               Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
  60 |
  61 |       3. Calculate P where 1 + P approximates exp(r):
  62 |               P = r + r*r*(A1+r*(A2+...+r*A5)).
  63 |
  64 |       4. Let AdjFact := 2**(M'). Return
  65 |               AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
  66 |               Exit.
  67 |
  68 |       ExpBig
  69 |       1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
  70 |               underflow by Tiny * Tiny.
  71 |
  72 |       ExpSm
  73 |       1. Return 1 + X.
  74 |
  75 
  76 |               Copyright (C) Motorola, Inc. 1990
  77 |                       All Rights Reserved
  78 |
  79 |       For details on the license for this file, please see the
  80 |       file, README, in this same directory.
  81 
  82 |STWOTOX        idnt    2,1 | Motorola 040 Floating Point Software Package
  83 
  84         |section        8
  85 
  86 #include "fpsp.h"
  87 
  88 BOUNDS1:        .long 0x3FB98000,0x400D80C0 | ... 2^(-70),16480
  89 BOUNDS2:        .long 0x3FB98000,0x400B9B07 | ... 2^(-70),16480 LOG2/LOG10
  90 
  91 L2TEN64:        .long 0x406A934F,0x0979A371 | ... 64LOG10/LOG2
  92 L10TWO1:        .long 0x3F734413,0x509F8000 | ... LOG2/64LOG10
  93 
  94 L10TWO2:        .long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
  95 
  96 LOG10:  .long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
  97 
  98 LOG2:   .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
  99 
 100 EXPA5:  .long 0x3F56C16D,0x6F7BD0B2
 101 EXPA4:  .long 0x3F811112,0x302C712C
 102 EXPA3:  .long 0x3FA55555,0x55554CC1
 103 EXPA2:  .long 0x3FC55555,0x55554A54
 104 EXPA1:  .long 0x3FE00000,0x00000000,0x00000000,0x00000000
 105 
 106 HUGE:   .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
 107 TINY:   .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
 108 
 109 EXPTBL:
 110         .long  0x3FFF0000,0x80000000,0x00000000,0x3F738000
 111         .long  0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
 112         .long  0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
 113         .long  0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
 114         .long  0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
 115         .long  0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
 116         .long  0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
 117         .long  0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
 118         .long  0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
 119         .long  0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
 120         .long  0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
 121         .long  0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
 122         .long  0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
 123         .long  0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
 124         .long  0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
 125         .long  0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
 126         .long  0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
 127         .long  0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
 128         .long  0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
 129         .long  0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
 130         .long  0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
 131         .long  0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
 132         .long  0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
 133         .long  0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
 134         .long  0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
 135         .long  0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
 136         .long  0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
 137         .long  0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
 138         .long  0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
 139         .long  0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
 140         .long  0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
 141         .long  0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
 142         .long  0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
 143         .long  0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
 144         .long  0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
 145         .long  0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
 146         .long  0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
 147         .long  0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
 148         .long  0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
 149         .long  0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
 150         .long  0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
 151         .long  0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
 152         .long  0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
 153         .long  0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
 154         .long  0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
 155         .long  0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
 156         .long  0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
 157         .long  0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
 158         .long  0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
 159         .long  0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
 160         .long  0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
 161         .long  0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
 162         .long  0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
 163         .long  0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
 164         .long  0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
 165         .long  0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
 166         .long  0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
 167         .long  0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
 168         .long  0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
 169         .long  0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
 170         .long  0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
 171         .long  0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
 172         .long  0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
 173         .long  0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
 174 
 175         .set    N,L_SCR1
 176 
 177         .set    X,FP_SCR1
 178         .set    XDCARE,X+2
 179         .set    XFRAC,X+4
 180 
 181         .set    ADJFACT,FP_SCR2
 182 
 183         .set    FACT1,FP_SCR3
 184         .set    FACT1HI,FACT1+4
 185         .set    FACT1LOW,FACT1+8
 186 
 187         .set    FACT2,FP_SCR4
 188         .set    FACT2HI,FACT2+4
 189         .set    FACT2LOW,FACT2+8
 190 
 191         | xref  t_unfl
 192         |xref   t_ovfl
 193         |xref   t_frcinx
 194 
 195         .global stwotoxd
 196 stwotoxd:
 197 |--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
 198 
 199         fmovel          %d1,%fpcr               | ...set user's rounding mode/precision
 200         fmoves          #0x3F800000,%fp0  | ...RETURN 1 + X
 201         movel           (%a0),%d0
 202         orl             #0x00800001,%d0
 203         fadds           %d0,%fp0
 204         bra             t_frcinx
 205 
 206         .global stwotox
 207 stwotox:
 208 |--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
 209         fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's
 210 
 211         movel           (%a0),%d0
 212         movew           4(%a0),%d0
 213         fmovex          %fp0,X(%a6)
 214         andil           #0x7FFFFFFF,%d0
 215 
 216         cmpil           #0x3FB98000,%d0         | ...|X| >= 2**(-70)?
 217         bges            TWOOK1
 218         bra             EXPBORS
 219 
 220 TWOOK1:
 221         cmpil           #0x400D80C0,%d0         | ...|X| > 16480?
 222         bles            TWOMAIN
 223         bra             EXPBORS
 224 
 225 
 226 TWOMAIN:
 227 |--USUAL CASE, 2^(-70) <= |X| <= 16480
 228 
 229         fmovex          %fp0,%fp1
 230         fmuls           #0x42800000,%fp1  | ...64 * X
 231 
 232         fmovel          %fp1,N(%a6)             | ...N = ROUND-TO-INT(64 X)
 233         movel           %d2,-(%sp)
 234         lea             EXPTBL,%a1      | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
 235         fmovel          N(%a6),%fp1             | ...N --> FLOATING FMT
 236         movel           N(%a6),%d0
 237         movel           %d0,%d2
 238         andil           #0x3F,%d0               | ...D0 IS J
 239         asll            #4,%d0          | ...DISPLACEMENT FOR 2^(J/64)
 240         addal           %d0,%a1         | ...ADDRESS FOR 2^(J/64)
 241         asrl            #6,%d2          | ...d2 IS L, N = 64L + J
 242         movel           %d2,%d0
 243         asrl            #1,%d0          | ...D0 IS M
 244         subl            %d0,%d2         | ...d2 IS M', N = 64(M+M') + J
 245         addil           #0x3FFF,%d2
 246         movew           %d2,ADJFACT(%a6)        | ...ADJFACT IS 2^(M')
 247         movel           (%sp)+,%d2
 248 |--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
 249 |--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
 250 |--ADJFACT = 2^(M').
 251 |--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
 252 
 253         fmuls           #0x3C800000,%fp1  | ...(1/64)*N
 254         movel           (%a1)+,FACT1(%a6)
 255         movel           (%a1)+,FACT1HI(%a6)
 256         movel           (%a1)+,FACT1LOW(%a6)
 257         movew           (%a1)+,FACT2(%a6)
 258         clrw            FACT2+2(%a6)
 259 
 260         fsubx           %fp1,%fp0               | ...X - (1/64)*INT(64 X)
 261 
 262         movew           (%a1)+,FACT2HI(%a6)
 263         clrw            FACT2HI+2(%a6)
 264         clrl            FACT2LOW(%a6)
 265         addw            %d0,FACT1(%a6)
 266 
 267         fmulx           LOG2,%fp0       | ...FP0 IS R
 268         addw            %d0,FACT2(%a6)
 269 
 270         bra             expr
 271 
 272 EXPBORS:
 273 |--FPCR, D0 SAVED
 274         cmpil           #0x3FFF8000,%d0
 275         bgts            EXPBIG
 276 
 277 EXPSM:
 278 |--|X| IS SMALL, RETURN 1 + X
 279 
 280         fmovel          %d1,%FPCR               |restore users exceptions
 281         fadds           #0x3F800000,%fp0  | ...RETURN 1 + X
 282 
 283         bra             t_frcinx
 284 
 285 EXPBIG:
 286 |--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
 287 |--REGISTERS SAVE SO FAR ARE FPCR AND  D0
 288         movel           X(%a6),%d0
 289         cmpil           #0,%d0
 290         blts            EXPNEG
 291 
 292         bclrb           #7,(%a0)                |t_ovfl expects positive value
 293         bra             t_ovfl
 294 
 295 EXPNEG:
 296         bclrb           #7,(%a0)                |t_unfl expects positive value
 297         bra             t_unfl
 298 
 299         .global stentoxd
 300 stentoxd:
 301 |--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
 302 
 303         fmovel          %d1,%fpcr               | ...set user's rounding mode/precision
 304         fmoves          #0x3F800000,%fp0  | ...RETURN 1 + X
 305         movel           (%a0),%d0
 306         orl             #0x00800001,%d0
 307         fadds           %d0,%fp0
 308         bra             t_frcinx
 309 
 310         .global stentox
 311 stentox:
 312 |--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
 313         fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's
 314 
 315         movel           (%a0),%d0
 316         movew           4(%a0),%d0
 317         fmovex          %fp0,X(%a6)
 318         andil           #0x7FFFFFFF,%d0
 319 
 320         cmpil           #0x3FB98000,%d0         | ...|X| >= 2**(-70)?
 321         bges            TENOK1
 322         bra             EXPBORS
 323 
 324 TENOK1:
 325         cmpil           #0x400B9B07,%d0         | ...|X| <= 16480*log2/log10 ?
 326         bles            TENMAIN
 327         bra             EXPBORS
 328 
 329 TENMAIN:
 330 |--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
 331 
 332         fmovex          %fp0,%fp1
 333         fmuld           L2TEN64,%fp1    | ...X*64*LOG10/LOG2
 334 
 335         fmovel          %fp1,N(%a6)             | ...N=INT(X*64*LOG10/LOG2)
 336         movel           %d2,-(%sp)
 337         lea             EXPTBL,%a1      | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
 338         fmovel          N(%a6),%fp1             | ...N --> FLOATING FMT
 339         movel           N(%a6),%d0
 340         movel           %d0,%d2
 341         andil           #0x3F,%d0               | ...D0 IS J
 342         asll            #4,%d0          | ...DISPLACEMENT FOR 2^(J/64)
 343         addal           %d0,%a1         | ...ADDRESS FOR 2^(J/64)
 344         asrl            #6,%d2          | ...d2 IS L, N = 64L + J
 345         movel           %d2,%d0
 346         asrl            #1,%d0          | ...D0 IS M
 347         subl            %d0,%d2         | ...d2 IS M', N = 64(M+M') + J
 348         addil           #0x3FFF,%d2
 349         movew           %d2,ADJFACT(%a6)        | ...ADJFACT IS 2^(M')
 350         movel           (%sp)+,%d2
 351 
 352 |--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
 353 |--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
 354 |--ADJFACT = 2^(M').
 355 |--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
 356 
 357         fmovex          %fp1,%fp2
 358 
 359         fmuld           L10TWO1,%fp1    | ...N*(LOG2/64LOG10)_LEAD
 360         movel           (%a1)+,FACT1(%a6)
 361 
 362         fmulx           L10TWO2,%fp2    | ...N*(LOG2/64LOG10)_TRAIL
 363 
 364         movel           (%a1)+,FACT1HI(%a6)
 365         movel           (%a1)+,FACT1LOW(%a6)
 366         fsubx           %fp1,%fp0               | ...X - N L_LEAD
 367         movew           (%a1)+,FACT2(%a6)
 368 
 369         fsubx           %fp2,%fp0               | ...X - N L_TRAIL
 370 
 371         clrw            FACT2+2(%a6)
 372         movew           (%a1)+,FACT2HI(%a6)
 373         clrw            FACT2HI+2(%a6)
 374         clrl            FACT2LOW(%a6)
 375 
 376         fmulx           LOG10,%fp0      | ...FP0 IS R
 377 
 378         addw            %d0,FACT1(%a6)
 379         addw            %d0,FACT2(%a6)
 380 
 381 expr:
 382 |--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
 383 |--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
 384 |--FP0 IS R. THE FOLLOWING CODE COMPUTES
 385 |--     2**(M'+M) * 2**(J/64) * EXP(R)
 386 
 387         fmovex          %fp0,%fp1
 388         fmulx           %fp1,%fp1               | ...FP1 IS S = R*R
 389 
 390         fmoved          EXPA5,%fp2      | ...FP2 IS A5
 391         fmoved          EXPA4,%fp3      | ...FP3 IS A4
 392 
 393         fmulx           %fp1,%fp2               | ...FP2 IS S*A5
 394         fmulx           %fp1,%fp3               | ...FP3 IS S*A4
 395 
 396         faddd           EXPA3,%fp2      | ...FP2 IS A3+S*A5
 397         faddd           EXPA2,%fp3      | ...FP3 IS A2+S*A4
 398 
 399         fmulx           %fp1,%fp2               | ...FP2 IS S*(A3+S*A5)
 400         fmulx           %fp1,%fp3               | ...FP3 IS S*(A2+S*A4)
 401 
 402         faddd           EXPA1,%fp2      | ...FP2 IS A1+S*(A3+S*A5)
 403         fmulx           %fp0,%fp3               | ...FP3 IS R*S*(A2+S*A4)
 404 
 405         fmulx           %fp1,%fp2               | ...FP2 IS S*(A1+S*(A3+S*A5))
 406         faddx           %fp3,%fp0               | ...FP0 IS R+R*S*(A2+S*A4)
 407 
 408         faddx           %fp2,%fp0               | ...FP0 IS EXP(R) - 1
 409 
 410 
 411 |--FINAL RECONSTRUCTION PROCESS
 412 |--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)
 413 
 414         fmulx           FACT1(%a6),%fp0
 415         faddx           FACT2(%a6),%fp0
 416         faddx           FACT1(%a6),%fp0
 417 
 418         fmovel          %d1,%FPCR               |restore users exceptions
 419         clrw            ADJFACT+2(%a6)
 420         movel           #0x80000000,ADJFACT+4(%a6)
 421         clrl            ADJFACT+8(%a6)
 422         fmulx           ADJFACT(%a6),%fp0       | ...FINAL ADJUSTMENT
 423 
 424         bra             t_frcinx
 425 
 426         |end

/* [<][>][^][v][top][bottom][index][help] */