root/arch/m68k/fpsp040/slogn.S

/* [<][>][^][v][top][bottom][index][help] */
   1 |
   2 |       slogn.sa 3.1 12/10/90
   3 |
   4 |       slogn computes the natural logarithm of an
   5 |       input value. slognd does the same except the input value is a
   6 |       denormalized number. slognp1 computes log(1+X), and slognp1d
   7 |       computes log(1+X) for denormalized X.
   8 |
   9 |       Input: Double-extended value in memory location pointed to by address
  10 |               register a0.
  11 |
  12 |       Output: log(X) or log(1+X) returned in floating-point register Fp0.
  13 |
  14 |       Accuracy and Monotonicity: The returned result is within 2 ulps in
  15 |               64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
  16 |               result is subsequently rounded to double precision. The
  17 |               result is provably monotonic in double precision.
  18 |
  19 |       Speed: The program slogn takes approximately 190 cycles for input
  20 |               argument X such that |X-1| >= 1/16, which is the usual
  21 |               situation. For those arguments, slognp1 takes approximately
  22 |                210 cycles. For the less common arguments, the program will
  23 |                run no worse than 10% slower.
  24 |
  25 |       Algorithm:
  26 |       LOGN:
  27 |       Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
  28 |               u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
  29 |
  30 |       Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
  31 |               significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
  32 |               2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
  33 |
  34 |       Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
  35 |               log(1+u) = poly.
  36 |
  37 |       Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
  38 |               by k*log(2) + (log(F) + poly). The values of log(F) are calculated
  39 |               beforehand and stored in the program.
  40 |
  41 |       lognp1:
  42 |       Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
  43 |               u where u = 2X/(2+X). Otherwise, move on to Step 2.
  44 |
  45 |       Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
  46 |               of the algorithm for LOGN and compute log(1+X) as
  47 |               k*log(2) + log(F) + poly where poly approximates log(1+u),
  48 |               u = (Y-F)/F.
  49 |
  50 |       Implementation Notes:
  51 |       Note 1. There are 64 different possible values for F, thus 64 log(F)'s
  52 |               need to be tabulated. Moreover, the values of 1/F are also
  53 |               tabulated so that the division in (Y-F)/F can be performed by a
  54 |               multiplication.
  55 |
  56 |       Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value
  57 |               Y-F has to be calculated carefully when 1/2 <= X < 3/2.
  58 |
  59 |       Note 3. To fully exploit the pipeline, polynomials are usually separated
  60 |               into two parts evaluated independently before being added up.
  61 |
  62 
  63 |               Copyright (C) Motorola, Inc. 1990
  64 |                       All Rights Reserved
  65 |
  66 |       For details on the license for this file, please see the
  67 |       file, README, in this same directory.
  68 
  69 |slogn  idnt    2,1 | Motorola 040 Floating Point Software Package
  70 
  71         |section        8
  72 
  73 #include "fpsp.h"
  74 
  75 BOUNDS1:  .long 0x3FFEF07D,0x3FFF8841
  76 BOUNDS2:  .long 0x3FFE8000,0x3FFFC000
  77 
  78 LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
  79 
  80 one:    .long 0x3F800000
  81 zero:   .long 0x00000000
  82 infty:  .long 0x7F800000
  83 negone: .long 0xBF800000
  84 
  85 LOGA6:  .long 0x3FC2499A,0xB5E4040B
  86 LOGA5:  .long 0xBFC555B5,0x848CB7DB
  87 
  88 LOGA4:  .long 0x3FC99999,0x987D8730
  89 LOGA3:  .long 0xBFCFFFFF,0xFF6F7E97
  90 
  91 LOGA2:  .long 0x3FD55555,0x555555a4
  92 LOGA1:  .long 0xBFE00000,0x00000008
  93 
  94 LOGB5:  .long 0x3F175496,0xADD7DAD6
  95 LOGB4:  .long 0x3F3C71C2,0xFE80C7E0
  96 
  97 LOGB3:  .long 0x3F624924,0x928BCCFF
  98 LOGB2:  .long 0x3F899999,0x999995EC
  99 
 100 LOGB1:  .long 0x3FB55555,0x55555555
 101 TWO:    .long 0x40000000,0x00000000
 102 
 103 LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000
 104 
 105 LOGTBL:
 106         .long  0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
 107         .long  0x3FF70000,0xFF015358,0x833C47E2,0x00000000
 108         .long  0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
 109         .long  0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
 110         .long  0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
 111         .long  0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
 112         .long  0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
 113         .long  0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
 114         .long  0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
 115         .long  0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
 116         .long  0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
 117         .long  0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
 118         .long  0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
 119         .long  0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
 120         .long  0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
 121         .long  0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
 122         .long  0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
 123         .long  0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
 124         .long  0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
 125         .long  0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
 126         .long  0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
 127         .long  0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
 128         .long  0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
 129         .long  0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
 130         .long  0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
 131         .long  0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
 132         .long  0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
 133         .long  0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
 134         .long  0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
 135         .long  0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
 136         .long  0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
 137         .long  0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
 138         .long  0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
 139         .long  0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
 140         .long  0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
 141         .long  0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
 142         .long  0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
 143         .long  0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
 144         .long  0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
 145         .long  0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
 146         .long  0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
 147         .long  0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
 148         .long  0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
 149         .long  0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
 150         .long  0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
 151         .long  0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
 152         .long  0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
 153         .long  0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
 154         .long  0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
 155         .long  0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
 156         .long  0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
 157         .long  0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
 158         .long  0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
 159         .long  0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
 160         .long  0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
 161         .long  0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
 162         .long  0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
 163         .long  0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
 164         .long  0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
 165         .long  0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
 166         .long  0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
 167         .long  0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
 168         .long  0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
 169         .long  0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
 170         .long  0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
 171         .long  0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
 172         .long  0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
 173         .long  0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
 174         .long  0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
 175         .long  0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
 176         .long  0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
 177         .long  0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
 178         .long  0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
 179         .long  0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
 180         .long  0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
 181         .long  0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
 182         .long  0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
 183         .long  0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
 184         .long  0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
 185         .long  0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
 186         .long  0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
 187         .long  0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
 188         .long  0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
 189         .long  0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
 190         .long  0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
 191         .long  0x3FFE0000,0x825EFCED,0x49369330,0x00000000
 192         .long  0x3FFE0000,0x9868C809,0x868C8098,0x00000000
 193         .long  0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
 194         .long  0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
 195         .long  0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
 196         .long  0x3FFE0000,0x95A02568,0x095A0257,0x00000000
 197         .long  0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
 198         .long  0x3FFE0000,0x94458094,0x45809446,0x00000000
 199         .long  0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
 200         .long  0x3FFE0000,0x92F11384,0x0497889C,0x00000000
 201         .long  0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
 202         .long  0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
 203         .long  0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
 204         .long  0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
 205         .long  0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
 206         .long  0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
 207         .long  0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
 208         .long  0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
 209         .long  0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
 210         .long  0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
 211         .long  0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
 212         .long  0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
 213         .long  0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
 214         .long  0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
 215         .long  0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
 216         .long  0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
 217         .long  0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
 218         .long  0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
 219         .long  0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
 220         .long  0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
 221         .long  0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
 222         .long  0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
 223         .long  0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
 224         .long  0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
 225         .long  0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
 226         .long  0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
 227         .long  0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
 228         .long  0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
 229         .long  0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
 230         .long  0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
 231         .long  0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
 232         .long  0x3FFE0000,0x80808080,0x80808081,0x00000000
 233         .long  0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
 234 
 235         .set    ADJK,L_SCR1
 236 
 237         .set    X,FP_SCR1
 238         .set    XDCARE,X+2
 239         .set    XFRAC,X+4
 240 
 241         .set    F,FP_SCR2
 242         .set    FFRAC,F+4
 243 
 244         .set    KLOG2,FP_SCR3
 245 
 246         .set    SAVEU,FP_SCR4
 247 
 248         | xref  t_frcinx
 249         |xref   t_extdnrm
 250         |xref   t_operr
 251         |xref   t_dz
 252 
 253         .global slognd
 254 slognd:
 255 |--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
 256 
 257         movel           #-100,ADJK(%a6) | ...INPUT = 2^(ADJK) * FP0
 258 
 259 |----normalize the input value by left shifting k bits (k to be determined
 260 |----below), adjusting exponent and storing -k to  ADJK
 261 |----the value TWOTO100 is no longer needed.
 262 |----Note that this code assumes the denormalized input is NON-ZERO.
 263 
 264      moveml     %d2-%d7,-(%a7)          | ...save some registers
 265      movel      #0x00000000,%d3         | ...D3 is exponent of smallest norm. #
 266      movel      4(%a0),%d4
 267      movel      8(%a0),%d5              | ...(D4,D5) is (Hi_X,Lo_X)
 268      clrl       %d2                     | ...D2 used for holding K
 269 
 270      tstl       %d4
 271      bnes       HiX_not0
 272 
 273 HiX_0:
 274      movel      %d5,%d4
 275      clrl       %d5
 276      movel      #32,%d2
 277      clrl       %d6
 278      bfffo      %d4{#0:#32},%d6
 279      lsll      %d6,%d4
 280      addl       %d6,%d2                 | ...(D3,D4,D5) is normalized
 281 
 282      movel      %d3,X(%a6)
 283      movel      %d4,XFRAC(%a6)
 284      movel      %d5,XFRAC+4(%a6)
 285      negl       %d2
 286      movel      %d2,ADJK(%a6)
 287      fmovex     X(%a6),%fp0
 288      moveml     (%a7)+,%d2-%d7          | ...restore registers
 289      lea        X(%a6),%a0
 290      bras       LOGBGN                  | ...begin regular log(X)
 291 
 292 
 293 HiX_not0:
 294      clrl       %d6
 295      bfffo      %d4{#0:#32},%d6         | ...find first 1
 296      movel      %d6,%d2                 | ...get k
 297      lsll       %d6,%d4
 298      movel      %d5,%d7                 | ...a copy of D5
 299      lsll       %d6,%d5
 300      negl       %d6
 301      addil      #32,%d6
 302      lsrl       %d6,%d7
 303      orl        %d7,%d4                 | ...(D3,D4,D5) normalized
 304 
 305      movel      %d3,X(%a6)
 306      movel      %d4,XFRAC(%a6)
 307      movel      %d5,XFRAC+4(%a6)
 308      negl       %d2
 309      movel      %d2,ADJK(%a6)
 310      fmovex     X(%a6),%fp0
 311      moveml     (%a7)+,%d2-%d7          | ...restore registers
 312      lea        X(%a6),%a0
 313      bras       LOGBGN                  | ...begin regular log(X)
 314 
 315 
 316         .global slogn
 317 slogn:
 318 |--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
 319 
 320         fmovex          (%a0),%fp0      | ...LOAD INPUT
 321         movel           #0x00000000,ADJK(%a6)
 322 
 323 LOGBGN:
 324 |--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
 325 |--A FINITE, NON-ZERO, NORMALIZED NUMBER.
 326 
 327         movel   (%a0),%d0
 328         movew   4(%a0),%d0
 329 
 330         movel   (%a0),X(%a6)
 331         movel   4(%a0),X+4(%a6)
 332         movel   8(%a0),X+8(%a6)
 333 
 334         cmpil   #0,%d0          | ...CHECK IF X IS NEGATIVE
 335         blt     LOGNEG          | ...LOG OF NEGATIVE ARGUMENT IS INVALID
 336         cmp2l   BOUNDS1,%d0     | ...X IS POSITIVE, CHECK IF X IS NEAR 1
 337         bcc     LOGNEAR1        | ...BOUNDS IS ROUGHLY [15/16, 17/16]
 338 
 339 LOGMAIN:
 340 |--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
 341 
 342 |--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
 343 |--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
 344 |--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
 345 |--                      = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
 346 |--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
 347 |--LOG(1+U) CAN BE VERY EFFICIENT.
 348 |--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
 349 |--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
 350 
 351 |--GET K, Y, F, AND ADDRESS OF 1/F.
 352         asrl    #8,%d0
 353         asrl    #8,%d0          | ...SHIFTED 16 BITS, BIASED EXPO. OF X
 354         subil   #0x3FFF,%d0     | ...THIS IS K
 355         addl    ADJK(%a6),%d0   | ...ADJUST K, ORIGINAL INPUT MAY BE  DENORM.
 356         lea     LOGTBL,%a0      | ...BASE ADDRESS OF 1/F AND LOG(F)
 357         fmovel  %d0,%fp1                | ...CONVERT K TO FLOATING-POINT FORMAT
 358 
 359 |--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
 360         movel   #0x3FFF0000,X(%a6)      | ...X IS NOW Y, I.E. 2^(-K)*X
 361         movel   XFRAC(%a6),FFRAC(%a6)
 362         andil   #0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y
 363         oril    #0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT
 364         movel   FFRAC(%a6),%d0  | ...READY TO GET ADDRESS OF 1/F
 365         andil   #0x7E000000,%d0
 366         asrl    #8,%d0
 367         asrl    #8,%d0
 368         asrl    #4,%d0          | ...SHIFTED 20, D0 IS THE DISPLACEMENT
 369         addal   %d0,%a0         | ...A0 IS THE ADDRESS FOR 1/F
 370 
 371         fmovex  X(%a6),%fp0
 372         movel   #0x3fff0000,F(%a6)
 373         clrl    F+8(%a6)
 374         fsubx   F(%a6),%fp0             | ...Y-F
 375         fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...SAVE FP2 WHILE FP0 IS NOT READY
 376 |--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
 377 |--REGISTERS SAVED: FPCR, FP1, FP2
 378 
 379 LP1CONT1:
 380 |--AN RE-ENTRY POINT FOR LOGNP1
 381         fmulx   (%a0),%fp0      | ...FP0 IS U = (Y-F)/F
 382         fmulx   LOGOF2,%fp1     | ...GET K*LOG2 WHILE FP0 IS NOT READY
 383         fmovex  %fp0,%fp2
 384         fmulx   %fp2,%fp2               | ...FP2 IS V=U*U
 385         fmovex  %fp1,KLOG2(%a6) | ...PUT K*LOG2 IN MEMORY, FREE FP1
 386 
 387 |--LOG(1+U) IS APPROXIMATED BY
 388 |--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
 389 |--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
 390 
 391         fmovex  %fp2,%fp3
 392         fmovex  %fp2,%fp1
 393 
 394         fmuld   LOGA6,%fp1      | ...V*A6
 395         fmuld   LOGA5,%fp2      | ...V*A5
 396 
 397         faddd   LOGA4,%fp1      | ...A4+V*A6
 398         faddd   LOGA3,%fp2      | ...A3+V*A5
 399 
 400         fmulx   %fp3,%fp1               | ...V*(A4+V*A6)
 401         fmulx   %fp3,%fp2               | ...V*(A3+V*A5)
 402 
 403         faddd   LOGA2,%fp1      | ...A2+V*(A4+V*A6)
 404         faddd   LOGA1,%fp2      | ...A1+V*(A3+V*A5)
 405 
 406         fmulx   %fp3,%fp1               | ...V*(A2+V*(A4+V*A6))
 407         addal   #16,%a0         | ...ADDRESS OF LOG(F)
 408         fmulx   %fp3,%fp2               | ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
 409 
 410         fmulx   %fp0,%fp1               | ...U*V*(A2+V*(A4+V*A6))
 411         faddx   %fp2,%fp0               | ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
 412 
 413         faddx   (%a0),%fp1      | ...LOG(F)+U*V*(A2+V*(A4+V*A6))
 414         fmovemx  (%sp)+,%fp2-%fp2/%fp3  | ...RESTORE FP2
 415         faddx   %fp1,%fp0               | ...FP0 IS LOG(F) + LOG(1+U)
 416 
 417         fmovel  %d1,%fpcr
 418         faddx   KLOG2(%a6),%fp0 | ...FINAL ADD
 419         bra     t_frcinx
 420 
 421 
 422 LOGNEAR1:
 423 |--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
 424         fmovex  %fp0,%fp1
 425         fsubs   one,%fp1                | ...FP1 IS X-1
 426         fadds   one,%fp0                | ...FP0 IS X+1
 427         faddx   %fp1,%fp1               | ...FP1 IS 2(X-1)
 428 |--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
 429 |--IN U, U = 2(X-1)/(X+1) = FP1/FP0
 430 
 431 LP1CONT2:
 432 |--THIS IS AN RE-ENTRY POINT FOR LOGNP1
 433         fdivx   %fp0,%fp1               | ...FP1 IS U
 434         fmovemx %fp2-%fp2/%fp3,-(%sp)    | ...SAVE FP2
 435 |--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
 436 |--LET V=U*U, W=V*V, CALCULATE
 437 |--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
 438 |--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
 439         fmovex  %fp1,%fp0
 440         fmulx   %fp0,%fp0       | ...FP0 IS V
 441         fmovex  %fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1
 442         fmovex  %fp0,%fp1
 443         fmulx   %fp1,%fp1       | ...FP1 IS W
 444 
 445         fmoved  LOGB5,%fp3
 446         fmoved  LOGB4,%fp2
 447 
 448         fmulx   %fp1,%fp3       | ...W*B5
 449         fmulx   %fp1,%fp2       | ...W*B4
 450 
 451         faddd   LOGB3,%fp3 | ...B3+W*B5
 452         faddd   LOGB2,%fp2 | ...B2+W*B4
 453 
 454         fmulx   %fp3,%fp1       | ...W*(B3+W*B5), FP3 RELEASED
 455 
 456         fmulx   %fp0,%fp2       | ...V*(B2+W*B4)
 457 
 458         faddd   LOGB1,%fp1 | ...B1+W*(B3+W*B5)
 459         fmulx   SAVEU(%a6),%fp0 | ...FP0 IS U*V
 460 
 461         faddx   %fp2,%fp1       | ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
 462         fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...FP2 RESTORED
 463 
 464         fmulx   %fp1,%fp0       | ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
 465 
 466         fmovel  %d1,%fpcr
 467         faddx   SAVEU(%a6),%fp0
 468         bra     t_frcinx
 469         rts
 470 
 471 LOGNEG:
 472 |--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
 473         bra     t_operr
 474 
 475         .global slognp1d
 476 slognp1d:
 477 |--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
 478 | Simply return the denorm
 479 
 480         bra     t_extdnrm
 481 
 482         .global slognp1
 483 slognp1:
 484 |--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
 485 
 486         fmovex  (%a0),%fp0      | ...LOAD INPUT
 487         fabsx   %fp0            |test magnitude
 488         fcmpx   LTHOLD,%fp0     |compare with min threshold
 489         fbgt    LP1REAL         |if greater, continue
 490         fmovel  #0,%fpsr                |clr N flag from compare
 491         fmovel  %d1,%fpcr
 492         fmovex  (%a0),%fp0      |return signed argument
 493         bra     t_frcinx
 494 
 495 LP1REAL:
 496         fmovex  (%a0),%fp0      | ...LOAD INPUT
 497         movel   #0x00000000,ADJK(%a6)
 498         fmovex  %fp0,%fp1       | ...FP1 IS INPUT Z
 499         fadds   one,%fp0        | ...X := ROUND(1+Z)
 500         fmovex  %fp0,X(%a6)
 501         movew   XFRAC(%a6),XDCARE(%a6)
 502         movel   X(%a6),%d0
 503         cmpil   #0,%d0
 504         ble     LP1NEG0 | ...LOG OF ZERO OR -VE
 505         cmp2l   BOUNDS2,%d0
 506         bcs     LOGMAIN | ...BOUNDS2 IS [1/2,3/2]
 507 |--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
 508 |--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
 509 |--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
 510 
 511 LP1NEAR1:
 512 |--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
 513         cmp2l   BOUNDS1,%d0
 514         bcss    LP1CARE
 515 
 516 LP1ONE16:
 517 |--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
 518 |--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
 519         faddx   %fp1,%fp1       | ...FP1 IS 2Z
 520         fadds   one,%fp0        | ...FP0 IS 1+X
 521 |--U = FP1/FP0
 522         bra     LP1CONT2
 523 
 524 LP1CARE:
 525 |--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
 526 |--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
 527 |--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
 528 |--THERE ARE ONLY TWO CASES.
 529 |--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
 530 |--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
 531 |--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
 532 |--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
 533 
 534         movel   XFRAC(%a6),FFRAC(%a6)
 535         andil   #0xFE000000,FFRAC(%a6)
 536         oril    #0x01000000,FFRAC(%a6)  | ...F OBTAINED
 537         cmpil   #0x3FFF8000,%d0 | ...SEE IF 1+Z > 1
 538         bges    KISZERO
 539 
 540 KISNEG1:
 541         fmoves  TWO,%fp0
 542         movel   #0x3fff0000,F(%a6)
 543         clrl    F+8(%a6)
 544         fsubx   F(%a6),%fp0     | ...2-F
 545         movel   FFRAC(%a6),%d0
 546         andil   #0x7E000000,%d0
 547         asrl    #8,%d0
 548         asrl    #8,%d0
 549         asrl    #4,%d0          | ...D0 CONTAINS DISPLACEMENT FOR 1/F
 550         faddx   %fp1,%fp1               | ...GET 2Z
 551         fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...SAVE FP2
 552         faddx   %fp1,%fp0               | ...FP0 IS Y-F = (2-F)+2Z
 553         lea     LOGTBL,%a0      | ...A0 IS ADDRESS OF 1/F
 554         addal   %d0,%a0
 555         fmoves  negone,%fp1     | ...FP1 IS K = -1
 556         bra     LP1CONT1
 557 
 558 KISZERO:
 559         fmoves  one,%fp0
 560         movel   #0x3fff0000,F(%a6)
 561         clrl    F+8(%a6)
 562         fsubx   F(%a6),%fp0             | ...1-F
 563         movel   FFRAC(%a6),%d0
 564         andil   #0x7E000000,%d0
 565         asrl    #8,%d0
 566         asrl    #8,%d0
 567         asrl    #4,%d0
 568         faddx   %fp1,%fp0               | ...FP0 IS Y-F
 569         fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...FP2 SAVED
 570         lea     LOGTBL,%a0
 571         addal   %d0,%a0         | ...A0 IS ADDRESS OF 1/F
 572         fmoves  zero,%fp1       | ...FP1 IS K = 0
 573         bra     LP1CONT1
 574 
 575 LP1NEG0:
 576 |--FPCR SAVED. D0 IS X IN COMPACT FORM.
 577         cmpil   #0,%d0
 578         blts    LP1NEG
 579 LP1ZERO:
 580         fmoves  negone,%fp0
 581 
 582         fmovel  %d1,%fpcr
 583         bra t_dz
 584 
 585 LP1NEG:
 586         fmoves  zero,%fp0
 587 
 588         fmovel  %d1,%fpcr
 589         bra     t_operr
 590 
 591         |end

/* [<][>][^][v][top][bottom][index][help] */