1 .file "wm_sqrt.S" 2/*---------------------------------------------------------------------------+ 3 | wm_sqrt.S | 4 | | 5 | Fixed point arithmetic square root evaluation. | 6 | | 7 | Copyright (C) 1992,1993,1995,1997 | 8 | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | 9 | Australia. E-mail billm@suburbia.net | 10 | | 11 | Call from C as: | 12 | int wm_sqrt(FPU_REG *n, unsigned int control_word) | 13 | | 14 +---------------------------------------------------------------------------*/ 15 16/*---------------------------------------------------------------------------+ 17 | wm_sqrt(FPU_REG *n, unsigned int control_word) | 18 | returns the square root of n in n. | 19 | | 20 | Use Newton's method to compute the square root of a number, which must | 21 | be in the range [1.0 .. 4.0), to 64 bits accuracy. | 22 | Does not check the sign or tag of the argument. | 23 | Sets the exponent, but not the sign or tag of the result. | 24 | | 25 | The guess is kept in %esi:%edi | 26 +---------------------------------------------------------------------------*/ 27 28#include "exception.h" 29#include "fpu_emu.h" 30 31 32#ifndef NON_REENTRANT_FPU 33/* Local storage on the stack: */ 34#define FPU_accum_3 -4(%ebp) /* ms word */ 35#define FPU_accum_2 -8(%ebp) 36#define FPU_accum_1 -12(%ebp) 37#define FPU_accum_0 -16(%ebp) 38 39/* 40 * The de-normalised argument: 41 * sq_2 sq_1 sq_0 42 * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 43 * ^ binary point here 44 */ 45#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */ 46#define FPU_fsqrt_arg_1 -24(%ebp) 47#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */ 48 49#else 50/* Local storage in a static area: */ 51.data 52 .align 4,0 53FPU_accum_3: 54 .long 0 /* ms word */ 55FPU_accum_2: 56 .long 0 57FPU_accum_1: 58 .long 0 59FPU_accum_0: 60 .long 0 61 62/* The de-normalised argument: 63 sq_2 sq_1 sq_0 64 b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 65 ^ binary point here 66 */ 67FPU_fsqrt_arg_2: 68 .long 0 /* ms word */ 69FPU_fsqrt_arg_1: 70 .long 0 71FPU_fsqrt_arg_0: 72 .long 0 /* ls word, at most the ms bit is set */ 73#endif /* NON_REENTRANT_FPU */ 74 75 76.text 77ENTRY(wm_sqrt) 78 pushl %ebp 79 movl %esp,%ebp 80#ifndef NON_REENTRANT_FPU 81 subl $28,%esp 82#endif /* NON_REENTRANT_FPU */ 83 pushl %esi 84 pushl %edi 85 pushl %ebx 86 87 movl PARAM1,%esi 88 89 movl SIGH(%esi),%eax 90 movl SIGL(%esi),%ecx 91 xorl %edx,%edx 92 93/* We use a rough linear estimate for the first guess.. */ 94 95 cmpw EXP_BIAS,EXP(%esi) 96 jnz sqrt_arg_ge_2 97 98 shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */ 99 rcrl $1,%ecx 100 rcrl $1,%edx 101 102sqrt_arg_ge_2: 103/* From here on, n is never accessed directly again until it is 104 replaced by the answer. */ 105 106 movl %eax,FPU_fsqrt_arg_2 /* ms word of n */ 107 movl %ecx,FPU_fsqrt_arg_1 108 movl %edx,FPU_fsqrt_arg_0 109 110/* Make a linear first estimate */ 111 shrl $1,%eax 112 addl $0x40000000,%eax 113 movl $0xaaaaaaaa,%ecx 114 mull %ecx 115 shll %edx /* max result was 7fff... */ 116 testl $0x80000000,%edx /* but min was 3fff... */ 117 jnz sqrt_prelim_no_adjust 118 119 movl $0x80000000,%edx /* round up */ 120 121sqrt_prelim_no_adjust: 122 movl %edx,%esi /* Our first guess */ 123 124/* We have now computed (approx) (2 + x) / 3, which forms the basis 125 for a few iterations of Newton's method */ 126 127 movl FPU_fsqrt_arg_2,%ecx /* ms word */ 128 129/* 130 * From our initial estimate, three iterations are enough to get us 131 * to 30 bits or so. This will then allow two iterations at better 132 * precision to complete the process. 133 */ 134 135/* Compute (g + n/g)/2 at each iteration (g is the guess). */ 136 shrl %ecx /* Doing this first will prevent a divide */ 137 /* overflow later. */ 138 139 movl %ecx,%edx /* msw of the arg / 2 */ 140 divl %esi /* current estimate */ 141 shrl %esi /* divide by 2 */ 142 addl %eax,%esi /* the new estimate */ 143 144 movl %ecx,%edx 145 divl %esi 146 shrl %esi 147 addl %eax,%esi 148 149 movl %ecx,%edx 150 divl %esi 151 shrl %esi 152 addl %eax,%esi 153 154/* 155 * Now that an estimate accurate to about 30 bits has been obtained (in %esi), 156 * we improve it to 60 bits or so. 157 * 158 * The strategy from now on is to compute new estimates from 159 * guess := guess + (n - guess^2) / (2 * guess) 160 */ 161 162/* First, find the square of the guess */ 163 movl %esi,%eax 164 mull %esi 165/* guess^2 now in %edx:%eax */ 166 167 movl FPU_fsqrt_arg_1,%ecx 168 subl %ecx,%eax 169 movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */ 170 sbbl %ecx,%edx 171 jnc sqrt_stage_2_positive 172 173/* Subtraction gives a negative result, 174 negate the result before division. */ 175 notl %edx 176 notl %eax 177 addl $1,%eax 178 adcl $0,%edx 179 180 divl %esi 181 movl %eax,%ecx 182 183 movl %edx,%eax 184 divl %esi 185 jmp sqrt_stage_2_finish 186 187sqrt_stage_2_positive: 188 divl %esi 189 movl %eax,%ecx 190 191 movl %edx,%eax 192 divl %esi 193 194 notl %ecx 195 notl %eax 196 addl $1,%eax 197 adcl $0,%ecx 198 199sqrt_stage_2_finish: 200 sarl $1,%ecx /* divide by 2 */ 201 rcrl $1,%eax 202 203 /* Form the new estimate in %esi:%edi */ 204 movl %eax,%edi 205 addl %ecx,%esi 206 207 jnz sqrt_stage_2_done /* result should be [1..2) */ 208 209#ifdef PARANOID 210/* It should be possible to get here only if the arg is ffff....ffff */ 211 cmp $0xffffffff,FPU_fsqrt_arg_1 212 jnz sqrt_stage_2_error 213#endif /* PARANOID */ 214 215/* The best rounded result. */ 216 xorl %eax,%eax 217 decl %eax 218 movl %eax,%edi 219 movl %eax,%esi 220 movl $0x7fffffff,%eax 221 jmp sqrt_round_result 222 223#ifdef PARANOID 224sqrt_stage_2_error: 225 pushl EX_INTERNAL|0x213 226 call EXCEPTION 227#endif /* PARANOID */ 228 229sqrt_stage_2_done: 230 231/* Now the square root has been computed to better than 60 bits. */ 232 233/* Find the square of the guess. */ 234 movl %edi,%eax /* ls word of guess */ 235 mull %edi 236 movl %edx,FPU_accum_1 237 238 movl %esi,%eax 239 mull %esi 240 movl %edx,FPU_accum_3 241 movl %eax,FPU_accum_2 242 243 movl %edi,%eax 244 mull %esi 245 addl %eax,FPU_accum_1 246 adcl %edx,FPU_accum_2 247 adcl $0,FPU_accum_3 248 249/* movl %esi,%eax */ 250/* mull %edi */ 251 addl %eax,FPU_accum_1 252 adcl %edx,FPU_accum_2 253 adcl $0,FPU_accum_3 254 255/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ 256 257 movl FPU_fsqrt_arg_0,%eax /* get normalized n */ 258 subl %eax,FPU_accum_1 259 movl FPU_fsqrt_arg_1,%eax 260 sbbl %eax,FPU_accum_2 261 movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */ 262 sbbl %eax,FPU_accum_3 263 jnc sqrt_stage_3_positive 264 265/* Subtraction gives a negative result, 266 negate the result before division */ 267 notl FPU_accum_1 268 notl FPU_accum_2 269 notl FPU_accum_3 270 addl $1,FPU_accum_1 271 adcl $0,FPU_accum_2 272 273#ifdef PARANOID 274 adcl $0,FPU_accum_3 /* This must be zero */ 275 jz sqrt_stage_3_no_error 276 277sqrt_stage_3_error: 278 pushl EX_INTERNAL|0x207 279 call EXCEPTION 280 281sqrt_stage_3_no_error: 282#endif /* PARANOID */ 283 284 movl FPU_accum_2,%edx 285 movl FPU_accum_1,%eax 286 divl %esi 287 movl %eax,%ecx 288 289 movl %edx,%eax 290 divl %esi 291 292 sarl $1,%ecx /* divide by 2 */ 293 rcrl $1,%eax 294 295 /* prepare to round the result */ 296 297 addl %ecx,%edi 298 adcl $0,%esi 299 300 jmp sqrt_stage_3_finished 301 302sqrt_stage_3_positive: 303 movl FPU_accum_2,%edx 304 movl FPU_accum_1,%eax 305 divl %esi 306 movl %eax,%ecx 307 308 movl %edx,%eax 309 divl %esi 310 311 sarl $1,%ecx /* divide by 2 */ 312 rcrl $1,%eax 313 314 /* prepare to round the result */ 315 316 notl %eax /* Negate the correction term */ 317 notl %ecx 318 addl $1,%eax 319 adcl $0,%ecx /* carry here ==> correction == 0 */ 320 adcl $0xffffffff,%esi 321 322 addl %ecx,%edi 323 adcl $0,%esi 324 325sqrt_stage_3_finished: 326 327/* 328 * The result in %esi:%edi:%esi should be good to about 90 bits here, 329 * and the rounding information here does not have sufficient accuracy 330 * in a few rare cases. 331 */ 332 cmpl $0xffffffe0,%eax 333 ja sqrt_near_exact_x 334 335 cmpl $0x00000020,%eax 336 jb sqrt_near_exact 337 338 cmpl $0x7fffffe0,%eax 339 jb sqrt_round_result 340 341 cmpl $0x80000020,%eax 342 jb sqrt_get_more_precision 343 344sqrt_round_result: 345/* Set up for rounding operations */ 346 movl %eax,%edx 347 movl %esi,%eax 348 movl %edi,%ebx 349 movl PARAM1,%edi 350 movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */ 351 jmp fpu_reg_round 352 353 354sqrt_near_exact_x: 355/* First, the estimate must be rounded up. */ 356 addl $1,%edi 357 adcl $0,%esi 358 359sqrt_near_exact: 360/* 361 * This is an easy case because x^1/2 is monotonic. 362 * We need just find the square of our estimate, compare it 363 * with the argument, and deduce whether our estimate is 364 * above, below, or exact. We use the fact that the estimate 365 * is known to be accurate to about 90 bits. 366 */ 367 movl %edi,%eax /* ls word of guess */ 368 mull %edi 369 movl %edx,%ebx /* 2nd ls word of square */ 370 movl %eax,%ecx /* ls word of square */ 371 372 movl %edi,%eax 373 mull %esi 374 addl %eax,%ebx 375 addl %eax,%ebx 376 377#ifdef PARANOID 378 cmp $0xffffffb0,%ebx 379 jb sqrt_near_exact_ok 380 381 cmp $0x00000050,%ebx 382 ja sqrt_near_exact_ok 383 384 pushl EX_INTERNAL|0x214 385 call EXCEPTION 386 387sqrt_near_exact_ok: 388#endif /* PARANOID */ 389 390 or %ebx,%ebx 391 js sqrt_near_exact_small 392 393 jnz sqrt_near_exact_large 394 395 or %ebx,%edx 396 jnz sqrt_near_exact_large 397 398/* Our estimate is exactly the right answer */ 399 xorl %eax,%eax 400 jmp sqrt_round_result 401 402sqrt_near_exact_small: 403/* Our estimate is too small */ 404 movl $0x000000ff,%eax 405 jmp sqrt_round_result 406 407sqrt_near_exact_large: 408/* Our estimate is too large, we need to decrement it */ 409 subl $1,%edi 410 sbbl $0,%esi 411 movl $0xffffff00,%eax 412 jmp sqrt_round_result 413 414 415sqrt_get_more_precision: 416/* This case is almost the same as the above, except we start 417 with an extra bit of precision in the estimate. */ 418 stc /* The extra bit. */ 419 rcll $1,%edi /* Shift the estimate left one bit */ 420 rcll $1,%esi 421 422 movl %edi,%eax /* ls word of guess */ 423 mull %edi 424 movl %edx,%ebx /* 2nd ls word of square */ 425 movl %eax,%ecx /* ls word of square */ 426 427 movl %edi,%eax 428 mull %esi 429 addl %eax,%ebx 430 addl %eax,%ebx 431 432/* Put our estimate back to its original value */ 433 stc /* The ms bit. */ 434 rcrl $1,%esi /* Shift the estimate left one bit */ 435 rcrl $1,%edi 436 437#ifdef PARANOID 438 cmp $0xffffff60,%ebx 439 jb sqrt_more_prec_ok 440 441 cmp $0x000000a0,%ebx 442 ja sqrt_more_prec_ok 443 444 pushl EX_INTERNAL|0x215 445 call EXCEPTION 446 447sqrt_more_prec_ok: 448#endif /* PARANOID */ 449 450 or %ebx,%ebx 451 js sqrt_more_prec_small 452 453 jnz sqrt_more_prec_large 454 455 or %ebx,%ecx 456 jnz sqrt_more_prec_large 457 458/* Our estimate is exactly the right answer */ 459 movl $0x80000000,%eax 460 jmp sqrt_round_result 461 462sqrt_more_prec_small: 463/* Our estimate is too small */ 464 movl $0x800000ff,%eax 465 jmp sqrt_round_result 466 467sqrt_more_prec_large: 468/* Our estimate is too large */ 469 movl $0x7fffff00,%eax 470 jmp sqrt_round_result 471