1	.file	"wm_sqrt.S"
2/*---------------------------------------------------------------------------+
3 |  wm_sqrt.S                                                                |
4 |                                                                           |
5 | Fixed point arithmetic square root evaluation.                            |
6 |                                                                           |
7 | Copyright (C) 1992,1993,1995,1997                                         |
8 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
9 |                       Australia.  E-mail billm@suburbia.net               |
10 |                                                                           |
11 | Call from C as:                                                           |
12 |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
13 |                                                                           |
14 +---------------------------------------------------------------------------*/
15
16/*---------------------------------------------------------------------------+
17 |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
18 |    returns the square root of n in n.                                     |
19 |                                                                           |
20 |  Use Newton's method to compute the square root of a number, which must   |
21 |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
22 |  Does not check the sign or tag of the argument.                          |
23 |  Sets the exponent, but not the sign or tag of the result.                |
24 |                                                                           |
25 |  The guess is kept in %esi:%edi                                           |
26 +---------------------------------------------------------------------------*/
27
28#include "exception.h"
29#include "fpu_emu.h"
30
31
32#ifndef NON_REENTRANT_FPU
33/*	Local storage on the stack: */
34#define FPU_accum_3	-4(%ebp)	/* ms word */
35#define FPU_accum_2	-8(%ebp)
36#define FPU_accum_1	-12(%ebp)
37#define FPU_accum_0	-16(%ebp)
38
39/*
40 * The de-normalised argument:
41 *                  sq_2                  sq_1              sq_0
42 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
43 *           ^ binary point here
44 */
45#define FPU_fsqrt_arg_2	-20(%ebp)	/* ms word */
46#define FPU_fsqrt_arg_1	-24(%ebp)
47#define FPU_fsqrt_arg_0	-28(%ebp)	/* ls word, at most the ms bit is set */
48
49#else
50/*	Local storage in a static area: */
51.data
52	.align 4,0
53FPU_accum_3:
54	.long	0		/* ms word */
55FPU_accum_2:
56	.long	0
57FPU_accum_1:
58	.long	0
59FPU_accum_0:
60	.long	0
61
62/* The de-normalised argument:
63                    sq_2                  sq_1              sq_0
64          b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
65             ^ binary point here
66 */
67FPU_fsqrt_arg_2:
68	.long	0		/* ms word */
69FPU_fsqrt_arg_1:
70	.long	0
71FPU_fsqrt_arg_0:
72	.long	0		/* ls word, at most the ms bit is set */
73#endif /* NON_REENTRANT_FPU */
74
75
76.text
77ENTRY(wm_sqrt)
78	pushl	%ebp
79	movl	%esp,%ebp
80#ifndef NON_REENTRANT_FPU
81	subl	$28,%esp
82#endif /* NON_REENTRANT_FPU */
83	pushl	%esi
84	pushl	%edi
85	pushl	%ebx
86
87	movl	PARAM1,%esi
88
89	movl	SIGH(%esi),%eax
90	movl	SIGL(%esi),%ecx
91	xorl	%edx,%edx
92
93/* We use a rough linear estimate for the first guess.. */
94
95	cmpw	EXP_BIAS,EXP(%esi)
96	jnz	sqrt_arg_ge_2
97
98	shrl	$1,%eax			/* arg is in the range  [1.0 .. 2.0) */
99	rcrl	$1,%ecx
100	rcrl	$1,%edx
101
102sqrt_arg_ge_2:
103/* From here on, n is never accessed directly again until it is
104   replaced by the answer. */
105
106	movl	%eax,FPU_fsqrt_arg_2		/* ms word of n */
107	movl	%ecx,FPU_fsqrt_arg_1
108	movl	%edx,FPU_fsqrt_arg_0
109
110/* Make a linear first estimate */
111	shrl	$1,%eax
112	addl	$0x40000000,%eax
113	movl	$0xaaaaaaaa,%ecx
114	mull	%ecx
115	shll	%edx			/* max result was 7fff... */
116	testl	$0x80000000,%edx	/* but min was 3fff... */
117	jnz	sqrt_prelim_no_adjust
118
119	movl	$0x80000000,%edx	/* round up */
120
121sqrt_prelim_no_adjust:
122	movl	%edx,%esi	/* Our first guess */
123
124/* We have now computed (approx)   (2 + x) / 3, which forms the basis
125   for a few iterations of Newton's method */
126
127	movl	FPU_fsqrt_arg_2,%ecx	/* ms word */
128
129/*
130 * From our initial estimate, three iterations are enough to get us
131 * to 30 bits or so. This will then allow two iterations at better
132 * precision to complete the process.
133 */
134
135/* Compute  (g + n/g)/2  at each iteration (g is the guess). */
136	shrl	%ecx		/* Doing this first will prevent a divide */
137				/* overflow later. */
138
139	movl	%ecx,%edx	/* msw of the arg / 2 */
140	divl	%esi		/* current estimate */
141	shrl	%esi		/* divide by 2 */
142	addl	%eax,%esi	/* the new estimate */
143
144	movl	%ecx,%edx
145	divl	%esi
146	shrl	%esi
147	addl	%eax,%esi
148
149	movl	%ecx,%edx
150	divl	%esi
151	shrl	%esi
152	addl	%eax,%esi
153
154/*
155 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
156 * we improve it to 60 bits or so.
157 *
158 * The strategy from now on is to compute new estimates from
159 *      guess := guess + (n - guess^2) / (2 * guess)
160 */
161
162/* First, find the square of the guess */
163	movl	%esi,%eax
164	mull	%esi
165/* guess^2 now in %edx:%eax */
166
167	movl	FPU_fsqrt_arg_1,%ecx
168	subl	%ecx,%eax
169	movl	FPU_fsqrt_arg_2,%ecx	/* ms word of normalized n */
170	sbbl	%ecx,%edx
171	jnc	sqrt_stage_2_positive
172
173/* Subtraction gives a negative result,
174   negate the result before division. */
175	notl	%edx
176	notl	%eax
177	addl	$1,%eax
178	adcl	$0,%edx
179
180	divl	%esi
181	movl	%eax,%ecx
182
183	movl	%edx,%eax
184	divl	%esi
185	jmp	sqrt_stage_2_finish
186
187sqrt_stage_2_positive:
188	divl	%esi
189	movl	%eax,%ecx
190
191	movl	%edx,%eax
192	divl	%esi
193
194	notl	%ecx
195	notl	%eax
196	addl	$1,%eax
197	adcl	$0,%ecx
198
199sqrt_stage_2_finish:
200	sarl	$1,%ecx		/* divide by 2 */
201	rcrl	$1,%eax
202
203	/* Form the new estimate in %esi:%edi */
204	movl	%eax,%edi
205	addl	%ecx,%esi
206
207	jnz	sqrt_stage_2_done	/* result should be [1..2) */
208
209#ifdef PARANOID
210/* It should be possible to get here only if the arg is ffff....ffff */
211	cmp	$0xffffffff,FPU_fsqrt_arg_1
212	jnz	sqrt_stage_2_error
213#endif /* PARANOID */
214
215/* The best rounded result. */
216	xorl	%eax,%eax
217	decl	%eax
218	movl	%eax,%edi
219	movl	%eax,%esi
220	movl	$0x7fffffff,%eax
221	jmp	sqrt_round_result
222
223#ifdef PARANOID
224sqrt_stage_2_error:
225	pushl	EX_INTERNAL|0x213
226	call	EXCEPTION
227#endif /* PARANOID */
228
229sqrt_stage_2_done:
230
231/* Now the square root has been computed to better than 60 bits. */
232
233/* Find the square of the guess. */
234	movl	%edi,%eax		/* ls word of guess */
235	mull	%edi
236	movl	%edx,FPU_accum_1
237
238	movl	%esi,%eax
239	mull	%esi
240	movl	%edx,FPU_accum_3
241	movl	%eax,FPU_accum_2
242
243	movl	%edi,%eax
244	mull	%esi
245	addl	%eax,FPU_accum_1
246	adcl	%edx,FPU_accum_2
247	adcl	$0,FPU_accum_3
248
249/*	movl	%esi,%eax */
250/*	mull	%edi */
251	addl	%eax,FPU_accum_1
252	adcl	%edx,FPU_accum_2
253	adcl	$0,FPU_accum_3
254
255/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
256
257	movl	FPU_fsqrt_arg_0,%eax		/* get normalized n */
258	subl	%eax,FPU_accum_1
259	movl	FPU_fsqrt_arg_1,%eax
260	sbbl	%eax,FPU_accum_2
261	movl	FPU_fsqrt_arg_2,%eax		/* ms word of normalized n */
262	sbbl	%eax,FPU_accum_3
263	jnc	sqrt_stage_3_positive
264
265/* Subtraction gives a negative result,
266   negate the result before division */
267	notl	FPU_accum_1
268	notl	FPU_accum_2
269	notl	FPU_accum_3
270	addl	$1,FPU_accum_1
271	adcl	$0,FPU_accum_2
272
273#ifdef PARANOID
274	adcl	$0,FPU_accum_3	/* This must be zero */
275	jz	sqrt_stage_3_no_error
276
277sqrt_stage_3_error:
278	pushl	EX_INTERNAL|0x207
279	call	EXCEPTION
280
281sqrt_stage_3_no_error:
282#endif /* PARANOID */
283
284	movl	FPU_accum_2,%edx
285	movl	FPU_accum_1,%eax
286	divl	%esi
287	movl	%eax,%ecx
288
289	movl	%edx,%eax
290	divl	%esi
291
292	sarl	$1,%ecx		/* divide by 2 */
293	rcrl	$1,%eax
294
295	/* prepare to round the result */
296
297	addl	%ecx,%edi
298	adcl	$0,%esi
299
300	jmp	sqrt_stage_3_finished
301
302sqrt_stage_3_positive:
303	movl	FPU_accum_2,%edx
304	movl	FPU_accum_1,%eax
305	divl	%esi
306	movl	%eax,%ecx
307
308	movl	%edx,%eax
309	divl	%esi
310
311	sarl	$1,%ecx		/* divide by 2 */
312	rcrl	$1,%eax
313
314	/* prepare to round the result */
315
316	notl	%eax		/* Negate the correction term */
317	notl	%ecx
318	addl	$1,%eax
319	adcl	$0,%ecx		/* carry here ==> correction == 0 */
320	adcl	$0xffffffff,%esi
321
322	addl	%ecx,%edi
323	adcl	$0,%esi
324
325sqrt_stage_3_finished:
326
327/*
328 * The result in %esi:%edi:%esi should be good to about 90 bits here,
329 * and the rounding information here does not have sufficient accuracy
330 * in a few rare cases.
331 */
332	cmpl	$0xffffffe0,%eax
333	ja	sqrt_near_exact_x
334
335	cmpl	$0x00000020,%eax
336	jb	sqrt_near_exact
337
338	cmpl	$0x7fffffe0,%eax
339	jb	sqrt_round_result
340
341	cmpl	$0x80000020,%eax
342	jb	sqrt_get_more_precision
343
344sqrt_round_result:
345/* Set up for rounding operations */
346	movl	%eax,%edx
347	movl	%esi,%eax
348	movl	%edi,%ebx
349	movl	PARAM1,%edi
350	movw	EXP_BIAS,EXP(%edi)	/* Result is in  [1.0 .. 2.0) */
351	jmp	fpu_reg_round
352
353
354sqrt_near_exact_x:
355/* First, the estimate must be rounded up. */
356	addl	$1,%edi
357	adcl	$0,%esi
358
359sqrt_near_exact:
360/*
361 * This is an easy case because x^1/2 is monotonic.
362 * We need just find the square of our estimate, compare it
363 * with the argument, and deduce whether our estimate is
364 * above, below, or exact. We use the fact that the estimate
365 * is known to be accurate to about 90 bits.
366 */
367	movl	%edi,%eax		/* ls word of guess */
368	mull	%edi
369	movl	%edx,%ebx		/* 2nd ls word of square */
370	movl	%eax,%ecx		/* ls word of square */
371
372	movl	%edi,%eax
373	mull	%esi
374	addl	%eax,%ebx
375	addl	%eax,%ebx
376
377#ifdef PARANOID
378	cmp	$0xffffffb0,%ebx
379	jb	sqrt_near_exact_ok
380
381	cmp	$0x00000050,%ebx
382	ja	sqrt_near_exact_ok
383
384	pushl	EX_INTERNAL|0x214
385	call	EXCEPTION
386
387sqrt_near_exact_ok:
388#endif /* PARANOID */
389
390	or	%ebx,%ebx
391	js	sqrt_near_exact_small
392
393	jnz	sqrt_near_exact_large
394
395	or	%ebx,%edx
396	jnz	sqrt_near_exact_large
397
398/* Our estimate is exactly the right answer */
399	xorl	%eax,%eax
400	jmp	sqrt_round_result
401
402sqrt_near_exact_small:
403/* Our estimate is too small */
404	movl	$0x000000ff,%eax
405	jmp	sqrt_round_result
406
407sqrt_near_exact_large:
408/* Our estimate is too large, we need to decrement it */
409	subl	$1,%edi
410	sbbl	$0,%esi
411	movl	$0xffffff00,%eax
412	jmp	sqrt_round_result
413
414
415sqrt_get_more_precision:
416/* This case is almost the same as the above, except we start
417   with an extra bit of precision in the estimate. */
418	stc			/* The extra bit. */
419	rcll	$1,%edi		/* Shift the estimate left one bit */
420	rcll	$1,%esi
421
422	movl	%edi,%eax		/* ls word of guess */
423	mull	%edi
424	movl	%edx,%ebx		/* 2nd ls word of square */
425	movl	%eax,%ecx		/* ls word of square */
426
427	movl	%edi,%eax
428	mull	%esi
429	addl	%eax,%ebx
430	addl	%eax,%ebx
431
432/* Put our estimate back to its original value */
433	stc			/* The ms bit. */
434	rcrl	$1,%esi		/* Shift the estimate left one bit */
435	rcrl	$1,%edi
436
437#ifdef PARANOID
438	cmp	$0xffffff60,%ebx
439	jb	sqrt_more_prec_ok
440
441	cmp	$0x000000a0,%ebx
442	ja	sqrt_more_prec_ok
443
444	pushl	EX_INTERNAL|0x215
445	call	EXCEPTION
446
447sqrt_more_prec_ok:
448#endif /* PARANOID */
449
450	or	%ebx,%ebx
451	js	sqrt_more_prec_small
452
453	jnz	sqrt_more_prec_large
454
455	or	%ebx,%ecx
456	jnz	sqrt_more_prec_large
457
458/* Our estimate is exactly the right answer */
459	movl	$0x80000000,%eax
460	jmp	sqrt_round_result
461
462sqrt_more_prec_small:
463/* Our estimate is too small */
464	movl	$0x800000ff,%eax
465	jmp	sqrt_round_result
466
467sqrt_more_prec_large:
468/* Our estimate is too large */
469	movl	$0x7fffff00,%eax
470	jmp	sqrt_round_result
471