root/arch/x86/math-emu/polynom_Xsig.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0 */
   2 /*---------------------------------------------------------------------------+
   3  |  polynomial_Xsig.S                                                        |
   4  |                                                                           |
   5  | Fixed point arithmetic polynomial evaluation.                             |
   6  |                                                                           |
   7  | Copyright (C) 1992,1993,1994,1995                                         |
   8  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
   9  |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
  10  |                                                                           |
  11  | Call from C as:                                                           |
  12  |   void polynomial_Xsig(Xsig *accum, unsigned long long *x,                |
  13  |                        unsigned long long terms[], int n)                 |
  14  |                                                                           |
  15  | Computes:                                                                 |
  16  | terms[0] + x*(terms[1] + x*(terms[2] + ... + x*terms[n] ... ))            |
  17  | and adds the result to the 12 byte Xsig.                                  |
  18  | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
  19  | precision.                                                                |
  20  |                                                                           |
  21  | This function must be used carefully: most overflow of intermediate       |
  22  | results is controlled, but overflow of the result is not.                 |
  23  |                                                                           |
  24  +---------------------------------------------------------------------------*/
  25         .file   "polynomial_Xsig.S"
  26 
  27 #include "fpu_emu.h"
  28 
  29 
  30 #define TERM_SIZE       $8              /* bytes per terms[] entry, written as an immediate operand */
  31 #define SUM_MS          -20(%ebp)       /* running sum, ms long */
  32 #define SUM_MIDDLE      -24(%ebp)       /* running sum, middle long */
  33 #define SUM_LS          -28(%ebp)       /* running sum, ls long */
  34 #define ACCUM_MS        -4(%ebp)        /* product of sum and x, ms long */
  35 #define ACCUM_MIDDLE    -8(%ebp)        /* product of sum and x, middle long */
  36 #define ACCUM_LS        -12(%ebp)       /* product of sum and x, ls long */
  37 #define OVERFLOWED      -16(%ebp)       /* byte: non-zero if the last sum addition carried out */
  38 
  39 .text
  40 ENTRY(polynomial_Xsig)
             /*
              * Evaluate the polynomial with coefficients terms[0..n] at x by
              * Horner's rule (sum = sum * x + next lower term) and add the
              * 96-bit result to the 12 bytes at accum.
              *
              *   PARAM1 = accum, PARAM2 = x (used as a pointer to two longs),
              *   PARAM3 = terms, PARAM4 = n (index of the highest term used).
              *
              * %esi holds the x pointer for the whole routine.  %edi walks
              * terms[] downwards and is finally reloaded with accum.  PARAM4 is
              * decremented in place and serves as the loop counter.
              */
  41         pushl   %ebp
  42         movl    %esp,%ebp
  43         subl    $32,%esp                /* room for the SUM_*, ACCUM_* and OVERFLOWED locals */
  44         pushl   %esi
  45         pushl   %edi
  46         pushl   %ebx                    /* saved/restored; not otherwise referenced in this routine */
  47 
  48         movl    PARAM2,%esi             /* x (dereferenced below as ls long, ms long) */
  49         movl    PARAM3,%edi             /* terms */
  50 
  51         movl    TERM_SIZE,%eax
  52         mull    PARAM4                  /* n; %eax = n * TERM_SIZE, %edx is overwritten */
  53         addl    %eax,%edi               /* %edi -> terms[n] */
  54 
  55         movl    4(%edi),%edx            /* terms[n] ms long */
  56         movl    %edx,SUM_MS
  57         movl    (%edi),%edx             /* terms[n] ls long */
  58         movl    %edx,SUM_MIDDLE
  59         xor     %eax,%eax
  60         movl    %eax,SUM_LS             /* sum = terms[n] in the upper two longs, zero below */
  61         movb    %al,OVERFLOWED          /* no carry pending before the first pass */
  62 
  63         subl    TERM_SIZE,%edi          /* %edi -> terms[n-1], the next term to add */
  64         decl    PARAM4
  65         js      L_accum_done            /* counter went negative (n was 0): nothing to multiply */
  66 
  67 L_accum_loop:
             /*
              * ACCUM = upper 96 bits of the 128-bit product
              * (SUM_MS:SUM_MIDDLE) * x, built from four 32x32 partial products.
              * SUM_LS does not take part in the multiplication.
              */
  68         xor     %eax,%eax
  69         movl    %eax,ACCUM_MS
  70         movl    %eax,ACCUM_MIDDLE
  71 
  72         movl    SUM_MIDDLE,%eax
  73         mull    (%esi)                  /* x ls long */
  74         movl    %edx,ACCUM_LS           /* keep the high half only; the low 32 bits are dropped */
  75 
  76         movl    SUM_MIDDLE,%eax
  77         mull    4(%esi)                 /* x ms long */
  78         addl    %eax,ACCUM_LS
  79         adcl    %edx,ACCUM_MIDDLE
  80         adcl    $0,ACCUM_MS             /* propagate the carry into the ms long */
  81 
  82         movl    SUM_MS,%eax
  83         mull    (%esi)                  /* x ls long */
  84         addl    %eax,ACCUM_LS
  85         adcl    %edx,ACCUM_MIDDLE
  86         adcl    $0,ACCUM_MS             /* propagate the carry into the ms long */
  87 
  88         movl    SUM_MS,%eax
  89         mull    4(%esi)                 /* x ms long */
  90         addl    %eax,ACCUM_MIDDLE
  91         adcl    %edx,ACCUM_MS
  92 
  93         testb   $0xff,OVERFLOWED        /* did the previous sum addition carry out of SUM_MS? */
  94         jz      L_no_overflow
  95 
             /*
              * The carry that was lost lies one long above SUM_MS, so its
              * contribution to the product is x itself, aligned with
              * ACCUM_MS:ACCUM_MIDDLE.
              */
  96         movl    (%esi),%eax
  97         addl    %eax,ACCUM_MIDDLE
  98         movl    4(%esi),%eax
  99         adcl    %eax,ACCUM_MS           /* This could overflow too */
 100 
 101 L_no_overflow:
 102 
 103 /*
 104  * Now put the sum of next term and the accumulator
 105  * into the sum register
      *
      * The three additions form one carry chain (addl, adcl, adcl); the
      * movl instructions in between leave the carry flag untouched.
      *
      * NOTE(review): the term's ls long, (%edi), is added at both the LS
      * and the MIDDLE position below - confirm that this is intended.
 106  */
 107         movl    ACCUM_LS,%eax
 108         addl    (%edi),%eax             /* term ls long */
 109         movl    %eax,SUM_LS
 110         movl    ACCUM_MIDDLE,%eax
 111         adcl    (%edi),%eax             /* term ls long */
 112         movl    %eax,SUM_MIDDLE
 113         movl    ACCUM_MS,%eax
 114         adcl    4(%edi),%eax            /* term ms long */
 115         movl    %eax,SUM_MS
 116         sbbb    %al,%al                 /* %al = 0xff if the ms addition carried, else 0 */
 117         movb    %al,OVERFLOWED          /* Used in the next iteration */
 118 
 119         subl    TERM_SIZE,%edi          /* step down to the next lower term */
 120         decl    PARAM4
 121         jns     L_accum_loop            /* loop while the counter is still >= 0 */
 122 
 123 L_accum_done:
             /* Add the 96-bit sum to the three longs at accum as one carry chain. */
 124         movl    PARAM1,%edi             /* accum */
 125         movl    SUM_LS,%eax
 126         addl    %eax,(%edi)             /* accum ls long */
 127         movl    SUM_MIDDLE,%eax
 128         adcl    %eax,4(%edi)            /* accum middle long */
 129         movl    SUM_MS,%eax
 130         adcl    %eax,8(%edi)            /* accum ms long; a carry out of this add is not handled */
 131 
 132         popl    %ebx
 133         popl    %edi
 134         popl    %esi
 135         leave
 136         ret
 137 ENDPROC(polynomial_Xsig)

/* [<][>][^][v][top][bottom][index][help] */