root/arch/arm/lib/lib1funcs.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /*
   2  * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
   3  *
   4  * Author: Nicolas Pitre <nico@fluxnic.net>
   5  *   - contributed to gcc-3.4 on Sep 30, 2003
   6  *   - adapted for the Linux kernel on Oct 2, 2003
   7  */
   8 
   9 /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
  10 
  11 This file is free software; you can redistribute it and/or modify it
  12 under the terms of the GNU General Public License as published by the
  13 Free Software Foundation; either version 2, or (at your option) any
  14 later version.
  15 
  16 In addition to the permissions in the GNU General Public License, the
  17 Free Software Foundation gives you unlimited permission to link the
  18 compiled version of this file into combinations with other programs,
  19 and to distribute those combinations without any restriction coming
  20 from the use of this file.  (The General Public License restrictions
  21 do apply in other respects; for example, they cover modification of
  22 the file, and distribution when not linked into a combine
  23 executable.)
  24 
  25 This file is distributed in the hope that it will be useful, but
  26 WITHOUT ANY WARRANTY; without even the implied warranty of
  27 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  28 General Public License for more details.
  29 
  30 You should have received a copy of the GNU General Public License
  31 along with this program; see the file COPYING.  If not, write to
  32 the Free Software Foundation, 59 Temple Place - Suite 330,
  33 Boston, MA 02111-1307, USA.  */
  34 
  35 
  36 #include <linux/linkage.h>
  37 #include <asm/assembler.h>
  38 #include <asm/unwind.h>
  39 
  40 .macro ARM_DIV_BODY dividend, divisor, result, curbit
     /*
      * Core of the unsigned 32-bit shift-and-subtract division.
      *
      * Register operands:
      *   dividend - value to divide; destroyed by the macro
      *   divisor  - value to divide by; destroyed by the macro
      *   result   - receives the quotient
      *   curbit   - scratch: the quotient bit that matches the current
      *              position of the shifted divisor
      *
      * The condition flags are clobbered.
      *
      * Every expansion in this file is reached only after the caller has
      * branched away for a zero divisor, for dividend <= divisor and for
      * a power-of-two divisor.
      *
      * NOTE(review): the unrolled loop keeps comparing against
      * divisor >> n even when curbit >> n is already zero, so dividend
      * does not look like a usable remainder on exit.  The callers in
      * this file only read result.
      */
  41 
  42 #if __LINUX_ARM_ARCH__ >= 5
  43 
     /*
      * Line up the top set bit of divisor with the top set bit of
      * dividend: the shift count is clz(divisor) - clz(dividend), and
      * curbit becomes 1 shifted left by that same count.
      */
  44         clz     \curbit, \divisor
  45         clz     \result, \dividend
  46         sub     \result, \curbit, \result
  47         mov     \curbit, #1
  48         mov     \divisor, \divisor, lsl \result
  49         mov     \curbit, \curbit, lsl \result
  50         mov     \result, #0
  51         
  52 #else
  53 
  54         @ Initially shift the divisor left 3 bits if possible,
  55         @ set curbit accordingly.  This allows for curbit to be located
  56         @ at the left end of each 4 bit nibbles in the division loop
  57         @ to save one loop in most cases.
  58         tst     \divisor, #0xe0000000
  59         moveq   \divisor, \divisor, lsl #3
  60         moveq   \curbit, #8
  61         movne   \curbit, #1
  62 
  63         @ Unless the divisor is very big, shift it up in multiples of
  64         @ four bits, since this is the amount of unwinding in the main
  65         @ division loop.  Continue shifting until the divisor is 
  66         @ larger than the dividend.
  67 1:      cmp     \divisor, #0x10000000
  68         cmplo   \divisor, \dividend
  69         movlo   \divisor, \divisor, lsl #4
  70         movlo   \curbit, \curbit, lsl #4
  71         blo     1b
  72 
  73         @ For very big divisors, we must shift it a bit at a time, or
  74         @ we will be in danger of overflowing.
  75 1:      cmp     \divisor, #0x80000000
  76         cmplo   \divisor, \dividend
  77         movlo   \divisor, \divisor, lsl #1
  78         movlo   \curbit, \curbit, lsl #1
  79         blo     1b
  80 
  81         mov     \result, #0
  82 
  83 #endif
  84 
     /*
      * Each pass produces up to four quotient bits: dividend is compared
      * with divisor, divisor >> 1, divisor >> 2 and divisor >> 3, and for
      * every compare that is "higher or same" the shifted divisor is
      * subtracted and the matching curbit position is set in result.
      */
  85         @ Division loop
  86 1:      cmp     \dividend, \divisor
  87         subhs   \dividend, \dividend, \divisor
  88         orrhs   \result,   \result,   \curbit
  89         cmp     \dividend, \divisor,  lsr #1
  90         subhs   \dividend, \dividend, \divisor, lsr #1
  91         orrhs   \result,   \result,   \curbit,  lsr #1
  92         cmp     \dividend, \divisor,  lsr #2
  93         subhs   \dividend, \dividend, \divisor, lsr #2
  94         orrhs   \result,   \result,   \curbit,  lsr #2
  95         cmp     \dividend, \divisor,  lsr #3
  96         subhs   \dividend, \dividend, \divisor, lsr #3
  97         orrhs   \result,   \result,   \curbit,  lsr #3
     /*
      * Leave the loop when dividend has reached zero, or when moving
      * curbit down by four positions leaves it zero (movsne updates Z).
      * Otherwise move divisor down by four as well and go round again.
      */
  98         cmp     \dividend, #0                   @ Early termination?
  99         movsne  \curbit,   \curbit,  lsr #4     @ No, any more bits to do?
 100         movne   \divisor,  \divisor, lsr #4
 101         bne     1b
 102 
 103 .endm
 104 
 105 
 106 .macro ARM_DIV2_ORDER divisor, order
     /*
      * Compute the bit position of the set bit of a power-of-two divisor
      * so that the caller can divide with a plain right shift.
      *
      * Register operands:
      *   divisor - the divisor; both expansions in this file are reached
      *             only after a test showed it has a single bit set.
      *             Left intact by the clz variant, shifted down (and so
      *             destroyed) by the fallback variant.
      *   order   - receives the bit position (0..31)
      *
      * The fallback variant also clobbers the condition flags.
      */
 107 
 108 #if __LINUX_ARM_ARCH__ >= 5
 109 
 110         clz     \order, \divisor
 111         rsb     \order, \order, #31             /* order = 31 - clz(divisor) */
 112 
 113 #else
 114 
     /*
      * Binary search without clz: strip 16, then 8, then 4 low bit
      * positions whenever the divisor is at least that large, adding the
      * stripped amount to order.
      */
 115         cmp     \divisor, #(1 << 16)
 116         movhs   \divisor, \divisor, lsr #16
 117         movhs   \order, #16
 118         movlo   \order, #0
 119 
 120         cmp     \divisor, #(1 << 8)
 121         movhs   \divisor, \divisor, lsr #8
 122         addhs   \order, \order, #8
 123 
 124         cmp     \divisor, #(1 << 4)
 125         movhs   \divisor, \divisor, lsr #4
 126         addhs   \order, \order, #4
 127 
     /*
      * For a single-bit input the divisor is now 1, 2, 4 or 8.  A value
      * above 4 (that is, 8) adds 3; otherwise divisor >> 1 is added,
      * which is 0, 1 or 2 for 1, 2 or 4.
      */
 128         cmp     \divisor, #(1 << 2)
 129         addhi   \order, \order, #3
 130         addls   \order, \order, \divisor, lsr #1
 131 
 132 #endif
 133 
 134 .endm
 135 
 136 
 137 .macro ARM_MOD_BODY dividend, divisor, order, spare
     /*
      * Core of the unsigned 32-bit remainder computation.
      *
      * Register operands:
      *   dividend - value to reduce; holds the remainder on exit
      *   divisor  - value to divide by; destroyed by the macro
      *   order    - scratch: how many bit positions divisor was shifted
      *              left, then a countdown of the steps still to do
      *   spare    - scratch, written only by the clz variant
      *
      * The condition flags are clobbered.
      *
      * Both expansions in this file are reached only when dividend is
      * greater than divisor and divisor is neither zero, one nor a power
      * of two.
      */
 138 
 139 #if __LINUX_ARM_ARCH__ >= 5
 140 
     /*
      * Shift divisor left until its top set bit lines up with the top
      * set bit of dividend; order records the shift count.
      */
 141         clz     \order, \divisor
 142         clz     \spare, \dividend
 143         sub     \order, \order, \spare
 144         mov     \divisor, \divisor, lsl \order
 145 
 146 #else
 147 
 148         mov     \order, #0
 149 
 150         @ Unless the divisor is very big, shift it up in multiples of
 151         @ four bits, since this is the amount of unwinding in the main
 152         @ division loop.  Continue shifting until the divisor is 
 153         @ larger than the dividend.
 154 1:      cmp     \divisor, #0x10000000
 155         cmplo   \divisor, \dividend
 156         movlo   \divisor, \divisor, lsl #4
 157         addlo   \order, \order, #4
 158         blo     1b
 159 
 160         @ For very big divisors, we must shift it a bit at a time, or
 161         @ we will be in danger of overflowing.
 162 1:      cmp     \divisor, #0x80000000
 163         cmplo   \divisor, \dividend
 164         movlo   \divisor, \divisor, lsl #1
 165         addlo   \order, \order, #1
 166         blo     1b
 167 
 168 #endif
 169 
     /*
      * With divisor shifted left by order positions, order + 1
      * compare/subtract steps are needed in total.  Taking 3 off first
      * makes "order >= 0" mean "at least four steps are left", which is
      * what one pass of the unrolled loop consumes.
      */
 170         @ Perform all needed subtractions to keep only the remainder.
 171         @ Do comparisons in batch of 4 first.
 172         subs    \order, \order, #3              @ yes, 3 is intended here
 173         blt     2f
 174 
 175 1:      cmp     \dividend, \divisor
 176         subhs   \dividend, \dividend, \divisor
 177         cmp     \dividend, \divisor,  lsr #1
 178         subhs   \dividend, \dividend, \divisor, lsr #1
 179         cmp     \dividend, \divisor,  lsr #2
 180         subhs   \dividend, \dividend, \divisor, lsr #2
 181         cmp     \dividend, \divisor,  lsr #3
 182         subhs   \dividend, \dividend, \divisor, lsr #3
     /*
      * Go round again only while dividend is still non-zero and, after
      * taking four more steps off order, order has not gone negative.
      * The plain mov in between does not touch the flags set by the cmp.
      */
 183         cmp     \dividend, #1
 184         mov     \divisor, \divisor, lsr #4
 185         subsge  \order, \order, #4
 186         bge     1b
 187 
     /*
      * Finished if the low two bits of order are clear (no partial group
      * of steps is left) or if dividend has already reached zero.
      */
 188         tst     \order, #3
 189         teqne   \dividend, #0
 190         beq     5f
 191 
     /*
      * order is -1, -2 or -3 here, meaning 3, 2 or 1 steps remain.  The
      * cmn compares order with -2: below goes to the single step at 4,
      * equal to the two steps from 3, above falls through to all three.
      */
 192         @ Either 1, 2 or 3 comparison/subtractions are left.
 193 2:      cmn     \order, #2
 194         blt     4f
 195         beq     3f
 196         cmp     \dividend, \divisor
 197         subhs   \dividend, \dividend, \divisor
 198         mov     \divisor,  \divisor,  lsr #1
 199 3:      cmp     \dividend, \divisor
 200         subhs   \dividend, \dividend, \divisor
 201         mov     \divisor,  \divisor,  lsr #1
 202 4:      cmp     \dividend, \divisor
 203         subhs   \dividend, \dividend, \divisor
 204 5:
 205 .endm
 206 
 207 
 208 #ifdef CONFIG_ARM_PATCH_IDIV
 209         .align  3
 210 #endif
 211 
     /*
      * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
      *
      * In:   r0 = dividend, r1 = divisor
      * Out:  r0 = quotient
      * Uses: r1, r2, r3 and the condition flags as scratch
      *
      * A zero divisor branches to Ldiv0, which calls __div0 and then
      * returns 0 to the caller of this function.
      *
      * NOTE(review): the .align 3 above is emitted only with
      * CONFIG_ARM_PATCH_IDIV; presumably the entry point gets patched at
      * run time and needs 8-byte alignment for that - confirm.
      * NOTE(review): ret and reteq are not defined in this file;
      * presumably they are return macros from asm/assembler.h.
      */
 212 ENTRY(__udivsi3)
 213 ENTRY(__aeabi_uidiv)
 214 UNWIND(.fnstart)
 215 
 216         subs    r2, r1, #1                      /* r2 = divisor - 1, kept for the power-of-two test */
 217         reteq   lr                              /* divisor == 1: quotient is the dividend, already in r0 */
 218         bcc     Ldiv0                           /* the subtraction borrowed: divisor == 0 */
 219         cmp     r0, r1
 220         bls     11f                             /* dividend <= divisor: quotient is 0 or 1 */
 221         tst     r1, r2
 222         beq     12f                             /* divisor & (divisor - 1) == 0: power of two */
 223 
 224         ARM_DIV_BODY r0, r1, r2, r3
 225 
 226         mov     r0, r2                          /* the macro left the quotient in r2 */
 227         ret     lr
 228 
     /* Flags still come from the cmp r0, r1 above: equal gives 1, lower gives 0. */
 229 11:     moveq   r0, #1
 230         movne   r0, #0
 231         ret     lr
 232 
     /* Power-of-two divisor: shift the dividend right by log2(divisor). */
 233 12:     ARM_DIV2_ORDER r1, r2
 234 
 235         mov     r0, r0, lsr r2
 236         ret     lr
 237 
 238 UNWIND(.fnend)
 239 ENDPROC(__udivsi3)
 240 ENDPROC(__aeabi_uidiv)
 241 
 242 ENTRY(__umodsi3)
     /*
      * __umodsi3: unsigned 32-bit remainder.
      *
      * In:   r0 = dividend, r1 = divisor
      * Out:  r0 = dividend modulo divisor
      * Uses: r1, r2, r3 and the condition flags as scratch
      *
      * A zero divisor branches to Ldiv0, which calls __div0 and then
      * returns 0 to the caller of this function.
      *
      * The conditional instructions below settle every easy case before
      * retls, without branching:
      *   divisor == 1, or dividend == divisor   -> r0 = 0
      *   dividend <  divisor                    -> r0 returned unchanged
      *   dividend >  divisor, divisor = 2^n     -> r0 &= divisor - 1
      * Only dividend > divisor with a divisor that is not a power of two
      * falls through to ARM_MOD_BODY.  This relies on tst leaving the
      * carry flag as the preceding cmp set it.
      */
 243 UNWIND(.fnstart)
 244 
 245         subs    r2, r1, #1                      @ compare divisor with 1
 246         bcc     Ldiv0                           /* the subtraction borrowed: divisor == 0 */
 247         cmpne   r0, r1                          @ compare dividend with divisor
 248         moveq   r0, #0
 249         tsthi   r1, r2                          @ see if divisor is power of 2
 250         andeq   r0, r0, r2
 251         retls   lr
 252 
 253         ARM_MOD_BODY r0, r1, r2, r3
 254 
 255         ret     lr
 256 
 257 UNWIND(.fnend)
 258 ENDPROC(__umodsi3)
 259 
 260 #ifdef CONFIG_ARM_PATCH_IDIV
 261         .align 3
 262 #endif
 263 
     /*
      * __divsi3 / __aeabi_idiv: signed 32-bit division.
      *
      * In:   r0 = dividend, r1 = divisor
      * Out:  r0 = quotient
      * Uses: r1, r2, r3, ip and the condition flags as scratch
      *
      * The work is done on absolute values; ip holds dividend EOR
      * divisor, whose top bit is the sign the quotient must have.
      * A zero divisor branches to Ldiv0, which calls __div0 and then
      * returns 0 to the caller of this function.
      *
      * NOTE(review): the .align 3 above is emitted only with
      * CONFIG_ARM_PATCH_IDIV; presumably the entry point gets patched at
      * run time and needs 8-byte alignment for that - confirm.
      */
 264 ENTRY(__divsi3)
 265 ENTRY(__aeabi_idiv)
 266 UNWIND(.fnstart)
 267 
     /*
      * The eor has no s suffix, so beq and rsbmi below still act on the
      * flags from cmp r1, #0.
      */
 268         cmp     r1, #0
 269         eor     ip, r0, r1                      @ save the sign of the result.
 270         beq     Ldiv0
 271         rsbmi   r1, r1, #0                      @ loops below use unsigned.
 272         subs    r2, r1, #1                      @ division by 1 or -1 ?
 273         beq     10f
 274         movs    r3, r0
 275         rsbmi   r3, r0, #0                      @ positive dividend value
 276         cmp     r3, r1
 277         bls     11f                             /* |dividend| <= |divisor|: quotient is 0, 1 or -1 */
 278         tst     r1, r2                          @ divisor is power of 2 ?
 279         beq     12f
 280 
 281         ARM_DIV_BODY r3, r1, r0, r2
 282 
 283         cmp     ip, #0
 284         rsbmi   r0, r0, #0                      /* operand signs differed: negate the quotient */
 285         ret     lr
 286 
     /*
      * Divisor was 1 or -1.  ip EOR r0 gives back the original divisor,
      * so the teq result is negative exactly when the divisor was -1, in
      * which case the dividend is negated.
      */
 287 10:     teq     ip, r0                          @ same sign ?
 288         rsbmi   r0, r0, #0
 289         ret     lr
 290 
     /*
      * Flags still come from cmp r3, r1.  Lower gives 0.  Equal gives
      * the sign word of ip (0 or -1) with bit 0 set, that is 1 or -1.
      */
 291 11:     movlo   r0, #0
 292         moveq   r0, ip, asr #31
 293         orreq   r0, r0, #1
 294         ret     lr
 295 
     /*
      * Power-of-two divisor: shift |dividend| right by log2(|divisor|),
      * then apply the sign kept in ip.
      */
 296 12:     ARM_DIV2_ORDER r1, r2
 297 
 298         cmp     ip, #0
 299         mov     r0, r3, lsr r2
 300         rsbmi   r0, r0, #0
 301         ret     lr
 302 
 303 UNWIND(.fnend)
 304 ENDPROC(__divsi3)
 305 ENDPROC(__aeabi_idiv)
 306 
 307 ENTRY(__modsi3)
     /*
      * __modsi3: signed 32-bit remainder.
      *
      * In:   r0 = dividend, r1 = divisor
      * Out:  r0 = remainder, carrying the sign of the dividend
      * Uses: r1, r2, r3, ip and the condition flags as scratch
      *
      * The remainder is computed on absolute values and negated at
      * label 10 when the original dividend, kept in ip, was negative.
      * A zero divisor branches to Ldiv0, which calls __div0 and then
      * returns 0 to the caller of this function.
      *
      * The conditional sequence from subs to bls settles the easy cases
      * without ARM_MOD_BODY:
      *   |divisor| == 1, or |dividend| == |divisor|  -> r0 = 0
      *   |dividend| <  |divisor|                     -> r0 = |dividend|
      *   |divisor| is a power of two                 -> r0 &= |divisor| - 1
      */
 308 UNWIND(.fnstart)
 309 
 310         cmp     r1, #0
 311         beq     Ldiv0
 312         rsbmi   r1, r1, #0                      @ loops below use unsigned.
 313         movs    ip, r0                          @ preserve sign of dividend
 314         rsbmi   r0, r0, #0                      @ if negative make positive
 315         subs    r2, r1, #1                      @ compare divisor with 1
 316         cmpne   r0, r1                          @ compare dividend with divisor
 317         moveq   r0, #0
 318         tsthi   r1, r2                          @ see if divisor is power of 2
 319         andeq   r0, r0, r2
 320         bls     10f
 321 
 322         ARM_MOD_BODY r0, r1, r2, r3
 323 
 324 10:     cmp     ip, #0
 325         rsbmi   r0, r0, #0                      /* dividend was negative: negate the remainder */
 326         ret     lr
 327 
 328 UNWIND(.fnend)
 329 ENDPROC(__modsi3)
 330 
 331 #ifdef CONFIG_AEABI
 332 
 333 ENTRY(__aeabi_uidivmod)
     /*
      * __aeabi_uidivmod: unsigned 32-bit division giving both results.
      *
      * In:   r0 = dividend, r1 = divisor
      * Out:  r0 = quotient as returned by __aeabi_uidiv
      *       r1 = dividend - quotient * divisor, that is the remainder
      * Uses: r2, r3; ip and lr are saved and restored around the call
      *
      * With a zero divisor the callee comes back with r0 = 0 by way of
      * Ldiv0, so r1 ends up equal to the dividend.
      */
 334 UNWIND(.fnstart)
 335 UNWIND(.save {r0, r1, ip, lr}   )
 336 
 337         stmfd   sp!, {r0, r1, ip, lr}
 338         bl      __aeabi_uidiv
 339         ldmfd   sp!, {r1, r2, ip, lr}           /* r1 = saved dividend, r2 = saved divisor */
 340         mul     r3, r0, r2                      /* r3 = quotient * divisor */
 341         sub     r1, r1, r3                      /* r1 = dividend - r3 */
 342         ret     lr
 343 
 344 UNWIND(.fnend)
 345 ENDPROC(__aeabi_uidivmod)
 346 
 347 ENTRY(__aeabi_idivmod)
     /*
      * __aeabi_idivmod: signed 32-bit division giving both results.
      *
      * In:   r0 = dividend, r1 = divisor
      * Out:  r0 = quotient as returned by __aeabi_idiv
      *       r1 = dividend - quotient * divisor, that is the remainder
      * Uses: r2, r3; ip and lr are saved and restored around the call
      *
      * With a zero divisor the callee comes back with r0 = 0 by way of
      * Ldiv0, so r1 ends up equal to the dividend.
      */
 348 UNWIND(.fnstart)
 349 UNWIND(.save {r0, r1, ip, lr}   )
 350         stmfd   sp!, {r0, r1, ip, lr}
 351         bl      __aeabi_idiv
 352         ldmfd   sp!, {r1, r2, ip, lr}           /* r1 = saved dividend, r2 = saved divisor */
 353         mul     r3, r0, r2                      /* r3 = quotient * divisor */
 354         sub     r1, r1, r3                      /* r1 = dividend - r3 */
 355         ret     lr
 356 
 357 UNWIND(.fnend)
 358 ENDPROC(__aeabi_idivmod)
 359 
 360 #endif
 361 
 362 Ldiv0:
     /*
      * Shared division-by-zero path.  The division and remainder
      * routines in this file branch here (not call), so lr still holds
      * the return address of whoever called them.
      *
      * lr is kept on the stack across the call to __div0, which is
      * defined outside this file.  Afterwards r0 is set to 0 and the
      * saved lr is loaded straight into pc, returning to that caller.
      *
      * The stack moves by 8 bytes although only lr (4 bytes) is stored;
      * the .pad #4 and .save {lr} annotations describe the same layout.
      * NOTE(review): presumably 8 bytes are used to keep sp 8-byte
      * aligned across the call - confirm.
      */
 363 UNWIND(.fnstart)
 364 UNWIND(.pad #4)
 365 UNWIND(.save {lr})
 366         str     lr, [sp, #-8]!
 367         bl      __div0
 368         mov     r0, #0                  @ About as wrong as it could be.
 369         ldr     pc, [sp], #8
 370 UNWIND(.fnend)
 371 ENDPROC(Ldiv0)

/* [<][>][^][v][top][bottom][index][help] */