root/arch/sh/lib/udivsi3_i4i-Os.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0+ WITH GCC-exception-2.0
   2  *
   3  * Copyright (C) 2006 Free Software Foundation, Inc.
   4  */
   5 
   6 /* Moderately Space-optimized libgcc routines for the Renesas SH /
   7    STMicroelectronics ST40 CPUs.
   8    Contributed by J"orn Rennecke joern.rennecke@st.com.  */
   9 
  10 /* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
  11    sh4-200 run times:
  12    udiv small divisor: 55 cycles
  13    udiv large divisor: 52 cycles
  14    sdiv small divisor, positive result: 59 cycles
  15    sdiv large divisor, positive result: 56 cycles
  16    sdiv small divisor, negative result: 65 cycles (*)
  17    sdiv large divisor, negative result: 62 cycles (*)
  18    (*): r2 is restored in the rts delay slot and has a lingering latency
  19         of two more cycles.  */
  20         .balign 4
  21         .global __udivsi3_i4i
  22         .global __udivsi3_i4
  23         .set    __udivsi3_i4, __udivsi3_i4i        /* __udivsi3_i4 is an alias of __udivsi3_i4i */
  24         .type   __udivsi3_i4i, @function
  25         .type   __sdivsi3_i4i, @function
  26 __udivsi3_i4i:                  /* unsigned 32-bit divide: r0 = r4 / r5; r1 is scratch, r4 and r5 are saved on the stack and restored */
  27         sts pr,r1               /* keep the return address in r1: the bsr calls below overwrite pr */
  28         mov.l r4,@-r15          /* push the dividend register */
  29         extu.w r5,r0            /* r0 = low 16 bits of the divisor, zero-extended */
  30         cmp/eq r5,r0            /* T = 1 when the divisor fits in 16 bits */
  31         swap.w r4,r0            /* r0 = dividend with its 16-bit halves swapped */
  32         shlr16 r4               /* r4 = upper 16 bits of the dividend */
  33         bf/s large_divisor      /* divisor needs more than 16 bits: take the large-divisor path */
  34         div0u                   /* delay slot (executed on both paths): set up an unsigned division */
  35         mov.l r5,@-r15          /* push the divisor register */
  36         shll16 r5               /* move the 16-bit divisor into the upper half of r5 */
  37 sdiv_small_divisor:             /* also entered from __sdivsi3_i4i with r0/r4/r5 prepared the same way */
  38         div1 r5,r4              /* first group of 16 div1 steps, working on the dividend's upper half */
  39         bsr div6
  40         div1 r5,r4              /* delay slot; 1 + 1 + 6 = 8 steps so far */
  41         div1 r5,r4
  42         bsr div6
  43         div1 r5,r4              /* delay slot; 16 steps done */
  44         xtrct r4,r0             /* r0 = (low half of r4 << 16) | low 16 bits of the dividend */
  45         xtrct r0,r4             /* r4 = (low 16 bits of the dividend << 16) | upper half of r4 */
  46         bsr div7
  47         swap.w r4,r4            /* delay slot: former upper half of r4 on top, dividend's low 16 bits below */
  48         div1 r5,r4              /* second group: 7 steps ran in div7, this is the 8th */
  49         bsr div7
  50         div1 r5,r4              /* delay slot; 7 + 1 + 1 + 7 = 16 steps in the second group */
  51         xtrct r4,r0             /* r0 = (low half of r4 << 16) | (r0 >> 16): combine both groups of quotient bits */
  52         mov.l @r15+,r5          /* restore the caller's r5 */
  53         swap.w r0,r0            /* swap halves so the first group's bits end up in the upper half */
  54         mov.l @r15+,r4          /* restore the caller's r4 */
  55         jmp @r1                 /* return via r1 (__sdivsi3_i4i may have pointed r1 at negate_result instead) */
  56         rotcl r0                /* delay slot: rotate the final T bit into the bottom of r0 */
  57 div7:                           /* helper reached with bsr: performs 7 div1 steps on r4 with divisor r5 */
  58         div1 r5,r4
  59 div6:                           /* helper reached with bsr: performs 6 div1 steps (div7 falls through into it) */
  60                     div1 r5,r4; div1 r5,r4; div1 r5,r4
  61         div1 r5,r4; div1 r5,r4; rts;        div1 r5,r4      /* the last div1 sits in the rts delay slot */
  62 
  63 divx3:                          /* helper reached with bsr: three rotcl r0 / div1 r5,r4 pairs */
  64         rotcl r0                /* rotate r0 left through T: old T enters bit 0, bit 31 becomes the new T */
  65         div1 r5,r4              /* one division step on r4 with divisor r5 */
  66         rotcl r0
  67         div1 r5,r4
  68         rotcl r0
  69         rts
  70         div1 r5,r4              /* delay slot: third div1 executes before control returns */
  71 
  72 large_divisor:                  /* unsigned path for a divisor that does not fit in 16 bits */
  73         mov.l r5,@-r15          /* push the divisor register (the small-divisor path pushes it before shifting it) */
  74 sdiv_large_divisor:             /* also entered from __sdivsi3_i4i, which has already pushed r4 and r5 */
  75         xor r4,r0               /* r0 held the half-swapped dividend and r4 its upper half: this clears r0's low half */
  76         .rept 4                 /* 4 x (1 + 3) = 16 rotcl/div1 pairs in total */
  77         rotcl r0                /* rotate r0 left through T before the div1 in the delay slot */
  78         bsr divx3               /* three more rotcl/div1 pairs */
  79         div1 r5,r4              /* delay slot: executes before divx3 is entered */
  80         .endr
  81         mov.l @r15+,r5          /* restore the caller's r5 */
  82         mov.l @r15+,r4          /* restore the caller's r4 */
  83         jmp @r1                 /* return via r1 (__sdivsi3_i4i may have pointed r1 at negate_result instead) */
  84         rotcl r0                /* delay slot: rotate the final T bit into the bottom of r0 */
  85 
  86         .global __sdivsi3_i4i
  87         .global __sdivsi3_i4
  88         .global __sdivsi3
  89         .set    __sdivsi3_i4, __sdivsi3_i4i        /* __sdivsi3_i4 is an alias of __sdivsi3_i4i */
  90         .set    __sdivsi3, __sdivsi3_i4i           /* __sdivsi3 is an alias of __sdivsi3_i4i */
  91 __sdivsi3_i4i:                  /* signed 32-bit divide: r0 = r4 / r5; negates negative operands, reuses the unsigned paths, and negates the quotient when the signs differ */
  92         mov.l r4,@-r15          /* push the dividend register (popped by the shared division paths) */
  93         cmp/pz r5               /* T = 1 when the divisor is >= 0 */
  94         mov.l r5,@-r15          /* push the divisor register */
  95         bt/s pos_divisor
  96         cmp/pz r4               /* delay slot (executed on both paths): T = 1 when the dividend is >= 0 */
  97         neg r5,r5               /* divisor was negative: r5 = -divisor */
  98         extu.w r5,r0            /* r0 = low 16 bits of the negated divisor, zero-extended */
  99         bt/s neg_result         /* dividend >= 0, divisor < 0: the quotient must be negated */
 100         cmp/eq r5,r0            /* delay slot: T = 1 when the negated divisor fits in 16 bits (tested at sdiv_check_divisor) */
 101         neg r4,r4               /* both operands negative: r4 = -dividend, the quotient needs no negation */
 102 pos_result:
 103         swap.w r4,r0            /* r0 = dividend with its 16-bit halves swapped, as the shared paths expect */
 104         bra sdiv_check_divisor
 105         sts pr,r1               /* delay slot: r1 = caller's return address, so the shared jmp @r1 returns directly */
 106 pos_divisor:
 107         extu.w r5,r0            /* r0 = low 16 bits of the divisor, zero-extended */
 108         bt/s pos_result         /* dividend >= 0 as well: the quotient needs no negation */
 109         cmp/eq r5,r0            /* delay slot: T = 1 when the divisor fits in 16 bits (tested at sdiv_check_divisor) */
 110         neg r4,r4               /* dividend was negative: r4 = -dividend, then fall through to neg_result */
 111 neg_result:
 112         mova negate_result,r0   /* r0 = address of negate_result */
 113         ;                       /* empty statement (';' separates statements in this file) */
 114         mov r0,r1               /* the shared jmp @r1 will now land in negate_result instead of the caller */
 115         swap.w r4,r0            /* r0 = dividend with its 16-bit halves swapped, as the shared paths expect */
 116         lds r2,macl             /* park the caller's r2 in MACL (so MACL is overwritten on this path) */
 117         sts pr,r2               /* r2 = caller's return address, consumed by negate_result */
 118 sdiv_check_divisor:             /* T still holds the result of the cmp/eq r5,r0 executed before arriving here */
 119         shlr16 r4               /* r4 = upper 16 bits of the dividend */
 120         bf/s sdiv_large_divisor /* divisor needs more than 16 bits */
 121         div0u                   /* delay slot (executed on both paths): set up an unsigned division */
 122         bra sdiv_small_divisor
 123         shll16 r5               /* delay slot: move the 16-bit divisor into the upper half of r5 */
 124         .balign 4               /* 4-byte align the label; presumably required because its address is taken with mova - confirm against the SH manual */
 125 negate_result:                  /* reached through jmp @r1 when __sdivsi3_i4i determined the quotient must be negated */
 126         neg r0,r0               /* r0 = -r0 */
 127         jmp @r2                 /* return to the caller: __sdivsi3_i4i copied pr into r2 */
 128         sts macl,r2             /* delay slot: restore the caller's r2 from MACL */

/* [<][>][^][v][top][bottom][index][help] */