root/arch/ia64/lib/xor.S

/* [<][>][^][v][top][bottom][index][help] */
   1 /* SPDX-License-Identifier: GPL-2.0-or-later */
   2 /*
   3  * arch/ia64/lib/xor.S
   4  *
   5  * Optimized RAID-5 checksumming functions for IA-64.
   6  */
   7 
   8 #include <asm/asmmacro.h>
   9 #include <asm/export.h>
  10 
   11 GLOBAL_ENTRY(xor_ia64_2)
      /*
       * XOR two buffers together one 8-byte word at a time and write the
       * result back over the first buffer.
       *
       *   in0  length in bytes; the loop runs in0 >> 3 times, so a tail
       *        shorter than 8 bytes is not processed
       *   in1  first source, also the destination (loaded via r16, stored via r8)
       *   in2  second source (loaded via r17)
       *
       * The loop is software-pipelined with br.ctop.  In one pass it issues the
       * loads for word i (stage p[0]), the xor for word i-6 (stage p[6]) and the
       * store for word i-7 (stage p[6+1]).  The s1[], s2[] and d[] names rotate
       * by one register per pass, so s1[6] is the value loaded into s1[0] six
       * passes earlier and d[1] is the d[0] of the previous pass.
       *
       * The 16 rotating registers requested by alloc start at r32 and therefore
       * cover in0-in2, which is why the arguments are moved to r8, r16, r17 and
       * ar.lc before the loop starts.
       *
       * NOTE(review): nothing guards against in0 < 8, where (in0 >> 3) - 1 is
       * negative when it is written to ar.lc; callers presumably always pass
       * at least one full word - confirm.
       */
   12         .prologue
   13         .fframe 0
   14         .save ar.pfs, r31
   15         alloc r31 = ar.pfs, 3, 0, 13, 16        /* 3 in, 0 local, 13 out, 16 rotating (2*7 + 2) */
   16         .save ar.lc, r30
   17         mov r30 = ar.lc                         /* keep caller's loop count for restore at exit */
   18         .save pr, r29
   19         mov r29 = pr                            /* keep caller's predicates for restore at exit */
   20         ;;
   21         .body
   22         mov r8 = in1                            /* r8 = store pointer (destination == first source) */
   23         mov ar.ec = 6 + 2                       /* epilogue count = number of pipeline stages p[0]..p[7] */
   24         shr in0 = in0, 3                        /* bytes -> 8-byte words */
   25         ;;
   26         adds in0 = -1, in0                      /* ar.lc takes the trip count minus one */
   27         mov r16 = in1                           /* r16 = load pointer for first source */
   28         mov r17 = in2                           /* r17 = load pointer for second source */
   29         ;;
   30         mov ar.lc = in0
   31         mov pr.rot = 1 << 16                    /* p16 (= p[0]) set, remaining rotating predicates clear */
   32         ;;
   33         .rotr s1[6+1], s2[6+1], d[2]
   34         .rotp p[6+2]
   35 0:
   36 (p[0])  ld8.nta s1[0] = [r16], 8                /* stage 0: load both operands, post-increment by 8 */
   37 (p[0])  ld8.nta s2[0] = [r17], 8
   38 (p[6])  xor d[0] = s1[6], s2[6]                 /* stage 6: combine the words loaded six passes ago */
   39 (p[6+1])st8.nta [r8] = d[1], 8                  /* stage 7: store the result produced in the previous pass */
   40         nop.f 0                                 /* explicit F-slot filler (presumably to fix the bundle layout) */
   41         br.ctop.dptk.few 0b                     /* rotate registers; count down ar.lc, then ar.ec to drain */
   42         ;;
   43         mov ar.lc = r30                         /* restore caller's ar.lc */
   44         mov pr = r29, -1                        /* restore all predicate registers */
   45         br.ret.sptk.few rp
   46 END(xor_ia64_2)
   47 EXPORT_SYMBOL(xor_ia64_2)
  48 
   49 GLOBAL_ENTRY(xor_ia64_3)
      /*
       * XOR three buffers together one 8-byte word at a time and write the
       * result back over the first buffer.
       *
       *   in0  length in bytes; the loop runs in0 >> 3 times, so a tail
       *        shorter than 8 bytes is not processed
       *   in1  first source, also the destination (loaded via r16, stored via r8)
       *   in2  second source (loaded via r17)
       *   in3  third source (loaded via r18)
       *
       * The loop is software-pipelined with br.ctop: loads for word i run in
       * stage p[0], both xors for word i-6 in stage p[6], and the store for
       * word i-7 in stage p[6+1].  s1[6], s2[6] and s3[6] are the values
       * loaded six passes earlier; d[1] is the d[0] of the previous pass.
       *
       * The 24 rotating registers requested by alloc start at r32 and therefore
       * cover in0-in3, which is why the arguments are moved to r8, r16-r18 and
       * ar.lc before the loop starts.
       *
       * NOTE(review): nothing guards against in0 < 8, where (in0 >> 3) - 1 is
       * negative when it is written to ar.lc; callers presumably always pass
       * at least one full word - confirm.
       */
   50         .prologue
   51         .fframe 0
   52         .save ar.pfs, r31
   53         alloc r31 = ar.pfs, 4, 0, 20, 24        /* 4 in, 0 local, 20 out, 24 rotating (3*7 + 2 = 23 names) */
   54         .save ar.lc, r30
   55         mov r30 = ar.lc                         /* keep caller's loop count for restore at exit */
   56         .save pr, r29
   57         mov r29 = pr                            /* keep caller's predicates for restore at exit */
   58         ;;
   59         .body
   60         mov r8 = in1                            /* r8 = store pointer (destination == first source) */
   61         mov ar.ec = 6 + 2                       /* epilogue count = number of pipeline stages p[0]..p[7] */
   62         shr in0 = in0, 3                        /* bytes -> 8-byte words */
   63         ;;
   64         adds in0 = -1, in0                      /* ar.lc takes the trip count minus one */
   65         mov r16 = in1                           /* r16 = load pointer for first source */
   66         mov r17 = in2                           /* r17 = load pointer for second source */
   67         ;;
   68         mov r18 = in3                           /* r18 = load pointer for third source */
   69         mov ar.lc = in0
   70         mov pr.rot = 1 << 16                    /* p16 (= p[0]) set, remaining rotating predicates clear */
   71         ;;
   72         .rotr s1[6+1], s2[6+1], s3[6+1], d[2]
   73         .rotp p[6+2]
   74 0:
   75 (p[0])  ld8.nta s1[0] = [r16], 8                /* stage 0: load operands, post-increment by 8 */
   76 (p[0])  ld8.nta s2[0] = [r17], 8
   77 (p[6])  xor d[0] = s1[6], s2[6]                 /* stage 6: partial result s1 ^ s2 */
   78         ;;                                      /* stop: the next xor reads the d[0] just written */
   79 (p[0])  ld8.nta s3[0] = [r18], 8
   80 (p[6+1])st8.nta [r8] = d[1], 8                  /* stage 7: store the result produced in the previous pass */
   81 (p[6])  xor d[0] = d[0], s3[6]                  /* stage 6: fold in the third source */
   82         br.ctop.dptk.few 0b                     /* rotate registers; count down ar.lc, then ar.ec to drain */
   83         ;;
   84         mov ar.lc = r30                         /* restore caller's ar.lc */
   85         mov pr = r29, -1                        /* restore all predicate registers */
   86         br.ret.sptk.few rp
   87 END(xor_ia64_3)
   88 EXPORT_SYMBOL(xor_ia64_3)
  89 
   90 GLOBAL_ENTRY(xor_ia64_4)
      /*
       * XOR four buffers together one 8-byte word at a time and write the
       * result back over the first buffer.
       *
       *   in0  length in bytes; the loop runs in0 >> 3 times, so a tail
       *        shorter than 8 bytes is not processed
       *   in1  first source, also the destination (loaded via r16, stored via r8)
       *   in2  second source (loaded via r17)
       *   in3  third source (loaded via r18)
       *   in4  fourth source (loaded via r19)
       *
       * The loop is software-pipelined with br.ctop: loads for word i run in
       * stage p[0], the xors for word i-6 in stage p[6], and the store for
       * word i-7 in stage p[6+1].  r20 is a non-rotating temporary that holds
       * s3 ^ s4 between the two instruction groups of a single pass.
       *
       * The 32 rotating registers requested by alloc start at r32 and therefore
       * cover in0-in4, which is why the arguments are moved to r8, r16-r19 and
       * ar.lc before the loop starts.
       *
       * NOTE(review): nothing guards against in0 < 8, where (in0 >> 3) - 1 is
       * negative when it is written to ar.lc; callers presumably always pass
       * at least one full word - confirm.
       */
   91         .prologue
   92         .fframe 0
   93         .save ar.pfs, r31
   94         alloc r31 = ar.pfs, 5, 0, 27, 32        /* 5 in, 0 local, 27 out, 32 rotating (4*7 + 2 = 30 names) */
   95         .save ar.lc, r30
   96         mov r30 = ar.lc                         /* keep caller's loop count for restore at exit */
   97         .save pr, r29
   98         mov r29 = pr                            /* keep caller's predicates for restore at exit */
   99         ;;
  100         .body
  101         mov r8 = in1                            /* r8 = store pointer (destination == first source) */
  102         mov ar.ec = 6 + 2                       /* epilogue count = number of pipeline stages p[0]..p[7] */
  103         shr in0 = in0, 3                        /* bytes -> 8-byte words */
  104         ;;
  105         adds in0 = -1, in0                      /* ar.lc takes the trip count minus one */
  106         mov r16 = in1                           /* r16 = load pointer for first source */
  107         mov r17 = in2                           /* r17 = load pointer for second source */
  108         ;;
  109         mov r18 = in3                           /* r18 = load pointer for third source */
  110         mov ar.lc = in0
  111         mov pr.rot = 1 << 16                    /* p16 (= p[0]) set, remaining rotating predicates clear */
  112         mov r19 = in4                           /* r19 = load pointer for fourth source */
  113         ;;
  114         .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
  115         .rotp p[6+2]
  116 0:
  117 (p[0])  ld8.nta s1[0] = [r16], 8                /* stage 0: load operands, post-increment by 8 */
  118 (p[0])  ld8.nta s2[0] = [r17], 8
  119 (p[6])  xor d[0] = s1[6], s2[6]                 /* stage 6: partial result s1 ^ s2 */
  120 (p[0])  ld8.nta s3[0] = [r18], 8
  121 (p[0])  ld8.nta s4[0] = [r19], 8
  122 (p[6])  xor r20 = s3[6], s4[6]                  /* stage 6: partial result s3 ^ s4 in static temp */
  123         ;;                                      /* stop: the final xor reads d[0] and r20 written above */
  124 (p[6+1])st8.nta [r8] = d[1], 8                  /* stage 7: store the result produced in the previous pass */
  125 (p[6])  xor d[0] = d[0], r20                    /* stage 6: combine the two partial results */
  126         br.ctop.dptk.few 0b                     /* rotate registers; count down ar.lc, then ar.ec to drain */
  127         ;;
  128         mov ar.lc = r30                         /* restore caller's ar.lc */
  129         mov pr = r29, -1                        /* restore all predicate registers */
  130         br.ret.sptk.few rp
  131 END(xor_ia64_4)
  132 EXPORT_SYMBOL(xor_ia64_4)
 133 
  134 GLOBAL_ENTRY(xor_ia64_5)
      /*
       * XOR five buffers together one 8-byte word at a time and write the
       * result back over the first buffer.
       *
       *   in0  length in bytes; the loop runs in0 >> 3 times, so a tail
       *        shorter than 8 bytes is not processed
       *   in1  first source, also the destination (loaded via r16, stored via r8)
       *   in2  second source (loaded via r17)
       *   in3  third source (loaded via r18)
       *   in4  fourth source (loaded via r19)
       *   in5  fifth source (loaded via r20)
       *
       * The loop is software-pipelined with br.ctop: loads for word i run in
       * stage p[0], the xors for word i-6 in stage p[6], and the store for
       * word i-7 in stage p[6+1].  One pass is split into three instruction
       * groups because each later xor consumes the d[0] written by the one
       * before it.  r21 is a non-rotating temporary that holds s3 ^ s4.
       *
       * The 40 rotating registers requested by alloc start at r32 and therefore
       * cover in0-in5, which is why the arguments are moved to r8, r16-r20 and
       * ar.lc before the loop starts.
       *
       * NOTE(review): nothing guards against in0 < 8, where (in0 >> 3) - 1 is
       * negative when it is written to ar.lc; callers presumably always pass
       * at least one full word - confirm.
       */
  135         .prologue
  136         .fframe 0
  137         .save ar.pfs, r31
  138         alloc r31 = ar.pfs, 6, 0, 34, 40        /* 6 in, 0 local, 34 out, 40 rotating (5*7 + 2 = 37 names) */
  139         .save ar.lc, r30
  140         mov r30 = ar.lc                         /* keep caller's loop count for restore at exit */
  141         .save pr, r29
  142         mov r29 = pr                            /* keep caller's predicates for restore at exit */
  143         ;;
  144         .body
  145         mov r8 = in1                            /* r8 = store pointer (destination == first source) */
  146         mov ar.ec = 6 + 2                       /* epilogue count = number of pipeline stages p[0]..p[7] */
  147         shr in0 = in0, 3                        /* bytes -> 8-byte words */
  148         ;;
  149         adds in0 = -1, in0                      /* ar.lc takes the trip count minus one */
  150         mov r16 = in1                           /* r16 = load pointer for first source */
  151         mov r17 = in2                           /* r17 = load pointer for second source */
  152         ;;
  153         mov r18 = in3                           /* r18 = load pointer for third source */
  154         mov ar.lc = in0
  155         mov pr.rot = 1 << 16                    /* p16 (= p[0]) set, remaining rotating predicates clear */
  156         mov r19 = in4                           /* r19 = load pointer for fourth source */
  157         mov r20 = in5                           /* r20 = load pointer for fifth source */
  158         ;;
  159         .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
  160         .rotp p[6+2]
  161 0:
  162 (p[0])  ld8.nta s1[0] = [r16], 8                /* stage 0: load operands, post-increment by 8 */
  163 (p[0])  ld8.nta s2[0] = [r17], 8
  164 (p[6])  xor d[0] = s1[6], s2[6]                 /* stage 6: partial result s1 ^ s2 */
  165 (p[0])  ld8.nta s3[0] = [r18], 8
  166 (p[0])  ld8.nta s4[0] = [r19], 8
  167 (p[6])  xor r21 = s3[6], s4[6]                  /* stage 6: partial result s3 ^ s4 in static temp */
  168         ;;                                      /* stop: the next xor reads d[0] and r21 written above */
  169 (p[0])  ld8.nta s5[0] = [r20], 8
  170 (p[6+1])st8.nta [r8] = d[1], 8                  /* stage 7: store the result produced in the previous pass */
  171 (p[6])  xor d[0] = d[0], r21                    /* stage 6: combine the two partial results */
  172         ;;                                      /* stop: the last xor reads the d[0] just written */
  173 (p[6])    xor d[0] = d[0], s5[6]                /* stage 6: fold in the fifth source */
  174         nop.f 0                                 /* explicit F-slot filler (presumably to fix the bundle layout) */
  175         br.ctop.dptk.few 0b                     /* rotate registers; count down ar.lc, then ar.ec to drain */
  176         ;;
  177         mov ar.lc = r30                         /* restore caller's ar.lc */
  178         mov pr = r29, -1                        /* restore all predicate registers */
  179         br.ret.sptk.few rp
  180 END(xor_ia64_5)
  181 EXPORT_SYMBOL(xor_ia64_5)

/* [<][>][^][v][top][bottom][index][help] */