arch/powerpc/lib/checksum_32.S

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

        .text

/*
 * Computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit).
 *
 * __csum_partial(buff, len, sum)
 */
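/*
 * Illustrative C-level usage, a sketch that is not part of this file;
 * it assumes the usual checksum.h prototype
 * __wsum __csum_partial(const void *buff, int len, __wsum sum):
 *
 *      __wsum partial = __csum_partial(buf, len, 0);
 *      __sum16 check = csum_fold(partial);     (fold to 16 bits)
 */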
_GLOBAL(__csum_partial)
        subi    r3,r3,4         /* set up for 4(r3) update-form loads */
        srawi.  r6,r4,2         /* Divide len by 4 and also clear carry */
        beq     3f              /* if we're doing < 4 bytes */
        andi.   r0,r3,2         /* Align buffer to longword boundary */
        beq+    1f
        lhz     r0,4(r3)        /* do 2 bytes to get aligned */
        subi    r4,r4,2
        addi    r3,r3,2
        srwi.   r6,r4,2         /* # words to do */
        adde    r5,r5,r0
        beq     3f
1:      andi.   r6,r6,3         /* Prepare to handle words 4 by 4 */
        beq     21f
        mtctr   r6
2:      lwzu    r0,4(r3)
        adde    r5,r5,r0
        bdnz    2b
21:     srwi.   r6,r4,4         /* # blocks of 4 words to do */
        beq     3f
        lwz     r0,4(r3)
        mtctr   r6
        lwz     r6,8(r3)
        adde    r5,r5,r0
        lwz     r7,12(r3)
        adde    r5,r5,r6
        lwzu    r8,16(r3)
        adde    r5,r5,r7
        bdz     23f
22:     lwz     r0,4(r3)
        adde    r5,r5,r8
        lwz     r6,8(r3)
        adde    r5,r5,r0
        lwz     r7,12(r3)
        adde    r5,r5,r6
        lwzu    r8,16(r3)
        adde    r5,r5,r7
        bdnz    22b
23:     adde    r5,r5,r8
3:      andi.   r0,r4,2
        beq+    4f
        lhz     r0,4(r3)
        addi    r3,r3,2
        adde    r5,r5,r0
4:      andi.   r0,r4,1
        beq+    5f
        lbz     r0,4(r3)
        slwi    r0,r0,8         /* Upper byte of word */
        adde    r5,r5,r0
5:      addze   r3,r5           /* add in final carry */
        blr
EXPORT_SYMBOL(__csum_partial)

/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in "sum" (32-bit), while copying the block to dst.
 * If an access exception occurs on src or dst, it stores -EFAULT
 * to *src_err or *dst_err respectively, and (for an error on
 * src) zeroes the rest of dst.
 *
 * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
 */
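/*
 * Sketch of a C-level call (hypothetical caller; the argument types
 * shown follow the 32-bit powerpc csum glue and are an assumption
 * here, not defined in this file):
 *
 *      int src_err = 0, dst_err = 0;
 *      sum = csum_partial_copy_generic(src, dst, len, sum,
 *                                      &src_err, &dst_err);
 *      if (src_err || dst_err)
 *              return -EFAULT;         (the routine stored -EFAULT)
 */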
#define CSUM_COPY_16_BYTES_WITHEX(n)    \
8 ## n ## 0:                    \
        lwz     r7,4(r4);       \
8 ## n ## 1:                    \
        lwz     r8,8(r4);       \
8 ## n ## 2:                    \
        lwz     r9,12(r4);      \
8 ## n ## 3:                    \
        lwzu    r10,16(r4);     \
8 ## n ## 4:                    \
        stw     r7,4(r6);       \
        adde    r12,r12,r7;     \
8 ## n ## 5:                    \
        stw     r8,8(r6);       \
        adde    r12,r12,r8;     \
8 ## n ## 6:                    \
        stw     r9,12(r6);      \
        adde    r12,r12,r9;     \
8 ## n ## 7:                    \
        stwu    r10,16(r6);     \
        adde    r12,r12,r10

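/*
 * Each numbered label in the copy macro above gets a matching
 * exception-table entry below: a fault on any of the four loads
 * branches to src_error, a fault on any of the four stores to
 * dst_error.
 */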
#define CSUM_COPY_16_BYTES_EXCODE(n)            \
        EX_TABLE(8 ## n ## 0b, src_error);      \
        EX_TABLE(8 ## n ## 1b, src_error);      \
        EX_TABLE(8 ## n ## 2b, src_error);      \
        EX_TABLE(8 ## n ## 3b, src_error);      \
        EX_TABLE(8 ## n ## 4b, dst_error);      \
        EX_TABLE(8 ## n ## 5b, dst_error);      \
        EX_TABLE(8 ## n ## 6b, dst_error);      \
        EX_TABLE(8 ## n ## 7b, dst_error);

        .text
        .stabs  "arch/powerpc/lib/",N_SO,0,0,0f
        .stabs  "checksum_32.S",N_SO,0,0,0f
0:

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

_GLOBAL(csum_partial_copy_generic)
        stwu    r1,-16(r1)
        stw     r7,12(r1)               /* save src_err pointer */
        stw     r8,8(r1)                /* save dst_err pointer */

        addic   r12,r6,0                /* copy sum, clearing carry */
        addi    r6,r4,-4
        neg     r0,r4
        addi    r4,r3,-4
        andi.   r0,r0,CACHELINE_MASK    /* # bytes to start of cache line */
        crset   4*cr7+eq                /* default: no final byte rotate */
        beq     58f

        cmplw   0,r5,r0                 /* is this more than total to do? */
        blt     63f                     /* if not much to do */
        rlwinm  r7,r6,3,0x8
        rlwnm   r12,r12,r7,0,31 /* odd destination address: rotate one byte */
        cmplwi  cr7,r7,0        /* is destination address even? */
        andi.   r8,r0,3                 /* get it word-aligned first */
        mtctr   r8
        beq+    61f
        li      r3,0
70:     lbz     r9,4(r4)                /* do some bytes */
        addi    r4,r4,1
        slwi    r3,r3,8
        rlwimi  r3,r9,0,24,31
71:     stb     r9,4(r6)
        addi    r6,r6,1
        bdnz    70b
        adde    r12,r12,r3
61:     subf    r5,r0,r5
        srwi.   r0,r0,2
        mtctr   r0
        beq     58f
72:     lwzu    r9,4(r4)                /* do some words */
        adde    r12,r12,r9
73:     stwu    r9,4(r6)
        bdnz    72b

58:     srwi.   r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
        clrlwi  r5,r5,32-LG_CACHELINE_BYTES
        li      r11,4
        beq     63f

        /* Here we decide how far ahead to prefetch the source */
        li      r3,4
        cmpwi   r0,1
        li      r7,0
        ble     114f
        li      r7,1
#if MAX_COPY_PREFETCH > 1
        /*
         * Heuristically, for large transfers we prefetch
         * MAX_COPY_PREFETCH cachelines ahead.  For small transfers
         * we prefetch 1 cacheline ahead.
         */
        cmpwi   r0,MAX_COPY_PREFETCH
        ble     112f
        li      r7,MAX_COPY_PREFETCH
112:    mtctr   r7
111:    dcbt    r3,r4
        addi    r3,r3,CACHELINE_BYTES
        bdnz    111b
#else
        dcbt    r3,r4
        addi    r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */
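
/*
 * In C terms, the prefetch distance chosen above is roughly:
 *      dist = (nlines <= 1) ? 0
 *           : (nlines > MAX_COPY_PREFETCH) ? MAX_COPY_PREFETCH : 1;
 */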

114:    subf    r8,r7,r0
        mr      r0,r7
        mtctr   r8

53:     dcbt    r3,r4
54:     dcbz    r11,r6
/* the main body of the cacheline loop */
        CSUM_COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
        CSUM_COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
        CSUM_COPY_16_BYTES_WITHEX(2)
        CSUM_COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
        CSUM_COPY_16_BYTES_WITHEX(4)
        CSUM_COPY_16_BYTES_WITHEX(5)
        CSUM_COPY_16_BYTES_WITHEX(6)
        CSUM_COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
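/*
 * Each CSUM_COPY_16_BYTES_WITHEX expansion covers 16 bytes, so the
 * expansions above add up to exactly one L1_CACHE_BYTES-sized line
 * per loop iteration.
 */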
        bdnz    53b
        cmpwi   r0,0
        li      r3,4
        li      r7,0
        bne     114b

63:     srwi.   r0,r5,2
        mtctr   r0
        beq     64f
30:     lwzu    r0,4(r4)
        adde    r12,r12,r0
31:     stwu    r0,4(r6)
        bdnz    30b

64:     andi.   r0,r5,2
        beq+    65f
40:     lhz     r0,4(r4)
        addi    r4,r4,2
41:     sth     r0,4(r6)
        adde    r12,r12,r0
        addi    r6,r6,2
65:     andi.   r0,r5,1
        beq+    66f
50:     lbz     r0,4(r4)
51:     stb     r0,4(r6)
        slwi    r0,r0,8
        adde    r12,r12,r0
66:     addze   r3,r12
        addi    r1,r1,16
        beqlr+  cr7
        rlwinm  r3,r3,8,0,31    /* odd destination address: rotate one byte */
        blr

/* read fault */
src_error:
        lwz     r7,12(r1)
        addi    r1,r1,16
        cmpwi   cr0,r7,0
        beqlr
        li      r0,-EFAULT
        stw     r0,0(r7)
        blr
/* write fault */
dst_error:
        lwz     r8,8(r1)
        addi    r1,r1,16
        cmpwi   cr0,r8,0
        beqlr
        li      r0,-EFAULT
        stw     r0,0(r8)
        blr

        EX_TABLE(70b, src_error);
        EX_TABLE(71b, dst_error);
        EX_TABLE(72b, src_error);
        EX_TABLE(73b, dst_error);
        EX_TABLE(54b, dst_error);

/*
 * This stuff handles faults in the cacheline loop and branches to either
 * src_error (if in read part) or dst_error (if in write part)
 */
        CSUM_COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
        CSUM_COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
        CSUM_COPY_16_BYTES_EXCODE(2)
        CSUM_COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
        CSUM_COPY_16_BYTES_EXCODE(4)
        CSUM_COPY_16_BYTES_EXCODE(5)
        CSUM_COPY_16_BYTES_EXCODE(6)
        CSUM_COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

        EX_TABLE(30b, src_error);
        EX_TABLE(31b, dst_error);
        EX_TABLE(40b, src_error);
        EX_TABLE(41b, dst_error);
        EX_TABLE(50b, src_error);
        EX_TABLE(51b, dst_error);

EXPORT_SYMBOL(csum_partial_copy_generic)

/*
 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 *                         const struct in6_addr *daddr,
 *                         __u32 len, __u8 proto, __wsum sum)
 */
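/*
 * Typical C-level use, sketched for illustration (assumes a TCP-style
 * caller; ip6h and th are hypothetical header pointers):
 *
 *      th->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, len,
 *                                  IPPROTO_TCP, csum_partial(th, len, 0));
 */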

_GLOBAL(csum_ipv6_magic)
        lwz     r8, 0(r3)
        lwz     r9, 4(r3)
        addc    r0, r7, r8
        lwz     r10, 8(r3)
        adde    r0, r0, r9
        lwz     r11, 12(r3)
        adde    r0, r0, r10
        lwz     r8, 0(r4)
        adde    r0, r0, r11
        lwz     r9, 4(r4)
        adde    r0, r0, r8
        lwz     r10, 8(r4)
        adde    r0, r0, r9
        lwz     r11, 12(r4)
        adde    r0, r0, r10
        add     r5, r5, r6      /* assumption: len + proto doesn't carry */
        adde    r0, r0, r11
        adde    r0, r0, r5
        addze   r0, r0          /* add in final carry */
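/*
 * Fold the 32-bit sum to 16 bits and complement it.  Worked example:
 * for r0 = 0x12345678, rotlwi gives 0x56781234 and the add gives
 * 0x68ac68ac (0x1234 + 0x5678 = 0x68ac in the upper half); after the
 * not, rlwinm extracts ~0x68ac & 0xffff = 0x9753.
 */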
        rotlwi  r3, r0, 16
        add     r3, r0, r3
        not     r3, r3
        rlwinm  r3, r3, 16, 16, 31
        blr
EXPORT_SYMBOL(csum_ipv6_magic)
