arch/x86/lib/csum-copy_64.S

/*
 * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file COPYING in the main directory of this archive
 * for more details. No warranty for anything given at all.
 */
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

/*
 * Checksum copy with exception handling.
 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
 * destination is zeroed.
 *
 * Input
 * rdi  source
 * rsi  destination
 * edx  len (32bit)
 * ecx  sum (32bit)
 * r8   src_err_ptr (int)
 * r9   dst_err_ptr (int)
 *
 * Output
 * eax  32bit folded sum; undefined in case of exception.
 *
 * Wrappers need to take care of valid exception sum and zeroing.
 * They also should align source or destination to 8 bytes.
 */
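
/*
 * Rough C-level view of this interface (illustrative sketch; the exact
 * prototype lives in the x86 checksum headers):
 *
 *   __wsum csum_partial_copy_generic(const void *src, void *dst, int len,
 *                                    __wsum sum, int *src_err_ptr,
 *                                    int *dst_err_ptr);
 *
 * The routine copies len bytes from src to dst while folding the data into
 * the running 32-bit one's-complement partial checksum passed in sum.
 */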

        /*
         * Tag the next load from the source buffer with an exception table
         * entry, so a faulting access branches to .Lbad_source.
         */
        .macro source
10:
        _ASM_EXTABLE_UA(10b, .Lbad_source)
        .endm

        /*
         * Likewise for the next store to the destination buffer; a faulting
         * access branches to .Lbad_dest.
         */
        .macro dest
20:
        _ASM_EXTABLE_UA(20b, .Lbad_dest)
        .endm

        /*
         * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
         * potentially unmapped kernel address.
         */
        .macro ignore L=.Lignore
30:
        _ASM_EXTABLE(30b, \L)
        .endm
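
        /*
         * Usage sketch (see the main loop below): "ignore 2f" in front of a
         * prefetcht0 installs a fixup that simply resumes at the local label
         * 2:, so a fault on the prefetched address is silently skipped.
         */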


ENTRY(csum_partial_copy_generic)
        cmpl    $3*64, %edx
        jle     .Lignore

        /*
         * Note: both the branch and the fall-through land at .Lignore, so
         * this size check currently has no effect; it appears to be a
         * leftover from earlier prefetch setup code.
         */
.Lignore:
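        /*
         * Carve out a seven-slot save area: slots 0-1 hold src_err_ptr and
         * dst_err_ptr for the exception handlers, slots 2-6 the callee-saved
         * registers clobbered below.
         */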
        subq  $7*8, %rsp
        movq  %rbx, 2*8(%rsp)
        movq  %r12, 3*8(%rsp)
        movq  %r14, 4*8(%rsp)
        movq  %r13, 5*8(%rsp)
        movq  %r15, 6*8(%rsp)

        movq  %r8, (%rsp)
        movq  %r9, 1*8(%rsp)

        movl  %ecx, %eax        /* eax: running sum, seeded from the sum argument */
        movl  %edx, %ecx        /* ecx: byte count */

        xorl  %r9d, %r9d        /* r9 stays zero; used to pick up adc carries */
        movq  %rcx, %r12

        shrq  $6, %r12          /* r12: number of 64-byte blocks */
        jz      .Lhandle_tail       /* < 64 */

        clc                     /* start the adc accumulation with a clear carry */

        /* main loop: checksum and copy in 64 byte blocks */
        /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
        /* r11: temp3, rdx: temp4, r12 loopcnt */
        /* r10: temp5, r15: temp6, r14 temp7, r13 temp8 */
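        /*
         * Each iteration loads eight quadwords, then folds them into rax
         * with an unbroken adc chain: the carry out of one 64-bit add feeds
         * the next, and the final carry is collected by the "adcq %r9, %rax"
         * after the loop (r9 is zero). This is the usual end-around-carry
         * accumulation for the Internet (one's-complement) checksum.
         */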
        .p2align 4
.Lloop:
        source
        movq  (%rdi), %rbx
        source
        movq  8(%rdi), %r8
        source
        movq  16(%rdi), %r11
        source
        movq  24(%rdi), %rdx

        source
        movq  32(%rdi), %r10
        source
        movq  40(%rdi), %r15
        source
        movq  48(%rdi), %r14
        source
        movq  56(%rdi), %r13

        ignore 2f
        prefetcht0 5*64(%rdi)
2:
        adcq  %rbx, %rax
        adcq  %r8, %rax
        adcq  %r11, %rax
        adcq  %rdx, %rax
        adcq  %r10, %rax
        adcq  %r15, %rax
        adcq  %r14, %rax
        adcq  %r13, %rax

        decl %r12d              /* note: decl does not clobber CF */

        dest
        movq %rbx, (%rsi)
        dest
        movq %r8, 8(%rsi)
        dest
        movq %r11, 16(%rsi)
        dest
        movq %rdx, 24(%rsi)

        dest
        movq %r10, 32(%rsi)
        dest
        movq %r15, 40(%rsi)
        dest
        movq %r14, 48(%rsi)
        dest
        movq %r13, 56(%rsi)

3:

        leaq 64(%rdi), %rdi
        leaq 64(%rsi), %rsi

        jnz     .Lloop

        adcq  %r9, %rax         /* pick up the carry left by the last adcq */

        /* do last up to 56 bytes */
.Lhandle_tail:
        /* ecx: count */
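        /*
         * The low six bits of the count are what is left over after the
         * 64-byte loop: up to 56 bytes are handled here in 8-byte steps,
         * the final 0-7 bytes in .Lhandle_7 and .Lhandle_1 below.
         */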
        movl %ecx, %r10d        /* keep the count for .Lhandle_7/.Lhandle_1 */
        andl $63, %ecx
        shrl $3, %ecx           /* number of remaining 8-byte words */
        jz      .Lfold
        clc
        .p2align 4
.Lloop_8:
        source
        movq (%rdi), %rbx
        adcq %rbx, %rax
        decl %ecx
        dest
        movq %rbx, (%rsi)
        leaq 8(%rsi), %rsi /* preserve carry */
        leaq 8(%rdi), %rdi
        jnz     .Lloop_8
        adcq %r9, %rax  /* add in carry */

.Lfold:
        /* reduce checksum to 32bits */
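        /*
         * Roughly, in C: sum = (u32)sum + (u32)(sum >> 32); sum += carry;
         * i.e. an end-around-carry fold of the 64-bit accumulator into
         * 32 bits (r9 is still zero, so the adcl only adds the carry).
         */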
        movl %eax, %ebx
        shrq $32, %rax
        addl %ebx, %eax
        adcl %r9d, %eax

        /* do last up to 6 bytes */
.Lhandle_7:
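        /*
         * r10d still holds the byte count; bits 2-1 give the number of
         * remaining 16-bit words (at most three, i.e. up to 6 bytes).
         */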
        movl %r10d, %ecx
        andl $7, %ecx
        shrl $1, %ecx
        jz   .Lhandle_1
        movl $2, %edx
        xorl %ebx, %ebx
        clc
        .p2align 4
.Lloop_1:
        source
        movw (%rdi), %bx
        adcl %ebx, %eax
        decl %ecx
        dest
        movw %bx, (%rsi)
        leaq 2(%rdi), %rdi
        leaq 2(%rsi), %rsi
        jnz .Lloop_1
        adcl %r9d, %eax /* add in carry */

        /* handle last odd byte */
.Lhandle_1:
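        /*
         * A trailing odd byte is accumulated as if it were padded with a
         * zero byte, matching the standard Internet checksum treatment of
         * odd-length data.
         */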
        testb $1, %r10b
        jz    .Lende
        xorl  %ebx, %ebx
        source
        movb (%rdi), %bl
        dest
        movb %bl, (%rsi)
        addl %ebx, %eax
        adcl %r9d, %eax         /* carry */

.Lende:
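        /*
         * Restore the callee-saved registers and drop the save area; on the
         * success path the error pointers in slots 0-1 are simply discarded.
         */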
        movq 2*8(%rsp), %rbx
        movq 3*8(%rsp), %r12
        movq 4*8(%rsp), %r14
        movq 5*8(%rsp), %r13
        movq 6*8(%rsp), %r15
        addq $7*8, %rsp
        ret

        /* Exception handlers. Very simple, zeroing is done in the wrappers */
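        /*
         * A faulting source load or destination store lands here via the
         * exception table. The corresponding error pointer, saved at the
         * bottom of the stack frame on entry, is set to -EFAULT if it is
         * non-NULL, and we return through .Lende; the partial sum in eax is
         * undefined in this case.
         */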
.Lbad_source:
        movq (%rsp), %rax       /* src_err_ptr */
        testq %rax, %rax
        jz   .Lende
        movl $-EFAULT, (%rax)
        jmp  .Lende

.Lbad_dest:
        movq 8(%rsp), %rax      /* dst_err_ptr */
        testq %rax, %rax
        jz   .Lende
        movl $-EFAULT, (%rax)
        jmp .Lende
ENDPROC(csum_partial_copy_generic)
