root/arch/x86/include/asm/xor_avx.h


DEFINITIONS

This source file includes the following definitions; a short scalar sketch of the operation they implement follows the list.
  1. xor_avx_2
  2. xor_avx_3
  3. xor_avx_4
  4. xor_avx_5
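
For orientation, here is a minimal scalar sketch of what these routines compute: the first buffer accumulates the bitwise XOR (the RAID-5 parity) of one to four source buffers. The helper name xor_scalar_n is invented for illustration and is not part of the kernel; the real routines below do the same work 512 bytes at a time with 32-byte YMM loads, XORs and stores inside a kernel_fpu_begin()/kernel_fpu_end() section.

#include <stddef.h>

/* Illustration only -- not part of the header reproduced below. */
static void xor_scalar_n(unsigned long bytes, unsigned long *p0,
                         unsigned long * const srcs[], int nsrc)
{
        size_t w, words = bytes / sizeof(*p0);
        int s;

        for (w = 0; w < words; w++)
                for (s = 0; s < nsrc; s++)
                        p0[w] ^= srcs[s][w];    /* p0 accumulates the parity */
}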

/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_X86_XOR_AVX_H
#define _ASM_X86_XOR_AVX_H

/*
 * Optimized RAID-5 checksumming functions for AVX
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 *
 * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
 */

#ifdef CONFIG_AS_AVX

#include <linux/compiler.h>
#include <asm/fpu/api.h>

#define BLOCK4(i) \
                BLOCK(32 * i, 0) \
                BLOCK(32 * (i + 1), 1) \
                BLOCK(32 * (i + 2), 2) \
                BLOCK(32 * (i + 3), 3)

#define BLOCK16() \
                BLOCK4(0) \
                BLOCK4(4) \
                BLOCK4(8) \
                BLOCK4(12)
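
/*
 * Note: each BLOCK16() expansion XORs sixteen 32-byte YMM blocks, i.e.
 * 512 bytes per loop iteration, which is why the routines below compute
 * lines = bytes >> 9; vmovdqa additionally requires the buffers to be
 * 32-byte aligned.
 */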

static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm"  #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16()

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
        }

        kernel_fpu_end();
}

static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
        unsigned long *p2)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16()

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
                p2 = (unsigned long *)((uintptr_t)p2 + 512);
        }

        kernel_fpu_end();
}

static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
        unsigned long *p2, unsigned long *p3)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p2[i / sizeof(*p2)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16();

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
                p2 = (unsigned long *)((uintptr_t)p2 + 512);
                p3 = (unsigned long *)((uintptr_t)p3 + 512);
        }

        kernel_fpu_end();
}

static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
        unsigned long *p2, unsigned long *p3, unsigned long *p4)
{
        unsigned long lines = bytes >> 9;

        kernel_fpu_begin();

        while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
        asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p3[i / sizeof(*p3)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p2[i / sizeof(*p2)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p1[i / sizeof(*p1)])); \
        asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
                "m" (p0[i / sizeof(*p0)])); \
        asm volatile("vmovdqa %%ymm" #reg ", %0" : \
                "=m" (p0[i / sizeof(*p0)])); \
} while (0);

                BLOCK16()

                p0 = (unsigned long *)((uintptr_t)p0 + 512);
                p1 = (unsigned long *)((uintptr_t)p1 + 512);
                p2 = (unsigned long *)((uintptr_t)p2 + 512);
                p3 = (unsigned long *)((uintptr_t)p3 + 512);
                p4 = (unsigned long *)((uintptr_t)p4 + 512);
        }

        kernel_fpu_end();
}

static struct xor_block_template xor_block_avx = {
        .name = "avx",
        .do_2 = xor_avx_2,
        .do_3 = xor_avx_3,
        .do_4 = xor_avx_4,
        .do_5 = xor_avx_5,
};

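/*
 * Checking X86_FEATURE_AVX alone is not enough: X86_FEATURE_OSXSAVE must
 * also be set, meaning the OS has enabled XSAVE and the YMM state bits in
 * XCR0, so the YMM registers can actually be used and are preserved
 * across context switches.
 */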
#define AVX_XOR_SPEED \
do { \
        if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \
                xor_speed(&xor_block_avx); \
} while (0)

#define AVX_SELECT(FASTEST) \
        (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST)

#else

#define AVX_XOR_SPEED {}

#define AVX_SELECT(FASTEST) (FASTEST)

#endif
#endif
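
For a rough user-space analogue of both the AVX inner loop and the AVX_SELECT() runtime check, the sketch below uses AVX intrinsics and the GCC/Clang builtin __builtin_cpu_supports(). Everything in it (xor_avx_2_user, pick_xor_2, xor_2_fn) is hypothetical illustration rather than kernel code; it assumes a GCC/Clang toolchain, 32-byte-aligned buffers, and a byte count that is a multiple of 32.

#include <immintrin.h>

typedef void (*xor_2_fn)(unsigned long bytes, unsigned long *p0,
                         const unsigned long *p1);

/* Two-buffer XOR, 32 bytes per step: aligned YMM loads, XOR, aligned store. */
static void __attribute__((target("avx")))
xor_avx_2_user(unsigned long bytes, unsigned long *p0, const unsigned long *p1)
{
        unsigned long i;

        for (i = 0; i < bytes; i += 32) {
                __m256 a = _mm256_load_ps((const float *)((const char *)p1 + i));
                __m256 b = _mm256_load_ps((const float *)((const char *)p0 + i));

                /* vxorps, the same XOR the BLOCK() macros above emit */
                _mm256_store_ps((float *)((char *)p0 + i), _mm256_xor_ps(a, b));
        }
}

/* Runtime selection in the spirit of AVX_SELECT(): prefer AVX when usable. */
static xor_2_fn pick_xor_2(xor_2_fn avx, xor_2_fn fallback)
{
        return __builtin_cpu_supports("avx") ? avx : fallback;
}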
