lib/raid6/avx2.c


DEFINITIONS

This source file includes the following definitions.
  1. raid6_have_avx2
  2. raid6_avx21_gen_syndrome
  3. raid6_avx21_xor_syndrome
  4. raid6_avx22_gen_syndrome
  5. raid6_avx22_xor_syndrome
  6. raid6_avx24_gen_syndrome
  7. raid6_avx24_xor_syndrome

// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright (C) 2012 Intel Corporation
 *   Author: Yuanhan Liu <yuanhan.liu@linux.intel.com>
 *
 *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 * ----------------------------------------------------------------------- */

/*
 * AVX2 implementation of RAID-6 syndrome functions
 *
 */

#ifdef CONFIG_AS_AVX2

#include <linux/raid/pq.h>
#include "x86.h"

static const struct raid6_avx2_constants {
        u64 x1d[4];
} raid6_avx2_constants __aligned(32) = {
        { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
          0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
};
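
/*
 * A minimal scalar sketch of the field arithmetic behind the x1d constant
 * above (raid6_gfmul2_scalar is a hypothetical helper added for
 * illustration only, not used by the SIMD paths below).  Each byte is an
 * element of GF(2^8) with the RAID-6 generator polynomial 0x11d;
 * multiplying by x (i.e. by 2) is a left shift followed by a conditional
 * XOR with the polynomial's low byte, 0x1d.  The vpcmpgtb/vpand pairs in
 * the assembly build that conditional mask 32 bytes at a time, and
 * vpaddb/vpxor perform the shift and the reduction.
 */
static inline u8 __maybe_unused raid6_gfmul2_scalar(u8 v)
{
        u8 mask = (v & 0x80) ? 0x1d : 0;        /* vpcmpgtb against zero + vpand x1d */

        return (u8)(v << 1) ^ mask;             /* vpaddb (shift) + vpxor (reduce) */
}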

static int raid6_have_avx2(void)
{
        return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX);
}

/*
 * Plain AVX2 implementation
 */
static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = disks - 3;         /* Highest data disk */
        p = dptr[z0+1];         /* XOR parity */
        q = dptr[z0+2];         /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
        asm volatile("vpxor %ymm3,%ymm3,%ymm3");        /* Zero temp */

        for (d = 0; d < bytes; d += 32) {
                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
                asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
                asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
                asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */
                asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
                for (z = z0-2; z >= 0; z--) {
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
                        asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm6,%ymm2,%ymm2");
                        asm volatile("vpxor %ymm6,%ymm4,%ymm4");
                        asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
                }
                asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
                asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                asm volatile("vpand %ymm0,%ymm5,%ymm5");
                asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                asm volatile("vpxor %ymm6,%ymm2,%ymm2");
                asm volatile("vpxor %ymm6,%ymm4,%ymm4");

                asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
                asm volatile("vpxor %ymm2,%ymm2,%ymm2");
                asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
                asm volatile("vpxor %ymm4,%ymm4,%ymm4");
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}
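
/*
 * A scalar reference sketch of what the SIMD loop above computes per byte
 * offset (raid6_gen_syndrome_scalar is a hypothetical helper for
 * illustration, not used by the AVX2 paths).  P is the plain XOR of all
 * data disks; Q is evaluated Horner-style from the highest data disk down,
 * doubling the accumulator in GF(2^8) before each lower disk is folded in,
 * so disk z ends up weighted by 2^z.
 */
static void __maybe_unused raid6_gen_syndrome_scalar(int disks, size_t bytes,
                                                     void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p = dptr[disks-2], *q = dptr[disks-1];
        int d, z;

        for (d = 0; d < bytes; d++) {
                u8 wp = dptr[disks-3][d];       /* P accumulator */
                u8 wq = wp;                     /* Q accumulator */

                for (z = disks-4; z >= 0; z--) {
                        wq = raid6_gfmul2_scalar(wq) ^ dptr[z][d];
                        wp ^= dptr[z][d];
                }
                p[d] = wp;
                q[d] = wq;
        }
}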

static void raid6_avx21_xor_syndrome(int disks, int start, int stop,
                                     size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = stop;              /* P/Q right side optimization */
        p = dptr[disks-2];      /* XOR parity */
        q = dptr[disks-1];      /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));

        for (d = 0 ; d < bytes ; d += 32) {
                asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
                asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
                asm volatile("vpxor %ymm4,%ymm2,%ymm2");
                /* P/Q data pages */
                for (z = z0-1 ; z >= start ; z--) {
                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
                        asm volatile("vpxor %ymm5,%ymm2,%ymm2");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                }
                /* P/Q left side optimization */
                for (z = start-1 ; z >= 0 ; z--) {
                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                }
                asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
                /* Don't use movntdq for r/w memory area < cache line */
                asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
                asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}
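
/*
 * A scalar sketch of the partial update above (raid6_xor_syndrome_scalar
 * is a hypothetical helper for illustration, not used by the SIMD code).
 * Only disks start..stop contribute new data, so the delta syndrome is
 * built Horner-style over that span ("right side") and then doubled a
 * further 'start' times ("left side") so each disk z keeps its 2^z weight,
 * before being XORed into the existing P and Q pages.
 */
static void __maybe_unused raid6_xor_syndrome_scalar(int disks, int start,
                                                     int stop, size_t bytes,
                                                     void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p = dptr[disks-2], *q = dptr[disks-1];
        int d, z;

        for (d = 0; d < bytes; d++) {
                u8 wq = dptr[stop][d];          /* Q delta accumulator */
                u8 wp = wq ^ p[d];              /* P with new data folded in */

                for (z = stop-1; z >= start; z--) {     /* right side */
                        wq = raid6_gfmul2_scalar(wq) ^ dptr[z][d];
                        wp ^= dptr[z][d];
                }
                for (z = start-1; z >= 0; z--)          /* left side */
                        wq = raid6_gfmul2_scalar(wq);

                p[d] = wp;
                q[d] ^= wq;
        }
}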

const struct raid6_calls raid6_avx2x1 = {
        raid6_avx21_gen_syndrome,
        raid6_avx21_xor_syndrome,
        raid6_have_avx2,
        "avx2x1",
        1                       /* Has cache hints */
};
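
/*
 * A minimal, hypothetical caller sketch showing the calling convention
 * implied by the ops table above (assuming the raid6_calls layout from
 * <linux/raid/pq.h>: gen_syndrome, xor_syndrome, valid, name, prefer).
 * ptrs[] holds the data disks at indices 0..disks-3, followed by the P
 * and Q destination pages; this example is illustrative only and is not
 * how the kernel selects an implementation.
 */
static void __maybe_unused raid6_avx2x1_example(int disks, size_t bytes,
                                                void **ptrs)
{
        if (raid6_avx2x1.valid())
                raid6_avx2x1.gen_syndrome(disks, bytes, ptrs);
}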

/*
 * Unrolled-by-2 AVX2 implementation
 */
static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = disks - 3;         /* Highest data disk */
        p = dptr[z0+1];         /* XOR parity */
        q = dptr[z0+2];         /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
        asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */

        /* We uniformly assume a single prefetch covers at least 32 bytes */
        for (d = 0; d < bytes; d += 64) {
                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
                asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32]));
                asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
                asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */
                asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */
                asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */
                for (z = z0-1; z >= 0; z--) {
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
                        asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
                        asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
                        asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
                        asm volatile("vpxor %ymm5,%ymm2,%ymm2");
                        asm volatile("vpxor %ymm7,%ymm3,%ymm3");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                }
                asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
                asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
                asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
                asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

static void raid6_avx22_xor_syndrome(int disks, int start, int stop,
                                     size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = stop;              /* P/Q right side optimization */
        p = dptr[disks-2];      /* XOR parity */
        q = dptr[disks-1];      /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));

        for (d = 0 ; d < bytes ; d += 64) {
                asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
                asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
                asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
                asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
                asm volatile("vpxor %ymm4,%ymm2,%ymm2");
                asm volatile("vpxor %ymm6,%ymm3,%ymm3");
                /* P/Q data pages */
                for (z = z0-1 ; z >= start ; z--) {
                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm7,%ymm7,%ymm7");
                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
                        asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
                        asm volatile("vmovdqa %0,%%ymm7"
                                     :: "m" (dptr[z][d+32]));
                        asm volatile("vpxor %ymm5,%ymm2,%ymm2");
                        asm volatile("vpxor %ymm7,%ymm3,%ymm3");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                }
                /* P/Q left side optimization */
                for (z = start-1 ; z >= 0 ; z--) {
                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm7,%ymm7,%ymm7");
                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
                        asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                }
                asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
                asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
                /* Don't use movntdq for r/w memory area < cache line */
                asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
                asm volatile("vmovdqa %%ymm6,%0" : "=m" (q[d+32]));
                asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
                asm volatile("vmovdqa %%ymm3,%0" : "=m" (p[d+32]));
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x2 = {
        raid6_avx22_gen_syndrome,
        raid6_avx22_xor_syndrome,
        raid6_have_avx2,
        "avx2x2",
        1                       /* Has cache hints */
};

#ifdef CONFIG_X86_64

/*
 * Unrolled-by-4 AVX2 implementation
 */
static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = disks - 3;         /* Highest data disk */
        p = dptr[z0+1];         /* XOR parity */
        q = dptr[z0+2];         /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
        asm volatile("vpxor %ymm1,%ymm1,%ymm1");        /* Zero temp */
        asm volatile("vpxor %ymm2,%ymm2,%ymm2");        /* P[0] */
        asm volatile("vpxor %ymm3,%ymm3,%ymm3");        /* P[1] */
        asm volatile("vpxor %ymm4,%ymm4,%ymm4");        /* Q[0] */
        asm volatile("vpxor %ymm6,%ymm6,%ymm6");        /* Q[1] */
        asm volatile("vpxor %ymm10,%ymm10,%ymm10");     /* P[2] */
        asm volatile("vpxor %ymm11,%ymm11,%ymm11");     /* P[3] */
        asm volatile("vpxor %ymm12,%ymm12,%ymm12");     /* Q[2] */
        asm volatile("vpxor %ymm14,%ymm14,%ymm14");     /* Q[3] */

        for (d = 0; d < bytes; d += 128) {
                for (z = z0; z >= 0; z--) {
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64]));
                        asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96]));
                        asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
                        asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
                        asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13");
                        asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
                        asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
                        asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
                        asm volatile("vpand %ymm0,%ymm13,%ymm13");
                        asm volatile("vpand %ymm0,%ymm15,%ymm15");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vpxor %ymm13,%ymm12,%ymm12");
                        asm volatile("vpxor %ymm15,%ymm14,%ymm14");
                        asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
                        asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
                        asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64]));
                        asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96]));
                        asm volatile("vpxor %ymm5,%ymm2,%ymm2");
                        asm volatile("vpxor %ymm7,%ymm3,%ymm3");
                        asm volatile("vpxor %ymm13,%ymm10,%ymm10");
                        asm volatile("vpxor %ymm15,%ymm11,%ymm11");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vpxor %ymm13,%ymm12,%ymm12");
                        asm volatile("vpxor %ymm15,%ymm14,%ymm14");
                }
                asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
                asm volatile("vpxor %ymm2,%ymm2,%ymm2");
                asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
                asm volatile("vpxor %ymm3,%ymm3,%ymm3");
                asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
                asm volatile("vpxor %ymm10,%ymm10,%ymm10");
                asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
                asm volatile("vpxor %ymm11,%ymm11,%ymm11");
                asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
                asm volatile("vpxor %ymm4,%ymm4,%ymm4");
                asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
                asm volatile("vpxor %ymm6,%ymm6,%ymm6");
                asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
                asm volatile("vpxor %ymm12,%ymm12,%ymm12");
                asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
                asm volatile("vpxor %ymm14,%ymm14,%ymm14");
        }

        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

static void raid6_avx24_xor_syndrome(int disks, int start, int stop,
                                     size_t bytes, void **ptrs)
{
        u8 **dptr = (u8 **)ptrs;
        u8 *p, *q;
        int d, z, z0;

        z0 = stop;              /* P/Q right side optimization */
        p = dptr[disks-2];      /* XOR parity */
        q = dptr[disks-1];      /* RS syndrome */

        kernel_fpu_begin();

        asm volatile("vmovdqa %0,%%ymm0" :: "m" (raid6_avx2_constants.x1d[0]));

        for (d = 0 ; d < bytes ; d += 128) {
                asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
                asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
                asm volatile("vmovdqa %0,%%ymm12" :: "m" (dptr[z0][d+64]));
                asm volatile("vmovdqa %0,%%ymm14" :: "m" (dptr[z0][d+96]));
                asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
                asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
                asm volatile("vmovdqa %0,%%ymm10" : : "m" (p[d+64]));
                asm volatile("vmovdqa %0,%%ymm11" : : "m" (p[d+96]));
                asm volatile("vpxor %ymm4,%ymm2,%ymm2");
                asm volatile("vpxor %ymm6,%ymm3,%ymm3");
                asm volatile("vpxor %ymm12,%ymm10,%ymm10");
                asm volatile("vpxor %ymm14,%ymm11,%ymm11");
                /* P/Q data pages */
                for (z = z0-1 ; z >= start ; z--) {
                        asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
                        asm volatile("prefetchnta %0" :: "m" (dptr[z][d+64]));
                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm7,%ymm7,%ymm7");
                        asm volatile("vpxor %ymm13,%ymm13,%ymm13");
                        asm volatile("vpxor %ymm15,%ymm15,%ymm15");
                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
                        asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
                        asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
                        asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
                        asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
                        asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
                        asm volatile("vpand %ymm0,%ymm13,%ymm13");
                        asm volatile("vpand %ymm0,%ymm15,%ymm15");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vpxor %ymm13,%ymm12,%ymm12");
                        asm volatile("vpxor %ymm15,%ymm14,%ymm14");
                        asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
                        asm volatile("vmovdqa %0,%%ymm7"
                                     :: "m" (dptr[z][d+32]));
                        asm volatile("vmovdqa %0,%%ymm13"
                                     :: "m" (dptr[z][d+64]));
                        asm volatile("vmovdqa %0,%%ymm15"
                                     :: "m" (dptr[z][d+96]));
                        asm volatile("vpxor %ymm5,%ymm2,%ymm2");
                        asm volatile("vpxor %ymm7,%ymm3,%ymm3");
                        asm volatile("vpxor %ymm13,%ymm10,%ymm10");
                        asm volatile("vpxor %ymm15,%ymm11,%ymm11");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vpxor %ymm13,%ymm12,%ymm12");
                        asm volatile("vpxor %ymm15,%ymm14,%ymm14");
                }
                asm volatile("prefetchnta %0" :: "m" (q[d]));
                asm volatile("prefetchnta %0" :: "m" (q[d+64]));
                /* P/Q left side optimization */
                for (z = start-1 ; z >= 0 ; z--) {
                        asm volatile("vpxor %ymm5,%ymm5,%ymm5");
                        asm volatile("vpxor %ymm7,%ymm7,%ymm7");
                        asm volatile("vpxor %ymm13,%ymm13,%ymm13");
                        asm volatile("vpxor %ymm15,%ymm15,%ymm15");
                        asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
                        asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
                        asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
                        asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
                        asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
                        asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
                        asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
                        asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
                        asm volatile("vpand %ymm0,%ymm5,%ymm5");
                        asm volatile("vpand %ymm0,%ymm7,%ymm7");
                        asm volatile("vpand %ymm0,%ymm13,%ymm13");
                        asm volatile("vpand %ymm0,%ymm15,%ymm15");
                        asm volatile("vpxor %ymm5,%ymm4,%ymm4");
                        asm volatile("vpxor %ymm7,%ymm6,%ymm6");
                        asm volatile("vpxor %ymm13,%ymm12,%ymm12");
                        asm volatile("vpxor %ymm15,%ymm14,%ymm14");
                }
                asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
                asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
                asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
                asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
                asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
                asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
                asm volatile("vpxor %0,%%ymm12,%%ymm12" : : "m" (q[d+64]));
                asm volatile("vpxor %0,%%ymm14,%%ymm14" : : "m" (q[d+96]));
                asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
                asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
                asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
                asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
        }
        asm volatile("sfence" : : : "memory");
        kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x4 = {
        raid6_avx24_gen_syndrome,
        raid6_avx24_xor_syndrome,
        raid6_have_avx2,
        "avx2x4",
        1                       /* Has cache hints */
};
#endif /* CONFIG_X86_64 */

#endif /* CONFIG_AS_AVX2 */
