root/lib/raid6/recov_avx512.c


DEFINITIONS

This source file includes the following definitions:
  1. raid6_has_avx512
  2. raid6_2data_recov_avx512
  3. raid6_datap_recov_avx512

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
 * Author: Megha Dey <megha.dey@linux.intel.com>
 */

#ifdef CONFIG_AS_AVX512

#include <linux/raid/pq.h>
#include "x86.h"

static int raid6_has_avx512(void)
{
        return boot_cpu_has(X86_FEATURE_AVX2) &&
                boot_cpu_has(X86_FEATURE_AVX) &&
                boot_cpu_has(X86_FEATURE_AVX512F) &&
                boot_cpu_has(X86_FEATURE_AVX512BW) &&
                boot_cpu_has(X86_FEATURE_AVX512VL) &&
                boot_cpu_has(X86_FEATURE_AVX512DQ);
}
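
/*
 * Both recovery routines below do GF(2^8) constant multiplication with
 * vpshufb lookups into the 32-byte rows of raid6_vgfmul[]: bytes 0..15
 * of a row hold c * n for every low nibble n, bytes 16..31 hold
 * c * (n << 4) for every high nibble.  A scalar sketch of the lookup the
 * vpsraw/vpandq/vpshufb sequences apply 64 bytes at a time (illustrative
 * only; this helper is not part of the original file):
 */
static inline u8 raid6_gf_mul_sketch(const u8 *tbl, u8 x)
{
        /* product of the low nibble XOR product of the high nibble */
        return tbl[x & 0x0f] ^ tbl[16 + (x >> 4)];
}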

static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
                                     int failb, void **ptrs)
{
        u8 *p, *q, *dp, *dq;
        const u8 *pbmul;        /* P multiplier table for B data */
        const u8 *qmul;         /* Q multiplier table (for both) */
        const u8 x0f = 0x0f;

        p = (u8 *)ptrs[disks-2];
        q = (u8 *)ptrs[disks-1];

        /*
         * Compute syndrome with zero for the missing data pages
         * Use the dead data pages as temporary storage for
         * delta p and delta q
         */

        dp = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks-2] = dp;
        dq = (u8 *)ptrs[failb];
        ptrs[failb] = (void *)raid6_empty_zero_page;
        ptrs[disks-1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila]   = dp;
        ptrs[failb]   = dq;
        ptrs[disks-2] = p;
        ptrs[disks-1] = q;

        /* Now, pick the proper data tables */
        pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
                raid6_gfexp[failb]]];
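
        /*
         * With a = faila, b = failb, Pxy = P ^ P' and Qxy = Q ^ Q' (the
         * deltas computed via the zeroed pages above), the two-erasure
         * equations Da ^ Db = Pxy and g^a*Da ^ g^b*Db = Qxy solve to
         *
         *      Db = pbmul[Pxy] ^ qmul[Qxy]     with pbmul = 1/(g^(b-a) + 1)
         *      Da = Db ^ Pxy                        qmul  = 1/(g^a + g^b)
         *
         * which is what the vector loop below evaluates 128 (or 64)
         * bytes at a time.
         */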

        kernel_fpu_begin();

        /* zmm7 = x0f[16], the nibble mask for the table lookups */
        asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

        while (bytes) {
#ifdef CONFIG_X86_64
                asm volatile("vmovdqa64 %0, %%zmm1\n\t"
                             "vmovdqa64 %1, %%zmm9\n\t"
                             "vmovdqa64 %2, %%zmm0\n\t"
                             "vmovdqa64 %3, %%zmm8\n\t"
                             "vpxorq %4, %%zmm1, %%zmm1\n\t"
                             "vpxorq %5, %%zmm9, %%zmm9\n\t"
                             "vpxorq %6, %%zmm0, %%zmm0\n\t"
                             "vpxorq %7, %%zmm8, %%zmm8"
                             :
                             : "m" (q[0]), "m" (q[64]), "m" (p[0]),
                               "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
                               "m" (dp[0]), "m" (dp[64]));

                /*
                 * 1 = dq[0]  ^ q[0]
                 * 9 = dq[64] ^ q[64]
                 * 0 = dp[0]  ^ p[0]
                 * 8 = dp[64] ^ p[64]
                 */

                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm5"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));

                asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
                             "vpsraw $4, %%zmm9, %%zmm12\n\t"
                             "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
                             "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
                             "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
                             "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
                             "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
                             "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
                             "vpxorq %%zmm4, %%zmm5, %%zmm5"
                             :
                             : );

                /*
                 * 5 = qx[0]
                 * 15 = qx[64]
                 */

                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm1\n\t"
                             "vpsraw $4, %%zmm0, %%zmm2\n\t"
                             "vpsraw $4, %%zmm8, %%zmm6\n\t"
                             "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
                             "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
                             "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
                             "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
                             "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
                             "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm12, %%zmm13, %%zmm13"
                             :
                             : "m" (pbmul[0]), "m" (pbmul[16]));

                /*
                 * 1  = pbmul[px[0]]
                 * 13 = pbmul[px[64]]
                 */
                asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm15, %%zmm13, %%zmm13"
                             :
                             : );

                /*
                 * 1 = db = DQ
                 * 13 = db[64] = DQ[64]
                 */
                asm volatile("vmovdqa64 %%zmm1, %0\n\t"
                             "vmovdqa64 %%zmm13,%1\n\t"
                             "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
                             "vpxorq %%zmm13, %%zmm8, %%zmm8"
                             :
                             : "m" (dq[0]), "m" (dq[64]));

                asm volatile("vmovdqa64 %%zmm0, %0\n\t"
                             "vmovdqa64 %%zmm8, %1"
                             :
                             : "m" (dp[0]), "m" (dp[64]));

                bytes -= 128;
                p += 128;
                q += 128;
                dp += 128;
                dq += 128;
#else
                asm volatile("vmovdqa64 %0, %%zmm1\n\t"
                             "vmovdqa64 %1, %%zmm0\n\t"
                             "vpxorq %2, %%zmm1, %%zmm1\n\t"
                             "vpxorq %3, %%zmm0, %%zmm0"
                             :
                             : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp));

                /* 1 = dq ^ q;  0 = dp ^ p */

                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm5"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));

                /*
                 * 1 = dq ^ q
                 * 3 = (dq ^ q) >> 4
                 */
                asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
                             "vpxorq %%zmm4, %%zmm5, %%zmm5"
                             :
                             : );

                /* 5 = qx */

                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm1"
                             :
                             : "m" (pbmul[0]), "m" (pbmul[16]));

                asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
                             "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
                             "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm4, %%zmm1, %%zmm1"
                             :
                             : );

                /* 1 = pbmul[px] */
                asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
                             /* 1 = db = DQ */
                             "vmovdqa64 %%zmm1, %0\n\t"
                             :
                             : "m" (dq[0]));

                asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
                             "vmovdqa64 %%zmm0, %0"
                             :
                             : "m" (dp[0]));

                bytes -= 64;
                p += 64;
                q += 64;
                dp += 64;
                dq += 64;
#endif
        }

        kernel_fpu_end();
}
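
/*
 * For reference, a scalar equivalent of the loop above, modeled on
 * raid6_2data_recov_intx1() in lib/raid6/recov.c.  Here pbmul and qmul
 * would be 256-entry rows of raid6_gfmul[] (the byte-indexed analogue of
 * raid6_vgfmul[]); the helper itself is illustrative, not part of this
 * file:
 */
static void raid6_2data_recov_sketch(size_t bytes, u8 *p, u8 *q,
                                     u8 *dp, u8 *dq,
                                     const u8 *pbmul, const u8 *qmul)
{
        u8 px, qx, db;

        while (bytes--) {
                px    = *p++ ^ *dp;             /* P delta           */
                qx    = qmul[*q++ ^ *dq];       /* scaled Q delta    */
                *dq++ = db = pbmul[px] ^ qx;    /* reconstructed B   */
                *dp++ = db ^ px;                /* reconstructed A   */
        }
}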

static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
                                     void **ptrs)
{
        u8 *p, *q, *dq;
        const u8 *qmul;         /* Q multiplier table */
        const u8 x0f = 0x0f;

        p = (u8 *)ptrs[disks-2];
        q = (u8 *)ptrs[disks-1];

        /*
         * Compute syndrome with zero for the missing data page
         * Use the dead data page as temporary storage for delta q
         */

        dq = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks-1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila]   = dq;
        ptrs[disks-1] = q;

        /* Now, pick the proper data tables */
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
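
        /*
         * Q ^ Q' = g^a * Da for a = faila, so multiplying the Q delta by
         * qmul = 1/g^a recovers Da; P, regenerated above with Da taken as
         * zero, is then repaired with P ^= Da inside the loop.
         */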

        kernel_fpu_begin();

        asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

        while (bytes) {
#ifdef CONFIG_X86_64
                asm volatile("vmovdqa64 %0, %%zmm3\n\t"
                             "vmovdqa64 %1, %%zmm8\n\t"
                             "vpxorq %2, %%zmm3, %%zmm3\n\t"
                             "vpxorq %3, %%zmm8, %%zmm8"
                             :
                             : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
                               "m" (q[64]));

                /*
                 * 3 = q[0] ^ dq[0]
                 * 8 = q[64] ^ dq[64]
                 */
                asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
                             "vmovapd %%zmm0, %%zmm13\n\t"
                             "vbroadcasti64x2 %1, %%zmm1\n\t"
                             "vmovapd %%zmm1, %%zmm14"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));

                asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
                             "vpsraw $4, %%zmm8, %%zmm12\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
                             "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
                             "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
                             "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
                             "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
                             "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
                             "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
                             "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm13, %%zmm14, %%zmm14"
                             :
                             : );

                /*
                 * 1  = qmul[q[0]  ^ dq[0]]
                 * 14 = qmul[q[64] ^ dq[64]]
                 */
                asm volatile("vmovdqa64 %0, %%zmm2\n\t"
                             "vmovdqa64 %1, %%zmm12\n\t"
                             "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
                             "vpxorq %%zmm14, %%zmm12, %%zmm12"
                             :
                             : "m" (p[0]), "m" (p[64]));

                /*
                 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
                 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
                 */

                asm volatile("vmovdqa64 %%zmm1, %0\n\t"
                             "vmovdqa64 %%zmm14, %1\n\t"
                             "vmovdqa64 %%zmm2, %2\n\t"
                             "vmovdqa64 %%zmm12,%3"
                             :
                             : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
                               "m" (p[64]));

                bytes -= 128;
                p += 128;
                q += 128;
                dq += 128;
#else
                asm volatile("vmovdqa64 %0, %%zmm3\n\t"
                             "vpxorq %1, %%zmm3, %%zmm3"
                             :
                             : "m" (dq[0]), "m" (q[0]));

                /* 3 = q ^ dq */

                asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
                             "vbroadcasti64x2 %1, %%zmm1"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));

                asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
                             "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
                             "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm0, %%zmm1, %%zmm1"
                             :
                             : );

                /* 1 = qmul[q ^ dq] */

                asm volatile("vmovdqa64 %0, %%zmm2\n\t"
                             "vpxorq %%zmm1, %%zmm2, %%zmm2"
                             :
                             : "m" (p[0]));

                /* 2 = p ^ qmul[q ^ dq] */

                asm volatile("vmovdqa64 %%zmm1, %0\n\t"
                             "vmovdqa64 %%zmm2, %1"
                             :
                             : "m" (dq[0]), "m" (p[0]));

                bytes -= 64;
                p += 64;
                q += 64;
                dq += 64;
#endif
        }

        kernel_fpu_end();
}
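
/*
 * Scalar equivalent of the loop above, modeled on raid6_datap_recov_intx1()
 * in lib/raid6/recov.c, again with qmul as a 256-entry raid6_gfmul[] row;
 * illustrative only, not part of this file:
 */
static void raid6_datap_recov_sketch(size_t bytes, u8 *p, u8 *q, u8 *dq,
                                     const u8 *qmul)
{
        while (bytes--) {
                *p++ ^= *dq = qmul[*q ^ *dq];   /* Da, then P ^= Da */
                q++;
                dq++;
        }
}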

const struct raid6_recov_calls raid6_recov_avx512 = {
        .data2 = raid6_2data_recov_avx512,
        .datap = raid6_datap_recov_avx512,
        .valid = raid6_has_avx512,
#ifdef CONFIG_X86_64
        .name = "avx512x2",
#else
        .name = "avx512x1",
#endif
        .priority = 3,
};
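
/*
 * How this table is consumed (a sketch modeled on raid6_choose_recov() in
 * lib/raid6/algos.c, assuming the raid6_recov_algos[] table is in scope):
 * the core keeps the highest-priority entry whose ->valid() check passes,
 * so raid6_recov_avx512 (priority 3) is preferred over the AVX2 and SSSE3
 * variants on capable CPUs.
 *
 *      for (best = NULL, algo = raid6_recov_algos; *algo; algo++)
 *              if (!best || (*algo)->priority > best->priority)
 *                      if (!(*algo)->valid || (*algo)->valid())
 *                              best = *algo;
 */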

#else
#warning "your version of binutils lacks AVX512 support"
#endif
