/*
 * Copyright (C) 2012 Intel Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#ifdef CONFIG_AS_SSSE3

#include <linux/raid/pq.h>
#include "x86.h"

/*
 * RAID-6 recovery using SSSE3 byte shuffles.
 *
 * Every raid6_vgfmul[] entry is consumed here as two consecutive 16-byte
 * tables: bytes 0..15 are looked up with the low nibble of each input
 * byte and bytes 16..31 with the high nibble.  Both lookups are done with
 * pshufb and the two results are XORed together.
 *
 * The nibbles are separated with "psraw $4" followed by a pand against
 * the 0x0f mask below; the pand also discards the bits that the word-wide
 * arithmetic shift drags in from the neighbouring byte and the sign.
 *
 * All vector loads and stores use movdqa (the aligned move), so the data
 * buffers and tables are expected to be 16-byte aligned.
 */

/* Bytes handled per loop iteration: two XMM lanes on 64-bit, one on 32-bit */
#ifdef CONFIG_X86_64
#define RAID6_SSSE3_STRIDE	32
#else
#define RAID6_SSSE3_STRIDE	16
#endif

/* Low-nibble mask, kept in xmm7 for the duration of both recovery loops */
static const u8 __aligned(16) raid6_ssse3_nibble_mask[16] = {
	 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
	 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};

/*
 * Tell the caller whether these routines may be selected.
 *
 * Returns 1 when the boot CPU reports XMM, XMM2 and SSSE3, otherwise 0.
 */
static int raid6_has_ssse3(void)
{
	if (!boot_cpu_has(X86_FEATURE_XMM))
		return 0;
	if (!boot_cpu_has(X86_FEATURE_XMM2))
		return 0;
	if (!boot_cpu_has(X86_FEATURE_SSSE3))
		return 0;
	return 1;
}

/*
 * Rebuild two failed data blocks from the P and Q blocks.
 *
 * @disks: number of entries in @ptrs; the last two are P and Q
 * @bytes: length of every block; the loop only terminates when this is
 *         a multiple of RAID6_SSSE3_STRIDE
 * @faila: index of the first failed data block
 * @failb: index of the second failed data block (failb - faila is used
 *         as a table index, so presumably faila < failb -- confirm
 *         against the callers)
 * @ptrs:  block pointer table; it is modified temporarily and restored
 *         before the recovery loop starts
 *
 * On return the buffers at ptrs[faila] and ptrs[failb] hold the values
 * computed by the loop below.
 */
static void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila,
		int failb, void **ptrs)
{
	const int p_slot = disks - 2;
	const int q_slot = disks - 1;
	u8 *p = ptrs[p_slot];
	u8 *q = ptrs[q_slot];
	u8 *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	u8 q_coef;

	/*
	 * Run the syndrome generator with the zero page standing in for
	 * both missing blocks.  The buffers of the dead blocks are lent
	 * out as the P and Q destinations, so afterwards dp and dq hold
	 * the delta-P and delta-Q values.
	 */
	dp = ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[p_slot] = dp;
	dq = ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[q_slot] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Undo the pointer shuffle so the caller sees its original table */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[p_slot] = p;
	ptrs[q_slot] = q;

	/* Select the two multiplier tables for this pair of failures */
	pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
	q_coef = raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]];
	qmul = raid6_vgfmul[q_coef];

	/* XMM registers are only touched between fpu_begin and fpu_end */
	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm7" : : "m" (raid6_ssse3_nibble_mask[0]));

#ifdef CONFIG_X86_64
	/* Loop invariants: xmm6 = qmul low table, xmm14/15 = pbmul low/high */
	asm volatile("movdqa %0,%%xmm6" : : "m" (qmul[0]));
	asm volatile("movdqa %0,%%xmm14" : : "m" (pbmul[0]));
	asm volatile("movdqa %0,%%xmm15" : : "m" (pbmul[16]));
#endif

	while (bytes) {
#ifdef CONFIG_X86_64
		/* Two 16-byte lanes per pass; register pairs are lane0/lane1 */

		/* xmm1/9 = q ^ dq, xmm0/8 = p ^ dp (px) */
		asm volatile("movdqa %0,%%xmm1" : : "m" (q[0]));
		asm volatile("movdqa %0,%%xmm9" : : "m" (q[16]));
		asm volatile("movdqa %0,%%xmm0" : : "m" (p[0]));
		asm volatile("movdqa %0,%%xmm8" : : "m" (p[16]));
		asm volatile("pxor %0,%%xmm1" : : "m" (dq[0]));
		asm volatile("pxor %0,%%xmm9" : : "m" (dq[16]));
		asm volatile("pxor %0,%%xmm0" : : "m" (dp[0]));
		asm volatile("pxor %0,%%xmm8" : : "m" (dp[16]));

		/*
		 * qmul lookup of q ^ dq: fresh table copies go to xmm4/12
		 * (low) and xmm5/13 (high); px is saved in xmm2/10.
		 */
		asm volatile("movdqa %xmm6,%xmm4");
		asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16]));
		asm volatile("movdqa %xmm6,%xmm12");
		asm volatile("movdqa %xmm5,%xmm13");
		asm volatile("movdqa %xmm1,%xmm3");
		asm volatile("movdqa %xmm9,%xmm11");
		asm volatile("movdqa %xmm0,%xmm2");
		asm volatile("movdqa %xmm8,%xmm10");
		asm volatile("psraw $4,%xmm1");
		asm volatile("psraw $4,%xmm9");
		asm volatile("pand %xmm7,%xmm3");
		asm volatile("pand %xmm7,%xmm11");
		asm volatile("pand %xmm7,%xmm1");
		asm volatile("pand %xmm7,%xmm9");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm11,%xmm12");
		asm volatile("pshufb %xmm1,%xmm5");
		asm volatile("pshufb %xmm9,%xmm13");
		asm volatile("pxor %xmm4,%xmm5");
		asm volatile("pxor %xmm12,%xmm13");

		/* Now xmm5/13 = qx.  Next: pbmul lookup of px into xmm1/9 */
		asm volatile("movdqa %xmm14,%xmm4");
		asm volatile("movdqa %xmm15,%xmm1");
		asm volatile("movdqa %xmm14,%xmm12");
		asm volatile("movdqa %xmm15,%xmm9");
		asm volatile("movdqa %xmm2,%xmm3");
		asm volatile("movdqa %xmm10,%xmm11");
		asm volatile("psraw $4,%xmm2");
		asm volatile("psraw $4,%xmm10");
		asm volatile("pand %xmm7,%xmm3");
		asm volatile("pand %xmm7,%xmm11");
		asm volatile("pand %xmm7,%xmm2");
		asm volatile("pand %xmm7,%xmm10");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm11,%xmm12");
		asm volatile("pshufb %xmm2,%xmm1");
		asm volatile("pshufb %xmm10,%xmm9");
		asm volatile("pxor %xmm4,%xmm1");
		asm volatile("pxor %xmm12,%xmm9");

		/* pbmul[px] ^ qx is block B (db); it is written to dq */
		asm volatile("pxor %xmm5,%xmm1");
		asm volatile("pxor %xmm13,%xmm9");
		asm volatile("movdqa %%xmm1,%0" : "=m" (dq[0]));
		asm volatile("movdqa %%xmm9,%0" : "=m" (dq[16]));

		/* px ^ db is written to dp, the buffer of block faila */
		asm volatile("pxor %xmm1,%xmm0");
		asm volatile("pxor %xmm9,%xmm8");
		asm volatile("movdqa %%xmm0,%0" : "=m" (dp[0]));
		asm volatile("movdqa %%xmm8,%0" : "=m" (dp[16]));
#else
		/* xmm1 = q ^ dq, xmm0 = p ^ dp (px) */
		asm volatile("movdqa %0,%%xmm1" : : "m" (q[0]));
		asm volatile("movdqa %0,%%xmm0" : : "m" (p[0]));
		asm volatile("pxor %0,%%xmm1" : : "m" (dq[0]));
		asm volatile("pxor %0,%%xmm0" : : "m" (dp[0]));

		/* Only eight XMM registers here, so tables are reloaded */
		asm volatile("movdqa %0,%%xmm4" : : "m" (qmul[0]));
		asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16]));

		/* qmul lookup of q ^ dq, result (qx) in xmm5 */
		asm volatile("movdqa %xmm1,%xmm3");
		asm volatile("psraw $4,%xmm1");
		asm volatile("pand %xmm7,%xmm3");
		asm volatile("pand %xmm7,%xmm1");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm1,%xmm5");
		asm volatile("pxor %xmm4,%xmm5");

		/* Work on a copy of px so xmm0 survives for the final XOR */
		asm volatile("movdqa %xmm0,%xmm2");

		/* pbmul lookup of px, result in xmm1 */
		asm volatile("movdqa %0,%%xmm4" : : "m" (pbmul[0]));
		asm volatile("movdqa %0,%%xmm1" : : "m" (pbmul[16]));
		asm volatile("movdqa %xmm2,%xmm3");
		asm volatile("psraw $4,%xmm2");
		asm volatile("pand %xmm7,%xmm3");
		asm volatile("pand %xmm7,%xmm2");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm2,%xmm1");
		asm volatile("pxor %xmm4,%xmm1");

		/* pbmul[px] ^ qx is block B (db); it is written to dq */
		asm volatile("pxor %xmm5,%xmm1");
		asm volatile("movdqa %%xmm1,%0" : "=m" (dq[0]));

		/* px ^ db is written to dp, the buffer of block faila */
		asm volatile("pxor %xmm1,%xmm0");
		asm volatile("movdqa %%xmm0,%0" : "=m" (dp[0]));
#endif
		/* Step every cursor past the bytes just processed */
		p += RAID6_SSSE3_STRIDE;
		q += RAID6_SSSE3_STRIDE;
		dp += RAID6_SSSE3_STRIDE;
		dq += RAID6_SSSE3_STRIDE;
		bytes -= RAID6_SSSE3_STRIDE;
	}

	kernel_fpu_end();
}


/*
 * Rebuild one failed data block together with the P block.
 *
 * @disks: number of entries in @ptrs; the last two are P and Q
 * @bytes: length of every block; the loop only terminates when this is
 *         a multiple of RAID6_SSSE3_STRIDE
 * @faila: index of the failed data block
 * @ptrs:  block pointer table; it is modified temporarily and restored
 *         before the recovery loop starts
 *
 * The loop overwrites both the buffer at ptrs[faila] and the P buffer
 * at ptrs[disks-2].
 */
static void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila,
		void **ptrs)
{
	const int p_slot = disks - 2;
	const int q_slot = disks - 1;
	u8 *p = ptrs[p_slot];
	u8 *q = ptrs[q_slot];
	u8 *dq;
	const u8 *qmul;		/* Q multiplier table */

	/*
	 * Run the syndrome generator with the zero page standing in for
	 * the missing block, using the dead block's buffer as the Q
	 * destination so that dq ends up holding delta-Q.  The P slot is
	 * left alone, so the generator presumably writes its P output
	 * straight into p -- confirm against gen_syndrome.
	 */
	dq = ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[q_slot] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Undo the pointer shuffle so the caller sees its original table */
	ptrs[faila] = dq;
	ptrs[q_slot] = q;

	/* Select the multiplier table for this failure position */
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	/* XMM registers are only touched between fpu_begin and fpu_end */
	kernel_fpu_begin();

	asm volatile("movdqa %0, %%xmm7" : : "m" (raid6_ssse3_nibble_mask[0]));

	while (bytes) {
#ifdef CONFIG_X86_64
		/* Two 16-byte lanes per pass, interleaved instruction by
		 * instruction.
		 */

		/* xmm3 = q[0] ^ dq[0]; xmm0 = low table for lane 0 */
		asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0]));
		asm volatile("movdqa %0, %%xmm4" : : "m" (dq[16]));
		asm volatile("pxor %0, %%xmm3" : : "m" (q[0]));
		asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0]));

		/* xmm4 = q[16] ^ dq[16]; xmm1 = high table for lane 0 */
		asm volatile("pxor %0, %%xmm4" : : "m" (q[16]));
		asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16]));

		/* Duplicate both lanes: xmm6 and xmm8 become low nibbles */
		asm volatile("movdqa %xmm3, %xmm6");
		asm volatile("movdqa %xmm4, %xmm8");

		/* Lane 0 lookup; xmm10/11 get lane 1's table copies */
		asm volatile("psraw $4, %xmm3");
		asm volatile("pand %xmm7, %xmm6");
		asm volatile("pand %xmm7, %xmm3");
		asm volatile("pshufb %xmm6, %xmm0");
		asm volatile("pshufb %xmm3, %xmm1");
		asm volatile("movdqa %0, %%xmm10" : : "m" (qmul[0]));
		asm volatile("pxor %xmm0, %xmm1");
		asm volatile("movdqa %0, %%xmm11" : : "m" (qmul[16]));

		/* xmm1 = qmul[q[0] ^ dq[0]].  Lane 1 lookup; P is loaded */
		asm volatile("psraw $4, %xmm4");
		asm volatile("pand %xmm7, %xmm8");
		asm volatile("pand %xmm7, %xmm4");
		asm volatile("pshufb %xmm8, %xmm10");
		asm volatile("pshufb %xmm4, %xmm11");
		asm volatile("movdqa %0, %%xmm2" : : "m" (p[0]));
		asm volatile("pxor %xmm10, %xmm11");
		asm volatile("movdqa %0, %%xmm12" : : "m" (p[16]));

		/* xmm11 = qmul[q[16] ^ dq[16]] */

		/* xmm2 = p[0] ^ qmul[q[0] ^ dq[0]] */
		asm volatile("pxor %xmm1, %xmm2");

		/* xmm12 = p[16] ^ qmul[q[16] ^ dq[16]] */
		asm volatile("pxor %xmm11, %xmm12");

		/* Lookup results go to dq, the updated P values go to p */
		asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0]));
		asm volatile("movdqa %%xmm11, %0" : "=m" (dq[16]));

		asm volatile("movdqa %%xmm2, %0" : "=m" (p[0]));
		asm volatile("movdqa %%xmm12, %0" : "=m" (p[16]));
#else
		/* xmm3 = q[0] ^ dq[0]; xmm0/xmm1 = low/high table */
		asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0]));
		asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0]));
		asm volatile("pxor %0, %%xmm3" : : "m" (q[0]));
		asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16]));

		/* Nibble split and lookup; P is loaded into xmm2 meanwhile */
		asm volatile("movdqa %xmm3, %xmm6");
		asm volatile("movdqa %0, %%xmm2" : : "m" (p[0]));
		asm volatile("psraw $4, %xmm3");
		asm volatile("pand %xmm7, %xmm6");
		asm volatile("pand %xmm7, %xmm3");
		asm volatile("pshufb %xmm6, %xmm0");
		asm volatile("pshufb %xmm3, %xmm1");
		asm volatile("pxor %xmm0, %xmm1");

		/* xmm1 = qmul[q[0] ^ dq[0]] */

		/* xmm2 = p[0] ^ qmul[q[0] ^ dq[0]] */
		asm volatile("pxor %xmm1, %xmm2");

		/* Lookup result goes to dq, the updated P value goes to p */
		asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0]));
		asm volatile("movdqa %%xmm2, %0" : "=m" (p[0]));
#endif
		/* Step every cursor past the bytes just processed */
		p += RAID6_SSSE3_STRIDE;
		q += RAID6_SSSE3_STRIDE;
		dq += RAID6_SSSE3_STRIDE;
		bytes -= RAID6_SSSE3_STRIDE;
	}

	kernel_fpu_end();
}

/*
 * Descriptor exporting the routines above.  The name suffix matches the
 * number of XMM lanes handled per loop iteration (x2 on 64-bit, x1 on
 * 32-bit).
 */
const struct raid6_recov_calls raid6_recov_ssse3 = {
#ifdef CONFIG_X86_64
	.name = "ssse3x2",
#else
	.name = "ssse3x1",
#endif
	.priority = 1,
	.valid = raid6_has_ssse3,
	.data2 = raid6_2data_recov_ssse3,
	.datap = raid6_datap_recov_ssse3,
};

#else
#warning "your version of binutils lacks SSSE3 support"
#endif