/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
 *   Boston MA 02111-1307, USA; either version 2 of the License, or
 *   (at your option) any later version; incorporated herein by reference.
 *
 * ----------------------------------------------------------------------- */

/*
 * raid6/sse2.c
 *
 * SSE-2 implementation of RAID-6 syndrome functions
 *
 */

#include <linux/raid/pq.h>
#include "x86.h"

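/*
 * 0x1d is the low byte of the RAID-6 field generator polynomial 0x11d.
 * Each inner loop below multiplies the running Q accumulator by {02} in
 * GF(2^8): every byte is shifted left by one (paddb with itself) and the
 * bytes whose top bit was set are reduced by XORing in 0x1d (the mask is
 * built with pcmpgtb/pand).  A scalar sketch of that per-byte step, for
 * reference only (gf2_mul2() is illustrative and not part of this file):
 *
 *	static inline u8 gf2_mul2(u8 v)
 *	{
 *		return (v << 1) ^ ((v & 0x80) ? 0x1d : 0);
 *	}
 */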
static const struct raid6_sse_constants {
	u64 x1d[2];
} raid6_sse_constants __attribute__((aligned(16))) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
};

static int raid6_have_sse2(void)
{
	/* Not really boot_cpu but "all_cpus" */
	return boot_cpu_has(X86_FEATURE_MMX) &&
		boot_cpu_has(X86_FEATURE_FXSR) &&
		boot_cpu_has(X86_FEATURE_XMM) &&
		boot_cpu_has(X86_FEATURE_XMM2);
}

/*
 * Plain SSE2 implementation
 */
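/*
 * Register usage in the loop below: xmm0 holds the 0x1d mask, xmm2
 * accumulates P, xmm4 accumulates Q, xmm5 is the zeroed compare/mask
 * temporary and xmm6 carries the next data block, loaded one iteration
 * ahead so the load latency overlaps the GF(2^8) arithmetic.
 */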
static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
		asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
		for ( z = z0-2 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm6,%xmm2");
			asm volatile("pxor %xmm6,%xmm4");
			asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
		}
		asm volatile("pcmpgtb %xmm4,%xmm5");
		asm volatile("paddb %xmm4,%xmm4");
		asm volatile("pand %xmm0,%xmm5");
		asm volatile("pxor %xmm5,%xmm4");
		asm volatile("pxor %xmm5,%xmm5");
		asm volatile("pxor %xmm6,%xmm2");
		asm volatile("pxor %xmm6,%xmm4");

		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("pxor %xmm2,%xmm2");
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("pxor %xmm4,%xmm4");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

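/*
 * Partial (read-modify-write) syndrome update: only data disks in
 * [start, stop] are folded in.  The Q accumulator starts at dptr[stop]
 * ("right side": higher-numbered disks are left untouched) and is
 * multiplied by {02} once per lower-numbered disk, including the
 * data-free iterations below start ("left side"), so each block dptr[z]
 * is weighted by g^z before the result is XORed into the existing Q.
 */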
static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));

	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
		asm volatile("pxor %xmm4,%xmm2");
		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm5,%xmm4");
		}
		/* P/Q left side optimization */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pxor %xmm5,%xmm4");
		}
		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_sse2x1 = {
	raid6_sse21_gen_syndrome,
	raid6_sse21_xor_syndrome,
	raid6_have_sse2,
	"sse2x1",
	1			/* Has cache hints */
};

/*
 * Unrolled-by-2 SSE2 implementation
 */
static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
	asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */

	/* We uniformly assume a single prefetch covers at least 32 bytes */
	for ( d = 0 ; d < bytes ; d += 32 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d]));    /* P[0] */
		asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
		asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
		}
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));

	for ( d = 0 ; d < bytes ; d += 32 ) {
		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
		asm volatile("pxor %xmm4,%xmm2");
		asm volatile("pxor %xmm6,%xmm3");
		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
		}
		/* P/Q left side optimization */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
		}
		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_sse2x2 = {
	raid6_sse22_gen_syndrome,
	raid6_sse22_xor_syndrome,
	raid6_have_sse2,
	"sse2x2",
	1			/* Has cache hints */
};

#ifdef CONFIG_X86_64

/*
 * Unrolled-by-4 SSE2 implementation
 */
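/*
 * This variant keeps four P/Q accumulator pairs live at once and
 * therefore needs xmm8-xmm15, which are only encodable in 64-bit mode;
 * hence the CONFIG_X86_64 guard around it.
 */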
static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm2,%xmm2");	/* P[0] */
	asm volatile("pxor %xmm3,%xmm3");	/* P[1] */
	asm volatile("pxor %xmm4,%xmm4");	/* Q[0] */
	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */
	asm volatile("pxor %xmm6,%xmm6");	/* Q[1] */
	asm volatile("pxor %xmm7,%xmm7");	/* Zero temp */
	asm volatile("pxor %xmm10,%xmm10");	/* P[2] */
	asm volatile("pxor %xmm11,%xmm11");	/* P[3] */
	asm volatile("pxor %xmm12,%xmm12");	/* Q[2] */
	asm volatile("pxor %xmm13,%xmm13");	/* Zero temp */
	asm volatile("pxor %xmm14,%xmm14");	/* Q[3] */
	asm volatile("pxor %xmm15,%xmm15");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 64 ) {
		for ( z = z0 ; z >= 0 ; z-- ) {
			/* The second prefetch seems to improve performance... */
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm13,%xmm10");
			asm volatile("pxor %xmm15,%xmm11");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
		}
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("pxor %xmm2,%xmm2");
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("pxor %xmm3,%xmm3");
		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
		asm volatile("pxor %xmm10,%xmm10");
		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
		asm volatile("pxor %xmm11,%xmm11");
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("pxor %xmm4,%xmm4");
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("pxor %xmm6,%xmm6");
		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
		asm volatile("pxor %xmm12,%xmm12");
		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
		asm volatile("pxor %xmm14,%xmm14");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));

	for ( d = 0 ; d < bytes ; d += 64 ) {
		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
		asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
		asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
		asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
		asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
		asm volatile("pxor %xmm4,%xmm2");
		asm volatile("pxor %xmm6,%xmm3");
		asm volatile("pxor %xmm12,%xmm10");
		asm volatile("pxor %xmm14,%xmm11");
		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm13,%xmm10");
			asm volatile("pxor %xmm15,%xmm11");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
		}
		asm volatile("prefetchnta %0" :: "m" (q[d]));
		asm volatile("prefetchnta %0" :: "m" (q[d+32]));
		/* P/Q left side optimization */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
		}
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
		asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
		asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
	}
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}


const struct raid6_calls raid6_sse2x4 = {
	raid6_sse24_gen_syndrome,
	raid6_sse24_xor_syndrome,
	raid6_have_sse2,
	"sse2x4",
	1			/* Has cache hints */
};

#endif /* CONFIG_X86_64 */