1/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
2
3#include <linux/linkage.h>
4#include <asm/dwarf2.h>
5#include <asm/cpufeature.h>
6#include <asm/alternative-asm.h>
7
/*
 * copy_page - copy one 4096-byte page.
 *
 * In:  %rsi = source, %rdi = destination (the implicit operands of movsq).
 *
 * Two implementations exist, selected by the ALTERNATIVE at the entry point:
 *
 *  - CPUs that indicate X86_FEATURE_REP_GOOD (sane string-instruction
 *    microcode) have the jump replaced by nothing and fall through to a
 *    single "rep movsq".  This is both the faster path on those CPUs and
 *    by far the simpler one, so it is used whenever possible.
 *
 *  - All other CPUs keep the "jmp copy_page_regs" and copy the page with
 *    explicit register moves plus software prefetch instead.
 *
 * Possible refinement for the register-move variant: vary its prefetch
 * distance based on SMP/UP.
 */
	ALIGN
ENTRY(copy_page)
	CFI_STARTPROC
	/* Stays a jump unless the CPU has REP_GOOD; then it is patched out. */
	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
	movl	$(4096/8), %ecx		/* page size in quadwords */
	rep	movsq			/* copy %rcx quadwords (%rsi) -> (%rdi) */
	ret
	CFI_ENDPROC
ENDPROC(copy_page)
23
/*
 * copy_page_regs - copy one 4096-byte page using plain register moves.
 *
 * copy_page jumps here when it does not use "rep movsq".
 *
 * In:       %rsi = source page, %rdi = destination page
 * Out:      %rsi and %rdi have each been advanced by 4096
 * Scratch:  %rax, %rcx, %rdx, %r8-%r11, flags
 * Saved:    %rbx and %r12 are also used as data registers; they are
 *           spilled to two stack slots on entry and reloaded before ret.
 *
 * The page is moved in 64-byte chunks, each chunk being eight quadword
 * loads followed by eight quadword stores.  The first loop additionally
 * prefetches the source five chunks ahead of the chunk being read; the
 * last five chunks are handled by a second loop without prefetch, so no
 * prefetch address ever lies beyond the end of the source page.
 */
#define CP_CHUNK_BYTES		64	/* bytes moved per loop iteration */
#define CP_PREFETCH_CHUNKS	5	/* prefetch distance, in chunks */

	/* Load the 64-byte chunk at (%rsi) into eight registers. */
	.macro cp_load_chunk
	movq	0*8(%rsi), %rax
	movq	1*8(%rsi), %rbx
	movq	2*8(%rsi), %rdx
	movq	3*8(%rsi), %r8
	movq	4*8(%rsi), %r9
	movq	5*8(%rsi), %r10
	movq	6*8(%rsi), %r11
	movq	7*8(%rsi), %r12
	.endm

	/* Store the eight registers filled by cp_load_chunk to (%rdi). */
	.macro cp_store_chunk
	movq	%rax, 0*8(%rdi)
	movq	%rbx, 1*8(%rdi)
	movq	%rdx, 2*8(%rdi)
	movq	%r8,  3*8(%rdi)
	movq	%r9,  4*8(%rdi)
	movq	%r10, 5*8(%rdi)
	movq	%r11, 6*8(%rdi)
	movq	%r12, 7*8(%rdi)
	.endm

ENTRY(copy_page_regs)
	CFI_STARTPROC

	/* Reserve two stack slots and park %rbx / %r12 in them. */
	subq	$2*8, %rsp
	CFI_ADJUST_CFA_OFFSET 2*8
	movq	%rbx, (%rsp)
	CFI_REL_OFFSET rbx, 0
	movq	%r12, 1*8(%rsp)
	CFI_REL_OFFSET r12, 1*8

	/* Every chunk except the last CP_PREFETCH_CHUNKS, with prefetch. */
	movl	$(4096/CP_CHUNK_BYTES-CP_PREFETCH_CHUNKS), %ecx
	.p2align 4
.Lcp_prefetch_loop:
	decq	%rcx			/* ZF from here feeds the jnz below */
	cp_load_chunk

	prefetcht0 CP_PREFETCH_CHUNKS*CP_CHUNK_BYTES(%rsi)

	cp_store_chunk

	/* lea advances the pointers without disturbing the flags. */
	leaq	CP_CHUNK_BYTES(%rsi), %rsi
	leaq	CP_CHUNK_BYTES(%rdi), %rdi

	jnz	.Lcp_prefetch_loop

	/* Remaining CP_PREFETCH_CHUNKS chunks, no prefetch. */
	movl	$CP_PREFETCH_CHUNKS, %ecx
	.p2align 4
.Lcp_tail_loop:
	decl	%ecx			/* ZF from here feeds the jnz below */

	cp_load_chunk

	cp_store_chunk

	leaq	CP_CHUNK_BYTES(%rdi), %rdi
	leaq	CP_CHUNK_BYTES(%rsi), %rsi
	jnz	.Lcp_tail_loop

	/* Reload the callee-saved registers and release the stack slots. */
	movq	(%rsp), %rbx
	CFI_RESTORE rbx
	movq	1*8(%rsp), %r12
	CFI_RESTORE r12
	addq	$2*8, %rsp
	CFI_ADJUST_CFA_OFFSET -2*8
	ret
	CFI_ENDPROC
ENDPROC(copy_page_regs)
98