1 
   2 
   3 
   4 #include <linux/linkage.h>
   5 #include <asm/cpufeatures.h>
   6 #include <asm/alternative-asm.h>
   7 
   8 .weak memset            /* emit memset as a weak symbol (GAS .weak directive) */
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 ENTRY(memset)
  22 ENTRY(__memset)
  23         /* In:  %rdi = destination, %sil = fill byte, %rdx = byte count.  */
  24         /* Out: %rax = the destination pointer exactly as it was passed.  */
  25         /* The ALTERNATIVE_2 site below lists three variants: a jump to   */
  26         /* memset_orig, an empty replacement keyed on                     */
  27         /* X86_FEATURE_REP_GOOD (presumably falling through to the rep    */
  28         /* stosq code that follows), and a jump to memset_erms for ERMS.  */
  29         ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
  30                       "jmp memset_erms", X86_FEATURE_ERMS
  31 
  32         movzbl %sil,%esi                /* esi = fill byte, zero-extended */
  33         movabs $0x0101010101010101,%rax
  34         imulq %rsi,%rax                 /* rax = fill byte copied into all 8 bytes */
  35 
  36         movq %rdi,%r9                   /* keep the incoming destination for the return */
  37         movq %rdx,%rcx
  38         shrq $3,%rcx                    /* rcx = count / 8: whole qwords to store */
  39         andl $7,%edx                    /* edx = count % 8: bytes left after the qwords */
  40         rep stosq                       /* bulk fill, 8 bytes per store */
  41         movl %edx,%ecx
  42         rep stosb                       /* finish the remaining 0..7 bytes */
  43         movq %r9,%rax                   /* return value = original destination */
  44         ret
  45 ENDPROC(memset)
  46 ENDPROC(__memset)
  47 
  48 
  49 
  50 
  51 
  52 
  53 
  54 
  55 
  56 
  57 
  58 
  59 ENTRY(memset_erms)              /* byte-granular fill: %rdi = dst, %sil = byte, %rdx = count; returns dst in %rax */
  60         movq %rdx,%rcx          /* rcx = repeat count for rep stosb */
  61         movb %sil,%al           /* al = byte that stosb stores */
  62         movq %rdi,%r9           /* stosb advances %rdi, so stash the original destination */
  63         rep stosb               /* one byte store per count, the whole length in a single string op */
  64         movq %r9,%rax           /* return value = original destination */
  65         ret
  66 ENDPROC(memset_erms)
  67 
  68 ENTRY(memset_orig)      /* In: %rdi = dst, %sil = fill byte, %rdx = length; out: %rax = dst. Uses plain stores only. */
  69         movq %rdi,%r10  /* keep the incoming dst; it is handed back at .Lende */
  70 
  71         /* Replicate the fill byte into every byte of %rax. */
  72         movzbl %sil,%ecx
  73         movabs $0x0101010101010101,%rax
  74         imulq  %rcx,%rax
  75 
  76         /* %r9d = dst & 7; nonzero means dst is not 8-byte aligned. */
  77         movl  %edi,%r9d
  78         andl  $7,%r9d
  79         jnz  .Lbad_alignment
  80 .Lafter_bad_alignment:
  81 
  82         movq  %rdx,%rcx
  83         shrq  $6,%rcx   /* rcx = length / 64: number of 64-byte chunks */
  84         jz       .Lhandle_tail  /* fewer than 64 bytes: skip the unrolled loop */
  85 
  86         .p2align 4
  87 .Lloop_64:
  88         decq  %rcx      /* sets ZF for the jnz at the loop end; the movq/leaq in between leave flags alone */
  89         movq  %rax,(%rdi)
  90         movq  %rax,8(%rdi)
  91         movq  %rax,16(%rdi)
  92         movq  %rax,24(%rdi)
  93         movq  %rax,32(%rdi)
  94         movq  %rax,40(%rdi)
  95         movq  %rax,48(%rdi)
  96         movq  %rax,56(%rdi)
  97         leaq  64(%rdi),%rdi     /* advance dst by 64 without touching flags */
  98         jnz    .Lloop_64
  99 
 100         /* Tail: the bytes left below 64 are finished by a qword loop, then a byte loop. */
 101 
 102         .p2align 4
 103 .Lhandle_tail:
 104         movl    %edx,%ecx
 105         andl    $63&(~7),%ecx   /* ecx = length & 0x38: bytes in the remaining whole qwords */
 106         jz              .Lhandle_7      /* no whole qword left */
 107         shrl    $3,%ecx         /* bytes -> qword count (1..7) */
 108         .p2align 4
 109 .Lloop_8:
 110         decl   %ecx     /* ZF from here drives the jnz below */
 111         movq  %rax,(%rdi)
 112         leaq  8(%rdi),%rdi
 113         jnz    .Lloop_8
 114 
 115 .Lhandle_7:
 116         andl    $7,%edx         /* edx = length & 7: trailing bytes */
 117         jz      .Lende
 118         .p2align 4
 119 .Lloop_1:
 120         decl    %edx    /* ZF from here drives the jnz below */
 121         movb    %al,(%rdi)
 122         leaq    1(%rdi),%rdi
 123         jnz     .Lloop_1
 124 
 125 .Lende:
 126         movq    %r10,%rax       /* return the dst saved on entry */
 127         ret
 128 
 129 .Lbad_alignment:
 130         cmpq $7,%rdx
 131         jbe     .Lhandle_7      /* length <= 7 (unsigned): too short for a qword store, use the byte loop */
 132         movq %rax,(%rdi)        /* one unaligned 8-byte store at dst; length >= 8 here, so it stays in range */
 133         movq $8,%r8
 134         subq %r9,%r8    /* r8 = 8 - (dst & 7): bytes up to the next 8-byte boundary */
 135         addq %r8,%rdi   /* dst now sits on that boundary */
 136         subq %r8,%rdx   /* length shrinks by the same amount; later stores rewrite the overlap with the same value */
 137         jmp .Lafter_bad_alignment
 138 .Lfinal:        /* NOTE(review): no reference to this label is visible in this listing */
 139 ENDPROC(memset_orig)