|
27 | 27 | * NOTE! The calling convention is very intentionally the same as
|
28 | 28 | * for 'rep movs', so that we can rewrite the function call with
|
29 | 29 | * just a plain 'rep movs' on machines that have FSRM. But to make
|
30 |
| - * it simpler for us, we can clobber rsi/rdi and rax/r8-r11 freely. |
| 30 | + * it simpler for us, we can clobber rsi/rdi and rax freely. |
31 | 31 | */
|
32 | 32 | SYM_FUNC_START(rep_movs_alternative)
|
33 | 33 | cmpq $64,%rcx
|
@@ -68,55 +68,24 @@ SYM_FUNC_START(rep_movs_alternative)
|
68 | 68 | _ASM_EXTABLE_UA( 3b, .Lcopy_user_tail)
|
69 | 69 |
|
70 | 70 | .Llarge:
|
71 |
| -0: ALTERNATIVE "jmp .Lunrolled", "rep movsb", X86_FEATURE_ERMS |
| 71 | +0: ALTERNATIVE "jmp .Llarge_movsq", "rep movsb", X86_FEATURE_ERMS |
72 | 72 | 1: RET
|
73 | 73 |
|
74 |
| - _ASM_EXTABLE_UA( 0b, 1b) |
| 74 | + _ASM_EXTABLE_UA( 0b, 1b) |
75 | 75 |
|
76 |
| - .p2align 4 |
77 |
| -.Lunrolled: |
78 |
| -10: movq (%rsi),%r8 |
79 |
| -11: movq 8(%rsi),%r9 |
80 |
| -12: movq 16(%rsi),%r10 |
81 |
| -13: movq 24(%rsi),%r11 |
82 |
| -14: movq %r8,(%rdi) |
83 |
| -15: movq %r9,8(%rdi) |
84 |
| -16: movq %r10,16(%rdi) |
85 |
| -17: movq %r11,24(%rdi) |
86 |
| -20: movq 32(%rsi),%r8 |
87 |
| -21: movq 40(%rsi),%r9 |
88 |
| -22: movq 48(%rsi),%r10 |
89 |
| -23: movq 56(%rsi),%r11 |
90 |
| -24: movq %r8,32(%rdi) |
91 |
| -25: movq %r9,40(%rdi) |
92 |
| -26: movq %r10,48(%rdi) |
93 |
| -27: movq %r11,56(%rdi) |
94 |
| - addq $64,%rsi |
95 |
| - addq $64,%rdi |
96 |
| - subq $64,%rcx |
97 |
| - cmpq $64,%rcx |
98 |
| - jae .Lunrolled |
99 |
| - cmpl $8,%ecx |
100 |
| - jae .Lword |
| 76 | +.Llarge_movsq: |
| 77 | + movq %rcx,%rax |
| 78 | + shrq $3,%rcx |
| 79 | + andl $7,%eax |
| 80 | +0: rep movsq |
| 81 | + movl %eax,%ecx |
101 | 82 | testl %ecx,%ecx
|
102 | 83 | jne .Lcopy_user_tail
|
103 | 84 | RET
|
104 | 85 |
|
105 |
| - _ASM_EXTABLE_UA(10b, .Lcopy_user_tail) |
106 |
| - _ASM_EXTABLE_UA(11b, .Lcopy_user_tail) |
107 |
| - _ASM_EXTABLE_UA(12b, .Lcopy_user_tail) |
108 |
| - _ASM_EXTABLE_UA(13b, .Lcopy_user_tail) |
109 |
| - _ASM_EXTABLE_UA(14b, .Lcopy_user_tail) |
110 |
| - _ASM_EXTABLE_UA(15b, .Lcopy_user_tail) |
111 |
| - _ASM_EXTABLE_UA(16b, .Lcopy_user_tail) |
112 |
| - _ASM_EXTABLE_UA(17b, .Lcopy_user_tail) |
113 |
| - _ASM_EXTABLE_UA(20b, .Lcopy_user_tail) |
114 |
| - _ASM_EXTABLE_UA(21b, .Lcopy_user_tail) |
115 |
| - _ASM_EXTABLE_UA(22b, .Lcopy_user_tail) |
116 |
| - _ASM_EXTABLE_UA(23b, .Lcopy_user_tail) |
117 |
| - _ASM_EXTABLE_UA(24b, .Lcopy_user_tail) |
118 |
| - _ASM_EXTABLE_UA(25b, .Lcopy_user_tail) |
119 |
| - _ASM_EXTABLE_UA(26b, .Lcopy_user_tail) |
120 |
| - _ASM_EXTABLE_UA(27b, .Lcopy_user_tail) |
| 86 | +1: leaq (%rax,%rcx,8),%rcx |
| 87 | + jmp .Lcopy_user_tail |
| 88 | + |
| 89 | + _ASM_EXTABLE_UA( 0b, 1b) |
121 | 90 | SYM_FUNC_END(rep_movs_alternative)
|
122 | 91 | EXPORT_SYMBOL(rep_movs_alternative)
|
0 commit comments