/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */
8
9#include <linux/linkage.h>
10#include <asm/current.h>
11#include <asm/asm-offsets.h>
12#include <asm/thread_info.h>
13#include <asm/cpufeatures.h>
14#include <asm/alternative-asm.h>
15#include <asm/asm.h>
16#include <asm/smap.h>
17#include <asm/export.h>
18
19.macro ALIGN_DESTINATION
20 /* check for bad alignment of destination */
21 movl %edi,%ecx
22 andl $7,%ecx
23 jz 102f /* already aligned */
24 subl $8,%ecx
25 negl %ecx
26 subl %ecx,%edx
27100: movb (%rsi),%al
28101: movb %al,(%rdi)
29 incq %rsi
30 incq %rdi
31 decl %ecx
32 jnz 100b
33102:
34 .section .fixup,"ax"
35103: addl %ecx,%edx /* ecx is zerorest also */
36 jmp .Lcopy_user_handle_tail
37 .previous
38
39 _ASM_EXTABLE_UA(100b, 103b)
40 _ASM_EXTABLE_UA(101b, 103b)
41 .endm
42
43/*
44 * copy_user_generic_unrolled - memory copy with exception handling.
45 * This version is for CPUs like P4 that don't have efficient micro
46 * code for rep movsq
47 *
48 * Input:
49 * rdi destination
50 * rsi source
51 * rdx count
52 *
53 * Output:
54 * eax uncopied bytes or 0 if successful.
55 */
56ENTRY(copy_user_generic_unrolled)
57 ASM_STAC
58 cmpl $8,%edx
59 jb 20f /* less then 8 bytes, go to byte copy loop */
60 ALIGN_DESTINATION
61 movl %edx,%ecx
62 andl $63,%edx
63 shrl $6,%ecx
64 jz .L_copy_short_string
651: movq (%rsi),%r8
662: movq 1*8(%rsi),%r9
673: movq 2*8(%rsi),%r10
684: movq 3*8(%rsi),%r11
695: movq %r8,(%rdi)
706: movq %r9,1*8(%rdi)
717: movq %r10,2*8(%rdi)
728: movq %r11,3*8(%rdi)
739: movq 4*8(%rsi),%r8
7410: movq 5*8(%rsi),%r9
7511: movq 6*8(%rsi),%r10
7612: movq 7*8(%rsi),%r11
7713: movq %r8,4*8(%rdi)
7814: movq %r9,5*8(%rdi)
7915: movq %r10,6*8(%rdi)
8016: movq %r11,7*8(%rdi)
81 leaq 64(%rsi),%rsi
82 leaq 64(%rdi),%rdi
83 decl %ecx
84 jnz 1b
85.L_copy_short_string:
86 movl %edx,%ecx
87 andl $7,%edx
88 shrl $3,%ecx
89 jz 20f
9018: movq (%rsi),%r8
9119: movq %r8,(%rdi)
92 leaq 8(%rsi),%rsi
93 leaq 8(%rdi),%rdi
94 decl %ecx
95 jnz 18b
9620: andl %edx,%edx
97 jz 23f
98 movl %edx,%ecx
9921: movb (%rsi),%al
10022: movb %al,(%rdi)
101 incq %rsi
102 incq %rdi
103 decl %ecx
104 jnz 21b
10523: xor %eax,%eax
106 ASM_CLAC
107 ret
108
109 .section .fixup,"ax"
11030: shll $6,%ecx
111 addl %ecx,%edx
112 jmp 60f
11340: leal (%rdx,%rcx,8),%edx
114 jmp 60f
11550: movl %ecx,%edx
11660: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
117 .previous
118
119 _ASM_EXTABLE_UA(1b, 30b)
120 _ASM_EXTABLE_UA(2b, 30b)
121 _ASM_EXTABLE_UA(3b, 30b)
122 _ASM_EXTABLE_UA(4b, 30b)
123 _ASM_EXTABLE_UA(5b, 30b)
124 _ASM_EXTABLE_UA(6b, 30b)
125 _ASM_EXTABLE_UA(7b, 30b)
126 _ASM_EXTABLE_UA(8b, 30b)
127 _ASM_EXTABLE_UA(9b, 30b)
128 _ASM_EXTABLE_UA(10b, 30b)
129 _ASM_EXTABLE_UA(11b, 30b)
130 _ASM_EXTABLE_UA(12b, 30b)
131 _ASM_EXTABLE_UA(13b, 30b)
132 _ASM_EXTABLE_UA(14b, 30b)
133 _ASM_EXTABLE_UA(15b, 30b)
134 _ASM_EXTABLE_UA(16b, 30b)
135 _ASM_EXTABLE_UA(18b, 40b)
136 _ASM_EXTABLE_UA(19b, 40b)
137 _ASM_EXTABLE_UA(21b, 50b)
138 _ASM_EXTABLE_UA(22b, 50b)
139ENDPROC(copy_user_generic_unrolled)
140EXPORT_SYMBOL(copy_user_generic_unrolled)
141
142/* Some CPUs run faster using the string copy instructions.
143 * This is also a lot simpler. Use them when possible.
144 *
145 * Only 4GB of copy is supported. This shouldn't be a problem
146 * because the kernel normally only writes from/to page sized chunks
147 * even if user space passed a longer buffer.
148 * And more would be dangerous because both Intel and AMD have
149 * errata with rep movsq > 4GB. If someone feels the need to fix
150 * this please consider this.
151 *
152 * Input:
153 * rdi destination
154 * rsi source
155 * rdx count
156 *
157 * Output:
158 * eax uncopied bytes or 0 if successful.
159 */
160ENTRY(copy_user_generic_string)
161 ASM_STAC
162 cmpl $8,%edx
163 jb 2f /* less than 8 bytes, go to byte copy loop */
164 ALIGN_DESTINATION
165 movl %edx,%ecx
166 shrl $3,%ecx
167 andl $7,%edx
1681: rep
169 movsq
1702: movl %edx,%ecx
1713: rep
172 movsb
173 xorl %eax,%eax
174 ASM_CLAC
175 ret
176
177 .section .fixup,"ax"
17811: leal (%rdx,%rcx,8),%ecx
17912: movl %ecx,%edx /* ecx is zerorest also */
180 jmp .Lcopy_user_handle_tail
181 .previous
182
183 _ASM_EXTABLE_UA(1b, 11b)
184 _ASM_EXTABLE_UA(3b, 12b)
185ENDPROC(copy_user_generic_string)
186EXPORT_SYMBOL(copy_user_generic_string)
187
188/*
189 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
190 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
191 *
192 * Input:
193 * rdi destination
194 * rsi source
195 * rdx count
196 *
197 * Output:
198 * eax uncopied bytes or 0 if successful.
199 */
200ENTRY(copy_user_enhanced_fast_string)
201 ASM_STAC
202 cmpl $64,%edx
203 jb .L_copy_short_string /* less then 64 bytes, avoid the costly 'rep' */
204 movl %edx,%ecx
2051: rep
206 movsb
207 xorl %eax,%eax
208 ASM_CLAC
209 ret
210
211 .section .fixup,"ax"
21212: movl %ecx,%edx /* ecx is zerorest also */
213 jmp .Lcopy_user_handle_tail
214 .previous
215
216 _ASM_EXTABLE_UA(1b, 12b)
217ENDPROC(copy_user_enhanced_fast_string)
218EXPORT_SYMBOL(copy_user_enhanced_fast_string)
219
220/*
221 * Try to copy last bytes and clear the rest if needed.
222 * Since protection fault in copy_from/to_user is not a normal situation,
223 * it is not necessary to optimize tail handling.
224 *
225 * Input:
226 * rdi destination
227 * rsi source
228 * rdx count
229 *
230 * Output:
231 * eax uncopied bytes or 0 if successful.
232 */
233ALIGN;
234.Lcopy_user_handle_tail:
235 movl %edx,%ecx
2361: rep movsb
2372: mov %ecx,%eax
238 ASM_CLAC
239 ret
240
241 _ASM_EXTABLE_UA(1b, 2b)
242END(.Lcopy_user_handle_tail)
243
244/*
245 * copy_user_nocache - Uncached memory copy with exception handling
246 * This will force destination out of cache for more performance.
247 *
248 * Note: Cached memory copy is used when destination or size is not
249 * naturally aligned. That is:
250 * - Require 8-byte alignment when size is 8 bytes or larger.
251 * - Require 4-byte alignment when size is 4 bytes.
252 */
253ENTRY(__copy_user_nocache)
254 ASM_STAC
255
256 /* If size is less than 8 bytes, go to 4-byte copy */
257 cmpl $8,%edx
258 jb .L_4b_nocache_copy_entry
259
260 /* If destination is not 8-byte aligned, "cache" copy to align it */
261 ALIGN_DESTINATION
262
263 /* Set 4x8-byte copy count and remainder */
264 movl %edx,%ecx
265 andl $63,%edx
266 shrl $6,%ecx
267 jz .L_8b_nocache_copy_entry /* jump if count is 0 */
268
269 /* Perform 4x8-byte nocache loop-copy */
270.L_4x8b_nocache_copy_loop:
2711: movq (%rsi),%r8
2722: movq 1*8(%rsi),%r9
2733: movq 2*8(%rsi),%r10
2744: movq 3*8(%rsi),%r11
2755: movnti %r8,(%rdi)
2766: movnti %r9,1*8(%rdi)
2777: movnti %r10,2*8(%rdi)
2788: movnti %r11,3*8(%rdi)
2799: movq 4*8(%rsi),%r8
28010: movq 5*8(%rsi),%r9
28111: movq 6*8(%rsi),%r10
28212: movq 7*8(%rsi),%r11
28313: movnti %r8,4*8(%rdi)
28414: movnti %r9,5*8(%rdi)
28515: movnti %r10,6*8(%rdi)
28616: movnti %r11,7*8(%rdi)
287 leaq 64(%rsi),%rsi
288 leaq 64(%rdi),%rdi
289 decl %ecx
290 jnz .L_4x8b_nocache_copy_loop
291
292 /* Set 8-byte copy count and remainder */
293.L_8b_nocache_copy_entry:
294 movl %edx,%ecx
295 andl $7,%edx
296 shrl $3,%ecx
297 jz .L_4b_nocache_copy_entry /* jump if count is 0 */
298
299 /* Perform 8-byte nocache loop-copy */
300.L_8b_nocache_copy_loop:
30120: movq (%rsi),%r8
30221: movnti %r8,(%rdi)
303 leaq 8(%rsi),%rsi
304 leaq 8(%rdi),%rdi
305 decl %ecx
306 jnz .L_8b_nocache_copy_loop
307
308 /* If no byte left, we're done */
309.L_4b_nocache_copy_entry:
310 andl %edx,%edx
311 jz .L_finish_copy
312
313 /* If destination is not 4-byte aligned, go to byte copy: */
314 movl %edi,%ecx
315 andl $3,%ecx
316 jnz .L_1b_cache_copy_entry
317
318 /* Set 4-byte copy count (1 or 0) and remainder */
319 movl %edx,%ecx
320 andl $3,%edx
321 shrl $2,%ecx
322 jz .L_1b_cache_copy_entry /* jump if count is 0 */
323
324 /* Perform 4-byte nocache copy: */
32530: movl (%rsi),%r8d
32631: movnti %r8d,(%rdi)
327 leaq 4(%rsi),%rsi
328 leaq 4(%rdi),%rdi
329
330 /* If no bytes left, we're done: */
331 andl %edx,%edx
332 jz .L_finish_copy
333
334 /* Perform byte "cache" loop-copy for the remainder */
335.L_1b_cache_copy_entry:
336 movl %edx,%ecx
337.L_1b_cache_copy_loop:
33840: movb (%rsi),%al
33941: movb %al,(%rdi)
340 incq %rsi
341 incq %rdi
342 decl %ecx
343 jnz .L_1b_cache_copy_loop
344
345 /* Finished copying; fence the prior stores */
346.L_finish_copy:
347 xorl %eax,%eax
348 ASM_CLAC
349 sfence
350 ret
351
352 .section .fixup,"ax"
353.L_fixup_4x8b_copy:
354 shll $6,%ecx
355 addl %ecx,%edx
356 jmp .L_fixup_handle_tail
357.L_fixup_8b_copy:
358 lea (%rdx,%rcx,8),%rdx
359 jmp .L_fixup_handle_tail
360.L_fixup_4b_copy:
361 lea (%rdx,%rcx,4),%rdx
362 jmp .L_fixup_handle_tail
363.L_fixup_1b_copy:
364 movl %ecx,%edx
365.L_fixup_handle_tail:
366 sfence
367 jmp .Lcopy_user_handle_tail
368 .previous
369
370 _ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
371 _ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
372 _ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
373 _ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
374 _ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
375 _ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
376 _ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
377 _ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
378 _ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
379 _ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
380 _ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
381 _ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
382 _ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
383 _ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
384 _ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
385 _ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
386 _ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
387 _ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
388 _ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
389 _ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
390 _ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
391 _ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
392ENDPROC(__copy_user_nocache)
393EXPORT_SYMBOL(__copy_user_nocache)