/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

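/*
 * Usage sketch (illustrative only): these routines are not normally called
 * directly.  Generic code reaches them through copy_user_generic() in
 * <asm/uaccess_64.h>, which selects one of the three variants with
 * CPU-feature alternatives.  Conceptually (the real selection is done by
 * alternatives patching, not runtime branches):
 *
 *	if (boot_cpu_has(X86_FEATURE_ERMS))
 *		ret = copy_user_enhanced_fast_string(to, from, len);
 *	else if (boot_cpu_has(X86_FEATURE_REP_GOOD))
 *		ret = copy_user_generic_string(to, from, len);
 *	else
 *		ret = copy_user_generic_unrolled(to, from, len);
 *
 * All three share the same contract: arguments in rdi/rsi/rdx, and eax
 * returns the number of bytes that could not be copied (0 on success).
 */
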
.macro ALIGN_DESTINATION
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:
	.section .fixup,"ax"
103:	addl %ecx,%edx			/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(100b, 103b)
	_ASM_EXTABLE_UA(101b, 103b)
	.endm
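
/*
 * Example: with a destination address ending in ...5 and rdx = 100,
 * ALIGN_DESTINATION byte-copies 8 - 5 = 3 bytes, leaving rdi 8-byte aligned
 * and rdx = 97 for the main copy.  If one of those byte accesses faults, the
 * fixup at 103 adds the alignment bytes still pending (in ecx) back into edx
 * and falls through to the common tail handler.
 */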

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient microcode
 * for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
	ASM_STAC
	cmpl $8,%edx
	jb 20f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx	/* edx = trailing bytes after the 64-byte blocks */
	shrl $6,%ecx	/* ecx = number of 64-byte blocks */
	jz .L_copy_short_string
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
.L_copy_short_string:
	movl %edx,%ecx
	andl $7,%edx	/* edx = trailing bytes after the 8-byte words */
	shrl $3,%ecx	/* ecx = number of 8-byte words */
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ASM_CLAC
	ret
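/*
 * Note on the fixups below: each _ASM_EXTABLE_UA(a, b) entry means "if the
 * user access at local label a faults, resume at label b in .fixup".  The
 * fixup labels only recompute how many bytes were still outstanding at the
 * fault and then jump to .Lcopy_user_handle_tail, which retries the
 * remainder byte by byte and returns the final uncopied count in eax.
 */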
	.section .fixup,"ax"
30:	shll $6,%ecx	/* ecx was 64-byte blocks left; convert to bytes */
	addl %ecx,%edx	/* ... and add the tail bytes still in edx */
	jmp 60f
40:	leal (%rdx,%rcx,8),%edx	/* 8-byte words left plus tail bytes */
	jmp 60f
50:	movl %ecx,%edx	/* fault in the byte loop: ecx bytes left */
60:	jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
	.previous

	_ASM_EXTABLE_UA(1b, 30b)
	_ASM_EXTABLE_UA(2b, 30b)
	_ASM_EXTABLE_UA(3b, 30b)
	_ASM_EXTABLE_UA(4b, 30b)
	_ASM_EXTABLE_UA(5b, 30b)
	_ASM_EXTABLE_UA(6b, 30b)
	_ASM_EXTABLE_UA(7b, 30b)
	_ASM_EXTABLE_UA(8b, 30b)
	_ASM_EXTABLE_UA(9b, 30b)
	_ASM_EXTABLE_UA(10b, 30b)
	_ASM_EXTABLE_UA(11b, 30b)
	_ASM_EXTABLE_UA(12b, 30b)
	_ASM_EXTABLE_UA(13b, 30b)
	_ASM_EXTABLE_UA(14b, 30b)
	_ASM_EXTABLE_UA(15b, 30b)
	_ASM_EXTABLE_UA(16b, 30b)
	_ASM_EXTABLE_UA(18b, 40b)
	_ASM_EXTABLE_UA(19b, 40b)
	_ASM_EXTABLE_UA(21b, 50b)
	_ASM_EXTABLE_UA(22b, 50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

/*
 * Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 * (A C-level sketch of this path follows this comment.)
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * More would also be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB; anyone lifting the limit needs to
 * take those errata into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
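/*
 * In C terms the fast path below is roughly (illustrative sketch only):
 *
 *	while (len >= 8) {			// "rep movsq"
 *		*(u64 *)dst = *(const u64 *)src;
 *		dst += 8; src += 8; len -= 8;
 *	}
 *	while (len--)				// "rep movsb" for the tail
 *		*dst++ = *src++;
 */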
ENTRY(copy_user_generic_string)
	ASM_STAC
	cmpl $8,%edx
	jb 2f		/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx	/* ecx = number of 8-byte words */
	andl $7,%edx	/* edx = trailing bytes */
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
11:	leal (%rdx,%rcx,8),%ecx	/* words left from movsq plus tail bytes */
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(1b, 11b)
	_ASM_EXTABLE_UA(3b, 12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs support enhanced REP MOVSB/STOSB (ERMS) instructions.
 * It is recommended to use this variant when ERMS is available.
 * (A short note on the 64-byte cutoff follows this comment.)
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
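/*
 * With ERMS the whole copy really is a single "rep movsb"; the CPU is
 * expected to handle alignment and blocking internally.  The 64-byte check
 * below exists because "rep" has a fixed start-up cost, so short copies are
 * cheaper through the plain qword/byte loops at .L_copy_short_string above.
 */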
ENTRY(copy_user_enhanced_fast_string)
	ASM_STAC
	cmpl $64,%edx
	jb .L_copy_short_string	/* less than 64 bytes, avoid the costly 'rep' */
	movl %edx,%ecx
1:	rep
	movsb
	xorl %eax,%eax
	ASM_CLAC
	ret

	.section .fixup,"ax"
12:	movl %ecx,%edx		/* ecx is zerorest also */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(1b, 12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * Try to copy the last bytes and clear the rest if needed.
 * Since a protection fault in copy_from/to_user is not a normal situation,
 * it is not necessary to optimize tail handling.
 * (A worked example follows this comment.)
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
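/*
 * Example: if a 100-byte copy through one of the rep-string paths faults
 * with 60 bytes still to go, its fixup leaves rsi/rdi at the fault position
 * and rdx = 60 before jumping here.  The "rep movsb" below then copies as
 * many of those 60 bytes as it can; whatever still faults is returned in eax
 * as the uncopied count.  (The unrolled variants are coarser: rsi/rdi still
 * point at the start of the block they were copying and rdx includes that
 * whole block.)
 */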
	ALIGN;
.Lcopy_user_handle_tail:
	movl %edx,%ecx
1:	rep movsb
2:	mov %ecx,%eax
	ASM_CLAC
	ret

	_ASM_EXTABLE_UA(1b, 2b)
END(.Lcopy_user_handle_tail)

/*
 * __copy_user_nocache - Uncached memory copy with exception handling.
 * This version uses non-temporal stores to keep the destination out of the
 * cache, which helps performance for large copies.
 *
 * Note: a cached copy is used when the destination or the size is not
 * naturally aligned, that is:
 *  - 8-byte alignment is required when the size is 8 bytes or larger;
 *  - 4-byte alignment is required when the size is 4 bytes.
 * (An example of this behaviour follows this comment.)
 */
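/*
 * Example of the note above: __copy_user_nocache(dst, src, 12) with an
 * 8-byte-aligned dst does one movnti qword plus one movnti dword, entirely
 * uncached.  If dst is misaligned, only the leading bytes up to the next
 * 8-byte boundary and any sub-4-byte tail go through ordinary cached stores;
 * the aligned middle still uses movnti.
 */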
ENTRY(__copy_user_nocache)
	ASM_STAC

	/* If size is less than 8 bytes, go to 4-byte copy */
	cmpl $8,%edx
	jb .L_4b_nocache_copy_entry

	/* If destination is not 8-byte aligned, "cache" copy to align it */
	ALIGN_DESTINATION

	/* Set 4x8-byte copy count and remainder */
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movnti %r8,(%rdi)
6:	movnti %r9,1*8(%rdi)
7:	movnti %r10,2*8(%rdi)
8:	movnti %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movnti %r8,4*8(%rdi)
14:	movnti %r9,5*8(%rdi)
15:	movnti %r10,6*8(%rdi)
16:	movnti %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz .L_4x8b_nocache_copy_loop

	/* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */

	/* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:	movq (%rsi),%r8
21:	movnti %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz .L_8b_nocache_copy_loop

	/* If no bytes left, we're done */
.L_4b_nocache_copy_entry:
	andl %edx,%edx
	jz .L_finish_copy

	/* If destination is not 4-byte aligned, go to byte copy: */
	movl %edi,%ecx
	andl $3,%ecx
	jnz .L_1b_cache_copy_entry

	/* Set 4-byte copy count (1 or 0) and remainder */
	movl %edx,%ecx
	andl $3,%edx
	shrl $2,%ecx
	jz .L_1b_cache_copy_entry	/* jump if count is 0 */

	/* Perform 4-byte nocache copy: */
30:	movl (%rsi),%r8d
31:	movnti %r8d,(%rdi)
	leaq 4(%rsi),%rsi
	leaq 4(%rdi),%rdi

	/* If no bytes left, we're done: */
	andl %edx,%edx
	jz .L_finish_copy

	/* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
	movl %edx,%ecx
.L_1b_cache_copy_loop:
40:	movb (%rsi),%al
41:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .L_1b_cache_copy_loop

	/* Finished copying; fence the prior stores */
.L_finish_copy:
	xorl %eax,%eax
	ASM_CLAC
	sfence
	ret

	.section .fixup,"ax"
.L_fixup_4x8b_copy:
	shll $6,%ecx		/* ecx was 64-byte blocks left; back to bytes */
	addl %ecx,%edx
	jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
	lea (%rdx,%rcx,8),%rdx	/* 8-byte words left plus tail bytes */
	jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
	lea (%rdx,%rcx,4),%rdx	/* 4-byte words left plus tail bytes */
	jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
	movl %ecx,%edx		/* fault in the byte loop: ecx bytes left */
.L_fixup_handle_tail:
	sfence			/* flush the movnti stores before the cached tail copy */
	jmp .Lcopy_user_handle_tail
	.previous

	_ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
	_ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
	_ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
	_ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
	_ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
	_ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
	_ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)