/* Copy SRC to DEST returning the address of the terminating '\0' in DEST.
   For Intel 80x86, x>=3.
   Copyright (C) 1994-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper (drepper@gnu.ai.mit.edu).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

/* This function is not part of ISO C.  It did not originate in this
   library either, and it was later standardized by POSIX.1-2008.  */

#include <sysdep.h>
#include "asm-syntax.h"

/* Byte offsets of the stack arguments relative to %esp at function
   entry.  __stpcpy pushes nothing, so the only thing in front of the
   arguments is the 4-byte return address at 0(%esp).  RTN is simply
   another name for the offset of DEST.  */
#define PARMS	4		/* no space for saved regs */
#define RTN	PARMS
#define DEST	RTN		/* first argument: destination pointer */
#define SRC	DEST+4		/* second argument: source pointer */

| 32 | .text |
| 33 | ENTRY (__stpcpy) |
| 34 | |
| 35 | movl DEST(%esp), %eax |
| 36 | movl SRC(%esp), %ecx |
| 37 | subl %eax, %ecx /* magic: reduce number of loop variants |
| 38 | to one using addressing mode */ |
| 39 | |
| 40 | /* Here we would like to write |
| 41 | |
| 42 | subl $4, %eax |
| 43 | ALIGN (4) |
| 44 | |
| 45 | but the assembler is too smart and optimizes for the shortest |
| 46 | form where the number only needs one byte. But if we could |
| 47 | have the long form we would not need the alignment. */ |
| 48 | |
| 49 | .byte 0x81, 0xe8 /* This is `subl $0x00000004, %eax' */ |
| 50 | .long 0x00000004 |
| 51 | |
| 52 | /* Four times unfolded loop with only one loop counter. This |
| 53 | is achieved by the use of index+base addressing mode. As the |
| 54 | loop counter we use the destination address because this is |
| 55 | also the result. */ |
| 56 | L(1): addl $4, %eax /* increment loop counter */ |
| 57 | |
| 58 | movb (%eax,%ecx), %dl /* load current char */ |
| 59 | movb %dl, (%eax) /* and store it */ |
| 60 | testb %dl, %dl /* was it NUL? */ |
| 61 | jz L(2) /* yes, then exit */ |
| 62 | |
| 63 | movb 1(%eax,%ecx), %dl /* load current char */ |
| 64 | movb %dl, 1(%eax) /* and store it */ |
| 65 | testb %dl, %dl /* was it NUL? */ |
| 66 | jz L(3) /* yes, then exit */ |
| 67 | |
| 68 | movb 2(%eax,%ecx), %dl /* load current char */ |
| 69 | movb %dl, 2(%eax) /* and store it */ |
| 70 | testb %dl, %dl /* was it NUL? */ |
| 71 | jz L(4) /* yes, then exit */ |
| 72 | |
| 73 | movb 3(%eax,%ecx), %dl /* load current char */ |
| 74 | movb %dl, 3(%eax) /* and store it */ |
| 75 | testb %dl, %dl /* was it NUL? */ |
| 76 | jnz L(1) /* no, then continue loop */ |
| 77 | |
| 78 | incl %eax /* correct loop counter */ |
| 79 | L(4): incl %eax |
| 80 | L(3): incl %eax |
| 81 | L(2): |
| 82 | |
| 83 | ret |
| 84 | END (__stpcpy) |
| 85 | |
/* Publish the implementation under its public name.  None of these
   macros is defined in this file (presumably they come from the
   headers included at the top).  Going by their names, they make
   `stpcpy' a weak alias of `__stpcpy' and emit the hidden definitions
   used for calls from inside libc -- confirm against the macro
   definitions.  */
weak_alias (__stpcpy, stpcpy)
libc_hidden_def (__stpcpy)
libc_hidden_builtin_def (stpcpy)