lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame] | 1 | /* |
| 2 | * This string-include defines all string functions as inline |
| 3 | * functions. Use gcc. It also assumes ds=es=data space, this should be |
| 4 | * normal. Most of the string-functions are rather heavily hand-optimized, |
| 5 | * see especially strtok,strstr,str[c]spn. They should work, but are not |
| 6 | * very easy to understand. Everything is done entirely within the register |
| 7 | * set, making the functions fast and clean. String instructions have been |
| 8 | * used through-out, making for "slightly" unclear code :-) |
| 9 | * |
| 10 | * NO Copyright (C) 1991, 1992 Linus Torvalds, |
| 11 | * consider these trivial functions to be PD. |
| 12 | */ |
| 13 | |
| 14 | /* |
| 15 | * Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org> |
| 16 | * |
| 17 | * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. |
| 18 | */ |
| 19 | |
| 20 | /* |
| 21 | * Modified for uClibc by Erik Andersen <andersen@codepoet.org> |
| 22 | * These make no attempt to use nifty things like mmx/3dnow/etc. |
| 23 | * These are not inline, and will therefore not be as fast as |
| 24 | * modifying the headers to use inlines (and cannot therefore |
| 25 | * do tricky things when dealing with const memory). But they |
| 26 | * should (I hope!) be faster than their generic equivalents.... |
| 27 | * |
| 28 | * More importantly, these should provide a good example for |
| 29 | * others to follow when adding arch specific optimizations. |
| 30 | * -Erik |
| 31 | * |
| 32 | * 2009-04: modified by Denys Vlasenko <vda.linux@googlemail.com> |
| 33 | * Filling byte-by-byte is a bit too slow. I prefer a 46-byte function |
| 34 | * that fills 4x faster over a 21-byte one. |
| 35 | */ |
| 36 | |
| 37 | #include <string.h> |
| 38 | |
| 39 | #undef memset |
/*
 * memset - fill the first count bytes at s with the low 8 bits of c.
 *
 * Returns s.
 *
 * x86 string-instruction implementation: the fill byte is replicated into
 * all four bytes of a 32-bit word, count / 4 words are stored with
 * "rep stosl", and the remaining count % 4 bytes are stored one at a time
 * with "stosb".
 *
 * NOTE: stos only walks upwards while the direction flag is clear. This
 * code never touches the flag and relies on it being clear on entry
 * (presumably guaranteed by the calling convention - confirm for any
 * unusual caller).
 */
void *memset(void *s, int c, size_t count)
{
	void *dst = s;	/* running store pointer; s is kept intact for the return */
	size_t tail;	/* scratch: copy of count, later reduced to count & 3 */

	/*
	 * Operands:
	 *   %0 = dst   (edi) - advanced by every stos
	 *   %1 = tail  (any free register)
	 *   %2 = count (ecx) - shifted, then counted down to 0 by rep
	 *   %3 = c     (eax) - overwritten with the replicated fill word
	 *
	 * dst, count and c are all modified by the template, so they are
	 * declared read-write ("+").  Listing c and count as plain inputs
	 * would let the compiler assume eax/ecx still hold their original
	 * values after the asm, which they do not.
	 */
	__asm__ __volatile__(

		/* Most of the time, count is divisible by 4 and nonzero */
		/* It's better to make this case faster */
	/*	"	jecxz	9f\n" - (optional) count == 0: goto end */
		"	mov	%2, %1\n"	/* tail = count */
		"	shr	$2, %2\n"	/* count = number of full words */
		"	jz	1f\n"		/* zero words: goto fill_bytes */
		/* extend 8-bit fill to 32 bits */
		"	movzbl	%b3, %3\n"	/* 3 bytes */
	/* or:	"	and	$0xff, %3\n" - 5 bytes */
		"	imul	$0x01010101, %3\n" /* 6 bytes */
		/* fill full words */
		"	rep; stosl\n"
		/*
		 * fill 0-3 bytes; the low byte of %3 is the fill byte on both
		 * paths (untouched if the word loop was skipped, and still the
		 * same byte after the replication above)
		 */
		"1:	and	$3, %1\n"
		"	jz	9f\n"		/* (count & 3) == 0: goto end */
		"2:	stosb\n"
		"	dec	%1\n"
		"	jnz	2b\n"
		/* end */
		"9:\n"

		: "+D" (dst), "=&r" (tail), "+c" (count), "+a" (c)
		: /* no input-only operands */
		: "memory", "cc"
	);
	return s;
}
| 72 | libc_hidden_def(memset) |
| 73 | |
| 74 | /* |
| 75 | gcc 4.3.1 |
| 76 | ========= |
| 77 | 57 push %edi |
| 78 | 8b 7c 24 08 mov 0x8(%esp),%edi |
| 79 | 8b 4c 24 10 mov 0x10(%esp),%ecx |
| 80 | 8b 44 24 0c mov 0xc(%esp),%eax |
| 81 | 89 ca mov %ecx,%edx |
| 82 | c1 e9 02 shr $0x2,%ecx |
| 83 | 74 0b je 1f <__GI_memset+0x1f> |
| 84 | 0f b6 c0 movzbl %al,%eax |
| 85 | 69 c0 01 01 01 01 imul $0x1010101,%eax,%eax |
| 86 | f3 ab rep stos %eax,%es:(%edi) |
| 87 | 83 e2 03 and $0x3,%edx |
| 88 | 74 04 je 28 <__GI_memset+0x28> |
| 89 | aa stos %al,%es:(%edi) |
| 90 | 4a dec %edx |
| 91 | 75 fc jne 24 <__GI_memset+0x24> |
| 92 | 8b 44 24 08 mov 0x8(%esp),%eax |
| 93 | 5f pop %edi |
| 94 | c3 ret |
| 95 | */ |