| lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame] | 1 | /* | 
 | 2 |  * This string-include defines all string functions as inline | 
 | 3 |  * functions. Use gcc. It also assumes ds=es=data space, this should be | 
 | 4 |  * normal. Most of the string-functions are rather heavily hand-optimized, | 
 | 5 |  * see especially strtok,strstr,str[c]spn. They should work, but are not | 
 | 6 |  * very easy to understand. Everything is done entirely within the register | 
 | 7 |  * set, making the functions fast and clean. String instructions have been | 
 | 8 |  * used throughout, making for "slightly" unclear code :-) | 
 | 9 |  * | 
 | 10 |  *		NO Copyright (C) 1991, 1992 Linus Torvalds, | 
 | 11 |  *		consider these trivial functions to be PD. | 
 | 12 |  */ | 
 | 13 |  | 
 | 14 | /* | 
 | 15 |  * Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org> | 
 | 16 |  * | 
 | 17 |  * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. | 
 | 18 |  */ | 
 | 19 |  | 
 | 20 | /* | 
 | 21 |  * Modified for uClibc by Erik Andersen <andersen@codepoet.org> | 
 | 22 |  * These make no attempt to use nifty things like mmx/3dnow/etc. | 
 | 23 |  * These are not inline, and will therefore not be as fast as | 
 | 24 |  * modifying the headers to use inlines (and cannot therefore | 
 | 25 |  * do tricky things when dealing with const memory).  But they | 
 | 26 |  * should (I hope!) be faster than their generic equivalents.... | 
 | 27 |  * | 
 | 28 |  * More importantly, these should provide a good example for | 
 | 29 |  * others to follow when adding arch specific optimizations. | 
 | 30 |  *  -Erik | 
 | 31 |  * | 
 | 32 |  * 2009-04: modified by Denys Vlasenko <vda.linux@googlemail.com> | 
 | 33 |  * Filling byte-by-byte is a bit too slow. I prefer a 46-byte function | 
 | 34 |  * that fills 4x faster than the 21-byte one. | 
 | 35 |  */ | 
 | 36 |  | 
 | 37 | #include <string.h> | 
 | 38 |  | 
 | 39 | #undef memset | 
 | 40 | void *memset(void *s, int c, size_t count) | 
 | 41 | { | 
 | 42 | 	int reg, edi; | 
 | 43 | 	__asm__ __volatile__( | 
 | 44 |  | 
 | 45 | 		/* Most of the time, count is divisible by 4 and nonzero */ | 
 | 46 | 		/* It's better to make this case faster */ | 
 | 47 | 	/*	"	jecxz	9f\n" - (optional) count == 0: goto ret */ | 
 | 48 | 		"	mov	%%ecx, %1\n" | 
 | 49 | 		"	shr	$2, %%ecx\n" | 
 | 50 | 		"	jz	1f\n" /* zero words: goto fill_bytes */ | 
 | 51 | 		/* extend 8-bit fill to 32 bits */ | 
 | 52 | 		"	movzx	%%al, %%eax\n" /* 3 bytes */ | 
 | 53 | 	/* or:	"	and	$0xff, %%eax\n" - 5 bytes */ | 
 | 54 | 		"	imul	$0x01010101, %%eax\n" /* 6 bytes */ | 
 | 55 | 		/* fill full words */ | 
 | 56 | 		"	rep; stosl\n" | 
 | 57 | 		/* fill 0-3 bytes */ | 
 | 58 | 		"1:	and	$3, %1\n" | 
 | 59 | 		"	jz	9f\n" /* (count & 3) == 0: goto end */ | 
 | 60 | 		"2:	stosb\n" | 
 | 61 | 		"	dec	%1\n" | 
 | 62 | 		"	jnz	2b\n" | 
 | 63 | 		/* end */ | 
 | 64 | 		"9:\n" | 
 | 65 |  | 
 | 66 | 		: "=&D" (edi), "=&r" (reg) | 
 | 67 | 		: "0" (s), "a" (c), "c" (count) | 
 | 68 | 		: "memory" | 
 | 69 | 	); | 
 | 70 | 	return s; | 
 | 71 | } | 
 | 72 | libc_hidden_def(memset) | 
 | 73 |  | 
 | 74 | /* | 
 | 75 | gcc 4.3.1 | 
 | 76 | ========= | 
 | 77 | 57                     push   %edi | 
 | 78 | 8b 7c 24 08            mov    0x8(%esp),%edi | 
 | 79 | 8b 4c 24 10            mov    0x10(%esp),%ecx | 
 | 80 | 8b 44 24 0c            mov    0xc(%esp),%eax | 
 | 81 | 89 ca                  mov    %ecx,%edx | 
 | 82 | c1 e9 02               shr    $0x2,%ecx | 
 | 83 | 74 0b                  je     1f <__GI_memset+0x1f> | 
 | 84 | 0f b6 c0               movzbl %al,%eax | 
 | 85 | 69 c0 01 01 01 01      imul   $0x1010101,%eax,%eax | 
 | 86 | f3 ab                  rep stos %eax,%es:(%edi) | 
 | 87 | 83 e2 03               and    $0x3,%edx | 
 | 88 | 74 04                  je     28 <__GI_memset+0x28> | 
 | 89 | aa                     stos   %al,%es:(%edi) | 
 | 90 | 4a                     dec    %edx | 
 | 91 | 75 fc                  jne    24 <__GI_memset+0x24> | 
 | 92 | 8b 44 24 08            mov    0x8(%esp),%eax | 
 | 93 | 5f                     pop    %edi | 
 | 94 | c3                     ret | 
 | 95 | */ |