blob: 9f51f3c601b8da28b738a55c4bcb1e736ba9e33d [file] [log] [blame]
lh9ed821d2023-04-07 01:36:19 -07001/*
2 * This string-include defines all string functions as inline
3 * functions. Use gcc. It also assumes ds=es=data space, this should be
4 * normal. Most of the string-functions are rather heavily hand-optimized,
5 * see especially strtok,strstr,str[c]spn. They should work, but are not
6 * very easy to understand. Everything is done entirely within the register
7 * set, making the functions fast and clean. String instructions have been
8 * used through-out, making for "slightly" unclear code :-)
9 *
10 * NO Copyright (C) 1991, 1992 Linus Torvalds,
11 * consider these trivial functions to be PD.
12 */
13
14/*
15 * Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org>
16 *
17 * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
18 */
19
20/*
21 * Modified for uClibc by Erik Andersen <andersen@codepoet.org>
22 * These make no attempt to use nifty things like mmx/3dnow/etc.
23 * These are not inline, and will therefore not be as fast as
24 * modifying the headers to use inlines (and cannot therefore
25 * do tricky things when dealing with const memory). But they
26 * should (I hope!) be faster than their generic equivalents....
27 *
28 * More importantly, these should provide a good example for
29 * others to follow when adding arch specific optimizations.
30 * -Erik
31 *
32 * 2009-04: modified by Denys Vlasenko <vda.linux@googlemail.com>
33 * Filling byte-by-byte is a bit too slow. I prefer a 46-byte function
34 * which fills 4x faster over a 21-byte one.
35 */
36
37#include <string.h>
38
#undef memset
/*
 * Fill the first count bytes at s with the low 8 bits of c and return s.
 *
 * Whole 32-bit words are stored with "rep stosl"; the remaining
 * (count & 3) bytes are stored one at a time with "stosb".
 * No "cld" is issued here: the direction flag is expected to be
 * clear on entry so that stos walks upwards.
 *
 * Every register the asm writes is declared as an output operand:
 * %edi ("+D"), %eax ("+a") and %ecx ("+c") are read-write, so the
 * compiler never assumes that s, c or count still live in those
 * registers once the asm statement has run.
 */
void *memset(void *s, int c, size_t count)
{
	void *dst = s;	/* advanced by stos; s stays intact for the return */
	int tail;	/* copy of count, later reduced to count & 3 */

	__asm__ __volatile__(

		/* Most of the time, count is divisible by 4 and nonzero */
		/* It's better to make this case faster */
		/* "	jecxz	9f\n" - (optional) count == 0: goto ret */
		"	mov	%%ecx, %[tail]\n"
		"	shr	$2, %%ecx\n"
		"	jz	1f\n" /* zero words: goto fill_bytes */
		/* extend 8-bit fill to 32 bits */
		"	movzx	%%al, %%eax\n" /* 3 bytes */
		/* or: "	and	$0xff, %%eax\n" - 5 bytes */
		"	imul	$0x01010101, %%eax\n" /* 6 bytes */
		/* fill full words */
		"	rep; stosl\n"
		/* fill 0-3 bytes */
		"1:	and	$3, %[tail]\n"
		"	jz	9f\n" /* (count & 3) == 0: goto end */
		"2:	stosb\n"
		"	dec	%[tail]\n"
		"	jnz	2b\n"
		/* end */
		"9:\n"

		: "+D" (dst), [tail] "=&r" (tail), "+a" (c), "+c" (count)
		: /* no input-only operands: everything passed in is modified */
		: "memory", "cc"
	);
	return s;
}
72libc_hidden_def(memset)
73
74/*
75gcc 4.3.1
76=========
7757 push %edi
788b 7c 24 08 mov 0x8(%esp),%edi
798b 4c 24 10 mov 0x10(%esp),%ecx
808b 44 24 0c mov 0xc(%esp),%eax
8189 ca mov %ecx,%edx
82c1 e9 02 shr $0x2,%ecx
8374 0b je 1f <__GI_memset+0x1f>
840f b6 c0 movzbl %al,%eax
8569 c0 01 01 01 01 imul $0x1010101,%eax,%eax
86f3 ab rep stos %eax,%es:(%edi)
8783 e2 03 and $0x3,%edx
8874 04 je 28 <__GI_memset+0x28>
89aa stos %al,%es:(%edi)
904a dec %edx
9175 fc jne 24 <__GI_memset+0x24>
928b 44 24 08 mov 0x8(%esp),%eax
935f pop %edi
94c3 ret
95*/