| xf.li | bdd93d5 | 2023-05-12 07:10:14 -0700 | [diff] [blame] | 1 | /* Copyright (C) 1991-2016 Free Software Foundation, Inc. | 
 | 2 |    This file is part of the GNU C Library. | 
 | 3 |    Based on strlen implementation by Torbjorn Granlund (tege@sics.se), | 
 | 4 |    with help from Dan Sahlin (dan@sics.se) and | 
 | 5 |    commentary by Jim Blandy (jimb@ai.mit.edu); | 
 | 6 |    adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu), | 
 | 7 |    and implemented by Roland McGrath (roland@ai.mit.edu). | 
 | 8 |  | 
 | 9 |    The GNU C Library is free software; you can redistribute it and/or | 
 | 10 |    modify it under the terms of the GNU Lesser General Public | 
 | 11 |    License as published by the Free Software Foundation; either | 
 | 12 |    version 2.1 of the License, or (at your option) any later version. | 
 | 13 |  | 
 | 14 |    The GNU C Library is distributed in the hope that it will be useful, | 
 | 15 |    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
 | 16 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
 | 17 |    Lesser General Public License for more details. | 
 | 18 |  | 
 | 19 |    You should have received a copy of the GNU Lesser General Public | 
 | 20 |    License along with the GNU C Library; if not, see | 
 | 21 |    <http://www.gnu.org/licenses/>.  */ | 
 | 22 |  | 
 | 23 | #ifdef HAVE_CONFIG_H | 
 | 24 | #include <config.h> | 
 | 25 | #endif | 
 | 26 |  | 
 | 27 | #undef __ptr_t | 
 | 28 | #define __ptr_t void * | 
 | 29 |  | 
 | 30 | #if defined (_LIBC) | 
 | 31 | # include <string.h> | 
 | 32 | # include <memcopy.h> | 
 | 33 | # include <stdlib.h> | 
 | 34 | #endif | 
 | 35 |  | 
 | 36 | #if defined (HAVE_LIMITS_H) || defined (_LIBC) | 
 | 37 | # include <limits.h> | 
 | 38 | #endif | 
 | 39 |  | 
 | 40 | #define LONG_MAX_32_BITS 2147483647 | 
 | 41 |  | 
 | 42 | #ifndef LONG_MAX | 
 | 43 | #define LONG_MAX LONG_MAX_32_BITS | 
 | 44 | #endif | 
 | 45 |  | 
 | 46 | #include <sys/types.h> | 
 | 47 |  | 
 | 48 | #undef memchr | 
 | 49 |  | 
 | 50 | #ifndef RAWMEMCHR | 
 | 51 | # define RAWMEMCHR __rawmemchr | 
 | 52 | #endif | 
 | 53 |  | 
 | 54 | /* Find the first occurrence of C in S.  */ | 
 | 55 | __ptr_t | 
 | 56 | RAWMEMCHR (const __ptr_t s, int c_in) | 
 | 57 | { | 
 | 58 |   const unsigned char *char_ptr; | 
 | 59 |   const unsigned long int *longword_ptr; | 
 | 60 |   unsigned long int longword, magic_bits, charmask; | 
 | 61 |   unsigned char c; | 
 | 62 |  | 
 | 63 |   c = (unsigned char) c_in; | 
 | 64 |  | 
 | 65 |   /* Handle the first few characters by reading one character at a time. | 
 | 66 |      Do this until CHAR_PTR is aligned on a longword boundary.  */ | 
 | 67 |   for (char_ptr = (const unsigned char *) s; | 
 | 68 |        ((unsigned long int) char_ptr & (sizeof (longword) - 1)) != 0; | 
 | 69 |        ++char_ptr) | 
 | 70 |     if (*char_ptr == c) | 
 | 71 |       return (__ptr_t) char_ptr; | 
 | 72 |  | 
 | 73 |   /* All these elucidatory comments refer to 4-byte longwords, | 
 | 74 |      but the theory applies equally well to 8-byte longwords.  */ | 
 | 75 |  | 
 | 76 |   longword_ptr = (unsigned long int *) char_ptr; | 
 | 77 |  | 
 | 78 |   /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits | 
 | 79 |      the "holes."  Note that there is a hole just to the left of | 
 | 80 |      each byte, with an extra at the end: | 
 | 81 |  | 
 | 82 |      bits:  01111110 11111110 11111110 11111111 | 
 | 83 |      bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD | 
 | 84 |  | 
 | 85 |      The 1-bits make sure that carries propagate to the next 0-bit. | 
 | 86 |      The 0-bits provide holes for carries to fall into.  */ | 
 | 87 |   magic_bits = -1; | 
 | 88 |   magic_bits = magic_bits / 0xff * 0xfe << 1 >> 1 | 1; | 
 | 89 |  | 
 | 90 |   /* Set up a longword, each of whose bytes is C.  */ | 
 | 91 |   charmask = c | (c << 8); | 
 | 92 |   charmask |= charmask << 16; | 
 | 93 | #if LONG_MAX > LONG_MAX_32_BITS | 
 | 94 |   charmask |= charmask << 32; | 
 | 95 | #endif | 
 | 96 |  | 
 | 97 |   /* Instead of the traditional loop which tests each character, | 
 | 98 |      we will test a longword at a time.  The tricky part is testing | 
 | 99 |      if *any of the four* bytes in the longword in question are zero.  */ | 
 | 100 |   while (1) | 
 | 101 |     { | 
 | 102 |       /* We tentatively exit the loop if adding MAGIC_BITS to | 
 | 103 | 	 LONGWORD fails to change any of the hole bits of LONGWORD. | 
 | 104 |  | 
 | 105 | 	 1) Is this safe?  Will it catch all the zero bytes? | 
 | 106 | 	 Suppose there is a byte with all zeros.  Any carry bits | 
 | 107 | 	 propagating from its left will fall into the hole at its | 
 | 108 | 	 least significant bit and stop.  Since there will be no | 
 | 109 | 	 carry from its most significant bit, the LSB of the | 
 | 110 | 	 byte to the left will be unchanged, and the zero will be | 
 | 111 | 	 detected. | 
 | 112 |  | 
 | 113 | 	 2) Is this worthwhile?  Will it ignore everything except | 
 | 114 | 	 zero bytes?  Suppose every byte of LONGWORD has a bit set | 
 | 115 | 	 somewhere.  There will be a carry into bit 8.  If bit 8 | 
 | 116 | 	 is set, this will carry into bit 16.  If bit 8 is clear, | 
 | 117 | 	 one of bits 9-15 must be set, so there will be a carry | 
 | 118 | 	 into bit 16.  Similarly, there will be a carry into bit | 
 | 119 | 	 24.  If one of bits 24-30 is set, there will be a carry | 
 | 120 | 	 into bit 31, so all of the hole bits will be changed. | 
 | 121 |  | 
 | 122 | 	 The one misfire occurs when bits 24-30 are clear and bit | 
 | 123 | 	 31 is set; in this case, the hole at bit 31 is not | 
 | 124 | 	 changed.  If we had access to the processor carry flag, | 
 | 125 | 	 we could close this loophole by putting the fourth hole | 
 | 126 | 	 at bit 32! | 
 | 127 |  | 
 | 128 | 	 So it ignores everything except 128's, when they're aligned | 
 | 129 | 	 properly. | 
 | 130 |  | 
 | 131 | 	 3) But wait!  Aren't we looking for C, not zero? | 
 | 132 | 	 Good point.  So what we do is XOR LONGWORD with a longword, | 
 | 133 | 	 each of whose bytes is C.  This turns each byte that is C | 
 | 134 | 	 into a zero.  */ | 
 | 135 |  | 
 | 136 |       longword = *longword_ptr++ ^ charmask; | 
 | 137 |  | 
 | 138 |       /* Add MAGIC_BITS to LONGWORD.  */ | 
 | 139 |       if ((((longword + magic_bits) | 
 | 140 |  | 
 | 141 | 	    /* Set those bits that were unchanged by the addition.  */ | 
 | 142 | 	    ^ ~longword) | 
 | 143 |  | 
 | 144 | 	   /* Look at only the hole bits.  If any of the hole bits | 
 | 145 | 	      are unchanged, most likely one of the bytes was a | 
 | 146 | 	      zero.  */ | 
 | 147 | 	   & ~magic_bits) != 0) | 
 | 148 | 	{ | 
 | 149 | 	  /* Which of the bytes was C?  If none of them were, it was | 
 | 150 | 	     a misfire; continue the search.  */ | 
 | 151 |  | 
 | 152 | 	  const unsigned char *cp = (const unsigned char *) (longword_ptr - 1); | 
 | 153 |  | 
 | 154 | 	  if (cp[0] == c) | 
 | 155 | 	    return (__ptr_t) cp; | 
 | 156 | 	  if (cp[1] == c) | 
 | 157 | 	    return (__ptr_t) &cp[1]; | 
 | 158 | 	  if (cp[2] == c) | 
 | 159 | 	    return (__ptr_t) &cp[2]; | 
 | 160 | 	  if (cp[3] == c) | 
 | 161 | 	    return (__ptr_t) &cp[3]; | 
 | 162 | #if LONG_MAX > 2147483647 | 
 | 163 | 	  if (cp[4] == c) | 
 | 164 | 	    return (__ptr_t) &cp[4]; | 
 | 165 | 	  if (cp[5] == c) | 
 | 166 | 	    return (__ptr_t) &cp[5]; | 
 | 167 | 	  if (cp[6] == c) | 
 | 168 | 	    return (__ptr_t) &cp[6]; | 
 | 169 | 	  if (cp[7] == c) | 
 | 170 | 	    return (__ptr_t) &cp[7]; | 
 | 171 | #endif | 
 | 172 | 	} | 
 | 173 |     } | 
 | 174 | } | 
/* NOTE(review): libc_hidden_def and weak_alias are not defined in this
   file; presumably they are glibc-internal macros that set up the hidden
   internal symbol for __rawmemchr and export `rawmemchr' as a weak alias
   of it -- confirm against the libc symbol-handling headers.  Both lines
   spell __rawmemchr literally, so they refer to the function defined
   through RAWMEMCHR only while that macro keeps its default expansion.  */
libc_hidden_def (__rawmemchr)
weak_alias (__rawmemchr, rawmemchr)