| xf.li | bdd93d5 | 2023-05-12 07:10:14 -0700 | [diff] [blame] | 1 | /* Byte-wise substring search, using the Two-Way algorithm. | 
|  | 2 | Copyright (C) 2008-2016 Free Software Foundation, Inc. | 
|  | 3 | This file is part of the GNU C Library. | 
|  | 4 | Written by Eric Blake <ebb9@byu.net>, 2008. | 
|  | 5 |  | 
|  | 6 | The GNU C Library is free software; you can redistribute it and/or | 
|  | 7 | modify it under the terms of the GNU Lesser General Public | 
|  | 8 | License as published by the Free Software Foundation; either | 
|  | 9 | version 2.1 of the License, or (at your option) any later version. | 
|  | 10 |  | 
|  | 11 | The GNU C Library is distributed in the hope that it will be useful, | 
|  | 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|  | 14 | Lesser General Public License for more details. | 
|  | 15 |  | 
|  | 16 | You should have received a copy of the GNU Lesser General Public | 
|  | 17 | License along with the GNU C Library; if not, see | 
|  | 18 | <http://www.gnu.org/licenses/>.  */ | 
|  | 19 |  | 
|  | 20 | /* Before including this file, you need to include <string.h> (and | 
|  | 21 | <config.h> before that, if not part of libc), and define: | 
|  | 22 | RETURN_TYPE             A macro that expands to the return type. | 
|  | 23 | AVAILABLE(h, h_l, j, n_l) | 
|  | 24 | A macro that returns nonzero if there are | 
|  | 25 | at least N_L bytes left starting at H[J]. | 
|  | 26 | H is 'unsigned char *', H_L, J, and N_L | 
|  | 27 | are 'size_t'; H_L is an lvalue.  For | 
|  | 28 | NUL-terminated searches, H_L can be | 
|  | 29 | modified each iteration to avoid having | 
|  | 30 | to compute the end of H up front. | 
|  | 31 |  | 
|  | 32 | For case-insensitivity, you may optionally define: | 
|  | 33 | CMP_FUNC(p1, p2, l)     A macro that returns 0 iff the first L | 
|  | 34 | characters of P1 and P2 are equal. | 
|  | 35 | CANON_ELEMENT(c)        A macro that canonicalizes an element right after | 
|  | 36 | it has been fetched from one of the two strings. | 
|  | 37 | The argument is an 'unsigned char'; the result | 
|  | 38 | must be an 'unsigned char' as well. | 
|  | 39 |  | 
|  | 40 | Other macros you may optionally define: | 
|  | 41 | RET0_IF_0(a)            Documented below at default definition. | 
|  | 42 | CHECK_EOL               Same. | 
|  | 43 |  | 
|  | 44 | This file undefines the macros listed above, and defines | 
|  | 45 | LONG_NEEDLE_THRESHOLD. | 
|  | 46 | */ | 
|  | 47 |  | 
|  | 48 | #include <limits.h> | 
|  | 49 | #include <stdint.h> | 
|  | 50 | #include <sys/param.h>                  /* Defines MAX.  */ | 
|  | 51 |  | 
|  | 52 | /* We use the Two-Way string matching algorithm, which guarantees | 
|  | 53 | linear complexity with constant space.  Additionally, for long | 
|  | 54 | needles, we also use a bad character shift table similar to the | 
|  | 55 | Boyer-Moore algorithm to achieve improved (potentially sub-linear) | 
|  | 56 | performance. | 
|  | 57 |  | 
|  | 58 | See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 | 
|  | 59 | and http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm | 
|  | 60 | */ | 
|  | 61 |  | 
|  | 62 | /* Point at which computing a bad-byte shift table is likely to be | 
|  | 63 | worthwhile.  Small needles should not compute a table, since it | 
|  | 64 | adds (1 << CHAR_BIT) + NEEDLE_LEN computations of preparation for a | 
|  | 65 | speedup no greater than a factor of NEEDLE_LEN.  The larger the | 
|  | 66 | needle, the better the potential performance gain.  On the other | 
|  | 67 | hand, on non-POSIX systems with CHAR_BIT larger than eight, the | 
|  | 68 | memory required for the table is prohibitive.  */ | 
|  | 69 | #if CHAR_BIT < 10 | 
|  | 70 | # define LONG_NEEDLE_THRESHOLD 32U | 
|  | 71 | #else | 
|  | 72 | # define LONG_NEEDLE_THRESHOLD SIZE_MAX | 
|  | 73 | #endif | 
|  | 74 |  | 
|  | 75 | #ifndef CANON_ELEMENT | 
|  | 76 | # define CANON_ELEMENT(c) c | 
|  | 77 | #endif | 
|  | 78 | #ifndef CMP_FUNC | 
|  | 79 | # define CMP_FUNC memcmp | 
|  | 80 | #endif | 
|  | 81 |  | 
|  | 82 | /* Check for end-of-line in strstr and strcasestr routines. | 
|  | 83 | We piggy-back matching procedure for detecting EOL where possible, | 
|  | 84 | and use AVAILABLE macro otherwise.  */ | 
|  | 85 | #ifndef CHECK_EOL | 
|  | 86 | # define CHECK_EOL (0) | 
|  | 87 | #endif | 
|  | 88 |  | 
|  | 89 | /* Return NULL if argument is '\0'.  */ | 
|  | 90 | #ifndef RET0_IF_0 | 
|  | 91 | # define RET0_IF_0(a) /* nothing */ | 
|  | 92 | #endif | 
|  | 93 |  | 
|  | 94 | /* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN. | 
|  | 95 | Return the index of the first byte in the right half, and set | 
|  | 96 | *PERIOD to the global period of the right half. | 
|  | 97 |  | 
|  | 98 | The global period of a string is the smallest index (possibly its | 
|  | 99 | length) at which all remaining bytes in the string are repetitions | 
|  | 100 | of the prefix (the last repetition may be a subset of the prefix). | 
|  | 101 |  | 
|  | 102 | When NEEDLE is factored into two halves, a local period is the | 
|  | 103 | length of the smallest word that shares a suffix with the left half | 
|  | 104 | and shares a prefix with the right half.  All factorizations of a | 
|  | 105 | non-empty NEEDLE have a local period of at least 1 and no greater | 
|  | 106 | than NEEDLE_LEN. | 
|  | 107 |  | 
|  | 108 | A critical factorization has the property that the local period | 
|  | 109 | equals the global period.  All strings have at least one critical | 
|  | 110 | factorization with the left half smaller than the global period. | 
|  | 111 |  | 
|  | 112 | Given an ordered alphabet, a critical factorization can be computed | 
|  | 113 | in linear time, with 2 * NEEDLE_LEN comparisons, by computing the | 
|  | 114 | larger of two ordered maximal suffixes.  The ordered maximal | 
|  | 115 | suffixes are determined by lexicographic comparison of | 
|  | 116 | periodicity.  */ | 
|  | 117 | static size_t | 
|  | 118 | critical_factorization (const unsigned char *needle, size_t needle_len, | 
|  | 119 | size_t *period) | 
|  | 120 | { | 
|  | 121 | /* Index of last byte of left half, or SIZE_MAX.  */ | 
|  | 122 | size_t max_suffix, max_suffix_rev; | 
|  | 123 | size_t j; /* Index into NEEDLE for current candidate suffix.  */ | 
|  | 124 | size_t k; /* Offset into current period.  */ | 
|  | 125 | size_t p; /* Intermediate period.  */ | 
|  | 126 | unsigned char a, b; /* Current comparison bytes.  */ | 
|  | 127 |  | 
|  | 128 | /* Invariants: | 
|  | 129 | 0 <= j < NEEDLE_LEN - 1 | 
|  | 130 | -1 <= max_suffix{,_rev} < j (treating SIZE_MAX as if it were signed) | 
|  | 131 | min(max_suffix, max_suffix_rev) < global period of NEEDLE | 
|  | 132 | 1 <= p <= global period of NEEDLE | 
|  | 133 | p == global period of the substring NEEDLE[max_suffix{,_rev}+1...j] | 
|  | 134 | 1 <= k <= p | 
|  | 135 | */ | 
|  | 136 |  | 
|  | 137 | /* Perform lexicographic search.  */ | 
|  | 138 | max_suffix = SIZE_MAX; | 
|  | 139 | j = 0; | 
|  | 140 | k = p = 1; | 
|  | 141 | while (j + k < needle_len) | 
|  | 142 | { | 
|  | 143 | a = CANON_ELEMENT (needle[j + k]); | 
|  | 144 | b = CANON_ELEMENT (needle[max_suffix + k]); | 
|  | 145 | if (a < b) | 
|  | 146 | { | 
|  | 147 | /* Suffix is smaller, period is entire prefix so far.  */ | 
|  | 148 | j += k; | 
|  | 149 | k = 1; | 
|  | 150 | p = j - max_suffix; | 
|  | 151 | } | 
|  | 152 | else if (a == b) | 
|  | 153 | { | 
|  | 154 | /* Advance through repetition of the current period.  */ | 
|  | 155 | if (k != p) | 
|  | 156 | ++k; | 
|  | 157 | else | 
|  | 158 | { | 
|  | 159 | j += p; | 
|  | 160 | k = 1; | 
|  | 161 | } | 
|  | 162 | } | 
|  | 163 | else /* b < a */ | 
|  | 164 | { | 
|  | 165 | /* Suffix is larger, start over from current location.  */ | 
|  | 166 | max_suffix = j++; | 
|  | 167 | k = p = 1; | 
|  | 168 | } | 
|  | 169 | } | 
|  | 170 | *period = p; | 
|  | 171 |  | 
|  | 172 | /* Perform reverse lexicographic search.  */ | 
|  | 173 | max_suffix_rev = SIZE_MAX; | 
|  | 174 | j = 0; | 
|  | 175 | k = p = 1; | 
|  | 176 | while (j + k < needle_len) | 
|  | 177 | { | 
|  | 178 | a = CANON_ELEMENT (needle[j + k]); | 
|  | 179 | b = CANON_ELEMENT (needle[max_suffix_rev + k]); | 
|  | 180 | if (b < a) | 
|  | 181 | { | 
|  | 182 | /* Suffix is smaller, period is entire prefix so far.  */ | 
|  | 183 | j += k; | 
|  | 184 | k = 1; | 
|  | 185 | p = j - max_suffix_rev; | 
|  | 186 | } | 
|  | 187 | else if (a == b) | 
|  | 188 | { | 
|  | 189 | /* Advance through repetition of the current period.  */ | 
|  | 190 | if (k != p) | 
|  | 191 | ++k; | 
|  | 192 | else | 
|  | 193 | { | 
|  | 194 | j += p; | 
|  | 195 | k = 1; | 
|  | 196 | } | 
|  | 197 | } | 
|  | 198 | else /* a < b */ | 
|  | 199 | { | 
|  | 200 | /* Suffix is larger, start over from current location.  */ | 
|  | 201 | max_suffix_rev = j++; | 
|  | 202 | k = p = 1; | 
|  | 203 | } | 
|  | 204 | } | 
|  | 205 |  | 
|  | 206 | /* Choose the longer suffix.  Return the first byte of the right | 
|  | 207 | half, rather than the last byte of the left half.  */ | 
|  | 208 | if (max_suffix_rev + 1 < max_suffix + 1) | 
|  | 209 | return max_suffix + 1; | 
|  | 210 | *period = p; | 
|  | 211 | return max_suffix_rev + 1; | 
|  | 212 | } | 
|  | 213 |  | 
|  | 214 | /* Return the first location of non-empty NEEDLE within HAYSTACK, or | 
|  | 215 | NULL.  HAYSTACK_LEN is the minimum known length of HAYSTACK.  This | 
|  | 216 | method is optimized for NEEDLE_LEN < LONG_NEEDLE_THRESHOLD. | 
|  | 217 | Performance is guaranteed to be linear, with an initialization cost | 
|  | 218 | of 2 * NEEDLE_LEN comparisons. | 
|  | 219 |  | 
|  | 220 | If AVAILABLE does not modify HAYSTACK_LEN (as in memmem), then at | 
|  | 221 | most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching. | 
|  | 222 | If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 * | 
|  | 223 | HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching.  */ | 
|  | 224 | static RETURN_TYPE | 
|  | 225 | two_way_short_needle (const unsigned char *haystack, size_t haystack_len, | 
|  | 226 | const unsigned char *needle, size_t needle_len) | 
|  | 227 | { | 
|  | 228 | size_t i; /* Index into current byte of NEEDLE.  */ | 
|  | 229 | size_t j; /* Index into current window of HAYSTACK.  */ | 
|  | 230 | size_t period; /* The period of the right half of needle.  */ | 
|  | 231 | size_t suffix; /* The index of the right half of needle.  */ | 
|  | 232 |  | 
|  | 233 | /* Factor the needle into two halves, such that the left half is | 
|  | 234 | smaller than the global period, and the right half is | 
|  | 235 | periodic (with a period as large as NEEDLE_LEN - suffix).  */ | 
|  | 236 | suffix = critical_factorization (needle, needle_len, &period); | 
|  | 237 |  | 
|  | 238 | /* Perform the search.  Each iteration compares the right half | 
|  | 239 | first.  */ | 
|  | 240 | if (CMP_FUNC (needle, needle + period, suffix) == 0) | 
|  | 241 | { | 
|  | 242 | /* Entire needle is periodic; a mismatch can only advance by the | 
|  | 243 | period, so use memory to avoid rescanning known occurrences | 
|  | 244 | of the period.  */ | 
|  | 245 | size_t memory = 0; | 
|  | 246 | j = 0; | 
|  | 247 | while (AVAILABLE (haystack, haystack_len, j, needle_len)) | 
|  | 248 | { | 
|  | 249 | const unsigned char *pneedle; | 
|  | 250 | const unsigned char *phaystack; | 
|  | 251 |  | 
|  | 252 | /* Scan for matches in right half.  */ | 
|  | 253 | i = MAX (suffix, memory); | 
|  | 254 | pneedle = &needle[i]; | 
|  | 255 | phaystack = &haystack[i + j]; | 
|  | 256 | while (i < needle_len && (CANON_ELEMENT (*pneedle++) | 
|  | 257 | == CANON_ELEMENT (*phaystack++))) | 
|  | 258 | ++i; | 
|  | 259 | if (needle_len <= i) | 
|  | 260 | { | 
|  | 261 | /* Scan for matches in left half.  */ | 
|  | 262 | i = suffix - 1; | 
|  | 263 | pneedle = &needle[i]; | 
|  | 264 | phaystack = &haystack[i + j]; | 
|  | 265 | while (memory < i + 1 && (CANON_ELEMENT (*pneedle--) | 
|  | 266 | == CANON_ELEMENT (*phaystack--))) | 
|  | 267 | --i; | 
|  | 268 | if (i + 1 < memory + 1) | 
|  | 269 | return (RETURN_TYPE) (haystack + j); | 
|  | 270 | /* No match, so remember how many repetitions of period | 
|  | 271 | on the right half were scanned.  */ | 
|  | 272 | j += period; | 
|  | 273 | memory = needle_len - period; | 
|  | 274 | } | 
|  | 275 | else | 
|  | 276 | { | 
|  | 277 | j += i - suffix + 1; | 
|  | 278 | memory = 0; | 
|  | 279 | } | 
|  | 280 | } | 
|  | 281 | } | 
|  | 282 | else | 
|  | 283 | { | 
|  | 284 | const unsigned char *phaystack = &haystack[suffix]; | 
|  | 285 | /* The comparison always starts from needle[suffix], so cache it | 
|  | 286 | and use an optimized first-character loop.  */ | 
|  | 287 | unsigned char needle_suffix = CANON_ELEMENT (needle[suffix]); | 
|  | 288 |  | 
|  | 289 | #if CHECK_EOL | 
|  | 290 | /* We start matching from the SUFFIX'th element, so make sure we | 
|  | 291 | don't hit '\0' before that.  */ | 
|  | 292 | if (haystack_len < suffix + 1 | 
|  | 293 | && !AVAILABLE (haystack, haystack_len, 0, suffix + 1)) | 
|  | 294 | return NULL; | 
|  | 295 | #endif | 
|  | 296 |  | 
|  | 297 | /* The two halves of needle are distinct; no extra memory is | 
|  | 298 | required, and any mismatch results in a maximal shift.  */ | 
|  | 299 | period = MAX (suffix, needle_len - suffix) + 1; | 
|  | 300 | j = 0; | 
|  | 301 | while (1 | 
|  | 302 | #if !CHECK_EOL | 
|  | 303 | && AVAILABLE (haystack, haystack_len, j, needle_len) | 
|  | 304 | #endif | 
|  | 305 | ) | 
|  | 306 | { | 
|  | 307 | unsigned char haystack_char; | 
|  | 308 | const unsigned char *pneedle; | 
|  | 309 |  | 
|  | 310 | /* TODO: The first-character loop can be sped up by adapting | 
|  | 311 | longword-at-a-time implementation of memchr/strchr.  */ | 
|  | 312 | if (needle_suffix | 
|  | 313 | != (haystack_char = CANON_ELEMENT (*phaystack++))) | 
|  | 314 | { | 
|  | 315 | RET0_IF_0 (haystack_char); | 
|  | 316 | #if !CHECK_EOL | 
|  | 317 | ++j; | 
|  | 318 | #endif | 
|  | 319 | continue; | 
|  | 320 | } | 
|  | 321 |  | 
|  | 322 | #if CHECK_EOL | 
|  | 323 | /* Calculate J if it wasn't kept up-to-date in the first-character | 
|  | 324 | loop.  */ | 
|  | 325 | j = phaystack - &haystack[suffix] - 1; | 
|  | 326 | #endif | 
|  | 327 |  | 
|  | 328 | /* Scan for matches in right half.  */ | 
|  | 329 | i = suffix + 1; | 
|  | 330 | pneedle = &needle[i]; | 
|  | 331 | while (i < needle_len) | 
|  | 332 | { | 
|  | 333 | if (CANON_ELEMENT (*pneedle++) | 
|  | 334 | != (haystack_char = CANON_ELEMENT (*phaystack++))) | 
|  | 335 | { | 
|  | 336 | RET0_IF_0 (haystack_char); | 
|  | 337 | break; | 
|  | 338 | } | 
|  | 339 | ++i; | 
|  | 340 | } | 
|  | 341 | if (needle_len <= i) | 
|  | 342 | { | 
|  | 343 | /* Scan for matches in left half.  */ | 
|  | 344 | i = suffix - 1; | 
|  | 345 | pneedle = &needle[i]; | 
|  | 346 | phaystack = &haystack[i + j]; | 
|  | 347 | while (i != SIZE_MAX) | 
|  | 348 | { | 
|  | 349 | if (CANON_ELEMENT (*pneedle--) | 
|  | 350 | != (haystack_char = CANON_ELEMENT (*phaystack--))) | 
|  | 351 | { | 
|  | 352 | RET0_IF_0 (haystack_char); | 
|  | 353 | break; | 
|  | 354 | } | 
|  | 355 | --i; | 
|  | 356 | } | 
|  | 357 | if (i == SIZE_MAX) | 
|  | 358 | return (RETURN_TYPE) (haystack + j); | 
|  | 359 | j += period; | 
|  | 360 | } | 
|  | 361 | else | 
|  | 362 | j += i - suffix + 1; | 
|  | 363 |  | 
|  | 364 | #if CHECK_EOL | 
|  | 365 | if (!AVAILABLE (haystack, haystack_len, j, needle_len)) | 
|  | 366 | break; | 
|  | 367 | #endif | 
|  | 368 |  | 
|  | 369 | phaystack = &haystack[suffix + j]; | 
|  | 370 | } | 
|  | 371 | } | 
|  | 372 | ret0: __attribute__ ((unused)) | 
|  | 373 | return NULL; | 
|  | 374 | } | 
|  | 375 |  | 
|  | 376 | /* Return the first location of non-empty NEEDLE within HAYSTACK, or | 
|  | 377 | NULL.  HAYSTACK_LEN is the minimum known length of HAYSTACK.  This | 
|  | 378 | method is optimized for LONG_NEEDLE_THRESHOLD <= NEEDLE_LEN. | 
|  | 379 | Performance is guaranteed to be linear, with an initialization cost | 
|  | 380 | of 3 * NEEDLE_LEN + (1 << CHAR_BIT) operations. | 
|  | 381 |  | 
|  | 382 | If AVAILABLE does not modify HAYSTACK_LEN (as in memmem), then at | 
|  | 383 | most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching, | 
|  | 384 | and sublinear performance O(HAYSTACK_LEN / NEEDLE_LEN) is possible. | 
|  | 385 | If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 * | 
|  | 386 | HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching, and | 
|  | 387 | sublinear performance is not possible.  */ | 
|  | 388 | static RETURN_TYPE | 
|  | 389 | two_way_long_needle (const unsigned char *haystack, size_t haystack_len, | 
|  | 390 | const unsigned char *needle, size_t needle_len) | 
|  | 391 | { | 
|  | 392 | size_t i; /* Index into current byte of NEEDLE.  */ | 
|  | 393 | size_t j; /* Index into current window of HAYSTACK.  */ | 
|  | 394 | size_t period; /* The period of the right half of needle.  */ | 
|  | 395 | size_t suffix; /* The index of the right half of needle.  */ | 
|  | 396 | size_t shift_table[1U << CHAR_BIT]; /* See below.  */ | 
|  | 397 |  | 
|  | 398 | /* Factor the needle into two halves, such that the left half is | 
|  | 399 | smaller than the global period, and the right half is | 
|  | 400 | periodic (with a period as large as NEEDLE_LEN - suffix).  */ | 
|  | 401 | suffix = critical_factorization (needle, needle_len, &period); | 
|  | 402 |  | 
|  | 403 | /* Populate shift_table.  For each possible byte value c, | 
|  | 404 | shift_table[c] is the distance from the last occurrence of c to | 
|  | 405 | the end of NEEDLE, or NEEDLE_LEN if c is absent from the NEEDLE. | 
|  | 406 | shift_table[NEEDLE[NEEDLE_LEN - 1]] contains the only 0.  */ | 
|  | 407 | for (i = 0; i < 1U << CHAR_BIT; i++) | 
|  | 408 | shift_table[i] = needle_len; | 
|  | 409 | for (i = 0; i < needle_len; i++) | 
|  | 410 | shift_table[CANON_ELEMENT (needle[i])] = needle_len - i - 1; | 
|  | 411 |  | 
|  | 412 | /* Perform the search.  Each iteration compares the right half | 
|  | 413 | first.  */ | 
|  | 414 | if (CMP_FUNC (needle, needle + period, suffix) == 0) | 
|  | 415 | { | 
|  | 416 | /* Entire needle is periodic; a mismatch can only advance by the | 
|  | 417 | period, so use memory to avoid rescanning known occurrences | 
|  | 418 | of the period.  */ | 
|  | 419 | size_t memory = 0; | 
|  | 420 | size_t shift; | 
|  | 421 | j = 0; | 
|  | 422 | while (AVAILABLE (haystack, haystack_len, j, needle_len)) | 
|  | 423 | { | 
|  | 424 | const unsigned char *pneedle; | 
|  | 425 | const unsigned char *phaystack; | 
|  | 426 |  | 
|  | 427 | /* Check the last byte first; if it does not match, then | 
|  | 428 | shift to the next possible match location.  */ | 
|  | 429 | shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])]; | 
|  | 430 | if (0 < shift) | 
|  | 431 | { | 
|  | 432 | if (memory && shift < period) | 
|  | 433 | { | 
|  | 434 | /* Since needle is periodic, but the last period has | 
|  | 435 | a byte out of place, there can be no match until | 
|  | 436 | after the mismatch.  */ | 
|  | 437 | shift = needle_len - period; | 
|  | 438 | } | 
|  | 439 | memory = 0; | 
|  | 440 | j += shift; | 
|  | 441 | continue; | 
|  | 442 | } | 
|  | 443 | /* Scan for matches in right half.  The last byte has | 
|  | 444 | already been matched, by virtue of the shift table.  */ | 
|  | 445 | i = MAX (suffix, memory); | 
|  | 446 | pneedle = &needle[i]; | 
|  | 447 | phaystack = &haystack[i + j]; | 
|  | 448 | while (i < needle_len - 1 && (CANON_ELEMENT (*pneedle++) | 
|  | 449 | == CANON_ELEMENT (*phaystack++))) | 
|  | 450 | ++i; | 
|  | 451 | if (needle_len - 1 <= i) | 
|  | 452 | { | 
|  | 453 | /* Scan for matches in left half.  */ | 
|  | 454 | i = suffix - 1; | 
|  | 455 | pneedle = &needle[i]; | 
|  | 456 | phaystack = &haystack[i + j]; | 
|  | 457 | while (memory < i + 1 && (CANON_ELEMENT (*pneedle--) | 
|  | 458 | == CANON_ELEMENT (*phaystack--))) | 
|  | 459 | --i; | 
|  | 460 | if (i + 1 < memory + 1) | 
|  | 461 | return (RETURN_TYPE) (haystack + j); | 
|  | 462 | /* No match, so remember how many repetitions of period | 
|  | 463 | on the right half were scanned.  */ | 
|  | 464 | j += period; | 
|  | 465 | memory = needle_len - period; | 
|  | 466 | } | 
|  | 467 | else | 
|  | 468 | { | 
|  | 469 | j += i - suffix + 1; | 
|  | 470 | memory = 0; | 
|  | 471 | } | 
|  | 472 | } | 
|  | 473 | } | 
|  | 474 | else | 
|  | 475 | { | 
|  | 476 | /* The two halves of needle are distinct; no extra memory is | 
|  | 477 | required, and any mismatch results in a maximal shift.  */ | 
|  | 478 | size_t shift; | 
|  | 479 | period = MAX (suffix, needle_len - suffix) + 1; | 
|  | 480 | j = 0; | 
|  | 481 | while (AVAILABLE (haystack, haystack_len, j, needle_len)) | 
|  | 482 | { | 
|  | 483 | const unsigned char *pneedle; | 
|  | 484 | const unsigned char *phaystack; | 
|  | 485 |  | 
|  | 486 | /* Check the last byte first; if it does not match, then | 
|  | 487 | shift to the next possible match location.  */ | 
|  | 488 | shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])]; | 
|  | 489 | if (0 < shift) | 
|  | 490 | { | 
|  | 491 | j += shift; | 
|  | 492 | continue; | 
|  | 493 | } | 
|  | 494 | /* Scan for matches in right half.  The last byte has | 
|  | 495 | already been matched, by virtue of the shift table.  */ | 
|  | 496 | i = suffix; | 
|  | 497 | pneedle = &needle[i]; | 
|  | 498 | phaystack = &haystack[i + j]; | 
|  | 499 | while (i < needle_len - 1 && (CANON_ELEMENT (*pneedle++) | 
|  | 500 | == CANON_ELEMENT (*phaystack++))) | 
|  | 501 | ++i; | 
|  | 502 | if (needle_len - 1 <= i) | 
|  | 503 | { | 
|  | 504 | /* Scan for matches in left half.  */ | 
|  | 505 | i = suffix - 1; | 
|  | 506 | pneedle = &needle[i]; | 
|  | 507 | phaystack = &haystack[i + j]; | 
|  | 508 | while (i != SIZE_MAX && (CANON_ELEMENT (*pneedle--) | 
|  | 509 | == CANON_ELEMENT (*phaystack--))) | 
|  | 510 | --i; | 
|  | 511 | if (i == SIZE_MAX) | 
|  | 512 | return (RETURN_TYPE) (haystack + j); | 
|  | 513 | j += period; | 
|  | 514 | } | 
|  | 515 | else | 
|  | 516 | j += i - suffix + 1; | 
|  | 517 | } | 
|  | 518 | } | 
|  | 519 | return NULL; | 
|  | 520 | } | 
|  | 521 |  | 
|  | 522 | #undef AVAILABLE | 
|  | 523 | #undef CANON_ELEMENT | 
|  | 524 | #undef CMP_FUNC | 
|  | 525 | #undef RET0_IF_0 | 
|  | 526 | #undef RETURN_TYPE | 
|  | 527 | #undef CHECK_EOL |