lh | 9ed821d | 2023-04-07 01:36:19 -0700 | [diff] [blame] | 1 | ; |
| 2 | ; |
| 3 | ; this file contains a script of tests to run through regress.exe |
| 4 | ; |
| 5 | ; comments start with a semicolon and proceed to the end of the line |
| 6 | ; |
| 7 | ; changes to regular expression compile flags start with a "-" as the first |
| 8 | ; non-whitespace character and consist of a list of the printable names |
| 9 | ; of the flags, for example "match_default" |
| 10 | ; |
| 11 | ; Other lines contain a test to perform using the current flag status |
| 12 | ; the first token contains the expression to compile, the second the string |
| 13 | ; to match it against. If the second string is "!" then the expression should |
| 14 | ; not compile, that is the first string is an invalid regular expression. |
| 15 | ; This is then followed by a list of integers that specify what should match, |
| 16 | ; each pair represents the starting and ending positions of a subexpression |
| 17 | ; starting with the zeroth subexpression (the whole match). |
| 18 | ; A value of -1 indicates that the subexpression should not take part in the |
| 19 | ; match at all, if the first value is -1 then no part of the expression should |
| 20 | ; match the string. |
| 21 | ; |
| 22 | ; Tests taken from BOOST testsuite and adapted to glibc regex. |
| 23 | ; |
| 24 | ; Boost Software License - Version 1.0 - August 17th, 2003 |
| 25 | ; |
| 26 | ; Permission is hereby granted, free of charge, to any person or organization |
| 27 | ; obtaining a copy of the software and accompanying documentation covered by |
| 28 | ; this license (the "Software") to use, reproduce, display, distribute, |
| 29 | ; execute, and transmit the Software, and to prepare derivative works of the |
| 30 | ; Software, and to permit third-parties to whom the Software is furnished to |
| 31 | ; do so, all subject to the following: |
| 32 | ; |
| 33 | ; The copyright notices in the Software and this entire statement, including |
| 34 | ; the above license grant, this restriction and the following disclaimer, |
| 35 | ; must be included in all copies of the Software, in whole or in part, and |
| 36 | ; all derivative works of the Software, unless such copies or derivative |
| 37 | ; works are solely in the form of machine-executable object code generated by |
| 38 | ; a source language processor. |
| 39 | ; |
| 40 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 41 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 42 | ; FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT |
| 43 | ; SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE |
| 44 | ; FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, |
| 45 | ; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| 46 | ; DEALINGS IN THE SOFTWARE. |
| 47 | ; |
| 48 | |
| 49 | - match_default normal REG_EXTENDED |
| 50 | |
| 51 | ; |
| 52 | ; try some really simple literals: |
| 53 | a a 0 1 |
| 54 | Z Z 0 1 |
| 55 | Z aaa -1 -1 |
| 56 | Z xxxxZZxxx 4 5 |
| 57 | |
| 58 | ; and some simple brackets: |
| 59 | (a) zzzaazz 3 4 3 4 |
| 60 | () zzz 0 0 0 0 |
| 61 | () "" 0 0 0 0 |
| 62 | ( ! |
| 63 | ) ) 0 1 |
| 64 | (aa ! |
| 65 | aa) baa)b 1 4 |
| 66 | a b -1 -1 |
| 67 | \(\) () 0 2 |
| 68 | \(a\) (a) 0 3 |
| 69 | \() () 0 2 |
| 70 | (\) ! |
| 71 | p(a)rameter ABCparameterXYZ 3 12 4 5 |
| 72 | [pq](a)rameter ABCparameterXYZ 3 12 4 5 |
| 73 | |
| 74 | ; now try escaped brackets: |
| 75 | - match_default bk_parens REG_BASIC |
| 76 | \(a\) zzzaazz 3 4 3 4 |
| 77 | \(\) zzz 0 0 0 0 |
| 78 | \(\) "" 0 0 0 0 |
| 79 | \( ! |
| 80 | \) ! |
| 81 | \(aa ! |
| 82 | aa\) ! |
| 83 | () () 0 2 |
| 84 | (a) (a) 0 3 |
| 85 | (\) ! |
| 86 | \() ! |
| 87 | |
| 88 | ; now move on to "." wildcards |
| 89 | - match_default normal REG_EXTENDED REG_STARTEND |
| 90 | . a 0 1 |
| 91 | . \n 0 1 |
| 92 | . \r 0 1 |
| 93 | . \0 0 1 |
| 94 | |
| 95 | ; |
| 96 | ; now move on to the repetition ops, |
| 97 | ; starting with operator * |
| 98 | - match_default normal REG_EXTENDED |
| 99 | a* b 0 0 |
| 100 | ab* a 0 1 |
| 101 | ab* ab 0 2 |
| 102 | ab* sssabbbbbbsss 3 10 |
| 103 | ab*c* a 0 1 |
| 104 | ab*c* abbb 0 4 |
| 105 | ab*c* accc 0 4 |
| 106 | ab*c* abbcc 0 5 |
| 107 | *a ! |
| 108 | \<* ! |
| 109 | \>* ! |
| 110 | \n* \n\n 0 2 |
| 111 | \** ** 0 2 |
| 112 | \* * 0 1 |
| 113 | |
| 114 | ; now try operator + |
| 115 | ab+ a -1 -1 |
| 116 | ab+ ab 0 2 |
| 117 | ab+ sssabbbbbbsss 3 10 |
| 118 | ab+c+ a -1 -1 |
| 119 | ab+c+ abbb -1 -1 |
| 120 | ab+c+ accc -1 -1 |
| 121 | ab+c+ abbcc 0 5 |
| 122 | +a ! |
| 123 | \<+ ! |
| 124 | \>+ ! |
| 125 | \n+ \n\n 0 2 |
| 126 | \+ + 0 1 |
| 127 | \+ ++ 0 1 |
| 128 | \++ ++ 0 2 |
| 129 | |
| 130 | ; now try operator ? |
| 131 | - match_default normal REG_EXTENDED |
| 132 | a? b 0 0 |
| 133 | ab? a 0 1 |
| 134 | ab? ab 0 2 |
| 135 | ab? sssabbbbbbsss 3 5 |
| 136 | ab?c? a 0 1 |
| 137 | ab?c? abbb 0 2 |
| 138 | ab?c? accc 0 2 |
| 139 | ab?c? abcc 0 3 |
| 140 | ?a ! |
| 141 | \<? ! |
| 142 | \>? ! |
| 143 | \n? \n\n 0 1 |
| 144 | \? ? 0 1 |
| 145 | \? ?? 0 1 |
| 146 | \?? ?? 0 1 |
| 147 | |
| 148 | ; now try operator {} |
| 149 | - match_default normal REG_EXTENDED |
| 150 | a{2} a -1 -1 |
| 151 | a{2} aa 0 2 |
| 152 | a{2} aaa 0 2 |
| 153 | a{2,} a -1 -1 |
| 154 | a{2,} aa 0 2 |
| 155 | a{2,} aaaaa 0 5 |
| 156 | a{2,4} a -1 -1 |
| 157 | a{2,4} aa 0 2 |
| 158 | a{2,4} aaa 0 3 |
| 159 | a{2,4} aaaa 0 4 |
| 160 | a{2,4} aaaaa 0 4 |
| 161 | a{} ! |
| 162 | a{2 ! |
| 163 | a} a} 0 2 |
| 164 | \{\} {} 0 2 |
| 165 | |
| 166 | - match_default normal REG_BASIC |
| 167 | a\{2\} a -1 -1 |
| 168 | a\{2\} aa 0 2 |
| 169 | a\{2\} aaa 0 2 |
| 170 | a\{2,\} a -1 -1 |
| 171 | a\{2,\} aa 0 2 |
| 172 | a\{2,\} aaaaa 0 5 |
| 173 | a\{2,4\} a -1 -1 |
| 174 | a\{2,4\} aa 0 2 |
| 175 | a\{2,4\} aaa 0 3 |
| 176 | a\{2,4\} aaaa 0 4 |
| 177 | a\{2,4\} aaaaa 0 4 |
| 178 | {} {} 0 2 |
| 179 | |
| 180 | ; now test the alternation operator | |
| 181 | - match_default normal REG_EXTENDED |
| 182 | a|b a 0 1 |
| 183 | a|b b 0 1 |
| 184 | a(b|c) ab 0 2 1 2 |
| 185 | a(b|c) ac 0 2 1 2 |
| 186 | a(b|c) ad -1 -1 -1 -1 |
| 187 | a\| a| 0 2 |
| 188 | |
| 189 | ; now test the set operator [] |
| 190 | - match_default normal REG_EXTENDED |
| 191 | ; try some literals first |
| 192 | [abc] a 0 1 |
| 193 | [abc] b 0 1 |
| 194 | [abc] c 0 1 |
| 195 | [abc] d -1 -1 |
| 196 | [^bcd] a 0 1 |
| 197 | [^bcd] b -1 -1 |
| 198 | [^bcd] d -1 -1 |
| 199 | [^bcd] e 0 1 |
| 200 | a[b]c abc 0 3 |
| 201 | a[ab]c abc 0 3 |
| 202 | a[^ab]c adc 0 3 |
| 203 | a[]b]c a]c 0 3 |
| 204 | a[[b]c a[c 0 3 |
| 205 | a[-b]c a-c 0 3 |
| 206 | a[^]b]c adc 0 3 |
| 207 | a[^-b]c adc 0 3 |
| 208 | a[b-]c a-c 0 3 |
| 209 | a[b ! |
| 210 | a[] ! |
| 211 | |
| 212 | ; then some ranges |
| 213 | [b-e] a -1 -1 |
| 214 | [b-e] b 0 1 |
| 215 | [b-e] e 0 1 |
| 216 | [b-e] f -1 -1 |
| 217 | [^b-e] a 0 1 |
| 218 | [^b-e] b -1 -1 |
| 219 | [^b-e] e -1 -1 |
| 220 | [^b-e] f 0 1 |
| 221 | a[1-3]c a2c 0 3 |
| 222 | a[3-1]c ! |
| 223 | a[1-3-5]c ! |
| 224 | a[1- ! |
| 225 | |
| 226 | ; and some classes |
| 227 | a[[:alpha:]]c abc 0 3 |
| 228 | a[[:unknown:]]c ! |
| 229 | a[[: ! |
| 230 | a[[:alpha ! |
| 231 | a[[:alpha:] ! |
| 232 | a[[:alpha,:] ! |
| 233 | a[[:]:]]b ! |
| 234 | a[[:-:]]b ! |
| 235 | a[[:alph:]] ! |
| 236 | a[[:alphabet:]] ! |
| 237 | [[:alnum:]]+ -%@a0X_- 3 6 |
| 238 | [[:alpha:]]+ -%@aX_0- 3 5 |
| 239 | [[:blank:]]+ "a \tb" 1 4 |
| 240 | [[:cntrl:]]+ a\n\tb 1 3 |
| 241 | [[:digit:]]+ a019b 1 4 |
| 242 | [[:graph:]]+ " a%b " 1 4 |
| 243 | [[:lower:]]+ AabC 1 3 |
| 244 | ; This test fails with STLPort, disable for now as this is a corner case anyway... |
| 245 | ;[[:print:]]+ "\na b\n" 1 4 |
| 246 | [[:punct:]]+ " %-&\t" 1 4 |
| 247 | [[:space:]]+ "a \n\t\rb" 1 5 |
| 248 | [[:upper:]]+ aBCd 1 3 |
| 249 | [[:xdigit:]]+ p0f3Cx 1 5 |
| 250 | |
| 251 | ; now test flag settings: |
| 252 | - escape_in_lists REG_NO_POSIX_TEST |
| 253 | [\n] \n 0 1 |
| 254 | - REG_NO_POSIX_TEST |
| 255 | |
| 256 | ; line anchors |
| 257 | - match_default normal REG_EXTENDED |
| 258 | ^ab ab 0 2 |
| 259 | ^ab xxabxx -1 -1 |
| 260 | ab$ ab 0 2 |
| 261 | ab$ abxx -1 -1 |
| 262 | - match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL |
| 263 | ^ab ab -1 -1 |
| 264 | ^ab xxabxx -1 -1 |
| 265 | ab$ ab -1 -1 |
| 266 | ab$ abxx -1 -1 |
| 267 | |
| 268 | ; back references |
| 269 | - match_default normal REG_PERL |
| 270 | a(b)\2c ! |
| 271 | a(b\1)c ! |
| 272 | a(b*)c\1d abbcbbd 0 7 1 3 |
| 273 | a(b*)c\1d abbcbd -1 -1 |
| 274 | a(b*)c\1d abbcbbbd -1 -1 |
| 275 | ^(.)\1 abc -1 -1 |
| 276 | a([bc])\1d abcdabbd 4 8 5 6 |
| 277 | ; strictly speaking this is at best ambiguous, at worst wrong, this is what most |
| 278 | ; re implementations will match though. |
| 279 | a(([bc])\2)*d abbccd 0 6 3 5 3 4 |
| 280 | |
| 281 | a(([bc])\2)*d abbcbd -1 -1 |
| 282 | a((b)*\2)*d abbbd 0 5 1 4 2 3 |
| 283 | ; perl only: |
| 284 | (ab*)[ab]*\1 ababaaa 0 7 0 1 |
| 285 | (a)\1bcd aabcd 0 5 0 1 |
| 286 | (a)\1bc*d aabcd 0 5 0 1 |
| 287 | (a)\1bc*d aabd 0 4 0 1 |
| 288 | (a)\1bc*d aabcccd 0 7 0 1 |
| 289 | (a)\1bc*[ce]d aabcccd 0 7 0 1 |
| 290 | ^(a)\1b(c)*cd$ aabcccd 0 7 0 1 4 5 |
| 291 | |
| 292 | ; posix only: |
| 293 | - match_default extended REG_EXTENDED |
| 294 | (ab*)[ab]*\1 ababaaa 0 7 0 1 |
| 295 | |
| 296 | ; |
| 297 | ; word operators: |
| 298 | \w a 0 1 |
| 299 | \w z 0 1 |
| 300 | \w A 0 1 |
| 301 | \w Z 0 1 |
| 302 | \w _ 0 1 |
| 303 | \w } -1 -1 |
| 304 | \w ` -1 -1 |
| 305 | \w [ -1 -1 |
| 306 | \w @ -1 -1 |
| 307 | ; non-word: |
| 308 | \W a -1 -1 |
| 309 | \W z -1 -1 |
| 310 | \W A -1 -1 |
| 311 | \W Z -1 -1 |
| 312 | \W _ -1 -1 |
| 313 | \W } 0 1 |
| 314 | \W ` 0 1 |
| 315 | \W [ 0 1 |
| 316 | \W @ 0 1 |
| 317 | ; word start: |
| 318 | \<abcd " abcd" 2 6 |
| 319 | \<ab cab -1 -1 |
| 320 | \<ab "\nab" 1 3 |
| 321 | \<tag ::tag 2 5 |
| 322 | ;word end: |
| 323 | abc\> abc 0 3 |
| 324 | abc\> abcd -1 -1 |
| 325 | abc\> abc\n 0 3 |
| 326 | abc\> abc:: 0 3 |
| 327 | ; word boundary: |
| 328 | \babcd " abcd" 2 6 |
| 329 | \bab cab -1 -1 |
| 330 | \bab "\nab" 1 3 |
| 331 | \btag ::tag 2 5 |
| 332 | abc\b abc 0 3 |
| 333 | abc\b abcd -1 -1 |
| 334 | abc\b abc\n 0 3 |
| 335 | abc\b abc:: 0 3 |
| 336 | ; within word: |
| 337 | \B ab 1 1 |
| 338 | a\Bb ab 0 2 |
| 339 | a\B ab 0 1 |
| 340 | a\B a -1 -1 |
| 341 | a\B "a " -1 -1 |
| 342 | |
| 343 | ; |
| 344 | ; buffer operators: |
| 345 | \`abc abc 0 3 |
| 346 | \`abc \nabc -1 -1 |
| 347 | \`abc " abc" -1 -1 |
| 348 | abc\' abc 0 3 |
| 349 | abc\' abc\n -1 -1 |
| 350 | abc\' "abc " -1 -1 |
| 351 | |
| 352 | ; |
| 353 | ; now follow various complex expressions designed to try and bust the matcher: |
| 354 | a(((b)))c abc 0 3 1 2 1 2 1 2 |
| 355 | a(b|(c))d abd 0 3 1 2 -1 -1 |
| 356 | a(b|(c))d acd 0 3 1 2 1 2 |
| 357 | a(b*|c)d abbd 0 4 1 3 |
| 358 | ; just gotta have one DFA-buster, of course |
| 359 | a[ab]{20} aaaaabaaaabaaaabaaaab 0 21 |
| 360 | ; and an inline expansion in case somebody gets tricky |
| 361 | a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] aaaaabaaaabaaaabaaaab 0 21 |
| 362 | ; and in case somebody just slips in an NFA... |
| 363 | a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) aaaaabaaaabaaaabaaaabweeknights 0 31 21 24 24 31 |
| 364 | ; one really big one |
| 365 | 1234567890123456789012345678901234567890123456789012345678901234567890 a1234567890123456789012345678901234567890123456789012345678901234567890b 1 71 |
| 366 | ; fish for problems as brackets go past 8 |
| 367 | [ab][cd][ef][gh][ij][kl][mn] xacegikmoq 1 8 |
| 368 | [ab][cd][ef][gh][ij][kl][mn][op] xacegikmoq 1 9 |
| 369 | [ab][cd][ef][gh][ij][kl][mn][op][qr] xacegikmoqy 1 10 |
| 370 | [ab][cd][ef][gh][ij][kl][mn][op][q] xacegikmoqy 1 10 |
| 371 | ; and as parentheses go past 9: |
| 372 | (a)(b)(c)(d)(e)(f)(g)(h) zabcdefghi 1 9 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 |
| 373 | (a)(b)(c)(d)(e)(f)(g)(h)(i) zabcdefghij 1 10 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 |
| 374 | (a)(b)(c)(d)(e)(f)(g)(h)(i)(j) zabcdefghijk 1 11 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 |
| 375 | (a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k) zabcdefghijkl 1 12 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12 |
| 376 | (a)d|(b)c abc 1 3 -1 -1 1 2 |
| 377 | _+((www)|(ftp)|(mailto)):_* "_wwwnocolon _mailto:" 12 20 13 19 -1 -1 -1 -1 13 19 |
| 378 | |
| 379 | ; subtleties of matching |
| 380 | ;a(b)?c\1d acd 0 3 -1 -1 |
| 381 | ; POSIX expects the following result for this test instead: |
| 382 | a(b)?c\1d acd -1 -1 -1 -1 |
| 383 | a(b?c)+d accd 0 4 2 3 |
| 384 | (wee|week)(knights|night) weeknights 0 10 0 3 3 10 |
| 385 | .* abc 0 3 |
| 386 | a(b|(c))d abd 0 3 1 2 -1 -1 |
| 387 | a(b|(c))d acd 0 3 1 2 1 2 |
| 388 | a(b*|c|e)d abbd 0 4 1 3 |
| 389 | a(b*|c|e)d acd 0 3 1 2 |
| 390 | a(b*|c|e)d ad 0 2 1 1 |
| 391 | a(b?)c abc 0 3 1 2 |
| 392 | a(b?)c ac 0 2 1 1 |
| 393 | a(b+)c abc 0 3 1 2 |
| 394 | a(b+)c abbbc 0 5 1 4 |
| 395 | a(b*)c ac 0 2 1 1 |
| 396 | (a|ab)(bc([de]+)f|cde) abcdef 0 6 0 1 1 6 3 5 |
| 397 | a([bc]?)c abc 0 3 1 2 |
| 398 | a([bc]?)c ac 0 2 1 1 |
| 399 | a([bc]+)c abc 0 3 1 2 |
| 400 | a([bc]+)c abcc 0 4 1 3 |
| 401 | a([bc]+)bc abcbc 0 5 1 3 |
| 402 | a(bb+|b)b abb 0 3 1 2 |
| 403 | a(bbb+|bb+|b)b abb 0 3 1 2 |
| 404 | a(bbb+|bb+|b)b abbb 0 4 1 3 |
| 405 | a(bbb+|bb+|b)bb abbb 0 4 1 2 |
| 406 | (.*).* abcdef 0 6 0 6 |
| 407 | (a*)* bc 0 0 0 0 |
| 408 | xyx*xz xyxxxxyxxxz 5 11 |
| 409 | |
| 410 | ; do we get the right subexpression when it is used more than once? |
| 411 | a(b|c)*d ad 0 2 -1 -1 |
| 412 | a(b|c)*d abcd 0 4 2 3 |
| 413 | a(b|c)+d abd 0 3 1 2 |
| 414 | a(b|c)+d abcd 0 4 2 3 |
| 415 | a(b|c?)+d ad 0 2 1 1 |
| 416 | a(b|c){0,0}d ad 0 2 -1 -1 |
| 417 | a(b|c){0,1}d ad 0 2 -1 -1 |
| 418 | a(b|c){0,1}d abd 0 3 1 2 |
| 419 | a(b|c){0,2}d ad 0 2 -1 -1 |
| 420 | a(b|c){0,2}d abcd 0 4 2 3 |
| 421 | a(b|c){0,}d ad 0 2 -1 -1 |
| 422 | a(b|c){0,}d abcd 0 4 2 3 |
| 423 | a(b|c){1,1}d abd 0 3 1 2 |
| 424 | a(b|c){1,2}d abd 0 3 1 2 |
| 425 | a(b|c){1,2}d abcd 0 4 2 3 |
| 426 | a(b|c){1,}d abd 0 3 1 2 |
| 427 | a(b|c){1,}d abcd 0 4 2 3 |
| 428 | a(b|c){2,2}d acbd 0 4 2 3 |
| 429 | a(b|c){2,2}d abcd 0 4 2 3 |
| 430 | a(b|c){2,4}d abcd 0 4 2 3 |
| 431 | a(b|c){2,4}d abcbd 0 5 3 4 |
| 432 | a(b|c){2,4}d abcbcd 0 6 4 5 |
| 433 | a(b|c){2,}d abcd 0 4 2 3 |
| 434 | a(b|c){2,}d abcbd 0 5 3 4 |
| 435 | ; perl only: these conflict with the POSIX test below |
| 436 | ;a(b|c?)+d abcd 0 4 3 3 |
| 437 | ;a(b+|((c)*))+d abd 0 3 2 2 2 2 -1 -1 |
| 438 | ;a(b+|((c)*))+d abcd 0 4 3 3 3 3 2 3 |
| 439 | |
| 440 | ; posix only: |
| 441 | - match_default extended REG_EXTENDED REG_STARTEND |
| 442 | |
| 443 | a(b|c?)+d abcd 0 4 2 3 |
| 444 | a(b|((c)*))+d abcd 0 4 2 3 2 3 2 3 |
| 445 | a(b+|((c)*))+d abd 0 3 1 2 -1 -1 -1 -1 |
| 446 | a(b+|((c)*))+d abcd 0 4 2 3 2 3 2 3 |
| 447 | a(b|((c)*))+d ad 0 2 1 1 1 1 -1 -1 |
| 448 | a(b|((c)*))*d abcd 0 4 2 3 2 3 2 3 |
| 449 | a(b+|((c)*))*d abd 0 3 1 2 -1 -1 -1 -1 |
| 450 | a(b+|((c)*))*d abcd 0 4 2 3 2 3 2 3 |
| 451 | a(b|((c)*))*d ad 0 2 1 1 1 1 -1 -1 |
| 452 | |
| 453 | - match_default normal REG_PERL |
| 454 | ; try to match C++ syntax elements: |
| 455 | ; line comment: |
| 456 | //[^\n]* "++i //here is a line comment\n" 4 28 |
| 457 | ; block comment: |
| 458 | /\*([^*]|\*+[^*/])*\*+/ "/* here is a block comment */" 0 29 26 27 |
| 459 | /\*([^*]|\*+[^*/])*\*+/ "/**/" 0 4 -1 -1 |
| 460 | /\*([^*]|\*+[^*/])*\*+/ "/***/" 0 5 -1 -1 |
| 461 | /\*([^*]|\*+[^*/])*\*+/ "/****/" 0 6 -1 -1 |
| 462 | /\*([^*]|\*+[^*/])*\*+/ "/*****/" 0 7 -1 -1 |
| 463 | /\*([^*]|\*+[^*/])*\*+/ "/*****/*/" 0 7 -1 -1 |
| 464 | ; preprocessor directives: |
| 465 | ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol" 0 19 -1 -1 |
| 466 | ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) #x" 0 25 -1 -1 |
| 467 | ; perl only: |
| 468 | ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);" 0 53 30 42 |
| 469 | ; literals: |
| 470 | ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFF 0 4 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1 |
| 471 | ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 35 0 2 0 2 -1 -1 0 2 -1 -1 -1 -1 -1 -1 |
| 472 | ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFu 0 5 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1 |
| 473 | ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFL 0 5 0 4 0 4 -1 -1 4 5 -1 -1 -1 -1 |
| 474 | ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFFFFFFFFFFFFFFFuint64 0 24 0 18 0 18 -1 -1 19 24 19 24 22 24 |
| 475 | ; strings: |
| 476 | '([^\\']|\\.)*' '\\x3A' 0 6 4 5 |
| 477 | '([^\\']|\\.)*' '\\'' 0 4 1 3 |
| 478 | '([^\\']|\\.)*' '\\n' 0 4 1 3 |
| 479 | |
| 480 | ; finally try some case insensitive matches: |
| 481 | - match_default normal REG_EXTENDED REG_ICASE |
| 482 | ; upper and lower have no meaning here so they fail, however these |
| 483 | ; may compile with other libraries... |
| 484 | ;[[:lower:]] ! |
| 485 | ;[[:upper:]] ! |
| 486 | 0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\} 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\} 0 72 |
| 487 | |
| 488 | ; known and suspected bugs: |
| 489 | - match_default normal REG_EXTENDED |
| 490 | \( ( 0 1 |
| 491 | \) ) 0 1 |
| 492 | \$ $ 0 1 |
| 493 | \^ ^ 0 1 |
| 494 | \. . 0 1 |
| 495 | \* * 0 1 |
| 496 | \+ + 0 1 |
| 497 | \? ? 0 1 |
| 498 | \[ [ 0 1 |
| 499 | \] ] 0 1 |
| 500 | \| | 0 1 |
| 501 | \\ \\ 0 1 |
| 502 | # # 0 1 |
| 503 | \# # 0 1 |
| 504 | a- a- 0 2 |
| 505 | \- - 0 1 |
| 506 | \{ { 0 1 |
| 507 | \} } 0 1 |
| 508 | 0 0 0 1 |
| 509 | 1 1 0 1 |
| 510 | 9 9 0 1 |
| 511 | b b 0 1 |
| 512 | B B 0 1 |
| 513 | < < 0 1 |
| 514 | > > 0 1 |
| 515 | w w 0 1 |
| 516 | W W 0 1 |
| 517 | ` ` 0 1 |
| 518 | ' ' 0 1 |
| 519 | \n \n 0 1 |
| 520 | , , 0 1 |
| 521 | a a 0 1 |
| 522 | f f 0 1 |
| 523 | n n 0 1 |
| 524 | r r 0 1 |
| 525 | t t 0 1 |
| 526 | v v 0 1 |
| 527 | c c 0 1 |
| 528 | x x 0 1 |
| 529 | : : 0 1 |
| 530 | (\.[[:alnum:]]+){2} "w.a.b " 1 5 3 5 |
| 531 | |
| 532 | - match_default normal REG_EXTENDED REG_ICASE |
| 533 | a A 0 1 |
| 534 | A a 0 1 |
| 535 | [abc]+ abcABC 0 6 |
| 536 | [ABC]+ abcABC 0 6 |
| 537 | [a-z]+ abcABC 0 6 |
| 538 | [A-Z]+ abzANZ 0 6 |
| 539 | [a-Z]+ abzABZ 0 6 |
| 540 | [A-z]+ abzABZ 0 6 |
| 541 | [[:lower:]]+ abyzABYZ 0 8 |
| 542 | [[:upper:]]+ abzABZ 0 6 |
| 543 | [[:alpha:]]+ abyzABYZ 0 8 |
| 544 | [[:alnum:]]+ 09abyzABYZ 0 10 |
| 545 | |
| 546 | ; word start: |
| 547 | \<abcd " abcd" 2 6 |
| 548 | \<ab cab -1 -1 |
| 549 | \<ab "\nab" 1 3 |
| 550 | \<tag ::tag 2 5 |
| 551 | ;word end: |
| 552 | abc\> abc 0 3 |
| 553 | abc\> abcd -1 -1 |
| 554 | abc\> abc\n 0 3 |
| 555 | abc\> abc:: 0 3 |
| 556 | |
| 557 | ; collating elements and rewritten set code: |
| 558 | - match_default normal REG_EXTENDED REG_STARTEND |
| 559 | ;[[.zero.]] 0 0 1 |
| 560 | ;[[.one.]] 1 0 1 |
| 561 | ;[[.two.]] 2 0 1 |
| 562 | ;[[.three.]] 3 0 1 |
| 563 | [[.a.]] baa 1 2 |
| 564 | ;[[.right-curly-bracket.]] } 0 1 |
| 565 | ;[[.NUL.]] \0 0 1 |
| 566 | [[:<:]z] ! |
| 567 | [a[:>:]] ! |
| 568 | [[=a=]] a 0 1 |
| 569 | ;[[=right-curly-bracket=]] } 0 1 |
| 570 | - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE |
| 571 | [[.A.]] A 0 1 |
| 572 | [[.A.]] a 0 1 |
| 573 | [[.A.]-b]+ AaBb 0 4 |
| 574 | [A-[.b.]]+ AaBb 0 4 |
| 575 | [[.a.]-B]+ AaBb 0 4 |
| 576 | [a-[.B.]]+ AaBb 0 4 |
| 577 | - match_default normal REG_EXTENDED REG_STARTEND |
| 578 | [[.a.]-c]+ abcd 0 3 |
| 579 | [a-[.c.]]+ abcd 0 3 |
| 580 | [[:alpha:]-a] ! |
| 581 | [a-[:alpha:]] ! |
| 582 | |
| 583 | ; try multi-character ligatures: |
| 584 | ;[[.ae.]] ae 0 2 |
| 585 | ;[[.ae.]] aE -1 -1 |
| 586 | ;[[.AE.]] AE 0 2 |
| 587 | ;[[.Ae.]] Ae 0 2 |
| 588 | ;[[.ae.]-b] a -1 -1 |
| 589 | ;[[.ae.]-b] b 0 1 |
| 590 | ;[[.ae.]-b] ae 0 2 |
| 591 | ;[a-[.ae.]] a 0 1 |
| 592 | ;[a-[.ae.]] b -1 -1 |
| 593 | ;[a-[.ae.]] ae 0 2 |
| 594 | - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE |
| 595 | ;[[.ae.]] AE 0 2 |
| 596 | ;[[.ae.]] Ae 0 2 |
| 597 | ;[[.AE.]] Ae 0 2 |
| 598 | ;[[.Ae.]] aE 0 2 |
| 599 | ;[[.AE.]-B] a -1 -1 |
| 600 | ;[[.Ae.]-b] b 0 1 |
| 601 | ;[[.Ae.]-b] B 0 1 |
| 602 | ;[[.ae.]-b] AE 0 2 |
| 603 | |
| 604 | - match_default normal REG_EXTENDED REG_STARTEND REG_NO_POSIX_TEST |
| 605 | \s+ "ab ab" 2 5 |
| 606 | \S+ " abc " 2 5 |
| 607 | |
| 608 | - match_default normal REG_EXTENDED REG_STARTEND |
| 609 | \`abc abc 0 3 |
| 610 | \`abc aabc -1 -1 |
| 611 | abc\' abc 0 3 |
| 612 | abc\' abcd -1 -1 |
| 613 | abc\' abc\n\n -1 -1 |
| 614 | abc\' abc 0 3 |
| 615 | |
| 616 | ; extended repeat checking to exercise new algorithms: |
| 617 | ab.*xy abxy_ 0 4 |
| 618 | ab.*xy ab_xy_ 0 5 |
| 619 | ab.*xy abxy 0 4 |
| 620 | ab.*xy ab_xy 0 5 |
| 621 | ab.* ab 0 2 |
| 622 | ab.* ab__ 0 4 |
| 623 | |
| 624 | ab.{2,5}xy ab__xy_ 0 6 |
| 625 | ab.{2,5}xy ab____xy_ 0 8 |
| 626 | ab.{2,5}xy ab_____xy_ 0 9 |
| 627 | ab.{2,5}xy ab__xy 0 6 |
| 628 | ab.{2,5}xy ab_____xy 0 9 |
| 629 | ab.{2,5} ab__ 0 4 |
| 630 | ab.{2,5} ab_______ 0 7 |
| 631 | ab.{2,5}xy ab______xy -1 -1 |
| 632 | ab.{2,5}xy ab_xy -1 -1 |
| 633 | |
| 634 | ab.*?xy abxy_ 0 4 |
| 635 | ab.*?xy ab_xy_ 0 5 |
| 636 | ab.*?xy abxy 0 4 |
| 637 | ab.*?xy ab_xy 0 5 |
| 638 | ab.*? ab 0 2 |
| 639 | ab.*? ab__ 0 4 |
| 640 | |
| 641 | ab.{2,5}?xy ab__xy_ 0 6 |
| 642 | ab.{2,5}?xy ab____xy_ 0 8 |
| 643 | ab.{2,5}?xy ab_____xy_ 0 9 |
| 644 | ab.{2,5}?xy ab__xy 0 6 |
| 645 | ab.{2,5}?xy ab_____xy 0 9 |
| 646 | ab.{2,5}? ab__ 0 4 |
| 647 | ab.{2,5}? ab_______ 0 7 |
| 648 | ab.{2,5}?xy ab______xy -1 -1 |
| 649 | ab.{2,5}xy ab_xy -1 -1 |
| 650 | |
| 651 | ; again but with slower algorithm variant: |
| 652 | - match_default REG_EXTENDED |
| 653 | ; now again for single character repeats: |
| 654 | |
| 655 | ab_*xy abxy_ 0 4 |
| 656 | ab_*xy ab_xy_ 0 5 |
| 657 | ab_*xy abxy 0 4 |
| 658 | ab_*xy ab_xy 0 5 |
| 659 | ab_* ab 0 2 |
| 660 | ab_* ab__ 0 4 |
| 661 | |
| 662 | ab_{2,5}xy ab__xy_ 0 6 |
| 663 | ab_{2,5}xy ab____xy_ 0 8 |
| 664 | ab_{2,5}xy ab_____xy_ 0 9 |
| 665 | ab_{2,5}xy ab__xy 0 6 |
| 666 | ab_{2,5}xy ab_____xy 0 9 |
| 667 | ab_{2,5} ab__ 0 4 |
| 668 | ab_{2,5} ab_______ 0 7 |
| 669 | ab_{2,5}xy ab______xy -1 -1 |
| 670 | ab_{2,5}xy ab_xy -1 -1 |
| 671 | |
| 672 | ab_*?xy abxy_ 0 4 |
| 673 | ab_*?xy ab_xy_ 0 5 |
| 674 | ab_*?xy abxy 0 4 |
| 675 | ab_*?xy ab_xy 0 5 |
| 676 | ab_*? ab 0 2 |
| 677 | ab_*? ab__ 0 4 |
| 678 | |
| 679 | ab_{2,5}?xy ab__xy_ 0 6 |
| 680 | ab_{2,5}?xy ab____xy_ 0 8 |
| 681 | ab_{2,5}?xy ab_____xy_ 0 9 |
| 682 | ab_{2,5}?xy ab__xy 0 6 |
| 683 | ab_{2,5}?xy ab_____xy 0 9 |
| 684 | ab_{2,5}? ab__ 0 4 |
| 685 | ab_{2,5}? ab_______ 0 7 |
| 686 | ab_{2,5}?xy ab______xy -1 -1 |
| 687 | ab_{2,5}xy ab_xy -1 -1 |
| 688 | |
| 689 | ; and again for sets: |
| 690 | ab[_,;]*xy abxy_ 0 4 |
| 691 | ab[_,;]*xy ab_xy_ 0 5 |
| 692 | ab[_,;]*xy abxy 0 4 |
| 693 | ab[_,;]*xy ab_xy 0 5 |
| 694 | ab[_,;]* ab 0 2 |
| 695 | ab[_,;]* ab__ 0 4 |
| 696 | |
| 697 | ab[_,;]{2,5}xy ab__xy_ 0 6 |
| 698 | ab[_,;]{2,5}xy ab____xy_ 0 8 |
| 699 | ab[_,;]{2,5}xy ab_____xy_ 0 9 |
| 700 | ab[_,;]{2,5}xy ab__xy 0 6 |
| 701 | ab[_,;]{2,5}xy ab_____xy 0 9 |
| 702 | ab[_,;]{2,5} ab__ 0 4 |
| 703 | ab[_,;]{2,5} ab_______ 0 7 |
| 704 | ab[_,;]{2,5}xy ab______xy -1 -1 |
| 705 | ab[_,;]{2,5}xy ab_xy -1 -1 |
| 706 | |
| 707 | ab[_,;]*?xy abxy_ 0 4 |
| 708 | ab[_,;]*?xy ab_xy_ 0 5 |
| 709 | ab[_,;]*?xy abxy 0 4 |
| 710 | ab[_,;]*?xy ab_xy 0 5 |
| 711 | ab[_,;]*? ab 0 2 |
| 712 | ab[_,;]*? ab__ 0 4 |
| 713 | |
| 714 | ab[_,;]{2,5}?xy ab__xy_ 0 6 |
| 715 | ab[_,;]{2,5}?xy ab____xy_ 0 8 |
| 716 | ab[_,;]{2,5}?xy ab_____xy_ 0 9 |
| 717 | ab[_,;]{2,5}?xy ab__xy 0 6 |
| 718 | ab[_,;]{2,5}?xy ab_____xy 0 9 |
| 719 | ab[_,;]{2,5}? ab__ 0 4 |
| 720 | ab[_,;]{2,5}? ab_______ 0 7 |
| 721 | ab[_,;]{2,5}?xy ab______xy -1 -1 |
| 722 | ab[_,;]{2,5}xy ab_xy -1 -1 |
| 723 | |
| 724 | ; and again for tricky sets with digraphs: |
| 725 | ;ab[_[.ae.]]*xy abxy_ 0 4 |
| 726 | ;ab[_[.ae.]]*xy ab_xy_ 0 5 |
| 727 | ;ab[_[.ae.]]*xy abxy 0 4 |
| 728 | ;ab[_[.ae.]]*xy ab_xy 0 5 |
| 729 | ;ab[_[.ae.]]* ab 0 2 |
| 730 | ;ab[_[.ae.]]* ab__ 0 4 |
| 731 | |
| 732 | ;ab[_[.ae.]]{2,5}xy ab__xy_ 0 6 |
| 733 | ;ab[_[.ae.]]{2,5}xy ab____xy_ 0 8 |
| 734 | ;ab[_[.ae.]]{2,5}xy ab_____xy_ 0 9 |
| 735 | ;ab[_[.ae.]]{2,5}xy ab__xy 0 6 |
| 736 | ;ab[_[.ae.]]{2,5}xy ab_____xy 0 9 |
| 737 | ;ab[_[.ae.]]{2,5} ab__ 0 4 |
| 738 | ;ab[_[.ae.]]{2,5} ab_______ 0 7 |
| 739 | ;ab[_[.ae.]]{2,5}xy ab______xy -1 -1 |
| 740 | ;ab[_[.ae.]]{2,5}xy ab_xy -1 -1 |
| 741 | |
| 742 | ;ab[_[.ae.]]*?xy abxy_ 0 4 |
| 743 | ;ab[_[.ae.]]*?xy ab_xy_ 0 5 |
| 744 | ;ab[_[.ae.]]*?xy abxy 0 4 |
| 745 | ;ab[_[.ae.]]*?xy ab_xy 0 5 |
| 746 | ;ab[_[.ae.]]*? ab 0 2 |
| 747 | ;ab[_[.ae.]]*? ab__ 0 2 |
| 748 | |
| 749 | ;ab[_[.ae.]]{2,5}?xy ab__xy_ 0 6 |
| 750 | ;ab[_[.ae.]]{2,5}?xy ab____xy_ 0 8 |
| 751 | ;ab[_[.ae.]]{2,5}?xy ab_____xy_ 0 9 |
| 752 | ;ab[_[.ae.]]{2,5}?xy ab__xy 0 6 |
| 753 | ;ab[_[.ae.]]{2,5}?xy ab_____xy 0 9 |
| 754 | ;ab[_[.ae.]]{2,5}? ab__ 0 4 |
| 755 | ;ab[_[.ae.]]{2,5}? ab_______ 0 4 |
| 756 | ;ab[_[.ae.]]{2,5}?xy ab______xy -1 -1 |
| 757 | ;ab[_[.ae.]]{2,5}xy ab_xy -1 -1 |
| 758 | |
| 759 | ; new bugs detected in spring 2003: |
| 760 | - normal match_continuous REG_NO_POSIX_TEST |
| 761 | b abc 1 2 |
| 762 | |
| 763 | () abc 0 0 0 0 |
| 764 | ^() abc 0 0 0 0 |
| 765 | ^()+ abc 0 0 0 0 |
| 766 | ^(){1} abc 0 0 0 0 |
| 767 | ^(){2} abc 0 0 0 0 |
| 768 | ^((){2}) abc 0 0 0 0 0 0 |
| 769 | () "" 0 0 0 0 |
| 770 | ()\1 "" 0 0 0 0 |
| 771 | ()\1 a 0 0 0 0 |
| 772 | a()\1b ab 0 2 1 1 |
| 773 | a()b\1 ab 0 2 1 1 |
| 774 | |
| 775 | ; subtleties of matching with no sub-expressions marked |
| 776 | - normal match_nosubs REG_NO_POSIX_TEST |
| 777 | a(b?c)+d accd 0 4 |
| 778 | (wee|week)(knights|night) weeknights 0 10 |
| 779 | .* abc 0 3 |
| 780 | a(b|(c))d abd 0 3 |
| 781 | a(b|(c))d acd 0 3 |
| 782 | a(b*|c|e)d abbd 0 4 |
| 783 | a(b*|c|e)d acd 0 3 |
| 784 | a(b*|c|e)d ad 0 2 |
| 785 | a(b?)c abc 0 3 |
| 786 | a(b?)c ac 0 2 |
| 787 | a(b+)c abc 0 3 |
| 788 | a(b+)c abbbc 0 5 |
| 789 | a(b*)c ac 0 2 |
| 790 | (a|ab)(bc([de]+)f|cde) abcdef 0 6 |
| 791 | a([bc]?)c abc 0 3 |
| 792 | a([bc]?)c ac 0 2 |
| 793 | a([bc]+)c abc 0 3 |
| 794 | a([bc]+)c abcc 0 4 |
| 795 | a([bc]+)bc abcbc 0 5 |
| 796 | a(bb+|b)b abb 0 3 |
| 797 | a(bbb+|bb+|b)b abb 0 3 |
| 798 | a(bbb+|bb+|b)b abbb 0 4 |
| 799 | a(bbb+|bb+|b)bb abbb 0 4 |
| 800 | (.*).* abcdef 0 6 |
| 801 | (a*)* bc 0 0 |
| 802 | |
| 803 | - normal nosubs REG_NO_POSIX_TEST |
| 804 | a(b?c)+d accd 0 4 |
| 805 | (wee|week)(knights|night) weeknights 0 10 |
| 806 | .* abc 0 3 |
| 807 | a(b|(c))d abd 0 3 |
| 808 | a(b|(c))d acd 0 3 |
| 809 | a(b*|c|e)d abbd 0 4 |
| 810 | a(b*|c|e)d acd 0 3 |
| 811 | a(b*|c|e)d ad 0 2 |
| 812 | a(b?)c abc 0 3 |
| 813 | a(b?)c ac 0 2 |
| 814 | a(b+)c abc 0 3 |
| 815 | a(b+)c abbbc 0 5 |
| 816 | a(b*)c ac 0 2 |
| 817 | (a|ab)(bc([de]+)f|cde) abcdef 0 6 |
| 818 | a([bc]?)c abc 0 3 |
| 819 | a([bc]?)c ac 0 2 |
| 820 | a([bc]+)c abc 0 3 |
| 821 | a([bc]+)c abcc 0 4 |
| 822 | a([bc]+)bc abcbc 0 5 |
| 823 | a(bb+|b)b abb 0 3 |
| 824 | a(bbb+|bb+|b)b abb 0 3 |
| 825 | a(bbb+|bb+|b)b abbb 0 4 |
| 826 | a(bbb+|bb+|b)bb abbb 0 4 |
| 827 | (.*).* abcdef 0 6 |
| 828 | (a*)* bc 0 0 |
| 829 | |