yu.dong | c33b307 | 2024-08-21 23:14:49 -0700 | [diff] [blame^] | 1 | # GENERATE RECURSIVE DESCENT PARSER OBJECTS FROM A GRAMMAR |
| 2 | |
| 3 | use 5.006; |
| 4 | use strict; |
| 5 | |
| 6 | package Parse::RecDescent; |
| 7 | |
| 8 | use Text::Balanced qw ( extract_codeblock extract_bracketed extract_quotelike extract_delimited ); |
| 9 | |
| 10 | use vars qw ( $skip ); |
| 11 | |
| 12 | *defskip = \ '\s*'; # DEFAULT SEPARATOR IS OPTIONAL WHITESPACE |
| 13 | $skip = '\s*'; # UNIVERSAL SEPARATOR IS OPTIONAL WHITESPACE |
| 14 | my $MAXREP = 100_000_000; # REPETITIONS MATCH AT MOST 100,000,000 TIMES |
| 15 | |
| 16 | |
| 17 | #ifndef RUNTIME |
| 18 | sub import # IMPLEMENT PRECOMPILER BEHAVIOUR UNDER: |
| 19 | # perl -MParse::RecDescent - <grammarfile> <classname> |
| 20 | { |
| 21 | local *_die = sub { print @_, "\n"; exit }; |
| 22 | |
| 23 | my ($package, $file, $line) = caller; |
| 24 | |
| 25 | if ($file eq '-' && $line == 0) |
| 26 | { |
| 27 | _die("Usage: perl -MLocalTest - <grammarfile> <classname>") |
| 28 | unless @ARGV == 2; |
| 29 | |
| 30 | my ($sourcefile, $class) = @ARGV; |
| 31 | |
| 32 | local *IN; |
| 33 | open IN, $sourcefile |
| 34 | or _die(qq{Can't open grammar file "$sourcefile"}); |
| 35 | local $/; # |
| 36 | my $grammar = <IN>; |
| 37 | close IN; |
| 38 | |
| 39 | Parse::RecDescent->Precompile($grammar, $class, $sourcefile); |
| 40 | exit; |
| 41 | } |
| 42 | } |
| 43 | |
| 44 | sub Save |
| 45 | { |
| 46 | my $self = shift; |
| 47 | my %opt; |
| 48 | if ('HASH' eq ref $_[0]) { |
| 49 | %opt = (%opt, %{$_[0]}); |
| 50 | shift; |
| 51 | } |
| 52 | my ($class) = @_; |
| 53 | $self->{saving} = 1; |
| 54 | $self->Precompile(undef,$class); |
| 55 | $self->{saving} = 0; |
| 56 | } |
| 57 | |
| 58 | sub Precompile |
| 59 | { |
| 60 | my $self = shift; |
| 61 | my %opt = ( -standalone => 0 ); |
| 62 | if ('HASH' eq ref $_[0]) { |
| 63 | %opt = (%opt, %{$_[0]}); |
| 64 | shift; |
| 65 | } |
| 66 | my ($grammar, $class, $sourcefile) = @_; |
| 67 | |
| 68 | $class =~ /^(\w+::)*\w+$/ or croak("Bad class name: $class"); |
| 69 | |
| 70 | my $modulefile = $class; |
| 71 | $modulefile =~ s/.*:://; |
| 72 | $modulefile .= ".pm"; |
| 73 | |
| 74 | my $runtime_package = 'Parse::RecDescent::_Runtime'; |
| 75 | my $code; |
| 76 | |
| 77 | local *OUT; |
| 78 | open OUT, ">", $modulefile |
| 79 | or croak("Can't write to new module file '$modulefile'"); |
| 80 | |
| 81 | print STDERR "precompiling grammar from file '$sourcefile'\n", |
| 82 | "to class $class in module file '$modulefile'\n" |
| 83 | if $grammar && $sourcefile; |
| 84 | |
| 85 | # Make the resulting pre-compiled parser stand-alone by |
| 86 | # including the contents of Parse::RecDescent as |
| 87 | # Parse::RecDescent::Runtime in the resulting precompiled |
| 88 | # parser. |
| 89 | if ($opt{-standalone}) { |
| 90 | local *IN; |
| 91 | open IN, '<', $Parse::RecDescent::_FILENAME |
| 92 | or croak("Can't open $Parse::RecDescent::_FILENAME for standalone pre-compilation: $!\n"); |
| 93 | my $exclude = 0; |
| 94 | print OUT "{\n"; |
| 95 | while (<IN>) { |
| 96 | if ($_ =~ /^\s*#\s*ifndef\s+RUNTIME\s*$/) { |
| 97 | ++$exclude; |
| 98 | } |
| 99 | if ($exclude) { |
| 100 | if ($_ =~ /^\s*#\s*endif\s$/) { |
| 101 | --$exclude; |
| 102 | } |
| 103 | } else { |
| 104 | if ($_ =~ m/^__END__/) { |
| 105 | last; |
| 106 | } |
| 107 | s/Parse::RecDescent/$runtime_package/gs; |
| 108 | print OUT $_; |
| 109 | } |
| 110 | } |
| 111 | close IN; |
| 112 | print OUT "}\n"; |
| 113 | } |
| 114 | |
| 115 | $self = Parse::RecDescent->new($grammar, # $grammar |
| 116 | 1, # $compiling |
| 117 | $class # $namespace |
| 118 | ) |
| 119 | || croak("Can't compile bad grammar") |
| 120 | if $grammar; |
| 121 | |
| 122 | $self->{_precompiled} = 1; |
| 123 | |
| 124 | foreach ( keys %{$self->{rules}} ) { |
| 125 | $self->{rules}{$_}{changed} = 1; |
| 126 | } |
| 127 | |
| 128 | |
| 129 | print OUT "package $class;\n"; |
| 130 | if (not $opt{-standalone}) { |
| 131 | print OUT "use Parse::RecDescent;\n"; |
| 132 | } |
| 133 | |
| 134 | print OUT "{ my \$ERRORS;\n\n"; |
| 135 | |
| 136 | $code = $self->_code(); |
| 137 | if ($opt{-standalone}) { |
| 138 | $code =~ s/Parse::RecDescent/$runtime_package/gs; |
| 139 | } |
| 140 | print OUT $code; |
| 141 | |
| 142 | print OUT "}\npackage $class; sub new { "; |
| 143 | print OUT "my "; |
| 144 | |
| 145 | require Data::Dumper; |
| 146 | $code = Data::Dumper->Dump([$self], [qw(self)]); |
| 147 | if ($opt{-standalone}) { |
| 148 | $code =~ s/Parse::RecDescent/$runtime_package/gs; |
| 149 | } |
| 150 | print OUT $code; |
| 151 | |
| 152 | print OUT "}"; |
| 153 | |
| 154 | close OUT |
| 155 | or croak("Can't write to new module file '$modulefile'"); |
| 156 | } |
| 157 | #endif |
| 158 | |
| 159 | package Parse::RecDescent::LineCounter; |
| 160 | |
| 161 | |
| 162 | sub TIESCALAR # ($classname, \$text, $thisparser, $prevflag) |
| 163 | { |
| 164 | bless { |
| 165 | text => $_[1], |
| 166 | parser => $_[2], |
| 167 | prev => $_[3]?1:0, |
| 168 | }, $_[0]; |
| 169 | } |
| 170 | |
| 171 | sub FETCH |
| 172 | { |
| 173 | my $parser = $_[0]->{parser}; |
| 174 | my $cache = $parser->{linecounter_cache}; |
| 175 | my $from = $parser->{fulltextlen}-length(${$_[0]->{text}})-$_[0]->{prev} |
| 176 | ; |
| 177 | |
| 178 | unless (exists $cache->{$from}) |
| 179 | { |
| 180 | $parser->{lastlinenum} = $parser->{offsetlinenum} |
| 181 | - Parse::RecDescent::_linecount(substr($parser->{fulltext},$from)) |
| 182 | + 1; |
| 183 | $cache->{$from} = $parser->{lastlinenum}; |
| 184 | } |
| 185 | return $cache->{$from}; |
| 186 | } |
| 187 | |
| 188 | sub STORE |
| 189 | { |
| 190 | my $parser = $_[0]->{parser}; |
| 191 | $parser->{offsetlinenum} -= $parser->{lastlinenum} - $_[1]; |
| 192 | return undef; |
| 193 | } |
| 194 | |
| 195 | sub resync # ($linecounter) |
| 196 | { |
| 197 | my $self = tied($_[0]); |
| 198 | die "Tried to alter something other than a LineCounter\n" |
| 199 | unless $self =~ /Parse::RecDescent::LineCounter/; |
| 200 | |
| 201 | my $parser = $self->{parser}; |
| 202 | my $apparently = $parser->{offsetlinenum} |
| 203 | - Parse::RecDescent::_linecount(${$self->{text}}) |
| 204 | + 1; |
| 205 | |
| 206 | $parser->{offsetlinenum} += $parser->{lastlinenum} - $apparently; |
| 207 | return 1; |
| 208 | } |
| 209 | |
| 210 | package Parse::RecDescent::ColCounter; |
| 211 | |
| 212 | sub TIESCALAR # ($classname, \$text, $thisparser, $prevflag) |
| 213 | { |
| 214 | bless { |
| 215 | text => $_[1], |
| 216 | parser => $_[2], |
| 217 | prev => $_[3]?1:0, |
| 218 | }, $_[0]; |
| 219 | } |
| 220 | |
| 221 | sub FETCH |
| 222 | { |
| 223 | my $parser = $_[0]->{parser}; |
| 224 | my $missing = $parser->{fulltextlen}-length(${$_[0]->{text}})-$_[0]->{prev}+1; |
| 225 | substr($parser->{fulltext},0,$missing) =~ m/^(.*)\Z/m; |
| 226 | return length($1); |
| 227 | } |
| 228 | |
| 229 | sub STORE |
| 230 | { |
| 231 | die "Can't set column number via \$thiscolumn\n"; |
| 232 | } |
| 233 | |
| 234 | |
| 235 | package Parse::RecDescent::OffsetCounter; |
| 236 | |
| 237 | sub TIESCALAR # ($classname, \$text, $thisparser, $prev) |
| 238 | { |
| 239 | bless { |
| 240 | text => $_[1], |
| 241 | parser => $_[2], |
| 242 | prev => $_[3]?-1:0, |
| 243 | }, $_[0]; |
| 244 | } |
| 245 | |
| 246 | sub FETCH |
| 247 | { |
| 248 | my $parser = $_[0]->{parser}; |
| 249 | return $parser->{fulltextlen}-length(${$_[0]->{text}})+$_[0]->{prev}; |
| 250 | } |
| 251 | |
| 252 | sub STORE |
| 253 | { |
| 254 | die "Can't set current offset via \$thisoffset or \$prevoffset\n"; |
| 255 | } |
| 256 | |
| 257 | |
| 258 | |
| 259 | package Parse::RecDescent::Rule; |
| 260 | |
| 261 | sub new ($$$$$) |
| 262 | { |
| 263 | my $class = ref($_[0]) || $_[0]; |
| 264 | my $name = $_[1]; |
| 265 | my $owner = $_[2]; |
| 266 | my $line = $_[3]; |
| 267 | my $replace = $_[4]; |
| 268 | |
| 269 | if (defined $owner->{"rules"}{$name}) |
| 270 | { |
| 271 | my $self = $owner->{"rules"}{$name}; |
| 272 | if ($replace && !$self->{"changed"}) |
| 273 | { |
| 274 | $self->reset; |
| 275 | } |
| 276 | return $self; |
| 277 | } |
| 278 | else |
| 279 | { |
| 280 | return $owner->{"rules"}{$name} = |
| 281 | bless |
| 282 | { |
| 283 | "name" => $name, |
| 284 | "prods" => [], |
| 285 | "calls" => [], |
| 286 | "changed" => 0, |
| 287 | "line" => $line, |
| 288 | "impcount" => 0, |
| 289 | "opcount" => 0, |
| 290 | "vars" => "", |
| 291 | }, $class; |
| 292 | } |
| 293 | } |
| 294 | |
| 295 | sub reset($) |
| 296 | { |
| 297 | @{$_[0]->{"prods"}} = (); |
| 298 | @{$_[0]->{"calls"}} = (); |
| 299 | $_[0]->{"changed"} = 0; |
| 300 | $_[0]->{"impcount"} = 0; |
| 301 | $_[0]->{"opcount"} = 0; |
| 302 | $_[0]->{"vars"} = ""; |
| 303 | } |
| 304 | |
| 305 | sub DESTROY {} |
| 306 | |
| 307 | sub hasleftmost($$) |
| 308 | { |
| 309 | my ($self, $ref) = @_; |
| 310 | |
| 311 | my $prod; |
| 312 | foreach $prod ( @{$self->{"prods"}} ) |
| 313 | { |
| 314 | return 1 if $prod->hasleftmost($ref); |
| 315 | } |
| 316 | |
| 317 | return 0; |
| 318 | } |
| 319 | |
| 320 | sub leftmostsubrules($) |
| 321 | { |
| 322 | my $self = shift; |
| 323 | my @subrules = (); |
| 324 | |
| 325 | my $prod; |
| 326 | foreach $prod ( @{$self->{"prods"}} ) |
| 327 | { |
| 328 | push @subrules, $prod->leftmostsubrule(); |
| 329 | } |
| 330 | |
| 331 | return @subrules; |
| 332 | } |
| 333 | |
| 334 | sub expected($) |
| 335 | { |
| 336 | my $self = shift; |
| 337 | my @expected = (); |
| 338 | |
| 339 | my $prod; |
| 340 | foreach $prod ( @{$self->{"prods"}} ) |
| 341 | { |
| 342 | my $next = $prod->expected(); |
| 343 | unless (! $next or _contains($next,@expected) ) |
| 344 | { |
| 345 | push @expected, $next; |
| 346 | } |
| 347 | } |
| 348 | |
| 349 | return join ', or ', @expected; |
| 350 | } |
| 351 | |
| 352 | sub _contains($@) |
| 353 | { |
| 354 | my $target = shift; |
| 355 | my $item; |
| 356 | foreach $item ( @_ ) { return 1 if $target eq $item; } |
| 357 | return 0; |
| 358 | } |
| 359 | |
| 360 | sub addcall($$) |
| 361 | { |
| 362 | my ( $self, $subrule ) = @_; |
| 363 | unless ( _contains($subrule, @{$self->{"calls"}}) ) |
| 364 | { |
| 365 | push @{$self->{"calls"}}, $subrule; |
| 366 | } |
| 367 | } |
| 368 | |
| 369 | sub addprod($$) |
| 370 | { |
| 371 | my ( $self, $prod ) = @_; |
| 372 | push @{$self->{"prods"}}, $prod; |
| 373 | $self->{"changed"} = 1; |
| 374 | $self->{"impcount"} = 0; |
| 375 | $self->{"opcount"} = 0; |
| 376 | $prod->{"number"} = $#{$self->{"prods"}}; |
| 377 | return $prod; |
| 378 | } |
| 379 | |
| 380 | sub addvar |
| 381 | { |
| 382 | my ( $self, $var, $parser ) = @_; |
| 383 | if ($var =~ /\A\s*local\s+([%@\$]\w+)/) |
| 384 | { |
| 385 | $parser->{localvars} .= " $1"; |
| 386 | $self->{"vars"} .= "$var;\n" } |
| 387 | else |
| 388 | { $self->{"vars"} .= "my $var;\n" } |
| 389 | $self->{"changed"} = 1; |
| 390 | return 1; |
| 391 | } |
| 392 | |
| 393 | sub addautoscore |
| 394 | { |
| 395 | my ( $self, $code ) = @_; |
| 396 | $self->{"autoscore"} = $code; |
| 397 | $self->{"changed"} = 1; |
| 398 | return 1; |
| 399 | } |
| 400 | |
| 401 | sub nextoperator($) |
| 402 | { |
| 403 | my $self = shift; |
| 404 | my $prodcount = scalar @{$self->{"prods"}}; |
| 405 | my $opcount = ++$self->{"opcount"}; |
| 406 | return "_operator_${opcount}_of_production_${prodcount}_of_rule_$self->{name}"; |
| 407 | } |
| 408 | |
| 409 | sub nextimplicit($) |
| 410 | { |
| 411 | my $self = shift; |
| 412 | my $prodcount = scalar @{$self->{"prods"}}; |
| 413 | my $impcount = ++$self->{"impcount"}; |
| 414 | return "_alternation_${impcount}_of_production_${prodcount}_of_rule_$self->{name}"; |
| 415 | } |
| 416 | |
| 417 | |
| 418 | sub code |
| 419 | { |
| 420 | my ($self, $namespace, $parser, $check) = @_; |
| 421 | |
| 422 | eval 'undef &' . $namespace . '::' . $self->{"name"} unless $parser->{saving}; |
| 423 | |
| 424 | my $code = |
| 425 | ' |
| 426 | # ARGS ARE: ($parser, $text; $repeating, $_noactions, $_itempos, \@args) |
| 427 | sub ' . $namespace . '::' . $self->{"name"} . ' |
| 428 | { |
| 429 | my $thisparser = $_[0]; |
| 430 | use vars q{$tracelevel}; |
| 431 | local $tracelevel = ($tracelevel||0)+1; |
| 432 | $ERRORS = 0; |
| 433 | my $thisrule = $thisparser->{"rules"}{"' . $self->{"name"} . '"}; |
| 434 | |
| 435 | Parse::RecDescent::_trace(q{Trying rule: [' . $self->{"name"} . ']}, |
| 436 | Parse::RecDescent::_tracefirst($_[1]), |
| 437 | q{' . $self->{"name"} . '}, |
| 438 | $tracelevel) |
| 439 | if defined $::RD_TRACE; |
| 440 | |
| 441 | ' . ($parser->{deferrable} |
| 442 | ? 'my $def_at = @{$thisparser->{deferred}};' |
| 443 | : '') . |
| 444 | ' |
| 445 | my $err_at = @{$thisparser->{errors}}; |
| 446 | |
| 447 | my $score; |
| 448 | my $score_return; |
| 449 | my $_tok; |
| 450 | my $return = undef; |
| 451 | my $_matched=0; |
| 452 | my $commit=0; |
| 453 | my @item = (); |
| 454 | my %item = (); |
| 455 | my $repeating = $_[2]; |
| 456 | my $_noactions = $_[3]; |
| 457 | my $_itempos = $_[4]; |
| 458 | my @arg = defined $_[5] ? @{ &{$_[5]} } : (); |
| 459 | my %arg = ($#arg & 01) ? @arg : (@arg, undef); |
| 460 | my $text; |
| 461 | my $lastsep; |
| 462 | my $current_match; |
| 463 | my $expectation = new Parse::RecDescent::Expectation(q{' . $self->expected() . '}); |
| 464 | $expectation->at($_[1]); |
| 465 | '. ($parser->{_check}{thisoffset}?' |
| 466 | my $thisoffset; |
| 467 | tie $thisoffset, q{Parse::RecDescent::OffsetCounter}, \$text, $thisparser; |
| 468 | ':'') . ($parser->{_check}{prevoffset}?' |
| 469 | my $prevoffset; |
| 470 | tie $prevoffset, q{Parse::RecDescent::OffsetCounter}, \$text, $thisparser, 1; |
| 471 | ':'') . ($parser->{_check}{thiscolumn}?' |
| 472 | my $thiscolumn; |
| 473 | tie $thiscolumn, q{Parse::RecDescent::ColCounter}, \$text, $thisparser; |
| 474 | ':'') . ($parser->{_check}{prevcolumn}?' |
| 475 | my $prevcolumn; |
| 476 | tie $prevcolumn, q{Parse::RecDescent::ColCounter}, \$text, $thisparser, 1; |
| 477 | ':'') . ($parser->{_check}{prevline}?' |
| 478 | my $prevline; |
| 479 | tie $prevline, q{Parse::RecDescent::LineCounter}, \$text, $thisparser, 1; |
| 480 | ':'') . ' |
| 481 | my $thisline; |
| 482 | tie $thisline, q{Parse::RecDescent::LineCounter}, \$text, $thisparser; |
| 483 | |
| 484 | '. $self->{vars} .' |
| 485 | '; |
| 486 | |
| 487 | my $prod; |
| 488 | foreach $prod ( @{$self->{"prods"}} ) |
| 489 | { |
| 490 | $prod->addscore($self->{autoscore},0,0) if $self->{autoscore}; |
| 491 | next unless $prod->checkleftmost(); |
| 492 | $code .= $prod->code($namespace,$self,$parser); |
| 493 | |
| 494 | $code .= $parser->{deferrable} |
| 495 | ? ' splice |
| 496 | @{$thisparser->{deferred}}, $def_at unless $_matched; |
| 497 | ' |
| 498 | : ''; |
| 499 | } |
| 500 | |
| 501 | $code .= |
| 502 | ' |
| 503 | unless ( $_matched || defined($score) ) |
| 504 | { |
| 505 | ' .($parser->{deferrable} |
| 506 | ? ' splice @{$thisparser->{deferred}}, $def_at; |
| 507 | ' |
| 508 | : '') . ' |
| 509 | |
| 510 | $_[1] = $text; # NOT SURE THIS IS NEEDED |
| 511 | Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' rule>>}, |
| 512 | Parse::RecDescent::_tracefirst($_[1]), |
| 513 | q{' . $self->{"name"} .'}, |
| 514 | $tracelevel) |
| 515 | if defined $::RD_TRACE; |
| 516 | return undef; |
| 517 | } |
| 518 | if (!defined($return) && defined($score)) |
| 519 | { |
| 520 | Parse::RecDescent::_trace(q{>>Accepted scored production<<}, "", |
| 521 | q{' . $self->{"name"} .'}, |
| 522 | $tracelevel) |
| 523 | if defined $::RD_TRACE; |
| 524 | $return = $score_return; |
| 525 | } |
| 526 | splice @{$thisparser->{errors}}, $err_at; |
| 527 | $return = $item[$#item] unless defined $return; |
| 528 | if (defined $::RD_TRACE) |
| 529 | { |
| 530 | Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' rule<< (return value: [} . |
| 531 | $return . q{])}, "", |
| 532 | q{' . $self->{"name"} .'}, |
| 533 | $tracelevel); |
| 534 | Parse::RecDescent::_trace(q{(consumed: [} . |
| 535 | Parse::RecDescent::_tracemax(substr($_[1],0,-length($text))) . q{])}, |
| 536 | Parse::RecDescent::_tracefirst($text), |
| 537 | , q{' . $self->{"name"} .'}, |
| 538 | $tracelevel) |
| 539 | } |
| 540 | $_[1] = $text; |
| 541 | return $return; |
| 542 | } |
| 543 | '; |
| 544 | |
| 545 | return $code; |
| 546 | } |
| 547 | |
| 548 | my @left; |
| 549 | sub isleftrec($$) |
| 550 | { |
| 551 | my ($self, $rules) = @_; |
| 552 | my $root = $self->{"name"}; |
| 553 | @left = $self->leftmostsubrules(); |
| 554 | my $next; |
| 555 | foreach $next ( @left ) |
| 556 | { |
| 557 | next unless defined $rules->{$next}; # SKIP NON-EXISTENT RULES |
| 558 | return 1 if $next eq $root; |
| 559 | my $child; |
| 560 | foreach $child ( $rules->{$next}->leftmostsubrules() ) |
| 561 | { |
| 562 | push(@left, $child) |
| 563 | if ! _contains($child, @left) ; |
| 564 | } |
| 565 | } |
| 566 | return 0; |
| 567 | } |
| 568 | |
| 569 | package Parse::RecDescent::Production; |
| 570 | |
| 571 | sub describe ($;$) |
| 572 | { |
| 573 | return join ' ', map { $_->describe($_[1]) or () } @{$_[0]->{items}}; |
| 574 | } |
| 575 | |
| 576 | sub new ($$;$$) |
| 577 | { |
| 578 | my ($self, $line, $uncommit, $error) = @_; |
| 579 | my $class = ref($self) || $self; |
| 580 | |
| 581 | bless |
| 582 | { |
| 583 | "items" => [], |
| 584 | "uncommit" => $uncommit, |
| 585 | "error" => $error, |
| 586 | "line" => $line, |
| 587 | strcount => 0, |
| 588 | patcount => 0, |
| 589 | dircount => 0, |
| 590 | actcount => 0, |
| 591 | }, $class; |
| 592 | } |
| 593 | |
| 594 | sub expected ($) |
| 595 | { |
| 596 | my $itemcount = scalar @{$_[0]->{"items"}}; |
| 597 | return ($itemcount) ? $_[0]->{"items"}[0]->describe(1) : ''; |
| 598 | } |
| 599 | |
| 600 | sub hasleftmost ($$) |
| 601 | { |
| 602 | my ($self, $ref) = @_; |
| 603 | return ${$self->{"items"}}[0] eq $ref if scalar @{$self->{"items"}}; |
| 604 | return 0; |
| 605 | } |
| 606 | |
| 607 | sub isempty($) |
| 608 | { |
| 609 | my $self = shift; |
| 610 | return 0 == @{$self->{"items"}}; |
| 611 | } |
| 612 | |
| 613 | sub leftmostsubrule($) |
| 614 | { |
| 615 | my $self = shift; |
| 616 | |
| 617 | if ( $#{$self->{"items"}} >= 0 ) |
| 618 | { |
| 619 | my $subrule = $self->{"items"}[0]->issubrule(); |
| 620 | return $subrule if defined $subrule; |
| 621 | } |
| 622 | |
| 623 | return (); |
| 624 | } |
| 625 | |
| 626 | sub checkleftmost($) |
| 627 | { |
| 628 | my @items = @{$_[0]->{"items"}}; |
| 629 | if (@items==1 && ref($items[0]) =~ /\AParse::RecDescent::Error/ |
| 630 | && $items[0]->{commitonly} ) |
| 631 | { |
| 632 | Parse::RecDescent::_warn(2,"Lone <error?> in production treated |
| 633 | as <error?> <reject>"); |
| 634 | Parse::RecDescent::_hint("A production consisting of a single |
| 635 | conditional <error?> directive would |
| 636 | normally succeed (with the value zero) if the |
| 637 | rule is not 'commited' when it is |
| 638 | tried. Since you almost certainly wanted |
| 639 | '<error?> <reject>' Parse::RecDescent |
| 640 | supplied it for you."); |
| 641 | push @{$_[0]->{items}}, |
| 642 | Parse::RecDescent::UncondReject->new(0,0,'<reject>'); |
| 643 | } |
| 644 | elsif (@items==1 && ($items[0]->describe||"") =~ /<rulevar|<autoscore/) |
| 645 | { |
| 646 | # Do nothing |
| 647 | } |
| 648 | elsif (@items && |
| 649 | ( ref($items[0]) =~ /\AParse::RecDescent::UncondReject/ |
| 650 | || ($items[0]->describe||"") =~ /<autoscore/ |
| 651 | )) |
| 652 | { |
| 653 | Parse::RecDescent::_warn(1,"Optimizing away production: [". $_[0]->describe ."]"); |
| 654 | my $what = $items[0]->describe =~ /<rulevar/ |
| 655 | ? "a <rulevar> (which acts like an unconditional <reject> during parsing)" |
| 656 | : $items[0]->describe =~ /<autoscore/ |
| 657 | ? "an <autoscore> (which acts like an unconditional <reject> during parsing)" |
| 658 | : "an unconditional <reject>"; |
| 659 | my $caveat = $items[0]->describe =~ /<rulevar/ |
| 660 | ? " after the specified variable was set up" |
| 661 | : ""; |
| 662 | my $advice = @items > 1 |
| 663 | ? "However, there were also other (useless) items after the leading " |
| 664 | . $items[0]->describe |
| 665 | . ", so you may have been expecting some other behaviour." |
| 666 | : "You can safely ignore this message."; |
| 667 | Parse::RecDescent::_hint("The production starts with $what. That means that the |
| 668 | production can never successfully match, so it was |
| 669 | optimized out of the final parser$caveat. $advice"); |
| 670 | return 0; |
| 671 | } |
| 672 | return 1; |
| 673 | } |
| 674 | |
| 675 | sub changesskip($) |
| 676 | { |
| 677 | my $item; |
| 678 | foreach $item (@{$_[0]->{"items"}}) |
| 679 | { |
| 680 | if (ref($item) =~ /Parse::RecDescent::(Action|Directive)/) |
| 681 | { |
| 682 | return 1 if $item->{code} =~ /\$skip\s*=/; |
| 683 | } |
| 684 | } |
| 685 | return 0; |
| 686 | } |
| 687 | |
| 688 | sub adddirective |
| 689 | { |
| 690 | my ( $self, $whichop, $line, $name ) = @_; |
| 691 | push @{$self->{op}}, |
| 692 | { type=>$whichop, line=>$line, name=>$name, |
| 693 | offset=> scalar(@{$self->{items}}) }; |
| 694 | } |
| 695 | |
| 696 | sub addscore |
| 697 | { |
| 698 | my ( $self, $code, $lookahead, $line ) = @_; |
| 699 | $self->additem(Parse::RecDescent::Directive->new( |
| 700 | "local \$^W; |
| 701 | my \$thisscore = do { $code } + 0; |
| 702 | if (!defined(\$score) || \$thisscore>\$score) |
| 703 | { \$score=\$thisscore; \$score_return=\$item[-1]; } |
| 704 | undef;", $lookahead, $line,"<score: $code>") ) |
| 705 | unless $self->{items}[-1]->describe =~ /<score/; |
| 706 | return 1; |
| 707 | } |
| 708 | |
| 709 | sub check_pending |
| 710 | { |
| 711 | my ( $self, $line ) = @_; |
| 712 | if ($self->{op}) |
| 713 | { |
| 714 | while (my $next = pop @{$self->{op}}) |
| 715 | { |
| 716 | Parse::RecDescent::_error("Incomplete <$next->{type}op:...>.", $line); |
| 717 | Parse::RecDescent::_hint( |
| 718 | "The current production ended without completing the |
| 719 | <$next->{type}op:...> directive that started near line |
| 720 | $next->{line}. Did you forget the closing '>'?"); |
| 721 | } |
| 722 | } |
| 723 | return 1; |
| 724 | } |
| 725 | |
| 726 | sub enddirective |
| 727 | { |
| 728 | my ( $self, $line, $minrep, $maxrep ) = @_; |
| 729 | unless ($self->{op}) |
| 730 | { |
| 731 | Parse::RecDescent::_error("Unmatched > found.", $line); |
| 732 | Parse::RecDescent::_hint( |
| 733 | "A '>' angle bracket was encountered, which typically |
| 734 | indicates the end of a directive. However no suitable |
| 735 | preceding directive was encountered. Typically this |
| 736 | indicates either a extra '>' in the grammar, or a |
| 737 | problem inside the previous directive."); |
| 738 | return; |
| 739 | } |
| 740 | my $op = pop @{$self->{op}}; |
| 741 | my $span = @{$self->{items}} - $op->{offset}; |
| 742 | if ($op->{type} =~ /left|right/) |
| 743 | { |
| 744 | if ($span != 3) |
| 745 | { |
| 746 | Parse::RecDescent::_error( |
| 747 | "Incorrect <$op->{type}op:...> specification: |
| 748 | expected 3 args, but found $span instead", $line); |
| 749 | Parse::RecDescent::_hint( |
| 750 | "The <$op->{type}op:...> directive requires a |
| 751 | sequence of exactly three elements. For example: |
| 752 | <$op->{type}op:leftarg /op/ rightarg>"); |
| 753 | } |
| 754 | else |
| 755 | { |
| 756 | push @{$self->{items}}, |
| 757 | Parse::RecDescent::Operator->new( |
| 758 | $op->{type}, $minrep, $maxrep, splice(@{$self->{"items"}}, -3)); |
| 759 | $self->{items}[-1]->sethashname($self); |
| 760 | $self->{items}[-1]{name} = $op->{name}; |
| 761 | } |
| 762 | } |
| 763 | } |
| 764 | |
| 765 | sub prevwasreturn |
| 766 | { |
| 767 | my ( $self, $line ) = @_; |
| 768 | unless (@{$self->{items}}) |
| 769 | { |
| 770 | Parse::RecDescent::_error( |
| 771 | "Incorrect <return:...> specification: |
| 772 | expected item missing", $line); |
| 773 | Parse::RecDescent::_hint( |
| 774 | "The <return:...> directive requires a |
| 775 | sequence of at least one item. For example: |
| 776 | <return: list>"); |
| 777 | return; |
| 778 | } |
| 779 | push @{$self->{items}}, |
| 780 | Parse::RecDescent::Result->new(); |
| 781 | } |
| 782 | |
| 783 | sub additem |
| 784 | { |
| 785 | my ( $self, $item ) = @_; |
| 786 | $item->sethashname($self); |
| 787 | push @{$self->{"items"}}, $item; |
| 788 | return $item; |
| 789 | } |
| 790 | |
| 791 | sub _duplicate_itempos |
| 792 | { |
| 793 | my ($src) = @_; |
| 794 | my $dst = {}; |
| 795 | |
| 796 | foreach (keys %$src) |
| 797 | { |
| 798 | %{$dst->{$_}} = %{$src->{$_}}; |
| 799 | } |
| 800 | $dst; |
| 801 | } |
| 802 | |
| 803 | sub _update_itempos |
| 804 | { |
| 805 | my ($dst, $src, $typekeys, $poskeys) = @_; |
| 806 | |
| 807 | my @typekeys = 'ARRAY' eq ref $typekeys ? |
| 808 | @$typekeys : |
| 809 | keys %$src; |
| 810 | |
| 811 | foreach my $k (keys %$src) |
| 812 | { |
| 813 | if ('ARRAY' eq ref $poskeys) |
| 814 | { |
| 815 | @{$dst->{$k}}{@$poskeys} = @{$src->{$k}}{@$poskeys}; |
| 816 | } |
| 817 | else |
| 818 | { |
| 819 | %{$dst->{$k}} = %{$src->{$k}}; |
| 820 | } |
| 821 | } |
| 822 | } |
| 823 | |
| 824 | sub preitempos |
| 825 | { |
| 826 | return q |
| 827 | { |
| 828 | push @itempos, {'offset' => {'from'=>$thisoffset, 'to'=>undef}, |
| 829 | 'line' => {'from'=>$thisline, 'to'=>undef}, |
| 830 | 'column' => {'from'=>$thiscolumn, 'to'=>undef} }; |
| 831 | } |
| 832 | } |
| 833 | |
| 834 | sub incitempos |
| 835 | { |
| 836 | return q |
| 837 | { |
| 838 | $itempos[$#itempos]{'offset'}{'from'} += length($lastsep); |
| 839 | $itempos[$#itempos]{'line'}{'from'} = $thisline; |
| 840 | $itempos[$#itempos]{'column'}{'from'} = $thiscolumn; |
| 841 | } |
| 842 | } |
| 843 | |
| 844 | sub unincitempos |
| 845 | { |
| 846 | # the next incitempos will properly set these two fields, but |
| 847 | # {'offset'}{'from'} needs to be decreased by length($lastsep) |
| 848 | # $itempos[$#itempos]{'line'}{'from'} |
| 849 | # $itempos[$#itempos]{'column'}{'from'} |
| 850 | return q |
| 851 | { |
| 852 | $itempos[$#itempos]{'offset'}{'from'} -= length($lastsep) if defined $lastsep; |
| 853 | } |
| 854 | } |
| 855 | |
| 856 | sub postitempos |
| 857 | { |
| 858 | return q |
| 859 | { |
| 860 | $itempos[$#itempos]{'offset'}{'to'} = $prevoffset; |
| 861 | $itempos[$#itempos]{'line'}{'to'} = $prevline; |
| 862 | $itempos[$#itempos]{'column'}{'to'} = $prevcolumn; |
| 863 | } |
| 864 | } |
| 865 | |
| 866 | sub code($$$$) |
| 867 | { |
| 868 | my ($self,$namespace,$rule,$parser) = @_; |
| 869 | my $code = |
| 870 | ' |
| 871 | while (!$_matched' |
| 872 | . (defined $self->{"uncommit"} ? '' : ' && !$commit') |
| 873 | . ') |
| 874 | { |
| 875 | ' . |
| 876 | ($self->changesskip() |
| 877 | ? 'local $skip = defined($skip) ? $skip : $Parse::RecDescent::skip;' |
| 878 | : '') .' |
| 879 | Parse::RecDescent::_trace(q{Trying production: [' |
| 880 | . $self->describe . ']}, |
| 881 | Parse::RecDescent::_tracefirst($_[1]), |
| 882 | q{' . $rule ->{name}. '}, |
| 883 | $tracelevel) |
| 884 | if defined $::RD_TRACE; |
| 885 | my $thisprod = $thisrule->{"prods"}[' . $self->{"number"} . ']; |
| 886 | ' . (defined $self->{"error"} ? '' : '$text = $_[1];' ) . ' |
| 887 | my $_savetext; |
| 888 | @item = (q{' . $rule->{"name"} . '}); |
| 889 | %item = (__RULE__ => q{' . $rule->{"name"} . '}); |
| 890 | my $repcount = 0; |
| 891 | |
| 892 | '; |
| 893 | $code .= |
| 894 | ' my @itempos = ({}); |
| 895 | ' if $parser->{_check}{itempos}; |
| 896 | |
| 897 | my $item; |
| 898 | my $i; |
| 899 | |
| 900 | for ($i = 0; $i < @{$self->{"items"}}; $i++) |
| 901 | { |
| 902 | $item = ${$self->{items}}[$i]; |
| 903 | |
| 904 | $code .= preitempos() if $parser->{_check}{itempos}; |
| 905 | |
| 906 | $code .= $item->code($namespace,$rule,$parser->{_check}); |
| 907 | |
| 908 | $code .= postitempos() if $parser->{_check}{itempos}; |
| 909 | |
| 910 | } |
| 911 | |
| 912 | if ($parser->{_AUTOACTION} && defined($item) && !$item->isa("Parse::RecDescent::Action")) |
| 913 | { |
| 914 | $code .= $parser->{_AUTOACTION}->code($namespace,$rule); |
| 915 | Parse::RecDescent::_warn(1,"Autogenerating action in rule |
| 916 | \"$rule->{name}\": |
| 917 | $parser->{_AUTOACTION}{code}") |
| 918 | and |
| 919 | Parse::RecDescent::_hint("The \$::RD_AUTOACTION was defined, |
| 920 | so any production not ending in an |
| 921 | explicit action has the specified |
| 922 | \"auto-action\" automatically |
| 923 | appended."); |
| 924 | } |
| 925 | elsif ($parser->{_AUTOTREE} && defined($item) && !$item->isa("Parse::RecDescent::Action")) |
| 926 | { |
| 927 | if ($i==1 && $item->isterminal) |
| 928 | { |
| 929 | $code .= $parser->{_AUTOTREE}{TERMINAL}->code($namespace,$rule); |
| 930 | } |
| 931 | else |
| 932 | { |
| 933 | $code .= $parser->{_AUTOTREE}{NODE}->code($namespace,$rule); |
| 934 | } |
| 935 | Parse::RecDescent::_warn(1,"Autogenerating tree-building action in rule |
| 936 | \"$rule->{name}\"") |
| 937 | and |
| 938 | Parse::RecDescent::_hint("The directive <autotree> was specified, |
| 939 | so any production not ending |
| 940 | in an explicit action has |
| 941 | some parse-tree building code |
| 942 | automatically appended."); |
| 943 | } |
| 944 | |
| 945 | $code .= |
| 946 | ' |
| 947 | Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' production: [' |
| 948 | . $self->describe . ']<<}, |
| 949 | Parse::RecDescent::_tracefirst($text), |
| 950 | q{' . $rule->{name} . '}, |
| 951 | $tracelevel) |
| 952 | if defined $::RD_TRACE; |
| 953 | |
| 954 | ' . ( $parser->{_check}{itempos} ? ' |
| 955 | if ( defined($_itempos) ) |
| 956 | { |
| 957 | Parse::RecDescent::Production::_update_itempos($_itempos, $itempos[ 1], undef, [qw(from)]); |
| 958 | Parse::RecDescent::Production::_update_itempos($_itempos, $itempos[-1], undef, [qw(to)]); |
| 959 | } |
| 960 | ' : '' ) . ' |
| 961 | |
| 962 | $_matched = 1; |
| 963 | last; |
| 964 | } |
| 965 | |
| 966 | '; |
| 967 | return $code; |
| 968 | } |
| 969 | |
| 970 | 1; |
| 971 | |
| 972 | package Parse::RecDescent::Action; |
| 973 | |
| 974 | sub describe { undef } |
| 975 | |
| 976 | sub sethashname { $_[0]->{hashname} = '__ACTION' . ++$_[1]->{actcount} .'__'; } |
| 977 | |
| 978 | sub new |
| 979 | { |
| 980 | my $class = ref($_[0]) || $_[0]; |
| 981 | bless |
| 982 | { |
| 983 | "code" => $_[1], |
| 984 | "lookahead" => $_[2], |
| 985 | "line" => $_[3], |
| 986 | }, $class; |
| 987 | } |
| 988 | |
| 989 | sub issubrule { undef } |
| 990 | sub isterminal { 0 } |
| 991 | |
| 992 | sub code($$$$) |
| 993 | { |
| 994 | my ($self, $namespace, $rule) = @_; |
| 995 | |
| 996 | ' |
| 997 | Parse::RecDescent::_trace(q{Trying action}, |
| 998 | Parse::RecDescent::_tracefirst($text), |
| 999 | q{' . $rule->{name} . '}, |
| 1000 | $tracelevel) |
| 1001 | if defined $::RD_TRACE; |
| 1002 | ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .' |
| 1003 | |
| 1004 | $_tok = ($_noactions) ? 0 : do ' . $self->{"code"} . '; |
| 1005 | ' . ($self->{"lookahead"}<0?'if':'unless') . ' (defined $_tok) |
| 1006 | { |
| 1007 | Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' action>> (return value: [undef])}) |
| 1008 | if defined $::RD_TRACE; |
| 1009 | last; |
| 1010 | } |
| 1011 | Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' action<< (return value: [} |
| 1012 | . $_tok . q{])}, |
| 1013 | Parse::RecDescent::_tracefirst($text)) |
| 1014 | if defined $::RD_TRACE; |
| 1015 | push @item, $_tok; |
| 1016 | ' . ($self->{line}>=0 ? '$item{'. $self->{hashname} .'}=$_tok;' : '' ) .' |
| 1017 | ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' |
| 1018 | ' |
| 1019 | } |
| 1020 | |
| 1021 | |
| 1022 | 1; |
| 1023 | |
| 1024 | package Parse::RecDescent::Directive; |
| 1025 | |
| 1026 | sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; } |
| 1027 | |
| 1028 | sub issubrule { undef } |
| 1029 | sub isterminal { 0 } |
| 1030 | sub describe { $_[1] ? '' : $_[0]->{name} } |
| 1031 | |
| 1032 | sub new ($$$$$) |
| 1033 | { |
| 1034 | my $class = ref($_[0]) || $_[0]; |
| 1035 | bless |
| 1036 | { |
| 1037 | "code" => $_[1], |
| 1038 | "lookahead" => $_[2], |
| 1039 | "line" => $_[3], |
| 1040 | "name" => $_[4], |
| 1041 | }, $class; |
| 1042 | } |
| 1043 | |
| 1044 | sub code($$$$) |
| 1045 | { |
| 1046 | my ($self, $namespace, $rule) = @_; |
| 1047 | |
| 1048 | ' |
| 1049 | ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .' |
| 1050 | |
| 1051 | Parse::RecDescent::_trace(q{Trying directive: [' |
| 1052 | . $self->describe . ']}, |
| 1053 | Parse::RecDescent::_tracefirst($text), |
| 1054 | q{' . $rule->{name} . '}, |
| 1055 | $tracelevel) |
| 1056 | if defined $::RD_TRACE; ' .' |
| 1057 | $_tok = do { ' . $self->{"code"} . ' }; |
| 1058 | if (defined($_tok)) |
| 1059 | { |
| 1060 | Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' directive<< (return value: [} |
| 1061 | . $_tok . q{])}, |
| 1062 | Parse::RecDescent::_tracefirst($text)) |
| 1063 | if defined $::RD_TRACE; |
| 1064 | } |
| 1065 | else |
| 1066 | { |
| 1067 | Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' directive>>}, |
| 1068 | Parse::RecDescent::_tracefirst($text)) |
| 1069 | if defined $::RD_TRACE; |
| 1070 | } |
| 1071 | ' . ($self->{"lookahead"} ? '$text = $_savetext and ' : '' ) .' |
| 1072 | last ' |
| 1073 | . ($self->{"lookahead"}<0?'if':'unless') . ' defined $_tok; |
| 1074 | push @item, $item{'.$self->{hashname}.'}=$_tok; |
| 1075 | ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' |
| 1076 | ' |
| 1077 | } |
| 1078 | |
| 1079 | 1; |
| 1080 | |
| 1081 | package Parse::RecDescent::UncondReject; |
| 1082 | |
| 1083 | sub issubrule { undef } |
| 1084 | sub isterminal { 0 } |
| 1085 | sub describe { $_[1] ? '' : $_[0]->{name} } |
| 1086 | sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; } |
| 1087 | |
| 1088 | sub new ($$$;$) |
| 1089 | { |
| 1090 | my $class = ref($_[0]) || $_[0]; |
| 1091 | bless |
| 1092 | { |
| 1093 | "lookahead" => $_[1], |
| 1094 | "line" => $_[2], |
| 1095 | "name" => $_[3], |
| 1096 | }, $class; |
| 1097 | } |
| 1098 | |
| 1099 | # MARK, YOU MAY WANT TO OPTIMIZE THIS. |
| 1100 | |
| 1101 | |
| 1102 | sub code($$$$) |
| 1103 | { |
| 1104 | my ($self, $namespace, $rule) = @_; |
| 1105 | |
| 1106 | ' |
| 1107 | Parse::RecDescent::_trace(q{>>Rejecting production<< (found ' |
| 1108 | . $self->describe . ')}, |
| 1109 | Parse::RecDescent::_tracefirst($text), |
| 1110 | q{' . $rule->{name} . '}, |
| 1111 | $tracelevel) |
| 1112 | if defined $::RD_TRACE; |
| 1113 | undef $return; |
| 1114 | ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .' |
| 1115 | |
| 1116 | $_tok = undef; |
| 1117 | ' . ($self->{"lookahead"} ? '$text = $_savetext and ' : '' ) .' |
| 1118 | last ' |
| 1119 | . ($self->{"lookahead"}<0?'if':'unless') . ' defined $_tok; |
| 1120 | ' |
| 1121 | } |
| 1122 | |
| 1123 | 1; |
| 1124 | |
| 1125 | package Parse::RecDescent::Error; |
| 1126 | |
| 1127 | sub issubrule { undef } |
| 1128 | sub isterminal { 0 } |
| 1129 | sub describe { $_[1] ? '' : $_[0]->{commitonly} ? '<error?:...>' : '<error...>' } |
| 1130 | sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; } |
| 1131 | |
| 1132 | sub new ($$$$$) |
| 1133 | { |
| 1134 | my $class = ref($_[0]) || $_[0]; |
| 1135 | bless |
| 1136 | { |
| 1137 | "msg" => $_[1], |
| 1138 | "lookahead" => $_[2], |
| 1139 | "commitonly" => $_[3], |
| 1140 | "line" => $_[4], |
| 1141 | }, $class; |
| 1142 | } |
| 1143 | |
| 1144 | sub code($$$$) |
| 1145 | { |
| 1146 | my ($self, $namespace, $rule) = @_; |
| 1147 | |
| 1148 | my $action = ''; |
| 1149 | |
| 1150 | if ($self->{"msg"}) # ERROR MESSAGE SUPPLIED |
| 1151 | { |
| 1152 | #WAS: $action .= "Parse::RecDescent::_error(qq{$self->{msg}}" . ',$thisline);'; |
| 1153 | $action .= 'push @{$thisparser->{errors}}, [qq{'.$self->{msg}.'},$thisline];'; |
| 1154 | |
| 1155 | } |
| 1156 | else # GENERATE ERROR MESSAGE DURING PARSE |
| 1157 | { |
| 1158 | $action .= ' |
| 1159 | my $rule = $item[0]; |
| 1160 | $rule =~ s/_/ /g; |
| 1161 | #WAS: Parse::RecDescent::_error("Invalid $rule: " . $expectation->message() ,$thisline); |
| 1162 | push @{$thisparser->{errors}}, ["Invalid $rule: " . $expectation->message() ,$thisline]; |
| 1163 | '; |
| 1164 | } |
| 1165 | |
| 1166 | my $dir = |
| 1167 | new Parse::RecDescent::Directive('if (' . |
| 1168 | ($self->{"commitonly"} ? '$commit' : '1') . |
| 1169 | ") { do {$action} unless ".' $_noactions; undef } else {0}', |
| 1170 | $self->{"lookahead"},0,$self->describe); |
| 1171 | $dir->{hashname} = $self->{hashname}; |
| 1172 | return $dir->code($namespace, $rule, 0); |
| 1173 | } |
| 1174 | |
| 1175 | 1; |
| 1176 | |
| 1177 | package Parse::RecDescent::Token; |
| 1178 | |
| 1179 | sub sethashname { $_[0]->{hashname} = '__PATTERN' . ++$_[1]->{patcount} . '__'; } |
| 1180 | |
| 1181 | sub issubrule { undef } |
| 1182 | sub isterminal { 1 } |
| 1183 | sub describe ($) { shift->{'description'}} |
| 1184 | |
| 1185 | |
| 1186 | # ARGS ARE: $self, $pattern, $left_delim, $modifiers, $lookahead, $linenum |
| 1187 | sub new ($$$$$$) |
| 1188 | { |
| 1189 | my $class = ref($_[0]) || $_[0]; |
| 1190 | my $pattern = $_[1]; |
| 1191 | my $pat = $_[1]; |
| 1192 | my $ldel = $_[2]; |
| 1193 | my $rdel = $ldel; |
| 1194 | $rdel =~ tr/{[(</}])>/; |
| 1195 | |
| 1196 | my $mod = $_[3]; |
| 1197 | |
| 1198 | my $desc; |
| 1199 | |
| 1200 | if ($ldel eq '/') { $desc = "$ldel$pattern$rdel$mod" } |
| 1201 | else { $desc = "m$ldel$pattern$rdel$mod" } |
| 1202 | $desc =~ s/\\/\\\\/g; |
| 1203 | $desc =~ s/\$$/\\\$/g; |
| 1204 | $desc =~ s/}/\\}/g; |
| 1205 | $desc =~ s/{/\\{/g; |
| 1206 | |
| 1207 | if (!eval "no strict; |
| 1208 | local \$SIG{__WARN__} = sub {0}; |
| 1209 | '' =~ m$ldel$pattern$rdel$mod" and $@) |
| 1210 | { |
| 1211 | Parse::RecDescent::_warn(3, "Token pattern \"m$ldel$pattern$rdel$mod\" |
| 1212 | may not be a valid regular expression", |
| 1213 | $_[5]); |
| 1214 | $@ =~ s/ at \(eval.*/./; |
| 1215 | Parse::RecDescent::_hint($@); |
| 1216 | } |
| 1217 | |
| 1218 | # QUIETLY PREVENT (WELL-INTENTIONED) CALAMITY |
| 1219 | $mod =~ s/[gc]//g; |
| 1220 | $pattern =~ s/(\A|[^\\])\\G/$1/g; |
| 1221 | |
| 1222 | bless |
| 1223 | { |
| 1224 | "pattern" => $pattern, |
| 1225 | "ldelim" => $ldel, |
| 1226 | "rdelim" => $rdel, |
| 1227 | "mod" => $mod, |
| 1228 | "lookahead" => $_[4], |
| 1229 | "line" => $_[5], |
| 1230 | "description" => $desc, |
| 1231 | }, $class; |
| 1232 | } |
| 1233 | |
| 1234 | |
| 1235 | sub code($$$$$) |
| 1236 | { |
| 1237 | my ($self, $namespace, $rule, $check) = @_; |
| 1238 | my $ldel = $self->{"ldelim"}; |
| 1239 | my $rdel = $self->{"rdelim"}; |
| 1240 | my $sdel = $ldel; |
| 1241 | my $mod = $self->{"mod"}; |
| 1242 | |
| 1243 | $sdel =~ s/[[{(<]/{}/; |
| 1244 | |
| 1245 | my $code = ' |
| 1246 | Parse::RecDescent::_trace(q{Trying terminal: [' . $self->describe |
| 1247 | . ']}, Parse::RecDescent::_tracefirst($text), |
| 1248 | q{' . $rule->{name} . '}, |
| 1249 | $tracelevel) |
| 1250 | if defined $::RD_TRACE; |
| 1251 | undef $lastsep; |
| 1252 | $expectation->is(q{' . ($rule->hasleftmost($self) ? '' |
| 1253 | : $self->describe ) . '})->at($text); |
| 1254 | ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) . ' |
| 1255 | |
| 1256 | ' . ($self->{"lookahead"}<0?'if':'unless') |
| 1257 | . ' ($text =~ s/\A($skip)/$lastsep=$1 and ""/e and ' |
| 1258 | . ($check->{itempos}? 'do {'.Parse::RecDescent::Production::incitempos().' 1} and ' : '') |
| 1259 | . ' $text =~ m' . $ldel . '\A(?:' . $self->{"pattern"} . ')' . $rdel . $mod . ') |
| 1260 | { |
| 1261 | '.($self->{"lookahead"} ? '$text = $_savetext;' : '$text = $lastsep . $text if defined $lastsep;') . |
| 1262 | ($check->{itempos} ? Parse::RecDescent::Production::unincitempos() : '') . ' |
| 1263 | $expectation->failed(); |
| 1264 | Parse::RecDescent::_trace(q{<<Didn\'t match terminal>>}, |
| 1265 | Parse::RecDescent::_tracefirst($text)) |
| 1266 | if defined $::RD_TRACE; |
| 1267 | |
| 1268 | last; |
| 1269 | } |
| 1270 | $current_match = substr($text, $-[0], $+[0] - $-[0]); |
| 1271 | substr($text,0,length($current_match),q{}); |
| 1272 | Parse::RecDescent::_trace(q{>>Matched terminal<< (return value: [} |
| 1273 | . $current_match . q{])}, |
| 1274 | Parse::RecDescent::_tracefirst($text)) |
| 1275 | if defined $::RD_TRACE; |
| 1276 | push @item, $item{'.$self->{hashname}.'}=$current_match; |
| 1277 | ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' |
| 1278 | '; |
| 1279 | |
| 1280 | return $code; |
| 1281 | } |
| 1282 | |
| 1283 | 1; |
| 1284 | |
| 1285 | package Parse::RecDescent::Literal; |
| 1286 | |
| 1287 | sub sethashname { $_[0]->{hashname} = '__STRING' . ++$_[1]->{strcount} . '__'; } |
| 1288 | |
| 1289 | sub issubrule { undef } |
| 1290 | sub isterminal { 1 } |
| 1291 | sub describe ($) { shift->{'description'} } |
| 1292 | |
| 1293 | sub new ($$$$) |
| 1294 | { |
| 1295 | my $class = ref($_[0]) || $_[0]; |
| 1296 | |
| 1297 | my $pattern = $_[1]; |
| 1298 | |
| 1299 | my $desc = $pattern; |
| 1300 | $desc=~s/\\/\\\\/g; |
| 1301 | $desc=~s/}/\\}/g; |
| 1302 | $desc=~s/{/\\{/g; |
| 1303 | |
| 1304 | bless |
| 1305 | { |
| 1306 | "pattern" => $pattern, |
| 1307 | "lookahead" => $_[2], |
| 1308 | "line" => $_[3], |
| 1309 | "description" => "'$desc'", |
| 1310 | }, $class; |
| 1311 | } |
| 1312 | |
| 1313 | |
| 1314 | sub code($$$$) |
| 1315 | { |
| 1316 | my ($self, $namespace, $rule, $check) = @_; |
| 1317 | |
| 1318 | my $code = ' |
| 1319 | Parse::RecDescent::_trace(q{Trying terminal: [' . $self->describe |
| 1320 | . ']}, |
| 1321 | Parse::RecDescent::_tracefirst($text), |
| 1322 | q{' . $rule->{name} . '}, |
| 1323 | $tracelevel) |
| 1324 | if defined $::RD_TRACE; |
| 1325 | undef $lastsep; |
| 1326 | $expectation->is(q{' . ($rule->hasleftmost($self) ? '' |
| 1327 | : $self->describe ) . '})->at($text); |
| 1328 | ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) . ' |
| 1329 | |
| 1330 | ' . ($self->{"lookahead"}<0?'if':'unless') |
| 1331 | . ' ($text =~ s/\A($skip)/$lastsep=$1 and ""/e and ' |
| 1332 | . ($check->{itempos}? 'do {'.Parse::RecDescent::Production::incitempos().' 1} and ' : '') |
| 1333 | . ' $text =~ m/\A' . quotemeta($self->{"pattern"}) . '/) |
| 1334 | { |
| 1335 | '.($self->{"lookahead"} ? '$text = $_savetext;' : '$text = $lastsep . $text if defined $lastsep;').' |
| 1336 | '. ($check->{itempos} ? Parse::RecDescent::Production::unincitempos() : '') . ' |
| 1337 | $expectation->failed(); |
| 1338 | Parse::RecDescent::_trace(qq{<<Didn\'t match terminal>>}, |
| 1339 | Parse::RecDescent::_tracefirst($text)) |
| 1340 | if defined $::RD_TRACE; |
| 1341 | last; |
| 1342 | } |
| 1343 | $current_match = substr($text, $-[0], $+[0] - $-[0]); |
| 1344 | substr($text,0,length($current_match),q{}); |
| 1345 | Parse::RecDescent::_trace(q{>>Matched terminal<< (return value: [} |
| 1346 | . $current_match . q{])}, |
| 1347 | Parse::RecDescent::_tracefirst($text)) |
| 1348 | if defined $::RD_TRACE; |
| 1349 | push @item, $item{'.$self->{hashname}.'}=$current_match; |
| 1350 | ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' |
| 1351 | '; |
| 1352 | |
| 1353 | return $code; |
| 1354 | } |
| 1355 | |
| 1356 | 1; |
| 1357 | |
| 1358 | package Parse::RecDescent::InterpLit; |
| 1359 | |
| 1360 | sub sethashname { $_[0]->{hashname} = '__STRING' . ++$_[1]->{strcount} . '__'; } |
| 1361 | |
| 1362 | sub issubrule { undef } |
| 1363 | sub isterminal { 1 } |
| 1364 | sub describe ($) { shift->{'description'} } |
| 1365 | |
| 1366 | sub new ($$$$) |
| 1367 | { |
| 1368 | my $class = ref($_[0]) || $_[0]; |
| 1369 | |
| 1370 | my $pattern = $_[1]; |
| 1371 | $pattern =~ s#/#\\/#g; |
| 1372 | |
| 1373 | my $desc = $pattern; |
| 1374 | $desc=~s/\\/\\\\/g; |
| 1375 | $desc=~s/}/\\}/g; |
| 1376 | $desc=~s/{/\\{/g; |
| 1377 | |
| 1378 | bless |
| 1379 | { |
| 1380 | "pattern" => $pattern, |
| 1381 | "lookahead" => $_[2], |
| 1382 | "line" => $_[3], |
| 1383 | "description" => "'$desc'", |
| 1384 | }, $class; |
| 1385 | } |
| 1386 | |
| 1387 | sub code($$$$) |
| 1388 | { |
| 1389 | my ($self, $namespace, $rule, $check) = @_; |
| 1390 | |
| 1391 | my $code = ' |
| 1392 | Parse::RecDescent::_trace(q{Trying terminal: [' . $self->describe |
| 1393 | . ']}, |
| 1394 | Parse::RecDescent::_tracefirst($text), |
| 1395 | q{' . $rule->{name} . '}, |
| 1396 | $tracelevel) |
| 1397 | if defined $::RD_TRACE; |
| 1398 | undef $lastsep; |
| 1399 | $expectation->is(q{' . ($rule->hasleftmost($self) ? '' |
| 1400 | : $self->describe ) . '})->at($text); |
| 1401 | ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) . ' |
| 1402 | |
| 1403 | ' . ($self->{"lookahead"}<0?'if':'unless') |
| 1404 | . ' ($text =~ s/\A($skip)/$lastsep=$1 and ""/e and ' |
| 1405 | . ($check->{itempos}? 'do {'.Parse::RecDescent::Production::incitempos().' 1} and ' : '') |
| 1406 | . ' do { $_tok = "' . $self->{"pattern"} . '"; 1 } and |
| 1407 | substr($text,0,length($_tok)) eq $_tok and |
| 1408 | do { substr($text,0,length($_tok)) = ""; 1; } |
| 1409 | ) |
| 1410 | { |
| 1411 | '.($self->{"lookahead"} ? '$text = $_savetext;' : '$text = $lastsep . $text if defined $lastsep;').' |
| 1412 | '. ($check->{itempos} ? Parse::RecDescent::Production::unincitempos() : '') . ' |
| 1413 | $expectation->failed(); |
| 1414 | Parse::RecDescent::_trace(q{<<Didn\'t match terminal>>}, |
| 1415 | Parse::RecDescent::_tracefirst($text)) |
| 1416 | if defined $::RD_TRACE; |
| 1417 | last; |
| 1418 | } |
| 1419 | Parse::RecDescent::_trace(q{>>Matched terminal<< (return value: [} |
| 1420 | . $_tok . q{])}, |
| 1421 | Parse::RecDescent::_tracefirst($text)) |
| 1422 | if defined $::RD_TRACE; |
| 1423 | push @item, $item{'.$self->{hashname}.'}=$_tok; |
| 1424 | ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' |
| 1425 | '; |
| 1426 | |
| 1427 | return $code; |
| 1428 | } |
| 1429 | |
| 1430 | 1; |
| 1431 | |
| 1432 | package Parse::RecDescent::Subrule; |
| 1433 | |
| 1434 | sub issubrule ($) { return $_[0]->{"subrule"} } |
| 1435 | sub isterminal { 0 } |
| 1436 | sub sethashname {} |
| 1437 | |
| 1438 | sub describe ($) |
| 1439 | { |
| 1440 | my $desc = $_[0]->{"implicit"} || $_[0]->{"subrule"}; |
| 1441 | $desc = "<matchrule:$desc>" if $_[0]->{"matchrule"}; |
| 1442 | return $desc; |
| 1443 | } |
| 1444 | |
| 1445 | sub callsyntax($$) |
| 1446 | { |
| 1447 | if ($_[0]->{"matchrule"}) |
| 1448 | { |
| 1449 | return "&{'$_[1]'.qq{$_[0]->{subrule}}}"; |
| 1450 | } |
| 1451 | else |
| 1452 | { |
| 1453 | return $_[1].$_[0]->{"subrule"}; |
| 1454 | } |
| 1455 | } |
| 1456 | |
| 1457 | sub new ($$$$;$$$) |
| 1458 | { |
| 1459 | my $class = ref($_[0]) || $_[0]; |
| 1460 | bless |
| 1461 | { |
| 1462 | "subrule" => $_[1], |
| 1463 | "lookahead" => $_[2], |
| 1464 | "line" => $_[3], |
| 1465 | "implicit" => $_[4] || undef, |
| 1466 | "matchrule" => $_[5], |
| 1467 | "argcode" => $_[6] || undef, |
| 1468 | }, $class; |
| 1469 | } |
| 1470 | |
| 1471 | |
| 1472 | sub code($$$$) |
| 1473 | { |
| 1474 | my ($self, $namespace, $rule, $check) = @_; |
| 1475 | |
| 1476 | ' |
| 1477 | Parse::RecDescent::_trace(q{Trying subrule: [' . $self->{"subrule"} . ']}, |
| 1478 | Parse::RecDescent::_tracefirst($text), |
| 1479 | q{' . $rule->{"name"} . '}, |
| 1480 | $tracelevel) |
| 1481 | if defined $::RD_TRACE; |
| 1482 | if (1) { no strict qw{refs}; |
| 1483 | $expectation->is(' . ($rule->hasleftmost($self) ? 'q{}' |
| 1484 | # WAS : 'qq{'.$self->describe.'}' ) . ')->at($text); |
| 1485 | : 'q{'.$self->describe.'}' ) . ')->at($text); |
| 1486 | ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) |
| 1487 | . ($self->{"lookahead"}<0?'if':'unless') |
| 1488 | . ' (defined ($_tok = ' |
| 1489 | . $self->callsyntax($namespace.'::') |
| 1490 | . '($thisparser,$text,$repeating,' |
| 1491 | . ($self->{"lookahead"}?'1':'$_noactions') |
| 1492 | . ($check->{"itempos"}?',$itempos[$#itempos]':',undef') |
| 1493 | . ($self->{argcode} ? ",sub { return $self->{argcode} }" |
| 1494 | : ',sub { \\@arg }') |
| 1495 | . '))) |
| 1496 | { |
| 1497 | '.($self->{"lookahead"} ? '$text = $_savetext;' : '').' |
| 1498 | Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' subrule: [' |
| 1499 | . $self->{subrule} . ']>>}, |
| 1500 | Parse::RecDescent::_tracefirst($text), |
| 1501 | q{' . $rule->{"name"} .'}, |
| 1502 | $tracelevel) |
| 1503 | if defined $::RD_TRACE; |
| 1504 | $expectation->failed(); |
| 1505 | last; |
| 1506 | } |
| 1507 | Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' subrule: [' |
| 1508 | . $self->{subrule} . ']<< (return value: [} |
| 1509 | . $_tok . q{]}, |
| 1510 | |
| 1511 | Parse::RecDescent::_tracefirst($text), |
| 1512 | q{' . $rule->{"name"} .'}, |
| 1513 | $tracelevel) |
| 1514 | if defined $::RD_TRACE; |
| 1515 | $item{q{' . $self->{subrule} . '}} = $_tok; |
| 1516 | push @item, $_tok; |
| 1517 | ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' |
| 1518 | } |
| 1519 | ' |
| 1520 | } |
| 1521 | |
| 1522 | package Parse::RecDescent::Repetition; |
| 1523 | |
| 1524 | sub issubrule ($) { return $_[0]->{"subrule"} } |
| 1525 | sub isterminal { 0 } |
| 1526 | sub sethashname { } |
| 1527 | |
| 1528 | sub describe ($) |
| 1529 | { |
| 1530 | my $desc = $_[0]->{"expected"} || $_[0]->{"subrule"}; |
| 1531 | $desc = "<matchrule:$desc>" if $_[0]->{"matchrule"}; |
| 1532 | return $desc; |
| 1533 | } |
| 1534 | |
| 1535 | sub callsyntax($$) |
| 1536 | { |
| 1537 | if ($_[0]->{matchrule}) |
| 1538 | { return "sub { goto &{''.qq{$_[1]$_[0]->{subrule}}} }"; } |
| 1539 | else |
| 1540 | { return "\\&$_[1]$_[0]->{subrule}"; } |
| 1541 | } |
| 1542 | |
| 1543 | sub new ($$$$$$$$$$) |
| 1544 | { |
| 1545 | my ($self, $subrule, $repspec, $min, $max, $lookahead, $line, $parser, $matchrule, $argcode) = @_; |
| 1546 | my $class = ref($self) || $self; |
| 1547 | ($max, $min) = ( $min, $max) if ($max<$min); |
| 1548 | |
| 1549 | my $desc; |
| 1550 | if ($subrule=~/\A_alternation_\d+_of_production_\d+_of_rule/) |
| 1551 | { $desc = $parser->{"rules"}{$subrule}->expected } |
| 1552 | |
| 1553 | if ($lookahead) |
| 1554 | { |
| 1555 | if ($min>0) |
| 1556 | { |
| 1557 | return new Parse::RecDescent::Subrule($subrule,$lookahead,$line,$desc,$matchrule,$argcode); |
| 1558 | } |
| 1559 | else |
| 1560 | { |
| 1561 | Parse::RecDescent::_error("Not symbol (\"!\") before |
| 1562 | \"$subrule\" doesn't make |
| 1563 | sense.",$line); |
| 1564 | Parse::RecDescent::_hint("Lookahead for negated optional |
| 1565 | repetitions (such as |
| 1566 | \"!$subrule($repspec)\" can never |
| 1567 | succeed, since optional items always |
| 1568 | match (zero times at worst). |
| 1569 | Did you mean a single \"!$subrule\", |
| 1570 | instead?"); |
| 1571 | } |
| 1572 | } |
| 1573 | bless |
| 1574 | { |
| 1575 | "subrule" => $subrule, |
| 1576 | "repspec" => $repspec, |
| 1577 | "min" => $min, |
| 1578 | "max" => $max, |
| 1579 | "lookahead" => $lookahead, |
| 1580 | "line" => $line, |
| 1581 | "expected" => $desc, |
| 1582 | "argcode" => $argcode || undef, |
| 1583 | "matchrule" => $matchrule, |
| 1584 | }, $class; |
| 1585 | } |
| 1586 | |
| 1587 | sub code($$$$) |
| 1588 | { |
| 1589 | my ($self, $namespace, $rule, $check) = @_; |
| 1590 | |
| 1591 | my ($subrule, $repspec, $min, $max, $lookahead) = |
| 1592 | @{$self}{ qw{subrule repspec min max lookahead} }; |
| 1593 | |
| 1594 | ' |
| 1595 | Parse::RecDescent::_trace(q{Trying repeated subrule: [' . $self->describe . ']}, |
| 1596 | Parse::RecDescent::_tracefirst($text), |
| 1597 | q{' . $rule->{"name"} . '}, |
| 1598 | $tracelevel) |
| 1599 | if defined $::RD_TRACE; |
| 1600 | $expectation->is(' . ($rule->hasleftmost($self) ? 'q{}' |
| 1601 | # WAS : 'qq{'.$self->describe.'}' ) . ')->at($text); |
| 1602 | : 'q{'.$self->describe.'}' ) . ')->at($text); |
| 1603 | ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .' |
| 1604 | unless (defined ($_tok = $thisparser->_parserepeat($text, ' |
| 1605 | . $self->callsyntax($namespace.'::') |
| 1606 | . ', ' . $min . ', ' . $max . ', ' |
| 1607 | . ($self->{"lookahead"}?'1':'$_noactions') |
| 1608 | . ($check->{"itempos"}?',$itempos[$#itempos]':',undef') |
| 1609 | . ',$expectation,' |
| 1610 | . ($self->{argcode} ? "sub { return $self->{argcode} }" |
| 1611 | : 'sub { \\@arg }') |
| 1612 | . '))) |
| 1613 | { |
| 1614 | Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' repeated subrule: [' |
| 1615 | . $self->describe . ']>>}, |
| 1616 | Parse::RecDescent::_tracefirst($text), |
| 1617 | q{' . $rule->{"name"} .'}, |
| 1618 | $tracelevel) |
| 1619 | if defined $::RD_TRACE; |
| 1620 | last; |
| 1621 | } |
| 1622 | Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' repeated subrule: [' |
| 1623 | . $self->{subrule} . ']<< (} |
| 1624 | . @$_tok . q{ times)}, |
| 1625 | |
| 1626 | Parse::RecDescent::_tracefirst($text), |
| 1627 | q{' . $rule->{"name"} .'}, |
| 1628 | $tracelevel) |
| 1629 | if defined $::RD_TRACE; |
| 1630 | $item{q{' . "$self->{subrule}($self->{repspec})" . '}} = $_tok; |
| 1631 | push @item, $_tok; |
| 1632 | ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' |
| 1633 | |
| 1634 | ' |
| 1635 | } |
| 1636 | |
| 1637 | package Parse::RecDescent::Result; |
| 1638 | |
| 1639 | sub issubrule { 0 } |
| 1640 | sub isterminal { 0 } |
| 1641 | sub describe { '' } |
| 1642 | |
| 1643 | sub new |
| 1644 | { |
| 1645 | my ($class, $pos) = @_; |
| 1646 | |
| 1647 | bless {}, $class; |
| 1648 | } |
| 1649 | |
| 1650 | sub code($$$$) |
| 1651 | { |
| 1652 | my ($self, $namespace, $rule) = @_; |
| 1653 | |
| 1654 | ' |
| 1655 | $return = $item[-1]; |
| 1656 | '; |
| 1657 | } |
| 1658 | |
| 1659 | package Parse::RecDescent::Operator; |
| 1660 | |
| 1661 | my @opertype = ( " non-optional", "n optional" ); |
| 1662 | |
| 1663 | sub issubrule { 0 } |
| 1664 | sub isterminal { 0 } |
| 1665 | |
| 1666 | sub describe { $_[0]->{"expected"} } |
| 1667 | sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; } |
| 1668 | |
| 1669 | |
| 1670 | sub new |
| 1671 | { |
| 1672 | my ($class, $type, $minrep, $maxrep, $leftarg, $op, $rightarg) = @_; |
| 1673 | |
| 1674 | bless |
| 1675 | { |
| 1676 | "type" => "${type}op", |
| 1677 | "leftarg" => $leftarg, |
| 1678 | "op" => $op, |
| 1679 | "min" => $minrep, |
| 1680 | "max" => $maxrep, |
| 1681 | "rightarg" => $rightarg, |
| 1682 | "expected" => "<${type}op: ".$leftarg->describe." ".$op->describe." ".$rightarg->describe.">", |
| 1683 | }, $class; |
| 1684 | } |
| 1685 | |
| 1686 | sub code($$$$) |
| 1687 | { |
| 1688 | my ($self, $namespace, $rule, $check) = @_; |
| 1689 | |
| 1690 | my @codeargs = @_[1..$#_]; |
| 1691 | |
| 1692 | my ($leftarg, $op, $rightarg) = |
| 1693 | @{$self}{ qw{leftarg op rightarg} }; |
| 1694 | |
| 1695 | my $code = ' |
| 1696 | Parse::RecDescent::_trace(q{Trying operator: [' . $self->describe . ']}, |
| 1697 | Parse::RecDescent::_tracefirst($text), |
| 1698 | q{' . $rule->{"name"} . '}, |
| 1699 | $tracelevel) |
| 1700 | if defined $::RD_TRACE; |
| 1701 | $expectation->is(' . ($rule->hasleftmost($self) ? 'q{}' |
| 1702 | # WAS : 'qq{'.$self->describe.'}' ) . ')->at($text); |
| 1703 | : 'q{'.$self->describe.'}' ) . ')->at($text); |
| 1704 | |
| 1705 | $_tok = undef; |
| 1706 | OPLOOP: while (1) |
| 1707 | { |
| 1708 | $repcount = 0; |
| 1709 | my @item; |
| 1710 | my %item; |
| 1711 | '; |
| 1712 | |
| 1713 | $code .= ' |
| 1714 | my $_itempos = $itempos[-1]; |
| 1715 | my $itemposfirst; |
| 1716 | ' if $check->{itempos}; |
| 1717 | |
| 1718 | if ($self->{type} eq "leftop" ) |
| 1719 | { |
| 1720 | $code .= ' |
| 1721 | # MATCH LEFTARG |
| 1722 | ' . $leftarg->code(@codeargs) . ' |
| 1723 | |
| 1724 | '; |
| 1725 | |
| 1726 | $code .= ' |
| 1727 | if (defined($_itempos) and !defined($itemposfirst)) |
| 1728 | { |
| 1729 | $itemposfirst = Parse::RecDescent::Production::_duplicate_itempos($_itempos); |
| 1730 | } |
| 1731 | ' if $check->{itempos}; |
| 1732 | |
| 1733 | $code .= ' |
| 1734 | $repcount++; |
| 1735 | |
| 1736 | my $savetext = $text; |
| 1737 | my $backtrack; |
| 1738 | |
| 1739 | # MATCH (OP RIGHTARG)(s) |
| 1740 | while ($repcount < ' . $self->{max} . ') |
| 1741 | { |
| 1742 | $backtrack = 0; |
| 1743 | ' . $op->code(@codeargs) . ' |
| 1744 | ' . ($op->isterminal() ? 'pop @item;' : '$backtrack=1;' ) . ' |
| 1745 | ' . (ref($op) eq 'Parse::RecDescent::Token' |
| 1746 | ? 'if (defined $1) {push @item, $item{'.($self->{name}||$self->{hashname}).'}=$1; $backtrack=1;}' |
| 1747 | : "" ) . ' |
| 1748 | ' . $rightarg->code(@codeargs) . ' |
| 1749 | $savetext = $text; |
| 1750 | $repcount++; |
| 1751 | } |
| 1752 | $text = $savetext; |
| 1753 | pop @item if $backtrack; |
| 1754 | |
| 1755 | '; |
| 1756 | } |
| 1757 | else |
| 1758 | { |
| 1759 | $code .= ' |
| 1760 | my $savetext = $text; |
| 1761 | my $backtrack; |
| 1762 | # MATCH (LEFTARG OP)(s) |
| 1763 | while ($repcount < ' . $self->{max} . ') |
| 1764 | { |
| 1765 | $backtrack = 0; |
| 1766 | ' . $leftarg->code(@codeargs) . ' |
| 1767 | '; |
| 1768 | $code .= ' |
| 1769 | if (defined($_itempos) and !defined($itemposfirst)) |
| 1770 | { |
| 1771 | $itemposfirst = Parse::RecDescent::Production::_duplicate_itempos($_itempos); |
| 1772 | } |
| 1773 | ' if $check->{itempos}; |
| 1774 | |
| 1775 | $code .= ' |
| 1776 | $repcount++; |
| 1777 | $backtrack = 1; |
| 1778 | ' . $op->code(@codeargs) . ' |
| 1779 | $savetext = $text; |
| 1780 | ' . ($op->isterminal() ? 'pop @item;' : "" ) . ' |
| 1781 | ' . (ref($op) eq 'Parse::RecDescent::Token' ? 'do { push @item, $item{'.($self->{name}||$self->{hashname}).'}=$1; } if defined $1;' : "" ) . ' |
| 1782 | } |
| 1783 | $text = $savetext; |
| 1784 | pop @item if $backtrack; |
| 1785 | |
| 1786 | # MATCH RIGHTARG |
| 1787 | ' . $rightarg->code(@codeargs) . ' |
| 1788 | $repcount++; |
| 1789 | '; |
| 1790 | } |
| 1791 | |
| 1792 | $code .= 'unless (@item) { undef $_tok; last }' unless $self->{min}==0; |
| 1793 | |
| 1794 | $code .= ' |
| 1795 | $_tok = [ @item ]; |
| 1796 | '; |
| 1797 | |
| 1798 | |
| 1799 | $code .= ' |
| 1800 | if (defined $itemposfirst) |
| 1801 | { |
| 1802 | Parse::RecDescent::Production::_update_itempos( |
| 1803 | $_itempos, $itemposfirst, undef, [qw(from)]); |
| 1804 | } |
| 1805 | ' if $check->{itempos}; |
| 1806 | |
| 1807 | $code .= ' |
| 1808 | last; |
| 1809 | } # end of OPLOOP |
| 1810 | '; |
| 1811 | |
| 1812 | $code .= ' |
| 1813 | unless ($repcount>='.$self->{min}.') |
| 1814 | { |
| 1815 | Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' operator: [' |
| 1816 | . $self->describe |
| 1817 | . ']>>}, |
| 1818 | Parse::RecDescent::_tracefirst($text), |
| 1819 | q{' . $rule->{"name"} .'}, |
| 1820 | $tracelevel) |
| 1821 | if defined $::RD_TRACE; |
| 1822 | $expectation->failed(); |
| 1823 | last; |
| 1824 | } |
| 1825 | Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' operator: [' |
| 1826 | . $self->describe |
| 1827 | . ']<< (return value: [} |
| 1828 | . qq{@{$_tok||[]}} . q{]}, |
| 1829 | Parse::RecDescent::_tracefirst($text), |
| 1830 | q{' . $rule->{"name"} .'}, |
| 1831 | $tracelevel) |
| 1832 | if defined $::RD_TRACE; |
| 1833 | |
| 1834 | push @item, $item{'.($self->{name}||$self->{hashname}).'}=$_tok||[]; |
| 1835 | '; |
| 1836 | |
| 1837 | return $code; |
| 1838 | } |
| 1839 | |
| 1840 | |
| 1841 | package Parse::RecDescent::Expectation; |
| 1842 | |
| 1843 | sub new ($) |
| 1844 | { |
| 1845 | bless { |
| 1846 | "failed" => 0, |
| 1847 | "expected" => "", |
| 1848 | "unexpected" => "", |
| 1849 | "lastexpected" => "", |
| 1850 | "lastunexpected" => "", |
| 1851 | "defexpected" => $_[1], |
| 1852 | }; |
| 1853 | } |
| 1854 | |
| 1855 | sub is ($$) |
| 1856 | { |
| 1857 | $_[0]->{lastexpected} = $_[1]; return $_[0]; |
| 1858 | } |
| 1859 | |
| 1860 | sub at ($$) |
| 1861 | { |
| 1862 | $_[0]->{lastunexpected} = $_[1]; return $_[0]; |
| 1863 | } |
| 1864 | |
| 1865 | sub failed ($) |
| 1866 | { |
| 1867 | return unless $_[0]->{lastexpected}; |
| 1868 | $_[0]->{expected} = $_[0]->{lastexpected} unless $_[0]->{failed}; |
| 1869 | $_[0]->{unexpected} = $_[0]->{lastunexpected} unless $_[0]->{failed}; |
| 1870 | $_[0]->{failed} = 1; |
| 1871 | } |
| 1872 | |
| 1873 | sub message ($) |
| 1874 | { |
| 1875 | my ($self) = @_; |
| 1876 | $self->{expected} = $self->{defexpected} unless $self->{expected}; |
| 1877 | $self->{expected} =~ s/_/ /g; |
| 1878 | if (!$self->{unexpected} || $self->{unexpected} =~ /\A\s*\Z/s) |
| 1879 | { |
| 1880 | return "Was expecting $self->{expected}"; |
| 1881 | } |
| 1882 | else |
| 1883 | { |
| 1884 | $self->{unexpected} =~ /\s*(.*)/; |
| 1885 | return "Was expecting $self->{expected} but found \"$1\" instead"; |
| 1886 | } |
| 1887 | } |
| 1888 | |
| 1889 | 1; |
| 1890 | |
| 1891 | package Parse::RecDescent; |
| 1892 | |
| 1893 | use Carp; |
| 1894 | use vars qw ( $AUTOLOAD $VERSION $_FILENAME); |
| 1895 | |
| 1896 | my $ERRORS = 0; |
| 1897 | |
| 1898 | our $VERSION = '1.967006'; |
| 1899 | $VERSION = eval $VERSION; |
| 1900 | $_FILENAME=__FILE__; |
| 1901 | |
| 1902 | # BUILDING A PARSER |
| 1903 | |
| 1904 | my $nextnamespace = "namespace000001"; |
| 1905 | |
| 1906 | sub _nextnamespace() |
| 1907 | { |
| 1908 | return "Parse::RecDescent::" . $nextnamespace++; |
| 1909 | } |
| 1910 | |
| 1911 | # ARGS ARE: $class, $grammar, $compiling, $namespace |
| 1912 | sub new ($$$$) |
| 1913 | { |
| 1914 | my $class = ref($_[0]) || $_[0]; |
| 1915 | local $Parse::RecDescent::compiling = $_[2]; |
| 1916 | my $name_space_name = defined $_[3] |
| 1917 | ? "Parse::RecDescent::".$_[3] |
| 1918 | : _nextnamespace(); |
| 1919 | my $self = |
| 1920 | { |
| 1921 | "rules" => {}, |
| 1922 | "namespace" => $name_space_name, |
| 1923 | "startcode" => '', |
| 1924 | "localvars" => '', |
| 1925 | "_AUTOACTION" => undef, |
| 1926 | "_AUTOTREE" => undef, |
| 1927 | }; |
| 1928 | |
| 1929 | |
| 1930 | if ($::RD_AUTOACTION) { |
| 1931 | my $sourcecode = $::RD_AUTOACTION; |
| 1932 | $sourcecode = "{ $sourcecode }" |
| 1933 | unless $sourcecode =~ /\A\s*\{.*\}\s*\Z/; |
| 1934 | $self->{_check}{itempos} = |
| 1935 | $sourcecode =~ /\@itempos\b|\$itempos\s*\[/; |
| 1936 | $self->{_AUTOACTION} |
| 1937 | = new Parse::RecDescent::Action($sourcecode,0,-1) |
| 1938 | } |
| 1939 | |
| 1940 | bless $self, $class; |
| 1941 | return $self->Replace($_[1]) |
| 1942 | } |
| 1943 | |
| 1944 | sub Compile($$$$) { |
| 1945 | die "Compilation of Parse::RecDescent grammars not yet implemented\n"; |
| 1946 | } |
| 1947 | |
| 1948 | sub DESTROY { |
| 1949 | my ($self) = @_; |
| 1950 | my $namespace = $self->{namespace}; |
| 1951 | $namespace =~ s/Parse::RecDescent:://; |
| 1952 | if (!$self->{_precompiled}) { |
| 1953 | delete $Parse::RecDescent::{$namespace.'::'}; |
| 1954 | } |
| 1955 | } |
| 1956 | |
| 1957 | # BUILDING A GRAMMAR.... |
| 1958 | |
| 1959 | # ARGS ARE: $self, $grammar, $isimplicit, $isleftop |
| 1960 | sub Replace ($$) |
| 1961 | { |
| 1962 | # set $replace = 1 for _generate |
| 1963 | splice(@_, 2, 0, 1); |
| 1964 | |
| 1965 | return _generate(@_); |
| 1966 | } |
| 1967 | |
| 1968 | # ARGS ARE: $self, $grammar, $isimplicit, $isleftop |
| 1969 | sub Extend ($$) |
| 1970 | { |
| 1971 | # set $replace = 0 for _generate |
| 1972 | splice(@_, 2, 0, 0); |
| 1973 | |
| 1974 | return _generate(@_); |
| 1975 | } |
| 1976 | |
| 1977 | sub _no_rule ($$;$) |
| 1978 | { |
| 1979 | _error("Ruleless $_[0] at start of grammar.",$_[1]); |
| 1980 | my $desc = $_[2] ? "\"$_[2]\"" : ""; |
| 1981 | _hint("You need to define a rule for the $_[0] $desc |
| 1982 | to be part of."); |
| 1983 | } |
| 1984 | |
| 1985 | my $NEGLOOKAHEAD = '\G(\s*\.\.\.\!)'; |
| 1986 | my $POSLOOKAHEAD = '\G(\s*\.\.\.)'; |
| 1987 | my $RULE = '\G\s*(\w+)[ \t]*:'; |
| 1988 | my $PROD = '\G\s*([|])'; |
| 1989 | my $TOKEN = q{\G\s*/((\\\\/|\\\\\\\\|[^/])*)/([cgimsox]*)}; |
| 1990 | my $MTOKEN = q{\G\s*(m\s*[^\w\s])}; |
| 1991 | my $LITERAL = q{\G\s*'((\\\\['\\\\]|[^'])*)'}; |
| 1992 | my $INTERPLIT = q{\G\s*"((\\\\["\\\\]|[^"])*)"}; |
| 1993 | my $SUBRULE = '\G\s*(\w+)'; |
| 1994 | my $MATCHRULE = '\G(\s*<matchrule:)'; |
| 1995 | my $SIMPLEPAT = '((\\s+/[^/\\\\]*(?:\\\\.[^/\\\\]*)*/)?)'; |
| 1996 | my $OPTIONAL = '\G\((\?)'.$SIMPLEPAT.'\)'; |
| 1997 | my $ANY = '\G\((s\?)'.$SIMPLEPAT.'\)'; |
| 1998 | my $MANY = '\G\((s|\.\.)'.$SIMPLEPAT.'\)'; |
| 1999 | my $EXACTLY = '\G\(([1-9]\d*)'.$SIMPLEPAT.'\)'; |
| 2000 | my $BETWEEN = '\G\((\d+)\.\.([1-9]\d*)'.$SIMPLEPAT.'\)'; |
| 2001 | my $ATLEAST = '\G\((\d+)\.\.'.$SIMPLEPAT.'\)'; |
| 2002 | my $ATMOST = '\G\(\.\.([1-9]\d*)'.$SIMPLEPAT.'\)'; |
| 2003 | my $BADREP = '\G\((-?\d+)?\.\.(-?\d+)?'.$SIMPLEPAT.'\)'; |
| 2004 | my $ACTION = '\G\s*\{'; |
| 2005 | my $IMPLICITSUBRULE = '\G\s*\('; |
| 2006 | my $COMMENT = '\G\s*(#.*)'; |
| 2007 | my $COMMITMK = '\G\s*<commit>'; |
| 2008 | my $UNCOMMITMK = '\G\s*<uncommit>'; |
| 2009 | my $QUOTELIKEMK = '\G\s*<perl_quotelike>'; |
| 2010 | my $CODEBLOCKMK = '\G\s*<perl_codeblock(?:\s+([][()<>{}]+))?>'; |
| 2011 | my $VARIABLEMK = '\G\s*<perl_variable>'; |
| 2012 | my $NOCHECKMK = '\G\s*<nocheck>'; |
| 2013 | my $AUTOACTIONPATMK = '\G\s*<autoaction:'; |
| 2014 | my $AUTOTREEMK = '\G\s*<autotree(?::\s*([\w:]+)\s*)?>'; |
| 2015 | my $AUTOSTUBMK = '\G\s*<autostub>'; |
| 2016 | my $AUTORULEMK = '\G\s*<autorule:(.*?)>'; |
| 2017 | my $REJECTMK = '\G\s*<reject>'; |
| 2018 | my $CONDREJECTMK = '\G\s*<reject:'; |
| 2019 | my $SCOREMK = '\G\s*<score:'; |
| 2020 | my $AUTOSCOREMK = '\G\s*<autoscore:'; |
| 2021 | my $SKIPMK = '\G\s*<skip:'; |
| 2022 | my $OPMK = '\G\s*<(left|right)op(?:=(\'.*?\'))?:'; |
| 2023 | my $ENDDIRECTIVEMK = '\G\s*>'; |
| 2024 | my $RESYNCMK = '\G\s*<resync>'; |
| 2025 | my $RESYNCPATMK = '\G\s*<resync:'; |
| 2026 | my $RULEVARPATMK = '\G\s*<rulevar:'; |
| 2027 | my $DEFERPATMK = '\G\s*<defer:'; |
| 2028 | my $TOKENPATMK = '\G\s*<token:'; |
| 2029 | my $AUTOERRORMK = '\G\s*<error(\??)>'; |
| 2030 | my $MSGERRORMK = '\G\s*<error(\??):'; |
| 2031 | my $NOCHECK = '\G\s*<nocheck>'; |
| 2032 | my $WARNMK = '\G\s*<warn((?::\s*(\d+)\s*)?)>'; |
| 2033 | my $HINTMK = '\G\s*<hint>'; |
| 2034 | my $TRACEBUILDMK = '\G\s*<trace_build((?::\s*(\d+)\s*)?)>'; |
| 2035 | my $TRACEPARSEMK = '\G\s*<trace_parse((?::\s*(\d+)\s*)?)>'; |
| 2036 | my $UNCOMMITPROD = $PROD.'\s*<uncommit'; |
| 2037 | my $ERRORPROD = $PROD.'\s*<error'; |
| 2038 | my $LONECOLON = '\G\s*:'; |
| 2039 | my $OTHER = '\G\s*([^\s]+)'; |
| 2040 | |
| 2041 | my @lines = 0; |
| 2042 | |
| 2043 | sub _generate |
| 2044 | { |
| 2045 | my ($self, $grammar, $replace, $isimplicit, $isleftop) = (@_, 0); |
| 2046 | |
| 2047 | my $aftererror = 0; |
| 2048 | my $lookahead = 0; |
| 2049 | my $lookaheadspec = ""; |
| 2050 | my $must_pop_lines; |
| 2051 | if (! $lines[-1]) { |
| 2052 | push @lines, _linecount($grammar) ; |
| 2053 | $must_pop_lines = 1; |
| 2054 | } |
| 2055 | $self->{_check}{itempos} = ($grammar =~ /\@itempos\b|\$itempos\s*\[/) |
| 2056 | unless $self->{_check}{itempos}; |
| 2057 | for (qw(thisoffset thiscolumn prevline prevoffset prevcolumn)) |
| 2058 | { |
| 2059 | $self->{_check}{$_} = |
| 2060 | ($grammar =~ /\$$_/) || $self->{_check}{itempos} |
| 2061 | unless $self->{_check}{$_}; |
| 2062 | } |
| 2063 | my $line; |
| 2064 | |
| 2065 | my $rule = undef; |
| 2066 | my $prod = undef; |
| 2067 | my $item = undef; |
| 2068 | my $lastgreedy = ''; |
| 2069 | pos $grammar = 0; |
| 2070 | study $grammar; |
| 2071 | |
| 2072 | local $::RD_HINT = $::RD_HINT; |
| 2073 | local $::RD_WARN = $::RD_WARN; |
| 2074 | local $::RD_TRACE = $::RD_TRACE; |
| 2075 | local $::RD_CHECK = $::RD_CHECK; |
| 2076 | |
| 2077 | while (pos $grammar < length $grammar) |
| 2078 | { |
| 2079 | $line = $lines[-1] - _linecount($grammar) + 1; |
| 2080 | my $commitonly; |
| 2081 | my $code = ""; |
| 2082 | my @components = (); |
| 2083 | if ($grammar =~ m/$COMMENT/gco) |
| 2084 | { |
| 2085 | _parse("a comment",0,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2086 | next; |
| 2087 | } |
| 2088 | elsif ($grammar =~ m/$NEGLOOKAHEAD/gco) |
| 2089 | { |
| 2090 | _parse("a negative lookahead",$aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2091 | $lookahead = $lookahead ? -$lookahead : -1; |
| 2092 | $lookaheadspec .= $1; |
| 2093 | next; # SKIP LOOKAHEAD RESET AT END OF while LOOP |
| 2094 | } |
| 2095 | elsif ($grammar =~ m/$POSLOOKAHEAD/gco) |
| 2096 | { |
| 2097 | _parse("a positive lookahead",$aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2098 | $lookahead = $lookahead ? $lookahead : 1; |
| 2099 | $lookaheadspec .= $1; |
| 2100 | next; # SKIP LOOKAHEAD RESET AT END OF while LOOP |
| 2101 | } |
| 2102 | elsif ($grammar =~ m/(?=$ACTION)/gco |
| 2103 | and do { ($code) = extract_codeblock($grammar); $code }) |
| 2104 | { |
| 2105 | _parse("an action", $aftererror, $line, $code); |
| 2106 | $item = new Parse::RecDescent::Action($code,$lookahead,$line); |
| 2107 | $prod and $prod->additem($item) |
| 2108 | or $self->_addstartcode($code); |
| 2109 | } |
| 2110 | elsif ($grammar =~ m/(?=$IMPLICITSUBRULE)/gco |
| 2111 | and do { ($code) = extract_codeblock($grammar,'{([',undef,'(',1); |
| 2112 | $code }) |
| 2113 | { |
| 2114 | $code =~ s/\A\s*\(|\)\Z//g; |
| 2115 | _parse("an implicit subrule", $aftererror, $line, |
| 2116 | "( $code )"); |
| 2117 | my $implicit = $rule->nextimplicit; |
| 2118 | return undef |
| 2119 | if !$self->_generate("$implicit : $code",$replace,1); |
| 2120 | my $pos = pos $grammar; |
| 2121 | substr($grammar,$pos,0,$implicit); |
| 2122 | pos $grammar = $pos;; |
| 2123 | } |
| 2124 | elsif ($grammar =~ m/$ENDDIRECTIVEMK/gco) |
| 2125 | { |
| 2126 | |
| 2127 | # EXTRACT TRAILING REPETITION SPECIFIER (IF ANY) |
| 2128 | |
| 2129 | my ($minrep,$maxrep) = (1,$MAXREP); |
| 2130 | if ($grammar =~ m/\G[(]/gc) |
| 2131 | { |
| 2132 | pos($grammar)--; |
| 2133 | |
| 2134 | if ($grammar =~ m/$OPTIONAL/gco) |
| 2135 | { ($minrep, $maxrep) = (0,1) } |
| 2136 | elsif ($grammar =~ m/$ANY/gco) |
| 2137 | { $minrep = 0 } |
| 2138 | elsif ($grammar =~ m/$EXACTLY/gco) |
| 2139 | { ($minrep, $maxrep) = ($1,$1) } |
| 2140 | elsif ($grammar =~ m/$BETWEEN/gco) |
| 2141 | { ($minrep, $maxrep) = ($1,$2) } |
| 2142 | elsif ($grammar =~ m/$ATLEAST/gco) |
| 2143 | { $minrep = $1 } |
| 2144 | elsif ($grammar =~ m/$ATMOST/gco) |
| 2145 | { $maxrep = $1 } |
| 2146 | elsif ($grammar =~ m/$MANY/gco) |
| 2147 | { } |
| 2148 | elsif ($grammar =~ m/$BADREP/gco) |
| 2149 | { |
| 2150 | _parse("an invalid repetition specifier", 0,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2151 | _error("Incorrect specification of a repeated directive", |
| 2152 | $line); |
| 2153 | _hint("Repeated directives cannot have |
| 2154 | a maximum repetition of zero, nor can they have |
| 2155 | negative components in their ranges."); |
| 2156 | } |
| 2157 | } |
| 2158 | |
| 2159 | $prod && $prod->enddirective($line,$minrep,$maxrep); |
| 2160 | } |
| 2161 | elsif ($grammar =~ m/\G\s*<[^m]/gc) |
| 2162 | { |
| 2163 | pos($grammar)-=2; |
| 2164 | |
| 2165 | if ($grammar =~ m/$OPMK/gco) |
| 2166 | { |
| 2167 | # $DB::single=1; |
| 2168 | _parse("a $1-associative operator directive", $aftererror, $line, "<$1op:...>"); |
| 2169 | $prod->adddirective($1, $line,$2||''); |
| 2170 | } |
| 2171 | elsif ($grammar =~ m/$UNCOMMITMK/gco) |
| 2172 | { |
| 2173 | _parse("an uncommit marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2174 | $item = new Parse::RecDescent::Directive('$commit=0;1', |
| 2175 | $lookahead,$line,"<uncommit>"); |
| 2176 | $prod and $prod->additem($item) |
| 2177 | or _no_rule("<uncommit>",$line); |
| 2178 | } |
| 2179 | elsif ($grammar =~ m/$QUOTELIKEMK/gco) |
| 2180 | { |
| 2181 | _parse("an perl quotelike marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2182 | $item = new Parse::RecDescent::Directive( |
| 2183 | 'my ($match,@res); |
| 2184 | ($match,$text,undef,@res) = |
| 2185 | Text::Balanced::extract_quotelike($text,$skip); |
| 2186 | $match ? \@res : undef; |
| 2187 | ', $lookahead,$line,"<perl_quotelike>"); |
| 2188 | $prod and $prod->additem($item) |
| 2189 | or _no_rule("<perl_quotelike>",$line); |
| 2190 | } |
| 2191 | elsif ($grammar =~ m/$CODEBLOCKMK/gco) |
| 2192 | { |
| 2193 | my $outer = $1||"{}"; |
| 2194 | _parse("an perl codeblock marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2195 | $item = new Parse::RecDescent::Directive( |
| 2196 | 'Text::Balanced::extract_codeblock($text,undef,$skip,\''.$outer.'\'); |
| 2197 | ', $lookahead,$line,"<perl_codeblock>"); |
| 2198 | $prod and $prod->additem($item) |
| 2199 | or _no_rule("<perl_codeblock>",$line); |
| 2200 | } |
| 2201 | elsif ($grammar =~ m/$VARIABLEMK/gco) |
| 2202 | { |
| 2203 | _parse("an perl variable marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2204 | $item = new Parse::RecDescent::Directive( |
| 2205 | 'Text::Balanced::extract_variable($text,$skip); |
| 2206 | ', $lookahead,$line,"<perl_variable>"); |
| 2207 | $prod and $prod->additem($item) |
| 2208 | or _no_rule("<perl_variable>",$line); |
| 2209 | } |
| 2210 | elsif ($grammar =~ m/$NOCHECKMK/gco) |
| 2211 | { |
| 2212 | _parse("a disable checking marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2213 | if ($rule) |
| 2214 | { |
| 2215 | _error("<nocheck> directive not at start of grammar", $line); |
| 2216 | _hint("The <nocheck> directive can only |
| 2217 | be specified at the start of a |
| 2218 | grammar (before the first rule |
| 2219 | is defined."); |
| 2220 | } |
| 2221 | else |
| 2222 | { |
| 2223 | local $::RD_CHECK = 1; |
| 2224 | } |
| 2225 | } |
| 2226 | elsif ($grammar =~ m/$AUTOSTUBMK/gco) |
| 2227 | { |
| 2228 | _parse("an autostub marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2229 | $::RD_AUTOSTUB = ""; |
| 2230 | } |
| 2231 | elsif ($grammar =~ m/$AUTORULEMK/gco) |
| 2232 | { |
| 2233 | _parse("an autorule marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2234 | $::RD_AUTOSTUB = $1; |
| 2235 | } |
| 2236 | elsif ($grammar =~ m/$AUTOTREEMK/gco) |
| 2237 | { |
| 2238 | my $base = defined($1) ? $1 : ""; |
| 2239 | my $current_match = substr($grammar, $-[0], $+[0] - $-[0]); |
| 2240 | $base .= "::" if $base && $base !~ /::$/; |
| 2241 | _parse("an autotree marker", $aftererror,$line, $current_match); |
| 2242 | if ($rule) |
| 2243 | { |
| 2244 | _error("<autotree> directive not at start of grammar", $line); |
| 2245 | _hint("The <autotree> directive can only |
| 2246 | be specified at the start of a |
| 2247 | grammar (before the first rule |
| 2248 | is defined."); |
| 2249 | } |
| 2250 | else |
| 2251 | { |
| 2252 | undef $self->{_AUTOACTION}; |
| 2253 | $self->{_AUTOTREE}{NODE} |
| 2254 | = new Parse::RecDescent::Action(q({bless \%item, ').$base.q('.$item[0]}),0,-1); |
| 2255 | $self->{_AUTOTREE}{TERMINAL} |
| 2256 | = new Parse::RecDescent::Action(q({bless {__VALUE__=>$item[1]}, ').$base.q('.$item[0]}),0,-1); |
| 2257 | } |
| 2258 | } |
| 2259 | |
| 2260 | elsif ($grammar =~ m/$REJECTMK/gco) |
| 2261 | { |
| 2262 | _parse("an reject marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2263 | $item = new Parse::RecDescent::UncondReject($lookahead,$line,"<reject>"); |
| 2264 | $prod and $prod->additem($item) |
| 2265 | or _no_rule("<reject>",$line); |
| 2266 | } |
| 2267 | elsif ($grammar =~ m/(?=$CONDREJECTMK)/gco |
| 2268 | and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); |
| 2269 | $code }) |
| 2270 | { |
| 2271 | _parse("a (conditional) reject marker", $aftererror,$line, $code ); |
| 2272 | $code =~ /\A\s*<reject:(.*)>\Z/s; |
| 2273 | my $cond = $1; |
| 2274 | $item = new Parse::RecDescent::Directive( |
| 2275 | "($1) ? undef : 1", $lookahead,$line,"<reject:$cond>"); |
| 2276 | $prod and $prod->additem($item) |
| 2277 | or _no_rule("<reject:$cond>",$line); |
| 2278 | } |
| 2279 | elsif ($grammar =~ m/(?=$SCOREMK)/gco |
| 2280 | and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); |
| 2281 | $code }) |
| 2282 | { |
| 2283 | _parse("a score marker", $aftererror,$line, $code ); |
| 2284 | $code =~ /\A\s*<score:(.*)>\Z/s; |
| 2285 | $prod and $prod->addscore($1, $lookahead, $line) |
| 2286 | or _no_rule($code,$line); |
| 2287 | } |
| 2288 | elsif ($grammar =~ m/(?=$AUTOSCOREMK)/gco |
| 2289 | and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); |
| 2290 | $code; |
| 2291 | } ) |
| 2292 | { |
| 2293 | _parse("an autoscore specifier", $aftererror,$line,$code); |
| 2294 | $code =~ /\A\s*<autoscore:(.*)>\Z/s; |
| 2295 | |
| 2296 | $rule and $rule->addautoscore($1,$self) |
| 2297 | or _no_rule($code,$line); |
| 2298 | |
| 2299 | $item = new Parse::RecDescent::UncondReject($lookahead,$line,$code); |
| 2300 | $prod and $prod->additem($item) |
| 2301 | or _no_rule($code,$line); |
| 2302 | } |
| 2303 | elsif ($grammar =~ m/$RESYNCMK/gco) |
| 2304 | { |
| 2305 | _parse("a resync to newline marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2306 | $item = new Parse::RecDescent::Directive( |
| 2307 | 'if ($text =~ s/(\A[^\n]*\n)//) { $return = 0; $1; } else { undef }', |
| 2308 | $lookahead,$line,"<resync>"); |
| 2309 | $prod and $prod->additem($item) |
| 2310 | or _no_rule("<resync>",$line); |
| 2311 | } |
| 2312 | elsif ($grammar =~ m/(?=$RESYNCPATMK)/gco |
| 2313 | and do { ($code) = extract_bracketed($grammar,'<'); |
| 2314 | $code }) |
| 2315 | { |
| 2316 | _parse("a resync with pattern marker", $aftererror,$line, $code ); |
| 2317 | $code =~ /\A\s*<resync:(.*)>\Z/s; |
| 2318 | $item = new Parse::RecDescent::Directive( |
| 2319 | 'if ($text =~ s/(\A'.$1.')//) { $return = 0; $1; } else { undef }', |
| 2320 | $lookahead,$line,$code); |
| 2321 | $prod and $prod->additem($item) |
| 2322 | or _no_rule($code,$line); |
| 2323 | } |
| 2324 | elsif ($grammar =~ m/(?=$SKIPMK)/gco |
| 2325 | and do { ($code) = extract_codeblock($grammar,'<'); |
| 2326 | $code }) |
| 2327 | { |
| 2328 | _parse("a skip marker", $aftererror,$line, $code ); |
| 2329 | $code =~ /\A\s*<skip:(.*)>\Z/s; |
| 2330 | if ($rule) { |
| 2331 | $item = new Parse::RecDescent::Directive( |
| 2332 | 'my $oldskip = $skip; $skip='.$1.'; $oldskip', |
| 2333 | $lookahead,$line,$code); |
| 2334 | $prod and $prod->additem($item) |
| 2335 | or _no_rule($code,$line); |
| 2336 | } else { |
| 2337 | #global <skip> directive |
| 2338 | $self->{skip} = $1; |
| 2339 | } |
| 2340 | } |
| 2341 | elsif ($grammar =~ m/(?=$RULEVARPATMK)/gco |
| 2342 | and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); |
| 2343 | $code; |
| 2344 | } ) |
| 2345 | { |
| 2346 | _parse("a rule variable specifier", $aftererror,$line,$code); |
| 2347 | $code =~ /\A\s*<rulevar:(.*)>\Z/s; |
| 2348 | |
| 2349 | $rule and $rule->addvar($1,$self) |
| 2350 | or _no_rule($code,$line); |
| 2351 | |
| 2352 | $item = new Parse::RecDescent::UncondReject($lookahead,$line,$code); |
| 2353 | $prod and $prod->additem($item) |
| 2354 | or _no_rule($code,$line); |
| 2355 | } |
| 2356 | elsif ($grammar =~ m/(?=$AUTOACTIONPATMK)/gco |
| 2357 | and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); |
| 2358 | $code; |
| 2359 | } ) |
| 2360 | { |
| 2361 | _parse("an autoaction specifier", $aftererror,$line,$code); |
| 2362 | $code =~ s/\A\s*<autoaction:(.*)>\Z/$1/s; |
| 2363 | if ($code =~ /\A\s*[^{]|[^}]\s*\Z/) { |
| 2364 | $code = "{ $code }" |
| 2365 | } |
| 2366 | $self->{_check}{itempos} = |
| 2367 | $code =~ /\@itempos\b|\$itempos\s*\[/; |
| 2368 | $self->{_AUTOACTION} |
| 2369 | = new Parse::RecDescent::Action($code,0,-$line) |
| 2370 | } |
| 2371 | elsif ($grammar =~ m/(?=$DEFERPATMK)/gco |
| 2372 | and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); |
| 2373 | $code; |
| 2374 | } ) |
| 2375 | { |
| 2376 | _parse("a deferred action specifier", $aftererror,$line,$code); |
| 2377 | $code =~ s/\A\s*<defer:(.*)>\Z/$1/s; |
| 2378 | if ($code =~ /\A\s*[^{]|[^}]\s*\Z/) |
| 2379 | { |
| 2380 | $code = "{ $code }" |
| 2381 | } |
| 2382 | |
| 2383 | $item = new Parse::RecDescent::Directive( |
| 2384 | "push \@{\$thisparser->{deferred}}, sub $code;", |
| 2385 | $lookahead,$line,"<defer:$code>"); |
| 2386 | $prod and $prod->additem($item) |
| 2387 | or _no_rule("<defer:$code>",$line); |
| 2388 | |
| 2389 | $self->{deferrable} = 1; |
| 2390 | } |
| 2391 | elsif ($grammar =~ m/(?=$TOKENPATMK)/gco |
| 2392 | and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); |
| 2393 | $code; |
| 2394 | } ) |
| 2395 | { |
| 2396 | _parse("a token constructor", $aftererror,$line,$code); |
| 2397 | $code =~ s/\A\s*<token:(.*)>\Z/$1/s; |
| 2398 | |
| 2399 | my $types = eval 'no strict; local $SIG{__WARN__} = sub {0}; my @arr=('.$code.'); @arr' || (); |
| 2400 | if (!$types) |
| 2401 | { |
| 2402 | _error("Incorrect token specification: \"$@\"", $line); |
| 2403 | _hint("The <token:...> directive requires a list |
| 2404 | of one or more strings representing possible |
| 2405 | types of the specified token. For example: |
| 2406 | <token:NOUN,VERB>"); |
| 2407 | } |
| 2408 | else |
| 2409 | { |
| 2410 | $item = new Parse::RecDescent::Directive( |
| 2411 | 'no strict; |
| 2412 | $return = { text => $item[-1] }; |
| 2413 | @{$return->{type}}{'.$code.'} = (1..'.$types.');', |
| 2414 | $lookahead,$line,"<token:$code>"); |
| 2415 | $prod and $prod->additem($item) |
| 2416 | or _no_rule("<token:$code>",$line); |
| 2417 | } |
| 2418 | } |
| 2419 | elsif ($grammar =~ m/$COMMITMK/gco) |
| 2420 | { |
| 2421 | _parse("an commit marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2422 | $item = new Parse::RecDescent::Directive('$commit = 1', |
| 2423 | $lookahead,$line,"<commit>"); |
| 2424 | $prod and $prod->additem($item) |
| 2425 | or _no_rule("<commit>",$line); |
| 2426 | } |
| 2427 | elsif ($grammar =~ m/$NOCHECKMK/gco) { |
| 2428 | _parse("an hint request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2429 | $::RD_CHECK = 0; |
| 2430 | } |
| 2431 | elsif ($grammar =~ m/$HINTMK/gco) { |
| 2432 | _parse("an hint request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2433 | $::RD_HINT = $self->{__HINT__} = 1; |
| 2434 | } |
| 2435 | elsif ($grammar =~ m/$WARNMK/gco) { |
| 2436 | _parse("an warning request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2437 | $::RD_WARN = $self->{__WARN__} = $1 ? $2+0 : 1; |
| 2438 | } |
| 2439 | elsif ($grammar =~ m/$TRACEBUILDMK/gco) { |
| 2440 | _parse("an grammar build trace request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2441 | $::RD_TRACE = $1 ? $2+0 : 1; |
| 2442 | } |
| 2443 | elsif ($grammar =~ m/$TRACEPARSEMK/gco) { |
| 2444 | _parse("an parse trace request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2445 | $self->{__TRACE__} = $1 ? $2+0 : 1; |
| 2446 | } |
| 2447 | elsif ($grammar =~ m/$AUTOERRORMK/gco) |
| 2448 | { |
| 2449 | $commitonly = $1; |
| 2450 | _parse("an error marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2451 | $item = new Parse::RecDescent::Error('',$lookahead,$1,$line); |
| 2452 | $prod and $prod->additem($item) |
| 2453 | or _no_rule("<error>",$line); |
| 2454 | $aftererror = !$commitonly; |
| 2455 | } |
| 2456 | elsif ($grammar =~ m/(?=$MSGERRORMK)/gco |
| 2457 | and do { $commitonly = $1; |
| 2458 | ($code) = extract_bracketed($grammar,'<'); |
| 2459 | $code }) |
| 2460 | { |
| 2461 | _parse("an error marker", $aftererror,$line,$code); |
| 2462 | $code =~ /\A\s*<error\??:(.*)>\Z/s; |
| 2463 | $item = new Parse::RecDescent::Error($1,$lookahead,$commitonly,$line); |
| 2464 | $prod and $prod->additem($item) |
| 2465 | or _no_rule("$code",$line); |
| 2466 | $aftererror = !$commitonly; |
| 2467 | } |
| 2468 | elsif (do { $commitonly = $1; |
| 2469 | ($code) = extract_bracketed($grammar,'<'); |
| 2470 | $code }) |
| 2471 | { |
| 2472 | if ($code =~ /^<[A-Z_]+>$/) |
| 2473 | { |
| 2474 | _error("Token items are not yet |
| 2475 | supported: \"$code\"", |
| 2476 | $line); |
| 2477 | _hint("Items like $code that consist of angle |
| 2478 | brackets enclosing a sequence of |
| 2479 | uppercase characters will eventually |
| 2480 | be used to specify pre-lexed tokens |
| 2481 | in a grammar. That functionality is not |
| 2482 | yet implemented. Or did you misspell |
| 2483 | \"$code\"?"); |
| 2484 | } |
| 2485 | else |
| 2486 | { |
| 2487 | _error("Untranslatable item encountered: \"$code\"", |
| 2488 | $line); |
| 2489 | _hint("Did you misspell \"$code\" |
| 2490 | or forget to comment it out?"); |
| 2491 | } |
| 2492 | } |
| 2493 | } |
| 2494 | elsif ($grammar =~ m/$RULE/gco) |
| 2495 | { |
| 2496 | _parseunneg("a rule declaration", 0, |
| 2497 | $lookahead,$line, substr($grammar, $-[0], $+[0] - $-[0]) ) or next; |
| 2498 | my $rulename = $1; |
| 2499 | if ($rulename =~ /Replace|Extend|Precompile|Save/ ) |
| 2500 | { |
| 2501 | _warn(2,"Rule \"$rulename\" hidden by method |
| 2502 | Parse::RecDescent::$rulename",$line) |
| 2503 | and |
| 2504 | _hint("The rule named \"$rulename\" cannot be directly |
| 2505 | called through the Parse::RecDescent object |
| 2506 | for this grammar (although it may still |
| 2507 | be used as a subrule of other rules). |
| 2508 | It can't be directly called because |
| 2509 | Parse::RecDescent::$rulename is already defined (it |
| 2510 | is the standard method of all |
| 2511 | parsers)."); |
| 2512 | } |
| 2513 | $rule = new Parse::RecDescent::Rule($rulename,$self,$line,$replace); |
| 2514 | $prod->check_pending($line) if $prod; |
| 2515 | $prod = $rule->addprod( new Parse::RecDescent::Production ); |
| 2516 | $aftererror = 0; |
| 2517 | } |
| 2518 | elsif ($grammar =~ m/$UNCOMMITPROD/gco) |
| 2519 | { |
| 2520 | pos($grammar)-=9; |
| 2521 | _parseunneg("a new (uncommitted) production", |
| 2522 | 0, $lookahead, $line, substr($grammar, $-[0], $+[0] - $-[0]) ) or next; |
| 2523 | |
| 2524 | $prod->check_pending($line) if $prod; |
| 2525 | $prod = new Parse::RecDescent::Production($line,1); |
| 2526 | $rule and $rule->addprod($prod) |
| 2527 | or _no_rule("<uncommit>",$line); |
| 2528 | $aftererror = 0; |
| 2529 | } |
| 2530 | elsif ($grammar =~ m/$ERRORPROD/gco) |
| 2531 | { |
| 2532 | pos($grammar)-=6; |
| 2533 | _parseunneg("a new (error) production", $aftererror, |
| 2534 | $lookahead,$line, substr($grammar, $-[0], $+[0] - $-[0]) ) or next; |
| 2535 | $prod->check_pending($line) if $prod; |
| 2536 | $prod = new Parse::RecDescent::Production($line,0,1); |
| 2537 | $rule and $rule->addprod($prod) |
| 2538 | or _no_rule("<error>",$line); |
| 2539 | $aftererror = 0; |
| 2540 | } |
| 2541 | elsif ($grammar =~ m/$PROD/gco) |
| 2542 | { |
| 2543 | _parseunneg("a new production", 0, |
| 2544 | $lookahead,$line, substr($grammar, $-[0], $+[0] - $-[0]) ) or next; |
| 2545 | $rule |
| 2546 | and (!$prod || $prod->check_pending($line)) |
| 2547 | and $prod = $rule->addprod(new Parse::RecDescent::Production($line)) |
| 2548 | or _no_rule("production",$line); |
| 2549 | $aftererror = 0; |
| 2550 | } |
| 2551 | elsif ($grammar =~ m/$LITERAL/gco) |
| 2552 | { |
| 2553 | my $literal = $1; |
| 2554 | ($code = $literal) =~ s/\\\\/\\/g; |
| 2555 | _parse("a literal terminal", $aftererror,$line,$literal); |
| 2556 | $item = new Parse::RecDescent::Literal($code,$lookahead,$line); |
| 2557 | $prod and $prod->additem($item) |
| 2558 | or _no_rule("literal terminal",$line,"'$literal'"); |
| 2559 | } |
| 2560 | elsif ($grammar =~ m/$INTERPLIT/gco) |
| 2561 | { |
| 2562 | _parse("an interpolated literal terminal", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2563 | $item = new Parse::RecDescent::InterpLit($1,$lookahead,$line); |
| 2564 | $prod and $prod->additem($item) |
| 2565 | or _no_rule("interpolated literal terminal",$line,"'$1'"); |
| 2566 | } |
| 2567 | elsif ($grammar =~ m/$TOKEN/gco) |
| 2568 | { |
| 2569 | _parse("a /../ pattern terminal", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| 2570 | $item = new Parse::RecDescent::Token($1,'/',$3?$3:'',$lookahead,$line); |
| 2571 | $prod and $prod->additem($item) |
| 2572 | or _no_rule("pattern terminal",$line,"/$1/"); |
| 2573 | } |
| 2574 | elsif ($grammar =~ m/(?=$MTOKEN)/gco |
| 2575 | and do { ($code, undef, @components) |
| 2576 | = extract_quotelike($grammar); |
| 2577 | $code } |
| 2578 | ) |
| 2579 | |
| 2580 | { |
| 2581 | _parse("an m/../ pattern terminal", $aftererror,$line,$code); |
| 2582 | $item = new Parse::RecDescent::Token(@components[3,2,8], |
| 2583 | $lookahead,$line); |
| 2584 | $prod and $prod->additem($item) |
| 2585 | or _no_rule("pattern terminal",$line,$code); |
| 2586 | } |
| 2587 | elsif ($grammar =~ m/(?=$MATCHRULE)/gco |
| 2588 | and do { ($code) = extract_bracketed($grammar,'<'); |
| 2589 | $code |
| 2590 | } |
| 2591 | or $grammar =~ m/$SUBRULE/gco |
| 2592 | and $code = $1) |
| 2593 | { |
| 2594 | my $name = $code; |
| 2595 | my $matchrule = 0; |
| 2596 | if (substr($name,0,1) eq '<') |
| 2597 | { |
| 2598 | $name =~ s/$MATCHRULE\s*//; |
| 2599 | $name =~ s/\s*>\Z//; |
| 2600 | $matchrule = 1; |
| 2601 | } |
| 2602 | |
| 2603 | # EXTRACT TRAILING ARG LIST (IF ANY) |
| 2604 | |
| 2605 | my ($argcode) = extract_codeblock($grammar, "[]",'') || ''; |
| 2606 | |
| 2607 | # EXTRACT TRAILING REPETITION SPECIFIER (IF ANY) |
| 2608 | |
| 2609 | if ($grammar =~ m/\G[(]/gc) |
| 2610 | { |
| 2611 | pos($grammar)--; |
| 2612 | |
| 2613 | if ($grammar =~ m/$OPTIONAL/gco) |
| 2614 | { |
| 2615 | _parse("an zero-or-one subrule match", $aftererror,$line,"$code$argcode($1)"); |
| 2616 | $item = new Parse::RecDescent::Repetition($name,$1,0,1, |
| 2617 | $lookahead,$line, |
| 2618 | $self, |
| 2619 | $matchrule, |
| 2620 | $argcode); |
| 2621 | $prod and $prod->additem($item) |
| 2622 | or _no_rule("repetition",$line,"$code$argcode($1)"); |
| 2623 | |
| 2624 | !$matchrule and $rule and $rule->addcall($name); |
| 2625 | } |
| 2626 | elsif ($grammar =~ m/$ANY/gco) |
| 2627 | { |
| 2628 | _parse("a zero-or-more subrule match", $aftererror,$line,"$code$argcode($1)"); |
| 2629 | if ($2) |
| 2630 | { |
| 2631 | my $pos = pos $grammar; |
| 2632 | substr($grammar,$pos,0, |
| 2633 | "<leftop='$name(s?)': $name $2 $name>(s?) "); |
| 2634 | |
| 2635 | pos $grammar = $pos; |
| 2636 | } |
| 2637 | else |
| 2638 | { |
| 2639 | $item = new Parse::RecDescent::Repetition($name,$1,0,$MAXREP, |
| 2640 | $lookahead,$line, |
| 2641 | $self, |
| 2642 | $matchrule, |
| 2643 | $argcode); |
| 2644 | $prod and $prod->additem($item) |
| 2645 | or _no_rule("repetition",$line,"$code$argcode($1)"); |
| 2646 | |
| 2647 | !$matchrule and $rule and $rule->addcall($name); |
| 2648 | |
| 2649 | _check_insatiable($name,$1,$grammar,$line) if $::RD_CHECK; |
| 2650 | } |
| 2651 | } |
| 2652 | elsif ($grammar =~ m/$MANY/gco) |
| 2653 | { |
| 2654 | _parse("a one-or-more subrule match", $aftererror,$line,"$code$argcode($1)"); |
| 2655 | if ($2) |
| 2656 | { |
| 2657 | # $DB::single=1; |
| 2658 | my $pos = pos $grammar; |
| 2659 | substr($grammar,$pos,0, |
| 2660 | "<leftop='$name(s)': $name $2 $name> "); |
| 2661 | |
| 2662 | pos $grammar = $pos; |
| 2663 | } |
| 2664 | else |
| 2665 | { |
| 2666 | $item = new Parse::RecDescent::Repetition($name,$1,1,$MAXREP, |
| 2667 | $lookahead,$line, |
| 2668 | $self, |
| 2669 | $matchrule, |
| 2670 | $argcode); |
| 2671 | |
| 2672 | $prod and $prod->additem($item) |
| 2673 | or _no_rule("repetition",$line,"$code$argcode($1)"); |
| 2674 | |
| 2675 | !$matchrule and $rule and $rule->addcall($name); |
| 2676 | |
| 2677 | _check_insatiable($name,$1,$grammar,$line) if $::RD_CHECK; |
| 2678 | } |
| 2679 | } |
| 2680 | elsif ($grammar =~ m/$EXACTLY/gco) |
| 2681 | { |
| 2682 | _parse("an exactly-$1-times subrule match", $aftererror,$line,"$code$argcode($1)"); |
| 2683 | if ($2) |
| 2684 | { |
| 2685 | my $pos = pos $grammar; |
| 2686 | substr($grammar,$pos,0, |
| 2687 | "<leftop='$name($1)': $name $2 $name>($1) "); |
| 2688 | |
| 2689 | pos $grammar = $pos; |
| 2690 | } |
| 2691 | else |
| 2692 | { |
| 2693 | $item = new Parse::RecDescent::Repetition($name,$1,$1,$1, |
| 2694 | $lookahead,$line, |
| 2695 | $self, |
| 2696 | $matchrule, |
| 2697 | $argcode); |
| 2698 | $prod and $prod->additem($item) |
| 2699 | or _no_rule("repetition",$line,"$code$argcode($1)"); |
| 2700 | |
| 2701 | !$matchrule and $rule and $rule->addcall($name); |
| 2702 | } |
| 2703 | } |
| 2704 | elsif ($grammar =~ m/$BETWEEN/gco) |
| 2705 | { |
| 2706 | _parse("a $1-to-$2 subrule match", $aftererror,$line,"$code$argcode($1..$2)"); |
| 2707 | if ($3) |
| 2708 | { |
| 2709 | my $pos = pos $grammar; |
| 2710 | substr($grammar,$pos,0, |
| 2711 | "<leftop='$name($1..$2)': $name $3 $name>($1..$2) "); |
| 2712 | |
| 2713 | pos $grammar = $pos; |
| 2714 | } |
| 2715 | else |
| 2716 | { |
| 2717 | $item = new Parse::RecDescent::Repetition($name,"$1..$2",$1,$2, |
| 2718 | $lookahead,$line, |
| 2719 | $self, |
| 2720 | $matchrule, |
| 2721 | $argcode); |
| 2722 | $prod and $prod->additem($item) |
| 2723 | or _no_rule("repetition",$line,"$code$argcode($1..$2)"); |
| 2724 | |
| 2725 | !$matchrule and $rule and $rule->addcall($name); |
| 2726 | } |
| 2727 | } |
| 2728 | elsif ($grammar =~ m/$ATLEAST/gco) |
| 2729 | { |
| 2730 | _parse("a $1-or-more subrule match", $aftererror,$line,"$code$argcode($1..)"); |
| 2731 | if ($2) |
| 2732 | { |
| 2733 | my $pos = pos $grammar; |
| 2734 | substr($grammar,$pos,0, |
| 2735 | "<leftop='$name($1..)': $name $2 $name>($1..) "); |
| 2736 | |
| 2737 | pos $grammar = $pos; |
| 2738 | } |
| 2739 | else |
| 2740 | { |
| 2741 | $item = new Parse::RecDescent::Repetition($name,"$1..",$1,$MAXREP, |
| 2742 | $lookahead,$line, |
| 2743 | $self, |
| 2744 | $matchrule, |
| 2745 | $argcode); |
| 2746 | $prod and $prod->additem($item) |
| 2747 | or _no_rule("repetition",$line,"$code$argcode($1..)"); |
| 2748 | |
| 2749 | !$matchrule and $rule and $rule->addcall($name); |
| 2750 | _check_insatiable($name,"$1..",$grammar,$line) if $::RD_CHECK; |
| 2751 | } |
| 2752 | } |
| 2753 | elsif ($grammar =~ m/$ATMOST/gco) |
| 2754 | { |
| 2755 | _parse("a one-to-$1 subrule match", $aftererror,$line,"$code$argcode(..$1)"); |
| 2756 | if ($2) |
| 2757 | { |
| 2758 | my $pos = pos $grammar; |
| 2759 | substr($grammar,$pos,0, |
| 2760 | "<leftop='$name(..$1)': $name $2 $name>(..$1) "); |
| 2761 | |
| 2762 | pos $grammar = $pos; |
| 2763 | } |
| 2764 | else |
| 2765 | { |
| 2766 | $item = new Parse::RecDescent::Repetition($name,"..$1",1,$1, |
| 2767 | $lookahead,$line, |
| 2768 | $self, |
| 2769 | $matchrule, |
| 2770 | $argcode); |
| 2771 | $prod and $prod->additem($item) |
| 2772 | or _no_rule("repetition",$line,"$code$argcode(..$1)"); |
| 2773 | |
| 2774 | !$matchrule and $rule and $rule->addcall($name); |
| 2775 | } |
| 2776 | } |
| 2777 | elsif ($grammar =~ m/$BADREP/gco) |
| 2778 | { |
| 2779 | my $current_match = substr($grammar, $-[0], $+[0] - $-[0]); |
| 2780 | _parse("an subrule match with invalid repetition specifier", 0,$line, $current_match); |
| 2781 | _error("Incorrect specification of a repeated subrule", |
| 2782 | $line); |
| 2783 | _hint("Repeated subrules like \"$code$argcode$current_match\" cannot have |
| 2784 | a maximum repetition of zero, nor can they have |
| 2785 | negative components in their ranges."); |
| 2786 | } |
| 2787 | } |
| 2788 | else |
| 2789 | { |
| 2790 | _parse("a subrule match", $aftererror,$line,$code); |
| 2791 | my $desc; |
| 2792 | if ($name=~/\A_alternation_\d+_of_production_\d+_of_rule/) |
| 2793 | { $desc = $self->{"rules"}{$name}->expected } |
| 2794 | $item = new Parse::RecDescent::Subrule($name, |
| 2795 | $lookahead, |
| 2796 | $line, |
| 2797 | $desc, |
| 2798 | $matchrule, |
| 2799 | $argcode); |
| 2800 | |
| 2801 | $prod and $prod->additem($item) |
| 2802 | or _no_rule("(sub)rule",$line,$name); |
| 2803 | |
| 2804 | !$matchrule and $rule and $rule->addcall($name); |
| 2805 | } |
| 2806 | } |
| 2807 | elsif ($grammar =~ m/$LONECOLON/gco ) |
| 2808 | { |
| 2809 | _error("Unexpected colon encountered", $line); |
| 2810 | _hint("Did you mean \"|\" (to start a new production)? |
| 2811 | Or perhaps you forgot that the colon |
| 2812 | in a rule definition must be |
| 2813 | on the same line as the rule name?"); |
| 2814 | } |
| 2815 | elsif ($grammar =~ m/$ACTION/gco ) # BAD ACTION, ALREADY FAILED |
| 2816 | { |
| 2817 | _error("Malformed action encountered", |
| 2818 | $line); |
| 2819 | _hint("Did you forget the closing curly bracket |
| 2820 | or is there a syntax error in the action?"); |
| 2821 | } |
| 2822 | elsif ($grammar =~ m/$OTHER/gco ) |
| 2823 | { |
| 2824 | _error("Untranslatable item encountered: \"$1\"", |
| 2825 | $line); |
| 2826 | _hint("Did you misspell \"$1\" |
| 2827 | or forget to comment it out?"); |
| 2828 | } |
| 2829 | |
| 2830 | if ($lookaheadspec =~ tr /././ > 3) |
| 2831 | { |
| 2832 | $lookaheadspec =~ s/\A\s+//; |
| 2833 | $lookahead = $lookahead<0 |
| 2834 | ? 'a negative lookahead ("...!")' |
| 2835 | : 'a positive lookahead ("...")' ; |
| 2836 | _warn(1,"Found two or more lookahead specifiers in a |
| 2837 | row.",$line) |
| 2838 | and |
| 2839 | _hint("Multiple positive and/or negative lookaheads |
| 2840 | are simply multiplied together to produce a |
| 2841 | single positive or negative lookahead |
| 2842 | specification. In this case the sequence |
| 2843 | \"$lookaheadspec\" was reduced to $lookahead. |
| 2844 | Was this your intention?"); |
| 2845 | } |
| 2846 | $lookahead = 0; |
| 2847 | $lookaheadspec = ""; |
| 2848 | |
| 2849 | $grammar =~ m/\G\s+/gc; |
| 2850 | } |
| 2851 | |
| 2852 | if ($must_pop_lines) { |
| 2853 | pop @lines; |
| 2854 | } |
| 2855 | |
| 2856 | unless ($ERRORS or $isimplicit or !$::RD_CHECK) |
| 2857 | { |
| 2858 | $self->_check_grammar(); |
| 2859 | } |
| 2860 | |
| 2861 | unless ($ERRORS or $isimplicit or $Parse::RecDescent::compiling) |
| 2862 | { |
| 2863 | my $code = $self->_code(); |
| 2864 | if (defined $::RD_TRACE) |
| 2865 | { |
| 2866 | my $mode = ($nextnamespace eq "namespace000002") ? '>' : '>>'; |
| 2867 | print STDERR "printing code (", length($code),") to RD_TRACE\n"; |
| 2868 | local *TRACE_FILE; |
| 2869 | open TRACE_FILE, $mode, "RD_TRACE" |
| 2870 | and print TRACE_FILE "my \$ERRORS;\n$code" |
| 2871 | and close TRACE_FILE; |
| 2872 | } |
| 2873 | |
| 2874 | unless ( eval "$code 1" ) |
| 2875 | { |
| 2876 | _error("Internal error in generated parser code!"); |
| 2877 | $@ =~ s/at grammar/in grammar at/; |
| 2878 | _hint($@); |
| 2879 | } |
| 2880 | } |
| 2881 | |
| 2882 | if ($ERRORS and !_verbosity("HINT")) |
| 2883 | { |
| 2884 | local $::RD_HINT = defined $::RD_HINT ? $::RD_HINT : 1; |
| 2885 | _hint('Set $::RD_HINT (or -RD_HINT if you\'re using "perl -s") |
| 2886 | for hints on fixing these problems. Use $::RD_HINT = 0 |
| 2887 | to disable this message.'); |
| 2888 | } |
| 2889 | if ($ERRORS) { $ERRORS=0; return } |
| 2890 | return $self; |
| 2891 | } |
| 2892 | |
| 2893 | |
| 2894 | sub _addstartcode($$) |
| 2895 | { |
| 2896 | my ($self, $code) = @_; |
| 2897 | $code =~ s/\A\s*\{(.*)\}\Z/$1/s; |
| 2898 | |
| 2899 | $self->{"startcode"} .= "$code;\n"; |
| 2900 | } |
| 2901 | |
| 2902 | # CHECK FOR GRAMMAR PROBLEMS.... |
| 2903 | |
| 2904 | sub _check_insatiable($$$$) |
| 2905 | { |
| 2906 | my ($subrule,$repspec,$grammar,$line) = @_; |
| 2907 | pos($grammar)=pos($_[2]); |
| 2908 | return if $grammar =~ m/$OPTIONAL/gco || $grammar =~ m/$ANY/gco; |
| 2909 | my $min = 1; |
| 2910 | if ( $grammar =~ m/$MANY/gco |
| 2911 | || $grammar =~ m/$EXACTLY/gco |
| 2912 | || $grammar =~ m/$ATMOST/gco |
| 2913 | || $grammar =~ m/$BETWEEN/gco && do { $min=$2; 1 } |
| 2914 | || $grammar =~ m/$ATLEAST/gco && do { $min=$2; 1 } |
| 2915 | || $grammar =~ m/$SUBRULE(?!\s*:)/gco |
| 2916 | ) |
| 2917 | { |
| 2918 | return unless $1 eq $subrule && $min > 0; |
| 2919 | my $current_match = substr($grammar, $-[0], $+[0] - $-[0]); |
| 2920 | _warn(3,"Subrule sequence \"$subrule($repspec) $current_match\" will |
| 2921 | (almost certainly) fail.",$line) |
| 2922 | and |
| 2923 | _hint("Unless subrule \"$subrule\" performs some cunning |
| 2924 | lookahead, the repetition \"$subrule($repspec)\" will |
| 2925 | insatiably consume as many matches of \"$subrule\" as it |
| 2926 | can, leaving none to match the \"$current_match\" that follows."); |
| 2927 | } |
| 2928 | } |
| 2929 | |
| 2930 | sub _check_grammar ($) |
| 2931 | { |
| 2932 | my $self = shift; |
| 2933 | my $rules = $self->{"rules"}; |
| 2934 | my $rule; |
| 2935 | foreach $rule ( values %$rules ) |
| 2936 | { |
| 2937 | next if ! $rule->{"changed"}; |
| 2938 | |
| 2939 | # CHECK FOR UNDEFINED RULES |
| 2940 | |
| 2941 | my $call; |
| 2942 | foreach $call ( @{$rule->{"calls"}} ) |
| 2943 | { |
| 2944 | if (!defined ${$rules}{$call} |
| 2945 | &&!defined &{"Parse::RecDescent::$call"}) |
| 2946 | { |
| 2947 | if (!defined $::RD_AUTOSTUB) |
| 2948 | { |
| 2949 | _warn(3,"Undefined (sub)rule \"$call\" |
| 2950 | used in a production.") |
| 2951 | and |
| 2952 | _hint("Will you be providing this rule |
| 2953 | later, or did you perhaps |
| 2954 | misspell \"$call\"? Otherwise |
| 2955 | it will be treated as an |
| 2956 | immediate <reject>."); |
| 2957 | eval "sub $self->{namespace}::$call {undef}"; |
| 2958 | } |
| 2959 | else # EXPERIMENTAL |
| 2960 | { |
| 2961 | my $rule = qq{'$call'}; |
| 2962 | if ($::RD_AUTOSTUB and $::RD_AUTOSTUB ne "1") { |
| 2963 | $rule = $::RD_AUTOSTUB; |
| 2964 | } |
| 2965 | _warn(1,"Autogenerating rule: $call") |
| 2966 | and |
| 2967 | _hint("A call was made to a subrule |
| 2968 | named \"$call\", but no such |
| 2969 | rule was specified. However, |
| 2970 | since \$::RD_AUTOSTUB |
| 2971 | was defined, a rule stub |
| 2972 | ($call : $rule) was |
| 2973 | automatically created."); |
| 2974 | |
| 2975 | $self->_generate("$call: $rule",0,1); |
| 2976 | } |
| 2977 | } |
| 2978 | } |
| 2979 | |
| 2980 | # CHECK FOR LEFT RECURSION |
| 2981 | |
| 2982 | if ($rule->isleftrec($rules)) |
| 2983 | { |
| 2984 | _error("Rule \"$rule->{name}\" is left-recursive."); |
| 2985 | _hint("Redesign the grammar so it's not left-recursive. |
| 2986 | That will probably mean you need to re-implement |
| 2987 | repetitions using the '(s)' notation. |
| 2988 | For example: \"$rule->{name}(s)\"."); |
| 2989 | next; |
| 2990 | } |
| 2991 | |
| 2992 | # CHECK FOR PRODUCTIONS FOLLOWING EMPTY PRODUCTIONS |
| 2993 | { |
| 2994 | my $hasempty; |
| 2995 | my $prod; |
| 2996 | foreach $prod ( @{$rule->{"prods"}} ) { |
| 2997 | if ($hasempty) { |
| 2998 | _error("Production " . $prod->describe . " for \"$rule->{name}\" |
| 2999 | will never be reached (preceding empty production will |
| 3000 | always match first)."); |
| 3001 | _hint("Reorder the grammar so that the empty production |
| 3002 | is last in the list or productions."); |
| 3003 | last; |
| 3004 | } |
| 3005 | $hasempty ||= $prod->isempty(); |
| 3006 | } |
| 3007 | } |
| 3008 | } |
| 3009 | } |
| 3010 | |
| 3011 | # GENERATE ACTUAL PARSER CODE |
| 3012 | |
| 3013 | sub _code($) |
| 3014 | { |
| 3015 | my $self = shift; |
| 3016 | my $initial_skip = defined($self->{skip}) ? $self->{skip} : $skip; |
| 3017 | |
| 3018 | my $code = qq{ |
| 3019 | package $self->{namespace}; |
| 3020 | use strict; |
| 3021 | use vars qw(\$skip \$AUTOLOAD $self->{localvars} ); |
| 3022 | \@$self->{namespace}\::ISA = (); |
| 3023 | \$skip = '$initial_skip'; |
| 3024 | $self->{startcode} |
| 3025 | |
| 3026 | { |
| 3027 | local \$SIG{__WARN__} = sub {0}; |
| 3028 | # PRETEND TO BE IN Parse::RecDescent NAMESPACE |
| 3029 | *$self->{namespace}::AUTOLOAD = sub |
| 3030 | { |
| 3031 | no strict 'refs'; |
| 3032 | \$AUTOLOAD =~ s/^$self->{namespace}/Parse::RecDescent/; |
| 3033 | goto &{\$AUTOLOAD}; |
| 3034 | } |
| 3035 | } |
| 3036 | |
| 3037 | }; |
| 3038 | $code .= "push \@$self->{namespace}\::ISA, 'Parse::RecDescent';"; |
| 3039 | $self->{"startcode"} = ''; |
| 3040 | |
| 3041 | my $rule; |
| 3042 | foreach $rule ( values %{$self->{"rules"}} ) |
| 3043 | { |
| 3044 | if ($rule->{"changed"}) |
| 3045 | { |
| 3046 | $code .= $rule->code($self->{"namespace"},$self); |
| 3047 | $rule->{"changed"} = 0; |
| 3048 | } |
| 3049 | } |
| 3050 | |
| 3051 | return $code; |
| 3052 | } |
| 3053 | |
| 3054 | |
| 3055 | # EXECUTING A PARSE.... |
| 3056 | |
| 3057 | sub AUTOLOAD # ($parser, $text; $linenum, @args) |
| 3058 | { |
| 3059 | croak "Could not find method: $AUTOLOAD\n" unless ref $_[0]; |
| 3060 | my $class = ref($_[0]) || $_[0]; |
| 3061 | my $text = ref($_[1]) eq 'SCALAR' ? ${$_[1]} : "$_[1]"; |
| 3062 | $_[0]->{lastlinenum} = $_[2]||_linecount($_[1]); |
| 3063 | $_[0]->{lastlinenum} = _linecount($_[1]); |
| 3064 | $_[0]->{lastlinenum} += ($_[2]||0) if @_ > 2; |
| 3065 | $_[0]->{offsetlinenum} = $_[0]->{lastlinenum}; |
| 3066 | $_[0]->{fulltext} = $text; |
| 3067 | $_[0]->{fulltextlen} = length $text; |
| 3068 | $_[0]->{linecounter_cache} = {}; |
| 3069 | $_[0]->{deferred} = []; |
| 3070 | $_[0]->{errors} = []; |
| 3071 | my @args = @_[3..$#_]; |
| 3072 | my $args = sub { [ @args ] }; |
| 3073 | |
| 3074 | $AUTOLOAD =~ s/$class/$_[0]->{namespace}/; |
| 3075 | no strict "refs"; |
| 3076 | |
| 3077 | local $::RD_WARN = $::RD_WARN || $_[0]->{__WARN__}; |
| 3078 | local $::RD_HINT = $::RD_HINT || $_[0]->{__HINT__}; |
| 3079 | local $::RD_TRACE = $::RD_TRACE || $_[0]->{__TRACE__}; |
| 3080 | |
| 3081 | croak "Unknown starting rule ($AUTOLOAD) called\n" |
| 3082 | unless defined &$AUTOLOAD; |
| 3083 | my $retval = &{$AUTOLOAD}($_[0],$text,undef,undef,undef,$args); |
| 3084 | |
| 3085 | if (defined $retval) |
| 3086 | { |
| 3087 | foreach ( @{$_[0]->{deferred}} ) { &$_; } |
| 3088 | } |
| 3089 | else |
| 3090 | { |
| 3091 | foreach ( @{$_[0]->{errors}} ) { _error(@$_); } |
| 3092 | } |
| 3093 | |
| 3094 | if (ref $_[1] eq 'SCALAR') { ${$_[1]} = $text } |
| 3095 | |
| 3096 | $ERRORS = 0; |
| 3097 | return $retval; |
| 3098 | } |
| 3099 | |
| 3100 | sub _parserepeat($$$$$$$$$) # RETURNS A REF TO AN ARRAY OF MATCHES |
| 3101 | { |
| 3102 | my ($parser, $text, $prod, $min, $max, $_noactions, $_itempos, $expectation, $argcode) = @_; |
| 3103 | my @tokens = (); |
| 3104 | |
| 3105 | my $itemposfirst; |
| 3106 | my $reps; |
| 3107 | for ($reps=0; $reps<$max;) |
| 3108 | { |
| 3109 | $expectation->at($text); |
| 3110 | my $_savetext = $text; |
| 3111 | my $prevtextlen = length $text; |
| 3112 | my $_tok; |
| 3113 | if (! defined ($_tok = &$prod($parser,$text,1,$_noactions,$_itempos,$argcode))) |
| 3114 | { |
| 3115 | $text = $_savetext; |
| 3116 | last; |
| 3117 | } |
| 3118 | |
| 3119 | if (defined($_itempos) and !defined($itemposfirst)) |
| 3120 | { |
| 3121 | $itemposfirst = Parse::RecDescent::Production::_duplicate_itempos($_itempos); |
| 3122 | } |
| 3123 | |
| 3124 | push @tokens, $_tok if defined $_tok; |
| 3125 | last if ++$reps >= $min and $prevtextlen == length $text; |
| 3126 | } |
| 3127 | |
| 3128 | do { $expectation->failed(); return undef} if $reps<$min; |
| 3129 | |
| 3130 | if (defined $itemposfirst) |
| 3131 | { |
| 3132 | Parse::RecDescent::Production::_update_itempos($_itempos, $itemposfirst, undef, [qw(from)]); |
| 3133 | } |
| 3134 | |
| 3135 | $_[1] = $text; |
| 3136 | return [@tokens]; |
| 3137 | } |
| 3138 | |
| 3139 | sub set_autoflush { |
| 3140 | my $orig_selected = select $_[0]; |
| 3141 | $| = 1; |
| 3142 | select $orig_selected; |
| 3143 | return; |
| 3144 | } |
| 3145 | |
| 3146 | # ERROR REPORTING.... |
| 3147 | |
| 3148 | sub _write_ERROR { |
| 3149 | my ($errorprefix, $errortext) = @_; |
| 3150 | return if $errortext !~ /\S/; |
| 3151 | $errorprefix =~ s/\s+\Z//; |
| 3152 | local $^A = q{}; |
| 3153 | |
| 3154 | formline(<<'END_FORMAT', $errorprefix, $errortext); |
| 3155 | @>>>>>>>>>>>>>>>>>>>>: ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< |
| 3156 | END_FORMAT |
| 3157 | formline(<<'END_FORMAT', $errortext); |
| 3158 | ~~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< |
| 3159 | END_FORMAT |
| 3160 | print {*STDERR} $^A; |
| 3161 | } |
| 3162 | |
| 3163 | # TRACING |
| 3164 | |
| 3165 | my $TRACE_FORMAT = <<'END_FORMAT'; |
| 3166 | @>|@|||||||||@^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<| |
| 3167 | | ~~ |^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<| |
| 3168 | END_FORMAT |
| 3169 | |
| 3170 | my $TRACECONTEXT_FORMAT = <<'END_FORMAT'; |
| 3171 | @>|@|||||||||@ |^<<<<<<<<<<<<<<<<<<<<<<<<<<< |
| 3172 | | ~~ | |^<<<<<<<<<<<<<<<<<<<<<<<<<<< |
| 3173 | END_FORMAT |
| 3174 | |
| 3175 | sub _write_TRACE { |
| 3176 | my ($tracelevel, $tracerulename, $tracemsg) = @_; |
| 3177 | return if $tracemsg !~ /\S/; |
| 3178 | $tracemsg =~ s/\s*\Z//; |
| 3179 | local $^A = q{}; |
| 3180 | my $bar = '|'; |
| 3181 | formline($TRACE_FORMAT, $tracelevel, $tracerulename, $bar, $tracemsg, $tracemsg); |
| 3182 | print {*STDERR} $^A; |
| 3183 | } |
| 3184 | |
| 3185 | sub _write_TRACECONTEXT { |
| 3186 | my ($tracelevel, $tracerulename, $tracecontext) = @_; |
| 3187 | return if $tracecontext !~ /\S/; |
| 3188 | $tracecontext =~ s/\s*\Z//; |
| 3189 | local $^A = q{}; |
| 3190 | my $bar = '|'; |
| 3191 | formline($TRACECONTEXT_FORMAT, $tracelevel, $tracerulename, $bar, $tracecontext, $tracecontext); |
| 3192 | print {*STDERR} $^A; |
| 3193 | } |
| 3194 | |
| 3195 | sub _verbosity($) |
| 3196 | { |
| 3197 | defined $::RD_TRACE |
| 3198 | or defined $::RD_HINT and $::RD_HINT and $_[0] =~ /ERRORS|WARN|HINT/ |
| 3199 | or defined $::RD_WARN and $::RD_WARN and $_[0] =~ /ERRORS|WARN/ |
| 3200 | or defined $::RD_ERRORS and $::RD_ERRORS and $_[0] =~ /ERRORS/ |
| 3201 | } |
| 3202 | |
| 3203 | sub _error($;$) |
| 3204 | { |
| 3205 | $ERRORS++; |
| 3206 | return 0 if ! _verbosity("ERRORS"); |
| 3207 | my $errortext = $_[0]; |
| 3208 | my $errorprefix = "ERROR" . ($_[1] ? " (line $_[1])" : ""); |
| 3209 | $errortext =~ s/\s+/ /g; |
| 3210 | print {*STDERR} "\n" if _verbosity("WARN"); |
| 3211 | _write_ERROR($errorprefix, $errortext); |
| 3212 | return 1; |
| 3213 | } |
| 3214 | |
| 3215 | sub _warn($$;$) |
| 3216 | { |
| 3217 | return 0 unless _verbosity("WARN") && ($::RD_HINT || $_[0] >= ($::RD_WARN||1)); |
| 3218 | my $errortext = $_[1]; |
| 3219 | my $errorprefix = "Warning" . ($_[2] ? " (line $_[2])" : ""); |
| 3220 | print {*STDERR} "\n" if _verbosity("HINT"); |
| 3221 | $errortext =~ s/\s+/ /g; |
| 3222 | _write_ERROR($errorprefix, $errortext); |
| 3223 | return 1; |
| 3224 | } |
| 3225 | |
| 3226 | sub _hint($) |
| 3227 | { |
| 3228 | return 0 unless $::RD_HINT; |
| 3229 | my $errortext = $_[0]; |
| 3230 | my $errorprefix = "Hint" . ($_[1] ? " (line $_[1])" : ""); |
| 3231 | $errortext =~ s/\s+/ /g; |
| 3232 | _write_ERROR($errorprefix, $errortext); |
| 3233 | return 1; |
| 3234 | } |
| 3235 | |
| 3236 | sub _tracemax($) |
| 3237 | { |
| 3238 | if (defined $::RD_TRACE |
| 3239 | && $::RD_TRACE =~ /\d+/ |
| 3240 | && $::RD_TRACE>1 |
| 3241 | && $::RD_TRACE+10<length($_[0])) |
| 3242 | { |
| 3243 | my $count = length($_[0]) - $::RD_TRACE; |
| 3244 | return substr($_[0],0,$::RD_TRACE/2) |
| 3245 | . "...<$count>..." |
| 3246 | . substr($_[0],-$::RD_TRACE/2); |
| 3247 | } |
| 3248 | else |
| 3249 | { |
| 3250 | return substr($_[0],0,500); |
| 3251 | } |
| 3252 | } |
| 3253 | |
| 3254 | sub _tracefirst($) |
| 3255 | { |
| 3256 | if (defined $::RD_TRACE |
| 3257 | && $::RD_TRACE =~ /\d+/ |
| 3258 | && $::RD_TRACE>1 |
| 3259 | && $::RD_TRACE+10<length($_[0])) |
| 3260 | { |
| 3261 | my $count = length($_[0]) - $::RD_TRACE; |
| 3262 | return substr($_[0],0,$::RD_TRACE) . "...<+$count>"; |
| 3263 | } |
| 3264 | else |
| 3265 | { |
| 3266 | return substr($_[0],0,500); |
| 3267 | } |
| 3268 | } |
| 3269 | |
| 3270 | my $lastcontext = ''; |
| 3271 | my $lastrulename = ''; |
| 3272 | my $lastlevel = ''; |
| 3273 | |
| 3274 | sub _trace($;$$$) |
| 3275 | { |
| 3276 | my $tracemsg = $_[0]; |
| 3277 | my $tracecontext = $_[1]||$lastcontext; |
| 3278 | my $tracerulename = $_[2]||$lastrulename; |
| 3279 | my $tracelevel = $_[3]||$lastlevel; |
| 3280 | if ($tracerulename) { $lastrulename = $tracerulename } |
| 3281 | if ($tracelevel) { $lastlevel = $tracelevel } |
| 3282 | |
| 3283 | $tracecontext =~ s/\n/\\n/g; |
| 3284 | $tracecontext =~ s/\s+/ /g; |
| 3285 | $tracerulename = qq{$tracerulename}; |
| 3286 | _write_TRACE($tracelevel, $tracerulename, $tracemsg); |
| 3287 | if ($tracecontext ne $lastcontext) |
| 3288 | { |
| 3289 | if ($tracecontext) |
| 3290 | { |
| 3291 | $lastcontext = _tracefirst($tracecontext); |
| 3292 | $tracecontext = qq{"$tracecontext"}; |
| 3293 | } |
| 3294 | else |
| 3295 | { |
| 3296 | $tracecontext = qq{<NO TEXT LEFT>}; |
| 3297 | } |
| 3298 | _write_TRACECONTEXT($tracelevel, $tracerulename, $tracecontext); |
| 3299 | } |
| 3300 | } |
| 3301 | |
| 3302 | sub _matchtracemessage |
| 3303 | { |
| 3304 | my ($self, $reject) = @_; |
| 3305 | |
| 3306 | my $prefix = ''; |
| 3307 | my $postfix = ''; |
| 3308 | my $matched = not $reject; |
| 3309 | my @t = ("Matched", "Didn't match"); |
| 3310 | if (exists $self->{lookahead} and $self->{lookahead}) |
| 3311 | { |
| 3312 | $postfix = $reject ? "(reject)" : "(keep)"; |
| 3313 | $prefix = "..."; |
| 3314 | if ($self->{lookahead} < 0) |
| 3315 | { |
| 3316 | $prefix .= '!'; |
| 3317 | $matched = not $matched; |
| 3318 | } |
| 3319 | } |
| 3320 | $prefix . ($matched ? $t[0] : $t[1]) . $postfix; |
| 3321 | } |
| 3322 | |
| 3323 | sub _parseunneg($$$$$) |
| 3324 | { |
| 3325 | _parse($_[0],$_[1],$_[3],$_[4]); |
| 3326 | if ($_[2]<0) |
| 3327 | { |
| 3328 | _error("Can't negate \"$_[4]\".",$_[3]); |
| 3329 | _hint("You can't negate $_[0]. Remove the \"...!\" before |
| 3330 | \"$_[4]\"."); |
| 3331 | return 0; |
| 3332 | } |
| 3333 | return 1; |
| 3334 | } |
| 3335 | |
| 3336 | sub _parse($$$$) |
| 3337 | { |
| 3338 | my $what = $_[3]; |
| 3339 | $what =~ s/^\s+//; |
| 3340 | if ($_[1]) |
| 3341 | { |
| 3342 | _warn(3,"Found $_[0] ($what) after an unconditional <error>",$_[2]) |
| 3343 | and |
| 3344 | _hint("An unconditional <error> always causes the |
| 3345 | production containing it to immediately fail. |
| 3346 | \u$_[0] that follows an <error> |
| 3347 | will never be reached. Did you mean to use |
| 3348 | <error?> instead?"); |
| 3349 | } |
| 3350 | |
| 3351 | return if ! _verbosity("TRACE"); |
| 3352 | my $errortext = "Treating \"$what\" as $_[0]"; |
| 3353 | my $errorprefix = "Parse::RecDescent"; |
| 3354 | $errortext =~ s/\s+/ /g; |
| 3355 | _write_ERROR($errorprefix, $errortext); |
| 3356 | } |
| 3357 | |
| 3358 | sub _linecount($) { |
| 3359 | scalar substr($_[0], pos $_[0]||0) =~ tr/\n// |
| 3360 | } |
| 3361 | |
| 3362 | |
| 3363 | package main; |
| 3364 | |
| 3365 | use vars qw ( $RD_ERRORS $RD_WARN $RD_HINT $RD_TRACE $RD_CHECK ); |
| 3366 | $::RD_CHECK = 1; |
| 3367 | $::RD_ERRORS = 1; |
| 3368 | $::RD_WARN = 3; |
| 3369 | |
| 3370 | 1; |
| 3371 | |
| 3372 | __END__ |
| 3373 | |
| 3374 | =head1 NAME |
| 3375 | |
| 3376 | Parse::RecDescent - Generate Recursive-Descent Parsers |
| 3377 | |
| 3378 | =head1 VERSION |
| 3379 | |
| 3380 | This document describes version 1.967006 of Parse::RecDescent |
| 3381 | released January 29th, 2012. |
| 3382 | |
| 3383 | =head1 SYNOPSIS |
| 3384 | |
| 3385 | use Parse::RecDescent; |
| 3386 | |
| 3387 | # Generate a parser from the specification in $grammar: |
| 3388 | |
| 3389 | $parser = new Parse::RecDescent ($grammar); |
| 3390 | |
| 3391 | # Generate a parser from the specification in $othergrammar |
| 3392 | |
| 3393 | $anotherparser = new Parse::RecDescent ($othergrammar); |
| 3394 | |
| 3395 | |
| 3396 | # Parse $text using rule 'startrule' (which must be |
| 3397 | # defined in $grammar): |
| 3398 | |
| 3399 | $parser->startrule($text); |
| 3400 | |
| 3401 | |
| 3402 | # Parse $text using rule 'otherrule' (which must also |
| 3403 | # be defined in $grammar): |
| 3404 | |
| 3405 | $parser->otherrule($text); |
| 3406 | |
| 3407 | |
| 3408 | # Change the universal token prefix pattern |
| 3409 | # before building a grammar |
| 3410 | # (the default is: '\s*'): |
| 3411 | |
| 3412 | $Parse::RecDescent::skip = '[ \t]+'; |
| 3413 | |
| 3414 | |
| 3415 | # Replace productions of existing rules (or create new ones) |
| 3416 | # with the productions defined in $newgrammar: |
| 3417 | |
| 3418 | $parser->Replace($newgrammar); |
| 3419 | |
| 3420 | |
| 3421 | # Extend existing rules (or create new ones) |
| 3422 | # by adding extra productions defined in $moregrammar: |
| 3423 | |
| 3424 | $parser->Extend($moregrammar); |
| 3425 | |
| 3426 | |
| 3427 | # Global flags (useful as command line arguments under -s): |
| 3428 | |
| 3429 | $::RD_ERRORS # unless undefined, report fatal errors |
| 3430 | $::RD_WARN # unless undefined, also report non-fatal problems |
| 3431 | $::RD_HINT # if defined, also suggestion remedies |
| 3432 | $::RD_TRACE # if defined, also trace parsers' behaviour |
| 3433 | $::RD_AUTOSTUB # if defined, generates "stubs" for undefined rules |
| 3434 | $::RD_AUTOACTION # if defined, appends specified action to productions |
| 3435 | |
| 3436 | |
| 3437 | =head1 DESCRIPTION |
| 3438 | |
| 3439 | =head2 Overview |
| 3440 | |
| 3441 | Parse::RecDescent incrementally generates top-down recursive-descent text |
| 3442 | parsers from simple I<yacc>-like grammar specifications. It provides: |
| 3443 | |
| 3444 | =over 4 |
| 3445 | |
| 3446 | =item * |
| 3447 | |
| 3448 | Regular expressions or literal strings as terminals (tokens), |
| 3449 | |
| 3450 | =item * |
| 3451 | |
| 3452 | Multiple (non-contiguous) productions for any rule, |
| 3453 | |
| 3454 | =item * |
| 3455 | |
| 3456 | Repeated and optional subrules within productions, |
| 3457 | |
| 3458 | =item * |
| 3459 | |
| 3460 | Full access to Perl within actions specified as part of the grammar, |
| 3461 | |
| 3462 | =item * |
| 3463 | |
| 3464 | Simple automated error reporting during parser generation and parsing, |
| 3465 | |
| 3466 | =item * |
| 3467 | |
| 3468 | The ability to commit to, uncommit to, or reject particular |
| 3469 | productions during a parse, |
| 3470 | |
| 3471 | =item * |
| 3472 | |
| 3473 | The ability to pass data up and down the parse tree ("down" via subrule |
| 3474 | argument lists, "up" via subrule return values) |
| 3475 | |
| 3476 | =item * |
| 3477 | |
| 3478 | Incremental extension of the parsing grammar (even during a parse), |
| 3479 | |
| 3480 | =item * |
| 3481 | |
| 3482 | Precompilation of parser objects, |
| 3483 | |
| 3484 | =item * |
| 3485 | |
| 3486 | User-definable reduce-reduce conflict resolution via |
| 3487 | "scoring" of matching productions. |
| 3488 | |
| 3489 | =back |
| 3490 | |
| 3491 | =head2 Using C<Parse::RecDescent> |
| 3492 | |
| 3493 | Parser objects are created by calling C<Parse::RecDescent::new>, passing in a |
| 3494 | grammar specification (see the following subsections). If the grammar is |
| 3495 | correct, C<new> returns a blessed reference which can then be used to initiate |
| 3496 | parsing through any rule specified in the original grammar. A typical sequence |
| 3497 | looks like this: |
| 3498 | |
| 3499 | $grammar = q { |
| 3500 | # GRAMMAR SPECIFICATION HERE |
| 3501 | }; |
| 3502 | |
| 3503 | $parser = new Parse::RecDescent ($grammar) or die "Bad grammar!\n"; |
| 3504 | |
| 3505 | # acquire $text |
| 3506 | |
| 3507 | defined $parser->startrule($text) or print "Bad text!\n"; |
| 3508 | |
| 3509 | The rule through which parsing is initiated must be explicitly defined |
| 3510 | in the grammar (i.e. for the above example, the grammar must include a |
| 3511 | rule of the form: "startrule: <subrules>". |
| 3512 | |
| 3513 | If the starting rule succeeds, its value (see below) |
| 3514 | is returned. Failure to generate the original parser or failure to match a text |
| 3515 | is indicated by returning C<undef>. Note that it's easy to set up grammars |
| 3516 | that can succeed, but which return a value of 0, "0", or "". So don't be |
| 3517 | tempted to write: |
| 3518 | |
| 3519 | $parser->startrule($text) or print "Bad text!\n"; |
| 3520 | |
| 3521 | Normally, the parser has no effect on the original text. So in the |
| 3522 | previous example the value of $text would be unchanged after having |
| 3523 | been parsed. |
| 3524 | |
| 3525 | If, however, the text to be matched is passed by reference: |
| 3526 | |
| 3527 | $parser->startrule(\$text) |
| 3528 | |
| 3529 | then any text which was consumed during the match will be removed from the |
| 3530 | start of $text. |
| 3531 | |
| 3532 | |
| 3533 | =head2 Rules |
| 3534 | |
| 3535 | In the grammar from which the parser is built, rules are specified by |
| 3536 | giving an identifier (which must satisfy /[A-Za-z]\w*/), followed by a |
| 3537 | colon I<on the same line>, followed by one or more productions, |
| 3538 | separated by single vertical bars. The layout of the productions |
| 3539 | is entirely free-format: |
| 3540 | |
| 3541 | rule1: production1 |
| 3542 | | production2 | |
| 3543 | production3 | production4 |
| 3544 | |
| 3545 | At any point in the grammar previously defined rules may be extended with |
| 3546 | additional productions. This is achieved by redeclaring the rule with the new |
| 3547 | productions. Thus: |
| 3548 | |
| 3549 | rule1: a | b | c |
| 3550 | rule2: d | e | f |
| 3551 | rule1: g | h |
| 3552 | |
| 3553 | is exactly equivalent to: |
| 3554 | |
| 3555 | rule1: a | b | c | g | h |
| 3556 | rule2: d | e | f |
| 3557 | |
| 3558 | Each production in a rule consists of zero or more items, each of which |
| 3559 | may be either: the name of another rule to be matched (a "subrule"), |
| 3560 | a pattern or string literal to be matched directly (a "token"), a |
| 3561 | block of Perl code to be executed (an "action"), a special instruction |
| 3562 | to the parser (a "directive"), or a standard Perl comment (which is |
| 3563 | ignored). |
| 3564 | |
| 3565 | A rule matches a text if one of its productions matches. A production |
| 3566 | matches if each of its items match consecutive substrings of the |
| 3567 | text. The productions of a rule being matched are tried in the same |
| 3568 | order that they appear in the original grammar, and the first matching |
| 3569 | production terminates the match attempt (successfully). If all |
| 3570 | productions are tried and none matches, the match attempt fails. |
| 3571 | |
| 3572 | Note that this behaviour is quite different from the "prefer the longer match" |
| 3573 | behaviour of I<yacc>. For example, if I<yacc> were parsing the rule: |
| 3574 | |
| 3575 | seq : 'A' 'B' |
| 3576 | | 'A' 'B' 'C' |
| 3577 | |
| 3578 | upon matching "AB" it would look ahead to see if a 'C' is next and, if |
| 3579 | so, will match the second production in preference to the first. In |
| 3580 | other words, I<yacc> effectively tries all the productions of a rule |
| 3581 | breadth-first in parallel, and selects the "best" match, where "best" |
| 3582 | means longest (note that this is a gross simplification of the true |
| 3583 | behaviour of I<yacc> but it will do for our purposes). |
| 3584 | |
| 3585 | In contrast, C<Parse::RecDescent> tries each production depth-first in |
| 3586 | sequence, and selects the "best" match, where "best" means first. This is |
| 3587 | the fundamental difference between "bottom-up" and "recursive descent" |
| 3588 | parsing. |
| 3589 | |
| 3590 | Each successfully matched item in a production is assigned a value, |
| 3591 | which can be accessed in subsequent actions within the same |
| 3592 | production (or, in some cases, as the return value of a successful |
| 3593 | subrule call). Unsuccessful items don't have an associated value, |
| 3594 | since the failure of an item causes the entire surrounding production |
| 3595 | to immediately fail. The following sections describe the various types |
| 3596 | of items and their success values. |
| 3597 | |
| 3598 | |
| 3599 | =head2 Subrules |
| 3600 | |
| 3601 | A subrule which appears in a production is an instruction to the parser to |
| 3602 | attempt to match the named rule at that point in the text being |
| 3603 | parsed. If the named subrule is not defined when requested the |
| 3604 | production containing it immediately fails (unless it was "autostubbed" - see |
| 3605 | L<Autostubbing>). |
| 3606 | |
| 3607 | A rule may (recursively) call itself as a subrule, but I<not> as the |
| 3608 | left-most item in any of its productions (since such recursions are usually |
| 3609 | non-terminating). |
| 3610 | |
| 3611 | The value associated with a subrule is the value associated with its |
| 3612 | C<$return> variable (see L<"Actions"> below), or with the last successfully |
| 3613 | matched item in the subrule match. |
| 3614 | |
| 3615 | Subrules may also be specified with a trailing repetition specifier, |
| 3616 | indicating that they are to be (greedily) matched the specified number |
| 3617 | of times. The available specifiers are: |
| 3618 | |
| 3619 | subrule(?) # Match one-or-zero times |
| 3620 | subrule(s) # Match one-or-more times |
| 3621 | subrule(s?) # Match zero-or-more times |
| 3622 | subrule(N) # Match exactly N times for integer N > 0 |
| 3623 | subrule(N..M) # Match between N and M times |
| 3624 | subrule(..M) # Match between 1 and M times |
| 3625 | subrule(N..) # Match at least N times |
| 3626 | |
| 3627 | Repeated subrules keep matching until either the subrule fails to |
| 3628 | match, or it has matched the minimal number of times but fails to |
| 3629 | consume any of the parsed text (this second condition prevents the |
| 3630 | subrule matching forever in some cases). |
| 3631 | |
| 3632 | Since a repeated subrule may match many instances of the subrule itself, the |
| 3633 | value associated with it is not a simple scalar, but rather a reference to a |
| 3634 | list of scalars, each of which is the value associated with one of the |
| 3635 | individual subrule matches. In other words in the rule: |
| 3636 | |
| 3637 | program: statement(s) |
| 3638 | |
| 3639 | the value associated with the repeated subrule "statement(s)" is a reference |
| 3640 | to an array containing the values matched by each call to the individual |
| 3641 | subrule "statement". |
| 3642 | |
| 3643 | Repetition modifiers may include a separator pattern: |
| 3644 | |
| 3645 | program: statement(s /;/) |
| 3646 | |
| 3647 | specifying some sequence of characters to be skipped between each repetition. |
| 3648 | This is really just a shorthand for the E<lt>leftop:...E<gt> directive |
| 3649 | (see below). |
| 3650 | |
| 3651 | =head2 Tokens |
| 3652 | |
| 3653 | If a quote-delimited string or a Perl regex appears in a production, |
| 3654 | the parser attempts to match that string or pattern at that point in |
| 3655 | the text. For example: |
| 3656 | |
| 3657 | typedef: "typedef" typename identifier ';' |
| 3658 | |
| 3659 | identifier: /[A-Za-z_][A-Za-z0-9_]*/ |
| 3660 | |
| 3661 | As in regular Perl, a single quoted string is uninterpolated, whilst |
| 3662 | a double-quoted string or a pattern is interpolated (at the time |
| 3663 | of matching, I<not> when the parser is constructed). Hence, it is |
| 3664 | possible to define rules in which tokens can be set at run-time: |
| 3665 | |
| 3666 | typedef: "$::typedefkeyword" typename identifier ';' |
| 3667 | |
| 3668 | identifier: /$::identpat/ |
| 3669 | |
| 3670 | Note that, since each rule is implemented inside a special namespace |
| 3671 | belonging to its parser, it is necessary to explicitly quantify |
| 3672 | variables from the main package. |
| 3673 | |
| 3674 | Regex tokens can be specified using just slashes as delimiters |
| 3675 | or with the explicit C<mE<lt>delimiterE<gt>......E<lt>delimiterE<gt>> syntax: |
| 3676 | |
| 3677 | typedef: "typedef" typename identifier ';' |
| 3678 | |
| 3679 | typename: /[A-Za-z_][A-Za-z0-9_]*/ |
| 3680 | |
| 3681 | identifier: m{[A-Za-z_][A-Za-z0-9_]*} |
| 3682 | |
| 3683 | A regex of either type can also have any valid trailing parameter(s) |
| 3684 | (that is, any of [cgimsox]): |
| 3685 | |
| 3686 | typedef: "typedef" typename identifier ';' |
| 3687 | |
| 3688 | identifier: / [a-z_] # LEADING ALPHA OR UNDERSCORE |
| 3689 | [a-z0-9_]* # THEN DIGITS ALSO ALLOWED |
| 3690 | /ix # CASE/SPACE/COMMENT INSENSITIVE |
| 3691 | |
| 3692 | The value associated with any successfully matched token is a string |
| 3693 | containing the actual text which was matched by the token. |
| 3694 | |
| 3695 | It is important to remember that, since each grammar is specified in a |
| 3696 | Perl string, all instances of the universal escape character '\' within |
| 3697 | a grammar must be "doubled", so that they interpolate to single '\'s when |
| 3698 | the string is compiled. For example, to use the grammar: |
| 3699 | |
| 3700 | word: /\S+/ | backslash |
| 3701 | line: prefix word(s) "\n" |
| 3702 | backslash: '\\' |
| 3703 | |
| 3704 | the following code is required: |
| 3705 | |
| 3706 | $parser = new Parse::RecDescent (q{ |
| 3707 | |
| 3708 | word: /\\S+/ | backslash |
| 3709 | line: prefix word(s) "\\n" |
| 3710 | backslash: '\\\\' |
| 3711 | |
| 3712 | }); |
| 3713 | |
| 3714 | =head2 Anonymous subrules |
| 3715 | |
| 3716 | Parentheses introduce a nested scope that is very like a call to an anonymous |
| 3717 | subrule. Hence they are useful for "in-lining" subroutine calls, and other |
| 3718 | kinds of grouping behaviour. For example, instead of: |
| 3719 | |
| 3720 | word: /\S+/ | backslash |
| 3721 | line: prefix word(s) "\n" |
| 3722 | |
| 3723 | you could write: |
| 3724 | |
| 3725 | line: prefix ( /\S+/ | backslash )(s) "\n" |
| 3726 | |
| 3727 | and get exactly the same effects. |
| 3728 | |
| 3729 | Parentheses are also use for collecting unrepeated alternations within a |
| 3730 | single production. |
| 3731 | |
| 3732 | secret_identity: "Mr" ("Incredible"|"Fantastic"|"Sheen") ", Esq." |
| 3733 | |
| 3734 | |
| 3735 | =head2 Terminal Separators |
| 3736 | |
| 3737 | For the purpose of matching, each terminal in a production is considered |
| 3738 | to be preceded by a "prefix" - a pattern which must be |
| 3739 | matched before a token match is attempted. By default, the |
| 3740 | prefix is optional whitespace (which always matches, at |
| 3741 | least trivially), but this default may be reset in any production. |
| 3742 | |
| 3743 | The variable C<$Parse::RecDescent::skip> stores the universal |
| 3744 | prefix, which is the default for all terminal matches in all parsers |
| 3745 | built with C<Parse::RecDescent>. |
| 3746 | |
| 3747 | If you want to change the universal prefix using |
| 3748 | C<$Parse::RecDescent::skip>, be careful to set it I<before> creating |
| 3749 | the grammar object, because it is applied statically (when a grammar |
| 3750 | is built) rather than dynamically (when the grammar is used). |
| 3751 | Alternatively you can provide a global C<E<lt>skip:...E<gt>> directive |
| 3752 | in your grammar before any rules (described later). |
| 3753 | |
| 3754 | The prefix for an individual production can be altered |
| 3755 | by using the C<E<lt>skip:...E<gt>> directive (described later). |
| 3756 | Setting this directive in the top-level rule is an alternative approach |
| 3757 | to setting C<$Parse::RecDescent::skip> before creating the object, but |
| 3758 | in this case you don't get the intended skipping behaviour if you |
| 3759 | directly invoke methods different from the top-level rule. |
| 3760 | |
| 3761 | |
| 3762 | =head2 Actions |
| 3763 | |
| 3764 | An action is a block of Perl code which is to be executed (as the |
| 3765 | block of a C<do> statement) when the parser reaches that point in a |
| 3766 | production. The action executes within a special namespace belonging to |
| 3767 | the active parser, so care must be taken in correctly qualifying variable |
| 3768 | names (see also L<Start-up Actions> below). |
| 3769 | |
| 3770 | The action is considered to succeed if the final value of the block |
| 3771 | is defined (that is, if the implied C<do> statement evaluates to a |
| 3772 | defined value - I<even one which would be treated as "false">). Note |
| 3773 | that the value associated with a successful action is also the final |
| 3774 | value in the block. |
| 3775 | |
| 3776 | An action will I<fail> if its last evaluated value is C<undef>. This is |
| 3777 | surprisingly easy to accomplish by accident. For instance, here's an |
| 3778 | infuriating case of an action that makes its production fail, but only |
| 3779 | when debugging I<isn't> activated: |
| 3780 | |
| 3781 | description: name rank serial_number |
| 3782 | { print "Got $item[2] $item[1] ($item[3])\n" |
| 3783 | if $::debugging |
| 3784 | } |
| 3785 | |
| 3786 | If C<$debugging> is false, no statement in the block is executed, so |
| 3787 | the final value is C<undef>, and the entire production fails. The solution is: |
| 3788 | |
| 3789 | description: name rank serial_number |
| 3790 | { print "Got $item[2] $item[1] ($item[3])\n" |
| 3791 | if $::debugging; |
| 3792 | 1; |
| 3793 | } |
| 3794 | |
| 3795 | Within an action, a number of useful parse-time variables are |
| 3796 | available in the special parser namespace (there are other variables |
| 3797 | also accessible, but meddling with them will probably just break your |
| 3798 | parser. As a general rule, if you avoid referring to unqualified |
| 3799 | variables - especially those starting with an underscore - inside an action, |
| 3800 | things should be okay): |
| 3801 | |
| 3802 | =over 4 |
| 3803 | |
| 3804 | =item C<@item> and C<%item> |
| 3805 | |
| 3806 | The array slice C<@item[1..$#item]> stores the value associated with each item |
| 3807 | (that is, each subrule, token, or action) in the current production. The |
| 3808 | analogy is to C<$1>, C<$2>, etc. in a I<yacc> grammar. |
| 3809 | Note that, for obvious reasons, C<@item> only contains the |
| 3810 | values of items I<before> the current point in the production. |
| 3811 | |
| 3812 | The first element (C<$item[0]>) stores the name of the current rule |
| 3813 | being matched. |
| 3814 | |
| 3815 | C<@item> is a standard Perl array, so it can also be indexed with negative |
| 3816 | numbers, representing the number of items I<back> from the current position in |
| 3817 | the parse: |
| 3818 | |
| 3819 | stuff: /various/ bits 'and' pieces "then" data 'end' |
| 3820 | { print $item[-2] } # PRINTS data |
| 3821 | # (EASIER THAN: $item[6]) |
| 3822 | |
| 3823 | The C<%item> hash complements the <@item> array, providing named |
| 3824 | access to the same item values: |
| 3825 | |
| 3826 | stuff: /various/ bits 'and' pieces "then" data 'end' |
| 3827 | { print $item{data} # PRINTS data |
| 3828 | # (EVEN EASIER THAN USING @item) |
| 3829 | |
| 3830 | |
| 3831 | The results of named subrules are stored in the hash under each |
| 3832 | subrule's name (including the repetition specifier, if any), |
| 3833 | whilst all other items are stored under a "named |
| 3834 | positional" key that indictates their ordinal position within their item |
| 3835 | type: __STRINGI<n>__, __PATTERNI<n>__, __DIRECTIVEI<n>__, __ACTIONI<n>__: |
| 3836 | |
| 3837 | stuff: /various/ bits 'and' pieces "then" data 'end' { save } |
| 3838 | { print $item{__PATTERN1__}, # PRINTS 'various' |
| 3839 | $item{__STRING2__}, # PRINTS 'then' |
| 3840 | $item{__ACTION1__}, # PRINTS RETURN |
| 3841 | # VALUE OF save |
| 3842 | } |
| 3843 | |
| 3844 | |
| 3845 | If you want proper I<named> access to patterns or literals, you need to turn |
| 3846 | them into separate rules: |
| 3847 | |
| 3848 | stuff: various bits 'and' pieces "then" data 'end' |
| 3849 | { print $item{various} # PRINTS various |
| 3850 | } |
| 3851 | |
| 3852 | various: /various/ |
| 3853 | |
| 3854 | |
| 3855 | The special entry C<$item{__RULE__}> stores the name of the current |
| 3856 | rule (i.e. the same value as C<$item[0]>. |
| 3857 | |
| 3858 | The advantage of using C<%item>, instead of C<@items> is that it |
| 3859 | removes the need to track items positions that may change as a grammar |
| 3860 | evolves. For example, adding an interim C<E<lt>skipE<gt>> directive |
| 3861 | of action can silently ruin a trailing action, by moving an C<@item> |
| 3862 | element "down" the array one place. In contrast, the named entry |
| 3863 | of C<%item> is unaffected by such an insertion. |
| 3864 | |
| 3865 | A limitation of the C<%item> hash is that it only records the I<last> |
| 3866 | value of a particular subrule. For example: |
| 3867 | |
| 3868 | range: '(' number '..' number )' |
| 3869 | { $return = $item{number} } |
| 3870 | |
| 3871 | will return only the value corresponding to the I<second> match of the |
| 3872 | C<number> subrule. In other words, successive calls to a subrule |
| 3873 | overwrite the corresponding entry in C<%item>. Once again, the |
| 3874 | solution is to rename each subrule in its own rule: |
| 3875 | |
| 3876 | range: '(' from_num '..' to_num ')' |
| 3877 | { $return = $item{from_num} } |
| 3878 | |
| 3879 | from_num: number |
| 3880 | to_num: number |
| 3881 | |
| 3882 | |
| 3883 | |
| 3884 | =item C<@arg> and C<%arg> |
| 3885 | |
| 3886 | The array C<@arg> and the hash C<%arg> store any arguments passed to |
| 3887 | the rule from some other rule (see L<Subrule argument lists>). Changes |
| 3888 | to the elements of either variable do not propagate back to the calling |
| 3889 | rule (data can be passed back from a subrule via the C<$return> |
| 3890 | variable - see next item). |
| 3891 | |
| 3892 | |
| 3893 | =item C<$return> |
| 3894 | |
| 3895 | If a value is assigned to C<$return> within an action, that value is |
| 3896 | returned if the production containing the action eventually matches |
| 3897 | successfully. Note that setting C<$return> I<doesn't> cause the current |
| 3898 | production to succeed. It merely tells it what to return if it I<does> succeed. |
| 3899 | Hence C<$return> is analogous to C<$$> in a I<yacc> grammar. |
| 3900 | |
| 3901 | If C<$return> is not assigned within a production, the value of the |
| 3902 | last component of the production (namely: C<$item[$#item]>) is |
| 3903 | returned if the production succeeds. |
| 3904 | |
| 3905 | |
| 3906 | =item C<$commit> |
| 3907 | |
| 3908 | The current state of commitment to the current production (see L<"Directives"> |
| 3909 | below). |
| 3910 | |
| 3911 | =item C<$skip> |
| 3912 | |
| 3913 | The current terminal prefix (see L<"Directives"> below). |
| 3914 | |
| 3915 | =item C<$text> |
| 3916 | |
| 3917 | The remaining (unparsed) text. Changes to C<$text> I<do not |
| 3918 | propagate> out of unsuccessful productions, but I<do> survive |
| 3919 | successful productions. Hence it is possible to dynamically alter the |
| 3920 | text being parsed - for example, to provide a C<#include>-like facility: |
| 3921 | |
| 3922 | hash_include: '#include' filename |
| 3923 | { $text = ::loadfile($item[2]) . $text } |
| 3924 | |
| 3925 | filename: '<' /[a-z0-9._-]+/i '>' { $return = $item[2] } |
| 3926 | | '"' /[a-z0-9._-]+/i '"' { $return = $item[2] } |
| 3927 | |
| 3928 | |
| 3929 | =item C<$thisline> and C<$prevline> |
| 3930 | |
| 3931 | C<$thisline> stores the current line number within the current parse |
| 3932 | (starting from 1). C<$prevline> stores the line number for the last |
| 3933 | character which was already successfully parsed (this will be different from |
| 3934 | C<$thisline> at the end of each line). |
| 3935 | |
| 3936 | For efficiency, C<$thisline> and C<$prevline> are actually tied |
| 3937 | hashes, and only recompute the required line number when the variable's |
| 3938 | value is used. |
| 3939 | |
| 3940 | Assignment to C<$thisline> adjusts the line number calculator, so that |
| 3941 | it believes that the current line number is the value being assigned. Note |
| 3942 | that this adjustment will be reflected in all subsequent line numbers |
| 3943 | calculations. |
| 3944 | |
| 3945 | Modifying the value of the variable C<$text> (as in the previous |
| 3946 | C<hash_include> example, for instance) will confuse the line |
| 3947 | counting mechanism. To prevent this, you should call |
| 3948 | C<Parse::RecDescent::LineCounter::resync($thisline)> I<immediately> |
| 3949 | after any assignment to the variable C<$text> (or, at least, before the |
| 3950 | next attempt to use C<$thisline>). |
| 3951 | |
| 3952 | Note that if a production fails after assigning to or |
| 3953 | resync'ing C<$thisline>, the parser's line counter mechanism will |
| 3954 | usually be corrupted. |
| 3955 | |
| 3956 | Also see the entry for C<@itempos>. |
| 3957 | |
| 3958 | The line number can be set to values other than 1, by calling the start |
| 3959 | rule with a second argument. For example: |
| 3960 | |
| 3961 | $parser = new Parse::RecDescent ($grammar); |
| 3962 | |
| 3963 | $parser->input($text, 10); # START LINE NUMBERS AT 10 |
| 3964 | |
| 3965 | |
| 3966 | =item C<$thiscolumn> and C<$prevcolumn> |
| 3967 | |
| 3968 | C<$thiscolumn> stores the current column number within the current line |
| 3969 | being parsed (starting from 1). C<$prevcolumn> stores the column number |
| 3970 | of the last character which was actually successfully parsed. Usually |
| 3971 | C<$prevcolumn == $thiscolumn-1>, but not at the end of lines. |
| 3972 | |
| 3973 | For efficiency, C<$thiscolumn> and C<$prevcolumn> are |
| 3974 | actually tied hashes, and only recompute the required column number |
| 3975 | when the variable's value is used. |
| 3976 | |
| 3977 | Assignment to C<$thiscolumn> or C<$prevcolumn> is a fatal error. |
| 3978 | |
| 3979 | Modifying the value of the variable C<$text> (as in the previous |
| 3980 | C<hash_include> example, for instance) may confuse the column |
| 3981 | counting mechanism. |
| 3982 | |
| 3983 | Note that C<$thiscolumn> reports the column number I<before> any |
| 3984 | whitespace that might be skipped before reading a token. Hence |
| 3985 | if you wish to know where a token started (and ended) use something like this: |
| 3986 | |
| 3987 | rule: token1 token2 startcol token3 endcol token4 |
| 3988 | { print "token3: columns $item[3] to $item[5]"; } |
| 3989 | |
| 3990 | startcol: '' { $thiscolumn } # NEED THE '' TO STEP PAST TOKEN SEP |
| 3991 | endcol: { $prevcolumn } |
| 3992 | |
| 3993 | Also see the entry for C<@itempos>. |
| 3994 | |
| 3995 | =item C<$thisoffset> and C<$prevoffset> |
| 3996 | |
| 3997 | C<$thisoffset> stores the offset of the current parsing position |
| 3998 | within the complete text |
| 3999 | being parsed (starting from 0). C<$prevoffset> stores the offset |
| 4000 | of the last character which was actually successfully parsed. In all |
| 4001 | cases C<$prevoffset == $thisoffset-1>. |
| 4002 | |
| 4003 | For efficiency, C<$thisoffset> and C<$prevoffset> are |
| 4004 | actually tied hashes, and only recompute the required offset |
| 4005 | when the variable's value is used. |
| 4006 | |
| 4007 | Assignment to C<$thisoffset> or <$prevoffset> is a fatal error. |
| 4008 | |
| 4009 | Modifying the value of the variable C<$text> will I<not> affect the |
| 4010 | offset counting mechanism. |
| 4011 | |
| 4012 | Also see the entry for C<@itempos>. |
| 4013 | |
| 4014 | =item C<@itempos> |
| 4015 | |
| 4016 | The array C<@itempos> stores a hash reference corresponding to |
| 4017 | each element of C<@item>. The elements of the hash provide the |
| 4018 | following: |
| 4019 | |
| 4020 | $itempos[$n]{offset}{from} # VALUE OF $thisoffset BEFORE $item[$n] |
| 4021 | $itempos[$n]{offset}{to} # VALUE OF $prevoffset AFTER $item[$n] |
| 4022 | $itempos[$n]{line}{from} # VALUE OF $thisline BEFORE $item[$n] |
| 4023 | $itempos[$n]{line}{to} # VALUE OF $prevline AFTER $item[$n] |
| 4024 | $itempos[$n]{column}{from} # VALUE OF $thiscolumn BEFORE $item[$n] |
| 4025 | $itempos[$n]{column}{to} # VALUE OF $prevcolumn AFTER $item[$n] |
| 4026 | |
| 4027 | Note that the various C<$itempos[$n]...{from}> values record the |
| 4028 | appropriate value I<after> any token prefix has been skipped. |
| 4029 | |
| 4030 | Hence, instead of the somewhat tedious and error-prone: |
| 4031 | |
| 4032 | rule: startcol token1 endcol |
| 4033 | startcol token2 endcol |
| 4034 | startcol token3 endcol |
| 4035 | { print "token1: columns $item[1] |
| 4036 | to $item[3] |
| 4037 | token2: columns $item[4] |
| 4038 | to $item[6] |
| 4039 | token3: columns $item[7] |
| 4040 | to $item[9]" } |
| 4041 | |
| 4042 | startcol: '' { $thiscolumn } # NEED THE '' TO STEP PAST TOKEN SEP |
| 4043 | endcol: { $prevcolumn } |
| 4044 | |
| 4045 | it is possible to write: |
| 4046 | |
| 4047 | rule: token1 token2 token3 |
| 4048 | { print "token1: columns $itempos[1]{column}{from} |
| 4049 | to $itempos[1]{column}{to} |
| 4050 | token2: columns $itempos[2]{column}{from} |
| 4051 | to $itempos[2]{column}{to} |
| 4052 | token3: columns $itempos[3]{column}{from} |
| 4053 | to $itempos[3]{column}{to}" } |
| 4054 | |
| 4055 | Note however that (in the current implementation) the use of C<@itempos> |
| 4056 | anywhere in a grammar implies that item positioning information is |
| 4057 | collected I<everywhere> during the parse. Depending on the grammar |
| 4058 | and the size of the text to be parsed, this may be prohibitively |
| 4059 | expensive and the explicit use of C<$thisline>, C<$thiscolumn>, etc. may |
| 4060 | be a better choice. |
| 4061 | |
| 4062 | |
| 4063 | =item C<$thisparser> |
| 4064 | |
| 4065 | A reference to the S<C<Parse::RecDescent>> object through which |
| 4066 | parsing was initiated. |
| 4067 | |
| 4068 | The value of C<$thisparser> propagates down the subrules of a parse |
| 4069 | but not back up. Hence, you can invoke subrules from another parser |
| 4070 | for the scope of the current rule as follows: |
| 4071 | |
| 4072 | rule: subrule1 subrule2 |
| 4073 | | { $thisparser = $::otherparser } <reject> |
| 4074 | | subrule3 subrule4 |
| 4075 | | subrule5 |
| 4076 | |
| 4077 | The result is that the production calls "subrule1" and "subrule2" of |
| 4078 | the current parser, and the remaining productions call the named subrules |
| 4079 | from C<$::otherparser>. Note, however that "Bad Things" will happen if |
| 4080 | C<::otherparser> isn't a blessed reference and/or doesn't have methods |
| 4081 | with the same names as the required subrules! |
| 4082 | |
| 4083 | =item C<$thisrule> |
| 4084 | |
| 4085 | A reference to the S<C<Parse::RecDescent::Rule>> object corresponding to the |
| 4086 | rule currently being matched. |
| 4087 | |
| 4088 | =item C<$thisprod> |
| 4089 | |
| 4090 | A reference to the S<C<Parse::RecDescent::Production>> object |
| 4091 | corresponding to the production currently being matched. |
| 4092 | |
| 4093 | =item C<$score> and C<$score_return> |
| 4094 | |
| 4095 | $score stores the best production score to date, as specified by |
| 4096 | an earlier C<E<lt>score:...E<gt>> directive. $score_return stores |
| 4097 | the corresponding return value for the successful production. |
| 4098 | |
| 4099 | See L<Scored productions>. |
| 4100 | |
| 4101 | =back |
| 4102 | |
| 4103 | B<Warning:> the parser relies on the information in the various C<this...> |
| 4104 | objects in some non-obvious ways. Tinkering with the other members of |
| 4105 | these objects will probably cause Bad Things to happen, unless you |
| 4106 | I<really> know what you're doing. The only exception to this advice is |
| 4107 | that the use of C<$this...-E<gt>{local}> is always safe. |
| 4108 | |
| 4109 | |
| 4110 | =head2 Start-up Actions |
| 4111 | |
| 4112 | Any actions which appear I<before> the first rule definition in a |
| 4113 | grammar are treated as "start-up" actions. Each such action is |
| 4114 | stripped of its outermost brackets and then evaluated (in the parser's |
| 4115 | special namespace) just before the rules of the grammar are first |
| 4116 | compiled. |
| 4117 | |
| 4118 | The main use of start-up actions is to declare local variables within the |
| 4119 | parser's special namespace: |
| 4120 | |
| 4121 | { my $lastitem = '???'; } |
| 4122 | |
| 4123 | list: item(s) { $return = $lastitem } |
| 4124 | |
| 4125 | item: book { $lastitem = 'book'; } |
| 4126 | bell { $lastitem = 'bell'; } |
| 4127 | candle { $lastitem = 'candle'; } |
| 4128 | |
| 4129 | but start-up actions can be used to execute I<any> valid Perl code |
| 4130 | within a parser's special namespace. |
| 4131 | |
| 4132 | Start-up actions can appear within a grammar extension or replacement |
| 4133 | (that is, a partial grammar installed via C<Parse::RecDescent::Extend()> or |
| 4134 | C<Parse::RecDescent::Replace()> - see L<Incremental Parsing>), and will be |
| 4135 | executed before the new grammar is installed. Note, however, that a |
| 4136 | particular start-up action is only ever executed once. |
| 4137 | |
| 4138 | |
| 4139 | =head2 Autoactions |
| 4140 | |
| 4141 | It is sometimes desirable to be able to specify a default action to be |
| 4142 | taken at the end of every production (for example, in order to easily |
| 4143 | build a parse tree). If the variable C<$::RD_AUTOACTION> is defined |
| 4144 | when C<Parse::RecDescent::new()> is called, the contents of that |
| 4145 | variable are treated as a specification of an action which is to appended |
| 4146 | to each production in the corresponding grammar. |
| 4147 | |
| 4148 | Alternatively, you can hard-code the autoaction within a grammar, using the |
| 4149 | C<< <autoaction:...> >> directive. |
| 4150 | |
| 4151 | So, for example, to construct a simple parse tree you could write: |
| 4152 | |
| 4153 | $::RD_AUTOACTION = q { [@item] }; |
| 4154 | |
| 4155 | parser = Parse::RecDescent->new(q{ |
| 4156 | expression: and_expr '||' expression | and_expr |
| 4157 | and_expr: not_expr '&&' and_expr | not_expr |
| 4158 | not_expr: '!' brack_expr | brack_expr |
| 4159 | brack_expr: '(' expression ')' | identifier |
| 4160 | identifier: /[a-z]+/i |
| 4161 | }); |
| 4162 | |
| 4163 | or: |
| 4164 | |
| 4165 | parser = Parse::RecDescent->new(q{ |
| 4166 | <autoaction: { [@item] } > |
| 4167 | |
| 4168 | expression: and_expr '||' expression | and_expr |
| 4169 | and_expr: not_expr '&&' and_expr | not_expr |
| 4170 | not_expr: '!' brack_expr | brack_expr |
| 4171 | brack_expr: '(' expression ')' | identifier |
| 4172 | identifier: /[a-z]+/i |
| 4173 | }); |
| 4174 | |
| 4175 | Either of these is equivalent to: |
| 4176 | |
| 4177 | parser = new Parse::RecDescent (q{ |
| 4178 | expression: and_expr '||' expression |
| 4179 | { [@item] } |
| 4180 | | and_expr |
| 4181 | { [@item] } |
| 4182 | |
| 4183 | and_expr: not_expr '&&' and_expr |
| 4184 | { [@item] } |
| 4185 | | not_expr |
| 4186 | { [@item] } |
| 4187 | |
| 4188 | not_expr: '!' brack_expr |
| 4189 | { [@item] } |
| 4190 | | brack_expr |
| 4191 | { [@item] } |
| 4192 | |
| 4193 | brack_expr: '(' expression ')' |
| 4194 | { [@item] } |
| 4195 | | identifier |
| 4196 | { [@item] } |
| 4197 | |
| 4198 | identifier: /[a-z]+/i |
| 4199 | { [@item] } |
| 4200 | }); |
| 4201 | |
| 4202 | Alternatively, we could take an object-oriented approach, use different |
| 4203 | classes for each node (and also eliminating redundant intermediate nodes): |
| 4204 | |
| 4205 | $::RD_AUTOACTION = q |
| 4206 | { $#item==1 ? $item[1] : "$item[0]_node"->new(@item[1..$#item]) }; |
| 4207 | |
| 4208 | parser = Parse::RecDescent->new(q{ |
| 4209 | expression: and_expr '||' expression | and_expr |
| 4210 | and_expr: not_expr '&&' and_expr | not_expr |
| 4211 | not_expr: '!' brack_expr | brack_expr |
| 4212 | brack_expr: '(' expression ')' | identifier |
| 4213 | identifier: /[a-z]+/i |
| 4214 | }); |
| 4215 | |
| 4216 | or: |
| 4217 | |
| 4218 | parser = Parse::RecDescent->new(q{ |
| 4219 | <autoaction: |
| 4220 | $#item==1 ? $item[1] : "$item[0]_node"->new(@item[1..$#item]) |
| 4221 | > |
| 4222 | |
| 4223 | expression: and_expr '||' expression | and_expr |
| 4224 | and_expr: not_expr '&&' and_expr | not_expr |
| 4225 | not_expr: '!' brack_expr | brack_expr |
| 4226 | brack_expr: '(' expression ')' | identifier |
| 4227 | identifier: /[a-z]+/i |
| 4228 | }); |
| 4229 | |
| 4230 | which are equivalent to: |
| 4231 | |
| 4232 | parser = Parse::RecDescent->new(q{ |
| 4233 | expression: and_expr '||' expression |
| 4234 | { "expression_node"->new(@item[1..3]) } |
| 4235 | | and_expr |
| 4236 | |
| 4237 | and_expr: not_expr '&&' and_expr |
| 4238 | { "and_expr_node"->new(@item[1..3]) } |
| 4239 | | not_expr |
| 4240 | |
| 4241 | not_expr: '!' brack_expr |
| 4242 | { "not_expr_node"->new(@item[1..2]) } |
| 4243 | | brack_expr |
| 4244 | |
| 4245 | brack_expr: '(' expression ')' |
| 4246 | { "brack_expr_node"->new(@item[1..3]) } |
| 4247 | | identifier |
| 4248 | |
| 4249 | identifier: /[a-z]+/i |
| 4250 | { "identifer_node"->new(@item[1]) } |
| 4251 | }); |
| 4252 | |
| 4253 | Note that, if a production already ends in an action, no autoaction is appended |
| 4254 | to it. For example, in this version: |
| 4255 | |
| 4256 | $::RD_AUTOACTION = q |
| 4257 | { $#item==1 ? $item[1] : "$item[0]_node"->new(@item[1..$#item]) }; |
| 4258 | |
| 4259 | parser = Parse::RecDescent->new(q{ |
| 4260 | expression: and_expr '&&' expression | and_expr |
| 4261 | and_expr: not_expr '&&' and_expr | not_expr |
| 4262 | not_expr: '!' brack_expr | brack_expr |
| 4263 | brack_expr: '(' expression ')' | identifier |
| 4264 | identifier: /[a-z]+/i |
| 4265 | { 'terminal_node'->new($item[1]) } |
| 4266 | }); |
| 4267 | |
| 4268 | each C<identifier> match produces a C<terminal_node> object, I<not> an |
| 4269 | C<identifier_node> object. |
| 4270 | |
| 4271 | A level 1 warning is issued each time an "autoaction" is added to |
| 4272 | some production. |
| 4273 | |
| 4274 | |
| 4275 | =head2 Autotrees |
| 4276 | |
| 4277 | A commonly needed autoaction is one that builds a parse-tree. It is moderately |
| 4278 | tricky to set up such an action (which must treat terminals differently from |
| 4279 | non-terminals), so Parse::RecDescent simplifies the process by providing the |
| 4280 | C<E<lt>autotreeE<gt>> directive. |
| 4281 | |
| 4282 | If this directive appears at the start of grammar, it causes |
| 4283 | Parse::RecDescent to insert autoactions at the end of any rule except |
| 4284 | those which already end in an action. The action inserted depends on whether |
| 4285 | the production is an intermediate rule (two or more items), or a terminal |
| 4286 | of the grammar (i.e. a single pattern or string item). |
| 4287 | |
| 4288 | So, for example, the following grammar: |
| 4289 | |
| 4290 | <autotree> |
| 4291 | |
| 4292 | file : command(s) |
| 4293 | command : get | set | vet |
| 4294 | get : 'get' ident ';' |
| 4295 | set : 'set' ident 'to' value ';' |
| 4296 | vet : 'check' ident 'is' value ';' |
| 4297 | ident : /\w+/ |
| 4298 | value : /\d+/ |
| 4299 | |
| 4300 | is equivalent to: |
| 4301 | |
| 4302 | file : command(s) { bless \%item, $item[0] } |
| 4303 | command : get { bless \%item, $item[0] } |
| 4304 | | set { bless \%item, $item[0] } |
| 4305 | | vet { bless \%item, $item[0] } |
| 4306 | get : 'get' ident ';' { bless \%item, $item[0] } |
| 4307 | set : 'set' ident 'to' value ';' { bless \%item, $item[0] } |
| 4308 | vet : 'check' ident 'is' value ';' { bless \%item, $item[0] } |
| 4309 | |
| 4310 | ident : /\w+/ { bless {__VALUE__=>$item[1]}, $item[0] } |
| 4311 | value : /\d+/ { bless {__VALUE__=>$item[1]}, $item[0] } |
| 4312 | |
| 4313 | Note that each node in the tree is blessed into a class of the same name |
| 4314 | as the rule itself. This makes it easy to build object-oriented |
| 4315 | processors for the parse-trees that the grammar produces. Note too that |
| 4316 | the last two rules produce special objects with the single attribute |
| 4317 | '__VALUE__'. This is because they consist solely of a single terminal. |
| 4318 | |
| 4319 | This autoaction-ed grammar would then produce a parse tree in a data |
| 4320 | structure like this: |
| 4321 | |
| 4322 | { |
| 4323 | file => { |
| 4324 | command => { |
| 4325 | [ get => { |
| 4326 | identifier => { __VALUE__ => 'a' }, |
| 4327 | }, |
| 4328 | set => { |
| 4329 | identifier => { __VALUE__ => 'b' }, |
| 4330 | value => { __VALUE__ => '7' }, |
| 4331 | }, |
| 4332 | vet => { |
| 4333 | identifier => { __VALUE__ => 'b' }, |
| 4334 | value => { __VALUE__ => '7' }, |
| 4335 | }, |
| 4336 | ], |
| 4337 | }, |
| 4338 | } |
| 4339 | } |
| 4340 | |
| 4341 | (except, of course, that each nested hash would also be blessed into |
| 4342 | the appropriate class). |
| 4343 | |
| 4344 | You can also specify a base class for the C<E<lt>autotreeE<gt>> directive. |
| 4345 | The supplied prefix will be prepended to the rule names when creating |
| 4346 | tree nodes. The following are equivalent: |
| 4347 | |
| 4348 | <autotree:MyBase::Class> |
| 4349 | <autotree:MyBase::Class::> |
| 4350 | |
| 4351 | And will produce a root node blessed into the C<MyBase::Class::file> |
| 4352 | package in the example above. |
| 4353 | |
| 4354 | |
| 4355 | =head2 Autostubbing |
| 4356 | |
| 4357 | Normally, if a subrule appears in some production, but no rule of that |
| 4358 | name is ever defined in the grammar, the production which refers to the |
| 4359 | non-existent subrule fails immediately. This typically occurs as a |
| 4360 | result of misspellings, and is a sufficiently common occurance that a |
| 4361 | warning is generated for such situations. |
| 4362 | |
| 4363 | However, when prototyping a grammar it is sometimes useful to be |
| 4364 | able to use subrules before a proper specification of them is |
| 4365 | really possible. For example, a grammar might include a section like: |
| 4366 | |
| 4367 | function_call: identifier '(' arg(s?) ')' |
| 4368 | |
| 4369 | identifier: /[a-z]\w*/i |
| 4370 | |
| 4371 | where the possible format of an argument is sufficiently complex that |
| 4372 | it is not worth specifying in full until the general function call |
| 4373 | syntax has been debugged. In this situation it is convenient to leave |
| 4374 | the real rule C<arg> undefined and just slip in a placeholder (or |
| 4375 | "stub"): |
| 4376 | |
| 4377 | arg: 'arg' |
| 4378 | |
| 4379 | so that the function call syntax can be tested with dummy input such as: |
| 4380 | |
| 4381 | f0() |
| 4382 | f1(arg) |
| 4383 | f2(arg arg) |
| 4384 | f3(arg arg arg) |
| 4385 | |
| 4386 | et cetera. |
| 4387 | |
| 4388 | Early in prototyping, many such "stubs" may be required, so |
| 4389 | C<Parse::RecDescent> provides a means of automating their definition. |
| 4390 | If the variable C<$::RD_AUTOSTUB> is defined when a parser is built, a |
| 4391 | subrule reference to any non-existent rule (say, C<subrule>), will |
| 4392 | cause a "stub" rule to be automatically defined in the generated |
| 4393 | parser. If C<$::RD_AUTOSTUB eq '1'> or is false, a stub rule of the |
| 4394 | form: |
| 4395 | |
| 4396 | subrule: 'subrule' |
| 4397 | |
| 4398 | will be generated. The special-case for a value of C<'1'> is to allow |
| 4399 | the use of the B<perl -s> with B<-RD_AUTOSTUB> without generating |
| 4400 | C<subrule: '1'> per below. If C<$::RD_AUTOSTUB> is true, a stub rule |
| 4401 | of the form: |
| 4402 | |
| 4403 | subrule: $::RD_AUTOSTUB |
| 4404 | |
| 4405 | will be generated. C<$::RD_AUTOSTUB> must contain a valid production |
| 4406 | item, no checking is performed. No lazy evaluation of |
| 4407 | C<$::RD_AUTOSTUB> is performed, it is evaluated at the time the Parser |
| 4408 | is generated. |
| 4409 | |
| 4410 | Hence, with C<$::RD_AUTOSTUB> defined, it is possible to only |
| 4411 | partially specify a grammar, and then "fake" matches of the |
| 4412 | unspecified (sub)rules by just typing in their name, or a literal |
| 4413 | value that was assigned to C<$::RD_AUTOSTUB>. |
| 4414 | |
| 4415 | |
| 4416 | |
| 4417 | =head2 Look-ahead |
| 4418 | |
| 4419 | If a subrule, token, or action is prefixed by "...", then it is |
| 4420 | treated as a "look-ahead" request. That means that the current production can |
| 4421 | (as usual) only succeed if the specified item is matched, but that the matching |
| 4422 | I<does not consume any of the text being parsed>. This is very similar to the |
| 4423 | C</(?=...)/> look-ahead construct in Perl patterns. Thus, the rule: |
| 4424 | |
| 4425 | inner_word: word ...word |
| 4426 | |
| 4427 | will match whatever the subrule "word" matches, provided that match is followed |
| 4428 | by some more text which subrule "word" would also match (although this |
| 4429 | second substring is not actually consumed by "inner_word") |
| 4430 | |
| 4431 | Likewise, a "...!" prefix, causes the following item to succeed (without |
| 4432 | consuming any text) if and only if it would normally fail. Hence, a |
| 4433 | rule such as: |
| 4434 | |
| 4435 | identifier: ...!keyword ...!'_' /[A-Za-z_]\w*/ |
| 4436 | |
| 4437 | matches a string of characters which satisfies the pattern |
| 4438 | C</[A-Za-z_]\w*/>, but only if the same sequence of characters would |
| 4439 | not match either subrule "keyword" or the literal token '_'. |
| 4440 | |
| 4441 | Sequences of look-ahead prefixes accumulate, multiplying their positive and/or |
| 4442 | negative senses. Hence: |
| 4443 | |
| 4444 | inner_word: word ...!......!word |
| 4445 | |
| 4446 | is exactly equivalent the the original example above (a warning is issued in |
| 4447 | cases like these, since they often indicate something left out, or |
| 4448 | misunderstood). |
| 4449 | |
| 4450 | Note that actions can also be treated as look-aheads. In such cases, |
| 4451 | the state of the parser text (in the local variable C<$text>) |
| 4452 | I<after> the look-ahead action is guaranteed to be identical to its |
| 4453 | state I<before> the action, regardless of how it's changed I<within> |
| 4454 | the action (unless you actually undefine C<$text>, in which case you |
| 4455 | get the disaster you deserve :-). |
| 4456 | |
| 4457 | |
| 4458 | =head2 Directives |
| 4459 | |
| 4460 | Directives are special pre-defined actions which may be used to alter |
| 4461 | the behaviour of the parser. There are currently twenty-three directives: |
| 4462 | C<E<lt>commitE<gt>>, |
| 4463 | C<E<lt>uncommitE<gt>>, |
| 4464 | C<E<lt>rejectE<gt>>, |
| 4465 | C<E<lt>scoreE<gt>>, |
| 4466 | C<E<lt>autoscoreE<gt>>, |
| 4467 | C<E<lt>skipE<gt>>, |
| 4468 | C<E<lt>resyncE<gt>>, |
| 4469 | C<E<lt>errorE<gt>>, |
| 4470 | C<E<lt>warnE<gt>>, |
| 4471 | C<E<lt>hintE<gt>>, |
| 4472 | C<E<lt>trace_buildE<gt>>, |
| 4473 | C<E<lt>trace_parseE<gt>>, |
| 4474 | C<E<lt>nocheckE<gt>>, |
| 4475 | C<E<lt>rulevarE<gt>>, |
| 4476 | C<E<lt>matchruleE<gt>>, |
| 4477 | C<E<lt>leftopE<gt>>, |
| 4478 | C<E<lt>rightopE<gt>>, |
| 4479 | C<E<lt>deferE<gt>>, |
| 4480 | C<E<lt>nocheckE<gt>>, |
| 4481 | C<E<lt>perl_quotelikeE<gt>>, |
| 4482 | C<E<lt>perl_codeblockE<gt>>, |
| 4483 | C<E<lt>perl_variableE<gt>>, |
| 4484 | and C<E<lt>tokenE<gt>>. |
| 4485 | |
| 4486 | =over 4 |
| 4487 | |
| 4488 | =item Committing and uncommitting |
| 4489 | |
| 4490 | The C<E<lt>commitE<gt>> and C<E<lt>uncommitE<gt>> directives permit the recursive |
| 4491 | descent of the parse tree to be pruned (or "cut") for efficiency. |
| 4492 | Within a rule, a C<E<lt>commitE<gt>> directive instructs the rule to ignore subsequent |
| 4493 | productions if the current production fails. For example: |
| 4494 | |
| 4495 | command: 'find' <commit> filename |
| 4496 | | 'open' <commit> filename |
| 4497 | | 'move' filename filename |
| 4498 | |
| 4499 | Clearly, if the leading token 'find' is matched in the first production but that |
| 4500 | production fails for some other reason, then the remaining |
| 4501 | productions cannot possibly match. The presence of the |
| 4502 | C<E<lt>commitE<gt>> causes the "command" rule to fail immediately if |
| 4503 | an invalid "find" command is found, and likewise if an invalid "open" |
| 4504 | command is encountered. |
| 4505 | |
| 4506 | It is also possible to revoke a previous commitment. For example: |
| 4507 | |
| 4508 | if_statement: 'if' <commit> condition |
| 4509 | 'then' block <uncommit> |
| 4510 | 'else' block |
| 4511 | | 'if' <commit> condition |
| 4512 | 'then' block |
| 4513 | |
| 4514 | In this case, a failure to find an "else" block in the first |
| 4515 | production shouldn't preclude trying the second production, but a |
| 4516 | failure to find a "condition" certainly should. |
| 4517 | |
| 4518 | As a special case, any production in which the I<first> item is an |
| 4519 | C<E<lt>uncommitE<gt>> immediately revokes a preceding C<E<lt>commitE<gt>> |
| 4520 | (even though the production would not otherwise have been tried). For |
| 4521 | example, in the rule: |
| 4522 | |
| 4523 | request: 'explain' expression |
| 4524 | | 'explain' <commit> keyword |
| 4525 | | 'save' |
| 4526 | | 'quit' |
| 4527 | | <uncommit> term '?' |
| 4528 | |
| 4529 | if the text being matched was "explain?", and the first two |
| 4530 | productions failed, then the C<E<lt>commitE<gt>> in production two would cause |
| 4531 | productions three and four to be skipped, but the leading |
| 4532 | C<E<lt>uncommitE<gt>> in the production five would allow that production to |
| 4533 | attempt a match. |
| 4534 | |
| 4535 | Note in the preceding example, that the C<E<lt>commitE<gt>> was only placed |
| 4536 | in production two. If production one had been: |
| 4537 | |
| 4538 | request: 'explain' <commit> expression |
| 4539 | |
| 4540 | then production two would be (inappropriately) skipped if a leading |
| 4541 | "explain..." was encountered. |
| 4542 | |
| 4543 | Both C<E<lt>commitE<gt>> and C<E<lt>uncommitE<gt>> directives always succeed, and their value |
| 4544 | is always 1. |
| 4545 | |
| 4546 | |
| 4547 | =item Rejecting a production |
| 4548 | |
| 4549 | The C<E<lt>rejectE<gt>> directive immediately causes the current production |
| 4550 | to fail (it is exactly equivalent to, but more obvious than, the |
| 4551 | action C<{undef}>). A C<E<lt>rejectE<gt>> is useful when it is desirable to get |
| 4552 | the side effects of the actions in one production, without prejudicing a match |
| 4553 | by some other production later in the rule. For example, to insert |
| 4554 | tracing code into the parse: |
| 4555 | |
| 4556 | complex_rule: { print "In complex rule...\n"; } <reject> |
| 4557 | |
| 4558 | complex_rule: simple_rule '+' 'i' '*' simple_rule |
| 4559 | | 'i' '*' simple_rule |
| 4560 | | simple_rule |
| 4561 | |
| 4562 | |
| 4563 | It is also possible to specify a conditional rejection, using the |
| 4564 | form C<E<lt>reject:I<condition>E<gt>>, which only rejects if the |
| 4565 | specified condition is true. This form of rejection is exactly |
| 4566 | equivalent to the action C<{(I<condition>)?undef:1}E<gt>>. |
| 4567 | For example: |
| 4568 | |
| 4569 | command: save_command |
| 4570 | | restore_command |
| 4571 | | <reject: defined $::tolerant> { exit } |
| 4572 | | <error: Unknown command. Ignored.> |
| 4573 | |
| 4574 | A C<E<lt>rejectE<gt>> directive never succeeds (and hence has no |
| 4575 | associated value). A conditional rejection may succeed (if its |
| 4576 | condition is not satisfied), in which case its value is 1. |
| 4577 | |
| 4578 | As an extra optimization, C<Parse::RecDescent> ignores any production |
| 4579 | which I<begins> with an unconditional C<E<lt>rejectE<gt>> directive, |
| 4580 | since any such production can never successfully match or have any |
| 4581 | useful side-effects. A level 1 warning is issued in all such cases. |
| 4582 | |
| 4583 | Note that productions beginning with conditional |
| 4584 | C<E<lt>reject:...E<gt>> directives are I<never> "optimized away" in |
| 4585 | this manner, even if they are always guaranteed to fail (for example: |
| 4586 | C<E<lt>reject:1E<gt>>) |
| 4587 | |
| 4588 | Due to the way grammars are parsed, there is a minor restriction on the |
| 4589 | condition of a conditional C<E<lt>reject:...E<gt>>: it cannot |
| 4590 | contain any raw '<' or '>' characters. For example: |
| 4591 | |
| 4592 | line: cmd <reject: $thiscolumn > max> data |
| 4593 | |
| 4594 | results in an error when a parser is built from this grammar (since the |
| 4595 | grammar parser has no way of knowing whether the first > is a "less than" |
| 4596 | or the end of the C<E<lt>reject:...E<gt>>. |
| 4597 | |
| 4598 | To overcome this problem, put the condition inside a do{} block: |
| 4599 | |
| 4600 | line: cmd <reject: do{$thiscolumn > max}> data |
| 4601 | |
| 4602 | Note that the same problem may occur in other directives that take |
| 4603 | arguments. The same solution will work in all cases. |
| 4604 | |
| 4605 | |
| 4606 | =item Skipping between terminals |
| 4607 | |
| 4608 | The C<E<lt>skipE<gt>> directive enables the terminal prefix used in |
| 4609 | a production to be changed. For example: |
| 4610 | |
| 4611 | OneLiner: Command <skip:'[ \t]*'> Arg(s) /;/ |
| 4612 | |
| 4613 | causes only blanks and tabs to be skipped before terminals in the C<Arg> |
| 4614 | subrule (and any of I<its> subrules>, and also before the final C</;/> terminal. |
| 4615 | Once the production is complete, the previous terminal prefix is |
| 4616 | reinstated. Note that this implies that distinct productions of a rule |
| 4617 | must reset their terminal prefixes individually. |
| 4618 | |
| 4619 | The C<E<lt>skipE<gt>> directive evaluates to the I<previous> terminal prefix, |
| 4620 | so it's easy to reinstate a prefix later in a production: |
| 4621 | |
| 4622 | Command: <skip:","> CSV(s) <skip:$item[1]> Modifier |
| 4623 | |
| 4624 | The value specified after the colon is interpolated into a pattern, so all of |
| 4625 | the following are equivalent (though their efficiency increases down the list): |
| 4626 | |
| 4627 | <skip: "$colon|$comma"> # ASSUMING THE VARS HOLD THE OBVIOUS VALUES |
| 4628 | |
| 4629 | <skip: ':|,'> |
| 4630 | |
| 4631 | <skip: q{[:,]}> |
| 4632 | |
| 4633 | <skip: qr/[:,]/> |
| 4634 | |
| 4635 | There is no way of directly setting the prefix for |
| 4636 | an entire rule, except as follows: |
| 4637 | |
| 4638 | Rule: <skip: '[ \t]*'> Prod1 |
| 4639 | | <skip: '[ \t]*'> Prod2a Prod2b |
| 4640 | | <skip: '[ \t]*'> Prod3 |
| 4641 | |
| 4642 | or, better: |
| 4643 | |
| 4644 | Rule: <skip: '[ \t]*'> |
| 4645 | ( |
| 4646 | Prod1 |
| 4647 | | Prod2a Prod2b |
| 4648 | | Prod3 |
| 4649 | ) |
| 4650 | |
| 4651 | The skip pattern is passed down to subrules, so setting the skip for |
| 4652 | the top-level rule as described above actually sets the prefix for the |
| 4653 | entire grammar (provided that you only call the method corresponding |
| 4654 | to the top-level rule itself). Alternatively, or if you have more than |
| 4655 | one top-level rule in your grammar, you can provide a global |
| 4656 | C<E<lt>skipE<gt>> directive prior to defining any rules in the |
| 4657 | grammar. These are the preferred alternatives to setting |
| 4658 | C<$Parse::RecDescent::skip>. |
| 4659 | |
| 4660 | Additionally, using C<E<lt>skipE<gt>> actually allows you to have |
| 4661 | a completely dynamic skipping behaviour. For example: |
| 4662 | |
| 4663 | Rule_with_dynamic_skip: <skip: $::skip_pattern> Rule |
| 4664 | |
| 4665 | Then you can set C<$::skip_pattern> before invoking |
| 4666 | C<Rule_with_dynamic_skip> and have it skip whatever you specified. |
| 4667 | |
| 4668 | B<Note: Up to release 1.51 of Parse::RecDescent, an entirely different |
| 4669 | mechanism was used for specifying terminal prefixes. The current method |
| 4670 | is not backwards-compatible with that early approach. The current approach |
| 4671 | is stable and will not to change again.> |
| 4672 | |
| 4673 | |
| 4674 | =item Resynchronization |
| 4675 | |
| 4676 | The C<E<lt>resyncE<gt>> directive provides a visually distinctive |
| 4677 | means of consuming some of the text being parsed, usually to skip an |
| 4678 | erroneous input. In its simplest form C<E<lt>resyncE<gt>> simply |
| 4679 | consumes text up to and including the next newline (C<"\n">) |
| 4680 | character, succeeding only if the newline is found, in which case it |
| 4681 | causes its surrounding rule to return zero on success. |
| 4682 | |
| 4683 | In other words, a C<E<lt>resyncE<gt>> is exactly equivalent to the token |
| 4684 | C</[^\n]*\n/> followed by the action S<C<{ $return = 0 }>> (except that |
| 4685 | productions beginning with a C<E<lt>resyncE<gt>> are ignored when generating |
| 4686 | error messages). A typical use might be: |
| 4687 | |
| 4688 | script : command(s) |
| 4689 | |
| 4690 | command: save_command |
| 4691 | | restore_command |
| 4692 | | <resync> # TRY NEXT LINE, IF POSSIBLE |
| 4693 | |
| 4694 | It is also possible to explicitly specify a resynchronization |
| 4695 | pattern, using the C<E<lt>resync:I<pattern>E<gt>> variant. This version |
| 4696 | succeeds only if the specified pattern matches (and consumes) the |
| 4697 | parsed text. In other words, C<E<lt>resync:I<pattern>E<gt>> is exactly |
| 4698 | equivalent to the token C</I<pattern>/> (followed by a S<C<{ $return = 0 }>> |
| 4699 | action). For example, if commands were terminated by newlines or semi-colons: |
| 4700 | |
| 4701 | command: save_command |
| 4702 | | restore_command |
| 4703 | | <resync:[^;\n]*[;\n]> |
| 4704 | |
| 4705 | The value of a successfully matched C<E<lt>resyncE<gt>> directive (of either |
| 4706 | type) is the text that it consumed. Note, however, that since the |
| 4707 | directive also sets C<$return>, a production consisting of a lone |
| 4708 | C<E<lt>resyncE<gt>> succeeds but returns the value zero (which a calling rule |
| 4709 | may find useful to distinguish between "true" matches and "tolerant" matches). |
| 4710 | Remember that returning a zero value indicates that the rule I<succeeded> (since |
| 4711 | only an C<undef> denotes failure within C<Parse::RecDescent> parsers. |
| 4712 | |
| 4713 | |
| 4714 | =item Error handling |
| 4715 | |
| 4716 | The C<E<lt>errorE<gt>> directive provides automatic or user-defined |
| 4717 | generation of error messages during a parse. In its simplest form |
| 4718 | C<E<lt>errorE<gt>> prepares an error message based on |
| 4719 | the mismatch between the last item expected and the text which cause |
| 4720 | it to fail. For example, given the rule: |
| 4721 | |
| 4722 | McCoy: curse ',' name ', I'm a doctor, not a' a_profession '!' |
| 4723 | | pronoun 'dead,' name '!' |
| 4724 | | <error> |
| 4725 | |
| 4726 | the following strings would produce the following messages: |
| 4727 | |
| 4728 | =over 4 |
| 4729 | |
| 4730 | =item "Amen, Jim!" |
| 4731 | |
| 4732 | ERROR (line 1): Invalid McCoy: Expected curse or pronoun |
| 4733 | not found |
| 4734 | |
| 4735 | =item "Dammit, Jim, I'm a doctor!" |
| 4736 | |
| 4737 | ERROR (line 1): Invalid McCoy: Expected ", I'm a doctor, not a" |
| 4738 | but found ", I'm a doctor!" instead |
| 4739 | |
| 4740 | =item "He's dead,\n" |
| 4741 | |
| 4742 | ERROR (line 2): Invalid McCoy: Expected name not found |
| 4743 | |
| 4744 | =item "He's alive!" |
| 4745 | |
| 4746 | ERROR (line 1): Invalid McCoy: Expected 'dead,' but found |
| 4747 | "alive!" instead |
| 4748 | |
| 4749 | =item "Dammit, Jim, I'm a doctor, not a pointy-eared Vulcan!" |
| 4750 | |
| 4751 | ERROR (line 1): Invalid McCoy: Expected a profession but found |
| 4752 | "pointy-eared Vulcan!" instead |
| 4753 | |
| 4754 | |
| 4755 | =back |
| 4756 | |
| 4757 | Note that, when autogenerating error messages, all underscores in any |
| 4758 | rule name used in a message are replaced by single spaces (for example |
| 4759 | "a_production" becomes "a production"). Judicious choice of rule |
| 4760 | names can therefore considerably improve the readability of automatic |
| 4761 | error messages (as well as the maintainability of the original |
| 4762 | grammar). |
| 4763 | |
| 4764 | If the automatically generated error is not sufficient, it is possible to |
| 4765 | provide an explicit message as part of the error directive. For example: |
| 4766 | |
| 4767 | Spock: "Fascinating ',' (name | 'Captain') '.' |
| 4768 | | "Highly illogical, doctor." |
| 4769 | | <error: He never said that!> |
| 4770 | |
| 4771 | which would result in I<all> failures to parse a "Spock" subrule printing the |
| 4772 | following message: |
| 4773 | |
| 4774 | ERROR (line <N>): Invalid Spock: He never said that! |
| 4775 | |
| 4776 | The error message is treated as a "qq{...}" string and interpolated |
| 4777 | when the error is generated (I<not> when the directive is specified!). |
| 4778 | Hence: |
| 4779 | |
| 4780 | <error: Mystical error near "$text"> |
| 4781 | |
| 4782 | would correctly insert the ambient text string which caused the error. |
| 4783 | |
| 4784 | There are two other forms of error directive: C<E<lt>error?E<gt>> and |
| 4785 | S<C<E<lt>error?: msgE<gt>>>. These behave just like C<E<lt>errorE<gt>> |
| 4786 | and S<C<E<lt>error: msgE<gt>>> respectively, except that they are |
| 4787 | only triggered if the rule is "committed" at the time they are |
| 4788 | encountered. For example: |
| 4789 | |
| 4790 | Scotty: "Ya kenna change the Laws of Phusics," <commit> name |
| 4791 | | name <commit> ',' 'she's goanta blaw!' |
| 4792 | | <error?> |
| 4793 | |
| 4794 | will only generate an error for a string beginning with "Ya kenna |
| 4795 | change the Laws o' Phusics," or a valid name, but which still fails to match the |
| 4796 | corresponding production. That is, C<$parser-E<gt>Scotty("Aye, Cap'ain")> will |
| 4797 | fail silently (since neither production will "commit" the rule on that |
| 4798 | input), whereas S<C<$parser-E<gt>Scotty("Mr Spock, ah jest kenna do'ut!")>> |
| 4799 | will fail with the error message: |
| 4800 | |
| 4801 | ERROR (line 1): Invalid Scotty: expected 'she's goanta blaw!' |
| 4802 | but found 'I jest kenna do'ut!' instead. |
| 4803 | |
| 4804 | since in that case the second production would commit after matching |
| 4805 | the leading name. |
| 4806 | |
| 4807 | Note that to allow this behaviour, all C<E<lt>errorE<gt>> directives which are |
| 4808 | the first item in a production automatically uncommit the rule just |
| 4809 | long enough to allow their production to be attempted (that is, when |
| 4810 | their production fails, the commitment is reinstated so that |
| 4811 | subsequent productions are skipped). |
| 4812 | |
| 4813 | In order to I<permanently> uncommit the rule before an error message, |
| 4814 | it is necessary to put an explicit C<E<lt>uncommitE<gt>> before the |
| 4815 | C<E<lt>errorE<gt>>. For example: |
| 4816 | |
| 4817 | line: 'Kirk:' <commit> Kirk |
| 4818 | | 'Spock:' <commit> Spock |
| 4819 | | 'McCoy:' <commit> McCoy |
| 4820 | | <uncommit> <error?> <reject> |
| 4821 | | <resync> |
| 4822 | |
| 4823 | |
| 4824 | Error messages generated by the various C<E<lt>error...E<gt>> directives |
| 4825 | are not displayed immediately. Instead, they are "queued" in a buffer and |
| 4826 | are only displayed once parsing ultimately fails. Moreover, |
| 4827 | C<E<lt>error...E<gt>> directives that cause one production of a rule |
| 4828 | to fail are automatically removed from the message queue |
| 4829 | if another production subsequently causes the entire rule to succeed. |
| 4830 | This means that you can put |
| 4831 | C<E<lt>error...E<gt>> directives wherever useful diagnosis can be done, |
| 4832 | and only those associated with actual parser failure will ever be |
| 4833 | displayed. Also see L<"GOTCHAS">. |
| 4834 | |
| 4835 | As a general rule, the most useful diagnostics are usually generated |
| 4836 | either at the very lowest level within the grammar, or at the very |
| 4837 | highest. A good rule of thumb is to identify those subrules which |
| 4838 | consist mainly (or entirely) of terminals, and then put an |
| 4839 | C<E<lt>error...E<gt>> directive at the end of any other rule which calls |
| 4840 | one or more of those subrules. |
| 4841 | |
| 4842 | There is one other situation in which the output of the various types of |
| 4843 | error directive is suppressed; namely, when the rule containing them |
| 4844 | is being parsed as part of a "look-ahead" (see L<"Look-ahead">). In this |
| 4845 | case, the error directive will still cause the rule to fail, but will do |
| 4846 | so silently. |
| 4847 | |
| 4848 | An unconditional C<E<lt>errorE<gt>> directive always fails (and hence has no |
| 4849 | associated value). This means that encountering such a directive |
| 4850 | always causes the production containing it to fail. Hence an |
| 4851 | C<E<lt>errorE<gt>> directive will inevitably be the last (useful) item of a |
| 4852 | rule (a level 3 warning is issued if a production contains items after an unconditional |
| 4853 | C<E<lt>errorE<gt>> directive). |
| 4854 | |
| 4855 | An C<E<lt>error?E<gt>> directive will I<succeed> (that is: fail to fail :-), if |
| 4856 | the current rule is uncommitted when the directive is encountered. In |
| 4857 | that case the directive's associated value is zero. Hence, this type |
| 4858 | of error directive I<can> be used before the end of a |
| 4859 | production. For example: |
| 4860 | |
| 4861 | command: 'do' <commit> something |
| 4862 | | 'report' <commit> something |
| 4863 | | <error?: Syntax error> <error: Unknown command> |
| 4864 | |
| 4865 | |
| 4866 | B<Warning:> The C<E<lt>error?E<gt>> directive does I<not> mean "always fail (but |
| 4867 | do so silently unless committed)". It actually means "only fail (and report) if |
| 4868 | committed, otherwise I<succeed>". To achieve the "fail silently if uncommitted" |
| 4869 | semantics, it is necessary to use: |
| 4870 | |
| 4871 | rule: item <commit> item(s) |
| 4872 | | <error?> <reject> # FAIL SILENTLY UNLESS COMMITTED |
| 4873 | |
| 4874 | However, because people seem to expect a lone C<E<lt>error?E<gt>> directive |
| 4875 | to work like this: |
| 4876 | |
| 4877 | rule: item <commit> item(s) |
| 4878 | | <error?: Error message if committed> |
| 4879 | | <error: Error message if uncommitted> |
| 4880 | |
| 4881 | Parse::RecDescent automatically appends a |
| 4882 | C<E<lt>rejectE<gt>> directive if the C<E<lt>error?E<gt>> directive |
| 4883 | is the only item in a production. A level 2 warning (see below) |
| 4884 | is issued when this happens. |
| 4885 | |
| 4886 | The level of error reporting during both parser construction and |
| 4887 | parsing is controlled by the presence or absence of four global |
| 4888 | variables: C<$::RD_ERRORS>, C<$::RD_WARN>, C<$::RD_HINT>, and |
| 4889 | <$::RD_TRACE>. If C<$::RD_ERRORS> is defined (and, by default, it is) |
| 4890 | then fatal errors are reported. |
| 4891 | |
| 4892 | Whenever C<$::RD_WARN> is defined, certain non-fatal problems are also reported. |
| 4893 | |
| 4894 | Warnings have an associated "level": 1, 2, or 3. The higher the level, |
| 4895 | the more serious the warning. The value of the corresponding global |
| 4896 | variable (C<$::RD_WARN>) determines the I<lowest> level of warning to |
| 4897 | be displayed. Hence, to see I<all> warnings, set C<$::RD_WARN> to 1. |
| 4898 | To see only the most serious warnings set C<$::RD_WARN> to 3. |
| 4899 | By default C<$::RD_WARN> is initialized to 3, ensuring that serious but |
| 4900 | non-fatal errors are automatically reported. |
| 4901 | |
| 4902 | There is also a grammar directive to turn on warnings from within the |
| 4903 | grammar: C<< <warn> >>. It takes an optional argument, which specifies |
| 4904 | the warning level: C<< <warn: 2> >>. |
| 4905 | |
| 4906 | See F<"DIAGNOSTICS"> for a list of the varous error and warning messages |
| 4907 | that Parse::RecDescent generates when these two variables are defined. |
| 4908 | |
| 4909 | Defining any of the remaining variables (which are not defined by |
| 4910 | default) further increases the amount of information reported. |
| 4911 | Defining C<$::RD_HINT> causes the parser generator to offer |
| 4912 | more detailed analyses and hints on both errors and warnings. |
| 4913 | Note that setting C<$::RD_HINT> at any point automagically |
| 4914 | sets C<$::RD_WARN> to 1. There is also a C<< <hint> >> directive, which can |
| 4915 | be hard-coded into a grammar. |
| 4916 | |
| 4917 | Defining C<$::RD_TRACE> causes the parser generator and the parser to |
| 4918 | report their progress to STDERR in excruciating detail (although, without hints |
| 4919 | unless $::RD_HINT is separately defined). This detail |
| 4920 | can be moderated in only one respect: if C<$::RD_TRACE> has an |
| 4921 | integer value (I<N>) greater than 1, only the I<N> characters of |
| 4922 | the "current parsing context" (that is, where in the input string we |
| 4923 | are at any point in the parse) is reported at any time. |
| 4924 | |
| 4925 | C<$::RD_TRACE> is mainly useful for debugging a grammar that isn't |
| 4926 | behaving as you expected it to. To this end, if C<$::RD_TRACE> is |
| 4927 | defined when a parser is built, any actual parser code which is |
| 4928 | generated is also written to a file named "RD_TRACE" in the local |
| 4929 | directory. |
| 4930 | |
| 4931 | There are two directives associated with the C<$::RD_TRACE> variable. |
| 4932 | If a grammar contains a C<< <trace_build> >> directive anywhere in its |
| 4933 | specification, C<$::RD_TRACE> is turned on during the parser construction |
| 4934 | phase. If a grammar contains a C<< <trace_parse> >> directive anywhere in its |
| 4935 | specification, C<$::RD_TRACE> is turned on during any parse the parser |
| 4936 | performs. |
| 4937 | |
| 4938 | Note that the four variables belong to the "main" package, which |
| 4939 | makes them easier to refer to in the code controlling the parser, and |
| 4940 | also makes it easy to turn them into command line flags ("-RD_ERRORS", |
| 4941 | "-RD_WARN", "-RD_HINT", "-RD_TRACE") under B<perl -s>. |
| 4942 | |
| 4943 | The corresponding directives are useful to "hardwire" the various |
| 4944 | debugging features into a particular grammar (rather than having to set |
| 4945 | and reset external variables). |
| 4946 | |
| 4947 | =item Redirecting diagnostics |
| 4948 | |
| 4949 | The diagnostics provided by the tracing mechanism always go to STDERR. |
| 4950 | If you need them to go elsewhere, localize and reopen STDERR prior to the |
| 4951 | parse. |
| 4952 | |
| 4953 | For example: |
| 4954 | |
| 4955 | { |
| 4956 | local *STDERR = IO::File->new(">$filename") or die $!; |
| 4957 | |
| 4958 | my $result = $parser->startrule($text); |
| 4959 | } |
| 4960 | |
| 4961 | |
| 4962 | =item Consistency checks |
| 4963 | |
| 4964 | Whenever a parser is build, Parse::RecDescent carries out a number of |
| 4965 | (potentially expensive) consistency checks. These include: verifying that the |
| 4966 | grammar is not left-recursive and that no rules have been left undefined. |
| 4967 | |
| 4968 | These checks are important safeguards during development, but unnecessary |
| 4969 | overheads when the grammar is stable and ready to be deployed. So |
| 4970 | Parse::RecDescent provides a directive to disable them: C<< <nocheck> >>. |
| 4971 | |
| 4972 | If a grammar contains a C<< <nocheck> >> directive anywhere in its |
| 4973 | specification, the extra compile-time checks are by-passed. |
| 4974 | |
| 4975 | |
| 4976 | =item Specifying local variables |
| 4977 | |
| 4978 | It is occasionally convenient to specify variables which are local |
| 4979 | to a single rule. This may be achieved by including a |
| 4980 | C<E<lt>rulevar:...E<gt>> directive anywhere in the rule. For example: |
| 4981 | |
| 4982 | markup: <rulevar: $tag> |
| 4983 | |
| 4984 | markup: tag {($tag=$item[1]) =~ s/^<|>$//g} body[$tag] |
| 4985 | |
| 4986 | The example C<E<lt>rulevar: $tagE<gt>> directive causes a "my" variable named |
| 4987 | C<$tag> to be declared at the start of the subroutine implementing the |
| 4988 | C<markup> rule (that is, I<before> the first production, regardless of |
| 4989 | where in the rule it is specified). |
| 4990 | |
| 4991 | Specifically, any directive of the form: |
| 4992 | C<E<lt>rulevar:I<text>E<gt>> causes a line of the form C<my I<text>;> |
| 4993 | to be added at the beginning of the rule subroutine, immediately after |
| 4994 | the definitions of the following local variables: |
| 4995 | |
| 4996 | $thisparser $commit |
| 4997 | $thisrule @item |
| 4998 | $thisline @arg |
| 4999 | $text %arg |
| 5000 | |
| 5001 | This means that the following C<E<lt>rulevarE<gt>> directives work |
| 5002 | as expected: |
| 5003 | |
| 5004 | <rulevar: $count = 0 > |
| 5005 | |
| 5006 | <rulevar: $firstarg = $arg[0] || '' > |
| 5007 | |
| 5008 | <rulevar: $myItems = \@item > |
| 5009 | |
| 5010 | <rulevar: @context = ( $thisline, $text, @arg ) > |
| 5011 | |
| 5012 | <rulevar: ($name,$age) = $arg{"name","age"} > |
| 5013 | |
| 5014 | If a variable that is also visible to subrules is required, it needs |
| 5015 | to be C<local>'d, not C<my>'d. C<rulevar> defaults to C<my>, but if C<local> |
| 5016 | is explicitly specified: |
| 5017 | |
| 5018 | <rulevar: local $count = 0 > |
| 5019 | |
| 5020 | then a C<local>-ized variable is declared instead, and will be available |
| 5021 | within subrules. |
| 5022 | |
| 5023 | Note however that, because all such variables are "my" variables, their |
| 5024 | values I<do not persist> between match attempts on a given rule. To |
| 5025 | preserve values between match attempts, values can be stored within the |
| 5026 | "local" member of the C<$thisrule> object: |
| 5027 | |
| 5028 | countedrule: { $thisrule->{"local"}{"count"}++ } |
| 5029 | <reject> |
| 5030 | | subrule1 |
| 5031 | | subrule2 |
| 5032 | | <reject: $thisrule->{"local"}{"count"} == 1> |
| 5033 | subrule3 |
| 5034 | |
| 5035 | |
| 5036 | When matching a rule, each C<E<lt>rulevarE<gt>> directive is matched as |
| 5037 | if it were an unconditional C<E<lt>rejectE<gt>> directive (that is, it |
| 5038 | causes any production in which it appears to immediately fail to match). |
| 5039 | For this reason (and to improve readability) it is usual to specify any |
| 5040 | C<E<lt>rulevarE<gt>> directive in a separate production at the start of |
| 5041 | the rule (this has the added advantage that it enables |
| 5042 | C<Parse::RecDescent> to optimize away such productions, just as it does |
| 5043 | for the C<E<lt>rejectE<gt>> directive). |
| 5044 | |
| 5045 | |
| 5046 | =item Dynamically matched rules |
| 5047 | |
| 5048 | Because regexes and double-quoted strings are interpolated, it is relatively |
| 5049 | easy to specify productions with "context sensitive" tokens. For example: |
| 5050 | |
| 5051 | command: keyword body "end $item[1]" |
| 5052 | |
| 5053 | which ensures that a command block is bounded by a |
| 5054 | "I<E<lt>keywordE<gt>>...end I<E<lt>same keywordE<gt>>" pair. |
| 5055 | |
| 5056 | Building productions in which subrules are context sensitive is also possible, |
| 5057 | via the C<E<lt>matchrule:...E<gt>> directive. This directive behaves |
| 5058 | identically to a subrule item, except that the rule which is invoked to match |
| 5059 | it is determined by the string specified after the colon. For example, we could |
| 5060 | rewrite the C<command> rule like this: |
| 5061 | |
| 5062 | command: keyword <matchrule:body> "end $item[1]" |
| 5063 | |
| 5064 | Whatever appears after the colon in the directive is treated as an interpolated |
| 5065 | string (that is, as if it appeared in C<qq{...}> operator) and the value of |
| 5066 | that interpolated string is the name of the subrule to be matched. |
| 5067 | |
| 5068 | Of course, just putting a constant string like C<body> in a |
| 5069 | C<E<lt>matchrule:...E<gt>> directive is of little interest or benefit. |
| 5070 | The power of directive is seen when we use a string that interpolates |
| 5071 | to something interesting. For example: |
| 5072 | |
| 5073 | command: keyword <matchrule:$item[1]_body> "end $item[1]" |
| 5074 | |
| 5075 | keyword: 'while' | 'if' | 'function' |
| 5076 | |
| 5077 | while_body: condition block |
| 5078 | |
| 5079 | if_body: condition block ('else' block)(?) |
| 5080 | |
| 5081 | function_body: arglist block |
| 5082 | |
| 5083 | Now the C<command> rule selects how to proceed on the basis of the keyword |
| 5084 | that is found. It is as if C<command> were declared: |
| 5085 | |
| 5086 | command: 'while' while_body "end while" |
| 5087 | | 'if' if_body "end if" |
| 5088 | | 'function' function_body "end function" |
| 5089 | |
| 5090 | |
| 5091 | When a C<E<lt>matchrule:...E<gt>> directive is used as a repeated |
| 5092 | subrule, the rule name expression is "late-bound". That is, the name of |
| 5093 | the rule to be called is re-evaluated I<each time> a match attempt is |
| 5094 | made. Hence, the following grammar: |
| 5095 | |
| 5096 | { $::species = 'dogs' } |
| 5097 | |
| 5098 | pair: 'two' <matchrule:$::species>(s) |
| 5099 | |
| 5100 | dogs: /dogs/ { $::species = 'cats' } |
| 5101 | |
| 5102 | cats: /cats/ |
| 5103 | |
| 5104 | will match the string "two dogs cats cats" completely, whereas it will |
| 5105 | only match the string "two dogs dogs dogs" up to the eighth letter. If |
| 5106 | the rule name were "early bound" (that is, evaluated only the first |
| 5107 | time the directive is encountered in a production), the reverse |
| 5108 | behaviour would be expected. |
| 5109 | |
| 5110 | Note that the C<matchrule> directive takes a string that is to be treated |
| 5111 | as a rule name, I<not> as a rule invocation. That is, |
| 5112 | it's like a Perl symbolic reference, not an C<eval>. Just as you can say: |
| 5113 | |
| 5114 | $subname = 'foo'; |
| 5115 | |
| 5116 | # and later... |
| 5117 | |
| 5118 | &{$foo}(@args); |
| 5119 | |
| 5120 | but not: |
| 5121 | |
| 5122 | $subname = 'foo(@args)'; |
| 5123 | |
| 5124 | # and later... |
| 5125 | |
| 5126 | &{$foo}; |
| 5127 | |
| 5128 | likewise you can say: |
| 5129 | |
| 5130 | $rulename = 'foo'; |
| 5131 | |
| 5132 | # and in the grammar... |
| 5133 | |
| 5134 | <matchrule:$rulename>[@args] |
| 5135 | |
| 5136 | but not: |
| 5137 | |
| 5138 | $rulename = 'foo[@args]'; |
| 5139 | |
| 5140 | # and in the grammar... |
| 5141 | |
| 5142 | <matchrule:$rulename> |
| 5143 | |
| 5144 | |
| 5145 | =item Deferred actions |
| 5146 | |
| 5147 | The C<E<lt>defer:...E<gt>> directive is used to specify an action to be |
| 5148 | performed when (and only if!) the current production ultimately succeeds. |
| 5149 | |
| 5150 | Whenever a C<E<lt>defer:...E<gt>> directive appears, the code it specifies |
| 5151 | is converted to a closure (an anonymous subroutine reference) which is |
| 5152 | queued within the active parser object. Note that, |
| 5153 | because the deferred code is converted to a closure, the values of any |
| 5154 | "local" variable (such as C<$text>, <@item>, etc.) are preserved |
| 5155 | until the deferred code is actually executed. |
| 5156 | |
| 5157 | If the parse ultimately succeeds |
| 5158 | I<and> the production in which the C<E<lt>defer:...E<gt>> directive was |
| 5159 | evaluated formed part of the successful parse, then the deferred code is |
| 5160 | executed immediately before the parse returns. If however the production |
| 5161 | which queued a deferred action fails, or one of the higher-level |
| 5162 | rules which called that production fails, then the deferred action is |
| 5163 | removed from the queue, and hence is never executed. |
| 5164 | |
| 5165 | For example, given the grammar: |
| 5166 | |
| 5167 | sentence: noun trans noun |
| 5168 | | noun intrans |
| 5169 | |
| 5170 | noun: 'the dog' |
| 5171 | { print "$item[1]\t(noun)\n" } |
| 5172 | | 'the meat' |
| 5173 | { print "$item[1]\t(noun)\n" } |
| 5174 | |
| 5175 | trans: 'ate' |
| 5176 | { print "$item[1]\t(transitive)\n" } |
| 5177 | |
| 5178 | intrans: 'ate' |
| 5179 | { print "$item[1]\t(intransitive)\n" } |
| 5180 | | 'barked' |
| 5181 | { print "$item[1]\t(intransitive)\n" } |
| 5182 | |
| 5183 | then parsing the sentence C<"the dog ate"> would produce the output: |
| 5184 | |
| 5185 | the dog (noun) |
| 5186 | ate (transitive) |
| 5187 | the dog (noun) |
| 5188 | ate (intransitive) |
| 5189 | |
| 5190 | This is because, even though the first production of C<sentence> |
| 5191 | ultimately fails, its initial subrules C<noun> and C<trans> do match, |
| 5192 | and hence they execute their associated actions. |
| 5193 | Then the second production of C<sentence> succeeds, causing the |
| 5194 | actions of the subrules C<noun> and C<intrans> to be executed as well. |
| 5195 | |
| 5196 | On the other hand, if the actions were replaced by C<E<lt>defer:...E<gt>> |
| 5197 | directives: |
| 5198 | |
| 5199 | sentence: noun trans noun |
| 5200 | | noun intrans |
| 5201 | |
| 5202 | noun: 'the dog' |
| 5203 | <defer: print "$item[1]\t(noun)\n" > |
| 5204 | | 'the meat' |
| 5205 | <defer: print "$item[1]\t(noun)\n" > |
| 5206 | |
| 5207 | trans: 'ate' |
| 5208 | <defer: print "$item[1]\t(transitive)\n" > |
| 5209 | |
| 5210 | intrans: 'ate' |
| 5211 | <defer: print "$item[1]\t(intransitive)\n" > |
| 5212 | | 'barked' |
| 5213 | <defer: print "$item[1]\t(intransitive)\n" > |
| 5214 | |
| 5215 | the output would be: |
| 5216 | |
| 5217 | the dog (noun) |
| 5218 | ate (intransitive) |
| 5219 | |
| 5220 | since deferred actions are only executed if they were evaluated in |
| 5221 | a production which ultimately contributes to the successful parse. |
| 5222 | |
| 5223 | In this case, even though the first production of C<sentence> caused |
| 5224 | the subrules C<noun> and C<trans> to match, that production ultimately |
| 5225 | failed and so the deferred actions queued by those subrules were subsequently |
| 5226 | disgarded. The second production then succeeded, causing the entire |
| 5227 | parse to succeed, and so the deferred actions queued by the (second) match of |
| 5228 | the C<noun> subrule and the subsequent match of C<intrans> I<are> preserved and |
| 5229 | eventually executed. |
| 5230 | |
| 5231 | Deferred actions provide a means of improving the performance of a parser, |
| 5232 | by only executing those actions which are part of the final parse-tree |
| 5233 | for the input data. |
| 5234 | |
| 5235 | Alternatively, deferred actions can be viewed as a mechanism for building |
| 5236 | (and executing) a |
| 5237 | customized subroutine corresponding to the given input data, much in the |
| 5238 | same way that autoactions (see L<"Autoactions">) can be used to build a |
| 5239 | customized data structure for specific input. |
| 5240 | |
| 5241 | Whether or not the action it specifies is ever executed, |
| 5242 | a C<E<lt>defer:...E<gt>> directive always succeeds, returning the |
| 5243 | number of deferred actions currently queued at that point. |
| 5244 | |
| 5245 | |
| 5246 | =item Parsing Perl |
| 5247 | |
| 5248 | Parse::RecDescent provides limited support for parsing subsets of Perl, |
| 5249 | namely: quote-like operators, Perl variables, and complete code blocks. |
| 5250 | |
| 5251 | The C<E<lt>perl_quotelikeE<gt>> directive can be used to parse any Perl |
| 5252 | quote-like operator: C<'a string'>, C<m/a pattern/>, C<tr{ans}{lation}>, |
| 5253 | etc. It does this by calling Text::Balanced::quotelike(). |
| 5254 | |
| 5255 | If a quote-like operator is found, a reference to an array of eight elements |
| 5256 | is returned. Those elements are identical to the last eight elements returned |
| 5257 | by Text::Balanced::extract_quotelike() in an array context, namely: |
| 5258 | |
| 5259 | =over 4 |
| 5260 | |
| 5261 | =item [0] |
| 5262 | |
| 5263 | the name of the quotelike operator -- 'q', 'qq', 'm', 's', 'tr' -- if the |
| 5264 | operator was named; otherwise C<undef>, |
| 5265 | |
| 5266 | =item [1] |
| 5267 | |
| 5268 | the left delimiter of the first block of the operation, |
| 5269 | |
| 5270 | =item [2] |
| 5271 | |
| 5272 | the text of the first block of the operation |
| 5273 | (that is, the contents of |
| 5274 | a quote, the regex of a match, or substitution or the target list of a |
| 5275 | translation), |
| 5276 | |
| 5277 | =item [3] |
| 5278 | |
| 5279 | the right delimiter of the first block of the operation, |
| 5280 | |
| 5281 | =item [4] |
| 5282 | |
| 5283 | the left delimiter of the second block of the operation if there is one |
| 5284 | (that is, if it is a C<s>, C<tr>, or C<y>); otherwise C<undef>, |
| 5285 | |
| 5286 | =item [5] |
| 5287 | |
| 5288 | the text of the second block of the operation if there is one |
| 5289 | (that is, the replacement of a substitution or the translation list |
| 5290 | of a translation); otherwise C<undef>, |
| 5291 | |
| 5292 | =item [6] |
| 5293 | |
| 5294 | the right delimiter of the second block of the operation (if any); |
| 5295 | otherwise C<undef>, |
| 5296 | |
| 5297 | =item [7] |
| 5298 | |
| 5299 | the trailing modifiers on the operation (if any); otherwise C<undef>. |
| 5300 | |
| 5301 | =back |
| 5302 | |
| 5303 | If a quote-like expression is not found, the directive fails with the usual |
| 5304 | C<undef> value. |
| 5305 | |
| 5306 | The C<E<lt>perl_variableE<gt>> directive can be used to parse any Perl |
| 5307 | variable: $scalar, @array, %hash, $ref->{field}[$index], etc. |
| 5308 | It does this by calling Text::Balanced::extract_variable(). |
| 5309 | |
| 5310 | If the directive matches text representing a valid Perl variable |
| 5311 | specification, it returns that text. Otherwise it fails with the usual |
| 5312 | C<undef> value. |
| 5313 | |
| 5314 | The C<E<lt>perl_codeblockE<gt>> directive can be used to parse curly-brace-delimited block of Perl code, such as: { $a = 1; f() =~ m/pat/; }. |
| 5315 | It does this by calling Text::Balanced::extract_codeblock(). |
| 5316 | |
| 5317 | If the directive matches text representing a valid Perl code block, |
| 5318 | it returns that text. Otherwise it fails with the usual C<undef> value. |
| 5319 | |
| 5320 | You can also tell it what kind of brackets to use as the outermost |
| 5321 | delimiters. For example: |
| 5322 | |
| 5323 | arglist: <perl_codeblock ()> |
| 5324 | |
| 5325 | causes an arglist to match a perl code block whose outermost delimiters |
| 5326 | are C<(...)> (rather than the default C<{...}>). |
| 5327 | |
| 5328 | |
| 5329 | =item Constructing tokens |
| 5330 | |
| 5331 | Eventually, Parse::RecDescent will be able to parse tokenized input, as |
| 5332 | well as ordinary strings. In preparation for this joyous day, the |
| 5333 | C<E<lt>token:...E<gt>> directive has been provided. |
| 5334 | This directive creates a token which will be suitable for |
| 5335 | input to a Parse::RecDescent parser (when it eventually supports |
| 5336 | tokenized input). |
| 5337 | |
| 5338 | The text of the token is the value of the |
| 5339 | immediately preceding item in the production. A |
| 5340 | C<E<lt>token:...E<gt>> directive always succeeds with a return |
| 5341 | value which is the hash reference that is the new token. It also |
| 5342 | sets the return value for the production to that hash ref. |
| 5343 | |
| 5344 | The C<E<lt>token:...E<gt>> directive makes it easy to build |
| 5345 | a Parse::RecDescent-compatible lexer in Parse::RecDescent: |
| 5346 | |
| 5347 | my $lexer = new Parse::RecDescent q |
| 5348 | { |
| 5349 | lex: token(s) |
| 5350 | |
| 5351 | token: /a\b/ <token:INDEF> |
| 5352 | | /the\b/ <token:DEF> |
| 5353 | | /fly\b/ <token:NOUN,VERB> |
| 5354 | | /[a-z]+/i { lc $item[1] } <token:ALPHA> |
| 5355 | | <error: Unknown token> |
| 5356 | |
| 5357 | }; |
| 5358 | |
| 5359 | which will eventually be able to be used with a regular Parse::RecDescent |
| 5360 | grammar: |
| 5361 | |
| 5362 | my $parser = new Parse::RecDescent q |
| 5363 | { |
| 5364 | startrule: subrule1 subrule 2 |
| 5365 | |
| 5366 | # ETC... |
| 5367 | }; |
| 5368 | |
| 5369 | either with a pre-lexing phase: |
| 5370 | |
| 5371 | $parser->startrule( $lexer->lex($data) ); |
| 5372 | |
| 5373 | or with a lex-on-demand approach: |
| 5374 | |
| 5375 | $parser->startrule( sub{$lexer->token(\$data)} ); |
| 5376 | |
| 5377 | But at present, only the C<E<lt>token:...E<gt>> directive is |
| 5378 | actually implemented. The rest is vapourware. |
| 5379 | |
| 5380 | =item Specifying operations |
| 5381 | |
| 5382 | One of the commonest requirements when building a parser is to specify |
| 5383 | binary operators. Unfortunately, in a normal grammar, the rules for |
| 5384 | such things are awkward: |
| 5385 | |
| 5386 | disjunction: conjunction ('or' conjunction)(s?) |
| 5387 | { $return = [ $item[1], @{$item[2]} ] } |
| 5388 | |
| 5389 | conjunction: atom ('and' atom)(s?) |
| 5390 | { $return = [ $item[1], @{$item[2]} ] } |
| 5391 | |
| 5392 | or inefficient: |
| 5393 | |
| 5394 | disjunction: conjunction 'or' disjunction |
| 5395 | { $return = [ $item[1], @{$item[2]} ] } |
| 5396 | | conjunction |
| 5397 | { $return = [ $item[1] ] } |
| 5398 | |
| 5399 | conjunction: atom 'and' conjunction |
| 5400 | { $return = [ $item[1], @{$item[2]} ] } |
| 5401 | | atom |
| 5402 | { $return = [ $item[1] ] } |
| 5403 | |
| 5404 | and either way is ugly and hard to get right. |
| 5405 | |
| 5406 | The C<E<lt>leftop:...E<gt>> and C<E<lt>rightop:...E<gt>> directives provide an |
| 5407 | easier way of specifying such operations. Using C<E<lt>leftop:...E<gt>> the |
| 5408 | above examples become: |
| 5409 | |
| 5410 | disjunction: <leftop: conjunction 'or' conjunction> |
| 5411 | conjunction: <leftop: atom 'and' atom> |
| 5412 | |
| 5413 | The C<E<lt>leftop:...E<gt>> directive specifies a left-associative binary operator. |
| 5414 | It is specified around three other grammar elements |
| 5415 | (typically subrules or terminals), which match the left operand, |
| 5416 | the operator itself, and the right operand respectively. |
| 5417 | |
| 5418 | A C<E<lt>leftop:...E<gt>> directive such as: |
| 5419 | |
| 5420 | disjunction: <leftop: conjunction 'or' conjunction> |
| 5421 | |
| 5422 | is converted to the following: |
| 5423 | |
| 5424 | disjunction: ( conjunction ('or' conjunction)(s?) |
| 5425 | { $return = [ $item[1], @{$item[2]} ] } ) |
| 5426 | |
| 5427 | In other words, a C<E<lt>leftop:...E<gt>> directive matches the left operand followed by zero |
| 5428 | or more repetitions of both the operator and the right operand. It then |
| 5429 | flattens the matched items into an anonymous array which becomes the |
| 5430 | (single) value of the entire C<E<lt>leftop:...E<gt>> directive. |
| 5431 | |
| 5432 | For example, an C<E<lt>leftop:...E<gt>> directive such as: |
| 5433 | |
| 5434 | output: <leftop: ident '<<' expr > |
| 5435 | |
| 5436 | when given a string such as: |
| 5437 | |
| 5438 | cout << var << "str" << 3 |
| 5439 | |
| 5440 | would match, and C<$item[1]> would be set to: |
| 5441 | |
| 5442 | [ 'cout', 'var', '"str"', '3' ] |
| 5443 | |
| 5444 | In other words: |
| 5445 | |
| 5446 | output: <leftop: ident '<<' expr > |
| 5447 | |
| 5448 | is equivalent to a left-associative operator: |
| 5449 | |
| 5450 | output: ident { $return = [$item[1]] } |
| 5451 | | ident '<<' expr { $return = [@item[1,3]] } |
| 5452 | | ident '<<' expr '<<' expr { $return = [@item[1,3,5]] } |
| 5453 | | ident '<<' expr '<<' expr '<<' expr { $return = [@item[1,3,5,7]] } |
| 5454 | # ...etc... |
| 5455 | |
| 5456 | |
| 5457 | Similarly, the C<E<lt>rightop:...E<gt>> directive takes a left operand, an operator, and a right operand: |
| 5458 | |
| 5459 | assign: <rightop: var '=' expr > |
| 5460 | |
| 5461 | and converts them to: |
| 5462 | |
| 5463 | assign: ( (var '=' {$return=$item[1]})(s?) expr |
| 5464 | { $return = [ @{$item[1]}, $item[2] ] } ) |
| 5465 | |
| 5466 | which is equivalent to a right-associative operator: |
| 5467 | |
| 5468 | assign: expr { $return = [$item[1]] } |
| 5469 | | var '=' expr { $return = [@item[1,3]] } |
| 5470 | | var '=' var '=' expr { $return = [@item[1,3,5]] } |
| 5471 | | var '=' var '=' var '=' expr { $return = [@item[1,3,5,7]] } |
| 5472 | # ...etc... |
| 5473 | |
| 5474 | |
| 5475 | Note that for both the C<E<lt>leftop:...E<gt>> and C<E<lt>rightop:...E<gt>> directives, the directive does not normally |
| 5476 | return the operator itself, just a list of the operands involved. This is |
| 5477 | particularly handy for specifying lists: |
| 5478 | |
| 5479 | list: '(' <leftop: list_item ',' list_item> ')' |
| 5480 | { $return = $item[2] } |
| 5481 | |
| 5482 | There is, however, a problem: sometimes the operator is itself significant. |
| 5483 | For example, in a Perl list a comma and a C<=E<gt>> are both |
| 5484 | valid separators, but the C<=E<gt>> has additional stringification semantics. |
| 5485 | Hence it's important to know which was used in each case. |
| 5486 | |
| 5487 | To solve this problem the |
| 5488 | C<E<lt>leftop:...E<gt>> and C<E<lt>rightop:...E<gt>> directives |
| 5489 | I<do> return the operator(s) as well, under two circumstances. |
| 5490 | The first case is where the operator is specified as a subrule. In that instance, |
| 5491 | whatever the operator matches is returned (on the assumption that if the operator |
| 5492 | is important enough to have its own subrule, then it's important enough to return). |
| 5493 | |
| 5494 | The second case is where the operator is specified as a regular |
| 5495 | expression. In that case, if the first bracketed subpattern of the |
| 5496 | regular expression matches, that matching value is returned (this is analogous to |
| 5497 | the behaviour of the Perl C<split> function, except that only the first subpattern |
| 5498 | is returned). |
| 5499 | |
| 5500 | In other words, given the input: |
| 5501 | |
| 5502 | ( a=>1, b=>2 ) |
| 5503 | |
| 5504 | the specifications: |
| 5505 | |
| 5506 | list: '(' <leftop: list_item separator list_item> ')' |
| 5507 | |
| 5508 | separator: ',' | '=>' |
| 5509 | |
| 5510 | or: |
| 5511 | |
| 5512 | list: '(' <leftop: list_item /(,|=>)/ list_item> ')' |
| 5513 | |
| 5514 | cause the list separators to be interleaved with the operands in the |
| 5515 | anonymous array in C<$item[2]>: |
| 5516 | |
| 5517 | [ 'a', '=>', '1', ',', 'b', '=>', '2' ] |
| 5518 | |
| 5519 | |
| 5520 | But the following version: |
| 5521 | |
| 5522 | list: '(' <leftop: list_item /,|=>/ list_item> ')' |
| 5523 | |
| 5524 | returns only the operators: |
| 5525 | |
| 5526 | [ 'a', '1', 'b', '2' ] |
| 5527 | |
| 5528 | Of course, none of the above specifications handle the case of an empty |
| 5529 | list, since the C<E<lt>leftop:...E<gt>> and C<E<lt>rightop:...E<gt>> directives |
| 5530 | require at least a single right or left operand to match. To specify |
| 5531 | that the operator can match "trivially", |
| 5532 | it's necessary to add a C<(s?)> qualifier to the directive: |
| 5533 | |
| 5534 | list: '(' <leftop: list_item /(,|=>)/ list_item>(s?) ')' |
| 5535 | |
| 5536 | Note that in almost all the above examples, the first and third arguments |
| 5537 | of the C<<leftop:...E<gt>> directive were the same subrule. That is because |
| 5538 | C<<leftop:...E<gt>>'s are frequently used to specify "separated" lists of the |
| 5539 | same type of item. To make such lists easier to specify, the following |
| 5540 | syntax: |
| 5541 | |
| 5542 | list: element(s /,/) |
| 5543 | |
| 5544 | is exactly equivalent to: |
| 5545 | |
| 5546 | list: <leftop: element /,/ element> |
| 5547 | |
| 5548 | Note that the separator must be specified as a raw pattern (i.e. |
| 5549 | not a string or subrule). |
| 5550 | |
| 5551 | |
| 5552 | =item Scored productions |
| 5553 | |
| 5554 | By default, Parse::RecDescent grammar rules always accept the first |
| 5555 | production that matches the input. But if two or more productions may |
| 5556 | potentially match the same input, choosing the first that does so may |
| 5557 | not be optimal. |
| 5558 | |
| 5559 | For example, if you were parsing the sentence "time flies like an arrow", |
| 5560 | you might use a rule like this: |
| 5561 | |
| 5562 | sentence: verb noun preposition article noun { [@item] } |
| 5563 | | adjective noun verb article noun { [@item] } |
| 5564 | | noun verb preposition article noun { [@item] } |
| 5565 | |
| 5566 | Each of these productions matches the sentence, but the third one |
| 5567 | is the most likely interpretation. However, if the sentence had been |
| 5568 | "fruit flies like a banana", then the second production is probably |
| 5569 | the right match. |
| 5570 | |
| 5571 | To cater for such situtations, the C<E<lt>score:...E<gt>> can be used. |
| 5572 | The directive is equivalent to an unconditional C<E<lt>rejectE<gt>>, |
| 5573 | except that it allows you to specify a "score" for the current |
| 5574 | production. If that score is numerically greater than the best |
| 5575 | score of any preceding production, the current production is cached for later |
| 5576 | consideration. If no later production matches, then the cached |
| 5577 | production is treated as having matched, and the value of the |
| 5578 | item immediately before its C<E<lt>score:...E<gt>> directive is returned as the |
| 5579 | result. |
| 5580 | |
| 5581 | In other words, by putting a C<E<lt>score:...E<gt>> directive at the end of |
| 5582 | each production, you can select which production matches using |
| 5583 | criteria other than specification order. For example: |
| 5584 | |
| 5585 | sentence: verb noun preposition article noun { [@item] } <score: sensible(@item)> |
| 5586 | | adjective noun verb article noun { [@item] } <score: sensible(@item)> |
| 5587 | | noun verb preposition article noun { [@item] } <score: sensible(@item)> |
| 5588 | |
| 5589 | Now, when each production reaches its respective C<E<lt>score:...E<gt>> |
| 5590 | directive, the subroutine C<sensible> will be called to evaluate the |
| 5591 | matched items (somehow). Once all productions have been tried, the |
| 5592 | one which C<sensible> scored most highly will be the one that is |
| 5593 | accepted as a match for the rule. |
| 5594 | |
| 5595 | The variable $score always holds the current best score of any production, |
| 5596 | and the variable $score_return holds the corresponding return value. |
| 5597 | |
| 5598 | As another example, the following grammar matches lines that may be |
| 5599 | separated by commas, colons, or semi-colons. This can be tricky if |
| 5600 | a colon-separated line also contains commas, or vice versa. The grammar |
| 5601 | resolves the ambiguity by selecting the rule that results in the |
| 5602 | fewest fields: |
| 5603 | |
| 5604 | line: seplist[sep=>','] <score: -@{$item[1]}> |
| 5605 | | seplist[sep=>':'] <score: -@{$item[1]}> |
| 5606 | | seplist[sep=>" "] <score: -@{$item[1]}> |
| 5607 | |
| 5608 | seplist: <skip:""> <leftop: /[^$arg{sep}]*/ "$arg{sep}" /[^$arg{sep}]*/> |
| 5609 | |
| 5610 | Note the use of negation within the C<E<lt>score:...E<gt>> directive |
| 5611 | to ensure that the seplist with the most items gets the lowest score. |
| 5612 | |
| 5613 | As the above examples indicate, it is often the case that all productions |
| 5614 | in a rule use exactly the same C<E<lt>score:...E<gt>> directive. It is |
| 5615 | tedious to have to repeat this identical directive in every production, so |
| 5616 | Parse::RecDescent also provides the C<E<lt>autoscore:...E<gt>> directive. |
| 5617 | |
| 5618 | If an C<E<lt>autoscore:...E<gt>> directive appears in any |
| 5619 | production of a rule, the code it specifies is used as the scoring |
| 5620 | code for every production of that rule, except productions that already |
| 5621 | end with an explicit C<E<lt>score:...E<gt>> directive. Thus the rules above could |
| 5622 | be rewritten: |
| 5623 | |
| 5624 | line: <autoscore: -@{$item[1]}> |
| 5625 | line: seplist[sep=>','] |
| 5626 | | seplist[sep=>':'] |
| 5627 | | seplist[sep=>" "] |
| 5628 | |
| 5629 | |
| 5630 | sentence: <autoscore: sensible(@item)> |
| 5631 | | verb noun preposition article noun { [@item] } |
| 5632 | | adjective noun verb article noun { [@item] } |
| 5633 | | noun verb preposition article noun { [@item] } |
| 5634 | |
| 5635 | Note that the C<E<lt>autoscore:...E<gt>> directive itself acts as an |
| 5636 | unconditional C<E<lt>rejectE<gt>>, and (like the C<E<lt>rulevar:...E<gt>> |
| 5637 | directive) is pruned at compile-time wherever possible. |
| 5638 | |
| 5639 | |
| 5640 | =item Dispensing with grammar checks |
| 5641 | |
| 5642 | During the compilation phase of parser construction, Parse::RecDescent performs |
| 5643 | a small number of checks on the grammar it's given. Specifically it checks that |
| 5644 | the grammar is not left-recursive, that there are no "insatiable" constructs of |
| 5645 | the form: |
| 5646 | |
| 5647 | rule: subrule(s) subrule |
| 5648 | |
| 5649 | and that there are no rules missing (i.e. referred to, but never defined). |
| 5650 | |
| 5651 | These checks are important during development, but can slow down parser |
| 5652 | construction in stable code. So Parse::RecDescent provides the |
| 5653 | E<lt>nocheckE<gt> directive to turn them off. The directive can only appear |
| 5654 | before the first rule definition, and switches off checking throughout the rest |
| 5655 | of the current grammar. |
| 5656 | |
| 5657 | Typically, this directive would be added when a parser has been thoroughly |
| 5658 | tested and is ready for release. |
| 5659 | |
| 5660 | =back |
| 5661 | |
| 5662 | |
| 5663 | =head2 Subrule argument lists |
| 5664 | |
| 5665 | It is occasionally useful to pass data to a subrule which is being invoked. For |
| 5666 | example, consider the following grammar fragment: |
| 5667 | |
| 5668 | classdecl: keyword decl |
| 5669 | |
| 5670 | keyword: 'struct' | 'class'; |
| 5671 | |
| 5672 | decl: # WHATEVER |
| 5673 | |
| 5674 | The C<decl> rule might wish to know which of the two keywords was used |
| 5675 | (since it may affect some aspect of the way the subsequent declaration |
| 5676 | is interpreted). C<Parse::RecDescent> allows the grammar designer to |
| 5677 | pass data into a rule, by placing that data in an I<argument list> |
| 5678 | (that is, in square brackets) immediately after any subrule item in a |
| 5679 | production. Hence, we could pass the keyword to C<decl> as follows: |
| 5680 | |
| 5681 | classdecl: keyword decl[ $item[1] ] |
| 5682 | |
| 5683 | keyword: 'struct' | 'class'; |
| 5684 | |
| 5685 | decl: # WHATEVER |
| 5686 | |
| 5687 | The argument list can consist of any number (including zero!) of comma-separated |
| 5688 | Perl expressions. In other words, it looks exactly like a Perl anonymous |
| 5689 | array reference. For example, we could pass the keyword, the name of the |
| 5690 | surrounding rule, and the literal 'keyword' to C<decl> like so: |
| 5691 | |
| 5692 | classdecl: keyword decl[$item[1],$item[0],'keyword'] |
| 5693 | |
| 5694 | keyword: 'struct' | 'class'; |
| 5695 | |
| 5696 | decl: # WHATEVER |
| 5697 | |
| 5698 | Within the rule to which the data is passed (C<decl> in the above examples) |
| 5699 | that data is available as the elements of a local variable C<@arg>. Hence |
| 5700 | C<decl> might report its intentions as follows: |
| 5701 | |
| 5702 | classdecl: keyword decl[$item[1],$item[0],'keyword'] |
| 5703 | |
| 5704 | keyword: 'struct' | 'class'; |
| 5705 | |
| 5706 | decl: { print "Declaring $arg[0] (a $arg[2])\n"; |
| 5707 | print "(this rule called by $arg[1])" } |
| 5708 | |
| 5709 | Subrule argument lists can also be interpreted as hashes, simply by using |
| 5710 | the local variable C<%arg> instead of C<@arg>. Hence we could rewrite the |
| 5711 | previous example: |
| 5712 | |
| 5713 | classdecl: keyword decl[keyword => $item[1], |
| 5714 | caller => $item[0], |
| 5715 | type => 'keyword'] |
| 5716 | |
| 5717 | keyword: 'struct' | 'class'; |
| 5718 | |
| 5719 | decl: { print "Declaring $arg{keyword} (a $arg{type})\n"; |
| 5720 | print "(this rule called by $arg{caller})" } |
| 5721 | |
| 5722 | Both C<@arg> and C<%arg> are always available, so the grammar designer may |
| 5723 | choose whichever convention (or combination of conventions) suits best. |
| 5724 | |
| 5725 | Subrule argument lists are also useful for creating "rule templates" |
| 5726 | (especially when used in conjunction with the C<E<lt>matchrule:...E<gt>> |
| 5727 | directive). For example, the subrule: |
| 5728 | |
| 5729 | list: <matchrule:$arg{rule}> /$arg{sep}/ list[%arg] |
| 5730 | { $return = [ $item[1], @{$item[3]} ] } |
| 5731 | | <matchrule:$arg{rule}> |
| 5732 | { $return = [ $item[1]] } |
| 5733 | |
| 5734 | is a handy template for the common problem of matching a separated list. |
| 5735 | For example: |
| 5736 | |
| 5737 | function: 'func' name '(' list[rule=>'param',sep=>';'] ')' |
| 5738 | |
| 5739 | param: list[rule=>'name',sep=>','] ':' typename |
| 5740 | |
| 5741 | name: /\w+/ |
| 5742 | |
| 5743 | typename: name |
| 5744 | |
| 5745 | |
| 5746 | When a subrule argument list is used with a repeated subrule, the argument list |
| 5747 | goes I<before> the repetition specifier: |
| 5748 | |
| 5749 | list: /some|many/ thing[ $item[1] ](s) |
| 5750 | |
| 5751 | The argument list is "late bound". That is, it is re-evaluated for every |
| 5752 | repetition of the repeated subrule. |
| 5753 | This means that each repeated attempt to match the subrule may be |
| 5754 | passed a completely different set of arguments if the value of the |
| 5755 | expression in the argument list changes between attempts. So, for |
| 5756 | example, the grammar: |
| 5757 | |
| 5758 | { $::species = 'dogs' } |
| 5759 | |
| 5760 | pair: 'two' animal[$::species](s) |
| 5761 | |
| 5762 | animal: /$arg[0]/ { $::species = 'cats' } |
| 5763 | |
| 5764 | will match the string "two dogs cats cats" completely, whereas |
| 5765 | it will only match the string "two dogs dogs dogs" up to the |
| 5766 | eighth letter. If the value of the argument list were "early bound" |
| 5767 | (that is, evaluated only the first time a repeated subrule match is |
| 5768 | attempted), one would expect the matching behaviours to be reversed. |
| 5769 | |
| 5770 | Of course, it is possible to effectively "early bind" such argument lists |
| 5771 | by passing them a value which does not change on each repetition. For example: |
| 5772 | |
| 5773 | { $::species = 'dogs' } |
| 5774 | |
| 5775 | pair: 'two' { $::species } animal[$item[2]](s) |
| 5776 | |
| 5777 | animal: /$arg[0]/ { $::species = 'cats' } |
| 5778 | |
| 5779 | |
| 5780 | Arguments can also be passed to the start rule, simply by appending them |
| 5781 | to the argument list with which the start rule is called (I<after> the |
| 5782 | "line number" parameter). For example, given: |
| 5783 | |
| 5784 | $parser = new Parse::RecDescent ( $grammar ); |
| 5785 | |
| 5786 | $parser->data($text, 1, "str", 2, \@arr); |
| 5787 | |
| 5788 | # ^^^^^ ^ ^^^^^^^^^^^^^^^ |
| 5789 | # | | | |
| 5790 | # TEXT TO BE PARSED | | |
| 5791 | # STARTING LINE NUMBER | |
| 5792 | # ELEMENTS OF @arg WHICH IS PASSED TO RULE data |
| 5793 | |
| 5794 | then within the productions of the rule C<data>, the array C<@arg> will contain |
| 5795 | C<("str", 2, \@arr)>. |
| 5796 | |
| 5797 | |
| 5798 | =head2 Alternations |
| 5799 | |
| 5800 | Alternations are implicit (unnamed) rules defined as part of a production. An |
| 5801 | alternation is defined as a series of '|'-separated productions inside a |
| 5802 | pair of round brackets. For example: |
| 5803 | |
| 5804 | character: 'the' ( good | bad | ugly ) /dude/ |
| 5805 | |
| 5806 | Every alternation implicitly defines a new subrule, whose |
| 5807 | automatically-generated name indicates its origin: |
| 5808 | "_alternation_<I>_of_production_<P>_of_rule<R>" for the appropriate |
| 5809 | values of <I>, <P>, and <R>. A call to this implicit subrule is then |
| 5810 | inserted in place of the brackets. Hence the above example is merely a |
| 5811 | convenient short-hand for: |
| 5812 | |
| 5813 | character: 'the' |
| 5814 | _alternation_1_of_production_1_of_rule_character |
| 5815 | /dude/ |
| 5816 | |
| 5817 | _alternation_1_of_production_1_of_rule_character: |
| 5818 | good | bad | ugly |
| 5819 | |
| 5820 | Since alternations are parsed by recursively calling the parser generator, |
| 5821 | any type(s) of item can appear in an alternation. For example: |
| 5822 | |
| 5823 | character: 'the' ( 'high' "plains" # Silent, with poncho |
| 5824 | | /no[- ]name/ # Silent, no poncho |
| 5825 | | vengeance_seeking # Poncho-optional |
| 5826 | | <error> |
| 5827 | ) drifter |
| 5828 | |
| 5829 | In this case, if an error occurred, the automatically generated |
| 5830 | message would be: |
| 5831 | |
| 5832 | ERROR (line <N>): Invalid implicit subrule: Expected |
| 5833 | 'high' or /no[- ]name/ or generic, |
| 5834 | but found "pacifist" instead |
| 5835 | |
| 5836 | Since every alternation actually has a name, it's even possible |
| 5837 | to extend or replace them: |
| 5838 | |
| 5839 | parser->Replace( |
| 5840 | "_alternation_1_of_production_1_of_rule_character: |
| 5841 | 'generic Eastwood'" |
| 5842 | ); |
| 5843 | |
| 5844 | More importantly, since alternations are a form of subrule, they can be given |
| 5845 | repetition specifiers: |
| 5846 | |
| 5847 | character: 'the' ( good | bad | ugly )(?) /dude/ |
| 5848 | |
| 5849 | |
| 5850 | =head2 Incremental Parsing |
| 5851 | |
| 5852 | C<Parse::RecDescent> provides two methods - C<Extend> and C<Replace> - which |
| 5853 | can be used to alter the grammar matched by a parser. Both methods |
| 5854 | take the same argument as C<Parse::RecDescent::new>, namely a |
| 5855 | grammar specification string |
| 5856 | |
| 5857 | C<Parse::RecDescent::Extend> interprets the grammar specification and adds any |
| 5858 | productions it finds to the end of the rules for which they are specified. For |
| 5859 | example: |
| 5860 | |
| 5861 | $add = "name: 'Jimmy-Bob' | 'Bobby-Jim'\ndesc: colour /necks?/"; |
| 5862 | parser->Extend($add); |
| 5863 | |
| 5864 | adds two productions to the rule "name" (creating it if necessary) and one |
| 5865 | production to the rule "desc". |
| 5866 | |
| 5867 | C<Parse::RecDescent::Replace> is identical, except that it first resets are |
| 5868 | rule specified in the additional grammar, removing any existing productions. |
| 5869 | Hence after: |
| 5870 | |
| 5871 | $add = "name: 'Jimmy-Bob' | 'Bobby-Jim'\ndesc: colour /necks?/"; |
| 5872 | parser->Replace($add); |
| 5873 | |
| 5874 | are are I<only> valid "name"s and the one possible description. |
| 5875 | |
| 5876 | A more interesting use of the C<Extend> and C<Replace> methods is to call them |
| 5877 | inside the action of an executing parser. For example: |
| 5878 | |
| 5879 | typedef: 'typedef' type_name identifier ';' |
| 5880 | { $thisparser->Extend("type_name: '$item[3]'") } |
| 5881 | | <error> |
| 5882 | |
| 5883 | identifier: ...!type_name /[A-Za-z_]w*/ |
| 5884 | |
| 5885 | which automatically prevents type names from being typedef'd, or: |
| 5886 | |
| 5887 | command: 'map' key_name 'to' abort_key |
| 5888 | { $thisparser->Replace("abort_key: '$item[2]'") } |
| 5889 | | 'map' key_name 'to' key_name |
| 5890 | { map_key($item[2],$item[4]) } |
| 5891 | | abort_key |
| 5892 | { exit if confirm("abort?") } |
| 5893 | |
| 5894 | abort_key: 'q' |
| 5895 | |
| 5896 | key_name: ...!abort_key /[A-Za-z]/ |
| 5897 | |
| 5898 | which allows the user to change the abort key binding, but not to unbind it. |
| 5899 | |
| 5900 | The careful use of such constructs makes it possible to reconfigure a |
| 5901 | a running parser, eliminating the need for semantic feedback by |
| 5902 | providing syntactic feedback instead. However, as currently implemented, |
| 5903 | C<Replace()> and C<Extend()> have to regenerate and re-C<eval> the |
| 5904 | entire parser whenever they are called. This makes them quite slow for |
| 5905 | large grammars. |
| 5906 | |
| 5907 | In such cases, the judicious use of an interpolated regex is likely to |
| 5908 | be far more efficient: |
| 5909 | |
| 5910 | typedef: 'typedef' type_name/ identifier ';' |
| 5911 | { $thisparser->{local}{type_name} .= "|$item[3]" } |
| 5912 | | <error> |
| 5913 | |
| 5914 | identifier: ...!type_name /[A-Za-z_]w*/ |
| 5915 | |
| 5916 | type_name: /$thisparser->{local}{type_name}/ |
| 5917 | |
| 5918 | |
| 5919 | =head2 Precompiling parsers |
| 5920 | |
| 5921 | Normally Parse::RecDescent builds a parser from a grammar at run-time. |
| 5922 | That approach simplifies the design and implementation of parsing code, |
| 5923 | but has the disadvantage that it slows the parsing process down - you |
| 5924 | have to wait for Parse::RecDescent to build the parser every time the |
| 5925 | program runs. Long or complex grammars can be particularly slow to |
| 5926 | build, leading to unacceptable delays at start-up. |
| 5927 | |
| 5928 | To overcome this, the module provides a way of "pre-building" a parser |
| 5929 | object and saving it in a separate module. That module can then be used |
| 5930 | to create clones of the original parser. |
| 5931 | |
| 5932 | A grammar may be precompiled using the C<Precompile> class method. |
| 5933 | For example, to precompile a grammar stored in the scalar $grammar, |
| 5934 | and produce a class named PreGrammar in a module file named PreGrammar.pm, |
| 5935 | you could use: |
| 5936 | |
| 5937 | use Parse::RecDescent; |
| 5938 | |
| 5939 | Parse::RecDescent->Precompile([$options_hashref], $grammar, "PreGrammar"); |
| 5940 | |
| 5941 | The first required argument is the grammar string, the second is the |
| 5942 | name of the class to be built. The name of the module file is |
| 5943 | generated automatically by appending ".pm" to the last element of the |
| 5944 | class name. Thus |
| 5945 | |
| 5946 | Parse::RecDescent->Precompile($grammar, "My::New::Parser"); |
| 5947 | |
| 5948 | would produce a module file named Parser.pm. |
| 5949 | |
| 5950 | An optional hash reference may be supplied as the first argument to |
| 5951 | C<Precompile>. This argument is currently EXPERIMENTAL, and may change |
| 5952 | in a future release of Parse::RecDescent. The only supported option |
| 5953 | is currently C<-standalone>, see L</"Standalone Precompiled Parsers">. |
| 5954 | |
| 5955 | It is somewhat tedious to have to write a small Perl program just to |
| 5956 | generate a precompiled grammar class, so Parse::RecDescent has some special |
| 5957 | magic that allows you to do the job directly from the command-line. |
| 5958 | |
| 5959 | If your grammar is specified in a file named F<grammar>, you can generate |
| 5960 | a class named Yet::Another::Grammar like so: |
| 5961 | |
| 5962 | > perl -MParse::RecDescent - grammar Yet::Another::Grammar |
| 5963 | |
| 5964 | This would produce a file named F<Grammar.pm> containing the full |
| 5965 | definition of a class called Yet::Another::Grammar. Of course, to use |
| 5966 | that class, you would need to put the F<Grammar.pm> file in a |
| 5967 | directory named F<Yet/Another>, somewhere in your Perl include path. |
| 5968 | |
| 5969 | Having created the new class, it's very easy to use it to build |
| 5970 | a parser. You simply C<use> the new module, and then call its |
| 5971 | C<new> method to create a parser object. For example: |
| 5972 | |
| 5973 | use Yet::Another::Grammar; |
| 5974 | my $parser = Yet::Another::Grammar->new(); |
| 5975 | |
| 5976 | The effect of these two lines is exactly the same as: |
| 5977 | |
| 5978 | use Parse::RecDescent; |
| 5979 | |
| 5980 | open GRAMMAR_FILE, "grammar" or die; |
| 5981 | local $/; |
| 5982 | my $grammar = <GRAMMAR_FILE>; |
| 5983 | |
| 5984 | my $parser = Parse::RecDescent->new($grammar); |
| 5985 | |
| 5986 | only considerably faster. |
| 5987 | |
| 5988 | Note however that the parsers produced by either approach are exactly |
| 5989 | the same, so whilst precompilation has an effect on I<set-up> speed, |
| 5990 | it has no effect on I<parsing> speed. RecDescent 2.0 will address that |
| 5991 | problem. |
| 5992 | |
| 5993 | =head3 Standalone Precompiled Parsers |
| 5994 | |
| 5995 | Until version 1.967003 of Parse::RecDescent, parser modules built with |
| 5996 | C<Precompile> were dependent on Parse::RecDescent. Future |
| 5997 | Parse::RecDescent releases with different internal implementations |
| 5998 | would break pre-existing precompiled parsers. |
| 5999 | |
| 6000 | Version 1.967_005 added the ability for Parse::RecDescent to include |
| 6001 | itself in the resulting .pm file if you pass the boolean option |
| 6002 | C<-standalone> to C<Precompile>: |
| 6003 | |
| 6004 | Parse::RecDescent->Precompile({ -standalone = 1, }, |
| 6005 | $grammar, "My::New::Parser"); |
| 6006 | |
| 6007 | Parse::RecDescent is included as Parse::RecDescent::_Runtime in order |
| 6008 | to avoid conflicts between an installed version of Parse::RecDescent |
| 6009 | and a precompiled, standalone parser made with another version of |
| 6010 | Parse::RecDescent. This renaming is experimental, and is subject to |
| 6011 | change in future versions. |
| 6012 | |
| 6013 | Precompiled parsers remain dependent on Parse::RecDescent by default, |
| 6014 | as this feature is still considered experimental. In the future, |
| 6015 | standalone parsers will become the default. |
| 6016 | |
| 6017 | =head1 GOTCHAS |
| 6018 | |
| 6019 | This section describes common mistakes that grammar writers seem to |
| 6020 | make on a regular basis. |
| 6021 | |
| 6022 | =head2 1. Expecting an error to always invalidate a parse |
| 6023 | |
| 6024 | A common mistake when using error messages is to write the grammar like this: |
| 6025 | |
| 6026 | file: line(s) |
| 6027 | |
| 6028 | line: line_type_1 |
| 6029 | | line_type_2 |
| 6030 | | line_type_3 |
| 6031 | | <error> |
| 6032 | |
| 6033 | The expectation seems to be that any line that is not of type 1, 2 or 3 will |
| 6034 | invoke the C<E<lt>errorE<gt>> directive and thereby cause the parse to fail. |
| 6035 | |
| 6036 | Unfortunately, that only happens if the error occurs in the very first line. |
| 6037 | The first rule states that a C<file> is matched by one or more lines, so if |
| 6038 | even a single line succeeds, the first rule is completely satisfied and the |
| 6039 | parse as a whole succeeds. That means that any error messages generated by |
| 6040 | subsequent failures in the C<line> rule are quietly ignored. |
| 6041 | |
| 6042 | Typically what's really needed is this: |
| 6043 | |
| 6044 | file: line(s) eofile { $return = $item[1] } |
| 6045 | |
| 6046 | line: line_type_1 |
| 6047 | | line_type_2 |
| 6048 | | line_type_3 |
| 6049 | | <error> |
| 6050 | |
| 6051 | eofile: /^\Z/ |
| 6052 | |
| 6053 | The addition of the C<eofile> subrule to the first production means that |
| 6054 | a file only matches a series of successful C<line> matches I<that consume the |
| 6055 | complete input text>. If any input text remains after the lines are matched, |
| 6056 | there must have been an error in the last C<line>. In that case the C<eofile> |
| 6057 | rule will fail, causing the entire C<file> rule to fail too. |
| 6058 | |
| 6059 | Note too that C<eofile> must match C</^\Z/> (end-of-text), I<not> |
| 6060 | C</^\cZ/> or C</^\cD/> (end-of-file). |
| 6061 | |
| 6062 | And don't forget the action at the end of the production. If you just |
| 6063 | write: |
| 6064 | |
| 6065 | file: line(s) eofile |
| 6066 | |
| 6067 | then the value returned by the C<file> rule will be the value of its |
| 6068 | last item: C<eofile>. Since C<eofile> always returns an empty string |
| 6069 | on success, that will cause the C<file> rule to return that empty |
| 6070 | string. Apart from returning the wrong value, returning an empty string |
| 6071 | will trip up code such as: |
| 6072 | |
| 6073 | $parser->file($filetext) || die; |
| 6074 | |
| 6075 | (since "" is false). |
| 6076 | |
| 6077 | Remember that Parse::RecDescent returns undef on failure, |
| 6078 | so the only safe test for failure is: |
| 6079 | |
| 6080 | defined($parser->file($filetext)) || die; |
| 6081 | |
| 6082 | |
| 6083 | =head2 2. Using a C<return> in an action |
| 6084 | |
| 6085 | An action is like a C<do> block inside the subroutine implementing the |
| 6086 | surrounding rule. So if you put a C<return> statement in an action: |
| 6087 | |
| 6088 | range: '(' start '..' end )' |
| 6089 | { return $item{end} } |
| 6090 | /\s+/ |
| 6091 | |
| 6092 | that subroutine will immediately return, without checking the rest of |
| 6093 | the items in the current production (e.g. the C</\s+/>) and without |
| 6094 | setting up the necessary data structures to tell the parser that the |
| 6095 | rule has succeeded. |
| 6096 | |
| 6097 | The correct way to set a return value in an action is to set the C<$return> |
| 6098 | variable: |
| 6099 | |
| 6100 | range: '(' start '..' end )' |
| 6101 | { $return = $item{end} } |
| 6102 | /\s+/ |
| 6103 | |
| 6104 | |
| 6105 | =head2 2. Setting C<$Parse::RecDescent::skip> at parse time |
| 6106 | |
| 6107 | If you want to change the default skipping behaviour (see |
| 6108 | L<Terminal Separators> and the C<E<lt>skip:...E<gt>> directive) by setting |
| 6109 | C<$Parse::RecDescent::skip> you have to remember to set this variable |
| 6110 | I<before> creating the grammar object. |
| 6111 | |
| 6112 | For example, you might want to skip all Perl-like comments with this |
| 6113 | regular expression: |
| 6114 | |
| 6115 | my $skip_spaces_and_comments = qr/ |
| 6116 | (?mxs: |
| 6117 | \s+ # either spaces |
| 6118 | | \# .*?$ # or a dash and whatever up to the end of line |
| 6119 | )* # repeated at will (in whatever order) |
| 6120 | /; |
| 6121 | |
| 6122 | And then: |
| 6123 | |
| 6124 | my $parser1 = Parse::RecDescent->new($grammar); |
| 6125 | |
| 6126 | $Parse::RecDescent::skip = $skip_spaces_and_comments; |
| 6127 | |
| 6128 | my $parser2 = Parse::RecDescent->new($grammar); |
| 6129 | |
| 6130 | $parser1->parse($text); # this does not cope with comments |
| 6131 | $parser2->parse($text); # this skips comments correctly |
| 6132 | |
| 6133 | The two parsers behave differently, because any skipping behaviour |
| 6134 | specified via C<$Parse::RecDescent::skip> is hard-coded when the |
| 6135 | grammar object is built, not at parse time. |
| 6136 | |
| 6137 | |
| 6138 | =head1 DIAGNOSTICS |
| 6139 | |
| 6140 | Diagnostics are intended to be self-explanatory (particularly if you |
| 6141 | use B<-RD_HINT> (under B<perl -s>) or define C<$::RD_HINT> inside the program). |
| 6142 | |
| 6143 | C<Parse::RecDescent> currently diagnoses the following: |
| 6144 | |
| 6145 | =over 4 |
| 6146 | |
| 6147 | =item * |
| 6148 | |
| 6149 | Invalid regular expressions used as pattern terminals (fatal error). |
| 6150 | |
| 6151 | =item * |
| 6152 | |
| 6153 | Invalid Perl code in code blocks (fatal error). |
| 6154 | |
| 6155 | =item * |
| 6156 | |
| 6157 | Lookahead used in the wrong place or in a nonsensical way (fatal error). |
| 6158 | |
| 6159 | =item * |
| 6160 | |
| 6161 | "Obvious" cases of left-recursion (fatal error). |
| 6162 | |
| 6163 | =item * |
| 6164 | |
| 6165 | Missing or extra components in a C<E<lt>leftopE<gt>> or C<E<lt>rightopE<gt>> |
| 6166 | directive. |
| 6167 | |
| 6168 | =item * |
| 6169 | |
| 6170 | Unrecognisable components in the grammar specification (fatal error). |
| 6171 | |
| 6172 | =item * |
| 6173 | |
| 6174 | "Orphaned" rule components specified before the first rule (fatal error) |
| 6175 | or after an C<E<lt>errorE<gt>> directive (level 3 warning). |
| 6176 | |
| 6177 | =item * |
| 6178 | |
| 6179 | Missing rule definitions (this only generates a level 3 warning, since you |
| 6180 | may be providing them later via C<Parse::RecDescent::Extend()>). |
| 6181 | |
| 6182 | =item * |
| 6183 | |
| 6184 | Instances where greedy repetition behaviour will almost certainly |
| 6185 | cause the failure of a production (a level 3 warning - see |
| 6186 | L<"ON-GOING ISSUES AND FUTURE DIRECTIONS"> below). |
| 6187 | |
| 6188 | =item * |
| 6189 | |
| 6190 | Attempts to define rules named 'Replace' or 'Extend', which cannot be |
| 6191 | called directly through the parser object because of the predefined |
| 6192 | meaning of C<Parse::RecDescent::Replace> and |
| 6193 | C<Parse::RecDescent::Extend>. (Only a level 2 warning is generated, since |
| 6194 | such rules I<can> still be used as subrules). |
| 6195 | |
| 6196 | =item * |
| 6197 | |
| 6198 | Productions which consist of a single C<E<lt>error?E<gt>> |
| 6199 | directive, and which therefore may succeed unexpectedly |
| 6200 | (a level 2 warning, since this might conceivably be the desired effect). |
| 6201 | |
| 6202 | =item * |
| 6203 | |
| 6204 | Multiple consecutive lookahead specifiers (a level 1 warning only, since their |
| 6205 | effects simply accumulate). |
| 6206 | |
| 6207 | =item * |
| 6208 | |
| 6209 | Productions which start with a C<E<lt>rejectE<gt>> or C<E<lt>rulevar:...E<gt>> |
| 6210 | directive. Such productions are optimized away (a level 1 warning). |
| 6211 | |
| 6212 | =item * |
| 6213 | |
| 6214 | Rules which are autogenerated under C<$::AUTOSTUB> (a level 1 warning). |
| 6215 | |
| 6216 | =back |
| 6217 | |
| 6218 | =head1 AUTHOR |
| 6219 | |
| 6220 | Damian Conway (damian@conway.org) |
| 6221 | Jeremy T. Braun (JTBRAUN@CPAN.org) [current maintainer] |
| 6222 | |
| 6223 | =head1 BUGS AND IRRITATIONS |
| 6224 | |
| 6225 | There are undoubtedly serious bugs lurking somewhere in this much code :-) |
| 6226 | Bug reports, test cases and other feedback are most welcome. |
| 6227 | |
| 6228 | Ongoing annoyances include: |
| 6229 | |
| 6230 | =over 4 |
| 6231 | |
| 6232 | =item * |
| 6233 | |
| 6234 | There's no support for parsing directly from an input stream. |
| 6235 | If and when the Perl Gods give us regular expressions on streams, |
| 6236 | this should be trivial (ahem!) to implement. |
| 6237 | |
| 6238 | =item * |
| 6239 | |
| 6240 | The parser generator can get confused if actions aren't properly |
| 6241 | closed or if they contain particularly nasty Perl syntax errors |
| 6242 | (especially unmatched curly brackets). |
| 6243 | |
| 6244 | =item * |
| 6245 | |
| 6246 | The generator only detects the most obvious form of left recursion |
| 6247 | (potential recursion on the first subrule in a rule). More subtle |
| 6248 | forms of left recursion (for example, through the second item in a |
| 6249 | rule after a "zero" match of a preceding "zero-or-more" repetition, |
| 6250 | or after a match of a subrule with an empty production) are not found. |
| 6251 | |
| 6252 | =item * |
| 6253 | |
| 6254 | Instead of complaining about left-recursion, the generator should |
| 6255 | silently transform the grammar to remove it. Don't expect this |
| 6256 | feature any time soon as it would require a more sophisticated |
| 6257 | approach to parser generation than is currently used. |
| 6258 | |
| 6259 | =item * |
| 6260 | |
| 6261 | The generated parsers don't always run as fast as might be wished. |
| 6262 | |
| 6263 | =item * |
| 6264 | |
| 6265 | The meta-parser should be bootstrapped using C<Parse::RecDescent> :-) |
| 6266 | |
| 6267 | =back |
| 6268 | |
| 6269 | =head1 ON-GOING ISSUES AND FUTURE DIRECTIONS |
| 6270 | |
| 6271 | =over 4 |
| 6272 | |
| 6273 | =item 1. |
| 6274 | |
| 6275 | Repetitions are "incorrigibly greedy" in that they will eat everything they can |
| 6276 | and won't backtrack if that behaviour causes a production to fail needlessly. |
| 6277 | So, for example: |
| 6278 | |
| 6279 | rule: subrule(s) subrule |
| 6280 | |
| 6281 | will I<never> succeed, because the repetition will eat all the |
| 6282 | subrules it finds, leaving none to match the second item. Such |
| 6283 | constructions are relatively rare (and C<Parse::RecDescent::new> generates a |
| 6284 | warning whenever they occur) so this may not be a problem, especially |
| 6285 | since the insatiable behaviour can be overcome "manually" by writing: |
| 6286 | |
| 6287 | rule: penultimate_subrule(s) subrule |
| 6288 | |
| 6289 | penultimate_subrule: subrule ...subrule |
| 6290 | |
| 6291 | The issue is that this construction is exactly twice as expensive as the |
| 6292 | original, whereas backtracking would add only 1/I<N> to the cost (for |
| 6293 | matching I<N> repetitions of C<subrule>). I would welcome feedback on |
| 6294 | the need for backtracking; particularly on cases where the lack of it |
| 6295 | makes parsing performance problematical. |
| 6296 | |
| 6297 | =item 2. |
| 6298 | |
| 6299 | Having opened that can of worms, it's also necessary to consider whether there |
| 6300 | is a need for non-greedy repetition specifiers. Again, it's possible (at some |
| 6301 | cost) to manually provide the required functionality: |
| 6302 | |
| 6303 | rule: nongreedy_subrule(s) othersubrule |
| 6304 | |
| 6305 | nongreedy_subrule: subrule ...!othersubrule |
| 6306 | |
| 6307 | Overall, the issue is whether the benefit of this extra functionality |
| 6308 | outweighs the drawbacks of further complicating the (currently |
| 6309 | minimalist) grammar specification syntax, and (worse) introducing more overhead |
| 6310 | into the generated parsers. |
| 6311 | |
| 6312 | =item 3. |
| 6313 | |
| 6314 | An C<E<lt>autocommitE<gt>> directive would be nice. That is, it would be useful to be |
| 6315 | able to say: |
| 6316 | |
| 6317 | command: <autocommit> |
| 6318 | command: 'find' name |
| 6319 | | 'find' address |
| 6320 | | 'do' command 'at' time 'if' condition |
| 6321 | | 'do' command 'at' time |
| 6322 | | 'do' command |
| 6323 | | unusual_command |
| 6324 | |
| 6325 | and have the generator work out that this should be "pruned" thus: |
| 6326 | |
| 6327 | command: 'find' name |
| 6328 | | 'find' <commit> address |
| 6329 | | 'do' <commit> command <uncommit> |
| 6330 | 'at' time |
| 6331 | 'if' <commit> condition |
| 6332 | | 'do' <commit> command <uncommit> |
| 6333 | 'at' <commit> time |
| 6334 | | 'do' <commit> command |
| 6335 | | unusual_command |
| 6336 | |
| 6337 | There are several issues here. Firstly, should the |
| 6338 | C<E<lt>autocommitE<gt>> automatically install an C<E<lt>uncommitE<gt>> |
| 6339 | at the start of the last production (on the grounds that the "command" |
| 6340 | rule doesn't know whether an "unusual_command" might start with "find" |
| 6341 | or "do") or should the "unusual_command" subgraph be analysed (to see |
| 6342 | if it I<might> be viable after a "find" or "do")? |
| 6343 | |
| 6344 | The second issue is how regular expressions should be treated. The simplest |
| 6345 | approach would be simply to uncommit before them (on the grounds that they |
| 6346 | I<might> match). Better efficiency would be obtained by analyzing all preceding |
| 6347 | literal tokens to determine whether the pattern would match them. |
| 6348 | |
| 6349 | Overall, the issues are: can such automated "pruning" approach a hand-tuned |
| 6350 | version sufficiently closely to warrant the extra set-up expense, and (more |
| 6351 | importantly) is the problem important enough to even warrant the non-trivial |
| 6352 | effort of building an automated solution? |
| 6353 | |
| 6354 | =back |
| 6355 | |
| 6356 | =head1 SUPPORT |
| 6357 | |
| 6358 | =head2 Source Code Repository |
| 6359 | |
| 6360 | L<http://github.com/jtbraun/Parse-RecDescent> |
| 6361 | |
| 6362 | =head2 Mailing List |
| 6363 | |
| 6364 | Visit L<http://www.perlfoundation.org/perl5/index.cgi?parse_recdescent> to sign up for the mailing list. |
| 6365 | |
| 6366 | L<http://www.PerlMonks.org> is also a good place to ask |
| 6367 | questions. Previous posts about Parse::RecDescent can typically be |
| 6368 | found with this search: |
| 6369 | L<http://perlmonks.org/index.pl?node=recdescent>. |
| 6370 | |
| 6371 | =head2 FAQ |
| 6372 | |
| 6373 | Visit L<Parse::RecDescent::FAQ> for answers to frequently (and not so |
| 6374 | frequently) asked questions about Parse::RecDescent. |
| 6375 | |
| 6376 | =head2 View/Report Bugs |
| 6377 | |
| 6378 | To view the current bug list or report a new issue visit |
| 6379 | L<https://rt.cpan.org/Public/Dist/Display.html?Name=Parse-RecDescent>. |
| 6380 | |
| 6381 | =head1 SEE ALSO |
| 6382 | |
| 6383 | L<Regexp::Grammars> provides Parse::RecDescent style parsing using native |
| 6384 | Perl 5.10 regular expressions. |
| 6385 | |
| 6386 | |
| 6387 | =head1 LICENCE AND COPYRIGHT |
| 6388 | |
| 6389 | Copyright (c) 1997-2007, Damian Conway C<< <DCONWAY@CPAN.org> >>. All rights |
| 6390 | reserved. |
| 6391 | |
| 6392 | This module is free software; you can redistribute it and/or |
| 6393 | modify it under the same terms as Perl itself. See L<perlartistic>. |
| 6394 | |
| 6395 | |
| 6396 | =head1 DISCLAIMER OF WARRANTY |
| 6397 | |
| 6398 | BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY |
| 6399 | FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN |
| 6400 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES |
| 6401 | PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER |
| 6402 | EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| 6403 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE |
| 6404 | ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH |
| 6405 | YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL |
| 6406 | NECESSARY SERVICING, REPAIR, OR CORRECTION. |
| 6407 | |
| 6408 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING |
| 6409 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR |
| 6410 | REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE |
| 6411 | LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, |
| 6412 | OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE |
| 6413 | THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING |
| 6414 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A |
| 6415 | FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF |
| 6416 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF |
| 6417 | SUCH DAMAGES. |