blob: ec8aa40defb4f01569dc60b7b6b4ee99e91aec1e [file] [log] [blame]
yu.dongc33b3072024-08-21 23:14:49 -07001# GENERATE RECURSIVE DESCENT PARSER OBJECTS FROM A GRAMMAR
2
3use 5.006;
4use strict;
5
6package Parse::RecDescent;
7
8use Text::Balanced qw ( extract_codeblock extract_bracketed extract_quotelike extract_delimited );
9
10use vars qw ( $skip );
11
12 *defskip = \ '\s*'; # DEFAULT SEPARATOR IS OPTIONAL WHITESPACE
13 $skip = '\s*'; # UNIVERSAL SEPARATOR IS OPTIONAL WHITESPACE
14my $MAXREP = 100_000_000; # REPETITIONS MATCH AT MOST 100,000,000 TIMES
15
16
17#ifndef RUNTIME
18sub import # IMPLEMENT PRECOMPILER BEHAVIOUR UNDER:
19 # perl -MParse::RecDescent - <grammarfile> <classname>
20{
21 local *_die = sub { print @_, "\n"; exit };
22
23 my ($package, $file, $line) = caller;
24
25 if ($file eq '-' && $line == 0)
26 {
27 _die("Usage: perl -MLocalTest - <grammarfile> <classname>")
28 unless @ARGV == 2;
29
30 my ($sourcefile, $class) = @ARGV;
31
32 local *IN;
33 open IN, $sourcefile
34 or _die(qq{Can't open grammar file "$sourcefile"});
35 local $/; #
36 my $grammar = <IN>;
37 close IN;
38
39 Parse::RecDescent->Precompile($grammar, $class, $sourcefile);
40 exit;
41 }
42}
43
44sub Save
45{
46 my $self = shift;
47 my %opt;
48 if ('HASH' eq ref $_[0]) {
49 %opt = (%opt, %{$_[0]});
50 shift;
51 }
52 my ($class) = @_;
53 $self->{saving} = 1;
54 $self->Precompile(undef,$class);
55 $self->{saving} = 0;
56}
57
58sub Precompile
59{
60 my $self = shift;
61 my %opt = ( -standalone => 0 );
62 if ('HASH' eq ref $_[0]) {
63 %opt = (%opt, %{$_[0]});
64 shift;
65 }
66 my ($grammar, $class, $sourcefile) = @_;
67
68 $class =~ /^(\w+::)*\w+$/ or croak("Bad class name: $class");
69
70 my $modulefile = $class;
71 $modulefile =~ s/.*:://;
72 $modulefile .= ".pm";
73
74 my $runtime_package = 'Parse::RecDescent::_Runtime';
75 my $code;
76
77 local *OUT;
78 open OUT, ">", $modulefile
79 or croak("Can't write to new module file '$modulefile'");
80
81 print STDERR "precompiling grammar from file '$sourcefile'\n",
82 "to class $class in module file '$modulefile'\n"
83 if $grammar && $sourcefile;
84
85 # Make the resulting pre-compiled parser stand-alone by
86 # including the contents of Parse::RecDescent as
87 # Parse::RecDescent::Runtime in the resulting precompiled
88 # parser.
89 if ($opt{-standalone}) {
90 local *IN;
91 open IN, '<', $Parse::RecDescent::_FILENAME
92 or croak("Can't open $Parse::RecDescent::_FILENAME for standalone pre-compilation: $!\n");
93 my $exclude = 0;
94 print OUT "{\n";
95 while (<IN>) {
96 if ($_ =~ /^\s*#\s*ifndef\s+RUNTIME\s*$/) {
97 ++$exclude;
98 }
99 if ($exclude) {
100 if ($_ =~ /^\s*#\s*endif\s$/) {
101 --$exclude;
102 }
103 } else {
104 if ($_ =~ m/^__END__/) {
105 last;
106 }
107 s/Parse::RecDescent/$runtime_package/gs;
108 print OUT $_;
109 }
110 }
111 close IN;
112 print OUT "}\n";
113 }
114
115 $self = Parse::RecDescent->new($grammar, # $grammar
116 1, # $compiling
117 $class # $namespace
118 )
119 || croak("Can't compile bad grammar")
120 if $grammar;
121
122 $self->{_precompiled} = 1;
123
124 foreach ( keys %{$self->{rules}} ) {
125 $self->{rules}{$_}{changed} = 1;
126 }
127
128
129 print OUT "package $class;\n";
130 if (not $opt{-standalone}) {
131 print OUT "use Parse::RecDescent;\n";
132 }
133
134 print OUT "{ my \$ERRORS;\n\n";
135
136 $code = $self->_code();
137 if ($opt{-standalone}) {
138 $code =~ s/Parse::RecDescent/$runtime_package/gs;
139 }
140 print OUT $code;
141
142 print OUT "}\npackage $class; sub new { ";
143 print OUT "my ";
144
145 require Data::Dumper;
146 $code = Data::Dumper->Dump([$self], [qw(self)]);
147 if ($opt{-standalone}) {
148 $code =~ s/Parse::RecDescent/$runtime_package/gs;
149 }
150 print OUT $code;
151
152 print OUT "}";
153
154 close OUT
155 or croak("Can't write to new module file '$modulefile'");
156}
157#endif
158
159package Parse::RecDescent::LineCounter;
160
161
162sub TIESCALAR # ($classname, \$text, $thisparser, $prevflag)
163{
164 bless {
165 text => $_[1],
166 parser => $_[2],
167 prev => $_[3]?1:0,
168 }, $_[0];
169}
170
171sub FETCH
172{
173 my $parser = $_[0]->{parser};
174 my $cache = $parser->{linecounter_cache};
175 my $from = $parser->{fulltextlen}-length(${$_[0]->{text}})-$_[0]->{prev}
176;
177
178 unless (exists $cache->{$from})
179 {
180 $parser->{lastlinenum} = $parser->{offsetlinenum}
181 - Parse::RecDescent::_linecount(substr($parser->{fulltext},$from))
182 + 1;
183 $cache->{$from} = $parser->{lastlinenum};
184 }
185 return $cache->{$from};
186}
187
188sub STORE
189{
190 my $parser = $_[0]->{parser};
191 $parser->{offsetlinenum} -= $parser->{lastlinenum} - $_[1];
192 return undef;
193}
194
195sub resync # ($linecounter)
196{
197 my $self = tied($_[0]);
198 die "Tried to alter something other than a LineCounter\n"
199 unless $self =~ /Parse::RecDescent::LineCounter/;
200
201 my $parser = $self->{parser};
202 my $apparently = $parser->{offsetlinenum}
203 - Parse::RecDescent::_linecount(${$self->{text}})
204 + 1;
205
206 $parser->{offsetlinenum} += $parser->{lastlinenum} - $apparently;
207 return 1;
208}
209
210package Parse::RecDescent::ColCounter;
211
212sub TIESCALAR # ($classname, \$text, $thisparser, $prevflag)
213{
214 bless {
215 text => $_[1],
216 parser => $_[2],
217 prev => $_[3]?1:0,
218 }, $_[0];
219}
220
221sub FETCH
222{
223 my $parser = $_[0]->{parser};
224 my $missing = $parser->{fulltextlen}-length(${$_[0]->{text}})-$_[0]->{prev}+1;
225 substr($parser->{fulltext},0,$missing) =~ m/^(.*)\Z/m;
226 return length($1);
227}
228
229sub STORE
230{
231 die "Can't set column number via \$thiscolumn\n";
232}
233
234
235package Parse::RecDescent::OffsetCounter;
236
237sub TIESCALAR # ($classname, \$text, $thisparser, $prev)
238{
239 bless {
240 text => $_[1],
241 parser => $_[2],
242 prev => $_[3]?-1:0,
243 }, $_[0];
244}
245
246sub FETCH
247{
248 my $parser = $_[0]->{parser};
249 return $parser->{fulltextlen}-length(${$_[0]->{text}})+$_[0]->{prev};
250}
251
252sub STORE
253{
254 die "Can't set current offset via \$thisoffset or \$prevoffset\n";
255}
256
257
258
259package Parse::RecDescent::Rule;
260
261sub new ($$$$$)
262{
263 my $class = ref($_[0]) || $_[0];
264 my $name = $_[1];
265 my $owner = $_[2];
266 my $line = $_[3];
267 my $replace = $_[4];
268
269 if (defined $owner->{"rules"}{$name})
270 {
271 my $self = $owner->{"rules"}{$name};
272 if ($replace && !$self->{"changed"})
273 {
274 $self->reset;
275 }
276 return $self;
277 }
278 else
279 {
280 return $owner->{"rules"}{$name} =
281 bless
282 {
283 "name" => $name,
284 "prods" => [],
285 "calls" => [],
286 "changed" => 0,
287 "line" => $line,
288 "impcount" => 0,
289 "opcount" => 0,
290 "vars" => "",
291 }, $class;
292 }
293}
294
295sub reset($)
296{
297 @{$_[0]->{"prods"}} = ();
298 @{$_[0]->{"calls"}} = ();
299 $_[0]->{"changed"} = 0;
300 $_[0]->{"impcount"} = 0;
301 $_[0]->{"opcount"} = 0;
302 $_[0]->{"vars"} = "";
303}
304
305sub DESTROY {}
306
307sub hasleftmost($$)
308{
309 my ($self, $ref) = @_;
310
311 my $prod;
312 foreach $prod ( @{$self->{"prods"}} )
313 {
314 return 1 if $prod->hasleftmost($ref);
315 }
316
317 return 0;
318}
319
320sub leftmostsubrules($)
321{
322 my $self = shift;
323 my @subrules = ();
324
325 my $prod;
326 foreach $prod ( @{$self->{"prods"}} )
327 {
328 push @subrules, $prod->leftmostsubrule();
329 }
330
331 return @subrules;
332}
333
334sub expected($)
335{
336 my $self = shift;
337 my @expected = ();
338
339 my $prod;
340 foreach $prod ( @{$self->{"prods"}} )
341 {
342 my $next = $prod->expected();
343 unless (! $next or _contains($next,@expected) )
344 {
345 push @expected, $next;
346 }
347 }
348
349 return join ', or ', @expected;
350}
351
352sub _contains($@)
353{
354 my $target = shift;
355 my $item;
356 foreach $item ( @_ ) { return 1 if $target eq $item; }
357 return 0;
358}
359
360sub addcall($$)
361{
362 my ( $self, $subrule ) = @_;
363 unless ( _contains($subrule, @{$self->{"calls"}}) )
364 {
365 push @{$self->{"calls"}}, $subrule;
366 }
367}
368
369sub addprod($$)
370{
371 my ( $self, $prod ) = @_;
372 push @{$self->{"prods"}}, $prod;
373 $self->{"changed"} = 1;
374 $self->{"impcount"} = 0;
375 $self->{"opcount"} = 0;
376 $prod->{"number"} = $#{$self->{"prods"}};
377 return $prod;
378}
379
380sub addvar
381{
382 my ( $self, $var, $parser ) = @_;
383 if ($var =~ /\A\s*local\s+([%@\$]\w+)/)
384 {
385 $parser->{localvars} .= " $1";
386 $self->{"vars"} .= "$var;\n" }
387 else
388 { $self->{"vars"} .= "my $var;\n" }
389 $self->{"changed"} = 1;
390 return 1;
391}
392
393sub addautoscore
394{
395 my ( $self, $code ) = @_;
396 $self->{"autoscore"} = $code;
397 $self->{"changed"} = 1;
398 return 1;
399}
400
401sub nextoperator($)
402{
403 my $self = shift;
404 my $prodcount = scalar @{$self->{"prods"}};
405 my $opcount = ++$self->{"opcount"};
406 return "_operator_${opcount}_of_production_${prodcount}_of_rule_$self->{name}";
407}
408
409sub nextimplicit($)
410{
411 my $self = shift;
412 my $prodcount = scalar @{$self->{"prods"}};
413 my $impcount = ++$self->{"impcount"};
414 return "_alternation_${impcount}_of_production_${prodcount}_of_rule_$self->{name}";
415}
416
417
418sub code
419{
420 my ($self, $namespace, $parser, $check) = @_;
421
422eval 'undef &' . $namespace . '::' . $self->{"name"} unless $parser->{saving};
423
424 my $code =
425'
426# ARGS ARE: ($parser, $text; $repeating, $_noactions, $_itempos, \@args)
427sub ' . $namespace . '::' . $self->{"name"} . '
428{
429 my $thisparser = $_[0];
430 use vars q{$tracelevel};
431 local $tracelevel = ($tracelevel||0)+1;
432 $ERRORS = 0;
433 my $thisrule = $thisparser->{"rules"}{"' . $self->{"name"} . '"};
434
435 Parse::RecDescent::_trace(q{Trying rule: [' . $self->{"name"} . ']},
436 Parse::RecDescent::_tracefirst($_[1]),
437 q{' . $self->{"name"} . '},
438 $tracelevel)
439 if defined $::RD_TRACE;
440
441 ' . ($parser->{deferrable}
442 ? 'my $def_at = @{$thisparser->{deferred}};'
443 : '') .
444 '
445 my $err_at = @{$thisparser->{errors}};
446
447 my $score;
448 my $score_return;
449 my $_tok;
450 my $return = undef;
451 my $_matched=0;
452 my $commit=0;
453 my @item = ();
454 my %item = ();
455 my $repeating = $_[2];
456 my $_noactions = $_[3];
457 my $_itempos = $_[4];
458 my @arg = defined $_[5] ? @{ &{$_[5]} } : ();
459 my %arg = ($#arg & 01) ? @arg : (@arg, undef);
460 my $text;
461 my $lastsep;
462 my $current_match;
463 my $expectation = new Parse::RecDescent::Expectation(q{' . $self->expected() . '});
464 $expectation->at($_[1]);
465 '. ($parser->{_check}{thisoffset}?'
466 my $thisoffset;
467 tie $thisoffset, q{Parse::RecDescent::OffsetCounter}, \$text, $thisparser;
468 ':'') . ($parser->{_check}{prevoffset}?'
469 my $prevoffset;
470 tie $prevoffset, q{Parse::RecDescent::OffsetCounter}, \$text, $thisparser, 1;
471 ':'') . ($parser->{_check}{thiscolumn}?'
472 my $thiscolumn;
473 tie $thiscolumn, q{Parse::RecDescent::ColCounter}, \$text, $thisparser;
474 ':'') . ($parser->{_check}{prevcolumn}?'
475 my $prevcolumn;
476 tie $prevcolumn, q{Parse::RecDescent::ColCounter}, \$text, $thisparser, 1;
477 ':'') . ($parser->{_check}{prevline}?'
478 my $prevline;
479 tie $prevline, q{Parse::RecDescent::LineCounter}, \$text, $thisparser, 1;
480 ':'') . '
481 my $thisline;
482 tie $thisline, q{Parse::RecDescent::LineCounter}, \$text, $thisparser;
483
484 '. $self->{vars} .'
485';
486
487 my $prod;
488 foreach $prod ( @{$self->{"prods"}} )
489 {
490 $prod->addscore($self->{autoscore},0,0) if $self->{autoscore};
491 next unless $prod->checkleftmost();
492 $code .= $prod->code($namespace,$self,$parser);
493
494 $code .= $parser->{deferrable}
495 ? ' splice
496 @{$thisparser->{deferred}}, $def_at unless $_matched;
497 '
498 : '';
499 }
500
501 $code .=
502'
503 unless ( $_matched || defined($score) )
504 {
505 ' .($parser->{deferrable}
506 ? ' splice @{$thisparser->{deferred}}, $def_at;
507 '
508 : '') . '
509
510 $_[1] = $text; # NOT SURE THIS IS NEEDED
511 Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' rule>>},
512 Parse::RecDescent::_tracefirst($_[1]),
513 q{' . $self->{"name"} .'},
514 $tracelevel)
515 if defined $::RD_TRACE;
516 return undef;
517 }
518 if (!defined($return) && defined($score))
519 {
520 Parse::RecDescent::_trace(q{>>Accepted scored production<<}, "",
521 q{' . $self->{"name"} .'},
522 $tracelevel)
523 if defined $::RD_TRACE;
524 $return = $score_return;
525 }
526 splice @{$thisparser->{errors}}, $err_at;
527 $return = $item[$#item] unless defined $return;
528 if (defined $::RD_TRACE)
529 {
530 Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' rule<< (return value: [} .
531 $return . q{])}, "",
532 q{' . $self->{"name"} .'},
533 $tracelevel);
534 Parse::RecDescent::_trace(q{(consumed: [} .
535 Parse::RecDescent::_tracemax(substr($_[1],0,-length($text))) . q{])},
536 Parse::RecDescent::_tracefirst($text),
537 , q{' . $self->{"name"} .'},
538 $tracelevel)
539 }
540 $_[1] = $text;
541 return $return;
542}
543';
544
545 return $code;
546}
547
548my @left;
549sub isleftrec($$)
550{
551 my ($self, $rules) = @_;
552 my $root = $self->{"name"};
553 @left = $self->leftmostsubrules();
554 my $next;
555 foreach $next ( @left )
556 {
557 next unless defined $rules->{$next}; # SKIP NON-EXISTENT RULES
558 return 1 if $next eq $root;
559 my $child;
560 foreach $child ( $rules->{$next}->leftmostsubrules() )
561 {
562 push(@left, $child)
563 if ! _contains($child, @left) ;
564 }
565 }
566 return 0;
567}
568
569package Parse::RecDescent::Production;
570
571sub describe ($;$)
572{
573 return join ' ', map { $_->describe($_[1]) or () } @{$_[0]->{items}};
574}
575
576sub new ($$;$$)
577{
578 my ($self, $line, $uncommit, $error) = @_;
579 my $class = ref($self) || $self;
580
581 bless
582 {
583 "items" => [],
584 "uncommit" => $uncommit,
585 "error" => $error,
586 "line" => $line,
587 strcount => 0,
588 patcount => 0,
589 dircount => 0,
590 actcount => 0,
591 }, $class;
592}
593
594sub expected ($)
595{
596 my $itemcount = scalar @{$_[0]->{"items"}};
597 return ($itemcount) ? $_[0]->{"items"}[0]->describe(1) : '';
598}
599
600sub hasleftmost ($$)
601{
602 my ($self, $ref) = @_;
603 return ${$self->{"items"}}[0] eq $ref if scalar @{$self->{"items"}};
604 return 0;
605}
606
607sub isempty($)
608{
609 my $self = shift;
610 return 0 == @{$self->{"items"}};
611}
612
613sub leftmostsubrule($)
614{
615 my $self = shift;
616
617 if ( $#{$self->{"items"}} >= 0 )
618 {
619 my $subrule = $self->{"items"}[0]->issubrule();
620 return $subrule if defined $subrule;
621 }
622
623 return ();
624}
625
626sub checkleftmost($)
627{
628 my @items = @{$_[0]->{"items"}};
629 if (@items==1 && ref($items[0]) =~ /\AParse::RecDescent::Error/
630 && $items[0]->{commitonly} )
631 {
632 Parse::RecDescent::_warn(2,"Lone <error?> in production treated
633 as <error?> <reject>");
634 Parse::RecDescent::_hint("A production consisting of a single
635 conditional <error?> directive would
636 normally succeed (with the value zero) if the
637 rule is not 'commited' when it is
638 tried. Since you almost certainly wanted
639 '<error?> <reject>' Parse::RecDescent
640 supplied it for you.");
641 push @{$_[0]->{items}},
642 Parse::RecDescent::UncondReject->new(0,0,'<reject>');
643 }
644 elsif (@items==1 && ($items[0]->describe||"") =~ /<rulevar|<autoscore/)
645 {
646 # Do nothing
647 }
648 elsif (@items &&
649 ( ref($items[0]) =~ /\AParse::RecDescent::UncondReject/
650 || ($items[0]->describe||"") =~ /<autoscore/
651 ))
652 {
653 Parse::RecDescent::_warn(1,"Optimizing away production: [". $_[0]->describe ."]");
654 my $what = $items[0]->describe =~ /<rulevar/
655 ? "a <rulevar> (which acts like an unconditional <reject> during parsing)"
656 : $items[0]->describe =~ /<autoscore/
657 ? "an <autoscore> (which acts like an unconditional <reject> during parsing)"
658 : "an unconditional <reject>";
659 my $caveat = $items[0]->describe =~ /<rulevar/
660 ? " after the specified variable was set up"
661 : "";
662 my $advice = @items > 1
663 ? "However, there were also other (useless) items after the leading "
664 . $items[0]->describe
665 . ", so you may have been expecting some other behaviour."
666 : "You can safely ignore this message.";
667 Parse::RecDescent::_hint("The production starts with $what. That means that the
668 production can never successfully match, so it was
669 optimized out of the final parser$caveat. $advice");
670 return 0;
671 }
672 return 1;
673}
674
675sub changesskip($)
676{
677 my $item;
678 foreach $item (@{$_[0]->{"items"}})
679 {
680 if (ref($item) =~ /Parse::RecDescent::(Action|Directive)/)
681 {
682 return 1 if $item->{code} =~ /\$skip\s*=/;
683 }
684 }
685 return 0;
686}
687
688sub adddirective
689{
690 my ( $self, $whichop, $line, $name ) = @_;
691 push @{$self->{op}},
692 { type=>$whichop, line=>$line, name=>$name,
693 offset=> scalar(@{$self->{items}}) };
694}
695
696sub addscore
697{
698 my ( $self, $code, $lookahead, $line ) = @_;
699 $self->additem(Parse::RecDescent::Directive->new(
700 "local \$^W;
701 my \$thisscore = do { $code } + 0;
702 if (!defined(\$score) || \$thisscore>\$score)
703 { \$score=\$thisscore; \$score_return=\$item[-1]; }
704 undef;", $lookahead, $line,"<score: $code>") )
705 unless $self->{items}[-1]->describe =~ /<score/;
706 return 1;
707}
708
709sub check_pending
710{
711 my ( $self, $line ) = @_;
712 if ($self->{op})
713 {
714 while (my $next = pop @{$self->{op}})
715 {
716 Parse::RecDescent::_error("Incomplete <$next->{type}op:...>.", $line);
717 Parse::RecDescent::_hint(
718 "The current production ended without completing the
719 <$next->{type}op:...> directive that started near line
720 $next->{line}. Did you forget the closing '>'?");
721 }
722 }
723 return 1;
724}
725
726sub enddirective
727{
728 my ( $self, $line, $minrep, $maxrep ) = @_;
729 unless ($self->{op})
730 {
731 Parse::RecDescent::_error("Unmatched > found.", $line);
732 Parse::RecDescent::_hint(
733 "A '>' angle bracket was encountered, which typically
734 indicates the end of a directive. However no suitable
735 preceding directive was encountered. Typically this
736 indicates either a extra '>' in the grammar, or a
737 problem inside the previous directive.");
738 return;
739 }
740 my $op = pop @{$self->{op}};
741 my $span = @{$self->{items}} - $op->{offset};
742 if ($op->{type} =~ /left|right/)
743 {
744 if ($span != 3)
745 {
746 Parse::RecDescent::_error(
747 "Incorrect <$op->{type}op:...> specification:
748 expected 3 args, but found $span instead", $line);
749 Parse::RecDescent::_hint(
750 "The <$op->{type}op:...> directive requires a
751 sequence of exactly three elements. For example:
752 <$op->{type}op:leftarg /op/ rightarg>");
753 }
754 else
755 {
756 push @{$self->{items}},
757 Parse::RecDescent::Operator->new(
758 $op->{type}, $minrep, $maxrep, splice(@{$self->{"items"}}, -3));
759 $self->{items}[-1]->sethashname($self);
760 $self->{items}[-1]{name} = $op->{name};
761 }
762 }
763}
764
765sub prevwasreturn
766{
767 my ( $self, $line ) = @_;
768 unless (@{$self->{items}})
769 {
770 Parse::RecDescent::_error(
771 "Incorrect <return:...> specification:
772 expected item missing", $line);
773 Parse::RecDescent::_hint(
774 "The <return:...> directive requires a
775 sequence of at least one item. For example:
776 <return: list>");
777 return;
778 }
779 push @{$self->{items}},
780 Parse::RecDescent::Result->new();
781}
782
783sub additem
784{
785 my ( $self, $item ) = @_;
786 $item->sethashname($self);
787 push @{$self->{"items"}}, $item;
788 return $item;
789}
790
791sub _duplicate_itempos
792{
793 my ($src) = @_;
794 my $dst = {};
795
796 foreach (keys %$src)
797 {
798 %{$dst->{$_}} = %{$src->{$_}};
799 }
800 $dst;
801}
802
803sub _update_itempos
804{
805 my ($dst, $src, $typekeys, $poskeys) = @_;
806
807 my @typekeys = 'ARRAY' eq ref $typekeys ?
808 @$typekeys :
809 keys %$src;
810
811 foreach my $k (keys %$src)
812 {
813 if ('ARRAY' eq ref $poskeys)
814 {
815 @{$dst->{$k}}{@$poskeys} = @{$src->{$k}}{@$poskeys};
816 }
817 else
818 {
819 %{$dst->{$k}} = %{$src->{$k}};
820 }
821 }
822}
823
824sub preitempos
825{
826 return q
827 {
828 push @itempos, {'offset' => {'from'=>$thisoffset, 'to'=>undef},
829 'line' => {'from'=>$thisline, 'to'=>undef},
830 'column' => {'from'=>$thiscolumn, 'to'=>undef} };
831 }
832}
833
834sub incitempos
835{
836 return q
837 {
838 $itempos[$#itempos]{'offset'}{'from'} += length($lastsep);
839 $itempos[$#itempos]{'line'}{'from'} = $thisline;
840 $itempos[$#itempos]{'column'}{'from'} = $thiscolumn;
841 }
842}
843
844sub unincitempos
845{
846 # the next incitempos will properly set these two fields, but
847 # {'offset'}{'from'} needs to be decreased by length($lastsep)
848 # $itempos[$#itempos]{'line'}{'from'}
849 # $itempos[$#itempos]{'column'}{'from'}
850 return q
851 {
852 $itempos[$#itempos]{'offset'}{'from'} -= length($lastsep) if defined $lastsep;
853 }
854}
855
856sub postitempos
857{
858 return q
859 {
860 $itempos[$#itempos]{'offset'}{'to'} = $prevoffset;
861 $itempos[$#itempos]{'line'}{'to'} = $prevline;
862 $itempos[$#itempos]{'column'}{'to'} = $prevcolumn;
863 }
864}
865
866sub code($$$$)
867{
868 my ($self,$namespace,$rule,$parser) = @_;
869 my $code =
870'
871 while (!$_matched'
872 . (defined $self->{"uncommit"} ? '' : ' && !$commit')
873 . ')
874 {
875 ' .
876 ($self->changesskip()
877 ? 'local $skip = defined($skip) ? $skip : $Parse::RecDescent::skip;'
878 : '') .'
879 Parse::RecDescent::_trace(q{Trying production: ['
880 . $self->describe . ']},
881 Parse::RecDescent::_tracefirst($_[1]),
882 q{' . $rule ->{name}. '},
883 $tracelevel)
884 if defined $::RD_TRACE;
885 my $thisprod = $thisrule->{"prods"}[' . $self->{"number"} . '];
886 ' . (defined $self->{"error"} ? '' : '$text = $_[1];' ) . '
887 my $_savetext;
888 @item = (q{' . $rule->{"name"} . '});
889 %item = (__RULE__ => q{' . $rule->{"name"} . '});
890 my $repcount = 0;
891
892';
893 $code .=
894' my @itempos = ({});
895' if $parser->{_check}{itempos};
896
897 my $item;
898 my $i;
899
900 for ($i = 0; $i < @{$self->{"items"}}; $i++)
901 {
902 $item = ${$self->{items}}[$i];
903
904 $code .= preitempos() if $parser->{_check}{itempos};
905
906 $code .= $item->code($namespace,$rule,$parser->{_check});
907
908 $code .= postitempos() if $parser->{_check}{itempos};
909
910 }
911
912 if ($parser->{_AUTOACTION} && defined($item) && !$item->isa("Parse::RecDescent::Action"))
913 {
914 $code .= $parser->{_AUTOACTION}->code($namespace,$rule);
915 Parse::RecDescent::_warn(1,"Autogenerating action in rule
916 \"$rule->{name}\":
917 $parser->{_AUTOACTION}{code}")
918 and
919 Parse::RecDescent::_hint("The \$::RD_AUTOACTION was defined,
920 so any production not ending in an
921 explicit action has the specified
922 \"auto-action\" automatically
923 appended.");
924 }
925 elsif ($parser->{_AUTOTREE} && defined($item) && !$item->isa("Parse::RecDescent::Action"))
926 {
927 if ($i==1 && $item->isterminal)
928 {
929 $code .= $parser->{_AUTOTREE}{TERMINAL}->code($namespace,$rule);
930 }
931 else
932 {
933 $code .= $parser->{_AUTOTREE}{NODE}->code($namespace,$rule);
934 }
935 Parse::RecDescent::_warn(1,"Autogenerating tree-building action in rule
936 \"$rule->{name}\"")
937 and
938 Parse::RecDescent::_hint("The directive <autotree> was specified,
939 so any production not ending
940 in an explicit action has
941 some parse-tree building code
942 automatically appended.");
943 }
944
945 $code .=
946'
947 Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' production: ['
948 . $self->describe . ']<<},
949 Parse::RecDescent::_tracefirst($text),
950 q{' . $rule->{name} . '},
951 $tracelevel)
952 if defined $::RD_TRACE;
953
954' . ( $parser->{_check}{itempos} ? '
955 if ( defined($_itempos) )
956 {
957 Parse::RecDescent::Production::_update_itempos($_itempos, $itempos[ 1], undef, [qw(from)]);
958 Parse::RecDescent::Production::_update_itempos($_itempos, $itempos[-1], undef, [qw(to)]);
959 }
960' : '' ) . '
961
962 $_matched = 1;
963 last;
964 }
965
966';
967 return $code;
968}
969
9701;
971
972package Parse::RecDescent::Action;
973
974sub describe { undef }
975
976sub sethashname { $_[0]->{hashname} = '__ACTION' . ++$_[1]->{actcount} .'__'; }
977
978sub new
979{
980 my $class = ref($_[0]) || $_[0];
981 bless
982 {
983 "code" => $_[1],
984 "lookahead" => $_[2],
985 "line" => $_[3],
986 }, $class;
987}
988
989sub issubrule { undef }
990sub isterminal { 0 }
991
992sub code($$$$)
993{
994 my ($self, $namespace, $rule) = @_;
995
996'
997 Parse::RecDescent::_trace(q{Trying action},
998 Parse::RecDescent::_tracefirst($text),
999 q{' . $rule->{name} . '},
1000 $tracelevel)
1001 if defined $::RD_TRACE;
1002 ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .'
1003
1004 $_tok = ($_noactions) ? 0 : do ' . $self->{"code"} . ';
1005 ' . ($self->{"lookahead"}<0?'if':'unless') . ' (defined $_tok)
1006 {
1007 Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' action>> (return value: [undef])})
1008 if defined $::RD_TRACE;
1009 last;
1010 }
1011 Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' action<< (return value: [}
1012 . $_tok . q{])},
1013 Parse::RecDescent::_tracefirst($text))
1014 if defined $::RD_TRACE;
1015 push @item, $_tok;
1016 ' . ($self->{line}>=0 ? '$item{'. $self->{hashname} .'}=$_tok;' : '' ) .'
1017 ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .'
1018'
1019}
1020
1021
10221;
1023
1024package Parse::RecDescent::Directive;
1025
1026sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; }
1027
1028sub issubrule { undef }
1029sub isterminal { 0 }
1030sub describe { $_[1] ? '' : $_[0]->{name} }
1031
1032sub new ($$$$$)
1033{
1034 my $class = ref($_[0]) || $_[0];
1035 bless
1036 {
1037 "code" => $_[1],
1038 "lookahead" => $_[2],
1039 "line" => $_[3],
1040 "name" => $_[4],
1041 }, $class;
1042}
1043
1044sub code($$$$)
1045{
1046 my ($self, $namespace, $rule) = @_;
1047
1048'
1049 ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .'
1050
1051 Parse::RecDescent::_trace(q{Trying directive: ['
1052 . $self->describe . ']},
1053 Parse::RecDescent::_tracefirst($text),
1054 q{' . $rule->{name} . '},
1055 $tracelevel)
1056 if defined $::RD_TRACE; ' .'
1057 $_tok = do { ' . $self->{"code"} . ' };
1058 if (defined($_tok))
1059 {
1060 Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' directive<< (return value: [}
1061 . $_tok . q{])},
1062 Parse::RecDescent::_tracefirst($text))
1063 if defined $::RD_TRACE;
1064 }
1065 else
1066 {
1067 Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' directive>>},
1068 Parse::RecDescent::_tracefirst($text))
1069 if defined $::RD_TRACE;
1070 }
1071 ' . ($self->{"lookahead"} ? '$text = $_savetext and ' : '' ) .'
1072 last '
1073 . ($self->{"lookahead"}<0?'if':'unless') . ' defined $_tok;
1074 push @item, $item{'.$self->{hashname}.'}=$_tok;
1075 ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .'
1076'
1077}
1078
10791;
1080
1081package Parse::RecDescent::UncondReject;
1082
1083sub issubrule { undef }
1084sub isterminal { 0 }
1085sub describe { $_[1] ? '' : $_[0]->{name} }
1086sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; }
1087
1088sub new ($$$;$)
1089{
1090 my $class = ref($_[0]) || $_[0];
1091 bless
1092 {
1093 "lookahead" => $_[1],
1094 "line" => $_[2],
1095 "name" => $_[3],
1096 }, $class;
1097}
1098
1099# MARK, YOU MAY WANT TO OPTIMIZE THIS.
1100
1101
1102sub code($$$$)
1103{
1104 my ($self, $namespace, $rule) = @_;
1105
1106'
1107 Parse::RecDescent::_trace(q{>>Rejecting production<< (found '
1108 . $self->describe . ')},
1109 Parse::RecDescent::_tracefirst($text),
1110 q{' . $rule->{name} . '},
1111 $tracelevel)
1112 if defined $::RD_TRACE;
1113 undef $return;
1114 ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .'
1115
1116 $_tok = undef;
1117 ' . ($self->{"lookahead"} ? '$text = $_savetext and ' : '' ) .'
1118 last '
1119 . ($self->{"lookahead"}<0?'if':'unless') . ' defined $_tok;
1120'
1121}
1122
11231;
1124
1125package Parse::RecDescent::Error;
1126
1127sub issubrule { undef }
1128sub isterminal { 0 }
1129sub describe { $_[1] ? '' : $_[0]->{commitonly} ? '<error?:...>' : '<error...>' }
1130sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; }
1131
1132sub new ($$$$$)
1133{
1134 my $class = ref($_[0]) || $_[0];
1135 bless
1136 {
1137 "msg" => $_[1],
1138 "lookahead" => $_[2],
1139 "commitonly" => $_[3],
1140 "line" => $_[4],
1141 }, $class;
1142}
1143
1144sub code($$$$)
1145{
1146 my ($self, $namespace, $rule) = @_;
1147
1148 my $action = '';
1149
1150 if ($self->{"msg"}) # ERROR MESSAGE SUPPLIED
1151 {
1152 #WAS: $action .= "Parse::RecDescent::_error(qq{$self->{msg}}" . ',$thisline);';
1153 $action .= 'push @{$thisparser->{errors}}, [qq{'.$self->{msg}.'},$thisline];';
1154
1155 }
1156 else # GENERATE ERROR MESSAGE DURING PARSE
1157 {
1158 $action .= '
1159 my $rule = $item[0];
1160 $rule =~ s/_/ /g;
1161 #WAS: Parse::RecDescent::_error("Invalid $rule: " . $expectation->message() ,$thisline);
1162 push @{$thisparser->{errors}}, ["Invalid $rule: " . $expectation->message() ,$thisline];
1163 ';
1164 }
1165
1166 my $dir =
1167 new Parse::RecDescent::Directive('if (' .
1168 ($self->{"commitonly"} ? '$commit' : '1') .
1169 ") { do {$action} unless ".' $_noactions; undef } else {0}',
1170 $self->{"lookahead"},0,$self->describe);
1171 $dir->{hashname} = $self->{hashname};
1172 return $dir->code($namespace, $rule, 0);
1173}
1174
11751;
1176
1177package Parse::RecDescent::Token;
1178
1179sub sethashname { $_[0]->{hashname} = '__PATTERN' . ++$_[1]->{patcount} . '__'; }
1180
1181sub issubrule { undef }
1182sub isterminal { 1 }
1183sub describe ($) { shift->{'description'}}
1184
1185
1186# ARGS ARE: $self, $pattern, $left_delim, $modifiers, $lookahead, $linenum
1187sub new ($$$$$$)
1188{
1189 my $class = ref($_[0]) || $_[0];
1190 my $pattern = $_[1];
1191 my $pat = $_[1];
1192 my $ldel = $_[2];
1193 my $rdel = $ldel;
1194 $rdel =~ tr/{[(</}])>/;
1195
1196 my $mod = $_[3];
1197
1198 my $desc;
1199
1200 if ($ldel eq '/') { $desc = "$ldel$pattern$rdel$mod" }
1201 else { $desc = "m$ldel$pattern$rdel$mod" }
1202 $desc =~ s/\\/\\\\/g;
1203 $desc =~ s/\$$/\\\$/g;
1204 $desc =~ s/}/\\}/g;
1205 $desc =~ s/{/\\{/g;
1206
1207 if (!eval "no strict;
1208 local \$SIG{__WARN__} = sub {0};
1209 '' =~ m$ldel$pattern$rdel$mod" and $@)
1210 {
1211 Parse::RecDescent::_warn(3, "Token pattern \"m$ldel$pattern$rdel$mod\"
1212 may not be a valid regular expression",
1213 $_[5]);
1214 $@ =~ s/ at \(eval.*/./;
1215 Parse::RecDescent::_hint($@);
1216 }
1217
1218 # QUIETLY PREVENT (WELL-INTENTIONED) CALAMITY
1219 $mod =~ s/[gc]//g;
1220 $pattern =~ s/(\A|[^\\])\\G/$1/g;
1221
1222 bless
1223 {
1224 "pattern" => $pattern,
1225 "ldelim" => $ldel,
1226 "rdelim" => $rdel,
1227 "mod" => $mod,
1228 "lookahead" => $_[4],
1229 "line" => $_[5],
1230 "description" => $desc,
1231 }, $class;
1232}
1233
1234
1235sub code($$$$$)
1236{
1237 my ($self, $namespace, $rule, $check) = @_;
1238 my $ldel = $self->{"ldelim"};
1239 my $rdel = $self->{"rdelim"};
1240 my $sdel = $ldel;
1241 my $mod = $self->{"mod"};
1242
1243 $sdel =~ s/[[{(<]/{}/;
1244
1245my $code = '
1246 Parse::RecDescent::_trace(q{Trying terminal: [' . $self->describe
1247 . ']}, Parse::RecDescent::_tracefirst($text),
1248 q{' . $rule->{name} . '},
1249 $tracelevel)
1250 if defined $::RD_TRACE;
1251 undef $lastsep;
1252 $expectation->is(q{' . ($rule->hasleftmost($self) ? ''
1253 : $self->describe ) . '})->at($text);
1254 ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) . '
1255
1256 ' . ($self->{"lookahead"}<0?'if':'unless')
1257 . ' ($text =~ s/\A($skip)/$lastsep=$1 and ""/e and '
1258 . ($check->{itempos}? 'do {'.Parse::RecDescent::Production::incitempos().' 1} and ' : '')
1259 . ' $text =~ m' . $ldel . '\A(?:' . $self->{"pattern"} . ')' . $rdel . $mod . ')
1260 {
1261 '.($self->{"lookahead"} ? '$text = $_savetext;' : '$text = $lastsep . $text if defined $lastsep;') .
1262 ($check->{itempos} ? Parse::RecDescent::Production::unincitempos() : '') . '
1263 $expectation->failed();
1264 Parse::RecDescent::_trace(q{<<Didn\'t match terminal>>},
1265 Parse::RecDescent::_tracefirst($text))
1266 if defined $::RD_TRACE;
1267
1268 last;
1269 }
1270 $current_match = substr($text, $-[0], $+[0] - $-[0]);
1271 substr($text,0,length($current_match),q{});
1272 Parse::RecDescent::_trace(q{>>Matched terminal<< (return value: [}
1273 . $current_match . q{])},
1274 Parse::RecDescent::_tracefirst($text))
1275 if defined $::RD_TRACE;
1276 push @item, $item{'.$self->{hashname}.'}=$current_match;
1277 ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .'
1278';
1279
1280 return $code;
1281}
1282
12831;
1284
1285package Parse::RecDescent::Literal;
1286
1287sub sethashname { $_[0]->{hashname} = '__STRING' . ++$_[1]->{strcount} . '__'; }
1288
1289sub issubrule { undef }
1290sub isterminal { 1 }
1291sub describe ($) { shift->{'description'} }
1292
1293sub new ($$$$)
1294{
1295 my $class = ref($_[0]) || $_[0];
1296
1297 my $pattern = $_[1];
1298
1299 my $desc = $pattern;
1300 $desc=~s/\\/\\\\/g;
1301 $desc=~s/}/\\}/g;
1302 $desc=~s/{/\\{/g;
1303
1304 bless
1305 {
1306 "pattern" => $pattern,
1307 "lookahead" => $_[2],
1308 "line" => $_[3],
1309 "description" => "'$desc'",
1310 }, $class;
1311}
1312
1313
1314sub code($$$$)
1315{
1316 my ($self, $namespace, $rule, $check) = @_;
1317
1318my $code = '
1319 Parse::RecDescent::_trace(q{Trying terminal: [' . $self->describe
1320 . ']},
1321 Parse::RecDescent::_tracefirst($text),
1322 q{' . $rule->{name} . '},
1323 $tracelevel)
1324 if defined $::RD_TRACE;
1325 undef $lastsep;
1326 $expectation->is(q{' . ($rule->hasleftmost($self) ? ''
1327 : $self->describe ) . '})->at($text);
1328 ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) . '
1329
1330 ' . ($self->{"lookahead"}<0?'if':'unless')
1331 . ' ($text =~ s/\A($skip)/$lastsep=$1 and ""/e and '
1332 . ($check->{itempos}? 'do {'.Parse::RecDescent::Production::incitempos().' 1} and ' : '')
1333 . ' $text =~ m/\A' . quotemeta($self->{"pattern"}) . '/)
1334 {
1335 '.($self->{"lookahead"} ? '$text = $_savetext;' : '$text = $lastsep . $text if defined $lastsep;').'
1336 '. ($check->{itempos} ? Parse::RecDescent::Production::unincitempos() : '') . '
1337 $expectation->failed();
1338 Parse::RecDescent::_trace(qq{<<Didn\'t match terminal>>},
1339 Parse::RecDescent::_tracefirst($text))
1340 if defined $::RD_TRACE;
1341 last;
1342 }
1343 $current_match = substr($text, $-[0], $+[0] - $-[0]);
1344 substr($text,0,length($current_match),q{});
1345 Parse::RecDescent::_trace(q{>>Matched terminal<< (return value: [}
1346 . $current_match . q{])},
1347 Parse::RecDescent::_tracefirst($text))
1348 if defined $::RD_TRACE;
1349 push @item, $item{'.$self->{hashname}.'}=$current_match;
1350 ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .'
1351';
1352
1353 return $code;
1354}
1355
13561;
1357
1358package Parse::RecDescent::InterpLit;
1359
1360sub sethashname { $_[0]->{hashname} = '__STRING' . ++$_[1]->{strcount} . '__'; }
1361
1362sub issubrule { undef }
1363sub isterminal { 1 }
1364sub describe ($) { shift->{'description'} }
1365
1366sub new ($$$$)
1367{
1368 my $class = ref($_[0]) || $_[0];
1369
1370 my $pattern = $_[1];
1371 $pattern =~ s#/#\\/#g;
1372
1373 my $desc = $pattern;
1374 $desc=~s/\\/\\\\/g;
1375 $desc=~s/}/\\}/g;
1376 $desc=~s/{/\\{/g;
1377
1378 bless
1379 {
1380 "pattern" => $pattern,
1381 "lookahead" => $_[2],
1382 "line" => $_[3],
1383 "description" => "'$desc'",
1384 }, $class;
1385}
1386
1387sub code($$$$)
1388{
1389 my ($self, $namespace, $rule, $check) = @_;
1390
1391my $code = '
1392 Parse::RecDescent::_trace(q{Trying terminal: [' . $self->describe
1393 . ']},
1394 Parse::RecDescent::_tracefirst($text),
1395 q{' . $rule->{name} . '},
1396 $tracelevel)
1397 if defined $::RD_TRACE;
1398 undef $lastsep;
1399 $expectation->is(q{' . ($rule->hasleftmost($self) ? ''
1400 : $self->describe ) . '})->at($text);
1401 ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) . '
1402
1403 ' . ($self->{"lookahead"}<0?'if':'unless')
1404 . ' ($text =~ s/\A($skip)/$lastsep=$1 and ""/e and '
1405 . ($check->{itempos}? 'do {'.Parse::RecDescent::Production::incitempos().' 1} and ' : '')
1406 . ' do { $_tok = "' . $self->{"pattern"} . '"; 1 } and
1407 substr($text,0,length($_tok)) eq $_tok and
1408 do { substr($text,0,length($_tok)) = ""; 1; }
1409 )
1410 {
1411 '.($self->{"lookahead"} ? '$text = $_savetext;' : '$text = $lastsep . $text if defined $lastsep;').'
1412 '. ($check->{itempos} ? Parse::RecDescent::Production::unincitempos() : '') . '
1413 $expectation->failed();
1414 Parse::RecDescent::_trace(q{<<Didn\'t match terminal>>},
1415 Parse::RecDescent::_tracefirst($text))
1416 if defined $::RD_TRACE;
1417 last;
1418 }
1419 Parse::RecDescent::_trace(q{>>Matched terminal<< (return value: [}
1420 . $_tok . q{])},
1421 Parse::RecDescent::_tracefirst($text))
1422 if defined $::RD_TRACE;
1423 push @item, $item{'.$self->{hashname}.'}=$_tok;
1424 ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .'
1425';
1426
1427 return $code;
1428}
1429
14301;
1431
1432package Parse::RecDescent::Subrule;
1433
1434sub issubrule ($) { return $_[0]->{"subrule"} }
1435sub isterminal { 0 }
1436sub sethashname {}
1437
1438sub describe ($)
1439{
1440 my $desc = $_[0]->{"implicit"} || $_[0]->{"subrule"};
1441 $desc = "<matchrule:$desc>" if $_[0]->{"matchrule"};
1442 return $desc;
1443}
1444
1445sub callsyntax($$)
1446{
1447 if ($_[0]->{"matchrule"})
1448 {
1449 return "&{'$_[1]'.qq{$_[0]->{subrule}}}";
1450 }
1451 else
1452 {
1453 return $_[1].$_[0]->{"subrule"};
1454 }
1455}
1456
1457sub new ($$$$;$$$)
1458{
1459 my $class = ref($_[0]) || $_[0];
1460 bless
1461 {
1462 "subrule" => $_[1],
1463 "lookahead" => $_[2],
1464 "line" => $_[3],
1465 "implicit" => $_[4] || undef,
1466 "matchrule" => $_[5],
1467 "argcode" => $_[6] || undef,
1468 }, $class;
1469}
1470
1471
1472sub code($$$$)
1473{
1474 my ($self, $namespace, $rule, $check) = @_;
1475
1476'
1477 Parse::RecDescent::_trace(q{Trying subrule: [' . $self->{"subrule"} . ']},
1478 Parse::RecDescent::_tracefirst($text),
1479 q{' . $rule->{"name"} . '},
1480 $tracelevel)
1481 if defined $::RD_TRACE;
1482 if (1) { no strict qw{refs};
1483 $expectation->is(' . ($rule->hasleftmost($self) ? 'q{}'
1484 # WAS : 'qq{'.$self->describe.'}' ) . ')->at($text);
1485 : 'q{'.$self->describe.'}' ) . ')->at($text);
1486 ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' )
1487 . ($self->{"lookahead"}<0?'if':'unless')
1488 . ' (defined ($_tok = '
1489 . $self->callsyntax($namespace.'::')
1490 . '($thisparser,$text,$repeating,'
1491 . ($self->{"lookahead"}?'1':'$_noactions')
1492 . ($check->{"itempos"}?',$itempos[$#itempos]':',undef')
1493 . ($self->{argcode} ? ",sub { return $self->{argcode} }"
1494 : ',sub { \\@arg }')
1495 . ')))
1496 {
1497 '.($self->{"lookahead"} ? '$text = $_savetext;' : '').'
1498 Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' subrule: ['
1499 . $self->{subrule} . ']>>},
1500 Parse::RecDescent::_tracefirst($text),
1501 q{' . $rule->{"name"} .'},
1502 $tracelevel)
1503 if defined $::RD_TRACE;
1504 $expectation->failed();
1505 last;
1506 }
1507 Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' subrule: ['
1508 . $self->{subrule} . ']<< (return value: [}
1509 . $_tok . q{]},
1510
1511 Parse::RecDescent::_tracefirst($text),
1512 q{' . $rule->{"name"} .'},
1513 $tracelevel)
1514 if defined $::RD_TRACE;
1515 $item{q{' . $self->{subrule} . '}} = $_tok;
1516 push @item, $_tok;
1517 ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .'
1518 }
1519'
1520}
1521
1522package Parse::RecDescent::Repetition;
1523
1524sub issubrule ($) { return $_[0]->{"subrule"} }
1525sub isterminal { 0 }
1526sub sethashname { }
1527
1528sub describe ($)
1529{
1530 my $desc = $_[0]->{"expected"} || $_[0]->{"subrule"};
1531 $desc = "<matchrule:$desc>" if $_[0]->{"matchrule"};
1532 return $desc;
1533}
1534
1535sub callsyntax($$)
1536{
1537 if ($_[0]->{matchrule})
1538 { return "sub { goto &{''.qq{$_[1]$_[0]->{subrule}}} }"; }
1539 else
1540 { return "\\&$_[1]$_[0]->{subrule}"; }
1541}
1542
1543sub new ($$$$$$$$$$)
1544{
1545 my ($self, $subrule, $repspec, $min, $max, $lookahead, $line, $parser, $matchrule, $argcode) = @_;
1546 my $class = ref($self) || $self;
1547 ($max, $min) = ( $min, $max) if ($max<$min);
1548
1549 my $desc;
1550 if ($subrule=~/\A_alternation_\d+_of_production_\d+_of_rule/)
1551 { $desc = $parser->{"rules"}{$subrule}->expected }
1552
1553 if ($lookahead)
1554 {
1555 if ($min>0)
1556 {
1557 return new Parse::RecDescent::Subrule($subrule,$lookahead,$line,$desc,$matchrule,$argcode);
1558 }
1559 else
1560 {
1561 Parse::RecDescent::_error("Not symbol (\"!\") before
1562 \"$subrule\" doesn't make
1563 sense.",$line);
1564 Parse::RecDescent::_hint("Lookahead for negated optional
1565 repetitions (such as
1566 \"!$subrule($repspec)\" can never
1567 succeed, since optional items always
1568 match (zero times at worst).
1569 Did you mean a single \"!$subrule\",
1570 instead?");
1571 }
1572 }
1573 bless
1574 {
1575 "subrule" => $subrule,
1576 "repspec" => $repspec,
1577 "min" => $min,
1578 "max" => $max,
1579 "lookahead" => $lookahead,
1580 "line" => $line,
1581 "expected" => $desc,
1582 "argcode" => $argcode || undef,
1583 "matchrule" => $matchrule,
1584 }, $class;
1585}
1586
1587sub code($$$$)
1588{
1589 my ($self, $namespace, $rule, $check) = @_;
1590
1591 my ($subrule, $repspec, $min, $max, $lookahead) =
1592 @{$self}{ qw{subrule repspec min max lookahead} };
1593
1594'
1595 Parse::RecDescent::_trace(q{Trying repeated subrule: [' . $self->describe . ']},
1596 Parse::RecDescent::_tracefirst($text),
1597 q{' . $rule->{"name"} . '},
1598 $tracelevel)
1599 if defined $::RD_TRACE;
1600 $expectation->is(' . ($rule->hasleftmost($self) ? 'q{}'
1601 # WAS : 'qq{'.$self->describe.'}' ) . ')->at($text);
1602 : 'q{'.$self->describe.'}' ) . ')->at($text);
1603 ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .'
1604 unless (defined ($_tok = $thisparser->_parserepeat($text, '
1605 . $self->callsyntax($namespace.'::')
1606 . ', ' . $min . ', ' . $max . ', '
1607 . ($self->{"lookahead"}?'1':'$_noactions')
1608 . ($check->{"itempos"}?',$itempos[$#itempos]':',undef')
1609 . ',$expectation,'
1610 . ($self->{argcode} ? "sub { return $self->{argcode} }"
1611 : 'sub { \\@arg }')
1612 . ')))
1613 {
1614 Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' repeated subrule: ['
1615 . $self->describe . ']>>},
1616 Parse::RecDescent::_tracefirst($text),
1617 q{' . $rule->{"name"} .'},
1618 $tracelevel)
1619 if defined $::RD_TRACE;
1620 last;
1621 }
1622 Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' repeated subrule: ['
1623 . $self->{subrule} . ']<< (}
1624 . @$_tok . q{ times)},
1625
1626 Parse::RecDescent::_tracefirst($text),
1627 q{' . $rule->{"name"} .'},
1628 $tracelevel)
1629 if defined $::RD_TRACE;
1630 $item{q{' . "$self->{subrule}($self->{repspec})" . '}} = $_tok;
1631 push @item, $_tok;
1632 ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .'
1633
1634'
1635}
1636
1637package Parse::RecDescent::Result;
1638
1639sub issubrule { 0 }
1640sub isterminal { 0 }
1641sub describe { '' }
1642
1643sub new
1644{
1645 my ($class, $pos) = @_;
1646
1647 bless {}, $class;
1648}
1649
1650sub code($$$$)
1651{
1652 my ($self, $namespace, $rule) = @_;
1653
1654 '
1655 $return = $item[-1];
1656 ';
1657}
1658
1659package Parse::RecDescent::Operator;
1660
1661my @opertype = ( " non-optional", "n optional" );
1662
1663sub issubrule { 0 }
1664sub isterminal { 0 }
1665
1666sub describe { $_[0]->{"expected"} }
1667sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; }
1668
1669
1670sub new
1671{
1672 my ($class, $type, $minrep, $maxrep, $leftarg, $op, $rightarg) = @_;
1673
1674 bless
1675 {
1676 "type" => "${type}op",
1677 "leftarg" => $leftarg,
1678 "op" => $op,
1679 "min" => $minrep,
1680 "max" => $maxrep,
1681 "rightarg" => $rightarg,
1682 "expected" => "<${type}op: ".$leftarg->describe." ".$op->describe." ".$rightarg->describe.">",
1683 }, $class;
1684}
1685
1686sub code($$$$)
1687{
1688 my ($self, $namespace, $rule, $check) = @_;
1689
1690 my @codeargs = @_[1..$#_];
1691
1692 my ($leftarg, $op, $rightarg) =
1693 @{$self}{ qw{leftarg op rightarg} };
1694
1695 my $code = '
1696 Parse::RecDescent::_trace(q{Trying operator: [' . $self->describe . ']},
1697 Parse::RecDescent::_tracefirst($text),
1698 q{' . $rule->{"name"} . '},
1699 $tracelevel)
1700 if defined $::RD_TRACE;
1701 $expectation->is(' . ($rule->hasleftmost($self) ? 'q{}'
1702 # WAS : 'qq{'.$self->describe.'}' ) . ')->at($text);
1703 : 'q{'.$self->describe.'}' ) . ')->at($text);
1704
1705 $_tok = undef;
1706 OPLOOP: while (1)
1707 {
1708 $repcount = 0;
1709 my @item;
1710 my %item;
1711';
1712
1713 $code .= '
1714 my $_itempos = $itempos[-1];
1715 my $itemposfirst;
1716' if $check->{itempos};
1717
1718 if ($self->{type} eq "leftop" )
1719 {
1720 $code .= '
1721 # MATCH LEFTARG
1722 ' . $leftarg->code(@codeargs) . '
1723
1724';
1725
1726 $code .= '
1727 if (defined($_itempos) and !defined($itemposfirst))
1728 {
1729 $itemposfirst = Parse::RecDescent::Production::_duplicate_itempos($_itempos);
1730 }
1731' if $check->{itempos};
1732
1733 $code .= '
1734 $repcount++;
1735
1736 my $savetext = $text;
1737 my $backtrack;
1738
1739 # MATCH (OP RIGHTARG)(s)
1740 while ($repcount < ' . $self->{max} . ')
1741 {
1742 $backtrack = 0;
1743 ' . $op->code(@codeargs) . '
1744 ' . ($op->isterminal() ? 'pop @item;' : '$backtrack=1;' ) . '
1745 ' . (ref($op) eq 'Parse::RecDescent::Token'
1746 ? 'if (defined $1) {push @item, $item{'.($self->{name}||$self->{hashname}).'}=$1; $backtrack=1;}'
1747 : "" ) . '
1748 ' . $rightarg->code(@codeargs) . '
1749 $savetext = $text;
1750 $repcount++;
1751 }
1752 $text = $savetext;
1753 pop @item if $backtrack;
1754
1755 ';
1756 }
1757 else
1758 {
1759 $code .= '
1760 my $savetext = $text;
1761 my $backtrack;
1762 # MATCH (LEFTARG OP)(s)
1763 while ($repcount < ' . $self->{max} . ')
1764 {
1765 $backtrack = 0;
1766 ' . $leftarg->code(@codeargs) . '
1767';
1768 $code .= '
1769 if (defined($_itempos) and !defined($itemposfirst))
1770 {
1771 $itemposfirst = Parse::RecDescent::Production::_duplicate_itempos($_itempos);
1772 }
1773' if $check->{itempos};
1774
1775 $code .= '
1776 $repcount++;
1777 $backtrack = 1;
1778 ' . $op->code(@codeargs) . '
1779 $savetext = $text;
1780 ' . ($op->isterminal() ? 'pop @item;' : "" ) . '
1781 ' . (ref($op) eq 'Parse::RecDescent::Token' ? 'do { push @item, $item{'.($self->{name}||$self->{hashname}).'}=$1; } if defined $1;' : "" ) . '
1782 }
1783 $text = $savetext;
1784 pop @item if $backtrack;
1785
1786 # MATCH RIGHTARG
1787 ' . $rightarg->code(@codeargs) . '
1788 $repcount++;
1789 ';
1790 }
1791
1792 $code .= 'unless (@item) { undef $_tok; last }' unless $self->{min}==0;
1793
1794 $code .= '
1795 $_tok = [ @item ];
1796';
1797
1798
1799 $code .= '
1800 if (defined $itemposfirst)
1801 {
1802 Parse::RecDescent::Production::_update_itempos(
1803 $_itempos, $itemposfirst, undef, [qw(from)]);
1804 }
1805' if $check->{itempos};
1806
1807 $code .= '
1808 last;
1809 } # end of OPLOOP
1810';
1811
1812 $code .= '
1813 unless ($repcount>='.$self->{min}.')
1814 {
1815 Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' operator: ['
1816 . $self->describe
1817 . ']>>},
1818 Parse::RecDescent::_tracefirst($text),
1819 q{' . $rule->{"name"} .'},
1820 $tracelevel)
1821 if defined $::RD_TRACE;
1822 $expectation->failed();
1823 last;
1824 }
1825 Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' operator: ['
1826 . $self->describe
1827 . ']<< (return value: [}
1828 . qq{@{$_tok||[]}} . q{]},
1829 Parse::RecDescent::_tracefirst($text),
1830 q{' . $rule->{"name"} .'},
1831 $tracelevel)
1832 if defined $::RD_TRACE;
1833
1834 push @item, $item{'.($self->{name}||$self->{hashname}).'}=$_tok||[];
1835';
1836
1837 return $code;
1838}
1839
1840
1841package Parse::RecDescent::Expectation;
1842
1843sub new ($)
1844{
1845 bless {
1846 "failed" => 0,
1847 "expected" => "",
1848 "unexpected" => "",
1849 "lastexpected" => "",
1850 "lastunexpected" => "",
1851 "defexpected" => $_[1],
1852 };
1853}
1854
1855sub is ($$)
1856{
1857 $_[0]->{lastexpected} = $_[1]; return $_[0];
1858}
1859
1860sub at ($$)
1861{
1862 $_[0]->{lastunexpected} = $_[1]; return $_[0];
1863}
1864
1865sub failed ($)
1866{
1867 return unless $_[0]->{lastexpected};
1868 $_[0]->{expected} = $_[0]->{lastexpected} unless $_[0]->{failed};
1869 $_[0]->{unexpected} = $_[0]->{lastunexpected} unless $_[0]->{failed};
1870 $_[0]->{failed} = 1;
1871}
1872
1873sub message ($)
1874{
1875 my ($self) = @_;
1876 $self->{expected} = $self->{defexpected} unless $self->{expected};
1877 $self->{expected} =~ s/_/ /g;
1878 if (!$self->{unexpected} || $self->{unexpected} =~ /\A\s*\Z/s)
1879 {
1880 return "Was expecting $self->{expected}";
1881 }
1882 else
1883 {
1884 $self->{unexpected} =~ /\s*(.*)/;
1885 return "Was expecting $self->{expected} but found \"$1\" instead";
1886 }
1887}
1888
18891;
1890
1891package Parse::RecDescent;
1892
1893use Carp;
1894use vars qw ( $AUTOLOAD $VERSION $_FILENAME);
1895
1896my $ERRORS = 0;
1897
1898our $VERSION = '1.967006';
1899$VERSION = eval $VERSION;
1900$_FILENAME=__FILE__;
1901
1902# BUILDING A PARSER
1903
1904my $nextnamespace = "namespace000001";
1905
1906sub _nextnamespace()
1907{
1908 return "Parse::RecDescent::" . $nextnamespace++;
1909}
1910
1911# ARGS ARE: $class, $grammar, $compiling, $namespace
1912sub new ($$$$)
1913{
1914 my $class = ref($_[0]) || $_[0];
1915 local $Parse::RecDescent::compiling = $_[2];
1916 my $name_space_name = defined $_[3]
1917 ? "Parse::RecDescent::".$_[3]
1918 : _nextnamespace();
1919 my $self =
1920 {
1921 "rules" => {},
1922 "namespace" => $name_space_name,
1923 "startcode" => '',
1924 "localvars" => '',
1925 "_AUTOACTION" => undef,
1926 "_AUTOTREE" => undef,
1927 };
1928
1929
1930 if ($::RD_AUTOACTION) {
1931 my $sourcecode = $::RD_AUTOACTION;
1932 $sourcecode = "{ $sourcecode }"
1933 unless $sourcecode =~ /\A\s*\{.*\}\s*\Z/;
1934 $self->{_check}{itempos} =
1935 $sourcecode =~ /\@itempos\b|\$itempos\s*\[/;
1936 $self->{_AUTOACTION}
1937 = new Parse::RecDescent::Action($sourcecode,0,-1)
1938 }
1939
1940 bless $self, $class;
1941 return $self->Replace($_[1])
1942}
1943
1944sub Compile($$$$) {
1945 die "Compilation of Parse::RecDescent grammars not yet implemented\n";
1946}
1947
1948sub DESTROY {
1949 my ($self) = @_;
1950 my $namespace = $self->{namespace};
1951 $namespace =~ s/Parse::RecDescent:://;
1952 if (!$self->{_precompiled}) {
1953 delete $Parse::RecDescent::{$namespace.'::'};
1954 }
1955}
1956
1957# BUILDING A GRAMMAR....
1958
1959# ARGS ARE: $self, $grammar, $isimplicit, $isleftop
1960sub Replace ($$)
1961{
1962 # set $replace = 1 for _generate
1963 splice(@_, 2, 0, 1);
1964
1965 return _generate(@_);
1966}
1967
1968# ARGS ARE: $self, $grammar, $isimplicit, $isleftop
1969sub Extend ($$)
1970{
1971 # set $replace = 0 for _generate
1972 splice(@_, 2, 0, 0);
1973
1974 return _generate(@_);
1975}
1976
1977sub _no_rule ($$;$)
1978{
1979 _error("Ruleless $_[0] at start of grammar.",$_[1]);
1980 my $desc = $_[2] ? "\"$_[2]\"" : "";
1981 _hint("You need to define a rule for the $_[0] $desc
1982 to be part of.");
1983}
1984
1985my $NEGLOOKAHEAD = '\G(\s*\.\.\.\!)';
1986my $POSLOOKAHEAD = '\G(\s*\.\.\.)';
1987my $RULE = '\G\s*(\w+)[ \t]*:';
1988my $PROD = '\G\s*([|])';
1989my $TOKEN = q{\G\s*/((\\\\/|\\\\\\\\|[^/])*)/([cgimsox]*)};
1990my $MTOKEN = q{\G\s*(m\s*[^\w\s])};
1991my $LITERAL = q{\G\s*'((\\\\['\\\\]|[^'])*)'};
1992my $INTERPLIT = q{\G\s*"((\\\\["\\\\]|[^"])*)"};
1993my $SUBRULE = '\G\s*(\w+)';
1994my $MATCHRULE = '\G(\s*<matchrule:)';
1995my $SIMPLEPAT = '((\\s+/[^/\\\\]*(?:\\\\.[^/\\\\]*)*/)?)';
1996my $OPTIONAL = '\G\((\?)'.$SIMPLEPAT.'\)';
1997my $ANY = '\G\((s\?)'.$SIMPLEPAT.'\)';
1998my $MANY = '\G\((s|\.\.)'.$SIMPLEPAT.'\)';
1999my $EXACTLY = '\G\(([1-9]\d*)'.$SIMPLEPAT.'\)';
2000my $BETWEEN = '\G\((\d+)\.\.([1-9]\d*)'.$SIMPLEPAT.'\)';
2001my $ATLEAST = '\G\((\d+)\.\.'.$SIMPLEPAT.'\)';
2002my $ATMOST = '\G\(\.\.([1-9]\d*)'.$SIMPLEPAT.'\)';
2003my $BADREP = '\G\((-?\d+)?\.\.(-?\d+)?'.$SIMPLEPAT.'\)';
2004my $ACTION = '\G\s*\{';
2005my $IMPLICITSUBRULE = '\G\s*\(';
2006my $COMMENT = '\G\s*(#.*)';
2007my $COMMITMK = '\G\s*<commit>';
2008my $UNCOMMITMK = '\G\s*<uncommit>';
2009my $QUOTELIKEMK = '\G\s*<perl_quotelike>';
2010my $CODEBLOCKMK = '\G\s*<perl_codeblock(?:\s+([][()<>{}]+))?>';
2011my $VARIABLEMK = '\G\s*<perl_variable>';
2012my $NOCHECKMK = '\G\s*<nocheck>';
2013my $AUTOACTIONPATMK = '\G\s*<autoaction:';
2014my $AUTOTREEMK = '\G\s*<autotree(?::\s*([\w:]+)\s*)?>';
2015my $AUTOSTUBMK = '\G\s*<autostub>';
2016my $AUTORULEMK = '\G\s*<autorule:(.*?)>';
2017my $REJECTMK = '\G\s*<reject>';
2018my $CONDREJECTMK = '\G\s*<reject:';
2019my $SCOREMK = '\G\s*<score:';
2020my $AUTOSCOREMK = '\G\s*<autoscore:';
2021my $SKIPMK = '\G\s*<skip:';
2022my $OPMK = '\G\s*<(left|right)op(?:=(\'.*?\'))?:';
2023my $ENDDIRECTIVEMK = '\G\s*>';
2024my $RESYNCMK = '\G\s*<resync>';
2025my $RESYNCPATMK = '\G\s*<resync:';
2026my $RULEVARPATMK = '\G\s*<rulevar:';
2027my $DEFERPATMK = '\G\s*<defer:';
2028my $TOKENPATMK = '\G\s*<token:';
2029my $AUTOERRORMK = '\G\s*<error(\??)>';
2030my $MSGERRORMK = '\G\s*<error(\??):';
2031my $NOCHECK = '\G\s*<nocheck>';
2032my $WARNMK = '\G\s*<warn((?::\s*(\d+)\s*)?)>';
2033my $HINTMK = '\G\s*<hint>';
2034my $TRACEBUILDMK = '\G\s*<trace_build((?::\s*(\d+)\s*)?)>';
2035my $TRACEPARSEMK = '\G\s*<trace_parse((?::\s*(\d+)\s*)?)>';
2036my $UNCOMMITPROD = $PROD.'\s*<uncommit';
2037my $ERRORPROD = $PROD.'\s*<error';
2038my $LONECOLON = '\G\s*:';
2039my $OTHER = '\G\s*([^\s]+)';
2040
2041my @lines = 0;
2042
2043sub _generate
2044{
2045 my ($self, $grammar, $replace, $isimplicit, $isleftop) = (@_, 0);
2046
2047 my $aftererror = 0;
2048 my $lookahead = 0;
2049 my $lookaheadspec = "";
2050 my $must_pop_lines;
2051 if (! $lines[-1]) {
2052 push @lines, _linecount($grammar) ;
2053 $must_pop_lines = 1;
2054 }
2055 $self->{_check}{itempos} = ($grammar =~ /\@itempos\b|\$itempos\s*\[/)
2056 unless $self->{_check}{itempos};
2057 for (qw(thisoffset thiscolumn prevline prevoffset prevcolumn))
2058 {
2059 $self->{_check}{$_} =
2060 ($grammar =~ /\$$_/) || $self->{_check}{itempos}
2061 unless $self->{_check}{$_};
2062 }
2063 my $line;
2064
2065 my $rule = undef;
2066 my $prod = undef;
2067 my $item = undef;
2068 my $lastgreedy = '';
2069 pos $grammar = 0;
2070 study $grammar;
2071
2072 local $::RD_HINT = $::RD_HINT;
2073 local $::RD_WARN = $::RD_WARN;
2074 local $::RD_TRACE = $::RD_TRACE;
2075 local $::RD_CHECK = $::RD_CHECK;
2076
2077 while (pos $grammar < length $grammar)
2078 {
2079 $line = $lines[-1] - _linecount($grammar) + 1;
2080 my $commitonly;
2081 my $code = "";
2082 my @components = ();
2083 if ($grammar =~ m/$COMMENT/gco)
2084 {
2085 _parse("a comment",0,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2086 next;
2087 }
2088 elsif ($grammar =~ m/$NEGLOOKAHEAD/gco)
2089 {
2090 _parse("a negative lookahead",$aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2091 $lookahead = $lookahead ? -$lookahead : -1;
2092 $lookaheadspec .= $1;
2093 next; # SKIP LOOKAHEAD RESET AT END OF while LOOP
2094 }
2095 elsif ($grammar =~ m/$POSLOOKAHEAD/gco)
2096 {
2097 _parse("a positive lookahead",$aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2098 $lookahead = $lookahead ? $lookahead : 1;
2099 $lookaheadspec .= $1;
2100 next; # SKIP LOOKAHEAD RESET AT END OF while LOOP
2101 }
2102 elsif ($grammar =~ m/(?=$ACTION)/gco
2103 and do { ($code) = extract_codeblock($grammar); $code })
2104 {
2105 _parse("an action", $aftererror, $line, $code);
2106 $item = new Parse::RecDescent::Action($code,$lookahead,$line);
2107 $prod and $prod->additem($item)
2108 or $self->_addstartcode($code);
2109 }
2110 elsif ($grammar =~ m/(?=$IMPLICITSUBRULE)/gco
2111 and do { ($code) = extract_codeblock($grammar,'{([',undef,'(',1);
2112 $code })
2113 {
2114 $code =~ s/\A\s*\(|\)\Z//g;
2115 _parse("an implicit subrule", $aftererror, $line,
2116 "( $code )");
2117 my $implicit = $rule->nextimplicit;
2118 return undef
2119 if !$self->_generate("$implicit : $code",$replace,1);
2120 my $pos = pos $grammar;
2121 substr($grammar,$pos,0,$implicit);
2122 pos $grammar = $pos;;
2123 }
2124 elsif ($grammar =~ m/$ENDDIRECTIVEMK/gco)
2125 {
2126
2127 # EXTRACT TRAILING REPETITION SPECIFIER (IF ANY)
2128
2129 my ($minrep,$maxrep) = (1,$MAXREP);
2130 if ($grammar =~ m/\G[(]/gc)
2131 {
2132 pos($grammar)--;
2133
2134 if ($grammar =~ m/$OPTIONAL/gco)
2135 { ($minrep, $maxrep) = (0,1) }
2136 elsif ($grammar =~ m/$ANY/gco)
2137 { $minrep = 0 }
2138 elsif ($grammar =~ m/$EXACTLY/gco)
2139 { ($minrep, $maxrep) = ($1,$1) }
2140 elsif ($grammar =~ m/$BETWEEN/gco)
2141 { ($minrep, $maxrep) = ($1,$2) }
2142 elsif ($grammar =~ m/$ATLEAST/gco)
2143 { $minrep = $1 }
2144 elsif ($grammar =~ m/$ATMOST/gco)
2145 { $maxrep = $1 }
2146 elsif ($grammar =~ m/$MANY/gco)
2147 { }
2148 elsif ($grammar =~ m/$BADREP/gco)
2149 {
2150 _parse("an invalid repetition specifier", 0,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2151 _error("Incorrect specification of a repeated directive",
2152 $line);
2153 _hint("Repeated directives cannot have
2154 a maximum repetition of zero, nor can they have
2155 negative components in their ranges.");
2156 }
2157 }
2158
2159 $prod && $prod->enddirective($line,$minrep,$maxrep);
2160 }
2161 elsif ($grammar =~ m/\G\s*<[^m]/gc)
2162 {
2163 pos($grammar)-=2;
2164
2165 if ($grammar =~ m/$OPMK/gco)
2166 {
2167 # $DB::single=1;
2168 _parse("a $1-associative operator directive", $aftererror, $line, "<$1op:...>");
2169 $prod->adddirective($1, $line,$2||'');
2170 }
2171 elsif ($grammar =~ m/$UNCOMMITMK/gco)
2172 {
2173 _parse("an uncommit marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2174 $item = new Parse::RecDescent::Directive('$commit=0;1',
2175 $lookahead,$line,"<uncommit>");
2176 $prod and $prod->additem($item)
2177 or _no_rule("<uncommit>",$line);
2178 }
2179 elsif ($grammar =~ m/$QUOTELIKEMK/gco)
2180 {
2181 _parse("an perl quotelike marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2182 $item = new Parse::RecDescent::Directive(
2183 'my ($match,@res);
2184 ($match,$text,undef,@res) =
2185 Text::Balanced::extract_quotelike($text,$skip);
2186 $match ? \@res : undef;
2187 ', $lookahead,$line,"<perl_quotelike>");
2188 $prod and $prod->additem($item)
2189 or _no_rule("<perl_quotelike>",$line);
2190 }
2191 elsif ($grammar =~ m/$CODEBLOCKMK/gco)
2192 {
2193 my $outer = $1||"{}";
2194 _parse("an perl codeblock marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2195 $item = new Parse::RecDescent::Directive(
2196 'Text::Balanced::extract_codeblock($text,undef,$skip,\''.$outer.'\');
2197 ', $lookahead,$line,"<perl_codeblock>");
2198 $prod and $prod->additem($item)
2199 or _no_rule("<perl_codeblock>",$line);
2200 }
2201 elsif ($grammar =~ m/$VARIABLEMK/gco)
2202 {
2203 _parse("an perl variable marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2204 $item = new Parse::RecDescent::Directive(
2205 'Text::Balanced::extract_variable($text,$skip);
2206 ', $lookahead,$line,"<perl_variable>");
2207 $prod and $prod->additem($item)
2208 or _no_rule("<perl_variable>",$line);
2209 }
2210 elsif ($grammar =~ m/$NOCHECKMK/gco)
2211 {
2212 _parse("a disable checking marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2213 if ($rule)
2214 {
2215 _error("<nocheck> directive not at start of grammar", $line);
2216 _hint("The <nocheck> directive can only
2217 be specified at the start of a
2218 grammar (before the first rule
2219 is defined.");
2220 }
2221 else
2222 {
2223 local $::RD_CHECK = 1;
2224 }
2225 }
2226 elsif ($grammar =~ m/$AUTOSTUBMK/gco)
2227 {
2228 _parse("an autostub marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2229 $::RD_AUTOSTUB = "";
2230 }
2231 elsif ($grammar =~ m/$AUTORULEMK/gco)
2232 {
2233 _parse("an autorule marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2234 $::RD_AUTOSTUB = $1;
2235 }
2236 elsif ($grammar =~ m/$AUTOTREEMK/gco)
2237 {
2238 my $base = defined($1) ? $1 : "";
2239 my $current_match = substr($grammar, $-[0], $+[0] - $-[0]);
2240 $base .= "::" if $base && $base !~ /::$/;
2241 _parse("an autotree marker", $aftererror,$line, $current_match);
2242 if ($rule)
2243 {
2244 _error("<autotree> directive not at start of grammar", $line);
2245 _hint("The <autotree> directive can only
2246 be specified at the start of a
2247 grammar (before the first rule
2248 is defined.");
2249 }
2250 else
2251 {
2252 undef $self->{_AUTOACTION};
2253 $self->{_AUTOTREE}{NODE}
2254 = new Parse::RecDescent::Action(q({bless \%item, ').$base.q('.$item[0]}),0,-1);
2255 $self->{_AUTOTREE}{TERMINAL}
2256 = new Parse::RecDescent::Action(q({bless {__VALUE__=>$item[1]}, ').$base.q('.$item[0]}),0,-1);
2257 }
2258 }
2259
2260 elsif ($grammar =~ m/$REJECTMK/gco)
2261 {
2262 _parse("an reject marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2263 $item = new Parse::RecDescent::UncondReject($lookahead,$line,"<reject>");
2264 $prod and $prod->additem($item)
2265 or _no_rule("<reject>",$line);
2266 }
2267 elsif ($grammar =~ m/(?=$CONDREJECTMK)/gco
2268 and do { ($code) = extract_codeblock($grammar,'{',undef,'<');
2269 $code })
2270 {
2271 _parse("a (conditional) reject marker", $aftererror,$line, $code );
2272 $code =~ /\A\s*<reject:(.*)>\Z/s;
2273 my $cond = $1;
2274 $item = new Parse::RecDescent::Directive(
2275 "($1) ? undef : 1", $lookahead,$line,"<reject:$cond>");
2276 $prod and $prod->additem($item)
2277 or _no_rule("<reject:$cond>",$line);
2278 }
2279 elsif ($grammar =~ m/(?=$SCOREMK)/gco
2280 and do { ($code) = extract_codeblock($grammar,'{',undef,'<');
2281 $code })
2282 {
2283 _parse("a score marker", $aftererror,$line, $code );
2284 $code =~ /\A\s*<score:(.*)>\Z/s;
2285 $prod and $prod->addscore($1, $lookahead, $line)
2286 or _no_rule($code,$line);
2287 }
2288 elsif ($grammar =~ m/(?=$AUTOSCOREMK)/gco
2289 and do { ($code) = extract_codeblock($grammar,'{',undef,'<');
2290 $code;
2291 } )
2292 {
2293 _parse("an autoscore specifier", $aftererror,$line,$code);
2294 $code =~ /\A\s*<autoscore:(.*)>\Z/s;
2295
2296 $rule and $rule->addautoscore($1,$self)
2297 or _no_rule($code,$line);
2298
2299 $item = new Parse::RecDescent::UncondReject($lookahead,$line,$code);
2300 $prod and $prod->additem($item)
2301 or _no_rule($code,$line);
2302 }
2303 elsif ($grammar =~ m/$RESYNCMK/gco)
2304 {
2305 _parse("a resync to newline marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2306 $item = new Parse::RecDescent::Directive(
2307 'if ($text =~ s/(\A[^\n]*\n)//) { $return = 0; $1; } else { undef }',
2308 $lookahead,$line,"<resync>");
2309 $prod and $prod->additem($item)
2310 or _no_rule("<resync>",$line);
2311 }
2312 elsif ($grammar =~ m/(?=$RESYNCPATMK)/gco
2313 and do { ($code) = extract_bracketed($grammar,'<');
2314 $code })
2315 {
2316 _parse("a resync with pattern marker", $aftererror,$line, $code );
2317 $code =~ /\A\s*<resync:(.*)>\Z/s;
2318 $item = new Parse::RecDescent::Directive(
2319 'if ($text =~ s/(\A'.$1.')//) { $return = 0; $1; } else { undef }',
2320 $lookahead,$line,$code);
2321 $prod and $prod->additem($item)
2322 or _no_rule($code,$line);
2323 }
2324 elsif ($grammar =~ m/(?=$SKIPMK)/gco
2325 and do { ($code) = extract_codeblock($grammar,'<');
2326 $code })
2327 {
2328 _parse("a skip marker", $aftererror,$line, $code );
2329 $code =~ /\A\s*<skip:(.*)>\Z/s;
2330 if ($rule) {
2331 $item = new Parse::RecDescent::Directive(
2332 'my $oldskip = $skip; $skip='.$1.'; $oldskip',
2333 $lookahead,$line,$code);
2334 $prod and $prod->additem($item)
2335 or _no_rule($code,$line);
2336 } else {
2337 #global <skip> directive
2338 $self->{skip} = $1;
2339 }
2340 }
2341 elsif ($grammar =~ m/(?=$RULEVARPATMK)/gco
2342 and do { ($code) = extract_codeblock($grammar,'{',undef,'<');
2343 $code;
2344 } )
2345 {
2346 _parse("a rule variable specifier", $aftererror,$line,$code);
2347 $code =~ /\A\s*<rulevar:(.*)>\Z/s;
2348
2349 $rule and $rule->addvar($1,$self)
2350 or _no_rule($code,$line);
2351
2352 $item = new Parse::RecDescent::UncondReject($lookahead,$line,$code);
2353 $prod and $prod->additem($item)
2354 or _no_rule($code,$line);
2355 }
2356 elsif ($grammar =~ m/(?=$AUTOACTIONPATMK)/gco
2357 and do { ($code) = extract_codeblock($grammar,'{',undef,'<');
2358 $code;
2359 } )
2360 {
2361 _parse("an autoaction specifier", $aftererror,$line,$code);
2362 $code =~ s/\A\s*<autoaction:(.*)>\Z/$1/s;
2363 if ($code =~ /\A\s*[^{]|[^}]\s*\Z/) {
2364 $code = "{ $code }"
2365 }
2366 $self->{_check}{itempos} =
2367 $code =~ /\@itempos\b|\$itempos\s*\[/;
2368 $self->{_AUTOACTION}
2369 = new Parse::RecDescent::Action($code,0,-$line)
2370 }
2371 elsif ($grammar =~ m/(?=$DEFERPATMK)/gco
2372 and do { ($code) = extract_codeblock($grammar,'{',undef,'<');
2373 $code;
2374 } )
2375 {
2376 _parse("a deferred action specifier", $aftererror,$line,$code);
2377 $code =~ s/\A\s*<defer:(.*)>\Z/$1/s;
2378 if ($code =~ /\A\s*[^{]|[^}]\s*\Z/)
2379 {
2380 $code = "{ $code }"
2381 }
2382
2383 $item = new Parse::RecDescent::Directive(
2384 "push \@{\$thisparser->{deferred}}, sub $code;",
2385 $lookahead,$line,"<defer:$code>");
2386 $prod and $prod->additem($item)
2387 or _no_rule("<defer:$code>",$line);
2388
2389 $self->{deferrable} = 1;
2390 }
2391 elsif ($grammar =~ m/(?=$TOKENPATMK)/gco
2392 and do { ($code) = extract_codeblock($grammar,'{',undef,'<');
2393 $code;
2394 } )
2395 {
2396 _parse("a token constructor", $aftererror,$line,$code);
2397 $code =~ s/\A\s*<token:(.*)>\Z/$1/s;
2398
2399 my $types = eval 'no strict; local $SIG{__WARN__} = sub {0}; my @arr=('.$code.'); @arr' || ();
2400 if (!$types)
2401 {
2402 _error("Incorrect token specification: \"$@\"", $line);
2403 _hint("The <token:...> directive requires a list
2404 of one or more strings representing possible
2405 types of the specified token. For example:
2406 <token:NOUN,VERB>");
2407 }
2408 else
2409 {
2410 $item = new Parse::RecDescent::Directive(
2411 'no strict;
2412 $return = { text => $item[-1] };
2413 @{$return->{type}}{'.$code.'} = (1..'.$types.');',
2414 $lookahead,$line,"<token:$code>");
2415 $prod and $prod->additem($item)
2416 or _no_rule("<token:$code>",$line);
2417 }
2418 }
2419 elsif ($grammar =~ m/$COMMITMK/gco)
2420 {
2421 _parse("an commit marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2422 $item = new Parse::RecDescent::Directive('$commit = 1',
2423 $lookahead,$line,"<commit>");
2424 $prod and $prod->additem($item)
2425 or _no_rule("<commit>",$line);
2426 }
2427 elsif ($grammar =~ m/$NOCHECKMK/gco) {
2428 _parse("an hint request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2429 $::RD_CHECK = 0;
2430 }
2431 elsif ($grammar =~ m/$HINTMK/gco) {
2432 _parse("an hint request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2433 $::RD_HINT = $self->{__HINT__} = 1;
2434 }
2435 elsif ($grammar =~ m/$WARNMK/gco) {
2436 _parse("an warning request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2437 $::RD_WARN = $self->{__WARN__} = $1 ? $2+0 : 1;
2438 }
2439 elsif ($grammar =~ m/$TRACEBUILDMK/gco) {
2440 _parse("an grammar build trace request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2441 $::RD_TRACE = $1 ? $2+0 : 1;
2442 }
2443 elsif ($grammar =~ m/$TRACEPARSEMK/gco) {
2444 _parse("an parse trace request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2445 $self->{__TRACE__} = $1 ? $2+0 : 1;
2446 }
2447 elsif ($grammar =~ m/$AUTOERRORMK/gco)
2448 {
2449 $commitonly = $1;
2450 _parse("an error marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2451 $item = new Parse::RecDescent::Error('',$lookahead,$1,$line);
2452 $prod and $prod->additem($item)
2453 or _no_rule("<error>",$line);
2454 $aftererror = !$commitonly;
2455 }
2456 elsif ($grammar =~ m/(?=$MSGERRORMK)/gco
2457 and do { $commitonly = $1;
2458 ($code) = extract_bracketed($grammar,'<');
2459 $code })
2460 {
2461 _parse("an error marker", $aftererror,$line,$code);
2462 $code =~ /\A\s*<error\??:(.*)>\Z/s;
2463 $item = new Parse::RecDescent::Error($1,$lookahead,$commitonly,$line);
2464 $prod and $prod->additem($item)
2465 or _no_rule("$code",$line);
2466 $aftererror = !$commitonly;
2467 }
2468 elsif (do { $commitonly = $1;
2469 ($code) = extract_bracketed($grammar,'<');
2470 $code })
2471 {
2472 if ($code =~ /^<[A-Z_]+>$/)
2473 {
2474 _error("Token items are not yet
2475 supported: \"$code\"",
2476 $line);
2477 _hint("Items like $code that consist of angle
2478 brackets enclosing a sequence of
2479 uppercase characters will eventually
2480 be used to specify pre-lexed tokens
2481 in a grammar. That functionality is not
2482 yet implemented. Or did you misspell
2483 \"$code\"?");
2484 }
2485 else
2486 {
2487 _error("Untranslatable item encountered: \"$code\"",
2488 $line);
2489 _hint("Did you misspell \"$code\"
2490 or forget to comment it out?");
2491 }
2492 }
2493 }
2494 elsif ($grammar =~ m/$RULE/gco)
2495 {
2496 _parseunneg("a rule declaration", 0,
2497 $lookahead,$line, substr($grammar, $-[0], $+[0] - $-[0]) ) or next;
2498 my $rulename = $1;
2499 if ($rulename =~ /Replace|Extend|Precompile|Save/ )
2500 {
2501 _warn(2,"Rule \"$rulename\" hidden by method
2502 Parse::RecDescent::$rulename",$line)
2503 and
2504 _hint("The rule named \"$rulename\" cannot be directly
2505 called through the Parse::RecDescent object
2506 for this grammar (although it may still
2507 be used as a subrule of other rules).
2508 It can't be directly called because
2509 Parse::RecDescent::$rulename is already defined (it
2510 is the standard method of all
2511 parsers).");
2512 }
2513 $rule = new Parse::RecDescent::Rule($rulename,$self,$line,$replace);
2514 $prod->check_pending($line) if $prod;
2515 $prod = $rule->addprod( new Parse::RecDescent::Production );
2516 $aftererror = 0;
2517 }
2518 elsif ($grammar =~ m/$UNCOMMITPROD/gco)
2519 {
2520 pos($grammar)-=9;
2521 _parseunneg("a new (uncommitted) production",
2522 0, $lookahead, $line, substr($grammar, $-[0], $+[0] - $-[0]) ) or next;
2523
2524 $prod->check_pending($line) if $prod;
2525 $prod = new Parse::RecDescent::Production($line,1);
2526 $rule and $rule->addprod($prod)
2527 or _no_rule("<uncommit>",$line);
2528 $aftererror = 0;
2529 }
2530 elsif ($grammar =~ m/$ERRORPROD/gco)
2531 {
2532 pos($grammar)-=6;
2533 _parseunneg("a new (error) production", $aftererror,
2534 $lookahead,$line, substr($grammar, $-[0], $+[0] - $-[0]) ) or next;
2535 $prod->check_pending($line) if $prod;
2536 $prod = new Parse::RecDescent::Production($line,0,1);
2537 $rule and $rule->addprod($prod)
2538 or _no_rule("<error>",$line);
2539 $aftererror = 0;
2540 }
2541 elsif ($grammar =~ m/$PROD/gco)
2542 {
2543 _parseunneg("a new production", 0,
2544 $lookahead,$line, substr($grammar, $-[0], $+[0] - $-[0]) ) or next;
2545 $rule
2546 and (!$prod || $prod->check_pending($line))
2547 and $prod = $rule->addprod(new Parse::RecDescent::Production($line))
2548 or _no_rule("production",$line);
2549 $aftererror = 0;
2550 }
2551 elsif ($grammar =~ m/$LITERAL/gco)
2552 {
2553 my $literal = $1;
2554 ($code = $literal) =~ s/\\\\/\\/g;
2555 _parse("a literal terminal", $aftererror,$line,$literal);
2556 $item = new Parse::RecDescent::Literal($code,$lookahead,$line);
2557 $prod and $prod->additem($item)
2558 or _no_rule("literal terminal",$line,"'$literal'");
2559 }
2560 elsif ($grammar =~ m/$INTERPLIT/gco)
2561 {
2562 _parse("an interpolated literal terminal", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2563 $item = new Parse::RecDescent::InterpLit($1,$lookahead,$line);
2564 $prod and $prod->additem($item)
2565 or _no_rule("interpolated literal terminal",$line,"'$1'");
2566 }
2567 elsif ($grammar =~ m/$TOKEN/gco)
2568 {
2569 _parse("a /../ pattern terminal", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) );
2570 $item = new Parse::RecDescent::Token($1,'/',$3?$3:'',$lookahead,$line);
2571 $prod and $prod->additem($item)
2572 or _no_rule("pattern terminal",$line,"/$1/");
2573 }
2574 elsif ($grammar =~ m/(?=$MTOKEN)/gco
2575 and do { ($code, undef, @components)
2576 = extract_quotelike($grammar);
2577 $code }
2578 )
2579
2580 {
2581 _parse("an m/../ pattern terminal", $aftererror,$line,$code);
2582 $item = new Parse::RecDescent::Token(@components[3,2,8],
2583 $lookahead,$line);
2584 $prod and $prod->additem($item)
2585 or _no_rule("pattern terminal",$line,$code);
2586 }
2587 elsif ($grammar =~ m/(?=$MATCHRULE)/gco
2588 and do { ($code) = extract_bracketed($grammar,'<');
2589 $code
2590 }
2591 or $grammar =~ m/$SUBRULE/gco
2592 and $code = $1)
2593 {
2594 my $name = $code;
2595 my $matchrule = 0;
2596 if (substr($name,0,1) eq '<')
2597 {
2598 $name =~ s/$MATCHRULE\s*//;
2599 $name =~ s/\s*>\Z//;
2600 $matchrule = 1;
2601 }
2602
2603 # EXTRACT TRAILING ARG LIST (IF ANY)
2604
2605 my ($argcode) = extract_codeblock($grammar, "[]",'') || '';
2606
2607 # EXTRACT TRAILING REPETITION SPECIFIER (IF ANY)
2608
2609 if ($grammar =~ m/\G[(]/gc)
2610 {
2611 pos($grammar)--;
2612
2613 if ($grammar =~ m/$OPTIONAL/gco)
2614 {
2615 _parse("an zero-or-one subrule match", $aftererror,$line,"$code$argcode($1)");
2616 $item = new Parse::RecDescent::Repetition($name,$1,0,1,
2617 $lookahead,$line,
2618 $self,
2619 $matchrule,
2620 $argcode);
2621 $prod and $prod->additem($item)
2622 or _no_rule("repetition",$line,"$code$argcode($1)");
2623
2624 !$matchrule and $rule and $rule->addcall($name);
2625 }
2626 elsif ($grammar =~ m/$ANY/gco)
2627 {
2628 _parse("a zero-or-more subrule match", $aftererror,$line,"$code$argcode($1)");
2629 if ($2)
2630 {
2631 my $pos = pos $grammar;
2632 substr($grammar,$pos,0,
2633 "<leftop='$name(s?)': $name $2 $name>(s?) ");
2634
2635 pos $grammar = $pos;
2636 }
2637 else
2638 {
2639 $item = new Parse::RecDescent::Repetition($name,$1,0,$MAXREP,
2640 $lookahead,$line,
2641 $self,
2642 $matchrule,
2643 $argcode);
2644 $prod and $prod->additem($item)
2645 or _no_rule("repetition",$line,"$code$argcode($1)");
2646
2647 !$matchrule and $rule and $rule->addcall($name);
2648
2649 _check_insatiable($name,$1,$grammar,$line) if $::RD_CHECK;
2650 }
2651 }
2652 elsif ($grammar =~ m/$MANY/gco)
2653 {
2654 _parse("a one-or-more subrule match", $aftererror,$line,"$code$argcode($1)");
2655 if ($2)
2656 {
2657 # $DB::single=1;
2658 my $pos = pos $grammar;
2659 substr($grammar,$pos,0,
2660 "<leftop='$name(s)': $name $2 $name> ");
2661
2662 pos $grammar = $pos;
2663 }
2664 else
2665 {
2666 $item = new Parse::RecDescent::Repetition($name,$1,1,$MAXREP,
2667 $lookahead,$line,
2668 $self,
2669 $matchrule,
2670 $argcode);
2671
2672 $prod and $prod->additem($item)
2673 or _no_rule("repetition",$line,"$code$argcode($1)");
2674
2675 !$matchrule and $rule and $rule->addcall($name);
2676
2677 _check_insatiable($name,$1,$grammar,$line) if $::RD_CHECK;
2678 }
2679 }
2680 elsif ($grammar =~ m/$EXACTLY/gco)
2681 {
2682 _parse("an exactly-$1-times subrule match", $aftererror,$line,"$code$argcode($1)");
2683 if ($2)
2684 {
2685 my $pos = pos $grammar;
2686 substr($grammar,$pos,0,
2687 "<leftop='$name($1)': $name $2 $name>($1) ");
2688
2689 pos $grammar = $pos;
2690 }
2691 else
2692 {
2693 $item = new Parse::RecDescent::Repetition($name,$1,$1,$1,
2694 $lookahead,$line,
2695 $self,
2696 $matchrule,
2697 $argcode);
2698 $prod and $prod->additem($item)
2699 or _no_rule("repetition",$line,"$code$argcode($1)");
2700
2701 !$matchrule and $rule and $rule->addcall($name);
2702 }
2703 }
2704 elsif ($grammar =~ m/$BETWEEN/gco)
2705 {
2706 _parse("a $1-to-$2 subrule match", $aftererror,$line,"$code$argcode($1..$2)");
2707 if ($3)
2708 {
2709 my $pos = pos $grammar;
2710 substr($grammar,$pos,0,
2711 "<leftop='$name($1..$2)': $name $3 $name>($1..$2) ");
2712
2713 pos $grammar = $pos;
2714 }
2715 else
2716 {
2717 $item = new Parse::RecDescent::Repetition($name,"$1..$2",$1,$2,
2718 $lookahead,$line,
2719 $self,
2720 $matchrule,
2721 $argcode);
2722 $prod and $prod->additem($item)
2723 or _no_rule("repetition",$line,"$code$argcode($1..$2)");
2724
2725 !$matchrule and $rule and $rule->addcall($name);
2726 }
2727 }
2728 elsif ($grammar =~ m/$ATLEAST/gco)
2729 {
2730 _parse("a $1-or-more subrule match", $aftererror,$line,"$code$argcode($1..)");
2731 if ($2)
2732 {
2733 my $pos = pos $grammar;
2734 substr($grammar,$pos,0,
2735 "<leftop='$name($1..)': $name $2 $name>($1..) ");
2736
2737 pos $grammar = $pos;
2738 }
2739 else
2740 {
2741 $item = new Parse::RecDescent::Repetition($name,"$1..",$1,$MAXREP,
2742 $lookahead,$line,
2743 $self,
2744 $matchrule,
2745 $argcode);
2746 $prod and $prod->additem($item)
2747 or _no_rule("repetition",$line,"$code$argcode($1..)");
2748
2749 !$matchrule and $rule and $rule->addcall($name);
2750 _check_insatiable($name,"$1..",$grammar,$line) if $::RD_CHECK;
2751 }
2752 }
2753 elsif ($grammar =~ m/$ATMOST/gco)
2754 {
2755 _parse("a one-to-$1 subrule match", $aftererror,$line,"$code$argcode(..$1)");
2756 if ($2)
2757 {
2758 my $pos = pos $grammar;
2759 substr($grammar,$pos,0,
2760 "<leftop='$name(..$1)': $name $2 $name>(..$1) ");
2761
2762 pos $grammar = $pos;
2763 }
2764 else
2765 {
2766 $item = new Parse::RecDescent::Repetition($name,"..$1",1,$1,
2767 $lookahead,$line,
2768 $self,
2769 $matchrule,
2770 $argcode);
2771 $prod and $prod->additem($item)
2772 or _no_rule("repetition",$line,"$code$argcode(..$1)");
2773
2774 !$matchrule and $rule and $rule->addcall($name);
2775 }
2776 }
2777 elsif ($grammar =~ m/$BADREP/gco)
2778 {
2779 my $current_match = substr($grammar, $-[0], $+[0] - $-[0]);
2780 _parse("an subrule match with invalid repetition specifier", 0,$line, $current_match);
2781 _error("Incorrect specification of a repeated subrule",
2782 $line);
2783 _hint("Repeated subrules like \"$code$argcode$current_match\" cannot have
2784 a maximum repetition of zero, nor can they have
2785 negative components in their ranges.");
2786 }
2787 }
2788 else
2789 {
2790 _parse("a subrule match", $aftererror,$line,$code);
2791 my $desc;
2792 if ($name=~/\A_alternation_\d+_of_production_\d+_of_rule/)
2793 { $desc = $self->{"rules"}{$name}->expected }
2794 $item = new Parse::RecDescent::Subrule($name,
2795 $lookahead,
2796 $line,
2797 $desc,
2798 $matchrule,
2799 $argcode);
2800
2801 $prod and $prod->additem($item)
2802 or _no_rule("(sub)rule",$line,$name);
2803
2804 !$matchrule and $rule and $rule->addcall($name);
2805 }
2806 }
2807 elsif ($grammar =~ m/$LONECOLON/gco )
2808 {
2809 _error("Unexpected colon encountered", $line);
2810 _hint("Did you mean \"|\" (to start a new production)?
2811 Or perhaps you forgot that the colon
2812 in a rule definition must be
2813 on the same line as the rule name?");
2814 }
2815 elsif ($grammar =~ m/$ACTION/gco ) # BAD ACTION, ALREADY FAILED
2816 {
2817 _error("Malformed action encountered",
2818 $line);
2819 _hint("Did you forget the closing curly bracket
2820 or is there a syntax error in the action?");
2821 }
2822 elsif ($grammar =~ m/$OTHER/gco )
2823 {
2824 _error("Untranslatable item encountered: \"$1\"",
2825 $line);
2826 _hint("Did you misspell \"$1\"
2827 or forget to comment it out?");
2828 }
2829
2830 if ($lookaheadspec =~ tr /././ > 3)
2831 {
2832 $lookaheadspec =~ s/\A\s+//;
2833 $lookahead = $lookahead<0
2834 ? 'a negative lookahead ("...!")'
2835 : 'a positive lookahead ("...")' ;
2836 _warn(1,"Found two or more lookahead specifiers in a
2837 row.",$line)
2838 and
2839 _hint("Multiple positive and/or negative lookaheads
2840 are simply multiplied together to produce a
2841 single positive or negative lookahead
2842 specification. In this case the sequence
2843 \"$lookaheadspec\" was reduced to $lookahead.
2844 Was this your intention?");
2845 }
2846 $lookahead = 0;
2847 $lookaheadspec = "";
2848
2849 $grammar =~ m/\G\s+/gc;
2850 }
2851
2852 if ($must_pop_lines) {
2853 pop @lines;
2854 }
2855
2856 unless ($ERRORS or $isimplicit or !$::RD_CHECK)
2857 {
2858 $self->_check_grammar();
2859 }
2860
2861 unless ($ERRORS or $isimplicit or $Parse::RecDescent::compiling)
2862 {
2863 my $code = $self->_code();
2864 if (defined $::RD_TRACE)
2865 {
2866 my $mode = ($nextnamespace eq "namespace000002") ? '>' : '>>';
2867 print STDERR "printing code (", length($code),") to RD_TRACE\n";
2868 local *TRACE_FILE;
2869 open TRACE_FILE, $mode, "RD_TRACE"
2870 and print TRACE_FILE "my \$ERRORS;\n$code"
2871 and close TRACE_FILE;
2872 }
2873
2874 unless ( eval "$code 1" )
2875 {
2876 _error("Internal error in generated parser code!");
2877 $@ =~ s/at grammar/in grammar at/;
2878 _hint($@);
2879 }
2880 }
2881
2882 if ($ERRORS and !_verbosity("HINT"))
2883 {
2884 local $::RD_HINT = defined $::RD_HINT ? $::RD_HINT : 1;
2885 _hint('Set $::RD_HINT (or -RD_HINT if you\'re using "perl -s")
2886 for hints on fixing these problems. Use $::RD_HINT = 0
2887 to disable this message.');
2888 }
2889 if ($ERRORS) { $ERRORS=0; return }
2890 return $self;
2891}
2892
2893
2894sub _addstartcode($$)
2895{
2896 my ($self, $code) = @_;
2897 $code =~ s/\A\s*\{(.*)\}\Z/$1/s;
2898
2899 $self->{"startcode"} .= "$code;\n";
2900}
2901
2902# CHECK FOR GRAMMAR PROBLEMS....
2903
2904sub _check_insatiable($$$$)
2905{
2906 my ($subrule,$repspec,$grammar,$line) = @_;
2907 pos($grammar)=pos($_[2]);
2908 return if $grammar =~ m/$OPTIONAL/gco || $grammar =~ m/$ANY/gco;
2909 my $min = 1;
2910 if ( $grammar =~ m/$MANY/gco
2911 || $grammar =~ m/$EXACTLY/gco
2912 || $grammar =~ m/$ATMOST/gco
2913 || $grammar =~ m/$BETWEEN/gco && do { $min=$2; 1 }
2914 || $grammar =~ m/$ATLEAST/gco && do { $min=$2; 1 }
2915 || $grammar =~ m/$SUBRULE(?!\s*:)/gco
2916 )
2917 {
2918 return unless $1 eq $subrule && $min > 0;
2919 my $current_match = substr($grammar, $-[0], $+[0] - $-[0]);
2920 _warn(3,"Subrule sequence \"$subrule($repspec) $current_match\" will
2921 (almost certainly) fail.",$line)
2922 and
2923 _hint("Unless subrule \"$subrule\" performs some cunning
2924 lookahead, the repetition \"$subrule($repspec)\" will
2925 insatiably consume as many matches of \"$subrule\" as it
2926 can, leaving none to match the \"$current_match\" that follows.");
2927 }
2928}
2929
2930sub _check_grammar ($)
2931{
2932 my $self = shift;
2933 my $rules = $self->{"rules"};
2934 my $rule;
2935 foreach $rule ( values %$rules )
2936 {
2937 next if ! $rule->{"changed"};
2938
2939 # CHECK FOR UNDEFINED RULES
2940
2941 my $call;
2942 foreach $call ( @{$rule->{"calls"}} )
2943 {
2944 if (!defined ${$rules}{$call}
2945 &&!defined &{"Parse::RecDescent::$call"})
2946 {
2947 if (!defined $::RD_AUTOSTUB)
2948 {
2949 _warn(3,"Undefined (sub)rule \"$call\"
2950 used in a production.")
2951 and
2952 _hint("Will you be providing this rule
2953 later, or did you perhaps
2954 misspell \"$call\"? Otherwise
2955 it will be treated as an
2956 immediate <reject>.");
2957 eval "sub $self->{namespace}::$call {undef}";
2958 }
2959 else # EXPERIMENTAL
2960 {
2961 my $rule = qq{'$call'};
2962 if ($::RD_AUTOSTUB and $::RD_AUTOSTUB ne "1") {
2963 $rule = $::RD_AUTOSTUB;
2964 }
2965 _warn(1,"Autogenerating rule: $call")
2966 and
2967 _hint("A call was made to a subrule
2968 named \"$call\", but no such
2969 rule was specified. However,
2970 since \$::RD_AUTOSTUB
2971 was defined, a rule stub
2972 ($call : $rule) was
2973 automatically created.");
2974
2975 $self->_generate("$call: $rule",0,1);
2976 }
2977 }
2978 }
2979
2980 # CHECK FOR LEFT RECURSION
2981
2982 if ($rule->isleftrec($rules))
2983 {
2984 _error("Rule \"$rule->{name}\" is left-recursive.");
2985 _hint("Redesign the grammar so it's not left-recursive.
2986 That will probably mean you need to re-implement
2987 repetitions using the '(s)' notation.
2988 For example: \"$rule->{name}(s)\".");
2989 next;
2990 }
2991
2992 # CHECK FOR PRODUCTIONS FOLLOWING EMPTY PRODUCTIONS
2993 {
2994 my $hasempty;
2995 my $prod;
2996 foreach $prod ( @{$rule->{"prods"}} ) {
2997 if ($hasempty) {
2998 _error("Production " . $prod->describe . " for \"$rule->{name}\"
2999 will never be reached (preceding empty production will
3000 always match first).");
3001 _hint("Reorder the grammar so that the empty production
3002 is last in the list or productions.");
3003 last;
3004 }
3005 $hasempty ||= $prod->isempty();
3006 }
3007 }
3008 }
3009}
3010
3011# GENERATE ACTUAL PARSER CODE
3012
3013sub _code($)
3014{
3015 my $self = shift;
3016 my $initial_skip = defined($self->{skip}) ? $self->{skip} : $skip;
3017
3018 my $code = qq{
3019package $self->{namespace};
3020use strict;
3021use vars qw(\$skip \$AUTOLOAD $self->{localvars} );
3022\@$self->{namespace}\::ISA = ();
3023\$skip = '$initial_skip';
3024$self->{startcode}
3025
3026{
3027local \$SIG{__WARN__} = sub {0};
3028# PRETEND TO BE IN Parse::RecDescent NAMESPACE
3029*$self->{namespace}::AUTOLOAD = sub
3030{
3031 no strict 'refs';
3032 \$AUTOLOAD =~ s/^$self->{namespace}/Parse::RecDescent/;
3033 goto &{\$AUTOLOAD};
3034}
3035}
3036
3037};
3038 $code .= "push \@$self->{namespace}\::ISA, 'Parse::RecDescent';";
3039 $self->{"startcode"} = '';
3040
3041 my $rule;
3042 foreach $rule ( values %{$self->{"rules"}} )
3043 {
3044 if ($rule->{"changed"})
3045 {
3046 $code .= $rule->code($self->{"namespace"},$self);
3047 $rule->{"changed"} = 0;
3048 }
3049 }
3050
3051 return $code;
3052}
3053
3054
3055# EXECUTING A PARSE....
3056
3057sub AUTOLOAD # ($parser, $text; $linenum, @args)
3058{
3059 croak "Could not find method: $AUTOLOAD\n" unless ref $_[0];
3060 my $class = ref($_[0]) || $_[0];
3061 my $text = ref($_[1]) eq 'SCALAR' ? ${$_[1]} : "$_[1]";
3062 $_[0]->{lastlinenum} = $_[2]||_linecount($_[1]);
3063 $_[0]->{lastlinenum} = _linecount($_[1]);
3064 $_[0]->{lastlinenum} += ($_[2]||0) if @_ > 2;
3065 $_[0]->{offsetlinenum} = $_[0]->{lastlinenum};
3066 $_[0]->{fulltext} = $text;
3067 $_[0]->{fulltextlen} = length $text;
3068 $_[0]->{linecounter_cache} = {};
3069 $_[0]->{deferred} = [];
3070 $_[0]->{errors} = [];
3071 my @args = @_[3..$#_];
3072 my $args = sub { [ @args ] };
3073
3074 $AUTOLOAD =~ s/$class/$_[0]->{namespace}/;
3075 no strict "refs";
3076
3077 local $::RD_WARN = $::RD_WARN || $_[0]->{__WARN__};
3078 local $::RD_HINT = $::RD_HINT || $_[0]->{__HINT__};
3079 local $::RD_TRACE = $::RD_TRACE || $_[0]->{__TRACE__};
3080
3081 croak "Unknown starting rule ($AUTOLOAD) called\n"
3082 unless defined &$AUTOLOAD;
3083 my $retval = &{$AUTOLOAD}($_[0],$text,undef,undef,undef,$args);
3084
3085 if (defined $retval)
3086 {
3087 foreach ( @{$_[0]->{deferred}} ) { &$_; }
3088 }
3089 else
3090 {
3091 foreach ( @{$_[0]->{errors}} ) { _error(@$_); }
3092 }
3093
3094 if (ref $_[1] eq 'SCALAR') { ${$_[1]} = $text }
3095
3096 $ERRORS = 0;
3097 return $retval;
3098}
3099
3100sub _parserepeat($$$$$$$$$) # RETURNS A REF TO AN ARRAY OF MATCHES
3101{
3102 my ($parser, $text, $prod, $min, $max, $_noactions, $_itempos, $expectation, $argcode) = @_;
3103 my @tokens = ();
3104
3105 my $itemposfirst;
3106 my $reps;
3107 for ($reps=0; $reps<$max;)
3108 {
3109 $expectation->at($text);
3110 my $_savetext = $text;
3111 my $prevtextlen = length $text;
3112 my $_tok;
3113 if (! defined ($_tok = &$prod($parser,$text,1,$_noactions,$_itempos,$argcode)))
3114 {
3115 $text = $_savetext;
3116 last;
3117 }
3118
3119 if (defined($_itempos) and !defined($itemposfirst))
3120 {
3121 $itemposfirst = Parse::RecDescent::Production::_duplicate_itempos($_itempos);
3122 }
3123
3124 push @tokens, $_tok if defined $_tok;
3125 last if ++$reps >= $min and $prevtextlen == length $text;
3126 }
3127
3128 do { $expectation->failed(); return undef} if $reps<$min;
3129
3130 if (defined $itemposfirst)
3131 {
3132 Parse::RecDescent::Production::_update_itempos($_itempos, $itemposfirst, undef, [qw(from)]);
3133 }
3134
3135 $_[1] = $text;
3136 return [@tokens];
3137}
3138
3139sub set_autoflush {
3140 my $orig_selected = select $_[0];
3141 $| = 1;
3142 select $orig_selected;
3143 return;
3144}
3145
3146# ERROR REPORTING....
3147
3148sub _write_ERROR {
3149 my ($errorprefix, $errortext) = @_;
3150 return if $errortext !~ /\S/;
3151 $errorprefix =~ s/\s+\Z//;
3152 local $^A = q{};
3153
3154 formline(<<'END_FORMAT', $errorprefix, $errortext);
3155@>>>>>>>>>>>>>>>>>>>>: ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
3156END_FORMAT
3157 formline(<<'END_FORMAT', $errortext);
3158~~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
3159END_FORMAT
3160 print {*STDERR} $^A;
3161}
3162
3163# TRACING
3164
3165my $TRACE_FORMAT = <<'END_FORMAT';
3166@>|@|||||||||@^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<|
3167 | ~~ |^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<|
3168END_FORMAT
3169
3170my $TRACECONTEXT_FORMAT = <<'END_FORMAT';
3171@>|@|||||||||@ |^<<<<<<<<<<<<<<<<<<<<<<<<<<<
3172 | ~~ | |^<<<<<<<<<<<<<<<<<<<<<<<<<<<
3173END_FORMAT
3174
3175sub _write_TRACE {
3176 my ($tracelevel, $tracerulename, $tracemsg) = @_;
3177 return if $tracemsg !~ /\S/;
3178 $tracemsg =~ s/\s*\Z//;
3179 local $^A = q{};
3180 my $bar = '|';
3181 formline($TRACE_FORMAT, $tracelevel, $tracerulename, $bar, $tracemsg, $tracemsg);
3182 print {*STDERR} $^A;
3183}
3184
3185sub _write_TRACECONTEXT {
3186 my ($tracelevel, $tracerulename, $tracecontext) = @_;
3187 return if $tracecontext !~ /\S/;
3188 $tracecontext =~ s/\s*\Z//;
3189 local $^A = q{};
3190 my $bar = '|';
3191 formline($TRACECONTEXT_FORMAT, $tracelevel, $tracerulename, $bar, $tracecontext, $tracecontext);
3192 print {*STDERR} $^A;
3193}
3194
3195sub _verbosity($)
3196{
3197 defined $::RD_TRACE
3198 or defined $::RD_HINT and $::RD_HINT and $_[0] =~ /ERRORS|WARN|HINT/
3199 or defined $::RD_WARN and $::RD_WARN and $_[0] =~ /ERRORS|WARN/
3200 or defined $::RD_ERRORS and $::RD_ERRORS and $_[0] =~ /ERRORS/
3201}
3202
3203sub _error($;$)
3204{
3205 $ERRORS++;
3206 return 0 if ! _verbosity("ERRORS");
3207 my $errortext = $_[0];
3208 my $errorprefix = "ERROR" . ($_[1] ? " (line $_[1])" : "");
3209 $errortext =~ s/\s+/ /g;
3210 print {*STDERR} "\n" if _verbosity("WARN");
3211 _write_ERROR($errorprefix, $errortext);
3212 return 1;
3213}
3214
3215sub _warn($$;$)
3216{
3217 return 0 unless _verbosity("WARN") && ($::RD_HINT || $_[0] >= ($::RD_WARN||1));
3218 my $errortext = $_[1];
3219 my $errorprefix = "Warning" . ($_[2] ? " (line $_[2])" : "");
3220 print {*STDERR} "\n" if _verbosity("HINT");
3221 $errortext =~ s/\s+/ /g;
3222 _write_ERROR($errorprefix, $errortext);
3223 return 1;
3224}
3225
3226sub _hint($)
3227{
3228 return 0 unless $::RD_HINT;
3229 my $errortext = $_[0];
3230 my $errorprefix = "Hint" . ($_[1] ? " (line $_[1])" : "");
3231 $errortext =~ s/\s+/ /g;
3232 _write_ERROR($errorprefix, $errortext);
3233 return 1;
3234}
3235
3236sub _tracemax($)
3237{
3238 if (defined $::RD_TRACE
3239 && $::RD_TRACE =~ /\d+/
3240 && $::RD_TRACE>1
3241 && $::RD_TRACE+10<length($_[0]))
3242 {
3243 my $count = length($_[0]) - $::RD_TRACE;
3244 return substr($_[0],0,$::RD_TRACE/2)
3245 . "...<$count>..."
3246 . substr($_[0],-$::RD_TRACE/2);
3247 }
3248 else
3249 {
3250 return substr($_[0],0,500);
3251 }
3252}
3253
3254sub _tracefirst($)
3255{
3256 if (defined $::RD_TRACE
3257 && $::RD_TRACE =~ /\d+/
3258 && $::RD_TRACE>1
3259 && $::RD_TRACE+10<length($_[0]))
3260 {
3261 my $count = length($_[0]) - $::RD_TRACE;
3262 return substr($_[0],0,$::RD_TRACE) . "...<+$count>";
3263 }
3264 else
3265 {
3266 return substr($_[0],0,500);
3267 }
3268}
3269
3270my $lastcontext = '';
3271my $lastrulename = '';
3272my $lastlevel = '';
3273
3274sub _trace($;$$$)
3275{
3276 my $tracemsg = $_[0];
3277 my $tracecontext = $_[1]||$lastcontext;
3278 my $tracerulename = $_[2]||$lastrulename;
3279 my $tracelevel = $_[3]||$lastlevel;
3280 if ($tracerulename) { $lastrulename = $tracerulename }
3281 if ($tracelevel) { $lastlevel = $tracelevel }
3282
3283 $tracecontext =~ s/\n/\\n/g;
3284 $tracecontext =~ s/\s+/ /g;
3285 $tracerulename = qq{$tracerulename};
3286 _write_TRACE($tracelevel, $tracerulename, $tracemsg);
3287 if ($tracecontext ne $lastcontext)
3288 {
3289 if ($tracecontext)
3290 {
3291 $lastcontext = _tracefirst($tracecontext);
3292 $tracecontext = qq{"$tracecontext"};
3293 }
3294 else
3295 {
3296 $tracecontext = qq{<NO TEXT LEFT>};
3297 }
3298 _write_TRACECONTEXT($tracelevel, $tracerulename, $tracecontext);
3299 }
3300}
3301
3302sub _matchtracemessage
3303{
3304 my ($self, $reject) = @_;
3305
3306 my $prefix = '';
3307 my $postfix = '';
3308 my $matched = not $reject;
3309 my @t = ("Matched", "Didn't match");
3310 if (exists $self->{lookahead} and $self->{lookahead})
3311 {
3312 $postfix = $reject ? "(reject)" : "(keep)";
3313 $prefix = "...";
3314 if ($self->{lookahead} < 0)
3315 {
3316 $prefix .= '!';
3317 $matched = not $matched;
3318 }
3319 }
3320 $prefix . ($matched ? $t[0] : $t[1]) . $postfix;
3321}
3322
3323sub _parseunneg($$$$$)
3324{
3325 _parse($_[0],$_[1],$_[3],$_[4]);
3326 if ($_[2]<0)
3327 {
3328 _error("Can't negate \"$_[4]\".",$_[3]);
3329 _hint("You can't negate $_[0]. Remove the \"...!\" before
3330 \"$_[4]\".");
3331 return 0;
3332 }
3333 return 1;
3334}
3335
3336sub _parse($$$$)
3337{
3338 my $what = $_[3];
3339 $what =~ s/^\s+//;
3340 if ($_[1])
3341 {
3342 _warn(3,"Found $_[0] ($what) after an unconditional <error>",$_[2])
3343 and
3344 _hint("An unconditional <error> always causes the
3345 production containing it to immediately fail.
3346 \u$_[0] that follows an <error>
3347 will never be reached. Did you mean to use
3348 <error?> instead?");
3349 }
3350
3351 return if ! _verbosity("TRACE");
3352 my $errortext = "Treating \"$what\" as $_[0]";
3353 my $errorprefix = "Parse::RecDescent";
3354 $errortext =~ s/\s+/ /g;
3355 _write_ERROR($errorprefix, $errortext);
3356}
3357
3358sub _linecount($) {
3359 scalar substr($_[0], pos $_[0]||0) =~ tr/\n//
3360}
3361
3362
3363package main;
3364
3365use vars qw ( $RD_ERRORS $RD_WARN $RD_HINT $RD_TRACE $RD_CHECK );
3366$::RD_CHECK = 1;
3367$::RD_ERRORS = 1;
3368$::RD_WARN = 3;
3369
33701;
3371
3372__END__
3373
3374=head1 NAME
3375
3376Parse::RecDescent - Generate Recursive-Descent Parsers
3377
3378=head1 VERSION
3379
3380This document describes version 1.967006 of Parse::RecDescent
3381released January 29th, 2012.
3382
3383=head1 SYNOPSIS
3384
3385 use Parse::RecDescent;
3386
3387 # Generate a parser from the specification in $grammar:
3388
3389 $parser = new Parse::RecDescent ($grammar);
3390
3391 # Generate a parser from the specification in $othergrammar
3392
3393 $anotherparser = new Parse::RecDescent ($othergrammar);
3394
3395
3396 # Parse $text using rule 'startrule' (which must be
3397 # defined in $grammar):
3398
3399 $parser->startrule($text);
3400
3401
3402 # Parse $text using rule 'otherrule' (which must also
3403 # be defined in $grammar):
3404
3405 $parser->otherrule($text);
3406
3407
3408 # Change the universal token prefix pattern
3409 # before building a grammar
3410 # (the default is: '\s*'):
3411
3412 $Parse::RecDescent::skip = '[ \t]+';
3413
3414
3415 # Replace productions of existing rules (or create new ones)
3416 # with the productions defined in $newgrammar:
3417
3418 $parser->Replace($newgrammar);
3419
3420
3421 # Extend existing rules (or create new ones)
3422 # by adding extra productions defined in $moregrammar:
3423
3424 $parser->Extend($moregrammar);
3425
3426
3427 # Global flags (useful as command line arguments under -s):
3428
3429 $::RD_ERRORS # unless undefined, report fatal errors
3430 $::RD_WARN # unless undefined, also report non-fatal problems
3431 $::RD_HINT # if defined, also suggestion remedies
3432 $::RD_TRACE # if defined, also trace parsers' behaviour
3433 $::RD_AUTOSTUB # if defined, generates "stubs" for undefined rules
3434 $::RD_AUTOACTION # if defined, appends specified action to productions
3435
3436
3437=head1 DESCRIPTION
3438
3439=head2 Overview
3440
3441Parse::RecDescent incrementally generates top-down recursive-descent text
3442parsers from simple I<yacc>-like grammar specifications. It provides:
3443
3444=over 4
3445
3446=item *
3447
3448Regular expressions or literal strings as terminals (tokens),
3449
3450=item *
3451
3452Multiple (non-contiguous) productions for any rule,
3453
3454=item *
3455
3456Repeated and optional subrules within productions,
3457
3458=item *
3459
3460Full access to Perl within actions specified as part of the grammar,
3461
3462=item *
3463
3464Simple automated error reporting during parser generation and parsing,
3465
3466=item *
3467
3468The ability to commit to, uncommit to, or reject particular
3469productions during a parse,
3470
3471=item *
3472
3473The ability to pass data up and down the parse tree ("down" via subrule
3474argument lists, "up" via subrule return values)
3475
3476=item *
3477
3478Incremental extension of the parsing grammar (even during a parse),
3479
3480=item *
3481
3482Precompilation of parser objects,
3483
3484=item *
3485
3486User-definable reduce-reduce conflict resolution via
3487"scoring" of matching productions.
3488
3489=back
3490
3491=head2 Using C<Parse::RecDescent>
3492
3493Parser objects are created by calling C<Parse::RecDescent::new>, passing in a
3494grammar specification (see the following subsections). If the grammar is
3495correct, C<new> returns a blessed reference which can then be used to initiate
3496parsing through any rule specified in the original grammar. A typical sequence
3497looks like this:
3498
3499 $grammar = q {
3500 # GRAMMAR SPECIFICATION HERE
3501 };
3502
3503 $parser = new Parse::RecDescent ($grammar) or die "Bad grammar!\n";
3504
3505 # acquire $text
3506
3507 defined $parser->startrule($text) or print "Bad text!\n";
3508
3509The rule through which parsing is initiated must be explicitly defined
3510in the grammar (i.e. for the above example, the grammar must include a
3511rule of the form: "startrule: <subrules>".
3512
3513If the starting rule succeeds, its value (see below)
3514is returned. Failure to generate the original parser or failure to match a text
3515is indicated by returning C<undef>. Note that it's easy to set up grammars
3516that can succeed, but which return a value of 0, "0", or "". So don't be
3517tempted to write:
3518
3519 $parser->startrule($text) or print "Bad text!\n";
3520
3521Normally, the parser has no effect on the original text. So in the
3522previous example the value of $text would be unchanged after having
3523been parsed.
3524
3525If, however, the text to be matched is passed by reference:
3526
3527 $parser->startrule(\$text)
3528
3529then any text which was consumed during the match will be removed from the
3530start of $text.
3531
3532
3533=head2 Rules
3534
3535In the grammar from which the parser is built, rules are specified by
3536giving an identifier (which must satisfy /[A-Za-z]\w*/), followed by a
3537colon I<on the same line>, followed by one or more productions,
3538separated by single vertical bars. The layout of the productions
3539is entirely free-format:
3540
3541 rule1: production1
3542 | production2 |
3543 production3 | production4
3544
3545At any point in the grammar previously defined rules may be extended with
3546additional productions. This is achieved by redeclaring the rule with the new
3547productions. Thus:
3548
3549 rule1: a | b | c
3550 rule2: d | e | f
3551 rule1: g | h
3552
3553is exactly equivalent to:
3554
3555 rule1: a | b | c | g | h
3556 rule2: d | e | f
3557
3558Each production in a rule consists of zero or more items, each of which
3559may be either: the name of another rule to be matched (a "subrule"),
3560a pattern or string literal to be matched directly (a "token"), a
3561block of Perl code to be executed (an "action"), a special instruction
3562to the parser (a "directive"), or a standard Perl comment (which is
3563ignored).
3564
3565A rule matches a text if one of its productions matches. A production
3566matches if each of its items match consecutive substrings of the
3567text. The productions of a rule being matched are tried in the same
3568order that they appear in the original grammar, and the first matching
3569production terminates the match attempt (successfully). If all
3570productions are tried and none matches, the match attempt fails.
3571
3572Note that this behaviour is quite different from the "prefer the longer match"
3573behaviour of I<yacc>. For example, if I<yacc> were parsing the rule:
3574
3575 seq : 'A' 'B'
3576 | 'A' 'B' 'C'
3577
3578upon matching "AB" it would look ahead to see if a 'C' is next and, if
3579so, will match the second production in preference to the first. In
3580other words, I<yacc> effectively tries all the productions of a rule
3581breadth-first in parallel, and selects the "best" match, where "best"
3582means longest (note that this is a gross simplification of the true
3583behaviour of I<yacc> but it will do for our purposes).
3584
3585In contrast, C<Parse::RecDescent> tries each production depth-first in
3586sequence, and selects the "best" match, where "best" means first. This is
3587the fundamental difference between "bottom-up" and "recursive descent"
3588parsing.
3589
3590Each successfully matched item in a production is assigned a value,
3591which can be accessed in subsequent actions within the same
3592production (or, in some cases, as the return value of a successful
3593subrule call). Unsuccessful items don't have an associated value,
3594since the failure of an item causes the entire surrounding production
3595to immediately fail. The following sections describe the various types
3596of items and their success values.
3597
3598
3599=head2 Subrules
3600
3601A subrule which appears in a production is an instruction to the parser to
3602attempt to match the named rule at that point in the text being
3603parsed. If the named subrule is not defined when requested the
3604production containing it immediately fails (unless it was "autostubbed" - see
3605L<Autostubbing>).
3606
3607A rule may (recursively) call itself as a subrule, but I<not> as the
3608left-most item in any of its productions (since such recursions are usually
3609non-terminating).
3610
3611The value associated with a subrule is the value associated with its
3612C<$return> variable (see L<"Actions"> below), or with the last successfully
3613matched item in the subrule match.
3614
3615Subrules may also be specified with a trailing repetition specifier,
3616indicating that they are to be (greedily) matched the specified number
3617of times. The available specifiers are:
3618
3619 subrule(?) # Match one-or-zero times
3620 subrule(s) # Match one-or-more times
3621 subrule(s?) # Match zero-or-more times
3622 subrule(N) # Match exactly N times for integer N > 0
3623 subrule(N..M) # Match between N and M times
3624 subrule(..M) # Match between 1 and M times
3625 subrule(N..) # Match at least N times
3626
3627Repeated subrules keep matching until either the subrule fails to
3628match, or it has matched the minimal number of times but fails to
3629consume any of the parsed text (this second condition prevents the
3630subrule matching forever in some cases).
3631
3632Since a repeated subrule may match many instances of the subrule itself, the
3633value associated with it is not a simple scalar, but rather a reference to a
3634list of scalars, each of which is the value associated with one of the
3635individual subrule matches. In other words in the rule:
3636
3637 program: statement(s)
3638
3639the value associated with the repeated subrule "statement(s)" is a reference
3640to an array containing the values matched by each call to the individual
3641subrule "statement".
3642
3643Repetition modifiers may include a separator pattern:
3644
3645 program: statement(s /;/)
3646
3647specifying some sequence of characters to be skipped between each repetition.
3648This is really just a shorthand for the E<lt>leftop:...E<gt> directive
3649(see below).
3650
3651=head2 Tokens
3652
3653If a quote-delimited string or a Perl regex appears in a production,
3654the parser attempts to match that string or pattern at that point in
3655the text. For example:
3656
3657 typedef: "typedef" typename identifier ';'
3658
3659 identifier: /[A-Za-z_][A-Za-z0-9_]*/
3660
3661As in regular Perl, a single quoted string is uninterpolated, whilst
3662a double-quoted string or a pattern is interpolated (at the time
3663of matching, I<not> when the parser is constructed). Hence, it is
3664possible to define rules in which tokens can be set at run-time:
3665
3666 typedef: "$::typedefkeyword" typename identifier ';'
3667
3668 identifier: /$::identpat/
3669
3670Note that, since each rule is implemented inside a special namespace
3671belonging to its parser, it is necessary to explicitly quantify
3672variables from the main package.
3673
3674Regex tokens can be specified using just slashes as delimiters
3675or with the explicit C<mE<lt>delimiterE<gt>......E<lt>delimiterE<gt>> syntax:
3676
3677 typedef: "typedef" typename identifier ';'
3678
3679 typename: /[A-Za-z_][A-Za-z0-9_]*/
3680
3681 identifier: m{[A-Za-z_][A-Za-z0-9_]*}
3682
3683A regex of either type can also have any valid trailing parameter(s)
3684(that is, any of [cgimsox]):
3685
3686 typedef: "typedef" typename identifier ';'
3687
3688 identifier: / [a-z_] # LEADING ALPHA OR UNDERSCORE
3689 [a-z0-9_]* # THEN DIGITS ALSO ALLOWED
3690 /ix # CASE/SPACE/COMMENT INSENSITIVE
3691
3692The value associated with any successfully matched token is a string
3693containing the actual text which was matched by the token.
3694
3695It is important to remember that, since each grammar is specified in a
3696Perl string, all instances of the universal escape character '\' within
3697a grammar must be "doubled", so that they interpolate to single '\'s when
3698the string is compiled. For example, to use the grammar:
3699
3700 word: /\S+/ | backslash
3701 line: prefix word(s) "\n"
3702 backslash: '\\'
3703
3704the following code is required:
3705
3706 $parser = new Parse::RecDescent (q{
3707
3708 word: /\\S+/ | backslash
3709 line: prefix word(s) "\\n"
3710 backslash: '\\\\'
3711
3712 });
3713
3714=head2 Anonymous subrules
3715
3716Parentheses introduce a nested scope that is very like a call to an anonymous
3717subrule. Hence they are useful for "in-lining" subroutine calls, and other
3718kinds of grouping behaviour. For example, instead of:
3719
3720 word: /\S+/ | backslash
3721 line: prefix word(s) "\n"
3722
3723you could write:
3724
3725 line: prefix ( /\S+/ | backslash )(s) "\n"
3726
3727and get exactly the same effects.
3728
3729Parentheses are also use for collecting unrepeated alternations within a
3730single production.
3731
3732 secret_identity: "Mr" ("Incredible"|"Fantastic"|"Sheen") ", Esq."
3733
3734
3735=head2 Terminal Separators
3736
3737For the purpose of matching, each terminal in a production is considered
3738to be preceded by a "prefix" - a pattern which must be
3739matched before a token match is attempted. By default, the
3740prefix is optional whitespace (which always matches, at
3741least trivially), but this default may be reset in any production.
3742
3743The variable C<$Parse::RecDescent::skip> stores the universal
3744prefix, which is the default for all terminal matches in all parsers
3745built with C<Parse::RecDescent>.
3746
3747If you want to change the universal prefix using
3748C<$Parse::RecDescent::skip>, be careful to set it I<before> creating
3749the grammar object, because it is applied statically (when a grammar
3750is built) rather than dynamically (when the grammar is used).
3751Alternatively you can provide a global C<E<lt>skip:...E<gt>> directive
3752in your grammar before any rules (described later).
3753
3754The prefix for an individual production can be altered
3755by using the C<E<lt>skip:...E<gt>> directive (described later).
3756Setting this directive in the top-level rule is an alternative approach
3757to setting C<$Parse::RecDescent::skip> before creating the object, but
3758in this case you don't get the intended skipping behaviour if you
3759directly invoke methods different from the top-level rule.
3760
3761
3762=head2 Actions
3763
3764An action is a block of Perl code which is to be executed (as the
3765block of a C<do> statement) when the parser reaches that point in a
3766production. The action executes within a special namespace belonging to
3767the active parser, so care must be taken in correctly qualifying variable
3768names (see also L<Start-up Actions> below).
3769
3770The action is considered to succeed if the final value of the block
3771is defined (that is, if the implied C<do> statement evaluates to a
3772defined value - I<even one which would be treated as "false">). Note
3773that the value associated with a successful action is also the final
3774value in the block.
3775
3776An action will I<fail> if its last evaluated value is C<undef>. This is
3777surprisingly easy to accomplish by accident. For instance, here's an
3778infuriating case of an action that makes its production fail, but only
3779when debugging I<isn't> activated:
3780
3781 description: name rank serial_number
3782 { print "Got $item[2] $item[1] ($item[3])\n"
3783 if $::debugging
3784 }
3785
3786If C<$debugging> is false, no statement in the block is executed, so
3787the final value is C<undef>, and the entire production fails. The solution is:
3788
3789 description: name rank serial_number
3790 { print "Got $item[2] $item[1] ($item[3])\n"
3791 if $::debugging;
3792 1;
3793 }
3794
3795Within an action, a number of useful parse-time variables are
3796available in the special parser namespace (there are other variables
3797also accessible, but meddling with them will probably just break your
3798parser. As a general rule, if you avoid referring to unqualified
3799variables - especially those starting with an underscore - inside an action,
3800things should be okay):
3801
3802=over 4
3803
3804=item C<@item> and C<%item>
3805
3806The array slice C<@item[1..$#item]> stores the value associated with each item
3807(that is, each subrule, token, or action) in the current production. The
3808analogy is to C<$1>, C<$2>, etc. in a I<yacc> grammar.
3809Note that, for obvious reasons, C<@item> only contains the
3810values of items I<before> the current point in the production.
3811
3812The first element (C<$item[0]>) stores the name of the current rule
3813being matched.
3814
3815C<@item> is a standard Perl array, so it can also be indexed with negative
3816numbers, representing the number of items I<back> from the current position in
3817the parse:
3818
3819 stuff: /various/ bits 'and' pieces "then" data 'end'
3820 { print $item[-2] } # PRINTS data
3821 # (EASIER THAN: $item[6])
3822
3823The C<%item> hash complements the <@item> array, providing named
3824access to the same item values:
3825
3826 stuff: /various/ bits 'and' pieces "then" data 'end'
3827 { print $item{data} # PRINTS data
3828 # (EVEN EASIER THAN USING @item)
3829
3830
3831The results of named subrules are stored in the hash under each
3832subrule's name (including the repetition specifier, if any),
3833whilst all other items are stored under a "named
3834positional" key that indictates their ordinal position within their item
3835type: __STRINGI<n>__, __PATTERNI<n>__, __DIRECTIVEI<n>__, __ACTIONI<n>__:
3836
3837 stuff: /various/ bits 'and' pieces "then" data 'end' { save }
3838 { print $item{__PATTERN1__}, # PRINTS 'various'
3839 $item{__STRING2__}, # PRINTS 'then'
3840 $item{__ACTION1__}, # PRINTS RETURN
3841 # VALUE OF save
3842 }
3843
3844
3845If you want proper I<named> access to patterns or literals, you need to turn
3846them into separate rules:
3847
3848 stuff: various bits 'and' pieces "then" data 'end'
3849 { print $item{various} # PRINTS various
3850 }
3851
3852 various: /various/
3853
3854
3855The special entry C<$item{__RULE__}> stores the name of the current
3856rule (i.e. the same value as C<$item[0]>.
3857
3858The advantage of using C<%item>, instead of C<@items> is that it
3859removes the need to track items positions that may change as a grammar
3860evolves. For example, adding an interim C<E<lt>skipE<gt>> directive
3861of action can silently ruin a trailing action, by moving an C<@item>
3862element "down" the array one place. In contrast, the named entry
3863of C<%item> is unaffected by such an insertion.
3864
3865A limitation of the C<%item> hash is that it only records the I<last>
3866value of a particular subrule. For example:
3867
3868 range: '(' number '..' number )'
3869 { $return = $item{number} }
3870
3871will return only the value corresponding to the I<second> match of the
3872C<number> subrule. In other words, successive calls to a subrule
3873overwrite the corresponding entry in C<%item>. Once again, the
3874solution is to rename each subrule in its own rule:
3875
3876 range: '(' from_num '..' to_num ')'
3877 { $return = $item{from_num} }
3878
3879 from_num: number
3880 to_num: number
3881
3882
3883
3884=item C<@arg> and C<%arg>
3885
3886The array C<@arg> and the hash C<%arg> store any arguments passed to
3887the rule from some other rule (see L<Subrule argument lists>). Changes
3888to the elements of either variable do not propagate back to the calling
3889rule (data can be passed back from a subrule via the C<$return>
3890variable - see next item).
3891
3892
3893=item C<$return>
3894
3895If a value is assigned to C<$return> within an action, that value is
3896returned if the production containing the action eventually matches
3897successfully. Note that setting C<$return> I<doesn't> cause the current
3898production to succeed. It merely tells it what to return if it I<does> succeed.
3899Hence C<$return> is analogous to C<$$> in a I<yacc> grammar.
3900
3901If C<$return> is not assigned within a production, the value of the
3902last component of the production (namely: C<$item[$#item]>) is
3903returned if the production succeeds.
3904
3905
3906=item C<$commit>
3907
3908The current state of commitment to the current production (see L<"Directives">
3909below).
3910
3911=item C<$skip>
3912
3913The current terminal prefix (see L<"Directives"> below).
3914
3915=item C<$text>
3916
3917The remaining (unparsed) text. Changes to C<$text> I<do not
3918propagate> out of unsuccessful productions, but I<do> survive
3919successful productions. Hence it is possible to dynamically alter the
3920text being parsed - for example, to provide a C<#include>-like facility:
3921
3922 hash_include: '#include' filename
3923 { $text = ::loadfile($item[2]) . $text }
3924
3925 filename: '<' /[a-z0-9._-]+/i '>' { $return = $item[2] }
3926 | '"' /[a-z0-9._-]+/i '"' { $return = $item[2] }
3927
3928
3929=item C<$thisline> and C<$prevline>
3930
3931C<$thisline> stores the current line number within the current parse
3932(starting from 1). C<$prevline> stores the line number for the last
3933character which was already successfully parsed (this will be different from
3934C<$thisline> at the end of each line).
3935
3936For efficiency, C<$thisline> and C<$prevline> are actually tied
3937hashes, and only recompute the required line number when the variable's
3938value is used.
3939
3940Assignment to C<$thisline> adjusts the line number calculator, so that
3941it believes that the current line number is the value being assigned. Note
3942that this adjustment will be reflected in all subsequent line numbers
3943calculations.
3944
3945Modifying the value of the variable C<$text> (as in the previous
3946C<hash_include> example, for instance) will confuse the line
3947counting mechanism. To prevent this, you should call
3948C<Parse::RecDescent::LineCounter::resync($thisline)> I<immediately>
3949after any assignment to the variable C<$text> (or, at least, before the
3950next attempt to use C<$thisline>).
3951
3952Note that if a production fails after assigning to or
3953resync'ing C<$thisline>, the parser's line counter mechanism will
3954usually be corrupted.
3955
3956Also see the entry for C<@itempos>.
3957
3958The line number can be set to values other than 1, by calling the start
3959rule with a second argument. For example:
3960
3961 $parser = new Parse::RecDescent ($grammar);
3962
3963 $parser->input($text, 10); # START LINE NUMBERS AT 10
3964
3965
3966=item C<$thiscolumn> and C<$prevcolumn>
3967
3968C<$thiscolumn> stores the current column number within the current line
3969being parsed (starting from 1). C<$prevcolumn> stores the column number
3970of the last character which was actually successfully parsed. Usually
3971C<$prevcolumn == $thiscolumn-1>, but not at the end of lines.
3972
3973For efficiency, C<$thiscolumn> and C<$prevcolumn> are
3974actually tied hashes, and only recompute the required column number
3975when the variable's value is used.
3976
3977Assignment to C<$thiscolumn> or C<$prevcolumn> is a fatal error.
3978
3979Modifying the value of the variable C<$text> (as in the previous
3980C<hash_include> example, for instance) may confuse the column
3981counting mechanism.
3982
3983Note that C<$thiscolumn> reports the column number I<before> any
3984whitespace that might be skipped before reading a token. Hence
3985if you wish to know where a token started (and ended) use something like this:
3986
3987 rule: token1 token2 startcol token3 endcol token4
3988 { print "token3: columns $item[3] to $item[5]"; }
3989
3990 startcol: '' { $thiscolumn } # NEED THE '' TO STEP PAST TOKEN SEP
3991 endcol: { $prevcolumn }
3992
3993Also see the entry for C<@itempos>.
3994
3995=item C<$thisoffset> and C<$prevoffset>
3996
3997C<$thisoffset> stores the offset of the current parsing position
3998within the complete text
3999being parsed (starting from 0). C<$prevoffset> stores the offset
4000of the last character which was actually successfully parsed. In all
4001cases C<$prevoffset == $thisoffset-1>.
4002
4003For efficiency, C<$thisoffset> and C<$prevoffset> are
4004actually tied hashes, and only recompute the required offset
4005when the variable's value is used.
4006
4007Assignment to C<$thisoffset> or <$prevoffset> is a fatal error.
4008
4009Modifying the value of the variable C<$text> will I<not> affect the
4010offset counting mechanism.
4011
4012Also see the entry for C<@itempos>.
4013
4014=item C<@itempos>
4015
4016The array C<@itempos> stores a hash reference corresponding to
4017each element of C<@item>. The elements of the hash provide the
4018following:
4019
4020 $itempos[$n]{offset}{from} # VALUE OF $thisoffset BEFORE $item[$n]
4021 $itempos[$n]{offset}{to} # VALUE OF $prevoffset AFTER $item[$n]
4022 $itempos[$n]{line}{from} # VALUE OF $thisline BEFORE $item[$n]
4023 $itempos[$n]{line}{to} # VALUE OF $prevline AFTER $item[$n]
4024 $itempos[$n]{column}{from} # VALUE OF $thiscolumn BEFORE $item[$n]
4025 $itempos[$n]{column}{to} # VALUE OF $prevcolumn AFTER $item[$n]
4026
4027Note that the various C<$itempos[$n]...{from}> values record the
4028appropriate value I<after> any token prefix has been skipped.
4029
4030Hence, instead of the somewhat tedious and error-prone:
4031
4032 rule: startcol token1 endcol
4033 startcol token2 endcol
4034 startcol token3 endcol
4035 { print "token1: columns $item[1]
4036 to $item[3]
4037 token2: columns $item[4]
4038 to $item[6]
4039 token3: columns $item[7]
4040 to $item[9]" }
4041
4042 startcol: '' { $thiscolumn } # NEED THE '' TO STEP PAST TOKEN SEP
4043 endcol: { $prevcolumn }
4044
4045it is possible to write:
4046
4047 rule: token1 token2 token3
4048 { print "token1: columns $itempos[1]{column}{from}
4049 to $itempos[1]{column}{to}
4050 token2: columns $itempos[2]{column}{from}
4051 to $itempos[2]{column}{to}
4052 token3: columns $itempos[3]{column}{from}
4053 to $itempos[3]{column}{to}" }
4054
4055Note however that (in the current implementation) the use of C<@itempos>
4056anywhere in a grammar implies that item positioning information is
4057collected I<everywhere> during the parse. Depending on the grammar
4058and the size of the text to be parsed, this may be prohibitively
4059expensive and the explicit use of C<$thisline>, C<$thiscolumn>, etc. may
4060be a better choice.
4061
4062
4063=item C<$thisparser>
4064
4065A reference to the S<C<Parse::RecDescent>> object through which
4066parsing was initiated.
4067
4068The value of C<$thisparser> propagates down the subrules of a parse
4069but not back up. Hence, you can invoke subrules from another parser
4070for the scope of the current rule as follows:
4071
4072 rule: subrule1 subrule2
4073 | { $thisparser = $::otherparser } <reject>
4074 | subrule3 subrule4
4075 | subrule5
4076
4077The result is that the production calls "subrule1" and "subrule2" of
4078the current parser, and the remaining productions call the named subrules
4079from C<$::otherparser>. Note, however that "Bad Things" will happen if
4080C<::otherparser> isn't a blessed reference and/or doesn't have methods
4081with the same names as the required subrules!
4082
4083=item C<$thisrule>
4084
4085A reference to the S<C<Parse::RecDescent::Rule>> object corresponding to the
4086rule currently being matched.
4087
4088=item C<$thisprod>
4089
4090A reference to the S<C<Parse::RecDescent::Production>> object
4091corresponding to the production currently being matched.
4092
4093=item C<$score> and C<$score_return>
4094
4095$score stores the best production score to date, as specified by
4096an earlier C<E<lt>score:...E<gt>> directive. $score_return stores
4097the corresponding return value for the successful production.
4098
4099See L<Scored productions>.
4100
4101=back
4102
4103B<Warning:> the parser relies on the information in the various C<this...>
4104objects in some non-obvious ways. Tinkering with the other members of
4105these objects will probably cause Bad Things to happen, unless you
4106I<really> know what you're doing. The only exception to this advice is
4107that the use of C<$this...-E<gt>{local}> is always safe.
4108
4109
4110=head2 Start-up Actions
4111
4112Any actions which appear I<before> the first rule definition in a
4113grammar are treated as "start-up" actions. Each such action is
4114stripped of its outermost brackets and then evaluated (in the parser's
4115special namespace) just before the rules of the grammar are first
4116compiled.
4117
4118The main use of start-up actions is to declare local variables within the
4119parser's special namespace:
4120
4121 { my $lastitem = '???'; }
4122
4123 list: item(s) { $return = $lastitem }
4124
4125 item: book { $lastitem = 'book'; }
4126 bell { $lastitem = 'bell'; }
4127 candle { $lastitem = 'candle'; }
4128
4129but start-up actions can be used to execute I<any> valid Perl code
4130within a parser's special namespace.
4131
4132Start-up actions can appear within a grammar extension or replacement
4133(that is, a partial grammar installed via C<Parse::RecDescent::Extend()> or
4134C<Parse::RecDescent::Replace()> - see L<Incremental Parsing>), and will be
4135executed before the new grammar is installed. Note, however, that a
4136particular start-up action is only ever executed once.
4137
4138
4139=head2 Autoactions
4140
4141It is sometimes desirable to be able to specify a default action to be
4142taken at the end of every production (for example, in order to easily
4143build a parse tree). If the variable C<$::RD_AUTOACTION> is defined
4144when C<Parse::RecDescent::new()> is called, the contents of that
4145variable are treated as a specification of an action which is to appended
4146to each production in the corresponding grammar.
4147
4148Alternatively, you can hard-code the autoaction within a grammar, using the
4149C<< <autoaction:...> >> directive.
4150
4151So, for example, to construct a simple parse tree you could write:
4152
4153 $::RD_AUTOACTION = q { [@item] };
4154
4155 parser = Parse::RecDescent->new(q{
4156 expression: and_expr '||' expression | and_expr
4157 and_expr: not_expr '&&' and_expr | not_expr
4158 not_expr: '!' brack_expr | brack_expr
4159 brack_expr: '(' expression ')' | identifier
4160 identifier: /[a-z]+/i
4161 });
4162
4163or:
4164
4165 parser = Parse::RecDescent->new(q{
4166 <autoaction: { [@item] } >
4167
4168 expression: and_expr '||' expression | and_expr
4169 and_expr: not_expr '&&' and_expr | not_expr
4170 not_expr: '!' brack_expr | brack_expr
4171 brack_expr: '(' expression ')' | identifier
4172 identifier: /[a-z]+/i
4173 });
4174
4175Either of these is equivalent to:
4176
4177 parser = new Parse::RecDescent (q{
4178 expression: and_expr '||' expression
4179 { [@item] }
4180 | and_expr
4181 { [@item] }
4182
4183 and_expr: not_expr '&&' and_expr
4184 { [@item] }
4185 | not_expr
4186 { [@item] }
4187
4188 not_expr: '!' brack_expr
4189 { [@item] }
4190 | brack_expr
4191 { [@item] }
4192
4193 brack_expr: '(' expression ')'
4194 { [@item] }
4195 | identifier
4196 { [@item] }
4197
4198 identifier: /[a-z]+/i
4199 { [@item] }
4200 });
4201
4202Alternatively, we could take an object-oriented approach, use different
4203classes for each node (and also eliminating redundant intermediate nodes):
4204
4205 $::RD_AUTOACTION = q
4206 { $#item==1 ? $item[1] : "$item[0]_node"->new(@item[1..$#item]) };
4207
4208 parser = Parse::RecDescent->new(q{
4209 expression: and_expr '||' expression | and_expr
4210 and_expr: not_expr '&&' and_expr | not_expr
4211 not_expr: '!' brack_expr | brack_expr
4212 brack_expr: '(' expression ')' | identifier
4213 identifier: /[a-z]+/i
4214 });
4215
4216or:
4217
4218 parser = Parse::RecDescent->new(q{
4219 <autoaction:
4220 $#item==1 ? $item[1] : "$item[0]_node"->new(@item[1..$#item])
4221 >
4222
4223 expression: and_expr '||' expression | and_expr
4224 and_expr: not_expr '&&' and_expr | not_expr
4225 not_expr: '!' brack_expr | brack_expr
4226 brack_expr: '(' expression ')' | identifier
4227 identifier: /[a-z]+/i
4228 });
4229
4230which are equivalent to:
4231
4232 parser = Parse::RecDescent->new(q{
4233 expression: and_expr '||' expression
4234 { "expression_node"->new(@item[1..3]) }
4235 | and_expr
4236
4237 and_expr: not_expr '&&' and_expr
4238 { "and_expr_node"->new(@item[1..3]) }
4239 | not_expr
4240
4241 not_expr: '!' brack_expr
4242 { "not_expr_node"->new(@item[1..2]) }
4243 | brack_expr
4244
4245 brack_expr: '(' expression ')'
4246 { "brack_expr_node"->new(@item[1..3]) }
4247 | identifier
4248
4249 identifier: /[a-z]+/i
4250 { "identifer_node"->new(@item[1]) }
4251 });
4252
4253Note that, if a production already ends in an action, no autoaction is appended
4254to it. For example, in this version:
4255
4256 $::RD_AUTOACTION = q
4257 { $#item==1 ? $item[1] : "$item[0]_node"->new(@item[1..$#item]) };
4258
4259 parser = Parse::RecDescent->new(q{
4260 expression: and_expr '&&' expression | and_expr
4261 and_expr: not_expr '&&' and_expr | not_expr
4262 not_expr: '!' brack_expr | brack_expr
4263 brack_expr: '(' expression ')' | identifier
4264 identifier: /[a-z]+/i
4265 { 'terminal_node'->new($item[1]) }
4266 });
4267
4268each C<identifier> match produces a C<terminal_node> object, I<not> an
4269C<identifier_node> object.
4270
4271A level 1 warning is issued each time an "autoaction" is added to
4272some production.
4273
4274
4275=head2 Autotrees
4276
4277A commonly needed autoaction is one that builds a parse-tree. It is moderately
4278tricky to set up such an action (which must treat terminals differently from
4279non-terminals), so Parse::RecDescent simplifies the process by providing the
4280C<E<lt>autotreeE<gt>> directive.
4281
4282If this directive appears at the start of grammar, it causes
4283Parse::RecDescent to insert autoactions at the end of any rule except
4284those which already end in an action. The action inserted depends on whether
4285the production is an intermediate rule (two or more items), or a terminal
4286of the grammar (i.e. a single pattern or string item).
4287
4288So, for example, the following grammar:
4289
4290 <autotree>
4291
4292 file : command(s)
4293 command : get | set | vet
4294 get : 'get' ident ';'
4295 set : 'set' ident 'to' value ';'
4296 vet : 'check' ident 'is' value ';'
4297 ident : /\w+/
4298 value : /\d+/
4299
4300is equivalent to:
4301
4302 file : command(s) { bless \%item, $item[0] }
4303 command : get { bless \%item, $item[0] }
4304 | set { bless \%item, $item[0] }
4305 | vet { bless \%item, $item[0] }
4306 get : 'get' ident ';' { bless \%item, $item[0] }
4307 set : 'set' ident 'to' value ';' { bless \%item, $item[0] }
4308 vet : 'check' ident 'is' value ';' { bless \%item, $item[0] }
4309
4310 ident : /\w+/ { bless {__VALUE__=>$item[1]}, $item[0] }
4311 value : /\d+/ { bless {__VALUE__=>$item[1]}, $item[0] }
4312
4313Note that each node in the tree is blessed into a class of the same name
4314as the rule itself. This makes it easy to build object-oriented
4315processors for the parse-trees that the grammar produces. Note too that
4316the last two rules produce special objects with the single attribute
4317'__VALUE__'. This is because they consist solely of a single terminal.
4318
4319This autoaction-ed grammar would then produce a parse tree in a data
4320structure like this:
4321
4322 {
4323 file => {
4324 command => {
4325 [ get => {
4326 identifier => { __VALUE__ => 'a' },
4327 },
4328 set => {
4329 identifier => { __VALUE__ => 'b' },
4330 value => { __VALUE__ => '7' },
4331 },
4332 vet => {
4333 identifier => { __VALUE__ => 'b' },
4334 value => { __VALUE__ => '7' },
4335 },
4336 ],
4337 },
4338 }
4339 }
4340
4341(except, of course, that each nested hash would also be blessed into
4342the appropriate class).
4343
4344You can also specify a base class for the C<E<lt>autotreeE<gt>> directive.
4345The supplied prefix will be prepended to the rule names when creating
4346tree nodes. The following are equivalent:
4347
4348 <autotree:MyBase::Class>
4349 <autotree:MyBase::Class::>
4350
4351And will produce a root node blessed into the C<MyBase::Class::file>
4352package in the example above.
4353
4354
4355=head2 Autostubbing
4356
4357Normally, if a subrule appears in some production, but no rule of that
4358name is ever defined in the grammar, the production which refers to the
4359non-existent subrule fails immediately. This typically occurs as a
4360result of misspellings, and is a sufficiently common occurance that a
4361warning is generated for such situations.
4362
4363However, when prototyping a grammar it is sometimes useful to be
4364able to use subrules before a proper specification of them is
4365really possible. For example, a grammar might include a section like:
4366
4367 function_call: identifier '(' arg(s?) ')'
4368
4369 identifier: /[a-z]\w*/i
4370
4371where the possible format of an argument is sufficiently complex that
4372it is not worth specifying in full until the general function call
4373syntax has been debugged. In this situation it is convenient to leave
4374the real rule C<arg> undefined and just slip in a placeholder (or
4375"stub"):
4376
4377 arg: 'arg'
4378
4379so that the function call syntax can be tested with dummy input such as:
4380
4381 f0()
4382 f1(arg)
4383 f2(arg arg)
4384 f3(arg arg arg)
4385
4386et cetera.
4387
4388Early in prototyping, many such "stubs" may be required, so
4389C<Parse::RecDescent> provides a means of automating their definition.
4390If the variable C<$::RD_AUTOSTUB> is defined when a parser is built, a
4391subrule reference to any non-existent rule (say, C<subrule>), will
4392cause a "stub" rule to be automatically defined in the generated
4393parser. If C<$::RD_AUTOSTUB eq '1'> or is false, a stub rule of the
4394form:
4395
4396 subrule: 'subrule'
4397
4398will be generated. The special-case for a value of C<'1'> is to allow
4399the use of the B<perl -s> with B<-RD_AUTOSTUB> without generating
4400C<subrule: '1'> per below. If C<$::RD_AUTOSTUB> is true, a stub rule
4401of the form:
4402
4403 subrule: $::RD_AUTOSTUB
4404
4405will be generated. C<$::RD_AUTOSTUB> must contain a valid production
4406item, no checking is performed. No lazy evaluation of
4407C<$::RD_AUTOSTUB> is performed, it is evaluated at the time the Parser
4408is generated.
4409
4410Hence, with C<$::RD_AUTOSTUB> defined, it is possible to only
4411partially specify a grammar, and then "fake" matches of the
4412unspecified (sub)rules by just typing in their name, or a literal
4413value that was assigned to C<$::RD_AUTOSTUB>.
4414
4415
4416
4417=head2 Look-ahead
4418
4419If a subrule, token, or action is prefixed by "...", then it is
4420treated as a "look-ahead" request. That means that the current production can
4421(as usual) only succeed if the specified item is matched, but that the matching
4422I<does not consume any of the text being parsed>. This is very similar to the
4423C</(?=...)/> look-ahead construct in Perl patterns. Thus, the rule:
4424
4425 inner_word: word ...word
4426
4427will match whatever the subrule "word" matches, provided that match is followed
4428by some more text which subrule "word" would also match (although this
4429second substring is not actually consumed by "inner_word")
4430
4431Likewise, a "...!" prefix, causes the following item to succeed (without
4432consuming any text) if and only if it would normally fail. Hence, a
4433rule such as:
4434
4435 identifier: ...!keyword ...!'_' /[A-Za-z_]\w*/
4436
4437matches a string of characters which satisfies the pattern
4438C</[A-Za-z_]\w*/>, but only if the same sequence of characters would
4439not match either subrule "keyword" or the literal token '_'.
4440
4441Sequences of look-ahead prefixes accumulate, multiplying their positive and/or
4442negative senses. Hence:
4443
4444 inner_word: word ...!......!word
4445
4446is exactly equivalent the the original example above (a warning is issued in
4447cases like these, since they often indicate something left out, or
4448misunderstood).
4449
4450Note that actions can also be treated as look-aheads. In such cases,
4451the state of the parser text (in the local variable C<$text>)
4452I<after> the look-ahead action is guaranteed to be identical to its
4453state I<before> the action, regardless of how it's changed I<within>
4454the action (unless you actually undefine C<$text>, in which case you
4455get the disaster you deserve :-).
4456
4457
4458=head2 Directives
4459
4460Directives are special pre-defined actions which may be used to alter
4461the behaviour of the parser. There are currently twenty-three directives:
4462C<E<lt>commitE<gt>>,
4463C<E<lt>uncommitE<gt>>,
4464C<E<lt>rejectE<gt>>,
4465C<E<lt>scoreE<gt>>,
4466C<E<lt>autoscoreE<gt>>,
4467C<E<lt>skipE<gt>>,
4468C<E<lt>resyncE<gt>>,
4469C<E<lt>errorE<gt>>,
4470C<E<lt>warnE<gt>>,
4471C<E<lt>hintE<gt>>,
4472C<E<lt>trace_buildE<gt>>,
4473C<E<lt>trace_parseE<gt>>,
4474C<E<lt>nocheckE<gt>>,
4475C<E<lt>rulevarE<gt>>,
4476C<E<lt>matchruleE<gt>>,
4477C<E<lt>leftopE<gt>>,
4478C<E<lt>rightopE<gt>>,
4479C<E<lt>deferE<gt>>,
4480C<E<lt>nocheckE<gt>>,
4481C<E<lt>perl_quotelikeE<gt>>,
4482C<E<lt>perl_codeblockE<gt>>,
4483C<E<lt>perl_variableE<gt>>,
4484and C<E<lt>tokenE<gt>>.
4485
4486=over 4
4487
4488=item Committing and uncommitting
4489
4490The C<E<lt>commitE<gt>> and C<E<lt>uncommitE<gt>> directives permit the recursive
4491descent of the parse tree to be pruned (or "cut") for efficiency.
4492Within a rule, a C<E<lt>commitE<gt>> directive instructs the rule to ignore subsequent
4493productions if the current production fails. For example:
4494
4495 command: 'find' <commit> filename
4496 | 'open' <commit> filename
4497 | 'move' filename filename
4498
4499Clearly, if the leading token 'find' is matched in the first production but that
4500production fails for some other reason, then the remaining
4501productions cannot possibly match. The presence of the
4502C<E<lt>commitE<gt>> causes the "command" rule to fail immediately if
4503an invalid "find" command is found, and likewise if an invalid "open"
4504command is encountered.
4505
4506It is also possible to revoke a previous commitment. For example:
4507
4508 if_statement: 'if' <commit> condition
4509 'then' block <uncommit>
4510 'else' block
4511 | 'if' <commit> condition
4512 'then' block
4513
4514In this case, a failure to find an "else" block in the first
4515production shouldn't preclude trying the second production, but a
4516failure to find a "condition" certainly should.
4517
4518As a special case, any production in which the I<first> item is an
4519C<E<lt>uncommitE<gt>> immediately revokes a preceding C<E<lt>commitE<gt>>
4520(even though the production would not otherwise have been tried). For
4521example, in the rule:
4522
4523 request: 'explain' expression
4524 | 'explain' <commit> keyword
4525 | 'save'
4526 | 'quit'
4527 | <uncommit> term '?'
4528
4529if the text being matched was "explain?", and the first two
4530productions failed, then the C<E<lt>commitE<gt>> in production two would cause
4531productions three and four to be skipped, but the leading
4532C<E<lt>uncommitE<gt>> in the production five would allow that production to
4533attempt a match.
4534
4535Note in the preceding example, that the C<E<lt>commitE<gt>> was only placed
4536in production two. If production one had been:
4537
4538 request: 'explain' <commit> expression
4539
4540then production two would be (inappropriately) skipped if a leading
4541"explain..." was encountered.
4542
4543Both C<E<lt>commitE<gt>> and C<E<lt>uncommitE<gt>> directives always succeed, and their value
4544is always 1.
4545
4546
4547=item Rejecting a production
4548
4549The C<E<lt>rejectE<gt>> directive immediately causes the current production
4550to fail (it is exactly equivalent to, but more obvious than, the
4551action C<{undef}>). A C<E<lt>rejectE<gt>> is useful when it is desirable to get
4552the side effects of the actions in one production, without prejudicing a match
4553by some other production later in the rule. For example, to insert
4554tracing code into the parse:
4555
4556 complex_rule: { print "In complex rule...\n"; } <reject>
4557
4558 complex_rule: simple_rule '+' 'i' '*' simple_rule
4559 | 'i' '*' simple_rule
4560 | simple_rule
4561
4562
4563It is also possible to specify a conditional rejection, using the
4564form C<E<lt>reject:I<condition>E<gt>>, which only rejects if the
4565specified condition is true. This form of rejection is exactly
4566equivalent to the action C<{(I<condition>)?undef:1}E<gt>>.
4567For example:
4568
4569 command: save_command
4570 | restore_command
4571 | <reject: defined $::tolerant> { exit }
4572 | <error: Unknown command. Ignored.>
4573
4574A C<E<lt>rejectE<gt>> directive never succeeds (and hence has no
4575associated value). A conditional rejection may succeed (if its
4576condition is not satisfied), in which case its value is 1.
4577
4578As an extra optimization, C<Parse::RecDescent> ignores any production
4579which I<begins> with an unconditional C<E<lt>rejectE<gt>> directive,
4580since any such production can never successfully match or have any
4581useful side-effects. A level 1 warning is issued in all such cases.
4582
4583Note that productions beginning with conditional
4584C<E<lt>reject:...E<gt>> directives are I<never> "optimized away" in
4585this manner, even if they are always guaranteed to fail (for example:
4586C<E<lt>reject:1E<gt>>)
4587
4588Due to the way grammars are parsed, there is a minor restriction on the
4589condition of a conditional C<E<lt>reject:...E<gt>>: it cannot
4590contain any raw '<' or '>' characters. For example:
4591
4592 line: cmd <reject: $thiscolumn > max> data
4593
4594results in an error when a parser is built from this grammar (since the
4595grammar parser has no way of knowing whether the first > is a "less than"
4596or the end of the C<E<lt>reject:...E<gt>>.
4597
4598To overcome this problem, put the condition inside a do{} block:
4599
4600 line: cmd <reject: do{$thiscolumn > max}> data
4601
4602Note that the same problem may occur in other directives that take
4603arguments. The same solution will work in all cases.
4604
4605
4606=item Skipping between terminals
4607
4608The C<E<lt>skipE<gt>> directive enables the terminal prefix used in
4609a production to be changed. For example:
4610
4611 OneLiner: Command <skip:'[ \t]*'> Arg(s) /;/
4612
4613causes only blanks and tabs to be skipped before terminals in the C<Arg>
4614subrule (and any of I<its> subrules>, and also before the final C</;/> terminal.
4615Once the production is complete, the previous terminal prefix is
4616reinstated. Note that this implies that distinct productions of a rule
4617must reset their terminal prefixes individually.
4618
4619The C<E<lt>skipE<gt>> directive evaluates to the I<previous> terminal prefix,
4620so it's easy to reinstate a prefix later in a production:
4621
4622 Command: <skip:","> CSV(s) <skip:$item[1]> Modifier
4623
4624The value specified after the colon is interpolated into a pattern, so all of
4625the following are equivalent (though their efficiency increases down the list):
4626
4627 <skip: "$colon|$comma"> # ASSUMING THE VARS HOLD THE OBVIOUS VALUES
4628
4629 <skip: ':|,'>
4630
4631 <skip: q{[:,]}>
4632
4633 <skip: qr/[:,]/>
4634
4635There is no way of directly setting the prefix for
4636an entire rule, except as follows:
4637
4638 Rule: <skip: '[ \t]*'> Prod1
4639 | <skip: '[ \t]*'> Prod2a Prod2b
4640 | <skip: '[ \t]*'> Prod3
4641
4642or, better:
4643
4644 Rule: <skip: '[ \t]*'>
4645 (
4646 Prod1
4647 | Prod2a Prod2b
4648 | Prod3
4649 )
4650
4651The skip pattern is passed down to subrules, so setting the skip for
4652the top-level rule as described above actually sets the prefix for the
4653entire grammar (provided that you only call the method corresponding
4654to the top-level rule itself). Alternatively, or if you have more than
4655one top-level rule in your grammar, you can provide a global
4656C<E<lt>skipE<gt>> directive prior to defining any rules in the
4657grammar. These are the preferred alternatives to setting
4658C<$Parse::RecDescent::skip>.
4659
4660Additionally, using C<E<lt>skipE<gt>> actually allows you to have
4661a completely dynamic skipping behaviour. For example:
4662
4663 Rule_with_dynamic_skip: <skip: $::skip_pattern> Rule
4664
4665Then you can set C<$::skip_pattern> before invoking
4666C<Rule_with_dynamic_skip> and have it skip whatever you specified.
4667
4668B<Note: Up to release 1.51 of Parse::RecDescent, an entirely different
4669mechanism was used for specifying terminal prefixes. The current method
4670is not backwards-compatible with that early approach. The current approach
4671is stable and will not to change again.>
4672
4673
4674=item Resynchronization
4675
4676The C<E<lt>resyncE<gt>> directive provides a visually distinctive
4677means of consuming some of the text being parsed, usually to skip an
4678erroneous input. In its simplest form C<E<lt>resyncE<gt>> simply
4679consumes text up to and including the next newline (C<"\n">)
4680character, succeeding only if the newline is found, in which case it
4681causes its surrounding rule to return zero on success.
4682
4683In other words, a C<E<lt>resyncE<gt>> is exactly equivalent to the token
4684C</[^\n]*\n/> followed by the action S<C<{ $return = 0 }>> (except that
4685productions beginning with a C<E<lt>resyncE<gt>> are ignored when generating
4686error messages). A typical use might be:
4687
4688 script : command(s)
4689
4690 command: save_command
4691 | restore_command
4692 | <resync> # TRY NEXT LINE, IF POSSIBLE
4693
4694It is also possible to explicitly specify a resynchronization
4695pattern, using the C<E<lt>resync:I<pattern>E<gt>> variant. This version
4696succeeds only if the specified pattern matches (and consumes) the
4697parsed text. In other words, C<E<lt>resync:I<pattern>E<gt>> is exactly
4698equivalent to the token C</I<pattern>/> (followed by a S<C<{ $return = 0 }>>
4699action). For example, if commands were terminated by newlines or semi-colons:
4700
4701 command: save_command
4702 | restore_command
4703 | <resync:[^;\n]*[;\n]>
4704
4705The value of a successfully matched C<E<lt>resyncE<gt>> directive (of either
4706type) is the text that it consumed. Note, however, that since the
4707directive also sets C<$return>, a production consisting of a lone
4708C<E<lt>resyncE<gt>> succeeds but returns the value zero (which a calling rule
4709may find useful to distinguish between "true" matches and "tolerant" matches).
4710Remember that returning a zero value indicates that the rule I<succeeded> (since
4711only an C<undef> denotes failure within C<Parse::RecDescent> parsers.
4712
4713
4714=item Error handling
4715
4716The C<E<lt>errorE<gt>> directive provides automatic or user-defined
4717generation of error messages during a parse. In its simplest form
4718C<E<lt>errorE<gt>> prepares an error message based on
4719the mismatch between the last item expected and the text which cause
4720it to fail. For example, given the rule:
4721
4722 McCoy: curse ',' name ', I'm a doctor, not a' a_profession '!'
4723 | pronoun 'dead,' name '!'
4724 | <error>
4725
4726the following strings would produce the following messages:
4727
4728=over 4
4729
4730=item "Amen, Jim!"
4731
4732 ERROR (line 1): Invalid McCoy: Expected curse or pronoun
4733 not found
4734
4735=item "Dammit, Jim, I'm a doctor!"
4736
4737 ERROR (line 1): Invalid McCoy: Expected ", I'm a doctor, not a"
4738 but found ", I'm a doctor!" instead
4739
4740=item "He's dead,\n"
4741
4742 ERROR (line 2): Invalid McCoy: Expected name not found
4743
4744=item "He's alive!"
4745
4746 ERROR (line 1): Invalid McCoy: Expected 'dead,' but found
4747 "alive!" instead
4748
4749=item "Dammit, Jim, I'm a doctor, not a pointy-eared Vulcan!"
4750
4751 ERROR (line 1): Invalid McCoy: Expected a profession but found
4752 "pointy-eared Vulcan!" instead
4753
4754
4755=back
4756
4757Note that, when autogenerating error messages, all underscores in any
4758rule name used in a message are replaced by single spaces (for example
4759"a_production" becomes "a production"). Judicious choice of rule
4760names can therefore considerably improve the readability of automatic
4761error messages (as well as the maintainability of the original
4762grammar).
4763
4764If the automatically generated error is not sufficient, it is possible to
4765provide an explicit message as part of the error directive. For example:
4766
4767 Spock: "Fascinating ',' (name | 'Captain') '.'
4768 | "Highly illogical, doctor."
4769 | <error: He never said that!>
4770
4771which would result in I<all> failures to parse a "Spock" subrule printing the
4772following message:
4773
4774 ERROR (line <N>): Invalid Spock: He never said that!
4775
4776The error message is treated as a "qq{...}" string and interpolated
4777when the error is generated (I<not> when the directive is specified!).
4778Hence:
4779
4780 <error: Mystical error near "$text">
4781
4782would correctly insert the ambient text string which caused the error.
4783
4784There are two other forms of error directive: C<E<lt>error?E<gt>> and
4785S<C<E<lt>error?: msgE<gt>>>. These behave just like C<E<lt>errorE<gt>>
4786and S<C<E<lt>error: msgE<gt>>> respectively, except that they are
4787only triggered if the rule is "committed" at the time they are
4788encountered. For example:
4789
4790 Scotty: "Ya kenna change the Laws of Phusics," <commit> name
4791 | name <commit> ',' 'she's goanta blaw!'
4792 | <error?>
4793
4794will only generate an error for a string beginning with "Ya kenna
4795change the Laws o' Phusics," or a valid name, but which still fails to match the
4796corresponding production. That is, C<$parser-E<gt>Scotty("Aye, Cap'ain")> will
4797fail silently (since neither production will "commit" the rule on that
4798input), whereas S<C<$parser-E<gt>Scotty("Mr Spock, ah jest kenna do'ut!")>>
4799will fail with the error message:
4800
4801 ERROR (line 1): Invalid Scotty: expected 'she's goanta blaw!'
4802 but found 'I jest kenna do'ut!' instead.
4803
4804since in that case the second production would commit after matching
4805the leading name.
4806
4807Note that to allow this behaviour, all C<E<lt>errorE<gt>> directives which are
4808the first item in a production automatically uncommit the rule just
4809long enough to allow their production to be attempted (that is, when
4810their production fails, the commitment is reinstated so that
4811subsequent productions are skipped).
4812
4813In order to I<permanently> uncommit the rule before an error message,
4814it is necessary to put an explicit C<E<lt>uncommitE<gt>> before the
4815C<E<lt>errorE<gt>>. For example:
4816
4817 line: 'Kirk:' <commit> Kirk
4818 | 'Spock:' <commit> Spock
4819 | 'McCoy:' <commit> McCoy
4820 | <uncommit> <error?> <reject>
4821 | <resync>
4822
4823
4824Error messages generated by the various C<E<lt>error...E<gt>> directives
4825are not displayed immediately. Instead, they are "queued" in a buffer and
4826are only displayed once parsing ultimately fails. Moreover,
4827C<E<lt>error...E<gt>> directives that cause one production of a rule
4828to fail are automatically removed from the message queue
4829if another production subsequently causes the entire rule to succeed.
4830This means that you can put
4831C<E<lt>error...E<gt>> directives wherever useful diagnosis can be done,
4832and only those associated with actual parser failure will ever be
4833displayed. Also see L<"GOTCHAS">.
4834
4835As a general rule, the most useful diagnostics are usually generated
4836either at the very lowest level within the grammar, or at the very
4837highest. A good rule of thumb is to identify those subrules which
4838consist mainly (or entirely) of terminals, and then put an
4839C<E<lt>error...E<gt>> directive at the end of any other rule which calls
4840one or more of those subrules.
4841
4842There is one other situation in which the output of the various types of
4843error directive is suppressed; namely, when the rule containing them
4844is being parsed as part of a "look-ahead" (see L<"Look-ahead">). In this
4845case, the error directive will still cause the rule to fail, but will do
4846so silently.
4847
4848An unconditional C<E<lt>errorE<gt>> directive always fails (and hence has no
4849associated value). This means that encountering such a directive
4850always causes the production containing it to fail. Hence an
4851C<E<lt>errorE<gt>> directive will inevitably be the last (useful) item of a
4852rule (a level 3 warning is issued if a production contains items after an unconditional
4853C<E<lt>errorE<gt>> directive).
4854
4855An C<E<lt>error?E<gt>> directive will I<succeed> (that is: fail to fail :-), if
4856the current rule is uncommitted when the directive is encountered. In
4857that case the directive's associated value is zero. Hence, this type
4858of error directive I<can> be used before the end of a
4859production. For example:
4860
4861 command: 'do' <commit> something
4862 | 'report' <commit> something
4863 | <error?: Syntax error> <error: Unknown command>
4864
4865
4866B<Warning:> The C<E<lt>error?E<gt>> directive does I<not> mean "always fail (but
4867do so silently unless committed)". It actually means "only fail (and report) if
4868committed, otherwise I<succeed>". To achieve the "fail silently if uncommitted"
4869semantics, it is necessary to use:
4870
4871 rule: item <commit> item(s)
4872 | <error?> <reject> # FAIL SILENTLY UNLESS COMMITTED
4873
4874However, because people seem to expect a lone C<E<lt>error?E<gt>> directive
4875to work like this:
4876
4877 rule: item <commit> item(s)
4878 | <error?: Error message if committed>
4879 | <error: Error message if uncommitted>
4880
4881Parse::RecDescent automatically appends a
4882C<E<lt>rejectE<gt>> directive if the C<E<lt>error?E<gt>> directive
4883is the only item in a production. A level 2 warning (see below)
4884is issued when this happens.
4885
4886The level of error reporting during both parser construction and
4887parsing is controlled by the presence or absence of four global
4888variables: C<$::RD_ERRORS>, C<$::RD_WARN>, C<$::RD_HINT>, and
4889<$::RD_TRACE>. If C<$::RD_ERRORS> is defined (and, by default, it is)
4890then fatal errors are reported.
4891
4892Whenever C<$::RD_WARN> is defined, certain non-fatal problems are also reported.
4893
4894Warnings have an associated "level": 1, 2, or 3. The higher the level,
4895the more serious the warning. The value of the corresponding global
4896variable (C<$::RD_WARN>) determines the I<lowest> level of warning to
4897be displayed. Hence, to see I<all> warnings, set C<$::RD_WARN> to 1.
4898To see only the most serious warnings set C<$::RD_WARN> to 3.
4899By default C<$::RD_WARN> is initialized to 3, ensuring that serious but
4900non-fatal errors are automatically reported.
4901
4902There is also a grammar directive to turn on warnings from within the
4903grammar: C<< <warn> >>. It takes an optional argument, which specifies
4904the warning level: C<< <warn: 2> >>.
4905
4906See F<"DIAGNOSTICS"> for a list of the varous error and warning messages
4907that Parse::RecDescent generates when these two variables are defined.
4908
4909Defining any of the remaining variables (which are not defined by
4910default) further increases the amount of information reported.
4911Defining C<$::RD_HINT> causes the parser generator to offer
4912more detailed analyses and hints on both errors and warnings.
4913Note that setting C<$::RD_HINT> at any point automagically
4914sets C<$::RD_WARN> to 1. There is also a C<< <hint> >> directive, which can
4915be hard-coded into a grammar.
4916
4917Defining C<$::RD_TRACE> causes the parser generator and the parser to
4918report their progress to STDERR in excruciating detail (although, without hints
4919unless $::RD_HINT is separately defined). This detail
4920can be moderated in only one respect: if C<$::RD_TRACE> has an
4921integer value (I<N>) greater than 1, only the I<N> characters of
4922the "current parsing context" (that is, where in the input string we
4923are at any point in the parse) is reported at any time.
4924
4925C<$::RD_TRACE> is mainly useful for debugging a grammar that isn't
4926behaving as you expected it to. To this end, if C<$::RD_TRACE> is
4927defined when a parser is built, any actual parser code which is
4928generated is also written to a file named "RD_TRACE" in the local
4929directory.
4930
4931There are two directives associated with the C<$::RD_TRACE> variable.
4932If a grammar contains a C<< <trace_build> >> directive anywhere in its
4933specification, C<$::RD_TRACE> is turned on during the parser construction
4934phase. If a grammar contains a C<< <trace_parse> >> directive anywhere in its
4935specification, C<$::RD_TRACE> is turned on during any parse the parser
4936performs.
4937
4938Note that the four variables belong to the "main" package, which
4939makes them easier to refer to in the code controlling the parser, and
4940also makes it easy to turn them into command line flags ("-RD_ERRORS",
4941"-RD_WARN", "-RD_HINT", "-RD_TRACE") under B<perl -s>.
4942
4943The corresponding directives are useful to "hardwire" the various
4944debugging features into a particular grammar (rather than having to set
4945and reset external variables).
4946
4947=item Redirecting diagnostics
4948
4949The diagnostics provided by the tracing mechanism always go to STDERR.
4950If you need them to go elsewhere, localize and reopen STDERR prior to the
4951parse.
4952
4953For example:
4954
4955 {
4956 local *STDERR = IO::File->new(">$filename") or die $!;
4957
4958 my $result = $parser->startrule($text);
4959 }
4960
4961
4962=item Consistency checks
4963
4964Whenever a parser is build, Parse::RecDescent carries out a number of
4965(potentially expensive) consistency checks. These include: verifying that the
4966grammar is not left-recursive and that no rules have been left undefined.
4967
4968These checks are important safeguards during development, but unnecessary
4969overheads when the grammar is stable and ready to be deployed. So
4970Parse::RecDescent provides a directive to disable them: C<< <nocheck> >>.
4971
4972If a grammar contains a C<< <nocheck> >> directive anywhere in its
4973specification, the extra compile-time checks are by-passed.
4974
4975
4976=item Specifying local variables
4977
4978It is occasionally convenient to specify variables which are local
4979to a single rule. This may be achieved by including a
4980C<E<lt>rulevar:...E<gt>> directive anywhere in the rule. For example:
4981
4982 markup: <rulevar: $tag>
4983
4984 markup: tag {($tag=$item[1]) =~ s/^<|>$//g} body[$tag]
4985
4986The example C<E<lt>rulevar: $tagE<gt>> directive causes a "my" variable named
4987C<$tag> to be declared at the start of the subroutine implementing the
4988C<markup> rule (that is, I<before> the first production, regardless of
4989where in the rule it is specified).
4990
4991Specifically, any directive of the form:
4992C<E<lt>rulevar:I<text>E<gt>> causes a line of the form C<my I<text>;>
4993to be added at the beginning of the rule subroutine, immediately after
4994the definitions of the following local variables:
4995
4996 $thisparser $commit
4997 $thisrule @item
4998 $thisline @arg
4999 $text %arg
5000
5001This means that the following C<E<lt>rulevarE<gt>> directives work
5002as expected:
5003
5004 <rulevar: $count = 0 >
5005
5006 <rulevar: $firstarg = $arg[0] || '' >
5007
5008 <rulevar: $myItems = \@item >
5009
5010 <rulevar: @context = ( $thisline, $text, @arg ) >
5011
5012 <rulevar: ($name,$age) = $arg{"name","age"} >
5013
5014If a variable that is also visible to subrules is required, it needs
5015to be C<local>'d, not C<my>'d. C<rulevar> defaults to C<my>, but if C<local>
5016is explicitly specified:
5017
5018 <rulevar: local $count = 0 >
5019
5020then a C<local>-ized variable is declared instead, and will be available
5021within subrules.
5022
5023Note however that, because all such variables are "my" variables, their
5024values I<do not persist> between match attempts on a given rule. To
5025preserve values between match attempts, values can be stored within the
5026"local" member of the C<$thisrule> object:
5027
5028 countedrule: { $thisrule->{"local"}{"count"}++ }
5029 <reject>
5030 | subrule1
5031 | subrule2
5032 | <reject: $thisrule->{"local"}{"count"} == 1>
5033 subrule3
5034
5035
5036When matching a rule, each C<E<lt>rulevarE<gt>> directive is matched as
5037if it were an unconditional C<E<lt>rejectE<gt>> directive (that is, it
5038causes any production in which it appears to immediately fail to match).
5039For this reason (and to improve readability) it is usual to specify any
5040C<E<lt>rulevarE<gt>> directive in a separate production at the start of
5041the rule (this has the added advantage that it enables
5042C<Parse::RecDescent> to optimize away such productions, just as it does
5043for the C<E<lt>rejectE<gt>> directive).
5044
5045
5046=item Dynamically matched rules
5047
5048Because regexes and double-quoted strings are interpolated, it is relatively
5049easy to specify productions with "context sensitive" tokens. For example:
5050
5051 command: keyword body "end $item[1]"
5052
5053which ensures that a command block is bounded by a
5054"I<E<lt>keywordE<gt>>...end I<E<lt>same keywordE<gt>>" pair.
5055
5056Building productions in which subrules are context sensitive is also possible,
5057via the C<E<lt>matchrule:...E<gt>> directive. This directive behaves
5058identically to a subrule item, except that the rule which is invoked to match
5059it is determined by the string specified after the colon. For example, we could
5060rewrite the C<command> rule like this:
5061
5062 command: keyword <matchrule:body> "end $item[1]"
5063
5064Whatever appears after the colon in the directive is treated as an interpolated
5065string (that is, as if it appeared in C<qq{...}> operator) and the value of
5066that interpolated string is the name of the subrule to be matched.
5067
5068Of course, just putting a constant string like C<body> in a
5069C<E<lt>matchrule:...E<gt>> directive is of little interest or benefit.
5070The power of directive is seen when we use a string that interpolates
5071to something interesting. For example:
5072
5073 command: keyword <matchrule:$item[1]_body> "end $item[1]"
5074
5075 keyword: 'while' | 'if' | 'function'
5076
5077 while_body: condition block
5078
5079 if_body: condition block ('else' block)(?)
5080
5081 function_body: arglist block
5082
5083Now the C<command> rule selects how to proceed on the basis of the keyword
5084that is found. It is as if C<command> were declared:
5085
5086 command: 'while' while_body "end while"
5087 | 'if' if_body "end if"
5088 | 'function' function_body "end function"
5089
5090
5091When a C<E<lt>matchrule:...E<gt>> directive is used as a repeated
5092subrule, the rule name expression is "late-bound". That is, the name of
5093the rule to be called is re-evaluated I<each time> a match attempt is
5094made. Hence, the following grammar:
5095
5096 { $::species = 'dogs' }
5097
5098 pair: 'two' <matchrule:$::species>(s)
5099
5100 dogs: /dogs/ { $::species = 'cats' }
5101
5102 cats: /cats/
5103
5104will match the string "two dogs cats cats" completely, whereas it will
5105only match the string "two dogs dogs dogs" up to the eighth letter. If
5106the rule name were "early bound" (that is, evaluated only the first
5107time the directive is encountered in a production), the reverse
5108behaviour would be expected.
5109
5110Note that the C<matchrule> directive takes a string that is to be treated
5111as a rule name, I<not> as a rule invocation. That is,
5112it's like a Perl symbolic reference, not an C<eval>. Just as you can say:
5113
5114 $subname = 'foo';
5115
5116 # and later...
5117
5118 &{$foo}(@args);
5119
5120but not:
5121
5122 $subname = 'foo(@args)';
5123
5124 # and later...
5125
5126 &{$foo};
5127
5128likewise you can say:
5129
5130 $rulename = 'foo';
5131
5132 # and in the grammar...
5133
5134 <matchrule:$rulename>[@args]
5135
5136but not:
5137
5138 $rulename = 'foo[@args]';
5139
5140 # and in the grammar...
5141
5142 <matchrule:$rulename>
5143
5144
5145=item Deferred actions
5146
5147The C<E<lt>defer:...E<gt>> directive is used to specify an action to be
5148performed when (and only if!) the current production ultimately succeeds.
5149
5150Whenever a C<E<lt>defer:...E<gt>> directive appears, the code it specifies
5151is converted to a closure (an anonymous subroutine reference) which is
5152queued within the active parser object. Note that,
5153because the deferred code is converted to a closure, the values of any
5154"local" variable (such as C<$text>, <@item>, etc.) are preserved
5155until the deferred code is actually executed.
5156
5157If the parse ultimately succeeds
5158I<and> the production in which the C<E<lt>defer:...E<gt>> directive was
5159evaluated formed part of the successful parse, then the deferred code is
5160executed immediately before the parse returns. If however the production
5161which queued a deferred action fails, or one of the higher-level
5162rules which called that production fails, then the deferred action is
5163removed from the queue, and hence is never executed.
5164
5165For example, given the grammar:
5166
5167 sentence: noun trans noun
5168 | noun intrans
5169
5170 noun: 'the dog'
5171 { print "$item[1]\t(noun)\n" }
5172 | 'the meat'
5173 { print "$item[1]\t(noun)\n" }
5174
5175 trans: 'ate'
5176 { print "$item[1]\t(transitive)\n" }
5177
5178 intrans: 'ate'
5179 { print "$item[1]\t(intransitive)\n" }
5180 | 'barked'
5181 { print "$item[1]\t(intransitive)\n" }
5182
5183then parsing the sentence C<"the dog ate"> would produce the output:
5184
5185 the dog (noun)
5186 ate (transitive)
5187 the dog (noun)
5188 ate (intransitive)
5189
5190This is because, even though the first production of C<sentence>
5191ultimately fails, its initial subrules C<noun> and C<trans> do match,
5192and hence they execute their associated actions.
5193Then the second production of C<sentence> succeeds, causing the
5194actions of the subrules C<noun> and C<intrans> to be executed as well.
5195
5196On the other hand, if the actions were replaced by C<E<lt>defer:...E<gt>>
5197directives:
5198
5199 sentence: noun trans noun
5200 | noun intrans
5201
5202 noun: 'the dog'
5203 <defer: print "$item[1]\t(noun)\n" >
5204 | 'the meat'
5205 <defer: print "$item[1]\t(noun)\n" >
5206
5207 trans: 'ate'
5208 <defer: print "$item[1]\t(transitive)\n" >
5209
5210 intrans: 'ate'
5211 <defer: print "$item[1]\t(intransitive)\n" >
5212 | 'barked'
5213 <defer: print "$item[1]\t(intransitive)\n" >
5214
5215the output would be:
5216
5217 the dog (noun)
5218 ate (intransitive)
5219
5220since deferred actions are only executed if they were evaluated in
5221a production which ultimately contributes to the successful parse.
5222
5223In this case, even though the first production of C<sentence> caused
5224the subrules C<noun> and C<trans> to match, that production ultimately
5225failed and so the deferred actions queued by those subrules were subsequently
5226disgarded. The second production then succeeded, causing the entire
5227parse to succeed, and so the deferred actions queued by the (second) match of
5228the C<noun> subrule and the subsequent match of C<intrans> I<are> preserved and
5229eventually executed.
5230
5231Deferred actions provide a means of improving the performance of a parser,
5232by only executing those actions which are part of the final parse-tree
5233for the input data.
5234
5235Alternatively, deferred actions can be viewed as a mechanism for building
5236(and executing) a
5237customized subroutine corresponding to the given input data, much in the
5238same way that autoactions (see L<"Autoactions">) can be used to build a
5239customized data structure for specific input.
5240
5241Whether or not the action it specifies is ever executed,
5242a C<E<lt>defer:...E<gt>> directive always succeeds, returning the
5243number of deferred actions currently queued at that point.
5244
5245
5246=item Parsing Perl
5247
5248Parse::RecDescent provides limited support for parsing subsets of Perl,
5249namely: quote-like operators, Perl variables, and complete code blocks.
5250
5251The C<E<lt>perl_quotelikeE<gt>> directive can be used to parse any Perl
5252quote-like operator: C<'a string'>, C<m/a pattern/>, C<tr{ans}{lation}>,
5253etc. It does this by calling Text::Balanced::quotelike().
5254
5255If a quote-like operator is found, a reference to an array of eight elements
5256is returned. Those elements are identical to the last eight elements returned
5257by Text::Balanced::extract_quotelike() in an array context, namely:
5258
5259=over 4
5260
5261=item [0]
5262
5263the name of the quotelike operator -- 'q', 'qq', 'm', 's', 'tr' -- if the
5264operator was named; otherwise C<undef>,
5265
5266=item [1]
5267
5268the left delimiter of the first block of the operation,
5269
5270=item [2]
5271
5272the text of the first block of the operation
5273(that is, the contents of
5274a quote, the regex of a match, or substitution or the target list of a
5275translation),
5276
5277=item [3]
5278
5279the right delimiter of the first block of the operation,
5280
5281=item [4]
5282
5283the left delimiter of the second block of the operation if there is one
5284(that is, if it is a C<s>, C<tr>, or C<y>); otherwise C<undef>,
5285
5286=item [5]
5287
5288the text of the second block of the operation if there is one
5289(that is, the replacement of a substitution or the translation list
5290of a translation); otherwise C<undef>,
5291
5292=item [6]
5293
5294the right delimiter of the second block of the operation (if any);
5295otherwise C<undef>,
5296
5297=item [7]
5298
5299the trailing modifiers on the operation (if any); otherwise C<undef>.
5300
5301=back
5302
5303If a quote-like expression is not found, the directive fails with the usual
5304C<undef> value.
5305
5306The C<E<lt>perl_variableE<gt>> directive can be used to parse any Perl
5307variable: $scalar, @array, %hash, $ref->{field}[$index], etc.
5308It does this by calling Text::Balanced::extract_variable().
5309
5310If the directive matches text representing a valid Perl variable
5311specification, it returns that text. Otherwise it fails with the usual
5312C<undef> value.
5313
5314The C<E<lt>perl_codeblockE<gt>> directive can be used to parse curly-brace-delimited block of Perl code, such as: { $a = 1; f() =~ m/pat/; }.
5315It does this by calling Text::Balanced::extract_codeblock().
5316
5317If the directive matches text representing a valid Perl code block,
5318it returns that text. Otherwise it fails with the usual C<undef> value.
5319
5320You can also tell it what kind of brackets to use as the outermost
5321delimiters. For example:
5322
5323 arglist: <perl_codeblock ()>
5324
5325causes an arglist to match a perl code block whose outermost delimiters
5326are C<(...)> (rather than the default C<{...}>).
5327
5328
5329=item Constructing tokens
5330
5331Eventually, Parse::RecDescent will be able to parse tokenized input, as
5332well as ordinary strings. In preparation for this joyous day, the
5333C<E<lt>token:...E<gt>> directive has been provided.
5334This directive creates a token which will be suitable for
5335input to a Parse::RecDescent parser (when it eventually supports
5336tokenized input).
5337
5338The text of the token is the value of the
5339immediately preceding item in the production. A
5340C<E<lt>token:...E<gt>> directive always succeeds with a return
5341value which is the hash reference that is the new token. It also
5342sets the return value for the production to that hash ref.
5343
5344The C<E<lt>token:...E<gt>> directive makes it easy to build
5345a Parse::RecDescent-compatible lexer in Parse::RecDescent:
5346
5347 my $lexer = new Parse::RecDescent q
5348 {
5349 lex: token(s)
5350
5351 token: /a\b/ <token:INDEF>
5352 | /the\b/ <token:DEF>
5353 | /fly\b/ <token:NOUN,VERB>
5354 | /[a-z]+/i { lc $item[1] } <token:ALPHA>
5355 | <error: Unknown token>
5356
5357 };
5358
5359which will eventually be able to be used with a regular Parse::RecDescent
5360grammar:
5361
5362 my $parser = new Parse::RecDescent q
5363 {
5364 startrule: subrule1 subrule 2
5365
5366 # ETC...
5367 };
5368
5369either with a pre-lexing phase:
5370
5371 $parser->startrule( $lexer->lex($data) );
5372
5373or with a lex-on-demand approach:
5374
5375 $parser->startrule( sub{$lexer->token(\$data)} );
5376
5377But at present, only the C<E<lt>token:...E<gt>> directive is
5378actually implemented. The rest is vapourware.
5379
5380=item Specifying operations
5381
5382One of the commonest requirements when building a parser is to specify
5383binary operators. Unfortunately, in a normal grammar, the rules for
5384such things are awkward:
5385
5386 disjunction: conjunction ('or' conjunction)(s?)
5387 { $return = [ $item[1], @{$item[2]} ] }
5388
5389 conjunction: atom ('and' atom)(s?)
5390 { $return = [ $item[1], @{$item[2]} ] }
5391
5392or inefficient:
5393
5394 disjunction: conjunction 'or' disjunction
5395 { $return = [ $item[1], @{$item[2]} ] }
5396 | conjunction
5397 { $return = [ $item[1] ] }
5398
5399 conjunction: atom 'and' conjunction
5400 { $return = [ $item[1], @{$item[2]} ] }
5401 | atom
5402 { $return = [ $item[1] ] }
5403
5404and either way is ugly and hard to get right.
5405
5406The C<E<lt>leftop:...E<gt>> and C<E<lt>rightop:...E<gt>> directives provide an
5407easier way of specifying such operations. Using C<E<lt>leftop:...E<gt>> the
5408above examples become:
5409
5410 disjunction: <leftop: conjunction 'or' conjunction>
5411 conjunction: <leftop: atom 'and' atom>
5412
5413The C<E<lt>leftop:...E<gt>> directive specifies a left-associative binary operator.
5414It is specified around three other grammar elements
5415(typically subrules or terminals), which match the left operand,
5416the operator itself, and the right operand respectively.
5417
5418A C<E<lt>leftop:...E<gt>> directive such as:
5419
5420 disjunction: <leftop: conjunction 'or' conjunction>
5421
5422is converted to the following:
5423
5424 disjunction: ( conjunction ('or' conjunction)(s?)
5425 { $return = [ $item[1], @{$item[2]} ] } )
5426
5427In other words, a C<E<lt>leftop:...E<gt>> directive matches the left operand followed by zero
5428or more repetitions of both the operator and the right operand. It then
5429flattens the matched items into an anonymous array which becomes the
5430(single) value of the entire C<E<lt>leftop:...E<gt>> directive.
5431
5432For example, an C<E<lt>leftop:...E<gt>> directive such as:
5433
5434 output: <leftop: ident '<<' expr >
5435
5436when given a string such as:
5437
5438 cout << var << "str" << 3
5439
5440would match, and C<$item[1]> would be set to:
5441
5442 [ 'cout', 'var', '"str"', '3' ]
5443
5444In other words:
5445
5446 output: <leftop: ident '<<' expr >
5447
5448is equivalent to a left-associative operator:
5449
5450 output: ident { $return = [$item[1]] }
5451 | ident '<<' expr { $return = [@item[1,3]] }
5452 | ident '<<' expr '<<' expr { $return = [@item[1,3,5]] }
5453 | ident '<<' expr '<<' expr '<<' expr { $return = [@item[1,3,5,7]] }
5454 # ...etc...
5455
5456
5457Similarly, the C<E<lt>rightop:...E<gt>> directive takes a left operand, an operator, and a right operand:
5458
5459 assign: <rightop: var '=' expr >
5460
5461and converts them to:
5462
5463 assign: ( (var '=' {$return=$item[1]})(s?) expr
5464 { $return = [ @{$item[1]}, $item[2] ] } )
5465
5466which is equivalent to a right-associative operator:
5467
5468 assign: expr { $return = [$item[1]] }
5469 | var '=' expr { $return = [@item[1,3]] }
5470 | var '=' var '=' expr { $return = [@item[1,3,5]] }
5471 | var '=' var '=' var '=' expr { $return = [@item[1,3,5,7]] }
5472 # ...etc...
5473
5474
5475Note that for both the C<E<lt>leftop:...E<gt>> and C<E<lt>rightop:...E<gt>> directives, the directive does not normally
5476return the operator itself, just a list of the operands involved. This is
5477particularly handy for specifying lists:
5478
5479 list: '(' <leftop: list_item ',' list_item> ')'
5480 { $return = $item[2] }
5481
5482There is, however, a problem: sometimes the operator is itself significant.
5483For example, in a Perl list a comma and a C<=E<gt>> are both
5484valid separators, but the C<=E<gt>> has additional stringification semantics.
5485Hence it's important to know which was used in each case.
5486
5487To solve this problem the
5488C<E<lt>leftop:...E<gt>> and C<E<lt>rightop:...E<gt>> directives
5489I<do> return the operator(s) as well, under two circumstances.
5490The first case is where the operator is specified as a subrule. In that instance,
5491whatever the operator matches is returned (on the assumption that if the operator
5492is important enough to have its own subrule, then it's important enough to return).
5493
5494The second case is where the operator is specified as a regular
5495expression. In that case, if the first bracketed subpattern of the
5496regular expression matches, that matching value is returned (this is analogous to
5497the behaviour of the Perl C<split> function, except that only the first subpattern
5498is returned).
5499
5500In other words, given the input:
5501
5502 ( a=>1, b=>2 )
5503
5504the specifications:
5505
5506 list: '(' <leftop: list_item separator list_item> ')'
5507
5508 separator: ',' | '=>'
5509
5510or:
5511
5512 list: '(' <leftop: list_item /(,|=>)/ list_item> ')'
5513
5514cause the list separators to be interleaved with the operands in the
5515anonymous array in C<$item[2]>:
5516
5517 [ 'a', '=>', '1', ',', 'b', '=>', '2' ]
5518
5519
5520But the following version:
5521
5522 list: '(' <leftop: list_item /,|=>/ list_item> ')'
5523
5524returns only the operators:
5525
5526 [ 'a', '1', 'b', '2' ]
5527
5528Of course, none of the above specifications handle the case of an empty
5529list, since the C<E<lt>leftop:...E<gt>> and C<E<lt>rightop:...E<gt>> directives
5530require at least a single right or left operand to match. To specify
5531that the operator can match "trivially",
5532it's necessary to add a C<(s?)> qualifier to the directive:
5533
5534 list: '(' <leftop: list_item /(,|=>)/ list_item>(s?) ')'
5535
5536Note that in almost all the above examples, the first and third arguments
5537of the C<<leftop:...E<gt>> directive were the same subrule. That is because
5538C<<leftop:...E<gt>>'s are frequently used to specify "separated" lists of the
5539same type of item. To make such lists easier to specify, the following
5540syntax:
5541
5542 list: element(s /,/)
5543
5544is exactly equivalent to:
5545
5546 list: <leftop: element /,/ element>
5547
5548Note that the separator must be specified as a raw pattern (i.e.
5549not a string or subrule).
5550
5551
5552=item Scored productions
5553
5554By default, Parse::RecDescent grammar rules always accept the first
5555production that matches the input. But if two or more productions may
5556potentially match the same input, choosing the first that does so may
5557not be optimal.
5558
5559For example, if you were parsing the sentence "time flies like an arrow",
5560you might use a rule like this:
5561
5562 sentence: verb noun preposition article noun { [@item] }
5563 | adjective noun verb article noun { [@item] }
5564 | noun verb preposition article noun { [@item] }
5565
5566Each of these productions matches the sentence, but the third one
5567is the most likely interpretation. However, if the sentence had been
5568"fruit flies like a banana", then the second production is probably
5569the right match.
5570
5571To cater for such situtations, the C<E<lt>score:...E<gt>> can be used.
5572The directive is equivalent to an unconditional C<E<lt>rejectE<gt>>,
5573except that it allows you to specify a "score" for the current
5574production. If that score is numerically greater than the best
5575score of any preceding production, the current production is cached for later
5576consideration. If no later production matches, then the cached
5577production is treated as having matched, and the value of the
5578item immediately before its C<E<lt>score:...E<gt>> directive is returned as the
5579result.
5580
5581In other words, by putting a C<E<lt>score:...E<gt>> directive at the end of
5582each production, you can select which production matches using
5583criteria other than specification order. For example:
5584
5585 sentence: verb noun preposition article noun { [@item] } <score: sensible(@item)>
5586 | adjective noun verb article noun { [@item] } <score: sensible(@item)>
5587 | noun verb preposition article noun { [@item] } <score: sensible(@item)>
5588
5589Now, when each production reaches its respective C<E<lt>score:...E<gt>>
5590directive, the subroutine C<sensible> will be called to evaluate the
5591matched items (somehow). Once all productions have been tried, the
5592one which C<sensible> scored most highly will be the one that is
5593accepted as a match for the rule.
5594
5595The variable $score always holds the current best score of any production,
5596and the variable $score_return holds the corresponding return value.
5597
5598As another example, the following grammar matches lines that may be
5599separated by commas, colons, or semi-colons. This can be tricky if
5600a colon-separated line also contains commas, or vice versa. The grammar
5601resolves the ambiguity by selecting the rule that results in the
5602fewest fields:
5603
5604 line: seplist[sep=>','] <score: -@{$item[1]}>
5605 | seplist[sep=>':'] <score: -@{$item[1]}>
5606 | seplist[sep=>" "] <score: -@{$item[1]}>
5607
5608 seplist: <skip:""> <leftop: /[^$arg{sep}]*/ "$arg{sep}" /[^$arg{sep}]*/>
5609
5610Note the use of negation within the C<E<lt>score:...E<gt>> directive
5611to ensure that the seplist with the most items gets the lowest score.
5612
5613As the above examples indicate, it is often the case that all productions
5614in a rule use exactly the same C<E<lt>score:...E<gt>> directive. It is
5615tedious to have to repeat this identical directive in every production, so
5616Parse::RecDescent also provides the C<E<lt>autoscore:...E<gt>> directive.
5617
5618If an C<E<lt>autoscore:...E<gt>> directive appears in any
5619production of a rule, the code it specifies is used as the scoring
5620code for every production of that rule, except productions that already
5621end with an explicit C<E<lt>score:...E<gt>> directive. Thus the rules above could
5622be rewritten:
5623
5624 line: <autoscore: -@{$item[1]}>
5625 line: seplist[sep=>',']
5626 | seplist[sep=>':']
5627 | seplist[sep=>" "]
5628
5629
5630 sentence: <autoscore: sensible(@item)>
5631 | verb noun preposition article noun { [@item] }
5632 | adjective noun verb article noun { [@item] }
5633 | noun verb preposition article noun { [@item] }
5634
5635Note that the C<E<lt>autoscore:...E<gt>> directive itself acts as an
5636unconditional C<E<lt>rejectE<gt>>, and (like the C<E<lt>rulevar:...E<gt>>
5637directive) is pruned at compile-time wherever possible.
5638
5639
5640=item Dispensing with grammar checks
5641
5642During the compilation phase of parser construction, Parse::RecDescent performs
5643a small number of checks on the grammar it's given. Specifically it checks that
5644the grammar is not left-recursive, that there are no "insatiable" constructs of
5645the form:
5646
5647 rule: subrule(s) subrule
5648
5649and that there are no rules missing (i.e. referred to, but never defined).
5650
5651These checks are important during development, but can slow down parser
5652construction in stable code. So Parse::RecDescent provides the
5653E<lt>nocheckE<gt> directive to turn them off. The directive can only appear
5654before the first rule definition, and switches off checking throughout the rest
5655of the current grammar.
5656
5657Typically, this directive would be added when a parser has been thoroughly
5658tested and is ready for release.
5659
5660=back
5661
5662
5663=head2 Subrule argument lists
5664
5665It is occasionally useful to pass data to a subrule which is being invoked. For
5666example, consider the following grammar fragment:
5667
5668 classdecl: keyword decl
5669
5670 keyword: 'struct' | 'class';
5671
5672 decl: # WHATEVER
5673
5674The C<decl> rule might wish to know which of the two keywords was used
5675(since it may affect some aspect of the way the subsequent declaration
5676is interpreted). C<Parse::RecDescent> allows the grammar designer to
5677pass data into a rule, by placing that data in an I<argument list>
5678(that is, in square brackets) immediately after any subrule item in a
5679production. Hence, we could pass the keyword to C<decl> as follows:
5680
5681 classdecl: keyword decl[ $item[1] ]
5682
5683 keyword: 'struct' | 'class';
5684
5685 decl: # WHATEVER
5686
5687The argument list can consist of any number (including zero!) of comma-separated
5688Perl expressions. In other words, it looks exactly like a Perl anonymous
5689array reference. For example, we could pass the keyword, the name of the
5690surrounding rule, and the literal 'keyword' to C<decl> like so:
5691
5692 classdecl: keyword decl[$item[1],$item[0],'keyword']
5693
5694 keyword: 'struct' | 'class';
5695
5696 decl: # WHATEVER
5697
5698Within the rule to which the data is passed (C<decl> in the above examples)
5699that data is available as the elements of a local variable C<@arg>. Hence
5700C<decl> might report its intentions as follows:
5701
5702 classdecl: keyword decl[$item[1],$item[0],'keyword']
5703
5704 keyword: 'struct' | 'class';
5705
5706 decl: { print "Declaring $arg[0] (a $arg[2])\n";
5707 print "(this rule called by $arg[1])" }
5708
5709Subrule argument lists can also be interpreted as hashes, simply by using
5710the local variable C<%arg> instead of C<@arg>. Hence we could rewrite the
5711previous example:
5712
5713 classdecl: keyword decl[keyword => $item[1],
5714 caller => $item[0],
5715 type => 'keyword']
5716
5717 keyword: 'struct' | 'class';
5718
5719 decl: { print "Declaring $arg{keyword} (a $arg{type})\n";
5720 print "(this rule called by $arg{caller})" }
5721
5722Both C<@arg> and C<%arg> are always available, so the grammar designer may
5723choose whichever convention (or combination of conventions) suits best.
5724
5725Subrule argument lists are also useful for creating "rule templates"
5726(especially when used in conjunction with the C<E<lt>matchrule:...E<gt>>
5727directive). For example, the subrule:
5728
5729 list: <matchrule:$arg{rule}> /$arg{sep}/ list[%arg]
5730 { $return = [ $item[1], @{$item[3]} ] }
5731 | <matchrule:$arg{rule}>
5732 { $return = [ $item[1]] }
5733
5734is a handy template for the common problem of matching a separated list.
5735For example:
5736
5737 function: 'func' name '(' list[rule=>'param',sep=>';'] ')'
5738
5739 param: list[rule=>'name',sep=>','] ':' typename
5740
5741 name: /\w+/
5742
5743 typename: name
5744
5745
5746When a subrule argument list is used with a repeated subrule, the argument list
5747goes I<before> the repetition specifier:
5748
5749 list: /some|many/ thing[ $item[1] ](s)
5750
5751The argument list is "late bound". That is, it is re-evaluated for every
5752repetition of the repeated subrule.
5753This means that each repeated attempt to match the subrule may be
5754passed a completely different set of arguments if the value of the
5755expression in the argument list changes between attempts. So, for
5756example, the grammar:
5757
5758 { $::species = 'dogs' }
5759
5760 pair: 'two' animal[$::species](s)
5761
5762 animal: /$arg[0]/ { $::species = 'cats' }
5763
5764will match the string "two dogs cats cats" completely, whereas
5765it will only match the string "two dogs dogs dogs" up to the
5766eighth letter. If the value of the argument list were "early bound"
5767(that is, evaluated only the first time a repeated subrule match is
5768attempted), one would expect the matching behaviours to be reversed.
5769
5770Of course, it is possible to effectively "early bind" such argument lists
5771by passing them a value which does not change on each repetition. For example:
5772
5773 { $::species = 'dogs' }
5774
5775 pair: 'two' { $::species } animal[$item[2]](s)
5776
5777 animal: /$arg[0]/ { $::species = 'cats' }
5778
5779
5780Arguments can also be passed to the start rule, simply by appending them
5781to the argument list with which the start rule is called (I<after> the
5782"line number" parameter). For example, given:
5783
5784 $parser = new Parse::RecDescent ( $grammar );
5785
5786 $parser->data($text, 1, "str", 2, \@arr);
5787
5788 # ^^^^^ ^ ^^^^^^^^^^^^^^^
5789 # | | |
5790 # TEXT TO BE PARSED | |
5791 # STARTING LINE NUMBER |
5792 # ELEMENTS OF @arg WHICH IS PASSED TO RULE data
5793
5794then within the productions of the rule C<data>, the array C<@arg> will contain
5795C<("str", 2, \@arr)>.
5796
5797
5798=head2 Alternations
5799
5800Alternations are implicit (unnamed) rules defined as part of a production. An
5801alternation is defined as a series of '|'-separated productions inside a
5802pair of round brackets. For example:
5803
5804 character: 'the' ( good | bad | ugly ) /dude/
5805
5806Every alternation implicitly defines a new subrule, whose
5807automatically-generated name indicates its origin:
5808"_alternation_<I>_of_production_<P>_of_rule<R>" for the appropriate
5809values of <I>, <P>, and <R>. A call to this implicit subrule is then
5810inserted in place of the brackets. Hence the above example is merely a
5811convenient short-hand for:
5812
5813 character: 'the'
5814 _alternation_1_of_production_1_of_rule_character
5815 /dude/
5816
5817 _alternation_1_of_production_1_of_rule_character:
5818 good | bad | ugly
5819
5820Since alternations are parsed by recursively calling the parser generator,
5821any type(s) of item can appear in an alternation. For example:
5822
5823 character: 'the' ( 'high' "plains" # Silent, with poncho
5824 | /no[- ]name/ # Silent, no poncho
5825 | vengeance_seeking # Poncho-optional
5826 | <error>
5827 ) drifter
5828
5829In this case, if an error occurred, the automatically generated
5830message would be:
5831
5832 ERROR (line <N>): Invalid implicit subrule: Expected
5833 'high' or /no[- ]name/ or generic,
5834 but found "pacifist" instead
5835
5836Since every alternation actually has a name, it's even possible
5837to extend or replace them:
5838
5839 parser->Replace(
5840 "_alternation_1_of_production_1_of_rule_character:
5841 'generic Eastwood'"
5842 );
5843
5844More importantly, since alternations are a form of subrule, they can be given
5845repetition specifiers:
5846
5847 character: 'the' ( good | bad | ugly )(?) /dude/
5848
5849
5850=head2 Incremental Parsing
5851
5852C<Parse::RecDescent> provides two methods - C<Extend> and C<Replace> - which
5853can be used to alter the grammar matched by a parser. Both methods
5854take the same argument as C<Parse::RecDescent::new>, namely a
5855grammar specification string
5856
5857C<Parse::RecDescent::Extend> interprets the grammar specification and adds any
5858productions it finds to the end of the rules for which they are specified. For
5859example:
5860
5861 $add = "name: 'Jimmy-Bob' | 'Bobby-Jim'\ndesc: colour /necks?/";
5862 parser->Extend($add);
5863
5864adds two productions to the rule "name" (creating it if necessary) and one
5865production to the rule "desc".
5866
5867C<Parse::RecDescent::Replace> is identical, except that it first resets are
5868rule specified in the additional grammar, removing any existing productions.
5869Hence after:
5870
5871 $add = "name: 'Jimmy-Bob' | 'Bobby-Jim'\ndesc: colour /necks?/";
5872 parser->Replace($add);
5873
5874are are I<only> valid "name"s and the one possible description.
5875
5876A more interesting use of the C<Extend> and C<Replace> methods is to call them
5877inside the action of an executing parser. For example:
5878
5879 typedef: 'typedef' type_name identifier ';'
5880 { $thisparser->Extend("type_name: '$item[3]'") }
5881 | <error>
5882
5883 identifier: ...!type_name /[A-Za-z_]w*/
5884
5885which automatically prevents type names from being typedef'd, or:
5886
5887 command: 'map' key_name 'to' abort_key
5888 { $thisparser->Replace("abort_key: '$item[2]'") }
5889 | 'map' key_name 'to' key_name
5890 { map_key($item[2],$item[4]) }
5891 | abort_key
5892 { exit if confirm("abort?") }
5893
5894 abort_key: 'q'
5895
5896 key_name: ...!abort_key /[A-Za-z]/
5897
5898which allows the user to change the abort key binding, but not to unbind it.
5899
5900The careful use of such constructs makes it possible to reconfigure a
5901a running parser, eliminating the need for semantic feedback by
5902providing syntactic feedback instead. However, as currently implemented,
5903C<Replace()> and C<Extend()> have to regenerate and re-C<eval> the
5904entire parser whenever they are called. This makes them quite slow for
5905large grammars.
5906
5907In such cases, the judicious use of an interpolated regex is likely to
5908be far more efficient:
5909
5910 typedef: 'typedef' type_name/ identifier ';'
5911 { $thisparser->{local}{type_name} .= "|$item[3]" }
5912 | <error>
5913
5914 identifier: ...!type_name /[A-Za-z_]w*/
5915
5916 type_name: /$thisparser->{local}{type_name}/
5917
5918
5919=head2 Precompiling parsers
5920
5921Normally Parse::RecDescent builds a parser from a grammar at run-time.
5922That approach simplifies the design and implementation of parsing code,
5923but has the disadvantage that it slows the parsing process down - you
5924have to wait for Parse::RecDescent to build the parser every time the
5925program runs. Long or complex grammars can be particularly slow to
5926build, leading to unacceptable delays at start-up.
5927
5928To overcome this, the module provides a way of "pre-building" a parser
5929object and saving it in a separate module. That module can then be used
5930to create clones of the original parser.
5931
5932A grammar may be precompiled using the C<Precompile> class method.
5933For example, to precompile a grammar stored in the scalar $grammar,
5934and produce a class named PreGrammar in a module file named PreGrammar.pm,
5935you could use:
5936
5937 use Parse::RecDescent;
5938
5939 Parse::RecDescent->Precompile([$options_hashref], $grammar, "PreGrammar");
5940
5941The first required argument is the grammar string, the second is the
5942name of the class to be built. The name of the module file is
5943generated automatically by appending ".pm" to the last element of the
5944class name. Thus
5945
5946 Parse::RecDescent->Precompile($grammar, "My::New::Parser");
5947
5948would produce a module file named Parser.pm.
5949
5950An optional hash reference may be supplied as the first argument to
5951C<Precompile>. This argument is currently EXPERIMENTAL, and may change
5952in a future release of Parse::RecDescent. The only supported option
5953is currently C<-standalone>, see L</"Standalone Precompiled Parsers">.
5954
5955It is somewhat tedious to have to write a small Perl program just to
5956generate a precompiled grammar class, so Parse::RecDescent has some special
5957magic that allows you to do the job directly from the command-line.
5958
5959If your grammar is specified in a file named F<grammar>, you can generate
5960a class named Yet::Another::Grammar like so:
5961
5962 > perl -MParse::RecDescent - grammar Yet::Another::Grammar
5963
5964This would produce a file named F<Grammar.pm> containing the full
5965definition of a class called Yet::Another::Grammar. Of course, to use
5966that class, you would need to put the F<Grammar.pm> file in a
5967directory named F<Yet/Another>, somewhere in your Perl include path.
5968
5969Having created the new class, it's very easy to use it to build
5970a parser. You simply C<use> the new module, and then call its
5971C<new> method to create a parser object. For example:
5972
5973 use Yet::Another::Grammar;
5974 my $parser = Yet::Another::Grammar->new();
5975
5976The effect of these two lines is exactly the same as:
5977
5978 use Parse::RecDescent;
5979
5980 open GRAMMAR_FILE, "grammar" or die;
5981 local $/;
5982 my $grammar = <GRAMMAR_FILE>;
5983
5984 my $parser = Parse::RecDescent->new($grammar);
5985
5986only considerably faster.
5987
5988Note however that the parsers produced by either approach are exactly
5989the same, so whilst precompilation has an effect on I<set-up> speed,
5990it has no effect on I<parsing> speed. RecDescent 2.0 will address that
5991problem.
5992
5993=head3 Standalone Precompiled Parsers
5994
5995Until version 1.967003 of Parse::RecDescent, parser modules built with
5996C<Precompile> were dependent on Parse::RecDescent. Future
5997Parse::RecDescent releases with different internal implementations
5998would break pre-existing precompiled parsers.
5999
6000Version 1.967_005 added the ability for Parse::RecDescent to include
6001itself in the resulting .pm file if you pass the boolean option
6002C<-standalone> to C<Precompile>:
6003
6004 Parse::RecDescent->Precompile({ -standalone = 1, },
6005 $grammar, "My::New::Parser");
6006
6007Parse::RecDescent is included as Parse::RecDescent::_Runtime in order
6008to avoid conflicts between an installed version of Parse::RecDescent
6009and a precompiled, standalone parser made with another version of
6010Parse::RecDescent. This renaming is experimental, and is subject to
6011change in future versions.
6012
6013Precompiled parsers remain dependent on Parse::RecDescent by default,
6014as this feature is still considered experimental. In the future,
6015standalone parsers will become the default.
6016
6017=head1 GOTCHAS
6018
6019This section describes common mistakes that grammar writers seem to
6020make on a regular basis.
6021
6022=head2 1. Expecting an error to always invalidate a parse
6023
6024A common mistake when using error messages is to write the grammar like this:
6025
6026 file: line(s)
6027
6028 line: line_type_1
6029 | line_type_2
6030 | line_type_3
6031 | <error>
6032
6033The expectation seems to be that any line that is not of type 1, 2 or 3 will
6034invoke the C<E<lt>errorE<gt>> directive and thereby cause the parse to fail.
6035
6036Unfortunately, that only happens if the error occurs in the very first line.
6037The first rule states that a C<file> is matched by one or more lines, so if
6038even a single line succeeds, the first rule is completely satisfied and the
6039parse as a whole succeeds. That means that any error messages generated by
6040subsequent failures in the C<line> rule are quietly ignored.
6041
6042Typically what's really needed is this:
6043
6044 file: line(s) eofile { $return = $item[1] }
6045
6046 line: line_type_1
6047 | line_type_2
6048 | line_type_3
6049 | <error>
6050
6051 eofile: /^\Z/
6052
6053The addition of the C<eofile> subrule to the first production means that
6054a file only matches a series of successful C<line> matches I<that consume the
6055complete input text>. If any input text remains after the lines are matched,
6056there must have been an error in the last C<line>. In that case the C<eofile>
6057rule will fail, causing the entire C<file> rule to fail too.
6058
6059Note too that C<eofile> must match C</^\Z/> (end-of-text), I<not>
6060C</^\cZ/> or C</^\cD/> (end-of-file).
6061
6062And don't forget the action at the end of the production. If you just
6063write:
6064
6065 file: line(s) eofile
6066
6067then the value returned by the C<file> rule will be the value of its
6068last item: C<eofile>. Since C<eofile> always returns an empty string
6069on success, that will cause the C<file> rule to return that empty
6070string. Apart from returning the wrong value, returning an empty string
6071will trip up code such as:
6072
6073 $parser->file($filetext) || die;
6074
6075(since "" is false).
6076
6077Remember that Parse::RecDescent returns undef on failure,
6078so the only safe test for failure is:
6079
6080 defined($parser->file($filetext)) || die;
6081
6082
6083=head2 2. Using a C<return> in an action
6084
6085An action is like a C<do> block inside the subroutine implementing the
6086surrounding rule. So if you put a C<return> statement in an action:
6087
6088 range: '(' start '..' end )'
6089 { return $item{end} }
6090 /\s+/
6091
6092that subroutine will immediately return, without checking the rest of
6093the items in the current production (e.g. the C</\s+/>) and without
6094setting up the necessary data structures to tell the parser that the
6095rule has succeeded.
6096
6097The correct way to set a return value in an action is to set the C<$return>
6098variable:
6099
6100 range: '(' start '..' end )'
6101 { $return = $item{end} }
6102 /\s+/
6103
6104
6105=head2 2. Setting C<$Parse::RecDescent::skip> at parse time
6106
6107If you want to change the default skipping behaviour (see
6108L<Terminal Separators> and the C<E<lt>skip:...E<gt>> directive) by setting
6109C<$Parse::RecDescent::skip> you have to remember to set this variable
6110I<before> creating the grammar object.
6111
6112For example, you might want to skip all Perl-like comments with this
6113regular expression:
6114
6115 my $skip_spaces_and_comments = qr/
6116 (?mxs:
6117 \s+ # either spaces
6118 | \# .*?$ # or a dash and whatever up to the end of line
6119 )* # repeated at will (in whatever order)
6120 /;
6121
6122And then:
6123
6124 my $parser1 = Parse::RecDescent->new($grammar);
6125
6126 $Parse::RecDescent::skip = $skip_spaces_and_comments;
6127
6128 my $parser2 = Parse::RecDescent->new($grammar);
6129
6130 $parser1->parse($text); # this does not cope with comments
6131 $parser2->parse($text); # this skips comments correctly
6132
6133The two parsers behave differently, because any skipping behaviour
6134specified via C<$Parse::RecDescent::skip> is hard-coded when the
6135grammar object is built, not at parse time.
6136
6137
6138=head1 DIAGNOSTICS
6139
6140Diagnostics are intended to be self-explanatory (particularly if you
6141use B<-RD_HINT> (under B<perl -s>) or define C<$::RD_HINT> inside the program).
6142
6143C<Parse::RecDescent> currently diagnoses the following:
6144
6145=over 4
6146
6147=item *
6148
6149Invalid regular expressions used as pattern terminals (fatal error).
6150
6151=item *
6152
6153Invalid Perl code in code blocks (fatal error).
6154
6155=item *
6156
6157Lookahead used in the wrong place or in a nonsensical way (fatal error).
6158
6159=item *
6160
6161"Obvious" cases of left-recursion (fatal error).
6162
6163=item *
6164
6165Missing or extra components in a C<E<lt>leftopE<gt>> or C<E<lt>rightopE<gt>>
6166directive.
6167
6168=item *
6169
6170Unrecognisable components in the grammar specification (fatal error).
6171
6172=item *
6173
6174"Orphaned" rule components specified before the first rule (fatal error)
6175or after an C<E<lt>errorE<gt>> directive (level 3 warning).
6176
6177=item *
6178
6179Missing rule definitions (this only generates a level 3 warning, since you
6180may be providing them later via C<Parse::RecDescent::Extend()>).
6181
6182=item *
6183
6184Instances where greedy repetition behaviour will almost certainly
6185cause the failure of a production (a level 3 warning - see
6186L<"ON-GOING ISSUES AND FUTURE DIRECTIONS"> below).
6187
6188=item *
6189
6190Attempts to define rules named 'Replace' or 'Extend', which cannot be
6191called directly through the parser object because of the predefined
6192meaning of C<Parse::RecDescent::Replace> and
6193C<Parse::RecDescent::Extend>. (Only a level 2 warning is generated, since
6194such rules I<can> still be used as subrules).
6195
6196=item *
6197
6198Productions which consist of a single C<E<lt>error?E<gt>>
6199directive, and which therefore may succeed unexpectedly
6200(a level 2 warning, since this might conceivably be the desired effect).
6201
6202=item *
6203
6204Multiple consecutive lookahead specifiers (a level 1 warning only, since their
6205effects simply accumulate).
6206
6207=item *
6208
6209Productions which start with a C<E<lt>rejectE<gt>> or C<E<lt>rulevar:...E<gt>>
6210directive. Such productions are optimized away (a level 1 warning).
6211
6212=item *
6213
6214Rules which are autogenerated under C<$::AUTOSTUB> (a level 1 warning).
6215
6216=back
6217
6218=head1 AUTHOR
6219
6220Damian Conway (damian@conway.org)
6221Jeremy T. Braun (JTBRAUN@CPAN.org) [current maintainer]
6222
6223=head1 BUGS AND IRRITATIONS
6224
6225There are undoubtedly serious bugs lurking somewhere in this much code :-)
6226Bug reports, test cases and other feedback are most welcome.
6227
6228Ongoing annoyances include:
6229
6230=over 4
6231
6232=item *
6233
6234There's no support for parsing directly from an input stream.
6235If and when the Perl Gods give us regular expressions on streams,
6236this should be trivial (ahem!) to implement.
6237
6238=item *
6239
6240The parser generator can get confused if actions aren't properly
6241closed or if they contain particularly nasty Perl syntax errors
6242(especially unmatched curly brackets).
6243
6244=item *
6245
6246The generator only detects the most obvious form of left recursion
6247(potential recursion on the first subrule in a rule). More subtle
6248forms of left recursion (for example, through the second item in a
6249rule after a "zero" match of a preceding "zero-or-more" repetition,
6250or after a match of a subrule with an empty production) are not found.
6251
6252=item *
6253
6254Instead of complaining about left-recursion, the generator should
6255silently transform the grammar to remove it. Don't expect this
6256feature any time soon as it would require a more sophisticated
6257approach to parser generation than is currently used.
6258
6259=item *
6260
6261The generated parsers don't always run as fast as might be wished.
6262
6263=item *
6264
6265The meta-parser should be bootstrapped using C<Parse::RecDescent> :-)
6266
6267=back
6268
6269=head1 ON-GOING ISSUES AND FUTURE DIRECTIONS
6270
6271=over 4
6272
6273=item 1.
6274
6275Repetitions are "incorrigibly greedy" in that they will eat everything they can
6276and won't backtrack if that behaviour causes a production to fail needlessly.
6277So, for example:
6278
6279 rule: subrule(s) subrule
6280
6281will I<never> succeed, because the repetition will eat all the
6282subrules it finds, leaving none to match the second item. Such
6283constructions are relatively rare (and C<Parse::RecDescent::new> generates a
6284warning whenever they occur) so this may not be a problem, especially
6285since the insatiable behaviour can be overcome "manually" by writing:
6286
6287 rule: penultimate_subrule(s) subrule
6288
6289 penultimate_subrule: subrule ...subrule
6290
6291The issue is that this construction is exactly twice as expensive as the
6292original, whereas backtracking would add only 1/I<N> to the cost (for
6293matching I<N> repetitions of C<subrule>). I would welcome feedback on
6294the need for backtracking; particularly on cases where the lack of it
6295makes parsing performance problematical.
6296
6297=item 2.
6298
6299Having opened that can of worms, it's also necessary to consider whether there
6300is a need for non-greedy repetition specifiers. Again, it's possible (at some
6301cost) to manually provide the required functionality:
6302
6303 rule: nongreedy_subrule(s) othersubrule
6304
6305 nongreedy_subrule: subrule ...!othersubrule
6306
6307Overall, the issue is whether the benefit of this extra functionality
6308outweighs the drawbacks of further complicating the (currently
6309minimalist) grammar specification syntax, and (worse) introducing more overhead
6310into the generated parsers.
6311
6312=item 3.
6313
6314An C<E<lt>autocommitE<gt>> directive would be nice. That is, it would be useful to be
6315able to say:
6316
6317 command: <autocommit>
6318 command: 'find' name
6319 | 'find' address
6320 | 'do' command 'at' time 'if' condition
6321 | 'do' command 'at' time
6322 | 'do' command
6323 | unusual_command
6324
6325and have the generator work out that this should be "pruned" thus:
6326
6327 command: 'find' name
6328 | 'find' <commit> address
6329 | 'do' <commit> command <uncommit>
6330 'at' time
6331 'if' <commit> condition
6332 | 'do' <commit> command <uncommit>
6333 'at' <commit> time
6334 | 'do' <commit> command
6335 | unusual_command
6336
6337There are several issues here. Firstly, should the
6338C<E<lt>autocommitE<gt>> automatically install an C<E<lt>uncommitE<gt>>
6339at the start of the last production (on the grounds that the "command"
6340rule doesn't know whether an "unusual_command" might start with "find"
6341or "do") or should the "unusual_command" subgraph be analysed (to see
6342if it I<might> be viable after a "find" or "do")?
6343
6344The second issue is how regular expressions should be treated. The simplest
6345approach would be simply to uncommit before them (on the grounds that they
6346I<might> match). Better efficiency would be obtained by analyzing all preceding
6347literal tokens to determine whether the pattern would match them.
6348
6349Overall, the issues are: can such automated "pruning" approach a hand-tuned
6350version sufficiently closely to warrant the extra set-up expense, and (more
6351importantly) is the problem important enough to even warrant the non-trivial
6352effort of building an automated solution?
6353
6354=back
6355
6356=head1 SUPPORT
6357
6358=head2 Source Code Repository
6359
6360L<http://github.com/jtbraun/Parse-RecDescent>
6361
6362=head2 Mailing List
6363
6364Visit L<http://www.perlfoundation.org/perl5/index.cgi?parse_recdescent> to sign up for the mailing list.
6365
6366L<http://www.PerlMonks.org> is also a good place to ask
6367questions. Previous posts about Parse::RecDescent can typically be
6368found with this search:
6369L<http://perlmonks.org/index.pl?node=recdescent>.
6370
6371=head2 FAQ
6372
6373Visit L<Parse::RecDescent::FAQ> for answers to frequently (and not so
6374frequently) asked questions about Parse::RecDescent.
6375
6376=head2 View/Report Bugs
6377
6378To view the current bug list or report a new issue visit
6379L<https://rt.cpan.org/Public/Dist/Display.html?Name=Parse-RecDescent>.
6380
6381=head1 SEE ALSO
6382
6383L<Regexp::Grammars> provides Parse::RecDescent style parsing using native
6384Perl 5.10 regular expressions.
6385
6386
6387=head1 LICENCE AND COPYRIGHT
6388
6389Copyright (c) 1997-2007, Damian Conway C<< <DCONWAY@CPAN.org> >>. All rights
6390reserved.
6391
6392This module is free software; you can redistribute it and/or
6393modify it under the same terms as Perl itself. See L<perlartistic>.
6394
6395
6396=head1 DISCLAIMER OF WARRANTY
6397
6398BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
6399FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
6400OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
6401PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
6402EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
6403WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
6404ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
6405YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
6406NECESSARY SERVICING, REPAIR, OR CORRECTION.
6407
6408IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
6409WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
6410REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE
6411LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL,
6412OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
6413THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
6414RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
6415FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
6416SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
6417SUCH DAMAGES.