| # GENERATE RECURSIVE DESCENT PARSER OBJECTS FROM A GRAMMAR |
| |
| use 5.006; |
| use strict; |
| |
| package Parse::RecDescent; |
| |
| use Text::Balanced qw ( extract_codeblock extract_bracketed extract_quotelike extract_delimited ); |
| |
| use vars qw ( $skip ); |
| |
| *defskip = \ '\s*'; # DEFAULT SEPARATOR IS OPTIONAL WHITESPACE |
| $skip = '\s*'; # UNIVERSAL SEPARATOR IS OPTIONAL WHITESPACE |
| my $MAXREP = 100_000_000; # REPETITIONS MATCH AT MOST 100,000,000 TIMES |
| |
| |
| #ifndef RUNTIME |
| sub import # IMPLEMENT PRECOMPILER BEHAVIOUR UNDER: |
| # perl -MParse::RecDescent - <grammarfile> <classname> |
| { |
| local *_die = sub { print @_, "\n"; exit }; |
| |
| my ($package, $file, $line) = caller; |
| |
| if ($file eq '-' && $line == 0) |
| { |
| _die("Usage: perl -MLocalTest - <grammarfile> <classname>") |
| unless @ARGV == 2; |
| |
| my ($sourcefile, $class) = @ARGV; |
| |
| local *IN; |
| open IN, $sourcefile |
| or _die(qq{Can't open grammar file "$sourcefile"}); |
| local $/; # |
| my $grammar = <IN>; |
| close IN; |
| |
| Parse::RecDescent->Precompile($grammar, $class, $sourcefile); |
| exit; |
| } |
| } |
| |
| sub Save |
| { |
| my $self = shift; |
| my %opt; |
| if ('HASH' eq ref $_[0]) { |
| %opt = (%opt, %{$_[0]}); |
| shift; |
| } |
| my ($class) = @_; |
| $self->{saving} = 1; |
| $self->Precompile(undef,$class); |
| $self->{saving} = 0; |
| } |
| |
| sub Precompile |
| { |
| my $self = shift; |
| my %opt = ( -standalone => 0 ); |
| if ('HASH' eq ref $_[0]) { |
| %opt = (%opt, %{$_[0]}); |
| shift; |
| } |
| my ($grammar, $class, $sourcefile) = @_; |
| |
| $class =~ /^(\w+::)*\w+$/ or croak("Bad class name: $class"); |
| |
| my $modulefile = $class; |
| $modulefile =~ s/.*:://; |
| $modulefile .= ".pm"; |
| |
| my $runtime_package = 'Parse::RecDescent::_Runtime'; |
| my $code; |
| |
| local *OUT; |
| open OUT, ">", $modulefile |
| or croak("Can't write to new module file '$modulefile'"); |
| |
| print STDERR "precompiling grammar from file '$sourcefile'\n", |
| "to class $class in module file '$modulefile'\n" |
| if $grammar && $sourcefile; |
| |
| # Make the resulting pre-compiled parser stand-alone by |
| # including the contents of Parse::RecDescent as |
| # Parse::RecDescent::Runtime in the resulting precompiled |
| # parser. |
| if ($opt{-standalone}) { |
| local *IN; |
| open IN, '<', $Parse::RecDescent::_FILENAME |
| or croak("Can't open $Parse::RecDescent::_FILENAME for standalone pre-compilation: $!\n"); |
| my $exclude = 0; |
| print OUT "{\n"; |
| while (<IN>) { |
| if ($_ =~ /^\s*#\s*ifndef\s+RUNTIME\s*$/) { |
| ++$exclude; |
| } |
| if ($exclude) { |
| if ($_ =~ /^\s*#\s*endif\s$/) { |
| --$exclude; |
| } |
| } else { |
| if ($_ =~ m/^__END__/) { |
| last; |
| } |
| s/Parse::RecDescent/$runtime_package/gs; |
| print OUT $_; |
| } |
| } |
| close IN; |
| print OUT "}\n"; |
| } |
| |
| $self = Parse::RecDescent->new($grammar, # $grammar |
| 1, # $compiling |
| $class # $namespace |
| ) |
| || croak("Can't compile bad grammar") |
| if $grammar; |
| |
| $self->{_precompiled} = 1; |
| |
| foreach ( keys %{$self->{rules}} ) { |
| $self->{rules}{$_}{changed} = 1; |
| } |
| |
| |
| print OUT "package $class;\n"; |
| if (not $opt{-standalone}) { |
| print OUT "use Parse::RecDescent;\n"; |
| } |
| |
| print OUT "{ my \$ERRORS;\n\n"; |
| |
| $code = $self->_code(); |
| if ($opt{-standalone}) { |
| $code =~ s/Parse::RecDescent/$runtime_package/gs; |
| } |
| print OUT $code; |
| |
| print OUT "}\npackage $class; sub new { "; |
| print OUT "my "; |
| |
| require Data::Dumper; |
| $code = Data::Dumper->Dump([$self], [qw(self)]); |
| if ($opt{-standalone}) { |
| $code =~ s/Parse::RecDescent/$runtime_package/gs; |
| } |
| print OUT $code; |
| |
| print OUT "}"; |
| |
| close OUT |
| or croak("Can't write to new module file '$modulefile'"); |
| } |
| #endif |
| |
| package Parse::RecDescent::LineCounter; |
| |
| |
| sub TIESCALAR # ($classname, \$text, $thisparser, $prevflag) |
| { |
| bless { |
| text => $_[1], |
| parser => $_[2], |
| prev => $_[3]?1:0, |
| }, $_[0]; |
| } |
| |
| sub FETCH |
| { |
| my $parser = $_[0]->{parser}; |
| my $cache = $parser->{linecounter_cache}; |
| my $from = $parser->{fulltextlen}-length(${$_[0]->{text}})-$_[0]->{prev} |
| ; |
| |
| unless (exists $cache->{$from}) |
| { |
| $parser->{lastlinenum} = $parser->{offsetlinenum} |
| - Parse::RecDescent::_linecount(substr($parser->{fulltext},$from)) |
| + 1; |
| $cache->{$from} = $parser->{lastlinenum}; |
| } |
| return $cache->{$from}; |
| } |
| |
| sub STORE |
| { |
| my $parser = $_[0]->{parser}; |
| $parser->{offsetlinenum} -= $parser->{lastlinenum} - $_[1]; |
| return undef; |
| } |
| |
| sub resync # ($linecounter) |
| { |
| my $self = tied($_[0]); |
| die "Tried to alter something other than a LineCounter\n" |
| unless $self =~ /Parse::RecDescent::LineCounter/; |
| |
| my $parser = $self->{parser}; |
| my $apparently = $parser->{offsetlinenum} |
| - Parse::RecDescent::_linecount(${$self->{text}}) |
| + 1; |
| |
| $parser->{offsetlinenum} += $parser->{lastlinenum} - $apparently; |
| return 1; |
| } |
| |
| package Parse::RecDescent::ColCounter; |
| |
| sub TIESCALAR # ($classname, \$text, $thisparser, $prevflag) |
| { |
| bless { |
| text => $_[1], |
| parser => $_[2], |
| prev => $_[3]?1:0, |
| }, $_[0]; |
| } |
| |
| sub FETCH |
| { |
| my $parser = $_[0]->{parser}; |
| my $missing = $parser->{fulltextlen}-length(${$_[0]->{text}})-$_[0]->{prev}+1; |
| substr($parser->{fulltext},0,$missing) =~ m/^(.*)\Z/m; |
| return length($1); |
| } |
| |
| sub STORE |
| { |
| die "Can't set column number via \$thiscolumn\n"; |
| } |
| |
| |
| package Parse::RecDescent::OffsetCounter; |
| |
| sub TIESCALAR # ($classname, \$text, $thisparser, $prev) |
| { |
| bless { |
| text => $_[1], |
| parser => $_[2], |
| prev => $_[3]?-1:0, |
| }, $_[0]; |
| } |
| |
| sub FETCH |
| { |
| my $parser = $_[0]->{parser}; |
| return $parser->{fulltextlen}-length(${$_[0]->{text}})+$_[0]->{prev}; |
| } |
| |
| sub STORE |
| { |
| die "Can't set current offset via \$thisoffset or \$prevoffset\n"; |
| } |
| |
| |
| |
| package Parse::RecDescent::Rule; |
| |
| sub new ($$$$$) |
| { |
| my $class = ref($_[0]) || $_[0]; |
| my $name = $_[1]; |
| my $owner = $_[2]; |
| my $line = $_[3]; |
| my $replace = $_[4]; |
| |
| if (defined $owner->{"rules"}{$name}) |
| { |
| my $self = $owner->{"rules"}{$name}; |
| if ($replace && !$self->{"changed"}) |
| { |
| $self->reset; |
| } |
| return $self; |
| } |
| else |
| { |
| return $owner->{"rules"}{$name} = |
| bless |
| { |
| "name" => $name, |
| "prods" => [], |
| "calls" => [], |
| "changed" => 0, |
| "line" => $line, |
| "impcount" => 0, |
| "opcount" => 0, |
| "vars" => "", |
| }, $class; |
| } |
| } |
| |
| sub reset($) |
| { |
| @{$_[0]->{"prods"}} = (); |
| @{$_[0]->{"calls"}} = (); |
| $_[0]->{"changed"} = 0; |
| $_[0]->{"impcount"} = 0; |
| $_[0]->{"opcount"} = 0; |
| $_[0]->{"vars"} = ""; |
| } |
| |
| sub DESTROY {} |
| |
| sub hasleftmost($$) |
| { |
| my ($self, $ref) = @_; |
| |
| my $prod; |
| foreach $prod ( @{$self->{"prods"}} ) |
| { |
| return 1 if $prod->hasleftmost($ref); |
| } |
| |
| return 0; |
| } |
| |
| sub leftmostsubrules($) |
| { |
| my $self = shift; |
| my @subrules = (); |
| |
| my $prod; |
| foreach $prod ( @{$self->{"prods"}} ) |
| { |
| push @subrules, $prod->leftmostsubrule(); |
| } |
| |
| return @subrules; |
| } |
| |
| sub expected($) |
| { |
| my $self = shift; |
| my @expected = (); |
| |
| my $prod; |
| foreach $prod ( @{$self->{"prods"}} ) |
| { |
| my $next = $prod->expected(); |
| unless (! $next or _contains($next,@expected) ) |
| { |
| push @expected, $next; |
| } |
| } |
| |
| return join ', or ', @expected; |
| } |
| |
| sub _contains($@) |
| { |
| my $target = shift; |
| my $item; |
| foreach $item ( @_ ) { return 1 if $target eq $item; } |
| return 0; |
| } |
| |
| sub addcall($$) |
| { |
| my ( $self, $subrule ) = @_; |
| unless ( _contains($subrule, @{$self->{"calls"}}) ) |
| { |
| push @{$self->{"calls"}}, $subrule; |
| } |
| } |
| |
| sub addprod($$) |
| { |
| my ( $self, $prod ) = @_; |
| push @{$self->{"prods"}}, $prod; |
| $self->{"changed"} = 1; |
| $self->{"impcount"} = 0; |
| $self->{"opcount"} = 0; |
| $prod->{"number"} = $#{$self->{"prods"}}; |
| return $prod; |
| } |
| |
| sub addvar |
| { |
| my ( $self, $var, $parser ) = @_; |
| if ($var =~ /\A\s*local\s+([%@\$]\w+)/) |
| { |
| $parser->{localvars} .= " $1"; |
| $self->{"vars"} .= "$var;\n" } |
| else |
| { $self->{"vars"} .= "my $var;\n" } |
| $self->{"changed"} = 1; |
| return 1; |
| } |
| |
| sub addautoscore |
| { |
| my ( $self, $code ) = @_; |
| $self->{"autoscore"} = $code; |
| $self->{"changed"} = 1; |
| return 1; |
| } |
| |
| sub nextoperator($) |
| { |
| my $self = shift; |
| my $prodcount = scalar @{$self->{"prods"}}; |
| my $opcount = ++$self->{"opcount"}; |
| return "_operator_${opcount}_of_production_${prodcount}_of_rule_$self->{name}"; |
| } |
| |
| sub nextimplicit($) |
| { |
| my $self = shift; |
| my $prodcount = scalar @{$self->{"prods"}}; |
| my $impcount = ++$self->{"impcount"}; |
| return "_alternation_${impcount}_of_production_${prodcount}_of_rule_$self->{name}"; |
| } |
| |
| |
| sub code |
| { |
| my ($self, $namespace, $parser, $check) = @_; |
| |
| eval 'undef &' . $namespace . '::' . $self->{"name"} unless $parser->{saving}; |
| |
| my $code = |
| ' |
| # ARGS ARE: ($parser, $text; $repeating, $_noactions, $_itempos, \@args) |
| sub ' . $namespace . '::' . $self->{"name"} . ' |
| { |
| my $thisparser = $_[0]; |
| use vars q{$tracelevel}; |
| local $tracelevel = ($tracelevel||0)+1; |
| $ERRORS = 0; |
| my $thisrule = $thisparser->{"rules"}{"' . $self->{"name"} . '"}; |
| |
| Parse::RecDescent::_trace(q{Trying rule: [' . $self->{"name"} . ']}, |
| Parse::RecDescent::_tracefirst($_[1]), |
| q{' . $self->{"name"} . '}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| |
| ' . ($parser->{deferrable} |
| ? 'my $def_at = @{$thisparser->{deferred}};' |
| : '') . |
| ' |
| my $err_at = @{$thisparser->{errors}}; |
| |
| my $score; |
| my $score_return; |
| my $_tok; |
| my $return = undef; |
| my $_matched=0; |
| my $commit=0; |
| my @item = (); |
| my %item = (); |
| my $repeating = $_[2]; |
| my $_noactions = $_[3]; |
| my $_itempos = $_[4]; |
| my @arg = defined $_[5] ? @{ &{$_[5]} } : (); |
| my %arg = ($#arg & 01) ? @arg : (@arg, undef); |
| my $text; |
| my $lastsep; |
| my $current_match; |
| my $expectation = new Parse::RecDescent::Expectation(q{' . $self->expected() . '}); |
| $expectation->at($_[1]); |
| '. ($parser->{_check}{thisoffset}?' |
| my $thisoffset; |
| tie $thisoffset, q{Parse::RecDescent::OffsetCounter}, \$text, $thisparser; |
| ':'') . ($parser->{_check}{prevoffset}?' |
| my $prevoffset; |
| tie $prevoffset, q{Parse::RecDescent::OffsetCounter}, \$text, $thisparser, 1; |
| ':'') . ($parser->{_check}{thiscolumn}?' |
| my $thiscolumn; |
| tie $thiscolumn, q{Parse::RecDescent::ColCounter}, \$text, $thisparser; |
| ':'') . ($parser->{_check}{prevcolumn}?' |
| my $prevcolumn; |
| tie $prevcolumn, q{Parse::RecDescent::ColCounter}, \$text, $thisparser, 1; |
| ':'') . ($parser->{_check}{prevline}?' |
| my $prevline; |
| tie $prevline, q{Parse::RecDescent::LineCounter}, \$text, $thisparser, 1; |
| ':'') . ' |
| my $thisline; |
| tie $thisline, q{Parse::RecDescent::LineCounter}, \$text, $thisparser; |
| |
| '. $self->{vars} .' |
| '; |
| |
| my $prod; |
| foreach $prod ( @{$self->{"prods"}} ) |
| { |
| $prod->addscore($self->{autoscore},0,0) if $self->{autoscore}; |
| next unless $prod->checkleftmost(); |
| $code .= $prod->code($namespace,$self,$parser); |
| |
| $code .= $parser->{deferrable} |
| ? ' splice |
| @{$thisparser->{deferred}}, $def_at unless $_matched; |
| ' |
| : ''; |
| } |
| |
| $code .= |
| ' |
| unless ( $_matched || defined($score) ) |
| { |
| ' .($parser->{deferrable} |
| ? ' splice @{$thisparser->{deferred}}, $def_at; |
| ' |
| : '') . ' |
| |
| $_[1] = $text; # NOT SURE THIS IS NEEDED |
| Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' rule>>}, |
| Parse::RecDescent::_tracefirst($_[1]), |
| q{' . $self->{"name"} .'}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| return undef; |
| } |
| if (!defined($return) && defined($score)) |
| { |
| Parse::RecDescent::_trace(q{>>Accepted scored production<<}, "", |
| q{' . $self->{"name"} .'}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| $return = $score_return; |
| } |
| splice @{$thisparser->{errors}}, $err_at; |
| $return = $item[$#item] unless defined $return; |
| if (defined $::RD_TRACE) |
| { |
| Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' rule<< (return value: [} . |
| $return . q{])}, "", |
| q{' . $self->{"name"} .'}, |
| $tracelevel); |
| Parse::RecDescent::_trace(q{(consumed: [} . |
| Parse::RecDescent::_tracemax(substr($_[1],0,-length($text))) . q{])}, |
| Parse::RecDescent::_tracefirst($text), |
| , q{' . $self->{"name"} .'}, |
| $tracelevel) |
| } |
| $_[1] = $text; |
| return $return; |
| } |
| '; |
| |
| return $code; |
| } |
| |
| my @left; |
| sub isleftrec($$) |
| { |
| my ($self, $rules) = @_; |
| my $root = $self->{"name"}; |
| @left = $self->leftmostsubrules(); |
| my $next; |
| foreach $next ( @left ) |
| { |
| next unless defined $rules->{$next}; # SKIP NON-EXISTENT RULES |
| return 1 if $next eq $root; |
| my $child; |
| foreach $child ( $rules->{$next}->leftmostsubrules() ) |
| { |
| push(@left, $child) |
| if ! _contains($child, @left) ; |
| } |
| } |
| return 0; |
| } |
| |
| package Parse::RecDescent::Production; |
| |
| sub describe ($;$) |
| { |
| return join ' ', map { $_->describe($_[1]) or () } @{$_[0]->{items}}; |
| } |
| |
| sub new ($$;$$) |
| { |
| my ($self, $line, $uncommit, $error) = @_; |
| my $class = ref($self) || $self; |
| |
| bless |
| { |
| "items" => [], |
| "uncommit" => $uncommit, |
| "error" => $error, |
| "line" => $line, |
| strcount => 0, |
| patcount => 0, |
| dircount => 0, |
| actcount => 0, |
| }, $class; |
| } |
| |
| sub expected ($) |
| { |
| my $itemcount = scalar @{$_[0]->{"items"}}; |
| return ($itemcount) ? $_[0]->{"items"}[0]->describe(1) : ''; |
| } |
| |
| sub hasleftmost ($$) |
| { |
| my ($self, $ref) = @_; |
| return ${$self->{"items"}}[0] eq $ref if scalar @{$self->{"items"}}; |
| return 0; |
| } |
| |
| sub isempty($) |
| { |
| my $self = shift; |
| return 0 == @{$self->{"items"}}; |
| } |
| |
| sub leftmostsubrule($) |
| { |
| my $self = shift; |
| |
| if ( $#{$self->{"items"}} >= 0 ) |
| { |
| my $subrule = $self->{"items"}[0]->issubrule(); |
| return $subrule if defined $subrule; |
| } |
| |
| return (); |
| } |
| |
| sub checkleftmost($) |
| { |
| my @items = @{$_[0]->{"items"}}; |
| if (@items==1 && ref($items[0]) =~ /\AParse::RecDescent::Error/ |
| && $items[0]->{commitonly} ) |
| { |
| Parse::RecDescent::_warn(2,"Lone <error?> in production treated |
| as <error?> <reject>"); |
| Parse::RecDescent::_hint("A production consisting of a single |
| conditional <error?> directive would |
| normally succeed (with the value zero) if the |
| rule is not 'commited' when it is |
| tried. Since you almost certainly wanted |
| '<error?> <reject>' Parse::RecDescent |
| supplied it for you."); |
| push @{$_[0]->{items}}, |
| Parse::RecDescent::UncondReject->new(0,0,'<reject>'); |
| } |
| elsif (@items==1 && ($items[0]->describe||"") =~ /<rulevar|<autoscore/) |
| { |
| # Do nothing |
| } |
| elsif (@items && |
| ( ref($items[0]) =~ /\AParse::RecDescent::UncondReject/ |
| || ($items[0]->describe||"") =~ /<autoscore/ |
| )) |
| { |
| Parse::RecDescent::_warn(1,"Optimizing away production: [". $_[0]->describe ."]"); |
| my $what = $items[0]->describe =~ /<rulevar/ |
| ? "a <rulevar> (which acts like an unconditional <reject> during parsing)" |
| : $items[0]->describe =~ /<autoscore/ |
| ? "an <autoscore> (which acts like an unconditional <reject> during parsing)" |
| : "an unconditional <reject>"; |
| my $caveat = $items[0]->describe =~ /<rulevar/ |
| ? " after the specified variable was set up" |
| : ""; |
| my $advice = @items > 1 |
| ? "However, there were also other (useless) items after the leading " |
| . $items[0]->describe |
| . ", so you may have been expecting some other behaviour." |
| : "You can safely ignore this message."; |
| Parse::RecDescent::_hint("The production starts with $what. That means that the |
| production can never successfully match, so it was |
| optimized out of the final parser$caveat. $advice"); |
| return 0; |
| } |
| return 1; |
| } |
| |
| sub changesskip($) |
| { |
| my $item; |
| foreach $item (@{$_[0]->{"items"}}) |
| { |
| if (ref($item) =~ /Parse::RecDescent::(Action|Directive)/) |
| { |
| return 1 if $item->{code} =~ /\$skip\s*=/; |
| } |
| } |
| return 0; |
| } |
| |
| sub adddirective |
| { |
| my ( $self, $whichop, $line, $name ) = @_; |
| push @{$self->{op}}, |
| { type=>$whichop, line=>$line, name=>$name, |
| offset=> scalar(@{$self->{items}}) }; |
| } |
| |
| sub addscore |
| { |
| my ( $self, $code, $lookahead, $line ) = @_; |
| $self->additem(Parse::RecDescent::Directive->new( |
| "local \$^W; |
| my \$thisscore = do { $code } + 0; |
| if (!defined(\$score) || \$thisscore>\$score) |
| { \$score=\$thisscore; \$score_return=\$item[-1]; } |
| undef;", $lookahead, $line,"<score: $code>") ) |
| unless $self->{items}[-1]->describe =~ /<score/; |
| return 1; |
| } |
| |
| sub check_pending |
| { |
| my ( $self, $line ) = @_; |
| if ($self->{op}) |
| { |
| while (my $next = pop @{$self->{op}}) |
| { |
| Parse::RecDescent::_error("Incomplete <$next->{type}op:...>.", $line); |
| Parse::RecDescent::_hint( |
| "The current production ended without completing the |
| <$next->{type}op:...> directive that started near line |
| $next->{line}. Did you forget the closing '>'?"); |
| } |
| } |
| return 1; |
| } |
| |
| sub enddirective |
| { |
| my ( $self, $line, $minrep, $maxrep ) = @_; |
| unless ($self->{op}) |
| { |
| Parse::RecDescent::_error("Unmatched > found.", $line); |
| Parse::RecDescent::_hint( |
| "A '>' angle bracket was encountered, which typically |
| indicates the end of a directive. However no suitable |
| preceding directive was encountered. Typically this |
| indicates either a extra '>' in the grammar, or a |
| problem inside the previous directive."); |
| return; |
| } |
| my $op = pop @{$self->{op}}; |
| my $span = @{$self->{items}} - $op->{offset}; |
| if ($op->{type} =~ /left|right/) |
| { |
| if ($span != 3) |
| { |
| Parse::RecDescent::_error( |
| "Incorrect <$op->{type}op:...> specification: |
| expected 3 args, but found $span instead", $line); |
| Parse::RecDescent::_hint( |
| "The <$op->{type}op:...> directive requires a |
| sequence of exactly three elements. For example: |
| <$op->{type}op:leftarg /op/ rightarg>"); |
| } |
| else |
| { |
| push @{$self->{items}}, |
| Parse::RecDescent::Operator->new( |
| $op->{type}, $minrep, $maxrep, splice(@{$self->{"items"}}, -3)); |
| $self->{items}[-1]->sethashname($self); |
| $self->{items}[-1]{name} = $op->{name}; |
| } |
| } |
| } |
| |
| sub prevwasreturn |
| { |
| my ( $self, $line ) = @_; |
| unless (@{$self->{items}}) |
| { |
| Parse::RecDescent::_error( |
| "Incorrect <return:...> specification: |
| expected item missing", $line); |
| Parse::RecDescent::_hint( |
| "The <return:...> directive requires a |
| sequence of at least one item. For example: |
| <return: list>"); |
| return; |
| } |
| push @{$self->{items}}, |
| Parse::RecDescent::Result->new(); |
| } |
| |
| sub additem |
| { |
| my ( $self, $item ) = @_; |
| $item->sethashname($self); |
| push @{$self->{"items"}}, $item; |
| return $item; |
| } |
| |
| sub _duplicate_itempos |
| { |
| my ($src) = @_; |
| my $dst = {}; |
| |
| foreach (keys %$src) |
| { |
| %{$dst->{$_}} = %{$src->{$_}}; |
| } |
| $dst; |
| } |
| |
| sub _update_itempos |
| { |
| my ($dst, $src, $typekeys, $poskeys) = @_; |
| |
| my @typekeys = 'ARRAY' eq ref $typekeys ? |
| @$typekeys : |
| keys %$src; |
| |
| foreach my $k (keys %$src) |
| { |
| if ('ARRAY' eq ref $poskeys) |
| { |
| @{$dst->{$k}}{@$poskeys} = @{$src->{$k}}{@$poskeys}; |
| } |
| else |
| { |
| %{$dst->{$k}} = %{$src->{$k}}; |
| } |
| } |
| } |
| |
| sub preitempos |
| { |
| return q |
| { |
| push @itempos, {'offset' => {'from'=>$thisoffset, 'to'=>undef}, |
| 'line' => {'from'=>$thisline, 'to'=>undef}, |
| 'column' => {'from'=>$thiscolumn, 'to'=>undef} }; |
| } |
| } |
| |
| sub incitempos |
| { |
| return q |
| { |
| $itempos[$#itempos]{'offset'}{'from'} += length($lastsep); |
| $itempos[$#itempos]{'line'}{'from'} = $thisline; |
| $itempos[$#itempos]{'column'}{'from'} = $thiscolumn; |
| } |
| } |
| |
| sub unincitempos |
| { |
| # the next incitempos will properly set these two fields, but |
| # {'offset'}{'from'} needs to be decreased by length($lastsep) |
| # $itempos[$#itempos]{'line'}{'from'} |
| # $itempos[$#itempos]{'column'}{'from'} |
| return q |
| { |
| $itempos[$#itempos]{'offset'}{'from'} -= length($lastsep) if defined $lastsep; |
| } |
| } |
| |
| sub postitempos |
| { |
| return q |
| { |
| $itempos[$#itempos]{'offset'}{'to'} = $prevoffset; |
| $itempos[$#itempos]{'line'}{'to'} = $prevline; |
| $itempos[$#itempos]{'column'}{'to'} = $prevcolumn; |
| } |
| } |
| |
| sub code($$$$) |
| { |
| my ($self,$namespace,$rule,$parser) = @_; |
| my $code = |
| ' |
| while (!$_matched' |
| . (defined $self->{"uncommit"} ? '' : ' && !$commit') |
| . ') |
| { |
| ' . |
| ($self->changesskip() |
| ? 'local $skip = defined($skip) ? $skip : $Parse::RecDescent::skip;' |
| : '') .' |
| Parse::RecDescent::_trace(q{Trying production: [' |
| . $self->describe . ']}, |
| Parse::RecDescent::_tracefirst($_[1]), |
| q{' . $rule ->{name}. '}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| my $thisprod = $thisrule->{"prods"}[' . $self->{"number"} . ']; |
| ' . (defined $self->{"error"} ? '' : '$text = $_[1];' ) . ' |
| my $_savetext; |
| @item = (q{' . $rule->{"name"} . '}); |
| %item = (__RULE__ => q{' . $rule->{"name"} . '}); |
| my $repcount = 0; |
| |
| '; |
| $code .= |
| ' my @itempos = ({}); |
| ' if $parser->{_check}{itempos}; |
| |
| my $item; |
| my $i; |
| |
| for ($i = 0; $i < @{$self->{"items"}}; $i++) |
| { |
| $item = ${$self->{items}}[$i]; |
| |
| $code .= preitempos() if $parser->{_check}{itempos}; |
| |
| $code .= $item->code($namespace,$rule,$parser->{_check}); |
| |
| $code .= postitempos() if $parser->{_check}{itempos}; |
| |
| } |
| |
| if ($parser->{_AUTOACTION} && defined($item) && !$item->isa("Parse::RecDescent::Action")) |
| { |
| $code .= $parser->{_AUTOACTION}->code($namespace,$rule); |
| Parse::RecDescent::_warn(1,"Autogenerating action in rule |
| \"$rule->{name}\": |
| $parser->{_AUTOACTION}{code}") |
| and |
| Parse::RecDescent::_hint("The \$::RD_AUTOACTION was defined, |
| so any production not ending in an |
| explicit action has the specified |
| \"auto-action\" automatically |
| appended."); |
| } |
| elsif ($parser->{_AUTOTREE} && defined($item) && !$item->isa("Parse::RecDescent::Action")) |
| { |
| if ($i==1 && $item->isterminal) |
| { |
| $code .= $parser->{_AUTOTREE}{TERMINAL}->code($namespace,$rule); |
| } |
| else |
| { |
| $code .= $parser->{_AUTOTREE}{NODE}->code($namespace,$rule); |
| } |
| Parse::RecDescent::_warn(1,"Autogenerating tree-building action in rule |
| \"$rule->{name}\"") |
| and |
| Parse::RecDescent::_hint("The directive <autotree> was specified, |
| so any production not ending |
| in an explicit action has |
| some parse-tree building code |
| automatically appended."); |
| } |
| |
| $code .= |
| ' |
| Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' production: [' |
| . $self->describe . ']<<}, |
| Parse::RecDescent::_tracefirst($text), |
| q{' . $rule->{name} . '}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| |
| ' . ( $parser->{_check}{itempos} ? ' |
| if ( defined($_itempos) ) |
| { |
| Parse::RecDescent::Production::_update_itempos($_itempos, $itempos[ 1], undef, [qw(from)]); |
| Parse::RecDescent::Production::_update_itempos($_itempos, $itempos[-1], undef, [qw(to)]); |
| } |
| ' : '' ) . ' |
| |
| $_matched = 1; |
| last; |
| } |
| |
| '; |
| return $code; |
| } |
| |
| 1; |
| |
| package Parse::RecDescent::Action; |
| |
| sub describe { undef } |
| |
| sub sethashname { $_[0]->{hashname} = '__ACTION' . ++$_[1]->{actcount} .'__'; } |
| |
| sub new |
| { |
| my $class = ref($_[0]) || $_[0]; |
| bless |
| { |
| "code" => $_[1], |
| "lookahead" => $_[2], |
| "line" => $_[3], |
| }, $class; |
| } |
| |
| sub issubrule { undef } |
| sub isterminal { 0 } |
| |
| sub code($$$$) |
| { |
| my ($self, $namespace, $rule) = @_; |
| |
| ' |
| Parse::RecDescent::_trace(q{Trying action}, |
| Parse::RecDescent::_tracefirst($text), |
| q{' . $rule->{name} . '}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .' |
| |
| $_tok = ($_noactions) ? 0 : do ' . $self->{"code"} . '; |
| ' . ($self->{"lookahead"}<0?'if':'unless') . ' (defined $_tok) |
| { |
| Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' action>> (return value: [undef])}) |
| if defined $::RD_TRACE; |
| last; |
| } |
| Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' action<< (return value: [} |
| . $_tok . q{])}, |
| Parse::RecDescent::_tracefirst($text)) |
| if defined $::RD_TRACE; |
| push @item, $_tok; |
| ' . ($self->{line}>=0 ? '$item{'. $self->{hashname} .'}=$_tok;' : '' ) .' |
| ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' |
| ' |
| } |
| |
| |
| 1; |
| |
| package Parse::RecDescent::Directive; |
| |
| sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; } |
| |
| sub issubrule { undef } |
| sub isterminal { 0 } |
| sub describe { $_[1] ? '' : $_[0]->{name} } |
| |
| sub new ($$$$$) |
| { |
| my $class = ref($_[0]) || $_[0]; |
| bless |
| { |
| "code" => $_[1], |
| "lookahead" => $_[2], |
| "line" => $_[3], |
| "name" => $_[4], |
| }, $class; |
| } |
| |
| sub code($$$$) |
| { |
| my ($self, $namespace, $rule) = @_; |
| |
| ' |
| ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .' |
| |
| Parse::RecDescent::_trace(q{Trying directive: [' |
| . $self->describe . ']}, |
| Parse::RecDescent::_tracefirst($text), |
| q{' . $rule->{name} . '}, |
| $tracelevel) |
| if defined $::RD_TRACE; ' .' |
| $_tok = do { ' . $self->{"code"} . ' }; |
| if (defined($_tok)) |
| { |
| Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' directive<< (return value: [} |
| . $_tok . q{])}, |
| Parse::RecDescent::_tracefirst($text)) |
| if defined $::RD_TRACE; |
| } |
| else |
| { |
| Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' directive>>}, |
| Parse::RecDescent::_tracefirst($text)) |
| if defined $::RD_TRACE; |
| } |
| ' . ($self->{"lookahead"} ? '$text = $_savetext and ' : '' ) .' |
| last ' |
| . ($self->{"lookahead"}<0?'if':'unless') . ' defined $_tok; |
| push @item, $item{'.$self->{hashname}.'}=$_tok; |
| ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' |
| ' |
| } |
| |
| 1; |
| |
| package Parse::RecDescent::UncondReject; |
| |
| sub issubrule { undef } |
| sub isterminal { 0 } |
| sub describe { $_[1] ? '' : $_[0]->{name} } |
| sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; } |
| |
| sub new ($$$;$) |
| { |
| my $class = ref($_[0]) || $_[0]; |
| bless |
| { |
| "lookahead" => $_[1], |
| "line" => $_[2], |
| "name" => $_[3], |
| }, $class; |
| } |
| |
| # MARK, YOU MAY WANT TO OPTIMIZE THIS. |
| |
| |
| sub code($$$$) |
| { |
| my ($self, $namespace, $rule) = @_; |
| |
| ' |
| Parse::RecDescent::_trace(q{>>Rejecting production<< (found ' |
| . $self->describe . ')}, |
| Parse::RecDescent::_tracefirst($text), |
| q{' . $rule->{name} . '}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| undef $return; |
| ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .' |
| |
| $_tok = undef; |
| ' . ($self->{"lookahead"} ? '$text = $_savetext and ' : '' ) .' |
| last ' |
| . ($self->{"lookahead"}<0?'if':'unless') . ' defined $_tok; |
| ' |
| } |
| |
| 1; |
| |
| package Parse::RecDescent::Error; |
| |
| sub issubrule { undef } |
| sub isterminal { 0 } |
| sub describe { $_[1] ? '' : $_[0]->{commitonly} ? '<error?:...>' : '<error...>' } |
| sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; } |
| |
| sub new ($$$$$) |
| { |
| my $class = ref($_[0]) || $_[0]; |
| bless |
| { |
| "msg" => $_[1], |
| "lookahead" => $_[2], |
| "commitonly" => $_[3], |
| "line" => $_[4], |
| }, $class; |
| } |
| |
| sub code($$$$) |
| { |
| my ($self, $namespace, $rule) = @_; |
| |
| my $action = ''; |
| |
| if ($self->{"msg"}) # ERROR MESSAGE SUPPLIED |
| { |
| #WAS: $action .= "Parse::RecDescent::_error(qq{$self->{msg}}" . ',$thisline);'; |
| $action .= 'push @{$thisparser->{errors}}, [qq{'.$self->{msg}.'},$thisline];'; |
| |
| } |
| else # GENERATE ERROR MESSAGE DURING PARSE |
| { |
| $action .= ' |
| my $rule = $item[0]; |
| $rule =~ s/_/ /g; |
| #WAS: Parse::RecDescent::_error("Invalid $rule: " . $expectation->message() ,$thisline); |
| push @{$thisparser->{errors}}, ["Invalid $rule: " . $expectation->message() ,$thisline]; |
| '; |
| } |
| |
| my $dir = |
| new Parse::RecDescent::Directive('if (' . |
| ($self->{"commitonly"} ? '$commit' : '1') . |
| ") { do {$action} unless ".' $_noactions; undef } else {0}', |
| $self->{"lookahead"},0,$self->describe); |
| $dir->{hashname} = $self->{hashname}; |
| return $dir->code($namespace, $rule, 0); |
| } |
| |
| 1; |
| |
| package Parse::RecDescent::Token; |
| |
| sub sethashname { $_[0]->{hashname} = '__PATTERN' . ++$_[1]->{patcount} . '__'; } |
| |
| sub issubrule { undef } |
| sub isterminal { 1 } |
| sub describe ($) { shift->{'description'}} |
| |
| |
| # ARGS ARE: $self, $pattern, $left_delim, $modifiers, $lookahead, $linenum |
| sub new ($$$$$$) |
| { |
| my $class = ref($_[0]) || $_[0]; |
| my $pattern = $_[1]; |
| my $pat = $_[1]; |
| my $ldel = $_[2]; |
| my $rdel = $ldel; |
| $rdel =~ tr/{[(</}])>/; |
| |
| my $mod = $_[3]; |
| |
| my $desc; |
| |
| if ($ldel eq '/') { $desc = "$ldel$pattern$rdel$mod" } |
| else { $desc = "m$ldel$pattern$rdel$mod" } |
| $desc =~ s/\\/\\\\/g; |
| $desc =~ s/\$$/\\\$/g; |
| $desc =~ s/}/\\}/g; |
| $desc =~ s/{/\\{/g; |
| |
| if (!eval "no strict; |
| local \$SIG{__WARN__} = sub {0}; |
| '' =~ m$ldel$pattern$rdel$mod" and $@) |
| { |
| Parse::RecDescent::_warn(3, "Token pattern \"m$ldel$pattern$rdel$mod\" |
| may not be a valid regular expression", |
| $_[5]); |
| $@ =~ s/ at \(eval.*/./; |
| Parse::RecDescent::_hint($@); |
| } |
| |
| # QUIETLY PREVENT (WELL-INTENTIONED) CALAMITY |
| $mod =~ s/[gc]//g; |
| $pattern =~ s/(\A|[^\\])\\G/$1/g; |
| |
| bless |
| { |
| "pattern" => $pattern, |
| "ldelim" => $ldel, |
| "rdelim" => $rdel, |
| "mod" => $mod, |
| "lookahead" => $_[4], |
| "line" => $_[5], |
| "description" => $desc, |
| }, $class; |
| } |
| |
| |
| sub code($$$$$) |
| { |
| my ($self, $namespace, $rule, $check) = @_; |
| my $ldel = $self->{"ldelim"}; |
| my $rdel = $self->{"rdelim"}; |
| my $sdel = $ldel; |
| my $mod = $self->{"mod"}; |
| |
| $sdel =~ s/[[{(<]/{}/; |
| |
| my $code = ' |
| Parse::RecDescent::_trace(q{Trying terminal: [' . $self->describe |
| . ']}, Parse::RecDescent::_tracefirst($text), |
| q{' . $rule->{name} . '}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| undef $lastsep; |
| $expectation->is(q{' . ($rule->hasleftmost($self) ? '' |
| : $self->describe ) . '})->at($text); |
| ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) . ' |
| |
| ' . ($self->{"lookahead"}<0?'if':'unless') |
| . ' ($text =~ s/\A($skip)/$lastsep=$1 and ""/e and ' |
| . ($check->{itempos}? 'do {'.Parse::RecDescent::Production::incitempos().' 1} and ' : '') |
| . ' $text =~ m' . $ldel . '\A(?:' . $self->{"pattern"} . ')' . $rdel . $mod . ') |
| { |
| '.($self->{"lookahead"} ? '$text = $_savetext;' : '$text = $lastsep . $text if defined $lastsep;') . |
| ($check->{itempos} ? Parse::RecDescent::Production::unincitempos() : '') . ' |
| $expectation->failed(); |
| Parse::RecDescent::_trace(q{<<Didn\'t match terminal>>}, |
| Parse::RecDescent::_tracefirst($text)) |
| if defined $::RD_TRACE; |
| |
| last; |
| } |
| $current_match = substr($text, $-[0], $+[0] - $-[0]); |
| substr($text,0,length($current_match),q{}); |
| Parse::RecDescent::_trace(q{>>Matched terminal<< (return value: [} |
| . $current_match . q{])}, |
| Parse::RecDescent::_tracefirst($text)) |
| if defined $::RD_TRACE; |
| push @item, $item{'.$self->{hashname}.'}=$current_match; |
| ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' |
| '; |
| |
| return $code; |
| } |
| |
| 1; |
| |
| package Parse::RecDescent::Literal; |
| |
| sub sethashname { $_[0]->{hashname} = '__STRING' . ++$_[1]->{strcount} . '__'; } |
| |
| sub issubrule { undef } |
| sub isterminal { 1 } |
| sub describe ($) { shift->{'description'} } |
| |
| sub new ($$$$) |
| { |
| my $class = ref($_[0]) || $_[0]; |
| |
| my $pattern = $_[1]; |
| |
| my $desc = $pattern; |
| $desc=~s/\\/\\\\/g; |
| $desc=~s/}/\\}/g; |
| $desc=~s/{/\\{/g; |
| |
| bless |
| { |
| "pattern" => $pattern, |
| "lookahead" => $_[2], |
| "line" => $_[3], |
| "description" => "'$desc'", |
| }, $class; |
| } |
| |
| |
| sub code($$$$) |
| { |
| my ($self, $namespace, $rule, $check) = @_; |
| |
| my $code = ' |
| Parse::RecDescent::_trace(q{Trying terminal: [' . $self->describe |
| . ']}, |
| Parse::RecDescent::_tracefirst($text), |
| q{' . $rule->{name} . '}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| undef $lastsep; |
| $expectation->is(q{' . ($rule->hasleftmost($self) ? '' |
| : $self->describe ) . '})->at($text); |
| ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) . ' |
| |
| ' . ($self->{"lookahead"}<0?'if':'unless') |
| . ' ($text =~ s/\A($skip)/$lastsep=$1 and ""/e and ' |
| . ($check->{itempos}? 'do {'.Parse::RecDescent::Production::incitempos().' 1} and ' : '') |
| . ' $text =~ m/\A' . quotemeta($self->{"pattern"}) . '/) |
| { |
| '.($self->{"lookahead"} ? '$text = $_savetext;' : '$text = $lastsep . $text if defined $lastsep;').' |
| '. ($check->{itempos} ? Parse::RecDescent::Production::unincitempos() : '') . ' |
| $expectation->failed(); |
| Parse::RecDescent::_trace(qq{<<Didn\'t match terminal>>}, |
| Parse::RecDescent::_tracefirst($text)) |
| if defined $::RD_TRACE; |
| last; |
| } |
| $current_match = substr($text, $-[0], $+[0] - $-[0]); |
| substr($text,0,length($current_match),q{}); |
| Parse::RecDescent::_trace(q{>>Matched terminal<< (return value: [} |
| . $current_match . q{])}, |
| Parse::RecDescent::_tracefirst($text)) |
| if defined $::RD_TRACE; |
| push @item, $item{'.$self->{hashname}.'}=$current_match; |
| ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' |
| '; |
| |
| return $code; |
| } |
| |
| 1; |
| |
| package Parse::RecDescent::InterpLit; |
| |
| sub sethashname { $_[0]->{hashname} = '__STRING' . ++$_[1]->{strcount} . '__'; } |
| |
| sub issubrule { undef } |
| sub isterminal { 1 } |
| sub describe ($) { shift->{'description'} } |
| |
| sub new ($$$$) |
| { |
| my $class = ref($_[0]) || $_[0]; |
| |
| my $pattern = $_[1]; |
| $pattern =~ s#/#\\/#g; |
| |
| my $desc = $pattern; |
| $desc=~s/\\/\\\\/g; |
| $desc=~s/}/\\}/g; |
| $desc=~s/{/\\{/g; |
| |
| bless |
| { |
| "pattern" => $pattern, |
| "lookahead" => $_[2], |
| "line" => $_[3], |
| "description" => "'$desc'", |
| }, $class; |
| } |
| |
| sub code($$$$) |
| { |
| my ($self, $namespace, $rule, $check) = @_; |
| |
| my $code = ' |
| Parse::RecDescent::_trace(q{Trying terminal: [' . $self->describe |
| . ']}, |
| Parse::RecDescent::_tracefirst($text), |
| q{' . $rule->{name} . '}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| undef $lastsep; |
| $expectation->is(q{' . ($rule->hasleftmost($self) ? '' |
| : $self->describe ) . '})->at($text); |
| ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) . ' |
| |
| ' . ($self->{"lookahead"}<0?'if':'unless') |
| . ' ($text =~ s/\A($skip)/$lastsep=$1 and ""/e and ' |
| . ($check->{itempos}? 'do {'.Parse::RecDescent::Production::incitempos().' 1} and ' : '') |
| . ' do { $_tok = "' . $self->{"pattern"} . '"; 1 } and |
| substr($text,0,length($_tok)) eq $_tok and |
| do { substr($text,0,length($_tok)) = ""; 1; } |
| ) |
| { |
| '.($self->{"lookahead"} ? '$text = $_savetext;' : '$text = $lastsep . $text if defined $lastsep;').' |
| '. ($check->{itempos} ? Parse::RecDescent::Production::unincitempos() : '') . ' |
| $expectation->failed(); |
| Parse::RecDescent::_trace(q{<<Didn\'t match terminal>>}, |
| Parse::RecDescent::_tracefirst($text)) |
| if defined $::RD_TRACE; |
| last; |
| } |
| Parse::RecDescent::_trace(q{>>Matched terminal<< (return value: [} |
| . $_tok . q{])}, |
| Parse::RecDescent::_tracefirst($text)) |
| if defined $::RD_TRACE; |
| push @item, $item{'.$self->{hashname}.'}=$_tok; |
| ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' |
| '; |
| |
| return $code; |
| } |
| |
| 1; |
| |
| package Parse::RecDescent::Subrule; |
| |
| sub issubrule ($) { return $_[0]->{"subrule"} } |
| sub isterminal { 0 } |
| sub sethashname {} |
| |
| sub describe ($) |
| { |
| my $desc = $_[0]->{"implicit"} || $_[0]->{"subrule"}; |
| $desc = "<matchrule:$desc>" if $_[0]->{"matchrule"}; |
| return $desc; |
| } |
| |
| sub callsyntax($$) |
| { |
| if ($_[0]->{"matchrule"}) |
| { |
| return "&{'$_[1]'.qq{$_[0]->{subrule}}}"; |
| } |
| else |
| { |
| return $_[1].$_[0]->{"subrule"}; |
| } |
| } |
| |
| sub new ($$$$;$$$) |
| { |
| my $class = ref($_[0]) || $_[0]; |
| bless |
| { |
| "subrule" => $_[1], |
| "lookahead" => $_[2], |
| "line" => $_[3], |
| "implicit" => $_[4] || undef, |
| "matchrule" => $_[5], |
| "argcode" => $_[6] || undef, |
| }, $class; |
| } |
| |
| |
| sub code($$$$) |
| { |
| my ($self, $namespace, $rule, $check) = @_; |
| |
| ' |
| Parse::RecDescent::_trace(q{Trying subrule: [' . $self->{"subrule"} . ']}, |
| Parse::RecDescent::_tracefirst($text), |
| q{' . $rule->{"name"} . '}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| if (1) { no strict qw{refs}; |
| $expectation->is(' . ($rule->hasleftmost($self) ? 'q{}' |
| # WAS : 'qq{'.$self->describe.'}' ) . ')->at($text); |
| : 'q{'.$self->describe.'}' ) . ')->at($text); |
| ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) |
| . ($self->{"lookahead"}<0?'if':'unless') |
| . ' (defined ($_tok = ' |
| . $self->callsyntax($namespace.'::') |
| . '($thisparser,$text,$repeating,' |
| . ($self->{"lookahead"}?'1':'$_noactions') |
| . ($check->{"itempos"}?',$itempos[$#itempos]':',undef') |
| . ($self->{argcode} ? ",sub { return $self->{argcode} }" |
| : ',sub { \\@arg }') |
| . '))) |
| { |
| '.($self->{"lookahead"} ? '$text = $_savetext;' : '').' |
| Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' subrule: [' |
| . $self->{subrule} . ']>>}, |
| Parse::RecDescent::_tracefirst($text), |
| q{' . $rule->{"name"} .'}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| $expectation->failed(); |
| last; |
| } |
| Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' subrule: [' |
| . $self->{subrule} . ']<< (return value: [} |
| . $_tok . q{]}, |
| |
| Parse::RecDescent::_tracefirst($text), |
| q{' . $rule->{"name"} .'}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| $item{q{' . $self->{subrule} . '}} = $_tok; |
| push @item, $_tok; |
| ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' |
| } |
| ' |
| } |
| |
| package Parse::RecDescent::Repetition; |
| |
| sub issubrule ($) { return $_[0]->{"subrule"} } |
| sub isterminal { 0 } |
| sub sethashname { } |
| |
| sub describe ($) |
| { |
| my $desc = $_[0]->{"expected"} || $_[0]->{"subrule"}; |
| $desc = "<matchrule:$desc>" if $_[0]->{"matchrule"}; |
| return $desc; |
| } |
| |
| sub callsyntax($$) |
| { |
| if ($_[0]->{matchrule}) |
| { return "sub { goto &{''.qq{$_[1]$_[0]->{subrule}}} }"; } |
| else |
| { return "\\&$_[1]$_[0]->{subrule}"; } |
| } |
| |
| sub new ($$$$$$$$$$) |
| { |
| my ($self, $subrule, $repspec, $min, $max, $lookahead, $line, $parser, $matchrule, $argcode) = @_; |
| my $class = ref($self) || $self; |
| ($max, $min) = ( $min, $max) if ($max<$min); |
| |
| my $desc; |
| if ($subrule=~/\A_alternation_\d+_of_production_\d+_of_rule/) |
| { $desc = $parser->{"rules"}{$subrule}->expected } |
| |
| if ($lookahead) |
| { |
| if ($min>0) |
| { |
| return new Parse::RecDescent::Subrule($subrule,$lookahead,$line,$desc,$matchrule,$argcode); |
| } |
| else |
| { |
| Parse::RecDescent::_error("Not symbol (\"!\") before |
| \"$subrule\" doesn't make |
| sense.",$line); |
| Parse::RecDescent::_hint("Lookahead for negated optional |
| repetitions (such as |
| \"!$subrule($repspec)\" can never |
| succeed, since optional items always |
| match (zero times at worst). |
| Did you mean a single \"!$subrule\", |
| instead?"); |
| } |
| } |
| bless |
| { |
| "subrule" => $subrule, |
| "repspec" => $repspec, |
| "min" => $min, |
| "max" => $max, |
| "lookahead" => $lookahead, |
| "line" => $line, |
| "expected" => $desc, |
| "argcode" => $argcode || undef, |
| "matchrule" => $matchrule, |
| }, $class; |
| } |
| |
| sub code($$$$) |
| { |
| my ($self, $namespace, $rule, $check) = @_; |
| |
| my ($subrule, $repspec, $min, $max, $lookahead) = |
| @{$self}{ qw{subrule repspec min max lookahead} }; |
| |
| ' |
| Parse::RecDescent::_trace(q{Trying repeated subrule: [' . $self->describe . ']}, |
| Parse::RecDescent::_tracefirst($text), |
| q{' . $rule->{"name"} . '}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| $expectation->is(' . ($rule->hasleftmost($self) ? 'q{}' |
| # WAS : 'qq{'.$self->describe.'}' ) . ')->at($text); |
| : 'q{'.$self->describe.'}' ) . ')->at($text); |
| ' . ($self->{"lookahead"} ? '$_savetext = $text;' : '' ) .' |
| unless (defined ($_tok = $thisparser->_parserepeat($text, ' |
| . $self->callsyntax($namespace.'::') |
| . ', ' . $min . ', ' . $max . ', ' |
| . ($self->{"lookahead"}?'1':'$_noactions') |
| . ($check->{"itempos"}?',$itempos[$#itempos]':',undef') |
| . ',$expectation,' |
| . ($self->{argcode} ? "sub { return $self->{argcode} }" |
| : 'sub { \\@arg }') |
| . '))) |
| { |
| Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' repeated subrule: [' |
| . $self->describe . ']>>}, |
| Parse::RecDescent::_tracefirst($text), |
| q{' . $rule->{"name"} .'}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| last; |
| } |
| Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' repeated subrule: [' |
| . $self->{subrule} . ']<< (} |
| . @$_tok . q{ times)}, |
| |
| Parse::RecDescent::_tracefirst($text), |
| q{' . $rule->{"name"} .'}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| $item{q{' . "$self->{subrule}($self->{repspec})" . '}} = $_tok; |
| push @item, $_tok; |
| ' . ($self->{"lookahead"} ? '$text = $_savetext;' : '' ) .' |
| |
| ' |
| } |
| |
| package Parse::RecDescent::Result; |
| |
| sub issubrule { 0 } |
| sub isterminal { 0 } |
| sub describe { '' } |
| |
| sub new |
| { |
| my ($class, $pos) = @_; |
| |
| bless {}, $class; |
| } |
| |
| sub code($$$$) |
| { |
| my ($self, $namespace, $rule) = @_; |
| |
| ' |
| $return = $item[-1]; |
| '; |
| } |
| |
| package Parse::RecDescent::Operator; |
| |
| my @opertype = ( " non-optional", "n optional" ); |
| |
| sub issubrule { 0 } |
| sub isterminal { 0 } |
| |
| sub describe { $_[0]->{"expected"} } |
| sub sethashname { $_[0]->{hashname} = '__DIRECTIVE' . ++$_[1]->{dircount} . '__'; } |
| |
| |
| sub new |
| { |
| my ($class, $type, $minrep, $maxrep, $leftarg, $op, $rightarg) = @_; |
| |
| bless |
| { |
| "type" => "${type}op", |
| "leftarg" => $leftarg, |
| "op" => $op, |
| "min" => $minrep, |
| "max" => $maxrep, |
| "rightarg" => $rightarg, |
| "expected" => "<${type}op: ".$leftarg->describe." ".$op->describe." ".$rightarg->describe.">", |
| }, $class; |
| } |
| |
| sub code($$$$) |
| { |
| my ($self, $namespace, $rule, $check) = @_; |
| |
| my @codeargs = @_[1..$#_]; |
| |
| my ($leftarg, $op, $rightarg) = |
| @{$self}{ qw{leftarg op rightarg} }; |
| |
| my $code = ' |
| Parse::RecDescent::_trace(q{Trying operator: [' . $self->describe . ']}, |
| Parse::RecDescent::_tracefirst($text), |
| q{' . $rule->{"name"} . '}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| $expectation->is(' . ($rule->hasleftmost($self) ? 'q{}' |
| # WAS : 'qq{'.$self->describe.'}' ) . ')->at($text); |
| : 'q{'.$self->describe.'}' ) . ')->at($text); |
| |
| $_tok = undef; |
| OPLOOP: while (1) |
| { |
| $repcount = 0; |
| my @item; |
| my %item; |
| '; |
| |
| $code .= ' |
| my $_itempos = $itempos[-1]; |
| my $itemposfirst; |
| ' if $check->{itempos}; |
| |
| if ($self->{type} eq "leftop" ) |
| { |
| $code .= ' |
| # MATCH LEFTARG |
| ' . $leftarg->code(@codeargs) . ' |
| |
| '; |
| |
| $code .= ' |
| if (defined($_itempos) and !defined($itemposfirst)) |
| { |
| $itemposfirst = Parse::RecDescent::Production::_duplicate_itempos($_itempos); |
| } |
| ' if $check->{itempos}; |
| |
| $code .= ' |
| $repcount++; |
| |
| my $savetext = $text; |
| my $backtrack; |
| |
| # MATCH (OP RIGHTARG)(s) |
| while ($repcount < ' . $self->{max} . ') |
| { |
| $backtrack = 0; |
| ' . $op->code(@codeargs) . ' |
| ' . ($op->isterminal() ? 'pop @item;' : '$backtrack=1;' ) . ' |
| ' . (ref($op) eq 'Parse::RecDescent::Token' |
| ? 'if (defined $1) {push @item, $item{'.($self->{name}||$self->{hashname}).'}=$1; $backtrack=1;}' |
| : "" ) . ' |
| ' . $rightarg->code(@codeargs) . ' |
| $savetext = $text; |
| $repcount++; |
| } |
| $text = $savetext; |
| pop @item if $backtrack; |
| |
| '; |
| } |
| else |
| { |
| $code .= ' |
| my $savetext = $text; |
| my $backtrack; |
| # MATCH (LEFTARG OP)(s) |
| while ($repcount < ' . $self->{max} . ') |
| { |
| $backtrack = 0; |
| ' . $leftarg->code(@codeargs) . ' |
| '; |
| $code .= ' |
| if (defined($_itempos) and !defined($itemposfirst)) |
| { |
| $itemposfirst = Parse::RecDescent::Production::_duplicate_itempos($_itempos); |
| } |
| ' if $check->{itempos}; |
| |
| $code .= ' |
| $repcount++; |
| $backtrack = 1; |
| ' . $op->code(@codeargs) . ' |
| $savetext = $text; |
| ' . ($op->isterminal() ? 'pop @item;' : "" ) . ' |
| ' . (ref($op) eq 'Parse::RecDescent::Token' ? 'do { push @item, $item{'.($self->{name}||$self->{hashname}).'}=$1; } if defined $1;' : "" ) . ' |
| } |
| $text = $savetext; |
| pop @item if $backtrack; |
| |
| # MATCH RIGHTARG |
| ' . $rightarg->code(@codeargs) . ' |
| $repcount++; |
| '; |
| } |
| |
| $code .= 'unless (@item) { undef $_tok; last }' unless $self->{min}==0; |
| |
| $code .= ' |
| $_tok = [ @item ]; |
| '; |
| |
| |
| $code .= ' |
| if (defined $itemposfirst) |
| { |
| Parse::RecDescent::Production::_update_itempos( |
| $_itempos, $itemposfirst, undef, [qw(from)]); |
| } |
| ' if $check->{itempos}; |
| |
| $code .= ' |
| last; |
| } # end of OPLOOP |
| '; |
| |
| $code .= ' |
| unless ($repcount>='.$self->{min}.') |
| { |
| Parse::RecDescent::_trace(q{<<'.Parse::RecDescent::_matchtracemessage($self,1).' operator: [' |
| . $self->describe |
| . ']>>}, |
| Parse::RecDescent::_tracefirst($text), |
| q{' . $rule->{"name"} .'}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| $expectation->failed(); |
| last; |
| } |
| Parse::RecDescent::_trace(q{>>'.Parse::RecDescent::_matchtracemessage($self).' operator: [' |
| . $self->describe |
| . ']<< (return value: [} |
| . qq{@{$_tok||[]}} . q{]}, |
| Parse::RecDescent::_tracefirst($text), |
| q{' . $rule->{"name"} .'}, |
| $tracelevel) |
| if defined $::RD_TRACE; |
| |
| push @item, $item{'.($self->{name}||$self->{hashname}).'}=$_tok||[]; |
| '; |
| |
| return $code; |
| } |
| |
| |
| package Parse::RecDescent::Expectation; |
| |
| sub new ($) |
| { |
| bless { |
| "failed" => 0, |
| "expected" => "", |
| "unexpected" => "", |
| "lastexpected" => "", |
| "lastunexpected" => "", |
| "defexpected" => $_[1], |
| }; |
| } |
| |
| sub is ($$) |
| { |
| $_[0]->{lastexpected} = $_[1]; return $_[0]; |
| } |
| |
| sub at ($$) |
| { |
| $_[0]->{lastunexpected} = $_[1]; return $_[0]; |
| } |
| |
| sub failed ($) |
| { |
| return unless $_[0]->{lastexpected}; |
| $_[0]->{expected} = $_[0]->{lastexpected} unless $_[0]->{failed}; |
| $_[0]->{unexpected} = $_[0]->{lastunexpected} unless $_[0]->{failed}; |
| $_[0]->{failed} = 1; |
| } |
| |
| sub message ($) |
| { |
| my ($self) = @_; |
| $self->{expected} = $self->{defexpected} unless $self->{expected}; |
| $self->{expected} =~ s/_/ /g; |
| if (!$self->{unexpected} || $self->{unexpected} =~ /\A\s*\Z/s) |
| { |
| return "Was expecting $self->{expected}"; |
| } |
| else |
| { |
| $self->{unexpected} =~ /\s*(.*)/; |
| return "Was expecting $self->{expected} but found \"$1\" instead"; |
| } |
| } |
| |
| 1; |
| |
| package Parse::RecDescent; |
| |
| use Carp; |
| use vars qw ( $AUTOLOAD $VERSION $_FILENAME); |
| |
| my $ERRORS = 0; |
| |
| our $VERSION = '1.967006'; |
| $VERSION = eval $VERSION; |
| $_FILENAME=__FILE__; |
| |
| # BUILDING A PARSER |
| |
| my $nextnamespace = "namespace000001"; |
| |
| sub _nextnamespace() |
| { |
| return "Parse::RecDescent::" . $nextnamespace++; |
| } |
| |
| # ARGS ARE: $class, $grammar, $compiling, $namespace |
| sub new ($$$$) |
| { |
| my $class = ref($_[0]) || $_[0]; |
| local $Parse::RecDescent::compiling = $_[2]; |
| my $name_space_name = defined $_[3] |
| ? "Parse::RecDescent::".$_[3] |
| : _nextnamespace(); |
| my $self = |
| { |
| "rules" => {}, |
| "namespace" => $name_space_name, |
| "startcode" => '', |
| "localvars" => '', |
| "_AUTOACTION" => undef, |
| "_AUTOTREE" => undef, |
| }; |
| |
| |
| if ($::RD_AUTOACTION) { |
| my $sourcecode = $::RD_AUTOACTION; |
| $sourcecode = "{ $sourcecode }" |
| unless $sourcecode =~ /\A\s*\{.*\}\s*\Z/; |
| $self->{_check}{itempos} = |
| $sourcecode =~ /\@itempos\b|\$itempos\s*\[/; |
| $self->{_AUTOACTION} |
| = new Parse::RecDescent::Action($sourcecode,0,-1) |
| } |
| |
| bless $self, $class; |
| return $self->Replace($_[1]) |
| } |
| |
| sub Compile($$$$) { |
| die "Compilation of Parse::RecDescent grammars not yet implemented\n"; |
| } |
| |
| sub DESTROY { |
| my ($self) = @_; |
| my $namespace = $self->{namespace}; |
| $namespace =~ s/Parse::RecDescent:://; |
| if (!$self->{_precompiled}) { |
| delete $Parse::RecDescent::{$namespace.'::'}; |
| } |
| } |
| |
| # BUILDING A GRAMMAR.... |
| |
| # ARGS ARE: $self, $grammar, $isimplicit, $isleftop |
| sub Replace ($$) |
| { |
| # set $replace = 1 for _generate |
| splice(@_, 2, 0, 1); |
| |
| return _generate(@_); |
| } |
| |
| # ARGS ARE: $self, $grammar, $isimplicit, $isleftop |
| sub Extend ($$) |
| { |
| # set $replace = 0 for _generate |
| splice(@_, 2, 0, 0); |
| |
| return _generate(@_); |
| } |
| |
| sub _no_rule ($$;$) |
| { |
| _error("Ruleless $_[0] at start of grammar.",$_[1]); |
| my $desc = $_[2] ? "\"$_[2]\"" : ""; |
| _hint("You need to define a rule for the $_[0] $desc |
| to be part of."); |
| } |
| |
| my $NEGLOOKAHEAD = '\G(\s*\.\.\.\!)'; |
| my $POSLOOKAHEAD = '\G(\s*\.\.\.)'; |
| my $RULE = '\G\s*(\w+)[ \t]*:'; |
| my $PROD = '\G\s*([|])'; |
| my $TOKEN = q{\G\s*/((\\\\/|\\\\\\\\|[^/])*)/([cgimsox]*)}; |
| my $MTOKEN = q{\G\s*(m\s*[^\w\s])}; |
| my $LITERAL = q{\G\s*'((\\\\['\\\\]|[^'])*)'}; |
| my $INTERPLIT = q{\G\s*"((\\\\["\\\\]|[^"])*)"}; |
| my $SUBRULE = '\G\s*(\w+)'; |
| my $MATCHRULE = '\G(\s*<matchrule:)'; |
| my $SIMPLEPAT = '((\\s+/[^/\\\\]*(?:\\\\.[^/\\\\]*)*/)?)'; |
| my $OPTIONAL = '\G\((\?)'.$SIMPLEPAT.'\)'; |
| my $ANY = '\G\((s\?)'.$SIMPLEPAT.'\)'; |
| my $MANY = '\G\((s|\.\.)'.$SIMPLEPAT.'\)'; |
| my $EXACTLY = '\G\(([1-9]\d*)'.$SIMPLEPAT.'\)'; |
| my $BETWEEN = '\G\((\d+)\.\.([1-9]\d*)'.$SIMPLEPAT.'\)'; |
| my $ATLEAST = '\G\((\d+)\.\.'.$SIMPLEPAT.'\)'; |
| my $ATMOST = '\G\(\.\.([1-9]\d*)'.$SIMPLEPAT.'\)'; |
| my $BADREP = '\G\((-?\d+)?\.\.(-?\d+)?'.$SIMPLEPAT.'\)'; |
| my $ACTION = '\G\s*\{'; |
| my $IMPLICITSUBRULE = '\G\s*\('; |
| my $COMMENT = '\G\s*(#.*)'; |
| my $COMMITMK = '\G\s*<commit>'; |
| my $UNCOMMITMK = '\G\s*<uncommit>'; |
| my $QUOTELIKEMK = '\G\s*<perl_quotelike>'; |
| my $CODEBLOCKMK = '\G\s*<perl_codeblock(?:\s+([][()<>{}]+))?>'; |
| my $VARIABLEMK = '\G\s*<perl_variable>'; |
| my $NOCHECKMK = '\G\s*<nocheck>'; |
| my $AUTOACTIONPATMK = '\G\s*<autoaction:'; |
| my $AUTOTREEMK = '\G\s*<autotree(?::\s*([\w:]+)\s*)?>'; |
| my $AUTOSTUBMK = '\G\s*<autostub>'; |
| my $AUTORULEMK = '\G\s*<autorule:(.*?)>'; |
| my $REJECTMK = '\G\s*<reject>'; |
| my $CONDREJECTMK = '\G\s*<reject:'; |
| my $SCOREMK = '\G\s*<score:'; |
| my $AUTOSCOREMK = '\G\s*<autoscore:'; |
| my $SKIPMK = '\G\s*<skip:'; |
| my $OPMK = '\G\s*<(left|right)op(?:=(\'.*?\'))?:'; |
| my $ENDDIRECTIVEMK = '\G\s*>'; |
| my $RESYNCMK = '\G\s*<resync>'; |
| my $RESYNCPATMK = '\G\s*<resync:'; |
| my $RULEVARPATMK = '\G\s*<rulevar:'; |
| my $DEFERPATMK = '\G\s*<defer:'; |
| my $TOKENPATMK = '\G\s*<token:'; |
| my $AUTOERRORMK = '\G\s*<error(\??)>'; |
| my $MSGERRORMK = '\G\s*<error(\??):'; |
| my $NOCHECK = '\G\s*<nocheck>'; |
| my $WARNMK = '\G\s*<warn((?::\s*(\d+)\s*)?)>'; |
| my $HINTMK = '\G\s*<hint>'; |
| my $TRACEBUILDMK = '\G\s*<trace_build((?::\s*(\d+)\s*)?)>'; |
| my $TRACEPARSEMK = '\G\s*<trace_parse((?::\s*(\d+)\s*)?)>'; |
| my $UNCOMMITPROD = $PROD.'\s*<uncommit'; |
| my $ERRORPROD = $PROD.'\s*<error'; |
| my $LONECOLON = '\G\s*:'; |
| my $OTHER = '\G\s*([^\s]+)'; |
| |
| my @lines = 0; |
| |
| sub _generate |
| { |
| my ($self, $grammar, $replace, $isimplicit, $isleftop) = (@_, 0); |
| |
| my $aftererror = 0; |
| my $lookahead = 0; |
| my $lookaheadspec = ""; |
| my $must_pop_lines; |
| if (! $lines[-1]) { |
| push @lines, _linecount($grammar) ; |
| $must_pop_lines = 1; |
| } |
| $self->{_check}{itempos} = ($grammar =~ /\@itempos\b|\$itempos\s*\[/) |
| unless $self->{_check}{itempos}; |
| for (qw(thisoffset thiscolumn prevline prevoffset prevcolumn)) |
| { |
| $self->{_check}{$_} = |
| ($grammar =~ /\$$_/) || $self->{_check}{itempos} |
| unless $self->{_check}{$_}; |
| } |
| my $line; |
| |
| my $rule = undef; |
| my $prod = undef; |
| my $item = undef; |
| my $lastgreedy = ''; |
| pos $grammar = 0; |
| study $grammar; |
| |
| local $::RD_HINT = $::RD_HINT; |
| local $::RD_WARN = $::RD_WARN; |
| local $::RD_TRACE = $::RD_TRACE; |
| local $::RD_CHECK = $::RD_CHECK; |
| |
| while (pos $grammar < length $grammar) |
| { |
| $line = $lines[-1] - _linecount($grammar) + 1; |
| my $commitonly; |
| my $code = ""; |
| my @components = (); |
| if ($grammar =~ m/$COMMENT/gco) |
| { |
| _parse("a comment",0,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| next; |
| } |
| elsif ($grammar =~ m/$NEGLOOKAHEAD/gco) |
| { |
| _parse("a negative lookahead",$aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $lookahead = $lookahead ? -$lookahead : -1; |
| $lookaheadspec .= $1; |
| next; # SKIP LOOKAHEAD RESET AT END OF while LOOP |
| } |
| elsif ($grammar =~ m/$POSLOOKAHEAD/gco) |
| { |
| _parse("a positive lookahead",$aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $lookahead = $lookahead ? $lookahead : 1; |
| $lookaheadspec .= $1; |
| next; # SKIP LOOKAHEAD RESET AT END OF while LOOP |
| } |
| elsif ($grammar =~ m/(?=$ACTION)/gco |
| and do { ($code) = extract_codeblock($grammar); $code }) |
| { |
| _parse("an action", $aftererror, $line, $code); |
| $item = new Parse::RecDescent::Action($code,$lookahead,$line); |
| $prod and $prod->additem($item) |
| or $self->_addstartcode($code); |
| } |
| elsif ($grammar =~ m/(?=$IMPLICITSUBRULE)/gco |
| and do { ($code) = extract_codeblock($grammar,'{([',undef,'(',1); |
| $code }) |
| { |
| $code =~ s/\A\s*\(|\)\Z//g; |
| _parse("an implicit subrule", $aftererror, $line, |
| "( $code )"); |
| my $implicit = $rule->nextimplicit; |
| return undef |
| if !$self->_generate("$implicit : $code",$replace,1); |
| my $pos = pos $grammar; |
| substr($grammar,$pos,0,$implicit); |
| pos $grammar = $pos;; |
| } |
| elsif ($grammar =~ m/$ENDDIRECTIVEMK/gco) |
| { |
| |
| # EXTRACT TRAILING REPETITION SPECIFIER (IF ANY) |
| |
| my ($minrep,$maxrep) = (1,$MAXREP); |
| if ($grammar =~ m/\G[(]/gc) |
| { |
| pos($grammar)--; |
| |
| if ($grammar =~ m/$OPTIONAL/gco) |
| { ($minrep, $maxrep) = (0,1) } |
| elsif ($grammar =~ m/$ANY/gco) |
| { $minrep = 0 } |
| elsif ($grammar =~ m/$EXACTLY/gco) |
| { ($minrep, $maxrep) = ($1,$1) } |
| elsif ($grammar =~ m/$BETWEEN/gco) |
| { ($minrep, $maxrep) = ($1,$2) } |
| elsif ($grammar =~ m/$ATLEAST/gco) |
| { $minrep = $1 } |
| elsif ($grammar =~ m/$ATMOST/gco) |
| { $maxrep = $1 } |
| elsif ($grammar =~ m/$MANY/gco) |
| { } |
| elsif ($grammar =~ m/$BADREP/gco) |
| { |
| _parse("an invalid repetition specifier", 0,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| _error("Incorrect specification of a repeated directive", |
| $line); |
| _hint("Repeated directives cannot have |
| a maximum repetition of zero, nor can they have |
| negative components in their ranges."); |
| } |
| } |
| |
| $prod && $prod->enddirective($line,$minrep,$maxrep); |
| } |
| elsif ($grammar =~ m/\G\s*<[^m]/gc) |
| { |
| pos($grammar)-=2; |
| |
| if ($grammar =~ m/$OPMK/gco) |
| { |
| # $DB::single=1; |
| _parse("a $1-associative operator directive", $aftererror, $line, "<$1op:...>"); |
| $prod->adddirective($1, $line,$2||''); |
| } |
| elsif ($grammar =~ m/$UNCOMMITMK/gco) |
| { |
| _parse("an uncommit marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $item = new Parse::RecDescent::Directive('$commit=0;1', |
| $lookahead,$line,"<uncommit>"); |
| $prod and $prod->additem($item) |
| or _no_rule("<uncommit>",$line); |
| } |
| elsif ($grammar =~ m/$QUOTELIKEMK/gco) |
| { |
| _parse("an perl quotelike marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $item = new Parse::RecDescent::Directive( |
| 'my ($match,@res); |
| ($match,$text,undef,@res) = |
| Text::Balanced::extract_quotelike($text,$skip); |
| $match ? \@res : undef; |
| ', $lookahead,$line,"<perl_quotelike>"); |
| $prod and $prod->additem($item) |
| or _no_rule("<perl_quotelike>",$line); |
| } |
| elsif ($grammar =~ m/$CODEBLOCKMK/gco) |
| { |
| my $outer = $1||"{}"; |
| _parse("an perl codeblock marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $item = new Parse::RecDescent::Directive( |
| 'Text::Balanced::extract_codeblock($text,undef,$skip,\''.$outer.'\'); |
| ', $lookahead,$line,"<perl_codeblock>"); |
| $prod and $prod->additem($item) |
| or _no_rule("<perl_codeblock>",$line); |
| } |
| elsif ($grammar =~ m/$VARIABLEMK/gco) |
| { |
| _parse("an perl variable marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $item = new Parse::RecDescent::Directive( |
| 'Text::Balanced::extract_variable($text,$skip); |
| ', $lookahead,$line,"<perl_variable>"); |
| $prod and $prod->additem($item) |
| or _no_rule("<perl_variable>",$line); |
| } |
| elsif ($grammar =~ m/$NOCHECKMK/gco) |
| { |
| _parse("a disable checking marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| if ($rule) |
| { |
| _error("<nocheck> directive not at start of grammar", $line); |
| _hint("The <nocheck> directive can only |
| be specified at the start of a |
| grammar (before the first rule |
| is defined."); |
| } |
| else |
| { |
| local $::RD_CHECK = 1; |
| } |
| } |
| elsif ($grammar =~ m/$AUTOSTUBMK/gco) |
| { |
| _parse("an autostub marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $::RD_AUTOSTUB = ""; |
| } |
| elsif ($grammar =~ m/$AUTORULEMK/gco) |
| { |
| _parse("an autorule marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $::RD_AUTOSTUB = $1; |
| } |
| elsif ($grammar =~ m/$AUTOTREEMK/gco) |
| { |
| my $base = defined($1) ? $1 : ""; |
| my $current_match = substr($grammar, $-[0], $+[0] - $-[0]); |
| $base .= "::" if $base && $base !~ /::$/; |
| _parse("an autotree marker", $aftererror,$line, $current_match); |
| if ($rule) |
| { |
| _error("<autotree> directive not at start of grammar", $line); |
| _hint("The <autotree> directive can only |
| be specified at the start of a |
| grammar (before the first rule |
| is defined."); |
| } |
| else |
| { |
| undef $self->{_AUTOACTION}; |
| $self->{_AUTOTREE}{NODE} |
| = new Parse::RecDescent::Action(q({bless \%item, ').$base.q('.$item[0]}),0,-1); |
| $self->{_AUTOTREE}{TERMINAL} |
| = new Parse::RecDescent::Action(q({bless {__VALUE__=>$item[1]}, ').$base.q('.$item[0]}),0,-1); |
| } |
| } |
| |
| elsif ($grammar =~ m/$REJECTMK/gco) |
| { |
| _parse("an reject marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $item = new Parse::RecDescent::UncondReject($lookahead,$line,"<reject>"); |
| $prod and $prod->additem($item) |
| or _no_rule("<reject>",$line); |
| } |
| elsif ($grammar =~ m/(?=$CONDREJECTMK)/gco |
| and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); |
| $code }) |
| { |
| _parse("a (conditional) reject marker", $aftererror,$line, $code ); |
| $code =~ /\A\s*<reject:(.*)>\Z/s; |
| my $cond = $1; |
| $item = new Parse::RecDescent::Directive( |
| "($1) ? undef : 1", $lookahead,$line,"<reject:$cond>"); |
| $prod and $prod->additem($item) |
| or _no_rule("<reject:$cond>",$line); |
| } |
| elsif ($grammar =~ m/(?=$SCOREMK)/gco |
| and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); |
| $code }) |
| { |
| _parse("a score marker", $aftererror,$line, $code ); |
| $code =~ /\A\s*<score:(.*)>\Z/s; |
| $prod and $prod->addscore($1, $lookahead, $line) |
| or _no_rule($code,$line); |
| } |
| elsif ($grammar =~ m/(?=$AUTOSCOREMK)/gco |
| and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); |
| $code; |
| } ) |
| { |
| _parse("an autoscore specifier", $aftererror,$line,$code); |
| $code =~ /\A\s*<autoscore:(.*)>\Z/s; |
| |
| $rule and $rule->addautoscore($1,$self) |
| or _no_rule($code,$line); |
| |
| $item = new Parse::RecDescent::UncondReject($lookahead,$line,$code); |
| $prod and $prod->additem($item) |
| or _no_rule($code,$line); |
| } |
| elsif ($grammar =~ m/$RESYNCMK/gco) |
| { |
| _parse("a resync to newline marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $item = new Parse::RecDescent::Directive( |
| 'if ($text =~ s/(\A[^\n]*\n)//) { $return = 0; $1; } else { undef }', |
| $lookahead,$line,"<resync>"); |
| $prod and $prod->additem($item) |
| or _no_rule("<resync>",$line); |
| } |
| elsif ($grammar =~ m/(?=$RESYNCPATMK)/gco |
| and do { ($code) = extract_bracketed($grammar,'<'); |
| $code }) |
| { |
| _parse("a resync with pattern marker", $aftererror,$line, $code ); |
| $code =~ /\A\s*<resync:(.*)>\Z/s; |
| $item = new Parse::RecDescent::Directive( |
| 'if ($text =~ s/(\A'.$1.')//) { $return = 0; $1; } else { undef }', |
| $lookahead,$line,$code); |
| $prod and $prod->additem($item) |
| or _no_rule($code,$line); |
| } |
| elsif ($grammar =~ m/(?=$SKIPMK)/gco |
| and do { ($code) = extract_codeblock($grammar,'<'); |
| $code }) |
| { |
| _parse("a skip marker", $aftererror,$line, $code ); |
| $code =~ /\A\s*<skip:(.*)>\Z/s; |
| if ($rule) { |
| $item = new Parse::RecDescent::Directive( |
| 'my $oldskip = $skip; $skip='.$1.'; $oldskip', |
| $lookahead,$line,$code); |
| $prod and $prod->additem($item) |
| or _no_rule($code,$line); |
| } else { |
| #global <skip> directive |
| $self->{skip} = $1; |
| } |
| } |
| elsif ($grammar =~ m/(?=$RULEVARPATMK)/gco |
| and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); |
| $code; |
| } ) |
| { |
| _parse("a rule variable specifier", $aftererror,$line,$code); |
| $code =~ /\A\s*<rulevar:(.*)>\Z/s; |
| |
| $rule and $rule->addvar($1,$self) |
| or _no_rule($code,$line); |
| |
| $item = new Parse::RecDescent::UncondReject($lookahead,$line,$code); |
| $prod and $prod->additem($item) |
| or _no_rule($code,$line); |
| } |
| elsif ($grammar =~ m/(?=$AUTOACTIONPATMK)/gco |
| and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); |
| $code; |
| } ) |
| { |
| _parse("an autoaction specifier", $aftererror,$line,$code); |
| $code =~ s/\A\s*<autoaction:(.*)>\Z/$1/s; |
| if ($code =~ /\A\s*[^{]|[^}]\s*\Z/) { |
| $code = "{ $code }" |
| } |
| $self->{_check}{itempos} = |
| $code =~ /\@itempos\b|\$itempos\s*\[/; |
| $self->{_AUTOACTION} |
| = new Parse::RecDescent::Action($code,0,-$line) |
| } |
| elsif ($grammar =~ m/(?=$DEFERPATMK)/gco |
| and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); |
| $code; |
| } ) |
| { |
| _parse("a deferred action specifier", $aftererror,$line,$code); |
| $code =~ s/\A\s*<defer:(.*)>\Z/$1/s; |
| if ($code =~ /\A\s*[^{]|[^}]\s*\Z/) |
| { |
| $code = "{ $code }" |
| } |
| |
| $item = new Parse::RecDescent::Directive( |
| "push \@{\$thisparser->{deferred}}, sub $code;", |
| $lookahead,$line,"<defer:$code>"); |
| $prod and $prod->additem($item) |
| or _no_rule("<defer:$code>",$line); |
| |
| $self->{deferrable} = 1; |
| } |
| elsif ($grammar =~ m/(?=$TOKENPATMK)/gco |
| and do { ($code) = extract_codeblock($grammar,'{',undef,'<'); |
| $code; |
| } ) |
| { |
| _parse("a token constructor", $aftererror,$line,$code); |
| $code =~ s/\A\s*<token:(.*)>\Z/$1/s; |
| |
| my $types = eval 'no strict; local $SIG{__WARN__} = sub {0}; my @arr=('.$code.'); @arr' || (); |
| if (!$types) |
| { |
| _error("Incorrect token specification: \"$@\"", $line); |
| _hint("The <token:...> directive requires a list |
| of one or more strings representing possible |
| types of the specified token. For example: |
| <token:NOUN,VERB>"); |
| } |
| else |
| { |
| $item = new Parse::RecDescent::Directive( |
| 'no strict; |
| $return = { text => $item[-1] }; |
| @{$return->{type}}{'.$code.'} = (1..'.$types.');', |
| $lookahead,$line,"<token:$code>"); |
| $prod and $prod->additem($item) |
| or _no_rule("<token:$code>",$line); |
| } |
| } |
| elsif ($grammar =~ m/$COMMITMK/gco) |
| { |
| _parse("an commit marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $item = new Parse::RecDescent::Directive('$commit = 1', |
| $lookahead,$line,"<commit>"); |
| $prod and $prod->additem($item) |
| or _no_rule("<commit>",$line); |
| } |
| elsif ($grammar =~ m/$NOCHECKMK/gco) { |
| _parse("an hint request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $::RD_CHECK = 0; |
| } |
| elsif ($grammar =~ m/$HINTMK/gco) { |
| _parse("an hint request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $::RD_HINT = $self->{__HINT__} = 1; |
| } |
| elsif ($grammar =~ m/$WARNMK/gco) { |
| _parse("an warning request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $::RD_WARN = $self->{__WARN__} = $1 ? $2+0 : 1; |
| } |
| elsif ($grammar =~ m/$TRACEBUILDMK/gco) { |
| _parse("an grammar build trace request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $::RD_TRACE = $1 ? $2+0 : 1; |
| } |
| elsif ($grammar =~ m/$TRACEPARSEMK/gco) { |
| _parse("an parse trace request", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $self->{__TRACE__} = $1 ? $2+0 : 1; |
| } |
| elsif ($grammar =~ m/$AUTOERRORMK/gco) |
| { |
| $commitonly = $1; |
| _parse("an error marker", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $item = new Parse::RecDescent::Error('',$lookahead,$1,$line); |
| $prod and $prod->additem($item) |
| or _no_rule("<error>",$line); |
| $aftererror = !$commitonly; |
| } |
| elsif ($grammar =~ m/(?=$MSGERRORMK)/gco |
| and do { $commitonly = $1; |
| ($code) = extract_bracketed($grammar,'<'); |
| $code }) |
| { |
| _parse("an error marker", $aftererror,$line,$code); |
| $code =~ /\A\s*<error\??:(.*)>\Z/s; |
| $item = new Parse::RecDescent::Error($1,$lookahead,$commitonly,$line); |
| $prod and $prod->additem($item) |
| or _no_rule("$code",$line); |
| $aftererror = !$commitonly; |
| } |
| elsif (do { $commitonly = $1; |
| ($code) = extract_bracketed($grammar,'<'); |
| $code }) |
| { |
| if ($code =~ /^<[A-Z_]+>$/) |
| { |
| _error("Token items are not yet |
| supported: \"$code\"", |
| $line); |
| _hint("Items like $code that consist of angle |
| brackets enclosing a sequence of |
| uppercase characters will eventually |
| be used to specify pre-lexed tokens |
| in a grammar. That functionality is not |
| yet implemented. Or did you misspell |
| \"$code\"?"); |
| } |
| else |
| { |
| _error("Untranslatable item encountered: \"$code\"", |
| $line); |
| _hint("Did you misspell \"$code\" |
| or forget to comment it out?"); |
| } |
| } |
| } |
| elsif ($grammar =~ m/$RULE/gco) |
| { |
| _parseunneg("a rule declaration", 0, |
| $lookahead,$line, substr($grammar, $-[0], $+[0] - $-[0]) ) or next; |
| my $rulename = $1; |
| if ($rulename =~ /Replace|Extend|Precompile|Save/ ) |
| { |
| _warn(2,"Rule \"$rulename\" hidden by method |
| Parse::RecDescent::$rulename",$line) |
| and |
| _hint("The rule named \"$rulename\" cannot be directly |
| called through the Parse::RecDescent object |
| for this grammar (although it may still |
| be used as a subrule of other rules). |
| It can't be directly called because |
| Parse::RecDescent::$rulename is already defined (it |
| is the standard method of all |
| parsers)."); |
| } |
| $rule = new Parse::RecDescent::Rule($rulename,$self,$line,$replace); |
| $prod->check_pending($line) if $prod; |
| $prod = $rule->addprod( new Parse::RecDescent::Production ); |
| $aftererror = 0; |
| } |
| elsif ($grammar =~ m/$UNCOMMITPROD/gco) |
| { |
| pos($grammar)-=9; |
| _parseunneg("a new (uncommitted) production", |
| 0, $lookahead, $line, substr($grammar, $-[0], $+[0] - $-[0]) ) or next; |
| |
| $prod->check_pending($line) if $prod; |
| $prod = new Parse::RecDescent::Production($line,1); |
| $rule and $rule->addprod($prod) |
| or _no_rule("<uncommit>",$line); |
| $aftererror = 0; |
| } |
| elsif ($grammar =~ m/$ERRORPROD/gco) |
| { |
| pos($grammar)-=6; |
| _parseunneg("a new (error) production", $aftererror, |
| $lookahead,$line, substr($grammar, $-[0], $+[0] - $-[0]) ) or next; |
| $prod->check_pending($line) if $prod; |
| $prod = new Parse::RecDescent::Production($line,0,1); |
| $rule and $rule->addprod($prod) |
| or _no_rule("<error>",$line); |
| $aftererror = 0; |
| } |
| elsif ($grammar =~ m/$PROD/gco) |
| { |
| _parseunneg("a new production", 0, |
| $lookahead,$line, substr($grammar, $-[0], $+[0] - $-[0]) ) or next; |
| $rule |
| and (!$prod || $prod->check_pending($line)) |
| and $prod = $rule->addprod(new Parse::RecDescent::Production($line)) |
| or _no_rule("production",$line); |
| $aftererror = 0; |
| } |
| elsif ($grammar =~ m/$LITERAL/gco) |
| { |
| my $literal = $1; |
| ($code = $literal) =~ s/\\\\/\\/g; |
| _parse("a literal terminal", $aftererror,$line,$literal); |
| $item = new Parse::RecDescent::Literal($code,$lookahead,$line); |
| $prod and $prod->additem($item) |
| or _no_rule("literal terminal",$line,"'$literal'"); |
| } |
| elsif ($grammar =~ m/$INTERPLIT/gco) |
| { |
| _parse("an interpolated literal terminal", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $item = new Parse::RecDescent::InterpLit($1,$lookahead,$line); |
| $prod and $prod->additem($item) |
| or _no_rule("interpolated literal terminal",$line,"'$1'"); |
| } |
| elsif ($grammar =~ m/$TOKEN/gco) |
| { |
| _parse("a /../ pattern terminal", $aftererror,$line, substr($grammar, $-[0], $+[0] - $-[0]) ); |
| $item = new Parse::RecDescent::Token($1,'/',$3?$3:'',$lookahead,$line); |
| $prod and $prod->additem($item) |
| or _no_rule("pattern terminal",$line,"/$1/"); |
| } |
| elsif ($grammar =~ m/(?=$MTOKEN)/gco |
| and do { ($code, undef, @components) |
| = extract_quotelike($grammar); |
| $code } |
| ) |
| |
| { |
| _parse("an m/../ pattern terminal", $aftererror,$line,$code); |
| $item = new Parse::RecDescent::Token(@components[3,2,8], |
| $lookahead,$line); |
| $prod and $prod->additem($item) |
| or _no_rule("pattern terminal",$line,$code); |
| } |
| elsif ($grammar =~ m/(?=$MATCHRULE)/gco |
| and do { ($code) = extract_bracketed($grammar,'<'); |
| $code |
| } |
| or $grammar =~ m/$SUBRULE/gco |
| and $code = $1) |
| { |
| my $name = $code; |
| my $matchrule = 0; |
| if (substr($name,0,1) eq '<') |
| { |
| $name =~ s/$MATCHRULE\s*//; |
| $name =~ s/\s*>\Z//; |
| $matchrule = 1; |
| } |
| |
| # EXTRACT TRAILING ARG LIST (IF ANY) |
| |
| my ($argcode) = extract_codeblock($grammar, "[]",'') || ''; |
| |
| # EXTRACT TRAILING REPETITION SPECIFIER (IF ANY) |
| |
| if ($grammar =~ m/\G[(]/gc) |
| { |
| pos($grammar)--; |
| |
| if ($grammar =~ m/$OPTIONAL/gco) |
| { |
| _parse("an zero-or-one subrule match", $aftererror,$line,"$code$argcode($1)"); |
| $item = new Parse::RecDescent::Repetition($name,$1,0,1, |
| $lookahead,$line, |
| $self, |
| $matchrule, |
| $argcode); |
| $prod and $prod->additem($item) |
| or _no_rule("repetition",$line,"$code$argcode($1)"); |
| |
| !$matchrule and $rule and $rule->addcall($name); |
| } |
| elsif ($grammar =~ m/$ANY/gco) |
| { |
| _parse("a zero-or-more subrule match", $aftererror,$line,"$code$argcode($1)"); |
| if ($2) |
| { |
| my $pos = pos $grammar; |
| substr($grammar,$pos,0, |
| "<leftop='$name(s?)': $name $2 $name>(s?) "); |
| |
| pos $grammar = $pos; |
| } |
| else |
| { |
| $item = new Parse::RecDescent::Repetition($name,$1,0,$MAXREP, |
| $lookahead,$line, |
| $self, |
| $matchrule, |
| $argcode); |
| $prod and $prod->additem($item) |
| or _no_rule("repetition",$line,"$code$argcode($1)"); |
| |
| !$matchrule and $rule and $rule->addcall($name); |
| |
| _check_insatiable($name,$1,$grammar,$line) if $::RD_CHECK; |
| } |
| } |
| elsif ($grammar =~ m/$MANY/gco) |
| { |
| _parse("a one-or-more subrule match", $aftererror,$line,"$code$argcode($1)"); |
| if ($2) |
| { |
| # $DB::single=1; |
| my $pos = pos $grammar; |
| substr($grammar,$pos,0, |
| "<leftop='$name(s)': $name $2 $name> "); |
| |
| pos $grammar = $pos; |
| } |
| else |
| { |
| $item = new Parse::RecDescent::Repetition($name,$1,1,$MAXREP, |
| $lookahead,$line, |
| $self, |
| $matchrule, |
| $argcode); |
| |
| $prod and $prod->additem($item) |
| or _no_rule("repetition",$line,"$code$argcode($1)"); |
| |
| !$matchrule and $rule and $rule->addcall($name); |
| |
| _check_insatiable($name,$1,$grammar,$line) if $::RD_CHECK; |
| } |
| } |
| elsif ($grammar =~ m/$EXACTLY/gco) |
| { |
| _parse("an exactly-$1-times subrule match", $aftererror,$line,"$code$argcode($1)"); |
| if ($2) |
| { |
| my $pos = pos $grammar; |
| substr($grammar,$pos,0, |
| "<leftop='$name($1)': $name $2 $name>($1) "); |
| |
| pos $grammar = $pos; |
| } |
| else |
| { |
| $item = new Parse::RecDescent::Repetition($name,$1,$1,$1, |
| $lookahead,$line, |
| $self, |
| $matchrule, |
| $argcode); |
| $prod and $prod->additem($item) |
| or _no_rule("repetition",$line,"$code$argcode($1)"); |
| |
| !$matchrule and $rule and $rule->addcall($name); |
| } |
| } |
| elsif ($grammar =~ m/$BETWEEN/gco) |
| { |
| _parse("a $1-to-$2 subrule match", $aftererror,$line,"$code$argcode($1..$2)"); |
| if ($3) |
| { |
| my $pos = pos $grammar; |
| substr($grammar,$pos,0, |
| "<leftop='$name($1..$2)': $name $3 $name>($1..$2) "); |
| |
| pos $grammar = $pos; |
| } |
| else |
| { |
| $item = new Parse::RecDescent::Repetition($name,"$1..$2",$1,$2, |
| $lookahead,$line, |
| $self, |
| $matchrule, |
| $argcode); |
| $prod and $prod->additem($item) |
| or _no_rule("repetition",$line,"$code$argcode($1..$2)"); |
| |
| !$matchrule and $rule and $rule->addcall($name); |
| } |
| } |
| elsif ($grammar =~ m/$ATLEAST/gco) |
| { |
| _parse("a $1-or-more subrule match", $aftererror,$line,"$code$argcode($1..)"); |
| if ($2) |
| { |
| my $pos = pos $grammar; |
| substr($grammar,$pos,0, |
| "<leftop='$name($1..)': $name $2 $name>($1..) "); |
| |
| pos $grammar = $pos; |
| } |
| else |
| { |
| $item = new Parse::RecDescent::Repetition($name,"$1..",$1,$MAXREP, |
| $lookahead,$line, |
| $self, |
| $matchrule, |
| $argcode); |
| $prod and $prod->additem($item) |
| or _no_rule("repetition",$line,"$code$argcode($1..)"); |
| |
| !$matchrule and $rule and $rule->addcall($name); |
| _check_insatiable($name,"$1..",$grammar,$line) if $::RD_CHECK; |
| } |
| } |
| elsif ($grammar =~ m/$ATMOST/gco) |
| { |
| _parse("a one-to-$1 subrule match", $aftererror,$line,"$code$argcode(..$1)"); |
| if ($2) |
| { |
| my $pos = pos $grammar; |
| substr($grammar,$pos,0, |
| "<leftop='$name(..$1)': $name $2 $name>(..$1) "); |
| |
| pos $grammar = $pos; |
| } |
| else |
| { |
| $item = new Parse::RecDescent::Repetition($name,"..$1",1,$1, |
| $lookahead,$line, |
| $self, |
| $matchrule, |
| $argcode); |
| $prod and $prod->additem($item) |
| or _no_rule("repetition",$line,"$code$argcode(..$1)"); |
| |
| !$matchrule and $rule and $rule->addcall($name); |
| } |
| } |
| elsif ($grammar =~ m/$BADREP/gco) |
| { |
| my $current_match = substr($grammar, $-[0], $+[0] - $-[0]); |
| _parse("an subrule match with invalid repetition specifier", 0,$line, $current_match); |
| _error("Incorrect specification of a repeated subrule", |
| $line); |
| _hint("Repeated subrules like \"$code$argcode$current_match\" cannot have |
| a maximum repetition of zero, nor can they have |
| negative components in their ranges."); |
| } |
| } |
| else |
| { |
| _parse("a subrule match", $aftererror,$line,$code); |
| my $desc; |
| if ($name=~/\A_alternation_\d+_of_production_\d+_of_rule/) |
| { $desc = $self->{"rules"}{$name}->expected } |
| $item = new Parse::RecDescent::Subrule($name, |
| $lookahead, |
| $line, |
| $desc, |
| $matchrule, |
| $argcode); |
| |
| $prod and $prod->additem($item) |
| or _no_rule("(sub)rule",$line,$name); |
| |
| !$matchrule and $rule and $rule->addcall($name); |
| } |
| } |
| elsif ($grammar =~ m/$LONECOLON/gco ) |
| { |
| _error("Unexpected colon encountered", $line); |
| _hint("Did you mean \"|\" (to start a new production)? |
| Or perhaps you forgot that the colon |
| in a rule definition must be |
| on the same line as the rule name?"); |
| } |
| elsif ($grammar =~ m/$ACTION/gco ) # BAD ACTION, ALREADY FAILED |
| { |
| _error("Malformed action encountered", |
| $line); |
| _hint("Did you forget the closing curly bracket |
| or is there a syntax error in the action?"); |
| } |
| elsif ($grammar =~ m/$OTHER/gco ) |
| { |
| _error("Untranslatable item encountered: \"$1\"", |
| $line); |
| _hint("Did you misspell \"$1\" |
| or forget to comment it out?"); |
| } |
| |
| if ($lookaheadspec =~ tr /././ > 3) |
| { |
| $lookaheadspec =~ s/\A\s+//; |
| $lookahead = $lookahead<0 |
| ? 'a negative lookahead ("...!")' |
| : 'a positive lookahead ("...")' ; |
| _warn(1,"Found two or more lookahead specifiers in a |
| row.",$line) |
| and |
| _hint("Multiple positive and/or negative lookaheads |
| are simply multiplied together to produce a |
| single positive or negative lookahead |
| specification. In this case the sequence |
| \"$lookaheadspec\" was reduced to $lookahead. |
| Was this your intention?"); |
| } |
| $lookahead = 0; |
| $lookaheadspec = ""; |
| |
| $grammar =~ m/\G\s+/gc; |
| } |
| |
| if ($must_pop_lines) { |
| pop @lines; |
| } |
| |
| unless ($ERRORS or $isimplicit or !$::RD_CHECK) |
| { |
| $self->_check_grammar(); |
| } |
| |
| unless ($ERRORS or $isimplicit or $Parse::RecDescent::compiling) |
| { |
| my $code = $self->_code(); |
| if (defined $::RD_TRACE) |
| { |
| my $mode = ($nextnamespace eq "namespace000002") ? '>' : '>>'; |
| print STDERR "printing code (", length($code),") to RD_TRACE\n"; |
| local *TRACE_FILE; |
| open TRACE_FILE, $mode, "RD_TRACE" |
| and print TRACE_FILE "my \$ERRORS;\n$code" |
| and close TRACE_FILE; |
| } |
| |
| unless ( eval "$code 1" ) |
| { |
| _error("Internal error in generated parser code!"); |
| $@ =~ s/at grammar/in grammar at/; |
| _hint($@); |
| } |
| } |
| |
| if ($ERRORS and !_verbosity("HINT")) |
| { |
| local $::RD_HINT = defined $::RD_HINT ? $::RD_HINT : 1; |
| _hint('Set $::RD_HINT (or -RD_HINT if you\'re using "perl -s") |
| for hints on fixing these problems. Use $::RD_HINT = 0 |
| to disable this message.'); |
| } |
| if ($ERRORS) { $ERRORS=0; return } |
| return $self; |
| } |
| |
| |
| sub _addstartcode($$) |
| { |
| my ($self, $code) = @_; |
| $code =~ s/\A\s*\{(.*)\}\Z/$1/s; |
| |
| $self->{"startcode"} .= "$code;\n"; |
| } |
| |
| # CHECK FOR GRAMMAR PROBLEMS.... |
| |
| sub _check_insatiable($$$$) |
| { |
| my ($subrule,$repspec,$grammar,$line) = @_; |
| pos($grammar)=pos($_[2]); |
| return if $grammar =~ m/$OPTIONAL/gco || $grammar =~ m/$ANY/gco; |
| my $min = 1; |
| if ( $grammar =~ m/$MANY/gco |
| || $grammar =~ m/$EXACTLY/gco |
| || $grammar =~ m/$ATMOST/gco |
| || $grammar =~ m/$BETWEEN/gco && do { $min=$2; 1 } |
| || $grammar =~ m/$ATLEAST/gco && do { $min=$2; 1 } |
| || $grammar =~ m/$SUBRULE(?!\s*:)/gco |
| ) |
| { |
| return unless $1 eq $subrule && $min > 0; |
| my $current_match = substr($grammar, $-[0], $+[0] - $-[0]); |
| _warn(3,"Subrule sequence \"$subrule($repspec) $current_match\" will |
| (almost certainly) fail.",$line) |
| and |
| _hint("Unless subrule \"$subrule\" performs some cunning |
| lookahead, the repetition \"$subrule($repspec)\" will |
| insatiably consume as many matches of \"$subrule\" as it |
| can, leaving none to match the \"$current_match\" that follows."); |
| } |
| } |
| |
| sub _check_grammar ($) |
| { |
| my $self = shift; |
| my $rules = $self->{"rules"}; |
| my $rule; |
| foreach $rule ( values %$rules ) |
| { |
| next if ! $rule->{"changed"}; |
| |
| # CHECK FOR UNDEFINED RULES |
| |
| my $call; |
| foreach $call ( @{$rule->{"calls"}} ) |
| { |
| if (!defined ${$rules}{$call} |
| &&!defined &{"Parse::RecDescent::$call"}) |
| { |
| if (!defined $::RD_AUTOSTUB) |
| { |
| _warn(3,"Undefined (sub)rule \"$call\" |
| used in a production.") |
| and |
| _hint("Will you be providing this rule |
| later, or did you perhaps |
| misspell \"$call\"? Otherwise |
| it will be treated as an |
| immediate <reject>."); |
| eval "sub $self->{namespace}::$call {undef}"; |
| } |
| else # EXPERIMENTAL |
| { |
| my $rule = qq{'$call'}; |
| if ($::RD_AUTOSTUB and $::RD_AUTOSTUB ne "1") { |
| $rule = $::RD_AUTOSTUB; |
| } |
| _warn(1,"Autogenerating rule: $call") |
| and |
| _hint("A call was made to a subrule |
| named \"$call\", but no such |
| rule was specified. However, |
| since \$::RD_AUTOSTUB |
| was defined, a rule stub |
| ($call : $rule) was |
| automatically created."); |
| |
| $self->_generate("$call: $rule",0,1); |
| } |
| } |
| } |
| |
| # CHECK FOR LEFT RECURSION |
| |
| if ($rule->isleftrec($rules)) |
| { |
| _error("Rule \"$rule->{name}\" is left-recursive."); |
| _hint("Redesign the grammar so it's not left-recursive. |
| That will probably mean you need to re-implement |
| repetitions using the '(s)' notation. |
| For example: \"$rule->{name}(s)\"."); |
| next; |
| } |
| |
| # CHECK FOR PRODUCTIONS FOLLOWING EMPTY PRODUCTIONS |
| { |
| my $hasempty; |
| my $prod; |
| foreach $prod ( @{$rule->{"prods"}} ) { |
| if ($hasempty) { |
| _error("Production " . $prod->describe . " for \"$rule->{name}\" |
| will never be reached (preceding empty production will |
| always match first)."); |
| _hint("Reorder the grammar so that the empty production |
| is last in the list or productions."); |
| last; |
| } |
| $hasempty ||= $prod->isempty(); |
| } |
| } |
| } |
| } |
| |
| # GENERATE ACTUAL PARSER CODE |
| |
| sub _code($) |
| { |
| my $self = shift; |
| my $initial_skip = defined($self->{skip}) ? $self->{skip} : $skip; |
| |
| my $code = qq{ |
| package $self->{namespace}; |
| use strict; |
| use vars qw(\$skip \$AUTOLOAD $self->{localvars} ); |
| \@$self->{namespace}\::ISA = (); |
| \$skip = '$initial_skip'; |
| $self->{startcode} |
| |
| { |
| local \$SIG{__WARN__} = sub {0}; |
| # PRETEND TO BE IN Parse::RecDescent NAMESPACE |
| *$self->{namespace}::AUTOLOAD = sub |
| { |
| no strict 'refs'; |
| \$AUTOLOAD =~ s/^$self->{namespace}/Parse::RecDescent/; |
| goto &{\$AUTOLOAD}; |
| } |
| } |
| |
| }; |
| $code .= "push \@$self->{namespace}\::ISA, 'Parse::RecDescent';"; |
| $self->{"startcode"} = ''; |
| |
| my $rule; |
| foreach $rule ( values %{$self->{"rules"}} ) |
| { |
| if ($rule->{"changed"}) |
| { |
| $code .= $rule->code($self->{"namespace"},$self); |
| $rule->{"changed"} = 0; |
| } |
| } |
| |
| return $code; |
| } |
| |
| |
| # EXECUTING A PARSE.... |
| |
| sub AUTOLOAD # ($parser, $text; $linenum, @args) |
| { |
| croak "Could not find method: $AUTOLOAD\n" unless ref $_[0]; |
| my $class = ref($_[0]) || $_[0]; |
| my $text = ref($_[1]) eq 'SCALAR' ? ${$_[1]} : "$_[1]"; |
| $_[0]->{lastlinenum} = $_[2]||_linecount($_[1]); |
| $_[0]->{lastlinenum} = _linecount($_[1]); |
| $_[0]->{lastlinenum} += ($_[2]||0) if @_ > 2; |
| $_[0]->{offsetlinenum} = $_[0]->{lastlinenum}; |
| $_[0]->{fulltext} = $text; |
| $_[0]->{fulltextlen} = length $text; |
| $_[0]->{linecounter_cache} = {}; |
| $_[0]->{deferred} = []; |
| $_[0]->{errors} = []; |
| my @args = @_[3..$#_]; |
| my $args = sub { [ @args ] }; |
| |
| $AUTOLOAD =~ s/$class/$_[0]->{namespace}/; |
| no strict "refs"; |
| |
| local $::RD_WARN = $::RD_WARN || $_[0]->{__WARN__}; |
| local $::RD_HINT = $::RD_HINT || $_[0]->{__HINT__}; |
| local $::RD_TRACE = $::RD_TRACE || $_[0]->{__TRACE__}; |
| |
| croak "Unknown starting rule ($AUTOLOAD) called\n" |
| unless defined &$AUTOLOAD; |
| my $retval = &{$AUTOLOAD}($_[0],$text,undef,undef,undef,$args); |
| |
| if (defined $retval) |
| { |
| foreach ( @{$_[0]->{deferred}} ) { &$_; } |
| } |
| else |
| { |
| foreach ( @{$_[0]->{errors}} ) { _error(@$_); } |
| } |
| |
| if (ref $_[1] eq 'SCALAR') { ${$_[1]} = $text } |
| |
| $ERRORS = 0; |
| return $retval; |
| } |
| |
| sub _parserepeat($$$$$$$$$) # RETURNS A REF TO AN ARRAY OF MATCHES |
| { |
| my ($parser, $text, $prod, $min, $max, $_noactions, $_itempos, $expectation, $argcode) = @_; |
| my @tokens = (); |
| |
| my $itemposfirst; |
| my $reps; |
| for ($reps=0; $reps<$max;) |
| { |
| $expectation->at($text); |
| my $_savetext = $text; |
| my $prevtextlen = length $text; |
| my $_tok; |
| if (! defined ($_tok = &$prod($parser,$text,1,$_noactions,$_itempos,$argcode))) |
| { |
| $text = $_savetext; |
| last; |
| } |
| |
| if (defined($_itempos) and !defined($itemposfirst)) |
| { |
| $itemposfirst = Parse::RecDescent::Production::_duplicate_itempos($_itempos); |
| } |
| |
| push @tokens, $_tok if defined $_tok; |
| last if ++$reps >= $min and $prevtextlen == length $text; |
| } |
| |
| do { $expectation->failed(); return undef} if $reps<$min; |
| |
| if (defined $itemposfirst) |
| { |
| Parse::RecDescent::Production::_update_itempos($_itempos, $itemposfirst, undef, [qw(from)]); |
| } |
| |
| $_[1] = $text; |
| return [@tokens]; |
| } |
| |
| sub set_autoflush { |
| my $orig_selected = select $_[0]; |
| $| = 1; |
| select $orig_selected; |
| return; |
| } |
| |
| # ERROR REPORTING.... |
| |
| sub _write_ERROR { |
| my ($errorprefix, $errortext) = @_; |
| return if $errortext !~ /\S/; |
| $errorprefix =~ s/\s+\Z//; |
| local $^A = q{}; |
| |
| formline(<<'END_FORMAT', $errorprefix, $errortext); |
| @>>>>>>>>>>>>>>>>>>>>: ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< |
| END_FORMAT |
| formline(<<'END_FORMAT', $errortext); |
| ~~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< |
| END_FORMAT |
| print {*STDERR} $^A; |
| } |
| |
| # TRACING |
| |
| my $TRACE_FORMAT = <<'END_FORMAT'; |
| @>|@|||||||||@^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<| |
| | ~~ |^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<| |
| END_FORMAT |
| |
| my $TRACECONTEXT_FORMAT = <<'END_FORMAT'; |
| @>|@|||||||||@ |^<<<<<<<<<<<<<<<<<<<<<<<<<<< |
| | ~~ | |^<<<<<<<<<<<<<<<<<<<<<<<<<<< |
| END_FORMAT |
| |
| sub _write_TRACE { |
| my ($tracelevel, $tracerulename, $tracemsg) = @_; |
| return if $tracemsg !~ /\S/; |
| $tracemsg =~ s/\s*\Z//; |
| local $^A = q{}; |
| my $bar = '|'; |
| formline($TRACE_FORMAT, $tracelevel, $tracerulename, $bar, $tracemsg, $tracemsg); |
| print {*STDERR} $^A; |
| } |
| |
| sub _write_TRACECONTEXT { |
| my ($tracelevel, $tracerulename, $tracecontext) = @_; |
| return if $tracecontext !~ /\S/; |
| $tracecontext =~ s/\s*\Z//; |
| local $^A = q{}; |
| my $bar = '|'; |
| formline($TRACECONTEXT_FORMAT, $tracelevel, $tracerulename, $bar, $tracecontext, $tracecontext); |
| print {*STDERR} $^A; |
| } |
| |
| sub _verbosity($) |
| { |
| defined $::RD_TRACE |
| or defined $::RD_HINT and $::RD_HINT and $_[0] =~ /ERRORS|WARN|HINT/ |
| or defined $::RD_WARN and $::RD_WARN and $_[0] =~ /ERRORS|WARN/ |
| or defined $::RD_ERRORS and $::RD_ERRORS and $_[0] =~ /ERRORS/ |
| } |
| |
| sub _error($;$) |
| { |
| $ERRORS++; |
| return 0 if ! _verbosity("ERRORS"); |
| my $errortext = $_[0]; |
| my $errorprefix = "ERROR" . ($_[1] ? " (line $_[1])" : ""); |
| $errortext =~ s/\s+/ /g; |
| print {*STDERR} "\n" if _verbosity("WARN"); |
| _write_ERROR($errorprefix, $errortext); |
| return 1; |
| } |
| |
| sub _warn($$;$) |
| { |
| return 0 unless _verbosity("WARN") && ($::RD_HINT || $_[0] >= ($::RD_WARN||1)); |
| my $errortext = $_[1]; |
| my $errorprefix = "Warning" . ($_[2] ? " (line $_[2])" : ""); |
| print {*STDERR} "\n" if _verbosity("HINT"); |
| $errortext =~ s/\s+/ /g; |
| _write_ERROR($errorprefix, $errortext); |
| return 1; |
| } |
| |
| sub _hint($) |
| { |
| return 0 unless $::RD_HINT; |
| my $errortext = $_[0]; |
| my $errorprefix = "Hint" . ($_[1] ? " (line $_[1])" : ""); |
| $errortext =~ s/\s+/ /g; |
| _write_ERROR($errorprefix, $errortext); |
| return 1; |
| } |
| |
| sub _tracemax($) |
| { |
| if (defined $::RD_TRACE |
| && $::RD_TRACE =~ /\d+/ |
| && $::RD_TRACE>1 |
| && $::RD_TRACE+10<length($_[0])) |
| { |
| my $count = length($_[0]) - $::RD_TRACE; |
| return substr($_[0],0,$::RD_TRACE/2) |
| . "...<$count>..." |
| . substr($_[0],-$::RD_TRACE/2); |
| } |
| else |
| { |
| return substr($_[0],0,500); |
| } |
| } |
| |
| sub _tracefirst($) |
| { |
| if (defined $::RD_TRACE |
| && $::RD_TRACE =~ /\d+/ |
| && $::RD_TRACE>1 |
| && $::RD_TRACE+10<length($_[0])) |
| { |
| my $count = length($_[0]) - $::RD_TRACE; |
| return substr($_[0],0,$::RD_TRACE) . "...<+$count>"; |
| } |
| else |
| { |
| return substr($_[0],0,500); |
| } |
| } |
| |
| my $lastcontext = ''; |
| my $lastrulename = ''; |
| my $lastlevel = ''; |
| |
| sub _trace($;$$$) |
| { |
| my $tracemsg = $_[0]; |
| my $tracecontext = $_[1]||$lastcontext; |
| my $tracerulename = $_[2]||$lastrulename; |
| my $tracelevel = $_[3]||$lastlevel; |
| if ($tracerulename) { $lastrulename = $tracerulename } |
| if ($tracelevel) { $lastlevel = $tracelevel } |
| |
| $tracecontext =~ s/\n/\\n/g; |
| $tracecontext =~ s/\s+/ /g; |
| $tracerulename = qq{$tracerulename}; |
| _write_TRACE($tracelevel, $tracerulename, $tracemsg); |
| if ($tracecontext ne $lastcontext) |
| { |
| if ($tracecontext) |
| { |
| $lastcontext = _tracefirst($tracecontext); |
| $tracecontext = qq{"$tracecontext"}; |
| } |
| else |
| { |
| $tracecontext = qq{<NO TEXT LEFT>}; |
| } |
| _write_TRACECONTEXT($tracelevel, $tracerulename, $tracecontext); |
| } |
| } |
| |
| sub _matchtracemessage |
| { |
| my ($self, $reject) = @_; |
| |
| my $prefix = ''; |
| my $postfix = ''; |
| my $matched = not $reject; |
| my @t = ("Matched", "Didn't match"); |
| if (exists $self->{lookahead} and $self->{lookahead}) |
| { |
| $postfix = $reject ? "(reject)" : "(keep)"; |
| $prefix = "..."; |
| if ($self->{lookahead} < 0) |
| { |
| $prefix .= '!'; |
| $matched = not $matched; |
| } |
| } |
| $prefix . ($matched ? $t[0] : $t[1]) . $postfix; |
| } |
| |
| sub _parseunneg($$$$$) |
| { |
| _parse($_[0],$_[1],$_[3],$_[4]); |
| if ($_[2]<0) |
| { |
| _error("Can't negate \"$_[4]\".",$_[3]); |
| _hint("You can't negate $_[0]. Remove the \"...!\" before |
| \"$_[4]\"."); |
| return 0; |
| } |
| return 1; |
| } |
| |
| sub _parse($$$$) |
| { |
| my $what = $_[3]; |
| $what =~ s/^\s+//; |
| if ($_[1]) |
| { |
| _warn(3,"Found $_[0] ($what) after an unconditional <error>",$_[2]) |
| and |
| _hint("An unconditional <error> always causes the |
| production containing it to immediately fail. |
| \u$_[0] that follows an <error> |
| will never be reached. Did you mean to use |
| <error?> instead?"); |
| } |
| |
| return if ! _verbosity("TRACE"); |
| my $errortext = "Treating \"$what\" as $_[0]"; |
| my $errorprefix = "Parse::RecDescent"; |
| $errortext =~ s/\s+/ /g; |
| _write_ERROR($errorprefix, $errortext); |
| } |
| |
| sub _linecount($) { |
| scalar substr($_[0], pos $_[0]||0) =~ tr/\n// |
| } |
| |
| |
| package main; |
| |
| use vars qw ( $RD_ERRORS $RD_WARN $RD_HINT $RD_TRACE $RD_CHECK ); |
| $::RD_CHECK = 1; |
| $::RD_ERRORS = 1; |
| $::RD_WARN = 3; |
| |
| 1; |
| |
| __END__ |
| |
| =head1 NAME |
| |
| Parse::RecDescent - Generate Recursive-Descent Parsers |
| |
| =head1 VERSION |
| |
| This document describes version 1.967006 of Parse::RecDescent |
| released January 29th, 2012. |
| |
| =head1 SYNOPSIS |
| |
| use Parse::RecDescent; |
| |
| # Generate a parser from the specification in $grammar: |
| |
| $parser = new Parse::RecDescent ($grammar); |
| |
| # Generate a parser from the specification in $othergrammar |
| |
| $anotherparser = new Parse::RecDescent ($othergrammar); |
| |
| |
| # Parse $text using rule 'startrule' (which must be |
| # defined in $grammar): |
| |
| $parser->startrule($text); |
| |
| |
| # Parse $text using rule 'otherrule' (which must also |
| # be defined in $grammar): |
| |
| $parser->otherrule($text); |
| |
| |
| # Change the universal token prefix pattern |
| # before building a grammar |
| # (the default is: '\s*'): |
| |
| $Parse::RecDescent::skip = '[ \t]+'; |
| |
| |
| # Replace productions of existing rules (or create new ones) |
| # with the productions defined in $newgrammar: |
| |
| $parser->Replace($newgrammar); |
| |
| |
| # Extend existing rules (or create new ones) |
| # by adding extra productions defined in $moregrammar: |
| |
| $parser->Extend($moregrammar); |
| |
| |
| # Global flags (useful as command line arguments under -s): |
| |
| $::RD_ERRORS # unless undefined, report fatal errors |
| $::RD_WARN # unless undefined, also report non-fatal problems |
| $::RD_HINT # if defined, also suggestion remedies |
| $::RD_TRACE # if defined, also trace parsers' behaviour |
| $::RD_AUTOSTUB # if defined, generates "stubs" for undefined rules |
| $::RD_AUTOACTION # if defined, appends specified action to productions |
| |
| |
| =head1 DESCRIPTION |
| |
| =head2 Overview |
| |
| Parse::RecDescent incrementally generates top-down recursive-descent text |
| parsers from simple I<yacc>-like grammar specifications. It provides: |
| |
| =over 4 |
| |
| =item * |
| |
| Regular expressions or literal strings as terminals (tokens), |
| |
| =item * |
| |
| Multiple (non-contiguous) productions for any rule, |
| |
| =item * |
| |
| Repeated and optional subrules within productions, |
| |
| =item * |
| |
| Full access to Perl within actions specified as part of the grammar, |
| |
| =item * |
| |
| Simple automated error reporting during parser generation and parsing, |
| |
| =item * |
| |
| The ability to commit to, uncommit to, or reject particular |
| productions during a parse, |
| |
| =item * |
| |
| The ability to pass data up and down the parse tree ("down" via subrule |
| argument lists, "up" via subrule return values) |
| |
| =item * |
| |
| Incremental extension of the parsing grammar (even during a parse), |
| |
| =item * |
| |
| Precompilation of parser objects, |
| |
| =item * |
| |
| User-definable reduce-reduce conflict resolution via |
| "scoring" of matching productions. |
| |
| =back |
| |
| =head2 Using C<Parse::RecDescent> |
| |
| Parser objects are created by calling C<Parse::RecDescent::new>, passing in a |
| grammar specification (see the following subsections). If the grammar is |
| correct, C<new> returns a blessed reference which can then be used to initiate |
| parsing through any rule specified in the original grammar. A typical sequence |
| looks like this: |
| |
| $grammar = q { |
| # GRAMMAR SPECIFICATION HERE |
| }; |
| |
| $parser = new Parse::RecDescent ($grammar) or die "Bad grammar!\n"; |
| |
| # acquire $text |
| |
| defined $parser->startrule($text) or print "Bad text!\n"; |
| |
| The rule through which parsing is initiated must be explicitly defined |
| in the grammar (i.e. for the above example, the grammar must include a |
| rule of the form: "startrule: <subrules>". |
| |
| If the starting rule succeeds, its value (see below) |
| is returned. Failure to generate the original parser or failure to match a text |
| is indicated by returning C<undef>. Note that it's easy to set up grammars |
| that can succeed, but which return a value of 0, "0", or "". So don't be |
| tempted to write: |
| |
| $parser->startrule($text) or print "Bad text!\n"; |
| |
| Normally, the parser has no effect on the original text. So in the |
| previous example the value of $text would be unchanged after having |
| been parsed. |
| |
| If, however, the text to be matched is passed by reference: |
| |
| $parser->startrule(\$text) |
| |
| then any text which was consumed during the match will be removed from the |
| start of $text. |
| |
| |
| =head2 Rules |
| |
| In the grammar from which the parser is built, rules are specified by |
| giving an identifier (which must satisfy /[A-Za-z]\w*/), followed by a |
| colon I<on the same line>, followed by one or more productions, |
| separated by single vertical bars. The layout of the productions |
| is entirely free-format: |
| |
| rule1: production1 |
| | production2 | |
| production3 | production4 |
| |
| At any point in the grammar previously defined rules may be extended with |
| additional productions. This is achieved by redeclaring the rule with the new |
| productions. Thus: |
| |
| rule1: a | b | c |
| rule2: d | e | f |
| rule1: g | h |
| |
| is exactly equivalent to: |
| |
| rule1: a | b | c | g | h |
| rule2: d | e | f |
| |
| Each production in a rule consists of zero or more items, each of which |
| may be either: the name of another rule to be matched (a "subrule"), |
| a pattern or string literal to be matched directly (a "token"), a |
| block of Perl code to be executed (an "action"), a special instruction |
| to the parser (a "directive"), or a standard Perl comment (which is |
| ignored). |
| |
| A rule matches a text if one of its productions matches. A production |
| matches if each of its items match consecutive substrings of the |
| text. The productions of a rule being matched are tried in the same |
| order that they appear in the original grammar, and the first matching |
| production terminates the match attempt (successfully). If all |
| productions are tried and none matches, the match attempt fails. |
| |
| Note that this behaviour is quite different from the "prefer the longer match" |
| behaviour of I<yacc>. For example, if I<yacc> were parsing the rule: |
| |
| seq : 'A' 'B' |
| | 'A' 'B' 'C' |
| |
| upon matching "AB" it would look ahead to see if a 'C' is next and, if |
| so, will match the second production in preference to the first. In |
| other words, I<yacc> effectively tries all the productions of a rule |
| breadth-first in parallel, and selects the "best" match, where "best" |
| means longest (note that this is a gross simplification of the true |
| behaviour of I<yacc> but it will do for our purposes). |
| |
| In contrast, C<Parse::RecDescent> tries each production depth-first in |
| sequence, and selects the "best" match, where "best" means first. This is |
| the fundamental difference between "bottom-up" and "recursive descent" |
| parsing. |
| |
| Each successfully matched item in a production is assigned a value, |
| which can be accessed in subsequent actions within the same |
| production (or, in some cases, as the return value of a successful |
| subrule call). Unsuccessful items don't have an associated value, |
| since the failure of an item causes the entire surrounding production |
| to immediately fail. The following sections describe the various types |
| of items and their success values. |
| |
| |
| =head2 Subrules |
| |
| A subrule which appears in a production is an instruction to the parser to |
| attempt to match the named rule at that point in the text being |
| parsed. If the named subrule is not defined when requested the |
| production containing it immediately fails (unless it was "autostubbed" - see |
| L<Autostubbing>). |
| |
| A rule may (recursively) call itself as a subrule, but I<not> as the |
| left-most item in any of its productions (since such recursions are usually |
| non-terminating). |
| |
| The value associated with a subrule is the value associated with its |
| C<$return> variable (see L<"Actions"> below), or with the last successfully |
| matched item in the subrule match. |
| |
| Subrules may also be specified with a trailing repetition specifier, |
| indicating that they are to be (greedily) matched the specified number |
| of times. The available specifiers are: |
| |
| subrule(?) # Match one-or-zero times |
| subrule(s) # Match one-or-more times |
| subrule(s?) # Match zero-or-more times |
| subrule(N) # Match exactly N times for integer N > 0 |
| subrule(N..M) # Match between N and M times |
| subrule(..M) # Match between 1 and M times |
| subrule(N..) # Match at least N times |
| |
| Repeated subrules keep matching until either the subrule fails to |
| match, or it has matched the minimal number of times but fails to |
| consume any of the parsed text (this second condition prevents the |
| subrule matching forever in some cases). |
| |
| Since a repeated subrule may match many instances of the subrule itself, the |
| value associated with it is not a simple scalar, but rather a reference to a |
| list of scalars, each of which is the value associated with one of the |
| individual subrule matches. In other words in the rule: |
| |
| program: statement(s) |
| |
| the value associated with the repeated subrule "statement(s)" is a reference |
| to an array containing the values matched by each call to the individual |
| subrule "statement". |
| |
| Repetition modifiers may include a separator pattern: |
| |
| program: statement(s /;/) |
| |
| specifying some sequence of characters to be skipped between each repetition. |
| This is really just a shorthand for the E<lt>leftop:...E<gt> directive |
| (see below). |
| |
| =head2 Tokens |
| |
| If a quote-delimited string or a Perl regex appears in a production, |
| the parser attempts to match that string or pattern at that point in |
| the text. For example: |
| |
| typedef: "typedef" typename identifier ';' |
| |
| identifier: /[A-Za-z_][A-Za-z0-9_]*/ |
| |
| As in regular Perl, a single quoted string is uninterpolated, whilst |
| a double-quoted string or a pattern is interpolated (at the time |
| of matching, I<not> when the parser is constructed). Hence, it is |
| possible to define rules in which tokens can be set at run-time: |
| |
| typedef: "$::typedefkeyword" typename identifier ';' |
| |
| identifier: /$::identpat/ |
| |
| Note that, since each rule is implemented inside a special namespace |
| belonging to its parser, it is necessary to explicitly quantify |
| variables from the main package. |
| |
| Regex tokens can be specified using just slashes as delimiters |
| or with the explicit C<mE<lt>delimiterE<gt>......E<lt>delimiterE<gt>> syntax: |
| |
| typedef: "typedef" typename identifier ';' |
| |
| typename: /[A-Za-z_][A-Za-z0-9_]*/ |
| |
| identifier: m{[A-Za-z_][A-Za-z0-9_]*} |
| |
| A regex of either type can also have any valid trailing parameter(s) |
| (that is, any of [cgimsox]): |
| |
| typedef: "typedef" typename identifier ';' |
| |
| identifier: / [a-z_] # LEADING ALPHA OR UNDERSCORE |
| [a-z0-9_]* # THEN DIGITS ALSO ALLOWED |
| /ix # CASE/SPACE/COMMENT INSENSITIVE |
| |
| The value associated with any successfully matched token is a string |
| containing the actual text which was matched by the token. |
| |
| It is important to remember that, since each grammar is specified in a |
| Perl string, all instances of the universal escape character '\' within |
| a grammar must be "doubled", so that they interpolate to single '\'s when |
| the string is compiled. For example, to use the grammar: |
| |
| word: /\S+/ | backslash |
| line: prefix word(s) "\n" |
| backslash: '\\' |
| |
| the following code is required: |
| |
| $parser = new Parse::RecDescent (q{ |
| |
| word: /\\S+/ | backslash |
| line: prefix word(s) "\\n" |
| backslash: '\\\\' |
| |
| }); |
| |
| =head2 Anonymous subrules |
| |
| Parentheses introduce a nested scope that is very like a call to an anonymous |
| subrule. Hence they are useful for "in-lining" subroutine calls, and other |
| kinds of grouping behaviour. For example, instead of: |
| |
| word: /\S+/ | backslash |
| line: prefix word(s) "\n" |
| |
| you could write: |
| |
| line: prefix ( /\S+/ | backslash )(s) "\n" |
| |
| and get exactly the same effects. |
| |
| Parentheses are also use for collecting unrepeated alternations within a |
| single production. |
| |
| secret_identity: "Mr" ("Incredible"|"Fantastic"|"Sheen") ", Esq." |
| |
| |
| =head2 Terminal Separators |
| |
| For the purpose of matching, each terminal in a production is considered |
| to be preceded by a "prefix" - a pattern which must be |
| matched before a token match is attempted. By default, the |
| prefix is optional whitespace (which always matches, at |
| least trivially), but this default may be reset in any production. |
| |
| The variable C<$Parse::RecDescent::skip> stores the universal |
| prefix, which is the default for all terminal matches in all parsers |
| built with C<Parse::RecDescent>. |
| |
| If you want to change the universal prefix using |
| C<$Parse::RecDescent::skip>, be careful to set it I<before> creating |
| the grammar object, because it is applied statically (when a grammar |
| is built) rather than dynamically (when the grammar is used). |
| Alternatively you can provide a global C<E<lt>skip:...E<gt>> directive |
| in your grammar before any rules (described later). |
| |
| The prefix for an individual production can be altered |
| by using the C<E<lt>skip:...E<gt>> directive (described later). |
| Setting this directive in the top-level rule is an alternative approach |
| to setting C<$Parse::RecDescent::skip> before creating the object, but |
| in this case you don't get the intended skipping behaviour if you |
| directly invoke methods different from the top-level rule. |
| |
| |
| =head2 Actions |
| |
| An action is a block of Perl code which is to be executed (as the |
| block of a C<do> statement) when the parser reaches that point in a |
| production. The action executes within a special namespace belonging to |
| the active parser, so care must be taken in correctly qualifying variable |
| names (see also L<Start-up Actions> below). |
| |
| The action is considered to succeed if the final value of the block |
| is defined (that is, if the implied C<do> statement evaluates to a |
| defined value - I<even one which would be treated as "false">). Note |
| that the value associated with a successful action is also the final |
| value in the block. |
| |
| An action will I<fail> if its last evaluated value is C<undef>. This is |
| surprisingly easy to accomplish by accident. For instance, here's an |
| infuriating case of an action that makes its production fail, but only |
| when debugging I<isn't> activated: |
| |
| description: name rank serial_number |
| { print "Got $item[2] $item[1] ($item[3])\n" |
| if $::debugging |
| } |
| |
| If C<$debugging> is false, no statement in the block is executed, so |
| the final value is C<undef>, and the entire production fails. The solution is: |
| |
| description: name rank serial_number |
| { print "Got $item[2] $item[1] ($item[3])\n" |
| if $::debugging; |
| 1; |
| } |
| |
| Within an action, a number of useful parse-time variables are |
| available in the special parser namespace (there are other variables |
| also accessible, but meddling with them will probably just break your |
| parser. As a general rule, if you avoid referring to unqualified |
| variables - especially those starting with an underscore - inside an action, |
| things should be okay): |
| |
| =over 4 |
| |
| =item C<@item> and C<%item> |
| |
| The array slice C<@item[1..$#item]> stores the value associated with each item |
| (that is, each subrule, token, or action) in the current production. The |
| analogy is to C<$1>, C<$2>, etc. in a I<yacc> grammar. |
| Note that, for obvious reasons, C<@item> only contains the |
| values of items I<before> the current point in the production. |
| |
| The first element (C<$item[0]>) stores the name of the current rule |
| being matched. |
| |
| C<@item> is a standard Perl array, so it can also be indexed with negative |
| numbers, representing the number of items I<back> from the current position in |
| the parse: |
| |
| stuff: /various/ bits 'and' pieces "then" data 'end' |
| { print $item[-2] } # PRINTS data |
| # (EASIER THAN: $item[6]) |
| |
| The C<%item> hash complements the <@item> array, providing named |
| access to the same item values: |
| |
| stuff: /various/ bits 'and' pieces "then" data 'end' |
| { print $item{data} # PRINTS data |
| # (EVEN EASIER THAN USING @item) |
| |
| |
| The results of named subrules are stored in the hash under each |
| subrule's name (including the repetition specifier, if any), |
| whilst all other items are stored under a "named |
| positional" key that indictates their ordinal position within their item |
| type: __STRINGI<n>__, __PATTERNI<n>__, __DIRECTIVEI<n>__, __ACTIONI<n>__: |
| |
| stuff: /various/ bits 'and' pieces "then" data 'end' { save } |
| { print $item{__PATTERN1__}, # PRINTS 'various' |
| $item{__STRING2__}, # PRINTS 'then' |
| $item{__ACTION1__}, # PRINTS RETURN |
| # VALUE OF save |
| } |
| |
| |
| If you want proper I<named> access to patterns or literals, you need to turn |
| them into separate rules: |
| |
| stuff: various bits 'and' pieces "then" data 'end' |
| { print $item{various} # PRINTS various |
| } |
| |
| various: /various/ |
| |
| |
| The special entry C<$item{__RULE__}> stores the name of the current |
| rule (i.e. the same value as C<$item[0]>. |
| |
| The advantage of using C<%item>, instead of C<@items> is that it |
| removes the need to track items positions that may change as a grammar |
| evolves. For example, adding an interim C<E<lt>skipE<gt>> directive |
| of action can silently ruin a trailing action, by moving an C<@item> |
| element "down" the array one place. In contrast, the named entry |
| of C<%item> is unaffected by such an insertion. |
| |
| A limitation of the C<%item> hash is that it only records the I<last> |
| value of a particular subrule. For example: |
| |
| range: '(' number '..' number )' |
| { $return = $item{number} } |
| |
| will return only the value corresponding to the I<second> match of the |
| C<number> subrule. In other words, successive calls to a subrule |
| overwrite the corresponding entry in C<%item>. Once again, the |
| solution is to rename each subrule in its own rule: |
| |
| range: '(' from_num '..' to_num ')' |
| { $return = $item{from_num} } |
| |
| from_num: number |
| to_num: number |
| |
| |
| |
| =item C<@arg> and C<%arg> |
| |
| The array C<@arg> and the hash C<%arg> store any arguments passed to |
| the rule from some other rule (see L<Subrule argument lists>). Changes |
| to the elements of either variable do not propagate back to the calling |
| rule (data can be passed back from a subrule via the C<$return> |
| variable - see next item). |
| |
| |
| =item C<$return> |
| |
| If a value is assigned to C<$return> within an action, that value is |
| returned if the production containing the action eventually matches |
| successfully. Note that setting C<$return> I<doesn't> cause the current |
| production to succeed. It merely tells it what to return if it I<does> succeed. |
| Hence C<$return> is analogous to C<$$> in a I<yacc> grammar. |
| |
| If C<$return> is not assigned within a production, the value of the |
| last component of the production (namely: C<$item[$#item]>) is |
| returned if the production succeeds. |
| |
| |
| =item C<$commit> |
| |
| The current state of commitment to the current production (see L<"Directives"> |
| below). |
| |
| =item C<$skip> |
| |
| The current terminal prefix (see L<"Directives"> below). |
| |
| =item C<$text> |
| |
| The remaining (unparsed) text. Changes to C<$text> I<do not |
| propagate> out of unsuccessful productions, but I<do> survive |
| successful productions. Hence it is possible to dynamically alter the |
| text being parsed - for example, to provide a C<#include>-like facility: |
| |
| hash_include: '#include' filename |
| { $text = ::loadfile($item[2]) . $text } |
| |
| filename: '<' /[a-z0-9._-]+/i '>' { $return = $item[2] } |
| | '"' /[a-z0-9._-]+/i '"' { $return = $item[2] } |
| |
| |
| =item C<$thisline> and C<$prevline> |
| |
| C<$thisline> stores the current line number within the current parse |
| (starting from 1). C<$prevline> stores the line number for the last |
| character which was already successfully parsed (this will be different from |
| C<$thisline> at the end of each line). |
| |
| For efficiency, C<$thisline> and C<$prevline> are actually tied |
| hashes, and only recompute the required line number when the variable's |
| value is used. |
| |
| Assignment to C<$thisline> adjusts the line number calculator, so that |
| it believes that the current line number is the value being assigned. Note |
| that this adjustment will be reflected in all subsequent line numbers |
| calculations. |
| |
| Modifying the value of the variable C<$text> (as in the previous |
| C<hash_include> example, for instance) will confuse the line |
| counting mechanism. To prevent this, you should call |
| C<Parse::RecDescent::LineCounter::resync($thisline)> I<immediately> |
| after any assignment to the variable C<$text> (or, at least, before the |
| next attempt to use C<$thisline>). |
| |
| Note that if a production fails after assigning to or |
| resync'ing C<$thisline>, the parser's line counter mechanism will |
| usually be corrupted. |
| |
| Also see the entry for C<@itempos>. |
| |
| The line number can be set to values other than 1, by calling the start |
| rule with a second argument. For example: |
| |
| $parser = new Parse::RecDescent ($grammar); |
| |
| $parser->input($text, 10); # START LINE NUMBERS AT 10 |
| |
| |
| =item C<$thiscolumn> and C<$prevcolumn> |
| |
| C<$thiscolumn> stores the current column number within the current line |
| being parsed (starting from 1). C<$prevcolumn> stores the column number |
| of the last character which was actually successfully parsed. Usually |
| C<$prevcolumn == $thiscolumn-1>, but not at the end of lines. |
| |
| For efficiency, C<$thiscolumn> and C<$prevcolumn> are |
| actually tied hashes, and only recompute the required column number |
| when the variable's value is used. |
| |
| Assignment to C<$thiscolumn> or C<$prevcolumn> is a fatal error. |
| |
| Modifying the value of the variable C<$text> (as in the previous |
| C<hash_include> example, for instance) may confuse the column |
| counting mechanism. |
| |
| Note that C<$thiscolumn> reports the column number I<before> any |
| whitespace that might be skipped before reading a token. Hence |
| if you wish to know where a token started (and ended) use something like this: |
| |
| rule: token1 token2 startcol token3 endcol token4 |
| { print "token3: columns $item[3] to $item[5]"; } |
| |
| startcol: '' { $thiscolumn } # NEED THE '' TO STEP PAST TOKEN SEP |
| endcol: { $prevcolumn } |
| |
| Also see the entry for C<@itempos>. |
| |
| =item C<$thisoffset> and C<$prevoffset> |
| |
| C<$thisoffset> stores the offset of the current parsing position |
| within the complete text |
| being parsed (starting from 0). C<$prevoffset> stores the offset |
| of the last character which was actually successfully parsed. In all |
| cases C<$prevoffset == $thisoffset-1>. |
| |
| For efficiency, C<$thisoffset> and C<$prevoffset> are |
| actually tied hashes, and only recompute the required offset |
| when the variable's value is used. |
| |
| Assignment to C<$thisoffset> or <$prevoffset> is a fatal error. |
| |
| Modifying the value of the variable C<$text> will I<not> affect the |
| offset counting mechanism. |
| |
| Also see the entry for C<@itempos>. |
| |
| =item C<@itempos> |
| |
| The array C<@itempos> stores a hash reference corresponding to |
| each element of C<@item>. The elements of the hash provide the |
| following: |
| |
| $itempos[$n]{offset}{from} # VALUE OF $thisoffset BEFORE $item[$n] |
| $itempos[$n]{offset}{to} # VALUE OF $prevoffset AFTER $item[$n] |
| $itempos[$n]{line}{from} # VALUE OF $thisline BEFORE $item[$n] |
| $itempos[$n]{line}{to} # VALUE OF $prevline AFTER $item[$n] |
| $itempos[$n]{column}{from} # VALUE OF $thiscolumn BEFORE $item[$n] |
| $itempos[$n]{column}{to} # VALUE OF $prevcolumn AFTER $item[$n] |
| |
| Note that the various C<$itempos[$n]...{from}> values record the |
| appropriate value I<after> any token prefix has been skipped. |
| |
| Hence, instead of the somewhat tedious and error-prone: |
| |
| rule: startcol token1 endcol |
| startcol token2 endcol |
| startcol token3 endcol |
| { print "token1: columns $item[1] |
| to $item[3] |
| token2: columns $item[4] |
| to $item[6] |
| token3: columns $item[7] |
| to $item[9]" } |
| |
| startcol: '' { $thiscolumn } # NEED THE '' TO STEP PAST TOKEN SEP |
| endcol: { $prevcolumn } |
| |
| it is possible to write: |
| |
| rule: token1 token2 token3 |
| { print "token1: columns $itempos[1]{column}{from} |
| to $itempos[1]{column}{to} |
| token2: columns $itempos[2]{column}{from} |
| to $itempos[2]{column}{to} |
| token3: columns $itempos[3]{column}{from} |
| to $itempos[3]{column}{to}" } |
| |
| Note however that (in the current implementation) the use of C<@itempos> |
| anywhere in a grammar implies that item positioning information is |
| collected I<everywhere> during the parse. Depending on the grammar |
| and the size of the text to be parsed, this may be prohibitively |
| expensive and the explicit use of C<$thisline>, C<$thiscolumn>, etc. may |
| be a better choice. |
| |
| |
| =item C<$thisparser> |
| |
| A reference to the S<C<Parse::RecDescent>> object through which |
| parsing was initiated. |
| |
| The value of C<$thisparser> propagates down the subrules of a parse |
| but not back up. Hence, you can invoke subrules from another parser |
| for the scope of the current rule as follows: |
| |
| rule: subrule1 subrule2 |
| | { $thisparser = $::otherparser } <reject> |
| | subrule3 subrule4 |
| | subrule5 |
| |
| The result is that the production calls "subrule1" and "subrule2" of |
| the current parser, and the remaining productions call the named subrules |
| from C<$::otherparser>. Note, however that "Bad Things" will happen if |
| C<::otherparser> isn't a blessed reference and/or doesn't have methods |
| with the same names as the required subrules! |
| |
| =item C<$thisrule> |
| |
| A reference to the S<C<Parse::RecDescent::Rule>> object corresponding to the |
| rule currently being matched. |
| |
| =item C<$thisprod> |
| |
| A reference to the S<C<Parse::RecDescent::Production>> object |
| corresponding to the production currently being matched. |
| |
| =item C<$score> and C<$score_return> |
| |
| $score stores the best production score to date, as specified by |
| an earlier C<E<lt>score:...E<gt>> directive. $score_return stores |
| the corresponding return value for the successful production. |
| |
| See L<Scored productions>. |
| |
| =back |
| |
| B<Warning:> the parser relies on the information in the various C<this...> |
| objects in some non-obvious ways. Tinkering with the other members of |
| these objects will probably cause Bad Things to happen, unless you |
| I<really> know what you're doing. The only exception to this advice is |
| that the use of C<$this...-E<gt>{local}> is always safe. |
| |
| |
| =head2 Start-up Actions |
| |
| Any actions which appear I<before> the first rule definition in a |
| grammar are treated as "start-up" actions. Each such action is |
| stripped of its outermost brackets and then evaluated (in the parser's |
| special namespace) just before the rules of the grammar are first |
| compiled. |
| |
| The main use of start-up actions is to declare local variables within the |
| parser's special namespace: |
| |
| { my $lastitem = '???'; } |
| |
| list: item(s) { $return = $lastitem } |
| |
| item: book { $lastitem = 'book'; } |
| bell { $lastitem = 'bell'; } |
| candle { $lastitem = 'candle'; } |
| |
| but start-up actions can be used to execute I<any> valid Perl code |
| within a parser's special namespace. |
| |
| Start-up actions can appear within a grammar extension or replacement |
| (that is, a partial grammar installed via C<Parse::RecDescent::Extend()> or |
| C<Parse::RecDescent::Replace()> - see L<Incremental Parsing>), and will be |
| executed before the new grammar is installed. Note, however, that a |
| particular start-up action is only ever executed once. |
| |
| |
| =head2 Autoactions |
| |
| It is sometimes desirable to be able to specify a default action to be |
| taken at the end of every production (for example, in order to easily |
| build a parse tree). If the variable C<$::RD_AUTOACTION> is defined |
| when C<Parse::RecDescent::new()> is called, the contents of that |
| variable are treated as a specification of an action which is to appended |
| to each production in the corresponding grammar. |
| |
| Alternatively, you can hard-code the autoaction within a grammar, using the |
| C<< <autoaction:...> >> directive. |
| |
| So, for example, to construct a simple parse tree you could write: |
| |
| $::RD_AUTOACTION = q { [@item] }; |
| |
| parser = Parse::RecDescent->new(q{ |
| expression: and_expr '||' expression | and_expr |
| and_expr: not_expr '&&' and_expr | not_expr |
| not_expr: '!' brack_expr | brack_expr |
| brack_expr: '(' expression ')' | identifier |
| identifier: /[a-z]+/i |
| }); |
| |
| or: |
| |
| parser = Parse::RecDescent->new(q{ |
| <autoaction: { [@item] } > |
| |
| expression: and_expr '||' expression | and_expr |
| and_expr: not_expr '&&' and_expr | not_expr |
| not_expr: '!' brack_expr | brack_expr |
| brack_expr: '(' expression ')' | identifier |
| identifier: /[a-z]+/i |
| }); |
| |
| Either of these is equivalent to: |
| |
| parser = new Parse::RecDescent (q{ |
| expression: and_expr '||' expression |
| { [@item] } |
| | and_expr |
| { [@item] } |
| |
| and_expr: not_expr '&&' and_expr |
| { [@item] } |
| | not_expr |
| { [@item] } |
| |
| not_expr: '!' brack_expr |
| { [@item] } |
| | brack_expr |
| { [@item] } |
| |
| brack_expr: '(' expression ')' |
| { [@item] } |
| | identifier |
| { [@item] } |
| |
| identifier: /[a-z]+/i |
| { [@item] } |
| }); |
| |
| Alternatively, we could take an object-oriented approach, use different |
| classes for each node (and also eliminating redundant intermediate nodes): |
| |
| $::RD_AUTOACTION = q |
| { $#item==1 ? $item[1] : "$item[0]_node"->new(@item[1..$#item]) }; |
| |
| parser = Parse::RecDescent->new(q{ |
| expression: and_expr '||' expression | and_expr |
| and_expr: not_expr '&&' and_expr | not_expr |
| not_expr: '!' brack_expr | brack_expr |
| brack_expr: '(' expression ')' | identifier |
| identifier: /[a-z]+/i |
| }); |
| |
| or: |
| |
| parser = Parse::RecDescent->new(q{ |
| <autoaction: |
| $#item==1 ? $item[1] : "$item[0]_node"->new(@item[1..$#item]) |
| > |
| |
| expression: and_expr '||' expression | and_expr |
| and_expr: not_expr '&&' and_expr | not_expr |
| not_expr: '!' brack_expr | brack_expr |
| brack_expr: '(' expression ')' | identifier |
| identifier: /[a-z]+/i |
| }); |
| |
| which are equivalent to: |
| |
| parser = Parse::RecDescent->new(q{ |
| expression: and_expr '||' expression |
| { "expression_node"->new(@item[1..3]) } |
| | and_expr |
| |
| and_expr: not_expr '&&' and_expr |
| { "and_expr_node"->new(@item[1..3]) } |
| | not_expr |
| |
| not_expr: '!' brack_expr |
| { "not_expr_node"->new(@item[1..2]) } |
| | brack_expr |
| |
| brack_expr: '(' expression ')' |
| { "brack_expr_node"->new(@item[1..3]) } |
| | identifier |
| |
| identifier: /[a-z]+/i |
| { "identifer_node"->new(@item[1]) } |
| }); |
| |
| Note that, if a production already ends in an action, no autoaction is appended |
| to it. For example, in this version: |
| |
| $::RD_AUTOACTION = q |
| { $#item==1 ? $item[1] : "$item[0]_node"->new(@item[1..$#item]) }; |
| |
| parser = Parse::RecDescent->new(q{ |
| expression: and_expr '&&' expression | and_expr |
| and_expr: not_expr '&&' and_expr | not_expr |
| not_expr: '!' brack_expr | brack_expr |
| brack_expr: '(' expression ')' | identifier |
| identifier: /[a-z]+/i |
| { 'terminal_node'->new($item[1]) } |
| }); |
| |
| each C<identifier> match produces a C<terminal_node> object, I<not> an |
| C<identifier_node> object. |
| |
| A level 1 warning is issued each time an "autoaction" is added to |
| some production. |
| |
| |
| =head2 Autotrees |
| |
| A commonly needed autoaction is one that builds a parse-tree. It is moderately |
| tricky to set up such an action (which must treat terminals differently from |
| non-terminals), so Parse::RecDescent simplifies the process by providing the |
| C<E<lt>autotreeE<gt>> directive. |
| |
| If this directive appears at the start of grammar, it causes |
| Parse::RecDescent to insert autoactions at the end of any rule except |
| those which already end in an action. The action inserted depends on whether |
| the production is an intermediate rule (two or more items), or a terminal |
| of the grammar (i.e. a single pattern or string item). |
| |
| So, for example, the following grammar: |
| |
| <autotree> |
| |
| file : command(s) |
| command : get | set | vet |
| get : 'get' ident ';' |
| set : 'set' ident 'to' value ';' |
| vet : 'check' ident 'is' value ';' |
| ident : /\w+/ |
| value : /\d+/ |
| |
| is equivalent to: |
| |
| file : command(s) { bless \%item, $item[0] } |
| command : get { bless \%item, $item[0] } |
| | set { bless \%item, $item[0] } |
| | vet { bless \%item, $item[0] } |
| get : 'get' ident ';' { bless \%item, $item[0] } |
| set : 'set' ident 'to' value ';' { bless \%item, $item[0] } |
| vet : 'check' ident 'is' value ';' { bless \%item, $item[0] } |
| |
| ident : /\w+/ { bless {__VALUE__=>$item[1]}, $item[0] } |
| value : /\d+/ { bless {__VALUE__=>$item[1]}, $item[0] } |
| |
| Note that each node in the tree is blessed into a class of the same name |
| as the rule itself. This makes it easy to build object-oriented |
| processors for the parse-trees that the grammar produces. Note too that |
| the last two rules produce special objects with the single attribute |
| '__VALUE__'. This is because they consist solely of a single terminal. |
| |
| This autoaction-ed grammar would then produce a parse tree in a data |
| structure like this: |
| |
| { |
| file => { |
| command => { |
| [ get => { |
| identifier => { __VALUE__ => 'a' }, |
| }, |
| set => { |
| identifier => { __VALUE__ => 'b' }, |
| value => { __VALUE__ => '7' }, |
| }, |
| vet => { |
| identifier => { __VALUE__ => 'b' }, |
| value => { __VALUE__ => '7' }, |
| }, |
| ], |
| }, |
| } |
| } |
| |
| (except, of course, that each nested hash would also be blessed into |
| the appropriate class). |
| |
| You can also specify a base class for the C<E<lt>autotreeE<gt>> directive. |
| The supplied prefix will be prepended to the rule names when creating |
| tree nodes. The following are equivalent: |
| |
| <autotree:MyBase::Class> |
| <autotree:MyBase::Class::> |
| |
| And will produce a root node blessed into the C<MyBase::Class::file> |
| package in the example above. |
| |
| |
| =head2 Autostubbing |
| |
| Normally, if a subrule appears in some production, but no rule of that |
| name is ever defined in the grammar, the production which refers to the |
| non-existent subrule fails immediately. This typically occurs as a |
| result of misspellings, and is a sufficiently common occurance that a |
| warning is generated for such situations. |
| |
| However, when prototyping a grammar it is sometimes useful to be |
| able to use subrules before a proper specification of them is |
| really possible. For example, a grammar might include a section like: |
| |
| function_call: identifier '(' arg(s?) ')' |
| |
| identifier: /[a-z]\w*/i |
| |
| where the possible format of an argument is sufficiently complex that |
| it is not worth specifying in full until the general function call |
| syntax has been debugged. In this situation it is convenient to leave |
| the real rule C<arg> undefined and just slip in a placeholder (or |
| "stub"): |
| |
| arg: 'arg' |
| |
| so that the function call syntax can be tested with dummy input such as: |
| |
| f0() |
| f1(arg) |
| f2(arg arg) |
| f3(arg arg arg) |
| |
| et cetera. |
| |
| Early in prototyping, many such "stubs" may be required, so |
| C<Parse::RecDescent> provides a means of automating their definition. |
| If the variable C<$::RD_AUTOSTUB> is defined when a parser is built, a |
| subrule reference to any non-existent rule (say, C<subrule>), will |
| cause a "stub" rule to be automatically defined in the generated |
| parser. If C<$::RD_AUTOSTUB eq '1'> or is false, a stub rule of the |
| form: |
| |
| subrule: 'subrule' |
| |
| will be generated. The special-case for a value of C<'1'> is to allow |
| the use of the B<perl -s> with B<-RD_AUTOSTUB> without generating |
| C<subrule: '1'> per below. If C<$::RD_AUTOSTUB> is true, a stub rule |
| of the form: |
| |
| subrule: $::RD_AUTOSTUB |
| |
| will be generated. C<$::RD_AUTOSTUB> must contain a valid production |
| item, no checking is performed. No lazy evaluation of |
| C<$::RD_AUTOSTUB> is performed, it is evaluated at the time the Parser |
| is generated. |
| |
| Hence, with C<$::RD_AUTOSTUB> defined, it is possible to only |
| partially specify a grammar, and then "fake" matches of the |
| unspecified (sub)rules by just typing in their name, or a literal |
| value that was assigned to C<$::RD_AUTOSTUB>. |
| |
| |
| |
| =head2 Look-ahead |
| |
| If a subrule, token, or action is prefixed by "...", then it is |
| treated as a "look-ahead" request. That means that the current production can |
| (as usual) only succeed if the specified item is matched, but that the matching |
| I<does not consume any of the text being parsed>. This is very similar to the |
| C</(?=...)/> look-ahead construct in Perl patterns. Thus, the rule: |
| |
| inner_word: word ...word |
| |
| will match whatever the subrule "word" matches, provided that match is followed |
| by some more text which subrule "word" would also match (although this |
| second substring is not actually consumed by "inner_word") |
| |
| Likewise, a "...!" prefix, causes the following item to succeed (without |
| consuming any text) if and only if it would normally fail. Hence, a |
| rule such as: |
| |
| identifier: ...!keyword ...!'_' /[A-Za-z_]\w*/ |
| |
| matches a string of characters which satisfies the pattern |
| C</[A-Za-z_]\w*/>, but only if the same sequence of characters would |
| not match either subrule "keyword" or the literal token '_'. |
| |
| Sequences of look-ahead prefixes accumulate, multiplying their positive and/or |
| negative senses. Hence: |
| |
| inner_word: word ...!......!word |
| |
| is exactly equivalent the the original example above (a warning is issued in |
| cases like these, since they often indicate something left out, or |
| misunderstood). |
| |
| Note that actions can also be treated as look-aheads. In such cases, |
| the state of the parser text (in the local variable C<$text>) |
| I<after> the look-ahead action is guaranteed to be identical to its |
| state I<before> the action, regardless of how it's changed I<within> |
| the action (unless you actually undefine C<$text>, in which case you |
| get the disaster you deserve :-). |
| |
| |
| =head2 Directives |
| |
| Directives are special pre-defined actions which may be used to alter |
| the behaviour of the parser. There are currently twenty-three directives: |
| C<E<lt>commitE<gt>>, |
| C<E<lt>uncommitE<gt>>, |
| C<E<lt>rejectE<gt>>, |
| C<E<lt>scoreE<gt>>, |
| C<E<lt>autoscoreE<gt>>, |
| C<E<lt>skipE<gt>>, |
| C<E<lt>resyncE<gt>>, |
| C<E<lt>errorE<gt>>, |
| C<E<lt>warnE<gt>>, |
| C<E<lt>hintE<gt>>, |
| C<E<lt>trace_buildE<gt>>, |
| C<E<lt>trace_parseE<gt>>, |
| C<E<lt>nocheckE<gt>>, |
| C<E<lt>rulevarE<gt>>, |
| C<E<lt>matchruleE<gt>>, |
| C<E<lt>leftopE<gt>>, |
| C<E<lt>rightopE<gt>>, |
| C<E<lt>deferE<gt>>, |
| C<E<lt>nocheckE<gt>>, |
| C<E<lt>perl_quotelikeE<gt>>, |
| C<E<lt>perl_codeblockE<gt>>, |
| C<E<lt>perl_variableE<gt>>, |
| and C<E<lt>tokenE<gt>>. |
| |
| =over 4 |
| |
| =item Committing and uncommitting |
| |
| The C<E<lt>commitE<gt>> and C<E<lt>uncommitE<gt>> directives permit the recursive |
| descent of the parse tree to be pruned (or "cut") for efficiency. |
| Within a rule, a C<E<lt>commitE<gt>> directive instructs the rule to ignore subsequent |
| productions if the current production fails. For example: |
| |
| command: 'find' <commit> filename |
| | 'open' <commit> filename |
| | 'move' filename filename |
| |
| Clearly, if the leading token 'find' is matched in the first production but that |
| production fails for some other reason, then the remaining |
| productions cannot possibly match. The presence of the |
| C<E<lt>commitE<gt>> causes the "command" rule to fail immediately if |
| an invalid "find" command is found, and likewise if an invalid "open" |
| command is encountered. |
| |
| It is also possible to revoke a previous commitment. For example: |
| |
| if_statement: 'if' <commit> condition |
| 'then' block <uncommit> |
| 'else' block |
| | 'if' <commit> condition |
| 'then' block |
| |
| In this case, a failure to find an "else" block in the first |
| production shouldn't preclude trying the second production, but a |
| failure to find a "condition" certainly should. |
| |
| As a special case, any production in which the I<first> item is an |
| C<E<lt>uncommitE<gt>> immediately revokes a preceding C<E<lt>commitE<gt>> |
| (even though the production would not otherwise have been tried). For |
| example, in the rule: |
| |
| request: 'explain' expression |
| | 'explain' <commit> keyword |
| | 'save' |
| | 'quit' |
| | <uncommit> term '?' |
| |
| if the text being matched was "explain?", and the first two |
| productions failed, then the C<E<lt>commitE<gt>> in production two would cause |
| productions three and four to be skipped, but the leading |
| C<E<lt>uncommitE<gt>> in the production five would allow that production to |
| attempt a match. |
| |
| Note in the preceding example, that the C<E<lt>commitE<gt>> was only placed |
| in production two. If production one had been: |
| |
| request: 'explain' <commit> expression |
| |
| then production two would be (inappropriately) skipped if a leading |
| "explain..." was encountered. |
| |
| Both C<E<lt>commitE<gt>> and C<E<lt>uncommitE<gt>> directives always succeed, and their value |
| is always 1. |
| |
| |
| =item Rejecting a production |
| |
| The C<E<lt>rejectE<gt>> directive immediately causes the current production |
| to fail (it is exactly equivalent to, but more obvious than, the |
| action C<{undef}>). A C<E<lt>rejectE<gt>> is useful when it is desirable to get |
| the side effects of the actions in one production, without prejudicing a match |
| by some other production later in the rule. For example, to insert |
| tracing code into the parse: |
| |
| complex_rule: { print "In complex rule...\n"; } <reject> |
| |
| complex_rule: simple_rule '+' 'i' '*' simple_rule |
| | 'i' '*' simple_rule |
| | simple_rule |
| |
| |
| It is also possible to specify a conditional rejection, using the |
| form C<E<lt>reject:I<condition>E<gt>>, which only rejects if the |
| specified condition is true. This form of rejection is exactly |
| equivalent to the action C<{(I<condition>)?undef:1}E<gt>>. |
| For example: |
| |
| command: save_command |
| | restore_command |
| | <reject: defined $::tolerant> { exit } |
| | <error: Unknown command. Ignored.> |
| |
| A C<E<lt>rejectE<gt>> directive never succeeds (and hence has no |
| associated value). A conditional rejection may succeed (if its |
| condition is not satisfied), in which case its value is 1. |
| |
| As an extra optimization, C<Parse::RecDescent> ignores any production |
| which I<begins> with an unconditional C<E<lt>rejectE<gt>> directive, |
| since any such production can never successfully match or have any |
| useful side-effects. A level 1 warning is issued in all such cases. |
| |
| Note that productions beginning with conditional |
| C<E<lt>reject:...E<gt>> directives are I<never> "optimized away" in |
| this manner, even if they are always guaranteed to fail (for example: |
| C<E<lt>reject:1E<gt>>) |
| |
| Due to the way grammars are parsed, there is a minor restriction on the |
| condition of a conditional C<E<lt>reject:...E<gt>>: it cannot |
| contain any raw '<' or '>' characters. For example: |
| |
| line: cmd <reject: $thiscolumn > max> data |
| |
| results in an error when a parser is built from this grammar (since the |
| grammar parser has no way of knowing whether the first > is a "less than" |
| or the end of the C<E<lt>reject:...E<gt>>. |
| |
| To overcome this problem, put the condition inside a do{} block: |
| |
| line: cmd <reject: do{$thiscolumn > max}> data |
| |
| Note that the same problem may occur in other directives that take |
| arguments. The same solution will work in all cases. |
| |
| |
| =item Skipping between terminals |
| |
| The C<E<lt>skipE<gt>> directive enables the terminal prefix used in |
| a production to be changed. For example: |
| |
| OneLiner: Command <skip:'[ \t]*'> Arg(s) /;/ |
| |
| causes only blanks and tabs to be skipped before terminals in the C<Arg> |
| subrule (and any of I<its> subrules>, and also before the final C</;/> terminal. |
| Once the production is complete, the previous terminal prefix is |
| reinstated. Note that this implies that distinct productions of a rule |
| must reset their terminal prefixes individually. |
| |
| The C<E<lt>skipE<gt>> directive evaluates to the I<previous> terminal prefix, |
| so it's easy to reinstate a prefix later in a production: |
| |
| Command: <skip:","> CSV(s) <skip:$item[1]> Modifier |
| |
| The value specified after the colon is interpolated into a pattern, so all of |
| the following are equivalent (though their efficiency increases down the list): |
| |
| <skip: "$colon|$comma"> # ASSUMING THE VARS HOLD THE OBVIOUS VALUES |
| |
| <skip: ':|,'> |
| |
| <skip: q{[:,]}> |
| |
| <skip: qr/[:,]/> |
| |
| There is no way of directly setting the prefix for |
| an entire rule, except as follows: |
| |
| Rule: <skip: '[ \t]*'> Prod1 |
| | <skip: '[ \t]*'> Prod2a Prod2b |
| | <skip: '[ \t]*'> Prod3 |
| |
| or, better: |
| |
| Rule: <skip: '[ \t]*'> |
| ( |
| Prod1 |
| | Prod2a Prod2b |
| | Prod3 |
| ) |
| |
| The skip pattern is passed down to subrules, so setting the skip for |
| the top-level rule as described above actually sets the prefix for the |
| entire grammar (provided that you only call the method corresponding |
| to the top-level rule itself). Alternatively, or if you have more than |
| one top-level rule in your grammar, you can provide a global |
| C<E<lt>skipE<gt>> directive prior to defining any rules in the |
| grammar. These are the preferred alternatives to setting |
| C<$Parse::RecDescent::skip>. |
| |
| Additionally, using C<E<lt>skipE<gt>> actually allows you to have |
| a completely dynamic skipping behaviour. For example: |
| |
| Rule_with_dynamic_skip: <skip: $::skip_pattern> Rule |
| |
| Then you can set C<$::skip_pattern> before invoking |
| C<Rule_with_dynamic_skip> and have it skip whatever you specified. |
| |
| B<Note: Up to release 1.51 of Parse::RecDescent, an entirely different |
| mechanism was used for specifying terminal prefixes. The current method |
| is not backwards-compatible with that early approach. The current approach |
| is stable and will not to change again.> |
| |
| |
| =item Resynchronization |
| |
| The C<E<lt>resyncE<gt>> directive provides a visually distinctive |
| means of consuming some of the text being parsed, usually to skip an |
| erroneous input. In its simplest form C<E<lt>resyncE<gt>> simply |
| consumes text up to and including the next newline (C<"\n">) |
| character, succeeding only if the newline is found, in which case it |
| causes its surrounding rule to return zero on success. |
| |
| In other words, a C<E<lt>resyncE<gt>> is exactly equivalent to the token |
| C</[^\n]*\n/> followed by the action S<C<{ $return = 0 }>> (except that |
| productions beginning with a C<E<lt>resyncE<gt>> are ignored when generating |
| error messages). A typical use might be: |
| |
| script : command(s) |
| |
| command: save_command |
| | restore_command |
| | <resync> # TRY NEXT LINE, IF POSSIBLE |
| |
| It is also possible to explicitly specify a resynchronization |
| pattern, using the C<E<lt>resync:I<pattern>E<gt>> variant. This version |
| succeeds only if the specified pattern matches (and consumes) the |
| parsed text. In other words, C<E<lt>resync:I<pattern>E<gt>> is exactly |
| equivalent to the token C</I<pattern>/> (followed by a S<C<{ $return = 0 }>> |
| action). For example, if commands were terminated by newlines or semi-colons: |
| |
| command: save_command |
| | restore_command |
| | <resync:[^;\n]*[;\n]> |
| |
| The value of a successfully matched C<E<lt>resyncE<gt>> directive (of either |
| type) is the text that it consumed. Note, however, that since the |
| directive also sets C<$return>, a production consisting of a lone |
| C<E<lt>resyncE<gt>> succeeds but returns the value zero (which a calling rule |
| may find useful to distinguish between "true" matches and "tolerant" matches). |
| Remember that returning a zero value indicates that the rule I<succeeded> (since |
| only an C<undef> denotes failure within C<Parse::RecDescent> parsers. |
| |
| |
| =item Error handling |
| |
| The C<E<lt>errorE<gt>> directive provides automatic or user-defined |
| generation of error messages during a parse. In its simplest form |
| C<E<lt>errorE<gt>> prepares an error message based on |
| the mismatch between the last item expected and the text which cause |
| it to fail. For example, given the rule: |
| |
| McCoy: curse ',' name ', I'm a doctor, not a' a_profession '!' |
| | pronoun 'dead,' name '!' |
| | <error> |
| |
| the following strings would produce the following messages: |
| |
| =over 4 |
| |
| =item "Amen, Jim!" |
| |
| ERROR (line 1): Invalid McCoy: Expected curse or pronoun |
| not found |
| |
| =item "Dammit, Jim, I'm a doctor!" |
| |
| ERROR (line 1): Invalid McCoy: Expected ", I'm a doctor, not a" |
| but found ", I'm a doctor!" instead |
| |
| =item "He's dead,\n" |
| |
| ERROR (line 2): Invalid McCoy: Expected name not found |
| |
| =item "He's alive!" |
| |
| ERROR (line 1): Invalid McCoy: Expected 'dead,' but found |
| "alive!" instead |
| |
| =item "Dammit, Jim, I'm a doctor, not a pointy-eared Vulcan!" |
| |
| ERROR (line 1): Invalid McCoy: Expected a profession but found |
| "pointy-eared Vulcan!" instead |
| |
| |
| =back |
| |
| Note that, when autogenerating error messages, all underscores in any |
| rule name used in a message are replaced by single spaces (for example |
| "a_production" becomes "a production"). Judicious choice of rule |
| names can therefore considerably improve the readability of automatic |
| error messages (as well as the maintainability of the original |
| grammar). |
| |
| If the automatically generated error is not sufficient, it is possible to |
| provide an explicit message as part of the error directive. For example: |
| |
| Spock: "Fascinating ',' (name | 'Captain') '.' |
| | "Highly illogical, doctor." |
| | <error: He never said that!> |
| |
| which would result in I<all> failures to parse a "Spock" subrule printing the |
| following message: |
| |
| ERROR (line <N>): Invalid Spock: He never said that! |
| |
| The error message is treated as a "qq{...}" string and interpolated |
| when the error is generated (I<not> when the directive is specified!). |
| Hence: |
| |
| <error: Mystical error near "$text"> |
| |
| would correctly insert the ambient text string which caused the error. |
| |
| There are two other forms of error directive: C<E<lt>error?E<gt>> and |
| S<C<E<lt>error?: msgE<gt>>>. These behave just like C<E<lt>errorE<gt>> |
| and S<C<E<lt>error: msgE<gt>>> respectively, except that they are |
| only triggered if the rule is "committed" at the time they are |
| encountered. For example: |
| |
| Scotty: "Ya kenna change the Laws of Phusics," <commit> name |
| | name <commit> ',' 'she's goanta blaw!' |
| | <error?> |
| |
| will only generate an error for a string beginning with "Ya kenna |
| change the Laws o' Phusics," or a valid name, but which still fails to match the |
| corresponding production. That is, C<$parser-E<gt>Scotty("Aye, Cap'ain")> will |
| fail silently (since neither production will "commit" the rule on that |
| input), whereas S<C<$parser-E<gt>Scotty("Mr Spock, ah jest kenna do'ut!")>> |
| will fail with the error message: |
| |
| ERROR (line 1): Invalid Scotty: expected 'she's goanta blaw!' |
| but found 'I jest kenna do'ut!' instead. |
| |
| since in that case the second production would commit after matching |
| the leading name. |
| |
| Note that to allow this behaviour, all C<E<lt>errorE<gt>> directives which are |
| the first item in a production automatically uncommit the rule just |
| long enough to allow their production to be attempted (that is, when |
| their production fails, the commitment is reinstated so that |
| subsequent productions are skipped). |
| |
| In order to I<permanently> uncommit the rule before an error message, |
| it is necessary to put an explicit C<E<lt>uncommitE<gt>> before the |
| C<E<lt>errorE<gt>>. For example: |
| |
| line: 'Kirk:' <commit> Kirk |
| | 'Spock:' <commit> Spock |
| | 'McCoy:' <commit> McCoy |
| | <uncommit> <error?> <reject> |
| | <resync> |
| |
| |
| Error messages generated by the various C<E<lt>error...E<gt>> directives |
| are not displayed immediately. Instead, they are "queued" in a buffer and |
| are only displayed once parsing ultimately fails. Moreover, |
| C<E<lt>error...E<gt>> directives that cause one production of a rule |
| to fail are automatically removed from the message queue |
| if another production subsequently causes the entire rule to succeed. |
| This means that you can put |
| C<E<lt>error...E<gt>> directives wherever useful diagnosis can be done, |
| and only those associated with actual parser failure will ever be |
| displayed. Also see L<"GOTCHAS">. |
| |
| As a general rule, the most useful diagnostics are usually generated |
| either at the very lowest level within the grammar, or at the very |
| highest. A good rule of thumb is to identify those subrules which |
| consist mainly (or entirely) of terminals, and then put an |
| C<E<lt>error...E<gt>> directive at the end of any other rule which calls |
| one or more of those subrules. |
| |
| There is one other situation in which the output of the various types of |
| error directive is suppressed; namely, when the rule containing them |
| is being parsed as part of a "look-ahead" (see L<"Look-ahead">). In this |
| case, the error directive will still cause the rule to fail, but will do |
| so silently. |
| |
| An unconditional C<E<lt>errorE<gt>> directive always fails (and hence has no |
| associated value). This means that encountering such a directive |
| always causes the production containing it to fail. Hence an |
| C<E<lt>errorE<gt>> directive will inevitably be the last (useful) item of a |
| rule (a level 3 warning is issued if a production contains items after an unconditional |
| C<E<lt>errorE<gt>> directive). |
| |
| An C<E<lt>error?E<gt>> directive will I<succeed> (that is: fail to fail :-), if |
| the current rule is uncommitted when the directive is encountered. In |
| that case the directive's associated value is zero. Hence, this type |
| of error directive I<can> be used before the end of a |
| production. For example: |
| |
| command: 'do' <commit> something |
| | 'report' <commit> something |
| | <error?: Syntax error> <error: Unknown command> |
| |
| |
| B<Warning:> The C<E<lt>error?E<gt>> directive does I<not> mean "always fail (but |
| do so silently unless committed)". It actually means "only fail (and report) if |
| committed, otherwise I<succeed>". To achieve the "fail silently if uncommitted" |
| semantics, it is necessary to use: |
| |
| rule: item <commit> item(s) |
| | <error?> <reject> # FAIL SILENTLY UNLESS COMMITTED |
| |
| However, because people seem to expect a lone C<E<lt>error?E<gt>> directive |
| to work like this: |
| |
| rule: item <commit> item(s) |
| | <error?: Error message if committed> |
| | <error: Error message if uncommitted> |
| |
| Parse::RecDescent automatically appends a |
| C<E<lt>rejectE<gt>> directive if the C<E<lt>error?E<gt>> directive |
| is the only item in a production. A level 2 warning (see below) |
| is issued when this happens. |
| |
| The level of error reporting during both parser construction and |
| parsing is controlled by the presence or absence of four global |
| variables: C<$::RD_ERRORS>, C<$::RD_WARN>, C<$::RD_HINT>, and |
| <$::RD_TRACE>. If C<$::RD_ERRORS> is defined (and, by default, it is) |
| then fatal errors are reported. |
| |
| Whenever C<$::RD_WARN> is defined, certain non-fatal problems are also reported. |
| |
| Warnings have an associated "level": 1, 2, or 3. The higher the level, |
| the more serious the warning. The value of the corresponding global |
| variable (C<$::RD_WARN>) determines the I<lowest> level of warning to |
| be displayed. Hence, to see I<all> warnings, set C<$::RD_WARN> to 1. |
| To see only the most serious warnings set C<$::RD_WARN> to 3. |
| By default C<$::RD_WARN> is initialized to 3, ensuring that serious but |
| non-fatal errors are automatically reported. |
| |
| There is also a grammar directive to turn on warnings from within the |
| grammar: C<< <warn> >>. It takes an optional argument, which specifies |
| the warning level: C<< <warn: 2> >>. |
| |
| See F<"DIAGNOSTICS"> for a list of the varous error and warning messages |
| that Parse::RecDescent generates when these two variables are defined. |
| |
| Defining any of the remaining variables (which are not defined by |
| default) further increases the amount of information reported. |
| Defining C<$::RD_HINT> causes the parser generator to offer |
| more detailed analyses and hints on both errors and warnings. |
| Note that setting C<$::RD_HINT> at any point automagically |
| sets C<$::RD_WARN> to 1. There is also a C<< <hint> >> directive, which can |
| be hard-coded into a grammar. |
| |
| Defining C<$::RD_TRACE> causes the parser generator and the parser to |
| report their progress to STDERR in excruciating detail (although, without hints |
| unless $::RD_HINT is separately defined). This detail |
| can be moderated in only one respect: if C<$::RD_TRACE> has an |
| integer value (I<N>) greater than 1, only the I<N> characters of |
| the "current parsing context" (that is, where in the input string we |
| are at any point in the parse) is reported at any time. |
| |
| C<$::RD_TRACE> is mainly useful for debugging a grammar that isn't |
| behaving as you expected it to. To this end, if C<$::RD_TRACE> is |
| defined when a parser is built, any actual parser code which is |
| generated is also written to a file named "RD_TRACE" in the local |
| directory. |
| |
| There are two directives associated with the C<$::RD_TRACE> variable. |
| If a grammar contains a C<< <trace_build> >> directive anywhere in its |
| specification, C<$::RD_TRACE> is turned on during the parser construction |
| phase. If a grammar contains a C<< <trace_parse> >> directive anywhere in its |
| specification, C<$::RD_TRACE> is turned on during any parse the parser |
| performs. |
| |
| Note that the four variables belong to the "main" package, which |
| makes them easier to refer to in the code controlling the parser, and |
| also makes it easy to turn them into command line flags ("-RD_ERRORS", |
| "-RD_WARN", "-RD_HINT", "-RD_TRACE") under B<perl -s>. |
| |
| The corresponding directives are useful to "hardwire" the various |
| debugging features into a particular grammar (rather than having to set |
| and reset external variables). |
| |
| =item Redirecting diagnostics |
| |
| The diagnostics provided by the tracing mechanism always go to STDERR. |
| If you need them to go elsewhere, localize and reopen STDERR prior to the |
| parse. |
| |
| For example: |
| |
| { |
| local *STDERR = IO::File->new(">$filename") or die $!; |
| |
| my $result = $parser->startrule($text); |
| } |
| |
| |
| =item Consistency checks |
| |
| Whenever a parser is build, Parse::RecDescent carries out a number of |
| (potentially expensive) consistency checks. These include: verifying that the |
| grammar is not left-recursive and that no rules have been left undefined. |
| |
| These checks are important safeguards during development, but unnecessary |
| overheads when the grammar is stable and ready to be deployed. So |
| Parse::RecDescent provides a directive to disable them: C<< <nocheck> >>. |
| |
| If a grammar contains a C<< <nocheck> >> directive anywhere in its |
| specification, the extra compile-time checks are by-passed. |
| |
| |
| =item Specifying local variables |
| |
| It is occasionally convenient to specify variables which are local |
| to a single rule. This may be achieved by including a |
| C<E<lt>rulevar:...E<gt>> directive anywhere in the rule. For example: |
| |
| markup: <rulevar: $tag> |
| |
| markup: tag {($tag=$item[1]) =~ s/^<|>$//g} body[$tag] |
| |
| The example C<E<lt>rulevar: $tagE<gt>> directive causes a "my" variable named |
| C<$tag> to be declared at the start of the subroutine implementing the |
| C<markup> rule (that is, I<before> the first production, regardless of |
| where in the rule it is specified). |
| |
| Specifically, any directive of the form: |
| C<E<lt>rulevar:I<text>E<gt>> causes a line of the form C<my I<text>;> |
| to be added at the beginning of the rule subroutine, immediately after |
| the definitions of the following local variables: |
| |
| $thisparser $commit |
| $thisrule @item |
| $thisline @arg |
| $text %arg |
| |
| This means that the following C<E<lt>rulevarE<gt>> directives work |
| as expected: |
| |
| <rulevar: $count = 0 > |
| |
| <rulevar: $firstarg = $arg[0] || '' > |
| |
| <rulevar: $myItems = \@item > |
| |
| <rulevar: @context = ( $thisline, $text, @arg ) > |
| |
| <rulevar: ($name,$age) = $arg{"name","age"} > |
| |
| If a variable that is also visible to subrules is required, it needs |
| to be C<local>'d, not C<my>'d. C<rulevar> defaults to C<my>, but if C<local> |
| is explicitly specified: |
| |
| <rulevar: local $count = 0 > |
| |
| then a C<local>-ized variable is declared instead, and will be available |
| within subrules. |
| |
| Note however that, because all such variables are "my" variables, their |
| values I<do not persist> between match attempts on a given rule. To |
| preserve values between match attempts, values can be stored within the |
| "local" member of the C<$thisrule> object: |
| |
| countedrule: { $thisrule->{"local"}{"count"}++ } |
| <reject> |
| | subrule1 |
| | subrule2 |
| | <reject: $thisrule->{"local"}{"count"} == 1> |
| subrule3 |
| |
| |
| When matching a rule, each C<E<lt>rulevarE<gt>> directive is matched as |
| if it were an unconditional C<E<lt>rejectE<gt>> directive (that is, it |
| causes any production in which it appears to immediately fail to match). |
| For this reason (and to improve readability) it is usual to specify any |
| C<E<lt>rulevarE<gt>> directive in a separate production at the start of |
| the rule (this has the added advantage that it enables |
| C<Parse::RecDescent> to optimize away such productions, just as it does |
| for the C<E<lt>rejectE<gt>> directive). |
| |
| |
| =item Dynamically matched rules |
| |
| Because regexes and double-quoted strings are interpolated, it is relatively |
| easy to specify productions with "context sensitive" tokens. For example: |
| |
| command: keyword body "end $item[1]" |
| |
| which ensures that a command block is bounded by a |
| "I<E<lt>keywordE<gt>>...end I<E<lt>same keywordE<gt>>" pair. |
| |
| Building productions in which subrules are context sensitive is also possible, |
| via the C<E<lt>matchrule:...E<gt>> directive. This directive behaves |
| identically to a subrule item, except that the rule which is invoked to match |
| it is determined by the string specified after the colon. For example, we could |
| rewrite the C<command> rule like this: |
| |
| command: keyword <matchrule:body> "end $item[1]" |
| |
| Whatever appears after the colon in the directive is treated as an interpolated |
| string (that is, as if it appeared in C<qq{...}> operator) and the value of |
| that interpolated string is the name of the subrule to be matched. |
| |
| Of course, just putting a constant string like C<body> in a |
| C<E<lt>matchrule:...E<gt>> directive is of little interest or benefit. |
| The power of directive is seen when we use a string that interpolates |
| to something interesting. For example: |
| |
| command: keyword <matchrule:$item[1]_body> "end $item[1]" |
| |
| keyword: 'while' | 'if' | 'function' |
| |
| while_body: condition block |
| |
| if_body: condition block ('else' block)(?) |
| |
| function_body: arglist block |
| |
| Now the C<command> rule selects how to proceed on the basis of the keyword |
| that is found. It is as if C<command> were declared: |
| |
| command: 'while' while_body "end while" |
| | 'if' if_body "end if" |
| | 'function' function_body "end function" |
| |
| |
| When a C<E<lt>matchrule:...E<gt>> directive is used as a repeated |
| subrule, the rule name expression is "late-bound". That is, the name of |
| the rule to be called is re-evaluated I<each time> a match attempt is |
| made. Hence, the following grammar: |
| |
| { $::species = 'dogs' } |
| |
| pair: 'two' <matchrule:$::species>(s) |
| |
| dogs: /dogs/ { $::species = 'cats' } |
| |
| cats: /cats/ |
| |
| will match the string "two dogs cats cats" completely, whereas it will |
| only match the string "two dogs dogs dogs" up to the eighth letter. If |
| the rule name were "early bound" (that is, evaluated only the first |
| time the directive is encountered in a production), the reverse |
| behaviour would be expected. |
| |
| Note that the C<matchrule> directive takes a string that is to be treated |
| as a rule name, I<not> as a rule invocation. That is, |
| it's like a Perl symbolic reference, not an C<eval>. Just as you can say: |
| |
| $subname = 'foo'; |
| |
| # and later... |
| |
| &{$foo}(@args); |
| |
| but not: |
| |
| $subname = 'foo(@args)'; |
| |
| # and later... |
| |
| &{$foo}; |
| |
| likewise you can say: |
| |
| $rulename = 'foo'; |
| |
| # and in the grammar... |
| |
| <matchrule:$rulename>[@args] |
| |
| but not: |
| |
| $rulename = 'foo[@args]'; |
| |
| # and in the grammar... |
| |
| <matchrule:$rulename> |
| |
| |
| =item Deferred actions |
| |
| The C<E<lt>defer:...E<gt>> directive is used to specify an action to be |
| performed when (and only if!) the current production ultimately succeeds. |
| |
| Whenever a C<E<lt>defer:...E<gt>> directive appears, the code it specifies |
| is converted to a closure (an anonymous subroutine reference) which is |
| queued within the active parser object. Note that, |
| because the deferred code is converted to a closure, the values of any |
| "local" variable (such as C<$text>, <@item>, etc.) are preserved |
| until the deferred code is actually executed. |
| |
| If the parse ultimately succeeds |
| I<and> the production in which the C<E<lt>defer:...E<gt>> directive was |
| evaluated formed part of the successful parse, then the deferred code is |
| executed immediately before the parse returns. If however the production |
| which queued a deferred action fails, or one of the higher-level |
| rules which called that production fails, then the deferred action is |
| removed from the queue, and hence is never executed. |
| |
| For example, given the grammar: |
| |
| sentence: noun trans noun |
| | noun intrans |
| |
| noun: 'the dog' |
| { print "$item[1]\t(noun)\n" } |
| | 'the meat' |
| { print "$item[1]\t(noun)\n" } |
| |
| trans: 'ate' |
| { print "$item[1]\t(transitive)\n" } |
| |
| intrans: 'ate' |
| { print "$item[1]\t(intransitive)\n" } |
| | 'barked' |
| { print "$item[1]\t(intransitive)\n" } |
| |
| then parsing the sentence C<"the dog ate"> would produce the output: |
| |
| the dog (noun) |
| ate (transitive) |
| the dog (noun) |
| ate (intransitive) |
| |
| This is because, even though the first production of C<sentence> |
| ultimately fails, its initial subrules C<noun> and C<trans> do match, |
| and hence they execute their associated actions. |
| Then the second production of C<sentence> succeeds, causing the |
| actions of the subrules C<noun> and C<intrans> to be executed as well. |
| |
| On the other hand, if the actions were replaced by C<E<lt>defer:...E<gt>> |
| directives: |
| |
| sentence: noun trans noun |
| | noun intrans |
| |
| noun: 'the dog' |
| <defer: print "$item[1]\t(noun)\n" > |
| | 'the meat' |
| <defer: print "$item[1]\t(noun)\n" > |
| |
| trans: 'ate' |
| <defer: print "$item[1]\t(transitive)\n" > |
| |
| intrans: 'ate' |
| <defer: print "$item[1]\t(intransitive)\n" > |
| | 'barked' |
| <defer: print "$item[1]\t(intransitive)\n" > |
| |
| the output would be: |
| |
| the dog (noun) |
| ate (intransitive) |
| |
| since deferred actions are only executed if they were evaluated in |
| a production which ultimately contributes to the successful parse. |
| |
| In this case, even though the first production of C<sentence> caused |
| the subrules C<noun> and C<trans> to match, that production ultimately |
| failed and so the deferred actions queued by those subrules were subsequently |
| disgarded. The second production then succeeded, causing the entire |
| parse to succeed, and so the deferred actions queued by the (second) match of |
| the C<noun> subrule and the subsequent match of C<intrans> I<are> preserved and |
| eventually executed. |
| |
| Deferred actions provide a means of improving the performance of a parser, |
| by only executing those actions which are part of the final parse-tree |
| for the input data. |
| |
| Alternatively, deferred actions can be viewed as a mechanism for building |
| (and executing) a |
| customized subroutine corresponding to the given input data, much in the |
| same way that autoactions (see L<"Autoactions">) can be used to build a |
| customized data structure for specific input. |
| |
| Whether or not the action it specifies is ever executed, |
| a C<E<lt>defer:...E<gt>> directive always succeeds, returning the |
| number of deferred actions currently queued at that point. |
| |
| |
| =item Parsing Perl |
| |
| Parse::RecDescent provides limited support for parsing subsets of Perl, |
| namely: quote-like operators, Perl variables, and complete code blocks. |
| |
| The C<E<lt>perl_quotelikeE<gt>> directive can be used to parse any Perl |
| quote-like operator: C<'a string'>, C<m/a pattern/>, C<tr{ans}{lation}>, |
| etc. It does this by calling Text::Balanced::quotelike(). |
| |
| If a quote-like operator is found, a reference to an array of eight elements |
| is returned. Those elements are identical to the last eight elements returned |
| by Text::Balanced::extract_quotelike() in an array context, namely: |
| |
| =over 4 |
| |
| =item [0] |
| |
| the name of the quotelike operator -- 'q', 'qq', 'm', 's', 'tr' -- if the |
| operator was named; otherwise C<undef>, |
| |
| =item [1] |
| |
| the left delimiter of the first block of the operation, |
| |
| =item [2] |
| |
| the text of the first block of the operation |
| (that is, the contents of |
| a quote, the regex of a match, or substitution or the target list of a |
| translation), |
| |
| =item [3] |
| |
| the right delimiter of the first block of the operation, |
| |
| =item [4] |
| |
| the left delimiter of the second block of the operation if there is one |
| (that is, if it is a C<s>, C<tr>, or C<y>); otherwise C<undef>, |
| |
| =item [5] |
| |
| the text of the second block of the operation if there is one |
| (that is, the replacement of a substitution or the translation list |
| of a translation); otherwise C<undef>, |
| |
| =item [6] |
| |
| the right delimiter of the second block of the operation (if any); |
| otherwise C<undef>, |
| |
| =item [7] |
| |
| the trailing modifiers on the operation (if any); otherwise C<undef>. |
| |
| =back |
| |
| If a quote-like expression is not found, the directive fails with the usual |
| C<undef> value. |
| |
| The C<E<lt>perl_variableE<gt>> directive can be used to parse any Perl |
| variable: $scalar, @array, %hash, $ref->{field}[$index], etc. |
| It does this by calling Text::Balanced::extract_variable(). |
| |
| If the directive matches text representing a valid Perl variable |
| specification, it returns that text. Otherwise it fails with the usual |
| C<undef> value. |
| |
| The C<E<lt>perl_codeblockE<gt>> directive can be used to parse curly-brace-delimited block of Perl code, such as: { $a = 1; f() =~ m/pat/; }. |
| It does this by calling Text::Balanced::extract_codeblock(). |
| |
| If the directive matches text representing a valid Perl code block, |
| it returns that text. Otherwise it fails with the usual C<undef> value. |
| |
| You can also tell it what kind of brackets to use as the outermost |
| delimiters. For example: |
| |
| arglist: <perl_codeblock ()> |
| |
| causes an arglist to match a perl code block whose outermost delimiters |
| are C<(...)> (rather than the default C<{...}>). |
| |
| |
| =item Constructing tokens |
| |
| Eventually, Parse::RecDescent will be able to parse tokenized input, as |
| well as ordinary strings. In preparation for this joyous day, the |
| C<E<lt>token:...E<gt>> directive has been provided. |
| This directive creates a token which will be suitable for |
| input to a Parse::RecDescent parser (when it eventually supports |
| tokenized input). |
| |
| The text of the token is the value of the |
| immediately preceding item in the production. A |
| C<E<lt>token:...E<gt>> directive always succeeds with a return |
| value which is the hash reference that is the new token. It also |
| sets the return value for the production to that hash ref. |
| |
| The C<E<lt>token:...E<gt>> directive makes it easy to build |
| a Parse::RecDescent-compatible lexer in Parse::RecDescent: |
| |
| my $lexer = new Parse::RecDescent q |
| { |
| lex: token(s) |
| |
| token: /a\b/ <token:INDEF> |
| | /the\b/ <token:DEF> |
| | /fly\b/ <token:NOUN,VERB> |
| | /[a-z]+/i { lc $item[1] } <token:ALPHA> |
| | <error: Unknown token> |
| |
| }; |
| |
| which will eventually be able to be used with a regular Parse::RecDescent |
| grammar: |
| |
| my $parser = new Parse::RecDescent q |
| { |
| startrule: subrule1 subrule 2 |
| |
| # ETC... |
| }; |
| |
| either with a pre-lexing phase: |
| |
| $parser->startrule( $lexer->lex($data) ); |
| |
| or with a lex-on-demand approach: |
| |
| $parser->startrule( sub{$lexer->token(\$data)} ); |
| |
| But at present, only the C<E<lt>token:...E<gt>> directive is |
| actually implemented. The rest is vapourware. |
| |
| =item Specifying operations |
| |
| One of the commonest requirements when building a parser is to specify |
| binary operators. Unfortunately, in a normal grammar, the rules for |
| such things are awkward: |
| |
| disjunction: conjunction ('or' conjunction)(s?) |
| { $return = [ $item[1], @{$item[2]} ] } |
| |
| conjunction: atom ('and' atom)(s?) |
| { $return = [ $item[1], @{$item[2]} ] } |
| |
| or inefficient: |
| |
| disjunction: conjunction 'or' disjunction |
| { $return = [ $item[1], @{$item[2]} ] } |
| | conjunction |
| { $return = [ $item[1] ] } |
| |
| conjunction: atom 'and' conjunction |
| { $return = [ $item[1], @{$item[2]} ] } |
| | atom |
| { $return = [ $item[1] ] } |
| |
| and either way is ugly and hard to get right. |
| |
| The C<E<lt>leftop:...E<gt>> and C<E<lt>rightop:...E<gt>> directives provide an |
| easier way of specifying such operations. Using C<E<lt>leftop:...E<gt>> the |
| above examples become: |
| |
| disjunction: <leftop: conjunction 'or' conjunction> |
| conjunction: <leftop: atom 'and' atom> |
| |
| The C<E<lt>leftop:...E<gt>> directive specifies a left-associative binary operator. |
| It is specified around three other grammar elements |
| (typically subrules or terminals), which match the left operand, |
| the operator itself, and the right operand respectively. |
| |
| A C<E<lt>leftop:...E<gt>> directive such as: |
| |
| disjunction: <leftop: conjunction 'or' conjunction> |
| |
| is converted to the following: |
| |
| disjunction: ( conjunction ('or' conjunction)(s?) |
| { $return = [ $item[1], @{$item[2]} ] } ) |
| |
| In other words, a C<E<lt>leftop:...E<gt>> directive matches the left operand followed by zero |
| or more repetitions of both the operator and the right operand. It then |
| flattens the matched items into an anonymous array which becomes the |
| (single) value of the entire C<E<lt>leftop:...E<gt>> directive. |
| |
| For example, an C<E<lt>leftop:...E<gt>> directive such as: |
| |
| output: <leftop: ident '<<' expr > |
| |
| when given a string such as: |
| |
| cout << var << "str" << 3 |
| |
| would match, and C<$item[1]> would be set to: |
| |
| [ 'cout', 'var', '"str"', '3' ] |
| |
| In other words: |
| |
| output: <leftop: ident '<<' expr > |
| |
| is equivalent to a left-associative operator: |
| |
| output: ident { $return = [$item[1]] } |
| | ident '<<' expr { $return = [@item[1,3]] } |
| | ident '<<' expr '<<' expr { $return = [@item[1,3,5]] } |
| | ident '<<' expr '<<' expr '<<' expr { $return = [@item[1,3,5,7]] } |
| # ...etc... |
| |
| |
| Similarly, the C<E<lt>rightop:...E<gt>> directive takes a left operand, an operator, and a right operand: |
| |
| assign: <rightop: var '=' expr > |
| |
| and converts them to: |
| |
| assign: ( (var '=' {$return=$item[1]})(s?) expr |
| { $return = [ @{$item[1]}, $item[2] ] } ) |
| |
| which is equivalent to a right-associative operator: |
| |
| assign: expr { $return = [$item[1]] } |
| | var '=' expr { $return = [@item[1,3]] } |
| | var '=' var '=' expr { $return = [@item[1,3,5]] } |
| | var '=' var '=' var '=' expr { $return = [@item[1,3,5,7]] } |
| # ...etc... |
| |
| |
| Note that for both the C<E<lt>leftop:...E<gt>> and C<E<lt>rightop:...E<gt>> directives, the directive does not normally |
| return the operator itself, just a list of the operands involved. This is |
| particularly handy for specifying lists: |
| |
| list: '(' <leftop: list_item ',' list_item> ')' |
| { $return = $item[2] } |
| |
| There is, however, a problem: sometimes the operator is itself significant. |
| For example, in a Perl list a comma and a C<=E<gt>> are both |
| valid separators, but the C<=E<gt>> has additional stringification semantics. |
| Hence it's important to know which was used in each case. |
| |
| To solve this problem the |
| C<E<lt>leftop:...E<gt>> and C<E<lt>rightop:...E<gt>> directives |
| I<do> return the operator(s) as well, under two circumstances. |
| The first case is where the operator is specified as a subrule. In that instance, |
| whatever the operator matches is returned (on the assumption that if the operator |
| is important enough to have its own subrule, then it's important enough to return). |
| |
| The second case is where the operator is specified as a regular |
| expression. In that case, if the first bracketed subpattern of the |
| regular expression matches, that matching value is returned (this is analogous to |
| the behaviour of the Perl C<split> function, except that only the first subpattern |
| is returned). |
| |
| In other words, given the input: |
| |
| ( a=>1, b=>2 ) |
| |
| the specifications: |
| |
| list: '(' <leftop: list_item separator list_item> ')' |
| |
| separator: ',' | '=>' |
| |
| or: |
| |
| list: '(' <leftop: list_item /(,|=>)/ list_item> ')' |
| |
| cause the list separators to be interleaved with the operands in the |
| anonymous array in C<$item[2]>: |
| |
| [ 'a', '=>', '1', ',', 'b', '=>', '2' ] |
| |
| |
| But the following version: |
| |
| list: '(' <leftop: list_item /,|=>/ list_item> ')' |
| |
| returns only the operators: |
| |
| [ 'a', '1', 'b', '2' ] |
| |
| Of course, none of the above specifications handle the case of an empty |
| list, since the C<E<lt>leftop:...E<gt>> and C<E<lt>rightop:...E<gt>> directives |
| require at least a single right or left operand to match. To specify |
| that the operator can match "trivially", |
| it's necessary to add a C<(s?)> qualifier to the directive: |
| |
| list: '(' <leftop: list_item /(,|=>)/ list_item>(s?) ')' |
| |
| Note that in almost all the above examples, the first and third arguments |
| of the C<<leftop:...E<gt>> directive were the same subrule. That is because |
| C<<leftop:...E<gt>>'s are frequently used to specify "separated" lists of the |
| same type of item. To make such lists easier to specify, the following |
| syntax: |
| |
| list: element(s /,/) |
| |
| is exactly equivalent to: |
| |
| list: <leftop: element /,/ element> |
| |
| Note that the separator must be specified as a raw pattern (i.e. |
| not a string or subrule). |
| |
| |
| =item Scored productions |
| |
| By default, Parse::RecDescent grammar rules always accept the first |
| production that matches the input. But if two or more productions may |
| potentially match the same input, choosing the first that does so may |
| not be optimal. |
| |
| For example, if you were parsing the sentence "time flies like an arrow", |
| you might use a rule like this: |
| |
| sentence: verb noun preposition article noun { [@item] } |
| | adjective noun verb article noun { [@item] } |
| | noun verb preposition article noun { [@item] } |
| |
| Each of these productions matches the sentence, but the third one |
| is the most likely interpretation. However, if the sentence had been |
| "fruit flies like a banana", then the second production is probably |
| the right match. |
| |
| To cater for such situtations, the C<E<lt>score:...E<gt>> can be used. |
| The directive is equivalent to an unconditional C<E<lt>rejectE<gt>>, |
| except that it allows you to specify a "score" for the current |
| production. If that score is numerically greater than the best |
| score of any preceding production, the current production is cached for later |
| consideration. If no later production matches, then the cached |
| production is treated as having matched, and the value of the |
| item immediately before its C<E<lt>score:...E<gt>> directive is returned as the |
| result. |
| |
| In other words, by putting a C<E<lt>score:...E<gt>> directive at the end of |
| each production, you can select which production matches using |
| criteria other than specification order. For example: |
| |
| sentence: verb noun preposition article noun { [@item] } <score: sensible(@item)> |
| | adjective noun verb article noun { [@item] } <score: sensible(@item)> |
| | noun verb preposition article noun { [@item] } <score: sensible(@item)> |
| |
| Now, when each production reaches its respective C<E<lt>score:...E<gt>> |
| directive, the subroutine C<sensible> will be called to evaluate the |
| matched items (somehow). Once all productions have been tried, the |
| one which C<sensible> scored most highly will be the one that is |
| accepted as a match for the rule. |
| |
| The variable $score always holds the current best score of any production, |
| and the variable $score_return holds the corresponding return value. |
| |
| As another example, the following grammar matches lines that may be |
| separated by commas, colons, or semi-colons. This can be tricky if |
| a colon-separated line also contains commas, or vice versa. The grammar |
| resolves the ambiguity by selecting the rule that results in the |
| fewest fields: |
| |
| line: seplist[sep=>','] <score: -@{$item[1]}> |
| | seplist[sep=>':'] <score: -@{$item[1]}> |
| | seplist[sep=>" "] <score: -@{$item[1]}> |
| |
| seplist: <skip:""> <leftop: /[^$arg{sep}]*/ "$arg{sep}" /[^$arg{sep}]*/> |
| |
| Note the use of negation within the C<E<lt>score:...E<gt>> directive |
| to ensure that the seplist with the most items gets the lowest score. |
| |
| As the above examples indicate, it is often the case that all productions |
| in a rule use exactly the same C<E<lt>score:...E<gt>> directive. It is |
| tedious to have to repeat this identical directive in every production, so |
| Parse::RecDescent also provides the C<E<lt>autoscore:...E<gt>> directive. |
| |
| If an C<E<lt>autoscore:...E<gt>> directive appears in any |
| production of a rule, the code it specifies is used as the scoring |
| code for every production of that rule, except productions that already |
| end with an explicit C<E<lt>score:...E<gt>> directive. Thus the rules above could |
| be rewritten: |
| |
| line: <autoscore: -@{$item[1]}> |
| line: seplist[sep=>','] |
| | seplist[sep=>':'] |
| | seplist[sep=>" "] |
| |
| |
| sentence: <autoscore: sensible(@item)> |
| | verb noun preposition article noun { [@item] } |
| | adjective noun verb article noun { [@item] } |
| | noun verb preposition article noun { [@item] } |
| |
| Note that the C<E<lt>autoscore:...E<gt>> directive itself acts as an |
| unconditional C<E<lt>rejectE<gt>>, and (like the C<E<lt>rulevar:...E<gt>> |
| directive) is pruned at compile-time wherever possible. |
| |
| |
| =item Dispensing with grammar checks |
| |
| During the compilation phase of parser construction, Parse::RecDescent performs |
| a small number of checks on the grammar it's given. Specifically it checks that |
| the grammar is not left-recursive, that there are no "insatiable" constructs of |
| the form: |
| |
| rule: subrule(s) subrule |
| |
| and that there are no rules missing (i.e. referred to, but never defined). |
| |
| These checks are important during development, but can slow down parser |
| construction in stable code. So Parse::RecDescent provides the |
| E<lt>nocheckE<gt> directive to turn them off. The directive can only appear |
| before the first rule definition, and switches off checking throughout the rest |
| of the current grammar. |
| |
| Typically, this directive would be added when a parser has been thoroughly |
| tested and is ready for release. |
| |
| =back |
| |
| |
| =head2 Subrule argument lists |
| |
| It is occasionally useful to pass data to a subrule which is being invoked. For |
| example, consider the following grammar fragment: |
| |
| classdecl: keyword decl |
| |
| keyword: 'struct' | 'class'; |
| |
| decl: # WHATEVER |
| |
| The C<decl> rule might wish to know which of the two keywords was used |
| (since it may affect some aspect of the way the subsequent declaration |
| is interpreted). C<Parse::RecDescent> allows the grammar designer to |
| pass data into a rule, by placing that data in an I<argument list> |
| (that is, in square brackets) immediately after any subrule item in a |
| production. Hence, we could pass the keyword to C<decl> as follows: |
| |
| classdecl: keyword decl[ $item[1] ] |
| |
| keyword: 'struct' | 'class'; |
| |
| decl: # WHATEVER |
| |
| The argument list can consist of any number (including zero!) of comma-separated |
| Perl expressions. In other words, it looks exactly like a Perl anonymous |
| array reference. For example, we could pass the keyword, the name of the |
| surrounding rule, and the literal 'keyword' to C<decl> like so: |
| |
| classdecl: keyword decl[$item[1],$item[0],'keyword'] |
| |
| keyword: 'struct' | 'class'; |
| |
| decl: # WHATEVER |
| |
| Within the rule to which the data is passed (C<decl> in the above examples) |
| that data is available as the elements of a local variable C<@arg>. Hence |
| C<decl> might report its intentions as follows: |
| |
| classdecl: keyword decl[$item[1],$item[0],'keyword'] |
| |
| keyword: 'struct' | 'class'; |
| |
| decl: { print "Declaring $arg[0] (a $arg[2])\n"; |
| print "(this rule called by $arg[1])" } |
| |
| Subrule argument lists can also be interpreted as hashes, simply by using |
| the local variable C<%arg> instead of C<@arg>. Hence we could rewrite the |
| previous example: |
| |
| classdecl: keyword decl[keyword => $item[1], |
| caller => $item[0], |
| type => 'keyword'] |
| |
| keyword: 'struct' | 'class'; |
| |
| decl: { print "Declaring $arg{keyword} (a $arg{type})\n"; |
| print "(this rule called by $arg{caller})" } |
| |
| Both C<@arg> and C<%arg> are always available, so the grammar designer may |
| choose whichever convention (or combination of conventions) suits best. |
| |
| Subrule argument lists are also useful for creating "rule templates" |
| (especially when used in conjunction with the C<E<lt>matchrule:...E<gt>> |
| directive). For example, the subrule: |
| |
| list: <matchrule:$arg{rule}> /$arg{sep}/ list[%arg] |
| { $return = [ $item[1], @{$item[3]} ] } |
| | <matchrule:$arg{rule}> |
| { $return = [ $item[1]] } |
| |
| is a handy template for the common problem of matching a separated list. |
| For example: |
| |
| function: 'func' name '(' list[rule=>'param',sep=>';'] ')' |
| |
| param: list[rule=>'name',sep=>','] ':' typename |
| |
| name: /\w+/ |
| |
| typename: name |
| |
| |
| When a subrule argument list is used with a repeated subrule, the argument list |
| goes I<before> the repetition specifier: |
| |
| list: /some|many/ thing[ $item[1] ](s) |
| |
| The argument list is "late bound". That is, it is re-evaluated for every |
| repetition of the repeated subrule. |
| This means that each repeated attempt to match the subrule may be |
| passed a completely different set of arguments if the value of the |
| expression in the argument list changes between attempts. So, for |
| example, the grammar: |
| |
| { $::species = 'dogs' } |
| |
| pair: 'two' animal[$::species](s) |
| |
| animal: /$arg[0]/ { $::species = 'cats' } |
| |
| will match the string "two dogs cats cats" completely, whereas |
| it will only match the string "two dogs dogs dogs" up to the |
| eighth letter. If the value of the argument list were "early bound" |
| (that is, evaluated only the first time a repeated subrule match is |
| attempted), one would expect the matching behaviours to be reversed. |
| |
| Of course, it is possible to effectively "early bind" such argument lists |
| by passing them a value which does not change on each repetition. For example: |
| |
| { $::species = 'dogs' } |
| |
| pair: 'two' { $::species } animal[$item[2]](s) |
| |
| animal: /$arg[0]/ { $::species = 'cats' } |
| |
| |
| Arguments can also be passed to the start rule, simply by appending them |
| to the argument list with which the start rule is called (I<after> the |
| "line number" parameter). For example, given: |
| |
| $parser = new Parse::RecDescent ( $grammar ); |
| |
| $parser->data($text, 1, "str", 2, \@arr); |
| |
| # ^^^^^ ^ ^^^^^^^^^^^^^^^ |
| # | | | |
| # TEXT TO BE PARSED | | |
| # STARTING LINE NUMBER | |
| # ELEMENTS OF @arg WHICH IS PASSED TO RULE data |
| |
| then within the productions of the rule C<data>, the array C<@arg> will contain |
| C<("str", 2, \@arr)>. |
| |
| |
| =head2 Alternations |
| |
| Alternations are implicit (unnamed) rules defined as part of a production. An |
| alternation is defined as a series of '|'-separated productions inside a |
| pair of round brackets. For example: |
| |
| character: 'the' ( good | bad | ugly ) /dude/ |
| |
| Every alternation implicitly defines a new subrule, whose |
| automatically-generated name indicates its origin: |
| "_alternation_<I>_of_production_<P>_of_rule<R>" for the appropriate |
| values of <I>, <P>, and <R>. A call to this implicit subrule is then |
| inserted in place of the brackets. Hence the above example is merely a |
| convenient short-hand for: |
| |
| character: 'the' |
| _alternation_1_of_production_1_of_rule_character |
| /dude/ |
| |
| _alternation_1_of_production_1_of_rule_character: |
| good | bad | ugly |
| |
| Since alternations are parsed by recursively calling the parser generator, |
| any type(s) of item can appear in an alternation. For example: |
| |
| character: 'the' ( 'high' "plains" # Silent, with poncho |
| | /no[- ]name/ # Silent, no poncho |
| | vengeance_seeking # Poncho-optional |
| | <error> |
| ) drifter |
| |
| In this case, if an error occurred, the automatically generated |
| message would be: |
| |
| ERROR (line <N>): Invalid implicit subrule: Expected |
| 'high' or /no[- ]name/ or generic, |
| but found "pacifist" instead |
| |
| Since every alternation actually has a name, it's even possible |
| to extend or replace them: |
| |
| parser->Replace( |
| "_alternation_1_of_production_1_of_rule_character: |
| 'generic Eastwood'" |
| ); |
| |
| More importantly, since alternations are a form of subrule, they can be given |
| repetition specifiers: |
| |
| character: 'the' ( good | bad | ugly )(?) /dude/ |
| |
| |
| =head2 Incremental Parsing |
| |
| C<Parse::RecDescent> provides two methods - C<Extend> and C<Replace> - which |
| can be used to alter the grammar matched by a parser. Both methods |
| take the same argument as C<Parse::RecDescent::new>, namely a |
| grammar specification string |
| |
| C<Parse::RecDescent::Extend> interprets the grammar specification and adds any |
| productions it finds to the end of the rules for which they are specified. For |
| example: |
| |
| $add = "name: 'Jimmy-Bob' | 'Bobby-Jim'\ndesc: colour /necks?/"; |
| parser->Extend($add); |
| |
| adds two productions to the rule "name" (creating it if necessary) and one |
| production to the rule "desc". |
| |
| C<Parse::RecDescent::Replace> is identical, except that it first resets are |
| rule specified in the additional grammar, removing any existing productions. |
| Hence after: |
| |
| $add = "name: 'Jimmy-Bob' | 'Bobby-Jim'\ndesc: colour /necks?/"; |
| parser->Replace($add); |
| |
| are are I<only> valid "name"s and the one possible description. |
| |
| A more interesting use of the C<Extend> and C<Replace> methods is to call them |
| inside the action of an executing parser. For example: |
| |
| typedef: 'typedef' type_name identifier ';' |
| { $thisparser->Extend("type_name: '$item[3]'") } |
| | <error> |
| |
| identifier: ...!type_name /[A-Za-z_]w*/ |
| |
| which automatically prevents type names from being typedef'd, or: |
| |
| command: 'map' key_name 'to' abort_key |
| { $thisparser->Replace("abort_key: '$item[2]'") } |
| | 'map' key_name 'to' key_name |
| { map_key($item[2],$item[4]) } |
| | abort_key |
| { exit if confirm("abort?") } |
| |
| abort_key: 'q' |
| |
| key_name: ...!abort_key /[A-Za-z]/ |
| |
| which allows the user to change the abort key binding, but not to unbind it. |
| |
| The careful use of such constructs makes it possible to reconfigure a |
| a running parser, eliminating the need for semantic feedback by |
| providing syntactic feedback instead. However, as currently implemented, |
| C<Replace()> and C<Extend()> have to regenerate and re-C<eval> the |
| entire parser whenever they are called. This makes them quite slow for |
| large grammars. |
| |
| In such cases, the judicious use of an interpolated regex is likely to |
| be far more efficient: |
| |
| typedef: 'typedef' type_name/ identifier ';' |
| { $thisparser->{local}{type_name} .= "|$item[3]" } |
| | <error> |
| |
| identifier: ...!type_name /[A-Za-z_]w*/ |
| |
| type_name: /$thisparser->{local}{type_name}/ |
| |
| |
| =head2 Precompiling parsers |
| |
| Normally Parse::RecDescent builds a parser from a grammar at run-time. |
| That approach simplifies the design and implementation of parsing code, |
| but has the disadvantage that it slows the parsing process down - you |
| have to wait for Parse::RecDescent to build the parser every time the |
| program runs. Long or complex grammars can be particularly slow to |
| build, leading to unacceptable delays at start-up. |
| |
| To overcome this, the module provides a way of "pre-building" a parser |
| object and saving it in a separate module. That module can then be used |
| to create clones of the original parser. |
| |
| A grammar may be precompiled using the C<Precompile> class method. |
| For example, to precompile a grammar stored in the scalar $grammar, |
| and produce a class named PreGrammar in a module file named PreGrammar.pm, |
| you could use: |
| |
| use Parse::RecDescent; |
| |
| Parse::RecDescent->Precompile([$options_hashref], $grammar, "PreGrammar"); |
| |
| The first required argument is the grammar string, the second is the |
| name of the class to be built. The name of the module file is |
| generated automatically by appending ".pm" to the last element of the |
| class name. Thus |
| |
| Parse::RecDescent->Precompile($grammar, "My::New::Parser"); |
| |
| would produce a module file named Parser.pm. |
| |
| An optional hash reference may be supplied as the first argument to |
| C<Precompile>. This argument is currently EXPERIMENTAL, and may change |
| in a future release of Parse::RecDescent. The only supported option |
| is currently C<-standalone>, see L</"Standalone Precompiled Parsers">. |
| |
| It is somewhat tedious to have to write a small Perl program just to |
| generate a precompiled grammar class, so Parse::RecDescent has some special |
| magic that allows you to do the job directly from the command-line. |
| |
| If your grammar is specified in a file named F<grammar>, you can generate |
| a class named Yet::Another::Grammar like so: |
| |
| > perl -MParse::RecDescent - grammar Yet::Another::Grammar |
| |
| This would produce a file named F<Grammar.pm> containing the full |
| definition of a class called Yet::Another::Grammar. Of course, to use |
| that class, you would need to put the F<Grammar.pm> file in a |
| directory named F<Yet/Another>, somewhere in your Perl include path. |
| |
| Having created the new class, it's very easy to use it to build |
| a parser. You simply C<use> the new module, and then call its |
| C<new> method to create a parser object. For example: |
| |
| use Yet::Another::Grammar; |
| my $parser = Yet::Another::Grammar->new(); |
| |
| The effect of these two lines is exactly the same as: |
| |
| use Parse::RecDescent; |
| |
| open GRAMMAR_FILE, "grammar" or die; |
| local $/; |
| my $grammar = <GRAMMAR_FILE>; |
| |
| my $parser = Parse::RecDescent->new($grammar); |
| |
| only considerably faster. |
| |
| Note however that the parsers produced by either approach are exactly |
| the same, so whilst precompilation has an effect on I<set-up> speed, |
| it has no effect on I<parsing> speed. RecDescent 2.0 will address that |
| problem. |
| |
| =head3 Standalone Precompiled Parsers |
| |
| Until version 1.967003 of Parse::RecDescent, parser modules built with |
| C<Precompile> were dependent on Parse::RecDescent. Future |
| Parse::RecDescent releases with different internal implementations |
| would break pre-existing precompiled parsers. |
| |
| Version 1.967_005 added the ability for Parse::RecDescent to include |
| itself in the resulting .pm file if you pass the boolean option |
| C<-standalone> to C<Precompile>: |
| |
| Parse::RecDescent->Precompile({ -standalone = 1, }, |
| $grammar, "My::New::Parser"); |
| |
| Parse::RecDescent is included as Parse::RecDescent::_Runtime in order |
| to avoid conflicts between an installed version of Parse::RecDescent |
| and a precompiled, standalone parser made with another version of |
| Parse::RecDescent. This renaming is experimental, and is subject to |
| change in future versions. |
| |
| Precompiled parsers remain dependent on Parse::RecDescent by default, |
| as this feature is still considered experimental. In the future, |
| standalone parsers will become the default. |
| |
| =head1 GOTCHAS |
| |
| This section describes common mistakes that grammar writers seem to |
| make on a regular basis. |
| |
| =head2 1. Expecting an error to always invalidate a parse |
| |
| A common mistake when using error messages is to write the grammar like this: |
| |
| file: line(s) |
| |
| line: line_type_1 |
| | line_type_2 |
| | line_type_3 |
| | <error> |
| |
| The expectation seems to be that any line that is not of type 1, 2 or 3 will |
| invoke the C<E<lt>errorE<gt>> directive and thereby cause the parse to fail. |
| |
| Unfortunately, that only happens if the error occurs in the very first line. |
| The first rule states that a C<file> is matched by one or more lines, so if |
| even a single line succeeds, the first rule is completely satisfied and the |
| parse as a whole succeeds. That means that any error messages generated by |
| subsequent failures in the C<line> rule are quietly ignored. |
| |
| Typically what's really needed is this: |
| |
| file: line(s) eofile { $return = $item[1] } |
| |
| line: line_type_1 |
| | line_type_2 |
| | line_type_3 |
| | <error> |
| |
| eofile: /^\Z/ |
| |
| The addition of the C<eofile> subrule to the first production means that |
| a file only matches a series of successful C<line> matches I<that consume the |
| complete input text>. If any input text remains after the lines are matched, |
| there must have been an error in the last C<line>. In that case the C<eofile> |
| rule will fail, causing the entire C<file> rule to fail too. |
| |
| Note too that C<eofile> must match C</^\Z/> (end-of-text), I<not> |
| C</^\cZ/> or C</^\cD/> (end-of-file). |
| |
| And don't forget the action at the end of the production. If you just |
| write: |
| |
| file: line(s) eofile |
| |
| then the value returned by the C<file> rule will be the value of its |
| last item: C<eofile>. Since C<eofile> always returns an empty string |
| on success, that will cause the C<file> rule to return that empty |
| string. Apart from returning the wrong value, returning an empty string |
| will trip up code such as: |
| |
| $parser->file($filetext) || die; |
| |
| (since "" is false). |
| |
| Remember that Parse::RecDescent returns undef on failure, |
| so the only safe test for failure is: |
| |
| defined($parser->file($filetext)) || die; |
| |
| |
| =head2 2. Using a C<return> in an action |
| |
| An action is like a C<do> block inside the subroutine implementing the |
| surrounding rule. So if you put a C<return> statement in an action: |
| |
| range: '(' start '..' end )' |
| { return $item{end} } |
| /\s+/ |
| |
| that subroutine will immediately return, without checking the rest of |
| the items in the current production (e.g. the C</\s+/>) and without |
| setting up the necessary data structures to tell the parser that the |
| rule has succeeded. |
| |
| The correct way to set a return value in an action is to set the C<$return> |
| variable: |
| |
| range: '(' start '..' end )' |
| { $return = $item{end} } |
| /\s+/ |
| |
| |
| =head2 2. Setting C<$Parse::RecDescent::skip> at parse time |
| |
| If you want to change the default skipping behaviour (see |
| L<Terminal Separators> and the C<E<lt>skip:...E<gt>> directive) by setting |
| C<$Parse::RecDescent::skip> you have to remember to set this variable |
| I<before> creating the grammar object. |
| |
| For example, you might want to skip all Perl-like comments with this |
| regular expression: |
| |
| my $skip_spaces_and_comments = qr/ |
| (?mxs: |
| \s+ # either spaces |
| | \# .*?$ # or a dash and whatever up to the end of line |
| )* # repeated at will (in whatever order) |
| /; |
| |
| And then: |
| |
| my $parser1 = Parse::RecDescent->new($grammar); |
| |
| $Parse::RecDescent::skip = $skip_spaces_and_comments; |
| |
| my $parser2 = Parse::RecDescent->new($grammar); |
| |
| $parser1->parse($text); # this does not cope with comments |
| $parser2->parse($text); # this skips comments correctly |
| |
| The two parsers behave differently, because any skipping behaviour |
| specified via C<$Parse::RecDescent::skip> is hard-coded when the |
| grammar object is built, not at parse time. |
| |
| |
| =head1 DIAGNOSTICS |
| |
| Diagnostics are intended to be self-explanatory (particularly if you |
| use B<-RD_HINT> (under B<perl -s>) or define C<$::RD_HINT> inside the program). |
| |
| C<Parse::RecDescent> currently diagnoses the following: |
| |
| =over 4 |
| |
| =item * |
| |
| Invalid regular expressions used as pattern terminals (fatal error). |
| |
| =item * |
| |
| Invalid Perl code in code blocks (fatal error). |
| |
| =item * |
| |
| Lookahead used in the wrong place or in a nonsensical way (fatal error). |
| |
| =item * |
| |
| "Obvious" cases of left-recursion (fatal error). |
| |
| =item * |
| |
| Missing or extra components in a C<E<lt>leftopE<gt>> or C<E<lt>rightopE<gt>> |
| directive. |
| |
| =item * |
| |
| Unrecognisable components in the grammar specification (fatal error). |
| |
| =item * |
| |
| "Orphaned" rule components specified before the first rule (fatal error) |
| or after an C<E<lt>errorE<gt>> directive (level 3 warning). |
| |
| =item * |
| |
| Missing rule definitions (this only generates a level 3 warning, since you |
| may be providing them later via C<Parse::RecDescent::Extend()>). |
| |
| =item * |
| |
| Instances where greedy repetition behaviour will almost certainly |
| cause the failure of a production (a level 3 warning - see |
| L<"ON-GOING ISSUES AND FUTURE DIRECTIONS"> below). |
| |
| =item * |
| |
| Attempts to define rules named 'Replace' or 'Extend', which cannot be |
| called directly through the parser object because of the predefined |
| meaning of C<Parse::RecDescent::Replace> and |
| C<Parse::RecDescent::Extend>. (Only a level 2 warning is generated, since |
| such rules I<can> still be used as subrules). |
| |
| =item * |
| |
| Productions which consist of a single C<E<lt>error?E<gt>> |
| directive, and which therefore may succeed unexpectedly |
| (a level 2 warning, since this might conceivably be the desired effect). |
| |
| =item * |
| |
| Multiple consecutive lookahead specifiers (a level 1 warning only, since their |
| effects simply accumulate). |
| |
| =item * |
| |
| Productions which start with a C<E<lt>rejectE<gt>> or C<E<lt>rulevar:...E<gt>> |
| directive. Such productions are optimized away (a level 1 warning). |
| |
| =item * |
| |
| Rules which are autogenerated under C<$::AUTOSTUB> (a level 1 warning). |
| |
| =back |
| |
| =head1 AUTHOR |
| |
| Damian Conway (damian@conway.org) |
| Jeremy T. Braun (JTBRAUN@CPAN.org) [current maintainer] |
| |
| =head1 BUGS AND IRRITATIONS |
| |
| There are undoubtedly serious bugs lurking somewhere in this much code :-) |
| Bug reports, test cases and other feedback are most welcome. |
| |
| Ongoing annoyances include: |
| |
| =over 4 |
| |
| =item * |
| |
| There's no support for parsing directly from an input stream. |
| If and when the Perl Gods give us regular expressions on streams, |
| this should be trivial (ahem!) to implement. |
| |
| =item * |
| |
| The parser generator can get confused if actions aren't properly |
| closed or if they contain particularly nasty Perl syntax errors |
| (especially unmatched curly brackets). |
| |
| =item * |
| |
| The generator only detects the most obvious form of left recursion |
| (potential recursion on the first subrule in a rule). More subtle |
| forms of left recursion (for example, through the second item in a |
| rule after a "zero" match of a preceding "zero-or-more" repetition, |
| or after a match of a subrule with an empty production) are not found. |
| |
| =item * |
| |
| Instead of complaining about left-recursion, the generator should |
| silently transform the grammar to remove it. Don't expect this |
| feature any time soon as it would require a more sophisticated |
| approach to parser generation than is currently used. |
| |
| =item * |
| |
| The generated parsers don't always run as fast as might be wished. |
| |
| =item * |
| |
| The meta-parser should be bootstrapped using C<Parse::RecDescent> :-) |
| |
| =back |
| |
| =head1 ON-GOING ISSUES AND FUTURE DIRECTIONS |
| |
| =over 4 |
| |
| =item 1. |
| |
| Repetitions are "incorrigibly greedy" in that they will eat everything they can |
| and won't backtrack if that behaviour causes a production to fail needlessly. |
| So, for example: |
| |
| rule: subrule(s) subrule |
| |
| will I<never> succeed, because the repetition will eat all the |
| subrules it finds, leaving none to match the second item. Such |
| constructions are relatively rare (and C<Parse::RecDescent::new> generates a |
| warning whenever they occur) so this may not be a problem, especially |
| since the insatiable behaviour can be overcome "manually" by writing: |
| |
| rule: penultimate_subrule(s) subrule |
| |
| penultimate_subrule: subrule ...subrule |
| |
| The issue is that this construction is exactly twice as expensive as the |
| original, whereas backtracking would add only 1/I<N> to the cost (for |
| matching I<N> repetitions of C<subrule>). I would welcome feedback on |
| the need for backtracking; particularly on cases where the lack of it |
| makes parsing performance problematical. |
| |
| =item 2. |
| |
| Having opened that can of worms, it's also necessary to consider whether there |
| is a need for non-greedy repetition specifiers. Again, it's possible (at some |
| cost) to manually provide the required functionality: |
| |
| rule: nongreedy_subrule(s) othersubrule |
| |
| nongreedy_subrule: subrule ...!othersubrule |
| |
| Overall, the issue is whether the benefit of this extra functionality |
| outweighs the drawbacks of further complicating the (currently |
| minimalist) grammar specification syntax, and (worse) introducing more overhead |
| into the generated parsers. |
| |
| =item 3. |
| |
| An C<E<lt>autocommitE<gt>> directive would be nice. That is, it would be useful to be |
| able to say: |
| |
| command: <autocommit> |
| command: 'find' name |
| | 'find' address |
| | 'do' command 'at' time 'if' condition |
| | 'do' command 'at' time |
| | 'do' command |
| | unusual_command |
| |
| and have the generator work out that this should be "pruned" thus: |
| |
| command: 'find' name |
| | 'find' <commit> address |
| | 'do' <commit> command <uncommit> |
| 'at' time |
| 'if' <commit> condition |
| | 'do' <commit> command <uncommit> |
| 'at' <commit> time |
| | 'do' <commit> command |
| | unusual_command |
| |
| There are several issues here. Firstly, should the |
| C<E<lt>autocommitE<gt>> automatically install an C<E<lt>uncommitE<gt>> |
| at the start of the last production (on the grounds that the "command" |
| rule doesn't know whether an "unusual_command" might start with "find" |
| or "do") or should the "unusual_command" subgraph be analysed (to see |
| if it I<might> be viable after a "find" or "do")? |
| |
| The second issue is how regular expressions should be treated. The simplest |
| approach would be simply to uncommit before them (on the grounds that they |
| I<might> match). Better efficiency would be obtained by analyzing all preceding |
| literal tokens to determine whether the pattern would match them. |
| |
| Overall, the issues are: can such automated "pruning" approach a hand-tuned |
| version sufficiently closely to warrant the extra set-up expense, and (more |
| importantly) is the problem important enough to even warrant the non-trivial |
| effort of building an automated solution? |
| |
| =back |
| |
| =head1 SUPPORT |
| |
| =head2 Source Code Repository |
| |
| L<http://github.com/jtbraun/Parse-RecDescent> |
| |
| =head2 Mailing List |
| |
| Visit L<http://www.perlfoundation.org/perl5/index.cgi?parse_recdescent> to sign up for the mailing list. |
| |
| L<http://www.PerlMonks.org> is also a good place to ask |
| questions. Previous posts about Parse::RecDescent can typically be |
| found with this search: |
| L<http://perlmonks.org/index.pl?node=recdescent>. |
| |
| =head2 FAQ |
| |
| Visit L<Parse::RecDescent::FAQ> for answers to frequently (and not so |
| frequently) asked questions about Parse::RecDescent. |
| |
| =head2 View/Report Bugs |
| |
| To view the current bug list or report a new issue visit |
| L<https://rt.cpan.org/Public/Dist/Display.html?Name=Parse-RecDescent>. |
| |
| =head1 SEE ALSO |
| |
| L<Regexp::Grammars> provides Parse::RecDescent style parsing using native |
| Perl 5.10 regular expressions. |
| |
| |
| =head1 LICENCE AND COPYRIGHT |
| |
| Copyright (c) 1997-2007, Damian Conway C<< <DCONWAY@CPAN.org> >>. All rights |
| reserved. |
| |
| This module is free software; you can redistribute it and/or |
| modify it under the same terms as Perl itself. See L<perlartistic>. |
| |
| |
| =head1 DISCLAIMER OF WARRANTY |
| |
| BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY |
| FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN |
| OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES |
| PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER |
| EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE |
| ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH |
| YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL |
| NECESSARY SERVICING, REPAIR, OR CORRECTION. |
| |
| IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING |
| WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR |
| REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE |
| LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, |
| OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE |
| THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING |
| RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A |
| FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF |
| SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF |
| SUCH DAMAGES. |