use QRegex; use NQPHLL; use QAST; class QRegex::P5Regex::World is HLL::World { method create_code($ast, $name) { # Create a fresh stub code, and set its name. my $dummy := nqp::freshcoderef(-> { nqp::die("Uncompiled code executed") }); nqp::setcodename($dummy, $name); # Tag it as a static code ref and add it to the root code refs set. nqp::markcodestatic($dummy); self.add_root_code_ref($dummy, $ast); # Create code object. my $code_obj := nqp::create(NQPRegex); nqp::bindattr($code_obj, NQPRegex, '$!do', $dummy); my $slot := self.add_object($code_obj); # Add fixup of the code object and the $!do attribute. my $fixups := QAST::Stmt.new(); $fixups.push(QAST::Op.new( :op('bindattr'), QAST::WVal.new( :value($code_obj) ), QAST::WVal.new( :value(NQPRegex) ), QAST::SVal.new( :value('$!do') ), QAST::BVal.new( :value($ast) ) )); $fixups.push(QAST::Op.new( :op('setcodeobj'), QAST::BVal.new( :value($ast) ), QAST::WVal.new( :value($code_obj) ) )); self.add_fixup_task(:fixup_ast($fixups)); $code_obj } } grammar QRegex::P5Regex::Grammar is HLL::Grammar { my $cur_handle := 0; token TOP { :my %*RX; :my $*INTERPOLATE := 1; :my $handle := '__QREGEX_P5REGEX__' ~ $cur_handle++; :my $*W := QRegex::P5Regex::World.new(:$handle); [ $ || <.panic: 'Confused'> ] } token nibbler { :my $OLDRX := nqp::getlexdyn('%*RX'); :my %*RX; { for $OLDRX { %*RX{$_.key} := $_.value; } } } token rxstopper { $ } token alternation { + % '|' } token sequence { <.ws> # XXX assuming old /x here? * } token quantified_atom { [ <.ws> > ]**0..1 <.ws> } token atom { [ | \w | | {} \W ] } proto token p5metachar { <...> } token p5metachar:sym { <.panic: "quantifier quantifies nothing"> } token p5metachar:sym { \\ } token p5metachar:sym<.> { } token p5metachar:sym<^> { } token p5metachar:sym<$> { '$' } token p5metachar:sym<(? )> { '(?' [ | '<' $=[<-[>]>+] '>' {} | "'" $=[<-[']>+] "'" {} | ] [ ')' || <.panic: "Perl 5 named capture group not terminated by parenthesis"> ] } token p5metachar:sym<(?: )> { '(?:' {} ')' } token p5metachar:sym<( )> { '(' {} ')' } token p5metachar:sym<[ ]> { } token cclass { :my $astfirst := 0; '[' $=['^'|] [ || $=( ( '\\' || ( <-[\\]> || <-[\]\\]>) ) [ \s* '-' \s* ( '\\' || (<-[\]\\]>) ) ]**0..1 { ++$astfirst } )+ ']' || <.panic: "failed to parse character class; unescaped ']'?"> ] } proto token p5backslash { <...> } token p5backslash:sym { } token p5backslash:sym { $=[<[bB]>] } token p5backslash:sym { } token p5backslash:sym { } token p5backslash:sym { $=[<[dDnNsSwW]>] } token p5backslash:sym { } token p5backslash:sym { [ | $=[ <[ 0..9 a..f A..F ]>**0..2 ] | '{' ~ '}' $=[ <[ 0..9 a..f A..F ]>* ] ] } token p5backslash:sym { } token p5backslash:sym { } token p5backslash:sym { } token p5backslash:sym { } token p5backslash:sym { $=(\W) | $=(\d+) } token p5backslash:sym { <.panic: "Unrecognized Perl 5 regex backslash sequence"> } proto token p5assertion { <...> } token p5assertion:sym«<» { $=['='|'!'] [ | ] } token p5assertion:sym<=> { [ | ] } token p5assertion:sym { [ | ] } token p5mod { <[imsox]>* } token p5mods { [ '-' ]**0..1 } token p5assertion:sym { :my %*OLDRX := nqp::getlexdyn('%*RX'); :my %*RX; { for %*OLDRX { %*RX{$_.key} := $_.value; } } [ | ':' **0..1 | ] } proto token p5quantifier { <...> } token p5quantifier:sym<*> { } token p5quantifier:sym<+> { } token p5quantifier:sym { } token p5quantifier:sym<{ }> { '{' $=[\d+] [ $=',' $=[\d*] ]**0..1 '}' } token quantmod { [ '?' | '+' ]? } token ws { [ | '(?#' ~ ')' <-[)]>* | }> [ \s+ | '#' \N* ] ]* } # XXX Below here is straight from P6Regex and unreviewed. token normspace { <.ws> } token identifier { <.ident> [ <[\-']> <.ident> ]* } token arg { [ | | | $=[\d+] ] } rule arglist { [ ',' ]* } proto token metachar { <...> } token metachar:sym<'> { } token metachar:sym<"> { } token metachar:sym { $=['<<'|'«'] } token metachar:sym { $=['>>'|'»'] } token metachar:sym { '<(' } token metachar:sym { ')>' } token metachar:sym { [ | '$<' $=[<-[>]>+] '>' | '$' $=[\d+] ] [ <.ws> '=' <.ws> ]**0..1 } proto token backslash { <...> } token backslash:sym { $=[<[eE]>] } token backslash:sym { $=[<[fF]>] } token backslash:sym { $=[<[hH]>] } token backslash:sym { $=[<[rR]>] } token backslash:sym { $=[<[tT]>] } token backslash:sym { $=[<[vV]>] } token backslash:sym { $=[<[oO]>] [ | '[' ']' ] } token backslash:sym { $=[<[xX]>] [ | '[' ']' ] } token backslash:sym { $=[<[cC]>] } token backslash:sym { 'A' <.obs: '\\A as beginning-of-string matcher', '^'> } token backslash:sym { 'z' <.obs: '\\z as end-of-string matcher', '$'> } token backslash:sym { 'Z' <.obs: '\\Z as end-of-string matcher', '\\n?$'> } token backslash:sym { 'Q' <.obs: '\\Q as quotemeta', 'quotes or literal variable match'> } token backslash:sym { {} \w <.panic: 'Unrecognized backslash sequence'> } proto token assertion { <...> } token assertion:sym { [ | '> | '=' | ':' | '(' ')' | <.normspace> ]? } }