class QRegex::P5Regex::Actions is HLL::Actions { method TOP($/) { make QAST::CompUnit.new( :hll('P5Regex'), :sc($*W.sc()), :code_ref_blocks($*W.code_ref_blocks()), :compilation_mode(0), :pre_deserialize($*W.load_dependency_tasks()), :post_deserialize($*W.fixup_tasks()), self.qbuildsub($.ast, :anon(1), :addself(1)) ); } method nibbler($/) { make $.ast } method alternation($/) { my $qast := $[0].ast; if +$ > 1 { $qast := QAST::Regex.new( :rxtype, :node($/) ); for $ { $qast.push($_.ast); } } make $qast; } method sequence($/) { if $ { my $qast := QAST::Regex.new( :rxtype, :node($/) ); my $lastlit := 0; for $ { my $ast := $_.ast; if $ast { if $lastlit && $ast.rxtype eq 'literal' && !QAST::Node.ACCEPTS($ast[0]) { $lastlit[0] := $lastlit[0] ~ $ast[0]; } else { $qast.push($_.ast); $lastlit := $ast.rxtype eq 'literal' && !QAST::Node.ACCEPTS($ast[0]) ?? $ast !! 0; } } } make $qast; } else { make QAST::Regex.new( :rxtype, :name, :node($/) ); } } method quantified_atom($/) { my $qast := $.ast; if $ { my $ast := $[0].ast; $ast.unshift($qast || QAST::Regex.new( :rxtype, :name )); $qast := $ast; } $qast.backtrack('r') if $qast && !$qast.backtrack && %*RX; make $qast; } method atom($/) { if $ { make $.ast; } elsif $ { my $qast := QAST::Regex.new( ~$, :rxtype, :node($/)); make $qast; } else { my $qast := QAST::Regex.new( ~$/, :rxtype, :node($/)); $qast.subtype('ignorecase') if %*RX; make $qast; } } method p5metachar:sym($/) { make $.ast; } method p5metachar:sym<.>($/) { make %*RX ?? QAST::Regex.new( :rxtype, :name<.>, :node($/) ) !! QAST::Regex.new( :rxtype, :name, :negate(1), :node($/) ); } method p5metachar:sym<^>($/) { make QAST::Regex.new( :rxtype, :subtype(%*RX ?? 'bol' !! 'bos'), :node($/) ); } method p5metachar:sym<$>($/) { make QAST::Regex.new( :rxtype('concat'), QAST::Regex.new( :rxtype('quant'), :min(0), :max(1), QAST::Regex.new( :rxtype('cclass'), :name ) ), QAST::Regex.new( :rxtype, :subtype(%*RX ?? 'eol' !! 'eos'), :node($/) ) ); } method p5metachar:sym<(? )>($/) { # like P6's $=[ ... ] my $qast; if $ { $qast := QAST::Regex.new( :rxtype, :name(~$), $.ast, :node($/) ); } else { $qast := $.ast; } make $qast; } method p5metachar:sym<(?: )>($/) { make $.ast; } method p5metachar:sym<( )>($/) { make QAST::Regex.new( :rxtype, :node($/), $.ast ); } method p5metachar:sym<[ ]>($/) { make $.ast; } method cclass($/) { my $str := ''; my $qast; my @alts; for $ { if $_[1] { my $node; my $lhs; my $rhs; if $_[0] { $node := $_[0].ast; $/.panic("Illegal range endpoint in regex: " ~ ~$_) if $node.rxtype ne 'literal' && $node.rxtype ne 'enumcharlist' || $node.negate || nqp::chars($node[0]) != 1; $lhs := $node[0]; } else { $lhs := ~$_[0][0]; } if $_[1][0] { $node := $_[1][0].ast; $/.panic("Illegal range endpoint in regex: " ~ ~$_) if $node.rxtype ne 'literal' && $node.rxtype ne 'enumcharlist' || $node.negate || nqp::chars($node[0]) != 1; $rhs := $node[0]; } else { $rhs := ~$_[1][0][0]; } sub add_range($from, $to) { my int $ord0 := nqp::ord($from); my int $ord1 := nqp::ord($to); $/.panic("Illegal reversed character range in regex: " ~ ~$_) if $ord0 > $ord1; $str := nqp::concat($str, nqp::chr($ord0++)) while $ord0 <= $ord1; } if %*RX { add_range(nqp::lc($lhs), nqp::lc($rhs)); add_range(nqp::uc($lhs), nqp::uc($rhs)); } else { add_range($lhs, $rhs); } } elsif $_[0] { my $bs := $_[0].ast; $bs.negate(!$bs.negate) if $ eq '^'; @alts.push($bs); } else { my $c := ~$_[0]; $str := $str ~ (%*RX ?? nqp::lc($c) ~ nqp::uc($c) !! $c); } } @alts.push(QAST::Regex.new( $str, :rxtype, :node($/), :negate( $ eq '^' ) )) if nqp::chars($str); $qast := +@alts == 1 ?? @alts[0] !! $ eq '^' ?? QAST::Regex.new( :rxtype, :node($/), QAST::Regex.new( :rxtype, :subtype, |@alts ), QAST::Regex.new( :rxtype, :name<.> ) ) !! QAST::Regex.new( :rxtype, |@alts ); make $qast; } method p5backslash:sym($/) { make QAST::Regex.new( :rxtype, :subtype, :node($/) ); } method p5backslash:sym($/) { make QAST::Regex.new(:rxtype, :subtype, :node($/), :negate($ eq 'B'), :name(''), QAST::NodeList.new( QAST::SVal.new( :value('wb') ) )); } method p5backslash:sym($/) { make QAST::Regex.new( #?if js nqp::chr(0x2000) ~ nqp::chr(0x2001) ~ # HACK workaround for a cross compiling problem #?endif "\x[09,20,a0,1680,180e,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,200a,202f,205f,3000]", :rxtype('enumcharlist'), :negate($ eq 'H'), :node($/) ); } method p5backslash:sym($/) { make QAST::Regex.new( "\r", :rxtype('enumcharlist'), :node($/) ); } method p5backslash:sym($/) { make QAST::Regex.new( :rxtype, :name( 'n' ), :node($/) ); } method p5backslash:sym($/) { make QAST::Regex.new(:rxtype, :name( nqp::lc(~$) ), :negate($ le 'Z'), :node($/)); } method p5backslash:sym($/) { make QAST::Regex.new( "\t", :rxtype('enumcharlist'), :negate($ eq 'T'), :node($/) ); } method p5backslash:sym($/) { make QAST::Regex.new( "\x[0a,0b,0c,0d,85,2028,2029]", :rxtype('enumcharlist'), :negate($ eq 'V'), :node($/) ); } method p5backslash:sym($/) { my $hexlit := nqp::chars($) ?? nqp::chr( self.string_to_int($, 16) ) !! nqp::chr(0); make QAST::Regex.new( $hexlit, :rxtype('literal'), :node($/) ); } method p5backslash:sym($/) { make QAST::Regex.new( :rxtype, :subtype, :node($/) ); } method p5backslash:sym($/) { make QAST::Regex.new( :rxtype('concat'), QAST::Regex.new( :rxtype('quant'), :min(0), :max(1), QAST::Regex.new( :rxtype('cclass'), :name ) ), QAST::Regex.new( :rxtype, :subtype('eos'), :node($/) ) ); } method p5backslash:sym($/) { if $ { make QAST::Regex.new( ~$ , :rxtype('literal'), :node($/) ); } else { make QAST::Regex.new( :rxtype, :subtype, :node($/), QAST::NodeList.new( QAST::SVal.new( :value('!BACKREF-LATEST-CAPTURE') ), QAST::SVal.new( :value(~$ - 1) ) ) ); } } method p5assertion:sym«<»($/) { if $ { make QAST::Regex.new( :rxtype, :subtype, :negate($ eq '!'), :node($/), QAST::NodeList.new( QAST::SVal.new( :value('after') ), self.qbuildsub(self.flip_ast($.ast), :anon(1), :addself(1)) )); } else { make 0; } } method p5assertion:sym<=>($/) { if $ { make QAST::Regex.new( :rxtype, :subtype, :node($/), QAST::NodeList.new( QAST::SVal.new( :value('before') ), self.qbuildsub($.ast, :anon(1), :addself(1)) )); } else { make 0; } } method p5assertion:sym($/) { if $ { make QAST::Regex.new( :rxtype, :subtype, :negate(1), :node($/), QAST::NodeList.new( QAST::SVal.new( :value('before') ), self.qbuildsub($.ast, :anon(1), :addself(1)) )); } else { make 0; } } method p5mods($/) { for nqp::split('', ~$) { %*RX{$_} := 1; } if $ { for nqp::split('', ~$[0]) { %*RX{$_} := 0; } } } method p5assertion:sym($/) { if $ { make $[0].ast; } else { for %*RX { %*OLDRX{$_.key} := $_.value; } make 0; } } method p5quantifier:sym<*>($/) { my $qast := QAST::Regex.new( :rxtype, :min(0), :max(-1), :node($/) ); make quantmod($qast, $); } method p5quantifier:sym<+>($/) { my $qast := QAST::Regex.new( :rxtype, :min(1), :max(-1), :node($/) ); make quantmod($qast, $); } method p5quantifier:sym($/) { my $qast := QAST::Regex.new( :rxtype, :min(0), :max(1), :node($/) ); make quantmod($qast, ~$); } method p5quantifier:sym<{ }>($/) { my $qast; $qast := QAST::Regex.new( :rxtype, :min(nqp::radix(10, $, 0, 0)[0]), :node($/) ); if $ && ~$[0] ne '' { $qast.max(nqp::radix(10, $[0], 0, 0)[0]); } elsif $ { $qast.max(-1); } else { $qast.max($qast.min); } make quantmod($qast, $); } sub quantmod($ast, $mod) { if $mod eq '?' { $ast.backtrack('f') } elsif $mod eq '+' { $ast.backtrack('g') } $ast; } method qbuildsub($qast, $block = QAST::Block.new(), :$anon, :$addself, *%rest) { my $*LANG := $qast.node; my $code_obj := nqp::existskey(%rest, 'code_obj') ?? %rest !! self.create_regex_code_object($block); if $addself { $block.push(QAST::Var.new( :name('self'), :scope('local'), :decl('param') )); } unless $block.symbol('$¢') { $block.push(QAST::Var.new(:name<$¢>, :scope, :decl('var'))); $block.symbol('$¢', :scope); } self.store_regex_caps($code_obj, $block, capnames($qast, 0)); self.store_regex_nfa($code_obj, $block, QRegex::NFA.new.addnode($qast)); $block.annotate('orig_qast', $qast); $qast := QAST::Regex.new( :rxtype, QAST::Regex.new( :rxtype ), $qast, ($anon ?? QAST::Regex.new( :rxtype ) !! QAST::Regex.new( :rxtype, :name(%*RX) ))); $block.push($qast); $block; } sub capnames($ast, int $count) { my %capnames; my $rxtype := $ast.rxtype; if $rxtype eq 'concat' { for $ast.list { my %x := capnames($_, $count); for %x { %capnames{$_.key} := nqp::add_i((%capnames{$_.key} // 0), $_.value); } $count := %x{''}; } } elsif $rxtype eq 'altseq' || $rxtype eq 'alt' { my $max := $count; for $ast.list { my %x := capnames($_, $count); for %x { %capnames{$_.key} := (%capnames{$_.key} // 0) < 2 && %x{$_.key} == 1 ?? 1 !! 2; } $count := %x{''}; } } elsif $rxtype eq 'subrule' && $ast.subtype eq 'capture' { my $name := $ast.name; if $name eq '' { $name := $count; $ast.name($name); } my @names := nqp::split('=', $name); for @names { $count := $_ + 1 if $_ eq '0' # check for named capture before numifying || nqp::iscclass(nqp::const::CCLASS_NUMERIC, $_, 0) && $_ > 0; %capnames{$_} := 1; } } elsif $rxtype eq 'subcapture' { my $name := $ast.name; if $name eq '' { $name := $count; $ast.name($name); } for nqp::split(' ', $name) { $count := $_ + 1 if $_ eq '0' # check for named capture before numifying || nqp::iscclass(nqp::const::CCLASS_NUMERIC, $_, 0) && $_ > 0; %capnames{$_} := 1; } my %x := capnames($ast[0], $count); for %x { %capnames{$_.key} := nqp::add_i((%capnames{$_.key} // 0), %x{$_.key}) } $count := %x{''}; } elsif $rxtype eq 'quant' { my %x := capnames($ast[0], $count); for %x { %capnames{$_.key} := nqp::add_i((%capnames{$_.key} // 0), $_.value); } $count := %x{''}; } %capnames{''} := $count; nqp::deletekey(%capnames, '$!from'); nqp::deletekey(%capnames, '$!to'); %capnames; } method flip_ast($qast) { return $qast unless nqp::istype($qast, QAST::Regex); if $qast.rxtype eq 'literal' { $qast[0] := nqp::flip($qast[0]); } elsif $qast.rxtype eq 'concat' { my @tmp; while nqp::elems(@($qast)) { @tmp.push(@($qast).shift) } while @tmp { @($qast).push(self.flip_ast(@tmp.pop)) } } else { for @($qast) { self.flip_ast($_) } } $qast } # This is overridden by a compiler that wants to create code objects # for regexes. We just use the standard NQP one in standalone mode. method create_regex_code_object($block) { $*W.create_code($block, $block.name); } # Stores the captures info for a regex. method store_regex_caps($code_obj, $block, %caps) { $code_obj.SET_CAPS(%caps); } # Override this to store the overall NFA for a regex. (Standalone mode doesn't need # it, as it only supports executing individual regexes). method store_regex_nfa($code_obj, $block, $nfa) { } # XXX Below here copied from p6regex; needs review method metachar:sym<'>($/) { my $quote := $.ast; if QAST::SVal.ACCEPTS($quote) { $quote := $quote.value; } my $qast := QAST::Regex.new( $quote, :rxtype, :node($/) ); $qast.subtype('ignorecase') if %*RX; make $qast; } method metachar:sym<">($/) { my $quote := $.ast; if QAST::SVal.ACCEPTS($quote) { $quote := $quote.value; } my $qast := QAST::Regex.new( $quote, :rxtype, :node($/) ); $qast.subtype('ignorecase') if %*RX; make $qast; } method metachar:sym($/) { make QAST::Regex.new( :rxtype, :subtype, :node($/) ); } method metachar:sym($/) { make QAST::Regex.new( :rxtype, :subtype, :node($/) ); } method metachar:sym($/) { make QAST::Regex.new( :rxtype, :subtype, :backtrack, :name<$!from>, :node($/), QAST::NodeList.new( QAST::SVal.new( :value('!LITERAL') ), QAST::SVal.new( :value('') ) ) ); } method metachar:sym($/) { make QAST::Regex.new( :rxtype, :subtype, :backtrack, :name<$!to>, :node($/), QAST::NodeList.new( QAST::SVal.new( :value('!LITERAL') ), QAST::SVal.new( :value('') ) ) ); } method metachar:sym($/) { make $.ast; } method metachar:sym($/) { my $qast; my $name := $ ?? nqp::radix(10, $, 0, 0)[0] !! ~$; if $ { $qast := $[0].ast; if $qast.rxtype eq 'quant' && $qast[0].rxtype eq 'subrule' { self.subrule_alias($qast[0], $name); } elsif $qast.rxtype eq 'subrule' { self.subrule_alias($qast, $name); } else { $qast := QAST::Regex.new( $qast, :name($name), :rxtype, :node($/) ); } } else { $qast := QAST::Regex.new( :rxtype, :subtype, :node($/), QAST::NodeList.new( QAST::SVal.new( :value('!BACKREF-LATEST-CAPTURE') ), QAST::SVal.new( :value($name) ) ) ); } make $qast; } method backslash:sym($/) { my $qast := QAST::Regex.new( "\c[27]", :rxtype('enumcharlist'), :negate($ eq 'E'), :node($/) ); make $qast; } method backslash:sym($/) { my $qast := QAST::Regex.new( "\c[12]", :rxtype('enumcharlist'), :negate($ eq 'F'), :node($/) ); make $qast; } method backslash:sym($/) { my $qast := QAST::Regex.new( #?if js nqp::chr(0x2000) ~ nqp::chr(0x2001) ~ # HACK workaround for a cross compiling problem #?endif "\x[09,20,a0,1680,180e,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,200a,202f,205f,3000]", :rxtype('enumcharlist'), :negate($ eq 'H'), :node($/) ); make $qast; } method backslash:sym($/) { my $qast := QAST::Regex.new( "\r", :rxtype('enumcharlist'), :negate($ eq 'R'), :node($/) ); make $qast; } method backslash:sym($/) { my $qast := QAST::Regex.new( "\t", :rxtype('enumcharlist'), :negate($ eq 'T'), :node($/) ); make $qast; } method backslash:sym($/) { my $qast := QAST::Regex.new( "\x[0a,0b,0c,0d,85,2028,2029]", :rxtype('enumcharlist'), :negate($ eq 'V'), :node($/) ); make $qast; } method backslash:sym($/) { my $octlit := HLL::Actions.ints_to_string( $ || $ ); make $ eq 'O' ?? QAST::Regex.new( $octlit, :rxtype('enumcharlist'), :negate(1), :node($/) ) !! QAST::Regex.new( $octlit, :rxtype('literal'), :node($/) ); } method backslash:sym($/) { my $hexlit := HLL::Actions.ints_to_string( $ || $ ); make $ eq 'X' ?? QAST::Regex.new( $hexlit, :rxtype('enumcharlist'), :negate(1), :node($/) ) !! QAST::Regex.new( $hexlit, :rxtype('literal'), :node($/) ); } method backslash:sym($/) { make QAST::Regex.new( $.ast, :rxtype('literal'), :node($/) ); } method assertion:sym($/) { my $name := ~$; my $qast; if $ { $qast := $.ast; if $qast.rxtype eq 'subrule' { self.subrule_alias($qast, $name); } else { $qast := QAST::Regex.new( $qast, :name($name), :rxtype, :node($/) ); } } elsif $name eq 'sym' { my $loc := nqp::index(%*RX, ':sym<'); $loc := nqp::index(%*RX, ':sym«') if $loc < 0; my $rxname := nqp::substr(%*RX, $loc + 5); $rxname := nqp::substr($rxname, 0, nqp::chars($rxname) - 1); $qast := QAST::Regex.new(:name('sym'), :rxtype, :node($/), QAST::Regex.new(:rxtype, $rxname, :node($/))); } else { $qast := QAST::Regex.new(:rxtype, :subtype, :node($/), :name($name), QAST::NodeList.new(QAST::SVal.new( :value($name) ))); if $ { for $.ast.list { $qast[0].push( $_ ) } } elsif $ { $name eq 'after' ?? $qast[0].push(self.qbuildsub(self.flip_ast($.ast), :anon(1), :addself(1))) !! $qast[0].push(self.qbuildsub($.ast, :anon(1), :addself(1))); } } make $qast; } method arg($/) { make $ ?? $.ast !! +$; } method arglist($/) { my $ast := QAST::Op.new( :op('list') ); for $ { $ast.push( $_.ast ); } make $ast; } method subrule_alias($ast, $name) { if $ast.name gt '' { $ast.name( $name ~ '=' ~ $ast.name ); } else { $ast.name($name); } $ast.subtype('capture'); } }