class QRegex::P6Regex::Actions is HLL::Actions { method TOP($/) { make QAST::CompUnit.new( :hll('P6Regex'), :sc($*W.sc()), :code_ref_blocks($*W.code_ref_blocks()), :compilation_mode(0), :pre_deserialize($*W.load_dependency_tasks()), :post_deserialize($*W.fixup_tasks()), self.qbuildsub($.ast, :node($/), :anon(1), :addself(1)) ); } method nibbler($/) { make $.ast } method termseq($/) { make $.ast if $ } method termaltseq($/) { my $qast := $[0].ast; if nqp::elems($) > 1 { $qast := QAST::Regex.new( :rxtype, :node($/) ); for $ { $qast.push($_.ast) } } make $qast; } method termconjseq($/) { my $qast := $[0].ast; if nqp::elems($) > 1 { $qast := QAST::Regex.new( :rxtype, :node($/) ); for $ { $qast.push($_.ast); } } make $qast; } method termalt($/) { my $qast := $[0].ast; if nqp::elems($) > 1 { $qast := QAST::Regex.new( :rxtype, :node($/) ); for $ { $qast.push($_.ast) } } make $qast; } method termconj($/) { my $qast := $[0].ast; if nqp::elems($) > 1 { $qast := QAST::Regex.new( :rxtype, :node($/) ); for $ { $qast.push($_.ast); } } make $qast; } method termish($/) { my $qast := QAST::Regex.new( :rxtype, :node($/) ); my $lastlit := 0; my $last_noun; for $ { my $ast := $_.ast; if $ast { if $lastlit && $ast.rxtype eq 'literal' && !QAST::Node.ACCEPTS($ast[0]) && $lastlit.subtype eq $ast.subtype { $lastlit[0] := $lastlit[0] ~ $ast[0]; } elsif $last_noun && $last_noun eq '\r' && $_ eq '\n' && !$ast.negate && !$last_noun.ast.negate { $qast.pop(); $qast.push(QAST::Regex.new( :rxtype, "\r\n" )); } else { $qast.push($_.ast); $lastlit := $ast.rxtype eq 'literal' && !QAST::Node.ACCEPTS($ast[0]) ?? $ast !! 0; } } $last_noun := $_; } make $qast; } method quantified_atom($/) { my $qast := $.ast; my $sigmaybe := $.ast if $; $qast := QAST::Regex.new(:rxtype, $qast, $sigmaybe) if $sigmaybe; if $ { $/.panic('Quantifier quantifies nothing') unless $qast; my str $rxtype := $qast.rxtype; $/.throw_non_quantifiable() if $rxtype eq 'qastnode' || $rxtype eq 'anchor'; my $ast := $.ast; $ast.unshift($qast); $qast := $ast; } if $ { if $qast.rxtype ne 'quant' && $qast.rxtype ne 'dynquant' { $/.panic("'" ~ $ ~ "' may only be used immediately following a quantifier") } $qast.push($.ast); if $ eq '%%' { $qast := QAST::Regex.new( :rxtype, $qast, QAST::Regex.new( :rxtype, :min(0), :max(1), $.ast )); } } my $sigfinal := $.ast if $; $qast := QAST::Regex.new(:rxtype, $qast, $sigfinal) if $sigfinal; if $qast { $qast.backtrack('r') if !$qast.backtrack && nqp::if($, (~$ eq ':'), %*RX); $qast.node($/); } make $qast; } method separator($/) { make $.ast; } method atom($/) { if $ { make $.ast; } else { my $qast := QAST::Regex.new( ~$/, :rxtype, :node($/)); make self.apply_literal_modifiers($qast); } } method sigmaybe:sym($/) { make QAST::Regex.new( :rxtype, :subtype, :node($/), :name, QAST::NodeList.new(QAST::SVal.new( :value('ws') )) ); } method quantifier:sym<*>($/) { my $qast := QAST::Regex.new( :rxtype, :min(0), :max(-1), :node($/) ); make backmod($qast, $); } method quantifier:sym<+>($/) { my $qast := QAST::Regex.new( :rxtype, :min(1), :max(-1), :node($/) ); make backmod($qast, $); } method quantifier:sym($/) { my $qast := QAST::Regex.new( :rxtype, :subtype, :min(0), :max(1), :node($/) ); make backmod($qast, $); } method quantifier:sym<**>($/) { my $qast; if $ { $qast := QAST::Regex.new( :rxtype, :node($/), QAST::Op.new( :op('callmethod'), :name('!DYNQUANT_LIMITS'), QAST::Var.new( :name('$¢'), :scope('lexical') ), $.ast ), ); } else { my $min := 0; if $ { $min := nqp::radix(10, $, 0, 0)[0]; } my $max := -1; my $upto := $; if $ eq '^' { ++$min } if ! $ { $max := $min } elsif $ ne '*' { $max := nqp::radix(10, $, 0, 0)[0]; if $ eq '^' { --$max; } $/.panic("Empty range") if $min > $max; } $qast := QAST::Regex.new( :rxtype, :min($min), :max($max), :node($/) ); } make backmod($qast, $); } method codeblock($/) { my $block := $.ast; $block.blocktype('immediate'); my $ast := QAST::Stmts.new( QAST::Op.new( :op('bind'), QAST::Var.new( :name('$/'), :scope('lexical') ), QAST::Op.new( QAST::Var.new( :name('$¢'), :scope('lexical') ), :name('MATCH'), :op('callmethod') ) ), $block ); make $ast; } method metachar:sym<[ ]>($/) { make $.ast; } method metachar:sym<( )>($/) { my $sub_ast := QAST::NodeList.new(self.qbuildsub($.ast, :node($/), :anon(1), :addself(1))); my $ast := QAST::Regex.new( $sub_ast, $.ast, :rxtype('subrule'), :subtype('capture'), :node($/) ); make $ast; } method metachar:sym<'>($/) { my $quote := $.ast; if QAST::SVal.ACCEPTS($quote) { $quote := $quote.value; } my $qast := QAST::Regex.new( $quote, :rxtype, :node($/) ); make self.apply_literal_modifiers($qast); } method metachar:sym<">($/) { my $quote := $.ast; if QAST::SVal.ACCEPTS($quote) { $quote := $quote.value; } my $qast := QAST::Regex.new( $quote, :rxtype, :node($/) ); make self.apply_literal_modifiers($qast); } method metachar:sym<.>($/) { make QAST::Regex.new( :rxtype, :name<.>, :node($/) ); } method metachar:sym<^>($/) { make QAST::Regex.new( :rxtype, :subtype, :node($/) ); } method metachar:sym<^^>($/) { make QAST::Regex.new( :rxtype, :subtype, :node($/) ); } method metachar:sym<$>($/) { make QAST::Regex.new( :rxtype, :subtype, :node($/) ); } method metachar:sym<$$>($/) { make QAST::Regex.new( :rxtype, :subtype, :node($/) ); } method metachar:sym($/) { make QAST::Regex.new( :rxtype, :subtype, :node($/) ); } method metachar:sym($/) { make QAST::Regex.new( :rxtype, :subtype, :node($/) ); } method metachar:sym($/) { make QAST::Regex.new( :rxtype, :subtype, :backtrack, :name<$!from>, :node($/), QAST::NodeList.new( QAST::SVal.new( :value('!LITERAL') ), QAST::SVal.new( :value('') ) ) ); } method metachar:sym($/) { make QAST::Regex.new( :rxtype, :subtype, :backtrack, :name<$!to>, :node($/), QAST::NodeList.new( QAST::SVal.new( :value('!LITERAL') ), QAST::SVal.new( :value('') ) ) ); } method metachar:sym($/) { make $.ast; } method metachar:sym($/) { make $.ast; } method metachar:sym($/) { my $qast; my $name := $ ?? nqp::radix(10, $, 0, 0)[0] !! ~$; if $ { $qast := $[0].ast; if ($qast.rxtype eq 'quant' || $qast.rxtype eq 'dynquant') && $qast[0].rxtype eq 'subrule' { self.subrule_alias($qast[0], $name); } elsif $qast.rxtype eq 'subrule' { self.subrule_alias($qast, $name); $qast := QAST::Regex.new( :rxtype, :min(1), :max(1), $qast) if $; } else { $qast := QAST::Regex.new( $qast, :name($name), :rxtype, :node($/) ); } } else { $qast := QAST::Regex.new( :rxtype, :subtype, :node($/), QAST::NodeList.new( QAST::SVal.new( :value('!BACKREF') ), QAST::SVal.new( :value($name) ) ) ); } make $qast; } method metachar:sym<~>($/) { my @dba := [QAST::SVal.new(:value(%*RX))] if nqp::existskey(%*RX, 'dba'); make QAST::Regex.new( :rxtype, $.ast, $.ast, QAST::Regex.new( :rxtype, :subtype, QAST::NodeList.new( QAST::SVal.new( :value('FAILGOAL') ), QAST::SVal.new( :value(~$) ), |@dba) ) ); } method metachar:sym($/) { make $.ast; } method backslash:sym($/) { make QAST::Regex.new(:rxtype, :name( nqp::lc(~$) ), :negate($ le 'Z'), :node($/)); } method backslash:sym($/) { my $qast := QAST::Regex.new( "\c[27]", :rxtype('enumcharlist'), :negate($ eq 'E'), :node($/) ); make $qast; } method backslash:sym($/) { my $qast := QAST::Regex.new( "\c[12]", :rxtype('enumcharlist'), :negate($ eq 'F'), :node($/) ); make $qast; } method backslash:sym($/) { my $qast := QAST::Regex.new( #?if js nqp::chr(0x2000) ~ nqp::chr(0x2001) ~ # HACK workaround for a cross compiling problem #?endif "\x[09,20,a0,1680,180e,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,200a,202f,205f,3000]", :rxtype('enumcharlist'), :negate($ eq 'H'), :node($/) ); make $qast; } method backslash:sym($/) { my $qast := QAST::Regex.new( "\r", :rxtype('enumcharlist'), :negate($ eq 'R'), :node($/) ); make $qast; } method backslash:sym($/) { my $qast := QAST::Regex.new( "\t", :rxtype('enumcharlist'), :negate($ eq 'T'), :node($/) ); make $qast; } method backslash:sym($/) { my $qast := QAST::Regex.new( "\x[0a,0b,0c,0d,85,2028,2029]\r\n", :rxtype('enumcharlist'), :negate($ eq 'V'), :node($/) ); make $qast; } method backslash:sym($/) { my $octlit := HLL::Actions.ints_to_string( $ || $ ); make $ eq 'O' ?? QAST::Regex.new( $octlit, :rxtype('enumcharlist'), :negate(1), :node($/) ) !! QAST::Regex.new( $octlit, :rxtype('literal'), :node($/) ); } method backslash:sym($/) { my $hexlit := HLL::Actions.ints_to_string( $ || $ ); make $ eq 'X' ?? QAST::Regex.new( $hexlit, :rxtype('enumcharlist'), :negate(1), :node($/) ) !! QAST::Regex.new( $hexlit, :rxtype('literal'), :node($/) ); } method backslash:sym($/) { make $ eq 'C' ?? QAST::Regex.new( $.ast, :rxtype('enumcharlist'), :negate(1), :node($/) ) !! QAST::Regex.new( $.ast, :rxtype('literal'), :node($/) ) } method backslash:sym<0>($/) { make QAST::Regex.new( "\0", :rxtype('literal'), :node($/) ); } method backslash:sym($/) { my $qast := QAST::Regex.new( ~$/ , :rxtype('literal'), :node($/) ); make self.apply_literal_modifiers($qast); } method cclass_backslash:sym($/) { make QAST::Regex.new(:rxtype, :name( nqp::lc(~$) ), :negate($ le 'Z'), :node($/)); } method cclass_backslash:sym($/) { my $qast := QAST::Regex.new( "\b", :rxtype('enumcharlist'), :negate($ eq 'B'), :node($/) ); make $qast; } method cclass_backslash:sym($/) { my $qast := QAST::Regex.new( "\c[27]", :rxtype('enumcharlist'), :negate($ eq 'E'), :node($/) ); make $qast; } method cclass_backslash:sym($/) { my $qast := QAST::Regex.new( "\c[12]", :rxtype('enumcharlist'), :negate($ eq 'F'), :node($/) ); make $qast; } method cclass_backslash:sym($/) { my $qast := QAST::Regex.new( #?if js nqp::chr(0x2000) ~ nqp::chr(0x2001) ~ # HACK workaround for a cross compiling problem #?endif "\x[09,20,a0,1680,180e,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,200a,202f,205f,3000]", :rxtype('enumcharlist'), :negate($ eq 'H'), :node($/) ); make $qast; } method cclass_backslash:sym($/) { my $qast := QAST::Regex.new( "\n", :rxtype('enumcharlist'), :negate($ eq 'N'), :node($/) ); make $qast; } method cclass_backslash:sym($/) { my $qast := QAST::Regex.new( "\r", :rxtype('enumcharlist'), :negate($ eq 'R'), :node($/) ); make $qast; } method cclass_backslash:sym($/) { my $qast := QAST::Regex.new( "\t", :rxtype('enumcharlist'), :negate($ eq 'T'), :node($/) ); make $qast; } method cclass_backslash:sym($/) { my $qast := QAST::Regex.new( "\x[0a,0b,0c,0d,85,2028,2029]", :rxtype('enumcharlist'), :negate($ eq 'V'), :node($/) ); make $qast; } method cclass_backslash:sym($/) { my $octlit := HLL::Actions.ints_to_string( $ || $ ); make ($ eq 'O' ?? QAST::Regex.new( $octlit, :rxtype('enumcharlist'), :negate(1), :node($/) ) !! QAST::Regex.new( $octlit, :rxtype('literal'), :node($/) ) ).annotate_self('codepoint', $ ?? $.ast !! $[0].ast) } method cclass_backslash:sym($/) { my $hexlit := HLL::Actions.ints_to_string( $ || $ ); make ($ eq 'X' ?? QAST::Regex.new( $hexlit, :rxtype('enumcharlist'), :negate(1), :node($/) ) !! QAST::Regex.new( $hexlit, :rxtype('literal'), :node($/) ) ).annotate_self('codepoint', $ ?? $.ast !! $[0].ast) } method cclass_backslash:sym($/) { make $ eq 'C' ?? QAST::Regex.new( $.ast, :rxtype('enumcharlist'), :negate(1), :node($/) ) !! QAST::Regex.new( $.ast, :rxtype('literal'), :node($/) ) } method cclass_backslash:sym<0>($/) { make QAST::Regex.new( "\0", :rxtype('literal'), :node($/) ); } method cclass_backslash:sym($/) { my $qast := QAST::Regex.new( ~$/ , :rxtype('literal'), :node($/) ); make $qast; } method assertion:sym($/) { my $qast; if $ { $qast := $.ast; $qast.subtype('zerowidth'); } else { $qast := QAST::Regex.new( :rxtype, :subtype, :node($/) ); } make $qast; } method assertion:sym($/) { my $qast; if $ { $qast := $.ast; $qast.negate( !$qast.negate ); $qast.subtype('zerowidth'); } else { $qast := QAST::Regex.new( :rxtype, :subtype, :node($/) ); } make $qast; } method assertion:sym<|>($/) { my $qast; my $name := ~$; if $name eq 'c' { # codepoint boundaries always match in # our current Unicode abstraction level $qast := 0; } elsif $name eq 'w' { $qast := QAST::Regex.new(:rxtype, :subtype, :node($/), :name(''), QAST::NodeList.new(QAST::SVal.new( :value('wb') )) ); } make $qast; } method assertion:sym($/) { my $qast := $.ast; if $qast.rxtype eq 'subrule' { $qast.subtype('method'); $qast.name(''); } make $qast; } method assertion:sym($/) { my $name := ~$; my $qast; if $ { $qast := $.ast; if $qast.rxtype eq 'subrule' { self.subrule_alias($qast, $name); } else { $qast := QAST::Regex.new( $qast, :name($name), :rxtype, :node($/) ); } } elsif $name eq 'sym' { my $rxname := ""; my $loc := nqp::index(%*RX, ':sym'); if $loc >= 0 { $rxname := nqp::substr(%*RX, $loc + 5 ); $rxname := nqp::substr( $rxname, 0, nqp::chars($rxname) - 1); } else { $loc := nqp::index(%*RX, ':'); my $angleloc := nqp::index(%*RX, '<', $loc); $angleloc := nqp::index(%*RX, '«', $loc) if $angleloc < 0; $rxname := nqp::substr(%*RX, $loc + 1, $angleloc - $loc - 1) unless $loc < 0; } if $loc >= 0 { $qast := QAST::Regex.new(:name('sym'), :rxtype, :node($/), QAST::Regex.new(:rxtype, $rxname, :node($/))); } else { self.panic(" is only valid in multiregexes"); } } else { $qast := QAST::Regex.new(:rxtype, :subtype, :node($/), :name($name), QAST::NodeList.new(QAST::SVal.new( :value($name) ))); if $ { for $.ast.list { $qast[0].push( $_ ) } } elsif $ { if $name eq 'after' { my int $litlen := self.offset_ast($.ast); if $litlen >= 0 { $qast[0][0].value('before'); $qast[0].push(self.qbuildsub($.ast, :node($/), :anon(1), :addself(1))); $qast[0].push(QAST::IVal.new( :value($litlen) )); # optional offset to before } else { $qast[0].push(self.qbuildsub(self.flip_ast($.ast), :node($/), :anon(1), :addself(1))); } } else { $qast[0].push(self.qbuildsub($.ast, :node($/), :anon(1), :addself(1))); } } } make $qast; } method assertion:sym<[>($/) { my $clist := $; my $qast := $clist[0].ast; if $qast.negate && $qast.rxtype eq 'subrule' { $qast.subtype('zerowidth'); $qast := QAST::Regex.new(:rxtype, :node($/), $qast, QAST::Regex.new( :rxtype, :name<.> )); } my int $i := 1; my int $n := nqp::elems($clist); while $i < $n { unless ~$clist[$i] { my $curse := $clist[$i]; $curse."!clear_highwater"(); $curse.panic('Missing + or - between character class elements') } my $ast := $clist[$i].ast; if $ast.negate || $ast.rxtype eq 'cclass' && ~$ast.node le 'Z' { $ast.subtype('zerowidth'); $qast := QAST::Regex.new( :rxtype, :node($/), :subtype, :negate(1), QAST::Regex.new( :rxtype, :subtype, $ast ), $qast ); } else { $qast := QAST::Regex.new( $qast, $ast, :rxtype, :node($/)); } ++$i; } make $qast; } method arg($/) { make $ ?? $.ast !! QAST::IVal.new( :value(+$) ); } method arglist($/) { my $ast := QAST::Op.new( :op('list') ); for $ { $ast.push( $_.ast ); } make $ast; } method cclass_elem($/) { my $str := ''; my $qast; if $ { my $name := ~$; $qast := QAST::Regex.new( :rxtype, :subtype, :negate( $ eq '-' ), :node($/), QAST::NodeList.new(QAST::SVal.new( :value($name) )) ); } # <:Letter> elsif $ { $qast := QAST::Regex.new( $*key, :rxtype, :negate( $ eq '-' && $ ne '!' # $ ^^ $ || $ ne '-' && $ eq '!' ), :node($/) ); # <:NumericValue(0 ^..^ 1)> $qast.push($.ast) if $; } else { my @alts; my $RXi := %*RX; my $RXm := %*RX; my $RXim := $RXi && $RXm; for $ { if $_[1] { my $node; my $ord0; my $ord1; sub non_synth_ord($chr) { my int $ord := nqp::ord($chr); if nqp::chr($ord) ne $chr { $/.panic("Cannot use $chr as a range endpoint, as it is not a single codepoint"); } $ord } if $_[0] { $node := $_[0].ast; #?if !js # HACK check disabled for js because of lack of proper NFG support $/.panic("Illegal range endpoint in regex: " ~ ~$_) if $node.rxtype ne 'literal' && $node.rxtype ne 'enumcharlist' #?if moar || $node.negate || nqp::chars($node[0]) != 1; #?endif #?if jvm # TODO expected chars tweaked for jvm because of lack of proper NFG support || $node.negate || nqp::chars($node[0]) != (nqp::ord($node[0]) < 65536 ?? 1 !! 2); #?endif #?endif $ord0 := $node.ann('codepoint') // ($RXm ?? nqp::ordbaseat($node[0], 0) !! non_synth_ord($node[0])); } else { $ord0 := $RXm ?? nqp::ordbaseat(~$_[0][0], 0) !! non_synth_ord(~$_[0][0]); } if $_[1][0] { $node := $_[1][0].ast; #?if !js # HACK check disabled for js because of lack of proper NFG support $/.panic("Illegal range endpoint in regex: " ~ ~$_) if $node.rxtype ne 'literal' && $node.rxtype ne 'enumcharlist' #?if moar || $node.negate || nqp::chars($node[0]) != 1; #?endif #?if jvm # TODO expected chars tweaked for jvm because of lack of proper NFG support || $node.negate || nqp::chars($node[0]) != (nqp::ord($node[0]) < 65536 ?? 1 !! 2); #?endif #?endif $ord1 := $node.ann('codepoint') // ($RXm ?? nqp::ordbaseat($node[0], 0) !! non_synth_ord($node[0])); } else { $ord1 := $RXm ?? nqp::ordbaseat(~$_[1][0][0], 0) !! non_synth_ord(~$_[1][0][0]); } $/.panic("Illegal reversed character range in regex: " ~ ~$_) if $ord0 > $ord1; @alts.push(QAST::Regex.new( $RXim ?? 'ignorecase+ignoremark' !! $RXi ?? 'ignorecase' !! $RXm ?? 'ignoremark' !! '', QAST::IVal.new( :value($ord0) ), QAST::IVal.new( :value($ord1) ), :negate( $ eq '-' ), :rxtype, :node($/) )); } elsif $_[0] { my $bs := $_[0].ast; if $bs.rxtype eq 'enumcharlist' && !$bs.negate || $bs.rxtype eq 'literal' { $str := $str ~ $bs[0]; } else { $bs.negate(!$bs.negate) if $ eq '-'; @alts.push($bs); } } elsif $RXim { my $c := nqp::chr(nqp::ordbaseat(~$_[0], 0)); $str := $str ~ nqp::fc($c) ~ nqp::uc($c); } elsif $RXi { my $c := ~$_[0]; $str := $str ~ nqp::fc($c) ~ nqp::uc($c); } elsif $RXm { $str := $str ~ nqp::chr(nqp::ordbaseat(~$_[0], 0)); } else { $str := $str ~ ~$_[0]; } } @alts.push(QAST::Regex.new( $str, :rxtype, :node($/), :negate( $ eq '-' ), :subtype($RXm ?? 'ignoremark' !! '') )) if nqp::chars($str); $qast := ( my $num := nqp::elems(@alts) ) == 1 ?? @alts[0] !! 0 < $num && $ eq '-' ?? QAST::Regex.new( :rxtype, :node($/), :negate(1), QAST::Regex.new( :rxtype, :subtype, |@alts ), QAST::Regex.new( :rxtype, :name<.> ) ) !! QAST::Regex.new( :rxtype, |@alts ); } make $qast; } method mod_internal($/) { if $ { if nqp::istype($[0].ast, QAST::SVal) { my $key := ~$; my $val := $[0].ast.value; %*RX{$key} := $val; make $key eq 'dba' ?? QAST::Regex.new( :rxtype('dba'), :name($val) ) !! 0; } else { $/.panic("Internal modifier strings must be literals"); } } } sub backmod($ast, $backmod) { if $backmod eq ':' { $ast.backtrack('r') } elsif $backmod eq ':?' || $backmod eq '?' { $ast.backtrack('f') } elsif $backmod eq ':!' || $backmod eq '!' { $ast.backtrack('g') } $ast; } method apply_literal_modifiers($qast) { if %*RX && %*RX { # > $qast.subtype('ignorecase+ignoremark') } elsif %*RX { $qast.subtype('ignorecase') } elsif %*RX { # > $qast.subtype('ignoremark') } return $qast } method qbuildsub($qast, $block = QAST::Block.new(), :$node, :$anon, :$addself, *%rest) { my $*LANG := $qast.node; my $code_obj := nqp::existskey(%rest, 'code_obj') ?? %rest !! self.create_regex_code_object($block); if $addself { $block.push(QAST::Var.new( :name('self'), :scope('local'), :decl('param') )); } unless $block.symbol('$¢') { $block.push(QAST::Var.new(:name<$¢>, :scope, :decl('var'))); $block.symbol('$¢', :scope); } self.store_regex_caps($code_obj, $block, self.capnames($qast, 0)); self.store_regex_nfa($code_obj, $block, QRegex::NFA.new.addnode($qast)); self.alt_nfas($code_obj, $qast); my $scan := QAST::Regex.new( :rxtype ); { my $q := $qast; if $q.rxtype eq 'concat' && $q[0] { $q := $q[0] } if $q.rxtype eq 'literal' { nqp::push($scan, $q[0]); $scan.subtype($q.subtype); } } $block.annotate('orig_qast', $qast); $qast := QAST::Regex.new( :rxtype, $scan, $qast, ($anon ?? QAST::Regex.new( :rxtype ) !! (nqp::substr(%*RX, 0, 12) ne '!!LATENAME!!' ?? QAST::Regex.new( :rxtype, :name(%*RX) ) !! QAST::Regex.new( :rxtype, QAST::Var.new( :name(nqp::substr(%*RX, 12)), :scope('lexical') ) ) ))); if %*RX { $qast[2].backtrack('r'); } $block.push(QAST::Stmts.new($qast, :$node)); self.set_cursor_type($qast); $block; } # A hook point that subclasses can set to the cursor type method set_cursor_type($qast) { } method capnames($ast, int $count) { my %capnames; my $rxtype := $ast.rxtype; if $rxtype eq 'concat' || $rxtype eq 'goal' || $rxtype eq 'conjseq' || $rxtype eq 'conj' { for $ast.list { my %x := self.capnames($_, $count); for %x { %capnames{$_.key} := nqp::add_i((%capnames{$_.key} // 0), $_.value); } $count := %x{''}; } } elsif $rxtype eq 'altseq' || $rxtype eq 'alt' { my int $max := $count; for $ast.list { my %x := self.capnames($_, $count); for %x { %capnames{$_.key} := (%capnames{$_.key} // 0) < 2 && %x{$_.key} == 1 ?? 1 !! 2; } $max := %x{''} if %x{''} > $max; } $count := $max; } elsif $rxtype eq 'subrule' && $ast.subtype eq 'capture' { my $name := $ast.name; if $name eq '' { $name := $count; $ast.name($name); } my @names := nqp::split('=', $name); for @names { my int $n := nqp::radix(10, $_, 0, 0)[0]; if $_ eq '0' || $n > 0 { $count := $n + 1; %capnames{$n} := 1 } else { %capnames{$_} := 1; } } } elsif $rxtype eq 'subcapture' { for nqp::split(' ', $ast.name) { my $n := nqp::radix(10, $_, 0, 0)[0]; if $_ eq '0' || $n > 0 { $count := $n + 1; %capnames{$n} := 1 } else { %capnames{$_} := 1; } } my %x := self.capnames($ast[0], $count); for %x { %capnames{$_.key} := nqp::add_i((%capnames{$_.key} // 0), %x{$_.key}) } $count := %x{''}; } elsif $rxtype eq 'quant' || $rxtype eq 'dynquant' { my $ilist := ($ast.subtype eq 'item'); my %astcap := self.capnames($ast[0], $count); for %astcap { %capnames{$_.key} := $ilist ?? $_.value !! 2 } $count := %astcap{''}; my $sep_ast := $ast[$rxtype eq 'quant' ?? 1 !! 2]; if $sep_ast { # handle any separator quantification my %astcap := self.capnames($sep_ast, $count); for %astcap { %capnames{$_.key} := $ilist ?? $_.value !! 2 } $count := %astcap{''}; } } %capnames{''} := $count; # will be deleted in SET_CAPS %capnames; } method alt_nfas($code_obj, $ast, $suffix = $*W.handle()) { my $rxtype := $ast.rxtype; if $rxtype eq 'alt' { my @alternatives; for $ast.list { self.alt_nfas($code_obj, $_, $suffix); nqp::push(@alternatives, QRegex::NFA.new.addnode($_)); } $ast.name(QAST::Node.unique('alt_nfa_') ~ '_' ~ $suffix); self.store_regex_alt_nfa($code_obj, $ast.name, @alternatives); } elsif $rxtype eq 'subcapture' || $rxtype eq 'quant' { self.alt_nfas($code_obj, $ast[0], $suffix) } elsif $rxtype eq 'concat' || $rxtype eq 'altseq' || $rxtype eq 'conj' || $rxtype eq 'conjseq' { for $ast.list { self.alt_nfas($code_obj, $_, $suffix) } } } method subrule_alias($ast, $name) { if $ast.name gt '' { $ast.name( $name ~ '=' ~ $ast.name ); } else { $ast.name($name); } $ast.subtype('capture'); } method offset_ast($qast) { return -1 unless nqp::istype($qast, QAST::Regex); if $qast.rxtype eq 'literal' { return nqp::chars($qast[0]); } elsif $qast.rxtype eq 'cclass' { return 1; } elsif $qast.rxtype eq 'anchor' { return 0; } elsif $qast.rxtype eq 'concat' { my int $litlen; for @($qast) { my int $ll := self.offset_ast($_); return -1 if $ll < 0; $litlen := $litlen + $ll; } return $litlen; } return -1; } method flip_ast($qast) { return $qast unless nqp::istype($qast, QAST::Regex); if $qast.rxtype eq 'literal' { $qast[0] := nqp::flip($qast[0]); } elsif $qast.rxtype eq 'concat' { my @tmp; while nqp::elems(@($qast)) { @tmp.push(@($qast).shift) } while @tmp { @($qast).push(self.flip_ast(@tmp.pop)) } } elsif $qast.rxtype eq 'anchor' { if $qast.subtype eq 'rwb' { $qast.subtype("lwb"); } elsif $qast.subtype eq 'lwb' { $qast.subtype("rwb"); } elsif $qast.subtype eq 'bol' { $qast.subtype("eol"); } elsif $qast.subtype eq 'eol' { $qast.subtype("bol"); } elsif $qast.subtype eq 'bos' { $qast.subtype("eos"); } elsif $qast.subtype eq 'eos' { $qast.subtype("bos"); } } else { for @($qast) { self.flip_ast($_) } } $qast } # This is overridden by a compiler that wants to create code objects # for regexes. We just use the standard NQP one in standalone mode. method create_regex_code_object($block) { $*W.create_code($block, $block.name); } # Stores the captures info for a regex. method store_regex_caps($code_obj, $block, %caps) { $code_obj.SET_CAPS(%caps); } # Stores the NFA for the regex overall. method store_regex_nfa($code_obj, $block, $nfa) { $code_obj.SET_NFA($nfa.save); } # Stores the NFA for a regex alternation. method store_regex_alt_nfa($code_obj, $key, @alternatives) { my @saved; for @alternatives { @saved.push($_.save(:non_empty)); } $code_obj.SET_ALT_NFA($key, @saved); } }