#!/usr/bin/env raku

# This script reads the Rakudo/Internals.rakumod file from STDIN, and generates
# the necessary lookup hashes for making magic Str .succ / .pred work, and
# writes it to STDOUT.

use v6;
use nqp;

# general initializations
my $generator = $*PROGRAM-NAME;
my $generated = DateTime.now.gist.subst(/\.\d+/,'');
my $start     = '#- start of generated part of succ/pred';
my $end       = '#- end of generated part of succ/pred';

# the ranges we consider magic wrt to .succ / .pred
my @ranges =
  "0".ord .. "9".ord,  # arabic digits
  "A".ord .. "Z".ord,  # latin uppercase
  "a".ord .. "z".ord,  # lating lowercase
  0x00391 .. 0x003A9,  # greek uppercase
  0x003B1 .. 0x003C9,  # greek lowercase
  0x005D0 .. 0x005EA,  # hebrew
  0x00410 .. 0x0042F,  # cyrillic uppercase
  0x00430 .. 0x0044F,  # cyrillic lowercase
  0x00660 .. 0x00669,  # arabic-indic digits
  0x006F0 .. 0x006F9,  # extended arabic-indic digits
  0x007C0 .. 0x007C9,  # nko digits
  0x00966 .. 0x0096F,  # devanagari digits
  0x009E6 .. 0x009EF,  # bengali digits
  0x00A66 .. 0x00A6F,  # gurmukhi digits
  0x00AE6 .. 0x00AEF,  # gujarati digits
  0x00B66 .. 0x00B6F,  # oriya digits
  0x00BE6 .. 0x00BEF,  # tamil digits
  0x00C66 .. 0x00C6F,  # telugu digits
  0x00CE6 .. 0x00CEF,  # kannada digits
  0x00D66 .. 0x00D6F,  # malayalam digits
  0x00DE6 .. 0x00DEF,  # sinhala lith digits
  0x00E50 .. 0x00E59,  # thai digits
  0x00ED0 .. 0x00ED9,  # lao digits
  0x00F20 .. 0x00F29,  # tibetan digits
  0x01040 .. 0x01049,  # myanmar digits
  0x01090 .. 0x01099,  # myanmar shan digits
  0x017E0 .. 0x017E9,  # khmer digits
  0x01810 .. 0x01819,  # mongolian digits
  0x01946 .. 0x0194F,  # limbu digits
  0x019D0 .. 0x019D9,  # new tai lue digits
  0x01A80 .. 0x01A89,  # tai tham hora digits
  0x01A90 .. 0x01A99,  # tai tham tham digits
  0x01B50 .. 0x01B59,  # balinese digits
  0x01BB0 .. 0x01BB9,  # sundanese digits
  0x01C40 .. 0x01C49,  # lepcha digits
  0x01C50 .. 0x01C59,  # ol chiki digits
  0x02070 .. 0x02079,  # superscripts
  0x02080 .. 0x02089,  # subscripts
  0x02160 .. 0x0216b,  # clock roman uc
  0x02170 .. 0x0217b,  # clock roman lc
  0x02460 .. 0x02473,  # circled digits 1..20
  0x02474 .. 0x02487,  # parenthesized digits 1..20
  0x0249C .. 0x024B5,  # parenthesized latin lc
  0x02581 .. 0x02588,  # lower blocks
  0x02680 .. 0x02685,  # die faces
  0x02776 .. 0x0277F,  # dingbat negative circled 1..10
  0x0A620 .. 0x0A629,  # vai digits
  0x0A8D0 .. 0x0A8D9,  # saurashtra digits
  0x0A8E0 .. 0x0A8E9,  # combining devanagari digits
  0x0A900 .. 0x0A909,  # kayah li digits
  0x0A9D0 .. 0x0A9D9,  # javanese digits
  0x0A9F0 .. 0x0A9F9,  # myanmar tai laing digits
  0x0AA50 .. 0x0AA59,  # cham digits
  0x0ABF0 .. 0x0ABF9,  # meetei mayek digits
  0x0FF10 .. 0x0FF19,  # fullwidth digits
  0x1F37A .. 0x1F37B,  # beer mugs
  0x1F42A .. 0x1F42B,  # camels
;

# ranges that start with these, carry (aka "9".succ -> "10" instead of "00")
my str $carrydigits =
           '0'  # arabic
   ~ "\x00660"  # arabic-indic
   ~ "\x006F0"  # extended arabic-indic
   ~ "\x007C0"  # nko
   ~ "\x00966"  # devanagari
   ~ "\x009E6"  # bengali
   ~ "\x00A66"  # gurmukhi
   ~ "\x00AE6"  # gujarati
   ~ "\x00B66"  # oriya
   ~ "\x00BE6"  # tamil
   ~ "\x00C66"  # telugu
   ~ "\x00CE6"  # kannada
   ~ "\x00D66"  # malayalam
   ~ "\x00DE6"  # sinhala lith
   ~ "\x00E50"  # thai
   ~ "\x00ED0"  # lao
   ~ "\x00F20"  # tibetan
   ~ "\x01040"  # myanmar
   ~ "\x01090"  # myanmar shan
   ~ "\x017E0"  # khmer
   ~ "\x01810"  # mongolian
   ~ "\x01946"  # limbu
   ~ "\x019D0"  # new tai lue
   ~ "\x01A80"  # tai tham hora
   ~ "\x01A90"  # tai tham tham
   ~ "\x01B50"  # balinese
   ~ "\x01BB0"  # sundanese
   ~ "\x01C40"  # lepcha
   ~ "\x02070"  # superscripts XXX: should be treated as digit?
   ~ "\x02080"  # subscripts XXX: should be treated as digit?
   ~ "\x0A620"  # vai
   ~ "\x0A8D0"  # saurashtra
   ~ "\x0A8E0"  # combining devanagari
   ~ "\x0A900"  # kayah li
   ~ "\x0A9D0"  # javanese
   ~ "\x0A9F0"  # myanmar tai laing
   ~ "\x0ABF0"  # meetei mayek
   ~ "\x0AA50"  # cham
   ~ "\x0FF10"  # fullwidth XXX: should be treated as digit?
   ~ "\x1F37A"  # beer mugs
   ~ "\x1F42A"  # camels
;

# holes in otherwise contiguous ranges
my str $holes =
     "\x003A2"  # <reserved>
   ~ "\x003C2"  # GREEK SMALL LETTER FINAL SIGMA
;

# for all the lines in the source that don't need special handling
for $*IN.lines -> $line {

    # nothing to do yet
    unless $line.starts-with($start) {
        say $line;
        next;
    }

    # found header
    say $start ~ " ---------------------------------------";
    say "#- Generated on $generated by $generator";
    say "#- PLEASE DON'T CHANGE ANYTHING BELOW THIS LINE";

    # skip the old version of the code
    for $*IN.lines -> $line {
        last if $line.starts-with($end);
    }

    # initialize .succ data structures
    my $nlook := nqp::list_s;
    my $nchrs := nqp::list_s;
    my $blook := nqp::list_s;
    my $bchrs := nqp::list_s;
    for @ranges -> $range {
        my int $first = $range.AT-POS(0);
        my int $carry = nqp::index($carrydigits,nqp::chr($first)) > -1;
        my int $end   = $range.end;
        my str $char;

        for $range.kv -> int $i, int $ord {
            if $i < $end {
                $char = nqp::chr($ord);
                nqp::push_s($nlook,$char)
                  if nqp::iseq_i(nqp::index($holes,$char),-1);
                $char = nqp::chr($ord + 1);
                nqp::push_s($nchrs,$char)
                  if nqp::iseq_i(nqp::index($holes,$char),-1);
            }
            else {
                nqp::push_s($blook,nqp::chr($ord));
                nqp::push_s($bchrs,nqp::chr($first+$carry) ~ nqp::chr($first));
            }
        }
    }

    # generate the SUCC initialization
    print Q:c:to/SOURCE/;

    # normal increment magic chars & incremented char at same index
    my $succ-nlook = '{nqp::join('',$nlook)}';
    my $succ-nchrs = '{nqp::join('',$nchrs)}'; 

    # magic increment chars at boundary & incremented char at same index
    my $succ-blook = '{nqp::join('',$blook)}';
    my $succ-bchrs = '{nqp::join('',$bchrs)}';

SOURCE

    # initialize .pred data structures
    $nlook := nqp::list_s;
    $nchrs := nqp::list_s;
    $blook := nqp::list_s;
    $bchrs := nqp::list_s;
    for @ranges -> $range {
        my str $char;
        for $range.kv -> int $i, int $ord {
            if $i {
                $char = nqp::chr($ord);
                nqp::push_s($nlook,$char)
                  if nqp::iseq_i(nqp::index($holes,$char),-1);
                $char = nqp::chr($ord - 1);
                nqp::push_s($nchrs,$char)
                  if nqp::iseq_i(nqp::index($holes,$char),-1);
            }
            else {
                nqp::push_s($blook,nqp::chr($ord));
                nqp::push_s($bchrs,nqp::chr($range.AT-POS($range.end)));
            }
        }
    }

    # generate the PRED initialization
    print Q:c:to/SOURCE/;
    # normal decrement magic chars & incremented char at same index
    my $pred-nlook = '{nqp::join('',$nlook)}';
    my $pred-nchrs = '{nqp::join('',$nchrs)}'; 

    # magic decrement chars at boundary & incremented char at same index
    my $pred-blook = '{nqp::join('',$blook)}';
    my $pred-bchrs = '{nqp::join('',$bchrs)}';

SOURCE

    # we're done for this role
    say "#- PLEASE DON'T CHANGE ANYTHING ABOVE THIS LINE";
    say $end ~ " -----------------------------------------";
}

# vim: expandtab sw=4