#!/usr/bin/env raku

# This script reads the HTML entities data from
#   https://html.spec.whatwg.org/entities.json
# converts the JSON and uses that data structure to create the source
# code of the Str.html5parse hash lookup.
#
# The target source file is rewritten in place: every line is copied
# unchanged, except for the section between the "start" and "end" marker
# lines, which is replaced by freshly generated code.

# always use highest version of Raku
use v6.*;

my $generator := $*PROGRAM-NAME;
my $generated := DateTime.now.gist.subst(/\.\d+/,'');  # drop fractional seconds
my $start     := '#- start of generated part of HTML entities';
my $end       := '#- end of generated part of HTML entities';

# Fetch and decode the entity data *before* touching the target file, so
# that a failed download or unparsable JSON cannot leave it truncated.
my $url      := 'https://html.spec.whatwg.org/entities.json';
my $proc     := run 'curl', '--fail', $url, :out;
my $entities := $proc.out.slurp(:close);
die "Could not download $url: curl exited with code { $proc.exitcode }"
  unless $proc.exitcode == 0;

my %json := Rakudo::Internals::JSON.from-json($entities);
die "No entities found in $url" unless %json;

# slurp the whole file and only then set up writing to it: from here on
# everything that is "say"ed ends up in the target file
my $filename = "src/core.c/RakuAST/HTML/Entities.rakumod";
my @lines = $filename.IO.lines;
$*OUT = $filename.IO.open(:w);

# for all the lines in the source that don't need special handling
while @lines {
    my $line := @lines.shift;

    # nothing to do yet: copy the line verbatim
    unless $line.starts-with($start) {
        say $line;
        next;
    }

    say "$start ------------------------------------";
    say "#- Generated on $generated by $generator";
    say "#- PLEASE DON'T CHANGE ANYTHING BELOW THIS LINE";
    say "";

    # skip the old version of the code, up to and including the end marker
    while @lines {
        last if @lines.shift.starts-with($end);
    }

    # The JSON has keys both with and without the trailing semicolon
    # (e.g. "&amp;" and "&amp"): only emit each bare name once.  Sorting
    # is case-insensitive first, with the case-sensitive order as the
    # tie-breaker, to get a stable and readable result.
    my %seen;
    for %json.keys.sort(-> $a, $b { $a.lc cmp $b.lc || $a cmp $b }) {
        my $key := .substr(1).chomp(';');  # lose the & and any ;
        unless %seen{$key}++ {

            # each JSON value is an object: the list of codepoints lives
            # under its "codepoints" key
            my @codepoints := %json{$_}<codepoints>;

            # multiple codepoints are emitted as a parenthesized list
            my $value := @codepoints > 1
              ?? ('(' ~ @codepoints.join(',') ~ ')')
              !! @codepoints.head.Str;
            say " '$key', $value,";
        }
    }

    # we're done with the generated section
    say "";
    say "#- PLEASE DON'T CHANGE ANYTHING ABOVE THIS LINE";
    say "$end --------------------------------------";
}

# close the file properly
$*OUT.close;

# vim: expandtab sw=4