class Zef::Utils::URI { has $.is-relative; has $.match; has $.scheme; has $.host; has $.port; has $.user-info; has $.path; has $.query; has $.fragment; my grammar URI { token URI-reference { || } token URI { ':' ['?' ]? ['#' ]? } token relative-ref { ['?' ]? ['#' ]? } token heir-part { || '//' || || || } token relative-part { || '//' || || || } token scheme { <.alpha> [ || <.alpha> || <.digit> || '+' || '-' || '.' ]* } token authority { [ '@']? [':' ]? } token userinfo { [<.unreserved> || <.pct-encoded> || <.sub-delims> || ':']* } token host { <.IP-literal> || <.IPv4address> || <.reg-name> } token IP-literal { '[' [<.IPv6address> || <.IPv6addrz> || <.IPvFuture>] ']' } token IPv6addz { <.IPv6address> '%25' <.ZoneID> } token ZoneID { [<.unreserved> || <.pct-encoded>]+ } token IPvFuture { 'v' <.xdigit>+ '.' [<.unreserved> || <.sub-delims> || ':']+ } token IPv6address { || [<.h16> ':'] ** 6 <.ls32> || '::' [<.h16> ':'] ** 5 <.ls32> || [ <.h16> ]? '::' [<.h16> ':'] ** 4 <.ls32> || [[<.h16> ':'] ** 0..1 <.h16> ]? '::' [<.h16> ':'] ** 3 <.ls32> || [[<.h16> ':'] ** 0..2 <.h16> ]? '::' [<.h16> ':'] ** 2 <.ls32> || [[<.h16> ':'] ** 0..3 <.h16> ]? '::' <.h16> ':' <.ls32> || [[<.h16> ':'] ** 0..4 <.h16> ]? '::' <.ls32> || [[<.h16> ':'] ** 0..5 <.h16> ]? '::' <.h16> || [[<.h16> ':'] ** 0..6 <.h16> ]? '::' } token h16 { <.xdigit> ** 1..4 } token ls32 { [<.h16> ':' <.h16>] || <.IPv4address> } token IPv4address { <.dec-octet> '.' <.dec-octet> '.' <.dec-octet> '.' <.decoctet> } token dec-octet { || <.digit> || [\x[31]..\x[39]] <.digit> || '1' <.digit> ** 2 || '2' [\x[30]..\x[34]] <.digit> || '25' [\x[30]..\x[35]] } token reg-name { [<.unreserved> || <.pct-encoded> || <.sub-delims>]* } token port { <.digit>* } token path { || <.path-abempty> || <.path-absolute> || <.path-noscheme> || <.path-rootless> || <.path-empty> } token path-abempty { ['/' <.segment>]* } token path-absolute { '/' [<.segment-nz> ['/' <.segment>]*]? } token path-noscheme { <.segment-nz-nc> ['/' <.segment>]* } token path-rootless { <.segment-nz> ['/' <.segment>]* } token path-empty { <.pchar> ** 0 } token segment { <.pchar>* } token segment-nz { <.pchar>+ } token segment-nz-nc { [<.unreserved> || <.pct-encoded> || <.sub-delims>]+ } token pchar { <.unreserved> || <.pct-encoded> || <.sub-delims> || ':' || '@' } token query { [<.pchar> || '/' || '?']* } token fragment { [<.pchar> || '/' || '?']* } token pct-encoded { '%' <.xdigit> <.xdigit> } token unreserved { <.alpha> || <.digit> || < - . _ ~ > } token reserved { <.gen-delims> || <.sub-delims> } token gen-delims { < : / ? # [ ] @ > } token sub-delims { < ! $ & ' ( ) * + , ; = > } # ' <- fixes syntax highlighting } my grammar URI::File is URI { token TOP { } token file-URI { ":" [ "?" ]? } token scheme { "file" } token heir-part { "//"? || } token auth-path { [ ]? || || } token auth { [ "@" ]? } token local-path { || } token unc-path { "//" "/"? } token windows-path { } token drive-letter { [ ]? } token drive-marker { ":" || "|" } # XXX: this is a bit of a hack -- see: # https://github.com/ugexe/zef/issues/204#issuecomment-366957374 token pchar { <.unreserved> || <.pct-encoded> || <.sub-delims> || ':' || '@' || ' ' } } method new($id is copy) { # prefix windows paths with `file://` so they get parsed as a 'uri' type identity. my $possible-file-uri = "{$id.starts-with('file://')??''!!'file://'}{$*DISTRO.is-win??$id.subst('\\','/',:g)!!$id}"; if URI::File.parse($possible-file-uri, :rule) -> $m { my $ap = $m.; my $volume = ~($ap.. // ''); # what IO::SPEC::Win32 understands my $path = ~($ap.. // $ap. // die "Could not parse path from: $id"); my $host = ~($ap. // ''); my $scheme = ~$m.; my $is-relative = $path.IO.is-relative || not $ap...defined; # because `|` is sometimes used as a windows volume separator in a file-URI my $normalized-path = $is-relative ?? $path !! $*SPEC.join($volume, $path, ''); self.bless( :match($m), :$is-relative, :$scheme, :$host, :path($normalized-path) ); } elsif URI.parse($id, :rule) -> $m { my $heir = $m.; my $auth = $heir.; self.bless( match => $m, is-relative => False, scheme => ~($m. // '').lc, host => ~($auth. // ''), port => ($auth. // Int).Int, user-info => ~($auth. // ''), path => ~($heir. // '/'), query => ~($m. // ''), fragment => ~($m. // ''), ); } elsif URI.parse($id, :rule) -> $m { self.bless( match => $m, is-relative => True, scheme => ~($m. // '').lc, path => ~($m. || '/'), query => ~($m. // ''), fragment => ~($m. // ''), ); } elsif $id ~~ /^(.+?) '@' (.+?) ':' (.*)/ and URI.parse("ssh\:\/\/$0\@$1\/$2", :rule) -> $m { my $heir = $m.; my $auth = $heir.; self.bless( match => $m, is-relative => False, scheme => ~($m. // '').lc, host => ~($auth. // ''), port => ($auth. // Int).Int, user-info => ~($auth. // ''), path => ~($heir. // '/'), query => ~($m. // ''), fragment => ~($m. // ''), ); } else { die "Cannot parse $id as an URI"; } } } sub uri(Str() $uri) is export(:internals) { try Zef::Utils::URI.new($uri) }