#!/usr/bin/perl use strict; use warnings; use Encode; package Encode::UTF24; use base qw/Encode::Encoding/; __PACKAGE__->Define('UTF-24'); sub perlio_ok { 0 } sub decode { my ( $self, $bytes ) = @_; my $utf8 = ''; for ( my $i = 0 ; $i < length($bytes) ; $i++ ) { my $o0 = ord substr $bytes, $i, 1; my $o1 = ord substr $bytes, $i + 1, 1; if ($o1 < 0x80){ $utf8 .= chr($o0); }else{ my $o2 = ord substr $bytes, $i + 2, 1; if ( $o2 < 0x80 ) { $utf8 .= chr( ( $o0 << 7 ) + ( $o1 & 0x7F ) ); $i += 1; } else{ $utf8 .= chr( ( $o0 << 14 ) + ( ( $o1 & 0x7F ) << 7 ) + ( $o2 & 0x7F ) ); $i += 2; } } } return $utf8; } sub encode { my ( $self, $utf8 ) = @_; my $bytes = ''; for my $ord ( unpack 'U*', $utf8 ) { $bytes .= $ord < 0x80 ? chr($ord) : $ord < 0x4000 ? pack 'C2', $ord >> 7, 0x80 + ( $ord & 0x7F ) : $ord < 0x10FFFF ? pack 'C3', $ord >> 14, 0x80 + ( ( $ord >> 7 ) & 0x7f ), 0x80 + ( $ord & 0x7F ) : die "chr($ord) is impossible"; # never happens } return $bytes; } 1; package main; use utf8; local $\ = "\n"; sub hexdump{ join " ", map { sprintf "%02x", $_ } @_; } binmode STDOUT, ":utf8"; for my $utf8 (qw/d δ だ 弾 𪚲 𪚲弾だδd/){ my $utf24 = encode('UTF-24', $utf8); print "$utf8:", hexdump unpack 'C*', encode_utf8 $utf8; print "$utf8:", hexdump unpack 'C*', $utf24; print "$utf8:", hexdump unpack 'C*', encode_utf8 decode('UTF-24', $utf24); print ""; }