dankogai · dankogai · Oct 26, 2016 · Oct 26, 2016 · Oct 26, 2016 · Oct 26, 2016
diff --git a/Encode.xs b/Encode.xs
diff --git a/MANIFEST b/MANIFEST
@@ -94,12 +94,14 @@ t/jisx0212.utf	test data
 t/jperl.t	test script
 t/ksc5601.enc	test data
 t/ksc5601.utf	test data
+t/magic.t	test script
 t/mime-header.t	test script
 t/mime-name.t	test script
 t/mime_header_iso2022jp.t  test script
 t/perlio.t	test script
 t/piconv.t	test script
 t/rt.pl		even more test script
+t/rt85489.t	test script
 t/taint.t       test script
 t/unibench.pl	benchmark script
 t/utf8ref.t	test script

diff --git a/lib/Encode/CN/HZ.pm b/lib/Encode/CN/HZ.pm
@@ -49,7 +49,8 @@ sub decode ($$;$) {
         else {        # GB mode; the byte ranges are as in RFC 1843.
             no warnings 'uninitialized';
             if ( $str =~ s/^((?:[\x21-\x77][\x21-\x7E])+)// ) {
-                $ret .= $GB->decode( $1, $chk );
+                my $prefix = $1;
+                $ret .= $GB->decode( $prefix, $chk );
             }
             elsif ( $str =~ s/^\x7E\x7D// ) {    # '~}'
                 $in_ascii = 1;

diff --git a/lib/Encode/Encoder.pm b/lib/Encode/Encoder.pm
@@ -85,9 +85,7 @@ sub AUTOLOAD {
         from_to( $self->{data}, $self->{encoding}, $obj->name, 1 );
     }
     else {
-        if ( defined($self->{data}) ) {
-            $self->{data} = $obj->encode( $self->{data}, 1 );
-        }
+        $self->{data} = $obj->encode( $self->{data}, 1 );
     }
     $self->{encoding} = $obj->name;
     return $self;

diff --git a/t/decode.t b/t/decode.t
@@ -3,7 +3,7 @@
 #
 use strict;
 use Encode qw(decode_utf8 FB_CROAK find_encoding decode);
-use Test::More tests => 5;
+use Test::More tests => 17;
 
 sub croak_ok(&) {
     my $code = shift;
@@ -32,3 +32,55 @@ SKIP: {
     *a = $orig;
     is($latin1->decode(*a), '*main::'.$orig, '[cpan #115168] passing typeglobs to decode');
 }
+
+$orig = "\x80";
+$orig =~ /(.)/;
+is($latin1->decode($1), "\N{U+0080}", 'passing magic regex to latin1 decode');
+
+$orig = "\x80";
+*a = $orig;
+is($latin1->decode(*a), "*main::\N{U+0080}", 'passing typeglob to latin1 decode');
+
+$orig = "\N{U+0080}";
+$orig =~ /(.)/;
+is($latin1->encode($1), "\x80", 'passing magic regex to latin1 encode');
+
+$orig = "\xC3\x80";
+$orig =~ /(..)/;
+is(Encode::decode_utf8($1), "\N{U+C0}", 'passing magic regex to Encode::decode_utf8');
+
+$orig = "\xC3\x80";
+*a = $orig;
+is(Encode::decode_utf8(*a), "*main::\N{U+C0}", 'passing typeglob to Encode::decode_utf8');
+
+$orig = "\N{U+C0}";
+$orig =~ /(.)/;
+is(Encode::encode_utf8($1), "\xC3\x80", 'passing magic regex to Encode::encode_utf8');
+
+$orig = "\xC3\x80";
+$orig =~ /(..)/;
+is(Encode::decode('utf-8', $1), "\N{U+C0}", 'passing magic regex to UTF-8 decode');
+
+$orig = "\xC3\x80";
+*a = $orig;
+is(Encode::decode('utf-8', *a), "*main::\N{U+C0}", 'passing typeglob to UTF-8 decode');
+
+$orig = "\N{U+C0}";
+$orig =~ /(.)/;
+is(Encode::encode('utf-8', $1), "\xC3\x80", 'passing magic regex to UTF-8 encode');
+
+SKIP: {
+    skip "Perl Version ($]) is older than v5.16", 3 if $] < 5.016;
+
+    $orig = "\N{U+0080}";
+    *a = $orig;
+    is($latin1->encode(*a), "*main::\x80", 'passing typeglob to latin1 encode');
+
+    $orig = "\N{U+C0}";
+    *a = $orig;
+    is(Encode::encode_utf8(*a), "*main::\xC3\x80", 'passing typeglob to Encode::encode_utf8');
+
+    $orig = "\N{U+C0}";
+    *a = $orig;
+    is(Encode::encode('utf-8', *a), "*main::\xC3\x80", 'passing typeglob to UTF-8 encode');
+}
diff --git a/t/magic.t b/t/magic.t
@@ -0,0 +1,141 @@
+BEGIN {
+    if ($ENV{'PERL_CORE'}) {
+        chdir 't';
+        unshift @INC, '../lib';
+    }
+    require Config; import Config;
+    if ($Config{'extensions'} !~ /\bEncode\b/) {
+      print "1..0 # Skip: Encode was not built\n";
+      exit 0;
+    }
+    if (ord("A") == 193) {
+      print "1..0 # Skip: EBCDIC\n";
+      exit 0;
+    }
+    $| = 1;
+}
+
+use strict;
+use warnings;
+
+use Encode qw(find_encoding encode decode encode_utf8 decode_utf8 is_utf8 _utf8_on _utf8_off FB_CROAK);
+
+use Test::More tests => 3*(2*(3*(4*4)+4)+4+3*3);
+
+my $ascii = find_encoding('ASCII');
+my $latin1 = find_encoding('Latin1');
+my $utf8 = find_encoding('UTF-8');
+
+my $undef = undef;
+my $ascii_str = 'ascii_str';
+my $utf8_str = 'utf8_str';
+_utf8_on($utf8_str);
+
+{
+    foreach my $str ($undef, $ascii_str, $utf8_str) {
+        foreach my $croak (0, 1) {
+            foreach my $enc ('ASCII', 'Latin1', 'UTF-8') {
+                my $mod = defined $str && $croak;
+                my $func = "encode('" . $enc . "', " . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
+                tie my $input, 'TieScalarCounter', $str;
+                my $output = encode($enc, $input, $croak ? FB_CROAK : 0);
+                is(tied($input)->{fetch}, 1, "$func processes get magic only once");
+                is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
+                is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
+                is($output, $str, "$func returns correct \$output string");
+            }
+            foreach my $enc ('ASCII', 'Latin1', 'UTF-8') {
+                my $mod = defined $str && $croak;
+                my $func = "decode('" . $enc . "', " . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
+                tie my $input, 'TieScalarCounter', $str;
+                my $output = decode($enc, $input, $croak ? FB_CROAK : 0);
+                is(tied($input)->{fetch}, 1, "$func processes get magic only once");
+                is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
+                is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
+                is($output, $str, "$func returns correct \$output string");
+            }
+            foreach my $obj ($ascii, $latin1, $utf8) {
+                my $mod = defined $str && $croak;
+                my $func = '$' . $obj->name() . '->encode(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
+                tie my $input, 'TieScalarCounter', $str;
+                my $output = $obj->encode($input, $croak ? FB_CROAK : 0);
+                is(tied($input)->{fetch}, 1, "$func processes get magic only once");
+                is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
+                is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
+                is($output, $str, "$func returns correct \$output string");
+            }
+            foreach my $obj ($ascii, $latin1, $utf8) {
+                my $mod = defined $str && $croak;
+                my $func = '$' . $obj->name() . '->decode(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
+                tie my $input, 'TieScalarCounter', $str;
+                my $output = $obj->decode($input, $croak ? FB_CROAK : 0);
+                is(tied($input)->{fetch}, 1, "$func processes get magic only once");
+                is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
+                is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
+                is($output, $str, "$func returns correct \$output string");
+            }
+            {
+                my $mod = defined $str && $croak;
+                my $func = 'decode_utf8(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
+                tie my $input, 'TieScalarCounter', $str;
+                my $output = decode_utf8($input, $croak ? FB_CROAK : 0);
+                is(tied($input)->{fetch}, 1, "$func processes get magic only once");
+                is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
+                is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
+                is($output, $str, "$func returns correct \$output string");
+            }
+        }
+        {
+            my $func = 'encode_utf8(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')';
+            tie my $input, 'TieScalarCounter', $str;
+            my $output = encode_utf8($input);
+            is(tied($input)->{fetch}, 1, "$func processes get magic only once");
+            is(tied($input)->{store}, 0, "$func does not process set magic");
+            is($input, $str, "$func does not modify \$input string");
+            is($output, $str, "$func returns correct \$output string");
+        }
+        {
+            my $func = '_utf8_on(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')';
+            tie my $input, 'TieScalarCounter', $str;
+            _utf8_on($input);
+            is(tied($input)->{fetch}, 1, "$func processes get magic only once");
+            is(tied($input)->{store}, defined $str ? 1 : 0, "$func " . (defined $str ? 'processes set magic only once' : 'does not process set magic'));
+            defined $str ? ok(is_utf8($input), "$func sets UTF8 status flag") : ok(!is_utf8($input), "$func does not set UTF8 status flag");
+        }
+        {
+            my $func = '_utf8_off(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')';
+            tie my $input, 'TieScalarCounter', $str;
+            _utf8_off($input);
+            is(tied($input)->{fetch}, 1, "$func processes get magic only once");
+            is(tied($input)->{store}, defined $str ? 1 : 0, "$func " . (defined $str ? 'processes set magic only once' : 'does not process set magic'));
+            ok(!is_utf8($input), "$func unsets UTF8 status flag");
+        }
+        {
+            my $func = 'is_utf8(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')';
+            tie my $input, 'TieScalarCounter', $str;
+            my $utf8 = is_utf8($input);
+            is(tied($input)->{fetch}, 1, "$func processes get magic only once");
+            is(tied($input)->{store}, 0, "$func does not process set magic");
+            is($utf8, is_utf8($str), "$func returned correct state");
+        }
+    }
+}
+
+package TieScalarCounter;
+
+sub TIESCALAR {
+    my ($class, $value) = @_;
+    return bless { fetch => 0, store => 0, value => $value }, $class;
+}
+
+sub FETCH {
+    my ($self) = @_;
+    $self->{fetch}++;
+    return $self->{value};
+}
+
+sub STORE {
+    my ($self, $value) = @_;
+    $self->{store}++;
+    $self->{value} = $value;
+}
diff --git a/t/rt85489.t b/t/rt85489.t
@@ -0,0 +1,48 @@
+BEGIN {
+    if ($ENV{'PERL_CORE'}) {
+        chdir 't';
+        unshift @INC, '../lib';
+    }
+    require Config; import Config;
+    if ($Config{'extensions'} !~ /\bEncode\b/) {
+      print "1..0 # Skip: Encode was not built\n";
+      exit 0;
+    }
+    if (ord("A") == 193) {
+      print "1..0 # Skip: EBCDIC\n";
+      exit 0;
+    }
+    $| = 1;
+}
+
+use strict;
+use warnings;
+
+use Test::More tests => 8;
+
+use Encode;
+
+my $ascii = Encode::find_encoding("ascii");
+my $orig = "str";
+
+my $str = $orig;
+ok !Encode::is_utf8($str), "UTF8 flag is not set on input string before ascii encode";
+$ascii->encode($str);
+ok !Encode::is_utf8($str), "UTF8 flag is not set on input string after ascii encode";
+
+$str = $orig;
+ok !Encode::is_utf8($str), "UTF8 flag is not set on input string before Encode::encode ascii";
+Encode::encode("ascii", $str);
+ok !Encode::is_utf8($str), "UTF8 flag is not set on input string after Encode::encode ascii";
+
+$str = $orig;
+Encode::_utf8_on($str);
+ok Encode::is_utf8($str), "UTF8 flag is set on input string before ascii decode";
+$ascii->decode($str);
+ok Encode::is_utf8($str), "UTF8 flag is set on input string after ascii decode";
+
+$str = $orig;
+Encode::_utf8_on($str);
+ok Encode::is_utf8($str), "UTF8 flag is set on input string before Encode::decode ascii";
+Encode::decode("ascii", $str);
+ok Encode::is_utf8($str), "UTF8 flag is set on input string after Encode::decode ascii";
diff --git a/t/utf8ref.t b/t/utf8ref.t
@@ -14,10 +14,10 @@ my $u = find_encoding('UTF-8');
 my $r = [];
 no warnings 'uninitialized';
 is encode_utf8($r), ''.$r;
-is $u->encode($r), '';
+is $u->encode($r), ''.$r;
 $r = {};
 is decode_utf8($r), ''.$r;
-is $u->decode($r), '';
+is $u->decode($r), ''.$r;
 use warnings 'uninitialized';
 
 is encode_utf8(undef), undef;