Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix handling of undef, ref, typeglob, UTF8, COW and magic scalar argument in all XS functions #70

Merged
merged 3 commits into from
Oct 26, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
256 changes: 134 additions & 122 deletions Encode.xs

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,14 @@ t/jisx0212.utf test data
t/jperl.t test script
t/ksc5601.enc test data
t/ksc5601.utf test data
t/magic.t test script
t/mime-header.t test script
t/mime-name.t test script
t/mime_header_iso2022jp.t test script
t/perlio.t test script
t/piconv.t test script
t/rt.pl even more test script
t/rt85489.t test script
t/taint.t test script
t/unibench.pl benchmark script
t/utf8ref.t test script
Expand Down
3 changes: 2 additions & 1 deletion lib/Encode/CN/HZ.pm
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ sub decode ($$;$) {
else { # GB mode; the byte ranges are as in RFC 1843.
no warnings 'uninitialized';
if ( $str =~ s/^((?:[\x21-\x77][\x21-\x7E])+)// ) {
$ret .= $GB->decode( $1, $chk );
my $prefix = $1;
$ret .= $GB->decode( $prefix, $chk );
}
elsif ( $str =~ s/^\x7E\x7D// ) { # '~}'
$in_ascii = 1;
Expand Down
4 changes: 1 addition & 3 deletions lib/Encode/Encoder.pm
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,7 @@ sub AUTOLOAD {
from_to( $self->{data}, $self->{encoding}, $obj->name, 1 );
}
else {
if ( defined($self->{data}) ) {
$self->{data} = $obj->encode( $self->{data}, 1 );
}
$self->{data} = $obj->encode( $self->{data}, 1 );
}
$self->{encoding} = $obj->name;
return $self;
Expand Down
54 changes: 53 additions & 1 deletion t/decode.t
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#
use strict;
use Encode qw(decode_utf8 FB_CROAK find_encoding decode);
use Test::More tests => 5;
use Test::More tests => 17;

sub croak_ok(&) {
my $code = shift;
Expand Down Expand Up @@ -32,3 +32,55 @@ SKIP: {
*a = $orig;
is($latin1->decode(*a), '*main::'.$orig, '[cpan #115168] passing typeglobs to decode');
}

$orig = "\x80";
$orig =~ /(.)/;
is($latin1->decode($1), "\N{U+0080}", 'passing magic regex to latin1 decode');

$orig = "\x80";
*a = $orig;
is($latin1->decode(*a), "*main::\N{U+0080}", 'passing typeglob to latin1 decode');

$orig = "\N{U+0080}";
$orig =~ /(.)/;
is($latin1->encode($1), "\x80", 'passing magic regex to latin1 encode');

$orig = "\xC3\x80";
$orig =~ /(..)/;
is(Encode::decode_utf8($1), "\N{U+C0}", 'passing magic regex to Encode::decode_utf8');

$orig = "\xC3\x80";
*a = $orig;
is(Encode::decode_utf8(*a), "*main::\N{U+C0}", 'passing typeglob to Encode::decode_utf8');

$orig = "\N{U+C0}";
$orig =~ /(.)/;
is(Encode::encode_utf8($1), "\xC3\x80", 'passing magic regex to Encode::encode_utf8');

$orig = "\xC3\x80";
$orig =~ /(..)/;
is(Encode::decode('utf-8', $1), "\N{U+C0}", 'passing magic regex to UTF-8 decode');

$orig = "\xC3\x80";
*a = $orig;
is(Encode::decode('utf-8', *a), "*main::\N{U+C0}", 'passing typeglob to UTF-8 decode');

$orig = "\N{U+C0}";
$orig =~ /(.)/;
is(Encode::encode('utf-8', $1), "\xC3\x80", 'passing magic regex to UTF-8 encode');

SKIP: {
skip "Perl Version ($]) is older than v5.16", 3 if $] < 5.016;

$orig = "\N{U+0080}";
*a = $orig;
is($latin1->encode(*a), "*main::\x80", 'passing typeglob to latin1 encode');

$orig = "\N{U+C0}";
*a = $orig;
is(Encode::encode_utf8(*a), "*main::\xC3\x80", 'passing typeglob to Encode::encode_utf8');

$orig = "\N{U+C0}";
*a = $orig;
is(Encode::encode('utf-8', *a), "*main::\xC3\x80", 'passing typeglob to UTF-8 encode');
}
141 changes: 141 additions & 0 deletions t/magic.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
BEGIN {
if ($ENV{'PERL_CORE'}) {
chdir 't';
unshift @INC, '../lib';
}
require Config; import Config;
if ($Config{'extensions'} !~ /\bEncode\b/) {
print "1..0 # Skip: Encode was not built\n";
exit 0;
}
if (ord("A") == 193) {
print "1..0 # Skip: EBCDIC\n";
exit 0;
}
$| = 1;
}

use strict;
use warnings;

use Encode qw(find_encoding encode decode encode_utf8 decode_utf8 is_utf8 _utf8_on _utf8_off FB_CROAK);

use Test::More tests => 3*(2*(3*(4*4)+4)+4+3*3);

my $ascii = find_encoding('ASCII');
my $latin1 = find_encoding('Latin1');
my $utf8 = find_encoding('UTF-8');

my $undef = undef;
my $ascii_str = 'ascii_str';
my $utf8_str = 'utf8_str';
_utf8_on($utf8_str);

{
foreach my $str ($undef, $ascii_str, $utf8_str) {
foreach my $croak (0, 1) {
foreach my $enc ('ASCII', 'Latin1', 'UTF-8') {
my $mod = defined $str && $croak;
my $func = "encode('" . $enc . "', " . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
tie my $input, 'TieScalarCounter', $str;
my $output = encode($enc, $input, $croak ? FB_CROAK : 0);
is(tied($input)->{fetch}, 1, "$func processes get magic only once");
is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
is($output, $str, "$func returns correct \$output string");
}
foreach my $enc ('ASCII', 'Latin1', 'UTF-8') {
my $mod = defined $str && $croak;
my $func = "decode('" . $enc . "', " . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
tie my $input, 'TieScalarCounter', $str;
my $output = decode($enc, $input, $croak ? FB_CROAK : 0);
is(tied($input)->{fetch}, 1, "$func processes get magic only once");
is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
is($output, $str, "$func returns correct \$output string");
}
foreach my $obj ($ascii, $latin1, $utf8) {
my $mod = defined $str && $croak;
my $func = '$' . $obj->name() . '->encode(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
tie my $input, 'TieScalarCounter', $str;
my $output = $obj->encode($input, $croak ? FB_CROAK : 0);
is(tied($input)->{fetch}, 1, "$func processes get magic only once");
is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
is($output, $str, "$func returns correct \$output string");
}
foreach my $obj ($ascii, $latin1, $utf8) {
my $mod = defined $str && $croak;
my $func = '$' . $obj->name() . '->decode(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
tie my $input, 'TieScalarCounter', $str;
my $output = $obj->decode($input, $croak ? FB_CROAK : 0);
is(tied($input)->{fetch}, 1, "$func processes get magic only once");
is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
is($output, $str, "$func returns correct \$output string");
}
{
my $mod = defined $str && $croak;
my $func = 'decode_utf8(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
tie my $input, 'TieScalarCounter', $str;
my $output = decode_utf8($input, $croak ? FB_CROAK : 0);
is(tied($input)->{fetch}, 1, "$func processes get magic only once");
is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
is($output, $str, "$func returns correct \$output string");
}
}
{
my $func = 'encode_utf8(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')';
tie my $input, 'TieScalarCounter', $str;
my $output = encode_utf8($input);
is(tied($input)->{fetch}, 1, "$func processes get magic only once");
is(tied($input)->{store}, 0, "$func does not process set magic");
is($input, $str, "$func does not modify \$input string");
is($output, $str, "$func returns correct \$output string");
}
{
my $func = '_utf8_on(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')';
tie my $input, 'TieScalarCounter', $str;
_utf8_on($input);
is(tied($input)->{fetch}, 1, "$func processes get magic only once");
is(tied($input)->{store}, defined $str ? 1 : 0, "$func " . (defined $str ? 'processes set magic only once' : 'does not process set magic'));
defined $str ? ok(is_utf8($input), "$func sets UTF8 status flag") : ok(!is_utf8($input), "$func does not set UTF8 status flag");
}
{
my $func = '_utf8_off(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')';
tie my $input, 'TieScalarCounter', $str;
_utf8_off($input);
is(tied($input)->{fetch}, 1, "$func processes get magic only once");
is(tied($input)->{store}, defined $str ? 1 : 0, "$func " . (defined $str ? 'processes set magic only once' : 'does not process set magic'));
ok(!is_utf8($input), "$func unsets UTF8 status flag");
}
{
my $func = 'is_utf8(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')';
tie my $input, 'TieScalarCounter', $str;
my $utf8 = is_utf8($input);
is(tied($input)->{fetch}, 1, "$func processes get magic only once");
is(tied($input)->{store}, 0, "$func does not process set magic");
is($utf8, is_utf8($str), "$func returned correct state");
}
}
}

package TieScalarCounter;

sub TIESCALAR {
my ($class, $value) = @_;
return bless { fetch => 0, store => 0, value => $value }, $class;
}

sub FETCH {
my ($self) = @_;
$self->{fetch}++;
return $self->{value};
}

sub STORE {
my ($self, $value) = @_;
$self->{store}++;
$self->{value} = $value;
}
48 changes: 48 additions & 0 deletions t/rt85489.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
BEGIN {
if ($ENV{'PERL_CORE'}) {
chdir 't';
unshift @INC, '../lib';
}
require Config; import Config;
if ($Config{'extensions'} !~ /\bEncode\b/) {
print "1..0 # Skip: Encode was not built\n";
exit 0;
}
if (ord("A") == 193) {
print "1..0 # Skip: EBCDIC\n";
exit 0;
}
$| = 1;
}

use strict;
use warnings;

use Test::More tests => 8;

use Encode;

my $ascii = Encode::find_encoding("ascii");
my $orig = "str";

my $str = $orig;
ok !Encode::is_utf8($str), "UTF8 flag is not set on input string before ascii encode";
$ascii->encode($str);
ok !Encode::is_utf8($str), "UTF8 flag is not set on input string after ascii encode";

$str = $orig;
ok !Encode::is_utf8($str), "UTF8 flag is not set on input string before Encode::encode ascii";
Encode::encode("ascii", $str);
ok !Encode::is_utf8($str), "UTF8 flag is not set on input string after Encode::encode ascii";

$str = $orig;
Encode::_utf8_on($str);
ok Encode::is_utf8($str), "UTF8 flag is set on input string before ascii decode";
$ascii->decode($str);
ok Encode::is_utf8($str), "UTF8 flag is set on input string after ascii decode";

$str = $orig;
Encode::_utf8_on($str);
ok Encode::is_utf8($str), "UTF8 flag is set on input string before Encode::decode ascii";
Encode::decode("ascii", $str);
ok Encode::is_utf8($str), "UTF8 flag is set on input string after Encode::decode ascii";
4 changes: 2 additions & 2 deletions t/utf8ref.t
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ my $u = find_encoding('UTF-8');
my $r = [];
no warnings 'uninitialized';
is encode_utf8($r), ''.$r;
is $u->encode($r), '';
is $u->encode($r), ''.$r;
$r = {};
is decode_utf8($r), ''.$r;
is $u->decode($r), '';
is $u->decode($r), ''.$r;
use warnings 'uninitialized';

is encode_utf8(undef), undef;
Expand Down