diff --git a/Encode.xs b/Encode.xs index 8c990ea..888e785 100644 --- a/Encode.xs +++ b/Encode.xs @@ -31,6 +31,10 @@ UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) +#ifndef SvIV_nomg +#define SvIV_nomg SvIV +#endif + #ifdef UTF8_DISALLOW_ILLEGAL_INTERCHANGE # define UTF8_ALLOW_STRICT UTF8_DISALLOW_ILLEGAL_INTERCHANGE #else @@ -76,6 +80,37 @@ call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) PERL_UNUSED_VAR(orig); } +static void +utf8_safe_downgrade(pTHX_ SV ** src, U8 ** s, STRLEN * slen, bool modify) +{ + if (!modify) { + SV *tmp = sv_2mortal(newSVpvn(*s, *slen)); + SvUTF8_on(tmp); + if (SvTAINTED(*src)) + SvTAINTED_on(tmp); + *src = tmp; + *s = SvPVX(*src); + } + if (*slen) { + if (!utf8_to_bytes(*s, slen)) + croak("Wide character"); + SvCUR_set(*src, *slen); + } + SvUTF8_off(*src); +} + +static void +utf8_safe_upgrade(pTHX_ SV ** src, U8 ** s, STRLEN * slen, bool modify) +{ + if (!modify) { + SV *tmp = sv_2mortal(newSVpvn(*s, *slen)); + if (SvTAINTED(*src)) + SvTAINTED_on(tmp); + *src = tmp; + } + sv_utf8_upgrade_nomg(*src); + *s = SvPV_nomg(*src, *slen); +} #define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s" #define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" @@ -104,12 +139,10 @@ do_fallback_cb(pTHX_ UV ch, SV *fallback_cb) } static SV * -encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, +encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, U8 * s, STRLEN slen, int check, STRLEN * offset, SV * term, int * retcode, SV *fallback_cb) { - STRLEN slen; - U8 *s = (U8 *) SvPV(src, slen); STRLEN tlen = slen; STRLEN ddone = 0; STRLEN sdone = 0; @@ -279,6 +312,7 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, sv_setpvn(src, (char*)s+slen, sdone); } SvCUR_set(src, sdone); + SvSETMAGIC(src); } /* warn("check = 0x%X, code = 0x%d\n", check, code); */ @@ -392,7 +426,7 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, } else { fallback_cb = &PL_sv_undef; - check = SvIV(check_sv); + check = SvIV_nomg(check_sv); } SvPOK_only(dst); @@ -504,10 +538,6 @@ MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_ PROTOTYPES: DISABLE -#ifndef SvIsCOW -# define SvIsCOW(sv) (SvREADONLY(sv) && SvFAKE(sv)) -#endif - void Method_decode_xs(obj,src,check_sv = &PL_sv_no) SV * obj @@ -520,23 +550,26 @@ PREINIT: SV *dst; bool renewed = 0; int check; + bool modify; +INIT: + SvGETMAGIC(src); + SvGETMAGIC(check_sv); + check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); + modify = (check && !(check & ENCODE_LEAVE_SRC)); CODE: { - dSP; ENTER; SAVETMPS; - if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0)); - check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv); - if (!(check & ENCODE_LEAVE_SRC) && SvIsCOW(src)) { - /* - * disassociate from any other scalars before doing - * in-place modifications - */ - sv_force_normal(src); - } - s = (U8 *) SvPV(src, slen); - e = (U8 *) SvEND(src); + if (!SvOK(src)) + XSRETURN_UNDEF; + s = modify ? SvPV_force_nomg(src, slen) : SvPV_nomg(src, slen); + if (SvUTF8(src)) + utf8_safe_downgrade(aTHX_ &src, &s, &slen, modify); + e = s+slen; + /* * PerlIO check -- we assume the object is of PerlIO if renewed */ + dSP; + ENTER; SAVETMPS; PUSHMARK(sp); XPUSHs(obj); PUTBACK; @@ -551,28 +584,17 @@ CODE: FREETMPS; LEAVE; /* end PerlIO check */ - if (SvUTF8(src)) { - s = utf8_to_bytes(s,&slen); - if (s) { - SvCUR_set(src,slen); - SvUTF8_off(src); - e = s+slen; - } - else { - croak("Cannot decode string with wide characters"); - } - } - dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */ s = process_utf8(aTHX_ dst, s, e, check_sv, 0, strict_utf8(aTHX_ obj), renewed); /* Clear out translated part of source unless asked not to */ - if (check && !(check & ENCODE_LEAVE_SRC)){ + if (modify) { slen = e-s; if (slen) { sv_setpvn(src, (char*)s, slen); } SvCUR_set(src, slen); + SvSETMAGIC(src); } SvUTF8_on(dst); if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */ @@ -591,12 +613,18 @@ PREINIT: U8 *e; SV *dst; int check; + bool modify; +INIT: + SvGETMAGIC(src); + SvGETMAGIC(check_sv); + check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); + modify = (check && !(check & ENCODE_LEAVE_SRC)); CODE: { - check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv); - if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0)); - s = (U8 *) SvPV(src, slen); - e = (U8 *) SvEND(src); + if (!SvOK(src)) + XSRETURN_UNDEF; + s = modify ? SvPV_force_nomg(src, slen) : SvPV_nomg(src, slen); + e = s+slen; dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */ if (SvUTF8(src)) { /* Already encoded */ @@ -632,12 +660,13 @@ CODE: } /* Clear out translated part of source unless asked not to */ - if (check && !(check & ENCODE_LEAVE_SRC)){ + if (modify) { slen = e-s; if (slen) { sv_setpvn(src, (char*)s, slen); } SvCUR_set(src, slen); + SvSETMAGIC(src); } SvPOK_only(dst); SvUTF8_off(dst); @@ -686,23 +715,26 @@ SV * src SV * off SV * term SV * check_sv -CODE: -{ - int check; - SV *fallback_cb = &PL_sv_undef; +INIT: + SvGETMAGIC(src); + SvGETMAGIC(check_sv); + int check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); + SV *fallback_cb = SvROK(check_sv) ? check_sv : &PL_sv_undef; + bool modify = (check && !(check & ENCODE_LEAVE_SRC)); encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); STRLEN offset = (STRLEN)SvIV(off); int code = 0; - if (SvUTF8(src)) { - sv_utf8_downgrade(src, FALSE); - } - if (SvROK(check_sv)){ - fallback_cb = check_sv; - check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ - }else{ - check = SvIV(check_sv); - } - sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, check, + U8 *s; + STRLEN slen; + SV *tmp; +CODE: +{ + if (!SvOK(src)) + XSRETURN_NO; + s = modify ? SvPV_force_nomg(src, slen) : SvPV_nomg(src, slen); + if (SvUTF8(src)) + utf8_safe_downgrade(aTHX_ &src, &s, &slen, modify); + sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, s, slen, check, &offset, term, &code, fallback_cb)); SvIV_set(off, (IV)offset); if (code == ENCODE_FOUND_TERM) { @@ -718,71 +750,50 @@ Method_decode(obj,src,check_sv = &PL_sv_no) SV * obj SV * src SV * check_sv +INIT: + SvGETMAGIC(src); + SvGETMAGIC(check_sv); + int check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); + SV *fallback_cb = SvROK(check_sv) ? check_sv : &PL_sv_undef; + bool modify = (check && !(check & ENCODE_LEAVE_SRC)); + encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + U8 *s; + STRLEN slen; CODE: { - int check; - SV *fallback_cb = &PL_sv_undef; - encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); - if (SvREADONLY(src) || SvSMAGICAL(src) || SvGMAGICAL(src) || !SvPOK(src)) { - SV *tmp; - tmp = sv_newmortal(); - sv_copypv(tmp, src); - src = tmp; - } - if (SvUTF8(src)) { - sv_utf8_downgrade(src, FALSE); - } - if (SvROK(check_sv)){ - fallback_cb = check_sv; - check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ - }else{ - check = SvIV(check_sv); - } - ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check, + if (!SvOK(src)) + XSRETURN_UNDEF; + s = modify ? SvPV_force_nomg(src, slen) : SvPV_nomg(src, slen); + if (SvUTF8(src)) + utf8_safe_downgrade(aTHX_ &src, &s, &slen, modify); + ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, s, slen, check, NULL, Nullsv, NULL, fallback_cb); SvUTF8_on(ST(0)); XSRETURN(1); } - -#ifndef SvPV_force_nolen -# define SvPV_force_nolen(sv) SvPV_force_flags_nolen(sv, SV_GMAGIC) -#endif - -#ifndef SvPV_force_flags_nolen -# define SvPV_force_flags_nolen(sv, flags) \ - ((SvFLAGS(sv) & (SVf_POK|SVf_THINKFIRST)) == SVf_POK \ - ? SvPVX(sv) : sv_pvn_force_flags(sv, &PL_na, flags)) -#endif - void Method_encode(obj,src,check_sv = &PL_sv_no) SV * obj SV * src SV * check_sv +INIT: + SvGETMAGIC(src); + SvGETMAGIC(check_sv); + int check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv); + SV *fallback_cb = SvROK(check_sv) ? check_sv : &PL_sv_undef; + bool modify = (check && !(check & ENCODE_LEAVE_SRC)); + encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); + U8 *s; + STRLEN slen; CODE: { - int check; - SV *fallback_cb = &PL_sv_undef; - encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); - if (SvREADONLY(src) || SvSMAGICAL(src) || SvGMAGICAL(src) || !SvPOK(src)) { - /* - SV *tmp; - tmp = sv_newmortal(); - sv_copypv(tmp, src); - src = tmp; - */ - src = sv_mortalcopy(src); - SvPV_force_nolen(src); - } - sv_utf8_upgrade(src); - if (SvROK(check_sv)){ - fallback_cb = check_sv; - check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ - }else{ - check = SvIV(check_sv); - } - ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check, + if (!SvOK(src)) + XSRETURN_UNDEF; + s = modify ? SvPV_force_nomg(src, slen) : SvPV_nomg(src, slen); + if (!SvUTF8(src)) + utf8_safe_upgrade(aTHX_ &src, &s, &slen, modify); + ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, s, slen, check, NULL, Nullsv, NULL, fallback_cb); XSRETURN(1); } @@ -951,17 +962,16 @@ bool is_utf8(sv, check = 0) SV * sv int check +PREINIT: + char *str; + STRLEN len; CODE: { - if (SvGMAGICAL(sv)) /* it could be $1, for example */ - sv = newSVsv(sv); /* GMAGIG will be done */ + SvGETMAGIC(sv); /* SvGETMAGIC() can modify SvOK flag */ + str = SvOK(sv) ? SvPV_nomg(sv, len) : NULL; /* SvPV() can modify SvUTF8 flag */ RETVAL = SvUTF8(sv) ? TRUE : FALSE; - if (RETVAL && - check && - !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) + if (RETVAL && check && (!str || !is_utf8_string((U8 *)str, len))) RETVAL = FALSE; - if (sv != ST(0)) - SvREFCNT_dec(sv); /* it was a temp copy */ } OUTPUT: RETVAL @@ -971,13 +981,14 @@ _utf8_on(sv) SV * sv CODE: { - if (SvPOK(sv)) { - SV *rsv = newSViv(SvUTF8(sv)); - RETVAL = rsv; - if (SvIsCOW(sv)) sv_force_normal(sv); - SvUTF8_on(sv); + SvGETMAGIC(sv); + if (SvPOKp(sv)) { + if (SvTHINKFIRST(sv)) sv_force_normal(sv); + RETVAL = newSViv(SvUTF8(sv)); + SvUTF8_on(sv); + SvSETMAGIC(sv); } else { - RETVAL = &PL_sv_undef; + RETVAL = &PL_sv_undef; } } OUTPUT: @@ -988,13 +999,14 @@ _utf8_off(sv) SV * sv CODE: { - if (SvPOK(sv)) { - SV *rsv = newSViv(SvUTF8(sv)); - RETVAL = rsv; - if (SvIsCOW(sv)) sv_force_normal(sv); - SvUTF8_off(sv); + SvGETMAGIC(sv); + if (SvPOKp(sv)) { + if (SvTHINKFIRST(sv)) sv_force_normal(sv); + RETVAL = newSViv(SvUTF8(sv)); + SvUTF8_off(sv); + SvSETMAGIC(sv); } else { - RETVAL = &PL_sv_undef; + RETVAL = &PL_sv_undef; } } OUTPUT: diff --git a/MANIFEST b/MANIFEST index a74c0bf..14d8222 100644 --- a/MANIFEST +++ b/MANIFEST @@ -94,12 +94,14 @@ t/jisx0212.utf test data t/jperl.t test script t/ksc5601.enc test data t/ksc5601.utf test data +t/magic.t test script t/mime-header.t test script t/mime-name.t test script t/mime_header_iso2022jp.t test script t/perlio.t test script t/piconv.t test script t/rt.pl even more test script +t/rt85489.t test script t/taint.t test script t/unibench.pl benchmark script t/utf8ref.t test script diff --git a/lib/Encode/CN/HZ.pm b/lib/Encode/CN/HZ.pm index f035d82..22989b0 100644 --- a/lib/Encode/CN/HZ.pm +++ b/lib/Encode/CN/HZ.pm @@ -49,7 +49,8 @@ sub decode ($$;$) { else { # GB mode; the byte ranges are as in RFC 1843. no warnings 'uninitialized'; if ( $str =~ s/^((?:[\x21-\x77][\x21-\x7E])+)// ) { - $ret .= $GB->decode( $1, $chk ); + my $prefix = $1; + $ret .= $GB->decode( $prefix, $chk ); } elsif ( $str =~ s/^\x7E\x7D// ) { # '~}' $in_ascii = 1; diff --git a/lib/Encode/Encoder.pm b/lib/Encode/Encoder.pm index fef4e9b..23e0349 100644 --- a/lib/Encode/Encoder.pm +++ b/lib/Encode/Encoder.pm @@ -85,9 +85,7 @@ sub AUTOLOAD { from_to( $self->{data}, $self->{encoding}, $obj->name, 1 ); } else { - if ( defined($self->{data}) ) { - $self->{data} = $obj->encode( $self->{data}, 1 ); - } + $self->{data} = $obj->encode( $self->{data}, 1 ); } $self->{encoding} = $obj->name; return $self; diff --git a/t/decode.t b/t/decode.t index 6b24a8f..8aefb15 100644 --- a/t/decode.t +++ b/t/decode.t @@ -3,7 +3,7 @@ # use strict; use Encode qw(decode_utf8 FB_CROAK find_encoding decode); -use Test::More tests => 5; +use Test::More tests => 17; sub croak_ok(&) { my $code = shift; @@ -32,3 +32,55 @@ SKIP: { *a = $orig; is($latin1->decode(*a), '*main::'.$orig, '[cpan #115168] passing typeglobs to decode'); } + +$orig = "\x80"; +$orig =~ /(.)/; +is($latin1->decode($1), "\N{U+0080}", 'passing magic regex to latin1 decode'); + +$orig = "\x80"; +*a = $orig; +is($latin1->decode(*a), "*main::\N{U+0080}", 'passing typeglob to latin1 decode'); + +$orig = "\N{U+0080}"; +$orig =~ /(.)/; +is($latin1->encode($1), "\x80", 'passing magic regex to latin1 encode'); + +$orig = "\xC3\x80"; +$orig =~ /(..)/; +is(Encode::decode_utf8($1), "\N{U+C0}", 'passing magic regex to Encode::decode_utf8'); + +$orig = "\xC3\x80"; +*a = $orig; +is(Encode::decode_utf8(*a), "*main::\N{U+C0}", 'passing typeglob to Encode::decode_utf8'); + +$orig = "\N{U+C0}"; +$orig =~ /(.)/; +is(Encode::encode_utf8($1), "\xC3\x80", 'passing magic regex to Encode::encode_utf8'); + +$orig = "\xC3\x80"; +$orig =~ /(..)/; +is(Encode::decode('utf-8', $1), "\N{U+C0}", 'passing magic regex to UTF-8 decode'); + +$orig = "\xC3\x80"; +*a = $orig; +is(Encode::decode('utf-8', *a), "*main::\N{U+C0}", 'passing typeglob to UTF-8 decode'); + +$orig = "\N{U+C0}"; +$orig =~ /(.)/; +is(Encode::encode('utf-8', $1), "\xC3\x80", 'passing magic regex to UTF-8 encode'); + +SKIP: { + skip "Perl Version ($]) is older than v5.16", 3 if $] < 5.016; + + $orig = "\N{U+0080}"; + *a = $orig; + is($latin1->encode(*a), "*main::\x80", 'passing typeglob to latin1 encode'); + + $orig = "\N{U+C0}"; + *a = $orig; + is(Encode::encode_utf8(*a), "*main::\xC3\x80", 'passing typeglob to Encode::encode_utf8'); + + $orig = "\N{U+C0}"; + *a = $orig; + is(Encode::encode('utf-8', *a), "*main::\xC3\x80", 'passing typeglob to UTF-8 encode'); +} diff --git a/t/magic.t b/t/magic.t new file mode 100644 index 0000000..447e963 --- /dev/null +++ b/t/magic.t @@ -0,0 +1,141 @@ +BEGIN { + if ($ENV{'PERL_CORE'}) { + chdir 't'; + unshift @INC, '../lib'; + } + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } + $| = 1; +} + +use strict; +use warnings; + +use Encode qw(find_encoding encode decode encode_utf8 decode_utf8 is_utf8 _utf8_on _utf8_off FB_CROAK); + +use Test::More tests => 3*(2*(3*(4*4)+4)+4+3*3); + +my $ascii = find_encoding('ASCII'); +my $latin1 = find_encoding('Latin1'); +my $utf8 = find_encoding('UTF-8'); + +my $undef = undef; +my $ascii_str = 'ascii_str'; +my $utf8_str = 'utf8_str'; +_utf8_on($utf8_str); + +{ + foreach my $str ($undef, $ascii_str, $utf8_str) { + foreach my $croak (0, 1) { + foreach my $enc ('ASCII', 'Latin1', 'UTF-8') { + my $mod = defined $str && $croak; + my $func = "encode('" . $enc . "', " . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')'; + tie my $input, 'TieScalarCounter', $str; + my $output = encode($enc, $input, $croak ? FB_CROAK : 0); + is(tied($input)->{fetch}, 1, "$func processes get magic only once"); + is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic')); + is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string'); + is($output, $str, "$func returns correct \$output string"); + } + foreach my $enc ('ASCII', 'Latin1', 'UTF-8') { + my $mod = defined $str && $croak; + my $func = "decode('" . $enc . "', " . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')'; + tie my $input, 'TieScalarCounter', $str; + my $output = decode($enc, $input, $croak ? FB_CROAK : 0); + is(tied($input)->{fetch}, 1, "$func processes get magic only once"); + is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic')); + is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string'); + is($output, $str, "$func returns correct \$output string"); + } + foreach my $obj ($ascii, $latin1, $utf8) { + my $mod = defined $str && $croak; + my $func = '$' . $obj->name() . '->encode(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')'; + tie my $input, 'TieScalarCounter', $str; + my $output = $obj->encode($input, $croak ? FB_CROAK : 0); + is(tied($input)->{fetch}, 1, "$func processes get magic only once"); + is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic')); + is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string'); + is($output, $str, "$func returns correct \$output string"); + } + foreach my $obj ($ascii, $latin1, $utf8) { + my $mod = defined $str && $croak; + my $func = '$' . $obj->name() . '->decode(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')'; + tie my $input, 'TieScalarCounter', $str; + my $output = $obj->decode($input, $croak ? FB_CROAK : 0); + is(tied($input)->{fetch}, 1, "$func processes get magic only once"); + is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic')); + is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string'); + is($output, $str, "$func returns correct \$output string"); + } + { + my $mod = defined $str && $croak; + my $func = 'decode_utf8(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')'; + tie my $input, 'TieScalarCounter', $str; + my $output = decode_utf8($input, $croak ? FB_CROAK : 0); + is(tied($input)->{fetch}, 1, "$func processes get magic only once"); + is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic')); + is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string'); + is($output, $str, "$func returns correct \$output string"); + } + } + { + my $func = 'encode_utf8(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')'; + tie my $input, 'TieScalarCounter', $str; + my $output = encode_utf8($input); + is(tied($input)->{fetch}, 1, "$func processes get magic only once"); + is(tied($input)->{store}, 0, "$func does not process set magic"); + is($input, $str, "$func does not modify \$input string"); + is($output, $str, "$func returns correct \$output string"); + } + { + my $func = '_utf8_on(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')'; + tie my $input, 'TieScalarCounter', $str; + _utf8_on($input); + is(tied($input)->{fetch}, 1, "$func processes get magic only once"); + is(tied($input)->{store}, defined $str ? 1 : 0, "$func " . (defined $str ? 'processes set magic only once' : 'does not process set magic')); + defined $str ? ok(is_utf8($input), "$func sets UTF8 status flag") : ok(!is_utf8($input), "$func does not set UTF8 status flag"); + } + { + my $func = '_utf8_off(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')'; + tie my $input, 'TieScalarCounter', $str; + _utf8_off($input); + is(tied($input)->{fetch}, 1, "$func processes get magic only once"); + is(tied($input)->{store}, defined $str ? 1 : 0, "$func " . (defined $str ? 'processes set magic only once' : 'does not process set magic')); + ok(!is_utf8($input), "$func unsets UTF8 status flag"); + } + { + my $func = 'is_utf8(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')'; + tie my $input, 'TieScalarCounter', $str; + my $utf8 = is_utf8($input); + is(tied($input)->{fetch}, 1, "$func processes get magic only once"); + is(tied($input)->{store}, 0, "$func does not process set magic"); + is($utf8, is_utf8($str), "$func returned correct state"); + } + } +} + +package TieScalarCounter; + +sub TIESCALAR { + my ($class, $value) = @_; + return bless { fetch => 0, store => 0, value => $value }, $class; +} + +sub FETCH { + my ($self) = @_; + $self->{fetch}++; + return $self->{value}; +} + +sub STORE { + my ($self, $value) = @_; + $self->{store}++; + $self->{value} = $value; +} diff --git a/t/rt85489.t b/t/rt85489.t new file mode 100644 index 0000000..3b28e35 --- /dev/null +++ b/t/rt85489.t @@ -0,0 +1,48 @@ +BEGIN { + if ($ENV{'PERL_CORE'}) { + chdir 't'; + unshift @INC, '../lib'; + } + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # Skip: EBCDIC\n"; + exit 0; + } + $| = 1; +} + +use strict; +use warnings; + +use Test::More tests => 8; + +use Encode; + +my $ascii = Encode::find_encoding("ascii"); +my $orig = "str"; + +my $str = $orig; +ok !Encode::is_utf8($str), "UTF8 flag is not set on input string before ascii encode"; +$ascii->encode($str); +ok !Encode::is_utf8($str), "UTF8 flag is not set on input string after ascii encode"; + +$str = $orig; +ok !Encode::is_utf8($str), "UTF8 flag is not set on input string before Encode::encode ascii"; +Encode::encode("ascii", $str); +ok !Encode::is_utf8($str), "UTF8 flag is not set on input string after Encode::encode ascii"; + +$str = $orig; +Encode::_utf8_on($str); +ok Encode::is_utf8($str), "UTF8 flag is set on input string before ascii decode"; +$ascii->decode($str); +ok Encode::is_utf8($str), "UTF8 flag is set on input string after ascii decode"; + +$str = $orig; +Encode::_utf8_on($str); +ok Encode::is_utf8($str), "UTF8 flag is set on input string before Encode::decode ascii"; +Encode::decode("ascii", $str); +ok Encode::is_utf8($str), "UTF8 flag is set on input string after Encode::decode ascii"; diff --git a/t/utf8ref.t b/t/utf8ref.t index aff098f..465fb6a 100644 --- a/t/utf8ref.t +++ b/t/utf8ref.t @@ -14,10 +14,10 @@ my $u = find_encoding('UTF-8'); my $r = []; no warnings 'uninitialized'; is encode_utf8($r), ''.$r; -is $u->encode($r), ''; +is $u->encode($r), ''.$r; $r = {}; is decode_utf8($r), ''.$r; -is $u->decode($r), ''; +is $u->decode($r), ''.$r; use warnings 'uninitialized'; is encode_utf8(undef), undef;