From acf3708b203ef4b0bc335984415acdd6fd636e83 Mon Sep 17 00:00:00 2001 From: oleibman <10341515+oleibman@users.noreply.github.com> Date: Wed, 30 Aug 2023 09:36:28 -0700 Subject: [PATCH] Writer Xls Handle Characters Outside Unicode BMP Fix #642. Opened over 5 years ago, probably the oldest problem I've worked on. And https://github.com/PHPOffice/PHPExcel/issues/1320, opened a year before that. And https://github.com/SpartnerNL/Laravel-Excel/issues/1521. Shared/StringHelper::UTF8toBIFF8UnicodeLong calculates incorrect length for strings when they contain characters outside Unicode BMP. Xls uses UTF-16 to encode its strings, and characters outside BMP require a surrogate pair to encode. PhpSpreadsheet (and PhpExcel before it) have been counting these as a single character, but Excel counts them as 2. Change to compute the length as half the number of bytes in the UTF-16 string, as Excel does. A formal test is added, but it's a bit difficult to follow. So I also added a non-BMP emoji to 27template.xls, which will cause it to be both read by Xls reader and written by Xls writer. This would previously have created a corrupt worksheet. The emoji is now handled correctly. --- samples/Basic/27_Images_Xls.php | 2 +- samples/templates/27template.xls | Bin 364544 -> 376320 bytes src/PhpSpreadsheet/Shared/StringHelper.php | 4 +-- .../Writer/Xls/Issue642Test.php | 32 ++++++++++++++++++ 4 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 tests/PhpSpreadsheetTests/Writer/Xls/Issue642Test.php diff --git a/samples/Basic/27_Images_Xls.php b/samples/Basic/27_Images_Xls.php index 4c20a9ac8b..b4cb296329 100644 --- a/samples/Basic/27_Images_Xls.php +++ b/samples/Basic/27_Images_Xls.php @@ -5,7 +5,7 @@ require __DIR__ . '/../Header.php'; // Read from Xls (.xls) template -$helper->log('Load Xlsx template file'); +$helper->log('Load Xls template file'); $reader = IOFactory::createReader('Xls'); $spreadsheet = $reader->load(__DIR__ . 
'/../templates/27template.xls'); diff --git a/samples/templates/27template.xls b/samples/templates/27template.xls index cb6cd6df501fcf6eae8b877ab9aea07b1d241c13..90c5c3de27b56762bf40e570ba4172322c2e01c2 100644 GIT binary patch delta 16253 zcmb_@30#cZ`}ldMnP%Gev`|wjZQ5nmPAEdIHI(*0b~sc9aeJ`s8*a;j+NU@CE!TLDQ71 z#p_ukGzcskmw{3wT+Xy&GokPExyO}*krc2PNlTR&#e1IR?2FYKT{Q~7jjActh=jO`s1fU6U-NT#}9ZpfpoqNjm;WaN%33gMU{=Ly7PCzl$!NIL zkmF>kEzIH|2glEaSVjc^mWO3yS=|6a0Mq~`0$@2209c0^0EPf_0I+V00Mr4t0Z4Si zUd2?H9$!+wm~A`43^?P)*iSx*vs@)NZju{!qz+9mUu-YtGl8EtYu*eVPcV<7R1#V+ z4f?8Zpp`NUO&)!TeHEH3gEbG~w}@Fu5kCULsh~0_D(x?!t8%1pi?W%c4l<_N=~CZ% z)b}AOKu%KMgOM&0ks|V&*AH`How66f5Nfk6;5&rfgZ?&Wum~TNh~iKribt-X?gN4V zr=G;RGY~^q$JQ6_Vnfv7JIxlra?nQXaJmvRt799gGr;*$Xd|H`M}?{~5{_!SndG=Kww_IpldDVo5`^gH|+~u<%Uo=*2SJHQB7Q& zYD*!e+6@#VEPGpm;^DU5;UL8c*}(@$F&#wz>(S(k=s>Bi`&R|?z6kiowTn(;8A4Ob*ip6N= z&Q9=$I>C3=K3ldu*(P9{Y(*QspcDM>o#3~3f>*2%iI@jEA?Qp%XYCJn!lyHZHskgq z6u7!G0i7vyoD!@l3|Hxk3W9f441C2M3!qYa;w&D2*+6#}V>JU1V9rU*@ zzBe|NP+f(;Fqr;QgVF z0qYWddT{_HoI4@u*lR+jCXzWmas-gKplLe+^OEwY(&1$!hif{re7Dd3kwt&`KI7(F)_lN+F3>QaBPECGV`>(7xSSUy{+k z*Gk$i|H@a;|7~hvVu`99BWEyF(UF5Ypo3NtX`&RO%~u^wa{^M@Gbq67%w*f0)T+o zHtt|mDYl!WM=ApwiCrQ+?y!*(oMZrW>=zrLY9G;1=;%=i6UoSuVnW+bKk~G5Y1dDz z9_|+<#EX$uPl^kc3KyOM{*g&-kTesrBNsf;{393n3?-lEddV<>f?}_*u>A_$f(hW# z0r*E%cro~Ku5%`M4(gN%iKyJCgldDj=7LvsopO;dadYpQiLDG1C?HZMZs~5OpW!<< zNLPkSr?G-J0y<}cR|Q}XDUR_@xYP!9%>_Rt{|m2l&V-78SpQ3Lfr8>-VM9gd2H^$D zKdQoukz<1MKNcvtVDb0QTxx^lxX3Xv|2ItB+%XffbA#+Gr-T*$Z^Ad%4D1q89!OFuktU6*dc`=O--^1=9++$zB{A&%%R|b}& zBQM9Tqa4dFjMRO62Iu%W>~0$EFK8?(K4vc`?n+&9ZVNV)W$1 zkOGg6a_qa1gI6A6d)T}f(feA(a`ffo2>Nss;{;;J&ibcMyBrQLrs-9yVmSu#ay&cA zaqGey(z9KT3NPl-+uMre7-BgdU5A@F$noep+^A@)4JtnOTr7rO|Fnl&H+eY7bhy<9-MMo|u^eNG9ElkDXd@B9bm&5kbhy<9 ziJom$EXPD%j)HJ=RwRZLrvvG5s|`AJ^NnITrt)$Wgqyn}Ibt!=(N-ID?m;oBSWb5= z$Fu8j7E{ll#`<%+^o8gBOPvT?jMTM6q{ovZ%$jZ(eV#skYL(& zAxApg+}vwwY81;cmzN_SZXm`{Mhq!CD@cc%o4cqnMzI{Oyc`AL<{~3UR!n=ek!+5e 
z`?fRB6w9%Ym!lxuJi3r09d2&!`|Hv>h#_H;Q-f`8SfSF>sF0z7dg6U3y2y3K2rDUs z8%9`5A>4%wbz}pN)nXEuh<)TH;vOQlqRAJsA&E}`{S(j9ia{LUb3XCptB!iLbA`Z^ z=V~FxRXP|EYEE`l%+$7>se2~bfvdF~SLx702yYBf>jZgK?b^9Q(8;T6C&#oc&S3us z94D$kin-dibA=F-=h|D2YkOF+p#!IEYf#M8p`EEG*^#TO99QWef^*1&!xVAlz?lRr zC4D2KGdbO5dY?s@ij?5sD-uP6&tONDp$&^M*Z}p5jE|a6#zzX`_z9Vuoxx;D{1tcc&6NMzh0M<;f2_3L#PJCoSq=28em>bGlB*^3pljY3lBhp3zaQ;1q ziiQL*SOBIcf<|08spmnB00c0AHysbS`^A|=1NFfPYbc*DNZ<>2LWT;=oRD~i@ku@e zteK1?7GWY{!j>E#K9Z1CM}6_23FJja$0fuH8Jegc?7U0LTu6Dc?q(@zgS!JkD;)N5 z5Vf(OC7igyg{k{Xs7YYQh1@1juA_h)=Qo;sfP@@V!(1xoDkWV7)N~%Bau*C#|InF~ zb4b|v(}X7=dXWGxNPuz!rMW{g>o4OTLP<{ncbw2@*R!R#!{nlh2I1Zwo-k)V^yz$A zRt$Z-{uvJLnFB$FyRUs{OWV4QS<;u2=@qQygdB!c{peM(;h z!w8ZNK2lGBkQ@j>(t?x9in6uZ@wTg5>khijI}JHJ7m^T zz!;mF2$2Cv1CKvEW>E@)!DJ~zKoAJnj-rAbk8hk~aBCGfx(A9;5Xl{4m!S%=i%A3^ zhiLYPSKq}*I_$- zQdnSTyJM*QL$FxjnGn!8vF-U>qPs8FeViatFn=Dl+z(rh^8=17e;iq%ae{d8gR6{L zgpw8nPbhSy0NjQ>(J?p)%z_M{c<=?dzvT80l#D4J*q5~PnN$eKY!Tq3E?L6RzL1pY zfEbDcVnSqM0+lv&Ag#c0rMQgq90`NMOICG;8l=z(ATkl+P?98H*iLp4^xI43bBCO1ys1SH<5m+#1jc0YCI#SYHQ-@ z9WGd&QNIz?i#jX8oWVKJ3(zh2g(6(m+0#~b!(}3`t?Zu7F+$l=ni3Q*`faFOc&R8? 
zst#AJ2fhR%!cir`0YIqFUknt(83iDwgdYZilk1j#6hgTLZ|o8fB={r+vzG>MWcCgp zc+xhAP)bS)YK6lGp^lb5Nk9$I;SQs0#e{g*Nx}%*k8z_B(efmc1)gQMJRJj@)yg^h zxWgxf)w>=~KfKD>D!;D?RRSKjr7xRvD8a1dIN&<8ToW>Cn1-qeK6hq));fArYlS)S z>Zqr)ncD#2lVt;Hty~J3fj4Z8$KPBfLdLsfx$YX;qeTaB@-2_1NAB{~Uk<`o0Hgw> z0jvZl)n^b%1m_vt;y3uRwkQ!ZvNAjvCPxGFcwqw?`vut75BTZHpXd5rj?T83B0?U| zWkq5`904caQuyVQNhzUD^xjj#BsLM8Sg@Xwki#OPOPvadul@rP?fzK5ts*e~%geb_ zMfT@BM95|}-b#{OHs`QGW`L7#SsL{p;^K)CWnv4#c_HDvi%mqVaw=q$aEKg&69?Ty z3CvR=rUF{2M3rC?K2Dk%&-#(N8+)AztG8v3#KEFg-;ygpe7sS!9;X@(PMuY?mg91bV z0}+F7sooR8+LMFc{eq`GgrYcx021fWr(3whK$|(YH8xAtU~I@pC8j>)N{qHZFYjrU z<8j&@RMUgsDWo4iK>)j?;h~G?@aBa%Et(zwd&b34RW8~At%2WCpY|S+bHtlt{c=Th zdi9>9it}5o3~O#3**pGgYtXe&-+f%ec*jQf)*S;MC!R8AAFJAuvv65m7SEUSJlRY+n>9aYpK}QCY=^GQxH|AaY(vNYm z&jOp2?=8w5+^lD8;zsEIxH=in%6!n-m_|4PLTdW>!EI#4oIqLTjew@^!6`9vx z@B2Ko#?aU4!tq`oo$?&k?7lSVOz3Chq8HP6eXjP|caQsCZM|+)k@}kOE3sVT-p|Sh z?pX8s&W%a&C!5lbq$hb!s~uHIiu5BsdcDxAuO+9D;1m%&lgPR(|Hj_u72qmQh%fwl zMtCAGex9=v{!ZI)WoDI&`l`1pPo-W>9&@eA$a;Zu{zgCN++_67mYiW}ddKwDozd0G zs%4zCytl3U2mdHs_xa(b=Ul_=0_CbleqI;J84ni>E+<#itjybM&6%=j&d<3^?;Hz1 z8FDpu&?Q&X{J;{IUv;%sH;z6zw$L~FYK``_5dn8RrdyGFGvt=Pw{`h?3JJ>>7 zN=iq^ti_uRaaLFhfKNLGGkCLk&QjmOuo92uCE_SiuMJkM;%a+8Oy0e4V`JZ_GTVUZ zM=lnY{f+!`Onj<$UE6u*=F`WieJ|hFZ`d|cW0GSITVpNnlGEzamL$i$ru%LOcK_2c z{vlay@a*LxqnhGh^zIKoKW|KO&xVaF4<2jc-ZsmdXCIIsC9Am zks|B<3#L~O&C9#hvrpC5$Xer3!_#Ls88(h{W~s)no2L5VhTj;$a;Lfg=kHMrs~M%F zzj=RiOKbZd-XYOycT!yZzTactUz+rG&6CH@hsOOCHrUa{b@YE5^Mhl5PI3s~eqH+geiMkE#L(P49pC zLD}ke`okAJefQJ(s|GE>FK;Y3&~RsjPEC@tpIy=5b(Xpj{NE;i89novp6a2k`ZKw^ zk_sk9t@DklT36z`dZp2rH({0oKKP6}Ul{Xg@)xrJ-!t9(%JW6v4))qmS~o4g|Ecoi z)HfldXuQC{XVKwGl0Z z%KMT8EoQCN181x8TkcP)UtE6RNAHu$zkVAy>r+*g&cv%{W`(SITllbyotk?1Qh4f! 
zB)-Ode)EQb$?M9Mtv5w?oAu(S(o*+=*gM{qLpS8KkUpjJZrv~3;ygxMBUoUkqjT}e zZxcseH9e`Kuep1C+N<1vS&{bx`AnjF2iX=hcK)6(5+BK|nmG$6jYV(*u?FM2hP zDPEP~bMONr`0VYC*Q)i1UdOwCxsq~#?|o0yQdL%ZzkKP})`j!}h8;Tg?qzwj?HdPQ$Fd!nDeje)FOvgK zD*5NwY|S`s5IAyA&&(C!hEA-?hRw4_g)e2VKM^zdT$ZQqX4W3fr?Fcm$0V&)T{E`Z zMdK0s<688r^s+MRSz1AN{R3M_!Fs#@*jN~z37B^-^7PO3JN*I6Y z4f0vP^}K7Dk+tomuv>ODx_V0;ow)uP3oqxXExYjAV9&iuE!#UU-rQtPd2_nI&dC)| zW)>8yYkv%`>~T72?N)>Gde$%63wT?TcT8@azf&((&|Uwq!R=l~;{+>x*PohDt*Jq3 zPE2S_+W+Aw^T-9A1*{wH(bWY@t`wzs4*%HamEHR^jzjd`eleyW`iK2)wS#qG?Ag9^ zml4zY&K|Pd{&`S-tj8ZN%bhvJq_*>@+#Hn$k<8Yih0j%v%*NYt?;H$%i9Ky{8{1hwu5Tz{Vj*ka228Rzk|T z^M5TIo7r^h+waALn?%8P9#7QR`P;*}?j>Vy^q8_e*zW0!{L03(uPP;dqC%se8`;+^ zxxD1XOtTXvBGKssjm4YImuwC0QSIma*l)wTx{IL$9d`bfHL1k8u5n*edSceFx1aM& zOOq^pUxqr7wnKE5+#FWnoZX_WY_R>_o3ob;BX(5e+Rx)wocXhRoXH0F&uW&!xD|=F zOlE#vJKWL!#^IJkn~3T_t$~Xj2KBx+&T#Xj#)`k6fdO<_B z;jYP#4L;R0S89FSeZLv0XjjH`FMhuJ{kWHZ-7dSmb?3GEZR_I#Q1Pw#Ovk~4Lrx^* z?8pu?o>|&boS&wXP@;R=?L?)X$~W!8J>!14H}sdXFH4F9`}23Wt=!DseJnJ${Hts4 zH?L;xnsQ0Q!`+>A>Ewak_wSG|ea|y(&A;fpHmZus_Iu6wctGu{QF<7o<|2#SS(`b} zqB_tjzjFRHyQ4lMte%-!zpR^3&%f*KzTK}lp?P*7XXn90r9g+B+Y%oL?$q>KJBJ%w zYTEO{wWkizpWG(&D{pwx%61s>Q%c!h$fTFzD#I-~hZD*VsvGGNY)6u*fE( z_hWv(%lNpPgVtVi4NdQv-D_WgZH$}q!vVkc{p&^PIpuRx`oy~Ii_9$0z8p0^A~<() zL-Br-d`;5>Z!Y;?NE~`?>Wv_W1N#bIUY@==?S%U8x85%cxK~%;>H5Uu=$WkjOZz<4 z{A+6IwO$_M8S3|LT^Tc*pMm+~$HWH?4dutX`+hN)$|b z)^iIvYogcgyk)9|N55pGE-~xAFO_@VqGkW(tI7O(oJ$XjbD34~!8$iCX#Zqs_juQd z0UMP|j}JuK>bnK}2&j0FPjzU+YvV4mA}NTBB`)*GB0({?@D~``> zI_s=<)<@2l9MDYaQuO>0GE$a<`~tk4+yz`JwH%ixlwZ!a6B@%xpI#o4QmaI%9*}1x zzqV{=g&+K+cu1`@<$1^l0>y)*zNf;2g)|e|C<-H`6crXBBU!NVh2W`?g~Gu83HXr^ zZG%0MZBkEuP=VG1ZoL!^w}brzME^-koBorQ6+AZLe`%>P^l?4))QxvS#%*VzmjD`2 zPh^>-vaB8LsT~go>U#x7qZI0?lA=S_c(W2&63d~cl9@Ftl{Hg22hI(0IPlLf6S6ph zZidi!Xuv_q)T1o$P=P%fe8K)QRvODDsBaFO2f-E(+=t={Kx-k4(~`q|2cvMiK&la4 zVnGQL(Ir(BIGWO$E@}Y_c)X@6F*Ca9O;8G-A?d>o{%iCN+AvzaK;LR3P78J-eoE(n 
zi9p-(h>>NU#&RH`h|yYz*^&ppN0_Q~Q|t-wbQMx-nF_ZgN_YS5hOPhvcS7I9``d`}C7x;B%8umGsN2;JA{74Q?lebOzzp#z^->HIb!?B{! zw7~ze-|&v4jBQzFDJ)}nYQkm{q~fTbk-aFj~IZyMZ6x(K5c+CiJ)57mm| z&k*p6X!#!D{DUP!;GMb>DzkY5wd7g{Ic%Lw4r9Qdo2c=N^9Fbf zE-kICI384DIo(h!G{%-t7vw0s;f6b~L*fGpI`83((|+~L{h#H$9$n_VtQV;)i>!q! zSkuTZjKqr3Mj|9@q(tTa!bZJ(v5k;}87Ml7o60i(|H|(FY$+M~zcf-}d)DG~)>3NN zLTbzUx}G&W>+CNq7Zwhf_^jP&tdYXAOqZ;pG*$?{5xOwdBM8`%Rf1^1fx+d_xvqj{ zIFt*&X2Ee$iTru|7y&n6Q4}wp>*nI(&Tad*GbC?#n;|YHrXL*e;L~Yn;xrEb58A08 Q`q2N#+)|<X;(a0ZFG)Z2$lO delta 3718 zcmcgvYiv}<6+UzC9q-+j_xiExS8dh}AW~?-kS9rsZ47}lr6CDTq!7uAz1RlV4$E3i zMB3eu8s|qUVJ_086e$L6q$-Mtl`XcwNU;fh@~BIjR;o}EXakLkqD`c#N>j4^&a7?L zTZ^Jb>a5PpoHJ+6duHw?{}oCOh4xmizmIhKJnN<+y3u(4Gi?gDh)!WHfPS9MW~aH5 zn7jbk4R{go1E*w}R+Vln+9UR9h6kKk9b|*j#oG6U*^YS>VDHyEMZbMr|CYfl_MkD} zV6o1|G5N65GA5VZ9KA2!2Db_0iX@Hs&$=L6`EZFe8a5`-6I}*ure|OTt&ViJIZYw0 z%Rb<#t1hND1T&me{R32rIa(uy?aQ83337?m6rpbFqAe6d-$2#$OE6>;dpQnhQZ%U* z=K5l*b@V=Dnsky40=R^I%3E68sfbD)i1xU*UXVTMtMsg*8RDcbWLNvVxdr>{KKInD zb$aIUB(>j!(8!Ca;KRSog3+W8-Xdh(&&fmreIJn#>f@~FiwW9(Wh5>KCEOn=<>j~U)(iVzn18ZtFr|Wj9LO(AtxJERel|}Y9XY{fBn2LKY-LL z0QV5KVqJ=N%vx3b5sspW%bM($K?aYZiliHpO`pEB%tLI!bD&xBI(P0_7D~nW{)Mf?& ztRUBdQ={g=B==xg`45KEGb|T4uMW%7>Lua@h?p?GqH1Qb>UX0T4`cY>UFgOe7o~PUm{6(C>uFh);BTNp%e7myT)zd&IqElwed3mapKtj+ z5O^aEd=NM|#Wai(oy!@0PG??MX-IM6rYngHZaNR!FrBNCnB^wk+|-#6_HrfVJJaAi za!ks5Z}`Jy#fOhey^0Q^FHDY+?~n77W3qNlEguwtjt?#W0)Y;*f;#eU4jg>kXdEAQ z@r&IZWpmse&RwH2ku!vKupznje>Y^|ahX`EB9b%URty=9oRr%{EwfHP0-R}10$=@1 zGt>1&?}@>r(ISBB2qMSRGP(GO##K*iYcPJZt1Z^Mvv+Geyyn@qXm|MDyTAH)WJ|QW zJF0{=cWvEzk0Zw9{rk)oWKFXtQ$c&PS)SOB&HUiA8I1roVI2UURP-c(6W0#lYxx-f zU)|3G{D7ALY|4k%2_Hf^)7QkhwqvSrURw@m1SAKpMKP#3kK7swUx&#f@18%i{Z|4A zSP3N?{B3S#getActiveSheet(); + $stringUtf8 = "Hello\u{1f600}goodbye"; + self::assertSame(13, mb_strlen($stringUtf8)); + $stringUtf16 = (string) iconv('UTF-8', 'UTF-16LE', $stringUtf8); + self::assertSame(28, strlen($stringUtf16)); // each character requires 2 bytes except for non-BMP which requires 4 + 
$sheet->getCell('A1')->setValue($stringUtf8); + $outputFilename = File::temporaryFilename(); + $writer = new Xls($spreadsheet); + $writer->save($outputFilename); + $spreadsheet->disconnectWorksheets(); + $contents = (string) file_get_contents($outputFilename); + unlink($outputFilename); + $expected = "\x00\x0e\x00\x01" . $stringUtf16; // length is 14 (0e), not 13 + self::assertStringContainsString($expected, $contents); + $unexpected = "\x00\x0d\x00\x01" . $stringUtf16; // length is 14 (0e), not 13 + self::assertStringNotContainsString($unexpected, $contents); + } +}