From df999f6ab15f8d4572019acbeccb0f0f65398caa Mon Sep 17 00:00:00 2001
From: Owen Leibman <eclipsechasers2@yahoo.com>
Date: Mon, 16 Nov 2020 23:10:35 -0800
Subject: [PATCH 1/4] CSV - Guess Encoding, Handle Null-string Escape

This is in response to issue #1647 (detect CSV character encoding).
First, my tests with mb_detect_encoding indicate that it doesn't work
well enough; regardless, users can always do that on their own
if they deem it useful.
Rolling my own is also troublesome, but I can at least:
a. Check for BOM (UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE).
b. Do some heuristic tests for each of the above encodings.
c. Fall back to a user-specified encoding (default CP1252)
  if a and b don't yield a result.
I think this is probably useful enough to include, and relatively
easy to expand if other potential encodings should be considered.

Starting with PHP 7.4, fgetcsv allows an empty (null) string to be specified
as the escape character. This is a much better choice than the PHP
(and PhpSpreadsheet) default of backslash in that it handles the file
in the same manner as Excel does. There is one statement in Reader/CSV
which would be adversely affected if the caller so specified (building
a regular expression under the assumption that escape character is
a single character). Fix that statement appropriately and add tests.
---
 docs/topics/reading-and-writing-to-file.md    |  18 +++++
 src/PhpSpreadsheet/Reader/Csv.php             |  76 ++++++++++++++++--
 tests/PhpSpreadsheetTests/Reader/CsvTest.php  |  62 ++++++++++++++
 tests/data/Reader/CSV/escape.csv              |   4 +
 tests/data/Reader/CSV/premiere.utf16be.csv    | Bin 0 -> 112 bytes
 tests/data/Reader/CSV/premiere.utf16bebom.csv | Bin 0 -> 114 bytes
 tests/data/Reader/CSV/premiere.utf16le.csv    | Bin 0 -> 112 bytes
 tests/data/Reader/CSV/premiere.utf16lebom.csv | Bin 0 -> 114 bytes
 tests/data/Reader/CSV/premiere.utf32be.csv    | Bin 0 -> 224 bytes
 tests/data/Reader/CSV/premiere.utf32bebom.csv | Bin 0 -> 228 bytes
 tests/data/Reader/CSV/premiere.utf32le.csv    | Bin 0 -> 224 bytes
 tests/data/Reader/CSV/premiere.utf32lebom.csv | Bin 0 -> 228 bytes
 tests/data/Reader/CSV/premiere.utf8.csv       |   2 +
 tests/data/Reader/CSV/premiere.utf8bom.csv    |   2 +
 tests/data/Reader/CSV/premiere.win1252.csv    |   2 +
 15 files changed, 158 insertions(+), 8 deletions(-)
 create mode 100644 tests/data/Reader/CSV/escape.csv
 create mode 100644 tests/data/Reader/CSV/premiere.utf16be.csv
 create mode 100644 tests/data/Reader/CSV/premiere.utf16bebom.csv
 create mode 100644 tests/data/Reader/CSV/premiere.utf16le.csv
 create mode 100644 tests/data/Reader/CSV/premiere.utf16lebom.csv
 create mode 100644 tests/data/Reader/CSV/premiere.utf32be.csv
 create mode 100644 tests/data/Reader/CSV/premiere.utf32bebom.csv
 create mode 100644 tests/data/Reader/CSV/premiere.utf32le.csv
 create mode 100644 tests/data/Reader/CSV/premiere.utf32lebom.csv
 create mode 100644 tests/data/Reader/CSV/premiere.utf8.csv
 create mode 100644 tests/data/Reader/CSV/premiere.utf8bom.csv
 create mode 100644 tests/data/Reader/CSV/premiere.win1252.csv

diff --git a/docs/topics/reading-and-writing-to-file.md b/docs/topics/reading-and-writing-to-file.md
index e55471a7c4..e1b7e3a2f1 100644
--- a/docs/topics/reading-and-writing-to-file.md
+++ b/docs/topics/reading-and-writing-to-file.md
@@ -458,6 +458,24 @@ $reader->setSheetIndex(0);
 
 $spreadsheet = $reader->load("sample.csv");
 ```
+You may also let PhpSpreadsheet attempt to guess the input encoding.
+It will do so by first testing for a byte order mark (BOM) for UTF-8,
+UTF-16BE, UTF-16LE, UTF-32BE, or UTF-32LE,
+then by applying heuristic tests for those encodings, and finally by falling
+back to an encoding you specify (default is CP1252) if all of those tests fail.
+
+```php
+$reader = new \PhpOffice\PhpSpreadsheet\Reader\Csv();
+$encoding = \PhpOffice\PhpSpreadsheet\Reader\Csv::guessEncoding('sample.csv');
+// or, e.g. $encoding = \PhpOffice\PhpSpreadsheet\Reader\Csv::guessEncoding(
+//                      'sample.csv', 'ISO-8859-2');
+$reader->setInputEncoding($encoding);
+$reader->setDelimiter(';');
+$reader->setEnclosure('');
+$reader->setSheetIndex(0);
+
+$spreadsheet = $reader->load('sample.csv');
+```
 
 #### Read a specific worksheet
 
diff --git a/src/PhpSpreadsheet/Reader/Csv.php b/src/PhpSpreadsheet/Reader/Csv.php
index d6eb16b0af..e5ea87c43e 100644
--- a/src/PhpSpreadsheet/Reader/Csv.php
+++ b/src/PhpSpreadsheet/Reader/Csv.php
@@ -83,6 +83,9 @@ public function getInputEncoding()
         return $this->inputEncoding;
     }
 
+    const UTF8_BOM = "\xEF\xBB\xBF";
+    const UTF8_BOM_LEN = 3;
+
     /**
      * Move filepointer past any BOM marker.
      */
@@ -90,12 +93,8 @@ protected function skipBOM(): void
     {
         rewind($this->fileHandle);
 
-        switch ($this->inputEncoding) {
-            case 'UTF-8':
-                fgets($this->fileHandle, 4) == "\xEF\xBB\xBF" ?
-                    fseek($this->fileHandle, 3) : fseek($this->fileHandle, 0);
-
-                break;
+        if (fgets($this->fileHandle, self::UTF8_BOM_LEN + 1) !== self::UTF8_BOM) {
+            rewind($this->fileHandle);
         }
     }
 
@@ -213,7 +212,9 @@ function ($sum, $value) use ($median) {
     private function getNextLine()
     {
         $line = '';
-        $enclosure = '(?<!' . preg_quote($this->escapeCharacter, '/') . ')' . preg_quote($this->enclosure, '/');
+        $enclosure = ($this->escapeCharacter === '' ? ''
+            : ('(?<!' . preg_quote($this->escapeCharacter, '/') . ')'))
+            . preg_quote($this->enclosure, '/');
 
         do {
             // Get the next line in the file
@@ -307,7 +308,7 @@ private function openFileOrMemory($pFilename): void
             $this->fileHandle = fopen('php://memory', 'r+b');
             $data = StringHelper::convertEncoding($entireFile, 'UTF-8', $this->inputEncoding);
             fwrite($this->fileHandle, $data);
-            rewind($this->fileHandle);
+            $this->skipBOM();
         }
     }
 
@@ -531,4 +532,63 @@ public function canRead($pFilename)
 
         return in_array($type, $supportedTypes, true);
     }
+
+    private static function guessEncodingTestNoBom(string &$encoding, string &$contents, string $compare, string $setEncoding): void
+    {
+        if ($encoding === '') {
+            $pos = strpos($contents, $compare);
+            if ($pos !== false && $pos % strlen($compare) === 0) {
+                $encoding = $setEncoding;
+            }
+        }
+    }
+
+    private static function guessEncodingNoBom(string $filename): string
+    {
+        $encoding = '';
+        $contents = file_get_contents($filename);
+        self::guessEncodingTestNoBom($encoding, $contents, "\x00\x00\x00\x0a", 'UTF-32BE');
+        self::guessEncodingTestNoBom($encoding, $contents, "\x0a\x00\x00\x00", 'UTF-32LE');
+        self::guessEncodingTestNoBom($encoding, $contents, "\x00\x0a", 'UTF-16BE');
+        self::guessEncodingTestNoBom($encoding, $contents, "\x0a\x00", 'UTF-16LE');
+        if ($encoding === '' && 1 == preg_match('//u', $contents)) {
+            $encoding = 'UTF-8';
+        }
+
+        return $encoding;
+    }
+
+    private static function guessEncodingTestBom(string &$encoding, string $first4, string $compare, string $setEncoding): void
+    {
+        if ($encoding === '') {
+            if ($compare === substr($first4, 0, strlen($compare))) {
+                $encoding = $setEncoding;
+            }
+        }
+    }
+
+    private static function guessEncodingBom(string $filename): string
+    {
+        $encoding = '';
+        $first4 = file_get_contents($filename, false, null, 0, 4);
+        if ($first4 !== false) {
+            self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8');
+            self::guessEncodingTestBom($encoding, $first4, "\xfe\xff", 'UTF-16BE');
+            self::guessEncodingTestBom($encoding, $first4, "\x00\x00\xfe\xff", 'UTF-32BE');
+            self::guessEncodingTestBom($encoding, $first4, "\xff\xfe\x00\x00", 'UTF-32LE');
+            self::guessEncodingTestBom($encoding, $first4, "\xff\xfe", 'UTF-16LE');
+        }
+
+        return $encoding;
+    }
+
+    public static function guessEncoding(string $filename, string $dflt = 'CP1252'): string
+    {
+        $encoding = self::guessEncodingBom($filename);
+        if ($encoding === '') {
+            $encoding = self::guessEncodingNoBom($filename);
+        }
+
+        return ($encoding === '') ? $dflt : $encoding;
+    }
 }
diff --git a/tests/PhpSpreadsheetTests/Reader/CsvTest.php b/tests/PhpSpreadsheetTests/Reader/CsvTest.php
index 797f3f1deb..e421f88e6e 100644
--- a/tests/PhpSpreadsheetTests/Reader/CsvTest.php
+++ b/tests/PhpSpreadsheetTests/Reader/CsvTest.php
@@ -275,4 +275,66 @@ public function testReadNonexistentFileName(): void
         $reader = new Csv();
         $reader->load('tests/data/Reader/CSV/encoding.utf8.csvxxx');
     }
+
+    /**
+     * @dataProvider providerEscapes
+     */
+    public function testInferSeparator(string $escape, string $delimiter): void
+    {
+        $reader = new Csv();
+        $reader->setEscapeCharacter($escape);
+        $filename = 'tests/data/Reader/CSV/escape.csv';
+        $worksheetinfo = $reader->listWorksheetInfo($filename);
+        self::assertEquals($delimiter, $reader->getDelimiter());
+    }
+
+    public function providerEscapes()
+    {
+        return [
+            ['\\', ';'],
+            ["\x0", ','],
+            [(version_compare(PHP_VERSION, '7.4') < 0) ? "\x0" : '', ','],
+        ];
+    }
+
+    /**
+     * @dataProvider providerGuessEncoding
+     */
+    public function testGuessEncoding(string $filename): void
+    {
+        $reader = new Csv();
+        $reader->setInputEncoding(Csv::guessEncoding($filename));
+        $spreadsheet = $reader->load($filename);
+        $sheet = $spreadsheet->getActiveSheet();
+        self::assertEquals('première', $sheet->getCell('A1')->getValue());
+        self::assertEquals('sixième', $sheet->getCell('C2')->getValue());
+    }
+
+    public function providerGuessEncoding()
+    {
+        return [
+            ['tests/data/Reader/CSV/premiere.utf8.csv'],
+            ['tests/data/Reader/CSV/premiere.utf8bom.csv'],
+            ['tests/data/Reader/CSV/premiere.utf16be.csv'],
+            ['tests/data/Reader/CSV/premiere.utf16bebom.csv'],
+            ['tests/data/Reader/CSV/premiere.utf16le.csv'],
+            ['tests/data/Reader/CSV/premiere.utf16lebom.csv'],
+            ['tests/data/Reader/CSV/premiere.utf32be.csv'],
+            ['tests/data/Reader/CSV/premiere.utf32bebom.csv'],
+            ['tests/data/Reader/CSV/premiere.utf32le.csv'],
+            ['tests/data/Reader/CSV/premiere.utf32lebom.csv'],
+            ['tests/data/Reader/CSV/premiere.win1252.csv'],
+        ];
+    }
+
+    public function testGuessEncodingDefltIso2(): void
+    {
+        $filename = 'tests/data/Reader/CSV/premiere.win1252.csv';
+        $reader = new Csv();
+        $reader->setInputEncoding(Csv::guessEncoding($filename, 'ISO-8859-2'));
+        $spreadsheet = $reader->load($filename);
+        $sheet = $spreadsheet->getActiveSheet();
+        self::assertEquals('premičre', $sheet->getCell('A1')->getValue());
+        self::assertEquals('sixičme', $sheet->getCell('C2')->getValue());
+    }
 }
diff --git a/tests/data/Reader/CSV/escape.csv b/tests/data/Reader/CSV/escape.csv
new file mode 100644
index 0000000000..a8b0c08435
--- /dev/null
+++ b/tests/data/Reader/CSV/escape.csv
@@ -0,0 +1,4 @@
+a\"hello;hello;hello;\",b\"hello;hello;hello;\",c\"\hello;hello;hello;\"
+a\"hello;hello;hello;\",b\"hello;hello;hello;\",c\"\hello;hello;hello;\",d
+a\"hello;hello;hello;\",b\"hello;hello;hello;\",c\"\hello;hello;hello;\"
+a\"hello;hello;hello;\",b\"hello;hello;hello;\",c\"\hello;hello;hello;\"
diff --git a/tests/data/Reader/CSV/premiere.utf16be.csv b/tests/data/Reader/CSV/premiere.utf16be.csv
new file mode 100644
index 0000000000000000000000000000000000000000..44c25684bc93576b6b0eac52da1170f3170f3ae6
GIT binary patch
literal 112
zcmYL=u?>JQ3<R%6ViPaJ2nZ-rgaAn-MrI7&E}-+@`8c@P$RwV)jH<m**jX`}Xpyb)
Zk>y4PCOUIC@0^;Nq~?_QnOFC>RlYSx7j*yt

literal 0
HcmV?d00001

diff --git a/tests/data/Reader/CSV/premiere.utf16bebom.csv b/tests/data/Reader/CSV/premiere.utf16bebom.csv
new file mode 100644
index 0000000000000000000000000000000000000000..2d63bbe12f6204cac31e9757141c625e75e47727
GIT binary patch
literal 114
zcmYL=K@vbf3<UeE%1`ncPf$4QVJ(&_PxcB-a^RNfPTvm;8xxtt6_-)92MRMI#x7c9
bYkXw6mWGalKAd+>O-)j>%KXf$^V=$47Xuk`

literal 0
HcmV?d00001

diff --git a/tests/data/Reader/CSV/premiere.utf16le.csv b/tests/data/Reader/CSV/premiere.utf16le.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a5bb1ff12e771e8628bf3c52930311bffbb3ca94
GIT binary patch
literal 112
zcmYL=u?>JQ3<R%6ViPaJ2nZ-rgaAn-MrI7&E}-+@`3^2NGKnWHqiQb{c2<lgT4ZZ{
ZWVw-niOw9(JEx{5sX1kS=GFaeFJDZ%7j*yt

literal 0
HcmV?d00001

diff --git a/tests/data/Reader/CSV/premiere.utf16lebom.csv b/tests/data/Reader/CSV/premiere.utf16lebom.csv
new file mode 100644
index 0000000000000000000000000000000000000000..fe6bb5b6b720f013518d650d01169fa6f731460b
GIT binary patch
literal 114
zcmYL=K@vbf3<UeE%1`ncPf$4QVJ(&_PxcB-a^RNfPCxg;#zZD@#bs3Ofx^s)v5OYj
b8XsA%rJ>`X59ggzQ<K!JGC%X`{I-`bE_fMo

literal 0
HcmV?d00001

diff --git a/tests/data/Reader/CSV/premiere.utf32be.csv b/tests/data/Reader/CSV/premiere.utf32be.csv
new file mode 100644
index 0000000000000000000000000000000000000000..d6517533f9f61c6e2207f13d81b2d0fccb075ac5
GIT binary patch
literal 224
zcmZwAxe0&(5CzeV1v{}BOAs|M@K&&7%Wy{6L<}A*d;N%5B4R_25d&^yucF@Jyv2+O
p9h@C-*S@#z<`4gQSE4|T6Akk2yX#ML?3=T~o4>uxx_;(w<^xk27j*yt

literal 0
HcmV?d00001

diff --git a/tests/data/Reader/CSV/premiere.utf32bebom.csv b/tests/data/Reader/CSV/premiere.utf32bebom.csv
new file mode 100644
index 0000000000000000000000000000000000000000..83326b64e44768f5b574f8c70f3af8745838be05
GIT binary patch
literal 228
zcmZwAxe0(!5Jl061v{}BOAs|Ma96Not5EM1ej)}3hS^@kbw|X61uc4XIFWh<^$O=T
sMhs}+Y=gV@y>&PJ@Sk@%GL+a+A>Dm<{b`PUb7pw+x0kYxpZS~l027xPasU7T

literal 0
HcmV?d00001

diff --git a/tests/data/Reader/CSV/premiere.utf32le.csv b/tests/data/Reader/CSV/premiere.utf32le.csv
new file mode 100644
index 0000000000000000000000000000000000000000..64d29f13cfadcbf775041cafc2b5177f8dcad164
GIT binary patch
literal 224
zcmZwAxe0(!5Jl061v{}BOAs|Ma96No%kbV{68XV_VYatL#DNhrCcMaALw&+|j};3B
oIJ@AkeQ({(AO7>MMuiqPI^^Ad*PrItH)n@8rM=903iHhT0GW0dbpQYW

literal 0
HcmV?d00001

diff --git a/tests/data/Reader/CSV/premiere.utf32lebom.csv b/tests/data/Reader/CSV/premiere.utf32lebom.csv
new file mode 100644
index 0000000000000000000000000000000000000000..25617c6e9b2a51e78b00ac8e376509cb8c45f0e8
GIT binary patch
literal 228
zcmZwAy9tC~5JlmMh3&*<EJ0QS19jI2Sh7{9=LD0;AAB&(>)uzyFCrH77%|{P_G;=C
t&cB#3p@Xvx?%Ma(-TdJ{?>?x|U`LC*d+++w9Q)?{;Z12TvyQ?%Gap^<8FBys

literal 0
HcmV?d00001

diff --git a/tests/data/Reader/CSV/premiere.utf8.csv b/tests/data/Reader/CSV/premiere.utf8.csv
new file mode 100644
index 0000000000..c668120175
--- /dev/null
+++ b/tests/data/Reader/CSV/premiere.utf8.csv
@@ -0,0 +1,2 @@
+première,second,troisième
+Quatrième,cinquième,sixième
diff --git a/tests/data/Reader/CSV/premiere.utf8bom.csv b/tests/data/Reader/CSV/premiere.utf8bom.csv
new file mode 100644
index 0000000000..4068e6c38e
--- /dev/null
+++ b/tests/data/Reader/CSV/premiere.utf8bom.csv
@@ -0,0 +1,2 @@
+première,second,troisième
+Quatrième,cinquième,sixième
diff --git a/tests/data/Reader/CSV/premiere.win1252.csv b/tests/data/Reader/CSV/premiere.win1252.csv
new file mode 100644
index 0000000000..908cb88fe2
--- /dev/null
+++ b/tests/data/Reader/CSV/premiere.win1252.csv
@@ -0,0 +1,2 @@
+premi�re,second,troisi�me
+Quatri�me,cinqui�me,sixi�me

From 45626a783d753f286f4a80b24b2b34547e639fb9 Mon Sep 17 00:00:00 2001
From: Owen Leibman <eclipsechasers2@yahoo.com>
Date: Mon, 16 Nov 2020 23:42:19 -0800
Subject: [PATCH 2/4] Correct Minor Scrutinizer Issue in Test

Assigned function result to otherwise unused variable.
---
 tests/PhpSpreadsheetTests/Reader/CsvTest.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/PhpSpreadsheetTests/Reader/CsvTest.php b/tests/PhpSpreadsheetTests/Reader/CsvTest.php
index e421f88e6e..e543ff4832 100644
--- a/tests/PhpSpreadsheetTests/Reader/CsvTest.php
+++ b/tests/PhpSpreadsheetTests/Reader/CsvTest.php
@@ -284,7 +284,7 @@ public function testInferSeparator(string $escape, string $delimiter): void
         $reader = new Csv();
         $reader->setEscapeCharacter($escape);
         $filename = 'tests/data/Reader/CSV/escape.csv';
-        $worksheetinfo = $reader->listWorksheetInfo($filename);
+        $reader->listWorksheetInfo($filename);
         self::assertEquals($delimiter, $reader->getDelimiter());
     }
 

From 385fc7173f37deccc678a2a73ba6ad1297078798 Mon Sep 17 00:00:00 2001
From: Owen Leibman <eclipsechasers2@yahoo.com>
Date: Sun, 13 Dec 2020 18:02:45 -0800
Subject: [PATCH 3/4] Add Some Class Constants

Request from @MarkBaker.
---
 src/PhpSpreadsheet/Reader/Csv.php | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/src/PhpSpreadsheet/Reader/Csv.php b/src/PhpSpreadsheet/Reader/Csv.php
index e5ea87c43e..7c9b014287 100644
--- a/src/PhpSpreadsheet/Reader/Csv.php
+++ b/src/PhpSpreadsheet/Reader/Csv.php
@@ -85,6 +85,18 @@ public function getInputEncoding()
 
     const UTF8_BOM = "\xEF\xBB\xBF";
     const UTF8_BOM_LEN = 3;
+    const UTF16BE_BOM = "\xfe\xff";
+    const UTF16BE_BOM_LEN = 2;
+    const UTF16BE_LF = "\x00\x0a";
+    const UTF16LE_BOM = "\xff\xfe";
+    const UTF16LE_BOM_LEN = 2;
+    const UTF16LE_LF = "\x0a\x00";
+    const UTF32BE_BOM = "\x00\x00\xfe\xff";
+    const UTF32BE_BOM_LEN = 4;
+    const UTF32BE_LF = "\x00\x00\x00\x0a";
+    const UTF32LE_BOM = "\xff\xfe\x00\x00";
+    const UTF32LE_BOM_LEN = 4;
+    const UTF32LE_LF = "\x0a\x00\x00\x00";
 
     /**
      * Move filepointer past any BOM marker.
@@ -547,10 +559,10 @@ private static function guessEncodingNoBom(string $filename): string
     {
         $encoding = '';
         $contents = file_get_contents($filename);
-        self::guessEncodingTestNoBom($encoding, $contents, "\x00\x00\x00\x0a", 'UTF-32BE');
-        self::guessEncodingTestNoBom($encoding, $contents, "\x0a\x00\x00\x00", 'UTF-32LE');
-        self::guessEncodingTestNoBom($encoding, $contents, "\x00\x0a", 'UTF-16BE');
-        self::guessEncodingTestNoBom($encoding, $contents, "\x0a\x00", 'UTF-16LE');
+        self::guessEncodingTestNoBom($encoding, $contents, self::UTF32BE_LF, 'UTF-32BE');
+        self::guessEncodingTestNoBom($encoding, $contents, self::UTF32LE_LF, 'UTF-32LE');
+        self::guessEncodingTestNoBom($encoding, $contents, self::UTF16BE_LF, 'UTF-16BE');
+        self::guessEncodingTestNoBom($encoding, $contents, self::UTF16LE_LF, 'UTF-16LE');
         if ($encoding === '' && 1 == preg_match('//u', $contents)) {
             $encoding = 'UTF-8';
         }
@@ -573,10 +585,10 @@ private static function guessEncodingBom(string $filename): string
         $first4 = file_get_contents($filename, false, null, 0, 4);
         if ($first4 !== false) {
             self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8');
-            self::guessEncodingTestBom($encoding, $first4, "\xfe\xff", 'UTF-16BE');
-            self::guessEncodingTestBom($encoding, $first4, "\x00\x00\xfe\xff", 'UTF-32BE');
-            self::guessEncodingTestBom($encoding, $first4, "\xff\xfe\x00\x00", 'UTF-32LE');
-            self::guessEncodingTestBom($encoding, $first4, "\xff\xfe", 'UTF-16LE');
+            self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE');
+            self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE');
+            self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE');
+            self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE');
         }
 
         return $encoding;

From 1299716acf5788535bdef762688f0cc7f023c384 Mon Sep 17 00:00:00 2001
From: Owen Leibman <eclipsechasers2@yahoo.com>
Date: Fri, 18 Dec 2020 06:50:29 -0800
Subject: [PATCH 4/4] More Style Changes

Constants at top of class, change one comparison to strict,
express one condition in non-yoda format.
---
 src/PhpSpreadsheet/Reader/Csv.php | 32 +++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/PhpSpreadsheet/Reader/Csv.php b/src/PhpSpreadsheet/Reader/Csv.php
index 7c9b014287..1495d102c0 100644
--- a/src/PhpSpreadsheet/Reader/Csv.php
+++ b/src/PhpSpreadsheet/Reader/Csv.php
@@ -9,6 +9,21 @@
 
 class Csv extends BaseReader
 {
+    const UTF8_BOM = "\xEF\xBB\xBF";
+    const UTF8_BOM_LEN = 3;
+    const UTF16BE_BOM = "\xfe\xff";
+    const UTF16BE_BOM_LEN = 2;
+    const UTF16BE_LF = "\x00\x0a";
+    const UTF16LE_BOM = "\xff\xfe";
+    const UTF16LE_BOM_LEN = 2;
+    const UTF16LE_LF = "\x0a\x00";
+    const UTF32BE_BOM = "\x00\x00\xfe\xff";
+    const UTF32BE_BOM_LEN = 4;
+    const UTF32BE_LF = "\x00\x00\x00\x0a";
+    const UTF32LE_BOM = "\xff\xfe\x00\x00";
+    const UTF32LE_BOM_LEN = 4;
+    const UTF32LE_LF = "\x0a\x00\x00\x00";
+
     /**
      * Input encoding.
      *
@@ -83,21 +98,6 @@ public function getInputEncoding()
         return $this->inputEncoding;
     }
 
-    const UTF8_BOM = "\xEF\xBB\xBF";
-    const UTF8_BOM_LEN = 3;
-    const UTF16BE_BOM = "\xfe\xff";
-    const UTF16BE_BOM_LEN = 2;
-    const UTF16BE_LF = "\x00\x0a";
-    const UTF16LE_BOM = "\xff\xfe";
-    const UTF16LE_BOM_LEN = 2;
-    const UTF16LE_LF = "\x0a\x00";
-    const UTF32BE_BOM = "\x00\x00\xfe\xff";
-    const UTF32BE_BOM_LEN = 4;
-    const UTF32BE_LF = "\x00\x00\x00\x0a";
-    const UTF32LE_BOM = "\xff\xfe\x00\x00";
-    const UTF32LE_BOM_LEN = 4;
-    const UTF32LE_LF = "\x0a\x00\x00\x00";
-
     /**
      * Move filepointer past any BOM marker.
      */
@@ -563,7 +563,7 @@ private static function guessEncodingNoBom(string $filename): string
         self::guessEncodingTestNoBom($encoding, $contents, self::UTF32LE_LF, 'UTF-32LE');
         self::guessEncodingTestNoBom($encoding, $contents, self::UTF16BE_LF, 'UTF-16BE');
         self::guessEncodingTestNoBom($encoding, $contents, self::UTF16LE_LF, 'UTF-16LE');
-        if ($encoding === '' && 1 == preg_match('//u', $contents)) {
+        if ($encoding === '' && preg_match('//u', $contents) === 1) {
             $encoding = 'UTF-8';
         }