From 85e38719ad4a56b2c9607e43a061f82b95ee25fe Mon Sep 17 00:00:00 2001
From: sameeul <sameeul@gmail.com>
Date: Thu, 9 Nov 2023 07:51:37 -0500
Subject: [PATCH 1/3] Minor follow-up of PR#164

---
 CMakeLists.txt                   |  8 +---
 ci-utils/install_prereq_linux.sh | 16 +++++---
 src/nyx/arrow_output_stream.cpp  |  9 -----
 src/nyx/arrow_output_stream.h    |  3 --
 src/nyx/output_writers.cpp       | 66 --------------------------------
 src/nyx/output_writers.h         |  8 ----
 6 files changed, 11 insertions(+), 99 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 906a90f6..c41856d5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -341,7 +341,6 @@ if(USEGPU)
 endif()
 
 if(USE_ARROW)
-	option(PARQUET_LINK_SHARED "Link to the Parquet shared library" ON)
 	# Look for installed packages the system
 	find_package(Arrow)
 	if (NOT Arrow_FOUND) 
@@ -360,12 +359,7 @@ endif()
 if(USE_ARROW)
 	add_definitions(-DUSE_ARROW)
 	list(APPEND Nyxus_LIBRARIES arrow_shared)
-
-	if(PARQUET_LINK_SHARED)
-		list(APPEND Nyxus_LIBRARIES parquet_shared)
-	else()
-		list(APPEND Nyxus_LIBRARIES parquet_static)
-	endif()
+	list(APPEND Nyxus_LIBRARIES parquet_shared)
 endif()
 
 if(BUILD_LIB)
diff --git a/ci-utils/install_prereq_linux.sh b/ci-utils/install_prereq_linux.sh
index cc16d751..20cc0044 100755
--- a/ci-utils/install_prereq_linux.sh
+++ b/ci-utils/install_prereq_linux.sh
@@ -10,7 +10,8 @@
 
 BUILD_Z5_DEP=1
 BULD_DCMTK_DEP=1
-BUILD_ARROW=0
+BUILD_ARROW_DEP=0
+BUILD_BOOST_DEP=1
 
 while [ $# -gt 0 ]; do
     if [[ $1 == "--"* ]]; then
@@ -24,14 +25,15 @@ done
 if [[ "${min_build,,}" == "yes" ]]; then
     BUILD_Z5_DEP=0
     BULD_DCMTK_DEP=0
-    BUILD_ARROW=0
+    BUILD_ARROW_DEP=0
+    BUILD_BOOST_DEP=0
 fi
 
 if [[ "${build_arrow}" == "yes" ]]; then
-    BUILD_ARROW=1
+    BUILD_ARROW_DEP=1
+    BUILD_BOOST_DEP=1
 fi
 
-echo build arrow $BUILD_ARROW
 
 if [[ -z $install_dir ]]
 then
@@ -64,7 +66,7 @@ cmake --build .
 cmake --build . --target install 
 cd ../../
 
-if [[ $BUILD_Z5_DEP -eq 1 ]]; then
+if [[ $BUILD_BOOST_DEP -eq 1 ]]; then
     for i in {1..5}
     do
         curl -L https://boostorg.jfrog.io/artifactory/main/release/1.79.0/source/boost_1_79_0.tar.bz2 -o boost_1_79_0.tar.bz2 
@@ -78,7 +80,9 @@ if [[ $BUILD_Z5_DEP -eq 1 ]]; then
     ./b2 headers
     cp -r boost ../"$LOCAL_INSTALL_DIR"/include
     cd ../
+fi
 
+if [[ $BUILD_Z5_DEP -eq 1 ]]; then
     curl -L https://github.com/Blosc/c-blosc/archive/refs/tags/v1.21.5.zip -o v1.21.5.zip
     unzip v1.21.5.zip
     cd c-blosc-1.21.5
@@ -215,7 +219,7 @@ if [[ $BULD_DCMTK_DEP -eq 1 ]]; then
     cd ../../
 fi
 
-if [[ $BUILD_ARROW -eq 1 ]]; then
+if [[ $BUILD_ARROW_DEP -eq 1 ]]; then
 
     curl -L https://github.com/apache/arrow/archive/refs/tags/apache-arrow-13.0.0.zip -o  arrow-apache-arrow-13.0.0.zip
     unzip arrow-apache-arrow-13.0.0.zip
diff --git a/src/nyx/arrow_output_stream.cpp b/src/nyx/arrow_output_stream.cpp
index 0c7aadac..677781c2 100644
--- a/src/nyx/arrow_output_stream.cpp
+++ b/src/nyx/arrow_output_stream.cpp
@@ -54,15 +54,6 @@ std::tuple<bool, std::optional<std::string>> ArrowOutputStream::create_arrow_fil
 }
 
 
-std::shared_ptr<arrow::Table> ArrowOutputStream::get_arrow_table(const std::string& file_path) {
-
-    if (this->arrow_table_ != nullptr) return this->arrow_table_;
-                                                    
-    this->arrow_table_ = writer_->get_arrow_table(file_path);
-
-    return this->arrow_table_;
-}
-
 std::string ArrowOutputStream::get_arrow_path() {
     return arrow_file_path_;
 }
diff --git a/src/nyx/arrow_output_stream.h b/src/nyx/arrow_output_stream.h
index 3f003fe6..bacd9785 100644
--- a/src/nyx/arrow_output_stream.h
+++ b/src/nyx/arrow_output_stream.h
@@ -10,7 +10,6 @@
 #include "save_option.h"
 
 #ifdef USE_ARROW
-#include <arrow/table.h>
 
 /**
  * @brief Class to write to Apache Arrow formats
@@ -25,13 +24,11 @@ class ArrowOutputStream {
   std::string arrow_file_path_ = "";
 	std::unique_ptr<ApacheArrowWriter> writer_ = nullptr;
 	std::string arrow_output_type_ = "";
-  std::shared_ptr<arrow::Table> arrow_table_ = nullptr;
 
 public:
     std::tuple<bool, std::optional<std::string>> create_arrow_file(const Nyxus::SaveOption& arrow_file_type,
                                                          const std::string& arrow_file_path,
                                                          const std::vector<std::string>& header);
-    std::shared_ptr<arrow::Table> get_arrow_table(const std::string& file_path);
     std::string get_arrow_path();
     std::tuple<bool, std::optional<std::string>> write_arrow_file (const std::vector<std::tuple<std::vector<std::string>, int, std::vector<double>>>& features);
     std::tuple<bool, std::optional<std::string>> close_arrow_file ();
diff --git a/src/nyx/output_writers.cpp b/src/nyx/output_writers.cpp
index 085a3a63..9c3f4796 100644
--- a/src/nyx/output_writers.cpp
+++ b/src/nyx/output_writers.cpp
@@ -1,77 +1,11 @@
 #include "output_writers.h"
 
 #ifdef USE_ARROW
-
-#if __has_include(<filesystem>)
-  #include <filesystem>
-  namespace fs = std::filesystem;
-#elif __has_include(<experimental/filesystem>)
-  #include <experimental/filesystem> 
-  namespace fs = std::experimental::filesystem;
-#else
-  error "Missing the <filesystem> header."
-#endif
-
 #include <iostream>
 #include <parquet/arrow/reader.h>
 
 #include "helpers/helpers.h"
 
-std::shared_ptr<arrow::Table> ApacheArrowWriter::get_arrow_table(const std::string& file_path) {
-
-    if (table_ != nullptr) return table_;
-
-    auto file_extension = fs::path(file_path).extension().u8string();
-
-    if (file_extension == ".parquet") {
-        arrow::MemoryPool* pool = arrow::default_memory_pool();
-
-
-        std::shared_ptr<arrow::io::RandomAccessFile> input;
-
-        input = arrow::io::ReadableFile::Open(file_path).ValueOrDie();
-        
-        std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
-
-        auto status = parquet::arrow::OpenFile(input, pool, &arrow_reader);
-
-        if (!status.ok()) {
-            // Handle read error
-            std::cerr << "Error creating arrow table: " << status.ToString();
-            return nullptr;
-        }
-
-        // Read entire file as a single Arrow table
-        std::shared_ptr<arrow::Table> table;
-
-        status = arrow_reader->ReadTable(&table);
-
-        if (!status.ok()) {
-            // Handle read error
-            std::cerr << "Error creating arrow table: " << status.ToString();
-            return nullptr;
-        }
-
-        return table;
-
-    } else if (file_extension == ".arrow") {
-
-        // Create a memory-mapped file for reading.
-        std::shared_ptr<arrow::io::ReadableFile> input;
-        input = arrow::io::ReadableFile::Open(file_path).ValueOrDie();
-
-        // Create an IPC reader.
-        auto ipc_reader = (arrow::ipc::RecordBatchStreamReader::Open(input.get())).ValueOrDie();
-
-        this->table_ = ipc_reader->ToTable().ValueOrDie();
-
-        return table_;
-
-    } else {
-        throw std::invalid_argument("Error: file must either be an Arrow or Parquet file.");
-    }
-
-}
 
 arrow::Status ParquetWriter::setup(const std::vector<std::string> &header) {
 
diff --git a/src/nyx/output_writers.h b/src/nyx/output_writers.h
index c40a8207..5be6494e 100644
--- a/src/nyx/output_writers.h
+++ b/src/nyx/output_writers.h
@@ -27,14 +27,6 @@ class ApacheArrowWriter
     std::shared_ptr<arrow::Table> table_ = nullptr;
 
 public:
-
-    /**
-     * @brief Get the arrow table object
-     * 
-     * @return std::shared_ptr<arrow::Table> 
-     */
-    std::shared_ptr<arrow::Table> get_arrow_table(const std::string& file_path);
-
     /**
      * @brief Write Nyxus data to Arrow file
      * 

From c0f6d51bfe3152ed96f00f59f531de8aca1dad8a Mon Sep 17 00:00:00 2001
From: sameeul <sameeul@gmail.com>
Date: Thu, 9 Nov 2023 08:21:20 -0500
Subject: [PATCH 2/3] fix fs namespace

---
 src/nyx/output_writers.cpp | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/nyx/output_writers.cpp b/src/nyx/output_writers.cpp
index 9c3f4796..bda0b9f3 100644
--- a/src/nyx/output_writers.cpp
+++ b/src/nyx/output_writers.cpp
@@ -1,6 +1,17 @@
 #include "output_writers.h"
 
 #ifdef USE_ARROW
+
+#if __has_include(<filesystem>)  // prefer the standard header when the toolchain ships it
+  #include <filesystem>
+  namespace fs = std::filesystem;
+#elif __has_include(<experimental/filesystem>)  // otherwise fall back to the experimental header
+  #include <experimental/filesystem>
+  namespace fs = std::experimental::filesystem;
+#else
+  #error "Missing the <filesystem> header."
+#endif
+
 #include <iostream>
 #include <parquet/arrow/reader.h>
 
@@ -348,7 +359,7 @@ std::tuple<std::unique_ptr<ApacheArrowWriter>, std::optional<std::string>> Write
 
     } else {
 
-        std::filesystem::path path(output_file);
+        fs::path path(output_file);
 
         auto error_msg = [&path](){        
             if (path.has_extension()) 

From 14ac445883e0fb71862034c1749c077b23a83f2c Mon Sep 17 00:00:00 2001
From: sameeul <sameeul@gmail.com>
Date: Mon, 13 Nov 2023 09:13:40 -0500
Subject: [PATCH 3/3] Update workflows

---
 .github/workflows/build_wheels.yml |  9 ++++-----
 .github/workflows/publish_pypi.yml | 19 ++++++++++++-------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
index 599964ee..d84a52ef 100644
--- a/.github/workflows/build_wheels.yml
+++ b/.github/workflows/build_wheels.yml
@@ -12,7 +12,7 @@ jobs:
       MACOSX_DEPLOYMENT_TARGET: "10.15"
     strategy:
       matrix:
-        os: [ubuntu-20.04, macos-11, windows-latest]
+        os: [ubuntu-20.04, macos-12, windows-latest]
         cibw_archs: ["auto64"]
         cibw_build: ["cp38-*", "cp39-*", "cp310-*", "cp311-*"]
 
@@ -28,11 +28,11 @@ jobs:
       - uses: actions/setup-python@v4
         name: Install Python
         with:
-          python-version: '3.8'
+          python-version: '3.9'
  
       - name: Install cibuildwheel
         run: |
-          python -m pip install cibuildwheel==2.12.1 delvewheel wheel
+          python -m pip install cibuildwheel==2.16.2 delvewheel wheel
 
       - name: Building wheels 
         run: |
@@ -42,8 +42,7 @@ jobs:
           CIBW_SKIP: "*musllinux*"
           CIBW_BUILD_VERBOSITY: 3
           CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014
-          CIBW_BEFORE_ALL_MACOS:  brew install llvm libomp && 
-                                   python3 -m pip install setuptools==68.2.2 &&
+          CIBW_BEFORE_ALL_MACOS:  brew install llvm && 
                                    bash ci-utils/install_prereq_linux.sh --build_arrow yes &&
                                    mkdir -p /tmp/nyxus_bld &&
                                    cp -r local_install /tmp/nyxus_bld 
diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml
index 5066bcb6..aeda0031 100644
--- a/.github/workflows/publish_pypi.yml
+++ b/.github/workflows/publish_pypi.yml
@@ -14,7 +14,7 @@ jobs:
       MACOSX_DEPLOYMENT_TARGET: "10.15"
     strategy:
       matrix:
-        os: [ubuntu-20.04, macos-11, windows-latest]
+        os: [ubuntu-20.04, macos-12, windows-latest]
         cibw_archs: ["auto64"]
         cibw_build: ["cp38-*", "cp39-*", "cp310-*", "cp311-*"]
 
@@ -30,11 +30,11 @@ jobs:
       - uses: actions/setup-python@v4
         name: Install Python
         with:
-          python-version: '3.8'
+          python-version: '3.9'
 
       - name: Install cibuildwheel
         run: |
-          python -m pip install cibuildwheel==2.12.1 delvewheel wheel
+          python -m pip install cibuildwheel==2.16.2 delvewheel wheel
 
       - name: Building wheels 
         run: |
@@ -42,12 +42,15 @@ jobs:
         env:
           CIBW_BUILD: ${{ matrix.cibw_build }}
           CIBW_SKIP: "*musllinux*"
+          CIBW_BUILD_VERBOSITY: 3
           CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014
-          CIBW_BEFORE_ALL_MACOS: brew install llvm libomp &&
-                                   bash ci-utils/install_prereq_linux.sh &&
+          CIBW_BEFORE_ALL_MACOS: brew install llvm &&
+                                   bash ci-utils/install_prereq_linux.sh --build_arrow yes &&
                                    mkdir -p /tmp/nyxus_bld &&
                                    cp -r local_install /tmp/nyxus_bld
-          CIBW_BEFORE_ALL_LINUX: bash ci-utils/install_prereq_linux.sh &&
+          CIBW_BEFORE_ALL_LINUX:  yum install -y llvm libevent-devel openssl-devel && 
+                                   bash ci-utils/install_arrow_yum.sh &&
+                                   bash ci-utils/install_prereq_linux.sh --build_arrow no &&
                                    mkdir -p /tmp/nyxus_bld &&
                                    cp -r local_install /tmp/nyxus_bld
           CIBW_BEFORE_ALL_WINDOWS: ci-utils\install_prereq_win.bat &&
@@ -59,7 +62,9 @@ jobs:
           CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: "delvewheel repair -w {dest_dir} {wheel}"
           CIBW_ARCHS: ${{ matrix.cibw_archs }}
           CIBW_TEST_REQUIRES: numpy pandas pytest requests
-          CIBW_TEST_COMMAND: pytest {project}/tests/python -m "not arrow"
+          CIBW_TEST_COMMAND_MACOS: pytest {project}/tests/python
+          CIBW_TEST_COMMAND_LINUX: pytest {project}/tests/python
+          CIBW_TEST_COMMAND_WINDOWS: pytest {project}/tests/python -m "not arrow"
 
       - name: Install Dependencies
         run: python -m pip install --upgrade twine requests