From 85e38719ad4a56b2c9607e43a061f82b95ee25fe Mon Sep 17 00:00:00 2001 From: sameeul <sameeul@gmail.com> Date: Thu, 9 Nov 2023 07:51:37 -0500 Subject: [PATCH 1/3] Minor follow-up of PR#164 --- CMakeLists.txt | 8 +--- ci-utils/install_prereq_linux.sh | 16 +++++--- src/nyx/arrow_output_stream.cpp | 9 ----- src/nyx/arrow_output_stream.h | 3 -- src/nyx/output_writers.cpp | 66 -------------------------------- src/nyx/output_writers.h | 8 ---- 6 files changed, 11 insertions(+), 99 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 906a90f6..c41856d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -341,7 +341,6 @@ if(USEGPU) endif() if(USE_ARROW) - option(PARQUET_LINK_SHARED "Link to the Parquet shared library" ON) # Look for installed packages the system find_package(Arrow) if (NOT Arrow_FOUND) @@ -360,12 +359,7 @@ endif() if(USE_ARROW) add_definitions(-DUSE_ARROW) list(APPEND Nyxus_LIBRARIES arrow_shared) - - if(PARQUET_LINK_SHARED) - list(APPEND Nyxus_LIBRARIES parquet_shared) - else() - list(APPEND Nyxus_LIBRARIES parquet_static) - endif() + list(APPEND Nyxus_LIBRARIES parquet_shared) endif() if(BUILD_LIB) diff --git a/ci-utils/install_prereq_linux.sh b/ci-utils/install_prereq_linux.sh index cc16d751..20cc0044 100755 --- a/ci-utils/install_prereq_linux.sh +++ b/ci-utils/install_prereq_linux.sh @@ -10,7 +10,8 @@ BUILD_Z5_DEP=1 BULD_DCMTK_DEP=1 -BUILD_ARROW=0 +BUILD_ARROW_DEP=0 +BUILD_BOOST_DEP=1 while [ $# -gt 0 ]; do if [[ $1 == "--"* ]]; then @@ -24,14 +25,15 @@ done if [[ "${min_build,,}" == "yes" ]]; then BUILD_Z5_DEP=0 BULD_DCMTK_DEP=0 - BUILD_ARROW=0 + BUILD_ARROW_DEP=0 + BUILD_BOOST_DEP=0 fi if [[ "${build_arrow}" == "yes" ]]; then - BUILD_ARROW=1 + BUILD_ARROW_DEP=1 + BUILD_BOOST_DEP=1 fi -echo build arrow $BUILD_ARROW if [[ -z $install_dir ]] then @@ -64,7 +66,7 @@ cmake --build . cmake --build . --target install cd ../../ -if [[ $BUILD_Z5_DEP -eq 1 ]]; then +if [[ $BUILD_BOOST_DEP -eq 1 ]]; then for i in {1..5} do curl -L https://boostorg.jfrog.io/artifactory/main/release/1.79.0/source/boost_1_79_0.tar.bz2 -o boost_1_79_0.tar.bz2 @@ -78,7 +80,9 @@ if [[ $BUILD_Z5_DEP -eq 1 ]]; then ./b2 headers cp -r boost ../"$LOCAL_INSTALL_DIR"/include cd ../ +fi +if [[ $BUILD_Z5_DEP -eq 1 ]]; then curl -L https://github.com/Blosc/c-blosc/archive/refs/tags/v1.21.5.zip -o v1.21.5.zip unzip v1.21.5.zip cd c-blosc-1.21.5 @@ -215,7 +219,7 @@ if [[ $BULD_DCMTK_DEP -eq 1 ]]; then cd ../../ fi -if [[ $BUILD_ARROW -eq 1 ]]; then +if [[ $BUILD_ARROW_DEP -eq 1 ]]; then curl -L https://github.com/apache/arrow/archive/refs/tags/apache-arrow-13.0.0.zip -o arrow-apache-arrow-13.0.0.zip unzip arrow-apache-arrow-13.0.0.zip diff --git a/src/nyx/arrow_output_stream.cpp b/src/nyx/arrow_output_stream.cpp index 0c7aadac..677781c2 100644 --- a/src/nyx/arrow_output_stream.cpp +++ b/src/nyx/arrow_output_stream.cpp @@ -54,15 +54,6 @@ std::tuple<bool, std::optional<std::string>> ArrowOutputStream::create_arrow_fil } -std::shared_ptr<arrow::Table> ArrowOutputStream::get_arrow_table(const std::string& file_path) { - - if (this->arrow_table_ != nullptr) return this->arrow_table_; - - this->arrow_table_ = writer_->get_arrow_table(file_path); - - return this->arrow_table_; -} - std::string ArrowOutputStream::get_arrow_path() { return arrow_file_path_; } diff --git a/src/nyx/arrow_output_stream.h b/src/nyx/arrow_output_stream.h index 3f003fe6..bacd9785 100644 --- a/src/nyx/arrow_output_stream.h +++ b/src/nyx/arrow_output_stream.h @@ -10,7 +10,6 @@ #include "save_option.h" #ifdef USE_ARROW -#include <arrow/table.h> /** * @brief Class to write to Apache Arrow formats @@ -25,13 +24,11 @@ class ArrowOutputStream { std::string arrow_file_path_ = ""; std::unique_ptr<ApacheArrowWriter> writer_ = nullptr; std::string arrow_output_type_ = ""; - std::shared_ptr<arrow::Table> arrow_table_ = nullptr; public: std::tuple<bool, std::optional<std::string>> create_arrow_file(const Nyxus::SaveOption& arrow_file_type, const std::string& arrow_file_path, const std::vector<std::string>& header); - std::shared_ptr<arrow::Table> get_arrow_table(const std::string& file_path); std::string get_arrow_path(); std::tuple<bool, std::optional<std::string>> write_arrow_file (const std::vector<std::tuple<std::vector<std::string>, int, std::vector<double>>>& features); std::tuple<bool, std::optional<std::string>> close_arrow_file (); diff --git a/src/nyx/output_writers.cpp b/src/nyx/output_writers.cpp index 085a3a63..9c3f4796 100644 --- a/src/nyx/output_writers.cpp +++ b/src/nyx/output_writers.cpp @@ -1,77 +1,11 @@ #include "output_writers.h" #ifdef USE_ARROW - -#if __has_include(<filesystem>) - #include <filesystem> - namespace fs = std::filesystem; -#elif __has_include(<experimental/filesystem>) - #include <experimental/filesystem> - namespace fs = std::experimental::filesystem; -#else - error "Missing the <filesystem> header." -#endif - #include <iostream> #include <parquet/arrow/reader.h> #include "helpers/helpers.h" -std::shared_ptr<arrow::Table> ApacheArrowWriter::get_arrow_table(const std::string& file_path) { - - if (table_ != nullptr) return table_; - - auto file_extension = fs::path(file_path).extension().u8string(); - - if (file_extension == ".parquet") { - arrow::MemoryPool* pool = arrow::default_memory_pool(); - - - std::shared_ptr<arrow::io::RandomAccessFile> input; - - input = arrow::io::ReadableFile::Open(file_path).ValueOrDie(); - - std::unique_ptr<parquet::arrow::FileReader> arrow_reader; - - auto status = parquet::arrow::OpenFile(input, pool, &arrow_reader); - - if (!status.ok()) { - // Handle read error - std::cerr << "Error creating arrow table: " << status.ToString(); - return nullptr; - } - - // Read entire file as a single Arrow table - std::shared_ptr<arrow::Table> table; - - status = arrow_reader->ReadTable(&table); - - if (!status.ok()) { - // Handle read error - std::cerr << "Error creating arrow table: " << status.ToString(); - return nullptr; - } - - return table; - - } else if (file_extension == ".arrow") { - - // Create a memory-mapped file for reading. - std::shared_ptr<arrow::io::ReadableFile> input; - input = arrow::io::ReadableFile::Open(file_path).ValueOrDie(); - - // Create an IPC reader. - auto ipc_reader = (arrow::ipc::RecordBatchStreamReader::Open(input.get())).ValueOrDie(); - - this->table_ = ipc_reader->ToTable().ValueOrDie(); - - return table_; - - } else { - throw std::invalid_argument("Error: file must either be an Arrow or Parquet file."); - } - -} arrow::Status ParquetWriter::setup(const std::vector<std::string> &header) { diff --git a/src/nyx/output_writers.h b/src/nyx/output_writers.h index c40a8207..5be6494e 100644 --- a/src/nyx/output_writers.h +++ b/src/nyx/output_writers.h @@ -27,14 +27,6 @@ class ApacheArrowWriter std::shared_ptr<arrow::Table> table_ = nullptr; public: - - /** - * @brief Get the arrow table object - * - * @return std::shared_ptr<arrow::Table> - */ - std::shared_ptr<arrow::Table> get_arrow_table(const std::string& file_path); - /** * @brief Write Nyxus data to Arrow file * From c0f6d51bfe3152ed96f00f59f531de8aca1dad8a Mon Sep 17 00:00:00 2001 From: sameeul <sameeul@gmail.com> Date: Thu, 9 Nov 2023 08:21:20 -0500 Subject: [PATCH 2/3] fix fs namespace --- src/nyx/output_writers.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/nyx/output_writers.cpp b/src/nyx/output_writers.cpp index 9c3f4796..bda0b9f3 100644 --- a/src/nyx/output_writers.cpp +++ b/src/nyx/output_writers.cpp @@ -1,6 +1,17 @@ #include "output_writers.h" #ifdef USE_ARROW + +#if __has_include(<filesystem>) + #include <filesystem> + namespace fs = std::filesystem; +#elif __has_include(<experimental/filesystem>) + #include <experimental/filesystem> + namespace fs = std::experimental::filesystem; +#else + error "Missing the <filesystem> header." +#endif + #include <iostream> #include <parquet/arrow/reader.h> @@ -348,7 +359,7 @@ std::tuple<std::unique_ptr<ApacheArrowWriter>, std::optional<std::string>> Write } else { - std::filesystem::path path(output_file); + fs::path path(output_file); auto error_msg = [&path](){ if (path.has_extension()) From 14ac445883e0fb71862034c1749c077b23a83f2c Mon Sep 17 00:00:00 2001 From: sameeul <sameeul@gmail.com> Date: Mon, 13 Nov 2023 09:13:40 -0500 Subject: [PATCH 3/3] Update workflows --- .github/workflows/build_wheels.yml | 9 ++++----- .github/workflows/publish_pypi.yml | 19 ++++++++++++------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 599964ee..d84a52ef 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -12,7 +12,7 @@ jobs: MACOSX_DEPLOYMENT_TARGET: "10.15" strategy: matrix: - os: [ubuntu-20.04, macos-11, windows-latest] + os: [ubuntu-20.04, macos-12, windows-latest] cibw_archs: ["auto64"] cibw_build: ["cp38-*", "cp39-*", "cp310-*", "cp311-*"] @@ -28,11 +28,11 @@ jobs: - uses: actions/setup-python@v4 name: Install Python with: - python-version: '3.8' + python-version: '3.9' - name: Install cibuildwheel run: | - python -m pip install cibuildwheel==2.12.1 delvewheel wheel + python -m pip install cibuildwheel==2.16.2 delvewheel wheel - name: Building wheels run: | @@ -42,8 +42,7 @@ jobs: CIBW_SKIP: "*musllinux*" CIBW_BUILD_VERBOSITY: 3 CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014 - CIBW_BEFORE_ALL_MACOS: brew install llvm libomp && - python3 -m pip install setuptools==68.2.2 && + CIBW_BEFORE_ALL_MACOS: brew install llvm && bash ci-utils/install_prereq_linux.sh --build_arrow yes && mkdir -p /tmp/nyxus_bld && cp -r local_install /tmp/nyxus_bld diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml index 5066bcb6..aeda0031 100644 --- a/.github/workflows/publish_pypi.yml +++ b/.github/workflows/publish_pypi.yml @@ -14,7 +14,7 @@ jobs: MACOSX_DEPLOYMENT_TARGET: "10.15" strategy: matrix: - os: [ubuntu-20.04, macos-11, windows-latest] + os: [ubuntu-20.04, macos-12, windows-latest] cibw_archs: ["auto64"] cibw_build: ["cp38-*", "cp39-*", "cp310-*", "cp311-*"] @@ -30,11 +30,11 @@ jobs: - uses: actions/setup-python@v4 name: Install Python with: - python-version: '3.8' + python-version: '3.9' - name: Install cibuildwheel run: | - python -m pip install cibuildwheel==2.12.1 delvewheel wheel + python -m pip install cibuildwheel==2.16.2 delvewheel wheel - name: Building wheels run: | @@ -42,12 +42,15 @@ jobs: env: CIBW_BUILD: ${{ matrix.cibw_build }} CIBW_SKIP: "*musllinux*" + CIBW_BUILD_VERBOSITY: 3 CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014 - CIBW_BEFORE_ALL_MACOS: brew install llvm libomp && - bash ci-utils/install_prereq_linux.sh && + CIBW_BEFORE_ALL_MACOS: brew install llvm && + bash ci-utils/install_prereq_linux.sh --build_arrow yes && mkdir -p /tmp/nyxus_bld && cp -r local_install /tmp/nyxus_bld - CIBW_BEFORE_ALL_LINUX: bash ci-utils/install_prereq_linux.sh && + CIBW_BEFORE_ALL_LINUX: yum install -y llvm libevent-devel openssl-devel && + bash ci-utils/install_arrow_yum.sh && + bash ci-utils/install_prereq_linux.sh --build_arrow no && mkdir -p /tmp/nyxus_bld && cp -r local_install /tmp/nyxus_bld CIBW_BEFORE_ALL_WINDOWS: ci-utils\install_prereq_win.bat && @@ -59,7 +62,9 @@ jobs: CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: "delvewheel repair -w {dest_dir} {wheel}" CIBW_ARCHS: ${{ matrix.cibw_archs }} CIBW_TEST_REQUIRES: numpy pandas pytest requests - CIBW_TEST_COMMAND: pytest {project}/tests/python -m "not arrow" + CIBW_TEST_COMMAND_MACOS: pytest {project}/tests/python + CIBW_TEST_COMMAND_LINUX: pytest {project}/tests/python + CIBW_TEST_COMMAND_WINDOWS: pytest {project}/tests/python -m "not arrow" - name: Install Dependencies run: python -m pip install --upgrade twine requests