Skip to content

Commit

Permalink
Merge pull request #166 from sameeul/rev_pr_164
Browse files Browse the repository at this point in the history
Minor follow-up of PR#164
  • Loading branch information
sameeul authored Nov 13, 2023
2 parents ce115a6 + 14ac445 commit cebeb2e
Show file tree
Hide file tree
Showing 8 changed files with 28 additions and 101 deletions.
9 changes: 4 additions & 5 deletions .github/workflows/build_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
MACOSX_DEPLOYMENT_TARGET: "10.15"
strategy:
matrix:
os: [ubuntu-20.04, macos-11, windows-latest]
os: [ubuntu-20.04, macos-12, windows-latest]
cibw_archs: ["auto64"]
cibw_build: ["cp38-*", "cp39-*", "cp310-*", "cp311-*"]

Expand All @@ -28,11 +28,11 @@ jobs:
- uses: actions/setup-python@v4
name: Install Python
with:
python-version: '3.8'
python-version: '3.9'

- name: Install cibuildwheel
run: |
python -m pip install cibuildwheel==2.12.1 delvewheel wheel
python -m pip install cibuildwheel==2.16.2 delvewheel wheel
- name: Building wheels
run: |
Expand All @@ -42,8 +42,7 @@ jobs:
CIBW_SKIP: "*musllinux*"
CIBW_BUILD_VERBOSITY: 3
CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014
CIBW_BEFORE_ALL_MACOS: brew install llvm libomp &&
python3 -m pip install setuptools==68.2.2 &&
CIBW_BEFORE_ALL_MACOS: brew install llvm &&
bash ci-utils/install_prereq_linux.sh --build_arrow yes &&
mkdir -p /tmp/nyxus_bld &&
cp -r local_install /tmp/nyxus_bld
Expand Down
19 changes: 12 additions & 7 deletions .github/workflows/publish_pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
MACOSX_DEPLOYMENT_TARGET: "10.15"
strategy:
matrix:
os: [ubuntu-20.04, macos-11, windows-latest]
os: [ubuntu-20.04, macos-12, windows-latest]
cibw_archs: ["auto64"]
cibw_build: ["cp38-*", "cp39-*", "cp310-*", "cp311-*"]

Expand All @@ -30,24 +30,27 @@ jobs:
- uses: actions/setup-python@v4
name: Install Python
with:
python-version: '3.8'
python-version: '3.9'

- name: Install cibuildwheel
run: |
python -m pip install cibuildwheel==2.12.1 delvewheel wheel
python -m pip install cibuildwheel==2.16.2 delvewheel wheel
- name: Building wheels
run: |
python -m cibuildwheel --output-dir dist
env:
CIBW_BUILD: ${{ matrix.cibw_build }}
CIBW_SKIP: "*musllinux*"
CIBW_BUILD_VERBOSITY: 3
CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014
CIBW_BEFORE_ALL_MACOS: brew install llvm libomp &&
bash ci-utils/install_prereq_linux.sh &&
CIBW_BEFORE_ALL_MACOS: brew install llvm &&
bash ci-utils/install_prereq_linux.sh --build_arrow yes &&
mkdir -p /tmp/nyxus_bld &&
cp -r local_install /tmp/nyxus_bld
CIBW_BEFORE_ALL_LINUX: bash ci-utils/install_prereq_linux.sh &&
CIBW_BEFORE_ALL_LINUX: yum install -y llvm libevent-devel openssl-devel &&
bash ci-utils/install_arrow_yum.sh &&
bash ci-utils/install_prereq_linux.sh --build_arrow no &&
mkdir -p /tmp/nyxus_bld &&
cp -r local_install /tmp/nyxus_bld
CIBW_BEFORE_ALL_WINDOWS: ci-utils\install_prereq_win.bat &&
Expand All @@ -59,7 +62,9 @@ jobs:
CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: "delvewheel repair -w {dest_dir} {wheel}"
CIBW_ARCHS: ${{ matrix.cibw_archs }}
CIBW_TEST_REQUIRES: numpy pandas pytest requests
CIBW_TEST_COMMAND: pytest {project}/tests/python -m "not arrow"
CIBW_TEST_COMMAND_MACOS: pytest {project}/tests/python
CIBW_TEST_COMMAND_LINUX: pytest {project}/tests/python
CIBW_TEST_COMMAND_WINDOWS: pytest {project}/tests/python -m "not arrow"

- name: Install Dependencies
run: python -m pip install --upgrade twine requests
Expand Down
8 changes: 1 addition & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,6 @@ if(USEGPU)
endif()

if(USE_ARROW)
option(PARQUET_LINK_SHARED "Link to the Parquet shared library" ON)
# Look for installed packages the system
find_package(Arrow)
if (NOT Arrow_FOUND)
Expand All @@ -360,12 +359,7 @@ endif()
if(USE_ARROW)
add_definitions(-DUSE_ARROW)
list(APPEND Nyxus_LIBRARIES arrow_shared)

if(PARQUET_LINK_SHARED)
list(APPEND Nyxus_LIBRARIES parquet_shared)
else()
list(APPEND Nyxus_LIBRARIES parquet_static)
endif()
list(APPEND Nyxus_LIBRARIES parquet_shared)
endif()

if(BUILD_LIB)
Expand Down
16 changes: 10 additions & 6 deletions ci-utils/install_prereq_linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@

BUILD_Z5_DEP=1
BULD_DCMTK_DEP=1
BUILD_ARROW=0
BUILD_ARROW_DEP=0
BUILD_BOOST_DEP=1

while [ $# -gt 0 ]; do
if [[ $1 == "--"* ]]; then
Expand All @@ -24,14 +25,15 @@ done
if [[ "${min_build,,}" == "yes" ]]; then
BUILD_Z5_DEP=0
BULD_DCMTK_DEP=0
BUILD_ARROW=0
BUILD_ARROW_DEP=0
BUILD_BOOST_DEP=0
fi

if [[ "${build_arrow}" == "yes" ]]; then
BUILD_ARROW=1
BUILD_ARROW_DEP=1
BUILD_BOOST_DEP=1
fi

echo build arrow $BUILD_ARROW

if [[ -z $install_dir ]]
then
Expand Down Expand Up @@ -64,7 +66,7 @@ cmake --build .
cmake --build . --target install
cd ../../

if [[ $BUILD_Z5_DEP -eq 1 ]]; then
if [[ $BUILD_BOOST_DEP -eq 1 ]]; then
for i in {1..5}
do
curl -L https://boostorg.jfrog.io/artifactory/main/release/1.79.0/source/boost_1_79_0.tar.bz2 -o boost_1_79_0.tar.bz2
Expand All @@ -78,7 +80,9 @@ if [[ $BUILD_Z5_DEP -eq 1 ]]; then
./b2 headers
cp -r boost ../"$LOCAL_INSTALL_DIR"/include
cd ../
fi

if [[ $BUILD_Z5_DEP -eq 1 ]]; then
curl -L https://github.com/Blosc/c-blosc/archive/refs/tags/v1.21.5.zip -o v1.21.5.zip
unzip v1.21.5.zip
cd c-blosc-1.21.5
Expand Down Expand Up @@ -215,7 +219,7 @@ if [[ $BULD_DCMTK_DEP -eq 1 ]]; then
cd ../../
fi

if [[ $BUILD_ARROW -eq 1 ]]; then
if [[ $BUILD_ARROW_DEP -eq 1 ]]; then

curl -L https://github.com/apache/arrow/archive/refs/tags/apache-arrow-13.0.0.zip -o arrow-apache-arrow-13.0.0.zip
unzip arrow-apache-arrow-13.0.0.zip
Expand Down
9 changes: 0 additions & 9 deletions src/nyx/arrow_output_stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,6 @@ std::tuple<bool, std::optional<std::string>> ArrowOutputStream::create_arrow_fil
}


std::shared_ptr<arrow::Table> ArrowOutputStream::get_arrow_table(const std::string& file_path) {

if (this->arrow_table_ != nullptr) return this->arrow_table_;

this->arrow_table_ = writer_->get_arrow_table(file_path);

return this->arrow_table_;
}

std::string ArrowOutputStream::get_arrow_path() {
return arrow_file_path_;
}
Expand Down
3 changes: 0 additions & 3 deletions src/nyx/arrow_output_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include "save_option.h"

#ifdef USE_ARROW
#include <arrow/table.h>

/**
* @brief Class to write to Apache Arrow formats
Expand All @@ -25,13 +24,11 @@ class ArrowOutputStream {
std::string arrow_file_path_ = "";
std::unique_ptr<ApacheArrowWriter> writer_ = nullptr;
std::string arrow_output_type_ = "";
std::shared_ptr<arrow::Table> arrow_table_ = nullptr;

public:
std::tuple<bool, std::optional<std::string>> create_arrow_file(const Nyxus::SaveOption& arrow_file_type,
const std::string& arrow_file_path,
const std::vector<std::string>& header);
std::shared_ptr<arrow::Table> get_arrow_table(const std::string& file_path);
std::string get_arrow_path();
std::tuple<bool, std::optional<std::string>> write_arrow_file (const std::vector<std::tuple<std::vector<std::string>, int, std::vector<double>>>& features);
std::tuple<bool, std::optional<std::string>> close_arrow_file ();
Expand Down
57 changes: 1 addition & 56 deletions src/nyx/output_writers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,61 +17,6 @@

#include "helpers/helpers.h"

std::shared_ptr<arrow::Table> ApacheArrowWriter::get_arrow_table(const std::string& file_path) {

if (table_ != nullptr) return table_;

auto file_extension = fs::path(file_path).extension().u8string();

if (file_extension == ".parquet") {
arrow::MemoryPool* pool = arrow::default_memory_pool();


std::shared_ptr<arrow::io::RandomAccessFile> input;

input = arrow::io::ReadableFile::Open(file_path).ValueOrDie();

std::unique_ptr<parquet::arrow::FileReader> arrow_reader;

auto status = parquet::arrow::OpenFile(input, pool, &arrow_reader);

if (!status.ok()) {
// Handle read error
std::cerr << "Error creating arrow table: " << status.ToString();
return nullptr;
}

// Read entire file as a single Arrow table
std::shared_ptr<arrow::Table> table;

status = arrow_reader->ReadTable(&table);

if (!status.ok()) {
// Handle read error
std::cerr << "Error creating arrow table: " << status.ToString();
return nullptr;
}

return table;

} else if (file_extension == ".arrow") {

// Create a memory-mapped file for reading.
std::shared_ptr<arrow::io::ReadableFile> input;
input = arrow::io::ReadableFile::Open(file_path).ValueOrDie();

// Create an IPC reader.
auto ipc_reader = (arrow::ipc::RecordBatchStreamReader::Open(input.get())).ValueOrDie();

this->table_ = ipc_reader->ToTable().ValueOrDie();

return table_;

} else {
throw std::invalid_argument("Error: file must either be an Arrow or Parquet file.");
}

}

arrow::Status ParquetWriter::setup(const std::vector<std::string> &header) {

Expand Down Expand Up @@ -414,7 +359,7 @@ std::tuple<std::unique_ptr<ApacheArrowWriter>, std::optional<std::string>> Write

} else {

std::filesystem::path path(output_file);
fs::path path(output_file);

auto error_msg = [&path](){
if (path.has_extension())
Expand Down
8 changes: 0 additions & 8 deletions src/nyx/output_writers.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,6 @@ class ApacheArrowWriter
std::shared_ptr<arrow::Table> table_ = nullptr;

public:

/**
* @brief Get the arrow table object
*
* @return std::shared_ptr<arrow::Table>
*/
std::shared_ptr<arrow::Table> get_arrow_table(const std::string& file_path);

/**
* @brief Write Nyxus data to Arrow file
*
Expand Down

0 comments on commit cebeb2e

Please sign in to comment.