From c94ee69fc8858d7d4ffb4b2e46674e26fd2ebac7 Mon Sep 17 00:00:00 2001 From: sameeul Date: Fri, 27 Oct 2023 07:50:05 -0400 Subject: [PATCH 01/12] update 1 --- src/nyx/scan_fastloader_way.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/nyx/scan_fastloader_way.cpp b/src/nyx/scan_fastloader_way.cpp index 8fdcc9a3..09aa0b81 100644 --- a/src/nyx/scan_fastloader_way.cpp +++ b/src/nyx/scan_fastloader_way.cpp @@ -26,11 +26,6 @@ namespace py = pybind11; #include "globals.h" #include "helpers/timing.h" -#ifdef USE_ARROW -#include "arrow_output_stream.h" -#include "output_writers.h" -#endif - // Sanity #ifdef _WIN32 #include From bcda806b22a95067891a95ab42079c989cb0cb26 Mon Sep 17 00:00:00 2001 From: sameeul Date: Fri, 27 Oct 2023 08:10:38 -0400 Subject: [PATCH 02/12] update 2 --- src/nyx/arrow_output_stream.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/nyx/arrow_output_stream.cpp b/src/nyx/arrow_output_stream.cpp index 0d27c508..43c40535 100644 --- a/src/nyx/arrow_output_stream.cpp +++ b/src/nyx/arrow_output_stream.cpp @@ -18,13 +18,12 @@ std::shared_ptr ArrowOutputStream::create_arrow_file(const Ny if (arrow_file_path == "") { arrow_file_path_ = "NyxusFeatures" + extension; + } else if (fs::is_directory(arrow_file_path)) { + arrow_file_path_ = arrow_file_path + "/NyxusFeatures" + extension; } else { arrow_file_path_ = arrow_file_path; } - if (fs::is_directory(arrow_file_path)) { - arrow_file_path_ += "/NyxusFeatures" + extension; - } writer_ = WriterFactory::create_writer(arrow_file_path_, header); From 814ac213e680415e9e3e162cc17e863563c5e481 Mon Sep 17 00:00:00 2001 From: sameeul Date: Fri, 27 Oct 2023 08:30:08 -0400 Subject: [PATCH 03/12] update 3 --- src/nyx/arrow_output_stream.h | 7 ------- src/nyx/output_writers.h | 3 --- 2 files changed, 10 deletions(-) diff --git a/src/nyx/arrow_output_stream.h b/src/nyx/arrow_output_stream.h index 33ec7014..e582de09 100644 --- a/src/nyx/arrow_output_stream.h +++ b/src/nyx/arrow_output_stream.h @@ -58,13 +58,6 @@ namespace arrow { */ class ArrowOutputStream { -private: - - std::string arrow_file_path_ = ""; - std::shared_ptr writer_ = nullptr; - std::string arrow_output_type_ = ""; - std::shared_ptr arrow_table_ = nullptr; - public: std::shared_ptr create_arrow_file(const Nyxus::SaveOption& arrow_file_type, const std::string& arrow_file_path, diff --git a/src/nyx/output_writers.h b/src/nyx/output_writers.h index 04fcdee2..4bb54f26 100644 --- a/src/nyx/output_writers.h +++ b/src/nyx/output_writers.h @@ -195,9 +195,6 @@ namespace arrow { class ApacheArrowWriter { -private: - std::shared_ptr table_ = nullptr; - public: /** From 77f08833b20f273de7b2cc6fe3c11eaf5fb4af5b Mon Sep 17 00:00:00 2001 From: sameeul Date: Fri, 27 Oct 2023 08:58:57 -0400 Subject: [PATCH 04/12] update 4 --- src/nyx/environment.cpp | 44 ++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/src/nyx/environment.cpp b/src/nyx/environment.cpp index 3323db7a..df7ca61f 100644 --- a/src/nyx/environment.cpp +++ b/src/nyx/environment.cpp @@ -838,16 +838,21 @@ bool Environment::parse_cmdline(int argc, char **argv) } //==== Output type -#ifdef USE_ARROW VERBOSLVL1(std::cout << "\n*-*-*-*- Using Apache output -*-*-*-*\n"); auto rawOutpTypeUC = Nyxus::toupper(rawOutpType); - if (rawOutpTypeUC != Nyxus::toupper(OT_SINGLECSV) && - rawOutpTypeUC != Nyxus::toupper(OT_SEPCSV) && - rawOutpTypeUC != Nyxus::toupper(OT_ARROWIPC) && - rawOutpTypeUC != Nyxus::toupper(OT_PARQUET)) + if (!((rawOutpTypeUC == Nyxus::toupper(OT_SINGLECSV)) || + (rawOutpTypeUC == Nyxus::toupper(OT_SEPCSV)) || + (rawOutpTypeUC == Nyxus::toupper(OT_ARROWIPC)) || + (rawOutpTypeUC == Nyxus::toupper(OT_PARQUET)) + )) { - std::cout << "Error: valid values of " << OUTPUTTYPE << " are " << OT_SEPCSV << ", " << OT_SINGLECSV << ", or" << OT_PARQUET << "." "\n"; + std::cout << "Error: valid values of " << OUTPUTTYPE << " are " << OT_SEPCSV << ", " + << OT_SINGLECSV <<", " + #ifdef USE_ARROW + << OT_ARROWIPC <<", or" << OT_PARQUET << + #endif + "." "\n"; return false; } @@ -861,31 +866,16 @@ bool Environment::parse_cmdline(int argc, char **argv) } }(); - if (saveOption == SaveOption::saveCSV) { - separateCsv = rawOutpTypeUC == Nyxus::toupper(OT_SEPCSV); - } - -#else // no Apache support - auto rawOutpTypeUC = Nyxus::toupper(rawOutpType); - if (rawOutpTypeUC != Nyxus::toupper(OT_SINGLECSV) && - rawOutpTypeUC != Nyxus::toupper(OT_SEPCSV)) - { - std::cout << "Error: valid values of " << OUTPUTTYPE << " are " << OT_SEPCSV << ", " << OT_SINGLECSV << "\n"; - - // Intercept an attempt of running Nyxus with Apache options - if (rawOutpTypeUC != Nyxus::toupper(OT_ARROWIPC) || - rawOutpTypeUC != Nyxus::toupper(OT_PARQUET)) - std::cout << "Error: Nyxus must be built with Apache Arrow enabled to use Arrow output types. Please rebuild with the flag USEARROW=ON." << std::endl; - +#ifndef USE_ARROW // no Apache support + if (saveOption == SaveOption::saveArrowIPC || saveOption == SaveOption::saveParquet) { + std::cout << "Error: Nyxus must be built with Apache Arrow enabled to use Arrow output types. Please rebuild with the flag USEARROW=ON." << std::endl; return false; } +#endif - separateCsv = rawOutpTypeUC == Nyxus::toupper(OT_SEPCSV); - - if (rawOutpTypeUC == Nyxus::toupper(OT_SINGLECSV) || rawOutpTypeUC == Nyxus::toupper(OT_SEPCSV)) { - saveOption = Nyxus::SaveOption::saveCSV; + if (saveOption == SaveOption::saveCSV) { + separateCsv = rawOutpTypeUC == Nyxus::toupper(OT_SEPCSV); } -#endif //==== Check numeric parameters if (!loader_threads.empty()) From 4c5c125341a91fac9e25f55a327057b8d5d87f0d Mon Sep 17 00:00:00 2001 From: sameeul Date: Fri, 27 Oct 2023 11:10:37 -0400 Subject: [PATCH 05/12] Update 5 --- src/nyx/arrow_output_stream.cpp | 66 +++++++++++++++++++++-------- src/nyx/arrow_output_stream.h | 12 ++++-- src/nyx/environment.h | 1 - src/nyx/globals.h | 4 +- src/nyx/output_writers.cpp | 26 ++++++------ src/nyx/output_writers.h | 6 ++- src/nyx/scan_fastloader_way.cpp | 73 ++++++++++++++++++--------------- 7 files changed, 118 insertions(+), 70 deletions(-) diff --git a/src/nyx/arrow_output_stream.cpp b/src/nyx/arrow_output_stream.cpp index 43c40535..1b9b454b 100644 --- a/src/nyx/arrow_output_stream.cpp +++ b/src/nyx/arrow_output_stream.cpp @@ -1,18 +1,11 @@ #include "arrow_output_stream.h" - #ifdef USE_ARROW -std::shared_ptr ArrowOutputStream::create_arrow_file(const Nyxus::SaveOption& arrow_file_type, +bool ArrowOutputStream::create_arrow_file(const Nyxus::SaveOption& arrow_file_type, const std::string& arrow_file_path, const std::vector& header) { - if(arrow_file_path != "" && !fs::is_directory(arrow_file_path) && !(Nyxus::ends_with_substr(arrow_file_path, ".arrow") || Nyxus::ends_with_substr(arrow_file_path, ".feather") || Nyxus::ends_with_substr(arrow_file_path, ".parquet"))) { - throw std::invalid_argument("The arrow file path must end in \".arrow\""); - } - if (arrow_file_type != Nyxus::SaveOption::saveArrowIPC && arrow_file_type != Nyxus::SaveOption::saveParquet) { - throw std::invalid_argument("The valid save options are Nyxus::SaveOption::saveArrowIPC or Nyxus::SaveOption::saveParquet."); - } std::string extension = (arrow_file_type == Nyxus::SaveOption::saveParquet) ? ".parquet" : ".arrow"; @@ -23,11 +16,15 @@ std::shared_ptr ArrowOutputStream::create_arrow_file(const Ny } else { arrow_file_path_ = arrow_file_path; } - - - writer_ = WriterFactory::create_writer(arrow_file_path_, header); - - return writer_; + + std::optional error_msg; + std::tie(writer_, error_msg) = WriterFactory::create_writer(arrow_file_path_, header); + if (writer_) { + return true; + } else { + std::cout << error_msg.value() << std::endl; + return false; + } } @@ -44,23 +41,48 @@ std::string ArrowOutputStream::get_arrow_path() { return arrow_file_path_; } +std::tuple> ArrowOutputStream::write_arrow_file (const std::vector, int, std::vector>>& features){ + if (writer_){ + auto status = writer_->write(features); + if (status.ok()) { + return std::make_tuple(true, std::nullopt); + } + else { + return std::make_tuple(false, status.ToString()); + } + } + return std::make_tuple(false, "Arrow Writer is not initialized."); +} +std::tuple> ArrowOutputStream::close_arrow_file (){ + if (writer_){ + auto status = writer_->close(); + if (status.ok()) { + return std::make_tuple(true, std::nullopt); + } + else { + return std::make_tuple(false, status.ToString()); + } + } + return std::make_tuple(false, "Arrow Writer is not initialized."); +} + #else -std::shared_ptr ArrowOutputStream::create_arrow_file(const Nyxus::SaveOption& arrow_file_type, +bool ArrowOutputStream::create_arrow_file(const Nyxus::SaveOption& arrow_file_type, const std::string& arrow_file_path, const std::vector& header) { std::cerr << "Apache Arrow functionality is not available. Please install Nyxus with Arrow enabled to use this functionality." << std::endl; - return nullptr; + return false; } -std::shared_ptr ArrowOutputStream::get_arrow_table(const std::string& file_path) { +bool ArrowOutputStream::get_arrow_table(const std::string& file_path) { std::cerr << "Apache Arrow functionality is not available. Please install Nyxus with Arrow enabled to use this functionality." << std::endl; - return nullptr; + return false; } std::string ArrowOutputStream::get_arrow_path() { @@ -70,4 +92,14 @@ std::string ArrowOutputStream::get_arrow_path() { return ""; } +std::tuple> ArrowOutputStream::write_arrow_file (const std::vector, int, std::vector>>& features){ + std::cerr << "Apache Arrow functionality is not available. Please install Nyxus with Arrow enabled to use this functionality." << std::endl; + return std::make_tuple(false, "Apache Arrow functionality is not available.") +} +std::tuple> ArrowOutputStream::close_arrow_file (){ + std::cerr << "Apache Arrow functionality is not available. Please install Nyxus with Arrow enabled to use this functionality." << std::endl; + return std::make_tuple(false, "Apache Arrow functionality is not available.") +} + + #endif \ No newline at end of file diff --git a/src/nyx/arrow_output_stream.h b/src/nyx/arrow_output_stream.h index e582de09..024b7358 100644 --- a/src/nyx/arrow_output_stream.h +++ b/src/nyx/arrow_output_stream.h @@ -1,7 +1,9 @@ #pragma once +#include #include #include +#include #include "output_writers.h" #include "helpers/helpers.h" @@ -31,16 +33,18 @@ class ArrowOutputStream { private: std::string arrow_file_path_ = ""; - std::shared_ptr writer_ = nullptr; + std::unique_ptr writer_ = nullptr; std::string arrow_output_type_ = ""; std::shared_ptr arrow_table_ = nullptr; public: - std::shared_ptr create_arrow_file(const Nyxus::SaveOption& arrow_file_type, + bool create_arrow_file(const Nyxus::SaveOption& arrow_file_type, const std::string& arrow_file_path, const std::vector& header); std::shared_ptr get_arrow_table(const std::string& file_path); std::string get_arrow_path(); + std::tuple> write_arrow_file (const std::vector, int, std::vector>>& features); + std::tuple> close_arrow_file (); }; #else @@ -59,11 +63,13 @@ namespace arrow { class ArrowOutputStream { public: - std::shared_ptr create_arrow_file(const Nyxus::SaveOption& arrow_file_type, + bool create_arrow_file(const Nyxus::SaveOption& arrow_file_type, const std::string& arrow_file_path, const std::vector& header); std::shared_ptr get_arrow_table(const std::string& file_path); std::string get_arrow_path(); + std::tuple> write_arrow_file (const std::vector, int, std::vector>>& features); + std::tuple> close_arrow_file (); }; diff --git a/src/nyx/environment.h b/src/nyx/environment.h index 024aa98f..09088c25 100644 --- a/src/nyx/environment.h +++ b/src/nyx/environment.h @@ -119,7 +119,6 @@ class Environment: public BasicEnvironment ArrowOutputStream arrow_stream; - std::shared_ptr arrow_writer = nullptr; std::string embedded_pixel_size = ""; diff --git a/src/nyx/globals.h b/src/nyx/globals.h index 63074ab3..573a208e 100644 --- a/src/nyx/globals.h +++ b/src/nyx/globals.h @@ -40,7 +40,7 @@ namespace Nyxus bool scanFilePairParallel(const std::string& intens_fpath, const std::string& label_fpath, int num_fastloader_threads, int num_sensemaker_threads, int filepair_index, int tot_num_filepairs); std::string getPureFname(const std::string& fpath); - int processDataset(const std::vector& intensFiles, const std::vector& labelFiles, int numFastloaderThreads, int numSensemakerThreads, int numReduceThreads, int min_online_roi_size, SaveOption saveOption, const std::string& outputDir); + int processDataset(const std::vector& intensFiles, const std::vector& labelFiles, int numFastloaderThreads, int numSensemakerThreads, int numReduceThreads, int min_online_roi_size, const SaveOption saveOption, const std::string& outputDir); bool gatherRoisMetrics(const std::string& intens_fpath, const std::string& label_fpath, int num_FL_threads); bool processTrivialRois (const std::vector& trivRoiLabels, const std::string& intens_fpath, const std::string& label_fpath, int num_FL_threads, size_t memory_limit); bool processNontrivialRois (const std::vector& nontrivRoiLabels, const std::string& intens_fpath, const std::string& label_fpath, int num_FL_threads); @@ -54,7 +54,7 @@ namespace Nyxus bool gatherRoisMetricsInMemory (const py::array_t& intens_image, const py::array_t& label_image, int start_idx); bool processIntSegImagePairInMemory (const std::string& intens_fpath, const std::string& label_fpath, int filepair_index, const std::string& intens_name, const std::string& seg_name); int processMontage(const py::array_t& intensFiles, const py::array_t& labelFiles, int numReduceThreads, const std::vector& intensity_names, - const std::vector& seg_names, std::string& error_message, SaveOption saveOption, const std::string& outputDir=""); + const std::vector& seg_names, std::string& error_message, const SaveOption saveOption, const std::string& outputDir=""); bool scanTrivialRois (const std::vector& batch_labels, const py::array_t& intens_images, const py::array_t& label_images, int start_idx); bool processTrivialRoisInMemory (const std::vector& trivRoiLabels, const py::array_t& intens_fpath, const py::array_t& label_fpath, int start_idx, size_t memory_limit); #endif diff --git a/src/nyx/output_writers.cpp b/src/nyx/output_writers.cpp index 6a850cd9..cd6cb3a2 100644 --- a/src/nyx/output_writers.cpp +++ b/src/nyx/output_writers.cpp @@ -1,5 +1,5 @@ #include "output_writers.h" - +#define USE_ARROW #ifdef USE_ARROW std::shared_ptr ApacheArrowWriter::get_arrow_table(const std::string& file_path) { @@ -386,30 +386,30 @@ arrow::Status ArrowIPCWriter::close () { } -std::shared_ptr WriterFactory::create_writer(const std::string &output_file, const std::vector &header) { +std::tuple, std::optional> WriterFactory::create_writer(const std::string &output_file, const std::vector &header) { if (Nyxus::ends_with_substr(output_file, ".parquet")) { - return std::make_shared(output_file, header); + return {std::make_unique(output_file, header), std::nullopt}; } else if (Nyxus::ends_with_substr(output_file, ".arrow") || Nyxus::ends_with_substr(output_file, ".feather")) { - return std::make_shared(output_file, header); + return {std::make_unique(output_file, header), std::nullopt}; } else { std::filesystem::path path(output_file); - if (path.has_extension()) { - std::string file_extension = path.extension().string(); - - throw std::invalid_argument("No writer option for extension \"" + file_extension + "\". Valid options are \".parquet\" or \".arrow\"."); - - } else { - - throw std::invalid_argument("No extension type was provided in the path. "); + auto error_msg = [&path](){ + if (path.has_extension()) + { + return "No writer option for extension \"" + path.extension().string() + "\". Valid options are \".parquet\" or \".arrow\"."; + } else { + return std::string{"No extension type was provided in the path."}; + } + }; - } + return {nullptr, error_msg()}; } } #else diff --git a/src/nyx/output_writers.h b/src/nyx/output_writers.h index 4bb54f26..0a2628c5 100644 --- a/src/nyx/output_writers.h +++ b/src/nyx/output_writers.h @@ -4,6 +4,8 @@ #include #include #include +#include +#include #ifdef USE_ARROW #include @@ -161,9 +163,9 @@ class WriterFactory { * @brief Create an ApacheArrowWriter based on the type of file passed. * * @param output_file Path to output file (.arrow or .parquet) - * @return std::shared_ptr + * @return std::unique_ptr */ - static std::shared_ptr create_writer(const std::string &output_file, const std::vector &header); + static std::tuple, std::optional> create_writer(const std::string &output_file, const std::vector &header); }; #else diff --git a/src/nyx/scan_fastloader_way.cpp b/src/nyx/scan_fastloader_way.cpp index 09aa0b81..e5694c8b 100644 --- a/src/nyx/scan_fastloader_way.cpp +++ b/src/nyx/scan_fastloader_way.cpp @@ -204,7 +204,7 @@ namespace Nyxus int numSensemakerThreads, int numReduceThreads, int min_online_roi_size, - SaveOption saveOption, + const SaveOption saveOption, const std::string& outputDir) { @@ -222,11 +222,20 @@ namespace Nyxus if (write_apache) { theEnvironment.arrow_stream = ArrowOutputStream(); + if( outputDir != "" && + !fs::is_directory(outputDir) && + !( + Nyxus::ends_with_substr(outputDir, ".arrow") || + Nyxus::ends_with_substr(outputDir, ".feather") || + Nyxus::ends_with_substr(outputDir, ".parquet") + ) + ) { std::cout <<"Acceptable arrow file extensions are \".arrow\", \".feather\", \".parquet\"'"; + return 1; + } - try { - theEnvironment.arrow_writer = theEnvironment.arrow_stream.create_arrow_file(saveOption, outputDir, Nyxus::get_header(theFeatureSet.getEnabledFeatures())); - } catch (const std::exception &err) { - std::cout << "Error creating Arrow file: " << err.what() << std::endl; + auto error = theEnvironment.arrow_stream.create_arrow_file(saveOption, outputDir, Nyxus::get_header(theFeatureSet.getEnabledFeatures())); + if (!error) { + std::cout << "Error creating Arrow file. \n" ; return 1; } } @@ -273,11 +282,10 @@ namespace Nyxus if (write_apache) { - auto status = theEnvironment.arrow_writer->write(Nyxus::get_feature_values()); + auto [status, msg] = theEnvironment.arrow_stream.write_arrow_file(Nyxus::get_feature_values()); - if (!status.ok()) { - // Handle read error - std::cout << "Error writing Arrow file: " << status.ToString() << std::endl; + if (!status) { + std::cout << "Error closing Arrow file: " << msg.value() << std::endl; return 2; } } else if (saveOption == SaveOption::saveCSV) { @@ -344,12 +352,10 @@ namespace Nyxus if (write_apache) { // close arrow file after use - auto status = theEnvironment.arrow_writer->close(); - - if (!status.ok()) { - // Handle read error - std::cout << "Error closing Arrow file: " << status.ToString() << std::endl; - return 2; + auto [status, msg] = theEnvironment.arrow_stream.close_arrow_file(); + if (!status) { + std::cout << "Error closing Arrow file: " << msg.value() << std::endl; + return 2; } } @@ -365,7 +371,7 @@ namespace Nyxus const std::vector& intensity_names, const std::vector& seg_names, std::string& error_message, - SaveOption saveOption, + const SaveOption saveOption, const std::string& outputDir) { bool write_apache = (saveOption == SaveOption::saveArrowIPC || saveOption == SaveOption::saveParquet); @@ -373,11 +379,20 @@ namespace Nyxus if (write_apache) { theEnvironment.arrow_stream = ArrowOutputStream(); + if( outputDir != "" && + !fs::is_directory(outputDir) && + !( + Nyxus::ends_with_substr(outputDir, ".arrow") || + Nyxus::ends_with_substr(outputDir, ".feather") || + Nyxus::ends_with_substr(outputDir, ".parquet") + ) + ) { std::cout <<"Acceptable arrow file extensions are \".arrow\", \".feather\", \".parquet\"'"; + return 1; + } - try { - theEnvironment.arrow_writer = theEnvironment.arrow_stream.create_arrow_file(saveOption, outputDir, Nyxus::get_header(theFeatureSet.getEnabledFeatures())); - } catch (const std::exception &err) { - error_message = err.what(); + auto error = theEnvironment.arrow_stream.create_arrow_file(saveOption, outputDir, Nyxus::get_header(theFeatureSet.getEnabledFeatures())); + if (!error) { + std::cout << "Error creating Arrow file. \n" ; return 1; } } @@ -408,14 +423,11 @@ namespace Nyxus if (write_apache) { - auto status = theEnvironment.arrow_writer->write(Nyxus::get_feature_values()); - - if (!status.ok()) { - // Handle read error - error_message = "Error writing Arrow file: " + status.ToString(); + auto [status, msg] = theEnvironment.arrow_stream.write_arrow_file(Nyxus::get_feature_values()); + if (!status) { + std::cout << "Error closing Arrow file: " << msg.value() << std::endl; return 2; } - } else { ok = save_features_2_buffer(theResultsCache); @@ -448,15 +460,12 @@ namespace Nyxus if (write_apache) { // close arrow file after use - auto status = theEnvironment.arrow_writer->close(); - - if (!status.ok()) { - // Handle read error - error_message = "Error closing Arrow file: " + status.ToString(); + auto [status, msg] = theEnvironment.arrow_stream.close_arrow_file(); + if (!status) { + std::cout << "Error closing Arrow file: " << msg.value() << std::endl; return 2; } } - return 0; // success } #endif From 4a6b000e111f0b3a9b6f44ce3d9143468747dcee Mon Sep 17 00:00:00 2001 From: sameeul Date: Fri, 27 Oct 2023 11:32:34 -0400 Subject: [PATCH 06/12] update 6 --- src/nyx/arrow_output_stream.cpp | 8 ++--- src/nyx/environment.h | 2 -- src/nyx/output_writers.cpp | 16 +--------- src/nyx/output_writers.h | 54 --------------------------------- 4 files changed, 5 insertions(+), 75 deletions(-) diff --git a/src/nyx/arrow_output_stream.cpp b/src/nyx/arrow_output_stream.cpp index 1b9b454b..4d987aec 100644 --- a/src/nyx/arrow_output_stream.cpp +++ b/src/nyx/arrow_output_stream.cpp @@ -78,11 +78,11 @@ bool ArrowOutputStream::create_arrow_file(const Nyxus::SaveOption& arrow_file_ty } -bool ArrowOutputStream::get_arrow_table(const std::string& file_path) { +std::shared_ptr ArrowOutputStream::get_arrow_table(const std::string& file_path) { std::cerr << "Apache Arrow functionality is not available. Please install Nyxus with Arrow enabled to use this functionality." << std::endl; - return false; + return nullptr; } std::string ArrowOutputStream::get_arrow_path() { @@ -94,11 +94,11 @@ std::string ArrowOutputStream::get_arrow_path() { std::tuple> ArrowOutputStream::write_arrow_file (const std::vector, int, std::vector>>& features){ std::cerr << "Apache Arrow functionality is not available. Please install Nyxus with Arrow enabled to use this functionality." << std::endl; - return std::make_tuple(false, "Apache Arrow functionality is not available.") + return {false, "Apache Arrow functionality is not available."}; } std::tuple> ArrowOutputStream::close_arrow_file (){ std::cerr << "Apache Arrow functionality is not available. Please install Nyxus with Arrow enabled to use this functionality." << std::endl; - return std::make_tuple(false, "Apache Arrow functionality is not available.") + return {false, "Apache Arrow functionality is not available."}; } diff --git a/src/nyx/environment.h b/src/nyx/environment.h index 09088c25..969d5b46 100644 --- a/src/nyx/environment.h +++ b/src/nyx/environment.h @@ -8,8 +8,6 @@ #include "cli_gabor_options.h" #include "cli_nested_roi_options.h" #include "save_option.h" - -#include "output_writers.h" #include "arrow_output_stream.h" diff --git a/src/nyx/output_writers.cpp b/src/nyx/output_writers.cpp index cd6cb3a2..343ecea6 100644 --- a/src/nyx/output_writers.cpp +++ b/src/nyx/output_writers.cpp @@ -1,5 +1,5 @@ #include "output_writers.h" -#define USE_ARROW + #ifdef USE_ARROW std::shared_ptr ApacheArrowWriter::get_arrow_table(const std::string& file_path) { @@ -412,18 +412,4 @@ std::tuple, std::optional> Write return {nullptr, error_msg()}; } } -#else - - std::shared_ptr ApacheArrowWriter::get_arrow_table(const std::string& file_path) { - return nullptr; - } - - arrow::Status ApacheArrowWriter::write (const std::vector, int, std::vector>>& features) { - return arrow::Status(); - } - - arrow::Status ApacheArrowWriter::close () { - return arrow::Status(); - } - #endif \ No newline at end of file diff --git a/src/nyx/output_writers.h b/src/nyx/output_writers.h index 0a2628c5..3fb9556c 100644 --- a/src/nyx/output_writers.h +++ b/src/nyx/output_writers.h @@ -167,58 +167,4 @@ class WriterFactory { */ static std::tuple, std::optional> create_writer(const std::string &output_file, const std::vector &header); }; - -#else - - -namespace arrow { - - using Table = bool; - - class Status { - - public: - - bool ok() {return false;} - - std::string ToString() {return "Apache Arrow support is not enabled. Please reinstall Nyxus with Arrow support enabled.";} - - }; - -}; - -/** - * @brief Base class for creating Apache Arrow output writers - * - * This class provides methods for the Arrow table used for writing to Arrow formats and - * provides virtual functions to overridden for writing to different formats - * - */ -class ApacheArrowWriter -{ - -public: - - /** - * @brief Get the arrow table object - * - * @return std::shared_ptr - */ - std::shared_ptr get_arrow_table(const std::string& file_path); - - /** - * @brief Write Nyxus data to Arrow file - * - * @param header Header data - * @param string_columns String data - * @param numeric_columns Numeric data - * @param number_of_rows Number of rows - * @return arrow::Status - */ - virtual arrow::Status write (const std::vector, int, std::vector>>& features); - - virtual arrow::Status close (); - -}; - #endif From 0033f4028b48208fe4f0e0ca0ec3d377dd5a7344 Mon Sep 17 00:00:00 2001 From: sameeul Date: Fri, 27 Oct 2023 11:50:22 -0400 Subject: [PATCH 07/12] update 7 --- src/nyx/arrow_output_stream.cpp | 15 --------------- src/nyx/arrow_output_stream.h | 7 ------- 2 files changed, 22 deletions(-) diff --git a/src/nyx/arrow_output_stream.cpp b/src/nyx/arrow_output_stream.cpp index 4d987aec..fa80d86f 100644 --- a/src/nyx/arrow_output_stream.cpp +++ b/src/nyx/arrow_output_stream.cpp @@ -77,21 +77,6 @@ bool ArrowOutputStream::create_arrow_file(const Nyxus::SaveOption& arrow_file_ty return false; } - -std::shared_ptr ArrowOutputStream::get_arrow_table(const std::string& file_path) { - - std::cerr << "Apache Arrow functionality is not available. Please install Nyxus with Arrow enabled to use this functionality." << std::endl; - - return nullptr; -} - -std::string ArrowOutputStream::get_arrow_path() { - - std::cerr << "Apache Arrow functionality is not available. Please install Nyxus with Arrow enabled to use this functionality." << std::endl; - - return ""; -} - std::tuple> ArrowOutputStream::write_arrow_file (const std::vector, int, std::vector>>& features){ std::cerr << "Apache Arrow functionality is not available. Please install Nyxus with Arrow enabled to use this functionality." << std::endl; return {false, "Apache Arrow functionality is not available."}; diff --git a/src/nyx/arrow_output_stream.h b/src/nyx/arrow_output_stream.h index 024b7358..2231e71f 100644 --- a/src/nyx/arrow_output_stream.h +++ b/src/nyx/arrow_output_stream.h @@ -49,11 +49,6 @@ class ArrowOutputStream { #else -// Replace arrow::Table with a dummy variable -namespace arrow { - using Table = bool; -}; - /** * @brief Class to write to Apache Arrow formats * @@ -66,8 +61,6 @@ class ArrowOutputStream { bool create_arrow_file(const Nyxus::SaveOption& arrow_file_type, const std::string& arrow_file_path, const std::vector& header); - std::shared_ptr get_arrow_table(const std::string& file_path); - std::string get_arrow_path(); std::tuple> write_arrow_file (const std::vector, int, std::vector>>& features); std::tuple> close_arrow_file (); }; From 2d6c327ee68022d9a01ab5692f8ee4e86bd8a3c8 Mon Sep 17 00:00:00 2001 From: sameeul Date: Fri, 27 Oct 2023 11:54:53 -0400 Subject: [PATCH 08/12] update 8 --- src/nyx/arrow_output_stream.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/nyx/arrow_output_stream.cpp b/src/nyx/arrow_output_stream.cpp index fa80d86f..ac183cc8 100644 --- a/src/nyx/arrow_output_stream.cpp +++ b/src/nyx/arrow_output_stream.cpp @@ -45,25 +45,25 @@ std::tuple> ArrowOutputStream::write_arrow_file if (writer_){ auto status = writer_->write(features); if (status.ok()) { - return std::make_tuple(true, std::nullopt); + return {true, std::nullopt}; } else { - return std::make_tuple(false, status.ToString()); + return {false, status.ToString()}; } } - return std::make_tuple(false, "Arrow Writer is not initialized."); + return {false, "Arrow Writer is not initialized."}; } std::tuple> ArrowOutputStream::close_arrow_file (){ if (writer_){ auto status = writer_->close(); if (status.ok()) { - return std::make_tuple(true, std::nullopt); + return {true, std::nullopt}; } else { - return std::make_tuple(false, status.ToString()); + return {false, status.ToString()}; } } - return std::make_tuple(false, "Arrow Writer is not initialized."); + return {false, "Arrow Writer is not initialized."}; } #else From fecfe145bb71d526e834d3225a54ccc096ca1716 Mon Sep 17 00:00:00 2001 From: sameeul Date: Fri, 27 Oct 2023 13:59:36 -0400 Subject: [PATCH 09/12] add pytest verbise --- .github/workflows/build_and_test_mac.yml | 2 +- .github/workflows/build_and_test_ubuntu.yml | 2 +- .github/workflows/build_and_test_windows.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_and_test_mac.yml b/.github/workflows/build_and_test_mac.yml index cbc24b69..7d16ea2d 100644 --- a/.github/workflows/build_and_test_mac.yml +++ b/.github/workflows/build_and_test_mac.yml @@ -77,4 +77,4 @@ jobs: - name: Run PyTest working-directory: ${{github.workspace}} - run: python -m pytest tests/python/ \ No newline at end of file + run: python -m pytest tests/python/ -vv \ No newline at end of file diff --git a/.github/workflows/build_and_test_ubuntu.yml b/.github/workflows/build_and_test_ubuntu.yml index 49bcd88c..8b5d5c5a 100644 --- a/.github/workflows/build_and_test_ubuntu.yml +++ b/.github/workflows/build_and_test_ubuntu.yml @@ -76,4 +76,4 @@ jobs: - name: Run PyTest working-directory: ${{github.workspace}} - run: python -m pytest tests/python/ \ No newline at end of file + run: python -m pytest tests/python/ -vv \ No newline at end of file diff --git a/.github/workflows/build_and_test_windows.yml b/.github/workflows/build_and_test_windows.yml index de769fcc..8de643a2 100644 --- a/.github/workflows/build_and_test_windows.yml +++ b/.github/workflows/build_and_test_windows.yml @@ -80,4 +80,4 @@ jobs: - name: Run PyTest working-directory: ${{github.workspace}} - run: python -m pytest tests/python/ \ No newline at end of file + run: python -m pytest tests/python/ -vv \ No newline at end of file From 710e33990dd1e59397d5852beea2afd32087ae59 Mon Sep 17 00:00:00 2001 From: sameeul Date: Fri, 27 Oct 2023 14:30:01 -0400 Subject: [PATCH 10/12] fix virtual ~ --- src/nyx/output_writers.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/nyx/output_writers.h b/src/nyx/output_writers.h index 3fb9556c..ac31d1f5 100644 --- a/src/nyx/output_writers.h +++ b/src/nyx/output_writers.h @@ -88,6 +88,8 @@ class ApacheArrowWriter virtual arrow::Status close () = 0; + virtual ~ApacheArrowWriter() = default; + }; /** From 99a83ea8cf79f2af3e49ef678721c399d1e425fc Mon Sep 17 00:00:00 2001 From: sameeul Date: Sun, 29 Oct 2023 07:50:13 -0400 Subject: [PATCH 11/12] update 9 --- CMakeLists.txt | 2 +- src/nyx/arrow_output_stream.cpp | 41 ++++++++++++++++++++---------- src/nyx/arrow_output_stream.h | 4 +-- src/nyx/globals.h | 4 +-- src/nyx/scan_fastloader_way.cpp | 45 +++++++++------------------------ 5 files changed, 45 insertions(+), 51 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6a785ccd..fba19162 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -412,7 +412,7 @@ endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") - list(APPEND Nyxus_LIBRARIES stdc++fs) + list(APPEND Nyxus_LIBRARIES stdc++fs -static-libgcc -static-libstdc++) endif() if(BUILD_LIB) diff --git a/src/nyx/arrow_output_stream.cpp b/src/nyx/arrow_output_stream.cpp index ac183cc8..d846d6d1 100644 --- a/src/nyx/arrow_output_stream.cpp +++ b/src/nyx/arrow_output_stream.cpp @@ -1,29 +1,44 @@ #include "arrow_output_stream.h" #ifdef USE_ARROW -bool ArrowOutputStream::create_arrow_file(const Nyxus::SaveOption& arrow_file_type, +std::tuple> ArrowOutputStream::create_arrow_file(const Nyxus::SaveOption& arrow_file_type, const std::string& arrow_file_path, const std::vector& header) { - std::string extension = (arrow_file_type == Nyxus::SaveOption::saveParquet) ? ".parquet" : ".arrow"; + auto valid_extension = [&arrow_file_path](){ + if( arrow_file_path != "" && + !fs::is_directory(arrow_file_path)){ + if( auto ext = fs::path(arrow_file_path).extension(); + ext == ".arrow" || ext == ".feather" || ext == ".arrow"){ + return true; + } else { + return false; + } + } + return false; + }(); - if (arrow_file_path == "") { - arrow_file_path_ = "NyxusFeatures" + extension; - } else if (fs::is_directory(arrow_file_path)) { - arrow_file_path_ = arrow_file_path + "/NyxusFeatures" + extension; - } else { + if (valid_extension) { arrow_file_path_ = arrow_file_path; + } else { + std::string extension = (arrow_file_type == Nyxus::SaveOption::saveParquet) ? "parquet" : "arrow"; + if (arrow_file_path == "") { + arrow_file_path_ = "NyxusFeatures." + extension; + } else if (fs::is_directory(arrow_file_path)) { + arrow_file_path_ = arrow_file_path + "/NyxusFeatures." + extension; + } else { + arrow_file_path_ = fs::path(arrow_file_path).replace_extension(extension); + } } - + std::optional error_msg; std::tie(writer_, error_msg) = WriterFactory::create_writer(arrow_file_path_, header); if (writer_) { - return true; + return {true, std::nullopt}; } else { - std::cout << error_msg.value() << std::endl; - return false; + return {false, error_msg}; } } @@ -68,13 +83,13 @@ std::tuple> ArrowOutputStream::close_arrow_file #else -bool ArrowOutputStream::create_arrow_file(const Nyxus::SaveOption& arrow_file_type, +std::tuple> ArrowOutputStream::create_arrow_file(const Nyxus::SaveOption& arrow_file_type, const std::string& arrow_file_path, const std::vector& header) { std::cerr << "Apache Arrow functionality is not available. Please install Nyxus with Arrow enabled to use this functionality." << std::endl; - return false; + return {false, "Apache Arrow functionality is not available."}; } std::tuple> ArrowOutputStream::write_arrow_file (const std::vector, int, std::vector>>& features){ diff --git a/src/nyx/arrow_output_stream.h b/src/nyx/arrow_output_stream.h index 2231e71f..e720ef89 100644 --- a/src/nyx/arrow_output_stream.h +++ b/src/nyx/arrow_output_stream.h @@ -38,7 +38,7 @@ class ArrowOutputStream { std::shared_ptr arrow_table_ = nullptr; public: - bool create_arrow_file(const Nyxus::SaveOption& arrow_file_type, + std::tuple> create_arrow_file(const Nyxus::SaveOption& arrow_file_type, const std::string& arrow_file_path, const std::vector& header); std::shared_ptr get_arrow_table(const std::string& file_path); @@ -58,7 +58,7 @@ class ArrowOutputStream { class ArrowOutputStream { public: - bool create_arrow_file(const Nyxus::SaveOption& arrow_file_type, + std::tuple> create_arrow_file(const Nyxus::SaveOption& arrow_file_type, const std::string& arrow_file_path, const std::vector& header); std::tuple> write_arrow_file (const std::vector, int, std::vector>>& features); diff --git a/src/nyx/globals.h b/src/nyx/globals.h index 573a208e..b620d229 100644 --- a/src/nyx/globals.h +++ b/src/nyx/globals.h @@ -40,7 +40,7 @@ namespace Nyxus bool scanFilePairParallel(const std::string& intens_fpath, const std::string& label_fpath, int num_fastloader_threads, int num_sensemaker_threads, int filepair_index, int tot_num_filepairs); std::string getPureFname(const std::string& fpath); - int processDataset(const std::vector& intensFiles, const std::vector& labelFiles, int numFastloaderThreads, int numSensemakerThreads, int numReduceThreads, int min_online_roi_size, const SaveOption saveOption, const std::string& outputDir); + int processDataset(const std::vector& intensFiles, const std::vector& labelFiles, int numFastloaderThreads, int numSensemakerThreads, int numReduceThreads, int min_online_roi_size, const SaveOption saveOption, const std::string& outputPath); bool gatherRoisMetrics(const std::string& intens_fpath, const std::string& label_fpath, int num_FL_threads); bool processTrivialRois (const std::vector& trivRoiLabels, const std::string& intens_fpath, const std::string& label_fpath, int num_FL_threads, size_t memory_limit); bool processNontrivialRois (const std::vector& nontrivRoiLabels, const std::string& intens_fpath, const std::string& label_fpath, int num_FL_threads); @@ -54,7 +54,7 @@ namespace Nyxus bool gatherRoisMetricsInMemory (const py::array_t& intens_image, const py::array_t& label_image, int start_idx); bool processIntSegImagePairInMemory (const std::string& intens_fpath, const std::string& label_fpath, int filepair_index, const std::string& intens_name, const std::string& seg_name); int processMontage(const py::array_t& intensFiles, const py::array_t& labelFiles, int numReduceThreads, const std::vector& intensity_names, - const std::vector& seg_names, std::string& error_message, const SaveOption saveOption, const std::string& outputDir=""); + const std::vector& seg_names, std::string& error_message, const SaveOption saveOption, const std::string& outputPath=""); bool scanTrivialRois (const std::vector& batch_labels, const py::array_t& intens_images, const py::array_t& label_images, int start_idx); bool processTrivialRoisInMemory (const std::vector& trivRoiLabels, const py::array_t& intens_fpath, const py::array_t& label_fpath, int start_idx, size_t memory_limit); #endif diff --git a/src/nyx/scan_fastloader_way.cpp b/src/nyx/scan_fastloader_way.cpp index e5694c8b..2d68b2a0 100644 --- a/src/nyx/scan_fastloader_way.cpp +++ b/src/nyx/scan_fastloader_way.cpp @@ -205,7 +205,7 @@ namespace Nyxus int numReduceThreads, int min_online_roi_size, const SaveOption saveOption, - const std::string& outputDir) + const std::string& outputPath) { #ifdef CHECKTIMING @@ -222,20 +222,10 @@ namespace Nyxus if (write_apache) { theEnvironment.arrow_stream = ArrowOutputStream(); - if( outputDir != "" && - !fs::is_directory(outputDir) && - !( - Nyxus::ends_with_substr(outputDir, ".arrow") || - Nyxus::ends_with_substr(outputDir, ".feather") || - Nyxus::ends_with_substr(outputDir, ".parquet") - ) - ) { std::cout <<"Acceptable arrow file extensions are \".arrow\", \".feather\", \".parquet\"'"; - return 1; - } - - auto error = theEnvironment.arrow_stream.create_arrow_file(saveOption, outputDir, Nyxus::get_header(theFeatureSet.getEnabledFeatures())); - if (!error) { - std::cout << "Error creating Arrow file. \n" ; + auto [status, msg] = theEnvironment.arrow_stream.create_arrow_file(saveOption, outputPath, Nyxus::get_header(theFeatureSet.getEnabledFeatures())); + + if (!status) { + std::cout << "Error creating Arrow file: " << msg.value() << std::endl; return 1; } } @@ -285,11 +275,11 @@ namespace Nyxus auto [status, msg] = theEnvironment.arrow_stream.write_arrow_file(Nyxus::get_feature_values()); if (!status) { - std::cout << "Error closing Arrow file: " << msg.value() << std::endl; + std::cout << "Error writing Arrow file: " << msg.value() << std::endl; return 2; } } else if (saveOption == SaveOption::saveCSV) { - ok = save_features_2_csv(ifp, lfp, outputDir); + ok = save_features_2_csv(ifp, lfp, outputPath); if (ok == false) { @@ -372,27 +362,16 @@ namespace Nyxus const std::vector& seg_names, std::string& error_message, const SaveOption saveOption, - const std::string& outputDir) + const std::string& outputPath) { bool write_apache = (saveOption == SaveOption::saveArrowIPC || saveOption == SaveOption::saveParquet); if (write_apache) { theEnvironment.arrow_stream = ArrowOutputStream(); - if( outputDir != "" && - !fs::is_directory(outputDir) && - !( - Nyxus::ends_with_substr(outputDir, ".arrow") || - Nyxus::ends_with_substr(outputDir, ".feather") || - Nyxus::ends_with_substr(outputDir, ".parquet") - ) - ) { std::cout <<"Acceptable arrow file extensions are \".arrow\", \".feather\", \".parquet\"'"; - return 1; - } - - auto error = theEnvironment.arrow_stream.create_arrow_file(saveOption, outputDir, Nyxus::get_header(theFeatureSet.getEnabledFeatures())); - if (!error) { - std::cout << "Error creating Arrow file. \n" ; + auto [status, msg] = theEnvironment.arrow_stream.create_arrow_file(saveOption, outputPath, Nyxus::get_header(theFeatureSet.getEnabledFeatures())); + if (!status) { + std::cout << "Error creating Arrow file: " << msg.value() << std::endl; return 1; } } @@ -425,7 +404,7 @@ namespace Nyxus auto [status, msg] = theEnvironment.arrow_stream.write_arrow_file(Nyxus::get_feature_values()); if (!status) { - std::cout << "Error closing Arrow file: " << msg.value() << std::endl; + std::cout << "Error writing Arrow file: " << msg.value() << std::endl; return 2; } } else { From 976d0048d548a91edef23c22e5c51b257586b0da Mon Sep 17 00:00:00 2001 From: Sameeul B Samee Date: Sun, 29 Oct 2023 11:00:48 -0400 Subject: [PATCH 12/12] fix msvc --- src/nyx/arrow_output_stream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nyx/arrow_output_stream.cpp b/src/nyx/arrow_output_stream.cpp index d846d6d1..ce13ffd6 100644 --- a/src/nyx/arrow_output_stream.cpp +++ b/src/nyx/arrow_output_stream.cpp @@ -29,7 +29,7 @@ std::tuple> ArrowOutputStream::create_arrow_fil } else if (fs::is_directory(arrow_file_path)) { arrow_file_path_ = arrow_file_path + "/NyxusFeatures." + extension; } else { - arrow_file_path_ = fs::path(arrow_file_path).replace_extension(extension); + arrow_file_path_ = fs::path(arrow_file_path).replace_extension(extension).string(); } }