From 75072df4f428df83dad65aa13cc30fd65de59adf Mon Sep 17 00:00:00 2001 From: Fran Bacic Toplek Date: Thu, 6 Feb 2025 15:26:59 +0100 Subject: [PATCH 1/2] Fixed chunk size error --- tools/cmdata/CMakeLists.txt | 8 +++++-- tools/cmdata/main.cpp | 9 ++++++- tools/cmdata/src/cmdata/cmdata.hpp | 36 +++++++++++++++++++++------- tools/cmdata/src/cmdata/io.hpp | 38 +++++++++++++++++++++++++++--- 4 files changed, 77 insertions(+), 14 deletions(-) diff --git a/tools/cmdata/CMakeLists.txt b/tools/cmdata/CMakeLists.txt index ebc9513a..96c72f9c 100644 --- a/tools/cmdata/CMakeLists.txt +++ b/tools/cmdata/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.16) project(cmdata VERSION 0.1 DESCRIPTION "A programm to calculate contact data from gromacs trajectories for multi-eGO" - LANGUAGES CXX) + LANGUAGES CXX C) set(CMAKE_CXX_STANDARD 17) set(CMDATA cmdata) @@ -25,7 +25,11 @@ endif() # find gromacs find_package(GROMACS REQUIRED NAMES gromacs gromacs_mpi gromacs_d gromacs_mpi_d HINTS "$ENV{GROMACS_DIR}") -find_package(HDF5 REQUIRED COMPONENTS C CXX) +find_package(HDF5 COMPONENTS C CXX) +if(HDF5_FOUND) + message(STATUS "HDF5 found, enabling HDF5 support") + add_definitions(-DUSE_HDF5) +endif() # include source and header files target_include_directories(${CMDATA} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src ${HDF5_INCLUDE_DIRS}) diff --git a/tools/cmdata/main.cpp b/tools/cmdata/main.cpp index 6cf87873..c58d8ca5 100644 --- a/tools/cmdata/main.cpp +++ b/tools/cmdata/main.cpp @@ -46,7 +46,9 @@ int main(int argc, const char** argv) {"mode", '\0', POPT_ARG_STRING | POPT_ARGFLAG_OPTIONAL, &p_mode, 0, "Mode of operation", "STRING"}, {"weights", '\0', POPT_ARG_STRING | POPT_ARGFLAG_OPTIONAL, &p_weights_path, 0, "Weights file", "FILE"}, {"no_pbc", '\0', POPT_ARG_NONE | POPT_ARGFLAG_OPTIONAL, &p_nopbc, 0, "Ignore pbcs", 0}, + #ifdef USE_HDF5 {"h5", '\0', POPT_ARG_NONE | POPT_ARGFLAG_OPTIONAL, &p_h5 , 0, "Write output in HDF5 format", 0}, + #endif POPT_TABLEEND }; @@ -73,7 +75,9 @@ int main(int argc, const char** argv) if ( p_weights_path != NULL ) weights_path = std::string(p_weights_path); if ( p_out_prefix != NULL ) out_prefix = std::string(p_out_prefix); if ( p_nopbc != NULL ) nopbc = true; + #ifdef USE_HDF5 if ( p_h5 != NULL ) h5 = true; + #endif // check if paths are valid if ( !std::filesystem::exists(std::filesystem::path(traj_path)) ) @@ -155,7 +159,10 @@ int main(int argc, const char** argv) cmdata::CMData cmdata( top_path, traj_path, cutoff, mol_cutoff, nskip, num_threads, mol_threads, dt, - mode, weights_path, nopbc, t_begin, t_end, h5 + mode, weights_path, nopbc, t_begin, t_end + #ifdef USE_HDF5 + , h5 + #endif ); cmdata.run(); cmdata.process_data(); diff --git a/tools/cmdata/src/cmdata/cmdata.hpp b/tools/cmdata/src/cmdata/cmdata.hpp index f3fa3b36..890c3ee9 100644 --- a/tools/cmdata/src/cmdata/cmdata.hpp +++ b/tools/cmdata/src/cmdata/cmdata.hpp @@ -104,7 +104,9 @@ class CMData // mode selection, booleans and functions std::string mode_; bool intra_ = false, same_ = false, cross_ = false; + #ifdef USE_HDF5 bool h5_ = false; + #endif // function types using ftype_intra_ = cmdata::ftypes::function_traits; @@ -235,12 +237,18 @@ class CMData public: CMData( const std::string &top_path, const std::string &traj_path, - float cutoff, float mol_cutoff, int nskip, int num_threads, int num_mol_threads, + double cutoff, double mol_cutoff, int nskip, int num_threads, int num_mol_threads, int dt, const std::string &mode, const std::string &weights_path, - bool no_pbc, float t_begin, float t_end, bool h5 + bool no_pbc, float t_begin, float t_end + #ifdef USE_HDF5 + , bool h5 + #endif ) : cutoff_(cutoff), mol_cutoff_(mol_cutoff), nskip_(nskip), num_threads_(num_threads), num_mol_threads_(num_mol_threads), mode_(mode), weights_path_(weights_path), - no_pbc_(no_pbc), dt_(dt), t_begin_(t_begin), t_end_(t_end), h5_(h5) + no_pbc_(no_pbc), dt_(dt), t_begin_(t_begin), t_end_(t_end) + #ifdef USE_HDF5 + ,h5_(h5) + #endif { bool bTop_; matrix boxtop_; @@ -756,27 +764,39 @@ class CMData void write_output( const std::string &output_prefix ) { std::cout << "Writing data... " << std::endl; - using ftype_write_intra = cmdata::ftypes::function_traits; - using ftype_write_inter_same = cmdata::ftypes::function_traits; - using ftype_write_inter_cross = cmdata::ftypes::function_traits; + using ftype_write_intra = cmdata::ftypes::function_traits; + using ftype_write_inter_same = cmdata::ftypes::function_traits; + using ftype_write_inter_cross = cmdata::ftypes::function_traits; std::function write_intra = cmdata::ftypes::do_nothing(); std::function write_inter_same = cmdata::ftypes::do_nothing(); std::function write_inter_cross = cmdata::ftypes::do_nothing(); if (intra_) { + #ifdef USE_HDF5 if(h5_) write_intra = cmdata::io::f_write_intra_HDF5; - else write_intra = cmdata::io::f_write_intra; + else write_intra = cmdata::io::f_write_intra; + #else + write_intra = cmdata::io::f_write_intra; + #endif } if (same_) { + #ifdef USE_HDF5 if(h5_) write_inter_same = cmdata::io::f_write_inter_same_HDF5; else write_inter_same = cmdata::io::f_write_inter_same; + #else + write_inter_same = cmdata::io::f_write_inter_same; + #endif } if (cross_) { + #ifdef USE_HDF5 if(h5_) write_inter_cross = cmdata::io::f_write_inter_cross_HDF5; - else write_inter_cross = cmdata::io::f_write_inter_cross; + else write_inter_cross = cmdata::io::f_write_inter_cross; + #else + write_inter_cross = cmdata::io::f_write_inter_cross; + #endif } for (std::size_t i = 0; i < natmol2_.size(); i++) diff --git a/tools/cmdata/src/cmdata/io.hpp b/tools/cmdata/src/cmdata/io.hpp index 6880a437..14161a21 100644 --- a/tools/cmdata/src/cmdata/io.hpp +++ b/tools/cmdata/src/cmdata/io.hpp @@ -4,7 +4,9 @@ #include #include +#ifdef USE_HDF5 #include +#endif #include #include @@ -18,7 +20,9 @@ #define COUT_FLOAT_PREC6 std::fixed << std::setprecision(6) +#ifdef USE_HDF5 using namespace H5; +#endif static inline void mtopGetMolblockIndex(const gmx_mtop_t& mtop, int globalAtomIndex, @@ -176,6 +180,7 @@ std::vector read_weights_file( const std::string &path ) // return mols; // } +#ifdef USE_HDF5 void f_write_intra_HDF5(const std::string &output_prefix, std::size_t i, int ii, const std::vector &density_bins, const std::vector &natmol2, const std::vector>>> &intram_mat_density @@ -192,7 +197,11 @@ void f_write_intra_HDF5(const std::string &output_prefix, // Create dataset creation property list with compression DSetCreatPropList plist; plist.setDeflate(6); // Set compression level (0-9, where 9 is maximum compression) - hsize_t chunk_dims[2] = {300, 512}; // Adjust chunk size based on your data + // hsize_t chunk_dims[2] = {300, 512}; // Adjust chunk size based on your data + hsize_t chunk_dims[2] = { + std::min(static_cast(density_bins.size()), static_cast(300)), + std::min(static_cast(natmol2[i] + 1), static_cast(512)) + }; plist.setChunk(2, chunk_dims); // Create a dataset with compression @@ -211,6 +220,7 @@ void f_write_intra_HDF5(const std::string &output_prefix, // Write the data to the dataset dataset.write(flat_data.data(), PredType::NATIVE_FLOAT); } +#endif void f_write_intra(const std::string &output_prefix, std::size_t i, int ii, const std::vector &density_bins, const std::vector &natmol2, @@ -232,6 +242,7 @@ void f_write_intra(const std::string &output_prefix, fp_intra.close(); } +#ifdef USE_HDF5 void f_write_inter_same_HDF5(const std::string &output_prefix, std::size_t i, int ii, const std::vector &density_bins, const std::vector &natmol2, const std::vector>>> &interm_same_mat_density, @@ -251,7 +262,21 @@ void f_write_inter_same_HDF5(const std::string &output_prefix, // Create dataset creation property list with compression DSetCreatPropList plist; plist.setDeflate(6); // Set compression level (0-9, where 9 is maximum compression) - hsize_t chunk_dims[2] = {300, 512}; // Adjust chunk size based on your data + // hsize_t chunk_dims[2] = { + // (static_cast(density_bins.size()) < 300) ? 300 : static_cast(density_bins.size()), + // (static_cast(natmol2[i]+1) < 512) ? 512 : static_cast(natmol2[i]+1) + // }; // Adjust chunk size based on your data + hsize_t chunk_dims[2] = { + std::min(static_cast(density_bins.size()), static_cast(300)), + std::min(static_cast(natmol2[i] + 1), static_cast(512)) + }; +std::cout << "Dataset dimensions: (" + << density_bins.size() << ", " + << (natmol2[i] + 1) << ")" << std::endl; +std::cout << "Chunk dimensions: (" + << chunk_dims[0] << ", " + << chunk_dims[1] << ")" << std::endl; + plist.setChunk(2, chunk_dims); // Create a dataset with compression @@ -275,6 +300,7 @@ void f_write_inter_same_HDF5(const std::string &output_prefix, dataset.write(flat_data.data(), PredType::NATIVE_FLOAT); dataset_c.write(flat_data_c.data(), PredType::NATIVE_FLOAT); } +#endif void f_write_inter_same(const std::string &output_prefix, std::size_t i, int ii, const std::vector &density_bins, const std::vector &natmol2, @@ -302,6 +328,7 @@ void f_write_inter_same(const std::string &output_prefix, fp_inter_cum.close(); } +#ifdef USE_HDF5 void f_write_inter_cross_HDF5(const std::string &output_prefix, std::size_t i, std::size_t j, int ii, const std::vector &density_bins, const std::vector &natmol2, const std::vector> &cross_index, @@ -322,7 +349,11 @@ void f_write_inter_cross_HDF5(const std::string &output_prefix, // Create dataset creation property list with compression DSetCreatPropList plist; plist.setDeflate(6); // Set compression level (0-9, where 9 is maximum compression) - hsize_t chunk_dims[2] = {300, 512}; // Adjust chunk size based on your data + // hsize_t chunk_dims[2] = {300, 512}; // Adjust chunk size based on your data + hsize_t chunk_dims[2] = { + std::min(static_cast(density_bins.size()), static_cast(300)), + std::min(static_cast(natmol2[i] + 1), static_cast(512)) + }; plist.setChunk(2, chunk_dims); // Create a dataset with compression @@ -346,6 +377,7 @@ void f_write_inter_cross_HDF5(const std::string &output_prefix, dataset.write(flat_data.data(), PredType::NATIVE_FLOAT); dataset_c.write(flat_data_c.data(), PredType::NATIVE_FLOAT); } +#endif void f_write_inter_cross(const std::string &output_prefix, std::size_t i, std::size_t j, int ii, const std::vector &density_bins, const std::vector &natmol2, From 8b9b115ca0c0efd30e944b2df9a3ae6c4bb1f7b9 Mon Sep 17 00:00:00 2001 From: Fran Bacic Toplek Date: Thu, 6 Feb 2025 15:45:27 +0100 Subject: [PATCH 2/2] Fixed include bugs --- tools/cmdata/CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/cmdata/CMakeLists.txt b/tools/cmdata/CMakeLists.txt index 96c72f9c..e89b2953 100644 --- a/tools/cmdata/CMakeLists.txt +++ b/tools/cmdata/CMakeLists.txt @@ -29,10 +29,12 @@ find_package(HDF5 COMPONENTS C CXX) if(HDF5_FOUND) message(STATUS "HDF5 found, enabling HDF5 support") add_definitions(-DUSE_HDF5) + target_include_directories(${CMDATA} PRIVATE ${HDF5_INCLUDE_DIRS}) + target_link_libraries(${CMDATA} PRIVATE ${HDF5_CXX_LIBRARIES}) endif() # include source and header files -target_include_directories(${CMDATA} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src ${HDF5_INCLUDE_DIRS}) +target_include_directories(${CMDATA} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) include(FetchContent) SET(FETCHCONTENT_QUIET OFF) @@ -55,7 +57,7 @@ FetchContent_Declare( FetchContent_MakeAvailable(xdrfile) # link libraries -target_link_libraries(${CMDATA} PRIVATE Gromacs::libgromacs xdrfile popt ${HDF5_CXX_LIBRARIES}) +target_link_libraries(${CMDATA} PRIVATE Gromacs::libgromacs xdrfile popt) set_target_properties(${PROJECT_NAME} PROPERTIES INSTALL_RPATH ${CMAKE_INSTALL_PREFIX}/lib) install(TARGETS ${CMDATA} DESTINATION ${CMAKE_INSTALL_PREFIX}/bin)