Skip to content

Commit

Permalink
Expose molzip functionality to MinimalLib (rdkit#7959)
Browse files Browse the repository at this point in the history
* Expose molzip functionality to MinimalLib

* changes from code review

---------

Co-authored-by: ptosco <[email protected]>
Co-authored-by: Greg Landrum <[email protected]>
  • Loading branch information
3 people authored Nov 12, 2024
1 parent 9db22a9 commit 9c63cf6
Show file tree
Hide file tree
Showing 12 changed files with 304 additions and 22 deletions.
1 change: 1 addition & 0 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ AllowShortCaseLabelsOnASingleLine: false
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AllowShortFunctionsOnASingleLine: All
AllowShortEnumsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: false
AlwaysBreakTemplateDeclarations: true
AlwaysBreakBeforeMultilineStrings: true
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ option(RDK_BUILD_FUZZ_TARGETS "build the fuzz targets" OFF)
option(RDK_BUILD_MINIMAL_LIB_RXN "build support for reactions into MinimalLib" ON )
option(RDK_BUILD_MINIMAL_LIB_SUBSTRUCTLIBRARY "build support for SubstructLibrary into MinimalLib" ON )
option(RDK_BUILD_MINIMAL_LIB_MCS "build support for MCS into MinimalLib" OFF )
option(RDK_BUILD_MINIMAL_LIB_MOLZIP "build support for molzip into MinimalLib" OFF )

set(RDK_BOOST_VERSION "1.70.0")

Expand Down
5 changes: 3 additions & 2 deletions Code/GraphMol/ChemTransforms/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@

rdkit_library(ChemTransforms ChemTransforms.cpp MolFragmenter.cpp LINK_LIBRARIES
rdkit_library(ChemTransforms ChemTransforms.cpp
MolFragmenter.cpp MolFragmenterJSONParser.cpp LINK_LIBRARIES
GraphMol SubstructMatch SmilesParse Depictor)
target_compile_definitions(ChemTransforms PRIVATE -DRDKIT_CHEMTRANSFORMS_BUILD)

rdkit_headers(ChemTransforms.h
MolFragmenter.h
MolFragmenter.h MolFragmenterJSONParser.h
DEST GraphMol/ChemTransforms)

# there's no Wrap subdirectory on the main trunk (but in "minimal" there is)..
Expand Down
18 changes: 11 additions & 7 deletions Code/GraphMol/ChemTransforms/MolFragmenter.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include <istream>
#include <GraphMol/ROMol.h>
#include <RDGeneral/BetterEnums.h>

namespace RDKit {
namespace MolFragmenter {
Expand Down Expand Up @@ -103,13 +104,15 @@ RDKIT_CHEMTRANSFORMS_EXPORT void constructBRICSBondTypes(

// n.b. AtomProperty must resolve to an unsigned integer value on an atom
// property
enum class MolzipLabel {
AtomMapNumber,
Isotope,
FragmentOnBonds,
AtomType,
AtomProperty
};
// clang-format off
BETTER_ENUM_CLASS(MolzipLabel, unsigned int,
AtomMapNumber,
Isotope,
FragmentOnBonds,
AtomType,
AtomProperty
);
// clang-format on

struct RDKIT_CHEMTRANSFORMS_EXPORT MolzipParams {
MolzipLabel label = MolzipLabel::AtomMapNumber;
Expand Down Expand Up @@ -157,5 +160,6 @@ RDKIT_CHEMTRANSFORMS_EXPORT std::unique_ptr<ROMol> molzip(
RDKIT_CHEMTRANSFORMS_EXPORT std::unique_ptr<ROMol> molzip(
const std::map<std::string, ROMOL_SPTR> &row,
const MolzipParams &params = MolzipParams());

} // namespace RDKit
#endif
52 changes: 52 additions & 0 deletions Code/GraphMol/ChemTransforms/MolFragmenterJSONParser.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
//
// Copyright (C) 2024 Novartis Biomedical Research and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//

#define USE_BETTER_ENUMS
#include "MolFragmenterJSONParser.h"
#include <RDGeneral/BoostStartInclude.h>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/json_parser.hpp>
#include <RDGeneral/BoostEndInclude.h>

namespace RDKit {

//! Update \c params from the JSON document in \c details_json.
/*!
  A null or empty \c details_json leaves \c params untouched. Otherwise the
  string is read with boost::property_tree::read_json and the following
  top-level keys are consulted, in this order:
    - "Label": applied only when it names a valid MolzipLabel enumerator
    - "AtomSymbols": when present, its children replace params.atomSymbols
    - "AtomProperty", "EnforceValenceRules", "GenerateCoordinates": the value
      already stored in params is used as the default when the key is absent

  No exception is caught here, so errors raised by boost while reading or
  converting values propagate to the caller.
*/
void parseMolzipParametersJSON(MolzipParams &params, const char *details_json) {
  // nothing to parse: keep whatever the caller put in params
  if (details_json == nullptr || *details_json == '\0') {
    return;
  }

  std::istringstream jsonStream(details_json);
  boost::property_tree::ptree tree;
  boost::property_tree::read_json(jsonStream, tree);

  // an absent "Label" yields an empty string, which is not a valid enumerator
  const auto labelName = tree.get<std::string>("Label", std::string());
  if (MolzipLabel::_is_valid(labelName.c_str())) {
    params.label = MolzipLabel::_from_string(labelName.c_str());
  }

  const auto symbolsPos = tree.find("AtomSymbols");
  if (symbolsPos != tree.not_found()) {
    const auto &symbolNodes = symbolsPos->second;
    // size the destination first, then overwrite every slot in child order
    params.atomSymbols.resize(symbolNodes.size());
    auto dest = params.atomSymbols.begin();
    for (const auto &symbolNode : symbolNodes) {
      *dest = symbolNode.second.get_value<std::string>();
      ++dest;
    }
  }

  params.atomProperty =
      tree.get<std::string>("AtomProperty", params.atomProperty);
  params.enforceValenceRules =
      tree.get<bool>("EnforceValenceRules", params.enforceValenceRules);
  params.generateCoordinates =
      tree.get<bool>("GenerateCoordinates", params.generateCoordinates);
}

} // end namespace RDKit
27 changes: 27 additions & 0 deletions Code/GraphMol/ChemTransforms/MolFragmenterJSONParser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
//
// Copyright (C) 2024 Novartis Biomedical Research and other RDKit contributors
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//

#pragma once

#include "MolFragmenter.h"

namespace RDKit {

//! \brief Parse MolzipParams from JSON.
/*! The passed MolzipParams instance is updated in place from
* the JSON-parsed content. A null or empty details_json leaves
* params untouched. Keys that are absent from the JSON keep the
* value already stored in params.
*
* Recognized top-level keys:
* - "Label": name of a MolzipLabel enumerator (string); names that
*   are not valid enumerators are ignored
* - "AtomSymbols": array of strings; replaces params.atomSymbols
* - "AtomProperty": string
* - "EnforceValenceRules": boolean
* - "GenerateCoordinates": boolean
*
* The JSON is read with boost::property_tree::read_json and this
* function catches no exceptions, so errors reported by boost
* propagate to the caller.
*
* @param params - molzip parameters; used as defaults and updated in place
* @param details_json - JSON string; may be null or empty
*/
RDKIT_CHEMTRANSFORMS_EXPORT void parseMolzipParametersJSON(
MolzipParams &params, const char *details_json);

} // end namespace RDKit
3 changes: 3 additions & 0 deletions Code/JavaWrappers/ChemTransforms.i
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
%{
#include <GraphMol/ChemTransforms/ChemTransforms.h>
#include <GraphMol/ChemTransforms/MolFragmenter.h>
#include <GraphMol/ChemTransforms/MolFragmenterJSONParser.h>
#include <GraphMol/Bond.h>
// Fixes annoying compilation namespace issue
typedef RDKit::MatchVectType MatchVectType;
Expand Down Expand Up @@ -120,4 +121,6 @@ RDKit::ROMol * new_molzip(

%newobject fragmentOnBRICSBonds;
%template(UIntMolMap) std::map<unsigned int,boost::shared_ptr<RDKit::ROMol> >;
%include <RDGeneral/BetterEnums.h>
%include <GraphMol/ChemTransforms/MolFragmenter.h>
%include <GraphMol/ChemTransforms/MolFragmenterJSONParser.h>
6 changes: 5 additions & 1 deletion Code/MinimalLib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@ if(RDK_BUILD_MINIMAL_LIB)
if(RDK_BUILD_MINIMAL_LIB_MMPA)
add_definitions(-DRDK_BUILD_MINIMAL_LIB_MMPA)
set(MINIMAL_LIB_LIBRARIES "${MINIMAL_LIB_LIBRARIES};MMPA_static")
endif()
endif()
if(RDK_BUILD_MINIMAL_LIB_MOLZIP)
add_definitions(-DRDK_BUILD_MINIMAL_LIB_MOLZIP)
set(MINIMAL_LIB_LIBRARIES "${MINIMAL_LIB_LIBRARIES};ChemTransforms_static")
endif()
if(RDK_BUILD_FREETYPE_SUPPORT)
if( ${CMAKE_SYSTEM_NAME} MATCHES "Emscripten")
set(USE_FLAGS "-s USE_FREETYPE=1")
Expand Down
96 changes: 91 additions & 5 deletions Code/MinimalLib/jswrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,29 @@
#include <GraphMol/MolDraw2D/MolDraw2D.h>
#include <GraphMol/MolDraw2D/MolDraw2DUtils.h>
#include <GraphMol/MolDraw2D/MolDraw2DJS.h>

#if defined(RDK_BUILD_MINIMAL_LIB_MOLZIP) && \
defined(RDK_BUILD_MINIMAL_LIB_RGROUPDECOMP)
#include <GraphMol/ChemTransforms/MolFragmenter.h>
#include <GraphMol/ChemTransforms/MolFragmenterJSONParser.h>
#endif
using namespace RDKit;

namespace {
// Returns the "Mol" property of the Emscripten module. The property is looked
// up once, on the first call; every call returns a copy of that cached value.
const emscripten::val JSMolObj() {
  static const emscripten::val cachedMol =
      emscripten::val::module_property("Mol");
  return cachedMol;
}

// Returns the "MolList" property of the Emscripten module. The property is
// looked up once, on the first call; every call returns a copy of that cached
// value.
const emscripten::val JSMolListObj() {
  static const emscripten::val cachedMolList =
      emscripten::val::module_property("MolList");
  return cachedMolList;
}

// Returns the JavaScript global named "Object". The global is looked up once,
// on the first call; every call returns a copy of that cached value.
const emscripten::val ObjectObj() {
  static const emscripten::val cachedObject =
      emscripten::val::global("Object");
  return cachedObject;
}

class JSDrawerFromDetails : public MinimalLib::DrawerFromDetails {
public:
JSDrawerFromDetails(const emscripten::val &ctx, int w = -1, int h = -1,
Expand Down Expand Up @@ -134,6 +153,66 @@ std::string get_mcs_as_smarts_no_details(const JSMolList &mols) {
}
#endif

#ifdef RDK_BUILD_MINIMAL_LIB_MOLZIP
// Two-molecule molzip without a details string: forwards to the
// three-argument molzip() with an empty JSON details string.
JSMolBase *molzip_no_details_helper(const JSMolBase &a, const JSMolBase &b) {
  const std::string noDetails;
  return molzip(a, b, noDetails);
}

#ifdef RDK_BUILD_MINIMAL_LIB_RGROUPDECOMP
// Zips the molecules stored in a JS object (one entry per key, as in a row of
// an R-group decomposition) into a single molecule.
//
// rgdRow       - JS object whose own keys map to Mol instances
// details_json - JSON string with molzip parameters; may be empty
//
// Returns a newly allocated JSMol, or nullptr when:
//  - rgdRow has no keys,
//  - any value is not an instance of the module's Mol class,
//  - any value is not backed by a JSMolShared,
//  - RDKit::molzip produced no molecule.
// Exceptions thrown by the JSON parser or by RDKit::molzip are not caught
// here.
JSMolBase *molzip_rgd_row_helper(const emscripten::val &rgdRow,
                                 const std::string &details_json) {
  // Object.keys(rgdRow) gives the labels of the row
  auto rlabelsAsVal = ObjectObj().call<emscripten::val>("keys", rgdRow);
  const auto rlabels = emscripten::vecFromJSArray<std::string>(rlabelsAsVal);
  if (rlabels.empty()) {
    return nullptr;
  }
  std::map<std::string, ROMOL_SPTR> molzipRow;
  for (const auto &rlabel : rlabels) {
    auto jsMolAsVal = rgdRow[rlabel];
    if (!jsMolAsVal.instanceof (JSMolObj())) {
      return nullptr;
    }
    // only JSMolShared exposes the shared pointer needed to build the row
    auto jsMolShared = dynamic_cast<JSMolShared *>(
        jsMolAsVal.as<JSMolBase *>(emscripten::allow_raw_pointers()));
    if (!jsMolShared) {
      return nullptr;
    }
    molzipRow.emplace(rlabel, jsMolShared->get_sptr());
  }
  MolzipParams params;
  parseMolzipParametersJSON(params, details_json.c_str());
  const auto res = RDKit::molzip(molzipRow, params);
  // NOTE(review): guard against an empty result instead of dereferencing it
  // unconditionally - confirm whether RDKit::molzip can actually return null.
  if (!res) {
    return nullptr;
  }
  return new JSMol(new RWMol(*res));
}

// Single-argument form of the row-based molzip: forwards to
// molzip_rgd_row_helper() with an empty JSON details string.
JSMolBase *molzip_no_details_rgd_row_helper(const emscripten::val &rgdRow) {
  const std::string noDetails;
  return molzip_rgd_row_helper(rgdRow, noDetails);
}

// Two-argument molzip entry point; dispatches on the runtime type of its
// arguments:
//  - (Mol, Mol): zips the two molecules without a details string
//  - (anything that is not a Mol, string): treats param1 as a row object and
//    param2 as the JSON details string, see molzip_rgd_row_helper()
// Any other combination, or a Mol argument that converts to a null pointer,
// yields nullptr.
JSMolBase *molzip_2params_helper(const emscripten::val &param1,
                                 const emscripten::val &param2) {
  if (param1.instanceof (JSMolObj())) {
    // a Mol in first position is only valid together with a second Mol
    if (!param2.instanceof (JSMolObj())) {
      return nullptr;
    }
    const auto aJsMolPtr =
        param1.as<JSMolBase *>(emscripten::allow_raw_pointers());
    const auto bJsMolPtr =
        param2.as<JSMolBase *>(emscripten::allow_raw_pointers());
    if (!aJsMolPtr || !bJsMolPtr) {
      return nullptr;
    }
    return molzip_no_details_helper(*aJsMolPtr, *bJsMolPtr);
  }
  if (param2.typeOf().as<std::string>() == "string") {
    return molzip_rgd_row_helper(param1, param2.as<std::string>());
  }
  return nullptr;
}
#endif
#endif

emscripten::val binary_string_to_uint8array(const std::string &pkl) {
emscripten::val view(emscripten::typed_memory_view(
pkl.size(), reinterpret_cast<const unsigned char *>(pkl.c_str())));
Expand Down Expand Up @@ -340,16 +419,14 @@ emscripten::val get_mmpa_frags_helper(const JSMolBase &self,
JSRGroupDecomposition *get_rgd_helper(
const emscripten::val &singleOrMultipleCores,
const std::string &details_json) {
static const auto JSMOL = emscripten::val::module_property("Mol");
static const auto JSMOLLIST = emscripten::val::module_property("MolList");
JSRGroupDecomposition *res = nullptr;
if (singleOrMultipleCores.instanceof (JSMOL)) {
if (singleOrMultipleCores.instanceof (JSMolObj())) {
const auto jsMolPtr =
singleOrMultipleCores.as<JSMolBase *>(emscripten::allow_raw_pointers());
if (jsMolPtr) {
res = new JSRGroupDecomposition(*jsMolPtr, details_json);
}
} else if (singleOrMultipleCores.instanceof (JSMOLLIST)) {
} else if (singleOrMultipleCores.instanceof (JSMolListObj())) {
const auto jsMolListPtr =
singleOrMultipleCores.as<JSMolList *>(emscripten::allow_raw_pointers());
if (jsMolListPtr) {
Expand Down Expand Up @@ -747,4 +824,13 @@ EMSCRIPTEN_BINDINGS(RDKit_minimal) {
function("get_rgd", &get_rgd_helper, allow_raw_pointers());
function("get_rgd", &get_rgd_no_details_helper, allow_raw_pointers());
#endif
#if defined(RDK_BUILD_MINIMAL_LIB_MOLZIP) && defined(__EMSCRIPTEN__)
function("molzip", &::molzip, allow_raw_pointers());
#ifdef RDK_BUILD_MINIMAL_LIB_RGROUPDECOMP
function("molzip", &molzip_2params_helper, allow_raw_pointers());
function("molzip", &molzip_no_details_rgd_row_helper, allow_raw_pointers());
#else
function("molzip", &molzip_no_details_helper, allow_raw_pointers());
#endif
#endif
}
13 changes: 13 additions & 0 deletions Code/MinimalLib/minilib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@
#ifdef RDK_BUILD_MINIMAL_LIB_MCS
#include <GraphMol/FMCS/FMCS.h>
#endif
#ifdef RDK_BUILD_MINIMAL_LIB_MOLZIP
#include <GraphMol/ChemTransforms/MolFragmenterJSONParser.h>
#endif

#include <GraphMol/Descriptors/Property.h>
#include <GraphMol/Descriptors/MolDescriptors.h>
#include <GraphMol/MolInterchange/MolInterchange.h>
Expand Down Expand Up @@ -992,3 +996,12 @@ JSRGroupDecomposition::getRGroupsAsRows() const {
return res;
}
#endif
#ifdef RDK_BUILD_MINIMAL_LIB_MOLZIP
// Zips molecules a and b together with RDKit::molzip.
//
// a, b         - the two molecules to combine
// details_json - JSON string with molzip parameters; an empty string keeps
//                the default MolzipParams
//
// Returns a newly allocated JSMol holding a copy of the zipped molecule, or
// nullptr if RDKit::molzip produced no molecule. Exceptions thrown by the
// JSON parser or by RDKit::molzip are not caught here.
JSMolBase *molzip(const JSMolBase &a, const JSMolBase &b,
                  const std::string &details_json) {
  MolzipParams params;
  parseMolzipParametersJSON(params, details_json.c_str());
  // explicitly qualified: this is the RDKit implementation, not a recursive
  // call to this wrapper
  const auto out = RDKit::molzip(a.get(), b.get(), params);
  // NOTE(review): guard against an empty result instead of dereferencing it
  // unconditionally - confirm whether RDKit::molzip can actually return null.
  if (!out) {
    return nullptr;
  }
  return new JSMol(new RDKit::RWMol(*out));
}
#endif
15 changes: 10 additions & 5 deletions Code/MinimalLib/minilib.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class JSMolBase {
public:
JSMolBase(const JSMolBase &) = delete;
JSMolBase &operator=(const JSMolBase &) = delete;
virtual ~JSMolBase() {};
virtual ~JSMolBase(){};
virtual const RDKit::RWMol &get() const = 0;
virtual RDKit::RWMol &get() = 0;
std::string get_smiles() const;
Expand Down Expand Up @@ -225,8 +225,8 @@ class JSMolShared : public JSMolBase {
class JSMolList {
public:
JSMolList(const std::vector<RDKit::ROMOL_SPTR> &mols)
: d_mols(mols), d_idx(0) {};
JSMolList() : d_idx(0) {};
: d_mols(mols), d_idx(0){};
JSMolList() : d_idx(0){};
JSMolBase *next();
size_t append(const JSMolBase &mol);
size_t insert(size_t idx, const JSMolBase &mol);
Expand Down Expand Up @@ -363,11 +363,11 @@ class JSRGroupDecomposition {
public:
JSRGroupDecomposition(const JSMolBase &core, const std::string &details_json);
JSRGroupDecomposition(const JSMolBase &core)
: JSRGroupDecomposition(core, "") {};
: JSRGroupDecomposition(core, ""){};
JSRGroupDecomposition(const JSMolList &cores,
const std::string &details_json);
JSRGroupDecomposition(const JSMolList &cores)
: JSRGroupDecomposition(cores, "") {};
: JSRGroupDecomposition(cores, ""){};
int add(const JSMolBase &mol);
bool process();
std::map<std::string, std::unique_ptr<JSMolList>> getRGroupsAsColumns() const;
Expand All @@ -379,3 +379,8 @@ class JSRGroupDecomposition {
std::vector<unsigned int> d_unmatched;
};
#endif

#ifdef RDK_BUILD_MINIMAL_LIB_MOLZIP
// Zips molecules a and b together using RDKit's molzip.
// details_json is a JSON string with molzip parameters; it is handed to
// parseMolzipParametersJSON, which leaves the default MolzipParams unchanged
// when the string is empty.
// The returned pointer is obtained from `new`.
// NOTE(review): ownership presumably passes to the caller or the JS binding
// layer - confirm.
JSMolBase *molzip(const JSMolBase &a, const JSMolBase &b,
const std::string &details_json);
#endif
Loading

0 comments on commit 9c63cf6

Please sign in to comment.