Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jv branch #204

Merged
merged 5 commits into from
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion aqme/argument_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@
"ani_method": "ANI2x",
"stacksize": "1G",
"xtb_keywords": None,
"max_workers": 4,
"ewin_sample_fullmonte": 2.0,
"ewin_fullmonte": 5.0,
"nsteps_fullmonte": 100,
Expand Down
92 changes: 39 additions & 53 deletions aqme/csearch/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@
Directory to create the output file(s)
varfile : str, default=None
Option to parse the variables using a yaml file (specify the filename)
max_workers : int, default=4
Number of simultaneous RDKit jobs run with multiprocessing
(WARNING! More than 12 simultaneous jobs might collapse your computer!)
charge : int, default=None
Charge of the calculations used in the following input files.
If charge isn't defined, it automatically reads the charge of the
Expand Down Expand Up @@ -144,7 +141,7 @@
CREST only
++++++++++

nprocs : int, default=2
nprocs : int, default=8
Number of processors used in CREST optimizations
constraints_atoms : list, default=[]
Specify constrained atoms as [AT1,AT2,AT3]. An example of multiple constraints with
Expand Down Expand Up @@ -191,7 +188,6 @@
from pathlib import Path
import pandas as pd
import concurrent.futures as futures
import multiprocessing as mp
from progress.bar import IncrementalBar
import numpy as np

Expand Down Expand Up @@ -306,7 +302,7 @@ def __init__(self, **kwargs):
self.args.log.write(f"\nStarting CSEARCH with {len(job_inputs)} job(s) (SDF, XYZ, CSV, etc. files might contain multiple jobs/structures inside)\n")

# runs the conformer sampling with multiprocessors
self.run_csearch(job_inputs)
_ = self.run_csearch(job_inputs)

# store all the information into a CSV file
csearch_file_no_path = (
Expand Down Expand Up @@ -391,64 +387,50 @@ def run_csearch(self, job_inputs):
bar = IncrementalBar(
"o Number of finished jobs from CSEARCH", max=len(job_inputs)
)
with futures.ProcessPoolExecutor(
max_workers=self.args.max_workers,

# rdkit benefits from using multithreading, since the RMSD filter in RDKit's GetBestRMS
# doesn't parallelize well (by default, it uses 1 thread and it fails when using more,
# we're not sure that it tries to use all the CPUs or only 1)
if self.args.program.lower() == "rdkit":
# we do not recommend more than 4 parallel RDKit jobs, as each job runs RDKit functions
# with all available CPUs/threads (i.e. numThreads=0 in rdDistGeom.EmbedMultipleConfs)
csearch_procs = min(4,self.args.nprocs)
else: # each CREST job already parallelizes CPUs, so only 1 simultaneous job is run at a time
csearch_procs = 1

# asynchronous multithreading to accelerate CSEARCH (only benefits RDKit)
with futures.ThreadPoolExecutor(
max_workers=csearch_procs,
) as executor:
# Submit a set of asynchronous jobs
jobs = []
# Submit the Jobs
for job_input in job_inputs:
(
smi_,
name_,
charge_,
mult_,
constraints_atoms_,
constraints_dist_,
constraints_angle_,
constraints_dihedral_,
complex_type_,
geom_
) = job_input
job = executor.submit(
self.compute_confs(
smi_,
name_,
charge_,
mult_,
constraints_atoms_,
constraints_dist_,
constraints_angle_,
constraints_dihedral_,
complex_type_,
geom_
)
_ = executor.submit(
self.compute_confs, job_input,bar
)
jobs.append(job)

bar.next()

bar.finish()
bar.finish()

def compute_confs(
self,
smi,
name,
charge,
mult,
constraints_atoms,
constraints_dist,
constraints_angle,
constraints_dihedral,
complex_type,
geom
):
def compute_confs(self,job_input,bar):
"""
Function to start conformer generation
"""

# load variables from job_input
(
smi,
name,
charge,
mult,
constraints_atoms,
constraints_dist,
constraints_angle,
constraints_dihedral,
complex_type,
geom
) = job_input

self.args.log.write(f"\n ----- {os.path.basename(Path(name))} -----")

# load mol and other parameters when using SMILES as input
if self.args.smi is not None or os.path.basename(Path(self.args.input)).split(".")[1] in ["smi","csv","cdx","txt","yaml","yml","rtf"]:
(
mol,
Expand All @@ -473,6 +455,7 @@ def compute_confs(
if os.path.basename(Path(self.args.input)).split(".")[1] not in ["csv","cdx","txt","yaml","yml","rtf"]:
self.args.log.finalize()
sys.exit()
bar.next()
return

else:
Expand All @@ -483,6 +466,7 @@ def compute_confs(
if os.path.basename(Path(self.args.input)).split(".")[1] not in ["csv","cdx","txt","yaml","yml","rtf"]:
self.args.log.finalize()
sys.exit()
bar.next()
return

# check if the optimization is constrained
Expand Down Expand Up @@ -547,6 +531,7 @@ def compute_confs(
if os.path.basename(Path(self.args.input)).split(".")[1] not in ["csv","cdx","txt","yaml","yml","rtf"]:
self.args.log.finalize()
sys.exit()
bar.next()
return

if complex_type in accepted_complex_types:
Expand Down Expand Up @@ -616,6 +601,7 @@ def compute_confs(
# Updates the dataframe with information about conformer generation
frames = [self.final_dup_data, total_data]
self.final_dup_data = pd.concat(frames, ignore_index=True, sort=True)
bar.next()

# automatic detection of metal atoms
def find_metal_atom(self,mol,charge,mult):
Expand Down
Loading