Skip to content

Commit

Permalink
Add GPU support
Browse files Browse the repository at this point in the history
  • Loading branch information
jan-janssen committed Jun 8, 2023
1 parent 615609f commit 4e11642
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 3 deletions.
15 changes: 14 additions & 1 deletion pyiron_base/jobs/job/extension/server/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,11 @@ class Server: # add the option to return the job id and the hold id to the serv
"""

def __init__(
self, host=None, queue=None, cores=1, threads=1, run_mode="modal", new_hdf=True
self, host=None, queue=None, cores=1, threads=1, gpus=None, run_mode="modal", new_hdf=True
):
self._cores = cores
self._threads = threads
# Keep the value passed via the ``gpus`` constructor argument; assigning a
# hard-coded None here silently discarded it.
self._gpus = gpus
self._run_time = None
self._memory_limit = None
self._host = self._init_host(host=host)
Expand Down Expand Up @@ -230,6 +231,14 @@ def threads(self):
def threads(self, number_of_threads):
self._threads = number_of_threads

@property
def gpus(self):
    """
    Number of GPUs assigned to this server object.

    Returns:
        The value stored in ``self._gpus``; ``None`` means no GPU count
        has been set.
    """
    return self._gpus

@gpus.setter
def gpus(self, number_of_gpus):
    """
    Set the number of GPUs assigned to this server object.

    Args:
        number_of_gpus: value to store in ``self._gpus``; it is stored
            as given, without validation or conversion.
    """
    self._gpus = number_of_gpus

@property
def cores(self):
"""
Expand Down Expand Up @@ -447,6 +456,8 @@ def to_hdf(self, hdf, group_name=None):
hdf_dict["accept_crash"] = self.accept_crash
if len(self.additional_arguments) > 0:
hdf_dict["additional_arguments"] = self.additional_arguments
# Persist the GPU count under its own "gpus" key. Writing it to
# "accept_crash" would overwrite the accept_crash flag stored above and
# leave no "gpus" entry to read back.
if self._gpus is not None:
    hdf_dict["gpus"] = self._gpus

if group_name is not None:
with hdf.open(group_name) as hdf_group:
Expand Down Expand Up @@ -490,6 +501,8 @@ def from_hdf(self, hdf, group_name=None):
self._threads = hdf_dict["threads"]
if "additional_arguments" in hdf_dict.keys():
self.additional_arguments = hdf_dict["additional_arguments"]
# Read the GPU count from the same "gpus" key that is tested for; reading
# "accept_crash" here would load an unrelated value (or raise KeyError).
if "gpus" in hdf_dict.keys():
    self._gpus = hdf_dict["gpus"]
self._new_hdf = hdf_dict["new_h5"] == 1

def db_entry(self):
Expand Down
13 changes: 11 additions & 2 deletions pyiron_base/jobs/job/runfunction.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,15 @@ def run_job_with_status_created(job):
elif job.server.run_mode.srun:
run_job_with_runmode_srun(job=job)
elif job.server.run_mode.flux:
return run_job_with_runmode_flux(job=job, executor=job.flux_executor)
if job.server.gpus is not None:
gpus_per_slot = int(job.server.gpus/job.server.cores)
else:
gpus_per_slot = None
return run_job_with_runmode_flux(
job=job,
executor=job.flux_executor,
gpus_per_slot=gpus_per_slot,
)
elif (
job.server.run_mode.non_modal
or job.server.run_mode.thread
Expand Down Expand Up @@ -443,7 +451,7 @@ def run_job_with_runmode_srun(job):
)


def run_job_with_runmode_flux(job, executor):
def run_job_with_runmode_flux(job, executor, gpus_per_slot=None):
if not flux_available:
raise ModuleNotFoundError(
"No module named 'flux'. No linux you can install flux via conda."
Expand Down Expand Up @@ -480,6 +488,7 @@ def run_job_with_runmode_flux(job, executor):
script=exeuctable_str,
num_nodes=1,
cores_per_slot=1,
gpus_per_slot=gpus_per_slot,
num_slots=job.server.cores,
)
jobspec.cwd = job.project_hdf5.working_directory
Expand Down

0 comments on commit 4e11642

Please sign in to comment.