diff --git a/spearmint/main.py b/spearmint/main.py index 052bea6..86a6a52 100755 --- a/spearmint/main.py +++ b/spearmint/main.py @@ -182,6 +182,7 @@ # to enter into this License and Terms of Use on behalf of itself and # its Institution. + import sys import optparse import importlib @@ -273,6 +274,7 @@ def main(): # Note: I chose to fill up one resource and them move on to the next # You could also do it the other way, by changing "while" to "if" here + remove_broken_jobs(db, jobs, experiment_name, resources) while resource.acceptingJobs(jobs): # Load jobs from DB diff --git a/spearmint/schedulers/local.py b/spearmint/schedulers/local.py index 04072d9..68ca972 100755 --- a/spearmint/schedulers/local.py +++ b/spearmint/schedulers/local.py @@ -187,6 +187,7 @@ import os import subprocess import sys +import psutil def init(*args, **kwargs): return LocalScheduler(*args, **kwargs) @@ -228,12 +229,7 @@ def submit(self, job_id, experiment_name, experiment_dir, database_address): def alive(self, process_id): - try: - # Send an alive signal to proc (note this could kill it in windows) - os.kill(process_id, 0) - except OSError: - # Job is no longer running. - return False - else: - return True + # TODO: checking only the pid is not reliable, because the pid may + # have been reused by another program. + return psutil.pid_exists(process_id)