Skip to content

Commit

Permalink
modified: .gitignore
Browse files Browse the repository at this point in the history
	renamed:    release/bin/code -> release/bin/ssh_code
	renamed:    release/bin/server -> release/bin/ssh_server
	modified:   release/install.sh
	modified:   src/slurm.py
  • Loading branch information
l1997i committed Apr 30, 2024
1 parent a97839e commit 899c225
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 50 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,4 @@ dmypy.json
.pyre/

.pytorch
.vscode/
File renamed without changes.
File renamed without changes.
7 changes: 4 additions & 3 deletions release/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,13 @@ sed -i "s|/home2/mznv82|$escaped_user_home|g" config.json

# Set up the local bin directory and copy executables
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
DIR_BIN="$DIR/bin"
mkdir -p ~/.local/bin
cp -r bin/* ~/.local/bin
chmod +x hpc_helper
chmod +x ~/.local/bin/code
chmod +x ~/.local/bin/server
echo "export PATH=\"$DIR:\$PATH\"" >> ~/.bashrc
# Mark the copied launcher scripts as executable. The binaries are named
# ssh_code / ssh_server (release/bin/ssh_code, release/bin/ssh_server), so the
# chmod targets must use the same spelling.
chmod +x ~/.local/bin/ssh_code
chmod +x ~/.local/bin/ssh_server
echo "export PATH=\"$DIR_BIN:$DIR:\$PATH\"" >> ~/.bashrc

# Install stage 2 PyTorch environment
mkdir -p .pytorch
Expand Down
82 changes: 35 additions & 47 deletions src/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,30 +225,6 @@ def __init__(self):
if os.path.exists(os.path.join(HPC_GUI_PATH, 'data/jobs.json')):
with open(os.path.join(HPC_GUI_PATH, 'data/jobs.json'), 'r') as f:
self.jobs = json.load(f)
# command = 'squeue -u mznv82 -o "%.18i %.9P %.8j %.8u %.2t %.10M %.6D %R"'
# result = cli(command)

# lines = result.strip().split('\n')
# jobs_data = []
# for line in lines:
# fields = line.split()
# if len(fields) == 8:
# job_dict = {
# "id": fields[0],
# "partition": fields[1],
# "name": fields[2],
# "user": fields[3],
# "state": fields[4],
# "time": fields[5],
# "nodes": fields[6],
# "nodelist": fields[7]
# }

# if job_dict["id"] != "JOBID":
# jobs_data.append(job_dict)

# jobs_data = {d['id']: d for d in jobs_data}
# self.jobs = jobs_data

else:
self.jobs = {}
Expand All @@ -260,33 +236,45 @@ def Loop(self):
self.Update()
time.sleep(2)

def update_job_outputs(self):
    """Refresh captured output for every tracked job that is still active.

    Walks the job ids present in the module-level ``outputs`` mapping and
    calls ``self.update_output`` for each one whose recorded state is
    ``'R'`` or ``'PD'``.
    """
    # Snapshot the keys so the mapping may change size while we iterate.
    tracked_ids = list(outputs)
    for job_id in tracked_ids:
        state = self.jobs[job_id]['state']
        if state not in ('R', 'PD'):
            continue
        self.update_output(job_id)

def update_job_states(self):
    """Synchronise ``self.jobs`` with the current user's ``squeue`` listing.

    Jobs that appear in the listing have their details refreshed through
    ``self.update_job_details``. Jobs that no longer appear are marked
    ``'CP'`` and get one more ``self.update_output`` call.

    A job is matched on the first whitespace-separated field of a row (the
    JOBID column in squeue's default layout), compared for equality. A plain
    substring test over the whole row would let job "123" match the row of
    job "1234", or any row that merely contains those digits in another
    column.
    """
    # NOTE(review): cli() is assumed to return the command's stdout as text.
    squeue_output = cli('squeue -u $(whoami)')

    # Index rows by JOBID once instead of rescanning the listing per job.
    # setdefault keeps the first row seen for an id.
    rows_by_job_id = {}
    for row in squeue_output.split("\n"):
        fields = row.split()
        if fields:
            rows_by_job_id.setdefault(fields[0], row)

    # Snapshot the keys so the dict may be modified while we iterate.
    for job_id in list(self.jobs.keys()):
        row = rows_by_job_id.get(job_id)
        if row is not None:
            self.update_job_details(row, job_id)
            continue
        # Not in the queue any more: treat the job as COMPLETED.
        self.jobs[job_id]['state'] = 'CP'
        # NOTE(review): the final output refresh is kept inside the
        # "left the queue" branch — confirm this matches the intended nesting.
        self.update_output(job_id)

def update_job_details(self, line, id):
    """Update one job's node and state from a single ``squeue`` output row.

    Args:
        line: One row of squeue output. The fifth whitespace-separated field
            is stored as the state and the last field as the node.
        id: Key of the job in ``self.jobs``.

    Rows with fewer than five fields are ignored, leaving the job entry
    untouched, because they cannot carry a state column.
    """
    columns = line.split()
    if len(columns) < 5:
        # Malformed or truncated row: indexing columns[4] would raise.
        return

    job = self.jobs[id]
    job['node'] = columns[-1]
    job['state'] = columns[4]
    if job['node']:
        self.fetch_process_ids(id)

def fetch_process_ids(self, id):
    """Look up the job's two helper processes on its compute node.

    Runs ``pgrep -f`` over ssh on ``self.jobs[id]['node']`` for the patterns
    ``^<id>_1`` and ``^<id>_2``, and stores the results under ``'pid_1'`` and
    ``'pid_2'``.

    Args:
        id: Key of the job in ``self.jobs``; its ``'node'`` entry must be set.

    If the node entry is empty or is a parenthesised value, no ssh call is
    made and both PIDs are stored as empty strings. For jobs that are not yet
    running, squeue's last column holds a reason such as ``(Priority)``, and
    interpolating that into the ssh command gives a malformed shell line.
    """
    node = self.jobs[id]['node']
    if not node or node.startswith('('):
        # No real host to contact; keep the keys present for readers.
        self.jobs[id]['pid_1'] = ''
        self.jobs[id]['pid_2'] = ''
        return

    self.jobs[id]['pid_1'] = cli(f"ssh {node} \"pgrep -f '^{id}_1'\"")
    self.jobs[id]['pid_2'] = cli(f"ssh {node} \"pgrep -f '^{id}_2'\"")

def Update(self):
sinfo = cli('sinfo')
sacct_all = cli('sacct')
sacct = cli('squeue -u $(whoami)')
# sacct_all = cli('sacct')
self.update_job_outputs()
self.update_job_states()

for id in outputs.keys():
if self.jobs[id]['state'] == 'R' or self.jobs[id]['state'] == 'PD':
self.UpdateOutput(id)

for id in self.jobs.keys():
if self.jobs[id]['state'] == 'R' or self.jobs[id]['state'] == 'PD':

is_id_in_line = 0 # NOT in Lines
for line in sacct.split("\n"):
if id in line:
columns = line.split()
self.jobs[id]['node'] = columns[-1]
self.jobs[id]['state'] = columns[4]
if self.jobs[id]['node']:
pid_1 = cli(f"ssh {self.jobs[id]['node']} \"pgrep -f '^{id}_1'\"")
pid_2 = cli(f"ssh {self.jobs[id]['node']} \"pgrep -f '^{id}_2'\"")
self.jobs[id]['pid_1'] = pid_1
self.jobs[id]['pid_2'] = pid_2
is_id_in_line = 1
if not is_id_in_line:
self.jobs[id]['state'] = 'CP' # CP for COMPLETED
self.UpdateOutput(id)

self.update_content = {
'html': # Update html content
Expand Down

0 comments on commit 899c225

Please sign in to comment.