-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathinstallJDOnSLURM.py
285 lines (239 loc) · 11.2 KB
/
installJDOnSLURM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
#!/usr/bin/env python
"""
This script provides installation functions to launch a jupyter dask on SURF infrastructure (snellius or spider)
with ssh access to the remote supercomputer administered by SLURM. This does not configure or set-up dCache.
This script is automatically invoked from the runJupyterDaskOnSLURM script when install or uninstall flags are invoked.
USE:
The script can most easily be invoked through 'python runJupyterDaskOnSLURM.py --<your arg>'
Paths to the working directory and scripts on the remote host, as well as to the local connection configuration file can be specified below.
Configurable paths are:
config_path : path to config file that is created/updated. default is in local installation of JupyterDaskOnSLURM
repository. Can be adapted to user preferences
remoteWD : Working directory from which to submit batch job on remote host
remoteJDD : Clone directory of JupyterDaskOnSLURM
remoteScriptWD : Path to directory on remote host where job submission scripts are located
ATTENTION! When adding a job script on a platform other than spider/snellius @SURF the user MUST
create a job script for the platform (this can be done by using the existing scripts as templates).
Expected naming convention is 'jupyter_dask_xxx.bsh', where xxx is the name of the platform.
mamba_URL : Download URL for mamba installation file
"""
from fabric import Connection
from runJupyterDaskOnSLURM import ssh_remote_executor
import subprocess
# Path to the local platform configuration file that is created/updated
config_path = './config/platforms/platforms.ini'
# Working directory on the remote host
remoteWD = '~'
# Directory on the remote host holding the JupyterDaskOnSLURM copy
remoteJDD = '~/JupyterDaskOnSLURM'
# Download URL for the mamba installation file
mamba_URL = 'https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh'
def check_copy(conn):
    """
    Check if repository has been copied already on remote host

    Runs a ``test -d JupyterDaskOnSLURM`` through the connection; the remote
    shell echoes 'True' only when that directory exists (the path is relative
    to the directory in which the remote command starts).

    :param conn: ssh connection object
    :return folder_exists: True if the repository folder exists on remote host, False otherwise
    """
    cmd = "if test -d JupyterDaskOnSLURM; then echo 'True'; fi"
    result = conn.run(cmd, hide=True)
    # Only an output line that is exactly 'True' counts. Returning the raw
    # stdout would treat any stray output (e.g. a login banner) as "exists".
    return any(line.strip() == 'True' for line in result.stdout.splitlines())
def copy_folder(conn, config_inputs):
    """
    Copy repository from local to remote host

    The copy is performed by a local ``scp`` process; ``conn`` itself is not used.
    A failing scp does not raise: the reason is printed and the caller is
    expected to verify the result.

    :param conn: ssh connection object (not used by this function)
    :param config_inputs: mapping providing the 'keypath', 'user' and 'host' entries used for the scp call
    :return None:
    """
    import os  # local import: only needed to expand '~' / variables in the key path

    # The shell used to expand '~' and '$VAR' in the key path; do it explicitly
    # now that the command is no longer passed through a shell.
    keypath = os.path.expanduser(os.path.expandvars(str(config_inputs['keypath'])))
    target = f"{config_inputs['user']}@{config_inputs['host']}:~/."
    # Argument list instead of a shell string, so paths containing spaces or
    # shell metacharacters reach scp unchanged.
    cmd = ['scp', '-i', keypath, '-r', '../JupyterDaskOnSLURM', target]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as err:
        # e.g. scp is not installed locally; keep the step non-fatal as before
        print(f'Could not run scp: {err}')
        return None
    if result.returncode != 0:
        # Surface the reason instead of silently discarding the captured output
        print(f'scp exited with code {result.returncode}: {result.stderr.strip()}')
    return None
def test_mamba(conn):
    """
    Check if mamba is installed on remote host

    Tests for the file ``~/mambaforge/bin/mamba`` on the remote host; the remote
    shell echoes 'True' only when that file exists.

    :param conn: ssh connection object
    :return mamba_exists: True if the mamba executable is found on remote host, False otherwise
    """
    cmd = "if test -f ~/mambaforge/bin/mamba ; then echo 'True'; fi"
    result = conn.run(cmd, hide=True)
    # Only an output line that is exactly 'True' counts. Returning the raw
    # stdout would treat any stray output (e.g. a login banner) as "installed".
    return any(line.strip() == 'True' for line in result.stdout.splitlines())
def install_mamba(conn):
    """
    Download and run the mamba installer on remote host

    The steps are chained with ``&&`` in a single remote command: change to
    ``remoteWD``, download ``mamba_URL`` with wget, make the installer
    executable, run it and finally delete it.

    :param conn: ssh connection object
    :return None:
    """
    installer = 'Mambaforge-Linux-x86_64.sh'
    steps = (
        f"cd {remoteWD}",
        f"wget {mamba_URL}",
        f"chmod +x {installer}",
        f"./{installer}",
        f"rm {installer}",
    )
    conn.run(" && ".join(steps))
def test_env(conn, envfile):
    """
    Check whether the mamba environment is configured on remote host

    Reads the first line of ``envfile`` (inside ``remoteJDD``), which is expected
    to have the form 'name: <envname>', followed by the output of
    ``mamba env list``, and checks whether an environment with that name is listed.

    :param conn: ssh connection object
    :param envfile: Name of the yaml file containing the dependencies and environment name.
    :return env_exists: Logical on whether environment exists on remote host
    :return envname: Name of the expected environment
    """
    cmd = f"cd {remoteJDD} && head -1 {envfile} && mamba env list"
    result = conn.run(cmd, hide=True)

    # The first output line comes from `head -1`; the rest is the environment listing
    first_line, _, env_listing = result.stdout.partition('\n')
    # Take whatever follows the first ':' instead of relying on a fixed offset,
    # and drop surrounding whitespace and optional yaml quotes
    envname = first_line.partition(':')[2].strip().strip('\'"')

    # Collect the name column of the listing. Matching whole names avoids the
    # false positives of a substring search (e.g. a name that is merely part of
    # another environment's name or of an installation path).
    listed_names = set()
    for line in env_listing.splitlines():
        fields = line.split()
        if not fields or fields[0].startswith('#'):
            continue
        if '/' not in fields[0]:
            listed_names.add(fields[0])

    # An empty name can never identify an environment
    env_exists = bool(envname) and envname in listed_names
    return env_exists, envname
def create_env(conn, envfile):
    """
    Build the mamba environment on remote host from an environment file

    Runs ``mamba env create -f <envfile>`` from within ``remoteJDD``; command
    output is not hidden.

    :param conn: ssh connection object
    :param envfile: Name of the yaml file containing the dependencies and environment name.
    :return None:
    """
    conn.run(f"cd {remoteJDD} && mamba env create -f {envfile}", hide=False)
def check_jpconfig(conn):
    """
    Check if jupyter has been configured on remote host

    Tests for the file ``~/.jupyter/jupyter_server_config.py`` on the remote
    host; the remote shell echoes 'True' only when that file exists.

    :param conn: ssh connection object
    :return jpconfig_exists: True if the jupyter server config file exists on remote host, False otherwise
    """
    cmd = "if test -f ~/.jupyter/jupyter_server_config.py ; then echo 'True'; fi"
    result = conn.run(cmd, hide=True)
    # Only an output line that is exactly 'True' counts. Returning the raw
    # stdout would treat any stray output (e.g. a login banner) as "configured".
    return any(line.strip() == 'True' for line in result.stdout.splitlines())
def jpconfig(conn, envname):
    """
    Set up the jupyter server configuration on remote host

    Within ``remoteJDD`` and with the given environment activated, the chained
    remote command generates the jupyter server config, runs
    ``jupyter server password`` and restricts the generated config file to
    mode 400. Command output is not hidden.

    :param conn: ssh connection object
    :param envname: Name of the expected environment
    :return None:
    """
    steps = (
        f"cd {remoteJDD}",
        f"mamba activate {envname}",
        "jupyter server --generate-config",
        "jupyter server password",
        "chmod 400 ~/.jupyter/jupyter_server_config.py",
    )
    conn.run(" && ".join(steps), hide=False)
def check_daskconfig(conn):
    """
    Check if dask has been configured on remote host

    Tests for the file ``~/.config/dask/config.yml`` on the remote host; the
    remote shell echoes 'True' only when that file exists.

    :param conn: ssh connection object
    :return daskconfig_exists: True if the dask config file exists on remote host, False otherwise
    """
    cmd = "if test -f ~/.config/dask/config.yml ; then echo 'True'; fi"
    result = conn.run(cmd, hide=True)
    # Only an output line that is exactly 'True' counts. Returning the raw
    # stdout would treat any stray output (e.g. a login banner) as "configured".
    return any(line.strip() == 'True' for line in result.stdout.splitlines())
def daskconfig(conn, platform):
    """
    Install the platform specific dask configuration on remote host

    From within ``remoteJDD``, creates ``~/.config/dask`` if needed and copies
    ``config/dask/config_<platform>.yml`` to ``~/.config/dask/config.yml``.
    Command output is hidden.

    :param conn: ssh connection object
    :param platform: platform name, used to select the config file to copy
    :return None:
    """
    source = f"config/dask/config_{platform}.yml"
    conn.run(
        f"cd {remoteJDD} && mkdir -p ~/.config/dask && cp -r {source} ~/.config/dask/config.yml",
        hide=True,
    )
def install_JD(config_inputs, platform_name, envfile):
    """
    Install all components needed to run Jupyter and Dask on the remote host

    Each step is first checked and only executed when the check fails; the check
    is then repeated and a ValueError is raised if the step did not succeed.
    Steps, in order: copy the repository, install mamba, create the environment,
    configure jupyter, configure dask. Finally the user is asked whether dCache
    has been configured (this is not done by the script).

    :param config_inputs: connection configuration, passed on to ssh_remote_executor and copy_folder
    :param platform_name: platform name, used to select the dask configuration
    :param envfile: Name of the yaml file containing the dependencies and environment name.
    :return install: True when all components are present on remote host
    :raises ValueError: if a step fails or the answer to the dCache question is invalid
    """
    # Copy folder as needed
    folder_exists = ssh_remote_executor(config_inputs, check_copy)
    if not folder_exists:
        print('Cloning JupyterDaskonSLURM on remote host...')
        ssh_remote_executor(config_inputs, copy_folder, config_inputs)
        folder_exists = ssh_remote_executor(config_inputs, check_copy)
        if not folder_exists:
            raise ValueError('Error cloning repository. Check git credentials or copy manually')

    # Install mamba as needed
    mamba_exists = ssh_remote_executor(config_inputs, test_mamba)
    if not mamba_exists:
        print('Installing mamba on remote host...')
        ssh_remote_executor(config_inputs, install_mamba)
        mamba_exists = ssh_remote_executor(config_inputs, test_mamba)
        if not mamba_exists:
            raise ValueError('Error installing mamba. Please install manually')

    # Create environment
    env_exists, envname = ssh_remote_executor(config_inputs, test_env, envfile)
    if not env_exists:
        print(f'Creating mamba environment from {envfile} on remote host...')
        ssh_remote_executor(config_inputs, create_env, envfile)
        # test_env returns a (env_exists, envname) pair: unpack it, otherwise the
        # non-empty tuple is always truthy and a failed creation goes unnoticed
        env_exists, envname = ssh_remote_executor(config_inputs, test_env, envfile)
        if not env_exists:
            raise ValueError('Error creating environment. Please create manually')

    # Configure Jupyter
    jpconfig_exists = ssh_remote_executor(config_inputs, check_jpconfig)
    if not jpconfig_exists:
        print('Configuring Jupyter on remote host...')
        ssh_remote_executor(config_inputs, jpconfig, envname)
        jpconfig_exists = ssh_remote_executor(config_inputs, check_jpconfig)
        if not jpconfig_exists:
            raise ValueError('Error configuring jupyter. Please configure manually')

    # Configure Dask
    daskconfig_exists = ssh_remote_executor(config_inputs, check_daskconfig)
    if not daskconfig_exists:
        print('Configuring Dask on remote host...')
        ssh_remote_executor(config_inputs, daskconfig, platform_name)
        daskconfig_exists = ssh_remote_executor(config_inputs, check_daskconfig)
        if not daskconfig_exists:
            raise ValueError('Error configuring dask. Please configure manually')

    # Configure Dcache (manual step; an empty answer counts as 'Y')
    dcache_config = input('If you want to use dCache, it needs to be manually configured. Has dCache been configured? (Y/n): ') or 'Y'
    if dcache_config in {'Y', 'y'}:
        print("dCache configured by User")
    elif dcache_config in {'N', 'n'}:
        print(
            "\n"
            "Please configure dCache as per the instructions in https://github.com/RS-DAT/JupyterDaskOnSLURM/blob/main/user-guide.md.\n"
            "Please note that the deployable analysis environment does not require dCache to run scalable analyses.\n"
        )
    else:
        raise ValueError('Chosen option invalid. Please retry.')

    install = bool(
        folder_exists and mamba_exists and env_exists and jpconfig_exists and daskconfig_exists
    )
    return install
def remove_env(conn, envname):
    """
    Delete the mamba environment on remote host

    Runs ``mamba env remove -n <envname>`` from ``remoteWD``; command output is
    not hidden.

    :param conn: ssh connection object
    :param envname: Name of the environment to remove
    :return None:
    """
    conn.run(f"cd {remoteWD} && mamba env remove -n {envname}", hide=False)
def remove_files(conn):
    """
    Delete the dask and jupyter configuration files on remote host

    Removes (``rm -f``) the dask ``config.yml`` and the jupyter server config
    ``.py`` and ``.json`` files from the remote home directory; command output
    is not hidden.

    :param conn: ssh connection object
    :return None:
    """
    targets = " ".join((
        "~/.config/dask/config.yml",
        "~/.jupyter/jupyter_server_config.py",
        "~/.jupyter/jupyter_server_config.json",
    ))
    conn.run(f"cd {remoteWD} && rm -f {targets} ", hide=False)
def remove_folders(conn):
    """
    Delete the JupyterDaskOnSLURM folder on remote host

    Runs ``rm -rf ~/JupyterDaskOnSLURM`` from ``remoteWD``; command output is
    not hidden.

    :param conn: ssh connection object
    :return None:
    """
    conn.run(f"cd {remoteWD} && rm -rf ~/JupyterDaskOnSLURM", hide=False)
def uninstall_JD(config_inputs, platform_name, envfile = 'environment.yaml'):
    """
    Remove all installed components from the remote host

    When the repository folder is present on the remote host, the environment
    name is read from ``envfile`` inside it, the environment is removed and the
    folder is deleted. The environment file lives in that folder, so the
    environment is left untouched when the folder is missing. The dask and
    jupyter config files are removed in either case.

    :param config_inputs: connection configuration, passed on to ssh_remote_executor
    :param platform_name: platform name (not used by this function)
    :param envfile: Name of the yaml file containing the dependencies and environment name.
    :return uninstall: True once all removal steps have been executed
    """
    print('Uninstalling all components...')
    # check_copy is the existence check defined in this module; the previously
    # referenced name `check_clone` does not exist and raised a NameError
    folder_exists = ssh_remote_executor(config_inputs, check_copy)
    if folder_exists:
        _, envname = ssh_remote_executor(config_inputs, test_env, envfile)
        print('Removing environment...')
        ssh_remote_executor(config_inputs, remove_env, envname)
        print('Removing JupyterDaskOnSLURM...')
        ssh_remote_executor(config_inputs, remove_folders)
    print('Removing config files...')
    ssh_remote_executor(config_inputs, remove_files)
    uninstall = True
    return uninstall