-
Notifications
You must be signed in to change notification settings - Fork 32
/
Copy pathpsom_run_pipeline.m
executable file
·423 lines (397 loc) · 16.8 KB
/
psom_run_pipeline.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
function [] = psom_run_pipeline(pipeline,opt)
% Run a pipeline using the Pipeline System for Octave and Matlab (PSOM).
%
% SYNTAX:
% [] = PSOM_RUN_PIPELINE(PIPELINE,OPT)
%
% _________________________________________________________________________
% INPUTS:
%
% PIPELINE
%    (structure) a matlab structure which defines a pipeline.
%    Each field name <JOB_NAME> will be used to name jobs of the
%    pipeline. The fields <JOB_NAME> are themselves structure, with the
%    following fields :
%
%    COMMAND
%        (string) the name of the command applied for this job.
%        This command can use the variables FILES_IN, FILES_OUT and OPT
%        associated with the job (see below).
%        Examples :
%            'niak_brick_something(files_in,files_out,opt);'
%            'my_function(opt)'
%
%    FILES_IN
%        (string, cell of strings, structure whose terminal nodes are
%        string or cell of strings)
%        The files used as input by the command. Note that for properly
%        handling dependencies, this field needs to contain the exact
%        name of the file (full path, no wildcards, no '' for default
%        values).
%
%    FILES_OUT
%        (string, cell of strings, structure whose terminal nodes are
%        string or cell of strings) The list of files generated by the
%        command. Note that for properly handling dependencies, this
%        field needs to contain the exact name of the file
%        (full path, no wildcards, no '' for default values).
%
%    FILES_CLEAN
%        (string, cell of strings, structure whose terminal nodes are
%        string or cell of strings) The list of files deleted by the
%        command. Note that for properly handling dependencies, this
%        field needs to contain the exact name of the file
%        (full path, no wildcards, no '' for default values).
%
%    DEP
%        (cell of strings) a list of job names. The job <JOB_NAME>
%        will depend on these jobs.
%
%    OPT
%        (any matlab variable) options of the job. This field has no
%        impact on dependencies. OPT can for example be a structure,
%        where each field will be used as an argument of the command.
%        The options will be scanned to check if a job has changed,
%        should a pipeline be executed multiple times using the same
%        logs folder.
%
% OPT
%    (structure) with the following fields :
%
%    PATH_LOGS
%        (string) The folder where the "memory" of the pipeline
%        manager will be stored. See the COMMENTS section below.
%
%    MODE
%        (string, default GB_PSOM_MODE defined in PSOM_GB_VARS)
%        how to execute the jobs :
%        'session'    : current Matlab session.
%        'background' : background execution, not-unlogin-proofed
%                       (asynchronous system call).
%        'batch'      : background execution, unlogin-proofed ('at' in
%                       UNIX, start in WINDOWS).
%        'qsub'       : remote execution using qsub (torque, SGE, PBS).
%        'msub'       : remote execution using msub (MOAB)
%        'condor'     : remote execution using condor
%
%    MODE_PIPELINE_MANAGER
%        (string, default GB_PSOM_MODE_PM defined in PSOM_GB_VARS)
%        same as OPT.MODE, but applies to the pipeline manager itself.
%
%    MAX_QUEUED
%        (integer, default GB_PSOM_MAX_QUEUED defined in PSOM_GB_VARS if
%        that variable is not empty; otherwise 1 in 'batch' and
%        'background' modes, Inf in 'qsub', 'msub' and 'condor' modes.
%        An empty value or 0 means "use the default".)
%        The maximum number of jobs that can be processed
%        simultaneously. Some qsub systems actually put restrictions
%        on that. Contact your local system administrator for more info.
%        In 'session' mode this value is always forced to 1.
%
%    NB_RESUB
%        (integer, default 0 in 'session', 'batch' and 'background' modes,
%        1 otherwise) The number of times a job will be resubmitted if it
%        fails.
%
%    SHELL_OPTIONS
%        (string, default GB_PSOM_SHELL_OPTIONS defined in PSOM_GB_VARS)
%        some commands that will be added at the beginning of the shell
%        script submitted to batch or qsub. This can be used to set
%        important variables, or source an initialization script.
%
%    QSUB_OPTIONS
%        (string, GB_PSOM_QSUB_OPTIONS defined in PSOM_GB_VARS)
%        This field can be used to pass any argument when submitting a
%        job with qsub. For example, '-q all.q@yeatman,all.q@zeus' will
%        force qsub to only use the yeatman and zeus workstations in the
%        all.q queue. It can also be used to put restrictions on the
%        minimum available memory, etc.
%
%    FLAG_SHORT_JOB_NAMES
%        (boolean, default true) only the 8 first characters of a job
%        name are used to submit to qsub/msub. Most qsub systems truncate
%        the name of the job anyway, and some systems even refuse to
%        submit jobs with long names.
%
%    COMMAND_MATLAB
%        (string, default GB_PSOM_COMMAND_MATLAB or
%        GB_PSOM_COMMAND_OCTAVE depending on the current environment,
%        defined in PSOM_GB_VARS)
%        how to invoke matlab (or OCTAVE).
%        You may want to update that to add the full path of the command.
%        The default for this field can be set using the variable
%        GB_PSOM_COMMAND_MATLAB/OCTAVE in the file PSOM_GB_VARS.
%
%    INIT_MATLAB
%        (string, GB_PSOM_INIT_MATLAB defined in PSOM_GB_VARS) a matlab
%        command (multiple commands can actually be passed using comma
%        separation) that will be executed at the beginning of any
%        matlab/Octave job.
%
%    PATH_SEARCH
%        (string, default GB_PSOM_PATH_SEARCH in the file PSOM_GB_VARS).
%        If PATH_SEARCH is empty, the current path is used. If
%        PATH_SEARCH equals 'gb_psom_omitted', then PSOM will not attempt
%        to set the search path, i.e. the search path for every job will
%        be the current search path in 'session' mode, and the default
%        Octave/Matlab search path in the other modes.
%
%    RESTART
%        (cell of strings, default {}) any job whose name contains one
%        of the strings in RESTART will be restarted
%
%    TYPE_RESTART
%        (string, default 'substring') defines how OPT.RESTART is to be
%        interpreted. Available options:
%        'substring' : restart jobs whose name contains one of the
%                      string in OPT.RESTART
%        'exact'     : restart jobs whose name is listed in OPT.RESTART.
%
%    There are actually other minor options available, see
%    PSOM_PIPELINE_INIT and PSOM_PIPELINE_PROCESS for details.
%
% _________________________________________________________________________
% OUTPUTS:
%
% The pipeline manager is going to try to process the pipeline and create
% all the output files. In addition logs and parameters of the pipeline are
% stored in the log folder :
%
% PIPE.mat
%
%    A .MAT file with the following variables:
%
%    HISTORY
%        A string recapitulating when and who created the pipeline, (and
%        on which machine).
%
%    LIST_JOBS, FILES_IN, FILES_OUT, GRAPH_DEPS
%        See PSOM_BUILD_DEPENDENCIES for more info.
%
% PIPE_history.txt
%
%    A text file with the history of the pipeline. Basically, it keeps
%    track of the time of submission, completion and failure of all jobs
%    of the pipeline. If the pipeline is executed multiple times with
%    the same log folders, the history file is keeping track of all
%    sessions.
%
% PIPE_jobs.mat
%
%    A .mat file which contains variables <NAME_JOB> where NAME_JOB is
%    the name of any job in the pipeline, and is equal to the field
%    PIPELINE.<NAME_JOB> for the latest execution of this job in the
%    pipeline.
%
% PIPE_logs.mat
%
%    A .mat file which contains variables <NAME_JOB> where NAME_JOB is
%    the name of any job in the pipeline. The variable <NAME_JOB> is a
%    string which contains the log of the job. Jobs that have not been
%    processed yet have an empty log.
%
% PIPE_news_feed.csv
%
%    A comma-separated values (csv) file, with one line per job
%    submission/completion/failure. This file is reset everytime the
%    pipeline is started. Jobs that were already completed/failed before
%    anything is processed are listed as such. This file is useful to
%    monitor the activity of the pipeline manager for third-party
%    software.
%
% PIPE_status.mat
%
%    A .mat file which contains variables <NAME_JOB> where NAME_JOB is
%    the name of any job in the pipeline. The variable <NAME_JOB> is a
%    string which describes the current status of the job (either
%    'submitted', 'running', 'finished', 'failed', 'none').
%
% PIPE_profile.mat
%
%    A .mat file which contains variables <NAME_JOB> where NAME_JOB is
%    the name of any job in the pipeline. The variable <NAME_JOB> is a
%    structure where each field is a profile variable for the execution
%    of the job.
%
% _________________________________________________________________________
% SEE ALSO:
% PSOM_DEMO_PIPELINE, PSOM_CONFIG, PSOM_PIPELINE_VISU,
% PSOM_PIPELINE_PROCESS, PSOM_PIPELINE_INIT
%
% _________________________________________________________________________
% COMMENTS:
%
% Empty file strings or strings equal to 'gb_niak_omitted' in the pipeline
% description are ignored in the dependency graph and checks for
% the existence of required files.
%
% If a pipeline is already running (a 'PIPE.lock' file could be found in
% the logs folder), a warning will be issued and the user may not restart
% the pipeline. To force a restart of the pipeline, the '.lock' file
% has to be manually deleted before, which will force the pipeline manager
% to stop running if it is still active before the pipeline can be
% restarted.
%
% If this is not the first time a pipeline is executed, the pipeline
% manager will check which jobs have been successfully completed, and will
% not restart these ones. If a job description has somehow been
% modified since a previous processing, this job and all its children will
% be restarted. For more details on this behavior, please read the
% documentation of PSOM_PIPELINE_INIT or run the pipeline demo in
% PSOM_DEMO_PIPELINE.
%
% Copyright (c) Pierre Bellec, Montreal Neurological Institute, 2008-2010.
% Departement d'informatique et de recherche operationnelle
% Centre de recherche de l'institut de Geriatrie de Montreal
% Universite de Montreal, 2011
% Maintainer : [email protected]
% See licensing information in the code.
% Keywords : pipeline

% Permission is hereby granted, free of charge, to any person obtaining a copy
% of this software and associated documentation files (the "Software"), to deal
% in the Software without restriction, including without limitation the rights
% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
% copies of the Software, and to permit persons to whom the Software is
% furnished to do so, subject to the following conditions:
%
% The above copyright notice and this permission notice shall be included in
% all copies or substantial portions of the Software.
%
% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
% THE SOFTWARE.

% Load the PSOM global variables (gb_psom_*) into the current workspace.
psom_gb_vars

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Setting up default values for inputs %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% SYNTAX
if ~exist('pipeline','var')||~exist('opt','var')
    error('SYNTAX: [] = PSOM_RUN_PIPELINE(FILE_PIPELINE,OPT). Type ''help psom_run_pipeline'' for more info.')
end

%% Options
% PSOM_SET_DEFAULTS is a script driven by the three GB_* variables below.
% The rest of this function relies on it to fill missing fields of OPT and
% to expose every listed field as a local variable of the same name
% (e.g. MAX_QUEUED, FLAG_DEBUG, SHELL_OPTIONS).
name_pipeline     = 'PIPE';
gb_name_structure = 'opt';
gb_list_fields    = {'flag_short_job_names' , 'nb_resub'       , 'type_restart' , 'flag_pause' , 'init_matlab'       , 'flag_update' , 'flag_debug' , 'path_search'       , 'restart' , 'shell_options'       , 'path_logs' , 'command_matlab' , 'flag_verbose' , 'mode'       , 'mode_pipeline_manager' , 'max_queued'       , 'qsub_options'       , 'time_between_checks' , 'nb_checks_per_point' , 'time_cool_down' };
gb_list_defaults  = {true                   , gb_psom_nb_resub , 'substring'    , true         , gb_psom_init_matlab , true          , false        , gb_psom_path_search , {}        , gb_psom_shell_options , NaN         , ''               , true           , gb_psom_mode , gb_psom_mode_pm         , gb_psom_max_queued , gb_psom_qsub_options , []                    , []                    , []               };
psom_set_defaults

% Reject unknown execution modes before deriving any mode-dependent default.
if ~ismember(opt.mode,{'session','background','batch','qsub','msub','condor'})
    error('%s is an unknown mode of pipeline execution. Sorry dude, I must quit ...',opt.mode);
end

% Make sure the logs folder ends with a file separator, and keep the local
% copy in sync with OPT in every case (not only when a separator is added).
if ~strcmp(opt.path_logs(end),filesep)
    opt.path_logs = [opt.path_logs filesep];
end
path_logs = opt.path_logs;

% An empty search path means "use the current search path".
if isempty(path_search)
    path_search = path;
    opt.path_search = path_search;
end

% Pick the interpreter command matching the current environment.
if isempty(opt.command_matlab)
    if strcmp(gb_psom_language,'matlab')
        opt.command_matlab = gb_psom_command_matlab;
    else
        opt.command_matlab = gb_psom_command_octave;
    end
end

% In 'session' mode the number of simultaneous jobs is always forced to 1.
if strcmp(opt.mode,'session')
    opt.max_queued = 1;
    max_queued = 1;
end

% Resolve the default of MAX_QUEUED. Both an empty value and 0 are treated
% as "not specified": testing only "== 0" silently skipped this block when
% GB_PSOM_MAX_QUEUED (the default of OPT.MAX_QUEUED) was empty.
if isempty(max_queued) || all(max_queued(:) == 0)
    switch opt.mode
        case {'batch','background'}
            if isempty(gb_psom_max_queued)
                max_queued = 1;
            else
                max_queued = gb_psom_max_queued;
            end
        case {'session','qsub','msub','condor'}
            % NOTE: 'session' cannot reach this point because MAX_QUEUED
            % was forced to 1 above; the label is kept for completeness.
            if isempty(gb_psom_max_queued)
                max_queued = Inf;
            else
                max_queued = gb_psom_max_queued;
            end
    end % switch on the execution mode
    opt.max_queued = max_queued;
end % default of max_queued

% Resolve the defaults of the polling options, and write them back in OPT
% so that the resolved values (rather than the empty placeholders) are the
% ones forwarded to the pipeline manager below.
if isempty(time_between_checks)
    time_between_checks = 0;
end
if isempty(nb_checks_per_point)
    if strcmp(opt.mode,'session')
        nb_checks_per_point = Inf;
    else
        nb_checks_per_point = 60;
    end
end
opt.time_between_checks = time_between_checks;
opt.nb_checks_per_point = nb_checks_per_point;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% The pipeline processing starts now %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Check for a 'lock' tag
file_pipe_running = cat(2,path_logs,filesep,name_pipeline,'.lock');
file_logs         = cat(2,path_logs,filesep,name_pipeline,'_history.txt');

if exist(file_pipe_running,'file') % Is the pipeline running ?

    % A lock file exists: do not start anything, let the user decide
    % between aborting (CTRL-C) and monitoring the current execution.
    fprintf('\nA lock file %s has been found on the pipeline !\nIf the pipeline crashed, press CTRL-C now, delete manually the lock and restart the pipeline.\nOtherwise press any key to monitor the current pipeline execution.\n\n',file_pipe_running)
    pause
    psom_pipeline_visu(path_logs,'monitor');

else

    %% Initialize the logs folder
    opt_init.path_logs      = opt.path_logs;
    opt_init.path_search    = opt.path_search;
    opt_init.command_matlab = opt.command_matlab;
    opt_init.flag_verbose   = opt.flag_verbose;
    opt_init.restart        = opt.restart;
    opt_init.flag_update    = opt.flag_update;
    opt_init.flag_pause     = opt.flag_pause;
    opt_init.type_restart   = opt.type_restart;

    if flag_debug
        % No semicolon on purpose: display the options in debug mode.
        opt_init
    end

    [tmp,flag_start] = psom_pipeline_init(pipeline,opt_init);
    if ~flag_start
        % The initialization reported that the pipeline should not start.
        return
    end

    %% Run the pipeline manager
    file_pipeline = cat(2,path_logs,filesep,name_pipeline,'.mat');

    opt_proc.mode                  = opt.mode;
    opt_proc.mode_pipeline_manager = opt.mode_pipeline_manager;
    opt_proc.max_queued            = opt.max_queued;
    opt_proc.qsub_options          = opt.qsub_options;
    opt_proc.shell_options         = shell_options;
    opt_proc.command_matlab        = opt.command_matlab;
    opt_proc.time_between_checks   = opt.time_between_checks;
    opt_proc.nb_checks_per_point   = opt.nb_checks_per_point;
    opt_proc.flag_short_job_names  = opt.flag_short_job_names;
    opt_proc.flag_debug            = opt.flag_debug;
    opt_proc.flag_verbose          = opt.flag_verbose;
    opt_proc.init_matlab           = opt.init_matlab;
    opt_proc.nb_resub              = opt.nb_resub;

    if flag_debug
        % No semicolon on purpose: display the options in debug mode.
        opt_proc
    end

    % Record the current size (in bytes) of the history file, so that the
    % monitor started below can be told how much of it predates this run.
    % The size is obtained by seeking to the end of the file instead of
    % reading its whole content.
    nb_chars = 0;
    if flag_verbose&&~strcmp(opt.mode_pipeline_manager,'session')
        hf = fopen(file_logs,'r');
        if hf~=-1
            fseek(hf,0,'eof');
            nb_chars = ftell(hf);
            fclose(hf);
        end
    end

    psom_pipeline_process(file_pipeline,opt_proc);

    %% If not in session mode, monitor the output of the pipeline
    if flag_verbose&&~strcmp(opt.mode_pipeline_manager,'session')
        psom_pipeline_visu(path_logs,'monitor',nb_chars);
    end

end