Skip to content

Commit

Permalink
move Grid library to a proper place. And update to OO moose
Browse files Browse the repository at this point in the history
  • Loading branch information
Juke34 committed Oct 22, 2019
1 parent f861e32 commit 27806eb
Show file tree
Hide file tree
Showing 10 changed files with 490 additions and 712 deletions.
76 changes: 76 additions & 0 deletions annotation/BILS/Grid/BilsGridRunner.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package BilsGridRunner;

use strict;
use warnings;
use Carp;
use base qw (GridRunner); ## really just for aesthetics
use List::Util qw (shuffle);
use FindBin;

BEGIN {

    ## Make the scheduler-specific helper libraries loadable: for each
    ## library directory name, look for it underneath every entry currently
    ## in @INC and append the first match to @INC.
    ## LSF_perl_lib is handled first, so its directory (if found) is itself
    ## part of @INC while SLURM_perl_lib is being searched.
    for my $lib_name (qw(LSF_perl_lib SLURM_perl_lib)) {
        for my $inc_dir (@INC) {
            my $candidate = "$inc_dir/$lib_name";
            next unless -d $candidate;
            push @INC, $candidate;
            last;    # only the first match is registered
        }
    }
}

use Run_Bsub;
use Cwd;

####
# run_on_grid(\%args)
#
# Submit a list of shell commands to a compute grid and keep re-submitting
# the ones reported as failed until none are left.
#
# Arguments (hash reference):
#   cmds      - array reference of command strings (a single string is also
#               accepted). Mandatory: when missing, a message is printed and
#               the program exits.
#   scheduler - "slurm" or "lsf"; defaults to "slurm".
#   queueu    - queue name handed to the scheduler module (the historical
#               spelling is kept for existing callers; "queue" is accepted
#               as an alias). Defaults to "normal".
#
# Returns 0 once every command has completed successfully.
# Croaks when the scheduler is neither "slurm" nor "lsf".
#
# NOTE(review): Run_Slurm::run / Run_Bsub::run are assumed to return the list
# of commands that failed - that is how their return value is used here.
# NOTE(review): commands that fail on every attempt are retried forever.
sub run_on_grid {
    my ($args) = @_;

    if ( !defined $args->{cmds} ) {
        print "No command provided.\n";
        exit;
    }

    # 'cmds' is an array reference: expand it into the real command list
    # instead of storing the reference itself as the only "command".
    my $cmds_arg = $args->{cmds};
    my @cmds = ref($cmds_arg) eq 'ARRAY' ? @{$cmds_arg} : ($cmds_arg);

    my $scheduler = $args->{scheduler};
    if ( !defined $scheduler ) {
        $scheduler = "slurm";
        print "Default scheduler used: $scheduler\n";
    }

    my $queue = defined $args->{queueu} ? $args->{queueu}
              : defined $args->{queue}  ? $args->{queue}
              :                           "normal";

    @cmds = shuffle @cmds;

    # Declared outside the branches so the result is visible below.
    my @failed_cmds;

    if ( $scheduler eq "slurm" ) {
        # Load the back-end on demand unless something already provided it.
        require Run_Slurm unless defined &Run_Slurm::run;
        Run_Slurm::set_queue($queue);
        Run_Slurm::set_memory("4");    # 4 G of RAM
        @failed_cmds = Run_Slurm::run(@cmds);
    }
    elsif ( $scheduler eq "lsf" ) {
        require Run_Bsub unless defined &Run_Bsub::run;
        Run_Bsub::set_queue($queue);
        Run_Bsub::set_memory("4");     # 4 G of RAM
        @failed_cmds = Run_Bsub::run(@cmds);
    }
    else {
        # Nothing was run, so this must not fall through to the success path.
        croak "Scheduler $scheduler not implemented yet. <slurm> and <lsf> are the only possible choice currently\n";
    }

    if (@failed_cmds) {
        my $num_failed_cmds = scalar(@failed_cmds);

        print STDERR "$num_failed_cmds commands failed during grid computing.\n";

        # Retry only the failed commands, with the same settings and in the
        # hash-reference form this sub expects.
        return run_on_grid(
            {
                cmds      => \@failed_cmds,
                scheduler => $scheduler,
                queueu    => $queue,
            }
        );
    }

    print "All commands completed successfully on the computing grid.\n";
    return 0;
}

####


1;
153 changes: 153 additions & 0 deletions annotation/BILS/Grid/Bsub.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package Bsub;

use strict;
use warnings;
use File::Basename;
use IPC::Cmd qw[can_run run];
use Carp;
use Moose;
use BILS::Grid::GridRunner;

# Inherit from the GridRunner class (Moose inheritance).
extends 'GridRunner';

# Name of the scheduler this class submits to: a read-write string
# attribute that defaults to 'LSF'.
has scheduler => ('is' => 'rw', isa => 'Str', default => 'LSF');

# The BUILD method is called after an object is created.
# Here it only checks that the `bsub` executable can be located, and croaks
# otherwise, since no job could be submitted without it.
sub BUILD {
    my $bsub_path = can_run('bsub');
    croak 'bsub command does not exist. Cannot run jobs!' unless $bsub_path;
    return $bsub_path;
}


####
# _submit_job($num_cmds_launched)
#
# Bundle the next batch of pending commands into one shell script and submit
# that script to LSF with `bsub`.
#
# Parameter:
#   $num_cmds_launched - index (into $self->{cmds_list}) of the first command
#                        that has not been submitted yet.
#
# Reads from $self: num_cmds, cmds_per_node, cmds_list, log_dir, retvals_dir,
# cmds_dir, monitor_dir and the optional queue, memory and group settings.
# On success it records the job in $self->{nodes_in_progress},
# $self->{job_id_to_cmd_indices} and $self->{job_id_to_submission_time}, and
# appends "<job id>\t<script name>" to $log_dir/job_ids.txt.
#
# Returns the updated count of launched commands on success. When bsub
# rejects the job, the script is removed, the process sleeps two minutes and
# the ORIGINAL count is returned so the same batch is attempted again.
# Dies when a file or directory cannot be created or when no job ID can be
# extracted from the bsub output.
sub _submit_job {
    my ( $self, $num_cmds_launched ) = @_;

    my $num_cmds       = $self->{num_cmds};
    my $cmds_per_node  = $self->{cmds_per_node};
    my $cmds_list_aref = $self->{cmds_list};

    my $log_dir     = $self->{log_dir};
    my $retvals_dir = $self->{retvals_dir};
    my $cmds_dir    = $self->{cmds_dir};
    my $monitor_dir = $self->{monitor_dir};

    # Remembered so a rejected submission can be retried from the same spot.
    my $orig_num_cmds_launched = $num_cmds_launched;

    # One script per batch, named after this process ID and the index of the
    # first command in the batch.
    my $shell_script = "$cmds_dir/J$$.S${num_cmds_launched}.sh";
    open( my $fh, '>', $shell_script )
        or die "Error, cannot write $shell_script: $!";
    print $fh "#!/bin/sh\n\n";

    $self->_write_minimal_environment($fh);

    my $num_cmds_written = 0;

    # Marker files touched/removed by the job script itself.
    my $monitor_started  = "$monitor_dir/$num_cmds_launched.started";
    my $monitor_finished = "$monitor_dir/$num_cmds_launched.finished";

    my @cmd_indices_prepped;

    while ( $num_cmds_launched < $num_cmds && $num_cmds_written < $cmds_per_node ) {
        # The launched-count doubles as the 0-based index of the next command.
        my $next_cmd_index = $num_cmds_launched;
        my $cmd_string     = $cmds_list_aref->[$next_cmd_index];

        push( @cmd_indices_prepped, $next_cmd_index );

        # Return-value files are spread over numbered sub-directories.
        my $retval_bin    = int( $next_cmd_index / $self->retval_bin_size() );
        my $retval_subdir = "$retvals_dir/$retval_bin";
        unless ( -d $retval_subdir ) {
            mkdir $retval_subdir
                or die "Error, cannot mkdir $retval_subdir: $!";
        }

        print $fh "## Command index $next_cmd_index\n"
            . "touch $monitor_started\n"
            . "$cmd_string\n"
            . 'echo $? >> ' . "$retval_subdir/entry_$next_cmd_index.ret\n\n";

        $num_cmds_launched++;
        $num_cmds_written++;
    }

    print $fh "\n"
        . "rm -f $monitor_started\n"
        . "touch $monitor_finished\n"
        . "\n"
        . "exit 0\n\n";

    close $fh or die "Error, cannot close $shell_script: $!";
    chmod( 0775, $shell_script );

    print "Submitting: $shell_script to bsub\n" if $self->verbose;

    # Build the submission command in ONE variable. Re-declaring it with
    # `my` inside the branches would leave this one empty, and the job script
    # would then be executed locally instead of being handed to bsub.
    my $cmd = "bsub ";
    if ( my $queue = $self->{queue} ) {
        $cmd .= "-q $queue ";
    }
    $cmd .= "-e $shell_script.stderr -o $shell_script.stdout ";

    if ( my $memory = $self->{memory} ) {
        $cmd .= " -R \"rusage[mem=$memory]\" ";
    }
    #if (my $mount_test = $self->{mount_test}) {
    #    $cmd .= " -E \"/broad/tools/NoArch/pkgs/local/checkmount $mount_test && [ -e $mount_test ]\" ";
    #}
    if ( my $group = $self->{group} ) {
        $cmd .= " -G $group ";
    }

    $cmd .= " $shell_script 2>&1 ";

    my $job_id_text = `$cmd`;
    my $ret         = $?;    # must be read right after the backticks

    if ($ret) {
        print STDERR "BSUB failed to accept job: $cmd\n (ret $ret)\n";

        unlink $shell_script;    # cleanup, try again later

        sleep( 2 * 60 );         # give the system time to recuperate if a problem exists
        return ($orig_num_cmds_launched);
    }

    my $script_name = basename($shell_script);
    open( my $logdir_jobsfh, '>>', "$log_dir/job_ids.txt" )
        or die "Error, cannot open file $log_dir/job_ids.txt: $!";

    ## get the job ID and log it:
    if ( $job_id_text =~ /Job \<(\d+)\>/ ) {
        my $job_id = $1;
        print $logdir_jobsfh "$job_id\t$script_name\n";

        my $monitor_href = $self->{nodes_in_progress};
        $monitor_href->{$monitor_finished} = $job_id;

        $self->{job_id_to_cmd_indices}->{$job_id}     = \@cmd_indices_prepped;
        $self->{job_id_to_submission_time}->{$job_id} = time();
    }
    else {
        die "Fatal error, couldn't extract Job ID from submission text: $job_id_text";
    }
    close $logdir_jobsfh;

    return ($num_cmds_launched);
}


#no Moose;
#__PACKAGE__->meta->make_immutable;
;
1
Loading

0 comments on commit 27806eb

Please sign in to comment.