Disable staging script for remote work dir (#4282)
This commit disables the use of a separate stage file for executors using
a non-default file system, e.g. object storage as the work directory (including
the Fusion file system). The stage file is still generated for local and HPC
batch schedulers.

See #4279

Signed-off-by: Ben Sherman <[email protected]>
Signed-off-by: Paolo Di Tommaso <[email protected]>
Co-authored-by: Paolo Di Tommaso <[email protected]>
bentsherman and pditommaso authored Sep 10, 2023
1 parent bb96763 commit 80f7cd4
Showing 3 changed files with 49 additions and 3 deletions.
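For context, the heart of the change is a single guard in BashWrapperBuilder (first hunk below): a separate stage file is only written when that file lives on the default, i.e. local, file system. A minimal Groovy sketch of the idea — not the actual builder code; stageFile, stagingScript and the byte threshold are named after the diff:

    import java.nio.file.FileSystems
    import java.nio.file.Path

    // Sketch: write a separate stage file only on the local file system;
    // for remote work dirs (S3, Azure Blob, Fusion) inline the staging
    // commands directly into the task wrapper instead.
    String stageCommand(Path stageFile, String stagingScript, long thresholdBytes) {
        if( stageFile.fileSystem == FileSystems.default && stagingScript.size() >= thresholdBytes ) {
            stageFile.text = stagingScript      // write the script next to the task files
            return "bash ${stageFile}"          // and invoke it from the wrapper
        }
        return stagingScript                    // otherwise keep the commands inline
    }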
@@ -198,7 +198,9 @@ class BashWrapperBuilder {
         return null
 
         final header = "# stage input files\n"
-        if( stagingScript.size() >= stageFileThreshold.bytes ) {
+        // enable only when the stage uses the default file system, i.e. it's not a remote object storage file
+        // see https://github.com/nextflow-io/nextflow/issues/4279
+        if( stageFile.fileSystem == FileSystems.default && stagingScript.size() >= stageFileThreshold.bytes ) {
             stageScript = stagingScript
             return header + "bash ${stageFile}"
         }
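The FileSystems.default comparison is what separates local paths from object-storage paths: anything built via Paths.get() belongs to the JVM's default provider, while a path minted by a custom NIO provider (such as Nextflow's S3 or Azure ones) carries its own FileSystem. A quick illustration of that premise:

    import java.nio.file.FileSystems
    import java.nio.file.Paths

    // Local paths always report the default file system, so the guard
    // above can still hand off to a separate stage file...
    assert Paths.get('/work/dir/.command.stage').fileSystem == FileSystems.default

    // ...whereas a path from a custom provider (e.g. an s3:// path from
    // the nf-amazon plugin) reports its own FileSystem, the condition
    // short-circuits, and the staging commands stay inline.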
@@ -16,11 +16,14 @@

 package nextflow.cloud.aws.batch
 
+import java.nio.file.FileSystems
 import java.nio.file.Files
 import java.nio.file.Paths
 
 import nextflow.Session
+import nextflow.SysEnv
 import nextflow.cloud.aws.config.AwsConfig
+import nextflow.cloud.aws.util.S3PathFactory
 import nextflow.processor.TaskBean
 import nextflow.util.Duration
 import spock.lang.Specification
@@ -631,4 +634,44 @@ class AwsBatchScriptLauncherTest extends Specification {

     }
 
+    def 'should not create separate stage script' () {
+        given:
+        SysEnv.push([NXF_WRAPPER_STAGE_FILE_THRESHOLD: '100'])
+        and:
+        def workDir = S3PathFactory.parse('s3://my-bucket/work')
+        and:
+        def inputFiles = [
+                'sample_1.fq': Paths.get('/my-bucket/data/sample_1.fq'),
+                'sample_2.fq': Paths.get('/my-bucket/data/sample_2.fq'),
+        ]
+        def stageScript = '''\
+                # stage input files
+                downloads=(true)
+                rm -f sample_1.fq
+                rm -f sample_2.fq
+                rm -f .command.sh
+                downloads+=("nxf_s3_download s3://my-bucket/data/sample_1.fq sample_1.fq")
+                downloads+=("nxf_s3_download s3://my-bucket/data/sample_2.fq sample_2.fq")
+                downloads+=("nxf_s3_download s3://my-bucket/work/.command.sh .command.sh")
+                nxf_parallel "${downloads[@]}"
+                '''.stripIndent()
+        and:
+        def bean = [
+                workDir: workDir,
+                targetDir: workDir,
+                inputFiles: inputFiles,
+                script: 'echo Hello world!'
+        ] as TaskBean
+        def opts = new AwsOptions()
+        def builder = new AwsBatchScriptLauncher(bean, opts)
+
+        when:
+        def binding = builder.makeBinding()
+        then:
+        binding.stage_inputs == stageScript
+
+        cleanup:
+        SysEnv.pop()
+    }
+
 }
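The test pins NXF_WRAPPER_STAGE_FILE_THRESHOLD to 100 bytes — far below the size of the expected staging script — so only the new file-system guard can keep the script inline. The underlying premise is that S3PathFactory yields a path outside the default file system; a minimal sketch of that assumption (requires the nf-amazon classes on the classpath):

    import java.nio.file.FileSystems
    import nextflow.cloud.aws.util.S3PathFactory

    // The parsed s3:// path belongs to the S3 NIO file system, not the
    // default one — exactly the case the new guard routes to inline staging
    def workDir = S3PathFactory.parse('s3://my-bucket/work')
    assert workDir.fileSystem != FileSystems.default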
@@ -1,6 +1,6 @@
 package nextflow.cloud.azure.batch
 
-import java.nio.file.FileSystem
+
 import java.nio.file.Path
 import java.nio.file.Paths
 import java.nio.file.attribute.BasicFileAttributes
@@ -9,6 +9,7 @@ import java.nio.file.spi.FileSystemProvider
 import com.azure.storage.blob.BlobClient
 import nextflow.Session
 import nextflow.cloud.azure.config.AzConfig
+import nextflow.cloud.azure.nio.AzFileSystem
 import nextflow.cloud.azure.nio.AzPath
 import nextflow.processor.TaskBean
 import spock.lang.Specification
@@ -34,7 +35,7 @@ class AzFileCopyStrategyTest extends Specification {
         provider.getScheme() >> 'az'
         provider.readAttributes(_, _, _) >> attr
 
-        def fs = Mock(FileSystem)
+        def fs = Mock(AzFileSystem)
         fs.provider() >> provider
         fs.toString() >> ('az://' + bucket)
         def uri = GroovyMock(URI)
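The Azure changes follow from the same guard: since the wrapper builder now inspects the file system of the paths it is given, the test mocks the concrete AzFileSystem rather than the bare java.nio.file.FileSystem interface, keeping the mocked az:// paths recognisably remote. A hedged Spock sketch of why any mocked custom file system exercises the remote branch:

    import java.nio.file.FileSystems
    import nextflow.cloud.azure.nio.AzFileSystem
    import spock.lang.Specification

    class RemoteFsGuardSketch extends Specification {

        def 'a mocked AzFileSystem is never the default file system' () {
            given:
            def fs = Mock(AzFileSystem)

            expect:
            // the mock is a distinct instance, so the equality check against
            // FileSystems.default fails and the staging script stays inline
            fs != FileSystems.default
        }
    }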
