Skip to content

Commit

Permalink
Add a REQUESTER_PROJECT variable to support Requester Pays buckets
Browse files Browse the repository at this point in the history
- Update software version to 202010.01
- Minor updates to the runner
  • Loading branch information
DonFreed committed Mar 26, 2021
1 parent 603a3f7 commit 8eeea7d
Show file tree
Hide file tree
Showing 8 changed files with 31 additions and 21 deletions.
28 changes: 14 additions & 14 deletions pipeline_scripts/gc_functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ transfer()
src_file=$1
dst_file=$2
start_s=`date +%s`
gsutil cp "$src_file" "$dst_file"
gsutil ${REQUESTER_PROJECT:+-u $REQUESTER_PROJECT} cp "$src_file" "$dst_file"
check_error $? "Transfer $src_file to $dst_file"
end_s=`date +%s`
runtime=$(delta_time $start_s $end_s)
Expand All @@ -65,9 +65,9 @@ transfer_all_sites()
local_sites+=("$local_file")
local_str+=" -k \"$local_file\" "
# Index
if $(test -e "${src_file}".idx) || $(gsutil -q stat "${src_file}".idx); then
if $(test -e "${src_file}".idx) || $(gsutil ${REQUESTER_PROJECT:+-u $REQUESTER_PROJECT} -q stat "${src_file}".idx); then
idx="${src_file}".idx
elif $(test -e "${src_file}".tbi) || $(gsutil -q stat "${src_file}".tbi); then
elif $(test -e "${src_file}".tbi) || $(gsutil ${REQUESTER_PROJECT:+-u $REQUESTER_PROJECT} -q stat "${src_file}".tbi); then
idx="${src_file}".tbi
else
echo "Cannot find idx for $src_file"
Expand Down Expand Up @@ -134,7 +134,7 @@ gc_setup()
## Setup license information #
cred=$license_dir/credentials.json
project_file=$license_dir/credentials.json.project
python /opt/sentieon/gen_credentials.py ${EMAIL:+--email $EMAIL} $cred "$SENTIEON_KEY"
python3 /opt/sentieon/gen_credentials.py ${EMAIL:+--email $EMAIL} $cred "$SENTIEON_KEY"
sleep 10
if [[ -n $SENTIEON_KEY ]]; then
export SENTIEON_AUTH_MECH=proxy_GOOGLE
Expand Down Expand Up @@ -179,9 +179,9 @@ download_bams()
for bam in "${bams[@]}"; do
local_bam=$download_input_dir/$(basename "$bam")
transfer "$bam" "$local_bam"
if $(test -e "${bam}".bai) || $(gsutil -q stat "${bam}".bai); then
if $(test -e "${bam}".bai) || $(gsutil ${REQUESTER_PROJECT:+-u $REQUESTER_PROJECT} -q stat "${bam}".bai); then
bai="${bam}".bai
elif $(test -e "${bam%%.bam}".bai) || $(gsutil -q stat "${bam%%.bam}".bai); then
elif $(test -e "${bam%%.bam}".bai) || $(gsutil ${REQUESTER_PROJECT:+-u $REQUESTER_PROJECT} -q stat "${bam%%.bam}".bai); then
bai="${bam%%.bam}".bai
else
echo "Cannot find the index file for $bam"
Expand Down Expand Up @@ -216,20 +216,20 @@ download_reference()
ref=$ref_dir/$(basename "$REF")
transfer "$REF" "$ref"
transfer "${REF}".fai "${ref}".fai
if $(test -e "${REF}".dict) || $(gsutil -q stat "${REF}".dict); then
if $(test -e "${REF}".dict) || $(gsutil ${REQUESTER_PROJECT:+-u $REQUESTER_PROJECT} -q stat "${REF}".dict); then
transfer "${REF}".dict "${ref}".dict
elif $(test -e "${REF%%.fa}".dict) || $(gsutil -q stat "${REF%%.fa}".dict); then
elif $(test -e "${REF%%.fa}".dict) || $(gsutil ${REQUESTER_PROJECT:+-u $REQUESTER_PROJECT} -q stat "${REF%%.fa}".dict); then
transfer "${REF%%.fa}".dict "${ref%%.fa}".dict
elif $(test -e "${REF%%.fasta}".dict) || $(gsutil -q stat "${REF%%.fasta}".dict); then
elif $(test -e "${REF%%.fasta}".dict) || $(gsutil ${REQUESTER_PROJECT:+-u $REQUESTER_PROJECT} -q stat "${REF%%.fasta}".dict); then
transfer "${REF%%.fasta}".dict "${ref%%.fasta}".dict
else
echo "Cannot find reference dictionary"
exit 1
fi
if [[ -n "$FQ1" || -n "$TUMOR_FQ1" ]]; then
if $(test -e "${REF}".64.amb) || $(gsutil -q stat "${REF}".64.amb); then
if $(test -e "${REF}".64.amb) || $(gsutil ${REQUESTER_PROJECT:+-u $REQUESTER_PROJECT} -q stat "${REF}".64.amb); then
middle=".64"
elif $(test -e "${REF}".amb) || $(gsutil -q stat "${REF}".amb); then
elif $(test -e "${REF}".amb) || $(gsutil ${REQUESTER_PROJECT:+-u $REQUESTER_PROJECT} -q stat "${REF}".amb); then
middle=""
else
echo "Cannot file BWA index files"
Expand All @@ -240,7 +240,7 @@ download_reference()
transfer "${REF}"${middle}.bwt "${ref}"${middle}.bwt
transfer "${REF}"${middle}.pac "${ref}"${middle}.pac
transfer "${REF}"${middle}.sa "${ref}"${middle}.sa
if $(test -e "${REF}"${middle}.alt) || $(gsutil -q stat "${REF}"${middle}.alt); then
if $(test -e "${REF}"${middle}.alt) || $(gsutil ${REQUESTER_PROJECT:+-u $REQUESTER_PROJECT} -q stat "${REF}"${middle}.alt); then
transfer "${REF}"${middle}.alt "${ref}"${middle}.alt
fi
fi
Expand Down Expand Up @@ -275,9 +275,9 @@ bwa_mem_align()
readgroup=${fun_rgs[$i]}
bwa_cmd="$release_dir/bin/bwa mem ${fun_bwa_xargs} -R \"${readgroup}\" -t $nt \"$ref\" "
if [[ -n "$STREAM_INPUT" ]]; then
bwa_cmd="$bwa_cmd <(gsutil cp $fq1 -) "
bwa_cmd="$bwa_cmd <(gsutil ${REQUESTER_PROJECT:+-u $REQUESTER_PROJECT} cp $fq1 -) "
if [[ -n "$fq2" ]]; then
bwa_cmd="$bwa_cmd <(gsutil cp $fq2 -) "
bwa_cmd="$bwa_cmd <(gsutil ${REQUESTER_PROJECT:+-u $REQUESTER_PROJECT} cp $fq2 -) "
fi
else
local_fq1=$input_dir/$(basename "$fq1")
Expand Down
4 changes: 2 additions & 2 deletions pipeline_scripts/gc_germline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ environmental_variables=(FQ1 FQ2 BAM OUTPUT_BUCKET REF READGROUP DEDUP \
BQSR_SITES DBSNP INTERVAL INTERVAL_FILE NO_METRICS NO_BAM_OUTPUT \
NO_HAPLOTYPER GVCF_OUTPUT STREAM_INPUT PIPELINE OUTPUT_CRAM_FORMAT \
SENTIEON_KEY RECALIBRATED_OUTPUT EMAIL SENTIEON_VERSION CALLING_ARGS \
DNASCOPE_MODEL CALLING_ALGO)
DNASCOPE_MODEL CALLING_ALGO REQUESTER_PROJECT)
unset_none_variables ${environmental_variables[@]}
OUTPUT_CRAM_FORMAT="" # Not yet supported

readonly FQ1 FQ2 BAM OUTPUT_BUCKET REF READGROUP DEDUP BQSR_SITES DBSNP \
INTERVAL INTERVAL_FILE NO_METRICS NO_BAM_OUTPUT NO_HAPLOTYPER GVCF_OUTPUT \
STREAM_INPUT PIPELINE OUTPUT_CRAM_FORMAT SENTIEON_KEY RECALIBRATED_OUTPUT \
EMAIL SENTIEON_VERSION CALLING_ARGS DNASCOPE_MODEL CALLING_ALGO
EMAIL SENTIEON_VERSION CALLING_ARGS DNASCOPE_MODEL CALLING_ALGO REQUESTER_PROJECT

release_dir="/opt/sentieon/sentieon-genomics-${SENTIEON_VERSION}/"

Expand Down
2 changes: 1 addition & 1 deletion pipeline_scripts/gc_somatic.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ environmental_variables=(FQ1 FQ2 TUMOR_FQ1 TUMOR_FQ2 BAM TUMOR_BAM \
OUTPUT_BUCKET REF READGROUP TUMOR_READGROUP DEDUP BQSR_SITES DBSNP \
INTERVAL INTERVAL_FILE NO_METRICS NO_BAM_OUTPUT NO_VCF RUN_TNSNV \
STREAM_INPUT PIPELINE REALIGN_SITES OUTPUT_CRAM_FORMAT SENTIEON_KEY \
EMAIL SENTIEON_VERSION CALLING_ARGS CALLING_ALGO)
EMAIL SENTIEON_VERSION CALLING_ARGS CALLING_ALGO REQUESTER_PROJECT)
unset_none_variables ${environmental_variables[@]}
OUTPUT_CRAM_FORMAT="" # Not yet supported

Expand Down
3 changes: 3 additions & 0 deletions runner/ccdg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,6 @@ inputParameters:
- name: CALLING_ALGO
description: The variant calling algorithm to use
defaultValue: Haplotyper
- name: REQUESTER_PROJECT
description: The requester project to use for gsutil requests on the remote server
defaultValue: None
3 changes: 3 additions & 0 deletions runner/germline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,6 @@ inputParameters:
- name: CALLING_ALGO
description: The variant calling algorithm to use
defaultValue: Haplotyper
- name: REQUESTER_PROJECT
description: The requester project to use for gsutil requests on the remote server
defaultValue: None
3 changes: 2 additions & 1 deletion runner/runner_default.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"OUTPUT_BUCKET": null,
"REF": null,
"EMAIL": null,
"SENTIEON_VERSION": "201911",
"SENTIEON_VERSION": "202010.01",
"READGROUP": "@RG\\tID:read-group\\tSM:sample-name\\tPL:ILLUMINA",
"DEDUP": "rmdup",
"BQSR_SITES": null,
Expand Down Expand Up @@ -33,6 +33,7 @@
"MACHINE_TYPE": "n1-highcpu-64",
"CPU_PLATFORM": "Intel Broadwell",
"PROJECT_ID": null,
"REQUESTER_PROJECT": null,
"DOCKER_IMAGE": "sentieon/sentieon-google-cloud:0.2.3",
"CALLING_ARGS": null,
"CALLING_ALGO": "Haplotyper",
Expand Down
6 changes: 3 additions & 3 deletions runner/sentieon_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def main(
# Grab input arguments from the json file
try:
job_vars = json.load(open(default_json))
except json.decoder.JSONDecodeError as e:
except ValueError as e:
logging.error("Error reading the default json file: " + default_json)
raise e
job_vars.update(pipeline_config)
Expand Down Expand Up @@ -487,7 +487,7 @@ def main(
tries += 1
if not new_op:
logging.error(
"Network error while polling running " "operation."
"Network error while polling running operation."
)
sys.exit(1)
operation = new_op
Expand Down Expand Up @@ -647,7 +647,7 @@ def main(
sys.exit(-1)
try:
pipeline_config = json.load(open(args.pipeline_config))
except json.decoder.JSONDecodeError as e:
except ValueError as e:
logging.error("Error reading the json file: " + args.pipeline_config)
raise e

Expand Down
3 changes: 3 additions & 0 deletions runner/somatic.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,6 @@ inputParameters:
- name: CALLING_ALGO
description: The variant calling algorithm to use
defaultValue: TNhaplotyper
- name: REQUESTER_PROJECT
description: The requester project to use for gsutil requests on the remote server
defaultValue: None

0 comments on commit 8eeea7d

Please sign in to comment.