-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathNutVar2_snpEff_chunker.sh
executable file
·99 lines (57 loc) · 6.24 KB
/
NutVar2_snpEff_chunker.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# NutVar2 snpEff pipeline for a single VCF chunk.
#
# Usage: NutVar2_snpEff_chunker.sh <Softwaredir> <vcfinput> <output> <chunk>
#   $1 Softwaredir  NutVar2 root directory (e.g. ~/Downloads/nutvar2-master)
#   $2 vcfinput     user-supplied VCF (e.g. vep_example.vcf)
#   $3 output       accepted but not used by this script (e.g. data/final)
#   $4 chunk        label appended to the per-chunk file names
#
# NOTE(review): the helper scripts and the snpEff input/output are addressed
# through ${Softwaredir}, while most data files are addressed relative to the
# current directory. The steps only chain together when both resolve to the
# same place, so the script presumably has to be started from inside
# ${Softwaredir} - TODO confirm.
# NOTE(review): no step is checked for failure; later steps still run when an
# earlier one failed.

# Log the resource limits in effect for this run.
ulimit -a

Softwaredir=$1
vcfinput=$2
output=$3
chunk=$4

snpEFFdir=SOFTWARE/snpEff
# VEPdir, bindir3 and mypwd are not used below; kept from the original script.
VEPdir=SOFTWARE/ensembl-tools-release-75/scripts/variant_effect_predictor
bindir1=bin/shared
bindir2=bin/snpEff
bindir3=bin/VEP
datadir1=data/intermediate
datadir2=data/build_tables
datadir3=data/external
datadir4=data/final
mypwd=$(pwd)

# Files that several steps below share.
parsed="${datadir1}/out_snpeff_parsed_${chunk}.txt"
enst_table="${datadir2}/ENST_table_full_condensed.txt"
nmd_table="${datadir2}/NMD_table.txt"

# Drop the '##' meta-information lines and strip a leading "chr"/"Chr" from
# every remaining line.
# ISSUE: we should check here that the input VCF has the appropriate fields.
perl -ne 'chomp;unless($_=~/^##/){$_=~s/^[Cc]hr//;print "$_\n";}' \
  < "${vcfinput}" > "${datadir1}/vcfinput_${chunk}.vcf"

# Run the minimal representation script.
perl "${Softwaredir}/${bindir1}/2_Script_minimal_representation_vcf_7.0.pl" \
  "${datadir1}/vcfinput_${chunk}.vcf" \
  "${Softwaredir}/${datadir1}/vcfinput_mr_${chunk}.vcf"

# ISSUE: ask the user whether she wants to run SnpEff, VEP or both.
# NOTICE: the GRCh37.75 database for snpEff and the ENSEMBL version of genome
# 37.75 are installed through the install script.

# Run SnpEff.
echo "Runing Snpeff"
java -Xmx4g -jar "${Softwaredir}/${snpEFFdir}/snpEff.jar" eff \
  -c "${Softwaredir}/${snpEFFdir}/snpEff.config" \
  -v GRCh37.75 -lof -csvStats -nextProt -sequenceOntology \
  "${Softwaredir}/${datadir1}/vcfinput_mr_${chunk}.vcf" \
  > "${Softwaredir}/${datadir1}/vcfinput_mr_eff_${chunk}.vcf"

# Move the two stats files out of the current directory (presumably written
# there by snpEff). NOTE(review): neither name carries the chunk label, so
# chunks processed at the same time would share them - TODO confirm.
mv snpEff_genes.txt "${Softwaredir}/${datadir1}/snpEff_genes.txt"
mv snpEff_summary.csv "${Softwaredir}/${datadir1}/snpEff_summary.csv"

# Parse the results of snpEff.
# ISSUE: there are two more scripts, 24 and 25, in bin/snpEff/ - erase them?
echo "Parsing Snpeff results"
perl "${Softwaredir}/${bindir2}/24_snpEff_parser_def_minus_heather_2.0.pl" \
  "${datadir1}/vcfinput_mr_eff_${chunk}.vcf" \
  "${parsed}"

echo "snpEff done. Runing NUTVAR perl scripts for snpEff results"
# Script 25 is the longest step; the steps up to script 27 could run in
# parallel with it.
perl "${Softwaredir}/${bindir1}/25_Downstream_frameshift_API_independent_5.0.pl" \
  "${parsed}" \
  "${datadir2}/CDS_genomic_coordinates_full_compresed.txt" \
  "${datadir1}/snpeff_derived_PTCS_API_independent_${chunk}.txt"

perl "${Softwaredir}/${bindir1}/26_NEW_EXTRA_key_%_sequence_2.0.pl" \
  "${parsed}" \
  "${enst_table}" \
  "${datadir1}/snpeff_percentage_${chunk}.txt"

perl "${Softwaredir}/${bindir1}/27_key_NMD_5.0_3.0.pl" \
  "${parsed}" \
  "${nmd_table}" \
  "${enst_table}" \
  "${datadir1}/snpeff_NMD_${chunk}.txt"

# Protein/site table: pre-step, sort on the first eight fields, post-step.
perl "${Softwaredir}/${bindir1}/32_key_PROTEINS_8.0_GLOBAL_3.0.pl" \
  "${parsed}" \
  "${enst_table}" \
  "${datadir2}/ALL_ISOFORMS_PROTEIN_table_full.txt" \
  "${datadir1}/snpeff_detailed_ProtAndSite_Pre_step_${chunk}.txt"
sort -k1,1 -k2,2 -k3,3 -k4,4 -k5,5 -k6,6 -k7,7 -k8,8 \
  "${datadir1}/snpeff_detailed_ProtAndSite_Pre_step_${chunk}.txt" \
  > "${datadir1}/snpeff_detailed_ProtAndSite_Pre_step_ordered_${chunk}.txt"
perl "${Softwaredir}/${bindir1}/32_key_PROTEINS_8.0_GLOBAL_ParteII.pl" \
  "${datadir1}/snpeff_detailed_ProtAndSite_Pre_step_ordered_${chunk}.txt" \
  "${datadir1}/snpeff_detailed_ProtAndSite_Post_step_${chunk}.txt"

# Domain table: same three steps with the DOMAIN isoform table.
perl "${Softwaredir}/${bindir1}/32_key_PROTEINS_8.0_GLOBAL_3.0.pl" \
  "${parsed}" \
  "${enst_table}" \
  "${datadir2}/ALL_ISOFORMS_DOMAIN_table_full.txt" \
  "${datadir1}/snpeff_DOMAINS_Pre_step_${chunk}.txt"
sort -k1,1 -k2,2 -k3,3 -k4,4 -k5,5 -k6,6 -k7,7 -k8,8 \
  "${datadir1}/snpeff_DOMAINS_Pre_step_${chunk}.txt" \
  > "${datadir1}/snpeff_DOMAINS_Pre_step_ordered_${chunk}.txt"
perl "${Softwaredir}/${bindir1}/32_key_PROTEINS_8.0_GLOBAL_ParteII.pl" \
  "${datadir1}/snpeff_DOMAINS_Pre_step_ordered_${chunk}.txt" \
  "${datadir1}/snpeff_DOMAINS_Post_step_${chunk}.txt"

perl "${Softwaredir}/${bindir1}/27_key_NMD_5.0_DERIVED_STOPS_2.0.pl" \
  "${datadir1}/snpeff_derived_PTCS_API_independent_${chunk}.txt" \
  "${parsed}" \
  "${nmd_table}" \
  "${enst_table}" \
  "${datadir1}/snpeff_derived_NMD_${chunk}.txt"

perl "${Softwaredir}/${bindir1}/38_global_feature_table_1_4_paralell.pl" \
  "${datadir1}/snpeff_NMD_${chunk}.txt" \
  "${datadir1}/snpeff_derived_NMD_${chunk}.txt" \
  "${datadir1}/snpeff_detailed_ProtAndSite_Post_step_${chunk}.txt" \
  "${datadir1}/snpeff_DOMAINS_Post_step_${chunk}.txt" \
  "${datadir1}/snpeff_percentage_${chunk}.txt" \
  "${datadir1}/snpeff_first_table_${chunk}.txt"

perl "${Softwaredir}/${bindir1}/40_tabla_PEJMAN_16_def_2.0.pl" \
  "${datadir1}/snpeff_first_table_${chunk}.txt" \
  "${datadir1}/snpeff_NMD_${chunk}.txt" \
  "${datadir1}/snpeff_derived_NMD_${chunk}.txt" \
  "${datadir2}/gtf_output_ENST.txt" \
  "${datadir2}/gtf_output_ENSG.txt" \
  "${enst_table}" \
  "${datadir3}/appris_principal_isoform_gencode_19_15_10_2014.txt" \
  "${datadir3}/Pervasive.txt" \
  "${datadir1}/Matrix_snpeff_${chunk}.txt"

# NOTE(review): snpeff_NMD_CCDS.txt and snpeff_derived_NMD_CCDS.txt carry no
# chunk label, unlike the other per-chunk files; chunks processed at the same
# time would share these two paths - TODO confirm whether that is intended.
perl "${Softwaredir}/${bindir1}/41_CCDS_collapser_3.0.pl" \
  "${datadir2}/gtf_tabladef_sorted_by_SYMBOL.txt" \
  "${datadir1}/snpeff_NMD_${chunk}.txt" \
  "${datadir1}/snpeff_NMD_CCDS.txt" \
  "${datadir1}/snpeff_derived_NMD_${chunk}.txt" \
  "${datadir1}/snpeff_derived_NMD_CCDS.txt" \
  "${datadir2}/gtf_output_ENSG.txt" \
  "${datadir2}/gtf_output_ENSG_CCDS.txt" \
  "${datadir2}/gtf_output_ENST.txt" \
  "${datadir2}/gtf_output_ENST_CCDS.txt" \
  "${enst_table}" \
  "${datadir2}/ENST_table_full_condensed_CCDS.txt" \
  "${datadir3}/appris_principal_isoform_gencode_19_15_10_2014.txt" \
  "${datadir3}/appris_principal_isoform_gencode_19_15_10_2014_CCDS.txt" \
  "${datadir3}/Pervasive.txt" \
  "${datadir3}/Pervasive_CCDS.txt" \
  "${datadir1}/snpeff_first_table_${chunk}.txt" \
  "${datadir1}/snpeff_first_table_CCDS_${chunk}.txt"

perl "${Softwaredir}/${bindir1}/42_tabla_PEJMAN_15.0_version_paralel_4.0.pl" \
  "${datadir1}/snpeff_first_table_CCDS_${chunk}.txt" \
  "${datadir1}/snpeff_NMD_CCDS.txt" \
  "${datadir1}/snpeff_derived_NMD_CCDS.txt" \
  "${datadir2}/gtf_output_ENST_CCDS.txt" \
  "${datadir2}/gtf_output_ENSG_CCDS.txt" \
  "${datadir2}/ENST_table_full_condensed_CCDS.txt" \
  "${datadir3}/appris_principal_isoform_gencode_19_15_10_2014_CCDS.txt" \
  "${datadir3}/Pervasive_CCDS.txt" \
  "${datadir1}/Matrix_snpeff_CCDS_${chunk}.txt"

# -p: create missing parents and stay quiet when the directory already exists
# (this script runs once per chunk, so it usually does).
mkdir -p "${datadir4}"

# Fuse each matrix with the gene-based score tables.
perl "${Softwaredir}/${bindir1}/53BIS_Fuse_Matrix&Gene_based.pl" \
  "${datadir1}/Matrix_snpeff_${chunk}.txt" \
  "${datadir3}/pRDG2.txt" \
  "${datadir3}/Genes_AllInnateImmunity.txt" \
  "${datadir3}/Genes_Antiviral.txt" \
  "${datadir3}/Genes_ISGs.txt" \
  "${datadir3}/Genes_OMIMrecessive.txt" \
  "${datadir3}/RVIS2.txt" \
  "${datadir4}/Matrix_snpeff_added_gene_based_scores_${chunk}.txt"
perl "${Softwaredir}/${bindir1}/53BIS_Fuse_Matrix&Gene_based.pl" \
  "${datadir1}/Matrix_snpeff_CCDS_${chunk}.txt" \
  "${datadir3}/pRDG2.txt" \
  "${datadir3}/Genes_AllInnateImmunity.txt" \
  "${datadir3}/Genes_Antiviral.txt" \
  "${datadir3}/Genes_ISGs.txt" \
  "${datadir3}/Genes_OMIMrecessive.txt" \
  "${datadir3}/RVIS2.txt" \
  "${datadir4}/Matrix_snpeff_CCDS_added_gene_based_scores_${chunk}.txt"

echo "snpEff data matrix generated ${chunk}"