forked from caixu0518/ITIPs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path02.get_TE_insertions_and_flankingSequences.pl
48 lines (30 loc) · 1.74 KB
/
02.get_TE_insertions_and_flankingSequences.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/perl
use warnings;
use strict;
use Getopt::Long;
#--Usage-----------------------------------------
# Driver for the second pipeline step. It checks that the result files of the
# first step are present in the current directory and then runs two helper
# scripts from the -script directory, one for the reference TE insertions and
# one for the non-reference TE insertions.
#
# Note: the two .xls names shown in the usage line are not read from @ARGV;
# the script always uses these fixed file names in the current directory.
my $usage=<<USAGE;
****** extract flanking seuqences of each TE insertion ******
Usage: perl $0 -refGenome <ref.fa> -refName <ref> -script <the path to scripts> reference_TE.insertions.xls non-reference_TE.insertions.xls
-refGenome [required] the reference genome in fasta foramt.
-refName [required] the reference genome name, same as provided in the first step.
-script [required] the path to perl scripts
Author: Xu Cai
Bug report: caixu0518\@163.com
USAGE

# $in0: reference genome FASTA; $in1: reference name; $script: helper directory.
my ($in0, $in1, $script);
GetOptions(
    "refGenome:s" => \$in0,
    "refName:s"   => \$in1,
    "script:s"    => \$script,
) or die $usage;    # unknown or malformed options: stop instead of continuing

# The ":s" specifiers make the value optional, so an option given without a
# value arrives as an empty string; treat that the same as a missing option.
die $usage if grep { !defined($_) || $_ eq '' } ($in0, $in1, $script);

# Both result tables of the first step must exist. Each file is tested on its
# own with the high-precedence "!"; the earlier form
# "not -e A || not -e B" parsed as "not (-e A || not -e B)" and therefore did
# not fire when both files (or only the second one) were missing.
my $refTable    = "reference_TE.insertions.xls";
my $nonRefTable = "non-reference_TE.insertions.xls";
for my $required ($refTable, $nonRefTable) {
    die "the results generated by the first step should be checked !!!!\n\n"
        if !-e $required;
}

my $INSFile = $in1.".merged.INS.gz.withMorethan80_TE_cov";
die "!!! cannot not find the $in1.merged.INS.gz.withMorethan80_TE_cov file, the results generated by the first step should be checked !!!!\n\n"
    if !-e $INSFile;

# Reference TE insertions and their flanking sequences.
run_cmd("perl $script/get_referenceTEinsertions_and_flanking_Seqs.pl $in0 $refTable $in1");

# Non-reference TE insertions and their flanking sequences.
run_cmd("perl $script/get_nonReferenceTEinsertions_and_flanking_Seqs.pl $in0 $nonRefTable $INSFile $in1");

# Log a timestamped command line to STDERR, run it through the shell, and die
# if the command does not exit with status 0.
sub run_cmd {
    my ($cmdString) = @_;
    print STDERR scalar(localtime), ": CMD: $cmdString\n";
    system($cmdString) == 0 or die "failed to execute: $cmdString\n";
    return;
}