-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathparse_blastp_1hits_VFDB.pl
71 lines (62 loc) · 2.39 KB
/
parse_blastp_1hits_VFDB.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/perl
use strict;
use Bio::SearchIO;
die "Usage: $0 output.blast > besthit.tab \n" if(@ARGV != 1);
my $file = shift @ARGV;
my $in = Bio::SearchIO->new(-format => 'blast',
-file => $file);
print "Query\tQuery_function\tSubject\tQueryLen\tSubjectLen\tAlignmentLen\tQuerycov \(\%\)\tSubjectcov \(\%\)\tE_value\tIdentity \(\%\)\tSimilarity \(\%\)\tSymbol\tFunction\tVirulence factor\tVFDB ID\tSource species\n";
my $count = 0;
my $number = 0;
my $symbol = '';
my $subjectdes = '';
my $virulence = '';
my $vfdb = '';
my $species = '';
while( my $r = $in->next_result ) { #read a result
$count = 0;
$number++;
if ($r->num_hits < 1){
print $r->query_name,"\t","No hit\n";
next;
}
while( my $h = $r->next_hit ) { #read a hit
#warn " Hit is ", $h->name, " Hit raw scores ", $h->raw_score, " Hit Len is ", $h->length, " aa\n";
#>gb|ADN32826.1| BUN24p41.8 [Bacteroides uniformis]
my $hitlen = $h->length;
while( my $hsp = $h->next_hsp ) {
my $subjectname = $h->name;
$subjectname =~ s/\(.*?\)//g;
my $querycover = ($hsp->length('query'))/$r->query_length * 100;
$querycover = sprintf("%.1f",$querycover);
my $subjectcover = ($hsp->length('hit'))/$h->length * 100;
$subjectcover = sprintf("%.1f",$subjectcover);
my $identity = $hsp->frac_identical('total');
$identity = sprintf("%.1f", $identity*100);
my $similarity = $hsp->frac_conserved('total');
$similarity = sprintf("%.1f", $similarity*100);
my $alignment = $hsp->length('total');
my $desc = $h->description;
#warn $r->query_name, "\t$desc\n";
#> VFG013503(gi:30995470) (lsgA) lipopolysaccharide biosynthesis
#transporter [LOS (CVF494)] [Haemophilus influenzae Rd KW20]
if ($desc =~ /^(\S+)\s+(.*?)\s*\[(.*?)\s+\((.*?)\)\]\s*\[(.*?)\]/){
$symbol = $1;
$subjectdes = $2;
$virulence = $3;
$vfdb = $4;
$species = $5;
$symbol =~ s/^\(//g;
$symbol =~ s/\)$//g;
if ($count < 1){
print $r->query_name,"\t", $r->query_description,"\t", $subjectname, "\t",$r->query_length,"\t",$h->length,"\t",$alignment,"\t",
$querycover,"\t",$subjectcover,"\t",$hsp->evalue,"\t",$identity,"\t", $similarity, "\t",
$symbol,"\t",$subjectdes, "\t", $virulence, "\t", $vfdb, "\t", $species, "\n";
$count++;
}
}
last;
}
}
}
#warn "$number\n";