forked from owensgl/reformat
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbed2genes.pl
79 lines (77 loc) · 1.83 KB
/
bed2genes.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/bin/perl
use warnings;
use strict;
#This turns the ubc annotation bed file into a more easily readable bed file.
my $first_line;
my @genes;
my %start;
my %stop;
my %chr;
my %product;
my %pos_gene;
my $genecount = 0;
while(<STDIN>){
chomp;
my @a = split(/\t/,$_);
if ($a[3] =~ m/mRNA/){
$genecount++;
if ($genecount % 5000 == 0){
print STDERR "Processed $genecount genes\n";
}
my $chr = $a[0];
my $start = $a[1];
my $end = $a[2];
my @info = split(/;/,$a[9]);
my $gene = $info[1];
$gene =~ s/Parent=//;
my $description = "NA";
foreach my $n (3..$#info){
if ($info[$n] =~ m/^product/){
$description = $info[$n];
$description =~ s/product=//;
$description =~ s/%3B/;/g;
$description =~ s/%2C/,/g;
}
}
push (@genes, $gene);
$start{$gene} = $start;
$stop{$gene} = $end;
$chr{$gene} = $chr;
$pos_gene{$chr.$start.$end} = $gene;
$product{$gene} = $description;
}
if ($a[3] =~ m/^match/){
my $chr = $a[0];
my $start = $a[1];
my $end = $a[2];
my $gene;
if ($pos_gene{$chr.$start.$end}){
$gene = $pos_gene{$chr.$start.$end};
}else{
next;
}
my @info = split(/;/,$a[9]);
my $description = "NA";
foreach my $n (3..$#info){
if ($info[$n] =~ m/^product/){
$description = $info[$n];
$description =~ s/product=//;
$description =~ s/%3B/;/g;
$description =~ s/%2C/,/g;
}
}
if ($description ne "NA"){
if ($description ne $product{$gene}){
$product{$gene} .= "/ $description";
}
}
}
}
foreach my $gene (@genes){
if ($first_line){
print "\n$gene\t$chr{$gene}\t$start{$gene}\t$stop{$gene}\t$product{$gene}";
}else{
print "$gene\t$chr{$gene}\t$start{$gene}\t$stop{$gene}\t$product{$gene}";
$first_line++;
}
}