forked from fanagislab/EndHiC
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlinearize_GFA.pl
executable file
·124 lines (90 loc) · 2.24 KB
/
linearize_GFA.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/perl
=head1 Name
linear_GFA.pl -- linearize the GFA file
=head1 Description
Break the most lowest link in a circular cluster
=head1 Version
Author: Fan Wei, [email protected]
Version: 1.0, Date: 2006-12-6
Note:
=head1 Usage
linearize_GFA.pl <gfa_file> <gfa_cluster_file>
--verbose output running progress information to screen
--help output help information to screen
=head1 Exmple
linearize_GFA.pl gfa_file gfa_cluster_file > linearized_gfa_file
=cut
use strict;
use Getopt::Long;
use FindBin qw($Bin $Script);
use File::Basename qw(basename dirname);
use Data::Dumper;
##get options from command line into variables and set default values
my ($Verbose,$Help);
GetOptions(
"verbose"=>\$Verbose,
"help"=>\$Help
);
die `pod2text $0` if (@ARGV == 0 || $Help);
my $gfa_file = shift;
my $gfa_cluster_file = shift;
my %LinkData;
my %DeletedLines;
open IN, $gfa_file || die "fail open $gfa_file";
while (<IN>) {
my $line = $_;
if (/^L/) {
#L ptg000002l + ptg000026l + 0M L1:i:0 RM:i:1
my @t = split /\s+/;
my $type = $t[0];
my $ctg1 = $t[1];
my $ctg1_strand = $t[2];
my $ctg2 = $t[3];
my $ctg2_strand = $t[4];
my $match = $t[5];
my $L1 = $t[6];
my $RM = $t[7];
my $contact = $1 if($t[8] =~ /Contact:i:(\d+)/);
$LinkData{$line} = $contact;
}
}
close IN;
open IN, $gfa_cluster_file || die "fail open $gfa_cluster_file";
while (<IN>) {
my @t = split /\s+/;
my $cluster_id = shift @t;
my $ctgs_num = shift @t;
my $type = pop @t;
my @ctgs = @t;
if ($type =~ /Circular/) {
#print $cluster_id."\n";
my %LinesInCluster;
foreach my $ctg (@ctgs) {
foreach my $line (keys %LinkData) {
my $contact = $LinkData{$line};
if ($line =~ /$ctg/) {
$LinesInCluster{$line} = $contact;
}
}
}
my $min_contact = 1000000000;
my $min_line;
foreach my $line (keys %LinesInCluster) {
my $contact = $LinesInCluster{$line};
if ($contact < $min_contact) {
$min_contact = $contact;
$min_line = $line;
}
}
#print $min_line."\n";
$DeletedLines{$min_line} = 1;
}
}
close IN;
open IN, $gfa_file || die "fail open $gfa_file";
while (<IN>) {
my $line = $_;
if (! exists $DeletedLines{$line}) {
print $line;
}
}