-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathr2c_to_r2g.pl
executable file
·77 lines (59 loc) · 1.53 KB
/
r2c_to_r2g.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env perl
=head1 Author
Dinghua Li <[email protected]>
=head1 Usage
r2c_to_r2g.pl [options] <read2contigs.lsam> <contigs.lsam>
=head1 Options
-t score threshold [40]
=cut
use warnings;
use strict;
use Getopt::Long;
my %contig2gid;
my $scoreT = 40;
GetOptions(
"t=f" => \$scoreT
);
unless (@ARGV == 2) {
die `pod2text $0`;
}
my $lsam = $ARGV[0];
my $ctglsam = $ARGV[1];
my $in_ctg;
if ($ctglsam eq "-") {
$in_ctg = *STDIN;
} elsif ($ctglsam =~ /^(\S+)\.gz/) {
open($in_ctg, "gzip -cd $ctglsam |") or die "cannot open $ctglsam";
} else {
open($in_ctg, "<", "$ctglsam") or die "cannot open $ctglsam";
}
while (<$in_ctg>) {
chomp;
my ($name, undef, undef, undef, undef, $labels, @opts) = split "\t";
next unless ($name =~ /^contig_(\S+)/);
$contig2gid{$1} = $labels;
}
close($in_ctg) unless $ctglsam eq "-";
my $in;
if ($lsam eq "-") {
$in = *STDIN;
} elsif ($lsam =~ /^(\S+)\.gz/) {
open($in, "gzip -cd $lsam |") or die "cannot open $lsam";
} else {
open($in, "<", "$lsam") or die "cannot open $lsam";
}
while (<$in>) {
my ($name, $flag, $score, $seq, $qual, $ctgs, @opts) = split /\s/;
next if grep(/^IGNORE$/, @opts);
my @contigs = split ";", $ctgs;
my @gids;
foreach my $contig (@contigs) {
next if ($contig eq "*");
my ($score, $ctg) = split ",", $contig;
push(@gids, $contig2gid{$ctg}) if $score > $scoreT and exists $contig2gid{$ctg};
}
my $gidTag = "*";
$gidTag = join(";", @gids) if scalar(@gids) > 0;
print join("\t", $name, $flag, $score, "*", "*", $gidTag, @opts)."\n";
}
close($in) unless $lsam eq "-";