-
Notifications
You must be signed in to change notification settings - Fork 0
/
fetch_nucleotides_from_alignments.pl
79 lines (69 loc) · 1.61 KB
/
fetch_nucleotides_from_alignments.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
sub usage{
print <<USAGE;
Name:
$0
Description:
get CDS region from a fasta format file.
Update:
2022-01-28 edit by Xiao-wen Hu
Usage:
perl $0 <f> <r> [options]
Options:
-r, --regions <string>* the region of CDS, start-end
-f, --fasta <string>* reference fasta file
-o, --outfile <string> output file [default: ./output ]
help print this help information
e.g
perl fetch_nucleotides_from_alignments.pl -f global.fas -r 266-1346,13468-21552 -o global_CDS.fas
USAGE
exit 0;
}
my ($help,$regions,$outfile,$fasta);
GetOptions(
"fasta|f=s" => \$fasta,
"region|r=s" => \$regions,
"outfile|o:s" => \$outfile,
"help|?" => \$help
);
die &usage if (!defined $regions || !defined $fasta || defined $help);
$outfile ||= "./output";
open FA,$fasta || die "Can't open the fasta file $!\n";
open(OUT, ">$outfile");
my @region=split/,/,$regions;
my $info = "";
my $seq = "";
while (<FA>){
s/\s+$//;
if(/^(>.*)/){
if($info){
&out_fasta($info, $seq);
$info = $1;
$seq = "";
}else{
$info = $1;
}
}else{
$seq .= $_;
}
if(eof){
&out_fasta($info, $seq);
}
}
close(FA);
sub out_fasta{
my ($info, $seq) = @_;
my $cds_seq = "";
foreach(@region){
s/\s+$//;
my ($start, $end)=split/-/;
my $split_len = $end - $start + 1;
my $began= $start - 1 ;
my $subseq = substr($seq, $began, $split_len);
$cds_seq .= $subseq;
}
print OUT $info."\n".$cds_seq."\n";
}