-
Notifications
You must be signed in to change notification settings - Fork 0
/
genbank_to_table.py
executable file
·70 lines (50 loc) · 1.75 KB
/
genbank_to_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python
import sys
import argparse
from gb_parsers.genbank_parsers import parse_genbank_features, parse_gb_feature_dict
# Column order of the output table; the same names form the header row.
TABLE_COLUMNS = ['organism', 'accession', 'country', 'isolate', 'description']

# Qualifiers copied from each source feature into the table. Only the first
# value of each qualifier is kept.
SOURCE_QUALIFIERS = ('country', 'isolate', 'organism')


def parse_args(argv=None):
    """Parse the command-line options.

    argv: list of argument strings; None means use sys.argv[1:].

    Returns the argparse namespace with ``input`` (path of the file listing
    GenBank files, one per line) and ``out`` (path of the table to write).
    Both options are required, so a missing one produces a usage error
    instead of a failure later when opening ``None``.
    """
    parser = argparse.ArgumentParser(
        description='create table from genbank information')
    parser.add_argument('-i', '--input_filelist', dest='input',
                        type=str, required=True,
                        help='file with list of gb files (one per line)')
    parser.add_argument('-o', '--table_file', dest='out',
                        type=str, required=True,
                        help='output tab-delimited file')
    return parser.parse_args(argv)


def add_record_fields(table, annots, source_feats):
    """Merge the information parsed from one GenBank file into ``table``.

    table: dict mapping a record key to a dict of column name -> value;
        updated in place and also returned.
    annots: dict mapping a record key to a dict holding 'seq_name' and
        'description'.
    source_feats: dict mapping a record key to a dict of qualifier name ->
        sequence of values.

    A qualifier that is absent or has no values is skipped, so the matching
    cell is left empty in the output rather than aborting the whole run.
    """
    for record_key, annot in annots.items():
        row = table.setdefault(record_key, {})
        row['accession'] = annot['seq_name']
        row['description'] = annot['description']
    for record_key, qualifiers in source_feats.items():
        row = table.setdefault(record_key, {})
        for name in SOURCE_QUALIFIERS:
            values = qualifiers.get(name)
            if values:
                row[name] = values[0]
    return table


def build_table(gb_filelist):
    """Parse every GenBank file named in ``gb_filelist`` into one table.

    gb_filelist: path of a text file with one GenBank file path per line.
        Blank lines are ignored, and both LF and CRLF line endings are
        accepted.

    Returns a dict mapping record key -> dict of column name -> value.
    """
    table = {}
    # The context manager closes the list file even if a parser call raises.
    with open(gb_filelist, 'r') as filelist:
        for line in filelist:
            gb_path = line.rstrip('\r\n')
            if not gb_path.strip():
                continue
            # annotate each file
            annotations = parse_genbank_features(gb_path)
            annots = annotations[0]
            feats = annotations[1]
            source_feats = parse_gb_feature_dict(feats)[0]
            add_record_fields(table, annots, source_feats)
    return table


def format_row(cells):
    """Return one table line: every cell followed by a tab, then a newline.

    The tab after the last cell is part of the format this script has always
    written, so it is kept for compatibility with existing consumers.
    """
    return ''.join(cell + '\t' for cell in cells) + '\n'


def write_table(table, tab_file):
    """Write ``table`` to ``tab_file`` as tab-delimited text.

    The first line is the header (TABLE_COLUMNS); each following line is one
    record, with an empty cell for any column the record lacks.
    """
    # The context manager closes the output file even if a write fails.
    with open(tab_file, 'w') as output:
        output.write(format_row(TABLE_COLUMNS))
        for row in table.values():
            output.write(
                format_row(row.get(column, '') for column in TABLE_COLUMNS))


def main(argv=None):
    """Run the script: read the options, build the table, write it out."""
    args = parse_args(argv)
    table = build_table(args.input)
    write_table(table, args.out)


if __name__ == '__main__':
    main()