This repository has been archived by the owner on Oct 20, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
restrictlib.py
157 lines (142 loc) · 5.46 KB
/
restrictlib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#python library of restriction enzymes
import re
import random
from Bio import Restriction
"""
'all_suppliers', 'buffers', 'catalyse', 'catalyze', 'charac', 'characteristic', 'compatible_end', 'compsite', 'cut_once', 'cut_twice', 'dna', 'elucidate', 'equischizomers', 'freq', 'frequency', 'fst3', 'fst5', 'inact_temp', 'is_3overhang', 'is_5overhang', 'is_ambiguous', 'is_blunt', 'is_comm', 'is_defined', 'is_equischizomer', 'is_isoschizomer', 'is_methylable', 'is_neoschizomer', 'is_palindromic', 'is_unknown', 'isoschizomers', 'mro', 'neoschizomers', 'opt_temp', 'overhang', 'ovhg', 'ovhgseq', 'results', 'scd3', 'scd5', 'search', 'site', 'size', 'substrat', 'suppl', 'supplier_list', 'suppliers'
"""
"""
Useful attributes:
. cut_once - True if the enzyme cut the sequence one time on each strand.
. cut_twice - True if the enzyme cut the sequence twice on each strand.
. elucidate - Return a representation of the site with the cut on the (+) strand
represented as '^' and the cut on the (-) strand as '_'.
. fst5 -> first 5' cut (current strand) or None
. fst3 -> first 3' cut (complementary strand) or None
. is_palindromic - True if the enzyme has a palindromic recognition site.
. is_3overhang - True if the enzyme produces 3' overhang sticky end.
. is_5overhang - True if the enzyme produces 5' overhang sticky end.
. is_blunt - True if the enzyme produces blunt end.
. is_ambiguous - True if the sequence recognised and cut is ambiguous,
i.e. the recognition site is degenerated AND/OR the enzyme cut outside the site.
. is_defined - the recognition site is not degenerated AND the enzyme cut inside the site.
. is_unknown - True if the sequence is unknown, has not been characterised yet.
. overhang - Can be "3' overhang", "5' overhang", "blunt", "unknown".
. ovhgseq
. site -> recognition site
"""
#========================================
def check_for_good_ending(item):
    """Tell whether a name ends in one of the accepted suffixes.

    Accepted endings are the letters "I", "V" or "X", or an underscore
    followed by either ``mut<digit>`` or a single digit.

    The module's own comment marks this helper as not used / not useful.

    Args:
        item: the name (string) to inspect.

    Returns:
        True when the name has an accepted ending, otherwise False.
    """
    # str.endswith accepts a tuple, covering the three letter suffixes at once
    if item.endswith(("I", "V", "X")):
        return True
    numbered_suffixes = ("_mut[0-9]$", "_[0-9]$")
    return any(re.search(pattern, item) for pattern in numbered_suffixes)
#========================================
def has_strict_sequence(enzyme_class):
    """Tell whether the recognition site uses only unambiguous bases.

    Only the ``site`` attribute is inspected. The former checks on
    ``fst3`` and ``elucidate()`` sat after an unconditional ``return``
    and could never run, so they have been removed.

    Args:
        enzyme_class: object exposing a ``site`` string attribute.

    Returns:
        True when ``site`` is a non-empty run of the letters A, C, G
        and T only; False otherwise.
    """
    return re.search(r'^[ACGT]+$', enzyme_class.site) is not None
#========================================
def get_enzyme_list():
    """Collect the names of the enzymes in ``Restriction`` that pass every filter.

    Each name from ``dir(Restriction)`` is kept only if, in this order:

    * the name starts with one upper-case and two lower-case letters;
    * the resolved object has a ``site`` attribute;
    * ``is_palindromic()`` is not False;
    * ``cut_once()`` is not False;
    * ``is_ambiguous()`` is not True;
    * ``is_unknown()`` is not True;
    * ``fst3`` is not 0;
    * ``has_strict_sequence`` does not return False.

    Returns:
        A list of enzyme names (strings) in ``dir()`` order.
    """
    enzymes = []
    for item in dir(Restriction):
        # skip module members whose name is not shaped like an enzyme name
        if not re.match("^[A-Z][a-z][a-z]", item):
            continue
        enzyme_class = enzyme_name_to_class(item)
        # the name pattern also matches non-enzyme members; those lack 'site'
        if not hasattr(enzyme_class, 'site'):
            continue
        # identity comparisons are kept exactly as in the original filters
        if enzyme_class.is_palindromic() is False:
            continue
        if enzyme_class.cut_once() is False:
            continue
        if enzyme_class.is_ambiguous() is True:
            continue
        if enzyme_class.is_unknown() is True:
            continue
        if enzyme_class.fst3 == 0:
            continue
        if has_strict_sequence(enzyme_class) is False:
            continue
        enzymes.append(item)
    return enzymes
#========================================
def enzyme_name_to_class(enzyme_name):
    """Look up an attribute of the ``Restriction`` module by name.

    Args:
        enzyme_name: attribute name to resolve, e.g. an enzyme name.

    Returns:
        Whatever ``Restriction`` exposes under that name.

    Raises:
        AttributeError: if ``Restriction`` has no such attribute.
    """
    return getattr(Restriction, enzyme_name)
#========================================
def random_enzyme(enzymes=None):
    """Pick one enzyme at random and return its class.

    Args:
        enzymes: sequence of enzyme names to draw from; when None the
            list is built with ``get_enzyme_list()``.

    Returns:
        The object ``enzyme_name_to_class`` resolves for the drawn name.
    """
    pool = get_enzyme_list() if enzymes is None else enzymes
    return enzyme_name_to_class(random.choice(pool))
#========================================
def random_enzyme_one_end(enzymes=None, badletter="."):
    """Pick a random enzyme whose name is four characters long and ends in "I".

    Names starting with ``badletter`` are excluded.

    The original implementation redrew names until one qualified, which
    never terminated when no name in the list could qualify. Qualifying
    names are now filtered up front, so that case fails fast instead.

    Args:
        enzymes: iterable of enzyme names to draw from; when None the
            list is built with ``get_enzyme_list()``.
        badletter: prefix (or tuple of prefixes) a chosen name must not
            start with.

    Returns:
        The object ``enzyme_name_to_class`` resolves for the drawn name.

    Raises:
        IndexError: if no name qualifies. This is the same exception
            type ``random.choice`` raises for an empty sequence.
    """
    if enzymes is None:
        enzymes = get_enzyme_list()
    candidates = [
        name for name in enzymes
        if len(name) == 4
        and name.endswith("I")
        and not name.startswith(badletter)
    ]
    if not candidates:
        raise IndexError(
            "no 4-character enzyme name ending in 'I' and not starting "
            "with {0!r} is available".format(badletter)
        )
    # a uniform draw over the filtered names matches the distribution
    # the redraw-until-valid loop produced
    return enzyme_name_to_class(random.choice(candidates))
#========================================
def random_enzyme_with_overhang(enzymes=None):
    """Pick a random enzyme whose ``overhang()`` text ends with "overhang".

    The original implementation redrew names until one qualified, which
    never terminated when no enzyme in the list could qualify. The names
    are now visited once each in random order, so the search is bounded.

    Args:
        enzymes: iterable of enzyme names to draw from; when None the
            list is built with ``get_enzyme_list()``.

    Returns:
        The first enzyme class, in shuffled order, whose ``overhang()``
        result ends with "overhang".

    Raises:
        IndexError: if the list is empty or no enzyme qualifies. This is
            the same exception type ``random.choice`` raises for an
            empty sequence.
    """
    if enzymes is None:
        enzymes = get_enzyme_list()
    # shuffle a copy so the caller's list is left untouched; the first
    # qualifying entry of a random permutation is a uniform pick
    shuffled_names = list(enzymes)
    random.shuffle(shuffled_names)
    for enzyme_name in shuffled_names:
        enzyme_class = enzyme_name_to_class(enzyme_name)
        if enzyme_class.overhang().endswith('overhang'):
            return enzyme_class
    raise IndexError("no enzyme with an overhang is available")
#========================================
def format_enzyme(enzyme_class):
    """Render an enzyme's cut site as a 5'-...-3' string.

    The text from ``enzyme_class.elucidate()`` is rewritten so that each
    "^" becomes "|" and every "_" is dropped, then wrapped between
    "5'-" and "-3'".

    Args:
        enzyme_class: object exposing an ``elucidate()`` callable that
            returns a string.

    Returns:
        The formatted site string.
    """
    # one translation pass: map '^' to '|' and delete '_'
    cut_marks = str.maketrans("^", "|", "_")
    site_with_cut = enzyme_class.elucidate().translate(cut_marks)
    return "5'-" + site_with_cut + "-3'"
#========================================
def html_monospace(txt):
    """Wrap text in a ``<span>`` styled with a monospace font stack.

    The ``style`` attribute is delimited with double quotes because the
    font names inside it are single-quoted. The original used single
    quotes for both, which ended the attribute right after
    ``font-family: `` and produced broken markup.

    Args:
        txt: content placed inside the span. It is inserted as-is and
            is not HTML-escaped.

    Returns:
        The HTML string.
    """
    return (
        "<span style=\"font-family: 'andale mono', 'courier new', courier, monospace;\">"
        "{0}</span>"
    ).format(txt)
#========================================
#========================================
if __name__ == '__main__':
    # Command-line demo: build the enzyme list, report its size, then
    # show one randomly chosen enzyme that leaves an overhang.
    candidate_names = get_enzyme_list()
    print("Found {0:d} enzymes".format(len(candidate_names)))
    chosen = random_enzyme_with_overhang(candidate_names)
    # name, raw recognition site, then the formatted 5'->3' cut pattern
    print(chosen.__name__)
    print(chosen.site)
    print(format_enzyme(chosen))