diff --git a/.idea/deployment.xml b/.idea/deployment.xml new file mode 100644 index 0000000..7a6a01f --- /dev/null +++ b/.idea/deployment.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..28a804d --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 0000000..115a47a --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,316 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + DEFINITION_ORDER + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1570932161168 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/project01/Digraph.gv b/project01/Digraph.gv new file mode 100644 index 0000000..79d40a4 --- /dev/null +++ b/project01/Digraph.gv @@ -0,0 +1,36 @@ +digraph { + graph [rankdir=LR] + "" [color=white shape=circle] + node4 [shape=doublecircle] + node5 [shape=doublecircle] + node13 [shape=doublecircle] + node14 [shape=doublecircle] + node1 [shape=circle] + node1 -> node2 [label=a] + node2 [shape=circle] + node2 -> node5 [label="ε"] + node3 [shape=circle] + node3 -> node4 [label=a] + node4 -> node3 [label="ε"] + node5 -> node3 [label="ε"] + node6 [shape=circle] + node6 -> node7 [label=a] + node7 [shape=circle] + node7 -> node8 [label="ε"] + node8 [shape=circle] + node8 -> node9 [label=b] + node9 [shape=circle] + node9 -> node14 [label="ε"] + node10 [shape=circle] + node10 -> node11 [label=a] + node11 [shape=circle] + node11 -> node12 [label="ε"] + node12 [shape=circle] + node12 -> node13 [label=b] + node13 -> node10 [label="ε"] + node14 -> node10 [label="ε"] + node15 [shape=circle] + node15 -> node1 [label="ε"] + node15 -> node6 [label="ε"] + "" -> node15 +} diff --git a/project01/Digraph.gv.pdf b/project01/Digraph.gv.pdf new file mode 100644 index 0000000..cbd4deb Binary files /dev/null and b/project01/Digraph.gv.pdf differ diff --git a/project01/__pycache__/nfa.cpython-36.pyc b/project01/__pycache__/nfa.cpython-36.pyc new file mode 100644 index 0000000..7f311c5 Binary files /dev/null and b/project01/__pycache__/nfa.cpython-36.pyc differ diff --git a/project01/__pycache__/regular_expression.cpython-36.pyc b/project01/__pycache__/regular_expression.cpython-36.pyc new file mode 100644 index 0000000..65dfeee Binary files /dev/null and b/project01/__pycache__/regular_expression.cpython-36.pyc differ diff --git a/project01/__pycache__/test_REtoNFA.cpython-36.pyc b/project01/__pycache__/test_REtoNFA.cpython-36.pyc new file mode 100644 index 0000000..5bae2b8 Binary files /dev/null and b/project01/__pycache__/test_REtoNFA.cpython-36.pyc differ diff --git a/project01/ip.txt b/project01/ip.txt index b0bb7df..5864bab 100644 --- a/project01/ip.txt +++ b/project01/ip.txt @@ -1,8 +1,13 @@ +292.25.1.9 +192.168.56.256 +1.1.1.256 192.168.1.1 -192. -28. -1. -192.168.1 -255.255.255.255 -64.11.5 -5. \ No newline at end of file +19.23.35.6 +123.55.2.3 +198.162.2.3 +1.1.1.1 +0.2.1.6 +255.255.255.256 +32.266.21.2 +899.23.2.2 + diff --git a/project01/nfa.py b/project01/nfa.py index 2c85919..dbc77b6 100644 --- a/project01/nfa.py +++ b/project01/nfa.py @@ -28,18 +28,19 @@ def __repr__(self): return str(self) def union_t_function(t_function1, t_function2): - t_function = t_function1.copy() + t_function = {} + for key in t_function1.keys(): + t_function[key] = t_function1[key].copy() + for key in t_function2.keys(): if key not in t_function: - t_function[key] = t_function2[key] + t_function[key] = t_function2[key].copy() else: - t_function[key] += t_function2[key] + t_function[key] += t_function2[key].copy() return t_function class NFA(): - """Summary of class here. - - Defination of the regular language. + """Definition of the Nondeterministic Finite Automaton. Attributes: alphabet: list object to store finate alphabet @@ -115,12 +116,12 @@ def set_start_state(self, state): Returns: Raises: """ - if state not in self.states: - self.states.insert(0, state) - self.t_function[state] = dict() - else: - self.states.remove(state) - self.states.insert(0, state) + # if state not in self.states: + # self.states.insert(0, state) + # self.t_function[state] = dict() + # else: + # self.states.remove(state) + # self.states.insert(0, state) self.s_state = state #------------------------------------------------------------------------------ @@ -218,7 +219,7 @@ def or_nfa(self, other_nfa): # then new_s_state is added to be the states of new_nfa # set to be new_s_state (all included in nfa.set_start_state method) - nfa.set_start_state(new_s_state) + nfa.add_state(new_s_state) nfa.add_function_item(new_s_state, EMPTY_STRING, s_state1) nfa.add_function_item(new_s_state, EMPTY_STRING, s_state2) return nfa @@ -265,27 +266,17 @@ def repeat_nfa(self): Raises: """ # TODO(ShipXu): XiaoHanHou implement this function. - # new_s_state = generate_state() - # old_s_state = self.get_start_state() - # new_f_states = self.f_states + [new_s_state] - # nfa = NFA(self.alphabet, self.states, new_s_state, new_f_states, self.t_function) - # nfa.add_state(new_s_state) - # nfa.set_start_state(new_s_state) - # nfa.add_function_item(new_s_state, EMPTY_STRING, old_s_state) - - # for f_state in self.f_states: - # nfa.add_function_item(f_state, EMPTY_STRING, old_s_state) - new_s_state = generate_state() old_s_state = self.get_start_state() - nfa = NFA(self.alphabet, self.states, new_s_state, self.f_states, self.t_function) + new_f_states = self.f_states.copy() + [new_s_state] + nfa = NFA(self.alphabet, self.states.copy(), new_s_state, new_f_states, self.t_function) + nfa.add_state(new_s_state) nfa.set_start_state(new_s_state) - nfa.add_f_state(new_s_state) + nfa.add_function_item(new_s_state, EMPTY_STRING, old_s_state) for f_state in self.f_states: nfa.add_function_item(f_state, EMPTY_STRING, old_s_state) - return nfa #------------------------------------------------------------------------------ @@ -312,24 +303,78 @@ def __str__(self): ret += item_str + '\n' return ret +#------------------------------------------------------------------------------ +# Recogize if string is legal +#------------------------------------------------------------------------------ + def run(self, s): + """ provide a method for using nfa to recognize string, + + nfa will determine if the string is belong to the grammar of nfa. + Args: + s : string that is needed to be judged + Returns: + if nfa recognized a string, return True + if string is illegal to this nfa, return False + Raises: + """ + return self._run(s, self.s_state) + + def _run(self, s, present_node): + """ provide a method for using nfa to recognize string, + + The method provides a bfs-like method for recognizing string s, + if s is empty and present_node is in the self.f_states: + we can conclude that string is recognizable. + if not: + first we deals with the EMPTY STRING situation, the functions will + pass s directly to the next search; + secondly, we can check if s[0] is the item of present_node's transition + function in the nfa, and we pass s[1:] (s[0] is used) to the next search; + + Args: + s : string that is needed to be judged + present_node : the current state of current search turn + Returns: + if nfa recognized a string, return True + if string is illegal to this nfa, return False + """ + + if not s: + if present_node in self.f_states: + return True + else: + return False + + if EMPTY_STRING in self.t_function[present_node]: + for to_node in self.t_function[present_node][EMPTY_STRING]: + if self._run(s, to_node): + return True + + if s[0] in self.t_function[present_node]: + for to_node in self.t_function[present_node][s[0]]: + if self._run(s[1:], to_node): + return True + + return False if __name__ == '__main__': alphabet = ['a', 'b'] # test01: generate_state + print('test01: generate_state') s1 = generate_state() s2 = generate_state() s3 = generate_state() print(s1, s2, s3) # test02: nfa + print('test02: generate nfa') states = [s1, s2] s_state = s1 f_states = [s1, s2] nfa = NFA(alphabet, states, s_state, f_states) nfa.add_function_item(s1, 'a', s2) - nfa.add_function_item(s2, 'a', s2) print(nfa) # generate states variable @@ -339,6 +384,7 @@ def __str__(self): print(states) # test03: nfa a ; nfa b + print('test03: nfa a ; nfa b') nfa1 = NFA(alphabet, states[0:2], states[0], [states[1]]) nfa1.add_function_item(states[0], 'a', states[1]) print('-------nfa1--------') @@ -348,7 +394,14 @@ def __str__(self): print('-------nfa2--------') print(nfa2) + new_states = generate_states(2) + nfa3 = NFA(alphabet, new_states[0:2], new_states[0], [new_states[1]]) + nfa3.add_function_item(new_states[0], 'a', new_states[1]) + print('-------nfa3--------') + print(nfa3) + # test04: test for t_function_union + print('test04: for t_function_union') t_function1 = nfa1.get_t_function() t_function2 = nfa2.get_t_function() @@ -358,18 +411,17 @@ def __str__(self): print(union_t_function(t_function1, t_function2)) # test05: test for '+' + print('test05: test for +') print('-------nfa1 + nfa2--------') print(nfa1 + nfa2) # test05: test for '|' + print('test05: test for |') print('-------(nfa1 + nfa2) | nfa3--------') - new_states = generate_states(2) - - nfa3 = NFA(alphabet, new_states[0:2], new_states[0], [new_states[1]]) - nfa3.add_function_item(new_states[0], 'a', new_states[1]) print((nfa1 + nfa2) | nfa3) # test06: test for '*' - # print('-------((nfa1 + nfa2) | nfa1).repeat()--------') - # print(((nfa1 + nfa2) | nfa1).repeat()) - print(nfa2.repeat()) \ No newline at end of file + print('test06: test for *') + print('-------((nfa1 + nfa2) | nfa3).repeat()--------') + print(((nfa1 + nfa2) | nfa3).repeat()) + nfa4 = ((nfa1 + nfa2) | nfa3).repeat() \ No newline at end of file diff --git a/project01/regular_expression.py b/project01/regular_expression.py index f4b6ac5..fba696e 100644 --- a/project01/regular_expression.py +++ b/project01/regular_expression.py @@ -1,14 +1,14 @@ EMPTY_STRING = '' class RE(): - """Defination of the regular language. + """Definition of the regular language. Attributes: - s: the string to describe the regular language - + s: the string that describes the regular language + alphabet: list object to store finite alphabet """ def __init__(self, alphabet, s): - # TODO check is word in s is in the alphabeta + # TODO check is word in s is in the alphabet self.alphabet = alphabet self.s = s @@ -40,32 +40,32 @@ def __str__(self): return self.s # a function object that can used to produce a re object -# when alphabelt is given -EMPTY_RE = (lambda alphabelt: RE(alphabelt, EMPTY_STRING)) +# when alphabet is given +EMPTY_RE = (lambda alphabet: RE(alphabet, EMPTY_STRING)) -def get_alphabelt_re(alphabelt): - ret = EMPTY_RE(alphabelt) +def get_alphabelt_re(alphabet): + ret = EMPTY_RE(alphabet) - if alphabelt: - ret = RE(alphabelt, alphabelt[0]) - for a in alphabelt[1:]: - ret |= RE(alphabelt, a) + if alphabet: + ret = RE(alphabet, alphabet[0]) + for a in alphabet[1:]: + ret |= RE(alphabet, a) return ret() if __name__ == '__main__': - alphabelt = ['0', '1'] + alphabet = ['0', '1'] - re_01 = (lambda s: RE(alphabelt, s)) + re_01 = (lambda s: RE(alphabet, s)) print(re_01('0').repeat()) - # test pratices in P65 of book + # test practices in P65 of book # test01: 0*10* - # print(RE(alphabelt, '0')) + # print(RE(alphabet, '0')) print(re_01('0').repeat() + re_01('1') + re_01('0').repeat()) - # test02: (alphabelt)*1(alphabelt)* - re_alphabeta = get_alphabelt_re(alphabelt) - print(re_alphabeta.repeat() + re_01('1') + re_alphabeta.repeat()) + # test02: (alphabet)*1(alphabet)* + re_alphabet = get_alphabelt_re(alphabet) + print(re_alphabet.repeat() + re_01('1') + re_alphabet.repeat()) # test03: 01 | 10 print(re_01('01') | re_01('10')) \ No newline at end of file diff --git a/project01/test_REtoNFA.py b/project01/test_REtoNFA.py index 6462645..b258336 100644 --- a/project01/test_REtoNFA.py +++ b/project01/test_REtoNFA.py @@ -88,7 +88,7 @@ def read_repeat(s, alphabet, nfa): # Your code here pass -def read_parentheses(s, nfa): +def read_parentheses(s, alphabet, nfa): # TODO(ShipXu): This fuction reserved for XiaoHanHou. # Your code here # p_nfa @@ -142,12 +142,13 @@ def read_token(s, alphabet, nfa=None): if not s: return nfa - if s[0] == '*': - return read_repeat(s[1:], alphabet, nfa) - elif s[0] == '|': + # if s[0] == '*': + # return read_repeat(s[1:], alphabet, nfa) + # el + if s[0] == '|': return read_or(s[1:], alphabet, nfa) elif s[0] == '(': - index_rp, p_nfa = read_parentheses(s[1:], nfa) + index_rp, p_nfa = read_parentheses(s[1:], alphabet, nfa) # consider the left parentheses index_rp += 1 if len(s) >= index_rp + 2 and s[index_rp + 1] == '*': @@ -181,10 +182,10 @@ def trans_RE_to_NFA(re): """ return _trans_RE_to_NFA(re.s, re.alphabet) -def add_parathese_repeat(s): - return _add_parathese_repeat(s, 0) +def add_parentheses_repeat(s): + return _add_parentheses_repeat(s, 0) -def _add_parathese_repeat(s, index): +def _add_parentheses_repeat(s, index): if not s: return s @@ -197,22 +198,28 @@ def _add_parathese_repeat(s, index): if new_r_index >= 2 and s[new_r_index - 1] != ')': return (s[0 : new_r_index - 1] + '(' + s[new_r_index - 1] + ')*' - + add_parathese_repeat(s[new_r_index + 1:])) + + add_parentheses_repeat(s[new_r_index + 1:])) if new_r_index >= 1 and s[new_r_index - 1] != ')': return ('(' + s[0 : new_r_index] + ')*' - + add_parathese_repeat(s[new_r_index + 1:])) + + add_parentheses_repeat(s[new_r_index + 1:])) + + return s if __name__ == '__main__': - # alphabet = ['a', 'b'] - - # regualar_string = '(b)* | a' - regular_string = 'aaaa | b*' - print('before add parethese : {}'.format(regular_string)) - print('after adding parethese : {}'.format(add_parathese_repeat(regular_string))) - alphabet = list(set([word for word in regular_string - if word.isalpha() or word.isdigit()])) - re = RE(alphabet, add_parathese_repeat(regular_string)) - - nfa = trans_RE_to_NFA(re) - print(nfa) \ No newline at end of file + with open('test_retonfa.txt') as f: + count = 1 + for line in f.readlines(): + regular_string = line.replace('\n', '') + print('test {}'.format(str(count))) + print('before add parethese : {}'.format(regular_string)) + print('after adding parethese : {}'.format(add_parentheses_repeat(regular_string))) + + alphabet = list(set([word for word in regular_string + if word.isalpha() or word.isdigit()])) + re = RE(alphabet, add_parentheses_repeat(regular_string)) + + nfa = trans_RE_to_NFA(re) + print('the nfa of {}'.format(add_parentheses_repeat(regular_string))) + print(nfa) + count += 1 \ No newline at end of file diff --git a/project01/test_ip.py b/project01/test_ip.py index b96d82e..cd552b2 100644 --- a/project01/test_ip.py +++ b/project01/test_ip.py @@ -17,7 +17,7 @@ def ip_match(input_string): Raises: """ - pattern_string = '([0-2]?[0-9]?[0-9]\.){3}([0-2]?[0-9]?[0-9])' + pattern_string ='(([1-9])\.|([1-9][0-9])\.|(1\d\d)\.|(2[0-4]\d)\.|(25[0-5])\.)(([1-9]?\d)\.|(1\d\d)\.|(2[0-4]\d)\.|(25[0-5])\.){2}(([1-9]?\d)|(1\d\d)|(2[0-4]\d)|(25[0-5]))$' match_ret = re.match(pattern_string, input_string) return match_ret is not None diff --git a/project01/test_retonfa.txt b/project01/test_retonfa.txt new file mode 100644 index 0000000..9bf1233 --- /dev/null +++ b/project01/test_retonfa.txt @@ -0,0 +1,5 @@ +a +b +ab +ab|b +(ab|b)* \ No newline at end of file diff --git a/project01/test_run_re.py b/project01/test_run_re.py new file mode 100644 index 0000000..c43aacf --- /dev/null +++ b/project01/test_run_re.py @@ -0,0 +1,15 @@ +from regular_expression import RE +from test_REtoNFA import trans_RE_to_NFA, add_parentheses_repeat + +if __name__ == '__main__': + regular_string = '(ab|a)*' + alphabet = list(set([word for word in regular_string + if word.isalpha() or word.isdigit()])) + re = RE(alphabet, add_parentheses_repeat(regular_string)) + nfa = trans_RE_to_NFA(re) + + with open('test_run_re.txt') as file: + for line in file.readlines(): + line = line.replace('\n', '') + if (nfa.run(line)): + print('matched and matched result {}'.format(line.replace('\n', ''))) \ No newline at end of file diff --git a/project01/test_run_re.txt b/project01/test_run_re.txt new file mode 100644 index 0000000..4c67a0e --- /dev/null +++ b/project01/test_run_re.txt @@ -0,0 +1,5 @@ +a +ab +aba +abbbbb +abaabababa \ No newline at end of file diff --git a/project01/visualize_nfa.py b/project01/visualize_nfa.py new file mode 100644 index 0000000..e2b4cce --- /dev/null +++ b/project01/visualize_nfa.py @@ -0,0 +1,37 @@ +from nfa import EMPTY_STRING +from regular_expression import RE +from test_REtoNFA import trans_RE_to_NFA, add_parentheses_repeat +from graphviz import Digraph + +if __name__ == '__main__': + regular_string = 'aa*|ab(ab)*' + alphabet = list(set([word for word in regular_string + if word.isalpha() or word.isdigit()])) + re = RE(alphabet, add_parentheses_repeat(regular_string)) + nfa = trans_RE_to_NFA(re) + print(nfa) + + graph = Digraph() + s_node = nfa.get_start_state() + _s_node = '' + graph.node(_s_node, shape="circle", color='white') + + for f_node in nfa.get_f_states(): + graph.node('%s'%f_node, shape="doublecircle") + + for node in nfa.t_function.keys(): + # if node == nfa.s_state or node in nfa.f_states: + if node not in nfa.get_f_states(): + graph.node('%s'%node, shape="circle") + for action in nfa.t_function[node].keys(): + for to_node in nfa.t_function[node][action]: + if action == EMPTY_STRING: + graph.edge('%s'%node, '%s'%to_node, 'ε') + else: + graph.edge('%s'%node, '%s'%to_node, '%s'%action) + + graph.edge(_s_node, '%s'%s_node) + graph.graph_attr['rankdir'] = 'LR' + graph.view() + # graph.format = 'png' + # graph.render('output-graph.gv', view=True) \ No newline at end of file