rulesNFA.py
3.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
'''
Created on 24 sty 2014
@author: mlenart
'''
from morfeuszbuilder.fsa import fsa, state, encode
class RulesNFAState(object):
statesCounter = 0
def __init__(self, initial=False, final=False, weak=False):
self.transitionsMap = {}
self.transitionsDataMap = {}
self.initial = initial
self.final = final
self.weak = weak
self.idx = RulesNFAState.statesCounter
RulesNFAState.statesCounter += 1
def addTransition(self, label, targetState):
self.transitionsMap.setdefault(label, set())
self.transitionsMap[label].add(targetState)
self.transitionsDataMap[label] = 0
def setTransitionData(self, label, byte):
assert len(self.transitionsMap[label]) == 1
self.transitionsDataMap[label] = byte
def getClosure(self, visited):
if self in visited:
return set()
else:
visited.add(self)
res = set()
res.add(self)
for nextState in self.transitionsMap.get(None, []):
res |= nextState.getClosure(visited)
return res
def dfs(self, visitedStates=set()):
if not self in visitedStates:
visitedStates.add(self)
yield self
for _, nextStates in self.transitionsMap.iteritems():
for state in nextStates:
for state1 in state.dfs(visitedStates):
yield state1
def debug(self):
print '----------------'
print 'STATE:', self.idx
for label, nextStates in self.transitionsMap.iteritems():
print label, '-->', [s.idx for s in sorted(nextStates, key=lambda s: s.idx)]
class RulesNFA(object):
def __init__(self):
self.initialState = RulesNFAState(initial=True)
def _groupOutputByLabels(self, nfaStates):
res = {}
for nfaState in nfaStates:
for label, nextStates in nfaState.transitionsMap.iteritems():
if label is not None:
transitionData = nfaState.transitionsDataMap[label]
res.setdefault((label, transitionData), set())
for nextNFAState in nextStates:
res[(label, transitionData)] |= nextNFAState.getClosure(set())
return res
def _doConvertState(self, dfaState, nfaStates, nfaSubset2DFAState):
assert all(map(lambda state: state.weak, nfaStates)) \
or not any(map(lambda state: state.weak, nfaStates))
weak = all(map(lambda state: state.weak or not state.final, nfaStates))
final = any(map(lambda state: state.final, nfaStates))
assert not weak or not final
if final:
# dfaState should be final
# and contain info about weakness
dfaState.encodedData = bytearray([1 if weak else 0])
for (label, transitionData), nextNFAStates in self._groupOutputByLabels(nfaStates).iteritems():
key = frozenset(nextNFAStates)
if key in nfaSubset2DFAState:
nextDFAState = nfaSubset2DFAState[key]
else:
nextDFAState = state.State()
nfaSubset2DFAState[key] = nextDFAState
self._doConvertState(nextDFAState, nextNFAStates, nfaSubset2DFAState)
dfaState.setTransition(label, nextDFAState)
dfaState.setTransitionData(label, transitionData)
def convertToDFA(self):
dfa = fsa.FSA(encoder=None, encodeData=False, encodeWords=False)
startStates = self.initialState.getClosure(set())
assert not any(filter(lambda s: s.final, startStates))
dfa.initialState = state.State(additionalData=False)
self._doConvertState(dfa.initialState, startStates, {frozenset(startStates): dfa.initialState})
return dfa
def debug(self):
for state in self.initialState.dfs():
state.debug()