|
1
2
3
4
5
6
|
'''
Created on 24 sty 2014
@author: mlenart
'''
|
|
7
|
from morfeuszbuilder.segrules.rulesFSA import RulesFSA, RulesState
|
|
8
9
10
|
class RulesNFAState(object):
|
|
11
12
|
statesCounter = 0
|
|
13
|
def __init__(self, rule, initial=False, final=False, weak=False, autogenerated=False):
|
|
14
|
self.rule = rule
|
|
15
|
self.transitionsMap = {}
|
|
16
|
# self.transitionsDataMap = {}
|
|
17
18
|
self.initial = initial
self.final = final
|
|
19
|
self.weak = weak
|
|
20
|
self.autogenerated = autogenerated
|
|
21
22
23
24
|
self.idx = RulesNFAState.statesCounter
RulesNFAState.statesCounter += 1
def addTransition(self, label, targetState):
|
|
25
|
assert label is None or len(label) == 2
|
|
26
27
28
29
30
31
32
33
34
35
36
37
38
|
self.transitionsMap.setdefault(label, set())
self.transitionsMap[label].add(targetState)
def getClosure(self, visited):
if self in visited:
return set()
else:
visited.add(self)
res = set()
res.add(self)
for nextState in self.transitionsMap.get(None, []):
res |= nextState.getClosure(visited)
return res
|
|
39
|
|
|
40
41
42
43
|
def dfs(self, visitedStates=set()):
if not self in visitedStates:
visitedStates.add(self)
yield self
|
|
44
|
for _, nextStates in list(self.transitionsMap.items()):
|
|
45
|
for state in nextStates:
|
|
46
|
for state1 in state.dfs(visitedStates):
|
|
47
48
49
|
yield state1
def debug(self):
|
|
50
51
52
53
|
print('----------------')
print(('STATE:', self.idx))
for label, nextStates in list(self.transitionsMap.items()):
print((label, '-->', [s.idx for s in sorted(nextStates, key=lambda s: s.idx)]))
|
|
54
55
56
|
class RulesNFA(object):
|
|
57
|
def __init__(self):
|
|
58
|
self.initialState = RulesNFAState(rule=None, initial=True)
|
|
59
|
|
|
60
61
62
|
def _groupOutputByLabels(self, nfaStates):
res = {}
for nfaState in nfaStates:
|
|
63
|
for label, nextStates in list(nfaState.transitionsMap.items()):
|
|
64
|
if label is not None:
|
|
65
66
67
|
# transitionData = nfaState.transitionsDataMap[label]
segnum, shiftOrth = label
res.setdefault((segnum, shiftOrth), set())
|
|
68
|
for nextNFAState in nextStates:
|
|
69
|
res[(segnum, shiftOrth)] |= nextNFAState.getClosure(set())
|
|
70
71
72
|
return res
def _doConvertState(self, dfaState, nfaStates, nfaSubset2DFAState):
|
|
73
|
weakHits = [state.weak for state in [state for state in nfaStates if state.final and not state.autogenerated]]
|
|
74
75
|
if not all(weakHits) \
and any(weakHits):
|
|
76
77
|
weakState = list([state for state in nfaStates if state.final and state.weak])[0]
nonWeakState = list([state for state in nfaStates if state.final and not state.weak])[0]
|
|
78
|
raise InconsistentStateWeaknessException(weakState, nonWeakState)
|
|
79
80
|
weak = any([state.weak and state.final for state in [state for state in nfaStates if not state.autogenerated]])
final = any([state.final for state in nfaStates])
|
|
81
|
# assert not weak or not final
|
|
82
83
84
|
if final:
# dfaState should be final
# and contain info about weakness
|
|
85
86
|
dfaState.setAsAccepting(weak=weak)
# dfaState.encodedData = bytearray([1 if weak else 0])
|
|
87
|
for (segnum, shiftOrth), nextNFAStates in list(self._groupOutputByLabels(nfaStates).items()):
|
|
88
89
90
91
|
key = frozenset(nextNFAStates)
if key in nfaSubset2DFAState:
nextDFAState = nfaSubset2DFAState[key]
else:
|
|
92
|
nextDFAState = RulesState()
|
|
93
94
|
nfaSubset2DFAState[key] = nextDFAState
self._doConvertState(nextDFAState, nextNFAStates, nfaSubset2DFAState)
|
|
95
96
|
dfaState.setTransition((segnum, shiftOrth), nextDFAState)
# dfaState.setTransitionData(label, transitionData)
|
|
97
98
|
def convertToDFA(self):
|
|
99
|
dfa = RulesFSA()
|
|
100
|
startStates = self.initialState.getClosure(set())
|
|
101
|
assert not any([s for s in startStates if s.final])
|
|
102
|
dfa.initialState = RulesState()
|
|
103
104
105
106
107
108
|
self._doConvertState(dfa.initialState, startStates, {frozenset(startStates): dfa.initialState})
return dfa
def debug(self):
for state in self.initialState.dfs():
state.debug()
|
|
109
110
111
112
113
114
|
class InconsistentStateWeaknessException(Exception):
def __init__(self, weakState, nonWeakState):
self.weakState = weakState
self.nonWeakState = nonWeakState
|