|
1
2
3
4
5
6
|
'''
Created on 24 Jan 2014
@author: mlenart
'''
|
|
7
|
import copy
|
|
8
9
|
from morfeuszbuilder.segrules.rulesNFA import RulesNFAState
|
|
10
11
|
class SegmentRule(object):
    """Common base class for segmentation rules.

    Holds the source line number and the ``weak`` flag. Every hook except
    ``isSinkRule`` must be overridden by subclasses; the versions defined
    here raise ``NotImplementedError``.
    """

    def __init__(self, linenum):
        # A freshly created rule is not weak until setWeak() says otherwise.
        self.linenum = linenum
        self.weak = False

    def setWeak(self, weak):
        """Store the ``weak`` flag and return ``self`` to allow call chaining."""
        self.weak = weak
        return self

    def isSinkRule(self):
        """Return False; only sink rules override this to return True."""
        return False

    # --- hooks that concrete rules have to provide -------------------------

    def addToNFA(self, fsa):
        """Add this rule to the automaton ``fsa`` (abstract)."""
        raise NotImplementedError()

    def _doAddToNFA(self, startStates, endState):
        """Wire this rule between the given start and end states (abstract)."""
        raise NotImplementedError()

    def allowsEmptySequence(self):
        """Tell whether the rule matches an empty sequence (abstract)."""
        raise NotImplementedError()

    def isShiftOrthRule(self):
        """Tell whether the rule is a shift-orth rule (abstract)."""
        raise NotImplementedError()

    def transformToGeneratorVersion(self):
        """Return the generator version of this rule (abstract)."""
        raise NotImplementedError()
|
|
38
39
40
|
class TagRule(SegmentRule):
    """Leaf rule that matches a single segment identified by ``segnum``.

    ``segtype`` is used only when rendering the rule as text. ``shiftOrth``
    is the flag paired with ``segnum`` in the NFA transition label and is
    what ``isShiftOrthRule`` reports.
    """

    def __init__(self, segnum, shiftOrth, segtype, linenum):
        # Run the base initialiser so that ``self.weak`` always exists:
        # addToNFA() reads it, and without this call it raised
        # AttributeError unless setWeak() had been invoked beforehand.
        super(TagRule, self).__init__(linenum)
        self.segnum = segnum
        self.segtype = segtype
        self.shiftOrth = shiftOrth

    def addToNFA(self, fsa):
        """Add this rule to ``fsa`` as a top-level rule.

        Creates a new final state carrying this rule's ``weak`` flag and
        connects it to ``fsa.initialState``.
        """
        endState = RulesNFAState(self, final=True, weak=self.weak)
        self._doAddToNFA(fsa.initialState, endState)

    def _doAddToNFA(self, startState, endState):
        """Add one transition labelled ``(segnum, shiftOrth)`` between the states."""
        startState.addTransition((self.segnum, self.shiftOrth), endState)

    def allowsEmptySequence(self):
        """Return False: this rule always goes through one labelled transition."""
        return False

    def __str__(self):
        """Render as ``segtype(segnum)``, followed by ``>`` when ``shiftOrth`` is set."""
        res = self.segtype
        res += '(' + str(self.segnum) + ')'
        if self.shiftOrth:
            res += '>'
        return res

    def transformToGeneratorVersion(self):
        """Return a deep copy of this rule."""
        return copy.deepcopy(self)

    def isShiftOrthRule(self):
        """Return the ``shiftOrth`` flag given at construction."""
        return self.shiftOrth
|
|
70
71
72
|
class UnaryRule(SegmentRule):
    """Base class for rules that wrap exactly one child rule."""

    def __init__(self, child, linenum):
        """Store ``child``; a sink rule is rejected by assertion."""
        super(UnaryRule, self).__init__(linenum)
        self.child = child
        # Checked after the assignment, exactly as before.
        assert not child.isSinkRule()

    def isShiftOrthRule(self):
        """Delegate to the wrapped child rule."""
        wrapped = self.child
        return wrapped.isShiftOrthRule()
|
|
80
81
82
|
class ComplexRule(SegmentRule):
    """Base class for rules composed of several child rules."""

    def __init__(self, children, linenum):
        """Store ``children``; none of them may be a sink rule (asserted)."""
        super(ComplexRule, self).__init__(linenum)
        self.children = children
        assert not any(c.isSinkRule() for c in children)

    def addToNFA(self, fsa):
        """Add this rule to ``fsa`` as a top-level rule.

        A new final state carrying this rule's ``weak`` flag is created and
        the rule is wired between ``fsa.initialState`` and that state.
        """
        finalState = RulesNFAState(self, final=True, weak=self.weak)
        self._doAddToNFA(fsa.initialState, finalState)
|
|
91
92
93
|
class ConcatRule(ComplexRule):
    """Complex rule whose children are chained one after another."""

    def __init__(self, children, linenum):
        super(ConcatRule, self).__init__(children, linenum)

    def _doAddToNFA(self, startState, endState):
        """Chain the children between ``startState`` and ``endState``.

        Every child but the last gets its own fresh end state, which is
        linked by a ``None``-labelled transition to a fresh start state for
        the following child. The last child finishes in ``endState``.
        """
        leading, final = self.children[:-1], self.children[-1]
        entry = startState
        for part in leading:
            partExit = RulesNFAState(self)
            part._doAddToNFA(entry, partExit)
            followingEntry = RulesNFAState(self)
            partExit.addTransition(None, followingEntry)
            entry = followingEntry
        final._doAddToNFA(entry, endState)

    def allowsEmptySequence(self):
        """True only when every child allows an empty sequence."""
        return all(rule.allowsEmptySequence() for rule in self.children)

    def __str__(self):
        """Children rendered with ``str`` and separated by single spaces."""
        return u' '.join(str(c) for c in self.children)

    def isShiftOrthRule(self):
        """True only when every child is a shift-orth rule."""
        return all(c.isShiftOrthRule() for c in self.children)

    def transformToGeneratorVersion(self):
        """Build the generator version of this concatenation.

        Children that allow an empty sequence and are not shift-orth rules
        are dropped; the remaining ones are transformed. The result is a
        ``SinkRule`` when nothing remains, when a transformed child is a
        sink, or when any child follows a non-shift-orth child. Otherwise a
        new ``ConcatRule`` over the transformed children is returned.
        """
        kept = []
        for child in self.children:
            if not child.allowsEmptySequence() or child.isShiftOrthRule():
                kept.append(child.transformToGeneratorVersion())
        if kept == []:
            return SinkRule()

        sawNonShifting = False
        for candidate in kept:
            if candidate.isSinkRule() or sawNonShifting:
                return SinkRule()
            if not candidate.isShiftOrthRule():
                sawNonShifting = True
        return ConcatRule(kept, self.linenum)
|
|
131
132
|
class OrRule(ComplexRule):
    """Complex rule offering its children as alternatives."""

    def __init__(self, children, linenum):
        super(OrRule, self).__init__(children, linenum)

    def _doAddToNFA(self, startState, endState):
        """Wire each child as a separate branch from ``startState`` to ``endState``.

        Each branch gets its own pair of intermediate states, joined to the
        outer states by ``None``-labelled transitions.
        """
        for child in self.children:
            intermStartState = RulesNFAState(self)
            intermEndState = RulesNFAState(self)
            startState.addTransition(None, intermStartState)
            child._doAddToNFA(intermStartState, intermEndState)
            intermEndState.addTransition(None, endState)

    def allowsEmptySequence(self):
        """True when at least one child allows an empty sequence."""
        return any(rule.allowsEmptySequence() for rule in self.children)

    def __str__(self):
        """Children rendered with ``str`` and separated by ``|``."""
        return u'|'.join(str(c) for c in self.children)

    def isShiftOrthRule(self):
        """True only when every child is a shift-orth rule."""
        return all(c.isShiftOrthRule() for c in self.children)

    def transformToGeneratorVersion(self):
        """Build the generator version of this alternative.

        Children that allow an empty sequence and are not shift-orth rules
        are dropped; the remaining ones are transformed and any resulting
        sink rules are discarded. Returns a ``SinkRule`` when nothing is
        left, otherwise a new ``OrRule`` over the surviving children.
        """
        transformed = [
            child.transformToGeneratorVersion()
            for child in self.children
            if not child.allowsEmptySequence() or child.isShiftOrthRule()
        ]
        # Build a real list rather than calling filter(): on Python 3 filter()
        # returns an iterator, which never compares equal to [] and would be
        # exhausted by the assertion in ComplexRule.__init__.
        newChildren = [c for c in transformed if not c.isSinkRule()]
        if not newChildren:
            return SinkRule()
        return OrRule(newChildren, self.linenum)
|
|
161
162
|
class ZeroOrMoreRule(UnaryRule):
    """Unary rule letting its child match any number of times, including none."""

    def __init__(self, child, linenum):
        super(ZeroOrMoreRule, self).__init__(child, linenum)
        assert isinstance(child, SegmentRule)

    def addToNFA(self, fsa):
        """Always raises ``ValueError``: this rule cannot be added at top level."""
        raise ValueError()

    def _doAddToNFA(self, startState, endState):
        """Wire the child as a skippable loop between the two states.

        All added transitions are ``None``-labelled: ``startState`` leads
        both into the child and straight to ``endState``; the child's exit
        leads to ``endState`` and back to the child's entry.
        """
        loopEntry = RulesNFAState(self)
        loopExit = RulesNFAState(self)
        startState.addTransition(None, loopEntry)
        startState.addTransition(None, endState)
        self.child._doAddToNFA(loopEntry, loopExit)
        loopExit.addTransition(None, endState)
        # Back edge that allows the child to repeat.
        loopExit.addTransition(None, loopEntry)

    def allowsEmptySequence(self):
        """Always True."""
        return True

    def transformToGeneratorVersion(self):
        """Return a deep copy for a shift-orth rule, otherwise a ``SinkRule``."""
        if not self.isShiftOrthRule():
            return SinkRule()
        return copy.deepcopy(self)

    def __str__(self):
        """Render as ``(child)*``."""
        inner = str(self.child)
        return u'(' + inner + ')*'
|
|
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
|
class OptionalRule(UnaryRule):
    """Unary rule letting its child match once or not at all."""

    def __init__(self, child, linenum):
        super(OptionalRule, self).__init__(child, linenum)
        assert isinstance(child, SegmentRule)

    def addToNFA(self, fsa):
        """Always raises ``ValueError``: this rule cannot be added at top level."""
        raise ValueError()

    def _doAddToNFA(self, startState, endState):
        """Wire the child as a skippable branch between the two states.

        All added transitions are ``None``-labelled: ``startState`` leads
        both into the child and straight to ``endState``; the child's exit
        leads to ``endState``.
        """
        branchEntry = RulesNFAState(self)
        branchExit = RulesNFAState(self)
        startState.addTransition(None, branchEntry)
        startState.addTransition(None, endState)
        self.child._doAddToNFA(branchEntry, branchExit)
        branchExit.addTransition(None, endState)

    def allowsEmptySequence(self):
        """Always True."""
        return True

    def transformToGeneratorVersion(self):
        """Return a deep copy for a shift-orth rule, else the child's generator version."""
        if not self.isShiftOrthRule():
            return self.child.transformToGeneratorVersion()
        return copy.deepcopy(self)

    def __str__(self):
        """Render as ``(child)?``."""
        inner = str(self.child)
        return u'(' + inner + ')?'
|
|
221
222
223
224
225
226
227
228
229
230
231
|
class SinkRule(SegmentRule):
    """Placeholder rule: adds nothing to an NFA and prints as ``<<REMOVED>>``."""

    def __init__(self):
        # A sink rule has no source line of its own.
        super(SinkRule, self).__init__(None)

    def isSinkRule(self):
        """Always True."""
        return True

    def allowsEmptySequence(self):
        """Always False."""
        return False

    def addToNFA(self, fsa):
        """Do nothing."""
        return None

    def _doAddToNFA(self, startStates, endState):
        """Do nothing."""
        return None

    def transformToGeneratorVersion(self):
        """Return this same instance unchanged."""
        return self

    def __str__(self):
        return '<<REMOVED>>'
|