|
1
2
3
4
5
6
|
'''
Created on 24 Jan 2014
@author: mlenart
'''
|
|
7
|
import copy
|
|
8
|
from morfeuszbuilder.segrules.rulesNFA import RulesNFAState
|
|
9
|
from morfeuszbuilder.utils.exceptions import ConfigFileException
|
|
10
|
|
|
11
12
|
class SegmentRule(object):
    '''
    Base class of all segmentation rules.

    Stores the attributes shared by every rule (``linenum``, ``weak``,
    ``autogenerated``) and declares the operations concrete rules override.
    Operations without a sensible default raise NotImplementedError.
    '''

    def __init__(self, linenum):
        '''
        :param linenum: line number stored on the rule (subclasses in this
            file report it in ConfigFileException when validation fails)
        '''
        self.weak = False
        self.linenum = linenum
        self.autogenerated = False

    def setWeak(self, weak):
        '''Set the ``weak`` flag and return ``self`` so calls can be chained.'''
        self.weak = weak
        return self

    def addToNFA(self, fsa):
        '''Add this rule to ``fsa`` as a top-level rule; must be overridden.'''
        raise NotImplementedError()

    def allowsEmptySequence(self):
        '''Tell whether the rule allows an empty sequence; must be overridden.'''
        raise NotImplementedError()

    def _doAddToNFA(self, startStates, endState):
        '''Wire the rule between the given NFA states; must be overridden.'''
        raise NotImplementedError()

    def transformToGeneratorVersion(self):
        '''Return the generator variant of this rule; must be overridden.'''
        raise NotImplementedError()

    def isSinkRule(self):
        '''Return False; only SinkRule overrides this to return True.'''
        return False

    def isShiftOrthRule(self):
        '''Tell whether the rule is a shift-orth (">") rule; must be overridden.'''
        raise NotImplementedError()

    def getAtomicRules(self):
        '''Return the atomic rules contained in this rule; must be overridden.'''
        raise NotImplementedError()

    def getAdditionalAtomicRules4Generator(self):
        '''Return the extra atomic rules for the generator; must be overridden.'''
        raise NotImplementedError()

    def makeShiftOrthRule(self):
        '''Turn the rule into a shift-orth rule; a no-op by default.'''
        pass

    def __str__(self):
        '''
        Fallback textual form: the class name in angle brackets.

        Without it ``__repr__`` -> ``str(self)`` -> ``object.__str__`` ->
        ``__repr__`` never terminates for a rule class that defines no
        ``__str__`` of its own. Subclasses with their own ``__str__`` are
        unaffected.
        '''
        return '<%s>' % self.__class__.__name__

    def __repr__(self):
        '''Use the same text as ``str(self)``.'''
        return str(self)

    def validate(self, filename):
        '''
        Check the rule for configuration errors; nothing to check by default.

        :param filename: name of the file the rule comes from (subclasses in
            this file pass it to ConfigFileException)
        '''
        pass
|
|
56
57
|
class TagRule(SegmentRule):
    '''
    Atomic rule made of a single segment type.

    Its textual form is the segment type name, followed by ">" when the
    rule is a shift-orth rule.
    '''

    def __init__(self, segnum, shiftOrth, segtype, linenum, weak=False):
        '''
        :param segnum: segment number, used in the NFA transition label
        :param shiftOrth: whether the rule is marked with ">"
        :param segtype: segment type name, used in the textual form
        :param linenum: line number stored on the rule
        :param weak: value of the ``weak`` flag
        '''
        # The base initialiser sets linenum and the default flags.
        super(TagRule, self).__init__(linenum)
        self.segnum = segnum
        self.segtype = segtype
        self.shiftOrth = shiftOrth
        self.weak = weak

    def addToNFA(self, fsa):
        '''Add the rule to ``fsa``, from its initial state to a new final state.'''
        finalState = RulesNFAState(
            self,
            final=True,
            weak=self.weak,
            autogenerated=self.autogenerated)
        self._doAddToNFA(fsa.initialState, finalState)

    def _doAddToNFA(self, startState, endState):
        '''Add one transition labelled ``(segnum, shiftOrth)`` between the states.'''
        label = (self.segnum, self.shiftOrth)
        startState.addTransition(label, endState)

    def allowsEmptySequence(self):
        '''Return False.'''
        return False

    def __str__(self):
        '''Return the segment type name, with ">" appended for shift-orth rules.'''
        marker = '>' if self.shiftOrth else ''
        return self.segtype + marker

    def transformToGeneratorVersion(self):
        '''Return a deep copy of this rule.'''
        return copy.deepcopy(self)

    def isShiftOrthRule(self):
        '''Return the ``shiftOrth`` flag.'''
        return self.shiftOrth

    def makeShiftOrthRule(self):
        '''Mark the rule as a shift-orth rule.'''
        self.shiftOrth = True

    def getAtomicRules(self):
        '''Yield this rule, which is its own only atomic rule.'''
        yield self

    def getAdditionalAtomicRules4Generator(self):
        '''Return a one-element list with a deep copy flagged as autogenerated.'''
        clone = copy.deepcopy(self)
        clone.autogenerated = True
        return [clone]
|
|
101
102
|
class UnaryRule(SegmentRule):
    '''
    Base class for rules wrapping exactly one child rule.

    Queries and modifications are forwarded to the child.
    '''

    def __init__(self, child, linenum):
        '''
        :param child: the wrapped rule; must not be a sink rule
        :param linenum: line number stored on the rule
        '''
        super(UnaryRule, self).__init__(linenum)
        assert not child.isSinkRule()
        self.child = child

    def isShiftOrthRule(self):
        '''Return whether the child is a shift-orth rule.'''
        return self.child.isShiftOrthRule()

    def makeShiftOrthRule(self):
        '''Turn the child into a shift-orth rule.'''
        self.child.makeShiftOrthRule()

    def getAtomicRules(self):
        '''Yield the atomic rules of the child.'''
        for atomic in self.child.getAtomicRules():
            yield atomic

    def getAdditionalAtomicRules4Generator(self):
        '''Return whatever the child returns for the same call.'''
        return self.child.getAdditionalAtomicRules4Generator()

    def validate(self, filename):
        '''Validate the child rule.'''
        self.child.validate(filename)
|
|
124
125
|
class ComplexRule(SegmentRule):
    '''Base class for rules built from a collection of child rules.'''

    def __init__(self, children, linenum):
        '''
        :param children: the child rules; none of them may be a sink rule
        :param linenum: line number stored on the rule
        '''
        super(ComplexRule, self).__init__(linenum)
        self.children = children
        assert not any(child.isSinkRule() for child in children)

    def addToNFA(self, fsa):
        '''Add the rule to ``fsa``, from its initial state to a new final state.'''
        finalState = RulesNFAState(
            self,
            final=True,
            weak=self.weak,
            autogenerated=self.autogenerated)
        self._doAddToNFA(fsa.initialState, finalState)

    def getAtomicRules(self):
        '''Yield the atomic rules of every child, in child order.'''
        for subrule in self.children:
            for atomic in subrule.getAtomicRules():
                yield atomic

    def makeShiftOrthRule(self):
        '''Turn every child into a shift-orth rule.'''
        for subrule in self.children:
            subrule.makeShiftOrthRule()
|
|
143
|
|
|
144
145
|
class ConcatRule(ComplexRule):
    '''Concatenation of child rules; its textual form joins them with spaces.'''

    def __init__(self, children, linenum):
        '''
        :param children: list of child rules (must be exactly a ``list``)
        :param linenum: line number stored on the rule
        '''
        super(ConcatRule, self).__init__(children, linenum)
        assert type(children) == list

    def _doAddToNFA(self, startState, endState):
        '''
        Chain the children between ``startState`` and ``endState``.

        Every child except the last ends in a fresh state that is linked, by
        a transition labelled None, to a fresh state where the next child
        starts. The last child ends directly in ``endState``.
        '''
        entry = startState
        for child in self.children[:-1]:
            childExit = RulesNFAState(self)
            child._doAddToNFA(entry, childExit)
            # The next child starts in a new state of its own.
            entry = RulesNFAState(self)
            childExit.addTransition(None, entry)
        self.children[-1]._doAddToNFA(entry, endState)

    def allowsEmptySequence(self):
        '''Return True only when every child allows an empty sequence.'''
        return all(child.allowsEmptySequence() for child in self.children)

    def __str__(self):
        '''Return the children's textual forms separated by single spaces.'''
        return ' '.join(str(child) for child in self.children)

    def isShiftOrthRule(self):
        '''Return True only when every child is a shift-orth rule.'''
        return all(child.isShiftOrthRule() for child in self.children)

    def transformToGeneratorVersion(self):
        '''
        Return the generator variant of the concatenation.

        Children that allow an empty sequence and are not shift-orth rules
        are dropped; the others are transformed. The result is a SinkRule
        when nothing is left, when any transformed child is a sink rule, or
        when a non-shift-orth child is followed by any other child.
        Otherwise it is a new ConcatRule with this rule's ``weak`` flag.
        '''
        candidates = [
            child.transformToGeneratorVersion()
            for child in self.children
            if not child.allowsEmptySequence() or child.isShiftOrthRule()]
        if not candidates:
            return SinkRule()

        seenNonShifting = False
        for candidate in candidates:
            if candidate.isSinkRule() or seenNonShifting:
                return SinkRule()
            if not candidate.isShiftOrthRule():
                seenNonShifting = True

        return ConcatRule(candidates, self.linenum).setWeak(self.weak)

    def getAdditionalAtomicRules4Generator(self):
        '''
        Return the additional atomic rules for the generator.

        Consecutive shift-orth children are accumulated into a prefix. Each
        additional atomic rule of a following non-shift-orth child is
        prepended with that prefix (when there is one), after which the
        prefix is reset. Every returned rule is flagged as autogenerated.
        '''
        generated = []
        shiftPrefix = None
        for child in list(self.children):
            if child.isShiftOrthRule():
                if shiftPrefix:
                    shiftPrefix = ConcatRule([shiftPrefix, child], child.linenum)
                else:
                    shiftPrefix = child
                continue
            for atomic in child.getAdditionalAtomicRules4Generator():
                if shiftPrefix:
                    atomic = ConcatRule([shiftPrefix, atomic], atomic.linenum)
                generated.append(atomic)
            shiftPrefix = None
        for rule in generated:
            rule.autogenerated = True
        return generated

    def validate(self, filename):
        '''
        Validate the children, then the concatenation itself.

        :raises ConfigFileException: when the last child is a shift-orth
            rule but not all children are
        '''
        for child in self.children:
            child.validate(filename)
        if not self.children[-1].isShiftOrthRule():
            return
        if all(child.isShiftOrthRule() for child in self.children):
            return
        raise ConfigFileException(
            filename,
            self.linenum,
            'If the rightmost subrule of concatenation "%s" is with ">", than all subrules must be with ">"' % str(self))
|
|
215
|
|
|
216
217
|
class OrRule(ComplexRule):
    '''Alternative of child rules; its textual form joins them with " | ".'''

    def __init__(self, children, linenum):
        '''
        :param children: the alternative child rules
        :param linenum: line number stored on the rule
        '''
        super(OrRule, self).__init__(children, linenum)

    def _doAddToNFA(self, startState, endState):
        '''
        Add every child as a separate branch from ``startState`` to ``endState``.

        Each branch gets its own fresh entry and exit states, connected to
        the outer states by transitions labelled None.
        '''
        for child in self.children:
            branchEntry = RulesNFAState(self)
            branchExit = RulesNFAState(self)
            startState.addTransition(None, branchEntry)
            child._doAddToNFA(branchEntry, branchExit)
            branchExit.addTransition(None, endState)

    def allowsEmptySequence(self):
        '''Return True when at least one child allows an empty sequence.'''
        return any(child.allowsEmptySequence() for child in self.children)

    def __str__(self):
        '''Return the children's textual forms separated by " | ".'''
        return ' | '.join(str(child) for child in self.children)

    def isShiftOrthRule(self):
        '''Return True only when every child is a shift-orth rule.'''
        return all(child.isShiftOrthRule() for child in self.children)

    def transformToGeneratorVersion(self):
        '''
        Return the generator variant of the alternative.

        Children that allow an empty sequence and are not shift-orth rules
        are dropped; the others are transformed and sink results discarded.
        The result is a SinkRule when nothing is left, otherwise a new
        OrRule with this rule's ``weak`` flag.
        '''
        transformed = [
            child.transformToGeneratorVersion()
            for child in self.children
            if not child.allowsEmptySequence() or child.isShiftOrthRule()]
        alternatives = [alt for alt in transformed if not alt.isSinkRule()]
        if not alternatives:
            return SinkRule()
        return OrRule(alternatives, self.linenum).setWeak(self.weak)

    def getAdditionalAtomicRules4Generator(self):
        '''Return the additional atomic rules of all children, in child order.'''
        return [
            atomic
            for child in self.children
            for atomic in child.getAdditionalAtomicRules4Generator()]

    def validate(self, filename):
        '''
        Validate the children, then the alternative itself.

        :raises ConfigFileException: when some, but not all, children are
            shift-orth rules
        '''
        for child in self.children:
            child.validate(filename)
        shiftFlags = [child.isShiftOrthRule() for child in self.children]
        if any(shiftFlags) and not all(shiftFlags):
            raise ConfigFileException(
                filename,
                self.linenum,
                'All subrules of alternative "%s" must be either with or without ">"' % str(self))
|
|
264
|
|
|
265
266
|
class ZeroOrMoreRule(UnaryRule):
    '''Child rule repeated zero or more times; textual form "(child)*".'''

    def __init__(self, child, linenum):
        '''
        :param child: the repeated rule; must be a SegmentRule
        :param linenum: line number stored on the rule
        '''
        super(ZeroOrMoreRule, self).__init__(child, linenum)
        assert isinstance(child, SegmentRule)

    def addToNFA(self, fsa):
        '''Always raise ValueError: this rule cannot be added as a top-level rule.'''
        raise ValueError()

    def _doAddToNFA(self, startState, endState):
        '''
        Wire the child between two fresh states with skip and loop transitions.

        ``startState`` leads both into the child and straight to
        ``endState``; the child's exit leads to ``endState`` and back to the
        child's entry. All of these transitions are labelled None.
        '''
        loopEntry = RulesNFAState(self)
        loopExit = RulesNFAState(self)
        for target in (loopEntry, endState):
            startState.addTransition(None, target)
        self.child._doAddToNFA(loopEntry, loopExit)
        for target in (endState, loopEntry):
            loopExit.addTransition(None, target)

    def allowsEmptySequence(self):
        '''Return True.'''
        return True

    def transformToGeneratorVersion(self):
        '''Return a deep copy for a shift-orth rule, otherwise a SinkRule.'''
        if not self.isShiftOrthRule():
            return SinkRule()
        return copy.deepcopy(self)

    def __str__(self):
        '''Return the child's textual form wrapped as "(...)*".'''
        return '(%s)*' % str(self.child)
|
|
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
|
class OptionalRule(UnaryRule):
    '''Child rule that may be skipped; textual form "(child)?".'''

    def __init__(self, child, linenum):
        '''
        :param child: the optional rule; must be a SegmentRule
        :param linenum: line number stored on the rule
        '''
        super(OptionalRule, self).__init__(child, linenum)
        assert isinstance(child, SegmentRule)

    def addToNFA(self, fsa):
        '''Always raise ValueError: this rule cannot be added as a top-level rule.'''
        raise ValueError()

    def _doAddToNFA(self, startState, endState):
        '''
        Wire the child between two fresh states with a skip transition.

        ``startState`` leads both into the child and straight to
        ``endState``; the child's exit leads to ``endState``. All of these
        transitions are labelled None.
        '''
        innerEntry = RulesNFAState(self)
        innerExit = RulesNFAState(self)
        for target in (innerEntry, endState):
            startState.addTransition(None, target)
        self.child._doAddToNFA(innerEntry, innerExit)
        innerExit.addTransition(None, endState)

    def allowsEmptySequence(self):
        '''Return True.'''
        return True

    def transformToGeneratorVersion(self):
        '''
        Return a deep copy for a shift-orth rule, otherwise the generator
        variant of the child.
        '''
        if not self.isShiftOrthRule():
            return self.child.transformToGeneratorVersion()
        return copy.deepcopy(self)

    def __str__(self):
        '''Return the child's textual form wrapped as "(...)?".'''
        return '(%s)?' % str(self.child)
|
|
325
326
327
328
329
330
331
332
333
334
335
|
class SinkRule(SegmentRule):
    '''
    Placeholder for a removed rule.

    It has no line number, adds nothing to an NFA and is the only rule for
    which ``isSinkRule()`` returns True.
    '''

    def __init__(self):
        super(SinkRule, self).__init__(None)

    def addToNFA(self, fsa):
        '''Do nothing.'''
        return None

    def allowsEmptySequence(self):
        '''Return False.'''
        return False

    def _doAddToNFA(self, startStates, endState):
        '''Do nothing.'''
        return None

    def transformToGeneratorVersion(self):
        '''Return this very object.'''
        return self

    def isSinkRule(self):
        '''Return True.'''
        return True

    def __str__(self):
        '''Return the fixed marker text.'''
        return '<<REMOVED>>'

    def getAdditionalAtomicRules4Generator(self):
        '''Return a new empty list.'''
        return list()
|