Blame view

fsabuilder/morfeuszbuilder/segrules/rules.py 10.9 KB
Michał Lenart authored
1
2
3
4
5
6
'''
Created on 24 Jan 2014

@author: mlenart
'''
Michał Lenart authored
7
import copy
Michał Lenart authored
8
from morfeuszbuilder.segrules.rulesNFA import RulesNFAState
Michał Lenart authored
9
from morfeuszbuilder.utils.exceptions import ConfigFileException
Michał Lenart authored
10
Michał Lenart authored
11
12
class SegmentRule(object):
    """Common base class of the segmentation rules defined in this module.

    The constructor stores ``linenum`` and initialises the ``weak`` and
    ``autogenerated`` flags to ``False``.  Every method that raises
    ``NotImplementedError`` here has to be supplied by a subclass.
    """

    def __init__(self, linenum):
        self.weak = False
        self.linenum = linenum
        self.autogenerated = False

    def setWeak(self, weak):
        """Store ``weak`` on the rule and return the rule itself (chainable)."""
        self.weak = weak
        return self

    def addToNFA(self, fsa):
        """Add this rule to ``fsa``; subclasses must override."""
        raise NotImplementedError()

    def allowsEmptySequence(self):
        """Tell whether the rule allows an empty sequence; subclasses must override."""
        raise NotImplementedError()

    def _doAddToNFA(self, startStates, endState):
        """Wire the rule between the given states; subclasses must override."""
        raise NotImplementedError()

    def transformToGeneratorVersion(self):
        """Return the generator variant of the rule; subclasses must override."""
        raise NotImplementedError()

    def isSinkRule(self):
        """Return ``False``; only a sink rule overrides this to return ``True``."""
        return False

    def isShiftOrthRule(self):
        """Tell whether the rule is a shift-orth rule; subclasses must override."""
        raise NotImplementedError()

    def getAtomicRules(self):
        """Return the atomic rules of this rule; subclasses must override."""
        raise NotImplementedError()

    def getAdditionalAtomicRules4Generator(self):
        """Return extra atomic rules for the generator; subclasses must override."""
        raise NotImplementedError()

    def makeShiftOrthRule(self):
        """Do nothing by default; subclasses override to mark themselves."""
        pass

    def __str__(self):
        """Return a fallback textual form built from the class name."""
        # ``__repr__`` delegates to ``str(self)``.  Without this fallback,
        # ``object.__str__`` would call ``repr(self)`` again and recurse
        # forever for any subclass that does not define ``__str__`` itself.
        return '<%s>' % type(self).__name__

    def __repr__(self):
        """Return the same text as ``str(self)``."""
        return str(self)

    def validate(self, filename):
        """Accept the rule unconditionally; subclasses add real checks."""
        pass
Michał Lenart authored
56
57
class TagRule(SegmentRule):
    """Leaf rule that adds one transition labelled ``(segnum, shiftOrth)``.

    ``segtype`` is the text used by ``__str__``; ``shiftOrth`` is rendered
    there as a trailing ``'>'``.
    """

    def __init__(self, segnum, shiftOrth, segtype, linenum, weak=False):
        # Delegate the shared attributes (linenum, weak, autogenerated) to the
        # base class, as the other subclasses do, then apply the weak flag.
        super(TagRule, self).__init__(linenum)
        self.segnum = segnum
        self.segtype = segtype
        self.shiftOrth = shiftOrth
        self.weak = weak

    def addToNFA(self, fsa):
        """Attach the rule to ``fsa.initialState`` with a new final state."""
        finalState = RulesNFAState(
            self,
            final=True,
            weak=self.weak,
            autogenerated=self.autogenerated)
        self._doAddToNFA(fsa.initialState, finalState)

    def _doAddToNFA(self, startState, endState):
        """Add the ``(segnum, shiftOrth)`` transition from start to end."""
        label = (self.segnum, self.shiftOrth)
        startState.addTransition(label, endState)

    def allowsEmptySequence(self):
        """Return ``False``: this rule always adds exactly one labelled transition."""
        return False

    def __str__(self):
        """Return ``segtype``, followed by ``'>'`` when ``shiftOrth`` is set."""
        if self.shiftOrth:
            return self.segtype + '>'
        return self.segtype

    def transformToGeneratorVersion(self):
        """Return a deep copy of this rule."""
        return copy.deepcopy(self)

    def isShiftOrthRule(self):
        """Return the ``shiftOrth`` flag."""
        return self.shiftOrth

    def makeShiftOrthRule(self):
        """Turn the ``shiftOrth`` flag on."""
        self.shiftOrth = True

    def getAtomicRules(self):
        """Yield this rule itself, the only atomic rule it contains."""
        yield self

    def getAdditionalAtomicRules4Generator(self):
        """Return a one-element list with a deep copy flagged as autogenerated."""
        clone = copy.deepcopy(self)
        clone.autogenerated = True
        return [clone]
Michał Lenart authored
101
102
class UnaryRule(SegmentRule):
    """Base class for rules that wrap exactly one child rule.

    Shift-orth queries, atomic-rule enumeration and validation are all
    forwarded to the wrapped child.
    """

    def __init__(self, child, linenum):
        super(UnaryRule, self).__init__(linenum)
        self.child = child
        # A sink rule must never be wrapped.
        assert not child.isSinkRule()

    def isShiftOrthRule(self):
        """Return whatever the child reports."""
        return self.child.isShiftOrthRule()

    def makeShiftOrthRule(self):
        """Ask the child to become a shift-orth rule."""
        self.child.makeShiftOrthRule()

    def getAtomicRules(self):
        """Yield every atomic rule of the child."""
        for atomic in self.child.getAtomicRules():
            yield atomic

    def getAdditionalAtomicRules4Generator(self):
        """Return the child's additional atomic rules unchanged."""
        return self.child.getAdditionalAtomicRules4Generator()

    def validate(self, filename):
        """Validate the child, passing ``filename`` through."""
        self.child.validate(filename)
Michał Lenart authored
124
125
class ComplexRule(SegmentRule):
    """Base class for rules built from several child rules."""

    def __init__(self, children, linenum):
        super(ComplexRule, self).__init__(linenum)
        self.children = children
        # No child may be a sink rule.
        assert not any([child.isSinkRule() for child in children])

    def addToNFA(self, fsa):
        """Attach the rule to ``fsa.initialState`` with a new final state."""
        finalState = RulesNFAState(
            self,
            final=True,
            weak=self.weak,
            autogenerated=self.autogenerated)
        self._doAddToNFA(fsa.initialState, finalState)

    def getAtomicRules(self):
        """Yield the atomic rules of every child, in child order."""
        for child in self.children:
            for atomic in child.getAtomicRules():
                yield atomic

    def makeShiftOrthRule(self):
        """Ask every child to become a shift-orth rule."""
        for child in self.children:
            child.makeShiftOrthRule()
Michał Lenart authored
143
Michał Lenart authored
144
145
class ConcatRule(ComplexRule):
    """Rule that chains its children one after another."""

    def __init__(self, children, linenum):
        super(ConcatRule, self).__init__(children, linenum)
        # isinstance instead of an exact type comparison, so list subclasses
        # are accepted as well.
        assert isinstance(children, list)

    def _doAddToNFA(self, startState, endState):
        """Chain the children between ``startState`` and ``endState``.

        Every child except the last one gets a fresh exit state, which is
        linked by a ``None``-labelled transition to a fresh entry state for
        the next child.  The last child is wired directly to ``endState``.
        """
        entryState = startState
        for child in self.children[:-1]:
            exitState = RulesNFAState(self)
            child._doAddToNFA(entryState, exitState)
            nextEntryState = RulesNFAState(self)
            exitState.addTransition(None, nextEntryState)
            entryState = nextEntryState
        self.children[-1]._doAddToNFA(entryState, endState)

    def allowsEmptySequence(self):
        """Return ``True`` only when every child allows an empty sequence."""
        return all([child.allowsEmptySequence() for child in self.children])

    def __str__(self):
        """Return the children's texts joined with single spaces."""
        return ' '.join([str(child) for child in self.children])

    def isShiftOrthRule(self):
        """Return ``True`` only when every child is a shift-orth rule."""
        return all([child.isShiftOrthRule() for child in self.children])

    def transformToGeneratorVersion(self):
        """Return the generator variant of this rule, or a ``SinkRule``.

        Children that allow an empty sequence and are not shift-orth rules
        are dropped; the rest are transformed.  A ``SinkRule`` is returned
        when nothing is left, when a transformed child is a sink, or when a
        non-shift-orth child is followed by any other child.  Otherwise a new
        ``ConcatRule`` with the same ``linenum`` and ``weak`` flag is built.
        """
        newChildren = [
            child.transformToGeneratorVersion()
            for child in self.children
            if not child.allowsEmptySequence() or child.isShiftOrthRule()]
        if not newChildren:
            return SinkRule()
        seenNonShifting = False
        for child in newChildren:
            # The sink test comes first: a sink rule is never asked whether
            # it is a shift-orth rule.
            if child.isSinkRule() or seenNonShifting:
                return SinkRule()
            if not child.isShiftOrthRule():
                seenNonShifting = True
        res = ConcatRule(newChildren, self.linenum)
        res.setWeak(self.weak)
        return res

    def getAdditionalAtomicRules4Generator(self):
        """Return the additional atomic rules for the generator.

        Consecutive shift-orth children are accumulated into a prefix.  For
        each non-shift-orth child, every additional atomic rule it reports is
        concatenated after the pending prefix (or taken as is when there is
        none), and the prefix is then reset.  Shift-orth children not
        followed by a non-shift-orth child contribute nothing.  All returned
        rules are flagged as autogenerated.
        """
        res = []
        shiftPrefix = None
        # The loop never modifies self.children, so no copy is needed.
        for rule in self.children:
            if rule.isShiftOrthRule():
                if shiftPrefix is None:
                    shiftPrefix = rule
                else:
                    shiftPrefix = ConcatRule([shiftPrefix, rule], rule.linenum)
                continue
            for atomicRule in rule.getAdditionalAtomicRules4Generator():
                if shiftPrefix is None:
                    res.append(atomicRule)
                else:
                    res.append(ConcatRule([shiftPrefix, atomicRule],
                                          atomicRule.linenum))
            shiftPrefix = None
        for rule in res:
            rule.autogenerated = True
        return res

    def validate(self, filename):
        """Validate all children, then check the ">" consistency of this rule.

        Raises ``ConfigFileException`` when the last child is a shift-orth
        rule but not all children are.  The check does not depend on the loop
        variable, so it runs once after the children have been validated
        rather than once per child.
        """
        for child in self.children:
            child.validate(filename)
        if self.children and self.children[-1].isShiftOrthRule() \
                and not all([c.isShiftOrthRule() for c in self.children]):
            raise ConfigFileException(
                filename,
                self.linenum,
                'If the rightmost subrule of concatenation "%s" is with ">", than all subrules must be with ">"' % str(self))
Michał Lenart authored
215
Michał Lenart authored
216
217
class OrRule(ComplexRule):
    """Rule that offers its children as alternatives."""

    def __init__(self, children, linenum):
        super(OrRule, self).__init__(children, linenum)

    def _doAddToNFA(self, startState, endState):
        """Add one branch per child between ``startState`` and ``endState``.

        Each child gets its own pair of fresh states, joined to the shared
        start and end states by ``None``-labelled transitions.
        """
        for child in self.children:
            branchStart = RulesNFAState(self)
            branchEnd = RulesNFAState(self)
            startState.addTransition(None, branchStart)
            child._doAddToNFA(branchStart, branchEnd)
            branchEnd.addTransition(None, endState)

    def allowsEmptySequence(self):
        """Return ``True`` when at least one child allows an empty sequence."""
        return any([child.allowsEmptySequence() for child in self.children])

    def __str__(self):
        """Return the children's texts joined with ``' | '``."""
        return ' | '.join([str(child) for child in self.children])

    def isShiftOrthRule(self):
        """Return ``True`` only when every child is a shift-orth rule."""
        return all([child.isShiftOrthRule() for child in self.children])

    def transformToGeneratorVersion(self):
        """Return the generator variant of this rule, or a ``SinkRule``.

        Children that allow an empty sequence and are not shift-orth rules
        are dropped; the rest are transformed and sink results discarded.
        A ``SinkRule`` is returned when nothing is left, otherwise a new
        ``OrRule`` with the same ``linenum`` and ``weak`` flag.
        """
        transformed = [
            child.transformToGeneratorVersion()
            for child in self.children
            if not child.allowsEmptySequence() or child.isShiftOrthRule()]
        newChildren = [c for c in transformed if not c.isSinkRule()]
        if not newChildren:
            return SinkRule()
        res = OrRule(newChildren, self.linenum)
        res.setWeak(self.weak)
        return res

    def getAdditionalAtomicRules4Generator(self):
        """Return the additional atomic rules of all children in one list."""
        res = []
        for child in self.children:
            res.extend(child.getAdditionalAtomicRules4Generator())
        return res

    def validate(self, filename):
        """Validate all children, then check the ">" consistency of this rule.

        Raises ``ConfigFileException`` when some, but not all, children are
        shift-orth rules.  The check does not depend on the loop variable, so
        it runs once after the children have been validated rather than once
        per child.
        """
        for child in self.children:
            child.validate(filename)
        if not (
                all([c.isShiftOrthRule() for c in self.children])
                or not any([c.isShiftOrthRule() for c in self.children])):
            raise ConfigFileException(
                filename,
                self.linenum,
                'All subrules of alternative "%s" must be either with or without ">"' % str(self))
Michał Lenart authored
264
Michał Lenart authored
265
266
class ZeroOrMoreRule(UnaryRule):
    """Unary rule printed as ``(child)*``: the child may be skipped or repeated."""

    def __init__(self, child, linenum):
        super(ZeroOrMoreRule, self).__init__(child, linenum)
        assert isinstance(child, SegmentRule)

    def addToNFA(self, fsa):
        """Always raise ``ValueError``: this rule is never added on its own."""
        raise ValueError()

    def _doAddToNFA(self, startState, endState):
        """Wire the child between two fresh states with skip and loop edges.

        ``None``-labelled transitions lead from ``startState`` both into the
        child and straight to ``endState``, and from the child's exit state
        both to ``endState`` and back to the child's entry state.
        """
        childEntry = RulesNFAState(self)
        childExit = RulesNFAState(self)

        startState.addTransition(None, childEntry)
        startState.addTransition(None, endState)
        self.child._doAddToNFA(childEntry, childExit)
        childExit.addTransition(None, endState)
        # The back edge is what permits repetition of the child.
        childExit.addTransition(None, childEntry)

    def allowsEmptySequence(self):
        """Return ``True``: ``startState`` is linked directly to ``endState``."""
        return True

    def transformToGeneratorVersion(self):
        """Return a deep copy for a shift-orth rule, otherwise a ``SinkRule``."""
        if not self.isShiftOrthRule():
            return SinkRule()
        return copy.deepcopy(self)

    def __str__(self):
        """Return the child's text wrapped as ``(...)*``."""
        return '(' + str(self.child) + ')*'
Michał Lenart authored
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316

class OptionalRule(UnaryRule):
    """Unary rule printed as ``(child)?``: the child may be skipped."""

    def __init__(self, child, linenum):
        super(OptionalRule, self).__init__(child, linenum)
        assert isinstance(child, SegmentRule)

    def addToNFA(self, fsa):
        """Always raise ``ValueError``: this rule is never added on its own."""
        raise ValueError()

    def _doAddToNFA(self, startState, endState):
        """Wire the child between two fresh states with a skip edge.

        ``None``-labelled transitions lead from ``startState`` both into the
        child and straight to ``endState``, and from the child's exit state
        to ``endState``.
        """
        childEntry = RulesNFAState(self)
        childExit = RulesNFAState(self)

        startState.addTransition(None, childEntry)
        startState.addTransition(None, endState)
        self.child._doAddToNFA(childEntry, childExit)
        childExit.addTransition(None, endState)

    def allowsEmptySequence(self):
        """Return ``True``: ``startState`` is linked directly to ``endState``."""
        return True

    def transformToGeneratorVersion(self):
        """Return a deep copy for a shift-orth rule, else the child's variant."""
        if not self.isShiftOrthRule():
            return self.child.transformToGeneratorVersion()
        return copy.deepcopy(self)

    def __str__(self):
        """Return the child's text wrapped as ``(...)?``."""
        return '(' + str(self.child) + ')?'
Michał Lenart authored
325
326
327
328
329
330
331
332
333
334
335

class SinkRule(SegmentRule):
    """Placeholder for a removed rule; it adds nothing to an automaton.

    Instances are created with ``linenum`` set to ``None``.
    """

    def __init__(self):
        super(SinkRule, self).__init__(None)

    def addToNFA(self, fsa):
        """Do nothing and return ``None``."""
        return None

    def allowsEmptySequence(self):
        """Return ``False``."""
        return False

    def _doAddToNFA(self, startStates, endState):
        """Do nothing and return ``None``."""
        return None

    def transformToGeneratorVersion(self):
        """Return this very object; no copy is made."""
        return self

    def isSinkRule(self):
        """Return ``True``."""
        return True

    def __str__(self):
        """Return the fixed marker text for a removed rule."""
        return '<<REMOVED>>'

    def getAdditionalAtomicRules4Generator(self):
        """Return a new empty list."""
        return []