Blame view

fsabuilder/morfeuszbuilder/segrules/rules.py 7.42 KB
Michał Lenart authored
1
2
3
4
5
6
'''
Created on 24 sty 2014

@author: mlenart
'''
Michał Lenart authored
7
import copy
Michał Lenart authored
8
9
from morfeuszbuilder.segrules.rulesNFA import RulesNFAState
Michał Lenart authored
10
11
class SegmentRule(object):
    '''
    Base class of every segmentation-rule node.

    Holds the two attributes shared by all rules (``weak`` and ``linenum``)
    and declares the operations concrete rules are expected to provide.
    Except for ``setWeak`` and ``isSinkRule`` every operation raises
    ``NotImplementedError`` here and has to be overridden.
    '''

    def __init__(self, linenum):
        '''
        :param linenum: value stored as ``self.linenum`` (presumably the line
            of the rules file this rule was read from -- confirm with caller)
        '''
        # A rule is non-weak until setWeak() says otherwise.
        self.weak = False
        self.linenum = linenum

    def setWeak(self, weak):
        '''Set the ``weak`` flag and return ``self`` so calls can be chained.'''
        self.weak = weak
        return self

    def addToNFA(self, fsa):
        '''Add this rule to ``fsa`` as a top-level rule (abstract).'''
        raise NotImplementedError()

    def allowsEmptySequence(self):
        '''Tell whether the rule matches the empty sequence (abstract).'''
        raise NotImplementedError()

    def _doAddToNFA(self, startStates, endState):
        '''Build the rule's NFA fragment between the given states (abstract).'''
        raise NotImplementedError()

    def transformToGeneratorVersion(self):
        '''Return the generator-mode counterpart of this rule (abstract).'''
        raise NotImplementedError()

    def isSinkRule(self):
        '''Return False; only ``SinkRule`` overrides this to return True.'''
        return False

    def isShiftOrthRule(self):
        '''Tell whether the rule is a shift-orth rule (abstract).'''
        raise NotImplementedError()
Michał Lenart authored
38
39
40

class TagRule(SegmentRule):
    '''
    Leaf rule matching a single segment.

    The rule contributes exactly one NFA transition, labelled with the pair
    ``(segnum, shiftOrth)``.
    '''

    def __init__(self, segnum, shiftOrth, segtype, linenum):
        '''
        :param segnum: segment number used in the transition label
        :param shiftOrth: flag used in the transition label; also decides
            whether ``__str__`` appends ``'>'``
        :param segtype: textual segment type, used only by ``__str__``
        :param linenum: forwarded to ``SegmentRule.__init__``
        '''
        # Delegate to the base class so that ``weak`` is always initialised;
        # addToNFA() reads it even when setWeak() was never called.
        super(TagRule, self).__init__(linenum)
        self.segnum = segnum
        self.segtype = segtype
        self.shiftOrth = shiftOrth

    def addToNFA(self, fsa):
        '''Add the rule to ``fsa``, ending in a fresh final state.'''
        endState = RulesNFAState(self, final=True, weak=self.weak)
        self._doAddToNFA(fsa.initialState, endState)

    def _doAddToNFA(self, startState, endState):
        '''Connect ``startState`` to ``endState`` with this rule's label.'''
        startState.addTransition((self.segnum, self.shiftOrth), endState)

    def allowsEmptySequence(self):
        '''A tag rule always consumes one segment, so this is False.'''
        return False

    def __str__(self):
        '''Render as ``segtype(segnum)``, followed by ``>`` when shiftOrth.'''
        res = self.segtype
        res += '(' + str(self.segnum) + ')'
        if self.shiftOrth:
            res += '>'
        return res

    def transformToGeneratorVersion(self):
        '''Return an independent deep copy of this rule.'''
        return copy.deepcopy(self)

    def isShiftOrthRule(self):
        '''Return the ``shiftOrth`` flag.'''
        return self.shiftOrth
Michał Lenart authored
70
71
72

class UnaryRule(SegmentRule):
    '''Base class for rules wrapping exactly one child rule.'''

    def __init__(self, child, linenum):
        '''
        :param child: the wrapped rule; must not be a sink rule
        :param linenum: forwarded to ``SegmentRule.__init__``
        '''
        super(UnaryRule, self).__init__(linenum)
        self.child = child
        assert not child.isSinkRule()

    def isShiftOrthRule(self):
        '''A unary rule is shift-orth exactly when its child is.'''
        return self.child.isShiftOrthRule()
Michał Lenart authored
80
81
82

class ComplexRule(SegmentRule):
    '''Base class for rules built from a sequence of child rules.'''

    def __init__(self, children, linenum):
        '''
        :param children: child rules; none of them may be a sink rule.
            The object is stored as-is (not copied) in ``self.children``.
        :param linenum: forwarded to ``SegmentRule.__init__``
        '''
        super(ComplexRule, self).__init__(linenum)
        self.children = children
        assert not any(c.isSinkRule() for c in children)

    def addToNFA(self, fsa):
        '''Add the rule to ``fsa``, ending in a fresh final state.'''
        endState = RulesNFAState(self, final=True, weak=self.weak)
        self._doAddToNFA(fsa.initialState, endState)
Michał Lenart authored
91
92
93

class ConcatRule(ComplexRule):
    '''Rule matching its children one after another.'''

    def __init__(self, children, linenum):
        super(ConcatRule, self).__init__(children, linenum)

    def _doAddToNFA(self, startState, endState):
        '''
        Chain the children's NFA fragments from ``startState`` to ``endState``.

        Every child but the last gets its own end state, which is linked by a
        ``None``-labelled transition to a fresh start state for the next
        child. The last child is wired directly to ``endState``.
        '''
        currStartState = startState
        for child in self.children[:-1]:
            currEndState = RulesNFAState(self)
            child._doAddToNFA(currStartState, currEndState)
            nextStartState = RulesNFAState(self)
            currEndState.addTransition(None, nextStartState)
            currStartState = nextStartState
        lastChild = self.children[-1]
        lastChild._doAddToNFA(currStartState, endState)

    def allowsEmptySequence(self):
        '''True only when every child allows the empty sequence.'''
        return all(rule.allowsEmptySequence() for rule in self.children)

    def __str__(self):
        '''Children rendered with ``str`` and joined by single spaces.'''
        return u' '.join(str(c) for c in self.children)

    def isShiftOrthRule(self):
        '''True only when every child is a shift-orth rule.'''
        return all(c.isShiftOrthRule() for c in self.children)

    def transformToGeneratorVersion(self):
        '''
        Build the generator-mode version of this concatenation.

        Children that allow the empty sequence and are not shift-orth rules
        are dropped; the remaining ones are transformed recursively. The
        result is a ``SinkRule`` when nothing remains, when any transformed
        child is a sink, or when a child follows a non-shift-orth child (so
        at most one non-shift-orth child is accepted, and only in the last
        position). Otherwise a new ``ConcatRule`` of the transformed children
        is returned.
        '''
        newChildren = [
            child.transformToGeneratorVersion()
            for child in self.children
            if not child.allowsEmptySequence() or child.isShiftOrthRule()
        ]
        if not newChildren:
            return SinkRule()
        hasNonOptionalNonShiftingRule = False
        for child in newChildren:
            if child.isSinkRule() or hasNonOptionalNonShiftingRule:
                return SinkRule()
            elif not child.isShiftOrthRule():
                hasNonOptionalNonShiftingRule = True
        return ConcatRule(newChildren, self.linenum)
Michał Lenart authored
131
132
class OrRule(ComplexRule):
    '''Rule matching any one of its children (alternative).'''

    def __init__(self, children, linenum):
        super(OrRule, self).__init__(children, linenum)

    def _doAddToNFA(self, startState, endState):
        '''
        Add one parallel branch per child between the given states.

        Each child gets its own pair of intermediate states, connected to
        ``startState`` and ``endState`` by ``None``-labelled transitions.
        '''
        for child in self.children:
            intermStartState = RulesNFAState(self)
            intermEndState = RulesNFAState(self)
            startState.addTransition(None, intermStartState)
            child._doAddToNFA(intermStartState, intermEndState)
            intermEndState.addTransition(None, endState)

    def allowsEmptySequence(self):
        '''True when at least one child allows the empty sequence.'''
        return any(rule.allowsEmptySequence() for rule in self.children)

    def __str__(self):
        '''Children rendered with ``str`` and joined by ``|``.'''
        return u'|'.join(str(c) for c in self.children)

    def isShiftOrthRule(self):
        '''True only when every child is a shift-orth rule.'''
        return all(c.isShiftOrthRule() for c in self.children)

    def transformToGeneratorVersion(self):
        '''
        Build the generator-mode version of this alternative.

        Children that allow the empty sequence and are not shift-orth rules
        are dropped, the rest are transformed recursively and transformed
        children that turned into sinks are discarded. Returns a ``SinkRule``
        when no child survives, otherwise a new ``OrRule`` of the survivors.
        '''
        transformed = [
            child.transformToGeneratorVersion()
            for child in self.children
            if not child.allowsEmptySequence() or child.isShiftOrthRule()
        ]
        # Build a real list: on Python 3 ``filter`` is a lazy iterator, which
        # never compares equal to [] and would be exhausted by the assertion
        # in ComplexRule.__init__ before the rule could use its children.
        newChildren = [c for c in transformed if not c.isSinkRule()]
        if not newChildren:
            return SinkRule()
        return OrRule(newChildren, self.linenum)
Michał Lenart authored
161
162
class ZeroOrMoreRule(UnaryRule):
    '''Rule matching its child any number of times, including zero.'''

    def __init__(self, child, linenum):
        super(ZeroOrMoreRule, self).__init__(child, linenum)
        assert isinstance(child, SegmentRule)

    def addToNFA(self, fsa):
        '''Always raises ``ValueError``: not usable as a top-level rule.'''
        raise ValueError()

    def _doAddToNFA(self, startState, endState):
        '''
        Add the repetition fragment between ``startState`` and ``endState``.

        All connecting transitions are labelled ``None``: one skips the child
        entirely (start -> end), one leaves the child's fragment, and one
        loops from the child's end back to its start to allow repetition.
        '''
        intermStartState = RulesNFAState(self)
        intermEndState = RulesNFAState(self)

        startState.addTransition(None, intermStartState)
        startState.addTransition(None, endState)
        self.child._doAddToNFA(intermStartState, intermEndState)
        intermEndState.addTransition(None, endState)
        intermEndState.addTransition(None, intermStartState)

    def allowsEmptySequence(self):
        '''Zero repetitions are allowed, so this is always True.'''
        return True

    def transformToGeneratorVersion(self):
        '''Return a deep copy when shift-orth, otherwise a ``SinkRule``.'''
        if self.isShiftOrthRule():
            return copy.deepcopy(self)
        else:
            return SinkRule()

    def __str__(self):
        '''Render as ``(child)*``.'''
        return u'(' + str(self.child) + ')*'
Michał Lenart authored
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212

class OptionalRule(UnaryRule):
    '''Rule matching its child zero times or once.'''

    def __init__(self, child, linenum):
        super(OptionalRule, self).__init__(child, linenum)
        assert isinstance(child, SegmentRule)

    def addToNFA(self, fsa):
        '''Always raises ``ValueError``: not usable as a top-level rule.'''
        raise ValueError()

    def _doAddToNFA(self, startState, endState):
        '''
        Add the optional fragment between ``startState`` and ``endState``.

        All connecting transitions are labelled ``None``; the direct
        start -> end transition is what lets the child be skipped.
        '''
        intermStartState = RulesNFAState(self)
        intermEndState = RulesNFAState(self)

        startState.addTransition(None, intermStartState)
        startState.addTransition(None, endState)
        self.child._doAddToNFA(intermStartState, intermEndState)
        intermEndState.addTransition(None, endState)

    def allowsEmptySequence(self):
        '''The child may be skipped, so this is always True.'''
        return True

    def transformToGeneratorVersion(self):
        '''
        Return a deep copy when shift-orth; otherwise the generator version
        of the child alone (the rule stops being optional).
        '''
        if self.isShiftOrthRule():
            return copy.deepcopy(self)
        else:
            return self.child.transformToGeneratorVersion()

    def __str__(self):
        '''Render as ``(child)?``.'''
        return u'(' + str(self.child) + ')?'
Michał Lenart authored
221
222
223
224
225
226
227
228
229
230
231

class SinkRule(SegmentRule):
    '''
    Placeholder for a removed rule.

    Other rules return it from ``transformToGeneratorVersion`` when no usable
    generator-mode rule remains. It contributes nothing to the NFA.
    '''

    def __init__(self):
        # A sink rule is not tied to any line, hence linenum is None.
        super(SinkRule, self).__init__(None)

    def addToNFA(self, fsa):
        '''Do nothing: a sink rule adds no states or transitions.'''
        return

    def allowsEmptySequence(self):
        '''Always False.'''
        return False

    def _doAddToNFA(self, startStates, endState):
        '''Do nothing: a sink rule adds no states or transitions.'''
        return

    def transformToGeneratorVersion(self):
        '''Return ``self``; a sink rule is its own generator version.'''
        return self

    def isSinkRule(self):
        '''Always True.'''
        return True

    def __str__(self):
        return '<<REMOVED>>'