rulesNFA.py 3.89 KB
'''
Created on 24 sty 2014

@author: mlenart
'''

from morfeuszbuilder.fsa import fsa, state, encode

class RulesNFAState(object):
    
    statesCounter = 0
    
    def __init__(self, initial=False, final=False, weak=False):
        self.transitionsMap = {}
        self.transitionsDataMap = {}
        self.initial = initial
        self.final = final
        self.weak = weak
        self.idx = RulesNFAState.statesCounter
        RulesNFAState.statesCounter += 1
    
    def addTransition(self, label, targetState):
        self.transitionsMap.setdefault(label, set())
        self.transitionsMap[label].add(targetState)
        self.transitionsDataMap[label] = 0
    
    def setTransitionData(self, label, byte):
        assert len(self.transitionsMap[label]) == 1
        self.transitionsDataMap[label] = byte
    
    def getClosure(self, visited):
        if self in visited:
            return set()
        else:
            visited.add(self)
            res = set()
            res.add(self)
            for nextState in self.transitionsMap.get(None, []):
                res |= nextState.getClosure(visited)
            return res
    
    def dfs(self, visitedStates=set()):
        if not self in visitedStates:
            visitedStates.add(self)
            yield self
            for _, nextStates in self.transitionsMap.iteritems():
                for state in nextStates:
                    for state1 in state.dfs():
                        yield state1
    
    def debug(self):
        print '----------------'
        print 'STATE:', self.idx
        for label, nextStates in self.transitionsMap.iteritems():
            print label, '-->', [s.idx for s in sorted(nextStates, key=lambda s: s.idx)]

class RulesNFA(object):
    
    def __init__(self):
        self.initialState = RulesNFAState(initial=True)
    
    def _groupOutputByLabels(self, nfaStates):
        res = {}
        for nfaState in nfaStates:
            for label, nextStates in nfaState.transitionsMap.iteritems():
                if label is not None:
                    transitionData = nfaState.transitionsDataMap[label]
                    res.setdefault((label, transitionData), set())
                    for nextNFAState in nextStates:
                        res[(label, transitionData)] |= nextNFAState.getClosure(set())
        return res
    
    def _doConvertState(self, dfaState, nfaStates, nfaSubset2DFAState):
        assert all(map(lambda state: state.weak, nfaStates)) \
            or not any(map(lambda state: state.weak, nfaStates))
        weak = all(map(lambda state: state.weak or not state.final, nfaStates))
        final = any(map(lambda state: state.final, nfaStates))
        assert not weak or not final
        if final:
            # dfaState should be final
            # and contain info about weakness
            dfaState.encodedData = bytearray([1 if weak else 0])
        for (label, transitionData), nextNFAStates in self._groupOutputByLabels(nfaStates).iteritems():
            key = frozenset(nextNFAStates)
            if key in nfaSubset2DFAState:
                nextDFAState = nfaSubset2DFAState[key]
            else:
                nextDFAState = state.State()
                nfaSubset2DFAState[key] = nextDFAState
                self._doConvertState(nextDFAState, nextNFAStates, nfaSubset2DFAState)
            dfaState.setTransition(label, nextDFAState)
            dfaState.setTransitionData(label, transitionData)
    
    def convertToDFA(self):
        dfa = fsa.FSA(encoder=None, encodeData=False, encodeWords=False)
        startStates = self.initialState.getClosure(set())
        assert not any(filter(lambda s: s.final, startStates))
        dfa.initialState = state.State(additionalData=False)
        self._doConvertState(dfa.initialState, startStates, {frozenset(startStates): dfa.initialState})
        return dfa
    
    def debug(self):
        for state in self.initialState.dfs():
            state.debug()