Blame view

fsabuilder/morfeuszbuilder/fsa/state.py 2.53 KB
Michał Lenart authored
1
2
3
4
5
6
7
8
9
10
'''
Created on Oct 8, 2013

@author: mlenart
'''

class State(object):
    '''
    A state in an automaton
    '''
Michał Lenart authored
11
12

    statesCounter = 0
Michał Lenart authored
13
Michał Lenart authored
14
    def __init__(self, additionalData=None):
Michał Lenart authored
15
        self.transitionsMap = {}
Michał Lenart authored
16
#         self.transitionsDataMap = {}
Michał Lenart authored
17
        self.freq = 0
Michał Lenart authored
18
19
20
        self.encodedData = None
        self.reverseOffset = None
        self.offset = None
Michał Lenart authored
21
22
        self.label2Freq = {}
        self.serializeAsArray = False
Michał Lenart authored
23
        self.additionalData = additionalData
Michał Lenart authored
24
25
26

        self.idx = State.statesCounter
        State.statesCounter += 1
Michał Lenart authored
27
Michał Lenart authored
28
29
30
31
    @property
    def transitionsNum(self):
        return len(self.transitionsMap)
Michał Lenart authored
32
33
34
35
36
    def setTransition(self, label, nextState):
        self.transitionsMap[label] = nextState
#     
#     def setTransitionData(self, byte, data):
#         self.transitionsDataMap[byte] = data
Michał Lenart authored
37
Michał Lenart authored
38
39
40
    def hasNext(self, byte):
        return byte in self.transitionsMap
Michał Lenart authored
41
42
43
    def getNext(self, byte, addFreq=False):
        if addFreq:
            self.freq += 1
Michał Lenart authored
44
            self.label2Freq[byte] = self.label2Freq.get(byte, 0) + 1
Michał Lenart authored
45
46
47
48
49
50
51
52
        return self.transitionsMap.get(byte, None)

    def getRegisterKey(self):
        return ( frozenset(self.transitionsMap.iteritems()), tuple(self.encodedData) if self.encodedData else None )

    def isAccepting(self):
        return self.encodedData is not None
Michał Lenart authored
53
    def tryToRecognize(self, word, addFreq=False):
Michał Lenart authored
54
        if word:
Michał Lenart authored
55
            nextState = self.getNext(word[0], addFreq)
Michał Lenart authored
56
            if nextState:
Michał Lenart authored
57
                return nextState.tryToRecognize(word[1:], addFreq)
Michał Lenart authored
58
59
60
61
62
            else:
                return False
        else:
            return self.encodedData
Michał Lenart authored
63
    def dfs(self, alreadyVisited, sortKey=lambda (_, state): -state.freq):
Michał Lenart authored
64
        if not self in alreadyVisited:
Michał Lenart authored
65
            alreadyVisited.add(self)
Michał Lenart authored
66
            for _, state in sorted(self.transitionsMap.iteritems(), key=sortKey):
Michał Lenart authored
67
68
69
                for state1 in state.dfs(alreadyVisited):
                    yield state1
            yield self
Michał Lenart authored
70
Michał Lenart authored
71
72
73
74
75
76
77
78
    def calculateOffsets(self, sizeCounter):
        currReverseOffset = 0
        for state in self.dfs(set()):
            currReverseOffset += sizeCounter(state)
            state.reverseOffset = currReverseOffset
        for state in self.dfs(set()):
            state.offset = currReverseOffset - state.reverseOffset
Michał Lenart authored
79
80
    def debug(self):
        print '----------------'
Michał Lenart authored
81
        print 'STATE:', self.idx, 'accepting', self.isAccepting()
Michał Lenart authored
82
83
        for label, s in self.transitionsMap.iteritems():
            print label, '-->', s.idx