|
1
2
3
4
5
6
7
8
9
10
|
'''
Created on Oct 8, 2013
@author: mlenart
'''
class State(object):
'''
A state in an automaton
'''
|
|
11
12
|
statesCounter = 0
|
|
13
|
|
|
14
|
def __init__(self, additionalData=None):
|
|
15
|
self.transitionsMap = {}
|
|
16
|
# self.transitionsDataMap = {}
|
|
17
|
self.freq = 0
|
|
18
19
20
|
self.encodedData = None
self.reverseOffset = None
self.offset = None
|
|
21
22
|
self.label2Freq = {}
self.serializeAsArray = False
|
|
23
|
self.additionalData = additionalData
|
|
24
25
26
|
self.idx = State.statesCounter
State.statesCounter += 1
|
|
27
|
|
|
28
29
30
31
|
@property
def transitionsNum(self):
return len(self.transitionsMap)
|
|
32
33
34
35
36
|
def setTransition(self, label, nextState):
self.transitionsMap[label] = nextState
#
# def setTransitionData(self, byte, data):
# self.transitionsDataMap[byte] = data
|
|
37
|
|
|
38
39
40
|
def hasNext(self, byte):
return byte in self.transitionsMap
|
|
41
42
43
|
def getNext(self, byte, addFreq=False):
if addFreq:
self.freq += 1
|
|
44
|
self.label2Freq[byte] = self.label2Freq.get(byte, 0) + 1
|
|
45
46
47
48
49
50
51
52
|
return self.transitionsMap.get(byte, None)
def getRegisterKey(self):
return ( frozenset(self.transitionsMap.iteritems()), tuple(self.encodedData) if self.encodedData else None )
def isAccepting(self):
return self.encodedData is not None
|
|
53
|
def tryToRecognize(self, word, addFreq=False):
|
|
54
|
if word:
|
|
55
|
nextState = self.getNext(word[0], addFreq)
|
|
56
|
if nextState:
|
|
57
|
return nextState.tryToRecognize(word[1:], addFreq)
|
|
58
59
60
61
62
|
else:
return False
else:
return self.encodedData
|
|
63
|
def dfs(self, alreadyVisited, sortKey=lambda (_, state): -state.freq):
|
|
64
|
if not self in alreadyVisited:
|
|
65
|
alreadyVisited.add(self)
|
|
66
|
for _, state in sorted(self.transitionsMap.iteritems(), key=sortKey):
|
|
67
68
69
|
for state1 in state.dfs(alreadyVisited):
yield state1
yield self
|
|
70
|
|
|
71
72
73
74
75
76
77
78
|
def calculateOffsets(self, sizeCounter):
currReverseOffset = 0
for state in self.dfs(set()):
currReverseOffset += sizeCounter(state)
state.reverseOffset = currReverseOffset
for state in self.dfs(set()):
state.offset = currReverseOffset - state.reverseOffset
|
|
79
80
|
def debug(self):
print '----------------'
|
|
81
|
print 'STATE:', self.idx, 'accepting', self.isAccepting()
|
|
82
83
|
for label, s in self.transitionsMap.iteritems():
print label, '-->', s.idx
|