serializer.py
3.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
'''
Created on Oct 20, 2013
@author: mlenart
'''
class Serializer(object):
    '''
    Base class that flattens an automaton into a sequence of bytes.

    The per-state layout is left to subclasses, which must override
    getStateSize() and state2bytearray(). This class only walks the
    automaton and concatenates the encoded states in ascending offset order.
    '''

    def __init__(self):
        pass

    def _statesInOffsetOrder(self, fsa):
        '''
        Ask fsa to calculate state offsets, then return its states sorted
        by their offset attribute.

        The states are the ones produced by fsa.initialState.dfs(); the size
        of each state is reported to fsa through getStateSize().
        '''
        fsa.calculateOffsets(sizeCounter=self.getStateSize)
        return sorted(fsa.initialState.dfs(set()), key=lambda state: state.offset)

    def serialize2CppFile(self, fsa, fname):
        '''
        Write fsa to the text file fname as C++ source defining the
        unsigned char array DEFAULT_FSA.

        Every state is preceded by a "// state <idx>" comment line, where idx
        is the position of the state in offset order, followed by one line
        holding its bytes as hexadecimal literals.
        '''
        lines = ['const unsigned char DEFAULT_FSA[] = {']
        for idx, state in enumerate(self._statesInOffsetOrder(fsa)):
            lines.append('// state ' + str(idx))
            tokens = []
            for byte in self.state2bytearray(state):
                tokens.extend((hex(byte), ','))
            lines.append(' '.join(tokens))
        # An array definition has to be terminated with a semicolon to be
        # valid C++ (the trailing comma after the last element is allowed).
        lines.append('};')
        # The whole text is built before the file is opened, so a failure
        # during serialization does not leave a truncated file behind.
        with open(fname, 'w') as f:
            f.write('\n'.join(lines))

    def serialize2BinaryFile(self, fsa, fname):
        '''
        Write the raw bytes returned by fsa2bytearray() to the binary file
        fname.
        '''
        data = self.fsa2bytearray(fsa)
        with open(fname, 'wb') as f:
            f.write(data)

    def getStateSize(self, state):
        '''
        Return the number of bytes state occupies once serialized.
        Must be overridden by subclasses.
        '''
        raise NotImplementedError('Not implemented')

    def fsa2bytearray(self, fsa):
        '''
        Return a bytearray with the encoded states of fsa concatenated in
        ascending offset order.
        '''
        res = bytearray()
        for state in self._statesInOffsetOrder(fsa):
            res.extend(self.state2bytearray(state))
        return res

    def state2bytearray(self, state):
        '''
        Return the serialized form of a single state.
        Must be overridden by subclasses.
        '''
        raise NotImplementedError('Not implemented')
class SimpleSerializer(Serializer):
    '''
    Serializer with a fixed, simple state layout:

      * one header byte: the number of outgoing transitions, with
        ACCEPTING_FLAG or-ed in when the state is accepting,
      * for accepting states only, the bytes of state.encodedData,
      * four bytes per transition: the label byte followed by the target
        state's offset as three bytes, least significant byte first.

    Subclasses must override getDataSize().
    '''

    # Bit of the header byte that marks an accepting state.
    ACCEPTING_FLAG = 128

    def getStateSize(self, state):
        '''
        Return the serialized size of state in bytes: one header byte,
        four bytes per transition and the size reported by getDataSize().
        '''
        return 1 + 4 * len(state.transitionsMap) + self.getDataSize(state)

    def getDataSize(self, state):
        '''
        Return the number of data bytes stored with state.
        Must be overridden by subclasses.
        '''
        raise NotImplementedError('Not implemented')

    def state2bytearray(self, state):
        '''
        Return the serialized state: header and data first, transitions after.
        '''
        res = bytearray(self._stateData2bytearray(state))
        res.extend(self._transitionsData2bytearray(state))
        return res

    def _stateData2bytearray(self, state):
        '''
        Return the header byte of state, followed by state.encodedData when
        the state is accepting.
        '''
        transitionsNum = len(state.transitionsMap)
        # The count shares the header byte with ACCEPTING_FLAG, so it has to
        # fit below that bit; otherwise it would be read back as the flag.
        assert transitionsNum < SimpleSerializer.ACCEPTING_FLAG
        accepting = state.isAccepting()
        firstByte = transitionsNum
        if accepting:
            firstByte |= SimpleSerializer.ACCEPTING_FLAG
        # A zero header would mean a non-accepting state without transitions.
        assert 0 < firstByte < 256
        res = bytearray([firstByte])
        if accepting:
            res.extend(state.encodedData)
        return res

    def _transitionsData2bytearray(self, state):
        '''
        Return four bytes for every transition of state: the label and the
        three-byte little-endian offset of the target state.
        '''
        res = bytearray()
        # Labels must be ordered as signed chars, not unsigned ones, so
        # values from 128 up are shifted below zero for the comparison.
        transitions = sorted(
            state.transitionsMap.items(),
            key=lambda item: item[0] if 0 <= item[0] < 128 else item[0] - 256)
        for byte, nextState in transitions:
            offset = nextState.offset
            # Only three bytes are stored; a wider offset would be silently
            # truncated by the masks below and point at the wrong state.
            assert 0 <= offset <= 0xFFFFFF
            res.append(byte)
            res.append(offset & 0x0000FF)
            res.append((offset & 0x00FF00) >> 8)
            res.append((offset & 0xFF0000) >> 16)
        return res
class SimpleSerializerWithStringValues(SimpleSerializer):
    '''
    SimpleSerializer variant whose accepting states carry their data as a
    bytearray in state.encodedData.
    '''

    def getDataSize(self, state):
        '''
        Return the length of state.encodedData for an accepting state and
        0 for any other state.
        '''
        # Non-accepting states store no data, so encodedData is not even
        # looked at for them.
        if not state.isAccepting():
            return 0
        assert isinstance(state.encodedData, bytearray)
        return len(state.encodedData)