|
1
2
3
4
5
|
'''
Created on 20 lut 2014
@author: mlenart
'''
|
|
6
|
import logging
|
|
7
|
from morfeuszbuilder.utils.serializationUtils import htons, htonl
|
|
8
|
|
|
9
10
|
class RulesManager(object):
    '''
    Keeps one segmentation-rules DFA per combination of option values and
    serializes the whole collection into a single byte sequence.

    DFAs are stored in ``options2DFA`` under a hashable key derived from the
    options mapping (see ``_options2Key``).
    '''

    def __init__(self, segtypes):
        '''
        :param segtypes: object providing ``lexeme2Segnum(lemma, tagnum)``;
            used by ``lexeme2SegmentTypeNum``.
        '''
        # frozenset of (option, value) pairs -> DFA object
        self.options2DFA = {}
        self.segtypes = segtypes
        # Must be set through setDefaultOptions() before serialize() is
        # called: serialize() passes it to _serializeOptionsMap(), which
        # calls len() on it and indexes it, so None would fail there.
        self.defaultOptions = None

    def _options2Key(self, optionsMap):
        '''Turn an options mapping into a hashable key (frozenset of its items).'''
        return frozenset(optionsMap.items())

    def _key2Options(self, optionsKey):
        '''Inverse of ``_options2Key``: rebuild a dict from the frozenset key.'''
        return dict(optionsKey)

    def getDFA(self, optionsMap):
        '''
        Return the DFA registered for ``optionsMap``.

        :raises KeyError: if no DFA was added for this combination of options.
        '''
        return self.options2DFA[self._options2Key(optionsMap)]

    def setDefaultOptions(self, key2Def):
        '''Store the options mapping that ``serialize`` writes as the defaults.'''
        self.defaultOptions = key2Def

    def addDFA(self, optionsMap, dfa):
        '''Register ``dfa`` for ``optionsMap``, replacing any DFA stored under the same options.'''
        self.options2DFA[self._options2Key(optionsMap)] = dfa

    def lexeme2SegmentTypeNum(self, lemma, tagnum):
        '''
        Return the segment type number that ``segtypes`` assigns to the lexeme.

        :raises ValueError: if ``segtypes.lexeme2Segnum`` returns ``None``.
        '''
        res = self.segtypes.lexeme2Segnum(lemma, tagnum)
        if res is None:
            raise ValueError(
                'no segment type for lemma %r with tag number %r' % (lemma, tagnum))
        return res

    def serialize(self):
        '''
        Serialize all registered DFAs followed by the default options.

        Layout of the returned ``bytearray``:

        * 1 byte: number of DFAs (must be between 1 and 255),
        * for every DFA: its options map (``_serializeOptionsMap``) followed
          by the DFA itself (``_serializeDFA``),
        * the default options map.

        The DFAs are written in dict iteration order.
        '''
        res = bytearray()
        dfasNum = len(self.options2DFA)
        # The count is written as a single byte.
        assert 0 < dfasNum < 256
        res.append(dfasNum)
        # items() instead of iteritems(): the latter does not exist on
        # Python 3, while items() behaves the same here on Python 2.
        for key, dfa in self.options2DFA.items():
            optionsMap = self._key2Options(key)
            res.extend(self._serializeOptionsMap(optionsMap))
            res.extend(self._serializeDFA(dfa))
        res.extend(self._serializeOptionsMap(self.defaultOptions))
        logging.info('segmentation rules size: %s bytes', len(res))
        return res

    def _serializeOptionsMap(self, optionsMap):
        '''
        Serialize the ``'aggl'`` and ``'praet'`` entries of ``optionsMap``.

        Output: one byte with the value 2 (the number of entries written),
        then each option name and its value as NUL-terminated strings, in the
        fixed order ``aggl``, ``praet``. Other keys in ``optionsMap`` are
        not written.

        :raises KeyError: if ``'aggl'`` or ``'praet'`` is missing.
        '''
        assert len(optionsMap) < 256
        res = bytearray()
        res.append(2)
        for option in ('aggl', 'praet'):
            res.extend(self._serializeString(option))
            res.extend(self._serializeString(optionsMap[option]))
        return res

    def _serializeDFA(self, dfa):
        '''
        Serialize ``dfa`` as a length prefix followed by ``dfa.serialize()``.

        The prefix is ``htonl(len(data))`` -- presumably a 4-byte integer in
        network byte order; confirm against ``serializationUtils.htonl``.
        '''
        res = bytearray()
        dfaBytearray = dfa.serialize()
        res.extend(htonl(len(dfaBytearray)))
        res.extend(dfaBytearray)
        return res

    def _serializeString(self, string):
        '''Return ``string`` encoded as UTF-8 and terminated with a single NUL byte.'''
        res = bytearray()
        res.extend(string.encode('utf8'))
        res.append(0)
        return res
|