shiftOrthMagic.py
1.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import sys
import logging
class ShiftOrthMagic(object):
    """Bookkeeping for segment types that are used with the "shift orth" flag.

    Segment types used both with and without the flag get an additional,
    freshly allocated segment number; segment types used only with the flag
    are merely remembered. The two query methods expose this bookkeeping.
    """

    def __init__(self):
        # segnum -> newSegnum
        # used to add (orth, orth, newSegnum) to input dictionary for each (orth, lemma, segnum) entry
        self._bothShiftAndNonShiftSegnums = {}
        # set of segnums
        # used to replace each (orth, lemma, segnum) with (orth, orth, segnum) in input dictionary
        self._onlyShiftSegnums = set()

    def shouldReplaceLemmaWithOrth(self, typenum):
        """Return True if typenum was registered as a shift-orth-only segnum."""
        return typenum in self._onlyShiftSegnums

    def getNewSegnum4ShiftOrth(self, typenum):
        """Return the additional segnum allocated for typenum, or None if there is none."""
        return self._bothShiftAndNonShiftSegnums.get(typenum)

    def doShiftOrthMagic(self, resultsMap, segtypesHelper, shiftOrthSegtypes, nonShiftOrthSegtypes):
        """Allocate additional segnums and renumber shift-orth atomic rules in place.

        resultsMap           -- mapping whose values are iterables of rules; each
                                rule provides getAtomicRules(), yielding objects
                                with 'segnum' and 'shiftOrth' attributes
        segtypesHelper       -- provides getMaxSegnum() and getSegnum4Segtype(segtype)
        shiftOrthSegtypes    -- set of segment types used with shift orth
        nonShiftOrthSegtypes -- set of segment types used without shift orth

        Returns None; the effects are the updated internal bookkeeping and the
        modified 'segnum' attributes of the affected atomic rules.
        """
        nextNewSegnum = segtypesHelper.getMaxSegnum() + 1
        if self._bothShiftAndNonShiftSegnums:
            # Already-known segnums are skipped below, so this method may be
            # called more than once. Never hand out a number that an earlier
            # call has already allocated.
            highestAllocated = max(self._bothShiftAndNonShiftSegnums.values())
            nextNewSegnum = max(nextNewSegnum, highestAllocated + 1)

        additionalIdsMap = {}
        # Sorted, so that the numbering does not depend on set iteration order.
        for segtype in sorted(shiftOrthSegtypes & nonShiftOrthSegtypes):
            oldSegnum = segtypesHelper.getSegnum4Segtype(segtype)
            if oldSegnum not in self._bothShiftAndNonShiftSegnums:
                self._bothShiftAndNonShiftSegnums[oldSegnum] = nextNewSegnum
                # NOTE(review): empty log record kept as in the original;
                # presumably a visual separator - confirm it is still wanted.
                logging.info('')
                additionalIdsMap[nextNewSegnum] = (segtype + '>')
                nextNewSegnum += 1
        logging.info('segment number -> additional segment type (with ">")')
        logging.info(str(additionalIdsMap))

        for segtype in shiftOrthSegtypes - nonShiftOrthSegtypes:
            self._onlyShiftSegnums.add(segtypesHelper.getSegnum4Segtype(segtype))

        # values() instead of iteritems(): the keys are not needed, and
        # iteritems() does not exist on Python 3 dictionaries.
        for rules in resultsMap.values():
            for rule in rules:
                for atomicRule in rule.getAtomicRules():
                    if atomicRule.shiftOrth and atomicRule.segnum in self._bothShiftAndNonShiftSegnums:
                        atomicRule.segnum = self._bothShiftAndNonShiftSegnums[atomicRule.segnum]