nkjp.py
1.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import itertools
import poliqarp
class NKJP:
    """Check whether multi-segment expressions occur in a poliqarp corpus.

    An instance opens one ``poliqarp.Connection`` on construction and keeps
    it in ``self.connection`` until ``close()`` is called.  The instance can
    also be used as a context manager, which closes the connection on exit.
    """

    def __init__(self, corpora):
        """Connect to the corpus identified by *corpora*.

        The value is passed unchanged to ``Connection.open_corpus``.
        """
        self.connection = self.__connect(corpora)

    def __enter__(self):
        """Return the instance itself so it can be bound by ``with ... as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection; never suppress an in-flight exception."""
        self.close()
        return False

    def close(self):
        """Close the underlying connection."""
        self.connection.close()

    def __connect(self, corpus):
        """Create a connection, start a session and open *corpus* on it."""
        connection = poliqarp.Connection()
        connection.make_session()
        connection.open_corpus(corpus)
        return connection

    def check_expression(self, expression, new_segments):
        """Return True if the corpus yields at least one hit for the expression.

        Parameters:
            expression: object whose ``segments.order_by('position_in_expr')``
                returns the expression's segments; each segment must expose
                ``ctag`` and ``has_nps``.
            new_segments: iterable of surface forms paired positionally with
                those segments; ``None`` entries are left out of the query.
        """
        query = self.__get_expression_query(expression, new_segments)
        return self.__contains(query)

    def __get_expression_query(self, expression, new_segments):
        """Build the query string for *expression* using the given forms.

        Segments and forms are paired positionally; pairing stops at the
        shorter of the two sequences.  Returns an empty string when no form
        contributes to the query.
        """
        # ``itertools.izip`` exists only on Python 2; fall back to the
        # built-in ``zip`` (already lazy) so this also runs on Python 3.
        pair_up = getattr(itertools, 'izip', zip)

        expr_segments = expression.segments.order_by('position_in_expr')
        parts = []
        for expr_seg, form_seg in pair_up(expr_segments, new_segments):
            if form_seg is None:
                continue
            if expr_seg.ctag == 'interp':
                # Segments tagged 'interp' get a backslash in front of the
                # form inside an explicit orth constraint.
                token = u'[orth="\\%s"]' % form_seg
            elif any(char.isdigit() for char in form_seg):
                # Forms containing a digit are matched through an explicit
                # orth constraint, without the '/i' suffix.
                token = u'[orth="%s"]' % form_seg
            else:
                # NOTE(review): '/i' is presumably Poliqarp's
                # case-insensitive flag -- confirm against the query docs.
                token = form_seg + '/i'
            # NOTE(review): has_nps presumably means "no preceding space";
            # such tokens are glued to the previous one without a separator.
            parts.append(token if expr_seg.has_nps else ' %s' % token)
        return ''.join(parts).lstrip()

    def __contains(self, query):
        """Run *query* and report whether it spotted any result."""
        self.connection.make_query(query)
        # NOTE(review): the argument presumably caps the number of results
        # to fetch; one is enough for an existence check -- confirm.
        self.connection.run_query(1)
        return self.connection.get_n_spotted_results() > 0