# nkjp.py 1.42 KB
import itertools

import poliqarp


class NKJP:
    """Wrapper around a ``poliqarp.Connection`` for checking expressions.

    The instance opens a session on construction and keeps it in
    ``self.connection`` until :meth:`close` is called.  It can also be used
    as a context manager, in which case the connection is closed on exit::

        with NKJP(corpus) as nkjp:
            nkjp.check_expression(expression, segments)
    """

    def __init__(self, corpora):
        """Connect to the corpus identified by ``corpora``.

        ``corpora`` is passed unchanged to ``Connection.open_corpus``.
        """
        self.connection = self.__connect(corpora)

    def __enter__(self):
        """Return the instance itself so it can be bound by ``with ... as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection; never suppress an in-flight exception."""
        self.close()
        return False

    def close(self):
        """Close the underlying connection."""
        self.connection.close()

    def __connect(self, corpus):
        """Create a connection, start a session and open ``corpus``.

        Returns the ready-to-query connection object.
        """
        connection = poliqarp.Connection()
        connection.make_session()
        connection.open_corpus(corpus)
        return connection

    def check_expression(self, expression, new_segments):
        """Return True when the corpus has at least one hit for the forms.

        ``expression`` must expose ``segments.order_by('position_in_expr')``
        yielding objects with ``ctag`` and ``has_nps`` attributes;
        ``new_segments`` is the parallel sequence of surface forms, where a
        ``None`` entry means "skip this segment".
        """
        query = self.__get_expression_query(expression, new_segments)
        return self.__contains(query)

    def __get_expression_query(self, expression, new_segments):
        """Build the query string for ``new_segments``.

        Each non-``None`` form becomes one query term:

        * segment tagged ``'interp'``  -> ``[orth="\\<form>"]`` (the form is
          prefixed with a backslash inside the quotes);
        * form containing a digit      -> ``[orth="<form>"]``;
        * anything else                -> ``<form>/i``
          (NOTE(review): ``/i`` is presumably Poliqarp's case-insensitive
          flag -- confirm against the query-syntax documentation).

        A term is glued to the previous one when the segment's ``has_nps`` is
        truthy, otherwise it is preceded by a single space.  Leading
        whitespace of the final string is stripped.  Pairing stops at the
        shorter of the two sequences.
        """
        parts = []
        expr_segments = expression.segments.order_by('position_in_expr')
        # The builtin zip is available on both Python 2 and Python 3, whereas
        # itertools.izip exists only on Python 2.
        for expr_seg, form_seg in zip(expr_segments, new_segments):
            if form_seg is None:
                continue

            if expr_seg.ctag == 'interp':
                term = u'[orth="\\%s"]' % form_seg
            elif any(char.isdigit() for char in form_seg):
                term = u'[orth="%s"]' % form_seg
            else:
                term = form_seg + '/i'

            parts.append(term if expr_seg.has_nps else ' %s' % term)

        # Join once instead of growing the string on every iteration.
        return ''.join(parts).lstrip()

    def __contains(self, query):
        """Run ``query`` and report whether anything was spotted.

        ``run_query`` is called with ``1`` -- presumably the number of
        results to wait for, since a single hit is enough here (confirm
        against the poliqarp client API).
        """
        self.connection.make_query(query)
        self.connection.run_query(1)
        return self.connection.get_n_spotted_results() > 0