Position.py 9.34 KB

Edit Raw Blame History

#! /usr/bin/python
# -*- coding: utf-8 -*-

from importer.Phrase import phrase_from_tree
from syntax.models import SyntacticFunction, Control, NaturalLanguageDescription
import syntax.models

from entries.phrase_descriptions.descriptions import phrase_description2

class Function:
    """Syntactic function of a position, wrapping its raw value."""

    def __init__(self, value):
        # Raw function value; other code in this file reads it via `_value`.
        self._value = value

    @classmethod
    def fromTree(cls, tree):
        """Build a Function from a tree node.

        The value is read from the 'value' attribute of the node's first child.
        """
        first_child = tree._children[0]
        return cls(first_child._attrs['value'])

# @TODO: two kinds of control
# NOTE: this class shadows the `Control` name imported from syntax.models at the
# top of this file; Position.store reaches the model explicitly through
# `syntax.models.Control`.
class Control:
    """Control marker attached to a position.

    `_function` holds the control value; Position.fromTree fills it from the
    'value' attribute of a control tree node.
    """

    def __init__(self, function):
        self._function = function


class DoubleControlError(Exception):
    """A position carries more than one non-predicative control."""


class DoublePredControlError(Exception):
    """A position carries more than one predicative control."""


class Position:
    """A syntactic position: optional function, optional controls and phrases."""

    # Function values that toUnicode() knows how to render.
    _RENDERED_FUNCTIONS = ('subj', 'obj', 'head')

    def __init__(self, position_id, function, controls, phrases, phrase_ids):
        """Store the parsed components of a position.

        position_id -- identifier of the position (fromTree passes a sorted tuple)
        function    -- object exposing `_value`, or None
        controls    -- list of objects exposing `_function`, or None
        phrases     -- list of phrase objects
        phrase_ids  -- mapping from phrase id (or text representation) to phrase
        """
        self._id = position_id
        self._function = function
        self._control = controls
        self._phrases = phrases
        self._phrase_ids = phrase_ids
        # Database id, filled in by store().
        self._db_id = None

    @classmethod
    def fromTree(cls, tree):
        """Build a Position from a tree node and attach it to each of its phrases.

        Children of `tree` are dispatched on their 'name' attribute
        ('function', 'control', 'phrases'); other children are ignored.
        """
        function = None
        controls = None
        phrases = []
        phrase_ids = {}
        position_id = []

        for subtree in tree._children:
            kind = subtree._attrs['name']
            if kind == 'function':
                function = Function.fromTree(subtree)
            elif kind == 'control':
                controls = [Control(c._attrs['value']) for c in subtree._children[0]._children]
            elif kind == 'phrases':
                for phrase_tree in subtree._children[0]._children:
                    phrase = phrase_from_tree(phrase_tree)
                    phrases.append(phrase)
                    phrase_id = phrase.getId()
                    if phrase_id is not None:
                        phrase_ids[phrase_id] = phrase
                        # Numeric part: last dot-separated field of the text before the first '-'.
                        position_id.append(int(phrase_id.split('-')[0].split('.')[-1]))
                    # TODO So far, only schema positions were stored in database
                    # and all phrases had an id from xml:id attribute. Now we add
                    # modification positions where phrases have no id, so the
                    # text_rep is used. Is this enough? Perhaps text_rep could also be used
                    # for schema position phrases?
                    else:
                        text_rep = str(phrase)
                        phrase_ids[text_rep] = phrase
                        position_id.append(text_rep)

        # The list may hold ints (from ids) and strings (text representations).
        # Comparing the two raises TypeError on Python 3, so group by type
        # first; a list of a single type is ordered exactly as a plain sort.
        position_id.sort(key=lambda part: (isinstance(part, str), part))
        result = cls(tuple(position_id), function, controls, phrases, phrase_ids)
        for phrase in phrases:
            phrase._position = result
        return result

    def store(self, schema, stored_positions, schema_positions, negativity):
        """Persist this position under `schema`, reusing an already stored one if possible.

        schema           -- object whose `positions` manager supports add() and create()
        stored_positions -- mapping from extended position id to database id; updated here
        schema_positions -- set of extended ids already used in this schema; updated here
        negativity       -- value recorded in the position label and in the descriptions

        Raises DoubleControlError / DoublePredControlError when the position
        carries two controls of the same kind.
        """
        # label = [function, control, pred_control, negativity]
        label = [None, None, None, negativity]
        function = None
        if self._function is not None:
            function = SyntacticFunction.objects.get(name=self._function._value)
            label[0] = self._function._value

        control = None
        pred_control = None
        if self._control is not None:
            for c in self._control:
                if c._function[:4] == 'pred':
                    if pred_control is not None:
                        raise DoublePredControlError(c._function)
                    pred_control = syntax.models.PredicativeControl.objects.get(name=c._function)
                    label[2] = c._function
                else:
                    if control is not None:
                        raise DoubleControlError(c._function)
                    control = syntax.models.Control.objects.get(name=c._function)
                    label[1] = c._function

        # Find the first occurrence counter not yet used for this label/id pair,
        # so that repeated identical positions in one schema get distinct ids.
        i = 1
        extended_id = (tuple(label), self._id, i)
        while extended_id in schema_positions:
            i += 1
            extended_id = (tuple(label), self._id, i)
        schema_positions.add(extended_id)

        # self._id is None for parts of lex atr
        # TODO (KK) ^^^
        if self._id is not None and extended_id in stored_positions:
            position = syntax.models.Position.objects.get(id=stored_positions[extended_id])
            self._db_id = position.id
            schema.positions.add(position)
            return

        position = schema.positions.create(function=function,
                                           control=control,
                                           pred_control=pred_control,
                                           phrases_count=len(self._phrases))
        self._db_id = position.id

        controller = None
        # fails for: uważać, uznać, uznawać
        assert(not control or not pred_control)
        # NOTE(review): self._schema is not assigned anywhere in this class;
        # presumably the owning schema object sets it after construction -- confirm.
        if control and control.name == 'controllee':
            controller = self._schema.getController('controllee')
        if pred_control and pred_control.name == 'pred_controllee':
            controller = self._schema.getController('pred_controllee')

        for phrase in self._phrases:
            phrase.store(position, stored_positions)
            phrase_text = str(phrase)
            desc_count = NaturalLanguageDescription.objects.filter(
                             negativity=negativity,
                             function=position.function,
                             control=position.control,
                             pred_control=position.pred_control,
                             phrase_str=phrase_text).count()
            if desc_count == 0:
                # No description stored yet for this combination: create one per language.
                for lang in ('pl', 'en'):
                    desc_text = phrase_description2(phrase, self, negativity, lang, controller=controller)
                    # TODO also index by controller function? other controller info?
                    desc = NaturalLanguageDescription(
                                     lang=lang,
                                     negativity=negativity,
                                     function=position.function,
                                     control=position.control,
                                     pred_control=position.pred_control,
                                     phrase_str=phrase_text,
                                     description=desc_text)
                    desc.save()
        if self._id is not None:
            stored_positions[extended_id] = position.id

    def isController(self):
        """Return True if any control of this position ends with 'controller'."""
        if not self._control:
            return False
        return any(c._function.endswith('controller') for c in self._control)

    def getCase(self):
        """Return the single case shared by this position's phrases.

        Phrases whose getCase() is falsy are ignored. Raises AssertionError
        (after printing the phrases) unless exactly one distinct case remains.
        """
        cases = set()
        for phrase in self._phrases:
            case = phrase.getCase()
            if case:
                cases.add(case)
        # Explicit check instead of `assert`, so the validation also runs
        # under `python -O`; the exception type is unchanged.
        if len(cases) != 1:
            print('COULDN’T DETERMINE POSITION CASE:')
            print(' + '.join(map(str, self._phrases)))
            raise AssertionError('expected exactly one case, found %d' % len(cases))
        return cases.pop()

    def getPhraseIds(self):
        """Return the mapping from phrase id (or text representation) to phrase."""
        return self._phrase_ids

    def __unicode__(self):
        """Render as '[p1,p2]' or '<function>([p1,p2])' when a function is set."""
        listing = '[' + ','.join([phrase.toUnicode(self._function) for phrase in self._phrases]) + ']'
        if self._function is None:
            return listing
        return self._function._value + '(' + listing + ')'

    def toUnicode(self, function, phrases=None):
        """Render the position, wrapped in its controls, optionally restricted to `phrases`.

        function -- unused; the position's own function is passed to the phrases
        phrases  -- if given, only phrases of this position contained in it are rendered

        Returns None when the function value is not one of 'subj', 'obj', 'head'.
        """
        value = None if self._function is None else self._function._value
        if value is not None and value not in self._RENDERED_FUNCTIONS:
            # Unknown function values have no rendering.
            return None

        pre = ''
        post = ''
        if self._control is not None:
            for control in self._control:
                pre += control._function + '('
                post += ')'

        if phrases is None:
            selected = self._phrases
        else:
            selected = [phrase for phrase in self._phrases if phrase in phrases]
        listing = '[' + ','.join([phrase.toUnicode(self._function) for phrase in selected]) + ']'

        # NOTE(review): without a phrase filter an 'obj' position is rendered
        # bare (no 'obj(...)' wrapper), while with a filter it is wrapped.
        # This asymmetry is kept from the original -- confirm it is intended.
        if value is None or (phrases is None and value == 'obj'):
            return pre + listing + post
        return pre + value + '(' + listing + ')' + post

    def subposition(self, phrases=None):
        """Render as '<function>,<controls>{p1,p2}', optionally restricted to `phrases`."""
        control_part = ''
        if self._control is not None:
            control_part = ','.join([control._function for control in self._control])
        function_part = ''
        if self._function is not None:
            function_part = self._function._value
        if function_part != '' and control_part != '':
            function_part += ','
        if phrases is None:
            selected = self._phrases
        else:
            selected = [phrase for phrase in self._phrases if phrase in phrases]
        return function_part + control_part + '{' + ','.join([str(phrase) for phrase in selected]) + '}'