# Realizations.py 7.55 KB
#! /usr/bin/python
# -*- coding: utf-8 -*-

from collections import defaultdict

from syntax.models import Schema, Position
from syntax.models_phrase import PhraseType, Gender, Number
from semantics.models import Frame
from connections.models import ArgumentConnection, SchemaHook, RealisationDescription

from importer.RealizationDescriptions import get_phrase_description, select_phrase_description, get_realisation_description
import importer.WalentyXML

class ArgumentRealization:
    """An argument together with the position and phrases that realise it.

    Once ``store`` has run, the instance additionally holds the description
    selected for it and the gender/number returned with that description.
    """

    def __init__(self, argument, position, phrases):
        """Remember the argument, its position and the realising phrases."""
        self._argument = argument
        self._position = position
        self._phrases = phrases
        # phrase object -> (description, gender, number); filled in by store()
        self._phrase_descriptions = {}
        self._description = None
        # gender and number of the nominal head of the description phrase,
        # used for determining gender of verb form, controlled adjps etc.
        # when generating realisation description
        self._gender = None
        self._number = None

    @classmethod
    def fromTree(cls, tree, arguments, all_phrases):
        """Build a realization from a parsed XML subtree.

        The argument is looked up in ``arguments`` and the phrases in
        ``all_phrases`` by the ``sameAs`` attribute with its first character
        dropped (presumably a leading ``#`` -- confirm against the XML).
        The position is taken from the first phrase.
        """
        argument_key = tree._children[0]._children[0]._attrs['sameAs'][1:]
        argument = arguments[argument_key]
        phrases = [
            all_phrases[reference._attrs['sameAs'][1:]]
            for reference in tree._children[1]._children[0]._children
        ]
        return cls(argument, phrases[0]._position, phrases)

    def store(self, subentry, frame, schema, alternation, controller_grammar=None):
        """Save one ``SchemaHook`` per phrase and attach it to the argument's connection.

        Each phrase's (description, gender, number) triple is recorded in
        ``self._phrase_descriptions``; afterwards the triple chosen by
        ``select_phrase_description`` is stored on the instance.
        ``controller_grammar`` is passed through to ``get_phrase_description``.
        """
        db_argument = self._argument.get(frame)
        connection, _created = ArgumentConnection.objects.get_or_create(argument=db_argument)
        db_position = Position.objects.get(id=self._position._db_id)

        for phrase in self._phrases:
            phrase_type = PhraseType.objects.get(text_rep=str(phrase))
            description, gender, number = get_phrase_description(
                subentry,
                db_argument,
                self._position,
                phrase,
                controller_grammar=controller_grammar,
            )
            self._phrase_descriptions[phrase] = (description, gender, number)

            hook = SchemaHook(
                subentry=subentry,
                schema=schema,
                position=db_position,
                phrase_type=phrase_type,
                alternation=alternation,
                description=description,
            )
            hook.save()
            connection.schema_connections.add(hook)

        selected = select_phrase_description(self._position, self._phrase_descriptions)
        self._description, self._gender, self._number = selected

    def matches(self, phrases):
        """Return True when every item of ``phrases`` is one of this realization's phrases.

        An empty ``phrases`` therefore matches.
        """
        return all(phrase in self._phrases for phrase in phrases)

# e.g. ‹*oczy* ciskają *gromy*› – phrases from two positions acting as the
# realisation of the Lemma "argument"; such a realisation is split into two
# argument realisations, because an argument realisation has one position assigned.
def split_argument_realisation_tree(tree):
    """Split an argument-realisation subtree into one subtree per position.

    ``tree`` must have exactly two children: the argument node and the phrases
    node, whose first child holds the phrase references. References are
    grouped by the third dot-separated component of their ``sameAs``
    attribute (used here as the position id). For every group a fresh tree is
    built that shares the original argument node and contains only that
    group's phrase references; each reference's ``_parent`` is re-pointed to
    its new collection node.

    Returns the list of new trees, in order of first appearance of each group.
    """
    arg_node, phrases_node = tree._children
    grouped = defaultdict(list)
    for reference in phrases_node._children[0]._children:
        grouped[reference._attrs['sameAs'].split('.')[2]].append(reference)

    def rebuild(group):
        # XMLNode is constructed as XMLNode(name, attrs, parent)
        make_node = importer.WalentyXML.XMLNode
        collection_node = phrases_node._children[0]
        root = make_node(tree._name, tree._attrs, tree._parent)
        root.addChild(arg_node)
        phrases_copy = make_node(phrases_node._name, phrases_node._attrs, root)
        collection_copy = make_node(collection_node._name, collection_node._attrs, phrases_copy)
        for reference in group:
            collection_copy.addChild(reference)
            reference._parent = collection_copy
        phrases_copy.addChild(collection_copy)
        root.addChild(phrases_copy)
        return root

    return [rebuild(group) for group in grouped.values()]

class FrameRealization:
    """A frame, a schema and the argument realizations connecting them."""

    def __init__(self, frame, schema, argument_realizations):
        """Remember the frame, the schema and the list of argument realizations."""
        self._frame = frame
        self._schema = schema
        self._argument_realizations = argument_realizations

    @classmethod
    def fromTree(cls, tree, arguments, phrases):
        """Build a frame realization from a parsed XML subtree.

        Every argument-realisation subtree is first split per position with
        ``split_argument_realisation_tree``; each part becomes one
        ``ArgumentRealization``. Frame and schema are read from the first
        resulting realization (its argument and its first phrase's position).
        """
        realizations = [
            ArgumentRealization.fromTree(part, arguments, phrases)
            for subtree in tree._children[0]._children[0]._children
            for part in split_argument_realisation_tree(subtree)
        ]
        first = realizations[0]
        return cls(first._argument._frame, first._phrases[0]._position._schema, realizations)

    def store(self, entry):
        """Store all argument realizations and the realisation description.

        The alternation number used is at least 1 and greater than every
        alternation of an existing hook that links one of the frame's
        arguments to this schema.
        """
        frame = Frame.objects.get(id=self._frame._db_id)
        schema = Schema.objects.get(id=self._schema._db_id)

        alternation = 1
        for argument in frame.sorted_arguments():
            for arg_connection in ArgumentConnection.objects.filter(argument=argument).all():
                for hook in arg_connection.schema_connections.all():
                    if hook.schema.id == schema.id and hook.alternation >= alternation:
                        alternation = hook.alternation + 1

        subentry = self._schema.getSubentry(entry)

        # during storing, gender and number is determined, so first store the
        # controller argument realisations to use those for controllee descriptions
        by_role = {'controller': [], 'controllee': [], 'pred_controller': []}
        others = []
        for realization in self._argument_realizations:
            # the first control role the position has (in the order above) wins
            for role, bucket in by_role.items():
                if realization._position.hasControl(role):
                    bucket.append(realization)
                    break
            else:
                others.append(realization)

        # position -> (gender, number) of already stored controlling realizations
        grammar = {}
        # in this order, e.g. uznać subj,controller{np(str)}+controllee,pred_controller{infp(_)}+pred_controllee{adjp(inst)}
        for realization in by_role['controller']:
            realization.store(subentry, frame, schema, alternation)
            grammar[realization._position] = (realization._gender, realization._number)
        for realization in by_role['controllee']:
            realization.store(subentry, frame, schema, alternation, controller_grammar=grammar)
            # a controllee may itself be a pred_controller for another position
            if realization._position.hasControl('pred_controller'):
                grammar[realization._position] = (realization._gender, realization._number)
        for realization in by_role['pred_controller']:
            realization.store(subentry, frame, schema, alternation)
            grammar[realization._position] = (realization._gender, realization._number)
        for realization in others:
            realization.store(subentry, frame, schema, alternation, controller_grammar=grammar)

        description = get_realisation_description(self, subentry, self._schema.getAspect())
        RealisationDescription.objects.create(
            frame=frame,
            schema=schema,
            alternation=alternation,
            description=description,
        )

    def findMatchingArgument(self, phrases):
        """Return the argument of the first realization matching ``phrases``, or None."""
        return next(
            (
                realization._argument
                for realization in self._argument_realizations
                if realization.matches(phrases)
            ),
            None,
        )