Example.py 12 KB
#! /usr/bin/python
# -*- coding: utf-8 -*-

from collections import defaultdict
import examples.models
from connections.models import ExampleConnection, SchemaHook
from syntax.models import Schema, Position
from syntax.models_phrase import PhraseType
from semantics.models import Argument


def clean_sentence(sentence):
    """Return *sentence* with every line break replaced by a single space.

    Handles Windows (``\\r\\n``), Unix (``\\n``) and old Mac (``\\r``) line
    endings.
    """
    # '\r\n' has to be handled first so that it collapses into one space
    # instead of two.
    for line_break in ('\r\n', '\n', '\r'):
        sentence = sentence.replace(line_break, ' ')
    return sentence

class Example:
    """An example sentence with its source, opinion, note and illustrations.

    Instances are built from a parsed example node with ``fromTree`` and
    written to the database with ``store``.
    """

    def __init__(self, sentence, source, opinion, note, illustrated_syntax, meaning, illustrated_semantics):
        """Keep the already extracted parts of an example.

        sentence -- text of the example
        source -- key used by ``store`` to look up an ``ExampleSource``
        opinion -- key used by ``store`` to look up an ``ExampleOpinion``
        note -- note text or None
        illustrated_syntax -- ``IllustratesSyntax`` instance
        meaning -- key into the ``meanings`` mapping given to ``store``, or None
        illustrated_semantics -- ``IllustratesSemantics`` instance or None
        """
        self._sentence = sentence
        self._source = source
        self._opinion = opinion
        self._note = note
        self._phrases = illustrated_syntax
        self._meaning = meaning
        self._arguments = illustrated_semantics

    @classmethod
    def fromTree(cls, example_tree, phrases, entry_semantics, base, meanings, in_data, out_file, misconnected):
        """Build an ``Example`` from a parsed example node.

        The layout of ``example_tree._children`` is recognised by its length:
        6 children mean "meaning and note", 5 children mean either "meaning"
        (when the first child is named ``'meaning'``) or "note", and
        4 children mean neither. When a meaning is present it occupies the
        first slot and shifts every other child by one.

        ``phrases``, ``base`` and ``out_file`` are handed to
        ``IllustratesSyntax.fromTree``; ``entry_semantics``, ``meanings``,
        ``in_data``, ``out_file`` and ``misconnected`` are handed to
        ``IllustratesSemantics.interfere`` (only when a meaning is present).

        Any other number of children prints the node and raises
        ``UnknownError``.
        """
        eid = int(example_tree._attrs['xml:id'].split('.')[1].split('-')[0])
        children = example_tree._children
        child_count = len(children)
        if child_count == 6:
            has_meaning, has_note = True, True
        elif child_count == 5:
            # five children: exactly one of meaning / note is present
            has_meaning = children[0]._attrs['name'] == 'meaning'
            has_note = not has_meaning
        elif child_count == 4:
            has_meaning, has_note = False, False
        else:
            print(example_tree)
            # NOTE(review): UnknownError is neither defined nor imported in
            # the visible part of this file -- confirm where it comes from.
            raise UnknownError()

        # The optional meaning child sits in front of all the others.
        shift = 1 if has_meaning else 0

        if has_meaning:
            meaning = children[0]._children[0]._attrs['sameAs'][1:]
        else:
            meaning = None
        sentence = clean_sentence(children[shift + 1]._children[0]._content)
        illustrated_syntax = IllustratesSyntax.fromTree(children[shift], phrases, base, sentence, out_file)
        if has_meaning:
            illustrated_semantics = IllustratesSemantics.interfere(sentence, base, meaning, illustrated_syntax, entry_semantics, meanings, eid, in_data, out_file, misconnected)
        else:
            illustrated_semantics = None
        source = children[shift + 2]._children[0]._attrs['value']
        opinion = children[shift + 3]._children[0]._attrs['value']
        if has_note:
            note = children[shift + 4]._children[0]._content
        else:
            note = None

        return cls(sentence, source, opinion, note, illustrated_syntax, meaning, illustrated_semantics)

    @staticmethod
    def _attach_hooks(connection, subentry, schema, position, subposition, alternation):
        """Save one ``SchemaHook`` per phrase of *subposition* and add it to *connection*."""
        for phrase_obj in subposition:
            phrase = PhraseType.objects.get(text_rep=str(phrase_obj))
            hook = SchemaHook(subentry=subentry,
                              schema=schema,
                              position=position,
                              phrase_type=phrase,
                              alternation=alternation)
            hook.save()
            connection.schema_connections.add(hook)

    def store(self, entry, meanings):
        """Save the example for *entry* and, where possible, its connections.

        The example row itself is always saved. An ``ExampleConnection`` with
        schema hooks is added when the example is connected to syntax; when
        it is also connected to semantics, the connection additionally gets
        the lexical unit taken from ``meanings[self._meaning]`` and the
        matched arguments.
        """
        opinion = examples.models.ExampleOpinion.objects.get(key=self._opinion)
        source = examples.models.ExampleSource.objects.get(key=self._source)
        example = examples.models.Example(entry=entry,
                                          sentence=self._sentence,
                                          opinion=opinion,
                                          source=source,
                                          note=self._note)
        example.save()

        if not self._phrases.exists():
            # example not connected to syntax
            return

        if self._arguments is None:
            # example connected to syntax but not connected to semantics
            subpositions = self._phrases._subpositions
            if not subpositions:
                # A schema was referenced but none of its phrases could be
                # resolved; indexing subpositions[0] would fail, so the
                # example is treated as not connected to syntax.
                return
            connection = ExampleConnection(example=example)
            connection.save()
            # subentry and schema are taken from the first phrase only
            first_position = subpositions[0][0]._position
            subentry = first_position._schema.getSubentry(entry)
            schema = Schema.objects.get(id=first_position._schema._db_id)
            for subposition in subpositions:
                position = Position.objects.get(id=subposition[0]._position._db_id)
                self._attach_hooks(connection, subentry, schema, position, subposition, 1)
            return

        if not self._arguments.exists():
            # example connected to both syntax and semantics
            # but connection cannot be transferred
            # or connected to schema but not to any phrases in it
            # example is treated as not connected to syntax
            return

        # example connected to both syntax and semantics
        # and connection can be transferred
        _, lexical_unit = meanings[self._meaning]
        lexical_unit = lexical_unit.get()
        connection = ExampleConnection(example=example, lexical_unit=lexical_unit)
        connection.save()
        for role_illustration in self._arguments._arguments:
            if role_illustration._argument is not None:
                argument = Argument.objects.get(id=role_illustration._argument._db_id)
                connection.arguments.add(argument)
            subposition = role_illustration._subposition
            head_position = subposition[0]._position
            subentry = head_position._schema.getSubentry(entry)
            schema = Schema.objects.get(id=head_position._schema._db_id)
            position = Position.objects.get(id=head_position._db_id)
            self._attach_hooks(connection, subentry, schema, position, subposition,
                               role_illustration._alternation)
        

class IllustratesSyntax:
    """Phrases of a schema that an example illustrates, grouped by position."""

    def __init__(self, schema_key, subpositions):
        """Keep the schema key and the grouped phrases.

        schema_key -- integer key of the schema, or None when the example
                      references no phrases at all
        subpositions -- list of lists of phrase objects, one inner list per
                        position key
        """
        self._schema_key = schema_key
        self._subpositions = subpositions

    @classmethod
    def fromTree(cls, tree, phrases, base, sentence, out_file):
        """Collect the phrases referenced under ``tree._children[0]``.

        Every referenced id (the ``sameAs`` attribute without its first
        character) is split on dots; the second part is the schema key and
        the third part the position key. Ids found in *phrases* are grouped
        by position key; ids that are missing are reported to *out_file* on
        a line starting with ``@@@`` and otherwise skipped.

        The schema key of the result is the one from the last reference
        processed, including references that were reported as missing.
        """
        schema_key = None
        positions = defaultdict(list)
        for subtree in tree._children[0]._children:
            phrase_id = subtree._attrs['sameAs'][1:]
            id_parts = phrase_id.split('.')
            schema_key = int(id_parts[1])
            position_key = int(id_parts[2])
            if phrase_id not in phrases:
                # Terminate the record so it does not fuse with whatever is
                # written to out_file next.
                out_file.write('@@@ ' + base + ':\t' + sentence + '\n')
            else:
                positions[position_key].append(phrases[phrase_id])

        return cls(schema_key, list(positions.values()))

    def exists(self):
        """Return True when at least one phrase reference was seen."""
        return self._schema_key is not None

class IllustratesSemanticRole:
    """Links one subposition of an example to a semantic argument.

    The argument may be None, in which case the subposition is kept without
    a matching argument.
    """

    def __init__(self, argument, subposition, alternation):
        """Keep the link and precompute the textual form of the subposition.

        argument -- matched argument or None
        subposition -- non-empty sequence of phrase objects; the position of
                       its first element renders the textual form
        alternation -- alternation number assigned by the caller
        """
        self._argument, self._subposition = argument, subposition
        head_position = subposition[0]._position
        self._subposition_str = head_position.subposition(subposition)
        self._alternation = alternation
    
class IllustratesSemantics:
    """Semantic frame of an example plus the role illustrations found for it."""

    def __init__(self, frame, arguments):
        """Keep the frame and the list of ``IllustratesSemanticRole`` objects."""
        self._frame = frame
        self._arguments = arguments

    @classmethod
    def interfere(cls, sentence, base, meaning, illustrated_syntax, semantics, meanings, eid, in_data, out_file, misconnected):
        """Work out which semantic arguments the example's subpositions illustrate.

        The frame and its realizations come from ``semantics.findFrame(meaning)``;
        only realizations whose schema key equals
        ``illustrated_syntax._schema_key`` are considered. For each of them
        every subposition is matched against the realization, and the
        realization(s) with the most matched subpositions win:

        * no realization at all -- a record is written to *misconnected* and
          the result holds no arguments;
        * a single winner -- its matched and unmatched illustrations are used;
        * several winners whose role descriptions agree -- the illustrations
          of all of them are used and the descriptions are printed;
        * several winners that disagree -- a record is written to *out_file*
          and the result holds no arguments.

        *in_data* is accepted but not used here.
        """
        frame, all_realizations = semantics.findFrame(meaning)

        # Keep only the realizations of the schema the example points at.
        realizations = [
            candidate for candidate in all_realizations
            if int(candidate._schema._id.split('.')[1].split('-')[0]) == illustrated_syntax._schema_key
        ]

        # number of matched subpositions -> [(matched, unmatched), ...]
        by_match_count = defaultdict(list)
        for alternation, realization in enumerate(realizations, start=1):
            matched, unmatched = [], []
            for subposition in illustrated_syntax._subpositions:
                argument = realization.findMatchingArgument(subposition)
                illustration = IllustratesSemanticRole(argument, subposition, alternation)
                if argument is None:
                    unmatched.append(illustration)
                else:
                    matched.append(illustration)
            by_match_count[len(matched)].append((matched, unmatched))

        if not by_match_count:
            # example connected to schema but not connected to any phrases there !!!
            lu = meanings.locate(meaning)
            misconnected.write('% ' + sentence + '\n')
            misconnected.write(base + '\t' + str(lu) + '\t' + str(illustrated_syntax._schema_key) + '\t' + str(eid) + '\n\n')
            return cls(frame, [])

        best = by_match_count[max(by_match_count)]
        if len(best) == 0:
            # Guard only: every key above is created together with an entry.
            raise UnknownError()
        if len(best) == 1:
            matched, unmatched = best[0]
            return cls(frame, matched + unmatched)

        # Several realizations tie on the number of matched subpositions.
        lu = meanings.locate(meaning)
        header = '% ' + sentence + '\n'
        details = base + '\t' + str(lu) + '\t' + str(illustrated_syntax._schema_key) + '\t' + str(eid) + '\n'
        summaries = []
        equal = True
        for matched, _ in best:
            roles = []
            for illustration in matched:
                described = str(illustration._argument._semantic_role) + ': ' + illustration._subposition_str
                roles.append(described)
                # NOTE(review): this is a substring test against the first
                # summary line, not a comparison of role lists -- confirm
                # that is intended.
                if summaries and described not in summaries[0]:
                    equal = False
            summaries.append('\t' + '\t'.join(roles))
        listing = '\n'.join(summaries) + '\n'

        if not equal:
            for chunk in (header, details, listing, '\n'):
                out_file.write(chunk)
            return cls(frame, [])

        arguments = []
        for matched, unmatched in best:
            arguments.extend(matched)
            arguments.extend(unmatched)
        print(listing)
        return cls(frame, arguments)

    def exists(self):
        """Return True when at least one role illustration is held."""
        return len(self._arguments) != 0