# import_tei.py 6.84 KB
#! /usr/bin/python
# -*- coding: utf-8 -*-

from django.core.management.base import BaseCommand

import sys, os, shutil, codecs, copy, errno
from xml.sax import saxutils, handler, make_parser
from importer.WalentyXML import WalentyTeiHandler
from importer.WalentyPreprocessXML import WalentyPreprocessTeiHandler
from shellvalier.settings import BASE_DIR
from connections.models import POS, Status
from syntax.models import SchemaOpinion, Aspect, InherentSie, Negativity, Predicativity, SyntacticFunction, Control, PredicativeControl
from semantics.models import FrameOpinion, ArgumentRole, SemanticRole, RoleAttribute, PredefinedSelectionalPreference, SelectionalPreferenceRelation


class Command(BaseCommand):
    """Management command that runs the Walenty TEI import (``import_tei``)."""

    args = 'none'
    # Shown by ``manage.py help <command>``; an empty string leaves the
    # command undocumented on the command line.
    help = ('Populates the constant lookup tables and imports the Walenty '
            'TEI dictionary from data/tei/walenty_tei.xml.')

    def handle(self, *args, **options):
        """Run the import.

        Positional arguments and options are accepted so the signature
        matches what Django passes to ``handle``; none of them are used.
        """
        import_tei()

def import_tei():
    """Fill the constant tables, then parse the Walenty TEI file twice.

    The first SAX pass uses WalentyPreprocessTeiHandler to collect
    ``entry_meanings``, ``meanings`` and ``frames``; the second pass feeds
    those three collections to WalentyTeiHandler and parses the same file
    again.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    tei_file = os.path.join(BASE_DIR, 'data', 'tei', 'walenty_tei.xml')
    # os.path.join drops module_dir whenever tei_file is already absolute,
    # so module_dir only matters for a relative BASE_DIR.
    tei_path = os.path.join(module_dir, tei_file)

    import_constants()

    sax_parser = make_parser()
    # Do not include external general entities while parsing.
    sax_parser.setFeature(handler.feature_external_ges, False)

    # Pass 1: gather the data the main handler needs up front.
    preprocessor = WalentyPreprocessTeiHandler()
    sax_parser.setContentHandler(preprocessor)
    sax_parser.parse(tei_path)

    # Pass 2: the actual import, seeded with the results of pass 1.
    main_handler = WalentyTeiHandler(
        preprocessor.entry_meanings,
        preprocessor.meanings,
        preprocessor.frames,
    )
    sax_parser.setContentHandler(main_handler)
    sax_parser.parse(tei_path)

def import_constants():
    """Run every constant-table importer, one after another, in fixed order."""
    importers = (
        import_poses,
        import_statuses,
        import_schema_opinions,
        import_frame_opinions,
        import_aspects,
        import_inherent_sies,
        import_negativities,
        import_predicativities,
        import_syntactic_functions,
        import_control_tags,
        import_semantic_roles,
        import_predefined_preferences,
        import_preference_relations,
    )
    for run_import in importers:
        run_import()

def import_poses():
    """Save one POS row per tag in the fixed tag list."""
    for tag in (u'unk', u'adj', u'noun', u'adv', u'verb'):
        POS(tag=tag).save()

def import_statuses():
    """Save one Status row per (priority, key) pair listed below."""
    status_table = (
        (10, u'do obróbki'),
        (20, u'w obróbce'),
        (25, u'do usunięcia'),
        (30, u'gotowe'),
        (35, u'zalążkowe'),
        (40, u'sprawdzone'),
        (50, u'(F) w obróbce'),
        (60, u'(F) gotowe'),
        (70, u'(F) sprawdzone'),
        (80, u'(S) w obróbce'),
        (90, u'(S) gotowe'),
        (100, u'(S) sprawdzone'),
    )
    for priority, key in status_table:
        Status(key=key, priority=priority).save()

def import_schema_opinions():
    """Save one SchemaOpinion row per (priority, key) pair listed below."""
    opinion_table = (
        (60, u'vul'),
        (50, u'col'),
        (40, u'dat'),
        (30, u'bad'),
        (20, u'unc'),
        (10, u'cer'),
    )
    for priority, key in opinion_table:
        SchemaOpinion(key=key, priority=priority).save()

def import_frame_opinions():
    """Save one FrameOpinion row per (priority, key) pair listed below."""
    opinion_table = (
        (70, u'met'),
        (60, u'vul'),
        (50, u'col'),
        (40, u'dat'),
        (30, u'bad'),
        (20, u'unc'),
        (10, u'cer'),
        (80, u'dom'),
        (90, u'rar'),
        (100, u'unk'),
    )
    for priority, key in opinion_table:
        FrameOpinion(key=key, priority=priority).save()
        
def import_aspects():
    """Save one Aspect row per (priority, name) pair listed below."""
    aspect_table = (
        (10, u'imperf'),
        (20, u'perf'),
        (32, u'_'),
        (42, u''),
    )
    for priority, label in aspect_table:
        Aspect(name=label, priority=priority).save()

def import_inherent_sies():
    """Save one InherentSie row per (priority, name) pair listed below."""
    sie_table = (
        (10, u'false'),
        (20, u'true'),
    )
    for priority, label in sie_table:
        InherentSie(name=label, priority=priority).save()

def import_negativities():
    """Save one Negativity row per (priority, name) pair listed below."""
    negativity_table = (
        (20, u'aff'),
        (10, u'neg'),
        (31, u'_'),
        (41, u''),
    )
    for priority, label in negativity_table:
        Negativity(name=label, priority=priority).save()

def import_predicativities():
    """Save one Predicativity row per (priority, name) pair listed below."""
    predicativity_table = (
        (20, u'false'),
        (10, u'true'),
    )
    for priority, label in predicativity_table:
        Predicativity(name=label, priority=priority).save()

def import_syntactic_functions():
    """Save one SyntacticFunction row per (priority, name) pair listed below."""
    function_table = (
        (0, u'subj'),
        (20, u'head'),
        (10, u'obj'),
    )
    for priority, label in function_table:
        SyntacticFunction(name=label, priority=priority).save()

def import_control_tags():
    """Save the Control rows first, then the PredicativeControl rows."""
    control_table = (
        (10, u'controller'),
        (20, u'controllee'),
        (30, u'controller2'),
        (40, u'controllee2'),
    )
    for priority, label in control_table:
        Control(name=label, priority=priority).save()

    pred_control_table = (
        (10, u'pred_controller'),
        (20, 'pred_controllee'),
    )
    for priority, label in pred_control_table:
        PredicativeControl(name=label, priority=priority).save()

def import_semantic_roles():
    """Populate SemanticRole, RoleAttribute and ArgumentRole.

    First saves the fixed SemanticRole and RoleAttribute rows listed below.
    Then, for every SemanticRole found in the database, saves one
    ArgumentRole with ``attribute=None`` followed by one ArgumentRole for
    each RoleAttribute found in the database.
    """
    # (priority, role, color); the color strings look like 'R,G,B' triples.
    # NOTE(review): 'Lemma' uses 256,256,256, which is outside 0-255 if these
    # really are RGB channels -- confirm whether 255 was intended.
    roles = [
        (10, u'Initiator', u'91,106,217'),
        (20, u'Stimulus', u'62,173,226'),
        (30, u'Theme', u'90,179,69'),
        (40, u'Experiencer', u'149,195,86'),
        (50, u'Factor', u'82,150,87'),
        (60, u'Instrument', u'199,221,60'),
        (70, u'Recipient', u'203,77,141'),
        (80, u'Result', u'231,155,159'),
        (90, u'Attribute', u'220,53,47'),
        (100, u'Manner', u'191,48,44'),
        (110, u'Measure', u'238,72,154'),
        (120, u'Location', u'187,129,45'),
        (130, u'Path', u'224,121,44'),
        (140, u'Time', u'242,236,54'),
        (150, u'Duration', u'233,192,6'),
        (160, u'Purpose', u'171,85,186'),
        (170, u'Condition', u'127,199,195'),
        (180, u'Lemma', u'256,256,256'),
    ]
    # (priority, attribute, gradient)
    attributes = [
        (10, u'Source', u'left'),
        (30, u'Foreground', u'top'),
        (40, u'Background', u'bottom'),
        (20, u'Goal', u'right'),
    ]
    for priority, role_name, color in roles:
        SemanticRole(role=role_name, color=color, priority=priority).save()
    for priority, attribute_name, gradient in attributes:
        RoleAttribute(attribute=attribute_name, gradient=gradient,
                      priority=priority).save()

    # The attribute set does not change inside the loop below, so fetch it
    # once instead of re-running the same query for every role.
    role_attributes = list(RoleAttribute.objects.all())
    for semantic_role in SemanticRole.objects.all():
        # Bare role without any attribute ...
        ArgumentRole(role=semantic_role, attribute=None).save()
        # ... followed by the role combined with each attribute.
        for role_attribute in role_attributes:
            ArgumentRole(role=semantic_role, attribute=role_attribute).save()
        
def import_predefined_preferences():
    """Save one PredefinedSelectionalPreference row per key listed below."""
    preference_keys = (
        u'ALL',
        u'LUDZIE',
        u'ISTOTY',
        u'PODMIOTY',
        u'KOMUNIKAT',
        u'KONCEPCJA',
        u'WYTWÓR',
        u'JADŁO',
        u'CZAS',
        u'OBIEKTY',
        u'CECHA',
        u'CZYNNOŚĆ',
        u'KIEDY',
        u'CZEMU',
        u'ILOŚĆ',
        u'POŁOŻENIE',
        u'DOBRA',
        u'MIEJSCE',
        u'SYTUACJA',
        u'OTOCZENIE',
    )
    for key in preference_keys:
        PredefinedSelectionalPreference(key=key).save()

def import_preference_relations():
    """Save one SelectionalPreferenceRelation row per (plwn_id, key) pair."""
    # The number is stored as ``plwn_id`` -- presumably a plWordNet relation
    # id, with -1 used for the generic 'RELAT' entry; confirm against the
    # model definition.
    relation_table = (
        (14, u'meronimia'),
        (15, u'holonimia'),
        (20, u'meronimia (typu część)'),
        (21, u'meronimia (typu porcja)'),
        (22, u'meronimia (typu miejsce)'),
        (23, u'meronimia (typu element)'),
        (24, u'meronimia (typu materiał)'),
        (25, u'holonimia (typu część)'),
        (26, u'holonimia (typu porcja)'),
        (27, u'holonimia (typu miejsce)'),
        (28, u'holonimia (typu element)'),
        (29, u'holonimia (typu materiał)'),
        (51, u'nosiciel stanu/cechy'),
        (52, u'stan/cecha'),
        (61, u'synonimia międzyparadygmatyczna'),
        (64, u'meronimia (typu element taksonomiczny)'),
        (65, u'holonimia (typu element taksonomiczny)'),
        (108, u'fuzzynimia synsetów'),
        (-1, u'RELAT'),
    )
    for relation_id, key in relation_table:
        SelectionalPreferenceRelation(plwn_id=relation_id, key=key).save()