#! /usr/bin/python
# -*- coding: utf-8 -*-
"""Management command that imports semantic frames from plWN_verbs.csv.

Each data row of the tab-separated file lists lexical units in the 'synset'
column and, in the trailing role columns, how each semantic role is realized.
For every row the command builds one or more role -> realization frames,
stores them as SemanticFrame / Complement objects and links the complements
to the positions of every syntactic schema that can realize all of them.
"""

import sys, os, codecs

from django.core.management.base import BaseCommand

from dictionary.models import Lemma
from semantics.models import SemanticRole, FramePosition, Complement, LexicalUnit, SemanticFrame
from settings import PROJECT_PATH

# Expected column layout of plWN_verbs.csv (header names, in file order).
_COLUMNS = [
    'id', 'od', 'do', 'range', 'level', 'ile', 'art', '2ls', 'synset',
    '4ls', 'xls', 'fnf', 'light', 'alt', 'conv', 'dev', 'senses', 'typ',
    'change', 'causephase', 'laspect', 'maspect',
    'Agent', 'Manipulator', 'Effector', 'Cognizer', 'Protagonist',
    'Benefactor', 'Cause', 'Stimulus', 'Communicator', 'Path', 'Instrument',
    'Patient', 'Theme', 'Experiencer', 'Resultee', 'Beneficiary', 'Object',
    'Asset', 'Product', 'Content', 'Source', 'Material', 'Goal', 'Entity',
    'Part', 'Collection', 'Attribute', 'Event', 'Phase', 'State-of-Affairs',
    'Scenario', 'Background', 'Focus', 'Instance', 'Type', 'Location', 'Time',
]

# Index of the 'synset' column, which holds the comma-separated lexical units.
_UNIT_COLUMN = 8

# Index of 'Agent', the first semantic-role column.
_FIRST_ROLE_COLUMN = 22

# NOTE(review): the role columns in _COLUMNS are capitalised ('Phase',
# 'Scenario'), so these lowercase names never match and nothing is actually
# skipped. Behaviour is kept as it was; confirm the intended spelling.
_SKIPPED_COLUMNS = ('phase', 'scenario')

# Tokens that contribute nothing to a realization.
_IGNORED_PARTS = ('i', 'o', 're', 'neg')

# Realizations that may also be matched by a structural-case np(str) argument,
# mapped to the position category that np(str) must carry.
_STRUCTURAL_CASE_CATEGORY = {u'np(nom)': u'subj', u'np(acc)': u'obj'}


class Command(BaseCommand):
    """Run the frame import; takes no arguments."""
    args = 'none'
    help = ''

    def handle(self, **options):
        # Uncomment to delete previously imported objects before importing.
        # clear_import_data()
        import_frames()


def clear_import_data():
    """Delete every FramePosition, Complement and SemanticFrame object."""
    FramePosition.objects.all().delete()
    Complement.objects.all().delete()
    SemanticFrame.objects.all().delete()


def _translate_part(part):
    """Translate one stripped, non-empty token of a role cell.

    Returns a pair (text, drop_marker). text is the realization string to
    record, or None when the token contributes nothing. drop_marker is True
    for the 'b' / 'zero' tokens, which record an empty realization and let
    the caller discard a role whose every alternative ends up empty.
    """
    if part in ('b', 'zero'):
        return '', True
    if part in _IGNORED_PARTS or part[0] == '\'':
        return None, False
    if part in ('abl', 'adl'):
        return 'xp(' + part + ')', False
    if '+' in part:
        pieces = part.split('+')
        if len(pieces) == 2:
            prep, case = pieces
            # A preposition made of several words is a complex preposition.
            if len(prep.split(' ')) > 1:
                return 'comprepnp(' + prep + ')', False
            return 'prepnp(' + prep + ',' + case + ')', False
        # More than one '+': not understood, report the token and ignore it.
        print(part)
        return None, False
    # The second spelling is accepted as-is; it looks like a typo in the data.
    if part in ('xp{locat}', 'xp{locat)'):
        return 'xp(locat)', False
    if part == u'że':
        return u'cp(' + part + ')', False
    if part == 'inf':
        return 'infp(_)', False
    # 'bf' / 'sg' are markers resolved later by _merge_marker.
    if part in ('bf', 'sg'):
        return part, False
    return 'np(' + part + ')', False


def _parse_cell(cell):
    """Parse a non-empty role cell into its list of alternative realizations.

    Alternatives are separated by '|'; the tokens of one alternative are
    separated by ':' and re-joined with ':' after translation. Returns None
    when a 'b' / 'zero' token was seen and every alternative is empty, which
    means the role is not recorded for this row.
    """
    alternatives = []
    saw_drop_marker = False
    for item in cell.split('|'):
        texts = []
        for part in item.split(':'):
            part = part.strip()
            if not part:
                # Empty (or whitespace-only) token: nothing to translate.
                continue
            text, drops = _translate_part(part)
            saw_drop_marker = saw_drop_marker or drops
            if text is not None:
                texts.append(text)
        alternatives.append(':'.join(texts))
    if saw_drop_marker and not any(alternatives):
        return None
    return alternatives


def _collect_role_data(cells):
    """Map role column names to their alternative realizations for one row.

    Returns None when any inspected role cell contains a comma; such rows
    are not imported at all.
    """
    data = {}
    named_cells = list(zip(_COLUMNS, cells))
    # The start index is shifted by the difference between the actual and the
    # expected number of cells (kept from the original logic).
    # NOTE(review): zip() still pairs names and cells from index 0, so the
    # shift does not realign names for short or long rows - confirm intent.
    start = _FIRST_ROLE_COLUMN + len(cells) - len(_COLUMNS)
    for column, cell in named_cells[start:]:
        if column in _SKIPPED_COLUMNS:
            continue
        cell = cell.strip()
        if ',' in cell:
            return None
        if cell != '':
            alternatives = _parse_cell(cell)
            if alternatives is not None:
                data[column] = alternatives
    return data


def _merge_marker(realizations, marker, anchor_roles):
    """Remove the marker entry and hand its roles over to anchor realizations.

    realizations maps realization text to a list of role names. The roles
    listed under marker are appended to every other realization whose role
    list contains one of anchor_roles (once per matching anchor).

    NOTE(review): callers pass lowercase anchors ('background', 'focus',
    'source', 'goal') while role names come from the capitalised column
    names, so no realization ever matches and the marked roles are simply
    dropped. Behaviour is kept as it was; confirm the intended spelling.
    """
    if marker not in realizations:
        return
    shared_roles = realizations.pop(marker)
    for roles in realizations.values():
        for anchor in anchor_roles:
            if anchor in roles:
                roles.extend(shared_roles)


def _build_frames(data):
    """Yield one frame per alternative index of the row.

    A frame maps a ', '-joined list of role names to the realization text
    shared by those roles. Roles with fewer alternatives than the longest
    one repeat their alternatives cyclically. Empty frames are not yielded.
    """
    if data:
        alternative_count = max([len(values) for values in data.values()])
    else:
        alternative_count = 1
    for index in range(alternative_count):
        realizations = {}
        for role, alternatives in data.items():
            text = alternatives[index % len(alternatives)]
            if text != '':
                realizations.setdefault(text, []).append(role)
        # background + focus
        _merge_marker(realizations, 'bf', ('background', 'focus'))
        # source + goal
        _merge_marker(realizations, 'sg', ('source', 'goal'))
        frame = dict((', '.join(roles), text)
                     for text, roles in realizations.items())
        if frame:
            yield frame


def _select_schemas(lemma, reflexive):
    """Return the schemas of lemma that fit the unit's reflexivity.

    Non-reflexive units take schemas whose reflexivity characteristic is
    empty. Reflexive units take schemas whose characteristic is the
    reflexive marker, or that have a position with a 'refl' argument.
    """
    schemas = []
    for schema in lemma.frames.all():
        reflexivity = schema.characteristics.get(type=u'ZWROTNOŚĆ')
        if not reflexive:
            if reflexivity.value.value == u'':
                schemas.append(schema)
        elif reflexivity.value.value == u'się':
            schemas.append(schema)
        else:
            for position in schema.positions.all():
                if len(position.arguments.filter(text_rep=u'refl')) > 0:
                    schemas.append(schema)
                    break
    return schemas


def _create_complements(semantic_frame, frame):
    """Create one Complement per realization and attach its roles.

    Returns a dict mapping realization text to the saved Complement.
    SemanticRole.objects.get raises if a role name is not in the database.
    """
    complements = {}
    for roles, argument in frame.items():
        complement = Complement(frame=semantic_frame)
        complement.save()
        complements[argument] = complement
        for role_name in roles.split(','):
            role_name = role_name.strip()
            print(role_name)
            complement.roles.add(SemanticRole.objects.get(role=role_name))
    return complements


def _find_matches(position, argument):
    """Return the arguments of position that realize the given text.

    An exact text_rep match counts. In addition np(nom) is matched by
    np(str) on a 'subj' position and np(acc) by np(str) on an 'obj'
    position. Both kinds of match are returned when both apply.
    """
    found = []
    exact = position.arguments.filter(text_rep=argument)
    if len(exact) > 0:
        found.append(exact[0])
    category = _STRUCTURAL_CASE_CATEGORY.get(argument)
    if category is not None:
        structural = position.arguments.filter(text_rep=u'np(str)')
        if (len(structural) > 0
                and len(position.categories.filter(category=category)) > 0):
            found.append(structural[0])
    return found


def _connect_schemas(frame, complements, schemas):
    """Link complements to every schema in which ALL realizations are found.

    For each such schema a FramePosition (schema, position, argument) is
    looked up, created when missing, and added to the complement's
    realizations.
    """
    for schema in schemas:
        positions = schema.positions.all()
        connections = []
        schema_ok = True
        for argument in frame.values():
            argument_ok = False
            for position in positions:
                for match in _find_matches(position, argument):
                    argument_ok = True
                    connections.append(
                        (complements[argument], position, match))
            if not argument_ok:
                # One unrealizable argument rules the whole schema out.
                schema_ok = False
                break
        if not schema_ok:
            continue
        for complement, position, match in connections:
            existing = FramePosition.objects.filter(
                frame=schema, position=position, argument=match)
            if len(existing) > 0:
                realization = existing[0]
            else:
                realization = FramePosition(
                    frame=schema, position=position, argument=match)
                realization.save()
            complement.realizations.add(realization)


def _import_unit_frame(unit_label, frame):
    """Store frame for one lexical unit label such as '<base> [sie] <sense>'.

    The label is skipped silently unless it starts with 'k', exactly one
    non-old Lemma exists for its first word, and it has the form
    '<base> <sense>' or '<base> <reflexive marker> <sense>'.
    LexicalUnit.objects.get raises if the unit is not in the database.
    """
    # NOTE(review): only units starting with 'k' are imported - kept from
    # the original; confirm whether this restriction is still wanted.
    if not unit_label.startswith(u'k'):
        return
    words = unit_label.split(' ')
    lemmas = Lemma.objects.filter(entry=words[0], old=False)
    if len(lemmas) != 1:
        return
    lemma = lemmas[0]
    if len(words) == 2:
        base, sense, reflexive = words[0], words[1], False
    elif len(words) == 3 and words[1] == u'się':
        base, sense, reflexive = ' '.join(words[0:2]), words[2], True
    else:
        return
    unit = LexicalUnit.objects.get(base=base, sense=int(sense))
    # Create an empty frame for the unit, then fill it in.
    semantic_frame = SemanticFrame()
    semantic_frame.save()
    semantic_frame.lexical_units.add(unit)
    schemas = _select_schemas(lemma, reflexive)
    complements = _create_complements(semantic_frame, frame)
    _connect_schemas(frame, complements, schemas)


def import_frames():
    """Import semantic frames from data/Semantics/plWN_verbs.csv.

    The first line of the file is treated as a header and skipped. Rows too
    short to contain the lexical-unit column are skipped; rows shorter than
    the expected layout are reported on stdout but still processed.
    """
    verbs_file_path = os.path.join(PROJECT_PATH, 'data', 'Semantics', 'plWN_verbs.csv')
    with codecs.open(verbs_file_path, encoding='utf_8', mode='r') as infile:
        for line_number, line in enumerate(infile):
            if line_number == 0:
                continue
            cells = line.split('\t')
            if len(cells) <= _UNIT_COLUMN:
                # Blank or truncated row: there is no unit cell to read.
                continue
            if len(cells) < len(_COLUMNS):
                print(u'%s %d %d' % (cells[_UNIT_COLUMN], len(cells), len(_COLUMNS)))
            data = _collect_role_data(cells)
            if data is None:
                continue
            for frame in _build_frames(data):
                for unit_label in cells[_UNIT_COLUMN].split(','):
                    _import_unit_frame(unit_label.strip(), frame)