#! /usr/bin/python
# -*- coding: utf-8 -*-
import sys, os, codecs
from django.core.management.base import BaseCommand
from dictionary.models import Lemma
from semantics.models import SemanticRole, FramePosition, Complement, LexicalUnit, SemanticFrame
from settings import PROJECT_PATH
class Command(BaseCommand):
    """Management command that runs import_frames()."""

    args = 'none'
    help = 'Import semantic frames from data/Semantics/plWN_verbs.csv.'

    def handle(self, *args, **options):
        """Run the import.

        Positional arguments and options are accepted, matching the
        ``handle(*args, **options)`` signature of BaseCommand, but are not
        used.
        """
        # Uncomment to delete previously imported rows before importing:
        #clear_import_data()
        import_frames()
def clear_import_data():
    """Delete all FramePosition, Complement and SemanticFrame rows.

    The tables are emptied in exactly that order.
    """
    for model in (FramePosition, Complement, SemanticFrame):
        model.objects.all().delete()
def _first_or_none(queryset):
    """Return the first row of *queryset*, or None, using a single query."""
    rows = list(queryset[:1])
    if rows:
        return rows[0]
    return None


def _convert_part(part):
    """Translate one stripped, non-empty realization token.

    Returns a pair ``(text, is_null)``. ``text`` is the argument text to
    record, or None when the token contributes nothing. ``is_null`` is True
    only for the 'b' and 'zero' tokens, which are recorded as an empty text.
    """
    if part in ('b', 'zero'):
        return '', True
    if part in ('i', 'o', 're', 'neg') or part.startswith('\''):
        return None, False
    if part in ('abl', 'adl'):
        return 'xp(' + part + ')', False
    if '+' in part:
        pieces = part.split('+')
        if len(pieces) != 2:
            # More than one '+': not convertible, report it and drop it.
            print(part)
            return None, False
        prep, case = pieces
        if len(prep.split(' ')) > 1:
            # A preposition made of several words.
            return 'comprepnp(' + prep + ')', False
        return 'prepnp(' + prep + ',' + case + ')', False
    if part in ('xp{locat}', 'xp{locat)'):
        # The second spelling is a malformed variant of the first.
        return 'xp(locat)', False
    if part == u'że':
        return u'cp(' + part + ')', False
    if part == 'inf':
        return 'infp(_)', False
    if part in ('bf', 'sg'):
        # Markers resolved later by _merge_shared_roles().
        return part, False
    return 'np(' + part + ')', False


def _parse_cell(cell):
    """Parse one role cell into its '|'-separated alternatives.

    Each alternative is the ':'-joined list of its converted parts.
    Returns ``(alternatives, saw_null)`` where ``saw_null`` tells whether a
    'b' or 'zero' token occurred anywhere in the cell.
    """
    alternatives = []
    saw_null = False
    for item in cell.split('|'):
        converted = []
        for raw_part in item.split(':'):
            part = raw_part.strip()
            if part == '':
                # Also covers whitespace-only parts, which would otherwise
                # fail on the first-character test.
                continue
            text, is_null = _convert_part(part)
            saw_null = saw_null or is_null
            if text is not None:
                converted.append(text)
        alternatives.append(':'.join(converted))
    return alternatives, saw_null


def _collect_role_data(named_cells):
    """Build the {column: [alternative, ...]} mapping for one row.

    Returns None when any inspected cell contains a comma; such rows are
    not imported. Empty cells are ignored, as are the 'Phase' and
    'Scenario' columns.
    """
    data = {}
    for column, cell in named_cells:
        # Column names are capitalised, so compare case-insensitively.
        if column.lower() in ('phase', 'scenario'):
            continue
        cell = cell.strip()
        if ',' in cell:
            return None
        if cell == '':
            continue
        alternatives, saw_null = _parse_cell(cell)
        if saw_null and all(entry == '' for entry in alternatives):
            # The cell held nothing but null realizations: drop the column.
            continue
        data[column] = alternatives
    return data


def _merge_shared_roles(realizations, marker, targets):
    """Attach the roles realized as *marker* to the *targets* arguments.

    *realizations* maps argument text to a list of role names and is
    modified in place: the *marker* entry is removed and its roles are
    appended to every argument that already carries one of the *targets*
    roles (given in lower case).
    """
    if marker not in realizations:
        return
    shared_roles = realizations.pop(marker)
    for roles in realizations.values():
        if any(role.lower() in targets for role in roles):
            roles.extend(shared_roles)


def _build_frames(data):
    """Return one {roles: argument} dict per alternation of a row.

    The number of alternations is the longest list of alternatives in
    *data*; shorter lists are repeated cyclically. Keys are role names
    joined with ', '. Alternations that yield no realization are omitted.
    """
    if data:
        alter = max(len(entries) for entries in data.values())
    else:
        alter = 1
    frames = []
    for i in range(alter):
        realizations = {}
        for role, entries in data.items():
            argument = entries[i % len(entries)]
            if argument != '':
                realizations.setdefault(argument, []).append(role)
        # background + focus
        _merge_shared_roles(realizations, 'bf', ('background', 'focus'))
        # source + goal
        _merge_shared_roles(realizations, 'sg', ('source', 'goal'))
        frame = {', '.join(roles): argument
                 for argument, roles in realizations.items()}
        if frame:
            frames.append(frame)
    return frames


def _schema_matches(schema, reflexive):
    """Tell whether *schema* fits a reflexive or non-reflexive unit.

    The decision is based on the schema's u'ZWROTNOŚĆ' characteristic
    (Polish for "reflexivity"): it must be empty for a non-reflexive unit.
    For a reflexive unit it must be u'się', or one of the schema's
    positions must have a u'refl' argument.
    """
    reflexivity = schema.characteristics.get(type=u'ZWROTNOŚĆ').value.value
    if not reflexive:
        return reflexivity == u''
    if reflexivity == u'się':
        return True
    for position in schema.positions.all():
        if _first_or_none(position.arguments.filter(text_rep=u'refl')) is not None:
            return True
    return False


def _create_complements(semantic_frame, frame):
    """Create one Complement per argument of *frame* and attach its roles.

    Returns a dict mapping argument text to the saved Complement.
    """
    complements = {}
    for roles, argument in frame.items():
        complement = Complement(frame=semantic_frame)
        complement.save()
        complements[argument] = complement
        for raw_role in roles.split(','):
            role = raw_role.strip()
            print(role)
            complement.roles.add(SemanticRole.objects.get(role=role))
    return complements


def _find_connections(schema, frame, complements):
    """Match every argument of *frame* against the positions of *schema*.

    Returns a list of ``(complement, position, argument)`` triples, or
    None when at least one argument of the frame has no match.
    """
    # np(nom) / np(acc) may also be realized as np(str) in a position of
    # the given category.
    structural_category = {u'np(nom)': u'subj', u'np(acc)': u'obj'}
    positions = schema.positions.all()
    connections = []
    for argument in frame.values():
        found = False
        category = structural_category.get(argument)
        for position in positions:
            exact = _first_or_none(position.arguments.filter(text_rep=argument))
            if exact is not None:
                found = True
                connections.append((complements[argument], position, exact))
            if category is None:
                continue
            structural = _first_or_none(
                position.arguments.filter(text_rep=u'np(str)'))
            if structural is None:
                continue
            if _first_or_none(position.categories.filter(category=category)) is not None:
                found = True
                connections.append((complements[argument], position, structural))
        if not found:
            return None
    return connections


def _store_connections(schema, connections):
    """Link each complement to a FramePosition, creating it when missing."""
    for complement, position, argument in connections:
        realization = _first_or_none(FramePosition.objects.filter(
            frame=schema, position=position, argument=argument))
        if realization is None:
            realization = FramePosition(
                frame=schema, position=position, argument=argument)
            realization.save()
        complement.realizations.add(realization)


def _store_frame(lu, frame):
    """Create a SemanticFrame for lexical unit *lu* and connect it.

    *lu* is either '<verb> <sense>' or u'<verb> się <sense>'. Nothing is
    stored when the verb does not match exactly one non-old Lemma or when
    *lu* has any other shape. The frame is connected to every schema of
    the lemma in which all of its arguments can be found.
    """
    words = lu.split(' ')
    lemmas = Lemma.objects.filter(entry=words[0], old=False)
    if len(lemmas) != 1:
        return
    all_schemas = lemmas[0].frames.all()
    if len(words) == 2:
        reflexive = False
        base, sense = words[0], words[1]
    elif len(words) == 3 and words[1] == u'się':
        reflexive = True
        base, sense = ' '.join(words[0:2]), words[2]
    else:
        return
    unit = LexicalUnit.objects.get(base=base, sense=int(sense))
    # create empty frame
    semantic_frame = SemanticFrame()
    semantic_frame.save()
    semantic_frame.lexical_units.add(unit)
    schemas = [schema for schema in all_schemas
               if _schema_matches(schema, reflexive)]
    # create unconnected roles
    complements = _create_complements(semantic_frame, frame)
    # connect to EVERY schema where ALL roles can be found
    for schema in schemas:
        connections = _find_connections(schema, frame, complements)
        if connections is not None:
            _store_connections(schema, connections)


def import_frames():
    """Import semantic frames from data/Semantics/plWN_verbs.csv.

    The file is read as UTF-8, tab-separated text; its first line is
    skipped. For every remaining row the role cells are converted to
    argument descriptions, and a SemanticFrame with its Complements and
    FramePositions is stored for each lexical unit in the 'synset' cell.
    Rows with a comma in an inspected role cell, and rows too short to
    contain the 'synset' cell, are skipped.
    """
    verbs_file_path = os.path.join(PROJECT_PATH, 'data', 'Semantics', 'plWN_verbs.csv')
    columns = ['id', 'od', 'do', 'range', 'level', 'ile', 'art', '2ls', 'synset', '4ls', 'xls', 'fnf', 'light', 'alt', 'conv', 'dev', 'senses', 'typ', 'change', 'causephase', 'laspect', 'maspect', 'Agent', 'Manipulator', 'Effector', 'Cognizer', 'Protagonist', 'Benefactor', 'Cause', 'Stimulus', 'Communicator', 'Path', 'Instrument', 'Patient', 'Theme', 'Experiencer', 'Resultee', 'Beneficiary', 'Object', 'Asset', 'Product', 'Content', 'Source', 'Material', 'Goal', 'Entity', 'Part', 'Collection', 'Attribute', 'Event', 'Phase', 'State-of-Affairs', 'Scenario', 'Background', 'Focus', 'Instance', 'Type', 'Location', 'Time']
    synset_index = columns.index('synset')      # 8
    first_role_index = columns.index('Agent')   # 22
    with codecs.open(verbs_file_path, encoding='utf_8', mode='r') as infile:
        for line_number, line in enumerate(infile):
            if line_number == 0:
                # Header line.
                continue
            cells = line.split('\t')
            if len(cells) <= synset_index:
                # E.g. a blank line: there is no 'synset' cell to import.
                continue
            if len(cells) < len(columns):
                print(u'%s %d %d' % (cells[synset_index], len(cells), len(columns)))
            named_cells = list(zip(columns, cells))
            # NOTE(review): the start offset moves with the row length while
            # the names stay left-aligned, so rows whose length differs from
            # len(columns) may pair cells with the wrong column names (and
            # very short rows give a negative start) - confirm the intent.
            start = first_role_index + len(cells) - len(columns)
            data = _collect_role_data(named_cells[start:])
            if data is None:
                continue
            for frame in _build_frames(data):
                for unit in cells[synset_index].split(','):
                    lu = unit.strip()
                    # NOTE(review): only units starting with u'k' are
                    # imported - presumably a partial import; confirm.
                    if not lu.startswith(u'k'):
                        continue
                    _store_frame(lu, frame)