import_desc_plWordnet_verbs_csv.py
2.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#! /usr/bin/python
# -*- coding: utf-8 -*-
from django.core.management.base import BaseCommand
import sys, os, codecs
from wordnet.models import Synset, LexicalUnit, Hypernymy, Synonymy
from settings import PROJECT_PATH
# NOTE(review): BULK is not referenced by any code visible in this file;
# presumably a leftover batch size from a sibling import script -- confirm
# before removing.
BULK = 250
class PlWNCSVHandler():
    """Read verb records from a '|'-separated CSV file and copy a definition
    for each one onto the matching ``LexicalUnit`` row.

    Usage: call ``parse(path)`` to load the records into memory, then
    ``store()`` to write the definitions to the database.
    """

    def __init__(self, out = sys.stdout):
        # `out` is kept for interface compatibility; the visible code never
        # writes to it (diagnostics go through print).
        self._out = out
        # Maps luid (int) -> dict describing one lexical unit.
        self._lexical_units = {}

    def parse(self, f):
        """Load the records of the UTF-8 file at path *f* into memory.

        Each non-blank line must contain either 9 fields
        (sid|luid|base|sense|pos|desc|examples|synonyms|hyperonyms) or
        7 fields (the same without the last two).  Lines with 7 fields are
        echoed to stdout and stored with empty synonyms/hyperonyms.  Blank
        lines are skipped.  A later record with the same luid replaces an
        earlier one.

        Raises:
            ValueError: if a line has any other number of fields, or if
                sid, luid or sense is not an integer.
        """
        # Mode is 'r', not 'rt': codecs.open() appends 'b' on its own and
        # Python 3 rejects the resulting 'rtb'.
        with codecs.open(f, 'r', 'utf-8') as infile:
            for lineno, raw in enumerate(infile, 1):
                line = raw.strip()
                if not line:
                    # A blank line carries no record; skipping it avoids an
                    # opaque unpacking error.
                    continue
                fields = line.split('|')
                if len(fields) == 9:
                    sid, luid, base, sense, _pos, desc, examples, synonyms, hypero = fields
                elif len(fields) == 7:
                    print(line)
                    sid, luid, base, sense, _pos, desc, examples = fields
                    synonyms = hypero = ""
                else:
                    raise ValueError(
                        "%s:%d: expected 7 or 9 '|'-separated fields, got %d: %r"
                        % (f, lineno, len(fields), line))
                luid = int(luid)
                self._lexical_units[luid] = {
                    'luid': luid,
                    'synset': int(sid),
                    'base': base,
                    'sense': int(sense),
                    # The part-of-speech column is ignored; every record is
                    # stored as 'czasownik'.
                    'pos': 'czasownik',
                    'desc': desc,
                    'examples': examples,
                    'synonyms': synonyms,
                    'hyperonyms': hypero,
                }

    def store(self):
        """Set ``definition`` on the LexicalUnit matching each parsed record.

        The definition is the first non-empty value among: the description,
        the examples, or ``"hiponim {<hyperonyms>}"``.  Records whose luid
        matches no LexicalUnit are left alone.
        """
        for luid, entry in self._lexical_units.items():
            matches = LexicalUnit.objects.filter(luid=luid)
            if not matches:
                continue
            lu = matches[0]
            if entry['desc']:
                definition = entry['desc']
            elif entry['examples']:
                definition = entry['examples']
            elif entry['hyperonyms']:
                definition = "hiponim {" + entry['hyperonyms'] + "}"
            else:
                # NOTE(review): the original indentation was lost; this
                # branch is assumed to be the final arm of the if/elif chain
                # (no definition source available), not the fallback for
                # "no LexicalUnit found" -- confirm against the repository.
                print(u"%s %s" % (entry['base'], entry['sense']))
                continue
            lu.definition = definition
            lu.save()
#==========================================================#
class Command(BaseCommand):
    """Management command that runs ``update_plWordnet_verb_meanings``."""

    args = 'none'
    help = ''

    def handle(self, *args, **options):
        """Run the update; positional arguments and options are ignored.

        ``*args`` is accepted so the signature matches the
        ``handle(*args, **options)`` form that Django documents, instead of
        raising TypeError when a positional argument is passed.
        """
        update_plWordnet_verb_meanings()
def update_plWordnet_verb_meanings():
    """Parse ``data/semantics/ela_verb.csv`` under PROJECT_PATH with
    PlWNCSVHandler and store the result, printing progress to stdout.
    """
    csv_path = os.path.join(PROJECT_PATH, 'data', 'semantics', 'ela_verb.csv')
    handler = PlWNCSVHandler()

    # Phase 1: read the CSV into memory.
    print("Parsing Wordnet...")
    handler.parse(csv_path)
    print("...DONE")
    print("")

    # Phase 2: write the definitions to the database.
    print("Storing Data...")
    handler.store()
    print("...DONE")