# import_desc_plWordnet_verbs_csv.py (2.53 KB)
#! /usr/bin/python
# -*- coding: utf-8 -*-

from django.core.management.base import BaseCommand

import sys, os, codecs
from wordnet.models import Synset, LexicalUnit, Hypernymy, Synonymy
from settings import PROJECT_PATH
    
# NOTE(review): BULK is not referenced anywhere in the visible code; presumably
# a batch size left over from a related import script -- confirm before removing.
BULK = 250
    
class PlWNCSVHandler(object):
    """Read lexical-unit descriptions from a '|'-separated file and copy
    them onto the matching LexicalUnit rows.

    Typical use is ``parse(path)`` followed by ``store()``.
    """

    def __init__(self, out = sys.stdout):
        # `out` is kept on the instance but is not used by the methods below.
        self._out = out
        # Maps int(luid) -> dict describing one parsed record.
        self._lexical_units = {}

    def parse(self, f):
        """Parse the UTF-8 file at path *f* into ``self._lexical_units``.

        Every non-blank line must contain either 9 fields::

            sid|luid|base|sense|pos|desc|examples|synonyms|hyperonyms

        or 7 fields (the same without the last two, which are then stored
        as empty strings).  7-field lines are echoed to stdout.  The `pos`
        column is ignored: every record is stored with pos 'czasownik'
        (Polish for "verb").  Blank lines are skipped.

        Raises:
            ValueError: if a line has another number of fields, or if
                sid, luid or sense is not an integer.
        """
        # codecs.open() always appends 'b' to the mode itself, so a 't' flag
        # is redundant (and is rejected as 'rtb' on Python 3).
        with codecs.open(f, 'r', 'utf-8') as infile:
            for lineno, line in enumerate(infile, 1):
                line = line.strip()
                if not line:
                    continue
                fields = line.split('|')
                if len(fields) == 9:
                    synonyms, hypero = fields[7], fields[8]
                elif len(fields) == 7:
                    # Short record: report it, then store it without
                    # synonyms/hyperonyms.
                    print(line)
                    synonyms, hypero = "", ""
                else:
                    raise ValueError(
                        "%s:%d: expected 7 or 9 '|'-separated fields, got %d: %r"
                        % (f, lineno, len(fields), line))
                sid, luid, base, sense = fields[:4]
                luid = int(luid)
                self._lexical_units[luid] = {
                    'luid': luid,
                    'synset': int(sid),
                    'base': base,
                    'sense': int(sense),
                    'pos': 'czasownik',
                    'desc': fields[5],
                    'examples': fields[6],
                    'synonyms': synonyms,
                    'hyperonyms': hypero,
                }

    @staticmethod
    def _definition_for(entry):
        """Return the definition text for a parsed record, or None.

        Preference order: description, then examples, then a
        "hiponim {...}" string built from the hyperonyms.
        """
        if entry['desc']:
            return entry['desc']
        if entry['examples']:
            return entry['examples']
        if entry['hyperonyms']:
            return "hiponim {" + entry['hyperonyms'] + "}"
        return None

    def store(self):
        """Write a definition to the LexicalUnit matching each parsed luid.

        Only the first LexicalUnit returned for a luid is updated, and only
        when the record yields a definition (see ``_definition_for``).
        Records with no matching LexicalUnit are reported on stdout as
        "<base> <sense>".
        """
        for luid, entry in self._lexical_units.items():
            lus = LexicalUnit.objects.filter(luid=luid)
            if not lus:
                # Single-argument print() gives the same output as the
                # statement form `print base, sense`.
                print(u"%s %s" % (entry['base'], entry['sense']))
                continue
            definition = self._definition_for(entry)
            if definition is not None:
                lu = lus[0]
                lu.definition = definition
                lu.save()
        
#==========================================================#
class Command(BaseCommand):
    """Management command that runs update_plWordnet_verb_meanings()."""
    args = 'none'
    help = 'Update LexicalUnit definitions from data/semantics/ela_verb.csv.'

    def handle(self, *args, **options):
        """Run the import; positional arguments and options are ignored."""
        update_plWordnet_verb_meanings()
        
def update_plWordnet_verb_meanings():
    """Parse data/semantics/ela_verb.csv under PROJECT_PATH and store the
    result, printing a start banner and "...DONE" around each step."""
    csv_path = os.path.join(PROJECT_PATH, 'data', 'semantics', 'ela_verb.csv')
    handler = PlWNCSVHandler()

    steps = (
        ("Parsing Wordnet...", lambda: handler.parse(csv_path)),
        ("Storing Data...", handler.store),
    )
    for position, (banner, action) in enumerate(steps):
        if position:
            # Blank line between consecutive steps.
            print("")
        print(banner)
        action()
        print("...DONE")