import_sloval_rankings.py 2.25 KB

Edit Raw Blame History

#! /usr/bin/python
# -*- coding: utf-8 -*-

import sys, os, codecs

from django.core.management.base import BaseCommand

from dictionary.models import Frame
from semantics.models import FrameRankings
from settings import PROJECT_PATH
from wordnet.models import LexicalUnit

# Batch size: how many FrameRankings objects go into one bulk_create() call,
# and the interval (in records) at which import progress is printed.
BULK = 50

class ValenceDictionaryRanksImporter:
    """Load frame rankings from a tab-separated text file into the database.

    Intended use is ``importFromTextFile(path)`` followed by
    ``exportToDatabase()``: the first call builds unsaved ``FrameRankings``
    objects in memory, the second writes them with ``bulk_create``.
    """

    def __init__(self, out=sys.stdout):
        """Create an importer with no pending rankings.

        :param out: stream kept on the instance as ``_out``. The methods of
            this class report progress with ``print`` and do not write to it.
        """
        self._out = out
        # Unsaved FrameRankings objects collected by importFromTextFile().
        self._ranks = []

    def exportToDatabase(self):
        """Save all collected rankings, at most BULK objects per query.

        The collected list is not cleared afterwards, so a second call
        submits the same objects to ``bulk_create`` again.
        """
        print("STORING RANKS...")

        max_len = len(self._ranks)
        for start in range(0, max_len, BULK):
            # Slicing clamps at the end of the list, so the last batch may be
            # shorter than BULK.
            FrameRankings.objects.bulk_create(self._ranks[start:start + BULK])
            # Upper bound of the batch just stored; on the last batch this
            # can exceed the real total, which is printed after the loop.
            print(str(start + BULK) + "...")

        print(str(max_len) + "...")
        print("...DONE")
        print("")

    def importFromTextFile(self, walenty_file):
        """Parse *walenty_file* and queue one ``FrameRankings`` per usable line.

        The file is read as UTF-8. Only lines that start with ``k`` (after
        stripping surrounding whitespace) are processed; every other line,
        including blank ones, is ignored. A processed line must consist of
        exactly four tab-separated fields: ``<base>-<sense>``, frame id,
        rank, and a fourth field that is not used.

        Lines whose frame id does not identify a ``Frame`` are skipped.
        Queued objects are created with ``done=False``.

        :param walenty_file: path of the rankings file.
        :raises ValueError: if a processed line does not have four fields,
            if its first field does not split into exactly two parts on
            ``-``, or if the rank is not a valid float.

        Exceptions raised by ``LexicalUnit.objects.get`` (no match, or more
        than one match) are not handled here and abort the import.
        """
        with codecs.open(walenty_file, encoding='utf_8', mode='r') as walenty:
            print("IMPORTING RANKS...")

            imported = 0
            for line in walenty:
                line = line.strip()
                # startswith() is safe on an empty string, whereas indexing
                # line[0] raises IndexError on blank lines.
                if not line.startswith('k'):
                    continue
                lexical_unit, frame_id, rank, _ = line.split('\t')
                print(line)
                try:
                    frame = Frame.objects.get(id=frame_id)
                except (Frame.DoesNotExist, ValueError):
                    # No such frame, or an id value the lookup rejects:
                    # skip this line but let unrelated errors surface.
                    continue
                # NOTE(review): a base that itself contains '-' yields more
                # than two parts here and raises ValueError.
                base, sense = lexical_unit.split('-')
                lexical_unit = LexicalUnit.objects.get(base=base, sense=sense)
                self._ranks.append(FrameRankings(lexical_unit=lexical_unit,
                                                 frame=frame,
                                                 rank=float(rank),
                                                 done=False))
                # Count only rankings actually queued, so the progress lines
                # and the final total match the number of stored objects.
                imported += 1
                if imported % BULK == 0:
                    print(str(imported) + "...")

            print(str(imported) + "...")
            print("...DONE")
            print("")


#==========================================================#
class Command(BaseCommand):
    """Management command that runs the frame-rankings import."""
    args = 'none'
    help = ''

    def handle(self, *args, **options):
        """Run ``import_rankings()``.

        Accepts ``*args`` as well as ``**options`` so that it matches the
        ``BaseCommand.handle(*args, **options)`` calling convention; both
        are ignored.
        """
        import_rankings()

def import_rankings():
    """Import rankings from ``data/Semantics/ranking_test.txt`` and store them.

    The path is resolved relative to ``PROJECT_PATH``. The file is parsed
    first; the collected rankings are written to the database afterwards.
    """
    importer = ValenceDictionaryRanksImporter()
    importer.importFromTextFile(
        os.path.join(PROJECT_PATH, 'data', 'Semantics', 'ranking_test.txt'))
    importer.exportToDatabase()