import_sloval_rankings.py
2.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#! /usr/bin/python
# -*- coding: utf-8 -*-
import sys, os, codecs
from django.core.management.base import BaseCommand
from dictionary.models import Frame
from semantics.models import FrameRankings
from settings import PROJECT_PATH
from wordnet.models import LexicalUnit
# Batch size used by ValenceDictionaryRanksImporter: rankings are handed to
# bulk_create() BULK objects at a time, and the import loop prints a progress
# line whenever its counter is a multiple of BULK.
BULK = 50
class ValenceDictionaryRanksImporter:
    """Read frame rankings from a text file and store them in the database.

    Usage is two-step: ``importFromTextFile`` parses the file and collects
    unsaved ``FrameRankings`` objects in memory; ``exportToDatabase`` then
    writes the collected objects out in batches of ``BULK``.
    """

    def __init__(self, out=sys.stdout):
        """Create an importer with an empty list of pending rankings.

        ``out`` is stored on the instance as ``_out``; the methods of this
        class report progress with ``print`` and do not write to it.
        """
        self._out = out
        self._ranks = []

    def exportToDatabase(self):
        """Save every collected ranking via ``bulk_create``, ``BULK`` at a time.

        Progress is printed after each batch, followed by the total count.
        """
        print("STORING RANKS...")
        max_len = len(self._ranks)
        for start in range(0, max_len, BULK):
            # Slicing clips at the end of the list, so the last batch may be
            # shorter than BULK.
            FrameRankings.objects.bulk_create(self._ranks[start:start + BULK])
            print(str(start + BULK) + "...")
        print(str(max_len) + "...")
        print("...DONE")
        print("")

    def importFromTextFile(self, walenty_file):
        """Parse ``walenty_file`` (UTF-8) and collect rankings in memory.

        Only lines whose first non-blank character is ``k`` are processed;
        everything else, including blank lines, is skipped. A processed line
        must consist of exactly four tab-separated fields: the lexical unit
        written as ``base-sense``, the frame id, the rank (converted with
        ``float``) and a fourth field that is ignored.

        Lines whose frame cannot be looked up are skipped. A line with a
        different number of fields, or a lexical unit that does not split
        into exactly two parts on ``-``, raises ``ValueError``; any exception
        from the ``LexicalUnit`` lookup propagates to the caller.
        """
        with codecs.open(walenty_file, encoding='utf_8', mode='r') as walenty:
            print("IMPORTING RANKS...")
            i = 1
            for line in walenty:
                line = line.strip()
                # startswith() is safe on an empty string, whereas indexing
                # line[0] raised IndexError on blank lines.
                if not line.startswith('k'):
                    continue
                if i % BULK == 0:
                    print(str(i) + "...")
                lexical_unit, frame_id, rank, _ = line.split('\t')
                print(line)
                try:
                    frame = Frame.objects.get(id=frame_id)
                except (Frame.DoesNotExist, ValueError):
                    # Skip rankings that point at a frame that is missing, or
                    # whose id is presumably not a valid number (ValueError).
                    # Unlike a bare ``except``, this no longer hides
                    # KeyboardInterrupt or database failures.
                    continue
                base, sense = lexical_unit.split('-')
                lexical_unit = LexicalUnit.objects.get(base=base, sense=sense)
                self._ranks.append(FrameRankings(lexical_unit=lexical_unit,
                                                 frame=frame,
                                                 rank=float(rank),
                                                 done=False))
                # Counts collected rankings only; skipped lines do not
                # advance the counter.
                i += 1
            print(str(i) + "...")
            print("...DONE")
            print("")
#==========================================================#
class Command(BaseCommand):
    """Django management command that runs the frame-rankings import."""

    # No help text is provided for this command.
    help = ''
    args = 'none'

    def handle(self, **options):
        """Run ``import_rankings``; the received options are not used."""
        import_rankings()
def import_rankings():
    """Import rankings from the bundled text file and store them.

    The input is ``data/Semantics/ranking_test.txt`` under ``PROJECT_PATH``;
    it is parsed first and the collected rankings are then written to the
    database.
    """
    path_parts = (PROJECT_PATH, 'data', 'Semantics', 'ranking_test.txt')
    rankings_path = os.path.join(*path_parts)
    importer = ValenceDictionaryRanksImporter()
    importer.importFromTextFile(rankings_path)
    importer.exportToDatabase()