#-*- coding:utf-8 -*-
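"""Management command exporting Walenty in plain-text format.

Creates a <name>-text.tar.gz archive in WALENTY_PATH containing, for each
part of speech, a dump of all ready entries and a dump of verified entries,
plus a phrase-type realizations file and, for a full export, statistics files.
"""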
import datetime
import os
import tarfile

from collections import Counter

from django.core.management.base import BaseCommand
from accounts.models import User

from dictionary.ajax_vocabulary_management import create_text_walenty, \
    get_stats, write_stats, \
    update_walenty_stats
from dictionary.ajax_argument_realizations import create_realizations_file
from dictionary.models import Frame_Opinion, Lemma, Vocabulary, POS, \
    get_checked_statuses, get_ready_statuses

from settings import WALENTY_PATH


class Command(BaseCommand):
    args = '<dict dict ...>'
    help = 'Get Walenty in text format.'

    def handle(self, *args, **options):
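        """Export Walenty as text files packed into a tar.gz archive.

        Positional arguments are vocabulary names used to restrict the export;
        statistics files are added only when no vocabulary filter is given.
        """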
        now = datetime.datetime.now().strftime('%Y%m%d')

        # Vocabulary names given on the command line restrict the export;
        # they also become part of the archive file name.
        vocab_names = list(args)
        vocab_names.sort()
        if vocab_names:
            filename_base = '%s_%s_%s' % ('walenty', '+'.join(vocab_names), now)
        else:
            filename_base = '%s_%s' % ('walenty', now)

        realizations_path = os.path.join(WALENTY_PATH,
                                         '%s_%s.txt' % ('phrase_types_expand', now))
        checked_stats_path = os.path.join(WALENTY_PATH, u'%s_%s.txt' % (filename_base.replace('walenty', 'stats'),
                                                                        'verified'))
        ready_stats_path = os.path.join(WALENTY_PATH, u'%s_%s.txt' % (filename_base.replace('walenty', 'stats'),
                                                                      'all'))

        vocabularies = Vocabulary.objects.none()
        if vocab_names:
            vocabularies = Vocabulary.objects.filter(name__in=vocab_names)

        try:
            all_stats = Counter({})
            verified_stats = Counter({})
            base_path = os.path.join(WALENTY_PATH, filename_base)
            archive = tarfile.open(base_path + '-text.tar.gz', 'w:gz')

            # Work inside WALENTY_PATH so generated files can be added to the
            # archive by their base names.
            os.chdir(WALENTY_PATH)
            for pos in POS.objects.exclude(tag=u'unk').order_by('priority'):
                pos_stats = create_pos_archive_and_get_stats(archive, pos, vocabularies, filename_base)
                all_stats = all_stats + Counter(pos_stats['all'])
                verified_stats = verified_stats + Counter(pos_stats['verified'])

            # Phrase-type realizations accompany every export.
            create_realizations_file(realizations_path)
            archive.add(os.path.basename(realizations_path))

            # Global statistics are produced only for a full (unfiltered) export.
            if not vocab_names:
                write_stats(checked_stats_path, verified_stats)
                archive.add(os.path.basename(checked_stats_path))
                write_stats(ready_stats_path, all_stats)
                archive.add(os.path.basename(ready_stats_path))
                update_walenty_stats(all_stats)
        finally:
            # Close the archive and remove the temporary files packed into it.
            archive.close()
            os.remove(realizations_path)

            if not vocab_names:
                os.remove(checked_stats_path)
                os.remove(ready_stats_path)


def create_pos_archive_and_get_stats(archive, pos, vocabularies, filename_base):
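    """Add text dumps (and, for a full export, statistics) for one POS to the archive.

    Returns per-POS statistics as {'all': ..., 'verified': ...}; the dicts stay
    empty when statistics are not generated.
    """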
    all_stats = {}
    checked_stats = {}
    try:
        checked_statuses = get_checked_statuses()
        ready_statuses = get_ready_statuses()

        lemmas = Lemma.objects.filter(old=False, entry_obj__pos=pos).order_by('entry_obj__name')

        checked_lemmas = lemmas.filter(status__in=checked_statuses)
        ready_lemmas = lemmas.filter(status__in=ready_statuses)

        # Text dump of all ready entries for this part of speech.
        all_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'all'))
        walenty_path_ready = create_text_walenty(file_name=all_path,
                                                 lemmas=ready_lemmas,
                                                 vocabularies=vocabularies,
                                                 frame_opinions=Frame_Opinion.objects.none(),
                                                 lemma_statuses=ready_statuses,
                                                 owners=User.objects.none(),
                                                 poss=POS.objects.filter(pk=pos.pk),
                                                 add_frame_opinions=True)
        all_filename = os.path.basename(walenty_path_ready)
        archive.add(name=all_filename, arcname=os.path.join(u'%ss' % pos.tag, all_filename))

        # Text dump restricted to verified entries.
        checked_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'verified'))
        walenty_path_checked = create_text_walenty(file_name=checked_path,
                                                   lemmas=checked_lemmas,
                                                   vocabularies=vocabularies,
                                                   frame_opinions=Frame_Opinion.objects.none(),
                                                   lemma_statuses=checked_statuses,
                                                   owners=User.objects.none(),
                                                   poss=POS.objects.filter(pk=pos.pk),
                                                   add_frame_opinions=True)
        checked_filename = os.path.basename(walenty_path_checked)
        archive.add(name=checked_filename, arcname=os.path.join(u'%ss' % pos.tag, checked_filename))

        # Per-POS statistics are added only for a full export, i.e. when no
        # vocabulary filter is in effect.
        if not vocabularies.exists():
            all_stats = get_stats(ready_statuses, pos.tag)
            all_stats_path = os.path.join(WALENTY_PATH,
                                          u'%s_%ss_%s.txt' % (filename_base.replace('walenty', 'stats'),
                                                              pos.tag, 'all'))
            write_stats(all_stats_path, all_stats)
            all_stats_filename = os.path.basename(all_stats_path)
            archive.add(name=all_stats_filename, arcname=os.path.join(u'%ss' % pos.tag, all_stats_filename))

            checked_stats = get_stats(checked_statuses, pos.tag)
            checked_stats_path = os.path.join(WALENTY_PATH,
                                              u'%s_%ss_%s.txt' % (filename_base.replace('walenty', 'stats'),
                                                                  pos.tag, 'verified'))
            write_stats(checked_stats_path, checked_stats)
            checked_stats_filename = os.path.basename(checked_stats_path)
            archive.add(name=checked_stats_filename, arcname=os.path.join(u'%ss' % pos.tag, checked_stats_filename))
    finally:
        # Remove the temporary per-POS files once they are in the archive.
        os.remove(walenty_path_ready)
        os.remove(walenty_path_checked)

        if not vocabularies.exists():
            os.remove(all_stats_path)
            os.remove(checked_stats_path)

    return {'all': all_stats,
            'verified': checked_stats}