create_tex_walenty.py
# -*- coding: utf-8 -*-
import codecs
import datetime
import HTMLParser
import os
import tarfile

from django.core.management.base import BaseCommand
from django.template.loader import render_to_string
from django.utils.encoding import smart_str
from optparse import make_option

from dictionary.models import Lemma, WalentyStat, get_statuses
from settings import WALENTY_PATH

class Command(BaseCommand):
    args = '<dict dict ...>'
    help = 'Get Walenty in TeX format.'

    option_list = BaseCommand.option_list + (
        make_option('--min_status',
                    action='store',
                    type='string',
                    dest='min_status_type',
                    default='ready',
                    help='Minimum lemma status.'),
        make_option('--pos',
                    action='store',
                    type='string',
                    dest='pos',
                    default='all',
                    help='Part of speech tag.'),
        make_option('--start_date',
                    action='store',
                    type='string',
                    dest='start_date',
                    default='all',
                    help='Status change start date (format: YYYY-MM-DD).'),
    )
    def handle(self, *args, **options):
        archive = None
        outpath = None
        try:
            now = datetime.datetime.now().strftime('%Y%m%d')

            # The output file name is built from the selected vocabularies and the date.
            vocab_names = list(args)
            vocab_names.sort()
            if vocab_names:
                filename_base = '%s_%s_%s' % ('walenty', '+'.join(vocab_names), now)
            else:
                filename_base = '%s_%s' % ('walenty', now)
            base_path = os.path.join(WALENTY_PATH, filename_base)
            outpath = base_path + '.tex'

            # Select current (non-archival) lemmas with at least the requested status,
            # optionally narrowed by vocabulary, status-change date and part of speech.
            statuses = get_statuses(options['min_status_type'])
            lemmas = Lemma.objects.filter(old=False)
            if vocab_names:
                lemmas = lemmas.filter(vocabulary__name__in=vocab_names)
            lemmas = lemmas.filter(status__in=statuses)
            if options['start_date'] != 'all':
                lemmas = self.filter_lemmas_by_status_change(lemmas, statuses, options['start_date'])
            if options['pos'] != 'all':
                lemmas = lemmas.filter(entry_obj__pos__tag=options['pos'])
            lemmas = lemmas.order_by('entry_obj__name')

            # Render the TeX file and pack it into a gzipped tarball.
            write_tex_walenty(outpath, lemmas)
            archive = tarfile.open(base_path + '-tex.tar.gz', 'w:gz')
            os.chdir(WALENTY_PATH)
            archive.add(os.path.basename(outpath))
        finally:
            # Close the archive and remove the intermediate .tex file even if an
            # earlier step failed; guard against names that were never bound.
            if archive is not None:
                archive.close()
            if outpath is not None and os.path.exists(outpath):
                os.remove(outpath)
    def filter_lemmas_by_status_change(self, lemmas, statuses, start_date_str):
        # Keep only lemmas whose status history shows a change to the minimum
        # requested status on or after the given start date.
        start_date = self.parse_date(start_date_str)
        filtered_lemmas_pks = []
        for lemma in lemmas:
            if lemma.status_history.filter(status=statuses[0], date__gte=start_date).exists():
                filtered_lemmas_pks.append(lemma.pk)
        return lemmas.filter(pk__in=filtered_lemmas_pks)

    def parse_date(self, date_str):
        # Parse a YYYY-MM-DD string into a datetime at midnight.
        year, month, day = [int(part) for part in date_str.split('-')]
        return datetime.datetime(year, month, day, 0, 0)

def write_tex_walenty(outpath, lemmas):
    # Render the TeX dictionary from the Django template, unescape HTML entities
    # produced by the template engine, and write the result to outpath.
    outfile = codecs.open(outpath, 'w')
    try:
        h = HTMLParser.HTMLParser()
        outfile.write(smart_str(h.unescape(render_to_string('tex/slowal.tex',
            {'lemmas': lemmas,
             'q_frame_opinions': [],
             'download_dict': {'frame_opinions': []},
             'frames_count': WalentyStat.objects.get(label=u'Łączna liczba ram semantycznych').value,
             'schemata_count': WalentyStat.objects.get(label=u'Łączna liczba schematów').value,
             'lemmata_count': WalentyStat.objects.get(label=u'Łączna liczba haseł').value}))))
    finally:
        outfile.close()
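
# Usage sketch: as a Django management command named after this file, it would
# typically be invoked through manage.py. The vocabulary names and option values
# below are hypothetical examples, not values taken from this repository:
#
#     python manage.py create_tex_walenty --min_status=ready --pos=verb \
#         --start_date=2015-01-01 some_vocab other_vocab
#
# Positional arguments restrict the export to the named vocabularies; omitting
# them exports all current lemmas at or above the requested status.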