Blame view

dictionary/management/commands/create_text_walenty.py 6.53 KB
Bartłomiej Nitoń authored
1
2
3
4
5
#-*- coding:utf-8 -*-

import datetime
import os
import tarfile
Bartłomiej Nitoń authored
6
from collections import Counter
Bartłomiej Nitoń authored
7
8
9
10

from django.core.management.base import BaseCommand

from accounts.models import User
Bartłomiej Nitoń authored
11
12
13
from dictionary.ajax_vocabulary_management import create_text_walenty, \
                                                  get_stats, write_stats, \
                                                  update_walenty_stats
Bartłomiej Nitoń authored
14
from dictionary.ajax_argument_realizations import create_realizations_file
Bartłomiej Nitoń authored
15
16
from dictionary.models import Frame_Opinion, Lemma, Vocabulary, POS, \
                              get_checked_statuses, get_ready_statuses
Bartłomiej Nitoń authored
17
18
19
from settings import WALENTY_PATH

class Command(BaseCommand):
    """Management command that packs Walenty text exports into a tar.gz archive.

    Positional arguments are vocabulary names. The archive is written to
    ``WALENTY_PATH`` as ``walenty[_<names joined by '+'>]_<YYYYMMDD>-text.tar.gz``.
    """
    args = '<dict dict ...>'
    help = 'Get Walenty in text format.'

    def handle(self, *args, **options):
        """Build the archive for the vocabularies named in ``args``.

        For every POS except the one tagged ``unk`` (ordered by priority) the
        per-POS files are added through ``create_pos_archive_and_get_stats``
        and the returned statistics are summed.  The realizations file is
        always added.  When no vocabulary names are given, the aggregated
        "verified" and "all" statistics files are added as well and the
        aggregated "all" statistics are passed to ``update_walenty_stats``.

        Intermediate files are removed afterwards whether or not the export
        succeeded, and the process working directory is restored.
        """
        now = datetime.datetime.now().strftime('%Y%m%d')

        vocab_names = sorted(args)
        if vocab_names:
            filename_base = '%s_%s_%s' % ('walenty', '+'.join(vocab_names), now)
        else:
            filename_base = '%s_%s' % ('walenty', now)
        stats_base = filename_base.replace('walenty', 'stats')

        realizations_path = os.path.join(WALENTY_PATH,
                                         '%s_%s.txt' % ('phrase_types_expand', now))
        checked_stats_path = os.path.join(WALENTY_PATH,
                                          u'%s_%s.txt' % (stats_base, 'verified'))
        ready_stats_path = os.path.join(WALENTY_PATH,
                                        u'%s_%s.txt' % (stats_base, 'all'))

        vocabularies = Vocabulary.objects.none()
        if vocab_names:
            vocabularies = Vocabulary.objects.filter(name__in=vocab_names)

        # Every intermediate file this method may create; only the ones that
        # actually exist are deleted during cleanup.
        temp_paths = [realizations_path]
        if not vocab_names:
            temp_paths.extend([checked_stats_path, ready_stats_path])

        # Bound before the ``try`` so cleanup never touches an unbound name
        # when opening the archive itself fails.
        archive = None
        previous_cwd = os.getcwd()
        try:
            all_stats = Counter()
            verified_stats = Counter()
            base_path = os.path.join(WALENTY_PATH, filename_base)
            archive = tarfile.open(base_path + '-text.tar.gz', 'w:gz')
            # create_pos_archive_and_get_stats adds files to the archive by
            # bare file name, so it has to run with WALENTY_PATH as the cwd.
            os.chdir(WALENTY_PATH)
            for pos in POS.objects.exclude(tag=u'unk').order_by('priority'):
                pos_stats = create_pos_archive_and_get_stats(archive, pos, vocabularies, filename_base)
                # Counter addition keeps only positive counts.
                all_stats = all_stats + Counter(pos_stats['all'])
                verified_stats = verified_stats + Counter(pos_stats['verified'])

            create_realizations_file(realizations_path)
            archive.add(realizations_path,
                        arcname=os.path.basename(realizations_path))
            if not vocab_names:
                write_stats(checked_stats_path, verified_stats)
                archive.add(checked_stats_path,
                            arcname=os.path.basename(checked_stats_path))
                write_stats(ready_stats_path, all_stats)
                archive.add(ready_stats_path,
                            arcname=os.path.basename(ready_stats_path))
                update_walenty_stats(all_stats)
        finally:
            # Nested so that a failure in one cleanup step does not skip the
            # remaining ones; the original exception (if any) still propagates
            # unless a cleanup step itself fails.
            try:
                if archive is not None:
                    archive.close()
            finally:
                try:
                    self._remove_existing(temp_paths)
                finally:
                    os.chdir(previous_cwd)

    @staticmethod
    def _remove_existing(paths):
        """Delete each file in ``paths`` that exists; skip the missing ones."""
        for path in paths:
            if os.path.exists(path):
                os.remove(path)
Bartłomiej Nitoń authored
69
Bartłomiej Nitoń authored
70
def create_pos_archive_and_get_stats(archive, pos, vocabularies, filename_base):
    """Add the text exports for one part of speech to ``archive``.

    Two exports are generated with ``create_text_walenty`` -- one for lemmas
    whose status is in ``get_ready_statuses()`` (suffix ``all``) and one for
    lemmas whose status is in ``get_checked_statuses()`` (suffix
    ``verified``).  Both are stored in the archive under the ``<pos.tag>s/``
    directory.  When ``vocabularies`` is empty, statistics from ``get_stats``
    are additionally written with ``write_stats`` and archived next to them.

    Every intermediate file is deleted before returning, also on failure.
    Exceptions raised while exporting propagate to the caller.

    :param archive: open, writable ``tarfile.TarFile``.
    :param pos: POS object; its ``tag`` and ``pk`` attributes are used.
    :param vocabularies: Vocabulary queryset forwarded to ``create_text_walenty``.
    :param filename_base: common prefix of the generated file names.
    :return: dict with keys ``'all'`` and ``'verified'`` holding the
        corresponding statistics (empty dicts when ``vocabularies`` is
        not empty).
    """
    all_stats = {}
    checked_stats = {}
    pos_dir = u'%ss' % pos.tag
    stats_base = filename_base.replace('walenty', 'stats')
    # Files created so far; drives the cleanup in ``finally``.
    temp_paths = []

    def add_to_archive(path):
        # The full path is passed as ``name`` so the current working
        # directory is irrelevant; ``arcname`` fixes the in-archive location.
        filename = os.path.basename(path)
        archive.add(name=path, arcname=os.path.join(pos_dir, filename))

    def export_lemmas(label, lemmas, statuses):
        target = os.path.join(WALENTY_PATH,
                              u'%s_%ss_%s' % (filename_base, pos.tag, label))
        created = create_text_walenty(file_name=target,
                                      lemmas=lemmas,
                                      vocabularies=vocabularies,
                                      frame_opinions=Frame_Opinion.objects.none(),
                                      lemma_statuses=statuses,
                                      owners=User.objects.none(),
                                      poss=POS.objects.filter(pk=pos.pk),
                                      add_frame_opinions=True)
        temp_paths.append(created)
        add_to_archive(created)

    def export_stats(label, statuses):
        stats = get_stats(statuses, pos.tag)
        stats_path = os.path.join(WALENTY_PATH,
                                  u'%s_%ss_%s.txt' % (stats_base, pos.tag, label))
        # Registered before writing so a partially written file is cleaned up.
        temp_paths.append(stats_path)
        write_stats(stats_path, stats)
        add_to_archive(stats_path)
        return stats

    try:
        checked_statuses = get_checked_statuses()
        ready_statuses = get_ready_statuses()
        lemmas = Lemma.objects.filter(old=False, entry_obj__pos=pos).order_by('entry_obj__name')
        checked_lemmas = lemmas.filter(status__in=checked_statuses)
        ready_lemmas = lemmas.filter(status__in=ready_statuses)

        export_lemmas('all', ready_lemmas, ready_statuses)
        export_lemmas('verified', checked_lemmas, checked_statuses)

        # Statistics are only produced for an unrestricted export.
        if not vocabularies.exists():
            all_stats = export_stats('all', ready_statuses)
            checked_stats = export_stats('verified', checked_statuses)
    finally:
        # Remove only what actually exists so a cleanup error cannot hide the
        # exception that interrupted the export.
        for path in temp_paths:
            if os.path.exists(path):
                os.remove(path)

    # Deliberately outside ``finally``: returning from a ``finally`` clause
    # would discard any in-flight exception.
    return {'all': all_stats,
            'verified': checked_stats}
Bartłomiej Nitoń authored
126