Commit 83ceb233ca020e4f70a9c4290901714efdf63d28
1 parent
40f59aee
Added the ability to select vocabularies when creating Walenty exports via management scripts.
Showing 4 changed files with 79 additions and 37 deletions
dictionary/ajax_vocabulary_management.py
@@ -80,6 +80,8 @@ def create_text_walenty(file_name, lemmas, vocabularies, frame_opinions, | @@ -80,6 +80,8 @@ def create_text_walenty(file_name, lemmas, vocabularies, frame_opinions, | ||
80 | lemma_statuses_pks=lemma_statuses, | 80 | lemma_statuses_pks=lemma_statuses, |
81 | poss_pks=poss, | 81 | poss_pks=poss, |
82 | add_frame_opinions=add_frame_opinions)) | 82 | add_frame_opinions=add_frame_opinions)) |
83 | + if vocabularies.exists(): | ||
84 | + lemmas = lemmas.filter(vocabulary__in=vocabularies) | ||
83 | for lemma in lemmas: | 85 | for lemma in lemmas: |
84 | founded_frame_opinions = lemma.frame_opinions.filter(value__in=frame_opinions) | 86 | founded_frame_opinions = lemma.frame_opinions.filter(value__in=frame_opinions) |
85 | #frame_chars_dict = sorted_frame_char_values_dict() | 87 | #frame_chars_dict = sorted_frame_char_values_dict() |
dictionary/management/commands/create_TEI_walenty.py
@@ -12,17 +12,29 @@ from dictionary.teixml import createteixml, write_phrase_types_expansions_in_TEI | @@ -12,17 +12,29 @@ from dictionary.teixml import createteixml, write_phrase_types_expansions_in_TEI | ||
12 | from settings import WALENTY_PATH | 12 | from settings import WALENTY_PATH |
13 | 13 | ||
14 | class Command(BaseCommand): | 14 | class Command(BaseCommand): |
15 | - args = 'none' | 15 | + args = '<dict dict ...>' |
16 | + help = 'Get Walenty in TEI format.' | ||
16 | 17 | ||
17 | def handle(self, *args, **options): | 18 | def handle(self, *args, **options): |
18 | try: | 19 | try: |
19 | now = datetime.datetime.now().strftime('%Y%m%d') | 20 | now = datetime.datetime.now().strftime('%Y%m%d') |
20 | - filename_base = '%s_%s' % ('walenty', now) | 21 | + |
22 | + vocab_names = list(args) | ||
23 | + vocab_names.sort() | ||
24 | + if vocab_names: | ||
25 | + filename_base = '%s_%s_%s' % ('walenty', '+'.join(vocab_names), now) | ||
26 | + else: | ||
27 | + filename_base = '%s_%s' % ('walenty', now) | ||
28 | + | ||
21 | base_path = os.path.join(WALENTY_PATH, filename_base) | 29 | base_path = os.path.join(WALENTY_PATH, filename_base) |
22 | outpath = base_path + '.xml' | 30 | outpath = base_path + '.xml' |
23 | ready_statuses = get_ready_statuses() | 31 | ready_statuses = get_ready_statuses() |
24 | - lemmas = Lemma.objects.filter(old=False).order_by('entry_obj__name') | ||
25 | - ready_lemmas = lemmas.filter(status__in=ready_statuses) | 32 | + |
33 | + lemmas = Lemma.objects.filter(old=False) | ||
34 | + if vocab_names: | ||
35 | + lemmas = lemmas.filter(vocabulary__name__in=vocab_names) | ||
36 | + ready_lemmas = lemmas.filter(status__in=ready_statuses).order_by('entry_obj__name') | ||
37 | + | ||
26 | frame_opinion_values = Frame_Opinion_Value.objects.all() | 38 | frame_opinion_values = Frame_Opinion_Value.objects.all() |
27 | createteixml(outpath, ready_lemmas, frame_opinion_values) | 39 | createteixml(outpath, ready_lemmas, frame_opinion_values) |
28 | archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz') | 40 | archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz') |
dictionary/management/commands/create_tex_walenty.py
@@ -14,17 +14,28 @@ from dictionary.models import Lemma, WalentyStat, get_ready_statuses | @@ -14,17 +14,28 @@ from dictionary.models import Lemma, WalentyStat, get_ready_statuses | ||
14 | from settings import WALENTY_PATH | 14 | from settings import WALENTY_PATH |
15 | 15 | ||
16 | class Command(BaseCommand): | 16 | class Command(BaseCommand): |
17 | - args = 'none' | ||
18 | - help = 'Script for creating Walenty vocabulary in tex format.' | 17 | + args = '<dict dict ...>' |
18 | + help = 'Get Walenty in TeX format.' | ||
19 | 19 | ||
20 | def handle(self, *args, **options): | 20 | def handle(self, *args, **options): |
21 | try: | 21 | try: |
22 | now = datetime.datetime.now().strftime('%Y%m%d') | 22 | now = datetime.datetime.now().strftime('%Y%m%d') |
23 | - filename_base = '%s_%s' % ('walenty', now) | 23 | + |
24 | + vocab_names = list(args) | ||
25 | + vocab_names.sort() | ||
26 | + if vocab_names: | ||
27 | + filename_base = '%s_%s_%s' % ('walenty', '+'.join(vocab_names), now) | ||
28 | + else: | ||
29 | + filename_base = '%s_%s' % ('walenty', now) | ||
30 | + | ||
24 | base_path = os.path.join(WALENTY_PATH, filename_base) | 31 | base_path = os.path.join(WALENTY_PATH, filename_base) |
25 | outpath = base_path + '.tex' | 32 | outpath = base_path + '.tex' |
26 | ready_statuses = get_ready_statuses() | 33 | ready_statuses = get_ready_statuses() |
27 | lemmas = Lemma.objects.filter(old=False) | 34 | lemmas = Lemma.objects.filter(old=False) |
35 | + | ||
36 | + if vocab_names: | ||
37 | + lemmas = lemmas.filter(vocabulary__name__in=vocab_names) | ||
38 | + | ||
28 | ready_lemmas = lemmas.filter(status__in=ready_statuses).order_by('entry_obj__name') | 39 | ready_lemmas = lemmas.filter(status__in=ready_statuses).order_by('entry_obj__name') |
29 | write_tex_walenty(outpath, ready_lemmas) | 40 | write_tex_walenty(outpath, ready_lemmas) |
30 | archive = tarfile.open(base_path + '-tex.tar.gz', 'w:gz') | 41 | archive = tarfile.open(base_path + '-tex.tar.gz', 'w:gz') |
dictionary/management/commands/create_text_walenty.py
@@ -17,17 +17,30 @@ from dictionary.models import Frame_Opinion, Lemma, Vocabulary, POS, \ | @@ -17,17 +17,30 @@ from dictionary.models import Frame_Opinion, Lemma, Vocabulary, POS, \ | ||
17 | from settings import WALENTY_PATH | 17 | from settings import WALENTY_PATH |
18 | 18 | ||
19 | class Command(BaseCommand): | 19 | class Command(BaseCommand): |
20 | - args = 'none' | 20 | + args = '<dict dict ...>' |
21 | + help = 'Get Walenty in text format.' | ||
21 | 22 | ||
22 | def handle(self, *args, **options): | 23 | def handle(self, *args, **options): |
23 | now = datetime.datetime.now().strftime('%Y%m%d') | 24 | now = datetime.datetime.now().strftime('%Y%m%d') |
24 | - filename_base = '%s_%s' % ('walenty', now) | 25 | + |
26 | + vocab_names = list(args) | ||
27 | + vocab_names.sort() | ||
28 | + if vocab_names: | ||
29 | + filename_base = '%s_%s_%s' % ('walenty', '+'.join(vocab_names), now) | ||
30 | + else: | ||
31 | + filename_base = '%s_%s' % ('walenty', now) | ||
32 | + | ||
25 | realizations_path = os.path.join(WALENTY_PATH, | 33 | realizations_path = os.path.join(WALENTY_PATH, |
26 | '%s_%s.txt' % ('phrase_types_expand', now)) | 34 | '%s_%s.txt' % ('phrase_types_expand', now)) |
27 | checked_stats_path = os.path.join(WALENTY_PATH, u'%s_%s.txt' % (filename_base.replace('walenty', 'stats'), | 35 | checked_stats_path = os.path.join(WALENTY_PATH, u'%s_%s.txt' % (filename_base.replace('walenty', 'stats'), |
28 | 'verified')) | 36 | 'verified')) |
29 | ready_stats_path = os.path.join(WALENTY_PATH, u'%s_%s.txt' % (filename_base.replace('walenty', 'stats'), | 37 | ready_stats_path = os.path.join(WALENTY_PATH, u'%s_%s.txt' % (filename_base.replace('walenty', 'stats'), |
30 | 'all')) | 38 | 'all')) |
39 | + | ||
40 | + vocabularies = Vocabulary.objects.none() | ||
41 | + if vocab_names: | ||
42 | + vocabularies = Vocabulary.objects.filter(name__in=vocab_names) | ||
43 | + | ||
31 | try: | 44 | try: |
32 | all_stats = Counter({}) | 45 | all_stats = Counter({}) |
33 | verified_stats = Counter({}) | 46 | verified_stats = Counter({}) |
@@ -35,24 +48,26 @@ class Command(BaseCommand): | @@ -35,24 +48,26 @@ class Command(BaseCommand): | ||
35 | archive = tarfile.open(base_path + '-text.tar.gz', 'w:gz') | 48 | archive = tarfile.open(base_path + '-text.tar.gz', 'w:gz') |
36 | os.chdir(WALENTY_PATH) | 49 | os.chdir(WALENTY_PATH) |
37 | for pos in POS.objects.exclude(tag=u'unk').order_by('priority'): | 50 | for pos in POS.objects.exclude(tag=u'unk').order_by('priority'): |
38 | - pos_stats = create_pos_archive_and_get_stats(archive, pos, filename_base) | 51 | + pos_stats = create_pos_archive_and_get_stats(archive, pos, vocabularies, filename_base) |
39 | all_stats = all_stats + Counter(pos_stats['all']) | 52 | all_stats = all_stats + Counter(pos_stats['all']) |
40 | verified_stats = verified_stats + Counter(pos_stats['verified']) | 53 | verified_stats = verified_stats + Counter(pos_stats['verified']) |
41 | 54 | ||
42 | create_realizations_file(realizations_path) | 55 | create_realizations_file(realizations_path) |
43 | archive.add(os.path.basename(realizations_path)) | 56 | archive.add(os.path.basename(realizations_path)) |
44 | - write_stats(checked_stats_path, verified_stats) | ||
45 | - archive.add(os.path.basename(checked_stats_path)) | ||
46 | - write_stats(ready_stats_path, all_stats) | ||
47 | - archive.add(os.path.basename(ready_stats_path)) | ||
48 | - update_walenty_stats(all_stats) | 57 | + if not vocab_names: |
58 | + write_stats(checked_stats_path, verified_stats) | ||
59 | + archive.add(os.path.basename(checked_stats_path)) | ||
60 | + write_stats(ready_stats_path, all_stats) | ||
61 | + archive.add(os.path.basename(ready_stats_path)) | ||
62 | + update_walenty_stats(all_stats) | ||
49 | finally: | 63 | finally: |
50 | archive.close() | 64 | archive.close() |
51 | os.remove(realizations_path) | 65 | os.remove(realizations_path) |
52 | - os.remove(checked_stats_path) | ||
53 | - os.remove(ready_stats_path) | 66 | + if not vocab_names: |
67 | + os.remove(checked_stats_path) | ||
68 | + os.remove(ready_stats_path) | ||
54 | 69 | ||
55 | -def create_pos_archive_and_get_stats(archive, pos, filename_base): | 70 | +def create_pos_archive_and_get_stats(archive, pos, vocabularies, filename_base): |
56 | all_stats = {} | 71 | all_stats = {} |
57 | checked_stats = {} | 72 | checked_stats = {} |
58 | try: | 73 | try: |
@@ -65,7 +80,7 @@ def create_pos_archive_and_get_stats(archive, pos, filename_base): | @@ -65,7 +80,7 @@ def create_pos_archive_and_get_stats(archive, pos, filename_base): | ||
65 | all_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'all')) | 80 | all_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'all')) |
66 | walenty_path_ready = create_text_walenty(file_name=all_path, | 81 | walenty_path_ready = create_text_walenty(file_name=all_path, |
67 | lemmas=ready_lemmas, | 82 | lemmas=ready_lemmas, |
68 | - vocabularies=Vocabulary.objects.none(), | 83 | + vocabularies=vocabularies, |
69 | frame_opinions=Frame_Opinion.objects.none(), | 84 | frame_opinions=Frame_Opinion.objects.none(), |
70 | lemma_statuses=ready_statuses, | 85 | lemma_statuses=ready_statuses, |
71 | owners=User.objects.none(), | 86 | owners=User.objects.none(), |
@@ -76,8 +91,8 @@ def create_pos_archive_and_get_stats(archive, pos, filename_base): | @@ -76,8 +91,8 @@ def create_pos_archive_and_get_stats(archive, pos, filename_base): | ||
76 | 91 | ||
77 | checked_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'verified')) | 92 | checked_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'verified')) |
78 | walenty_path_checked = create_text_walenty(file_name=checked_path, | 93 | walenty_path_checked = create_text_walenty(file_name=checked_path, |
79 | - lemmas=checked_lemmas, | ||
80 | - vocabularies=Vocabulary.objects.none(), | 94 | + lemmas=checked_lemmas, |
95 | + vocabularies=vocabularies, | ||
81 | frame_opinions=Frame_Opinion.objects.none(), | 96 | frame_opinions=Frame_Opinion.objects.none(), |
82 | lemma_statuses=checked_statuses, | 97 | lemma_statuses=checked_statuses, |
83 | owners=User.objects.none(), | 98 | owners=User.objects.none(), |
@@ -85,25 +100,27 @@ def create_pos_archive_and_get_stats(archive, pos, filename_base): | @@ -85,25 +100,27 @@ def create_pos_archive_and_get_stats(archive, pos, filename_base): | ||
85 | add_frame_opinions=True) | 100 | add_frame_opinions=True) |
86 | checked_filename = os.path.basename(walenty_path_checked) | 101 | checked_filename = os.path.basename(walenty_path_checked) |
87 | archive.add(name=checked_filename, arcname=os.path.join(u'%ss' % pos.tag, checked_filename)) | 102 | archive.add(name=checked_filename, arcname=os.path.join(u'%ss' % pos.tag, checked_filename)) |
88 | - | ||
89 | - all_stats = get_stats(ready_statuses, pos.tag) | ||
90 | - all_stats_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s.txt' % (filename_base.replace('walenty', 'stats'), | ||
91 | - pos.tag, 'all')) | ||
92 | - write_stats(all_stats_path, all_stats) | ||
93 | - all_stats_filename = os.path.basename(all_stats_path) | ||
94 | - archive.add(name=all_stats_filename, arcname=os.path.join(u'%ss' % pos.tag, all_stats_filename)) | ||
95 | - | ||
96 | - checked_stats = get_stats(checked_statuses, pos.tag) | ||
97 | - checked_stats_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s.txt' % (filename_base.replace('walenty', 'stats'), | ||
98 | - pos.tag, 'verified')) | ||
99 | - write_stats(checked_stats_path, checked_stats) | ||
100 | - checked_stats_filename = os.path.basename(checked_stats_path) | ||
101 | - archive.add(name=checked_stats_filename, arcname=os.path.join(u'%ss' % pos.tag, checked_stats_filename)) | 103 | + |
104 | + if not vocabularies.exists(): | ||
105 | + all_stats = get_stats(ready_statuses, pos.tag) | ||
106 | + all_stats_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s.txt' % (filename_base.replace('walenty', 'stats'), | ||
107 | + pos.tag, 'all')) | ||
108 | + write_stats(all_stats_path, all_stats) | ||
109 | + all_stats_filename = os.path.basename(all_stats_path) | ||
110 | + archive.add(name=all_stats_filename, arcname=os.path.join(u'%ss' % pos.tag, all_stats_filename)) | ||
111 | + | ||
112 | + checked_stats = get_stats(checked_statuses, pos.tag) | ||
113 | + checked_stats_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s.txt' % (filename_base.replace('walenty', 'stats'), | ||
114 | + pos.tag, 'verified')) | ||
115 | + write_stats(checked_stats_path, checked_stats) | ||
116 | + checked_stats_filename = os.path.basename(checked_stats_path) | ||
117 | + archive.add(name=checked_stats_filename, arcname=os.path.join(u'%ss' % pos.tag, checked_stats_filename)) | ||
102 | finally: | 118 | finally: |
103 | os.remove(walenty_path_ready) | 119 | os.remove(walenty_path_ready) |
104 | os.remove(walenty_path_checked) | 120 | os.remove(walenty_path_checked) |
105 | - os.remove(all_stats_path) | ||
106 | - os.remove(checked_stats_path) | 121 | + if not vocabularies.exists(): |
122 | + os.remove(all_stats_path) | ||
123 | + os.remove(checked_stats_path) | ||
107 | return {'all': all_stats, | 124 | return {'all': all_stats, |
108 | 'verified': checked_stats} | 125 | 'verified': checked_stats} |
109 | 126 | ||
110 | \ No newline at end of file | 127 | \ No newline at end of file |