From 83ceb233ca020e4f70a9c4290901714efdf63d28 Mon Sep 17 00:00:00 2001 From: Bartlomiej Niton <bartek.niton@gmail.com> Date: Tue, 21 Feb 2017 11:46:58 +0100 Subject: [PATCH] Added possibility for selecting vocabularies while creating Walenty exports by scripts. --- dictionary/ajax_vocabulary_management.py | 2 ++ dictionary/management/commands/create_TEI_walenty.py | 20 ++++++++++++++++---- dictionary/management/commands/create_tex_walenty.py | 17 ++++++++++++++--- dictionary/management/commands/create_text_walenty.py | 77 +++++++++++++++++++++++++++++++++++++++++++++++------------------------------ 4 files changed, 79 insertions(+), 37 deletions(-) diff --git a/dictionary/ajax_vocabulary_management.py b/dictionary/ajax_vocabulary_management.py index 53b5cab..c5b57e1 100644 --- a/dictionary/ajax_vocabulary_management.py +++ b/dictionary/ajax_vocabulary_management.py @@ -80,6 +80,8 @@ def create_text_walenty(file_name, lemmas, vocabularies, frame_opinions, lemma_statuses_pks=lemma_statuses, poss_pks=poss, add_frame_opinions=add_frame_opinions)) + if vocabularies.exists(): + lemmas = lemmas.filter(vocabulary__in=vocabularies) for lemma in lemmas: founded_frame_opinions = lemma.frame_opinions.filter(value__in=frame_opinions) #frame_chars_dict = sorted_frame_char_values_dict() diff --git a/dictionary/management/commands/create_TEI_walenty.py b/dictionary/management/commands/create_TEI_walenty.py index 01eef1f..3b89749 100644 --- a/dictionary/management/commands/create_TEI_walenty.py +++ b/dictionary/management/commands/create_TEI_walenty.py @@ -12,17 +12,29 @@ from dictionary.teixml import createteixml, write_phrase_types_expansions_in_TEI from settings import WALENTY_PATH class Command(BaseCommand): - args = 'none' + args = '<dict dict ...>' + help = 'Get Walenty in TEI format.' def handle(self, *args, **options): try: now = datetime.datetime.now().strftime('%Y%m%d') - filename_base = '%s_%s' % ('walenty', now) + + vocab_names = list(args) + vocab_names.sort() + if vocab_names: + filename_base = '%s_%s_%s' % ('walenty', '+'.join(vocab_names), now) + else: + filename_base = '%s_%s' % ('walenty', now) + base_path = os.path.join(WALENTY_PATH, filename_base) outpath = base_path + '.xml' ready_statuses = get_ready_statuses() - lemmas = Lemma.objects.filter(old=False).order_by('entry_obj__name') - ready_lemmas = lemmas.filter(status__in=ready_statuses) + + lemmas = Lemma.objects.filter(old=False) + if vocab_names: + lemmas = lemmas.filter(vocabulary__name__in=vocab_names) + ready_lemmas = lemmas.filter(status__in=ready_statuses).order_by('entry_obj__name') + frame_opinion_values = Frame_Opinion_Value.objects.all() createteixml(outpath, ready_lemmas, frame_opinion_values) archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz') diff --git a/dictionary/management/commands/create_tex_walenty.py b/dictionary/management/commands/create_tex_walenty.py index fefe39d..c3651e1 100644 --- a/dictionary/management/commands/create_tex_walenty.py +++ b/dictionary/management/commands/create_tex_walenty.py @@ -14,17 +14,28 @@ from dictionary.models import Lemma, WalentyStat, get_ready_statuses from settings import WALENTY_PATH class Command(BaseCommand): - args = 'none' - help = 'Script for creating Walenty vocabulary in tex format.' + args = '<dict dict ...>' + help = 'Get Walenty in TeX format.' def handle(self, *args, **options): try: now = datetime.datetime.now().strftime('%Y%m%d') - filename_base = '%s_%s' % ('walenty', now) + + vocab_names = list(args) + vocab_names.sort() + if vocab_names: + filename_base = '%s_%s_%s' % ('walenty', '+'.join(vocab_names), now) + else: + filename_base = '%s_%s' % ('walenty', now) + base_path = os.path.join(WALENTY_PATH, filename_base) outpath = base_path + '.tex' ready_statuses = get_ready_statuses() lemmas = Lemma.objects.filter(old=False) + + if vocab_names: + lemmas = lemmas.filter(vocabulary__name__in=vocab_names) + ready_lemmas = lemmas.filter(status__in=ready_statuses).order_by('entry_obj__name') write_tex_walenty(outpath, ready_lemmas) archive = tarfile.open(base_path + '-tex.tar.gz', 'w:gz') diff --git a/dictionary/management/commands/create_text_walenty.py b/dictionary/management/commands/create_text_walenty.py index 05d94ac..f93a9dc 100644 --- a/dictionary/management/commands/create_text_walenty.py +++ b/dictionary/management/commands/create_text_walenty.py @@ -17,17 +17,30 @@ from dictionary.models import Frame_Opinion, Lemma, Vocabulary, POS, \ from settings import WALENTY_PATH class Command(BaseCommand): - args = 'none' + args = '<dict dict ...>' + help = 'Get Walenty in text format.' def handle(self, *args, **options): now = datetime.datetime.now().strftime('%Y%m%d') - filename_base = '%s_%s' % ('walenty', now) + + vocab_names = list(args) + vocab_names.sort() + if vocab_names: + filename_base = '%s_%s_%s' % ('walenty', '+'.join(vocab_names), now) + else: + filename_base = '%s_%s' % ('walenty', now) + realizations_path = os.path.join(WALENTY_PATH, '%s_%s.txt' % ('phrase_types_expand', now)) checked_stats_path = os.path.join(WALENTY_PATH, u'%s_%s.txt' % (filename_base.replace('walenty', 'stats'), 'verified')) ready_stats_path = os.path.join(WALENTY_PATH, u'%s_%s.txt' % (filename_base.replace('walenty', 'stats'), 'all')) + + vocabularies = Vocabulary.objects.none() + if vocab_names: + vocabularies = Vocabulary.objects.filter(name__in=vocab_names) + try: all_stats = Counter({}) verified_stats = Counter({}) @@ -35,24 +48,26 @@ class Command(BaseCommand): archive = tarfile.open(base_path + '-text.tar.gz', 'w:gz') os.chdir(WALENTY_PATH) for pos in POS.objects.exclude(tag=u'unk').order_by('priority'): - pos_stats = create_pos_archive_and_get_stats(archive, pos, filename_base) + pos_stats = create_pos_archive_and_get_stats(archive, pos, vocabularies, filename_base) all_stats = all_stats + Counter(pos_stats['all']) verified_stats = verified_stats + Counter(pos_stats['verified']) create_realizations_file(realizations_path) archive.add(os.path.basename(realizations_path)) - write_stats(checked_stats_path, verified_stats) - archive.add(os.path.basename(checked_stats_path)) - write_stats(ready_stats_path, all_stats) - archive.add(os.path.basename(ready_stats_path)) - update_walenty_stats(all_stats) + if not vocab_names: + write_stats(checked_stats_path, verified_stats) + archive.add(os.path.basename(checked_stats_path)) + write_stats(ready_stats_path, all_stats) + archive.add(os.path.basename(ready_stats_path)) + update_walenty_stats(all_stats) finally: archive.close() os.remove(realizations_path) - os.remove(checked_stats_path) - os.remove(ready_stats_path) + if not vocab_names: + os.remove(checked_stats_path) + os.remove(ready_stats_path) -def create_pos_archive_and_get_stats(archive, pos, filename_base): +def create_pos_archive_and_get_stats(archive, pos, vocabularies, filename_base): all_stats = {} checked_stats = {} try: @@ -65,7 +80,7 @@ def create_pos_archive_and_get_stats(archive, pos, filename_base): all_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'all')) walenty_path_ready = create_text_walenty(file_name=all_path, lemmas=ready_lemmas, - vocabularies=Vocabulary.objects.none(), + vocabularies=vocabularies, frame_opinions=Frame_Opinion.objects.none(), lemma_statuses=ready_statuses, owners=User.objects.none(), @@ -76,8 +91,8 @@ def create_pos_archive_and_get_stats(archive, pos, filename_base): checked_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'verified')) walenty_path_checked = create_text_walenty(file_name=checked_path, - lemmas=checked_lemmas, - vocabularies=Vocabulary.objects.none(), + lemmas=checked_lemmas, + vocabularies=vocabularies, frame_opinions=Frame_Opinion.objects.none(), lemma_statuses=checked_statuses, owners=User.objects.none(), @@ -85,25 +100,27 @@ def create_pos_archive_and_get_stats(archive, pos, filename_base): add_frame_opinions=True) checked_filename = os.path.basename(walenty_path_checked) archive.add(name=checked_filename, arcname=os.path.join(u'%ss' % pos.tag, checked_filename)) - - all_stats = get_stats(ready_statuses, pos.tag) - all_stats_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s.txt' % (filename_base.replace('walenty', 'stats'), - pos.tag, 'all')) - write_stats(all_stats_path, all_stats) - all_stats_filename = os.path.basename(all_stats_path) - archive.add(name=all_stats_filename, arcname=os.path.join(u'%ss' % pos.tag, all_stats_filename)) - - checked_stats = get_stats(checked_statuses, pos.tag) - checked_stats_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s.txt' % (filename_base.replace('walenty', 'stats'), - pos.tag, 'verified')) - write_stats(checked_stats_path, checked_stats) - checked_stats_filename = os.path.basename(checked_stats_path) - archive.add(name=checked_stats_filename, arcname=os.path.join(u'%ss' % pos.tag, checked_stats_filename)) + + if not vocabularies.exists(): + all_stats = get_stats(ready_statuses, pos.tag) + all_stats_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s.txt' % (filename_base.replace('walenty', 'stats'), + pos.tag, 'all')) + write_stats(all_stats_path, all_stats) + all_stats_filename = os.path.basename(all_stats_path) + archive.add(name=all_stats_filename, arcname=os.path.join(u'%ss' % pos.tag, all_stats_filename)) + + checked_stats = get_stats(checked_statuses, pos.tag) + checked_stats_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s.txt' % (filename_base.replace('walenty', 'stats'), + pos.tag, 'verified')) + write_stats(checked_stats_path, checked_stats) + checked_stats_filename = os.path.basename(checked_stats_path) + archive.add(name=checked_stats_filename, arcname=os.path.join(u'%ss' % pos.tag, checked_stats_filename)) finally: os.remove(walenty_path_ready) os.remove(walenty_path_checked) - os.remove(all_stats_path) - os.remove(checked_stats_path) + if not vocabularies.exists(): + os.remove(all_stats_path) + os.remove(checked_stats_path) return {'all': all_stats, 'verified': checked_stats} \ No newline at end of file -- libgit2 0.22.2