Commit 83ceb233ca020e4f70a9c4290901714efdf63d28
1 parent
40f59aee
Added the possibility of selecting vocabularies when creating Walenty exports via management scripts.
Showing
4 changed files
with
79 additions
and
37 deletions
dictionary/ajax_vocabulary_management.py
... | ... | @@ -80,6 +80,8 @@ def create_text_walenty(file_name, lemmas, vocabularies, frame_opinions, |
80 | 80 | lemma_statuses_pks=lemma_statuses, |
81 | 81 | poss_pks=poss, |
82 | 82 | add_frame_opinions=add_frame_opinions)) |
83 | + if vocabularies.exists(): | |
84 | + lemmas = lemmas.filter(vocabulary__in=vocabularies) | |
83 | 85 | for lemma in lemmas: |
84 | 86 | founded_frame_opinions = lemma.frame_opinions.filter(value__in=frame_opinions) |
85 | 87 | #frame_chars_dict = sorted_frame_char_values_dict() |
... | ... |
dictionary/management/commands/create_TEI_walenty.py
... | ... | @@ -12,17 +12,29 @@ from dictionary.teixml import createteixml, write_phrase_types_expansions_in_TEI |
12 | 12 | from settings import WALENTY_PATH |
13 | 13 | |
14 | 14 | class Command(BaseCommand): |
15 | - args = 'none' | |
15 | + args = '<dict dict ...>' | |
16 | + help = 'Get Walenty in TEI format.' | |
16 | 17 | |
17 | 18 | def handle(self, *args, **options): |
18 | 19 | try: |
19 | 20 | now = datetime.datetime.now().strftime('%Y%m%d') |
20 | - filename_base = '%s_%s' % ('walenty', now) | |
21 | + | |
22 | + vocab_names = list(args) | |
23 | + vocab_names.sort() | |
24 | + if vocab_names: | |
25 | + filename_base = '%s_%s_%s' % ('walenty', '+'.join(vocab_names), now) | |
26 | + else: | |
27 | + filename_base = '%s_%s' % ('walenty', now) | |
28 | + | |
21 | 29 | base_path = os.path.join(WALENTY_PATH, filename_base) |
22 | 30 | outpath = base_path + '.xml' |
23 | 31 | ready_statuses = get_ready_statuses() |
24 | - lemmas = Lemma.objects.filter(old=False).order_by('entry_obj__name') | |
25 | - ready_lemmas = lemmas.filter(status__in=ready_statuses) | |
32 | + | |
33 | + lemmas = Lemma.objects.filter(old=False) | |
34 | + if vocab_names: | |
35 | + lemmas = lemmas.filter(vocabulary__name__in=vocab_names) | |
36 | + ready_lemmas = lemmas.filter(status__in=ready_statuses).order_by('entry_obj__name') | |
37 | + | |
26 | 38 | frame_opinion_values = Frame_Opinion_Value.objects.all() |
27 | 39 | createteixml(outpath, ready_lemmas, frame_opinion_values) |
28 | 40 | archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz') |
... | ... |
dictionary/management/commands/create_tex_walenty.py
... | ... | @@ -14,17 +14,28 @@ from dictionary.models import Lemma, WalentyStat, get_ready_statuses |
14 | 14 | from settings import WALENTY_PATH |
15 | 15 | |
16 | 16 | class Command(BaseCommand): |
17 | - args = 'none' | |
18 | - help = 'Script for creating Walenty vocabulary in tex format.' | |
17 | + args = '<dict dict ...>' | |
18 | + help = 'Get Walenty in TeX format.' | |
19 | 19 | |
20 | 20 | def handle(self, *args, **options): |
21 | 21 | try: |
22 | 22 | now = datetime.datetime.now().strftime('%Y%m%d') |
23 | - filename_base = '%s_%s' % ('walenty', now) | |
23 | + | |
24 | + vocab_names = list(args) | |
25 | + vocab_names.sort() | |
26 | + if vocab_names: | |
27 | + filename_base = '%s_%s_%s' % ('walenty', '+'.join(vocab_names), now) | |
28 | + else: | |
29 | + filename_base = '%s_%s' % ('walenty', now) | |
30 | + | |
24 | 31 | base_path = os.path.join(WALENTY_PATH, filename_base) |
25 | 32 | outpath = base_path + '.tex' |
26 | 33 | ready_statuses = get_ready_statuses() |
27 | 34 | lemmas = Lemma.objects.filter(old=False) |
35 | + | |
36 | + if vocab_names: | |
37 | + lemmas = lemmas.filter(vocabulary__name__in=vocab_names) | |
38 | + | |
28 | 39 | ready_lemmas = lemmas.filter(status__in=ready_statuses).order_by('entry_obj__name') |
29 | 40 | write_tex_walenty(outpath, ready_lemmas) |
30 | 41 | archive = tarfile.open(base_path + '-tex.tar.gz', 'w:gz') |
... | ... |
dictionary/management/commands/create_text_walenty.py
... | ... | @@ -17,17 +17,30 @@ from dictionary.models import Frame_Opinion, Lemma, Vocabulary, POS, \ |
17 | 17 | from settings import WALENTY_PATH |
18 | 18 | |
19 | 19 | class Command(BaseCommand): |
20 | - args = 'none' | |
20 | + args = '<dict dict ...>' | |
21 | + help = 'Get Walenty in text format.' | |
21 | 22 | |
22 | 23 | def handle(self, *args, **options): |
23 | 24 | now = datetime.datetime.now().strftime('%Y%m%d') |
24 | - filename_base = '%s_%s' % ('walenty', now) | |
25 | + | |
26 | + vocab_names = list(args) | |
27 | + vocab_names.sort() | |
28 | + if vocab_names: | |
29 | + filename_base = '%s_%s_%s' % ('walenty', '+'.join(vocab_names), now) | |
30 | + else: | |
31 | + filename_base = '%s_%s' % ('walenty', now) | |
32 | + | |
25 | 33 | realizations_path = os.path.join(WALENTY_PATH, |
26 | 34 | '%s_%s.txt' % ('phrase_types_expand', now)) |
27 | 35 | checked_stats_path = os.path.join(WALENTY_PATH, u'%s_%s.txt' % (filename_base.replace('walenty', 'stats'), |
28 | 36 | 'verified')) |
29 | 37 | ready_stats_path = os.path.join(WALENTY_PATH, u'%s_%s.txt' % (filename_base.replace('walenty', 'stats'), |
30 | 38 | 'all')) |
39 | + | |
40 | + vocabularies = Vocabulary.objects.none() | |
41 | + if vocab_names: | |
42 | + vocabularies = Vocabulary.objects.filter(name__in=vocab_names) | |
43 | + | |
31 | 44 | try: |
32 | 45 | all_stats = Counter({}) |
33 | 46 | verified_stats = Counter({}) |
... | ... | @@ -35,24 +48,26 @@ class Command(BaseCommand): |
35 | 48 | archive = tarfile.open(base_path + '-text.tar.gz', 'w:gz') |
36 | 49 | os.chdir(WALENTY_PATH) |
37 | 50 | for pos in POS.objects.exclude(tag=u'unk').order_by('priority'): |
38 | - pos_stats = create_pos_archive_and_get_stats(archive, pos, filename_base) | |
51 | + pos_stats = create_pos_archive_and_get_stats(archive, pos, vocabularies, filename_base) | |
39 | 52 | all_stats = all_stats + Counter(pos_stats['all']) |
40 | 53 | verified_stats = verified_stats + Counter(pos_stats['verified']) |
41 | 54 | |
42 | 55 | create_realizations_file(realizations_path) |
43 | 56 | archive.add(os.path.basename(realizations_path)) |
44 | - write_stats(checked_stats_path, verified_stats) | |
45 | - archive.add(os.path.basename(checked_stats_path)) | |
46 | - write_stats(ready_stats_path, all_stats) | |
47 | - archive.add(os.path.basename(ready_stats_path)) | |
48 | - update_walenty_stats(all_stats) | |
57 | + if not vocab_names: | |
58 | + write_stats(checked_stats_path, verified_stats) | |
59 | + archive.add(os.path.basename(checked_stats_path)) | |
60 | + write_stats(ready_stats_path, all_stats) | |
61 | + archive.add(os.path.basename(ready_stats_path)) | |
62 | + update_walenty_stats(all_stats) | |
49 | 63 | finally: |
50 | 64 | archive.close() |
51 | 65 | os.remove(realizations_path) |
52 | - os.remove(checked_stats_path) | |
53 | - os.remove(ready_stats_path) | |
66 | + if not vocab_names: | |
67 | + os.remove(checked_stats_path) | |
68 | + os.remove(ready_stats_path) | |
54 | 69 | |
55 | -def create_pos_archive_and_get_stats(archive, pos, filename_base): | |
70 | +def create_pos_archive_and_get_stats(archive, pos, vocabularies, filename_base): | |
56 | 71 | all_stats = {} |
57 | 72 | checked_stats = {} |
58 | 73 | try: |
... | ... | @@ -65,7 +80,7 @@ def create_pos_archive_and_get_stats(archive, pos, filename_base): |
65 | 80 | all_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'all')) |
66 | 81 | walenty_path_ready = create_text_walenty(file_name=all_path, |
67 | 82 | lemmas=ready_lemmas, |
68 | - vocabularies=Vocabulary.objects.none(), | |
83 | + vocabularies=vocabularies, | |
69 | 84 | frame_opinions=Frame_Opinion.objects.none(), |
70 | 85 | lemma_statuses=ready_statuses, |
71 | 86 | owners=User.objects.none(), |
... | ... | @@ -76,8 +91,8 @@ def create_pos_archive_and_get_stats(archive, pos, filename_base): |
76 | 91 | |
77 | 92 | checked_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'verified')) |
78 | 93 | walenty_path_checked = create_text_walenty(file_name=checked_path, |
79 | - lemmas=checked_lemmas, | |
80 | - vocabularies=Vocabulary.objects.none(), | |
94 | + lemmas=checked_lemmas, | |
95 | + vocabularies=vocabularies, | |
81 | 96 | frame_opinions=Frame_Opinion.objects.none(), |
82 | 97 | lemma_statuses=checked_statuses, |
83 | 98 | owners=User.objects.none(), |
... | ... | @@ -85,25 +100,27 @@ def create_pos_archive_and_get_stats(archive, pos, filename_base): |
85 | 100 | add_frame_opinions=True) |
86 | 101 | checked_filename = os.path.basename(walenty_path_checked) |
87 | 102 | archive.add(name=checked_filename, arcname=os.path.join(u'%ss' % pos.tag, checked_filename)) |
88 | - | |
89 | - all_stats = get_stats(ready_statuses, pos.tag) | |
90 | - all_stats_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s.txt' % (filename_base.replace('walenty', 'stats'), | |
91 | - pos.tag, 'all')) | |
92 | - write_stats(all_stats_path, all_stats) | |
93 | - all_stats_filename = os.path.basename(all_stats_path) | |
94 | - archive.add(name=all_stats_filename, arcname=os.path.join(u'%ss' % pos.tag, all_stats_filename)) | |
95 | - | |
96 | - checked_stats = get_stats(checked_statuses, pos.tag) | |
97 | - checked_stats_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s.txt' % (filename_base.replace('walenty', 'stats'), | |
98 | - pos.tag, 'verified')) | |
99 | - write_stats(checked_stats_path, checked_stats) | |
100 | - checked_stats_filename = os.path.basename(checked_stats_path) | |
101 | - archive.add(name=checked_stats_filename, arcname=os.path.join(u'%ss' % pos.tag, checked_stats_filename)) | |
103 | + | |
104 | + if not vocabularies.exists(): | |
105 | + all_stats = get_stats(ready_statuses, pos.tag) | |
106 | + all_stats_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s.txt' % (filename_base.replace('walenty', 'stats'), | |
107 | + pos.tag, 'all')) | |
108 | + write_stats(all_stats_path, all_stats) | |
109 | + all_stats_filename = os.path.basename(all_stats_path) | |
110 | + archive.add(name=all_stats_filename, arcname=os.path.join(u'%ss' % pos.tag, all_stats_filename)) | |
111 | + | |
112 | + checked_stats = get_stats(checked_statuses, pos.tag) | |
113 | + checked_stats_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s.txt' % (filename_base.replace('walenty', 'stats'), | |
114 | + pos.tag, 'verified')) | |
115 | + write_stats(checked_stats_path, checked_stats) | |
116 | + checked_stats_filename = os.path.basename(checked_stats_path) | |
117 | + archive.add(name=checked_stats_filename, arcname=os.path.join(u'%ss' % pos.tag, checked_stats_filename)) | |
102 | 118 | finally: |
103 | 119 | os.remove(walenty_path_ready) |
104 | 120 | os.remove(walenty_path_checked) |
105 | - os.remove(all_stats_path) | |
106 | - os.remove(checked_stats_path) | |
121 | + if not vocabularies.exists(): | |
122 | + os.remove(all_stats_path) | |
123 | + os.remove(checked_stats_path) | |
107 | 124 | return {'all': all_stats, |
108 | 125 | 'verified': checked_stats} |
109 | 126 | |
110 | 127 | \ No newline at end of file |
... | ... |