dodanie identyfikatorów sztucznie dodanych synsetów, zwiększono ilość wypisywanych "autoalternacji"

Tomasz Bartosiak
1 parent 2b4b720e
Showing 2 changed files with 51 additions and 38 deletions
dictionary/management/commands/create_TEI_walenty.py
dictionary/teixml.py
@@ -37,48 +37,47 @@ class Command(BaseCommand):
     )
  
     def handle(self, *args, **options):
-        try:
-            now = datetime.datetime.now().strftime('%Y%m%d')
-            vocab_names = list(args)
-            vocab_names.sort()
+        now = datetime.datetime.now().strftime('%Y%m%d')
+        vocab_names = list(args)
+        vocab_names.sort()
  
-            filename_base = self.create_filename_base(vocab_names, options, now)
+        filename_base = self.create_filename_base(vocab_names, options, now)
  
-            base_path = os.path.join(WALENTY_PATH, filename_base)
-            outpath = base_path + '.xml'
-            statuses = get_statuses(options['min_status_type'])
+        base_path = os.path.join(WALENTY_PATH, filename_base)
+        outpath = base_path + '.xml'
+        statuses = get_statuses(options['min_status_type'])
  
-            lemmas = Lemma.objects.filter(old=False)
-            if vocab_names:
-                lemmas = lemmas.filter(vocabulary__name__in=vocab_names)
-            lemmas = lemmas.filter(status__in=statuses)
-            if options['start_date'] != 'all':
-                lemmas = self.filter_lemmas_by_status_change(lemmas, statuses, options['start_date'])
+        lemmas = Lemma.objects.filter(old=False)
+        if vocab_names:
+            lemmas = lemmas.filter(vocabulary__name__in=vocab_names)
+        lemmas = lemmas.filter(status__in=statuses)
+        if options['start_date'] != 'all':
+            lemmas = self.filter_lemmas_by_status_change(lemmas, statuses, options['start_date'])
  
-            if options['pos'] != 'all':
-                lemmas = lemmas.filter(entry_obj__pos__tag=options['pos'])
+        if options['pos'] != 'all':
+            lemmas = lemmas.filter(entry_obj__pos__tag=options['pos'])
  
-            lemmas = self.add_related_lemmas(lemmas)
+        lemmas = self.add_related_lemmas(lemmas)
  
-            lemmas = lemmas.order_by('entry_obj__name')
+        lemmas = lemmas.order_by('entry_obj__name')
  
-            self.print_statistics(lemmas)
+        self.print_statistics(lemmas)
  
-            frame_opinion_values = Frame_Opinion_Value.objects.all()
-            createteixml(outpath, lemmas, frame_opinion_values)
+        frame_opinion_values = Frame_Opinion_Value.objects.all()
+        createteixml(outpath, lemmas, frame_opinion_values)
  
-            phrase_types_expand_path = os.path.join(WALENTY_PATH, 
-                                                    '%s_%s.xml' % ('phrase_types_expand', now))
-            write_phrase_types_expansions_in_TEI(phrase_types_expand_path)
-
-            archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz')
-            os.chdir(WALENTY_PATH)
-            archive.add(os.path.basename(outpath))
-            archive.add(os.path.basename(phrase_types_expand_path))
-        finally:
-            archive.close()
-            os.remove(outpath)
-            os.remove(phrase_types_expand_path)
+        phrase_types_expand_path = os.path.join(WALENTY_PATH, 
+                                                '%s_%s.xml' % ('phrase_types_expand', now))
+        write_phrase_types_expansions_in_TEI(phrase_types_expand_path)
+
+        archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz')
+        os.chdir(WALENTY_PATH)
+        archive.add(os.path.basename(outpath))
+        archive.add(os.path.basename(phrase_types_expand_path))
+        
+        archive.close()
+        os.remove(outpath)
+        os.remove(phrase_types_expand_path)
  
     def create_filename_base(self, vocab_names, options, now):
         start_date = ''
@@ -3,12 +3,13 @@ from semantics.models import LexicalUnitExamples
  
 import datetime
  
-from django.db.models import Count
+from django.db.models import Count, Min
 from lxml import etree
 from xml.sax.saxutils import escape
  
 from dictionary.models import Argument, Atribute_Model, WalentyStat, \
                               sortArguments, sortatributes, sortPositions, sort_positions
+from wordnet.models import Synset
  
 XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
  
@@ -796,11 +797,16 @@ def write_meaning(parent_elem, lex_unit):
         gloss_content_elem = etree.SubElement(gloss_f_elem, 'string')
         gloss_content_elem.text = lex_unit.glossa
  
+    if lex_unit.synset is None:
+        new_sid = Synset.objects.all().aggregate(Min('id'))['id__min']
+        new_synset = Synset(id=new_sid)
+        new_synset.save()
+        lex_unit.synset = new_synset
     plwnsid_f_elem = etree.SubElement(meaning_fs_elem, 'f')
     plwnsid_f_elem.attrib['name'] = 'plwnsid'
     plwnsid_numeric_elem = etree.SubElement(plwnsid_f_elem, 'numeric')
     plwnsid_numeric_elem.attrib['value'] = str(lex_unit.synset.id)
-        
+    
 def write_connections_layer(parent_elem, lemma):
     connections_layer_elem = etree.SubElement(parent_elem, 'fs')
     connections_layer_elem.attrib['type'] = 'connections_layer'
@@ -819,9 +825,17 @@ def write_alternations(parent_elem, lemma):
     for schema in lemma.frames.all():
         for frame in frames:
             matching_complements = frame.complements.filter(realizations__frame=schema).distinct()
-            write_alternation(parent_elem, entry, schema, frame, matching_complements, 1)
-            write_alternation(parent_elem, entry, schema, frame, matching_complements, 2)
-                        
+            m = get_max_alternation_number(matching_complements, schema)
+            for i in range(m):
+                write_alternation(parent_elem, entry, schema, frame, matching_complements, i + 1)
+
+def get_max_alternation_number(complements, schema):
+    """Return the highest alternation number used by ``complements`` in ``schema``.
+
+    For every complement, its realizations restricted to ``schema`` are
+    aggregated with ``Max('alternation')``; the largest of those values is
+    returned.  The result is 0 when ``complements`` is empty or when no
+    matching realization carries an alternation number.
+    """
+    # ``Max`` is not among the module-level imports (only ``Count`` and ``Min``
+    # are imported from django.db.models), so bring it into scope here.
+    from django.db.models import Max
+
+    global_max = 0
+    for complement in complements:
+        local_max = complement.realizations.filter(frame=schema).aggregate(
+            Max('alternation'))['alternation__max']
+        # aggregate() yields None when there is nothing to aggregate; skip it
+        # instead of comparing None with an int.
+        if local_max is not None:
+            global_max = max(global_max, local_max)
+    return global_max
+            
 def write_alternation(parent_elem, entry, schema, frame, complements, alternation):
     alternation_compls = complements.filter(realizations__alternation=alternation)
     if alternation_compls.exists():