diff --git a/dictionary/management/commands/create_TEI_walenty.py b/dictionary/management/commands/create_TEI_walenty.py index 66a09f2..5fe49c7 100644 --- a/dictionary/management/commands/create_TEI_walenty.py +++ b/dictionary/management/commands/create_TEI_walenty.py @@ -37,48 +37,47 @@ class Command(BaseCommand): ) def handle(self, *args, **options): - try: - now = datetime.datetime.now().strftime('%Y%m%d') - vocab_names = list(args) - vocab_names.sort() + now = datetime.datetime.now().strftime('%Y%m%d') + vocab_names = list(args) + vocab_names.sort() - filename_base = self.create_filename_base(vocab_names, options, now) + filename_base = self.create_filename_base(vocab_names, options, now) - base_path = os.path.join(WALENTY_PATH, filename_base) - outpath = base_path + '.xml' - statuses = get_statuses(options['min_status_type']) + base_path = os.path.join(WALENTY_PATH, filename_base) + outpath = base_path + '.xml' + statuses = get_statuses(options['min_status_type']) - lemmas = Lemma.objects.filter(old=False) - if vocab_names: - lemmas = lemmas.filter(vocabulary__name__in=vocab_names) - lemmas = lemmas.filter(status__in=statuses) - if options['start_date'] != 'all': - lemmas = self.filter_lemmas_by_status_change(lemmas, statuses, options['start_date']) + lemmas = Lemma.objects.filter(old=False) + if vocab_names: + lemmas = lemmas.filter(vocabulary__name__in=vocab_names) + lemmas = lemmas.filter(status__in=statuses) + if options['start_date'] != 'all': + lemmas = self.filter_lemmas_by_status_change(lemmas, statuses, options['start_date']) - if options['pos'] != 'all': - lemmas = lemmas.filter(entry_obj__pos__tag=options['pos']) + if options['pos'] != 'all': + lemmas = lemmas.filter(entry_obj__pos__tag=options['pos']) - lemmas = self.add_related_lemmas(lemmas) + lemmas = self.add_related_lemmas(lemmas) - lemmas = lemmas.order_by('entry_obj__name') + lemmas = lemmas.order_by('entry_obj__name') - self.print_statistics(lemmas) + self.print_statistics(lemmas) - 
frame_opinion_values = Frame_Opinion_Value.objects.all() - createteixml(outpath, lemmas, frame_opinion_values) + frame_opinion_values = Frame_Opinion_Value.objects.all() + createteixml(outpath, lemmas, frame_opinion_values) - phrase_types_expand_path = os.path.join(WALENTY_PATH, - '%s_%s.xml' % ('phrase_types_expand', now)) - write_phrase_types_expansions_in_TEI(phrase_types_expand_path) - - archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz') - os.chdir(WALENTY_PATH) - archive.add(os.path.basename(outpath)) - archive.add(os.path.basename(phrase_types_expand_path)) - finally: - archive.close() - os.remove(outpath) - os.remove(phrase_types_expand_path) + phrase_types_expand_path = os.path.join(WALENTY_PATH, + '%s_%s.xml' % ('phrase_types_expand', now)) + write_phrase_types_expansions_in_TEI(phrase_types_expand_path) + + archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz') + os.chdir(WALENTY_PATH) + archive.add(os.path.basename(outpath)) + archive.add(os.path.basename(phrase_types_expand_path)) + + archive.close() + os.remove(outpath) + os.remove(phrase_types_expand_path) def create_filename_base(self, vocab_names, options, now): start_date = '' diff --git a/dictionary/teixml.py b/dictionary/teixml.py index ea4af5a..30fb7b4 100644 --- a/dictionary/teixml.py +++ b/dictionary/teixml.py @@ -3,12 +3,13 @@ from semantics.models import LexicalUnitExamples import datetime -from django.db.models import Count +from django.db.models import Count, Min, Max from lxml import etree from xml.sax.saxutils import escape from dictionary.models import Argument, Atribute_Model, WalentyStat, \ sortArguments, sortatributes, sortPositions, sort_positions +from wordnet.models import Synset XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace' @@ -796,11 +797,16 @@ def write_meaning(parent_elem, lex_unit): gloss_content_elem = etree.SubElement(gloss_f_elem, 'string') gloss_content_elem.text = lex_unit.glossa + if lex_unit.synset is None: + new_sid = 
(Synset.objects.all().aggregate(Min('id'))['id__min'] or 0) - 1 + new_synset = Synset(id=new_sid) + new_synset.save() + lex_unit.synset = new_synset plwnsid_f_elem = etree.SubElement(meaning_fs_elem, 'f') plwnsid_f_elem.attrib['name'] = 'plwnsid' plwnsid_numeric_elem = etree.SubElement(plwnsid_f_elem, 'numeric') plwnsid_numeric_elem.attrib['value'] = str(lex_unit.synset.id) - + def write_connections_layer(parent_elem, lemma): connections_layer_elem = etree.SubElement(parent_elem, 'fs') connections_layer_elem.attrib['type'] = 'connections_layer' @@ -819,9 +825,17 @@ def write_alternations(parent_elem, lemma): for schema in lemma.frames.all(): for frame in frames: matching_complements = frame.complements.filter(realizations__frame=schema).distinct() - write_alternation(parent_elem, entry, schema, frame, matching_complements, 1) - write_alternation(parent_elem, entry, schema, frame, matching_complements, 2) - + m = get_max_alternation_number(matching_complements, schema) + for i in range(m): + write_alternation(parent_elem, entry, schema, frame, matching_complements, i + 1) + +def get_max_alternation_number(complements, schema): + global_max = 0 + for complement in complements: + local_max = complement.realizations.filter(frame=schema).aggregate(Max('alternation'))['alternation__max'] + global_max = max(global_max, local_max or 0) + return global_max + def write_alternation(parent_elem, entry, schema, frame, complements, alternation): alternation_compls = complements.filter(realizations__alternation=alternation) if alternation_compls.exists():