Commit 794f2d78272f1a9b584dd3ee95b65e2b02165530

Authored by Tomasz Bartosiak
1 parent 2b4b720e

dodanie identyfikatorów sztucznie dodanych synsetów, zwiększono ilość wypisywanych "autoalternacji"

dictionary/management/commands/create_TEI_walenty.py
... ... @@ -37,48 +37,47 @@ class Command(BaseCommand):
37 37 )
38 38  
def handle(self, *args, **options):
    """Export the selected lemmas to TEI XML and pack the result into a tar.gz.

    Positional ``args`` are vocabulary names; when any are given, only
    lemmas from those vocabularies are exported.  The options read here
    are ``min_status_type``, ``start_date`` and ``pos`` (the latter two
    accept ``'all'`` to disable the corresponding filter).

    Two intermediate XML files are written under WALENTY_PATH (the
    dictionary itself and the phrase type expansions), added to
    ``<base>-TEI.tar.gz`` and then deleted.  The intermediate files are
    removed and the archive handle is closed even when a step fails, so
    an aborted run does not leave stray XML files or an open file handle.
    """
    now = datetime.datetime.now().strftime('%Y%m%d')
    vocab_names = sorted(args)

    filename_base = self.create_filename_base(vocab_names, options, now)

    base_path = os.path.join(WALENTY_PATH, filename_base)
    outpath = base_path + '.xml'
    phrase_types_expand_path = os.path.join(
        WALENTY_PATH, '%s_%s.xml' % ('phrase_types_expand', now))
    statuses = get_statuses(options['min_status_type'])

    lemmas = Lemma.objects.filter(old=False)
    if vocab_names:
        lemmas = lemmas.filter(vocabulary__name__in=vocab_names)
    # NOTE(review): the status filter is taken to apply regardless of
    # vocab_names -- confirm against the repository's indentation.
    lemmas = lemmas.filter(status__in=statuses)
    if options['start_date'] != 'all':
        lemmas = self.filter_lemmas_by_status_change(
            lemmas, statuses, options['start_date'])

    if options['pos'] != 'all':
        lemmas = lemmas.filter(entry_obj__pos__tag=options['pos'])

    lemmas = self.add_related_lemmas(lemmas)
    lemmas = lemmas.order_by('entry_obj__name')

    self.print_statistics(lemmas)

    frame_opinion_values = Frame_Opinion_Value.objects.all()

    try:
        createteixml(outpath, lemmas, frame_opinion_values)
        write_phrase_types_expansions_in_TEI(phrase_types_expand_path)

        # The context manager closes the archive on success and on error.
        with tarfile.open(base_path + '-TEI.tar.gz', 'w:gz') as archive:
            # Members are added by base name so the archive holds flat
            # file names; that requires WALENTY_PATH as the working dir.
            os.chdir(WALENTY_PATH)
            archive.add(os.path.basename(outpath))
            archive.add(os.path.basename(phrase_types_expand_path))
    finally:
        # Either file may be missing if the failure happened before it
        # was written, so only remove what actually exists.
        for path in (outpath, phrase_types_expand_path):
            if os.path.exists(path):
                os.remove(path)
82 81  
83 82 def create_filename_base(self, vocab_names, options, now):
84 83 start_date = ''
... ...
dictionary/teixml.py
... ... @@ -3,12 +3,13 @@ from semantics.models import LexicalUnitExamples
3 3  
4 4 import datetime
5 5  
6   -from django.db.models import Count
  6 +from django.db.models import Count, Min
7 7 from lxml import etree
8 8 from xml.sax.saxutils import escape
9 9  
10 10 from dictionary.models import Argument, Atribute_Model, WalentyStat, \
11 11 sortArguments, sortatributes, sortPositions, sort_positions
  12 +from wordnet.models import Synset
12 13  
13 14 XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
14 15  
... ... @@ -796,11 +797,16 @@ def write_meaning(parent_elem, lex_unit):
796 797 gloss_content_elem = etree.SubElement(gloss_f_elem, 'string')
797 798 gloss_content_elem.text = lex_unit.glossa
798 799  
  800 + if lex_unit.synset is None:
  801 + new_sid = Synset.objects.all().aggregate(Min('id'))['id__min']
  802 + new_synset = Synset(id=new_sid)
  803 + new_synset.save()
  804 + lex_unit.synset = new_synset
799 805 plwnsid_f_elem = etree.SubElement(meaning_fs_elem, 'f')
800 806 plwnsid_f_elem.attrib['name'] = 'plwnsid'
801 807 plwnsid_numeric_elem = etree.SubElement(plwnsid_f_elem, 'numeric')
802 808 plwnsid_numeric_elem.attrib['value'] = str(lex_unit.synset.id)
803   -
  809 +
804 810 def write_connections_layer(parent_elem, lemma):
805 811 connections_layer_elem = etree.SubElement(parent_elem, 'fs')
806 812 connections_layer_elem.attrib['type'] = 'connections_layer'
... ... @@ -819,9 +825,17 @@ def write_alternations(parent_elem, lemma):
819 825 for schema in lemma.frames.all():
820 826 for frame in frames:
821 827 matching_complements = frame.complements.filter(realizations__frame=schema).distinct()
822   - write_alternation(parent_elem, entry, schema, frame, matching_complements, 1)
823   - write_alternation(parent_elem, entry, schema, frame, matching_complements, 2)
824   -
  828 + m = get_max_alternation_number(matching_complements, schema)
  829 + for i in range(m):
  830 + write_alternation(parent_elem, entry, schema, frame, matching_complements, i + 1)
  831 +
def get_max_alternation_number(complements, schema):
    """Return the highest alternation number used by the given complements.

    For every complement, its realizations restricted to ``schema`` are
    aggregated with ``Max('alternation')``; the largest value found
    across all complements is returned.  Returns 0 when ``complements``
    is empty or when no realization carries an alternation value.
    """
    # Imported locally: the module-level django.db.models import visible
    # in this file brings in only Count and Min, so a bare ``Max`` here
    # would raise NameError.
    from django.db.models import Max

    global_max = 0
    for complement in complements:
        realizations = complement.realizations.filter(frame=schema)
        local_max = realizations.aggregate(Max('alternation'))['alternation__max']
        # aggregate() yields None when there is nothing to aggregate;
        # skip it instead of comparing None with an int.
        if local_max is not None:
            global_max = max(global_max, local_max)
    return global_max
  838 +
825 839 def write_alternation(parent_elem, entry, schema, frame, complements, alternation):
826 840 alternation_compls = complements.filter(realizations__alternation=alternation)
827 841 if alternation_compls.exists():
... ...