Commit 794f2d78272f1a9b584dd3ee95b65e2b02165530

Authored by Tomasz Bartosiak
1 parent 2b4b720e

addition of identifiers for artificially added synsets; increased the number of "autoalternations" written out

dictionary/management/commands/create_TEI_walenty.py
@@ -37,48 +37,47 @@ class Command(BaseCommand):
         )
 
     def handle(self, *args, **options):
-        try:
-            now = datetime.datetime.now().strftime('%Y%m%d')
-            vocab_names = list(args)
-            vocab_names.sort()
+        now = datetime.datetime.now().strftime('%Y%m%d')
+        vocab_names = list(args)
+        vocab_names.sort()
 
-            filename_base = self.create_filename_base(vocab_names, options, now)
+        filename_base = self.create_filename_base(vocab_names, options, now)
 
-            base_path = os.path.join(WALENTY_PATH, filename_base)
-            outpath = base_path + '.xml'
-            statuses = get_statuses(options['min_status_type'])
+        base_path = os.path.join(WALENTY_PATH, filename_base)
+        outpath = base_path + '.xml'
+        statuses = get_statuses(options['min_status_type'])
 
-            lemmas = Lemma.objects.filter(old=False)
-            if vocab_names:
-                lemmas = lemmas.filter(vocabulary__name__in=vocab_names)
-            lemmas = lemmas.filter(status__in=statuses)
-            if options['start_date'] != 'all':
-                lemmas = self.filter_lemmas_by_status_change(lemmas, statuses, options['start_date'])
+        lemmas = Lemma.objects.filter(old=False)
+        if vocab_names:
+            lemmas = lemmas.filter(vocabulary__name__in=vocab_names)
+        lemmas = lemmas.filter(status__in=statuses)
+        if options['start_date'] != 'all':
+            lemmas = self.filter_lemmas_by_status_change(lemmas, statuses, options['start_date'])
 
-            if options['pos'] != 'all':
-                lemmas = lemmas.filter(entry_obj__pos__tag=options['pos'])
+        if options['pos'] != 'all':
+            lemmas = lemmas.filter(entry_obj__pos__tag=options['pos'])
 
-            lemmas = self.add_related_lemmas(lemmas)
+        lemmas = self.add_related_lemmas(lemmas)
 
-            lemmas = lemmas.order_by('entry_obj__name')
+        lemmas = lemmas.order_by('entry_obj__name')
 
-            self.print_statistics(lemmas)
+        self.print_statistics(lemmas)
 
-            frame_opinion_values = Frame_Opinion_Value.objects.all()
-            createteixml(outpath, lemmas, frame_opinion_values)
+        frame_opinion_values = Frame_Opinion_Value.objects.all()
+        createteixml(outpath, lemmas, frame_opinion_values)
 
-            phrase_types_expand_path = os.path.join(WALENTY_PATH,
-                                                    '%s_%s.xml' % ('phrase_types_expand', now))
-            write_phrase_types_expansions_in_TEI(phrase_types_expand_path)
-
-            archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz')
-            os.chdir(WALENTY_PATH)
-            archive.add(os.path.basename(outpath))
-            archive.add(os.path.basename(phrase_types_expand_path))
-        finally:
-            archive.close()
-            os.remove(outpath)
-            os.remove(phrase_types_expand_path)
+        phrase_types_expand_path = os.path.join(WALENTY_PATH,
+                                                '%s_%s.xml' % ('phrase_types_expand', now))
+        write_phrase_types_expansions_in_TEI(phrase_types_expand_path)
+
+        archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz')
+        os.chdir(WALENTY_PATH)
+        archive.add(os.path.basename(outpath))
+        archive.add(os.path.basename(phrase_types_expand_path))
+
+        archive.close()
+        os.remove(outpath)
+        os.remove(phrase_types_expand_path)
 
     def create_filename_base(self, vocab_names, options, now):
         start_date = ''
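
With the try/finally removed, handle() now closes the archive and deletes the two intermediate XML files only after a successful export. For comparison, a minimal sketch of the same archive-and-clean-up step written so that the intermediate files are removed even when archiving fails; pack_and_clean is a hypothetical helper, not part of this commit, and it passes arcname instead of calling os.chdir:

import os
import tarfile

def pack_and_clean(base_path, outpath, phrase_types_expand_path):
    # Pack both generated XML files into <base_path>-TEI.tar.gz and
    # remove the intermediate files afterwards, even if archiving fails.
    try:
        with tarfile.open(base_path + '-TEI.tar.gz', 'w:gz') as archive:
            archive.add(outpath, arcname=os.path.basename(outpath))
            archive.add(phrase_types_expand_path,
                        arcname=os.path.basename(phrase_types_expand_path))
    finally:
        os.remove(outpath)
        os.remove(phrase_types_expand_path)
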
dictionary/teixml.py
@@ -3,12 +3,13 @@ from semantics.models import LexicalUnitExamples
 
 import datetime
 
-from django.db.models import Count
+from django.db.models import Count, Min
 from lxml import etree
 from xml.sax.saxutils import escape
 
 from dictionary.models import Argument, Atribute_Model, WalentyStat, \
     sortArguments, sortatributes, sortPositions, sort_positions
+from wordnet.models import Synset
 
 XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
 
@@ -796,11 +797,16 @@ def write_meaning(parent_elem, lex_unit):
     gloss_content_elem = etree.SubElement(gloss_f_elem, 'string')
     gloss_content_elem.text = lex_unit.glossa
 
+    if lex_unit.synset is None:
+        new_sid = Synset.objects.all().aggregate(Min('id'))['id__min']
+        new_synset = Synset(id=new_sid)
+        new_synset.save()
+        lex_unit.synset = new_synset
     plwnsid_f_elem = etree.SubElement(meaning_fs_elem, 'f')
     plwnsid_f_elem.attrib['name'] = 'plwnsid'
     plwnsid_numeric_elem = etree.SubElement(plwnsid_f_elem, 'numeric')
     plwnsid_numeric_elem.attrib['value'] = str(lex_unit.synset.id)
-
+
 def write_connections_layer(parent_elem, lemma):
     connections_layer_elem = etree.SubElement(parent_elem, 'fs')
     connections_layer_elem.attrib['type'] = 'connections_layer'
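
The fallback above ensures lex_unit.synset is populated before the plwnsid feature is serialized, so every meaning carries a synset identifier, including the artificially added synsets mentioned in the commit message. For illustration only, a standalone sketch of the feature these lines emit, assuming an example synset id of 1234 (the enclosing <fs> built by write_meaning is not reconstructed here):

from lxml import etree

# Rebuild just the plwnsid feature and print it, to show the TEI fragment produced above.
plwnsid_f_elem = etree.Element('f', name='plwnsid')
plwnsid_numeric_elem = etree.SubElement(plwnsid_f_elem, 'numeric')
plwnsid_numeric_elem.attrib['value'] = str(1234)
print(etree.tostring(plwnsid_f_elem, pretty_print=True).decode())
# <f name="plwnsid">
#   <numeric value="1234"/>
# </f>
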
@@ -819,9 +825,17 @@ def write_alternations(parent_elem, lemma):
     for schema in lemma.frames.all():
         for frame in frames:
             matching_complements = frame.complements.filter(realizations__frame=schema).distinct()
-            write_alternation(parent_elem, entry, schema, frame, matching_complements, 1)
-            write_alternation(parent_elem, entry, schema, frame, matching_complements, 2)
-
+            m = get_max_alternation_number(matching_complements, schema)
+            for i in range(m):
+                write_alternation(parent_elem, entry, schema, frame, matching_complements, i + 1)
+
+def get_max_alternation_number(complements, schema):
+    global_max = 0
+    for complement in complements:
+        local_max = complement.realizations.filter(frame=schema).aggregate(Max('alternation'))['alternation__max']
+        global_max = max(global_max, local_max)
+    return global_max
+
 def write_alternation(parent_elem, entry, schema, frame, complements, alternation):
     alternation_compls = complements.filter(realizations__alternation=alternation)
     if alternation_compls.exists():
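
The new get_max_alternation_number is what raises the number of "autoalternations" written out: instead of two fixed calls for alternations 1 and 2, write_alternations now loops up to the highest alternation index found among the matching complements' realizations. The Max aggregate it relies on lives in django.db.models, alongside the Min added to the import above. A minimal standalone sketch of the same computation, with an extra guard (not in the commit) for the None that aggregate() returns on an empty queryset:

from django.db.models import Max

def max_alternation_number(complements, schema):
    # Highest 'alternation' value among the realizations of these complements
    # for the given schema; 0 when no realization matches.
    global_max = 0
    for complement in complements:
        local_max = complement.realizations.filter(frame=schema) \
                              .aggregate(Max('alternation'))['alternation__max']
        if local_max is not None:
            global_max = max(global_max, local_max)
    return global_max
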