Commit 794f2d78272f1a9b584dd3ee95b65e2b02165530
1 parent: 2b4b720e
dodanie identyfikatorów sztucznie dodanych synsetów, zwiększono ilość wypisywanych "autoalternacji"
Showing 2 changed files with 51 additions and 38 deletions
dictionary/management/commands/create_TEI_walenty.py
... | ... | @@ -37,48 +37,47 @@ class Command(BaseCommand): |
37 | 37 | ) |
38 | 38 | |
39 | 39 | def handle(self, *args, **options): |
40 | - try: | |
41 | - now = datetime.datetime.now().strftime('%Y%m%d') | |
42 | - vocab_names = list(args) | |
43 | - vocab_names.sort() | |
40 | + now = datetime.datetime.now().strftime('%Y%m%d') | |
41 | + vocab_names = list(args) | |
42 | + vocab_names.sort() | |
44 | 43 | |
45 | - filename_base = self.create_filename_base(vocab_names, options, now) | |
44 | + filename_base = self.create_filename_base(vocab_names, options, now) | |
46 | 45 | |
47 | - base_path = os.path.join(WALENTY_PATH, filename_base) | |
48 | - outpath = base_path + '.xml' | |
49 | - statuses = get_statuses(options['min_status_type']) | |
46 | + base_path = os.path.join(WALENTY_PATH, filename_base) | |
47 | + outpath = base_path + '.xml' | |
48 | + statuses = get_statuses(options['min_status_type']) | |
50 | 49 | |
51 | - lemmas = Lemma.objects.filter(old=False) | |
52 | - if vocab_names: | |
53 | - lemmas = lemmas.filter(vocabulary__name__in=vocab_names) | |
54 | - lemmas = lemmas.filter(status__in=statuses) | |
55 | - if options['start_date'] != 'all': | |
56 | - lemmas = self.filter_lemmas_by_status_change(lemmas, statuses, options['start_date']) | |
50 | + lemmas = Lemma.objects.filter(old=False) | |
51 | + if vocab_names: | |
52 | + lemmas = lemmas.filter(vocabulary__name__in=vocab_names) | |
53 | + lemmas = lemmas.filter(status__in=statuses) | |
54 | + if options['start_date'] != 'all': | |
55 | + lemmas = self.filter_lemmas_by_status_change(lemmas, statuses, options['start_date']) | |
57 | 56 | |
58 | - if options['pos'] != 'all': | |
59 | - lemmas = lemmas.filter(entry_obj__pos__tag=options['pos']) | |
57 | + if options['pos'] != 'all': | |
58 | + lemmas = lemmas.filter(entry_obj__pos__tag=options['pos']) | |
60 | 59 | |
61 | - lemmas = self.add_related_lemmas(lemmas) | |
60 | + lemmas = self.add_related_lemmas(lemmas) | |
62 | 61 | |
63 | - lemmas = lemmas.order_by('entry_obj__name') | |
62 | + lemmas = lemmas.order_by('entry_obj__name') | |
64 | 63 | |
65 | - self.print_statistics(lemmas) | |
64 | + self.print_statistics(lemmas) | |
66 | 65 | |
67 | - frame_opinion_values = Frame_Opinion_Value.objects.all() | |
68 | - createteixml(outpath, lemmas, frame_opinion_values) | |
66 | + frame_opinion_values = Frame_Opinion_Value.objects.all() | |
67 | + createteixml(outpath, lemmas, frame_opinion_values) | |
69 | 68 | |
70 | - phrase_types_expand_path = os.path.join(WALENTY_PATH, | |
71 | - '%s_%s.xml' % ('phrase_types_expand', now)) | |
72 | - write_phrase_types_expansions_in_TEI(phrase_types_expand_path) | |
73 | - | |
74 | - archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz') | |
75 | - os.chdir(WALENTY_PATH) | |
76 | - archive.add(os.path.basename(outpath)) | |
77 | - archive.add(os.path.basename(phrase_types_expand_path)) | |
78 | - finally: | |
79 | - archive.close() | |
80 | - os.remove(outpath) | |
81 | - os.remove(phrase_types_expand_path) | |
69 | + phrase_types_expand_path = os.path.join(WALENTY_PATH, | |
70 | + '%s_%s.xml' % ('phrase_types_expand', now)) | |
71 | + write_phrase_types_expansions_in_TEI(phrase_types_expand_path) | |
72 | + | |
73 | + archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz') | |
74 | + os.chdir(WALENTY_PATH) | |
75 | + archive.add(os.path.basename(outpath)) | |
76 | + archive.add(os.path.basename(phrase_types_expand_path)) | |
77 | + | |
78 | + archive.close() | |
79 | + os.remove(outpath) | |
80 | + os.remove(phrase_types_expand_path) | |
82 | 81 | |
83 | 82 | def create_filename_base(self, vocab_names, options, now): |
84 | 83 | start_date = '' |
... | ... |
dictionary/teixml.py
... | ... | @@ -3,12 +3,13 @@ from semantics.models import LexicalUnitExamples |
3 | 3 | |
4 | 4 | import datetime |
5 | 5 | |
6 | -from django.db.models import Count | |
6 | +from django.db.models import Count, Min | |
7 | 7 | from lxml import etree |
8 | 8 | from xml.sax.saxutils import escape |
9 | 9 | |
10 | 10 | from dictionary.models import Argument, Atribute_Model, WalentyStat, \ |
11 | 11 | sortArguments, sortatributes, sortPositions, sort_positions |
12 | +from wordnet.models import Synset | |
12 | 13 | |
13 | 14 | XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace' |
14 | 15 | |
... | ... | @@ -796,11 +797,16 @@ def write_meaning(parent_elem, lex_unit): |
796 | 797 | gloss_content_elem = etree.SubElement(gloss_f_elem, 'string') |
797 | 798 | gloss_content_elem.text = lex_unit.glossa |
798 | 799 | |
800 | + if lex_unit.synset is None: | |
801 | + new_sid = Synset.objects.all().aggregate(Min('id'))['id__min'] | |
802 | + new_synset = Synset(id=new_sid) | |
803 | + new_synset.save() | |
804 | + lex_unit.synset = new_synset | |
799 | 805 | plwnsid_f_elem = etree.SubElement(meaning_fs_elem, 'f') |
800 | 806 | plwnsid_f_elem.attrib['name'] = 'plwnsid' |
801 | 807 | plwnsid_numeric_elem = etree.SubElement(plwnsid_f_elem, 'numeric') |
802 | 808 | plwnsid_numeric_elem.attrib['value'] = str(lex_unit.synset.id) |
803 | - | |
809 | + | |
804 | 810 | def write_connections_layer(parent_elem, lemma): |
805 | 811 | connections_layer_elem = etree.SubElement(parent_elem, 'fs') |
806 | 812 | connections_layer_elem.attrib['type'] = 'connections_layer' |
... | ... | @@ -819,9 +825,17 @@ def write_alternations(parent_elem, lemma): |
819 | 825 | for schema in lemma.frames.all(): |
820 | 826 | for frame in frames: |
821 | 827 | matching_complements = frame.complements.filter(realizations__frame=schema).distinct() |
822 | - write_alternation(parent_elem, entry, schema, frame, matching_complements, 1) | |
823 | - write_alternation(parent_elem, entry, schema, frame, matching_complements, 2) | |
824 | - | |
828 | + m = get_max_alternation_number(matching_complements, schema) | |
829 | + for i in range(m): | |
830 | + write_alternation(parent_elem, entry, schema, frame, matching_complements, i + 1) | |
831 | + | |
832 | +def get_max_alternation_number(complements, schema): | |
833 | + global_max = 0 | |
834 | + for complement in complements: | |
835 | + local_max = complement.realizations.filter(frame=schema).aggregate(Max('alternation'))['alternation__max'] | |
836 | + global_max = max(global_max, local_max) | |
837 | + return global_max | |
838 | + | |
825 | 839 | def write_alternation(parent_elem, entry, schema, frame, complements, alternation): |
826 | 840 | alternation_compls = complements.filter(realizations__alternation=alternation) |
827 | 841 | if alternation_compls.exists(): |
... | ... |