Commit 794f2d78272f1a9b584dd3ee95b65e2b02165530
1 parent: 2b4b720e
dodanie identyfikatorów sztucznie dodanych synsetów, zwiększono ilość wypisywanych "autoalternacji"
Showing 2 changed files with 51 additions and 38 deletions
dictionary/management/commands/create_TEI_walenty.py
@@ -37,48 +37,47 @@ class Command(BaseCommand): | @@ -37,48 +37,47 @@ class Command(BaseCommand): | ||
37 | ) | 37 | ) |
38 | 38 | ||
39 | def handle(self, *args, **options): | 39 | def handle(self, *args, **options): |
40 | - try: | ||
41 | - now = datetime.datetime.now().strftime('%Y%m%d') | ||
42 | - vocab_names = list(args) | ||
43 | - vocab_names.sort() | 40 | + now = datetime.datetime.now().strftime('%Y%m%d') |
41 | + vocab_names = list(args) | ||
42 | + vocab_names.sort() | ||
44 | 43 | ||
45 | - filename_base = self.create_filename_base(vocab_names, options, now) | 44 | + filename_base = self.create_filename_base(vocab_names, options, now) |
46 | 45 | ||
47 | - base_path = os.path.join(WALENTY_PATH, filename_base) | ||
48 | - outpath = base_path + '.xml' | ||
49 | - statuses = get_statuses(options['min_status_type']) | 46 | + base_path = os.path.join(WALENTY_PATH, filename_base) |
47 | + outpath = base_path + '.xml' | ||
48 | + statuses = get_statuses(options['min_status_type']) | ||
50 | 49 | ||
51 | - lemmas = Lemma.objects.filter(old=False) | ||
52 | - if vocab_names: | ||
53 | - lemmas = lemmas.filter(vocabulary__name__in=vocab_names) | ||
54 | - lemmas = lemmas.filter(status__in=statuses) | ||
55 | - if options['start_date'] != 'all': | ||
56 | - lemmas = self.filter_lemmas_by_status_change(lemmas, statuses, options['start_date']) | 50 | + lemmas = Lemma.objects.filter(old=False) |
51 | + if vocab_names: | ||
52 | + lemmas = lemmas.filter(vocabulary__name__in=vocab_names) | ||
53 | + lemmas = lemmas.filter(status__in=statuses) | ||
54 | + if options['start_date'] != 'all': | ||
55 | + lemmas = self.filter_lemmas_by_status_change(lemmas, statuses, options['start_date']) | ||
57 | 56 | ||
58 | - if options['pos'] != 'all': | ||
59 | - lemmas = lemmas.filter(entry_obj__pos__tag=options['pos']) | 57 | + if options['pos'] != 'all': |
58 | + lemmas = lemmas.filter(entry_obj__pos__tag=options['pos']) | ||
60 | 59 | ||
61 | - lemmas = self.add_related_lemmas(lemmas) | 60 | + lemmas = self.add_related_lemmas(lemmas) |
62 | 61 | ||
63 | - lemmas = lemmas.order_by('entry_obj__name') | 62 | + lemmas = lemmas.order_by('entry_obj__name') |
64 | 63 | ||
65 | - self.print_statistics(lemmas) | 64 | + self.print_statistics(lemmas) |
66 | 65 | ||
67 | - frame_opinion_values = Frame_Opinion_Value.objects.all() | ||
68 | - createteixml(outpath, lemmas, frame_opinion_values) | 66 | + frame_opinion_values = Frame_Opinion_Value.objects.all() |
67 | + createteixml(outpath, lemmas, frame_opinion_values) | ||
69 | 68 | ||
70 | - phrase_types_expand_path = os.path.join(WALENTY_PATH, | ||
71 | - '%s_%s.xml' % ('phrase_types_expand', now)) | ||
72 | - write_phrase_types_expansions_in_TEI(phrase_types_expand_path) | ||
73 | - | ||
74 | - archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz') | ||
75 | - os.chdir(WALENTY_PATH) | ||
76 | - archive.add(os.path.basename(outpath)) | ||
77 | - archive.add(os.path.basename(phrase_types_expand_path)) | ||
78 | - finally: | ||
79 | - archive.close() | ||
80 | - os.remove(outpath) | ||
81 | - os.remove(phrase_types_expand_path) | 69 | + phrase_types_expand_path = os.path.join(WALENTY_PATH, |
70 | + '%s_%s.xml' % ('phrase_types_expand', now)) | ||
71 | + write_phrase_types_expansions_in_TEI(phrase_types_expand_path) | ||
72 | + | ||
73 | + archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz') | ||
74 | + os.chdir(WALENTY_PATH) | ||
75 | + archive.add(os.path.basename(outpath)) | ||
76 | + archive.add(os.path.basename(phrase_types_expand_path)) | ||
77 | + | ||
78 | + archive.close() | ||
79 | + os.remove(outpath) | ||
80 | + os.remove(phrase_types_expand_path) | ||
82 | 81 | ||
83 | def create_filename_base(self, vocab_names, options, now): | 82 | def create_filename_base(self, vocab_names, options, now): |
84 | start_date = '' | 83 | start_date = '' |
dictionary/teixml.py
@@ -3,12 +3,13 @@ from semantics.models import LexicalUnitExamples | @@ -3,12 +3,13 @@ from semantics.models import LexicalUnitExamples | ||
3 | 3 | ||
4 | import datetime | 4 | import datetime |
5 | 5 | ||
6 | -from django.db.models import Count | 6 | +from django.db.models import Count, Min |
7 | from lxml import etree | 7 | from lxml import etree |
8 | from xml.sax.saxutils import escape | 8 | from xml.sax.saxutils import escape |
9 | 9 | ||
10 | from dictionary.models import Argument, Atribute_Model, WalentyStat, \ | 10 | from dictionary.models import Argument, Atribute_Model, WalentyStat, \ |
11 | sortArguments, sortatributes, sortPositions, sort_positions | 11 | sortArguments, sortatributes, sortPositions, sort_positions |
12 | +from wordnet.models import Synset | ||
12 | 13 | ||
13 | XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace' | 14 | XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace' |
14 | 15 | ||
@@ -796,11 +797,16 @@ def write_meaning(parent_elem, lex_unit): | @@ -796,11 +797,16 @@ def write_meaning(parent_elem, lex_unit): | ||
796 | gloss_content_elem = etree.SubElement(gloss_f_elem, 'string') | 797 | gloss_content_elem = etree.SubElement(gloss_f_elem, 'string') |
797 | gloss_content_elem.text = lex_unit.glossa | 798 | gloss_content_elem.text = lex_unit.glossa |
798 | 799 | ||
800 | + if lex_unit.synset is None: | ||
801 | + new_sid = Synset.objects.all().aggregate(Min('id'))['id__min'] | ||
802 | + new_synset = Synset(id=new_sid) | ||
803 | + new_synset.save() | ||
804 | + lex_unit.synset = new_synset | ||
799 | plwnsid_f_elem = etree.SubElement(meaning_fs_elem, 'f') | 805 | plwnsid_f_elem = etree.SubElement(meaning_fs_elem, 'f') |
800 | plwnsid_f_elem.attrib['name'] = 'plwnsid' | 806 | plwnsid_f_elem.attrib['name'] = 'plwnsid' |
801 | plwnsid_numeric_elem = etree.SubElement(plwnsid_f_elem, 'numeric') | 807 | plwnsid_numeric_elem = etree.SubElement(plwnsid_f_elem, 'numeric') |
802 | plwnsid_numeric_elem.attrib['value'] = str(lex_unit.synset.id) | 808 | plwnsid_numeric_elem.attrib['value'] = str(lex_unit.synset.id) |
803 | - | 809 | + |
804 | def write_connections_layer(parent_elem, lemma): | 810 | def write_connections_layer(parent_elem, lemma): |
805 | connections_layer_elem = etree.SubElement(parent_elem, 'fs') | 811 | connections_layer_elem = etree.SubElement(parent_elem, 'fs') |
806 | connections_layer_elem.attrib['type'] = 'connections_layer' | 812 | connections_layer_elem.attrib['type'] = 'connections_layer' |
@@ -819,9 +825,17 @@ def write_alternations(parent_elem, lemma): | @@ -819,9 +825,17 @@ def write_alternations(parent_elem, lemma): | ||
819 | for schema in lemma.frames.all(): | 825 | for schema in lemma.frames.all(): |
820 | for frame in frames: | 826 | for frame in frames: |
821 | matching_complements = frame.complements.filter(realizations__frame=schema).distinct() | 827 | matching_complements = frame.complements.filter(realizations__frame=schema).distinct() |
822 | - write_alternation(parent_elem, entry, schema, frame, matching_complements, 1) | ||
823 | - write_alternation(parent_elem, entry, schema, frame, matching_complements, 2) | ||
824 | - | 828 | + m = get_max_alternation_number(matching_complements, schema) |
829 | + for i in range(m): | ||
830 | + write_alternation(parent_elem, entry, schema, frame, matching_complements, i + 1) | ||
831 | + | ||
832 | +def get_max_alternation_number(complements, schema): | ||
833 | + global_max = 0 | ||
834 | + for complement in complements: | ||
835 | + local_max = complement.realizations.filter(frame=schema).aggregate(Max('alternation'))['alternation__max'] | ||
836 | + global_max = max(global_max, local_max) | ||
837 | + return global_max | ||
838 | + | ||
825 | def write_alternation(parent_elem, entry, schema, frame, complements, alternation): | 839 | def write_alternation(parent_elem, entry, schema, frame, complements, alternation): |
826 | alternation_compls = complements.filter(realizations__alternation=alternation) | 840 | alternation_compls = complements.filter(realizations__alternation=alternation) |
827 | if alternation_compls.exists(): | 841 | if alternation_compls.exists(): |