#-*- coding:utf-8 -*- from semantics.models import LexicalUnitExamples #Copyright (c) 2015, Bartłomiej Nitoń #All rights reserved. #Redistribution and use in source and binary forms, with or without modification, are permitted provided #that the following conditions are met: # Redistributions of source code must retain the above copyright notice, this list of conditions and # the following disclaimer. # Redistributions in binary form must reproduce the above copyright notice, this list of conditions # and the following disclaimer in the documentation and/or other materials provided with the distribution. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. ''' File with functions responsible for creating TEI xml. ''' import datetime import operator from lxml import etree from xml.sax.saxutils import escape from dictionary.models import Atribute_Model, Frame_Opinion_Value, Frame_Char_Model, \ PositionCategory, Argument_Model, \ sortArguments, sortatributes, sortPositions, sort_positions XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace' def createteixml(outpath, lemmas, frame_opinion_values): root = write_root() write_header(root) #lemmas = lemmas.filter(entry=u'brnąć') write_entries(root, lemmas, frame_opinion_values) with open(outpath, 'w') as output_file: output_file.write(etree.tostring(root, pretty_print=True, xml_declaration=True, encoding='UTF-8', doctype=u'<!DOCTYPE TEI SYSTEM "tei_all.dtd">')) def write_root(): root = etree.Element('TEI') root.attrib[etree.QName(XML_NAMESPACE, 'lang')] = u'pl' root.attrib['xmlns'] = u'http://www.tei-c.org/ns/1.0' return root def write_header(root): tei_header = etree.SubElement(root, 'teiHeader') file_desc = etree.SubElement(tei_header, 'fileDesc') title_stmt = etree.SubElement(file_desc, 'titleStmt') title = etree.SubElement(title_stmt, 'title') title.text = u'Polish Valence Dictionary (Walenty)' publication_stmt = etree.SubElement(file_desc, 'publicationStmt') publisher = etree.SubElement(publication_stmt, 'publisher') publisher.text = u'IPI PAN ZIL' date = etree.SubElement(publication_stmt, 'date') date.attrib['when'] = datetime.datetime.now().strftime('%Y-%m-%d') source_desc = etree.SubElement(file_desc, 'sourceDesc') p = etree.SubElement(source_desc, 'p') p.text = u'File generated using Slowal. Mentioned tool available at: walenty.ipipan.waw.pl.' def write_entries(root, lemmas, frame_opinion_values): text = etree.SubElement(root, 'text') body = etree.SubElement(text, 'body') for lemma in lemmas: frame_opinions = lemma.frame_opinions.filter(value__in=frame_opinion_values) write_entry(body, lemma, frame_opinions, frame_opinion_values) def write_entry(body_elem, lemma, frame_opinions, frame_opinion_values): entry_xml_id = 'wal_%s-ent' % str(lemma.entry_obj.id) entry_elem = etree.SubElement(body_elem, 'entry') entry_elem.attrib[etree.QName(XML_NAMESPACE, 'id')] = entry_xml_id form_elem = etree.SubElement(entry_elem, 'form') orth_elem = etree.SubElement(form_elem, 'orth') orth_elem.text = lemma.entry pos_elem = etree.SubElement(form_elem, 'pos') pos_elem.text = lemma.entry_obj.pos.tag write_syntactic_layer(entry_elem, lemma, frame_opinions, frame_opinion_values) write_examples_layer(entry_elem, lemma) write_semantic_layer(entry_elem, lemma) write_meanings_layer(entry_elem, lemma) write_connections_layer(entry_elem, lemma) def write_syntactic_layer(entry_elem, lemma, frame_opinions, frame_opinion_values): synt_layer_fs_elem = etree.SubElement(entry_elem, 'fs') synt_layer_fs_elem.attrib['type'] = 'syntactic_layer' schemata_f_elem = etree.SubElement(synt_layer_fs_elem, 'f') schemata_f_elem.attrib['name'] = 'schemata' vColl_elem = etree.SubElement(schemata_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' for reflex_val in lemma.get_existing_frame_char_values(u'ZWROTNOŚĆ'): for neg_val in lemma.get_existing_frame_char_values(u'NEGATYWNOŚĆ'): for pred_val in lemma.get_existing_frame_char_values(u'PREDYKATYWNOŚĆ'): for aspect_val in lemma.get_existing_frame_char_values(u'ASPEKT'): matchingframes = lemma.get_frames_by_char_values(reflex_val=reflex_val, neg_val=neg_val, pred_val=pred_val, aspect_val=aspect_val).order_by('text_rep') for frame in matchingframes: if (not frame_opinion_values.exists() or frame_opinions.filter(frame=frame).exists()): write_schema(vColl_elem, frame, lemma) def write_schema(parent_elem, schema, lemma): schema_xml_id = 'wal_%s.%s-sch' % (str(lemma.entry_obj.id), str(schema.id)) schema_fs_elem = etree.SubElement(parent_elem, 'fs') schema_fs_elem.attrib[etree.QName(XML_NAMESPACE, 'id')] = schema_xml_id schema_fs_elem.attrib['type'] = 'schema' # opinia o ramce try: schema_opinion = lemma.frame_opinions.filter(frame=schema).all()[0].value.short except IndexError: schema_opinion = 'unk' opinion_f_elem = etree.SubElement(schema_fs_elem, 'f') opinion_f_elem.attrib['name'] = 'opinion' opinion_symbol = etree.SubElement(opinion_f_elem, 'symbol') opinion_symbol.attrib['value'] = schema_opinion # zwrotnosc reflex = schema.characteristics.get(type=u'ZWROTNOŚĆ') selfmark_f_elem = etree.SubElement(schema_fs_elem, 'f') selfmark_f_elem.attrib['name'] = 'reflexive_mark' selfmark_binary = etree.SubElement(selfmark_f_elem, 'binary') if reflex.value.value: selfmark_binary.attrib['value'] = 'true' else: selfmark_binary.attrib['value'] = 'false' # aspekt aspect = schema.characteristics.get(type=u'ASPEKT').value.value aspect_f_elem = etree.SubElement(schema_fs_elem, 'f') aspect_f_elem.attrib['name'] = 'aspect' if aspect: aspect_symbol = etree.SubElement(aspect_f_elem, 'symbol') aspect_symbol.attrib['value'] = aspect # negatywnosc negativity = schema.characteristics.get(type=u'NEGATYWNOŚĆ').value.value negativity_f_elem = etree.SubElement(schema_fs_elem, 'f') negativity_f_elem.attrib['name'] = 'negativity' if negativity: negativity_symbol = etree.SubElement(negativity_f_elem, 'symbol') negativity_symbol.attrib['value'] = negativity # predykatywnosc predicativity = schema.characteristics.get(type=u'PREDYKATYWNOŚĆ').value.value predicativity_f_elem = etree.SubElement(schema_fs_elem, 'f') predicativity_f_elem.attrib['name'] = 'predicativity' predicativity_binary = etree.SubElement(predicativity_f_elem, 'binary') if predicativity: predicativity_binary.attrib['value'] = 'true' else: predicativity_binary.attrib['value'] = 'false' # pozycje składniowe write_positions_feature(schema, schema_xml_id, schema_fs_elem) def write_positions_feature(schema, schema_xml_id, parent_elem): sorted_pos_dict = sortPositions(schema.positions.all()) if sorted_pos_dict: positions_f_elem = etree.SubElement(parent_elem, 'f') positions_f_elem.attrib['name'] = 'positions' vColl_elem = etree.SubElement(positions_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' for position in sorted_pos_dict: write_position_elem(vColl_elem, schema_xml_id, position['position']) def write_position_elem(parent_elem, schema_xml_id, position): position_xml_id = None position_fs_elem = etree.SubElement(parent_elem, 'fs') if schema_xml_id: position_xml_id = schema_xml_id.replace(u'-sch', '.%d-psn' % position.id) position_fs_elem.attrib[etree.QName(XML_NAMESPACE, 'id')] = position_xml_id position_fs_elem.attrib['type'] = 'position' functions = position.categories.filter(control=False) if functions.exists(): function_f_elem = etree.SubElement(position_fs_elem, 'f') function_f_elem.attrib['name'] = 'function' function_symbol_elem = etree.SubElement(function_f_elem, 'symbol') function_symbol_elem.attrib['value'] = functions[0].category write_control_features(position_fs_elem, position) write_phrases_feature(position_fs_elem, position, position_xml_id) def write_control_features(parent_elem, position): controls1 = position.categories.filter(control=True).exclude(category__endswith='2') controls2 = position.categories.filter(control=True, category__endswith='2') if controls1.exists() or controls2.exists(): control_f_elem = etree.SubElement(parent_elem, 'f') control_f_elem.attrib['name'] = 'control' vColl_elem = etree.SubElement(control_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' if controls1.exists(): control = controls1[0].category control1_symbol_elem = etree.SubElement(vColl_elem, 'symbol') control1_symbol_elem.attrib['value'] = control if controls2.exists(): control = controls2[0].category control2_symbol_elem = etree.SubElement(vColl_elem, 'symbol') control2_symbol_elem.attrib['value'] = control def write_phrases_feature(parent_elem, position, position_xml_id): sorted_phrases = sortArguments(position.arguments.all()) if sorted_phrases: phrases_f_elem = etree.SubElement(parent_elem, 'f') phrases_f_elem.attrib['name'] = 'phrases' vColl_elem = etree.SubElement(phrases_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' for phrase in sorted_phrases: write_phrase(vColl_elem, phrase, position_xml_id) def write_phrase(parent_elem, phrase, position_xml_id): phrase_fs_elem = etree.SubElement(parent_elem, 'fs') if position_xml_id: phrase_xml_id = position_xml_id.replace(u'-psn', '.%d-phr' % phrase.id) phrase_fs_elem.attrib[etree.QName(XML_NAMESPACE, 'id')] = phrase_xml_id phrase_fs_elem.attrib['type'] = phrase.type write_attributes(phrase_fs_elem, phrase) def write_attributes(parent_elem, phrase): attributes = sortatributes(phrase) for attribute in attributes: write_attribute(parent_elem, attribute) def write_attribute(parent_elem, attribute): attribute_model = Atribute_Model.objects.get(atr_model_name=attribute.type) attr_f_elem = etree.SubElement(parent_elem, 'f') attr_f_elem.attrib['name'] = attribute_model.sym_name attribute_type = attribute_model.type.sym_name selection_modes = attribute_model.values_selection_modes if attribute_type == 'text' and not selection_modes.exists(): write_simple_text_attr(attr_f_elem, attribute) elif attribute_type == 'text' and selection_modes.exists(): write_complex_text_attr(attr_f_elem, attribute_model, attribute) elif attribute_type == 'parameter' and not selection_modes.exists(): write_simple_parameter_attr(attr_f_elem, attribute_model, attribute) elif attribute_type == 'parameter' and selection_modes.exists(): write_complex_parameter_attr(attr_f_elem, attribute_model, attribute) elif attribute_type == 'argument' and not selection_modes.exists(): write_simple_phrase_type_attr(attr_f_elem, attribute) elif attribute_type == 'argument' and selection_modes.exists(): write_complex_phrase_type_attr(attr_f_elem, attribute) elif attribute_type == 'position': write_complex_position_attr(attr_f_elem, attribute) def write_simple_text_attr(parent_elem, attribute): string_elem = etree.SubElement(parent_elem, 'string') string_elem.text = unicode(attribute).strip("'") def write_complex_text_attr(parent_elem, attribute_model, attribute): complex_lemma_fs_elem = etree.SubElement(parent_elem, 'fs') complex_lemma_fs_elem.attrib['type'] = '%s_def' % parent_elem.attrib['name'] write_selection_mode_and_separator(complex_lemma_fs_elem, attribute_model, attribute) write_lemmas(complex_lemma_fs_elem, attribute) def write_selection_mode_and_separator(parent_elem, attr_model, attribute): if attribute.selection_mode: selection_mode = attribute.selection_mode.name else: selection_mode = attr_model.values_selection_modes.order_by('priority')[0].name sel_mode_f_elem = etree.SubElement(parent_elem, 'f') sel_mode_f_elem.attrib['name'] = 'selection_mode' sel_mode_symbol_elem = etree.SubElement(sel_mode_f_elem, 'symbol') sel_mode_symbol_elem.attrib['value'] = selection_mode if attribute.separator: separator = attribute.separator.symbol else: separator = attr_model.value_separators.order_by('priority')[0].symbol if separator == ';': separator = 'coord' elif separator == ',': separator = 'concat' separator_f_elem = etree.SubElement(parent_elem, 'f') separator_f_elem.attrib['name'] = 'cooccurrence' separator_symbol_elem = etree.SubElement(separator_f_elem, 'symbol') separator_symbol_elem.attrib['value'] = separator def write_lemmas(parent_elem, attribute): lemmas = [unicode(value) for value in attribute.values.order_by('text')] lemmas_f_elem = etree.SubElement(parent_elem, 'f') lemmas_f_elem.attrib['name'] = 'lemmas' vColl_elem = etree.SubElement(lemmas_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' for lemma in lemmas: string_elem = etree.SubElement(vColl_elem, 'string') string_elem.text = lemma.strip("'") def write_simple_parameter_attr(parent_elem, attribute_model, attribute): param_value = attribute.values.all()[0] write_parameter(parent_elem, attribute_model, param_value) def write_parameter(parent_elem, attribute_model, param_value): if attribute_model.use_subparams(): param_fs_elem = etree.SubElement(parent_elem, 'fs') param_fs_elem.attrib['type'] = '%s_def' % parent_elem.attrib['name'] value_f_elem = etree.SubElement(param_fs_elem, 'f') value_f_elem.attrib['name'] = 'conjunction' value = param_value.parameter.type.name symbol_elem = etree.SubElement(value_f_elem, 'symbol') symbol_elem.attrib['value'] = value if param_value.parameter.subparameters.exists(): write_parameter_subparameters(param_fs_elem, param_value.parameter) else: value = unicode(param_value) if attribute_model.sym_name == 'reflexive_mark': selfmark_binary = etree.SubElement(parent_elem, 'binary') if value: selfmark_binary.attrib['value'] = 'true' else: selfmark_binary.attrib['value'] = 'false' elif value: symbol_elem = etree.SubElement(parent_elem, 'symbol') symbol_elem.attrib['value'] = value def write_parameter_subparameters(parent_elem, parameter): subparams_f_elem = etree.SubElement(parent_elem, 'f') subparams_f_elem.attrib['name'] = 'constraints' vColl_elem = etree.SubElement(subparams_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' for subparameter in parameter.subparameters.order_by('name'): write_subparameter(vColl_elem, subparameter) def write_subparameter(parent_elem, subparameter): symbol_elem = etree.SubElement(parent_elem, 'symbol') symbol_elem.attrib['value'] = subparameter.name def write_complex_parameter_attr(parent_elem, attribute_model, attribute): vColl_elem = etree.SubElement(parent_elem, 'vColl') vColl_elem.attrib['org'] = 'set' for value in attribute.values.order_by('parameter__type'): write_parameter(vColl_elem, attribute_model, value) def write_simple_phrase_type_attr(parent_elem, attribute): write_phrase(parent_elem, attribute.values.all()[0].argument, None) def write_complex_phrase_type_attr(parent_elem, attribute): selection_mode = attribute.selection_mode if selection_mode.sym_name == 'list': phrases = [value.argument for value in attribute.values.all()] write_phrases_set(parent_elem, phrases) else: complex_phrase_fs_elem = etree.SubElement(parent_elem, 'fs') complex_phrase_fs_elem.attrib['type'] = '%s_def' % parent_elem.attrib['name'] write_typed_phrase_attr(complex_phrase_fs_elem, attribute) def write_phrases_set(parent_elem, phrases): vColl_elem = etree.SubElement(parent_elem, 'vColl') vColl_elem.attrib['org'] = 'set' sorted_phrases = sortArguments(phrases) for phrase in sorted_phrases: write_phrase(vColl_elem, phrase, None) def write_typed_phrase_attr(parent_elem, attribute): selection_mode = attribute.selection_mode type_f_elem = etree.SubElement(parent_elem, 'f') type_f_elem.attrib['name'] = 'name' symbol_elem = etree.SubElement(type_f_elem, 'symbol') symbol_elem.attrib['value'] = selection_mode.name if attribute.values.exists(): phrases_f_elem = etree.SubElement(parent_elem, 'f') phrases_f_elem.attrib['name'] = 'constraints' phrases = [value.argument for value in attribute.values.all()] write_phrases_set(phrases_f_elem, phrases) def write_complex_position_attr(parent_elem, attribute): complex_positions_fs_elem = etree.SubElement(parent_elem, 'fs') complex_positions_fs_elem.attrib['type'] = '%s_def' % parent_elem.attrib['name'] selection_mode = attribute.selection_mode type_f_elem = etree.SubElement(complex_positions_fs_elem, 'f') type_f_elem.attrib['name'] = 'type' symbol_elem = etree.SubElement(type_f_elem, 'symbol') symbol_elem.attrib['value'] = selection_mode.name if attribute.values.exists(): positions_f_elem = etree.SubElement(complex_positions_fs_elem, 'f') positions_f_elem.attrib['name'] = 'positions' vColl_elem = etree.SubElement(positions_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' positions = [value.position for value in attribute.values.all()] sorted_positions = sort_positions(positions) for position in sorted_positions: write_position_elem(vColl_elem, None, position) def write_examples_layer(parent_elem, lemma): examples_layer_elem = etree.SubElement(parent_elem, 'fs') examples_layer_elem.attrib['type'] = 'examples_layer' examples_f_elem = etree.SubElement(examples_layer_elem, 'f') examples_f_elem.attrib['name'] = 'examples' vColl_elem = etree.SubElement(examples_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' write_examples_feature(vColl_elem, lemma) def write_examples_feature(parent_elem, lemma): entry = lemma.entry_obj for example in lemma.nkjp_examples.order_by('opinion__priority').all(): write_example(parent_elem, entry, example) for example in lemma.lemma_nkjp_examples.order_by('opinion__priority').all(): write_example(parent_elem, entry, example) def write_example(parent_elem, entry, example): example_xml_id = u'wal_%s.%s-exm' % (str(entry.id), str(example.id)) example_fs_elem = etree.SubElement(parent_elem, 'fs') example_fs_elem.attrib[etree.QName(XML_NAMESPACE, 'id')] = example_xml_id example_fs_elem.attrib['type'] = 'example' get_and_write_meaning_link(example_fs_elem, entry, example) write_phrases_links(example_fs_elem, entry, example) sentence_f_elem = etree.SubElement(example_fs_elem, 'f') sentence_f_elem.attrib['name'] = 'sentence' sentence_content_elem = etree.SubElement(sentence_f_elem, 'string') sentence_content_elem.text = escape(example.sentence) # trzeba do zrodel dodac nazwy symboliczne source_f_elem = etree.SubElement(example_fs_elem, 'f') source_f_elem.attrib['name'] = 'source' source_symbol_elem = etree.SubElement(source_f_elem, 'symbol') source_symbol_elem.attrib['value'] = example.source.sym_name opinion_f_elem = etree.SubElement(example_fs_elem, 'f') opinion_f_elem.attrib['name'] = 'opinion' opinion_symbol_elem = etree.SubElement(opinion_f_elem, 'symbol') opinion_symbol_elem.attrib['value'] = example.opinion.opinion if example.comment: note_f_elem = etree.SubElement(example_fs_elem, 'f') note_f_elem.attrib['name'] = 'note' note_content_elem = etree.SubElement(note_f_elem, 'string') note_content_elem.text = escape(example.comment) def get_and_write_meaning_link(parent_elem, entry, example): try: lex_unit_example = LexicalUnitExamples.objects.get(example=example, lexical_unit__base=entry.name) meaning = lex_unit_example.lexical_unit meaning_xml_id = u'#wal_%s.%s-mng' % (str(entry.id), str(meaning.id)) meaning_f_elem = etree.SubElement(parent_elem, 'f') meaning_f_elem.attrib['name'] = 'meaning' meaning_link_elem = etree.SubElement(meaning_f_elem, 'fs') meaning_link_elem.attrib['sameAs'] = meaning_xml_id meaning_link_elem.attrib['type'] = 'lexical_unit' except LexicalUnitExamples.DoesNotExist: pass def write_phrases_links(parent_elem, entry, example): phrases_f_elem = etree.SubElement(parent_elem, 'f') phrases_f_elem.attrib['name'] = 'phrases' vColl_elem = etree.SubElement(phrases_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' for phrase_selection in example.arguments.all(): create_and_write_phrase_link(vColl_elem, entry, example, phrase_selection) def create_and_write_phrase_link(parent_elem, entry, example, phrase_selection): link_base = u'#wal_%d.%d.%d.' % (entry.id, example.frame.id, phrase_selection.position.id) for phrase in phrase_selection.arguments.all(): link_end = u'%d-phr' % phrase.id link = link_base + link_end phrase_link_elem = etree.SubElement(parent_elem, 'fs') phrase_link_elem.attrib['sameAs'] = link phrase_link_elem.attrib['type'] = 'phrase' def write_semantic_layer(parent_elem, lemma): semantic_layer_elem = etree.SubElement(parent_elem, 'fs') semantic_layer_elem.attrib['type'] = 'semantic_layer' frames_f_elem = etree.SubElement(semantic_layer_elem, 'f') frames_f_elem.attrib['name'] = 'frames' vColl_elem = etree.SubElement(frames_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' write_frames(vColl_elem, lemma) def write_frames(parent_elem, lemma): entry = lemma.entry_obj frames = entry.actual_frames() for frame in frames: write_frame_fs(parent_elem, entry, frame) def write_frame_fs(parent_elem, entry, frame): frame_xml_id = u'wal_%d.%d-frm' % (entry.id, frame.id) frame_fs_elem = etree.SubElement(parent_elem, 'fs') frame_fs_elem.attrib[etree.QName(XML_NAMESPACE, 'id')] = frame_xml_id frame_fs_elem.attrib['type'] = 'frame' write_frame_meanings(frame_fs_elem, entry, frame) write_frame_arguments(frame_fs_elem, entry, frame) def write_frame_meanings(parent_elem, entry, frame): meanings_f_elem = etree.SubElement(parent_elem, 'f') meanings_f_elem.attrib['name'] = 'meanings' vColl_elem = etree.SubElement(meanings_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' for meaning in frame.lexical_units.all(): write_frame_meaning_link(vColl_elem, entry, meaning) def write_frame_meaning_link(parent_elem, entry, meaning): link = u'#wal_%d.%d-mng' % (entry.id, meaning.id) lex_unit_link_elem = etree.SubElement(parent_elem, 'fs') lex_unit_link_elem.attrib['sameAs'] = link lex_unit_link_elem.attrib['type'] = 'lexical_unit' def write_frame_arguments(parent_elem, entry, frame): arguments_f_elem = etree.SubElement(parent_elem, 'f') arguments_f_elem.attrib['name'] = 'arguments' vColl_elem = etree.SubElement(arguments_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' for arg in frame.complements.all(): write_frame_argument(vColl_elem, entry, frame, arg) def write_frame_argument(parent_elem, entry, frame, arg): arg_base_id = u'wal_%d.%d' % (entry.id, frame.id) arg_xml_id = arg_base_id + u'.%d-arg' % arg.id argument_fs_elem = etree.SubElement(parent_elem, 'fs') argument_fs_elem.attrib[etree.QName(XML_NAMESPACE, 'id')] = arg_xml_id argument_fs_elem.attrib['type'] = 'argument' write_roles(argument_fs_elem, arg) write_selective_preferences(argument_fs_elem, arg, arg_base_id) def write_roles(parent_elem, arg): for role in arg.roles.order_by('gradient'): if role.gradient: attribute_f_elem = etree.SubElement(parent_elem, 'f') attribute_f_elem.attrib['name'] = 'role_attribute' attribute_symbol_elem = etree.SubElement(attribute_f_elem, 'symbol') attribute_symbol_elem.attrib['value'] = unicode(role) else: role_f_elem = etree.SubElement(parent_elem, 'f') role_f_elem.attrib['name'] = 'role' role_symbol_elem = etree.SubElement(role_f_elem, 'symbol') role_symbol_elem.attrib['value'] = unicode(role) def write_selective_preferences(parent_elem, arg, arg_base_id): if(arg.selective_preference): sel_prefs_f_elem = etree.SubElement(parent_elem, 'f') sel_prefs_f_elem.attrib['name'] = 'sel_prefs' sel_prefs_groups_fs_elem = etree.SubElement(sel_prefs_f_elem, 'fs') sel_prefs_groups_fs_elem.attrib['type'] = 'sel_prefs_groups' write_synsets_sel_prefs(sel_prefs_groups_fs_elem, arg) write_predefined_sel_prefs(sel_prefs_groups_fs_elem, arg) write_relation_sel_prefs(sel_prefs_groups_fs_elem, arg, arg_base_id) write_synset_relation_sel_prefs(sel_prefs_groups_fs_elem, arg) def write_synsets_sel_prefs(parent_elem, arg): synsets = arg.selective_preference.synsets if synsets.exists(): synsets_f_elem = etree.SubElement(parent_elem, 'f') synsets_f_elem.attrib['name'] = 'synsets' vColl_elem = etree.SubElement(synsets_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' for synset in synsets.all(): write_synset(vColl_elem, synset) def write_synset(parent_elem, synset): id_numeric_elem = etree.SubElement(parent_elem, 'numeric') id_numeric_elem.attrib['value'] = str(synset.id) def write_predefined_sel_prefs(parent_elem, arg): generals = arg.selective_preference.generals if generals.exists(): predefs_f_elem = etree.SubElement(parent_elem, 'f') predefs_f_elem.attrib['name'] = 'predefs' vColl_elem = etree.SubElement(predefs_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' for predef in generals.all(): write_predef(vColl_elem, predef) def write_predef(parent_elem, predef): name_symbol_elem = etree.SubElement(parent_elem, 'symbol') name_symbol_elem.attrib['value'] = predef.name def write_relation_sel_prefs(parent_elem, arg, arg_base_id): relations = arg.selective_preference.relations if relations.exists(): relations_f_elem = etree.SubElement(parent_elem, 'f') relations_f_elem.attrib['name'] = 'relations' vColl_elem = etree.SubElement(relations_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' for relation in relations.all(): write_relation(vColl_elem, relation, arg_base_id) def write_relation(parent_elem, relation, arg_base_id): relation_fs_elem = etree.SubElement(parent_elem, 'fs') relation_fs_elem.attrib['type'] = 'relation' relation_f_elem = etree.SubElement(relation_fs_elem, 'f') relation_f_elem.attrib['name'] = 'type' type_symbol_elem = etree.SubElement(relation_f_elem, 'symbol') type_symbol_elem.attrib['value'] = relation.relation.name to_f_elem = etree.SubElement(relation_fs_elem, 'f') to_f_elem.attrib['name'] = 'to' to_xml_link = '#%s.%d-arg' % (arg_base_id, relation.to.id) arg_link_elem = etree.SubElement(to_f_elem, 'fs') arg_link_elem.attrib['sameAs'] = to_xml_link arg_link_elem.attrib['type'] = 'argument' def write_synset_relation_sel_prefs(parent_elem, arg): relations = arg.selective_preference.synset_relations if relations.exists(): relations_f_elem = etree.SubElement(parent_elem, 'f') relations_f_elem.attrib['name'] = 'synset_relations' vColl_elem = etree.SubElement(relations_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' for relation in relations.all(): write_synset_relation(vColl_elem, relation) def write_synset_relation(parent_elem, relation): relation_fs_elem = etree.SubElement(parent_elem, 'fs') relation_fs_elem.attrib['type'] = 'synset_relation' relation_f_elem = etree.SubElement(relation_fs_elem, 'f') relation_f_elem.attrib['name'] = 'type' type_symbol_elem = etree.SubElement(relation_f_elem, 'symbol') type_symbol_elem.attrib['value'] = relation.relation.name to_f_elem = etree.SubElement(relation_fs_elem, 'f') to_f_elem.attrib['name'] = 'to' write_synset(to_f_elem, relation.to) def write_meanings_layer(parent_elem, lemma): meanings_layer_elem = etree.SubElement(parent_elem, 'fs') meanings_layer_elem.attrib['type'] = 'meanings_layer' meanings_f_elem = etree.SubElement(meanings_layer_elem, 'f') meanings_f_elem.attrib['name'] = 'meanings' vColl_elem = etree.SubElement(meanings_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' write_meanings(vColl_elem, lemma) def write_meanings(parent_elem, lemma): entry = lemma.entry_obj lex_units = entry.lexical_units() for lex_unit in lex_units.all(): write_meaning(parent_elem, entry, lex_unit) def write_meaning(parent_elem, entry, lex_unit): meaning_xml_id = u'wal_%d.%d-mng' % (entry.id, lex_unit.id) meaning_fs_elem = etree.SubElement(parent_elem, 'fs') meaning_fs_elem.attrib[etree.QName(XML_NAMESPACE, 'id')] = meaning_xml_id meaning_fs_elem.attrib['type'] = 'lexical_unit' name_f_elem = etree.SubElement(meaning_fs_elem, 'f') name_f_elem.attrib['name'] = 'name' name_content_elem = etree.SubElement(name_f_elem, 'string') name_content_elem.text = lex_unit.base variant_f_elem = etree.SubElement(meaning_fs_elem, 'f') variant_f_elem.attrib['name'] = 'variant' variant_string_elem = etree.SubElement(variant_f_elem, 'string') variant_string_elem.text = lex_unit.sense plwnluid_f_elem = etree.SubElement(meaning_fs_elem, 'f') plwnluid_f_elem.attrib['name'] = 'plwnluid' plwnluid_numeric_elem = etree.SubElement(plwnluid_f_elem, 'numeric') plwnluid_numeric_elem.attrib['value'] = str(lex_unit.luid) if lex_unit.glossa: gloss_f_elem = etree.SubElement(meaning_fs_elem, 'f') gloss_f_elem.attrib['name'] = 'gloss' gloss_content_elem = etree.SubElement(gloss_f_elem, 'string') gloss_content_elem.text = lex_unit.glossa def write_connections_layer(parent_elem, lemma): connections_layer_elem = etree.SubElement(parent_elem, 'fs') connections_layer_elem.attrib['type'] = 'connections_layer' alternations_f_elem = etree.SubElement(connections_layer_elem, 'f') alternations_f_elem.attrib['name'] = 'alternations' vColl_elem = etree.SubElement(alternations_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' write_alternations(vColl_elem, lemma) def write_alternations(parent_elem, lemma): entry = lemma.entry_obj frames = entry.actual_frames() for schema in lemma.frames.all(): for frame in frames: matching_complements = frame.complements.filter(realizations__frame=schema).distinct() if matching_complements.filter(realizations__alternation=1).exists(): alternation_fs_elem = etree.SubElement(parent_elem, 'fs') alternation_fs_elem.attrib['type'] = 'alternation' connections_f_elem = etree.SubElement(alternation_fs_elem, 'f') connections_f_elem.attrib['name'] = 'connections' vColl_elem = etree.SubElement(connections_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' for arg in frame.complements.all(): alt_realizations = arg.realizations.filter(frame=schema, alternation=1) if alt_realizations.exists(): write_connection(vColl_elem, entry, frame, arg, alt_realizations) if matching_complements.filter(realizations__alternation=2).exists(): alternation_fs_elem = etree.SubElement(parent_elem, 'fs') alternation_fs_elem.attrib['type'] = 'alternation' connections_f_elem = etree.SubElement(alternation_fs_elem, 'f') connections_f_elem.attrib['name'] = 'connections' vColl_elem = etree.SubElement(connections_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' for arg in frame.complements.all(): alt_realizations = arg.realizations.filter(frame=schema, alternation=2) if alt_realizations.exists(): write_connection(vColl_elem, entry, frame, arg, alt_realizations) def write_connection(parent_elem, entry, frame, arg, realizations): connection_fs_elem = etree.SubElement(parent_elem, 'fs') connection_fs_elem.attrib['type'] = 'connection' write_argument(connection_fs_elem, entry, frame, arg) write_phrases(connection_fs_elem, entry, realizations) def write_argument(parent_elem, entry, frame, arg): arg_f_elem = etree.SubElement(parent_elem, 'f') arg_f_elem.attrib['name'] = 'argument' arg_link = u'#wal_%d.%d.%d-arg' % (entry.id, frame.id, arg.id) arg_link_fs_elem = etree.SubElement(arg_f_elem, 'fs') arg_link_fs_elem.attrib['sameAs'] = arg_link arg_link_fs_elem.attrib['type'] = 'argument' def write_phrases(parent_elem, entry, realizations): phrases_f_elem = etree.SubElement(parent_elem, 'f') phrases_f_elem.attrib['name'] = 'phrases' vColl_elem = etree.SubElement(phrases_f_elem, 'vColl') vColl_elem.attrib['org'] = 'set' for realization in realizations: phrase_xml_link = u'#wal_%d.%d.%d.%d-phr' % (entry.id, realization.frame.id, realization.position.id, realization.argument.id) phrase_link_elem = etree.SubElement(vColl_elem, 'fs') phrase_link_elem.attrib['sameAs'] = phrase_xml_link phrase_link_elem.attrib['type'] = 'phrase' def writefsdecl(outfile): ''' Write feature structures declarations ''' outfile.write(u' <encodingDesc>\n') outfile.write(u' <fsdDecl>\n') # syntacticBahaviour fs declaration outfile.write(u' <fsDecl type="syntacticBehaviour">\n') outfile.write(u' <fsDescr>Describes syntactic behaviour of entry</fsDescr>\n') outfile.write(u' <fDecl name="frames">\n') outfile.write(u' <fDescr>syntactic frames</fDescr>\n') outfile.write(u' <vRange>\n') outfile.write(u' <vColl org="list">\n') outfile.write(u' <fs type="frame"/>\n') outfile.write(u' </vColl>\n') outfile.write(u' </vRange>\n') outfile.write(u' </fDecl>\n') outfile.write(u' </fsDecl>\n') # frame fs declaration outfile.write(u' <fsDecl type="frame">\n') outfile.write(u' <fsDescr>Describes syntactic frame</fsDescr>\n') # frame opinion outfile.write(u' <fDecl name="opinion">\n') outfile.write(u' <fDescr>frame opinion</fDescr>\n') outfile.write(u' <vRange>\n') outfile.write(u' <vAlt>\n') for alt in Frame_Opinion_Value.objects.order_by('priority'): outfile.write(u' <symbol value="%s"/>\n' % alt.short) outfile.write(u' </vAlt>\n') outfile.write(u' </vRange>\n') outfile.write(u' </fDecl>\n') # reflex outfile.write(u' <fDecl name="reflex">\n') outfile.write(u' <fDescr>frame reflexivity</fDescr>\n') outfile.write(u' <vRange>\n') outfile.write(u' <vAlt>\n') outfile.write(u' <binary value="true"/>\n') outfile.write(u' <binary value="false"/>\n') outfile.write(u' </vAlt>\n') outfile.write(u' </vRange>\n') outfile.write(u' </fDecl>\n') # aspect outfile.write(u' <fDecl name="aspect">\n') outfile.write(u' <fDescr>frame aspect</fDescr>\n') outfile.write(u' <vRange>\n') outfile.write(u' <vAlt>\n') aspect_obj = Frame_Char_Model.objects.get(model_name=u'ASPEKT') for alt in aspect_obj.frame_char_values.order_by('priority'): outfile.write(u' <symbol value="%s"/>\n' % alt.value) outfile.write(u' </vAlt>\n') outfile.write(u' </vRange>\n') outfile.write(u' </fDecl>\n') # negatywnosc outfile.write(u' <fDecl name="negativity">\n') outfile.write(u' <fDescr>frame negativity</fDescr>\n') outfile.write(u' <vRange>\n') outfile.write(u' <vAlt>\n') aspect_obj = Frame_Char_Model.objects.get(model_name=u'NEGATYWNOŚĆ') for alt in aspect_obj.frame_char_values.order_by('priority'): outfile.write(u' <symbol value="%s"/>\n' % alt.value) outfile.write(u' </vAlt>\n') outfile.write(u' </vRange>\n') outfile.write(u' </fDecl>\n') # predykatywnosc outfile.write(u' <fDecl name="predicativity">\n') outfile.write(u' <fDescr>frame predicativity</fDescr>\n') outfile.write(u' <vRange>\n') outfile.write(u' <vAlt>\n') aspect_obj = Frame_Char_Model.objects.get(model_name=u'PREDYKATYWNOŚĆ') for alt in aspect_obj.frame_char_values.order_by('priority'): outfile.write(u' <symbol value="%s"/>\n' % alt.value) outfile.write(u' </vAlt>\n') outfile.write(u' </vRange>\n') outfile.write(u' </fDecl>\n') # positions outfile.write(u' <fDecl name="positions">\n') outfile.write(u' <fDescr>syntactic positions</fDescr>\n') outfile.write(u' <vRange>\n') outfile.write(u' <vColl org="list">\n') outfile.write(u' <fs type="position"/>\n') outfile.write(u' </vColl>\n') outfile.write(u' </vRange>\n') outfile.write(u' </fDecl>\n') outfile.write(u' </fsDecl>\n') # position fs declaration outfile.write(u' <fsDecl type="position">\n') outfile.write(u' <fsDescr>Describes syntactic position</fsDescr>\n') # position category outfile.write(u' <fDecl name="category">\n') outfile.write(u' <fDescr>position category</fDescr>\n') outfile.write(u' <vRange>\n') outfile.write(u' <vAlt>\n') for alt in PositionCategory.objects.filter(control=False).order_by('priority'): outfile.write(u' <symbol value="%s"/>\n' % alt.category) outfile.write(u' </vAlt>\n') outfile.write(u' </vRange>\n') outfile.write(u' </fDecl>\n') # position control outfile.write(u' <fDecl name="control">\n') outfile.write(u' <fDescr>position category</fDescr>\n') outfile.write(u' <vRange>\n') outfile.write(u' <vAlt>\n') for alt in PositionCategory.objects.filter(control=True).order_by('priority'): outfile.write(u' <symbol value="%s"/>\n' % alt.category) outfile.write(u' </vAlt>\n') outfile.write(u' </vRange>\n') outfile.write(u' </fDecl>\n') # arguments outfile.write(u' <fDecl name="arguments">\n') outfile.write(u' <fDescr>syntactic arguments</fDescr>\n') outfile.write(u' <vRange>\n') outfile.write(u' <vColl org="list">\n') outfile.write(u' <fs type="argument"/>\n') outfile.write(u' </vColl>\n') outfile.write(u' </vRange>\n') outfile.write(u' </fDecl>\n') outfile.write(u' </fsDecl>\n') # argument fs declaration outfile.write(u' <fsDecl type="argument">\n') outfile.write(u' <fsDescr>Describes syntactic argument</fsDescr>\n') # position category outfile.write(u' <fDecl name="type">\n') outfile.write(u' <fDescr>type of argument</fDescr>\n') outfile.write(u' <vRange>\n') outfile.write(u' <vAlt>\n') for alt in Argument_Model.objects.order_by('priority'): outfile.write(u' <symbol value="%s"/>\n' % alt.arg_model_name) outfile.write(u' </vAlt>\n') outfile.write(u' </vRange>\n') outfile.write(u' </fDecl>\n') # attributes outfile.write(u' <fDecl name="attributes">\n') outfile.write(u' <fDescr>argument attributes</fDescr>\n') outfile.write(u' <vRange>\n') outfile.write(u' <vColl org="list">\n') outfile.write(u' <fs type="attribut"/>\n') outfile.write(u' </vColl>\n') outfile.write(u' </vRange>\n') outfile.write(u' </fDecl>\n') outfile.write(u' </fsDecl>\n') outfile.write(u' </fsdDecl>\n') outfile.write(u' </encodingDesc>\n')