Blame view

dictionary/teixml.py 41.6 KB
Bartłomiej Nitoń authored
1
2
3
4
5
#-*- coding:utf-8 -*-
from semantics.models import LexicalUnitExamples

import datetime
Bartłomiej Nitoń authored
6
from django.db.models import Count
Bartłomiej Nitoń authored
7
8
9
from lxml import etree
from xml.sax.saxutils import escape
Bartłomiej Nitoń authored
10
from dictionary.models import Argument, Atribute_Model, WalentyStat, \
Bartłomiej Nitoń authored
11
                              sortArguments, sortatributes, sortPositions, sort_positions
Bartłomiej Nitoń authored
12
13
14
15

XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'
Bartłomiej Nitoń authored
16
def createteixml(outpath, lemmas, frame_opinion_values):
    """Build the TEI document for ``lemmas`` and write it to ``outpath``.

    The tree is assembled by write_root(), write_header() and
    write_entries(), then serialized by lxml as pretty-printed UTF-8 XML
    with an XML declaration and the ``tei_all.dtd`` doctype.

    ``frame_opinion_values`` is handed to write_entries() unchanged.
    """
    root = write_root()
    write_header(root)
    write_entries(root, lemmas, frame_opinion_values)

    # Serialize first, so a serialization error does not leave a truncated
    # output file behind.
    serialized = etree.tostring(root, pretty_print=True,
                                xml_declaration=True, encoding='UTF-8',
                                doctype=u'<!DOCTYPE TEI SYSTEM "tei_all.dtd">')

    # tostring() with an explicit encoding returns encoded bytes, so the
    # file is opened in binary mode: no platform newline translation and
    # no bytes-vs-text mismatch.
    with open(outpath, 'wb') as output_file:
        output_file.write(serialized)
Bartłomiej Nitoń authored
24
25
26

def write_root():
    """Create and return the top-level ``TEI`` element.

    The element carries ``xml:lang="pl"`` and an ``xmlns`` attribute set to
    the TEI 1.0 namespace URI.
    """
    tei_elem = etree.Element('TEI')
    tei_elem.set(etree.QName(XML_NAMESPACE, 'lang'), u'pl')
    tei_elem.set('xmlns', u'http://www.tei-c.org/ns/1.0')
    return tei_elem
Bartłomiej Nitoń authored
31
def write_header(root, extensions_file=False):
    """Append a ``teiHeader`` element to ``root``.

    The header holds a single ``fileDesc`` with:
      * ``titleStmt/title`` - wording depends on ``extensions_file``,
      * ``publicationStmt`` - publisher, a ``date`` whose ``when`` attribute
        is the current date (YYYY-MM-DD) and the licence block produced by
        write_license_elem(),
      * ``sourceDesc/p`` - a note about the generating tool.
    """
    header_elem = etree.SubElement(root, 'teiHeader')
    file_desc_elem = etree.SubElement(header_elem, 'fileDesc')

    # title statement
    title_stmt_elem = etree.SubElement(file_desc_elem, 'titleStmt')
    title_elem = etree.SubElement(title_stmt_elem, 'title')
    title_elem.text = (
        u'This file is part of Walenty: a valence dictionary of Polish (http://zil.ipipan.waw.pl/Walenty)'
        if extensions_file else
        u'Walenty: a valence dictionary of Polish (http://zil.ipipan.waw.pl/Walenty)')

    # publication statement
    publication_elem = etree.SubElement(file_desc_elem, 'publicationStmt')
    publisher_elem = etree.SubElement(publication_elem, 'publisher')
    publisher_elem.text = u'Institute of Computer Science, Polish Academy of Sciences (IPI PAN)'

    date_elem = etree.SubElement(publication_elem, 'date')
    date_elem.set('when', datetime.datetime.now().strftime('%Y-%m-%d'))

    write_license_elem(publication_elem)

    # source description
    source_desc_elem = etree.SubElement(file_desc_elem, 'sourceDesc')
    source_p_elem = etree.SubElement(source_desc_elem, 'p')
    source_p_elem.text = u'File generated using Slowal. Mentioned tool available here: http://zil.ipipan.waw.pl/Slowal.'

def write_license_elem(parent_elem):
    """Append an ``availability/licence`` block to ``parent_elem``.

    The ``licence`` element targets the CC BY-SA 4.0 URL and contains six
    ``p`` paragraphs: copyright, licence notice, current dictionary
    statistics (three WalentyStat values looked up by label), and three
    paragraphs of credits.
    """
    availability_elem = etree.SubElement(parent_elem, 'availability')
    licence_elem = etree.SubElement(availability_elem, 'licence')
    licence_elem.set('target', u'http://creativecommons.org/licenses/by-sa/4.0/')

    # Paragraphs that precede the statistics paragraph.
    leading_paragraphs = (
        u'(C) Copyright 2012–2017 by the Institute of Computer Science, Polish Academy of Sciences (IPI PAN)',
        u'This work is distributed under a CC BY-SA license: http://creativecommons.org/licenses/by-sa/4.0/',
    )
    for paragraph_text in leading_paragraphs:
        etree.SubElement(licence_elem, 'p').text = paragraph_text

    # Statistics paragraph: values are fetched in the order schemata,
    # frames, lemmata.
    stat_labels = (u'Łączna liczba schematów',
                   u'Łączna liczba ram semantycznych',
                   u'Łączna liczba haseł')
    stats_p_elem = etree.SubElement(licence_elem, 'p')
    stats_p_elem.text = u'Walenty is a valence dictionary of Polish developed at the Institute of Computer Science, Polish Academy of Sciences (IPI PAN). It currently contains %s schemata and %s frames for %s lemmata.' % tuple(
        WalentyStat.objects.get(label=stat_label).value for stat_label in stat_labels)

    # Credits paragraphs.
    trailing_paragraphs = (
        u'The original formalism of Walenty was established by Filip Skwarski, Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, Marcin Woliński, Marek Świdziński, and Magdalena Zawisławska. It has been further developed by Elżbieta Hajnicz, Agnieszka Patejuk, Adam Przepiórkowski, and Marcin Woliński. The semantic layer has been developed by Elżbieta Hajnicz and Anna Andrzejczuk.',
        u'The original seed of Walenty was provided by the automatic conversion, manually reviewed by Filip Skwarski, of the verbal valence dictionary used by the Świgra2 parser (6396 schemata for 1462 lemmata), which was in turn based on SDPV, the Syntactic Dictionary of Polish Verbs by Marek Świdziński (4148 schemata for 1064 lemmata). Afterwards, Walenty has been developed independently by adding new entries, syntactic schemata, in particular phraseological ones, and semantic frames.',
        u'Walenty has been edited and compiled using the Slowal tool (http://zil.ipipan.waw.pl/Slowal) created by Bartłomiej Nitoń and Tomasz Bartosiak.',
    )
    for paragraph_text in trailing_paragraphs:
        etree.SubElement(licence_elem, 'p').text = paragraph_text
Bartłomiej Nitoń authored
80
def write_entries(root, lemmas, frame_opinion_values):
    """Append ``text/body`` to ``root`` and write one entry per lemma.

    For each lemma its frame opinions are restricted to those whose value
    is in ``frame_opinion_values`` before being passed to write_entry().
    """
    text_elem = etree.SubElement(root, 'text')
    body_elem = etree.SubElement(text_elem, 'body')
    for current_lemma in lemmas:
        write_entry(
            body_elem,
            current_lemma,
            current_lemma.frame_opinions.filter(value__in=frame_opinion_values),
            frame_opinion_values)
Bartłomiej Nitoń authored
86
Bartłomiej Nitoń authored
87
def write_entry(body_elem, lemma, frame_opinions, frame_opinion_values):
    """Append an ``entry`` element describing ``lemma`` to ``body_elem``.

    The entry gets an ``xml:id`` of the form ``wal_<entry id>-ent``, a
    ``form`` child (``orth`` and ``pos``), and then the status, syntactic
    and examples layers.  The semantic, meanings and connections layers are
    written only when ``lemma.semantics_ready()`` is true.
    """
    entry_xml_id = 'wal_%s-ent' % str(lemma.entry_obj.id)
    entry_elem = etree.SubElement(body_elem, 'entry')
    entry_elem.set(etree.QName(XML_NAMESPACE, 'id'), entry_xml_id)

    form_elem = etree.SubElement(entry_elem, 'form')
    etree.SubElement(form_elem, 'orth').text = lemma.entry
    etree.SubElement(form_elem, 'pos').text = lemma.entry_obj.pos.tag

    write_status_info(entry_elem, lemma)
    write_syntactic_layer(entry_elem, lemma, frame_opinions, frame_opinion_values)
    write_examples_layer(entry_elem, lemma)

    if not lemma.semantics_ready():
        return
    write_semantic_layer(entry_elem, lemma)
    write_meanings_layer(entry_elem, lemma)
    write_connections_layer(entry_elem, lemma)
Bartłomiej Nitoń authored
105
106
107
108
109
110
111
112
113

def write_status_info(parent_elem, lemma):
    """Append an ``fs`` of type ``general_info`` holding the lemma status.

    The status is written as ``f[name=status]/string`` with the unicode
    representation of ``lemma.status`` as its text.
    """
    info_fs = etree.SubElement(parent_elem, 'fs', type='general_info')
    status_f = etree.SubElement(info_fs, 'f', name='status')
    status_string = etree.SubElement(status_f, 'string')
    status_string.text = unicode(lemma.status)
Bartłomiej Nitoń authored
114
def write_syntactic_layer(entry_elem, lemma, frame_opinions, frame_opinion_values):
    """Append the ``syntactic_layer`` feature structure to ``entry_elem``.

    Schemata are emitted into ``f[name=schemata]/vColl[org=set]``.  They are
    visited for every combination of the lemma's existing reflexivity,
    negativity, predicativity and aspect values, ordered by ``text_rep``
    within each combination.

    A schema is skipped when it is phraseologic and
    ``lemma.phraseology_ready()`` is false.  When ``frame_opinion_values``
    is non-empty, only schemata having an opinion in ``frame_opinions`` are
    written.
    """
    synt_layer_fs_elem = etree.SubElement(entry_elem, 'fs')
    synt_layer_fs_elem.attrib['type'] = 'syntactic_layer'
    schemata_f_elem = etree.SubElement(synt_layer_fs_elem, 'f')
    schemata_f_elem.attrib['name'] = 'schemata'
    vColl_elem = etree.SubElement(schemata_f_elem, 'vColl')
    vColl_elem.attrib['org'] = 'set'

    # Everything below is invariant across the nested loops, so it is
    # evaluated once here instead of once per (partial) combination or per
    # frame.  list() makes the values safely re-iterable.
    reflex_values = list(lemma.get_existing_frame_char_values(u'ZWROTNOŚĆ'))
    neg_values = list(lemma.get_existing_frame_char_values(u'NEGATYWNOŚĆ'))
    pred_values = list(lemma.get_existing_frame_char_values(u'PREDYKATYWNOŚĆ'))
    aspect_values = list(lemma.get_existing_frame_char_values(u'ASPEKT'))
    skip_phraseologic = not lemma.phraseology_ready()
    restrict_by_opinion = frame_opinion_values.exists()

    for reflex_val in reflex_values:
        for neg_val in neg_values:
            for pred_val in pred_values:
                for aspect_val in aspect_values:
                    matchingframes = lemma.get_frames_by_char_values(reflex_val=reflex_val,
                                                                     neg_val=neg_val,
                                                                     pred_val=pred_val,
                                                                     aspect_val=aspect_val).order_by('text_rep')
                    for frame in matchingframes:
                        if skip_phraseologic and frame.phraseologic:
                            continue
                        if (not restrict_by_opinion
                                or frame_opinions.filter(frame=frame).exists()):
                            write_schema(vColl_elem, frame, lemma)
Bartłomiej Nitoń authored
135
Bartłomiej Nitoń authored
136
def write_schema(parent_elem, schema, lemma):
    """Append an ``fs`` of type ``schema`` for ``schema`` to ``parent_elem``.

    The element id is ``wal_<entry id>.<schema id>-sch``.  Features written,
    in order: ``text_rep``, ``opinion``, ``inherent_sie``, ``aspect``,
    ``negativity``, ``predicativity`` and finally the syntactic positions
    (via write_positions_feature()).
    """
    schema_xml_id = 'wal_%s.%s-sch' % (str(lemma.entry_obj.id), str(schema.id))

    schema_fs = etree.SubElement(parent_elem, 'fs')
    schema_fs.set(etree.QName(XML_NAMESPACE, 'id'), schema_xml_id)
    schema_fs.set('type', 'schema')

    # textual representation: entry name followed by the spaced schema
    # text, with a leading space when the schema is marked with 'się'
    text_rep_f = etree.SubElement(schema_fs, 'f', name='text_rep')
    text_rep_string = etree.SubElement(text_rep_f, 'string')
    spaced_rep = schema.get_position_spaced_text_rep()
    if schema.characteristics.filter(type=u'ZWROTNOŚĆ', value__value=u'się').exists():
        spaced_rep = ' ' + spaced_rep
    text_rep_string.text = lemma.entry_obj.name + spaced_rep.replace(':',': ')

    # schema opinion; 'unk' when the lemma has no opinion for this schema
    try:
        opinion_short = lemma.frame_opinions.filter(frame=schema).all()[0].value.short
    except IndexError:
        opinion_short = 'unk'
    opinion_f = etree.SubElement(schema_fs, 'f', name='opinion')
    opinion_symbol = etree.SubElement(opinion_f, 'symbol')
    opinion_symbol.set('value', opinion_short)

    # reflexivity
    reflex = schema.characteristics.get(type=u'ZWROTNOŚĆ')
    selfmark_f = etree.SubElement(schema_fs, 'f', name='inherent_sie')
    selfmark_binary = etree.SubElement(selfmark_f, 'binary')
    selfmark_binary.set('value', 'true' if reflex.value.value else 'false')

    # aspect and negativity: the 'f' element is always written, the
    # 'symbol' child only when the characteristic has a non-empty value
    for char_type, feature_name in ((u'ASPEKT', 'aspect'),
                                    (u'NEGATYWNOŚĆ', 'negativity')):
        char_value = schema.characteristics.get(type=char_type).value.value
        feature_f = etree.SubElement(schema_fs, 'f', name=feature_name)
        if char_value:
            feature_symbol = etree.SubElement(feature_f, 'symbol')
            feature_symbol.set('value', char_value)

    # predicativity
    predicativity = schema.characteristics.get(type=u'PREDYKATYWNOŚĆ').value.value
    predicativity_f = etree.SubElement(schema_fs, 'f', name='predicativity')
    predicativity_binary = etree.SubElement(predicativity_f, 'binary')
    predicativity_binary.set('value', 'true' if predicativity else 'false')

    # syntactic positions
    write_positions_feature(schema, schema_xml_id, schema_fs)
Bartłomiej Nitoń authored
200
Bartłomiej Nitoń authored
201
def write_positions_feature(schema, schema_xml_id, parent_elem):
    """Write the ``positions`` feature of a schema, if it has any positions.

    Positions are ordered by sortPositions(); each sorted item is indexed
    with ``'position'`` and written with write_position_elem() into
    ``f[name=positions]/vColl[org=set]``.  Nothing is appended when the
    sorted result is empty.
    """
    sorted_items = sortPositions(schema.positions.all())
    if not sorted_items:
        return

    positions_f = etree.SubElement(parent_elem, 'f', name='positions')
    positions_coll = etree.SubElement(positions_f, 'vColl', org='set')
    for item in sorted_items:
        write_position_elem(positions_coll, schema_xml_id, item['position'])
Bartłomiej Nitoń authored
210
Bartłomiej Nitoń authored
211
212
def write_position_elem(parent_elem, schema_xml_id, position):
    """Append an ``fs`` of type ``position`` to ``parent_elem``.

    When ``schema_xml_id`` is given, the position id is derived from it by
    replacing ``-sch`` with ``.<position id>-psn`` and set as ``xml:id``;
    otherwise no id is written and phrases are written without ids too.
    The grammatical function (first non-control category), the control
    features and the phrases follow.
    """
    position_fs = etree.SubElement(parent_elem, 'fs')

    position_xml_id = None
    if schema_xml_id:
        position_xml_id = schema_xml_id.replace(u'-sch', '.%d-psn' % position.id)
        position_fs.set(etree.QName(XML_NAMESPACE, 'id'), position_xml_id)
    position_fs.set('type', 'position')

    non_control = position.categories.filter(control=False)
    if non_control.exists():
        function_f = etree.SubElement(position_fs, 'f', name='function')
        function_symbol = etree.SubElement(function_f, 'symbol')
        function_symbol.set('value', non_control[0].category)

    write_control_features(position_fs, position)
    write_phrases_feature(position_fs, position, position_xml_id)
Bartłomiej Nitoń authored
228
229
230
231

def write_control_features(parent_elem, position):
    """Write the ``control`` feature of a position, if it has control marks.

    Control categories of ``position`` are split into those whose name does
    not end with ``'2'`` and those whose name does.  The first category of
    each non-empty group is written as a ``symbol`` inside
    ``f[name=control]/vColl[org=set]`` (first group first).  Nothing is
    appended when both groups are empty.
    """
    controls1 = position.categories.filter(control=True).exclude(category__endswith='2')
    controls2 = position.categories.filter(control=True, category__endswith='2')

    # Fetch at most one row per group with a single sliced query each,
    # instead of calling exists() twice and then indexing with [0].
    first_controls = [list(controls1[:1]), list(controls2[:1])]
    if not any(first_controls):
        return

    control_f_elem = etree.SubElement(parent_elem, 'f')
    control_f_elem.attrib['name'] = 'control'
    vColl_elem = etree.SubElement(control_f_elem, 'vColl')
    vColl_elem.attrib['org'] = 'set'
    for found in first_controls:
        if found:
            control_symbol_elem = etree.SubElement(vColl_elem, 'symbol')
            control_symbol_elem.attrib['value'] = found[0].category
Bartłomiej Nitoń authored
245
Bartłomiej Nitoń authored
246
247
def write_phrases_feature(parent_elem, position, position_xml_id):
    """Write the ``phrases`` feature of a position, if it has any phrases.

    The position's arguments are ordered with sortArguments() and each one
    is written by write_phrase() into ``f[name=phrases]/vColl[org=set]``.
    Nothing is appended when the sorted result is empty.
    """
    ordered_phrases = sortArguments(position.arguments.all())
    if not ordered_phrases:
        return

    phrases_f = etree.SubElement(parent_elem, 'f', name='phrases')
    phrases_coll = etree.SubElement(phrases_f, 'vColl', org='set')
    for current_phrase in ordered_phrases:
        write_phrase(phrases_coll, current_phrase, position_xml_id)
Bartłomiej Nitoń authored
256
def write_phrase(parent_elem, phrase, position_xml_id, write_expansions_id=True):
    """Append an ``fs`` element for ``phrase`` to ``parent_elem``.

    The element's ``type`` is ``phrase.type``.  When ``position_xml_id`` is
    given, an ``xml:id`` is derived from it by replacing ``-psn`` with
    ``.<phrase id>-phr``.  A link to the phrase's expansions is added when
    the phrase has realizations and ``write_expansions_id`` is true; the
    phrase attributes are always written.
    """
    phrase_fs = etree.SubElement(parent_elem, 'fs')
    if position_xml_id:
        phrase_fs.set(etree.QName(XML_NAMESPACE, 'id'),
                      position_xml_id.replace(u'-psn', '.%d-phr' % phrase.id))
    phrase_fs.set('type', phrase.type)

    has_realizations = phrase.realizations.exists()
    if has_realizations and write_expansions_id:
        write_expansions_link(phrase_fs, phrase)
    write_attributes(phrase_fs, phrase)
Bartłomiej Nitoń authored
265
266
267
268
269
270
271
272

def write_expansions_link(parent_elem, phrase):
    """Append ``f[name=expansions]/fs`` pointing at the phrase's expansions.

    The inner ``fs`` only carries ``sameAs="#wal_<phrase id>-exp"``; no
    ``type`` attribute is written on it.
    """
    expansions_f = etree.SubElement(parent_elem, 'f', name='expansions')
    link_fs = etree.SubElement(expansions_f, 'fs')
    link_fs.set('sameAs', '#wal_%d-exp' % phrase.id)
Bartłomiej Nitoń authored
273
274
275
def write_attributes(parent_elem, phrase):
    """Write every attribute of ``phrase`` in sortatributes() order."""
    for sorted_attribute in sortatributes(phrase):
        write_attribute(parent_elem, sorted_attribute)
Bartłomiej Nitoń authored
277
Bartłomiej Nitoń authored
278
def write_attribute(parent_elem, attribute):
    """Append an ``f`` element for ``attribute`` and fill in its value.

    The feature name is the ``sym_name`` of the Atribute_Model matching
    ``attribute.type``.  The value writer is chosen from the model's type
    (``text``, ``parameter``, ``argument`` or ``position``) and, for the
    first three, from whether the model defines any selection modes
    (complex writer) or none (simple writer).  For any other model type
    only the empty ``f`` element is written.
    """
    model = Atribute_Model.objects.get(atr_model_name=attribute.type)
    attr_f = etree.SubElement(parent_elem, 'f')
    attr_f.set('name', model.sym_name)

    kind = model.type.sym_name
    modes = model.values_selection_modes

    if kind == 'text':
        if modes.exists():
            write_complex_text_attr(attr_f, model, attribute)
        else:
            write_simple_text_attr(attr_f, attribute)
    elif kind == 'parameter':
        if modes.exists():
            write_complex_parameter_attr(attr_f, model, attribute)
        else:
            write_simple_parameter_attr(attr_f, model, attribute)
    elif kind == 'argument':
        if modes.exists():
            write_complex_phrase_type_attr(attr_f, attribute)
        else:
            write_simple_phrase_type_attr(attr_f, attribute)
    elif kind == 'position':
        write_complex_position_attr(attr_f, attribute)
Bartłomiej Nitoń authored
299
300
301

def write_simple_text_attr(parent_elem, attribute):
    """Append a ``string`` holding the attribute text without outer quotes.

    The text is the unicode form of ``attribute`` with leading and trailing
    apostrophes stripped.
    """
    unquoted = unicode(attribute).strip("'")
    etree.SubElement(parent_elem, 'string').text = unquoted
Bartłomiej Nitoń authored
303
Bartłomiej Nitoń authored
304
def write_complex_text_attr(parent_elem, attribute_model, attribute):
    """Append an ``fs`` describing a multi-lemma text attribute.

    The ``fs`` type is ``<name of parent_elem>_def``; it receives the
    selection mode and cooccurrence features followed by the lemmas.
    """
    definition_fs = etree.SubElement(parent_elem, 'fs')
    definition_fs.set('type', '%s_def' % parent_elem.attrib['name'])
    write_selection_mode_and_separator(definition_fs, attribute_model, attribute)
    write_lemmas(definition_fs, attribute)
Bartłomiej Nitoń authored
310
311
312
313
314
def write_selection_mode_and_separator(parent_elem, attr_model, attribute):
    """Append the ``selection_mode`` and ``cooccurrence`` features.

    Each value comes from ``attribute`` when set there; otherwise the
    model's entry with the lowest ``priority`` ordering is used.  The
    separator symbols ``;`` and ``,`` are written as ``coord`` and
    ``concat``; any other symbol is written unchanged.
    """
    if attribute.selection_mode:
        mode_name = attribute.selection_mode.name
    else:
        mode_name = attr_model.values_selection_modes.order_by('priority')[0].name
    mode_f = etree.SubElement(parent_elem, 'f', name='selection_mode')
    mode_symbol = etree.SubElement(mode_f, 'symbol')
    mode_symbol.set('value', mode_name)

    if attribute.separator:
        separator_symbol = attribute.separator.symbol
    else:
        separator_symbol = attr_model.value_separators.order_by('priority')[0].symbol
    cooccurrence = {';': 'coord', ',': 'concat'}.get(separator_symbol, separator_symbol)
    cooccurrence_f = etree.SubElement(parent_elem, 'f', name='cooccurrence')
    cooccurrence_symbol = etree.SubElement(cooccurrence_f, 'symbol')
    cooccurrence_symbol.set('value', cooccurrence)

def write_lemmas(parent_elem, attribute):
    """Append ``f[name=lemmas]/vColl[org=set]`` listing the attribute values.

    Values are taken ordered by ``text``; each becomes a ``string`` whose
    text is the unicode form of the value with outer apostrophes stripped.
    """
    lemma_texts = [unicode(value) for value in attribute.values.order_by('text')]

    lemmas_f = etree.SubElement(parent_elem, 'f', name='lemmas')
    lemmas_coll = etree.SubElement(lemmas_f, 'vColl', org='set')
    for lemma_text in lemma_texts:
        etree.SubElement(lemmas_coll, 'string').text = lemma_text.strip("'")
Bartłomiej Nitoń authored
344
def write_simple_parameter_attr(parent_elem, attribute_model, attribute):
    """Write the first value of ``attribute`` as a single parameter."""
    write_parameter(parent_elem, attribute_model, attribute.values.all()[0])
Bartłomiej Nitoń authored
347
Bartłomiej Nitoń authored
348
349
def write_parameter(parent_elem, attribute_model, param_value):
    """Write one parameter value under ``parent_elem``.

    When the model uses subparameters, an ``fs`` of type
    ``<name of parent_elem>_def`` is written with a ``conjunction`` feature
    (the parameter's type name) and, if present, its subparameters.

    Otherwise the unicode form of ``param_value`` is written according to
    the model's ``sym_name``: ``reflex`` as a ``binary`` (true when the
    text is non-empty), ``complex_preposition`` as a ``string``, and any
    other model as a ``symbol`` - but only when the text is non-empty.
    """
    if attribute_model.use_subparams():
        definition_fs = etree.SubElement(parent_elem, 'fs')
        definition_fs.set('type', '%s_def' % parent_elem.attrib['name'])

        conjunction_f = etree.SubElement(definition_fs, 'f', name='conjunction')
        type_name = param_value.parameter.type.name
        conjunction_symbol = etree.SubElement(conjunction_f, 'symbol')
        conjunction_symbol.set('value', type_name)

        if param_value.parameter.subparameters.exists():
            write_parameter_subparameters(definition_fs, param_value.parameter)
        return

    text_value = unicode(param_value)
    model_name = attribute_model.sym_name
    if model_name == 'reflex':
        reflex_binary = etree.SubElement(parent_elem, 'binary')
        reflex_binary.set('value', 'true' if text_value else 'false')
    elif model_name == 'complex_preposition':
        etree.SubElement(parent_elem, 'string').text = text_value
    elif text_value:
        etree.SubElement(parent_elem, 'symbol').set('value', text_value)
Bartłomiej Nitoń authored
374
Bartłomiej Nitoń authored
375
376
def write_parameter_subparameters(parent_elem, parameter):
    """Append ``f[name=constraints]/vColl[org=set]`` with the subparameters.

    Subparameters of ``parameter`` are written ordered by ``name``.
    """
    constraints_f = etree.SubElement(parent_elem, 'f', name='constraints')
    constraints_coll = etree.SubElement(constraints_f, 'vColl', org='set')
    for current_subparameter in parameter.subparameters.order_by('name'):
        write_subparameter(constraints_coll, current_subparameter)

def write_subparameter(parent_elem, subparameter):
    """Append a ``symbol`` whose ``value`` is the subparameter's name."""
    etree.SubElement(parent_elem, 'symbol').set('value', subparameter.name)
Bartłomiej Nitoń authored
386
Bartłomiej Nitoń authored
387
def write_complex_parameter_attr(parent_elem, attribute_model, attribute):
    """Write all parameter values of ``attribute`` into a ``vColl`` set.

    Values are taken ordered by ``parameter__type`` and each is written
    with write_parameter().
    """
    params_coll = etree.SubElement(parent_elem, 'vColl', org='set')
    for param_value in attribute.values.order_by('parameter__type'):
        write_parameter(params_coll, attribute_model, param_value)
Bartłomiej Nitoń authored
392
393
394
395
396
397
398
399
400
401
402

def write_simple_phrase_type_attr(parent_elem, attribute):
    """Write the phrase held by the attribute's first value, without an id."""
    nested_phrase = attribute.values.all()[0].argument
    write_phrase(parent_elem, nested_phrase, None)

def write_complex_phrase_type_attr(parent_elem, attribute):
    """Write a phrase-valued attribute that has a selection mode.

    For the ``list`` selection mode the phrases of all values are written
    as a set directly under ``parent_elem``.  For any other mode an ``fs``
    of type ``<name of parent_elem>_def`` is created and filled by
    write_typed_phrase_attr().

    NOTE(review): ``attribute.selection_mode`` is dereferenced without a
    None check - presumably always set for these attributes; confirm.
    """
    mode = attribute.selection_mode
    if mode.sym_name != 'list':
        typed_fs = etree.SubElement(parent_elem, 'fs')
        typed_fs.set('type', '%s_def' % parent_elem.attrib['name'])
        write_typed_phrase_attr(typed_fs, attribute)
        return

    listed_phrases = [value.argument for value in attribute.values.all()]
    write_phrases_set(parent_elem, listed_phrases)

def write_phrases_set(parent_elem, phrases):
    """Append a ``vColl`` set with ``phrases`` in sortArguments() order.

    Phrases are written without xml ids.
    """
    phrases_coll = etree.SubElement(parent_elem, 'vColl', org='set')
    for current_phrase in sortArguments(phrases):
        write_phrase(phrases_coll, current_phrase, None)

def write_typed_phrase_attr(parent_elem, attribute):
    """Write the ``name`` feature and, if any, the ``constraints`` feature.

    ``name`` holds the selection mode's name as a ``symbol``.  When the
    attribute has values, their phrases are written as a set under
    ``f[name=constraints]``.
    """
    mode = attribute.selection_mode
    name_f = etree.SubElement(parent_elem, 'f', name='name')
    name_symbol = etree.SubElement(name_f, 'symbol')
    name_symbol.set('value', mode.name)

    if not attribute.values.exists():
        return
    constraints_f = etree.SubElement(parent_elem, 'f', name='constraints')
    constraint_phrases = [value.argument for value in attribute.values.all()]
    write_phrases_set(constraints_f, constraint_phrases)

def write_complex_position_attr(parent_elem, attribute):
    """Append an ``fs`` describing a position-valued attribute.

    The ``fs`` type is ``<name of parent_elem>_def``.  It holds a ``type``
    feature with the selection mode's name and, when the attribute has
    values, a ``positions`` set with the value positions in
    sort_positions() order, written without xml ids.
    """
    definition_fs = etree.SubElement(parent_elem, 'fs')
    definition_fs.set('type', '%s_def' % parent_elem.attrib['name'])

    mode = attribute.selection_mode
    type_f = etree.SubElement(definition_fs, 'f', name='type')
    type_symbol = etree.SubElement(type_f, 'symbol')
    type_symbol.set('value', mode.name)

    if not attribute.values.exists():
        return

    positions_f = etree.SubElement(definition_fs, 'f', name='positions')
    positions_coll = etree.SubElement(positions_f, 'vColl', org='set')
    value_positions = [value.position for value in attribute.values.all()]
    for current_position in sort_positions(value_positions):
        write_position_elem(positions_coll, None, current_position)
Bartłomiej Nitoń authored
446
447
448

def write_examples_layer(parent_elem, lemma):
    """Append the ``examples_layer`` feature structure for ``lemma``.

    Examples are written by write_examples_feature() into
    ``f[name=examples]/vColl[org=set]``.
    """
    layer_fs = etree.SubElement(parent_elem, 'fs', type='examples_layer')
    examples_f = etree.SubElement(layer_fs, 'f', name='examples')
    examples_coll = etree.SubElement(examples_f, 'vColl', org='set')
    write_examples_feature(examples_coll, lemma)
Bartłomiej Nitoń authored
458
Bartłomiej Nitoń authored
459
460
def write_examples_feature(parent_elem, lemma):
    """Write the lemma's examples, ordered by opinion priority.

    Examples from ``lemma.nkjp_examples`` come first; one attached to a
    phraseologic frame is left out unless ``lemma.phraseology_ready()`` is
    true.  Examples from ``lemma.lemma_nkjp_examples`` follow and are all
    written.
    """
    frame_examples = lemma.nkjp_examples.order_by('opinion__priority').all()
    for frame_example in frame_examples:
        if lemma.phraseology_ready() or not frame_example.frame.phraseologic:
            write_example(parent_elem, lemma, frame_example)

    lemma_examples = lemma.lemma_nkjp_examples.order_by('opinion__priority').all()
    for lemma_example in lemma_examples:
        write_example(parent_elem, lemma, lemma_example)
Bartłomiej Nitoń authored
467
Bartłomiej Nitoń authored
468
469
def write_example(parent_elem, lemma, example):
    """Append an ``fs`` of type ``example`` for ``example`` to ``parent_elem``.

    The element id is ``wal_<entry id>.<example id>-exm``.  Content, in
    order: an optional meaning link (only when ``lemma.semantics_ready()``),
    links to the illustrated phrases, the ``sentence``, ``source`` and
    ``opinion`` features, and a ``note`` when the example has a comment.

    Sentence and comment are stored as plain text: lxml escapes ``&``,
    ``<`` and ``>`` itself when the tree is serialized, so escaping them
    here as well would double-escape them (``&`` would come out as
    ``&amp;amp;``).
    """
    entry = lemma.entry_obj
    example_xml_id = u'wal_%s.%s-exm' % (str(entry.id), str(example.id))

    example_fs_elem = etree.SubElement(parent_elem, 'fs')
    example_fs_elem.attrib[etree.QName(XML_NAMESPACE, 'id')] = example_xml_id
    example_fs_elem.attrib['type'] = 'example'

    if lemma.semantics_ready():
        get_and_write_meaning_link(example_fs_elem, entry, example)
    write_phrases_links(example_fs_elem, entry, example)

    sentence_f_elem = etree.SubElement(example_fs_elem, 'f')
    sentence_f_elem.attrib['name'] = 'sentence'
    sentence_content_elem = etree.SubElement(sentence_f_elem, 'string')
    # Raw text on purpose - the serializer performs the XML escaping.
    sentence_content_elem.text = example.sentence

    source_f_elem = etree.SubElement(example_fs_elem, 'f')
    source_f_elem.attrib['name'] = 'source'
    source_symbol_elem = etree.SubElement(source_f_elem, 'symbol')
    source_symbol_elem.attrib['value'] = example.source.sym_name

    opinion_f_elem = etree.SubElement(example_fs_elem, 'f')
    opinion_f_elem.attrib['name'] = 'opinion'
    opinion_symbol_elem = etree.SubElement(opinion_f_elem, 'symbol')
    opinion_symbol_elem.attrib['value'] = example.opinion.opinion

    if example.comment:
        note_f_elem = etree.SubElement(example_fs_elem, 'f')
        note_f_elem.attrib['name'] = 'note'
        note_content_elem = etree.SubElement(note_f_elem, 'string')
        # Raw text on purpose - the serializer performs the XML escaping.
        note_content_elem.text = example.comment
Bartłomiej Nitoń authored
500
501
502

def get_and_write_meaning_link(parent_elem, entry, example):
    """Link ``example`` to the lexical unit of ``entry`` it illustrates.

    Looks up the LexicalUnitExamples row joining ``example`` with one of
    the entry's meanings.  When there is none, nothing is written.
    Otherwise ``f[name=meaning]/fs`` is appended with
    ``sameAs="#wal_<entry id>.<meaning id>-mng"`` and type
    ``lexical_unit``.
    """
    try:
        entry_lex_units = entry.meanings.all()
        lex_unit_example = LexicalUnitExamples.objects.get(
            example=example, lexical_unit__in=entry_lex_units)
    except LexicalUnitExamples.DoesNotExist:
        return

    meaning = lex_unit_example.lexical_unit
    meaning_xml_id = u'#wal_%s.%s-mng' % (str(entry.id), str(meaning.id))

    meaning_f = etree.SubElement(parent_elem, 'f', name='meaning')
    meaning_link = etree.SubElement(meaning_f, 'fs')
    meaning_link.set('sameAs', meaning_xml_id)
    meaning_link.set('type', 'lexical_unit')

def write_phrases_links(parent_elem, entry, example):
    """Append the ``phrases`` feature of an example to ``parent_elem``.

    Creates ``<f name="phrases"><vColl org="set">`` and, for every item of
    ``example.arguments``, delegates to ``create_and_write_phrase_link`` to
    add the phrase links to that collection.
    """
    phrases_f_elem = etree.SubElement(parent_elem, 'f', name='phrases')
    links_coll_elem = etree.SubElement(phrases_f_elem, 'vColl', org='set')

    for phrase_selection in example.arguments.all():
        create_and_write_phrase_link(links_coll_elem, entry, example, phrase_selection)

def create_and_write_phrase_link(parent_elem, entry, example, phrase_selection):
    """Append one ``<fs type="phrase">`` link per phrase of ``phrase_selection``.

    Each link's ``sameAs`` value has the form
    ``#wal_<entry.id>.<example.frame.id>.<position.id>.<phrase.id>-phr``,
    where the position comes from ``phrase_selection.position`` and the
    phrases from ``phrase_selection.arguments``.
    """
    position_prefix = u'#wal_%d.%d.%d.' % (entry.id,
                                           example.frame.id,
                                           phrase_selection.position.id)
    for phrase in phrase_selection.arguments.all():
        target = position_prefix + (u'%d-phr' % phrase.id)
        link_elem = etree.SubElement(parent_elem, 'fs')
        # Attributes are set one by one to keep the sameAs/type order.
        link_elem.set('sameAs', target)
        link_elem.set('type', 'phrase')

def write_semantic_layer(parent_elem, lemma):
    """Append the semantic layer of ``lemma`` to ``parent_elem``.

    Builds ``<fs type="semantic_layer"><f name="frames"><vColl org="set">``
    and lets ``write_frames`` fill the collection.
    """
    semantic_layer_elem = etree.SubElement(parent_elem, 'fs', type='semantic_layer')
    frames_f_elem = etree.SubElement(semantic_layer_elem, 'f', name='frames')
    frames_coll_elem = etree.SubElement(frames_f_elem, 'vColl', org='set')

    write_frames(frames_coll_elem, lemma)

def write_frames(parent_elem, lemma):
    """Write every frame returned by ``lemma.entry_obj.actual_frames()``.

    Each frame is serialized under ``parent_elem`` by ``write_frame_fs``.
    """
    entry = lemma.entry_obj
    for frame in entry.actual_frames():
        write_frame_fs(parent_elem, entry, frame)

def write_frame_fs(parent_elem, entry, frame):
    """Append one ``<fs type="frame">`` describing ``frame`` to ``parent_elem``.

    The element gets the ``xml:id`` ``wal_<entry.id>.<frame.id>-frm`` and is
    then filled with the frame's opinion, meanings and arguments, in that
    order.
    """
    frame_xml_id = u'wal_%d.%d-frm' % (entry.id, frame.id)

    frame_fs_elem = etree.SubElement(parent_elem, 'fs')
    frame_fs_elem.attrib[etree.QName(XML_NAMESPACE, 'id')] = frame_xml_id
    frame_fs_elem.attrib['type'] = 'frame'

    write_frame_opinion(frame_fs_elem, frame)
    write_frame_meanings(frame_fs_elem, entry, frame)
    write_frame_arguments(frame_fs_elem, entry, frame)

def write_frame_opinion(parent_elem, frame):
    """Append ``<f name="opinion"><symbol value="..."/></f>`` for ``frame``.

    The value is ``frame.opinion.short`` when ``frame.opinion_selected()`` is
    true, and the literal ``'unk'`` otherwise.
    """
    if frame.opinion_selected():
        opinion_value = frame.opinion.short
    else:
        opinion_value = 'unk'

    opinion_f_elem = etree.SubElement(parent_elem, 'f', name='opinion')
    etree.SubElement(opinion_f_elem, 'symbol', value=opinion_value)

def write_frame_meanings(parent_elem, entry, frame):
    """Append the ``meanings`` feature of ``frame`` to ``parent_elem``.

    Creates ``<f name="meanings"><vColl org="set">`` and adds one link per
    item of ``frame.lexical_units`` via ``write_frame_meaning_link``.
    """
    meanings_f_elem = etree.SubElement(parent_elem, 'f', name='meanings')
    meanings_coll_elem = etree.SubElement(meanings_f_elem, 'vColl', org='set')

    for meaning in frame.lexical_units.all():
        write_frame_meaning_link(meanings_coll_elem, entry, meaning)

def write_frame_meaning_link(parent_elem, entry, meaning):
    """Append a link to a lexical unit of ``entry`` to ``parent_elem``.

    Produces
    ``<fs sameAs="#wal_<entry.id>.<meaning.id>-mng" type="lexical_unit"/>``.
    """
    link = u'#wal_%d.%d-mng' % (entry.id, meaning.id)
    lex_unit_link_elem = etree.SubElement(parent_elem, 'fs')
    # Attributes are set one by one to keep the sameAs/type order.
    lex_unit_link_elem.set('sameAs', link)
    lex_unit_link_elem.set('type', 'lexical_unit')

def write_frame_arguments(parent_elem, entry, frame):
    """Append the ``arguments`` feature of ``frame`` to ``parent_elem``.

    Creates ``<f name="arguments"><vColl org="set">`` and serializes every
    item of ``frame.complements`` into it with ``write_frame_argument``.
    """
    arguments_f_elem = etree.SubElement(parent_elem, 'f', name='arguments')
    arguments_coll_elem = etree.SubElement(arguments_f_elem, 'vColl', org='set')

    for arg in frame.complements.all():
        write_frame_argument(arguments_coll_elem, entry, frame, arg)

def write_frame_argument(parent_elem, entry, frame, arg):
    """Append one ``<fs type="argument">`` for ``arg`` to ``parent_elem``.

    The element's ``xml:id`` is ``wal_<entry.id>.<frame.id>.<arg.id>-arg``.
    The ``wal_<entry.id>.<frame.id>`` prefix is also handed to
    ``write_selective_preferences``.
    """
    frame_prefix = u'wal_%d.%d' % (entry.id, frame.id)
    argument_id = u'%s.%d-arg' % (frame_prefix, arg.id)

    arg_elem = etree.SubElement(parent_elem, 'fs')
    arg_elem.attrib[etree.QName(XML_NAMESPACE, 'id')] = argument_id
    arg_elem.attrib['type'] = 'argument'

    write_roles(arg_elem, arg)
    write_selective_preferences(arg_elem, arg, frame_prefix)

def write_roles(parent_elem, arg):
    """Append one feature per role of ``arg`` to ``parent_elem``.

    Roles are taken from ``arg.roles`` ordered by ``gradient``.  A role with
    a truthy ``gradient`` is written as ``<f name="role_attribute">``, any
    other as ``<f name="role">``.  In both cases the feature holds a
    ``<symbol>`` whose ``value`` is ``unicode(role)``.
    """
    for role in arg.roles.order_by('gradient'):
        # Both kinds share the same structure; only the feature name differs.
        feature_name = 'role_attribute' if role.gradient else 'role'
        role_f_elem = etree.SubElement(parent_elem, 'f', name=feature_name)
        etree.SubElement(role_f_elem, 'symbol', value=unicode(role))

def write_selective_preferences(parent_elem, arg, arg_base_id):
    """Append the selectional preferences of ``arg`` to ``parent_elem``.

    Does nothing when ``arg.selective_preference`` is falsy.  Otherwise
    creates ``<f name="sel_prefs"><fs type="sel_prefs_groups">`` and fills it
    with the synset, predefined, relation and synset-relation groups, in that
    order.  ``arg_base_id`` is passed on to ``write_relation_sel_prefs``.
    """
    if not arg.selective_preference:
        return

    sel_prefs_f_elem = etree.SubElement(parent_elem, 'f', name='sel_prefs')
    groups_fs_elem = etree.SubElement(sel_prefs_f_elem, 'fs', type='sel_prefs_groups')

    write_synsets_sel_prefs(groups_fs_elem, arg)
    write_predefined_sel_prefs(groups_fs_elem, arg)
    write_relation_sel_prefs(groups_fs_elem, arg, arg_base_id)
    write_synset_relation_sel_prefs(groups_fs_elem, arg)

def write_synsets_sel_prefs(parent_elem, arg):
    """Append the ``synsets`` group of ``arg``'s selectional preferences.

    Nothing is written when ``arg.selective_preference.synsets`` is empty.
    Otherwise creates ``<f name="synsets"><vColl org="set">`` and writes each
    synset into it with ``write_synset``.
    """
    synsets = arg.selective_preference.synsets
    if not synsets.exists():
        return

    synsets_f_elem = etree.SubElement(parent_elem, 'f', name='synsets')
    synsets_coll_elem = etree.SubElement(synsets_f_elem, 'vColl', org='set')

    for synset in synsets.all():
        write_synset(synsets_coll_elem, synset)

def write_synset(parent_elem, synset):
    """Append ``<numeric value="<synset.id>"/>`` to ``parent_elem``."""
    etree.SubElement(parent_elem, 'numeric', value=str(synset.id))

def write_predefined_sel_prefs(parent_elem, arg):
    """Append the ``predefs`` group of ``arg``'s selectional preferences.

    Nothing is written when ``arg.selective_preference.generals`` is empty.
    Otherwise creates ``<f name="predefs"><vColl org="set">`` and writes each
    item into it with ``write_predef``.
    """
    generals = arg.selective_preference.generals
    if not generals.exists():
        return

    predefs_f_elem = etree.SubElement(parent_elem, 'f', name='predefs')
    predefs_coll_elem = etree.SubElement(predefs_f_elem, 'vColl', org='set')

    for predef in generals.all():
        write_predef(predefs_coll_elem, predef)

def write_predef(parent_elem, predef):
    """Append ``<symbol value="<predef.name>"/>`` to ``parent_elem``."""
    etree.SubElement(parent_elem, 'symbol', value=predef.name)

def write_relation_sel_prefs(parent_elem, arg, arg_base_id):
    """Append the ``relations`` group of ``arg``'s selectional preferences.

    Nothing is written when ``arg.selective_preference.relations`` is empty.
    Otherwise creates ``<f name="relations"><vColl org="set">`` and writes
    each relation into it with ``write_relation``, passing ``arg_base_id``
    through.
    """
    relations = arg.selective_preference.relations
    if not relations.exists():
        return

    relations_f_elem = etree.SubElement(parent_elem, 'f', name='relations')
    relations_coll_elem = etree.SubElement(relations_f_elem, 'vColl', org='set')

    for relation in relations.all():
        write_relation(relations_coll_elem, relation, arg_base_id)

def write_relation(parent_elem, relation, arg_base_id):
    """Append one ``<fs type="relation">`` to ``parent_elem``.

    The element holds two features:

    * ``type`` -- a ``<symbol>`` whose value is ``relation.relation.name``;
    * ``to`` -- an ``<fs type="argument">`` link whose ``sameAs`` is
      ``#<arg_base_id>.<relation.to.id>-arg``.
    """
    relation_fs_elem = etree.SubElement(parent_elem, 'fs', type='relation')

    type_f_elem = etree.SubElement(relation_fs_elem, 'f', name='type')
    etree.SubElement(type_f_elem, 'symbol', value=relation.relation.name)

    to_f_elem = etree.SubElement(relation_fs_elem, 'f', name='to')
    to_xml_link = '#%s.%d-arg' % (arg_base_id, relation.to.id)
    arg_link_elem = etree.SubElement(to_f_elem, 'fs')
    # Attributes are set one by one to keep the sameAs/type order.
    arg_link_elem.set('sameAs', to_xml_link)
    arg_link_elem.set('type', 'argument')

def write_synset_relation_sel_prefs(parent_elem, arg):
    """Append the ``synset_relations`` group of ``arg``'s preferences.

    Nothing is written when ``arg.selective_preference.synset_relations`` is
    empty.  Otherwise creates
    ``<f name="synset_relations"><vColl org="set">`` and writes each relation
    into it with ``write_synset_relation``.
    """
    relations = arg.selective_preference.synset_relations
    if not relations.exists():
        return

    relations_f_elem = etree.SubElement(parent_elem, 'f', name='synset_relations')
    relations_coll_elem = etree.SubElement(relations_f_elem, 'vColl', org='set')

    for relation in relations.all():
        write_synset_relation(relations_coll_elem, relation)

def write_synset_relation(parent_elem, relation):
    """Append one ``<fs type="synset_relation">`` to ``parent_elem``.

    The element holds two features:

    * ``type`` -- a ``<symbol>`` whose value is ``relation.relation.name``;
    * ``to`` -- the target synset, written by ``write_synset`` from
      ``relation.to``.
    """
    relation_fs_elem = etree.SubElement(parent_elem, 'fs', type='synset_relation')

    type_f_elem = etree.SubElement(relation_fs_elem, 'f', name='type')
    etree.SubElement(type_f_elem, 'symbol', value=relation.relation.name)

    to_f_elem = etree.SubElement(relation_fs_elem, 'f', name='to')
    write_synset(to_f_elem, relation.to)

def write_meanings_layer(parent_elem, lemma):
    """Append the meanings layer of ``lemma`` to ``parent_elem``.

    Builds ``<fs type="meanings_layer"><f name="meanings"><vColl org="set">``
    and lets ``write_meanings`` fill the collection.
    """
    meanings_layer_elem = etree.SubElement(parent_elem, 'fs', type='meanings_layer')
    meanings_f_elem = etree.SubElement(meanings_layer_elem, 'f', name='meanings')
    meanings_coll_elem = etree.SubElement(meanings_f_elem, 'vColl', org='set')

    write_meanings(meanings_coll_elem, lemma)

def write_meanings(parent_elem, lemma):
    """Write every lexical unit of ``lemma.entry_obj.meanings``.

    Each one is serialized under ``parent_elem`` by ``write_meaning``.
    """
    entry = lemma.entry_obj
    for lex_unit in entry.meanings.all():
        write_meaning(parent_elem, entry, lex_unit)

def write_meaning(parent_elem, entry, lex_unit):
    """Append one ``<fs type="lexical_unit">`` for ``lex_unit``.

    The element's ``xml:id`` is ``wal_<entry.id>.<lex_unit.id>-mng``.  It
    contains these features, in order:

    * ``name`` -- ``<string>`` with ``lex_unit.base``;
    * ``variant`` -- ``<string>`` with ``lex_unit.sense``;
    * ``plwnluid`` -- ``<numeric>`` with ``str(lex_unit.luid)``;
    * ``gloss`` -- ``<string>`` with ``lex_unit.glossa``, written only when
      that value is truthy.
    """
    meaning_xml_id = u'wal_%d.%d-mng' % (entry.id, lex_unit.id)

    meaning_fs_elem = etree.SubElement(parent_elem, 'fs')
    meaning_fs_elem.attrib[etree.QName(XML_NAMESPACE, 'id')] = meaning_xml_id
    meaning_fs_elem.attrib['type'] = 'lexical_unit'

    name_f_elem = etree.SubElement(meaning_fs_elem, 'f', name='name')
    name_content_elem = etree.SubElement(name_f_elem, 'string')
    name_content_elem.text = lex_unit.base

    variant_f_elem = etree.SubElement(meaning_fs_elem, 'f', name='variant')
    variant_string_elem = etree.SubElement(variant_f_elem, 'string')
    variant_string_elem.text = lex_unit.sense

    plwnluid_f_elem = etree.SubElement(meaning_fs_elem, 'f', name='plwnluid')
    etree.SubElement(plwnluid_f_elem, 'numeric', value=str(lex_unit.luid))

    if lex_unit.glossa:
        gloss_f_elem = etree.SubElement(meaning_fs_elem, 'f', name='gloss')
        gloss_content_elem = etree.SubElement(gloss_f_elem, 'string')
        gloss_content_elem.text = lex_unit.glossa

def write_connections_layer(parent_elem, lemma):
    """Append the connections layer of ``lemma`` to ``parent_elem``.

    Builds
    ``<fs type="connections_layer"><f name="alternations"><vColl org="set">``
    and lets ``write_alternations`` fill the collection.
    """
    connections_layer_elem = etree.SubElement(parent_elem, 'fs', type='connections_layer')
    alternations_f_elem = etree.SubElement(connections_layer_elem, 'f', name='alternations')
    alternations_coll_elem = etree.SubElement(alternations_f_elem, 'vColl', org='set')

    write_alternations(alternations_coll_elem, lemma)

def write_alternations(parent_elem, lemma):
    """Write the alternations linking ``lemma``'s schemata with its frames.

    For every pair of an item of ``lemma.frames`` (bound to ``schema``) and a
    frame from ``lemma.entry_obj.actual_frames()``, selects the frame's
    complements that have a realization in that schema and calls
    ``write_alternation`` for alternation numbers 1 and 2, in that order.
    """
    entry = lemma.entry_obj
    frames = entry.actual_frames()
    for schema in lemma.frames.all():
        for frame in frames:
            matching_complements = frame.complements.filter(
                realizations__frame=schema).distinct()
            for alternation in (1, 2):
                write_alternation(parent_elem, entry, schema, frame,
                                  matching_complements, alternation)

def write_alternation(parent_elem, entry, schema, frame, complements, alternation):
    """Append one ``<fs type="alternation">`` if it has any connection.

    ``complements`` is narrowed to those with a realization carrying the
    given ``alternation`` number.  For each of them, the realizations in
    ``schema`` with that number are looked up; every complement that has at
    least one is written with ``write_connection``.

    The wrapper ``<fs type="alternation"><f name="connections">
    <vColl org="set">`` is created lazily, just before the first connection,
    so no empty alternation is emitted.
    """
    alternation_compls = complements.filter(realizations__alternation=alternation)
    if not alternation_compls.exists():
        return

    connections_coll_elem = None  # created on the first connection only
    for arg in alternation_compls.all():
        alt_realizations = arg.realizations.filter(frame=schema, alternation=alternation)
        if not alt_realizations.exists():
            continue
        if connections_coll_elem is None:
            alternation_fs_elem = etree.SubElement(parent_elem, 'fs', type='alternation')
            connections_f_elem = etree.SubElement(alternation_fs_elem, 'f',
                                                  name='connections')
            connections_coll_elem = etree.SubElement(connections_f_elem, 'vColl',
                                                     org='set')
        write_connection(connections_coll_elem, entry, frame, arg, alt_realizations)

def write_connection(parent_elem, entry, frame, arg, realizations):
    """Append one ``<fs type="connection">`` to ``parent_elem``.

    The element is filled first by ``write_argument`` (link to ``arg``) and
    then by ``write_phrases`` (links built from ``realizations``).
    """
    connection_elem = etree.SubElement(parent_elem, 'fs', type='connection')

    write_argument(connection_elem, entry, frame, arg)
    write_phrases(connection_elem, entry, realizations)

def write_argument(parent_elem, entry, frame, arg):
    """Append ``<f name="argument">`` holding a link to a semantic argument.

    The link is
    ``<fs sameAs="#wal_<entry.id>.<frame.id>.<arg.id>-arg" type="argument"/>``.
    """
    holder_elem = etree.SubElement(parent_elem, 'f', name='argument')

    target = u'#wal_%d.%d.%d-arg' % (entry.id, frame.id, arg.id)
    target_elem = etree.SubElement(holder_elem, 'fs')
    # Attributes are set one by one to keep the sameAs/type order.
    target_elem.set('sameAs', target)
    target_elem.set('type', 'argument')

def write_phrases(parent_elem, entry, realizations):
    """Append ``<f name="phrases"><vColl org="set">`` with phrase links.

    One ``<fs type="phrase">`` is written per item of ``realizations``.  Its
    ``sameAs`` value is
    ``#wal_<entry.id>.<frame.id>.<position.id>.<argument.id>-phr``, where
    ``frame``, ``position`` and ``argument`` are attributes of the
    realization.
    """
    phrases_f_elem = etree.SubElement(parent_elem, 'f', name='phrases')
    links_coll_elem = etree.SubElement(phrases_f_elem, 'vColl', org='set')

    for realization in realizations:
        phrase_xml_link = u'#wal_%d.%d.%d.%d-phr' % (entry.id,
                                                     realization.frame.id,
                                                     realization.position.id,
                                                     realization.argument.id)
        phrase_link_elem = etree.SubElement(links_coll_elem, 'fs')
        # Attributes are set one by one to keep the sameAs/type order.
        phrase_link_elem.set('sameAs', phrase_xml_link)
        phrase_link_elem.set('type', 'phrase')

############# phrase types expansions     
def write_phrase_types_expansions_in_TEI(outpath):
    """Serialize the phrase type expansions as a TEI document at ``outpath``.

    Builds the document with ``write_root``, ``write_header`` (called with
    ``True`` as its second argument) and ``write_expansions_entries``, then
    writes it pretty-printed, UTF-8 encoded, with an XML declaration and the
    ``tei_all.dtd`` doctype.
    """
    root = write_root()
    write_header(root, True)
    write_expansions_entries(root)

    # Serialize before opening the file so a serialization error cannot
    # leave a truncated output file behind.
    serialized = etree.tostring(root, pretty_print=True,
                                xml_declaration=True, encoding='UTF-8',
                                doctype=u'<!DOCTYPE TEI SYSTEM "tei_all.dtd">')
    # tostring() with an explicit encoding returns encoded bytes, so the
    # file is opened in binary mode to write them unchanged.
    with open(outpath, 'wb') as output_file:
        output_file.write(serialized)

def write_expansions_entries(root):
    """Write one ``<entry>`` per phrase type that has expansions.

    Creates ``<text><body>`` under ``root``.  ``Argument`` objects with at
    least one related ``realizations`` row are visited ordered by
    ``text_rep``; each gets an ``<entry>`` with the ``xml:id``
    ``wal_<id>-exp``, filled by ``write_main_phrase_type``.
    """
    expandable_phrase_types = (Argument.objects
                               .annotate(extensions_count=Count('realizations'))
                               .filter(extensions_count__gt=0)
                               .order_by('text_rep'))

    text_elem = etree.SubElement(root, 'text')
    body_elem = etree.SubElement(text_elem, 'body')
    for phrase_type in expandable_phrase_types:
        entry_xml_id = 'wal_%d-exp' % phrase_type.id
        entry_elem = etree.SubElement(body_elem, 'entry')
        entry_elem.attrib[etree.QName(XML_NAMESPACE, 'id')] = entry_xml_id

        write_main_phrase_type(entry_elem, phrase_type)

def write_main_phrase_type(parent_elem, phrase_type):
    """Write ``phrase_type`` itself, followed by its expansions.

    Calls ``write_phrase`` and then ``write_expansions`` on ``parent_elem``.
    """
    # NOTE(review): write_phrase is defined outside this view; the meaning of
    # the '' and False arguments should be confirmed against its signature.
    write_phrase(parent_elem, phrase_type, '', False)
    write_expansions(parent_elem, phrase_type)

def write_expansions(parent_elem, phrase_type):
    """Append the expansions of ``phrase_type`` to ``parent_elem``.

    Creates ``<fs type="phrase_type_expansions"><f name="expansions">`` and,
    for every item of ``phrase_type.realizations`` (ordered by opinion
    priority, type priority and the argument's ``text_rep``), adds an
    ``<fs type="expansion">`` holding the expansion's opinion and, depending
    on ``expansion.type.sym_name``, either its positions or its phrase.
    """
    expansions_fs_elem = etree.SubElement(parent_elem, 'fs')
    expansions_fs_elem.attrib['type'] = 'phrase_type_expansions'

    expansions_f_elem =  etree.SubElement(expansions_fs_elem, 'f')
    expansions_f_elem.attrib['name'] = 'expansions'

    for expansion in phrase_type.realizations.order_by('opinion__priority',
                                                       'type__priority',
                                                       'argument__text_rep'):

        expansion_fs_elem = etree.SubElement(expansions_f_elem, 'fs')
        expansion_fs_elem.attrib['type'] = 'expansion'

        # The opinion is written as the unicode form of expansion.opinion.
        opinion_f_elem = etree.SubElement(expansion_fs_elem, 'f')
        opinion_f_elem.attrib['name'] = 'opinion'
        opinion_symbol = etree.SubElement(opinion_f_elem, 'symbol')
        opinion_symbol.attrib['value'] = unicode(expansion.opinion)

        if expansion.type.sym_name == 'positions':
            # Expansion into positions: one element per position, in the
            # order returned by sort_positions.
            positions_f_elem = etree.SubElement(expansion_fs_elem, 'f')
            positions_f_elem.attrib['name'] = 'positions'
            vColl_elem = etree.SubElement(positions_f_elem, 'vColl')
            vColl_elem.attrib['org'] = 'set'
            for position in sort_positions(expansion.positions.all()):
                write_position_elem(vColl_elem, '', position)
        elif expansion.type.sym_name == 'phrase_type':
            # Expansion into a single phrase type (expansion.argument).
            phrases_f_elem = etree.SubElement(expansion_fs_elem, 'f')
            phrases_f_elem.attrib['name'] = 'phrases'
            vColl_elem = etree.SubElement(phrases_f_elem, 'vColl')
            vColl_elem.attrib['org'] = 'set' # left in place for future use
            write_phrase(vColl_elem, expansion.argument, '')