Example.py
12.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
#! /usr/bin/python
# -*- coding: utf-8 -*-
from collections import defaultdict
import examples.models
from connections.models import ExampleConnection, SchemaHook
from syntax.models import Schema, Position
from syntax.models_phrase import PhraseType
from semantics.models import Argument
def clean_sentence(sentence):
    """Return ``sentence`` with every line break replaced by one space.

    CRLF pairs are substituted first, so a Windows line ending yields a
    single space; any remaining lone LF or CR characters are replaced
    afterwards.
    """
    flattened = sentence
    for line_break in ('\r\n', '\n', '\r'):
        flattened = flattened.replace(line_break, ' ')
    return flattened
class Example:
    """An example sentence together with the syntax/semantics it illustrates.

    Instances are built from a parsed tree by :meth:`fromTree` and written to
    the database by :meth:`store`.
    """

    def __init__(self, sentence, source, opinion, note, illustrated_syntax, meaning, illustrated_semantics):
        """Keep the already-extracted parts of an example.

        ``illustrated_syntax`` is kept as ``_phrases`` and
        ``illustrated_semantics`` as ``_arguments``; ``note``, ``meaning`` and
        ``illustrated_semantics`` may be ``None``.
        """
        self._sentence = sentence
        self._source = source
        self._opinion = opinion
        self._note = note
        self._phrases = illustrated_syntax
        self._meaning = meaning
        self._arguments = illustrated_semantics

    @classmethod
    def fromTree(cls, example_tree, phrases, entry_semantics, base, meanings, in_data, out_file, misconnected):
        """Build an ``Example`` from a parsed example node.

        The node's children are read positionally. Supported layouts:

        * 6 children: meaning, syntax, sentence, source, opinion, note
        * 5 children whose first child has ``name == 'meaning'``:
          meaning, syntax, sentence, source, opinion
        * 5 children otherwise: syntax, sentence, source, opinion, note
        * 4 children: syntax, sentence, source, opinion

        Semantics are only inferred (via ``IllustratesSemantics.interfere``)
        when a meaning child is present. Any other child count prints the
        node and raises ``UnknownError``.
        """
        eid = int(example_tree._attrs['xml:id'].split('.')[1].split('-')[0])
        children = example_tree._children
        child_count = len(children)

        if child_count == 6:
            has_meaning, has_note = True, True
        elif child_count == 5:
            # Five children is ambiguous: either the meaning or the note is
            # missing; the first child's name tells which one.
            has_meaning = children[0]._attrs['name'] == 'meaning'
            has_note = not has_meaning
        elif child_count == 4:
            has_meaning, has_note = False, False
        else:
            print(example_tree)
            # NOTE(review): UnknownError is not imported in the visible part
            # of this file - confirm it is in scope, otherwise this line
            # raises NameError instead.
            raise UnknownError()

        # A leading meaning child shifts every other child by one position.
        offset = 1 if has_meaning else 0

        meaning = children[0]._children[0]._attrs['sameAs'][1:] if has_meaning else None
        sentence = clean_sentence(children[offset + 1]._children[0]._content)
        illustrated_syntax = IllustratesSyntax.fromTree(children[offset], phrases, base, sentence, out_file)
        if has_meaning:
            illustrated_semantics = IllustratesSemantics.interfere(
                sentence, base, meaning, illustrated_syntax, entry_semantics,
                meanings, eid, in_data, out_file, misconnected)
        else:
            illustrated_semantics = None
        source = children[offset + 2]._children[0]._attrs['value']
        opinion = children[offset + 3]._children[0]._attrs['value']
        note = children[offset + 4]._children[0]._content if has_note else None

        return cls(sentence, source, opinion, note, illustrated_syntax, meaning, illustrated_semantics)

    @staticmethod
    def _add_hooks(connection, subentry, schema, subposition, alternation):
        """Create one ``SchemaHook`` per phrase of ``subposition`` and attach it to ``connection``."""
        position = Position.objects.get(id=subposition[0]._position._db_id)
        for phrase_obj in subposition:
            phrase_type = PhraseType.objects.get(text_rep=str(phrase_obj))
            hook = SchemaHook(subentry=subentry,
                              schema=schema,
                              position=position,
                              phrase_type=phrase_type,
                              alternation=alternation)
            hook.save()
            connection.schema_connections.add(hook)

    def store(self, entry, meanings):
        """Save the example for ``entry`` and, where possible, its connections.

        The example row itself is always saved. What is attached afterwards
        depends on what the example illustrates:

        * no syntax: nothing more is stored;
        * syntax only: an ``ExampleConnection`` with schema hooks
          (alternation ``1``) for every illustrated phrase;
        * syntax and transferable semantics: an ``ExampleConnection`` bound to
          the lexical unit looked up in ``meanings``, with its arguments and
          schema hooks;
        * otherwise: nothing more is stored.
        """
        opinion = examples.models.ExampleOpinion.objects.get(key=self._opinion)
        source = examples.models.ExampleSource.objects.get(key=self._source)
        example = examples.models.Example(entry=entry,
                                          sentence=self._sentence,
                                          opinion=opinion,
                                          source=source,
                                          note=self._note)
        example.save()

        if not self._phrases.exists():
            # example not connected to syntax
            print(' STORING EXAMPLE:', self._sentence)
            print(' example not connected to syntax')
            return

        if self._arguments is None:
            # example connected to syntax but not connected to semantics
            print(' STORING EXAMPLE:', self._sentence)
            print(' example connected to syntax but not connected to semantics')
            subpositions = self._phrases._subpositions
            if not subpositions:
                # A schema key can be present while no phrase was collected;
                # indexing subpositions[0] would then fail after a connection
                # row had already been written.
                print(' example connected to schema but not to any phrases in it; treated as not connected to syntax')
                return
            connection = ExampleConnection(example=example)
            connection.save()
            # Subentry and schema are taken from the first illustrated phrase
            # and reused for every hook of this example.
            first_position = subpositions[0][0]._position
            subentry = first_position._schema.getSubentry(entry)
            schema = Schema.objects.get(id=first_position._schema._db_id)
            for subposition in subpositions:
                self._add_hooks(connection, subentry, schema, subposition, 1)
            return

        if self._arguments.exists():
            # example connected to both syntax and semantics
            # and connection can be transfered
            _lemma, meaning = meanings[self._meaning]
            meaning = meaning.get()
            connection = ExampleConnection(example=example, lexical_unit=meaning)
            connection.save()
            for role_illustration in self._arguments._arguments:
                # Illustrations without a matched argument still get hooks.
                if role_illustration._argument is not None:
                    argument = Argument.objects.get(id=role_illustration._argument._db_id)
                    connection.arguments.add(argument)
                subposition = role_illustration._subposition
                position_schema = subposition[0]._position._schema
                subentry = position_schema.getSubentry(entry)
                schema = Schema.objects.get(id=position_schema._db_id)
                self._add_hooks(connection, subentry, schema, subposition,
                                role_illustration._alternation)
            return

        # example connected to both syntax and semantics
        # but connection cannot be transfered
        # or connected to schema but not to any phrases in it
        # example is treated as not connected to syntax
        print(' STORING EXAMPLE:', self._sentence)
        print(' example connected to both syntax and semantics but connection cannot be transfered or connected to schema but not to any phrases in it; treated as not connected to syntax')
class IllustratesSyntax:
    """The schema and the phrases, grouped by position, that an example illustrates."""

    def __init__(self, schema_key, subpositions):
        """Keep the schema key (``None`` when nothing is illustrated) and the phrase groups."""
        self._schema_key = schema_key
        self._subpositions = subpositions

    @classmethod
    def fromTree(cls, tree, phrases, base, sentence, out_file):
        """Collect the phrases referenced by ``tree``, grouped by position key.

        Each referenced id is the ``sameAs`` attribute without its first
        character; its second and third dot-separated fields are read as the
        schema key and the position key. Ids missing from ``phrases`` are
        reported to ``out_file`` (one line each) and skipped, but they still
        set the schema key. The schema key of the last reference wins.
        """
        schema_key = None
        positions = defaultdict(list)
        for subtree in tree._children[0]._children:
            phrase_id = subtree._attrs['sameAs'][1:]
            id_fields = phrase_id.split('.')
            schema_key = int(id_fields[1])
            position_key = int(id_fields[2])
            if phrase_id not in phrases:
                # Terminate the record so consecutive reports do not run
                # together on one line.
                out_file.write('@@@ ' + base + ':\t' + sentence + '\n')
            else:
                positions[position_key].append(phrases[phrase_id])
        return cls(schema_key, list(positions.values()))

    def exists(self):
        """Return True when at least one phrase reference was seen."""
        return self._schema_key is not None
class IllustratesSemanticRole:
    """Pairs a (possibly missing) argument with the phrase group illustrating it."""

    def __init__(self, argument, subposition, alternation):
        """Record the pairing and pre-compute the group's text form.

        The text form is obtained by asking the position of the first phrase
        in ``subposition`` to render the whole group, so ``subposition`` must
        not be empty.
        """
        self._argument = argument
        self._alternation = alternation
        self._subposition = subposition
        owning_position = subposition[0]._position
        self._subposition_str = owning_position.subposition(subposition)
class IllustratesSemantics:
    """The frame and the role illustrations inferred for an example."""

    def __init__(self, frame, arguments):
        """Keep the frame and the list of role illustrations."""
        self._frame = frame
        self._arguments = arguments

    @classmethod
    def interfere(cls, sentence, base, meaning, illustrated_syntax, semantics, meanings, eid, in_data, out_file, misconnected):
        """Infer which arguments of ``meaning``'s frame the example illustrates.

        Only realizations whose schema key equals the illustrated schema key
        are considered. For each of them (numbered from 1, the "alternation")
        every illustrated phrase group is matched against the realization's
        arguments; groups without a match are kept as loose illustrations.
        The realization(s) with the most matched groups win.

        Returns ``None`` (after reporting to ``misconnected``) when no
        realization fits the schema; otherwise an instance holding the
        frame and the illustrations. When several realizations tie, the
        illustrations of all of them are merged, the candidates are printed,
        and they are also written to ``out_file`` if they look different.

        ``in_data`` is accepted but not used here.
        """
        frame, all_realizations = semantics.findFrame(meaning)
        realizations = [
            realization for realization in all_realizations
            if int(realization._schema._id.split('.')[1].split('-')[0]) == illustrated_syntax._schema_key
        ]

        # Group candidate readings by how many phrase groups they matched.
        possible_arguments = defaultdict(list)
        for alternation, realization in enumerate(realizations, start=1):
            matched = []
            loose = []
            for subposition in illustrated_syntax._subpositions:
                argument = realization.findMatchingArgument(subposition)
                illustration = IllustratesSemanticRole(argument, subposition, alternation)
                if argument is not None:
                    matched.append(illustration)
                else:
                    loose.append(illustration)
            possible_arguments[len(matched)].append((matched, loose))

        if not possible_arguments:
            # example connected to schema but not connected to any phrases there !!!
            lu = meanings.locate(meaning)
            misconnected.write('% ' + sentence + '\n')
            misconnected.write(base + '\t' + str(lu) + '\t' + str(illustrated_syntax._schema_key) + '\t' + str(eid) + '\n\n')
            # KKK return None here to attach only to schema? (e.g. awaria)
            return None

        # Every key was created by an append, so this list is never empty.
        best_candidates = possible_arguments[max(possible_arguments)]

        if len(best_candidates) == 1:
            matched, loose = best_candidates[0]
            return cls(frame, matched + loose)

        # Several realizations tie: describe each one and merge them all.
        lu = meanings.locate(meaning)
        l1 = '% ' + sentence + '\n'
        l2 = base + '\t' + str(lu) + '\t' + str(illustrated_syntax._schema_key) + '\t' + str(eid) + '\n'
        l3s = []
        equal = True
        for role_illustrations, _ in best_candidates:
            roles = []
            for role_illustration in role_illustrations:
                role_str = str(role_illustration._argument._semantic_role)
                argument_str = role_str + ': ' + role_illustration._subposition_str
                roles.append(argument_str)
                # NOTE(review): l3s[0] is a string, so this is a substring
                # test against the first candidate's description; it only
                # influences whether the tie is logged to out_file.
                if l3s and argument_str not in l3s[0]:
                    equal = False
            l3s.append('\t' + '\t'.join(roles))

        arguments = []
        for role_illustrations, free in best_candidates:
            arguments += role_illustrations
            arguments += free
        print(('\n'.join(l3s) + '\n'))

        if not equal:
            out_file.write(l1)
            out_file.write(l2)
            out_file.write('\n'.join(l3s) + '\n')
            out_file.write('\n')

        return cls(frame, arguments)

    def exists(self):
        """Return True when at least one role illustration was collected."""
        return len(self._arguments) > 0