Commit e9aff2d820b37f92e7effea41308383631aaefde
1 parent 915b67fc
adjectival modifiers of nouns + general phraseology fixes
Showing 3 changed files with 92 additions and 44 deletions
semantics/phraseology_generator.py
... | ... | @@ -7,7 +7,7 @@ from copy import deepcopy |
7 | 7 | def lexicalisation(argument, subj, base, negativity, reference=None): |
8 | 8 | b = argument.type |
9 | 9 | if b == 'fixed': |
10 | - return (get_words(sortatributes(argument)[-1]), []) | |
10 | + return (get_words(sortatributes(argument)[-1]), [], 1) | |
11 | 11 | attributes = sortatributes(argument) |
12 | 12 | lexicalisation_type = attributes[0].values.all()[0].argument.type |
13 | 13 | lexicalisation_parameters = sortatributes(attributes[0].values.all()[0].argument) |
... | ... | @@ -16,34 +16,45 @@ def lexicalisation(argument, subj, base, negativity, reference=None): |
16 | 16 | lexicalisation_parameters = sortatributes(lexicalisation_parameters[0].values.all()[0].argument) |
17 | 17 | if lexicalisation_type == 'np': # np(case), number, nouns, atr |
18 | 18 | nps = get_nps(get_case(lexicalisation_parameters[0], subj, negativity), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3]) |
19 | - return (nps, get_verb(base, get_number(attributes[1], subj), subj)) | |
19 | + if subj: | |
20 | + return (nps, get_verb(base, get_number(attributes[1], subj), subj), -1) | |
21 | + else: | |
22 | + return (nps, [], 1) | |
20 | 23 | elif lexicalisation_type == 'prepnp': #prepnp(prep, case), number, nouns, atr |
21 | 24 | prepnps = get_prepnps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, negativity), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3]) |
22 | - return (prepnps, []) | |
25 | + return (prepnps, [], 1) | |
23 | 26 | elif lexicalisation_type == 'adjp': # adjp(case), number, gender, degree, adjectives, atr |
24 | 27 | adjps = get_adjps(get_case(lexicalisation_parameters[0], subj, negativity, reference), get_number(attributes[1], subj, reference), get_gender(attributes[2], reference), get_degree(attributes[3]), get_words(attributes[4]), attributes[5]) |
25 | - return (adjps, get_verb(base, get_number(attributes[1], subj), subj)) | |
28 | + if reference is None: | |
29 | + if lexicalisation_parameters[0].values.all()[0].parameter.type.name == u'agr': | |
30 | + return (adjps, [], 0) | |
31 | + else: | |
32 | + return (adjps, [], -1) | |
33 | + elif subj: | |
34 | + return (adjps, get_verb(base, get_number(attributes[1], subj), subj), -1) | |
35 | + else: | |
36 | + return (adjps, [], 1) | |
26 | 37 | elif lexicalisation_type == 'prepadjp': #prepadjp(prep, case), number, gender, degree, adjectives, atr |
27 | 38 | prepadjps = get_prepadjps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, False, reference), get_number(attributes[1], subj, reference), get_gender(attributes[2], reference), get_degree(attributes[3]), get_words(attributes[4]), attributes[5]) |
28 | - return (prepadjps, []) | |
39 | + return (prepadjps, [], 1) | |
29 | 40 | elif lexicalisation_type == 'infp': |
30 | 41 | infps = get_infps(get_aspect(lexicalisation_parameters[0]), get_words(attributes[2]), attributes[4]) |
31 | - return (infps, []) | |
42 | + return (infps, [], 1) | |
32 | 43 | elif lexicalisation_type == 'advp': #advp(type), degree, adverb, atr |
33 | 44 | advps = get_advps(get_degree(attributes[1]), get_words(attributes[2]), attributes[3]) |
34 | - return (advps, [base]) | |
45 | + return (advps, [], -1) | |
35 | 46 | elif lexicalisation_type == 'nump': # nump(case), num, noun, atr |
36 | 47 | numps = get_numps(get_case(lexicalisation_parameters[0], subj, negativity, reference), get_words(attributes[1]), get_words(attributes[2]), attributes[3]) |
37 | - return (numps, get_verb(base, 'pl', subj)) | |
48 | + return (numps, get_verb(base, 'pl', subj), -1) | |
38 | 49 | elif lexicalisation_type == 'prepnump': # prepnump(prep,case), num, noun, atr |
39 | - numps = get_prepnumps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, False, reference), get_words(attributes[1]), get_words(attributes[2]), attributes[3]) | |
40 | - return (numps, []) #get_verb(base, 'pl', subj)) | |
50 | + prepnumps = get_prepnumps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, False, reference), get_words(attributes[1]), get_words(attributes[2]), attributes[3]) | |
51 | + return (prepnumps, [], 1) | |
41 | 52 | elif lexicalisation_type == 'qub': # qub, form, atr |
42 | 53 | qubs = get_qubs(get_words(attributes[1]), attributes[2]) |
43 | - return (qubs, [base]) | |
54 | + return (qubs, [], -1) | |
44 | 55 | else: |
45 | - return ([], []) | |
46 | - return ([], []) | |
56 | + return ([], [], 0) | |
57 | + return ([], [], 0) | |
47 | 58 | |
48 | 59 | def is_subj(categories): |
49 | 60 | for cat in categories: |
... | ... | @@ -82,6 +93,8 @@ def get_case(attribute, is_subj, negativity, reference=None): |
82 | 93 | case = [tag.split(':')[1]] |
83 | 94 | else: |
84 | 95 | case = [tag.split(':')[2]] |
96 | + elif case == u'agr' and reference is None: | |
97 | + case = [u'nom'] | |
85 | 98 | else: |
86 | 99 | case = [case] |
87 | 100 | return case |
... | ... | @@ -125,6 +138,12 @@ def get_degree(attribute): |
125 | 138 | degree = u'pos' |
126 | 139 | return degree |
127 | 140 | |
141 | +def in_tag(what, tag): | |
142 | + if u':' + what + u':' in tag or u':' + what + u'.' in tag or u'.' + what + u'.' in tag or u'.' + what + u':' in tag or tag.endswith(u'.' + what) or tag.endswith(u':' + what): | |
143 | + return True | |
144 | + else: | |
145 | + return False | |
146 | + | |
128 | 147 | def get_nps(cases, number, nouns, atr): |
129 | 148 | result = [] |
130 | 149 | for noun in nouns: |
... | ... | @@ -135,7 +154,7 @@ def get_nps(cases, number, nouns, atr): |
135 | 154 | filtered = [] |
136 | 155 | for option in options: |
137 | 156 | (orth, tag) = option |
138 | - if u':' + case in tag or u'.' + case in tag: | |
157 | + if in_tag(case, tag): | |
139 | 158 | filtered.append(option) |
140 | 159 | options_temp += filtered |
141 | 160 | else: |
... | ... | @@ -145,7 +164,7 @@ def get_nps(cases, number, nouns, atr): |
145 | 164 | filtered = [] |
146 | 165 | for option in options: |
147 | 166 | (orth, tag) = option |
148 | - if u':' + number + u':' in tag: | |
167 | + if in_tag(number, tag): | |
149 | 168 | filtered.append(option) |
150 | 169 | options = filtered |
151 | 170 | result += options |
... | ... | @@ -168,7 +187,7 @@ def get_infps(aspect, verbs, atr): |
168 | 187 | if aspect != u'_': |
169 | 188 | for option in options: |
170 | 189 | (orth, tag) = option |
171 | - if u':' + aspect + u':' in tag: | |
190 | + if in_tag(aspect, tag): | |
172 | 191 | filtered.append(option) |
173 | 192 | options = filtered |
174 | 193 | result += options |
... | ... | @@ -190,7 +209,7 @@ def get_adjps(cases, number, gender, degree, adjectives, atr): |
190 | 209 | filtered = [] |
191 | 210 | for option in options: |
192 | 211 | (orth, tag) = option |
193 | - if u':' + case + u':' in tag: | |
212 | + if in_tag(case, tag): | |
194 | 213 | filtered.append(option) |
195 | 214 | options_temp += filtered |
196 | 215 | else: |
... | ... | @@ -200,21 +219,21 @@ def get_adjps(cases, number, gender, degree, adjectives, atr): |
200 | 219 | filtered = [] |
201 | 220 | for option in options: |
202 | 221 | (orth, tag) = option |
203 | - if u':' + number + u':' in tag: | |
222 | + if in_tag(number, tag): | |
204 | 223 | filtered.append(option) |
205 | 224 | options = filtered |
206 | 225 | if gender != u'_': |
207 | 226 | filtered = [] |
208 | 227 | for option in options: |
209 | 228 | (orth, tag) = option |
210 | - if u':' + gender + u':' in tag or u'.' + gender + u':' in tag or u':' + gender + u'.' in tag or u'.' + gender + u'.' in tag or u'.' + gender + u':' in tag: | |
229 | + if in_tag(gender, tag): | |
211 | 230 | filtered.append(option) |
212 | 231 | options = filtered |
213 | 232 | if degree != u'_': |
214 | 233 | filtered = [] |
215 | 234 | for option in options: |
216 | 235 | (orth, tag) = option |
217 | - if u':' + degree in tag: | |
236 | + if in_tag(degree, tag): | |
218 | 237 | filtered.append(option) |
219 | 238 | options = filtered |
220 | 239 | result += options |
... | ... | @@ -231,14 +250,14 @@ def get_advps(degree, adverbs, atr): |
231 | 250 | filtered = [] |
232 | 251 | for option in options: |
233 | 252 | (orth, tag) = option |
234 | - if u'adv' in tag: | |
253 | + if tag.startswith(u'adv'): | |
235 | 254 | filtered.append(option) |
236 | 255 | options = filtered |
237 | 256 | if ':' in tag and degree != u'_': |
238 | 257 | filtered = [] |
239 | 258 | for option in options: |
240 | 259 | (orth, tag) = option |
241 | - if u':' + degree in tag: | |
260 | + if in_tag(degree, tag): | |
242 | 261 | filtered.append(option) |
243 | 262 | options = filtered |
244 | 263 | result += options |
... | ... | @@ -251,7 +270,7 @@ def get_qubs(qubs, atr): |
251 | 270 | filtered = [] |
252 | 271 | for option in options: |
253 | 272 | (orth, tag) = option |
254 | - if u'qub' in tag: | |
273 | + if tag.startswith(u'qub'): | |
255 | 274 | filtered.append(option) |
256 | 275 | options = filtered |
257 | 276 | result += options |
... | ... | @@ -265,7 +284,7 @@ def get_numps(cases, numerals, nouns, atr): |
265 | 284 | filtered = [] |
266 | 285 | for option in options: |
267 | 286 | (orth, tag) = option |
268 | - if u'num:' in tag: | |
287 | + if tag.startswith(u'num:'): | |
269 | 288 | filtered.append(option) |
270 | 289 | options = filtered |
271 | 290 | options_temp = [] |
... | ... | @@ -274,7 +293,7 @@ def get_numps(cases, numerals, nouns, atr): |
274 | 293 | filtered = [] |
275 | 294 | for option in options: |
276 | 295 | (orth, tag) = option |
277 | - if u':' + case + u':' in tag or u':' + case + u'.' in tag or u'.' + case + u'.' in tag or u'.' + case + u':' in tag: | |
296 | + if in_tag(case, tag): | |
278 | 297 | filtered.append(option) |
279 | 298 | options_temp += filtered |
280 | 299 | else: |
... | ... | @@ -289,7 +308,7 @@ def get_numps(cases, numerals, nouns, atr): |
289 | 308 | filtered = [] |
290 | 309 | for option in options: |
291 | 310 | (orth, tag) = option |
292 | - if u':pl:' in tag: | |
311 | + if in_tag(u'pl', tag): | |
293 | 312 | filtered.append(option) |
294 | 313 | options = filtered |
295 | 314 | if rec == 'rec': |
... | ... | @@ -304,7 +323,7 @@ def get_numps(cases, numerals, nouns, atr): |
304 | 323 | filtered = [] |
305 | 324 | for option in options: |
306 | 325 | (orth, tag) = option |
307 | - if u':' + case + u':' in tag or u':' + case + u'.' in tag or u'.' + case + u'.' in tag or u'.' + case + u':' in tag: | |
326 | + if in_tag(case, tag): | |
308 | 327 | filtered.append(option) |
309 | 328 | options_temp += filtered |
310 | 329 | else: |
... | ... | @@ -312,7 +331,7 @@ def get_numps(cases, numerals, nouns, atr): |
312 | 331 | options = options_temp |
313 | 332 | for (orth, tag) in options: |
314 | 333 | gender = tag.split(':')[3] |
315 | - if u':' + gender + u':' in num_tag or u':' + gender + u'.' in num_tag or u'.' + gender + u'.' in num_tag or u'.' + gender + u':' in num_tag: | |
334 | + if in_tag(gender, num_tag): | |
316 | 335 | results.append(num_orth + ' ' + orth) |
317 | 336 | |
318 | 337 | return results #ignoring ambiguos atr for numps |
... | ... | @@ -330,7 +349,7 @@ def get_verb(inf, number, is_subj): |
330 | 349 | filtered = [] |
331 | 350 | for option in options: |
332 | 351 | (orth, tag) = option |
333 | - if u'fin:' in tag and u':' + number + u':' in tag and u':ter:' in tag: | |
352 | + if tag.startswith(u'fin') and in_tag(number, tag) and in_tag(u'ter', tag): | |
334 | 353 | filtered.append(option) |
335 | 354 | options = filtered |
336 | 355 | return [orth for orth, _ in options] |
... | ... | @@ -354,7 +373,7 @@ def phrase(head, dependents): |
354 | 373 | type = argument.type |
355 | 374 | elif argument.type == u'lex': |
356 | 375 | type = sortatributes(argument)[0].values.all()[0].argument.type |
357 | - value, _ = lexicalisation(argument, False, '', False, head) | |
376 | + value, _, _ = lexicalisation(argument, False, '', False, head) | |
358 | 377 | values += value |
359 | 378 | if type == u'adjp': |
360 | 379 | modifiers['pre'].append(values) |
... | ... |
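The scattered substring tests on positional tags in this file are consolidated into the new `in_tag` helper. Below is a minimal standalone sketch of that helper together with a couple of illustrative NKJP-style tag strings (the example tags are mine, not taken from the commit), showing why whole-segment matching is safer than the old `u':' + value + u':' in tag` checks:

```python
# -*- coding: utf-8 -*-
# Standalone copy of the in_tag helper added in this commit: a value counts as
# present only when it forms a whole segment of the positional tag, delimited
# by ':' (tag positions) or '.' (alternative values inside one position).
def in_tag(what, tag):
    return (u':' + what + u':' in tag or u':' + what + u'.' in tag or
            u'.' + what + u'.' in tag or u'.' + what + u':' in tag or
            tag.endswith(u'.' + what) or tag.endswith(u':' + what))

# Illustrative tags (assumed NKJP-style positional format):
print(in_tag(u'nom', u'subst:pl:nom.acc:f'))  # True  - 'nom' is one of the dotted case values
print(u':nom:' in u'subst:pl:nom.acc:f')      # False - the old check misses dotted values
print(in_tag(u'f', u'subst:pl:nom:f'))        # True  - values in final position also match
```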
semantics/static/js/semantics_lexical_units.js
... | ... | @@ -256,6 +256,7 @@ function getMeaningsSelectionForFrame(frame_id) { |
256 | 256 | var options = []; |
257 | 257 | var vrb = []; |
258 | 258 | var pre = []; |
259 | + var both = []; | |
259 | 260 | sid_alt = rows[j].split('_'); |
260 | 261 | var sch = "schema_" + sid_alt[0] + "_"; |
261 | 262 | var k; |
... | ... | @@ -265,46 +266,55 @@ function getMeaningsSelectionForFrame(frame_id) { |
265 | 266 | if (connected[lem].indexOf(proper) != -1) { |
266 | 267 | if (schemas_content[sch].display.arguments[0][k].vrb != null && |
267 | 268 | schemas_content[sch].display.arguments[0][k].vrb.length > 0) { |
268 | - pre = pre.concat(schemas_content[sch].display.arguments[0][k].lex); | |
269 | 269 | vrb = schemas_content[sch].display.arguments[0][k].vrb; |
270 | - } else { | |
270 | + } | |
271 | + if (schemas_content[sch].display.arguments[0][k].loc == -1) { | |
272 | + pre = pre.concat(schemas_content[sch].display.arguments[0][k].lex); | |
273 | + } | |
274 | + if (schemas_content[sch].display.arguments[0][k].loc == 0) { | |
275 | + both.push(schemas_content[sch].display.arguments[0][k].lex); | |
276 | + } | |
277 | + if (schemas_content[sch].display.arguments[0][k].loc == 1) { | |
271 | 278 | options.push(schemas_content[sch].display.arguments[0][k].lex); |
272 | 279 | } |
273 | 280 | } |
274 | 281 | } |
275 | 282 | } |
283 | + var lemma; | |
276 | 284 | if (vrb.length == 0) { |
277 | - var lex = {lemma: [base], pre: pre, args: options}; | |
285 | + lemma = [base]; | |
278 | 286 | if (hasRefl(sch)) { |
279 | 287 | if (isNeg(sch)) { |
280 | - lex.lemma = ["nie " + base + " się"]; | |
288 | + lemma = ["nie " + base + " się"]; | |
281 | 289 | } else { |
282 | - lex.lemma = [base + " się"]; | |
290 | + lemma = [base + " się"]; | |
283 | 291 | } |
284 | 292 | } else { |
285 | 293 | if (isNeg(sch)) { |
286 | - lex.lemma = ["nie " + base]; | |
294 | + lemma = ["nie " + base]; | |
287 | 295 | } else { |
288 | - lex.lemma = [base]; | |
296 | + lemma = [base]; | |
289 | 297 | } |
290 | 298 | } |
291 | - lexicalisation.push(lex); | |
292 | 299 | } else { |
293 | - var lex = {lemma: vrb, pre: pre, args: options}; | |
300 | + lemma = vrb; | |
294 | 301 | if (hasRefl(sch)) { |
295 | 302 | var l = []; |
296 | 303 | var k; |
297 | 304 | for (k=0; k < vrb.length; k++) { |
298 | 305 | l.push(vrb[k] + " się"); |
299 | 306 | } |
300 | - lex.lemma = l; | |
307 | + lemma = l; | |
301 | 308 | } |
302 | - lexicalisation.push(lex); | |
303 | 309 | } |
310 | + | |
311 | + var lexes = get_lexes(lemma, pre, both, options); | |
312 | + lexicalisation = lexicalisation.concat(lexes); | |
304 | 313 | } |
305 | 314 | } |
306 | 315 | |
307 | 316 | display += getFormForLexicalisation(lexicalisation); |
317 | + console.log(lexicalisation); | |
308 | 318 | |
309 | 319 | display += "</div>"; |
310 | 320 | |
... | ... | @@ -347,6 +357,25 @@ function permute(list) { |
347 | 357 | |
348 | 358 | } |
349 | 359 | |
360 | +function get_lexes(lemma, pre, both, post) { | |
361 | + var i; | |
362 | + var struct = [{lemma:lemma, pre:pre, args:post}]; | |
363 | + for (i = 0; i < both.length; i++) { | |
364 | + temp_struct = []; | |
365 | + var j; | |
366 | + for (j = 0; j < struct.length; j++) { | |
367 | + var pre_lex = JSON.parse(JSON.stringify(struct[j])); | |
368 | + pre_lex.pre = pre_lex.pre.concat(both[i]); | |
369 | + temp_struct.push(pre_lex); | |
370 | + var post_lex = JSON.parse(JSON.stringify(struct[j])); | |
371 | + post_lex.args.push(both[i]); | |
372 | + temp_struct.push(post_lex); | |
373 | + } | |
374 | + struct = temp_struct; | |
375 | + } | |
376 | + return struct; | |
377 | +} | |
378 | + | |
350 | 379 | function cartesian(llist) { |
351 | 380 | if (llist.length == 0) { |
352 | 381 | return [[]]; |
... | ... |
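The new JavaScript helper `get_lexes` takes the lexicalisations whose placement flag is 0 (collected in `both`) and expands them into every combination of pre-head and post-head placement, so one schema argument can yield several alternative lexical units. For illustration only, here is a rough Python re-sketch of that expansion (the function above is the authoritative JavaScript version; the Python names are mine):

```python
import copy

def get_lexes(lemma, pre, both, post):
    # One starting structure; every 'both' item doubles the variants:
    # once placed before the head, once kept as a post-head argument.
    variants = [{'lemma': lemma, 'pre': list(pre), 'args': list(post)}]
    for item in both:  # item is assumed to be a list of phrase strings
        expanded = []
        for variant in variants:
            before = copy.deepcopy(variant)
            before['pre'] = before['pre'] + item   # mirrors pre_lex.pre.concat(both[i])
            expanded.append(before)
            after = copy.deepcopy(variant)
            after['args'].append(item)             # mirrors post_lex.args.push(both[i])
            expanded.append(after)
        variants = expanded
    return variants

# Two 'both' items would give 2**2 = 4 placement variants of the same unit.
```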
semantics/views.py
... | ... | @@ -518,10 +518,10 @@ def ajax_schemas(request, lemma_id): |
518 | 518 | astr, aobj = a |
519 | 519 | if aobj is not None and aobj.is_phraseologic(): |
520 | 520 | tmp = lexicalisation(aobj, is_subj(p.categories.all()), lemma.entry_obj.name, ('neg' in characteristics[characteristic_id])) |
521 | - lex, vrb = tmp | |
521 | + lex, vrb, loc = tmp | |
522 | 522 | else: |
523 | - lex, vrb = ([], []) | |
524 | - arg.append({"csv_id": i, "csv_class": c, "argument": astr, "lex": lex, "vrb": vrb}) | |
523 | + lex, vrb, loc = ([], [], 0) | |
524 | + arg.append({"csv_id": i, "csv_class": c, "argument": astr, "lex": lex, "vrb": vrb, "loc": loc}) | |
525 | 525 | display["arguments"].append(arg) |
526 | 526 | |
527 | 527 | schema_display["schemas"].append({"schema_id": str(schema.id), "grade": lemma.get_schema_opinion(schema), "colspan": str(max(len(schema_categories), 1)), "rowspan": str(schema_arguments_rowspan), "display": display, "phraseologic": schema.phraseologic}) |
... | ... |
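With this change `lexicalisation` returns a triple instead of a pair; the third element, `loc`, is forwarded to the front end and, judging by the JavaScript above, encodes where the lexicalised phrase sits relative to the head: -1 before it, 0 on either side (the agreeing adjectival modifiers this commit is about), 1 after it. A minimal sketch of how a caller unpacks the new value (the argument objects are hypothetical placeholders for what views.py actually passes in):

```python
# Hypothetical call; 'aobj', 'subj', 'base' and 'neg' stand in for the real
# objects that views.py passes to lexicalisation().
lex, vrb, loc = lexicalisation(aobj, subj, base, neg)

if loc == -1:
    placement = 'before the head'   # e.g. advp, qub, nump, np subjects
elif loc == 0:
    placement = 'either side'       # e.g. adjp in the agreeing (agr) case
else:
    placement = 'after the head'    # e.g. prepnp, prepadjp, infp, fixed phrases
```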