Commit 7a9badfd49802fdf1ef106b104a7e32c8fe0b25d
Merge branch 'tomek' into dev
Showing 7 changed files with 233 additions and 55 deletions
semantics/phraseology_generator.py
... | ... | @@ -3,10 +3,11 @@ |
3 | 3 | from dictionary.models import sort_arguments, sort_positions, sortatributes |
4 | 4 | from settings import MORFEUSZ2 |
5 | 5 | |
6 | -def lexicalisation(argument): | |
6 | +def lexicalisation(argument, categories, base): | |
7 | + subj = is_subj(categories) | |
7 | 8 | b = argument.type |
8 | 9 | if b == 'fixed': |
9 | - return get_words(sortatributes(argument)[-1]) | |
10 | + return (get_words(sortatributes(argument)[-1]), []) | |
10 | 11 | attributes = sortatributes(argument) |
11 | 12 | lexicalisation_type = attributes[0].values.all()[0].argument.type |
12 | 13 | lexicalisation_parameters = sortatributes(attributes[0].values.all()[0].argument) |
... | ... | @@ -14,14 +15,20 @@ def lexicalisation(argument): |
14 | 15 | lexicalisation_type = lexicalisation_parameters[0].values.all()[0].argument.type |
15 | 16 | lexicalisation_parameters = sortatributes(lexicalisation_parameters[0].values.all()[0].argument) |
16 | 17 | if lexicalisation_type == 'np': # np(case), number, nouns, atr |
17 | - nps = get_nps(get_case(lexicalisation_parameters[0]), get_number(attributes[1]), get_words(attributes[2]), attributes[3]) | |
18 | - return nps | |
18 | + nps = get_nps(get_case(lexicalisation_parameters[0], subj), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3]) | |
19 | + return (nps, get_verb(base, get_number(attributes[1], subj), subj)) | |
19 | 20 | elif lexicalisation_type == 'prepnp': #prepnp(prep, case), number, nouns, atr |
20 | - prepnps = get_prepnps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1]), get_number(attributes[1]), get_words(attributes[2]), attributes[3]) | |
21 | - return prepnps | |
21 | + prepnps = get_prepnps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3]) | |
22 | + return (prepnps, []) | |
22 | 23 | else: |
23 | - return [] | |
24 | - return [] | |
24 | + return ([], []) | |
25 | + return ([], []) | |
26 | + | |
27 | +def is_subj(categories): | |
28 | + for cat in categories: | |
29 | + if cat.category == u'subj': | |
30 | + return True | |
31 | + return False | |
25 | 32 | |
26 | 33 | def get_preposition(attribute): |
27 | 34 | return attribute.values.all()[0].parameter.type.name |
... | ... | @@ -30,14 +37,20 @@ def get_words(attribute): |
30 | 37 | words = [word.text[1:-1] for word in attribute.values.all()] |
31 | 38 | return words |
32 | 39 | |
33 | -def get_case(attribute): | |
40 | +def get_case(attribute, is_subj): | |
34 | 41 | case = attribute.values.all()[0].parameter.type.name |
35 | 42 | if case == u'str': |
36 | - case = u'acc' | |
43 | + if is_subj: | |
44 | + case = u'nom' | |
45 | + else: | |
46 | + case = u'acc' | |
37 | 47 | return case |
38 | 48 | |
39 | -def get_number(attribute): | |
49 | +def get_number(attribute, is_subj): | |
40 | 50 | number = attribute.values.all()[0].parameter.type.name |
51 | + if number == u'_': | |
52 | + if is_subj: | |
53 | + number = u'sg' | |
41 | 54 | return number |
42 | 55 | |
43 | 56 | def get_nps(case, number, nouns, _atr): |
... | ... | @@ -65,3 +78,15 @@ def get_prepnps(prep, case, number, nouns, _atr): |
65 | 78 | nps = get_nps(case, number, nouns, _atr) |
66 | 79 | return [prep + ' ' + np for np in nps] |
67 | 80 | |
81 | +def get_verb(inf, number, is_subj): | |
82 | + if not is_subj: | |
83 | + return None | |
84 | + else: | |
85 | + options = [(interp.orth, interp.getTag(MORFEUSZ2)) for interp in MORFEUSZ2.generate(inf.encode('utf8'))] | |
86 | + filtered = [] | |
87 | + for option in options: | |
88 | + (orth, tag) = option | |
89 | + if u'fin' in tag and u'sg' in tag and u'ter' in tag: | |
90 | + filtered.append(option) | |
91 | + options = filtered | |
92 | + return [orth for orth, _ in options] | |
... | ... |
semantics/saving.py
... | ... | @@ -95,7 +95,7 @@ def make_operations(lemma_id, operations): |
95 | 95 | else: |
96 | 96 | frame_id = int(operation['frame_id']) |
97 | 97 | luids = [translation['unit_id'][int(m)] if int(m) in translation['unit_id'] else int(m) for m in operation['units']] |
98 | - change_units(frame_id, luids) | |
98 | + change_units(lemma_id, frame_id, luids) | |
99 | 99 | elif operation['operation'] == "set_opinion": |
100 | 100 | if int(operation['frame_id']) in translation['frame_id']: |
101 | 101 | frame_id = translation['frame_id'][int(operation['frame_id'])] |
... | ... | @@ -207,11 +207,13 @@ def validate_roles(roles): |
207 | 207 | ok = not ok |
208 | 208 | return ok |
209 | 209 | |
210 | -def change_units(frame_id, luids): | |
210 | +def change_units(lemma_id, frame_id, luids): | |
211 | 211 | frame = SemanticFrame.objects.get(id=frame_id) |
212 | 212 | frame.lexical_units = [] |
213 | 213 | for id in luids: |
214 | 214 | lu = LexicalUnit.objects.get(id=id) |
215 | + lu.entry = Lemma.objects.get(id=lemma_id).entry_obj | |
216 | + lu.save() | |
215 | 217 | frame.lexical_units.add(lu) |
216 | 218 | |
217 | 219 | def set_opinion(frame_id, opinion): |
... | ... |
semantics/sem_urls.py
... | ... | @@ -25,4 +25,5 @@ SEMANTIC_PATTERNS = patterns('semantics.views', |
25 | 25 | url(r'^ajax/general_preference_form/$', 'general_preference_form'), |
26 | 26 | url(r'^ajax/synset_preference_form/$', 'synset_preference_form'), |
27 | 27 | url(r'^ajax/relational_preference_form/$', 'relational_preference_form'), |
28 | + url(r'^ajax/get_mwe_list/$', 'ajax_get_mwes'), | |
28 | 29 | ) |
... | ... |
semantics/static/js/semantics_lexical_units.js
... | ... | @@ -220,20 +220,41 @@ function getMeaningsSelectionForFrame(frame_id) { |
220 | 220 | var j; |
221 | 221 | for (j = 0; j < rows.length; j++) { |
222 | 222 | var options = []; |
223 | + var vrb = []; | |
224 | + var pre = []; | |
223 | 225 | sid_alt = rows[j].split('_'); |
224 | 226 | var sch = "schema_" + sid_alt[0] + "_"; |
225 | 227 | var k; |
226 | 228 | for (k = 0; k < schemas_content[sch].display.arguments[0].length; k++) { |
227 | 229 | var proper = schemas_content[sch].display.arguments[0][k].csv_id + "alt_" + sid_alt[1] + "_"; |
228 | 230 | if (connected[lem].indexOf(proper) != -1) { |
229 | - options.push(schemas_content[sch].display.arguments[0][k].lex); | |
231 | + if (schemas_content[sch].display.arguments[0][k].vrb != null && | |
232 | + schemas_content[sch].display.arguments[0][k].vrb.length > 0) { | |
233 | + pre.push(schemas_content[sch].display.arguments[0][k].lex); | |
234 | + vrb = schemas_content[sch].display.arguments[0][k].vrb; | |
235 | + } else { | |
236 | + options.push(schemas_content[sch].display.arguments[0][k].lex); | |
237 | + } | |
230 | 238 | } |
231 | 239 | } |
232 | - var lex = {lemma: base, args: options}; | |
233 | - if (hasRefl(sch)) { | |
234 | - lex.lemma = base + " się"; | |
235 | - } | |
236 | - lexicalisation.push(lex); | |
240 | + if (vrb.length == 0) { | |
241 | + var lex = {lemma: [base], pre: pre, args: options}; | |
242 | + if (hasRefl(sch)) { | |
243 | + lex.lemma = [base + " się"]; | |
244 | + } | |
245 | + lexicalisation.push(lex); | |
246 | + } else { | |
247 | + var lex = {lemma: vrb, pre: pre, args: options}; | |
248 | + if (hasRefl(sch)) { | |
249 | + var l = []; | |
250 | + var k; | |
251 | + for (k=0; k < vrb.length; k++) { | |
252 | + l.push(vrb[k] + " się"); | |
253 | + } | |
254 | + lex.lemma = l; | |
255 | + } | |
256 | + lexicalisation.push(lex); | |
257 | + } | |
237 | 258 | } |
238 | 259 | } |
239 | 260 | |
... | ... | @@ -248,12 +269,11 @@ function getFormForLexicalisation(lexicalisation) { |
248 | 269 | var result = ""; |
249 | 270 | var i; |
250 | 271 | for (i = 0; i < lexicalisation.length; i++) { |
251 | - var perms = permute(lexicalisation[i].args); | |
252 | - var j; | |
253 | - for (j = 0; j < perms.length; j++) { | |
254 | - result += lexicalisationForm(lexicalisation[i].lemma, cartesian(perms[j])) | |
255 | - } | |
256 | - result += '<br\>'; | |
272 | + var perms = permute(lexicalisation[i].args); | |
273 | + var j; | |
274 | + for (j = 0; j < perms.length; j++) { | |
275 | + result += lexicalisationForm(lexicalisation[i].lemma, lexicalisation[i].pre, cartesian(perms[j])); | |
276 | + } | |
257 | 277 | } |
258 | 278 | return result; |
259 | 279 | } |
... | ... | @@ -302,26 +322,90 @@ function cartesian(llist) { |
302 | 322 | return result; |
303 | 323 | } |
304 | 324 | |
305 | -function lexicalisationForm(lemma, tokenised) { | |
325 | +function lexicalisationForm(lemma, pre, tokenised) { | |
326 | + var list; | |
327 | + if (pre.length == 0) { | |
328 | + list = noSubjUnits(lemma, tokenised); | |
329 | + } else { | |
330 | + list = subjUnits(pre, lemma, tokenised); | |
331 | + } | |
332 | + | |
333 | + $.ajax({ | |
334 | + type: "GET", | |
335 | + dataType: "json", | |
336 | + url: ajax_get_mwes, | |
337 | + data: {"options": JSON.stringify(list)}, | |
338 | + success: function(data){ | |
339 | + list = data.mwes; | |
340 | + }, | |
341 | + async: false | |
342 | + }); | |
343 | + | |
306 | 344 | var display = ""; |
307 | - var i; | |
308 | - for (i = 0; i < tokenised.length; i++) { | |
309 | - if (tokenised[i].length == 0) { | |
310 | - display += "<br\>"; | |
311 | - } else { | |
312 | - var j; | |
313 | - for (j = 0; j < lexical_units.length; j++) { | |
314 | - if (lemma + " " + tokenised[i].join(" ") == lexical_units[j].base) { | |
315 | - return ""; | |
316 | - } | |
317 | - } | |
318 | - display += "<input type = \"checkbox\" name = \"mwe\" value = \"" + lemma + " " + tokenised[i].join(" ") + "\">"; // TODO: unikalne wartości, wartość => dodanie odpowiedniej jednostki (nazwa jednostki w wartości?) | |
319 | - display += lemma + " " + tokenised[i].join(" ") + "<br\>"; | |
345 | + var i, j; | |
346 | + for (i = 0; i < list.length; i++) { | |
347 | + var included = false; | |
348 | + for (j = 0; j < lexical_units.length; j++) { | |
349 | + if (list[i].base == lexical_units[j].base) { | |
350 | + included = true; | |
351 | + } | |
352 | + } | |
353 | + if (!included) { | |
354 | + display += "<input type = \"checkbox\" name = \"mwe\" value = \"" + list[i].base + list[i].sense + "_" + list[i].id + "_" + list[i].luid + "_" + list[i].sid + "\">" + list[i].base + list[i].sense + "<br\>"; | |
320 | 355 | } |
321 | 356 | } |
322 | 357 | return display; |
323 | 358 | } |
324 | 359 | |
360 | +function noSubjUnits(lemmata, dependants) { | |
361 | + var result = []; | |
362 | + var i, j; | |
363 | + for (i = 0; i < lemmata.length; i++) { | |
364 | + if (dependants.length == 0) { | |
365 | + result.push(lemmata[i]); | |
366 | + } else { | |
367 | + for (j = 0; j < dependants.length; j++) { | |
368 | + result.push(lemmata[i] + " " + dependants[j].join(" ")); | |
369 | + } | |
370 | + } | |
371 | + } | |
372 | + return result; | |
373 | +} | |
374 | + | |
375 | +function subjUnits(pre, lemmata, dependants) { | |
376 | + var result = []; | |
377 | + var i, j; | |
378 | + var temp = noSubjUnits(lemmata, dependants); | |
379 | + i = decapitate(dependants); | |
380 | + var pre2 = i.heads; | |
381 | + var temp2 = noSubjUnits(lemmata, i.bodies); | |
382 | + for (i = 0; i < pre.length; i++) { | |
383 | + for (j = 0; j < temp.length; j++) { | |
384 | + result.push(pre[i] + " " + temp[j]); | |
385 | + } | |
386 | + for (j = 0; j < pre2.length; j++) { | |
387 | + result.push(pre[i] + " " + pre2[j] + " " + temp2[j]); | |
388 | + } | |
389 | + } | |
390 | + return result; | |
391 | +} | |
392 | + | |
393 | +function decapitate(llist) { | |
394 | + var heads = []; | |
395 | + var bodies = []; | |
396 | + var i; | |
397 | + for (i = 0; i < llist.length; i++) { | |
398 | + if (llist[i].length > 0) { | |
399 | + var body = llist[i].slice(); | |
400 | + var head = body[0]; | |
401 | + body.splice(0, 1); | |
402 | + heads.push(head); | |
403 | + bodies.push(body); | |
404 | + } | |
405 | + } | |
406 | + return {heads: heads, bodies: bodies}; | |
407 | +} | |
408 | + | |
325 | 409 | |
326 | 410 | // get readable form of lexical unit |
327 | 411 | function getLexicalUnit(luid) { |
... | ... | @@ -342,13 +426,22 @@ function addPhraseologicalUnit(mwe, glossa, relation, to) { |
342 | 426 | return (free_luid + 1); |
343 | 427 | } |
344 | 428 | |
345 | -function addPhraseologicalUnits(frame_id, old_units, mwes, glossa, relation, to) { | |
429 | +function unlockPhraseologicalUnit(mwe) { | |
430 | + var lu = {base: mwe.lu.split('-')[0], glossa: "", definition: "", id: mwe.id, luid: mwe.luid, refl: false, glossa: "", pos: "czasownik", sense: mwe.lu.split('-')[1], relation: 2, to: -1, location: ""}; | |
431 | + lexical_units.push(lu); | |
432 | + return mwe.id; | |
433 | +} | |
434 | + | |
435 | +function addPhraseologicalUnits(frame_id, old_units, old_mwes, new_mwes, glossa, relation, to) { | |
346 | 436 | var i; |
347 | - var units = []; | |
348 | - for (i = 0; i < mwes.length; i++) { | |
349 | - units.push(addPhraseologicalUnit(mwes[i], glossa, relation, to)); | |
437 | + var units = old_units.slice(); | |
438 | + for (i = 0; i < old_mwes.length; i++) { | |
439 | + units.push(unlockPhraseologicalUnit(old_mwes[i])); | |
440 | + } | |
441 | + for (i = 0; i < new_mwes.length; i++) { | |
442 | + units.push(addPhraseologicalUnit(new_mwes[i], glossa, relation, to)); | |
350 | 443 | } |
351 | - changeUnits(frame_id, old_units.concat(units)); | |
444 | + changeUnits(frame_id, units); | |
352 | 445 | } |
353 | 446 | |
354 | 447 | |
... | ... | @@ -359,7 +452,7 @@ function getPhraseologicalAlternations(frame_id) { |
359 | 452 | var i; |
360 | 453 | for (i = 0; i < connected[lem].length; i++) { |
361 | 454 | var ids = connected[lem][i].split('_'); |
362 | - result.push(ids[1] + "_" + ids[7]) | |
455 | + result.push(ids[1] + "_" + ids[7]); | |
363 | 456 | } |
364 | 457 | } |
365 | 458 | return unique(result); |
... | ... |
semantics/static/js/semantics_schemas.js
... | ... | @@ -130,10 +130,11 @@ function schemaBody(schema, alternation, lex){ |
130 | 130 | for (l = 0; l < display.arguments[k].length; l++) { |
131 | 131 | schema_body += '<td id="' + display.arguments[k][l].csv_id + 'alt_' + alternation + '_" class="' + display.arguments[k][l].csv_class + 'alt_' + alternation + '_" onclick="schemaClick(\'' + display.arguments[k][l].csv_id + 'alt_' + alternation +'_\', '; |
132 | 132 | if (display.arguments[k][l].lex.length != 0) { |
133 | - schema_body += '[\'' + display.arguments[k][l].lex.join('\', \'') + '\'])">'; | |
133 | + schema_body += '[\'' + display.arguments[k][l].lex.join('\', \'') + '\']'; | |
134 | 134 | } else { |
135 | - schema_body += '[])">'; | |
135 | + schema_body += '[]'; | |
136 | 136 | } |
137 | + schema_body += ')">'; | |
137 | 138 | schema_body += display.arguments[k][l].argument; |
138 | 139 | schema_body += '</td>'; |
139 | 140 | if (parseInt(display.arguments[k][l].csv_id.split('_')[5]) >= 0) { |
... | ... |
semantics/static/js/semantics_view.js
... | ... | @@ -397,6 +397,8 @@ function changeLexicalUnits() { |
397 | 397 | |
398 | 398 | var units = []; |
399 | 399 | var mwes = []; |
400 | + var new_mwes = []; | |
401 | + var old_mwes = []; | |
400 | 402 | var a = ""; |
401 | 403 | var gloss = ""; |
402 | 404 | |
... | ... | @@ -411,7 +413,7 @@ function changeLexicalUnits() { |
411 | 413 | if (v == 1) { |
412 | 414 | changeUnits(highlighted_id, units); |
413 | 415 | |
414 | - addPhraseologicalUnits(highlighted_id, units, mwes, f.glossa, f.relation, f.synset); | |
416 | + addPhraseologicalUnits(highlighted_id, units, old_mwes, new_mwes, f.glossa, f.relation, f.synset); | |
415 | 417 | |
416 | 418 | frameClick(""); |
417 | 419 | displayFrames(); |
... | ... | @@ -445,8 +447,46 @@ function changeLexicalUnits() { |
445 | 447 | frameClick(a); |
446 | 448 | $.prompt.close(); |
447 | 449 | } else { |
448 | - $.prompt.goToState('state1'); | |
449 | - attachPlWNContextAutocomplete(); | |
450 | + | |
451 | + var i; | |
452 | + for (i = 0; i < mwes.length; i++) { | |
453 | + var lu = mwes[i].split('_')[0]; | |
454 | + var id = mwes[i].split('_')[1]; | |
455 | + var luid = mwes[i].split('_')[2]; | |
456 | + var sid = mwes[i].split('_')[3]; | |
457 | + if (sid == '') { | |
458 | + new_mwes.push(lu); | |
459 | + } else { | |
460 | + old_mwes.push({lu: lu, id: parseInt(id), luid: parseInt(luid), sid: parseInt(sid)}); | |
461 | + } | |
462 | + } | |
463 | + | |
464 | + if (old_mwes.length > 0) { | |
465 | + var sid = old_mwes[0].sid | |
466 | + var ok = true; | |
467 | + for (i = 0; i < old_mwes.length; i++) { | |
468 | + if (old_mwes[i].sid != sid) { | |
469 | + ok = false; | |
470 | + } | |
471 | + } | |
472 | + if (ok) { | |
473 | + changeUnits(highlighted_id, units); | |
474 | + | |
475 | + | |
476 | + addPhraseologicalUnits(highlighted_id, units, old_mwes, new_mwes, old_mwes[0].lu, 1, sid); | |
477 | + | |
478 | + frameClick(""); | |
479 | + displayFrames(); | |
480 | + frameClick(a); | |
481 | + $.prompt.close(); | |
482 | + } else { | |
483 | + $.prompt.goToState('state1'); | |
484 | + attachPlWNContextAutocomplete(); | |
485 | + } | |
486 | + } else { | |
487 | + $.prompt.goToState('state1'); | |
488 | + attachPlWNContextAutocomplete(); | |
489 | + } | |
450 | 490 | } |
451 | 491 | |
452 | 492 | } |
... | ... | @@ -482,7 +522,7 @@ function changeLexicalUnits() { |
482 | 522 | $.prompt.goToState('state2'); |
483 | 523 | } else { |
484 | 524 | /* zignorowane umiejscowienie w Słowosieci */ |
485 | - addPhraseologicalUnits(highlighted_id, units, mwes, f.glossa, f.relation, -1); | |
525 | + addPhraseologicalUnits(highlighted_id, units, old_mwes, new_mwes, f.glossa, f.relation, -1); | |
486 | 526 | frameClick(""); |
487 | 527 | displayFrames(); |
488 | 528 | frameClick(a) |
... | ... |
semantics/views.py
... | ... | @@ -48,6 +48,7 @@ def ajax_semantics(request, id): |
48 | 48 | 'ajax_relations': reverse('ajax_relations'), |
49 | 49 | 'ajax_predefined_preferences': reverse('ajax_predefined_preferences'), |
50 | 50 | 'ajax_plWN_context_lookup': reverse('ajax_plWN_context_lookup'), |
51 | + 'ajax_get_mwes': reverse('ajax_get_mwes'), | |
51 | 52 | } |
52 | 53 | return context |
53 | 54 | |
... | ... | @@ -450,13 +451,14 @@ def ajax_schemas(request, lemma_id): |
450 | 451 | # identifier, class, argument |
451 | 452 | arg = [] |
452 | 453 | #ma["ala"] = kot |
453 | - for i, c, a in zip(idents, schema_ids, row): | |
454 | + for i, c, a, p in zip(idents, schema_ids, row, ordered_positions): | |
454 | 455 | astr, aobj = a |
455 | 456 | if aobj is not None and aobj.is_phraseologic(): |
456 | - lex = lexicalisation(aobj) | |
457 | + tmp = lexicalisation(aobj, p.categories.all(), lemma.entry_obj.name) | |
458 | + lex, vrb = tmp | |
457 | 459 | else: |
458 | - lex = [] | |
459 | - arg.append({"csv_id": i, "csv_class": c, "argument": astr, "lex": lex}) | |
460 | + lex, vrb = ([], []) | |
461 | + arg.append({"csv_id": i, "csv_class": c, "argument": astr, "lex": lex, "vrb": vrb}) | |
460 | 462 | display["arguments"].append(arg) |
461 | 463 | |
462 | 464 | schema_display["schemas"].append({"schema_id": str(schema.id), "grade": lemma.get_schema_opinion(schema), "colspan": str(max(len(schema_categories), 1)), "rowspan": str(schema_arguments_rowspan), "display": display, "phraseologic": schema.phraseologic}) |
... | ... | @@ -643,3 +645,17 @@ def synset_preference_form(request): |
643 | 645 | def relational_preference_form(request): |
644 | 646 | form = RelationalSelPrefForm() |
645 | 647 | return {'form': form} |
648 | + | |
649 | +@ajax(method='get', encode_result=True) | |
650 | +def ajax_get_mwes(request, options): | |
651 | + results = [] | |
652 | + for term in options: | |
653 | + term = unicode(term) | |
654 | + if len(term) > 0: | |
655 | + obj_results = LexicalUnit.objects.filter(base=term) | |
656 | + if len(obj_results) > 0: | |
657 | + for lu in obj_results: | |
658 | + results.append({'base': lu.base, 'sense': '-' + str(lu.sense), 'id': lu.id, 'luid': lu.luid, 'sid': lu.synset.id}) | |
659 | + else: | |
660 | + results.append({'base': term, 'sense': '', 'id': term, 'luid': -1, 'sid': ''}) | |
661 | + return {'mwes': results} | |
... | ... |