Commit 7a9badfd49802fdf1ef106b104a7e32c8fe0b25d

Authored by Tomasz Bartosiak
2 parents 10d042b7 ae00e34b

Merge branch 'tomek' into dev

semantics/phraseology_generator.py
... ... @@ -3,10 +3,11 @@
3 3 from dictionary.models import sort_arguments, sort_positions, sortatributes
4 4 from settings import MORFEUSZ2
5 5  
6   -def lexicalisation(argument):
  6 +def lexicalisation(argument, categories, base):
  7 + subj = is_subj(categories)
7 8 b = argument.type
8 9 if b == 'fixed':
9   - return get_words(sortatributes(argument)[-1])
  10 + return (get_words(sortatributes(argument)[-1]), [])
10 11 attributes = sortatributes(argument)
11 12 lexicalisation_type = attributes[0].values.all()[0].argument.type
12 13 lexicalisation_parameters = sortatributes(attributes[0].values.all()[0].argument)
... ... @@ -14,14 +15,20 @@ def lexicalisation(argument):
14 15 lexicalisation_type = lexicalisation_parameters[0].values.all()[0].argument.type
15 16 lexicalisation_parameters = sortatributes(lexicalisation_parameters[0].values.all()[0].argument)
16 17 if lexicalisation_type == 'np': # np(case), number, nouns, atr
17   - nps = get_nps(get_case(lexicalisation_parameters[0]), get_number(attributes[1]), get_words(attributes[2]), attributes[3])
18   - return nps
  18 + nps = get_nps(get_case(lexicalisation_parameters[0], subj), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3])
  19 + return (nps, get_verb(base, get_number(attributes[1], subj), subj))
19 20 elif lexicalisation_type == 'prepnp': #prepnp(prep, case), number, nouns, atr
20   - prepnps = get_prepnps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1]), get_number(attributes[1]), get_words(attributes[2]), attributes[3])
21   - return prepnps
  21 + prepnps = get_prepnps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3])
  22 + return (prepnps, [])
22 23 else:
23   - return []
24   - return []
  24 + return ([], [])
  25 + return ([], [])
  26 +
def is_subj(categories):
    """Tell whether any of *categories* is the subject category.

    *categories* is an iterable of objects exposing a ``category``
    attribute; the function returns True as soon as one of them equals
    ``u'subj'`` and False when none does (including for an empty iterable).
    """
    return any(cat.category == u'subj' for cat in categories)
25 32  
26 33 def get_preposition(attribute):
27 34 return attribute.values.all()[0].parameter.type.name
... ... @@ -30,14 +37,20 @@ def get_words(attribute):
30 37 words = [word.text[1:-1] for word in attribute.values.all()]
31 38 return words
32 39  
def get_case(attribute, is_subj=False):
    """Return the case name stored on *attribute*.

    The name is read from the first element of ``attribute.values.all()``
    (``.parameter.type.name``). The placeholder value ``u'str'`` is resolved
    to ``u'nom'`` when *is_subj* is true and to ``u'acc'`` otherwise; every
    other name is returned unchanged.

    *is_subj* defaults to False so that one-argument calls keep returning
    ``u'acc'`` for ``u'str'``, as they did before the parameter was added.
    """
    case = attribute.values.all()[0].parameter.type.name
    if case == u'str':
        # NOTE(review): 'str' presumably stands for structural case - confirm.
        case = u'nom' if is_subj else u'acc'
    return case
38 48  
def get_number(attribute, is_subj=False):
    """Return the number name stored on *attribute*.

    The name is read from the first element of ``attribute.values.all()``
    (``.parameter.type.name``). The placeholder ``u'_'`` is replaced by
    ``u'sg'`` only when *is_subj* is true; in every other case the stored
    name is returned as is.

    *is_subj* defaults to False so that one-argument calls behave exactly
    as they did before the parameter was added.
    """
    number = attribute.values.all()[0].parameter.type.name
    if number == u'_' and is_subj:
        number = u'sg'
    return number
42 55  
43 56 def get_nps(case, number, nouns, _atr):
... ... @@ -65,3 +78,15 @@ def get_prepnps(prep, case, number, nouns, _atr):
65 78 nps = get_nps(case, number, nouns, _atr)
66 79 return [prep + ' ' + np for np in nps]
67 80  
def get_verb(inf, number, is_subj):
    """Return finite third-person forms of *inf* agreeing with *number*.

    Returns None when *is_subj* is false (no verb form is needed then).
    Otherwise every interpretation produced by ``MORFEUSZ2.generate`` for
    the UTF-8 encoded *inf* is inspected and the ``orth`` of each one whose
    tag contains ``fin``, the requested number and ``ter`` is collected.

    *number* is honoured when it is ``u'sg'`` or ``u'pl'``; any other value
    falls back to ``u'sg'``, which is what this function previously used
    unconditionally.
    """
    if not is_subj:
        return None
    if number not in (u'sg', u'pl'):
        number = u'sg'
    forms = []
    for interp in MORFEUSZ2.generate(inf.encode('utf8')):
        tag = interp.getTag(MORFEUSZ2)
        # Containment test, exactly as before; assumes the tag is a
        # string-like value listing its features - TODO confirm.
        if u'fin' in tag and number in tag and u'ter' in tag:
            forms.append(interp.orth)
    return forms
... ...
semantics/saving.py
... ... @@ -95,7 +95,7 @@ def make_operations(lemma_id, operations):
95 95 else:
96 96 frame_id = int(operation['frame_id'])
97 97 luids = [translation['unit_id'][int(m)] if int(m) in translation['unit_id'] else int(m) for m in operation['units']]
98   - change_units(frame_id, luids)
  98 + change_units(lemma_id, frame_id, luids)
99 99 elif operation['operation'] == "set_opinion":
100 100 if int(operation['frame_id']) in translation['frame_id']:
101 101 frame_id = translation['frame_id'][int(operation['frame_id'])]
... ... @@ -207,11 +207,13 @@ def validate_roles(roles):
207 207 ok = not ok
208 208 return ok
209 209  
def change_units(lemma_id, frame_id, luids):
    """Replace the lexical units attached to a semantic frame.

    The frame identified by *frame_id* first has its ``lexical_units``
    emptied. Then every lexical unit whose id is in *luids* is re-pointed at
    the entry of the lemma identified by *lemma_id*, saved, and added to the
    frame.

    The lemma is looked up once, and only when there is at least one unit to
    process, instead of once per unit.
    """
    frame = SemanticFrame.objects.get(id=frame_id)
    frame.lexical_units = []
    if not luids:
        return
    entry = Lemma.objects.get(id=lemma_id).entry_obj
    for luid in luids:
        lu = LexicalUnit.objects.get(id=luid)
        lu.entry = entry
        lu.save()
        frame.lexical_units.add(lu)
216 218  
217 219 def set_opinion(frame_id, opinion):
... ...
semantics/sem_urls.py
... ... @@ -25,4 +25,5 @@ SEMANTIC_PATTERNS = patterns('semantics.views',
25 25 url(r'^ajax/general_preference_form/$', 'general_preference_form'),
26 26 url(r'^ajax/synset_preference_form/$', 'synset_preference_form'),
27 27 url(r'^ajax/relational_preference_form/$', 'relational_preference_form'),
  28 + url(r'^ajax/get_mwe_list/$', 'ajax_get_mwes'),
28 29 )
... ...
semantics/static/js/semantics_lexical_units.js
... ... @@ -220,20 +220,41 @@ function getMeaningsSelectionForFrame(frame_id) {
220 220 var j;
221 221 for (j = 0; j < rows.length; j++) {
222 222 var options = [];
  223 + var vrb = [];
  224 + var pre = [];
223 225 sid_alt = rows[j].split('_');
224 226 var sch = "schema_" + sid_alt[0] + "_";
225 227 var k;
226 228 for (k = 0; k < schemas_content[sch].display.arguments[0].length; k++) {
227 229 var proper = schemas_content[sch].display.arguments[0][k].csv_id + "alt_" + sid_alt[1] + "_";
228 230 if (connected[lem].indexOf(proper) != -1) {
229   - options.push(schemas_content[sch].display.arguments[0][k].lex);
  231 + if (schemas_content[sch].display.arguments[0][k].vrb != null &&
  232 + schemas_content[sch].display.arguments[0][k].vrb.length > 0) {
  233 + pre.push(schemas_content[sch].display.arguments[0][k].lex);
  234 + vrb = schemas_content[sch].display.arguments[0][k].vrb;
  235 + } else {
  236 + options.push(schemas_content[sch].display.arguments[0][k].lex);
  237 + }
230 238 }
231 239 }
232   - var lex = {lemma: base, args: options};
233   - if (hasRefl(sch)) {
234   - lex.lemma = base + " się";
235   - }
236   - lexicalisation.push(lex);
  240 + if (vrb.length == 0) {
  241 + var lex = {lemma: [base], pre: pre, args: options};
  242 + if (hasRefl(sch)) {
  243 + lex.lemma = [base + " się"];
  244 + }
  245 + lexicalisation.push(lex);
  246 + } else {
  247 + var lex = {lemma: vrb, pre: pre, args: options};
  248 + if (hasRefl(sch)) {
  249 + var l = [];
  250 + var k;
  251 + for (k=0; k < vrb.length; k++) {
  252 + l.push(vrb[k] + " się");
  253 + }
  254 + lex.lemma = l;
  255 + }
  256 + lexicalisation.push(lex);
  257 + }
237 258 }
238 259 }
239 260  
... ... @@ -248,12 +269,11 @@ function getFormForLexicalisation(lexicalisation) {
248 269 var result = "";
249 270 var i;
250 271 for (i = 0; i < lexicalisation.length; i++) {
251   - var perms = permute(lexicalisation[i].args);
252   - var j;
253   - for (j = 0; j < perms.length; j++) {
254   - result += lexicalisationForm(lexicalisation[i].lemma, cartesian(perms[j]))
255   - }
256   - result += '<br\>';
  272 + var perms = permute(lexicalisation[i].args);
  273 + var j;
  274 + for (j = 0; j < perms.length; j++) {
  275 + result += lexicalisationForm(lexicalisation[i].lemma, lexicalisation[i].pre, cartesian(perms[j]));
  276 + }
257 277 }
258 278 return result;
259 279 }
... ... @@ -302,26 +322,90 @@ function cartesian(llist) {
302 322 return result;
303 323 }
304 324  
/**
 * Build the checkbox HTML for the multi-word units derivable from one
 * lexicalisation.
 *
 * Candidate strings are produced by noSubjUnits (when `pre` is empty) or
 * subjUnits (otherwise) and sent to the `ajax_get_mwes` endpoint, which
 * answers with a list of {base, sense, id, luid, sid} records. A checkbox is
 * emitted for every record whose base is not already among `lexical_units`;
 * its value is "<base><sense>_<id>_<luid>_<sid>".
 *
 * The request is synchronous because the caller expects the finished HTML as
 * the return value. If the request fails, no checkbox is produced.
 */
function lexicalisationForm(lemma, pre, tokenised) {
    var candidates;
    if (pre.length == 0) {
        candidates = noSubjUnits(lemma, tokenised);
    } else {
        candidates = subjUnits(pre, lemma, tokenised);
    }

    // Kept separate from `candidates`: on failure the loop below must not
    // run over the raw candidate strings as if they were server records.
    var mwes = [];
    $.ajax({
        type: "GET",
        dataType: "json",
        url: ajax_get_mwes,
        data: {"options": JSON.stringify(candidates)},
        success: function(data){
            mwes = data.mwes;
        },
        async: false
    });

    // The records end up inside an attribute value and inside element text,
    // so markup-significant characters are escaped; the browser decodes them
    // again when the checkbox value is read back.
    var escapeHtml = function(text) {
        return String(text).replace(/&/g, "&amp;")
                           .replace(/"/g, "&quot;")
                           .replace(/</g, "&lt;")
                           .replace(/>/g, "&gt;");
    };

    var display = "";
    var i, j;
    for (i = 0; i < mwes.length; i++) {
        var included = false;
        for (j = 0; j < lexical_units.length; j++) {
            if (mwes[i].base == lexical_units[j].base) {
                included = true;
                break;
            }
        }
        if (!included) {
            var label = mwes[i].base + mwes[i].sense;
            var value = label + "_" + mwes[i].id + "_" + mwes[i].luid + "_" + mwes[i].sid;
            display += "<input type = \"checkbox\" name = \"mwe\" value = \"" + escapeHtml(value) + "\">" + escapeHtml(label) + "<br\>";
        }
    }
    return display;
}
324 359  
/**
 * Combine every verb form with every dependant token list.
 *
 * `lemmata` is an array of strings and `dependants` an array of token
 * arrays. For each lemma the result holds "<lemma> <tokens joined by space>"
 * per dependant, or just the lemma when `dependants` is empty. An empty
 * token array also yields the bare lemma, so no entry ends in a stray space.
 */
function noSubjUnits(lemmata, dependants) {
    var result = [];
    var i, j;
    for (i = 0; i < lemmata.length; i++) {
        if (dependants.length == 0) {
            result.push(lemmata[i]);
            continue;
        }
        for (j = 0; j < dependants.length; j++) {
            if (dependants[j].length == 0) {
                result.push(lemmata[i]);
            } else {
                result.push(lemmata[i] + " " + dependants[j].join(" "));
            }
        }
    }
    return result;
}
  374 +
/**
 * Build candidate units that start with a subject phrase.
 *
 * For every element of `pre` two families of strings are produced:
 *   1. "<pre> <verb> <dependant tokens>"   (via noSubjUnits), and
 *   2. "<pre> <head> <verb> <rest>", where head/rest come from decapitate,
 *      i.e. the first token of a dependant is moved in front of the verb.
 *
 * Family 2 is generated for every verb form in `lemmata`; pairing heads with
 * the flat noSubjUnits output by index only lines up for the first form.
 */
function subjUnits(pre, lemmata, dependants) {
    var result = [];
    var i, j, k;
    var plain = noSubjUnits(lemmata, dependants);
    var split = decapitate(dependants);

    var fronted = [];
    for (j = 0; j < lemmata.length; j++) {
        for (k = 0; k < split.heads.length; k++) {
            var unit = split.heads[k] + " " + lemmata[j];
            // An empty remainder must not leave a trailing space behind.
            if (split.bodies[k].length > 0) {
                unit += " " + split.bodies[k].join(" ");
            }
            fronted.push(unit);
        }
    }

    for (i = 0; i < pre.length; i++) {
        for (j = 0; j < plain.length; j++) {
            result.push(pre[i] + " " + plain[j]);
        }
        for (j = 0; j < fronted.length; j++) {
            result.push(pre[i] + " " + fronted[j]);
        }
    }
    return result;
}
  392 +
/**
 * Split each non-empty token list into its first token and the remainder.
 *
 * Returns {heads, bodies}: heads[n] is the first token and bodies[n] a copy
 * of the remaining tokens of the n-th non-empty list in `llist`. Empty lists
 * are skipped, and the input lists are left untouched.
 */
function decapitate(llist) {
    var split = {heads: [], bodies: []};
    var idx, tokens;
    for (idx = 0; idx < llist.length; idx++) {
        tokens = llist[idx];
        if (!(tokens.length > 0)) {
            continue;
        }
        split.heads.push(tokens[0]);
        split.bodies.push(tokens.slice(1));
    }
    return split;
}
  408 +
325 409  
326 410 // get readable form of lexical unit
327 411 function getLexicalUnit(luid) {
... ... @@ -342,13 +426,22 @@ function addPhraseologicalUnit(mwe, glossa, relation, to) {
342 426 return (free_luid + 1);
343 427 }
344 428  
345   -function addPhraseologicalUnits(frame_id, old_units, mwes, glossa, relation, to) {
/**
 * Register an already stored multi-word unit in the local `lexical_units`
 * list and return its id.
 *
 * `mwe.lu` is "<base>-<sense>". It is split at the LAST hyphen so that a
 * base which itself contains a hyphen is kept whole. Without any hyphen the
 * whole string becomes the base and the sense stays undefined.
 * NOTE(review): assumes the sense part never contains a hyphen - confirm.
 */
function unlockPhraseologicalUnit(mwe) {
    var cut = mwe.lu.lastIndexOf('-');
    var base = (cut < 0) ? mwe.lu : mwe.lu.substring(0, cut);
    var sense = (cut < 0) ? undefined : mwe.lu.substring(cut + 1);
    var lu = {base: base, glossa: "", definition: "", id: mwe.id, luid: mwe.luid, refl: false, pos: "czasownik", sense: sense, relation: 2, to: -1, location: ""};
    lexical_units.push(lu);
    return mwe.id;
}
  434 +
/**
 * Attach multi-word units to a frame.
 *
 * Starts from a copy of `old_units`, appends the id returned by
 * unlockPhraseologicalUnit for each entry of `old_mwes`, then the id
 * returned by addPhraseologicalUnit for each entry of `new_mwes` (created
 * with the given glossa, relation and target), and finally hands the whole
 * list to changeUnits for `frame_id`.
 */
function addPhraseologicalUnits(frame_id, old_units, old_mwes, new_mwes, glossa, relation, to) {
    var collected = [].concat(old_units);
    var idx = 0;
    while (idx < old_mwes.length) {
        collected.push(unlockPhraseologicalUnit(old_mwes[idx]));
        idx += 1;
    }
    idx = 0;
    while (idx < new_mwes.length) {
        collected.push(addPhraseologicalUnit(new_mwes[idx], glossa, relation, to));
        idx += 1;
    }
    changeUnits(frame_id, collected);
}
353 446  
354 447  
... ... @@ -359,7 +452,7 @@ function getPhraseologicalAlternations(frame_id) {
359 452 var i;
360 453 for (i = 0; i < connected[lem].length; i++) {
361 454 var ids = connected[lem][i].split('_');
362   - result.push(ids[1] + "_" + ids[7])
  455 + result.push(ids[1] + "_" + ids[7]);
363 456 }
364 457 }
365 458 return unique(result);
... ...
semantics/static/js/semantics_schemas.js
... ... @@ -130,10 +130,11 @@ function schemaBody(schema, alternation, lex){
130 130 for (l = 0; l < display.arguments[k].length; l++) {
131 131 schema_body += '<td id="' + display.arguments[k][l].csv_id + 'alt_' + alternation + '_" class="' + display.arguments[k][l].csv_class + 'alt_' + alternation + '_" onclick="schemaClick(\'' + display.arguments[k][l].csv_id + 'alt_' + alternation +'_\', ';
132 132 if (display.arguments[k][l].lex.length != 0) {
133   - schema_body += '[\'' + display.arguments[k][l].lex.join('\', \'') + '\'])">';
  133 + schema_body += '[\'' + display.arguments[k][l].lex.join('\', \'') + '\']';
134 134 } else {
135   - schema_body += '[])">';
  135 + schema_body += '[]';
136 136 }
  137 + schema_body += ')">';
137 138 schema_body += display.arguments[k][l].argument;
138 139 schema_body += '</td>';
139 140 if (parseInt(display.arguments[k][l].csv_id.split('_')[5]) >= 0) {
... ...
semantics/static/js/semantics_view.js
... ... @@ -397,6 +397,8 @@ function changeLexicalUnits() {
397 397  
398 398 var units = [];
399 399 var mwes = [];
  400 + var new_mwes = [];
  401 + var old_mwes = [];
400 402 var a = "";
401 403 var gloss = "";
402 404  
... ... @@ -411,7 +413,7 @@ function changeLexicalUnits() {
411 413 if (v == 1) {
412 414 changeUnits(highlighted_id, units);
413 415  
414   - addPhraseologicalUnits(highlighted_id, units, mwes, f.glossa, f.relation, f.synset);
  416 + addPhraseologicalUnits(highlighted_id, units, old_mwes, new_mwes, f.glossa, f.relation, f.synset);
415 417  
416 418 frameClick("");
417 419 displayFrames();
... ... @@ -445,8 +447,46 @@ function changeLexicalUnits() {
445 447 frameClick(a);
446 448 $.prompt.close();
447 449 } else {
448   - $.prompt.goToState('state1');
449   - attachPlWNContextAutocomplete();
  450 +
  451 + var i;
  452 + for (i = 0; i < mwes.length; i++) {
  453 + var lu = mwes[i].split('_')[0];
  454 + var id = mwes[i].split('_')[1];
  455 + var luid = mwes[i].split('_')[2];
  456 + var sid = mwes[i].split('_')[3];
  457 + if (sid == '') {
  458 + new_mwes.push(lu);
  459 + } else {
  460 + old_mwes.push({lu: lu, id: parseInt(id), luid: parseInt(luid), sid: parseInt(sid)});
  461 + }
  462 + }
  463 +
  464 + if (old_mwes.length > 0) {
  465 + var sid = old_mwes[0].sid
  466 + var ok = true;
  467 + for (i = 0; i < old_mwes.length; i++) {
  468 + if (old_mwes[i].sid != sid) {
  469 + ok = false;
  470 + }
  471 + }
  472 + if (ok) {
  473 + changeUnits(highlighted_id, units);
  474 +
  475 +
  476 + addPhraseologicalUnits(highlighted_id, units, old_mwes, new_mwes, old_mwes[0].lu, 1, sid);
  477 +
  478 + frameClick("");
  479 + displayFrames();
  480 + frameClick(a);
  481 + $.prompt.close();
  482 + } else {
  483 + $.prompt.goToState('state1');
  484 + attachPlWNContextAutocomplete();
  485 + }
  486 + } else {
  487 + $.prompt.goToState('state1');
  488 + attachPlWNContextAutocomplete();
  489 + }
450 490 }
451 491  
452 492 }
... ... @@ -482,7 +522,7 @@ function changeLexicalUnits() {
482 522 $.prompt.goToState('state2');
483 523 } else {
484 524 /* zignorowane umiejscowienie w Słowosieci */
485   - addPhraseologicalUnits(highlighted_id, units, mwes, f.glossa, f.relation, -1);
  525 + addPhraseologicalUnits(highlighted_id, units, old_mwes, new_mwes, f.glossa, f.relation, -1);
486 526 frameClick("");
487 527 displayFrames();
488 528 frameClick(a)
... ...
semantics/views.py
... ... @@ -48,6 +48,7 @@ def ajax_semantics(request, id):
48 48 'ajax_relations': reverse('ajax_relations'),
49 49 'ajax_predefined_preferences': reverse('ajax_predefined_preferences'),
50 50 'ajax_plWN_context_lookup': reverse('ajax_plWN_context_lookup'),
  51 + 'ajax_get_mwes': reverse('ajax_get_mwes'),
51 52 }
52 53 return context
53 54  
... ... @@ -450,13 +451,14 @@ def ajax_schemas(request, lemma_id):
450 451 # identifier, class, argument
451 452 arg = []
452 453 #ma["ala"] = kot
453   - for i, c, a in zip(idents, schema_ids, row):
  454 + for i, c, a, p in zip(idents, schema_ids, row, ordered_positions):
454 455 astr, aobj = a
455 456 if aobj is not None and aobj.is_phraseologic():
456   - lex = lexicalisation(aobj)
  457 + tmp = lexicalisation(aobj, p.categories.all(), lemma.entry_obj.name)
  458 + lex, vrb = tmp
457 459 else:
458   - lex = []
459   - arg.append({"csv_id": i, "csv_class": c, "argument": astr, "lex": lex})
  460 + lex, vrb = ([], [])
  461 + arg.append({"csv_id": i, "csv_class": c, "argument": astr, "lex": lex, "vrb": vrb})
460 462 display["arguments"].append(arg)
461 463  
462 464 schema_display["schemas"].append({"schema_id": str(schema.id), "grade": lemma.get_schema_opinion(schema), "colspan": str(max(len(schema_categories), 1)), "rowspan": str(schema_arguments_rowspan), "display": display, "phraseologic": schema.phraseologic})
... ... @@ -643,3 +645,17 @@ def synset_preference_form(request):
643 645 def relational_preference_form(request):
644 646 form = RelationalSelPrefForm()
645 647 return {'form': form}
  648 +
@ajax(method='get', encode_result=True)
def ajax_get_mwes(request, options):
    """Look up lexical units for each candidate string in *options*.

    Empty candidates are ignored. For a candidate matching one or more
    ``LexicalUnit`` rows by ``base``, one record per row is returned with the
    unit's base, ``'-' + sense``, id, luid and synset id. For a candidate
    without a match a placeholder record is returned with an empty sense,
    the candidate itself as id, luid ``-1`` and an empty ``sid``.

    Returns ``{'mwes': [...]}`` with records in candidate order.
    """
    results = []
    for raw_term in options:
        term = unicode(raw_term)
        if len(term) == 0:
            continue
        found = LexicalUnit.objects.filter(base=term)
        if len(found) == 0:
            results.append({'base': term, 'sense': '', 'id': term, 'luid': -1, 'sid': ''})
            continue
        # NOTE(review): lu.synset.id fails if a unit has no synset - confirm
        # that synset is always set on stored units.
        results.extend(
            {'base': lu.base, 'sense': '-' + str(lu.sense), 'id': lu.id, 'luid': lu.luid, 'sid': lu.synset.id}
            for lu in found
        )
    return {'mwes': results}
... ...