Commit 7a9badfd49802fdf1ef106b104a7e32c8fe0b25d
Merge branch 'tomek' into dev
Showing 7 changed files with 233 additions and 55 deletions
semantics/phraseology_generator.py
... | ... | @@ -3,10 +3,11 @@ |
3 | 3 | from dictionary.models import sort_arguments, sort_positions, sortatributes |
4 | 4 | from settings import MORFEUSZ2 |
5 | 5 | |
6 | -def lexicalisation(argument): | |
6 | +def lexicalisation(argument, categories, base): | |
7 | + subj = is_subj(categories) | |
7 | 8 | b = argument.type |
8 | 9 | if b == 'fixed': |
9 | - return get_words(sortatributes(argument)[-1]) | |
10 | + return (get_words(sortatributes(argument)[-1]), []) | |
10 | 11 | attributes = sortatributes(argument) |
11 | 12 | lexicalisation_type = attributes[0].values.all()[0].argument.type |
12 | 13 | lexicalisation_parameters = sortatributes(attributes[0].values.all()[0].argument) |
... | ... | @@ -14,14 +15,20 @@ def lexicalisation(argument): |
14 | 15 | lexicalisation_type = lexicalisation_parameters[0].values.all()[0].argument.type |
15 | 16 | lexicalisation_parameters = sortatributes(lexicalisation_parameters[0].values.all()[0].argument) |
16 | 17 | if lexicalisation_type == 'np': # np(case), number, nouns, atr |
17 | - nps = get_nps(get_case(lexicalisation_parameters[0]), get_number(attributes[1]), get_words(attributes[2]), attributes[3]) | |
18 | - return nps | |
18 | + nps = get_nps(get_case(lexicalisation_parameters[0], subj), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3]) | |
19 | + return (nps, get_verb(base, get_number(attributes[1], subj), subj)) | |
19 | 20 | elif lexicalisation_type == 'prepnp': #prepnp(prep, case), number, nouns, atr |
20 | - prepnps = get_prepnps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1]), get_number(attributes[1]), get_words(attributes[2]), attributes[3]) | |
21 | - return prepnps | |
21 | + prepnps = get_prepnps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3]) | |
22 | + return (prepnps, []) | |
22 | 23 | else: |
23 | - return [] | |
24 | - return [] | |
24 | + return ([], []) | |
25 | + return ([], []) | |
26 | + | |
27 | +def is_subj(categories): | |
28 | + for cat in categories: | |
29 | + if cat.category == u'subj': | |
30 | + return True | |
31 | + return False | |
25 | 32 | |
26 | 33 | def get_preposition(attribute): |
27 | 34 | return attribute.values.all()[0].parameter.type.name |
... | ... | @@ -30,14 +37,20 @@ def get_words(attribute): |
30 | 37 | words = [word.text[1:-1] for word in attribute.values.all()] |
31 | 38 | return words |
32 | 39 | |
33 | -def get_case(attribute): | |
40 | +def get_case(attribute, is_subj): | |
34 | 41 | case = attribute.values.all()[0].parameter.type.name |
35 | 42 | if case == u'str': |
36 | - case = u'acc' | |
43 | + if is_subj: | |
44 | + case = u'nom' | |
45 | + else: | |
46 | + case = u'acc' | |
37 | 47 | return case |
38 | 48 | |
39 | -def get_number(attribute): | |
49 | +def get_number(attribute, is_subj): | |
40 | 50 | number = attribute.values.all()[0].parameter.type.name |
51 | + if number == u'_': | |
52 | + if is_subj: | |
53 | + number = u'sg' | |
41 | 54 | return number |
42 | 55 | |
43 | 56 | def get_nps(case, number, nouns, _atr): |
... | ... | @@ -65,3 +78,15 @@ def get_prepnps(prep, case, number, nouns, _atr): |
65 | 78 | nps = get_nps(case, number, nouns, _atr) |
66 | 79 | return [prep + ' ' + np for np in nps] |
67 | 80 | |
81 | +def get_verb(inf, number, is_subj): | |
82 | + if not is_subj: | |
83 | + return None | |
84 | + else: | |
85 | + options = [(interp.orth, interp.getTag(MORFEUSZ2)) for interp in MORFEUSZ2.generate(inf.encode('utf8'))] | |
86 | + filtered = [] | |
87 | + for option in options: | |
88 | + (orth, tag) = option | |
89 | + if u'fin' in tag and u'sg' in tag and u'ter' in tag: | |
90 | + filtered.append(option) | |
91 | + options = filtered | |
92 | + return [orth for orth, _ in options] | |
... | ... |
semantics/saving.py
... | ... | @@ -95,7 +95,7 @@ def make_operations(lemma_id, operations): |
95 | 95 | else: |
96 | 96 | frame_id = int(operation['frame_id']) |
97 | 97 | luids = [translation['unit_id'][int(m)] if int(m) in translation['unit_id'] else int(m) for m in operation['units']] |
98 | - change_units(frame_id, luids) | |
98 | + change_units(lemma_id, frame_id, luids) | |
99 | 99 | elif operation['operation'] == "set_opinion": |
100 | 100 | if int(operation['frame_id']) in translation['frame_id']: |
101 | 101 | frame_id = translation['frame_id'][int(operation['frame_id'])] |
... | ... | @@ -207,11 +207,13 @@ def validate_roles(roles): |
207 | 207 | ok = not ok |
208 | 208 | return ok |
209 | 209 | |
210 | -def change_units(frame_id, luids): | |
210 | +def change_units(lemma_id, frame_id, luids): | |
211 | 211 | frame = SemanticFrame.objects.get(id=frame_id) |
212 | 212 | frame.lexical_units = [] |
213 | 213 | for id in luids: |
214 | 214 | lu = LexicalUnit.objects.get(id=id) |
215 | + lu.entry = Lemma.objects.get(id=lemma_id).entry_obj | |
216 | + lu.save() | |
215 | 217 | frame.lexical_units.add(lu) |
216 | 218 | |
217 | 219 | def set_opinion(frame_id, opinion): |
... | ... |
semantics/sem_urls.py
... | ... | @@ -25,4 +25,5 @@ SEMANTIC_PATTERNS = patterns('semantics.views', |
25 | 25 | url(r'^ajax/general_preference_form/$', 'general_preference_form'), |
26 | 26 | url(r'^ajax/synset_preference_form/$', 'synset_preference_form'), |
27 | 27 | url(r'^ajax/relational_preference_form/$', 'relational_preference_form'), |
28 | + url(r'^ajax/get_mwe_list/$', 'ajax_get_mwes'), | |
28 | 29 | ) |
... | ... |
semantics/static/js/semantics_lexical_units.js
... | ... | @@ -220,20 +220,41 @@ function getMeaningsSelectionForFrame(frame_id) { |
220 | 220 | var j; |
221 | 221 | for (j = 0; j < rows.length; j++) { |
222 | 222 | var options = []; |
223 | + var vrb = []; | |
224 | + var pre = []; | |
223 | 225 | sid_alt = rows[j].split('_'); |
224 | 226 | var sch = "schema_" + sid_alt[0] + "_"; |
225 | 227 | var k; |
226 | 228 | for (k = 0; k < schemas_content[sch].display.arguments[0].length; k++) { |
227 | 229 | var proper = schemas_content[sch].display.arguments[0][k].csv_id + "alt_" + sid_alt[1] + "_"; |
228 | 230 | if (connected[lem].indexOf(proper) != -1) { |
229 | - options.push(schemas_content[sch].display.arguments[0][k].lex); | |
231 | + if (schemas_content[sch].display.arguments[0][k].vrb != null && | |
232 | + schemas_content[sch].display.arguments[0][k].vrb.length > 0) { | |
233 | + pre.push(schemas_content[sch].display.arguments[0][k].lex); | |
234 | + vrb = schemas_content[sch].display.arguments[0][k].vrb; | |
235 | + } else { | |
236 | + options.push(schemas_content[sch].display.arguments[0][k].lex); | |
237 | + } | |
230 | 238 | } |
231 | 239 | } |
232 | - var lex = {lemma: base, args: options}; | |
233 | - if (hasRefl(sch)) { | |
234 | - lex.lemma = base + " się"; | |
235 | - } | |
236 | - lexicalisation.push(lex); | |
240 | + if (vrb.length == 0) { | |
241 | + var lex = {lemma: [base], pre: pre, args: options}; | |
242 | + if (hasRefl(sch)) { | |
243 | + lex.lemma = [base + " się"]; | |
244 | + } | |
245 | + lexicalisation.push(lex); | |
246 | + } else { | |
247 | + var lex = {lemma: vrb, pre: pre, args: options}; | |
248 | + if (hasRefl(sch)) { | |
249 | + var l = []; | |
250 | + var k; | |
251 | + for (k=0; k < vrb.length; k++) { | |
252 | + l.push(vrb[k] + " się"); | |
253 | + } | |
254 | + lex.lemma = l; | |
255 | + } | |
256 | + lexicalisation.push(lex); | |
257 | + } | |
237 | 258 | } |
238 | 259 | } |
239 | 260 | |
... | ... | @@ -248,12 +269,11 @@ function getFormForLexicalisation(lexicalisation) { |
248 | 269 | var result = ""; |
249 | 270 | var i; |
250 | 271 | for (i = 0; i < lexicalisation.length; i++) { |
251 | - var perms = permute(lexicalisation[i].args); | |
252 | - var j; | |
253 | - for (j = 0; j < perms.length; j++) { | |
254 | - result += lexicalisationForm(lexicalisation[i].lemma, cartesian(perms[j])) | |
255 | - } | |
256 | - result += '<br\>'; | |
272 | + var perms = permute(lexicalisation[i].args); | |
273 | + var j; | |
274 | + for (j = 0; j < perms.length; j++) { | |
275 | + result += lexicalisationForm(lexicalisation[i].lemma, lexicalisation[i].pre, cartesian(perms[j])); | |
276 | + } | |
257 | 277 | } |
258 | 278 | return result; |
259 | 279 | } |
... | ... | @@ -302,26 +322,90 @@ function cartesian(llist) { |
302 | 322 | return result; |
303 | 323 | } |
304 | 324 | |
305 | -function lexicalisationForm(lemma, tokenised) { | |
325 | +function lexicalisationForm(lemma, pre, tokenised) { | |
326 | + var list; | |
327 | + if (pre.length == 0) { | |
328 | + list = noSubjUnits(lemma, tokenised); | |
329 | + } else { | |
330 | + list = subjUnits(pre, lemma, tokenised); | |
331 | + } | |
332 | + | |
333 | + $.ajax({ | |
334 | + type: "GET", | |
335 | + dataType: "json", | |
336 | + url: ajax_get_mwes, | |
337 | + data: {"options": JSON.stringify(list)}, | |
338 | + success: function(data){ | |
339 | + list = data.mwes; | |
340 | + }, | |
341 | + async: false | |
342 | + }); | |
343 | + | |
306 | 344 | var display = ""; |
307 | - var i; | |
308 | - for (i = 0; i < tokenised.length; i++) { | |
309 | - if (tokenised[i].length == 0) { | |
310 | - display += "<br\>"; | |
311 | - } else { | |
312 | - var j; | |
313 | - for (j = 0; j < lexical_units.length; j++) { | |
314 | - if (lemma + " " + tokenised[i].join(" ") == lexical_units[j].base) { | |
315 | - return ""; | |
316 | - } | |
317 | - } | |
318 | - display += "<input type = \"checkbox\" name = \"mwe\" value = \"" + lemma + " " + tokenised[i].join(" ") + "\">"; // TODO: unikalne wartości, wartość => dodanie odpowiedniej jednostki (nazwa jednostki w wartości?) | |
319 | - display += lemma + " " + tokenised[i].join(" ") + "<br\>"; | |
345 | + var i, j; | |
346 | + for (i = 0; i < list.length; i++) { | |
347 | + var included = false; | |
348 | + for (j = 0; j < lexical_units.length; j++) { | |
349 | + if (list[i].base == lexical_units[j].base) { | |
350 | + included = true; | |
351 | + } | |
352 | + } | |
353 | + if (!included) { | |
354 | + display += "<input type = \"checkbox\" name = \"mwe\" value = \"" + list[i].base + list[i].sense + "_" + list[i].id + "_" + list[i].luid + "_" + list[i].sid + "\">" + list[i].base + list[i].sense + "<br\>"; | |
320 | 355 | } |
321 | 356 | } |
322 | 357 | return display; |
323 | 358 | } |
324 | 359 | |
360 | +function noSubjUnits(lemmata, dependants) { | |
361 | + var result = []; | |
362 | + var i, j; | |
363 | + for (i = 0; i < lemmata.length; i++) { | |
364 | + if (dependants.length == 0) { | |
365 | + result.push(lemmata[i]); | |
366 | + } else { | |
367 | + for (j = 0; j < dependants.length; j++) { | |
368 | + result.push(lemmata[i] + " " + dependants[j].join(" ")); | |
369 | + } | |
370 | + } | |
371 | + } | |
372 | + return result; | |
373 | +} | |
374 | + | |
375 | +function subjUnits(pre, lemmata, dependants) { | |
376 | + var result = []; | |
377 | + var i, j; | |
378 | + var temp = noSubjUnits(lemmata, dependants); | |
379 | + i = decapitate(dependants); | |
380 | + var pre2 = i.heads; | |
381 | + var temp2 = noSubjUnits(lemmata, i.bodies); | |
382 | + for (i = 0; i < pre.length; i++) { | |
383 | + for (j = 0; j < temp.length; j++) { | |
384 | + result.push(pre[i] + " " + temp[j]); | |
385 | + } | |
386 | + for (j = 0; j < pre2.length; j++) { | |
387 | + result.push(pre[i] + " " + pre2[j] + " " + temp2[j]); | |
388 | + } | |
389 | + } | |
390 | + return result; | |
391 | +} | |
392 | + | |
393 | +function decapitate(llist) { | |
394 | + var heads = []; | |
395 | + var bodies = []; | |
396 | + var i; | |
397 | + for (i = 0; i < llist.length; i++) { | |
398 | + if (llist[i].length > 0) { | |
399 | + var body = llist[i].slice(); | |
400 | + var head = body[0]; | |
401 | + body.splice(0, 1); | |
402 | + heads.push(head); | |
403 | + bodies.push(body); | |
404 | + } | |
405 | + } | |
406 | + return {heads: heads, bodies: bodies}; | |
407 | +} | |
408 | + | |
325 | 409 | |
326 | 410 | // get readable form of lexical unit |
327 | 411 | function getLexicalUnit(luid) { |
... | ... | @@ -342,13 +426,22 @@ function addPhraseologicalUnit(mwe, glossa, relation, to) { |
342 | 426 | return (free_luid + 1); |
343 | 427 | } |
344 | 428 | |
345 | -function addPhraseologicalUnits(frame_id, old_units, mwes, glossa, relation, to) { | |
429 | +function unlockPhraseologicalUnit(mwe) { | |
430 | + var lu = {base: mwe.lu.split('-')[0], glossa: "", definition: "", id: mwe.id, luid: mwe.luid, refl: false, glossa: "", pos: "czasownik", sense: mwe.lu.split('-')[1], relation: 2, to: -1, location: ""}; | |
431 | + lexical_units.push(lu); | |
432 | + return mwe.id; | |
433 | +} | |
434 | + | |
435 | +function addPhraseologicalUnits(frame_id, old_units, old_mwes, new_mwes, glossa, relation, to) { | |
346 | 436 | var i; |
347 | - var units = []; | |
348 | - for (i = 0; i < mwes.length; i++) { | |
349 | - units.push(addPhraseologicalUnit(mwes[i], glossa, relation, to)); | |
437 | + var units = old_units.slice(); | |
438 | + for (i = 0; i < old_mwes.length; i++) { | |
439 | + units.push(unlockPhraseologicalUnit(old_mwes[i])); | |
440 | + } | |
441 | + for (i = 0; i < new_mwes.length; i++) { | |
442 | + units.push(addPhraseologicalUnit(new_mwes[i], glossa, relation, to)); | |
350 | 443 | } |
351 | - changeUnits(frame_id, old_units.concat(units)); | |
444 | + changeUnits(frame_id, units); | |
352 | 445 | } |
353 | 446 | |
354 | 447 | |
... | ... | @@ -359,7 +452,7 @@ function getPhraseologicalAlternations(frame_id) { |
359 | 452 | var i; |
360 | 453 | for (i = 0; i < connected[lem].length; i++) { |
361 | 454 | var ids = connected[lem][i].split('_'); |
362 | - result.push(ids[1] + "_" + ids[7]) | |
455 | + result.push(ids[1] + "_" + ids[7]); | |
363 | 456 | } |
364 | 457 | } |
365 | 458 | return unique(result); |
... | ... |
semantics/static/js/semantics_schemas.js
... | ... | @@ -130,10 +130,11 @@ function schemaBody(schema, alternation, lex){ |
130 | 130 | for (l = 0; l < display.arguments[k].length; l++) { |
131 | 131 | schema_body += '<td id="' + display.arguments[k][l].csv_id + 'alt_' + alternation + '_" class="' + display.arguments[k][l].csv_class + 'alt_' + alternation + '_" onclick="schemaClick(\'' + display.arguments[k][l].csv_id + 'alt_' + alternation +'_\', '; |
132 | 132 | if (display.arguments[k][l].lex.length != 0) { |
133 | - schema_body += '[\'' + display.arguments[k][l].lex.join('\', \'') + '\'])">'; | |
133 | + schema_body += '[\'' + display.arguments[k][l].lex.join('\', \'') + '\']'; | |
134 | 134 | } else { |
135 | - schema_body += '[])">'; | |
135 | + schema_body += '[]'; | |
136 | 136 | } |
137 | + schema_body += ')">'; | |
137 | 138 | schema_body += display.arguments[k][l].argument; |
138 | 139 | schema_body += '</td>'; |
139 | 140 | if (parseInt(display.arguments[k][l].csv_id.split('_')[5]) >= 0) { |
... | ... |
semantics/static/js/semantics_view.js
... | ... | @@ -397,6 +397,8 @@ function changeLexicalUnits() { |
397 | 397 | |
398 | 398 | var units = []; |
399 | 399 | var mwes = []; |
400 | + var new_mwes = []; | |
401 | + var old_mwes = []; | |
400 | 402 | var a = ""; |
401 | 403 | var gloss = ""; |
402 | 404 | |
... | ... | @@ -411,7 +413,7 @@ function changeLexicalUnits() { |
411 | 413 | if (v == 1) { |
412 | 414 | changeUnits(highlighted_id, units); |
413 | 415 | |
414 | - addPhraseologicalUnits(highlighted_id, units, mwes, f.glossa, f.relation, f.synset); | |
416 | + addPhraseologicalUnits(highlighted_id, units, old_mwes, new_mwes, f.glossa, f.relation, f.synset); | |
415 | 417 | |
416 | 418 | frameClick(""); |
417 | 419 | displayFrames(); |
... | ... | @@ -445,8 +447,46 @@ function changeLexicalUnits() { |
445 | 447 | frameClick(a); |
446 | 448 | $.prompt.close(); |
447 | 449 | } else { |
448 | - $.prompt.goToState('state1'); | |
449 | - attachPlWNContextAutocomplete(); | |
450 | + | |
451 | + var i; | |
452 | + for (i = 0; i < mwes.length; i++) { | |
453 | + var lu = mwes[i].split('_')[0]; | |
454 | + var id = mwes[i].split('_')[1]; | |
455 | + var luid = mwes[i].split('_')[2]; | |
456 | + var sid = mwes[i].split('_')[3]; | |
457 | + if (sid == '') { | |
458 | + new_mwes.push(lu); | |
459 | + } else { | |
460 | + old_mwes.push({lu: lu, id: parseInt(id), luid: parseInt(luid), sid: parseInt(sid)}); | |
461 | + } | |
462 | + } | |
463 | + | |
464 | + if (old_mwes.length > 0) { | |
465 | + var sid = old_mwes[0].sid | |
466 | + var ok = true; | |
467 | + for (i = 0; i < old_mwes.length; i++) { | |
468 | + if (old_mwes[i].sid != sid) { | |
469 | + ok = false; | |
470 | + } | |
471 | + } | |
472 | + if (ok) { | |
473 | + changeUnits(highlighted_id, units); | |
474 | + | |
475 | + | |
476 | + addPhraseologicalUnits(highlighted_id, units, old_mwes, new_mwes, old_mwes[0].lu, 1, sid); | |
477 | + | |
478 | + frameClick(""); | |
479 | + displayFrames(); | |
480 | + frameClick(a); | |
481 | + $.prompt.close(); | |
482 | + } else { | |
483 | + $.prompt.goToState('state1'); | |
484 | + attachPlWNContextAutocomplete(); | |
485 | + } | |
486 | + } else { | |
487 | + $.prompt.goToState('state1'); | |
488 | + attachPlWNContextAutocomplete(); | |
489 | + } | |
450 | 490 | } |
451 | 491 | |
452 | 492 | } |
... | ... | @@ -482,7 +522,7 @@ function changeLexicalUnits() { |
482 | 522 | $.prompt.goToState('state2'); |
483 | 523 | } else { |
484 | 524 | /* zignorowane umiejscowienie w Słowosieci */ |
485 | - addPhraseologicalUnits(highlighted_id, units, mwes, f.glossa, f.relation, -1); | |
525 | + addPhraseologicalUnits(highlighted_id, units, old_mwes, new_mwes, f.glossa, f.relation, -1); | |
486 | 526 | frameClick(""); |
487 | 527 | displayFrames(); |
488 | 528 | frameClick(a) |
... | ... |
semantics/views.py
... | ... | @@ -48,6 +48,7 @@ def ajax_semantics(request, id): |
48 | 48 | 'ajax_relations': reverse('ajax_relations'), |
49 | 49 | 'ajax_predefined_preferences': reverse('ajax_predefined_preferences'), |
50 | 50 | 'ajax_plWN_context_lookup': reverse('ajax_plWN_context_lookup'), |
51 | + 'ajax_get_mwes': reverse('ajax_get_mwes'), | |
51 | 52 | } |
52 | 53 | return context |
53 | 54 | |
... | ... | @@ -450,13 +451,14 @@ def ajax_schemas(request, lemma_id): |
450 | 451 | # identifier, class, argument |
451 | 452 | arg = [] |
452 | 453 | #ma["ala"] = kot |
453 | - for i, c, a in zip(idents, schema_ids, row): | |
454 | + for i, c, a, p in zip(idents, schema_ids, row, ordered_positions): | |
454 | 455 | astr, aobj = a |
455 | 456 | if aobj is not None and aobj.is_phraseologic(): |
456 | - lex = lexicalisation(aobj) | |
457 | + tmp = lexicalisation(aobj, p.categories.all(), lemma.entry_obj.name) | |
458 | + lex, vrb = tmp | |
457 | 459 | else: |
458 | - lex = [] | |
459 | - arg.append({"csv_id": i, "csv_class": c, "argument": astr, "lex": lex}) | |
460 | + lex, vrb = ([], []) | |
461 | + arg.append({"csv_id": i, "csv_class": c, "argument": astr, "lex": lex, "vrb": vrb}) | |
460 | 462 | display["arguments"].append(arg) |
461 | 463 | |
462 | 464 | schema_display["schemas"].append({"schema_id": str(schema.id), "grade": lemma.get_schema_opinion(schema), "colspan": str(max(len(schema_categories), 1)), "rowspan": str(schema_arguments_rowspan), "display": display, "phraseologic": schema.phraseologic}) |
... | ... | @@ -643,3 +645,17 @@ def synset_preference_form(request): |
643 | 645 | def relational_preference_form(request): |
644 | 646 | form = RelationalSelPrefForm() |
645 | 647 | return {'form': form} |
648 | + | |
649 | +@ajax(method='get', encode_result=True) | |
650 | +def ajax_get_mwes(request, options): | |
651 | + results = [] | |
652 | + for term in options: | |
653 | + term = unicode(term) | |
654 | + if len(term) > 0: | |
655 | + obj_results = LexicalUnit.objects.filter(base=term) | |
656 | + if len(obj_results) > 0: | |
657 | + for lu in obj_results: | |
658 | + results.append({'base': lu.base, 'sense': '-' + str(lu.sense), 'id': lu.id, 'luid': lu.luid, 'sid': lu.synset.id}) | |
659 | + else: | |
660 | + results.append({'base': term, 'sense': '', 'id': term, 'luid': -1, 'sid': ''}) | |
661 | + return {'mwes': results} | |
... | ... |