Commit e9aff2d820b37f92e7effea41308383631aaefde
1 parent 915b67fc
adjectival modifiers of nouns + general phraseology fixes
Showing 3 changed files with 92 additions and 44 deletions
semantics/phraseology_generator.py
... | ... | @@ -7,7 +7,7 @@ from copy import deepcopy |
7 | 7 | def lexicalisation(argument, subj, base, negativity, reference=None): |
8 | 8 | b = argument.type |
9 | 9 | if b == 'fixed': |
10 | - return (get_words(sortatributes(argument)[-1]), []) | |
10 | + return (get_words(sortatributes(argument)[-1]), [], 1) | |
11 | 11 | attributes = sortatributes(argument) |
12 | 12 | lexicalisation_type = attributes[0].values.all()[0].argument.type |
13 | 13 | lexicalisation_parameters = sortatributes(attributes[0].values.all()[0].argument) |
... | ... | @@ -16,34 +16,45 @@ def lexicalisation(argument, subj, base, negativity, reference=None): |
16 | 16 | lexicalisation_parameters = sortatributes(lexicalisation_parameters[0].values.all()[0].argument) |
17 | 17 | if lexicalisation_type == 'np': # np(case), number, nouns, atr |
18 | 18 | nps = get_nps(get_case(lexicalisation_parameters[0], subj, negativity), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3]) |
19 | - return (nps, get_verb(base, get_number(attributes[1], subj), subj)) | |
19 | + if subj: | |
20 | + return (nps, get_verb(base, get_number(attributes[1], subj), subj), -1) | |
21 | + else: | |
22 | + return (nps, [], 1) | |
20 | 23 | elif lexicalisation_type == 'prepnp': #prepnp(prep, case), number, nouns, atr |
21 | 24 | prepnps = get_prepnps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, negativity), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3]) |
22 | - return (prepnps, []) | |
25 | + return (prepnps, [], 1) | |
23 | 26 | elif lexicalisation_type == 'adjp': # adjp(case), number, gender, degree, adjectives, atr |
24 | 27 | adjps = get_adjps(get_case(lexicalisation_parameters[0], subj, negativity, reference), get_number(attributes[1], subj, reference), get_gender(attributes[2], reference), get_degree(attributes[3]), get_words(attributes[4]), attributes[5]) |
25 | - return (adjps, get_verb(base, get_number(attributes[1], subj), subj)) | |
28 | + if reference is None: | |
29 | + if lexicalisation_parameters[0].values.all()[0].parameter.type.name == u'agr': | |
30 | + return (adjps, [], 0) | |
31 | + else: | |
32 | + return (adjps, [], -1) | |
33 | + elif subj: | |
34 | + return (adjps, get_verb(base, get_number(attributes[1], subj), subj), -1) | |
35 | + else: | |
36 | + return (adjps, [], 1) | |
26 | 37 | elif lexicalisation_type == 'prepadjp': #prepadjp(prep, case), number, gender, degree, adjectives, atr |
27 | 38 | prepadjps = get_prepadjps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, False, reference), get_number(attributes[1], subj, reference), get_gender(attributes[2], reference), get_degree(attributes[3]), get_words(attributes[4]), attributes[5]) |
28 | - return (prepadjps, []) | |
39 | + return (prepadjps, [], 1) | |
29 | 40 | elif lexicalisation_type == 'infp': |
30 | 41 | infps = get_infps(get_aspect(lexicalisation_parameters[0]), get_words(attributes[2]), attributes[4]) |
31 | - return (infps, []) | |
42 | + return (infps, [], 1) | |
32 | 43 | elif lexicalisation_type == 'advp': #advp(type), degree, adverb, atr |
33 | 44 | advps = get_advps(get_degree(attributes[1]), get_words(attributes[2]), attributes[3]) |
34 | - return (advps, [base]) | |
45 | + return (advps, [], -1) | |
35 | 46 | elif lexicalisation_type == 'nump': # nump(case), num, noun, atr |
36 | 47 | numps = get_numps(get_case(lexicalisation_parameters[0], subj, negativity, reference), get_words(attributes[1]), get_words(attributes[2]), attributes[3]) |
37 | - return (numps, get_verb(base, 'pl', subj)) | |
48 | + return (numps, get_verb(base, 'pl', subj), -1) | |
38 | 49 | elif lexicalisation_type == 'prepnump': # prepnump(prep,case), num, noun, atr |
39 | - numps = get_prepnumps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, False, reference), get_words(attributes[1]), get_words(attributes[2]), attributes[3]) | |
40 | - return (numps, []) #get_verb(base, 'pl', subj)) | |
50 | + prepnumps = get_prepnumps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, False, reference), get_words(attributes[1]), get_words(attributes[2]), attributes[3]) | |
51 | + return (prepnumps, [], 1) | |
41 | 52 | elif lexicalisation_type == 'qub': # qub, form, atr |
42 | 53 | qubs = get_qubs(get_words(attributes[1]), attributes[2]) |
43 | - return (qubs, [base]) | |
54 | + return (qubs, [], -1) | |
44 | 55 | else: |
45 | - return ([], []) | |
46 | - return ([], []) | |
56 | + return ([], [], 0) | |
57 | + return ([], [], 0) | |
47 | 58 | |
48 | 59 | def is_subj(categories): |
49 | 60 | for cat in categories: |
... | ... | @@ -82,6 +93,8 @@ def get_case(attribute, is_subj, negativity, reference=None): |
82 | 93 | case = [tag.split(':')[1]] |
83 | 94 | else: |
84 | 95 | case = [tag.split(':')[2]] |
96 | + elif case == u'agr' and reference is None: | |
97 | + case = [u'nom'] | |
85 | 98 | else: |
86 | 99 | case = [case] |
87 | 100 | return case |
... | ... | @@ -125,6 +138,12 @@ def get_degree(attribute): |
125 | 138 | degree = u'pos' |
126 | 139 | return degree |
127 | 140 | |
141 | +def in_tag(what, tag): | |
142 | + if u':' + what + u':' in tag or u':' + what + u'.' in tag or u'.' + what + u'.' in tag or u'.' + what + u':' in tag or tag.endswith(u'.' + what) or tag.endswith(u':' + what): | |
143 | + return True | |
144 | + else: | |
145 | + return False | |
146 | + | |
128 | 147 | def get_nps(cases, number, nouns, atr): |
129 | 148 | result = [] |
130 | 149 | for noun in nouns: |
... | ... | @@ -135,7 +154,7 @@ def get_nps(cases, number, nouns, atr): |
135 | 154 | filtered = [] |
136 | 155 | for option in options: |
137 | 156 | (orth, tag) = option |
138 | - if u':' + case in tag or u'.' + case in tag: | |
157 | + if in_tag(case, tag): | |
139 | 158 | filtered.append(option) |
140 | 159 | options_temp += filtered |
141 | 160 | else: |
... | ... | @@ -145,7 +164,7 @@ def get_nps(cases, number, nouns, atr): |
145 | 164 | filtered = [] |
146 | 165 | for option in options: |
147 | 166 | (orth, tag) = option |
148 | - if u':' + number + u':' in tag: | |
167 | + if in_tag(number, tag): | |
149 | 168 | filtered.append(option) |
150 | 169 | options = filtered |
151 | 170 | result += options |
... | ... | @@ -168,7 +187,7 @@ def get_infps(aspect, verbs, atr): |
168 | 187 | if aspect != u'_': |
169 | 188 | for option in options: |
170 | 189 | (orth, tag) = option |
171 | - if u':' + aspect + u':' in tag: | |
190 | + if in_tag(aspect, tag): | |
172 | 191 | filtered.append(option) |
173 | 192 | options = filtered |
174 | 193 | result += options |
... | ... | @@ -190,7 +209,7 @@ def get_adjps(cases, number, gender, degree, adjectives, atr): |
190 | 209 | filtered = [] |
191 | 210 | for option in options: |
192 | 211 | (orth, tag) = option |
193 | - if u':' + case + u':' in tag: | |
212 | + if in_tag(case, tag): | |
194 | 213 | filtered.append(option) |
195 | 214 | options_temp += filtered |
196 | 215 | else: |
... | ... | @@ -200,21 +219,21 @@ def get_adjps(cases, number, gender, degree, adjectives, atr): |
200 | 219 | filtered = [] |
201 | 220 | for option in options: |
202 | 221 | (orth, tag) = option |
203 | - if u':' + number + u':' in tag: | |
222 | + if in_tag(number, tag): | |
204 | 223 | filtered.append(option) |
205 | 224 | options = filtered |
206 | 225 | if gender != u'_': |
207 | 226 | filtered = [] |
208 | 227 | for option in options: |
209 | 228 | (orth, tag) = option |
210 | - if u':' + gender + u':' in tag or u'.' + gender + u':' in tag or u':' + gender + u'.' in tag or u'.' + gender + u'.' in tag or u'.' + gender + u':' in tag: | |
229 | + if in_tag(gender, tag): | |
211 | 230 | filtered.append(option) |
212 | 231 | options = filtered |
213 | 232 | if degree != u'_': |
214 | 233 | filtered = [] |
215 | 234 | for option in options: |
216 | 235 | (orth, tag) = option |
217 | - if u':' + degree in tag: | |
236 | + if in_tag(degree, tag): | |
218 | 237 | filtered.append(option) |
219 | 238 | options = filtered |
220 | 239 | result += options |
... | ... | @@ -231,14 +250,14 @@ def get_advps(degree, adverbs, atr): |
231 | 250 | filtered = [] |
232 | 251 | for option in options: |
233 | 252 | (orth, tag) = option |
234 | - if u'adv' in tag: | |
253 | + if tag.startswith(u'adv'): | |
235 | 254 | filtered.append(option) |
236 | 255 | options = filtered |
237 | 256 | if ':' in tag and degree != u'_': |
238 | 257 | filtered = [] |
239 | 258 | for option in options: |
240 | 259 | (orth, tag) = option |
241 | - if u':' + degree in tag: | |
260 | + if in_tag(degree, tag): | |
242 | 261 | filtered.append(option) |
243 | 262 | options = filtered |
244 | 263 | result += options |
... | ... | @@ -251,7 +270,7 @@ def get_qubs(qubs, atr): |
251 | 270 | filtered = [] |
252 | 271 | for option in options: |
253 | 272 | (orth, tag) = option |
254 | - if u'qub' in tag: | |
273 | + if tag.startswith(u'qub'): | |
255 | 274 | filtered.append(option) |
256 | 275 | options = filtered |
257 | 276 | result += options |
... | ... | @@ -265,7 +284,7 @@ def get_numps(cases, numerals, nouns, atr): |
265 | 284 | filtered = [] |
266 | 285 | for option in options: |
267 | 286 | (orth, tag) = option |
268 | - if u'num:' in tag: | |
287 | + if tag.startswith(u'num:'): | |
269 | 288 | filtered.append(option) |
270 | 289 | options = filtered |
271 | 290 | options_temp = [] |
... | ... | @@ -274,7 +293,7 @@ def get_numps(cases, numerals, nouns, atr): |
274 | 293 | filtered = [] |
275 | 294 | for option in options: |
276 | 295 | (orth, tag) = option |
277 | - if u':' + case + u':' in tag or u':' + case + u'.' in tag or u'.' + case + u'.' in tag or u'.' + case + u':' in tag: | |
296 | + if in_tag(case, tag): | |
278 | 297 | filtered.append(option) |
279 | 298 | options_temp += filtered |
280 | 299 | else: |
... | ... | @@ -289,7 +308,7 @@ def get_numps(cases, numerals, nouns, atr): |
289 | 308 | filtered = [] |
290 | 309 | for option in options: |
291 | 310 | (orth, tag) = option |
292 | - if u':pl:' in tag: | |
311 | + if in_tag(u'pl', tag): | |
293 | 312 | filtered.append(option) |
294 | 313 | options = filtered |
295 | 314 | if rec == 'rec': |
... | ... | @@ -304,7 +323,7 @@ def get_numps(cases, numerals, nouns, atr): |
304 | 323 | filtered = [] |
305 | 324 | for option in options: |
306 | 325 | (orth, tag) = option |
307 | - if u':' + case + u':' in tag or u':' + case + u'.' in tag or u'.' + case + u'.' in tag or u'.' + case + u':' in tag: | |
326 | + if in_tag(case, tag): | |
308 | 327 | filtered.append(option) |
309 | 328 | options_temp += filtered |
310 | 329 | else: |
... | ... | @@ -312,7 +331,7 @@ def get_numps(cases, numerals, nouns, atr): |
312 | 331 | options = options_temp |
313 | 332 | for (orth, tag) in options: |
314 | 333 | gender = tag.split(':')[3] |
315 | - if u':' + gender + u':' in num_tag or u':' + gender + u'.' in num_tag or u'.' + gender + u'.' in num_tag or u'.' + gender + u':' in num_tag: | |
334 | + if in_tag(gender, num_tag): | |
316 | 335 | results.append(num_orth + ' ' + orth) |
317 | 336 | |
318 | 337 | return results #ignoring ambiguos atr for numps |
... | ... | @@ -330,7 +349,7 @@ def get_verb(inf, number, is_subj): |
330 | 349 | filtered = [] |
331 | 350 | for option in options: |
332 | 351 | (orth, tag) = option |
333 | - if u'fin:' in tag and u':' + number + u':' in tag and u':ter:' in tag: | |
352 | + if tag.startswith(u'fin') and in_tag(number, tag) and in_tag(u'ter', tag): | |
334 | 353 | filtered.append(option) |
335 | 354 | options = filtered |
336 | 355 | return [orth for orth, _ in options] |
... | ... | @@ -354,7 +373,7 @@ def phrase(head, dependents): |
354 | 373 | type = argument.type |
355 | 374 | elif argument.type == u'lex': |
356 | 375 | type = sortatributes(argument)[0].values.all()[0].argument.type |
357 | - value, _ = lexicalisation(argument, False, '', False, head) | |
376 | + value, _, _ = lexicalisation(argument, False, '', False, head) | |
358 | 377 | values += value |
359 | 378 | if type == u'adjp': |
360 | 379 | modifiers['pre'].append(values) |
... | ... |
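The scattered substring tests on positional tags in this file are consolidated into the new `in_tag` helper. Below is a minimal standalone sketch of that helper together with a couple of illustrative NKJP-style tag strings (the example tags are mine, not taken from the commit), showing why whole-segment matching is safer than the old `u':' + value + u':' in tag` checks:

```python
# -*- coding: utf-8 -*-
# Standalone copy of the in_tag helper added in this commit: a value counts as
# present only when it forms a whole segment of the positional tag, delimited
# by ':' (tag positions) or '.' (alternative values inside one position).
def in_tag(what, tag):
    return (u':' + what + u':' in tag or u':' + what + u'.' in tag or
            u'.' + what + u'.' in tag or u'.' + what + u':' in tag or
            tag.endswith(u'.' + what) or tag.endswith(u':' + what))

# Illustrative tags (assumed NKJP-style positional format):
print(in_tag(u'nom', u'subst:pl:nom.acc:f'))  # True  - 'nom' is one of the dotted case values
print(u':nom:' in u'subst:pl:nom.acc:f')      # False - the old check misses dotted values
print(in_tag(u'f', u'subst:pl:nom:f'))        # True  - values in final position also match
```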
semantics/static/js/semantics_lexical_units.js
... | ... | @@ -256,6 +256,7 @@ function getMeaningsSelectionForFrame(frame_id) { |
256 | 256 | var options = []; |
257 | 257 | var vrb = []; |
258 | 258 | var pre = []; |
259 | + var both = []; | |
259 | 260 | sid_alt = rows[j].split('_'); |
260 | 261 | var sch = "schema_" + sid_alt[0] + "_"; |
261 | 262 | var k; |
... | ... | @@ -265,46 +266,55 @@ function getMeaningsSelectionForFrame(frame_id) { |
265 | 266 | if (connected[lem].indexOf(proper) != -1) { |
266 | 267 | if (schemas_content[sch].display.arguments[0][k].vrb != null && |
267 | 268 | schemas_content[sch].display.arguments[0][k].vrb.length > 0) { |
268 | - pre = pre.concat(schemas_content[sch].display.arguments[0][k].lex); | |
269 | 269 | vrb = schemas_content[sch].display.arguments[0][k].vrb; |
270 | - } else { | |
270 | + } | |
271 | + if (schemas_content[sch].display.arguments[0][k].loc == -1) { | |
272 | + pre = pre.concat(schemas_content[sch].display.arguments[0][k].lex); | |
273 | + } | |
274 | + if (schemas_content[sch].display.arguments[0][k].loc == 0) { | |
275 | + both.push(schemas_content[sch].display.arguments[0][k].lex); | |
276 | + } | |
277 | + if (schemas_content[sch].display.arguments[0][k].loc == 1) { | |
271 | 278 | options.push(schemas_content[sch].display.arguments[0][k].lex); |
272 | 279 | } |
273 | 280 | } |
274 | 281 | } |
275 | 282 | } |
283 | + var lemma; | |
276 | 284 | if (vrb.length == 0) { |
277 | - var lex = {lemma: [base], pre: pre, args: options}; | |
285 | + lemma = [base]; | |
278 | 286 | if (hasRefl(sch)) { |
279 | 287 | if (isNeg(sch)) { |
280 | - lex.lemma = ["nie " + base + " się"]; | |
288 | + lemma = ["nie " + base + " się"]; | |
281 | 289 | } else { |
282 | - lex.lemma = [base + " się"]; | |
290 | + lemma = [base + " się"]; | |
283 | 291 | } |
284 | 292 | } else { |
285 | 293 | if (isNeg(sch)) { |
286 | - lex.lemma = ["nie " + base]; | |
294 | + lemma = ["nie " + base]; | |
287 | 295 | } else { |
288 | - lex.lemma = [base]; | |
296 | + lemma = [base]; | |
289 | 297 | } |
290 | 298 | } |
291 | - lexicalisation.push(lex); | |
292 | 299 | } else { |
293 | - var lex = {lemma: vrb, pre: pre, args: options}; | |
300 | + lemma = vrb; | |
294 | 301 | if (hasRefl(sch)) { |
295 | 302 | var l = []; |
296 | 303 | var k; |
297 | 304 | for (k=0; k < vrb.length; k++) { |
298 | 305 | l.push(vrb[k] + " się"); |
299 | 306 | } |
300 | - lex.lemma = l; | |
307 | + lemma = l; | |
301 | 308 | } |
302 | - lexicalisation.push(lex); | |
303 | 309 | } |
310 | + | |
311 | + var lexes = get_lexes(lemma, pre, both, options); | |
312 | + lexicalisation = lexicalisation.concat(lexes); | |
304 | 313 | } |
305 | 314 | } |
306 | 315 | |
307 | 316 | display += getFormForLexicalisation(lexicalisation); |
317 | + console.log(lexicalisation); | |
308 | 318 | |
309 | 319 | display += "</div>"; |
310 | 320 | |
... | ... | @@ -347,6 +357,25 @@ function permute(list) { |
347 | 357 | |
348 | 358 | } |
349 | 359 | |
360 | +function get_lexes(lemma, pre, both, post) { | |
361 | + var i; | |
362 | + var struct = [{lemma:lemma, pre:pre, args:post}]; | |
363 | + for (i = 0; i < both.length; i++) { | |
364 | + temp_struct = []; | |
365 | + var j; | |
366 | + for (j = 0; j < struct.length; j++) { | |
367 | + var pre_lex = JSON.parse(JSON.stringify(struct[j])); | |
368 | + pre_lex.pre = pre_lex.pre.concat(both[i]); | |
369 | + temp_struct.push(pre_lex); | |
370 | + var post_lex = JSON.parse(JSON.stringify(struct[j])); | |
371 | + post_lex.args.push(both[i]); | |
372 | + temp_struct.push(post_lex); | |
373 | + } | |
374 | + struct = temp_struct; | |
375 | + } | |
376 | + return struct; | |
377 | +} | |
378 | + | |
350 | 379 | function cartesian(llist) { |
351 | 380 | if (llist.length == 0) { |
352 | 381 | return [[]]; |
... | ... |
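The new JavaScript helper `get_lexes` takes the lexicalisations whose placement flag is 0 (collected in `both`) and expands them into every combination of pre-head and post-head placement, so one schema argument can yield several alternative lexical units. For illustration only, here is a rough Python re-sketch of that expansion (the function above is the authoritative JavaScript version; the Python names are mine):

```python
import copy

def get_lexes(lemma, pre, both, post):
    # One starting structure; every 'both' item doubles the variants:
    # once placed before the head, once kept as a post-head argument.
    variants = [{'lemma': lemma, 'pre': list(pre), 'args': list(post)}]
    for item in both:  # item is assumed to be a list of phrase strings
        expanded = []
        for variant in variants:
            before = copy.deepcopy(variant)
            before['pre'] = before['pre'] + item   # mirrors pre_lex.pre.concat(both[i])
            expanded.append(before)
            after = copy.deepcopy(variant)
            after['args'].append(item)             # mirrors post_lex.args.push(both[i])
            expanded.append(after)
        variants = expanded
    return variants

# Two 'both' items would give 2**2 = 4 placement variants of the same unit.
```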
semantics/views.py
... | ... | @@ -518,10 +518,10 @@ def ajax_schemas(request, lemma_id): |
518 | 518 | astr, aobj = a |
519 | 519 | if aobj is not None and aobj.is_phraseologic(): |
520 | 520 | tmp = lexicalisation(aobj, is_subj(p.categories.all()), lemma.entry_obj.name, ('neg' in characteristics[characteristic_id])) |
521 | - lex, vrb = tmp | |
521 | + lex, vrb, loc = tmp | |
522 | 522 | else: |
523 | - lex, vrb = ([], []) | |
524 | - arg.append({"csv_id": i, "csv_class": c, "argument": astr, "lex": lex, "vrb": vrb}) | |
523 | + lex, vrb, loc = ([], [], 0) | |
524 | + arg.append({"csv_id": i, "csv_class": c, "argument": astr, "lex": lex, "vrb": vrb, "loc": loc}) | |
525 | 525 | display["arguments"].append(arg) |
526 | 526 | |
527 | 527 | schema_display["schemas"].append({"schema_id": str(schema.id), "grade": lemma.get_schema_opinion(schema), "colspan": str(max(len(schema_categories), 1)), "rowspan": str(schema_arguments_rowspan), "display": display, "phraseologic": schema.phraseologic}) |
... | ... |
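With this change `lexicalisation` returns a triple instead of a pair; the third element, `loc`, is forwarded to the front end and, judging by the JavaScript above, encodes where the lexicalised phrase sits relative to the head: -1 before it, 0 on either side (the agreeing adjectival modifiers this commit is about), 1 after it. A minimal sketch of how a caller unpacks the new value (the argument objects are hypothetical placeholders for what views.py actually passes in):

```python
# Hypothetical call; 'aobj', 'subj', 'base' and 'neg' stand in for the real
# objects that views.py passes to lexicalisation().
lex, vrb, loc = lexicalisation(aobj, subj, base, neg)

if loc == -1:
    placement = 'before the head'   # e.g. advp, qub, nump, np subjects
elif loc == 0:
    placement = 'either side'       # e.g. adjp in the agreeing (agr) case
else:
    placement = 'after the head'    # e.g. prepnp, prepadjp, infp, fixed phrases
```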