Commit 1e5542913a5001225fd08dba7f8a8034b396fd2b

Authored by Tomasz Bartosiak
1 parent f8298e06

generator frazeologii - podmioty; przygotowanie do synchronizacji ze Słowosiecią

semantics/phraseology_generator.py
@@ -3,7 +3,8 @@
3 from dictionary.models import sort_arguments, sort_positions, sortatributes 3 from dictionary.models import sort_arguments, sort_positions, sortatributes
4 from settings import MORFEUSZ2 4 from settings import MORFEUSZ2
5 5
6 -def lexicalisation(argument): 6 +def lexicalisation(argument, categories, base):
  7 + subj = is_subj(categories)
7 b = argument.type 8 b = argument.type
8 if b == 'fixed': 9 if b == 'fixed':
9 return get_words(sortatributes(argument)[-1]) 10 return get_words(sortatributes(argument)[-1])
@@ -14,15 +15,21 @@ def lexicalisation(argument):
14 lexicalisation_type = lexicalisation_parameters[0].values.all()[0].argument.type 15 lexicalisation_type = lexicalisation_parameters[0].values.all()[0].argument.type
15 lexicalisation_parameters = sortatributes(lexicalisation_parameters[0].values.all()[0].argument) 16 lexicalisation_parameters = sortatributes(lexicalisation_parameters[0].values.all()[0].argument)
16 if lexicalisation_type == 'np': # np(case), number, nouns, atr 17 if lexicalisation_type == 'np': # np(case), number, nouns, atr
17 - nps = get_nps(get_case(lexicalisation_parameters[0]), get_number(attributes[1]), get_words(attributes[2]), attributes[3])  
18 - return nps 18 + nps = get_nps(get_case(lexicalisation_parameters[0], subj), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3])
  19 + return (nps, get_verb(base, get_number(attributes[1], subj), subj))
19 elif lexicalisation_type == 'prepnp': #prepnp(prep, case), number, nouns, atr 20 elif lexicalisation_type == 'prepnp': #prepnp(prep, case), number, nouns, atr
20 - prepnps = get_prepnps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1]), get_number(attributes[1]), get_words(attributes[2]), attributes[3])  
21 - return prepnps 21 + prepnps = get_prepnps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3])
  22 + return (prepnps, [])
22 else: 23 else:
23 return [] 24 return []
24 return [] 25 return []
25 26
  27 +def is_subj(categories):
  28 + for cat in categories:
  29 + if cat.category == u'subj':
  30 + return True
  31 + return False
  32 +
26 def get_preposition(attribute): 33 def get_preposition(attribute):
27 return attribute.values.all()[0].parameter.type.name 34 return attribute.values.all()[0].parameter.type.name
28 35
@@ -30,14 +37,20 @@ def get_words(attribute):
30 words = [word.text[1:-1] for word in attribute.values.all()] 37 words = [word.text[1:-1] for word in attribute.values.all()]
31 return words 38 return words
32 39
33 -def get_case(attribute): 40 +def get_case(attribute, is_subj):
34 case = attribute.values.all()[0].parameter.type.name 41 case = attribute.values.all()[0].parameter.type.name
35 if case == u'str': 42 if case == u'str':
36 - case = u'acc' 43 + if is_subj:
  44 + case = u'nom'
  45 + else:
  46 + case = u'acc'
37 return case 47 return case
38 48
39 -def get_number(attribute): 49 +def get_number(attribute, is_subj):
40 number = attribute.values.all()[0].parameter.type.name 50 number = attribute.values.all()[0].parameter.type.name
  51 + if number == u'_':
  52 + if is_subj:
  53 + number = u'sg'
41 return number 54 return number
42 55
43 def get_nps(case, number, nouns, _atr): 56 def get_nps(case, number, nouns, _atr):
@@ -65,3 +78,15 @@ def get_prepnps(prep, case, number, nouns, _atr):
65 nps = get_nps(case, number, nouns, _atr) 78 nps = get_nps(case, number, nouns, _atr)
66 return [prep + ' ' + np for np in nps] 79 return [prep + ' ' + np for np in nps]
67 80
  81 +def get_verb(inf, number, is_subj):
  82 + if not is_subj:
  83 + return None
  84 + else:
  85 + options = [(interp.orth, interp.getTag(MORFEUSZ2)) for interp in MORFEUSZ2.generate(inf.encode('utf8'))]
  86 + filtered = []
  87 + for option in options:
  88 + (orth, tag) = option
  89 + if u'fin' in tag and u'sg' in tag and u'ter' in tag:
  90 + filtered.append(option)
  91 + options = filtered
  92 + return [orth for orth, _ in options]
semantics/static/js/semantics_lexical_units.js
@@ -220,20 +220,40 @@ function getMeaningsSelectionForFrame(frame_id) {
220 var j; 220 var j;
221 for (j = 0; j < rows.length; j++) { 221 for (j = 0; j < rows.length; j++) {
222 var options = []; 222 var options = [];
  223 + var vrb = [];
  224 + var pre = [];
223 sid_alt = rows[j].split('_'); 225 sid_alt = rows[j].split('_');
224 var sch = "schema_" + sid_alt[0] + "_"; 226 var sch = "schema_" + sid_alt[0] + "_";
225 var k; 227 var k;
226 for (k = 0; k < schemas_content[sch].display.arguments[0].length; k++) { 228 for (k = 0; k < schemas_content[sch].display.arguments[0].length; k++) {
227 var proper = schemas_content[sch].display.arguments[0][k].csv_id + "alt_" + sid_alt[1] + "_"; 229 var proper = schemas_content[sch].display.arguments[0][k].csv_id + "alt_" + sid_alt[1] + "_";
228 if (connected[lem].indexOf(proper) != -1) { 230 if (connected[lem].indexOf(proper) != -1) {
229 - options.push(schemas_content[sch].display.arguments[0][k].lex); 231 + if (schemas_content[sch].display.arguments[0][k].vrb.length > 0) {
  232 + pre.push(schemas_content[sch].display.arguments[0][k].lex);
  233 + vrb = schemas_content[sch].display.arguments[0][k].vrb;
  234 + } else {
  235 + options.push(schemas_content[sch].display.arguments[0][k].lex);
  236 + }
230 } 237 }
231 } 238 }
232 - var lex = {lemma: base, args: options};  
233 - if (hasRefl(sch)) {  
234 - lex.lemma = base + " się";  
235 - }  
236 - lexicalisation.push(lex); 239 + if (vrb.length == 0) {
  240 + var lex = {lemma: [base], pre: pre, args: options};
  241 + if (hasRefl(sch)) {
  242 + lex.lemma = [base + " się"];
  243 + }
  244 + lexicalisation.push(lex);
  245 + } else {
  246 + var lex = {lemma: vrb, pre: pre, args: options};
  247 + if (hasRefl(sch)) {
  248 + var l = [];
  249 + var k;
  250 + for (k=0; k < vrb.length; k++) {
  251 + l.push(vrb[k] + " się");
  252 + }
  253 + lex.lemma = l;
  254 + }
  255 + lexicalisation.push(lex);
  256 + }
237 } 257 }
238 } 258 }
239 259
@@ -248,12 +268,11 @@ function getFormForLexicalisation(lexicalisation) {
248 var result = ""; 268 var result = "";
249 var i; 269 var i;
250 for (i = 0; i < lexicalisation.length; i++) { 270 for (i = 0; i < lexicalisation.length; i++) {
251 - var perms = permute(lexicalisation[i].args);  
252 - var j;  
253 - for (j = 0; j < perms.length; j++) {  
254 - result += lexicalisationForm(lexicalisation[i].lemma, cartesian(perms[j]))  
255 - }  
256 - result += '<br\>'; 271 + var perms = permute(lexicalisation[i].args);
  272 + var j;
  273 + for (j = 0; j < perms.length; j++) {
  274 + result += lexicalisationForm(lexicalisation[i].lemma, lexicalisation[i].pre, cartesian(perms[j]));
  275 + }
257 } 276 }
258 return result; 277 return result;
259 } 278 }
@@ -302,26 +321,79 @@ function cartesian(llist) {
302 return result; 321 return result;
303 } 322 }
304 323
305 -function lexicalisationForm(lemma, tokenised) { 324 +function lexicalisationForm(lemma, pre, tokenised) {
  325 + var list;
  326 + if (pre.length == 0) {
  327 + list = noSubjUnits(lemma, tokenised);
  328 + } else {
  329 + list = subjUnits(pre, lemma, tokenised);
  330 + }
  331 +
306 var display = ""; 332 var display = "";
307 - var i;  
308 - for (i = 0; i < tokenised.length; i++) {  
309 - if (tokenised[i].length == 0) {  
310 - display += "<br\>";  
311 - } else {  
312 - var j;  
313 - for (j = 0; j < lexical_units.length; j++) {  
314 - if (lemma + " " + tokenised[i].join(" ") == lexical_units[j].base) {  
315 - return "";  
316 - }  
317 - }  
318 - display += "<input type = \"checkbox\" name = \"mwe\" value = \"" + lemma + " " + tokenised[i].join(" ") + "\">"; // TODO: unikalne wartości, wartość => dodanie odpowiedniej jednostki (nazwa jednostki w wartości?)  
319 - display += lemma + " " + tokenised[i].join(" ") + "<br\>"; 333 + var i, j;
  334 + for (i = 0; i < list.length; i++) {
  335 + var included = false;
  336 + for (j = 0; j < lexical_units.length; j++) {
  337 + if (list[i] == lexical_units[j].base) {
  338 + included = true;
  339 + }
  340 + }
  341 + if (!included) {
  342 + display += "<input type = \"checkbox\" name = \"mwe\" value = \"" + list[i] + "\">" + list[i] + "<br\>";
320 } 343 }
321 } 344 }
322 return display; 345 return display;
323 } 346 }
324 347
  348 +function noSubjUnits(lemmata, dependants) {
  349 + var result = [];
  350 + var i, j;
  351 + for (i = 0; i < lemmata.length; i++) {
  352 + if (dependants.length == 0) {
  353 + result.push(lemmata[i]);
  354 + } else {
  355 + for (j = 0; j < dependants.length; j++) {
  356 + result.push(lemmata[i] + dependants[j].join(" "));
  357 + }
  358 + }
  359 + }
  360 + return result;
  361 +}
  362 +
  363 +function subjUnits(pre, lemmata, dependants) {
  364 + var result = [];
  365 + var i, j;
  366 + var temp = noSubjUnits(lemmata, dependants);
  367 + i = decapitate(dependants);
  368 + var pre2 = i.heads;
  369 + var temp2 = noSubjUnits(lemmata, i.bodies);
  370 + for (i = 0; i < pre.length; i++) {
  371 + for (j = 0; j < temp.length; j++) {
  372 + result.push(pre[i] + " " + temp[j]);
  373 + }
  374 + for (j = 0; j < pre2.length; j++) {
  375 + result.push(pre[i] + " " + pre2[j] + " " + temp2[j]);
  376 + }
  377 + }
  378 + return result;
  379 +}
  380 +
  381 +function decapitate(llist) {
  382 + var heads = [];
  383 + var bodies = [];
  384 + var i;
  385 + for (i = 0; i < llist.length; i++) {
  386 + if (llist[i].length > 0) {
  387 + var body = llist[i].slice();
  388 + var head = body[0];
  389 + body.splice(0, 1);
  390 + heads.push(head);
  391 + bodies.push(body);
  392 + }
  393 + }
  394 + return {heads: heads, bodies: bodies};
  395 +}
  396 +
325 397
326 // get readable form of lexical unit 398 // get readable form of lexical unit
327 function getLexicalUnit(luid) { 399 function getLexicalUnit(luid) {
semantics/static/js/semantics_schemas.js
@@ -130,10 +130,11 @@ function schemaBody(schema, alternation, lex){
130 for (l = 0; l < display.arguments[k].length; l++) { 130 for (l = 0; l < display.arguments[k].length; l++) {
131 schema_body += '<td id="' + display.arguments[k][l].csv_id + 'alt_' + alternation + '_" class="' + display.arguments[k][l].csv_class + 'alt_' + alternation + '_" onclick="schemaClick(\'' + display.arguments[k][l].csv_id + 'alt_' + alternation +'_\', '; 131 schema_body += '<td id="' + display.arguments[k][l].csv_id + 'alt_' + alternation + '_" class="' + display.arguments[k][l].csv_class + 'alt_' + alternation + '_" onclick="schemaClick(\'' + display.arguments[k][l].csv_id + 'alt_' + alternation +'_\', ';
132 if (display.arguments[k][l].lex.length != 0) { 132 if (display.arguments[k][l].lex.length != 0) {
133 - schema_body += '[\'' + display.arguments[k][l].lex.join('\', \'') + '\'])">'; 133 + schema_body += '[\'' + display.arguments[k][l].lex.join('\', \'') + '\']';
134 } else { 134 } else {
135 - schema_body += '[])">'; 135 + schema_body += '[]';
136 } 136 }
  137 + schema_body += ')">';
137 schema_body += display.arguments[k][l].argument; 138 schema_body += display.arguments[k][l].argument;
138 schema_body += '</td>'; 139 schema_body += '</td>';
139 if (parseInt(display.arguments[k][l].csv_id.split('_')[5]) >= 0) { 140 if (parseInt(display.arguments[k][l].csv_id.split('_')[5]) >= 0) {
semantics/views.py
@@ -450,13 +450,13 @@ def ajax_schemas(request, lemma_id):
450 # identifier, class, argument 450 # identifier, class, argument
451 arg = [] 451 arg = []
452 #ma["ala"] = kot 452 #ma["ala"] = kot
453 - for i, c, a in zip(idents, schema_ids, row): 453 + for i, c, a, p in zip(idents, schema_ids, row, ordered_positions):
454 astr, aobj = a 454 astr, aobj = a
455 if aobj is not None and aobj.is_phraseologic(): 455 if aobj is not None and aobj.is_phraseologic():
456 - lex = lexicalisation(aobj) 456 + lex, vrb = lexicalisation(aobj, p.categories.all(), lemma.entry_obj.name)
457 else: 457 else:
458 - lex = []  
459 - arg.append({"csv_id": i, "csv_class": c, "argument": astr, "lex": lex}) 458 + lex, vrb = ([], [])
  459 + arg.append({"csv_id": i, "csv_class": c, "argument": astr, "lex": lex, "vrb": vrb})
460 display["arguments"].append(arg) 460 display["arguments"].append(arg)
461 461
462 schema_display["schemas"].append({"schema_id": str(schema.id), "grade": lemma.get_schema_opinion(schema), "colspan": str(max(len(schema_categories), 1)), "rowspan": str(schema_arguments_rowspan), "display": display, "phraseologic": schema.phraseologic}) 462 schema_display["schemas"].append({"schema_id": str(schema.id), "grade": lemma.get_schema_opinion(schema), "colspan": str(max(len(schema_categories), 1)), "rowspan": str(schema_arguments_rowspan), "display": display, "phraseologic": schema.phraseologic})