Commit 1e5542913a5001225fd08dba7f8a8034b396fd2b

Authored by Tomasz Bartosiak
1 parent f8298e06

generator frazeologii - podmioty; przygotowanie do synchronizacji ze Słowosiecią

semantics/phraseology_generator.py
... ... @@ -3,7 +3,8 @@
3 3 from dictionary.models import sort_arguments, sort_positions, sortatributes
4 4 from settings import MORFEUSZ2
5 5  
6   -def lexicalisation(argument):
  6 +def lexicalisation(argument, categories, base):
  7 + subj = is_subj(categories)
7 8 b = argument.type
8 9 if b == 'fixed':
9 10 return get_words(sortatributes(argument)[-1])
... ... @@ -14,15 +15,21 @@ def lexicalisation(argument):
14 15 lexicalisation_type = lexicalisation_parameters[0].values.all()[0].argument.type
15 16 lexicalisation_parameters = sortatributes(lexicalisation_parameters[0].values.all()[0].argument)
16 17 if lexicalisation_type == 'np': # np(case), number, nouns, atr
17   - nps = get_nps(get_case(lexicalisation_parameters[0]), get_number(attributes[1]), get_words(attributes[2]), attributes[3])
18   - return nps
  18 + nps = get_nps(get_case(lexicalisation_parameters[0], subj), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3])
  19 + return (nps, get_verb(base, get_number(attributes[1], subj), subj))
19 20 elif lexicalisation_type == 'prepnp': #prepnp(prep, case), number, nouns, atr
20   - prepnps = get_prepnps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1]), get_number(attributes[1]), get_words(attributes[2]), attributes[3])
21   - return prepnps
  21 + prepnps = get_prepnps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3])
  22 + return (prepnps, [])
22 23 else:
23 24 return []
24 25 return []
25 26  
def is_subj(categories):
    """Tell whether any of the given position categories is the subject.

    categories -- iterable of objects exposing a ``category`` attribute.
    Returns True as soon as one of them has ``category == u'subj'``,
    False otherwise (including for an empty iterable).
    """
    return any(cat.category == u'subj' for cat in categories)
  32 +
def get_preposition(attribute):
    """Return the type name of the parameter of the attribute's first value.

    Reads ``attribute.values.all()`` and uses only its first element;
    an attribute with no values raises IndexError.
    """
    first_value = attribute.values.all()[0]
    return first_value.parameter.type.name
28 35  
... ... @@ -30,14 +37,20 @@ def get_words(attribute):
30 37 words = [word.text[1:-1] for word in attribute.values.all()]
31 38 return words
32 39  
def get_case(attribute, is_subj):
    """Return the case name stored in the attribute's first value.

    attribute -- object whose ``values.all()[0].parameter.type.name`` holds
                 the case name.
    is_subj   -- truthy when the argument being lexicalised is the subject.

    The placeholder ``u'str'`` (presumably "structural case" -- confirm
    against the dictionary model) is resolved to ``u'nom'`` for a subject
    and to ``u'acc'`` otherwise; any other name is returned unchanged.
    """
    case = attribute.values.all()[0].parameter.type.name
    if case != u'str':
        return case
    return u'nom' if is_subj else u'acc'
38 48  
def get_number(attribute, is_subj):
    """Return the number name stored in the attribute's first value.

    attribute -- object whose ``values.all()[0].parameter.type.name`` holds
                 the number name.
    is_subj   -- truthy when the argument being lexicalised is the subject.

    The placeholder ``u'_'`` is replaced by ``u'sg'`` for a subject only;
    for non-subjects, and for every other name, the stored value is
    returned unchanged.
    """
    number = attribute.values.all()[0].parameter.type.name
    if number == u'_' and is_subj:
        return u'sg'
    return number
42 55  
43 56 def get_nps(case, number, nouns, _atr):
... ... @@ -65,3 +78,15 @@ def get_prepnps(prep, case, number, nouns, _atr):
65 78 nps = get_nps(case, number, nouns, _atr)
66 79 return [prep + ' ' + np for np in nps]
67 80  
def get_verb(inf, number, is_subj):
    """Return finite third-person forms of a verb agreeing with a subject.

    inf     -- the verb lemma (unicode); it is UTF-8 encoded before being
               handed to ``MORFEUSZ2.generate``.
    number  -- number of the lexicalised subject; ``u'sg'`` and ``u'pl'``
               select the matching verb forms, anything else falls back to
               ``u'sg'`` (the only number used before).
    is_subj -- truthy when the lexicalised argument is the subject.

    Returns a list of orthographic forms whose tag contains ``fin``, the
    requested number and ``ter``.  For a non-subject the list is empty.
    """
    if not is_subj:
        # Always a list, never None: this value is serialised as the "vrb"
        # field and the client reads its ``length``.
        return []

    wanted_number = number if number in (u'sg', u'pl') else u'sg'

    forms = []
    for interp in MORFEUSZ2.generate(inf.encode('utf8')):
        tag = interp.getTag(MORFEUSZ2)
        # Substring checks on the tag, as before; only the number marker
        # is now taken from the argument instead of being fixed to 'sg'.
        if u'fin' in tag and wanted_number in tag and u'ter' in tag:
            forms.append(interp.orth)
    return forms
... ...
semantics/static/js/semantics_lexical_units.js
... ... @@ -220,20 +220,40 @@ function getMeaningsSelectionForFrame(frame_id) {
220 220 var j;
221 221 for (j = 0; j < rows.length; j++) {
222 222 var options = [];
  223 + var vrb = [];
  224 + var pre = [];
223 225 sid_alt = rows[j].split('_');
224 226 var sch = "schema_" + sid_alt[0] + "_";
225 227 var k;
226 228 for (k = 0; k < schemas_content[sch].display.arguments[0].length; k++) {
227 229 var proper = schemas_content[sch].display.arguments[0][k].csv_id + "alt_" + sid_alt[1] + "_";
228 230 if (connected[lem].indexOf(proper) != -1) {
229   - options.push(schemas_content[sch].display.arguments[0][k].lex);
  231 + if (schemas_content[sch].display.arguments[0][k].vrb.length > 0) {
  232 + pre.push(schemas_content[sch].display.arguments[0][k].lex);
  233 + vrb = schemas_content[sch].display.arguments[0][k].vrb;
  234 + } else {
  235 + options.push(schemas_content[sch].display.arguments[0][k].lex);
  236 + }
230 237 }
231 238 }
232   - var lex = {lemma: base, args: options};
233   - if (hasRefl(sch)) {
234   - lex.lemma = base + " się";
235   - }
236   - lexicalisation.push(lex);
  239 + if (vrb.length == 0) {
  240 + var lex = {lemma: [base], pre: pre, args: options};
  241 + if (hasRefl(sch)) {
  242 + lex.lemma = [base + " się"];
  243 + }
  244 + lexicalisation.push(lex);
  245 + } else {
  246 + var lex = {lemma: vrb, pre: pre, args: options};
  247 + if (hasRefl(sch)) {
  248 + var l = [];
  249 + var k;
  250 + for (k=0; k < vrb.length; k++) {
  251 + l.push(vrb[k] + " się");
  252 + }
  253 + lex.lemma = l;
  254 + }
  255 + lexicalisation.push(lex);
  256 + }
237 257 }
238 258 }
239 259  
... ... @@ -248,12 +268,11 @@ function getFormForLexicalisation(lexicalisation) {
248 268 var result = "";
249 269 var i;
250 270 for (i = 0; i < lexicalisation.length; i++) {
251   - var perms = permute(lexicalisation[i].args);
252   - var j;
253   - for (j = 0; j < perms.length; j++) {
254   - result += lexicalisationForm(lexicalisation[i].lemma, cartesian(perms[j]))
255   - }
256   - result += '<br\>';
  271 + var perms = permute(lexicalisation[i].args);
  272 + var j;
  273 + for (j = 0; j < perms.length; j++) {
  274 + result += lexicalisationForm(lexicalisation[i].lemma, lexicalisation[i].pre, cartesian(perms[j]));
  275 + }
257 276 }
258 277 return result;
259 278 }
... ... @@ -302,26 +321,79 @@ function cartesian(llist) {
302 321 return result;
303 322 }
304 323  
// Build the HTML checkbox list of candidate multi-word units.
//   lemma     - array of verb forms (base lemma or inflected forms)
//   pre       - array of lexicalised subjects; empty when there is none
//   tokenised - array of token arrays describing the remaining dependants
// Candidates whose text equals the `base` of an entry in the global
// `lexical_units` are left out; every other candidate becomes one
// "mwe" checkbox followed by its text.
function lexicalisationForm(lemma, pre, tokenised) {
    // Without a lexicalised subject only verb + dependants are generated;
    // otherwise the subject-prefixed variants are generated.
    var candidates = (pre.length == 0)
        ? noSubjUnits(lemma, tokenised)
        : subjUnits(pre, lemma, tokenised);

    var html = "";
    var c, u, known;
    for (c = 0; c < candidates.length; c++) {
        known = false;
        for (u = 0; u < lexical_units.length; u++) {
            if (candidates[c] == lexical_units[u].base) {
                known = true;
            }
        }
        if (known) {
            continue;
        }
        html += "<input type = \"checkbox\" name = \"mwe\" value = \"" + candidates[c] + "\">" + candidates[c] + "<br\>";
    }
    return html;
}
324 347  
// Combine every verb form with every dependant token sequence.
//   lemmata    - array of verb forms (strings)
//   dependants - array of token arrays
// Returns an array of strings "<form> <tok1> <tok2> ...", ordered by form
// and then by dependant sequence.  When `dependants` is empty the forms
// themselves are returned.
function noSubjUnits(lemmata, dependants) {
    var result = [];
    var i, j;
    for (i = 0; i < lemmata.length; i++) {
        if (dependants.length == 0) {
            result.push(lemmata[i]);
        } else {
            for (j = 0; j < dependants.length; j++) {
                // Joining form and tokens as one list puts exactly one
                // space between the form and the first token, and leaves
                // no trailing space when the token list is empty.
                result.push([lemmata[i]].concat(dependants[j]).join(" "));
            }
        }
    }
    return result;
}
  362 +
// Build candidate units that start with a lexicalised subject.
//   pre        - array of subject strings
//   lemmata    - array of verb forms
//   dependants - array of token arrays
// For every subject two families of strings are produced, in this order:
//   1. "<subject> <verb + dependants>" for each result of noSubjUnits;
//   2. "<subject> <first token> <verb + remaining tokens>" for every verb
//      form and every non-empty dependant sequence.
function subjUnits(pre, lemmata, dependants) {
    var result = [];
    var p, l, h, k;

    var plain = noSubjUnits(lemmata, dependants);
    var parts = decapitate(dependants);

    // Head-fronted variants are built one verb form at a time so that
    // heads[h] is always paired with the unit made from bodies[h];
    // indexing a combined multi-form list by h would only ever reach the
    // entries of the first form.
    var fronted = [];
    for (l = 0; l < lemmata.length; l++) {
        var tails = noSubjUnits([lemmata[l]], parts.bodies);
        for (h = 0; h < parts.heads.length; h++) {
            fronted.push(parts.heads[h] + " " + tails[h]);
        }
    }

    for (p = 0; p < pre.length; p++) {
        for (k = 0; k < plain.length; k++) {
            result.push(pre[p] + " " + plain[k]);
        }
        for (k = 0; k < fronted.length; k++) {
            result.push(pre[p] + " " + fronted[k]);
        }
    }
    return result;
}
  380 +
// Split each non-empty token list into its first token and the rest.
//   llist - array of token arrays
// Returns {heads, bodies}: heads[n] is the first token and bodies[n] a new
// array with the remaining tokens of the n-th non-empty entry.  Empty
// entries are skipped and the input arrays are not modified.
function decapitate(llist) {
    var heads = [], bodies = [];
    for (var idx = 0; idx < llist.length; idx++) {
        var tokens = llist[idx];
        if (!(tokens.length > 0)) {
            continue;
        }
        heads.push(tokens[0]);
        bodies.push(tokens.slice(1));
    }
    return {heads: heads, bodies: bodies};
}
  396 +
325 397  
326 398 // get readable form of lexical unit
327 399 function getLexicalUnit(luid) {
... ...
semantics/static/js/semantics_schemas.js
... ... @@ -130,10 +130,11 @@ function schemaBody(schema, alternation, lex){
130 130 for (l = 0; l < display.arguments[k].length; l++) {
131 131 schema_body += '<td id="' + display.arguments[k][l].csv_id + 'alt_' + alternation + '_" class="' + display.arguments[k][l].csv_class + 'alt_' + alternation + '_" onclick="schemaClick(\'' + display.arguments[k][l].csv_id + 'alt_' + alternation +'_\', ';
132 132 if (display.arguments[k][l].lex.length != 0) {
133   - schema_body += '[\'' + display.arguments[k][l].lex.join('\', \'') + '\'])">';
  133 + schema_body += '[\'' + display.arguments[k][l].lex.join('\', \'') + '\']';
134 134 } else {
135   - schema_body += '[])">';
  135 + schema_body += '[]';
136 136 }
  137 + schema_body += ')">';
137 138 schema_body += display.arguments[k][l].argument;
138 139 schema_body += '</td>';
139 140 if (parseInt(display.arguments[k][l].csv_id.split('_')[5]) >= 0) {
... ...
semantics/views.py
... ... @@ -450,13 +450,13 @@ def ajax_schemas(request, lemma_id):
450 450 # identifier, class, argument
451 451 arg = []
452 452 #ma["ala"] = kot
453   - for i, c, a in zip(idents, schema_ids, row):
  453 + for i, c, a, p in zip(idents, schema_ids, row, ordered_positions):
454 454 astr, aobj = a
455 455 if aobj is not None and aobj.is_phraseologic():
456   - lex = lexicalisation(aobj)
  456 + lex, vrb = lexicalisation(aobj, p.categories.all(), lemma.entry_obj.name)
457 457 else:
458   - lex = []
459   - arg.append({"csv_id": i, "csv_class": c, "argument": astr, "lex": lex})
  458 + lex, vrb = ([], [])
  459 + arg.append({"csv_id": i, "csv_class": c, "argument": astr, "lex": lex, "vrb": vrb})
460 460 display["arguments"].append(arg)
461 461  
462 462 schema_display["schemas"].append({"schema_id": str(schema.id), "grade": lemma.get_schema_opinion(schema), "colspan": str(max(len(schema_categories), 1)), "rowspan": str(schema_arguments_rowspan), "display": display, "phraseologic": schema.phraseologic})
... ...