Commit e9aff2d820b37f92e7effea41308383631aaefde

Authored by Tomasz Bartosiak
1 parent 915b67fc

przymiotnikowe modyfikatory rzeczowników + ogólne poprawki frazeologii

semantics/phraseology_generator.py
... ... @@ -7,7 +7,7 @@ from copy import deepcopy
7 7 def lexicalisation(argument, subj, base, negativity, reference=None):
8 8 b = argument.type
9 9 if b == 'fixed':
10   - return (get_words(sortatributes(argument)[-1]), [])
  10 + return (get_words(sortatributes(argument)[-1]), [], 1)
11 11 attributes = sortatributes(argument)
12 12 lexicalisation_type = attributes[0].values.all()[0].argument.type
13 13 lexicalisation_parameters = sortatributes(attributes[0].values.all()[0].argument)
... ... @@ -16,34 +16,45 @@ def lexicalisation(argument, subj, base, negativity, reference=None):
16 16 lexicalisation_parameters = sortatributes(lexicalisation_parameters[0].values.all()[0].argument)
17 17 if lexicalisation_type == 'np': # np(case), number, nouns, atr
18 18 nps = get_nps(get_case(lexicalisation_parameters[0], subj, negativity), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3])
19   - return (nps, get_verb(base, get_number(attributes[1], subj), subj))
  19 + if subj:
  20 + return (nps, get_verb(base, get_number(attributes[1], subj), subj), -1)
  21 + else:
  22 + return (nps, [], 1)
20 23 elif lexicalisation_type == 'prepnp': #prepnp(prep, case), number, nouns, atr
21 24 prepnps = get_prepnps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, negativity), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3])
22   - return (prepnps, [])
  25 + return (prepnps, [], 1)
23 26 elif lexicalisation_type == 'adjp': # adjp(case), number, gender, degree, adjectives, atr
24 27 adjps = get_adjps(get_case(lexicalisation_parameters[0], subj, negativity, reference), get_number(attributes[1], subj, reference), get_gender(attributes[2], reference), get_degree(attributes[3]), get_words(attributes[4]), attributes[5])
25   - return (adjps, get_verb(base, get_number(attributes[1], subj), subj))
  28 + if reference is None:
  29 + if lexicalisation_parameters[0].values.all()[0].parameter.type.name == u'agr':
  30 + return (adjps, [], 0)
  31 + else:
  32 + return (adjps, [], -1)
  33 + elif subj:
  34 + return (adjps, get_verb(base, get_number(attributes[1], subj), subj), -1)
  35 + else:
  36 + return (adjps, [], 1)
26 37 elif lexicalisation_type == 'prepadjp': #prepadjp(prep, case), number, gender, degree, adjectives, atr
27 38 prepadjps = get_prepadjps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, False, reference), get_number(attributes[1], subj, reference), get_gender(attributes[2], reference), get_degree(attributes[3]), get_words(attributes[4]), attributes[5])
28   - return (prepadjps, [])
  39 + return (prepadjps, [], 1)
29 40 elif lexicalisation_type == 'infp':
30 41 infps = get_infps(get_aspect(lexicalisation_parameters[0]), get_words(attributes[2]), attributes[4])
31   - return (infps, [])
  42 + return (infps, [], 1)
32 43 elif lexicalisation_type == 'advp': #advp(type), degree, adverb, atr
33 44 advps = get_advps(get_degree(attributes[1]), get_words(attributes[2]), attributes[3])
34   - return (advps, [base])
  45 + return (advps, [], -1)
35 46 elif lexicalisation_type == 'nump': # nump(case), num, noun, atr
36 47 numps = get_numps(get_case(lexicalisation_parameters[0], subj, negativity, reference), get_words(attributes[1]), get_words(attributes[2]), attributes[3])
37   - return (numps, get_verb(base, 'pl', subj))
  48 + return (numps, get_verb(base, 'pl', subj), -1)
38 49 elif lexicalisation_type == 'prepnump': # prepnump(prep,case), num, noun, atr
39   - numps = get_prepnumps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, False, reference), get_words(attributes[1]), get_words(attributes[2]), attributes[3])
40   - return (numps, []) #get_verb(base, 'pl', subj))
  50 + prepnumps = get_prepnumps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, False, reference), get_words(attributes[1]), get_words(attributes[2]), attributes[3])
  51 + return (prepnumps, [], 1)
41 52 elif lexicalisation_type == 'qub': # qub, form, atr
42 53 qubs = get_qubs(get_words(attributes[1]), attributes[2])
43   - return (qubs, [base])
  54 + return (qubs, [], -1)
44 55 else:
45   - return ([], [])
46   - return ([], [])
  56 + return ([], [], 0)
  57 + return ([], [], 0)
47 58  
48 59 def is_subj(categories):
49 60 for cat in categories:
... ... @@ -82,6 +93,8 @@ def get_case(attribute, is_subj, negativity, reference=None):
82 93 case = [tag.split(':')[1]]
83 94 else:
84 95 case = [tag.split(':')[2]]
  96 + elif case == u'agr' and reference is None:
  97 + case = [u'nom']
85 98 else:
86 99 case = [case]
87 100 return case
... ... @@ -125,6 +138,12 @@ def get_degree(attribute):
125 138 degree = u'pos'
126 139 return degree
127 140  
def in_tag(what, tag):
    """Tell whether ``what`` occurs in ``tag`` as a whole, delimited value.

    A match requires ``what`` to be directly preceded by ``:`` or ``.`` and
    directly followed by ``:`` or ``.`` or by the end of ``tag``.  A plain
    substring hit inside a longer value therefore does not count, and
    neither does the leading element of ``tag``, which has no delimiter in
    front of it.

    Parameters:
        what: value to look for (callers pass a case, number, gender,
            degree or aspect).
        tag: tag string whose values are separated by ``:`` and ``.``.

    Returns:
        True if ``what`` is present as a delimited value, False otherwise.
    """
    delimiters = (u':', u'.')
    # Last value of the tag: only the leading delimiter can be checked.
    if tag.endswith(tuple(lead + what for lead in delimiters)):
        return True
    # Anywhere else the value must be fenced by a delimiter on both sides.
    return any(lead + what + trail in tag
               for lead in delimiters
               for trail in delimiters)
  146 +
128 147 def get_nps(cases, number, nouns, atr):
129 148 result = []
130 149 for noun in nouns:
... ... @@ -135,7 +154,7 @@ def get_nps(cases, number, nouns, atr):
135 154 filtered = []
136 155 for option in options:
137 156 (orth, tag) = option
138   - if u':' + case in tag or u'.' + case in tag:
  157 + if in_tag(case, tag):
139 158 filtered.append(option)
140 159 options_temp += filtered
141 160 else:
... ... @@ -145,7 +164,7 @@ def get_nps(cases, number, nouns, atr):
145 164 filtered = []
146 165 for option in options:
147 166 (orth, tag) = option
148   - if u':' + number + u':' in tag:
  167 + if in_tag(number, tag):
149 168 filtered.append(option)
150 169 options = filtered
151 170 result += options
... ... @@ -168,7 +187,7 @@ def get_infps(aspect, verbs, atr):
168 187 if aspect != u'_':
169 188 for option in options:
170 189 (orth, tag) = option
171   - if u':' + aspect + u':' in tag:
  190 + if in_tag(aspect, tag):
172 191 filtered.append(option)
173 192 options = filtered
174 193 result += options
... ... @@ -190,7 +209,7 @@ def get_adjps(cases, number, gender, degree, adjectives, atr):
190 209 filtered = []
191 210 for option in options:
192 211 (orth, tag) = option
193   - if u':' + case + u':' in tag:
  212 + if in_tag(case, tag):
194 213 filtered.append(option)
195 214 options_temp += filtered
196 215 else:
... ... @@ -200,21 +219,21 @@ def get_adjps(cases, number, gender, degree, adjectives, atr):
200 219 filtered = []
201 220 for option in options:
202 221 (orth, tag) = option
203   - if u':' + number + u':' in tag:
  222 + if in_tag(number, tag):
204 223 filtered.append(option)
205 224 options = filtered
206 225 if gender != u'_':
207 226 filtered = []
208 227 for option in options:
209 228 (orth, tag) = option
210   - if u':' + gender + u':' in tag or u'.' + gender + u':' in tag or u':' + gender + u'.' in tag or u'.' + gender + u'.' in tag or u'.' + gender + u':' in tag:
  229 + if in_tag(gender, tag):
211 230 filtered.append(option)
212 231 options = filtered
213 232 if degree != u'_':
214 233 filtered = []
215 234 for option in options:
216 235 (orth, tag) = option
217   - if u':' + degree in tag:
  236 + if in_tag(degree, tag):
218 237 filtered.append(option)
219 238 options = filtered
220 239 result += options
... ... @@ -231,14 +250,14 @@ def get_advps(degree, adverbs, atr):
231 250 filtered = []
232 251 for option in options:
233 252 (orth, tag) = option
234   - if u'adv' in tag:
  253 + if tag.startswith(u'adv'):
235 254 filtered.append(option)
236 255 options = filtered
237 256 if ':' in tag and degree != u'_':
238 257 filtered = []
239 258 for option in options:
240 259 (orth, tag) = option
241   - if u':' + degree in tag:
  260 + if in_tag(degree, tag):
242 261 filtered.append(option)
243 262 options = filtered
244 263 result += options
... ... @@ -251,7 +270,7 @@ def get_qubs(qubs, atr):
251 270 filtered = []
252 271 for option in options:
253 272 (orth, tag) = option
254   - if u'qub' in tag:
  273 + if tag.startswith(u'qub'):
255 274 filtered.append(option)
256 275 options = filtered
257 276 result += options
... ... @@ -265,7 +284,7 @@ def get_numps(cases, numerals, nouns, atr):
265 284 filtered = []
266 285 for option in options:
267 286 (orth, tag) = option
268   - if u'num:' in tag:
  287 + if tag.startswith(u'num:'):
269 288 filtered.append(option)
270 289 options = filtered
271 290 options_temp = []
... ... @@ -274,7 +293,7 @@ def get_numps(cases, numerals, nouns, atr):
274 293 filtered = []
275 294 for option in options:
276 295 (orth, tag) = option
277   - if u':' + case + u':' in tag or u':' + case + u'.' in tag or u'.' + case + u'.' in tag or u'.' + case + u':' in tag:
  296 + if in_tag(case, tag):
278 297 filtered.append(option)
279 298 options_temp += filtered
280 299 else:
... ... @@ -289,7 +308,7 @@ def get_numps(cases, numerals, nouns, atr):
289 308 filtered = []
290 309 for option in options:
291 310 (orth, tag) = option
292   - if u':pl:' in tag:
  311 + if in_tag(u'pl', tag):
293 312 filtered.append(option)
294 313 options = filtered
295 314 if rec == 'rec':
... ... @@ -304,7 +323,7 @@ def get_numps(cases, numerals, nouns, atr):
304 323 filtered = []
305 324 for option in options:
306 325 (orth, tag) = option
307   - if u':' + case + u':' in tag or u':' + case + u'.' in tag or u'.' + case + u'.' in tag or u'.' + case + u':' in tag:
  326 + if in_tag(case, tag):
308 327 filtered.append(option)
309 328 options_temp += filtered
310 329 else:
... ... @@ -312,7 +331,7 @@ def get_numps(cases, numerals, nouns, atr):
312 331 options = options_temp
313 332 for (orth, tag) in options:
314 333 gender = tag.split(':')[3]
315   - if u':' + gender + u':' in num_tag or u':' + gender + u'.' in num_tag or u'.' + gender + u'.' in num_tag or u'.' + gender + u':' in num_tag:
  334 + if in_tag(gender, num_tag):
316 335 results.append(num_orth + ' ' + orth)
317 336  
318 337 return results #ignoring ambiguos atr for numps
... ... @@ -330,7 +349,7 @@ def get_verb(inf, number, is_subj):
330 349 filtered = []
331 350 for option in options:
332 351 (orth, tag) = option
333   - if u'fin:' in tag and u':' + number + u':' in tag and u':ter:' in tag:
  352 + if tag.startswith(u'fin') and in_tag(number, tag) and in_tag(u'ter', tag):
334 353 filtered.append(option)
335 354 options = filtered
336 355 return [orth for orth, _ in options]
... ... @@ -354,7 +373,7 @@ def phrase(head, dependents):
354 373 type = argument.type
355 374 elif argument.type == u'lex':
356 375 type = sortatributes(argument)[0].values.all()[0].argument.type
357   - value, _ = lexicalisation(argument, False, '', False, head)
  376 + value, _, _ = lexicalisation(argument, False, '', False, head)
358 377 values += value
359 378 if type == u'adjp':
360 379 modifiers['pre'].append(values)
... ...
semantics/static/js/semantics_lexical_units.js
... ... @@ -256,6 +256,7 @@ function getMeaningsSelectionForFrame(frame_id) {
256 256 var options = [];
257 257 var vrb = [];
258 258 var pre = [];
  259 + var both = [];
259 260 sid_alt = rows[j].split('_');
260 261 var sch = "schema_" + sid_alt[0] + "_";
261 262 var k;
... ... @@ -265,46 +266,55 @@ function getMeaningsSelectionForFrame(frame_id) {
265 266 if (connected[lem].indexOf(proper) != -1) {
266 267 if (schemas_content[sch].display.arguments[0][k].vrb != null &&
267 268 schemas_content[sch].display.arguments[0][k].vrb.length > 0) {
268   - pre = pre.concat(schemas_content[sch].display.arguments[0][k].lex);
269 269 vrb = schemas_content[sch].display.arguments[0][k].vrb;
270   - } else {
  270 + }
  271 + if (schemas_content[sch].display.arguments[0][k].loc == -1) {
  272 + pre = pre.concat(schemas_content[sch].display.arguments[0][k].lex);
  273 + }
  274 + if (schemas_content[sch].display.arguments[0][k].loc == 0) {
  275 + both.push(schemas_content[sch].display.arguments[0][k].lex);
  276 + }
  277 + if (schemas_content[sch].display.arguments[0][k].loc == 1) {
271 278 options.push(schemas_content[sch].display.arguments[0][k].lex);
272 279 }
273 280 }
274 281 }
275 282 }
  283 + var lemma;
276 284 if (vrb.length == 0) {
277   - var lex = {lemma: [base], pre: pre, args: options};
  285 + lemma = [base];
278 286 if (hasRefl(sch)) {
279 287 if (isNeg(sch)) {
280   - lex.lemma = ["nie " + base + " się"];
  288 + lemma = ["nie " + base + " się"];
281 289 } else {
282   - lex.lemma = [base + " się"];
  290 + lemma = [base + " się"];
283 291 }
284 292 } else {
285 293 if (isNeg(sch)) {
286   - lex.lemma = ["nie " + base];
  294 + lemma = ["nie " + base];
287 295 } else {
288   - lex.lemma = [base];
  296 + lemma = [base];
289 297 }
290 298 }
291   - lexicalisation.push(lex);
292 299 } else {
293   - var lex = {lemma: vrb, pre: pre, args: options};
  300 + lemma = vrb;
294 301 if (hasRefl(sch)) {
295 302 var l = [];
296 303 var k;
297 304 for (k=0; k < vrb.length; k++) {
298 305 l.push(vrb[k] + " się");
299 306 }
300   - lex.lemma = l;
  307 + lemma = l;
301 308 }
302   - lexicalisation.push(lex);
303 309 }
  310 +
  311 + var lexes = get_lexes(lemma, pre, both, options);
  312 + lexicalisation = lexicalisation.concat(lexes);
304 313 }
305 314 }
306 315  
307 316 display += getFormForLexicalisation(lexicalisation);
  317 + console.log(lexicalisation);
308 318  
309 319 display += "</div>";
310 320  
... ... @@ -347,6 +357,25 @@ function permute(list) {
347 357  
348 358 }
349 359  
/**
 * Expand the position-neutral lexicalisations into every pre/post variant.
 *
 * Starts from a single entry {lemma, pre, args: post}. For each element of
 * `both`, every entry collected so far is duplicated: one copy gets the
 * element concatenated onto `pre` (an array element is flattened into it),
 * the other gets the element pushed onto `args` as a single item. The
 * result therefore holds 2^both.length entries, ordered with the "pre"
 * variant of each split first.
 *
 * Entries are copied through a JSON round-trip, so the values are expected
 * to be JSON-serialisable. When `both` is empty the single returned entry
 * references the `lemma`, `pre` and `post` arguments directly.
 *
 * @param {Array} lemma - lemma forms stored under `lemma` in each entry.
 * @param {Array} pre - lexicalisations already fixed before the lemma.
 * @param {Array} both - lexicalisations that may go on either side.
 * @param {Array} post - lexicalisations already fixed as arguments.
 * @returns {Array} list of {lemma, pre, args} objects.
 */
function get_lexes(lemma, pre, both, post) {
    var struct = [{lemma: lemma, pre: pre, args: post}];
    var i;
    var j;
    for (i = 0; i < both.length; i++) {
        // Declared locally: a bare assignment here would create an implicit
        // global (and throw a ReferenceError in strict mode).
        var temp_struct = [];
        for (j = 0; j < struct.length; j++) {
            // Deep copies keep the two variants from sharing arrays.
            var pre_lex = JSON.parse(JSON.stringify(struct[j]));
            pre_lex.pre = pre_lex.pre.concat(both[i]);
            temp_struct.push(pre_lex);

            var post_lex = JSON.parse(JSON.stringify(struct[j]));
            post_lex.args.push(both[i]);
            temp_struct.push(post_lex);
        }
        struct = temp_struct;
    }
    return struct;
}
  378 +
350 379 function cartesian(llist) {
351 380 if (llist.length == 0) {
352 381 return [[]];
... ...
semantics/views.py
... ... @@ -518,10 +518,10 @@ def ajax_schemas(request, lemma_id):
518 518 astr, aobj = a
519 519 if aobj is not None and aobj.is_phraseologic():
520 520 tmp = lexicalisation(aobj, is_subj(p.categories.all()), lemma.entry_obj.name, ('neg' in characteristics[characteristic_id]))
521   - lex, vrb = tmp
  521 + lex, vrb, loc = tmp
522 522 else:
523   - lex, vrb = ([], [])
524   - arg.append({"csv_id": i, "csv_class": c, "argument": astr, "lex": lex, "vrb": vrb})
  523 + lex, vrb, loc = ([], [], 0)
  524 + arg.append({"csv_id": i, "csv_class": c, "argument": astr, "lex": lex, "vrb": vrb, "loc": loc})
525 525 display["arguments"].append(arg)
526 526  
527 527 schema_display["schemas"].append({"schema_id": str(schema.id), "grade": lemma.get_schema_opinion(schema), "colspan": str(max(len(schema_categories), 1)), "rowspan": str(schema_arguments_rowspan), "display": display, "phraseologic": schema.phraseologic})
... ...