From e9aff2d820b37f92e7effea41308383631aaefde Mon Sep 17 00:00:00 2001
From: Tomasz Bartosiak <tomasz.bartosiak@gmail.com>
Date: Wed, 22 Aug 2018 15:33:44 +0200
Subject: [PATCH] adjectival modifiers of nouns + general phraseology fixes

---
 semantics/phraseology_generator.py             | 79 +++++++++++++++++++++++++++++++++++++++++++++++++------------------------------
 semantics/static/js/semantics_lexical_units.js | 51 ++++++++++++++++++++++++++++++++++++++++-----------
 semantics/views.py                             |  6 +++---
 3 files changed, 92 insertions(+), 44 deletions(-)

diff --git a/semantics/phraseology_generator.py b/semantics/phraseology_generator.py
index 850e981..9874942 100644
--- a/semantics/phraseology_generator.py
+++ b/semantics/phraseology_generator.py
@@ -7,7 +7,7 @@ from copy import deepcopy
 def lexicalisation(argument, subj, base, negativity, reference=None):
     b = argument.type
     if b == 'fixed':
-        return (get_words(sortatributes(argument)[-1]), [])
+        return (get_words(sortatributes(argument)[-1]), [], 1)
     attributes = sortatributes(argument)
     lexicalisation_type = attributes[0].values.all()[0].argument.type
     lexicalisation_parameters = sortatributes(attributes[0].values.all()[0].argument)
@@ -16,34 +16,45 @@ def lexicalisation(argument, subj, base, negativity, reference=None):
         lexicalisation_parameters = sortatributes(lexicalisation_parameters[0].values.all()[0].argument)
     if lexicalisation_type == 'np': # np(case), number, nouns, atr
         nps = get_nps(get_case(lexicalisation_parameters[0], subj, negativity), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3])
-        return (nps, get_verb(base, get_number(attributes[1], subj), subj))
+        if subj:
+            return (nps, get_verb(base, get_number(attributes[1], subj), subj), -1)
+        else:
+            return (nps, [], 1)
     elif lexicalisation_type == 'prepnp': #prepnp(prep, case), number, nouns, atr
         prepnps = get_prepnps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, negativity), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3])
-        return (prepnps, [])
+        return (prepnps, [], 1)
     elif lexicalisation_type == 'adjp': # adjp(case), number, gender, degree, adjectives, atr
         adjps = get_adjps(get_case(lexicalisation_parameters[0], subj, negativity, reference), get_number(attributes[1], subj, reference), get_gender(attributes[2], reference), get_degree(attributes[3]), get_words(attributes[4]), attributes[5])
-        return (adjps, get_verb(base, get_number(attributes[1], subj), subj))
+        if reference is None:
+            if lexicalisation_parameters[0].values.all()[0].parameter.type.name == u'agr':
+                return (adjps, [], 0)
+            else:
+                return (adjps, [], -1)
+        elif subj:
+            return (adjps, get_verb(base, get_number(attributes[1], subj), subj), -1)
+        else:
+            return (adjps, [], 1)
     elif lexicalisation_type == 'prepadjp': #prepadjp(prep, case), number, gender, degree, adjectives, atr
         prepadjps = get_prepadjps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, False, reference), get_number(attributes[1], subj, reference), get_gender(attributes[2], reference), get_degree(attributes[3]), get_words(attributes[4]), attributes[5])
-        return (prepadjps, [])
+        return (prepadjps, [], 1)
     elif lexicalisation_type == 'infp':
         infps = get_infps(get_aspect(lexicalisation_parameters[0]), get_words(attributes[2]), attributes[4])
-        return (infps, [])
+        return (infps, [], 1)
     elif lexicalisation_type == 'advp': #advp(type), degree, adverb, atr
         advps = get_advps(get_degree(attributes[1]), get_words(attributes[2]), attributes[3])
-        return (advps, [base])
+        return (advps, [], -1)
     elif lexicalisation_type == 'nump': # nump(case), num, noun, atr
         numps = get_numps(get_case(lexicalisation_parameters[0], subj, negativity, reference), get_words(attributes[1]), get_words(attributes[2]), attributes[3])
-        return (numps, get_verb(base, 'pl', subj))
+        return (numps, get_verb(base, 'pl', subj), -1)
     elif lexicalisation_type == 'prepnump': # prepnump(prep,case), num, noun, atr
-        numps = get_prepnumps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, False, reference), get_words(attributes[1]), get_words(attributes[2]), attributes[3])
-        return (numps, []) #get_verb(base, 'pl', subj))
+        prepnumps = get_prepnumps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, False, reference), get_words(attributes[1]), get_words(attributes[2]), attributes[3])
+        return (prepnumps, [], 1)
     elif lexicalisation_type == 'qub': # qub, form, atr
         qubs = get_qubs(get_words(attributes[1]), attributes[2])
-        return (qubs, [base])
+        return (qubs, [], -1)
     else:
-        return ([], [])
-    return ([], [])
+        return ([], [], 0)
+    return ([], [], 0)
 
 def is_subj(categories):
     for cat in categories:
@@ -82,6 +93,8 @@ def get_case(attribute, is_subj, negativity, reference=None):
             case = [tag.split(':')[1]]
         else:
             case = [tag.split(':')[2]]
+    elif case == u'agr' and reference is None:
+        case = [u'nom']
     else:
         case = [case]
     return case
@@ -125,6 +138,12 @@ def get_degree(attribute):
         degree = u'pos'
     return degree
 
+def in_tag(what, tag):
+    if u':' + what + u':' in tag or u':' + what + u'.' in tag or u'.' + what + u'.' in tag or u'.' + what + u':' in tag or tag.endswith(u'.' + what) or tag.endswith(u':' + what):
+        return True
+    else:
+        return False
+
 def get_nps(cases, number, nouns, atr):
     result = []
     for noun in nouns:
@@ -135,7 +154,7 @@ def get_nps(cases, number, nouns, atr):
                 filtered = []
                 for option in options:
                     (orth, tag) = option
-                    if u':' + case in tag or u'.' + case in tag:
+                    if in_tag(case, tag):
                         filtered.append(option)
                 options_temp += filtered
             else:
@@ -145,7 +164,7 @@ def get_nps(cases, number, nouns, atr):
             filtered = []
             for option in options:
                 (orth, tag) = option
-                if u':' + number + u':' in tag:
+                if in_tag(number, tag):
                     filtered.append(option)
             options = filtered
         result += options
@@ -168,7 +187,7 @@ def get_infps(aspect, verbs, atr):
         if aspect != u'_':
             for option in options:
                 (orth, tag) = option
-                if u':' + aspect + u':' in tag:
+                if in_tag(aspect, tag):
                     filtered.append(option)
             options = filtered
         result += options
@@ -190,7 +209,7 @@ def get_adjps(cases, number, gender, degree, adjectives, atr):
                 filtered = []
                 for option in options:
                     (orth, tag) = option
-                    if u':' + case + u':' in tag:
+                    if in_tag(case, tag):
                         filtered.append(option)
                 options_temp += filtered
             else:
@@ -200,21 +219,21 @@ def get_adjps(cases, number, gender, degree, adjectives, atr):
             filtered = []
             for option in options:
                 (orth, tag) = option
-                if u':' + number + u':' in tag:
+                if in_tag(number, tag):
                     filtered.append(option)
             options = filtered
         if gender != u'_':
             filtered = []
             for option in options:
                 (orth, tag) = option
-                if u':' + gender + u':' in tag or u'.' + gender + u':' in tag or u':' + gender + u'.' in tag or u'.' + gender + u'.' in tag or u'.' + gender + u':' in tag:
+                if in_tag(gender, tag):
                     filtered.append(option)
             options = filtered
         if degree != u'_':
             filtered = []
             for option in options:
                 (orth, tag) = option
-                if u':' + degree in tag:
+                if in_tag(degree, tag):
                     filtered.append(option)
             options = filtered
         result += options
@@ -231,14 +250,14 @@ def get_advps(degree, adverbs, atr):
         filtered = []
         for option in options:
             (orth, tag) = option
-            if u'adv' in tag:
+            if tag.startswith(u'adv'):
                 filtered.append(option)
         options = filtered
         if ':' in tag and degree != u'_':
             filtered = []
             for option in options:
                 (orth, tag) = option
-                if u':' + degree in tag:
+                if in_tag(degree, tag):
                     filtered.append(option)
             options = filtered
         result += options
@@ -251,7 +270,7 @@ def get_qubs(qubs, atr):
         filtered = []
         for option in options:
             (orth, tag) = option
-            if u'qub' in tag:
+            if tag.startswith(u'qub'):
                 filtered.append(option)
         options = filtered
         result += options
@@ -265,7 +284,7 @@ def get_numps(cases, numerals, nouns, atr):
         filtered = []
         for option in options:
             (orth, tag) = option
-            if u'num:' in tag:
+            if tag.startswith(u'num:'):
                 filtered.append(option)
         options = filtered
         options_temp = []
@@ -274,7 +293,7 @@ def get_numps(cases, numerals, nouns, atr):
             filtered = []
             for option in options:
                 (orth, tag) = option
-                if u':' + case + u':' in tag or u':' + case + u'.' in tag or u'.' + case + u'.' in tag or u'.' + case + u':' in tag:
+                if in_tag(case, tag):
                     filtered.append(option)
             options_temp += filtered
         else:
@@ -289,7 +308,7 @@ def get_numps(cases, numerals, nouns, atr):
         filtered = []
         for option in options:
             (orth, tag) = option
-            if u':pl:' in tag:
+            if in_tag(u'pl', tag):
                 filtered.append(option)
         options = filtered
         if rec == 'rec':
@@ -304,7 +323,7 @@ def get_numps(cases, numerals, nouns, atr):
                 filtered = []
                 for option in options:
                     (orth, tag) = option
-                    if u':' + case + u':' in tag or u':' + case + u'.' in tag or u'.' + case + u'.' in tag or u'.' + case + u':' in tag:
+                    if in_tag(case, tag):
                         filtered.append(option)
                 options_temp += filtered
             else:
@@ -312,7 +331,7 @@ def get_numps(cases, numerals, nouns, atr):
             options = options_temp
             for (orth, tag) in options:
                 gender = tag.split(':')[3]
-                if u':' + gender + u':' in num_tag or u':' + gender + u'.' in num_tag or u'.' + gender + u'.' in num_tag or u'.' + gender + u':' in num_tag:
+                if in_tag(gender, num_tag):
                     results.append(num_orth + ' ' + orth)
     return results
 #ignoring ambiguos atr for numps
@@ -330,7 +349,7 @@ def get_verb(inf, number, is_subj):
         filtered = []
         for option in options:
             (orth, tag) = option
-            if u'fin:' in tag and u':' + number + u':' in tag and u':ter:' in tag:
+            if tag.startswith(u'fin') and in_tag(number, tag) and in_tag(u'ter', tag):
                 filtered.append(option)
         options = filtered
     return [orth for orth, _ in options]
@@ -354,7 +373,7 @@ def phrase(head, dependents):
             type = argument.type
         elif argument.type == u'lex':
             type = sortatributes(argument)[0].values.all()[0].argument.type
-        value, _ = lexicalisation(argument, False, '', False, head)
+        value, _, _ = lexicalisation(argument, False, '', False, head)
         values += value
         if type == u'adjp':
             modifiers['pre'].append(values)
diff --git a/semantics/static/js/semantics_lexical_units.js b/semantics/static/js/semantics_lexical_units.js
index b5ff517..fbb53fb 100644
--- a/semantics/static/js/semantics_lexical_units.js
+++ b/semantics/static/js/semantics_lexical_units.js
@@ -256,6 +256,7 @@ function getMeaningsSelectionForFrame(frame_id) {
             var options = [];
             var vrb = [];
             var pre = [];
+            var both = [];
             sid_alt = rows[j].split('_');
             var sch = "schema_" + sid_alt[0] + "_";
             var k;
@@ -265,46 +266,55 @@ function getMeaningsSelectionForFrame(frame_id) {
                     if (connected[lem].indexOf(proper) != -1) {
                         if (schemas_content[sch].display.arguments[0][k].vrb != null &&
                             schemas_content[sch].display.arguments[0][k].vrb.length > 0) {
-                            pre = pre.concat(schemas_content[sch].display.arguments[0][k].lex);
                             vrb = schemas_content[sch].display.arguments[0][k].vrb;
-                        } else {
+                        }
+                        if (schemas_content[sch].display.arguments[0][k].loc == -1) {
+                            pre = pre.concat(schemas_content[sch].display.arguments[0][k].lex);
+                        }
+                        if (schemas_content[sch].display.arguments[0][k].loc == 0) {
+                            both.push(schemas_content[sch].display.arguments[0][k].lex);
+                        }
+                        if (schemas_content[sch].display.arguments[0][k].loc == 1) {
                             options.push(schemas_content[sch].display.arguments[0][k].lex);
                         }
                     }
                 }
            }
+            var lemma;
             if (vrb.length == 0) {
-                var lex = {lemma: [base], pre: pre, args: options};
+                lemma = [base];
                 if (hasRefl(sch)) {
                     if (isNeg(sch)) {
-                        lex.lemma = ["nie " + base + " się"];
+                        lemma = ["nie " + base + " się"];
                     } else {
-                        lex.lemma = [base + " się"];
+                        lemma = [base + " się"];
                     }
                 } else {
                     if (isNeg(sch)) {
-                        lex.lemma = ["nie " + base];
+                        lemma = ["nie " + base];
                     } else {
-                        lex.lemma = [base];
+                        lemma = [base];
                     }
                 }
-                lexicalisation.push(lex);
             } else {
-                var lex = {lemma: vrb, pre: pre, args: options};
+                lemma = vrb;
                 if (hasRefl(sch)) {
                     var l = [];
                     var k;
                     for (k=0; k < vrb.length; k++) {
                         l.push(vrb[k] + " się");
                     }
-                    lex.lemma = l;
+                    lemma = l;
                 }
-                lexicalisation.push(lex);
             }
+
+            var lexes = get_lexes(lemma, pre, both, options);
+            lexicalisation = lexicalisation.concat(lexes);
         }
     }
 
     display += getFormForLexicalisation(lexicalisation);
+    console.log(lexicalisation);
 
     display += "</div>";
@@ -347,6 +357,25 @@ function permute(list) {
 }
 
 
+function get_lexes(lemma, pre, both, post) {
+    var i;
+    var struct = [{lemma:lemma, pre:pre, args:post}];
+    for (i = 0; i < both.length; i++) {
+        temp_struct = [];
+        var j;
+        for (j = 0; j < struct.length; j++) {
+            var pre_lex = JSON.parse(JSON.stringify(struct[j]));
+            pre_lex.pre = pre_lex.pre.concat(both[i]);
+            temp_struct.push(pre_lex);
+            var post_lex = JSON.parse(JSON.stringify(struct[j]));
+            post_lex.args.push(both[i]);
+            temp_struct.push(post_lex);
+        }
+        struct = temp_struct;
+    }
+    return struct;
+}
+
 function cartesian(llist) {
     if (llist.length == 0) {
         return [[]];
diff --git a/semantics/views.py b/semantics/views.py
index 524f5c8..8ce167f 100644
--- a/semantics/views.py
+++ b/semantics/views.py
@@ -518,10 +518,10 @@ def ajax_schemas(request, lemma_id):
                 astr, aobj = a
                 if aobj is not None and aobj.is_phraseologic():
                     tmp = lexicalisation(aobj, is_subj(p.categories.all()), lemma.entry_obj.name, ('neg' in characteristics[characteristic_id]))
-                    lex, vrb = tmp
+                    lex, vrb, loc = tmp
                 else:
-                    lex, vrb = ([], [])
-                arg.append({"csv_id": i, "csv_class": c, "argument": astr, "lex": lex, "vrb": vrb})
+                    lex, vrb, loc = ([], [], 0)
+                arg.append({"csv_id": i, "csv_class": c, "argument": astr, "lex": lex, "vrb": vrb, "loc": loc})
             display["arguments"].append(arg)
 
         schema_display["schemas"].append({"schema_id": str(schema.id), "grade": lemma.get_schema_opinion(schema), "colspan": str(max(len(schema_categories), 1)), "rowspan": str(schema_arguments_rowspan), "display": display, "phraseologic": schema.phraseologic})
--
libgit2 0.22.2
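
The third element now returned by lexicalisation() is a placement flag for the lexicalised phrase relative to the verb: -1 marks a phrase rendered before the verb (it ends up in "pre"), 1 a phrase rendered after it (it ends up in "args"), and 0, returned for an adjp in the agreeing case, allows either side. On the client, get_lexes() in semantics_lexical_units.js expands every loc == 0 phrase into two variants, one prepended and one appended. The Python sketch below mirrors that expansion; the helper name expand_placements and the (phrase, loc) input format are illustrative assumptions, not part of the patch.

    # Illustrative sketch only: mirrors the expansion that get_lexes() performs in
    # semantics_lexical_units.js; the name and input format are assumed, not from the patch.
    def expand_placements(lemma, phrases_with_loc):
        # Start with a single variant: nothing before the verb, nothing after it.
        variants = [{'lemma': lemma, 'pre': [], 'args': []}]
        for phrase, loc in phrases_with_loc:
            if loc == -1:        # phrase must precede the verb
                for v in variants:
                    v['pre'].append(phrase)
            elif loc == 1:       # phrase must follow the verb
                for v in variants:
                    v['args'].append(phrase)
            else:                # loc == 0: keep both orderings
                doubled = []
                for v in variants:
                    doubled.append({'lemma': lemma, 'pre': v['pre'] + [phrase], 'args': list(v['args'])})
                    doubled.append({'lemma': lemma, 'pre': list(v['pre']), 'args': v['args'] + [phrase]})
                variants = doubled
        return variants

    # Example: one fixed post-verbal phrase and one agreeing adjp yield two variants.
    # expand_placements('brać', [('udział', 1), ('czynny', 0)]) ->
    #   [{'lemma': 'brać', 'pre': ['czynny'], 'args': ['udział']},
    #    {'lemma': 'brać', 'pre': [], 'args': ['udział', 'czynny']}]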