Commit d7dfbb4712ac5afd99133c5dbd31be6bbeebd04d
1 parent
5de7aa01
lematyzacja dla adjp, prepadjp
Showing
1 changed file
with
75 additions
and
4 deletions
semantics/phraseology_generator.py
| ... | ... | @@ -20,6 +20,12 @@ def lexicalisation(argument, categories, base): |
| 20 | 20 | elif lexicalisation_type == 'prepnp': #prepnp(prep, case), number, nouns, atr |
| 21 | 21 | prepnps = get_prepnps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3]) |
| 22 | 22 | return (prepnps, []) |
| 23 | + elif lexicalisation_type == 'adjp': # adjp(case), number, gender, degree, adjectives, atr | |
| 24 | + adjps = get_adjps(get_case(lexicalisation_parameters[0], subj), get_number(attributes[1], subj), get_gender(attributes[2]), get_degree(attributes[3]), get_words(attributes[4]), attributes[5]) | |
| 25 | + return (adjps, get_verb(base, get_number(attributes[1], subj), subj)) | |
| 26 | + elif lexicalisation_type == 'prepadjp': #prepadjp(prep, case), number, gender, degree, adjectives, atr | |
| 27 | + prepadjps = get_prepadjps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj), get_number(attributes[1], subj), get_gender(attributes[2]), get_degree(attributes[3]), get_words(attributes[4]), attributes[5]) | |
| 28 | + return (prepadjps, []) | |
| 23 | 29 | else: |
| 24 | 30 | return ([], []) |
| 25 | 31 | return ([], []) |
| ... | ... | @@ -33,6 +39,9 @@ def is_subj(categories): |
def get_preposition(attribute):
    """Return the parameter type name of the attribute's first value.

    Only the first element of ``attribute.values.all()`` is consulted;
    an empty collection raises ``IndexError``.
    """
    first_value = attribute.values.all()[0]
    return first_value.parameter.type.name
| 35 | 41 | |
def get_numerals(attribute):
    """Return the numerals stored in ``attribute``.

    Numerals are read exactly like any other word list, so this simply
    delegates to ``get_words``.
    """
    numerals = get_words(attribute)
    return numerals
| 44 | + | |
def get_words(attribute):
    """Return the text of every value of ``attribute``, trimmed by one
    character at each end.

    NOTE(review): the trimmed characters are presumably surrounding
    quotes in the stored text -- confirm against the data.
    """
    return [value.text[1:-1] for value in attribute.values.all()]
| ... | ... | @@ -53,6 +62,22 @@ def get_number(attribute, is_subj): |
| 53 | 62 | number = u'sg' |
| 54 | 63 | return number |
| 55 | 64 | |
def get_gender(attribute):
    """Return the gender symbol taken from the attribute's first value.

    The parameter type name is normalised as follows:
      * ``_`` and ``n`` both become ``n1``,
      * ``m`` becomes ``m1``,
      * any other name is returned unchanged.
    """
    name = attribute.values.all()[0].parameter.type.name
    if name == u'_' or name == u'n':
        return u'n1'
    if name == u'm':
        return u'm1'
    return name
| 74 | + | |
def get_degree(attribute):
    """Return the degree symbol taken from the attribute's first value.

    The placeholder ``_`` is replaced by ``pos``; any other parameter
    type name is returned unchanged.
    """
    name = attribute.values.all()[0].parameter.type.name
    return u'pos' if name == u'_' else name
| 80 | + | |
| 56 | 81 | def get_nps(case, number, nouns, _atr): |
| 57 | 82 | result = [] |
| 58 | 83 | for noun in nouns: |
| ... | ... | @@ -61,23 +86,69 @@ def get_nps(case, number, nouns, _atr): |
| 61 | 86 | filtered = [] |
| 62 | 87 | for option in options: |
| 63 | 88 | (orth, tag) = option |
| 64 | - if case in tag: | |
| 89 | + if u':' + case + u':' in tag: | |
| 65 | 90 | filtered.append(option) |
| 66 | 91 | options = filtered |
| 67 | 92 | if number != u'_': |
| 68 | 93 | filtered = [] |
| 69 | 94 | for option in options: |
| 70 | 95 | (orth, tag) = option |
| 71 | - if number in tag: | |
| 96 | + if u':' + number + u':' in tag: | |
| 72 | 97 | filtered.append(option) |
| 73 | 98 | options = filtered |
| 74 | - return [orth for orth, _ in options] | |
| 99 | + result += options | |
| 100 | + return [orth for orth, _ in result] | |
| 75 | 101 | |
def get_prepnps(prep, case, number, nouns, _atr):
    """Return prepositional noun phrases: ``prep``, a space, then each
    noun form produced by ``get_nps`` for the same arguments.
    """
    phrases = []
    for noun_phrase in get_nps(case, number, nouns, _atr):
        phrases.append(prep + ' ' + noun_phrase)
    return phrases
| 80 | 106 | |
def _tag_has_value(tag, value):
    """Tell whether any non-initial field of ``tag`` lists ``value``.

    Tag fields are separated by ':' and one field may pack several
    alternatives separated by '.'. The first field is skipped, matching
    the previous checks, which always required a leading ':' or '.'.
    """
    for field in tag.split(u':')[1:]:
        if value in field.split(u'.'):
            return True
    return False


def get_adjps(case, number, gender, degree, adjectives, _atr):
    """Generate the inflected forms of ``adjectives`` that match the
    requested grammatical values.

    Each adjective is passed to ``MORFEUSZ2.generate``; only
    interpretations whose tag contains ``adj:`` are kept, and of those
    only the ones whose tag lists every requested value.

    Parameters:
      case, number, gender, degree -- values to require in the tag; the
          placeholder ``_`` means "do not filter on this category".
      adjectives -- base forms to inflect (each is UTF-8 encoded before
          being handed to the generator).
      _atr -- accepted for signature parity with the other generators;
          not used here.

    Returns:
      A list with the ``orth`` of every matching interpretation, in
      generator order, adjective by adjective.

    Matching is field-aware for all four categories, so a packed field
    such as ``nom.voc`` satisfies a request for ``nom``. Previously only
    gender accepted '.'-packed fields, while case and number required an
    exact ``:value:`` substring and so dropped such forms.
    NOTE(review): assumes tags shaped like ``adj:sg:nom.voc:m1.m2.m3:pos``
    -- confirm against the dictionary in use.
    """
    # Collect only the categories that actually constrain the result.
    wanted = [value for value in (case, number, gender, degree)
              if value != u'_']
    result = []
    for adjective in adjectives:
        for interp in MORFEUSZ2.generate(adjective.encode('utf8')):
            tag = interp.getTag(MORFEUSZ2)
            if u'adj:' not in tag:
                continue
            if all(_tag_has_value(tag, value) for value in wanted):
                result.append(interp.orth)
    return result
| 147 | + | |
def get_prepadjps(prep, case, number, gender, degree, adjectives, _atr):
    """Return prepositional adjective phrases: ``prep``, a space, then
    each adjective form produced by ``get_adjps`` for the same arguments.
    """
    phrases = []
    for adjective_phrase in get_adjps(case, number, gender, degree,
                                      adjectives, _atr):
        phrases.append(prep + ' ' + adjective_phrase)
    return phrases
| 151 | + | |
| 81 | 152 | def get_verb(inf, number, is_subj): |
| 82 | 153 | if not is_subj: |
| 83 | 154 | return None |
| ... | ... | @@ -86,7 +157,7 @@ def get_verb(inf, number, is_subj): |
| 86 | 157 | filtered = [] |
| 87 | 158 | for option in options: |
| 88 | 159 | (orth, tag) = option |
| 89 | - if u'fin' in tag and u'sg' in tag and u'ter' in tag: | |
| 160 | + if u'fin:' in tag and u':sg:' in tag and u':ter:' in tag: | |
| 90 | 161 | filtered.append(option) |
| 91 | 162 | options = filtered |
| 92 | 163 | return [orth for orth, _ in options] |
| ... | ... |