Commit d7dfbb4712ac5afd99133c5dbd31be6bbeebd04d
1 parent
5de7aa01
lematyzacja dla adjp, prepadjp
Showing
1 changed file
with
75 additions
and
4 deletions
semantics/phraseology_generator.py
... | ... | @@ -20,6 +20,12 @@ def lexicalisation(argument, categories, base): |
20 | 20 | elif lexicalisation_type == 'prepnp': #prepnp(prep, case), number, nouns, atr |
21 | 21 | prepnps = get_prepnps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3]) |
22 | 22 | return (prepnps, []) |
23 | + elif lexicalisation_type == 'adjp': # adjp(case), number, gender, degree, adjectives, atr | |
24 | + adjps = get_adjps(get_case(lexicalisation_parameters[0], subj), get_number(attributes[1], subj), get_gender(attributes[2]), get_degree(attributes[3]), get_words(attributes[4]), attributes[5]) | |
25 | + return (adjps, get_verb(base, get_number(attributes[1], subj), subj)) | |
26 | + elif lexicalisation_type == 'prepadjp': #prepadjp(prep, case), number, gender, degree, adjectives, atr | |
27 | + prepadjps = get_prepadjps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj), get_number(attributes[1], subj), get_gender(attributes[2]), get_degree(attributes[3]), get_words(attributes[4]), attributes[5]) | |
28 | + return (prepadjps, []) | |
23 | 29 | else: |
24 | 30 | return ([], []) |
25 | 31 | return ([], []) |
... | ... | @@ -33,6 +39,9 @@ def is_subj(categories): |
def get_preposition(attribute):
    """Return the parameter type name of the attribute's first value.

    Only the first element of ``attribute.values.all()`` is consulted;
    an empty collection raises ``IndexError``.
    """
    first_value = attribute.values.all()[0]
    parameter_type = first_value.parameter.type
    return parameter_type.name
35 | 41 | |
def get_numerals(attribute):
    """Return whatever ``get_words`` yields for ``attribute``.

    This is a plain alias: the attribute is handed to ``get_words``
    unchanged and its result is returned as-is.
    """
    numerals = get_words(attribute)
    return numerals
44 | + | |
def get_words(attribute):
    """Return the texts of all attribute values, each without its first and last character.

    The slice ``[1:-1]` drops one character from either end of every
    ``text`` (presumably surrounding quotes -- confirm against the data).
    """
    return [value.text[1:-1] for value in attribute.values.all()]
... | ... | @@ -53,6 +62,22 @@ def get_number(attribute, is_subj): |
53 | 62 | number = u'sg' |
54 | 63 | return number |
55 | 64 | |
def get_gender(attribute):
    """Return the gender symbol taken from the attribute's first value.

    The parameter type name of the first value is normalised as follows:
    ``_`` and ``n`` both become ``n1``, ``m`` becomes ``m1``; any other
    name is returned unchanged.
    """
    first_value = attribute.values.all()[0]
    name = first_value.parameter.type.name
    # u'_' is treated like u'n', so both end up as u'n1'.
    normalised = {u'_': u'n1', u'n': u'n1', u'm': u'm1'}
    return normalised.get(name, name)
74 | + | |
def get_degree(attribute):
    """Return the degree symbol taken from the attribute's first value.

    The parameter type name of the first value is returned, except that
    the placeholder ``_`` is replaced by ``pos``.
    """
    name = attribute.values.all()[0].parameter.type.name
    return u'pos' if name == u'_' else name
80 | + | |
56 | 81 | def get_nps(case, number, nouns, _atr): |
57 | 82 | result = [] |
58 | 83 | for noun in nouns: |
... | ... | @@ -61,23 +86,69 @@ def get_nps(case, number, nouns, _atr): |
61 | 86 | filtered = [] |
62 | 87 | for option in options: |
63 | 88 | (orth, tag) = option |
64 | - if case in tag: | |
89 | + if u':' + case + u':' in tag: | |
65 | 90 | filtered.append(option) |
66 | 91 | options = filtered |
67 | 92 | if number != u'_': |
68 | 93 | filtered = [] |
69 | 94 | for option in options: |
70 | 95 | (orth, tag) = option |
71 | - if number in tag: | |
96 | + if u':' + number + u':' in tag: | |
72 | 97 | filtered.append(option) |
73 | 98 | options = filtered |
74 | - return [orth for orth, _ in options] | |
99 | + result += options | |
100 | + return [orth for orth, _ in result] | |
75 | 101 | |
def get_prepnps(prep, case, number, nouns, _atr):
    """Return every form produced by ``get_nps`` prefixed with ``prep`` and a space.

    ``case``, ``number``, ``nouns`` and ``_atr`` are forwarded to
    ``get_nps`` unchanged.
    """
    phrases = []
    for noun_phrase in get_nps(case, number, nouns, _atr):
        phrases.append(prep + ' ' + noun_phrase)
    return phrases
80 | 106 | |
def _tag_has_value(tag, value):
    """Tell whether ``value`` is listed in any non-initial field of ``tag``.

    A tag is a colon-separated sequence of fields; a single field may
    list several alternatives joined with dots (e.g. ``nom.voc`` or
    ``m1.m2.m3``).  The first field (the grammatical class) is skipped.
    """
    fields = tag.split(u':')[1:]
    return any(value in field.split(u'.') for field in fields)


def get_adjps(case, number, gender, degree, adjectives, _atr):
    """Generate the inflected forms of ``adjectives`` matching the given features.

    For every adjective all forms produced by ``MORFEUSZ2.generate`` are
    inspected; a form is kept when its tag contains ``adj:`` and lists
    every requested feature value.

    Parameters:
        case, number, gender, degree: required feature values; the
            placeholder ``u'_'`` means "do not filter on this feature".
        adjectives: iterable of unicode lemmas.
        _atr: unused; kept for signature parity with the sibling generators.

    Returns:
        A list of matching orthographic forms, in generation order
        (duplicates are not removed).
    """
    # Previously case and number were matched as the exact substring
    # ":value:", which silently dropped tags that merge alternatives with
    # dots (e.g. "adj:sg:nom.voc:m1.m2.m3:pos" for case "nom").  All
    # features are now matched against the dot-separated alternatives,
    # the way gender already was.
    required = [value for value in (case, number, gender, degree) if value != u'_']
    result = []
    for adjective in adjectives:
        for interp in MORFEUSZ2.generate(adjective.encode('utf8')):
            tag = interp.getTag(MORFEUSZ2)
            if u'adj:' not in tag:
                continue
            if all(_tag_has_value(tag, value) for value in required):
                result.append(interp.orth)
    return result
147 | + | |
def get_prepadjps(prep, case, number, gender, degree, adjectives, _atr):
    """Return every form produced by ``get_adjps`` prefixed with ``prep`` and a space.

    All arguments other than ``prep`` are forwarded to ``get_adjps``
    unchanged.
    """
    phrases = []
    for adjective_phrase in get_adjps(case, number, gender, degree, adjectives, _atr):
        phrases.append(prep + ' ' + adjective_phrase)
    return phrases
151 | + | |
81 | 152 | def get_verb(inf, number, is_subj): |
82 | 153 | if not is_subj: |
83 | 154 | return None |
... | ... | @@ -86,7 +157,7 @@ def get_verb(inf, number, is_subj): |
86 | 157 | filtered = [] |
87 | 158 | for option in options: |
88 | 159 | (orth, tag) = option |
89 | - if u'fin' in tag and u'sg' in tag and u'ter' in tag: | |
160 | + if u'fin:' in tag and u':sg:' in tag and u':ter:' in tag: | |
90 | 161 | filtered.append(option) |
91 | 162 | options = filtered |
92 | 163 | return [orth for orth, _ in options] |
... | ... |