Commit 55180dd8e076caa2348b9a2fba1025c880eb663d
1 parent
9df46228
nump - wersja bez atrów
Showing
1 changed file
with
65 additions
and
1 deletion
semantics/phraseology_generator.py
... | ... | @@ -32,6 +32,9 @@ def lexicalisation(argument, subj, base, negativity, reference=None): |
32 | 32 | elif lexicalisation_type == 'advp': #advp(type), degree, adverb, atr |
33 | 33 | advps = get_advps(get_degree(attributes[1]), get_words(attributes[2]), attributes[3]) |
34 | 34 | return (advps, [base]) |
35 | + elif lexicalisation_type == 'nump': # nump(case), num, noun, atr | |
36 | + numps = get_numps(get_case(lexicalisation_parameters[0], subj, negativity, reference), get_words(attributes[1]), get_words(attributes[2]), attributes[3]) | |
37 | + return (numps, get_verb(base, 'pl', subj)) | |
35 | 38 | else: |
36 | 39 | return ([], []) |
37 | 40 | return ([], []) |
... | ... | @@ -89,6 +92,8 @@ def get_number(attribute, is_subj, reference=None): |
89 | 92 | number = u'_' |
90 | 93 | else: |
91 | 94 | number = tag.split(':')[1] |
95 | + elif number == u'agr' and reference is None: | |
96 | + number = u'sg' | |
92 | 97 | return number |
93 | 98 | |
94 | 99 | def get_gender(attribute, reference=None): |
... | ... | @@ -104,6 +109,8 @@ def get_gender(attribute, reference=None): |
104 | 109 | gender = u'_' |
105 | 110 | else: |
106 | 111 | gender = tag.split(':')[3] |
112 | + elif gender == u'agr' and reference is None: | |
113 | + gender = 'm1' | |
107 | 114 | return gender |
108 | 115 | |
109 | 116 | def get_degree(attribute): |
... | ... | @@ -139,7 +146,6 @@ def get_nps(cases, number, nouns, atr): |
139 | 146 | return dependents(atr, result) |
140 | 147 | |
141 | 148 | def get_prepnps(prep, cases, number, nouns, _atr): |
142 | - # ala["ma"] = kot | |
143 | 149 | nps = get_nps(cases, number, nouns, _atr) |
144 | 150 | return [prep + ' ' + np for np in nps] |
145 | 151 | |
... | ... | @@ -232,6 +238,64 @@ def get_advps(degree, adverbs, atr): |
232 | 238 | result += options |
233 | 239 | return dependents(atr, result) |
234 | 240 | |
def _tag_has_value(tag, value):
    """Tell whether ``value`` is one of the ':'/'.'-delimited items of ``tag``.

    Covers every boundary combination (':x:', ':x.', '.x.', '.x:'), so the
    last item of a dotted list such as 'nom.acc.voc:' is matched as well.
    """
    # Padding with ':' lets a value in the first or last field match too.
    padded = u':' + tag + u':'
    return any(left + value + right in padded
               for left in (u':', u'.')
               for right in (u':', u'.'))


def _select_by_cases(forms, cases):
    """Collect, case by case, the (orth, tag) forms whose tag carries the case.

    The wildcard case u'_' contributes every form unfiltered. A form matching
    several of the requested cases is listed once per matching case.
    """
    selected = []
    for case in cases:
        if case == u'_':
            selected += forms
        else:
            selected += [form for form in forms
                         if _tag_has_value(form[1], case)]
    return selected


def get_numps(cases, numerals, nouns, atr):
    """Build numeral phrases of the form "<numeral form> <plural noun form>".

    Parameters:
        cases    -- iterable of case names; u'_' stands for "any case".
        numerals -- numeral lemmas passed to MORFEUSZ2.generate.
        nouns    -- noun lemmas passed to MORFEUSZ2.generate.
        atr      -- accepted for signature parity with the other get_*ps
                    helpers; not used here.

    Returns:
        A list of strings, one per compatible (numeral form, noun form) pair,
        ordered by numeral form, then noun, then noun form. An empty list is
        returned when no numeral form matches the requested cases.

    Selection rules, as implemented below:
        * numeral forms: generated interpretations whose tag contains 'num:'
          and which carry one of the requested cases;
        * noun forms: generated interpretations whose tag contains 'subst:'
          and ':pl:'; they are taken in 'gen' when the fifth field of the
          numeral tag is 'rec', otherwise in the requested cases;
        * a pair is kept when the noun's gender (fourth tag field) occurs in
          the numeral tag.
    """
    nums = []
    for numeral in numerals:
        generated = [(interp.orth, interp.getTag(MORFEUSZ2))
                     for interp in MORFEUSZ2.generate(numeral.encode('utf8'))]
        numeral_forms = [form for form in generated if u'num:' in form[1]]
        # Accumulate over all numerals; plain assignment here used to discard
        # the forms of every numeral but the last one.
        nums += _select_by_cases(numeral_forms, cases)
    if not nums:
        return []

    # Plural noun forms do not depend on the numeral, so generate them once
    # per noun instead of once per (numeral form, noun) pair.
    # NOTE(review): assumes MORFEUSZ2.generate is deterministic for a given
    # lemma -- confirm against the Morfeusz bindings in use.
    plural_forms_per_noun = []
    for noun in nouns:
        generated = [(interp.orth, interp.getTag(MORFEUSZ2))
                     for interp in MORFEUSZ2.generate(noun.encode('utf8'))]
        plural_forms_per_noun.append(
            [form for form in generated
             if 'subst:' in form[1] and u':pl:' in form[1]])

    results = []
    for (num_orth, num_tag) in nums:
        num_fields = num_tag.split(':')
        # A numeral tag too short to have a fifth field is treated as
        # non-'rec' instead of raising IndexError.
        if len(num_fields) > 4 and num_fields[4] == 'rec':
            noun_cases = ['gen']
        else:
            noun_cases = cases
        for noun_forms in plural_forms_per_noun:
            for (orth, tag) in _select_by_cases(noun_forms, noun_cases):
                tag_fields = tag.split(':')
                # Skip noun tags without a gender field rather than crash.
                if len(tag_fields) > 3 and _tag_has_value(num_tag, tag_fields[3]):
                    results.append(num_orth + ' ' + orth)

    return results  # ignoring ambiguous atr for numps
298 | + | |
235 | 299 | |
236 | 300 | def get_verb(inf, number, is_subj): |
237 | 301 | if not is_subj: |
... | ... |