Commit 02459548268064a9915211a4a0bf21851abc23d4
1 parent
04becbf6
ratry -- działanie podstawowe
Showing
1 changed file
with
101 additions
and
12 deletions
semantics/phraseology_generator.py
... | ... | @@ -2,8 +2,9 @@ |
2 | 2 | |
3 | 3 | from dictionary.models import sort_arguments, sort_positions, sortatributes |
4 | 4 | from settings import MORFEUSZ2 |
5 | +from copy import deepcopy | |
5 | 6 | |
6 | -def lexicalisation(argument, subj, base, negativity): | |
7 | +def lexicalisation(argument, subj, base, negativity, reference=None): | |
7 | 8 | b = argument.type |
8 | 9 | if b == 'fixed': |
9 | 10 | return (get_words(sortatributes(argument)[-1]), []) |
... | ... | @@ -20,10 +21,10 @@ def lexicalisation(argument, subj, base, negativity): |
20 | 21 | prepnps = get_prepnps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, negativity), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3]) |
21 | 22 | return (prepnps, []) |
22 | 23 | elif lexicalisation_type == 'adjp': # adjp(case), number, gender, degree, adjectives, atr |
23 | - adjps = get_adjps(get_case(lexicalisation_parameters[0], subj, negativity), get_number(attributes[1], subj), get_gender(attributes[2]), get_degree(attributes[3]), get_words(attributes[4]), attributes[5]) | |
24 | + adjps = get_adjps(get_case(lexicalisation_parameters[0], subj, negativity, reference), get_number(attributes[1], subj, reference), get_gender(attributes[2], reference), get_degree(attributes[3]), get_words(attributes[4]), attributes[5]) | |
24 | 25 | return (adjps, get_verb(base, get_number(attributes[1], subj), subj)) |
25 | 26 | elif lexicalisation_type == 'prepadjp': #prepadjp(prep, case), number, gender, degree, adjectives, atr |
26 | - prepadjps = get_prepadjps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, False), get_number(attributes[1], subj), get_gender(attributes[2]), get_degree(attributes[3]), get_words(attributes[4]), attributes[5]) | |
27 | + prepadjps = get_prepadjps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, False, reference), get_number(attributes[1], subj, reference), get_gender(attributes[2], reference), get_degree(attributes[3]), get_words(attributes[4]), attributes[5]) | |
27 | 28 | return (prepadjps, []) |
28 | 29 | else: |
29 | 30 | return ([], []) |
... | ... | @@ -45,7 +46,7 @@ def get_words(attribute): |
45 | 46 | words = [word.text[1:-1] for word in attribute.values.all()] |
46 | 47 | return words |
47 | 48 | |
48 | -def get_case(attribute, is_subj, negativity): | |
49 | +def get_case(attribute, is_subj, negativity, reference=None): | |
49 | 50 | case = attribute.values.all()[0].parameter.type.name |
50 | 51 | if case == u'str': |
51 | 52 | if is_subj: |
... | ... | @@ -54,23 +55,32 @@ def get_case(attribute, is_subj, negativity): |
54 | 55 | case = u'gen' |
55 | 56 | else: |
56 | 57 | case = u'acc' |
58 | + elif case == u'agr' and reference is not None: | |
59 | + _, tag = reference | |
60 | + case = tag.split(':')[2] | |
57 | 61 | return case |
58 | 62 | |
def get_number(attribute, is_subj, reference=None):
    """Resolve the grammatical number requested by a lexicalisation attribute.

    The declared value is read from the first entry of
    ``attribute.values.all()`` (``.parameter.type.name``).

    * ``'_'`` becomes ``'sg'`` when ``is_subj`` is true; otherwise it is
      returned unchanged.
    * ``'agr'`` with a ``reference`` given is replaced by the second
      colon-separated field of the reference's tag.
    * Any other value is returned unchanged.

    ``reference`` is an ``(orth, tag)`` pair describing the head the phrase
    agrees with.
    NOTE(review): presumably ``tag`` is a Morfeusz tag laid out as
    ``pos:number:case:gender`` -- confirm against the tagset in use.
    """
    declared = attribute.values.all()[0].parameter.type.name

    if declared == u'_':
        return u'sg' if is_subj else declared

    if declared == u'agr' and reference is not None:
        _orth, head_tag = reference
        return head_tag.split(':')[1]

    return declared
65 | 72 | |
def get_gender(attribute, reference=None):
    """Resolve the grammatical gender requested by a lexicalisation attribute.

    The declared value is read from the first entry of
    ``attribute.values.all()`` (``.parameter.type.name``) and normalised:

    * ``'_'`` (unspecified) defaults to neuter and is then normalised exactly
      like an explicit ``'n'``;
    * ``'n'`` becomes ``'n1'`` and ``'m'`` becomes ``'m1'``;
    * ``'agr'`` with a ``reference`` given is replaced by the fourth
      colon-separated field of the reference's tag;
    * any other value is returned unchanged.

    ``reference`` is an ``(orth, tag)`` pair describing the head the phrase
    agrees with.
    NOTE(review): presumably ``tag`` is a Morfeusz tag laid out as
    ``pos:number:case:gender`` -- confirm against the tagset in use.
    """
    gender = attribute.values.all()[0].parameter.type.name
    if gender == u'_':
        gender = u'n'
    # Deliberately a fresh ``if`` rather than ``elif``: the '_' default above
    # must fall through to the 'n' -> 'n1' normalisation, otherwise an
    # unspecified gender would come back as the bare 'n'.
    if gender == u'n':
        gender = u'n1'
    elif gender == u'm':
        gender = u'm1'
    elif gender == u'agr' and reference is not None:
        _, tag = reference
        gender = tag.split(':')[3]
    return gender
75 | 85 | |
76 | 86 | def get_degree(attribute): |
... | ... | @@ -79,7 +89,7 @@ def get_degree(attribute): |
79 | 89 | degree = u'pos' |
80 | 90 | return degree |
81 | 91 | |
82 | -def get_nps(case, number, nouns, _atr): | |
92 | +def get_nps(case, number, nouns, atr): | |
83 | 93 | result = [] |
84 | 94 | for noun in nouns: |
85 | 95 | options = [(interp.orth, interp.getTag(MORFEUSZ2)) for interp in MORFEUSZ2.generate(noun.encode('utf8'))] |
... | ... | @@ -98,14 +108,14 @@ def get_nps(case, number, nouns, _atr): |
98 | 108 | filtered.append(option) |
99 | 109 | options = filtered |
100 | 110 | result += options |
101 | - return [orth for orth, _ in result] | |
111 | + return dependents(atr, result) | |
102 | 112 | |
def get_prepnps(prep, case, number, nouns, _atr):
    """Build prepositional noun phrases.

    Delegates to ``get_nps(case, number, nouns, _atr)`` and prefixes every
    phrase it returns with ``prep`` followed by a single space.
    """
    # NOTE(review): the original carried the comment 'ala["ma"] = kot' here;
    # its meaning is unclear.
    prepositional = []
    for noun_phrase in get_nps(case, number, nouns, _atr):
        prepositional.append(prep + ' ' + noun_phrase)
    return prepositional
107 | 117 | |
108 | -def get_adjps(case, number, gender, degree, adjectives, _atr): | |
118 | +def get_adjps(case, number, gender, degree, adjectives, atr): | |
109 | 119 | result = [] |
110 | 120 | for adjective in adjectives: |
111 | 121 | options = [(interp.orth, interp.getTag(MORFEUSZ2)) for interp in MORFEUSZ2.generate(adjective.encode('utf8'))] |
... | ... | @@ -144,7 +154,7 @@ def get_adjps(case, number, gender, degree, adjectives, _atr): |
144 | 154 | filtered.append(option) |
145 | 155 | options = filtered |
146 | 156 | result += options |
147 | - return [orth for orth, _ in result] | |
157 | + return dependents(atr, result) | |
148 | 158 | |
149 | 159 | def get_prepadjps(prep, case, number, gender, degree, adjectives, _atr): |
150 | 160 | adjps = get_adjps(case, number, gender, degree, adjectives, _atr) |
... | ... | @@ -162,3 +172,82 @@ def get_verb(inf, number, is_subj): |
162 | 172 | filtered.append(option) |
163 | 173 | options = filtered |
164 | 174 | return [orth for orth, _ in options] |
175 | + | |
def dependents(atr, options):
    """Expand generated heads with their modifiers when the mode asks for it.

    ``options`` is a sequence of ``(orth, tag)`` pairs.  When
    ``atr.selection_mode.name`` is ``'ratr'`` or ``'ratr1'`` every option is
    passed to ``phrase`` together with ``atr.values.all()`` and the resulting
    phrases are concatenated.  For any other mode only the orthographic forms
    of the options are returned.
    """
    if atr.selection_mode.name not in (u'ratr', u'ratr1'):
        return [orth for orth, _ in options]

    # Fetch the modifier collection once instead of once per option.
    # NOTE(review): assumes ``atr.values.all()`` returns a re-iterable
    # collection (e.g. a Django QuerySet, which caches its rows after the
    # first evaluation) -- confirm.
    modifiers = atr.values.all()
    result = []
    for option in options:
        result.extend(phrase(option, modifiers))
    return result
184 | + | |
def phrase(head, dependents):
    """Combine a head word with every admissible arrangement of modifiers.

    ``head`` is an ``(orth, tag)`` pair; it is also handed to
    ``lexicalisation`` as the agreement reference.  ``dependents`` is an
    iterable of objects exposing ``position.arguments.all()``.

    Each dependent is lexicalised into a list of alternative surface forms.
    A dependent whose last recognised argument kind is ``'adjp'`` is placed
    before the head; every other dependent is placed after it.  Within each
    side all orderings (``permutations``) and all choices of alternatives
    (``cartesian``, which also allows a dependent to be left out) are
    produced.

    Returns a list of strings.  The bare head with no modifier on either
    side is never returned.  Duplicates are removed and results come out in
    first-generated order, so the output is deterministic.

    NOTE(review): the nesting of the ``lexicalisation`` call (once per
    argument, regardless of its type) was reconstructed from a rendering
    without indentation -- confirm against the repository.
    """
    def _surface_forms(groups):
        # Every ordering x every choice of alternatives, joined into text and
        # deduplicated while keeping first-seen order.
        seen = set()
        forms = []
        for ordering in permutations(groups):
            for words in cartesian(ordering):
                text = ' '.join(words)
                if text not in seen:
                    seen.add(text)
                    forms.append(text)
        return forms

    before_head = []
    after_head = []
    for dependent in dependents:
        values = []
        kind = None
        for argument in dependent.position.arguments.all():
            if argument.type == u'fixed':
                kind = argument.type
            elif argument.type == u'lex':
                kind = sortatributes(argument)[0].values.all()[0].argument.type
            value, _ = lexicalisation(argument, False, '', False, head)
            values += value
        if kind == 'adjp':
            before_head.append(values)
        else:
            after_head.append(values)

    prefixes = _surface_forms(before_head)
    suffixes = _surface_forms(after_head)
    orth, _ = head

    result = []
    for prefix in prefixes:
        for suffix in suffixes:
            if prefix == '' and suffix == '':
                # Head alone: skipped (presumably because the 'ratr' modes
                # require at least one modifier -- confirm).
                continue
            if prefix == '':
                result.append(orth + ' ' + suffix)
            elif suffix == '':
                result.append(prefix + ' ' + orth)
            else:
                result.append(prefix + ' ' + orth + ' ' + suffix)
    return result
224 | + | |
225 | + | |
def cartesian(llist):
    """Return every way of picking at most one element from each group.

    ``llist`` is a sequence of groups (each an iterable of alternatives).
    Unlike a plain Cartesian product, every group may also be skipped
    entirely, so the empty selection ``[]`` is always part of the result.

    The result is a list of lists.  Selections are ordered by the element
    chosen from the first group (in group order), with the selections that
    skip the first group coming last; the same rule applies recursively to
    the remaining groups.

    Elements are placed in the result as-is (not copied).
    """
    # Built from the last group backwards so that no recursion, slicing or
    # deep copying is needed; the output order matches the recursive
    # definition "for each element of the first group: element + tail, then
    # the tail on its own".
    result = [[]]
    for group in reversed(llist):
        chosen = [[element] + rest for element in group for rest in result]
        result = chosen + result
    return result
239 | + | |
def permutations(llist):
    """Return all orderings of the items of ``llist`` as a list of lists.

    An empty input yields ``[[]]``.  Orderings are generated by inserting
    each item, starting from the last one, at every position of the
    orderings built so far; for ``[1, 2, 3]`` this gives
    ``[1, 2, 3], [2, 1, 3], [2, 3, 1], [1, 3, 2], [3, 1, 2], [3, 2, 1]``.

    Items are treated as opaque values and are not copied: the returned
    orderings reference the same item objects as ``llist``.
    """
    # Slice concatenation builds each new ordering directly, so the partial
    # orderings (and any nested lists inside them) never need deep copying.
    result = [[]]
    for item in reversed(llist):
        result = [
            perm[:position] + [item] + perm[position:]
            for perm in result
            for position in range(len(perm) + 1)
        ]
    return result
252 | + | |
253 | + | |
... | ... |