Commit 02459548268064a9915211a4a0bf21851abc23d4

Authored by Tomasz Bartosiak
1 parent 04becbf6

ratry -- działanie podstawowe

Showing 1 changed file with 101 additions and 12 deletions
semantics/phraseology_generator.py
... ... @@ -2,8 +2,9 @@
2 2  
3 3 from dictionary.models import sort_arguments, sort_positions, sortatributes
4 4 from settings import MORFEUSZ2
  5 +from copy import deepcopy
5 6  
6   -def lexicalisation(argument, subj, base, negativity):
  7 +def lexicalisation(argument, subj, base, negativity, reference=None):
7 8 b = argument.type
8 9 if b == 'fixed':
9 10 return (get_words(sortatributes(argument)[-1]), [])
... ... @@ -20,10 +21,10 @@ def lexicalisation(argument, subj, base, negativity):
20 21 prepnps = get_prepnps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, negativity), get_number(attributes[1], subj), get_words(attributes[2]), attributes[3])
21 22 return (prepnps, [])
22 23 elif lexicalisation_type == 'adjp': # adjp(case), number, gender, degree, adjectives, atr
23   - adjps = get_adjps(get_case(lexicalisation_parameters[0], subj, negativity), get_number(attributes[1], subj), get_gender(attributes[2]), get_degree(attributes[3]), get_words(attributes[4]), attributes[5])
  24 + adjps = get_adjps(get_case(lexicalisation_parameters[0], subj, negativity, reference), get_number(attributes[1], subj, reference), get_gender(attributes[2], reference), get_degree(attributes[3]), get_words(attributes[4]), attributes[5])
24 25 return (adjps, get_verb(base, get_number(attributes[1], subj), subj))
25 26 elif lexicalisation_type == 'prepadjp': #prepadjp(prep, case), number, gender, degree, adjectives, atr
26   - prepadjps = get_prepadjps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, False), get_number(attributes[1], subj), get_gender(attributes[2]), get_degree(attributes[3]), get_words(attributes[4]), attributes[5])
  27 + prepadjps = get_prepadjps(get_preposition(lexicalisation_parameters[0]), get_case(lexicalisation_parameters[1], subj, False, reference), get_number(attributes[1], subj, reference), get_gender(attributes[2], reference), get_degree(attributes[3]), get_words(attributes[4]), attributes[5])
27 28 return (prepadjps, [])
28 29 else:
29 30 return ([], [])
... ... @@ -45,7 +46,7 @@ def get_words(attribute):
45 46 words = [word.text[1:-1] for word in attribute.values.all()]
46 47 return words
47 48  
48   -def get_case(attribute, is_subj, negativity):
  49 +def get_case(attribute, is_subj, negativity, reference=None):
49 50 case = attribute.values.all()[0].parameter.type.name
50 51 if case == u'str':
51 52 if is_subj:
... ... @@ -54,23 +55,32 @@ def get_case(attribute, is_subj, negativity):
54 55 case = u'gen'
55 56 else:
56 57 case = u'acc'
  58 + elif case == u'agr' and reference is not None:
  59 + _, tag = reference
  60 + case = tag.split(':')[2]
57 61 return case
58 62  
def get_number(attribute, is_subj, reference=None):
    """Resolve the grammatical number requested by ``attribute``.

    The declared value is read from the first value of ``attribute``.
    * ``_`` (unspecified) becomes ``sg`` for a subject and is otherwise
      returned unchanged.
    * ``agr`` with a ``reference`` pair ``(orth, tag)`` takes the second
      colon-separated field of ``tag``.
    * Anything else (including ``agr`` without a reference) is returned
      as declared.
    """
    declared = attribute.values.all()[0].parameter.type.name
    if declared == u'_':
        return u'sg' if is_subj else declared
    if declared == u'agr' and reference is not None:
        _orth, head_tag = reference
        return head_tag.split(':')[1]
    return declared
65 72  
def get_gender(attribute, reference=None):
    """Resolve the grammatical gender requested by ``attribute``.

    The declared value is read from the first value of ``attribute`` and
    normalised as follows:

    * ``_`` (unspecified) and ``n`` both become ``n1``;
    * ``m`` becomes ``m1``;
    * ``agr`` with a ``reference`` pair ``(orth, tag)`` takes the fourth
      colon-separated field of ``tag``;
    * anything else (including ``agr`` without a reference) is returned
      as declared.
    """
    # ``_`` maps straight to ``n1``: before the branches were chained with
    # ``elif`` it fell through ``n`` to ``n1``, and that result is kept here.
    normalised = {u'_': u'n1', u'n': u'n1', u'm': u'm1'}

    gender = attribute.values.all()[0].parameter.type.name
    if gender in normalised:
        return normalised[gender]
    if gender == u'agr' and reference is not None:
        _orth, tag = reference
        return tag.split(':')[3]
    return gender
75 85  
76 86 def get_degree(attribute):
... ... @@ -79,7 +89,7 @@ def get_degree(attribute):
79 89 degree = u'pos'
80 90 return degree
81 91  
82   -def get_nps(case, number, nouns, _atr):
  92 +def get_nps(case, number, nouns, atr):
83 93 result = []
84 94 for noun in nouns:
85 95 options = [(interp.orth, interp.getTag(MORFEUSZ2)) for interp in MORFEUSZ2.generate(noun.encode('utf8'))]
... ... @@ -98,14 +108,14 @@ def get_nps(case, number, nouns, _atr):
98 108 filtered.append(option)
99 109 options = filtered
100 110 result += options
101   - return [orth for orth, _ in result]
  111 + return dependents(atr, result)
102 112  
def get_prepnps(prep, case, number, nouns, _atr):
    """Return prepositional phrases: ``prep`` followed by each noun phrase.

    The noun phrases come from ``get_nps(case, number, nouns, _atr)``; each
    one is prefixed with ``prep`` and a single space.
    """
    phrases = []
    for noun_phrase in get_nps(case, number, nouns, _atr):
        phrases.append(prep + ' ' + noun_phrase)
    return phrases
107 117  
108   -def get_adjps(case, number, gender, degree, adjectives, _atr):
  118 +def get_adjps(case, number, gender, degree, adjectives, atr):
109 119 result = []
110 120 for adjective in adjectives:
111 121 options = [(interp.orth, interp.getTag(MORFEUSZ2)) for interp in MORFEUSZ2.generate(adjective.encode('utf8'))]
... ... @@ -144,7 +154,7 @@ def get_adjps(case, number, gender, degree, adjectives, _atr):
144 154 filtered.append(option)
145 155 options = filtered
146 156 result += options
147   - return [orth for orth, _ in result]
  157 + return dependents(atr, result)
148 158  
149 159 def get_prepadjps(prep, case, number, gender, degree, adjectives, _atr):
150 160 adjps = get_adjps(case, number, gender, degree, adjectives, _atr)
... ... @@ -162,3 +172,82 @@ def get_verb(inf, number, is_subj):
162 172 filtered.append(option)
163 173 options = filtered
164 174 return [orth for orth, _ in options]
  175 +
def dependents(atr, options):
    """Expand ``options`` with the modifiers described by ``atr``.

    ``options`` is an iterable of ``(orth, tag)`` pairs. When the selection
    mode of ``atr`` is ``ratr`` or ``ratr1``, every option is passed to
    ``phrase`` together with ``atr.values.all()`` and the resulting phrases
    are concatenated. For any other mode only the orthographic forms of
    the options are returned.
    """
    mode = atr.selection_mode.name
    if mode not in (u'ratr', u'ratr1'):
        return [orth for orth, _ in options]

    expanded = []
    for option in options:
        expanded.extend(phrase(option, atr.values.all()))
    return expanded
  184 +
def _modifier_strings(groups):
    """Return the distinct space-joined strings obtainable from ``groups``.

    Every ordering of ``groups`` (via ``permutations``) is expanded with
    ``cartesian`` and each selection is joined with single spaces.
    Duplicates are dropped while keeping first-seen order, so the output
    order is deterministic.
    """
    seen = set()
    strings = []
    for ordering in permutations(groups):
        for words in cartesian(ordering):
            joined = ' '.join(words)
            if joined not in seen:
                seen.add(joined)
                strings.append(joined)
    return strings


def phrase(head, dependents):
    """Build phrases consisting of ``head`` plus at least one modifier.

    ``head`` is an ``(orth, tag)`` pair; it is also handed to
    ``lexicalisation`` as the agreement reference. ``dependents`` is an
    iterable of objects exposing ``position.arguments.all()``. (The
    parameter shadows the module-level ``dependents`` function inside this
    body only.)

    For each dependent the lexicalisations of all its arguments are
    collected into one group. A group whose last seen argument kind is
    ``adjp`` is placed before the head, every other group after it.

    Returns a list of strings. The bare head (no prefix and no suffix) is
    never included.
    """
    pre_groups = []
    post_groups = []
    for dependent in dependents:
        values = []
        kind = None  # kind of the last 'fixed'/'lex' argument seen
        for argument in dependent.position.arguments.all():
            if argument.type == u'fixed':
                kind = argument.type
            elif argument.type == u'lex':
                kind = sortatributes(argument)[0].values.all()[0].argument.type
            # Only the generated forms are needed; the verb part is ignored.
            value, _ = lexicalisation(argument, False, '', False, head)
            values += value
        if kind == 'adjp':
            pre_groups.append(values)
        else:
            post_groups.append(values)

    prefixes = _modifier_strings(pre_groups)
    suffixes = _modifier_strings(post_groups)

    orth, _ = head
    result = []
    for prefix in prefixes:
        for suffix in suffixes:
            if prefix == '' and suffix == '':
                # At least one modifier is required, so skip the bare head.
                continue
            pieces = []
            if prefix != '':
                pieces.append(prefix)
            pieces.append(orth)
            if suffix != '':
                pieces.append(suffix)
            result.append(' '.join(pieces))
    return result
  224 +
  225 +
def cartesian(llist):
    """Return every selection of at most one element per sub-list.

    ``llist`` is a list of lists. Each result is a list holding, in
    sub-list order, one element from some of the sub-lists; a sub-list may
    also be skipped entirely, so the empty selection ``[]`` is always part
    of the result.

    Selections that use an element of the first sub-list come first (in
    element order), followed by the selections that skip it.
    """
    if not llist:
        return [[]]
    tail = cartesian(llist[1:])
    # Fresh lists are built by concatenation, so no copy of ``tail`` is
    # needed for each element.
    result = [[element] + rest for element in llist[0] for rest in tail]
    result += tail
    return result
  239 +
def permutations(llist):
    """Return all orderings of ``llist`` as a list of lists.

    The first element of ``llist`` is inserted at every position of each
    permutation of the remaining elements. Equal elements are not merged,
    so a list of ``n`` items always yields ``n!`` results. An empty input
    yields ``[[]]``.
    """
    if not llist:
        return [[]]
    first = llist[0]
    result = []
    for perm in permutations(llist[1:]):
        for i in range(len(perm) + 1):
            # Slicing builds a new list, so ``perm`` itself is never mutated
            # and does not need to be copied.
            result.append(perm[:i] + [first] + perm[i:])
    return result
  252 +
  253 +
... ...