phraseology_generator.py
3.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# -*- coding: utf-8 -*-
from dictionary.models import sort_arguments, sort_positions, sortatributes
from settings import MORFEUSZ2
def lexicalisation(argument, categories, base):
    """Generate candidate surface forms for a lexicalised argument.

    Args:
        argument: object exposing ``type``; for non-``fixed`` arguments its
            sorted attributes are read as (phrase, number, nouns, atr).
        categories: iterable passed to ``is_subj`` to decide whether the
            argument is a subject.
        base: verb lemma handed to ``get_verb`` for ``np`` lexicalisations.

    Returns:
        A 2-tuple ``(phrases, verb_forms)``. ``phrases`` is a list of strings.
        ``verb_forms`` is ``[]`` for every branch except ``np``, where it is
        whatever ``get_verb`` returns (``None`` for non-subjects).
        Unsupported phrase types yield ``([], [])``.
    """
    subj = is_subj(categories)
    if argument.type == 'fixed':
        # fixed(...): the words live in the last attribute.
        return (get_words(sortatributes(argument)[-1]), [])

    attributes = sortatributes(argument)
    # Resolve the wrapped phrase once instead of walking the same
    # ``values.all()[0].argument`` chain twice.
    phrase = attributes[0].values.all()[0].argument
    lexicalisation_type = phrase.type
    lexicalisation_parameters = sortatributes(phrase)
    if lexicalisation_type == 'xp':  # xp(...)[np/prepnp], ...
        # Unwrap xp to the np/prepnp realisation it contains.
        phrase = lexicalisation_parameters[0].values.all()[0].argument
        lexicalisation_type = phrase.type
        lexicalisation_parameters = sortatributes(phrase)

    if lexicalisation_type == 'np':  # np(case), number, nouns, atr
        case = get_case(lexicalisation_parameters[0], subj)
        number = get_number(attributes[1], subj)
        nps = get_nps(case, number, get_words(attributes[2]), attributes[3])
        return (nps, get_verb(base, number, subj))

    if lexicalisation_type == 'prepnp':  # prepnp(prep, case), number, nouns, atr
        prepnps = get_prepnps(
            get_preposition(lexicalisation_parameters[0]),
            get_case(lexicalisation_parameters[1], subj),
            get_number(attributes[1], subj),
            get_words(attributes[2]),
            attributes[3],
        )
        return (prepnps, [])

    # Any other phrase type is not lexicalised here.
    return ([], [])
def is_subj(categories):
    """Return True if any item in *categories* has ``category == u'subj'``."""
    return any(entry.category == u'subj' for entry in categories)
def get_preposition(attribute):
    """Return ``parameter.type.name`` of the attribute's first value.

    Raises IndexError if ``attribute.values.all()`` is empty.
    """
    first_value = attribute.values.all()[0]
    preposition_type = first_value.parameter.type
    return preposition_type.name
def get_words(attribute):
    """Return the ``text`` of every value of *attribute*, trimmed by one
    character at each end (presumably the surrounding quotes -- confirm
    against the stored data).
    """
    trimmed = []
    for value in attribute.values.all():
        trimmed.append(value.text[1:-1])
    return trimmed
def get_case(attribute, is_subj):
    """Return the case name stored in the attribute's first value.

    The placeholder ``u'str'`` is resolved to ``u'nom'`` when *is_subj* is
    truthy and to ``u'acc'`` otherwise; every other name is returned as-is.
    """
    name = attribute.values.all()[0].parameter.type.name
    if name != u'str':
        return name
    return u'nom' if is_subj else u'acc'
def get_number(attribute, is_subj):
    """Return the number name stored in the attribute's first value.

    The wildcard ``u'_'`` becomes ``u'sg'`` when *is_subj* is truthy; in all
    other cases the stored name is returned unchanged.
    """
    name = attribute.values.all()[0].parameter.type.name
    if is_subj and name == u'_':
        return u'sg'
    return name
def get_nps(case, number, nouns, _atr):
    """Return inflected forms of *nouns* matching *case* and *number*.

    For every noun, all forms produced by ``MORFEUSZ2.generate`` are collected
    and then narrowed to those whose tag contains *case* and *number*. A value
    of ``u'_'`` for either disables that filter.

    Forms of all nouns are accumulated in input order. Previously ``options``
    was overwritten per noun, so only one noun's forms were returned, and an
    empty *nouns* list reached the final ``return`` with ``options`` unbound.

    Args:
        case: case name to look for in the tag, or ``u'_'`` for any.
        number: number name to look for in the tag, or ``u'_'`` for any.
        nouns: iterable of noun lemmas (each is ``.encode('utf8')``-ed
            before being passed to the generator).
        _atr: accepted for signature compatibility; not used.

    Returns:
        list of matching orthographic forms; ``[]`` when *nouns* is empty.
    """
    result = []
    for noun in nouns:
        options = [
            (interp.orth, interp.getTag(MORFEUSZ2))
            for interp in MORFEUSZ2.generate(noun.encode('utf8'))
        ]
        # NOTE(review): ``in`` is a containment test on the tag value
        # (presumably a substring match on a tag string) -- confirm that it
        # cannot match unrelated tag components.
        if case != u'_':
            options = [(orth, tag) for orth, tag in options if case in tag]
        if number != u'_':
            options = [(orth, tag) for orth, tag in options if number in tag]
        result.extend(orth for orth, _ in options)
    return result
def get_prepnps(prep, case, number, nouns, _atr):
    """Return prepositional phrases built from *prep* and inflected nouns.

    The noun forms come from ``get_nps(case, number, nouns, _atr)``; each one
    is prefixed with *prep* and a single space.
    """
    phrases = []
    for noun_form in get_nps(case, number, nouns, _atr):
        phrases.append(prep + ' ' + noun_form)
    return phrases
def get_verb(inf, number, is_subj):
    """Return finite third-person singular forms of the verb *inf*.

    Returns ``None`` when *is_subj* is falsy. Otherwise every form produced
    by ``MORFEUSZ2.generate`` is kept if its tag contains ``u'fin'``,
    ``u'sg'`` and ``u'ter'``, and the list of matching orthographic forms is
    returned.

    NOTE(review): *number* is accepted but never consulted; the filter always
    requires ``u'sg'``. Confirm whether plural subjects should be supported.
    """
    if not is_subj:
        return None

    required_markers = (u'fin', u'sg', u'ter')
    candidates = [
        (interp.orth, interp.getTag(MORFEUSZ2))
        for interp in MORFEUSZ2.generate(inf.encode('utf8'))
    ]
    forms = []
    for orth, tag in candidates:
        # Keep the form only if every required marker occurs in its tag.
        if all(marker in tag for marker in required_markers):
            forms.append(orth)
    return forms