check_text_reps.py
4.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# -*- coding:utf-8 -*-
import codecs
import os
from django.core.management.base import BaseCommand
from django.db.models import Count
from dictionary.common_func import frame_data_to_text_rep, \
position_data_to_text_rep
from dictionary.models import Argument, Argument_Model, Frame, \
Position, sortatributes, AttributeParameter
from settings import PROJECT_PATH
# Report files written by the checks in this module; all of them live in
# <PROJECT_PATH>/data/wrong.
_WRONG_DIR = os.path.join(PROJECT_PATH, 'data', 'wrong')
WRONG_PARAMETERS_PATH = os.path.join(_WRONG_DIR, 'wrong_parameters_po_reperacji_20180801.txt')
WRONG_ARGUMENTS_PATH = os.path.join(_WRONG_DIR, 'wrong_arguments_po_reperacji_20180801.txt')
WRONG_POSITIONS_PATH = os.path.join(_WRONG_DIR, 'wrong_positions_po_reperacji_20180801.txt')
WRONG_FRAMES_PATH = os.path.join(_WRONG_DIR, 'wrong_frames_po_reperacji_20180801.txt')
class Command(BaseCommand):
    """Management command that runs every text_rep check in this module."""
    args = 'none'
    help = 'Looking for wrong text_reps.'

    def handle(self, **options):
        """Run the parameter, argument, position and frame checks, in that order.

        ``options`` is accepted for the command interface but is not used.
        """
        checks = (
            check_attr_parameters,
            check_arguments_text_reps,
            check_positions_text_reps,
            check_frames_text_reps,
        )
        for run_check in checks:
            run_check()
def check_attr_parameters():
    """Report AttributeParameter objects that have look-alike rows.

    For each AttributeParameter the query narrows all AttributeParameter
    rows down to those that have the same number of subparameters, contain
    every one of its subparameters, and share its ``type``.  If more than
    one row matches, the parameter is written to WRONG_PARAMETERS_PATH,
    one entry per line (UTF-8).
    """
    print('Checking parameters.')
    # The context manager closes the report even when a query raises.
    with codecs.open(WRONG_PARAMETERS_PATH, 'wt', 'utf-8') as wrong_parameters_file:
        for attr_param in AttributeParameter.objects.all():
            print(attr_param)
            possible_param_objs = AttributeParameter.objects.annotate(
                subparams_count=Count('subparameters')
            ).filter(subparams_count=attr_param.subparameters.count())
            for subparam in attr_param.subparameters.all():
                possible_param_objs = possible_param_objs.filter(subparameters=subparam)
            # NOTE(review): a `.distinct()` call used to be disabled at this
            # point; confirm the chained filters cannot yield duplicate rows.
            possible_param_objs = possible_param_objs.filter(type=attr_param.type)
            if possible_param_objs.count() > 1:
                # Terminate each entry with a newline so entries do not run
                # together in the report.
                wrong_parameters_file.write(u'%s\n' % unicode(attr_param))
def check_arguments_text_reps():
    """Report Argument objects whose stored text_rep differs from the rebuilt one.

    The expected representation comes from ``arg_data_to_text_rep``.  Each
    mismatch is written to WRONG_ARGUMENTS_PATH (UTF-8) as
    ``<stored> --> proper: <expected>``.
    """
    print('Checking arguments.')
    # The context manager closes the report even when a lookup raises.
    with codecs.open(WRONG_ARGUMENTS_PATH, 'wt', 'utf-8') as wrong_arguments_file:
        for argument in Argument.objects.all():
            print(argument)
            proper_text_rep = arg_data_to_text_rep(argument)
            if proper_text_rep != argument.text_rep:
                wrong_arguments_file.write(
                    u'%s --> proper: %s\n' % (argument.text_rep, proper_text_rep))
def check_positions_text_reps():
    """Report Position objects whose stored text_rep differs from the rebuilt one.

    The expected representation comes from ``position_data_to_text_rep``,
    called with the position's ``categories`` and ``arguments``.  Each
    mismatch is written to WRONG_POSITIONS_PATH (UTF-8) as
    ``<stored> --> proper: <expected>``.
    """
    print('Checking positions.')
    # The context manager closes the report even when a lookup raises.
    with codecs.open(WRONG_POSITIONS_PATH, 'wt', 'utf-8') as wrong_positions_file:
        for position in Position.objects.all():
            print(position)
            proper_text_rep = position_data_to_text_rep(position.categories,
                                                        position.arguments)
            if proper_text_rep != position.text_rep:
                wrong_positions_file.write(
                    u'%s --> proper: %s\n' % (position.text_rep, proper_text_rep))
def check_frames_text_reps():
    """Report Frame objects whose stored text_rep differs from the rebuilt one.

    The expected representation comes from ``frame_data_to_text_rep``,
    called with the frame's ``characteristics`` and ``positions``.  Each
    mismatch is written to WRONG_FRAMES_PATH (UTF-8) as
    ``<stored> --> proper: <expected>``, followed by indented detail lines
    for the frame's lemmas (tagged ``old`` or ``new``), opinions and NKJP
    examples.
    """
    print('Checking frames.')
    # The context manager closes the report even when a lookup raises.
    with codecs.open(WRONG_FRAMES_PATH, 'wt', 'utf-8') as wrong_frames_file:
        for frame in Frame.objects.all():
            print(frame)
            proper_text_rep = frame_data_to_text_rep(frame.characteristics,
                                                     frame.positions)
            if proper_text_rep == frame.text_rep:
                continue
            # NOTE(review): the detail lines below are assumed to belong to
            # mismatched frames only -- confirm against the original layout.
            wrong_frames_file.write(
                u'%s --> proper: %s\n' % (frame.text_rep, proper_text_rep))
            for lemma in frame.lemmas.all():
                if lemma.old:
                    wrong_frames_file.write(u'\t\told: %s\n' % (lemma.entry))
                else:
                    # The unicode prefix belongs outside the quotes; inside
                    # them it wrote a stray literal "u" before the tabs.
                    wrong_frames_file.write(u'\t\tnew: %s\n' % (lemma.entry))
            for opinion in frame.opinions.all():
                # NOTE(review): this writes the text_rep of the opinion's
                # frame, not the opinion itself -- confirm that is intended.
                wrong_frames_file.write(u'\t\topinion: %s\n' % (opinion.frame.text_rep))
            for example in frame.nkjp_examples.all():
                wrong_frames_file.write(u'\t\texample: %s\n' % (example.sentence))
def arg_data_to_text_rep(argument):
    """Build the text representation of ``argument``.

    The attributes are taken in the order given by ``sortatributes`` and
    joined with commas.  The result is:

    * ``argument.type`` alone when there are no attributes,
    * only the joined attributes when the matching Argument_Model has
      ``hide_type`` set,
    * ``type(attr1,attr2,...)`` otherwise.

    The Argument_Model lookup is performed unconditionally, before any of
    the branches above is chosen.
    """
    ordered_attrs = sortatributes(argument)
    model = Argument_Model.objects.get(arg_model_name=argument.type)
    attr_reps = [unicode(item) for item in ordered_attrs]

    if len(ordered_attrs) == 0:
        return argument.type

    joined_attrs = ','.join(attr_reps)
    if model.hide_type:
        return u'%s' % (joined_attrs)
    return u'%s(%s)' % (argument.type, joined_attrs)