get_stats_from.py
# -*- coding: utf-8 -*-
import datetime

from collections import Counter

from django.core.management.base import BaseCommand
from django.db.models import Count, Max

from dictionary.models import Lemma, get_ready_statuses

# Only count work recorded in the status history on or after this date.
STARTDATE = datetime.datetime(2017, 1, 1, 0, 0)

class Command(BaseCommand):
    help = 'Get Walenty statistics.'

    def handle(self, **options):
        nouns_stats_dict_all = get_stats('noun')
        print_stats('noun', nouns_stats_dict_all)
        verbs_stats_dict_all = get_stats('verb')
        print_stats('verb', verbs_stats_dict_all)

def get_stats(pos):
    ready_statuses = get_ready_statuses()
    # Counters for syntactic statistics (schemata, positions, phrase
    # realizations) and semantic statistics (frames, arguments).
    stats_dict = Counter({u'phrases': 0,
                          u'poss': 0,
                          u'lemmas': 0,
                          u'sem_lemmas': 0,
                          u'schemata': 0,
                          u'frames': 0,
                          u'frames_with_shared': 0,
                          u'coor_schemata': 0,
                          u'lex_schemata': 0,
                          u'coor_lemmas': 0,
                          u'lex_lemmas': 0,
                          u'sem_arguments': 0,
                          u'sem_arguments_with_shared': 0,
                          })
    lemmas = Lemma.objects.filter(old=False,
                                  entry_obj__pos__tag=pos).filter(status__in=ready_statuses).distinct()
    for lemma in lemmas.order_by('entry_obj__name').all():
        # Restrict to status changes made on or after STARTDATE.
        history_from = lemma.status_history.filter(date__gte=STARTDATE)
        has_phraseology = False
        has_coordination = False
        # Entries that reached the 'ready' (syntactically ready) status.
        if history_from.filter(status__type__sym_name='ready').exists():
            stats_dict[u'lemmas'] += 1
            stats_dict[u'schemata'] += lemma.frames.count()
            for frame in lemma.frames.all():
                stats_dict[u'poss'] += frame.positions.count()
                # Maximum number of arguments on a single position; more than
                # one argument on a position means coordination.
                max_args = frame.positions.annotate(
                    num_args=Count('arguments')).aggregate(Max('num_args'))['num_args__max']
                if max_args > 1:
                    stats_dict[u'coor_schemata'] += 1
                    has_coordination = True
                for position in frame.positions.all():
                    stats_dict[u'phrases'] += position.arguments.count()
                if frame.phraseologic and lemma.phraseology_ready():
                    stats_dict[u'lex_schemata'] += 1
                    has_phraseology = True
            if has_phraseology and lemma.phraseology_ready():
                stats_dict[u'lex_lemmas'] += 1
            if has_coordination:
                stats_dict[u'coor_lemmas'] += 1
        # Entries that reached 'ready_f' (phraseology ready) but not 'ready':
        # only their phraseologic schemata are counted.
        if (history_from.filter(status__type__sym_name='ready_f').exists()
                and not history_from.filter(status__type__sym_name='ready').exists()
                and lemma.phraseology_ready()):
            for frame in lemma.frames.all():
                if not frame.phraseologic:
                    continue
                stats_dict[u'schemata'] += 1
                stats_dict[u'poss'] += frame.positions.count()
                max_args = frame.positions.annotate(
                    num_args=Count('arguments')).aggregate(Max('num_args'))['num_args__max']
                if max_args > 1:
                    stats_dict[u'coor_schemata'] += 1
                    has_coordination = True
                for position in frame.positions.all():
                    stats_dict[u'phrases'] += position.arguments.count()
                stats_dict[u'lex_schemata'] += 1
                has_phraseology = True
            if has_phraseology:
                stats_dict[u'lex_lemmas'] += 1
            if has_coordination:
                stats_dict[u'coor_lemmas'] += 1
        # Entries that reached 'ready_s' (semantically ready): count semantic
        # frames and their arguments, both without and with shared frames.
        if lemma.semantics_ready() and history_from.filter(status__type__sym_name='ready_s').exists():
            actual_frames = lemma.entry_obj.actual_frames()
            for sem_frame in actual_frames:
                stats_dict[u'sem_arguments'] += sem_frame.complements.count()
            visible_frames = lemma.entry_obj.visible_frames()
            for sem_frame in visible_frames:
                stats_dict[u'sem_arguments_with_shared'] += sem_frame.complements.count()
            stats_dict[u'frames'] += actual_frames.count()
            stats_dict[u'frames_with_shared'] += visible_frames.count()
            stats_dict[u'sem_lemmas'] += 1
    return stats_dict

def print_stats(pos, stats):
    # Labels are printed in Polish: counts of syntactically/semantically ready
    # entries, schemata (total, with coordination, lexicalized), positions,
    # phrase realizations, entries containing coordination or lexicalized
    # schemata, and semantic frames/arguments (optionally including shared frames).
    print(pos.upper(), 'stats:')
    print(u'Liczba haseł gotowych składniowo:\t%d' % stats['lemmas'])
    print(u'Liczba schematów:\t%d' % stats['schemata'])
    print(u'Liczba schematów z koordynacją:\t%d' % stats['coor_schemata'])
    print(u'Liczba schematów zleksykalizowanych:\t%d' % stats['lex_schemata'])
    print(u'Liczba pozycji w schematach:\t%d' % stats['poss'])
    print(u'Liczba realizacji w schematach:\t%d' % stats['phrases'])
    print(u'Liczba haseł zawierających pozycje z koordynacją:\t%d' % stats['coor_lemmas'])
    print(u'Liczba haseł zawierających schematy zleksykalizowane:\t%d' % stats['lex_lemmas'])
    print(u'Liczba haseł gotowych semantycznie:\t%d' % stats['sem_lemmas'])
    print(u'Liczba ram semantycznych:\t%d' % stats['frames'])
    print(u'Liczba ram semantycznych wliczając współdzielone:\t%d' % stats['frames_with_shared'])
    print(u'Liczba argumentów semantycznych:\t%d' % stats['sem_arguments'])
    print(u'Liczba argumentów semantycznych wliczając współdzielone ramy:\t%d' % stats['sem_arguments_with_shared'])
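
# Usage note (a sketch, assuming this file lives in an app's
# management/commands/ directory, so Django registers it under its filename):
#
#     python manage.py get_stats_from
#
# or, programmatically (e.g. from a test or another script):
#
#     from django.core.management import call_command
#     call_command('get_stats_from')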