#-*- coding:utf-8 -*-

import datetime
from collections import Counter

from dictionary.models import Lemma
from django.core.management.base import BaseCommand
from django.db.models import Count, Max

from dictionary.models import get_ready_statuses

# Only status-history entries dated on or after this moment are considered.
STARTDATE = datetime.datetime(2016, 7, 1, 0, 0)


class Command(BaseCommand):
    """Management command that prints Walenty statistics for nouns and verbs."""

    help = 'Get Walenty statistics.'

    def handle(self, **options):
        """Collect and print the statistics, nouns first and then verbs."""
        for pos_tag in ('noun', 'verb'):
            print_stats(pos_tag, get_stats(pos_tag))


def _count_schema(frame, stats):
    """Add one schema's position and phrase counts to ``stats``.

    Increments ``poss`` by the number of positions, ``phrases`` by the number
    of arguments over all positions and, when some position holds more than
    one argument, ``coor_schemata`` by one.

    Returns True when the schema has such a multi-argument position.
    """
    positions = frame.positions
    stats[u'poss'] += positions.count()
    for position in positions.all():
        stats[u'phrases'] += position.arguments.count()

    max_args = positions.annotate(
        num_args=Count('arguments')).aggregate(Max('num_args'))['num_args__max']
    # The aggregate is None when the schema has no positions at all; comparing
    # None with an int raises TypeError on Python 3, so guard explicitly.
    if max_args is not None and max_args > 1:
        stats[u'coor_schemata'] += 1
        return True
    return False


def get_stats(pos):
    """Gather statistics for lemmas of one part of speech.

    Considers non-old lemmas whose entry has part-of-speech tag ``pos`` and
    whose status is one of ``get_ready_statuses()``, and looks only at their
    status history since ``STARTDATE``:

    * a lemma with a 'ready' status in that history contributes all of its
      schemata;
    * otherwise, a lemma with a 'ready_f' status whose ``phraseology_ready()``
      holds contributes only its phraseologic schemata;
    * independently, a lemma whose ``semantics_ready()`` holds and that has a
      'ready_s' status in that history contributes its semantic frames.

    Returns a ``Counter`` keyed by the statistic names used in
    ``print_stats``.
    """
    ready_statuses = get_ready_statuses()
    stats_dict = Counter({u'phrases': 0,
                          u'poss': 0,
                          u'lemmas': 0,
                          u'sem_lemmas': 0,
                          u'schemata': 0,
                          u'frames': 0,
                          u'frames_with_shared': 0,
                          u'coor_schemata': 0,
                          u'lex_schemata': 0,
                          u'coor_lemmas': 0,
                          u'lex_lemmas': 0,
                          u'sem_arguments': 0,
                          u'sem_arguments_with_shared': 0,
                          })

    lemmas = Lemma.objects.filter(
        old=False,
        entry_obj__pos__tag=pos).filter(status__in=ready_statuses).distinct()
    for lemma in lemmas.order_by('entry_obj__name').all():
        history_from = lemma.status_history.filter(date__gte=STARTDATE)
        # Evaluated once; it decides which of the two exclusive branches runs.
        became_ready = history_from.filter(
            status__type__sym_name='ready').exists()
        has_phraseology = False
        has_coordination = False

        if became_ready:
            stats_dict[u'lemmas'] += 1
            stats_dict[u'schemata'] += lemma.frames.count()
            for frame in lemma.frames.all():
                if _count_schema(frame, stats_dict):
                    has_coordination = True
                if frame.phraseologic and lemma.phraseology_ready():
                    stats_dict[u'lex_schemata'] += 1
                    has_phraseology = True
        elif (history_from.filter(status__type__sym_name='ready_f').exists()
              and lemma.phraseology_ready()):
            # Only the phraseologic schemata of such a lemma are counted.
            for frame in lemma.frames.all():
                if not frame.phraseologic:
                    continue
                stats_dict[u'schemata'] += 1
                if _count_schema(frame, stats_dict):
                    has_coordination = True
                stats_dict[u'lex_schemata'] += 1
                has_phraseology = True

        # The flags can only be set in one of the branches above, and
        # has_phraseology already implies phraseology_ready() returned true,
        # so each lemma is counted at most once per statistic.
        if has_phraseology:
            stats_dict[u'lex_lemmas'] += 1
        if has_coordination:
            stats_dict[u'coor_lemmas'] += 1

        if (lemma.semantics_ready()
                and history_from.filter(status__type__sym_name='ready_s').exists()):
            actual_frames = lemma.entry_obj.actual_frames()
            for sem_frame in actual_frames:
                stats_dict[u'sem_arguments'] += sem_frame.complements.count()
            visible_frames = lemma.entry_obj.visible_frames()
            for sem_frame in visible_frames:
                stats_dict[u'sem_arguments_with_shared'] += sem_frame.complements.count()
            stats_dict[u'frames'] += actual_frames.count()
            stats_dict[u'frames_with_shared'] += visible_frames.count()
            stats_dict[u'sem_lemmas'] += 1

    return stats_dict


# (format string, statistic key) pairs in the order they are printed.
_STATS_REPORT = (
    (u'Liczba haseł gotowych składniowo:\t%d', 'lemmas'),
    (u'Liczba schematów:\t%d', 'schemata'),
    (u'Liczba schematów z koordynacją:\t%d', 'coor_schemata'),
    (u'Liczba schematów zleksykalizowanych:\t%d', 'lex_schemata'),
    (u'Liczba pozycji w schematach:\t%d', 'poss'),
    (u'Liczba realizacji w schematach:\t%d', 'phrases'),
    (u'Liczba haseł zawierających pozycje z koordynacją:\t%d', 'coor_lemmas'),
    (u'Liczba haseł zawierających schematy zleksykalizowane:\t%d', 'lex_lemmas'),
    (u'Liczba haseł gotowych semantycznie:\t%d', 'sem_lemmas'),
    (u'Liczba ram semantycznych:\t%d', 'frames'),
    (u'Liczba ram semantycznych wliczając współdzielone:\t%d', 'frames_with_shared'),
    (u'Liczba argumentów semantycznych:\t%d', 'sem_arguments'),
    (u'Liczba argumentów semantycznych wliczając współdzielone ramy:\t%d',
     'sem_arguments_with_shared'),
)


def print_stats(pos, stats):
    """Print the statistics in ``stats`` under a header naming ``pos``.

    ``stats`` is a mapping with the keys produced by ``get_stats``.
    """
    # A single formatted argument prints the same text whether ``print`` is
    # the Python 3 function or the Python 2 statement (where the original
    # two-argument form printed a tuple).
    print('%s stats:' % pos.upper())
    for template, key in _STATS_REPORT:
        print(template % stats[key])