# get_stats_from.py
#-*- coding:utf-8 -*-

import datetime
from collections import Counter

from dictionary.models import Lemma

from django.core.management.base import BaseCommand
from django.db.models import Count, Max

from dictionary.models import get_ready_statuses


STARTDATE = datetime.datetime(2016, 7, 1, 00, 00)


class Command(BaseCommand):
    help = 'Get Walenty statistics.'

    def handle(self, **options):
        nouns_stats_dict_all = get_stats('noun')
        print_stats('noun', nouns_stats_dict_all)

        verbs_stats_dict_all = get_stats('verb')
        print_stats('verb', verbs_stats_dict_all)


def get_stats(pos):
    ready_statuses = get_ready_statuses()

    stats_dict = Counter({u'phrases': 0,
                          u'poss': 0,
                          u'lemmas': 0,
                          u'sem_lemmas': 0,
                          u'schemata': 0,
                          u'frames': 0,
                          u'frames_with_shared': 0,
                          u'coor_schemata': 0,
                          u'lex_schemata': 0,
                          u'coor_lemmas': 0,
                          u'lex_lemmas': 0,
                          u'sem_arguments': 0,
                          u'sem_arguments_with_shared': 0,
                          })

    lemmas = Lemma.objects.filter(old=False,
                                  entry_obj__pos__tag=pos).filter(status__in=ready_statuses).distinct()

    for lemma in lemmas.order_by('entry_obj__name').all():
        history_from = lemma.status_history.filter(date__gte=STARTDATE)

        has_phraseology = False
        has_coordination = False

        if history_from.filter(status__type__sym_name='ready').exists():

            stats_dict[u'lemmas'] += 1
            stats_dict[u'schemata'] += lemma.frames.count()

            for frame in lemma.frames.all():
                stats_dict[u'poss'] += frame.positions.count()
                flat_frames = frame.positions.annotate(num_args=Count('arguments')).aggregate(Max('num_args'))[
                    'num_args__max']
                if flat_frames > 1:
                    stats_dict[u'coor_schemata'] += 1
                    has_coordination = True
                for pos in frame.positions.all():
                    stats_dict[u'phrases'] += pos.arguments.count()
                if frame.phraseologic and lemma.phraseology_ready():
                    stats_dict[u'lex_schemata'] += 1
                    has_phraseology = True

            if has_phraseology and lemma.phraseology_ready():
                stats_dict[u'lex_lemmas'] += 1
            if has_coordination:
                stats_dict[u'coor_lemmas'] += 1

        if history_from.filter(status__type__sym_name='ready_f').exists() and not history_from.filter(status__type__sym_name='ready').exists() and lemma.phraseology_ready():
            for frame in lemma.frames.all():
                if not frame.phraseologic:
                    continue

                stats_dict[u'schemata'] += 1
                stats_dict[u'poss'] += frame.positions.count()
                flat_frames = frame.positions.annotate(num_args=Count('arguments')).aggregate(Max('num_args'))[
                    'num_args__max']
                if flat_frames > 1:
                    stats_dict[u'coor_schemata'] += 1
                    has_coordination = True
                for pos in frame.positions.all():
                    stats_dict[u'phrases'] += pos.arguments.count()

                stats_dict[u'lex_schemata'] += 1
                has_phraseology = True

            if has_phraseology:
                stats_dict[u'lex_lemmas'] += 1
            if has_coordination:
                stats_dict[u'coor_lemmas'] += 1


        if lemma.semantics_ready() and history_from.filter(status__type__sym_name='ready_s').exists():

            actual_frames = lemma.entry_obj.actual_frames()
            for sem_frame in actual_frames:
                stats_dict[u'sem_arguments'] += sem_frame.complements.count()

            visible_frames = lemma.entry_obj.visible_frames()
            for sem_frame in visible_frames:
                stats_dict[u'sem_arguments_with_shared'] += sem_frame.complements.count()

            stats_dict[u'frames'] += actual_frames.count()
            stats_dict[u'frames_with_shared'] += visible_frames.count()
            stats_dict[u'sem_lemmas'] += 1

    return stats_dict


def print_stats(pos, stats):
    print(pos.upper(), 'stats:')

    print(u'Liczba haseł gotowych składniowo:\t%d' % stats['lemmas'])
    print(u'Liczba schematów:\t%d' % stats['schemata'])
    print(u'Liczba schematów z koordynacją:\t%d' % stats['coor_schemata'])
    print(u'Liczba schematów zleksykalizowanych:\t%d' % stats['lex_schemata'])
    print(u'Liczba pozycji w schematach:\t%d' % stats['poss'])
    print(u'Liczba realizacji w schematach:\t%d' % stats['phrases'])

    print(u'Liczba haseł zawierających pozycje z koordynacją:\t%d' % stats['coor_lemmas'])
    print(u'Liczba haseł zawierających schematy zleksykalizowane:\t%d' % stats['lex_lemmas'])

    print(u'Liczba haseł gotowych semantycznie:\t%d' % stats['sem_lemmas'])
    print(u'Liczba ram semantycznych:\t%d' % stats['frames'])
    print(u'Liczba ram semantycznych wliczając współdzielone:\t%d' % stats['frames_with_shared'])
    print(u'Liczba argumentów semantycznych:\t%d' % stats['sem_arguments'])
    print(u'Liczba argumentów semantycznych wliczając współdzielone ramy:\t%d' % stats['sem_arguments_with_shared'])