Commit 92426c81077a7359886e57f1b870ae605a6d7f48

Authored by Bartłomiej Nitoń
1 parent 3ad52b13

Added statistics to auto generated package with Walenty in text format. Added additional statistics table to vocabulary management interface.
dictionary/ajax_vocabulary_management.py
... ... @@ -24,10 +24,11 @@ import codecs
24 24 import datetime
25 25 import HTMLParser
26 26 import os
  27 +from collections import Counter
27 28 from tempfile import mkdtemp, mkstemp
28 29  
29 30 from django.contrib.auth.models import User
30   -from django.db.models import Count, Sum, Q
  31 +from django.db.models import Count, Max, Sum, Q
31 32 from django.http import HttpResponse
32 33 from django.template.loader import render_to_string
33 34 from django.utils.encoding import smart_str
... ... @@ -35,10 +36,12 @@ from django.utils.encoding import smart_str
35 36 from common.decorators import ajax, AjaxError, render
36 37 from dictionary.forms import ManageVocabPermForm
37 38 from dictionary.models import Frame_Opinion_Value, Lemma, Lemma_Status, \
38   - POS, Vocabulary, VocabularyFormat, \
  39 + POS, Vocabulary, VocabularyFormat, WalentyStat,\
39 40 sorted_frame_char_values_dict
40 41 from dictionary.teixml import createteixml
41 42  
  43 +LEX_TYPES = ['lex', 'fixed', 'comprepnp']
  44 +
42 45 TEXT_VOCABULARY_CLAUSE = u"""
43 46 % The Polish Valence Dictionary (Walenty)
44 47 % <date>
... ... @@ -319,4 +322,126 @@ def count_schemas(lemmas):
319 322 if not schemas_count:
320 323 schemas_count = 0
321 324 return schemas_count
  325 +
@render('other_stats.html')
@ajax(method='get', encode_result=False)
def get_other_stats(request):
    """Return all stored Walenty statistics, ordered by label, for rendering.

    The @render decorator feeds the queryset into 'other_stats.html';
    the @ajax decorator restricts this view to GET requests.
    """
    stats = WalentyStat.objects.all().order_by('label')
    return stats
  330 +
def get_stats(statuses, pos):
    """Gather Walenty statistics for current (old=False) lemmas of one POS.

    :param statuses: Lemma_Status values used to filter lemmas.
    :param pos: POS tag string (e.g. u'verb').
    :return: Counter with aggregate counts; get_sub_entries_dict() also
        contributes one dynamically-named key per subentry signature.

    Fixes vs. previous version: removed leftover debug ``print`` statement;
    renamed the inner loop variable that shadowed the ``pos`` parameter.
    """
    stats_dict = Counter({u'phrases': 0,
                          u'poss': 0,
                          u'lemmas': 0,
                          u'sub_lemmas': 0,
                          u'schemata': 0,
                          u'cer_schemata': 0,
                          u'uncer_schemata': 0,
                          u'bad_schemata': 0,
                          u'arch_schemata': 0,
                          u'col_schemata': 0,
                          u'vul_schemata': 0,
                          u'coor_schemata': 0,
                          u'lex_schemata': 0,
                          u'coor_lemmas': 0,
                          u'lex_lemmas': 0})

    lemmas = Lemma.objects.filter(old=False,
                                  entry_obj__pos__tag=pos).filter(status__in=statuses).distinct()
    stats_dict[u'lemmas'] = lemmas.count()
    for lemma in lemmas.order_by('entry').all():
        # Per-opinion schema counts (pewny=certain, watpliwy=uncertain, ...).
        stats_dict[u'cer_schemata'] += lemma.frame_opinions.filter(value__value=u'pewny').count()
        stats_dict[u'uncer_schemata'] += lemma.frame_opinions.filter(value__value=u'wątpliwy').count()
        stats_dict[u'bad_schemata'] += lemma.frame_opinions.filter(value__value=u'zły').count()
        stats_dict[u'arch_schemata'] += lemma.frame_opinions.filter(value__value=u'archaiczny').count()
        stats_dict[u'col_schemata'] += lemma.frame_opinions.filter(value__value=u'potoczny').count()
        stats_dict[u'vul_schemata'] += lemma.frame_opinions.filter(value__value=u'wulgarny').count()
        stats_dict[u'schemata'] += lemma.frames.count()

        # NOTE(review): Counter addition drops zero-valued keys; harmless here
        # because Counter.__missing__ returns 0 on later lookups.
        stats_dict = stats_dict + Counter(get_sub_entries_dict(lemma))

        has_phraseology = False
        has_coordination = False
        for frame in lemma.frames.all():
            stats_dict[u'poss'] += frame.positions.count()
            # Max argument count on a single position; >1 means coordination.
            max_args = frame.positions.annotate(num_args=Count('arguments')).aggregate(Max('num_args'))['num_args__max']
            if max_args > 1:
                stats_dict[u'coor_schemata'] += 1
                has_coordination = True
            # 'position' (not 'pos') so the function parameter is not shadowed.
            for position in frame.positions.all():
                stats_dict[u'phrases'] += position.arguments.count()
            if frame.positions.filter(arguments__type__in=LEX_TYPES).exists():
                stats_dict[u'lex_schemata'] += 1
                has_phraseology = True

        if has_phraseology:
            stats_dict[u'lex_lemmas'] += 1
        if has_coordination:
            stats_dict[u'coor_lemmas'] += 1

    return stats_dict
  383 +
def get_sub_entries_dict(lemma):
    """Count a lemma's subentries per (reflex, neg, pred, aspect) signature.

    Returns a dict with the total under 'sub_lemmas' plus one key per
    observed signature, formatted for direct use in the stats report.
    """
    counts = {'sub_lemmas': 0}
    char_values = sorted_frame_char_values_dict()
    for reflex in char_values['sorted_reflex_vals']:
        for neg in char_values['sorted_neg_vals']:
            for pred in char_values['sorted_pred_vals']:
                for aspect in char_values['sorted_aspect_vals']:
                    frames = lemma.get_frames_by_char_values(reflex_val=reflex,
                                                             neg_val=neg,
                                                             pred_val=pred,
                                                             aspect_val=aspect)
                    if not frames.exists():
                        continue
                    counts[u'sub_lemmas'] += 1
                    key = u'Liczba podhaseł postaci: (%s,%s,%s,%s)' % (reflex.value, neg.value,
                                                                       pred.value, aspect.value)
                    counts[key] = counts.get(key, 0) + 1
    return counts
  403 +
def write_stats(stats_path, stats):
    """Write a human-readable UTF-8 Walenty statistics report to stats_path.

    :param stats_path: destination file path.
    :param stats: dict/Counter produced by get_stats().

    Uses ``with`` so the file is always closed; the previous try/finally
    raised NameError (masking the real error) when codecs.open itself failed,
    because ``outfile`` was never bound.
    """
    with codecs.open(stats_path, 'wt', 'utf-8') as outfile:
        outfile.write(u'Łączna liczba haseł:\t%d\n\n' % stats['lemmas'])
        outfile.write(u'Łączna liczba pozycji w schematach:\t%d\n' % stats['poss'])
        outfile.write(u'Łączna liczba realizacji w schematach:\t%d\n\n' % stats['phrases'])

        outfile.write(u'Łączna liczba podhaseł:\t%d\n' % stats['sub_lemmas'])
        outfile.write(u'Liczba podhaseł postaci (ZWROTNOŚĆ, NEGATYWNOŚĆ, PREDYKATYWNOŚĆ, ASPEKT)\n')
        write_subschemas_stats(outfile, stats)

        outfile.write(u'Łączna liczba schematów:\t%d\n' % stats['schemata'])
        outfile.write(u'Liczba schematów pewnych:\t%d\n' % stats['cer_schemata'])
        outfile.write(u'Liczba schematów wątpliwych:\t%d\n' % stats['uncer_schemata'])
        outfile.write(u'Liczba schematów złych:\t%d\n' % stats['bad_schemata'])
        outfile.write(u'Liczba schematów archaicznych:\t%d\n' % stats['arch_schemata'])
        outfile.write(u'Liczba schematów potocznych:\t%d\n' % stats['col_schemata'])
        outfile.write(u'Liczba schematów wulgarnych:\t%d\n\n' % stats['vul_schemata'])

        outfile.write(u'Łączna liczba schematów z koordynacją:\t%d\n' % stats['coor_schemata'])
        outfile.write(u'Łączna liczba schematów zleksykalizowanych:\t%d\n\n' % stats['lex_schemata'])

        outfile.write(u'Łączna liczba haseł zawierających pozycje z koordynacją:\t%d\n' % stats['coor_lemmas'])
        outfile.write(u'Łączna liczba haseł zawierających schematy zleksykalizowane:\t%d\n\n' % stats['lex_lemmas'])
  431 +
def write_subschemas_stats(stats_file, stats):
    """Write per-subentry-signature counts, sorted, followed by a blank line.

    :param stats_file: open writable file-like object (must accept unicode).
    :param stats: stats dict; only keys starting with the subentry prefix
        are reported.

    Uses ``items()`` instead of the Python-2-only ``iteritems()`` — identical
    behavior on Python 2, and forward-compatible with Python 3.
    """
    lines = [u'%s:\t%d\n' % (key, count)
             for key, count in stats.items()
             if key.startswith(u'Liczba podhaseł postaci:')]
    for line in sorted(lines):
        stats_file.write(line)
    stats_file.write('\n')
  438 +
def update_walenty_stats(stats):
    """Replace the stored Walenty summary statistics with fresh values.

    Deletes every existing WalentyStat row, then saves one row per summary
    statistic, in the same order as before.
    """
    WalentyStat.objects.all().delete()
    labels_and_keys = ((u'Łączna liczba haseł', 'lemmas'),
                       (u'Łączna liczba pozycji w schematach', 'poss'),
                       (u'Łączna liczba realizacji w schematach', 'phrases'),
                       (u'Łączna liczba schematów', 'schemata'),
                       (u'Łączna liczba schematów z koordynacją', 'coor_schemata'),
                       (u'Łączna liczba schematów zleksykalizowanych', 'lex_schemata'))
    for label, key in labels_and_keys:
        WalentyStat(label=label, value=str(stats[key])).save()
322 447  
323 448 \ No newline at end of file
... ...
dictionary/management/commands/create_walenty.py
1 1 #-*- coding:utf-8 -*-
2 2  
3   -#Copyright (c) 2013, Bartłomiej Nitoń
  3 +#Copyright (c) 2015, Bartłomiej Nitoń
4 4 #All rights reserved.
5 5  
6 6 #Redistribution and use in source and binary forms, with or without modification, are permitted provided
... ... @@ -23,11 +23,14 @@
23 23 import datetime
24 24 import os
25 25 import tarfile
  26 +from collections import Counter
26 27  
27 28 from django.core.management.base import BaseCommand
28 29  
29 30 from accounts.models import User
30   -from dictionary.ajax_vocabulary_management import create_text_walenty
  31 +from dictionary.ajax_vocabulary_management import create_text_walenty, \
  32 + get_stats, write_stats, \
  33 + update_walenty_stats
31 34 from dictionary.ajax_argument_realizations import create_realizations_file
32 35 from dictionary.models import Frame_Opinion, Lemma, Vocabulary, POS, \
33 36 get_checked_statuses, get_ready_statuses
... ... @@ -35,35 +38,49 @@ from settings import WALENTY_PATH
35 38  
class Command(BaseCommand):
    args = 'none'
    help = 'Script for creating Walenty vocabulary.'

    def handle(self, *args, **options):
        """Build the dated Walenty tar.gz package with per-POS dictionaries,
        realization expansions, and cumulative statistics files.

        Fixes vs. previous version: removed the stray leading space in the
        realizations filename; cumulative stats are written and archived once
        after the POS loop (they were re-added every iteration, producing
        duplicate tar entries); cleanup no longer raises when an early failure
        prevented a file from being created.
        """
        now = datetime.datetime.now().strftime('%Y%m%d')
        filename_base = '%s_%s' % ('walenty', now)
        realizations_path = os.path.join(WALENTY_PATH,
                                         '%s_%s.txt' % ('phrase_types_expand', now))
        stats_base = filename_base.replace('walenty', 'stats')
        checked_stats_path = os.path.join(WALENTY_PATH, u'%s_%s' % (stats_base, 'verified'))
        ready_stats_path = os.path.join(WALENTY_PATH, u'%s_%s' % (stats_base, 'all'))
        create_realizations_file(realizations_path)
        all_stats = Counter({})
        verified_stats = Counter({})
        base_path = os.path.join(WALENTY_PATH, filename_base)
        # Open outside the try so a failed open cannot reach archive.close().
        archive = tarfile.open(base_path + '.tar.gz', 'w:gz')
        try:
            os.chdir(WALENTY_PATH)
            archive.add(os.path.basename(realizations_path))
            for pos in POS.objects.exclude(tag=u'unk').order_by('priority'):
                pos_stats = create_pos_archive_and_get_stats(archive, pos, filename_base)
                all_stats = all_stats + Counter(pos_stats['all'])
                verified_stats = verified_stats + Counter(pos_stats['verified'])
            # Cumulative stats: write and archive once, after all POS parts.
            write_stats(checked_stats_path, verified_stats)
            archive.add(os.path.basename(checked_stats_path))
            write_stats(ready_stats_path, all_stats)
            archive.add(os.path.basename(ready_stats_path))
        finally:
            archive.close()
            for path in (realizations_path, checked_stats_path, ready_stats_path):
                if os.path.exists(path):
                    os.remove(path)
56   -def create_pos_archive(archive, pos, filename_base):
  73 +def create_pos_archive_and_get_stats(archive, pos, filename_base):
  74 + all_stats = {}
  75 + checked_stats = {}
57 76 try:
58   - all_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'all'))
59   - checked_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'verified'))
60 77 checked_statuses = get_checked_statuses()
61 78 ready_statuses = get_ready_statuses()
62   -
63 79 lemmas = Lemma.objects.filter(old=False, entry_obj__pos=pos).order_by('entry')
64 80 checked_lemmas = lemmas.filter(status__in=checked_statuses)
65 81 ready_lemmas = lemmas.filter(status__in=ready_statuses)
66 82  
  83 + all_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'all'))
67 84 walenty_path_ready = create_text_walenty(file_name=all_path,
68 85 lemmas=ready_lemmas,
69 86 vocabularies=Vocabulary.objects.none(),
... ... @@ -72,6 +89,10 @@ def create_pos_archive(archive, pos, filename_base):
72 89 owners=User.objects.none(),
73 90 poss=POS.objects.filter(pk=pos.pk),
74 91 add_frame_opinions=True)
  92 + all_filename = os.path.basename(walenty_path_ready)
  93 + archive.add(name=all_filename, arcname=os.path.join(u'%ss' % pos.tag, all_filename))
  94 +
  95 + checked_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'verified'))
75 96 walenty_path_checked = create_text_walenty(file_name=checked_path,
76 97 lemmas=checked_lemmas,
77 98 vocabularies=Vocabulary.objects.none(),
... ... @@ -80,11 +101,29 @@ def create_pos_archive(archive, pos, filename_base):
80 101 owners=User.objects.none(),
81 102 poss=POS.objects.filter(pk=pos.pk),
82 103 add_frame_opinions=True)
83   - all_filename = os.path.basename(walenty_path_ready)
84 104 checked_filename = os.path.basename(walenty_path_checked)
85   - archive.add(name=all_filename, arcname=os.path.join(u'%ss' % pos.tag, all_filename))
86 105 archive.add(name=checked_filename, arcname=os.path.join(u'%ss' % pos.tag, checked_filename))
  106 +
  107 + all_stats = get_stats(ready_statuses, pos.tag)
  108 + all_stats_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base.replace('walenty', 'stats'),
  109 + pos.tag, 'all'))
  110 + write_stats(all_stats_path, all_stats)
  111 + all_stats_filename = os.path.basename(all_stats_path)
  112 + archive.add(name=all_stats_filename, arcname=os.path.join(u'%ss' % pos.tag, all_stats_filename))
  113 +
  114 + checked_stats = get_stats(checked_statuses, pos.tag)
  115 + checked_stats_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base.replace('walenty', 'stats'),
  116 + pos.tag, 'verified'))
  117 + write_stats(checked_stats_path, checked_stats)
  118 + checked_stats_filename = os.path.basename(checked_stats_path)
  119 + archive.add(name=checked_stats_filename, arcname=os.path.join(u'%ss' % pos.tag, checked_stats_filename))
  120 +
  121 + update_walenty_stats(all_stats)
87 122 finally:
88   - os.remove(walenty_path_checked)
89 123 os.remove(walenty_path_ready)
  124 + os.remove(walenty_path_checked)
  125 + os.remove(all_stats_path)
  126 + os.remove(checked_stats_path)
  127 + return {'all': all_stats,
  128 + 'verified': checked_stats}
90 129  
91 130 \ No newline at end of file
... ...
dictionary/management/commands/get_stats.py
1 1 #-*- coding:utf-8 -*-
2   -# author: B.Niton
3 2  
4   -import codecs
5 3 import datetime
6 4 from collections import Counter
7 5  
8 6 from django.core.management.base import BaseCommand
9   -from django.db.models import Count, Max
10 7  
11   -from dictionary.models import Lemma, get_checked_statuses, get_ready_statuses, \
12   - sorted_frame_char_values_dict
13   -
14   -LEX_TYPES = ['lex', 'fixed', 'comprepnp']
  8 +from dictionary.models import get_checked_statuses, get_ready_statuses
  9 +from dictionary.ajax_vocabulary_management import get_stats, write_stats
15 10  
16 11 class Command(BaseCommand):
17 12 help = 'Get Walenty statistics.'
... ... @@ -21,126 +16,28 @@ class Command(BaseCommand):
21 16 all_statuses = get_ready_statuses()
22 17 verified_statuses = get_checked_statuses()
23 18  
24   -# nouns_stats_dict_all = Counter(get_stats('data/statystyki_2015_06_30_nouns_all.txt', all_statuses, 'noun'))
25   -# nouns_stats_dict_verified = Counter(get_stats('data/statystyki_2015_06_30_nouns_verified.txt', verified_statuses, 'noun'))
26   -
27   -# adjs_stats_dict_all = Counter(get_stats('data/statystyki_2015_06_30_adjs_all.txt', all_statuses, 'adj'))
28   -# adjs_stats_dict_verified = Counter(get_stats('data/statystyki_2015_06_30_adjs_verified.txt', verified_statuses, 'adj'))
29   -#
30   -# verbs_stats_dict_all = Counter(get_stats('data/statystyki_2015_06_30_verbs_all.txt', all_statuses, 'verb'))
31   -# verbs_stats_dict_verified = Counter(get_stats('data/statystyki_2015_06_30_verbs_verified.txt', verified_statuses, 'verb'))
32   -#
33   - advs_stats_dict_all = Counter(get_stats(all_statuses, 'adv'))
34   - write_stats('data/stats_%s_advs_all.txt' % now, advs_stats_dict_all)
35   -# advs_stats_dict_verified = Counter(get_stats('data/statystyki_2015_06_30_advs_verified.txt', verified_statuses, 'adv'))
36   -#
37   -# all_stats_dict_all = nouns_stats_dict_all + adjs_stats_dict_all + verbs_stats_dict_all + advs_stats_dict_all
38   -# all_stats_dict_verified = nouns_stats_dict_verified + adjs_stats_dict_verified + verbs_stats_dict_verified + advs_stats_dict_verified
39   -#
40   -# write_all_stats('data/statystyki_2015_06_30_all.txt', all_stats_dict_all)
41   -# write_all_stats('data/statystyki_2015_06_30_verified.txt', all_stats_dict_verified)
42   -
43   -def write_stats(stats_path, stats_dict):
44   - try:
45   - outfile = codecs.open(stats_path, 'wt', 'utf-8')
  19 + nouns_stats_dict_all = Counter(get_stats(all_statuses, 'noun'))
  20 + write_stats('data/stats_%s_nouns_all.txt' % now, nouns_stats_dict_all)
  21 + nouns_stats_dict_verified = Counter(get_stats(verified_statuses, 'noun'))
  22 + write_stats('data/stats_%s_nouns_verified.txt' % now, nouns_stats_dict_verified)
46 23  
47   - outfile.write(u'Liczba typów fraz:\t%d\n' % stats_path['phrases'])
48   - outfile.write(u'Liczba pozycji:\t%d\n' % stats_path['poss'])
49   - outfile.write(u'Liczba haseł:\t%d\n\n' % stats_path['lemmas'])
  24 + adjs_stats_dict_all = Counter(get_stats(all_statuses, 'adj'))
  25 + write_stats('data/stats_%s_adjs_all.txt' % now, adjs_stats_dict_all)
  26 + adjs_stats_dict_verified = Counter(get_stats(verified_statuses, 'adj'))
  27 + write_stats('data/stats_%s_adjs_verified.txt' % now, adjs_stats_dict_verified)
50 28  
51   - outfile.write(u'Łączna liczba podhaseł:\t%d\n' % stats_path['sub_lemmas'])
52   - outfile.write(u'Liczba podhaseł postaci (ZWROTNOŚĆ, NEGATYWNOŚĆ, PREDYKATYWNOŚĆ, ASPEKT)\n')
53   -#### dokonczyc
54   -
55   - outfile.write(u'Łączna liczba schematów:\t%d\n' % stats_path['schemata'])
56   - outfile.write(u'Liczba schematów pewnych:\t%d\n' % stats_path['cer_schemata'])
57   - outfile.write(u'Liczba schematów wątpliwych:\t%d\n' % stats_path['uncer_schemata'])
58   - outfile.write(u'Liczba schematów złych:\t%d\n' % stats_path['bad_schemata'])
59   - outfile.write(u'Liczba schematów archaicznych:\t%d\n' % stats_path['arch_schemata'])
60   - outfile.write(u'Liczba schematów potocznych:\t%d\n' % stats_path['col_schemata'])
61   - outfile.write(u'Liczba schematów wulgarnych:\t%d\n\n' % stats_path['vul_schemata'])
  29 + verbs_stats_dict_all = Counter(get_stats(all_statuses, 'verb'))
  30 + write_stats('data/stats_%s_verbs_all.txt' % now, verbs_stats_dict_all)
  31 + verbs_stats_dict_verified = Counter(get_stats(verified_statuses, 'verb'))
  32 + write_stats('data/stats_%s_verbs_verified.txt' % now, verbs_stats_dict_verified)
62 33  
63   - outfile.write(u'Liczba schematów z koordynacją:\t%d\n' % stats_path['coor_schemata'])
64   - outfile.write(u'Liczba schematów zleksykalizowanych:\t%d\n\n' % stats_path['lex_schemata'])
  34 + advs_stats_dict_all = Counter(get_stats(all_statuses, 'adv'))
  35 + write_stats('data/stats_%s_advs_all.txt' % now, advs_stats_dict_all)
  36 + advs_stats_dict_verified = Counter(get_stats(verified_statuses, 'adv'))
  37 + write_stats('data/stats_%s_advs_verified.txt' % now, advs_stats_dict_verified)
65 38  
66   - outfile.write(u'Hasła zawierające pozycje z koordynacją:\t%d\n' % stats_path['coor_lemmas'])
67   - outfile.write(u'Hasła zawierające schematy zleksykalizowane:\t%d\n\n' % stats_path['lex_lemmas'])
  39 + all_stats_dict_all = nouns_stats_dict_all + adjs_stats_dict_all + verbs_stats_dict_all + advs_stats_dict_all
  40 + all_stats_dict_verified = nouns_stats_dict_verified + adjs_stats_dict_verified + verbs_stats_dict_verified + advs_stats_dict_verified
68 41  
69   - except:
70   - outfile.close()
71   -
72   -def get_stats(statuses, pos):
73   - stats_dict = Counter({u'phrases': 0,
74   - u'poss': 0,
75   - u'lemmas': 0,
76   - u'sub_lemmas': 0,
77   - u'schemata': 0,
78   - u'cer_schemata': 0,
79   - u'uncer_schemata': 0,
80   - u'bad_schemata': 0,
81   - u'arch_schemata': 0,
82   - u'col_schemata': 0,
83   - u'vul_schemata': 0,
84   - u'coor_schemata': 0,
85   - u'lex_schemata': 0,
86   - u'coor_lemmas': 0,
87   - u'lex_lemmas': 0})
88   -
89   - lemmas = Lemma.objects.filter(old=False,
90   - entry_obj__pos__tag=pos).filter(status__in=statuses).distinct()
91   - for lemma in lemmas.order_by('entry').all():
92   - print lemma
93   - stats_dict[u'lemmas'] += 1
94   - stats_dict[u'cer_schemata'] += lemma.frame_opinions.filter(value__value=u'pewny').count()
95   - stats_dict[u'uncer_schemata'] += lemma.frame_opinions.filter(value__value=u'wątpliwy').count()
96   - stats_dict[u'bad_schemata'] += lemma.frame_opinions.filter(value__value=u'zły').count()
97   - stats_dict[u'arch_schemata'] += lemma.frame_opinions.filter(value__value=u'archaiczny').count()
98   - stats_dict[u'col_schemata'] += lemma.frame_opinions.filter(value__value=u'potoczny').count()
99   - stats_dict[u'vul_schemata'] += lemma.frame_opinions.filter(value__value=u'wulgarny').count()
100   - stats_dict[u'schemata'] += lemma.frames.count()
101   -
102   - stats_dict = stats_dict + Counter(get_sub_entries_dict(lemma))
103   -
104   - has_phraseology = False
105   - has_coordination = False
106   - for frame in lemma.frames.all():
107   - stats_dict[u'poss'] += frame.positions.count()
108   - flat_frames = frame.positions.annotate(num_args=Count('arguments')).aggregate(Max('num_args'))['num_args__max']
109   - if flat_frames > 1:
110   - stats_dict[u'coor_schemata'] += 1
111   - has_coordination = True
112   - for pos in frame.positions.all():
113   - stats_dict[u'phrases'] += pos.arguments.count()
114   - if frame.positions.filter(arguments__type__in=LEX_TYPES).exists():
115   - stats_dict[u'lex_schemata'] += 1
116   - has_phraseology = True
117   -
118   - if has_phraseology:
119   - stats_dict[u'lex_lemmas'] += 1
120   - if has_coordination:
121   - stats_dict[u'coor_lemmas'] += 1
122   -
123   - return stats_dict
124   -
125   -def get_sub_entries_dict(lemma):
126   - sub_entries_dict = {}
127   - frame_chars_dict = sorted_frame_char_values_dict()
128   - for reflex in frame_chars_dict['sorted_reflex_vals']:
129   - for neg in frame_chars_dict['sorted_neg_vals']:
130   - for pred in frame_chars_dict['sorted_pred_vals']:
131   - for aspect in frame_chars_dict['sorted_aspect_vals']:
132   - matching_frames = lemma.get_frames_by_char_values(reflex_val=reflex,
133   - neg_val=neg,
134   - pred_val=pred,
135   - aspect_val=aspect)
136   - if matching_frames.exists():
137   - if not u'sub_lemmas' in sub_entries_dict:
138   - sub_entries_dict[u'sub_lemmas'] = 0
139   - sub_entries_dict[u'sub_lemmas'] += 1
140   -
141   - subentry_key = u'Liczba podhaseł postaci: (%s,%s,%s,%s)' % (reflex.value, neg.value,
142   - pred.value, aspect.value)
143   - if not subentry_key in sub_entries_dict:
144   - sub_entries_dict[subentry_key] = 0
145   - sub_entries_dict[subentry_key] += 1
146   - return sub_entries_dict
  42 + write_stats('data/stats_%s_all.txt' % now, all_stats_dict_all)
  43 + write_stats('data/stats_%s_verified.txt' % now, all_stats_dict_verified)
... ...
dictionary/models.py
... ... @@ -46,8 +46,15 @@ class VocabularyFormat(Model):
46 46 format = CharField(max_length=64, primary_key=True, unique=True, db_column='format_slownika')
47 47  
48 48 def __unicode__(self):
49   - return '%s' % self.format
50   -
  49 + return '%s' % self.format
  50 +
class WalentyStat(Model):
    """One named summary statistic of the Walenty dictionary.

    Rows are rebuilt wholesale by update_walenty_stats(); `date` records
    when the row was (re)created.
    """
    # Creation timestamp, set automatically (column: data_aktualizacji).
    date = DateTimeField(auto_now_add=True, db_column='data_aktualizacji')
    # Human-readable statistic name (column: etykieta).
    label = CharField(max_length=128, db_column='etykieta')
    # Statistic value stored as text (column: wartosc).
    value = CharField(max_length=16, db_column='wartosc')

    def __unicode__(self):
        text = '%s:\t%s' % (self.label, self.value)
        return text
51 58  
52 59 class Vocabulary(Model):
53 60 name = CharField(max_length=64, primary_key=True, unique=True, db_column='slownik')
... ...
dictionary/static/js/manage-vocabularies.js
... ... @@ -14,6 +14,7 @@ $(function() {
14 14 $('form.voc-download-form').live('submit', download_form_submit);
15 15 $('#vocabulary-select').live('change', show_vocabulary_info);
16 16 $('form.vocab-perm-manage-form').live('submit', vocab_perm_manage_form_submit);
  17 + $('#other-statistisc').load(ajax_get_other_stats);
17 18 show_vocabulary_info();
18 19 });
19 20  
... ...
dictionary/templates/manage_vocabularies.html
... ... @@ -40,6 +40,12 @@
40 40 </div>
41 41 </td>
42 42 </tr>
  43 + <tr>
  44 + <td colspan='2'>
  45 + <div id="other-statistisc" style="vertical-align:top;">
  46 + </div>
  47 + </td>
  48 + </tr>
43 49 </table>
44 50  
45 51 {% if perms.dictionary.download_vocabulary %}
... ...
dictionary/views.py
... ... @@ -351,6 +351,7 @@ def manage_vocabulary(request):
351 351 'ajax_vocab_perm_manage_form': reverse('vocab_perm_manage_form'),
352 352 'ajax_vocab_perm_manage_form_submit': reverse('vocab_perm_manage_form_submit'),
353 353 'ajax_get_vocabulary_stats': reverse('get_vocabulary_stats'),
  354 + 'ajax_get_other_stats': reverse('get_other_stats')
354 355 }
355 356 return to_return
356 357  
... ...
... ... @@ -210,7 +210,6 @@ urlpatterns += patterns('dictionary.views',
210 210 url(r'^wszystkie_statystyki/$', 'all_statistics'),
211 211 url(r'^zarzadzanie_kontami/$', 'manage_users'),
212 212 url(r'^rozwiniecia_typow_fraz/$', 'manage_arg_realizations'),
213   - #url(r'^statystyki_argumentow/$', 'arg_stats'),
214 213 url(r'^pobieranie_slownika/$', 'download_walenty'),
215 214 )
216 215  
... ... @@ -228,7 +227,8 @@ urlpatterns += patterns('dictionary.ajax_vocabulary_management',
228 227 url(r'^ajax/create_vocabulary/$', 'create_vocabulary'),
229 228 url(r'^ajax/vocab_perm_manage_form/$', 'vocab_perm_manage_form'),
230 229 url(r'^ajax/vocab_perm_manage_form_submit/$', 'vocab_perm_manage_form_submit'),
231   - url(r'^ajax/get_vocabulary_stats/$', 'get_vocabulary_stats'),
  230 + url(r'^ajax/get_vocabulary_stats/$', 'get_vocabulary_stats'),
  231 + url(r'^ajax/get_other_stats/$', 'get_other_stats'),
232 232 )
233 233  
234 234 urlpatterns += SEMANTIC_PATTERNS
... ...