Commit 92426c81077a7359886e57f1b870ae605a6d7f48
1 parent 3ad52b13
Added statistics to auto generated package with Walenty in text format. Added additional statistics table to vocabulary management interface.
Showing 8 changed files with 222 additions and 146 deletions
dictionary/ajax_vocabulary_management.py
... | ... | @@ -24,10 +24,11 @@ import codecs |
24 | 24 | import datetime |
25 | 25 | import HTMLParser |
26 | 26 | import os |
27 | +from collections import Counter | |
27 | 28 | from tempfile import mkdtemp, mkstemp |
28 | 29 | |
29 | 30 | from django.contrib.auth.models import User |
30 | -from django.db.models import Count, Sum, Q | |
31 | +from django.db.models import Count, Max, Sum, Q | |
31 | 32 | from django.http import HttpResponse |
32 | 33 | from django.template.loader import render_to_string |
33 | 34 | from django.utils.encoding import smart_str |
... | ... | @@ -35,10 +36,12 @@ from django.utils.encoding import smart_str |
35 | 36 | from common.decorators import ajax, AjaxError, render |
36 | 37 | from dictionary.forms import ManageVocabPermForm |
37 | 38 | from dictionary.models import Frame_Opinion_Value, Lemma, Lemma_Status, \ |
38 | - POS, Vocabulary, VocabularyFormat, \ | |
39 | + POS, Vocabulary, VocabularyFormat, WalentyStat,\ | |
39 | 40 | sorted_frame_char_values_dict |
40 | 41 | from dictionary.teixml import createteixml |
41 | 42 | |
43 | +LEX_TYPES = ['lex', 'fixed', 'comprepnp'] | |
44 | + | |
42 | 45 | TEXT_VOCABULARY_CLAUSE = u""" |
43 | 46 | % The Polish Valence Dictionary (Walenty) |
44 | 47 | % <date> |
... | ... | @@ -319,4 +322,126 @@ def count_schemas(lemmas): |
319 | 322 | if not schemas_count: |
320 | 323 | schemas_count = 0 |
321 | 324 | return schemas_count |
325 | + | |
326 | +@render('other_stats.html') | |
327 | +@ajax(method='get', encode_result=False) | |
328 | +def get_other_stats(request): | |
329 | + return WalentyStat.objects.order_by('label') | |
330 | + | |
331 | +def get_stats(statuses, pos): | |
332 | + stats_dict = Counter({u'phrases': 0, | |
333 | + u'poss': 0, | |
334 | + u'lemmas': 0, | |
335 | + u'sub_lemmas': 0, | |
336 | + u'schemata': 0, | |
337 | + u'cer_schemata': 0, | |
338 | + u'uncer_schemata': 0, | |
339 | + u'bad_schemata': 0, | |
340 | + u'arch_schemata': 0, | |
341 | + u'col_schemata': 0, | |
342 | + u'vul_schemata': 0, | |
343 | + u'coor_schemata': 0, | |
344 | + u'lex_schemata': 0, | |
345 | + u'coor_lemmas': 0, | |
346 | + u'lex_lemmas': 0}) | |
347 | + | |
348 | + lemmas = Lemma.objects.filter(old=False, | |
349 | + entry_obj__pos__tag=pos).filter(status__in=statuses).distinct() | |
350 | + stats_dict[u'lemmas'] = lemmas.count() | |
351 | + for lemma in lemmas.order_by('entry').all(): | |
352 | + print lemma | |
353 | + stats_dict[u'cer_schemata'] += lemma.frame_opinions.filter(value__value=u'pewny').count() | |
354 | + stats_dict[u'uncer_schemata'] += lemma.frame_opinions.filter(value__value=u'wątpliwy').count() | |
355 | + stats_dict[u'bad_schemata'] += lemma.frame_opinions.filter(value__value=u'zły').count() | |
356 | + stats_dict[u'arch_schemata'] += lemma.frame_opinions.filter(value__value=u'archaiczny').count() | |
357 | + stats_dict[u'col_schemata'] += lemma.frame_opinions.filter(value__value=u'potoczny').count() | |
358 | + stats_dict[u'vul_schemata'] += lemma.frame_opinions.filter(value__value=u'wulgarny').count() | |
359 | + stats_dict[u'schemata'] += lemma.frames.count() | |
360 | + | |
361 | + stats_dict = stats_dict + Counter(get_sub_entries_dict(lemma)) | |
362 | + | |
363 | + has_phraseology = False | |
364 | + has_coordination = False | |
365 | + for frame in lemma.frames.all(): | |
366 | + stats_dict[u'poss'] += frame.positions.count() | |
367 | + flat_frames = frame.positions.annotate(num_args=Count('arguments')).aggregate(Max('num_args'))['num_args__max'] | |
368 | + if flat_frames > 1: | |
369 | + stats_dict[u'coor_schemata'] += 1 | |
370 | + has_coordination = True | |
371 | + for pos in frame.positions.all(): | |
372 | + stats_dict[u'phrases'] += pos.arguments.count() | |
373 | + if frame.positions.filter(arguments__type__in=LEX_TYPES).exists(): | |
374 | + stats_dict[u'lex_schemata'] += 1 | |
375 | + has_phraseology = True | |
376 | + | |
377 | + if has_phraseology: | |
378 | + stats_dict[u'lex_lemmas'] += 1 | |
379 | + if has_coordination: | |
380 | + stats_dict[u'coor_lemmas'] += 1 | |
381 | + | |
382 | + return stats_dict | |
383 | + | |
384 | +def get_sub_entries_dict(lemma): | |
385 | + sub_entries_dict = {'sub_lemmas': 0} | |
386 | + frame_chars_dict = sorted_frame_char_values_dict() | |
387 | + for reflex in frame_chars_dict['sorted_reflex_vals']: | |
388 | + for neg in frame_chars_dict['sorted_neg_vals']: | |
389 | + for pred in frame_chars_dict['sorted_pred_vals']: | |
390 | + for aspect in frame_chars_dict['sorted_aspect_vals']: | |
391 | + matching_frames = lemma.get_frames_by_char_values(reflex_val=reflex, | |
392 | + neg_val=neg, | |
393 | + pred_val=pred, | |
394 | + aspect_val=aspect) | |
395 | + if matching_frames.exists(): | |
396 | + sub_entries_dict[u'sub_lemmas'] += 1 | |
397 | + subentry_key = u'Liczba podhaseł postaci: (%s,%s,%s,%s)' % (reflex.value, neg.value, | |
398 | + pred.value, aspect.value) | |
399 | + if not subentry_key in sub_entries_dict: | |
400 | + sub_entries_dict[subentry_key] = 0 | |
401 | + sub_entries_dict[subentry_key] += 1 | |
402 | + return sub_entries_dict | |
403 | + | |
404 | +def write_stats(stats_path, stats): | |
405 | + try: | |
406 | + outfile = codecs.open(stats_path, 'wt', 'utf-8') | |
407 | + | |
408 | + outfile.write(u'Łączna liczba haseł:\t%d\n\n' % stats['lemmas']) | |
409 | + outfile.write(u'Łączna liczba pozycji w schematach:\t%d\n' % stats['poss']) | |
410 | + outfile.write(u'Łączna liczba realizacji w schematach:\t%d\n\n' % stats['phrases']) | |
411 | + | |
412 | + outfile.write(u'Łączna liczba podhaseł:\t%d\n' % stats['sub_lemmas']) | |
413 | + outfile.write(u'Liczba podhaseł postaci (ZWROTNOŚĆ, NEGATYWNOŚĆ, PREDYKATYWNOŚĆ, ASPEKT)\n') | |
414 | + write_subschemas_stats(outfile, stats) | |
415 | + | |
416 | + outfile.write(u'Łączna liczba schematów:\t%d\n' % stats['schemata']) | |
417 | + outfile.write(u'Liczba schematów pewnych:\t%d\n' % stats['cer_schemata']) | |
418 | + outfile.write(u'Liczba schematów wątpliwych:\t%d\n' % stats['uncer_schemata']) | |
419 | + outfile.write(u'Liczba schematów złych:\t%d\n' % stats['bad_schemata']) | |
420 | + outfile.write(u'Liczba schematów archaicznych:\t%d\n' % stats['arch_schemata']) | |
421 | + outfile.write(u'Liczba schematów potocznych:\t%d\n' % stats['col_schemata']) | |
422 | + outfile.write(u'Liczba schematów wulgarnych:\t%d\n\n' % stats['vul_schemata']) | |
423 | + | |
424 | + outfile.write(u'Łączna liczba schematów z koordynacją:\t%d\n' % stats['coor_schemata']) | |
425 | + outfile.write(u'Łączna liczba schematów zleksykalizowanych:\t%d\n\n' % stats['lex_schemata']) | |
426 | + | |
427 | + outfile.write(u'Łączna liczba haseł zawierających pozycje z koordynacją:\t%d\n' % stats['coor_lemmas']) | |
428 | + outfile.write(u'Łączna liczba haseł zawierających schematy zleksykalizowane:\t%d\n\n' % stats['lex_lemmas']) | |
429 | + finally: | |
430 | + outfile.close() | |
431 | + | |
432 | +def write_subschemas_stats(stats_file, stats): | |
433 | + subschemas_stats = ['%s:\t%d\n' % (k, v) for k,v in stats.iteritems() if k.startswith(u'Liczba podhaseł postaci:')] | |
434 | + subschemas_stats.sort() | |
435 | + for stat in subschemas_stats: | |
436 | + stats_file.write(stat) | |
437 | + stats_file.write('\n') | |
438 | + | |
439 | +def update_walenty_stats(stats): | |
440 | + WalentyStat.objects.all().delete() | |
441 | + WalentyStat(label=u'Łączna liczba haseł', value=str(stats['lemmas'])).save() | |
442 | + WalentyStat(label=u'Łączna liczba pozycji w schematach', value=str(stats['poss'])).save() | |
443 | + WalentyStat(label=u'Łączna liczba realizacji w schematach', value=str(stats['phrases'])).save() | |
444 | + WalentyStat(label=u'Łączna liczba schematów', value=str(stats['schemata'])).save() | |
445 | + WalentyStat(label=u'Łączna liczba schematów z koordynacją', value=str(stats['coor_schemata'])).save() | |
446 | + WalentyStat(label=u'Łączna liczba schematów zleksykalizowanych', value=str(stats['lex_schemata'])).save() | |
322 | 447 | |
323 | 448 | \ No newline at end of file |
... | ... |
dictionary/management/commands/create_walenty.py
1 | 1 | #-*- coding:utf-8 -*- |
2 | 2 | |
3 | -#Copyright (c) 2013, Bartłomiej Nitoń | |
3 | +#Copyright (c) 2015, Bartłomiej Nitoń | |
4 | 4 | #All rights reserved. |
5 | 5 | |
6 | 6 | #Redistribution and use in source and binary forms, with or without modification, are permitted provided |
... | ... | @@ -23,11 +23,14 @@ |
23 | 23 | import datetime |
24 | 24 | import os |
25 | 25 | import tarfile |
26 | +from collections import Counter | |
26 | 27 | |
27 | 28 | from django.core.management.base import BaseCommand |
28 | 29 | |
29 | 30 | from accounts.models import User |
30 | -from dictionary.ajax_vocabulary_management import create_text_walenty | |
31 | +from dictionary.ajax_vocabulary_management import create_text_walenty, \ | |
32 | + get_stats, write_stats, \ | |
33 | + update_walenty_stats | |
31 | 34 | from dictionary.ajax_argument_realizations import create_realizations_file |
32 | 35 | from dictionary.models import Frame_Opinion, Lemma, Vocabulary, POS, \ |
33 | 36 | get_checked_statuses, get_ready_statuses |
... | ... | @@ -35,35 +38,49 @@ from settings import WALENTY_PATH |
35 | 38 | |
36 | 39 | class Command(BaseCommand): |
37 | 40 | args = 'none' |
38 | - help = 'Script for creating Walenty vocabulary.' | |
39 | - | |
41 | + | |
40 | 42 | def handle(self, *args, **options): |
41 | - filename_base = '%s_%s' % ('walenty', datetime.datetime.now().strftime('%Y%m%d')) | |
42 | - base_path = os.path.join(WALENTY_PATH, filename_base) | |
43 | + now = datetime.datetime.now().strftime('%Y%m%d') | |
44 | + filename_base = '%s_%s' % ('walenty', now) | |
43 | 45 | realizations_path = os.path.join(WALENTY_PATH, |
44 | - '%s_%s.txt' % ('phrase_types_expand', datetime.datetime.now().strftime('%Y%m%d'))) | |
46 | + ' %s_%s.txt' % ('phrase_types_expand', now)) | |
47 | + checked_stats_path = os.path.join(WALENTY_PATH, u'%s_%s' % (filename_base.replace('walenty', 'stats'), | |
48 | + 'verified')) | |
49 | + ready_stats_path = os.path.join(WALENTY_PATH, u'%s_%s' % (filename_base.replace('walenty', 'stats'), | |
50 | + 'all')) | |
45 | 51 | create_realizations_file(realizations_path) |
46 | 52 | try: |
53 | + all_stats = Counter({}) | |
54 | + verified_stats = Counter({}) | |
55 | + base_path = os.path.join(WALENTY_PATH, filename_base) | |
47 | 56 | archive = tarfile.open(base_path + '.tar.gz', 'w:gz') |
48 | 57 | os.chdir(WALENTY_PATH) |
49 | 58 | archive.add(os.path.basename(realizations_path)) |
50 | 59 | for pos in POS.objects.exclude(tag=u'unk').order_by('priority'): |
51 | - create_pos_archive(archive, pos, filename_base) | |
60 | + pos_stats = create_pos_archive_and_get_stats(archive, pos, filename_base) | |
61 | + all_stats = all_stats + Counter(pos_stats['all']) | |
62 | + verified_stats = verified_stats + Counter(pos_stats['verified']) | |
63 | + write_stats(checked_stats_path, verified_stats) | |
64 | + archive.add(os.path.basename(checked_stats_path)) | |
65 | + write_stats(ready_stats_path, all_stats) | |
66 | + archive.add(os.path.basename(ready_stats_path)) | |
52 | 67 | finally: |
53 | 68 | archive.close() |
54 | 69 | os.remove(realizations_path) |
70 | + os.remove(checked_stats_path) | |
71 | + os.remove(ready_stats_path) | |
55 | 72 | |
56 | -def create_pos_archive(archive, pos, filename_base): | |
73 | +def create_pos_archive_and_get_stats(archive, pos, filename_base): | |
74 | + all_stats = {} | |
75 | + checked_stats = {} | |
57 | 76 | try: |
58 | - all_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'all')) | |
59 | - checked_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'verified')) | |
60 | 77 | checked_statuses = get_checked_statuses() |
61 | 78 | ready_statuses = get_ready_statuses() |
62 | - | |
63 | 79 | lemmas = Lemma.objects.filter(old=False, entry_obj__pos=pos).order_by('entry') |
64 | 80 | checked_lemmas = lemmas.filter(status__in=checked_statuses) |
65 | 81 | ready_lemmas = lemmas.filter(status__in=ready_statuses) |
66 | 82 | |
83 | + all_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'all')) | |
67 | 84 | walenty_path_ready = create_text_walenty(file_name=all_path, |
68 | 85 | lemmas=ready_lemmas, |
69 | 86 | vocabularies=Vocabulary.objects.none(), |
... | ... | @@ -72,6 +89,10 @@ def create_pos_archive(archive, pos, filename_base): |
72 | 89 | owners=User.objects.none(), |
73 | 90 | poss=POS.objects.filter(pk=pos.pk), |
74 | 91 | add_frame_opinions=True) |
92 | + all_filename = os.path.basename(walenty_path_ready) | |
93 | + archive.add(name=all_filename, arcname=os.path.join(u'%ss' % pos.tag, all_filename)) | |
94 | + | |
95 | + checked_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'verified')) | |
75 | 96 | walenty_path_checked = create_text_walenty(file_name=checked_path, |
76 | 97 | lemmas=checked_lemmas, |
77 | 98 | vocabularies=Vocabulary.objects.none(), |
... | ... | @@ -80,11 +101,29 @@ def create_pos_archive(archive, pos, filename_base): |
80 | 101 | owners=User.objects.none(), |
81 | 102 | poss=POS.objects.filter(pk=pos.pk), |
82 | 103 | add_frame_opinions=True) |
83 | - all_filename = os.path.basename(walenty_path_ready) | |
84 | 104 | checked_filename = os.path.basename(walenty_path_checked) |
85 | - archive.add(name=all_filename, arcname=os.path.join(u'%ss' % pos.tag, all_filename)) | |
86 | 105 | archive.add(name=checked_filename, arcname=os.path.join(u'%ss' % pos.tag, checked_filename)) |
106 | + | |
107 | + all_stats = get_stats(ready_statuses, pos.tag) | |
108 | + all_stats_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base.replace('walenty', 'stats'), | |
109 | + pos.tag, 'all')) | |
110 | + write_stats(all_stats_path, all_stats) | |
111 | + all_stats_filename = os.path.basename(all_stats_path) | |
112 | + archive.add(name=all_stats_filename, arcname=os.path.join(u'%ss' % pos.tag, all_stats_filename)) | |
113 | + | |
114 | + checked_stats = get_stats(checked_statuses, pos.tag) | |
115 | + checked_stats_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base.replace('walenty', 'stats'), | |
116 | + pos.tag, 'verified')) | |
117 | + write_stats(checked_stats_path, checked_stats) | |
118 | + checked_stats_filename = os.path.basename(checked_stats_path) | |
119 | + archive.add(name=checked_stats_filename, arcname=os.path.join(u'%ss' % pos.tag, checked_stats_filename)) | |
120 | + | |
121 | + update_walenty_stats(all_stats) | |
87 | 122 | finally: |
88 | - os.remove(walenty_path_checked) | |
89 | 123 | os.remove(walenty_path_ready) |
124 | + os.remove(walenty_path_checked) | |
125 | + os.remove(all_stats_path) | |
126 | + os.remove(checked_stats_path) | |
127 | + return {'all': all_stats, | |
128 | + 'verified': checked_stats} | |
90 | 129 | |
91 | 130 | \ No newline at end of file |
... | ... |
dictionary/management/commands/get_stats.py
1 | 1 | #-*- coding:utf-8 -*- |
2 | -# author: B.Niton | |
3 | 2 | |
4 | -import codecs | |
5 | 3 | import datetime |
6 | 4 | from collections import Counter |
7 | 5 | |
8 | 6 | from django.core.management.base import BaseCommand |
9 | -from django.db.models import Count, Max | |
10 | 7 | |
11 | -from dictionary.models import Lemma, get_checked_statuses, get_ready_statuses, \ | |
12 | - sorted_frame_char_values_dict | |
13 | - | |
14 | -LEX_TYPES = ['lex', 'fixed', 'comprepnp'] | |
8 | +from dictionary.models import get_checked_statuses, get_ready_statuses | |
9 | +from dictionary.ajax_vocabulary_management import get_stats, write_stats | |
15 | 10 | |
16 | 11 | class Command(BaseCommand): |
17 | 12 | help = 'Get Walenty statistics.' |
... | ... | @@ -21,126 +16,28 @@ class Command(BaseCommand): |
21 | 16 | all_statuses = get_ready_statuses() |
22 | 17 | verified_statuses = get_checked_statuses() |
23 | 18 | |
24 | -# nouns_stats_dict_all = Counter(get_stats('data/statystyki_2015_06_30_nouns_all.txt', all_statuses, 'noun')) | |
25 | -# nouns_stats_dict_verified = Counter(get_stats('data/statystyki_2015_06_30_nouns_verified.txt', verified_statuses, 'noun')) | |
26 | - | |
27 | -# adjs_stats_dict_all = Counter(get_stats('data/statystyki_2015_06_30_adjs_all.txt', all_statuses, 'adj')) | |
28 | -# adjs_stats_dict_verified = Counter(get_stats('data/statystyki_2015_06_30_adjs_verified.txt', verified_statuses, 'adj')) | |
29 | -# | |
30 | -# verbs_stats_dict_all = Counter(get_stats('data/statystyki_2015_06_30_verbs_all.txt', all_statuses, 'verb')) | |
31 | -# verbs_stats_dict_verified = Counter(get_stats('data/statystyki_2015_06_30_verbs_verified.txt', verified_statuses, 'verb')) | |
32 | -# | |
33 | - advs_stats_dict_all = Counter(get_stats(all_statuses, 'adv')) | |
34 | - write_stats('data/stats_%s_advs_all.txt' % now, advs_stats_dict_all) | |
35 | -# advs_stats_dict_verified = Counter(get_stats('data/statystyki_2015_06_30_advs_verified.txt', verified_statuses, 'adv')) | |
36 | -# | |
37 | -# all_stats_dict_all = nouns_stats_dict_all + adjs_stats_dict_all + verbs_stats_dict_all + advs_stats_dict_all | |
38 | -# all_stats_dict_verified = nouns_stats_dict_verified + adjs_stats_dict_verified + verbs_stats_dict_verified + advs_stats_dict_verified | |
39 | -# | |
40 | -# write_all_stats('data/statystyki_2015_06_30_all.txt', all_stats_dict_all) | |
41 | -# write_all_stats('data/statystyki_2015_06_30_verified.txt', all_stats_dict_verified) | |
42 | - | |
43 | -def write_stats(stats_path, stats_dict): | |
44 | - try: | |
45 | - outfile = codecs.open(stats_path, 'wt', 'utf-8') | |
19 | + nouns_stats_dict_all = Counter(get_stats(all_statuses, 'noun')) | |
20 | + write_stats('data/stats_%s_nouns_all.txt' % now, nouns_stats_dict_all) | |
21 | + nouns_stats_dict_verified = Counter(get_stats(verified_statuses, 'noun')) | |
22 | + write_stats('data/stats_%s_nouns_verified.txt' % now, nouns_stats_dict_verified) | |
46 | 23 | |
47 | - outfile.write(u'Liczba typów fraz:\t%d\n' % stats_path['phrases']) | |
48 | - outfile.write(u'Liczba pozycji:\t%d\n' % stats_path['poss']) | |
49 | - outfile.write(u'Liczba haseł:\t%d\n\n' % stats_path['lemmas']) | |
24 | + adjs_stats_dict_all = Counter(get_stats(all_statuses, 'adj')) | |
25 | + write_stats('data/stats_%s_adjs_all.txt' % now, adjs_stats_dict_all) | |
26 | + adjs_stats_dict_verified = Counter(get_stats(verified_statuses, 'adj')) | |
27 | + write_stats('data/stats_%s_adjs_verified.txt' % now, adjs_stats_dict_verified) | |
50 | 28 | |
51 | - outfile.write(u'Łączna liczba podhaseł:\t%d\n' % stats_path['sub_lemmas']) | |
52 | - outfile.write(u'Liczba podhaseł postaci (ZWROTNOŚĆ, NEGATYWNOŚĆ, PREDYKATYWNOŚĆ, ASPEKT)\n') | |
53 | -#### dokonczyc | |
54 | - | |
55 | - outfile.write(u'Łączna liczba schematów:\t%d\n' % stats_path['schemata']) | |
56 | - outfile.write(u'Liczba schematów pewnych:\t%d\n' % stats_path['cer_schemata']) | |
57 | - outfile.write(u'Liczba schematów wątpliwych:\t%d\n' % stats_path['uncer_schemata']) | |
58 | - outfile.write(u'Liczba schematów złych:\t%d\n' % stats_path['bad_schemata']) | |
59 | - outfile.write(u'Liczba schematów archaicznych:\t%d\n' % stats_path['arch_schemata']) | |
60 | - outfile.write(u'Liczba schematów potocznych:\t%d\n' % stats_path['col_schemata']) | |
61 | - outfile.write(u'Liczba schematów wulgarnych:\t%d\n\n' % stats_path['vul_schemata']) | |
29 | + verbs_stats_dict_all = Counter(get_stats(all_statuses, 'verb')) | |
30 | + write_stats('data/stats_%s_verbs_all.txt' % now, verbs_stats_dict_all) | |
31 | + verbs_stats_dict_verified = Counter(get_stats(verified_statuses, 'verb')) | |
32 | + write_stats('data/stats_%s_verbs_verified.txt' % now, verbs_stats_dict_verified) | |
62 | 33 | |
63 | - outfile.write(u'Liczba schematów z koordynacją:\t%d\n' % stats_path['coor_schemata']) | |
64 | - outfile.write(u'Liczba schematów zleksykalizowanych:\t%d\n\n' % stats_path['lex_schemata']) | |
34 | + advs_stats_dict_all = Counter(get_stats(all_statuses, 'adv')) | |
35 | + write_stats('data/stats_%s_advs_all.txt' % now, advs_stats_dict_all) | |
36 | + advs_stats_dict_verified = Counter(get_stats(verified_statuses, 'adv')) | |
37 | + write_stats('data/stats_%s_advs_verified.txt' % now, advs_stats_dict_verified) | |
65 | 38 | |
66 | - outfile.write(u'Hasła zawierające pozycje z koordynacją:\t%d\n' % stats_path['coor_lemmas']) | |
67 | - outfile.write(u'Hasła zawierające schematy zleksykalizowane:\t%d\n\n' % stats_path['lex_lemmas']) | |
39 | + all_stats_dict_all = nouns_stats_dict_all + adjs_stats_dict_all + verbs_stats_dict_all + advs_stats_dict_all | |
40 | + all_stats_dict_verified = nouns_stats_dict_verified + adjs_stats_dict_verified + verbs_stats_dict_verified + advs_stats_dict_verified | |
68 | 41 | |
69 | - except: | |
70 | - outfile.close() | |
71 | - | |
72 | -def get_stats(statuses, pos): | |
73 | - stats_dict = Counter({u'phrases': 0, | |
74 | - u'poss': 0, | |
75 | - u'lemmas': 0, | |
76 | - u'sub_lemmas': 0, | |
77 | - u'schemata': 0, | |
78 | - u'cer_schemata': 0, | |
79 | - u'uncer_schemata': 0, | |
80 | - u'bad_schemata': 0, | |
81 | - u'arch_schemata': 0, | |
82 | - u'col_schemata': 0, | |
83 | - u'vul_schemata': 0, | |
84 | - u'coor_schemata': 0, | |
85 | - u'lex_schemata': 0, | |
86 | - u'coor_lemmas': 0, | |
87 | - u'lex_lemmas': 0}) | |
88 | - | |
89 | - lemmas = Lemma.objects.filter(old=False, | |
90 | - entry_obj__pos__tag=pos).filter(status__in=statuses).distinct() | |
91 | - for lemma in lemmas.order_by('entry').all(): | |
92 | - print lemma | |
93 | - stats_dict[u'lemmas'] += 1 | |
94 | - stats_dict[u'cer_schemata'] += lemma.frame_opinions.filter(value__value=u'pewny').count() | |
95 | - stats_dict[u'uncer_schemata'] += lemma.frame_opinions.filter(value__value=u'wątpliwy').count() | |
96 | - stats_dict[u'bad_schemata'] += lemma.frame_opinions.filter(value__value=u'zły').count() | |
97 | - stats_dict[u'arch_schemata'] += lemma.frame_opinions.filter(value__value=u'archaiczny').count() | |
98 | - stats_dict[u'col_schemata'] += lemma.frame_opinions.filter(value__value=u'potoczny').count() | |
99 | - stats_dict[u'vul_schemata'] += lemma.frame_opinions.filter(value__value=u'wulgarny').count() | |
100 | - stats_dict[u'schemata'] += lemma.frames.count() | |
101 | - | |
102 | - stats_dict = stats_dict + Counter(get_sub_entries_dict(lemma)) | |
103 | - | |
104 | - has_phraseology = False | |
105 | - has_coordination = False | |
106 | - for frame in lemma.frames.all(): | |
107 | - stats_dict[u'poss'] += frame.positions.count() | |
108 | - flat_frames = frame.positions.annotate(num_args=Count('arguments')).aggregate(Max('num_args'))['num_args__max'] | |
109 | - if flat_frames > 1: | |
110 | - stats_dict[u'coor_schemata'] += 1 | |
111 | - has_coordination = True | |
112 | - for pos in frame.positions.all(): | |
113 | - stats_dict[u'phrases'] += pos.arguments.count() | |
114 | - if frame.positions.filter(arguments__type__in=LEX_TYPES).exists(): | |
115 | - stats_dict[u'lex_schemata'] += 1 | |
116 | - has_phraseology = True | |
117 | - | |
118 | - if has_phraseology: | |
119 | - stats_dict[u'lex_lemmas'] += 1 | |
120 | - if has_coordination: | |
121 | - stats_dict[u'coor_lemmas'] += 1 | |
122 | - | |
123 | - return stats_dict | |
124 | - | |
125 | -def get_sub_entries_dict(lemma): | |
126 | - sub_entries_dict = {} | |
127 | - frame_chars_dict = sorted_frame_char_values_dict() | |
128 | - for reflex in frame_chars_dict['sorted_reflex_vals']: | |
129 | - for neg in frame_chars_dict['sorted_neg_vals']: | |
130 | - for pred in frame_chars_dict['sorted_pred_vals']: | |
131 | - for aspect in frame_chars_dict['sorted_aspect_vals']: | |
132 | - matching_frames = lemma.get_frames_by_char_values(reflex_val=reflex, | |
133 | - neg_val=neg, | |
134 | - pred_val=pred, | |
135 | - aspect_val=aspect) | |
136 | - if matching_frames.exists(): | |
137 | - if not u'sub_lemmas' in sub_entries_dict: | |
138 | - sub_entries_dict[u'sub_lemmas'] = 0 | |
139 | - sub_entries_dict[u'sub_lemmas'] += 1 | |
140 | - | |
141 | - subentry_key = u'Liczba podhaseł postaci: (%s,%s,%s,%s)' % (reflex.value, neg.value, | |
142 | - pred.value, aspect.value) | |
143 | - if not subentry_key in sub_entries_dict: | |
144 | - sub_entries_dict[subentry_key] = 0 | |
145 | - sub_entries_dict[subentry_key] += 1 | |
146 | - return sub_entries_dict | |
42 | + write_stats('data/stats_%s_all.txt' % now, all_stats_dict_all) | |
43 | + write_stats('data/stats_%s_verified.txt' % now, all_stats_dict_verified) | |
... | ... |
dictionary/models.py
... | ... | @@ -46,8 +46,15 @@ class VocabularyFormat(Model): |
46 | 46 | format = CharField(max_length=64, primary_key=True, unique=True, db_column='format_slownika') |
47 | 47 | |
48 | 48 | def __unicode__(self): |
49 | - return '%s' % self.format | |
50 | - | |
49 | + return '%s' % self.format | |
50 | + | |
51 | +class WalentyStat(Model): | |
52 | + date = DateTimeField(auto_now_add=True, db_column='data_aktualizacji') | |
53 | + label = CharField(max_length=128, db_column='etykieta') | |
54 | + value = CharField(max_length=16, db_column='wartosc') | |
55 | + | |
56 | + def __unicode__(self): | |
57 | + return '%s:\t%s' % (self.label, self.value) | |
51 | 58 | |
52 | 59 | class Vocabulary(Model): |
53 | 60 | name = CharField(max_length=64, primary_key=True, unique=True, db_column='slownik') |
... | ... |
dictionary/static/js/manage-vocabularies.js
... | ... | @@ -14,6 +14,7 @@ $(function() { |
14 | 14 | $('form.voc-download-form').live('submit', download_form_submit); |
15 | 15 | $('#vocabulary-select').live('change', show_vocabulary_info); |
16 | 16 | $('form.vocab-perm-manage-form').live('submit', vocab_perm_manage_form_submit); |
17 | + $('#other-statistisc').load(ajax_get_other_stats); | |
17 | 18 | show_vocabulary_info(); |
18 | 19 | }); |
19 | 20 | |
... | ... |
dictionary/templates/manage_vocabularies.html
dictionary/views.py
... | ... | @@ -351,6 +351,7 @@ def manage_vocabulary(request): |
351 | 351 | 'ajax_vocab_perm_manage_form': reverse('vocab_perm_manage_form'), |
352 | 352 | 'ajax_vocab_perm_manage_form_submit': reverse('vocab_perm_manage_form_submit'), |
353 | 353 | 'ajax_get_vocabulary_stats': reverse('get_vocabulary_stats'), |
354 | + 'ajax_get_other_stats': reverse('get_other_stats') | |
354 | 355 | } |
355 | 356 | return to_return |
356 | 357 | |
... | ... |
urls.py
... | ... | @@ -210,7 +210,6 @@ urlpatterns += patterns('dictionary.views', |
210 | 210 | url(r'^wszystkie_statystyki/$', 'all_statistics'), |
211 | 211 | url(r'^zarzadzanie_kontami/$', 'manage_users'), |
212 | 212 | url(r'^rozwiniecia_typow_fraz/$', 'manage_arg_realizations'), |
213 | - #url(r'^statystyki_argumentow/$', 'arg_stats'), | |
214 | 213 | url(r'^pobieranie_slownika/$', 'download_walenty'), |
215 | 214 | ) |
216 | 215 | |
... | ... | @@ -228,7 +227,8 @@ urlpatterns += patterns('dictionary.ajax_vocabulary_management', |
228 | 227 | url(r'^ajax/create_vocabulary/$', 'create_vocabulary'), |
229 | 228 | url(r'^ajax/vocab_perm_manage_form/$', 'vocab_perm_manage_form'), |
230 | 229 | url(r'^ajax/vocab_perm_manage_form_submit/$', 'vocab_perm_manage_form_submit'), |
231 | - url(r'^ajax/get_vocabulary_stats/$', 'get_vocabulary_stats'), | |
230 | + url(r'^ajax/get_vocabulary_stats/$', 'get_vocabulary_stats'), | |
231 | + url(r'^ajax/get_other_stats/$', 'get_other_stats'), | |
232 | 232 | ) |
233 | 233 | |
234 | 234 | urlpatterns += SEMANTIC_PATTERNS |
... | ... |