Commit 3ad52b13a56f9b536ff84948d9ab21fe31e3b5a9
1 parent
536ab813
Added installation files and information to repository.
Showing
4 changed files
with
244 additions
and
1021 deletions
INSTALL
1 | -Slowal installation guide: | ||
2 | -To run Slowal you will need at least: | ||
3 | - - python (tested on 2.7 version); | ||
4 | - - django with django-registration (tested on version 0.7) and django-extensions packages; | ||
5 | - - database system (tested on PostgreSQL 9.1); | ||
6 | - - morphological analyser Morfeusz (http://sgjp.pl/morfeusz/). | ||
7 | - | ||
8 | - Slowal can be installed on production server as any other Django application. Comprehensive tutorial for setting Django on production server can be found at: http://bailey.st/blog/2012/05/02/ubuntu-django-postgresql-and-nginx-a-rock-solid-web-stack/. Tutorial can also be helpful for setting database for Slowal project. | ||
9 | - | ||
10 | -Installation: | ||
11 | - 1) Change database_data.py file to get connection to yours database (see: http://bailey.st/blog/2012/05/02/ubuntu-django-postgresql-and-nginx-a-rock-solid-web-stack/ for hints). | ||
12 | -When you are creating database it is important to make database coding 'utf8' and locale Polish. | ||
13 | -In PostgreSQL such database would be created by command: | ||
14 | ->>> createdb databaseName -E UTF8 -T template0 -l pl_PL.utf8 | ||
15 | - 2) If you want to run Slowal in the domain subfolder change SITE_PREFIX = '' value to other, for example SITE_PREFIX = '/Slowal' | ||
16 | - 3) Create database table running: | ||
17 | ->>> python manage.py syncdb | ||
18 | -command in the main folder of project. Remember to create superuser it will help you in managing database and give access to all Slowal functionalities. | ||
19 | - 4) Fill database with initial values running: | ||
20 | ->>> python manage.py import_models | ||
21 | -command in the main folder of project. | ||
22 | - 5) Create default user groups running: | ||
23 | ->>> python manage.py create_groups.py | ||
24 | -command in the main folder of project. | ||
25 | - 6) Slowal is ready to run. | ||
26 | - | ||
27 | -Slowal was tested on Safari, Opera, Firefox and Chrome web browsers. Working on Internet Explorer compatibility is still in progress | 1 | +Zainstaluj pipa: |
2 | +>> apt-get update | ||
3 | +>> apt-get -y install python-pip | ||
4 | + | ||
5 | +Zainstaluj Django w wersji 1.4.8: | ||
6 | +>> pip install Django==1.4.8 | ||
7 | + | ||
8 | +Zainstaluj Django south: | ||
9 | +>> apt-get install python-django-south | ||
10 | + | ||
11 | +Zainstaluj Django extensions: | ||
12 | +>> apt-get install python-django-extensions | ||
13 | + | ||
14 | +Zainstaluj Django registration: | ||
15 | +>> apt-get install python-django-registration | ||
16 | + | ||
17 | +Zainstaluj pythonowy moduł lxml: | ||
18 | +>> apt-get install python-lxml | ||
19 | + | ||
20 | +Zainstaluj Postgresa: | ||
21 | +>> sudo apt-get update | ||
22 | +>> sudo apt-get install postgresql postgresql-contrib | ||
23 | +Zmień użytkownika na postgres: | ||
24 | +>> sudo -i -u postgres | ||
25 | +A następnie dodaj poszczególne role do postgresa komendą: | ||
26 | +>> createuser --interactive | ||
27 | +Stwórz pustą bazę danych dla Slowala: | ||
28 | +>> createdb slowal -E UTF8 -T template0 -l pl_PL.utf8 | ||
29 | +Jeśli locale pl_PL.utf8 nie istnieje dodatkowo należy uruchomić komendy: | ||
30 | +>> sudo locale-gen pl_PL.utf8 | ||
31 | +>> service postgresql restart | ||
32 | +Załaduj dump bazy danych poleceniem (zrzut bazy umieszczony jest w archiwum INSTALL_PACK.zip): | ||
33 | +>> psql slowal < obraz_bazy.db | ||
34 | + | ||
35 | +Zainstaluj gita: | ||
36 | +>> apt-get install git | ||
37 | + | ||
38 | +Sklonuj repozytorium gitowe z GitLaba: | ||
39 | +>> git clone http://git.nlp.ipipan.waw.pl/walenty/Slowal.git | ||
40 | + | ||
41 | +Stwórz folder "Walenty" w folderze "data": | ||
42 | +>> cd data | ||
43 | +>> mkdir Walenty | ||
44 | + | ||
45 | +Dodaj w głównym folderze projektu plik konfiguracyjny settings.py (plik umieszczony jest w archiwum INSTALL_PACK.zip): | ||
46 | + Zmień w nim zmienną STATIC_ROOT, tak by wskazywała na położenie plików statycznych strony, np.: | ||
47 | + STATIC_ROOT = "/home/zil/static/Slowal" | ||
48 | + | ||
49 | +Dodaj w głównym folderze projektu plik konfiguracyjny database_data.py oraz zdefiniuj w nim połączenie z bazą danych, np.: | ||
50 | + DATABASES = { | ||
51 | + 'default': { | ||
52 | + 'ENGINE': 'django.db.backends.postgresql_psycopg2', | ||
53 | + 'NAME': 'slowal', | ||
54 | + 'USER': 'zil', | ||
55 | + 'PASSWORD': '', | ||
56 | + 'HOST': '', | ||
57 | + 'PORT': '5432', | ||
58 | + } | ||
59 | + } | ||
60 | + | ||
61 | +Zainstaluj moduł psycopg2: | ||
62 | +>> sudo apt-get install python-psycopg2 | ||
63 | + | ||
64 | +Zgraj pliki statyczne do dedykowanego katalogu poleceniem: | ||
65 | +>> python manage.py collectstatic | ||
66 | + | ||
67 | +Zainstaluj Apacha: | ||
68 | +>> apt-get install apache2 | ||
69 | + | ||
70 | +Zainstaluj mod-wsgi: | ||
71 | +>> apt-get install libapache2-mod-wsgi | ||
72 | + | ||
73 | +Utwórz plik slowal.wsgi odpowiednio definiując w nim ścieżki do plików statycznych. Przykładowa treść pliku poniżej: | ||
74 | +-------------------------------------------- | ||
75 | +import os, sys | ||
76 | + | ||
77 | +sys.path.append('/home/zil/static') | ||
78 | +sys.path.append('/home/zil/static/Slowal') | ||
79 | +os.environ['DJANGO_SETTINGS_MODULE'] = 'Slowal.settings' | ||
80 | + | ||
81 | +import django.core.handlers.wsgi | ||
82 | + | ||
83 | +application = django.core.handlers.wsgi.WSGIHandler() | ||
84 | +-------------------------------------------- | ||
85 | + | ||
86 | +Skonfiguruj apacha dodając plik konfiguracyjny (np. o nazwie slowal.conf) do folderu sites-available apacha (domyślnie /etc/apache2/sites-available/), ścieżka WSGIScriptAlias musi wskazywać na plik slowal.wsgi. Przykładowy plik konfiguracyjny poniżej: | ||
87 | +-------------------------------------------- | ||
88 | +<VirtualHost *:80> | ||
89 | + ServerAdmin bartek.niton@gmail.com | ||
90 | + ServerName slowal.nlp.ipipan.waw.pl | ||
91 | + | ||
92 | + ServerAlias walenty.ipipan.waw.pl | ||
93 | + | ||
94 | + DocumentRoot /home/zil/Slowal/templates/ | ||
95 | + <Directory /> | ||
96 | + Options FollowSymLinks | ||
97 | + AllowOverride None | ||
98 | + Require all granted | ||
99 | + </Directory> | ||
100 | + WSGIScriptAlias / /home/zil/scripts/slowal.wsgi | ||
101 | + WSGIDaemonProcess Slowal user=zil group=zil processes=2 threads=15 | ||
102 | + WSGIProcessGroup Slowal | ||
103 | + Alias /static/ /home/zil/static/Slowal/ | ||
104 | + <Directory "/home/zil/static/Slowal"> | ||
105 | + Require all granted | ||
106 | + </Directory> | ||
107 | + | ||
108 | + ErrorLog /home/zil/logs/Slowal/error.log | ||
109 | + CustomLog /home/zil/logs/Slowal/access.log combined | ||
110 | +</VirtualHost> | ||
111 | +-------------------------------------------- | ||
112 | + | ||
113 | +Uruchom stronę poleceniem: | ||
114 | +>> a2ensite slowal.conf | ||
115 | + | ||
116 | +Zrestartuj apacha: | ||
117 | +>> sudo service apache2 restart | ||
118 | + | ||
119 | +Zainstaluj Morfeusza2 zgodnie z instrukcjami na stronie http://sgjp.pl/morfeusz/dopobrania.html. | ||
120 | + | ||
121 | +Ustaw w crontabie cykliczne uruchamianie komend create_walenty i count_positions_occurrences: | ||
122 | +1 0 * * 5 python /home/zil/Slowal/manage.py create_walenty | ||
123 | +0 1 * * * python /home/zil/Slowal/manage.py count_positions_occurrences |
dictionary/management/commands/create_walenty.py
@@ -29,8 +29,8 @@ from django.core.management.base import BaseCommand | @@ -29,8 +29,8 @@ from django.core.management.base import BaseCommand | ||
29 | from accounts.models import User | 29 | from accounts.models import User |
30 | from dictionary.ajax_vocabulary_management import create_text_walenty | 30 | from dictionary.ajax_vocabulary_management import create_text_walenty |
31 | from dictionary.ajax_argument_realizations import create_realizations_file | 31 | from dictionary.ajax_argument_realizations import create_realizations_file |
32 | -from dictionary.models import Frame_Opinion, Lemma, Lemma_Status, \ | ||
33 | - LemmaStatusType, Vocabulary, POS | 32 | +from dictionary.models import Frame_Opinion, Lemma, Vocabulary, POS, \ |
33 | + get_checked_statuses, get_ready_statuses | ||
34 | from settings import WALENTY_PATH | 34 | from settings import WALENTY_PATH |
35 | 35 | ||
36 | class Command(BaseCommand): | 36 | class Command(BaseCommand): |
@@ -87,12 +87,4 @@ def create_pos_archive(archive, pos, filename_base): | @@ -87,12 +87,4 @@ def create_pos_archive(archive, pos, filename_base): | ||
87 | finally: | 87 | finally: |
88 | os.remove(walenty_path_checked) | 88 | os.remove(walenty_path_checked) |
89 | os.remove(walenty_path_ready) | 89 | os.remove(walenty_path_ready) |
90 | - | ||
91 | -def get_checked_statuses(): | ||
92 | - checked_type = LemmaStatusType.objects.get(sym_name='checked') | ||
93 | - return Lemma_Status.objects.filter(type__priority__gte=checked_type.priority).distinct() | ||
94 | - | ||
95 | -def get_ready_statuses(): | ||
96 | - ready_type = LemmaStatusType.objects.get(sym_name='ready') | ||
97 | - return Lemma_Status.objects.filter(type__priority__gte=ready_type.priority).distinct() | ||
98 | 90 | ||
99 | \ No newline at end of file | 91 | \ No newline at end of file |
dictionary/management/commands/get_stats.py
@@ -2,1018 +2,145 @@ | @@ -2,1018 +2,145 @@ | ||
2 | # author: B.Niton | 2 | # author: B.Niton |
3 | 3 | ||
4 | import codecs | 4 | import codecs |
5 | -import operator | 5 | +import datetime |
6 | from collections import Counter | 6 | from collections import Counter |
7 | 7 | ||
8 | from django.core.management.base import BaseCommand | 8 | from django.core.management.base import BaseCommand |
9 | +from django.db.models import Count, Max | ||
9 | 10 | ||
10 | -from dictionary.models import * | 11 | +from dictionary.models import Lemma, get_checked_statuses, get_ready_statuses, \ |
12 | + sorted_frame_char_values_dict | ||
11 | 13 | ||
12 | -#PHRASEOLOGIC_TYPES = ['comprepnp', 'preplexnp', 'lexnp', 'lex', | ||
13 | -# 'fixed'] | 14 | +LEX_TYPES = ['lex', 'fixed', 'comprepnp'] |
14 | 15 | ||
15 | class Command(BaseCommand): | 16 | class Command(BaseCommand): |
16 | - help = 'Get slowal statistics.' | 17 | + help = 'Get Walenty statistics.' |
17 | 18 | ||
18 | def handle(self, **options): | 19 | def handle(self, **options): |
19 | - all_statuses = [Q(status__status=u'gotowe'), | ||
20 | - Q(status__status=u'zalążkowe'), | ||
21 | - Q(status__status=u'sprawdzone'), | ||
22 | - Q(status__status=u'(F) w obróbce'), | ||
23 | - Q(status__status=u'(F) gotowe'), | ||
24 | - Q(status__status=u'(F) sprawdzone'), | ||
25 | - Q(status__status=u'(S) w obróbce'), | ||
26 | - Q(status__status=u'(S) gotowe'), | ||
27 | - Q(status__status=u'(S) sprawdzone')] | ||
28 | - verified_statuses = [Q(status__status=u'zalążkowe'), | ||
29 | - Q(status__status=u'sprawdzone'), | ||
30 | - Q(status__status=u'(F) w obróbce'), | ||
31 | - Q(status__status=u'(F) gotowe'), | ||
32 | - Q(status__status=u'(F) sprawdzone'), | ||
33 | - Q(status__status=u'(S) w obróbce'), | ||
34 | - Q(status__status=u'(S) gotowe'), | ||
35 | - Q(status__status=u'(S) sprawdzone')] | 20 | + now = datetime.datetime.now().strftime('%Y%m%d') |
21 | + all_statuses = get_ready_statuses() | ||
22 | + verified_statuses = get_checked_statuses() | ||
36 | 23 | ||
37 | - nouns_stats_dict_all = Counter(get_nouns_stats('data/statystyki_2015_06_30_nouns_all.txt', all_statuses)) | ||
38 | - nouns_stats_dict_verified = Counter(get_nouns_stats('data/statystyki_2015_06_30_nouns_verified.txt', verified_statuses)) | ||
39 | - | ||
40 | - adjs_stats_dict_all = Counter(get_adjs_stats('data/statystyki_2015_06_30_adjs_all.txt', all_statuses)) | ||
41 | - adjs_stats_dict_verified = Counter(get_adjs_stats('data/statystyki_2015_06_30_adjs_verified.txt', verified_statuses)) | 24 | +# nouns_stats_dict_all = Counter(get_stats('data/statystyki_2015_06_30_nouns_all.txt', all_statuses, 'noun')) |
25 | +# nouns_stats_dict_verified = Counter(get_stats('data/statystyki_2015_06_30_nouns_verified.txt', verified_statuses, 'noun')) | ||
26 | + | ||
27 | +# adjs_stats_dict_all = Counter(get_stats('data/statystyki_2015_06_30_adjs_all.txt', all_statuses, 'adj')) | ||
28 | +# adjs_stats_dict_verified = Counter(get_stats('data/statystyki_2015_06_30_adjs_verified.txt', verified_statuses, 'adj')) | ||
29 | +# | ||
30 | +# verbs_stats_dict_all = Counter(get_stats('data/statystyki_2015_06_30_verbs_all.txt', all_statuses, 'verb')) | ||
31 | +# verbs_stats_dict_verified = Counter(get_stats('data/statystyki_2015_06_30_verbs_verified.txt', verified_statuses, 'verb')) | ||
32 | +# | ||
33 | + advs_stats_dict_all = Counter(get_stats(all_statuses, 'adv')) | ||
34 | + write_stats('data/stats_%s_advs_all.txt' % now, advs_stats_dict_all) | ||
35 | +# advs_stats_dict_verified = Counter(get_stats('data/statystyki_2015_06_30_advs_verified.txt', verified_statuses, 'adv')) | ||
36 | +# | ||
37 | +# all_stats_dict_all = nouns_stats_dict_all + adjs_stats_dict_all + verbs_stats_dict_all + advs_stats_dict_all | ||
38 | +# all_stats_dict_verified = nouns_stats_dict_verified + adjs_stats_dict_verified + verbs_stats_dict_verified + advs_stats_dict_verified | ||
39 | +# | ||
40 | +# write_all_stats('data/statystyki_2015_06_30_all.txt', all_stats_dict_all) | ||
41 | +# write_all_stats('data/statystyki_2015_06_30_verified.txt', all_stats_dict_verified) | ||
42 | 42 | ||
43 | - verbs_stats_dict_all = Counter(get_verb_stats('data/statystyki_2015_06_30_verbs_all.txt', all_statuses)) | ||
44 | - verbs_stats_dict_verified = Counter(get_verb_stats('data/statystyki_2015_06_30_verbs_verified.txt', verified_statuses)) | 43 | +def write_stats(stats_path, stats_dict): |
44 | + try: | ||
45 | + outfile = codecs.open(stats_path, 'wt', 'utf-8') | ||
45 | 46 | ||
46 | - advs_stats_dict_all = Counter(get_advs_stats('data/statystyki_2015_06_30_advs_all.txt', all_statuses)) | ||
47 | - advs_stats_dict_verified = Counter(get_advs_stats('data/statystyki_2015_06_30_advs_verified.txt', verified_statuses)) | 47 | + outfile.write(u'Liczba typów fraz:\t%d\n' % stats_path['phrases']) |
48 | + outfile.write(u'Liczba pozycji:\t%d\n' % stats_path['poss']) | ||
49 | + outfile.write(u'Liczba haseł:\t%d\n\n' % stats_path['lemmas']) | ||
48 | 50 | ||
49 | - all_stats_dict_all = nouns_stats_dict_all + adjs_stats_dict_all + verbs_stats_dict_all + advs_stats_dict_all | ||
50 | - all_stats_dict_verified = nouns_stats_dict_verified + adjs_stats_dict_verified + verbs_stats_dict_verified + advs_stats_dict_verified | 51 | + outfile.write(u'Łączna liczba podhaseł:\t%d\n' % stats_path['sub_lemmas']) |
52 | + outfile.write(u'Liczba podhaseł postaci (ZWROTNOŚĆ, NEGATYWNOŚĆ, PREDYKATYWNOŚĆ, ASPEKT)\n') | ||
53 | +#### dokonczyc | ||
54 | + | ||
55 | + outfile.write(u'Łączna liczba schematów:\t%d\n' % stats_path['schemata']) | ||
56 | + outfile.write(u'Liczba schematów pewnych:\t%d\n' % stats_path['cer_schemata']) | ||
57 | + outfile.write(u'Liczba schematów wątpliwych:\t%d\n' % stats_path['uncer_schemata']) | ||
58 | + outfile.write(u'Liczba schematów złych:\t%d\n' % stats_path['bad_schemata']) | ||
59 | + outfile.write(u'Liczba schematów archaicznych:\t%d\n' % stats_path['arch_schemata']) | ||
60 | + outfile.write(u'Liczba schematów potocznych:\t%d\n' % stats_path['col_schemata']) | ||
61 | + outfile.write(u'Liczba schematów wulgarnych:\t%d\n\n' % stats_path['vul_schemata']) | ||
51 | 62 | ||
52 | - write_all_stats('data/statystyki_2015_06_30_all.txt', all_stats_dict_all) | ||
53 | - write_all_stats('data/statystyki_2015_06_30_verified.txt', all_stats_dict_verified) | 63 | + outfile.write(u'Liczba schematów z koordynacją:\t%d\n' % stats_path['coor_schemata']) |
64 | + outfile.write(u'Liczba schematów zleksykalizowanych:\t%d\n\n' % stats_path['lex_schemata']) | ||
54 | 65 | ||
55 | -def write_all_stats(stats_path, stats_dict): | ||
56 | - outfile = codecs.open(stats_path, 'wt', 'utf-8') | ||
57 | - | ||
58 | - outfile.write(u'Liczba argumentów: ' + str(stats_dict['arg_count']) + '\n') | ||
59 | - outfile.write(u'Liczba pozycji: ' + str(stats_dict['pos_count']) + '\n') | ||
60 | - outfile.write(u'Liczba haseł: ' + str(stats_dict['lemma_count']) + '\n') | ||
61 | - outfile.write('\n') | ||
62 | - outfile.write(u'Liczba podhaseł: ' + str(stats_dict['sub_lemma_count']) + '\n') | ||
63 | - | ||
64 | - outfile.write(u'Liczba podhaseł postaci (ZWROTNOŚĆ, NEGATYWNOŚĆ, PREDYKATYWNOŚĆ, ASPEKT)\n') | ||
65 | - | ||
66 | - outfile.write(u'Liczba podhaseł postaci (się, _, , imperf): ' + str(stats_dict['sub_sie_imperf_oboj']) + '\n') | ||
67 | - outfile.write(u'Liczba podhaseł postaci (się, _, , perf): ' + str(stats_dict['sub_sie_perf_oboj']) + '\n') | ||
68 | - outfile.write(u'Liczba podhaseł postaci (się, _, , _): ' + str(stats_dict['sub_sie_oboj_oboj']) + '\n') | ||
69 | - outfile.write(u'Liczba podhaseł postaci ( , _, , imperf): ' + str(stats_dict['sub_imperf_oboj']) + '\n') | ||
70 | - outfile.write(u'Liczba podhaseł postaci ( , _, , perf): ' + str(stats_dict['sub_perf_oboj']) + '\n') | ||
71 | - outfile.write(u'Liczba podhaseł postaci ( , _, , _): ' + str(stats_dict['sub_oboj_oboj']) + '\n') | ||
72 | - | ||
73 | - outfile.write(u'Liczba podhaseł postaci (się, neg, , imperf): ' + str(stats_dict['sub_sie_imperf_neg']) + '\n') | ||
74 | - outfile.write(u'Liczba podhaseł postaci (się, neg, , perf): ' + str(stats_dict['sub_sie_perf_neg']) + '\n') | ||
75 | - outfile.write(u'Liczba podhaseł postaci (się, neg, , _): ' + str(stats_dict['sub_sie_oboj_neg']) + '\n') | ||
76 | - outfile.write(u'Liczba podhaseł postaci ( , neg, , imperf): ' + str(stats_dict['sub_imperf_neg']) + '\n') | ||
77 | - outfile.write(u'Liczba podhaseł postaci ( , neg, , perf): ' + str(stats_dict['sub_perf_neg']) + '\n') | ||
78 | - outfile.write(u'Liczba podhaseł postaci ( , neg, , _): ' + str(stats_dict['sub_oboj_neg']) + '\n') | ||
79 | - | ||
80 | - outfile.write(u'Liczba podhaseł postaci (się, aff, , imperf): ' + str(stats_dict['sub_sie_imperf_aff']) + '\n') | ||
81 | - outfile.write(u'Liczba podhaseł postaci (się, aff, , perf): ' + str(stats_dict['sub_sie_perf_aff']) + '\n') | ||
82 | - outfile.write(u'Liczba podhaseł postaci (się, aff, , _): ' + str(stats_dict['sub_sie_oboj_aff']) + '\n') | ||
83 | - outfile.write(u'Liczba podhaseł postaci ( , aff, , imperf): ' + str(stats_dict['sub_imperf_aff']) + '\n') | ||
84 | - outfile.write(u'Liczba podhaseł postaci ( , aff, , perf): ' + str(stats_dict['sub_perf_aff']) + '\n') | ||
85 | - outfile.write(u'Liczba podhaseł postaci ( , aff, , _): ' + str(stats_dict['sub_oboj_aff']) + '\n') | ||
86 | - | ||
87 | - outfile.write(u'Liczba podhaseł postaci ( , , pred, ): ' + str(stats_dict['pred_lemma']) + '\n') | ||
88 | - outfile.write(u'Liczba podhaseł postaci ( , , , ): ' + str(stats_dict['npred_lemma']) + '\n') | ||
89 | - | ||
90 | - outfile.write('\n') | ||
91 | - outfile.write(u'Liczba ramek: ' + str(stats_dict['all_frames']) + '\n') | ||
92 | - outfile.write(u'Liczba ramek pewnych: ' + str(stats_dict['pewna_frames']) + '\n') | ||
93 | - outfile.write(u'Liczba ramek wątpliwych: ' + str(stats_dict['watpliwa_frames']) + '\n') | ||
94 | - outfile.write(u'Liczba ramek złych: ' + str(stats_dict['zla_frames']) + '\n') | ||
95 | - outfile.write(u'Liczba ramek archaicznych: ' + str(stats_dict['arch_frames']) + '\n') | ||
96 | - outfile.write(u'Liczba ramek potocznych: ' + str(stats_dict['potoczna_frames']) + '\n') | ||
97 | - outfile.write(u'Liczba ramek wulgarnych: ' + str(stats_dict['wulgarna_frames']) + '\n') | ||
98 | - outfile.write('\n') | ||
99 | - outfile.write(u'Liczba pozycji z większą niż jeden liczbą argumentów: ' + str(stats_dict['spec_pos']) + '\n') | ||
100 | - outfile.write(u'Liczba argumentów występujących w pozycjach o większej niż jeden liczbie argumentów: ' + str(stats_dict['spec_args']) + '\n') | ||
101 | - outfile.write(u'Liczba ramek z pozycjami wieloargumentowymi: ' + str(stats_dict['spec_frames']) + '\n') | ||
102 | - outfile.write('\n') | ||
103 | - outfile.write(u'Liczba pozycji z większą niż jeden liczbą specyfikacji: ' + str(stats_dict['spec_pos_up']) + '\n') | ||
104 | - outfile.write(u'Liczba argumentów występujących w pozycjach o większej niż jeden liczbie specyfikacji: ' + str(stats_dict['spec_args_up']) + '\n') | ||
105 | - outfile.write(u'Liczba ramek z pozycjami o większej niż jeden liczbie specyfikacji: ' + str(stats_dict['spec_frames_up']) + '\n') | ||
106 | - outfile.write(u'Liczba ramek frazeologicznych: ' + str(stats_dict['frames_with_phraseology']) + '\n') | ||
107 | - outfile.write('\n') | ||
108 | - outfile.write(u'Hasła zawierające schematy frazeologiczne: ' + str(stats_dict['lemmas_with_phraseology']) + '\n') | ||
109 | - outfile.write(u'Hasła zawierające pozycje z koordynacją: ' + str(stats_dict['lemmas_with_coordination']) + '\n') | ||
110 | - | ||
111 | - outfile.close() | ||
112 | - | ||
113 | -def get_adjs_stats(stats_path, q_statuses): | ||
114 | - print 'Be patient, it can take a while.' | ||
115 | - outfile = codecs.open(stats_path, 'wt', 'utf-8') | ||
116 | - arg_count = 0 | ||
117 | - pos_count = 0 | ||
118 | - lemma_count = 0 | ||
119 | -######################## | ||
120 | - sub_lemma_count = 0 | ||
121 | - | ||
122 | - pred_lemma = 0 | ||
123 | - npred_lemma = 0 | ||
124 | -################## | ||
125 | - pewna_frames = 0 | ||
126 | - watpliwa_frames = 0 | ||
127 | - zla_frames = 0 | ||
128 | - arch_frames = 0 | ||
129 | - potoczna_frames = 0 | ||
130 | - wulgarna_frames = 0 | ||
131 | - all_frames = 0 | ||
132 | - | ||
133 | - spec_frames = 0 | ||
134 | - spec_pos = 0 | ||
135 | - spec_args = 0 | ||
136 | - | ||
137 | - spec_frames_up = 0 | ||
138 | - spec_pos_up = 0 | ||
139 | - spec_args_up = 0 | ||
140 | - | ||
141 | - lemmas_with_phraseology = 0 | ||
142 | - lemmas_with_coordination = 0 | ||
143 | - | ||
144 | - frames_with_phraseology = 0 | ||
145 | - | ||
146 | - lemmas = Lemma.objects.filter(old=False, | ||
147 | - entry_obj__pos__tag='adj').filter(reduce(operator.or_, q_statuses)).distinct().order_by('entry').all() | ||
148 | - for lemma in lemmas: | ||
149 | - print lemma | ||
150 | - lemma_count += 1 | 66 | + outfile.write(u'Hasła zawierające pozycje z koordynacją:\t%d\n' % stats_path['coor_lemmas']) |
67 | + outfile.write(u'Hasła zawierające schematy zleksykalizowane:\t%d\n\n' % stats_path['lex_lemmas']) | ||
151 | 68 | ||
152 | - pewna_frames += lemma.frame_opinions.filter(value__value=u'pewny').count() | ||
153 | - watpliwa_frames += lemma.frame_opinions.filter(value__value=u'wątpliwy').count() | ||
154 | - zla_frames += lemma.frame_opinions.filter(value__value=u'zły').count() | ||
155 | - arch_frames += lemma.frame_opinions.filter(value__value=u'archaiczny').count() | ||
156 | - potoczna_frames += lemma.frame_opinions.filter(value__value=u'potoczny').count() | ||
157 | - wulgarna_frames += lemma.frame_opinions.filter(value__value=u'wulgarny').count() | ||
158 | - all_frames += lemma.frames.count() | ||
159 | -# if (lemma.frames.count() != lemma.frame_opinions.filter(value__value=u'pewna').count() + | ||
160 | -# lemma.frame_opinions.filter(value__value=u'wątpliwa').count() + | ||
161 | -# lemma.frame_opinions.filter(value__value=u'zła').count() + | ||
162 | -# lemma.frame_opinions.filter(value__value=u'archaiczna').count() + | ||
163 | -# lemma.frame_opinions.filter(value__value=u'potoczna').count() + | ||
164 | -# lemma.frame_opinions.filter(value__value=u'wulgarna').count()): | ||
165 | -# print lemma | ||
166 | -# print lemma.frame_opinions.filter(value__value=u'pewna').count() | ||
167 | -# print lemma.frame_opinions.filter(value__value=u'wątpliwa').count() | ||
168 | -# print lemma.frame_opinions.filter(value__value=u'zła').count() | ||
169 | -# print lemma.frame_opinions.filter(value__value=u'archaiczna').count() | ||
170 | -# print lemma.frame_opinions.filter(value__value=u'potoczna').count() | ||
171 | -# print lemma.frame_opinions.filter(value__value=u'wulgarna').count() | ||
172 | -## break | ||
173 | - | ||
174 | -################################################################################ | ||
175 | - | ||
176 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'pred', | ||
177 | - characteristics__type=u'PREDYKATYWNOŚĆ') | ||
178 | - if sub_frame.count() > 0: | ||
179 | - pred_lemma += 1 | ||
180 | - sub_lemma_count += 1 | ||
181 | - | ||
182 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | ||
183 | - characteristics__type=u'PREDYKATYWNOŚĆ') | ||
184 | - if sub_frame.count() > 0: | ||
185 | - npred_lemma += 1 | ||
186 | - sub_lemma_count += 1 | ||
187 | - | ||
188 | -################################################################################# | ||
189 | - | ||
190 | - if lemma.frames.count() != lemma.frame_opinions.count(): | ||
191 | - print lemma.entry | ||
192 | - | ||
193 | - has_phraseology = False | ||
194 | - has_coordination = False | ||
195 | - for frame in lemma.frames.all(): | ||
196 | - phraseologic_frame = False | ||
197 | - | ||
198 | - pos_count += frame.positions.count() | ||
199 | - flat_frames = frame.positions.annotate(num_args=Count('arguments')).aggregate(Max('num_args'))['num_args__max'] | ||
200 | - | ||
201 | - if flat_frames > 1: | ||
202 | - spec_frames += 1 | ||
203 | - has_coordination = True | ||
204 | - if frame.has_phraseologic_arguments(): | ||
205 | - has_phraseology = True | ||
206 | - phraseologic_frame = True | ||
207 | - | ||
208 | - for pos in frame.positions.all(): | ||
209 | - args = pos.arguments.count() | ||
210 | - arg_count += args | ||
211 | -# for arg in pos.arguments.all(): | ||
212 | -# if arg.type in PHRASEOLOGIC_TYPES: | ||
213 | -# has_phraseology = True | ||
214 | -# phraseologic_frame = True | ||
215 | -# break | ||
216 | - if phraseologic_frame: | ||
217 | - frames_with_phraseology += 1 | ||
218 | - | ||
219 | - if has_phraseology: | ||
220 | - lemmas_with_phraseology += 1 | ||
221 | - if has_coordination: | ||
222 | - lemmas_with_coordination += 1 | ||
223 | -# if args > 1: | ||
224 | -# spec_pos += 1 | ||
225 | -# spec_args += args | ||
226 | -# | ||
227 | -# prep_args = pos.arguments.filter(Q(type=u'prepnp') | | ||
228 | -# Q(type=u'prepncp')) | ||
229 | -# np_args = pos.arguments.filter(Q(type=u'np') | | ||
230 | -# Q(type=u'ncp')) | ||
231 | -# similar_args = True | ||
232 | -# if prep_args.count() == args: | ||
233 | -# first_arg_case = prep_args.all()[0].atributes.get(type=u'PRZYPADEK').atribute_value.value | ||
234 | -# first_arg_prep = prep_args.all()[0].atributes.get(type=u'PRZYIMEK').atribute_value.value | ||
235 | -# for arg in prep_args: | ||
236 | -# if (first_arg_case != arg.atributes.get(type=u'PRZYPADEK').atribute_value.value or | ||
237 | -# first_arg_prep != arg.atributes.get(type=u'PRZYIMEK').atribute_value.value): | ||
238 | -# similar_args = False | ||
239 | -# break | ||
240 | -# elif np_args.count() == args: | ||
241 | -# first_arg_case = np_args.all()[0].atributes.get(type=u'PRZYPADEK').atribute_value.value | ||
242 | -# for arg in np_args: | ||
243 | -# if (first_arg_case != arg.atributes.get(type=u'PRZYPADEK').atribute_value.value): | ||
244 | -# similar_args = False | ||
245 | -# break | ||
246 | -# else: | ||
247 | -# similar_args = False | ||
248 | -# if not similar_args and args > 1: | ||
249 | -# spec_pos_up += 1 | ||
250 | -# spec_args_up += args | ||
251 | -# add_spec_frame_up = True | ||
252 | -# if add_spec_frame_up: | ||
253 | -# spec_frames_up += 1 | ||
254 | - | ||
255 | - outfile.write(u'Liczba argumentów: ' + str(arg_count) + '\n') | ||
256 | - outfile.write(u'Liczba pozycji: ' + str(pos_count) + '\n') | ||
257 | - outfile.write(u'Liczba haseł: ' + str(lemma_count) + '\n') | ||
258 | - outfile.write('\n') | ||
259 | - outfile.write(u'Liczba podhaseł: ' + str(sub_lemma_count) + '\n') | ||
260 | - outfile.write(u'Liczba podhaseł postaci (ZWROTNOŚĆ, NEGATYWNOŚĆ, PREDYKATYWNOŚĆ, ASPEKT)\n') | ||
261 | - outfile.write(u'Liczba podhaseł postaci ( , , pred, ): ' + str(pred_lemma) + '\n') | ||
262 | - outfile.write(u'Liczba podhaseł postaci ( , , , ): ' + str(npred_lemma) + '\n') | ||
263 | - outfile.write('\n') | ||
264 | - outfile.write(u'Liczba ramek: ' + str(all_frames) + '\n') | ||
265 | - outfile.write(u'Liczba ramek pewnych: ' + str(pewna_frames) + '\n') | ||
266 | - outfile.write(u'Liczba ramek wątpliwych: ' + str(watpliwa_frames) + '\n') | ||
267 | - outfile.write(u'Liczba ramek złych: ' + str(zla_frames) + '\n') | ||
268 | - outfile.write(u'Liczba ramek archaicznych: ' + str(arch_frames) + '\n') | ||
269 | - outfile.write(u'Liczba ramek potocznych: ' + str(potoczna_frames) + '\n') | ||
270 | - outfile.write(u'Liczba ramek wulgarnych: ' + str(wulgarna_frames) + '\n') | ||
271 | - outfile.write('\n') | ||
272 | - outfile.write(u'Liczba pozycji z większą niż jeden liczbą argumentów: ' + str(spec_pos) + '\n') | ||
273 | - outfile.write(u'Liczba argumentów występujących w pozycjach o większej niż jeden liczbie argumentów: ' + str(spec_args) + '\n') | ||
274 | - outfile.write(u'Liczba ramek z pozycjami wieloargumentowymi: ' + str(spec_frames) + '\n') | ||
275 | - outfile.write('\n') | ||
276 | - outfile.write(u'Liczba pozycji z większą niż jeden liczbą specyfikacji: ' + str(spec_pos_up) + '\n') | ||
277 | - outfile.write(u'Liczba argumentów występujących w pozycjach o większej niż jeden liczbie specyfikacji: ' + str(spec_args_up) + '\n') | ||
278 | - outfile.write(u'Liczba ramek z pozycjami o większej niż jeden liczbie specyfikacji: ' + str(spec_frames_up) + '\n') | ||
279 | - outfile.write(u'Liczba ramek frazeologicznych: ' + str(frames_with_phraseology) + '\n') | ||
280 | - outfile.write('\n') | ||
281 | - outfile.write(u'Hasła zawierające schematy frazeologiczne: ' + str(lemmas_with_phraseology) + '\n') | ||
282 | - outfile.write(u'Hasła zawierające pozycje z koordynacją: ' + str(lemmas_with_coordination) + '\n') | ||
283 | - | ||
284 | - | ||
285 | - | ||
286 | - adjs_stats_dict = {'arg_count': arg_count, | ||
287 | - 'pos_count': pos_count, | ||
288 | - 'lemma_count': lemma_count, | ||
289 | - | ||
290 | - 'sub_lemma_count': sub_lemma_count, | ||
291 | - | ||
292 | - 'pred_lemma': pred_lemma, | ||
293 | - 'npred_lemma': npred_lemma, | ||
294 | - | ||
295 | - 'all_frames': all_frames, | ||
296 | - 'pewna_frames': pewna_frames, | ||
297 | - 'watpliwa_frames': watpliwa_frames, | ||
298 | - 'zla_frames': zla_frames, | ||
299 | - 'arch_frames': arch_frames, | ||
300 | - 'potoczna_frames': potoczna_frames, | ||
301 | - 'wulgarna_frames': wulgarna_frames, | ||
302 | - | ||
303 | - 'spec_pos': spec_pos, | ||
304 | - 'spec_args': spec_args, | ||
305 | - 'spec_frames': spec_frames, | ||
306 | - | ||
307 | - 'spec_pos_up': spec_pos_up, | ||
308 | - 'spec_args_up': spec_args_up, | ||
309 | - 'spec_frames_up': spec_frames_up, | ||
310 | - 'frames_with_phraseology': frames_with_phraseology, | ||
311 | - | ||
312 | - 'lemmas_with_phraseology': lemmas_with_phraseology, | ||
313 | - 'lemmas_with_coordination': lemmas_with_coordination | ||
314 | - } | ||
315 | - | ||
316 | - outfile.close() | ||
317 | - return adjs_stats_dict | ||
318 | - | ||
319 | -def get_nouns_stats(stats_path, q_statuses): | ||
320 | - print 'Be patient, it can take a while.' | ||
321 | - outfile = codecs.open(stats_path, 'wt', 'utf-8') | ||
322 | - arg_count = 0 | ||
323 | - pos_count = 0 | ||
324 | - lemma_count = 0 | ||
325 | - | ||
326 | -################## | ||
327 | - pewna_frames = 0 | ||
328 | - watpliwa_frames = 0 | ||
329 | - zla_frames = 0 | ||
330 | - arch_frames = 0 | ||
331 | - potoczna_frames = 0 | ||
332 | - wulgarna_frames = 0 | ||
333 | - all_frames = 0 | ||
334 | - | ||
335 | - spec_frames = 0 | ||
336 | - spec_pos = 0 | ||
337 | - spec_args = 0 | ||
338 | - | ||
339 | - spec_frames_up = 0 | ||
340 | - spec_pos_up = 0 | ||
341 | - spec_args_up = 0 | ||
342 | - | ||
343 | - lemmas_with_phraseology = 0 | ||
344 | - lemmas_with_coordination = 0 | ||
345 | - | ||
346 | - frames_with_phraseology = 0 | 69 | + except: |
70 | + outfile.close() | ||
71 | + | ||
72 | +def get_stats(statuses, pos): | ||
73 | + stats_dict = Counter({u'phrases': 0, | ||
74 | + u'poss': 0, | ||
75 | + u'lemmas': 0, | ||
76 | + u'sub_lemmas': 0, | ||
77 | + u'schemata': 0, | ||
78 | + u'cer_schemata': 0, | ||
79 | + u'uncer_schemata': 0, | ||
80 | + u'bad_schemata': 0, | ||
81 | + u'arch_schemata': 0, | ||
82 | + u'col_schemata': 0, | ||
83 | + u'vul_schemata': 0, | ||
84 | + u'coor_schemata': 0, | ||
85 | + u'lex_schemata': 0, | ||
86 | + u'coor_lemmas': 0, | ||
87 | + u'lex_lemmas': 0}) | ||
347 | 88 | ||
348 | lemmas = Lemma.objects.filter(old=False, | 89 | lemmas = Lemma.objects.filter(old=False, |
349 | - entry_obj__pos__tag='noun').filter(reduce(operator.or_, q_statuses)).distinct().order_by('entry').all() | ||
350 | - for lemma in lemmas: | 90 | + entry_obj__pos__tag=pos).filter(status__in=statuses).distinct() |
91 | + for lemma in lemmas.order_by('entry').all(): | ||
351 | print lemma | 92 | print lemma |
352 | - lemma_count += 1 | ||
353 | - | ||
354 | - pewna_frames += lemma.frame_opinions.filter(value__value=u'pewny').count() | ||
355 | - watpliwa_frames += lemma.frame_opinions.filter(value__value=u'wątpliwy').count() | ||
356 | - zla_frames += lemma.frame_opinions.filter(value__value=u'zły').count() | ||
357 | - arch_frames += lemma.frame_opinions.filter(value__value=u'archaiczny').count() | ||
358 | - potoczna_frames += lemma.frame_opinions.filter(value__value=u'potoczny').count() | ||
359 | - wulgarna_frames += lemma.frame_opinions.filter(value__value=u'wulgarny').count() | ||
360 | - all_frames += lemma.frames.count() | ||
361 | -# if (lemma.frames.count() != lemma.frame_opinions.filter(value__value=u'pewna').count() + | ||
362 | -# lemma.frame_opinions.filter(value__value=u'wątpliwa').count() + | ||
363 | -# lemma.frame_opinions.filter(value__value=u'zła').count() + | ||
364 | -# lemma.frame_opinions.filter(value__value=u'archaiczna').count() + | ||
365 | -# lemma.frame_opinions.filter(value__value=u'potoczna').count() + | ||
366 | -# lemma.frame_opinions.filter(value__value=u'wulgarna').count()): | ||
367 | -# print lemma | ||
368 | -# print lemma.frame_opinions.filter(value__value=u'pewna').count() | ||
369 | -# print lemma.frame_opinions.filter(value__value=u'wątpliwa').count() | ||
370 | -# print lemma.frame_opinions.filter(value__value=u'zła').count() | ||
371 | -# print lemma.frame_opinions.filter(value__value=u'archaiczna').count() | ||
372 | -# print lemma.frame_opinions.filter(value__value=u'potoczna').count() | ||
373 | -# print lemma.frame_opinions.filter(value__value=u'wulgarna').count() | ||
374 | -## break | ||
375 | - | ||
376 | - if lemma.frames.count() != lemma.frame_opinions.count(): | ||
377 | - print lemma.entry | ||
378 | - | 93 | + stats_dict[u'lemmas'] += 1 |
94 | + stats_dict[u'cer_schemata'] += lemma.frame_opinions.filter(value__value=u'pewny').count() | ||
95 | + stats_dict[u'uncer_schemata'] += lemma.frame_opinions.filter(value__value=u'wątpliwy').count() | ||
96 | + stats_dict[u'bad_schemata'] += lemma.frame_opinions.filter(value__value=u'zły').count() | ||
97 | + stats_dict[u'arch_schemata'] += lemma.frame_opinions.filter(value__value=u'archaiczny').count() | ||
98 | + stats_dict[u'col_schemata'] += lemma.frame_opinions.filter(value__value=u'potoczny').count() | ||
99 | + stats_dict[u'vul_schemata'] += lemma.frame_opinions.filter(value__value=u'wulgarny').count() | ||
100 | + stats_dict[u'schemata'] += lemma.frames.count() | ||
101 | + | ||
102 | + stats_dict = stats_dict + Counter(get_sub_entries_dict(lemma)) | ||
103 | + | ||
379 | has_phraseology = False | 104 | has_phraseology = False |
380 | has_coordination = False | 105 | has_coordination = False |
381 | for frame in lemma.frames.all(): | 106 | for frame in lemma.frames.all(): |
382 | - phraseologic_frame = False | ||
383 | - | ||
384 | - pos_count += frame.positions.count() | 107 | + stats_dict[u'poss'] += frame.positions.count() |
385 | flat_frames = frame.positions.annotate(num_args=Count('arguments')).aggregate(Max('num_args'))['num_args__max'] | 108 | flat_frames = frame.positions.annotate(num_args=Count('arguments')).aggregate(Max('num_args'))['num_args__max'] |
386 | - | ||
387 | if flat_frames > 1: | 109 | if flat_frames > 1: |
388 | - spec_frames += 1 | 110 | + stats_dict[u'coor_schemata'] += 1 |
389 | has_coordination = True | 111 | has_coordination = True |
390 | - if frame.has_phraseologic_arguments(): | ||
391 | - has_phraseology = True | ||
392 | - phraseologic_frame = True | ||
393 | - | ||
394 | for pos in frame.positions.all(): | 112 | for pos in frame.positions.all(): |
395 | - args = pos.arguments.count() | ||
396 | - arg_count += args | ||
397 | -# for arg in pos.arguments.all(): | ||
398 | -# if arg.type in PHRASEOLOGIC_TYPES: | ||
399 | -# has_phraseology = True | ||
400 | -# phraseologic_frame = True | ||
401 | -# break | ||
402 | - if phraseologic_frame: | ||
403 | - frames_with_phraseology += 1 | ||
404 | - | ||
405 | - if has_phraseology: | ||
406 | - lemmas_with_phraseology += 1 | ||
407 | - if has_coordination: | ||
408 | - lemmas_with_coordination += 1 | ||
409 | - | ||
410 | - outfile.write(u'Liczba argumentów: ' + str(arg_count) + '\n') | ||
411 | - outfile.write(u'Liczba pozycji: ' + str(pos_count) + '\n') | ||
412 | - outfile.write(u'Liczba haseł: ' + str(lemma_count) + '\n') | ||
413 | - outfile.write('\n') | ||
414 | - | ||
415 | - outfile.write('\n') | ||
416 | - outfile.write(u'Liczba ramek: ' + str(all_frames) + '\n') | ||
417 | - outfile.write(u'Liczba ramek pewnych: ' + str(pewna_frames) + '\n') | ||
418 | - outfile.write(u'Liczba ramek wątpliwych: ' + str(watpliwa_frames) + '\n') | ||
419 | - outfile.write(u'Liczba ramek złych: ' + str(zla_frames) + '\n') | ||
420 | - outfile.write(u'Liczba ramek archaicznych: ' + str(arch_frames) + '\n') | ||
421 | - outfile.write(u'Liczba ramek potocznych: ' + str(potoczna_frames) + '\n') | ||
422 | - outfile.write(u'Liczba ramek wulgarnych: ' + str(wulgarna_frames) + '\n') | ||
423 | - outfile.write('\n') | ||
424 | - outfile.write(u'Liczba pozycji z większą niż jeden liczbą argumentów: ' + str(spec_pos) + '\n') | ||
425 | - outfile.write(u'Liczba argumentów występujących w pozycjach o większej niż jeden liczbie argumentów: ' + str(spec_args) + '\n') | ||
426 | - outfile.write(u'Liczba ramek z pozycjami wieloargumentowymi: ' + str(spec_frames) + '\n') | ||
427 | - outfile.write('\n') | ||
428 | - outfile.write(u'Liczba pozycji z większą niż jeden liczbą specyfikacji: ' + str(spec_pos_up) + '\n') | ||
429 | - outfile.write(u'Liczba argumentów występujących w pozycjach o większej niż jeden liczbie specyfikacji: ' + str(spec_args_up) + '\n') | ||
430 | - outfile.write(u'Liczba ramek z pozycjami o większej niż jeden liczbie specyfikacji: ' + str(spec_frames_up) + '\n') | ||
431 | - outfile.write(u'Liczba ramek frazeologicznych: ' + str(frames_with_phraseology) + '\n') | ||
432 | - outfile.write('\n') | ||
433 | - outfile.write(u'Hasła zawierające schematy frazeologiczne: ' + str(lemmas_with_phraseology) + '\n') | ||
434 | - outfile.write(u'Hasła zawierające pozycje z koordynacją: ' + str(lemmas_with_coordination) + '\n') | ||
435 | - | ||
436 | - noun_stats_dict = {'arg_count': arg_count, | ||
437 | - 'pos_count': pos_count, | ||
438 | - 'lemma_count': lemma_count, | ||
439 | - | ||
440 | - 'sub_lemma_count': lemma_count, | ||
441 | - | ||
442 | - 'npred_lemma': lemma_count, | ||
443 | - | ||
444 | - 'all_frames': all_frames, | ||
445 | - 'pewna_frames': pewna_frames, | ||
446 | - 'watpliwa_frames': watpliwa_frames, | ||
447 | - 'zla_frames': zla_frames, | ||
448 | - 'arch_frames': arch_frames, | ||
449 | - 'potoczna_frames': potoczna_frames, | ||
450 | - 'wulgarna_frames': wulgarna_frames, | ||
451 | - | ||
452 | - 'spec_pos': spec_pos, | ||
453 | - 'spec_args': spec_args, | ||
454 | - 'spec_frames': spec_frames, | ||
455 | - | ||
456 | - 'spec_pos_up': spec_pos_up, | ||
457 | - 'spec_args_up': spec_args_up, | ||
458 | - 'spec_frames_up': spec_frames_up, | ||
459 | - 'frames_with_phraseology': frames_with_phraseology, | ||
460 | - | ||
461 | - 'lemmas_with_phraseology': lemmas_with_phraseology, | ||
462 | - 'lemmas_with_coordination': lemmas_with_coordination | ||
463 | - } | ||
464 | - | ||
465 | - outfile.close() | ||
466 | - return noun_stats_dict | ||
467 | - | ||
468 | -def get_advs_stats(stats_path, q_statuses): | ||
469 | - print 'Be patient, it can take a while.' | ||
470 | - outfile = codecs.open(stats_path, 'wt', 'utf-8') | ||
471 | - arg_count = 0 | ||
472 | - pos_count = 0 | ||
473 | - lemma_count = 0 | ||
474 | - | ||
475 | -################## | ||
476 | - pewna_frames = 0 | ||
477 | - watpliwa_frames = 0 | ||
478 | - zla_frames = 0 | ||
479 | - arch_frames = 0 | ||
480 | - potoczna_frames = 0 | ||
481 | - wulgarna_frames = 0 | ||
482 | - all_frames = 0 | ||
483 | - | ||
484 | - spec_frames = 0 | ||
485 | - spec_pos = 0 | ||
486 | - spec_args = 0 | ||
487 | - | ||
488 | - spec_frames_up = 0 | ||
489 | - spec_pos_up = 0 | ||
490 | - spec_args_up = 0 | ||
491 | - | ||
492 | - lemmas_with_phraseology = 0 | ||
493 | - lemmas_with_coordination = 0 | ||
494 | - | ||
495 | - frames_with_phraseology = 0 | ||
496 | - | ||
497 | - lemmas = Lemma.objects.filter(old=False, | ||
498 | - entry_obj__pos__tag='adv').filter(reduce(operator.or_, q_statuses)).distinct().order_by('entry').all() | ||
499 | - for lemma in lemmas: | ||
500 | - print lemma | ||
501 | - lemma_count += 1 | ||
502 | - | ||
503 | - pewna_frames += lemma.frame_opinions.filter(value__value=u'pewny').count() | ||
504 | - watpliwa_frames += lemma.frame_opinions.filter(value__value=u'wątpliwy').count() | ||
505 | - zla_frames += lemma.frame_opinions.filter(value__value=u'zły').count() | ||
506 | - arch_frames += lemma.frame_opinions.filter(value__value=u'archaiczny').count() | ||
507 | - potoczna_frames += lemma.frame_opinions.filter(value__value=u'potoczny').count() | ||
508 | - wulgarna_frames += lemma.frame_opinions.filter(value__value=u'wulgarny').count() | ||
509 | - all_frames += lemma.frames.count() | ||
510 | -# if (lemma.frames.count() != lemma.frame_opinions.filter(value__value=u'pewna').count() + | ||
511 | -# lemma.frame_opinions.filter(value__value=u'wątpliwa').count() + | ||
512 | -# lemma.frame_opinions.filter(value__value=u'zła').count() + | ||
513 | -# lemma.frame_opinions.filter(value__value=u'archaiczna').count() + | ||
514 | -# lemma.frame_opinions.filter(value__value=u'potoczna').count() + | ||
515 | -# lemma.frame_opinions.filter(value__value=u'wulgarna').count()): | ||
516 | -# print lemma | ||
517 | -# print lemma.frame_opinions.filter(value__value=u'pewna').count() | ||
518 | -# print lemma.frame_opinions.filter(value__value=u'wątpliwa').count() | ||
519 | -# print lemma.frame_opinions.filter(value__value=u'zła').count() | ||
520 | -# print lemma.frame_opinions.filter(value__value=u'archaiczna').count() | ||
521 | -# print lemma.frame_opinions.filter(value__value=u'potoczna').count() | ||
522 | -# print lemma.frame_opinions.filter(value__value=u'wulgarna').count() | ||
523 | -## break | ||
524 | - | ||
525 | - if lemma.frames.count() != lemma.frame_opinions.count(): | ||
526 | - print lemma.entry | ||
527 | - | ||
528 | - has_phraseology = False | ||
529 | - has_coordination = False | ||
530 | - for frame in lemma.frames.all(): | ||
531 | - phraseologic_frame = False | ||
532 | - | ||
533 | - pos_count += frame.positions.count() | ||
534 | - flat_frames = frame.positions.annotate(num_args=Count('arguments')).aggregate(Max('num_args'))['num_args__max'] | ||
535 | - | ||
536 | - if flat_frames > 1: | ||
537 | - spec_frames += 1 | ||
538 | - has_coordination = True | ||
539 | - if frame.has_phraseologic_arguments(): | 113 | + stats_dict[u'phrases'] += pos.arguments.count() |
114 | + if frame.positions.filter(arguments__type__in=LEX_TYPES).exists(): | ||
115 | + stats_dict[u'lex_schemata'] += 1 | ||
540 | has_phraseology = True | 116 | has_phraseology = True |
541 | - phraseologic_frame = True | ||
542 | - | ||
543 | - for pos in frame.positions.all(): | ||
544 | - args = pos.arguments.count() | ||
545 | - arg_count += args | ||
546 | -# for arg in pos.arguments.all(): | ||
547 | -# if arg.type in PHRASEOLOGIC_TYPES: | ||
548 | -# has_phraseology = True | ||
549 | -# phraseologic_frame = True | ||
550 | -# break | ||
551 | - if phraseologic_frame: | ||
552 | - frames_with_phraseology += 1 | ||
553 | 117 | ||
554 | if has_phraseology: | 118 | if has_phraseology: |
555 | - lemmas_with_phraseology += 1 | 119 | + stats_dict[u'lex_lemmas'] += 1 |
556 | if has_coordination: | 120 | if has_coordination: |
557 | - lemmas_with_coordination += 1 | ||
558 | - | ||
559 | - outfile.write(u'Liczba argumentów: ' + str(arg_count) + '\n') | ||
560 | - outfile.write(u'Liczba pozycji: ' + str(pos_count) + '\n') | ||
561 | - outfile.write(u'Liczba haseł: ' + str(lemma_count) + '\n') | ||
562 | - outfile.write('\n') | ||
563 | - | ||
564 | - outfile.write('\n') | ||
565 | - outfile.write(u'Liczba ramek: ' + str(all_frames) + '\n') | ||
566 | - outfile.write(u'Liczba ramek pewnych: ' + str(pewna_frames) + '\n') | ||
567 | - outfile.write(u'Liczba ramek wątpliwych: ' + str(watpliwa_frames) + '\n') | ||
568 | - outfile.write(u'Liczba ramek złych: ' + str(zla_frames) + '\n') | ||
569 | - outfile.write(u'Liczba ramek archaicznych: ' + str(arch_frames) + '\n') | ||
570 | - outfile.write(u'Liczba ramek potocznych: ' + str(potoczna_frames) + '\n') | ||
571 | - outfile.write(u'Liczba ramek wulgarnych: ' + str(wulgarna_frames) + '\n') | ||
572 | - outfile.write('\n') | ||
573 | - outfile.write(u'Liczba pozycji z większą niż jeden liczbą argumentów: ' + str(spec_pos) + '\n') | ||
574 | - outfile.write(u'Liczba argumentów występujących w pozycjach o większej niż jeden liczbie argumentów: ' + str(spec_args) + '\n') | ||
575 | - outfile.write(u'Liczba ramek z pozycjami wieloargumentowymi: ' + str(spec_frames) + '\n') | ||
576 | - outfile.write('\n') | ||
577 | - outfile.write(u'Liczba pozycji z większą niż jeden liczbą specyfikacji: ' + str(spec_pos_up) + '\n') | ||
578 | - outfile.write(u'Liczba argumentów występujących w pozycjach o większej niż jeden liczbie specyfikacji: ' + str(spec_args_up) + '\n') | ||
579 | - outfile.write(u'Liczba ramek z pozycjami o większej niż jeden liczbie specyfikacji: ' + str(spec_frames_up) + '\n') | ||
580 | - outfile.write(u'Liczba ramek frazeologicznych: ' + str(frames_with_phraseology) + '\n') | ||
581 | - outfile.write('\n') | ||
582 | - outfile.write(u'Hasła zawierające schematy frazeologiczne: ' + str(lemmas_with_phraseology) + '\n') | ||
583 | - outfile.write(u'Hasła zawierające pozycje z koordynacją: ' + str(lemmas_with_coordination) + '\n') | ||
584 | - | ||
585 | - advs_stats_dict = {'arg_count': arg_count, | ||
586 | - 'pos_count': pos_count, | ||
587 | - 'lemma_count': lemma_count, | ||
588 | - | ||
589 | - 'sub_lemma_count': lemma_count, | ||
590 | - | ||
591 | - 'npred_lemma': lemma_count, | ||
592 | - | ||
593 | - 'all_frames': all_frames, | ||
594 | - 'pewna_frames': pewna_frames, | ||
595 | - 'watpliwa_frames': watpliwa_frames, | ||
596 | - 'zla_frames': zla_frames, | ||
597 | - 'arch_frames': arch_frames, | ||
598 | - 'potoczna_frames': potoczna_frames, | ||
599 | - 'wulgarna_frames': wulgarna_frames, | ||
600 | - | ||
601 | - 'spec_pos': spec_pos, | ||
602 | - 'spec_args': spec_args, | ||
603 | - 'spec_frames': spec_frames, | ||
604 | - | ||
605 | - 'spec_pos_up': spec_pos_up, | ||
606 | - 'spec_args_up': spec_args_up, | ||
607 | - 'spec_frames_up': spec_frames_up, | ||
608 | - 'frames_with_phraseology': frames_with_phraseology, | ||
609 | - | ||
610 | - 'lemmas_with_phraseology': lemmas_with_phraseology, | ||
611 | - 'lemmas_with_coordination': lemmas_with_coordination | ||
612 | - } | ||
613 | - | ||
614 | - outfile.close() | ||
615 | - return advs_stats_dict | ||
616 | - | ||
617 | -def get_verb_stats(stats_path, q_statuses): | ||
618 | - print 'Be patient, it can take a while.' | ||
619 | - outfile = codecs.open(stats_path, 'wt', 'utf-8') | ||
620 | - arg_count = 0 | ||
621 | - pos_count = 0 | ||
622 | - lemma_count = 0 | ||
623 | -##################3 | ||
624 | - sub_lemma_count = 0 | ||
625 | - | ||
626 | - sub_sie_imperf_oboj = 0 | ||
627 | - sub_sie_perf_oboj = 0 | ||
628 | - sub_sie_oboj_oboj = 0 | ||
629 | - sub_imperf_oboj = 0 | ||
630 | - sub_perf_oboj = 0 | ||
631 | - sub_oboj_oboj = 0 | ||
632 | - | ||
633 | - sub_sie_imperf_neg = 0 | ||
634 | - sub_sie_perf_neg = 0 | ||
635 | - sub_sie_oboj_neg = 0 | ||
636 | - sub_imperf_neg = 0 | ||
637 | - sub_perf_neg = 0 | ||
638 | - sub_oboj_neg = 0 | ||
639 | - | ||
640 | - sub_sie_imperf_aff = 0 | ||
641 | - sub_sie_perf_aff = 0 | ||
642 | - sub_sie_oboj_aff = 0 | ||
643 | - sub_imperf_aff = 0 | ||
644 | - sub_perf_aff = 0 | ||
645 | - sub_oboj_aff = 0 | ||
646 | - | ||
647 | - sie_lemma = 0 | ||
648 | - nsie_lemma = 0 | ||
649 | -################## | ||
650 | - pewna_frames = 0 | ||
651 | - watpliwa_frames = 0 | ||
652 | - zla_frames = 0 | ||
653 | - arch_frames = 0 | ||
654 | - potoczna_frames = 0 | ||
655 | - wulgarna_frames = 0 | ||
656 | - all_frames = 0 | ||
657 | - | ||
658 | - spec_frames = 0 | ||
659 | - spec_pos = 0 | ||
660 | - spec_args = 0 | ||
661 | - | ||
662 | - spec_frames_up = 0 | ||
663 | - spec_pos_up = 0 | ||
664 | - spec_args_up = 0 | ||
665 | - | ||
666 | - lemmas_with_phraseology = 0 | ||
667 | - lemmas_with_coordination = 0 | ||
668 | - | ||
669 | - frames_with_phraseology = 0 | ||
670 | - | ||
671 | - lemmas = Lemma.objects.filter(old=False, | ||
672 | - entry_obj__pos__tag='verb').filter(reduce(operator.or_, q_statuses)).distinct().order_by('entry').all() | ||
673 | - for lemma in lemmas: | ||
674 | - # jak chcemy bez zapłaconych | ||
675 | -# if RealizedLemma.objects.filter(Q(status__status=u'sprawdzone') | | ||
676 | -# Q(status__status=u'tymczasowy')).filter(lemma__entry=lemma.entry, | ||
677 | -# paid=False).exists(): | ||
678 | -# pass | ||
679 | -# else: | ||
680 | -# continue | ||
681 | - print lemma | ||
682 | - lemma_count += 1 | ||
683 | - | ||
684 | - pewna_frames += lemma.frame_opinions.filter(value__value=u'pewny').count() | ||
685 | - watpliwa_frames += lemma.frame_opinions.filter(value__value=u'wątpliwy').count() | ||
686 | - zla_frames += lemma.frame_opinions.filter(value__value=u'zły').count() | ||
687 | - arch_frames += lemma.frame_opinions.filter(value__value=u'archaiczny').count() | ||
688 | - potoczna_frames += lemma.frame_opinions.filter(value__value=u'potoczny').count() | ||
689 | - wulgarna_frames += lemma.frame_opinions.filter(value__value=u'wulgarny').count() | ||
690 | - all_frames += lemma.frames.count() | ||
691 | -# if (lemma.frames.count() != lemma.frame_opinions.filter(value__value=u'pewna').count() + | ||
692 | -# lemma.frame_opinions.filter(value__value=u'wątpliwa').count() + | ||
693 | -# lemma.frame_opinions.filter(value__value=u'zła').count() + | ||
694 | -# lemma.frame_opinions.filter(value__value=u'archaiczna').count() + | ||
695 | -# lemma.frame_opinions.filter(value__value=u'potoczna').count() + | ||
696 | -# lemma.frame_opinions.filter(value__value=u'wulgarna').count()): | ||
697 | -# print lemma | ||
698 | -# print lemma.frame_opinions.filter(value__value=u'pewna').count() | ||
699 | -# print lemma.frame_opinions.filter(value__value=u'wątpliwa').count() | ||
700 | -# print lemma.frame_opinions.filter(value__value=u'zła').count() | ||
701 | -# print lemma.frame_opinions.filter(value__value=u'archaiczna').count() | ||
702 | -# print lemma.frame_opinions.filter(value__value=u'potoczna').count() | ||
703 | -# print lemma.frame_opinions.filter(value__value=u'wulgarna').count() | ||
704 | -## break | ||
705 | - | ||
706 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | ||
707 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | ||
708 | - characteristics__value__value=u'imperf').filter( | ||
709 | - characteristics__type=u'NEGATYWNOŚĆ', | ||
710 | - characteristics__value__value=u'_') | ||
711 | - if sub_frame.count() > 0: | ||
712 | - sub_sie_imperf_oboj += 1 | ||
713 | - sub_lemma_count += 1 | ||
714 | - | ||
715 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | ||
716 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | ||
717 | - characteristics__value__value=u'perf').filter( | ||
718 | - characteristics__type=u'NEGATYWNOŚĆ', | ||
719 | - characteristics__value__value=u'_') | ||
720 | - if sub_frame.count() > 0: | ||
721 | - sub_sie_perf_oboj += 1 | ||
722 | - sub_lemma_count += 1 | ||
723 | - | ||
724 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | ||
725 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | ||
726 | - characteristics__value__value=u'_').filter( | ||
727 | - characteristics__type=u'NEGATYWNOŚĆ', | ||
728 | - characteristics__value__value=u'_') | ||
729 | - if sub_frame.count() > 0: | ||
730 | - sub_sie_oboj_oboj += 1 | ||
731 | - sub_lemma_count += 1 | ||
732 | - | ||
733 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | ||
734 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | ||
735 | - characteristics__value__value=u'imperf').filter( | ||
736 | - characteristics__type=u'NEGATYWNOŚĆ', | ||
737 | - characteristics__value__value=u'_') | ||
738 | - if sub_frame.count() > 0: | ||
739 | - sub_imperf_oboj += 1 | ||
740 | - sub_lemma_count += 1 | ||
741 | - | ||
742 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | ||
743 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | ||
744 | - characteristics__value__value=u'perf').filter( | ||
745 | - characteristics__type=u'NEGATYWNOŚĆ', | ||
746 | - characteristics__value__value=u'_') | ||
747 | - if sub_frame.count() > 0: | ||
748 | - sub_perf_oboj += 1 | ||
749 | - sub_lemma_count += 1 | ||
750 | - | ||
751 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | ||
752 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | ||
753 | - characteristics__value__value=u'_').filter( | ||
754 | - characteristics__type=u'NEGATYWNOŚĆ', | ||
755 | - characteristics__value__value=u'_') | ||
756 | - if sub_frame.count() > 0: | ||
757 | - sub_oboj_oboj += 1 | ||
758 | - sub_lemma_count += 1 | ||
759 | - | ||
760 | -################################### neg ######################################### | ||
761 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | ||
762 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | ||
763 | - characteristics__value__value=u'imperf').filter( | ||
764 | - characteristics__type=u'NEGATYWNOŚĆ', | ||
765 | - characteristics__value__value=u'neg') | ||
766 | - if sub_frame.count() > 0: | ||
767 | - sub_sie_imperf_neg += 1 | ||
768 | - sub_lemma_count += 1 | ||
769 | - | ||
770 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | ||
771 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | ||
772 | - characteristics__value__value=u'perf').filter( | ||
773 | - characteristics__type=u'NEGATYWNOŚĆ', | ||
774 | - characteristics__value__value=u'neg') | ||
775 | - if sub_frame.count() > 0: | ||
776 | - sub_sie_perf_neg += 1 | ||
777 | - sub_lemma_count += 1 | ||
778 | - | ||
779 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | ||
780 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | ||
781 | - characteristics__value__value=u'_').filter( | ||
782 | - characteristics__type=u'NEGATYWNOŚĆ', | ||
783 | - characteristics__value__value=u'neg') | ||
784 | - if sub_frame.count() > 0: | ||
785 | - sub_sie_oboj_neg += 1 | ||
786 | - sub_lemma_count += 1 | ||
787 | - | ||
788 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | ||
789 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | ||
790 | - characteristics__value__value=u'imperf').filter( | ||
791 | - characteristics__type=u'NEGATYWNOŚĆ', | ||
792 | - characteristics__value__value=u'neg') | ||
793 | - if sub_frame.count() > 0: | ||
794 | - sub_imperf_neg += 1 | ||
795 | - sub_lemma_count += 1 | ||
796 | - | ||
797 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | ||
798 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | ||
799 | - characteristics__value__value=u'perf').filter( | ||
800 | - characteristics__type=u'NEGATYWNOŚĆ', | ||
801 | - characteristics__value__value=u'neg') | ||
802 | - if sub_frame.count() > 0: | ||
803 | - sub_perf_neg += 1 | ||
804 | - sub_lemma_count += 1 | ||
805 | - | ||
806 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | ||
807 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | ||
808 | - characteristics__value__value=u'_').filter( | ||
809 | - characteristics__type=u'NEGATYWNOŚĆ', | ||
810 | - characteristics__value__value=u'neg') | ||
811 | - if sub_frame.count() > 0: | ||
812 | - sub_oboj_neg += 1 | ||
813 | - sub_lemma_count += 1 | ||
814 | - | ||
815 | -#################################### aff ######################################## | ||
816 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | ||
817 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | ||
818 | - characteristics__value__value=u'imperf').filter( | ||
819 | - characteristics__type=u'NEGATYWNOŚĆ', | ||
820 | - characteristics__value__value=u'aff') | ||
821 | - if sub_frame.count() > 0: | ||
822 | - sub_sie_imperf_aff += 1 | ||
823 | - sub_lemma_count += 1 | ||
824 | - | ||
825 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | ||
826 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | ||
827 | - characteristics__value__value=u'perf').filter( | ||
828 | - characteristics__type=u'NEGATYWNOŚĆ', | ||
829 | - characteristics__value__value=u'aff') | ||
830 | - if sub_frame.count() > 0: | ||
831 | - sub_sie_perf_aff += 1 | ||
832 | - sub_lemma_count += 1 | ||
833 | - | ||
834 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | ||
835 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | ||
836 | - characteristics__value__value=u'_').filter( | ||
837 | - characteristics__type=u'NEGATYWNOŚĆ', | ||
838 | - characteristics__value__value=u'aff') | ||
839 | - if sub_frame.count() > 0: | ||
840 | - sub_sie_oboj_aff += 1 | ||
841 | - sub_lemma_count += 1 | ||
842 | - | ||
843 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | ||
844 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | ||
845 | - characteristics__value__value=u'imperf').filter( | ||
846 | - characteristics__type=u'NEGATYWNOŚĆ', | ||
847 | - characteristics__value__value=u'aff') | ||
848 | - if sub_frame.count() > 0: | ||
849 | - sub_imperf_aff += 1 | ||
850 | - sub_lemma_count += 1 | ||
851 | - | ||
852 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | ||
853 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | ||
854 | - characteristics__value__value=u'perf').filter( | ||
855 | - characteristics__type=u'NEGATYWNOŚĆ', | ||
856 | - characteristics__value__value=u'aff') | ||
857 | - if sub_frame.count() > 0: | ||
858 | - sub_perf_aff += 1 | ||
859 | - sub_lemma_count += 1 | ||
860 | - | ||
861 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | ||
862 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | ||
863 | - characteristics__value__value=u'_').filter( | ||
864 | - characteristics__type=u'NEGATYWNOŚĆ', | ||
865 | - characteristics__value__value=u'aff') | ||
866 | - if sub_frame.count() > 0: | ||
867 | - sub_oboj_aff += 1 | ||
868 | - sub_lemma_count += 1 | ||
869 | -################################################################################ | ||
870 | - | ||
871 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | ||
872 | - characteristics__type=u'ZWROTNOŚĆ') | ||
873 | - if sub_frame.count() > 0: | ||
874 | - sie_lemma += 1 | ||
875 | - | ||
876 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | ||
877 | - characteristics__type=u'ZWROTNOŚĆ') | ||
878 | - if sub_frame.count() > 0: | ||
879 | - nsie_lemma += 1 | ||
880 | - | ||
881 | - | ||
882 | -################################################################################# | ||
883 | - | ||
884 | - if lemma.frames.count() != lemma.frame_opinions.count(): | ||
885 | - print lemma.entry | ||
886 | - | ||
887 | - has_phraseology = False | ||
888 | - has_coordination = False | ||
889 | - for frame in lemma.frames.all(): | ||
890 | - phraseologic_frame = False | ||
891 | - | ||
892 | - pos_count += frame.positions.count() | ||
893 | - flat_frames = frame.positions.annotate(num_args=Count('arguments')).aggregate(Max('num_args'))['num_args__max'] | ||
894 | - | ||
895 | - if flat_frames > 1: | ||
896 | - spec_frames += 1 | ||
897 | - has_coordination = True | ||
898 | - if frame.has_phraseologic_arguments(): | ||
899 | - has_phraseology = True | ||
900 | - phraseologic_frame = True | ||
901 | - | ||
902 | - for pos in frame.positions.all(): | ||
903 | - args = pos.arguments.count() | ||
904 | - arg_count += args | ||
905 | -# for arg in pos.arguments.all(): | ||
906 | -# if arg.type in PHRASEOLOGIC_TYPES: | ||
907 | -# has_phraseology = True | ||
908 | -# phraseologic_frame = True | ||
909 | -# break | ||
910 | - if phraseologic_frame: | ||
911 | - frames_with_phraseology += 1 | 121 | + stats_dict[u'coor_lemmas'] += 1 |
122 | + | ||
123 | + return stats_dict | ||
124 | + | ||
def get_sub_entries_dict(lemma):
    """Count the sub-entries of ``lemma`` per frame-characteristic combination.

    Every combination of reflex / neg / pred / aspect values supplied by
    ``sorted_frame_char_values_dict()`` is passed to
    ``lemma.get_frames_by_char_values``; a combination for which at least one
    frame exists counts as one sub-entry.

    Returns a dict with the overall count under ``u'sub_lemmas'`` and one
    counter per matching combination, keyed by a label built from the four
    ``.value`` attributes. Keys are created only for combinations that
    matched, so a lemma with no matching frames yields an empty dict.
    """
    counts = {}
    char_values = sorted_frame_char_values_dict()
    for reflex in char_values['sorted_reflex_vals']:
        for neg in char_values['sorted_neg_vals']:
            for pred in char_values['sorted_pred_vals']:
                for aspect in char_values['sorted_aspect_vals']:
                    frames = lemma.get_frames_by_char_values(
                        reflex_val=reflex,
                        neg_val=neg,
                        pred_val=pred,
                        aspect_val=aspect)
                    # Combinations without any frame are not sub-entries.
                    if not frames.exists():
                        continue

                    counts[u'sub_lemmas'] = counts.get(u'sub_lemmas', 0) + 1

                    label = u'Liczba podhaseł postaci: (%s,%s,%s,%s)' % (
                        reflex.value, neg.value, pred.value, aspect.value)
                    counts[label] = counts.get(label, 0) + 1
    return counts
dictionary/models.py
@@ -107,6 +107,14 @@ class Lemma_Status(Model): | @@ -107,6 +107,14 @@ class Lemma_Status(Model): | ||
107 | ('see_stats', u'Może oglądać swoje statystyki.'), | 107 | ('see_stats', u'Może oglądać swoje statystyki.'), |
108 | ('see_all_stats', u'Może oglądać statystyki wszystkich.'), | 108 | ('see_all_stats', u'Może oglądać statystyki wszystkich.'), |
109 | ) | 109 | ) |
110 | + | ||
def get_checked_statuses():
    """Return the lemma statuses ranked at or above the 'checked' type.

    Looks up the ``LemmaStatusType`` whose ``sym_name`` is ``'checked'`` and
    returns the distinct ``Lemma_Status`` objects whose type priority is
    greater than or equal to that type's priority.
    """
    min_priority = LemmaStatusType.objects.get(sym_name='checked').priority
    statuses = Lemma_Status.objects.filter(type__priority__gte=min_priority)
    return statuses.distinct()
114 | + | ||
def get_ready_statuses():
    """Return the lemma statuses ranked at or above the 'ready' type.

    Looks up the ``LemmaStatusType`` whose ``sym_name`` is ``'ready'`` and
    returns the distinct ``Lemma_Status`` objects whose type priority is
    greater than or equal to that type's priority.
    """
    min_priority = LemmaStatusType.objects.get(sym_name='ready').priority
    statuses = Lemma_Status.objects.filter(type__priority__gte=min_priority)
    return statuses.distinct()
110 | 118 | ||
111 | 119 | ||
112 | class LemmaStatusType(Model): | 120 | class LemmaStatusType(Model): |