Commit 3ad52b13a56f9b536ff84948d9ab21fe31e3b5a9
1 parent
536ab813
Added installation files and information to repository.
Showing
4 changed files
with
244 additions
and
1021 deletions
INSTALL
1 | -Slowal installation guide: | |
2 | -To run Slowal you will need at least: | |
3 | - - python (tested on 2.7 version); | |
4 | - - django with django-registration (tested on version 0.7) and django-extensions packages; | |
5 | - - database system (tested on PostgreSQL 9.1); | |
6 | - - morphological analyser Morfeusz (http://sgjp.pl/morfeusz/). | |
7 | - | |
8 | - Slowal can be installed on production server as any other Django application. Comprehensive tutorial for setting Django on production server can be found at: http://bailey.st/blog/2012/05/02/ubuntu-django-postgresql-and-nginx-a-rock-solid-web-stack/. Tutorial can also be helpful for setting database for Slowal project. | |
9 | - | |
10 | -Installation: | |
11 | - 1) Change database_data.py file to get connection to yours database (see: http://bailey.st/blog/2012/05/02/ubuntu-django-postgresql-and-nginx-a-rock-solid-web-stack/ for hints). | |
12 | -When you are creating database it is important to make database coding 'utf8' and locale Polish. | |
13 | -In PostgreSQL such database would be created by command: | |
14 | ->>> createdb databaseName -E UTF8 -T template0 -l pl_PL.utf8 | |
15 | - 2) If you want to run Slowal in the domain subfolder change SITE_PREFIX = '' value to other, for example SITE_PREFIX = '/Slowal' | |
16 | - 3) Create database table running: | |
17 | ->>> python manage.py syncdb | |
18 | -command in the main folder of project. Remember to create superuser it will help you in managing database and give access to all Slowal functionalities. | |
19 | - 4) Fill database with initial values running: | |
20 | ->>> python manage.py import_models | |
21 | -command in the main folder of project. | |
22 | - 5) Create default user groups running: | |
23 | ->>> python manage.py create_groups.py | |
24 | -command in the main folder of project. | |
25 | - 6) Slowal is ready to run. | |
26 | - | |
27 | -Slowal was tested on Safari, Opera, Firefox and Chrome web browsers. Working on Internet Explorer compatibility is still in progress | |
1 | +Zainstaluj pipa: | |
2 | +>> apt-get update | |
3 | +>> apt-get -y install python-pip | |
4 | + | |
5 | +Zainstaluj Django w wersji 1.4.8: | |
6 | +>> pip install Django==1.4.8 | |
7 | + | |
8 | +Zainstaluj Django south: | |
9 | +>> apt-get install python-django-south | |
10 | + | |
11 | +Zainstaluj Django extensions: | |
12 | +>> apt-get install python-django-extensions | |
13 | + | |
14 | +Zainstaluj Django registration: | |
15 | +>> apt-get install python-django-registration | |
16 | + | |
17 | +Zainstaluj pythonowy moduł lxml: | |
18 | +>> apt-get install python-lxml | |
19 | + | |
20 | +Zainstaluj Postgresa: | |
21 | +>> sudo apt-get update | |
22 | +>> sudo apt-get install postgresql postgresql-contrib | |
23 | +Zmień użytkownika na postgres: | |
24 | +>> sudo -i -u postgres | |
25 | +A następnie dodaj poszczególne role do postgresa komendą: | |
26 | +>> createuser --interactive | |
27 | +Stwórz pustą bazę danych dla Slowala: | |
28 | +>> createdb slowal -E UTF8 -T template0 -l pl_PL.utf8 | |
29 | +Jeśli locale pl_PL.utf8 nie istnieje dodatkowo należy uruchomić komendy: | |
30 | +>> sudo locale-gen pl_PL.utf8 | |
31 | +>> service postgresql restart | |
32 | +Załaduj dump bazy danych poleceniem (zrzut bazy umieszczony jest w archiwum INSTALL_PACK.zip): | |
33 | +>> psql slowal < obraz_bazy.db | |
34 | + | |
35 | +Zainstaluj gita: | |
36 | +>> apt-get install git | |
37 | + | |
38 | +Sklonuj repozytorium gitowe z GitLaba: | |
39 | +>> git clone http://git.nlp.ipipan.waw.pl/walenty/Slowal.git | |
40 | + | |
41 | +Stwórz folder "Walenty" w folderze "data": | |
42 | +>> cd data | |
43 | +>> mkdir Walenty | |
44 | + | |
45 | +Dodaj w głównym folderze projektu plik konfiguracyjny settings.py (plik umieszczony jest w archiwum INSTALL_PACK.zip): | |
46 | + Zmień w nim zmienną STATIC_ROOT, tak by wskazywała na położenie plików statycznych strony, np.: | |
47 | + STATIC_ROOT = "/home/zil/static/Slowal" | |
48 | + | |
49 | +Dodaj w głównym folderze projektu plik konfiguracyjny database_data.py oraz zdefiniuj w nim połączenie z bazą danych, np.: | |
50 | + DATABASES = { | |
51 | + 'default': { | |
52 | + 'ENGINE': 'django.db.backends.postgresql_psycopg2', | |
53 | + 'NAME': 'slowal', | |
54 | + 'USER': 'zil', | |
55 | + 'PASSWORD': '', | |
56 | + 'HOST': '', | |
57 | + 'PORT': '5432', | |
58 | + } | |
59 | + } | |
60 | + | |
61 | +Zainstaluj moduł psycopg2: | |
62 | +>> sudo apt-get install python-psycopg2 | |
63 | + | |
64 | +Zgraj pliki statyczne do dedykowanego katalogu poleceniem: | |
65 | +>> python manage.py collectstatic | |
66 | + | |
67 | +Zainstaluj Apacha: | |
68 | +>> apt-get install apache2 | |
69 | + | |
70 | +Zainstaluj mod-wsgi: | |
71 | +>> apt-get install libapache2-mod-wsgi | |
72 | + | |
73 | +Utwórz plik slowal.wsgi odpowiednio definiując w nim ścieżki do plików statycznych. Przykładowa treść pliku poniżej: | |
74 | +-------------------------------------------- | |
75 | +import os, sys | |
76 | + | |
77 | +sys.path.append('/home/zil/static') | |
78 | +sys.path.append('/home/zil/static/Slowal') | |
79 | +os.environ['DJANGO_SETTINGS_MODULE'] = 'Slowal.settings' | |
80 | + | |
81 | +import django.core.handlers.wsgi | |
82 | + | |
83 | +application = django.core.handlers.wsgi.WSGIHandler() | |
84 | +-------------------------------------------- | |
85 | + | |
86 | +Skonfiguruj apacha dodając plik konfiguracyjny (np. o nazwie slowal.conf) do folderu sites-available apacha (domyślnie /etc/apache2/sites-available/), ścieżka WSGIScriptAlias musi wskazywać na plik slowal.wsgi. Przykładowy plik konfiguracyjny poniżej: | |
87 | +-------------------------------------------- | |
88 | +<VirtualHost *:80> | |
89 | + ServerAdmin bartek.niton@gmail.com | |
90 | + ServerName slowal.nlp.ipipan.waw.pl | |
91 | + | |
92 | + ServerAlias walenty.ipipan.waw.pl | |
93 | + | |
94 | + DocumentRoot /home/zil/Slowal/templates/ | |
95 | + <Directory /> | |
96 | + Options FollowSymLinks | |
97 | + AllowOverride None | |
98 | + Require all granted | |
99 | + </Directory> | |
100 | + WSGIScriptAlias / /home/zil/scripts/slowal.wsgi | |
101 | + WSGIDaemonProcess Slowal user=zil group=zil processes=2 threads=15 | |
102 | + WSGIProcessGroup Slowal | |
103 | + Alias /static/ /home/zil/static/Slowal/ | |
104 | + <Directory "/home/zil/static/Slowal"> | |
105 | + Require all granted | |
106 | + </Directory> | |
107 | + | |
108 | + ErrorLog /home/zil/logs/Slowal/error.log | |
109 | + CustomLog /home/zil/logs/Slowal/access.log combined | |
110 | +</VirtualHost> | |
111 | +-------------------------------------------- | |
112 | + | |
113 | +Uruchom stronę poleceniem: | |
114 | +>> a2ensite slowal.conf | |
115 | + | |
116 | +Zrestartuj apacha: | |
117 | +>> sudo service apache2 restart | |
118 | + | |
119 | +Zainstaluj Morfeusza2 zgodnie z instrukcjami na stronie http://sgjp.pl/morfeusz/dopobrania.html. | |
120 | + | |
121 | +Ustaw w crontabie cykliczne uruchamianie komend create_walenty i count_positions_occurrences: | |
122 | +1 0 * * 5 python /home/zil/Slowal/manage.py create_walenty | |
123 | +0 1 * * * python /home/zil/Slowal/manage.py count_positions_occurrences | |
... | ... |
dictionary/management/commands/create_walenty.py
... | ... | @@ -29,8 +29,8 @@ from django.core.management.base import BaseCommand |
29 | 29 | from accounts.models import User |
30 | 30 | from dictionary.ajax_vocabulary_management import create_text_walenty |
31 | 31 | from dictionary.ajax_argument_realizations import create_realizations_file |
32 | -from dictionary.models import Frame_Opinion, Lemma, Lemma_Status, \ | |
33 | - LemmaStatusType, Vocabulary, POS | |
32 | +from dictionary.models import Frame_Opinion, Lemma, Vocabulary, POS, \ | |
33 | + get_checked_statuses, get_ready_statuses | |
34 | 34 | from settings import WALENTY_PATH |
35 | 35 | |
36 | 36 | class Command(BaseCommand): |
... | ... | @@ -87,12 +87,4 @@ def create_pos_archive(archive, pos, filename_base): |
87 | 87 | finally: |
88 | 88 | os.remove(walenty_path_checked) |
89 | 89 | os.remove(walenty_path_ready) |
90 | - | |
91 | -def get_checked_statuses(): | |
92 | - checked_type = LemmaStatusType.objects.get(sym_name='checked') | |
93 | - return Lemma_Status.objects.filter(type__priority__gte=checked_type.priority).distinct() | |
94 | - | |
95 | -def get_ready_statuses(): | |
96 | - ready_type = LemmaStatusType.objects.get(sym_name='ready') | |
97 | - return Lemma_Status.objects.filter(type__priority__gte=ready_type.priority).distinct() | |
98 | 90 | |
99 | 91 | \ No newline at end of file |
... | ... |
dictionary/management/commands/get_stats.py
... | ... | @@ -2,1018 +2,145 @@ |
2 | 2 | # author: B.Niton |
3 | 3 | |
4 | 4 | import codecs |
5 | -import operator | |
5 | +import datetime | |
6 | 6 | from collections import Counter |
7 | 7 | |
8 | 8 | from django.core.management.base import BaseCommand |
9 | +from django.db.models import Count, Max | |
9 | 10 | |
10 | -from dictionary.models import * | |
11 | +from dictionary.models import Lemma, get_checked_statuses, get_ready_statuses, \ | |
12 | + sorted_frame_char_values_dict | |
11 | 13 | |
12 | -#PHRASEOLOGIC_TYPES = ['comprepnp', 'preplexnp', 'lexnp', 'lex', | |
13 | -# 'fixed'] | |
14 | +LEX_TYPES = ['lex', 'fixed', 'comprepnp'] | |
14 | 15 | |
15 | 16 | class Command(BaseCommand): |
16 | - help = 'Get slowal statistics.' | |
17 | + help = 'Get Walenty statistics.' | |
17 | 18 | |
18 | 19 | def handle(self, **options): |
19 | - all_statuses = [Q(status__status=u'gotowe'), | |
20 | - Q(status__status=u'zalążkowe'), | |
21 | - Q(status__status=u'sprawdzone'), | |
22 | - Q(status__status=u'(F) w obróbce'), | |
23 | - Q(status__status=u'(F) gotowe'), | |
24 | - Q(status__status=u'(F) sprawdzone'), | |
25 | - Q(status__status=u'(S) w obróbce'), | |
26 | - Q(status__status=u'(S) gotowe'), | |
27 | - Q(status__status=u'(S) sprawdzone')] | |
28 | - verified_statuses = [Q(status__status=u'zalążkowe'), | |
29 | - Q(status__status=u'sprawdzone'), | |
30 | - Q(status__status=u'(F) w obróbce'), | |
31 | - Q(status__status=u'(F) gotowe'), | |
32 | - Q(status__status=u'(F) sprawdzone'), | |
33 | - Q(status__status=u'(S) w obróbce'), | |
34 | - Q(status__status=u'(S) gotowe'), | |
35 | - Q(status__status=u'(S) sprawdzone')] | |
20 | + now = datetime.datetime.now().strftime('%Y%m%d') | |
21 | + all_statuses = get_ready_statuses() | |
22 | + verified_statuses = get_checked_statuses() | |
36 | 23 | |
37 | - nouns_stats_dict_all = Counter(get_nouns_stats('data/statystyki_2015_06_30_nouns_all.txt', all_statuses)) | |
38 | - nouns_stats_dict_verified = Counter(get_nouns_stats('data/statystyki_2015_06_30_nouns_verified.txt', verified_statuses)) | |
39 | - | |
40 | - adjs_stats_dict_all = Counter(get_adjs_stats('data/statystyki_2015_06_30_adjs_all.txt', all_statuses)) | |
41 | - adjs_stats_dict_verified = Counter(get_adjs_stats('data/statystyki_2015_06_30_adjs_verified.txt', verified_statuses)) | |
24 | +# nouns_stats_dict_all = Counter(get_stats('data/statystyki_2015_06_30_nouns_all.txt', all_statuses, 'noun')) | |
25 | +# nouns_stats_dict_verified = Counter(get_stats('data/statystyki_2015_06_30_nouns_verified.txt', verified_statuses, 'noun')) | |
26 | + | |
27 | +# adjs_stats_dict_all = Counter(get_stats('data/statystyki_2015_06_30_adjs_all.txt', all_statuses, 'adj')) | |
28 | +# adjs_stats_dict_verified = Counter(get_stats('data/statystyki_2015_06_30_adjs_verified.txt', verified_statuses, 'adj')) | |
29 | +# | |
30 | +# verbs_stats_dict_all = Counter(get_stats('data/statystyki_2015_06_30_verbs_all.txt', all_statuses, 'verb')) | |
31 | +# verbs_stats_dict_verified = Counter(get_stats('data/statystyki_2015_06_30_verbs_verified.txt', verified_statuses, 'verb')) | |
32 | +# | |
33 | + advs_stats_dict_all = Counter(get_stats(all_statuses, 'adv')) | |
34 | + write_stats('data/stats_%s_advs_all.txt' % now, advs_stats_dict_all) | |
35 | +# advs_stats_dict_verified = Counter(get_stats('data/statystyki_2015_06_30_advs_verified.txt', verified_statuses, 'adv')) | |
36 | +# | |
37 | +# all_stats_dict_all = nouns_stats_dict_all + adjs_stats_dict_all + verbs_stats_dict_all + advs_stats_dict_all | |
38 | +# all_stats_dict_verified = nouns_stats_dict_verified + adjs_stats_dict_verified + verbs_stats_dict_verified + advs_stats_dict_verified | |
39 | +# | |
40 | +# write_all_stats('data/statystyki_2015_06_30_all.txt', all_stats_dict_all) | |
41 | +# write_all_stats('data/statystyki_2015_06_30_verified.txt', all_stats_dict_verified) | |
42 | 42 | |
43 | - verbs_stats_dict_all = Counter(get_verb_stats('data/statystyki_2015_06_30_verbs_all.txt', all_statuses)) | |
44 | - verbs_stats_dict_verified = Counter(get_verb_stats('data/statystyki_2015_06_30_verbs_verified.txt', verified_statuses)) | |
43 | +def write_stats(stats_path, stats_dict): | |
44 | + try: | |
45 | + outfile = codecs.open(stats_path, 'wt', 'utf-8') | |
45 | 46 | |
46 | - advs_stats_dict_all = Counter(get_advs_stats('data/statystyki_2015_06_30_advs_all.txt', all_statuses)) | |
47 | - advs_stats_dict_verified = Counter(get_advs_stats('data/statystyki_2015_06_30_advs_verified.txt', verified_statuses)) | |
47 | + outfile.write(u'Liczba typów fraz:\t%d\n' % stats_path['phrases']) | |
48 | + outfile.write(u'Liczba pozycji:\t%d\n' % stats_path['poss']) | |
49 | + outfile.write(u'Liczba haseł:\t%d\n\n' % stats_path['lemmas']) | |
48 | 50 | |
49 | - all_stats_dict_all = nouns_stats_dict_all + adjs_stats_dict_all + verbs_stats_dict_all + advs_stats_dict_all | |
50 | - all_stats_dict_verified = nouns_stats_dict_verified + adjs_stats_dict_verified + verbs_stats_dict_verified + advs_stats_dict_verified | |
51 | + outfile.write(u'Łączna liczba podhaseł:\t%d\n' % stats_path['sub_lemmas']) | |
52 | + outfile.write(u'Liczba podhaseł postaci (ZWROTNOŚĆ, NEGATYWNOŚĆ, PREDYKATYWNOŚĆ, ASPEKT)\n') | |
53 | +#### dokonczyc | |
54 | + | |
55 | + outfile.write(u'Łączna liczba schematów:\t%d\n' % stats_path['schemata']) | |
56 | + outfile.write(u'Liczba schematów pewnych:\t%d\n' % stats_path['cer_schemata']) | |
57 | + outfile.write(u'Liczba schematów wątpliwych:\t%d\n' % stats_path['uncer_schemata']) | |
58 | + outfile.write(u'Liczba schematów złych:\t%d\n' % stats_path['bad_schemata']) | |
59 | + outfile.write(u'Liczba schematów archaicznych:\t%d\n' % stats_path['arch_schemata']) | |
60 | + outfile.write(u'Liczba schematów potocznych:\t%d\n' % stats_path['col_schemata']) | |
61 | + outfile.write(u'Liczba schematów wulgarnych:\t%d\n\n' % stats_path['vul_schemata']) | |
51 | 62 | |
52 | - write_all_stats('data/statystyki_2015_06_30_all.txt', all_stats_dict_all) | |
53 | - write_all_stats('data/statystyki_2015_06_30_verified.txt', all_stats_dict_verified) | |
63 | + outfile.write(u'Liczba schematów z koordynacją:\t%d\n' % stats_path['coor_schemata']) | |
64 | + outfile.write(u'Liczba schematów zleksykalizowanych:\t%d\n\n' % stats_path['lex_schemata']) | |
54 | 65 | |
55 | -def write_all_stats(stats_path, stats_dict): | |
56 | - outfile = codecs.open(stats_path, 'wt', 'utf-8') | |
57 | - | |
58 | - outfile.write(u'Liczba argumentów: ' + str(stats_dict['arg_count']) + '\n') | |
59 | - outfile.write(u'Liczba pozycji: ' + str(stats_dict['pos_count']) + '\n') | |
60 | - outfile.write(u'Liczba haseł: ' + str(stats_dict['lemma_count']) + '\n') | |
61 | - outfile.write('\n') | |
62 | - outfile.write(u'Liczba podhaseł: ' + str(stats_dict['sub_lemma_count']) + '\n') | |
63 | - | |
64 | - outfile.write(u'Liczba podhaseł postaci (ZWROTNOŚĆ, NEGATYWNOŚĆ, PREDYKATYWNOŚĆ, ASPEKT)\n') | |
65 | - | |
66 | - outfile.write(u'Liczba podhaseł postaci (się, _, , imperf): ' + str(stats_dict['sub_sie_imperf_oboj']) + '\n') | |
67 | - outfile.write(u'Liczba podhaseł postaci (się, _, , perf): ' + str(stats_dict['sub_sie_perf_oboj']) + '\n') | |
68 | - outfile.write(u'Liczba podhaseł postaci (się, _, , _): ' + str(stats_dict['sub_sie_oboj_oboj']) + '\n') | |
69 | - outfile.write(u'Liczba podhaseł postaci ( , _, , imperf): ' + str(stats_dict['sub_imperf_oboj']) + '\n') | |
70 | - outfile.write(u'Liczba podhaseł postaci ( , _, , perf): ' + str(stats_dict['sub_perf_oboj']) + '\n') | |
71 | - outfile.write(u'Liczba podhaseł postaci ( , _, , _): ' + str(stats_dict['sub_oboj_oboj']) + '\n') | |
72 | - | |
73 | - outfile.write(u'Liczba podhaseł postaci (się, neg, , imperf): ' + str(stats_dict['sub_sie_imperf_neg']) + '\n') | |
74 | - outfile.write(u'Liczba podhaseł postaci (się, neg, , perf): ' + str(stats_dict['sub_sie_perf_neg']) + '\n') | |
75 | - outfile.write(u'Liczba podhaseł postaci (się, neg, , _): ' + str(stats_dict['sub_sie_oboj_neg']) + '\n') | |
76 | - outfile.write(u'Liczba podhaseł postaci ( , neg, , imperf): ' + str(stats_dict['sub_imperf_neg']) + '\n') | |
77 | - outfile.write(u'Liczba podhaseł postaci ( , neg, , perf): ' + str(stats_dict['sub_perf_neg']) + '\n') | |
78 | - outfile.write(u'Liczba podhaseł postaci ( , neg, , _): ' + str(stats_dict['sub_oboj_neg']) + '\n') | |
79 | - | |
80 | - outfile.write(u'Liczba podhaseł postaci (się, aff, , imperf): ' + str(stats_dict['sub_sie_imperf_aff']) + '\n') | |
81 | - outfile.write(u'Liczba podhaseł postaci (się, aff, , perf): ' + str(stats_dict['sub_sie_perf_aff']) + '\n') | |
82 | - outfile.write(u'Liczba podhaseł postaci (się, aff, , _): ' + str(stats_dict['sub_sie_oboj_aff']) + '\n') | |
83 | - outfile.write(u'Liczba podhaseł postaci ( , aff, , imperf): ' + str(stats_dict['sub_imperf_aff']) + '\n') | |
84 | - outfile.write(u'Liczba podhaseł postaci ( , aff, , perf): ' + str(stats_dict['sub_perf_aff']) + '\n') | |
85 | - outfile.write(u'Liczba podhaseł postaci ( , aff, , _): ' + str(stats_dict['sub_oboj_aff']) + '\n') | |
86 | - | |
87 | - outfile.write(u'Liczba podhaseł postaci ( , , pred, ): ' + str(stats_dict['pred_lemma']) + '\n') | |
88 | - outfile.write(u'Liczba podhaseł postaci ( , , , ): ' + str(stats_dict['npred_lemma']) + '\n') | |
89 | - | |
90 | - outfile.write('\n') | |
91 | - outfile.write(u'Liczba ramek: ' + str(stats_dict['all_frames']) + '\n') | |
92 | - outfile.write(u'Liczba ramek pewnych: ' + str(stats_dict['pewna_frames']) + '\n') | |
93 | - outfile.write(u'Liczba ramek wątpliwych: ' + str(stats_dict['watpliwa_frames']) + '\n') | |
94 | - outfile.write(u'Liczba ramek złych: ' + str(stats_dict['zla_frames']) + '\n') | |
95 | - outfile.write(u'Liczba ramek archaicznych: ' + str(stats_dict['arch_frames']) + '\n') | |
96 | - outfile.write(u'Liczba ramek potocznych: ' + str(stats_dict['potoczna_frames']) + '\n') | |
97 | - outfile.write(u'Liczba ramek wulgarnych: ' + str(stats_dict['wulgarna_frames']) + '\n') | |
98 | - outfile.write('\n') | |
99 | - outfile.write(u'Liczba pozycji z większą niż jeden liczbą argumentów: ' + str(stats_dict['spec_pos']) + '\n') | |
100 | - outfile.write(u'Liczba argumentów występujących w pozycjach o większej niż jeden liczbie argumentów: ' + str(stats_dict['spec_args']) + '\n') | |
101 | - outfile.write(u'Liczba ramek z pozycjami wieloargumentowymi: ' + str(stats_dict['spec_frames']) + '\n') | |
102 | - outfile.write('\n') | |
103 | - outfile.write(u'Liczba pozycji z większą niż jeden liczbą specyfikacji: ' + str(stats_dict['spec_pos_up']) + '\n') | |
104 | - outfile.write(u'Liczba argumentów występujących w pozycjach o większej niż jeden liczbie specyfikacji: ' + str(stats_dict['spec_args_up']) + '\n') | |
105 | - outfile.write(u'Liczba ramek z pozycjami o większej niż jeden liczbie specyfikacji: ' + str(stats_dict['spec_frames_up']) + '\n') | |
106 | - outfile.write(u'Liczba ramek frazeologicznych: ' + str(stats_dict['frames_with_phraseology']) + '\n') | |
107 | - outfile.write('\n') | |
108 | - outfile.write(u'Hasła zawierające schematy frazeologiczne: ' + str(stats_dict['lemmas_with_phraseology']) + '\n') | |
109 | - outfile.write(u'Hasła zawierające pozycje z koordynacją: ' + str(stats_dict['lemmas_with_coordination']) + '\n') | |
110 | - | |
111 | - outfile.close() | |
112 | - | |
113 | -def get_adjs_stats(stats_path, q_statuses): | |
114 | - print 'Be patient, it can take a while.' | |
115 | - outfile = codecs.open(stats_path, 'wt', 'utf-8') | |
116 | - arg_count = 0 | |
117 | - pos_count = 0 | |
118 | - lemma_count = 0 | |
119 | -######################## | |
120 | - sub_lemma_count = 0 | |
121 | - | |
122 | - pred_lemma = 0 | |
123 | - npred_lemma = 0 | |
124 | -################## | |
125 | - pewna_frames = 0 | |
126 | - watpliwa_frames = 0 | |
127 | - zla_frames = 0 | |
128 | - arch_frames = 0 | |
129 | - potoczna_frames = 0 | |
130 | - wulgarna_frames = 0 | |
131 | - all_frames = 0 | |
132 | - | |
133 | - spec_frames = 0 | |
134 | - spec_pos = 0 | |
135 | - spec_args = 0 | |
136 | - | |
137 | - spec_frames_up = 0 | |
138 | - spec_pos_up = 0 | |
139 | - spec_args_up = 0 | |
140 | - | |
141 | - lemmas_with_phraseology = 0 | |
142 | - lemmas_with_coordination = 0 | |
143 | - | |
144 | - frames_with_phraseology = 0 | |
145 | - | |
146 | - lemmas = Lemma.objects.filter(old=False, | |
147 | - entry_obj__pos__tag='adj').filter(reduce(operator.or_, q_statuses)).distinct().order_by('entry').all() | |
148 | - for lemma in lemmas: | |
149 | - print lemma | |
150 | - lemma_count += 1 | |
66 | + outfile.write(u'Hasła zawierające pozycje z koordynacją:\t%d\n' % stats_path['coor_lemmas']) | |
67 | + outfile.write(u'Hasła zawierające schematy zleksykalizowane:\t%d\n\n' % stats_path['lex_lemmas']) | |
151 | 68 | |
152 | - pewna_frames += lemma.frame_opinions.filter(value__value=u'pewny').count() | |
153 | - watpliwa_frames += lemma.frame_opinions.filter(value__value=u'wątpliwy').count() | |
154 | - zla_frames += lemma.frame_opinions.filter(value__value=u'zły').count() | |
155 | - arch_frames += lemma.frame_opinions.filter(value__value=u'archaiczny').count() | |
156 | - potoczna_frames += lemma.frame_opinions.filter(value__value=u'potoczny').count() | |
157 | - wulgarna_frames += lemma.frame_opinions.filter(value__value=u'wulgarny').count() | |
158 | - all_frames += lemma.frames.count() | |
159 | -# if (lemma.frames.count() != lemma.frame_opinions.filter(value__value=u'pewna').count() + | |
160 | -# lemma.frame_opinions.filter(value__value=u'wątpliwa').count() + | |
161 | -# lemma.frame_opinions.filter(value__value=u'zła').count() + | |
162 | -# lemma.frame_opinions.filter(value__value=u'archaiczna').count() + | |
163 | -# lemma.frame_opinions.filter(value__value=u'potoczna').count() + | |
164 | -# lemma.frame_opinions.filter(value__value=u'wulgarna').count()): | |
165 | -# print lemma | |
166 | -# print lemma.frame_opinions.filter(value__value=u'pewna').count() | |
167 | -# print lemma.frame_opinions.filter(value__value=u'wątpliwa').count() | |
168 | -# print lemma.frame_opinions.filter(value__value=u'zła').count() | |
169 | -# print lemma.frame_opinions.filter(value__value=u'archaiczna').count() | |
170 | -# print lemma.frame_opinions.filter(value__value=u'potoczna').count() | |
171 | -# print lemma.frame_opinions.filter(value__value=u'wulgarna').count() | |
172 | -## break | |
173 | - | |
174 | -################################################################################ | |
175 | - | |
176 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'pred', | |
177 | - characteristics__type=u'PREDYKATYWNOŚĆ') | |
178 | - if sub_frame.count() > 0: | |
179 | - pred_lemma += 1 | |
180 | - sub_lemma_count += 1 | |
181 | - | |
182 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | |
183 | - characteristics__type=u'PREDYKATYWNOŚĆ') | |
184 | - if sub_frame.count() > 0: | |
185 | - npred_lemma += 1 | |
186 | - sub_lemma_count += 1 | |
187 | - | |
188 | -################################################################################# | |
189 | - | |
190 | - if lemma.frames.count() != lemma.frame_opinions.count(): | |
191 | - print lemma.entry | |
192 | - | |
193 | - has_phraseology = False | |
194 | - has_coordination = False | |
195 | - for frame in lemma.frames.all(): | |
196 | - phraseologic_frame = False | |
197 | - | |
198 | - pos_count += frame.positions.count() | |
199 | - flat_frames = frame.positions.annotate(num_args=Count('arguments')).aggregate(Max('num_args'))['num_args__max'] | |
200 | - | |
201 | - if flat_frames > 1: | |
202 | - spec_frames += 1 | |
203 | - has_coordination = True | |
204 | - if frame.has_phraseologic_arguments(): | |
205 | - has_phraseology = True | |
206 | - phraseologic_frame = True | |
207 | - | |
208 | - for pos in frame.positions.all(): | |
209 | - args = pos.arguments.count() | |
210 | - arg_count += args | |
211 | -# for arg in pos.arguments.all(): | |
212 | -# if arg.type in PHRASEOLOGIC_TYPES: | |
213 | -# has_phraseology = True | |
214 | -# phraseologic_frame = True | |
215 | -# break | |
216 | - if phraseologic_frame: | |
217 | - frames_with_phraseology += 1 | |
218 | - | |
219 | - if has_phraseology: | |
220 | - lemmas_with_phraseology += 1 | |
221 | - if has_coordination: | |
222 | - lemmas_with_coordination += 1 | |
223 | -# if args > 1: | |
224 | -# spec_pos += 1 | |
225 | -# spec_args += args | |
226 | -# | |
227 | -# prep_args = pos.arguments.filter(Q(type=u'prepnp') | | |
228 | -# Q(type=u'prepncp')) | |
229 | -# np_args = pos.arguments.filter(Q(type=u'np') | | |
230 | -# Q(type=u'ncp')) | |
231 | -# similar_args = True | |
232 | -# if prep_args.count() == args: | |
233 | -# first_arg_case = prep_args.all()[0].atributes.get(type=u'PRZYPADEK').atribute_value.value | |
234 | -# first_arg_prep = prep_args.all()[0].atributes.get(type=u'PRZYIMEK').atribute_value.value | |
235 | -# for arg in prep_args: | |
236 | -# if (first_arg_case != arg.atributes.get(type=u'PRZYPADEK').atribute_value.value or | |
237 | -# first_arg_prep != arg.atributes.get(type=u'PRZYIMEK').atribute_value.value): | |
238 | -# similar_args = False | |
239 | -# break | |
240 | -# elif np_args.count() == args: | |
241 | -# first_arg_case = np_args.all()[0].atributes.get(type=u'PRZYPADEK').atribute_value.value | |
242 | -# for arg in np_args: | |
243 | -# if (first_arg_case != arg.atributes.get(type=u'PRZYPADEK').atribute_value.value): | |
244 | -# similar_args = False | |
245 | -# break | |
246 | -# else: | |
247 | -# similar_args = False | |
248 | -# if not similar_args and args > 1: | |
249 | -# spec_pos_up += 1 | |
250 | -# spec_args_up += args | |
251 | -# add_spec_frame_up = True | |
252 | -# if add_spec_frame_up: | |
253 | -# spec_frames_up += 1 | |
254 | - | |
255 | - outfile.write(u'Liczba argumentów: ' + str(arg_count) + '\n') | |
256 | - outfile.write(u'Liczba pozycji: ' + str(pos_count) + '\n') | |
257 | - outfile.write(u'Liczba haseł: ' + str(lemma_count) + '\n') | |
258 | - outfile.write('\n') | |
259 | - outfile.write(u'Liczba podhaseł: ' + str(sub_lemma_count) + '\n') | |
260 | - outfile.write(u'Liczba podhaseł postaci (ZWROTNOŚĆ, NEGATYWNOŚĆ, PREDYKATYWNOŚĆ, ASPEKT)\n') | |
261 | - outfile.write(u'Liczba podhaseł postaci ( , , pred, ): ' + str(pred_lemma) + '\n') | |
262 | - outfile.write(u'Liczba podhaseł postaci ( , , , ): ' + str(npred_lemma) + '\n') | |
263 | - outfile.write('\n') | |
264 | - outfile.write(u'Liczba ramek: ' + str(all_frames) + '\n') | |
265 | - outfile.write(u'Liczba ramek pewnych: ' + str(pewna_frames) + '\n') | |
266 | - outfile.write(u'Liczba ramek wątpliwych: ' + str(watpliwa_frames) + '\n') | |
267 | - outfile.write(u'Liczba ramek złych: ' + str(zla_frames) + '\n') | |
268 | - outfile.write(u'Liczba ramek archaicznych: ' + str(arch_frames) + '\n') | |
269 | - outfile.write(u'Liczba ramek potocznych: ' + str(potoczna_frames) + '\n') | |
270 | - outfile.write(u'Liczba ramek wulgarnych: ' + str(wulgarna_frames) + '\n') | |
271 | - outfile.write('\n') | |
272 | - outfile.write(u'Liczba pozycji z większą niż jeden liczbą argumentów: ' + str(spec_pos) + '\n') | |
273 | - outfile.write(u'Liczba argumentów występujących w pozycjach o większej niż jeden liczbie argumentów: ' + str(spec_args) + '\n') | |
274 | - outfile.write(u'Liczba ramek z pozycjami wieloargumentowymi: ' + str(spec_frames) + '\n') | |
275 | - outfile.write('\n') | |
276 | - outfile.write(u'Liczba pozycji z większą niż jeden liczbą specyfikacji: ' + str(spec_pos_up) + '\n') | |
277 | - outfile.write(u'Liczba argumentów występujących w pozycjach o większej niż jeden liczbie specyfikacji: ' + str(spec_args_up) + '\n') | |
278 | - outfile.write(u'Liczba ramek z pozycjami o większej niż jeden liczbie specyfikacji: ' + str(spec_frames_up) + '\n') | |
279 | - outfile.write(u'Liczba ramek frazeologicznych: ' + str(frames_with_phraseology) + '\n') | |
280 | - outfile.write('\n') | |
281 | - outfile.write(u'Hasła zawierające schematy frazeologiczne: ' + str(lemmas_with_phraseology) + '\n') | |
282 | - outfile.write(u'Hasła zawierające pozycje z koordynacją: ' + str(lemmas_with_coordination) + '\n') | |
283 | - | |
284 | - | |
285 | - | |
286 | - adjs_stats_dict = {'arg_count': arg_count, | |
287 | - 'pos_count': pos_count, | |
288 | - 'lemma_count': lemma_count, | |
289 | - | |
290 | - 'sub_lemma_count': sub_lemma_count, | |
291 | - | |
292 | - 'pred_lemma': pred_lemma, | |
293 | - 'npred_lemma': npred_lemma, | |
294 | - | |
295 | - 'all_frames': all_frames, | |
296 | - 'pewna_frames': pewna_frames, | |
297 | - 'watpliwa_frames': watpliwa_frames, | |
298 | - 'zla_frames': zla_frames, | |
299 | - 'arch_frames': arch_frames, | |
300 | - 'potoczna_frames': potoczna_frames, | |
301 | - 'wulgarna_frames': wulgarna_frames, | |
302 | - | |
303 | - 'spec_pos': spec_pos, | |
304 | - 'spec_args': spec_args, | |
305 | - 'spec_frames': spec_frames, | |
306 | - | |
307 | - 'spec_pos_up': spec_pos_up, | |
308 | - 'spec_args_up': spec_args_up, | |
309 | - 'spec_frames_up': spec_frames_up, | |
310 | - 'frames_with_phraseology': frames_with_phraseology, | |
311 | - | |
312 | - 'lemmas_with_phraseology': lemmas_with_phraseology, | |
313 | - 'lemmas_with_coordination': lemmas_with_coordination | |
314 | - } | |
315 | - | |
316 | - outfile.close() | |
317 | - return adjs_stats_dict | |
318 | - | |
319 | -def get_nouns_stats(stats_path, q_statuses): | |
320 | - print 'Be patient, it can take a while.' | |
321 | - outfile = codecs.open(stats_path, 'wt', 'utf-8') | |
322 | - arg_count = 0 | |
323 | - pos_count = 0 | |
324 | - lemma_count = 0 | |
325 | - | |
326 | -################## | |
327 | - pewna_frames = 0 | |
328 | - watpliwa_frames = 0 | |
329 | - zla_frames = 0 | |
330 | - arch_frames = 0 | |
331 | - potoczna_frames = 0 | |
332 | - wulgarna_frames = 0 | |
333 | - all_frames = 0 | |
334 | - | |
335 | - spec_frames = 0 | |
336 | - spec_pos = 0 | |
337 | - spec_args = 0 | |
338 | - | |
339 | - spec_frames_up = 0 | |
340 | - spec_pos_up = 0 | |
341 | - spec_args_up = 0 | |
342 | - | |
343 | - lemmas_with_phraseology = 0 | |
344 | - lemmas_with_coordination = 0 | |
345 | - | |
346 | - frames_with_phraseology = 0 | |
69 | + except: | |
70 | + outfile.close() | |
71 | + | |
72 | +def get_stats(statuses, pos): | |
73 | + stats_dict = Counter({u'phrases': 0, | |
74 | + u'poss': 0, | |
75 | + u'lemmas': 0, | |
76 | + u'sub_lemmas': 0, | |
77 | + u'schemata': 0, | |
78 | + u'cer_schemata': 0, | |
79 | + u'uncer_schemata': 0, | |
80 | + u'bad_schemata': 0, | |
81 | + u'arch_schemata': 0, | |
82 | + u'col_schemata': 0, | |
83 | + u'vul_schemata': 0, | |
84 | + u'coor_schemata': 0, | |
85 | + u'lex_schemata': 0, | |
86 | + u'coor_lemmas': 0, | |
87 | + u'lex_lemmas': 0}) | |
347 | 88 | |
348 | 89 | lemmas = Lemma.objects.filter(old=False, |
349 | - entry_obj__pos__tag='noun').filter(reduce(operator.or_, q_statuses)).distinct().order_by('entry').all() | |
350 | - for lemma in lemmas: | |
90 | + entry_obj__pos__tag=pos).filter(status__in=statuses).distinct() | |
91 | + for lemma in lemmas.order_by('entry').all(): | |
351 | 92 | print lemma |
352 | - lemma_count += 1 | |
353 | - | |
354 | - pewna_frames += lemma.frame_opinions.filter(value__value=u'pewny').count() | |
355 | - watpliwa_frames += lemma.frame_opinions.filter(value__value=u'wątpliwy').count() | |
356 | - zla_frames += lemma.frame_opinions.filter(value__value=u'zły').count() | |
357 | - arch_frames += lemma.frame_opinions.filter(value__value=u'archaiczny').count() | |
358 | - potoczna_frames += lemma.frame_opinions.filter(value__value=u'potoczny').count() | |
359 | - wulgarna_frames += lemma.frame_opinions.filter(value__value=u'wulgarny').count() | |
360 | - all_frames += lemma.frames.count() | |
361 | -# if (lemma.frames.count() != lemma.frame_opinions.filter(value__value=u'pewna').count() + | |
362 | -# lemma.frame_opinions.filter(value__value=u'wątpliwa').count() + | |
363 | -# lemma.frame_opinions.filter(value__value=u'zła').count() + | |
364 | -# lemma.frame_opinions.filter(value__value=u'archaiczna').count() + | |
365 | -# lemma.frame_opinions.filter(value__value=u'potoczna').count() + | |
366 | -# lemma.frame_opinions.filter(value__value=u'wulgarna').count()): | |
367 | -# print lemma | |
368 | -# print lemma.frame_opinions.filter(value__value=u'pewna').count() | |
369 | -# print lemma.frame_opinions.filter(value__value=u'wątpliwa').count() | |
370 | -# print lemma.frame_opinions.filter(value__value=u'zła').count() | |
371 | -# print lemma.frame_opinions.filter(value__value=u'archaiczna').count() | |
372 | -# print lemma.frame_opinions.filter(value__value=u'potoczna').count() | |
373 | -# print lemma.frame_opinions.filter(value__value=u'wulgarna').count() | |
374 | -## break | |
375 | - | |
376 | - if lemma.frames.count() != lemma.frame_opinions.count(): | |
377 | - print lemma.entry | |
378 | - | |
93 | + stats_dict[u'lemmas'] += 1 | |
94 | + stats_dict[u'cer_schemata'] += lemma.frame_opinions.filter(value__value=u'pewny').count() | |
95 | + stats_dict[u'uncer_schemata'] += lemma.frame_opinions.filter(value__value=u'wątpliwy').count() | |
96 | + stats_dict[u'bad_schemata'] += lemma.frame_opinions.filter(value__value=u'zły').count() | |
97 | + stats_dict[u'arch_schemata'] += lemma.frame_opinions.filter(value__value=u'archaiczny').count() | |
98 | + stats_dict[u'col_schemata'] += lemma.frame_opinions.filter(value__value=u'potoczny').count() | |
99 | + stats_dict[u'vul_schemata'] += lemma.frame_opinions.filter(value__value=u'wulgarny').count() | |
100 | + stats_dict[u'schemata'] += lemma.frames.count() | |
101 | + | |
102 | + stats_dict = stats_dict + Counter(get_sub_entries_dict(lemma)) | |
103 | + | |
379 | 104 | has_phraseology = False |
380 | 105 | has_coordination = False |
381 | 106 | for frame in lemma.frames.all(): |
382 | - phraseologic_frame = False | |
383 | - | |
384 | - pos_count += frame.positions.count() | |
107 | + stats_dict[u'poss'] += frame.positions.count() | |
385 | 108 | flat_frames = frame.positions.annotate(num_args=Count('arguments')).aggregate(Max('num_args'))['num_args__max'] |
386 | - | |
387 | 109 | if flat_frames > 1: |
388 | - spec_frames += 1 | |
110 | + stats_dict[u'coor_schemata'] += 1 | |
389 | 111 | has_coordination = True |
390 | - if frame.has_phraseologic_arguments(): | |
391 | - has_phraseology = True | |
392 | - phraseologic_frame = True | |
393 | - | |
394 | 112 | for pos in frame.positions.all(): |
395 | - args = pos.arguments.count() | |
396 | - arg_count += args | |
397 | -# for arg in pos.arguments.all(): | |
398 | -# if arg.type in PHRASEOLOGIC_TYPES: | |
399 | -# has_phraseology = True | |
400 | -# phraseologic_frame = True | |
401 | -# break | |
402 | - if phraseologic_frame: | |
403 | - frames_with_phraseology += 1 | |
404 | - | |
405 | - if has_phraseology: | |
406 | - lemmas_with_phraseology += 1 | |
407 | - if has_coordination: | |
408 | - lemmas_with_coordination += 1 | |
409 | - | |
410 | - outfile.write(u'Liczba argumentów: ' + str(arg_count) + '\n') | |
411 | - outfile.write(u'Liczba pozycji: ' + str(pos_count) + '\n') | |
412 | - outfile.write(u'Liczba haseł: ' + str(lemma_count) + '\n') | |
413 | - outfile.write('\n') | |
414 | - | |
415 | - outfile.write('\n') | |
416 | - outfile.write(u'Liczba ramek: ' + str(all_frames) + '\n') | |
417 | - outfile.write(u'Liczba ramek pewnych: ' + str(pewna_frames) + '\n') | |
418 | - outfile.write(u'Liczba ramek wątpliwych: ' + str(watpliwa_frames) + '\n') | |
419 | - outfile.write(u'Liczba ramek złych: ' + str(zla_frames) + '\n') | |
420 | - outfile.write(u'Liczba ramek archaicznych: ' + str(arch_frames) + '\n') | |
421 | - outfile.write(u'Liczba ramek potocznych: ' + str(potoczna_frames) + '\n') | |
422 | - outfile.write(u'Liczba ramek wulgarnych: ' + str(wulgarna_frames) + '\n') | |
423 | - outfile.write('\n') | |
424 | - outfile.write(u'Liczba pozycji z większą niż jeden liczbą argumentów: ' + str(spec_pos) + '\n') | |
425 | - outfile.write(u'Liczba argumentów występujących w pozycjach o większej niż jeden liczbie argumentów: ' + str(spec_args) + '\n') | |
426 | - outfile.write(u'Liczba ramek z pozycjami wieloargumentowymi: ' + str(spec_frames) + '\n') | |
427 | - outfile.write('\n') | |
428 | - outfile.write(u'Liczba pozycji z większą niż jeden liczbą specyfikacji: ' + str(spec_pos_up) + '\n') | |
429 | - outfile.write(u'Liczba argumentów występujących w pozycjach o większej niż jeden liczbie specyfikacji: ' + str(spec_args_up) + '\n') | |
430 | - outfile.write(u'Liczba ramek z pozycjami o większej niż jeden liczbie specyfikacji: ' + str(spec_frames_up) + '\n') | |
431 | - outfile.write(u'Liczba ramek frazeologicznych: ' + str(frames_with_phraseology) + '\n') | |
432 | - outfile.write('\n') | |
433 | - outfile.write(u'Hasła zawierające schematy frazeologiczne: ' + str(lemmas_with_phraseology) + '\n') | |
434 | - outfile.write(u'Hasła zawierające pozycje z koordynacją: ' + str(lemmas_with_coordination) + '\n') | |
435 | - | |
436 | - noun_stats_dict = {'arg_count': arg_count, | |
437 | - 'pos_count': pos_count, | |
438 | - 'lemma_count': lemma_count, | |
439 | - | |
440 | - 'sub_lemma_count': lemma_count, | |
441 | - | |
442 | - 'npred_lemma': lemma_count, | |
443 | - | |
444 | - 'all_frames': all_frames, | |
445 | - 'pewna_frames': pewna_frames, | |
446 | - 'watpliwa_frames': watpliwa_frames, | |
447 | - 'zla_frames': zla_frames, | |
448 | - 'arch_frames': arch_frames, | |
449 | - 'potoczna_frames': potoczna_frames, | |
450 | - 'wulgarna_frames': wulgarna_frames, | |
451 | - | |
452 | - 'spec_pos': spec_pos, | |
453 | - 'spec_args': spec_args, | |
454 | - 'spec_frames': spec_frames, | |
455 | - | |
456 | - 'spec_pos_up': spec_pos_up, | |
457 | - 'spec_args_up': spec_args_up, | |
458 | - 'spec_frames_up': spec_frames_up, | |
459 | - 'frames_with_phraseology': frames_with_phraseology, | |
460 | - | |
461 | - 'lemmas_with_phraseology': lemmas_with_phraseology, | |
462 | - 'lemmas_with_coordination': lemmas_with_coordination | |
463 | - } | |
464 | - | |
465 | - outfile.close() | |
466 | - return noun_stats_dict | |
467 | - | |
468 | -def get_advs_stats(stats_path, q_statuses): | |
469 | - print 'Be patient, it can take a while.' | |
470 | - outfile = codecs.open(stats_path, 'wt', 'utf-8') | |
471 | - arg_count = 0 | |
472 | - pos_count = 0 | |
473 | - lemma_count = 0 | |
474 | - | |
475 | -################## | |
476 | - pewna_frames = 0 | |
477 | - watpliwa_frames = 0 | |
478 | - zla_frames = 0 | |
479 | - arch_frames = 0 | |
480 | - potoczna_frames = 0 | |
481 | - wulgarna_frames = 0 | |
482 | - all_frames = 0 | |
483 | - | |
484 | - spec_frames = 0 | |
485 | - spec_pos = 0 | |
486 | - spec_args = 0 | |
487 | - | |
488 | - spec_frames_up = 0 | |
489 | - spec_pos_up = 0 | |
490 | - spec_args_up = 0 | |
491 | - | |
492 | - lemmas_with_phraseology = 0 | |
493 | - lemmas_with_coordination = 0 | |
494 | - | |
495 | - frames_with_phraseology = 0 | |
496 | - | |
497 | - lemmas = Lemma.objects.filter(old=False, | |
498 | - entry_obj__pos__tag='adv').filter(reduce(operator.or_, q_statuses)).distinct().order_by('entry').all() | |
499 | - for lemma in lemmas: | |
500 | - print lemma | |
501 | - lemma_count += 1 | |
502 | - | |
503 | - pewna_frames += lemma.frame_opinions.filter(value__value=u'pewny').count() | |
504 | - watpliwa_frames += lemma.frame_opinions.filter(value__value=u'wątpliwy').count() | |
505 | - zla_frames += lemma.frame_opinions.filter(value__value=u'zły').count() | |
506 | - arch_frames += lemma.frame_opinions.filter(value__value=u'archaiczny').count() | |
507 | - potoczna_frames += lemma.frame_opinions.filter(value__value=u'potoczny').count() | |
508 | - wulgarna_frames += lemma.frame_opinions.filter(value__value=u'wulgarny').count() | |
509 | - all_frames += lemma.frames.count() | |
510 | -# if (lemma.frames.count() != lemma.frame_opinions.filter(value__value=u'pewna').count() + | |
511 | -# lemma.frame_opinions.filter(value__value=u'wątpliwa').count() + | |
512 | -# lemma.frame_opinions.filter(value__value=u'zła').count() + | |
513 | -# lemma.frame_opinions.filter(value__value=u'archaiczna').count() + | |
514 | -# lemma.frame_opinions.filter(value__value=u'potoczna').count() + | |
515 | -# lemma.frame_opinions.filter(value__value=u'wulgarna').count()): | |
516 | -# print lemma | |
517 | -# print lemma.frame_opinions.filter(value__value=u'pewna').count() | |
518 | -# print lemma.frame_opinions.filter(value__value=u'wątpliwa').count() | |
519 | -# print lemma.frame_opinions.filter(value__value=u'zła').count() | |
520 | -# print lemma.frame_opinions.filter(value__value=u'archaiczna').count() | |
521 | -# print lemma.frame_opinions.filter(value__value=u'potoczna').count() | |
522 | -# print lemma.frame_opinions.filter(value__value=u'wulgarna').count() | |
523 | -## break | |
524 | - | |
525 | - if lemma.frames.count() != lemma.frame_opinions.count(): | |
526 | - print lemma.entry | |
527 | - | |
528 | - has_phraseology = False | |
529 | - has_coordination = False | |
530 | - for frame in lemma.frames.all(): | |
531 | - phraseologic_frame = False | |
532 | - | |
533 | - pos_count += frame.positions.count() | |
534 | - flat_frames = frame.positions.annotate(num_args=Count('arguments')).aggregate(Max('num_args'))['num_args__max'] | |
535 | - | |
536 | - if flat_frames > 1: | |
537 | - spec_frames += 1 | |
538 | - has_coordination = True | |
539 | - if frame.has_phraseologic_arguments(): | |
113 | + stats_dict[u'phrases'] += pos.arguments.count() | |
114 | + if frame.positions.filter(arguments__type__in=LEX_TYPES).exists(): | |
115 | + stats_dict[u'lex_schemata'] += 1 | |
540 | 116 | has_phraseology = True |
541 | - phraseologic_frame = True | |
542 | - | |
543 | - for pos in frame.positions.all(): | |
544 | - args = pos.arguments.count() | |
545 | - arg_count += args | |
546 | -# for arg in pos.arguments.all(): | |
547 | -# if arg.type in PHRASEOLOGIC_TYPES: | |
548 | -# has_phraseology = True | |
549 | -# phraseologic_frame = True | |
550 | -# break | |
551 | - if phraseologic_frame: | |
552 | - frames_with_phraseology += 1 | |
553 | 117 | |
554 | 118 | if has_phraseology: |
555 | - lemmas_with_phraseology += 1 | |
119 | + stats_dict[u'lex_lemmas'] += 1 | |
556 | 120 | if has_coordination: |
557 | - lemmas_with_coordination += 1 | |
558 | - | |
559 | - outfile.write(u'Liczba argumentów: ' + str(arg_count) + '\n') | |
560 | - outfile.write(u'Liczba pozycji: ' + str(pos_count) + '\n') | |
561 | - outfile.write(u'Liczba haseł: ' + str(lemma_count) + '\n') | |
562 | - outfile.write('\n') | |
563 | - | |
564 | - outfile.write('\n') | |
565 | - outfile.write(u'Liczba ramek: ' + str(all_frames) + '\n') | |
566 | - outfile.write(u'Liczba ramek pewnych: ' + str(pewna_frames) + '\n') | |
567 | - outfile.write(u'Liczba ramek wątpliwych: ' + str(watpliwa_frames) + '\n') | |
568 | - outfile.write(u'Liczba ramek złych: ' + str(zla_frames) + '\n') | |
569 | - outfile.write(u'Liczba ramek archaicznych: ' + str(arch_frames) + '\n') | |
570 | - outfile.write(u'Liczba ramek potocznych: ' + str(potoczna_frames) + '\n') | |
571 | - outfile.write(u'Liczba ramek wulgarnych: ' + str(wulgarna_frames) + '\n') | |
572 | - outfile.write('\n') | |
573 | - outfile.write(u'Liczba pozycji z większą niż jeden liczbą argumentów: ' + str(spec_pos) + '\n') | |
574 | - outfile.write(u'Liczba argumentów występujących w pozycjach o większej niż jeden liczbie argumentów: ' + str(spec_args) + '\n') | |
575 | - outfile.write(u'Liczba ramek z pozycjami wieloargumentowymi: ' + str(spec_frames) + '\n') | |
576 | - outfile.write('\n') | |
577 | - outfile.write(u'Liczba pozycji z większą niż jeden liczbą specyfikacji: ' + str(spec_pos_up) + '\n') | |
578 | - outfile.write(u'Liczba argumentów występujących w pozycjach o większej niż jeden liczbie specyfikacji: ' + str(spec_args_up) + '\n') | |
579 | - outfile.write(u'Liczba ramek z pozycjami o większej niż jeden liczbie specyfikacji: ' + str(spec_frames_up) + '\n') | |
580 | - outfile.write(u'Liczba ramek frazeologicznych: ' + str(frames_with_phraseology) + '\n') | |
581 | - outfile.write('\n') | |
582 | - outfile.write(u'Hasła zawierające schematy frazeologiczne: ' + str(lemmas_with_phraseology) + '\n') | |
583 | - outfile.write(u'Hasła zawierające pozycje z koordynacją: ' + str(lemmas_with_coordination) + '\n') | |
584 | - | |
585 | - advs_stats_dict = {'arg_count': arg_count, | |
586 | - 'pos_count': pos_count, | |
587 | - 'lemma_count': lemma_count, | |
588 | - | |
589 | - 'sub_lemma_count': lemma_count, | |
590 | - | |
591 | - 'npred_lemma': lemma_count, | |
592 | - | |
593 | - 'all_frames': all_frames, | |
594 | - 'pewna_frames': pewna_frames, | |
595 | - 'watpliwa_frames': watpliwa_frames, | |
596 | - 'zla_frames': zla_frames, | |
597 | - 'arch_frames': arch_frames, | |
598 | - 'potoczna_frames': potoczna_frames, | |
599 | - 'wulgarna_frames': wulgarna_frames, | |
600 | - | |
601 | - 'spec_pos': spec_pos, | |
602 | - 'spec_args': spec_args, | |
603 | - 'spec_frames': spec_frames, | |
604 | - | |
605 | - 'spec_pos_up': spec_pos_up, | |
606 | - 'spec_args_up': spec_args_up, | |
607 | - 'spec_frames_up': spec_frames_up, | |
608 | - 'frames_with_phraseology': frames_with_phraseology, | |
609 | - | |
610 | - 'lemmas_with_phraseology': lemmas_with_phraseology, | |
611 | - 'lemmas_with_coordination': lemmas_with_coordination | |
612 | - } | |
613 | - | |
614 | - outfile.close() | |
615 | - return advs_stats_dict | |
616 | - | |
617 | -def get_verb_stats(stats_path, q_statuses): | |
618 | - print 'Be patient, it can take a while.' | |
619 | - outfile = codecs.open(stats_path, 'wt', 'utf-8') | |
620 | - arg_count = 0 | |
621 | - pos_count = 0 | |
622 | - lemma_count = 0 | |
623 | -##################3 | |
624 | - sub_lemma_count = 0 | |
625 | - | |
626 | - sub_sie_imperf_oboj = 0 | |
627 | - sub_sie_perf_oboj = 0 | |
628 | - sub_sie_oboj_oboj = 0 | |
629 | - sub_imperf_oboj = 0 | |
630 | - sub_perf_oboj = 0 | |
631 | - sub_oboj_oboj = 0 | |
632 | - | |
633 | - sub_sie_imperf_neg = 0 | |
634 | - sub_sie_perf_neg = 0 | |
635 | - sub_sie_oboj_neg = 0 | |
636 | - sub_imperf_neg = 0 | |
637 | - sub_perf_neg = 0 | |
638 | - sub_oboj_neg = 0 | |
639 | - | |
640 | - sub_sie_imperf_aff = 0 | |
641 | - sub_sie_perf_aff = 0 | |
642 | - sub_sie_oboj_aff = 0 | |
643 | - sub_imperf_aff = 0 | |
644 | - sub_perf_aff = 0 | |
645 | - sub_oboj_aff = 0 | |
646 | - | |
647 | - sie_lemma = 0 | |
648 | - nsie_lemma = 0 | |
649 | -################## | |
650 | - pewna_frames = 0 | |
651 | - watpliwa_frames = 0 | |
652 | - zla_frames = 0 | |
653 | - arch_frames = 0 | |
654 | - potoczna_frames = 0 | |
655 | - wulgarna_frames = 0 | |
656 | - all_frames = 0 | |
657 | - | |
658 | - spec_frames = 0 | |
659 | - spec_pos = 0 | |
660 | - spec_args = 0 | |
661 | - | |
662 | - spec_frames_up = 0 | |
663 | - spec_pos_up = 0 | |
664 | - spec_args_up = 0 | |
665 | - | |
666 | - lemmas_with_phraseology = 0 | |
667 | - lemmas_with_coordination = 0 | |
668 | - | |
669 | - frames_with_phraseology = 0 | |
670 | - | |
671 | - lemmas = Lemma.objects.filter(old=False, | |
672 | - entry_obj__pos__tag='verb').filter(reduce(operator.or_, q_statuses)).distinct().order_by('entry').all() | |
673 | - for lemma in lemmas: | |
674 | - # jak chcemy bez zapłaconych | |
675 | -# if RealizedLemma.objects.filter(Q(status__status=u'sprawdzone') | | |
676 | -# Q(status__status=u'tymczasowy')).filter(lemma__entry=lemma.entry, | |
677 | -# paid=False).exists(): | |
678 | -# pass | |
679 | -# else: | |
680 | -# continue | |
681 | - print lemma | |
682 | - lemma_count += 1 | |
683 | - | |
684 | - pewna_frames += lemma.frame_opinions.filter(value__value=u'pewny').count() | |
685 | - watpliwa_frames += lemma.frame_opinions.filter(value__value=u'wątpliwy').count() | |
686 | - zla_frames += lemma.frame_opinions.filter(value__value=u'zły').count() | |
687 | - arch_frames += lemma.frame_opinions.filter(value__value=u'archaiczny').count() | |
688 | - potoczna_frames += lemma.frame_opinions.filter(value__value=u'potoczny').count() | |
689 | - wulgarna_frames += lemma.frame_opinions.filter(value__value=u'wulgarny').count() | |
690 | - all_frames += lemma.frames.count() | |
691 | -# if (lemma.frames.count() != lemma.frame_opinions.filter(value__value=u'pewna').count() + | |
692 | -# lemma.frame_opinions.filter(value__value=u'wątpliwa').count() + | |
693 | -# lemma.frame_opinions.filter(value__value=u'zła').count() + | |
694 | -# lemma.frame_opinions.filter(value__value=u'archaiczna').count() + | |
695 | -# lemma.frame_opinions.filter(value__value=u'potoczna').count() + | |
696 | -# lemma.frame_opinions.filter(value__value=u'wulgarna').count()): | |
697 | -# print lemma | |
698 | -# print lemma.frame_opinions.filter(value__value=u'pewna').count() | |
699 | -# print lemma.frame_opinions.filter(value__value=u'wątpliwa').count() | |
700 | -# print lemma.frame_opinions.filter(value__value=u'zła').count() | |
701 | -# print lemma.frame_opinions.filter(value__value=u'archaiczna').count() | |
702 | -# print lemma.frame_opinions.filter(value__value=u'potoczna').count() | |
703 | -# print lemma.frame_opinions.filter(value__value=u'wulgarna').count() | |
704 | -## break | |
705 | - | |
706 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | |
707 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | |
708 | - characteristics__value__value=u'imperf').filter( | |
709 | - characteristics__type=u'NEGATYWNOŚĆ', | |
710 | - characteristics__value__value=u'_') | |
711 | - if sub_frame.count() > 0: | |
712 | - sub_sie_imperf_oboj += 1 | |
713 | - sub_lemma_count += 1 | |
714 | - | |
715 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | |
716 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | |
717 | - characteristics__value__value=u'perf').filter( | |
718 | - characteristics__type=u'NEGATYWNOŚĆ', | |
719 | - characteristics__value__value=u'_') | |
720 | - if sub_frame.count() > 0: | |
721 | - sub_sie_perf_oboj += 1 | |
722 | - sub_lemma_count += 1 | |
723 | - | |
724 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | |
725 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | |
726 | - characteristics__value__value=u'_').filter( | |
727 | - characteristics__type=u'NEGATYWNOŚĆ', | |
728 | - characteristics__value__value=u'_') | |
729 | - if sub_frame.count() > 0: | |
730 | - sub_sie_oboj_oboj += 1 | |
731 | - sub_lemma_count += 1 | |
732 | - | |
733 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | |
734 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | |
735 | - characteristics__value__value=u'imperf').filter( | |
736 | - characteristics__type=u'NEGATYWNOŚĆ', | |
737 | - characteristics__value__value=u'_') | |
738 | - if sub_frame.count() > 0: | |
739 | - sub_imperf_oboj += 1 | |
740 | - sub_lemma_count += 1 | |
741 | - | |
742 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | |
743 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | |
744 | - characteristics__value__value=u'perf').filter( | |
745 | - characteristics__type=u'NEGATYWNOŚĆ', | |
746 | - characteristics__value__value=u'_') | |
747 | - if sub_frame.count() > 0: | |
748 | - sub_perf_oboj += 1 | |
749 | - sub_lemma_count += 1 | |
750 | - | |
751 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | |
752 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | |
753 | - characteristics__value__value=u'_').filter( | |
754 | - characteristics__type=u'NEGATYWNOŚĆ', | |
755 | - characteristics__value__value=u'_') | |
756 | - if sub_frame.count() > 0: | |
757 | - sub_oboj_oboj += 1 | |
758 | - sub_lemma_count += 1 | |
759 | - | |
760 | -################################### neg ######################################### | |
761 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | |
762 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | |
763 | - characteristics__value__value=u'imperf').filter( | |
764 | - characteristics__type=u'NEGATYWNOŚĆ', | |
765 | - characteristics__value__value=u'neg') | |
766 | - if sub_frame.count() > 0: | |
767 | - sub_sie_imperf_neg += 1 | |
768 | - sub_lemma_count += 1 | |
769 | - | |
770 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | |
771 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | |
772 | - characteristics__value__value=u'perf').filter( | |
773 | - characteristics__type=u'NEGATYWNOŚĆ', | |
774 | - characteristics__value__value=u'neg') | |
775 | - if sub_frame.count() > 0: | |
776 | - sub_sie_perf_neg += 1 | |
777 | - sub_lemma_count += 1 | |
778 | - | |
779 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | |
780 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | |
781 | - characteristics__value__value=u'_').filter( | |
782 | - characteristics__type=u'NEGATYWNOŚĆ', | |
783 | - characteristics__value__value=u'neg') | |
784 | - if sub_frame.count() > 0: | |
785 | - sub_sie_oboj_neg += 1 | |
786 | - sub_lemma_count += 1 | |
787 | - | |
788 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | |
789 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | |
790 | - characteristics__value__value=u'imperf').filter( | |
791 | - characteristics__type=u'NEGATYWNOŚĆ', | |
792 | - characteristics__value__value=u'neg') | |
793 | - if sub_frame.count() > 0: | |
794 | - sub_imperf_neg += 1 | |
795 | - sub_lemma_count += 1 | |
796 | - | |
797 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | |
798 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | |
799 | - characteristics__value__value=u'perf').filter( | |
800 | - characteristics__type=u'NEGATYWNOŚĆ', | |
801 | - characteristics__value__value=u'neg') | |
802 | - if sub_frame.count() > 0: | |
803 | - sub_perf_neg += 1 | |
804 | - sub_lemma_count += 1 | |
805 | - | |
806 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | |
807 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | |
808 | - characteristics__value__value=u'_').filter( | |
809 | - characteristics__type=u'NEGATYWNOŚĆ', | |
810 | - characteristics__value__value=u'neg') | |
811 | - if sub_frame.count() > 0: | |
812 | - sub_oboj_neg += 1 | |
813 | - sub_lemma_count += 1 | |
814 | - | |
815 | -#################################### aff ######################################## | |
816 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | |
817 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | |
818 | - characteristics__value__value=u'imperf').filter( | |
819 | - characteristics__type=u'NEGATYWNOŚĆ', | |
820 | - characteristics__value__value=u'aff') | |
821 | - if sub_frame.count() > 0: | |
822 | - sub_sie_imperf_aff += 1 | |
823 | - sub_lemma_count += 1 | |
824 | - | |
825 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | |
826 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | |
827 | - characteristics__value__value=u'perf').filter( | |
828 | - characteristics__type=u'NEGATYWNOŚĆ', | |
829 | - characteristics__value__value=u'aff') | |
830 | - if sub_frame.count() > 0: | |
831 | - sub_sie_perf_aff += 1 | |
832 | - sub_lemma_count += 1 | |
833 | - | |
834 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | |
835 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | |
836 | - characteristics__value__value=u'_').filter( | |
837 | - characteristics__type=u'NEGATYWNOŚĆ', | |
838 | - characteristics__value__value=u'aff') | |
839 | - if sub_frame.count() > 0: | |
840 | - sub_sie_oboj_aff += 1 | |
841 | - sub_lemma_count += 1 | |
842 | - | |
843 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | |
844 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | |
845 | - characteristics__value__value=u'imperf').filter( | |
846 | - characteristics__type=u'NEGATYWNOŚĆ', | |
847 | - characteristics__value__value=u'aff') | |
848 | - if sub_frame.count() > 0: | |
849 | - sub_imperf_aff += 1 | |
850 | - sub_lemma_count += 1 | |
851 | - | |
852 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | |
853 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | |
854 | - characteristics__value__value=u'perf').filter( | |
855 | - characteristics__type=u'NEGATYWNOŚĆ', | |
856 | - characteristics__value__value=u'aff') | |
857 | - if sub_frame.count() > 0: | |
858 | - sub_perf_aff += 1 | |
859 | - sub_lemma_count += 1 | |
860 | - | |
861 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | |
862 | - characteristics__type=u'ZWROTNOŚĆ').filter(characteristics__type=u'ASPEKT', | |
863 | - characteristics__value__value=u'_').filter( | |
864 | - characteristics__type=u'NEGATYWNOŚĆ', | |
865 | - characteristics__value__value=u'aff') | |
866 | - if sub_frame.count() > 0: | |
867 | - sub_oboj_aff += 1 | |
868 | - sub_lemma_count += 1 | |
869 | -################################################################################ | |
870 | - | |
871 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'się', | |
872 | - characteristics__type=u'ZWROTNOŚĆ') | |
873 | - if sub_frame.count() > 0: | |
874 | - sie_lemma += 1 | |
875 | - | |
876 | - sub_frame = lemma.frames.filter(characteristics__value__value=u'', | |
877 | - characteristics__type=u'ZWROTNOŚĆ') | |
878 | - if sub_frame.count() > 0: | |
879 | - nsie_lemma += 1 | |
880 | - | |
881 | - | |
882 | -################################################################################# | |
883 | - | |
884 | - if lemma.frames.count() != lemma.frame_opinions.count(): | |
885 | - print lemma.entry | |
886 | - | |
887 | - has_phraseology = False | |
888 | - has_coordination = False | |
889 | - for frame in lemma.frames.all(): | |
890 | - phraseologic_frame = False | |
891 | - | |
892 | - pos_count += frame.positions.count() | |
893 | - flat_frames = frame.positions.annotate(num_args=Count('arguments')).aggregate(Max('num_args'))['num_args__max'] | |
894 | - | |
895 | - if flat_frames > 1: | |
896 | - spec_frames += 1 | |
897 | - has_coordination = True | |
898 | - if frame.has_phraseologic_arguments(): | |
899 | - has_phraseology = True | |
900 | - phraseologic_frame = True | |
901 | - | |
902 | - for pos in frame.positions.all(): | |
903 | - args = pos.arguments.count() | |
904 | - arg_count += args | |
905 | -# for arg in pos.arguments.all(): | |
906 | -# if arg.type in PHRASEOLOGIC_TYPES: | |
907 | -# has_phraseology = True | |
908 | -# phraseologic_frame = True | |
909 | -# break | |
910 | - if phraseologic_frame: | |
911 | - frames_with_phraseology += 1 | |
121 | + stats_dict[u'coor_lemmas'] += 1 | |
122 | + | |
123 | + return stats_dict | |
124 | + | |
125 | +def get_sub_entries_dict(lemma): | |
126 | + sub_entries_dict = {} | |
127 | + frame_chars_dict = sorted_frame_char_values_dict() | |
128 | + for reflex in frame_chars_dict['sorted_reflex_vals']: | |
129 | + for neg in frame_chars_dict['sorted_neg_vals']: | |
130 | + for pred in frame_chars_dict['sorted_pred_vals']: | |
131 | + for aspect in frame_chars_dict['sorted_aspect_vals']: | |
132 | + matching_frames = lemma.get_frames_by_char_values(reflex_val=reflex, | |
133 | + neg_val=neg, | |
134 | + pred_val=pred, | |
135 | + aspect_val=aspect) | |
136 | + if matching_frames.exists(): | |
137 | + if not u'sub_lemmas' in sub_entries_dict: | |
138 | + sub_entries_dict[u'sub_lemmas'] = 0 | |
139 | + sub_entries_dict[u'sub_lemmas'] += 1 | |
912 | 140 | |
913 | - if has_phraseology: | |
914 | - lemmas_with_phraseology += 1 | |
915 | - if has_coordination: | |
916 | - lemmas_with_coordination += 1 | |
917 | - | |
918 | - outfile.write(u'Liczba argumentów: ' + str(arg_count) + '\n') | |
919 | - outfile.write(u'Liczba pozycji: ' + str(pos_count) + '\n') | |
920 | - outfile.write(u'Liczba haseł: ' + str(lemma_count) + '\n') | |
921 | - outfile.write('\n') | |
922 | - outfile.write(u'Liczba podhaseł: ' + str(sub_lemma_count) + '\n') | |
923 | - | |
924 | - outfile.write(u'Liczba podhaseł postaci (ZWROTNOŚĆ, NEGATYWNOŚĆ, PREDYKATYWNOŚĆ, ASPEKT)\n') | |
925 | - | |
926 | - outfile.write(u'Liczba podhaseł postaci (się, _, , imperf): ' + str(sub_sie_imperf_oboj) + '\n') | |
927 | - outfile.write(u'Liczba podhaseł postaci (się, _, , perf): ' + str(sub_sie_perf_oboj) + '\n') | |
928 | - outfile.write(u'Liczba podhaseł postaci (się, _, , _): ' + str(sub_sie_oboj_oboj) + '\n') | |
929 | - outfile.write(u'Liczba podhaseł postaci ( , _, , imperf): ' + str(sub_imperf_oboj) + '\n') | |
930 | - outfile.write(u'Liczba podhaseł postaci ( , _, , perf): ' + str(sub_perf_oboj) + '\n') | |
931 | - outfile.write(u'Liczba podhaseł postaci ( , _, , _): ' + str(sub_oboj_oboj) + '\n') | |
932 | - | |
933 | - outfile.write(u'Liczba podhaseł postaci (się, neg, , imperf): ' + str(sub_sie_imperf_neg) + '\n') | |
934 | - outfile.write(u'Liczba podhaseł postaci (się, neg, , perf): ' + str(sub_sie_perf_neg) + '\n') | |
935 | - outfile.write(u'Liczba podhaseł postaci (się, neg, , _): ' + str(sub_sie_oboj_neg) + '\n') | |
936 | - outfile.write(u'Liczba podhaseł postaci ( , neg, , imperf): ' + str(sub_imperf_neg) + '\n') | |
937 | - outfile.write(u'Liczba podhaseł postaci ( , neg, , perf): ' + str(sub_perf_neg) + '\n') | |
938 | - outfile.write(u'Liczba podhaseł postaci ( , neg, , _): ' + str(sub_oboj_neg) + '\n') | |
939 | - | |
940 | - outfile.write(u'Liczba podhaseł postaci (się, aff, , imperf): ' + str(sub_sie_imperf_aff) + '\n') | |
941 | - outfile.write(u'Liczba podhaseł postaci (się, aff, , perf): ' + str(sub_sie_perf_aff) + '\n') | |
942 | - outfile.write(u'Liczba podhaseł postaci (się, aff, , _): ' + str(sub_sie_oboj_aff) + '\n') | |
943 | - outfile.write(u'Liczba podhaseł postaci ( , aff, , imperf): ' + str(sub_imperf_aff) + '\n') | |
944 | - outfile.write(u'Liczba podhaseł postaci ( , aff, , perf): ' + str(sub_perf_aff) + '\n') | |
945 | - outfile.write(u'Liczba podhaseł postaci ( , aff, , _): ' + str(sub_oboj_aff) + '\n') | |
946 | - | |
947 | - outfile.write('\n') | |
948 | - outfile.write(u'Liczba ramek: ' + str(all_frames) + '\n') | |
949 | - outfile.write(u'Liczba ramek pewnych: ' + str(pewna_frames) + '\n') | |
950 | - outfile.write(u'Liczba ramek wątpliwych: ' + str(watpliwa_frames) + '\n') | |
951 | - outfile.write(u'Liczba ramek złych: ' + str(zla_frames) + '\n') | |
952 | - outfile.write(u'Liczba ramek archaicznych: ' + str(arch_frames) + '\n') | |
953 | - outfile.write(u'Liczba ramek potocznych: ' + str(potoczna_frames) + '\n') | |
954 | - outfile.write(u'Liczba ramek wulgarnych: ' + str(wulgarna_frames) + '\n') | |
955 | - outfile.write('\n') | |
956 | - outfile.write(u'Liczba pozycji z większą niż jeden liczbą argumentów: ' + str(spec_pos) + '\n') | |
957 | - outfile.write(u'Liczba argumentów występujących w pozycjach o większej niż jeden liczbie argumentów: ' + str(spec_args) + '\n') | |
958 | - outfile.write(u'Liczba ramek z pozycjami wieloargumentowymi: ' + str(spec_frames) + '\n') | |
959 | - outfile.write('\n') | |
960 | - outfile.write(u'Liczba pozycji z większą niż jeden liczbą specyfikacji: ' + str(spec_pos_up) + '\n') | |
961 | - outfile.write(u'Liczba argumentów występujących w pozycjach o większej niż jeden liczbie specyfikacji: ' + str(spec_args_up) + '\n') | |
962 | - outfile.write(u'Liczba ramek z pozycjami o większej niż jeden liczbie specyfikacji: ' + str(spec_frames_up) + '\n') | |
963 | - outfile.write(u'Liczba ramek frazeologicznych: ' + str(frames_with_phraseology) + '\n') | |
964 | - outfile.write('\n') | |
965 | - outfile.write(u'Hasła zawierające schematy frazeologiczne: ' + str(lemmas_with_phraseology) + '\n') | |
966 | - outfile.write(u'Hasła zawierające pozycje z koordynacją: ' + str(lemmas_with_coordination) + '\n') | |
967 | - | |
968 | - | |
969 | - verb_stats_dict = {'arg_count': arg_count, | |
970 | - 'pos_count': pos_count, | |
971 | - 'lemma_count': lemma_count, | |
972 | - | |
973 | - 'sub_lemma_count': sub_lemma_count, | |
974 | - | |
975 | - 'sub_sie_imperf_oboj': sub_sie_imperf_oboj, | |
976 | - 'sub_sie_perf_oboj': sub_sie_perf_oboj, | |
977 | - 'sub_sie_oboj_oboj': sub_sie_oboj_oboj, | |
978 | - 'sub_imperf_oboj': sub_imperf_oboj, | |
979 | - 'sub_perf_oboj': sub_perf_oboj, | |
980 | - 'sub_oboj_oboj': sub_oboj_oboj, | |
981 | - | |
982 | - 'sub_sie_imperf_neg': sub_sie_imperf_neg, | |
983 | - 'sub_sie_perf_neg': sub_sie_perf_neg, | |
984 | - 'sub_sie_oboj_neg': sub_sie_oboj_neg, | |
985 | - 'sub_imperf_neg': sub_imperf_neg, | |
986 | - 'sub_perf_neg': sub_perf_neg, | |
987 | - 'sub_oboj_neg': sub_oboj_neg, | |
988 | - | |
989 | - 'sub_sie_imperf_aff': sub_sie_imperf_aff, | |
990 | - 'sub_sie_perf_aff': sub_sie_perf_aff, | |
991 | - 'sub_sie_oboj_aff': sub_sie_oboj_aff, | |
992 | - 'sub_imperf_aff': sub_imperf_aff, | |
993 | - 'sub_perf_aff': sub_perf_aff, | |
994 | - 'sub_oboj_aff': sub_oboj_aff, | |
995 | - | |
996 | - 'all_frames': all_frames, | |
997 | - 'pewna_frames': pewna_frames, | |
998 | - 'watpliwa_frames': watpliwa_frames, | |
999 | - 'zla_frames': zla_frames, | |
1000 | - 'arch_frames': arch_frames, | |
1001 | - 'potoczna_frames': potoczna_frames, | |
1002 | - 'wulgarna_frames': wulgarna_frames, | |
1003 | - | |
1004 | - 'spec_pos': spec_pos, | |
1005 | - 'spec_args': spec_args, | |
1006 | - 'spec_frames': spec_frames, | |
1007 | - | |
1008 | - 'spec_pos_up': spec_pos_up, | |
1009 | - 'spec_args_up': spec_args_up, | |
1010 | - 'spec_frames_up': spec_frames_up, | |
1011 | - 'frames_with_phraseology': frames_with_phraseology, | |
1012 | - | |
1013 | - 'lemmas_with_phraseology': lemmas_with_phraseology, | |
1014 | - 'lemmas_with_coordination': lemmas_with_coordination | |
1015 | - } | |
1016 | - | |
1017 | - outfile.close() | |
1018 | - return verb_stats_dict | |
1019 | - | |
1020 | 141 | \ No newline at end of file |
142 | + subentry_key = u'Liczba podhaseł postaci: (%s,%s,%s,%s)' % (reflex.value, neg.value, | |
143 | + pred.value, aspect.value) | |
144 | + if not subentry_key in sub_entries_dict: | |
145 | + sub_entries_dict[subentry_key] = 0 | |
146 | + sub_entries_dict[subentry_key] += 1 | |
147 | + return sub_entries_dict | |
... | ... |
dictionary/models.py
... | ... | @@ -107,6 +107,14 @@ class Lemma_Status(Model): |
107 | 107 | ('see_stats', u'Może oglądać swoje statystyki.'), |
108 | 108 | ('see_all_stats', u'Może oglądać statystyki wszystkich.'), |
109 | 109 | ) |
110 | + | |
111 | +def get_checked_statuses(): | |
112 | + checked_type = LemmaStatusType.objects.get(sym_name='checked') | |
113 | + return Lemma_Status.objects.filter(type__priority__gte=checked_type.priority).distinct() | |
114 | + | |
115 | +def get_ready_statuses(): | |
116 | + ready_type = LemmaStatusType.objects.get(sym_name='ready') | |
117 | + return Lemma_Status.objects.filter(type__priority__gte=ready_type.priority).distinct() | |
110 | 118 | |
111 | 119 | |
112 | 120 | class LemmaStatusType(Model): |
... | ... |