Commit e72757a4579563f2eb24ab4b0ec6faf8477696f0
1 parent
76f54edc
Added minimum status and start date options to create_TEI_walenty script.
Showing 3 changed files with 389 additions and 9 deletions.
accounts/management/commands/get_payments_data.py
0 → 100644
1 | +# -*- coding:utf-8 -*- | |
2 | + | |
3 | +import codecs | |
4 | +import datetime | |
5 | + | |
6 | +from django.contrib.auth.models import User | |
7 | +from django.core.management.base import BaseCommand | |
8 | +from django.db.models import Sum | |
9 | + | |
10 | +from accounts.models import RealizedLemma, RealizedPhraseology, RealizedPhraseologyBinding, \ | |
11 | + RealizedSemantics | |
12 | +from dictionary.ajax_user_stats import get_used_bindings | |
13 | +from dictionary.models import Lemma | |
14 | + | |
15 | + | |
16 | +USERNAME = 'JakubS' | |
17 | +FUNCTION = 'Leksykograf' | |
18 | +POS = 'noun' | |
19 | +STARTDATE = datetime.datetime(2011, 1, 1, 00, 00) | |
20 | +ENDDATE = (datetime.datetime.now() - | |
21 | + datetime.timedelta(days=1)).replace(hour=23, minute=59, second=59) | |
22 | + | |
23 | + | |
class Command(BaseCommand):
    """Management command that dumps payment data for the configured user.

    All configuration (user, role, POS, date range) comes from the
    module-level constants above.
    """
    args = 'none'

    def handle(self, **options):
        # Delegate everything to the module-level report writer.
        return get_payments_data(FUNCTION)
30 | + | |
def get_payments_data(function):
    """Write a CSV payments report for USERNAME acting in the given role.

    :param function: role name selecting which statistics writer runs
        ('Semantyk', 'Superfrazeolog', 'Frazeolog', 'Leksykograf' or
        'Superleksykograf').
    :raises ValueError: for an unknown role (the original code would have
        failed later with a NameError on ``work_stats``).
    """
    writers = {
        'Semantyk': write_semantic_stats,
        'Superfrazeolog': write_superphraseologic_stats,
        'Frazeolog': write_phraseologic_stats,
        'Leksykograf': write_lexicographic_stats,
        'Superleksykograf': write_superlexicographic_stats,
    }
    try:
        write_stats = writers[function]
    except KeyError:
        raise ValueError('Unknown function: %s' % function)

    start = STARTDATE.strftime('%Y%m%d')
    end = ENDDATE.strftime('%Y%m%d')
    # Use the *function* argument (not the FUNCTION global) so the file name
    # always matches the report actually generated.
    payments_path = 'data/work_%s_%s_%s-%s.csv' % (USERNAME, function, start, end)
    user = User.objects.get(username=USERNAME)

    payments_file = codecs.open(payments_path, 'wt', 'utf-8')
    try:
        work_stats = write_stats(payments_file, user, POS)
        total_earned_cash = work_stats['earned_cash']
        if total_earned_cash > 0.0:
            payments_file.write(u'\n%s\t%.2f\n' % (user.username,
                                                   total_earned_cash))
    finally:
        # Close the report even if a writer raises.
        payments_file.close()
53 | + | |
54 | + | |
def write_superlexicographic_stats(payments_file, user, pos):
    """Write per-lemma rows for lemmas *checked* by the user in the date range.

    :param payments_file: open writable text file for the report rows
    :param user: Django ``User`` whose work is being reported
    :param pos: part-of-speech tag restricting the lemmas
    :return: dict with the rounded ``earned_cash`` total
    """
    real_lemmas = RealizedLemma.objects.filter(user_stats__user=user,
                                               lemma__entry_obj__pos__tag=pos,
                                               date__gte=STARTDATE,
                                               date__lte=ENDDATE,
                                               status__type__sym_name='checked',
                                               bonus=False)

    earned_cash = real_lemmas.aggregate(Sum('cash'))['cash__sum']
    # Sum over an empty queryset yields None, not 0.
    if earned_cash is None:
        earned_cash = 0.0

    payments_file.write(u'Sprawdzone:\n')
    for done_lemma in real_lemmas.order_by('date'):
        payments_file.write(u'%s\t%.2f\t%s\n' % (done_lemma.lemma.entry_obj.name,
                                                 done_lemma.cash,
                                                 done_lemma.date.strftime('%Y%m%d')))

    lex_work_stats = {'earned_cash': round(earned_cash, 2)}
    return lex_work_stats
75 | + | |
76 | + | |
def _sum_or_zero(queryset, field, zero=0):
    """Sum *field* over *queryset*; an empty aggregate (None) becomes *zero*."""
    value = queryset.aggregate(Sum(field))['%s__sum' % field]
    return zero if value is None else value


def write_semantic_stats(payments_file, user, pos):
    """Write per-entry rows for semantic work done by the user in the date range.

    :param payments_file: open writable text file for the report rows
    :param user: Django ``User`` whose work is being reported
    :param pos: part-of-speech tag restricting the entries
    :return: dict of cash totals, frame counters and efficacy percentage
    """
    real_semantics = RealizedSemantics.objects.filter(user_stats__user=user,
                                                      date__gte=STARTDATE,
                                                      date__lte=ENDDATE,
                                                      entry__pos__tag=pos)

    # real_semantics is already restricted to this user, so the original
    # per-aggregate re-filtering by user was redundant.
    earned_cash = _sum_or_zero(real_semantics, 'cash', 0.0)
    bonus_cash = _sum_or_zero(real_semantics.filter(bonus=True), 'cash', 0.0)
    prop_frames = _sum_or_zero(real_semantics, 'prop_frames')
    part_prop_frames = _sum_or_zero(real_semantics, 'part_prop_frames')
    wrong_frames = _sum_or_zero(real_semantics, 'wrong_frames')
    corr_frames = _sum_or_zero(real_semantics, 'corr_frames')
    part_corr_frames = _sum_or_zero(real_semantics, 'part_corr_frames')
    ncorr_frames = _sum_or_zero(real_semantics, 'ncorr_frames')
    made_frames = _sum_or_zero(real_semantics, 'made_frames')
    added_connections = _sum_or_zero(real_semantics, 'added_connections')

    efficacy = 0.0
    if prop_frames + wrong_frames > 0:
        efficacy = float(prop_frames) / float(prop_frames + wrong_frames) * 100.0

    payments_file.write(u'Wykonane:\n')
    done_semantics = real_semantics.filter(bonus=False).order_by('date')
    for done_sem in done_semantics:
        done_cash = done_sem.cash
        try:
            # Fold the matching bonus payment for this entry into the row.
            done_cash += real_semantics.get(bonus=True, entry=done_sem.entry).cash
        except RealizedSemantics.DoesNotExist:
            pass
        payments_file.write(u'%s\t%.2f\t%s\n' % (done_sem.entry.name,
                                                 done_cash,
                                                 done_sem.date.strftime('%Y%m%d')))

    sem_work_stats = {'earned_cash': round(earned_cash, 2),
                      'bonus_cash': round(bonus_cash, 2),
                      'prop_frames': prop_frames,
                      'part_prop_frames': part_prop_frames,
                      'wrong_frames': wrong_frames,
                      'corr_frames': corr_frames,
                      'part_corr_frames': part_corr_frames,
                      'checked_frames': ncorr_frames + corr_frames + part_corr_frames,
                      'made_frames': made_frames,
                      'efficacy': round(efficacy, 2),
                      'added_connections': added_connections}
    return sem_work_stats
153 | + | |
154 | + | |
def write_superphraseologic_stats(payments_file, user, pos):
    """Write rows for phraseology frames *checked* by the user plus added bindings.

    :param payments_file: open writable text file for the report rows
    :param user: Django ``User`` whose work is being reported
    :param pos: part-of-speech tag restricting the lemmas
    :return: dict with ``earned_cash`` and binding counters
    """
    added_bindings = RealizedPhraseologyBinding.objects.filter(user_stats__user=user,
                                                               date__gte=STARTDATE,
                                                               date__lte=ENDDATE)
    used_bindings = get_used_bindings(added_bindings)

    checked_phraseology = RealizedPhraseology.objects.filter(user_stats__user=user,
                                                             date__gte=STARTDATE,
                                                             date__lte=ENDDATE,
                                                             bonus=False,
                                                             status__type__sym_name='checked_f',
                                                             lemma__entry_obj__pos__tag=pos)

    # Sum over an empty queryset yields None, not 0.
    earned_cash_frames = checked_phraseology.aggregate(Sum('cash'))['cash__sum']
    if earned_cash_frames is None:
        earned_cash_frames = 0.0
    earned_cash_bindings = used_bindings.aggregate(Sum('cash'))['cash__sum']
    if earned_cash_bindings is None:
        earned_cash_bindings = 0.0
    earned_cash = earned_cash_frames + earned_cash_bindings

    # A frame checked at zero cash is still worth a flat rate in the report.
    phraseologic_empty_frame_value = 1.0
    empty_value = 0.0

    payments_file.write(u'Sprawdzone:\n')
    for checked_phr in checked_phraseology.order_by('date'):
        cash = checked_phr.cash
        if cash == 0.0:
            cash = phraseologic_empty_frame_value
            empty_value += phraseologic_empty_frame_value
        payments_file.write(u'%s\t%.2f\t%s\n' % (checked_phr.lemma.entry_obj.name,
                                                 cash,
                                                 checked_phr.date.strftime('%Y%m%d')))
    earned_cash += empty_value

    payments_file.write(u'\n\nDodane powiazania frazeologiczne:\n')
    for binding in used_bindings.order_by('date'):
        payments_file.write(u'%s\t%.2f\t%s\n' % (binding.binded_entry.name,
                                                 binding.cash,
                                                 binding.date.strftime('%Y%m%d')))

    phraseology_work_stats = {'earned_cash': round(earned_cash, 2),
                              'added_bindings': added_bindings.count(),
                              'used_bindings': used_bindings.count()}
    return phraseology_work_stats
202 | + | |
203 | + | |
def write_phraseologic_stats(payments_file, user, pos):
    """Write rows for phraseology frames *done* by the user plus added bindings.

    :param payments_file: open writable text file for the report rows
    :param user: Django ``User`` whose work is being reported
    :param pos: part-of-speech tag restricting the lemmas
    :return: dict with ``earned_cash`` and binding counters
    """
    added_bindings = RealizedPhraseologyBinding.objects.filter(user_stats__user=user,
                                                               date__gte=STARTDATE,
                                                               date__lte=ENDDATE)
    used_bindings = get_used_bindings(added_bindings)

    checked_and_done_phraseology = RealizedPhraseology.objects.filter(user_stats__user=user,
                                                                      date__gte=STARTDATE,
                                                                      date__lte=ENDDATE,
                                                                      lemma__entry_obj__pos__tag=pos)

    done_phraseology = checked_and_done_phraseology.filter(status__type__sym_name='ready_f',
                                                           bonus=False)

    # Sum over an empty queryset yields None, not 0.
    earned_cash_frames = done_phraseology.aggregate(Sum('cash'))['cash__sum']
    if earned_cash_frames is None:
        earned_cash_frames = 0.0
    earned_cash_bindings = used_bindings.aggregate(Sum('cash'))['cash__sum']
    if earned_cash_bindings is None:
        earned_cash_bindings = 0.0
    earned_cash = earned_cash_frames + earned_cash_bindings

    bonus_cash = checked_and_done_phraseology.filter(bonus=True).aggregate(Sum('cash'))['cash__sum']
    if bonus_cash is None:
        bonus_cash = 0.0
    earned_cash += bonus_cash

    # A frame done at zero cash is still worth a flat rate in the report.
    phraseologic_empty_frame_value = 1.0
    empty_value = 0.0

    payments_file.write(u'Wykonane:\n')
    for done_phr in done_phraseology.order_by('date'):
        cash = done_phr.cash
        if cash == 0.0:
            cash = phraseologic_empty_frame_value
            empty_value += phraseologic_empty_frame_value
        try:
            # Fold the matching bonus payment for this entry into the row.
            done_bonus = checked_and_done_phraseology.get(bonus=True,
                                                          lemma__entry_obj=done_phr.lemma.entry_obj).cash
            cash += done_bonus
        except RealizedPhraseology.DoesNotExist:
            pass
        payments_file.write(u'%s\t%.2f\t%s\n' % (done_phr.lemma.entry_obj.name,
                                                 cash,
                                                 done_phr.date.strftime('%Y%m%d')))

    payments_file.write(u'\n\nDodane powiazania frazeologiczne:\n')
    for binding in used_bindings.order_by('date'):
        payments_file.write(u'%s\t%.2f\t%s\n' % (binding.binded_entry.name,
                                                 binding.cash,
                                                 binding.date.strftime('%Y%m%d')))

    earned_cash += empty_value

    phraseology_work_stats = {'earned_cash': round(earned_cash, 2),
                              'added_bindings': added_bindings.count(),
                              'used_bindings': used_bindings.count()}
    return phraseology_work_stats
261 | + | |
262 | + | |
def write_lexicographic_stats(payments_file, user, pos):
    """Write rows for lemmas made *ready* or marked for erasure by the user.

    :param payments_file: open writable text file for the report rows
    :param user: Django ``User`` whose work is being reported
    :param pos: part-of-speech tag restricting the lemmas
    :return: dict with ``earned_cash``, ``bonus_cash`` and erasure totals
    """
    real_lemmas = RealizedLemma.objects.filter(user_stats__user=user,
                                               lemma__entry_obj__pos__tag=pos,
                                               date__gte=STARTDATE,
                                               date__lte=ENDDATE)

    # Sum over an empty queryset yields None, not 0.
    earned_cash = real_lemmas.filter(status__type__sym_name='ready').aggregate(Sum('cash'))['cash__sum']
    if earned_cash is None:
        earned_cash = 0.0

    # Each lemma marked for erasure within the date range is worth a flat 1.0.
    lemmas_to_erase_cash = 0.0
    lemmas_marked_to_erase = Lemma.objects.filter(owner=user,
                                                  old=False,
                                                  status__type__sym_name='erase',
                                                  entry_obj__pos__tag=pos)

    payments_file.write(u'Zaznaczone do usunięcia:\n')
    for lemma in lemmas_marked_to_erase:
        erase_date = lemma.status_history.order_by('-date')[0].date
        if STARTDATE <= erase_date <= ENDDATE:
            payments_file.write(u'%s\t%.2f\t%s\n' % (lemma.entry_obj.name,
                                                     1.0,
                                                     erase_date.strftime('%Y%m%d')))
            lemmas_to_erase_cash += 1.0
    earned_cash += lemmas_to_erase_cash

    bonus_cash = real_lemmas.filter(bonus=True).aggregate(Sum('cash'))['cash__sum']
    if bonus_cash is None:
        bonus_cash = 0.0
    earned_cash += bonus_cash

    payments_file.write(u'\n\nWykonane:\n')
    done_lemmas = real_lemmas.filter(bonus=False,
                                     status__type__sym_name='ready').order_by('date')
    for done_lemma in done_lemmas:
        cash = done_lemma.cash
        try:
            # Fold the matching bonus payment for this lemma into the row.
            bonus = real_lemmas.get(bonus=True, lemma__entry_obj=done_lemma.lemma.entry_obj).cash
            cash += bonus
        except RealizedLemma.DoesNotExist:
            pass
        payments_file.write(u'%s\t%.2f\t%s\n' % (done_lemma.lemma.entry_obj.name,
                                                 cash,
                                                 done_lemma.date.strftime('%Y%m%d')))

    lex_work_stats = {'earned_cash': round(earned_cash, 2),
                      'bonus_cash': round(bonus_cash, 2),
                      'lemmas_to_erase_cash': round(lemmas_to_erase_cash, 2)}
    return lex_work_stats
... | ... |
dictionary/management/commands/create_TEI_walenty.py
... | ... | @@ -5,38 +5,56 @@ import os |
5 | 5 | import tarfile |
6 | 6 | |
7 | 7 | from django.core.management.base import BaseCommand |
8 | +from optparse import make_option | |
8 | 9 | |
9 | 10 | from dictionary.models import Lemma, Frame_Opinion_Value, \ |
10 | - get_ready_statuses | |
11 | + get_statuses | |
11 | 12 | from dictionary.teixml import createteixml, write_phrase_types_expansions_in_TEI |
12 | 13 | from settings import WALENTY_PATH |
13 | 14 | |
14 | 15 | class Command(BaseCommand): |
15 | 16 | args = '<dict dict ...>' |
16 | 17 | help = 'Get Walenty in TEI format.' |
18 | + option_list = BaseCommand.option_list + ( | |
19 | + make_option('--min_status', | |
20 | + action='store', | |
21 | + type='string', | |
22 | + dest='min_status_type', | |
23 | + default='ready', | |
24 | + help='Minimum lemma status.'), | |
25 | + make_option('--start_date', | |
26 | + action='store', | |
27 | + type='string', | |
28 | + dest='start_date', | |
29 | + default='all', | |
30 | + help='Status change start date (format: YYYY-MM-DD).'), | |
31 | + | |
32 | + ) | |
17 | 33 | |
18 | 34 | def handle(self, *args, **options): |
19 | 35 | try: |
20 | 36 | now = datetime.datetime.now().strftime('%Y%m%d') |
21 | - | |
22 | 37 | vocab_names = list(args) |
23 | 38 | vocab_names.sort() |
24 | - if vocab_names: | |
25 | - filename_base = '%s_%s_%s' % ('walenty', '+'.join(vocab_names), now) | |
26 | - else: | |
27 | - filename_base = '%s_%s' % ('walenty', now) | |
39 | + | |
40 | + filename_base = self.create_filename_base(vocab_names, options, now) | |
28 | 41 | |
29 | 42 | base_path = os.path.join(WALENTY_PATH, filename_base) |
30 | 43 | outpath = base_path + '.xml' |
31 | - ready_statuses = get_ready_statuses() | |
44 | + statuses = get_statuses(options['min_status_type']) | |
32 | 45 | |
33 | 46 | lemmas = Lemma.objects.filter(old=False) |
34 | 47 | if vocab_names: |
35 | 48 | lemmas = lemmas.filter(vocabulary__name__in=vocab_names) |
36 | - ready_lemmas = lemmas.filter(status__in=ready_statuses).order_by('entry_obj__name') | |
49 | + lemmas = lemmas.filter(status__in=statuses) | |
50 | + if options['start_date'] != 'all': | |
51 | + lemmas = self.filter_lemmas_by_status_change(lemmas, statuses, options['start_date']) | |
52 | + lemmas = lemmas.order_by('entry_obj__name') | |
53 | + | |
54 | + self.print_statistics(lemmas) | |
37 | 55 | |
38 | 56 | frame_opinion_values = Frame_Opinion_Value.objects.all() |
39 | - createteixml(outpath, ready_lemmas, frame_opinion_values) | |
57 | + createteixml(outpath, lemmas, frame_opinion_values) | |
40 | 58 | archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz') |
41 | 59 | |
42 | 60 | phrase_types_expand_path = os.path.join(WALENTY_PATH, |
... | ... | @@ -50,3 +68,49 @@ class Command(BaseCommand): |
50 | 68 | archive.close() |
51 | 69 | os.remove(outpath) |
52 | 70 | os.remove(phrase_types_expand_path) |
71 | + | |
def create_filename_base(self, vocab_names, options, now):
    """Build the export file-name stem: walenty[-status][-vocabs][-date]_<now>.

    Optional parts are included only when they differ from the defaults
    ('ready' status, no vocabularies, 'all' dates).
    """
    status_part = ''
    if options['min_status_type'] != 'ready':
        status_part = '-' + options['min_status_type']

    vocab_part = '-' + '+'.join(vocab_names) if vocab_names else ''

    date_part = ''
    if options['start_date'] != 'all':
        date_part = '-' + options['start_date'].replace('-', '')

    return 'walenty%s%s%s_%s' % (status_part, vocab_part, date_part, now)
88 | + | |
89 | + | |
def filter_lemmas_by_status_change(self, lemmas, statuses, start_date_str):
    """Narrow *lemmas* to those whose lowest accepted status (statuses[0])
    was set on or after the given start date."""
    since = self.parse_date(start_date_str)
    matching_pks = [lemma.pk for lemma in lemmas
                    if lemma.status_history.filter(status=statuses[0],
                                                   date__gte=since).exists()]
    return lemmas.filter(pk__in=matching_pks)
97 | + | |
def parse_date(self, date_str):
    """Parse a 'YYYY-MM-DD' string into a naive datetime at midnight.

    Uses ``strptime`` instead of manual split + ``lstrip('0')``:
    ``int()`` already accepts leading zeros, and ``lstrip('0')`` turned
    a '00' component into '' and raised ValueError on ``int('')``.
    """
    return datetime.datetime.strptime(date_str, '%Y-%m-%d')
105 | + | |
def print_statistics(self, lemmas):
    """Print lemma, frame and argument counts for the exported queryset."""
    frames_total = 0
    arguments_total = 0
    for lemma in lemmas:
        frames = lemma.entry_obj.actual_frames()
        frames_total += frames.count()
        for frame in frames.all():
            arguments_total += frame.complements.count()
    print (u'Lemmas:\t%d' % lemmas.count())
    print (u'Frames:\t%d' % frames_total)
    print (u'Arguments:\t%d' % arguments_total)
... | ... |
dictionary/models.py
... | ... | @@ -100,6 +100,10 @@ def get_checked_statuses(): |
100 | 100 | def get_ready_statuses(): |
101 | 101 | ready_type = LemmaStatusType.objects.get(sym_name='ready') |
102 | 102 | return Lemma_Status.objects.filter(type__priority__gte=ready_type.priority).distinct() |
103 | + | |
def get_statuses(min_status_type):
    """Return all lemma statuses whose type priority is at least that of
    the status type named *min_status_type*."""
    threshold = LemmaStatusType.objects.get(sym_name=min_status_type)
    return Lemma_Status.objects.filter(type__priority__gte=threshold.priority).distinct()
103 | 107 | |
104 | 108 | |
105 | 109 | class LemmaStatusType(Model): |
... | ... |