Commit e72757a4579563f2eb24ab4b0ec6faf8477696f0

Authored by Bartłomiej Nitoń
1 parent 76f54edc

Added minimum status and start date options to create_TEI_walenty script.

accounts/management/commands/get_payments_data.py 0 → 100644
  1 +# -*- coding:utf-8 -*-
  2 +
  3 +import codecs
  4 +import datetime
  5 +
  6 +from django.contrib.auth.models import User
  7 +from django.core.management.base import BaseCommand
  8 +from django.db.models import Sum
  9 +
  10 +from accounts.models import RealizedLemma, RealizedPhraseology, RealizedPhraseologyBinding, \
  11 + RealizedSemantics
  12 +from dictionary.ajax_user_stats import get_used_bindings
  13 +from dictionary.models import Lemma
  14 +
  15 +
  16 +USERNAME = 'JakubS'
  17 +FUNCTION = 'Leksykograf'
  18 +POS = 'noun'
  19 +STARTDATE = datetime.datetime(2011, 1, 1, 00, 00)
  20 +ENDDATE = (datetime.datetime.now() -
  21 + datetime.timedelta(days=1)).replace(hour=23, minute=59, second=59)
  22 +
  23 +
class Command(BaseCommand):
    """Management command that writes a payment report for the user,
    role and period configured in the module-level constants."""

    args = 'none'

    def handle(self, *args, **options):
        # Accept (and ignore) positional args: Django invokes
        # handle(*args, **options), so omitting *args would raise
        # TypeError whenever arguments are supplied on the command line.
        get_payments_data(FUNCTION)
  30 +
def get_payments_data(function):
    """Write a tab-separated payment report for USERNAME to a CSV file.

    `function` selects the kind of work to report (e.g. 'Leksykograf',
    'Semantyk'). Raises ValueError for an unrecognized function name
    (the original fell through and crashed with NameError on
    `work_stats`). The output path now uses the `function` argument;
    the original hard-coded the module constant FUNCTION, silently
    ignoring the parameter.
    """
    writers = {
        'Semantyk': write_semantic_stats,
        'Superfrazeolog': write_superphraseologic_stats,
        'Frazeolog': write_phraseologic_stats,
        'Leksykograf': write_lexicographic_stats,
        'Superleksykograf': write_superlexicographic_stats,
    }
    try:
        write_stats = writers[function]
    except KeyError:
        raise ValueError('Unknown function: %s' % function)

    start = STARTDATE.strftime('%Y%m%d')
    end = ENDDATE.strftime('%Y%m%d')
    payments_path = 'data/work_%s_%s_%s-%s.csv' % (USERNAME, function, start, end)
    payments_file = codecs.open(payments_path, 'wt', 'utf-8')
    try:
        user = User.objects.get(username=USERNAME)
        work_stats = write_stats(payments_file, user, POS)
        total_earned_cash = work_stats['earned_cash']
        if total_earned_cash > 0.0:
            payments_file.write(u'\n%s\t%.2f\n' % (user.username,
                                                   total_earned_cash))
    finally:
        # Close the report even if a writer raises part-way through.
        payments_file.close()
  54 +
def write_superlexicographic_stats(payments_file, user, pos):
    """Write the list of entries checked by `user` (superlexicographer
    role) for part of speech `pos` within STARTDATE..ENDDATE, and return
    {'earned_cash': <total rounded to 2 places>}."""
    real_lemmas = RealizedLemma.objects.filter(user_stats__user=user,
                                               lemma__entry_obj__pos__tag=pos,
                                               date__gte=STARTDATE,
                                               date__lte=ENDDATE,
                                               status__type__sym_name='checked',
                                               bonus=False)

    # aggregate() yields None (SQL NULL) for an empty queryset.
    earned_cash = real_lemmas.aggregate(Sum('cash'))['cash__sum']
    if earned_cash is None:
        earned_cash = 0.0

    payments_file.write(u'Sprawdzone:\n')
    for done_lemma in real_lemmas.order_by('date'):
        payments_file.write(u'%s\t%.2f\t%s\n' % (done_lemma.lemma.entry_obj.name,
                                                 done_lemma.cash,
                                                 done_lemma.date.strftime('%Y%m%d')))

    lex_work_stats = {'earned_cash': round(earned_cash, 2)}
    return lex_work_stats
  75 +
  76 +
def _sum_field(queryset, field, default):
    """Sum `field` over `queryset`; aggregate() returns None (SQL NULL)
    for an empty queryset, so substitute `default` in that case."""
    total = queryset.aggregate(Sum(field))['%s__sum' % field]
    return default if total is None else total


def write_semantic_stats(payments_file, user, pos):
    """Write the semantic-work report for `user`/`pos` within
    STARTDATE..ENDDATE and return a dict of aggregated statistics
    (earned/bonus cash, frame counts, efficacy, added connections)."""
    real_semantics = RealizedSemantics.objects.filter(user_stats__user=user,
                                                      date__gte=STARTDATE,
                                                      date__lte=ENDDATE,
                                                      entry__pos__tag=pos)

    # real_semantics is already restricted to this user; the original
    # redundantly re-applied .filter(user_stats__user=user) before every
    # aggregate call below.
    earned_cash = _sum_field(real_semantics, 'cash', 0.0)
    bonus_cash = _sum_field(real_semantics.filter(bonus=True), 'cash', 0.0)
    prop_frames = _sum_field(real_semantics, 'prop_frames', 0)
    part_prop_frames = _sum_field(real_semantics, 'part_prop_frames', 0)
    wrong_frames = _sum_field(real_semantics, 'wrong_frames', 0)
    corr_frames = _sum_field(real_semantics, 'corr_frames', 0)
    part_corr_frames = _sum_field(real_semantics, 'part_corr_frames', 0)
    ncorr_frames = _sum_field(real_semantics, 'ncorr_frames', 0)
    made_frames = _sum_field(real_semantics, 'made_frames', 0)
    added_connections = _sum_field(real_semantics, 'added_connections', 0)

    efficacy = 0.0
    if prop_frames + wrong_frames > 0:
        efficacy = float(prop_frames) / float(prop_frames + wrong_frames) * 100.0

    payments_file.write(u'Wykonane:\n')
    done_semantics = real_semantics.filter(bonus=False).order_by('date')
    for done_sem in done_semantics:
        done_cash = done_sem.cash
        try:
            # Fold the entry's bonus (if any) into the per-entry amount.
            done_cash += real_semantics.get(bonus=True, entry=done_sem.entry).cash
        except RealizedSemantics.DoesNotExist:
            pass
        payments_file.write(u'%s\t%.2f\t%s\n' % (done_sem.entry.name,
                                                 done_cash,
                                                 done_sem.date.strftime('%Y%m%d')))

    sem_work_stats = {'earned_cash': round(earned_cash, 2),
                      'bonus_cash': round(bonus_cash, 2),
                      'prop_frames': prop_frames,
                      'part_prop_frames': part_prop_frames,
                      'wrong_frames': wrong_frames,
                      'corr_frames': corr_frames,
                      'part_corr_frames': part_corr_frames,
                      'checked_frames': ncorr_frames + corr_frames + part_corr_frames,
                      'made_frames': made_frames,
                      'efficacy': round(efficacy, 2),
                      'added_connections': added_connections}
    return sem_work_stats
  153 +
  154 +
def write_superphraseologic_stats(payments_file, user, pos):
    """Write the superphraseologist report (checked frames plus used
    phraseologic bindings) for `user`/`pos` within STARTDATE..ENDDATE
    and return earned-cash / binding-count statistics."""
    added_bindings = RealizedPhraseologyBinding.objects.filter(user_stats__user=user,
                                                               date__gte=STARTDATE,
                                                               date__lte=ENDDATE)
    used_bindings = get_used_bindings(added_bindings)

    checked_phraseology = RealizedPhraseology.objects.filter(user_stats__user=user,
                                                             date__gte=STARTDATE,
                                                             date__lte=ENDDATE,
                                                             bonus=False,
                                                             status__type__sym_name='checked_f',
                                                             lemma__entry_obj__pos__tag=pos)

    # aggregate() yields None (SQL NULL) for empty querysets.
    earned_cash_frames = checked_phraseology.aggregate(Sum('cash'))['cash__sum']
    if earned_cash_frames is None:
        earned_cash_frames = 0.0
    earned_cash_bindings = used_bindings.aggregate(Sum('cash'))['cash__sum']
    if earned_cash_bindings is None:
        earned_cash_bindings = 0.0
    earned_cash = earned_cash_frames + earned_cash_bindings

    # Zero-cash checked frames are still paid a flat per-frame fee.
    phraseologic_empty_frame_value = 1.0
    empty_value = 0.0

    payments_file.write(u'Sprawdzone:\n')
    for checked_phr in checked_phraseology.order_by('date'):
        cash = checked_phr.cash
        if cash == 0.0:
            cash = phraseologic_empty_frame_value
            empty_value += phraseologic_empty_frame_value
        payments_file.write(u'%s\t%.2f\t%s\n' % (checked_phr.lemma.entry_obj.name,
                                                 cash,
                                                 checked_phr.date.strftime('%Y%m%d')))
    earned_cash += empty_value

    payments_file.write(u'\n\nDodane powiazania frazeologiczne:\n')
    for binding in used_bindings.order_by('date'):
        payments_file.write(u'%s\t%.2f\t%s\n' % (binding.binded_entry.name,
                                                 binding.cash,
                                                 binding.date.strftime('%Y%m%d')))

    phraseology_work_stats = {'earned_cash': round(earned_cash, 2),
                              'added_bindings': added_bindings.count(),
                              'used_bindings': used_bindings.count()}
    return phraseology_work_stats
  202 +
  203 +
def write_phraseologic_stats(payments_file, user, pos):
    """Write the phraseologist report (frames set to 'ready_f', bonuses,
    and used phraseologic bindings) for `user`/`pos` within
    STARTDATE..ENDDATE and return earned-cash / binding-count stats."""
    added_bindings = RealizedPhraseologyBinding.objects.filter(user_stats__user=user,
                                                               date__gte=STARTDATE,
                                                               date__lte=ENDDATE)
    used_bindings = get_used_bindings(added_bindings)

    checked_and_done_phraseology = RealizedPhraseology.objects.filter(user_stats__user=user,
                                                                      date__gte=STARTDATE,
                                                                      date__lte=ENDDATE,
                                                                      lemma__entry_obj__pos__tag=pos)

    done_phraseology = checked_and_done_phraseology.filter(status__type__sym_name='ready_f',
                                                           bonus=False)

    # aggregate() yields None (SQL NULL) for empty querysets.
    earned_cash_frames = done_phraseology.aggregate(Sum('cash'))['cash__sum']
    if earned_cash_frames is None:
        earned_cash_frames = 0.0
    earned_cash_bindings = used_bindings.aggregate(Sum('cash'))['cash__sum']
    if earned_cash_bindings is None:
        earned_cash_bindings = 0.0
    earned_cash = earned_cash_frames + earned_cash_bindings

    bonus_cash = checked_and_done_phraseology.filter(bonus=True).aggregate(Sum('cash'))['cash__sum']
    if bonus_cash is None:
        bonus_cash = 0.0
    earned_cash += bonus_cash

    # Zero-cash completed frames are still paid a flat per-frame fee.
    phraseologic_empty_frame_value = 1.0
    empty_value = 0.0

    payments_file.write(u'Wykonane:\n')
    for done_phr in done_phraseology.order_by('date'):
        cash = done_phr.cash
        if cash == 0.0:
            cash = phraseologic_empty_frame_value
            empty_value += phraseologic_empty_frame_value
        try:
            # Per-entry bonus shown inline; the total already includes it
            # via bonus_cash above.
            cash += checked_and_done_phraseology.get(bonus=True,
                                                     lemma__entry_obj=done_phr.lemma.entry_obj).cash
        except RealizedPhraseology.DoesNotExist:
            pass
        payments_file.write(u'%s\t%.2f\t%s\n' % (done_phr.lemma.entry_obj.name,
                                                 cash,
                                                 done_phr.date.strftime('%Y%m%d')))

    payments_file.write(u'\n\nDodane powiazania frazeologiczne:\n')
    for binding in used_bindings.order_by('date'):
        payments_file.write(u'%s\t%.2f\t%s\n' % (binding.binded_entry.name,
                                                 binding.cash,
                                                 binding.date.strftime('%Y%m%d')))

    earned_cash += empty_value

    phraseology_work_stats = {'earned_cash': round(earned_cash, 2),
                              'added_bindings': added_bindings.count(),
                              'used_bindings': used_bindings.count()}
    return phraseology_work_stats
  261 +
  262 +
def write_lexicographic_stats(payments_file, user, pos):
    """Write the lexicographer report for `user`/`pos` within
    STARTDATE..ENDDATE: lemmas marked for deletion (flat 1.0 fee each),
    lemmas set to 'ready' (plus per-lemma bonuses). Returns earned/bonus
    cash and the to-erase total, each rounded to 2 places."""
    real_lemmas = RealizedLemma.objects.filter(user_stats__user=user,
                                               lemma__entry_obj__pos__tag=pos,
                                               date__gte=STARTDATE,
                                               date__lte=ENDDATE)

    # aggregate() yields None (SQL NULL) for empty querysets.
    earned_cash = real_lemmas.filter(status__type__sym_name='ready').aggregate(Sum('cash'))['cash__sum']
    if earned_cash is None:
        earned_cash = 0.0

    lemmas_to_erase_cash = 0.0
    lemmas_marked_to_erase = Lemma.objects.filter(owner=user,
                                                  old=False,
                                                  status__type__sym_name='erase',
                                                  entry_obj__pos__tag=pos)

    payments_file.write(u'Zaznaczone do usunięcia:\n')
    for lemma in lemmas_marked_to_erase:
        # The most recent status change is when the lemma was marked.
        erase_date = lemma.status_history.order_by('-date')[0].date
        if STARTDATE <= erase_date <= ENDDATE:
            payments_file.write(u'%s\t%.2f\t%s\n' % (lemma.entry_obj.name,
                                                     1.0,
                                                     erase_date.strftime('%Y%m%d')))
            lemmas_to_erase_cash += 1.0
    earned_cash += lemmas_to_erase_cash

    bonus_cash = real_lemmas.filter(bonus=True).aggregate(Sum('cash'))['cash__sum']
    if bonus_cash is None:
        bonus_cash = 0.0
    earned_cash += bonus_cash

    payments_file.write(u'\n\nWykonane:\n')
    done_lemmas = real_lemmas.filter(bonus=False,
                                     status__type__sym_name='ready').order_by('date')
    for done_lemma in done_lemmas:
        cash = done_lemma.cash
        try:
            # Per-lemma bonus shown inline; the total already includes it
            # via bonus_cash above.
            cash += real_lemmas.get(bonus=True,
                                    lemma__entry_obj=done_lemma.lemma.entry_obj).cash
        except RealizedLemma.DoesNotExist:
            pass
        payments_file.write(u'%s\t%.2f\t%s\n' % (done_lemma.lemma.entry_obj.name,
                                                 cash,
                                                 done_lemma.date.strftime('%Y%m%d')))

    lex_work_stats = {'earned_cash': round(earned_cash, 2),
                      'bonus_cash': round(bonus_cash, 2),
                      'lemmas_to_erase_cash': round(lemmas_to_erase_cash, 2)}
    return lex_work_stats
... ...
dictionary/management/commands/create_TEI_walenty.py
... ... @@ -5,38 +5,56 @@ import os
5 5 import tarfile
6 6  
7 7 from django.core.management.base import BaseCommand
  8 +from optparse import make_option
8 9  
9 10 from dictionary.models import Lemma, Frame_Opinion_Value, \
10   - get_ready_statuses
  11 + get_statuses
11 12 from dictionary.teixml import createteixml, write_phrase_types_expansions_in_TEI
12 13 from settings import WALENTY_PATH
13 14  
14 15 class Command(BaseCommand):
15 16 args = '<dict dict ...>'
16 17 help = 'Get Walenty in TEI format.'
  18 + option_list = BaseCommand.option_list + (
  19 + make_option('--min_status',
  20 + action='store',
  21 + type='string',
  22 + dest='min_status_type',
  23 + default='ready',
  24 + help='Minimum lemma status.'),
  25 + make_option('--start_date',
  26 + action='store',
  27 + type='string',
  28 + dest='start_date',
  29 + default='all',
  30 + help='Status change start date (format: YYYY-MM-DD).'),
  31 +
  32 + )
17 33  
18 34 def handle(self, *args, **options):
19 35 try:
20 36 now = datetime.datetime.now().strftime('%Y%m%d')
21   -
22 37 vocab_names = list(args)
23 38 vocab_names.sort()
24   - if vocab_names:
25   - filename_base = '%s_%s_%s' % ('walenty', '+'.join(vocab_names), now)
26   - else:
27   - filename_base = '%s_%s' % ('walenty', now)
  39 +
  40 + filename_base = self.create_filename_base(vocab_names, options, now)
28 41  
29 42 base_path = os.path.join(WALENTY_PATH, filename_base)
30 43 outpath = base_path + '.xml'
31   - ready_statuses = get_ready_statuses()
  44 + statuses = get_statuses(options['min_status_type'])
32 45  
33 46 lemmas = Lemma.objects.filter(old=False)
34 47 if vocab_names:
35 48 lemmas = lemmas.filter(vocabulary__name__in=vocab_names)
36   - ready_lemmas = lemmas.filter(status__in=ready_statuses).order_by('entry_obj__name')
  49 + lemmas = lemmas.filter(status__in=statuses)
  50 + if options['start_date'] != 'all':
  51 + lemmas = self.filter_lemmas_by_status_change(lemmas, statuses, options['start_date'])
  52 + lemmas = lemmas.order_by('entry_obj__name')
  53 +
  54 + self.print_statistics(lemmas)
37 55  
38 56 frame_opinion_values = Frame_Opinion_Value.objects.all()
39   - createteixml(outpath, ready_lemmas, frame_opinion_values)
  57 + createteixml(outpath, lemmas, frame_opinion_values)
40 58 archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz')
41 59  
42 60 phrase_types_expand_path = os.path.join(WALENTY_PATH,
... ... @@ -50,3 +68,49 @@ class Command(BaseCommand):
50 68 archive.close()
51 69 os.remove(outpath)
52 70 os.remove(phrase_types_expand_path)
  71 +
  72 + def create_filename_base(self, vocab_names, options, now):
  73 + start_date = ''
  74 + if options['start_date'] != 'all':
  75 + start_date = '-' + options['start_date'].replace('-', '')
  76 +
  77 + vocab_names_str = ''
  78 + if vocab_names:
  79 + vocab_names_str = '-' + '+'.join(vocab_names)
  80 +
  81 + min_status = ''
  82 + if options['min_status_type'] != 'ready':
  83 + min_status = '-' + options['min_status_type']
  84 +
  85 + filename_base = 'walenty%s%s%s_%s' % (min_status, vocab_names_str,
  86 + start_date, now)
  87 + return filename_base
  88 +
  89 +
  90 + def filter_lemmas_by_status_change(self, lemmas, statuses, start_date_str):
  91 + start_date = self.parse_date(start_date_str)
  92 + filtered_lemmas_pks = []
  93 + for lemma in lemmas:
  94 + if lemma.status_history.filter(status=statuses[0], date__gte=start_date).exists():
  95 + filtered_lemmas_pks.append(lemma.pk)
  96 + return lemmas.filter(pk__in=filtered_lemmas_pks)
  97 +
  98 + def parse_date(self, date_str):
  99 + date_parts = date_str.split('-')
  100 + year = int(date_parts[0])
  101 + month = int(date_parts[1].lstrip('0'))
  102 + day = int(date_parts[2].lstrip('0'))
  103 + date = datetime.datetime(year, month, day, 00, 00)
  104 + return date
  105 +
  106 + def print_statistics(self, lemmas):
  107 + count = {'frames': 0,
  108 + 'arguments': 0}
  109 + for lemma in lemmas:
  110 + frames = lemma.entry_obj.actual_frames()
  111 + count['frames'] += frames.count()
  112 + for frame in frames.all():
  113 + count['arguments'] += frame.complements.count()
  114 + print (u'Lemmas:\t%d' % lemmas.count())
  115 + print (u'Frames:\t%d' % count['frames'])
  116 + print (u'Arguments:\t%d' % count['arguments'])
... ...
dictionary/models.py
... ... @@ -100,6 +100,10 @@ def get_checked_statuses():
def get_ready_statuses():
    """Return statuses at or above the 'ready' priority.

    Kept for existing callers; delegates to the generalized
    get_statuses() instead of duplicating the priority query.
    """
    return get_statuses('ready')
  103 +
def get_statuses(min_status_type):
    """Return all lemma statuses whose type priority is >= that of the
    status type named `min_status_type` (sym_name lookup)."""
    threshold = LemmaStatusType.objects.get(sym_name=min_status_type)
    return Lemma_Status.objects.filter(type__priority__gte=threshold.priority).distinct()
103 107  
104 108  
105 109 class LemmaStatusType(Model):
... ...