diff --git a/accounts/management/commands/get_payments_data.py b/accounts/management/commands/get_payments_data.py
new file mode 100644
index 0000000..d637bfa
--- /dev/null
+++ b/accounts/management/commands/get_payments_data.py
@@ -0,0 +1,312 @@
+# -*- coding:utf-8 -*-
+
+import codecs
+import datetime
+
+from django.contrib.auth.models import User
+from django.core.management.base import BaseCommand
+from django.db.models import Sum
+
+from accounts.models import RealizedLemma, RealizedPhraseology, RealizedPhraseologyBinding, \
+    RealizedSemantics
+from dictionary.ajax_user_stats import get_used_bindings
+from dictionary.models import Lemma
+
+
+# Report parameters: the command takes no arguments, so edit these constants
+# before running it.
+USERNAME = 'JakubS'
+FUNCTION = 'Leksykograf'
+POS = 'noun'
+STARTDATE = datetime.datetime(2011, 1, 1, 00, 00)
+ENDDATE = (datetime.datetime.now() -
+           datetime.timedelta(days=1)).replace(hour=23, minute=59, second=59)
+
+
+class Command(BaseCommand):
+    args = 'none'
+
+    def handle(self, **options):
+        get_payments_data(FUNCTION)
+
+
+def get_payments_data(function):
+    start = STARTDATE.strftime('%Y%m%d')
+    end = ENDDATE.strftime('%Y%m%d')
+    payments_path = 'data/work_%s_%s_%s-%s.csv' % (USERNAME, FUNCTION, start, end)
+    payments_file = codecs.open(payments_path, 'wt', 'utf-8')
+    user = User.objects.get(username=USERNAME)
+
+    if function == 'Semantyk':
+        work_stats = write_semantic_stats(payments_file, user, POS)
+    elif function == 'Superfrazeolog':
+        work_stats = write_superphraseologic_stats(payments_file, user, POS)
+    elif function == 'Frazeolog':
+        work_stats = write_phraseologic_stats(payments_file, user, POS)
+    elif function == 'Leksykograf':
+        work_stats = write_lexicographic_stats(payments_file, user, POS)
+    elif function == 'Superleksykograf':
+        work_stats = write_superlexicographic_stats(payments_file, user, POS)
+    else:
+        # Guard against a silent NameError on work_stats further down.
+        payments_file.close()
+        raise ValueError('unknown function: %s' % function)
+
+    total_earned_cash = work_stats['earned_cash']
+    if total_earned_cash > 0.0:
+        payments_file.write(u'\n%s\t%.2f\n' % (user.username,
+                                               total_earned_cash))
+    payments_file.close()
+
+
+def write_superlexicographic_stats(payments_file, user, pos):
+    real_lemmas = RealizedLemma.objects.filter(user_stats__user=user,
+                                               lemma__entry_obj__pos__tag=pos,
+                                               date__gte=STARTDATE,
+                                               date__lte=ENDDATE,
+                                               status__type__sym_name='checked',
+                                               bonus=False)
+
+    earned_cash = real_lemmas.aggregate(Sum('cash'))['cash__sum']
+    if earned_cash is None:
+        earned_cash = 0.0
+
+    payments_file.write(u'Sprawdzone:\n')
+    for done_lemma in real_lemmas.order_by('date'):
+        payments_file.write(u'%s\t%.2f\t%s\n' % (done_lemma.lemma.entry_obj.name,
+                                                 done_lemma.cash,
+                                                 done_lemma.date.strftime('%Y%m%d')))
+
+    lex_work_stats = {'earned_cash': round(earned_cash, 2)}
+    return lex_work_stats
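+
+
+# Each counter below repeats the same aggregate-then-default pattern because
+# QuerySet.aggregate() returns {'<field>__sum': None} on an empty queryset.
+# A hypothetical helper (not used here, shown for illustration) could
+# collapse every such pair of lines into one call:
+#
+#     def sum_or_default(queryset, field, default=0):
+#         total = queryset.aggregate(Sum(field))['%s__sum' % field]
+#         return default if total is None else total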
+
+
+def write_semantic_stats(payments_file, user, pos):
+    real_semantics = RealizedSemantics.objects.filter(user_stats__user=user,
+                                                      date__gte=STARTDATE,
+                                                      date__lte=ENDDATE,
+                                                      entry__pos__tag=pos)
+
+    # real_semantics is already restricted to this user, so no further
+    # user filter is needed on the aggregates.
+    earned_cash = real_semantics.aggregate(Sum('cash'))['cash__sum']
+    if earned_cash is None:
+        earned_cash = 0.0
+
+    bonus_cash = real_semantics.filter(bonus=True).aggregate(Sum('cash'))['cash__sum']
+    if bonus_cash is None:
+        bonus_cash = 0.0
+    prop_frames = real_semantics.aggregate(Sum('prop_frames'))['prop_frames__sum']
+    if prop_frames is None:
+        prop_frames = 0
+    part_prop_frames = real_semantics.aggregate(Sum('part_prop_frames'))['part_prop_frames__sum']
+    if part_prop_frames is None:
+        part_prop_frames = 0
+    wrong_frames = real_semantics.aggregate(Sum('wrong_frames'))['wrong_frames__sum']
+    if wrong_frames is None:
+        wrong_frames = 0
+    corr_frames = real_semantics.aggregate(Sum('corr_frames'))['corr_frames__sum']
+    if corr_frames is None:
+        corr_frames = 0
+    part_corr_frames = real_semantics.aggregate(Sum('part_corr_frames'))['part_corr_frames__sum']
+    if part_corr_frames is None:
+        part_corr_frames = 0
+    ncorr_frames = real_semantics.aggregate(Sum('ncorr_frames'))['ncorr_frames__sum']
+    if ncorr_frames is None:
+        ncorr_frames = 0
+    made_frames = real_semantics.aggregate(Sum('made_frames'))['made_frames__sum']
+    if made_frames is None:
+        made_frames = 0
+    added_connections = real_semantics.aggregate(Sum('added_connections'))['added_connections__sum']
+    if added_connections is None:
+        added_connections = 0
+
+    efficacy = 0.0
+    if prop_frames + wrong_frames > 0:
+        efficacy = float(prop_frames) / float(prop_frames + wrong_frames) * 100.0
+
+    payments_file.write(u'Wykonane:\n')
+    done_semantics = real_semantics.filter(bonus=False).order_by('date')
+    for done_sem in done_semantics:
+        done_cash = done_sem.cash
+        try:
+            # A bonus record for the same entry, if any, is added to the row.
+            done_bonus = real_semantics.get(bonus=True, entry=done_sem.entry).cash
+            done_cash += done_bonus
+        except RealizedSemantics.DoesNotExist:
+            pass
+        payments_file.write(u'%s\t%.2f\t%s\n' % (done_sem.entry.name,
+                                                 done_cash,
+                                                 done_sem.date.strftime('%Y%m%d')))
+
+    sem_work_stats = {'earned_cash': round(earned_cash, 2),
+                      'bonus_cash': round(bonus_cash, 2),
+                      'prop_frames': prop_frames,
+                      'part_prop_frames': part_prop_frames,
+                      'wrong_frames': wrong_frames,
+                      'corr_frames': corr_frames,
+                      'part_corr_frames': part_corr_frames,
+                      'checked_frames': ncorr_frames + corr_frames + part_corr_frames,
+                      'made_frames': made_frames,
+                      'efficacy': round(efficacy, 2),
+                      'added_connections': added_connections}
+    return sem_work_stats
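+
+
+# Every payment row shares one tab-separated layout: entry name, amount,
+# date (YYYYMMDD). An illustrative row (hypothetical values):
+#
+#     jabłko	2.50	20140317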
+
+
+def write_superphraseologic_stats(payments_file, user, pos):
+    added_bindings = RealizedPhraseologyBinding.objects.filter(user_stats__user=user,
+                                                               date__gte=STARTDATE,
+                                                               date__lte=ENDDATE)
+    used_bindings = get_used_bindings(added_bindings)
+
+    checked_phraseology = RealizedPhraseology.objects.filter(user_stats__user=user,
+                                                             date__gte=STARTDATE,
+                                                             date__lte=ENDDATE,
+                                                             bonus=False,
+                                                             status__type__sym_name='checked_f',
+                                                             lemma__entry_obj__pos__tag=pos)
+
+    earned_cash_frames = checked_phraseology.aggregate(Sum('cash'))['cash__sum']
+    if earned_cash_frames is None:
+        earned_cash_frames = 0.0
+    earned_cash_bindings = used_bindings.aggregate(Sum('cash'))['cash__sum']
+    if earned_cash_bindings is None:
+        earned_cash_bindings = 0.0
+    earned_cash = earned_cash_frames + earned_cash_bindings
+
+    # Zero-cash frames are credited a flat compensation instead.
+    phraseologic_empty_frame_value = 1.0
+    empty_value = 0.0
+
+    payments_file.write(u'Sprawdzone:\n')
+    checked_phraseology = checked_phraseology.order_by('date')
+    for checked_phr in checked_phraseology:
+        cash = checked_phr.cash
+        if cash == 0.0:
+            cash = phraseologic_empty_frame_value
+            empty_value += phraseologic_empty_frame_value
+        payments_file.write(u'%s\t%.2f\t%s\n' % (checked_phr.lemma.entry_obj.name,
+                                                 cash,
+                                                 checked_phr.date.strftime('%Y%m%d')))
+    earned_cash += empty_value
+
+    payments_file.write(u'\n\nDodane powiazania frazeologiczne:\n')
+    for binding in used_bindings.order_by('date'):
+        payments_file.write(u'%s\t%.2f\t%s\n' % (binding.binded_entry.name,
+                                                 binding.cash,
+                                                 binding.date.strftime('%Y%m%d')))
+
+    phraseology_work_stats = {'earned_cash': round(earned_cash, 2),
+                              'added_bindings': added_bindings.count(),
+                              'used_bindings': used_bindings.count()}
+    return phraseology_work_stats
+
+
+def write_phraseologic_stats(payments_file, user, pos):
+    added_bindings = RealizedPhraseologyBinding.objects.filter(user_stats__user=user,
+                                                               date__gte=STARTDATE,
+                                                               date__lte=ENDDATE)
+    used_bindings = get_used_bindings(added_bindings)
+
+    checked_and_done_phraseology = RealizedPhraseology.objects.filter(user_stats__user=user,
+                                                                      date__gte=STARTDATE,
+                                                                      date__lte=ENDDATE,
+                                                                      lemma__entry_obj__pos__tag=pos)
+
+    done_phraseology = checked_and_done_phraseology.filter(status__type__sym_name='ready_f',
+                                                           bonus=False)
+
+    earned_cash_frames = done_phraseology.aggregate(Sum('cash'))['cash__sum']
+    if earned_cash_frames is None:
+        earned_cash_frames = 0.0
+    earned_cash_bindings = used_bindings.aggregate(Sum('cash'))['cash__sum']
+    if earned_cash_bindings is None:
+        earned_cash_bindings = 0.0
+    earned_cash = earned_cash_frames + earned_cash_bindings
+
+    bonus_cash = checked_and_done_phraseology.filter(bonus=True).aggregate(Sum('cash'))['cash__sum']
+    if bonus_cash is None:
+        bonus_cash = 0.0
+    earned_cash += bonus_cash
+
+    # Zero-cash frames are credited a flat compensation instead.
+    phraseologic_empty_frame_value = 1.0
+    empty_value = 0.0
+
+    payments_file.write(u'Wykonane:\n')
+    for done_phr in done_phraseology.order_by('date'):
+        cash = done_phr.cash
+        if cash == 0.0:
+            cash = phraseologic_empty_frame_value
+            empty_value += phraseologic_empty_frame_value
+        try:
+            done_bonus = checked_and_done_phraseology.get(bonus=True,
+                                                          lemma__entry_obj=done_phr.lemma.entry_obj).cash
+            cash += done_bonus
+        except RealizedPhraseology.DoesNotExist:
+            pass
+        payments_file.write(u'%s\t%.2f\t%s\n' % (done_phr.lemma.entry_obj.name,
+                                                 cash,
+                                                 done_phr.date.strftime('%Y%m%d')))
+
+    payments_file.write(u'\n\nDodane powiazania frazeologiczne:\n')
+    for binding in used_bindings.order_by('date'):
+        payments_file.write(u'%s\t%.2f\t%s\n' % (binding.binded_entry.name,
+                                                 binding.cash,
+                                                 binding.date.strftime('%Y%m%d')))
+
+    earned_cash += empty_value
+
+    phraseology_work_stats = {'earned_cash': round(earned_cash, 2),
+                              'added_bindings': added_bindings.count(),
+                              'used_bindings': used_bindings.count()}
+    return phraseology_work_stats
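+
+
+# The .get(bonus=True, ...) lookups above and in write_lexicographic_stats
+# below assume at most one bonus record per entry in the period; with more
+# than one, Django's .get() raises MultipleObjectsReturned, which is not
+# caught here.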
+
+
+def write_lexicographic_stats(payments_file, user, pos):
+    real_lemmas = RealizedLemma.objects.filter(user_stats__user=user,
+                                               lemma__entry_obj__pos__tag=pos,
+                                               date__gte=STARTDATE,
+                                               date__lte=ENDDATE)
+
+    earned_cash = real_lemmas.filter(status__type__sym_name='ready').aggregate(Sum('cash'))['cash__sum']
+    if earned_cash is None:
+        earned_cash = 0.0
+
+    # Lemmas marked for deletion in the period are credited a flat 1.0 each.
+    lemmas_to_erase_cash = 0.0
+    lemmas_marked_to_erase = Lemma.objects.filter(owner=user,
+                                                  old=False,
+                                                  status__type__sym_name='erase',
+                                                  entry_obj__pos__tag=pos)
+
+    payments_file.write(u'Zaznaczone do usunięcia:\n')
+    for lemma in lemmas_marked_to_erase:
+        erase_date = lemma.status_history.order_by('-date')[0].date
+        if STARTDATE <= erase_date <= ENDDATE:
+            payments_file.write(u'%s\t%.2f\t%s\n' % (lemma.entry_obj.name,
+                                                     1.0,
+                                                     erase_date.strftime('%Y%m%d')))
+            lemmas_to_erase_cash += 1.0
+    earned_cash += lemmas_to_erase_cash
+
+    bonus_cash = real_lemmas.filter(bonus=True).aggregate(Sum('cash'))['cash__sum']
+    if bonus_cash is None:
+        bonus_cash = 0.0
+    earned_cash += bonus_cash
+
+    payments_file.write(u'\n\nWykonane:\n')
+    done_lemmas = real_lemmas.filter(bonus=False,
+                                     status__type__sym_name='ready').order_by('date')
+    for done_lemma in done_lemmas:
+        cash = done_lemma.cash
+        try:
+            bonus = real_lemmas.get(bonus=True, lemma__entry_obj=done_lemma.lemma.entry_obj).cash
+            cash += bonus
+        except RealizedLemma.DoesNotExist:
+            pass
+        payments_file.write(u'%s\t%.2f\t%s\n' % (done_lemma.lemma.entry_obj.name,
+                                                 cash,
+                                                 done_lemma.date.strftime('%Y%m%d')))
+
+    lex_work_stats = {'earned_cash': round(earned_cash, 2),
+                      'bonus_cash': round(bonus_cash, 2),
+                      'lemmas_to_erase_cash': round(lemmas_to_erase_cash, 2)}
+    return lex_work_stats
diff --git a/dictionary/management/commands/create_TEI_walenty.py b/dictionary/management/commands/create_TEI_walenty.py
index 3b89749..a6fe739 100644
--- a/dictionary/management/commands/create_TEI_walenty.py
+++ b/dictionary/management/commands/create_TEI_walenty.py
@@ -5,38 +5,56 @@ import os
 import tarfile
 
 from django.core.management.base import BaseCommand
+from optparse import make_option
 
 from dictionary.models import Lemma, Frame_Opinion_Value, \
-    get_ready_statuses
+    get_statuses
 from dictionary.teixml import createteixml, write_phrase_types_expansions_in_TEI
 from settings import WALENTY_PATH
 
 
 class Command(BaseCommand):
     args = '<dict dict ...>'
     help = 'Get Walenty in TEI format.'
+    option_list = BaseCommand.option_list + (
+        make_option('--min_status',
+                    action='store',
+                    type='string',
+                    dest='min_status_type',
+                    default='ready',
+                    help='Minimum lemma status.'),
+        make_option('--start_date',
+                    action='store',
+                    type='string',
+                    dest='start_date',
+                    default='all',
+                    help='Status change start date (format: YYYY-MM-DD).'),
+    )
 
     def handle(self, *args, **options):
         try:
             now = datetime.datetime.now().strftime('%Y%m%d')
-
             vocab_names = list(args)
             vocab_names.sort()
-            if vocab_names:
-                filename_base = '%s_%s_%s' % ('walenty', '+'.join(vocab_names), now)
-            else:
-                filename_base = '%s_%s' % ('walenty', now)
+
+            filename_base = self.create_filename_base(vocab_names, options, now)
             base_path = os.path.join(WALENTY_PATH, filename_base)
             outpath = base_path + '.xml'
-            ready_statuses = get_ready_statuses()
+            statuses = get_statuses(options['min_status_type'])
 
             lemmas = Lemma.objects.filter(old=False)
             if vocab_names:
                 lemmas = lemmas.filter(vocabulary__name__in=vocab_names)
-            ready_lemmas = lemmas.filter(status__in=ready_statuses).order_by('entry_obj__name')
+            lemmas = lemmas.filter(status__in=statuses)
+            if options['start_date'] != 'all':
+                lemmas = self.filter_lemmas_by_status_change(lemmas, statuses, options['start_date'])
+            lemmas = lemmas.order_by('entry_obj__name')
+
+            self.print_statistics(lemmas)
 
             frame_opinion_values = Frame_Opinion_Value.objects.all()
-            createteixml(outpath, ready_lemmas, frame_opinion_values)
+            createteixml(outpath, lemmas, frame_opinion_values)
 
             archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz')
 
             phrase_types_expand_path = os.path.join(WALENTY_PATH,
@@ -50,3 +68,49 @@ class Command(BaseCommand):
             archive.close()
             os.remove(outpath)
             os.remove(phrase_types_expand_path)
+
+    def create_filename_base(self, vocab_names, options, now):
+        # The filename encodes any non-default status, the vocabularies and
+        # the start date, e.g. walenty-checked-vocab-20150601_20150702.
+        start_date = ''
+        if options['start_date'] != 'all':
+            start_date = '-' + options['start_date'].replace('-', '')
+
+        vocab_names_str = ''
+        if vocab_names:
+            vocab_names_str = '-' + '+'.join(vocab_names)
+
+        min_status = ''
+        if options['min_status_type'] != 'ready':
+            min_status = '-' + options['min_status_type']
+
+        filename_base = 'walenty%s%s%s_%s' % (min_status, vocab_names_str,
+                                              start_date, now)
+        return filename_base
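+
+    # Example invocations (vocabulary name is hypothetical):
+    #   python manage.py create_TEI_walenty
+    #   python manage.py create_TEI_walenty --min_status=checked --start_date=2015-06-01 clarin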
+
+    def filter_lemmas_by_status_change(self, lemmas, statuses, start_date_str):
+        start_date = self.parse_date(start_date_str)
+        filtered_lemmas_pks = []
+        for lemma in lemmas:
+            # statuses[0] is expected to be the minimum requested status; keep
+            # lemmas whose history records it on or after start_date.
+            if lemma.status_history.filter(status=statuses[0], date__gte=start_date).exists():
+                filtered_lemmas_pks.append(lemma.pk)
+        return lemmas.filter(pk__in=filtered_lemmas_pks)
+
+    def parse_date(self, date_str):
+        # int() tolerates leading zeros, so '2015-06-01' parses without
+        # manual stripping.
+        date_parts = date_str.split('-')
+        year = int(date_parts[0])
+        month = int(date_parts[1])
+        day = int(date_parts[2])
+        return datetime.datetime(year, month, day, 00, 00)
+
+    def print_statistics(self, lemmas):
+        count = {'frames': 0,
+                 'arguments': 0}
+        for lemma in lemmas:
+            frames = lemma.entry_obj.actual_frames()
+            count['frames'] += frames.count()
+            for frame in frames.all():
+                count['arguments'] += frame.complements.count()
+        print(u'Lemmas:\t%d' % lemmas.count())
+        print(u'Frames:\t%d' % count['frames'])
+        print(u'Arguments:\t%d' % count['arguments'])
diff --git a/dictionary/models.py b/dictionary/models.py
index d187207..1ea63bf 100644
--- a/dictionary/models.py
+++ b/dictionary/models.py
@@ -100,6 +100,10 @@ def get_checked_statuses():
 def get_ready_statuses():
     ready_type = LemmaStatusType.objects.get(sym_name='ready')
     return Lemma_Status.objects.filter(type__priority__gte=ready_type.priority).distinct()
+
+def get_statuses(min_status_type):
+    # Every lemma status whose priority is at least that of min_status_type.
+    min_type = LemmaStatusType.objects.get(sym_name=min_status_type)
+    return Lemma_Status.objects.filter(type__priority__gte=min_type.priority).distinct()
 
 class LemmaStatusType(Model):