Commit e72757a4579563f2eb24ab4b0ec6faf8477696f0
1 parent
76f54edc
Added minimum status and start date options to create_TEI_walenty script.
Showing 3 changed files with 389 additions and 9 deletions.
accounts/management/commands/get_payments_data.py
0 → 100644
1 | +# -*- coding:utf-8 -*- | |
2 | + | |
3 | +import codecs | |
4 | +import datetime | |
5 | + | |
6 | +from django.contrib.auth.models import User | |
7 | +from django.core.management.base import BaseCommand | |
8 | +from django.db.models import Sum | |
9 | + | |
10 | +from accounts.models import RealizedLemma, RealizedPhraseology, RealizedPhraseologyBinding, \ | |
11 | + RealizedSemantics | |
12 | +from dictionary.ajax_user_stats import get_used_bindings | |
13 | +from dictionary.models import Lemma | |
14 | + | |
15 | + | |
16 | +USERNAME = 'JakubS' | |
17 | +FUNCTION = 'Leksykograf' | |
18 | +POS = 'noun' | |
19 | +STARTDATE = datetime.datetime(2011, 1, 1, 00, 00) | |
20 | +ENDDATE = (datetime.datetime.now() - | |
21 | + datetime.timedelta(days=1)).replace(hour=23, minute=59, second=59) | |
22 | + | |
23 | + | |
class Command(BaseCommand):
    """Management command that dumps payment data for the configured user.

    All configuration (user, role, POS, date range) comes from the
    module-level constants above.
    """
    args = 'none'

    def handle(self, **options):
        # Delegate everything to the module-level report writer.
        return get_payments_data(FUNCTION)
30 | + | |
def get_payments_data(function):
    """Write a CSV payments report for USERNAME acting in the given role.

    :param function: role name selecting which statistics writer runs
        ('Semantyk', 'Superfrazeolog', 'Frazeolog', 'Leksykograf' or
        'Superleksykograf').
    :raises ValueError: for an unknown role (the original code would have
        failed later with a NameError on ``work_stats``).
    """
    writers = {
        'Semantyk': write_semantic_stats,
        'Superfrazeolog': write_superphraseologic_stats,
        'Frazeolog': write_phraseologic_stats,
        'Leksykograf': write_lexicographic_stats,
        'Superleksykograf': write_superlexicographic_stats,
    }
    try:
        write_stats = writers[function]
    except KeyError:
        raise ValueError('Unknown function: %s' % function)

    start = STARTDATE.strftime('%Y%m%d')
    end = ENDDATE.strftime('%Y%m%d')
    # Use the *function* argument (not the FUNCTION global) so the file name
    # always matches the report actually generated.
    payments_path = 'data/work_%s_%s_%s-%s.csv' % (USERNAME, function, start, end)
    user = User.objects.get(username=USERNAME)

    payments_file = codecs.open(payments_path, 'wt', 'utf-8')
    try:
        work_stats = write_stats(payments_file, user, POS)
        total_earned_cash = work_stats['earned_cash']
        if total_earned_cash > 0.0:
            payments_file.write(u'\n%s\t%.2f\n' % (user.username,
                                                   total_earned_cash))
    finally:
        # Close the report even if a writer raises.
        payments_file.close()
53 | + | |
54 | + | |
def write_superlexicographic_stats(payments_file, user, pos):
    """Write per-lemma rows for lemmas *checked* by the user in the date range.

    :param payments_file: open writable text file for the report rows
    :param user: Django ``User`` whose work is being reported
    :param pos: part-of-speech tag restricting the lemmas
    :return: dict with the rounded ``earned_cash`` total
    """
    real_lemmas = RealizedLemma.objects.filter(user_stats__user=user,
                                               lemma__entry_obj__pos__tag=pos,
                                               date__gte=STARTDATE,
                                               date__lte=ENDDATE,
                                               status__type__sym_name='checked',
                                               bonus=False)

    earned_cash = real_lemmas.aggregate(Sum('cash'))['cash__sum']
    # Sum over an empty queryset yields None, not 0.
    if earned_cash is None:
        earned_cash = 0.0

    payments_file.write(u'Sprawdzone:\n')
    for done_lemma in real_lemmas.order_by('date'):
        payments_file.write(u'%s\t%.2f\t%s\n' % (done_lemma.lemma.entry_obj.name,
                                                 done_lemma.cash,
                                                 done_lemma.date.strftime('%Y%m%d')))

    lex_work_stats = {'earned_cash': round(earned_cash, 2)}
    return lex_work_stats
75 | + | |
76 | + | |
def _sum_or_zero(queryset, field, zero=0):
    """Sum *field* over *queryset*; an empty aggregate (None) becomes *zero*."""
    value = queryset.aggregate(Sum(field))['%s__sum' % field]
    return zero if value is None else value


def write_semantic_stats(payments_file, user, pos):
    """Write per-entry rows for semantic work done by the user in the date range.

    :param payments_file: open writable text file for the report rows
    :param user: Django ``User`` whose work is being reported
    :param pos: part-of-speech tag restricting the entries
    :return: dict of cash totals, frame counters and efficacy percentage
    """
    real_semantics = RealizedSemantics.objects.filter(user_stats__user=user,
                                                      date__gte=STARTDATE,
                                                      date__lte=ENDDATE,
                                                      entry__pos__tag=pos)

    # real_semantics is already restricted to this user, so the original
    # per-aggregate re-filtering by user was redundant.
    earned_cash = _sum_or_zero(real_semantics, 'cash', 0.0)
    bonus_cash = _sum_or_zero(real_semantics.filter(bonus=True), 'cash', 0.0)
    prop_frames = _sum_or_zero(real_semantics, 'prop_frames')
    part_prop_frames = _sum_or_zero(real_semantics, 'part_prop_frames')
    wrong_frames = _sum_or_zero(real_semantics, 'wrong_frames')
    corr_frames = _sum_or_zero(real_semantics, 'corr_frames')
    part_corr_frames = _sum_or_zero(real_semantics, 'part_corr_frames')
    ncorr_frames = _sum_or_zero(real_semantics, 'ncorr_frames')
    made_frames = _sum_or_zero(real_semantics, 'made_frames')
    added_connections = _sum_or_zero(real_semantics, 'added_connections')

    efficacy = 0.0
    if prop_frames + wrong_frames > 0:
        efficacy = float(prop_frames) / float(prop_frames + wrong_frames) * 100.0

    payments_file.write(u'Wykonane:\n')
    done_semantics = real_semantics.filter(bonus=False).order_by('date')
    for done_sem in done_semantics:
        done_cash = done_sem.cash
        try:
            # Fold the matching bonus payment for this entry into the row.
            done_cash += real_semantics.get(bonus=True, entry=done_sem.entry).cash
        except RealizedSemantics.DoesNotExist:
            pass
        payments_file.write(u'%s\t%.2f\t%s\n' % (done_sem.entry.name,
                                                 done_cash,
                                                 done_sem.date.strftime('%Y%m%d')))

    sem_work_stats = {'earned_cash': round(earned_cash, 2),
                      'bonus_cash': round(bonus_cash, 2),
                      'prop_frames': prop_frames,
                      'part_prop_frames': part_prop_frames,
                      'wrong_frames': wrong_frames,
                      'corr_frames': corr_frames,
                      'part_corr_frames': part_corr_frames,
                      'checked_frames': ncorr_frames + corr_frames + part_corr_frames,
                      'made_frames': made_frames,
                      'efficacy': round(efficacy, 2),
                      'added_connections': added_connections}
    return sem_work_stats
153 | + | |
154 | + | |
def write_superphraseologic_stats(payments_file, user, pos):
    """Write rows for phraseology frames *checked* by the user plus added bindings.

    :param payments_file: open writable text file for the report rows
    :param user: Django ``User`` whose work is being reported
    :param pos: part-of-speech tag restricting the lemmas
    :return: dict with ``earned_cash`` and binding counters
    """
    added_bindings = RealizedPhraseologyBinding.objects.filter(user_stats__user=user,
                                                               date__gte=STARTDATE,
                                                               date__lte=ENDDATE)
    used_bindings = get_used_bindings(added_bindings)

    checked_phraseology = RealizedPhraseology.objects.filter(user_stats__user=user,
                                                             date__gte=STARTDATE,
                                                             date__lte=ENDDATE,
                                                             bonus=False,
                                                             status__type__sym_name='checked_f',
                                                             lemma__entry_obj__pos__tag=pos)

    # Sum over an empty queryset yields None, not 0.
    earned_cash_frames = checked_phraseology.aggregate(Sum('cash'))['cash__sum']
    if earned_cash_frames is None:
        earned_cash_frames = 0.0
    earned_cash_bindings = used_bindings.aggregate(Sum('cash'))['cash__sum']
    if earned_cash_bindings is None:
        earned_cash_bindings = 0.0
    earned_cash = earned_cash_frames + earned_cash_bindings

    # A frame checked at zero cash is still worth a flat rate in the report.
    phraseologic_empty_frame_value = 1.0
    empty_value = 0.0

    payments_file.write(u'Sprawdzone:\n')
    for checked_phr in checked_phraseology.order_by('date'):
        cash = checked_phr.cash
        if cash == 0.0:
            cash = phraseologic_empty_frame_value
            empty_value += phraseologic_empty_frame_value
        payments_file.write(u'%s\t%.2f\t%s\n' % (checked_phr.lemma.entry_obj.name,
                                                 cash,
                                                 checked_phr.date.strftime('%Y%m%d')))
    earned_cash += empty_value

    payments_file.write(u'\n\nDodane powiazania frazeologiczne:\n')
    for binding in used_bindings.order_by('date'):
        payments_file.write(u'%s\t%.2f\t%s\n' % (binding.binded_entry.name,
                                                 binding.cash,
                                                 binding.date.strftime('%Y%m%d')))

    phraseology_work_stats = {'earned_cash': round(earned_cash, 2),
                              'added_bindings': added_bindings.count(),
                              'used_bindings': used_bindings.count()}
    return phraseology_work_stats
202 | + | |
203 | + | |
def write_phraseologic_stats(payments_file, user, pos):
    """Write rows for phraseology frames *done* by the user plus added bindings.

    :param payments_file: open writable text file for the report rows
    :param user: Django ``User`` whose work is being reported
    :param pos: part-of-speech tag restricting the lemmas
    :return: dict with ``earned_cash`` and binding counters
    """
    added_bindings = RealizedPhraseologyBinding.objects.filter(user_stats__user=user,
                                                               date__gte=STARTDATE,
                                                               date__lte=ENDDATE)
    used_bindings = get_used_bindings(added_bindings)

    checked_and_done_phraseology = RealizedPhraseology.objects.filter(user_stats__user=user,
                                                                      date__gte=STARTDATE,
                                                                      date__lte=ENDDATE,
                                                                      lemma__entry_obj__pos__tag=pos)

    done_phraseology = checked_and_done_phraseology.filter(status__type__sym_name='ready_f',
                                                           bonus=False)

    # Sum over an empty queryset yields None, not 0.
    earned_cash_frames = done_phraseology.aggregate(Sum('cash'))['cash__sum']
    if earned_cash_frames is None:
        earned_cash_frames = 0.0
    earned_cash_bindings = used_bindings.aggregate(Sum('cash'))['cash__sum']
    if earned_cash_bindings is None:
        earned_cash_bindings = 0.0
    earned_cash = earned_cash_frames + earned_cash_bindings

    bonus_cash = checked_and_done_phraseology.filter(bonus=True).aggregate(Sum('cash'))['cash__sum']
    if bonus_cash is None:
        bonus_cash = 0.0
    earned_cash += bonus_cash

    # A frame done at zero cash is still worth a flat rate in the report.
    phraseologic_empty_frame_value = 1.0
    empty_value = 0.0

    payments_file.write(u'Wykonane:\n')
    for done_phr in done_phraseology.order_by('date'):
        cash = done_phr.cash
        if cash == 0.0:
            cash = phraseologic_empty_frame_value
            empty_value += phraseologic_empty_frame_value
        try:
            # Fold the matching bonus payment for this entry into the row.
            done_bonus = checked_and_done_phraseology.get(bonus=True,
                                                          lemma__entry_obj=done_phr.lemma.entry_obj).cash
            cash += done_bonus
        except RealizedPhraseology.DoesNotExist:
            pass
        payments_file.write(u'%s\t%.2f\t%s\n' % (done_phr.lemma.entry_obj.name,
                                                 cash,
                                                 done_phr.date.strftime('%Y%m%d')))

    payments_file.write(u'\n\nDodane powiazania frazeologiczne:\n')
    for binding in used_bindings.order_by('date'):
        payments_file.write(u'%s\t%.2f\t%s\n' % (binding.binded_entry.name,
                                                 binding.cash,
                                                 binding.date.strftime('%Y%m%d')))

    earned_cash += empty_value

    phraseology_work_stats = {'earned_cash': round(earned_cash, 2),
                              'added_bindings': added_bindings.count(),
                              'used_bindings': used_bindings.count()}
    return phraseology_work_stats
261 | + | |
262 | + | |
def write_lexicographic_stats(payments_file, user, pos):
    """Write rows for lemmas made *ready* or marked for erasure by the user.

    :param payments_file: open writable text file for the report rows
    :param user: Django ``User`` whose work is being reported
    :param pos: part-of-speech tag restricting the lemmas
    :return: dict with ``earned_cash``, ``bonus_cash`` and erasure totals
    """
    real_lemmas = RealizedLemma.objects.filter(user_stats__user=user,
                                               lemma__entry_obj__pos__tag=pos,
                                               date__gte=STARTDATE,
                                               date__lte=ENDDATE)

    # Sum over an empty queryset yields None, not 0.
    earned_cash = real_lemmas.filter(status__type__sym_name='ready').aggregate(Sum('cash'))['cash__sum']
    if earned_cash is None:
        earned_cash = 0.0

    # Each lemma marked for erasure within the date range is worth a flat 1.0.
    lemmas_to_erase_cash = 0.0
    lemmas_marked_to_erase = Lemma.objects.filter(owner=user,
                                                  old=False,
                                                  status__type__sym_name='erase',
                                                  entry_obj__pos__tag=pos)

    payments_file.write(u'Zaznaczone do usunięcia:\n')
    for lemma in lemmas_marked_to_erase:
        erase_date = lemma.status_history.order_by('-date')[0].date
        if STARTDATE <= erase_date <= ENDDATE:
            payments_file.write(u'%s\t%.2f\t%s\n' % (lemma.entry_obj.name,
                                                     1.0,
                                                     erase_date.strftime('%Y%m%d')))
            lemmas_to_erase_cash += 1.0
    earned_cash += lemmas_to_erase_cash

    bonus_cash = real_lemmas.filter(bonus=True).aggregate(Sum('cash'))['cash__sum']
    if bonus_cash is None:
        bonus_cash = 0.0
    earned_cash += bonus_cash

    payments_file.write(u'\n\nWykonane:\n')
    done_lemmas = real_lemmas.filter(bonus=False,
                                     status__type__sym_name='ready').order_by('date')
    for done_lemma in done_lemmas:
        cash = done_lemma.cash
        try:
            # Fold the matching bonus payment for this lemma into the row.
            bonus = real_lemmas.get(bonus=True, lemma__entry_obj=done_lemma.lemma.entry_obj).cash
            cash += bonus
        except RealizedLemma.DoesNotExist:
            pass
        payments_file.write(u'%s\t%.2f\t%s\n' % (done_lemma.lemma.entry_obj.name,
                                                 cash,
                                                 done_lemma.date.strftime('%Y%m%d')))

    lex_work_stats = {'earned_cash': round(earned_cash, 2),
                      'bonus_cash': round(bonus_cash, 2),
                      'lemmas_to_erase_cash': round(lemmas_to_erase_cash, 2)}
    return lex_work_stats
... | ... |
dictionary/management/commands/create_TEI_walenty.py
... | ... | @@ -5,38 +5,56 @@ import os |
5 | 5 | import tarfile |
6 | 6 | |
7 | 7 | from django.core.management.base import BaseCommand |
8 | +from optparse import make_option | |
8 | 9 | |
9 | 10 | from dictionary.models import Lemma, Frame_Opinion_Value, \ |
10 | - get_ready_statuses | |
11 | + get_statuses | |
11 | 12 | from dictionary.teixml import createteixml, write_phrase_types_expansions_in_TEI |
12 | 13 | from settings import WALENTY_PATH |
13 | 14 | |
14 | 15 | class Command(BaseCommand): |
15 | 16 | args = '<dict dict ...>' |
16 | 17 | help = 'Get Walenty in TEI format.' |
18 | + option_list = BaseCommand.option_list + ( | |
19 | + make_option('--min_status', | |
20 | + action='store', | |
21 | + type='string', | |
22 | + dest='min_status_type', | |
23 | + default='ready', | |
24 | + help='Minimum lemma status.'), | |
25 | + make_option('--start_date', | |
26 | + action='store', | |
27 | + type='string', | |
28 | + dest='start_date', | |
29 | + default='all', | |
30 | + help='Status change start date (format: YYYY-MM-DD).'), | |
31 | + | |
32 | + ) | |
17 | 33 | |
18 | 34 | def handle(self, *args, **options): |
19 | 35 | try: |
20 | 36 | now = datetime.datetime.now().strftime('%Y%m%d') |
21 | - | |
22 | 37 | vocab_names = list(args) |
23 | 38 | vocab_names.sort() |
24 | - if vocab_names: | |
25 | - filename_base = '%s_%s_%s' % ('walenty', '+'.join(vocab_names), now) | |
26 | - else: | |
27 | - filename_base = '%s_%s' % ('walenty', now) | |
39 | + | |
40 | + filename_base = self.create_filename_base(vocab_names, options, now) | |
28 | 41 | |
29 | 42 | base_path = os.path.join(WALENTY_PATH, filename_base) |
30 | 43 | outpath = base_path + '.xml' |
31 | - ready_statuses = get_ready_statuses() | |
44 | + statuses = get_statuses(options['min_status_type']) | |
32 | 45 | |
33 | 46 | lemmas = Lemma.objects.filter(old=False) |
34 | 47 | if vocab_names: |
35 | 48 | lemmas = lemmas.filter(vocabulary__name__in=vocab_names) |
36 | - ready_lemmas = lemmas.filter(status__in=ready_statuses).order_by('entry_obj__name') | |
49 | + lemmas = lemmas.filter(status__in=statuses) | |
50 | + if options['start_date'] != 'all': | |
51 | + lemmas = self.filter_lemmas_by_status_change(lemmas, statuses, options['start_date']) | |
52 | + lemmas = lemmas.order_by('entry_obj__name') | |
53 | + | |
54 | + self.print_statistics(lemmas) | |
37 | 55 | |
38 | 56 | frame_opinion_values = Frame_Opinion_Value.objects.all() |
39 | - createteixml(outpath, ready_lemmas, frame_opinion_values) | |
57 | + createteixml(outpath, lemmas, frame_opinion_values) | |
40 | 58 | archive = tarfile.open(base_path + '-TEI.tar.gz', 'w:gz') |
41 | 59 | |
42 | 60 | phrase_types_expand_path = os.path.join(WALENTY_PATH, |
... | ... | @@ -50,3 +68,49 @@ class Command(BaseCommand): |
50 | 68 | archive.close() |
51 | 69 | os.remove(outpath) |
52 | 70 | os.remove(phrase_types_expand_path) |
71 | + | |
def create_filename_base(self, vocab_names, options, now):
    """Build the export file-name stem: walenty[-status][-vocabs][-date]_<now>.

    Optional parts are included only when they differ from the defaults
    ('ready' status, no vocabularies, 'all' dates).
    """
    status_part = ''
    if options['min_status_type'] != 'ready':
        status_part = '-' + options['min_status_type']

    vocab_part = '-' + '+'.join(vocab_names) if vocab_names else ''

    date_part = ''
    if options['start_date'] != 'all':
        date_part = '-' + options['start_date'].replace('-', '')

    return 'walenty%s%s%s_%s' % (status_part, vocab_part, date_part, now)
88 | + | |
89 | + | |
def filter_lemmas_by_status_change(self, lemmas, statuses, start_date_str):
    """Narrow *lemmas* to those whose lowest accepted status (statuses[0])
    was set on or after the given start date."""
    since = self.parse_date(start_date_str)
    matching_pks = [lemma.pk for lemma in lemmas
                    if lemma.status_history.filter(status=statuses[0],
                                                   date__gte=since).exists()]
    return lemmas.filter(pk__in=matching_pks)
97 | + | |
def parse_date(self, date_str):
    """Parse a 'YYYY-MM-DD' string into a naive datetime at midnight.

    Uses ``strptime`` instead of manual split + ``lstrip('0')``:
    ``int()`` already accepts leading zeros, and ``lstrip('0')`` turned
    a '00' component into '' and raised ValueError on ``int('')``.
    """
    return datetime.datetime.strptime(date_str, '%Y-%m-%d')
105 | + | |
def print_statistics(self, lemmas):
    """Print lemma, frame and argument counts for the exported queryset."""
    frames_total = 0
    arguments_total = 0
    for lemma in lemmas:
        frames = lemma.entry_obj.actual_frames()
        frames_total += frames.count()
        for frame in frames.all():
            arguments_total += frame.complements.count()
    print (u'Lemmas:\t%d' % lemmas.count())
    print (u'Frames:\t%d' % frames_total)
    print (u'Arguments:\t%d' % arguments_total)
... | ... |
dictionary/models.py
... | ... | @@ -100,6 +100,10 @@ def get_checked_statuses(): |
100 | 100 | def get_ready_statuses(): |
101 | 101 | ready_type = LemmaStatusType.objects.get(sym_name='ready') |
102 | 102 | return Lemma_Status.objects.filter(type__priority__gte=ready_type.priority).distinct() |
103 | + | |
def get_statuses(min_status_type):
    """Return all lemma statuses whose type priority is at least that of
    the status type named *min_status_type*."""
    threshold = LemmaStatusType.objects.get(sym_name=min_status_type)
    return Lemma_Status.objects.filter(type__priority__gte=threshold.priority).distinct()
103 | 107 | |
104 | 108 | |
105 | 109 | class LemmaStatusType(Model): |
... | ... |