stats_atlas.py 5.33 KB
#!/usr/bin/env python

import sys
import os
import re
import shutil
from collections import defaultdict
from optparse import OptionParser

# TODO: find a way to locate the dfs package without hard-coding its path relative to this script.
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), ".."))

from dfs.database import Database
from dfs.repo import Repo
from dfs.config import Config

def fill(text):
    """Pad ``text`` on the right with spaces up to the 20-character column width.

    Convenience wrapper around ``fill_custom`` used for the cells of the
    report tables; text of 20 or more characters comes back unchanged.
    """
    column_width, pad_char = 20, " "
    return fill_custom(text, column_width, pad_char)

def fill_custom(text, l, sym):
    """Right-pad ``text`` with ``sym`` until it is at least ``l`` characters long.

    Args:
        text: String to pad.
        l: Minimum length of ``text`` before padding stops.
        sym: Padding string, appended once for every missing character.

    Returns:
        ``text`` followed by ``max(0, l - len(text))`` copies of ``sym``;
        ``text`` itself when it is already ``l`` characters or longer.
    """
    to_add = max(0, l - len(text))
    # String repetition replaces the character-by-character concatenation loop.
    return text + sym * to_add

def count_rejected(db):
    """Count the entries of the database file index that are rejected.

    Args:
        db: Object exposing ``file_index`` (iterated with ``iteritems()``)
            and a ``rejected(file)`` predicate.

    Returns:
        Number of index entries for which ``db.rejected`` is true.
    """
    return sum(
        1
        for _name, entry in db.file_index.iteritems()
        if db.rejected(entry)
    )
        

def count_file_stats(path):
    """Return the character count of ``path + ".txt"`` up to the summaries marker.

    The file is read line by line; reading stops at the first line that
    starts with ``#### SUMMARIES ####`` (that line and everything after it
    are not counted).

    Args:
        path: File path without the ``.txt`` extension.

    Returns:
        Sum over the counted lines of ``len(line) + 1``; 0 for an empty file.
        Errors raised by ``open`` propagate to the caller.
    """
    total = 0
    with open(path + ".txt", "r") as f:
        # Iterate lazily and keep a running total instead of concatenating
        # the whole text just to measure its length.
        for line in f:
            if line.startswith("#### SUMMARIES ####"):
                break
            # The +1 reproduces the historical figure (the old code appended
            # "\n" to every line before measuring).
            # NOTE(review): lines read from the file already end with their
            # newline, so this looks like a double count - confirm before
            # changing, as it alters the reported statistics.
            total += len(line) + 1
    return total
        
def count_anno_stats(db, all, anno_per_file):
    finished = {}
    current = {}
    returned = {}
    returned_fixed = {}
    finished_by_all = 0
    fetched_by_all = 0
    chars = {}
    
    for filename, file in db.file_index.iteritems():
        if db.rejected(file):
            continue
        
        fin = True
        idx = 0
        for annotation in file.findall("ann"):
            owner = annotation.find("annName").text            
                
            if not owner in finished:
                finished[owner] = 0
            if not owner in current:
                current[owner] = 0                 
            if not owner in returned:
                returned[owner] = 0   
            if not owner in returned_fixed:
                returned_fixed[owner] = 0     
            
            if db.owned(annotation):                
                current[owner] = current[owner] + 1
            elif db.returned(annotation):
                returned[owner] = returned[owner] + 1
                if db.fixed(annotation):
                    returned_fixed[owner] = returned_fixed[owner] + 1                
            elif db.finished(annotation):
                finished[owner] = finished[owner] + 1
            else:
                print "Strange state! ", annotation.text
                
            if not db.finished(annotation):
                fin = False
            else:
                path = wc.upload_path(filename, idx)
                ccnt = count_file_stats(path)
                if owner not in chars:
                    chars[owner] = 0
                chars[owner] = chars[owner] + ccnt                
                
        if len(file.findall("ann")) == anno_per_file:
            if fin:
                finished_by_all = finished_by_all + 1
            else:
                fetched_by_all = fetched_by_all + 1  
                
        idx += 1
        
    
    print fill("Login"), fill("Akt. pobr."), fill("Zwrocone (Napr.)"), fill("Zakonczone"), fill("Zak. znaki"), fill("Do pobrania")
    print fill_custom("", 120, "-")
    for user in set(current.keys()) | set(finished.keys()) | set(returned_fixed.keys()) | set(returned.keys()):
        print fill(user), fill(str(current[user])), fill(str(returned[user]) + " (" + str(returned_fixed[user]) + ")"), fill(str(finished[user])),
        print fill(str(chars[user])),
        priority, normal = db.for_annotation(user)
        print fill(str(len(normal)))
                
    print fill_custom("", 120, "-")
    all_curr = sum([i for i in current.values()])
    all_ret = sum([i for i in returned.values()])
    all_ret_fixed = sum([i for i in returned_fixed.values()])    
    all_fin = sum([i for i in finished.values()])
    all_chars = sum([i for i in chars.values()])
    print fill("Suma"), fill(str(all_curr)), fill(str(all_ret) + " (" + str(all_ret_fixed) + ")"), fill(str(all_fin)), fill(str(all_chars)), fill(str(all - all_curr - all_fin - all_ret))
    print ""
    print "Pobranych (nie zakonczonych) jednoczesnie przez", anno_per_file, "tekstow:", fetched_by_all
    print "Zakonczonych jednoczesnie przez", anno_per_file, "tekstow:", finished_by_all
    print "Pozostalo do pobrania naprawionych:", all_ret_fixed
    print "Pozostalo do obejrzenia zwroconych:", all_ret - all_ret_fixed
    print 
    
    return finished_by_all   
    
if __name__ == "__main__":
    optparser = OptionParser(usage="""usage: %prog CONFIG""")
    (options, args) = optparser.parse_args()
    if len(args) < 1:
        optparser.print_help()
        sys.exit(0)
        
    conf_path = args[0]
    cfg = Config(conf_path)
    wc = Repo(cfg["svn.repository"], cfg["svn.login"], cfg["svn.passwd"])
    db = Database(wc.db_path(), int(cfg["anno_per_file"]))    
    anno_per_file = int(cfg["anno_per_file"])

    rejected = count_rejected(db)  
    print    
    print fill_custom("", 100, "-")
    all = len(db.file_index.keys())
    print "Wszystkich tekstow w bazie: " + str(all)    
    print "Odrzuconych: " + str(rejected)
    print "Wszystkich tekstow w bazie bez odrzuconych: " + str(all - rejected)    
    all = all * anno_per_file - rejected
    print "Anotacji do wykonania: " + str(all)
        
    print fill_custom("", 100, "-")
    print 
    
    print fill_custom("----Anotacja", 100, "-")
    anno_count = count_anno_stats(db, all, anno_per_file)