add_files.py 3.07 KB
#!/usr/bin/env python

import sys
import os
import re
import shutil
from collections import defaultdict
from optparse import OptionParser

# TODO: a solution that does not rely on a hard-coded relative path would be welcome.
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), ".."))

from dfs.database import Database
from dfs.repo import Repo
from dfs.config import Config
            
def add_file(wc, db, file_id, src_paths, anno_per_file):
    """Register file_id in db and add each of its source paths to wc.

    db.add(file_id) is attempted first; if it raises, the error is printed
    and nothing else is touched. Each path in src_paths is then handed to
    wc.add in order. On the first wc.add failure the paths added so far are
    rolled back through wc.remove, file_id is removed from db again, and the
    remaining paths are not attempted.

    Parameters:
        wc: object providing add(path) and remove(paths, anno_per_file).
        db: object providing add(file_id) and remove(file_id).
        file_id: identifier shared by all entries of src_paths.
        src_paths: iterable of paths that belong to file_id.
        anno_per_file: passed through unchanged to wc.remove.

    Returns:
        True if db.add and every wc.add succeeded, False otherwise.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    try:
        db.add(file_id)
    except Exception as ex:
        print(str(ex))
        return False

    print("Adding file "+file_id+":")
    added = []
    for src_path in src_paths:
        try:
            wc.add(src_path)
            print("\t - "+src_path)
            added.append(src_path)
        except Exception as ex:
            print("\t error: "+str(ex))
            for a in added:
                print("\t\t removing already added "+a)
                # NOTE(review): wc.remove receives the whole `added` list on
                # every iteration rather than just `a`; confirm against
                # Repo.remove whether a single call (or passing `a`) is meant.
                for b in wc.remove(added, anno_per_file):
                    print("\t\t\t "+b)

            # Undo the database registration so db and wc stay in step.
            db.remove(file_id)
            return False

    return True
                        
def match_ext(path, exts):
    """Return the first entry of exts that path ends with, or None if none does."""
    suffixes = (candidate for candidate in exts if path.endswith(candidate))
    return next(suffixes, None)

def path_id(path, ext):
    """Return the file name of path with the literal suffix ext removed.

    Only the last path component is considered. ext is compared as plain
    text, not as a regular expression, so characters such as '.', '+' or
    '(' in an extension carry no special meaning. If the file name does not
    end with ext, or ext is empty, the file name is returned unchanged.

    Parameters:
        path: file path whose final component is examined.
        ext: suffix to strip from the file name.

    Returns:
        The file name without the trailing ext.
    """
    filename = os.path.basename(path)
    # Guard against an empty ext: filename[:-0] would yield an empty string.
    if ext and filename.endswith(ext):
        return filename[:-len(ext)]
    return filename

def group_paths(paths, exts):
    """Group paths by the id obtained from stripping their matching extension.

    Each path is matched against exts with match_ext; paths matching none of
    them are skipped. The remaining paths are keyed by path_id(path, ext).

    Parameters:
        paths: iterable of file paths.
        exts: sequence of extensions, tried in order for each path.

    Returns:
        A defaultdict(list) mapping each file id to its paths, in the order
        they appear in paths.
    """
    result = defaultdict(list)
    for path in paths:
        ext = match_ext(path, exts)
        if ext is None:
            # No known extension: this path is not part of any file group.
            continue
        result[path_id(path, ext)].append(path)
    return result

def get_rec_paths(paths):
    """Expand directories in paths into the files found beneath them.

    Entries that are not directories are kept as they are; each directory is
    walked with os.walk and replaced by the joined paths of all files found.
    The result preserves the order of paths.
    """
    collected = []
    for entry in paths:
        if not os.path.isdir(entry):
            collected.append(entry)
            continue
        for root, _subdirs, names in os.walk(entry):
            collected.extend(os.path.join(root, name) for name in names)
    return collected

if __name__ == "__main__":
    # Usage: add_files.py [options] CONFIG FILES
    # CONFIG is the path handed to Config; FILES are files and/or directories
    # (directories are expanded recursively by get_rec_paths).
    optparser = OptionParser(usage="""usage: %prog [options] CONFIG FILES""")
    optparser.add_option("--extensions", dest="exts", default=".mmax,_mentions.xml,_words.xml",
            help="List of comma-separated file extensions")
    (options, args) = optparser.parse_args()
    if len(args) < 2:
        optparser.print_help()
        sys.exit(0)

    conf_path = args[0]
    cfg = Config(conf_path)
    anno_per_file = int(cfg["anno_per_file"])

    # Drop empty entries (e.g. from a trailing comma): an empty extension
    # would match every path and put unrelated files into groups.
    exts = [ext.strip() for ext in options.exts.split(",") if ext.strip()]
    all_paths = get_rec_paths(args[1:])
    files = group_paths(all_paths, exts)

    wc = Repo(cfg["svn.repository"], cfg["svn.login"], cfg["svn.passwd"])
    db = Database(wc.db_path(), anno_per_file)

    success = []
    fail = []
    # items() works on both Python 2 and 3; iteritems() exists only on 2.
    for file_id, src_paths in files.items():
        if add_file(wc, db, file_id, src_paths, anno_per_file):
            success.append(file_id)
        else:
            fail.append(file_id)

    db.save()
    wc.commit("Added files: "+str(success))

    print("")
    if success:
        print("Added files: "+str(success))
    if fail:
        print("Failed to add files: "+str(fail))