# create_walenty.py 5.3 KB
#-*- coding:utf-8 -*-

#Copyright (c) 2013, Bartłomiej Nitoń
#All rights reserved.

#Redistribution and use in source and binary forms, with or without modification, are permitted provided 
#that the following conditions are met:

#    Redistributions of source code must retain the above copyright notice, this list of conditions and 
#    the following disclaimer.
#    Redistributions in binary form must reproduce the above copyright notice, this list of conditions 
#    and the following disclaimer in the documentation and/or other materials provided with the distribution.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED 
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
# POSSIBILITY OF SUCH DAMAGE.

import datetime
import os
import tarfile

from django.core.management.base import BaseCommand

from accounts.models import User
from dictionary.ajax_vocabulary_management import create_text_walenty
from dictionary.ajax_argument_realizations import create_realizations_file
from dictionary.models import Frame_Opinion, Lemma, Lemma_Status, \
                              LemmaStatusType, Vocabulary, POS
from settings import WALENTY_PATH

class Command(BaseCommand):
    """Management command that packs a dated Walenty export into a tar.gz.

    The archive is written to ``WALENTY_PATH`` as ``walenty_<YYYYMMDD>.tar.gz``.
    It contains the file written by ``create_realizations_file`` plus the
    files added by ``create_pos_archive`` for every part of speech whose
    tag is not ``unk``.
    """
    args = 'none'
    help = 'Script for creating Walenty vocabulary.'

    def handle(self, *args, **options):
        """Build the archive and remove the temporary realizations file.

        Any exception raised while building the archive is propagated
        after the cleanup steps have run.
        """
        # Read the clock once so both file names carry the same date even
        # when the command happens to run across midnight.
        date_stamp = datetime.datetime.now().strftime('%Y%m%d')
        filename_base = '%s_%s' % ('walenty', date_stamp)
        base_path = os.path.join(WALENTY_PATH, filename_base)
        realizations_path = os.path.join(
            WALENTY_PATH, '%s_%s.txt' % ('phrase_types_expand', date_stamp))
        create_realizations_file(realizations_path)

        # Bound before the try block so the cleanup below never hits an
        # unbound name (which would mask the real error) if opening fails.
        archive = None
        previous_cwd = os.getcwd()
        try:
            archive = tarfile.open(base_path + '.tar.gz', 'w:gz')
            # Files are added by bare file name (here and in
            # create_pos_archive), so the working directory has to be the
            # directory that holds them.
            os.chdir(WALENTY_PATH)
            archive.add(os.path.basename(realizations_path))
            for pos in POS.objects.exclude(tag=u'unk').order_by('priority'):
                create_pos_archive(archive, pos, filename_base)
        finally:
            try:
                if archive is not None:
                    archive.close()
            finally:
                # Do not leak the changed working directory to the caller.
                os.chdir(previous_cwd)
                os.remove(realizations_path)
            
def _write_pos_walenty(file_path, lemmas, statuses, pos):
    """Call create_text_walenty for a single part of speech.

    Returns whatever create_text_walenty returns; the caller treats it as
    the path of the generated file (presumably ``file_path`` plus an
    extension -- confirm against create_text_walenty).
    """
    return create_text_walenty(file_name=file_path,
                               lemmas=lemmas,
                               vocabularies=Vocabulary.objects.none(),
                               frame_opinions=Frame_Opinion.objects.none(),
                               lemma_statuses=statuses,
                               owners=User.objects.none(),
                               poss=POS.objects.filter(pk=pos.pk),
                               add_frame_opinions=True)


def create_pos_archive(archive, pos, filename_base):
    """Add the "all" and "verified" Walenty files for ``pos`` to ``archive``.

    Two files are generated under ``WALENTY_PATH``: one for lemmas whose
    status is in ``get_ready_statuses()`` (suffix ``all``) and one for
    lemmas whose status is in ``get_checked_statuses()`` (suffix
    ``verified``). Both are stored in the archive under the directory
    ``<pos.tag>s/`` and the generated files are removed afterwards.

    Arguments:
        archive: open tarfile.TarFile to add the files to.
        pos: part-of-speech object; its ``tag`` and ``pk`` are used.
        filename_base: common prefix of the generated file names.

    The files are added by bare file name, so the current working
    directory must be the directory that holds them (the management
    command changes into ``WALENTY_PATH`` before calling this function).
    """
    # Only paths of files that were really produced end up here, so the
    # cleanup never touches an unbound name when generation fails half-way
    # and an already written file is still removed.
    created_paths = []
    try:
        all_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'all'))
        checked_path = os.path.join(WALENTY_PATH, u'%s_%ss_%s' % (filename_base, pos.tag, 'verified'))
        checked_statuses = get_checked_statuses()
        ready_statuses = get_ready_statuses()

        lemmas = Lemma.objects.filter(old=False, entry_obj__pos=pos).order_by('entry')
        checked_lemmas = lemmas.filter(status__in=checked_statuses)
        ready_lemmas = lemmas.filter(status__in=ready_statuses)

        created_paths.append(
            _write_pos_walenty(all_path, ready_lemmas, ready_statuses, pos))
        created_paths.append(
            _write_pos_walenty(checked_path, checked_lemmas, checked_statuses, pos))

        pos_dir = u'%ss' % pos.tag
        for path in created_paths:
            filename = os.path.basename(path)
            archive.add(name=filename, arcname=os.path.join(pos_dir, filename))
    finally:
        # Same removal order as before: the "verified" file first.
        for path in reversed(created_paths):
            os.remove(path)

def get_checked_statuses():
    """Return the distinct lemma statuses ranked at or above 'checked'.

    The threshold is the ``priority`` of the LemmaStatusType whose
    ``sym_name`` is ``'checked'``; every Lemma_Status whose type has a
    priority greater than or equal to it is included.
    """
    threshold = LemmaStatusType.objects.get(sym_name='checked').priority
    at_or_above = Lemma_Status.objects.filter(type__priority__gte=threshold)
    return at_or_above.distinct()
      
def get_ready_statuses():
    """Return the distinct lemma statuses ranked at or above 'ready'.

    The threshold is the ``priority`` of the LemmaStatusType whose
    ``sym_name`` is ``'ready'``; every Lemma_Status whose type has a
    priority greater than or equal to it is included.
    """
    threshold = LemmaStatusType.objects.get(sym_name='ready').priority
    at_or_above = Lemma_Status.objects.filter(type__priority__gte=threshold)
    return at_or_above.distinct()