# get_examples.py 3.65 KB
#-*- coding:utf-8 -*-

import codecs
import datetime
import os

from django.core.management.base import BaseCommand

from dictionary.models import Lemma, get_ready_statuses
from settings import PROJECT_PATH

# Directory into which the export files produced by this module are written.
BASE_PATH = os.path.join(PROJECT_PATH, 'data')

# Column headers (in Polish) of the detailed export; the order matches the
# order of the values written for every row in write_detailed_examples().
LABELS = (u'hasło',  # entry
          u'status hasła',  # entry status
          u'identyfikator schematu',  # schema identifier
          u'schemat',  # schema
          u'opinia o schemacie',  # opinion about the schema
          u'przykład',  # example
          u'opinia o przykładzie',  # opinion about the example
          # source of the example
          # NOTE(review): probably a typo for u'źródło przykładu'; left as is
          # because the text is emitted in the header of the output file.
          u'zródło przykładu',
          u'wybór typów fraz')  # selection of phrase types

class Command(BaseCommand):
    """Management command that runs the example export (see get_examples)."""
    help = 'Get pinned examples from Slowal.'

    def handle(self, **options):
        """Run the export; ``options`` are accepted but not used."""
        get_examples()
        
def get_examples():
    """Export the examples of lemmas having one of the "ready" statuses.

    The statuses are obtained from get_ready_statuses() and handed straight
    to write_detailed_examples().
    """
    # Only the detailed export is active; the write_examples() call that used
    # to follow here is disabled.
    write_detailed_examples(get_ready_statuses())
    
def write_detailed_examples(statuses):
    """Dump examples of the selected lemmas to a dated, tab-separated file.

    The file ``detailed_examples_<YYYYMMDD>.csv`` is created in BASE_PATH
    (columns are separated with tabs despite the extension). It starts with a
    header row built from LABELS, followed by one row per example attached to
    a frame of a lemma. Phraseologic frames are skipped for lemmas whose
    ``phraseology_ready()`` is false.

    Args:
        statuses: lemma statuses to include (used in a ``status__in`` filter);
            only lemmas with ``old=False`` are considered.

    Raises:
        IndexError: if an exported frame has no opinion recorded for the lemma.
    """
    lemmas = Lemma.objects.filter(old=False).filter(status__in=statuses)
    today = datetime.datetime.now().strftime('%Y%m%d')
    path = os.path.join(BASE_PATH, 'detailed_examples_%s.csv' % today)

    # codecs.open() always opens the underlying file in binary mode, so the
    # mode is plain 'w' ('wt' would be combined with 'b' into an invalid mode
    # on Python 3). The ``with`` block closes the file on every exit path and,
    # unlike a ``finally`` that references the handle, does not hide the
    # original error when opening the file fails.
    with codecs.open(path, 'w', 'utf-8') as examples_file:
        examples_file.write(u'%s\n' % u'\t'.join(LABELS))
        for lemma in lemmas.order_by('entry_obj__name'):
            print(lemma)  # progress output
            lemma_entry = lemma.entry_obj.name
            lemma_status = lemma.status.status
            for frame in lemma.frames.order_by('text_rep'):
                # Test the cheap attribute first so phraseology_ready() is
                # only called for phraseologic frames.
                if frame.phraseologic and not lemma.phraseology_ready():
                    continue
                frame_opinion = lemma.frame_opinions.filter(frame=frame)[0].value
                for example in lemma.nkjp_examples.filter(frame=frame):
                    # Keep every example on a single line and in a single
                    # column of the tab-separated output.
                    sentence = example.sentence.replace('\n', ' ').replace('\r', '').replace('\t', ' ')
                    arguments_selection = u' + '.join(
                        [selection.__unicode__()
                         for selection in example.arguments.all()])
                    examples_file.write(
                        u'%s\t%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s\n' % (
                            lemma_entry,
                            lemma_status,
                            frame.id,
                            frame.get_position_spaced_text_rep(),
                            frame_opinion,
                            sentence,
                            example.opinion.opinion,
                            example.source.source,
                            arguments_selection))

def write_examples(statuses):
    """Write a plain-text listing of examples grouped by lemma and frame.

    The output file is ``examples_gotowe_plus.txt`` in BASE_PATH. For every
    lemma with ``old=False`` and a status in ``statuses`` the entry is written
    on its own line, followed by each frame whose opinion value is not u'zła'
    (one tab of indentation) and the examples of that frame (two tabs and an
    arrow). Lemmas are separated by two blank lines.

    Args:
        statuses: lemma statuses to include (used in a ``status__in`` filter).
    """
    # NOTE(review): write_detailed_examples() reads the entry through
    # ``entry_obj.name`` while this function uses ``lemma.entry`` and orders
    # by 'entry' -- confirm that field still exists on Lemma.
    path = os.path.join(BASE_PATH, 'examples_gotowe_plus.txt')
    lemmas = Lemma.objects.filter(old=False).filter(status__in=statuses)

    # codecs.open() always opens the underlying file in binary mode, so the
    # mode is plain 'w'. The ``with`` block closes the file on every exit path
    # and does not hide the original error when opening the file fails.
    with codecs.open(path, 'w', 'utf-8') as examples_file:
        for lemma in lemmas.order_by('entry'):
            print(lemma)  # progress output
            examples_file.write(lemma.entry + '\n')
            for frame in lemma.frames.order_by('text_rep'):
                # Frames whose opinion value is u'zła' are left out.
                if lemma.frame_opinions.get(frame=frame).value.value == u'zła':
                    continue
                examples_file.write('\t%s\n' % frame.text_rep)
                for example in lemma.nkjp_examples.filter(frame=frame):
                    examples_file.write('\t\t--> %s\n' % example.sentence)
            examples_file.write('\n\n')