# export_as_csv.py (1.9 KB)
# -*- coding:utf-8 -*-

import codecs
import datetime
import os

from django.core.management.base import BaseCommand

from normalization import normalize
from settings import PROJECT_PATH
from webapp.models import Entry

# Export switches read by export_csv().
AUTHORIZED = False   # when False, entries with protected=True are filtered out
FULL_EXPORT = True   # when False, only expressions linked to SOURCES are kept
NORMALIZE = True     # when True, normalize.expressions() output is appended
SOURCES = ['sjp']    # source keys consulted when FULL_EXPORT is False

# Output file name: 'periphraser', optional markers for the switches above,
# then the date (YYYYMMDD) at import time, all joined with '-'.
FILE_NAME_PARTS = ['periphraser']
FILE_NAME_PARTS += ['auth'] if AUTHORIZED else []
FILE_NAME_PARTS += ['normalized'] if NORMALIZE else []
FILE_NAME_PARTS += [] if FULL_EXPORT else ['_'.join(SOURCES)]
FILE_NAME_PARTS += [datetime.datetime.now().strftime('%Y%m%d')]

CSV_PATH = os.path.join(
    PROJECT_PATH, 'data', '-'.join(FILE_NAME_PARTS) + '.csv')


class Command(BaseCommand):
    """Management command that runs export_csv()."""
    help = 'Get database as csv.'

    def handle(self, *args, **options):
        """Run the export; positional arguments and options are not used."""
        export_csv()


def export_csv():
    """Write one tab-separated line per exported meaning to CSV_PATH.

    Entries are visited in 'name' order; unless AUTHORIZED is set, only
    entries with protected=False are included.  For every meaning of an
    entry (in 'id' order) the expressions returned by
    meaning.valid_expressions(AUTHORIZED) are collected; when FULL_EXPORT
    is off they are further restricted to those linked to one of SOURCES.
    Meanings with fewer than two such expressions are skipped.

    Each written line holds the expressions' orth_text values, ordered by
    ('-is_catchword', 'text').  When NORMALIZE is on, the items returned by
    normalize.expressions() are appended to the same line.

    The file is UTF-8 encoded and is overwritten if it already exists.
    Errors raised while opening or writing the file propagate to the
    caller.
    """
    entries = Entry.objects
    if not AUTHORIZED:
        entries = entries.filter(protected=False)

    # codecs.open() always opens the underlying file in binary mode, so the
    # text flag 't' must not appear in the mode string.  The ``with`` block
    # closes the file on every exit path and lets a failure to open the file
    # surface as-is instead of being masked by a NameError during cleanup.
    with codecs.open(CSV_PATH, 'w', 'utf-8') as csv_file:
        for entry in entries.order_by('name'):
            print(entry)  # progress output, one line per entry
            for meaning in entry.meanings.order_by('id'):
                mng_expressions = meaning.valid_expressions(AUTHORIZED)

                if not FULL_EXPORT:
                    mng_expressions = mng_expressions.filter(
                        link__source__key__in=SOURCES).distinct()

                # A single expression has nothing to be paired with.
                if mng_expressions.count() < 2:
                    continue

                mng_expressions = mng_expressions.order_by(
                    '-is_catchword', 'text')

                columns = [expr.orth_text for expr in mng_expressions]
                if NORMALIZE:
                    columns.extend(normalize.expressions(mng_expressions))

                csv_file.write(u'%s\n' % u'\t'.join(columns))