export_as_csv.py
1.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# -*- coding:utf-8 -*-
import codecs
import datetime
import os
from django.core.management.base import BaseCommand
from normalization import normalize
from settings import PROJECT_PATH
from webapp.models import Entry
# Export switches read by export_csv().
# AUTHORIZED: when False, only entries with protected=False are exported;
# the flag is also passed on to Meaning.valid_expressions().
AUTHORIZED = False
# FULL_EXPORT: when False, expressions are limited to those linked to SOURCES.
FULL_EXPORT = True
# NORMALIZE: when True, the output of normalize.expressions() is appended
# to every exported line.
NORMALIZE = True
# Source keys used for filtering when FULL_EXPORT is off.
SOURCES = ['sjp']

# Pieces of the output file name; they are joined with '-' below.  The
# optional pieces are present only when the matching switch asks for them,
# and the current date (YYYYMMDD) always comes last.
FILE_NAME_PARTS = (
    ['periphraser']
    + (['auth'] if AUTHORIZED else [])
    + (['normalized'] if NORMALIZE else [])
    + ([] if FULL_EXPORT else ['_'.join(SOURCES)])
    + [datetime.datetime.now().strftime('%Y%m%d')]
)

# Destination of the export: <PROJECT_PATH>/data/<joined parts>.csv
CSV_PATH = os.path.join(
    PROJECT_PATH, 'data', '-'.join(FILE_NAME_PARTS) + '.csv')
class Command(BaseCommand):
    """Management command that runs the CSV export."""

    help = 'Get database as csv.'

    def handle(self, *args, **options):
        """Run export_csv(); positional arguments and options are unused."""
        export_csv()
def export_csv():
    """Write the expressions of all entries to the file at CSV_PATH.

    Entries are visited in ``name`` order (entries that are not
    ``protected=False`` are skipped unless AUTHORIZED is set) and their
    meanings in ``id`` order.  Each meaning with at least two valid
    expressions produces one UTF-8 line of tab-separated values: the
    ``orth_text`` of every expression, ordered by descending
    ``is_catchword`` and then by ``text``, followed -- when NORMALIZE is
    set -- by the values returned by ``normalize.expressions``.  Unless
    FULL_EXPORT is set, only expressions linked to one of SOURCES are
    considered.

    Errors raised while opening or writing the file propagate to the caller.
    """
    # 'w' rather than 'wt': codecs.open() adds 'b' itself when an encoding is
    # given, and a combined text+binary mode is rejected by Python 3.
    # The with-block closes the file on every exit path; if the open itself
    # fails, that error propagates instead of being masked by cleanup code
    # touching an unbound variable.
    with codecs.open(CSV_PATH, 'w', 'utf-8') as csv_file:
        entries = Entry.objects
        if not AUTHORIZED:
            entries = entries.filter(protected=False)
        for entry in entries.order_by('name'):
            # Progress output; the parenthesised form prints the same on
            # Python 2 and is valid on Python 3.
            print(entry)
            for meaning in entry.meanings.order_by('id'):
                mng_expressions = meaning.valid_expressions(AUTHORIZED)
                if not FULL_EXPORT:
                    mng_expressions = mng_expressions.filter(
                        link__source__key__in=SOURCES).distinct()
                # Meanings with fewer than two expressions are not exported.
                if mng_expressions.count() < 2:
                    continue
                mng_expressions = mng_expressions.order_by(
                    '-is_catchword', 'text')
                columns = [expr.orth_text for expr in mng_expressions]
                if NORMALIZE:
                    columns.extend(normalize.expressions(mng_expressions))
                csv_file.write(u'%s\n' % u'\t'.join(columns))