export_as_xml.py
3.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# -*- coding:utf-8 -*-
import sys
from optparse import make_option
from lxml import etree
from django.core.management.base import BaseCommand
from webapp.models import Entry
class Command(BaseCommand):
    """Management command that exports the database as an XML file.

    Positional arguments are forwarded to ``write_xml`` as the list of
    sources. ``--output`` must be given; ``--authorized`` and ``--normalize``
    are boolean switches passed through unchanged.
    """

    args = '<source source ...>'
    help = 'Get database as xml file.'

    # Command-specific switches appended to the stock BaseCommand options.
    option_list = BaseCommand.option_list + (
        make_option('-o', '--output',
                    dest='output', action='store', type='str',
                    default='', help='output path'),
        make_option('-a', '--authorized',
                    dest='authorized', action='store_true',
                    default=False, help='add protected data'),
        make_option('-n', '--normalize',
                    dest='normalize', action='store_true',
                    default=False, help='use normalization'),
    )

    def handle(self, *args, **options):
        """Run the export, or complain on stderr when no output path is set."""
        if options['output']:
            requested_sources = list(args)
            write_xml(requested_sources,
                      options['output'],
                      options['authorized'],
                      options['normalize'])
        else:
            # An empty output path is reported, not raised: the command
            # simply returns without exporting anything.
            sys.stderr.write('Output must be selected!\n')
def write_xml(sources, outpath, authorized, normalize):
    """Export the entries to ``outpath`` as a pretty-printed UTF-8 XML file.

    An ``<entries>`` root element is created and filled by
    ``write_entries``. The tree is written in a ``finally`` clause, so
    whatever has been collected so far is saved even when the export raises
    part-way through; the exception still propagates afterwards.

    :param sources: list of source keys, forwarded to ``write_entries``
    :param outpath: path of the XML file to create or overwrite
    :param authorized: forwarded to ``write_entries``
    :param normalize: forwarded to ``write_entries``
    """
    # Created outside the ``try`` so that the ``finally`` clause can never
    # run with ``root`` unbound and mask the original error with a NameError.
    root = etree.Element('entries')
    try:
        write_entries(sources, root, authorized, normalize)
    finally:
        # ``tostring`` with an explicit encoding returns encoded bytes, so
        # the file is opened in binary mode.
        with open(outpath, 'wb') as output_file:
            output_file.write(etree.tostring(root, pretty_print=True, encoding='UTF-8'))
def write_entries(sources, root, authorized, normalize):
    """Add a ``<meaning>`` element to ``root`` for every exportable meaning.

    Entries are visited ordered by name and their meanings ordered by id.
    A meaning is exported only when it has more than one valid expression
    (after the optional source filter). Each visited entry is printed, and
    the totals of exported meanings and expressions are written to stdout
    at the end.

    :param sources: source keys to restrict expressions to; an empty list
        disables the restriction
    :param root: XML element that receives the ``<meaning>`` children
    :param authorized: when false, entries with ``protected=True`` are
        skipped; also passed to ``meaning.valid_expressions``
    :param normalize: passed through to ``write_meaning``
    """
    meanings_count = 0
    expressions_count = 0

    entries = Entry.objects
    if not authorized:
        entries = entries.filter(protected=False)

    for entry in entries.order_by('name'):
        print(entry)
        for meaning in entry.meanings.order_by('id'):
            expressions = meaning.valid_expressions(authorized)
            if sources:
                expressions = expressions.filter(link__source__key__in=sources).distinct()

            # Count once: the same number drives both the threshold check
            # and the running total, and each count() is a separate query.
            found = expressions.count()
            if found <= 1:
                continue

            write_meaning(meaning, expressions.order_by('-is_catchword', 'text'), root, normalize)
            meanings_count += 1
            expressions_count += found

    # Written through sys.stdout so the summary does not depend on the
    # print statement syntax.
    sys.stdout.write('Meanings:\t%d\n' % meanings_count)
    sys.stdout.write('Expressions:\t%d\n' % expressions_count)
def write_meaning(meaning, expressions, root, normalize):
    """Append a ``<meaning>`` element with one ``<desc>`` child per expression.

    Every ``<desc>`` carries the attributes ``catchword``, ``entrylink``,
    ``source``, ``base``, ``categories`` (the meaning's domain names joined
    with ``;``) and the head-segment attributes ``head_orth``,
    ``head_base``, ``ctag`` and ``msd``. Its text is the expression's
    ``orth_text``. If the head segment cannot be read, all four head
    attributes are left empty.

    :param meaning: object whose ``domains`` supply the category names
    :param expressions: iterable of expressions to export for this meaning
    :param root: XML element that receives the new ``<meaning>`` child
    :param normalize: accepted for interface compatibility; not used here
    """
    meaning_node = etree.SubElement(root, 'meaning')
    # Identical for every expression of this meaning, so it is built once.
    categories = ';'.join([domain.name for domain in meaning.domains.order_by('name')])

    for expr in expressions:
        desc = etree.SubElement(meaning_node, 'desc')
        desc.attrib['catchword'] = 'true' if expr.is_catchword else 'false'
        desc.attrib['entrylink'] = expr.link.exact_link
        desc.attrib['source'] = expr.link.source.key
        desc.attrib['base'] = expr.base_text
        desc.attrib['categories'] = categories
        try:
            # One lookup instead of one per attribute.
            head = expr.segments.get(is_head=True)
            desc.attrib['head_orth'] = head.orth
            desc.attrib['head_base'] = head.base
            desc.attrib['ctag'] = head.ctag
            desc.attrib['msd'] = head.msd
        except Exception:
            # Best effort: any failure while reading the head segment blanks
            # all four attributes. ``Exception`` (not a bare ``except``) lets
            # KeyboardInterrupt and SystemExit still stop the export.
            desc.attrib['head_orth'] = ''
            desc.attrib['head_base'] = ''
            desc.attrib['ctag'] = ''
            desc.attrib['msd'] = ''
        desc.text = expr.orth_text
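# NOTE(review): the ``normalize`` flag is accepted but never applied in the
# code above. The disabled draft below refers to names that are not defined
# in this file (NORMALIZE, mng_expressions, csv_file), so it cannot simply be
# uncommented; it is kept only as a sketch of the intended normalization step.
# if NORMALIZE:
# orth_expressions = [expr.orth_text for expr in mng_expressions]
# orth_expressions.extend(normalize.expressions(mng_expressions))
# csv_file.write(u'%d\t%s\n' % (meaning.id, u'\t'.join([expr for expr in orth_expressions])))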