export_as_xml.py
2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# -*- coding:utf-8 -*-
import sys
from optparse import make_option
from lxml import etree
from django.core.management.base import BaseCommand
from webapp.models import Expression, Meaning
class Command(BaseCommand):
    """Management command that exports the database as an XML file.

    Positional arguments are source keys used to restrict the export; the
    ``--output`` option names the file the XML is written to.
    """

    args = '<source source ...>'
    help = 'Get database as xml file.'

    option_list = BaseCommand.option_list + (
        make_option(
            '--output',
            action='store',
            dest='output',
            type='str',
            default='',
            help='output path',
        ),
    )

    def handle(self, *args, **options):
        """Run the export, or complain on stderr when no output path is given.

        :param args: source keys passed on the command line (may be empty).
        :param options: parsed command options; only ``output`` is read.
        """
        outpath = options['output']
        if outpath:
            write_xml(list(args), outpath)
        else:
            # An empty path is the option's default, i.e. --output was omitted.
            sys.stderr.write("Output must be selected!\n")
def write_xml(sources, outpath):
    """Build the ``<entries>`` document and write it to *outpath*.

    The file is written from a ``finally`` clause, so whatever part of the
    tree was built before ``write_entries`` raised still ends up on disk;
    the exception itself keeps propagating to the caller.

    :param sources: list of source keys passed through to ``write_entries``.
    :param outpath: path of the file to create or overwrite.
    """
    # Created before the ``try`` so the ``finally`` clause can never hit an
    # unbound ``root`` and mask the original error with a NameError.
    root = etree.Element('entries')
    try:
        write_entries(sources, root)
    finally:
        # tostring() with an explicit encoding returns an encoded byte
        # string, so the file is opened in binary mode to store those bytes
        # verbatim (no newline translation, no implicit re-encoding).
        with open(outpath, 'wb') as output_file:
            output_file.write(
                etree.tostring(root, pretty_print=True, encoding='UTF-8'))
def write_entries(sources, root):
    """Append an ``<entry>`` to *root* for every exported meaning.

    When *sources* is non-empty, only meanings that have an expression whose
    link's source key is in *sources* are considered. For each meaning the
    expressions with ``main_expression=None`` are selected, and the meaning
    is written only if there is more than one of them. Totals of written
    meanings and expressions are printed to stdout at the end.

    :param sources: list of source keys; an empty list means "no filter".
    :param root: XML element that receives the ``<entry>`` children.
    """
    meanings_count = 0
    expressions_count = 0

    meanings = Meaning.objects.all()
    if sources:
        meanings = meanings.filter(
            expressions__link__source__key__in=sources).distinct()

    for meaning in meanings:
        expressions = meaning.expressions.filter(main_expression=None)
        # Count once and reuse the value for both the test and the total.
        found = expressions.count()
        # NOTE(review): meanings with exactly one matching expression are
        # skipped -- confirm that this is intended.
        if found > 1:
            write_entry(meaning, expressions, root)
            meanings_count += 1
            expressions_count += found

    # Single-argument print(...) behaves the same as a statement (Python 2)
    # and as a function call (Python 3).
    print('Meanings:\t%d' % meanings_count)
    print('Expressions:\t%d' % expressions_count)
def write_entry(meaning, expressions, root):
    """Append one ``<entry>`` element for *meaning* to *root*.

    The entry gets one ``<desc>`` child per expression. Each ``<desc>``
    carries the expression's ``orth_text`` as its text and these attributes:
    ``catchword`` (``'true'``/``'false'``), ``entrylink``, ``source``,
    ``base``, ``categories`` (names of the meaning's domains, ordered by
    name and joined with ``;``) and the head segment's ``head_orth``,
    ``head_base``, ``ctag`` and ``msd``. The four head attributes are all
    set to the empty string when the head segment cannot be fetched or
    stored. Each expression's ``text`` is also printed to stdout.

    :param meaning: object exposing ``domains.order_by('name')``.
    :param expressions: iterable of expressions belonging to *meaning*.
    :param root: XML element that receives the new ``<entry>``.
    """
    entry = etree.SubElement(root, 'entry')
    # The category list is the same for every expression; build it once.
    categories = ';'.join(
        [domain.name for domain in meaning.domains.order_by('name')])

    for expr in expressions:
        print(expr.text)
        desc = etree.SubElement(entry, 'desc')
        desc.attrib['catchword'] = 'true' if expr.is_catchword else 'false'
        desc.attrib['entrylink'] = expr.link.exact_link
        desc.attrib['source'] = expr.link.source.key
        desc.attrib['base'] = expr.base_text
        desc.attrib['categories'] = categories
        try:
            # One lookup instead of repeating the same query per attribute.
            head = expr.segments.get(is_head=True)
            desc.attrib['head_orth'] = head.orth
            desc.attrib['head_base'] = head.base
            desc.attrib['ctag'] = head.ctag
            desc.attrib['msd'] = head.msd
        except Exception:
            # Deliberately best-effort: a missing, ambiguous or unusable
            # head segment must not abort the export. Unlike a bare
            # ``except:``, KeyboardInterrupt and SystemExit still propagate.
            for name in ('head_orth', 'head_base', 'ctag', 'msd'):
                desc.attrib[name] = ''
        desc.text = expr.orth_text