# export_as_xml.py 3.69 KB
# -*- coding:utf-8 -*-

import sys

from optparse import make_option

from lxml import etree

from django.core.management.base import BaseCommand

from webapp.models import Entry


class Command(BaseCommand):
    """Management command that exports database entries to an XML file.

    Positional arguments are collected into a list and handed to
    ``write_xml`` as ``sources``; the ``--output`` option is mandatory.
    """

    args = '<source source ...>'
    help = 'Get database as xml file.'

    option_list = BaseCommand.option_list + (
        # Destination file of the export.
        make_option('-o', '--output', action='store', dest='output',
                    type='str', default='', help='output path'),
        # Forwarded to ``write_xml`` as ``authorized``.
        make_option('-a', '--authorized', action='store_true',
                    dest='authorized', default=False,
                    help='add protected data'),
        # Forwarded to ``write_xml`` as ``normalize``.
        make_option('-n', '--normalize', action='store_true',
                    dest='normalize', default=False,
                    help='use normalization'),
    )

    def handle(self, *args, **options):
        """Run the export, or complain on stderr when no output path is given.

        :param args: positional command-line arguments, used as sources.
        :param options: parsed options; reads ``output``, ``authorized``
            and ``normalize``.
        """
        output_path = options['output']
        if output_path:
            write_xml(list(args), output_path,
                      options['authorized'], options['normalize'])
        else:
            print >> sys.stderr, 'Output must be selected!'


def write_xml(sources, outpath, authorized, normalize):
    """Build the ``<entries>`` tree and serialize it to ``outpath``.

    :param sources: list of source keys forwarded to ``write_entries``.
    :param outpath: path of the XML file to create (overwritten if present).
    :param authorized: forwarded to ``write_entries``.
    :param normalize: forwarded to ``write_entries``.

    Any exception raised while collecting entries still propagates to the
    caller, but only after the tree built so far has been written out.
    """
    # Create the root before entering ``try`` so the ``finally`` clause can
    # always serialize it; otherwise a failure here would surface as a
    # NameError that hides the real error.
    root = etree.Element('entries')
    try:
        write_entries(sources, root, authorized, normalize)
    finally:
        # Kept in ``finally`` on purpose: whatever was collected before a
        # failure is still written to disk.
        # ``tostring`` with an explicit encoding yields encoded bytes, so the
        # file is opened in binary mode to store them unmodified.
        with open(outpath, 'wb') as output_file:
            output_file.write(etree.tostring(root, pretty_print=True, encoding='UTF-8'))

def write_entries(sources, root, authorized, normalize):
    meanings_count = 0
    expressions_count = 0

    entries = Entry.objects

    if not authorized:
        entries = entries.filter(protected=False)

    for entry in entries.order_by('name'):
        print (entry)
        for meaning in entry.meanings.order_by('id'):
            expressions = meaning.valid_expressions(authorized)
            if sources:
                expressions = expressions.filter(link__source__key__in=sources).distinct()

            if expressions.count() > 1:
                write_meaning(meaning, expressions.order_by('-is_catchword', 'text'), root, normalize)
                meanings_count += 1
                expressions_count += expressions.count()
    print 'Meanings:\t', str(meanings_count)
    print 'Expressions:\t', str(expressions_count)


def write_meaning(meaning, expressions, root, normalize):
    """Append a ``<meaning>`` element with one ``<desc>`` child per expression.

    Each ``<desc>`` carries the expression's catchword flag, link, source key,
    base text, the meaning's domain names joined with ``;``, and the
    orth/base/ctag/msd of its head segment. Its text is the expression's
    ``orth_text``.

    :param meaning: object whose ``domains`` supply the category names.
    :param expressions: iterable of expressions belonging to ``meaning``.
    :param root: parent XML element that receives the ``<meaning>`` node.
    :param normalize: accepted for interface compatibility; not used here.
    """
    meaning_node = etree.SubElement(root, 'meaning')
    # The category list is the same for every expression of this meaning,
    # so it is built once rather than on each iteration.
    categories = ';'.join([domain.name for domain in meaning.domains.order_by('name')])

    for expr in expressions:
        desc = etree.SubElement(meaning_node, 'desc')
        desc.attrib['catchword'] = 'true' if expr.is_catchword else 'false'
        desc.attrib['entrylink'] = expr.link.exact_link
        desc.attrib['source'] = expr.link.source.key
        desc.attrib['base'] = expr.base_text
        desc.attrib['categories'] = categories
        try:
            # Look the head segment up once and reuse it for all four
            # attributes.
            head = expr.segments.get(is_head=True)
            desc.attrib['head_orth'] = head.orth
            desc.attrib['head_base'] = head.base
            desc.attrib['ctag'] = head.ctag
            desc.attrib['msd'] = head.msd
        except Exception:
            # Best-effort: if the head lookup or any attribute assignment
            # fails, all four head attributes are exported empty.
            # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt
            # and SystemExit through.
            desc.attrib['head_orth'] = ''
            desc.attrib['head_base'] = ''
            desc.attrib['ctag'] = ''
            desc.attrib['msd'] = ''
        desc.text = expr.orth_text

# if NORMALIZE:
#     orth_expressions = [expr.orth_text for expr in mng_expressions]
#     orth_expressions.extend(normalize.expressions(mng_expressions))
#     csv_file.write(u'%d\t%s\n' % (meaning.id, u'\t'.join([expr for expr in orth_expressions])))