# export_as_xml.py 2.66 KB
# -*- coding:utf-8 -*-

import sys
from optparse import make_option

from django.core.management.base import BaseCommand, CommandError
from lxml import etree

from webapp.models import Expression, Meaning


class Command(BaseCommand):
    """Management command that writes database content to an XML file.

    Positional arguments are source keys; they are handed to ``write_xml``
    unchanged. The destination file is given with the mandatory ``--output``
    option.
    """

    args = '<source source ...>'
    help = 'Get database as xml file.'

    option_list = BaseCommand.option_list + (
        make_option('--output',
                    action='store',
                    dest='output',
                    type='str',
                    default='',
                    help='output path'),
    )

    def handle(self, *args, **options):
        """Validate the options and run the export.

        Args:
            *args: source keys passed on to ``write_xml``.
            **options: parsed command-line options; ``options['output']`` is
                the path of the file to write.

        Raises:
            CommandError: if no ``--output`` path was supplied.
        """
        output_path = options['output']
        if not output_path:
            # Raise instead of printing and returning: a bare ``return`` makes
            # the command finish as if it had succeeded, while CommandError is
            # Django's documented way to signal a failed command.
            raise CommandError("Output must be selected!")

        write_xml(list(args), output_path)


def write_xml(sources, outpath):
    """Build the ``<entries>`` tree and serialize it to ``outpath``.

    The file is written from a ``finally`` clause, so whatever
    ``write_entries`` managed to add before failing is still saved; the
    original exception then propagates to the caller.

    Args:
        sources: list of source keys forwarded to ``write_entries``.
        outpath: path of the file to create or overwrite.
    """
    # Create the root before entering ``try`` so it is always bound when the
    # ``finally`` clause runs (otherwise a failure here would surface as a
    # NameError from the cleanup code).
    root = etree.Element('entries')
    try:
        write_entries(sources, root)
    finally:
        # Serialize first so the target file is only opened (and truncated)
        # once there is something to write.
        xml_bytes = etree.tostring(root, pretty_print=True, encoding='UTF-8')
        # tostring() with an explicit encoding yields an encoded byte string,
        # so the file is opened in binary mode.
        with open(outpath, 'wb') as output_file:
            output_file.write(xml_bytes)


def write_entries(sources, root):
    """Add an entry to ``root`` for each exported meaning and print totals.

    A meaning is exported only when more than one of its expressions has
    ``main_expression=None``. After the loop the number of exported meanings
    and the number of their expressions are printed to standard output.

    Args:
        sources: list of source keys; when non-empty, only meanings matching
            ``expressions__link__source__key__in=sources`` are considered.
        root: element passed to ``write_entry`` as the parent of new entries.
    """
    candidates = Meaning.objects.all()
    if sources:
        candidates = candidates.filter(
            expressions__link__source__key__in=sources).distinct()

    exported_meanings = 0
    exported_expressions = 0
    for meaning in candidates:
        main_expressions = meaning.expressions.filter(main_expression=None)
        total = main_expressions.count()
        if total <= 1:
            # A single (or no) expression is not exported.
            continue
        write_entry(meaning, main_expressions, root)
        exported_meanings += 1
        exported_expressions += total

    # Single-argument print(...) gives the same output as the statement form.
    print('Meanings:\t' + str(exported_meanings))
    print('Expressions:\t' + str(exported_expressions))


def write_entry(meaning, expressions, root):
    """Append one ``<entry>`` element for ``meaning`` to ``root``.

    Every expression becomes a ``<desc>`` child whose text is the expression's
    ``orth_text`` and whose attributes hold its catchword flag, link, source
    key, base text, the meaning's domain names joined with ``;`` and the
    orth/base/ctag/msd of its head segment. When the head segment cannot be
    read, those four attributes are written as empty strings.

    Each expression's ``text`` is also printed to standard output.

    Args:
        meaning: object whose ``domains.order_by('name')`` supplies the
            category names shared by all ``<desc>`` elements of the entry.
        expressions: iterable of expressions to write.
        root: parent element that receives the new ``<entry>``.
    """
    entry = etree.SubElement(root, 'entry')
    # The category list is the same for every expression of the meaning.
    categories = ';'.join(
        [domain.name for domain in meaning.domains.order_by('name')])

    for expr in expressions:
        print(expr.text)
        desc = etree.SubElement(entry, 'desc')
        desc.attrib['catchword'] = 'true' if expr.is_catchword else 'false'
        desc.attrib['entrylink'] = expr.link.exact_link
        desc.attrib['source'] = expr.link.source.key
        desc.attrib['base'] = expr.base_text
        desc.attrib['categories'] = categories
        try:
            # Look the head segment up once instead of once per attribute.
            head = expr.segments.get(is_head=True)
            desc.attrib['head_orth'] = head.orth
            desc.attrib['head_base'] = head.base
            desc.attrib['ctag'] = head.ctag
            desc.attrib['msd'] = head.msd
        except Exception:
            # Best effort: any failure to read the head segment leaves all
            # four attributes empty. ``Exception`` (not a bare ``except``)
            # lets KeyboardInterrupt and SystemExit stop the export.
            for attr_name in ('head_orth', 'head_base', 'ctag', 'msd'):
                desc.attrib[attr_name] = ''
        desc.text = expr.orth_text