# PhraseTypeXML.py 2.45 KB
#! /usr/bin/python
# -*- coding: utf-8 -*-

from xml.sax import handler
from PhraseTypeEntry import PhraseTypeEntry


class XMLNode:
    """Minimal tree node describing one XML element.

    A node keeps the element name, its attribute collection, a reference
    to its parent node (``None`` for the root of a subtree), the list of
    child nodes in insertion order and a text content string.

    The constructor does not register the new node with its parent; the
    caller has to do that through :meth:`addChild`.
    """

    def __init__(self, name, attrs, parent):
        """Create a node without children and with empty content.

        name   -- element name (string)
        attrs  -- mapping-like attribute collection; must offer ``items()``
        parent -- the enclosing XMLNode, or None for a subtree root
        """
        self._name = name
        self._attrs = attrs
        self._children = []
        self._parent = parent
        self._content = ""

    def addChild(self, child):
        """Append *child* to this node's list of children."""
        self._children.append(child)

    def setContent(self, content):
        """Replace the text content stored on this node."""
        self._content = content

    def __str__(self):
        """Return ``name[attribute pairs](child;child;...)`` recursively.

        The text content is not part of the representation.
        """
        # Materialise the (key, value) pairs as a real list: a lazy
        # zip()/items() view would print as an opaque object on Python 3.
        att = list(self._attrs.items())
        children = ';'.join(str(child) for child in self._children)
        return self._name + '[' + str(att) + '](' + children + ')'
        
    
class PhraseTypeTeiHandler(handler.ContentHandler):
    """SAX handler that turns ``entry`` elements into PhraseTypeEntry objects.

    Outside of an entry, the text of ``title``, ``publisher`` and
    ``licence`` elements and the ``when`` attribute of ``date`` elements
    are written as ``% ...`` comment lines to every output. From the first
    ``entry`` start tag on, each element is recorded as an XMLNode; when
    an ``entry`` closes, the collected subtree is wrapped in a
    PhraseTypeEntry and written to the outputs.

    NOTE(review): ``_constructing`` is never set back to False, so after
    the first entry every later element is also turned into a node (and
    is no longer treated as metadata) -- confirm this is intended.
    """

    # Elements whose accumulated text is echoed as a metadata line on close.
    _META_ELEMENTS = ('title', 'publisher', 'licence')

    def __init__(self, out):
        """Set up the handler.

        out -- dict whose values are writable file-like objects; the whole
               dict is also handed to ``PhraseTypeEntry.write``.
        """
        handler.ContentHandler.__init__(self)
        self._out = out
        self._subtree = None        # root node of the entry being built
        self._current = None        # innermost open node, None outside a subtree
        self._constructing = False  # True once the first 'entry' has started
        self._content = ""          # character data gathered since last reset

    def printMeta(self, text):
        """Write *text* as a ``% `` prefixed line to every output."""
        for out in self._out.values():
            out.write('% ' + text + '\n')

    def startElement(self, name, attrs):
        """Handle an opening tag.

        Echoes the ``when`` attribute of ``date`` elements as metadata,
        switches to tree construction on ``entry`` and, while constructing,
        creates an XMLNode for the element and makes it the current node.
        """
        if name == 'date':
            # A date without a 'when' attribute carries nothing to echo;
            # skip it instead of failing with KeyError.
            when = attrs.get('when')
            if when is not None:
                self.printMeta(when)
        if name == 'entry':
            self._constructing = True
            self._content = ""
        if self._constructing:
            node = XMLNode(name, attrs, self._current)
            if self._current is not None:
                self._current.addChild(node)
            else:
                # No open node: this element starts a new subtree.
                self._subtree = node
            self._current = node

    def endElement(self, name):
        """Handle a closing tag.

        Inside a subtree the gathered text is stored on the current node
        and the parent becomes current again; a closing ``entry`` emits
        the finished subtree. Outside a subtree the metadata elements are
        echoed and ``p`` inserts a comment line break into the buffer.

        Raises TEIStructureError when ``entry`` closes while an enclosing
        node is still open.
        """
        if self._current is not None:
            # The node receives the text gathered since the previous reset
            # (entry start or the last closing tag inside the subtree).
            self._current.setContent(self._content)
            self._current = self._current._parent
            if name == 'entry':
                if self._current is not None:
                    # NOTE(review): TEIStructureError is neither defined nor
                    # imported in the visible part of this file -- confirm
                    # where it comes from.
                    raise TEIStructureError()
                entry = PhraseTypeEntry(self._subtree)
                entry.write(self._out)
            self._content = ''
        elif name in self._META_ELEMENTS:
            self.printMeta(self._content)
            # Reset the buffer so this text is not repeated in the next
            # metadata line (the 'licence' case used to miss this reset).
            self._content = ''
        elif name == 'p':
            # Keep multi-paragraph metadata inside the '% ' comment block.
            self._content += '\n% '

    def characters(self, content):
        """Append the whitespace-stripped character chunk to the buffer."""
        self._content += content.strip()

    def endDocument(self):
        """Nothing to finalise at the end of the document."""
        pass