# WalentyXML.py
#! /usr/bin/python
# -*- coding: utf-8 -*-

from xml.sax import handler
from importer.Entry import Entry


class XMLNode:
    """Minimal in-memory XML element: name, attributes, children and text.

    Nodes are linked both ways: each node keeps a list of its children and a
    reference to its parent (None for a root).
    """

    def __init__(self, name, attrs, parent):
        """Create a childless node with empty text content.

        name   -- element name.
        attrs  -- mapping-like object holding the element's attributes; it
                  must provide items() (dicts and SAX attribute objects do).
        parent -- the enclosing XMLNode, or None for a root node.  The new
                  node is NOT attached to the parent here; use addChild().
        """
        self._name = name
        self._attrs = attrs
        self._children = []
        self._parent = parent
        self._content = ""

    def addChild(self, child):
        """Append child to the end of this node's list of children."""
        self._children.append(child)

    def setContent(self, content):
        """Replace this node's text content with content."""
        self._content = content

    def __str__(self):
        """Return 'name[attribute pairs](child;child;...)' for debugging.

        The text content is not part of the rendering.
        """
        # Materialise the pairs as a list: on Python 3 a bare zip() is a lazy
        # iterator whose str() is '<zip object at 0x...>', which hides the
        # attributes and makes the output differ from run to run.
        pairs = list(self._attrs.items())
        rendered_children = ';'.join(str(child) for child in self._children)
        return self._name + '[' + str(pairs) + '](' + rendered_children + ')'
        
    
class WalentyTeiHandler(handler.ContentHandler):
    """SAX content handler that turns every <entry> element into an Entry.

    While the parser is inside an <entry>, each element is mirrored as an
    XMLNode so that the whole entry is available as a tree.  When the entry
    closes, the tree is handed to Entry and the result is stored.  Elements
    outside of an entry are not turned into nodes.
    """

    def __init__(self, entry_meanings, meanings, frames):
        """Initialise parser state and keep the objects passed on to Entry.

        entry_meanings, meanings and frames are forwarded unchanged to the
        Entry constructor (meanings also to Entry.store); this class does not
        inspect them.
        """
        handler.ContentHandler.__init__(self)
        self._subtree = None        # root node of the entry tree being built
        self._current = None        # innermost open node; None outside a tree
        self._constructing = False  # True only between <entry> and </entry>
        self._content = ""          # character data gathered since last reset
        self._entry_meanings = entry_meanings
        self._meanings = meanings
        self._frames = frames
        self._stored_positions = {}  # shared across all Entry.store() calls

    def startElement(self, name, attrs):
        """Open a new node for name when inside (or starting) an <entry>."""
        if name == 'entry':
            self._constructing = True
            self._content = ""
        if not self._constructing:
            # Elements outside of an entry are not mirrored.
            return
        node = XMLNode(name, attrs, self._current)
        if self._current is None:
            self._subtree = node
        else:
            self._current.addChild(node)
        self._current = node

    def endElement(self, name):
        """Close the current node; on </entry> build and store the Entry.

        Raises TEIStructureError when an <entry> closes while an enclosing
        node is still open (i.e. the entry was nested inside another one).
        """
        if self._current is None:
            # Outside of an entry only paragraph ends leave a trace in the
            # collected text; every other element is ignored.
            if name == 'p':
                self._content += '\n% '
            return

        # The text gathered since the previous reset becomes this node's
        # content, then we step back up to the parent.
        self._current.setContent(self._content.strip())
        self._current = self._current._parent
        if name == 'entry':
            if self._current is not None:
                # NOTE(review): TEIStructureError is neither defined nor
                # imported in this module as shown, so this line would
                # surface as NameError -- confirm where the class lives and
                # import it.
                raise TEIStructureError()
            # The entry is complete: stop mirroring elements until the next
            # <entry> starts.  Without this reset every later element would
            # be built into a throwaway tree that overwrites _subtree.
            self._constructing = False
            entry = Entry(self._subtree, self._entry_meanings, self._meanings, self._frames)
            entry.store(self._meanings, self._stored_positions)
        self._content = ''

    def characters(self, content):
        """Accumulate character data; the parser may deliver it in pieces."""
        self._content += content

    def endDocument(self):
        """Nothing to finalise: every entry is stored as soon as it closes."""
        pass