# converter.py 1.99 KB
'ODT to markdown conversion'

import subprocess
import os
from cleaner import Cleaner
from markdowntei import MarkdownTei


class Converter:
    """ODT to Markdown converter.

    Source documents go through up to three steps: pandoc turns an ``.odt``
    file into Markdown, ``Cleaner`` writes a corrected copy of the Markdown,
    and ``MarkdownTei`` is handed that copy together with the ``tei`` output
    folder.  Every result is written below ``args.destination``, in the
    sub-folders ``md``, ``clean`` and ``tei``.
    """

    # Pandoc output of this many bytes or fewer is treated as a failed
    # conversion (empty document or unsupported input format).
    MIN_OUTPUT_SIZE = 100

    def __init__(self, args):
        """Store the run options.

        ``args`` must expose ``destination`` (root folder for all output) and
        ``verbose`` (forwarded to ``Cleaner``).  Presumably this is the parsed
        command-line namespace -- confirm against the caller.
        """
        self.args = args

    def convert(self, path):
        """Convert a single file, or every file directly inside a folder.

        Folders are not walked recursively.  Entries are handled in sorted
        order so that repeated runs process files in the same sequence.
        A path that is neither a folder nor a regular file is ignored.
        """
        if os.path.isdir(path):
            for filename in sorted(os.listdir(path)):
                self._convert_filename(os.path.join(path, filename))
        else:
            self._convert_filename(path)

    def _convert_filename(self, filename):
        """Convert a single file based on its extension.

        Only regular files ending in ``.odt`` or ``.md`` are processed; the
        extension is matched case-insensitively so ``REPORT.ODT`` is handled
        too.  Anything else is skipped silently.
        """
        if not os.path.isfile(filename):
            return
        handlers = {'.odt': self._convert_odt, '.md': self._convert_md}
        ext = os.path.splitext(filename)[1].lower()
        handler = handlers.get(ext)
        if handler is not None:
            handler(filename)

    def _convert_odt(self, filename):
        """Convert a single ODT file to Markdown, then clean the result.

        Returns ``True`` when pandoc succeeded and the Markdown was passed on
        to the clean-up step, ``False`` (after printing a warning) when pandoc
        could not be run, reported an error, or produced too little output.
        """
        output = self._output_file(filename, 'md')
        try:
            status = subprocess.call(['pandoc', filename, '-o', output])
        except OSError as err:
            # Raised when the pandoc executable is missing or cannot be started.
            print(f'Warning: could not run pandoc on {filename}: {err}')
            return False
        # A failed pandoc run may leave no output file at all, so the size
        # lookup must not be allowed to raise.
        try:
            size = os.path.getsize(output)
        except OSError:
            size = 0
        # Checking the exit status as well prevents a stale file left by an
        # earlier run from being mistaken for fresh output.
        if status == 0 and size > self.MIN_OUTPUT_SIZE:
            return self._convert_md(output)
        print(f'Warning: {filename} is empty or in wrong format.')
        return False

    def _convert_md(self, filename):
        """Write a corrected copy of a Markdown file and hand it to the TEI step.

        The corrected copy is stored in the ``clean`` output folder under the
        same base name.  Returns ``True`` once both steps have been invoked,
        so that ``_convert_odt`` reports success with a truthy value.
        """
        cleaner = Cleaner(self.args.verbose)
        output = self._output_file(filename, 'md', 'clean')
        cleaner.clean(filename, output)
        self._save_tei(output)
        return True

    def _save_tei(self, filename):
        """Pass a Markdown file and the ``tei`` output folder to ``MarkdownTei``."""
        outdir = self._ensure_outdir('tei')
        MarkdownTei().convert(filename, outdir)

    def _ensure_outdir(self, folder):
        """Create ``folder`` below the destination if needed and return its path."""
        outdir = os.path.join(self.args.destination, folder)
        # exist_ok avoids the check-then-create race of a separate isdir() test.
        os.makedirs(outdir, exist_ok=True)
        return outdir

    def _output_file(self, path, extension, folder=''):
        """Create the output file path for ``path`` and make sure its folder exists.

        The result is ``<destination>/<folder>/<basename>.<extension>``, where
        ``basename`` is the file name of ``path`` without its extension.  When
        ``folder`` is empty, the extension itself is used as the folder name.
        """
        outdir = self._ensure_outdir(folder or extension)
        basefile = os.path.splitext(os.path.basename(path))[0]
        return os.path.join(outdir, f'{basefile}.{extension}')