converter.py
2.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
'ODT to markdown conversion'
import subprocess
import os
from cleaner import Cleaner
from markdowntei import MarkdownTei
class Converter:
    """ODT to Markdown converter.

    Files are dispatched on their extension: ``.odt`` files are first
    converted to Markdown with the external ``pandoc`` command, then every
    Markdown file is run through ``Cleaner`` and ``MarkdownTei``.

    Output is written below ``args.destination``:

    * ``md/``    -- raw pandoc output
    * ``clean/`` -- Markdown written by ``Cleaner.clean``
    * ``tei/``   -- folder handed to ``MarkdownTei.convert``
    """

    # Pandoc output of this many bytes or fewer is treated as a failed
    # conversion (empty document or wrong input format).
    MIN_OUTPUT_SIZE = 100

    def __init__(self, args):
        """Keep the options object.

        ``args`` must expose ``destination`` (root output folder) and
        ``verbose`` (forwarded to ``Cleaner``).
        """
        self.args = args

    def convert(self, path):
        """Convert a single file, or every file directly inside a folder.

        Folders are not walked recursively: sub-folders are skipped by
        ``_convert_filename``. A path that is neither a file nor a folder
        only prints a warning.
        """
        if os.path.isdir(path):
            # os.listdir order is arbitrary; sort for reproducible runs.
            for filename in sorted(os.listdir(path)):
                self._convert_filename(os.path.join(path, filename))
        elif os.path.isfile(path):
            self._convert_filename(path)
        else:
            print(f'Warning {path} does not exist')

    def _convert_filename(self, filename):
        """Convert a single file based on its extension.

        Anything that is not a regular file, or whose extension is not
        ``.odt`` / ``.md`` (compared case-insensitively), is ignored.
        """
        if not os.path.isfile(filename):
            return
        # Lower-case so that "REPORT.ODT" is handled like "report.odt".
        ext = os.path.splitext(filename)[1].lower()
        handlers = {'.odt': self._convert_odt, '.md': self._convert_md}
        handler = handlers.get(ext)
        if handler is not None:
            handler(filename)

    def _convert_odt(self, filename):
        """Convert one ODT file to Markdown with pandoc, then clean it.

        Returns the result of ``_convert_md`` when pandoc succeeded, or
        ``False`` (after printing a warning) when pandoc could not be run,
        exited with an error, or produced no usable output.
        """
        output = self._output_file(filename, 'md')
        try:
            status = subprocess.call(['pandoc', filename, '-o', output])
        except OSError as err:
            # pandoc missing or not executable: warn and carry on so that
            # a batch run is not aborted by a traceback.
            print(f'Warning: could not run pandoc on {filename}: {err}')
            return False
        # Check the exit status first: after a failed run the output may be
        # missing, or may be a stale file left over from an earlier run.
        converted = (
            status == 0
            and os.path.isfile(output)
            and os.path.getsize(output) > self.MIN_OUTPUT_SIZE
        )
        if converted:
            return self._convert_md(output)
        print(f'Warning: {filename} is empty or in wrong format.')
        return False

    def _convert_md(self, filename):
        """Clean one Markdown file and pass the cleaned copy to the TEI step.

        The cleaned file is written to the ``clean`` folder; the input file
        itself is passed to ``Cleaner.clean`` as the source.
        """
        cleaner = Cleaner(self.args.verbose)
        output = self._output_file(filename, 'md', 'clean')
        cleaner.clean(filename, output)
        self._save_tei(output)

    def _save_tei(self, filename):
        """Run ``MarkdownTei.convert`` on ``filename`` with the ``tei`` folder."""
        outdir = os.path.join(self.args.destination, 'tei')
        # exist_ok avoids the check-then-create race of isdir() + makedirs().
        os.makedirs(outdir, exist_ok=True)
        markdown = MarkdownTei()
        markdown.convert(filename, outdir)

    def _output_file(self, path, extension, folder=''):
        """Create the output folder and return the output file path.

        The result is ``<destination>/<folder>/<basename>.<extension>``
        where ``basename`` is the file name of ``path`` without its
        extension. ``folder`` defaults to ``extension`` when empty.
        """
        folder = folder or extension
        basefile = os.path.splitext(os.path.basename(path))[0]
        outdir = os.path.join(self.args.destination, folder)
        os.makedirs(outdir, exist_ok=True)
        return os.path.join(outdir, f'{basefile}.{extension}')