# odt2tei.py 1.05 KB
"Convert scanned ODT to TEI"
import argparse
from odt2tei.converter import Converter


def parse_options(argv=None):
    """Parse the command-line options of the ODT converter.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default), argparse falls back to ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``filename`` (list of one or
        more positional values), ``destination`` (``"output"`` unless given)
        and ``verbose`` (one of the debug categories, ``""`` unless given).
    """
    # The empty string is kept as a valid choice: it is the default value.
    debug_categories = (
        "",
        "lines",
        "odt",
        "spaceout",
        "whitespace",
        "punctuation",
        "merge",
        "speakers",
        "comments",
        "split",
    )
    parser = argparse.ArgumentParser(description="Convert ODT documents to PCC TEI")
    parser.add_argument("filename", nargs="+", help="folder or filename")
    parser.add_argument(
        "-d",
        "--destination",
        default="output",
        metavar="<folder>",
        help="folder to save data",
    )
    # A metavar replaces the choices list in the usage/help output, so the
    # valid categories are spelled out in the help text instead.
    named_categories = ", ".join(name for name in debug_categories if name)
    parser.add_argument(
        "-v",
        "--verbose",
        default="",
        metavar="<category>",
        help=f"category to debug (one of: {named_categories})",
        choices=list(debug_categories),
    )
    return parser.parse_args(argv)


def main():
    """Parse the command line and convert each named input in turn."""
    options = parse_options()
    tei_converter = Converter(options)

    # One Converter instance is shared by all inputs; they are processed
    # in the order given on the command line.
    for source_path in options.filename:
        tei_converter.convert(source_path)


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()