# odt2tei.py
"Convert scanned ODT to TEI"
import argparse
import logging
from math import log
from odt2tei.converter import Converter


def parse_options(argv=None):
    """Parse command-line options and configure the logging level.

    Args:
        argv: Optional list of argument strings to parse.  When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is the
            behaviour callers relied on before this parameter existed.

    Returns:
        The ``argparse.Namespace`` with the attributes ``output``, ``debug``,
        ``force``, ``verbose``, ``stop_on_error`` and ``filename`` (a list
        with at least one entry).

    Raises:
        SystemExit: raised by argparse on ``-h``/``--help`` or on invalid
            arguments.

    Side effects:
        Calls ``logging.basicConfig`` with a bare ``%(message)s`` format.
        The level is DEBUG when a debug category is selected, INFO when
        ``--verbose`` is given, and WARNING otherwise.
    """
    parser = argparse.ArgumentParser(description="Convert ODT documents to PCC TEI")
    parser.add_argument(
        "-o",
        "--output",
        default="output",
        metavar="<folder>",
        help="folder to save data",
    )
    parser.add_argument(
        "-d",
        "--debug",
        default="",
        # The option takes a debug category, not a folder; the metavar is
        # what argparse prints in the usage/help text.
        metavar="<category>",
        help="category to debug",
        choices=[
            "",
            "lines",
            "odt",
            "spaceout",
            "whitespace",
            "punctuation",
            "merge",
            "illegible",
            "speakers",
            "comments",
            "split",
        ],
    )
    parser.add_argument("-f", "--force", action="store_true", help="save invalid files")
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="print correct files"
    )
    parser.add_argument(
        "-s", "--stop-on-error", action="store_true", help="stop on error"
    )
    parser.add_argument("filename", nargs="+", help="folder or filename")
    args = parser.parse_args(argv)

    # Any non-empty debug category implies the most verbose logging.
    if args.debug:
        loglevel = logging.DEBUG
    elif args.verbose:
        loglevel = logging.INFO
    else:
        loglevel = logging.WARNING
    logging.basicConfig(format="%(message)s", level=loglevel)
    return args


def main():
    """Parse the command line, then convert each requested input in order.

    A single ``Converter`` is built from the parsed options and reused for
    every entry of ``args.filename``.
    """
    options = parse_options()
    odt_converter = Converter(options)

    for path in options.filename:
        odt_converter.convert(path)


# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()