# txt_plus.py 2.63 KB
import importlib
import json
import os

from writers import txt


def write(document):
    """Write the document as text and, when applicable, a ``meta.json`` file.

    The text output is always delegated to ``txt.write``. The metadata file
    is written into ``document.path`` only when the document has chunks and
    is neither flagged as an image nor as having a broken source.
    """
    txt.write(document)

    # Guard clauses keep the original evaluation order: chunks first,
    # then the image flag, then the broken-source flag.
    if not document.chunks.exists():
        return
    if document.image or document.broken_source:
        return

    meta_path = os.path.join(document.path, 'meta.json')
    _write_metadata(document, meta_path)


def _write_metadata(document, meta_path):
    """Dump the document's metadata to ``meta_path`` as a JSON object.

    The object starts with a fixed set of fields read directly from
    ``document`` and is then extended with every record of
    ``document.metadata`` (ordered by ``sequence``). Record names are
    translated through the ``META_TYPES`` table of the module
    ``projects.<project name>.mappings``, which is imported dynamically.

    Records whose type is marked as multivalue are accumulated into a list
    (split on the configured separator when one is set). Other records are
    stored as a single value, converted to ``int`` when the text consists of
    decimal digits only; a later record with the same name overwrites an
    earlier one.

    Args:
        document: the document to describe; must expose the attributes read
            below (``id``, ``pipeline.project.name``, ``metadata``, ...).
        meta_path: path of the JSON file to create; it is opened in ``'w'``
            mode with UTF-8 encoding, so an existing file is overwritten.
    """
    print('Writing %s metadata in JSON format.' % document.id)
    project_mappings = importlib.import_module('projects.%s.mappings' % document.pipeline.project.name)

    metadata = {'pipeline': document.pipeline.project.name,
                'content-type': 'text/plain',
                'language': document.lang,
                'publisher': document.publisher,
                'date': document.publication_date.strftime('%Y-%m-%d'),
                'title': document.title,
                'status': document.status,
                'in_effect': document.in_effect,
                'type': document.type,
                'keywords': [keyword.label for keyword in document.keywords.all()],
                'source_url': document.source_url,
                'meta_url': document.meta_url,
                'file_url': document.file_url}

    for meta in document.metadata.order_by('sequence'):
        meta_multivalue, meta_separator = _meta_multivalue(project_mappings.META_TYPES, meta.name)
        translated_meta_name = _en(project_mappings.META_TYPES, meta.name)

        if meta_multivalue:
            # An empty or missing separator means the raw value is one item.
            if meta_separator:
                values = meta.value.split(meta_separator)
            else:
                values = [meta.value]
            # Create the list on first use, then keep appending to it.
            metadata.setdefault(translated_meta_name, []).extend(values)
        elif meta.value.isdecimal():
            # isdecimal() rather than isdigit(): isdigit() also accepts
            # characters such as superscript digits, which int() rejects
            # with ValueError.
            metadata[translated_meta_name] = int(meta.value)
        else:
            metadata[translated_meta_name] = meta.value

    # ensure_ascii=False keeps non-ASCII text readable in the UTF-8 file.
    with open(meta_path, 'w', encoding='utf-8') as f:
        json.dump(metadata, f, ensure_ascii=False)


def _meta_multivalue(meta_mapping, pl_name):
    """Look up the multivalue settings for the metadata type named ``pl_name``.

    Returns a ``(multivalue, separator)`` pair taken from the first entry of
    ``meta_mapping`` whose ``'pl'`` key equals ``pl_name``, or
    ``(False, None)`` when no entry matches.
    """
    matching = (entry for entry in meta_mapping if entry['pl'] == pl_name)
    found = next(matching, None)
    if found is None:
        return False, None
    return found['multivalue'], found['separator']


def _en(translations, pl_name):
    """Return the English name recorded for ``pl_name``.

    The first entry of ``translations`` whose ``'pl'`` key equals ``pl_name``
    decides the result: its ``'en'`` value formatted as a string, or
    ``pl_name`` itself when that value is ``None``. ``pl_name`` is also
    returned unchanged when no entry matches.
    """
    for entry in translations:
        if entry['pl'] != pl_name:
            continue
        english = entry['en']
        return pl_name if english is None else '%s' % english
    return pl_name