# convert_tables.py 11.3 KB
# -*- coding: utf-8 -*-
from django.core.management import BaseCommand
from common.util import GroupDict
from dictionary.models import Cell, NewTableTemplate, LexicalClass, PatternType, NewCell, TableTemplate, NewTableCell, TableHeader, NewTableHeader, LexemeAttribute, NewExportCell


class Command(BaseCommand):
    """Management command that runs the table-template conversion."""
    args = ''
    help = ('Deletes all NewTableTemplate objects and rebuilds them from the '
            'existing TableTemplate, Cell and TableHeader data.')

    def handle(self, *args, **options):
        """Run the conversion.

        Positional arguments are accepted (and ignored) so that the
        signature matches ``BaseCommand.handle(*args, **options)``; the
        command itself takes no arguments or options.
        """
        convert_tables()

# Inflection-characteristic symbols that TABLE_TEMPLATES pairs with the
# attribute values ('', '(Q)').
nQ_ICS = (
    'dk',
    'ndk',
    'ndk/dk',
    'dk/ndk',
    'ndk/(dk)',
    'dk/(ndk)',
)

# Inflection-characteristic symbols that TABLE_TEMPLATES pairs with the
# attribute value 'Q'.
Q_ICS = (
    'qndk',
    'qdk',
    'qndk/dk',
)

# For each category below: the tag-initial parts of speech for which
# convert_export_cells() generalises an export tag, and the placeholder
# it puts into the tag.
ASPECT_TAG = 'ASPEKT'
ASPECT_POS = {
    'fin', 'ger', 'imps', 'impt',
    'inf', 'pact', 'ppas', 'praet',
}

GENDER_TAG = 'RODZAJ'
GENDER_POS = {'subst'}

PERSON_TAG = 'OSOBA'
PERSON_POS = {'ppron12'}

CASE_TAG = 'PRZYPADEK'
CASE_POS = {'prep'}

# Variants converted as display tables (table cells and headers); every
# other variant is converted as export cells.
TABLE_VARIANTS = ('0', '1')

# Definitions of the new table templates created by convert_tables().
# Each entry is a 5-tuple:
#   (name, variant, parts of speech, pattern type symbols, attributes)
# An empty pattern-type tuple means "do not filter by pattern type symbol".
# Each item of `attributes` is a 3-tuple:
#   (attribute name, attribute values, inflection characteristic symbols)
TABLE_TEMPLATES = [
    # base forms
    (u'rzeczowniki', '0', ('subst', 'osc', 'skrs'), ('f', 'm', 'n', '0'), ()),
    (u'pron', '0', ('subst',), ('z0', "z0'", 'z1', 'z1p', 'z2'), ()),
    (u'my/wy', '0', ('ppron',), ('a',), ()),
    (u'ja/ty/się', '0', ('ppron',), ('b', "b'"), ()),
    (u'on', '0', ('ppron',), ('c',), ()),
    (u'przymiotniki', '0', ('adj',), (), ()), # optional forms...
    (u'adjcom', '0', ('adjcom',), (), ()), # to be merged with adj
    (u'czasowniki', '0', ('v',), ('', '67', 'b'), (
        (u'właściwy', ('', '(Q)'), nQ_ICS),
    )),
    (u'winien', '0', ('v',), ('p',), (
        (u'właściwy', ('', '(Q)'), nQ_ICS),
    )),
    (u'niewłaściwe', '0', ('v', 'pred'), (), (
        (u'właściwy', ('Q',), Q_ICS),
    )),
    (u'ger', '0', ('ger',), (), ()), # eventually together with subst
    (u'imiesłowy', '0', ('pact', 'ppas', 'appas'), (), ()), # eventually with adj
    (u'num a', '0', ('num',), ('a', 'a1', "a'"), ()),
    (u'num b', '0', ('num',), ('b',), ()),
    (u'num d', '0', ('num',), ('c', 'd', "d'", 'd"'), ()),
    (u'nieodmienne', '0', (
        'adv', 'advcom', 'advndm', 'burk', 'prep', 'pref', 'comp', 'conj',
        'interj', 'qub'), (), ()),

    # SGJP tables
    (u'rzeczowniki', '1', ('subst', 'osc', 'skrs'), ('f', 'm', 'n', '0'), ()),
    (u'pron', '1', ('subst',), ('z0', "z0'", 'z1', 'z1p', 'z2'), ()),
    (u'my/wy', '1', ('ppron',), ('a',), ()),
    (u'ja/ty/się', '1', ('ppron',), ('b', "b'"), ()),
    (u'on', '1', ('ppron',), ('c',), ()),
    (u'przymiotniki', '1', ('adj',), (), ()), # optional forms...
    (u'adjcom', '1', ('adjcom',), (), ()), # to be merged with adj
    (u'czasowniki', '1', ('v',), ('', '67', 'b'), (
        (u'właściwy', ('', '(Q)'), nQ_ICS),
    )),
    (u'winien', '1', ('v',), ('p',), (
        (u'właściwy', ('', '(Q)'), nQ_ICS),
    )),
    (u'niewłaściwe', '1', ('v', 'pred'), (), (
        (u'właściwy', ('Q',), Q_ICS),
    )),
    (u'ger', '1', ('ger',), (), ()), # eventually together with subst
    (u'imiesłowy', '1', ('pact', 'ppas', 'appas'), (), ()), # eventually with adj
    (u'num a', '1', ('num',), ('a', 'a1', "a'"), ()),
    (u'num b', '1', ('num',), ('b',), ()),
    (u'num c', '1', ('num',), ('c',), ()), # could try merging with d
    (u'num d', '1', ('num',), ('d', "d'", 'd"'), ()),
    (u'nieodmienne', '1', (
        'adv', 'advcom', 'advndm', 'burk', 'prep', 'pref', 'comp', 'conj',
        'interj', 'qub'), (), ()),

    # Morfeusz
    (u'rzeczowniki', 'Morfeusz', ('subst', 'osc', 'skrs'), ('f', 'm', 'n', '0'), ()),
    (u'pron', 'Morfeusz', ('subst',), ('z0', "z0'", 'z1', 'z1p', 'z2'), ()),
    (u'my/wy', 'Morfeusz', ('ppron',), ('a',), ()),
    (u'ja/ty/się', 'Morfeusz', ('ppron',), ('b', "b'"), ()),
    (u'on', 'Morfeusz', ('ppron',), ('c',), ()),
    (u'przymiotniki', 'Morfeusz', ('adj',), (), ()),
    (u'adjcom', 'Morfeusz', ('adjcom',), (), ()),
    (u'czasowniki', 'Morfeusz', ('v',), ('', '67', 'b'), (
        (u'właściwy', ('', '(Q)'), nQ_ICS),
    )),
    (u'winien', 'Morfeusz', ('v',), ('p',), (
        (u'właściwy', ('', '(Q)'), nQ_ICS),
    )),
    (u'niewłaściwe', 'Morfeusz', ('v', 'pred'), (), (
        (u'właściwy', ('Q',), Q_ICS),
    )),
    (u'gerundia', 'Morfeusz', ('ger',), (), ()),
    (u'imiesłowy czynne', 'Morfeusz', ('pact',), (), ()),
    (u'imiesłowy bierne', 'Morfeusz', ('ppas', 'appas'), (), ()),
    (u'num a', 'Morfeusz', ('num',), ('a', 'a1', "a'"), ()),
    (u'num b', 'Morfeusz', ('num',), ('b',), ()),
    (u'num cd', 'Morfeusz', ('num',), ('c', 'd', "d'", 'd"'), ()),
    (u'nieodmienne', 'Morfeusz', (
        'adv', 'advcom', 'advndm', 'burk', 'prep', 'pref', 'comp', 'conj',
        'interj', 'qub'), (), ()),

    # Morfologik
    (u'rzeczowniki', 'Morfologik', ('subst', 'osc', 'skrs'), ('f', 'm', 'n', '0'), ()),
    (u'pron', 'Morfologik', ('subst',), ('z0', "z0'", 'z1', 'z1p', 'z2'), ()),
    (u'my/wy', 'Morfologik', ('ppron',), ('a',), ()),
    (u'ja/ty/się', 'Morfologik', ('ppron',), ('b', "b'"), ()),
    (u'on', 'Morfologik', ('ppron',), ('c',), ()),
    (u'przymiotniki', 'Morfologik', ('adj',), (), ()),
    (u'adjcom', 'Morfologik', ('adjcom',), (), ()),
    (u'czasowniki', 'Morfologik', ('v',), ('', '67', 'b'), (
        (u'właściwy', ('', '(Q)'), nQ_ICS),
    )),
    (u'winien', 'Morfologik', ('v',), ('p',), (
        (u'właściwy', ('', '(Q)'), nQ_ICS),
    )),
    (u'niewłaściwe', 'Morfologik', ('v', 'pred'), (), (
        (u'właściwy', ('Q',), Q_ICS),
    )),
    (u'gerundia', 'Morfologik', ('ger',), (), ()),
    (u'imiesłowy czynne', 'Morfologik', ('pact',), (), ()),
    (u'imiesłowy bierne', 'Morfologik', ('ppas', 'appas'), (), ()),
    (u'num a', 'Morfologik', ('num',), ('a', 'a1', "a'"), ()),
    (u'num b', 'Morfologik', ('num',), ('b',), ()),
    (u'num cd', 'Morfologik', ('num',), ('c', 'd', "d'", 'd"'), ()),
    (u'nieodmienne', 'Morfologik', (
        'adv', 'advcom', 'advndm', 'burk', 'prep', 'pref', 'comp', 'conj',
        'interj', 'qub'), (), ()),
]


def rectangles(templates):
    """Group pattern types by the inflection characteristics they occur with.

    Every template contributes the pair
    (``pattern_type``, ``inflection_characteristic``).  For each pattern
    type the distinct inflection characteristics are collected, and the
    pattern type is then added to a GroupDict under the tuple of those
    characteristics ordered by ``id``.

    Returns the GroupDict keyed by such tuples.
    """
    ics_by_pattern_type = {}
    for tt in templates:
        ics_by_pattern_type.setdefault(tt.pattern_type, set()).add(
            tt.inflection_characteristic)

    grouped = GroupDict()
    for pattern_type, ic_set in ics_by_pattern_type.iteritems():
        # Sorting by id makes equal sets produce the same key.
        ic_key = tuple(sorted(ic_set, key=lambda ic: ic.id))
        grouped.add(ic_key, pattern_type)
    return grouped


def convert_table_cells(cell_group, new_cell):
    """Create NewTableCell rows for ``new_cell`` from a group of old cells.

    The old cells are grouped by (row, col, rowspan, colspan) of their
    ``tablecell`` together with the cell's own ``index``.  For each such
    group, and for each entry returned by ``rectangles()`` for the table
    templates owning those cells, one NewTableCell is saved and linked to
    the entry's inflection characteristics and pattern types.
    """
    by_position = GroupDict()
    for old_cell in cell_group:
        placement = old_cell.tablecell
        position = (
            placement.row, placement.col,
            placement.rowspan, placement.colspan,
            old_cell.index,
        )
        by_position.add(position, old_cell)

    for position, positioned_cells in by_position.iteritems():
        row, col, rowspan, colspan, index = position
        source_templates = TableTemplate.objects.filter(
            cell__in=positioned_cells)
        for ics, pts in rectangles(source_templates).iteritems():
            table_cell = NewTableCell(
                cell=new_cell,
                row=row, col=col,
                rowspan=rowspan, colspan=colspan,
                index=index)
            # Must be saved before many-to-many links can be added.
            table_cell.save()
            for ic in ics:
                table_cell.inflection_characteristics.add(ic)
            for pt in pts:
                table_cell.pattern_types.add(pt)


def _export_tag_template(tag, ic):
    """Return ``tag`` with its per-characteristic part replaced by a placeholder.

    ``tag`` is a colon-separated export tag whose first segment is the part
    of speech; ``ic`` is the inflection characteristic of the cell's table
    template (only its ``symbol`` is read, and only when needed).

    * part of speech in ASPECT_POS: the aspect values 'imperf.perf',
      'imperf' and 'perf' are replaced by ASPECT_TAG;
    * part of speech in GENDER_POS: if the last segment equals the symbol,
      that segment is replaced by GENDER_TAG;
    * part of speech in PERSON_POS / CASE_POS: every occurrence of a
      non-empty symbol is replaced by PERSON_TAG / CASE_TAG.

    Any other tag is returned unchanged.
    """
    pos = tag.split(':', 1)[0]
    if pos in ASPECT_POS:
        # Order matters: the compound value has to go first, and 'imperf'
        # before 'perf', because 'perf' is a substring of both.
        for aspect in ('imperf.perf', 'imperf', 'perf'):
            tag = tag.replace(aspect, ASPECT_TAG)
    elif pos in GENDER_POS and tag.split(':')[-1] == ic.symbol:
        tag = tag[:tag.rfind(':')] + ':' + GENDER_TAG
    elif pos in PERSON_POS and ic.symbol:
        # An empty symbol must be skipped: str.replace('', X) would insert
        # X between every character of the tag.
        tag = tag.replace(ic.symbol, PERSON_TAG)
    elif pos in CASE_POS and ic.symbol:
        tag = tag.replace(ic.symbol, CASE_TAG)
    return tag


def convert_export_cells(cell_group, new_cell):
    """Create NewExportCell rows for ``new_cell`` from a group of old cells.

    The old cells are grouped by their tag after it has been generalised
    into a tag template (see ``_export_tag_template``).  For each group,
    and for each entry returned by ``rectangles()`` for the table templates
    owning those cells, one NewExportCell is saved and linked to the
    entry's inflection characteristics and pattern types.
    """
    export_groups = GroupDict()
    for cell in cell_group:
        ic = cell.table_template.inflection_characteristic
        export_groups.add(_export_tag_template(cell.tag, ic), cell)

    for tag_template, export_group in export_groups.iteritems():
        templates = TableTemplate.objects.filter(cell__in=export_group)
        for ics, pts in rectangles(templates).iteritems():
            new_export_cell = NewExportCell(
                cell=new_cell, tag_template=tag_template)
            # Must be saved before many-to-many links can be added.
            new_export_cell.save()
            for ic in ics:
                new_export_cell.inflection_characteristics.add(ic)
            for pt in pts:
                new_export_cell.pattern_types.add(pt)

def convert_cells(cells, new_template, variant):
    """Create NewCell rows for ``new_template`` from the old ``cells``.

    Old cells sharing the same (prefix, base_form_label, suffix) become a
    single NewCell.  Each group is then passed on to
    ``convert_table_cells`` when ``variant`` is one of TABLE_VARIANTS and
    to ``convert_export_cells`` otherwise.
    """
    grouped = GroupDict()
    for old_cell in cells:
        signature = (old_cell.prefix, old_cell.base_form_label,
                     old_cell.suffix)
        grouped.add(signature, old_cell)

    # The variant is fixed for the whole call, so pick the converter once.
    if variant in TABLE_VARIANTS:
        convert_group = convert_table_cells
    else:
        convert_group = convert_export_cells

    for (prefix, label, suffix), members in grouped.iteritems():
        new_cell = NewCell(
            table_template=new_template,
            prefix=prefix, base_form_label=label, suffix=suffix)
        new_cell.save()
        convert_group(members, new_cell)


def convert_headers(headers, new_template):
    """Create NewTableHeader rows for ``new_template`` from old headers.

    Old headers are grouped by (row, col, rowspan, colspan, label,
    css_class).  For each group, and for each entry returned by
    ``rectangles()`` for the table templates owning those headers, one
    NewTableHeader is saved and linked to the entry's inflection
    characteristics and pattern types.
    """
    grouped = GroupDict()
    for header in headers:
        signature = (header.row, header.col, header.rowspan,
                     header.colspan, header.label, header.css_class)
        grouped.add(signature, header)

    for signature, same_headers in grouped.iteritems():
        row, col, rowspan, colspan, label, css_class = signature
        source_templates = TableTemplate.objects.filter(
            tableheader__in=same_headers)
        for ics, pts in rectangles(source_templates).iteritems():
            table_header = NewTableHeader(
                table_template=new_template,
                row=row, col=col,
                rowspan=rowspan, colspan=colspan,
                label=label, css_class=css_class)
            # Must be saved before many-to-many links can be added.
            table_header.save()
            for ic in ics:
                table_header.inflection_characteristics.add(ic)
            for pt in pts:
                table_header.pattern_types.add(pt)


def convert_tables():
    """Rebuild all NewTableTemplate objects from TABLE_TEMPLATES.

    Existing NewTableTemplate rows are deleted first.  Then, for every
    entry of TABLE_TEMPLATES, a new template is saved and linked to its
    parts of speech, matching pattern types, attributes and attribute
    values; the old cells selected for the entry are converted with
    ``convert_cells`` and, for variants in TABLE_VARIANTS, the old headers
    with ``convert_headers``.

    Cells are narrowed by pattern type symbol and by the inflection
    characteristic symbols of every attribute; headers are narrowed by
    pattern type symbol only.
    """
    NewTableTemplate.objects.all().delete()
    for template_name, variant, poses, p_types, attrs in TABLE_TEMPLATES:
        new_template = NewTableTemplate(
            name=template_name, variant_id=variant)
        new_template.save()

        for pos in poses:
            new_template.parts_of_speech.add(pos)

        # Pattern types of the lexical classes behind the listed parts of
        # speech, optionally restricted to the listed symbols.
        matching_pts = PatternType.objects.filter(
            lexical_class__in=LexicalClass.objects.filter(
                partofspeech__symbol__in=poses))
        if p_types:
            matching_pts = matching_pts.filter(symbol__in=p_types)
        for pattern_type in matching_pts:
            new_template.pattern_types.add(pattern_type)

        for attr_name, attr_values, _ics in attrs:
            attribute = LexemeAttribute.objects.get(name=attr_name)
            new_template.attributes.add(attribute)
            for value in attribute.values.filter(value__in=attr_values):
                new_template.attribute_values.add(value)

        # Old templates of this variant for the listed parts of speech.
        old_templates = TableTemplate.objects.filter(
            inflection_characteristic__part_of_speech__symbol__in=poses,
            variant__id=variant)

        cells = Cell.objects.filter(table_template__in=old_templates)
        if p_types:
            cells = cells.filter(
                table_template__pattern_type__symbol__in=p_types)
        for _attr_name, _attr_values, ics in attrs:
            cells = cells.filter(
                table_template__inflection_characteristic__symbol__in=ics)
        convert_cells(cells, new_template, variant)

        # Only table variants have headers to convert.
        if variant not in TABLE_VARIANTS:
            continue
        headers = TableHeader.objects.filter(table_template__in=old_templates)
        if p_types:
            headers = headers.filter(
                table_template__pattern_type__symbol__in=p_types)
        convert_headers(headers, new_template)