# convert_tables.py 13.8 KB
# -*- coding: utf-8 -*-
from django.core.management import BaseCommand
from common.util import GroupDict
from dictionary.models import Cell, TableTemplate, LexicalClass, \
    PatternType, TableTemplate, TableCell, TableHeader, \
    TableHeader, LexemeAttribute, ExportCell, Variant, \
    InflectionCharacteristic, LexemeAttributeValue


class Command(BaseCommand):
    """Management command wrapping the table conversion script.

    The command is intentionally disabled: ``handle`` raises right away and
    the call to ``convert_tables()`` is not executed.
    """
    args = ''
    help = ''

    def handle(self, **options):
        """Abort unconditionally because the conversion code is stale."""
        # The call to convert_tables() stays switched off on purpose.
        reason = "stale code"
        raise Exception(reason)

# Inflection characteristic symbols used by the verb entries of
# TABLE_TEMPLATES: nQ_ICS goes with the attribute values '' and '(Q)',
# Q_ICS goes with the attribute value 'Q'.
nQ_ICS = (
    'dk',
    'ndk',
    'ndk/dk',
    'dk/ndk',
    'ndk/(dk)',
    'dk/(ndk)',
)

Q_ICS = (
    'qndk',
    'qdk',
    'qndk/dk',
    'qndk/(dk)',
)

# The constants below drive the tag rewriting in convert_export_cells():
# each *_POS set lists tag prefixes (the part before the first ':') and the
# matching *_TAG is the placeholder written into the tag template.
ASPECT_POS = {
    'fin',
    'ger',
    'imps',
    'impt',
    'inf',
    'pact',
    'ppas',
    'praet',
    'verb',
}
ASPECT_TAG = 'ASPEKT'

GENDER_POS = {'subst'}
GENDER_TAG = 'RODZAJ'

PERSON_POS = {'ppron12'}
PERSON_TAG = 'OSOBA'

CASE_POS = {'prep'}
CASE_TAG = 'PRZYPADEK'

# Specification of the table templates created by convert_tables().
# Every entry is a tuple (name, variant_id, poses, p_types, attrs):
#   name       -- name given to the new TableTemplate
#   variant_id -- id used to look up the Variant
#   poses      -- part-of-speech symbols covered by the template
#   p_types    -- pattern type symbols; an empty tuple means the pattern
#                 types are not narrowed down by symbol
#   attrs      -- either () or a pair (attr_specs, cell_attr_names), where
#                 attr_specs holds (attribute name, values, ic symbols)
#                 triples and cell_attr_names holds the names of the
#                 LexemeAttributes attached to the template's cells
TABLE_TEMPLATES = [
    # base forms
    (u'rzeczowniki', '0', ('subst', 'osc', 'skrs'), ('f', 'm', 'n', '0'), ()),
    (u'pron', '0', ('subst',), ('z0', "z0'", 'z1', 'z1p', 'z2'), ()), # gender irrelevant
    (u'my/wy', '0', ('ppron',), ('a',), ()),
    (u'ja/ty/się', '0', ('ppron',), ('b', "b'"), ()),
    (u'on', '0', ('ppron',), ('c',), ()),
    (u'przymiotniki', '0', ('adj',), (), ((), (
        u'forma złoż.', u'forma poprz.'
    ))),
    (u'adjcom', '0', ('adjcom',), (), ()), # merge with adj
    (u'czasowniki', '0', ('v',), ('', '67', 'b'), ((
        (u'właściwy', ('', '(Q)'), nQ_ICS),
    ), ())),
    (u'winien', '0', ('v',), ('p',), ((
        (u'właściwy', ('', '(Q)'), nQ_ICS),
    ), ())),
    (u'niewłaściwe', '0', ('v',), ('', '67'), ((
        (u'właściwy', ('Q',), Q_ICS),
    ), ())),
    (u'ger', '0', ('ger',), (), ()), # eventually together with subst
    (u'imiesłowy', '0', ('pact', 'ppas', 'appas'), (), ()), # eventually with adj
    (u'num a', '0', ('num',), ('a', 'a1', "a'"), ()),
    (u'num b', '0', ('num',), ('b',), ()),
    (u'num cd', '0', ('num',), ('c', 'd', "d'", 'd"'), ()),
    (u'nieodmienne', '0', (
        'adv', 'advcom', 'advndm', 'burk', 'prep', 'comp', 'conj',
        'interj', 'qub', 'pred'), (), ()),
    (u'prefiksy', '0', ('pref',), (), ()),

    # SGJP tables
    (u'rzeczowniki', '1', ('subst', 'osc', 'skrs'), ('f', 'm', 'n', '0'), ()),
    (u'pron', '1', ('subst',), ('z0', "z0'", 'z1', 'z1p', 'z2'), ()), # gender irrelevant
    (u'my/wy', '1', ('ppron',), ('a',), ()),
    (u'ja/ty/się', '1', ('ppron',), ('b', "b'"), ()),
    (u'on', '1', ('ppron',), ('c',), ()),
    (u'przymiotniki', '1', ('adj',), (), ((), (
        u'forma złoż.', u'forma poprz.'
    ))),
    (u'adjcom', '1', ('adjcom',), (), ()), # merge with adj
    (u'czasowniki', '1', ('v',), ('', '67', 'b'), ((
        (u'właściwy', ('', '(Q)'), nQ_ICS),
    ), (
        u'aspekt',
    ))),
    (u'winien', '1', ('v',), ('p',), ((
        (u'właściwy', ('', '(Q)'), nQ_ICS),
    ), ())),
    (u'niewłaściwe', '1', ('v', 'pred'), ('', '67'), ((
        (u'właściwy', ('Q',), Q_ICS),
    ), (
        u'aspekt',
    ))),
    (u'ger', '1', ('ger',), (), ()), # eventually together with subst
    (u'imiesłowy', '1', ('pact', 'ppas', 'appas'), (), ()), # eventually with adj
    (u'num a', '1', ('num',), ('a', 'a1', "a'"), ()),
    (u'num b', '1', ('num',), ('b',), ()),
    (u'num c', '1', ('num',), ('c',), ()), # could try merging with d
    (u'num d', '1', ('num',), ('d', "d'", 'd"'), ()),
    (u'nieodmienne', '1', (
        'adv', 'advcom', 'advndm', 'burk', 'prep', 'comp', 'conj',
        'interj', 'qub'), (), ()),
    (u'prefiksy', '1', ('pref',), (), ()),

    # Morfeusz
    (u'rzeczowniki', 'Morfeusz', ('subst', 'osc', 'skrs'), ('f', 'm', 'n', '0'), ()),
    (u'pron', 'Morfeusz', ('subst',), ('z0', "z0'", 'z1', 'z1p', 'z2'), ()), # gender relevant!
    (u'my/wy', 'Morfeusz', ('ppron',), ('a',), ()),
    (u'ja/ty/się', 'Morfeusz', ('ppron',), ('b', "b'"), ()),
    (u'on', 'Morfeusz', ('ppron',), ('c',), ()),
    (u'przymiotniki', 'Morfeusz', ('adj',), (), ((), (
        u'forma złoż.', u'forma poprz.'
    ))),
    (u'przymiotniki wyższe', 'Morfeusz', ('adjcom',), (), ()),
    (u'czasowniki', 'Morfeusz', ('v',), ('', '67', 'b'), ((
        (u'właściwy', ('', '(Q)'), nQ_ICS),
    ), (
        u'aspekt',
    ))),
    (u'winien', 'Morfeusz', ('v',), ('p',), ((
        (u'właściwy', ('', '(Q)'), nQ_ICS),
    ), ())),
    (u'niewłaściwe', 'Morfeusz', ('v', 'pred'), ('', '67'), ((
        (u'właściwy', ('Q',), Q_ICS),
    ), (
        u'aspekt',
    ))),
    (u'gerundia', 'Morfeusz', ('ger',), (), ()),
    (u'imiesłowy czynne', 'Morfeusz', ('pact',), (), ()),
    (u'imiesłowy bierne', 'Morfeusz', ('ppas', 'appas'), (), ()),
    (u'num a', 'Morfeusz', ('num',), ('a', 'a1', "a'"), ()),
    (u'num b', 'Morfeusz', ('num',), ('b',), ()),
    (u'num cd', 'Morfeusz', ('num',), ('c', 'd', "d'", 'd"'), ()),
    (u'przysłówki', 'Morfeusz', ('adv',), (), ()),
    (u'przysłówki wyższe', 'Morfeusz', ('advcom',), (), ()),
    (u'przysłówki ndm', 'Morfeusz', ('advndm',), (), ()),
    (u'burkinostki', 'Morfeusz', ('burk',), (), ()),
    (u'przyimki', 'Morfeusz', ('prep',), (), ()),
    (u'prefiksy', 'Morfeusz', ('pref',), (), ()),
    (u'spójniki podrz.', 'Morfeusz', ('comp',), (), ()),
    (u'spójniki współ.', 'Morfeusz', ('conj',), (), ()),
    (u'wykrzykniki', 'Morfeusz', ('interj',), (), ()),
    (u'kubliki', 'Morfeusz', ('qub',), (), ()),

    # Morfologik
    (u'rzeczowniki', 'Morfologik',
        ('subst', 'osc', 'skrs'),
        ('f', 'm', 'n', '0'), ()),
    (u'pron', 'Morfologik', ('subst',), ('z0', "z0'", 'z1', 'z1p', 'z2'), ()), # as above
    (u'my/wy', 'Morfologik', ('ppron',), ('a',), ()),
    (u'ja/ty/się', 'Morfologik', ('ppron',), ('b', "b'"), ()),
    (u'on', 'Morfologik', ('ppron',), ('c',), ()),
    (u'przymiotniki', 'Morfologik', ('adj',), (), ((), (
        u'forma złoż.', u'forma poprz.'
    ))),
    (u'przymiotniki wyższe', 'Morfologik', ('adjcom',), (), ()),
    (u'czasowniki', 'Morfologik', ('v',), ('', '67', 'b'), ((
        (u'właściwy', ('', '(Q)'), nQ_ICS),
    ), (
        u'aspekt',
    ))),
    (u'winien', 'Morfologik', ('v',), ('p',), ((
        (u'właściwy', ('', '(Q)'), nQ_ICS),
    ), ())),
    (u'niewłaściwe', 'Morfologik', ('v', 'pred'), ('', '67'), ((
        (u'właściwy', ('Q',), Q_ICS),
    ), (
        u'aspekt',
    ))),
    (u'gerundia', 'Morfologik', ('ger',), (), ()),
    (u'imiesłowy czynne', 'Morfologik', ('pact',), (), ()),
    (u'imiesłowy bierne', 'Morfologik', ('ppas', 'appas'), (), ()),
    (u'num a', 'Morfologik', ('num',), ('a', 'a1', "a'"), ()),
    (u'num b', 'Morfologik', ('num',), ('b',), ()),
    (u'num cd', 'Morfologik', ('num',), ('c', 'd', "d'", 'd"'), ()),
    (u'przysłówki', 'Morfologik', ('adv',), (), ()),
    (u'przysłówki wyższe', 'Morfologik', ('advcom',), (), ()),
    (u'przysłówki ndm', 'Morfologik', ('advndm',), (), ()),
    (u'burkinostki', 'Morfologik', ('burk',), (), ()),
    (u'przyimki', 'Morfologik', ('prep',), (), ()),
    (u'prefiksy', 'Morfologik', ('pref',), (), ()),
    (u'spójniki podrz.', 'Morfologik', ('comp',), (), ()),
    (u'spójniki współ.', 'Morfologik', ('conj',), (), ()),
    (u'wykrzykniki', 'Morfologik', ('interj',), (), ()),
    (u'kubliki', 'Morfologik', ('qub',), (), ()),
]

# NOTE: both lookups below are module-level statements, so they are
# evaluated when this module is imported.

# Adjective inflection characteristics with symbol '' or '3+'; rectangles()
# adds them to every group whose pattern type symbol is 'adef'.
ADEF_ICS = set(
    InflectionCharacteristic.objects.filter(
        part_of_speech__symbol='adj', symbol__in=('', '3+')))

# The 'f' inflection characteristic of part of speech 'osc'; rectangles()
# adds it next to every inflection characteristic whose symbol is 'f'.
F_OSC = InflectionCharacteristic.objects.get(
    part_of_speech__symbol='osc', symbol='f')

def rectangles(templates):
    """Group pattern types by the inflection characteristics they occur with.

    First collects, for every pattern type found in ``templates``, the set
    of inflection characteristics of the templates using it (plus two
    hard-coded extensions, see below).  Then inverts that mapping: each
    distinct set, turned into a tuple sorted by ``id``, becomes a key of the
    returned ``GroupDict`` and the pattern type is added under it.

    :param templates: iterable of objects exposing ``pattern_type`` and
        ``inflection_characteristic``.
    :return: ``GroupDict`` keyed by tuples of inflection characteristics
        (presumably mapping each key to the pattern types added under it --
        confirm against ``common.util.GroupDict``).
    """
    ics_by_pattern_type = {}
    for tt in templates:
        pattern_type = tt.pattern_type
        ic = tt.inflection_characteristic
        ic_set = ics_by_pattern_type.setdefault(pattern_type, set())
        ic_set.add(ic)
        # QUICK & DIRTY: hard-coded extensions of the collected set.
        if pattern_type.symbol == 'adef':
            ic_set |= ADEF_ICS
        if ic.symbol == 'f':
            ic_set.add(F_OSC)

    pattern_types_by_ics = GroupDict()
    for pattern_type, ic_set in ics_by_pattern_type.iteritems():
        # Sort by id so that equal sets always produce the same key.
        ordered_ics = sorted(ic_set, key=lambda ic: ic.id)
        pattern_types_by_ics.add(tuple(ordered_ics), pattern_type)
    return pattern_types_by_ics


def add_restrictions(x, pts, ics, attr_vals):
    """Attach pattern types, inflection characteristics and values to ``x``.

    :param x: object exposing ``pattern_types``,
        ``inflection_characteristics`` and ``attribute_values``, each with an
        ``add`` method.
    :param pts: iterable of pattern types to add.
    :param ics: non-empty sequence of inflection characteristics to add; the
        part of speech of the first one decides how ``attr_vals`` is
        filtered.
    :param attr_vals: iterable of attribute values.  When the part of speech
        symbol is ``'v'`` only the values whose ``value`` equals the symbol
        of one of ``ics`` are added; otherwise all of them are.
    :raises IndexError: if ``ics`` is empty.
    """
    for pattern_type in pts:
        x.pattern_types.add(pattern_type)
    for characteristic in ics:
        x.inflection_characteristics.add(characteristic)

    symbols = set(characteristic.symbol for characteristic in ics)
    is_verb = ics[0].part_of_speech.symbol == 'v'
    for attr_val in attr_vals:
        if is_verb and attr_val.value not in symbols:
            # For verbs, skip values that match none of the ic symbols.
            continue
        x.attribute_values.add(attr_val)


def convert_table_cells(cell_group, new_cell, attr_vals):
    """Create new ``TableCell`` rows for a group of old cells.

    The old cells are grouped by their position (row, col, rowspan, colspan
    of the related ``tablecell``) together with the cell's ``index``.  For
    every such group and every rectangle returned by ``rectangles()`` for
    the templates of that group, one ``TableCell`` is saved and restricted
    with ``add_restrictions()``.

    :param cell_group: iterable of old cells exposing ``tablecell`` and
        ``index``.
    :param new_cell: dict of extra keyword arguments passed to ``TableCell``.
    :param attr_vals: attribute values forwarded to ``add_restrictions()``.
    """
    cells_by_position = GroupDict()
    for old_cell in cell_group:
        placement = old_cell.tablecell
        position = (
            placement.row, placement.col,
            placement.rowspan, placement.colspan,
            old_cell.index,
        )
        cells_by_position.add(position, old_cell)

    for position, positioned_cells in cells_by_position.iteritems():
        row, col, rowspan, colspan, index = position
        old_templates = TableTemplate.objects.filter(
            cell__in=positioned_cells)
        for ics, pts in rectangles(old_templates).iteritems():
            created = TableCell(
                row=row, col=col, rowspan=rowspan, colspan=colspan,
                index=index, **new_cell)
            created.save()
            add_restrictions(created, pts, ics, attr_vals)


def _export_tag_template(tag, ic_symbol):
    """Return ``tag`` with its lexeme-dependent part replaced by a placeholder.

    The part of speech is the text before the first ``':'``:

    * for ``ASPECT_POS`` every ``imperf.perf``, ``imperf`` and ``perf`` is
      replaced with ``ASPECT_TAG``;
    * for ``GENDER_POS`` the last ``':'``-separated field is replaced with
      ``GENDER_TAG``, but only when it equals ``ic_symbol``;
    * for ``PERSON_POS`` / ``CASE_POS`` every occurrence of ``ic_symbol`` is
      replaced with ``PERSON_TAG`` / ``CASE_TAG``.

    An empty ``ic_symbol`` never triggers the person/case replacement:
    ``str.replace('', X)`` would insert ``X`` between all characters.
    """
    pos = tag.split(':', 1)[0]
    if pos in ASPECT_POS:
        # The combined value must go first, otherwise it would turn into
        # two placeholders joined by a dot.
        tag = tag.replace('imperf.perf', ASPECT_TAG)
        tag = tag.replace('imperf', ASPECT_TAG).replace('perf', ASPECT_TAG)
    elif pos in GENDER_POS and tag.split(':')[-1] == ic_symbol:
        tag = tag[:tag.rfind(':')] + ':' + GENDER_TAG
    elif pos in PERSON_POS and ic_symbol:
        tag = tag.replace(ic_symbol, PERSON_TAG)
    elif pos in CASE_POS and ic_symbol:
        tag = tag.replace(ic_symbol, CASE_TAG)
    return tag


def convert_export_cells(cell_group, new_cell, attr_vals):
    """Create new ``ExportCell`` rows for a group of old cells.

    The old cells are grouped by the tag template derived from their ``tag``
    and the symbol of their template's inflection characteristic.  For every
    group and every rectangle returned by ``rectangles()`` for the templates
    of that group, one ``ExportCell`` is saved and restricted with
    ``add_restrictions()``.

    :param cell_group: iterable of old cells exposing ``tag`` and
        ``table_template.inflection_characteristic``.
    :param new_cell: dict of extra keyword arguments passed to
        ``ExportCell``.
    :param attr_vals: attribute values forwarded to ``add_restrictions()``.
    """
    export_groups = GroupDict()
    for cell in cell_group:
        ic = cell.table_template.inflection_characteristic
        export_groups.add(_export_tag_template(cell.tag, ic.symbol), cell)
    for tag_template, export_group in export_groups.iteritems():
        templates = TableTemplate.objects.filter(cell__in=export_group)
        for ics, pts in rectangles(templates).iteritems():
            new_export_cell = ExportCell(
                tag_template=tag_template, **new_cell)
            new_export_cell.save()
            add_restrictions(new_export_cell, pts, ics, attr_vals)


def convert_cells(cells, new_template, variant, attr_vals):
    """Convert old cells into cells of ``new_template``.

    Cells are grouped by ``(prefix, base_form_label, suffix)``.  Each group
    is handed to ``convert_table_cells()`` when ``variant.type`` is
    ``'table'`` and to ``convert_export_cells()`` otherwise, together with
    the keyword arguments shared by all new cells of the group.

    :param cells: iterable of old cells.
    :param new_template: template the new cells are attached to.
    :param variant: object whose ``type`` selects the kind of cell created.
    :param attr_vals: attribute values forwarded to the converters.
    """
    cells_by_form = GroupDict()
    for cell in cells:
        form_key = (cell.prefix, cell.base_form_label, cell.suffix)
        cells_by_form.add(form_key, cell)

    for (prefix, label, suffix), group in cells_by_form.iteritems():
        shared_kwargs = {
            'table_template': new_template,
            'base_form_label': label,
            'prefix': prefix,
            'suffix': suffix
        }
        converter = (
            convert_table_cells if variant.type == 'table'
            else convert_export_cells)
        converter(group, shared_kwargs, attr_vals)


def convert_headers(headers, new_template, attr_vals):
    """Create new ``TableHeader`` rows for ``new_template``.

    Old headers are grouped by
    ``(row, col, rowspan, colspan, label, css_class)``.  For every group and
    every rectangle returned by ``rectangles()`` for the templates of that
    group, one ``TableHeader`` is saved and restricted with
    ``add_restrictions()``.

    :param headers: iterable of old headers.
    :param new_template: template the new headers are attached to.
    :param attr_vals: attribute values forwarded to ``add_restrictions()``.
    """
    headers_by_shape = GroupDict()
    for header in headers:
        shape = (
            header.row, header.col, header.rowspan, header.colspan,
            header.label, header.css_class,
        )
        headers_by_shape.add(shape, header)

    for shape, same_shape_headers in headers_by_shape.iteritems():
        row, col, rowspan, colspan, label, css_class = shape
        old_templates = TableTemplate.objects.filter(
            tableheader__in=same_shape_headers)
        for ics, pts in rectangles(old_templates).iteritems():
            created = TableHeader(
                table_template=new_template,
                row=row, col=col, rowspan=rowspan, colspan=colspan,
                label=label, css_class=css_class)
            created.save()
            add_restrictions(created, pts, ics, attr_vals)


def convert_tables():
    """Rebuild the table templates described by ``TABLE_TEMPLATES``.

    Deletes every ``TableTemplate`` and then, for each entry of
    ``TABLE_TEMPLATES``:

    1. saves a new template and links its parts of speech, pattern types,
       attributes, attribute values and cell attributes;
    2. selects the cells and headers of the templates matching the entry
       (by part of speech, variant, pattern type symbols and inflection
       characteristic symbols);
    3. converts the cells, and for variants of type ``'table'`` also the
       headers.

    NOTE(review): all ``TableTemplate`` rows are deleted up front, yet the
    same model is queried afterwards through ``inflection_characteristic``
    and ``pattern_type`` lookups.  The command is disabled as stale; verify
    these queries against the current models before reviving it.
    """
    TableTemplate.objects.all().delete()
    for template_name, variant_id, poses, p_types, attrs in TABLE_TEMPLATES:
        variant = Variant.objects.get(id=variant_id)
        new_template = TableTemplate(name=template_name, variant=variant)
        new_template.save()
        for pos in poses:
            new_template.parts_of_speech.add(pos)

        # Pattern types of the lexical classes of the listed parts of
        # speech, optionally narrowed down by symbol.
        pattern_types = PatternType.objects.filter(
            lexical_class__in=LexicalClass.objects.filter(
                partofspeech__symbol__in=poses))
        if p_types:
            pattern_types = pattern_types.filter(symbol__in=p_types)
        for pattern_type in pattern_types:
            new_template.pattern_types.add(pattern_type)

        # attrs is either empty or (attr_specs, cell_attr_names).
        attr_specs = attrs[0] if attrs else ()
        for attr_name, values, _ics in attr_specs:
            attribute = LexemeAttribute.objects.get(name=attr_name)
            new_template.attributes.add(attribute)
            for value in attribute.values.filter(value__in=values):
                new_template.attribute_values.add(value)
        if attrs:
            cell_attrs = LexemeAttribute.objects.filter(name__in=attrs[1])
        else:
            cell_attrs = ()
        attr_vals = LexemeAttributeValue.objects.filter(
            attribute__in=cell_attrs)
        for cell_attr in cell_attrs:
            new_template.cell_attributes.add(cell_attr)

        # Source cells and headers to convert for this entry.
        source_templates = TableTemplate.objects.filter(
            inflection_characteristic__part_of_speech__symbol__in=poses,
            variant=variant)
        cells = Cell.objects.filter(table_template__in=source_templates)
        headers = TableHeader.objects.filter(
            table_template__in=source_templates)
        if p_types:
            cells = cells.filter(
                table_template__pattern_type__symbol__in=p_types)
            headers = headers.filter(
                table_template__pattern_type__symbol__in=p_types)
        for _attr_name, _values, ic_symbols in attr_specs:
            cells = cells.filter(
                table_template__inflection_characteristic__symbol__in=
                ic_symbols)
            headers = headers.filter(
                table_template__inflection_characteristic__symbol__in=
                ic_symbols)

        convert_cells(cells, new_template, variant, attr_vals)
        if variant.type == 'table':
            convert_headers(headers, new_template, attr_vals)