# check_ispell.py 1.1 KB
# -*- coding: utf-8 -*-

from django.core.management.base import BaseCommand
from dictionary.models import Lexeme


class Command(BaseCommand):
    """Management command wrapping :func:`check_ispell`.

    Takes a single positional argument: the path of the input file.
    """

    args = '<input file>'
    # Shown by ``manage.py help``; previously empty, which left the command
    # undocumented on the command line.
    help = ('For each flags string found in the given ispell-style input '
            'file (lines of the form entry/flags), prints counts of parts '
            'of speech and inflection characteristics of the matching '
            'lexemes.')

    def handle(self, input_file, **options):
        """Run the check on ``input_file``; ``options`` are not used."""
        check_ispell(input_file)


def inc_count(d, key):
    """Add one to the counter stored under ``key`` in ``d``.

    A key that is not yet present is treated as having a count of zero.
    ``d`` is modified in place and nothing is returned.
    """
    current = d.get(key, 0)
    d[key] = 1 + current


def dict_repr(d):
    """Render a mapping as ``key:value`` pairs joined with ``|``.

    Pairs appear in the mapping's own iteration order. An empty mapping
    yields an empty string.
    """
    # items() is available on both Python 2 and Python 3 dicts, whereas
    # iteritems() exists only on Python 2.
    return '|'.join('%s:%s' % (key, value) for key, value in d.items())


def check_ispell(input_file):
    """Print per-flags statistics about the lexemes named in ``input_file``.

    Every non-empty line of ``input_file`` (read as UTF-8) is split at the
    first ``/`` into an entry and its flags; a line without ``/`` is treated
    as an entry with empty flags.  For each distinct flags string, the
    lexemes whose ``entry`` equals the entry (excluding those with status
    ``'cand'``) are tallied by part-of-speech symbol and by the
    ``'inflection_characteristics'`` value of their ``lip_data()``.

    One line per flags string is then printed, in the form
    ``flags=<pos tallies>,<ics tallies>`` where each tally list is rendered
    by ``dict_repr``.
    """
    import io  # local import: encoding-aware open() on Python 2 and 3

    results = {}
    # The context manager closes the file even if a database lookup raises.
    with io.open(input_file, encoding='utf-8') as ispell_file:
        for line in ispell_file:
            line = line.strip()
            if not line:
                continue
            # partition() tolerates entries that carry no flags part, where
            # split('/', 1) followed by unpacking would raise ValueError.
            entry, _sep, flags = line.partition('/')
            lexemes = Lexeme.objects.filter(entry=entry).exclude(status='cand')
            # Register the flags even when no lexeme matches, so they still
            # appear in the report.
            stats = results.setdefault(flags, {'pos': {}, 'ics': {}})
            for lexeme in lexemes:
                ics = lexeme.lip_data()['inflection_characteristics']
                inc_count(stats['pos'], lexeme.part_of_speech.symbol)
                inc_count(stats['ics'], ics)
    for flags, res in results.items():
        # The tallies live under 'ics'; looking up 'ic' raised KeyError.
        report = '%s=%s,%s' % (
            flags, dict_repr(res['pos']), dict_repr(res['ics']))
        if not isinstance(report, str):
            # Python 2 ``unicode``: encode explicitly so that output does
            # not depend on the terminal's encoding.
            report = report.encode('utf-8')
        print(report)