check_ispell.py
1.03 KB
# -*- coding: utf-8 -*-
import sys

from django.core.management.base import BaseCommand

from dictionary.models import Lexeme
class Command(BaseCommand):
    """Management command that runs ``check_ispell`` on one input file."""

    help = ''
    # Placeholder describing the single positional argument.
    args = '<input file>'

    def handle(self, input_file, **options):
        """Run ``check_ispell`` on ``input_file``.

        ``options`` is accepted but not used here.
        """
        check_ispell(input_file)
def inc_count(d, key):
    """Increase the counter stored in ``d`` under ``key`` by one.

    A key that is not present yet is treated as having a count of zero.
    The mapping is modified in place; nothing is returned.
    """
    previous = d.get(key, 0)
    d[key] = 1 + previous
def dict_repr(d):
    """Render a mapping as ``key:value`` pairs joined with ``|``.

    Pairs appear in the mapping's own iteration order. An empty mapping
    yields an empty string.
    """
    # ``items()`` exists on both Python 2 and Python 3, whereas
    # ``iteritems()`` is Python 2 only and raises AttributeError on 3.
    return '|'.join('%s:%s' % (key, value) for key, value in d.items())
def check_ispell(input_file):
    """Print, per flag string, counts describing the matching lexemes.

    Each non-blank line of ``input_file`` is decoded as UTF-8 and split at
    the first ``/`` into an entry and its flags; a line without ``/`` is
    treated as an entry with empty flags. For every flag string, the
    non-deleted, non-candidate ``Lexeme`` rows whose ``entry`` matches are
    tallied by part-of-speech symbol and by inflection characteristics.

    One line per flag string is printed in the form
    ``<flags>=<pos counts>,<ics counts>``, with each count group formatted
    by ``dict_repr``.

    Args:
        input_file: path of the file to read.
    """
    results = {}
    # Reading bytes and decoding explicitly behaves the same whether
    # ``open`` returns byte or text lines; ``with`` guarantees the handle
    # is closed even if a lookup below raises.
    with open(input_file, 'rb') as source:
        for raw_line in source:
            line = raw_line.decode('utf-8').strip()
            if not line:
                # Blank lines carry no entry; skip them instead of failing.
                continue
            # ``partition`` never raises: entries without a ``/`` simply
            # end up with an empty flag string.
            entry, _, flags = line.partition('/')
            lexemes = Lexeme.objects.filter(
                deleted=False, entry=entry).exclude(status='cand')
            counts = results.setdefault(flags, {'pos': {}, 'ics': {}})
            for lexeme in lexemes:
                ics = lexeme.lip_data()['inflection_characteristics']
                inc_count(counts['pos'], lexeme.part_of_speech.symbol)
                inc_count(counts['ics'], ics)
    for flags, res in results.items():
        text = '%s=%s,%s' % (
            flags, dict_repr(res['pos']), dict_repr(res['ics']))
        if sys.version_info[0] < 3:
            # On Python 2 emit UTF-8 bytes explicitly so printing does not
            # depend on the encoding of stdout.
            text = text.encode('utf-8')
        print(text)