# check_ispell.py (1.03 KB)
# -*- coding: utf-8 -*-

from django.core.management.base import BaseCommand
from dictionary.models import Lexeme

class Command(BaseCommand):
  """Management command that runs ``check_ispell`` on one input file."""

  help = ''
  args = '<input file>'

  def handle(self, input_file, **options):
    """Pass the positional ``input_file`` argument on to ``check_ispell``.

    The keyword ``options`` are accepted but not used.
    """
    check_ispell(input_file)

def inc_count(d, key):
  """Add one to the counter stored under ``key`` in ``d``.

  A key that is not yet present is treated as having a count of zero.
  The mapping is modified in place; nothing is returned.
  """
  current = d.get(key, 0)
  d[key] = 1 + current

def dict_repr(d):
  """Render a mapping as ``key:value`` pairs joined with ``|``.

  Pairs appear in the mapping's own iteration order; an empty mapping
  yields an empty string.
  """
  # items() is available on both Python 2 and Python 3 mappings, whereas
  # iteritems() exists only on Python 2 dicts.
  return '|'.join('%s:%s' % (key, value) for key, value in d.items())

def check_ispell(input_file):
  """Print, per ispell flag string, counts of matching lexeme properties.

  Every non-blank line of ``input_file`` is decoded as UTF-8 and read as
  ``entry/flags``.  For each line, the lexemes with ``deleted=False`` and a
  matching ``entry`` (excluding those with status ``'cand'``) are looked up,
  and their part-of-speech symbols and inflection characteristics are
  counted under the line's flag string.

  After the whole file is read, one UTF-8 encoded line per distinct flag
  string is printed in the form ``flags=<pos counts>,<ics counts>``, where
  each counts section is formatted by ``dict_repr``.

  Lines without a ``/`` are counted under the empty flag string, and blank
  lines are skipped.
  """
  results = {}
  # The context manager closes the file even if a database lookup raises.
  with open(input_file) as source:
    for line in source:
      line = line.decode('utf-8').strip()
      if not line:
        continue
      # partition() tolerates entries that carry no "/flags" part, where
      # unpacking the result of split('/', 1) would raise ValueError.
      entry, _, flags = line.partition('/')
      lexemes = Lexeme.objects.filter(deleted=False, entry=entry).exclude(
        status='cand')
      counts = results.setdefault(flags, {'pos': {}, 'ics': {}})
      for lexeme in lexemes:
        ics = lexeme.lip_data()['inflection_characteristics']
        inc_count(counts['pos'], lexeme.part_of_speech.symbol)
        inc_count(counts['ics'], ics)
  for flags, res in results.items():
    summary = '%s=%s,%s' % (
      flags, dict_repr(res['pos']), dict_repr(res['ics']))
    # A single parenthesised expression, so the Python 2 print statement
    # emits the encoded bytes exactly as before.
    print(summary.encode('utf-8'))