check_ispell.py
1.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# -*- coding: utf-8 -*-
from django.core.management.base import BaseCommand
from common.util import uniprint, uniopen
from dictionary.models import Lexeme
class Command(BaseCommand):
    """Management command that runs ``check_ispell`` on the given input file."""

    help = ''
    args = '<input file>'

    def handle(self, input_file, **options):
        """Pass ``input_file`` to ``check_ispell``; ``options`` are not used."""
        check_ispell(input_file)
def inc_count(d, key):
    """Add one to the counter stored under ``key`` in ``d``, in place.

    A key that is not present yet is treated as having a count of 0.
    """
    current = d.get(key, 0)
    d[key] = 1 + current
def dict_repr(d):
    """Render a mapping as ``key:value`` pairs joined with ``|``.

    An empty mapping yields an empty string. Pairs appear in the mapping's
    own iteration order.
    """
    # items() exists on both Python 2 and Python 3, whereas iteritems() is
    # Python 2 only and raises AttributeError on Python 3.
    return '|'.join('%s:%s' % (key, value) for key, value in d.items())
def check_ispell(input_file):
    """Report, per ispell flag string, which lexemes carry those flags.

    Every line of ``input_file`` (read through ``uniopen``) is expected to
    look like ``entry/flags``. For each line, the ``Lexeme`` rows whose
    ``entry`` matches (excluding those with status ``'cand'``) are counted
    under the line's flag string, once by part-of-speech symbol and once by
    inflection characteristic. Afterwards one line of the form
    ``flags=<pos counts>,<ics counts>`` is written with ``uniprint`` for each
    distinct flag string, including flag strings that matched no lexeme.

    Blank lines are skipped; a line without ``/`` is counted under the empty
    flag string.

    Args:
        input_file: location of the word list, passed unchanged to
            ``uniopen``.
    """
    results = {}
    for line in uniopen(input_file):
        line = line.strip()
        if not line:
            continue
        # partition() tolerates words that carry no flags at all, where
        # split('/', 1) would make the two-name unpacking raise ValueError.
        entry, _, flags = line.partition('/')
        # Register the flag string even when no lexeme matches, so that it
        # still shows up in the report with empty counts.
        stats = results.setdefault(flags, {'pos': {}, 'ics': {}})
        lexemes = Lexeme.objects.filter(entry=entry).exclude(status='cand')
        for lexeme in lexemes:
            ics = lexeme.lip_data()['inflection_characteristics']
            inc_count(stats['pos'], lexeme.part_of_speech.symbol)
            inc_count(stats['ics'], ics)
    for flags, res in results.items():
        # The counters are stored under 'ics'; reading 'ic' here raised
        # KeyError before anything could be printed.
        uniprint(
            '%s=%s,%s' % (flags, dict_repr(res['pos']), dict_repr(res['ics'])))