#-*- coding:utf-8 -*-

# author: B.Niton

import codecs
import operator
from functools import reduce
from optparse import make_option

from django.core.management.base import BaseCommand, CommandError
from django.db.models import Q

from dictionary.models import *

DEFAULT_SAVE_PATH = 'tmp/frames_freq.txt'


class Command(BaseCommand):
    """Management command that writes a frames frequency list to a file.

    Lemmas are first narrowed to non-old entries whose status priority is at
    least that of the status chosen with ``--status``. The verbs are then
    taken either from a file (``--filepath``) or from the listed
    dictionaries (``--dicts``), and the result is written to ``--out``.
    """

    help = 'Gets frames frequency list.'
    option_list = BaseCommand.option_list + (
        make_option('--status',
                    action='store',
                    default='checked',
                    help='Minimal lemma status. Values: all, progress, done, checked.'),
        make_option('--filepath',
                    action='store',
                    default=None,
                    help='Path to file with verbs list.'),
        make_option('--dicts',
                    action='store',
                    default='all',
                    help='List of dicts to select verbs from.'),
        make_option('--out',
                    action='store',
                    default=DEFAULT_SAVE_PATH,
                    help='Path to output file.'),
    )

    # Maps each accepted --status value to the status name looked up in
    # Lemma_Status.
    _MIN_STATUS_NAMES = {
        'all': u'do obróbki',
        'progress': u'w obróbce',
        'done': u'gotowe',
        'checked': u'sprawdzone',
    }

    def handle(self, *args, **options):
        """Select the lemmas matching the options and write their frame list.

        Raises:
            CommandError: if ``--status`` is not one of the accepted values.
            Lemma_Status.DoesNotExist: if the chosen status name is not
                present in the database.
        """
        # Single-argument print(...) behaves the same as the print statement
        # on Python 2.
        print(options['status'])

        min_status = self._MIN_STATUS_NAMES.get(options['status'])
        if min_status is None:
            # Stop here: continuing would query Lemma_Status with
            # status=None and fail with an unrelated DoesNotExist error.
            raise CommandError('Select proper status name.')

        sel_min_status = Lemma_Status.objects.get(status=min_status)
        sel_statuses = [
            lemma_status
            for lemma_status in Lemma_Status.objects.all()
            if lemma_status.priority >= sel_min_status.priority
        ]
        # sel_statuses always contains sel_min_status itself, so reduce()
        # never receives an empty sequence.
        status_filter = reduce(operator.or_,
                               (Q(status=status) for status in sel_statuses))
        lemmas = Lemma.objects.filter(old=False).filter(status_filter)

        if options['filepath']:
            get_frames_freq_file(lemmas, options['filepath'], options['out'])
        elif options['dicts']:
            # Dictionary names are separated by whitespace.
            dicts_list = options['dicts'].split()
            get_frames_freq_dicts(lemmas, dicts_list, options['out'])
        else:
            print('No verbs input selected.')

def get_frames_freq_file(lemmas, verbs_path, out):
    """Write the frames frequency list for the verbs listed in a file.

    Args:
        lemmas: queryset-like object; ``lemmas.get(entry=...)`` is called
            once per line of the input file.
        verbs_path: path to a UTF-8 text file with one verb per line.
        out: path of the output file.

    Lines whose verb raises ``Lemma.DoesNotExist`` are skipped.
    """
    lemmas_ls = []
    # Plain 'r' on purpose: codecs.open() appends 'b' to the mode itself, and
    # a combined text+binary mode ('rtb') is rejected by Python 3.
    with codecs.open(verbs_path, 'r', 'utf8') as infile:
        for line in infile:
            pos_lemma = line.strip()
            try:
                lemmas_ls.append(lemmas.get(entry=pos_lemma))
            except Lemma.DoesNotExist:
                # Verbs absent from the selected lemmas are ignored.
                pass
    write_frame_freq(lemmas_ls, out)


def get_frames_freq_dicts(lemmas, dicts_list, out):
    """Write the frames frequency list for verbs in the selected dicts.

    Args:
        lemmas: queryset of lemmas to narrow down by vocabulary name.
        dicts_list: list of vocabulary names; if it contains ``'all'``,
            the names of every ``Vocabulary`` object are added.
        out: path of the output file.
    """
    # Work on a copy so the caller's list is not modified.
    vocab_names = list(dicts_list)
    if 'all' in vocab_names:
        vocab_names.extend(vocab.name for vocab in Vocabulary.objects.all())
    # An ``in`` lookup matches the same rows as OR-ing one Q object per name,
    # and unlike reduce() it also copes with an empty list of names.
    lemmas = lemmas.filter(vocabulary__name__in=vocab_names)
    write_frame_freq(lemmas.all(), out)


def write_frame_freq(lemmas, out):
    """Write the frames frequency list for the given lemmas to a file.

    Each frame's ``text_rep`` is split on ``':'`` and reduced to its first
    and third fields joined by ``':'``; occurrences of each reduced form are
    counted over all frames of all lemmas. The output has one line per form,
    ``<freq> <form>``, ordered by descending frequency (ties keep the order
    in which the forms were first seen), with spaces inserted around ``+``
    and after ``:`` and ``;``.

    Args:
        lemmas: iterable of objects exposing ``frames.all()``, which yields
            objects with a ``text_rep`` string attribute.
        out: path of the UTF-8 output file.

    Raises:
        IndexError: if a ``text_rep`` has fewer than three ``':'``-separated
            fields.
        IOError: if the output file cannot be opened or written.
    """
    frames_freq_ls = []
    # text_rep -> its entry in frames_freq_ls, so each lookup is O(1) instead
    # of rescanning the whole list for every frame.
    entries_by_rep = {}
    for lemma in lemmas:
        for frame in lemma.frames.all():
            text_rep_frg = frame.text_rep.split(":")
            text_rep = text_rep_frg[0] + ':' + text_rep_frg[2]
            entry = entries_by_rep.get(text_rep)
            if entry is None:
                entry = {'text_rep': text_rep, 'freq': 0}
                entries_by_rep[text_rep] = entry
                frames_freq_ls.append(entry)
            entry['freq'] += 1

    # Console output retained from the original implementation; the
    # single-argument print(...) form prints the same on Python 2.
    print(frames_freq_ls)
    # list.sort() is stable, so equal frequencies keep first-seen order.
    frames_freq_ls.sort(key=operator.itemgetter('freq'), reverse=True)
    print(frames_freq_ls)

    # The context manager closes the file on every path. If the open itself
    # fails, the real I/O error propagates rather than a NameError from a
    # cleanup step touching a file object that was never created.
    with codecs.open(out, 'w', 'utf-8') as outfile:
        for frame in frames_freq_ls:
            print(frame)
            formatted_rep = (frame['text_rep'].strip()
                             .replace('+', ' + ')
                             .replace(':', ': ')
                             .replace(';', '; '))
            outfile.write(str(frame['freq']) + ' ' + formatted_rep + '\n')