# prep_freq_list.py 2.64 KB
#-*- coding:utf-8 -*-

#Copyright (c) 2012, Bartłomiej Nitoń
#All rights reserved.

#Redistribution and use in source and binary forms, with or without modification, are permitted provided 
#that the following conditions are met:

#    Redistributions of source code must retain the above copyright notice, this list of conditions and 
#    the following disclaimer.
#    Redistributions in binary form must reproduce the above copyright notice, this list of conditions 
#    and the following disclaimer in the documentation and/or other materials provided with the distribution.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED 
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
# POSSIBILITY OF SUCH DAMAGE.

"""Script for printing a frequency list of preposition usage."""

from django.core.management.base import BaseCommand

from dictionary.models import *

class Command(BaseCommand):
    """SloVal management command that prints the preposition frequency list.

    The command itself holds no logic; it only calls the module-level
    prep_freq_ls() function, which does the counting and the printing.
    """
    help = 'Get prepositions frequency use list.'

    def handle(self, *args, **options):
        """Run the preposition frequency report.

        Positional arguments and options are accepted but not used.
        *args is part of the signature so that this override matches the
        documented BaseCommand.handle(*args, **options) contract and does
        not raise TypeError when positional arguments are forwarded to it.
        """
        prep_freq_ls()

def prep_freq_ls():
    """Print how often each preposition value is used, most frequent first.

    The list is seeded (with a count of 0) from the attribute values of the
    Atribute_Model named 'PRZYIMEK'.  Then every argument attribute of type
    'PRZYIMEK' or 'PRZYIMEK1' found in the frames of lemmas with old=False
    and status 'sprawdzone' increments the count of its attribute value.
    A value that was not in the seeded list gets its own entry instead of
    aborting the run.

    Output goes to stdout, one line per value: '<value>:  <count>'.
    Returns None.

    NOTE(review): Q and smart_str are not imported explicitly in this file;
    presumably they come in through the wildcard import of dictionary.models
    -- confirm.
    """
    # Single-argument print(...) behaves identically as a Python 2 statement
    # and as a Python 3 function call.
    print('Be patient, it can take a while.')
    prep_ls = []
    # Maps a preposition value to its entry in prep_ls so that each hit is a
    # dictionary lookup rather than a scan over the whole list.
    entry_by_value = {}
    for prep in Atribute_Model.objects.get(atr_model_name='PRZYIMEK').atribute_values.all():
        entry = {'prep': prep.value, 'count': 0}
        prep_ls.append(entry)
        # If a value occurs more than once, the first entry receives the
        # counts -- the same entry a front-to-back search would find.
        entry_by_value.setdefault(prep.value, entry)
    lemmas = Lemma.objects.filter(old=False, status__status=u'sprawdzone')
    for lemma in lemmas:
        for frame in lemma.frames.all():
            for position in frame.positions.all():
                for arg in position.arguments.all():
                    for attr in arg.atributes.filter(Q(type='PRZYIMEK') | Q(type='PRZYIMEK1')):
                        value = attr.atribute_value.value
                        entry = entry_by_value.get(value)
                        if entry is None:
                            # Value missing from the seeded list: count it
                            # anyway rather than failing part-way through.
                            entry = {'prep': value, 'count': 0}
                            prep_ls.append(entry)
                            entry_by_value[value] = entry
                        entry['count'] += 1
    # Stable ascending sort followed by reverse() gives descending counts and
    # keeps the established order among entries with equal counts
    # (sort(reverse=True) would order ties differently).
    prep_ls.sort(key=lambda item: item['count'])
    prep_ls.reverse()
    for prep in prep_ls:
        print(smart_str(prep['prep']) + ':  ' + str(prep['count']))