#-*- coding:utf-8 -*-

# author: B.Niton

import re
import os
import codecs

from django.core.management.base import BaseCommand

from dictionary.models import *
from settings import PROJECT_PATH

FREQ_300M_PATH = os.path.join(PROJECT_PATH, 'data', 'dis-verbs-300M-counts.txt')
FREQ_1M_PATH = os.path.join(PROJECT_PATH, 'data', 'dis-verbs-1M-counts.txt')

# One line of a counts file: an optional leading "**", a whitespace-free
# token (the lemma) and an integer (its frequency); anything after the
# digits is ignored.  Compiled once here instead of once per input line.
# The pattern is pure ASCII, so a plain raw string matches exactly what the
# former ur'' literal did while also being valid Python 3 syntax.
FREQ_LINE_PATTERN = re.compile(r'^(\*\*)?[\s]*([^\s]+)[\s]*([\d]+).*$')


class Command(BaseCommand):
    """Management command that loads verb lemma frequencies from the counts files."""

    help = 'Load lemma frequency informations to system.'

    def handle(self, **options):
        """Entry point invoked by Django; takes no command-specific options."""
        load_frequency()


def load_frequency():
    """Fill ``frequency_300M`` and ``frequency_1M`` on verb lemmas.

    Reads the counts file at FREQ_300M_PATH, then the one at FREQ_1M_PATH,
    and stores each listed frequency on the matching ``Lemma`` rows.
    """
    _load_frequency_file(FREQ_300M_PATH, 'frequency_300M')
    _load_frequency_file(FREQ_1M_PATH, 'frequency_1M')


def _load_frequency_file(path, field_name):
    """Copy the frequencies listed in one counts file onto verb lemmas.

    path       -- UTF-8 encoded counts file, one entry per line.
    field_name -- name of the ``Lemma`` attribute that receives the count
                  ('frequency_300M' or 'frequency_1M').

    Lines that do not match FREQ_LINE_PATTERN are skipped.  Every lemma that
    gets a value is printed to stdout.
    """
    # codecs.open() always opens the underlying file in binary mode when an
    # encoding is given, so plain 'r' is equivalent to the former 'rt' on
    # Python 2 and, unlike 'rt', is also accepted on Python 3.
    with codecs.open(path, 'r', 'utf-8') as infile:
        for line in infile:
            match = FREQ_LINE_PATTERN.match(line.strip())
            if not match:
                continue

            lemma_str = match.group(2).strip()
            frequency = int(match.group(3))

            lemmas = Lemma.objects.filter(entry=lemma_str,
                                          entry_obj__pos__tag='verb')
            for lemma in lemmas:
                # NOTE(review): this file was reconstructed from a copy that
                # had lost its indentation.  The assignment and save() are
                # assumed to be guarded by the zero check (only lemmas with
                # no frequency yet are filled in) -- confirm against the
                # original layout.
                if getattr(lemma, field_name) == 0:
                    print(lemma)
                    setattr(lemma, field_name, frequency)
                    lemma.save()