# mark_to_delete.py
#-*- coding:utf-8 -*-

import random
import time

from django.core.management.base import BaseCommand

from verification.duckduckgo import DuckDuckGo
from webapp.models import Meaning, MeaningStatus

SOURCE = 'szarada'


class Command(BaseCommand):
    """Django management command that runs the mark-to-delete procedure."""

    # Short description of the command (Django's ``BaseCommand.help``).
    help = 'Mark expressions to delete.'

    def handle(self, *args, **options):
        """Run the command; arguments and options are accepted but unused."""
        mark_to_delete()


def mark_to_delete():
    """Run both verification passes over the meanings linked to ``SOURCE``.

    Pass 1 (``check_meanings``) verifies catchword/expression pairs and
    pass 2 (``check_expressions``) verifies single expressions.  Each pass
    is repeated on the meanings it returns for retry until none are left.
    """
    duckduckgo = DuckDuckGo()
    # distinct(): the filter joins through the multi-valued ``expressions``
    # relation, which could otherwise yield the same meaning several times
    # and cause it to be checked (and the search engine queried) repeatedly.
    meanings = Meaning.objects.filter(
        expressions__link__source__key=SOURCE).distinct()

    # Each pass works on its own retry list.  Reusing a single variable for
    # both passes would leave it empty after the first loop, so the second
    # pass would never run.
    pending = meanings
    while pending:
        pending = check_meanings(duckduckgo, pending)

    # ``.all()`` gives an unevaluated copy of the queryset, so the second
    # pass re-reads the statuses saved during the first one.
    pending = meanings.all()
    while pending:
        pending = check_expressions(duckduckgo, pending)


def check_meanings(duckduckgo, meanings):
    check_again_meanings = []
    for meaning in meanings:
        # meaning.comment = ''
        # meaning.status = None
        # meaning.save()

        if meaning.status:
            continue

        for catchword in meaning.expressions.filter(is_catchword=True):
            for expression in meaning.expressions.exclude(is_catchword=True):
                time.sleep(random.uniform(0.5, 3.0))
                try:
                    if not duckduckgo.check_entry(catchword, expression):
                        print 'Erase:\t',  catchword.orth_text, u'\t-->\t', expression.orth_text
                        if not meaning.status:
                            meaning.comment = u'Do usunięcia:\t%s' % expression.orth_text
                            meaning.status = MeaningStatus.objects.get(key='delete')
                            meaning.save()
                    else:
                        print 'OK:\t',  catchword.orth_text, u'\t-->\t', expression.orth_text
                except RuntimeError:
                    print 'Try again:\t', catchword.orth_text, u'\t-->\t', expression.orth_text
                    check_again_meanings.append(meaning)
                    break
    return check_again_meanings


def check_expressions(duckduckgo, meanings):
    check_again_meanings = []
    for meaning in meanings:

        if meaning.status:
            continue

        for expression in meaning.expressions.all():
            time.sleep(random.uniform(0.5, 3.0))
            try:
                if not duckduckgo.check_expression(expression):
                    print 'Erase:\t', expression.orth_text
                    if not meaning.status:
                        meaning.comment = u'Do usunięcia:\t%s' % expression.orth_text
                        meaning.status = MeaningStatus.objects.get(key='delete')
                        meaning.save()
                else:
                    print 'OK:\t',  expression.orth_text
            except RuntimeError:
                print 'Try again:\t', expression.orth_text
                check_again_meanings.append(meaning)
                break
    return check_again_meanings