# duckduckgo.py 4.84 KB
import codecs
import itertools
import os

from tempfile import mkdtemp, mkstemp
from subprocess import check_call

import settings


class DuckDuckGo:
    """Count search hits for expressions by shelling out to ``ddgr``.

    Every public method builds one or two quoted query strings, runs
    ``ddgr -n <min_hits> --json <queries...>`` and derives its result from
    the number of lines the command wrote to standard output.
    """

    # Number of stdout lines attributed to a single hit.
    # NOTE(review): presumably ddgr pretty-prints each JSON result over five
    # lines (braces plus three fields) -- confirm against the ddgr version
    # in use before relying on exact counts.
    _LINES_PER_HIT = 5

    def __init__(self, min_hits=settings.DUCKDUCKGO_MIN_HITS):
        """Store the hit threshold, which is also passed to ``ddgr -n``.

        :param min_hits: threshold used by the ``check_*`` methods and the
            value given to ddgr's ``-n`` option.
        """
        self.min_hits = min_hits

    def check_expression(self, expression, new_segments=None):
        """Tell whether the expression query yields enough output lines.

        :param expression: object exposing ``segments.order_by(...)``.
        :param new_segments: optional replacement orthographic forms, one
            per segment; ``None`` entries drop the segment from the query.
        :return: ``True`` when ddgr printed more than
            ``5 * self.min_hits`` lines, ``False`` otherwise.
        :raises RuntimeError: when ddgr wrote anything to standard error.
        """
        query = self.__get_expression_query(expression, new_segments)
        line_count = self._count_response_lines(query)
        return line_count > self._LINES_PER_HIT * self.min_hits

    def expression_responses_count(self, expression, new_segments=None):
        """Return the number of ddgr output lines for the expression, // 5.

        :param expression: object exposing ``segments.order_by(...)``.
        :param new_segments: optional replacement orthographic forms, one
            per segment; ``None`` entries drop the segment from the query.
        :return: line count of ddgr's output divided by five, rounded down.
        :raises RuntimeError: when ddgr wrote anything to standard error.
        """
        query = self.__get_expression_query(expression, new_segments)
        return self._count_response_lines(query) // self._LINES_PER_HIT

    def check_entry(self, catchword, definition):
        """Tell whether catchword and definition together yield enough lines.

        Both arguments are turned into quoted queries and passed to ddgr as
        two separate command-line arguments.

        :return: ``True`` when ddgr printed more than
            ``5 * self.min_hits`` lines, ``False`` otherwise.
        :raises RuntimeError: when ddgr wrote anything to standard error.
        """
        catchword_query = self.__get_expression_query(catchword)
        definition_query = self.__get_expression_query(definition)
        line_count = self._count_response_lines(catchword_query,
                                                definition_query)
        return line_count > self._LINES_PER_HIT * self.min_hits

    def entry_responses_count(self, catchword, definition):
        """Return the number of ddgr output lines for the entry, // 5.

        Both arguments are turned into quoted queries and passed to ddgr as
        two separate command-line arguments.

        :return: line count of ddgr's output divided by five, rounded down.
        :raises RuntimeError: when ddgr wrote anything to standard error.
        """
        catchword_query = self.__get_expression_query(catchword)
        definition_query = self.__get_expression_query(definition)
        line_count = self._count_response_lines(catchword_query,
                                                definition_query)
        return line_count // self._LINES_PER_HIT

    def _count_response_lines(self, *queries):
        """Run ddgr with the given queries and count its stdout lines.

        The command's stdout and stderr are captured in temporary files
        inside a private temporary directory; all of them are removed
        before returning, whether or not the call succeeded.

        :param queries: query strings appended to the ddgr command line.
        :return: number of lines ddgr wrote to standard output.
        :raises RuntimeError: when ddgr wrote anything to standard error.
        :raises subprocess.CalledProcessError: when ddgr exits non-zero.
        """
        tmp_folder = mkdtemp()
        response_fd, response_path = mkstemp(dir=tmp_folder)
        error_fd, error_path = mkstemp(dir=tmp_folder)
        try:
            try:
                check_call(['ddgr',
                            '-n', str(self.min_hits),
                            '--json'] + list(queries),
                           stdout=response_fd, stderr=error_fd)
            finally:
                # Close the descriptors even when ddgr fails so that the
                # files can be reopened for reading and then deleted.
                os.close(response_fd)
                os.close(error_fd)

            # Mode 'r' rather than 'rt': codecs.open appends 'b' itself and
            # Python 3 rejects the resulting 'rtb' combination.
            with codecs.open(error_path, 'r', encoding='utf-8') as errors:
                for _line in errors:
                    # Any stderr output at all is reported as a 403.
                    raise RuntimeError('Error: 403')

            with codecs.open(response_path, 'r', encoding='utf-8') as lines:
                return sum(1 for _line in lines)
        finally:
            # mkstemp/mkdtemp leave deletion to the caller.
            os.remove(response_path)
            os.remove(error_path)
            os.rmdir(tmp_folder)

    def __get_expression_query(self, expression, new_segments=None):
        """Build the double-quoted query string for an expression.

        Segments are taken in ``position_in_expr`` order and paired with
        ``new_segments`` (pairing stops at the shorter of the two). When
        ``new_segments`` is empty or ``None`` each segment's own ``orth``
        is used.

        :param expression: object exposing ``segments.order_by(...)`` whose
            items have ``orth``, ``ctag`` and ``has_nps`` attributes.
        :param new_segments: optional replacement forms; a ``None`` entry
            leaves the corresponding segment out of the query.
        :return: the assembled text wrapped in double quotes.
        """
        expr_segments = expression.segments.order_by('position_in_expr')

        if not new_segments:
            new_segments = [seg.orth for seg in expr_segments]

        parts = []
        # Builtin zip truncates to the shorter input like izip did, and is
        # available on both Python 2 and Python 3.
        for expr_seg, new_seg in zip(expr_segments, new_segments):
            if new_seg is None:
                continue

            orth = new_seg
            # 'interp' segments other than a double quote get a backslash
            # prefix (presumably to escape punctuation for the search).
            if expr_seg.ctag == 'interp' and expr_seg.orth != '"':
                orth = u'\\%s' % orth

            # has_nps: the segment is glued to the previous one without a
            # separating space.
            parts.append(orth if expr_seg.has_nps else ' %s' % orth)

        return u'\"' + ''.join(parts).lstrip() + u'\"'