# views.py 16 KB
import datetime
import operator

from collections import defaultdict
from functools import reduce
from itertools import chain

import simplejson

from django.db.models import Q
from django.http import JsonResponse, QueryDict
from django.shortcuts import render
from django.template.context_processors import csrf

from crispy_forms.utils import render_crispy_form

from connections.models import Entry, Subentry, ArgumentConnection
from syntax.models import NaturalLanguageDescription, Schema
from semantics.models import Frame

from common.decorators import ajax_required

from .forms import (
    #MainEntryForm,
    EntryForm,
    SchemaFormFactory,
    FrameFormFactory,
    PositionFormFactory,
    PhraseAttributesFormFactory,
    LexFormFactory,
    LemmaFormFactory,
    ArgumentFormFactory,
    PredefinedPreferenceFormFactory,
    RelationalPreferenceFormFactory,
    SynsetPreferenceFormFactory,
)

from .polish_strings import SCHEMA_OPINION, FRAME_OPINION, EXAMPLE_SOURCE, EXAMPLE_OPINION

from .phrase_descriptions.descriptions import position_prop_description, phrase_description

def test(request):
    """Render the 'test.html' template with an empty context."""
    context = {}
    return render(request, 'test.html', context)

def entries(request):
    """Render 'entries.html' with a fresh, unbound EntryForm as 'entries_form'."""
    context = {'entries_form': EntryForm()}
    return render(request, 'entries.html', context)

# Form classes that make_form() instantiates directly (``cls(data=...)``),
# keyed by the form type name.
FORM_TYPES = {
    # 'entry-main': MainEntryForm,
    'entry': EntryForm,
}

# Form factories that make_form() calls through ``get_form(data=...,
# unique_number=...)``, keyed by the form type name.
FORM_FACTORY_TYPES = {
    'schema': SchemaFormFactory,
    'position': PositionFormFactory,
    'phrase_lex': LexFormFactory,
    'lemma': LemmaFormFactory,
    'frame': FrameFormFactory,
    'argument': ArgumentFormFactory,
    'predefined': PredefinedPreferenceFormFactory,
    'relational': RelationalPreferenceFormFactory,
    'synset': SynsetPreferenceFormFactory,
}


def make_form(form_type, data=None, unique_number=None):
    """Build the form registered for *form_type*.

    Lookup order: FORM_FACTORY_TYPES (built through the factory's
    ``get_form``), then FORM_TYPES (instantiated directly), then any name
    starting with 'phrase_', which is delegated to
    PhraseAttributesFormFactory with the remainder of the name as the phrase
    type. Returns None when nothing matches.
    """
    print('======== MAKE FORM', form_type)
    if form_type in FORM_FACTORY_TYPES:
        factory = FORM_FACTORY_TYPES[form_type]
        return factory.get_form(data=data, unique_number=unique_number)
    if form_type in FORM_TYPES:
        form_class = FORM_TYPES[form_type]
        return form_class(data=data)
    if not form_type.startswith('phrase_'):
        return None
    # Everything after the 'phrase_' prefix names the phrase type.
    phrase_type = form_type[len('phrase_'):]
    print('================ MAKE PHRASE FORM', phrase_type, unique_number)
    return PhraseAttributesFormFactory.get_form(
        phrase_type,
        data=data,
        unique_number=unique_number,
    )


@ajax_required
def get_subform(request):
    """Return the rendered HTML of an empty subform as JSON.

    Reads the form type from the 'subform_type' GET parameter, builds the
    form with make_form() and renders it with crispy-forms, passing the CSRF
    context.

    Responses:
        * ``{'form_html': ...}`` on success;
        * ``{'error': ...}`` with status 400 when 'subform_type' is missing or
          does not name a known form;
        * ``{}`` for non-GET requests (the same fallback get_entries() and
          get_entry() use for an unexpected method), instead of returning
          None, which Django rejects as an invalid view result.
    """
    if request.method != 'GET':
        return JsonResponse({})
    form_type = request.GET.get('subform_type')
    # make_form() returns None for unknown types and cannot handle a missing
    # (None) type at all, so guard both cases here.
    form = make_form(form_type) if form_type else None
    if form is None:
        return JsonResponse(
            {'error': 'unknown subform type: {}'.format(form_type)},
            status=400,
        )
    ctx = {}
    ctx.update(csrf(request))
    try:
        form_html = render_crispy_form(form, context=ctx)
    except Exception:
        # Log which form type failed to render, then let the error propagate.
        print('******************', form_type)
        raise
    return JsonResponse({'form_html': form_html})
    
#TODO clean this code bordello up

def filter_objects(objects, queries, tab=''):
    """Apply every query in *queries* to *objects*, one after another.

    Each query is applied with ``.filter(query).distinct()``; a final
    ``.distinct()`` is applied to the result (also when *queries* is empty).
    *tab* is accepted for debug-output indentation but is currently unused.
    """
    narrowed = reduce(
        lambda current, query: current.filter(query).distinct(),
        queries,
        objects,
    )
    return narrowed.distinct()


def collect_forms(forms_json, tab='   '):
    """Recursively rebuild a tree of bound forms from its JSON description.

    *forms_json* is a JSON object with the keys 'formtype', 'form' (a
    URL-encoded query string with the form data), 'children' and optionally
    'formnumber' (defaults to 0). Each element of 'children' is itself a JSON
    string describing a group: ``{'formtype': ..., 'subforms': [...]}``,
    where every subform is again a JSON string accepted by this function.

    Returns:
        * the string 'or' when 'formtype' is 'or' (a separator between
          alternatives inside a group);
        * otherwise a tuple ``(form, subform_groups)``, where
          ``subform_groups`` is a list of ``(group_type, alternatives)`` and
          ``alternatives`` is a list of non-empty lists of
          ``(form, subform_groups)`` tuples. Items within one inner list
          appeared consecutively; an 'or' separator starts a new inner list.
          Groups with no forms at all are dropped.

    Raises:
        ValueError: if the form type is unknown or the form does not
            validate (previously a ZeroDivisionError / AttributeError).

    *tab* is only the indentation prefix of the debug output.
    """
    data = simplejson.loads(forms_json)
    form_type = data['formtype']
    if form_type == 'or':
        return 'or'
    form_number = data.get('formnumber', 0)
    print(tab, 'FORM:', data['form'])
    print(tab, 'TYPE:', form_type, 'NUMBER:', form_number)
    query_params = QueryDict(data['form'])
    print(tab, 'PARAMS:', query_params)
    form = make_form(form_type, data=query_params, unique_number=form_number)
    print(tab, 'FORM TYPE:', type(form))
    if form is None:
        raise ValueError('unknown form type: {!r}'.format(form_type))
    if not form.is_valid():
        print(form.errors)
        # TODO return validation errors to the client instead of raising
        raise ValueError('invalid {!r} form: {}'.format(form_type, form.errors))
    print(tab, '{} CHILDREN GROUP(S)'.format(len(data['children'])))
    # A form may have one or more groups of child forms, organised into
    # and-or (e.g. an entry form has child schema forms, frame forms etc.).
    subform_groups = []
    for subforms_json in data['children']:
        subform_group = simplejson.loads(subforms_json)
        subform_type = subform_group['formtype']
        # Start with one empty alternative; every 'or' marker opens a new one.
        alternatives = [[]]
        for child in subform_group['subforms']:
            child_form = collect_forms(child, tab + '    ')
            if child_form == 'or':
                alternatives.append([])
            else:
                alternatives[-1].append(child_form)
        # Drop empty alternatives (e.g. leading/trailing/doubled 'or').
        alternatives = [alternative for alternative in alternatives if alternative]
        if alternatives:
            subform_groups.append((subform_type, alternatives))
    return (form, subform_groups)


def get_filtered_objects(forms, initial_objects=None, tab='   '):
    """Return the objects selected by a form and, recursively, its child forms.

    *forms* is a ``(form, child_groups)`` tuple as produced by
    collect_forms(). The starting set is ``initial_objects.all()`` when
    *initial_objects* is given, otherwise all objects of ``form.model_class``.
    It is first narrowed with the form's own queries. Then, for every child
    group, the ids of objects satisfying at least one alternative of the
    group are computed (within an alternative every child form must be
    satisfied; a negated child form excludes instead of filters) and the
    result is restricted to those ids.

    *tab* is only the indentation prefix of the debug output.
    """
    form, child_groups = forms

    def base_queryset():
        # Fresh starting queryset: the caller's objects if given, else all.
        if initial_objects is None:
            return form.model_class.objects.all()
        return initial_objects.all()

    result = base_queryset()
    queries = form.get_queries()
    print(tab, type(form), 'FOR FILTERING:', form.model_class)
    print(tab, queries)
    result = filter_objects(result, queries, tab=tab)
    print(tab, 'OK')
    for group in child_groups:
        print(tab, 'CHILD FORMS')
        ids_per_alternative = []
        lookup_prefixes = set()
        for and_forms in group[1]:
            candidates = base_queryset()
            for child in and_forms:
                child_form = child[0]
                # Children are resolved against their own model's full set.
                matching_children = get_filtered_objects(child, tab=tab + '        ')
                lookup = form.get_child_form_prefix(child_form)
                lookup_prefixes.add(lookup)
                condition = Q((lookup, [matched.id for matched in matching_children]))
                if child_form.is_negated():
                    candidates = candidates.exclude(condition)
                else:
                    candidates = candidates.filter(condition)
            ids_per_alternative.append({candidate.id for candidate in candidates})
        # All child forms of one group are expected to share a lookup prefix.
        assert(len(lookup_prefixes) == 1)
        allowed_ids = reduce(operator.or_, ids_per_alternative)
        result = result.filter(id__in=allowed_ids)
    result = result.distinct()
    print(tab, 'FILTERED:', form.model_class)
    return result
            

# forms – an ‘or’ list of ‘and’ lists of forms; the forms are flattened and treated as one ‘or’ list.
# The function is used for filtering out schemata/frames. E.g. if the user chooses entries with a schema
# satisfying X AND a schema satisfying Y, schemata satisfying X OR Y should be displayed (and all other
# schemata should be hidden).
def get_filtered_objects2(objects, forms):
    """Restrict *objects* to those matched by at least one form in *forms*.

    *forms* is a list of lists of form tuples; it is flattened and each form
    is evaluated with get_filtered_objects() using *objects* as the initial
    set. The ids matched by the individual forms are unioned.
    """
    ids_per_form = []
    for form in chain.from_iterable(forms):
        matched = get_filtered_objects(form, initial_objects=objects)
        ids_per_form.append({obj.id for obj in matched})
    matching_ids = reduce(operator.or_, ids_per_form)
    return objects.filter(id__in=matching_ids)


# TODO a more efficient loading of results will be implemented,
# for now truncate them to speed up testing
DEBUG_N = 500


@ajax_required
def get_entries(request):
    """Return the entries matching the posted filter forms as JSON.

    For a POST request, the form tree is read from the 'forms[]' parameter
    with collect_forms() and evaluated with get_filtered_objects(). The
    response holds:
        * 'result': at most DEBUG_N ``(id, name, status__key)`` rows;
        * 'debug': a human-readable summary with the total result count and
          timings.
    Any other request method yields an empty JSON object.
    """
    if request.method != 'POST':
        return JsonResponse({})
    t1 = datetime.datetime.now()
    forms = collect_forms(request.POST['forms[]'])
    entries = get_filtered_objects(forms)
    # TODO return validation errors
    #if not form.is_valid():
        #print(form.errors)
        #return JsonResponse({ 'result' : [], 'errors' : form.errors })
    t2 = datetime.datetime.now()
    # TODO remove! – this is for debug
    # some entries with an obj,controlee position, >3 frames etc.
    #entries = entries.filter(name__in=('dozwalać', 'dozwolić', 'obiecywać'))
    #entries = entries.filter(subentries__schemata__positions__phrase_types__text_rep__contains='lex')
    #entries = entries.filter(frames_count__gt=1)
    #entries = entries.filter(subentries__schema_hooks__alternation__gt=1)
    #entries = entries.filter(subentries__schema_hooks__argument_connections__argument__role__attribute__isnull=False)
    t3 = datetime.datetime.now()
    # Slice the queryset (not the resulting list) so that at most DEBUG_N rows
    # are fetched instead of materialising the whole result set first.
    entries_list = list(entries.values_list('id', 'name', 'status__key')[:DEBUG_N])
    t4 = datetime.datetime.now()
    truncated = ' truncated to {}'.format(DEBUG_N) if len(entries_list) == DEBUG_N else ''
    # NOTE(review): the first field prints the t2 timestamp followed by ' s';
    # kept as is, confirm whether a duration was intended.
    debug = '[{} s] [{} results{}] '.format(t2, entries.count(), truncated)
    # '.1f' = one decimal place; a bare '.1' means one significant digit and
    # renders e.g. 12.3 as '1e+01'.
    debug += '[filtering: {:.1f} s] '.format((t2 - t1).total_seconds())
    debug += '[processing: {:.1f} s] '.format((t4 - t3).total_seconds())
    debug += '[total time: {:.1f} s] '.format((t4 - t1).total_seconds())
    return JsonResponse({ 'result' : entries_list, 'debug' : debug })


def subentry2str(subentry):
    """Build the display label of a subentry.

    The label is the entry name, followed by ' się' when ``inherent_sie`` is
    named 'true', the names of the aspect and negativity (those that are
    set) in parentheses, and ' pred.' when ``predicativity`` is named 'true'.
    """
    parts = [subentry.entry.name]
    if subentry.inherent_sie.name == 'true':
        parts.append(' się')
    qualifiers = [
        prop.name
        for prop in (subentry.aspect, subentry.negativity)
        if prop
    ]
    if qualifiers:
        parts.append(' ({})'.format(', '.join(qualifiers)))
    if subentry.predicativity.name == 'true':
        parts.append(' pred.')
    return ''.join(parts)


def position_prop2dict(prop):
    """Serialise a position property as ``{'str': name, 'desc': description}``.

    A falsy *prop* (e.g. None) yields empty strings for both keys; otherwise
    the description comes from position_prop_description() for the name.
    """
    if not prop:
        return {'str': '', 'desc': ''}
    return {
        'str': prop.name,
        'desc': position_prop_description(prop.name),
    }


def get_phrase_desc(phrase, position, negativity):
    """Look up the natural language description of a phrase in a position.

    Fetches the single NaturalLanguageDescription matching the phrase's text
    representation, the position's function, control and pred_control, and
    the given negativity, and returns its ``description``.
    """
    lookup = {
        'phrase_str': phrase.text_rep,
        'function': position.function,
        'control': position.control,
        'pred_control': position.pred_control,
        'negativity': negativity,
    }
    record = NaturalLanguageDescription.objects.get(**lookup)
    return record.description


def schema2dict(schema, negativity):
    """Serialise a schema with its positions and phrase types to a dict.

    The result has the keys 'opinion' (looked up in SCHEMA_OPINION() by the
    opinion key), 'id' (the schema id as a string) and 'positions'. Each
    position carries 'func', 'control' and 'p_control' (see
    position_prop2dict()) and 'phrases'; each phrase has 'str', an extended
    'id' of the form schema_id-position_id-phrase_type_id, and 'desc' (see
    get_phrase_desc(), which receives *negativity*).
    """
    opinion = SCHEMA_OPINION()[schema.opinion.key]
    schema_id = str(schema.id)
    positions = []
    for position in schema.positions.all():
        func = position_prop2dict(position.function)
        control = position_prop2dict(position.control)
        p_control = position_prop2dict(position.pred_control)
        phrases = []
        for phrase_type in position.phrase_types.all():
            phrases.append({
                'str': str(phrase_type),
                'id': '{}-{}-{}'.format(schema.id, position.id, phrase_type.id),
                'desc': get_phrase_desc(phrase_type, position, negativity),
            })
        positions.append({
            'func': func,
            'control': control,
            'p_control': p_control,
            'phrases': phrases,
        })
    return {
        'opinion': opinion,
        'id': schema_id,
        'positions': positions,
    }


def get_prefs_list(argument):
    """Return the argument's preferences as one flat list.

    Order: all ``predefined``, then all ``synsets``, then all ``relations``.
    """
    preferences = []
    for manager in (argument.predefined, argument.synsets, argument.relations):
        preferences.extend(manager.all())
    return preferences


def frame2dict(frame):
    """Serialise a frame with its arguments to a dict.

    The result has the keys 'opinion' (looked up in FRAME_OPINION() by the
    opinion key), 'id' (the frame id as a string) and 'arguments'. Arguments
    are sorted by role priority plus attribute priority (2 when the role has
    no attribute). Each argument has 'str', an extended 'id' of the form
    frame_id-argument_id, 'role' (lower-cased role name, followed by the
    lower-cased attribute when present) and 'preferences' (strings).
    """
    def sort_key(argument):
        role = argument.role
        priority = role.role.priority
        if role.attribute:
            return priority + role.attribute.priority
        return priority + 2

    def role_label(argument):
        role = argument.role
        label = role.role.role.lower()
        suffix = ' ' + role.attribute.attribute.lower() if role.attribute else ''
        return '{}{}'.format(label, suffix)

    opinion = FRAME_OPINION()[frame.opinion.key]
    frame_id = str(frame.id)
    arguments = []
    for argument in sorted(frame.arguments.all(), key=sort_key):
        arguments.append({
            'str': str(argument),
            'id': '{}-{}'.format(frame.id, argument.id),
            'role': role_label(argument),
            'preferences': [str(pref) for pref in get_prefs_list(argument)],
        })
    return {
        'opinion': opinion,
        'id': frame_id,
        'arguments': arguments,
    }

# returns a dict: {
#     frame_id1 : {
#         schema_id1 : [alt_1*, ..., alt_l],
#         ...
#         schema_idk : [...]
#     }
#     ...
#     frame_idn : {...}
# }
# *an alternation is a dict: {
#     key: extended argument id (frame_id-arg_id)
#     val: list of extended phrase ids (schema_id-position_id-phr_id)
# }
def get_alternations(schemata, frames):
    """Group the phrase-to-argument links of *schemata* by frame, schema and alternation.

    For every schema hook that has an argument connection whose argument
    belongs to one of *frames*, the extended phrase id
    (schema_id-position_id-phrase_type_id) is recorded under the extended
    argument id (frame_id-argument_id).

    Returns a nested mapping ``{frame_id: {schema_id: [alternation, ...]}}``
    where the alternations are ordered by their alternation number and each
    one maps extended argument ids to lists of extended phrase ids.

    A hook is expected to have at most one argument connection (asserted).
    """
    # TODO can an alternation be attached to a whole position, without a specific phrase?
    # Index the accepted frames by id once, so that each hook costs a hash
    # lookup instead of a linear scan over *frames*.
    frame_ids = {frame.id for frame in frames}
    alternations = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(list))))
    for schema in schemata:
        for hook in schema.schema_hooks.all():
            arg_conns = hook.argument_connections.all()
            assert len(arg_conns) < 2
            if not arg_conns:
                continue
            argument = arg_conns[0].argument
            frame = argument.frame
            if frame.id not in frame_ids:
                continue
            phr_id = '{}-{}-{}'.format(schema.id, hook.position.id, hook.phrase_type.id)
            arg_id = '{}-{}'.format(frame.id, argument.id)
            alternations[frame.id][schema.id][hook.alternation][arg_id].append(phr_id)
    # Replace the alternation-number level with a list ordered by that number.
    alt_dict = defaultdict(lambda: defaultdict(list))
    for frame_id, frame_schema_alternations in alternations.items():
        for schema_id, schema_alternations in frame_schema_alternations.items():
            for alt_no in sorted(schema_alternations):
                alt_dict[frame_id][schema_id].append(schema_alternations[alt_no])
    return alt_dict

def get_examples(entry):
    """Serialise the examples of *entry*, sorted by sentence.

    Each example dict holds its id (as a string), sentence, source and
    opinion (looked up in EXAMPLE_SOURCE() / EXAMPLE_OPINION() by key), note,
    and sorted lists of the connected frame ids, extended argument ids
    (frame_id-argument_id) and phrases
    (schema_id-position_id-phrase_type_id-(alternation - 1)).
    """
    collected = []
    for example in entry.examples.all():
        frame_ids = set()
        argument_ids = set()
        phrases = set()
        for connection in example.example_connections.all():
            for argument in connection.arguments.all():
                frame_id = argument.frame.id
                frame_ids.add(frame_id)
                argument_ids.add('{}-{}'.format(frame_id, argument.id))
            for hook in connection.schema_connections.all():
                phrase_key = '{}-{}-{}-{}'.format(
                    hook.schema.id,
                    hook.position.id,
                    hook.phrase_type.id,
                    hook.alternation - 1,
                )
                phrases.add(phrase_key)
        serialised = {
            'id': str(example.id),
            'sentence': example.sentence,
            'source': EXAMPLE_SOURCE()[example.source.key],
            'opinion': EXAMPLE_OPINION()[example.opinion.key],
            'note': example.note,
            'frame_ids': sorted(frame_ids),
            'argument_ids': sorted(argument_ids),
            'phrases': sorted(phrases),
        }
        collected.append(serialised)
    collected.sort(key=operator.itemgetter('sentence'))
    return collected

def _single_form_group(children_forms, form_type):
    """Return the forms of the only child group of *form_type*, or None if there is none."""
    groups = [forms for group_type, forms in children_forms if group_type == form_type]
    assert len(groups) <= 1
    return groups[0] if groups else None


@ajax_required
def get_entry(request):
    """Return the subentries, frames, alternations and examples of one entry as JSON.

    Expects a POST request with the entry id in 'entry' and the form tree in
    'forms[]'. When the entry form's cleaned data has 'filter_schemata' /
    'filter_frames' set and a matching child form group exists, only the
    schemata / frames matching those child forms are included. Subentries
    without any (remaining) schema are omitted.

    Returns an empty JSON object for non-POST requests, for an 'entry' value
    that is not a decimal number, or when the posted EntryForm is invalid.
    """
    if request.method != 'POST':
        return JsonResponse({})
    form = EntryForm(request.POST)
    eid = request.POST['entry']
    # isdecimal(), unlike isdigit(), rejects characters such as '²' that
    # int() cannot parse.
    if not (eid.isdecimal() and form.is_valid()):
        return JsonResponse({})
    entry = Entry.objects.get(id=int(eid))

    entry_form, children_forms = collect_forms(request.POST['forms[]'])
    # None means "do not filter": either filtering was not requested or no
    # child forms of that type were submitted.
    schema_forms = frame_forms = None
    if entry_form.cleaned_data['filter_schemata']:
        schema_forms = _single_form_group(children_forms, 'schema')
    if entry_form.cleaned_data['filter_frames']:
        frame_forms = _single_form_group(children_forms, 'frame')

    subentries = []
    all_schema_objects = []
    for subentry in entry.subentries.all():
        schema_objects = subentry.schemata.all()
        if schema_forms is not None:
            schema_objects = get_filtered_objects2(schema_objects, schema_forms)
        schemata = [schema2dict(schema, subentry.negativity) for schema in schema_objects]
        if schemata:
            all_schema_objects.extend(schema_objects)
            subentries.append({ 'str' : subentry2str(subentry), 'schemata' : schemata })
    frame_objects = Frame.objects.filter(arguments__argument_connections__schema_connections__subentry__entry=entry).distinct()
    if frame_forms is not None:
        frame_objects = get_filtered_objects2(frame_objects, frame_forms)
    frames = [frame2dict(frame) for frame in frame_objects]
    alternations = get_alternations(all_schema_objects, frame_objects)
    examples = get_examples(entry)
    return JsonResponse({ 'subentries' : subentries, 'frames' : frames, 'alternations' : alternations, 'examples' : examples })