# views.py

import operator
import time

from collections import defaultdict
from functools import reduce
from itertools import chain, product

import simplejson

from django.db.models import Q
from django.http import JsonResponse, QueryDict
from django.shortcuts import render
from django.template.context_processors import csrf
from django.utils.translation import gettext as _

from crispy_forms.utils import render_crispy_form

from connections.models import Entry, Subentry, ArgumentConnection, RealisationDescription
from syntax.models import NaturalLanguageDescription, Schema
from semantics.models import Frame

from common.decorators import ajax_required

from .forms import (
    #MainEntryForm,
    EntryForm,
    SchemaFormFactory,
    FrameFormFactory,
    PositionFormFactory,
    PhraseAttributesFormFactory,
    LexFormFactory,
    LemmaFormFactory,
    ArgumentFormFactory,
    PredefinedPreferenceFormFactory,
    RelationalPreferenceFormFactory,
    SynsetPreferenceFormFactory,
)

from .polish_strings import SCHEMA_OPINION, FRAME_OPINION, EXAMPLE_SOURCE, EXAMPLE_OPINION, RELATION

from .phrase_descriptions.descriptions import position_prop_description

# Size limit for the session's 'last_visited' history; note that get_entry()
# keeps the first MAX_LAST_VISITED + 1 items when trimming the list.
MAX_LAST_VISITED = 10

def test(request):
    """Render the ``test.html`` template with an empty context."""
    empty_context = {}
    return render(request, 'test.html', empty_context)

def entries(request):
    """Render ``entries.html`` with a fresh, unbound ``EntryForm``.

    Also makes sure the session contains a ``last_visited`` list.
    """
    session = request.session
    # TODO make this automatic by subclassing/configuring session object
    if 'last_visited' not in session:
        session['last_visited'] = []
    context = {'entries_form': EntryForm()}
    return render(request, 'entries.html', context)

# Form classes that make_form() instantiates directly, keyed by form type.
FORM_TYPES = {
    #'entry-main' : MainEntryForm,
    'entry'      : EntryForm,

}

# Form factories keyed by form type; make_form() calls get_form() on them.
# Form types starting with 'phrase_' that are not listed here are handled by
# PhraseAttributesFormFactory in make_form().
FORM_FACTORY_TYPES = {
    'schema'     : SchemaFormFactory,
    'position'   : PositionFormFactory,
    'phrase_lex' : LexFormFactory,
    'lemma'      : LemmaFormFactory,
    'frame'      : FrameFormFactory,
    'argument'   : ArgumentFormFactory,
    'predefined' : PredefinedPreferenceFormFactory,
    'relational' : RelationalPreferenceFormFactory,
    'synset'     : SynsetPreferenceFormFactory,
}


def make_form(form_type, data=None, unique_number=None):
    """Build the form object registered for ``form_type``.

    Lookup order: ``FORM_FACTORY_TYPES`` (factory ``get_form``), then
    ``FORM_TYPES`` (plain form class), then any other ``phrase_<type>`` name,
    which is delegated to ``PhraseAttributesFormFactory``.

    Returns ``None`` when ``form_type`` matches none of the above.
    """
    phrase_prefix = 'phrase_'
    if form_type in FORM_FACTORY_TYPES:
        factory = FORM_FACTORY_TYPES[form_type]
        return factory.get_form(data=data, unique_number=unique_number)
    if form_type in FORM_TYPES:
        form_class = FORM_TYPES[form_type]
        return form_class(data=data)
    if not form_type.startswith(phrase_prefix):
        return None
    # Everything after the 'phrase_' prefix names the phrase type.
    phrase_type = form_type[len(phrase_prefix):]
    return PhraseAttributesFormFactory.get_form(phrase_type, data=data, unique_number=unique_number)


@ajax_required
def get_subform(request):
    """Return the rendered HTML of an empty subform as JSON.

    Expects a GET request with a ``subform_type`` parameter naming the form
    type understood by ``make_form``.  The response is
    ``{'form_html': <html>}``.  Any other request method gets an empty JSON
    object, like the other AJAX views in this module.
    """
    if request.method != 'GET':
        # Previously this path fell through and returned None, which is not
        # a valid view response.
        return JsonResponse({})
    ctx = {}
    ctx.update(csrf(request))
    form_type = request.GET['subform_type']
    form = make_form(form_type)
    try:
        form_html = render_crispy_form(form, context=ctx)
    except Exception:
        # Report which form type failed to render, then let the error propagate.
        print('******************', form_type)
        raise
    return JsonResponse({'form_html' : form_html})

#TODO clean this code bordello up

def filter_objects(objects, queries, tab=''):
    """Apply each query in ``queries`` to ``objects`` in turn.

    ``distinct()`` is applied after every filter and once more on the final
    result.  ``tab`` is accepted but not used by the filtering itself.
    """
    narrowed = objects
    for condition in queries:
        narrowed = narrowed.filter(condition).distinct()
    return narrowed.distinct()


def collect_forms(forms_json, tab='   '):
    """Recursively turn a JSON-serialised form tree into validated form objects.

    ``forms_json`` is a JSON object with the keys ``formtype``, ``form``
    (a URL-encoded query string), ``negated``, ``children`` and optionally
    ``formnumber``.  Each item of ``children`` is itself a JSON string with
    the keys ``formtype`` and ``subforms``; every subform is a JSON string
    handled by a recursive call.

    Returns:
        The string ``'or'`` or ``'other'`` when the node is such a separator
        pseudo-form.  Otherwise a tuple ``(form, negated, subform_groups)``
        where ``subform_groups`` is a list of
        ``(subform_type, conjunction, subforms)`` and ``subforms`` is a list
        of non-empty lists of collected child forms (the separators split the
        children into those lists).

    Raises:
        ValueError: if the form type is unknown, the form does not validate,
            or one children group mixes ``'or'`` and ``'other'`` separators.
    """
    data = simplejson.loads(forms_json)
    form_type = data['formtype']
    if form_type in ('or', 'other'):
        return form_type
    form_number = data.get('formnumber', 0)
    query_params = QueryDict(data['form'])
    form = make_form(form_type, data=query_params, unique_number=form_number)
    if form is None:
        raise ValueError('unknown form type: {!r}'.format(form_type))
    if not form.is_valid():
        print(form.errors)
        # TODO return validation errors
        raise ValueError('form of type {!r} failed validation'.format(form_type))
    # a form may have one or more children forms, organised into and-or
    # (e.g. an entry form has child schema forms, frame forms etc.)
    subform_groups = []
    for subforms_json in data['children']:
        subform_group = simplejson.loads(subforms_json)
        subform_type, subforms = subform_group['formtype'], subform_group['subforms']
        # Each separator pseudo-form starts a new list of conjoined children.
        alternatives = [[]]
        conjunctions = set()
        for child in subforms:
            child_form = collect_forms(child, tab + '    ')
            if child_form in ('or', 'other'):
                alternatives.append([])
                conjunctions.add(child_form)
            else:
                alternatives[-1].append(child_form)
        if len(conjunctions) > 1:
            raise ValueError(
                "'or' and 'other' cannot be mixed within one {!r} group".format(subform_type))
        # A group without any separator is treated as a one-element 'or'.
        conjunction = conjunctions.pop() if conjunctions else 'or'
        subforms = [alternative for alternative in alternatives if alternative]
        if subforms:
            subform_groups.append((subform_type, conjunction, subforms))
    return (form, data['negated'], subform_groups)


def reduce_ids_and(ids):
    """Combine the id sets of conjoined (and-ed) child specifications.

    ``ids`` maps ``True`` to a list of id sets of the non-negated children
    and ``False`` to a list of id sets of the negated ones.  As a side
    effect ``ids[False]`` is replaced by the union of its sets.

    Returns ``(True, ids)`` with the intersection of the non-negated sets
    minus the negated union when there is at least one non-negated set,
    otherwise ``(False, negated_union)``.
    """
    # sum the negated ids
    excluded = set().union(*ids[False])
    ids[False] = excluded
    if not ids[True]:
        # negate the sum of negated
        return (False, excluded)
    # intersect the non-negated and subtract the sum of negated
    first, *others = ids[True]
    return (True, first.intersection(*others) - excluded)


def other_operator(tab, form, children_group, parent_objects):
    """Keep the parents that have a *different* child for every specification.

    ``children_group`` is a ``(name, conjunction, children)`` tuple as built
    by ``collect_forms``; ``children`` is a list of specifications, each a
    list of conjoined child forms.  A parent from ``parent_objects`` is kept
    when one child id can be chosen per specification so that all chosen ids
    are pairwise distinct.

    ``tab`` is only used as a prefix of the debug output.  Negated results
    (a specification consisting only of negated children) are not supported
    and trip an assertion.

    Returns ``parent_objects`` filtered to the matching parent ids.
    """
    name, conjunction, children = children_group
    print(tab, '==============', name, conjunction)
    print(tab, 'PARENT OBJECTS:', parent_objects)
    prefixes = set()
    # matches[id][j] -> set of child ids of parent ‹id› satisfying the ‹j›th specification
    matches = defaultdict(lambda: defaultdict(set))
    for i, conj_children in enumerate(children):
        print(tab, '    ', conjunction)
        child_ids_and = { True: [], False: [] }
        for child in conj_children:
            child_form = child[0]
            child_objects = get_filtered_objects(child, tab=tab + '        ')
            prefix = form.get_child_form_prefix(child_form)
            prefixes.add(prefix)
            child_ids = set(co.id for co in child_objects)
            child_ids_and[not child_form.is_negated()].append(child_ids)
        child_ids_and = reduce_ids_and(child_ids_and)
        print(tab, '    ===>', '[{}]'.format(i), child_ids_and[0], len(child_ids_and[1]), sorted(child_ids_and[1])[:10])
        # TODO enable negations?
        assert child_ids_and[0] is True
        for child_id in child_ids_and[1]:
            # The prefix is an '__in' lookup, so a single id is passed as a one-element list.
            assert prefix.endswith('__in')
            for parent in parent_objects.filter(Q((prefix, [child_id]))):
                matches[parent.id][i].add(child_id)
    assert len(prefixes) == 1
    matching_parent_ids = set()
    for parent_id, mtchs in matches.items():
        print(tab, parent_id, mtchs)
        if len(mtchs) < len(children):
            print(tab, 'not all matched')
            continue
        # Look for one child per specification with all children different;
        # stop at the first such assignment instead of scanning the whole product.
        assignment = next(
            (x for x in product(*mtchs.values()) if len(x) == len(set(x))),
            None,
        )
        if assignment is None:
            print(tab, 'no match')
        else:
            matching_parent_ids.add(parent_id)
            print(tab, 'MATCH:', assignment)
    return parent_objects.filter(id__in=matching_parent_ids)


def get_filtered_objects(forms, initial_objects=None, tab='   '):
    """Return the distinct objects matching a collected form tree.

    ``forms`` is a ``(form, negated_attrs, children)`` tuple as produced by
    ``collect_forms``.  Filtering starts from ``initial_objects`` when given,
    otherwise from all objects of ``form.model_class``.  The form's own
    queries are applied first, then every children group: ``'other'`` groups
    are delegated to ``other_operator``; for the remaining groups the ids
    matched by each alternative (a list of and-ed child forms) are unioned.

    Recursive calls for child forms do not receive ``initial_objects``.
    """
    form, negated_attrs, children = forms

    def base_objects():
        # Fresh starting queryset: the caller's objects or the whole table.
        if initial_objects is None:
            return form.model_class.objects.all()
        return initial_objects.all()

    objects = base_objects()
    own_queries = form.get_queries(negated_attrs)
    objects = filter_objects(objects, own_queries, tab=tab)
    for children_group in children:
        if children_group[1] == 'other':
            objects = other_operator(tab, form, children_group, objects)
            continue
        prefixes = set()
        ids_per_alternative = []
        for and_children in children_group[2]:
            candidates = base_objects()
            for child in and_children:
                child_form = child[0]
                child_objects = get_filtered_objects(child, tab=tab + '        ')
                prefix = form.get_child_form_prefix(child_form)
                prefixes.add(prefix)
                condition = Q((prefix, [co.id for co in child_objects]))
                # A negated child form excludes its matches instead of requiring them.
                narrow = candidates.exclude if child_form.is_negated() else candidates.filter
                candidates = narrow(condition)
            ids_per_alternative.append({candidate.id for candidate in candidates})
        assert(len(prefixes) == 1)
        matching_ids = reduce(operator.or_, ids_per_alternative)
        objects = objects.filter(id__in=matching_ids)
    return objects.distinct()


def get_filtered_objects2(objects, forms):
    """Filter ``objects`` to those satisfying any of the given forms.

    ``forms`` is an ‘or’ list of ‘and’ lists of forms; the forms are
    flattened and treated as one ‘or’ list.  The function is used for
    filtering out schemata/frames.  E.g. if the user chooses entries with a
    schema satisfying X AND a schema satisfying Y, schemata satisfying X OR Y
    should be displayed (and all other schemata should be hidden).
    """
    print(forms)
    id_sets = []
    for form in chain.from_iterable(forms):
        matching = get_filtered_objects(form, initial_objects=objects)
        id_sets.append({obj.id for obj in matching})
    filtered_ids = reduce(operator.or_, id_sets)
    return objects.filter(id__in=filtered_ids)


@ajax_required
def send_form(request):
    """Validate the posted form tree and store its raw JSON in the session.

    On POST the ``forms[]`` value is parsed with ``collect_forms`` (which
    raises on invalid data), saved under ``request.session['forms']`` and
    ``{'success': 1}`` is returned.  Other methods get an empty JSON object.
    """
    if request.method != 'POST':
        return JsonResponse({})
    raw_forms = request.POST['forms[]']
    # Parsed only for its validation; the result itself is not needed here.
    # TODO return validation errors
    collect_forms(raw_forms)
    request.session['forms'] = raw_forms
    return JsonResponse({ 'success' : 1 })

def get_scroller_params(POST_data):
    """Extract the table paging/sorting/search parameters from ``POST_data``.

    Returns a dict with the integer values ``draw``, ``start`` and
    ``length``, ``order`` as a ``(column_index, direction)`` tuple and
    ``filter`` holding the raw search string.
    """
    sort_column = int(POST_data['order[0][column]'])
    sort_direction = POST_data['order[0][dir]']
    params = {}
    for key in ('draw', 'start', 'length'):
        params[key] = int(POST_data[key])
    params['order'] = (sort_column, sort_direction)
    params['filter'] = POST_data['search[value]']
    return params

@ajax_required
def get_entries(request):
    """Return one page of entries matching the forms stored in the session.

    On POST, the form tree saved under ``request.session['forms']`` is
    turned into a queryset of entries without import errors, optionally
    narrowed by the search string (prefix match on ``name``), ordered as
    requested and cut to the requested window.  The JSON response has the
    keys ``draw``, ``recordsTotal``, ``recordsFiltered`` and ``data``
    (dicts with ``id``, ``name`` and ``status__key``).

    Other request methods get an empty JSON object.
    """
    if request.method != 'POST':
        return JsonResponse({})
    forms = collect_forms(request.session['forms'])
    scroller_params = get_scroller_params(request.POST)
    entries = get_filtered_objects(forms).filter(import_error=False)
    total = entries.count()
    if scroller_params['filter']:
        entries = entries.filter(name__startswith=scroller_params['filter'])
    filtered = entries.count()
    order_field, order_dir = scroller_params['order']
    if order_field != 0 or order_dir != 'asc':
        # ordering other than lemma ascending (database default)
        order_field = 'name' if order_field == 0 else 'status__key'
        if order_dir == 'desc':
            order_field = '-' + order_field
        entries = entries.order_by(order_field)
    rows = entries.values('id', 'name', 'status__key')
    # Slice the queryset so that only the requested window is fetched
    # instead of loading every matching row and slicing in Python.
    start = max(scroller_params['start'], 0)
    length = scroller_params['length']
    if length < 0:
        # A negative length means "all remaining rows"; querysets do not
        # support negative indexing.
        page = rows[start:]
    else:
        page = rows[start:start + length]
    result = {
        'draw' : scroller_params['draw'],
        'recordsTotal': total,
        'recordsFiltered': filtered,
        'data': list(page),
    }
    return JsonResponse(result)


def subentry2str(subentry):
    """Build the display label of a subentry.

    The label is the entry name, followed by `` się`` when ``inherent_sie``
    is ``'true'``, the aspect and negativity names in parentheses (when
    set), and `` pred.`` when ``predicativity`` is ``'true'``.
    """
    label = subentry.entry.name
    if subentry.inherent_sie.name == 'true':
        label = label + ' się'
    qualifiers = [
        prop.name
        for prop in (subentry.aspect, subentry.negativity)
        if prop
    ]
    if qualifiers:
        label = label + ' ({})'.format(', '.join(qualifiers))
    if subentry.predicativity.name == 'true':
        label = label + ' pred.'
    return label


def position_prop2dict(prop):
    """Return the ``str``/``desc`` dict for a position property.

    A falsy ``prop`` yields empty strings; otherwise ``str`` is the
    property name and ``desc`` comes from ``position_prop_description``.
    """
    if not prop:
        return {'str': '', 'desc': ''}
    return {
        'str': prop.name,
        'desc': position_prop_description(prop.name),
    }


def get_phrase_desc(phrase, position, negativity, lang):
    """Look up the natural-language description of a phrase in a position.

    The ``NaturalLanguageDescription`` is selected by the phrase's
    ``text_rep``, the position's ``function``, ``control`` and
    ``pred_control``, plus ``negativity`` and ``lang``; its ``description``
    is returned.
    """
    lookup = {
        'phrase_str': phrase.text_rep,
        'function': position.function,
        'control': position.control,
        'pred_control': position.pred_control,
        'negativity': negativity,
        'lang': lang,
    }
    record = NaturalLanguageDescription.objects.get(**lookup)
    return record.description


def schema2dict(schema, negativity, lang):
    """Serialise a schema with its positions and phrase types into a dict.

    ``negativity`` and ``lang`` are passed on to ``get_phrase_desc`` for the
    description of each phrase.  Position and phrase ids are extended ids
    built from the schema, position and phrase type ids joined with ``-``.
    """
    opinion_key = schema.opinion.key
    result = {
        'opinion'     : SCHEMA_OPINION()[opinion_key],
        'opinion_key' : opinion_key,
        'id'          : str(schema.id),
    }
    positions = []
    for position in schema.positions.all():
        position_dict = {
            'func'      : position_prop2dict(position.function),
            'control'   : position_prop2dict(position.control),
            'p_control' : position_prop2dict(position.pred_control),
            'id'        : '{}-{}'.format(schema.id, position.id),
        }
        phrases = []
        for phrase_type in position.phrase_types.all():
            phrases.append({
                'str'  : str(phrase_type),
                'id'   : '{}-{}-{}'.format(schema.id, position.id, phrase_type.id),
                'desc' : get_phrase_desc(phrase_type, position, negativity, lang),
            })
        position_dict['phrases'] = phrases
        positions.append(position_dict)
    result['positions'] = positions
    return result


def get_rel_pref_desc(pref):
    """Build the translated description of a relational preference.

    The generic ``'RELAT'`` relation gets a wording without a relation
    name; any other relation key is looked up in ``RELATION()``.  The
    target argument ``pref.to`` is appended in both cases.
    """
    relation_key = pref.relation.key
    if relation_key != 'RELAT':
        template = _('Realizacja tego argumentu w zdaniu powinna być powiązana relacją <i>{}</i>')
        head = template.format(RELATION()[relation_key])
    else:
        head = _('Realizacja tego argumentu w zdaniu powinna być powiązana jakąkolwiek relacją')
    tail = _('z realizacją argumentu <i>{}</i>.').format(pref.to)
    return head + ' ' + tail


def make_ul(items):
    """Wrap ``items`` in an HTML ``<ul>`` list, one ``<li>`` per item.

    Items are interpolated verbatim (no HTML escaping is applied).
    """
    list_items = ''.join('<li>{}</li>'.format(item) for item in items)
    return '<ul>{}</ul>'.format(list_items)

def get_synset_def(synset):
    """Return an HTML snippet with the synset's definition and hypernyms.

    Each present part is a translated label followed by a ``<ul>`` list;
    the parts are joined with a single space.  An empty string is returned
    when the synset has neither a definition nor hypernyms.
    """
    sections = []
    definition = synset.definition
    if definition:
        sections.append(_('definicja:') + make_ul([definition]))
    #sections.append(_('jednostki leksykalne: ') + ', '.join(map(str, synset.lexical_units.all())))
    hypernyms = list(synset.hypernyms.all())
    if hypernyms:
        sections.append(_('hiperonimy:') + make_ul(map(str, hypernyms)))
    return ' '.join(sections)


def get_prefs_list(argument):
    """List the selectional preferences of ``argument`` as dicts.

    Predefined preferences come first, then synsets (with a plWordnet URL
    and an info snippet), then relational preferences (with a description);
    each of the three groups is sorted by its ``str`` value.
    """
    def by_label(preferences):
        return sorted(preferences, key=lambda pref: pref['str'])

    predefined = by_label(
        {'str': str(p)} for p in argument.predefined.all()
    )
    synsets = by_label(
        {
            'str'  : str(s),
            'url'  : 'http://plwordnet21.clarin-pl.eu/synset/{}'.format(s.id),
            'info' : get_synset_def(s),
        } for s in argument.synsets.all()
    )
    relations = by_label(
        {
            'str'  : str(r),
            'info' : get_rel_pref_desc(r),
        } for r in argument.relations.all()
    )
    return predefined + synsets + relations


def frame2dict(frame, entry_meanings):
    """Serialise a frame with its lexical units and arguments into a dict.

    ``entry_meanings`` is the collection against which each lexical unit is
    tested to set its ``entry_meaning`` flag.  Arguments are sorted by the
    sum of their role priority and role attribute priority (2 when the role
    has no attribute).
    """
    def role_label(argument):
        role = argument.role
        label = role.role.role.lower()
        if role.attribute:
            label += ' ' + role.attribute.attribute.lower()
        return label

    def sort_priority(argument):
        role = argument.role
        attribute_priority = role.attribute.priority if role.attribute else 2
        return role.role.priority + attribute_priority

    def lexical_unit_url(lu):
        if lu.luid is None:
            return None
        return 'http://plwordnet21.clarin-pl.eu/lemma/{}/{}'.format(lu.base, lu.sense)

    opinion_key = frame.opinion.key
    result = {
        'opinion'     : FRAME_OPINION()[opinion_key],
        'opinion_key' : opinion_key,
        'id'          : str(frame.id),
    }
    result['lexical_units'] = [
        {
            'str'           : lu.text_rep,
            'id'            : lu.id,
            'entry_meaning' : lu in entry_meanings,
            'definition'    : lu.definition,
            'gloss'         : lu.gloss,
            'url'           : lexical_unit_url(lu),
        } for lu in frame.lexical_units.all()
    ]
    result['arguments'] = [
        {
            'str'         : str(argument),
            'id'          : '{}-{}'.format(frame.id, argument.id),
            'role'        : role_label(argument),
            'preferences' : get_prefs_list(argument),
        } for argument in sorted(frame.arguments.all(), key=sort_priority)
    ]
    return result

def get_alternations(schemata, frames):
    """Collect the schema-frame alternations for the given schemata and frames.

    Only schema hooks connected to an argument whose frame is in ``frames``
    are taken into account.  Extended ids are used throughout: an extended
    argument id is ``<frame_id>-<arg_id>`` and an extended phrase id is
    ``<schema_id>-<position_id>-<phrase_type_id>``.

    Returns three nested dicts:

    (1) alternations::

            {
                frame_id_1 : {
                    schema_id_1 : [alt_1, ..., alt_l],
                    ...
                    schema_id_k : [...]
                },
                ...
                frame_id_n : {...}
            }

        where each alternation (ordered by alternation number) is a dict
        mapping an extended argument id to a list of extended phrase ids;

    (2) realisation phrases::

            {
                extended_arg_id_1 : {
                    alt_index_1 : {
                        extended_phr_id_1 : pseudo_natural_language_phrase_1_1_1,
                        ...
                        extended_phr_id_l : pseudo_natural_language_phrase_1_1_l,
                    },
                    ...
                    alt_index_k : {...}
                },
                ...
                extended_arg_id_n : {...}
            }

        with ``alt_index`` being the hook's alternation number minus one and
        the phrase text taken from the hook's ``description``;

    (3) realisation descriptions: ``frame_id -> schema_id -> alt_index ->``
        the ``description`` of the matching ``RealisationDescription``.
    """
    # TODO can an alternation be attached to a whole position, without a specific phrase?
    grouped = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(list))))
    phrases = defaultdict(lambda: defaultdict(dict))
    realisation_descriptions = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
    for schema in schemata:
        for hook in schema.schema_hooks.all():
            connections = hook.argument_connections.all()
            assert (len(connections) < 2)
            if not connections:
                continue
            argument = connections[0].argument
            frame = argument.frame
            if frame not in frames:
                continue
            alt_no = hook.alternation
            phrase_id = '{}-{}-{}'.format(schema.id, hook.position.id, hook.phrase_type.id)
            argument_id = '{}-{}'.format(frame.id, argument.id)
            grouped[frame.id][schema.id][alt_no][argument_id].append(phrase_id)
            phrases[argument_id][alt_no - 1][phrase_id] = hook.description

    alt_dict = defaultdict(lambda: defaultdict(list))
    for frame_id, by_schema in grouped.items():
        for schema_id, by_alternation in by_schema.items():
            for alt_no in sorted(by_alternation.keys()):
                alt_dict[frame_id][schema_id].append(by_alternation[alt_no])
                realisation = RealisationDescription.objects.get(
                    frame__id=frame_id, schema__id=schema_id, alternation=alt_no)
                realisation_descriptions[frame_id][schema_id][alt_no - 1] = realisation.description
    return alt_dict, phrases, realisation_descriptions

def get_examples(entry):
    """Serialise the examples of ``entry``, sorted by sentence.

    For every example the ids of the connected frames, arguments, lexical
    units and schemata are gathered from its connections, together with the
    extended ids of the connected phrases (with and without the zero-based
    alternation index) and positions.  All id collections are returned as
    sorted lists.
    """
    serialised = []
    for example in entry.examples.all():
        frame_ids = set()
        argument_ids = set()
        lu_ids = set()
        schema_ids = set()
        phrases = set()
        phrases_syntax = set()
        positions = set()
        for connection in example.example_connections.all():
            for argument in connection.arguments.all():
                frame_ids.add(argument.frame.id)
                argument_ids.add('{}-{}'.format(argument.frame.id, argument.id))
            if connection.lexical_unit:
                lu_ids.add(connection.lexical_unit.id)
            for hook in connection.schema_connections.all():
                schema_ids.add(hook.schema.id)
                phrases.add('{}-{}-{}-{}'.format(
                    hook.schema.id, hook.position.id, hook.phrase_type.id, hook.alternation - 1))
                phrases_syntax.add('{}-{}-{}'.format(
                    hook.schema.id, hook.position.id, hook.phrase_type.id))
                positions.add('{}-{}'.format(hook.schema.id, hook.position.id))
        record = {
            'id'       : str(example.id),
            'sentence' : example.sentence,
            'source'   : EXAMPLE_SOURCE()[example.source.key],
            'opinion'  : EXAMPLE_OPINION()[example.opinion.key],
            'note'     : example.note,
        }
        record['frame_ids'] = sorted(frame_ids)
        record['argument_ids'] = sorted(argument_ids)
        record['lu_ids'] = sorted(lu_ids)
        record['schema_ids'] = sorted(schema_ids)
        record['phrases'] = sorted(phrases)
        record['phrases_syntax'] = sorted(phrases_syntax)
        record['positions'] = sorted(positions)
        serialised.append(record)
    return sorted(serialised, key=lambda item: item['sentence'])

def _child_forms_of_type(children_forms, form_type):
    """Return the and-or form lists of the ``form_type`` children group, or None if there is none."""
    matching = [group[2] for group in children_forms if group[0] == form_type]
    assert len(matching) <= 1
    return matching[0] if matching else None


def _remember_visited(session, entry):
    """Move ``entry`` to the front of the session's ``last_visited`` list and trim the list."""
    visited = [entry.name, entry.id]
    # Items are stored and compared as lists: a tuple would only turn into a
    # list after a JSON round-trip, so with another session serializer the
    # membership test would miss it and the entry would be duplicated.
    history = [
        item for item in session.get('last_visited', [])
        if list(item) != visited
    ]
    session['last_visited'] = ([visited] + history)[:(MAX_LAST_VISITED + 1)]
    # https://docs.djangoproject.com/en/2.2/topics/http/sessions/#when-sessions-are-saved
    session.modified = True


@ajax_required
def get_entry(request):
    """Return the full data of one entry as JSON.

    Expects a POST with ``entry`` (the entry id) and ``forms[]`` (the JSON
    form tree understood by ``collect_forms``).  When the entry form's
    ``filter_schemata`` / ``filter_frames`` options are set and the tree has
    schema / frame child forms, only schemata / frames matching those forms
    are included.  The response has the keys ``subentries``, ``frames``,
    ``alternations``, ``realisation_phrases``, ``realisation_descriptions``
    and ``examples``.  The entry is also recorded at the front of the
    session's ``last_visited`` list.

    An empty JSON object is returned for other request methods, for a
    non-numeric entry id and for an invalid ``EntryForm``.
    """
    if request.method != 'POST':
        return JsonResponse({})
    form = EntryForm(request.POST)
    eid = request.POST['entry']
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse.
    if not (eid.isdecimal() and form.is_valid()):
        return JsonResponse({})
    # TODO check that Entry has no import errors
    entry = Entry.objects.get(id=int(eid))

    # The middle element is named explicitly so that the module-level
    # gettext alias ``_`` is not shadowed inside this function.
    entry_form, _negated, children_forms = collect_forms(request.POST['forms[]'])
    # None means "do not filter": either filtering was not requested or
    # there are no child forms of the given type to filter by.
    schema_forms = frame_forms = None
    if entry_form.cleaned_data['filter_schemata']:
        schema_forms = _child_forms_of_type(children_forms, 'schema')
    if entry_form.cleaned_data['filter_frames']:
        frame_forms = _child_forms_of_type(children_forms, 'frame')

    subentries = []
    all_schema_objects = []
    for subentry in entry.subentries.all():
        schema_objects = subentry.schemata.all()
        if schema_forms is not None:
            schema_objects = get_filtered_objects2(schema_objects, schema_forms)
        schemata = [
            schema2dict(schema, subentry.negativity, request.LANGUAGE_CODE)
            for schema in schema_objects
        ]
        # Subentries left without any schema are not shown at all.
        if schemata:
            all_schema_objects += list(schema_objects)
            subentries.append({ 'str' : subentry2str(subentry), 'schemata' : schemata })

    frame_objects = Frame.objects.filter(arguments__argument_connections__schema_connections__subentry__entry=entry).distinct()
    if frame_forms is not None:
        frame_objects = get_filtered_objects2(frame_objects, frame_forms)
    # One shared queryset, so that the entry's lexical units are fetched
    # once rather than once per frame.
    entry_meanings = entry.lexical_units.all()
    frames = [frame2dict(frame, entry_meanings) for frame in frame_objects]
    alternations, realisation_phrases, realisation_descriptions = get_alternations(all_schema_objects, frame_objects)
    examples = get_examples(entry)

    if 'show_reals_desc' not in request.session:
        request.session['show_reals_desc'] = True
    _remember_visited(request.session, entry)
    return JsonResponse({
        'subentries' : subentries,
        'frames' : frames,
        'alternations' : alternations,
        'realisation_phrases' : realisation_phrases,
        'realisation_descriptions' : realisation_descriptions,
        'examples' : examples,
    })

@ajax_required
def change_show_reals_desc(request):
    """Store the posted ``val`` (JSON-decoded) as the session's ``show_reals_desc``.

    Returns ``{'success': 1}`` on POST and an empty JSON object otherwise.
    """
    if request.method != 'POST':
        return JsonResponse({})
    request.session['show_reals_desc'] = simplejson.loads(request.POST['val'])
    return JsonResponse({ 'success' : 1 })