Commit 9e5589dfce75c67299f6c934dfae9dce02bfc009

Authored by Bartłomiej Nitoń
1 parent 9dcdfedb

Added get_lexes_to_check script.

dictionary/management/commands/get_lexes_to_check.py 0 → 100644
  1 +# -*- coding:utf-8 -*-
  2 +
  3 +import codecs
  4 +import itertools
  5 +import os
  6 +
  7 +from django.core.management.base import BaseCommand
  8 +
  9 +from dictionary.models import Lemma
  10 +from settings import PROJECT_PATH
  11 +
  12 +TO_CHECK_PATH = os.path.join(PROJECT_PATH, 'data', 'lemmas2check-20180907.csv')
  13 +
  14 +
class Command(BaseCommand):
    """Management command that runs write_schemata_to_check()."""

    help = 'Writes lemmas whose schemata differ only lexically to the to-check file.'

    def handle(self, *args, **options):
        """Run the export; positional arguments and options are accepted but ignored."""
        # *args mirrors the handle(*args, **options) signature the framework
        # calls, so stray positional arguments do not raise TypeError.
        write_schemata_to_check()
  18 +
  19 +
def write_schemata_to_check():
    """Write lemmas that have a pair of schemata differing only lexically.

    Iterates over all lemmas with old=False, ordered by entry name.  For each
    lemma with more than one frame, every unordered pair of frames is tested
    with only_lex_diff(); on the first pair that passes, one line of the form
    "<entry name> (<status>)" is written to TO_CHECK_PATH and the remaining
    pairs of that lemma are skipped.  Each processed lemma is also printed to
    stdout as a progress indicator.
    """
    lemmas = Lemma.objects.filter(old=False).order_by('entry_obj__name')
    # codecs.open() always opens the underlying file in binary mode when an
    # encoding is given, so the mode is plain 'w' (no 't' flag).  The with
    # block guarantees the file is flushed and closed even if a query or a
    # write raises half-way through.
    with codecs.open(TO_CHECK_PATH, 'w', 'utf-8') as to_check_file:
        for lemma in lemmas:
            print(lemma)
            if lemma.frames.count() > 1:
                for schema1, schema2 in itertools.combinations(lemma.frames.all(), 2):
                    if only_lex_diff(schema1, schema2):
                        to_check_file.write('%s (%s)\n' % (lemma.entry_obj.name, lemma.status.status))
                        # One hit is enough to report the lemma.
                        break
  32 +
  33 +
def only_lex_diff(schema1, schema2):
    """Return True when the two schemata differ only in lexically matching positions.

    The pair is rejected (False) unless all of the following hold:
      * the four characteristics compared below are equal in both schemata,
      * both schemata have the same number of positions,
      * at least one of the schemata is flagged as phraseologic.

    For an accepted pair, the positions of schema1 whose text_rep does not
    occur in schema2 must each have a counterpart, according to
    positions_lexically_match(), among the positions of schema2 whose
    text_rep does not occur in schema1.  If there are no such positions in
    schema1 the result is True.
    """
    compared_characteristics = (u'ZWROTNOŚĆ', u'ASPEKT', u'NEGATYWNOŚĆ', u'PREDYKATYWNOŚĆ')
    for characteristic_type in compared_characteristics:
        if not (schema1.characteristics.get(type=characteristic_type) ==
                schema2.characteristics.get(type=characteristic_type)):
            return False

    if schema1.positions.count() != schema2.positions.count():
        return False
    if not (schema1.phraseologic or schema2.phraseologic):
        return False

    unmatched1 = schema1.positions.exclude(text_rep__in=[pos.text_rep for pos in schema2.positions.all()])
    unmatched2 = schema2.positions.exclude(text_rep__in=[pos.text_rep for pos in schema1.positions.all()])
    for pos1 in unmatched1:
        # any() stops at the first counterpart instead of comparing pos1
        # against every remaining candidate after a match was already found.
        if not any(positions_lexically_match(pos1, pos2) for pos2 in unmatched2):
            return False
    return True
  53 +
  54 +
def positions_lexically_match(pos1, pos2):
    """Return True when both positions carry the same categories and matching phrase types.

    The categories are considered the same when both positions have equally
    many of them and the combination (&) of the two category querysets is as
    large as pos1's own category set.  Phrase types are compared with
    phrase_types_match().
    """
    if pos1.categories.count() != pos2.categories.count():
        return False

    shared_categories = pos1.categories.all() & pos2.categories.all()
    if shared_categories.count() != pos1.categories.count():
        return False

    return bool(phrase_types_match(pos1, pos2))
  61 +
  62 +
def phrase_types_match(pos1, pos2):
    """Return True when the arguments of both positions pair up lexically.

    Positions with a different number of arguments never match.  Otherwise
    every argument of pos1 that is not also an argument of pos2 needs at
    least one counterpart among the arguments found only in pos2, where a
    counterpart is accepted by lex_to_phrase_type_match() in either
    direction.  With no pos1-only arguments the result is True.
    """
    if pos1.arguments.count() != pos2.arguments.count():
        return False

    only_in_pos1 = pos1.arguments.exclude(pk__in=pos2.arguments.all())
    only_in_pos2 = pos2.arguments.exclude(pk__in=pos1.arguments.all())
    return all(
        any(
            lex_to_phrase_type_match(candidate1, candidate2) or
            lex_to_phrase_type_match(candidate2, candidate1)
            for candidate2 in only_in_pos2
        )
        for candidate1 in only_in_pos1
    )
  77 +
  78 +
def lex_to_phrase_type_match(pt1, pt2):
    """Return True when the 'lex' phrase type pt1 lexicalises the plain phrase type pt2.

    Only pt2 of type 'adjp' or 'prepadjp' is considered.  The phrase type
    wrapped by pt1 is read from the first value of its 'TYP FRAZY' attribute;
    it has to belong to the family listed for pt2's type, and the text_rep
    segment between the first and the second '(' must be equal in both.
    """
    # Plain phrase type -> types of the lexicalised phrase accepted for it.
    accepted_lexicalizations = {
        'adjp': ('adjp', 'ppasp', 'pactp'),
        'prepadjp': ('prepadjp', 'prepppasp', 'preppactp'),
    }
    if pt1.type != 'lex' or pt2.type not in accepted_lexicalizations:
        return False

    phrase_type_attribute = pt1.atributes.get(type='TYP FRAZY')
    lexicalized_pt = phrase_type_attribute.values.all()[0].argument
    if lexicalized_pt.type not in accepted_lexicalizations[pt2.type]:
        return False

    return pt2.text_rep.split('(')[1] == lexicalized_pt.text_rep.split('(')[1]
... ...