Added get_lexes_to_check script. (9e5589df) | Commits | Walenty / Slowal

Browse Code »

Commit 9e5589dfce75c67299f6c934dfae9dce02bfc009

Authored by Bartłomiej Nitoń 7 years ago

1 parent 9dcdfedb

Added get_lexes_to_check script.

Inline Side-by-side

Showing 1 changed file with 90 additions and 0 deletions

dictionary/management/commands/get_lexes_to_check.py 0 → 100644

View file @9e5589d

	1	+# -- coding:utf-8 --
	2	+
	3	+import codecs
	4	+import itertools
	5	+import os
	6	+
	7	+from django.core.management.base import BaseCommand
	8	+
	9	+from dictionary.models import Lemma
	10	+from settings import PROJECT_PATH
	11	+
	12	+TO_CHECK_PATH = os.path.join(PROJECT_PATH, 'data', 'lemmas2check-20180907.csv')
	13	+
	14	+
	15	+class Command(BaseCommand):
	16	+ def handle(self, **options):
	17	+ write_schemata_to_check()
	18	+
	19	+
	20	+def write_schemata_to_check():
	21	+ to_check_file = codecs.open(TO_CHECK_PATH, 'wt', 'utf-8')
	22	+ lemmas = Lemma.objects.filter(old=False).order_by('entry_obj__name')
	23	+ for lemma in lemmas:
	24	+ print lemma
	25	+ if lemma.frames.count() > 1:
	26	+ combinations = itertools.combinations(lemma.frames.all(), 2)
	27	+ for comb in combinations:
	28	+ if only_lex_diff(comb[0], comb[1]):
	29	+ to_check_file.write('%s (%s)\n' % (lemma.entry_obj.name, lemma.status.status))
	30	+ break
	31	+ to_check_file.close()
	32	+
	33	+
	34	+def only_lex_diff(schema1, schema2):
	35	+ if (schema1.characteristics.get(type=u'ZWROTNOŚĆ') == schema2.characteristics.get(type=u'ZWROTNOŚĆ') and
	36	+ schema1.characteristics.get(type=u'ASPEKT') == schema2.characteristics.get(type=u'ASPEKT') and
	37	+ schema1.characteristics.get(type=u'NEGATYWNOŚĆ') == schema2.characteristics.get(type=u'NEGATYWNOŚĆ') and
	38	+ schema1.characteristics.get(type=u'PREDYKATYWNOŚĆ') == schema2.characteristics.get(type=u'PREDYKATYWNOŚĆ') and
	39	+ schema1.positions.count() == schema2.positions.count() and (schema1.phraseologic or schema2.phraseologic)):
	40	+
	41	+ pos_diff1 = schema1.positions.exclude(text_rep__in=[pos.text_rep for pos in schema2.positions.all()])
	42	+ pos_diff2 = schema2.positions.exclude(text_rep__in=[pos.text_rep for pos in schema1.positions.all()])
	43	+ for pos1 in pos_diff1:
	44	+ match = False
	45	+ for pos2 in pos_diff2:
	46	+ if positions_lexically_match(pos1, pos2):
	47	+ match = True
	48	+ if not match:
	49	+ return False
	50	+ return True
	51	+
	52	+ return False
	53	+
	54	+
	55	+def positions_lexically_match(pos1, pos2):
	56	+ if (pos1.categories.count() == pos2.categories.count() and
	57	+ (pos1.categories.all() & pos2.categories.all()).count() == pos1.categories.count() and
	58	+ phrase_types_match(pos1, pos2)):
	59	+ return True
	60	+ return False
	61	+
	62	+
	63	+def phrase_types_match(pos1, pos2):
	64	+ if pos1.arguments.count() == pos2.arguments.count():
	65	+ pt_diff1 = pos1.arguments.exclude(pk__in=pos2.arguments.all())
	66	+ pt_diff2 = pos2.arguments.exclude(pk__in=pos1.arguments.all())
	67	+ for pt1 in pt_diff1:
	68	+ match = False
	69	+ for pt2 in pt_diff2:
	70	+ if lex_to_phrase_type_match(pt1, pt2) or lex_to_phrase_type_match(pt2, pt1):
	71	+ match = True
	72	+ break
	73	+ if not match:
	74	+ return False
	75	+ return True
	76	+ return False
	77	+
	78	+
	79	+def lex_to_phrase_type_match(pt1, pt2):
	80	+ if pt1.type == 'lex' and pt2.type in ['adjp', 'prepadjp']:
	81	+ lexicalized_pt = pt1.atributes.get(type='TYP FRAZY').values.all()[0].argument
	82	+
	83	+ if (pt2.type == 'adjp' and lexicalized_pt.type in ['adjp', 'ppasp', 'pactp'] and
	84	+ pt2.text_rep.split('(')[1] == lexicalized_pt.text_rep.split('(')[1]):
	85	+ return True
	86	+ elif (pt2.type == 'prepadjp' and lexicalized_pt.type in ['prepadjp', 'prepppasp', 'preppactp'] and
	87	+ pt2.text_rep.split('(')[1] == lexicalized_pt.text_rep.split('(')[1]):
	88	+ return True
	89	+
	90	+ return False
...	...