Commit 9e5589dfce75c67299f6c934dfae9dce02bfc009
1 parent
9dcdfedb
Added get_lexes_to_check script.
Showing
1 changed file
with
90 additions
and
0 deletions
dictionary/management/commands/get_lexes_to_check.py
0 → 100644
1 | +# -*- coding:utf-8 -*- | |
2 | + | |
3 | +import codecs | |
4 | +import itertools | |
5 | +import os | |
6 | + | |
7 | +from django.core.management.base import BaseCommand | |
8 | + | |
9 | +from dictionary.models import Lemma | |
10 | +from settings import PROJECT_PATH | |
11 | + | |
12 | +TO_CHECK_PATH = os.path.join(PROJECT_PATH, 'data', 'lemmas2check-20180907.csv') | |
13 | + | |
14 | + | |
class Command(BaseCommand):
    """Management command that exports the lemmas whose schemata need checking."""

    def handle(self, **options):
        """Entry point called by Django; ``options`` are accepted but not used.

        All the work is delegated to write_schemata_to_check().
        """
        write_schemata_to_check()
18 | + | |
19 | + | |
def write_schemata_to_check():
    """Write to TO_CHECK_PATH the lemmas that have a suspicious pair of schemata.

    Every lemma with ``old=False`` is visited in order of its entry name.
    A lemma is reported when at least one unordered pair of its frames
    (schemata) satisfies only_lex_diff(). Each reported lemma yields exactly
    one output line of the form ``<entry name> (<status>)``, encoded as UTF-8.
    The file at TO_CHECK_PATH is overwritten.
    """
    lemmas = Lemma.objects.filter(old=False).order_by('entry_obj__name')

    # The context manager guarantees the file is flushed and closed even when
    # a query or a comparison raises half-way through the export.
    # Plain 'w' is used because codecs.open() appends 'b' to the mode itself;
    # a 't' flag contributes nothing and Python 3's open() rejects 'wtb'.
    with codecs.open(TO_CHECK_PATH, 'w', 'utf-8') as to_check_file:
        for lemma in lemmas:
            # Progress output; the parenthesised form prints the same thing
            # under the Python 2 print statement and the Python 3 function.
            print(lemma)

            # Fewer than two schemata means there is no pair to compare.
            if lemma.frames.count() < 2:
                continue

            schema_pairs = itertools.combinations(lemma.frames.all(), 2)
            # any() stops at the first qualifying pair, so a lemma is
            # written at most once.
            if any(only_lex_diff(first, second) for first, second in schema_pairs):
                to_check_file.write('%s (%s)\n' % (lemma.entry_obj.name, lemma.status.status))
32 | + | |
33 | + | |
def only_lex_diff(schema1, schema2):
    """Tell whether two schemata differ only by lexicalised positions.

    The schemata qualify for comparison only when all of the following hold:

    * their characteristics of the four listed types are equal,
    * they have the same number of positions,
    * at least one of them is flagged ``phraseologic``.

    If so, the positions of each schema whose ``text_rep`` does not occur in
    the other schema are collected, and every such position of ``schema1``
    must satisfy positions_lexically_match() with at least one such position
    of ``schema2``.

    Returns True when the schemata qualify and every differing position of
    ``schema1`` found a counterpart (this includes the case where ``schema1``
    has no differing positions at all); False otherwise.

    NOTE(review): the check is one-directional -- differing positions of
    ``schema2`` are only used as candidates, they are not themselves required
    to be matched. Confirm that this is intended.
    """
    characteristic_types = (u'ZWROTNOŚĆ', u'ASPEKT', u'NEGATYWNOŚĆ', u'PREDYKATYWNOŚĆ')
    for characteristic_type in characteristic_types:
        own = schema1.characteristics.get(type=characteristic_type)
        other = schema2.characteristics.get(type=characteristic_type)
        # "not (a == b)" keeps using the equality operator, as the original
        # comparison did.
        if not (own == other):
            return False

    if schema1.positions.count() != schema2.positions.count():
        return False
    if not (schema1.phraseologic or schema2.phraseologic):
        return False

    text_reps_of_2 = [pos.text_rep for pos in schema2.positions.all()]
    text_reps_of_1 = [pos.text_rep for pos in schema1.positions.all()]
    pos_diff1 = schema1.positions.exclude(text_rep__in=text_reps_of_2)
    pos_diff2 = schema2.positions.exclude(text_rep__in=text_reps_of_1)

    for pos1 in pos_diff1:
        # any() stops at the first counterpart instead of comparing pos1
        # against every remaining candidate; each comparison costs several
        # database queries, and the outcome is the same.
        if not any(positions_lexically_match(pos1, pos2) for pos2 in pos_diff2):
            return False
    return True
53 | + | |
54 | + | |
def positions_lexically_match(pos1, pos2):
    """Tell whether two positions have equal categories and matching phrase types.

    Returns True when both positions have the same number of categories, the
    combination (``&``) of their category querysets is as large as the
    category set of ``pos1``, and phrase_types_match() accepts the pair;
    False otherwise.
    """
    if pos1.categories.count() != pos2.categories.count():
        return False

    shared_categories = pos1.categories.all() & pos2.categories.all()
    if shared_categories.count() != pos1.categories.count():
        return False

    return phrase_types_match(pos1, pos2)
61 | + | |
62 | + | |
def phrase_types_match(pos1, pos2):
    """Tell whether the phrase types (arguments) of two positions correspond.

    The positions must have the same number of arguments. Arguments present
    in both positions are ignored; every remaining argument of ``pos1`` must
    be paired, via lex_to_phrase_type_match() tried in both directions, with
    at least one remaining argument of ``pos2``.

    Returns True when the counts agree and every remaining argument of
    ``pos1`` found a partner (trivially so when none remain); False otherwise.
    """
    if pos1.arguments.count() != pos2.arguments.count():
        return False

    only_in_first = pos1.arguments.exclude(pk__in=pos2.arguments.all())
    only_in_second = pos2.arguments.exclude(pk__in=pos1.arguments.all())

    for candidate in only_in_first:
        for counterpart in only_in_second:
            if (lex_to_phrase_type_match(candidate, counterpart) or
                    lex_to_phrase_type_match(counterpart, candidate)):
                break
        else:
            # The inner loop ran to completion: nothing matched this argument.
            return False
    return True
77 | + | |
78 | + | |
def lex_to_phrase_type_match(pt1, pt2):
    """Tell whether the ``lex`` phrase type ``pt1`` lexicalises phrase type ``pt2``.

    Only applies when ``pt1.type`` is ``'lex'`` and ``pt2.type`` is ``'adjp'``
    or ``'prepadjp'``. The phrase type wrapped by ``pt1`` is read from the
    first value of its 'TYP FRAZY' attribute. The pair matches when the
    wrapped type belongs to the family listed for ``pt2.type`` and both text
    representations are identical after their first opening parenthesis.

    Returns True on a match, False otherwise.
    """
    # For each plain phrase type: the wrapped types that may lexicalise it.
    lexicalising_types = {
        'adjp': ('adjp', 'ppasp', 'pactp'),
        'prepadjp': ('prepadjp', 'prepppasp', 'preppactp'),
    }

    if pt1.type != 'lex' or pt2.type not in lexicalising_types:
        return False

    lexicalized_pt = pt1.atributes.get(type='TYP FRAZY').values.all()[0].argument
    if lexicalized_pt.type not in lexicalising_types[pt2.type]:
        return False

    # Compare what follows the first "(" in each text representation.
    plain_params = pt2.text_rep.split('(')[1]
    lexicalized_params = lexicalized_pt.text_rep.split('(')[1]
    return plain_params == lexicalized_params
... | ... |