#-*- coding:utf-8 -*-
"""Django management command: check whether Skladnica frames exist in Walenty.

Each line of the Skladnica frame file is parsed into a lemma, a part of
speech, a list of arguments and a list of prepositional phrases.  Every
argument is expanded into the notations it may take on the Walenty side
(see ``possible_args``) and compared with the frames stored for the lemma.
The input line is then copied to one of four report files (good, bad,
lemma not found, matched only after advp/prepositional-phrase merging).
"""

# author: B.Niton

import codecs
import copy
import itertools
import re

from django.core.management.base import BaseCommand
from django.utils.encoding import smart_str

# Atribute_Value, Frame_Characteristic and Lemma are not defined in this
# file, so they presumably come from this star import.  It is kept last so
# that any name it exports keeps shadowing the explicit imports above,
# exactly as before.
from dictionary.models import *

DICT_PATH = 'data/Skladnica/ramki_skl_v_140213.txt'
BAD_PATH = 'data/Skladnica/bad_frames.txt'
GOOD_PATH = 'data/Skladnica/good_frames.txt'
NLEMMA_PATH = 'data/Skladnica/nlemma_frames.txt'
ADVP_PP_PATH = 'data/Skladnica/advp_pp_frames.txt'

# One input line: "<lemma> <pos> [<frame arguments>] [<prepositions>] <rest>".
# The pattern is pure ASCII, so a plain raw string matches the same text as
# the previous ``ur''`` literal; it is compiled once instead of per line.
_LINE_PATTERN = re.compile(
    r'^([^\s]+)[\s]*([^\s]+)[\s]*(\[[^\]]*\])[\s]*(\[[^\]]*\])(.*)$')

# Polish case abbreviations and their replacements, applied in this order.
_CASE_REPLACEMENTS = (
    (u'mian', u'nom'),
    (u'bier', u'acc'),
    (u'cel', u'dat'),
    (u'dop', u'gen'),
    (u'miej', u'loc'),
    (u'narz', u'inst'),
    (u'pop', u'postp'),
)

# prepnp arguments with these prefixes get an extra controlled variant.
_COMPARATIVE_PREFIXES = ('prepnp(jak,', 'prepnp(jako,', u'prepnp(niż,')

# prepnp arguments that are rewritten as complex prepositions.
_COMPLEX_PREPOSITIONS = {
    u'prepnp(na temat,dop)': u'comprepnp(na temat)',
    u'prepnp(w sprawie,dop)': u'comprepnp(w sprawie)',
    u'prepnp(z powodu,dop)': u'comprepnp(z powodu)',
}

# Replacement for 'sentp', chosen by the number of commas in the argument.
_SENTP_TARGETS = {0: u'cp', 1: u'ncp', 2: u'prepncp'}

# Every notation tried for a bare 'advp' argument, in the original order.
_ADVP_ALTERNATIVES = (
    u'xp(_)', u'advp(pron)', u'advp(misc)', u'advp(locat)', u'advp(abl)',
    u'advp(adl)', u'advp(perl)', u'advp(temp)', u'advp(dur)', u'advp(mod)',
    u'advp(pred)',
)


class Command(BaseCommand):
    help = 'Checks if Skladnica frames exists in Walenty.'

    def handle(self, file_path=DICT_PATH, **options):
        """Run the check on ``file_path`` and print any error it reports."""
        # The path used to be accepted but ignored, and the error message
        # returned by check_frames() was silently dropped.
        error = check_frames(file_path)
        if error:
            print(error)


def update_case(arg):
    """Return ``arg`` with Polish case abbreviations replaced.

    The replacements are plain substring substitutions applied in the order
    given by ``_CASE_REPLACEMENTS``.
    """
    for polish, replacement in _CASE_REPLACEMENTS:
        arg = arg.replace(polish, replacement)
    return arg


def _possibility(arg, category_ls=None, need_controll=False,
                 preposition='', case='', pos_nps=None):
    """Build one candidate dict in the shape consumed by ``match_arg``."""
    return {'category_ls': [] if category_ls is None else category_ls,
            'arg': arg,
            'need_controll': need_controll,
            'preposition': preposition,
            'case': case,
            'pos_nps': [] if pos_nps is None else pos_nps}


def _all_categories(arg):
    """Return ``arg`` without a category, as 'subj' and as 'obj'."""
    return [_possibility(arg),
            _possibility(arg, ['subj']),
            _possibility(arg, ['obj'])]


def _plain_and_obj(arg):
    """Return ``arg`` without a category and as 'obj' (no 'subj' variant)."""
    return [_possibility(arg), _possibility(arg, ['obj'])]


def _comparative_prepnp(arg, pos):
    """Expand a prepnp argument matched by ``_COMPARATIVE_PREFIXES``."""
    attributes = arg.replace('prepnp(', '').replace(')', '').split(',')
    preposition = attributes[0]
    case = attributes[1]

    result = _all_categories(arg)
    controlled_arg = 'prepnp(' + preposition + ',str)'
    if case == 'mian':
        pos_nps = []
        pos_nps.extend(possible_args('subj', pos))
        pos_nps.extend(possible_args('np(mian)', pos))
        result.append(_possibility(controlled_arg, need_controll=True,
                                   preposition=preposition, case=case,
                                   pos_nps=pos_nps))
    elif case == 'bier':
        result.append(_possibility(controlled_arg, need_controll=True,
                                   preposition=preposition, case=case,
                                   pos_nps=possible_args('np(bier)', pos)))
    return result


def _related_value_pairs():
    """Return (value, main value) pairs for attribute values marked related."""
    return [(atr.value, main_atr.value)
            for atr in Atribute_Value.objects.filter(related=True)
            for main_atr in atr.main_attr_values.all()]


def _swap_attribute_value(arg, old, new):
    """Replace ``old`` with ``new`` where it is a whole attribute of ``arg``."""
    for template in (u'(%s)', u'(%s,', u',%s)', u',%s,'):
        arg = arg.replace(template % old, template % new)
    return arg


def possible_args(arg, pos):
    """Return the candidate notations under which ``arg`` may be stored.

    ``arg`` is one argument (or prepositional phrase) string from the input
    file and ``pos`` the part-of-speech tag of the same line.  The result is
    a list of dicts with the keys 'category_ls', 'arg', 'need_controll',
    'preposition', 'case' and 'pos_nps'.

    After the per-type expansion every candidate has ', ' collapsed to ','
    and its case names converted by ``update_case``.  For each attribute
    value marked as related (e.g. przeciw/przeciwko) a copy with the value
    swapped for its main value is appended.  Copies whose text would be
    identical to the candidate they were made from are skipped: they were
    exact duplicates and only repeated the same database lookups.
    """
    posibilities = []
    if arg == 'subj':
        posibilities.append(_possibility(u'np(str)', ['subj']))
    elif arg == 'np(bier)':
        posibilities.extend(_plain_and_obj(u'np(str)'))
        posibilities.extend(_all_categories(arg))  # ==> np(acc)
        posibilities.extend(_all_categories(u'np(part)'))
    elif arg == 'np(dop)':
        posibilities.extend(_all_categories(arg))  # ==> np(gen)
        posibilities.extend(_all_categories(u'np(part)'))
        if pos == 'ger':
            # For gerunds the genitive may also correspond to the accusative.
            posibilities.extend(_plain_and_obj(u'np(str)'))
            posibilities.extend(_all_categories(u'np(acc)'))
    elif arg.startswith(_COMPARATIVE_PREFIXES):
        posibilities.extend(_comparative_prepnp(arg, pos))
    elif arg in _COMPLEX_PREPOSITIONS:
        # Original author's note on "na temat": not found among the
        # extracted ones.
        posibilities.extend(_all_categories(_COMPLEX_PREPOSITIONS[arg]))
    elif arg == u'adjp(mian)' or arg == u'adjp(narz)':
        posibilities.extend(_all_categories(arg))
        posibilities.extend(_all_categories(u'adjp(pred)'))
    elif arg.startswith(u'sentp'):
        # The number of commas tells how many attributes 'sentp' carries;
        # arguments with more than two commas yield no candidates.
        target = _SENTP_TARGETS.get(arg.count(u','))
        if target is not None:
            conv_arg = arg.replace(u'pz', u'int')
            posibilities.extend(
                _all_categories(conv_arg.replace(u'sentp', target)))
    elif arg == u'advp':
        for alternative in _ADVP_ALTERNATIVES:
            posibilities.extend(_all_categories(alternative))
    elif arg == u'prepnp(przez,bier)':
        posibilities.extend(_all_categories(arg))
        if pos == 'ger' or pos == 'ppas':
            posibilities.append(_possibility(u'np(str)', ['subj']))
    elif arg.startswith('infp('):
        conv_arg = arg.replace('(nd)', '(imperf)').replace('(dk)', '(perf)')
        posibilities.extend(_all_categories(conv_arg))
        posibilities.extend(_all_categories(u'infp(_)'))
    else:
        posibilities.extend(_all_categories(arg))

    # Fetched once per call instead of once per candidate.
    related_pairs = _related_value_pairs()
    poss_to_add = []
    for poss in posibilities:
        poss['arg'] = update_case(poss['arg'].replace(', ', ','))
        poss['case'] = update_case(poss['case'])
        # Add variants for related attribute values, e.g. przeciw/przeciwko.
        for value, main_value in related_pairs:
            arg_str = _swap_attribute_value(poss['arg'], value, main_value)
            if arg_str == poss['arg']:
                continue
            new_poss = copy.deepcopy(poss)
            new_poss['arg'] = arg_str
            poss_to_add.append(new_poss)
    posibilities.extend(poss_to_add)
    return posibilities


def _unique(items):
    """Return ``items`` without repeats, keeping first-seen order."""
    seen = set()
    result = []
    for item in items:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result


def _has_distinct_assignment(candidate_lists, args_to_match):
    """Tell whether one distinct position can be picked from every list."""
    for combination in itertools.product(*candidate_lists):
        if not combination:
            continue
        if (len(combination) == len(set(combination))
                and len(combination) >= args_to_match):
            return True
    return False


def _reflexivity_matches(frame, positions, preps, not_categorized_positions,
                         check_sie):
    """Check the reflexive marker of ``frame`` against the input frame.

    When ``check_sie`` is false the frame must have a characteristic whose
    value is the empty string.  Otherwise it must have the reflexive
    characteristic or a position matching a 'refl' argument.
    """
    if not check_sie:
        try:
            frame.characteristics.get(value__value=u'')
        except Frame_Characteristic.DoesNotExist:
            return False
        return True

    try:
        frame.characteristics.get(value__value=u'się')
    except Frame_Characteristic.DoesNotExist:
        for refl in _all_categories(u'refl'):
            if len(match_arg(refl, positions, preps,
                             not_categorized_positions)) > 0:
                return True
        return False
    return True


def check_frame(frame, conv_frame, preps, args_to_match, check_sie):
    """Return True when ``frame`` corresponds to the converted input frame.

    ``conv_frame['args']`` is a list of dicts carrying 'poss_args'; each
    entry's 'poss_positions' is overwritten with the positions of ``frame``
    matched by any of its candidates.  The frame matches when every
    argument can be given a different position and the reflexivity check
    (see ``_reflexivity_matches``) passes.
    """
    positions = frame.positions
    not_categorized_positions = frame.positions.exclude(
        categories__control=False)

    # More arguments than positions: this frame cannot match.
    if positions.count() < args_to_match:
        return False

    candidate_lists = []
    for arg in conv_frame['args']:
        arg['poss_positions'] = []
        for poss in arg['poss_args']:
            new_poss_positions = match_arg(poss, positions, preps,
                                           not_categorized_positions)
            if new_poss_positions:
                arg['poss_positions'].extend(new_poss_positions)
        if not arg['poss_positions']:
            return False
        # Repeated positions cannot change whether a distinct assignment
        # exists; dropping them keeps the product below small.
        candidate_lists.append(_unique(arg['poss_positions']))

    if args_to_match != 0 and not _has_distinct_assignment(candidate_lists,
                                                           args_to_match):
        return False

    return _reflexivity_matches(frame, positions, preps,
                                not_categorized_positions, check_sie)


def _split_frame_args(frame_str):
    """Split a bracketed frame string into its arguments.

    Commas inside parentheses belong to the argument, so comma-separated
    pieces are glued back together until the parentheses are balanced
    (both present or both absent).
    """
    args_ls = []
    arg_str = ''
    for tok in frame_str.replace('[', '').replace(']', '').split(','):
        arg_str += tok.strip() + ','
        if ('(' in arg_str) == (')' in arg_str):
            args_ls.append(arg_str.strip().rstrip(','))
            arg_str = ''
    return args_ls


def _parse_preps(prep_str, pos):
    """Return the prepositional phrases of a line with their candidates."""
    if prep_str == '[]':
        return []
    return [{'arg': prep,
             'poss_args': possible_args(prep, pos),
             'poss_positions': []}
            for prep in prep_str.replace('[', '').replace(']', '').split(';')
            if not prep.startswith('cat=')]


def _build_conv_frame(args_ls, pos):
    """Return ``(conv_frame, args_to_match, check_sie)`` for ``args_ls``.

    'sie' is not a regular argument: it switches on the reflexivity check
    and is not counted among the arguments to match.
    """
    conv_frame = {'args': [], 'reflex': u' '}
    check_sie = False
    args_to_match = len(args_ls)
    for arg in args_ls:
        if arg == 'sie':
            args_to_match -= 1
            check_sie = True
            conv_frame['reflex'] = u'się'
            continue
        conv_frame['args'].append({'arg': arg,
                                   'poss_args': possible_args(arg, pos),
                                   'poss_positions': []})
    return conv_frame, args_to_match, check_sie


def _any_frame_matches(lemma, conv_frame, preps, args_to_match, check_sie):
    """Tell whether any frame of ``lemma`` passes ``check_frame``."""
    for frame in lemma.frames.all():
        if check_frame(frame, conv_frame, preps, args_to_match, check_sie):
            return True
    return False


def _process_line(line, badfile, goodfile, nlemmafile, advp_pp_file):
    """Classify one input line and copy it to the matching report file."""
    m = _LINE_PATTERN.match(line)
    if not m:
        print(smart_str(line))
        return

    lemma_str = m.group(1).strip()
    pos = m.group(2).strip()
    frame_str = m.group(3).strip()
    prep_str = m.group(4).strip()

    try:
        lemma = Lemma.objects.get(old=False, entry=lemma_str,
                                  status__status=u'sprawdzone')
    except Lemma.DoesNotExist:
        nlemmafile.write(line.strip() + u'\n')
        return

    if frame_str == '[]' or frame_str == '_':
        return

    preps = _parse_preps(prep_str, pos)
    conv_frame, args_to_match, check_sie = _build_conv_frame(
        _split_frame_args(frame_str), pos)

    # A frame corresponding to the Skladnica one was found in Walenty.
    if _any_frame_matches(lemma, conv_frame, preps, args_to_match, check_sie):
        print('OK')
        goodfile.write(line)
        return

    # Second chance: let every 'advp' argument also be realised by any of
    # the line's prepositional phrases.
    advp_found = False
    for conv_arg in conv_frame['args']:
        if conv_arg['arg'] == 'advp':
            advp_found = True
            for prep in preps:
                conv_arg['poss_args'].extend(prep['poss_args'])
    if advp_found and _any_frame_matches(lemma, conv_frame, preps,
                                         args_to_match, check_sie):
        print('OK-wkladka')
        advp_pp_file.write(line)
        return

    print('BAD')
    badfile.write(line.strip() + '\n')


def check_frames(dict_path=DICT_PATH):
    """Compare every frame listed in ``dict_path`` with the stored frames.

    Lines are written to GOOD_PATH, BAD_PATH, NLEMMA_PATH or ADVP_PP_PATH
    depending on the outcome.  Returns None on success and an error message
    string when an IOError occurs.
    """
    print('Be patient, it can take a while.')
    streams = []

    def open_utf8(path, mode):
        # Register each stream as soon as it is open, so that a failure
        # while opening a later file no longer leaks the earlier ones.
        stream = codecs.open(path, mode, 'utf-8')
        streams.append(stream)
        return stream

    try:
        try:
            # The input is opened first: when it is missing, the report
            # files are not truncated.
            f = open_utf8(dict_path, 'rt')
            badfile = open_utf8(BAD_PATH, 'wt')
            goodfile = open_utf8(GOOD_PATH, 'wt')
            nlemmafile = open_utf8(NLEMMA_PATH, 'wt')
            advp_pp_file = open_utf8(ADVP_PP_PATH, 'wt')
            for line in f:
                _process_line(line, badfile, goodfile, nlemmafile,
                              advp_pp_file)
        finally:
            for stream in streams:
                stream.close()
    except IOError:
        return ('Error: Can not work on file %s, check if it exists!'
                % dict_path)


def _candidate_positions(arg, positions, not_categorized_positions,
                         **lookups):
    """Return the positions ``arg`` may occupy, narrowed by ``lookups``.

    A candidate with a category is searched among ``positions`` carrying
    that category; one without is searched in ``not_categorized_positions``.
    """
    if len(arg['category_ls']) > 0:
        return positions.filter(categories__category=arg['category_ls'][0],
                                **lookups)
    return not_categorized_positions.filter(**lookups)


def _has_controlling_np(position, pos_nps, positions, preps,
                        not_categorized_positions):
    """Tell whether a controller of ``position`` matches one of ``pos_nps``.

    For every 'controllee...' category of ``position`` the positions with
    the corresponding 'controller...' category are matched against each
    candidate in ``pos_nps``.  Returns False when ``position`` has no
    controllee category.
    """
    controllee_cats = position.categories.filter(
        category__startswith='controllee')
    for controllee in controllee_cats:
        control_id = controllee.category.replace('controllee', 'controller')
        controller_positions = positions.filter(
            categories__category=control_id)
        for np in pos_nps:
            # NOTE(review): candidates without a category are looked up in
            # not_categorized_positions, not in controller_positions --
            # confirm this is intended.
            if len(match_arg(np, controller_positions, preps,
                             not_categorized_positions)) > 0:
                return True
    return False


def _prep_fits_position(position, first_cat, poss_prep, positions, preps,
                        not_categorized_positions):
    """Check category (and control) agreement of one preposition candidate.

    ``first_cat`` is the first non-control category of ``position`` or None
    when it has none.
    """
    wanted = poss_prep['category_ls']
    if first_cat is None:
        if wanted:
            return False
    else:
        # A categorized position with a category-less candidate used to
        # raise IndexError on wanted[0]; it is simply not a match.
        if not wanted:
            return False
        # NOTE(review): this compares a category object with a string; the
        # object's 'category' attribute may have been intended -- confirm.
        if not (first_cat == wanted[0]):
            return False

    if not poss_prep['need_controll']:
        return True
    return _has_controlling_np(position, poss_prep['pos_nps'], positions,
                               preps, not_categorized_positions)


def _xp_position_matches(position, positions, preps,
                         not_categorized_positions):
    """Tell whether an 'xp' argument of ``position`` is realised by a prep."""
    pos_cats = position.categories.exclude(control=True)
    first_cat = pos_cats.all()[0] if pos_cats.count() > 0 else None
    for xp_arg in position.arguments.filter(type='xp'):
        for prep in preps:
            for poss_prep in prep['poss_args']:
                if not _prep_fits_position(position, first_cat, poss_prep,
                                           positions, preps,
                                           not_categorized_positions):
                    continue
                if xp_arg.realizations.filter(
                        argument__text_rep=poss_prep['arg']).count() > 0:
                    return True
    return False


def match_arg(arg, positions, preps, not_categorized_positions):
    """Return the positions that can hold the candidate ``arg``.

    ``arg`` is one dict produced by ``possible_args``.  The result is a
    queryset or a list of positions; callers only test it for emptiness,
    take its length or iterate over it.

    * 'xp...' candidates: with no prepositional phrases on the line every
      'xp' position qualifies; otherwise a position qualifies when one of
      its 'xp' arguments has a realization equal to a phrase candidate
      that agrees with the position's category (and control, if needed).
    * other candidates: positions whose argument text equals
      ``arg['arg']``; when control is required, only those for which a
      controlling noun phrase is found.
    """
    if arg['arg'].startswith('xp'):
        xp_positions = _candidate_positions(arg, positions,
                                            not_categorized_positions,
                                            arguments__type='xp')
        if xp_positions.count() > 0 and len(preps) == 0:
            return xp_positions
        return [position for position in xp_positions
                if _xp_position_matches(position, positions, preps,
                                        not_categorized_positions)]

    candidates = _candidate_positions(arg, positions,
                                      not_categorized_positions,
                                      arguments__text_rep=arg['arg'])
    if not arg['need_controll']:
        return candidates

    if len(arg['category_ls']) == 0:
        candidates = candidates.filter(categories__control=True)
    # NOTE(review): 'control' is used as a boolean everywhere else in this
    # file, so this startswith lookup probably never matches;
    # categories__category__startswith may have been intended.  Left
    # unchanged until confirmed against the model.
    candidates = candidates.filter(
        categories__control__startswith='controllee')

    ret_positions = candidates
    for position in candidates:
        if not _has_controlling_np(position, arg['pos_nps'], positions,
                                   preps, not_categorized_positions):
            ret_positions = ret_positions.exclude(pk=position.pk)
    return ret_positions