Commit 02d52c3af814e9ca7e1bcc91fb6e5a10538e2c51

Authored by Katarzyna Krasnowska
1 parent 52d56294

switched import time recording to DEBUG only, added phrase desc. error handling

entries/phrase_descriptions/descriptions.py
... ... @@ -8,6 +8,9 @@ from importer.Phrase import *
8 8 from .polish_strings import *
9 9 from .utils import *
10 10  
  11 +class PhraseDescriptionError(Exception):
  12 + pass
  13 +
11 14 def powerset(iterable):
12 15 s = list(iterable)
13 16 return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))
... ... @@ -35,12 +38,11 @@ def phrase_description2(phrase, position, negativity, lang, controller=None):
35 38 try:
36 39 desc = phrase_description(phrase, function, negativity, controller=controller)
37 40 except:
38   - return '???'
  41 + raise PhraseDescriptionError('couldn’t generate description: {}'.format(phrase))
39 42 translation.activate(curr_lang)
40 43 return desc
41 44  
42 45 def phrase_description(phrase, function, negativity, desc_case='nom', inside_lex=False, controller=None):
43   - #print('******', function, '***', negativity, '***', str(phrase))
44 46 if str(phrase) in (
45 47 # malowany -> ppas in Morfeusz
46 48 #'lex(adjp(agr),agr,agr,pos,malować,natr)',
... ... @@ -350,13 +352,11 @@ def make_phraseologisms(phrase, function, negativity, attrs={}, controller=None,
350 352 else:
351 353 if phrase2._case._value == 'pred':
352 354 assert(controller)
353   - assert(controller_grammar)
354   - #function = controller._function._value if controller._function else None
355   - #control = True
  355 + #assert(controller_grammar)
356 356 CASE = correct_case(phrase2._case._value, function, negativity)
357 357 # np. uczynić coś *jakimś* / kobietę *jakąś*
358   - # TODO czy są sytuacje, kiedy jest kontrola, ale nie powinniśmy jej w ten sposób uwzględniać?
359   - if controller:
  358 + # w składni nie będzie controller_grammar
  359 + if controller and controller_grammar:
360 360 function = controller._function._value if controller._function else None
361 361 control = True
362 362 if phrase._number == 'agr' and 'num' in attrs:
... ... @@ -396,8 +396,8 @@ def make_phraseologisms(phrase, function, negativity, attrs={}, controller=None,
396 396 assert(controller)
397 397 assert(controller_grammar)
398 398 # np. uznawać kogoś *za jakiegoś* / coś *za jakieś* / facetów *za jakichś*
399   - # TODO czy są sytuacje, kiedy jest kontrola, ale nie powinniśmy jej w ten sposób uwzględniać?
400   - if controller:
  399 + # w składni nie będzie controller_grammar
  400 + if controller and controller_grammar:
401 401 function = controller._function._value if controller._function else None
402 402 control = True
403 403 if control:
... ... @@ -510,13 +510,15 @@ def make_phraseologisms(phrase, function, negativity, attrs={}, controller=None,
510 510 else:
511 511 rest.append(dep_phr)
512 512 # all realisations should have been matched by modifications
513   - # TODO: fails (but shouldn’t!) for lex(cp(int[jaki]),aff,żyć,,ratr1({lex(prepnp(na,loc),sg,świat,ratr1({lex(adjp(agr),agr,agr,pos,jaki,natr)}))})) – nested ‘jaki’
514   - assert (not realisations)
515   - #print()
516   - #print('--- FIRST:', list(map(str, first)))
517   - #print('--- SUBJ:', subj)
518   - #print('--- REST:', list(map(str, rest)))
519   - #print(typ)
  513 + # TODO: assertion fails (but shouldn’t!) for lex(cp(int[jaki]),aff,żyć,,ratr1({lex(prepnp(na,loc),sg,świat,ratr1({lex(adjp(agr),agr,agr,pos,jaki,natr)}))})) – nested ‘jaki’
  514 + # TODO: assertion fails for some phrases where the realisation is not among modifications, see (*****) in notes — correct those schemata
  515 + #assert (not realisations)
  516 + # TODO workaround:
  517 + if realisations:
  518 + # TODO workaround produces mess for ‘na jakim świecie żyje’
  519 + assert (len(realisations) == 1 and not first)
  520 + first.append((None, list(realisations)))
  521 +
520 522 assert (len(first) == 1 or typ not in ('int',))
521 523 #print()
522 524 deps1 = [d[1] for d in first] + [d[1] for d in pron]
... ...
importer/RealizationDescriptions.py
... ... @@ -4,7 +4,7 @@ import os
4 4 from collections import Counter, defaultdict
5 5 from itertools import chain
6 6  
7   -from shellvalier.settings import BASE_DIR
  7 +from shellvalier.settings import BASE_DIR, DEBUG
8 8  
9 9 from meanings.models import LexicalUnit, Synset
10 10 from semantics.models import SemanticRole, RoleAttribute
... ... @@ -65,16 +65,6 @@ def get_hyponyms(synset, seen=None, tab=' '):
65 65 hyponyms.update(get_hyponyms(hypo, seen, tab=tab + ' '))
66 66 return hyponyms
67 67  
68   -# przyspieszająca heurystyka na podstawie oglądania danych
69   -PRIORITY_SYNSETS = (
70   - #rzecz-4
71   - #103156,
72   - #przedmiot-1
73   - #2646,
74   - #substancja-1
75   - #5236,
76   -)
77   -
78 68 # for benchmarking
79 69 BENCH3 = defaultdict(list)
80 70  
... ... @@ -203,11 +193,6 @@ HYPONYM_CACHE = {
203 193 }
204 194  
205 195 def select_synsets(synsets):
206   - for p_synset in PRIORITY_SYNSETS:
207   - ssets = [s for s in synsets if s.id == p_synset]
208   - if ssets:
209   - assert(len(ssets) == 1)
210   - return ssets
211 196 by_num_hyponyms = defaultdict(set)
212 197 for synset in synsets:
213 198 sid = synset.id
... ... @@ -221,7 +206,8 @@ def select_synsets(synsets):
221 206 t2 = datetime.datetime.now()
222 207 # deciseconds :)
223 208 d = round((t2 - t1).total_seconds() * 10)
224   - BENCH3[d].append((HYPONYM_CACHE[sid], sid, synset))
  209 + if DEBUG:
  210 + BENCH3[d].append((HYPONYM_CACHE[sid], sid, synset))
225 211 # ----
226 212 N = HYPONYM_CACHE[sid]
227 213 by_num_hyponyms[N].add(synset)
... ... @@ -309,7 +295,8 @@ def get_argument_lemma(argument, xp=False):
309 295 t2 = datetime.datetime.now()
310 296 # deciseconds :)
311 297 d = round((t2 - t1).total_seconds() * 10)
312   - BENCH2[d].append((argument.predefined.all(), argument.synsets.all(), ret))
  298 + if DEBUG:
  299 + BENCH2[d].append((argument.predefined.all(), argument.synsets.all(), ret))
313 300 return ret
314 301  
315 302 def get_argument_lemma2(argument, xp=False):
... ... @@ -705,7 +692,8 @@ def get_phrase_description(subentry, argument, position, phrase, controller_gram
705 692 t2 = datetime.datetime.now()
706 693 # deciseconds :)
707 694 d = round((t2 - t1).total_seconds() * 10)
708   - BENCH[d].append((subentry.entry.name, argument.role.role.role, ret[0]))
  695 + if DEBUG:
  696 + BENCH[d].append((subentry.entry.name, argument.role.role.role, ret[0]))
709 697 return ret
710 698  
711 699 # subentry, argument: DB model objects
... ... @@ -876,19 +864,6 @@ def get_phrase_priority(phrase):
876 864 # key: phrase importer object
877 865 # value: (description, gender, number)
878 866 # result: phrase description to use in the realisation description
879   -# TODO!! dzwonić – dwie lex(prepnp(w,loc))!
880   -# TODO!! kapać – dwie lex(np(inst))!
881   -# TODO!! popukać – dwie lex(prepnp(do,gen))!
882   -# TODO!! przeczyć – dwie lex(np(dat))!
883   -# TODO!! pukać – dwie lex(prepnp(do,gen))!
884   -# TODO!! regenerować – dwie lex(np(str))!
885   -# TODO!! rosić – dwie lex(np(inst))!
886   -# TODO!! spychać – dwie lex(prepnp(na,acc))!
887   -# TODO!! szwankować – dwie lex(prepnp(na,loc))!
888   -# TODO!! wypchać – dwie lex(np(inst))!
889   -# TODO!! zapukać – dwie lex(prepnp(do,gen))!
890   -# TODO!! zepchnąć – dwie lex(prepnp(na,acc))!
891   -# TODO!! zrosić – dwie lex(np(inst))!
892 867 def select_phrase_description(position, phrase_descriptions):
893 868 #print(type(position))
894 869 #print(phrase_descriptions)
... ... @@ -905,16 +880,13 @@ def select_phrase_description(position, phrase_descriptions):
905 880 assert (desc[0] != '???')
906 881 return desc
907 882 else:
908   - # TODO? napsuć zdrowia/nerwów
909   - if set(desc[0] for desc in phrase_descriptions.values()) == {'zdrowia', 'nerwów'}:
910   - return ('zdrowia i nerwów', 'n', 'pl')
911   - #for phrase, desc in phrase_descriptions.items():
912   - # print('***', type(phrase))
913   - # print('*** ', phrase, desc)
914   - #for priority, phrases in sorted(by_priority.items()):
915   - # print('===', priority)
916   - # print('=== ', phrases)
917   - raise RealisationDescriptionError('couldn’t select phrase description: {}'.format(' * '.join(desc[0] for desc in phrase_descriptions.values())))
  883 + # all are lex phrases
  884 + assert(all(isinstance(p, LexPhrase) for p, d in min_priority_phrases))
  885 + # all have the same grammatical type
  886 + assert(len(set(str(p._lex_phrase()) for p, d in min_priority_phrases)) == 1)
  887 + # heuristic: return first lexicographically
  888 + return sorted(min_priority_phrases, key=lambda x: x[1][0])[0][1]
  889 + #raise RealisationDescriptionError('couldn’t select phrase description: {}'.format(' * '.join(desc[0] for desc in phrase_descriptions.values())))
918 890  
919 891  
920 892 FUNCTION_RANK = {
... ...