Commit 02d52c3af814e9ca7e1bcc91fb6e5a10538e2c51
1 parent: 52d56294
Switched recording of import timings to DEBUG-only mode; added error handling for phrase descriptions
Showing 2 changed files with 32 additions and 58 deletions
entries/phrase_descriptions/descriptions.py
... | ... | @@ -8,6 +8,9 @@ from importer.Phrase import * |
8 | 8 | from .polish_strings import * |
9 | 9 | from .utils import * |
10 | 10 | |
11 | +class PhraseDescriptionError(Exception): | |
12 | + pass | |
13 | + | |
11 | 14 | def powerset(iterable): |
12 | 15 | s = list(iterable) |
13 | 16 | return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1)) |
... | ... | @@ -35,12 +38,11 @@ def phrase_description2(phrase, position, negativity, lang, controller=None): |
35 | 38 | try: |
36 | 39 | desc = phrase_description(phrase, function, negativity, controller=controller) |
37 | 40 | except: |
38 | - return '???' | |
41 | + raise PhraseDescriptionError('couldn’t generate description: {}'.format(phrase)) | |
39 | 42 | translation.activate(curr_lang) |
40 | 43 | return desc |
41 | 44 | |
42 | 45 | def phrase_description(phrase, function, negativity, desc_case='nom', inside_lex=False, controller=None): |
43 | - #print('******', function, '***', negativity, '***', str(phrase)) | |
44 | 46 | if str(phrase) in ( |
45 | 47 | # malowany -> ppas in in Morfeusz |
46 | 48 | #'lex(adjp(agr),agr,agr,pos,malować,natr)', |
... | ... | @@ -350,13 +352,11 @@ def make_phraseologisms(phrase, function, negativity, attrs={}, controller=None, |
350 | 352 | else: |
351 | 353 | if phrase2._case._value == 'pred': |
352 | 354 | assert(controller) |
353 | - assert(controller_grammar) | |
354 | - #function = controller._function._value if controller._function else None | |
355 | - #control = True | |
355 | + #assert(controller_grammar) | |
356 | 356 | CASE = correct_case(phrase2._case._value, function, negativity) |
357 | 357 | # np. uczynić coś *jakimś* / kobietę *jakąś* |
358 | - # TODO czy są sytuacje, kiedy jest kontrola, ale nie powinniśmy jej w ten sposób uwzględniać? | |
359 | - if controller: | |
358 | + # w składni nie będzie controller_grammar | |
359 | + if controller and controller_grammar: | |
360 | 360 | function = controller._function._value if controller._function else None |
361 | 361 | control = True |
362 | 362 | if phrase._number == 'agr' and 'num' in attrs: |
... | ... | @@ -396,8 +396,8 @@ def make_phraseologisms(phrase, function, negativity, attrs={}, controller=None, |
396 | 396 | assert(controller) |
397 | 397 | assert(controller_grammar) |
398 | 398 | # np. uznawać kogoś *za jakiegoś* / coś *za jakieś* / facetów *za jakichś* |
399 | - # TODO czy są sytuacje, kiedy jest kontrola, ale nie powinniśmy jej w ten sposób uwzględniać? | |
400 | - if controller: | |
399 | + # w składni nie będzie controller_grammar | |
400 | + if controller and controller_grammar: | |
401 | 401 | function = controller._function._value if controller._function else None |
402 | 402 | control = True |
403 | 403 | if control: |
... | ... | @@ -510,13 +510,15 @@ def make_phraseologisms(phrase, function, negativity, attrs={}, controller=None, |
510 | 510 | else: |
511 | 511 | rest.append(dep_phr) |
512 | 512 | # all realisations should have been matched by modifications |
513 | - # TODO: fails (but shouldn’t!) for lex(cp(int[jaki]),aff,żyć,,ratr1({lex(prepnp(na,loc),sg,świat,ratr1({lex(adjp(agr),agr,agr,pos,jaki,natr)}))})) – nested ‘jaki’ | |
514 | - assert (not realisations) | |
515 | - #print() | |
516 | - #print('--- FIRST:', list(map(str, first))) | |
517 | - #print('--- SUBJ:', subj) | |
518 | - #print('--- REST:', list(map(str, rest))) | |
519 | - #print(typ) | |
513 | + # TODO: assertion fails (but shouldn’t!) for lex(cp(int[jaki]),aff,żyć,,ratr1({lex(prepnp(na,loc),sg,świat,ratr1({lex(adjp(agr),agr,agr,pos,jaki,natr)}))})) – nested ‘jaki’ | |
514 | + # TODO: assertion fails for some phrases where the realisation is not among modifications, see (*****) in notes — correct those schemata | |
515 | + #assert (not realisations) | |
516 | + # TODO workaround: | |
517 | + if realisations: | |
518 | + # TODO workaround produces mess for ‘na jakim świecie żyje’ | |
519 | + assert (len(realisations) == 1 and not first) | |
520 | + first.append((None, list(realisations))) | |
521 | + | |
520 | 522 | assert (len(first) == 1 or typ not in ('int',)) |
521 | 523 | #print() |
522 | 524 | deps1 = [d[1] for d in first] + [d[1] for d in pron] |
... | ... |
importer/RealizationDescriptions.py
... | ... | @@ -4,7 +4,7 @@ import os |
4 | 4 | from collections import Counter, defaultdict |
5 | 5 | from itertools import chain |
6 | 6 | |
7 | -from shellvalier.settings import BASE_DIR | |
7 | +from shellvalier.settings import BASE_DIR, DEBUG | |
8 | 8 | |
9 | 9 | from meanings.models import LexicalUnit, Synset |
10 | 10 | from semantics.models import SemanticRole, RoleAttribute |
... | ... | @@ -65,16 +65,6 @@ def get_hyponyms(synset, seen=None, tab=' '): |
65 | 65 | hyponyms.update(get_hyponyms(hypo, seen, tab=tab + ' ')) |
66 | 66 | return hyponyms |
67 | 67 | |
68 | -# przyspieszająca heurystyka na podstawie oglądania danych | |
69 | -PRIORITY_SYNSETS = ( | |
70 | - #rzecz-4 | |
71 | - #103156, | |
72 | - #przedmiot-1 | |
73 | - #2646, | |
74 | - #substancja-1 | |
75 | - #5236, | |
76 | -) | |
77 | - | |
78 | 68 | # for benchmarking |
79 | 69 | BENCH3 = defaultdict(list) |
80 | 70 | |
... | ... | @@ -203,11 +193,6 @@ HYPONYM_CACHE = { |
203 | 193 | } |
204 | 194 | |
205 | 195 | def select_synsets(synsets): |
206 | - for p_synset in PRIORITY_SYNSETS: | |
207 | - ssets = [s for s in synsets if s.id == p_synset] | |
208 | - if ssets: | |
209 | - assert(len(ssets) == 1) | |
210 | - return ssets | |
211 | 196 | by_num_hyponyms = defaultdict(set) |
212 | 197 | for synset in synsets: |
213 | 198 | sid = synset.id |
... | ... | @@ -221,7 +206,8 @@ def select_synsets(synsets): |
221 | 206 | t2 = datetime.datetime.now() |
222 | 207 | # deciseconds :) |
223 | 208 | d = round((t2 - t1).total_seconds() * 10) |
224 | - BENCH3[d].append((HYPONYM_CACHE[sid], sid, synset)) | |
209 | + if DEBUG: | |
210 | + BENCH3[d].append((HYPONYM_CACHE[sid], sid, synset)) | |
225 | 211 | # ---- |
226 | 212 | N = HYPONYM_CACHE[sid] |
227 | 213 | by_num_hyponyms[N].add(synset) |
... | ... | @@ -309,7 +295,8 @@ def get_argument_lemma(argument, xp=False): |
309 | 295 | t2 = datetime.datetime.now() |
310 | 296 | # deciseconds :) |
311 | 297 | d = round((t2 - t1).total_seconds() * 10) |
312 | - BENCH2[d].append((argument.predefined.all(), argument.synsets.all(), ret)) | |
298 | + if DEBUG: | |
299 | + BENCH2[d].append((argument.predefined.all(), argument.synsets.all(), ret)) | |
313 | 300 | return ret |
314 | 301 | |
315 | 302 | def get_argument_lemma2(argument, xp=False): |
... | ... | @@ -705,7 +692,8 @@ def get_phrase_description(subentry, argument, position, phrase, controller_gram |
705 | 692 | t2 = datetime.datetime.now() |
706 | 693 | # deciseconds :) |
707 | 694 | d = round((t2 - t1).total_seconds() * 10) |
708 | - BENCH[d].append((subentry.entry.name, argument.role.role.role, ret[0])) | |
695 | + if DEBUG: | |
696 | + BENCH[d].append((subentry.entry.name, argument.role.role.role, ret[0])) | |
709 | 697 | return ret |
710 | 698 | |
711 | 699 | # subentry, argument: DB model objects |
... | ... | @@ -876,19 +864,6 @@ def get_phrase_priority(phrase): |
876 | 864 | # key: phrase importer object |
877 | 865 | # value: (description, gender, number) |
878 | 866 | # result: phrase description to use in the realisation description |
879 | -# TODO!! dzwonić – dwie lex(prepnp(w,loc))! | |
880 | -# TODO!! kapać – dwie lex(np(inst))! | |
881 | -# TODO!! popukać – dwie lex(prepnp(do,gen))! | |
882 | -# TODO!! przeczyć – dwie lex(np(dat))! | |
883 | -# TODO!! pukać – dwie lex(prepnp(do,gen))! | |
884 | -# TODO!! regenerować – dwie lex(np(str))! | |
885 | -# TODO!! rosić – dwie lex(np(inst))! | |
886 | -# TODO!! spychać – dwie lex(prepnp(na,acc))! | |
887 | -# TODO!! szwankować – dwie lex(prepnp(na,loc))! | |
888 | -# TODO!! wypchać – dwie lex(np(inst))! | |
889 | -# TODO!! zapukać – dwie lex(prepnp(do,gen))! | |
890 | -# TODO!! zepchnąć – dwie lex(prepnp(na,acc))! | |
891 | -# TODO!! zrosić – dwie lex(np(inst))! | |
892 | 867 | def select_phrase_description(position, phrase_descriptions): |
893 | 868 | #print(type(position)) |
894 | 869 | #print(phrase_descriptions) |
... | ... | @@ -905,16 +880,13 @@ def select_phrase_description(position, phrase_descriptions): |
905 | 880 | assert (desc[0] != '???') |
906 | 881 | return desc |
907 | 882 | else: |
908 | - # TODO? napsuć zdrowia/nerwów | |
909 | - if set(desc[0] for desc in phrase_descriptions.values()) == {'zdrowia', 'nerwów'}: | |
910 | - return ('zdrowia i nerwów', 'n', 'pl') | |
911 | - #for phrase, desc in phrase_descriptions.items(): | |
912 | - # print('***', type(phrase)) | |
913 | - # print('*** ', phrase, desc) | |
914 | - #for priority, phrases in sorted(by_priority.items()): | |
915 | - # print('===', priority) | |
916 | - # print('=== ', phrases) | |
917 | - raise RealisationDescriptionError('couldn’t select phrase description: {}'.format(' * '.join(desc[0] for desc in phrase_descriptions.values()))) | |
883 | + # all are lex phrases | |
884 | + assert(all(isinstance(p, LexPhrase) for p, d in min_priority_phrases)) | |
885 | + # all have the same grammatical type | |
886 | + assert(len(set(str(p._lex_phrase()) for p, d in min_priority_phrases)) == 1) | |
887 | + # heuristic: return first lexicographically | |
888 | + return sorted(min_priority_phrases, key=lambda x: x[1][0])[0][1] | |
889 | + #raise RealisationDescriptionError('couldn’t select phrase description: {}'.format(' * '.join(desc[0] for desc in phrase_descriptions.values()))) | |
918 | 890 | |
919 | 891 | |
920 | 892 | FUNCTION_RANK = { |
... | ... |