Commit 08bfdfc449fbd24d5e4ea612e611c59fb2291f11
1 parent: 168743f7
Minor fixes.
Showing 1 changed file with 6 additions and 5 deletions
preparator.py
... | ... | @@ -29,7 +29,7 @@ TITLE2REDIRECT_PATH = os.path.abspath(os.path.join(MAIN_PATH, 'data', 'wikipedia |
29 | 29 | |
30 | 30 | ANNO_PATH = TEST_PATH |
31 | 31 | OUT_PATH = os.path.abspath(os.path.join(MAIN_PATH, 'data', |
32 | - 'test-20170720.csv')) | |
32 | + 'test-1to5-20170720.csv')) | |
33 | 33 | EACH_TEXT_SEPARATELLY = False |
34 | 34 | |
35 | 35 | CONTEXT = 5 |
... | ... | @@ -39,8 +39,9 @@ MODEL = os.path.abspath(os.path.join(MAIN_PATH, 'models', |
39 | 39 | 'w2v_allwiki_nkjpfull_%d.model' % W2V_SIZE)) |
40 | 40 | |
41 | 41 | FIRST_SECOND_PERSON = ['pri', 'sec'] |
42 | -INDICATIVE_PRONS_BASES = ["ten", "ta", "to", "ci", "te", "tamten", "tamta", | |
43 | - "tamto", "tamci", "tamte", "ów", "owa", "owo", "owi", "owe"] | |
42 | +INDICATIVE_PRONS_BASES = [u'ten', u'ta', u'to', u'ci', u'te', u'tamten', u'tamta', | |
43 | + u'tamto', u'tamci', u'tamte', u'ów', u'owa', u'owo', | |
44 | + u'owi', u'owe'] | |
44 | 45 | SIEBIE_TAGS = ['siebie'] |
45 | 46 | MASCULINE_TAGS = ['m1', 'm2', 'm3'] |
46 | 47 | |
... | ... | @@ -50,7 +51,7 @@ ZERO_TAGS = ['fin', 'praet', 'bedzie', 'impt', 'winien', 'aglt'] |
50 | 51 | POSSIBLE_HEADS = [u'§', u'%', u'*', u'"', u'„', u'&', u'-'] |
51 | 52 | HYPHEN_SIGNS = ['-', '#'] |
52 | 53 | |
53 | -NEG_PROPORTION = 1 | |
54 | +NEG_PROPORTION = 5 | |
54 | 55 | RANDOM_VECTORS = True |
55 | 56 | |
56 | 57 | DEBUG = False |
... | ... | @@ -612,7 +613,7 @@ def check_one_way_acronym(acronym, expression): |
612 | 613 | for expr2 in expr1.split(): |
613 | 614 | expr2 = expr2.strip() |
614 | 615 | if expr2: |
615 | - initials += str(expr2[0]).upper() | |
616 | + initials += expr2[0].upper() | |
616 | 617 | if acronym == initials: |
617 | 618 | return 1 |
618 | 619 | return 0 |
... | ... |