Commit 08bfdfc449fbd24d5e4ea612e611c59fb2291f11
1 parent: 168743f7
Minor fixes.
Showing 1 changed file with 6 additions and 5 deletions
preparator.py
@@ -29,7 +29,7 @@ TITLE2REDIRECT_PATH = os.path.abspath(os.path.join(MAIN_PATH, 'data', 'wikipedia | @@ -29,7 +29,7 @@ TITLE2REDIRECT_PATH = os.path.abspath(os.path.join(MAIN_PATH, 'data', 'wikipedia | ||
29 | 29 | ||
30 | ANNO_PATH = TEST_PATH | 30 | ANNO_PATH = TEST_PATH |
31 | OUT_PATH = os.path.abspath(os.path.join(MAIN_PATH, 'data', | 31 | OUT_PATH = os.path.abspath(os.path.join(MAIN_PATH, 'data', |
32 | - 'test-20170720.csv')) | 32 | + 'test-1to5-20170720.csv')) |
33 | EACH_TEXT_SEPARATELLY = False | 33 | EACH_TEXT_SEPARATELLY = False |
34 | 34 | ||
35 | CONTEXT = 5 | 35 | CONTEXT = 5 |
@@ -39,8 +39,9 @@ MODEL = os.path.abspath(os.path.join(MAIN_PATH, 'models', | @@ -39,8 +39,9 @@ MODEL = os.path.abspath(os.path.join(MAIN_PATH, 'models', | ||
39 | 'w2v_allwiki_nkjpfull_%d.model' % W2V_SIZE)) | 39 | 'w2v_allwiki_nkjpfull_%d.model' % W2V_SIZE)) |
40 | 40 | ||
41 | FIRST_SECOND_PERSON = ['pri', 'sec'] | 41 | FIRST_SECOND_PERSON = ['pri', 'sec'] |
42 | -INDICATIVE_PRONS_BASES = ["ten", "ta", "to", "ci", "te", "tamten", "tamta", | ||
43 | - "tamto", "tamci", "tamte", "ów", "owa", "owo", "owi", "owe"] | 42 | +INDICATIVE_PRONS_BASES = [u'ten', u'ta', u'to', u'ci', u'te', u'tamten', u'tamta', |
43 | + u'tamto', u'tamci', u'tamte', u'ów', u'owa', u'owo', | ||
44 | + u'owi', u'owe'] | ||
44 | SIEBIE_TAGS = ['siebie'] | 45 | SIEBIE_TAGS = ['siebie'] |
45 | MASCULINE_TAGS = ['m1', 'm2', 'm3'] | 46 | MASCULINE_TAGS = ['m1', 'm2', 'm3'] |
46 | 47 | ||
@@ -50,7 +51,7 @@ ZERO_TAGS = ['fin', 'praet', 'bedzie', 'impt', 'winien', 'aglt'] | @@ -50,7 +51,7 @@ ZERO_TAGS = ['fin', 'praet', 'bedzie', 'impt', 'winien', 'aglt'] | ||
50 | POSSIBLE_HEADS = [u'§', u'%', u'*', u'"', u'„', u'&', u'-'] | 51 | POSSIBLE_HEADS = [u'§', u'%', u'*', u'"', u'„', u'&', u'-'] |
51 | HYPHEN_SIGNS = ['-', '#'] | 52 | HYPHEN_SIGNS = ['-', '#'] |
52 | 53 | ||
53 | -NEG_PROPORTION = 1 | 54 | +NEG_PROPORTION = 5 |
54 | RANDOM_VECTORS = True | 55 | RANDOM_VECTORS = True |
55 | 56 | ||
56 | DEBUG = False | 57 | DEBUG = False |
@@ -612,7 +613,7 @@ def check_one_way_acronym(acronym, expression): | @@ -612,7 +613,7 @@ def check_one_way_acronym(acronym, expression): | ||
612 | for expr2 in expr1.split(): | 613 | for expr2 in expr1.split(): |
613 | expr2 = expr2.strip() | 614 | expr2 = expr2.strip() |
614 | if expr2: | 615 | if expr2: |
615 | - initials += str(expr2[0]).upper() | 616 | + initials += expr2[0].upper() |
616 | if acronym == initials: | 617 | if acronym == initials: |
617 | return 1 | 618 | return 1 |
618 | return 0 | 619 | return 0 |