Commit 08bfdfc449fbd24d5e4ea612e611c59fb2291f11

Authored by Bartłomiej Nitoń
1 parent 168743f7

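Minor fixes: write output to 'test-1to5-20170720.csv', raise NEG_PROPORTION from 1 to 5, make INDICATIVE_PRONS_BASES unicode literals, and drop the str() call around the first character in check_one_way_acronym.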

Showing 1 changed file with 6 additions and 5 deletions
preparator.py
@@ -29,7 +29,7 @@ TITLE2REDIRECT_PATH = os.path.abspath(os.path.join(MAIN_PATH, 'data', 'wikipedia @@ -29,7 +29,7 @@ TITLE2REDIRECT_PATH = os.path.abspath(os.path.join(MAIN_PATH, 'data', 'wikipedia
29 29
30 ANNO_PATH = TEST_PATH 30 ANNO_PATH = TEST_PATH
31 OUT_PATH = os.path.abspath(os.path.join(MAIN_PATH, 'data', 31 OUT_PATH = os.path.abspath(os.path.join(MAIN_PATH, 'data',
32 - 'test-20170720.csv')) 32 + 'test-1to5-20170720.csv'))
33 EACH_TEXT_SEPARATELLY = False 33 EACH_TEXT_SEPARATELLY = False
34 34
35 CONTEXT = 5 35 CONTEXT = 5
@@ -39,8 +39,9 @@ MODEL = os.path.abspath(os.path.join(MAIN_PATH, 'models', @@ -39,8 +39,9 @@ MODEL = os.path.abspath(os.path.join(MAIN_PATH, 'models',
39 'w2v_allwiki_nkjpfull_%d.model' % W2V_SIZE)) 39 'w2v_allwiki_nkjpfull_%d.model' % W2V_SIZE))
40 40
41 FIRST_SECOND_PERSON = ['pri', 'sec'] 41 FIRST_SECOND_PERSON = ['pri', 'sec']
42 -INDICATIVE_PRONS_BASES = ["ten", "ta", "to", "ci", "te", "tamten", "tamta",  
43 - "tamto", "tamci", "tamte", "ów", "owa", "owo", "owi", "owe"] 42 +INDICATIVE_PRONS_BASES = [u'ten', u'ta', u'to', u'ci', u'te', u'tamten', u'tamta',
  43 + u'tamto', u'tamci', u'tamte', u'ów', u'owa', u'owo',
  44 + u'owi', u'owe']
44 SIEBIE_TAGS = ['siebie'] 45 SIEBIE_TAGS = ['siebie']
45 MASCULINE_TAGS = ['m1', 'm2', 'm3'] 46 MASCULINE_TAGS = ['m1', 'm2', 'm3']
46 47
@@ -50,7 +51,7 @@ ZERO_TAGS = ['fin', 'praet', 'bedzie', 'impt', 'winien', 'aglt'] @@ -50,7 +51,7 @@ ZERO_TAGS = ['fin', 'praet', 'bedzie', 'impt', 'winien', 'aglt']
50 POSSIBLE_HEADS = [u'§', u'%', u'*', u'"', u'„', u'&', u'-'] 51 POSSIBLE_HEADS = [u'§', u'%', u'*', u'"', u'„', u'&', u'-']
51 HYPHEN_SIGNS = ['-', '#'] 52 HYPHEN_SIGNS = ['-', '#']
52 53
53 -NEG_PROPORTION = 1 54 +NEG_PROPORTION = 5
54 RANDOM_VECTORS = True 55 RANDOM_VECTORS = True
55 56
56 DEBUG = False 57 DEBUG = False
@@ -612,7 +613,7 @@ def check_one_way_acronym(acronym, expression): @@ -612,7 +613,7 @@ def check_one_way_acronym(acronym, expression):
612 for expr2 in expr1.split(): 613 for expr2 in expr1.split():
613 expr2 = expr2.strip() 614 expr2 = expr2.strip()
614 if expr2: 615 if expr2:
615 - initials += str(expr2[0]).upper() 616 + initials += expr2[0].upper()
616 if acronym == initials: 617 if acronym == initials:
617 return 1 618 return 1
618 return 0 619 return 0