Commit 7a4d45b1265ac142c8642221ef9241f907c1500d

Authored by Tomasz Bartosiak
1 parent 355b3ade

Changed frame similarity measuring

semantics/management/commands/frame_compare_0.py
... ... @@ -25,21 +25,25 @@ class Command(BaseCommand):
25 25  
26 26 def handle(self, **options):
27 27 # SPLIT FRAMES IN BUNDLES
  28 + print "splitting frames in bundles..."
28 29 frames = SemanticFrame.objects.filter(next=None, removed=False, entry__isnull=False).order_by('id')
29 30 frame_bundles = []
30 31 frame_bundle = []
31 32 i = 0 # frame_bundle size
32 33 for frame in frames:
33   - frame_bundle.append(frame)
34   - i += 1
35   - if i == BUNDLE_SIZE:
36   - frame_bundles.append(frame_bundle)
37   - frame_bundle = []
38   - i = 0
  34 + if frame.entry.actual_lemma().status.priority >= 90: # (S)ready / (S)validated
  35 + lus = frame.lexical_units.all()
  36 + if len(lus) > 0 and\
  37 + max([lu.luid for lu in lus]) >=0:
  38 + frame_bundle.append(frame)
  39 + i += 1
  40 + if i == BUNDLE_SIZE:
  41 + frame_bundles.append(frame_bundle)
  42 + frame_bundle = []
  43 + i = 0
39 44 if i != 0:
40 45 frame_bundles.append(frame_bundle)
41   -
42   - print len(frames)
  46 + print " ...done"
43 47  
44 48 # LOCAL DATABASE CONNECTION
45 49 session, TT_dict = get_db_data()
... ...
semantics/management/commands/frame_compare_1.py
... ... @@ -17,7 +17,7 @@ from settings import PROJECT_PATH
17 17  
18 18 BUNDLE_SIZE = 50
19 19 PROCESSES_NO = 16
20   -PROCESS_ID = 1
  20 +PROCESS_ID = 0
21 21  
22 22 class Command(BaseCommand):
23 23 args = 'none'
... ... @@ -25,20 +25,26 @@ class Command(BaseCommand):
25 25  
26 26 def handle(self, **options):
27 27 # SPLIT FRAMES IN BUNDLES
  28 + print "splitting frames in bundles..."
28 29 frames = SemanticFrame.objects.filter(next=None, removed=False, entry__isnull=False).order_by('id')
29 30 frame_bundles = []
30 31 frame_bundle = []
31 32 i = 0 # frame_bundle size
32 33 for frame in frames:
33   - frame_bundle.append(frame)
34   - i += 1
35   - if i == BUNDLE_SIZE:
36   - frame_bundles.append(frame_bundle)
37   - frame_bundle = []
38   - i = 0
  34 + if frame.entry.actual_lemma().status.priority >= 90: # (S)ready / (S)validated
  35 + lus = frame.lexical_units.all()
  36 + if len(lus) > 0 and\
  37 + max([lu.luid for lu in lus]) >=0:
  38 + frame_bundle.append(frame)
  39 + i += 1
  40 + if i == BUNDLE_SIZE:
  41 + frame_bundles.append(frame_bundle)
  42 + frame_bundle = []
  43 + i = 0
39 44 if i != 0:
40 45 frame_bundles.append(frame_bundle)
41   -
  46 + print " ...done"
  47 +
42 48 # LOCAL DATABASE CONNECTION
43 49 session, TT_dict = get_db_data()
44 50 LexicalUnit._session = session
... ... @@ -54,13 +60,13 @@ class Command(BaseCommand):
54 60 f = Frame.from_slowal(frame)
55 61 frames1.append(f)
56 62 if i == j:
57   - match_frames_diagonal(frames1, session, TT_dict) # , verbose=True, fake=True)
  63 + match_frames_diagonal(frames1, session, TT_dict)# , verbose=True, fake=True)
58 64 else:
59 65 frames2 = []
60 66 for frame in frame_bundles[j]:
61 67 f = Frame.from_slowal(frame)
62 68 frames2.append(f)
63   - match_frames(frames1, frames2, session, TT_dict) # , verbose=True, fake=True)
  69 + match_frames(frames1, frames2, session, TT_dict)# , verbose=True, fake=True)
64 70 c += 1
65 71  
66 72  
... ...
semantics/management/commands/frame_compare_10.py
... ... @@ -17,7 +17,7 @@ from settings import PROJECT_PATH
17 17  
18 18 BUNDLE_SIZE = 50
19 19 PROCESSES_NO = 16
20   -PROCESS_ID = 10
  20 +PROCESS_ID = 0
21 21  
22 22 class Command(BaseCommand):
23 23 args = 'none'
... ... @@ -25,20 +25,26 @@ class Command(BaseCommand):
25 25  
26 26 def handle(self, **options):
27 27 # SPLIT FRAMES IN BUNDLES
  28 + print "splitting frames in bundles..."
28 29 frames = SemanticFrame.objects.filter(next=None, removed=False, entry__isnull=False).order_by('id')
29 30 frame_bundles = []
30 31 frame_bundle = []
31 32 i = 0 # frame_bundle size
32 33 for frame in frames:
33   - frame_bundle.append(frame)
34   - i += 1
35   - if i == BUNDLE_SIZE:
36   - frame_bundles.append(frame_bundle)
37   - frame_bundle = []
38   - i = 0
  34 + if frame.entry.actual_lemma().status.priority >= 90: # (S)ready / (S)validated
  35 + lus = frame.lexical_units.all()
  36 + if len(lus) > 0 and\
  37 + max([lu.luid for lu in lus]) >=0:
  38 + frame_bundle.append(frame)
  39 + i += 1
  40 + if i == BUNDLE_SIZE:
  41 + frame_bundles.append(frame_bundle)
  42 + frame_bundle = []
  43 + i = 0
39 44 if i != 0:
40 45 frame_bundles.append(frame_bundle)
41   -
  46 + print " ...done"
  47 +
42 48 # LOCAL DATABASE CONNECTION
43 49 session, TT_dict = get_db_data()
44 50 LexicalUnit._session = session
... ... @@ -54,13 +60,13 @@ class Command(BaseCommand):
54 60 f = Frame.from_slowal(frame)
55 61 frames1.append(f)
56 62 if i == j:
57   - match_frames_diagonal(frames1, session, TT_dict) # , verbose=True, fake=True)
  63 + match_frames_diagonal(frames1, session, TT_dict)# , verbose=True, fake=True)
58 64 else:
59 65 frames2 = []
60 66 for frame in frame_bundles[j]:
61 67 f = Frame.from_slowal(frame)
62 68 frames2.append(f)
63   - match_frames(frames1, frames2, session, TT_dict) # , verbose=True, fake=True)
  69 + match_frames(frames1, frames2, session, TT_dict)# , verbose=True, fake=True)
64 70 c += 1
65 71  
66 72  
... ...
semantics/management/commands/frame_compare_11.py
... ... @@ -17,7 +17,7 @@ from settings import PROJECT_PATH
17 17  
18 18 BUNDLE_SIZE = 50
19 19 PROCESSES_NO = 16
20   -PROCESS_ID = 11
  20 +PROCESS_ID = 0
21 21  
22 22 class Command(BaseCommand):
23 23 args = 'none'
... ... @@ -25,20 +25,26 @@ class Command(BaseCommand):
25 25  
26 26 def handle(self, **options):
27 27 # SPLIT FRAMES IN BUNDLES
  28 + print "splitting frames in bundles..."
28 29 frames = SemanticFrame.objects.filter(next=None, removed=False, entry__isnull=False).order_by('id')
29 30 frame_bundles = []
30 31 frame_bundle = []
31 32 i = 0 # frame_bundle size
32 33 for frame in frames:
33   - frame_bundle.append(frame)
34   - i += 1
35   - if i == BUNDLE_SIZE:
36   - frame_bundles.append(frame_bundle)
37   - frame_bundle = []
38   - i = 0
  34 + if frame.entry.actual_lemma().status.priority >= 90: # (S)ready / (S)validated
  35 + lus = frame.lexical_units.all()
  36 + if len(lus) > 0 and\
  37 + max([lu.luid for lu in lus]) >=0:
  38 + frame_bundle.append(frame)
  39 + i += 1
  40 + if i == BUNDLE_SIZE:
  41 + frame_bundles.append(frame_bundle)
  42 + frame_bundle = []
  43 + i = 0
39 44 if i != 0:
40 45 frame_bundles.append(frame_bundle)
41   -
  46 + print " ...done"
  47 +
42 48 # LOCAL DATABASE CONNECTION
43 49 session, TT_dict = get_db_data()
44 50 LexicalUnit._session = session
... ... @@ -54,13 +60,13 @@ class Command(BaseCommand):
54 60 f = Frame.from_slowal(frame)
55 61 frames1.append(f)
56 62 if i == j:
57   - match_frames_diagonal(frames1, session, TT_dict) # , verbose=True, fake=True)
  63 + match_frames_diagonal(frames1, session, TT_dict)# , verbose=True, fake=True)
58 64 else:
59 65 frames2 = []
60 66 for frame in frame_bundles[j]:
61 67 f = Frame.from_slowal(frame)
62 68 frames2.append(f)
63   - match_frames(frames1, frames2, session, TT_dict) # , verbose=True, fake=True)
  69 + match_frames(frames1, frames2, session, TT_dict)# , verbose=True, fake=True)
64 70 c += 1
65 71  
66 72  
... ...
semantics/management/commands/frame_compare_12.py
... ... @@ -17,7 +17,7 @@ from settings import PROJECT_PATH
17 17  
18 18 BUNDLE_SIZE = 50
19 19 PROCESSES_NO = 16
20   -PROCESS_ID = 12
  20 +PROCESS_ID = 0
21 21  
22 22 class Command(BaseCommand):
23 23 args = 'none'
... ... @@ -25,20 +25,26 @@ class Command(BaseCommand):
25 25  
26 26 def handle(self, **options):
27 27 # SPLIT FRAMES IN BUNDLES
  28 + print "splitting frames in bundles..."
28 29 frames = SemanticFrame.objects.filter(next=None, removed=False, entry__isnull=False).order_by('id')
29 30 frame_bundles = []
30 31 frame_bundle = []
31 32 i = 0 # frame_bundle size
32 33 for frame in frames:
33   - frame_bundle.append(frame)
34   - i += 1
35   - if i == BUNDLE_SIZE:
36   - frame_bundles.append(frame_bundle)
37   - frame_bundle = []
38   - i = 0
  34 + if frame.entry.actual_lemma().status.priority >= 90: # (S)ready / (S)validated
  35 + lus = frame.lexical_units.all()
  36 + if len(lus) > 0 and\
  37 + max([lu.luid for lu in lus]) >=0:
  38 + frame_bundle.append(frame)
  39 + i += 1
  40 + if i == BUNDLE_SIZE:
  41 + frame_bundles.append(frame_bundle)
  42 + frame_bundle = []
  43 + i = 0
39 44 if i != 0:
40 45 frame_bundles.append(frame_bundle)
41   -
  46 + print " ...done"
  47 +
42 48 # LOCAL DATABASE CONNECTION
43 49 session, TT_dict = get_db_data()
44 50 LexicalUnit._session = session
... ... @@ -54,13 +60,13 @@ class Command(BaseCommand):
54 60 f = Frame.from_slowal(frame)
55 61 frames1.append(f)
56 62 if i == j:
57   - match_frames_diagonal(frames1, session, TT_dict) # , verbose=True, fake=True)
  63 + match_frames_diagonal(frames1, session, TT_dict)# , verbose=True, fake=True)
58 64 else:
59 65 frames2 = []
60 66 for frame in frame_bundles[j]:
61 67 f = Frame.from_slowal(frame)
62 68 frames2.append(f)
63   - match_frames(frames1, frames2, session, TT_dict) # , verbose=True, fake=True)
  69 + match_frames(frames1, frames2, session, TT_dict)# , verbose=True, fake=True)
64 70 c += 1
65 71  
66 72  
... ...
semantics/management/commands/frame_compare_13.py
... ... @@ -17,7 +17,7 @@ from settings import PROJECT_PATH
17 17  
18 18 BUNDLE_SIZE = 50
19 19 PROCESSES_NO = 16
20   -PROCESS_ID = 13
  20 +PROCESS_ID = 0
21 21  
22 22 class Command(BaseCommand):
23 23 args = 'none'
... ... @@ -25,20 +25,26 @@ class Command(BaseCommand):
25 25  
26 26 def handle(self, **options):
27 27 # SPLIT FRAMES IN BUNDLES
  28 + print "splitting frames in bundles..."
28 29 frames = SemanticFrame.objects.filter(next=None, removed=False, entry__isnull=False).order_by('id')
29 30 frame_bundles = []
30 31 frame_bundle = []
31 32 i = 0 # frame_bundle size
32 33 for frame in frames:
33   - frame_bundle.append(frame)
34   - i += 1
35   - if i == BUNDLE_SIZE:
36   - frame_bundles.append(frame_bundle)
37   - frame_bundle = []
38   - i = 0
  34 + if frame.entry.actual_lemma().status.priority >= 90: # (S)ready / (S)validated
  35 + lus = frame.lexical_units.all()
  36 + if len(lus) > 0 and\
  37 + max([lu.luid for lu in lus]) >=0:
  38 + frame_bundle.append(frame)
  39 + i += 1
  40 + if i == BUNDLE_SIZE:
  41 + frame_bundles.append(frame_bundle)
  42 + frame_bundle = []
  43 + i = 0
39 44 if i != 0:
40 45 frame_bundles.append(frame_bundle)
41   -
  46 + print " ...done"
  47 +
42 48 # LOCAL DATABASE CONNECTION
43 49 session, TT_dict = get_db_data()
44 50 LexicalUnit._session = session
... ... @@ -54,13 +60,13 @@ class Command(BaseCommand):
54 60 f = Frame.from_slowal(frame)
55 61 frames1.append(f)
56 62 if i == j:
57   - match_frames_diagonal(frames1, session, TT_dict) # , verbose=True, fake=True)
  63 + match_frames_diagonal(frames1, session, TT_dict)# , verbose=True, fake=True)
58 64 else:
59 65 frames2 = []
60 66 for frame in frame_bundles[j]:
61 67 f = Frame.from_slowal(frame)
62 68 frames2.append(f)
63   - match_frames(frames1, frames2, session, TT_dict) # , verbose=True, fake=True)
  69 + match_frames(frames1, frames2, session, TT_dict)# , verbose=True, fake=True)
64 70 c += 1
65 71  
66 72  
... ...
semantics/management/commands/frame_compare_14.py
... ... @@ -17,7 +17,7 @@ from settings import PROJECT_PATH
17 17  
18 18 BUNDLE_SIZE = 50
19 19 PROCESSES_NO = 16
20   -PROCESS_ID = 14
  20 +PROCESS_ID = 0
21 21  
22 22 class Command(BaseCommand):
23 23 args = 'none'
... ... @@ -25,20 +25,26 @@ class Command(BaseCommand):
25 25  
26 26 def handle(self, **options):
27 27 # SPLIT FRAMES IN BUNDLES
  28 + print "splitting frames in bundles..."
28 29 frames = SemanticFrame.objects.filter(next=None, removed=False, entry__isnull=False).order_by('id')
29 30 frame_bundles = []
30 31 frame_bundle = []
31 32 i = 0 # frame_bundle size
32 33 for frame in frames:
33   - frame_bundle.append(frame)
34   - i += 1
35   - if i == BUNDLE_SIZE:
36   - frame_bundles.append(frame_bundle)
37   - frame_bundle = []
38   - i = 0
  34 + if frame.entry.actual_lemma().status.priority >= 90: # (S)ready / (S)validated
  35 + lus = frame.lexical_units.all()
  36 + if len(lus) > 0 and\
  37 + max([lu.luid for lu in lus]) >=0:
  38 + frame_bundle.append(frame)
  39 + i += 1
  40 + if i == BUNDLE_SIZE:
  41 + frame_bundles.append(frame_bundle)
  42 + frame_bundle = []
  43 + i = 0
39 44 if i != 0:
40 45 frame_bundles.append(frame_bundle)
41   -
  46 + print " ...done"
  47 +
42 48 # LOCAL DATABASE CONNECTION
43 49 session, TT_dict = get_db_data()
44 50 LexicalUnit._session = session
... ... @@ -54,13 +60,13 @@ class Command(BaseCommand):
54 60 f = Frame.from_slowal(frame)
55 61 frames1.append(f)
56 62 if i == j:
57   - match_frames_diagonal(frames1, session, TT_dict) # , verbose=True, fake=True)
  63 + match_frames_diagonal(frames1, session, TT_dict)# , verbose=True, fake=True)
58 64 else:
59 65 frames2 = []
60 66 for frame in frame_bundles[j]:
61 67 f = Frame.from_slowal(frame)
62 68 frames2.append(f)
63   - match_frames(frames1, frames2, session, TT_dict) # , verbose=True, fake=True)
  69 + match_frames(frames1, frames2, session, TT_dict)# , verbose=True, fake=True)
64 70 c += 1
65 71  
66 72  
... ...
semantics/management/commands/frame_compare_15.py
... ... @@ -17,7 +17,7 @@ from settings import PROJECT_PATH
17 17  
18 18 BUNDLE_SIZE = 50
19 19 PROCESSES_NO = 16
20   -PROCESS_ID = 15
  20 +PROCESS_ID = 0
21 21  
22 22 class Command(BaseCommand):
23 23 args = 'none'
... ... @@ -25,20 +25,26 @@ class Command(BaseCommand):
25 25  
26 26 def handle(self, **options):
27 27 # SPLIT FRAMES IN BUNDLES
  28 + print "splitting frames in bundles..."
28 29 frames = SemanticFrame.objects.filter(next=None, removed=False, entry__isnull=False).order_by('id')
29 30 frame_bundles = []
30 31 frame_bundle = []
31 32 i = 0 # frame_bundle size
32 33 for frame in frames:
33   - frame_bundle.append(frame)
34   - i += 1
35   - if i == BUNDLE_SIZE:
36   - frame_bundles.append(frame_bundle)
37   - frame_bundle = []
38   - i = 0
  34 + if frame.entry.actual_lemma().status.priority >= 90: # (S)ready / (S)validated
  35 + lus = frame.lexical_units.all()
  36 + if len(lus) > 0 and\
  37 + max([lu.luid for lu in lus]) >=0:
  38 + frame_bundle.append(frame)
  39 + i += 1
  40 + if i == BUNDLE_SIZE:
  41 + frame_bundles.append(frame_bundle)
  42 + frame_bundle = []
  43 + i = 0
39 44 if i != 0:
40 45 frame_bundles.append(frame_bundle)
41   -
  46 + print " ...done"
  47 +
42 48 # LOCAL DATABASE CONNECTION
43 49 session, TT_dict = get_db_data()
44 50 LexicalUnit._session = session
... ... @@ -54,13 +60,13 @@ class Command(BaseCommand):
54 60 f = Frame.from_slowal(frame)
55 61 frames1.append(f)
56 62 if i == j:
57   - match_frames_diagonal(frames1, session, TT_dict) # , verbose=True, fake=True)
  63 + match_frames_diagonal(frames1, session, TT_dict)# , verbose=True, fake=True)
58 64 else:
59 65 frames2 = []
60 66 for frame in frame_bundles[j]:
61 67 f = Frame.from_slowal(frame)
62 68 frames2.append(f)
63   - match_frames(frames1, frames2, session, TT_dict) # , verbose=True, fake=True)
  69 + match_frames(frames1, frames2, session, TT_dict)# , verbose=True, fake=True)
64 70 c += 1
65 71  
66 72  
... ...
semantics/management/commands/frame_compare_2.py
... ... @@ -17,7 +17,7 @@ from settings import PROJECT_PATH
17 17  
18 18 BUNDLE_SIZE = 50
19 19 PROCESSES_NO = 16
20   -PROCESS_ID = 2
  20 +PROCESS_ID = 0
21 21  
22 22 class Command(BaseCommand):
23 23 args = 'none'
... ... @@ -25,20 +25,26 @@ class Command(BaseCommand):
25 25  
26 26 def handle(self, **options):
27 27 # SPLIT FRAMES IN BUNDLES
  28 + print "splitting frames in bundles..."
28 29 frames = SemanticFrame.objects.filter(next=None, removed=False, entry__isnull=False).order_by('id')
29 30 frame_bundles = []
30 31 frame_bundle = []
31 32 i = 0 # frame_bundle size
32 33 for frame in frames:
33   - frame_bundle.append(frame)
34   - i += 1
35   - if i == BUNDLE_SIZE:
36   - frame_bundles.append(frame_bundle)
37   - frame_bundle = []
38   - i = 0
  34 + if frame.entry.actual_lemma().status.priority >= 90: # (S)ready / (S)validated
  35 + lus = frame.lexical_units.all()
  36 + if len(lus) > 0 and\
  37 + max([lu.luid for lu in lus]) >=0:
  38 + frame_bundle.append(frame)
  39 + i += 1
  40 + if i == BUNDLE_SIZE:
  41 + frame_bundles.append(frame_bundle)
  42 + frame_bundle = []
  43 + i = 0
39 44 if i != 0:
40 45 frame_bundles.append(frame_bundle)
41   -
  46 + print " ...done"
  47 +
42 48 # LOCAL DATABASE CONNECTION
43 49 session, TT_dict = get_db_data()
44 50 LexicalUnit._session = session
... ... @@ -54,13 +60,13 @@ class Command(BaseCommand):
54 60 f = Frame.from_slowal(frame)
55 61 frames1.append(f)
56 62 if i == j:
57   - match_frames_diagonal(frames1, session, TT_dict) # , verbose=True, fake=True)
  63 + match_frames_diagonal(frames1, session, TT_dict)# , verbose=True, fake=True)
58 64 else:
59 65 frames2 = []
60 66 for frame in frame_bundles[j]:
61 67 f = Frame.from_slowal(frame)
62 68 frames2.append(f)
63   - match_frames(frames1, frames2, session, TT_dict) # , verbose=True, fake=True)
  69 + match_frames(frames1, frames2, session, TT_dict)# , verbose=True, fake=True)
64 70 c += 1
65 71  
66 72  
... ...
semantics/management/commands/frame_compare_3.py
... ... @@ -17,7 +17,7 @@ from settings import PROJECT_PATH
17 17  
18 18 BUNDLE_SIZE = 50
19 19 PROCESSES_NO = 16
20   -PROCESS_ID = 3
  20 +PROCESS_ID = 0
21 21  
22 22 class Command(BaseCommand):
23 23 args = 'none'
... ... @@ -25,20 +25,26 @@ class Command(BaseCommand):
25 25  
26 26 def handle(self, **options):
27 27 # SPLIT FRAMES IN BUNDLES
  28 + print "splitting frames in bundles..."
28 29 frames = SemanticFrame.objects.filter(next=None, removed=False, entry__isnull=False).order_by('id')
29 30 frame_bundles = []
30 31 frame_bundle = []
31 32 i = 0 # frame_bundle size
32 33 for frame in frames:
33   - frame_bundle.append(frame)
34   - i += 1
35   - if i == BUNDLE_SIZE:
36   - frame_bundles.append(frame_bundle)
37   - frame_bundle = []
38   - i = 0
  34 + if frame.entry.actual_lemma().status.priority >= 90: # (S)ready / (S)validated
  35 + lus = frame.lexical_units.all()
  36 + if len(lus) > 0 and\
  37 + max([lu.luid for lu in lus]) >=0:
  38 + frame_bundle.append(frame)
  39 + i += 1
  40 + if i == BUNDLE_SIZE:
  41 + frame_bundles.append(frame_bundle)
  42 + frame_bundle = []
  43 + i = 0
39 44 if i != 0:
40 45 frame_bundles.append(frame_bundle)
41   -
  46 + print " ...done"
  47 +
42 48 # LOCAL DATABASE CONNECTION
43 49 session, TT_dict = get_db_data()
44 50 LexicalUnit._session = session
... ... @@ -54,13 +60,13 @@ class Command(BaseCommand):
54 60 f = Frame.from_slowal(frame)
55 61 frames1.append(f)
56 62 if i == j:
57   - match_frames_diagonal(frames1, session, TT_dict) # , verbose=True, fake=True)
  63 + match_frames_diagonal(frames1, session, TT_dict)# , verbose=True, fake=True)
58 64 else:
59 65 frames2 = []
60 66 for frame in frame_bundles[j]:
61 67 f = Frame.from_slowal(frame)
62 68 frames2.append(f)
63   - match_frames(frames1, frames2, session, TT_dict) # , verbose=True, fake=True)
  69 + match_frames(frames1, frames2, session, TT_dict)# , verbose=True, fake=True)
64 70 c += 1
65 71  
66 72  
... ...
semantics/management/commands/frame_compare_4.py
... ... @@ -17,7 +17,7 @@ from settings import PROJECT_PATH
17 17  
18 18 BUNDLE_SIZE = 50
19 19 PROCESSES_NO = 16
20   -PROCESS_ID = 4
  20 +PROCESS_ID = 0
21 21  
22 22 class Command(BaseCommand):
23 23 args = 'none'
... ... @@ -25,20 +25,26 @@ class Command(BaseCommand):
25 25  
26 26 def handle(self, **options):
27 27 # SPLIT FRAMES IN BUNDLES
  28 + print "splitting frames in bundles..."
28 29 frames = SemanticFrame.objects.filter(next=None, removed=False, entry__isnull=False).order_by('id')
29 30 frame_bundles = []
30 31 frame_bundle = []
31 32 i = 0 # frame_bundle size
32 33 for frame in frames:
33   - frame_bundle.append(frame)
34   - i += 1
35   - if i == BUNDLE_SIZE:
36   - frame_bundles.append(frame_bundle)
37   - frame_bundle = []
38   - i = 0
  34 + if frame.entry.actual_lemma().status.priority >= 90: # (S)ready / (S)validated
  35 + lus = frame.lexical_units.all()
  36 + if len(lus) > 0 and\
  37 + max([lu.luid for lu in lus]) >=0:
  38 + frame_bundle.append(frame)
  39 + i += 1
  40 + if i == BUNDLE_SIZE:
  41 + frame_bundles.append(frame_bundle)
  42 + frame_bundle = []
  43 + i = 0
39 44 if i != 0:
40 45 frame_bundles.append(frame_bundle)
41   -
  46 + print " ...done"
  47 +
42 48 # LOCAL DATABASE CONNECTION
43 49 session, TT_dict = get_db_data()
44 50 LexicalUnit._session = session
... ... @@ -54,13 +60,13 @@ class Command(BaseCommand):
54 60 f = Frame.from_slowal(frame)
55 61 frames1.append(f)
56 62 if i == j:
57   - match_frames_diagonal(frames1, session, TT_dict) # , verbose=True, fake=True)
  63 + match_frames_diagonal(frames1, session, TT_dict)# , verbose=True, fake=True)
58 64 else:
59 65 frames2 = []
60 66 for frame in frame_bundles[j]:
61 67 f = Frame.from_slowal(frame)
62 68 frames2.append(f)
63   - match_frames(frames1, frames2, session, TT_dict) # , verbose=True, fake=True)
  69 + match_frames(frames1, frames2, session, TT_dict)# , verbose=True, fake=True)
64 70 c += 1
65 71  
66 72  
... ...
semantics/management/commands/frame_compare_5.py
... ... @@ -17,7 +17,7 @@ from settings import PROJECT_PATH
17 17  
18 18 BUNDLE_SIZE = 50
19 19 PROCESSES_NO = 16
20   -PROCESS_ID = 5
  20 +PROCESS_ID = 0
21 21  
22 22 class Command(BaseCommand):
23 23 args = 'none'
... ... @@ -25,20 +25,26 @@ class Command(BaseCommand):
25 25  
26 26 def handle(self, **options):
27 27 # SPLIT FRAMES IN BUNDLES
  28 + print "splitting frames in bundles..."
28 29 frames = SemanticFrame.objects.filter(next=None, removed=False, entry__isnull=False).order_by('id')
29 30 frame_bundles = []
30 31 frame_bundle = []
31 32 i = 0 # frame_bundle size
32 33 for frame in frames:
33   - frame_bundle.append(frame)
34   - i += 1
35   - if i == BUNDLE_SIZE:
36   - frame_bundles.append(frame_bundle)
37   - frame_bundle = []
38   - i = 0
  34 + if frame.entry.actual_lemma().status.priority >= 90: # (S)ready / (S)validated
  35 + lus = frame.lexical_units.all()
  36 + if len(lus) > 0 and\
  37 + max([lu.luid for lu in lus]) >=0:
  38 + frame_bundle.append(frame)
  39 + i += 1
  40 + if i == BUNDLE_SIZE:
  41 + frame_bundles.append(frame_bundle)
  42 + frame_bundle = []
  43 + i = 0
39 44 if i != 0:
40 45 frame_bundles.append(frame_bundle)
41   -
  46 + print " ...done"
  47 +
42 48 # LOCAL DATABASE CONNECTION
43 49 session, TT_dict = get_db_data()
44 50 LexicalUnit._session = session
... ... @@ -54,13 +60,13 @@ class Command(BaseCommand):
54 60 f = Frame.from_slowal(frame)
55 61 frames1.append(f)
56 62 if i == j:
57   - match_frames_diagonal(frames1, session, TT_dict) # , verbose=True, fake=True)
  63 + match_frames_diagonal(frames1, session, TT_dict)# , verbose=True, fake=True)
58 64 else:
59 65 frames2 = []
60 66 for frame in frame_bundles[j]:
61 67 f = Frame.from_slowal(frame)
62 68 frames2.append(f)
63   - match_frames(frames1, frames2, session, TT_dict) # , verbose=True, fake=True)
  69 + match_frames(frames1, frames2, session, TT_dict)# , verbose=True, fake=True)
64 70 c += 1
65 71  
66 72  
... ...
semantics/management/commands/frame_compare_6.py
... ... @@ -17,7 +17,7 @@ from settings import PROJECT_PATH
17 17  
18 18 BUNDLE_SIZE = 50
19 19 PROCESSES_NO = 16
20   -PROCESS_ID = 6
  20 +PROCESS_ID = 0
21 21  
22 22 class Command(BaseCommand):
23 23 args = 'none'
... ... @@ -25,20 +25,26 @@ class Command(BaseCommand):
25 25  
26 26 def handle(self, **options):
27 27 # SPLIT FRAMES IN BUNDLES
  28 + print "splitting frames in bundles..."
28 29 frames = SemanticFrame.objects.filter(next=None, removed=False, entry__isnull=False).order_by('id')
29 30 frame_bundles = []
30 31 frame_bundle = []
31 32 i = 0 # frame_bundle size
32 33 for frame in frames:
33   - frame_bundle.append(frame)
34   - i += 1
35   - if i == BUNDLE_SIZE:
36   - frame_bundles.append(frame_bundle)
37   - frame_bundle = []
38   - i = 0
  34 + if frame.entry.actual_lemma().status.priority >= 90: # (S)ready / (S)validated
  35 + lus = frame.lexical_units.all()
  36 + if len(lus) > 0 and\
  37 + max([lu.luid for lu in lus]) >=0:
  38 + frame_bundle.append(frame)
  39 + i += 1
  40 + if i == BUNDLE_SIZE:
  41 + frame_bundles.append(frame_bundle)
  42 + frame_bundle = []
  43 + i = 0
39 44 if i != 0:
40 45 frame_bundles.append(frame_bundle)
41   -
  46 + print " ...done"
  47 +
42 48 # LOCAL DATABASE CONNECTION
43 49 session, TT_dict = get_db_data()
44 50 LexicalUnit._session = session
... ... @@ -54,13 +60,13 @@ class Command(BaseCommand):
54 60 f = Frame.from_slowal(frame)
55 61 frames1.append(f)
56 62 if i == j:
57   - match_frames_diagonal(frames1, session, TT_dict) # , verbose=True, fake=True)
  63 + match_frames_diagonal(frames1, session, TT_dict)# , verbose=True, fake=True)
58 64 else:
59 65 frames2 = []
60 66 for frame in frame_bundles[j]:
61 67 f = Frame.from_slowal(frame)
62 68 frames2.append(f)
63   - match_frames(frames1, frames2, session, TT_dict) # , verbose=True, fake=True)
  69 + match_frames(frames1, frames2, session, TT_dict)# , verbose=True, fake=True)
64 70 c += 1
65 71  
66 72  
... ...
semantics/management/commands/frame_compare_7.py
... ... @@ -17,7 +17,7 @@ from settings import PROJECT_PATH
17 17  
18 18 BUNDLE_SIZE = 50
19 19 PROCESSES_NO = 16
20   -PROCESS_ID = 7
  20 +PROCESS_ID = 0
21 21  
22 22 class Command(BaseCommand):
23 23 args = 'none'
... ... @@ -25,20 +25,26 @@ class Command(BaseCommand):
25 25  
26 26 def handle(self, **options):
27 27 # SPLIT FRAMES IN BUNDLES
  28 + print "splitting frames in bundles..."
28 29 frames = SemanticFrame.objects.filter(next=None, removed=False, entry__isnull=False).order_by('id')
29 30 frame_bundles = []
30 31 frame_bundle = []
31 32 i = 0 # frame_bundle size
32 33 for frame in frames:
33   - frame_bundle.append(frame)
34   - i += 1
35   - if i == BUNDLE_SIZE:
36   - frame_bundles.append(frame_bundle)
37   - frame_bundle = []
38   - i = 0
  34 + if frame.entry.actual_lemma().status.priority >= 90: # (S)ready / (S)validated
  35 + lus = frame.lexical_units.all()
  36 + if len(lus) > 0 and\
  37 + max([lu.luid for lu in lus]) >=0:
  38 + frame_bundle.append(frame)
  39 + i += 1
  40 + if i == BUNDLE_SIZE:
  41 + frame_bundles.append(frame_bundle)
  42 + frame_bundle = []
  43 + i = 0
39 44 if i != 0:
40 45 frame_bundles.append(frame_bundle)
41   -
  46 + print " ...done"
  47 +
42 48 # LOCAL DATABASE CONNECTION
43 49 session, TT_dict = get_db_data()
44 50 LexicalUnit._session = session
... ... @@ -54,13 +60,13 @@ class Command(BaseCommand):
54 60 f = Frame.from_slowal(frame)
55 61 frames1.append(f)
56 62 if i == j:
57   - match_frames_diagonal(frames1, session, TT_dict) # , verbose=True, fake=True)
  63 + match_frames_diagonal(frames1, session, TT_dict)# , verbose=True, fake=True)
58 64 else:
59 65 frames2 = []
60 66 for frame in frame_bundles[j]:
61 67 f = Frame.from_slowal(frame)
62 68 frames2.append(f)
63   - match_frames(frames1, frames2, session, TT_dict) # , verbose=True, fake=True)
  69 + match_frames(frames1, frames2, session, TT_dict)# , verbose=True, fake=True)
64 70 c += 1
65 71  
66 72  
... ...
semantics/management/commands/frame_compare_8.py
... ... @@ -17,7 +17,7 @@ from settings import PROJECT_PATH
17 17  
18 18 BUNDLE_SIZE = 50
19 19 PROCESSES_NO = 16
20   -PROCESS_ID = 8
  20 +PROCESS_ID = 0
21 21  
22 22 class Command(BaseCommand):
23 23 args = 'none'
... ... @@ -25,20 +25,26 @@ class Command(BaseCommand):
25 25  
26 26 def handle(self, **options):
27 27 # SPLIT FRAMES IN BUNDLES
  28 + print "splitting frames in bundles..."
28 29 frames = SemanticFrame.objects.filter(next=None, removed=False, entry__isnull=False).order_by('id')
29 30 frame_bundles = []
30 31 frame_bundle = []
31 32 i = 0 # frame_bundle size
32 33 for frame in frames:
33   - frame_bundle.append(frame)
34   - i += 1
35   - if i == BUNDLE_SIZE:
36   - frame_bundles.append(frame_bundle)
37   - frame_bundle = []
38   - i = 0
  34 + if frame.entry.actual_lemma().status.priority >= 90: # (S)ready / (S)validated
  35 + lus = frame.lexical_units.all()
  36 + if len(lus) > 0 and\
  37 + max([lu.luid for lu in lus]) >=0:
  38 + frame_bundle.append(frame)
  39 + i += 1
  40 + if i == BUNDLE_SIZE:
  41 + frame_bundles.append(frame_bundle)
  42 + frame_bundle = []
  43 + i = 0
39 44 if i != 0:
40 45 frame_bundles.append(frame_bundle)
41   -
  46 + print " ...done"
  47 +
42 48 # LOCAL DATABASE CONNECTION
43 49 session, TT_dict = get_db_data()
44 50 LexicalUnit._session = session
... ... @@ -54,13 +60,13 @@ class Command(BaseCommand):
54 60 f = Frame.from_slowal(frame)
55 61 frames1.append(f)
56 62 if i == j:
57   - match_frames_diagonal(frames1, session, TT_dict) # , verbose=True, fake=True)
  63 + match_frames_diagonal(frames1, session, TT_dict)# , verbose=True, fake=True)
58 64 else:
59 65 frames2 = []
60 66 for frame in frame_bundles[j]:
61 67 f = Frame.from_slowal(frame)
62 68 frames2.append(f)
63   - match_frames(frames1, frames2, session, TT_dict) # , verbose=True, fake=True)
  69 + match_frames(frames1, frames2, session, TT_dict)# , verbose=True, fake=True)
64 70 c += 1
65 71  
66 72  
... ...
semantics/management/commands/frame_compare_9.py
... ... @@ -17,7 +17,7 @@ from settings import PROJECT_PATH
17 17  
18 18 BUNDLE_SIZE = 50
19 19 PROCESSES_NO = 16
20   -PROCESS_ID = 9
  20 +PROCESS_ID = 0
21 21  
22 22 class Command(BaseCommand):
23 23 args = 'none'
... ... @@ -25,20 +25,26 @@ class Command(BaseCommand):
25 25  
26 26 def handle(self, **options):
27 27 # SPLIT FRAMES IN BUNDLES
  28 + print "splitting frames in bundles..."
28 29 frames = SemanticFrame.objects.filter(next=None, removed=False, entry__isnull=False).order_by('id')
29 30 frame_bundles = []
30 31 frame_bundle = []
31 32 i = 0 # frame_bundle size
32 33 for frame in frames:
33   - frame_bundle.append(frame)
34   - i += 1
35   - if i == BUNDLE_SIZE:
36   - frame_bundles.append(frame_bundle)
37   - frame_bundle = []
38   - i = 0
  34 + if frame.entry.actual_lemma().status.priority >= 90: # (S)ready / (S)validated
  35 + lus = frame.lexical_units.all()
  36 + if len(lus) > 0 and\
  37 + max([lu.luid for lu in lus]) >=0:
  38 + frame_bundle.append(frame)
  39 + i += 1
  40 + if i == BUNDLE_SIZE:
  41 + frame_bundles.append(frame_bundle)
  42 + frame_bundle = []
  43 + i = 0
39 44 if i != 0:
40 45 frame_bundles.append(frame_bundle)
41   -
  46 + print " ...done"
  47 +
42 48 # LOCAL DATABASE CONNECTION
43 49 session, TT_dict = get_db_data()
44 50 LexicalUnit._session = session
... ... @@ -54,13 +60,13 @@ class Command(BaseCommand):
54 60 f = Frame.from_slowal(frame)
55 61 frames1.append(f)
56 62 if i == j:
57   - match_frames_diagonal(frames1, session, TT_dict) # , verbose=True, fake=True)
  63 + match_frames_diagonal(frames1, session, TT_dict)# , verbose=True, fake=True)
58 64 else:
59 65 frames2 = []
60 66 for frame in frame_bundles[j]:
61 67 f = Frame.from_slowal(frame)
62 68 frames2.append(f)
63   - match_frames(frames1, frames2, session, TT_dict) # , verbose=True, fake=True)
  69 + match_frames(frames1, frames2, session, TT_dict)# , verbose=True, fake=True)
64 70 c += 1
65 71  
66 72  
... ...
semantics/management/commands/frame_compare_modules/match_frames.py
... ... @@ -36,6 +36,15 @@ cut_value[u'Attribute'] = -np.log(0.8)
36 36 cut_value[u'Measure'] = -np.log(0.8)
37 37 cut_value[u'Lemma'] = -np.log(1.0)
38 38  
  39 +arg_rank = defaultdict(lambda: 0)
  40 +arg_rank[u'Time'] = 1
  41 +arg_rank[u'Location'] = 1
  42 +arg_rank[u'Path'] = 1
  43 +arg_rank[u'Attribute'] = 1
  44 +arg_rank[u'Measure'] = 1
  45 +arg_rank[u'Lemma'] = 1
  46 +
  47 +
39 48 def find_max_arg_matching_value(label, arglist1, arglist2, selprefs_table):
40 49 s1 = len(arglist1)
41 50 s2 = len(arglist2)
... ... @@ -47,13 +56,14 @@ def find_max_arg_matching_value(label, arglist1, arglist2, selprefs_table):
47 56 cut = s2
48 57 ans_pos = hungarian_algorithm(array.copy())
49 58 result = 0
  59 + missing = [0, 0, 0]
50 60 for i, j in ans_pos:
51 61 # i and j are matched
52 62 if i < cut:
53 63 result += array[i][j]
54 64 else:
55   - result += cut_value[label]
56   - return result
  65 + missing[arg_rank[label]] += 1
  66 + return result, tuple(missing)
57 67  
58 68 def match_undefined_preferences_and_max_match_the_rest(label, arglist1, arglist2, selprefs_table):
59 69 selprefs1 = copy(arglist1)
... ... @@ -74,18 +84,35 @@ def match_undefined_preferences_and_max_match_the_rest(label, arglist1, arglist2
74 84 del selprefs2[j]
75 85 # i and j are matched
76 86 if len(selprefs1) == 0 and len(selprefs2) == 0:
77   - return 1.0
  87 + return 1.0, (0, 0, 0)
78 88 else:
79 89 return find_max_arg_matching_value(label, selprefs1, selprefs2, selprefs_table)
80   -
81   -
  90 +
  91 +
  92 +misses_coefficient = defaultdict(lambda: -np.log(0.1))
  93 +misses_coefficient[0.0] = -np.log(1.0)
  94 +misses_coefficient[1.0/3] = -np.log(0.99)
  95 +misses_coefficient[2.0/3] = -np.log(0.97)
  96 +misses_coefficient[1.0] = -np.log(0.95)
  97 +misses_coefficient[4.0/3] = -np.log(0.92)
  98 +misses_coefficient[5.0/3] = -np.log(0.9)
  99 +misses_coefficient[2.0] = -np.log(0.5)
  100 +
82 101 def find_matching_value(frame1, frame2, selprefs_table):
83 102 labels = set(frame1.get_role_labels()) | set(frame2.get_role_labels())
84 103 tmp = 0
  104 + missing0 = 0
  105 + missing1 = 0
  106 + missing2 = 0
85 107 for label in sorted(labels):
86   - # tmp += find_max_arg_matching_value(label, frame1.get_arguments(label), frame2.get_arguments(label), selprefs_table)
87   - tmp += match_undefined_preferences_and_max_match_the_rest(label, frame1.get_arguments(label), frame2.get_arguments(label), selprefs_table)
88   - return np.exp(-tmp)
  108 + val, (m0, m1, m2) = match_undefined_preferences_and_max_match_the_rest(label, frame1.get_arguments(label), frame2.get_arguments(label), selprefs_table)
  109 + tmp += val
  110 + missing0 += m0
  111 + missing1 += m1
  112 + missing2 += m2
  113 + m = missing0 + (missing1 * 1.0) / 3
  114 + res = tmp + misses_coefficient[m]
  115 + return np.exp(-res)
89 116  
90 117 def match_transformed_frames(frame1, frame2, rule, selprefs_table):
91 118 v = find_matching_value(frame1, frame2, selprefs_table)
... ...