Commit 9b1356f1782dbff55917f1511a27519d8eb88240

Authored by Tomasz Bartosiak
1 parent ca3f9dba

importing examples from XML

connections/models.py
@@ -50,6 +50,7 @@ class Status(models.Model): @@ -50,6 +50,7 @@ class Status(models.Model):
50 class ExampleConnection(models.Model): 50 class ExampleConnection(models.Model):
51 example = models.ForeignKey(Example, related_name='example_connections', on_delete=models.PROTECT) 51 example = models.ForeignKey(Example, related_name='example_connections', on_delete=models.PROTECT)
52 lexical_unit = models.ForeignKey(LexicalUnit, related_name='example_connections', null=True, on_delete=models.PROTECT) 52 lexical_unit = models.ForeignKey(LexicalUnit, related_name='example_connections', null=True, on_delete=models.PROTECT)
  53 + arguments = models.ManyToManyField(Argument, related_name='example_connections')
53 schema_connections = models.ManyToManyField('SchemaHook', related_name='example_connections') 54 schema_connections = models.ManyToManyField('SchemaHook', related_name='example_connections')
54 55
55 56
examples/models.py
1 from django.db import models 1 from django.db import models
2 2
3 -  
4 class Example(models.Model): 3 class Example(models.Model):
5 entry = models.ForeignKey('connections.Entry', related_name='examples', on_delete=models.PROTECT) 4 entry = models.ForeignKey('connections.Entry', related_name='examples', on_delete=models.PROTECT)
6 sentence = models.TextField() 5 sentence = models.TextField()
7 opinion = models.ForeignKey('ExampleOpinion', related_name='examples', on_delete=models.PROTECT) 6 opinion = models.ForeignKey('ExampleOpinion', related_name='examples', on_delete=models.PROTECT)
8 source = models.ForeignKey('ExampleSource', related_name='examples', on_delete=models.PROTECT) 7 source = models.ForeignKey('ExampleSource', related_name='examples', on_delete=models.PROTECT)
9 - 8 + note = models.TextField(null=True)
  9 +
10 def __str__(self): 10 def __str__(self):
11 return self.sentence 11 return self.sentence
12 12
importer/Argument.py
@@ -31,8 +31,8 @@ class Relation: @@ -31,8 +31,8 @@ class Relation:
31 pref.save() 31 pref.save()
32 argument.relations.add(pref) 32 argument.relations.add(pref)
33 33
34 - def __unicode__(self):  
35 - return self._type + '->' + unicode(self._to._semantic_role) 34 + def __str__(self):
  35 + return self._type + '->' + str(self._to._semantic_role)
36 36
37 class SelectionalPreference: 37 class SelectionalPreference:
38 38
@@ -84,8 +84,8 @@ class SelectionalPreference: @@ -84,8 +84,8 @@ class SelectionalPreference:
84 print(type, values) 84 print(type, values)
85 raise UnknownError() 85 raise UnknownError()
86 86
87 - def __unicode__(self):  
88 - return unicode(self._value) 87 + def __str__(self):
  88 + return str(self._value)
89 89
90 90
91 class SemanticRole: 91 class SemanticRole:
@@ -106,7 +106,7 @@ class SemanticRole: @@ -106,7 +106,7 @@ class SemanticRole:
106 argument_role = semantics.models.ArgumentRole.objects.get(role=role, attribute=attribute) 106 argument_role = semantics.models.ArgumentRole.objects.get(role=role, attribute=attribute)
107 return argument_role 107 return argument_role
108 108
109 - def __unicode__(self): 109 + def __str__(self):
110 if self._attribute is None: 110 if self._attribute is None:
111 return self._value.lower() 111 return self._value.lower()
112 else: 112 else:
@@ -121,6 +121,7 @@ class Argument: @@ -121,6 +121,7 @@ class Argument:
121 self._selectional_preferences = selectional_preferences 121 self._selectional_preferences = selectional_preferences
122 self._references = references 122 self._references = references
123 self._id = id 123 self._id = id
  124 + self._db_id = None
124 125
125 @classmethod 126 @classmethod
126 def fromTree(cls, tree): 127 def fromTree(cls, tree):
@@ -158,12 +159,13 @@ class Argument: @@ -158,12 +159,13 @@ class Argument:
158 frame=frame, 159 frame=frame,
159 preferences_count=len(self._selectional_preferences)) 160 preferences_count=len(self._selectional_preferences))
160 argument.save() 161 argument.save()
  162 + self._db_id = argument.id
161 return argument 163 return argument
162 164
163 def store_preferences(self, frame, argument): 165 def store_preferences(self, frame, argument):
164 for preference in self._selectional_preferences: 166 for preference in self._selectional_preferences:
165 preference.store(frame, argument) 167 preference.store(frame, argument)
166 168
167 - def __unicode__(self):  
168 - return unicode(self._semantic_role) + '[' + ','.join([unicode(pref) for pref in self._selectional_preferences]) + ']' 169 + def __str__(self):
  170 + return str(self._semantic_role) + '[' + ','.join([str(pref) for pref in self._selectional_preferences]) + ']'
169 171
importer/Entry.py
@@ -4,23 +4,24 @@ @@ -4,23 +4,24 @@
4 from importer.Syntax import Syntax 4 from importer.Syntax import Syntax
5 from importer.Meanings import Meanings 5 from importer.Meanings import Meanings
6 from importer.Semantics import Semantics 6 from importer.Semantics import Semantics
  7 +from importer.Examples import Examples
7 from connections.models import POS, Status 8 from connections.models import POS, Status
8 import connections.models 9 import connections.models
9 10
10 class Entry: 11 class Entry:
11 12
12 - def __init__(self, entry_tree, entry_meanings, meanings, frames): 13 + def __init__(self, entry_tree, entry_meanings, meanings, frames, examples_in_data, examples_out_file, misconnected_out_file):
13 self._base = entry_tree._children[0]._children[0]._content 14 self._base = entry_tree._children[0]._children[0]._content
14 self._pos = entry_tree._children[0]._children[1]._content 15 self._pos = entry_tree._children[0]._children[1]._content
  16 + print("processing: " + self._base)
15 self._status = entry_tree._children[1]._children[0]._children[0]._content 17 self._status = entry_tree._children[1]._children[0]._children[0]._content
16 - print(self._base)  
17 self._syntax = Syntax.fromTree(entry_tree._children[2]) 18 self._syntax = Syntax.fromTree(entry_tree._children[2])
  19 + self._meanings = None
  20 + self._semantics = None
18 if len(entry_tree._children) >= 7: 21 if len(entry_tree._children) >= 7:
19 self._meanings = Meanings.fromTree(entry_tree._children[5]) 22 self._meanings = Meanings.fromTree(entry_tree._children[5])
20 self._semantics = Semantics.fromTree(self._base, self._pos, entry_tree._children[4], frames, self._meanings, self._syntax, entry_tree._children[6]) 23 self._semantics = Semantics.fromTree(self._base, self._pos, entry_tree._children[4], frames, self._meanings, self._syntax, entry_tree._children[6])
21 - else:  
22 - self._meanings = None  
23 - self._semantics = None 24 + self._examples = Examples.fromTree(entry_tree._children[3], self._syntax, self._semantics, self._base, self._meanings, examples_in_data, examples_out_file, misconnected_out_file)
24 25
25 def store(self, all_meanings, stored_positions): 26 def store(self, all_meanings, stored_positions):
26 pos = POS.objects.get(tag=self._pos) 27 pos = POS.objects.get(tag=self._pos)
@@ -31,6 +32,7 @@ class Entry: @@ -31,6 +32,7 @@ class Entry:
31 self._syntax.store(entry, stored_positions) 32 self._syntax.store(entry, stored_positions)
32 if self._semantics is not None: 33 if self._semantics is not None:
33 self._semantics.store(entry, all_meanings) 34 self._semantics.store(entry, all_meanings)
  35 + self._examples.store(entry, all_meanings)
34 36
35 def __str__(self): 37 def __str__(self):
36 return self._pos + '(' + self._base + ',' + str(self._syntax) + ').' 38 return self._pos + '(' + self._base + ',' + str(self._syntax) + ').'
importer/Frame.py
@@ -69,7 +69,8 @@ class Frame: @@ -69,7 +69,8 @@ class Frame:
69 69
70 70
71 def getSignature(self): 71 def getSignature(self):
72 - return self._base + '\t[' + ','.join([unicode(meaning) for meaning in self._meanings]) + ']' 72 + return self._base + '\t[' + ','.join([str(meaning) for meaning in self._meanings]) + ']'
  73 +
  74 + def __str__(self):
  75 + return self.getSignature() + '\t[' + ','.join([str(argument) for argument in self._arguments.values()]) + ']'
73 76
74 - def __unicode__(self):  
75 - return self.getSignature() + '\t[' + ','.join([unicode(argument) for argument in self._arguments.values()]) + ']'  
importer/Meanings.py
@@ -62,7 +62,15 @@ class Meaning: @@ -62,7 +62,15 @@ class Meaning:
62 unit.entry = entry 62 unit.entry = entry
63 unit.save() 63 unit.save()
64 frame.lexical_units.add(unit) 64 frame.lexical_units.add(unit)
65 - 65 +
def get(self):
    """Return the stored ``LexicalUnit`` for this meaning.

    The unit is looked up by ``base`` (this meaning's ``_name``) and
    ``sense`` (this meaning's ``_variant``).

    Raises:
        UnknownError: when no matching lexical unit exists; the meaning is
            printed to stdout first so the failing item can be identified.
    """
    # first() runs a single LIMIT 1 query ordered by primary key, so the
    # selected unit is deterministic and the matching rows are not all
    # loaded just to be counted.
    unit = LexicalUnit.objects.filter(base=self._name, sense=self._variant).first()
    if unit is None:
        print(self)
        raise UnknownError()
    return unit
  73 +
66 def __unicode__(self): 74 def __unicode__(self):
67 return '\'' + self._name + '\'-' + self._variant 75 return '\'' + self._name + '\'-' + self._variant
68 76
@@ -86,7 +94,7 @@ class Meanings: @@ -86,7 +94,7 @@ class Meanings:
86 return self._meanings 94 return self._meanings
87 95
88 def locate(self, meaning_id): 96 def locate(self, meaning_id):
89 - if meanings_id in self._meanings: 97 + if meaning_id in self._meanings:
90 return self._meanings[meaning_id] 98 return self._meanings[meaning_id]
91 else: 99 else:
92 return None 100 return None
importer/Position.py
@@ -112,7 +112,8 @@ class Position: @@ -112,7 +112,8 @@ class Position:
112 pred_control=position.pred_control, 112 pred_control=position.pred_control,
113 phrase_str=phrase_text).count() 113 phrase_str=phrase_text).count()
114 if desc_count == 0: 114 if desc_count == 0:
115 - desc_text = phrase_description2(phrase, self, negativity) 115 + #desc_text = phrase_description2(phrase, self, negativity)
  116 + desc_text = "tu pójdzie opis"
116 desc = NaturalLanguageDescription( 117 desc = NaturalLanguageDescription(
117 negativity=negativity, 118 negativity=negativity,
118 function=position.function, 119 function=position.function,
@@ -162,3 +163,18 @@ class Position: @@ -162,3 +163,18 @@ class Position:
162 return pre + 'obj([' + ','.join(temp) + '])' + post 163 return pre + 'obj([' + ','.join(temp) + '])' + post
163 elif self._function._value == 'head': 164 elif self._function._value == 'head':
164 return pre + 'head([' + ','.join(temp) + '])' + post 165 return pre + 'head([' + ','.join(temp) + '])' + post
  166 +
def subposition(self, phrases=None):
    """Render this position as ``<function>,<controls>{<phrases>}``.

    The function value and the comma-joined control functions form the
    prefix; whichever of the two is missing is simply left out, together
    with the separating comma.

    Args:
        phrases: optional collection used as a filter. When given, only
            those of this position's phrases that are contained in it are
            rendered; when ``None``, all phrases are rendered.

    Returns:
        The textual form of the (possibly filtered) position.
    """
    controls = ''
    if self._control is not None:
        controls = ','.join(control._function for control in self._control)
    function = '' if self._function is None else self._function._value
    # Joining only the non-empty pieces yields the comma exactly when both
    # the function and the controls are present.
    prefix = ','.join(piece for piece in (function, controls) if piece != '')
    if phrases is None:
        selected = self._phrases
    else:
        selected = [phrase for phrase in self._phrases if phrase in phrases]
    return prefix + '{' + ','.join(str(phrase) for phrase in selected) + '}'
importer/Realizations.py
@@ -35,7 +35,12 @@ class ArgumentRealization: @@ -35,7 +35,12 @@ class ArgumentRealization:
35 hook.save() 35 hook.save()
36 connection.schema_connections.add(hook) 36 connection.schema_connections.add(hook)
37 37
def matches(self, phrases):
    """Tell whether every item of ``phrases`` is among this realization's phrases.

    Returns ``True`` when ``phrases`` is empty.
    """
    return all(phrase in self._phrases for phrase in phrases)
  43 +
39 class FrameRealization: 44 class FrameRealization:
40 45
41 def __init__(self, frame, schema, argument_realizations): 46 def __init__(self, frame, schema, argument_realizations):
@@ -67,5 +72,12 @@ class FrameRealization: @@ -67,5 +72,12 @@ class FrameRealization:
67 subentry = self._schema.getSubentry(entry) 72 subentry = self._schema.getSubentry(entry)
68 for ar in self._argument_realizations: 73 for ar in self._argument_realizations:
69 ar.store(subentry, frame, schema, alternation) 74 ar.store(subentry, frame, schema, alternation)
  75 +
def findMatchingArgument(self, phrases):
    """Return the argument of the first argument realization matching ``phrases``.

    Realizations are tried in their stored order; ``None`` is returned when
    none of them matches.
    """
    candidates = (ar._argument for ar in self._argument_realizations if ar.matches(phrases))
    return next(candidates, None)
  81 +
70 82
71 83
importer/Semantics.py
@@ -39,6 +39,16 @@ class Semantics: @@ -39,6 +39,16 @@ class Semantics:
39 def getPhraseIds(self): 39 def getPhraseIds(self):
40 return self._phrases 40 return self._phrases
41 41
def findFrame(self, meaning):
    """Find the first frame that lists ``meaning`` and its realizations.

    Returns:
        A ``(frame, realizations)`` pair, where ``realizations`` holds every
        stored realization whose frame id equals the found frame's id, or
        ``None`` when no frame contains ``meaning``.
    """
    found = next((frame for frame in self._frames if meaning in frame._meanings), None)
    if found is None:
        return None
    linked = [
        realization
        for realization in self._realizations
        if realization._frame._id == found._id
    ]
    return found, linked
  51 +
42 # def preferencesToUnicode(self): 52 # def preferencesToUnicode(self):
43 # return '\n'.join([unicode(frame) for frame in self._frames]) 53 # return '\n'.join([unicode(frame) for frame in self._frames])
44 54
importer/WalentyPreprocessXML.py
@@ -80,6 +80,7 @@ class WalentyPreprocessTeiHandler(handler.ContentHandler): @@ -80,6 +80,7 @@ class WalentyPreprocessTeiHandler(handler.ContentHandler):
80 for entry_data, meaning in self.meanings.values(): 80 for entry_data, meaning in self.meanings.values():
81 name, pos = entry_data 81 name, pos = entry_data
82 meaning.save(pos) 82 meaning.save(pos)
  83 + print("Stored")
83 84
84 def extend(self, base, pos, meanings, frames): 85 def extend(self, base, pos, meanings, frames):
85 self.entry_meanings[(base, pos)] = [id for id in meanings._meanings] 86 self.entry_meanings[(base, pos)] = [id for id in meanings._meanings]
importer/WalentyXML.py
@@ -4,6 +4,9 @@ @@ -4,6 +4,9 @@
4 from xml.sax import handler 4 from xml.sax import handler
5 from importer.Entry import Entry 5 from importer.Entry import Entry
6 6
  7 +examples_out_file = 'examples_ambig.txt'
  8 +misconnected_examples_out_file = 'examples_to_reattach.txt'
  9 +
7 10
8 class XMLNode: 11 class XMLNode:
9 12
@@ -37,6 +40,9 @@ class WalentyTeiHandler(handler.ContentHandler): @@ -37,6 +40,9 @@ class WalentyTeiHandler(handler.ContentHandler):
37 self._meanings = meanings 40 self._meanings = meanings
38 self._frames = frames 41 self._frames = frames
39 self._stored_positions = {} 42 self._stored_positions = {}
  43 + self._examples_in = None # @TODO: read disambiguated file
  44 + self._examples_out = open(examples_out_file, "w")
  45 + self._misconnected_out = open(misconnected_examples_out_file, "w")
40 46
41 def startElement(self, name, attrs): 47 def startElement(self, name, attrs):
42 if name == 'date': 48 if name == 'date':
@@ -61,7 +67,7 @@ class WalentyTeiHandler(handler.ContentHandler): @@ -61,7 +67,7 @@ class WalentyTeiHandler(handler.ContentHandler):
61 if name == 'entry': 67 if name == 'entry':
62 if self._current is not None: 68 if self._current is not None:
63 raise TEIStructureError() 69 raise TEIStructureError()
64 - entry = Entry(self._subtree, self._entry_meanings, self._meanings, self._frames) 70 + entry = Entry(self._subtree, self._entry_meanings, self._meanings, self._frames, self._examples_in, self._examples_out, self._misconnected_out)
65 entry.store(self._meanings, self._stored_positions) 71 entry.store(self._meanings, self._stored_positions)
66 self._content = '' 72 self._content = ''
67 else: 73 else:
@@ -78,5 +84,6 @@ class WalentyTeiHandler(handler.ContentHandler): @@ -78,5 +84,6 @@ class WalentyTeiHandler(handler.ContentHandler):
78 self._content += content 84 self._content += content
79 85
80 def endDocument(self): 86 def endDocument(self):
81 - pass 87 + self._examples_out.close()
  88 + self._misconnected_out.close()
82 89
reset_db.sh
@@ -4,3 +4,5 @@ dropdb shellvalier @@ -4,3 +4,5 @@ dropdb shellvalier
4 createdb shellvalier -E UTF8 -T template0 -l pl_PL.utf8 4 createdb shellvalier -E UTF8 -T template0 -l pl_PL.utf8
5 python3 manage.py migrate 5 python3 manage.py migrate
6 python3 manage.py import_plWordnet 6 python3 manage.py import_plWordnet
  7 +python3 manage.py import_tei
  8 +
syntax/management/commands/import_tei.py
@@ -9,6 +9,7 @@ from importer.WalentyXML import WalentyTeiHandler @@ -9,6 +9,7 @@ from importer.WalentyXML import WalentyTeiHandler
9 from importer.WalentyPreprocessXML import WalentyPreprocessTeiHandler 9 from importer.WalentyPreprocessXML import WalentyPreprocessTeiHandler
10 from shellvalier.settings import BASE_DIR 10 from shellvalier.settings import BASE_DIR
11 from connections.models import POS, Status 11 from connections.models import POS, Status
  12 +from examples.models import ExampleOpinion, ExampleSource
12 from syntax.models import SchemaOpinion, Aspect, InherentSie, Negativity, Predicativity, SyntacticFunction, Control, PredicativeControl 13 from syntax.models import SchemaOpinion, Aspect, InherentSie, Negativity, Predicativity, SyntacticFunction, Control, PredicativeControl
13 from semantics.models import FrameOpinion, ArgumentRole, SemanticRole, RoleAttribute, PredefinedSelectionalPreference, SelectionalPreferenceRelation 14 from semantics.models import FrameOpinion, ArgumentRole, SemanticRole, RoleAttribute, PredefinedSelectionalPreference, SelectionalPreferenceRelation
14 15
@@ -54,6 +55,8 @@ def import_constants(): @@ -54,6 +55,8 @@ def import_constants():
54 import_semantic_roles() 55 import_semantic_roles()
55 import_predefined_preferences() 56 import_predefined_preferences()
56 import_preference_relations() 57 import_preference_relations()
  58 + import_examples_sources()
  59 + import_examples_opinions()
57 pass 60 pass
58 61
59 def import_poses(): 62 def import_poses():
@@ -149,5 +152,15 @@ def import_preference_relations(): @@ -149,5 +152,15 @@ def import_preference_relations():
149 relat = SelectionalPreferenceRelation(plwn_id=id, key=name) 152 relat = SelectionalPreferenceRelation(plwn_id=id, key=name)
150 relat.save() 153 relat.save()
151 154
def import_examples_sources():
    """Store one ``ExampleSource`` row per known example source.

    Each source's priority is its position in the list below, counted
    from 0.
    """
    keys = [
        'NKJP0.5M',
        'NKJP1.2M',
        'NKJP30M',
        'NKJP250M',
        'NKJP300M',
        'NKJP500M',
        'NKJP1800M',
        'linguistic_literature',
        'other_literature',
        'own',
    ]
    for priority, key in enumerate(keys):
        ExampleSource(key=key, priority=priority).save()
  160 +
def import_examples_opinions():
    """Store one ``ExampleOpinion`` row per known example opinion.

    Each opinion's priority is its position in the list below, counted
    from 0.
    """
    keys = ['zły', 'wątpliwy', 'dobry']
    for priority, key in enumerate(keys):
        ExampleOpinion(key=key, priority=priority).save()
153 166