Commit 9b1356f1782dbff55917f1511a27519d8eb88240
1 parent: ca3f9dba
importing examples from XML
Showing 13 changed files with 99 additions and 24 deletions
connections/models.py
@@ -50,6 +50,7 @@ class Status(models.Model): | @@ -50,6 +50,7 @@ class Status(models.Model): | ||
50 | class ExampleConnection(models.Model): | 50 | class ExampleConnection(models.Model): |
51 | example = models.ForeignKey(Example, related_name='example_connections', on_delete=models.PROTECT) | 51 | example = models.ForeignKey(Example, related_name='example_connections', on_delete=models.PROTECT) |
52 | lexical_unit = models.ForeignKey(LexicalUnit, related_name='example_connections', null=True, on_delete=models.PROTECT) | 52 | lexical_unit = models.ForeignKey(LexicalUnit, related_name='example_connections', null=True, on_delete=models.PROTECT) |
53 | + arguments = models.ManyToManyField(Argument, related_name='example_connections') | ||
53 | schema_connections = models.ManyToManyField('SchemaHook', related_name='example_connections') | 54 | schema_connections = models.ManyToManyField('SchemaHook', related_name='example_connections') |
54 | 55 | ||
55 | 56 |
examples/models.py
1 | from django.db import models | 1 | from django.db import models |
2 | 2 | ||
3 | - | ||
4 | class Example(models.Model): | 3 | class Example(models.Model): |
5 | entry = models.ForeignKey('connections.Entry', related_name='examples', on_delete=models.PROTECT) | 4 | entry = models.ForeignKey('connections.Entry', related_name='examples', on_delete=models.PROTECT) |
6 | sentence = models.TextField() | 5 | sentence = models.TextField() |
7 | opinion = models.ForeignKey('ExampleOpinion', related_name='examples', on_delete=models.PROTECT) | 6 | opinion = models.ForeignKey('ExampleOpinion', related_name='examples', on_delete=models.PROTECT) |
8 | source = models.ForeignKey('ExampleSource', related_name='examples', on_delete=models.PROTECT) | 7 | source = models.ForeignKey('ExampleSource', related_name='examples', on_delete=models.PROTECT) |
9 | - | 8 | + note = models.TextField(null=True) |
9 | + | ||
10 | def __str__(self): | 10 | def __str__(self): |
11 | return self.sentence | 11 | return self.sentence |
12 | 12 |
importer/Argument.py
@@ -31,8 +31,8 @@ class Relation: | @@ -31,8 +31,8 @@ class Relation: | ||
31 | pref.save() | 31 | pref.save() |
32 | argument.relations.add(pref) | 32 | argument.relations.add(pref) |
33 | 33 | ||
34 | - def __unicode__(self): | ||
35 | - return self._type + '->' + unicode(self._to._semantic_role) | 34 | + def __str__(self): |
35 | + return self._type + '->' + str(self._to._semantic_role) | ||
36 | 36 | ||
37 | class SelectionalPreference: | 37 | class SelectionalPreference: |
38 | 38 | ||
@@ -84,8 +84,8 @@ class SelectionalPreference: | @@ -84,8 +84,8 @@ class SelectionalPreference: | ||
84 | print(type, values) | 84 | print(type, values) |
85 | raise UnknownError() | 85 | raise UnknownError() |
86 | 86 | ||
87 | - def __unicode__(self): | ||
88 | - return unicode(self._value) | 87 | + def __str__(self): |
88 | + return str(self._value) | ||
89 | 89 | ||
90 | 90 | ||
91 | class SemanticRole: | 91 | class SemanticRole: |
@@ -106,7 +106,7 @@ class SemanticRole: | @@ -106,7 +106,7 @@ class SemanticRole: | ||
106 | argument_role = semantics.models.ArgumentRole.objects.get(role=role, attribute=attribute) | 106 | argument_role = semantics.models.ArgumentRole.objects.get(role=role, attribute=attribute) |
107 | return argument_role | 107 | return argument_role |
108 | 108 | ||
109 | - def __unicode__(self): | 109 | + def __str__(self): |
110 | if self._attribute is None: | 110 | if self._attribute is None: |
111 | return self._value.lower() | 111 | return self._value.lower() |
112 | else: | 112 | else: |
@@ -121,6 +121,7 @@ class Argument: | @@ -121,6 +121,7 @@ class Argument: | ||
121 | self._selectional_preferences = selectional_preferences | 121 | self._selectional_preferences = selectional_preferences |
122 | self._references = references | 122 | self._references = references |
123 | self._id = id | 123 | self._id = id |
124 | + self._db_id = None | ||
124 | 125 | ||
125 | @classmethod | 126 | @classmethod |
126 | def fromTree(cls, tree): | 127 | def fromTree(cls, tree): |
@@ -158,12 +159,13 @@ class Argument: | @@ -158,12 +159,13 @@ class Argument: | ||
158 | frame=frame, | 159 | frame=frame, |
159 | preferences_count=len(self._selectional_preferences)) | 160 | preferences_count=len(self._selectional_preferences)) |
160 | argument.save() | 161 | argument.save() |
162 | + self._db_id = argument.id | ||
161 | return argument | 163 | return argument |
162 | 164 | ||
163 | def store_preferences(self, frame, argument): | 165 | def store_preferences(self, frame, argument): |
164 | for preference in self._selectional_preferences: | 166 | for preference in self._selectional_preferences: |
165 | preference.store(frame, argument) | 167 | preference.store(frame, argument) |
166 | 168 | ||
167 | - def __unicode__(self): | ||
168 | - return unicode(self._semantic_role) + '[' + ','.join([unicode(pref) for pref in self._selectional_preferences]) + ']' | 169 | + def __str__(self): |
170 | + return str(self._semantic_role) + '[' + ','.join([str(pref) for pref in self._selectional_preferences]) + ']' | ||
169 | 171 |
importer/Entry.py
@@ -4,23 +4,24 @@ | @@ -4,23 +4,24 @@ | ||
4 | from importer.Syntax import Syntax | 4 | from importer.Syntax import Syntax |
5 | from importer.Meanings import Meanings | 5 | from importer.Meanings import Meanings |
6 | from importer.Semantics import Semantics | 6 | from importer.Semantics import Semantics |
7 | +from importer.Examples import Examples | ||
7 | from connections.models import POS, Status | 8 | from connections.models import POS, Status |
8 | import connections.models | 9 | import connections.models |
9 | 10 | ||
10 | class Entry: | 11 | class Entry: |
11 | 12 | ||
12 | - def __init__(self, entry_tree, entry_meanings, meanings, frames): | 13 | + def __init__(self, entry_tree, entry_meanings, meanings, frames, examples_in_data, examples_out_file, misconnected_out_file): |
13 | self._base = entry_tree._children[0]._children[0]._content | 14 | self._base = entry_tree._children[0]._children[0]._content |
14 | self._pos = entry_tree._children[0]._children[1]._content | 15 | self._pos = entry_tree._children[0]._children[1]._content |
16 | + print("processing: " + self._base) | ||
15 | self._status = entry_tree._children[1]._children[0]._children[0]._content | 17 | self._status = entry_tree._children[1]._children[0]._children[0]._content |
16 | - print(self._base) | ||
17 | self._syntax = Syntax.fromTree(entry_tree._children[2]) | 18 | self._syntax = Syntax.fromTree(entry_tree._children[2]) |
19 | + self._meanings = None | ||
20 | + self._semantics = None | ||
18 | if len(entry_tree._children) >= 7: | 21 | if len(entry_tree._children) >= 7: |
19 | self._meanings = Meanings.fromTree(entry_tree._children[5]) | 22 | self._meanings = Meanings.fromTree(entry_tree._children[5]) |
20 | self._semantics = Semantics.fromTree(self._base, self._pos, entry_tree._children[4], frames, self._meanings, self._syntax, entry_tree._children[6]) | 23 | self._semantics = Semantics.fromTree(self._base, self._pos, entry_tree._children[4], frames, self._meanings, self._syntax, entry_tree._children[6]) |
21 | - else: | ||
22 | - self._meanings = None | ||
23 | - self._semantics = None | 24 | + self._examples = Examples.fromTree(entry_tree._children[3], self._syntax, self._semantics, self._base, self._meanings, examples_in_data, examples_out_file, misconnected_out_file) |
24 | 25 | ||
25 | def store(self, all_meanings, stored_positions): | 26 | def store(self, all_meanings, stored_positions): |
26 | pos = POS.objects.get(tag=self._pos) | 27 | pos = POS.objects.get(tag=self._pos) |
@@ -31,6 +32,7 @@ class Entry: | @@ -31,6 +32,7 @@ class Entry: | ||
31 | self._syntax.store(entry, stored_positions) | 32 | self._syntax.store(entry, stored_positions) |
32 | if self._semantics is not None: | 33 | if self._semantics is not None: |
33 | self._semantics.store(entry, all_meanings) | 34 | self._semantics.store(entry, all_meanings) |
35 | + self._examples.store(entry, all_meanings) | ||
34 | 36 | ||
35 | def __str__(self): | 37 | def __str__(self): |
36 | return self._pos + '(' + self._base + ',' + str(self._syntax) + ').' | 38 | return self._pos + '(' + self._base + ',' + str(self._syntax) + ').' |
importer/Frame.py
@@ -69,7 +69,8 @@ class Frame: | @@ -69,7 +69,8 @@ class Frame: | ||
69 | 69 | ||
70 | 70 | ||
71 | def getSignature(self): | 71 | def getSignature(self): |
72 | - return self._base + '\t[' + ','.join([unicode(meaning) for meaning in self._meanings]) + ']' | 72 | + return self._base + '\t[' + ','.join([str(meaning) for meaning in self._meanings]) + ']' |
73 | + | ||
74 | + def __str__(self): | ||
75 | + return self.getSignature() + '\t[' + ','.join([str(argument) for argument in self._arguments.values()]) + ']' | ||
73 | 76 | ||
74 | - def __unicode__(self): | ||
75 | - return self.getSignature() + '\t[' + ','.join([unicode(argument) for argument in self._arguments.values()]) + ']' |
importer/Meanings.py
@@ -62,7 +62,15 @@ class Meaning: | @@ -62,7 +62,15 @@ class Meaning: | ||
62 | unit.entry = entry | 62 | unit.entry = entry |
63 | unit.save() | 63 | unit.save() |
64 | frame.lexical_units.add(unit) | 64 | frame.lexical_units.add(unit) |
65 | - | 65 | + |
66 | + def get(self): | ||
67 | + units = LexicalUnit.objects.filter(base=self._name, sense=self._variant) | ||
68 | + if len(units) == 0: | ||
69 | + print(self) | ||
70 | + raise UnknownError() | ||
71 | + else: | ||
72 | + return units[0] | ||
73 | + | ||
66 | def __unicode__(self): | 74 | def __unicode__(self): |
67 | return '\'' + self._name + '\'-' + self._variant | 75 | return '\'' + self._name + '\'-' + self._variant |
68 | 76 | ||
@@ -86,7 +94,7 @@ class Meanings: | @@ -86,7 +94,7 @@ class Meanings: | ||
86 | return self._meanings | 94 | return self._meanings |
87 | 95 | ||
88 | def locate(self, meaning_id): | 96 | def locate(self, meaning_id): |
89 | - if meanings_id in self._meanings: | 97 | + if meaning_id in self._meanings: |
90 | return self._meanings[meaning_id] | 98 | return self._meanings[meaning_id] |
91 | else: | 99 | else: |
92 | return None | 100 | return None |
importer/Position.py
@@ -112,7 +112,8 @@ class Position: | @@ -112,7 +112,8 @@ class Position: | ||
112 | pred_control=position.pred_control, | 112 | pred_control=position.pred_control, |
113 | phrase_str=phrase_text).count() | 113 | phrase_str=phrase_text).count() |
114 | if desc_count == 0: | 114 | if desc_count == 0: |
115 | - desc_text = phrase_description2(phrase, self, negativity) | 115 | + #desc_text = phrase_description2(phrase, self, negativity) |
116 | + desc_text = "tu pójdzie opis" | ||
116 | desc = NaturalLanguageDescription( | 117 | desc = NaturalLanguageDescription( |
117 | negativity=negativity, | 118 | negativity=negativity, |
118 | function=position.function, | 119 | function=position.function, |
@@ -162,3 +163,18 @@ class Position: | @@ -162,3 +163,18 @@ class Position: | ||
162 | return pre + 'obj([' + ','.join(temp) + '])' + post | 163 | return pre + 'obj([' + ','.join(temp) + '])' + post |
163 | elif self._function._value == 'head': | 164 | elif self._function._value == 'head': |
164 | return pre + 'head([' + ','.join(temp) + '])' + post | 165 | return pre + 'head([' + ','.join(temp) + '])' + post |
166 | + | ||
167 | + def subposition(self, phrases=None): | ||
168 | + c = '' | ||
169 | + if self._control is not None: | ||
170 | + c = ','.join([control._function for control in self._control]) | ||
171 | + f = '' | ||
172 | + if self._function is not None: | ||
173 | + f = self._function._value | ||
174 | + if f != '' and c != '': | ||
175 | + f += ',' | ||
176 | + if phrases is None: | ||
177 | + return f + c + '{' + ','.join([str(phrase) for phrase in self._phrases]) + '}' | ||
178 | + else: | ||
179 | + temp = [str(phrase) for phrase in self._phrases if phrase in phrases] | ||
180 | + return f + c + '{' + ','.join(temp) + '}' |
importer/Realizations.py
@@ -35,7 +35,12 @@ class ArgumentRealization: | @@ -35,7 +35,12 @@ class ArgumentRealization: | ||
35 | hook.save() | 35 | hook.save() |
36 | connection.schema_connections.add(hook) | 36 | connection.schema_connections.add(hook) |
37 | 37 | ||
38 | - | 38 | + def matches(self, phrases): |
39 | + for phrase in phrases: | ||
40 | + if phrase not in self._phrases: | ||
41 | + return False | ||
42 | + return True | ||
43 | + | ||
39 | class FrameRealization: | 44 | class FrameRealization: |
40 | 45 | ||
41 | def __init__(self, frame, schema, argument_realizations): | 46 | def __init__(self, frame, schema, argument_realizations): |
@@ -67,5 +72,12 @@ class FrameRealization: | @@ -67,5 +72,12 @@ class FrameRealization: | ||
67 | subentry = self._schema.getSubentry(entry) | 72 | subentry = self._schema.getSubentry(entry) |
68 | for ar in self._argument_realizations: | 73 | for ar in self._argument_realizations: |
69 | ar.store(subentry, frame, schema, alternation) | 74 | ar.store(subentry, frame, schema, alternation) |
75 | + | ||
76 | + def findMatchingArgument(self, phrases): | ||
77 | + for ar in self._argument_realizations: | ||
78 | + if ar.matches(phrases): | ||
79 | + return ar._argument | ||
80 | + return None | ||
81 | + | ||
70 | 82 | ||
71 | 83 |
importer/Semantics.py
@@ -39,6 +39,16 @@ class Semantics: | @@ -39,6 +39,16 @@ class Semantics: | ||
39 | def getPhraseIds(self): | 39 | def getPhraseIds(self): |
40 | return self._phrases | 40 | return self._phrases |
41 | 41 | ||
42 | + def findFrame(self, meaning): | ||
43 | + for frame in self._frames: | ||
44 | + if meaning in frame._meanings: | ||
45 | + realizations = [] | ||
46 | + for realization in self._realizations: | ||
47 | + if realization._frame._id == frame._id: | ||
48 | + realizations.append(realization) | ||
49 | + return frame, realizations | ||
50 | + return None | ||
51 | + | ||
42 | # def preferencesToUnicode(self): | 52 | # def preferencesToUnicode(self): |
43 | # return '\n'.join([unicode(frame) for frame in self._frames]) | 53 | # return '\n'.join([unicode(frame) for frame in self._frames]) |
44 | 54 |
importer/WalentyPreprocessXML.py
@@ -80,6 +80,7 @@ class WalentyPreprocessTeiHandler(handler.ContentHandler): | @@ -80,6 +80,7 @@ class WalentyPreprocessTeiHandler(handler.ContentHandler): | ||
80 | for entry_data, meaning in self.meanings.values(): | 80 | for entry_data, meaning in self.meanings.values(): |
81 | name, pos = entry_data | 81 | name, pos = entry_data |
82 | meaning.save(pos) | 82 | meaning.save(pos) |
83 | + print("Stored") | ||
83 | 84 | ||
84 | def extend(self, base, pos, meanings, frames): | 85 | def extend(self, base, pos, meanings, frames): |
85 | self.entry_meanings[(base, pos)] = [id for id in meanings._meanings] | 86 | self.entry_meanings[(base, pos)] = [id for id in meanings._meanings] |
importer/WalentyXML.py
@@ -4,6 +4,9 @@ | @@ -4,6 +4,9 @@ | ||
4 | from xml.sax import handler | 4 | from xml.sax import handler |
5 | from importer.Entry import Entry | 5 | from importer.Entry import Entry |
6 | 6 | ||
7 | +examples_out_file = 'examples_ambig.txt' | ||
8 | +misconnected_examples_out_file = 'examples_to_reattach.txt' | ||
9 | + | ||
7 | 10 | ||
8 | class XMLNode: | 11 | class XMLNode: |
9 | 12 | ||
@@ -37,6 +40,9 @@ class WalentyTeiHandler(handler.ContentHandler): | @@ -37,6 +40,9 @@ class WalentyTeiHandler(handler.ContentHandler): | ||
37 | self._meanings = meanings | 40 | self._meanings = meanings |
38 | self._frames = frames | 41 | self._frames = frames |
39 | self._stored_positions = {} | 42 | self._stored_positions = {} |
43 | + self._examples_in = None # @TODO: read disambiguated file | ||
44 | + self._examples_out = open(examples_out_file, "w") | ||
45 | + self._misconnected_out = open(misconnected_examples_out_file, "w") | ||
40 | 46 | ||
41 | def startElement(self, name, attrs): | 47 | def startElement(self, name, attrs): |
42 | if name == 'date': | 48 | if name == 'date': |
@@ -61,7 +67,7 @@ class WalentyTeiHandler(handler.ContentHandler): | @@ -61,7 +67,7 @@ class WalentyTeiHandler(handler.ContentHandler): | ||
61 | if name == 'entry': | 67 | if name == 'entry': |
62 | if self._current is not None: | 68 | if self._current is not None: |
63 | raise TEIStructureError() | 69 | raise TEIStructureError() |
64 | - entry = Entry(self._subtree, self._entry_meanings, self._meanings, self._frames) | 70 | + entry = Entry(self._subtree, self._entry_meanings, self._meanings, self._frames, self._examples_in, self._examples_out, self._misconnected_out) |
65 | entry.store(self._meanings, self._stored_positions) | 71 | entry.store(self._meanings, self._stored_positions) |
66 | self._content = '' | 72 | self._content = '' |
67 | else: | 73 | else: |
@@ -78,5 +84,6 @@ class WalentyTeiHandler(handler.ContentHandler): | @@ -78,5 +84,6 @@ class WalentyTeiHandler(handler.ContentHandler): | ||
78 | self._content += content | 84 | self._content += content |
79 | 85 | ||
80 | def endDocument(self): | 86 | def endDocument(self): |
81 | - pass | 87 | + self._examples_out.close() |
88 | + self._misconnected_out.close() | ||
82 | 89 |
reset_db.sh
@@ -4,3 +4,5 @@ dropdb shellvalier | @@ -4,3 +4,5 @@ dropdb shellvalier | ||
4 | createdb shellvalier -E UTF8 -T template0 -l pl_PL.utf8 | 4 | createdb shellvalier -E UTF8 -T template0 -l pl_PL.utf8 |
5 | python3 manage.py migrate | 5 | python3 manage.py migrate |
6 | python3 manage.py import_plWordnet | 6 | python3 manage.py import_plWordnet |
7 | +python3 manage.py import_tei | ||
8 | + |
syntax/management/commands/import_tei.py
@@ -9,6 +9,7 @@ from importer.WalentyXML import WalentyTeiHandler | @@ -9,6 +9,7 @@ from importer.WalentyXML import WalentyTeiHandler | ||
9 | from importer.WalentyPreprocessXML import WalentyPreprocessTeiHandler | 9 | from importer.WalentyPreprocessXML import WalentyPreprocessTeiHandler |
10 | from shellvalier.settings import BASE_DIR | 10 | from shellvalier.settings import BASE_DIR |
11 | from connections.models import POS, Status | 11 | from connections.models import POS, Status |
12 | +from examples.models import ExampleOpinion, ExampleSource | ||
12 | from syntax.models import SchemaOpinion, Aspect, InherentSie, Negativity, Predicativity, SyntacticFunction, Control, PredicativeControl | 13 | from syntax.models import SchemaOpinion, Aspect, InherentSie, Negativity, Predicativity, SyntacticFunction, Control, PredicativeControl |
13 | from semantics.models import FrameOpinion, ArgumentRole, SemanticRole, RoleAttribute, PredefinedSelectionalPreference, SelectionalPreferenceRelation | 14 | from semantics.models import FrameOpinion, ArgumentRole, SemanticRole, RoleAttribute, PredefinedSelectionalPreference, SelectionalPreferenceRelation |
14 | 15 | ||
@@ -54,6 +55,8 @@ def import_constants(): | @@ -54,6 +55,8 @@ def import_constants(): | ||
54 | import_semantic_roles() | 55 | import_semantic_roles() |
55 | import_predefined_preferences() | 56 | import_predefined_preferences() |
56 | import_preference_relations() | 57 | import_preference_relations() |
58 | + import_examples_sources() | ||
59 | + import_examples_opinions() | ||
57 | pass | 60 | pass |
58 | 61 | ||
59 | def import_poses(): | 62 | def import_poses(): |
@@ -149,5 +152,15 @@ def import_preference_relations(): | @@ -149,5 +152,15 @@ def import_preference_relations(): | ||
149 | relat = SelectionalPreferenceRelation(plwn_id=id, key=name) | 152 | relat = SelectionalPreferenceRelation(plwn_id=id, key=name) |
150 | relat.save() | 153 | relat.save() |
151 | 154 | ||
152 | - | 155 | +def import_examples_sources(): |
156 | + sources = [(0, u'NKJP0.5M'), (1, u'NKJP1.2M'), (2, u'NKJP30M'), (3, u'NKJP250M'), (4, u'NKJP300M'), (5, u'NKJP500M'), (6, u'NKJP1800M'), (7, u'linguistic_literature'), (8, u'other_literature'), (9, u'own')] | ||
157 | + for pri, name in sources: | ||
158 | + es = ExampleSource(key=name, priority=pri) | ||
159 | + es.save() | ||
160 | + | ||
161 | +def import_examples_opinions(): | ||
162 | + opinions = [(0, 'zły'), (1, 'wątpliwy'), (2, 'dobry')] | ||
163 | + for pri, name in opinions: | ||
164 | + eo = ExampleOpinion(key=name, priority=pri) | ||
165 | + eo.save() | ||
153 | 166 |