Commit 9b1356f1782dbff55917f1511a27519d8eb88240 (1 parent: ca3f9dba)
importing examples from XML
Showing 13 changed files with 99 additions and 24 deletions
connections/models.py
... | ... | @@ -50,6 +50,7 @@ class Status(models.Model): |
50 | 50 | class ExampleConnection(models.Model): |
51 | 51 | example = models.ForeignKey(Example, related_name='example_connections', on_delete=models.PROTECT) |
52 | 52 | lexical_unit = models.ForeignKey(LexicalUnit, related_name='example_connections', null=True, on_delete=models.PROTECT) |
53 | + arguments = models.ManyToManyField(Argument, related_name='example_connections') | |
53 | 54 | schema_connections = models.ManyToManyField('SchemaHook', related_name='example_connections') |
54 | 55 | |
55 | 56 | |
... | ... |
examples/models.py
1 | 1 | from django.db import models |
2 | 2 | |
3 | - | |
4 | 3 | class Example(models.Model): |
5 | 4 | entry = models.ForeignKey('connections.Entry', related_name='examples', on_delete=models.PROTECT) |
6 | 5 | sentence = models.TextField() |
7 | 6 | opinion = models.ForeignKey('ExampleOpinion', related_name='examples', on_delete=models.PROTECT) |
8 | 7 | source = models.ForeignKey('ExampleSource', related_name='examples', on_delete=models.PROTECT) |
9 | - | |
8 | + note = models.TextField(null=True) | |
9 | + | |
10 | 10 | def __str__(self): |
11 | 11 | return self.sentence |
12 | 12 | |
... | ... |
importer/Argument.py
... | ... | @@ -31,8 +31,8 @@ class Relation: |
31 | 31 | pref.save() |
32 | 32 | argument.relations.add(pref) |
33 | 33 | |
34 | - def __unicode__(self): | |
35 | - return self._type + '->' + unicode(self._to._semantic_role) | |
34 | + def __str__(self): | |
35 | + return self._type + '->' + str(self._to._semantic_role) | |
36 | 36 | |
37 | 37 | class SelectionalPreference: |
38 | 38 | |
... | ... | @@ -84,8 +84,8 @@ class SelectionalPreference: |
84 | 84 | print(type, values) |
85 | 85 | raise UnknownError() |
86 | 86 | |
87 | - def __unicode__(self): | |
88 | - return unicode(self._value) | |
87 | + def __str__(self): | |
88 | + return str(self._value) | |
89 | 89 | |
90 | 90 | |
91 | 91 | class SemanticRole: |
... | ... | @@ -106,7 +106,7 @@ class SemanticRole: |
106 | 106 | argument_role = semantics.models.ArgumentRole.objects.get(role=role, attribute=attribute) |
107 | 107 | return argument_role |
108 | 108 | |
109 | - def __unicode__(self): | |
109 | + def __str__(self): | |
110 | 110 | if self._attribute is None: |
111 | 111 | return self._value.lower() |
112 | 112 | else: |
... | ... | @@ -121,6 +121,7 @@ class Argument: |
121 | 121 | self._selectional_preferences = selectional_preferences |
122 | 122 | self._references = references |
123 | 123 | self._id = id |
124 | + self._db_id = None | |
124 | 125 | |
125 | 126 | @classmethod |
126 | 127 | def fromTree(cls, tree): |
... | ... | @@ -158,12 +159,13 @@ class Argument: |
158 | 159 | frame=frame, |
159 | 160 | preferences_count=len(self._selectional_preferences)) |
160 | 161 | argument.save() |
162 | + self._db_id = argument.id | |
161 | 163 | return argument |
162 | 164 | |
163 | 165 | def store_preferences(self, frame, argument): |
164 | 166 | for preference in self._selectional_preferences: |
165 | 167 | preference.store(frame, argument) |
166 | 168 | |
167 | - def __unicode__(self): | |
168 | - return unicode(self._semantic_role) + '[' + ','.join([unicode(pref) for pref in self._selectional_preferences]) + ']' | |
169 | + def __str__(self): | |
170 | + return str(self._semantic_role) + '[' + ','.join([str(pref) for pref in self._selectional_preferences]) + ']' | |
169 | 171 | |
... | ... |
importer/Entry.py
... | ... | @@ -4,23 +4,24 @@ |
4 | 4 | from importer.Syntax import Syntax |
5 | 5 | from importer.Meanings import Meanings |
6 | 6 | from importer.Semantics import Semantics |
7 | +from importer.Examples import Examples | |
7 | 8 | from connections.models import POS, Status |
8 | 9 | import connections.models |
9 | 10 | |
10 | 11 | class Entry: |
11 | 12 | |
12 | - def __init__(self, entry_tree, entry_meanings, meanings, frames): | |
13 | + def __init__(self, entry_tree, entry_meanings, meanings, frames, examples_in_data, examples_out_file, misconnected_out_file): | |
13 | 14 | self._base = entry_tree._children[0]._children[0]._content |
14 | 15 | self._pos = entry_tree._children[0]._children[1]._content |
16 | + print("processing: " + self._base) | |
15 | 17 | self._status = entry_tree._children[1]._children[0]._children[0]._content |
16 | - print(self._base) | |
17 | 18 | self._syntax = Syntax.fromTree(entry_tree._children[2]) |
19 | + self._meanings = None | |
20 | + self._semantics = None | |
18 | 21 | if len(entry_tree._children) >= 7: |
19 | 22 | self._meanings = Meanings.fromTree(entry_tree._children[5]) |
20 | 23 | self._semantics = Semantics.fromTree(self._base, self._pos, entry_tree._children[4], frames, self._meanings, self._syntax, entry_tree._children[6]) |
21 | - else: | |
22 | - self._meanings = None | |
23 | - self._semantics = None | |
24 | + self._examples = Examples.fromTree(entry_tree._children[3], self._syntax, self._semantics, self._base, self._meanings, examples_in_data, examples_out_file, misconnected_out_file) | |
24 | 25 | |
25 | 26 | def store(self, all_meanings, stored_positions): |
26 | 27 | pos = POS.objects.get(tag=self._pos) |
... | ... | @@ -31,6 +32,7 @@ class Entry: |
31 | 32 | self._syntax.store(entry, stored_positions) |
32 | 33 | if self._semantics is not None: |
33 | 34 | self._semantics.store(entry, all_meanings) |
35 | + self._examples.store(entry, all_meanings) | |
34 | 36 | |
35 | 37 | def __str__(self): |
36 | 38 | return self._pos + '(' + self._base + ',' + str(self._syntax) + ').' |
... | ... |
importer/Frame.py
... | ... | @@ -69,7 +69,8 @@ class Frame: |
69 | 69 | |
70 | 70 | |
71 | 71 | def getSignature(self): |
72 | - return self._base + '\t[' + ','.join([unicode(meaning) for meaning in self._meanings]) + ']' | |
72 | + return self._base + '\t[' + ','.join([str(meaning) for meaning in self._meanings]) + ']' | |
73 | + | |
74 | + def __str__(self): | |
75 | + return self.getSignature() + '\t[' + ','.join([str(argument) for argument in self._arguments.values()]) + ']' | |
73 | 76 | |
74 | - def __unicode__(self): | |
75 | - return self.getSignature() + '\t[' + ','.join([unicode(argument) for argument in self._arguments.values()]) + ']' | |
... | ... |
importer/Meanings.py
... | ... | @@ -62,7 +62,15 @@ class Meaning: |
62 | 62 | unit.entry = entry |
63 | 63 | unit.save() |
64 | 64 | frame.lexical_units.add(unit) |
65 | - | |
65 | + | |
66 | + def get(self): | |
67 | + units = LexicalUnit.objects.filter(base=self._name, sense=self._variant) | |
68 | + if len(units) == 0: | |
69 | + print(self) | |
70 | + raise UnknownError() | |
71 | + else: | |
72 | + return units[0] | |
73 | + | |
66 | 74 | def __unicode__(self): |
67 | 75 | return '\'' + self._name + '\'-' + self._variant |
68 | 76 | |
... | ... | @@ -86,7 +94,7 @@ class Meanings: |
86 | 94 | return self._meanings |
87 | 95 | |
88 | 96 | def locate(self, meaning_id): |
89 | - if meanings_id in self._meanings: | |
97 | + if meaning_id in self._meanings: | |
90 | 98 | return self._meanings[meaning_id] |
91 | 99 | else: |
92 | 100 | return None |
... | ... |
importer/Position.py
... | ... | @@ -112,7 +112,8 @@ class Position: |
112 | 112 | pred_control=position.pred_control, |
113 | 113 | phrase_str=phrase_text).count() |
114 | 114 | if desc_count == 0: |
115 | - desc_text = phrase_description2(phrase, self, negativity) | |
115 | + #desc_text = phrase_description2(phrase, self, negativity) | |
116 | + desc_text = "tu pójdzie opis" | |
116 | 117 | desc = NaturalLanguageDescription( |
117 | 118 | negativity=negativity, |
118 | 119 | function=position.function, |
... | ... | @@ -162,3 +163,18 @@ class Position: |
162 | 163 | return pre + 'obj([' + ','.join(temp) + '])' + post |
163 | 164 | elif self._function._value == 'head': |
164 | 165 | return pre + 'head([' + ','.join(temp) + '])' + post |
166 | + | |
167 | + def subposition(self, phrases=None): | |
168 | + c = '' | |
169 | + if self._control is not None: | |
170 | + c = ','.join([control._function for control in self._control]) | |
171 | + f = '' | |
172 | + if self._function is not None: | |
173 | + f = self._function._value | |
174 | + if f != '' and c != '': | |
175 | + f += ',' | |
176 | + if phrases is None: | |
177 | + return f + c + '{' + ','.join([str(phrase) for phrase in self._phrases]) + '}' | |
178 | + else: | |
179 | + temp = [str(phrase) for phrase in self._phrases if phrase in phrases] | |
180 | + return f + c + '{' + ','.join(temp) + '}' | |
... | ... |
importer/Realizations.py
... | ... | @@ -35,7 +35,12 @@ class ArgumentRealization: |
35 | 35 | hook.save() |
36 | 36 | connection.schema_connections.add(hook) |
37 | 37 | |
38 | - | |
38 | + def matches(self, phrases): | |
39 | + for phrase in phrases: | |
40 | + if phrase not in self._phrases: | |
41 | + return False | |
42 | + return True | |
43 | + | |
39 | 44 | class FrameRealization: |
40 | 45 | |
41 | 46 | def __init__(self, frame, schema, argument_realizations): |
... | ... | @@ -67,5 +72,12 @@ class FrameRealization: |
67 | 72 | subentry = self._schema.getSubentry(entry) |
68 | 73 | for ar in self._argument_realizations: |
69 | 74 | ar.store(subentry, frame, schema, alternation) |
75 | + | |
76 | + def findMatchingArgument(self, phrases): | |
77 | + for ar in self._argument_realizations: | |
78 | + if ar.matches(phrases): | |
79 | + return ar._argument | |
80 | + return None | |
81 | + | |
70 | 82 | |
71 | 83 | |
... | ... |
importer/Semantics.py
... | ... | @@ -39,6 +39,16 @@ class Semantics: |
39 | 39 | def getPhraseIds(self): |
40 | 40 | return self._phrases |
41 | 41 | |
42 | + def findFrame(self, meaning): | |
43 | + for frame in self._frames: | |
44 | + if meaning in frame._meanings: | |
45 | + realizations = [] | |
46 | + for realization in self._realizations: | |
47 | + if realization._frame._id == frame._id: | |
48 | + realizations.append(realization) | |
49 | + return frame, realizations | |
50 | + return None | |
51 | + | |
42 | 52 | # def preferencesToUnicode(self): |
43 | 53 | # return '\n'.join([unicode(frame) for frame in self._frames]) |
44 | 54 | |
... | ... |
importer/WalentyPreprocessXML.py
... | ... | @@ -80,6 +80,7 @@ class WalentyPreprocessTeiHandler(handler.ContentHandler): |
80 | 80 | for entry_data, meaning in self.meanings.values(): |
81 | 81 | name, pos = entry_data |
82 | 82 | meaning.save(pos) |
83 | + print("Stored") | |
83 | 84 | |
84 | 85 | def extend(self, base, pos, meanings, frames): |
85 | 86 | self.entry_meanings[(base, pos)] = [id for id in meanings._meanings] |
... | ... |
importer/WalentyXML.py
... | ... | @@ -4,6 +4,9 @@ |
4 | 4 | from xml.sax import handler |
5 | 5 | from importer.Entry import Entry |
6 | 6 | |
7 | +examples_out_file = 'examples_ambig.txt' | |
8 | +misconnected_examples_out_file = 'examples_to_reattach.txt' | |
9 | + | |
7 | 10 | |
8 | 11 | class XMLNode: |
9 | 12 | |
... | ... | @@ -37,6 +40,9 @@ class WalentyTeiHandler(handler.ContentHandler): |
37 | 40 | self._meanings = meanings |
38 | 41 | self._frames = frames |
39 | 42 | self._stored_positions = {} |
43 | + self._examples_in = None # @TODO: read disambiguated file | |
44 | + self._examples_out = open(examples_out_file, "w") | |
45 | + self._misconnected_out = open(misconnected_examples_out_file, "w") | |
40 | 46 | |
41 | 47 | def startElement(self, name, attrs): |
42 | 48 | if name == 'date': |
... | ... | @@ -61,7 +67,7 @@ class WalentyTeiHandler(handler.ContentHandler): |
61 | 67 | if name == 'entry': |
62 | 68 | if self._current is not None: |
63 | 69 | raise TEIStructureError() |
64 | - entry = Entry(self._subtree, self._entry_meanings, self._meanings, self._frames) | |
70 | + entry = Entry(self._subtree, self._entry_meanings, self._meanings, self._frames, self._examples_in, self._examples_out, self._misconnected_out) | |
65 | 71 | entry.store(self._meanings, self._stored_positions) |
66 | 72 | self._content = '' |
67 | 73 | else: |
... | ... | @@ -78,5 +84,6 @@ class WalentyTeiHandler(handler.ContentHandler): |
78 | 84 | self._content += content |
79 | 85 | |
80 | 86 | def endDocument(self): |
81 | - pass | |
87 | + self._examples_out.close() | |
88 | + self._misconnected_out.close() | |
82 | 89 | |
... | ... |
reset_db.sh
syntax/management/commands/import_tei.py
... | ... | @@ -9,6 +9,7 @@ from importer.WalentyXML import WalentyTeiHandler |
9 | 9 | from importer.WalentyPreprocessXML import WalentyPreprocessTeiHandler |
10 | 10 | from shellvalier.settings import BASE_DIR |
11 | 11 | from connections.models import POS, Status |
12 | +from examples.models import ExampleOpinion, ExampleSource | |
12 | 13 | from syntax.models import SchemaOpinion, Aspect, InherentSie, Negativity, Predicativity, SyntacticFunction, Control, PredicativeControl |
13 | 14 | from semantics.models import FrameOpinion, ArgumentRole, SemanticRole, RoleAttribute, PredefinedSelectionalPreference, SelectionalPreferenceRelation |
14 | 15 | |
... | ... | @@ -54,6 +55,8 @@ def import_constants(): |
54 | 55 | import_semantic_roles() |
55 | 56 | import_predefined_preferences() |
56 | 57 | import_preference_relations() |
58 | + import_examples_sources() | |
59 | + import_examples_opinions() | |
57 | 60 | pass |
58 | 61 | |
59 | 62 | def import_poses(): |
... | ... | @@ -149,5 +152,15 @@ def import_preference_relations(): |
149 | 152 | relat = SelectionalPreferenceRelation(plwn_id=id, key=name) |
150 | 153 | relat.save() |
151 | 154 | |
152 | - | |
155 | +def import_examples_sources(): | |
156 | + sources = [(0, u'NKJP0.5M'), (1, u'NKJP1.2M'), (2, u'NKJP30M'), (3, u'NKJP250M'), (4, u'NKJP300M'), (5, u'NKJP500M'), (6, u'NKJP1800M'), (7, u'linguistic_literature'), (8, u'other_literature'), (9, u'own')] | |
157 | + for pri, name in sources: | |
158 | + es = ExampleSource(key=name, priority=pri) | |
159 | + es.save() | |
160 | + | |
161 | +def import_examples_opinions(): | |
162 | + opinions = [(0, 'zły'), (1, 'wątpliwy'), (2, 'dobry')] | |
163 | + for pri, name in opinions: | |
164 | + eo = ExampleOpinion(key=name, priority=pri) | |
165 | + eo.save() | |
153 | 166 | |
... | ... |