Commit 9b1356f1782dbff55917f1511a27519d8eb88240

Authored by Tomasz Bartosiak
1 parent ca3f9dba

importing examples from XML

connections/models.py
... ... @@ -50,6 +50,7 @@ class Status(models.Model):
50 50 class ExampleConnection(models.Model):
51 51 example = models.ForeignKey(Example, related_name='example_connections', on_delete=models.PROTECT)
52 52 lexical_unit = models.ForeignKey(LexicalUnit, related_name='example_connections', null=True, on_delete=models.PROTECT)
  53 + arguments = models.ManyToManyField(Argument, related_name='example_connections')
53 54 schema_connections = models.ManyToManyField('SchemaHook', related_name='example_connections')
54 55  
55 56  
... ...
examples/models.py
1 1 from django.db import models
2 2  
3   -
4 3 class Example(models.Model):
5 4 entry = models.ForeignKey('connections.Entry', related_name='examples', on_delete=models.PROTECT)
6 5 sentence = models.TextField()
7 6 opinion = models.ForeignKey('ExampleOpinion', related_name='examples', on_delete=models.PROTECT)
8 7 source = models.ForeignKey('ExampleSource', related_name='examples', on_delete=models.PROTECT)
9   -
  8 + note = models.TextField(null=True)
  9 +
10 10 def __str__(self):
11 11 return self.sentence
12 12  
... ...
importer/Argument.py
... ... @@ -31,8 +31,8 @@ class Relation:
31 31 pref.save()
32 32 argument.relations.add(pref)
33 33  
34   - def __unicode__(self):
35   - return self._type + '->' + unicode(self._to._semantic_role)
  34 + def __str__(self):
  35 + return self._type + '->' + str(self._to._semantic_role)
36 36  
37 37 class SelectionalPreference:
38 38  
... ... @@ -84,8 +84,8 @@ class SelectionalPreference:
84 84 print(type, values)
85 85 raise UnknownError()
86 86  
87   - def __unicode__(self):
88   - return unicode(self._value)
  87 + def __str__(self):
  88 + return str(self._value)
89 89  
90 90  
91 91 class SemanticRole:
... ... @@ -106,7 +106,7 @@ class SemanticRole:
106 106 argument_role = semantics.models.ArgumentRole.objects.get(role=role, attribute=attribute)
107 107 return argument_role
108 108  
109   - def __unicode__(self):
  109 + def __str__(self):
110 110 if self._attribute is None:
111 111 return self._value.lower()
112 112 else:
... ... @@ -121,6 +121,7 @@ class Argument:
121 121 self._selectional_preferences = selectional_preferences
122 122 self._references = references
123 123 self._id = id
  124 + self._db_id = None
124 125  
125 126 @classmethod
126 127 def fromTree(cls, tree):
... ... @@ -158,12 +159,13 @@ class Argument:
158 159 frame=frame,
159 160 preferences_count=len(self._selectional_preferences))
160 161 argument.save()
  162 + self._db_id = argument.id
161 163 return argument
162 164  
163 165 def store_preferences(self, frame, argument):
164 166 for preference in self._selectional_preferences:
165 167 preference.store(frame, argument)
166 168  
167   - def __unicode__(self):
168   - return unicode(self._semantic_role) + '[' + ','.join([unicode(pref) for pref in self._selectional_preferences]) + ']'
  169 + def __str__(self):
  170 + return str(self._semantic_role) + '[' + ','.join([str(pref) for pref in self._selectional_preferences]) + ']'
169 171  
... ...
importer/Entry.py
... ... @@ -4,23 +4,24 @@
4 4 from importer.Syntax import Syntax
5 5 from importer.Meanings import Meanings
6 6 from importer.Semantics import Semantics
  7 +from importer.Examples import Examples
7 8 from connections.models import POS, Status
8 9 import connections.models
9 10  
10 11 class Entry:
11 12  
12   - def __init__(self, entry_tree, entry_meanings, meanings, frames):
def __init__(self, entry_tree, entry_meanings, meanings, frames, examples_in_data, examples_out_file, misconnected_out_file):
    """Build an importer entry from one parsed <entry> subtree.

    The subtree children are read positionally: [0] base and POS, [1] status,
    [2] syntax, [3] examples and, when at least seven children are present,
    [4]/[6] semantics input and [5] meanings.  Without those, meanings and
    semantics stay None.  The last three arguments are passed straight
    through to Examples.fromTree.
    """
    children = entry_tree._children
    header = children[0]
    self._base = header._children[0]._content
    self._pos = header._children[1]._content
    print("processing: " + self._base)
    self._status = children[1]._children[0]._children[0]._content
    self._syntax = Syntax.fromTree(children[2])
    # Defaults for entries that carry no semantic layer.
    self._meanings = None
    self._semantics = None
    if len(children) >= 7:
        self._meanings = Meanings.fromTree(children[5])
        self._semantics = Semantics.fromTree(
            self._base, self._pos, children[4], frames,
            self._meanings, self._syntax, children[6])
    self._examples = Examples.fromTree(
        children[3], self._syntax, self._semantics, self._base,
        self._meanings, examples_in_data, examples_out_file,
        misconnected_out_file)
24 25  
25 26 def store(self, all_meanings, stored_positions):
26 27 pos = POS.objects.get(tag=self._pos)
... ... @@ -31,6 +32,7 @@ class Entry:
31 32 self._syntax.store(entry, stored_positions)
32 33 if self._semantics is not None:
33 34 self._semantics.store(entry, all_meanings)
  35 + self._examples.store(entry, all_meanings)
34 36  
35 37 def __str__(self):
36 38 return self._pos + '(' + self._base + ',' + str(self._syntax) + ').'
... ...
importer/Frame.py
... ... @@ -69,7 +69,8 @@ class Frame:
69 69  
70 70  
def getSignature(self):
    """Return '<base>TAB[<meaning>,<meaning>,...]' for this frame.

    Each meaning is rendered with str().
    """
    rendered_meanings = ','.join([str(meaning) for meaning in self._meanings])
    return self._base + '\t[' + rendered_meanings + ']'
  73 +
  74 + def __str__(self):
  75 + return self.getSignature() + '\t[' + ','.join([str(argument) for argument in self._arguments.values()]) + ']'
73 76  
74   - def __unicode__(self):
75   - return self.getSignature() + '\t[' + ','.join([unicode(argument) for argument in self._arguments.values()]) + ']'
... ...
importer/Meanings.py
... ... @@ -62,7 +62,15 @@ class Meaning:
62 62 unit.entry = entry
63 63 unit.save()
64 64 frame.lexical_units.add(unit)
65   -
  65 +
def get(self):
    """Return the first stored LexicalUnit matching this meaning.

    Units are looked up by base == self._name and sense == self._variant.
    When nothing matches, the meaning is printed and UnknownError is raised.
    """
    matching = LexicalUnit.objects.filter(base=self._name, sense=self._variant)
    if len(matching) != 0:
        return matching[0]
    print(self)
    raise UnknownError()
  73 +
def __str__(self):
    """Return the meaning rendered as "'<name>'-<variant>".

    Python 3 never calls __unicode__, so str(meaning) and print(meaning)
    need __str__ to produce this text instead of the default object repr.
    """
    return '\'' + self._name + '\'-' + self._variant

# Python 2 spelling kept as an alias so explicit __unicode__() calls still work.
__unicode__ = __str__
68 76  
... ... @@ -86,7 +94,7 @@ class Meanings:
86 94 return self._meanings
87 95  
def locate(self, meaning_id):
    """Return the meaning stored under meaning_id, or None when it is absent."""
    return self._meanings[meaning_id] if meaning_id in self._meanings else None
... ...
importer/Position.py
... ... @@ -112,7 +112,8 @@ class Position:
112 112 pred_control=position.pred_control,
113 113 phrase_str=phrase_text).count()
114 114 if desc_count == 0:
115   - desc_text = phrase_description2(phrase, self, negativity)
  115 + #desc_text = phrase_description2(phrase, self, negativity)
  116 + desc_text = "tu pójdzie opis"
116 117 desc = NaturalLanguageDescription(
117 118 negativity=negativity,
118 119 function=position.function,
... ... @@ -162,3 +163,18 @@ class Position:
162 163 return pre + 'obj([' + ','.join(temp) + '])' + post
163 164 elif self._function._value == 'head':
164 165 return pre + 'head([' + ','.join(temp) + '])' + post
  166 +
def subposition(self, phrases=None):
    """Render this position as '<function>,<controls>{<phrase>,...}'.

    The function value and the comma-joined control functions are each
    omitted when unset.  A separating comma is added only when both are
    non-empty.  When `phrases` is given, only the position's phrases that
    are contained in it are listed; otherwise all of them are.
    """
    controls = '' if self._control is None else ','.join(
        [control._function for control in self._control])
    function = '' if self._function is None else self._function._value
    if function != '' and controls != '':
        function += ','
    if phrases is None:
        selected = self._phrases
    else:
        selected = [phrase for phrase in self._phrases if phrase in phrases]
    return function + controls + '{' + ','.join([str(phrase) for phrase in selected]) + '}'
... ...
importer/Realizations.py
... ... @@ -35,7 +35,12 @@ class ArgumentRealization:
35 35 hook.save()
36 36 connection.schema_connections.add(hook)
37 37  
38   -
def matches(self, phrases):
    """Return True when every item of `phrases` is among this realization's phrases.

    An empty `phrases` therefore matches.
    """
    return all(phrase in self._phrases for phrase in phrases)
  43 +
39 44 class FrameRealization:
40 45  
41 46 def __init__(self, frame, schema, argument_realizations):
... ... @@ -67,5 +72,12 @@ class FrameRealization:
67 72 subentry = self._schema.getSubentry(entry)
68 73 for ar in self._argument_realizations:
69 74 ar.store(subentry, frame, schema, alternation)
  75 +
def findMatchingArgument(self, phrases):
    """Return the argument of the first realization matching `phrases`, or None."""
    return next(
        (realization._argument
         for realization in self._argument_realizations
         if realization.matches(phrases)),
        None,
    )
  81 +
70 82  
71 83  
... ...
importer/Semantics.py
... ... @@ -39,6 +39,16 @@ class Semantics:
39 39 def getPhraseIds(self):
40 40 return self._phrases
41 41  
def findFrame(self, meaning):
    """Find the first frame containing `meaning`.

    Returns a (frame, realizations) pair, where realizations are those whose
    frame id equals the found frame's id.  Returns None when no frame
    contains the meaning.
    """
    for candidate in self._frames:
        if meaning not in candidate._meanings:
            continue
        matching = [
            realization
            for realization in self._realizations
            if realization._frame._id == candidate._id
        ]
        return candidate, matching
    return None
  51 +
42 52 # def preferencesToUnicode(self):
43 53 # return '\n'.join([unicode(frame) for frame in self._frames])
44 54  
... ...
importer/WalentyPreprocessXML.py
... ... @@ -80,6 +80,7 @@ class WalentyPreprocessTeiHandler(handler.ContentHandler):
80 80 for entry_data, meaning in self.meanings.values():
81 81 name, pos = entry_data
82 82 meaning.save(pos)
  83 + print("Stored")
83 84  
84 85 def extend(self, base, pos, meanings, frames):
85 86 self.entry_meanings[(base, pos)] = [id for id in meanings._meanings]
... ...
importer/WalentyXML.py
... ... @@ -4,6 +4,9 @@
4 4 from xml.sax import handler
5 5 from importer.Entry import Entry
6 6  
  7 +examples_out_file = 'examples_ambig.txt'
  8 +misconnected_examples_out_file = 'examples_to_reattach.txt'
  9 +
7 10  
8 11 class XMLNode:
9 12  
... ... @@ -37,6 +40,9 @@ class WalentyTeiHandler(handler.ContentHandler):
37 40 self._meanings = meanings
38 41 self._frames = frames
39 42 self._stored_positions = {}
  43 + self._examples_in = None # @TODO: read disambiguated file
  44 + self._examples_out = open(examples_out_file, "w")
  45 + self._misconnected_out = open(misconnected_examples_out_file, "w")
40 46  
41 47 def startElement(self, name, attrs):
42 48 if name == 'date':
... ... @@ -61,7 +67,7 @@ class WalentyTeiHandler(handler.ContentHandler):
61 67 if name == 'entry':
62 68 if self._current is not None:
63 69 raise TEIStructureError()
64   - entry = Entry(self._subtree, self._entry_meanings, self._meanings, self._frames)
  70 + entry = Entry(self._subtree, self._entry_meanings, self._meanings, self._frames, self._examples_in, self._examples_out, self._misconnected_out)
65 71 entry.store(self._meanings, self._stored_positions)
66 72 self._content = ''
67 73 else:
... ... @@ -78,5 +84,6 @@ class WalentyTeiHandler(handler.ContentHandler):
78 84 self._content += content
79 85  
def endDocument(self):
    """Close both example report files once the document has been parsed.

    The second close runs in a `finally` clause, so the misconnected-examples
    file is released even if closing the ambiguous-examples file raises.
    The original exception still propagates.
    """
    try:
        self._examples_out.close()
    finally:
        self._misconnected_out.close()
82 89  
... ...
reset_db.sh
... ... @@ -4,3 +4,5 @@ dropdb shellvalier
4 4 createdb shellvalier -E UTF8 -T template0 -l pl_PL.utf8
5 5 python3 manage.py migrate
6 6 python3 manage.py import_plWordnet
  7 +python3 manage.py import_tei
  8 +
... ...
syntax/management/commands/import_tei.py
... ... @@ -9,6 +9,7 @@ from importer.WalentyXML import WalentyTeiHandler
9 9 from importer.WalentyPreprocessXML import WalentyPreprocessTeiHandler
10 10 from shellvalier.settings import BASE_DIR
11 11 from connections.models import POS, Status
  12 +from examples.models import ExampleOpinion, ExampleSource
12 13 from syntax.models import SchemaOpinion, Aspect, InherentSie, Negativity, Predicativity, SyntacticFunction, Control, PredicativeControl
13 14 from semantics.models import FrameOpinion, ArgumentRole, SemanticRole, RoleAttribute, PredefinedSelectionalPreference, SelectionalPreferenceRelation
14 15  
... ... @@ -54,6 +55,8 @@ def import_constants():
54 55 import_semantic_roles()
55 56 import_predefined_preferences()
56 57 import_preference_relations()
  58 + import_examples_sources()
  59 + import_examples_opinions()
57 60 pass
58 61  
59 62 def import_poses():
... ... @@ -149,5 +152,15 @@ def import_preference_relations():
149 152 relat = SelectionalPreferenceRelation(plwn_id=id, key=name)
150 153 relat.save()
151 154  
152   -
def import_examples_sources():
    """Store one ExampleSource per known source key.

    Each key's priority is its position in the list, starting at 0.
    """
    source_keys = [
        'NKJP0.5M',
        'NKJP1.2M',
        'NKJP30M',
        'NKJP250M',
        'NKJP300M',
        'NKJP500M',
        'NKJP1800M',
        'linguistic_literature',
        'other_literature',
        'own',
    ]
    for priority, key in enumerate(source_keys):
        source = ExampleSource(key=key, priority=priority)
        source.save()
  160 +
def import_examples_opinions():
    """Store one ExampleOpinion per known opinion key.

    Each key's priority is its position in the list, starting at 0.
    """
    opinion_keys = ['zły', 'wątpliwy', 'dobry']
    for priority, key in enumerate(opinion_keys):
        opinion = ExampleOpinion(key=key, priority=priority)
        opinion.save()
153 166  
... ...