Commit 92fd5c36277e29243591b6c00b97b8ef63c67c05
1 parent
a2ea5daa
Changed morfeusz based validations to use morfeusz2.
Showing 7 changed files with 619 additions and 319 deletions
common/_morfeusz2.so
0 → 100755
No preview for this file type
common/morfeusz.py deleted
1 | -# encoding=UTF-8 | ||
2 | - | ||
3 | -# Copyright © 2007, 2008, 2010, 2011 Jakub Wilk <jwilk@jwilk.net> | ||
4 | -# | ||
5 | -# Permission is hereby granted, free of charge, to any person obtaining a copy | ||
6 | -# of this software and associated documentation files (the “Software”), to deal | ||
7 | -# in the Software without restriction, including without limitation the rights | ||
8 | -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
9 | -# copies of the Software, and to permit persons to whom the Software is | ||
10 | -# furnished to do so, subject to the following conditions: | ||
11 | -# | ||
12 | -# The above copyright notice and this permission notice shall be included in | ||
13 | -# all copies or substantial portions of the Software. | ||
14 | -# | ||
15 | -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
16 | -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
17 | -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
18 | -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
19 | -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
20 | -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
21 | -# SOFTWARE. | ||
22 | - | ||
23 | -''' | ||
24 | -Bindings for Morfeusz_, a Polish morphological analyser. | ||
25 | - | ||
26 | -.. _Morfeusz: | ||
27 | - http://sgjp.pl/morfeusz/ | ||
28 | -''' | ||
29 | - | ||
30 | -from __future__ import with_statement | ||
31 | - | ||
32 | -import collections | ||
33 | -import ctypes | ||
34 | -import sys | ||
35 | - | ||
36 | -py3k = sys.version_info >= (3, 0) | ||
37 | - | ||
38 | -if py3k: | ||
39 | - import _thread as thread | ||
40 | -else: | ||
41 | - import thread | ||
42 | -if not py3k: | ||
43 | - from itertools import izip as zip | ||
44 | - | ||
45 | -if py3k: | ||
46 | - unicode = str | ||
47 | - | ||
48 | -__author__ = 'Jakub Wilk <jwilk@jwilk.net>' | ||
49 | -__version__ = '0.3300' | ||
50 | -__all__ = ['analyse', 'about', 'expand_tags', 'ATTRIBUTES', 'VALUES'] | ||
51 | - | ||
52 | -ATTRIBUTES = ''' | ||
53 | -subst=number case gender | ||
54 | -depr=number case gender | ||
55 | -adj=number case gender degree | ||
56 | -adja= | ||
57 | -adjc= | ||
58 | -adjp= | ||
59 | -adv=degree | ||
60 | -num=number case gender accommodability | ||
61 | -numcol=number case gender accommodability | ||
62 | -ppron12=number case gender person accentability | ||
63 | -ppron3=number case gender person accentability post_prepositionality | ||
64 | -siebie=case | ||
65 | -fin=number person aspect | ||
66 | -bedzie=number person aspect | ||
67 | -aglt=number person aspect vocalicity | ||
68 | -praet=number gender aspect agglutination | ||
69 | -impt=number person aspect | ||
70 | -imps=aspect | ||
71 | -inf=aspect | ||
72 | -pcon=aspect | ||
73 | -pant=aspect | ||
74 | -ger=number case gender aspect negation | ||
75 | -pact=number case gender aspect negation | ||
76 | -ppas=number case gender aspect negation | ||
77 | -winien=number gender aspect | ||
78 | -pred= | ||
79 | -prep=case vocalicity | ||
80 | -conj= | ||
81 | -comp= | ||
82 | -brev=fullstoppedness | ||
83 | -burk= | ||
84 | -interj= | ||
85 | -qub=vocalicity | ||
86 | -xxs=number case gender | ||
87 | -xxx= | ||
88 | -interp= | ||
89 | -ign= | ||
90 | -sp= | ||
91 | -''' | ||
92 | -ATTRIBUTES = \ | ||
93 | -dict( | ||
94 | - (key, tuple(values.split())) | ||
95 | - for line in ATTRIBUTES.splitlines() if line | ||
96 | - for (key, values) in (line.split('=', 1),) | ||
97 | -) | ||
98 | - | ||
99 | -VALUES = ''' | ||
100 | -number=sg pl | ||
101 | -case=nom gen dat acc inst loc voc | ||
102 | -gender=m1 m2 m3 f n1 n2 p1 p2 p3 | ||
103 | -person=pri sec ter | ||
104 | -degree=pos comp sup | ||
105 | -aspect=imperf perf | ||
106 | -negation=aff neg | ||
107 | -accentability=akc nakc | ||
108 | -post_prepositionality=npraep praep | ||
109 | -accommodability=congr rec | ||
110 | -agglutination=agl nagl | ||
111 | -vocalicity=nwok wok | ||
112 | -fullstoppedness=pun npun | ||
113 | -''' | ||
114 | -VALUES = \ | ||
115 | -dict( | ||
116 | - (key, tuple(values.split())) | ||
117 | - for line in VALUES.splitlines() if line | ||
118 | - for (key, values) in (line.split('=', 1),) | ||
119 | -) | ||
120 | - | ||
121 | -libmorfeusz = ctypes.CDLL('libmorfeusz.so.0') | ||
122 | - | ||
123 | -MORFOPT_ENCODING = 1 | ||
124 | -MORFEUSZ_UTF_8 = 8 | ||
125 | - | ||
126 | -MORFOPT_WHITESPACE = 2 | ||
127 | -MORFEUSZ_SKIP_WHITESPACE = 0 | ||
128 | -MORFEUSZ_KEEP_WHITESPACE = 2 | ||
129 | - | ||
130 | -libmorfeusz.morfeusz_set_option(MORFOPT_ENCODING, MORFEUSZ_UTF_8) | ||
131 | -libmorfeusz_lock = thread.allocate_lock() | ||
132 | - | ||
133 | -class InterpEdge(ctypes.Structure): | ||
134 | - _fields_ = \ | ||
135 | - ( | ||
136 | - ('i', ctypes.c_int), | ||
137 | - ('j', ctypes.c_int), | ||
138 | - ('_orth', ctypes.c_char_p), | ||
139 | - ('_base', ctypes.c_char_p), | ||
140 | - ('_tags', ctypes.c_char_p) | ||
141 | - ) | ||
142 | - | ||
143 | - if py3k: | ||
144 | - @property | ||
145 | - def tags(self): | ||
146 | - if self._tags is not None: | ||
147 | - return self._tags.decode('UTF-8') | ||
148 | - else: | ||
149 | - @property | ||
150 | - def tags(self): | ||
151 | - return self._tags | ||
152 | - | ||
153 | - @property | ||
154 | - def orth(self): | ||
155 | - if self._orth is not None: | ||
156 | - return self._orth.decode('UTF-8') | ||
157 | - | ||
158 | - @property | ||
159 | - def base(self): | ||
160 | - if self._base is not None: | ||
161 | - return self._base.decode('UTF-8') | ||
162 | - | ||
163 | -libmorfeusz_analyse = libmorfeusz.morfeusz_analyse | ||
164 | -libmorfeusz_analyse.restype = ctypes.POINTER(InterpEdge) | ||
165 | -libmorfeusz_about = libmorfeusz.morfeusz_about | ||
166 | -libmorfeusz_about.restype = ctypes.c_char_p | ||
167 | - | ||
168 | -def expand_tags(tags, expand_dot=True, expand_underscore=True): | ||
169 | - | ||
170 | - if tags is None: | ||
171 | - yield | ||
172 | - return | ||
173 | - tags = str(tags) | ||
174 | - for tag in tags.split('|'): | ||
175 | - tag = tag.split(':') | ||
176 | - pos = tag.pop(0) | ||
177 | - chunks = [(pos,)] | ||
178 | - chunks += \ | ||
179 | - ( | ||
180 | - VALUES[attribute] if chunk == '_' and expand_underscore | ||
181 | - else chunk.split('.') | ||
182 | - for chunk, attribute in zip(tag, ATTRIBUTES[pos]) | ||
183 | - ) | ||
184 | - | ||
185 | - if not expand_dot: | ||
186 | - yield ':'.join('.'.join(values) for values in chunks) | ||
187 | - continue | ||
188 | - | ||
189 | - def expand_chunks(i): | ||
190 | - if i >= len(chunks): | ||
191 | - yield () | ||
192 | - else: | ||
193 | - tail = tuple(expand_chunks(i + 1)) | ||
194 | - for chunk_variant in chunks[i]: | ||
195 | - for tail_variant in tail: | ||
196 | - yield (chunk_variant,) + tail_variant | ||
197 | - | ||
198 | - for x in expand_chunks(0): | ||
199 | - yield ':'.join(x) | ||
200 | - | ||
201 | -_expand_tags = expand_tags | ||
202 | - | ||
203 | -def _dont_expand_tags(s, **kwargs): | ||
204 | - return [s] | ||
205 | - | ||
206 | -def analyse(text, expand_tags=True, expand_dot=True, expand_underscore=True, dag=False, keep_whitespace=False): | ||
207 | - ''' | ||
208 | - Analyse the text. | ||
209 | - ''' | ||
210 | - expand_tags = _expand_tags if expand_tags else _dont_expand_tags | ||
211 | - text = unicode(text) | ||
212 | - text = text.encode('UTF-8') | ||
213 | - analyse = _analyse_as_dag if dag else _analyse_as_list | ||
214 | - return analyse( | ||
215 | - text=text, | ||
216 | - expand_tags=expand_tags, | ||
217 | - expand_dot=expand_dot, | ||
218 | - expand_underscore=expand_underscore, | ||
219 | - keep_whitespace=keep_whitespace | ||
220 | - ) | ||
221 | - | ||
222 | -def _analyse_as_dag(text, expand_tags, expand_dot, expand_underscore, keep_whitespace): | ||
223 | - result = [] | ||
224 | - with libmorfeusz_lock: | ||
225 | - if keep_whitespace: | ||
226 | - if libmorfeusz.morfeusz_set_option(MORFOPT_WHITESPACE, MORFEUSZ_KEEP_WHITESPACE) != 1: | ||
227 | - raise NotImplementedError("This version of Morfeusz doesn't support keep_whitespace") | ||
228 | - for edge in libmorfeusz_analyse(text): | ||
229 | - if edge.i == -1: | ||
230 | - break | ||
231 | - for tag in expand_tags(edge.tags, expand_dot=expand_dot, expand_underscore=expand_underscore): | ||
232 | - result += [(edge.i, edge.j, (edge.orth, edge.base, tag))] | ||
233 | - if keep_whitespace: | ||
234 | - libmorfeusz.morfeusz_set_option(MORFOPT_WHITESPACE, MORFEUSZ_SKIP_WHITESPACE) | ||
235 | - return result | ||
236 | - | ||
237 | -def _analyse_as_list(text, expand_tags, expand_dot, expand_underscore, keep_whitespace): | ||
238 | - dag = collections.defaultdict(list) | ||
239 | - with libmorfeusz_lock: | ||
240 | - if keep_whitespace: | ||
241 | - if libmorfeusz.morfeusz_set_option(MORFOPT_WHITESPACE, MORFEUSZ_KEEP_WHITESPACE) != 1: | ||
242 | - raise NotImplementedError("This version of Morfeusz doesn't support keep_whitespace") | ||
243 | - for edge in libmorfeusz_analyse(text): | ||
244 | - if edge.i == -1: | ||
245 | - break | ||
246 | - for tag in expand_tags(edge.tags, expand_dot=expand_dot, expand_underscore=expand_underscore): | ||
247 | - dag[edge.i] += [((edge.orth, edge.base, tag), edge.j)] | ||
248 | - if keep_whitespace: | ||
249 | - libmorfeusz.morfeusz_set_option(MORFOPT_WHITESPACE, MORFEUSZ_SKIP_WHITESPACE) | ||
250 | - def expand_dag(i): | ||
251 | - nexts = dag[i] | ||
252 | - if not nexts: | ||
253 | - yield [] | ||
254 | - else: | ||
255 | - for head, j in nexts: | ||
256 | - for tail in expand_dag(j): | ||
257 | - yield [head] + tail | ||
258 | - return list(expand_dag(0)) | ||
259 | - | ||
260 | -def about(): | ||
261 | - ''' | ||
262 | - Return a string containing information on authors and version of the | ||
263 | - underlying library. | ||
264 | - ''' | ||
265 | - about = libmorfeusz_about() | ||
266 | - try: | ||
267 | - return about.decode('UTF-8') | ||
268 | - except UnicodeError: | ||
269 | - return about.decode('ISO-8859-2') | ||
270 | - | ||
271 | -# vim:ts=4 sw=4 et |
common/morfeusz2.py
0 → 100644
1 | +# This file was automatically generated by SWIG (http://www.swig.org). | ||
2 | +# Version 2.0.4 | ||
3 | +# | ||
4 | +# Do not make changes to this file unless you know what you are doing--modify | ||
5 | +# the SWIG interface file instead. | ||
6 | + | ||
7 | + | ||
8 | + | ||
9 | +from sys import version_info | ||
10 | +if version_info >= (2,6,0): | ||
11 | + def swig_import_helper(): | ||
12 | + from os.path import dirname | ||
13 | + import imp | ||
14 | + fp = None | ||
15 | + try: | ||
16 | + fp, pathname, description = imp.find_module('_morfeusz2', [dirname(__file__)]) | ||
17 | + except ImportError: | ||
18 | + import _morfeusz2 | ||
19 | + return _morfeusz2 | ||
20 | + if fp is not None: | ||
21 | + try: | ||
22 | + _mod = imp.load_module('_morfeusz2', fp, pathname, description) | ||
23 | + finally: | ||
24 | + fp.close() | ||
25 | + return _mod | ||
26 | + _morfeusz2 = swig_import_helper() | ||
27 | + del swig_import_helper | ||
28 | +else: | ||
29 | + import _morfeusz2 | ||
30 | +del version_info | ||
31 | +try: | ||
32 | + _swig_property = property | ||
33 | +except NameError: | ||
34 | + pass # Python < 2.2 doesn't have 'property'. | ||
35 | +def _swig_setattr_nondynamic(self,class_type,name,value,static=1): | ||
36 | + if (name == "thisown"): return self.this.own(value) | ||
37 | + if (name == "this"): | ||
38 | + if type(value).__name__ == 'SwigPyObject': | ||
39 | + self.__dict__[name] = value | ||
40 | + return | ||
41 | + method = class_type.__swig_setmethods__.get(name,None) | ||
42 | + if method: return method(self,value) | ||
43 | + if (not static): | ||
44 | + self.__dict__[name] = value | ||
45 | + else: | ||
46 | + raise AttributeError("You cannot add attributes to %s" % self) | ||
47 | + | ||
48 | +def _swig_setattr(self,class_type,name,value): | ||
49 | + return _swig_setattr_nondynamic(self,class_type,name,value,0) | ||
50 | + | ||
51 | +def _swig_getattr(self,class_type,name): | ||
52 | + if (name == "thisown"): return self.this.own() | ||
53 | + method = class_type.__swig_getmethods__.get(name,None) | ||
54 | + if method: return method(self) | ||
55 | + raise AttributeError(name) | ||
56 | + | ||
57 | +def _swig_repr(self): | ||
58 | + try: strthis = "proxy of " + self.this.__repr__() | ||
59 | + except: strthis = "" | ||
60 | + return "<%s.%s; %s >" % (self.__class__.__module__, self.__class__.__name__, strthis,) | ||
61 | + | ||
62 | +try: | ||
63 | + _object = object | ||
64 | + _newclass = 1 | ||
65 | +except AttributeError: | ||
66 | + class _object : pass | ||
67 | + _newclass = 0 | ||
68 | + | ||
69 | + | ||
70 | +class SwigPyIterator(_object): | ||
71 | + __swig_setmethods__ = {} | ||
72 | + __setattr__ = lambda self, name, value: _swig_setattr(self, SwigPyIterator, name, value) | ||
73 | + __swig_getmethods__ = {} | ||
74 | + __getattr__ = lambda self, name: _swig_getattr(self, SwigPyIterator, name) | ||
75 | + def __init__(self, *args, **kwargs): raise AttributeError("No constructor defined - class is abstract") | ||
76 | + __repr__ = _swig_repr | ||
77 | + __swig_destroy__ = _morfeusz2.delete_SwigPyIterator | ||
78 | + __del__ = lambda self : None; | ||
79 | + def value(self): return _morfeusz2.SwigPyIterator_value(self) | ||
80 | + def incr(self, n = 1): return _morfeusz2.SwigPyIterator_incr(self, n) | ||
81 | + def decr(self, n = 1): return _morfeusz2.SwigPyIterator_decr(self, n) | ||
82 | + def distance(self, *args): return _morfeusz2.SwigPyIterator_distance(self, *args) | ||
83 | + def equal(self, *args): return _morfeusz2.SwigPyIterator_equal(self, *args) | ||
84 | + def copy(self): return _morfeusz2.SwigPyIterator_copy(self) | ||
85 | + def next(self): return _morfeusz2.SwigPyIterator_next(self) | ||
86 | + def __next__(self): return _morfeusz2.SwigPyIterator___next__(self) | ||
87 | + def previous(self): return _morfeusz2.SwigPyIterator_previous(self) | ||
88 | + def advance(self, *args): return _morfeusz2.SwigPyIterator_advance(self, *args) | ||
89 | + def __eq__(self, *args): return _morfeusz2.SwigPyIterator___eq__(self, *args) | ||
90 | + def __ne__(self, *args): return _morfeusz2.SwigPyIterator___ne__(self, *args) | ||
91 | + def __iadd__(self, *args): return _morfeusz2.SwigPyIterator___iadd__(self, *args) | ||
92 | + def __isub__(self, *args): return _morfeusz2.SwigPyIterator___isub__(self, *args) | ||
93 | + def __add__(self, *args): return _morfeusz2.SwigPyIterator___add__(self, *args) | ||
94 | + def __sub__(self, *args): return _morfeusz2.SwigPyIterator___sub__(self, *args) | ||
95 | + def __iter__(self): return self | ||
96 | +SwigPyIterator_swigregister = _morfeusz2.SwigPyIterator_swigregister | ||
97 | +SwigPyIterator_swigregister(SwigPyIterator) | ||
98 | + | ||
99 | +class InterpsList(_object): | ||
100 | + __swig_setmethods__ = {} | ||
101 | + __setattr__ = lambda self, name, value: _swig_setattr(self, InterpsList, name, value) | ||
102 | + __swig_getmethods__ = {} | ||
103 | + __getattr__ = lambda self, name: _swig_getattr(self, InterpsList, name) | ||
104 | + __repr__ = _swig_repr | ||
105 | + def iterator(self): return _morfeusz2.InterpsList_iterator(self) | ||
106 | + def __iter__(self): return self.iterator() | ||
107 | + def __nonzero__(self): return _morfeusz2.InterpsList___nonzero__(self) | ||
108 | + def __bool__(self): return _morfeusz2.InterpsList___bool__(self) | ||
109 | + def __len__(self): return _morfeusz2.InterpsList___len__(self) | ||
110 | + def pop(self): return _morfeusz2.InterpsList_pop(self) | ||
111 | + def __getslice__(self, *args): return _morfeusz2.InterpsList___getslice__(self, *args) | ||
112 | + def __setslice__(self, *args): return _morfeusz2.InterpsList___setslice__(self, *args) | ||
113 | + def __delslice__(self, *args): return _morfeusz2.InterpsList___delslice__(self, *args) | ||
114 | + def __delitem__(self, *args): return _morfeusz2.InterpsList___delitem__(self, *args) | ||
115 | + def __getitem__(self, *args): return _morfeusz2.InterpsList___getitem__(self, *args) | ||
116 | + def __setitem__(self, *args): return _morfeusz2.InterpsList___setitem__(self, *args) | ||
117 | + def append(self, *args): return _morfeusz2.InterpsList_append(self, *args) | ||
118 | + def empty(self): return _morfeusz2.InterpsList_empty(self) | ||
119 | + def size(self): return _morfeusz2.InterpsList_size(self) | ||
120 | + def clear(self): return _morfeusz2.InterpsList_clear(self) | ||
121 | + def swap(self, *args): return _morfeusz2.InterpsList_swap(self, *args) | ||
122 | + def get_allocator(self): return _morfeusz2.InterpsList_get_allocator(self) | ||
123 | + def begin(self): return _morfeusz2.InterpsList_begin(self) | ||
124 | + def end(self): return _morfeusz2.InterpsList_end(self) | ||
125 | + def rbegin(self): return _morfeusz2.InterpsList_rbegin(self) | ||
126 | + def rend(self): return _morfeusz2.InterpsList_rend(self) | ||
127 | + def pop_back(self): return _morfeusz2.InterpsList_pop_back(self) | ||
128 | + def erase(self, *args): return _morfeusz2.InterpsList_erase(self, *args) | ||
129 | + def __init__(self, *args): | ||
130 | + this = _morfeusz2.new_InterpsList(*args) | ||
131 | + try: self.this.append(this) | ||
132 | + except: self.this = this | ||
133 | + def push_back(self, *args): return _morfeusz2.InterpsList_push_back(self, *args) | ||
134 | + def front(self): return _morfeusz2.InterpsList_front(self) | ||
135 | + def back(self): return _morfeusz2.InterpsList_back(self) | ||
136 | + def assign(self, *args): return _morfeusz2.InterpsList_assign(self, *args) | ||
137 | + def resize(self, *args): return _morfeusz2.InterpsList_resize(self, *args) | ||
138 | + def insert(self, *args): return _morfeusz2.InterpsList_insert(self, *args) | ||
139 | + def reserve(self, *args): return _morfeusz2.InterpsList_reserve(self, *args) | ||
140 | + def capacity(self): return _morfeusz2.InterpsList_capacity(self) | ||
141 | + __swig_destroy__ = _morfeusz2.delete_InterpsList | ||
142 | + __del__ = lambda self : None; | ||
143 | +InterpsList_swigregister = _morfeusz2.InterpsList_swigregister | ||
144 | +InterpsList_swigregister(InterpsList) | ||
145 | + | ||
146 | +class StringsList(_object): | ||
147 | + __swig_setmethods__ = {} | ||
148 | + __setattr__ = lambda self, name, value: _swig_setattr(self, StringsList, name, value) | ||
149 | + __swig_getmethods__ = {} | ||
150 | + __getattr__ = lambda self, name: _swig_getattr(self, StringsList, name) | ||
151 | + __repr__ = _swig_repr | ||
152 | + def iterator(self): return _morfeusz2.StringsList_iterator(self) | ||
153 | + def __iter__(self): return self.iterator() | ||
154 | + def __nonzero__(self): return _morfeusz2.StringsList___nonzero__(self) | ||
155 | + def __bool__(self): return _morfeusz2.StringsList___bool__(self) | ||
156 | + def __len__(self): return _morfeusz2.StringsList___len__(self) | ||
157 | + def pop(self): return _morfeusz2.StringsList_pop(self) | ||
158 | + def __getslice__(self, *args): return _morfeusz2.StringsList___getslice__(self, *args) | ||
159 | + def __setslice__(self, *args): return _morfeusz2.StringsList___setslice__(self, *args) | ||
160 | + def __delslice__(self, *args): return _morfeusz2.StringsList___delslice__(self, *args) | ||
161 | + def __delitem__(self, *args): return _morfeusz2.StringsList___delitem__(self, *args) | ||
162 | + def __getitem__(self, *args): return _morfeusz2.StringsList___getitem__(self, *args) | ||
163 | + def __setitem__(self, *args): return _morfeusz2.StringsList___setitem__(self, *args) | ||
164 | + def append(self, *args): return _morfeusz2.StringsList_append(self, *args) | ||
165 | + def empty(self): return _morfeusz2.StringsList_empty(self) | ||
166 | + def size(self): return _morfeusz2.StringsList_size(self) | ||
167 | + def clear(self): return _morfeusz2.StringsList_clear(self) | ||
168 | + def swap(self, *args): return _morfeusz2.StringsList_swap(self, *args) | ||
169 | + def get_allocator(self): return _morfeusz2.StringsList_get_allocator(self) | ||
170 | + def begin(self): return _morfeusz2.StringsList_begin(self) | ||
171 | + def end(self): return _morfeusz2.StringsList_end(self) | ||
172 | + def rbegin(self): return _morfeusz2.StringsList_rbegin(self) | ||
173 | + def rend(self): return _morfeusz2.StringsList_rend(self) | ||
174 | + def pop_back(self): return _morfeusz2.StringsList_pop_back(self) | ||
175 | + def erase(self, *args): return _morfeusz2.StringsList_erase(self, *args) | ||
176 | + def __init__(self, *args): | ||
177 | + this = _morfeusz2.new_StringsList(*args) | ||
178 | + try: self.this.append(this) | ||
179 | + except: self.this = this | ||
180 | + def push_back(self, *args): return _morfeusz2.StringsList_push_back(self, *args) | ||
181 | + def front(self): return _morfeusz2.StringsList_front(self) | ||
182 | + def back(self): return _morfeusz2.StringsList_back(self) | ||
183 | + def assign(self, *args): return _morfeusz2.StringsList_assign(self, *args) | ||
184 | + def resize(self, *args): return _morfeusz2.StringsList_resize(self, *args) | ||
185 | + def insert(self, *args): return _morfeusz2.StringsList_insert(self, *args) | ||
186 | + def reserve(self, *args): return _morfeusz2.StringsList_reserve(self, *args) | ||
187 | + def capacity(self): return _morfeusz2.StringsList_capacity(self) | ||
188 | + __swig_destroy__ = _morfeusz2.delete_StringsList | ||
189 | + __del__ = lambda self : None; | ||
190 | +StringsList_swigregister = _morfeusz2.StringsList_swigregister | ||
191 | +StringsList_swigregister(StringsList) | ||
192 | + | ||
193 | +class StringsLinkedList(_object): | ||
194 | + __swig_setmethods__ = {} | ||
195 | + __setattr__ = lambda self, name, value: _swig_setattr(self, StringsLinkedList, name, value) | ||
196 | + __swig_getmethods__ = {} | ||
197 | + __getattr__ = lambda self, name: _swig_getattr(self, StringsLinkedList, name) | ||
198 | + __repr__ = _swig_repr | ||
199 | + def iterator(self): return _morfeusz2.StringsLinkedList_iterator(self) | ||
200 | + def __iter__(self): return self.iterator() | ||
201 | + def __nonzero__(self): return _morfeusz2.StringsLinkedList___nonzero__(self) | ||
202 | + def __bool__(self): return _morfeusz2.StringsLinkedList___bool__(self) | ||
203 | + def __len__(self): return _morfeusz2.StringsLinkedList___len__(self) | ||
204 | + def pop(self): return _morfeusz2.StringsLinkedList_pop(self) | ||
205 | + def __getslice__(self, *args): return _morfeusz2.StringsLinkedList___getslice__(self, *args) | ||
206 | + def __setslice__(self, *args): return _morfeusz2.StringsLinkedList___setslice__(self, *args) | ||
207 | + def __delslice__(self, *args): return _morfeusz2.StringsLinkedList___delslice__(self, *args) | ||
208 | + def __delitem__(self, *args): return _morfeusz2.StringsLinkedList___delitem__(self, *args) | ||
209 | + def __getitem__(self, *args): return _morfeusz2.StringsLinkedList___getitem__(self, *args) | ||
210 | + def __setitem__(self, *args): return _morfeusz2.StringsLinkedList___setitem__(self, *args) | ||
211 | + def append(self, *args): return _morfeusz2.StringsLinkedList_append(self, *args) | ||
212 | + def empty(self): return _morfeusz2.StringsLinkedList_empty(self) | ||
213 | + def size(self): return _morfeusz2.StringsLinkedList_size(self) | ||
214 | + def clear(self): return _morfeusz2.StringsLinkedList_clear(self) | ||
215 | + def swap(self, *args): return _morfeusz2.StringsLinkedList_swap(self, *args) | ||
216 | + def get_allocator(self): return _morfeusz2.StringsLinkedList_get_allocator(self) | ||
217 | + def begin(self): return _morfeusz2.StringsLinkedList_begin(self) | ||
218 | + def end(self): return _morfeusz2.StringsLinkedList_end(self) | ||
219 | + def rbegin(self): return _morfeusz2.StringsLinkedList_rbegin(self) | ||
220 | + def rend(self): return _morfeusz2.StringsLinkedList_rend(self) | ||
221 | + def pop_back(self): return _morfeusz2.StringsLinkedList_pop_back(self) | ||
222 | + def erase(self, *args): return _morfeusz2.StringsLinkedList_erase(self, *args) | ||
223 | + def __init__(self, *args): | ||
224 | + this = _morfeusz2.new_StringsLinkedList(*args) | ||
225 | + try: self.this.append(this) | ||
226 | + except: self.this = this | ||
227 | + def push_back(self, *args): return _morfeusz2.StringsLinkedList_push_back(self, *args) | ||
228 | + def front(self): return _morfeusz2.StringsLinkedList_front(self) | ||
229 | + def back(self): return _morfeusz2.StringsLinkedList_back(self) | ||
230 | + def assign(self, *args): return _morfeusz2.StringsLinkedList_assign(self, *args) | ||
231 | + def resize(self, *args): return _morfeusz2.StringsLinkedList_resize(self, *args) | ||
232 | + def insert(self, *args): return _morfeusz2.StringsLinkedList_insert(self, *args) | ||
233 | + def pop_front(self): return _morfeusz2.StringsLinkedList_pop_front(self) | ||
234 | + def push_front(self, *args): return _morfeusz2.StringsLinkedList_push_front(self, *args) | ||
235 | + def reverse(self): return _morfeusz2.StringsLinkedList_reverse(self) | ||
236 | + __swig_destroy__ = _morfeusz2.delete_StringsLinkedList | ||
237 | + __del__ = lambda self : None; | ||
238 | +StringsLinkedList_swigregister = _morfeusz2.StringsLinkedList_swigregister | ||
239 | +StringsLinkedList_swigregister(StringsLinkedList) | ||
240 | + | ||
241 | +class StringsSet(_object): | ||
242 | + __swig_setmethods__ = {} | ||
243 | + __setattr__ = lambda self, name, value: _swig_setattr(self, StringsSet, name, value) | ||
244 | + __swig_getmethods__ = {} | ||
245 | + __getattr__ = lambda self, name: _swig_getattr(self, StringsSet, name) | ||
246 | + __repr__ = _swig_repr | ||
247 | + def iterator(self): return _morfeusz2.StringsSet_iterator(self) | ||
248 | + def __iter__(self): return self.iterator() | ||
249 | + def __nonzero__(self): return _morfeusz2.StringsSet___nonzero__(self) | ||
250 | + def __bool__(self): return _morfeusz2.StringsSet___bool__(self) | ||
251 | + def __len__(self): return _morfeusz2.StringsSet___len__(self) | ||
252 | + def append(self, *args): return _morfeusz2.StringsSet_append(self, *args) | ||
253 | + def __contains__(self, *args): return _morfeusz2.StringsSet___contains__(self, *args) | ||
254 | + def __getitem__(self, *args): return _morfeusz2.StringsSet___getitem__(self, *args) | ||
255 | + def __init__(self, *args): | ||
256 | + this = _morfeusz2.new_StringsSet(*args) | ||
257 | + try: self.this.append(this) | ||
258 | + except: self.this = this | ||
259 | + def empty(self): return _morfeusz2.StringsSet_empty(self) | ||
260 | + def size(self): return _morfeusz2.StringsSet_size(self) | ||
261 | + def clear(self): return _morfeusz2.StringsSet_clear(self) | ||
262 | + def swap(self, *args): return _morfeusz2.StringsSet_swap(self, *args) | ||
263 | + def count(self, *args): return _morfeusz2.StringsSet_count(self, *args) | ||
264 | + def begin(self): return _morfeusz2.StringsSet_begin(self) | ||
265 | + def end(self): return _morfeusz2.StringsSet_end(self) | ||
266 | + def rbegin(self): return _morfeusz2.StringsSet_rbegin(self) | ||
267 | + def rend(self): return _morfeusz2.StringsSet_rend(self) | ||
268 | + def erase(self, *args): return _morfeusz2.StringsSet_erase(self, *args) | ||
269 | + def find(self, *args): return _morfeusz2.StringsSet_find(self, *args) | ||
270 | + def lower_bound(self, *args): return _morfeusz2.StringsSet_lower_bound(self, *args) | ||
271 | + def upper_bound(self, *args): return _morfeusz2.StringsSet_upper_bound(self, *args) | ||
272 | + def equal_range(self, *args): return _morfeusz2.StringsSet_equal_range(self, *args) | ||
273 | + def insert(self, *args): return _morfeusz2.StringsSet_insert(self, *args) | ||
274 | + __swig_destroy__ = _morfeusz2.delete_StringsSet | ||
275 | + __del__ = lambda self : None; | ||
276 | +StringsSet_swigregister = _morfeusz2.StringsSet_swigregister | ||
277 | +StringsSet_swigregister(StringsSet) | ||
278 | + | ||
279 | +SEPARATE_NUMBERING = _morfeusz2.SEPARATE_NUMBERING | ||
280 | +CONTINUOUS_NUMBERING = _morfeusz2.CONTINUOUS_NUMBERING | ||
281 | +CONDITIONALLY_CASE_SENSITIVE = _morfeusz2.CONDITIONALLY_CASE_SENSITIVE | ||
282 | +STRICTLY_CASE_SENSITIVE = _morfeusz2.STRICTLY_CASE_SENSITIVE | ||
283 | +IGNORE_CASE = _morfeusz2.IGNORE_CASE | ||
284 | +SKIP_WHITESPACES = _morfeusz2.SKIP_WHITESPACES | ||
285 | +APPEND_WHITESPACES = _morfeusz2.APPEND_WHITESPACES | ||
286 | +KEEP_WHITESPACES = _morfeusz2.KEEP_WHITESPACES | ||
287 | +ANALYSE_ONLY = _morfeusz2.ANALYSE_ONLY | ||
288 | +GENERATE_ONLY = _morfeusz2.GENERATE_ONLY | ||
289 | +BOTH_ANALYSE_AND_GENERATE = _morfeusz2.BOTH_ANALYSE_AND_GENERATE | ||
290 | +class Morfeusz(_object): | ||
291 | + __swig_setmethods__ = {} | ||
292 | + __setattr__ = lambda self, name, value: _swig_setattr(self, Morfeusz, name, value) | ||
293 | + __swig_getmethods__ = {} | ||
294 | + __getattr__ = lambda self, name: _swig_getattr(self, Morfeusz, name) | ||
295 | + def __init__(self, *args, **kwargs): raise AttributeError("No constructor defined - class is abstract") | ||
296 | + __repr__ = _swig_repr | ||
297 | + __swig_getmethods__["getVersion"] = lambda x: _morfeusz2.Morfeusz_getVersion | ||
298 | + if _newclass:getVersion = staticmethod(_morfeusz2.Morfeusz_getVersion) | ||
299 | + __swig_getmethods__["getDefaultDictName"] = lambda x: _morfeusz2.Morfeusz_getDefaultDictName | ||
300 | + if _newclass:getDefaultDictName = staticmethod(_morfeusz2.Morfeusz_getDefaultDictName) | ||
301 | + __swig_getmethods__["getCopyright"] = lambda x: _morfeusz2.Morfeusz_getCopyright | ||
302 | + if _newclass:getCopyright = staticmethod(_morfeusz2.Morfeusz_getCopyright) | ||
303 | + __swig_getmethods__["createInstance"] = lambda x: _morfeusz2.Morfeusz_createInstance | ||
304 | + if _newclass:createInstance = staticmethod(_morfeusz2.Morfeusz_createInstance) | ||
305 | + def getDictID(self): return _morfeusz2.Morfeusz_getDictID(self) | ||
306 | + def getDictCopyright(self): return _morfeusz2.Morfeusz_getDictCopyright(self) | ||
307 | + def clone(self): return _morfeusz2.Morfeusz_clone(self) | ||
308 | + __swig_destroy__ = _morfeusz2.delete_Morfeusz | ||
309 | + __del__ = lambda self : None; | ||
310 | + def analyse(self, text): | ||
311 | + """ | ||
312 | + Analyse given text and return a list of MorphInterpretation objects. | ||
313 | + """ | ||
314 | + res = InterpsList() | ||
315 | + _morfeusz2.Morfeusz_analyse(self, text, res) | ||
316 | + return res | ||
317 | + | ||
318 | + | ||
319 | + def generate(self, lemma, tagId=None): | ||
320 | + """ | ||
321 | + Perform morphological synthesis on given text and return a list of MorphInterpretation objects. | ||
322 | + """ | ||
323 | + if tagId is not None: | ||
324 | + return self._generateByTagId(lemma, tagId) | ||
325 | + else: | ||
326 | + res = InterpsList() | ||
327 | + _morfeusz2.Morfeusz_generate(self, lemma, res) | ||
328 | + return res | ||
329 | + | ||
330 | + | ||
331 | + def setAggl(self, optionString): | ||
332 | + """ | ||
333 | + Select agglutination rules option | ||
334 | + """ | ||
335 | + _morfeusz2.Morfeusz_setAggl(self, optionString.encode('utf8')) | ||
336 | + | ||
337 | + | ||
338 | + def getAggl(self): return _morfeusz2.Morfeusz_getAggl(self) | ||
339 | + def setPraet(self, optionString): | ||
340 | + """ | ||
341 | + Select past tense segmentation | ||
342 | + """ | ||
343 | + _morfeusz2.Morfeusz_setPraet(self, optionString.encode('utf8')) | ||
344 | + | ||
345 | + | ||
346 | + def getPraet(self): return _morfeusz2.Morfeusz_getPraet(self) | ||
347 | + def setCaseHandling(self, option): | ||
348 | + """ | ||
349 | + Set case handling option (valid options are CONDITIONALLY_CASE_SENSITIVE, STRICTLY_CASE_SENSITIVE, IGNORE_CASE) | ||
350 | + """ | ||
351 | + _morfeusz2.Morfeusz_setCaseHandling(self, option) | ||
352 | + | ||
353 | + | ||
354 | + def getCaseHandling(self): return _morfeusz2.Morfeusz_getCaseHandling(self) | ||
355 | + def setTokenNumbering(self, option): | ||
356 | + """ | ||
357 | + Set token numbering option (valid options are SEPARATE_NUMBERING, CONTINUOUS_NUMBERING) | ||
358 | + """ | ||
359 | + _morfeusz2.Morfeusz_setTokenNumbering(self, option) | ||
360 | + | ||
361 | + | ||
362 | + def getTokenNumbering(self): return _morfeusz2.Morfeusz_getTokenNumbering(self) | ||
363 | + def setWhitespaceHandling(self, option): | ||
364 | + """ | ||
365 | + Set whitespace handling option (valid options are SKIP_WHITESPACES, KEEP_WHITESPACES, APPEND_WHITESPACES) | |
366 | + """ | ||
367 | + _morfeusz2.Morfeusz_setWhitespaceHandling(self, option) | ||
368 | + | ||
369 | + | ||
370 | + def getWhitespaceHandling(self): return _morfeusz2.Morfeusz_getWhitespaceHandling(self) | ||
371 | + def getIdResolver(self): return _morfeusz2.Morfeusz_getIdResolver(self) | ||
372 | + def setDictionary(self, dictName): | ||
373 | + """ | ||
374 | + Set dictionary to be used by this instance (by name) | ||
375 | + """ | ||
376 | + _morfeusz2.Morfeusz_setDictionary(self, dictName.encode('utf8')) | ||
377 | + | ||
378 | + | ||
379 | + __swig_setmethods__["dictionarySearchPaths"] = _morfeusz2.Morfeusz_dictionarySearchPaths_set | ||
380 | + __swig_getmethods__["dictionarySearchPaths"] = _morfeusz2.Morfeusz_dictionarySearchPaths_get | ||
381 | + if _newclass:dictionarySearchPaths = _swig_property(_morfeusz2.Morfeusz_dictionarySearchPaths_get, _morfeusz2.Morfeusz_dictionarySearchPaths_set) | ||
382 | + def getAvailableAgglOptions(self): return _morfeusz2.Morfeusz_getAvailableAgglOptions(self) | ||
383 | + def getAvailablePraetOptions(self): return _morfeusz2.Morfeusz_getAvailablePraetOptions(self) | ||
384 | + def _generateByTagId(self, *args): return _morfeusz2.Morfeusz__generateByTagId(self, *args) | ||
385 | + def analyse_iter(self, text): | ||
386 | + """ | ||
387 | + Analyse given text and return an iterator over MorphInterpretation objects as a result. | ||
388 | + """ | ||
389 | + return _morfeusz2.Morfeusz__analyseAsIterator(self, text) | ||
390 | + | ||
391 | + | ||
392 | +Morfeusz_swigregister = _morfeusz2.Morfeusz_swigregister | ||
393 | +Morfeusz_swigregister(Morfeusz) | ||
394 | + | ||
395 | +def Morfeusz_getVersion(): | ||
396 | + return _morfeusz2.Morfeusz_getVersion() | ||
397 | +Morfeusz_getVersion = _morfeusz2.Morfeusz_getVersion | ||
398 | + | ||
399 | +def Morfeusz_getDefaultDictName(): | ||
400 | + return _morfeusz2.Morfeusz_getDefaultDictName() | ||
401 | +Morfeusz_getDefaultDictName = _morfeusz2.Morfeusz_getDefaultDictName | ||
402 | + | ||
403 | +def Morfeusz_getCopyright(): | ||
404 | + return _morfeusz2.Morfeusz_getCopyright() | ||
405 | +Morfeusz_getCopyright = _morfeusz2.Morfeusz_getCopyright | ||
406 | + | ||
407 | +def Morfeusz_createInstance(*args): | ||
408 | + return _morfeusz2.Morfeusz_createInstance(*args) | ||
409 | +Morfeusz_createInstance = _morfeusz2.Morfeusz_createInstance | ||
410 | +cvar = _morfeusz2.cvar | ||
411 | + | ||
412 | +class ResultsIterator(_object): | ||
413 | + __swig_setmethods__ = {} | ||
414 | + __setattr__ = lambda self, name, value: _swig_setattr(self, ResultsIterator, name, value) | ||
415 | + __swig_getmethods__ = {} | ||
416 | + __getattr__ = lambda self, name: _swig_getattr(self, ResultsIterator, name) | ||
417 | + def __init__(self, *args, **kwargs): raise AttributeError("No constructor defined - class is abstract") | ||
418 | + __repr__ = _swig_repr | ||
419 | + def hasNext(self): return _morfeusz2.ResultsIterator_hasNext(self) | ||
420 | + def peek(self): return _morfeusz2.ResultsIterator_peek(self) | ||
421 | + def next(self): | ||
422 | + if self.hasNext(): | ||
423 | + return _morfeusz2.ResultsIterator_next(self) | ||
424 | + else: | ||
425 | + raise StopIteration | ||
426 | + | ||
427 | + | ||
428 | + __swig_destroy__ = _morfeusz2.delete_ResultsIterator | ||
429 | + __del__ = lambda self : None; | ||
430 | + def __iter__(self): return _morfeusz2.ResultsIterator___iter__(self) | ||
431 | +ResultsIterator_swigregister = _morfeusz2.ResultsIterator_swigregister | ||
432 | +ResultsIterator_swigregister(ResultsIterator) | ||
433 | + | ||
434 | +class IdResolver(_object): | ||
435 | + __swig_setmethods__ = {} | ||
436 | + __setattr__ = lambda self, name, value: _swig_setattr(self, IdResolver, name, value) | ||
437 | + __swig_getmethods__ = {} | ||
438 | + __getattr__ = lambda self, name: _swig_getattr(self, IdResolver, name) | ||
439 | + def __init__(self, *args, **kwargs): raise AttributeError("No constructor defined - class is abstract") | ||
440 | + __repr__ = _swig_repr | ||
441 | + def getTagsetId(self): return _morfeusz2.IdResolver_getTagsetId(self) | ||
442 | + def getTag(self, tagId): | ||
443 | + return _morfeusz2.IdResolver_getTag(self, tagId).decode('utf8') | ||
444 | + | ||
445 | + | ||
446 | + def getTagId(self, tag): | ||
447 | + return _morfeusz2.IdResolver_getTagId(self, tag.encode('utf8')) | ||
448 | + | ||
449 | + | ||
450 | + def getName(self, nameId): | ||
451 | + return _morfeusz2.IdResolver_getName(self, nameId).decode('utf8') | ||
452 | + | ||
453 | + | ||
454 | + def getNameId(self, name): | ||
455 | + return _morfeusz2.IdResolver_getNameId(self, name.encode('utf8')) | ||
456 | + | ||
457 | + | ||
458 | + def getLabelsAsUnicode(self, labelsId): | ||
459 | + return _morfeusz2.IdResolver_getLabelsAsString(self, labelsId).decode('utf8') | ||
460 | + | ||
461 | + | ||
462 | + def getLabels(self, labelsId): | ||
463 | + return { l.decode('utf8') for l in _morfeusz2.IdResolver_getLabels(self, labelsId) } | ||
464 | + | ||
465 | + | ||
466 | + def getLabelsId(self, labelsStr): | ||
467 | + return _morfeusz2.IdResolver_getLabelsId(self, labelsStr.encode('utf8')) | ||
468 | + | ||
469 | + | ||
470 | + def getTagsCount(self): return _morfeusz2.IdResolver_getTagsCount(self) | ||
471 | + def getNamesCount(self): return _morfeusz2.IdResolver_getNamesCount(self) | ||
472 | + def getLabelsCount(self): return _morfeusz2.IdResolver_getLabelsCount(self) | ||
473 | + __swig_destroy__ = _morfeusz2.delete_IdResolver | ||
474 | + __del__ = lambda self : None; | ||
475 | +IdResolver_swigregister = _morfeusz2.IdResolver_swigregister | ||
476 | +IdResolver_swigregister(IdResolver) | ||
477 | + | ||
478 | +class MorphInterpretation(_object): | ||
479 | + __swig_setmethods__ = {} | ||
480 | + __setattr__ = lambda self, name, value: _swig_setattr(self, MorphInterpretation, name, value) | ||
481 | + __swig_getmethods__ = {} | ||
482 | + __getattr__ = lambda self, name: _swig_getattr(self, MorphInterpretation, name) | ||
483 | + __repr__ = _swig_repr | ||
484 | + def __init__(self): | ||
485 | + this = _morfeusz2.new_MorphInterpretation() | ||
486 | + try: self.this.append(this) | ||
487 | + except: self.this = this | ||
488 | + __swig_getmethods__["createIgn"] = lambda x: _morfeusz2.MorphInterpretation_createIgn | ||
489 | + if _newclass:createIgn = staticmethod(_morfeusz2.MorphInterpretation_createIgn) | ||
490 | + __swig_getmethods__["createWhitespace"] = lambda x: _morfeusz2.MorphInterpretation_createWhitespace | ||
491 | + if _newclass:createWhitespace = staticmethod(_morfeusz2.MorphInterpretation_createWhitespace) | ||
492 | + def isIgn(self): return _morfeusz2.MorphInterpretation_isIgn(self) | ||
493 | + def isWhitespace(self): return _morfeusz2.MorphInterpretation_isWhitespace(self) | ||
494 | + def getTag(self, morfeusz): | ||
495 | + """ | ||
496 | + Returns tag as string. | ||
497 | + """ | ||
498 | + return _morfeusz2.MorphInterpretation_getTag(self, morfeusz) | ||
499 | + | ||
500 | + | ||
501 | + def getName(self, morfeusz): | ||
502 | + """ | ||
503 | + Returns this interpretation's named entity as a string | |
504 | + """ | ||
505 | + return _morfeusz2.MorphInterpretation_getName(self, morfeusz) | ||
506 | + | ||
507 | + | ||
508 | + def getLabelsAsUnicode(self, morfeusz): | ||
509 | + """ | ||
510 | + Returns this interpretation's labels as a string | |
511 | + """ | ||
512 | + return _morfeusz2.MorphInterpretation_getLabelsAsString(self, morfeusz).decode('utf8') | ||
513 | + | ||
514 | + | ||
515 | + def getLabels(self, morfeusz): | ||
516 | + """ | ||
517 | + Returns this interpretation's labels as a set of strings | |
518 | + """ | ||
519 | + return { l.decode('utf8') for l in _morfeusz2.MorphInterpretation_getLabels(self, morfeusz) } | ||
520 | + | ||
521 | + | ||
522 | + __swig_setmethods__["startNode"] = _morfeusz2.MorphInterpretation_startNode_set | ||
523 | + __swig_getmethods__["startNode"] = _morfeusz2.MorphInterpretation_startNode_get | ||
524 | + if _newclass:startNode = _swig_property(_morfeusz2.MorphInterpretation_startNode_get, _morfeusz2.MorphInterpretation_startNode_set) | ||
525 | + __swig_setmethods__["endNode"] = _morfeusz2.MorphInterpretation_endNode_set | ||
526 | + __swig_getmethods__["endNode"] = _morfeusz2.MorphInterpretation_endNode_get | ||
527 | + if _newclass:endNode = _swig_property(_morfeusz2.MorphInterpretation_endNode_get, _morfeusz2.MorphInterpretation_endNode_set) | ||
528 | + __swig_setmethods__["_orth"] = _morfeusz2.MorphInterpretation__orth_set | ||
529 | + __swig_getmethods__["_orth"] = _morfeusz2.MorphInterpretation__orth_get | ||
530 | + if _newclass:_orth = _swig_property(_morfeusz2.MorphInterpretation__orth_get, _morfeusz2.MorphInterpretation__orth_set) | ||
531 | + __swig_setmethods__["_lemma"] = _morfeusz2.MorphInterpretation__lemma_set | ||
532 | + __swig_getmethods__["_lemma"] = _morfeusz2.MorphInterpretation__lemma_get | ||
533 | + if _newclass:_lemma = _swig_property(_morfeusz2.MorphInterpretation__lemma_get, _morfeusz2.MorphInterpretation__lemma_set) | ||
534 | + __swig_setmethods__["tagId"] = _morfeusz2.MorphInterpretation_tagId_set | ||
535 | + __swig_getmethods__["tagId"] = _morfeusz2.MorphInterpretation_tagId_get | ||
536 | + if _newclass:tagId = _swig_property(_morfeusz2.MorphInterpretation_tagId_get, _morfeusz2.MorphInterpretation_tagId_set) | ||
537 | + __swig_setmethods__["nameId"] = _morfeusz2.MorphInterpretation_nameId_set | ||
538 | + __swig_getmethods__["nameId"] = _morfeusz2.MorphInterpretation_nameId_get | ||
539 | + if _newclass:nameId = _swig_property(_morfeusz2.MorphInterpretation_nameId_get, _morfeusz2.MorphInterpretation_nameId_set) | ||
540 | + __swig_setmethods__["labelsId"] = _morfeusz2.MorphInterpretation_labelsId_set | ||
541 | + __swig_getmethods__["labelsId"] = _morfeusz2.MorphInterpretation_labelsId_get | ||
542 | + if _newclass:labelsId = _swig_property(_morfeusz2.MorphInterpretation_labelsId_get, _morfeusz2.MorphInterpretation_labelsId_set) | ||
543 | + @property | ||
544 | + def orth(self): | ||
545 | + return self._orth.decode('utf8') | ||
546 | + | ||
547 | + @orth.setter | ||
548 | + def orth(self, val): | ||
549 | + self._orth = val.encode('utf8') | ||
550 | + | ||
551 | + @property | ||
552 | + def lemma(self): | ||
553 | + return self._lemma.decode('utf8') | ||
554 | + | ||
555 | + @lemma.setter | ||
556 | + def lemma(self, val): | ||
557 | + self._lemma = val.encode('utf8') | ||
558 | + | ||
559 | + __swig_destroy__ = _morfeusz2.delete_MorphInterpretation | ||
560 | + __del__ = lambda self : None; | ||
561 | +MorphInterpretation_swigregister = _morfeusz2.MorphInterpretation_swigregister | ||
562 | +MorphInterpretation_swigregister(MorphInterpretation) | ||
563 | + | ||
564 | +def MorphInterpretation_createIgn(*args): | ||
565 | + return _morfeusz2.MorphInterpretation_createIgn(*args) | ||
566 | +MorphInterpretation_createIgn = _morfeusz2.MorphInterpretation_createIgn | ||
567 | + | ||
568 | +def MorphInterpretation_createWhitespace(*args): | ||
569 | + return _morfeusz2.MorphInterpretation_createWhitespace(*args) | ||
570 | +MorphInterpretation_createWhitespace = _morfeusz2.MorphInterpretation_createWhitespace | ||
571 | + | ||
572 | +# This file is compatible with both classic and new-style classes. | ||
573 | + | ||
574 | + |
dictionary/ajax_argument_form.py
@@ -27,7 +27,6 @@ from django.db.models import Count, Q, Sum | @@ -27,7 +27,6 @@ from django.db.models import Count, Q, Sum | ||
27 | from accounts.models import can_modify_phraseology_only | 27 | from accounts.models import can_modify_phraseology_only |
28 | from common.decorators import render, ajax, AjaxError | 28 | from common.decorators import render, ajax, AjaxError |
29 | from common.js_to_obj import jsArgToObj | 29 | from common.js_to_obj import jsArgToObj |
30 | -from common.morfeusz import analyse | ||
31 | from dictionary.forms import AddArgumentForm, ArgPropositionsForm, Atribute_Model, \ | 30 | from dictionary.forms import AddArgumentForm, ArgPropositionsForm, Atribute_Model, \ |
32 | AtributeChoiceForm, AtributeTextForm, ValueAttrMultiValueForm, \ | 31 | AtributeChoiceForm, AtributeTextForm, ValueAttrMultiValueForm, \ |
33 | TextAttrMultiValueForm, PositionsForm, SelectArgumentForm, \ | 32 | TextAttrMultiValueForm, PositionsForm, SelectArgumentForm, \ |
@@ -38,6 +37,7 @@ from dictionary.models import Argument, Argument_Model, Atribute, Atribute_Value | @@ -38,6 +37,7 @@ from dictionary.models import Argument, Argument_Model, Atribute, Atribute_Value | ||
38 | sortatributes, sortPositions, sortArguments, get_or_create_attr_parameter, \ | 37 | sortatributes, sortPositions, sortArguments, get_or_create_attr_parameter, \ |
39 | get_attr_models_to_exclude, is_morfeusz_exception, get_or_create_attribute, \ | 38 | get_attr_models_to_exclude, is_morfeusz_exception, get_or_create_attribute, \ |
40 | get_or_create_parameter_attr_value | 39 | get_or_create_parameter_attr_value |
40 | +from settings import MORFEUSZ2 | ||
41 | 41 | ||
42 | @render('argument_form.html') | 42 | @render('argument_form.html') |
43 | @ajax(method='post', encode_result=False) | 43 | @ajax(method='post', encode_result=False) |
@@ -317,7 +317,6 @@ def create_type_form(pos, arg_model, phraseologic_modification=False, | @@ -317,7 +317,6 @@ def create_type_form(pos, arg_model, phraseologic_modification=False, | ||
317 | def create_attributes_forms(pos, arg_model, subforms_values, form_type='standard'): | 317 | def create_attributes_forms(pos, arg_model, subforms_values, form_type='standard'): |
318 | sheets = [] | 318 | sheets = [] |
319 | attribute_models = get_attribute_models(arg_model, subforms_values) | 319 | attribute_models = get_attribute_models(arg_model, subforms_values) |
320 | - #attr_values_to_exclude = get_attr_values_to_exclude(arg_model) | ||
321 | param_models_to_exclude = get_parameter_types_to_exclude(arg_model) | 320 | param_models_to_exclude = get_parameter_types_to_exclude(arg_model) |
322 | for i in range(len(attribute_models)): | 321 | for i in range(len(attribute_models)): |
323 | attr_form_values = [] | 322 | attr_form_values = [] |
@@ -488,8 +487,8 @@ def get_parameter_types_to_exclude(arg_model): | @@ -488,8 +487,8 @@ def get_parameter_types_to_exclude(arg_model): | ||
488 | def is_correct_lemma(argument_model, attribute_model, lemma): | 487 | def is_correct_lemma(argument_model, attribute_model, lemma): |
489 | correct_form = False | 488 | correct_form = False |
490 | possible_pos_tags = argument_model.get_possible_lemma_tags(attribute_model) | 489 | possible_pos_tags = argument_model.get_possible_lemma_tags(attribute_model) |
491 | - for interp in analyse(lemma): | ||
492 | - if (is_single_word(interp) and base_form_correct(interp, lemma) and | 490 | + for interp in MORFEUSZ2.analyse(lemma.encode('utf8')): |
491 | + if (base_form_correct(interp, lemma) and | ||
493 | pos_tag_correct(interp, possible_pos_tags)): | 492 | pos_tag_correct(interp, possible_pos_tags)): |
494 | correct_form = True | 493 | correct_form = True |
495 | break | 494 | break |
@@ -501,18 +500,13 @@ def is_correct_lemma(argument_model, attribute_model, lemma): | @@ -501,18 +500,13 @@ def is_correct_lemma(argument_model, attribute_model, lemma): | ||
501 | break | 500 | break |
502 | return correct_form | 501 | return correct_form |
503 | 502 | ||
504 | -def is_single_word(interp): | ||
505 | - if len(interp) == 1: | ||
506 | - return True | ||
507 | - return False | ||
508 | - | ||
509 | def base_form_correct(interp, lemma): | 503 | def base_form_correct(interp, lemma): |
510 | - if interp[0][1] == lemma: | 504 | + if interp.lemma == lemma: |
511 | return True | 505 | return True |
512 | return False | 506 | return False |
513 | 507 | ||
514 | def pos_tag_correct(interp, possible_pos_tags): | 508 | def pos_tag_correct(interp, possible_pos_tags): |
515 | - tagstr = interp[0][2] | 509 | + tagstr = interp.getTag(MORFEUSZ2) |
516 | pos_tag = tagstr.split(':')[0] | 510 | pos_tag = tagstr.split(':')[0] |
517 | if possible_pos_tags.filter(name=pos_tag).exists(): | 511 | if possible_pos_tags.filter(name=pos_tag).exists(): |
518 | return True | 512 | return True |
@@ -520,16 +514,18 @@ def pos_tag_correct(interp, possible_pos_tags): | @@ -520,16 +514,18 @@ def pos_tag_correct(interp, possible_pos_tags): | ||
520 | 514 | ||
521 | def contains_separator(lemma): | 515 | def contains_separator(lemma): |
522 | contains_separator = False | 516 | contains_separator = False |
523 | - for interp in analyse(lemma): | ||
524 | - if len(interp) > 1: | 517 | + results_iter = MORFEUSZ2.analyse_iter(lemma.encode('utf8')) |
518 | + while results_iter.hasNext(): | ||
519 | + if results_iter.peek().orth != lemma: | ||
525 | contains_separator = True | 520 | contains_separator = True |
526 | break | 521 | break |
522 | + results_iter.next() | ||
527 | return contains_separator | 523 | return contains_separator |
528 | 524 | ||
529 | def is_preposition_case_pair_valid(preposition_obj, case_obj): | 525 | def is_preposition_case_pair_valid(preposition_obj, case_obj): |
530 | # postp is used by prepadjp | 526 | # postp is used by prepadjp |
531 | case_str = unicode(case_obj) | 527 | case_str = unicode(case_obj) |
532 | - prep_str = unicode(preposition_obj) | 528 | + prep_str = unicode(preposition_obj).split()[-1] |
533 | if case_str != 'postp': | 529 | if case_str != 'postp': |
534 | # str is used by prepadjp | 530 | # str is used by prepadjp |
535 | if case_str == 'str': | 531 | if case_str == 'str': |
@@ -537,12 +533,14 @@ def is_preposition_case_pair_valid(preposition_obj, case_obj): | @@ -537,12 +533,14 @@ def is_preposition_case_pair_valid(preposition_obj, case_obj): | ||
537 | else: | 533 | else: |
538 | pcase = [case_str] | 534 | pcase = [case_str] |
539 | for case in pcase: | 535 | for case in pcase: |
540 | - for interp in analyse(prep_str): | ||
541 | - for token in interp: | ||
542 | - tag = token[2].split(':') | ||
543 | - if tag[0] == 'prep': | ||
544 | - if tag[1] == case: | ||
545 | - return True | 536 | + for interp in MORFEUSZ2.analyse(prep_str.encode('utf8')): |
537 | + tagstr = interp.getTag(MORFEUSZ2) | ||
538 | + tag_parts = tagstr.split(':') | ||
539 | + if len(tag_parts) > 1: | ||
540 | + interp_pos = tag_parts[0] | ||
541 | + interp_case = tag_parts[1] | ||
542 | + if interp_pos == 'prep' and interp_case == case: | ||
543 | + return True | ||
546 | return False | 544 | return False |
547 | return True | 545 | return True |
548 | 546 |
dictionary/ajax_lemma_view.py
@@ -85,7 +85,6 @@ from accounts.models import UserSettings, RealizedLemma, UserStats, RealizedPhra | @@ -85,7 +85,6 @@ from accounts.models import UserSettings, RealizedLemma, UserStats, RealizedPhra | ||
85 | can_modify_phraseology_only, get_anon_profile | 85 | can_modify_phraseology_only, get_anon_profile |
86 | 86 | ||
87 | from ajax_jqgrid import JqGridAjax, default_sort_rules, default_filter_rules | 87 | from ajax_jqgrid import JqGridAjax, default_sort_rules, default_filter_rules |
88 | -from common.morfeusz import analyse | ||
89 | 88 | ||
90 | import locale | 89 | import locale |
91 | from functools import cmp_to_key | 90 | from functools import cmp_to_key |
dictionary/validation.py
@@ -27,13 +27,13 @@ import operator | @@ -27,13 +27,13 @@ import operator | ||
27 | from django.db.models import Sum, Q | 27 | from django.db.models import Sum, Q |
28 | 28 | ||
29 | from common.js_to_obj import frameObjToSerializableDict | 29 | from common.js_to_obj import frameObjToSerializableDict |
30 | -from common.morfeusz import analyse | ||
31 | from dictionary.common_func import subframe_exists | 30 | from dictionary.common_func import subframe_exists |
32 | from dictionary.convert_frames import frame_conversion | 31 | from dictionary.convert_frames import frame_conversion |
33 | from dictionary.forms import FrameAspectForm, FrameOpinionForm | 32 | from dictionary.forms import FrameAspectForm, FrameOpinionForm |
34 | from dictionary.models import Argument_Model, AspectRelationsGroup, Frame_Char_Model, \ | 33 | from dictionary.models import Argument_Model, AspectRelationsGroup, Frame_Char_Model, \ |
35 | Frame_Characteristic, Lemma, Position, PositionCategory, \ | 34 | Frame_Characteristic, Lemma, Position, PositionCategory, \ |
36 | get_attribute_models, sortArguments, sortPositions | 35 | get_attribute_models, sortArguments, sortPositions |
36 | +from settings import MORFEUSZ2 | ||
37 | 37 | ||
38 | def get_wrong_aspect_frames(lemma, frames): | 38 | def get_wrong_aspect_frames(lemma, frames): |
39 | wrong_aspect_frames = [] | 39 | wrong_aspect_frames = [] |
@@ -47,18 +47,18 @@ def check_aspect(lemma, frame): | @@ -47,18 +47,18 @@ def check_aspect(lemma, frame): | ||
47 | inf_present = False | 47 | inf_present = False |
48 | frame_aspect_obj = frame.characteristics.get(type=u'ASPEKT') | 48 | frame_aspect_obj = frame.characteristics.get(type=u'ASPEKT') |
49 | frame_aspect = frame_aspect_obj.value.value | 49 | frame_aspect = frame_aspect_obj.value.value |
50 | - interps = analyse(lemma.entry) | ||
51 | - if interps[0][0][1] and frame_aspect != '_': | 50 | + interps = MORFEUSZ2.analyse(lemma.entry.encode('utf8')) |
51 | + if frame_aspect != '_': | ||
52 | for interp in interps: | 52 | for interp in interps: |
53 | - for token in interp: | ||
54 | - tag = token[2].split(':') | ||
55 | - if tag[0] == 'inf': | ||
56 | - inf_present = True | ||
57 | - if tag[1] == frame_aspect: | ||
58 | - good_aspect = True | ||
59 | - break | ||
60 | - if good_aspect: | ||
61 | - break | 53 | + tagstr = interp.getTag(MORFEUSZ2) |
54 | + tag_parts = tagstr.split(':') | ||
55 | + pos = tag_parts[0] | ||
56 | + if pos == 'inf': | ||
57 | + aspect = tag_parts[1] | ||
58 | + inf_present = True | ||
59 | + if aspect == frame_aspect: | ||
60 | + good_aspect = True | ||
61 | + break | ||
62 | if good_aspect or not inf_present: | 62 | if good_aspect or not inf_present: |
63 | good_aspect = True | 63 | good_aspect = True |
64 | return good_aspect | 64 | return good_aspect |
@@ -84,13 +84,13 @@ def get_missing_aspects_msg(lemma): | @@ -84,13 +84,13 @@ def get_missing_aspects_msg(lemma): | ||
84 | 84 | ||
85 | def get_possible_aspects(lemma): | 85 | def get_possible_aspects(lemma): |
86 | possible_aspects = [] | 86 | possible_aspects = [] |
87 | - interps = analyse(lemma.entry) | ||
88 | - if interps[0][0][1]: | ||
89 | - for interp in interps: | ||
90 | - for token in interp: | ||
91 | - tag = token[2].split(':') | ||
92 | - if tag[0] == 'inf' and tag[1] not in possible_aspects: | ||
93 | - possible_aspects.append(tag[1]) | 87 | + interps = MORFEUSZ2.analyse(lemma.entry.encode('utf8')) |
88 | + for interp in interps: | ||
89 | + tagstr = interp.getTag(MORFEUSZ2) | ||
90 | + tag_parts = tagstr.split(':') | ||
91 | + pos = tag_parts[0] | ||
92 | + if pos == 'inf' and tag_parts[1] not in possible_aspects: | ||
93 | + possible_aspects.append(tag_parts[1]) | ||
94 | return possible_aspects | 94 | return possible_aspects |
95 | 95 | ||
96 | def match_arg_poss(arg_poss, frame): | 96 | def match_arg_poss(arg_poss, frame): |
@@ -355,7 +355,7 @@ def prep_check(arg): | @@ -355,7 +355,7 @@ def prep_check(arg): | ||
355 | #if arg.find('prep') != -1 and not arg.startswith('comprepnp('): | 355 | #if arg.find('prep') != -1 and not arg.startswith('comprepnp('): |
356 | if arg.startswith('prep'): | 356 | if arg.startswith('prep'): |
357 | params = arg[arg.find('(')+1:arg.find(')')].split(',') | 357 | params = arg[arg.find('(')+1:arg.find(')')].split(',') |
358 | - pform = params[0] | 358 | + pform = params[0].split()[-1] |
359 | pcase = params[1] | 359 | pcase = params[1] |
360 | # postp is used by prepadjp | 360 | # postp is used by prepadjp |
361 | if pcase != 'postp': | 361 | if pcase != 'postp': |
@@ -365,12 +365,12 @@ def prep_check(arg): | @@ -365,12 +365,12 @@ def prep_check(arg): | ||
365 | else: | 365 | else: |
366 | pcase = [pcase] | 366 | pcase = [pcase] |
367 | for case in pcase: | 367 | for case in pcase: |
368 | - for interp in analyse(pform): | ||
369 | - for token in interp: | ||
370 | - tag = token[2].split(':') | ||
371 | - if tag[0] == 'prep': | ||
372 | - if tag[1] == case: | ||
373 | - return True | 368 | + for interp in MORFEUSZ2.analyse(pform.encode('utf8')): |
369 | + tagstr = interp.getTag(MORFEUSZ2) | ||
370 | + tag_parts = tagstr.split(':') | ||
371 | + pos = tag_parts[0] | ||
372 | + if pos == 'prep' and tag_parts[1] == case: | ||
373 | + return True | ||
374 | return False | 374 | return False |
375 | return True | 375 | return True |
376 | 376 |
manage.py
@@ -18,4 +18,4 @@ if __name__ == "__main__": | @@ -18,4 +18,4 @@ if __name__ == "__main__": | ||
18 | # | 18 | # |
19 | # from django.core.management import execute_from_command_line | 19 | # from django.core.management import execute_from_command_line |
20 | # | 20 | # |
21 | -# execute_from_command_line(sys.argv) | 21 | -# execute_from_command_line(sys.argv) |
22 | +# execute_from_command_line(sys.argv) | ||
22 | \ No newline at end of file | 23 | \ No newline at end of file |