Commit 92fd5c36277e29243591b6c00b97b8ef63c67c05
1 parent
a2ea5daa
Changed morfeusz based validations to use morfeusz2.
Showing
7 changed files
with
619 additions
and
319 deletions
common/_morfeusz2.so
0 → 100755
No preview for this file type
common/morfeusz.py deleted
1 | -# encoding=UTF-8 | |
2 | - | |
3 | -# Copyright © 2007, 2008, 2010, 2011 Jakub Wilk <jwilk@jwilk.net> | |
4 | -# | |
5 | -# Permission is hereby granted, free of charge, to any person obtaining a copy | |
6 | -# of this software and associated documentation files (the “Software”), to deal | |
7 | -# in the Software without restriction, including without limitation the rights | |
8 | -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
9 | -# copies of the Software, and to permit persons to whom the Software is | |
10 | -# furnished to do so, subject to the following conditions: | |
11 | -# | |
12 | -# The above copyright notice and this permission notice shall be included in | |
13 | -# all copies or substantial portions of the Software. | |
14 | -# | |
15 | -# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
18 | -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
19 | -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
20 | -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
21 | -# SOFTWARE. | |
22 | - | |
23 | -''' | |
24 | -Bindings for Morfeusz_, a Polish morphological analyser. | |
25 | - | |
26 | -.. _Morfeusz: | |
27 | - http://sgjp.pl/morfeusz/ | |
28 | -''' | |
29 | - | |
30 | -from __future__ import with_statement | |
31 | - | |
32 | -import collections | |
33 | -import ctypes | |
34 | -import sys | |
35 | - | |
36 | -py3k = sys.version_info >= (3, 0) | |
37 | - | |
38 | -if py3k: | |
39 | - import _thread as thread | |
40 | -else: | |
41 | - import thread | |
42 | -if not py3k: | |
43 | - from itertools import izip as zip | |
44 | - | |
45 | -if py3k: | |
46 | - unicode = str | |
47 | - | |
48 | -__author__ = 'Jakub Wilk <jwilk@jwilk.net>' | |
49 | -__version__ = '0.3300' | |
50 | -__all__ = ['analyse', 'about', 'expand_tags', 'ATTRIBUTES', 'VALUES'] | |
51 | - | |
52 | -ATTRIBUTES = ''' | |
53 | -subst=number case gender | |
54 | -depr=number case gender | |
55 | -adj=number case gender degree | |
56 | -adja= | |
57 | -adjc= | |
58 | -adjp= | |
59 | -adv=degree | |
60 | -num=number case gender accommodability | |
61 | -numcol=number case gender accommodability | |
62 | -ppron12=number case gender person accentability | |
63 | -ppron3=number case gender person accentability post_prepositionality | |
64 | -siebie=case | |
65 | -fin=number person aspect | |
66 | -bedzie=number person aspect | |
67 | -aglt=number person aspect vocalicity | |
68 | -praet=number gender aspect agglutination | |
69 | -impt=number person aspect | |
70 | -imps=aspect | |
71 | -inf=aspect | |
72 | -pcon=aspect | |
73 | -pant=aspect | |
74 | -ger=number case gender aspect negation | |
75 | -pact=number case gender aspect negation | |
76 | -ppas=number case gender aspect negation | |
77 | -winien=number gender aspect | |
78 | -pred= | |
79 | -prep=case vocalicity | |
80 | -conj= | |
81 | -comp= | |
82 | -brev=fullstoppedness | |
83 | -burk= | |
84 | -interj= | |
85 | -qub=vocalicity | |
86 | -xxs=number case gender | |
87 | -xxx= | |
88 | -interp= | |
89 | -ign= | |
90 | -sp= | |
91 | -''' | |
92 | -ATTRIBUTES = \ | |
93 | -dict( | |
94 | - (key, tuple(values.split())) | |
95 | - for line in ATTRIBUTES.splitlines() if line | |
96 | - for (key, values) in (line.split('=', 1),) | |
97 | -) | |
98 | - | |
99 | -VALUES = ''' | |
100 | -number=sg pl | |
101 | -case=nom gen dat acc inst loc voc | |
102 | -gender=m1 m2 m3 f n1 n2 p1 p2 p3 | |
103 | -person=pri sec ter | |
104 | -degree=pos comp sup | |
105 | -aspect=imperf perf | |
106 | -negation=aff neg | |
107 | -accentability=akc nakc | |
108 | -post_prepositionality=npraep praep | |
109 | -accommodability=congr rec | |
110 | -agglutination=agl nagl | |
111 | -vocalicity=nwok wok | |
112 | -fullstoppedness=pun npun | |
113 | -''' | |
114 | -VALUES = \ | |
115 | -dict( | |
116 | - (key, tuple(values.split())) | |
117 | - for line in VALUES.splitlines() if line | |
118 | - for (key, values) in (line.split('=', 1),) | |
119 | -) | |
120 | - | |
121 | -libmorfeusz = ctypes.CDLL('libmorfeusz.so.0') | |
122 | - | |
123 | -MORFOPT_ENCODING = 1 | |
124 | -MORFEUSZ_UTF_8 = 8 | |
125 | - | |
126 | -MORFOPT_WHITESPACE = 2 | |
127 | -MORFEUSZ_SKIP_WHITESPACE = 0 | |
128 | -MORFEUSZ_KEEP_WHITESPACE = 2 | |
129 | - | |
130 | -libmorfeusz.morfeusz_set_option(MORFOPT_ENCODING, MORFEUSZ_UTF_8) | |
131 | -libmorfeusz_lock = thread.allocate_lock() | |
132 | - | |
133 | -class InterpEdge(ctypes.Structure): | |
134 | - _fields_ = \ | |
135 | - ( | |
136 | - ('i', ctypes.c_int), | |
137 | - ('j', ctypes.c_int), | |
138 | - ('_orth', ctypes.c_char_p), | |
139 | - ('_base', ctypes.c_char_p), | |
140 | - ('_tags', ctypes.c_char_p) | |
141 | - ) | |
142 | - | |
143 | - if py3k: | |
144 | - @property | |
145 | - def tags(self): | |
146 | - if self._tags is not None: | |
147 | - return self._tags.decode('UTF-8') | |
148 | - else: | |
149 | - @property | |
150 | - def tags(self): | |
151 | - return self._tags | |
152 | - | |
153 | - @property | |
154 | - def orth(self): | |
155 | - if self._orth is not None: | |
156 | - return self._orth.decode('UTF-8') | |
157 | - | |
158 | - @property | |
159 | - def base(self): | |
160 | - if self._base is not None: | |
161 | - return self._base.decode('UTF-8') | |
162 | - | |
163 | -libmorfeusz_analyse = libmorfeusz.morfeusz_analyse | |
164 | -libmorfeusz_analyse.restype = ctypes.POINTER(InterpEdge) | |
165 | -libmorfeusz_about = libmorfeusz.morfeusz_about | |
166 | -libmorfeusz_about.restype = ctypes.c_char_p | |
167 | - | |
168 | -def expand_tags(tags, expand_dot=True, expand_underscore=True): | |
169 | - | |
170 | - if tags is None: | |
171 | - yield | |
172 | - return | |
173 | - tags = str(tags) | |
174 | - for tag in tags.split('|'): | |
175 | - tag = tag.split(':') | |
176 | - pos = tag.pop(0) | |
177 | - chunks = [(pos,)] | |
178 | - chunks += \ | |
179 | - ( | |
180 | - VALUES[attribute] if chunk == '_' and expand_underscore | |
181 | - else chunk.split('.') | |
182 | - for chunk, attribute in zip(tag, ATTRIBUTES[pos]) | |
183 | - ) | |
184 | - | |
185 | - if not expand_dot: | |
186 | - yield ':'.join('.'.join(values) for values in chunks) | |
187 | - continue | |
188 | - | |
189 | - def expand_chunks(i): | |
190 | - if i >= len(chunks): | |
191 | - yield () | |
192 | - else: | |
193 | - tail = tuple(expand_chunks(i + 1)) | |
194 | - for chunk_variant in chunks[i]: | |
195 | - for tail_variant in tail: | |
196 | - yield (chunk_variant,) + tail_variant | |
197 | - | |
198 | - for x in expand_chunks(0): | |
199 | - yield ':'.join(x) | |
200 | - | |
201 | -_expand_tags = expand_tags | |
202 | - | |
203 | -def _dont_expand_tags(s, **kwargs): | |
204 | - return [s] | |
205 | - | |
206 | -def analyse(text, expand_tags=True, expand_dot=True, expand_underscore=True, dag=False, keep_whitespace=False): | |
207 | - ''' | |
208 | - Analyse the text. | |
209 | - ''' | |
210 | - expand_tags = _expand_tags if expand_tags else _dont_expand_tags | |
211 | - text = unicode(text) | |
212 | - text = text.encode('UTF-8') | |
213 | - analyse = _analyse_as_dag if dag else _analyse_as_list | |
214 | - return analyse( | |
215 | - text=text, | |
216 | - expand_tags=expand_tags, | |
217 | - expand_dot=expand_dot, | |
218 | - expand_underscore=expand_underscore, | |
219 | - keep_whitespace=keep_whitespace | |
220 | - ) | |
221 | - | |
222 | -def _analyse_as_dag(text, expand_tags, expand_dot, expand_underscore, keep_whitespace): | |
223 | - result = [] | |
224 | - with libmorfeusz_lock: | |
225 | - if keep_whitespace: | |
226 | - if libmorfeusz.morfeusz_set_option(MORFOPT_WHITESPACE, MORFEUSZ_KEEP_WHITESPACE) != 1: | |
227 | - raise NotImplementedError("This version of Morfeusz doesn't support keep_whitespace") | |
228 | - for edge in libmorfeusz_analyse(text): | |
229 | - if edge.i == -1: | |
230 | - break | |
231 | - for tag in expand_tags(edge.tags, expand_dot=expand_dot, expand_underscore=expand_underscore): | |
232 | - result += [(edge.i, edge.j, (edge.orth, edge.base, tag))] | |
233 | - if keep_whitespace: | |
234 | - libmorfeusz.morfeusz_set_option(MORFOPT_WHITESPACE, MORFEUSZ_SKIP_WHITESPACE) | |
235 | - return result | |
236 | - | |
237 | -def _analyse_as_list(text, expand_tags, expand_dot, expand_underscore, keep_whitespace): | |
238 | - dag = collections.defaultdict(list) | |
239 | - with libmorfeusz_lock: | |
240 | - if keep_whitespace: | |
241 | - if libmorfeusz.morfeusz_set_option(MORFOPT_WHITESPACE, MORFEUSZ_KEEP_WHITESPACE) != 1: | |
242 | - raise NotImplementedError("This version of Morfeusz doesn't support keep_whitespace") | |
243 | - for edge in libmorfeusz_analyse(text): | |
244 | - if edge.i == -1: | |
245 | - break | |
246 | - for tag in expand_tags(edge.tags, expand_dot=expand_dot, expand_underscore=expand_underscore): | |
247 | - dag[edge.i] += [((edge.orth, edge.base, tag), edge.j)] | |
248 | - if keep_whitespace: | |
249 | - libmorfeusz.morfeusz_set_option(MORFOPT_WHITESPACE, MORFEUSZ_SKIP_WHITESPACE) | |
250 | - def expand_dag(i): | |
251 | - nexts = dag[i] | |
252 | - if not nexts: | |
253 | - yield [] | |
254 | - else: | |
255 | - for head, j in nexts: | |
256 | - for tail in expand_dag(j): | |
257 | - yield [head] + tail | |
258 | - return list(expand_dag(0)) | |
259 | - | |
260 | -def about(): | |
261 | - ''' | |
262 | - Return a string containing information on authors and version of the | |
263 | - underlying library. | |
264 | - ''' | |
265 | - about = libmorfeusz_about() | |
266 | - try: | |
267 | - return about.decode('UTF-8') | |
268 | - except UnicodeError: | |
269 | - return about.decode('ISO-8859-2') | |
270 | - | |
271 | -# vim:ts=4 sw=4 et |
common/morfeusz2.py
0 → 100644
1 | +# This file was automatically generated by SWIG (http://www.swig.org). | |
2 | +# Version 2.0.4 | |
3 | +# | |
4 | +# Do not make changes to this file unless you know what you are doing--modify | |
5 | +# the SWIG interface file instead. | |
6 | + | |
7 | + | |
8 | + | |
9 | +from sys import version_info | |
10 | +if version_info >= (2,6,0): | |
11 | + def swig_import_helper(): | |
12 | + from os.path import dirname | |
13 | + import imp | |
14 | + fp = None | |
15 | + try: | |
16 | + fp, pathname, description = imp.find_module('_morfeusz2', [dirname(__file__)]) | |
17 | + except ImportError: | |
18 | + import _morfeusz2 | |
19 | + return _morfeusz2 | |
20 | + if fp is not None: | |
21 | + try: | |
22 | + _mod = imp.load_module('_morfeusz2', fp, pathname, description) | |
23 | + finally: | |
24 | + fp.close() | |
25 | + return _mod | |
26 | + _morfeusz2 = swig_import_helper() | |
27 | + del swig_import_helper | |
28 | +else: | |
29 | + import _morfeusz2 | |
30 | +del version_info | |
31 | +try: | |
32 | + _swig_property = property | |
33 | +except NameError: | |
34 | + pass # Python < 2.2 doesn't have 'property'. | |
35 | +def _swig_setattr_nondynamic(self,class_type,name,value,static=1): | |
36 | + if (name == "thisown"): return self.this.own(value) | |
37 | + if (name == "this"): | |
38 | + if type(value).__name__ == 'SwigPyObject': | |
39 | + self.__dict__[name] = value | |
40 | + return | |
41 | + method = class_type.__swig_setmethods__.get(name,None) | |
42 | + if method: return method(self,value) | |
43 | + if (not static): | |
44 | + self.__dict__[name] = value | |
45 | + else: | |
46 | + raise AttributeError("You cannot add attributes to %s" % self) | |
47 | + | |
48 | +def _swig_setattr(self,class_type,name,value): | |
49 | + return _swig_setattr_nondynamic(self,class_type,name,value,0) | |
50 | + | |
51 | +def _swig_getattr(self,class_type,name): | |
52 | + if (name == "thisown"): return self.this.own() | |
53 | + method = class_type.__swig_getmethods__.get(name,None) | |
54 | + if method: return method(self) | |
55 | + raise AttributeError(name) | |
56 | + | |
57 | +def _swig_repr(self): | |
58 | + try: strthis = "proxy of " + self.this.__repr__() | |
59 | + except: strthis = "" | |
60 | + return "<%s.%s; %s >" % (self.__class__.__module__, self.__class__.__name__, strthis,) | |
61 | + | |
62 | +try: | |
63 | + _object = object | |
64 | + _newclass = 1 | |
65 | +except AttributeError: | |
66 | + class _object : pass | |
67 | + _newclass = 0 | |
68 | + | |
69 | + | |
70 | +class SwigPyIterator(_object): | |
71 | + __swig_setmethods__ = {} | |
72 | + __setattr__ = lambda self, name, value: _swig_setattr(self, SwigPyIterator, name, value) | |
73 | + __swig_getmethods__ = {} | |
74 | + __getattr__ = lambda self, name: _swig_getattr(self, SwigPyIterator, name) | |
75 | + def __init__(self, *args, **kwargs): raise AttributeError("No constructor defined - class is abstract") | |
76 | + __repr__ = _swig_repr | |
77 | + __swig_destroy__ = _morfeusz2.delete_SwigPyIterator | |
78 | + __del__ = lambda self : None; | |
79 | + def value(self): return _morfeusz2.SwigPyIterator_value(self) | |
80 | + def incr(self, n = 1): return _morfeusz2.SwigPyIterator_incr(self, n) | |
81 | + def decr(self, n = 1): return _morfeusz2.SwigPyIterator_decr(self, n) | |
82 | + def distance(self, *args): return _morfeusz2.SwigPyIterator_distance(self, *args) | |
83 | + def equal(self, *args): return _morfeusz2.SwigPyIterator_equal(self, *args) | |
84 | + def copy(self): return _morfeusz2.SwigPyIterator_copy(self) | |
85 | + def next(self): return _morfeusz2.SwigPyIterator_next(self) | |
86 | + def __next__(self): return _morfeusz2.SwigPyIterator___next__(self) | |
87 | + def previous(self): return _morfeusz2.SwigPyIterator_previous(self) | |
88 | + def advance(self, *args): return _morfeusz2.SwigPyIterator_advance(self, *args) | |
89 | + def __eq__(self, *args): return _morfeusz2.SwigPyIterator___eq__(self, *args) | |
90 | + def __ne__(self, *args): return _morfeusz2.SwigPyIterator___ne__(self, *args) | |
91 | + def __iadd__(self, *args): return _morfeusz2.SwigPyIterator___iadd__(self, *args) | |
92 | + def __isub__(self, *args): return _morfeusz2.SwigPyIterator___isub__(self, *args) | |
93 | + def __add__(self, *args): return _morfeusz2.SwigPyIterator___add__(self, *args) | |
94 | + def __sub__(self, *args): return _morfeusz2.SwigPyIterator___sub__(self, *args) | |
95 | + def __iter__(self): return self | |
96 | +SwigPyIterator_swigregister = _morfeusz2.SwigPyIterator_swigregister | |
97 | +SwigPyIterator_swigregister(SwigPyIterator) | |
98 | + | |
99 | +class InterpsList(_object): | |
100 | + __swig_setmethods__ = {} | |
101 | + __setattr__ = lambda self, name, value: _swig_setattr(self, InterpsList, name, value) | |
102 | + __swig_getmethods__ = {} | |
103 | + __getattr__ = lambda self, name: _swig_getattr(self, InterpsList, name) | |
104 | + __repr__ = _swig_repr | |
105 | + def iterator(self): return _morfeusz2.InterpsList_iterator(self) | |
106 | + def __iter__(self): return self.iterator() | |
107 | + def __nonzero__(self): return _morfeusz2.InterpsList___nonzero__(self) | |
108 | + def __bool__(self): return _morfeusz2.InterpsList___bool__(self) | |
109 | + def __len__(self): return _morfeusz2.InterpsList___len__(self) | |
110 | + def pop(self): return _morfeusz2.InterpsList_pop(self) | |
111 | + def __getslice__(self, *args): return _morfeusz2.InterpsList___getslice__(self, *args) | |
112 | + def __setslice__(self, *args): return _morfeusz2.InterpsList___setslice__(self, *args) | |
113 | + def __delslice__(self, *args): return _morfeusz2.InterpsList___delslice__(self, *args) | |
114 | + def __delitem__(self, *args): return _morfeusz2.InterpsList___delitem__(self, *args) | |
115 | + def __getitem__(self, *args): return _morfeusz2.InterpsList___getitem__(self, *args) | |
116 | + def __setitem__(self, *args): return _morfeusz2.InterpsList___setitem__(self, *args) | |
117 | + def append(self, *args): return _morfeusz2.InterpsList_append(self, *args) | |
118 | + def empty(self): return _morfeusz2.InterpsList_empty(self) | |
119 | + def size(self): return _morfeusz2.InterpsList_size(self) | |
120 | + def clear(self): return _morfeusz2.InterpsList_clear(self) | |
121 | + def swap(self, *args): return _morfeusz2.InterpsList_swap(self, *args) | |
122 | + def get_allocator(self): return _morfeusz2.InterpsList_get_allocator(self) | |
123 | + def begin(self): return _morfeusz2.InterpsList_begin(self) | |
124 | + def end(self): return _morfeusz2.InterpsList_end(self) | |
125 | + def rbegin(self): return _morfeusz2.InterpsList_rbegin(self) | |
126 | + def rend(self): return _morfeusz2.InterpsList_rend(self) | |
127 | + def pop_back(self): return _morfeusz2.InterpsList_pop_back(self) | |
128 | + def erase(self, *args): return _morfeusz2.InterpsList_erase(self, *args) | |
129 | + def __init__(self, *args): | |
130 | + this = _morfeusz2.new_InterpsList(*args) | |
131 | + try: self.this.append(this) | |
132 | + except: self.this = this | |
133 | + def push_back(self, *args): return _morfeusz2.InterpsList_push_back(self, *args) | |
134 | + def front(self): return _morfeusz2.InterpsList_front(self) | |
135 | + def back(self): return _morfeusz2.InterpsList_back(self) | |
136 | + def assign(self, *args): return _morfeusz2.InterpsList_assign(self, *args) | |
137 | + def resize(self, *args): return _morfeusz2.InterpsList_resize(self, *args) | |
138 | + def insert(self, *args): return _morfeusz2.InterpsList_insert(self, *args) | |
139 | + def reserve(self, *args): return _morfeusz2.InterpsList_reserve(self, *args) | |
140 | + def capacity(self): return _morfeusz2.InterpsList_capacity(self) | |
141 | + __swig_destroy__ = _morfeusz2.delete_InterpsList | |
142 | + __del__ = lambda self : None; | |
143 | +InterpsList_swigregister = _morfeusz2.InterpsList_swigregister | |
144 | +InterpsList_swigregister(InterpsList) | |
145 | + | |
146 | +class StringsList(_object): | |
147 | + __swig_setmethods__ = {} | |
148 | + __setattr__ = lambda self, name, value: _swig_setattr(self, StringsList, name, value) | |
149 | + __swig_getmethods__ = {} | |
150 | + __getattr__ = lambda self, name: _swig_getattr(self, StringsList, name) | |
151 | + __repr__ = _swig_repr | |
152 | + def iterator(self): return _morfeusz2.StringsList_iterator(self) | |
153 | + def __iter__(self): return self.iterator() | |
154 | + def __nonzero__(self): return _morfeusz2.StringsList___nonzero__(self) | |
155 | + def __bool__(self): return _morfeusz2.StringsList___bool__(self) | |
156 | + def __len__(self): return _morfeusz2.StringsList___len__(self) | |
157 | + def pop(self): return _morfeusz2.StringsList_pop(self) | |
158 | + def __getslice__(self, *args): return _morfeusz2.StringsList___getslice__(self, *args) | |
159 | + def __setslice__(self, *args): return _morfeusz2.StringsList___setslice__(self, *args) | |
160 | + def __delslice__(self, *args): return _morfeusz2.StringsList___delslice__(self, *args) | |
161 | + def __delitem__(self, *args): return _morfeusz2.StringsList___delitem__(self, *args) | |
162 | + def __getitem__(self, *args): return _morfeusz2.StringsList___getitem__(self, *args) | |
163 | + def __setitem__(self, *args): return _morfeusz2.StringsList___setitem__(self, *args) | |
164 | + def append(self, *args): return _morfeusz2.StringsList_append(self, *args) | |
165 | + def empty(self): return _morfeusz2.StringsList_empty(self) | |
166 | + def size(self): return _morfeusz2.StringsList_size(self) | |
167 | + def clear(self): return _morfeusz2.StringsList_clear(self) | |
168 | + def swap(self, *args): return _morfeusz2.StringsList_swap(self, *args) | |
169 | + def get_allocator(self): return _morfeusz2.StringsList_get_allocator(self) | |
170 | + def begin(self): return _morfeusz2.StringsList_begin(self) | |
171 | + def end(self): return _morfeusz2.StringsList_end(self) | |
172 | + def rbegin(self): return _morfeusz2.StringsList_rbegin(self) | |
173 | + def rend(self): return _morfeusz2.StringsList_rend(self) | |
174 | + def pop_back(self): return _morfeusz2.StringsList_pop_back(self) | |
175 | + def erase(self, *args): return _morfeusz2.StringsList_erase(self, *args) | |
176 | + def __init__(self, *args): | |
177 | + this = _morfeusz2.new_StringsList(*args) | |
178 | + try: self.this.append(this) | |
179 | + except: self.this = this | |
180 | + def push_back(self, *args): return _morfeusz2.StringsList_push_back(self, *args) | |
181 | + def front(self): return _morfeusz2.StringsList_front(self) | |
182 | + def back(self): return _morfeusz2.StringsList_back(self) | |
183 | + def assign(self, *args): return _morfeusz2.StringsList_assign(self, *args) | |
184 | + def resize(self, *args): return _morfeusz2.StringsList_resize(self, *args) | |
185 | + def insert(self, *args): return _morfeusz2.StringsList_insert(self, *args) | |
186 | + def reserve(self, *args): return _morfeusz2.StringsList_reserve(self, *args) | |
187 | + def capacity(self): return _morfeusz2.StringsList_capacity(self) | |
188 | + __swig_destroy__ = _morfeusz2.delete_StringsList | |
189 | + __del__ = lambda self : None; | |
190 | +StringsList_swigregister = _morfeusz2.StringsList_swigregister | |
191 | +StringsList_swigregister(StringsList) | |
192 | + | |
193 | +class StringsLinkedList(_object): | |
194 | + __swig_setmethods__ = {} | |
195 | + __setattr__ = lambda self, name, value: _swig_setattr(self, StringsLinkedList, name, value) | |
196 | + __swig_getmethods__ = {} | |
197 | + __getattr__ = lambda self, name: _swig_getattr(self, StringsLinkedList, name) | |
198 | + __repr__ = _swig_repr | |
199 | + def iterator(self): return _morfeusz2.StringsLinkedList_iterator(self) | |
200 | + def __iter__(self): return self.iterator() | |
201 | + def __nonzero__(self): return _morfeusz2.StringsLinkedList___nonzero__(self) | |
202 | + def __bool__(self): return _morfeusz2.StringsLinkedList___bool__(self) | |
203 | + def __len__(self): return _morfeusz2.StringsLinkedList___len__(self) | |
204 | + def pop(self): return _morfeusz2.StringsLinkedList_pop(self) | |
205 | + def __getslice__(self, *args): return _morfeusz2.StringsLinkedList___getslice__(self, *args) | |
206 | + def __setslice__(self, *args): return _morfeusz2.StringsLinkedList___setslice__(self, *args) | |
207 | + def __delslice__(self, *args): return _morfeusz2.StringsLinkedList___delslice__(self, *args) | |
208 | + def __delitem__(self, *args): return _morfeusz2.StringsLinkedList___delitem__(self, *args) | |
209 | + def __getitem__(self, *args): return _morfeusz2.StringsLinkedList___getitem__(self, *args) | |
210 | + def __setitem__(self, *args): return _morfeusz2.StringsLinkedList___setitem__(self, *args) | |
211 | + def append(self, *args): return _morfeusz2.StringsLinkedList_append(self, *args) | |
212 | + def empty(self): return _morfeusz2.StringsLinkedList_empty(self) | |
213 | + def size(self): return _morfeusz2.StringsLinkedList_size(self) | |
214 | + def clear(self): return _morfeusz2.StringsLinkedList_clear(self) | |
215 | + def swap(self, *args): return _morfeusz2.StringsLinkedList_swap(self, *args) | |
216 | + def get_allocator(self): return _morfeusz2.StringsLinkedList_get_allocator(self) | |
217 | + def begin(self): return _morfeusz2.StringsLinkedList_begin(self) | |
218 | + def end(self): return _morfeusz2.StringsLinkedList_end(self) | |
219 | + def rbegin(self): return _morfeusz2.StringsLinkedList_rbegin(self) | |
220 | + def rend(self): return _morfeusz2.StringsLinkedList_rend(self) | |
221 | + def pop_back(self): return _morfeusz2.StringsLinkedList_pop_back(self) | |
222 | + def erase(self, *args): return _morfeusz2.StringsLinkedList_erase(self, *args) | |
223 | + def __init__(self, *args): | |
224 | + this = _morfeusz2.new_StringsLinkedList(*args) | |
225 | + try: self.this.append(this) | |
226 | + except: self.this = this | |
227 | + def push_back(self, *args): return _morfeusz2.StringsLinkedList_push_back(self, *args) | |
228 | + def front(self): return _morfeusz2.StringsLinkedList_front(self) | |
229 | + def back(self): return _morfeusz2.StringsLinkedList_back(self) | |
230 | + def assign(self, *args): return _morfeusz2.StringsLinkedList_assign(self, *args) | |
231 | + def resize(self, *args): return _morfeusz2.StringsLinkedList_resize(self, *args) | |
232 | + def insert(self, *args): return _morfeusz2.StringsLinkedList_insert(self, *args) | |
233 | + def pop_front(self): return _morfeusz2.StringsLinkedList_pop_front(self) | |
234 | + def push_front(self, *args): return _morfeusz2.StringsLinkedList_push_front(self, *args) | |
235 | + def reverse(self): return _morfeusz2.StringsLinkedList_reverse(self) | |
236 | + __swig_destroy__ = _morfeusz2.delete_StringsLinkedList | |
237 | + __del__ = lambda self : None; | |
238 | +StringsLinkedList_swigregister = _morfeusz2.StringsLinkedList_swigregister | |
239 | +StringsLinkedList_swigregister(StringsLinkedList) | |
240 | + | |
241 | +class StringsSet(_object): | |
242 | + __swig_setmethods__ = {} | |
243 | + __setattr__ = lambda self, name, value: _swig_setattr(self, StringsSet, name, value) | |
244 | + __swig_getmethods__ = {} | |
245 | + __getattr__ = lambda self, name: _swig_getattr(self, StringsSet, name) | |
246 | + __repr__ = _swig_repr | |
247 | + def iterator(self): return _morfeusz2.StringsSet_iterator(self) | |
248 | + def __iter__(self): return self.iterator() | |
249 | + def __nonzero__(self): return _morfeusz2.StringsSet___nonzero__(self) | |
250 | + def __bool__(self): return _morfeusz2.StringsSet___bool__(self) | |
251 | + def __len__(self): return _morfeusz2.StringsSet___len__(self) | |
252 | + def append(self, *args): return _morfeusz2.StringsSet_append(self, *args) | |
253 | + def __contains__(self, *args): return _morfeusz2.StringsSet___contains__(self, *args) | |
254 | + def __getitem__(self, *args): return _morfeusz2.StringsSet___getitem__(self, *args) | |
255 | + def __init__(self, *args): | |
256 | + this = _morfeusz2.new_StringsSet(*args) | |
257 | + try: self.this.append(this) | |
258 | + except: self.this = this | |
259 | + def empty(self): return _morfeusz2.StringsSet_empty(self) | |
260 | + def size(self): return _morfeusz2.StringsSet_size(self) | |
261 | + def clear(self): return _morfeusz2.StringsSet_clear(self) | |
262 | + def swap(self, *args): return _morfeusz2.StringsSet_swap(self, *args) | |
263 | + def count(self, *args): return _morfeusz2.StringsSet_count(self, *args) | |
264 | + def begin(self): return _morfeusz2.StringsSet_begin(self) | |
265 | + def end(self): return _morfeusz2.StringsSet_end(self) | |
266 | + def rbegin(self): return _morfeusz2.StringsSet_rbegin(self) | |
267 | + def rend(self): return _morfeusz2.StringsSet_rend(self) | |
268 | + def erase(self, *args): return _morfeusz2.StringsSet_erase(self, *args) | |
269 | + def find(self, *args): return _morfeusz2.StringsSet_find(self, *args) | |
270 | + def lower_bound(self, *args): return _morfeusz2.StringsSet_lower_bound(self, *args) | |
271 | + def upper_bound(self, *args): return _morfeusz2.StringsSet_upper_bound(self, *args) | |
272 | + def equal_range(self, *args): return _morfeusz2.StringsSet_equal_range(self, *args) | |
273 | + def insert(self, *args): return _morfeusz2.StringsSet_insert(self, *args) | |
274 | + __swig_destroy__ = _morfeusz2.delete_StringsSet | |
275 | + __del__ = lambda self : None; | |
276 | +StringsSet_swigregister = _morfeusz2.StringsSet_swigregister | |
277 | +StringsSet_swigregister(StringsSet) | |
278 | + | |
279 | +SEPARATE_NUMBERING = _morfeusz2.SEPARATE_NUMBERING | |
280 | +CONTINUOUS_NUMBERING = _morfeusz2.CONTINUOUS_NUMBERING | |
281 | +CONDITIONALLY_CASE_SENSITIVE = _morfeusz2.CONDITIONALLY_CASE_SENSITIVE | |
282 | +STRICTLY_CASE_SENSITIVE = _morfeusz2.STRICTLY_CASE_SENSITIVE | |
283 | +IGNORE_CASE = _morfeusz2.IGNORE_CASE | |
284 | +SKIP_WHITESPACES = _morfeusz2.SKIP_WHITESPACES | |
285 | +APPEND_WHITESPACES = _morfeusz2.APPEND_WHITESPACES | |
286 | +KEEP_WHITESPACES = _morfeusz2.KEEP_WHITESPACES | |
287 | +ANALYSE_ONLY = _morfeusz2.ANALYSE_ONLY | |
288 | +GENERATE_ONLY = _morfeusz2.GENERATE_ONLY | |
289 | +BOTH_ANALYSE_AND_GENERATE = _morfeusz2.BOTH_ANALYSE_AND_GENERATE | |
290 | +class Morfeusz(_object): | |
291 | + __swig_setmethods__ = {} | |
292 | + __setattr__ = lambda self, name, value: _swig_setattr(self, Morfeusz, name, value) | |
293 | + __swig_getmethods__ = {} | |
294 | + __getattr__ = lambda self, name: _swig_getattr(self, Morfeusz, name) | |
295 | + def __init__(self, *args, **kwargs): raise AttributeError("No constructor defined - class is abstract") | |
296 | + __repr__ = _swig_repr | |
297 | + __swig_getmethods__["getVersion"] = lambda x: _morfeusz2.Morfeusz_getVersion | |
298 | + if _newclass:getVersion = staticmethod(_morfeusz2.Morfeusz_getVersion) | |
299 | + __swig_getmethods__["getDefaultDictName"] = lambda x: _morfeusz2.Morfeusz_getDefaultDictName | |
300 | + if _newclass:getDefaultDictName = staticmethod(_morfeusz2.Morfeusz_getDefaultDictName) | |
301 | + __swig_getmethods__["getCopyright"] = lambda x: _morfeusz2.Morfeusz_getCopyright | |
302 | + if _newclass:getCopyright = staticmethod(_morfeusz2.Morfeusz_getCopyright) | |
303 | + __swig_getmethods__["createInstance"] = lambda x: _morfeusz2.Morfeusz_createInstance | |
304 | + if _newclass:createInstance = staticmethod(_morfeusz2.Morfeusz_createInstance) | |
305 | + def getDictID(self): return _morfeusz2.Morfeusz_getDictID(self) | |
306 | + def getDictCopyright(self): return _morfeusz2.Morfeusz_getDictCopyright(self) | |
307 | + def clone(self): return _morfeusz2.Morfeusz_clone(self) | |
308 | + __swig_destroy__ = _morfeusz2.delete_Morfeusz | |
309 | + __del__ = lambda self : None; | |
310 | + def analyse(self, text): | |
311 | + """ | |
312 | + Analyse given text and return a list of MorphInterpretation objects. | |
313 | + """ | |
314 | + res = InterpsList() | |
315 | + _morfeusz2.Morfeusz_analyse(self, text, res) | |
316 | + return res | |
317 | + | |
318 | + | |
319 | + def generate(self, lemma, tagId=None): | |
320 | + """ | |
321 | + Perform morphological synthesis on given text and return a list of MorphInterpretation objects. | |
322 | + """ | |
323 | + if tagId is not None: | |
324 | + return self._generateByTagId(lemma, tagId) | |
325 | + else: | |
326 | + res = InterpsList() | |
327 | + _morfeusz2.Morfeusz_generate(self, lemma, res) | |
328 | + return res | |
329 | + | |
330 | + | |
331 | + def setAggl(self, optionString): | |
332 | + """ | |
333 | + Select agglutination rules option | |
334 | + """ | |
335 | + _morfeusz2.Morfeusz_setAggl(self, optionString.encode('utf8')) | |
336 | + | |
337 | + | |
338 | + def getAggl(self): return _morfeusz2.Morfeusz_getAggl(self) | |
339 | + def setPraet(self, optionString): | |
340 | + """ | |
341 | + Select past tense segmentation | |
342 | + """ | |
343 | + _morfeusz2.Morfeusz_setPraet(self, optionString.encode('utf8')) | |
344 | + | |
345 | + | |
346 | + def getPraet(self): return _morfeusz2.Morfeusz_getPraet(self) | |
347 | + def setCaseHandling(self, option): | |
348 | + """ | |
349 | + Set case handling option (valid options are CONDITIONALLY_CASE_SENSITIVE, STRICTLY_CASE_SENSITIVE, IGNORE_CASE) | |
350 | + """ | |
351 | + _morfeusz2.Morfeusz_setCaseHandling(self, option) | |
352 | + | |
353 | + | |
354 | + def getCaseHandling(self): return _morfeusz2.Morfeusz_getCaseHandling(self) | |
355 | + def setTokenNumbering(self, option): | |
356 | + """ | |
357 | + Set token numbering option (valid options are SEPARATE_NUMBERING, CONTINUOUS_NUMBERING) | |
358 | + """ | |
359 | + _morfeusz2.Morfeusz_setTokenNumbering(self, option) | |
360 | + | |
361 | + | |
362 | + def getTokenNumbering(self): return _morfeusz2.Morfeusz_getTokenNumbering(self) | |
def setWhitespaceHandling(self, option):
    """
    Set whitespace handling option.

    Valid options are SKIP_WHITESPACES, KEEP_WHITESPACES and
    APPEND_WHITESPACES. The value is passed to the native layer as given.
    """
    _morfeusz2.Morfeusz_setWhitespaceHandling(self, option)
368 | + | |
369 | + | |
def getWhitespaceHandling(self):
    """Return the value reported by the native ``Morfeusz_getWhitespaceHandling`` call."""
    return _morfeusz2.Morfeusz_getWhitespaceHandling(self)
def getIdResolver(self):
    """
    Return the object produced by the native ``Morfeusz_getIdResolver`` call.

    NOTE(review): presumably an IdResolver proxy -- confirm against the
    native binding.
    """
    return _morfeusz2.Morfeusz_getIdResolver(self)
def setDictionary(self, dictName):
    """
    Set dictionary to be used by this instance (by name).

    ``dictName`` is encoded to UTF-8 before it is handed to the native
    ``Morfeusz_setDictionary`` call.
    """
    encoded_name = dictName.encode('utf8')
    _morfeusz2.Morfeusz_setDictionary(self, encoded_name)
377 | + | |
378 | + | |
# Expose ``dictionarySearchPaths``: the native setter/getter pair is recorded
# in the SWIG lookup tables, and when ``_newclass`` is truthy the same pair is
# additionally published as a property on the class.
__swig_setmethods__["dictionarySearchPaths"] = _morfeusz2.Morfeusz_dictionarySearchPaths_set
__swig_getmethods__["dictionarySearchPaths"] = _morfeusz2.Morfeusz_dictionarySearchPaths_get
if _newclass:dictionarySearchPaths = _swig_property(_morfeusz2.Morfeusz_dictionarySearchPaths_get, _morfeusz2.Morfeusz_dictionarySearchPaths_set)
def getAvailableAgglOptions(self):
    """Return the value reported by the native ``Morfeusz_getAvailableAgglOptions`` call."""
    return _morfeusz2.Morfeusz_getAvailableAgglOptions(self)
def getAvailablePraetOptions(self):
    """Return the value reported by the native ``Morfeusz_getAvailablePraetOptions`` call."""
    return _morfeusz2.Morfeusz_getAvailablePraetOptions(self)
def _generateByTagId(self, *args):
    """
    Forward ``args`` unchanged to the native ``Morfeusz__generateByTagId``.

    Used by ``generate`` when a tag id is supplied.
    """
    return _morfeusz2.Morfeusz__generateByTagId(self, *args)
def analyse_iter(self, text):
    """
    Analyse given text lazily.

    Returns the iterator over MorphInterpretation objects created by the
    native ``Morfeusz__analyseAsIterator`` call; ``text`` is passed through
    as given.
    """
    results = _morfeusz2.Morfeusz__analyseAsIterator(self, text)
    return results
390 | + | |
391 | + | |
# Re-export the native registration hook and call it with the Morfeusz proxy
# class defined above.
Morfeusz_swigregister = _morfeusz2.Morfeusz_swigregister
Morfeusz_swigregister(Morfeusz)
394 | + | |
# The def is rebound by the assignment right after it, so once the module is
# loaded this name refers directly to the native function.
def Morfeusz_getVersion():
    return _morfeusz2.Morfeusz_getVersion()
Morfeusz_getVersion = _morfeusz2.Morfeusz_getVersion
398 | + | |
# The def is rebound by the assignment right after it, so once the module is
# loaded this name refers directly to the native function.
def Morfeusz_getDefaultDictName():
    return _morfeusz2.Morfeusz_getDefaultDictName()
Morfeusz_getDefaultDictName = _morfeusz2.Morfeusz_getDefaultDictName
402 | + | |
# The def is rebound by the assignment right after it, so once the module is
# loaded this name refers directly to the native function.
def Morfeusz_getCopyright():
    return _morfeusz2.Morfeusz_getCopyright()
Morfeusz_getCopyright = _morfeusz2.Morfeusz_getCopyright
406 | + | |
# The def is rebound by the assignment right after it, so once the module is
# loaded this name refers directly to the native function.
def Morfeusz_createInstance(*args):
    return _morfeusz2.Morfeusz_createInstance(*args)
Morfeusz_createInstance = _morfeusz2.Morfeusz_createInstance
# Re-export the native module's ``cvar`` object under the same name.
cvar = _morfeusz2.cvar
411 | + | |
class ResultsIterator(_object):
    """
    Proxy for a native results iterator.

    ``__init__`` always raises AttributeError, so instances are not
    constructed directly from Python. ``hasNext``/``peek``/``next`` delegate
    to the native functions of the same name; ``next`` raises StopIteration
    once ``hasNext`` reports nothing is left.
    """
    __swig_setmethods__ = {}
    __swig_getmethods__ = {}

    def __setattr__(self, name, value):
        return _swig_setattr(self, ResultsIterator, name, value)

    def __getattr__(self, name):
        return _swig_getattr(self, ResultsIterator, name)

    def __init__(self, *args, **kwargs):
        raise AttributeError("No constructor defined - class is abstract")

    __repr__ = _swig_repr

    def hasNext(self):
        return _morfeusz2.ResultsIterator_hasNext(self)

    def peek(self):
        return _morfeusz2.ResultsIterator_peek(self)

    def next(self):
        # Check availability first so exhaustion is reported as StopIteration.
        if not self.hasNext():
            raise StopIteration
        return _morfeusz2.ResultsIterator_next(self)

    __swig_destroy__ = _morfeusz2.delete_ResultsIterator

    def __del__(self):
        return None

    def __iter__(self):
        return _morfeusz2.ResultsIterator___iter__(self)


ResultsIterator_swigregister = _morfeusz2.ResultsIterator_swigregister
ResultsIterator_swigregister(ResultsIterator)
433 | + | |
class IdResolver(_object):
    """
    Proxy translating between numeric ids and their textual forms.

    ``getTag``, ``getName``, ``getLabelsAsUnicode`` and ``getLabels`` decode
    the native results from UTF-8; ``getTagId``, ``getNameId`` and
    ``getLabelsId`` encode their text argument to UTF-8 before the native
    call. ``__init__`` always raises AttributeError, so instances are not
    constructed directly from Python.
    """
    __swig_setmethods__ = {}
    __swig_getmethods__ = {}

    def __setattr__(self, name, value):
        return _swig_setattr(self, IdResolver, name, value)

    def __getattr__(self, name):
        return _swig_getattr(self, IdResolver, name)

    def __init__(self, *args, **kwargs):
        raise AttributeError("No constructor defined - class is abstract")

    __repr__ = _swig_repr

    def getTagsetId(self):
        return _morfeusz2.IdResolver_getTagsetId(self)

    def getTag(self, tagId):
        raw_tag = _morfeusz2.IdResolver_getTag(self, tagId)
        return raw_tag.decode('utf8')

    def getTagId(self, tag):
        encoded_tag = tag.encode('utf8')
        return _morfeusz2.IdResolver_getTagId(self, encoded_tag)

    def getName(self, nameId):
        raw_name = _morfeusz2.IdResolver_getName(self, nameId)
        return raw_name.decode('utf8')

    def getNameId(self, name):
        encoded_name = name.encode('utf8')
        return _morfeusz2.IdResolver_getNameId(self, encoded_name)

    def getLabelsAsUnicode(self, labelsId):
        raw_labels = _morfeusz2.IdResolver_getLabelsAsString(self, labelsId)
        return raw_labels.decode('utf8')

    def getLabels(self, labelsId):
        raw_labels = _morfeusz2.IdResolver_getLabels(self, labelsId)
        return set(label.decode('utf8') for label in raw_labels)

    def getLabelsId(self, labelsStr):
        encoded_labels = labelsStr.encode('utf8')
        return _morfeusz2.IdResolver_getLabelsId(self, encoded_labels)

    def getTagsCount(self):
        return _morfeusz2.IdResolver_getTagsCount(self)

    def getNamesCount(self):
        return _morfeusz2.IdResolver_getNamesCount(self)

    def getLabelsCount(self):
        return _morfeusz2.IdResolver_getLabelsCount(self)

    __swig_destroy__ = _morfeusz2.delete_IdResolver

    def __del__(self):
        return None


IdResolver_swigregister = _morfeusz2.IdResolver_swigregister
IdResolver_swigregister(IdResolver)
477 | + | |
class MorphInterpretation(_object):
    """
    Proxy for a single native morphological interpretation.

    ``startNode``, ``endNode``, ``tagId``, ``nameId`` and ``labelsId`` are
    backed by the native getter/setter pairs registered below. ``orth`` and
    ``lemma`` are convenience properties that decode/encode the raw
    ``_orth``/``_lemma`` fields as UTF-8.
    """
    __swig_setmethods__ = {}
    __setattr__ = lambda self, name, value: _swig_setattr(self, MorphInterpretation, name, value)
    __swig_getmethods__ = {}
    __getattr__ = lambda self, name: _swig_getattr(self, MorphInterpretation, name)
    __repr__ = _swig_repr

    def __init__(self):
        this = _morfeusz2.new_MorphInterpretation()
        # Attach the native handle: append to an existing ``this`` when there
        # is one, otherwise store it. Only ordinary exceptions trigger the
        # fallback, so KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            self.this.append(this)
        except Exception:
            self.this = this

    __swig_getmethods__["createIgn"] = lambda x: _morfeusz2.MorphInterpretation_createIgn
    if _newclass:
        createIgn = staticmethod(_morfeusz2.MorphInterpretation_createIgn)
    __swig_getmethods__["createWhitespace"] = lambda x: _morfeusz2.MorphInterpretation_createWhitespace
    if _newclass:
        createWhitespace = staticmethod(_morfeusz2.MorphInterpretation_createWhitespace)

    def isIgn(self):
        """Return the result of the native ``MorphInterpretation_isIgn`` call."""
        return _morfeusz2.MorphInterpretation_isIgn(self)

    def isWhitespace(self):
        """Return the result of the native ``MorphInterpretation_isWhitespace`` call."""
        return _morfeusz2.MorphInterpretation_isWhitespace(self)

    def getTag(self, morfeusz):
        """
        Returns tag as string.
        """
        return _morfeusz2.MorphInterpretation_getTag(self, morfeusz)

    def getName(self, morfeusz):
        """
        Returns this interpretation named entity as string
        """
        return _morfeusz2.MorphInterpretation_getName(self, morfeusz)

    def getLabelsAsUnicode(self, morfeusz):
        """
        Returns this interpretation labels as string (decoded from UTF-8)
        """
        return _morfeusz2.MorphInterpretation_getLabelsAsString(self, morfeusz).decode('utf8')

    def getLabels(self, morfeusz):
        """
        Returns this interpretation labels as a set of strings
        """
        return { l.decode('utf8') for l in _morfeusz2.MorphInterpretation_getLabels(self, morfeusz) }

    # Native fields: each one is entered in the SWIG lookup tables and, when
    # ``_newclass`` is truthy, also published as a property.
    __swig_setmethods__["startNode"] = _morfeusz2.MorphInterpretation_startNode_set
    __swig_getmethods__["startNode"] = _morfeusz2.MorphInterpretation_startNode_get
    if _newclass:
        startNode = _swig_property(_morfeusz2.MorphInterpretation_startNode_get, _morfeusz2.MorphInterpretation_startNode_set)
    __swig_setmethods__["endNode"] = _morfeusz2.MorphInterpretation_endNode_set
    __swig_getmethods__["endNode"] = _morfeusz2.MorphInterpretation_endNode_get
    if _newclass:
        endNode = _swig_property(_morfeusz2.MorphInterpretation_endNode_get, _morfeusz2.MorphInterpretation_endNode_set)
    __swig_setmethods__["_orth"] = _morfeusz2.MorphInterpretation__orth_set
    __swig_getmethods__["_orth"] = _morfeusz2.MorphInterpretation__orth_get
    if _newclass:
        _orth = _swig_property(_morfeusz2.MorphInterpretation__orth_get, _morfeusz2.MorphInterpretation__orth_set)
    __swig_setmethods__["_lemma"] = _morfeusz2.MorphInterpretation__lemma_set
    __swig_getmethods__["_lemma"] = _morfeusz2.MorphInterpretation__lemma_get
    if _newclass:
        _lemma = _swig_property(_morfeusz2.MorphInterpretation__lemma_get, _morfeusz2.MorphInterpretation__lemma_set)
    __swig_setmethods__["tagId"] = _morfeusz2.MorphInterpretation_tagId_set
    __swig_getmethods__["tagId"] = _morfeusz2.MorphInterpretation_tagId_get
    if _newclass:
        tagId = _swig_property(_morfeusz2.MorphInterpretation_tagId_get, _morfeusz2.MorphInterpretation_tagId_set)
    __swig_setmethods__["nameId"] = _morfeusz2.MorphInterpretation_nameId_set
    __swig_getmethods__["nameId"] = _morfeusz2.MorphInterpretation_nameId_get
    if _newclass:
        nameId = _swig_property(_morfeusz2.MorphInterpretation_nameId_get, _morfeusz2.MorphInterpretation_nameId_set)
    __swig_setmethods__["labelsId"] = _morfeusz2.MorphInterpretation_labelsId_set
    __swig_getmethods__["labelsId"] = _morfeusz2.MorphInterpretation_labelsId_get
    if _newclass:
        labelsId = _swig_property(_morfeusz2.MorphInterpretation_labelsId_get, _morfeusz2.MorphInterpretation_labelsId_set)

    @property
    def orth(self):
        # Raw ``_orth`` decoded from UTF-8.
        return self._orth.decode('utf8')

    @orth.setter
    def orth(self, val):
        self._orth = val.encode('utf8')

    @property
    def lemma(self):
        # Raw ``_lemma`` decoded from UTF-8.
        return self._lemma.decode('utf8')

    @lemma.setter
    def lemma(self, val):
        self._lemma = val.encode('utf8')

    __swig_destroy__ = _morfeusz2.delete_MorphInterpretation
    __del__ = lambda self : None


MorphInterpretation_swigregister = _morfeusz2.MorphInterpretation_swigregister
MorphInterpretation_swigregister(MorphInterpretation)
563 | + | |
# The def is rebound by the assignment right after it, so once the module is
# loaded this name refers directly to the native function.
def MorphInterpretation_createIgn(*args):
    return _morfeusz2.MorphInterpretation_createIgn(*args)
MorphInterpretation_createIgn = _morfeusz2.MorphInterpretation_createIgn
567 | + | |
# The def is rebound by the assignment right after it, so once the module is
# loaded this name refers directly to the native function.
def MorphInterpretation_createWhitespace(*args):
    return _morfeusz2.MorphInterpretation_createWhitespace(*args)
MorphInterpretation_createWhitespace = _morfeusz2.MorphInterpretation_createWhitespace
571 | + | |
572 | +# This file is compatible with both classic and new-style classes. | |
573 | + | |
574 | + | |
... | ... |
dictionary/ajax_argument_form.py
... | ... | @@ -27,7 +27,6 @@ from django.db.models import Count, Q, Sum |
27 | 27 | from accounts.models import can_modify_phraseology_only |
28 | 28 | from common.decorators import render, ajax, AjaxError |
29 | 29 | from common.js_to_obj import jsArgToObj |
30 | -from common.morfeusz import analyse | |
31 | 30 | from dictionary.forms import AddArgumentForm, ArgPropositionsForm, Atribute_Model, \ |
32 | 31 | AtributeChoiceForm, AtributeTextForm, ValueAttrMultiValueForm, \ |
33 | 32 | TextAttrMultiValueForm, PositionsForm, SelectArgumentForm, \ |
... | ... | @@ -38,6 +37,7 @@ from dictionary.models import Argument, Argument_Model, Atribute, Atribute_Value |
38 | 37 | sortatributes, sortPositions, sortArguments, get_or_create_attr_parameter, \ |
39 | 38 | get_attr_models_to_exclude, is_morfeusz_exception, get_or_create_attribute, \ |
40 | 39 | get_or_create_parameter_attr_value |
40 | +from settings import MORFEUSZ2 | |
41 | 41 | |
42 | 42 | @render('argument_form.html') |
43 | 43 | @ajax(method='post', encode_result=False) |
... | ... | @@ -317,7 +317,6 @@ def create_type_form(pos, arg_model, phraseologic_modification=False, |
317 | 317 | def create_attributes_forms(pos, arg_model, subforms_values, form_type='standard'): |
318 | 318 | sheets = [] |
319 | 319 | attribute_models = get_attribute_models(arg_model, subforms_values) |
320 | - #attr_values_to_exclude = get_attr_values_to_exclude(arg_model) | |
321 | 320 | param_models_to_exclude = get_parameter_types_to_exclude(arg_model) |
322 | 321 | for i in range(len(attribute_models)): |
323 | 322 | attr_form_values = [] |
... | ... | @@ -488,8 +487,8 @@ def get_parameter_types_to_exclude(arg_model): |
488 | 487 | def is_correct_lemma(argument_model, attribute_model, lemma): |
489 | 488 | correct_form = False |
490 | 489 | possible_pos_tags = argument_model.get_possible_lemma_tags(attribute_model) |
491 | - for interp in analyse(lemma): | |
492 | - if (is_single_word(interp) and base_form_correct(interp, lemma) and | |
490 | + for interp in MORFEUSZ2.analyse(lemma.encode('utf8')): | |
491 | + if (base_form_correct(interp, lemma) and | |
493 | 492 | pos_tag_correct(interp, possible_pos_tags)): |
494 | 493 | correct_form = True |
495 | 494 | break |
... | ... | @@ -501,18 +500,13 @@ def is_correct_lemma(argument_model, attribute_model, lemma): |
501 | 500 | break |
502 | 501 | return correct_form |
503 | 502 | |
504 | -def is_single_word(interp): | |
505 | - if len(interp) == 1: | |
506 | - return True | |
507 | - return False | |
508 | - | |
def base_form_correct(interp, lemma):
    """Return True when the interpretation's ``lemma`` equals ``lemma``, else False."""
    return interp.lemma == lemma
513 | 507 | |
514 | 508 | def pos_tag_correct(interp, possible_pos_tags): |
515 | - tagstr = interp[0][2] | |
509 | + tagstr = interp.getTag(MORFEUSZ2) | |
516 | 510 | pos_tag = tagstr.split(':')[0] |
517 | 511 | if possible_pos_tags.filter(name=pos_tag).exists(): |
518 | 512 | return True |
... | ... | @@ -520,16 +514,18 @@ def pos_tag_correct(interp, possible_pos_tags): |
520 | 514 | |
def contains_separator(lemma):
    """
    Report whether analysing ``lemma`` yields a token other than ``lemma``.

    The UTF-8 encoded lemma is analysed through ``MORFEUSZ2.analyse_iter``;
    the function returns True at the first result whose ``orth`` differs
    from ``lemma`` and False when every result matches (or there are none).
    """
    results = MORFEUSZ2.analyse_iter(lemma.encode('utf8'))
    while results.hasNext():
        if results.peek().orth != lemma:
            return True
        results.next()
    return False
528 | 524 | |
529 | 525 | def is_preposition_case_pair_valid(preposition_obj, case_obj): |
530 | 526 | # postp is used by prepadjp |
531 | 527 | case_str = unicode(case_obj) |
532 | - prep_str = unicode(preposition_obj) | |
528 | + prep_str = unicode(preposition_obj).split()[-1] | |
533 | 529 | if case_str != 'postp': |
534 | 530 | # str is used by prepadjp |
535 | 531 | if case_str == 'str': |
... | ... | @@ -537,12 +533,14 @@ def is_preposition_case_pair_valid(preposition_obj, case_obj): |
537 | 533 | else: |
538 | 534 | pcase = [case_str] |
539 | 535 | for case in pcase: |
540 | - for interp in analyse(prep_str): | |
541 | - for token in interp: | |
542 | - tag = token[2].split(':') | |
543 | - if tag[0] == 'prep': | |
544 | - if tag[1] == case: | |
545 | - return True | |
536 | + for interp in MORFEUSZ2.analyse(prep_str.encode('utf8')): | |
537 | + tagstr = interp.getTag(MORFEUSZ2) | |
538 | + tag_parts = tagstr.split(':') | |
539 | + if len(tag_parts) > 1: | |
540 | + interp_pos = tag_parts[0] | |
541 | + interp_case = tag_parts[1] | |
542 | + if interp_pos == 'prep' and interp_case == case: | |
543 | + return True | |
546 | 544 | return False |
547 | 545 | return True |
548 | 546 | |
... | ... |
dictionary/ajax_lemma_view.py
... | ... | @@ -85,7 +85,6 @@ from accounts.models import UserSettings, RealizedLemma, UserStats, RealizedPhra |
85 | 85 | can_modify_phraseology_only, get_anon_profile |
86 | 86 | |
87 | 87 | from ajax_jqgrid import JqGridAjax, default_sort_rules, default_filter_rules |
88 | -from common.morfeusz import analyse | |
89 | 88 | |
90 | 89 | import locale |
91 | 90 | from functools import cmp_to_key |
... | ... |
dictionary/validation.py
... | ... | @@ -27,13 +27,13 @@ import operator |
27 | 27 | from django.db.models import Sum, Q |
28 | 28 | |
29 | 29 | from common.js_to_obj import frameObjToSerializableDict |
30 | -from common.morfeusz import analyse | |
31 | 30 | from dictionary.common_func import subframe_exists |
32 | 31 | from dictionary.convert_frames import frame_conversion |
33 | 32 | from dictionary.forms import FrameAspectForm, FrameOpinionForm |
34 | 33 | from dictionary.models import Argument_Model, AspectRelationsGroup, Frame_Char_Model, \ |
35 | 34 | Frame_Characteristic, Lemma, Position, PositionCategory, \ |
36 | 35 | get_attribute_models, sortArguments, sortPositions |
36 | +from settings import MORFEUSZ2 | |
37 | 37 | |
38 | 38 | def get_wrong_aspect_frames(lemma, frames): |
39 | 39 | wrong_aspect_frames = [] |
... | ... | @@ -47,18 +47,18 @@ def check_aspect(lemma, frame): |
47 | 47 | inf_present = False |
48 | 48 | frame_aspect_obj = frame.characteristics.get(type=u'ASPEKT') |
49 | 49 | frame_aspect = frame_aspect_obj.value.value |
50 | - interps = analyse(lemma.entry) | |
51 | - if interps[0][0][1] and frame_aspect != '_': | |
50 | + interps = MORFEUSZ2.analyse(lemma.entry.encode('utf8')) | |
51 | + if frame_aspect != '_': | |
52 | 52 | for interp in interps: |
53 | - for token in interp: | |
54 | - tag = token[2].split(':') | |
55 | - if tag[0] == 'inf': | |
56 | - inf_present = True | |
57 | - if tag[1] == frame_aspect: | |
58 | - good_aspect = True | |
59 | - break | |
60 | - if good_aspect: | |
61 | - break | |
53 | + tagstr = interp.getTag(MORFEUSZ2) | |
54 | + tag_parts = tagstr.split(':') | |
55 | + pos = tag_parts[0] | |
56 | + if pos == 'inf': | |
57 | + aspect = tag_parts[1] | |
58 | + inf_present = True | |
59 | + if aspect == frame_aspect: | |
60 | + good_aspect = True | |
61 | + break | |
62 | 62 | if good_aspect or not inf_present: |
63 | 63 | good_aspect = True |
64 | 64 | return good_aspect |
... | ... | @@ -84,13 +84,13 @@ def get_missing_aspects_msg(lemma): |
84 | 84 | |
def get_possible_aspects(lemma):
    """
    Collect the distinct aspects of the infinitive readings of ``lemma.entry``.

    Each interpretation's tag is split on ':'; for tags whose first part is
    'inf' the second part is recorded once, in first-seen order.
    """
    aspects = []
    for interp in MORFEUSZ2.analyse(lemma.entry.encode('utf8')):
        parts = interp.getTag(MORFEUSZ2).split(':')
        if parts[0] != 'inf':
            continue
        aspect = parts[1]
        if aspect not in aspects:
            aspects.append(aspect)
    return aspects
95 | 95 | |
96 | 96 | def match_arg_poss(arg_poss, frame): |
... | ... | @@ -355,7 +355,7 @@ def prep_check(arg): |
355 | 355 | #if arg.find('prep') != -1 and not arg.startswith('comprepnp('): |
356 | 356 | if arg.startswith('prep'): |
357 | 357 | params = arg[arg.find('(')+1:arg.find(')')].split(',') |
358 | - pform = params[0] | |
358 | + pform = params[0].split()[-1] | |
359 | 359 | pcase = params[1] |
360 | 360 | # postp is used by prepadjp |
361 | 361 | if pcase != 'postp': |
... | ... | @@ -365,12 +365,12 @@ def prep_check(arg): |
365 | 365 | else: |
366 | 366 | pcase = [pcase] |
367 | 367 | for case in pcase: |
368 | - for interp in analyse(pform): | |
369 | - for token in interp: | |
370 | - tag = token[2].split(':') | |
371 | - if tag[0] == 'prep': | |
372 | - if tag[1] == case: | |
373 | - return True | |
368 | + for interp in MORFEUSZ2.analyse(pform.encode('utf8')): | |
369 | + tagstr = interp.getTag(MORFEUSZ2) | |
370 | + tag_parts = tagstr.split(':') | |
371 | + pos = tag_parts[0] | |
372 | + if pos == 'prep' and tag_parts[1] == case: | |
373 | + return True | |
374 | 374 | return False |
375 | 375 | return True |
376 | 376 | |
... | ... |
manage.py