diff --git a/common/_morfeusz2.so b/common/_morfeusz2.so new file mode 100755 index 0000000..0ffdd07 --- /dev/null +++ b/common/_morfeusz2.so diff --git a/common/morfeusz.py b/common/morfeusz.py deleted file mode 100644 index d1a4136..0000000 --- a/common/morfeusz.py +++ /dev/null @@ -1,271 +0,0 @@ -# encoding=UTF-8 - -# Copyright © 2007, 2008, 2010, 2011 Jakub Wilk <jwilk@jwilk.net> -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the “Softwareâ€), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED “AS ISâ€, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -''' -Bindings for Morfeusz_, a Polish morphological analyser. - -.. _Morfeusz: - http://sgjp.pl/morfeusz/ -''' - -from __future__ import with_statement - -import collections -import ctypes -import sys - -py3k = sys.version_info >= (3, 0) - -if py3k: - import _thread as thread -else: - import thread -if not py3k: - from itertools import izip as zip - -if py3k: - unicode = str - -__author__ = 'Jakub Wilk <jwilk@jwilk.net>' -__version__ = '0.3300' -__all__ = ['analyse', 'about', 'expand_tags', 'ATTRIBUTES', 'VALUES'] - -ATTRIBUTES = ''' -subst=number case gender -depr=number case gender -adj=number case gender degree -adja= -adjc= -adjp= -adv=degree -num=number case gender accommodability -numcol=number case gender accommodability -ppron12=number case gender person accentability -ppron3=number case gender person accentability post_prepositionality -siebie=case -fin=number person aspect -bedzie=number person aspect -aglt=number person aspect vocalicity -praet=number gender aspect agglutination -impt=number person aspect -imps=aspect -inf=aspect -pcon=aspect -pant=aspect -ger=number case gender aspect negation -pact=number case gender aspect negation -ppas=number case gender aspect negation -winien=number gender aspect -pred= -prep=case vocalicity -conj= -comp= -brev=fullstoppedness -burk= -interj= -qub=vocalicity -xxs=number case gender -xxx= -interp= -ign= -sp= -''' -ATTRIBUTES = \ -dict( - (key, tuple(values.split())) - for line in ATTRIBUTES.splitlines() if line - for (key, values) in (line.split('=', 1),) -) - -VALUES = ''' -number=sg pl -case=nom gen dat acc inst loc voc -gender=m1 m2 m3 f n1 n2 p1 p2 p3 -person=pri sec ter -degree=pos comp sup -aspect=imperf perf -negation=aff neg -accentability=akc nakc -post_prepositionality=npraep praep -accommodability=congr rec -agglutination=agl nagl -vocalicity=nwok wok -fullstoppedness=pun npun -''' -VALUES = \ -dict( - (key, tuple(values.split())) - for line in VALUES.splitlines() if line - for (key, values) in (line.split('=', 1),) -) - -libmorfeusz = ctypes.CDLL('libmorfeusz.so.0') - -MORFOPT_ENCODING = 1 -MORFEUSZ_UTF_8 = 8 - -MORFOPT_WHITESPACE = 2 -MORFEUSZ_SKIP_WHITESPACE = 0 -MORFEUSZ_KEEP_WHITESPACE = 2 - -libmorfeusz.morfeusz_set_option(MORFOPT_ENCODING, MORFEUSZ_UTF_8) -libmorfeusz_lock = thread.allocate_lock() - -class InterpEdge(ctypes.Structure): - _fields_ = \ - ( - ('i', ctypes.c_int), - ('j', ctypes.c_int), - ('_orth', ctypes.c_char_p), - ('_base', ctypes.c_char_p), - ('_tags', ctypes.c_char_p) - ) - - if py3k: - @property - def tags(self): - if self._tags is not None: - return self._tags.decode('UTF-8') - else: - @property - def tags(self): - return self._tags - - @property - def orth(self): - if self._orth is not None: - return self._orth.decode('UTF-8') - - @property - def base(self): - if self._base is not None: - return self._base.decode('UTF-8') - -libmorfeusz_analyse = libmorfeusz.morfeusz_analyse -libmorfeusz_analyse.restype = ctypes.POINTER(InterpEdge) -libmorfeusz_about = libmorfeusz.morfeusz_about -libmorfeusz_about.restype = ctypes.c_char_p - -def expand_tags(tags, expand_dot=True, expand_underscore=True): - - if tags is None: - yield - return - tags = str(tags) - for tag in tags.split('|'): - tag = tag.split(':') - pos = tag.pop(0) - chunks = [(pos,)] - chunks += \ - ( - VALUES[attribute] if chunk == '_' and expand_underscore - else chunk.split('.') - for chunk, attribute in zip(tag, ATTRIBUTES[pos]) - ) - - if not expand_dot: - yield ':'.join('.'.join(values) for values in chunks) - continue - - def expand_chunks(i): - if i >= len(chunks): - yield () - else: - tail = tuple(expand_chunks(i + 1)) - for chunk_variant in chunks[i]: - for tail_variant in tail: - yield (chunk_variant,) + tail_variant - - for x in expand_chunks(0): - yield ':'.join(x) - -_expand_tags = expand_tags - -def _dont_expand_tags(s, **kwargs): - return [s] - -def analyse(text, expand_tags=True, expand_dot=True, expand_underscore=True, dag=False, keep_whitespace=False): - ''' - Analyse the text. - ''' - expand_tags = _expand_tags if expand_tags else _dont_expand_tags - text = unicode(text) - text = text.encode('UTF-8') - analyse = _analyse_as_dag if dag else _analyse_as_list - return analyse( - text=text, - expand_tags=expand_tags, - expand_dot=expand_dot, - expand_underscore=expand_underscore, - keep_whitespace=keep_whitespace - ) - -def _analyse_as_dag(text, expand_tags, expand_dot, expand_underscore, keep_whitespace): - result = [] - with libmorfeusz_lock: - if keep_whitespace: - if libmorfeusz.morfeusz_set_option(MORFOPT_WHITESPACE, MORFEUSZ_KEEP_WHITESPACE) != 1: - raise NotImplementedError("This version of Morfeusz doesn't support keep_whitespace") - for edge in libmorfeusz_analyse(text): - if edge.i == -1: - break - for tag in expand_tags(edge.tags, expand_dot=expand_dot, expand_underscore=expand_underscore): - result += [(edge.i, edge.j, (edge.orth, edge.base, tag))] - if keep_whitespace: - libmorfeusz.morfeusz_set_option(MORFOPT_WHITESPACE, MORFEUSZ_SKIP_WHITESPACE) - return result - -def _analyse_as_list(text, expand_tags, expand_dot, expand_underscore, keep_whitespace): - dag = collections.defaultdict(list) - with libmorfeusz_lock: - if keep_whitespace: - if libmorfeusz.morfeusz_set_option(MORFOPT_WHITESPACE, MORFEUSZ_KEEP_WHITESPACE) != 1: - raise NotImplementedError("This version of Morfeusz doesn't support keep_whitespace") - for edge in libmorfeusz_analyse(text): - if edge.i == -1: - break - for tag in expand_tags(edge.tags, expand_dot=expand_dot, expand_underscore=expand_underscore): - dag[edge.i] += [((edge.orth, edge.base, tag), edge.j)] - if keep_whitespace: - libmorfeusz.morfeusz_set_option(MORFOPT_WHITESPACE, MORFEUSZ_SKIP_WHITESPACE) - def expand_dag(i): - nexts = dag[i] - if not nexts: - yield [] - else: - for head, j in nexts: - for tail in expand_dag(j): - yield [head] + tail - return list(expand_dag(0)) - -def about(): - ''' - Return a string containing information on authors and version of the - underlying library. - ''' - about = libmorfeusz_about() - try: - return about.decode('UTF-8') - except UnicodeError: - return about.decode('ISO-8859-2') - -# vim:ts=4 sw=4 et diff --git a/common/morfeusz2.py b/common/morfeusz2.py new file mode 100644 index 0000000..ec7da29 --- /dev/null +++ b/common/morfeusz2.py @@ -0,0 +1,574 @@ +# This file was automatically generated by SWIG (http://www.swig.org). +# Version 2.0.4 +# +# Do not make changes to this file unless you know what you are doing--modify +# the SWIG interface file instead. + + + +from sys import version_info +if version_info >= (2,6,0): + def swig_import_helper(): + from os.path import dirname + import imp + fp = None + try: + fp, pathname, description = imp.find_module('_morfeusz2', [dirname(__file__)]) + except ImportError: + import _morfeusz2 + return _morfeusz2 + if fp is not None: + try: + _mod = imp.load_module('_morfeusz2', fp, pathname, description) + finally: + fp.close() + return _mod + _morfeusz2 = swig_import_helper() + del swig_import_helper +else: + import _morfeusz2 +del version_info +try: + _swig_property = property +except NameError: + pass # Python < 2.2 doesn't have 'property'. +def _swig_setattr_nondynamic(self,class_type,name,value,static=1): + if (name == "thisown"): return self.this.own(value) + if (name == "this"): + if type(value).__name__ == 'SwigPyObject': + self.__dict__[name] = value + return + method = class_type.__swig_setmethods__.get(name,None) + if method: return method(self,value) + if (not static): + self.__dict__[name] = value + else: + raise AttributeError("You cannot add attributes to %s" % self) + +def _swig_setattr(self,class_type,name,value): + return _swig_setattr_nondynamic(self,class_type,name,value,0) + +def _swig_getattr(self,class_type,name): + if (name == "thisown"): return self.this.own() + method = class_type.__swig_getmethods__.get(name,None) + if method: return method(self) + raise AttributeError(name) + +def _swig_repr(self): + try: strthis = "proxy of " + self.this.__repr__() + except: strthis = "" + return "<%s.%s; %s >" % (self.__class__.__module__, self.__class__.__name__, strthis,) + +try: + _object = object + _newclass = 1 +except AttributeError: + class _object : pass + _newclass = 0 + + +class SwigPyIterator(_object): + __swig_setmethods__ = {} + __setattr__ = lambda self, name, value: _swig_setattr(self, SwigPyIterator, name, value) + __swig_getmethods__ = {} + __getattr__ = lambda self, name: _swig_getattr(self, SwigPyIterator, name) + def __init__(self, *args, **kwargs): raise AttributeError("No constructor defined - class is abstract") + __repr__ = _swig_repr + __swig_destroy__ = _morfeusz2.delete_SwigPyIterator + __del__ = lambda self : None; + def value(self): return _morfeusz2.SwigPyIterator_value(self) + def incr(self, n = 1): return _morfeusz2.SwigPyIterator_incr(self, n) + def decr(self, n = 1): return _morfeusz2.SwigPyIterator_decr(self, n) + def distance(self, *args): return _morfeusz2.SwigPyIterator_distance(self, *args) + def equal(self, *args): return _morfeusz2.SwigPyIterator_equal(self, *args) + def copy(self): return _morfeusz2.SwigPyIterator_copy(self) + def next(self): return _morfeusz2.SwigPyIterator_next(self) + def __next__(self): return _morfeusz2.SwigPyIterator___next__(self) + def previous(self): return _morfeusz2.SwigPyIterator_previous(self) + def advance(self, *args): return _morfeusz2.SwigPyIterator_advance(self, *args) + def __eq__(self, *args): return _morfeusz2.SwigPyIterator___eq__(self, *args) + def __ne__(self, *args): return _morfeusz2.SwigPyIterator___ne__(self, *args) + def __iadd__(self, *args): return _morfeusz2.SwigPyIterator___iadd__(self, *args) + def __isub__(self, *args): return _morfeusz2.SwigPyIterator___isub__(self, *args) + def __add__(self, *args): return _morfeusz2.SwigPyIterator___add__(self, *args) + def __sub__(self, *args): return _morfeusz2.SwigPyIterator___sub__(self, *args) + def __iter__(self): return self +SwigPyIterator_swigregister = _morfeusz2.SwigPyIterator_swigregister +SwigPyIterator_swigregister(SwigPyIterator) + +class InterpsList(_object): + __swig_setmethods__ = {} + __setattr__ = lambda self, name, value: _swig_setattr(self, InterpsList, name, value) + __swig_getmethods__ = {} + __getattr__ = lambda self, name: _swig_getattr(self, InterpsList, name) + __repr__ = _swig_repr + def iterator(self): return _morfeusz2.InterpsList_iterator(self) + def __iter__(self): return self.iterator() + def __nonzero__(self): return _morfeusz2.InterpsList___nonzero__(self) + def __bool__(self): return _morfeusz2.InterpsList___bool__(self) + def __len__(self): return _morfeusz2.InterpsList___len__(self) + def pop(self): return _morfeusz2.InterpsList_pop(self) + def __getslice__(self, *args): return _morfeusz2.InterpsList___getslice__(self, *args) + def __setslice__(self, *args): return _morfeusz2.InterpsList___setslice__(self, *args) + def __delslice__(self, *args): return _morfeusz2.InterpsList___delslice__(self, *args) + def __delitem__(self, *args): return _morfeusz2.InterpsList___delitem__(self, *args) + def __getitem__(self, *args): return _morfeusz2.InterpsList___getitem__(self, *args) + def __setitem__(self, *args): return _morfeusz2.InterpsList___setitem__(self, *args) + def append(self, *args): return _morfeusz2.InterpsList_append(self, *args) + def empty(self): return _morfeusz2.InterpsList_empty(self) + def size(self): return _morfeusz2.InterpsList_size(self) + def clear(self): return _morfeusz2.InterpsList_clear(self) + def swap(self, *args): return _morfeusz2.InterpsList_swap(self, *args) + def get_allocator(self): return _morfeusz2.InterpsList_get_allocator(self) + def begin(self): return _morfeusz2.InterpsList_begin(self) + def end(self): return _morfeusz2.InterpsList_end(self) + def rbegin(self): return _morfeusz2.InterpsList_rbegin(self) + def rend(self): return _morfeusz2.InterpsList_rend(self) + def pop_back(self): return _morfeusz2.InterpsList_pop_back(self) + def erase(self, *args): return _morfeusz2.InterpsList_erase(self, *args) + def __init__(self, *args): + this = _morfeusz2.new_InterpsList(*args) + try: self.this.append(this) + except: self.this = this + def push_back(self, *args): return _morfeusz2.InterpsList_push_back(self, *args) + def front(self): return _morfeusz2.InterpsList_front(self) + def back(self): return _morfeusz2.InterpsList_back(self) + def assign(self, *args): return _morfeusz2.InterpsList_assign(self, *args) + def resize(self, *args): return _morfeusz2.InterpsList_resize(self, *args) + def insert(self, *args): return _morfeusz2.InterpsList_insert(self, *args) + def reserve(self, *args): return _morfeusz2.InterpsList_reserve(self, *args) + def capacity(self): return _morfeusz2.InterpsList_capacity(self) + __swig_destroy__ = _morfeusz2.delete_InterpsList + __del__ = lambda self : None; +InterpsList_swigregister = _morfeusz2.InterpsList_swigregister +InterpsList_swigregister(InterpsList) + +class StringsList(_object): + __swig_setmethods__ = {} + __setattr__ = lambda self, name, value: _swig_setattr(self, StringsList, name, value) + __swig_getmethods__ = {} + __getattr__ = lambda self, name: _swig_getattr(self, StringsList, name) + __repr__ = _swig_repr + def iterator(self): return _morfeusz2.StringsList_iterator(self) + def __iter__(self): return self.iterator() + def __nonzero__(self): return _morfeusz2.StringsList___nonzero__(self) + def __bool__(self): return _morfeusz2.StringsList___bool__(self) + def __len__(self): return _morfeusz2.StringsList___len__(self) + def pop(self): return _morfeusz2.StringsList_pop(self) + def __getslice__(self, *args): return _morfeusz2.StringsList___getslice__(self, *args) + def __setslice__(self, *args): return _morfeusz2.StringsList___setslice__(self, *args) + def __delslice__(self, *args): return _morfeusz2.StringsList___delslice__(self, *args) + def __delitem__(self, *args): return _morfeusz2.StringsList___delitem__(self, *args) + def __getitem__(self, *args): return _morfeusz2.StringsList___getitem__(self, *args) + def __setitem__(self, *args): return _morfeusz2.StringsList___setitem__(self, *args) + def append(self, *args): return _morfeusz2.StringsList_append(self, *args) + def empty(self): return _morfeusz2.StringsList_empty(self) + def size(self): return _morfeusz2.StringsList_size(self) + def clear(self): return _morfeusz2.StringsList_clear(self) + def swap(self, *args): return _morfeusz2.StringsList_swap(self, *args) + def get_allocator(self): return _morfeusz2.StringsList_get_allocator(self) + def begin(self): return _morfeusz2.StringsList_begin(self) + def end(self): return _morfeusz2.StringsList_end(self) + def rbegin(self): return _morfeusz2.StringsList_rbegin(self) + def rend(self): return _morfeusz2.StringsList_rend(self) + def pop_back(self): return _morfeusz2.StringsList_pop_back(self) + def erase(self, *args): return _morfeusz2.StringsList_erase(self, *args) + def __init__(self, *args): + this = _morfeusz2.new_StringsList(*args) + try: self.this.append(this) + except: self.this = this + def push_back(self, *args): return _morfeusz2.StringsList_push_back(self, *args) + def front(self): return _morfeusz2.StringsList_front(self) + def back(self): return _morfeusz2.StringsList_back(self) + def assign(self, *args): return _morfeusz2.StringsList_assign(self, *args) + def resize(self, *args): return _morfeusz2.StringsList_resize(self, *args) + def insert(self, *args): return _morfeusz2.StringsList_insert(self, *args) + def reserve(self, *args): return _morfeusz2.StringsList_reserve(self, *args) + def capacity(self): return _morfeusz2.StringsList_capacity(self) + __swig_destroy__ = _morfeusz2.delete_StringsList + __del__ = lambda self : None; +StringsList_swigregister = _morfeusz2.StringsList_swigregister +StringsList_swigregister(StringsList) + +class StringsLinkedList(_object): + __swig_setmethods__ = {} + __setattr__ = lambda self, name, value: _swig_setattr(self, StringsLinkedList, name, value) + __swig_getmethods__ = {} + __getattr__ = lambda self, name: _swig_getattr(self, StringsLinkedList, name) + __repr__ = _swig_repr + def iterator(self): return _morfeusz2.StringsLinkedList_iterator(self) + def __iter__(self): return self.iterator() + def __nonzero__(self): return _morfeusz2.StringsLinkedList___nonzero__(self) + def __bool__(self): return _morfeusz2.StringsLinkedList___bool__(self) + def __len__(self): return _morfeusz2.StringsLinkedList___len__(self) + def pop(self): return _morfeusz2.StringsLinkedList_pop(self) + def __getslice__(self, *args): return _morfeusz2.StringsLinkedList___getslice__(self, *args) + def __setslice__(self, *args): return _morfeusz2.StringsLinkedList___setslice__(self, *args) + def __delslice__(self, *args): return _morfeusz2.StringsLinkedList___delslice__(self, *args) + def __delitem__(self, *args): return _morfeusz2.StringsLinkedList___delitem__(self, *args) + def __getitem__(self, *args): return _morfeusz2.StringsLinkedList___getitem__(self, *args) + def __setitem__(self, *args): return _morfeusz2.StringsLinkedList___setitem__(self, *args) + def append(self, *args): return _morfeusz2.StringsLinkedList_append(self, *args) + def empty(self): return _morfeusz2.StringsLinkedList_empty(self) + def size(self): return _morfeusz2.StringsLinkedList_size(self) + def clear(self): return _morfeusz2.StringsLinkedList_clear(self) + def swap(self, *args): return _morfeusz2.StringsLinkedList_swap(self, *args) + def get_allocator(self): return _morfeusz2.StringsLinkedList_get_allocator(self) + def begin(self): return _morfeusz2.StringsLinkedList_begin(self) + def end(self): return _morfeusz2.StringsLinkedList_end(self) + def rbegin(self): return _morfeusz2.StringsLinkedList_rbegin(self) + def rend(self): return _morfeusz2.StringsLinkedList_rend(self) + def pop_back(self): return _morfeusz2.StringsLinkedList_pop_back(self) + def erase(self, *args): return _morfeusz2.StringsLinkedList_erase(self, *args) + def __init__(self, *args): + this = _morfeusz2.new_StringsLinkedList(*args) + try: self.this.append(this) + except: self.this = this + def push_back(self, *args): return _morfeusz2.StringsLinkedList_push_back(self, *args) + def front(self): return _morfeusz2.StringsLinkedList_front(self) + def back(self): return _morfeusz2.StringsLinkedList_back(self) + def assign(self, *args): return _morfeusz2.StringsLinkedList_assign(self, *args) + def resize(self, *args): return _morfeusz2.StringsLinkedList_resize(self, *args) + def insert(self, *args): return _morfeusz2.StringsLinkedList_insert(self, *args) + def pop_front(self): return _morfeusz2.StringsLinkedList_pop_front(self) + def push_front(self, *args): return _morfeusz2.StringsLinkedList_push_front(self, *args) + def reverse(self): return _morfeusz2.StringsLinkedList_reverse(self) + __swig_destroy__ = _morfeusz2.delete_StringsLinkedList + __del__ = lambda self : None; +StringsLinkedList_swigregister = _morfeusz2.StringsLinkedList_swigregister +StringsLinkedList_swigregister(StringsLinkedList) + +class StringsSet(_object): + __swig_setmethods__ = {} + __setattr__ = lambda self, name, value: _swig_setattr(self, StringsSet, name, value) + __swig_getmethods__ = {} + __getattr__ = lambda self, name: _swig_getattr(self, StringsSet, name) + __repr__ = _swig_repr + def iterator(self): return _morfeusz2.StringsSet_iterator(self) + def __iter__(self): return self.iterator() + def __nonzero__(self): return _morfeusz2.StringsSet___nonzero__(self) + def __bool__(self): return _morfeusz2.StringsSet___bool__(self) + def __len__(self): return _morfeusz2.StringsSet___len__(self) + def append(self, *args): return _morfeusz2.StringsSet_append(self, *args) + def __contains__(self, *args): return _morfeusz2.StringsSet___contains__(self, *args) + def __getitem__(self, *args): return _morfeusz2.StringsSet___getitem__(self, *args) + def __init__(self, *args): + this = _morfeusz2.new_StringsSet(*args) + try: self.this.append(this) + except: self.this = this + def empty(self): return _morfeusz2.StringsSet_empty(self) + def size(self): return _morfeusz2.StringsSet_size(self) + def clear(self): return _morfeusz2.StringsSet_clear(self) + def swap(self, *args): return _morfeusz2.StringsSet_swap(self, *args) + def count(self, *args): return _morfeusz2.StringsSet_count(self, *args) + def begin(self): return _morfeusz2.StringsSet_begin(self) + def end(self): return _morfeusz2.StringsSet_end(self) + def rbegin(self): return _morfeusz2.StringsSet_rbegin(self) + def rend(self): return _morfeusz2.StringsSet_rend(self) + def erase(self, *args): return _morfeusz2.StringsSet_erase(self, *args) + def find(self, *args): return _morfeusz2.StringsSet_find(self, *args) + def lower_bound(self, *args): return _morfeusz2.StringsSet_lower_bound(self, *args) + def upper_bound(self, *args): return _morfeusz2.StringsSet_upper_bound(self, *args) + def equal_range(self, *args): return _morfeusz2.StringsSet_equal_range(self, *args) + def insert(self, *args): return _morfeusz2.StringsSet_insert(self, *args) + __swig_destroy__ = _morfeusz2.delete_StringsSet + __del__ = lambda self : None; +StringsSet_swigregister = _morfeusz2.StringsSet_swigregister +StringsSet_swigregister(StringsSet) + +SEPARATE_NUMBERING = _morfeusz2.SEPARATE_NUMBERING +CONTINUOUS_NUMBERING = _morfeusz2.CONTINUOUS_NUMBERING +CONDITIONALLY_CASE_SENSITIVE = _morfeusz2.CONDITIONALLY_CASE_SENSITIVE +STRICTLY_CASE_SENSITIVE = _morfeusz2.STRICTLY_CASE_SENSITIVE +IGNORE_CASE = _morfeusz2.IGNORE_CASE +SKIP_WHITESPACES = _morfeusz2.SKIP_WHITESPACES +APPEND_WHITESPACES = _morfeusz2.APPEND_WHITESPACES +KEEP_WHITESPACES = _morfeusz2.KEEP_WHITESPACES +ANALYSE_ONLY = _morfeusz2.ANALYSE_ONLY +GENERATE_ONLY = _morfeusz2.GENERATE_ONLY +BOTH_ANALYSE_AND_GENERATE = _morfeusz2.BOTH_ANALYSE_AND_GENERATE +class Morfeusz(_object): + __swig_setmethods__ = {} + __setattr__ = lambda self, name, value: _swig_setattr(self, Morfeusz, name, value) + __swig_getmethods__ = {} + __getattr__ = lambda self, name: _swig_getattr(self, Morfeusz, name) + def __init__(self, *args, **kwargs): raise AttributeError("No constructor defined - class is abstract") + __repr__ = _swig_repr + __swig_getmethods__["getVersion"] = lambda x: _morfeusz2.Morfeusz_getVersion + if _newclass:getVersion = staticmethod(_morfeusz2.Morfeusz_getVersion) + __swig_getmethods__["getDefaultDictName"] = lambda x: _morfeusz2.Morfeusz_getDefaultDictName + if _newclass:getDefaultDictName = staticmethod(_morfeusz2.Morfeusz_getDefaultDictName) + __swig_getmethods__["getCopyright"] = lambda x: _morfeusz2.Morfeusz_getCopyright + if _newclass:getCopyright = staticmethod(_morfeusz2.Morfeusz_getCopyright) + __swig_getmethods__["createInstance"] = lambda x: _morfeusz2.Morfeusz_createInstance + if _newclass:createInstance = staticmethod(_morfeusz2.Morfeusz_createInstance) + def getDictID(self): return _morfeusz2.Morfeusz_getDictID(self) + def getDictCopyright(self): return _morfeusz2.Morfeusz_getDictCopyright(self) + def clone(self): return _morfeusz2.Morfeusz_clone(self) + __swig_destroy__ = _morfeusz2.delete_Morfeusz + __del__ = lambda self : None; + def analyse(self, text): + """ + Analyse given text and return a list of MorphInterpretation objects. + """ + res = InterpsList() + _morfeusz2.Morfeusz_analyse(self, text, res) + return res + + + def generate(self, lemma, tagId=None): + """ + Perform morphological synthesis on given text and return a list of MorphInterpretation objects. + """ + if tagId is not None: + return self._generateByTagId(lemma, tagId) + else: + res = InterpsList() + _morfeusz2.Morfeusz_generate(self, lemma, res) + return res + + + def setAggl(self, optionString): + """ + Select agglutination rules option + """ + _morfeusz2.Morfeusz_setAggl(self, optionString.encode('utf8')) + + + def getAggl(self): return _morfeusz2.Morfeusz_getAggl(self) + def setPraet(self, optionString): + """ + Select past tense segmentation + """ + _morfeusz2.Morfeusz_setPraet(self, optionString.encode('utf8')) + + + def getPraet(self): return _morfeusz2.Morfeusz_getPraet(self) + def setCaseHandling(self, option): + """ + Set case handling option (valid options are CONDITIONALLY_CASE_SENSITIVE, STRICTLY_CASE_SENSITIVE, IGNORE_CASE) + """ + _morfeusz2.Morfeusz_setCaseHandling(self, option) + + + def getCaseHandling(self): return _morfeusz2.Morfeusz_getCaseHandling(self) + def setTokenNumbering(self, option): + """ + Set token numbering option (valid options are SEPARATE_NUMBERING, CONTINUOUS_NUMBERING) + """ + _morfeusz2.Morfeusz_setTokenNumbering(self, option) + + + def getTokenNumbering(self): return _morfeusz2.Morfeusz_getTokenNumbering(self) + def setWhitespaceHandling(self, option): + """ + Set whitespace handling handling option (valid options are SKIP_WHITESPACES, KEEP_WHITESPACES, APPEND_WHITESPACES) + """ + _morfeusz2.Morfeusz_setWhitespaceHandling(self, option) + + + def getWhitespaceHandling(self): return _morfeusz2.Morfeusz_getWhitespaceHandling(self) + def getIdResolver(self): return _morfeusz2.Morfeusz_getIdResolver(self) + def setDictionary(self, dictName): + """ + Set dictionary to be used by this instance (by name) + """ + _morfeusz2.Morfeusz_setDictionary(self, dictName.encode('utf8')) + + + __swig_setmethods__["dictionarySearchPaths"] = _morfeusz2.Morfeusz_dictionarySearchPaths_set + __swig_getmethods__["dictionarySearchPaths"] = _morfeusz2.Morfeusz_dictionarySearchPaths_get + if _newclass:dictionarySearchPaths = _swig_property(_morfeusz2.Morfeusz_dictionarySearchPaths_get, _morfeusz2.Morfeusz_dictionarySearchPaths_set) + def getAvailableAgglOptions(self): return _morfeusz2.Morfeusz_getAvailableAgglOptions(self) + def getAvailablePraetOptions(self): return _morfeusz2.Morfeusz_getAvailablePraetOptions(self) + def _generateByTagId(self, *args): return _morfeusz2.Morfeusz__generateByTagId(self, *args) + def analyse_iter(self, text): + """ + Analyse given text and return an iterator over MorphInterpretation objects as a result. + """ + return _morfeusz2.Morfeusz__analyseAsIterator(self, text) + + +Morfeusz_swigregister = _morfeusz2.Morfeusz_swigregister +Morfeusz_swigregister(Morfeusz) + +def Morfeusz_getVersion(): + return _morfeusz2.Morfeusz_getVersion() +Morfeusz_getVersion = _morfeusz2.Morfeusz_getVersion + +def Morfeusz_getDefaultDictName(): + return _morfeusz2.Morfeusz_getDefaultDictName() +Morfeusz_getDefaultDictName = _morfeusz2.Morfeusz_getDefaultDictName + +def Morfeusz_getCopyright(): + return _morfeusz2.Morfeusz_getCopyright() +Morfeusz_getCopyright = _morfeusz2.Morfeusz_getCopyright + +def Morfeusz_createInstance(*args): + return _morfeusz2.Morfeusz_createInstance(*args) +Morfeusz_createInstance = _morfeusz2.Morfeusz_createInstance +cvar = _morfeusz2.cvar + +class ResultsIterator(_object): + __swig_setmethods__ = {} + __setattr__ = lambda self, name, value: _swig_setattr(self, ResultsIterator, name, value) + __swig_getmethods__ = {} + __getattr__ = lambda self, name: _swig_getattr(self, ResultsIterator, name) + def __init__(self, *args, **kwargs): raise AttributeError("No constructor defined - class is abstract") + __repr__ = _swig_repr + def hasNext(self): return _morfeusz2.ResultsIterator_hasNext(self) + def peek(self): return _morfeusz2.ResultsIterator_peek(self) + def next(self): + if self.hasNext(): + return _morfeusz2.ResultsIterator_next(self) + else: + raise StopIteration + + + __swig_destroy__ = _morfeusz2.delete_ResultsIterator + __del__ = lambda self : None; + def __iter__(self): return _morfeusz2.ResultsIterator___iter__(self) +ResultsIterator_swigregister = _morfeusz2.ResultsIterator_swigregister +ResultsIterator_swigregister(ResultsIterator) + +class IdResolver(_object): + __swig_setmethods__ = {} + __setattr__ = lambda self, name, value: _swig_setattr(self, IdResolver, name, value) + __swig_getmethods__ = {} + __getattr__ = lambda self, name: _swig_getattr(self, IdResolver, name) + def __init__(self, *args, **kwargs): raise AttributeError("No constructor defined - class is abstract") + __repr__ = _swig_repr + def getTagsetId(self): return _morfeusz2.IdResolver_getTagsetId(self) + def getTag(self, tagId): + return _morfeusz2.IdResolver_getTag(self, tagId).decode('utf8') + + + def getTagId(self, tag): + return _morfeusz2.IdResolver_getTagId(self, tag.encode('utf8')) + + + def getName(self, nameId): + return _morfeusz2.IdResolver_getName(self, nameId).decode('utf8') + + + def getNameId(self, name): + return _morfeusz2.IdResolver_getNameId(self, name.encode('utf8')) + + + def getLabelsAsUnicode(self, labelsId): + return _morfeusz2.IdResolver_getLabelsAsString(self, labelsId).decode('utf8') + + + def getLabels(self, labelsId): + return { l.decode('utf8') for l in _morfeusz2.IdResolver_getLabels(self, labelsId) } + + + def getLabelsId(self, labelsStr): + return _morfeusz2.IdResolver_getLabelsId(self, labelsStr.encode('utf8')) + + + def getTagsCount(self): return _morfeusz2.IdResolver_getTagsCount(self) + def getNamesCount(self): return _morfeusz2.IdResolver_getNamesCount(self) + def getLabelsCount(self): return _morfeusz2.IdResolver_getLabelsCount(self) + __swig_destroy__ = _morfeusz2.delete_IdResolver + __del__ = lambda self : None; +IdResolver_swigregister = _morfeusz2.IdResolver_swigregister +IdResolver_swigregister(IdResolver) + +class MorphInterpretation(_object): + __swig_setmethods__ = {} + __setattr__ = lambda self, name, value: _swig_setattr(self, MorphInterpretation, name, value) + __swig_getmethods__ = {} + __getattr__ = lambda self, name: _swig_getattr(self, MorphInterpretation, name) + __repr__ = _swig_repr + def __init__(self): + this = _morfeusz2.new_MorphInterpretation() + try: self.this.append(this) + except: self.this = this + __swig_getmethods__["createIgn"] = lambda x: _morfeusz2.MorphInterpretation_createIgn + if _newclass:createIgn = staticmethod(_morfeusz2.MorphInterpretation_createIgn) + __swig_getmethods__["createWhitespace"] = lambda x: _morfeusz2.MorphInterpretation_createWhitespace + if _newclass:createWhitespace = staticmethod(_morfeusz2.MorphInterpretation_createWhitespace) + def isIgn(self): return _morfeusz2.MorphInterpretation_isIgn(self) + def isWhitespace(self): return _morfeusz2.MorphInterpretation_isWhitespace(self) + def getTag(self, morfeusz): + """ + Returns tag as string. + """ + return _morfeusz2.MorphInterpretation_getTag(self, morfeusz) + + + def getName(self, morfeusz): + """ + Returns this interpretation named entity as string + """ + return _morfeusz2.MorphInterpretation_getName(self, morfeusz) + + + def getLabelsAsUnicode(self, morfeusz): + """ + Returns this interpretation labels as string + """ + return _morfeusz2.MorphInterpretation_getLabelsAsString(self, morfeusz).decode('utf8') + + + def getLabels(self, morfeusz): + """ + Returns this interpretation labels as a set of strings + """ + return { l.decode('utf8') for l in _morfeusz2.MorphInterpretation_getLabels(self, morfeusz) } + + + __swig_setmethods__["startNode"] = _morfeusz2.MorphInterpretation_startNode_set + __swig_getmethods__["startNode"] = _morfeusz2.MorphInterpretation_startNode_get + if _newclass:startNode = _swig_property(_morfeusz2.MorphInterpretation_startNode_get, _morfeusz2.MorphInterpretation_startNode_set) + __swig_setmethods__["endNode"] = _morfeusz2.MorphInterpretation_endNode_set + __swig_getmethods__["endNode"] = _morfeusz2.MorphInterpretation_endNode_get + if _newclass:endNode = _swig_property(_morfeusz2.MorphInterpretation_endNode_get, _morfeusz2.MorphInterpretation_endNode_set) + __swig_setmethods__["_orth"] = _morfeusz2.MorphInterpretation__orth_set + __swig_getmethods__["_orth"] = _morfeusz2.MorphInterpretation__orth_get + if _newclass:_orth = _swig_property(_morfeusz2.MorphInterpretation__orth_get, _morfeusz2.MorphInterpretation__orth_set) + __swig_setmethods__["_lemma"] = _morfeusz2.MorphInterpretation__lemma_set + __swig_getmethods__["_lemma"] = _morfeusz2.MorphInterpretation__lemma_get + if _newclass:_lemma = _swig_property(_morfeusz2.MorphInterpretation__lemma_get, _morfeusz2.MorphInterpretation__lemma_set) + __swig_setmethods__["tagId"] = _morfeusz2.MorphInterpretation_tagId_set + __swig_getmethods__["tagId"] = _morfeusz2.MorphInterpretation_tagId_get + if _newclass:tagId = _swig_property(_morfeusz2.MorphInterpretation_tagId_get, _morfeusz2.MorphInterpretation_tagId_set) + __swig_setmethods__["nameId"] = _morfeusz2.MorphInterpretation_nameId_set + __swig_getmethods__["nameId"] = _morfeusz2.MorphInterpretation_nameId_get + if _newclass:nameId = _swig_property(_morfeusz2.MorphInterpretation_nameId_get, _morfeusz2.MorphInterpretation_nameId_set) + __swig_setmethods__["labelsId"] = _morfeusz2.MorphInterpretation_labelsId_set + __swig_getmethods__["labelsId"] = _morfeusz2.MorphInterpretation_labelsId_get + if _newclass:labelsId = _swig_property(_morfeusz2.MorphInterpretation_labelsId_get, _morfeusz2.MorphInterpretation_labelsId_set) + @property + def orth(self): + return self._orth.decode('utf8') + + @orth.setter + def orth(self, val): + self._orth = val.encode('utf8') + + @property + def lemma(self): + return self._lemma.decode('utf8') + + @lemma.setter + def lemma(self, val): + self._lemma = val.encode('utf8') + + __swig_destroy__ = _morfeusz2.delete_MorphInterpretation + __del__ = lambda self : None; +MorphInterpretation_swigregister = _morfeusz2.MorphInterpretation_swigregister +MorphInterpretation_swigregister(MorphInterpretation) + +def MorphInterpretation_createIgn(*args): + return _morfeusz2.MorphInterpretation_createIgn(*args) +MorphInterpretation_createIgn = _morfeusz2.MorphInterpretation_createIgn + +def MorphInterpretation_createWhitespace(*args): + return _morfeusz2.MorphInterpretation_createWhitespace(*args) +MorphInterpretation_createWhitespace = _morfeusz2.MorphInterpretation_createWhitespace + +# This file is compatible with both classic and new-style classes. + + diff --git a/dictionary/ajax_argument_form.py b/dictionary/ajax_argument_form.py index ca8812c..9f75bd6 100644 --- a/dictionary/ajax_argument_form.py +++ b/dictionary/ajax_argument_form.py @@ -27,7 +27,6 @@ from django.db.models import Count, Q, Sum from accounts.models import can_modify_phraseology_only from common.decorators import render, ajax, AjaxError from common.js_to_obj import jsArgToObj -from common.morfeusz import analyse from dictionary.forms import AddArgumentForm, ArgPropositionsForm, Atribute_Model, \ AtributeChoiceForm, AtributeTextForm, ValueAttrMultiValueForm, \ TextAttrMultiValueForm, PositionsForm, SelectArgumentForm, \ @@ -38,6 +37,7 @@ from dictionary.models import Argument, Argument_Model, Atribute, Atribute_Value sortatributes, sortPositions, sortArguments, get_or_create_attr_parameter, \ get_attr_models_to_exclude, is_morfeusz_exception, get_or_create_attribute, \ get_or_create_parameter_attr_value +from settings import MORFEUSZ2 @render('argument_form.html') @ajax(method='post', encode_result=False) @@ -317,7 +317,6 @@ def create_type_form(pos, arg_model, phraseologic_modification=False, def create_attributes_forms(pos, arg_model, subforms_values, form_type='standard'): sheets = [] attribute_models = get_attribute_models(arg_model, subforms_values) - #attr_values_to_exclude = get_attr_values_to_exclude(arg_model) param_models_to_exclude = get_parameter_types_to_exclude(arg_model) for i in range(len(attribute_models)): attr_form_values = [] @@ -488,8 +487,8 @@ def get_parameter_types_to_exclude(arg_model): def is_correct_lemma(argument_model, attribute_model, lemma): correct_form = False possible_pos_tags = argument_model.get_possible_lemma_tags(attribute_model) - for interp in analyse(lemma): - if (is_single_word(interp) and base_form_correct(interp, lemma) and + for interp in MORFEUSZ2.analyse(lemma.encode('utf8')): + if (base_form_correct(interp, lemma) and pos_tag_correct(interp, possible_pos_tags)): correct_form = True break @@ -501,18 +500,13 @@ def is_correct_lemma(argument_model, attribute_model, lemma): break return correct_form -def is_single_word(interp): - if len(interp) == 1: - return True - return False - def base_form_correct(interp, lemma): - if interp[0][1] == lemma: + if interp.lemma == lemma: return True return False def pos_tag_correct(interp, possible_pos_tags): - tagstr = interp[0][2] + tagstr = interp.getTag(MORFEUSZ2) pos_tag = tagstr.split(':')[0] if possible_pos_tags.filter(name=pos_tag).exists(): return True @@ -520,16 +514,18 @@ def pos_tag_correct(interp, possible_pos_tags): def contains_separator(lemma): contains_separator = False - for interp in analyse(lemma): - if len(interp) > 1: + results_iter = MORFEUSZ2.analyse_iter(lemma.encode('utf8')) + while results_iter.hasNext(): + if results_iter.peek().orth != lemma: contains_separator = True break + results_iter.next() return contains_separator def is_preposition_case_pair_valid(preposition_obj, case_obj): # postp is used by prepadjp case_str = unicode(case_obj) - prep_str = unicode(preposition_obj) + prep_str = unicode(preposition_obj).split()[-1] if case_str != 'postp': # str is used by prepadjp if case_str == 'str': @@ -537,12 +533,14 @@ def is_preposition_case_pair_valid(preposition_obj, case_obj): else: pcase = [case_str] for case in pcase: - for interp in analyse(prep_str): - for token in interp: - tag = token[2].split(':') - if tag[0] == 'prep': - if tag[1] == case: - return True + for interp in MORFEUSZ2.analyse(prep_str.encode('utf8')): + tagstr = interp.getTag(MORFEUSZ2) + tag_parts = tagstr.split(':') + if len(tag_parts) > 1: + interp_pos = tag_parts[0] + interp_case = tag_parts[1] + if interp_pos == 'prep' and interp_case == case: + return True return False return True diff --git a/dictionary/ajax_lemma_view.py b/dictionary/ajax_lemma_view.py index 62edd3c..b5515ad 100644 --- a/dictionary/ajax_lemma_view.py +++ b/dictionary/ajax_lemma_view.py @@ -85,7 +85,6 @@ from accounts.models import UserSettings, RealizedLemma, UserStats, RealizedPhra can_modify_phraseology_only, get_anon_profile from ajax_jqgrid import JqGridAjax, default_sort_rules, default_filter_rules -from common.morfeusz import analyse import locale from functools import cmp_to_key diff --git a/dictionary/validation.py b/dictionary/validation.py index f8b0e04..5b0b176 100644 --- a/dictionary/validation.py +++ b/dictionary/validation.py @@ -27,13 +27,13 @@ import operator from django.db.models import Sum, Q from common.js_to_obj import frameObjToSerializableDict -from common.morfeusz import analyse from dictionary.common_func import subframe_exists from dictionary.convert_frames import frame_conversion from dictionary.forms import FrameAspectForm, FrameOpinionForm from dictionary.models import Argument_Model, AspectRelationsGroup, Frame_Char_Model, \ Frame_Characteristic, Lemma, Position, PositionCategory, \ get_attribute_models, sortArguments, sortPositions +from settings import MORFEUSZ2 def get_wrong_aspect_frames(lemma, frames): wrong_aspect_frames = [] @@ -47,18 +47,18 @@ def check_aspect(lemma, frame): inf_present = False frame_aspect_obj = frame.characteristics.get(type=u'ASPEKT') frame_aspect = frame_aspect_obj.value.value - interps = analyse(lemma.entry) - if interps[0][0][1] and frame_aspect != '_': + interps = MORFEUSZ2.analyse(lemma.entry.encode('utf8')) + if frame_aspect != '_': for interp in interps: - for token in interp: - tag = token[2].split(':') - if tag[0] == 'inf': - inf_present = True - if tag[1] == frame_aspect: - good_aspect = True - break - if good_aspect: - break + tagstr = interp.getTag(MORFEUSZ2) + tag_parts = tagstr.split(':') + pos = tag_parts[0] + if pos == 'inf': + aspect = tag_parts[1] + inf_present = True + if aspect == frame_aspect: + good_aspect = True + break if good_aspect or not inf_present: good_aspect = True return good_aspect @@ -84,13 +84,13 @@ def get_missing_aspects_msg(lemma): def get_possible_aspects(lemma): possible_aspects = [] - interps = analyse(lemma.entry) - if interps[0][0][1]: - for interp in interps: - for token in interp: - tag = token[2].split(':') - if tag[0] == 'inf' and tag[1] not in possible_aspects: - possible_aspects.append(tag[1]) + interps = MORFEUSZ2.analyse(lemma.entry.encode('utf8')) + for interp in interps: + tagstr = interp.getTag(MORFEUSZ2) + tag_parts = tagstr.split(':') + pos = tag_parts[0] + if pos == 'inf' and tag_parts[1] not in possible_aspects: + possible_aspects.append(tag_parts[1]) return possible_aspects def match_arg_poss(arg_poss, frame): @@ -355,7 +355,7 @@ def prep_check(arg): #if arg.find('prep') != -1 and not arg.startswith('comprepnp('): if arg.startswith('prep'): params = arg[arg.find('(')+1:arg.find(')')].split(',') - pform = params[0] + pform = params[0].split()[-1] pcase = params[1] # postp is used by prepadjp if pcase != 'postp': @@ -365,12 +365,12 @@ def prep_check(arg): else: pcase = [pcase] for case in pcase: - for interp in analyse(pform): - for token in interp: - tag = token[2].split(':') - if tag[0] == 'prep': - if tag[1] == case: - return True + for interp in MORFEUSZ2.analyse(pform.encode('utf8')): + tagstr = interp.getTag(MORFEUSZ2) + tag_parts = tagstr.split(':') + pos = tag_parts[0] + if pos == 'prep' and tag_parts[1] == case: + return True return False return True diff --git a/manage.py b/manage.py index 02cb009..8560487 100644 --- a/manage.py +++ b/manage.py @@ -18,4 +18,4 @@ if __name__ == "__main__": # # from django.core.management import execute_from_command_line # -# execute_from_command_line(sys.argv) +# execute_from_command_line(sys.argv) \ No newline at end of file