Commit 4cf58ae124fe2e9801ce0193c285627b1dc98ee2

Authored by Michał Lenart
1 parent 6409a580

- nowa wersja API przechodząca testy

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/trunk@245 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing 43 changed files with 1628 additions and 858 deletions
CMakeLists.txt
... ... @@ -66,7 +66,7 @@ endif ()
66 66  
67 67 # INPUT_TAGSET
68 68 if (NOT INPUT_TAGSET)
69   - set (INPUT_TAGSET ${PROJECT_SOURCE_DIR}/input/sgjp-morfeusz.tagset)
  69 + set (INPUT_TAGSET ${PROJECT_SOURCE_DIR}/input/morfeusz-sgjp.tagset)
70 70 endif ()
71 71  
72 72 # SEGMENT_RULES_FILE
... ...
fsabuilder/morfeusz_builder
... ... @@ -258,11 +258,11 @@ def main(opts):
258 258 if __name__ == '__main__':
259 259 import os
260 260 opts = _parseOptions()
261   - try:
262   - main(opts)
263   - except Exception as ex:
264   - print >> sys.stderr, u'Building dictionary file failed:', unicode(ex).encode('utf8'), 'type of error:', type(ex)
265   - sys.exit(1)
266   - finally:
267   - pass
  261 + #~ try:
  262 + main(opts)
  263 + #~ except Exception as ex:
  264 + #~ print >> sys.stderr, u'Building dictionary file failed:', unicode(ex).encode('utf8'), 'type of error:', type(ex)
  265 + #~ sys.exit(1)
  266 + #~ finally:
  267 + #~ pass
268 268  
... ...
fsabuilder/morfeuszbuilder/fsa/serializer.py
... ... @@ -42,7 +42,7 @@ class Serializer(object):
42 42  
43 43 # get the Morfeusz file format version that is being encoded
44 44 def getVersion(self):
45   - return 18
  45 + return 19
46 46  
47 47 def serialize2CppFile(self, fname, isGenerator, headerFilename="data/default_fsa.hpp"):
48 48 res = []
... ... @@ -113,11 +113,13 @@ class Serializer(object):
113 113 def serializeQualifiersMap(self):
114 114 res = bytearray()
115 115 res.extend(htons(len(self.qualifiersMap)))
116   - for qualifiers, n in sorted(self.qualifiersMap.iteritems(), key=lambda (qs, n): n):
117   - res.append(len(qualifiers))
118   - for q in qualifiers:
119   - res.extend(q.encode('utf8'))
120   - res.append(0)
  116 + label2labelId = dict([ (u'|'.join(qualifiers), n) for qualifiers, n in sorted(self.qualifiersMap.iteritems(), key=lambda (qs, n): n) ])
  117 + res.extend(self._serializeTags(label2labelId))
  118 + #~ for qualifiers, n in sorted(self.qualifiersMap.iteritems(), key=lambda (qs, n): n):
  119 + #~ res.append(len(qualifiers))
  120 + #~ for q in qualifiers:
  121 + #~ res.extend(q.encode('utf8'))
  122 + #~ res.append(0)
121 123 return res
122 124  
123 125 def serializePrologue(self):
... ...
fsabuilder/morfeuszbuilder/tagset/tagset.py
... ... @@ -23,14 +23,15 @@ class Tagset(object):
23 23 def _doInit(self, filename, encoding):
24 24 addingTo = None
25 25 with codecs.open(filename, 'r', encoding) as f:
26   - for line in f:
27   - line = line.strip('\n')
  26 + for linenum, line in enumerate(f, start=1):
  27 + line = line.strip('\n\r')
28 28 if line == u'[TAGS]':
29 29 addingTo = Tagset.TAGS
30 30 elif line == u'[NAMES]':
31 31 addingTo = Tagset.NAMES
32 32 elif line and not line.startswith(u'#'):
33   - assert addingTo in [Tagset.TAGS, Tagset.NAMES]
  33 + if not addingTo in [Tagset.TAGS, Tagset.NAMES]:
  34 + raise FSABuilderException('"%s" - text outside [TAGS] section in tagset file line %d' % (line, linenum))
34 35 res = {Tagset.TAGS: self._tag2tagnum,
35 36 Tagset.NAMES: self._name2namenum}[addingTo]
36 37 tagNum = line.split(Tagset.SEP)[0]
... ...
input/morfeusz-sgjp.tagset 0 → 100644
  1 +#!TAGSET-ID pl.sgjp.morfeusz-0.5.0
  2 +
  3 +[TAGS]
  4 +# special: unknown word (ignotum):
  5 +0 ign
  6 +# special: space/blank:
  7 +1 sp
  8 +# NOUNS
  9 +694 subst:sg:nom:m1
  10 +695 subst:sg:nom:m2
  11 +696 subst:sg:nom:m3
  12 +697 subst:sg:nom:n1
  13 +698 subst:sg:nom:n2
  14 +693 subst:sg:nom:f
  15 +676 subst:sg:gen:m1
  16 +677 subst:sg:gen:m2
  17 +678 subst:sg:gen:m3
  18 +679 subst:sg:gen:n1
  19 +680 subst:sg:gen:n2
  20 +675 subst:sg:gen:f
  21 +670 subst:sg:dat:m1
  22 +671 subst:sg:dat:m2
  23 +672 subst:sg:dat:m3
  24 +673 subst:sg:dat:n1
  25 +674 subst:sg:dat:n2
  26 +669 subst:sg:dat:f
  27 +664 subst:sg:acc:m1
  28 +665 subst:sg:acc:m2
  29 +666 subst:sg:acc:m3
  30 +667 subst:sg:acc:n1
  31 +668 subst:sg:acc:n2
  32 +663 subst:sg:acc:f
  33 +682 subst:sg:inst:m1
  34 +683 subst:sg:inst:m2
  35 +684 subst:sg:inst:m3
  36 +685 subst:sg:inst:n1
  37 +686 subst:sg:inst:n2
  38 +681 subst:sg:inst:f
  39 +688 subst:sg:loc:m1
  40 +689 subst:sg:loc:m2
  41 +690 subst:sg:loc:m3
  42 +691 subst:sg:loc:n1
  43 +692 subst:sg:loc:n2
  44 +687 subst:sg:loc:f
  45 +700 subst:sg:voc:m1
  46 +701 subst:sg:voc:m2
  47 +702 subst:sg:voc:m3
  48 +703 subst:sg:voc:n1
  49 +704 subst:sg:voc:n2
  50 +699 subst:sg:voc:f
  51 +646 subst:pl:nom:m1
  52 +647 subst:pl:nom:m2
  53 +648 subst:pl:nom:m3
  54 +649 subst:pl:nom:n1
  55 +650 subst:pl:nom:n2
  56 +651 subst:pl:nom:p1
  57 +652 subst:pl:nom:p2
  58 +653 subst:pl:nom:p3
  59 +645 subst:pl:nom:f
  60 +619 subst:pl:gen:m1
  61 +620 subst:pl:gen:m2
  62 +621 subst:pl:gen:m3
  63 +622 subst:pl:gen:n1
  64 +623 subst:pl:gen:n2
  65 +624 subst:pl:gen:p1
  66 +625 subst:pl:gen:p2
  67 +626 subst:pl:gen:p3
  68 +618 subst:pl:gen:f
  69 +610 subst:pl:dat:m1
  70 +611 subst:pl:dat:m2
  71 +612 subst:pl:dat:m3
  72 +613 subst:pl:dat:n1
  73 +614 subst:pl:dat:n2
  74 +615 subst:pl:dat:p1
  75 +616 subst:pl:dat:p2
  76 +617 subst:pl:dat:p3
  77 +609 subst:pl:dat:f
  78 +601 subst:pl:acc:m1
  79 +602 subst:pl:acc:m2
  80 +603 subst:pl:acc:m3
  81 +604 subst:pl:acc:n1
  82 +605 subst:pl:acc:n2
  83 +606 subst:pl:acc:p1
  84 +607 subst:pl:acc:p2
  85 +608 subst:pl:acc:p3
  86 +600 subst:pl:acc:f
  87 +628 subst:pl:inst:m1
  88 +629 subst:pl:inst:m2
  89 +630 subst:pl:inst:m3
  90 +631 subst:pl:inst:n1
  91 +632 subst:pl:inst:n2
  92 +633 subst:pl:inst:p1
  93 +634 subst:pl:inst:p2
  94 +635 subst:pl:inst:p3
  95 +627 subst:pl:inst:f
  96 +637 subst:pl:loc:m1
  97 +638 subst:pl:loc:m2
  98 +639 subst:pl:loc:m3
  99 +640 subst:pl:loc:n1
  100 +641 subst:pl:loc:n2
  101 +642 subst:pl:loc:p1
  102 +643 subst:pl:loc:p2
  103 +644 subst:pl:loc:p3
  104 +636 subst:pl:loc:f
  105 +654 subst:pl:voc:f
  106 +655 subst:pl:voc:m1
  107 +656 subst:pl:voc:m2
  108 +657 subst:pl:voc:m3
  109 +658 subst:pl:voc:n1
  110 +659 subst:pl:voc:n2
  111 +660 subst:pl:voc:p1
  112 +661 subst:pl:voc:p2
  113 +662 subst:pl:voc:p3
  114 +# depreciative nominal flexeme:
  115 +149 depr:pl:nom:m2
  116 +150 depr:pl:voc:m2
  117 +# nominal compounds forming form:
  118 +599 substa
  119 +# PERSONAL PRONOUNS
  120 +443 ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:akc
  121 +444 ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:nakc
  122 +445 ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:akc
  123 +446 ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:nakc
  124 +447 ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:akc
  125 +448 ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:nakc
  126 +449 ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:akc
  127 +450 ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:nakc
  128 +451 ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:akc
  129 +452 ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:nakc
  130 +453 ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:akc
  131 +454 ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:nakc
  132 +455 ppron12:sg:inst:m1.m2.m3.f.n1.n2:pri
  133 +456 ppron12:sg:inst:m1.m2.m3.f.n1.n2:sec
  134 +457 ppron12:sg:loc:m1.m2.m3.f.n1.n2:pri
  135 +458 ppron12:sg:loc:m1.m2.m3.f.n1.n2:sec
  136 +459 ppron12:sg:nom:m1.m2.m3.f.n1.n2:pri
  137 +460 ppron12:sg:nom:m1.m2.m3.f.n1.n2:sec
  138 +461 ppron12:sg:voc:m1.m2.m3.f.n1.n2:sec
  139 +429 ppron12:pl:acc:_:pri
  140 +430 ppron12:pl:acc:_:sec
  141 +431 ppron12:pl:dat:_:pri
  142 +432 ppron12:pl:dat:_:sec
  143 +433 ppron12:pl:gen:_:pri
  144 +434 ppron12:pl:gen:_:sec
  145 +435 ppron12:pl:inst:_:pri
  146 +436 ppron12:pl:inst:_:sec
  147 +437 ppron12:pl:loc:_:pri
  148 +438 ppron12:pl:loc:_:sec
  149 +439 ppron12:pl:nom:_:pri
  150 +440 ppron12:pl:nom:_:sec
  151 +441 ppron12:pl:voc:_:pri
  152 +442 ppron12:pl:voc:_:sec
  153 +474 ppron3:sg:acc:f:ter:_:npraep
  154 +475 ppron3:sg:acc:f:ter:_:praep
  155 +476 ppron3:sg:acc:m1.m2.m3:ter:akc:npraep
  156 +477 ppron3:sg:acc:m1.m2.m3:ter:akc:praep
  157 +478 ppron3:sg:acc:m1.m2.m3:ter:nakc:npraep
  158 +479 ppron3:sg:acc:m1.m2.m3:ter:nakc:praep
  159 +480 ppron3:sg:acc:n1.n2:ter:_:npraep
  160 +481 ppron3:sg:acc:n1.n2:ter:_:praep
  161 +482 ppron3:sg:dat:f:ter:_:npraep
  162 +483 ppron3:sg:dat:f:ter:_:praep
  163 +484 ppron3:sg:dat:m1.m2.m3:ter:akc:npraep
  164 +485 ppron3:sg:dat:m1.m2.m3:ter:nakc:npraep
  165 +486 ppron3:sg:dat:m1.m2.m3:ter:_:praep
  166 +487 ppron3:sg:dat:n1.n2:ter:akc:npraep
  167 +488 ppron3:sg:dat:n1.n2:ter:nakc:npraep
  168 +489 ppron3:sg:dat:n1.n2:ter:_:praep
  169 +490 ppron3:sg:gen.acc:m1.m2.m3:ter:nakc:praep
  170 +491 ppron3:sg:gen:f:ter:_:npraep
  171 +492 ppron3:sg:gen:f:ter:_:praep
  172 +493 ppron3:sg:gen:m1.m2.m3:ter:akc:npraep
  173 +494 ppron3:sg:gen:m1.m2.m3:ter:akc:praep
  174 +495 ppron3:sg:gen:m1.m2.m3:ter:nakc:npraep
  175 +496 ppron3:sg:gen:m1.m2.m3:ter:nakc:praep
  176 +497 ppron3:sg:gen:n1.n2:ter:akc:npraep
  177 +498 ppron3:sg:gen:n1.n2:ter:nakc:npraep
  178 +499 ppron3:sg:gen:n1.n2:ter:_:praep
  179 +500 ppron3:sg:inst:f:ter:_:praep
  180 +501 ppron3:sg:inst:m1.m2.m3:ter:_:_
  181 +502 ppron3:sg:inst:n1.n2:ter:_:_
  182 +503 ppron3:sg:loc:f:ter:_:_
  183 +504 ppron3:sg:loc:m1.m2.m3:ter:_:_
  184 +505 ppron3:sg:loc:n1.n2:ter:_:_
  185 +506 ppron3:sg:nom:f:ter:_:_
  186 +507 ppron3:sg:nom:m1.m2.m3:ter:_:_
  187 +508 ppron3:sg:nom:n1.n2:ter:_:_
  188 +462 ppron3:pl:acc:m1.p1:ter:_:npraep
  189 +463 ppron3:pl:acc:m1.p1:ter:_:praep
  190 +464 ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:_:npraep
  191 +465 ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:_:praep
  192 +466 ppron3:pl:dat:_:ter:_:npraep
  193 +467 ppron3:pl:dat:_:ter:_:praep
  194 +468 ppron3:pl:gen:_:ter:_:npraep
  195 +469 ppron3:pl:gen:_:ter:_:praep
  196 +470 ppron3:pl:inst:_:ter:_:_
  197 +471 ppron3:pl:loc:_:ter:_:_
  198 +472 ppron3:pl:nom:m1.p1:ter:_:_
  199 +473 ppron3:pl:nom:m2.m3.f.n1.n2.p2.p3:ter:_:_
  200 +# PRONOUN ‘SIEBIE’
  201 +594 siebie:acc
  202 +595 siebie:dat
  203 +596 siebie:gen
  204 +597 siebie:inst
  205 +598 siebie:loc
  206 +# ADJECTIVES
  207 +5 adj:pl:acc:m1.p1:com
  208 +6 adj:pl:acc:m1.p1:pos
  209 +7 adj:pl:acc:m1.p1:sup
  210 +8 adj:pl:acc:m2.m3.f.n1.n2.p2.p3:com
  211 +9 adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos
  212 +10 adj:pl:acc:m2.m3.f.n1.n2.p2.p3:sup
  213 +11 adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:com
  214 +12 adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos
  215 +13 adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:sup
  216 +14 adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:com
  217 +15 adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos
  218 +16 adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:sup
  219 +17 adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:com
  220 +18 adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos
  221 +19 adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:sup
  222 +20 adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:com
  223 +21 adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos
  224 +22 adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:sup
  225 +23 adj:pl:nom:m1.p1:pos
  226 +24 adj:pl:nom:m2.m3.f.n1.n2.p2.p3:pos
  227 +25 adj:pl:nom.voc:m1.p1:com
  228 +26 adj:pl:nom.voc:m1.p1:pos
  229 +27 adj:pl:nom.voc:m1.p1:sup
  230 +28 adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:com
  231 +29 adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos
  232 +30 adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:sup
  233 +31 adj:sg:acc:f:com
  234 +32 adj:sg:acc:f:pos
  235 +33 adj:sg:acc:f:sup
  236 +34 adj:sg:acc:m1.m2:com
  237 +35 adj:sg:acc:m1.m2:pos
  238 +36 adj:sg:acc:m1.m2:sup
  239 +37 adj:sg:acc:m3:com
  240 +38 adj:sg:acc:m3:pos
  241 +39 adj:sg:acc:m3:sup
  242 +40 adj:sg:acc:n1.n2:com
  243 +41 adj:sg:acc:n1.n2:pos
  244 +42 adj:sg:acc:n1.n2:sup
  245 +43 adj:sg:dat:f:com
  246 +44 adj:sg:dat:f:pos
  247 +45 adj:sg:dat:f:sup
  248 +46 adj:sg:dat:m1.m2.m3.n1.n2:com
  249 +47 adj:sg:dat:m1.m2.m3.n1.n2:pos
  250 +48 adj:sg:dat:m1.m2.m3.n1.n2:sup
  251 +49 adj:sg:gen:f:com
  252 +50 adj:sg:gen:f:pos
  253 +51 adj:sg:gen:f:sup
  254 +52 adj:sg:gen:m1.m2.m3.n1.n2:com
  255 +53 adj:sg:gen:m1.m2.m3.n1.n2:pos
  256 +54 adj:sg:gen:m1.m2.m3.n1.n2:sup
  257 +55 adj:sg:inst:f:com
  258 +56 adj:sg:inst:f:pos
  259 +57 adj:sg:inst:f:sup
  260 +58 adj:sg:inst:m1.m2.m3.n1.n2:com
  261 +59 adj:sg:inst:m1.m2.m3.n1.n2:pos
  262 +60 adj:sg:inst:m1.m2.m3.n1.n2:sup
  263 +61 adj:sg:loc:f:com
  264 +62 adj:sg:loc:f:pos
  265 +63 adj:sg:loc:f:sup
  266 +64 adj:sg:loc:m1.m2.m3.n1.n2:com
  267 +65 adj:sg:loc:m1.m2.m3.n1.n2:pos
  268 +66 adj:sg:loc:m1.m2.m3.n1.n2:sup
  269 +67 adj:sg:nom:f:pos
  270 +68 adj:sg:nom:m1.m2.m3:pos
  271 +69 adj:sg:nom:n1.n2:pos
  272 +70 adj:sg:nom.voc:f:com
  273 +71 adj:sg:nom.voc:f:pos
  274 +72 adj:sg:nom.voc:f:sup
  275 +73 adj:sg:nom.voc:m1.m2.m3:com
  276 +74 adj:sg:nom.voc:m1.m2.m3:pos
  277 +75 adj:sg:nom.voc:m1.m2.m3:sup
  278 +76 adj:sg:nom.voc:n1.n2:com
  279 +77 adj:sg:nom.voc:n1.n2:pos
  280 +78 adj:sg:nom.voc:n1.n2:sup
  281 +# adjectival compounds forming form:
  282 +2 adja
  283 +# predicative adjective:
  284 +3 adjc
  285 +# post-prepositional adjective:
  286 +4 adjp
  287 +# VERBS
  288 +# finite (present/future) flexeme:
  289 +153 fin:pl:pri:imperf
  290 +154 fin:pl:pri:imperf.perf
  291 +155 fin:pl:pri:perf
  292 +156 fin:pl:sec:imperf
  293 +157 fin:pl:sec:imperf.perf
  294 +158 fin:pl:sec:perf
  295 +159 fin:pl:ter:imperf
  296 +160 fin:pl:ter:imperf.perf
  297 +161 fin:pl:ter:perf
  298 +162 fin:sg:pri:imperf
  299 +163 fin:sg:pri:imperf.perf
  300 +164 fin:sg:pri:perf
  301 +165 fin:sg:sec:imperf
  302 +166 fin:sg:sec:imperf.perf
  303 +167 fin:sg:sec:perf
  304 +168 fin:sg:ter:imperf
  305 +169 fin:sg:ter:imperf.perf
  306 +170 fin:sg:ter:perf
  307 +# past flexeme:
  308 +# praet=split (unused otherwise):
  309 +509 praet:pl:m1.p1:imperf
  310 +510 praet:pl:m1.p1:imperf.perf
  311 +511 praet:pl:m1.p1:perf
  312 +521 praet:pl:m2.m3.f.n1.n2.p2.p3:imperf
  313 +522 praet:pl:m2.m3.f.n1.n2.p2.p3:imperf.perf
  314 +523 praet:pl:m2.m3.f.n1.n2.p2.p3:perf
  315 +533 praet:sg:f:imperf
  316 +534 praet:sg:f:imperf.perf
  317 +535 praet:sg:f:perf
  318 +545 praet:sg:m1.m2.m3:imperf
  319 +546 praet:sg:m1.m2.m3:imperf:agl
  320 +547 praet:sg:m1.m2.m3:imperf:nagl
  321 +548 praet:sg:m1.m2.m3:imperf.perf
  322 +549 praet:sg:m1.m2.m3:perf
  323 +550 praet:sg:m1.m2.m3:perf:agl
  324 +551 praet:sg:m1.m2.m3:perf:nagl
  325 +561 praet:sg:n1.n2:imperf
  326 +562 praet:sg:n1.n2:imperf.perf
  327 +563 praet:sg:n1.n2:perf
  328 +# praet=composite (unused otherwise):
  329 +512 praet:pl:m1.p1:pri:imperf
  330 +513 praet:pl:m1.p1:pri:imperf.perf
  331 +514 praet:pl:m1.p1:pri:perf
  332 +515 praet:pl:m1.p1:sec:imperf
  333 +516 praet:pl:m1.p1:sec:imperf.perf
  334 +517 praet:pl:m1.p1:sec:perf
  335 +518 praet:pl:m1.p1:ter:imperf
  336 +519 praet:pl:m1.p1:ter:imperf.perf
  337 +520 praet:pl:m1.p1:ter:perf
  338 +524 praet:pl:m2.m3.f.n1.n2.p2.p3:pri:imperf
  339 +525 praet:pl:m2.m3.f.n1.n2.p2.p3:pri:imperf.perf
  340 +526 praet:pl:m2.m3.f.n1.n2.p2.p3:pri:perf
  341 +527 praet:pl:m2.m3.f.n1.n2.p2.p3:sec:imperf
  342 +528 praet:pl:m2.m3.f.n1.n2.p2.p3:sec:imperf.perf
  343 +529 praet:pl:m2.m3.f.n1.n2.p2.p3:sec:perf
  344 +530 praet:pl:m2.m3.f.n1.n2.p2.p3:ter:imperf
  345 +531 praet:pl:m2.m3.f.n1.n2.p2.p3:ter:imperf.perf
  346 +532 praet:pl:m2.m3.f.n1.n2.p2.p3:ter:perf
  347 +536 praet:sg:f:pri:imperf
  348 +537 praet:sg:f:pri:imperf.perf
  349 +538 praet:sg:f:pri:perf
  350 +539 praet:sg:f:sec:imperf
  351 +540 praet:sg:f:sec:imperf.perf
  352 +541 praet:sg:f:sec:perf
  353 +542 praet:sg:f:ter:imperf
  354 +543 praet:sg:f:ter:imperf.perf
  355 +544 praet:sg:f:ter:perf
  356 +552 praet:sg:m1.m2.m3:pri:imperf
  357 +553 praet:sg:m1.m2.m3:pri:imperf.perf
  358 +554 praet:sg:m1.m2.m3:pri:perf
  359 +555 praet:sg:m1.m2.m3:sec:imperf
  360 +556 praet:sg:m1.m2.m3:sec:imperf.perf
  361 +557 praet:sg:m1.m2.m3:sec:perf
  362 +558 praet:sg:m1.m2.m3:ter:imperf
  363 +559 praet:sg:m1.m2.m3:ter:imperf.perf
  364 +560 praet:sg:m1.m2.m3:ter:perf
  365 +564 praet:sg:n1.n2:pri:imperf
  366 +565 praet:sg:n1.n2:pri:imperf.perf
  367 +566 praet:sg:n1.n2:pri:perf
  368 +567 praet:sg:n1.n2:sec:imperf
  369 +568 praet:sg:n1.n2:sec:imperf.perf
  370 +569 praet:sg:n1.n2:sec:perf
  371 +570 praet:sg:n1.n2:ter:imperf
  372 +571 praet:sg:n1.n2:ter:imperf.perf
  373 +572 praet:sg:n1.n2:ter:perf
  374 +# conditional mood (used only with praet=composite)
  375 +100 cond:pl:m1.p1:pri:imperf
  376 +101 cond:pl:m1.p1:pri:imperf.perf
  377 +102 cond:pl:m1.p1:pri:perf
  378 +103 cond:pl:m1.p1:sec:imperf
  379 +104 cond:pl:m1.p1:sec:imperf.perf
  380 +105 cond:pl:m1.p1:sec:perf
  381 +106 cond:pl:m1.p1:ter:imperf
  382 +107 cond:pl:m1.p1:ter:imperf.perf
  383 +108 cond:pl:m1.p1:ter:perf
  384 +109 cond:pl:m2.m3.f.n1.n2.p2.p3:pri:imperf
  385 +110 cond:pl:m2.m3.f.n1.n2.p2.p3:pri:imperf.perf
  386 +111 cond:pl:m2.m3.f.n1.n2.p2.p3:pri:perf
  387 +112 cond:pl:m2.m3.f.n1.n2.p2.p3:sec:imperf
  388 +113 cond:pl:m2.m3.f.n1.n2.p2.p3:sec:imperf.perf
  389 +114 cond:pl:m2.m3.f.n1.n2.p2.p3:sec:perf
  390 +115 cond:pl:m2.m3.f.n1.n2.p2.p3:ter:imperf
  391 +116 cond:pl:m2.m3.f.n1.n2.p2.p3:ter:imperf.perf
  392 +117 cond:pl:m2.m3.f.n1.n2.p2.p3:ter:perf
  393 +118 cond:sg:f:pri:imperf
  394 +119 cond:sg:f:pri:imperf.perf
  395 +120 cond:sg:f:pri:perf
  396 +121 cond:sg:f:sec:imperf
  397 +122 cond:sg:f:sec:imperf.perf
  398 +123 cond:sg:f:sec:perf
  399 +124 cond:sg:f:ter:imperf
  400 +125 cond:sg:f:ter:imperf.perf
  401 +126 cond:sg:f:ter:perf
  402 +127 cond:sg:m1.m2.m3:pri:imperf
  403 +128 cond:sg:m1.m2.m3:pri:imperf.perf
  404 +129 cond:sg:m1.m2.m3:pri:perf
  405 +130 cond:sg:m1.m2.m3:sec:imperf
  406 +131 cond:sg:m1.m2.m3:sec:imperf.perf
  407 +132 cond:sg:m1.m2.m3:sec:perf
  408 +133 cond:sg:m1.m2.m3:ter:imperf
  409 +134 cond:sg:m1.m2.m3:ter:imperf.perf
  410 +135 cond:sg:m1.m2.m3:ter:perf
  411 +136 cond:sg:n1.n2:imperf
  412 +137 cond:sg:n1.n2:imperf.perf
  413 +138 cond:sg:n1.n2:perf
  414 +139 cond:sg:n1.n2:pri:imperf
  415 +140 cond:sg:n1.n2:pri:imperf.perf
  416 +141 cond:sg:n1.n2:pri:perf
  417 +142 cond:sg:n1.n2:sec:imperf
  418 +143 cond:sg:n1.n2:sec:imperf.perf
  419 +144 cond:sg:n1.n2:sec:perf
  420 +145 cond:sg:n1.n2:ter:imperf
  421 +146 cond:sg:n1.n2:ter:imperf.perf
  422 +147 cond:sg:n1.n2:ter:perf
  423 +# impersonal flexeme:
  424 +219 imps:imperf
  425 +220 imps:imperf.perf
  426 +221 imps:perf
  427 +# imperative flexeme:
  428 +222 impt:pl:pri:imperf
  429 +223 impt:pl:pri:imperf.perf
  430 +224 impt:pl:pri:perf
  431 +225 impt:pl:sec:imperf
  432 +226 impt:pl:sec:imperf.perf
  433 +227 impt:pl:sec:perf
  434 +228 impt:sg:sec:imperf
  435 +229 impt:sg:sec:imperf.perf
  436 +230 impt:sg:sec:perf
  437 +# infinitival flexeme:
  438 +231 inf:imperf
  439 +232 inf:imperf.perf
  440 +233 inf:perf
  441 +# agglutinative forms of ‘być’:
  442 +83 aglt:pl:pri:imperf:nwok
  443 +84 aglt:pl:pri:imperf:wok
  444 +85 aglt:pl:sec:imperf:nwok
  445 +86 aglt:pl:sec:imperf:wok
  446 +87 aglt:sg:pri:imperf:nwok
  447 +88 aglt:sg:pri:imperf:wok
  448 +89 aglt:sg:sec:imperf:nwok
  449 +90 aglt:sg:sec:imperf:wok
  450 +# future forms of ‘być’:
  451 +91 bedzie:pl:pri:imperf
  452 +92 bedzie:pl:sec:imperf
  453 +93 bedzie:pl:ter:imperf
  454 +94 bedzie:sg:pri:imperf
  455 +95 bedzie:sg:sec:imperf
  456 +96 bedzie:sg:ter:imperf
  457 +# ‘winien’ type verbs:
  458 +705 winien:pl:m1.p1:imperf
  459 +706 winien:pl:m1.p1:pri:imperf
  460 +707 winien:pl:m1.p1:sec:imperf
  461 +708 winien:pl:m1.p1:ter:imperf
  462 +709 winien:pl:m2.m3.f.n1.n2.p2.p3:imperf
  463 +710 winien:pl:m2.m3.f.n1.n2.p2.p3:sec:imperf
  464 +711 winien:pl:m2.m3.f.n1.n2.p2.p3:ter:imperf
  465 +712 winien:sg:f:imperf
  466 +713 winien:sg:f:pri:imperf
  467 +714 winien:sg:f:sec:imperf
  468 +715 winien:sg:f:ter:imperf
  469 +716 winien:sg:m1.m2.m3:imperf
  470 +717 winien:sg:m1.m2.m3:pri:imperf
  471 +718 winien:sg:m1.m2.m3:sec:imperf
  472 +719 winien:sg:m1.m2.m3:ter:imperf
  473 +720 winien:sg:n1.n2:imperf
  474 +721 winien:sg:n1.n2:pri:imperf
  475 +722 winien:sg:n1.n2:sec:imperf
  476 +723 winien:sg:n1.n2:ter:imperf
  477 +# predicative flexeme:
  478 +573 pred
  479 +# gerunds
  480 +171 ger:pl:dat.loc:n2:imperf:aff
  481 +172 ger:pl:dat.loc:n2:imperf:neg
  482 +173 ger:pl:dat.loc:n2:imperf.perf:aff
  483 +174 ger:pl:dat.loc:n2:imperf.perf:neg
  484 +175 ger:pl:dat.loc:n2:perf:aff
  485 +176 ger:pl:dat.loc:n2:perf:neg
  486 +177 ger:pl:gen:n2:imperf:aff
  487 +178 ger:pl:gen:n2:imperf:neg
  488 +179 ger:pl:gen:n2:imperf.perf:aff
  489 +180 ger:pl:gen:n2:imperf.perf:neg
  490 +181 ger:pl:gen:n2:perf:aff
  491 +182 ger:pl:gen:n2:perf:neg
  492 +183 ger:pl:inst:n2:imperf:aff
  493 +184 ger:pl:inst:n2:imperf:neg
  494 +185 ger:pl:inst:n2:imperf.perf:aff
  495 +186 ger:pl:inst:n2:imperf.perf:neg
  496 +187 ger:pl:inst:n2:perf:aff
  497 +188 ger:pl:inst:n2:perf:neg
  498 +189 ger:pl:nom.acc:n2:imperf:aff
  499 +190 ger:pl:nom.acc:n2:imperf:neg
  500 +191 ger:pl:nom.acc:n2:imperf.perf:aff
  501 +192 ger:pl:nom.acc:n2:imperf.perf:neg
  502 +193 ger:pl:nom.acc:n2:perf:aff
  503 +194 ger:pl:nom.acc:n2:perf:neg
  504 +195 ger:sg:dat.loc:n2:imperf:aff
  505 +196 ger:sg:dat.loc:n2:imperf:neg
  506 +197 ger:sg:dat.loc:n2:imperf.perf:aff
  507 +198 ger:sg:dat.loc:n2:imperf.perf:neg
  508 +199 ger:sg:dat.loc:n2:perf:aff
  509 +200 ger:sg:dat.loc:n2:perf:neg
  510 +201 ger:sg:gen:n2:imperf:aff
  511 +202 ger:sg:gen:n2:imperf:neg
  512 +203 ger:sg:gen:n2:imperf.perf:aff
  513 +204 ger:sg:gen:n2:imperf.perf:neg
  514 +205 ger:sg:gen:n2:perf:aff
  515 +206 ger:sg:gen:n2:perf:neg
  516 +207 ger:sg:inst:n2:imperf:aff
  517 +208 ger:sg:inst:n2:imperf:neg
  518 +209 ger:sg:inst:n2:imperf.perf:aff
  519 +210 ger:sg:inst:n2:imperf.perf:neg
  520 +211 ger:sg:inst:n2:perf:aff
  521 +212 ger:sg:inst:n2:perf:neg
  522 +213 ger:sg:nom.acc:n2:imperf:aff
  523 +214 ger:sg:nom.acc:n2:imperf:neg
  524 +215 ger:sg:nom.acc:n2:imperf.perf:aff
  525 +216 ger:sg:nom.acc:n2:imperf.perf:neg
  526 +217 ger:sg:nom.acc:n2:perf:aff
  527 +218 ger:sg:nom.acc:n2:perf:neg
  528 +# participles
  529 +# adverbial participles:
  530 +332 pcon:imperf
  531 +331 pant:perf
  532 +# adjectival active participle:
  533 +267 pact:pl:acc:m1.p1:imperf:aff
  534 +268 pact:pl:acc:m1.p1:imperf:neg
  535 +269 pact:pl:acc:m1.p1:imperf.perf:aff
  536 +270 pact:pl:acc:m1.p1:imperf.perf:neg
  537 +271 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff
  538 +272 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg
  539 +273 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff
  540 +274 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg
  541 +275 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff
  542 +276 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg
  543 +277 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff
  544 +278 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg
  545 +279 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff
  546 +280 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg
  547 +281 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff
  548 +282 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg
  549 +283 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff
  550 +284 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:neg
  551 +285 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:aff
  552 +286 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:neg
  553 +287 pact:pl:nom.voc:m1.p1:imperf:aff
  554 +288 pact:pl:nom.voc:m1.p1:imperf:neg
  555 +289 pact:pl:nom.voc:m1.p1:imperf.perf:aff
  556 +290 pact:pl:nom.voc:m1.p1:imperf.perf:neg
  557 +291 pact:sg:acc.inst:f:imperf:aff
  558 +292 pact:sg:acc.inst:f:imperf:neg
  559 +293 pact:sg:acc.inst:f:imperf.perf:aff
  560 +294 pact:sg:acc.inst:f:imperf.perf:neg
  561 +295 pact:sg:acc:m1.m2:imperf:aff
  562 +296 pact:sg:acc:m1.m2:imperf:neg
  563 +297 pact:sg:acc:m1.m2:imperf.perf:aff
  564 +298 pact:sg:acc:m1.m2:imperf.perf:neg
  565 +299 pact:sg:acc:m3:imperf:aff
  566 +300 pact:sg:acc:m3:imperf:neg
  567 +301 pact:sg:acc:m3:imperf.perf:aff
  568 +302 pact:sg:acc:m3:imperf.perf:neg
  569 +303 pact:sg:dat:m1.m2.m3.n1.n2:imperf:aff
  570 +304 pact:sg:dat:m1.m2.m3.n1.n2:imperf:neg
  571 +305 pact:sg:dat:m1.m2.m3.n1.n2:imperf.perf:aff
  572 +306 pact:sg:dat:m1.m2.m3.n1.n2:imperf.perf:neg
  573 +307 pact:sg:gen.dat.loc:f:imperf:aff
  574 +308 pact:sg:gen.dat.loc:f:imperf:neg
  575 +309 pact:sg:gen.dat.loc:f:imperf.perf:aff
  576 +310 pact:sg:gen.dat.loc:f:imperf.perf:neg
  577 +311 pact:sg:gen:m1.m2.m3.n1.n2:imperf:aff
  578 +312 pact:sg:gen:m1.m2.m3.n1.n2:imperf:neg
  579 +313 pact:sg:gen:m1.m2.m3.n1.n2:imperf.perf:aff
  580 +314 pact:sg:gen:m1.m2.m3.n1.n2:imperf.perf:neg
  581 +315 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff
  582 +316 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf:neg
  583 +317 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:aff
  584 +318 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:neg
  585 +319 pact:sg:nom.acc.voc:n1.n2:imperf:aff
  586 +320 pact:sg:nom.acc.voc:n1.n2:imperf:neg
  587 +321 pact:sg:nom.acc.voc:n1.n2:imperf.perf:aff
  588 +322 pact:sg:nom.acc.voc:n1.n2:imperf.perf:neg
  589 +323 pact:sg:nom.voc:f:imperf:aff
  590 +324 pact:sg:nom.voc:f:imperf:neg
  591 +325 pact:sg:nom.voc:f:imperf.perf:aff
  592 +326 pact:sg:nom.voc:f:imperf.perf:neg
  593 +327 pact:sg:nom.voc:m1.m2.m3:imperf:aff
  594 +328 pact:sg:nom.voc:m1.m2.m3:imperf:neg
  595 +329 pact:sg:nom.voc:m1.m2.m3:imperf.perf:aff
  596 +330 pact:sg:nom.voc:m1.m2.m3:imperf.perf:neg
  597 +# adjectival passive participle:
  598 +333 ppas:pl:acc:m1.p1:imperf:aff
  599 +334 ppas:pl:acc:m1.p1:imperf:neg
  600 +335 ppas:pl:acc:m1.p1:imperf.perf:aff
  601 +336 ppas:pl:acc:m1.p1:imperf.perf:neg
  602 +337 ppas:pl:acc:m1.p1:perf:aff
  603 +338 ppas:pl:acc:m1.p1:perf:neg
  604 +339 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff
  605 +340 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg
  606 +341 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff
  607 +342 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg
  608 +343 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff
  609 +344 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg
  610 +345 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff
  611 +346 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg
  612 +347 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff
  613 +348 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg
  614 +349 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff
  615 +350 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg
  616 +351 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff
  617 +352 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg
  618 +353 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff
  619 +354 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg
  620 +355 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff
  621 +356 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg
  622 +357 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff
  623 +358 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:neg
  624 +359 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:aff
  625 +360 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:neg
  626 +361 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:aff
  627 +362 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:neg
  628 +363 ppas:pl:nom.voc:m1.p1:imperf:aff
  629 +364 ppas:pl:nom.voc:m1.p1:imperf:neg
  630 +365 ppas:pl:nom.voc:m1.p1:imperf.perf:aff
  631 +366 ppas:pl:nom.voc:m1.p1:imperf.perf:neg
  632 +367 ppas:pl:nom.voc:m1.p1:perf:aff
  633 +368 ppas:pl:nom.voc:m1.p1:perf:neg
  634 +369 ppas:sg:acc.inst:f:imperf:aff
  635 +370 ppas:sg:acc.inst:f:imperf:neg
  636 +371 ppas:sg:acc.inst:f:imperf.perf:aff
  637 +372 ppas:sg:acc.inst:f:imperf.perf:neg
  638 +373 ppas:sg:acc.inst:f:perf:aff
  639 +374 ppas:sg:acc.inst:f:perf:neg
  640 +375 ppas:sg:acc:m1.m2:imperf:aff
  641 +376 ppas:sg:acc:m1.m2:imperf:neg
  642 +377 ppas:sg:acc:m1.m2:imperf.perf:aff
  643 +378 ppas:sg:acc:m1.m2:imperf.perf:neg
  644 +379 ppas:sg:acc:m1.m2:perf:aff
  645 +380 ppas:sg:acc:m1.m2:perf:neg
  646 +381 ppas:sg:acc:m3:imperf:aff
  647 +382 ppas:sg:acc:m3:imperf:neg
  648 +383 ppas:sg:acc:m3:imperf.perf:aff
  649 +384 ppas:sg:acc:m3:imperf.perf:neg
  650 +385 ppas:sg:acc:m3:perf:aff
  651 +386 ppas:sg:acc:m3:perf:neg
  652 +387 ppas:sg:dat:m1.m2.m3.n1.n2:imperf:aff
  653 +388 ppas:sg:dat:m1.m2.m3.n1.n2:imperf:neg
  654 +389 ppas:sg:dat:m1.m2.m3.n1.n2:imperf.perf:aff
  655 +390 ppas:sg:dat:m1.m2.m3.n1.n2:imperf.perf:neg
  656 +391 ppas:sg:dat:m1.m2.m3.n1.n2:perf:aff
  657 +392 ppas:sg:dat:m1.m2.m3.n1.n2:perf:neg
  658 +393 ppas:sg:gen.dat.loc:f:imperf:aff
  659 +394 ppas:sg:gen.dat.loc:f:imperf:neg
  660 +395 ppas:sg:gen.dat.loc:f:imperf.perf:aff
  661 +396 ppas:sg:gen.dat.loc:f:imperf.perf:neg
  662 +397 ppas:sg:gen.dat.loc:f:perf:aff
  663 +398 ppas:sg:gen.dat.loc:f:perf:neg
  664 +399 ppas:sg:gen:m1.m2.m3.n1.n2:imperf:aff
  665 +400 ppas:sg:gen:m1.m2.m3.n1.n2:imperf:neg
  666 +401 ppas:sg:gen:m1.m2.m3.n1.n2:imperf.perf:aff
  667 +402 ppas:sg:gen:m1.m2.m3.n1.n2:imperf.perf:neg
  668 +403 ppas:sg:gen:m1.m2.m3.n1.n2:perf:aff
  669 +404 ppas:sg:gen:m1.m2.m3.n1.n2:perf:neg
  670 +405 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff
  671 +406 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf:neg
  672 +407 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:aff
  673 +408 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:neg
  674 +409 ppas:sg:inst.loc:m1.m2.m3.n1.n2:perf:aff
  675 +410 ppas:sg:inst.loc:m1.m2.m3.n1.n2:perf:neg
  676 +411 ppas:sg:nom.acc.voc:n1.n2:imperf:aff
  677 +412 ppas:sg:nom.acc.voc:n1.n2:imperf:neg
  678 +413 ppas:sg:nom.acc.voc:n1.n2:imperf.perf:aff
  679 +414 ppas:sg:nom.acc.voc:n1.n2:imperf.perf:neg
  680 +415 ppas:sg:nom.acc.voc:n1.n2:perf:aff
  681 +416 ppas:sg:nom.acc.voc:n1.n2:perf:neg
  682 +417 ppas:sg:nom.voc:f:imperf:aff
  683 +418 ppas:sg:nom.voc:f:imperf:neg
  684 +419 ppas:sg:nom.voc:f:imperf.perf:aff
  685 +420 ppas:sg:nom.voc:f:imperf.perf:neg
  686 +421 ppas:sg:nom.voc:f:perf:aff
  687 +422 ppas:sg:nom.voc:f:perf:neg
  688 +423 ppas:sg:nom.voc:m1.m2.m3:imperf:aff
  689 +424 ppas:sg:nom.voc:m1.m2.m3:imperf:neg
  690 +425 ppas:sg:nom.voc:m1.m2.m3:imperf.perf:aff
  691 +426 ppas:sg:nom.voc:m1.m2.m3:imperf.perf:neg
  692 +427 ppas:sg:nom.voc:m1.m2.m3:perf:aff
  693 +428 ppas:sg:nom.voc:m1.m2.m3:perf:neg
  694 +# NUMERALS
  695 +239 num:pl:acc:m1:rec
  696 +240 num:pl:dat.loc:n1.p1.p2:congr.rec
  697 +241 num:pl:dat:m1.m2.m3.n2.f:congr
  698 +242 num:pl:gen.dat.inst.loc:m1.m2.m3.f.n1.n2.p1.p2:congr
  699 +243 num:pl:gen.dat.inst.loc:m1.m2.m3.f.n2:congr
  700 +244 num:pl:gen.dat.loc:m1.m2.m3.n2.f:congr
  701 +245 num:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2:congr
  702 +246 num:pl:gen.loc:m1.m2.m3.n2.f:congr
  703 +247 num:pl:gen:n1.p1.p2:rec
  704 +248 num:pl:inst:f:congr
  705 +249 num:pl:inst:m1.m2.m3.f.n1.n2.p1.p2:congr
  706 +250 num:pl:inst:m1.m2.m3.f.n2:congr
  707 +251 num:pl:inst:m1.m2.m3.n2:congr
  708 +252 num:pl:inst:m1.m2.m3.n2.f:congr
  709 +253 num:pl:inst:n1.p1.p2:rec
  710 +254 num:pl:nom.acc:m1.m2.m3.f.n1.n2.p1.p2:rec
  711 +255 num:pl:nom.acc.voc:f:congr
  712 +256 num:pl:nom.acc.voc:m1:rec
  713 +257 num:pl:nom.acc.voc:m2.m3.f.n1.n2.p1.p2:rec
  714 +258 num:pl:nom.acc.voc:m2.m3.f.n2:rec
  715 +259 num:pl:nom.acc.voc:m2.m3.n2:congr
  716 +260 num:pl:nom.acc.voc:m2.m3.n2.f:congr
  717 +261 num:pl:nom.acc.voc:n1.p1.p2:rec
  718 +262 num:pl:nom.gen.dat.inst.acc.loc.voc:m1.m2.m3.f.n1.n2.p1.p2:rec
  719 +263 num:pl:nom.voc:m1:congr
  720 +264 num:pl:nom.voc:m1:rec
  721 +265 num:sg:nom.gen.dat.inst.acc.loc.voc:f:rec
  722 +266 num:sg:nom.gen.dat.inst.acc.loc.voc:m1.m2.m3.n1.n2:rec
  723 +# numeral compounds forming form:
  724 +238 num:comp
  725 +# PREPOSITIONS
  726 +578 prep:acc
  727 +579 prep:acc:nwok
  728 +580 prep:acc:wok
  729 +581 prep:dat
  730 +582 prep:gen
  731 +583 prep:gen:nwok
  732 +584 prep:gen:wok
  733 +585 prep:inst
  734 +586 prep:inst:nwok
  735 +587 prep:inst:wok
  736 +588 prep:loc
  737 +589 prep:loc:nwok
  738 +590 prep:loc:wok
  739 +591 prep:nom
  740 +# ADVERBS
  741 +79 adv
  742 +80 adv:com
  743 +81 adv:pos
  744 +82 adv:sup
  745 +# OTHER
  746 +# kubliki (particles):
  747 +592 qub
  748 +# conjunctions:
  749 +148 conj
  750 +# complementizers:
  751 +99 comp
  752 +# interjections:
  753 +234 interj
  754 +# burkinostki (bound words):
  755 +98 burk
  756 +# abbreviations:
  757 +97 brev:pun
  758 +97 brev:npun
  759 +# punctuation:
  760 +235 interp
  761 +# digits:
  762 +151 dig
  763 +# Roman digits:
  764 +593 romandig
  765 +# emoticons:
  766 +152 emoticon
  767 +# prefixes:
  768 +574 prefa
  769 +575 prefppas
  770 +576 prefs
  771 +577 prefv
  772 +# (special)
  773 +236 naj
  774 +237 nie
  775 +
  776 +[NAMES]
  777 +0
  778 +1 astr.
  779 +2 budowla
  780 +3 członek rodu
  781 +4 człon nazwiska
  782 +5 człon nazwiska (herb)
  783 +6 człon nazwy firmy
  784 +7 firma
  785 +8 geograficzna
  786 +9 imię
  787 +10 instytucja
  788 +11 język programowania
  789 +12 krój pisma
  790 +13 marka
  791 +14 nazwisko
  792 +15 oprogramowanie
  793 +16 organizacja
  794 +17 patronimicum
  795 +18 pospolita
  796 +19 przydomek
  797 +20 pseudonim
  798 +21 sufiks nazwiska
  799 +22 środek lokomocji
  800 +23 święto
  801 +24 tytuł
  802 +25 własna
... ...
morfeusz/CMakeLists.txt
... ... @@ -32,8 +32,7 @@ set(SRC_FILES
32 32 ${ANALYZER_DICTIONARY_CPP}
33 33 ${GENERATOR_DICTIONARY_CPP}
34 34 Environment.cpp
35   - DefaultTagset.cpp
36   - Qualifiers.cpp
  35 + IdResolverImpl.cpp
37 36 fsa/const.cpp
38 37 MorphInterpretation.cpp
39 38 Morfeusz.cpp
... ...
morfeusz/DefaultTagset.cpp deleted
1   -
2   -#include <string>
3   -#include <vector>
4   -#include "DefaultTagset.hpp"
5   -#include "fsa/const.hpp"
6   -#include "utils.hpp"
7   -#include "deserialization/deserializationUtils.hpp"
8   -
9   -using namespace std;
10   -
11   -namespace morfeusz {
12   -
13   - DefaultTagset::DefaultTagset(const unsigned char* ptr, const CharsetConverter* charsetConverter)
14   - : tags(),
15   - names(),
16   - charsetConverter(charsetConverter) {
17   - uint32_t fsaSize = readInt32Const(ptr + FSA_DATA_SIZE_OFFSET);
18   - const unsigned char* currPtr = ptr + FSA_DATA_OFFSET + fsaSize + 4;
19   - readTags(currPtr, this->tags);
20   - readTags(currPtr, this->names);
21   - setCharsetConverter(charsetConverter);
22   - }
23   -
24   - const string& DefaultTagset::getTag(const int tagNum) const {
25   - return this->tags.at(tagNum);
26   -// if (charsetConverter == &UTF8CharsetConverter::getInstance()) {
27   -// return this->tags.at(tagNum);
28   -// }
29   -// else {
30   -// return charsetConverter->fromUTF8(this->tags.at(tagNum));
31   -// }
32   - }
33   -
34   - const string& DefaultTagset::getName(const int nameNum) const {
35   - return this->names.at(nameNum);
36   -// if (charsetConverter == &UTF8CharsetConverter::getInstance()) {
37   -// return this->names.at(nameNum);
38   -// }
39   -// else {
40   -// return charsetConverter->fromUTF8(this->names.at(nameNum));
41   -// }
42   - }
43   -
44   - size_t DefaultTagset::getTagsSize() const {
45   - return this->tags.size();
46   - }
47   -
48   - size_t DefaultTagset::getNamesSize() const {
49   - return this->names.size();
50   - }
51   -
52   - // FIXME - probably should not convert whole tagset on every setCharsetConverter method invocation.
53   - void DefaultTagset::setCharsetConverter(const CharsetConverter* charsetConverter) {
54   -
55   - for (unsigned int i = 0; i < tags.size(); i++) {
56   - tags[i] = charsetConverter->fromUTF8(
57   - this->charsetConverter->toUTF8(tags[i]));
58   - }
59   -
60   - for (unsigned int j = 0; j < names.size(); j++) {
61   - names[j] = charsetConverter->fromUTF8(
62   - this->charsetConverter->toUTF8(names[j]));
63   - }
64   -
65   - this->charsetConverter = charsetConverter;
66   - }
67   -
68   -}
morfeusz/DefaultTagset.hpp deleted
1   -/*
2   - * File: tagset.hpp
3   - * Author: mlenart
4   - *
5   - * Created on 12 listopad 2013, 14:09
6   - */
7   -
8   -#ifndef DEFAULTTAGSET_HPP
9   -#define DEFAULTTAGSET_HPP
10   -
11   -#include <string>
12   -#include <vector>
13   -#include <map>
14   -#include "morfeusz2.h"
15   -#include "charset/CharsetConverter.hpp"
16   -
17   -namespace morfeusz {
18   -
19   - /**
20   - * Represents a tagset
21   - */
22   - class DefaultTagset : public Tagset<std::string> {
23   - public:
24   - /**
25   - * Constructs a tagset from binary data.
26   - *
27   - * @param fsaData - pointer to the beginning of automaton data.
28   - */
29   - DefaultTagset(const unsigned char* fsaData, const CharsetConverter* charsetConverter);
30   -
31   - const std::string& getTag(const int tagNum) const;
32   -
33   - const std::string& getName(const int nameNum) const;
34   -
35   - size_t getTagsSize() const;
36   -
37   - size_t getNamesSize() const;
38   -
39   - void setCharsetConverter(const CharsetConverter* charsetConverter);
40   -
41   - private:
42   - std::vector<std::string> tags;
43   - std::vector<std::string> names;
44   - const CharsetConverter* charsetConverter;
45   - };
46   -
47   -}
48   -
49   -#endif /* DEFAULTTAGSET_HPP */
50   -
morfeusz/Environment.cpp
... ... @@ -40,7 +40,7 @@ Environment::Environment(
40 40 : currentCharsetConverter(getCharsetConverter(charset)),
41 41 caseConverter(),
42 42 tagset(fsaFileStartPtr, currentCharsetConverter),
43   -qualifiers(fsaFileStartPtr),
  43 +//qualifiers(fsaFileStartPtr),
44 44 fsaFileStartPtr(fsaFileStartPtr),
45 45 fsa(FSAType::getFSA(fsaFileStartPtr, initializeDeserializer(processorType))),
46 46 separatorsList(getSeparatorsList(fsaFileStartPtr)),
... ... @@ -94,12 +94,12 @@ const CaseConverter&amp; Environment::getCaseConverter() const {
94 94 return this->caseConverter;
95 95 }
96 96  
97   -void Environment::setTagset(DefaultTagset& tagset) {
  97 +void Environment::setTagset(IdResolverImpl& tagset) {
98 98 this->tagset = tagset;
99 99 this->tagset.setCharsetConverter(currentCharsetConverter);
100 100 }
101 101  
102   -const DefaultTagset& Environment::getTagset() const {
  102 +const IdResolverImpl& Environment::getTagset() const {
103 103 return this->tagset;
104 104 }
105 105  
... ... @@ -115,8 +115,8 @@ void Environment::setDictionaryFile(const std::string&amp; filename) {
115 115 this->segrulesFSAsMap = createSegrulesFSAsMap(this->fsaFileStartPtr);
116 116 this->currSegrulesFSA = getDefaultSegrulesFSA(this->segrulesFSAsMap, this->fsaFileStartPtr);
117 117 this->isFromFile = true;
118   - this->tagset = DefaultTagset(fsaFileStartPtr, currentCharsetConverter);
119   - this->qualifiers = Qualifiers(fsaFileStartPtr);
  118 + this->tagset = IdResolverImpl(fsaFileStartPtr, currentCharsetConverter);
  119 +// this->qualifiers = Qualifiers(fsaFileStartPtr);
120 120 }
121 121  
122 122 const SegrulesFSA& Environment::getCurrentSegrulesFSA() const {
... ... @@ -156,9 +156,9 @@ const CasePatternHelper&amp; Environment::getCasePatternHelper() const {
156 156 return *this->casePatternHelper;
157 157 }
158 158  
159   -const Qualifiers& Environment::getQualifiersHelper() const {
160   - return this->qualifiers;
161   -}
  159 +//const Qualifiers& Environment::getQualifiersHelper() const {
  160 +// return this->qualifiers;
  161 +//}
162 162  
163 163 bool Environment::isSeparator(uint32_t codepoint) const {
164 164 return binary_search(
... ...
morfeusz/Environment.hpp
... ... @@ -15,7 +15,7 @@
15 15 #include "fsa/fsa.hpp"
16 16 #include "segrules/segrules.hpp"
17 17 #include "const.hpp"
18   -#include "DefaultTagset.hpp"
  18 +#include "IdResolverImpl.hpp"
19 19 #include "InterpsGroup.hpp"
20 20 #include "case/CasePatternHelper.hpp"
21 21 #include "Qualifiers.hpp"
... ... @@ -82,14 +82,14 @@ public:
82 82 *
83 83 * @param tagset
84 84 */
85   - void setTagset(DefaultTagset& tagset);
  85 + void setTagset(IdResolverImpl& tagset);
86 86  
87 87 /**
88 88 * Gets currently used tagset.
89 89 *
90 90 * @return
91 91 */
92   - const DefaultTagset& getTagset() const;
  92 + const IdResolverImpl& getTagset() const;
93 93  
94 94 /**
95 95 * Sets binary dictionary file used by this environment.
... ... @@ -143,7 +143,7 @@ public:
143 143 * Return current qualifiers helper.
144 144 * @return
145 145 */
146   - const Qualifiers& getQualifiersHelper() const;
  146 +// const Qualifiers& getQualifiersHelper() const;
147 147  
148 148 /**
149 149 * Returns true iff given codepoint denotes a separator char for ign handling.
... ... @@ -156,8 +156,8 @@ public:
156 156 private:
157 157 const CharsetConverter* currentCharsetConverter;
158 158 const CaseConverter caseConverter;
159   - DefaultTagset tagset;
160   - Qualifiers qualifiers;
  159 + IdResolverImpl tagset;
  160 +// Qualifiers qualifiers;
161 161  
162 162 const unsigned char* fsaFileStartPtr;
163 163 const FSAType* fsa;
... ...
morfeusz/IdResolverImpl.cpp 0 → 100644
  1 +
  2 +#include "IdResolverImpl.hpp"
  3 +#include "fsa/const.hpp"
  4 +#include "utils.hpp"
  5 +#include "const.hpp"
  6 +#include "deserialization/deserializationUtils.hpp"
  7 +#include "morfeusz2.h"
  8 +
  9 +using namespace std;
  10 +
  11 +namespace morfeusz {
  12 +
  13 + inline static void readTags(const unsigned char*& currPtr, std::vector<std::string>& tags) {
  14 + tags.clear();
  15 + tags.resize(65536);
  16 + uint16_t tagsNum = readInt16(currPtr);
  17 + for (unsigned int i = 0; i < tagsNum; i++) {
  18 + unsigned int tagNum = readInt16(currPtr);
  19 + tags[tagNum] = readString(currPtr);
  20 + }
  21 + }
  22 +
  23 + inline static void createReverseMapping(IdResolverImpl::IdStringMapping& mapping) {
  24 + mapping.string2Id.clear();
  25 + for (unsigned int i = 0; i < mapping.id2String.size(); i++) {
  26 + mapping.string2Id[mapping.id2String[i]] = i;
  27 + }
  28 + }
  29 +
  30 + template <class T>
  31 + inline static const T& getFromMap(map<string, T> string2T, const string& key, const char* errMsg) {
  32 + if (string2T.count(key) != 0) {
  33 +// map<string, T>::const_iterator it;
  34 +// it = string2T.find(key);
  35 + return string2T.find(key)->second;
  36 + }
  37 + else {
  38 + throw MorfeuszException(string(errMsg) + ": " + key);
  39 + }
  40 + }
  41 +
  42 + inline static void convertCharset(const CharsetConverter* charsetConverter, IdResolverImpl::IdStringMapping& mapping) {
  43 + for (unsigned int i = 0; i < mapping.id2String.size(); i++) {
  44 + mapping.id2String[i] = charsetConverter->fromUTF8(
  45 + charsetConverter->toUTF8(mapping.id2String[i]));
  46 + }
  47 + createReverseMapping(mapping);
  48 + }
  49 +
  50 + IdResolverImpl::IdResolverImpl(const unsigned char* ptr, const CharsetConverter* charsetConverter)
  51 + : tags(),
  52 + names(),
  53 + labels(),
  54 + labelsAsSets(),
  55 + charsetConverter(charsetConverter) {
  56 + uint32_t fsaSize = readInt32Const(ptr + FSA_DATA_SIZE_OFFSET);
  57 + const unsigned char* currPtr = ptr + FSA_DATA_OFFSET + fsaSize + 4;
  58 +
  59 + readTags(currPtr, this->tags.id2String);
  60 + createReverseMapping(this->tags);
  61 +
  62 + readTags(currPtr, this->names.id2String);
  63 + createReverseMapping(this->names);
  64 +
  65 + readTags(currPtr, this->labels.id2String);
  66 + createReverseMapping(this->labels);
  67 + for (unsigned int i = 0; i < this->labels.id2String.size(); i++) {
  68 + vector<string> labelsVector = split(this->labels.id2String[i], LABELS_SEPARATOR);
  69 + this->labelsAsSets.push_back(set<string>(labelsVector.begin(), labelsVector.end()));
  70 + }
  71 +
  72 + setCharsetConverter(charsetConverter);
  73 + }
  74 +
  75 + // FIXME - probably should not convert whole tagset on every setCharsetConverter method invocation.
  76 +
  77 + void IdResolverImpl::setCharsetConverter(const CharsetConverter* charsetConverter) {
  78 + convertCharset(charsetConverter, this->tags);
  79 + convertCharset(charsetConverter, this->names);
  80 + convertCharset(charsetConverter, this->labels);
  81 +// for (unsigned int i = 0; i < tags.id2String.size(); i++) {
  82 +// tags.id2String[i] = charsetConverter->fromUTF8(
  83 +// this->charsetConverter->toUTF8(tags[i]));
  84 +// }
  85 +//
  86 +// for (unsigned int j = 0; j < names.id2String.size(); j++) {
  87 +// names[j] = charsetConverter->fromUTF8(
  88 +// this->charsetConverter->toUTF8(names[j]));
  89 +// }
  90 +
  91 + this->charsetConverter = charsetConverter;
  92 + }
  93 + // Returns the tag string stored under tagId; std::vector::at throws std::out_of_range for an id outside the table.
  94 + const string& IdResolverImpl::getTag(const int tagId) const {
  95 + return this->tags.id2String.at(tagId);
  96 + }
  97 + // Returns the id mapped to the given tag string; getFromMap throws MorfeuszException ("Invalid tag: <tag>") when it is unknown.
  98 + int IdResolverImpl::getTagId(const std::string& tag) const {
  99 + return getFromMap(this->tags.string2Id, tag, "Invalid tag");
  100 + }
  101 + // Returns the name string stored under nameId; std::vector::at throws std::out_of_range for an id outside the table.
  102 + const string& IdResolverImpl::getName(const int nameId) const {
  103 + return this->names.id2String.at(nameId);
  104 + }
  105 + // Returns the id mapped to the given name string; getFromMap throws MorfeuszException ("Invalid name: <name>") when it is unknown.
  106 + int IdResolverImpl::getNameId(const std::string& name) const {
  107 + return getFromMap(this->names.string2Id, name, "Invalid name");
  108 + }
  109 + // Returns the unsplit labels string stored under labelsId; std::vector::at throws std::out_of_range for an id outside the table.
  110 + const string& IdResolverImpl::getLabelsAsString(int labelsId) const {
  111 + return this->labels.id2String.at(labelsId);
  112 + }
  113 + // Returns the set of labels kept in labelsAsSets under labelsId; std::vector::at throws std::out_of_range for an invalid id.
  114 + const set<string>& IdResolverImpl::getLabels(int labelsId) const {
  115 + return this->labelsAsSets.at(labelsId);
  116 + }
  117 + // Returns the id mapped to the given labels string; getFromMap throws MorfeuszException ("Invalid labels string: ...") when it is unknown.
  118 + int IdResolverImpl::getLabelsId(const string& labelsStr) const {
  119 + return getFromMap(this->labels.string2Id, labelsStr, "Invalid labels string");
  120 + }
  121 + // Returns the number of slots in the id -> tag table (readTags sizes it to 65536), not only the ids present in the data.
  122 + size_t IdResolverImpl::getTagsCount() const {
  123 + return this->tags.id2String.size();
  124 + }
  125 + // Returns the number of slots in the id -> name table (readTags sizes it to 65536), not only the ids present in the data.
  126 + size_t IdResolverImpl::getNamesCount() const {
  127 + return this->names.id2String.size();
  128 + }
  129 + // Returns the number of slots in the id -> labels table (readTags sizes it to 65536), not only the ids present in the data.
  130 + size_t IdResolverImpl::getLabelsCount() const {
  131 + return this->labels.id2String.size();
  132 + }
  133 +}
... ...
morfeusz/IdResolverImpl.hpp 0 → 100644
  1 +/*
  2 + * File: tagset.hpp
  3 + * Author: mlenart
  4 + *
  5 + * Created on 12 listopad 2013, 14:09
  6 + */
  7 +
  8 +#ifndef DEFAULTTAGSET_HPP
  9 +#define DEFAULTTAGSET_HPP
  10 +
  11 +#include <string>
  12 +#include <vector>
  13 +#include <map>
  14 +#include "morfeusz2.h"
  15 +#include "charset/CharsetConverter.hpp"
  16 +
  17 +namespace morfeusz {
  18 +
  19 + class IdResolverImpl : public IdResolver {
  20 + public:
  21 +
  22 + IdResolverImpl(const unsigned char* ptr, const CharsetConverter* charsetConverter);
  23 +
  24 + void setCharsetConverter(const CharsetConverter* charsetConverter);
  25 +
  26 + const std::string& getTag(const int tagId) const;
  27 + int getTagId(const std::string& tag) const;
  28 +
  29 + const std::string& getName(const int nameId) const;
  30 + int getNameId(const std::string& name) const;
  31 +
  32 + const std::string& getLabelsAsString(int labelsId) const;
  33 + const std::set<std::string>& getLabels(int labelsId) const;
  34 + int getLabelsId(const std::string& labelsStr) const;
  35 +
  36 + size_t getTagsCount() const;
  37 + size_t getNamesCount() const;
  38 + size_t getLabelsCount() const;
  39 +
  40 + virtual ~IdResolverImpl() {
  41 + }
  42 +
  43 + struct IdStringMapping {
  44 + std::vector<std::string> id2String;
  45 + std::map<std::string, int> string2Id;
  46 + };
  47 +
  48 + private:
  49 +
  50 + IdStringMapping tags;
  51 + IdStringMapping names;
  52 + IdStringMapping labels;
  53 + std::vector< std::set<std::string> > labelsAsSets;
  54 +//
  55 +// std::vector<std::string> tags;
  56 +// std::vector<std::string> names;
  57 + const CharsetConverter* charsetConverter;
  58 + };
  59 +
  60 +}
  61 +
  62 +#endif /* DEFAULTTAGSET_HPP */
  63 +
... ...
morfeusz/MorfeuszInternal.cpp
... ... @@ -196,7 +196,7 @@ namespace morfeusz {
196 196 vector<MorphInterpretation>& results,
197 197 bool insideIgnHandler) const {
198 198 if (handleWhitespacesAtBeginning(env, reader, startNodeNum, results)) {
199   - startNodeNum = results.back().getEndNode();
  199 + startNodeNum = results.back().endNode;
200 200 }
201 201  
202 202 if (reader.isAtEnd()) {
... ... @@ -361,8 +361,7 @@ namespace morfeusz {
361 361 int startNodeNum,
362 362 std::vector<MorphInterpretation>& results) const {
363 363 string orth(reader.readWhitespacesChunk());
364   - MorphInterpretation mi(MorphInterpretation::createWhitespace(startNodeNum, startNodeNum + 1, orth, this->getDefaultAnalyzerTagset()));
365   - results.push_back(mi);
  364 + results.push_back(MorphInterpretation::createWhitespace(startNodeNum, startNodeNum + 1, orth));
366 365 }
367 366  
368 367 void MorfeuszInternal::handleIgnChunk(
... ... @@ -390,7 +389,7 @@ namespace morfeusz {
390 389 if (nonSeparatorInputEnd != prevInput) {
391 390 // there are non-separators + separators
392 391  
393   - int startNode = results.empty() ? startNodeNum : results.back().getEndNode();
  392 + int startNode = results.empty() ? startNodeNum : results.back().endNode;
394 393 // process part before separators
395 394 TextReader newReader1(prevInput, nonSeparatorInputEnd, env);
396 395 notMatchingCaseSegs = 0;
... ... @@ -400,7 +399,7 @@ namespace morfeusz {
400 399 if (currInput == chunkBounds.wordEndPtr) {
401 400 currInput = chunkBounds.chunkEndPtr;
402 401 }
403   - startNode = results.empty() ? startNodeNum : results.back().getEndNode();
  402 + startNode = results.empty() ? startNodeNum : results.back().endNode;
404 403 TextReader newReader2(nonSeparatorInputEnd, currInput, env);
405 404 this->processOneWord(env, newReader2, startNode, results, true);
406 405 }
... ... @@ -409,7 +408,7 @@ namespace morfeusz {
409 408 if (currInput == chunkBounds.wordEndPtr) {
410 409 currInput = chunkBounds.chunkEndPtr;
411 410 }
412   - int startNode = results.empty() ? startNodeNum : results.back().getEndNode();
  411 + int startNode = results.empty() ? startNodeNum : results.back().endNode;
413 412 TextReader newReader3(prevInput, currInput, env);
414 413 notMatchingCaseSegs = 0;
415 414 this->processOneWord(env, newReader3, startNode, results, true);
... ... @@ -421,7 +420,7 @@ namespace morfeusz {
421 420 if (!env.isSeparator(codepoint)) {
422 421 if (separatorFound) {
423 422 // process part after separators
424   - int startNode = results.empty() ? startNodeNum : results.back().getEndNode();
  423 + int startNode = results.empty() ? startNodeNum : results.back().endNode;
425 424 TextReader newReader4(prevInput, chunkBounds.chunkEndPtr, env);
426 425 this->processOneWord(env, newReader4, startNode, results, true);
427 426 }
... ... @@ -438,16 +437,15 @@ namespace morfeusz {
438 437 std::vector<MorphInterpretation>& results) const {
439 438 string orth(chunkBounds.chunkStartPtr, chunkBounds.chunkEndPtr);
440 439 string lemma(chunkBounds.wordStartPtr, chunkBounds.wordEndPtr);
441   - MorphInterpretation interp(MorphInterpretation::createIgn(startNodeNum, startNodeNum + 1, orth, lemma, env.getTagset()));
442   - results.push_back(interp);
  440 + results.push_back(MorphInterpretation::createIgn(startNodeNum, startNodeNum + 1, orth, lemma));
443 441 }
444 442  
445   - void MorfeuszInternal::analyzeOneWord(
  443 + void MorfeuszInternal::analyseOneWord(
446 444 TextReader& reader,
447 445 vector<MorphInterpretation>& results) const {
448 446 this->processOneWord(this->analyzerEnv, reader, nextNodeNum, results);
449 447 if (!results.empty()) {
450   - nextNodeNum = results.back().getEndNode();
  448 + nextNodeNum = results.back().endNode;
451 449 }
452 450 }
453 451  
... ... @@ -457,14 +455,14 @@ namespace morfeusz {
457 455 }
458 456 }
459 457  
460   - ResultsIterator* MorfeuszInternal::analyze(const string& text) const {
  458 + ResultsIterator* MorfeuszInternal::analyse(const string& text) const {
461 459 adjustTokensCounter();
462 460 char* textCopy = new char[text.length() + 1];
463 461 strcpy(textCopy, text.c_str());
464 462 return new ResultsIteratorImpl(*this, textCopy, textCopy + text.length(), true);
465 463 }
466 464  
467   - ResultsIterator* MorfeuszInternal::analyzeWithCopy(const char* text) const {
  465 + ResultsIterator* MorfeuszInternal::analyseWithCopy(const char* text) const {
468 466 adjustTokensCounter();
469 467 long n = strlen(text);
470 468 char* textCopy = new char[n + 1];
... ... @@ -472,16 +470,16 @@ namespace morfeusz {
472 470 return new ResultsIteratorImpl(*this, textCopy, textCopy + n, true);
473 471 }
474 472  
475   - ResultsIterator* MorfeuszInternal::analyze(const char* text) const {
  473 + ResultsIterator* MorfeuszInternal::analyse(const char* text) const {
476 474 adjustTokensCounter();
477 475 return new ResultsIteratorImpl(*this, text, text + strlen(text), false);
478 476 }
479 477  
480   - void MorfeuszInternal::analyze(const string& text, vector<MorphInterpretation>& results) const {
  478 + void MorfeuszInternal::analyse(const string& text, vector<MorphInterpretation>& results) const {
481 479 adjustTokensCounter();
482 480 TextReader reader(text, this->analyzerEnv);
483 481 while (!reader.isAtEnd()) {
484   - analyzeOneWord(reader, results);
  482 + analyseOneWord(reader, results);
485 483 }
486 484 }
487 485  
... ... @@ -496,12 +494,12 @@ namespace morfeusz {
496 494 }
497 495 }
498 496  
499   - void MorfeuszInternal::generate(const std::string& lemma, int tagnum, vector<MorphInterpretation>& result) const {
  497 + void MorfeuszInternal::generate(const std::string& lemma, int tagId, vector<MorphInterpretation>& result) const {
500 498 vector<MorphInterpretation> partRes;
501 499 this->generate(lemma, partRes);
502 500 for (unsigned int i = 0; i < partRes.size(); i++) {
503 501 // XXX - someday it should be improved
504   - if (partRes[i].getTagnum() == tagnum) {
  502 + if (partRes[i].tagId == tagId) {
505 503 result.push_back(partRes[i]);
506 504 }
507 505 }
... ... @@ -524,7 +522,6 @@ namespace morfeusz {
524 522 }
525 523  
526 524 void MorfeuszInternal::setCaseHandling(CaseHandling caseHandling) {
527   - this->options.caseHandling = caseHandling;
528 525 this->analyzerEnv.setCaseSensitive(caseHandling != IGNORE_CASE);
529 526 }
530 527  
... ... @@ -541,12 +538,15 @@ namespace morfeusz {
541 538 this->options.debug = debug;
542 539 }
543 540  
544   - const Tagset<string>& MorfeuszInternal::getDefaultAnalyzerTagset() const {
  541 + const IdResolver& MorfeuszInternal::getDefaultAnalyzerTagset() const {
545 542 return this->generatorEnv.getTagset();
546 543 }
547 544  
548   - const Tagset<string>& MorfeuszInternal::getDefaultGeneratorTagset() const {
  545 + const IdResolver& MorfeuszInternal::getDefaultGeneratorTagset() const {
  546 + return this->analyzerEnv.getTagset();
  547 + }
  548 +
  549 + const IdResolver& MorfeuszInternal::getIdResolver() const {
549 550 return this->analyzerEnv.getTagset();
550 551 }
551   -
552 552 }
... ...
morfeusz/MorfeuszInternal.hpp
... ... @@ -60,15 +60,15 @@ namespace morfeusz {
60 60  
61 61 virtual ~MorfeuszInternal();
62 62  
63   - ResultsIterator* analyze(const std::string& text) const;
  63 + ResultsIterator* analyse(const std::string& text) const;
64 64  
65   - ResultsIterator* analyze(const char* text) const;
  65 + ResultsIterator* analyse(const char* text) const;
66 66  
67   - void analyze(const std::string& text, std::vector<MorphInterpretation>& result) const;
  67 + void analyse(const std::string& text, std::vector<MorphInterpretation>& result) const;
68 68  
69 69 void generate(const std::string& lemma, std::vector<MorphInterpretation>& result) const;
70 70  
71   - void generate(const std::string& lemma, int tagnum, std::vector<MorphInterpretation>& result) const;
  71 + void generate(const std::string& lemma, int tagId, std::vector<MorphInterpretation>& result) const;
72 72  
73 73 void setCharset(Charset encoding);
74 74  
... ... @@ -84,11 +84,13 @@ namespace morfeusz {
84 84  
85 85 void setDebug(bool debug);
86 86  
87   - const Tagset<std::string>& getDefaultAnalyzerTagset() const;
  87 + const IdResolver& getDefaultAnalyzerTagset() const;
88 88  
89   - const Tagset<std::string>& getDefaultGeneratorTagset() const;
  89 + const IdResolver& getDefaultGeneratorTagset() const;
90 90  
91   - ResultsIterator* analyzeWithCopy(const char* text) const;
  91 + ResultsIterator* analyseWithCopy(const char* text) const;
  92 +
  93 + const IdResolver& getIdResolver() const;
92 94  
93 95 friend class ResultsIteratorImpl;
94 96  
... ... @@ -101,7 +103,7 @@ namespace morfeusz {
101 103 std::vector<MorphInterpretation>& result,
102 104 bool insideIgnHandler = false) const;
103 105  
104   - void analyzeOneWord(
  106 + void analyseOneWord(
105 107 TextReader& reader,
106 108 std::vector<MorphInterpretation>& results) const;
107 109  
... ...
morfeusz/MorphInterpretation.cpp
... ... @@ -15,49 +15,33 @@ using namespace std;
15 15  
16 16 namespace morfeusz {
17 17  
18   - /**
19   - * used for ignotium and whitespace tags who don't have any qualifiers.
20   - */
21   - static vector<string> emptyQualifiers;
22   -
23   - MorphInterpretation::MorphInterpretation(
24   - int startNode,
25   - int endNode,
26   - const string& orth,
27   - const string& lemma,
28   - int tagnum,
29   - int namenum,
30   - const vector<string>* qualifiers,
31   - const Tagset<string>* tagset)
32   - : startNode(startNode),
33   - endNode(endNode),
34   - orth(orth),
35   - lemma(lemma),
36   - tagnum(tagnum),
37   - namenum(namenum),
38   - qualifiers(qualifiers),
39   - tagset(tagset) {
40   - }
41   -
42   - MorphInterpretation::MorphInterpretation()
43   - : startNode(),
44   - endNode(),
45   - orth(),
46   - lemma(),
47   - tagnum(),
48   - namenum(),
49   - qualifiers(&emptyQualifiers),
50   - tagset(NULL) {
51   -
52   - }
53   -
54   - MorphInterpretation MorphInterpretation::createIgn(int startNode, int endNode, const std::string& orth, const std::string& lemma, const Tagset<string>& tagset) {
55   - MorphInterpretation mi(startNode, endNode, orth, lemma, 0, 0, &emptyQualifiers, &tagset);
  18 + MorphInterpretation MorphInterpretation::createIgn(
  19 + int startNode, int endNode,
  20 + const std::string& orth,
  21 + const std::string& lemma) {
  22 + MorphInterpretation mi;
  23 + mi.startNode = startNode;
  24 + mi.endNode = endNode;
  25 + mi.orth = orth;
  26 + mi.lemma = lemma;
  27 + mi.tagId = 0;
  28 + mi.nameId = 0;
  29 + mi.labelsId = 0;
56 30 return mi;
57 31 }
58   -
59   - MorphInterpretation MorphInterpretation::createWhitespace(int startNode, int endNode, const std::string& orth, const Tagset<string>& tagset) {
60   - MorphInterpretation mi(startNode, endNode, orth, orth, 1, 0, &emptyQualifiers, &tagset);
  32 +
  33 + /**
  34 + * Creates new instance with "sp" tag (meaning: "this is a sequence of whitespaces")
  35 + */
  36 + MorphInterpretation MorphInterpretation::createWhitespace(int startNode, int endNode, const std::string& orth) {
  37 + MorphInterpretation mi;
  38 + mi.startNode = startNode;
  39 + mi.endNode = endNode;
  40 + mi.orth = orth;
  41 + mi.lemma = orth;
  42 + mi.tagId = 1;
  43 + mi.nameId = 0;
  44 + mi.labelsId = 0;
61 45 return mi;
62 46 }
63 47  
... ... @@ -77,17 +61,6 @@ namespace morfeusz {
77 61 && hasEnding(this->lemma, homonymId);
78 62 }
79 63  
80   - static inline string getQualifiersStr(const MorphInterpretation& mi) {
81   - string res;
82   - for (unsigned int i = 0; i < mi.getQualifiers().size(); i++) {
83   - res += mi.getQualifiers()[i];
84   - if (i + 1 < mi.getQualifiers().size()) {
85   - res += "|";
86   - }
87   - }
88   - return res;
89   - }
90   -
91 64 std::string MorphInterpretation::toString(bool includeNodeNumbers) const {
92 65 std::stringstream res;
93 66 if (includeNodeNumbers) {
... ... @@ -98,13 +71,13 @@ namespace morfeusz {
98 71 res << lemma;
99 72 res << ",";
100 73  
101   - res << getTag();
102   - if (!getName().empty()) {
103   - res << "," << getName();
104   - }
105   - if (!getQualifiers().empty()) {
106   - res << "," << getQualifiersStr(*this);
107   - }
  74 + // res << getTag();
  75 + // if (!getName().empty()) {
  76 + // res << "," << getName();
  77 + // }
  78 + // if (!getQualifiers().empty()) {
  79 + // res << "," << getQualifiersStr(*this);
  80 + // }
108 81 return res.str();
109 82 }
110 83  
... ...
morfeusz/ResultsIteratorImpl.cpp
... ... @@ -52,7 +52,7 @@ namespace morfeusz {
52 52 assert(bufferIterator == buffer.end());
53 53 buffer.resize(0);
54 54 if (!reader.isAtEnd()) {
55   - morfeusz.analyzeOneWord(reader, buffer);
  55 + morfeusz.analyseOneWord(reader, buffer);
56 56 }
57 57 bufferIterator = buffer.begin();
58 58 return bufferIterator != buffer.end();
... ...
morfeusz/c_api/ResultsManager.cpp
... ... @@ -15,8 +15,8 @@ namespace morfeusz {
15 15  
16 16 static const int initialSize = 1024;
17 17  
18   - ResultsManager::ResultsManager()
19   - : results(new InterpMorf[initialSize]()),
  18 + ResultsManager::ResultsManager(const Morfeusz* morfeusz)
  19 + : morfeusz(morfeusz), results(new InterpMorf[initialSize]()),
20 20 resultsArraySize(initialSize) {
21 21 }
22 22  
... ... @@ -45,11 +45,11 @@ namespace morfeusz {
45 45  
46 46 InterpMorf ResultsManager::convertOneResult(const MorphInterpretation& res) {
47 47 InterpMorf convertedRes;
48   - convertedRes.p = res.getStartNode();
49   - convertedRes.k = res.getEndNode();
50   - convertedRes.forma = const_cast<char*>(res.getOrth().c_str());
51   - convertedRes.haslo = const_cast<char*>(res.getLemma().c_str());
52   - convertedRes.interp = const_cast<char*>(res.getTag().c_str());
  48 + convertedRes.p = res.startNode;
  49 + convertedRes.k = res.endNode;
  50 + convertedRes.forma = const_cast<char*>(res.orth.c_str());
  51 + convertedRes.haslo = const_cast<char*>(res.lemma.c_str());
  52 + convertedRes.interp = const_cast<char*>(morfeusz->getIdResolver().getTag(res.tagId).c_str());
53 53 return convertedRes;
54 54 }
55 55 }
... ...
morfeusz/c_api/ResultsManager.hpp
... ... @@ -16,11 +16,11 @@ namespace morfeusz {
16 16  
17 17 class ResultsManager {
18 18 public:
19   - ResultsManager();
  19 + ResultsManager(const Morfeusz* morfeusz);
20 20 InterpMorf* convertResults(const std::vector<MorphInterpretation>& res);
21 21 virtual ~ResultsManager();
22 22 private:
23   -
  23 + const Morfeusz* morfeusz;
24 24 InterpMorf* results;
25 25 unsigned int resultsArraySize;
26 26  
... ...
morfeusz/cli/outputUtils.hpp
... ... @@ -13,14 +13,14 @@
13 13  
14 14 namespace morfeusz {
15 15  
16   -void printMorphResults(const std::vector<MorphInterpretation>& res, bool printNodeNumbers) {
  16 +void printMorphResults(const Morfeusz& morfeusz, const std::vector<MorphInterpretation>& res, bool printNodeNumbers) {
17 17 printf("[");
18 18 int prevStart = -1;
19 19 int prevEnd = -1;
20 20 for (unsigned int i = 0; i < res.size(); i++) {
21 21 const MorphInterpretation& mi = res[i];
22 22 if (prevStart != -1
23   - && (prevStart != mi.getStartNode() || prevEnd != mi.getEndNode())) {
  23 + && (prevStart != mi.startNode || prevEnd != mi.endNode)) {
24 24 printf("]\n[");
25 25 }
26 26 else if (prevStart != -1) {
... ... @@ -28,24 +28,16 @@ void printMorphResults(const std::vector&lt;MorphInterpretation&gt;&amp; res, bool printNo
28 28 }
29 29 // printf("%s", mi.toString(true).c_str());
30 30 if (printNodeNumbers) {
31   - printf("%d,%d,", mi.getStartNode(), mi.getEndNode());
  31 + printf("%d,%d,", mi.startNode, mi.endNode);
32 32 }
33   - printf("%s,%s,%s,%s,",
34   - mi.getOrth().c_str(),
35   - mi.getLemma().c_str(),
36   - mi.getTag().c_str(),
37   - mi.getName().empty() ? "_" : mi.getName().c_str());
38   - if (!mi.getQualifiers().empty()) {
39   - printf("%s", mi.getQualifiers()[0].c_str());
40   - for (unsigned int i = 1; i < mi.getQualifiers().size(); i++) {
41   - printf("|%s", mi.getQualifiers()[i].c_str());
42   - }
43   - }
44   - else {
45   - printf("_");
46   - }
47   - prevStart = mi.getStartNode();
48   - prevEnd = mi.getEndNode();
  33 + printf("%s,%s,%s,%s,%s",
  34 + mi.orth.c_str(),
  35 + mi.lemma.c_str(),
  36 + morfeusz.getIdResolver().getTag(mi.tagId).c_str(),
  37 + mi.nameId == 0 ? "_" : morfeusz.getIdResolver().getName(mi.nameId).c_str(),
  38 + mi.labelsId == 0 ? "_" : morfeusz.getIdResolver().getLabelsAsString(mi.labelsId).c_str());
  39 + prevStart = mi.startNode;
  40 + prevEnd = mi.endNode;
49 41 }
50 42 printf("]\n");
51 43 }
... ...
morfeusz/const.cpp
... ... @@ -9,5 +9,6 @@ extern const unsigned char SHIFT_ORTH_NODE = 1;
9 9  
10 10 extern const char HOMONYM_SEPARATOR = ':';
11 11  
  12 +extern const char LABELS_SEPARATOR = '|';
12 13  
13 14 }
14 15 \ No newline at end of file
... ...
morfeusz/const.hpp
... ... @@ -23,6 +23,8 @@ extern const unsigned char SHIFT_ORTH_NODE;
23 23  
24 24 extern const char HOMONYM_SEPARATOR;
25 25  
  26 +extern const char LABELS_SEPARATOR;
  27 +
26 28 }
27 29  
28 30 #endif /* CONST_HPP */
... ...
morfeusz/deserialization/deserializationUtils.hpp
... ... @@ -48,16 +48,6 @@ inline std::string readString(const unsigned char*&amp; currPtr) {
48 48 return res;
49 49 }
50 50  
51   -inline void readTags(const unsigned char*& currPtr, std::vector<std::string>& tags) {
52   - tags.clear();
53   - tags.resize(65536);
54   - uint16_t tagsNum = readInt16(currPtr);
55   - for (unsigned int i = 0; i < tagsNum; i++) {
56   - unsigned int tagNum = readInt16(currPtr);
57   - tags[tagNum] = readString(currPtr);
58   - }
59   -}
60   -
61 51 }
62 52  
63 53 #endif /* DESERIALIZATIONUTILS_HPP */
... ...
morfeusz/deserialization/morphInterps/EncodedInterpretation.hpp
... ... @@ -11,7 +11,7 @@
11 11 #include <string>
12 12 #include <vector>
13 13 #include <inttypes.h>
14   -#include "DefaultTagset.hpp"
  14 +#include "IdResolverImpl.hpp"
15 15  
16 16 namespace morfeusz {
17 17  
... ...
morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.cpp
... ... @@ -112,14 +112,24 @@ void InterpretedChunksDecoder4Analyzer::decodeMorphInterpretation(
112 112 string lemma(params.lemma4Prefixes);
113 113 lemma.reserve(lemma.size() + normalizedCodepoints.size());
114 114 this->decodeLemma(ei.value, params.chunk.codepointsNum, false, lemma);
115   - MorphInterpretation mi(
116   - params.startNode, params.endNode,
117   - params.orth, lemma,
118   - ei.tag,
119   - ei.nameClassifier,
120   - &env.getQualifiersHelper().getQualifiers(ei.qualifiers),
121   - &env.getTagset());
122   - out.push_back(mi);
  115 + size_t newIdx = out.size();
  116 + out.resize(newIdx + 1);
  117 + MorphInterpretation& newElem = out[newIdx];
  118 + newElem.startNode = params.startNode;
  119 + newElem.endNode = params.endNode;
  120 + newElem.orth = params.orth;
  121 + newElem.lemma = lemma;
  122 + newElem.tagId = ei.tag;
  123 + newElem.nameId = ei.nameClassifier;
  124 + newElem.labelsId = ei.qualifiers;
  125 +// MorphInterpretation mi(
  126 +// params.startNode, params.endNode,
  127 +// params.orth, lemma,
  128 +// ei.tag,
  129 +// ei.nameClassifier,
  130 +// &env.getQualifiersHelper().getQualifiers(ei.qualifiers),
  131 +// &env.getTagset());
  132 +// out.push_back(mi);
123 133 }
124 134 }
125 135  
... ...
morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Generator.cpp
... ... @@ -13,92 +13,102 @@ using namespace std;
13 13  
14 14 namespace morfeusz {
15 15  
16   -InterpretedChunksDecoder4Generator::InterpretedChunksDecoder4Generator(const Environment& env) : InterpretedChunksDecoder(env) {
17   -}
  16 + InterpretedChunksDecoder4Generator::InterpretedChunksDecoder4Generator(const Environment& env) : InterpretedChunksDecoder(env) {
  17 + }
18 18  
19   -void InterpretedChunksDecoder4Generator::decode(
20   - unsigned int startNode,
21   - unsigned int endNode,
22   - const InterpretedChunk& interpretedChunk,
23   - std::vector<MorphInterpretation>& out) const {
24   - string orthPrefix;
25   - string lemma;
26   -// convertPrefixes(interpretedChunk, orthPrefix, lemma);
27   - // lemma += env.getCharsetConverter().toString(interpretedChunk.originalCodepoints);
28   - lemma.insert(lemma.end(), interpretedChunk.textStartPtr, interpretedChunk.textEndPtr);
29   - const unsigned char* currPtr = getInterpretationsPtr(interpretedChunk.interpsGroupPtr);
30   - while (currPtr < interpretedChunk.interpsEndPtr) {
31   - MorphInterpretation mi = this->decodeMorphInterpretation(startNode, endNode, orthPrefix, lemma, interpretedChunk, currPtr);
32   - // cerr << mi.toString(false) << endl;
33   - // cerr << "required='" << interpretedChunk.requiredHomonymId << "' morphInterp='" << mi.getHomonymId() << "'" << endl;
34   - if (interpretedChunk.requiredHomonymId.empty() || mi.hasHomonym(interpretedChunk.requiredHomonymId)) {
35   - out.push_back(mi);
  19 + void InterpretedChunksDecoder4Generator::decode(
  20 + unsigned int startNode,
  21 + unsigned int endNode,
  22 + const InterpretedChunk& interpretedChunk,
  23 + std::vector<MorphInterpretation>& out) const {
  24 + string orthPrefix;
  25 + string lemma;
  26 + // convertPrefixes(interpretedChunk, orthPrefix, lemma);
  27 + // lemma += env.getCharsetConverter().toString(interpretedChunk.originalCodepoints);
  28 + lemma.insert(lemma.end(), interpretedChunk.textStartPtr, interpretedChunk.textEndPtr);
  29 + const unsigned char* currPtr = getInterpretationsPtr(interpretedChunk.interpsGroupPtr);
  30 + while (currPtr < interpretedChunk.interpsEndPtr) {
  31 + MorphInterpretation mi = this->decodeMorphInterpretation(startNode, endNode, orthPrefix, lemma, interpretedChunk, currPtr);
  32 + // cerr << mi.toString(false) << endl;
  33 + // cerr << "required='" << interpretedChunk.requiredHomonymId << "' morphInterp='" << mi.getHomonymId() << "'" << endl;
  34 + if (interpretedChunk.requiredHomonymId.empty() || mi.hasHomonym(interpretedChunk.requiredHomonymId)) {
  35 + out.push_back(mi);
  36 + }
36 37 }
37 38 }
38   -}
39 39  
40   -//void InterpretedChunksDecoder4Generator::convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& orthPrefix, std::string& lemma) const {
41   -// for (unsigned int i = 0; i < interpretedChunk.prefixChunks.size(); i++) {
42   -// const InterpretedChunk& prefixChunk = interpretedChunk.prefixChunks[i];
43   -//// lemma.insert(lemma.end(), prefixChunk.textStartPtr, prefixChunk.textEndPtr);
44   -// const unsigned char* ptr = getInterpretationsPtr(interpretedChunk.interpsGroupPtr);
45   -// MorphInterpretation mi = this->decodeMorphInterpretation(0, 0, orthPrefix, string(""), prefixChunk, ptr);
46   -//// orthPrefix += mi.getOrth();
47   -// }
48   -//}
  40 + //void InterpretedChunksDecoder4Generator::convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& orthPrefix, std::string& lemma) const {
  41 + // for (unsigned int i = 0; i < interpretedChunk.prefixChunks.size(); i++) {
  42 + // const InterpretedChunk& prefixChunk = interpretedChunk.prefixChunks[i];
  43 + //// lemma.insert(lemma.end(), prefixChunk.textStartPtr, prefixChunk.textEndPtr);
  44 + // const unsigned char* ptr = getInterpretationsPtr(interpretedChunk.interpsGroupPtr);
  45 + // MorphInterpretation mi = this->decodeMorphInterpretation(0, 0, orthPrefix, string(""), prefixChunk, ptr);
  46 + //// orthPrefix += mi.getOrth();
  47 + // }
  48 + //}
49 49  
50   -MorphInterpretation InterpretedChunksDecoder4Generator::decodeMorphInterpretation(
51   - unsigned int startNode, unsigned int endNode,
52   - const string& orthPrefix,
53   - const string& lemma,
54   - const InterpretedChunk& chunk,
55   - const unsigned char*& ptr) const {
56   - string orth = orthPrefix;
57   - EncodedInterpretation ei = this->deserializeInterp(ptr);
58   - codepoints.resize(0);
59   - const char* currPtr = chunk.textStartPtr;
60   - while (currPtr != chunk.textEndPtr) {
61   - uint32_t cp = env.getCharsetConverter().next(currPtr, chunk.textEndPtr);
62   - codepoints.push_back(cp);
63   - }
64   - this->decodeForm(codepoints, ei.value, orth);
65   - MorphInterpretation res(
66   - startNode, endNode,
67   - orth, ei.homonymId.empty() ? lemma : (lemma + HOMONYM_SEPARATOR + ei.homonymId),
68   - // ei.homonymId,
69   - ei.tag,
70   - ei.nameClassifier,
71   - &env.getQualifiersHelper().getQualifiers(ei.qualifiers),
72   - &env.getTagset());
73   - return res;
74   -}
  50 + MorphInterpretation InterpretedChunksDecoder4Generator::decodeMorphInterpretation(
  51 + unsigned int startNode, unsigned int endNode,
  52 + const string& orthPrefix,
  53 + const string& lemma,
  54 + const InterpretedChunk& chunk,
  55 + const unsigned char*& ptr) const {
  56 + string orth = orthPrefix;
  57 + EncodedInterpretation ei = this->deserializeInterp(ptr);
  58 + codepoints.resize(0);
  59 + const char* currPtr = chunk.textStartPtr;
  60 + while (currPtr != chunk.textEndPtr) {
  61 + uint32_t cp = env.getCharsetConverter().next(currPtr, chunk.textEndPtr);
  62 + codepoints.push_back(cp);
  63 + }
  64 + this->decodeForm(codepoints, ei.value, orth);
  65 +
  66 + MorphInterpretation res;
  67 + res.startNode = startNode;
  68 + res.endNode = endNode;
  69 + res.orth = orth;
  70 + res.lemma = ei.homonymId.empty() ? lemma : (lemma + HOMONYM_SEPARATOR + ei.homonymId);
  71 + res.tagId = ei.tag;
  72 + res.nameId = ei.nameClassifier;
  73 + res.labelsId = ei.qualifiers;
75 74  
76   -void InterpretedChunksDecoder4Generator::decodeForm(
77   - const vector<uint32_t>& lemma,
78   - const EncodedForm& orth,
79   - string& res) const {
80   - res += orth.prefixToAdd;
81   - for (unsigned int i = 0; i < lemma.size() - orth.suffixToCut; i++) {
82   - env.getCharsetConverter().append(lemma[i], res);
  75 + // MorphInterpretation res(
  76 + // startNode, endNode,
  77 + // orth, ei.homonymId.empty() ? lemma : (lemma + HOMONYM_SEPARATOR + ei.homonymId),
  78 + // // ei.homonymId,
  79 + // ei.tag,
  80 + // ei.nameClassifier,
  81 + // &env.getQualifiersHelper().getQualifiers(ei.qualifiers),
  82 + // &env.getTagset());
  83 + return res;
83 84 }
84   - const char* suffixPtr = orth.suffixToAdd.c_str();
85   - const char* suffixEnd = suffixPtr + orth.suffixToAdd.length();
86   - while (suffixPtr != suffixEnd) {
87   - uint32_t cp = UTF8CharsetConverter::getInstance().next(suffixPtr, suffixEnd);
88   - env.getCharsetConverter().append(cp, res);
  85 +
  86 + void InterpretedChunksDecoder4Generator::decodeForm(
  87 + const vector<uint32_t>& lemma,
  88 + const EncodedForm& orth,
  89 + string& res) const {
  90 + res += orth.prefixToAdd;
  91 + for (unsigned int i = 0; i < lemma.size() - orth.suffixToCut; i++) {
  92 + env.getCharsetConverter().append(lemma[i], res);
  93 + }
  94 + const char* suffixPtr = orth.suffixToAdd.c_str();
  95 + const char* suffixEnd = suffixPtr + orth.suffixToAdd.length();
  96 + while (suffixPtr != suffixEnd) {
  97 + uint32_t cp = UTF8CharsetConverter::getInstance().next(suffixPtr, suffixEnd);
  98 + env.getCharsetConverter().append(cp, res);
  99 + }
89 100 }
90   -}
91 101  
92   -EncodedInterpretation InterpretedChunksDecoder4Generator::deserializeInterp(const unsigned char*& ptr) const {
93   - EncodedInterpretation interp;
94   - interp.homonymId = readString(ptr);
95   - interp.value.prefixToAdd = readString(ptr);
96   - interp.value.suffixToCut = readInt8(ptr);
97   - interp.value.suffixToAdd = readString(ptr);
98   - interp.tag = readInt16(ptr);
99   - interp.nameClassifier = readInt8(ptr);
100   - interp.qualifiers = readInt16(ptr);
101   - return interp;
102   -}
  102 + EncodedInterpretation InterpretedChunksDecoder4Generator::deserializeInterp(const unsigned char*& ptr) const {
  103 + EncodedInterpretation interp;
  104 + interp.homonymId = readString(ptr);
  105 + interp.value.prefixToAdd = readString(ptr);
  106 + interp.value.suffixToCut = readInt8(ptr);
  107 + interp.value.suffixToAdd = readString(ptr);
  108 + interp.tag = readInt16(ptr);
  109 + interp.nameClassifier = readInt8(ptr);
  110 + interp.qualifiers = readInt16(ptr);
  111 + return interp;
  112 + }
103 113  
104 114 }
... ...
morfeusz/fsa/const.cpp
... ... @@ -4,7 +4,7 @@
4 4 namespace morfeusz {
5 5  
6 6 extern const uint32_t MAGIC_NUMBER = 0x8fc2bc1b;
7   -extern const uint8_t VERSION_NUM = 18;
  7 +extern const uint8_t VERSION_NUM = 19;
8 8  
9 9 extern const unsigned int VERSION_NUM_OFFSET = 4;
10 10 extern const unsigned int IMPLEMENTATION_NUM_OFFSET = 5;
... ...
morfeusz/morfeusz2.h
... ... @@ -11,6 +11,7 @@
11 11 #include <vector>
12 12 #include <string>
13 13 #include <list>
  14 +#include <set>
14 15  
15 16 #ifndef __WIN32
16 17 #define DLLIMPORT
... ... @@ -28,7 +29,7 @@ namespace morfeusz {
28 29 class DLLIMPORT MorphInterpretation;
29 30 class DLLIMPORT Morfeusz;
30 31 class DLLIMPORT ResultsIterator;
31   - template <class T> class DLLIMPORT Tagset;
  32 + class DLLIMPORT IdResolver;
32 33 class DLLIMPORT MorfeuszException;
33 34  
34 35 enum Charset {
... ... @@ -110,20 +111,6 @@ namespace morfeusz {
110 111 */
111 112 static Morfeusz* createInstance();
112 113  
113   - /**
114   - * Set a file used for morphological analysis.
115   - *
116   - * @param filename
117   - */
118   - virtual void setAnalyzerDictionary(const std::string& filename) = 0;
119   -
120   - /**
121   - * Set a file used for morphological synthesis.
122   - *
123   - * @param filename
124   - */
125   - virtual void setGeneratorDictionary(const std::string& filename) = 0;
126   -
127 114 virtual ~Morfeusz();
128 115  
129 116 /**
... ... @@ -134,7 +121,7 @@ namespace morfeusz {
134 121 * @param text - text for morphological analysis.
135 122 * @return - iterator over morphological analysis results
136 123 */
137   - virtual ResultsIterator* analyze(const std::string& text) const = 0;
  124 + virtual ResultsIterator* analyse(const std::string& text) const = 0;
138 125  
139 126 /**
140 127 * Analyze given text and return the results as iterator.
... ... @@ -144,7 +131,7 @@ namespace morfeusz {
144 131 * @param text - text for morphological analysis. This pointer must not be deleted before returned ResultsIterator object.
145 132 * @return - iterator over morphological analysis results
146 133 */
147   - virtual ResultsIterator* analyze(const char* text) const = 0;
  134 + virtual ResultsIterator* analyse(const char* text) const = 0;
148 135  
149 136 /**
150 137 * Perform morphological analysis on a given text and put results in a vector.
... ... @@ -152,7 +139,7 @@ namespace morfeusz {
152 139 * @param text - text to be analyzed
153 140 * @param result - results vector
154 141 */
155   - virtual void analyze(const std::string& text, std::vector<MorphInterpretation>& result) const = 0;
  142 + virtual void analyse(const std::string& text, std::vector<MorphInterpretation>& result) const = 0;
156 143  
157 144 /**
158 145 * Perform morphological synthesis on a given lemma and put results in a vector.
... ... @@ -170,7 +157,7 @@ namespace morfeusz {
170 157 * @param tagId - identifier of the tag of result interpretations
171 158 * @param result - results vector
172 159 */
173   - virtual void generate(const std::string& lemma, int tagnum, std::vector<MorphInterpretation>& result) const = 0;
  160 + virtual void generate(const std::string& lemma, int tagId, std::vector<MorphInterpretation>& result) const = 0;
174 161  
175 162 /**
176 163 * Set encoding for input and output string objects.
... ... @@ -220,25 +207,40 @@ namespace morfeusz {
220 207 * @param debug
221 208 */
222 209 virtual void setDebug(bool debug) = 0;
223   -
  210 +
224 211 /**
225   - * Gets default tagset used for morphological analysis.
226   - * @return
  212 + * Get a reference to the tagset currently in use.
  213 + *
  214 + * @return currently used tagset
227 215 */
228   - virtual const Tagset<std::string>& getDefaultAnalyzerTagset() const = 0;
229   -
  216 + virtual const IdResolver& getIdResolver() const = 0;
  217 +
230 218 /**
231   - * Gets default tagset used for morphological synthesis.
232   - * @return
  219 + * Set current dictionary to the one with provided name.
  220 + *
  221 + * This is NOT thread safe (no other thread may invoke setDictionary
  222 + * either within this instance, or any other in the same application).
  223 + *
  224 + * @param dictName dictionary name
233 225 */
234   - virtual const Tagset<std::string>& getDefaultGeneratorTagset() const = 0;
  226 +// virtual void setDictionary(const std::string& dictName) = 0;
  227 +
  228 + /**
  229 + * List of directories where current Morfeusz instance will look for dictionaries.
  230 + */
  231 + std::list<std::string> dictionarySearchPaths;
235 232  
  233 +
  234 + virtual void setAnalyzerDictionary(const std::string& filename) = 0;
  235 +
  236 + virtual void setGeneratorDictionary(const std::string& filename) = 0;
  237 +
236 238 protected:
237 239 /**
238 240 * Same as analyse(text) but copies the text under the hood.
239 241 * Useful for wrappers to other languages.
240 242 */
241   - virtual ResultsIterator* analyzeWithCopy(const char* text) const = 0;
  243 + virtual ResultsIterator* analyseWithCopy(const char* text) const = 0;
242 244 };
243 245  
244 246 class DLLIMPORT ResultsIterator {
... ... @@ -253,8 +255,7 @@ namespace morfeusz {
253 255 /**
254 256 * Represents a tagset
255 257 */
256   - template <class T>
257   - class DLLIMPORT Tagset {
  258 + class DLLIMPORT IdResolver {
258 259 public:
259 260  
260 261 /**
... ... @@ -263,7 +264,15 @@ namespace morfeusz {
263 264 * @param tagId - tag index in the tagset.
264 265 * @return - the tag
265 266 */
266   - virtual const T& getTag(const int tagNum) const = 0;
  267 + virtual const std::string& getTag(const int tagId) const = 0;
  268 +
  269 + /**
  270 + * Returns identifier for given tag.
  271 + * Throws MorfeuszException when none exists.
  272 + *
  273 + * @return identifier for given tag
  274 + */
  275 + virtual int getTagId(const std::string& tag) const = 0;
267 276  
268 277 /**
269 278 * Returns named entity type (denoted by its index).
... ... @@ -271,23 +280,39 @@ namespace morfeusz {
271 280 * @param nameId - name index in the tagset.
272 281 * @return - the named entity type
273 282 */
274   - virtual const T& getName(const int nameNum) const = 0;
  283 + virtual const std::string& getName(const int nameId) const = 0;
  284 +
  285 + /**
  286 + * Returns identifier for given named entity.
  287 + * Throws MorfeuszException when none exists.
  288 + *
  289 + * @return identifier for given named entity
  290 + */
  291 + virtual int getNameId(const std::string& name) const = 0;
  292 +
  293 + virtual const std::string& getLabelsAsString(int labelsId) const = 0;
  294 +
  295 + virtual const std::set<std::string>& getLabels(int labelsId) const = 0;
  296 +
  297 + virtual int getLabelsId(const std::string& labelsStr) const = 0;
275 298  
276 299 /**
277 300 * Returns number of tags this tagset contains.
278 301 *
279 302 * @return
280 303 */
281   - virtual size_t getTagsSize() const = 0;
  304 + virtual size_t getTagsCount() const = 0;
282 305  
283 306 /**
284 307 * Returns number of named entity types this tagset contains.
285 308 *
286 309 * @return
287 310 */
288   - virtual size_t getNamesSize() const = 0;
  311 + virtual size_t getNamesCount() const = 0;
  312 +
  313 + virtual size_t getLabelsCount() const = 0;
289 314  
290   - virtual ~Tagset() {
  315 + virtual ~IdResolver() {
291 316 }
292 317 };
293 318  
... ... @@ -311,109 +336,41 @@ namespace morfeusz {
311 336 The structure below describes one edge of this DAG:
312 337  
313 338 */
314   - class DLLIMPORT MorphInterpretation {
315   - public:
316   -
317   - /**
318   - *
319   - * @param startNode - number of start node in DAG.
320   - * @param endNode - number of end node in DAG.
321   - * @param orth - orthographic form
322   - * @param lemma - base form
323   - * @param tagnum - tag identifier (0 for "unrecognized", 1 for "whitespace")
324   - * @param namenum - named entity identifier (0 for "not a named entity")
325   - * @param qualifiers - pointer to vector of qualifiers (not owned by this)
326   - * @param tagset - pointer to default tagset used by Morfeusz (not owned by this)
327   - */
328   - MorphInterpretation(
329   - int startNode,
330   - int endNode,
331   - const std::string& orth,
332   - const std::string& lemma,
333   - int tagnum,
334   - int namenum,
335   - const std::vector<std::string>* qualifiers,
336   - const Tagset<std::string>* tagset);
337   -
338   - MorphInterpretation();
  339 + struct DLLIMPORT MorphInterpretation {
339 340  
340 341 /**
341 342 * Creates new instance with "ign" tag (meaning: "not found in the dictionary")
342 343 */
343 344 static MorphInterpretation createIgn(
344 345 int startNode, int endNode,
345   - const std::string& orth, const std::string& lemma,
346   - const Tagset<std::string>& tagset);
  346 + const std::string& orth, const std::string& lemma);
347 347  
348 348 /**
349 349 * Creates new instance with "sp" tag (meaning: "this is a sequence of whitespaces")
350 350 */
351   - static MorphInterpretation createWhitespace(int startNode, int endNode, const std::string& orth, const Tagset<std::string>& tagset);
352   -
353   - inline int getStartNode() const {
354   - return startNode;
355   - }
356   -
357   - inline int getEndNode() const {
358   - return endNode;
359   - }
360   -
361   - inline const std::string& getOrth() const {
362   - return orth;
363   - }
364   -
365   - inline const std::string& getLemma() const {
366   - return lemma;
367   - }
368   -
369   - inline int getTagnum() const {
370   - return tagnum;
371   - }
372   -
373   - inline int getNamenum() const {
374   - return namenum;
375   - }
  351 + static MorphInterpretation createWhitespace(int startNode, int endNode, const std::string& orth);
376 352  
377 353 inline bool isIgn() const {
378   - return tagnum == 0;
  354 + return tagId == 0;
379 355 }
380 356  
381 357 inline bool isWhitespace() const {
382   - return tagnum == 1;
383   - }
384   -
385   - inline const std::string& getTag() const {
386   - return tagset->getTag(tagnum);
387   - }
388   -
389   - inline const std::string& getName() const {
390   - return tagset->getName(namenum);
  358 + return tagId == 1;
391 359 }
392   -
393   - inline const std::vector<std::string>& getQualifiers() const {
394   - return *qualifiers;
395   - }
396   -
  360 +
  361 + // FIXME - to be moved elsewhere
397 362 bool hasHomonym(const std::string& homonymId) const;
398 363  
  364 + // FIXME - to be moved elsewhere
399 365 std::string toString(bool includeNodeNumbers) const;
400   - private:
  366 +
401 367 int startNode;
402 368 int endNode;
403 369 std::string orth;
404 370 std::string lemma;
405   - int tagnum;
406   - int namenum;
407   -
408   - /**
409   - * not owned by this
410   - */
411   - const std::vector<std::string>* qualifiers;
412   -
413   - /**
414   - * not owned by this
415   - */
416   - const Tagset<std::string>* tagset;
  371 + int tagId;
  372 + int nameId;
  373 + int labelsId;
417 374 };
418 375  
419 376 class DLLIMPORT MorfeuszException : public std::exception {
... ...
morfeusz/morfeusz2_c.cpp
... ... @@ -13,7 +13,7 @@ using namespace morfeusz;
13 13  
14 14 static Morfeusz* morfeuszInstance = Morfeusz::createInstance();
15 15 static vector<MorphInterpretation> results;
16   -static ResultsManager resultsManager;
  16 +static ResultsManager resultsManager(morfeuszInstance);
17 17  
18 18 extern "C" DLLIMPORT
19 19 char* morfeusz_about() {
... ... @@ -23,7 +23,7 @@ char* morfeusz_about() {
23 23 extern "C" DLLIMPORT
24 24 InterpMorf* morfeusz_analyse(char *tekst) {
25 25 results.clear();
26   - morfeuszInstance->analyze(string(tekst), results);
  26 + morfeuszInstance->analyse(string(tekst), results);
27 27 return resultsManager.convertResults(results);
28 28 }
29 29  
... ...
morfeusz/morfeusz_analyzer.cpp
... ... @@ -26,8 +26,8 @@ int main(int argc, const char** argv) {
26 26 while (getline(cin, line)) {
27 27 // printf("%s\n", line.c_str());
28 28 res.clear();
29   - morfeusz->analyze(line, res);
30   - printMorphResults(res, true);
  29 + morfeusz->analyse(line, res);
  30 + printMorphResults(*morfeusz, res, true);
31 31 }
32 32 delete morfeusz;
33 33 printf("\n");
... ...
morfeusz/morfeusz_generator.cpp
... ... @@ -25,7 +25,7 @@ int main(int argc, const char** argv) {
25 25 // printf("%s\n", line.c_str());
26 26 res.clear();
27 27 morfeusz->generate(line, res);
28   - printMorphResults(res, false);
  28 + printMorphResults(*morfeusz, res, false);
29 29 }
30 30 printf("\n");
31 31 delete &opt;
... ...
morfeusz/test/consoleUtils.hpp deleted
1   -/*
2   - * File: consoleUtils.hpp
3   - * Author: lennyn
4   - *
5   - * Created on April 4, 2014, 7:36 PM
6   - */
7   -
8   -#ifndef CONSOLEUTILS_HPP
9   -#define CONSOLEUTILS_HPP
10   -
11   -#include <vector>
12   -#include <string>
13   -#include "morfeusz2.h"
14   -
15   -namespace morfeusz {
16   -
17   -template <class OutputStream>
18   -void appendMorfeuszResults(const std::vector<MorphInterpretation>& res, OutputStream& out) {
19   - int prevStart = -1;
20   - int prevEnd = -1;
21   - out << "[";
22   - for (unsigned int i = 0; i < res.size(); i++) {
23   - const MorphInterpretation& mi = res[i];
24   - if (prevStart != -1
25   - && (prevStart != mi.getStartNode() || prevEnd != mi.getEndNode())) {
26   - out << "]\n[";
27   - }
28   - else if (prevStart != -1) {
29   - out << "; ";
30   - }
31   - out << mi.getStartNode() << ","
32   - << mi.getEndNode() << ","
33   - << mi.getOrth() << ","
34   - << mi.getLemma() << ","
35   - << mi.getTag() << ","
36   - << mi.getName();
37   - prevStart = mi.getStartNode();
38   - prevEnd = mi.getEndNode();
39   - }
40   - out << "]\n";
41   -}
42   -
43   -}
44   -
45   -#endif /* CONSOLEUTILS_HPP */
46   -
morfeusz/test/test_recognize_dict.cpp deleted
1   -/*
2   - * File: test_morph.cpp
3   - * Author: mlenart
4   - *
5   - * Created on November 8, 2013, 4:12 PM
6   - */
7   -
8   -//#include <cstdlib>
9   -#include <sstream>
10   -#include <iostream>
11   -#include "utils.hpp"
12   -#include "MorfeuszInternal.hpp"
13   -#include "morfeusz2.h"
14   -
15   -using namespace std;
16   -using namespace morfeusz;
17   -
18   -int main(int argc, char** argv) {
19   - validate(argc == 3, "Must provide exactly 2 arguments - input FSA filename and dictionary filename.");
20   - string fsaFilename = argv[1];
21   - string dictFilename = argv[2];
22   - MorfeuszInternal morfeusz;
23   - morfeusz.setAnalyzerDictionary(fsaFilename);
24   - ifstream in;
25   - in.open(dictFilename.c_str());
26   - string line;
27   - while (getline(in, line)) {
28   - cerr << "TEST " << line << endl;
29   - vector<string> splitVector(split(line, '\t'));
30   - string orth = splitVector[0];
31   - string lemma = splitVector[1];
32   - string tag = splitVector[2];
33   - string name = splitVector[3];
34   -
35   - vector<MorphInterpretation> res;
36   - cerr << "ANALYZE '" << orth << "'" << endl;
37   - morfeusz.analyze(orth, res);
38   - bool found = false;
39   -
40   - for (unsigned int i = 0; i < res.size(); i++) {
41   - MorphInterpretation& mi = res[i];
42   - DEBUG("FOUND: " + mi.getLemma() + ":" + mi.getTag());
43   - if (lemma == mi.getLemma() && tag == mi.getTag() && name == mi.getName()) {
44   - DEBUG("RECOGNIZED " + orth + " " + lemma + ":" + tag + ":" + name);
45   - found = true;
46   - }
47   - }
48   - validate(found, "Failed to recognize " + orth + " " + lemma + ":" + tag + ":" + name);
49   - }
50   - return 0;
51   -}
morfeusz/test/test_result_equals.cpp deleted
1   -/*
2   - * File: test_result_equals.cpp
3   - * Author: lennyn
4   - *
5   - * Created on December 6, 2013, 12:45 PM
6   - */
7   -
8   -#include <cstdlib>
9   -#include <cassert>
10   -#include <string>
11   -#include <sstream>
12   -#include <fstream>
13   -#include <iostream>
14   -#include "MorfeuszInternal.hpp"
15   -#include "consoleUtils.hpp"
16   -
17   -using namespace std;
18   -using namespace morfeusz;
19   -
20   -static Charset getEncoding(const string& encodingStr) {
21   - if (encodingStr == "UTF8")
22   - return UTF8;
23   - else if (encodingStr == "ISO8859_2")
24   - return ISO8859_2;
25   - else if (encodingStr == "CP1250")
26   - return CP1250;
27   - else if (encodingStr == "CP852")
28   - return CP852;
29   - else {
30   - cerr << "Invalid encoding: " << encodingStr << " must be one of: UTF8, ISO8859_2, WINDOWS1250" << endl;
31   - throw "Invalid encoding";
32   - }
33   -}
34   -
35   -int main(int argc, char** argv) {
36   - validate(argc == 3 || argc == 4, "Must provide exactly 2 or 3 arguments - input filename, required output filename, (optional) encoding.");
37   - string inputFilename = argv[1];
38   - ifstream in;
39   -// in.exceptions(std::ifstream::failbit | std::ifstream::badbit);
40   - cerr << "OPEN " << inputFilename << endl;
41   - in.open(inputFilename.c_str());
42   - string requiredOutputFilename = argv[2];
43   - ifstream requiredIn;
44   -// requiredIn.exceptions(std::ifstream::failbit | std::ifstream::badbit);
45   - cerr << "OPEN " << requiredOutputFilename << endl;
46   - requiredIn.open(requiredOutputFilename.c_str());
47   - // string requiredOutput = readFile<char>(requiredOutputFilename);
48   - cerr << "TEST START" << endl;
49   - MorfeuszInternal morfeusz;
50   - if (argc == 4) {
51   - Charset encoding = getEncoding(argv[3]);
52   - morfeusz.setCharset(encoding);
53   - }
54   - string line;
55   - while (getline(in, line)) {
56   - cerr << "TEST " << line << endl;
57   - vector<MorphInterpretation> res;
58   - morfeusz.analyze(line, res);
59   - stringstream out;
60   - appendMorfeuszResults(res, out);
61   - string gotOutputLine;
62   - string requiredOutputLine;
63   - while (getline(out, gotOutputLine)) {
64   - getline(requiredIn, requiredOutputLine);
65   - cerr << "REQUIRED LINE " << requiredOutputLine << endl;
66   - cerr << "GOT LINE " << gotOutputLine << endl;
67   - cerr << (requiredOutputLine == gotOutputLine) << endl;
68   - validate(gotOutputLine == requiredOutputLine, "lines do not match");
69   - }
70   - }
71   - return 0;
72   -}
73   -
morfeusz/tests/TestMorfeusz.cpp
... ... @@ -32,10 +32,10 @@ void TestMorfeusz::tearDown() {
32 32  
33 33 void TestMorfeusz::testAnalyzeIterate1() {
34 34 cerr << "testAnalyzeIterate1" << endl;
35   - ResultsIterator* it = morfeusz->analyze("AAAAbbbbCCCC");
  35 + ResultsIterator* it = morfeusz->analyse("AAAAbbbbCCCC");
36 36 CPPUNIT_ASSERT(it->hasNext());
37   - CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), it->peek().getOrth());
38   - CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), it->next().getOrth());
  37 + CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), it->peek().orth);
  38 + CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), it->next().orth);
39 39 CPPUNIT_ASSERT(!it->hasNext());
40 40 CPPUNIT_ASSERT_THROW(it->peek(), MorfeuszException);
41 41 CPPUNIT_ASSERT_THROW(it->next(), MorfeuszException);
... ... @@ -45,25 +45,25 @@ void TestMorfeusz::testAnalyzeIterate1() {
45 45 void TestMorfeusz::testAnalyzeIterateWithWhitespaceHandlingKEEP() {
46 46 cerr << "testAnalyzeIterateWithWhitespaceHandlingKEEP" << endl;
47 47 morfeusz->setWhitespaceHandling(KEEP_WHITESPACES);
48   - ResultsIterator* it = morfeusz->analyze(" AAAAbbbbCCCC DDDDeeee.\t");
  48 + ResultsIterator* it = morfeusz->analyse(" AAAAbbbbCCCC DDDDeeee.\t");
49 49  
50 50 CPPUNIT_ASSERT(it->hasNext());
51   - CPPUNIT_ASSERT_EQUAL(string(" "), it->next().getOrth());
  51 + CPPUNIT_ASSERT_EQUAL(string(" "), it->next().orth);
52 52  
53 53 CPPUNIT_ASSERT(it->hasNext());
54   - CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), it->next().getOrth());
  54 + CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), it->next().orth);
55 55  
56 56 CPPUNIT_ASSERT(it->hasNext());
57   - CPPUNIT_ASSERT_EQUAL(string(" "), it->next().getOrth());
  57 + CPPUNIT_ASSERT_EQUAL(string(" "), it->next().orth);
58 58  
59 59 CPPUNIT_ASSERT(it->hasNext());
60   - CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), it->next().getOrth());
  60 + CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), it->next().orth);
61 61  
62 62 CPPUNIT_ASSERT(it->hasNext());
63   - CPPUNIT_ASSERT_EQUAL(string("."), it->next().getOrth());
  63 + CPPUNIT_ASSERT_EQUAL(string("."), it->next().orth);
64 64  
65 65 CPPUNIT_ASSERT(it->hasNext());
66   - CPPUNIT_ASSERT_EQUAL(string("\t"), it->next().getOrth());
  66 + CPPUNIT_ASSERT_EQUAL(string("\t"), it->next().orth);
67 67  
68 68 CPPUNIT_ASSERT(!it->hasNext());
69 69 CPPUNIT_ASSERT_THROW(it->peek(), MorfeuszException);
... ... @@ -74,16 +74,16 @@ void TestMorfeusz::testAnalyzeIterateWithWhitespaceHandlingKEEP() {
74 74 void TestMorfeusz::testAnalyzeIterateWithWhitespaceHandlingAPPEND() {
75 75 cerr << "testAnalyzeIterateWithWhitespaceHandlingAPPEND" << endl;
76 76 morfeusz->setWhitespaceHandling(APPEND_WHITESPACES);
77   - ResultsIterator* it = morfeusz->analyze(" AAAAbbbbCCCC DDDDeeee.\t");
  77 + ResultsIterator* it = morfeusz->analyse(" AAAAbbbbCCCC DDDDeeee.\t");
78 78  
79 79 CPPUNIT_ASSERT(it->hasNext());
80   - CPPUNIT_ASSERT_EQUAL(string(" AAAAbbbbCCCC "), it->next().getOrth());
  80 + CPPUNIT_ASSERT_EQUAL(string(" AAAAbbbbCCCC "), it->next().orth);
81 81  
82 82 CPPUNIT_ASSERT(it->hasNext());
83   - CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), it->next().getOrth());
  83 + CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), it->next().orth);
84 84  
85 85 CPPUNIT_ASSERT(it->hasNext());
86   - CPPUNIT_ASSERT_EQUAL(string(".\t"), it->next().getOrth());
  86 + CPPUNIT_ASSERT_EQUAL(string(".\t"), it->next().orth);
87 87  
88 88 CPPUNIT_ASSERT(!it->hasNext());
89 89 CPPUNIT_ASSERT_THROW(it->peek(), MorfeuszException);
... ... @@ -94,10 +94,10 @@ void TestMorfeusz::testAnalyzeIterateWithWhitespaceHandlingAPPEND() {
94 94 void TestMorfeusz::testAnalyzeVector1() {
95 95 cerr << "testAnalyzeVector1" << endl;
96 96 vector<MorphInterpretation> res;
97   - morfeusz->analyze("AAAAbbbbCCCC", res);
  97 + morfeusz->analyse("AAAAbbbbCCCC", res);
98 98 CPPUNIT_ASSERT_EQUAL((size_t) 1, res.size());
99   - CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].getOrth());
100   - CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].getLemma());
  99 + CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].orth);
  100 + CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].lemma);
101 101 }
102 102  
103 103 static inline string prepareErrorneusTmpFile() {
... ... @@ -135,35 +135,35 @@ void TestMorfeusz::testWhitespaceHandlingKEEP() {
135 135 cerr << "testWhitespaceHandlingKEEP" << endl;
136 136 vector<MorphInterpretation> res;
137 137 morfeusz->setWhitespaceHandling(KEEP_WHITESPACES);
138   - morfeusz->analyze(" AAAAbbbbCCCC DDDDeeee\t", res);
  138 + morfeusz->analyse(" AAAAbbbbCCCC DDDDeeee\t", res);
139 139 CPPUNIT_ASSERT_EQUAL((size_t) 5, res.size());
140   - CPPUNIT_ASSERT_EQUAL(string(" "), res[0].getOrth());
141   - CPPUNIT_ASSERT_EQUAL(string(" "), res[0].getLemma());
142   - CPPUNIT_ASSERT_EQUAL(1, res[0].getTagnum());
143   - CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[1].getOrth());
144   - CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[1].getLemma());
145   - CPPUNIT_ASSERT_EQUAL(0, res[1].getTagnum());
146   - CPPUNIT_ASSERT_EQUAL(string(" "), res[2].getOrth());
147   - CPPUNIT_ASSERT_EQUAL(string(" "), res[2].getLemma());
148   - CPPUNIT_ASSERT_EQUAL(1, res[2].getTagnum());
149   - CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), res[3].getOrth());
150   - CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), res[3].getLemma());
151   - CPPUNIT_ASSERT_EQUAL(0, res[3].getTagnum());
152   - CPPUNIT_ASSERT_EQUAL(string("\t"), res[4].getOrth());
153   - CPPUNIT_ASSERT_EQUAL(string("\t"), res[4].getLemma());
154   - CPPUNIT_ASSERT_EQUAL(1, res[4].getTagnum());
  140 + CPPUNIT_ASSERT_EQUAL(string(" "), res[0].orth);
  141 + CPPUNIT_ASSERT_EQUAL(string(" "), res[0].lemma);
  142 + CPPUNIT_ASSERT_EQUAL(1, res[0].tagId);
  143 + CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[1].orth);
  144 + CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[1].lemma);
  145 + CPPUNIT_ASSERT_EQUAL(0, res[1].tagId);
  146 + CPPUNIT_ASSERT_EQUAL(string(" "), res[2].orth);
  147 + CPPUNIT_ASSERT_EQUAL(string(" "), res[2].lemma);
  148 + CPPUNIT_ASSERT_EQUAL(1, res[2].tagId);
  149 + CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), res[3].orth);
  150 + CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), res[3].lemma);
  151 + CPPUNIT_ASSERT_EQUAL(0, res[3].tagId);
  152 + CPPUNIT_ASSERT_EQUAL(string("\t"), res[4].orth);
  153 + CPPUNIT_ASSERT_EQUAL(string("\t"), res[4].lemma);
  154 + CPPUNIT_ASSERT_EQUAL(1, res[4].tagId);
155 155 }
156 156  
157 157 void TestMorfeusz::testWhitespaceHandlingAPPEND() {
158 158 cerr << "testWhitespaceHandlingAPPEND" << endl;
159 159 vector<MorphInterpretation> res;
160 160 morfeusz->setWhitespaceHandling(APPEND_WHITESPACES);
161   - morfeusz->analyze(" AAAAbbbbCCCC DDDDeeee\t", res);
  161 + morfeusz->analyse(" AAAAbbbbCCCC DDDDeeee\t", res);
162 162 CPPUNIT_ASSERT_EQUAL((size_t) 2, res.size());
163   - CPPUNIT_ASSERT_EQUAL(string(" AAAAbbbbCCCC "), res[0].getOrth());
164   - CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].getLemma());
165   - CPPUNIT_ASSERT_EQUAL(0, res[0].getTagnum());
166   - CPPUNIT_ASSERT_EQUAL(string("DDDDeeee\t"), res[1].getOrth());
167   - CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), res[1].getLemma());
168   - CPPUNIT_ASSERT_EQUAL(0, res[1].getTagnum());
  163 + CPPUNIT_ASSERT_EQUAL(string(" AAAAbbbbCCCC "), res[0].orth);
  164 + CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].lemma);
  165 + CPPUNIT_ASSERT_EQUAL(0, res[0].tagId);
  166 + CPPUNIT_ASSERT_EQUAL(string("DDDDeeee\t"), res[1].orth);
  167 + CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), res[1].lemma);
  168 + CPPUNIT_ASSERT_EQUAL(0, res[1].tagId);
169 169 }
... ...
morfeusz/wrappers/java/pl/waw/ipipan/morfeusz/app/App.java
... ... @@ -14,9 +14,9 @@ public class App {
14 14 System.getProperty("user.dir"));
15 15 System.err.println("java.library.path="+System.getProperty("java.library.path"));
16 16 Morfeusz morfeusz = Morfeusz.createInstance();
17   - ResultsIterator it = morfeusz.analyzeAsIterator("Ala ma kota i żółć.");
  17 + ResultsIterator it = morfeusz.analyseAsIterator("Ala ma kota i żółć.");
18 18 while (it.hasNext()) {
19   - System.out.println(MorfeuszUtils.getInterpretationString(it.next()));
  19 + System.out.println(MorfeuszUtils.getInterpretationString(it.next(), morfeusz));
20 20 }
21 21 }
22 22 }
... ...
morfeusz/wrappers/java/pl/waw/ipipan/morfeusz/app/MorfeuszUtils.java
... ... @@ -3,6 +3,7 @@ package pl.waw.ipipan.morfeusz.app;
3 3 import java.io.ByteArrayOutputStream;
4 4 import java.io.PrintStream;
5 5 import pl.waw.ipipan.morfeusz.MorphInterpretation;
  6 +import pl.waw.ipipan.morfeusz.Morfeusz;
6 7  
7 8 /**
8 9 *
... ... @@ -10,7 +11,7 @@ import pl.waw.ipipan.morfeusz.MorphInterpretation;
10 11 */
11 12 public class MorfeuszUtils {
12 13  
13   - public static String getInterpretationString(MorphInterpretation interp) {
  14 + public static String getInterpretationString(MorphInterpretation interp, Morfeusz morfeusz) {
14 15 ByteArrayOutputStream baos = new ByteArrayOutputStream();
15 16 PrintStream ps = new PrintStream(baos);
16 17 ps.printf("%d %d %s %s %s %s %s", // seven specifiers: the labels argument added below needs its own %s, otherwise Formatter silently drops it
... ... @@ -18,8 +19,9 @@ public class MorfeuszUtils {
18 19 interp.getEndNode(),
19 20 interp.getOrth(),
20 21 interp.getLemma(),
21   - interp.getTag(),
22   - interp.getName());
  22 + morfeusz.getIdResolver().getTag(interp.getTagId()),
  23 + morfeusz.getIdResolver().getName(interp.getNameId()),
  24 + morfeusz.getIdResolver().getLabelsAsString(interp.getLabelsId()));
23 25 ps.flush();
24 26 return baos.toString();
25 27 }
... ...
morfeusz/wrappers/morfeusz.i
... ... @@ -13,71 +13,21 @@
13 13 #include "morfeusz2.h"
14 14 #include "MorfeuszInternal.hpp"
15 15 #include <vector>
  16 +#include <list>
16 17 %}
17 18  
18 19 #ifdef SWIGJAVA
19 20 %include "morfeusz_java.i"
20 21 #endif
21 22  
22   -%include "std_vector.i"
23   -%include "std_string.i"
24   -%include "std_except.i"
25   -%include "exception.i"
26   -%include "typemaps.i"
27   -
28   -%exception {
29   - try{
30   - $action
31   - }
32   - catch(const morfeusz::FileFormatException& e) {
33   - SWIG_exception(SWIG_IOError, const_cast<char*>(e.what()));
34   - }
35   - catch(const std::exception& e) {
36   - SWIG_exception(SWIG_RuntimeError, const_cast<char*>(e.what()));
37   - }
38   - catch(...) {
39   - SWIG_exception(SWIG_RuntimeError, "Unknown exception");
40   - }
41   -}
42   -
43   -namespace morfeusz {
44   -
45   - %ignore MorphInterpretation::MorphInterpretation(
46   - int startNode,
47   - int endNode,
48   - const std::string& orth,
49   - const std::string& lemma,
50   - int tagnum,
51   - int namenum,
52   - const std::vector<std::string>* qualifiers,
53   - const Tagset<std::string>* tagset);
54   -
55   - %ignore MorphInterpretation::createIgn(int startNode, int endNode, const std::string& orth, const Tagset<std::string>& tagset);
56   - %ignore MorphInterpretation::createWhitespace(int startNode, int endNode, const std::string& orth, const Tagset<std::string>& tagset);
57   - %ignore Morfeusz::analyze(const char*) const;
58   - %ignore Morfeusz::analyze(const std::string&) const;
59   - %ignore Morfeusz::setCharset(Charset);
60   -// %ignore Morfeusz::analyze(const std::string&, std::vector<MorphInterpretation>&) const;
61   -// %ignore Morfeusz::generate(const std::string&, std::vector<MorphInterpretation>&) const;
62   -// %ignore Morfeusz::generate(const std::string&, int, std::vector<MorphInterpretation>&) const;
63   - %ignore Morfeusz::setDebug(bool);
64   -
65   - %newobject Morfeusz::createInstance();
66   - %newobject Morfeusz::analyzeAsIterator(const char*) const;
67   -}
68   -
69   -%extend morfeusz::Morfeusz {
70   - morfeusz::ResultsIterator* morfeusz::Morfeusz::analyzeAsIterator(const char* text) const {
71   - return dynamic_cast<const morfeusz::MorfeuszInternal*>($self)->analyzeWithCopy(text);
72   - }
73   -}
74   -
75   -%template(InterpsList) std::vector<morfeusz::MorphInterpretation>;
76   -%template(StringsList) std::vector<std::string>;
77   -
78   -%include "../morfeusz2.h"
79   -
80   -
81 23 #ifdef SWIGPYTHON
82 24 %include "morfeusz_python.i"
83 25 #endif
  26 +
  27 +#ifdef SWIGPERL
  28 +%include "morfeusz_perl.i"
  29 +#endif
  30 +
  31 +%include "morfeusz_common.i"
  32 +
  33 +%include "../morfeusz2.h"
... ...
morfeusz/wrappers/morfeusz_common.i 0 → 100644
  1 +
  2 +%exception {
  3 + try{
  4 + $action
  5 + }
  6 + catch(const morfeusz::FileFormatException& e) {
  7 + SWIG_exception(SWIG_IOError, const_cast<char*>(e.what()));
  8 + }
  9 + catch(const std::exception& e) {
  10 + SWIG_exception(SWIG_RuntimeError, const_cast<char*>(e.what()));
  11 + }
  12 + catch(...) {
  13 + SWIG_exception(SWIG_RuntimeError, "Unknown exception");
  14 + }
  15 +}
  16 +
  17 +namespace morfeusz {
  18 +
  19 + %ignore MorphInterpretation::createIgn(int startNode, int endNode, const std::string& orth, const std::string& lemma);
  20 + %ignore MorphInterpretation::createWhitespace(int startNode, int endNode, const std::string& orth);
  21 + %ignore Morfeusz::analyse(const char*) const;
  22 + %ignore Morfeusz::analyse(const std::string&) const;
  23 + %ignore Morfeusz::setCharset(Charset);
  24 +// %rename(_doGetNext) ResultsIterator::next();
  25 +// %ignore Morfeusz::analyse(const std::string&, std::vector<MorphInterpretation>&) const;
  26 +// %ignore Morfeusz::generate(const std::string&, std::vector<MorphInterpretation>&) const;
  27 +// %ignore Morfeusz::generate(const std::string&, int, std::vector<MorphInterpretation>&) const;
  28 + %ignore Morfeusz::setDebug(bool);
  29 +
  30 + %newobject Morfeusz::createInstance();
  31 + %newobject Morfeusz::analyseAsIterator(const char*) const;
  32 +}
  33 +
  34 +%extend morfeusz::Morfeusz {
  35 + morfeusz::ResultsIterator* morfeusz::Morfeusz::analyseAsIterator(const char* text) const {
  36 + return dynamic_cast<const morfeusz::MorfeuszInternal*>($self)->analyseWithCopy(text);
  37 + }
  38 +}
  39 +
  40 +%template(InterpsList) std::vector<morfeusz::MorphInterpretation>;
  41 +%template(StringsList) std::vector<std::string>;
  42 +%template(StringsLinkedList) std::list<std::string>;
  43 +
  44 +#ifndef SWIGPERL
  45 +%template(StringsSet) std::set<std::string>;
  46 +#endif
... ...
morfeusz/wrappers/morfeusz_java.i
... ... @@ -3,7 +3,7 @@
3 3  
4 4 %include <stdint.i>
5 5 %include <std_except.i>
6   -
  6 +%include <std_common.i>
7 7 // make vector compatible with java.util.List interface
8 8  
9 9 namespace std {
... ... @@ -42,16 +42,85 @@ namespace std {
42 42 }
43 43 }
44 44 };
  45 +
  46 + template<class T> class list {
  47 + public:
  48 + typedef size_t size_type;
  49 + typedef T value_type;
  50 + typedef const value_type& const_reference;
  51 +
  52 + %rename(isEmpty) empty;
  53 + bool empty() const;
  54 + void clear();
  55 +
  56 + %extend {
  57 +
  58 + const_reference get(int32_t i) const throw (std::out_of_range) {
  59 + std::list<T>::const_iterator it = $self->begin();
  60 + std::advance(it, i);
  61 + return *it;
  62 + }
  63 +
  64 + value_type set(int32_t i, const value_type& VECTOR_VALUE_IN) throw (std::out_of_range) {
  65 + std::list<T>::iterator it = $self->begin();
  66 + std::advance(it, i);
  67 + std::string old = *it;
  68 + *it = VECTOR_VALUE_IN;
  69 + return old;
  70 + }
  71 +
  72 + void add(int32_t i, const value_type& VECTOR_VALUE_IN) {
  73 + std::list<T>::iterator it = $self->begin();
  74 + std::advance(it, i);
  75 + $self->insert(it, VECTOR_VALUE_IN);
  76 + }
  77 +
  78 + value_type remove(int32_t i, const value_type& VECTOR_VALUE_IN) throw (std::out_of_range) {
  79 + std::list<T>::iterator it = $self->begin();
  80 + std::advance(it, i);
  81 + std::string old = *it;
  82 + $self->erase(it);
  83 + return old;
  84 + }
  85 +
  86 + int32_t size() const {
  87 + return $self->size();
  88 + }
  89 + }
  90 + };
  91 +
  92 + template<class T> class set {
  93 + public:
  94 + typedef size_t size_type;
  95 + typedef T value_type;
  96 + typedef const value_type& const_reference;
  97 +
  98 + %rename(isEmpty) empty;
  99 + bool empty() const;
  100 +
  101 + %extend {
  102 +
  103 + const_reference get(int32_t i) const throw (std::out_of_range) {
  104 + std::set<T>::const_iterator it = $self->begin();
  105 + std::advance(it, i);
  106 + return *it;
  107 + }
  108 +
  109 + int32_t size() const {
  110 + return $self->size();
  111 + }
  112 + }
  113 + };
45 114 }
46 115  
47 116 %typemap(javaimports) morfeusz::Morfeusz %{
48 117 import java.io.IOException;
49 118 import java.lang.RuntimeException;
50 119 import java.util.List;
51   -import java.util.Collections;
  120 +import java.util.ArrayList;
52 121  
53 122 /**
54   - * Performs morphological analysis (analyze methods) and syntesis (generate methods).
  123 + * Performs morphological analysis (analyse methods) and synthesis (generate methods).
55 124 *
56 125 * It is NOT thread-safe
57 126 * but it is possible to use separate Morfeusz instance for each concurrent thread.
... ... @@ -59,18 +128,14 @@ import java.util.Collections;
59 128 %}
60 129  
61 130 %typemap(javaimports) morfeusz::ResultsIterator %{
62   -import java.util.Iterator;
63   -
64 131 /**
65 132 * Iterates through morphological analysis and synthesis results.
66 133 *
67 134 */
68 135 %}
69 136  
70   -%typemap(javaimports) std::vector %{
71   -import java.util.List;
72   -import java.util.AbstractList;
73   -%}
  137 +%rename(_dictionarySearchPaths) morfeusz::Morfeusz::dictionarySearchPaths;
  138 +%rename(_getLabels) morfeusz::IdResolver::getLabels;
74 139  
75 140 %javaexception("IOException") morfeusz::Morfeusz::setAnalyzerDictionary {
76 141 try {
... ... @@ -94,10 +159,12 @@ import java.util.AbstractList;
94 159 }
95 160 }
96 161  
97   -%typemap(javainterfaces) morfeusz::ResultsIterator "Iterator<MorphInterpretation>"
98   -%typemap(javabase) std::vector<morfeusz::MorphInterpretation> "AbstractList<MorphInterpretation>"
99   -%typemap(javabase) std::vector<morfeusz::String> "AbstractList<String>"
100   -%typemap(javabase) morfeusz::MorfeuszException "RuntimeException"
  162 +%typemap(javainterfaces) morfeusz::ResultsIterator "java.util.Iterator<MorphInterpretation>"
  163 +%typemap(javabase) std::vector<morfeusz::MorphInterpretation> "java.util.AbstractList<MorphInterpretation>"
  164 +%typemap(javabase) std::vector<std::string> "java.util.AbstractList<java.lang.String>"
  165 +%typemap(javabase) std::list<std::string> "java.util.AbstractList<java.lang.String>"
  166 +%typemap(javabase) std::set<std::string> "java.util.AbstractList<java.lang.String>"
  167 +%typemap(javabase) morfeusz::MorfeuszException "java.lang.RuntimeException"
101 168  
102 169 %typemap(javacode) morfeusz::Morfeusz %{
103 170  
... ... @@ -107,10 +174,10 @@ import java.util.AbstractList;
107 174 * @param text text for morphological analysis.
108 175 * @return list containing the results of morphological analysis
109 176 */
110   - public List<MorphInterpretation> analyzeAsList(String text) {
  177 + public List<MorphInterpretation> analyseAsList(String text) {
111 178 InterpsList res = new InterpsList();
112   - analyze(text, res);
113   - return Collections.unmodifiableList(res);
  179 + analyse(text, res);
  180 + return new ArrayList<MorphInterpretation>(res);
114 181 }
115 182  
116 183 /**
... ... @@ -122,21 +189,30 @@ import java.util.AbstractList;
122 189 public List<MorphInterpretation> generate(String lemma) {
123 190 InterpsList res = new InterpsList();
124 191 generate(lemma, res);
125   - return Collections.unmodifiableList(res);
  192 + return new ArrayList<MorphInterpretation>(res);
126 193 }
127 194  
128 195 /**
129 196 * Perform morphological synthesis on a given lemma.
130 197 * Limit results to interpretations with the specified tag.
131 198 *
132   - * @param lemma lemma to be analyzed
  199 + * @param lemma lemma to be analysed
133 200 * @param tagnum tag number of result interpretations
134 201 * @return list containing results of the morphological synthesis
135 202 */
136 203 public List<MorphInterpretation> generate(String lemma, int tagnum) {
137 204 InterpsList res = new InterpsList();
138 205 generate(lemma, tagnum, res);
139   - return Collections.unmodifiableList(res);
  206 + return new ArrayList<MorphInterpretation>(res);
  207 + }
  208 +
  209 + /**
  210 + * Get the list of paths that are searched for dictionaries.
  211 + *
  212 + * @return modifiable list of paths
  213 + */
  214 + public List<String> getDictionarySearchPaths() {
  215 + return this.get_dictionarySearchPaths();
140 216 }
141 217 %}
142 218  
... ... @@ -150,6 +226,13 @@ import java.util.AbstractList;
150 226 }
151 227 %}
152 228  
  229 +%typemap(javacode) morfeusz::IdResolver %{
  230 +
  231 + public java.util.Collection<java.lang.String> getLabels(int labelsId) {
  232 + return _getLabels(labelsId);
  233 + }
  234 +%}
  235 +
153 236 %typemap(javafinalize) SWIGTYPE %{
154 237 protected void finalize() {
155 238 if (swigCMemOwn) {
... ... @@ -160,10 +243,16 @@ import java.util.AbstractList;
160 243  
161 244 %typemap(javadestruct, methodname="delete", methodmodifiers="private") SWIGTYPE "";
162 245  
163   -%javamethodmodifiers morfeusz::Morfeusz::analyze(const std::string&, std::vector<MorphInterpretation>&) const "private";
  246 +%javamethodmodifiers morfeusz::Morfeusz::analyse(const std::string&, std::vector<MorphInterpretation>&) const "private";
164 247 %javamethodmodifiers morfeusz::Morfeusz::generate(const std::string&, std::vector<MorphInterpretation>&) const "private";
165 248 %javamethodmodifiers morfeusz::Morfeusz::generate(const std::string&, int, std::vector<MorphInterpretation>&) const "private";
166 249  
  250 +// hidden from the Java API; the public accessor is getDictionarySearchPaths() from %typemap(javacode)
  251 +%javamethodmodifiers morfeusz::Morfeusz::dictionarySearchPaths "private";
  252 +
  253 +// hidden from the Java API; the public accessor is getLabels() from %typemap(javacode)
  254 +%javamethodmodifiers morfeusz::IdResolver::getLabels "private";
  255 +
167 256 %typemap(javaclassmodifiers) std::vector "class"
168 257  
169 258 %include "enums.swg"
... ... @@ -180,3 +269,10 @@ import java.util.AbstractList;
180 269 %pragma(java) jniclassimports=%{
181 270 import java.io.IOException;
182 271 %}
  272 +
  273 +%include "std_vector.i"
  274 +%include "std_string.i"
  275 +%include "std_except.i"
  276 +%include "exception.i"
  277 +%include "typemaps.i"
  278 +
... ...
morfeusz/wrappers/morfeusz_javadoc.i
... ... @@ -13,7 +13,7 @@
13 13 */
14 14 public";
15 15  
16   -%javamethodmodifiers morfeusz::Morfeusz::analyzeAsIterator(const char*) const "
  16 +%javamethodmodifiers morfeusz::Morfeusz::analyseAsIterator(const char*) const "
17 17 /**
18 18 * Analyze given text and return the results as iterator.
19 19 * It does not store results for whole text at once, so may be less memory-consuming for analysis of big texts.
... ...
morfeusz/wrappers/morfeusz_perl.i 0 → 100644
  1 +
  2 +%include "std_vector.i"
  3 +%include "std_string.i"
  4 +%include "std_list.i"
  5 +%include "std_except.i"
  6 +%include "exception.i"
  7 +%include "typemaps.i"
... ...
morfeusz/wrappers/morfeusz_python.i
  1 +
1 2 %pythoncode %{
2 3  
3   -def _analyze(self, text):
  4 +def _analyse(self, text):
4 5 res = InterpsVector()
5   - _morfeusz2.Morfeusz_analyze(self, text.encode('utf8'), res)
  6 + _morfeusz2.Morfeusz_analyse(self, text.encode('utf8'), res)
6 7 return list(res)
7 8  
8   -Morfeusz.analyze = _analyze
  9 +Morfeusz.analyse = _analyse
9 10  
10 11 def _generate(self, text):
11 12 res = InterpsVector()
... ... @@ -30,4 +31,12 @@ MorphInterpretation.getOrth = _getOrth
30 31 MorphInterpretation.getLemma = _getLemma
31 32 MorphInterpretation.getTag = _getTag
32 33 MorphInterpretation.getName = _getName
33   -%}
34 34 \ No newline at end of file
  35 +%}
  36 +
  37 +%include "std_vector.i"
  38 +%include "std_string.i"
  39 +%include "std_list.i"
  40 +%include "std_set.i"
  41 +%include "std_except.i"
  42 +%include "exception.i"
  43 +%include "typemaps.i"
... ...
nbproject/configurations.xml
... ... @@ -59,17 +59,13 @@
59 59 <in>SegrulesFSA.cpp</in>
60 60 <in>segrules.cpp</in>
61 61 </df>
62   - <df name="test">
63   - <in>test_recognize_dict.cpp</in>
64   - <in>test_result_equals.cpp</in>
65   - </df>
66 62 <df name="tests">
67 63 <in>TestCAPI.cpp</in>
68 64 <in>TestMorfeusz.cpp</in>
69 65 <in>test_c_api.cpp</in>
70 66 </df>
71   - <in>DefaultTagset.cpp</in>
72 67 <in>Environment.cpp</in>
  68 + <in>IdResolverImpl.cpp</in>
73 69 <in>InflexionGraph.cpp</in>
74 70 <in>Morfeusz.cpp</in>
75 71 <in>MorfeuszInternal.cpp</in>
... ... @@ -168,8 +164,9 @@
168 164 <rebuildPropChanged>false</rebuildPropChanged>
169 165 </toolsSet>
170 166 <flagsDictionary>
171   - <element flagsID="0" commonFlags="-std=c++98 -O3"/>
  167 + <element flagsID="0" commonFlags="-std=c++98"/>
172 168 <element flagsID="1" commonFlags="-std=c++98 -O3 -fPIC"/>
  169 + <element flagsID="2" commonFlags="-std=c++98 -fPIC"/>
173 170 </flagsDictionary>
174 171 <codeAssistance>
175 172 </codeAssistance>
... ... @@ -190,6 +187,7 @@
190 187 <pElem>build/fsa</pElem>
191 188 </incDir>
192 189 <preprocessorList>
  190 + <Elem>NDEBUG</Elem>
193 191 <Elem>_OPTIMIZE__=1</Elem>
194 192 <Elem>__PIC__=2</Elem>
195 193 <Elem>__pic__=2</Elem>
... ... @@ -210,6 +208,7 @@
210 208 <pElem>build/fsa</pElem>
211 209 </incDir>
212 210 <preprocessorList>
  211 + <Elem>NDEBUG</Elem>
213 212 <Elem>_OPTIMIZE__=1</Elem>
214 213 <Elem>__PIC__=2</Elem>
215 214 <Elem>__pic__=2</Elem>
... ... @@ -222,11 +221,12 @@
222 221 </ccTool>
223 222 </item>
224 223 <item path="build/morfeusz/default_fsa.cpp" ex="false" tool="1" flavor2="4">
225   - <ccTool flags="1">
  224 + <ccTool flags="2">
226 225 <incDir>
227 226 <pElem>morfeusz</pElem>
228 227 </incDir>
229 228 <preprocessorList>
  229 + <Elem>BUILDING_MORFEUSZ</Elem>
230 230 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
231 231 </preprocessorList>
232 232 </ccTool>
... ... @@ -235,11 +235,12 @@
235 235 ex="false"
236 236 tool="1"
237 237 flavor2="4">
238   - <ccTool flags="1">
  238 + <ccTool flags="2">
239 239 <incDir>
240 240 <pElem>morfeusz</pElem>
241 241 </incDir>
242 242 <preprocessorList>
  243 + <Elem>BUILDING_MORFEUSZ</Elem>
243 244 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
244 245 </preprocessorList>
245 246 </ccTool>
... ... @@ -260,6 +261,7 @@
260 261 <pElem>build/morfeusz/java</pElem>
261 262 </incDir>
262 263 <preprocessorList>
  264 + <Elem>NDEBUG</Elem>
263 265 <Elem>_OPTIMIZE__=1</Elem>
264 266 <Elem>__PIC__=2</Elem>
265 267 <Elem>__pic__=2</Elem>
... ... @@ -286,6 +288,7 @@
286 288 <pElem>build/morfeusz/perl</pElem>
287 289 </incDir>
288 290 <preprocessorList>
  291 + <Elem>NDEBUG</Elem>
289 292 <Elem>_OPTIMIZE__=1</Elem>
290 293 <Elem>morfeusz_perl_EXPORTS</Elem>
291 294 </preprocessorList>
... ... @@ -308,6 +311,7 @@
308 311 <pElem>build/morfeusz/python</pElem>
309 312 </incDir>
310 313 <preprocessorList>
  314 + <Elem>NDEBUG</Elem>
311 315 <Elem>_OPTIMIZE__=1</Elem>
312 316 <Elem>__PIC__=2</Elem>
313 317 <Elem>__pic__=2</Elem>
... ... @@ -329,12 +333,14 @@
329 333 ex="false"
330 334 tool="1"
331 335 flavor2="4">
  336 + <ccTool flags="2">
  337 + </ccTool>
332 338 </item>
333 339 <item path="build/morfeusz/wrappers/morfeuszPERL_wrap.cxx"
334 340 ex="false"
335 341 tool="1"
336 342 flavor2="4">
337   - <ccTool flags="1">
  343 + <ccTool flags="2">
338 344 <incDir>
339 345 <pElem>/usr/lib/perl/5.14/CORE</pElem>
340 346 <pElem>build/morfeusz/wrappers/perl</pElem>
... ... @@ -356,6 +362,7 @@
356 362 <pElem>morfeusz/build/morfeusz</pElem>
357 363 </incDir>
358 364 <preprocessorList>
  365 + <Elem>NDEBUG</Elem>
359 366 <Elem>_OPTIMIZE__=1</Elem>
360 367 </preprocessorList>
361 368 <undefinedList>
... ... @@ -373,6 +380,7 @@
373 380 <pElem>morfeusz/build/morfeusz</pElem>
374 381 </incDir>
375 382 <preprocessorList>
  383 + <Elem>NDEBUG</Elem>
376 384 <Elem>_OPTIMIZE__=1</Elem>
377 385 </preprocessorList>
378 386 <undefinedList>
... ... @@ -387,8 +395,8 @@
387 395 <pElem>build/morfeusz</pElem>
388 396 </incDir>
389 397 <preprocessorList>
  398 + <Elem>BUILDING_MORFEUSZ</Elem>
390 399 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
391   - <Elem>NDEBUG</Elem>
392 400 <Elem>libmorfeusz_EXPORTS</Elem>
393 401 </preprocessorList>
394 402 </ccTool>
... ... @@ -400,8 +408,8 @@
400 408 <pElem>build/morfeusz</pElem>
401 409 </incDir>
402 410 <preprocessorList>
  411 + <Elem>BUILDING_MORFEUSZ</Elem>
403 412 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
404   - <Elem>NDEBUG</Elem>
405 413 <Elem>libmorfeusz_EXPORTS</Elem>
406 414 </preprocessorList>
407 415 </ccTool>
... ... @@ -413,8 +421,8 @@
413 421 <pElem>build/morfeusz</pElem>
414 422 </incDir>
415 423 <preprocessorList>
  424 + <Elem>BUILDING_MORFEUSZ</Elem>
416 425 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
417   - <Elem>NDEBUG</Elem>
418 426 <Elem>libmorfeusz_EXPORTS</Elem>
419 427 </preprocessorList>
420 428 </ccTool>
... ... @@ -426,8 +434,8 @@
426 434 <pElem>build/morfeusz</pElem>
427 435 </incDir>
428 436 <preprocessorList>
  437 + <Elem>BUILDING_MORFEUSZ</Elem>
429 438 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
430   - <Elem>NDEBUG</Elem>
431 439 <Elem>libmorfeusz_EXPORTS</Elem>
432 440 </preprocessorList>
433 441 </ccTool>
... ... @@ -439,8 +447,8 @@
439 447 <pElem>build/morfeusz</pElem>
440 448 </incDir>
441 449 <preprocessorList>
  450 + <Elem>BUILDING_MORFEUSZ</Elem>
442 451 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
443   - <Elem>NDEBUG</Elem>
444 452 <Elem>libmorfeusz_EXPORTS</Elem>
445 453 </preprocessorList>
446 454 </ccTool>
... ... @@ -452,8 +460,8 @@
452 460 <pElem>build/morfeusz</pElem>
453 461 </incDir>
454 462 <preprocessorList>
  463 + <Elem>BUILDING_MORFEUSZ</Elem>
455 464 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
456   - <Elem>NDEBUG</Elem>
457 465 <Elem>libmorfeusz_EXPORTS</Elem>
458 466 </preprocessorList>
459 467 </ccTool>
... ... @@ -465,21 +473,8 @@
465 473 <pElem>build/morfeusz</pElem>
466 474 </incDir>
467 475 <preprocessorList>
  476 + <Elem>BUILDING_MORFEUSZ</Elem>
468 477 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
469   - <Elem>NDEBUG</Elem>
470   - <Elem>libmorfeusz_EXPORTS</Elem>
471   - </preprocessorList>
472   - </ccTool>
473   - </folder>
474   - <folder path="0/test">
475   - <ccTool>
476   - <incDir>
477   - <pElem>build</pElem>
478   - <pElem>morfeusz</pElem>
479   - <pElem>build/morfeusz</pElem>
480   - </incDir>
481   - <preprocessorList>
482   - <Elem>NDEBUG</Elem>
483 478 <Elem>libmorfeusz_EXPORTS</Elem>
484 479 </preprocessorList>
485 480 </ccTool>
... ... @@ -613,19 +608,13 @@
613 608 <output>${TESTDIR}/TestFiles/f9</output>
614 609 </linkerTool>
615 610 </folder>
616   - <folder path="build">
617   - <ccTool>
618   - <preprocessorList>
619   - <Elem>NDEBUG</Elem>
620   - </preprocessorList>
621   - </ccTool>
622   - </folder>
623 611 <folder path="build/morfeusz">
624 612 <ccTool>
625 613 <incDir>
626 614 <pElem>morfeusz</pElem>
627 615 </incDir>
628 616 <preprocessorList>
  617 + <Elem>BUILDING_MORFEUSZ</Elem>
629 618 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
630 619 </preprocessorList>
631 620 </ccTool>
... ... @@ -664,7 +653,6 @@
664 653 <folder path="morfeusz">
665 654 <ccTool>
666 655 <preprocessorList>
667   - <Elem>NDEBUG</Elem>
668 656 <Elem>libmorfeusz_EXPORTS</Elem>
669 657 </preprocessorList>
670 658 </ccTool>
... ... @@ -679,6 +667,7 @@
679 667 <pElem>/usr/lib/jvm/default-java/include</pElem>
680 668 </incDir>
681 669 <preprocessorList>
  670 + <Elem>NDEBUG</Elem>
682 671 <Elem>_OPTIMIZE__=1</Elem>
683 672 <Elem>libjmorfeusz_EXPORTS</Elem>
684 673 </preprocessorList>
... ... @@ -694,80 +683,80 @@
694 683 </incDir>
695 684 </ccTool>
696 685 </folder>
697   - <item path="morfeusz/DefaultTagset.cpp" ex="false" tool="1" flavor2="4">
698   - <ccTool flags="1">
  686 + <item path="morfeusz/Environment.cpp" ex="false" tool="1" flavor2="4">
  687 + <ccTool flags="2">
699 688 <incDir>
700 689 <pElem>morfeusz</pElem>
701 690 <pElem>build/morfeusz</pElem>
702 691 </incDir>
703 692 <preprocessorList>
  693 + <Elem>BUILDING_MORFEUSZ</Elem>
704 694 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
705   - <Elem>NDEBUG</Elem>
706 695 <Elem>libmorfeusz_EXPORTS</Elem>
707 696 </preprocessorList>
708 697 </ccTool>
709 698 </item>
710   - <item path="morfeusz/Environment.cpp" ex="false" tool="1" flavor2="4">
711   - <ccTool flags="1">
  699 + <item path="morfeusz/IdResolverImpl.cpp" ex="false" tool="1" flavor2="4">
  700 + <ccTool flags="2">
712 701 <incDir>
713 702 <pElem>morfeusz</pElem>
714 703 <pElem>build/morfeusz</pElem>
715 704 </incDir>
716 705 <preprocessorList>
  706 + <Elem>BUILDING_MORFEUSZ</Elem>
717 707 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
718   - <Elem>NDEBUG</Elem>
719 708 <Elem>libmorfeusz_EXPORTS</Elem>
720 709 </preprocessorList>
721 710 </ccTool>
722 711 </item>
723 712 <item path="morfeusz/InflexionGraph.cpp" ex="false" tool="1" flavor2="4">
724   - <ccTool flags="1">
  713 + <ccTool flags="2">
725 714 <incDir>
726 715 <pElem>morfeusz</pElem>
727 716 <pElem>build/morfeusz</pElem>
728 717 </incDir>
729 718 <preprocessorList>
  719 + <Elem>BUILDING_MORFEUSZ</Elem>
730 720 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
731   - <Elem>NDEBUG</Elem>
732 721 <Elem>libmorfeusz_EXPORTS</Elem>
733 722 </preprocessorList>
734 723 </ccTool>
735 724 </item>
736 725 <item path="morfeusz/Morfeusz.cpp" ex="false" tool="1" flavor2="4">
737   - <ccTool flags="1">
  726 + <ccTool flags="2">
738 727 <incDir>
739 728 <pElem>morfeusz</pElem>
740 729 <pElem>build/morfeusz</pElem>
741 730 </incDir>
742 731 <preprocessorList>
  732 + <Elem>BUILDING_MORFEUSZ</Elem>
743 733 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
744   - <Elem>NDEBUG</Elem>
745 734 <Elem>libmorfeusz_EXPORTS</Elem>
746 735 </preprocessorList>
747 736 </ccTool>
748 737 </item>
749 738 <item path="morfeusz/MorfeuszInternal.cpp" ex="false" tool="1" flavor2="4">
750   - <ccTool flags="1">
  739 + <ccTool flags="2">
751 740 <incDir>
752 741 <pElem>morfeusz</pElem>
753 742 <pElem>build/morfeusz</pElem>
754 743 </incDir>
755 744 <preprocessorList>
  745 + <Elem>BUILDING_MORFEUSZ</Elem>
756 746 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
757   - <Elem>NDEBUG</Elem>
758 747 <Elem>libmorfeusz_EXPORTS</Elem>
759 748 </preprocessorList>
760 749 </ccTool>
761 750 </item>
762 751 <item path="morfeusz/MorphInterpretation.cpp" ex="false" tool="1" flavor2="4">
763   - <ccTool flags="1">
  752 + <ccTool flags="2">
764 753 <incDir>
765 754 <pElem>morfeusz</pElem>
766 755 <pElem>build/morfeusz</pElem>
767 756 </incDir>
768 757 <preprocessorList>
  758 + <Elem>BUILDING_MORFEUSZ</Elem>
769 759 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
770   - <Elem>NDEBUG</Elem>
771 760 <Elem>libmorfeusz_EXPORTS</Elem>
772 761 </preprocessorList>
773 762 </ccTool>
... ... @@ -786,52 +775,68 @@
786 775 </ccTool>
787 776 </item>
788 777 <item path="morfeusz/ResultsIteratorImpl.cpp" ex="false" tool="1" flavor2="4">
789   - <ccTool flags="1">
  778 + <ccTool flags="2">
790 779 <incDir>
791 780 <pElem>morfeusz</pElem>
792 781 <pElem>build/morfeusz</pElem>
793 782 </incDir>
794 783 <preprocessorList>
  784 + <Elem>BUILDING_MORFEUSZ</Elem>
795 785 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
796   - <Elem>NDEBUG</Elem>
797 786 <Elem>libmorfeusz_EXPORTS</Elem>
798 787 </preprocessorList>
799 788 </ccTool>
800 789 </item>
801 790 <item path="morfeusz/c_api/ResultsManager.cpp" ex="false" tool="1" flavor2="4">
  791 + <ccTool flags="2">
  792 + </ccTool>
802 793 </item>
803 794 <item path="morfeusz/case/CaseConverter.cpp" ex="false" tool="1" flavor2="4">
  795 + <ccTool flags="2">
  796 + </ccTool>
804 797 </item>
805 798 <item path="morfeusz/case/CasePatternHelper.cpp"
806 799 ex="false"
807 800 tool="1"
808 801 flavor2="4">
  802 + <ccTool flags="2">
  803 + </ccTool>
809 804 </item>
810 805 <item path="morfeusz/case/caseconv.cpp" ex="false" tool="1" flavor2="4">
  806 + <ccTool flags="2">
  807 + </ccTool>
811 808 </item>
812 809 <item path="morfeusz/charset/CharsetConverter.cpp"
813 810 ex="false"
814 811 tool="1"
815 812 flavor2="4">
  813 + <ccTool flags="2">
  814 + </ccTool>
816 815 </item>
817 816 <item path="morfeusz/charset/TextReader.cpp" ex="false" tool="1" flavor2="4">
  817 + <ccTool flags="2">
  818 + </ccTool>
818 819 </item>
819 820 <item path="morfeusz/charset/conversion_tables.cpp"
820 821 ex="false"
821 822 tool="1"
822 823 flavor2="4">
  824 + <ccTool flags="2">
  825 + </ccTool>
823 826 </item>
824 827 <item path="morfeusz/cli/cli.cpp" ex="false" tool="1" flavor2="4">
  828 + <ccTool flags="2">
  829 + </ccTool>
825 830 </item>
826 831 <item path="morfeusz/const.cpp" ex="false" tool="1" flavor2="4">
827   - <ccTool flags="1">
  832 + <ccTool flags="2">
828 833 <incDir>
829 834 <pElem>morfeusz</pElem>
830 835 <pElem>build/morfeusz</pElem>
831 836 </incDir>
832 837 <preprocessorList>
  838 + <Elem>BUILDING_MORFEUSZ</Elem>
833 839 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
834   - <Elem>NDEBUG</Elem>
835 840 <Elem>libmorfeusz_EXPORTS</Elem>
836 841 </preprocessorList>
837 842 </ccTool>
... ... @@ -840,38 +845,50 @@
840 845 ex="false"
841 846 tool="1"
842 847 flavor2="4">
  848 + <ccTool flags="2">
  849 + </ccTool>
843 850 </item>
844 851 <item path="morfeusz/deserialization/MorphDeserializer.cpp"
845 852 ex="false"
846 853 tool="1"
847 854 flavor2="4">
  855 + <ccTool flags="2">
  856 + </ccTool>
848 857 </item>
849 858 <item path="morfeusz/deserialization/morphInterps/InterpretedChunksDecoder.cpp"
850 859 ex="false"
851 860 tool="1"
852 861 flavor2="4">
  862 + <ccTool flags="2">
  863 + </ccTool>
853 864 </item>
854 865 <item path="morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.cpp"
855 866 ex="false"
856 867 tool="1"
857 868 flavor2="4">
  869 + <ccTool flags="2">
  870 + </ccTool>
858 871 </item>
859 872 <item path="morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Generator.cpp"
860 873 ex="false"
861 874 tool="1"
862 875 flavor2="4">
  876 + <ccTool flags="2">
  877 + </ccTool>
863 878 </item>
864 879 <item path="morfeusz/fsa/const.cpp" ex="false" tool="1" flavor2="4">
  880 + <ccTool flags="2">
  881 + </ccTool>
865 882 </item>
866 883 <item path="morfeusz/morfeusz2_c.cpp" ex="false" tool="1" flavor2="4">
867   - <ccTool flags="1">
  884 + <ccTool flags="2">
868 885 <incDir>
869 886 <pElem>morfeusz</pElem>
870 887 <pElem>build/morfeusz</pElem>
871 888 </incDir>
872 889 <preprocessorList>
  890 + <Elem>BUILDING_MORFEUSZ</Elem>
873 891 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
874   - <Elem>NDEBUG</Elem>
875 892 <Elem>libmorfeusz_EXPORTS</Elem>
876 893 </preprocessorList>
877 894 </ccTool>
... ... @@ -883,8 +900,8 @@
883 900 <pElem>build/morfeusz</pElem>
884 901 </incDir>
885 902 <preprocessorList>
  903 + <Elem>BUILDING_MORFEUSZ</Elem>
886 904 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
887   - <Elem>NDEBUG</Elem>
888 905 </preprocessorList>
889 906 </ccTool>
890 907 </item>
... ... @@ -895,24 +912,18 @@
895 912 <pElem>build/morfeusz</pElem>
896 913 </incDir>
897 914 <preprocessorList>
  915 + <Elem>BUILDING_MORFEUSZ</Elem>
898 916 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
899   - <Elem>NDEBUG</Elem>
900 917 </preprocessorList>
901 918 </ccTool>
902 919 </item>
903 920 <item path="morfeusz/segrules/SegrulesFSA.cpp" ex="false" tool="1" flavor2="4">
  921 + <ccTool flags="2">
  922 + </ccTool>
904 923 </item>
905 924 <item path="morfeusz/segrules/segrules.cpp" ex="false" tool="1" flavor2="4">
906   - </item>
907   - <item path="morfeusz/test/test_recognize_dict.cpp"
908   - ex="false"
909   - tool="1"
910   - flavor2="4">
911   - </item>
912   - <item path="morfeusz/test/test_result_equals.cpp"
913   - ex="false"
914   - tool="1"
915   - flavor2="4">
  925 + <ccTool flags="2">
  926 + </ccTool>
916 927 </item>
917 928 <item path="morfeusz/test_runner.cpp" ex="false" tool="1" flavor2="4">
918 929 <ccTool flags="0">
... ... @@ -921,8 +932,8 @@
921 932 <pElem>build/morfeusz</pElem>
922 933 </incDir>
923 934 <preprocessorList>
  935 + <Elem>BUILDING_MORFEUSZ</Elem>
924 936 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
925   - <Elem>NDEBUG</Elem>
926 937 </preprocessorList>
927 938 </ccTool>
928 939 </item>
... ... @@ -935,8 +946,8 @@
935 946 <pElem>build/morfeusz</pElem>
936 947 </incDir>
937 948 <preprocessorList>
  949 + <Elem>BUILDING_MORFEUSZ</Elem>
938 950 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
939   - <Elem>NDEBUG</Elem>
940 951 </preprocessorList>
941 952 </ccTool>
942 953 </item>
... ... @@ -947,8 +958,8 @@
947 958 <pElem>build/morfeusz</pElem>
948 959 </incDir>
949 960 <preprocessorList>
  961 + <Elem>BUILDING_MORFEUSZ</Elem>
950 962 <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem>
951   - <Elem>NDEBUG</Elem>
952 963 </preprocessorList>
953 964 </ccTool>
954 965 </item>
... ...