Commit 4cf58ae124fe2e9801ce0193c285627b1dc98ee2
1 parent
6409a580
- nowa wersja API przechodząca testy
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/trunk@245 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing
43 changed files
with
1628 additions
and
858 deletions
CMakeLists.txt
fsabuilder/morfeusz_builder
... | ... | @@ -258,11 +258,11 @@ def main(opts): |
258 | 258 | if __name__ == '__main__': |
259 | 259 | import os |
260 | 260 | opts = _parseOptions() |
261 | - try: | |
262 | - main(opts) | |
263 | - except Exception as ex: | |
264 | - print >> sys.stderr, u'Building dictionary file failed:', unicode(ex).encode('utf8'), 'type of error:', type(ex) | |
265 | - sys.exit(1) | |
266 | - finally: | |
267 | - pass | |
261 | + #~ try: | |
262 | + main(opts) | |
263 | + #~ except Exception as ex: | |
264 | + #~ print >> sys.stderr, u'Building dictionary file failed:', unicode(ex).encode('utf8'), 'type of error:', type(ex) | |
265 | + #~ sys.exit(1) | |
266 | + #~ finally: | |
267 | + #~ pass | |
268 | 268 | |
... | ... |
fsabuilder/morfeuszbuilder/fsa/serializer.py
... | ... | @@ -42,7 +42,7 @@ class Serializer(object): |
42 | 42 | |
43 | 43 | # get the Morfeusz file format version that is being encoded |
44 | 44 | def getVersion(self): |
45 | - return 18 | |
45 | + return 19 | |
46 | 46 | |
47 | 47 | def serialize2CppFile(self, fname, isGenerator, headerFilename="data/default_fsa.hpp"): |
48 | 48 | res = [] |
... | ... | @@ -113,11 +113,13 @@ class Serializer(object): |
113 | 113 | def serializeQualifiersMap(self): |
114 | 114 | res = bytearray() |
115 | 115 | res.extend(htons(len(self.qualifiersMap))) |
116 | - for qualifiers, n in sorted(self.qualifiersMap.iteritems(), key=lambda (qs, n): n): | |
117 | - res.append(len(qualifiers)) | |
118 | - for q in qualifiers: | |
119 | - res.extend(q.encode('utf8')) | |
120 | - res.append(0) | |
116 | + label2labelId = dict([ (u'|'.join(qualifiers), n) for qualifiers, n in sorted(self.qualifiersMap.iteritems(), key=lambda (qs, n): n) ]) | |
117 | + res.extend(self._serializeTags(label2labelId)) | |
118 | + #~ for qualifiers, n in sorted(self.qualifiersMap.iteritems(), key=lambda (qs, n): n): | |
119 | + #~ res.append(len(qualifiers)) | |
120 | + #~ for q in qualifiers: | |
121 | + #~ res.extend(q.encode('utf8')) | |
122 | + #~ res.append(0) | |
121 | 123 | return res |
122 | 124 | |
123 | 125 | def serializePrologue(self): |
... | ... |
fsabuilder/morfeuszbuilder/tagset/tagset.py
... | ... | @@ -23,14 +23,15 @@ class Tagset(object): |
23 | 23 | def _doInit(self, filename, encoding): |
24 | 24 | addingTo = None |
25 | 25 | with codecs.open(filename, 'r', encoding) as f: |
26 | - for line in f: | |
27 | - line = line.strip('\n') | |
26 | + for linenum, line in enumerate(f, start=1): | |
27 | + line = line.strip('\n\r') | |
28 | 28 | if line == u'[TAGS]': |
29 | 29 | addingTo = Tagset.TAGS |
30 | 30 | elif line == u'[NAMES]': |
31 | 31 | addingTo = Tagset.NAMES |
32 | 32 | elif line and not line.startswith(u'#'): |
33 | - assert addingTo in [Tagset.TAGS, Tagset.NAMES] | |
33 | + if not addingTo in [Tagset.TAGS, Tagset.NAMES]: | |
34 | + raise FSABuilderException('"%s" - text outside [TAGS] section in tagset file line %d' % (line, linenum)) | |
34 | 35 | res = {Tagset.TAGS: self._tag2tagnum, |
35 | 36 | Tagset.NAMES: self._name2namenum}[addingTo] |
36 | 37 | tagNum = line.split(Tagset.SEP)[0] |
... | ... |
input/morfeusz-sgjp.tagset
0 → 100644
1 | +#!TAGSET-ID pl.sgjp.morfeusz-0.5.0 | |
2 | + | |
3 | +[TAGS] | |
4 | +# special: unknown word (ignotum): | |
5 | +0 ign | |
6 | +# special: space/blank: | |
7 | +1 sp | |
8 | +# NOUNS | |
9 | +694 subst:sg:nom:m1 | |
10 | +695 subst:sg:nom:m2 | |
11 | +696 subst:sg:nom:m3 | |
12 | +697 subst:sg:nom:n1 | |
13 | +698 subst:sg:nom:n2 | |
14 | +693 subst:sg:nom:f | |
15 | +676 subst:sg:gen:m1 | |
16 | +677 subst:sg:gen:m2 | |
17 | +678 subst:sg:gen:m3 | |
18 | +679 subst:sg:gen:n1 | |
19 | +680 subst:sg:gen:n2 | |
20 | +675 subst:sg:gen:f | |
21 | +670 subst:sg:dat:m1 | |
22 | +671 subst:sg:dat:m2 | |
23 | +672 subst:sg:dat:m3 | |
24 | +673 subst:sg:dat:n1 | |
25 | +674 subst:sg:dat:n2 | |
26 | +669 subst:sg:dat:f | |
27 | +664 subst:sg:acc:m1 | |
28 | +665 subst:sg:acc:m2 | |
29 | +666 subst:sg:acc:m3 | |
30 | +667 subst:sg:acc:n1 | |
31 | +668 subst:sg:acc:n2 | |
32 | +663 subst:sg:acc:f | |
33 | +682 subst:sg:inst:m1 | |
34 | +683 subst:sg:inst:m2 | |
35 | +684 subst:sg:inst:m3 | |
36 | +685 subst:sg:inst:n1 | |
37 | +686 subst:sg:inst:n2 | |
38 | +681 subst:sg:inst:f | |
39 | +688 subst:sg:loc:m1 | |
40 | +689 subst:sg:loc:m2 | |
41 | +690 subst:sg:loc:m3 | |
42 | +691 subst:sg:loc:n1 | |
43 | +692 subst:sg:loc:n2 | |
44 | +687 subst:sg:loc:f | |
45 | +700 subst:sg:voc:m1 | |
46 | +701 subst:sg:voc:m2 | |
47 | +702 subst:sg:voc:m3 | |
48 | +703 subst:sg:voc:n1 | |
49 | +704 subst:sg:voc:n2 | |
50 | +699 subst:sg:voc:f | |
51 | +646 subst:pl:nom:m1 | |
52 | +647 subst:pl:nom:m2 | |
53 | +648 subst:pl:nom:m3 | |
54 | +649 subst:pl:nom:n1 | |
55 | +650 subst:pl:nom:n2 | |
56 | +651 subst:pl:nom:p1 | |
57 | +652 subst:pl:nom:p2 | |
58 | +653 subst:pl:nom:p3 | |
59 | +645 subst:pl:nom:f | |
60 | +619 subst:pl:gen:m1 | |
61 | +620 subst:pl:gen:m2 | |
62 | +621 subst:pl:gen:m3 | |
63 | +622 subst:pl:gen:n1 | |
64 | +623 subst:pl:gen:n2 | |
65 | +624 subst:pl:gen:p1 | |
66 | +625 subst:pl:gen:p2 | |
67 | +626 subst:pl:gen:p3 | |
68 | +618 subst:pl:gen:f | |
69 | +610 subst:pl:dat:m1 | |
70 | +611 subst:pl:dat:m2 | |
71 | +612 subst:pl:dat:m3 | |
72 | +613 subst:pl:dat:n1 | |
73 | +614 subst:pl:dat:n2 | |
74 | +615 subst:pl:dat:p1 | |
75 | +616 subst:pl:dat:p2 | |
76 | +617 subst:pl:dat:p3 | |
77 | +609 subst:pl:dat:f | |
78 | +601 subst:pl:acc:m1 | |
79 | +602 subst:pl:acc:m2 | |
80 | +603 subst:pl:acc:m3 | |
81 | +604 subst:pl:acc:n1 | |
82 | +605 subst:pl:acc:n2 | |
83 | +606 subst:pl:acc:p1 | |
84 | +607 subst:pl:acc:p2 | |
85 | +608 subst:pl:acc:p3 | |
86 | +600 subst:pl:acc:f | |
87 | +628 subst:pl:inst:m1 | |
88 | +629 subst:pl:inst:m2 | |
89 | +630 subst:pl:inst:m3 | |
90 | +631 subst:pl:inst:n1 | |
91 | +632 subst:pl:inst:n2 | |
92 | +633 subst:pl:inst:p1 | |
93 | +634 subst:pl:inst:p2 | |
94 | +635 subst:pl:inst:p3 | |
95 | +627 subst:pl:inst:f | |
96 | +637 subst:pl:loc:m1 | |
97 | +638 subst:pl:loc:m2 | |
98 | +639 subst:pl:loc:m3 | |
99 | +640 subst:pl:loc:n1 | |
100 | +641 subst:pl:loc:n2 | |
101 | +642 subst:pl:loc:p1 | |
102 | +643 subst:pl:loc:p2 | |
103 | +644 subst:pl:loc:p3 | |
104 | +636 subst:pl:loc:f | |
105 | +654 subst:pl:voc:f | |
106 | +655 subst:pl:voc:m1 | |
107 | +656 subst:pl:voc:m2 | |
108 | +657 subst:pl:voc:m3 | |
109 | +658 subst:pl:voc:n1 | |
110 | +659 subst:pl:voc:n2 | |
111 | +660 subst:pl:voc:p1 | |
112 | +661 subst:pl:voc:p2 | |
113 | +662 subst:pl:voc:p3 | |
114 | +# depreciative nominal flexeme: | |
115 | +149 depr:pl:nom:m2 | |
116 | +150 depr:pl:voc:m2 | |
117 | +# nominal compounds forming form: | |
118 | +599 substa | |
119 | +# PERSONAL PRONOUNS | |
120 | +443 ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:akc | |
121 | +444 ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:nakc | |
122 | +445 ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:akc | |
123 | +446 ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:nakc | |
124 | +447 ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:akc | |
125 | +448 ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:nakc | |
126 | +449 ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:akc | |
127 | +450 ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:nakc | |
128 | +451 ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:akc | |
129 | +452 ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:nakc | |
130 | +453 ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:akc | |
131 | +454 ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:nakc | |
132 | +455 ppron12:sg:inst:m1.m2.m3.f.n1.n2:pri | |
133 | +456 ppron12:sg:inst:m1.m2.m3.f.n1.n2:sec | |
134 | +457 ppron12:sg:loc:m1.m2.m3.f.n1.n2:pri | |
135 | +458 ppron12:sg:loc:m1.m2.m3.f.n1.n2:sec | |
136 | +459 ppron12:sg:nom:m1.m2.m3.f.n1.n2:pri | |
137 | +460 ppron12:sg:nom:m1.m2.m3.f.n1.n2:sec | |
138 | +461 ppron12:sg:voc:m1.m2.m3.f.n1.n2:sec | |
139 | +429 ppron12:pl:acc:_:pri | |
140 | +430 ppron12:pl:acc:_:sec | |
141 | +431 ppron12:pl:dat:_:pri | |
142 | +432 ppron12:pl:dat:_:sec | |
143 | +433 ppron12:pl:gen:_:pri | |
144 | +434 ppron12:pl:gen:_:sec | |
145 | +435 ppron12:pl:inst:_:pri | |
146 | +436 ppron12:pl:inst:_:sec | |
147 | +437 ppron12:pl:loc:_:pri | |
148 | +438 ppron12:pl:loc:_:sec | |
149 | +439 ppron12:pl:nom:_:pri | |
150 | +440 ppron12:pl:nom:_:sec | |
151 | +441 ppron12:pl:voc:_:pri | |
152 | +442 ppron12:pl:voc:_:sec | |
153 | +474 ppron3:sg:acc:f:ter:_:npraep | |
154 | +475 ppron3:sg:acc:f:ter:_:praep | |
155 | +476 ppron3:sg:acc:m1.m2.m3:ter:akc:npraep | |
156 | +477 ppron3:sg:acc:m1.m2.m3:ter:akc:praep | |
157 | +478 ppron3:sg:acc:m1.m2.m3:ter:nakc:npraep | |
158 | +479 ppron3:sg:acc:m1.m2.m3:ter:nakc:praep | |
159 | +480 ppron3:sg:acc:n1.n2:ter:_:npraep | |
160 | +481 ppron3:sg:acc:n1.n2:ter:_:praep | |
161 | +482 ppron3:sg:dat:f:ter:_:npraep | |
162 | +483 ppron3:sg:dat:f:ter:_:praep | |
163 | +484 ppron3:sg:dat:m1.m2.m3:ter:akc:npraep | |
164 | +485 ppron3:sg:dat:m1.m2.m3:ter:nakc:npraep | |
165 | +486 ppron3:sg:dat:m1.m2.m3:ter:_:praep | |
166 | +487 ppron3:sg:dat:n1.n2:ter:akc:npraep | |
167 | +488 ppron3:sg:dat:n1.n2:ter:nakc:npraep | |
168 | +489 ppron3:sg:dat:n1.n2:ter:_:praep | |
169 | +490 ppron3:sg:gen.acc:m1.m2.m3:ter:nakc:praep | |
170 | +491 ppron3:sg:gen:f:ter:_:npraep | |
171 | +492 ppron3:sg:gen:f:ter:_:praep | |
172 | +493 ppron3:sg:gen:m1.m2.m3:ter:akc:npraep | |
173 | +494 ppron3:sg:gen:m1.m2.m3:ter:akc:praep | |
174 | +495 ppron3:sg:gen:m1.m2.m3:ter:nakc:npraep | |
175 | +496 ppron3:sg:gen:m1.m2.m3:ter:nakc:praep | |
176 | +497 ppron3:sg:gen:n1.n2:ter:akc:npraep | |
177 | +498 ppron3:sg:gen:n1.n2:ter:nakc:npraep | |
178 | +499 ppron3:sg:gen:n1.n2:ter:_:praep | |
179 | +500 ppron3:sg:inst:f:ter:_:praep | |
180 | +501 ppron3:sg:inst:m1.m2.m3:ter:_:_ | |
181 | +502 ppron3:sg:inst:n1.n2:ter:_:_ | |
182 | +503 ppron3:sg:loc:f:ter:_:_ | |
183 | +504 ppron3:sg:loc:m1.m2.m3:ter:_:_ | |
184 | +505 ppron3:sg:loc:n1.n2:ter:_:_ | |
185 | +506 ppron3:sg:nom:f:ter:_:_ | |
186 | +507 ppron3:sg:nom:m1.m2.m3:ter:_:_ | |
187 | +508 ppron3:sg:nom:n1.n2:ter:_:_ | |
188 | +462 ppron3:pl:acc:m1.p1:ter:_:npraep | |
189 | +463 ppron3:pl:acc:m1.p1:ter:_:praep | |
190 | +464 ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:_:npraep | |
191 | +465 ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:_:praep | |
192 | +466 ppron3:pl:dat:_:ter:_:npraep | |
193 | +467 ppron3:pl:dat:_:ter:_:praep | |
194 | +468 ppron3:pl:gen:_:ter:_:npraep | |
195 | +469 ppron3:pl:gen:_:ter:_:praep | |
196 | +470 ppron3:pl:inst:_:ter:_:_ | |
197 | +471 ppron3:pl:loc:_:ter:_:_ | |
198 | +472 ppron3:pl:nom:m1.p1:ter:_:_ | |
199 | +473 ppron3:pl:nom:m2.m3.f.n1.n2.p2.p3:ter:_:_ | |
200 | +# PRONOUN ‘SIEBIE’ | |
201 | +594 siebie:acc | |
202 | +595 siebie:dat | |
203 | +596 siebie:gen | |
204 | +597 siebie:inst | |
205 | +598 siebie:loc | |
206 | +# ADJECTIVES | |
207 | +5 adj:pl:acc:m1.p1:com | |
208 | +6 adj:pl:acc:m1.p1:pos | |
209 | +7 adj:pl:acc:m1.p1:sup | |
210 | +8 adj:pl:acc:m2.m3.f.n1.n2.p2.p3:com | |
211 | +9 adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos | |
212 | +10 adj:pl:acc:m2.m3.f.n1.n2.p2.p3:sup | |
213 | +11 adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:com | |
214 | +12 adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos | |
215 | +13 adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:sup | |
216 | +14 adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:com | |
217 | +15 adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos | |
218 | +16 adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:sup | |
219 | +17 adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:com | |
220 | +18 adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos | |
221 | +19 adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:sup | |
222 | +20 adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:com | |
223 | +21 adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos | |
224 | +22 adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:sup | |
225 | +23 adj:pl:nom:m1.p1:pos | |
226 | +24 adj:pl:nom:m2.m3.f.n1.n2.p2.p3:pos | |
227 | +25 adj:pl:nom.voc:m1.p1:com | |
228 | +26 adj:pl:nom.voc:m1.p1:pos | |
229 | +27 adj:pl:nom.voc:m1.p1:sup | |
230 | +28 adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:com | |
231 | +29 adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos | |
232 | +30 adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:sup | |
233 | +31 adj:sg:acc:f:com | |
234 | +32 adj:sg:acc:f:pos | |
235 | +33 adj:sg:acc:f:sup | |
236 | +34 adj:sg:acc:m1.m2:com | |
237 | +35 adj:sg:acc:m1.m2:pos | |
238 | +36 adj:sg:acc:m1.m2:sup | |
239 | +37 adj:sg:acc:m3:com | |
240 | +38 adj:sg:acc:m3:pos | |
241 | +39 adj:sg:acc:m3:sup | |
242 | +40 adj:sg:acc:n1.n2:com | |
243 | +41 adj:sg:acc:n1.n2:pos | |
244 | +42 adj:sg:acc:n1.n2:sup | |
245 | +43 adj:sg:dat:f:com | |
246 | +44 adj:sg:dat:f:pos | |
247 | +45 adj:sg:dat:f:sup | |
248 | +46 adj:sg:dat:m1.m2.m3.n1.n2:com | |
249 | +47 adj:sg:dat:m1.m2.m3.n1.n2:pos | |
250 | +48 adj:sg:dat:m1.m2.m3.n1.n2:sup | |
251 | +49 adj:sg:gen:f:com | |
252 | +50 adj:sg:gen:f:pos | |
253 | +51 adj:sg:gen:f:sup | |
254 | +52 adj:sg:gen:m1.m2.m3.n1.n2:com | |
255 | +53 adj:sg:gen:m1.m2.m3.n1.n2:pos | |
256 | +54 adj:sg:gen:m1.m2.m3.n1.n2:sup | |
257 | +55 adj:sg:inst:f:com | |
258 | +56 adj:sg:inst:f:pos | |
259 | +57 adj:sg:inst:f:sup | |
260 | +58 adj:sg:inst:m1.m2.m3.n1.n2:com | |
261 | +59 adj:sg:inst:m1.m2.m3.n1.n2:pos | |
262 | +60 adj:sg:inst:m1.m2.m3.n1.n2:sup | |
263 | +61 adj:sg:loc:f:com | |
264 | +62 adj:sg:loc:f:pos | |
265 | +63 adj:sg:loc:f:sup | |
266 | +64 adj:sg:loc:m1.m2.m3.n1.n2:com | |
267 | +65 adj:sg:loc:m1.m2.m3.n1.n2:pos | |
268 | +66 adj:sg:loc:m1.m2.m3.n1.n2:sup | |
269 | +67 adj:sg:nom:f:pos | |
270 | +68 adj:sg:nom:m1.m2.m3:pos | |
271 | +69 adj:sg:nom:n1.n2:pos | |
272 | +70 adj:sg:nom.voc:f:com | |
273 | +71 adj:sg:nom.voc:f:pos | |
274 | +72 adj:sg:nom.voc:f:sup | |
275 | +73 adj:sg:nom.voc:m1.m2.m3:com | |
276 | +74 adj:sg:nom.voc:m1.m2.m3:pos | |
277 | +75 adj:sg:nom.voc:m1.m2.m3:sup | |
278 | +76 adj:sg:nom.voc:n1.n2:com | |
279 | +77 adj:sg:nom.voc:n1.n2:pos | |
280 | +78 adj:sg:nom.voc:n1.n2:sup | |
281 | +# adjectival compounds forming form: | |
282 | +2 adja | |
283 | +# predicative adjective: | |
284 | +3 adjc | |
285 | +# post-prepositional adjective: | |
286 | +4 adjp | |
287 | +# VERBS | |
288 | +# finite (present/future) flexeme: | |
289 | +153 fin:pl:pri:imperf | |
290 | +154 fin:pl:pri:imperf.perf | |
291 | +155 fin:pl:pri:perf | |
292 | +156 fin:pl:sec:imperf | |
293 | +157 fin:pl:sec:imperf.perf | |
294 | +158 fin:pl:sec:perf | |
295 | +159 fin:pl:ter:imperf | |
296 | +160 fin:pl:ter:imperf.perf | |
297 | +161 fin:pl:ter:perf | |
298 | +162 fin:sg:pri:imperf | |
299 | +163 fin:sg:pri:imperf.perf | |
300 | +164 fin:sg:pri:perf | |
301 | +165 fin:sg:sec:imperf | |
302 | +166 fin:sg:sec:imperf.perf | |
303 | +167 fin:sg:sec:perf | |
304 | +168 fin:sg:ter:imperf | |
305 | +169 fin:sg:ter:imperf.perf | |
306 | +170 fin:sg:ter:perf | |
307 | +# past flexeme: | |
308 | +# praet=split (unused otherwise): | |
309 | +509 praet:pl:m1.p1:imperf | |
310 | +510 praet:pl:m1.p1:imperf.perf | |
311 | +511 praet:pl:m1.p1:perf | |
312 | +521 praet:pl:m2.m3.f.n1.n2.p2.p3:imperf | |
313 | +522 praet:pl:m2.m3.f.n1.n2.p2.p3:imperf.perf | |
314 | +523 praet:pl:m2.m3.f.n1.n2.p2.p3:perf | |
315 | +533 praet:sg:f:imperf | |
316 | +534 praet:sg:f:imperf.perf | |
317 | +535 praet:sg:f:perf | |
318 | +545 praet:sg:m1.m2.m3:imperf | |
319 | +546 praet:sg:m1.m2.m3:imperf:agl | |
320 | +547 praet:sg:m1.m2.m3:imperf:nagl | |
321 | +548 praet:sg:m1.m2.m3:imperf.perf | |
322 | +549 praet:sg:m1.m2.m3:perf | |
323 | +550 praet:sg:m1.m2.m3:perf:agl | |
324 | +551 praet:sg:m1.m2.m3:perf:nagl | |
325 | +561 praet:sg:n1.n2:imperf | |
326 | +562 praet:sg:n1.n2:imperf.perf | |
327 | +563 praet:sg:n1.n2:perf | |
328 | +# praet=composite (unused otherwise): | |
329 | +512 praet:pl:m1.p1:pri:imperf | |
330 | +513 praet:pl:m1.p1:pri:imperf.perf | |
331 | +514 praet:pl:m1.p1:pri:perf | |
332 | +515 praet:pl:m1.p1:sec:imperf | |
333 | +516 praet:pl:m1.p1:sec:imperf.perf | |
334 | +517 praet:pl:m1.p1:sec:perf | |
335 | +518 praet:pl:m1.p1:ter:imperf | |
336 | +519 praet:pl:m1.p1:ter:imperf.perf | |
337 | +520 praet:pl:m1.p1:ter:perf | |
338 | +524 praet:pl:m2.m3.f.n1.n2.p2.p3:pri:imperf | |
339 | +525 praet:pl:m2.m3.f.n1.n2.p2.p3:pri:imperf.perf | |
340 | +526 praet:pl:m2.m3.f.n1.n2.p2.p3:pri:perf | |
341 | +527 praet:pl:m2.m3.f.n1.n2.p2.p3:sec:imperf | |
342 | +528 praet:pl:m2.m3.f.n1.n2.p2.p3:sec:imperf.perf | |
343 | +529 praet:pl:m2.m3.f.n1.n2.p2.p3:sec:perf | |
344 | +530 praet:pl:m2.m3.f.n1.n2.p2.p3:ter:imperf | |
345 | +531 praet:pl:m2.m3.f.n1.n2.p2.p3:ter:imperf.perf | |
346 | +532 praet:pl:m2.m3.f.n1.n2.p2.p3:ter:perf | |
347 | +536 praet:sg:f:pri:imperf | |
348 | +537 praet:sg:f:pri:imperf.perf | |
349 | +538 praet:sg:f:pri:perf | |
350 | +539 praet:sg:f:sec:imperf | |
351 | +540 praet:sg:f:sec:imperf.perf | |
352 | +541 praet:sg:f:sec:perf | |
353 | +542 praet:sg:f:ter:imperf | |
354 | +543 praet:sg:f:ter:imperf.perf | |
355 | +544 praet:sg:f:ter:perf | |
356 | +552 praet:sg:m1.m2.m3:pri:imperf | |
357 | +553 praet:sg:m1.m2.m3:pri:imperf.perf | |
358 | +554 praet:sg:m1.m2.m3:pri:perf | |
359 | +555 praet:sg:m1.m2.m3:sec:imperf | |
360 | +556 praet:sg:m1.m2.m3:sec:imperf.perf | |
361 | +557 praet:sg:m1.m2.m3:sec:perf | |
362 | +558 praet:sg:m1.m2.m3:ter:imperf | |
363 | +559 praet:sg:m1.m2.m3:ter:imperf.perf | |
364 | +560 praet:sg:m1.m2.m3:ter:perf | |
365 | +564 praet:sg:n1.n2:pri:imperf | |
366 | +565 praet:sg:n1.n2:pri:imperf.perf | |
367 | +566 praet:sg:n1.n2:pri:perf | |
368 | +567 praet:sg:n1.n2:sec:imperf | |
369 | +568 praet:sg:n1.n2:sec:imperf.perf | |
370 | +569 praet:sg:n1.n2:sec:perf | |
371 | +570 praet:sg:n1.n2:ter:imperf | |
372 | +571 praet:sg:n1.n2:ter:imperf.perf | |
373 | +572 praet:sg:n1.n2:ter:perf | |
374 | +# conditional mood (used only with praet=composite) | |
375 | +100 cond:pl:m1.p1:pri:imperf | |
376 | +101 cond:pl:m1.p1:pri:imperf.perf | |
377 | +102 cond:pl:m1.p1:pri:perf | |
378 | +103 cond:pl:m1.p1:sec:imperf | |
379 | +104 cond:pl:m1.p1:sec:imperf.perf | |
380 | +105 cond:pl:m1.p1:sec:perf | |
381 | +106 cond:pl:m1.p1:ter:imperf | |
382 | +107 cond:pl:m1.p1:ter:imperf.perf | |
383 | +108 cond:pl:m1.p1:ter:perf | |
384 | +109 cond:pl:m2.m3.f.n1.n2.p2.p3:pri:imperf | |
385 | +110 cond:pl:m2.m3.f.n1.n2.p2.p3:pri:imperf.perf | |
386 | +111 cond:pl:m2.m3.f.n1.n2.p2.p3:pri:perf | |
387 | +112 cond:pl:m2.m3.f.n1.n2.p2.p3:sec:imperf | |
388 | +113 cond:pl:m2.m3.f.n1.n2.p2.p3:sec:imperf.perf | |
389 | +114 cond:pl:m2.m3.f.n1.n2.p2.p3:sec:perf | |
390 | +115 cond:pl:m2.m3.f.n1.n2.p2.p3:ter:imperf | |
391 | +116 cond:pl:m2.m3.f.n1.n2.p2.p3:ter:imperf.perf | |
392 | +117 cond:pl:m2.m3.f.n1.n2.p2.p3:ter:perf | |
393 | +118 cond:sg:f:pri:imperf | |
394 | +119 cond:sg:f:pri:imperf.perf | |
395 | +120 cond:sg:f:pri:perf | |
396 | +121 cond:sg:f:sec:imperf | |
397 | +122 cond:sg:f:sec:imperf.perf | |
398 | +123 cond:sg:f:sec:perf | |
399 | +124 cond:sg:f:ter:imperf | |
400 | +125 cond:sg:f:ter:imperf.perf | |
401 | +126 cond:sg:f:ter:perf | |
402 | +127 cond:sg:m1.m2.m3:pri:imperf | |
403 | +128 cond:sg:m1.m2.m3:pri:imperf.perf | |
404 | +129 cond:sg:m1.m2.m3:pri:perf | |
405 | +130 cond:sg:m1.m2.m3:sec:imperf | |
406 | +131 cond:sg:m1.m2.m3:sec:imperf.perf | |
407 | +132 cond:sg:m1.m2.m3:sec:perf | |
408 | +133 cond:sg:m1.m2.m3:ter:imperf | |
409 | +134 cond:sg:m1.m2.m3:ter:imperf.perf | |
410 | +135 cond:sg:m1.m2.m3:ter:perf | |
411 | +136 cond:sg:n1.n2:imperf | |
412 | +137 cond:sg:n1.n2:imperf.perf | |
413 | +138 cond:sg:n1.n2:perf | |
414 | +139 cond:sg:n1.n2:pri:imperf | |
415 | +140 cond:sg:n1.n2:pri:imperf.perf | |
416 | +141 cond:sg:n1.n2:pri:perf | |
417 | +142 cond:sg:n1.n2:sec:imperf | |
418 | +143 cond:sg:n1.n2:sec:imperf.perf | |
419 | +144 cond:sg:n1.n2:sec:perf | |
420 | +145 cond:sg:n1.n2:ter:imperf | |
421 | +146 cond:sg:n1.n2:ter:imperf.perf | |
422 | +147 cond:sg:n1.n2:ter:perf | |
423 | +# impersonal flexeme: | |
424 | +219 imps:imperf | |
425 | +220 imps:imperf.perf | |
426 | +221 imps:perf | |
427 | +# imperative flexeme: | |
428 | +222 impt:pl:pri:imperf | |
429 | +223 impt:pl:pri:imperf.perf | |
430 | +224 impt:pl:pri:perf | |
431 | +225 impt:pl:sec:imperf | |
432 | +226 impt:pl:sec:imperf.perf | |
433 | +227 impt:pl:sec:perf | |
434 | +228 impt:sg:sec:imperf | |
435 | +229 impt:sg:sec:imperf.perf | |
436 | +230 impt:sg:sec:perf | |
437 | +# infinitival flexeme: | |
438 | +231 inf:imperf | |
439 | +232 inf:imperf.perf | |
440 | +233 inf:perf | |
441 | +# agglutinative forms of ‘być’: | |
442 | +83 aglt:pl:pri:imperf:nwok | |
443 | +84 aglt:pl:pri:imperf:wok | |
444 | +85 aglt:pl:sec:imperf:nwok | |
445 | +86 aglt:pl:sec:imperf:wok | |
446 | +87 aglt:sg:pri:imperf:nwok | |
447 | +88 aglt:sg:pri:imperf:wok | |
448 | +89 aglt:sg:sec:imperf:nwok | |
449 | +90 aglt:sg:sec:imperf:wok | |
450 | +# future forms of ‘być’: | |
451 | +91 bedzie:pl:pri:imperf | |
452 | +92 bedzie:pl:sec:imperf | |
453 | +93 bedzie:pl:ter:imperf | |
454 | +94 bedzie:sg:pri:imperf | |
455 | +95 bedzie:sg:sec:imperf | |
456 | +96 bedzie:sg:ter:imperf | |
457 | +# ‘winien’ type verbs: | |
458 | +705 winien:pl:m1.p1:imperf | |
459 | +706 winien:pl:m1.p1:pri:imperf | |
460 | +707 winien:pl:m1.p1:sec:imperf | |
461 | +708 winien:pl:m1.p1:ter:imperf | |
462 | +709 winien:pl:m2.m3.f.n1.n2.p2.p3:imperf | |
463 | +710 winien:pl:m2.m3.f.n1.n2.p2.p3:sec:imperf | |
464 | +711 winien:pl:m2.m3.f.n1.n2.p2.p3:ter:imperf | |
465 | +712 winien:sg:f:imperf | |
466 | +713 winien:sg:f:pri:imperf | |
467 | +714 winien:sg:f:sec:imperf | |
468 | +715 winien:sg:f:ter:imperf | |
469 | +716 winien:sg:m1.m2.m3:imperf | |
470 | +717 winien:sg:m1.m2.m3:pri:imperf | |
471 | +718 winien:sg:m1.m2.m3:sec:imperf | |
472 | +719 winien:sg:m1.m2.m3:ter:imperf | |
473 | +720 winien:sg:n1.n2:imperf | |
474 | +721 winien:sg:n1.n2:pri:imperf | |
475 | +722 winien:sg:n1.n2:sec:imperf | |
476 | +723 winien:sg:n1.n2:ter:imperf | |
477 | +# predicative flexeme: | |
478 | +573 pred | |
479 | +# gerunds | |
480 | +171 ger:pl:dat.loc:n2:imperf:aff | |
481 | +172 ger:pl:dat.loc:n2:imperf:neg | |
482 | +173 ger:pl:dat.loc:n2:imperf.perf:aff | |
483 | +174 ger:pl:dat.loc:n2:imperf.perf:neg | |
484 | +175 ger:pl:dat.loc:n2:perf:aff | |
485 | +176 ger:pl:dat.loc:n2:perf:neg | |
486 | +177 ger:pl:gen:n2:imperf:aff | |
487 | +178 ger:pl:gen:n2:imperf:neg | |
488 | +179 ger:pl:gen:n2:imperf.perf:aff | |
489 | +180 ger:pl:gen:n2:imperf.perf:neg | |
490 | +181 ger:pl:gen:n2:perf:aff | |
491 | +182 ger:pl:gen:n2:perf:neg | |
492 | +183 ger:pl:inst:n2:imperf:aff | |
493 | +184 ger:pl:inst:n2:imperf:neg | |
494 | +185 ger:pl:inst:n2:imperf.perf:aff | |
495 | +186 ger:pl:inst:n2:imperf.perf:neg | |
496 | +187 ger:pl:inst:n2:perf:aff | |
497 | +188 ger:pl:inst:n2:perf:neg | |
498 | +189 ger:pl:nom.acc:n2:imperf:aff | |
499 | +190 ger:pl:nom.acc:n2:imperf:neg | |
500 | +191 ger:pl:nom.acc:n2:imperf.perf:aff | |
501 | +192 ger:pl:nom.acc:n2:imperf.perf:neg | |
502 | +193 ger:pl:nom.acc:n2:perf:aff | |
503 | +194 ger:pl:nom.acc:n2:perf:neg | |
504 | +195 ger:sg:dat.loc:n2:imperf:aff | |
505 | +196 ger:sg:dat.loc:n2:imperf:neg | |
506 | +197 ger:sg:dat.loc:n2:imperf.perf:aff | |
507 | +198 ger:sg:dat.loc:n2:imperf.perf:neg | |
508 | +199 ger:sg:dat.loc:n2:perf:aff | |
509 | +200 ger:sg:dat.loc:n2:perf:neg | |
510 | +201 ger:sg:gen:n2:imperf:aff | |
511 | +202 ger:sg:gen:n2:imperf:neg | |
512 | +203 ger:sg:gen:n2:imperf.perf:aff | |
513 | +204 ger:sg:gen:n2:imperf.perf:neg | |
514 | +205 ger:sg:gen:n2:perf:aff | |
515 | +206 ger:sg:gen:n2:perf:neg | |
516 | +207 ger:sg:inst:n2:imperf:aff | |
517 | +208 ger:sg:inst:n2:imperf:neg | |
518 | +209 ger:sg:inst:n2:imperf.perf:aff | |
519 | +210 ger:sg:inst:n2:imperf.perf:neg | |
520 | +211 ger:sg:inst:n2:perf:aff | |
521 | +212 ger:sg:inst:n2:perf:neg | |
522 | +213 ger:sg:nom.acc:n2:imperf:aff | |
523 | +214 ger:sg:nom.acc:n2:imperf:neg | |
524 | +215 ger:sg:nom.acc:n2:imperf.perf:aff | |
525 | +216 ger:sg:nom.acc:n2:imperf.perf:neg | |
526 | +217 ger:sg:nom.acc:n2:perf:aff | |
527 | +218 ger:sg:nom.acc:n2:perf:neg | |
528 | +# participles | |
529 | +# adverbial participles: | |
530 | +332 pcon:imperf | |
531 | +331 pant:perf | |
532 | +# adjectival active participle: | |
533 | +267 pact:pl:acc:m1.p1:imperf:aff | |
534 | +268 pact:pl:acc:m1.p1:imperf:neg | |
535 | +269 pact:pl:acc:m1.p1:imperf.perf:aff | |
536 | +270 pact:pl:acc:m1.p1:imperf.perf:neg | |
537 | +271 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff | |
538 | +272 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg | |
539 | +273 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff | |
540 | +274 pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg | |
541 | +275 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff | |
542 | +276 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg | |
543 | +277 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff | |
544 | +278 pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg | |
545 | +279 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff | |
546 | +280 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg | |
547 | +281 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff | |
548 | +282 pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg | |
549 | +283 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff | |
550 | +284 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:neg | |
551 | +285 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:aff | |
552 | +286 pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:neg | |
553 | +287 pact:pl:nom.voc:m1.p1:imperf:aff | |
554 | +288 pact:pl:nom.voc:m1.p1:imperf:neg | |
555 | +289 pact:pl:nom.voc:m1.p1:imperf.perf:aff | |
556 | +290 pact:pl:nom.voc:m1.p1:imperf.perf:neg | |
557 | +291 pact:sg:acc.inst:f:imperf:aff | |
558 | +292 pact:sg:acc.inst:f:imperf:neg | |
559 | +293 pact:sg:acc.inst:f:imperf.perf:aff | |
560 | +294 pact:sg:acc.inst:f:imperf.perf:neg | |
561 | +295 pact:sg:acc:m1.m2:imperf:aff | |
562 | +296 pact:sg:acc:m1.m2:imperf:neg | |
563 | +297 pact:sg:acc:m1.m2:imperf.perf:aff | |
564 | +298 pact:sg:acc:m1.m2:imperf.perf:neg | |
565 | +299 pact:sg:acc:m3:imperf:aff | |
566 | +300 pact:sg:acc:m3:imperf:neg | |
567 | +301 pact:sg:acc:m3:imperf.perf:aff | |
568 | +302 pact:sg:acc:m3:imperf.perf:neg | |
569 | +303 pact:sg:dat:m1.m2.m3.n1.n2:imperf:aff | |
570 | +304 pact:sg:dat:m1.m2.m3.n1.n2:imperf:neg | |
571 | +305 pact:sg:dat:m1.m2.m3.n1.n2:imperf.perf:aff | |
572 | +306 pact:sg:dat:m1.m2.m3.n1.n2:imperf.perf:neg | |
573 | +307 pact:sg:gen.dat.loc:f:imperf:aff | |
574 | +308 pact:sg:gen.dat.loc:f:imperf:neg | |
575 | +309 pact:sg:gen.dat.loc:f:imperf.perf:aff | |
576 | +310 pact:sg:gen.dat.loc:f:imperf.perf:neg | |
577 | +311 pact:sg:gen:m1.m2.m3.n1.n2:imperf:aff | |
578 | +312 pact:sg:gen:m1.m2.m3.n1.n2:imperf:neg | |
579 | +313 pact:sg:gen:m1.m2.m3.n1.n2:imperf.perf:aff | |
580 | +314 pact:sg:gen:m1.m2.m3.n1.n2:imperf.perf:neg | |
581 | +315 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff | |
582 | +316 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf:neg | |
583 | +317 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:aff | |
584 | +318 pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:neg | |
585 | +319 pact:sg:nom.acc.voc:n1.n2:imperf:aff | |
586 | +320 pact:sg:nom.acc.voc:n1.n2:imperf:neg | |
587 | +321 pact:sg:nom.acc.voc:n1.n2:imperf.perf:aff | |
588 | +322 pact:sg:nom.acc.voc:n1.n2:imperf.perf:neg | |
589 | +323 pact:sg:nom.voc:f:imperf:aff | |
590 | +324 pact:sg:nom.voc:f:imperf:neg | |
591 | +325 pact:sg:nom.voc:f:imperf.perf:aff | |
592 | +326 pact:sg:nom.voc:f:imperf.perf:neg | |
593 | +327 pact:sg:nom.voc:m1.m2.m3:imperf:aff | |
594 | +328 pact:sg:nom.voc:m1.m2.m3:imperf:neg | |
595 | +329 pact:sg:nom.voc:m1.m2.m3:imperf.perf:aff | |
596 | +330 pact:sg:nom.voc:m1.m2.m3:imperf.perf:neg | |
597 | +# adjectival passive participle: | |
598 | +333 ppas:pl:acc:m1.p1:imperf:aff | |
599 | +334 ppas:pl:acc:m1.p1:imperf:neg | |
600 | +335 ppas:pl:acc:m1.p1:imperf.perf:aff | |
601 | +336 ppas:pl:acc:m1.p1:imperf.perf:neg | |
602 | +337 ppas:pl:acc:m1.p1:perf:aff | |
603 | +338 ppas:pl:acc:m1.p1:perf:neg | |
604 | +339 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff | |
605 | +340 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg | |
606 | +341 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff | |
607 | +342 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg | |
608 | +343 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff | |
609 | +344 ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg | |
610 | +345 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff | |
611 | +346 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg | |
612 | +347 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff | |
613 | +348 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg | |
614 | +349 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff | |
615 | +350 ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg | |
616 | +351 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff | |
617 | +352 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg | |
618 | +353 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff | |
619 | +354 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg | |
620 | +355 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff | |
621 | +356 ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg | |
622 | +357 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff | |
623 | +358 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:neg | |
624 | +359 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:aff | |
625 | +360 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:neg | |
626 | +361 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:aff | |
627 | +362 ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:neg | |
628 | +363 ppas:pl:nom.voc:m1.p1:imperf:aff | |
629 | +364 ppas:pl:nom.voc:m1.p1:imperf:neg | |
630 | +365 ppas:pl:nom.voc:m1.p1:imperf.perf:aff | |
631 | +366 ppas:pl:nom.voc:m1.p1:imperf.perf:neg | |
632 | +367 ppas:pl:nom.voc:m1.p1:perf:aff | |
633 | +368 ppas:pl:nom.voc:m1.p1:perf:neg | |
634 | +369 ppas:sg:acc.inst:f:imperf:aff | |
635 | +370 ppas:sg:acc.inst:f:imperf:neg | |
636 | +371 ppas:sg:acc.inst:f:imperf.perf:aff | |
637 | +372 ppas:sg:acc.inst:f:imperf.perf:neg | |
638 | +373 ppas:sg:acc.inst:f:perf:aff | |
639 | +374 ppas:sg:acc.inst:f:perf:neg | |
640 | +375 ppas:sg:acc:m1.m2:imperf:aff | |
641 | +376 ppas:sg:acc:m1.m2:imperf:neg | |
642 | +377 ppas:sg:acc:m1.m2:imperf.perf:aff | |
643 | +378 ppas:sg:acc:m1.m2:imperf.perf:neg | |
644 | +379 ppas:sg:acc:m1.m2:perf:aff | |
645 | +380 ppas:sg:acc:m1.m2:perf:neg | |
646 | +381 ppas:sg:acc:m3:imperf:aff | |
647 | +382 ppas:sg:acc:m3:imperf:neg | |
648 | +383 ppas:sg:acc:m3:imperf.perf:aff | |
649 | +384 ppas:sg:acc:m3:imperf.perf:neg | |
650 | +385 ppas:sg:acc:m3:perf:aff | |
651 | +386 ppas:sg:acc:m3:perf:neg | |
652 | +387 ppas:sg:dat:m1.m2.m3.n1.n2:imperf:aff | |
653 | +388 ppas:sg:dat:m1.m2.m3.n1.n2:imperf:neg | |
654 | +389 ppas:sg:dat:m1.m2.m3.n1.n2:imperf.perf:aff | |
655 | +390 ppas:sg:dat:m1.m2.m3.n1.n2:imperf.perf:neg | |
656 | +391 ppas:sg:dat:m1.m2.m3.n1.n2:perf:aff | |
657 | +392 ppas:sg:dat:m1.m2.m3.n1.n2:perf:neg | |
658 | +393 ppas:sg:gen.dat.loc:f:imperf:aff | |
659 | +394 ppas:sg:gen.dat.loc:f:imperf:neg | |
660 | +395 ppas:sg:gen.dat.loc:f:imperf.perf:aff | |
661 | +396 ppas:sg:gen.dat.loc:f:imperf.perf:neg | |
662 | +397 ppas:sg:gen.dat.loc:f:perf:aff | |
663 | +398 ppas:sg:gen.dat.loc:f:perf:neg | |
664 | +399 ppas:sg:gen:m1.m2.m3.n1.n2:imperf:aff | |
665 | +400 ppas:sg:gen:m1.m2.m3.n1.n2:imperf:neg | |
666 | +401 ppas:sg:gen:m1.m2.m3.n1.n2:imperf.perf:aff | |
667 | +402 ppas:sg:gen:m1.m2.m3.n1.n2:imperf.perf:neg | |
668 | +403 ppas:sg:gen:m1.m2.m3.n1.n2:perf:aff | |
669 | +404 ppas:sg:gen:m1.m2.m3.n1.n2:perf:neg | |
670 | +405 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff | |
671 | +406 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf:neg | |
672 | +407 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:aff | |
673 | +408 ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:neg | |
674 | +409 ppas:sg:inst.loc:m1.m2.m3.n1.n2:perf:aff | |
675 | +410 ppas:sg:inst.loc:m1.m2.m3.n1.n2:perf:neg | |
676 | +411 ppas:sg:nom.acc.voc:n1.n2:imperf:aff | |
677 | +412 ppas:sg:nom.acc.voc:n1.n2:imperf:neg | |
678 | +413 ppas:sg:nom.acc.voc:n1.n2:imperf.perf:aff | |
679 | +414 ppas:sg:nom.acc.voc:n1.n2:imperf.perf:neg | |
680 | +415 ppas:sg:nom.acc.voc:n1.n2:perf:aff | |
681 | +416 ppas:sg:nom.acc.voc:n1.n2:perf:neg | |
682 | +417 ppas:sg:nom.voc:f:imperf:aff | |
683 | +418 ppas:sg:nom.voc:f:imperf:neg | |
684 | +419 ppas:sg:nom.voc:f:imperf.perf:aff | |
685 | +420 ppas:sg:nom.voc:f:imperf.perf:neg | |
686 | +421 ppas:sg:nom.voc:f:perf:aff | |
687 | +422 ppas:sg:nom.voc:f:perf:neg | |
688 | +423 ppas:sg:nom.voc:m1.m2.m3:imperf:aff | |
689 | +424 ppas:sg:nom.voc:m1.m2.m3:imperf:neg | |
690 | +425 ppas:sg:nom.voc:m1.m2.m3:imperf.perf:aff | |
691 | +426 ppas:sg:nom.voc:m1.m2.m3:imperf.perf:neg | |
692 | +427 ppas:sg:nom.voc:m1.m2.m3:perf:aff | |
693 | +428 ppas:sg:nom.voc:m1.m2.m3:perf:neg | |
694 | +# NUMERALS | |
695 | +239 num:pl:acc:m1:rec | |
696 | +240 num:pl:dat.loc:n1.p1.p2:congr.rec | |
697 | +241 num:pl:dat:m1.m2.m3.n2.f:congr | |
698 | +242 num:pl:gen.dat.inst.loc:m1.m2.m3.f.n1.n2.p1.p2:congr | |
699 | +243 num:pl:gen.dat.inst.loc:m1.m2.m3.f.n2:congr | |
700 | +244 num:pl:gen.dat.loc:m1.m2.m3.n2.f:congr | |
701 | +245 num:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2:congr | |
702 | +246 num:pl:gen.loc:m1.m2.m3.n2.f:congr | |
703 | +247 num:pl:gen:n1.p1.p2:rec | |
704 | +248 num:pl:inst:f:congr | |
705 | +249 num:pl:inst:m1.m2.m3.f.n1.n2.p1.p2:congr | |
706 | +250 num:pl:inst:m1.m2.m3.f.n2:congr | |
707 | +251 num:pl:inst:m1.m2.m3.n2:congr | |
708 | +252 num:pl:inst:m1.m2.m3.n2.f:congr | |
709 | +253 num:pl:inst:n1.p1.p2:rec | |
710 | +254 num:pl:nom.acc:m1.m2.m3.f.n1.n2.p1.p2:rec | |
711 | +255 num:pl:nom.acc.voc:f:congr | |
712 | +256 num:pl:nom.acc.voc:m1:rec | |
713 | +257 num:pl:nom.acc.voc:m2.m3.f.n1.n2.p1.p2:rec | |
714 | +258 num:pl:nom.acc.voc:m2.m3.f.n2:rec | |
715 | +259 num:pl:nom.acc.voc:m2.m3.n2:congr | |
716 | +260 num:pl:nom.acc.voc:m2.m3.n2.f:congr | |
717 | +261 num:pl:nom.acc.voc:n1.p1.p2:rec | |
718 | +262 num:pl:nom.gen.dat.inst.acc.loc.voc:m1.m2.m3.f.n1.n2.p1.p2:rec | |
719 | +263 num:pl:nom.voc:m1:congr | |
720 | +264 num:pl:nom.voc:m1:rec | |
721 | +265 num:sg:nom.gen.dat.inst.acc.loc.voc:f:rec | |
722 | +266 num:sg:nom.gen.dat.inst.acc.loc.voc:m1.m2.m3.n1.n2:rec | |
723 | +# numeral compounds forming form: | |
724 | +238 num:comp | |
725 | +# PREPOSITIONS | |
726 | +578 prep:acc | |
727 | +579 prep:acc:nwok | |
728 | +580 prep:acc:wok | |
729 | +581 prep:dat | |
730 | +582 prep:gen | |
731 | +583 prep:gen:nwok | |
732 | +584 prep:gen:wok | |
733 | +585 prep:inst | |
734 | +586 prep:inst:nwok | |
735 | +587 prep:inst:wok | |
736 | +588 prep:loc | |
737 | +589 prep:loc:nwok | |
738 | +590 prep:loc:wok | |
739 | +591 prep:nom | |
740 | +# ADVERBS | |
741 | +79 adv | |
742 | +80 adv:com | |
743 | +81 adv:pos | |
744 | +82 adv:sup | |
745 | +# OTHER | |
746 | +# kubliki (particles): | |
747 | +592 qub | |
748 | +# conjunctions: | |
749 | +148 conj | |
750 | +# complementizers: | |
751 | +99 comp | |
752 | +# interjections: | |
753 | +234 interj | |
754 | +# burkinostki (bound words): | |
755 | +98 burk | |
756 | +# abbreviations: | |
757 | +97 brev:pun | |
758 | +97 brev:npun | |
759 | +# punctuation: | |
760 | +235 interp | |
761 | +# digits: | |
762 | +151 dig | |
763 | +# Roman digits: | |
764 | +593 romandig | |
765 | +# emoticons: | |
766 | +152 emoticon | |
767 | +# prefixes: | |
768 | +574 prefa | |
769 | +575 prefppas | |
770 | +576 prefs | |
771 | +577 prefv | |
772 | +# (special) | |
773 | +236 naj | |
774 | +237 nie | |
775 | + | |
776 | +[NAMES] | |
777 | +0 | |
778 | +1 astr. | |
779 | +2 budowla | |
780 | +3 członek rodu | |
781 | +4 człon nazwiska | |
782 | +5 człon nazwiska (herb) | |
783 | +6 człon nazwy firmy | |
784 | +7 firma | |
785 | +8 geograficzna | |
786 | +9 imię | |
787 | +10 instytucja | |
788 | +11 język programowania | |
789 | +12 krój pisma | |
790 | +13 marka | |
791 | +14 nazwisko | |
792 | +15 oprogramowanie | |
793 | +16 organizacja | |
794 | +17 patronimicum | |
795 | +18 pospolita | |
796 | +19 przydomek | |
797 | +20 pseudonim | |
798 | +21 sufiks nazwiska | |
799 | +22 środek lokomocji | |
800 | +23 święto | |
801 | +24 tytuł | |
802 | +25 własna | |
... | ... |
morfeusz/CMakeLists.txt
morfeusz/DefaultTagset.cpp deleted
1 | - | |
2 | -#include <string> | |
3 | -#include <vector> | |
4 | -#include "DefaultTagset.hpp" | |
5 | -#include "fsa/const.hpp" | |
6 | -#include "utils.hpp" | |
7 | -#include "deserialization/deserializationUtils.hpp" | |
8 | - | |
9 | -using namespace std; | |
10 | - | |
11 | -namespace morfeusz { | |
12 | - | |
13 | - DefaultTagset::DefaultTagset(const unsigned char* ptr, const CharsetConverter* charsetConverter) | |
14 | - : tags(), | |
15 | - names(), | |
16 | - charsetConverter(charsetConverter) { | |
17 | - uint32_t fsaSize = readInt32Const(ptr + FSA_DATA_SIZE_OFFSET); | |
18 | - const unsigned char* currPtr = ptr + FSA_DATA_OFFSET + fsaSize + 4; | |
19 | - readTags(currPtr, this->tags); | |
20 | - readTags(currPtr, this->names); | |
21 | - setCharsetConverter(charsetConverter); | |
22 | - } | |
23 | - | |
24 | - const string& DefaultTagset::getTag(const int tagNum) const { | |
25 | - return this->tags.at(tagNum); | |
26 | -// if (charsetConverter == &UTF8CharsetConverter::getInstance()) { | |
27 | -// return this->tags.at(tagNum); | |
28 | -// } | |
29 | -// else { | |
30 | -// return charsetConverter->fromUTF8(this->tags.at(tagNum)); | |
31 | -// } | |
32 | - } | |
33 | - | |
34 | - const string& DefaultTagset::getName(const int nameNum) const { | |
35 | - return this->names.at(nameNum); | |
36 | -// if (charsetConverter == &UTF8CharsetConverter::getInstance()) { | |
37 | -// return this->names.at(nameNum); | |
38 | -// } | |
39 | -// else { | |
40 | -// return charsetConverter->fromUTF8(this->names.at(nameNum)); | |
41 | -// } | |
42 | - } | |
43 | - | |
44 | - size_t DefaultTagset::getTagsSize() const { | |
45 | - return this->tags.size(); | |
46 | - } | |
47 | - | |
48 | - size_t DefaultTagset::getNamesSize() const { | |
49 | - return this->names.size(); | |
50 | - } | |
51 | - | |
52 | - // FIXME - probably should not convert whole tagset on every setCharsetConverter method invocation. | |
53 | - void DefaultTagset::setCharsetConverter(const CharsetConverter* charsetConverter) { | |
54 | - | |
55 | - for (unsigned int i = 0; i < tags.size(); i++) { | |
56 | - tags[i] = charsetConverter->fromUTF8( | |
57 | - this->charsetConverter->toUTF8(tags[i])); | |
58 | - } | |
59 | - | |
60 | - for (unsigned int j = 0; j < names.size(); j++) { | |
61 | - names[j] = charsetConverter->fromUTF8( | |
62 | - this->charsetConverter->toUTF8(names[j])); | |
63 | - } | |
64 | - | |
65 | - this->charsetConverter = charsetConverter; | |
66 | - } | |
67 | - | |
68 | -} |
morfeusz/DefaultTagset.hpp deleted
1 | -/* | |
2 | - * File: tagset.hpp | |
3 | - * Author: mlenart | |
4 | - * | |
5 | - * Created on 12 listopad 2013, 14:09 | |
6 | - */ | |
7 | - | |
8 | -#ifndef DEFAULTTAGSET_HPP | |
9 | -#define DEFAULTTAGSET_HPP | |
10 | - | |
11 | -#include <string> | |
12 | -#include <vector> | |
13 | -#include <map> | |
14 | -#include "morfeusz2.h" | |
15 | -#include "charset/CharsetConverter.hpp" | |
16 | - | |
17 | -namespace morfeusz { | |
18 | - | |
19 | - /** | |
20 | - * Represents a tagset | |
21 | - */ | |
22 | - class DefaultTagset : public Tagset<std::string> { | |
23 | - public: | |
24 | - /** | |
25 | - * Constructs a tagset from binary data. | |
26 | - * | |
27 | - * @param fsaData - pointer to the beginning of automaton data. | |
28 | - */ | |
29 | - DefaultTagset(const unsigned char* fsaData, const CharsetConverter* charsetConverter); | |
30 | - | |
31 | - const std::string& getTag(const int tagNum) const; | |
32 | - | |
33 | - const std::string& getName(const int nameNum) const; | |
34 | - | |
35 | - size_t getTagsSize() const; | |
36 | - | |
37 | - size_t getNamesSize() const; | |
38 | - | |
39 | - void setCharsetConverter(const CharsetConverter* charsetConverter); | |
40 | - | |
41 | - private: | |
42 | - std::vector<std::string> tags; | |
43 | - std::vector<std::string> names; | |
44 | - const CharsetConverter* charsetConverter; | |
45 | - }; | |
46 | - | |
47 | -} | |
48 | - | |
49 | -#endif /* DEFAULTTAGSET_HPP */ | |
50 | - |
morfeusz/Environment.cpp
... | ... | @@ -40,7 +40,7 @@ Environment::Environment( |
40 | 40 | : currentCharsetConverter(getCharsetConverter(charset)), |
41 | 41 | caseConverter(), |
42 | 42 | tagset(fsaFileStartPtr, currentCharsetConverter), |
43 | -qualifiers(fsaFileStartPtr), | |
43 | +//qualifiers(fsaFileStartPtr), | |
44 | 44 | fsaFileStartPtr(fsaFileStartPtr), |
45 | 45 | fsa(FSAType::getFSA(fsaFileStartPtr, initializeDeserializer(processorType))), |
46 | 46 | separatorsList(getSeparatorsList(fsaFileStartPtr)), |
... | ... | @@ -94,12 +94,12 @@ const CaseConverter& Environment::getCaseConverter() const { |
94 | 94 | return this->caseConverter; |
95 | 95 | } |
96 | 96 | |
97 | -void Environment::setTagset(DefaultTagset& tagset) { | |
97 | +void Environment::setTagset(IdResolverImpl& tagset) { | |
98 | 98 | this->tagset = tagset; |
99 | 99 | this->tagset.setCharsetConverter(currentCharsetConverter); |
100 | 100 | } |
101 | 101 | |
102 | -const DefaultTagset& Environment::getTagset() const { | |
102 | +const IdResolverImpl& Environment::getTagset() const { | |
103 | 103 | return this->tagset; |
104 | 104 | } |
105 | 105 | |
... | ... | @@ -115,8 +115,8 @@ void Environment::setDictionaryFile(const std::string& filename) { |
115 | 115 | this->segrulesFSAsMap = createSegrulesFSAsMap(this->fsaFileStartPtr); |
116 | 116 | this->currSegrulesFSA = getDefaultSegrulesFSA(this->segrulesFSAsMap, this->fsaFileStartPtr); |
117 | 117 | this->isFromFile = true; |
118 | - this->tagset = DefaultTagset(fsaFileStartPtr, currentCharsetConverter); | |
119 | - this->qualifiers = Qualifiers(fsaFileStartPtr); | |
118 | + this->tagset = IdResolverImpl(fsaFileStartPtr, currentCharsetConverter); | |
119 | +// this->qualifiers = Qualifiers(fsaFileStartPtr); | |
120 | 120 | } |
121 | 121 | |
122 | 122 | const SegrulesFSA& Environment::getCurrentSegrulesFSA() const { |
... | ... | @@ -156,9 +156,9 @@ const CasePatternHelper& Environment::getCasePatternHelper() const { |
156 | 156 | return *this->casePatternHelper; |
157 | 157 | } |
158 | 158 | |
159 | -const Qualifiers& Environment::getQualifiersHelper() const { | |
160 | - return this->qualifiers; | |
161 | -} | |
159 | +//const Qualifiers& Environment::getQualifiersHelper() const { | |
160 | +// return this->qualifiers; | |
161 | +//} | |
162 | 162 | |
163 | 163 | bool Environment::isSeparator(uint32_t codepoint) const { |
164 | 164 | return binary_search( |
... | ... |
morfeusz/Environment.hpp
... | ... | @@ -15,7 +15,7 @@ |
15 | 15 | #include "fsa/fsa.hpp" |
16 | 16 | #include "segrules/segrules.hpp" |
17 | 17 | #include "const.hpp" |
18 | -#include "DefaultTagset.hpp" | |
18 | +#include "IdResolverImpl.hpp" | |
19 | 19 | #include "InterpsGroup.hpp" |
20 | 20 | #include "case/CasePatternHelper.hpp" |
21 | 21 | #include "Qualifiers.hpp" |
... | ... | @@ -82,14 +82,14 @@ public: |
82 | 82 | * |
83 | 83 | * @param tagset |
84 | 84 | */ |
85 | - void setTagset(DefaultTagset& tagset); | |
85 | + void setTagset(IdResolverImpl& tagset); | |
86 | 86 | |
87 | 87 | /** |
88 | 88 | * Gets currently used tagset. |
89 | 89 | * |
90 | 90 | * @return |
91 | 91 | */ |
92 | - const DefaultTagset& getTagset() const; | |
92 | + const IdResolverImpl& getTagset() const; | |
93 | 93 | |
94 | 94 | /** |
95 | 95 | * Sets binary dictionary file used by this environment. |
... | ... | @@ -143,7 +143,7 @@ public: |
143 | 143 | * Return current qualifiers helper. |
144 | 144 | * @return |
145 | 145 | */ |
146 | - const Qualifiers& getQualifiersHelper() const; | |
146 | +// const Qualifiers& getQualifiersHelper() const; | |
147 | 147 | |
148 | 148 | /** |
149 | 149 | * Returns true iff given codepoint denotes a separator char for ign handling. |
... | ... | @@ -156,8 +156,8 @@ public: |
156 | 156 | private: |
157 | 157 | const CharsetConverter* currentCharsetConverter; |
158 | 158 | const CaseConverter caseConverter; |
159 | - DefaultTagset tagset; | |
160 | - Qualifiers qualifiers; | |
159 | + IdResolverImpl tagset; | |
160 | +// Qualifiers qualifiers; | |
161 | 161 | |
162 | 162 | const unsigned char* fsaFileStartPtr; |
163 | 163 | const FSAType* fsa; |
... | ... |
morfeusz/IdResolverImpl.cpp
0 → 100644
1 | + | |
2 | +#include "IdResolverImpl.hpp" | |
3 | +#include "fsa/const.hpp" | |
4 | +#include "utils.hpp" | |
5 | +#include "const.hpp" | |
6 | +#include "deserialization/deserializationUtils.hpp" | |
7 | +#include "morfeusz2.h" | |
8 | + | |
9 | +using namespace std; | |
10 | + | |
11 | +namespace morfeusz { | |
12 | + | |
13 | + inline static void readTags(const unsigned char*& currPtr, std::vector<std::string>& tags) { | |
14 | + tags.clear(); | |
15 | + tags.resize(65536); | |
16 | + uint16_t tagsNum = readInt16(currPtr); | |
17 | + for (unsigned int i = 0; i < tagsNum; i++) { | |
18 | + unsigned int tagNum = readInt16(currPtr); | |
19 | + tags[tagNum] = readString(currPtr); | |
20 | + } | |
21 | + } | |
22 | + | |
23 | + inline static void createReverseMapping(IdResolverImpl::IdStringMapping& mapping) { | |
24 | + mapping.string2Id.clear(); | |
25 | + for (unsigned int i = 0; i < mapping.id2String.size(); i++) { | |
26 | + mapping.string2Id[mapping.id2String[i]] = i; | |
27 | + } | |
28 | + } | |
29 | + | |
30 | + template <class T> | |
31 | + inline static const T& getFromMap(map<string, T> string2T, const string& key, const char* errMsg) { | |
32 | + if (string2T.count(key) != 0) { | |
33 | +// map<string, T>::const_iterator it; | |
34 | +// it = string2T.find(key); | |
35 | + return string2T.find(key)->second; | |
36 | + } | |
37 | + else { | |
38 | + throw MorfeuszException(string(errMsg) + ": " + key); | |
39 | + } | |
40 | + } | |
41 | + | |
42 | + inline static void convertCharset(const CharsetConverter* charsetConverter, IdResolverImpl::IdStringMapping& mapping) { | |
43 | + for (unsigned int i = 0; i < mapping.id2String.size(); i++) { | |
44 | + mapping.id2String[i] = charsetConverter->fromUTF8( | |
45 | + charsetConverter->toUTF8(mapping.id2String[i])); | |
46 | + } | |
47 | + createReverseMapping(mapping); | |
48 | + } | |
49 | + | |
50 | + IdResolverImpl::IdResolverImpl(const unsigned char* ptr, const CharsetConverter* charsetConverter) | |
51 | + : tags(), | |
52 | + names(), | |
53 | + labels(), | |
54 | + labelsAsSets(), | |
55 | + charsetConverter(charsetConverter) { | |
56 | + uint32_t fsaSize = readInt32Const(ptr + FSA_DATA_SIZE_OFFSET); | |
57 | + const unsigned char* currPtr = ptr + FSA_DATA_OFFSET + fsaSize + 4; | |
58 | + | |
59 | + readTags(currPtr, this->tags.id2String); | |
60 | + createReverseMapping(this->tags); | |
61 | + | |
62 | + readTags(currPtr, this->names.id2String); | |
63 | + createReverseMapping(this->names); | |
64 | + | |
65 | + readTags(currPtr, this->labels.id2String); | |
66 | + createReverseMapping(this->labels); | |
67 | + for (unsigned int i = 0; i < this->labels.id2String.size(); i++) { | |
68 | + vector<string> labelsVector = split(this->labels.id2String[i], LABELS_SEPARATOR); | |
69 | + this->labelsAsSets.push_back(set<string>(labelsVector.begin(), labelsVector.end())); | |
70 | + } | |
71 | + | |
72 | + setCharsetConverter(charsetConverter); | |
73 | + } | |
74 | + | |
75 | + // FIXME - probably should not convert whole tagset on every setCharsetConverter method invocation. | |
76 | + | |
77 | + void IdResolverImpl::setCharsetConverter(const CharsetConverter* charsetConverter) { | |
78 | + convertCharset(charsetConverter, this->tags); | |
79 | + convertCharset(charsetConverter, this->names); | |
80 | + convertCharset(charsetConverter, this->labels); | |
81 | +// for (unsigned int i = 0; i < tags.id2String.size(); i++) { | |
82 | +// tags.id2String[i] = charsetConverter->fromUTF8( | |
83 | +// this->charsetConverter->toUTF8(tags[i])); | |
84 | +// } | |
85 | +// | |
86 | +// for (unsigned int j = 0; j < names.id2String.size(); j++) { | |
87 | +// names[j] = charsetConverter->fromUTF8( | |
88 | +// this->charsetConverter->toUTF8(names[j])); | |
89 | +// } | |
90 | + | |
91 | + this->charsetConverter = charsetConverter; | |
92 | + } | |
93 | + | |
94 | + const string& IdResolverImpl::getTag(const int tagId) const { | |
95 | + return this->tags.id2String.at(tagId); | |
96 | + } | |
97 | + | |
98 | + int IdResolverImpl::getTagId(const std::string& tag) const { | |
99 | + return getFromMap(this->tags.string2Id, tag, "Invalid tag"); | |
100 | + } | |
101 | + | |
102 | + const string& IdResolverImpl::getName(const int nameId) const { | |
103 | + return this->names.id2String.at(nameId); | |
104 | + } | |
105 | + | |
106 | + int IdResolverImpl::getNameId(const std::string& name) const { | |
107 | + return getFromMap(this->names.string2Id, name, "Invalid name"); | |
108 | + } | |
109 | + | |
110 | + const string& IdResolverImpl::getLabelsAsString(int labelsId) const { | |
111 | + return this->labels.id2String.at(labelsId); | |
112 | + } | |
113 | + | |
114 | + const set<string>& IdResolverImpl::getLabels(int labelsId) const { | |
115 | + return this->labelsAsSets.at(labelsId); | |
116 | + } | |
117 | + | |
118 | + int IdResolverImpl::getLabelsId(const string& labelsStr) const { | |
119 | + return getFromMap(this->labels.string2Id, labelsStr, "Invalid labels string"); | |
120 | + } | |
121 | + | |
122 | + size_t IdResolverImpl::getTagsCount() const { | |
123 | + return this->tags.id2String.size(); | |
124 | + } | |
125 | + | |
126 | + size_t IdResolverImpl::getNamesCount() const { | |
127 | + return this->names.id2String.size(); | |
128 | + } | |
129 | + | |
130 | + size_t IdResolverImpl::getLabelsCount() const { | |
131 | + return this->labels.id2String.size(); | |
132 | + } | |
133 | +} | |
... | ... |
morfeusz/IdResolverImpl.hpp
0 → 100644
1 | +/* | |
2 | + * File: IdResolverImpl.hpp | |
3 | + * Author: mlenart | |
4 | + * | |
5 | + * Created on 12 listopad 2013, 14:09 | |
6 | + */ | |
7 | + | |
8 | +#ifndef DEFAULTTAGSET_HPP | |
9 | +#define DEFAULTTAGSET_HPP | |
10 | + | |
11 | +#include <string> | |
12 | +#include <vector> | |
13 | +#include <map> | |
14 | +#include "morfeusz2.h" | |
15 | +#include "charset/CharsetConverter.hpp" | |
16 | + | |
17 | +namespace morfeusz { | |
18 | + | |
19 | + class IdResolverImpl : public IdResolver { | |
20 | + public: | |
21 | + | |
22 | + IdResolverImpl(const unsigned char* ptr, const CharsetConverter* charsetConverter); | |
23 | + | |
24 | + void setCharsetConverter(const CharsetConverter* charsetConverter); | |
25 | + | |
26 | + const std::string& getTag(const int tagId) const; | |
27 | + int getTagId(const std::string& tag) const; | |
28 | + | |
29 | + const std::string& getName(const int nameId) const; | |
30 | + int getNameId(const std::string& name) const; | |
31 | + | |
32 | + const std::string& getLabelsAsString(int labelsId) const; | |
33 | + const std::set<std::string>& getLabels(int labelsId) const; | |
34 | + int getLabelsId(const std::string& labelsStr) const; | |
35 | + | |
36 | + size_t getTagsCount() const; | |
37 | + size_t getNamesCount() const; | |
38 | + size_t getLabelsCount() const; | |
39 | + | |
40 | + virtual ~IdResolverImpl() { | |
41 | + } | |
42 | + | |
43 | + struct IdStringMapping { | |
44 | + std::vector<std::string> id2String; | |
45 | + std::map<std::string, int> string2Id; | |
46 | + }; | |
47 | + | |
48 | + private: | |
49 | + | |
50 | + IdStringMapping tags; | |
51 | + IdStringMapping names; | |
52 | + IdStringMapping labels; | |
53 | + std::vector< std::set<std::string> > labelsAsSets; | |
54 | +// | |
55 | +// std::vector<std::string> tags; | |
56 | +// std::vector<std::string> names; | |
57 | + const CharsetConverter* charsetConverter; | |
58 | + }; | |
59 | + | |
60 | +} | |
61 | + | |
62 | +#endif /* DEFAULTTAGSET_HPP */ | |
63 | + | |
... | ... |
morfeusz/MorfeuszInternal.cpp
... | ... | @@ -196,7 +196,7 @@ namespace morfeusz { |
196 | 196 | vector<MorphInterpretation>& results, |
197 | 197 | bool insideIgnHandler) const { |
198 | 198 | if (handleWhitespacesAtBeginning(env, reader, startNodeNum, results)) { |
199 | - startNodeNum = results.back().getEndNode(); | |
199 | + startNodeNum = results.back().endNode; | |
200 | 200 | } |
201 | 201 | |
202 | 202 | if (reader.isAtEnd()) { |
... | ... | @@ -361,8 +361,7 @@ namespace morfeusz { |
361 | 361 | int startNodeNum, |
362 | 362 | std::vector<MorphInterpretation>& results) const { |
363 | 363 | string orth(reader.readWhitespacesChunk()); |
364 | - MorphInterpretation mi(MorphInterpretation::createWhitespace(startNodeNum, startNodeNum + 1, orth, this->getDefaultAnalyzerTagset())); | |
365 | - results.push_back(mi); | |
364 | + results.push_back(MorphInterpretation::createWhitespace(startNodeNum, startNodeNum + 1, orth)); | |
366 | 365 | } |
367 | 366 | |
368 | 367 | void MorfeuszInternal::handleIgnChunk( |
... | ... | @@ -390,7 +389,7 @@ namespace morfeusz { |
390 | 389 | if (nonSeparatorInputEnd != prevInput) { |
391 | 390 | // there are non-separators + separators |
392 | 391 | |
393 | - int startNode = results.empty() ? startNodeNum : results.back().getEndNode(); | |
392 | + int startNode = results.empty() ? startNodeNum : results.back().endNode; | |
394 | 393 | // process part before separators |
395 | 394 | TextReader newReader1(prevInput, nonSeparatorInputEnd, env); |
396 | 395 | notMatchingCaseSegs = 0; |
... | ... | @@ -400,7 +399,7 @@ namespace morfeusz { |
400 | 399 | if (currInput == chunkBounds.wordEndPtr) { |
401 | 400 | currInput = chunkBounds.chunkEndPtr; |
402 | 401 | } |
403 | - startNode = results.empty() ? startNodeNum : results.back().getEndNode(); | |
402 | + startNode = results.empty() ? startNodeNum : results.back().endNode; | |
404 | 403 | TextReader newReader2(nonSeparatorInputEnd, currInput, env); |
405 | 404 | this->processOneWord(env, newReader2, startNode, results, true); |
406 | 405 | } |
... | ... | @@ -409,7 +408,7 @@ namespace morfeusz { |
409 | 408 | if (currInput == chunkBounds.wordEndPtr) { |
410 | 409 | currInput = chunkBounds.chunkEndPtr; |
411 | 410 | } |
412 | - int startNode = results.empty() ? startNodeNum : results.back().getEndNode(); | |
411 | + int startNode = results.empty() ? startNodeNum : results.back().endNode; | |
413 | 412 | TextReader newReader3(prevInput, currInput, env); |
414 | 413 | notMatchingCaseSegs = 0; |
415 | 414 | this->processOneWord(env, newReader3, startNode, results, true); |
... | ... | @@ -421,7 +420,7 @@ namespace morfeusz { |
421 | 420 | if (!env.isSeparator(codepoint)) { |
422 | 421 | if (separatorFound) { |
423 | 422 | // process part after separators |
424 | - int startNode = results.empty() ? startNodeNum : results.back().getEndNode(); | |
423 | + int startNode = results.empty() ? startNodeNum : results.back().endNode; | |
425 | 424 | TextReader newReader4(prevInput, chunkBounds.chunkEndPtr, env); |
426 | 425 | this->processOneWord(env, newReader4, startNode, results, true); |
427 | 426 | } |
... | ... | @@ -438,16 +437,15 @@ namespace morfeusz { |
438 | 437 | std::vector<MorphInterpretation>& results) const { |
439 | 438 | string orth(chunkBounds.chunkStartPtr, chunkBounds.chunkEndPtr); |
440 | 439 | string lemma(chunkBounds.wordStartPtr, chunkBounds.wordEndPtr); |
441 | - MorphInterpretation interp(MorphInterpretation::createIgn(startNodeNum, startNodeNum + 1, orth, lemma, env.getTagset())); | |
442 | - results.push_back(interp); | |
440 | + results.push_back(MorphInterpretation::createIgn(startNodeNum, startNodeNum + 1, orth, lemma)); | |
443 | 441 | } |
444 | 442 | |
445 | - void MorfeuszInternal::analyzeOneWord( | |
443 | + void MorfeuszInternal::analyseOneWord( | |
446 | 444 | TextReader& reader, |
447 | 445 | vector<MorphInterpretation>& results) const { |
448 | 446 | this->processOneWord(this->analyzerEnv, reader, nextNodeNum, results); |
449 | 447 | if (!results.empty()) { |
450 | - nextNodeNum = results.back().getEndNode(); | |
448 | + nextNodeNum = results.back().endNode; | |
451 | 449 | } |
452 | 450 | } |
453 | 451 | |
... | ... | @@ -457,14 +455,14 @@ namespace morfeusz { |
457 | 455 | } |
458 | 456 | } |
459 | 457 | |
460 | - ResultsIterator* MorfeuszInternal::analyze(const string& text) const { | |
458 | + ResultsIterator* MorfeuszInternal::analyse(const string& text) const { | |
461 | 459 | adjustTokensCounter(); |
462 | 460 | char* textCopy = new char[text.length() + 1]; |
463 | 461 | strcpy(textCopy, text.c_str()); |
464 | 462 | return new ResultsIteratorImpl(*this, textCopy, textCopy + text.length(), true); |
465 | 463 | } |
466 | 464 | |
467 | - ResultsIterator* MorfeuszInternal::analyzeWithCopy(const char* text) const { | |
465 | + ResultsIterator* MorfeuszInternal::analyseWithCopy(const char* text) const { | |
468 | 466 | adjustTokensCounter(); |
469 | 467 | long n = strlen(text); |
470 | 468 | char* textCopy = new char[n + 1]; |
... | ... | @@ -472,16 +470,16 @@ namespace morfeusz { |
472 | 470 | return new ResultsIteratorImpl(*this, textCopy, textCopy + n, true); |
473 | 471 | } |
474 | 472 | |
475 | - ResultsIterator* MorfeuszInternal::analyze(const char* text) const { | |
473 | + ResultsIterator* MorfeuszInternal::analyse(const char* text) const { | |
476 | 474 | adjustTokensCounter(); |
477 | 475 | return new ResultsIteratorImpl(*this, text, text + strlen(text), false); |
478 | 476 | } |
479 | 477 | |
480 | - void MorfeuszInternal::analyze(const string& text, vector<MorphInterpretation>& results) const { | |
478 | + void MorfeuszInternal::analyse(const string& text, vector<MorphInterpretation>& results) const { | |
481 | 479 | adjustTokensCounter(); |
482 | 480 | TextReader reader(text, this->analyzerEnv); |
483 | 481 | while (!reader.isAtEnd()) { |
484 | - analyzeOneWord(reader, results); | |
482 | + analyseOneWord(reader, results); | |
485 | 483 | } |
486 | 484 | } |
487 | 485 | |
... | ... | @@ -496,12 +494,12 @@ namespace morfeusz { |
496 | 494 | } |
497 | 495 | } |
498 | 496 | |
499 | - void MorfeuszInternal::generate(const std::string& lemma, int tagnum, vector<MorphInterpretation>& result) const { | |
497 | + void MorfeuszInternal::generate(const std::string& lemma, int tagId, vector<MorphInterpretation>& result) const { | |
500 | 498 | vector<MorphInterpretation> partRes; |
501 | 499 | this->generate(lemma, partRes); |
502 | 500 | for (unsigned int i = 0; i < partRes.size(); i++) { |
503 | 501 | // XXX - someday it should be improved |
504 | - if (partRes[i].getTagnum() == tagnum) { | |
502 | + if (partRes[i].tagId == tagId) { | |
505 | 503 | result.push_back(partRes[i]); |
506 | 504 | } |
507 | 505 | } |
... | ... | @@ -524,7 +522,6 @@ namespace morfeusz { |
524 | 522 | } |
525 | 523 | |
526 | 524 | void MorfeuszInternal::setCaseHandling(CaseHandling caseHandling) { |
527 | - this->options.caseHandling = caseHandling; | |
528 | 525 | this->analyzerEnv.setCaseSensitive(caseHandling != IGNORE_CASE); |
529 | 526 | } |
530 | 527 | |
... | ... | @@ -541,12 +538,15 @@ namespace morfeusz { |
541 | 538 | this->options.debug = debug; |
542 | 539 | } |
543 | 540 | |
544 | - const Tagset<string>& MorfeuszInternal::getDefaultAnalyzerTagset() const { | |
541 | + const IdResolver& MorfeuszInternal::getDefaultAnalyzerTagset() const { | |
545 | 542 | return this->generatorEnv.getTagset(); |
546 | 543 | } |
547 | 544 | |
548 | - const Tagset<string>& MorfeuszInternal::getDefaultGeneratorTagset() const { | |
545 | + const IdResolver& MorfeuszInternal::getDefaultGeneratorTagset() const { | |
546 | + return this->analyzerEnv.getTagset(); | |
547 | + } | |
548 | + | |
549 | + const IdResolver& MorfeuszInternal::getIdResolver() const { | |
549 | 550 | return this->analyzerEnv.getTagset(); |
550 | 551 | } |
551 | - | |
552 | 552 | } |
... | ... |
morfeusz/MorfeuszInternal.hpp
... | ... | @@ -60,15 +60,15 @@ namespace morfeusz { |
60 | 60 | |
61 | 61 | virtual ~MorfeuszInternal(); |
62 | 62 | |
63 | - ResultsIterator* analyze(const std::string& text) const; | |
63 | + ResultsIterator* analyse(const std::string& text) const; | |
64 | 64 | |
65 | - ResultsIterator* analyze(const char* text) const; | |
65 | + ResultsIterator* analyse(const char* text) const; | |
66 | 66 | |
67 | - void analyze(const std::string& text, std::vector<MorphInterpretation>& result) const; | |
67 | + void analyse(const std::string& text, std::vector<MorphInterpretation>& result) const; | |
68 | 68 | |
69 | 69 | void generate(const std::string& lemma, std::vector<MorphInterpretation>& result) const; |
70 | 70 | |
71 | - void generate(const std::string& lemma, int tagnum, std::vector<MorphInterpretation>& result) const; | |
71 | + void generate(const std::string& lemma, int tagId, std::vector<MorphInterpretation>& result) const; | |
72 | 72 | |
73 | 73 | void setCharset(Charset encoding); |
74 | 74 | |
... | ... | @@ -84,11 +84,13 @@ namespace morfeusz { |
84 | 84 | |
85 | 85 | void setDebug(bool debug); |
86 | 86 | |
87 | - const Tagset<std::string>& getDefaultAnalyzerTagset() const; | |
87 | + const IdResolver& getDefaultAnalyzerTagset() const; | |
88 | 88 | |
89 | - const Tagset<std::string>& getDefaultGeneratorTagset() const; | |
89 | + const IdResolver& getDefaultGeneratorTagset() const; | |
90 | 90 | |
91 | - ResultsIterator* analyzeWithCopy(const char* text) const; | |
91 | + ResultsIterator* analyseWithCopy(const char* text) const; | |
92 | + | |
93 | + const IdResolver& getIdResolver() const; | |
92 | 94 | |
93 | 95 | friend class ResultsIteratorImpl; |
94 | 96 | |
... | ... | @@ -101,7 +103,7 @@ namespace morfeusz { |
101 | 103 | std::vector<MorphInterpretation>& result, |
102 | 104 | bool insideIgnHandler = false) const; |
103 | 105 | |
104 | - void analyzeOneWord( | |
106 | + void analyseOneWord( | |
105 | 107 | TextReader& reader, |
106 | 108 | std::vector<MorphInterpretation>& results) const; |
107 | 109 | |
... | ... |
morfeusz/MorphInterpretation.cpp
... | ... | @@ -15,49 +15,33 @@ using namespace std; |
15 | 15 | |
16 | 16 | namespace morfeusz { |
17 | 17 | |
18 | - /** | |
19 | - * used for ignotium and whitespace tags who don't have any qualifiers. | |
20 | - */ | |
21 | - static vector<string> emptyQualifiers; | |
22 | - | |
23 | - MorphInterpretation::MorphInterpretation( | |
24 | - int startNode, | |
25 | - int endNode, | |
26 | - const string& orth, | |
27 | - const string& lemma, | |
28 | - int tagnum, | |
29 | - int namenum, | |
30 | - const vector<string>* qualifiers, | |
31 | - const Tagset<string>* tagset) | |
32 | - : startNode(startNode), | |
33 | - endNode(endNode), | |
34 | - orth(orth), | |
35 | - lemma(lemma), | |
36 | - tagnum(tagnum), | |
37 | - namenum(namenum), | |
38 | - qualifiers(qualifiers), | |
39 | - tagset(tagset) { | |
40 | - } | |
41 | - | |
42 | - MorphInterpretation::MorphInterpretation() | |
43 | - : startNode(), | |
44 | - endNode(), | |
45 | - orth(), | |
46 | - lemma(), | |
47 | - tagnum(), | |
48 | - namenum(), | |
49 | - qualifiers(&emptyQualifiers), | |
50 | - tagset(NULL) { | |
51 | - | |
52 | - } | |
53 | - | |
54 | - MorphInterpretation MorphInterpretation::createIgn(int startNode, int endNode, const std::string& orth, const std::string& lemma, const Tagset<string>& tagset) { | |
55 | - MorphInterpretation mi(startNode, endNode, orth, lemma, 0, 0, &emptyQualifiers, &tagset); | |
18 | + MorphInterpretation MorphInterpretation::createIgn( | |
19 | + int startNode, int endNode, | |
20 | + const std::string& orth, | |
21 | + const std::string& lemma) { | |
22 | + MorphInterpretation mi; | |
23 | + mi.startNode = startNode; | |
24 | + mi.endNode = endNode; | |
25 | + mi.orth = orth; | |
26 | + mi.lemma = lemma; | |
27 | + mi.tagId = 0; | |
28 | + mi.nameId = 0; | |
29 | + mi.labelsId = 0; | |
56 | 30 | return mi; |
57 | 31 | } |
58 | - | |
59 | - MorphInterpretation MorphInterpretation::createWhitespace(int startNode, int endNode, const std::string& orth, const Tagset<string>& tagset) { | |
60 | - MorphInterpretation mi(startNode, endNode, orth, orth, 1, 0, &emptyQualifiers, &tagset); | |
32 | + | |
33 | + /** | |
34 | + * Creates new instance with "sp" tag (meaning: "this is a sequence of whitespaces") | |
35 | + */ | |
36 | + MorphInterpretation MorphInterpretation::createWhitespace(int startNode, int endNode, const std::string& orth) { | |
37 | + MorphInterpretation mi; | |
38 | + mi.startNode = startNode; | |
39 | + mi.endNode = endNode; | |
40 | + mi.orth = orth; | |
41 | + mi.lemma = orth; | |
42 | + mi.tagId = 1; | |
43 | + mi.nameId = 0; | |
44 | + mi.labelsId = 0; | |
61 | 45 | return mi; |
62 | 46 | } |
63 | 47 | |
... | ... | @@ -77,17 +61,6 @@ namespace morfeusz { |
77 | 61 | && hasEnding(this->lemma, homonymId); |
78 | 62 | } |
79 | 63 | |
80 | - static inline string getQualifiersStr(const MorphInterpretation& mi) { | |
81 | - string res; | |
82 | - for (unsigned int i = 0; i < mi.getQualifiers().size(); i++) { | |
83 | - res += mi.getQualifiers()[i]; | |
84 | - if (i + 1 < mi.getQualifiers().size()) { | |
85 | - res += "|"; | |
86 | - } | |
87 | - } | |
88 | - return res; | |
89 | - } | |
90 | - | |
91 | 64 | std::string MorphInterpretation::toString(bool includeNodeNumbers) const { |
92 | 65 | std::stringstream res; |
93 | 66 | if (includeNodeNumbers) { |
... | ... | @@ -98,13 +71,13 @@ namespace morfeusz { |
98 | 71 | res << lemma; |
99 | 72 | res << ","; |
100 | 73 | |
101 | - res << getTag(); | |
102 | - if (!getName().empty()) { | |
103 | - res << "," << getName(); | |
104 | - } | |
105 | - if (!getQualifiers().empty()) { | |
106 | - res << "," << getQualifiersStr(*this); | |
107 | - } | |
74 | + // res << getTag(); | |
75 | + // if (!getName().empty()) { | |
76 | + // res << "," << getName(); | |
77 | + // } | |
78 | + // if (!getQualifiers().empty()) { | |
79 | + // res << "," << getQualifiersStr(*this); | |
80 | + // } | |
108 | 81 | return res.str(); |
109 | 82 | } |
110 | 83 | |
... | ... |
morfeusz/ResultsIteratorImpl.cpp
... | ... | @@ -52,7 +52,7 @@ namespace morfeusz { |
52 | 52 | assert(bufferIterator == buffer.end()); |
53 | 53 | buffer.resize(0); |
54 | 54 | if (!reader.isAtEnd()) { |
55 | - morfeusz.analyzeOneWord(reader, buffer); | |
55 | + morfeusz.analyseOneWord(reader, buffer); | |
56 | 56 | } |
57 | 57 | bufferIterator = buffer.begin(); |
58 | 58 | return bufferIterator != buffer.end(); |
... | ... |
morfeusz/c_api/ResultsManager.cpp
... | ... | @@ -15,8 +15,8 @@ namespace morfeusz { |
15 | 15 | |
16 | 16 | static const int initialSize = 1024; |
17 | 17 | |
18 | - ResultsManager::ResultsManager() | |
19 | - : results(new InterpMorf[initialSize]()), | |
18 | + ResultsManager::ResultsManager(const Morfeusz* morfeusz) | |
19 | + : morfeusz(morfeusz), results(new InterpMorf[initialSize]()), | |
20 | 20 | resultsArraySize(initialSize) { |
21 | 21 | } |
22 | 22 | |
... | ... | @@ -45,11 +45,11 @@ namespace morfeusz { |
45 | 45 | |
46 | 46 | InterpMorf ResultsManager::convertOneResult(const MorphInterpretation& res) { |
47 | 47 | InterpMorf convertedRes; |
48 | - convertedRes.p = res.getStartNode(); | |
49 | - convertedRes.k = res.getEndNode(); | |
50 | - convertedRes.forma = const_cast<char*>(res.getOrth().c_str()); | |
51 | - convertedRes.haslo = const_cast<char*>(res.getLemma().c_str()); | |
52 | - convertedRes.interp = const_cast<char*>(res.getTag().c_str()); | |
48 | + convertedRes.p = res.startNode; | |
49 | + convertedRes.k = res.endNode; | |
50 | + convertedRes.forma = const_cast<char*>(res.orth.c_str()); | |
51 | + convertedRes.haslo = const_cast<char*>(res.lemma.c_str()); | |
52 | + convertedRes.interp = const_cast<char*>(morfeusz->getIdResolver().getTag(res.tagId).c_str()); | |
53 | 53 | return convertedRes; |
54 | 54 | } |
55 | 55 | } |
... | ... |
morfeusz/c_api/ResultsManager.hpp
... | ... | @@ -16,11 +16,11 @@ namespace morfeusz { |
16 | 16 | |
17 | 17 | class ResultsManager { |
18 | 18 | public: |
19 | - ResultsManager(); | |
19 | + ResultsManager(const Morfeusz* morfeusz); | |
20 | 20 | InterpMorf* convertResults(const std::vector<MorphInterpretation>& res); |
21 | 21 | virtual ~ResultsManager(); |
22 | 22 | private: |
23 | - | |
23 | + const Morfeusz* morfeusz; | |
24 | 24 | InterpMorf* results; |
25 | 25 | unsigned int resultsArraySize; |
26 | 26 | |
... | ... |
morfeusz/cli/outputUtils.hpp
... | ... | @@ -13,14 +13,14 @@ |
13 | 13 | |
14 | 14 | namespace morfeusz { |
15 | 15 | |
16 | -void printMorphResults(const std::vector<MorphInterpretation>& res, bool printNodeNumbers) { | |
16 | +void printMorphResults(const Morfeusz& morfeusz, const std::vector<MorphInterpretation>& res, bool printNodeNumbers) { | |
17 | 17 | printf("["); |
18 | 18 | int prevStart = -1; |
19 | 19 | int prevEnd = -1; |
20 | 20 | for (unsigned int i = 0; i < res.size(); i++) { |
21 | 21 | const MorphInterpretation& mi = res[i]; |
22 | 22 | if (prevStart != -1 |
23 | - && (prevStart != mi.getStartNode() || prevEnd != mi.getEndNode())) { | |
23 | + && (prevStart != mi.startNode || prevEnd != mi.endNode)) { | |
24 | 24 | printf("]\n["); |
25 | 25 | } |
26 | 26 | else if (prevStart != -1) { |
... | ... | @@ -28,24 +28,16 @@ void printMorphResults(const std::vector<MorphInterpretation>& res, bool printNo |
28 | 28 | } |
29 | 29 | // printf("%s", mi.toString(true).c_str()); |
30 | 30 | if (printNodeNumbers) { |
31 | - printf("%d,%d,", mi.getStartNode(), mi.getEndNode()); | |
31 | + printf("%d,%d,", mi.startNode, mi.endNode); | |
32 | 32 | } |
33 | - printf("%s,%s,%s,%s,", | |
34 | - mi.getOrth().c_str(), | |
35 | - mi.getLemma().c_str(), | |
36 | - mi.getTag().c_str(), | |
37 | - mi.getName().empty() ? "_" : mi.getName().c_str()); | |
38 | - if (!mi.getQualifiers().empty()) { | |
39 | - printf("%s", mi.getQualifiers()[0].c_str()); | |
40 | - for (unsigned int i = 1; i < mi.getQualifiers().size(); i++) { | |
41 | - printf("|%s", mi.getQualifiers()[i].c_str()); | |
42 | - } | |
43 | - } | |
44 | - else { | |
45 | - printf("_"); | |
46 | - } | |
47 | - prevStart = mi.getStartNode(); | |
48 | - prevEnd = mi.getEndNode(); | |
33 | + printf("%s,%s,%s,%s,%s", | |
34 | + mi.orth.c_str(), | |
35 | + mi.lemma.c_str(), | |
36 | + morfeusz.getIdResolver().getTag(mi.tagId).c_str(), | |
37 | + mi.nameId == 0 ? "_" : morfeusz.getIdResolver().getName(mi.nameId).c_str(), | |
38 | + mi.labelsId == 0 ? "_" : morfeusz.getIdResolver().getLabelsAsString(mi.labelsId).c_str()); | |
39 | + prevStart = mi.startNode; | |
40 | + prevEnd = mi.endNode; | |
49 | 41 | } |
50 | 42 | printf("]\n"); |
51 | 43 | } |
... | ... |
morfeusz/const.cpp
morfeusz/const.hpp
morfeusz/deserialization/deserializationUtils.hpp
... | ... | @@ -48,16 +48,6 @@ inline std::string readString(const unsigned char*& currPtr) { |
48 | 48 | return res; |
49 | 49 | } |
50 | 50 | |
51 | -inline void readTags(const unsigned char*& currPtr, std::vector<std::string>& tags) { | |
52 | - tags.clear(); | |
53 | - tags.resize(65536); | |
54 | - uint16_t tagsNum = readInt16(currPtr); | |
55 | - for (unsigned int i = 0; i < tagsNum; i++) { | |
56 | - unsigned int tagNum = readInt16(currPtr); | |
57 | - tags[tagNum] = readString(currPtr); | |
58 | - } | |
59 | -} | |
60 | - | |
61 | 51 | } |
62 | 52 | |
63 | 53 | #endif /* DESERIALIZATIONUTILS_HPP */ |
... | ... |
morfeusz/deserialization/morphInterps/EncodedInterpretation.hpp
morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.cpp
... | ... | @@ -112,14 +112,24 @@ void InterpretedChunksDecoder4Analyzer::decodeMorphInterpretation( |
112 | 112 | string lemma(params.lemma4Prefixes); |
113 | 113 | lemma.reserve(lemma.size() + normalizedCodepoints.size()); |
114 | 114 | this->decodeLemma(ei.value, params.chunk.codepointsNum, false, lemma); |
115 | - MorphInterpretation mi( | |
116 | - params.startNode, params.endNode, | |
117 | - params.orth, lemma, | |
118 | - ei.tag, | |
119 | - ei.nameClassifier, | |
120 | - &env.getQualifiersHelper().getQualifiers(ei.qualifiers), | |
121 | - &env.getTagset()); | |
122 | - out.push_back(mi); | |
115 | + size_t newIdx = out.size(); | |
116 | + out.resize(newIdx + 1); | |
117 | + MorphInterpretation& newElem = out[newIdx]; | |
118 | + newElem.startNode = params.startNode; | |
119 | + newElem.endNode = params.endNode; | |
120 | + newElem.orth = params.orth; | |
121 | + newElem.lemma = lemma; | |
122 | + newElem.tagId = ei.tag; | |
123 | + newElem.nameId = ei.nameClassifier; | |
124 | + newElem.labelsId = ei.qualifiers; | |
125 | +// MorphInterpretation mi( | |
126 | +// params.startNode, params.endNode, | |
127 | +// params.orth, lemma, | |
128 | +// ei.tag, | |
129 | +// ei.nameClassifier, | |
130 | +// &env.getQualifiersHelper().getQualifiers(ei.qualifiers), | |
131 | +// &env.getTagset()); | |
132 | +// out.push_back(mi); | |
123 | 133 | } |
124 | 134 | } |
125 | 135 | |
... | ... |
morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Generator.cpp
... | ... | @@ -13,92 +13,102 @@ using namespace std; |
13 | 13 | |
14 | 14 | namespace morfeusz { |
15 | 15 | |
16 | -InterpretedChunksDecoder4Generator::InterpretedChunksDecoder4Generator(const Environment& env) : InterpretedChunksDecoder(env) { | |
17 | -} | |
16 | + InterpretedChunksDecoder4Generator::InterpretedChunksDecoder4Generator(const Environment& env) : InterpretedChunksDecoder(env) { | |
17 | + } | |
18 | 18 | |
19 | -void InterpretedChunksDecoder4Generator::decode( | |
20 | - unsigned int startNode, | |
21 | - unsigned int endNode, | |
22 | - const InterpretedChunk& interpretedChunk, | |
23 | - std::vector<MorphInterpretation>& out) const { | |
24 | - string orthPrefix; | |
25 | - string lemma; | |
26 | -// convertPrefixes(interpretedChunk, orthPrefix, lemma); | |
27 | - // lemma += env.getCharsetConverter().toString(interpretedChunk.originalCodepoints); | |
28 | - lemma.insert(lemma.end(), interpretedChunk.textStartPtr, interpretedChunk.textEndPtr); | |
29 | - const unsigned char* currPtr = getInterpretationsPtr(interpretedChunk.interpsGroupPtr); | |
30 | - while (currPtr < interpretedChunk.interpsEndPtr) { | |
31 | - MorphInterpretation mi = this->decodeMorphInterpretation(startNode, endNode, orthPrefix, lemma, interpretedChunk, currPtr); | |
32 | - // cerr << mi.toString(false) << endl; | |
33 | - // cerr << "required='" << interpretedChunk.requiredHomonymId << "' morphInterp='" << mi.getHomonymId() << "'" << endl; | |
34 | - if (interpretedChunk.requiredHomonymId.empty() || mi.hasHomonym(interpretedChunk.requiredHomonymId)) { | |
35 | - out.push_back(mi); | |
19 | + void InterpretedChunksDecoder4Generator::decode( | |
20 | + unsigned int startNode, | |
21 | + unsigned int endNode, | |
22 | + const InterpretedChunk& interpretedChunk, | |
23 | + std::vector<MorphInterpretation>& out) const { | |
24 | + string orthPrefix; | |
25 | + string lemma; | |
26 | + // convertPrefixes(interpretedChunk, orthPrefix, lemma); | |
27 | + // lemma += env.getCharsetConverter().toString(interpretedChunk.originalCodepoints); | |
28 | + lemma.insert(lemma.end(), interpretedChunk.textStartPtr, interpretedChunk.textEndPtr); | |
29 | + const unsigned char* currPtr = getInterpretationsPtr(interpretedChunk.interpsGroupPtr); | |
30 | + while (currPtr < interpretedChunk.interpsEndPtr) { | |
31 | + MorphInterpretation mi = this->decodeMorphInterpretation(startNode, endNode, orthPrefix, lemma, interpretedChunk, currPtr); | |
32 | + // cerr << mi.toString(false) << endl; | |
33 | + // cerr << "required='" << interpretedChunk.requiredHomonymId << "' morphInterp='" << mi.getHomonymId() << "'" << endl; | |
34 | + if (interpretedChunk.requiredHomonymId.empty() || mi.hasHomonym(interpretedChunk.requiredHomonymId)) { | |
35 | + out.push_back(mi); | |
36 | + } | |
36 | 37 | } |
37 | 38 | } |
38 | -} | |
39 | 39 | |
40 | -//void InterpretedChunksDecoder4Generator::convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& orthPrefix, std::string& lemma) const { | |
41 | -// for (unsigned int i = 0; i < interpretedChunk.prefixChunks.size(); i++) { | |
42 | -// const InterpretedChunk& prefixChunk = interpretedChunk.prefixChunks[i]; | |
43 | -//// lemma.insert(lemma.end(), prefixChunk.textStartPtr, prefixChunk.textEndPtr); | |
44 | -// const unsigned char* ptr = getInterpretationsPtr(interpretedChunk.interpsGroupPtr); | |
45 | -// MorphInterpretation mi = this->decodeMorphInterpretation(0, 0, orthPrefix, string(""), prefixChunk, ptr); | |
46 | -//// orthPrefix += mi.getOrth(); | |
47 | -// } | |
48 | -//} | |
40 | + //void InterpretedChunksDecoder4Generator::convertPrefixes(const InterpretedChunk& interpretedChunk, std::string& orthPrefix, std::string& lemma) const { | |
41 | + // for (unsigned int i = 0; i < interpretedChunk.prefixChunks.size(); i++) { | |
42 | + // const InterpretedChunk& prefixChunk = interpretedChunk.prefixChunks[i]; | |
43 | + //// lemma.insert(lemma.end(), prefixChunk.textStartPtr, prefixChunk.textEndPtr); | |
44 | + // const unsigned char* ptr = getInterpretationsPtr(interpretedChunk.interpsGroupPtr); | |
45 | + // MorphInterpretation mi = this->decodeMorphInterpretation(0, 0, orthPrefix, string(""), prefixChunk, ptr); | |
46 | + //// orthPrefix += mi.getOrth(); | |
47 | + // } | |
48 | + //} | |
49 | 49 | |
50 | -MorphInterpretation InterpretedChunksDecoder4Generator::decodeMorphInterpretation( | |
51 | - unsigned int startNode, unsigned int endNode, | |
52 | - const string& orthPrefix, | |
53 | - const string& lemma, | |
54 | - const InterpretedChunk& chunk, | |
55 | - const unsigned char*& ptr) const { | |
56 | - string orth = orthPrefix; | |
57 | - EncodedInterpretation ei = this->deserializeInterp(ptr); | |
58 | - codepoints.resize(0); | |
59 | - const char* currPtr = chunk.textStartPtr; | |
60 | - while (currPtr != chunk.textEndPtr) { | |
61 | - uint32_t cp = env.getCharsetConverter().next(currPtr, chunk.textEndPtr); | |
62 | - codepoints.push_back(cp); | |
63 | - } | |
64 | - this->decodeForm(codepoints, ei.value, orth); | |
65 | - MorphInterpretation res( | |
66 | - startNode, endNode, | |
67 | - orth, ei.homonymId.empty() ? lemma : (lemma + HOMONYM_SEPARATOR + ei.homonymId), | |
68 | - // ei.homonymId, | |
69 | - ei.tag, | |
70 | - ei.nameClassifier, | |
71 | - &env.getQualifiersHelper().getQualifiers(ei.qualifiers), | |
72 | - &env.getTagset()); | |
73 | - return res; | |
74 | -} | |
50 | + MorphInterpretation InterpretedChunksDecoder4Generator::decodeMorphInterpretation( | |
51 | + unsigned int startNode, unsigned int endNode, | |
52 | + const string& orthPrefix, | |
53 | + const string& lemma, | |
54 | + const InterpretedChunk& chunk, | |
55 | + const unsigned char*& ptr) const { | |
56 | + string orth = orthPrefix; | |
57 | + EncodedInterpretation ei = this->deserializeInterp(ptr); | |
58 | + codepoints.resize(0); | |
59 | + const char* currPtr = chunk.textStartPtr; | |
60 | + while (currPtr != chunk.textEndPtr) { | |
61 | + uint32_t cp = env.getCharsetConverter().next(currPtr, chunk.textEndPtr); | |
62 | + codepoints.push_back(cp); | |
63 | + } | |
64 | + this->decodeForm(codepoints, ei.value, orth); | |
65 | + | |
66 | + MorphInterpretation res; | |
67 | + res.startNode = startNode; | |
68 | + res.endNode = endNode; | |
69 | + res.orth = orth; | |
70 | + res.lemma = ei.homonymId.empty() ? lemma : (lemma + HOMONYM_SEPARATOR + ei.homonymId); | |
71 | + res.tagId = ei.tag; | |
72 | + res.nameId = ei.nameClassifier; | |
73 | + res.labelsId = ei.qualifiers; | |
75 | 74 | |
76 | -void InterpretedChunksDecoder4Generator::decodeForm( | |
77 | - const vector<uint32_t>& lemma, | |
78 | - const EncodedForm& orth, | |
79 | - string& res) const { | |
80 | - res += orth.prefixToAdd; | |
81 | - for (unsigned int i = 0; i < lemma.size() - orth.suffixToCut; i++) { | |
82 | - env.getCharsetConverter().append(lemma[i], res); | |
75 | + // MorphInterpretation res( | |
76 | + // startNode, endNode, | |
77 | + // orth, ei.homonymId.empty() ? lemma : (lemma + HOMONYM_SEPARATOR + ei.homonymId), | |
78 | + // // ei.homonymId, | |
79 | + // ei.tag, | |
80 | + // ei.nameClassifier, | |
81 | + // &env.getQualifiersHelper().getQualifiers(ei.qualifiers), | |
82 | + // &env.getTagset()); | |
83 | + return res; | |
83 | 84 | } |
84 | - const char* suffixPtr = orth.suffixToAdd.c_str(); | |
85 | - const char* suffixEnd = suffixPtr + orth.suffixToAdd.length(); | |
86 | - while (suffixPtr != suffixEnd) { | |
87 | - uint32_t cp = UTF8CharsetConverter::getInstance().next(suffixPtr, suffixEnd); | |
88 | - env.getCharsetConverter().append(cp, res); | |
85 | + | |
86 | + void InterpretedChunksDecoder4Generator::decodeForm( | |
87 | + const vector<uint32_t>& lemma, | |
88 | + const EncodedForm& orth, | |
89 | + string& res) const { | |
90 | + res += orth.prefixToAdd; | |
91 | + for (unsigned int i = 0; i < lemma.size() - orth.suffixToCut; i++) { | |
92 | + env.getCharsetConverter().append(lemma[i], res); | |
93 | + } | |
94 | + const char* suffixPtr = orth.suffixToAdd.c_str(); | |
95 | + const char* suffixEnd = suffixPtr + orth.suffixToAdd.length(); | |
96 | + while (suffixPtr != suffixEnd) { | |
97 | + uint32_t cp = UTF8CharsetConverter::getInstance().next(suffixPtr, suffixEnd); | |
98 | + env.getCharsetConverter().append(cp, res); | |
99 | + } | |
89 | 100 | } |
90 | -} | |
91 | 101 | |
92 | -EncodedInterpretation InterpretedChunksDecoder4Generator::deserializeInterp(const unsigned char*& ptr) const { | |
93 | - EncodedInterpretation interp; | |
94 | - interp.homonymId = readString(ptr); | |
95 | - interp.value.prefixToAdd = readString(ptr); | |
96 | - interp.value.suffixToCut = readInt8(ptr); | |
97 | - interp.value.suffixToAdd = readString(ptr); | |
98 | - interp.tag = readInt16(ptr); | |
99 | - interp.nameClassifier = readInt8(ptr); | |
100 | - interp.qualifiers = readInt16(ptr); | |
101 | - return interp; | |
102 | -} | |
102 | + EncodedInterpretation InterpretedChunksDecoder4Generator::deserializeInterp(const unsigned char*& ptr) const { | |
103 | + EncodedInterpretation interp; | |
104 | + interp.homonymId = readString(ptr); | |
105 | + interp.value.prefixToAdd = readString(ptr); | |
106 | + interp.value.suffixToCut = readInt8(ptr); | |
107 | + interp.value.suffixToAdd = readString(ptr); | |
108 | + interp.tag = readInt16(ptr); | |
109 | + interp.nameClassifier = readInt8(ptr); | |
110 | + interp.qualifiers = readInt16(ptr); | |
111 | + return interp; | |
112 | + } | |
103 | 113 | |
104 | 114 | } |
... | ... |
morfeusz/fsa/const.cpp
... | ... | @@ -4,7 +4,7 @@ |
4 | 4 | namespace morfeusz { |
5 | 5 | |
6 | 6 | extern const uint32_t MAGIC_NUMBER = 0x8fc2bc1b; |
7 | -extern const uint8_t VERSION_NUM = 18; | |
7 | +extern const uint8_t VERSION_NUM = 19; | |
8 | 8 | |
9 | 9 | extern const unsigned int VERSION_NUM_OFFSET = 4; |
10 | 10 | extern const unsigned int IMPLEMENTATION_NUM_OFFSET = 5; |
... | ... |
morfeusz/morfeusz2.h
... | ... | @@ -11,6 +11,7 @@ |
11 | 11 | #include <vector> |
12 | 12 | #include <string> |
13 | 13 | #include <list> |
14 | +#include <set> | |
14 | 15 | |
15 | 16 | #ifndef __WIN32 |
16 | 17 | #define DLLIMPORT |
... | ... | @@ -28,7 +29,7 @@ namespace morfeusz { |
28 | 29 | class DLLIMPORT MorphInterpretation; |
29 | 30 | class DLLIMPORT Morfeusz; |
30 | 31 | class DLLIMPORT ResultsIterator; |
31 | - template <class T> class DLLIMPORT Tagset; | |
32 | + class DLLIMPORT IdResolver; | |
32 | 33 | class DLLIMPORT MorfeuszException; |
33 | 34 | |
34 | 35 | enum Charset { |
... | ... | @@ -110,20 +111,6 @@ namespace morfeusz { |
110 | 111 | */ |
111 | 112 | static Morfeusz* createInstance(); |
112 | 113 | |
113 | - /** | |
114 | - * Set a file used for morphological analysis. | |
115 | - * | |
116 | - * @param filename | |
117 | - */ | |
118 | - virtual void setAnalyzerDictionary(const std::string& filename) = 0; | |
119 | - | |
120 | - /** | |
121 | - * Set a file used for morphological synthesis. | |
122 | - * | |
123 | - * @param filename | |
124 | - */ | |
125 | - virtual void setGeneratorDictionary(const std::string& filename) = 0; | |
126 | - | |
127 | 114 | virtual ~Morfeusz(); |
128 | 115 | |
129 | 116 | /** |
... | ... | @@ -134,7 +121,7 @@ namespace morfeusz { |
134 | 121 | * @param text - text for morphological analysis. |
135 | 122 | * @return - iterator over morphological analysis results |
136 | 123 | */ |
137 | - virtual ResultsIterator* analyze(const std::string& text) const = 0; | |
124 | + virtual ResultsIterator* analyse(const std::string& text) const = 0; | |
138 | 125 | |
139 | 126 | /** |
140 | 127 | * Analyze given text and return the results as iterator. |
... | ... | @@ -144,7 +131,7 @@ namespace morfeusz { |
144 | 131 | * @param text - text for morphological analysis. This pointer must not be deleted before returned ResultsIterator object. |
145 | 132 | * @return - iterator over morphological analysis results |
146 | 133 | */ |
147 | - virtual ResultsIterator* analyze(const char* text) const = 0; | |
134 | + virtual ResultsIterator* analyse(const char* text) const = 0; | |
148 | 135 | |
149 | 136 | /** |
150 | 137 | * Perform morphological analysis on a given text and put results in a vector. |
... | ... | @@ -152,7 +139,7 @@ namespace morfeusz { |
152 | 139 | * @param text - text to be analyzed |
153 | 140 | * @param result - results vector |
154 | 141 | */ |
155 | - virtual void analyze(const std::string& text, std::vector<MorphInterpretation>& result) const = 0; | |
142 | + virtual void analyse(const std::string& text, std::vector<MorphInterpretation>& result) const = 0; | |
156 | 143 | |
157 | 144 | /** |
158 | 145 | * Perform morphological synthesis on a given lemma and put results in a vector. |
... | ... | @@ -170,7 +157,7 @@ namespace morfeusz { |
170 | 157 | * @param tag - tag of result interpretations |
171 | 158 | * @param result - results vector |
172 | 159 | */ |
173 | - virtual void generate(const std::string& lemma, int tagnum, std::vector<MorphInterpretation>& result) const = 0; | |
160 | + virtual void generate(const std::string& lemma, int tagId, std::vector<MorphInterpretation>& result) const = 0; | |
174 | 161 | |
175 | 162 | /** |
176 | 163 | * Set encoding for input and output string objects. |
... | ... | @@ -220,25 +207,40 @@ namespace morfeusz { |
220 | 207 | * @param debug |
221 | 208 | */ |
222 | 209 | virtual void setDebug(bool debug) = 0; |
223 | - | |
210 | + | |
224 | 211 | /** |
225 | - * Gets default tagset used for morphological analysis. | |
226 | - * @return | |
212 | + * Get reference to tagset currently being in use. | |
213 | + * | |
214 | + * @return currently used tagset | |
227 | 215 | */ |
228 | - virtual const Tagset<std::string>& getDefaultAnalyzerTagset() const = 0; | |
229 | - | |
216 | + virtual const IdResolver& getIdResolver() const = 0; | |
217 | + | |
230 | 218 | /** |
231 | - * Gets default tagset used for morphological synthesis. | |
232 | - * @return | |
219 | + * Set current dictionary to the one with provided name. | |
220 | + * | |
221 | + * This is NOT thread safe (no other thread may invoke setDictionary | |
222 | + * either within this instance, or any other in the same application). | |
223 | + * | |
224 | + * @param dictName dictionary name | |
233 | 225 | */ |
234 | - virtual const Tagset<std::string>& getDefaultGeneratorTagset() const = 0; | |
226 | +// virtual void setDictionary(const std::string& dictName) = 0; | |
227 | + | |
228 | + /** | |
229 | + * List of directories where current Morfeusz instance will look for dictionaries. | |
230 | + */ | |
231 | + std::list<std::string> dictionarySearchPaths; | |
235 | 232 | |
233 | + | |
234 | + virtual void setAnalyzerDictionary(const std::string& filename) = 0; | |
235 | + | |
236 | + virtual void setGeneratorDictionary(const std::string& filename) = 0; | |
237 | + | |
236 | 238 | protected: |
237 | 239 | /** |
238 | 240 | * Same as analyse(text) but copies the text under the hood. |
239 | 241 | * Useful for wrappers to other languages. |
240 | 242 | */ |
241 | - virtual ResultsIterator* analyzeWithCopy(const char* text) const = 0; | |
243 | + virtual ResultsIterator* analyseWithCopy(const char* text) const = 0; | |
242 | 244 | }; |
243 | 245 | |
244 | 246 | class DLLIMPORT ResultsIterator { |
... | ... | @@ -253,8 +255,7 @@ namespace morfeusz { |
253 | 255 | /** |
254 | 256 | * Represents a tagset |
255 | 257 | */ |
256 | - template <class T> | |
257 | - class DLLIMPORT Tagset { | |
258 | + class DLLIMPORT IdResolver { | |
258 | 259 | public: |
259 | 260 | |
260 | 261 | /** |
... | ... | @@ -263,7 +264,15 @@ namespace morfeusz { |
263 | 264 | * @param tagId - tag index in the tagset. |
264 | 265 | * @return - the tag |
265 | 266 | */ |
266 | - virtual const T& getTag(const int tagNum) const = 0; | |
267 | + virtual const std::string& getTag(const int tagId) const = 0; | |
268 | + | |
269 | + /** | |
270 | + * Returns identifier for given tag. | |
271 | + * Throws MorfeuszException when none exists. | |
272 | + * | |
273 | + * @return identifier for given tag | |
274 | + */ | |
275 | + virtual int getTagId(const std::string& tag) const = 0; | |
267 | 276 | |
268 | 277 | /** |
269 | 278 | * Returns named entity type (denoted by its index). |
... | ... | @@ -271,23 +280,39 @@ namespace morfeusz { |
271 | 280 | * @param nameId - name index in the tagset. |
272 | 281 | * @return - the named entity type |
273 | 282 | */ |
274 | - virtual const T& getName(const int nameNum) const = 0; | |
283 | + virtual const std::string& getName(const int nameId) const = 0; | |
284 | + | |
285 | + /** | |
286 | + * Returns identifier for given named entity. | |
287 | + * Throws MorfeuszException when none exists. | |
288 | + * | |
289 | + * @return identifier for given named entity | |
290 | + */ | |
291 | + virtual int getNameId(const std::string& name) const = 0; | |
292 | + | |
293 | + virtual const std::string& getLabelsAsString(int labelsId) const = 0; | |
294 | + | |
295 | + virtual const std::set<std::string>& getLabels(int labelsId) const = 0; | |
296 | + | |
297 | + virtual int getLabelsId(const std::string& labelsStr) const = 0; | |
275 | 298 | |
276 | 299 | /** |
278 | 301 | * Returns number of tags this tagset contains. |
278 | 301 | * |
279 | 302 | * @return |
280 | 303 | */ |
281 | - virtual size_t getTagsSize() const = 0; | |
304 | + virtual size_t getTagsCount() const = 0; | |
282 | 305 | |
283 | 306 | /** |
284 | 307 | * Returns number of named entity types this tagset contains. |
285 | 308 | * |
286 | 309 | * @return |
287 | 310 | */ |
288 | - virtual size_t getNamesSize() const = 0; | |
311 | + virtual size_t getNamesCount() const = 0; | |
312 | + | |
313 | + virtual size_t getLabelsCount() const = 0; | |
289 | 314 | |
290 | - virtual ~Tagset() { | |
315 | + virtual ~IdResolver() { | |
291 | 316 | } |
292 | 317 | }; |
293 | 318 | |
... | ... | @@ -311,109 +336,41 @@ namespace morfeusz { |
311 | 336 | The structure below describes one edge of this DAG: |
312 | 337 | |
313 | 338 | */ |
314 | - class DLLIMPORT MorphInterpretation { | |
315 | - public: | |
316 | - | |
317 | - /** | |
318 | - * | |
319 | - * @param startNode - number of start node in DAG. | |
320 | - * @param endNode - number of end node in DAG. | |
321 | - * @param orth - orthographic form | |
322 | - * @param lemma - base form | |
323 | - * @param tagnum - tag identifier (0 for "unrecognized", 1 for "whitespace") | |
324 | - * @param namenum - named entity identifier (0 for "not a named entity") | |
325 | - * @param qualifiers - pointer to vector of qualifiers (not owned by this) | |
326 | - * @param tagset - pointer to default tagset used by Morfeusz (not owned by this) | |
327 | - */ | |
328 | - MorphInterpretation( | |
329 | - int startNode, | |
330 | - int endNode, | |
331 | - const std::string& orth, | |
332 | - const std::string& lemma, | |
333 | - int tagnum, | |
334 | - int namenum, | |
335 | - const std::vector<std::string>* qualifiers, | |
336 | - const Tagset<std::string>* tagset); | |
337 | - | |
338 | - MorphInterpretation(); | |
339 | + struct DLLIMPORT MorphInterpretation { | |
339 | 340 | |
340 | 341 | /** |
341 | 342 | * Creates new instance with "ign" tag (meaning: "not found in the dictionary") |
342 | 343 | */ |
343 | 344 | static MorphInterpretation createIgn( |
344 | 345 | int startNode, int endNode, |
345 | - const std::string& orth, const std::string& lemma, | |
346 | - const Tagset<std::string>& tagset); | |
346 | + const std::string& orth, const std::string& lemma); | |
347 | 347 | |
348 | 348 | /** |
349 | 349 | * Creates new instance with "sp" tag (meaning: "this is a sequence of whitespaces") |
350 | 350 | */ |
351 | - static MorphInterpretation createWhitespace(int startNode, int endNode, const std::string& orth, const Tagset<std::string>& tagset); | |
352 | - | |
353 | - inline int getStartNode() const { | |
354 | - return startNode; | |
355 | - } | |
356 | - | |
357 | - inline int getEndNode() const { | |
358 | - return endNode; | |
359 | - } | |
360 | - | |
361 | - inline const std::string& getOrth() const { | |
362 | - return orth; | |
363 | - } | |
364 | - | |
365 | - inline const std::string& getLemma() const { | |
366 | - return lemma; | |
367 | - } | |
368 | - | |
369 | - inline int getTagnum() const { | |
370 | - return tagnum; | |
371 | - } | |
372 | - | |
373 | - inline int getNamenum() const { | |
374 | - return namenum; | |
375 | - } | |
351 | + static MorphInterpretation createWhitespace(int startNode, int endNode, const std::string& orth); | |
376 | 352 | |
377 | 353 | inline bool isIgn() const { |
378 | - return tagnum == 0; | |
354 | + return tagId == 0; | |
379 | 355 | } |
380 | 356 | |
381 | 357 | inline bool isWhitespace() const { |
382 | - return tagnum == 1; | |
383 | - } | |
384 | - | |
385 | - inline const std::string& getTag() const { | |
386 | - return tagset->getTag(tagnum); | |
387 | - } | |
388 | - | |
389 | - inline const std::string& getName() const { | |
390 | - return tagset->getName(namenum); | |
358 | + return tagId == 1; | |
391 | 359 | } |
392 | - | |
393 | - inline const std::vector<std::string>& getQualifiers() const { | |
394 | - return *qualifiers; | |
395 | - } | |
396 | - | |
360 | + | |
361 | + // FIXME - to be moved elsewhere | |
397 | 362 | bool hasHomonym(const std::string& homonymId) const; |
398 | 363 | |
364 | + // FIXME - to be moved elsewhere | |
399 | 365 | std::string toString(bool includeNodeNumbers) const; |
400 | - private: | |
366 | + | |
401 | 367 | int startNode; |
402 | 368 | int endNode; |
403 | 369 | std::string orth; |
404 | 370 | std::string lemma; |
405 | - int tagnum; | |
406 | - int namenum; | |
407 | - | |
408 | - /** | |
409 | - * not owned by this | |
410 | - */ | |
411 | - const std::vector<std::string>* qualifiers; | |
412 | - | |
413 | - /** | |
414 | - * not owned by this | |
415 | - */ | |
416 | - const Tagset<std::string>* tagset; | |
371 | + int tagId; | |
372 | + int nameId; | |
373 | + int labelsId; | |
417 | 374 | }; |
418 | 375 | |
419 | 376 | class DLLIMPORT MorfeuszException : public std::exception { |
... | ... |
morfeusz/morfeusz2_c.cpp
... | ... | @@ -13,7 +13,7 @@ using namespace morfeusz; |
13 | 13 | |
14 | 14 | static Morfeusz* morfeuszInstance = Morfeusz::createInstance(); |
15 | 15 | static vector<MorphInterpretation> results; |
16 | -static ResultsManager resultsManager; | |
16 | +static ResultsManager resultsManager(morfeuszInstance); | |
17 | 17 | |
18 | 18 | extern "C" DLLIMPORT |
19 | 19 | char* morfeusz_about() { |
... | ... | @@ -23,7 +23,7 @@ char* morfeusz_about() { |
23 | 23 | extern "C" DLLIMPORT |
24 | 24 | InterpMorf* morfeusz_analyse(char *tekst) { |
25 | 25 | results.clear(); |
26 | - morfeuszInstance->analyze(string(tekst), results); | |
26 | + morfeuszInstance->analyse(string(tekst), results); | |
27 | 27 | return resultsManager.convertResults(results); |
28 | 28 | } |
29 | 29 | |
... | ... |
morfeusz/morfeusz_analyzer.cpp
... | ... | @@ -26,8 +26,8 @@ int main(int argc, const char** argv) { |
26 | 26 | while (getline(cin, line)) { |
27 | 27 | // printf("%s\n", line.c_str()); |
28 | 28 | res.clear(); |
29 | - morfeusz->analyze(line, res); | |
30 | - printMorphResults(res, true); | |
29 | + morfeusz->analyse(line, res); | |
30 | + printMorphResults(*morfeusz, res, true); | |
31 | 31 | } |
32 | 32 | delete morfeusz; |
33 | 33 | printf("\n"); |
... | ... |
morfeusz/morfeusz_generator.cpp
morfeusz/test/consoleUtils.hpp deleted
1 | -/* | |
2 | - * File: consoleUtils.hpp | |
3 | - * Author: lennyn | |
4 | - * | |
5 | - * Created on April 4, 2014, 7:36 PM | |
6 | - */ | |
7 | - | |
8 | -#ifndef CONSOLEUTILS_HPP | |
9 | -#define CONSOLEUTILS_HPP | |
10 | - | |
11 | -#include <vector> | |
12 | -#include <string> | |
13 | -#include "morfeusz2.h" | |
14 | - | |
15 | -namespace morfeusz { | |
16 | - | |
17 | -template <class OutputStream> | |
18 | -void appendMorfeuszResults(const std::vector<MorphInterpretation>& res, OutputStream& out) { | |
19 | - int prevStart = -1; | |
20 | - int prevEnd = -1; | |
21 | - out << "["; | |
22 | - for (unsigned int i = 0; i < res.size(); i++) { | |
23 | - const MorphInterpretation& mi = res[i]; | |
24 | - if (prevStart != -1 | |
25 | - && (prevStart != mi.getStartNode() || prevEnd != mi.getEndNode())) { | |
26 | - out << "]\n["; | |
27 | - } | |
28 | - else if (prevStart != -1) { | |
29 | - out << "; "; | |
30 | - } | |
31 | - out << mi.getStartNode() << "," | |
32 | - << mi.getEndNode() << "," | |
33 | - << mi.getOrth() << "," | |
34 | - << mi.getLemma() << "," | |
35 | - << mi.getTag() << "," | |
36 | - << mi.getName(); | |
37 | - prevStart = mi.getStartNode(); | |
38 | - prevEnd = mi.getEndNode(); | |
39 | - } | |
40 | - out << "]\n"; | |
41 | -} | |
42 | - | |
43 | -} | |
44 | - | |
45 | -#endif /* CONSOLEUTILS_HPP */ | |
46 | - |
morfeusz/test/test_recognize_dict.cpp deleted
1 | -/* | |
2 | - * File: test_morph.cpp | |
3 | - * Author: mlenart | |
4 | - * | |
5 | - * Created on November 8, 2013, 4:12 PM | |
6 | - */ | |
7 | - | |
8 | -//#include <cstdlib> | |
9 | -#include <sstream> | |
10 | -#include <iostream> | |
11 | -#include "utils.hpp" | |
12 | -#include "MorfeuszInternal.hpp" | |
13 | -#include "morfeusz2.h" | |
14 | - | |
15 | -using namespace std; | |
16 | -using namespace morfeusz; | |
17 | - | |
18 | -int main(int argc, char** argv) { | |
19 | - validate(argc == 3, "Must provide exactly 2 arguments - input FSA filename and dictionary filename."); | |
20 | - string fsaFilename = argv[1]; | |
21 | - string dictFilename = argv[2]; | |
22 | - MorfeuszInternal morfeusz; | |
23 | - morfeusz.setAnalyzerDictionary(fsaFilename); | |
24 | - ifstream in; | |
25 | - in.open(dictFilename.c_str()); | |
26 | - string line; | |
27 | - while (getline(in, line)) { | |
28 | - cerr << "TEST " << line << endl; | |
29 | - vector<string> splitVector(split(line, '\t')); | |
30 | - string orth = splitVector[0]; | |
31 | - string lemma = splitVector[1]; | |
32 | - string tag = splitVector[2]; | |
33 | - string name = splitVector[3]; | |
34 | - | |
35 | - vector<MorphInterpretation> res; | |
36 | - cerr << "ANALYZE '" << orth << "'" << endl; | |
37 | - morfeusz.analyze(orth, res); | |
38 | - bool found = false; | |
39 | - | |
40 | - for (unsigned int i = 0; i < res.size(); i++) { | |
41 | - MorphInterpretation& mi = res[i]; | |
42 | - DEBUG("FOUND: " + mi.getLemma() + ":" + mi.getTag()); | |
43 | - if (lemma == mi.getLemma() && tag == mi.getTag() && name == mi.getName()) { | |
44 | - DEBUG("RECOGNIZED " + orth + " " + lemma + ":" + tag + ":" + name); | |
45 | - found = true; | |
46 | - } | |
47 | - } | |
48 | - validate(found, "Failed to recognize " + orth + " " + lemma + ":" + tag + ":" + name); | |
49 | - } | |
50 | - return 0; | |
51 | -} |
morfeusz/test/test_result_equals.cpp deleted
1 | -/* | |
2 | - * File: test_result_equals.cpp | |
3 | - * Author: lennyn | |
4 | - * | |
5 | - * Created on December 6, 2013, 12:45 PM | |
6 | - */ | |
7 | - | |
8 | -#include <cstdlib> | |
9 | -#include <cassert> | |
10 | -#include <string> | |
11 | -#include <sstream> | |
12 | -#include <fstream> | |
13 | -#include <iostream> | |
14 | -#include "MorfeuszInternal.hpp" | |
15 | -#include "consoleUtils.hpp" | |
16 | - | |
17 | -using namespace std; | |
18 | -using namespace morfeusz; | |
19 | - | |
20 | -static Charset getEncoding(const string& encodingStr) { | |
21 | - if (encodingStr == "UTF8") | |
22 | - return UTF8; | |
23 | - else if (encodingStr == "ISO8859_2") | |
24 | - return ISO8859_2; | |
25 | - else if (encodingStr == "CP1250") | |
26 | - return CP1250; | |
27 | - else if (encodingStr == "CP852") | |
28 | - return CP852; | |
29 | - else { | |
30 | - cerr << "Invalid encoding: " << encodingStr << " must be one of: UTF8, ISO8859_2, WINDOWS1250" << endl; | |
31 | - throw "Invalid encoding"; | |
32 | - } | |
33 | -} | |
34 | - | |
35 | -int main(int argc, char** argv) { | |
36 | - validate(argc == 3 || argc == 4, "Must provide exactly 2 or 3 arguments - input filename, required output filename, (optional) encoding."); | |
37 | - string inputFilename = argv[1]; | |
38 | - ifstream in; | |
39 | -// in.exceptions(std::ifstream::failbit | std::ifstream::badbit); | |
40 | - cerr << "OPEN " << inputFilename << endl; | |
41 | - in.open(inputFilename.c_str()); | |
42 | - string requiredOutputFilename = argv[2]; | |
43 | - ifstream requiredIn; | |
44 | -// requiredIn.exceptions(std::ifstream::failbit | std::ifstream::badbit); | |
45 | - cerr << "OPEN " << requiredOutputFilename << endl; | |
46 | - requiredIn.open(requiredOutputFilename.c_str()); | |
47 | - // string requiredOutput = readFile<char>(requiredOutputFilename); | |
48 | - cerr << "TEST START" << endl; | |
49 | - MorfeuszInternal morfeusz; | |
50 | - if (argc == 4) { | |
51 | - Charset encoding = getEncoding(argv[3]); | |
52 | - morfeusz.setCharset(encoding); | |
53 | - } | |
54 | - string line; | |
55 | - while (getline(in, line)) { | |
56 | - cerr << "TEST " << line << endl; | |
57 | - vector<MorphInterpretation> res; | |
58 | - morfeusz.analyze(line, res); | |
59 | - stringstream out; | |
60 | - appendMorfeuszResults(res, out); | |
61 | - string gotOutputLine; | |
62 | - string requiredOutputLine; | |
63 | - while (getline(out, gotOutputLine)) { | |
64 | - getline(requiredIn, requiredOutputLine); | |
65 | - cerr << "REQUIRED LINE " << requiredOutputLine << endl; | |
66 | - cerr << "GOT LINE " << gotOutputLine << endl; | |
67 | - cerr << (requiredOutputLine == gotOutputLine) << endl; | |
68 | - validate(gotOutputLine == requiredOutputLine, "lines do not match"); | |
69 | - } | |
70 | - } | |
71 | - return 0; | |
72 | -} | |
73 | - |
morfeusz/tests/TestMorfeusz.cpp
... | ... | @@ -32,10 +32,10 @@ void TestMorfeusz::tearDown() { |
32 | 32 | |
33 | 33 | void TestMorfeusz::testAnalyzeIterate1() { |
34 | 34 | cerr << "testAnalyzeIterate1" << endl; |
35 | - ResultsIterator* it = morfeusz->analyze("AAAAbbbbCCCC"); | |
35 | + ResultsIterator* it = morfeusz->analyse("AAAAbbbbCCCC"); | |
36 | 36 | CPPUNIT_ASSERT(it->hasNext()); |
37 | - CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), it->peek().getOrth()); | |
38 | - CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), it->next().getOrth()); | |
37 | + CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), it->peek().orth); | |
38 | + CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), it->next().orth); | |
39 | 39 | CPPUNIT_ASSERT(!it->hasNext()); |
40 | 40 | CPPUNIT_ASSERT_THROW(it->peek(), MorfeuszException); |
41 | 41 | CPPUNIT_ASSERT_THROW(it->next(), MorfeuszException); |
... | ... | @@ -45,25 +45,25 @@ void TestMorfeusz::testAnalyzeIterate1() { |
45 | 45 | void TestMorfeusz::testAnalyzeIterateWithWhitespaceHandlingKEEP() { |
46 | 46 | cerr << "testAnalyzeIterateWithWhitespaceHandlingKEEP" << endl; |
47 | 47 | morfeusz->setWhitespaceHandling(KEEP_WHITESPACES); |
48 | - ResultsIterator* it = morfeusz->analyze(" AAAAbbbbCCCC DDDDeeee.\t"); | |
48 | + ResultsIterator* it = morfeusz->analyse(" AAAAbbbbCCCC DDDDeeee.\t"); | |
49 | 49 | |
50 | 50 | CPPUNIT_ASSERT(it->hasNext()); |
51 | - CPPUNIT_ASSERT_EQUAL(string(" "), it->next().getOrth()); | |
51 | + CPPUNIT_ASSERT_EQUAL(string(" "), it->next().orth); | |
52 | 52 | |
53 | 53 | CPPUNIT_ASSERT(it->hasNext()); |
54 | - CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), it->next().getOrth()); | |
54 | + CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), it->next().orth); | |
55 | 55 | |
56 | 56 | CPPUNIT_ASSERT(it->hasNext()); |
57 | - CPPUNIT_ASSERT_EQUAL(string(" "), it->next().getOrth()); | |
57 | + CPPUNIT_ASSERT_EQUAL(string(" "), it->next().orth); | |
58 | 58 | |
59 | 59 | CPPUNIT_ASSERT(it->hasNext()); |
60 | - CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), it->next().getOrth()); | |
60 | + CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), it->next().orth); | |
61 | 61 | |
62 | 62 | CPPUNIT_ASSERT(it->hasNext()); |
63 | - CPPUNIT_ASSERT_EQUAL(string("."), it->next().getOrth()); | |
63 | + CPPUNIT_ASSERT_EQUAL(string("."), it->next().orth); | |
64 | 64 | |
65 | 65 | CPPUNIT_ASSERT(it->hasNext()); |
66 | - CPPUNIT_ASSERT_EQUAL(string("\t"), it->next().getOrth()); | |
66 | + CPPUNIT_ASSERT_EQUAL(string("\t"), it->next().orth); | |
67 | 67 | |
68 | 68 | CPPUNIT_ASSERT(!it->hasNext()); |
69 | 69 | CPPUNIT_ASSERT_THROW(it->peek(), MorfeuszException); |
... | ... | @@ -74,16 +74,16 @@ void TestMorfeusz::testAnalyzeIterateWithWhitespaceHandlingKEEP() { |
74 | 74 | void TestMorfeusz::testAnalyzeIterateWithWhitespaceHandlingAPPEND() { |
75 | 75 | cerr << "testAnalyzeIterateWithWhitespaceHandlingAPPEND" << endl; |
76 | 76 | morfeusz->setWhitespaceHandling(APPEND_WHITESPACES); |
77 | - ResultsIterator* it = morfeusz->analyze(" AAAAbbbbCCCC DDDDeeee.\t"); | |
77 | + ResultsIterator* it = morfeusz->analyse(" AAAAbbbbCCCC DDDDeeee.\t"); | |
78 | 78 | |
79 | 79 | CPPUNIT_ASSERT(it->hasNext()); |
80 | - CPPUNIT_ASSERT_EQUAL(string(" AAAAbbbbCCCC "), it->next().getOrth()); | |
80 | + CPPUNIT_ASSERT_EQUAL(string(" AAAAbbbbCCCC "), it->next().orth); | |
81 | 81 | |
82 | 82 | CPPUNIT_ASSERT(it->hasNext()); |
83 | - CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), it->next().getOrth()); | |
83 | + CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), it->next().orth); | |
84 | 84 | |
85 | 85 | CPPUNIT_ASSERT(it->hasNext()); |
86 | - CPPUNIT_ASSERT_EQUAL(string(".\t"), it->next().getOrth()); | |
86 | + CPPUNIT_ASSERT_EQUAL(string(".\t"), it->next().orth); | |
87 | 87 | |
88 | 88 | CPPUNIT_ASSERT(!it->hasNext()); |
89 | 89 | CPPUNIT_ASSERT_THROW(it->peek(), MorfeuszException); |
... | ... | @@ -94,10 +94,10 @@ void TestMorfeusz::testAnalyzeIterateWithWhitespaceHandlingAPPEND() { |
94 | 94 | void TestMorfeusz::testAnalyzeVector1() { |
95 | 95 | cerr << "testAnalyzeVector1" << endl; |
96 | 96 | vector<MorphInterpretation> res; |
97 | - morfeusz->analyze("AAAAbbbbCCCC", res); | |
97 | + morfeusz->analyse("AAAAbbbbCCCC", res); | |
98 | 98 | CPPUNIT_ASSERT_EQUAL((size_t) 1, res.size()); |
99 | - CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].getOrth()); | |
100 | - CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].getLemma()); | |
99 | + CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].orth); | |
100 | + CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].lemma); | |
101 | 101 | } |
102 | 102 | |
103 | 103 | static inline string prepareErrorneusTmpFile() { |
... | ... | @@ -135,35 +135,35 @@ void TestMorfeusz::testWhitespaceHandlingKEEP() { |
135 | 135 | cerr << "testWhitespaceHandlingKEEP" << endl; |
136 | 136 | vector<MorphInterpretation> res; |
137 | 137 | morfeusz->setWhitespaceHandling(KEEP_WHITESPACES); |
138 | - morfeusz->analyze(" AAAAbbbbCCCC DDDDeeee\t", res); | |
138 | + morfeusz->analyse(" AAAAbbbbCCCC DDDDeeee\t", res); | |
139 | 139 | CPPUNIT_ASSERT_EQUAL((size_t) 5, res.size()); |
140 | - CPPUNIT_ASSERT_EQUAL(string(" "), res[0].getOrth()); | |
141 | - CPPUNIT_ASSERT_EQUAL(string(" "), res[0].getLemma()); | |
142 | - CPPUNIT_ASSERT_EQUAL(1, res[0].getTagnum()); | |
143 | - CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[1].getOrth()); | |
144 | - CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[1].getLemma()); | |
145 | - CPPUNIT_ASSERT_EQUAL(0, res[1].getTagnum()); | |
146 | - CPPUNIT_ASSERT_EQUAL(string(" "), res[2].getOrth()); | |
147 | - CPPUNIT_ASSERT_EQUAL(string(" "), res[2].getLemma()); | |
148 | - CPPUNIT_ASSERT_EQUAL(1, res[2].getTagnum()); | |
149 | - CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), res[3].getOrth()); | |
150 | - CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), res[3].getLemma()); | |
151 | - CPPUNIT_ASSERT_EQUAL(0, res[3].getTagnum()); | |
152 | - CPPUNIT_ASSERT_EQUAL(string("\t"), res[4].getOrth()); | |
153 | - CPPUNIT_ASSERT_EQUAL(string("\t"), res[4].getLemma()); | |
154 | - CPPUNIT_ASSERT_EQUAL(1, res[4].getTagnum()); | |
140 | + CPPUNIT_ASSERT_EQUAL(string(" "), res[0].orth); | |
141 | + CPPUNIT_ASSERT_EQUAL(string(" "), res[0].lemma); | |
142 | + CPPUNIT_ASSERT_EQUAL(1, res[0].tagId); | |
143 | + CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[1].orth); | |
144 | + CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[1].lemma); | |
145 | + CPPUNIT_ASSERT_EQUAL(0, res[1].tagId); | |
146 | + CPPUNIT_ASSERT_EQUAL(string(" "), res[2].orth); | |
147 | + CPPUNIT_ASSERT_EQUAL(string(" "), res[2].lemma); | |
148 | + CPPUNIT_ASSERT_EQUAL(1, res[2].tagId); | |
149 | + CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), res[3].orth); | |
150 | + CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), res[3].lemma); | |
151 | + CPPUNIT_ASSERT_EQUAL(0, res[3].tagId); | |
152 | + CPPUNIT_ASSERT_EQUAL(string("\t"), res[4].orth); | |
153 | + CPPUNIT_ASSERT_EQUAL(string("\t"), res[4].lemma); | |
154 | + CPPUNIT_ASSERT_EQUAL(1, res[4].tagId); | |
155 | 155 | } |
156 | 156 | |
157 | 157 | void TestMorfeusz::testWhitespaceHandlingAPPEND() { |
158 | 158 | cerr << "testWhitespaceHandlingAPPEND" << endl; |
159 | 159 | vector<MorphInterpretation> res; |
160 | 160 | morfeusz->setWhitespaceHandling(APPEND_WHITESPACES); |
161 | - morfeusz->analyze(" AAAAbbbbCCCC DDDDeeee\t", res); | |
161 | + morfeusz->analyse(" AAAAbbbbCCCC DDDDeeee\t", res); | |
162 | 162 | CPPUNIT_ASSERT_EQUAL((size_t) 2, res.size()); |
163 | - CPPUNIT_ASSERT_EQUAL(string(" AAAAbbbbCCCC "), res[0].getOrth()); | |
164 | - CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].getLemma()); | |
165 | - CPPUNIT_ASSERT_EQUAL(0, res[0].getTagnum()); | |
166 | - CPPUNIT_ASSERT_EQUAL(string("DDDDeeee\t"), res[1].getOrth()); | |
167 | - CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), res[1].getLemma()); | |
168 | - CPPUNIT_ASSERT_EQUAL(0, res[1].getTagnum()); | |
163 | + CPPUNIT_ASSERT_EQUAL(string(" AAAAbbbbCCCC "), res[0].orth); | |
164 | + CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].lemma); | |
165 | + CPPUNIT_ASSERT_EQUAL(0, res[0].tagId); | |
166 | + CPPUNIT_ASSERT_EQUAL(string("DDDDeeee\t"), res[1].orth); | |
167 | + CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), res[1].lemma); | |
168 | + CPPUNIT_ASSERT_EQUAL(0, res[1].tagId); | |
169 | 169 | } |
... | ... |
morfeusz/wrappers/java/pl/waw/ipipan/morfeusz/app/App.java
... | ... | @@ -14,9 +14,9 @@ public class App { |
14 | 14 | System.getProperty("user.dir")); |
15 | 15 | System.err.println("java.library.path="+System.getProperty("java.library.path")); |
16 | 16 | Morfeusz morfeusz = Morfeusz.createInstance(); |
17 | - ResultsIterator it = morfeusz.analyzeAsIterator("Ala ma kota i żółć."); | |
17 | + ResultsIterator it = morfeusz.analyseAsIterator("Ala ma kota i żółć."); | |
18 | 18 | while (it.hasNext()) { |
19 | - System.out.println(MorfeuszUtils.getInterpretationString(it.next())); | |
19 | + System.out.println(MorfeuszUtils.getInterpretationString(it.next(), morfeusz)); | |
20 | 20 | } |
21 | 21 | } |
22 | 22 | } |
... | ... |
morfeusz/wrappers/java/pl/waw/ipipan/morfeusz/app/MorfeuszUtils.java
... | ... | @@ -3,6 +3,7 @@ package pl.waw.ipipan.morfeusz.app; |
3 | 3 | import java.io.ByteArrayOutputStream; |
4 | 4 | import java.io.PrintStream; |
5 | 5 | import pl.waw.ipipan.morfeusz.MorphInterpretation; |
6 | +import pl.waw.ipipan.morfeusz.Morfeusz; | |
6 | 7 | |
7 | 8 | /** |
8 | 9 | * |
... | ... | @@ -10,7 +11,7 @@ import pl.waw.ipipan.morfeusz.MorphInterpretation; |
10 | 11 | */ |
11 | 12 | public class MorfeuszUtils { |
12 | 13 | |
13 | - public static String getInterpretationString(MorphInterpretation interp) { | |
14 | + public static String getInterpretationString(MorphInterpretation interp, Morfeusz morfeusz) { | |
14 | 15 | ByteArrayOutputStream baos = new ByteArrayOutputStream(); |
15 | 16 | PrintStream ps = new PrintStream(baos); |
16 | 17 | ps.printf("%d %d %s %s %s %s", |
... | ... | @@ -18,8 +19,9 @@ public class MorfeuszUtils { |
18 | 19 | interp.getEndNode(), |
19 | 20 | interp.getOrth(), |
20 | 21 | interp.getLemma(), |
21 | - interp.getTag(), | |
22 | - interp.getName()); | |
22 | + morfeusz.getIdResolver().getTag(interp.getTagId()), | |
23 | + morfeusz.getIdResolver().getName(interp.getNameId()), | |
24 | + morfeusz.getIdResolver().getLabelsAsString(interp.getLabelsId())); | |
23 | 25 | ps.flush(); |
24 | 26 | return baos.toString(); |
25 | 27 | } |
... | ... |
morfeusz/wrappers/morfeusz.i
... | ... | @@ -13,71 +13,21 @@ |
13 | 13 | #include "morfeusz2.h" |
14 | 14 | #include "MorfeuszInternal.hpp" |
15 | 15 | #include <vector> |
16 | +#include <list> | |
16 | 17 | %} |
17 | 18 | |
18 | 19 | #ifdef SWIGJAVA |
19 | 20 | %include "morfeusz_java.i" |
20 | 21 | #endif |
21 | 22 | |
22 | -%include "std_vector.i" | |
23 | -%include "std_string.i" | |
24 | -%include "std_except.i" | |
25 | -%include "exception.i" | |
26 | -%include "typemaps.i" | |
27 | - | |
28 | -%exception { | |
29 | - try{ | |
30 | - $action | |
31 | - } | |
32 | - catch(const morfeusz::FileFormatException& e) { | |
33 | - SWIG_exception(SWIG_IOError, const_cast<char*>(e.what())); | |
34 | - } | |
35 | - catch(const std::exception& e) { | |
36 | - SWIG_exception(SWIG_RuntimeError, const_cast<char*>(e.what())); | |
37 | - } | |
38 | - catch(...) { | |
39 | - SWIG_exception(SWIG_RuntimeError, "Unknown exception"); | |
40 | - } | |
41 | -} | |
42 | - | |
43 | -namespace morfeusz { | |
44 | - | |
45 | - %ignore MorphInterpretation::MorphInterpretation( | |
46 | - int startNode, | |
47 | - int endNode, | |
48 | - const std::string& orth, | |
49 | - const std::string& lemma, | |
50 | - int tagnum, | |
51 | - int namenum, | |
52 | - const std::vector<std::string>* qualifiers, | |
53 | - const Tagset<std::string>* tagset); | |
54 | - | |
55 | - %ignore MorphInterpretation::createIgn(int startNode, int endNode, const std::string& orth, const Tagset<std::string>& tagset); | |
56 | - %ignore MorphInterpretation::createWhitespace(int startNode, int endNode, const std::string& orth, const Tagset<std::string>& tagset); | |
57 | - %ignore Morfeusz::analyze(const char*) const; | |
58 | - %ignore Morfeusz::analyze(const std::string&) const; | |
59 | - %ignore Morfeusz::setCharset(Charset); | |
60 | -// %ignore Morfeusz::analyze(const std::string&, std::vector<MorphInterpretation>&) const; | |
61 | -// %ignore Morfeusz::generate(const std::string&, std::vector<MorphInterpretation>&) const; | |
62 | -// %ignore Morfeusz::generate(const std::string&, int, std::vector<MorphInterpretation>&) const; | |
63 | - %ignore Morfeusz::setDebug(bool); | |
64 | - | |
65 | - %newobject Morfeusz::createInstance(); | |
66 | - %newobject Morfeusz::analyzeAsIterator(const char*) const; | |
67 | -} | |
68 | - | |
69 | -%extend morfeusz::Morfeusz { | |
70 | - morfeusz::ResultsIterator* morfeusz::Morfeusz::analyzeAsIterator(const char* text) const { | |
71 | - return dynamic_cast<const morfeusz::MorfeuszInternal*>($self)->analyzeWithCopy(text); | |
72 | - } | |
73 | -} | |
74 | - | |
75 | -%template(InterpsList) std::vector<morfeusz::MorphInterpretation>; | |
76 | -%template(StringsList) std::vector<std::string>; | |
77 | - | |
78 | -%include "../morfeusz2.h" | |
79 | - | |
80 | - | |
81 | 23 | #ifdef SWIGPYTHON |
82 | 24 | %include "morfeusz_python.i" |
83 | 25 | #endif |
26 | + | |
27 | +#ifdef SWIGPERL | |
28 | +%include "morfeusz_perl.i" | |
29 | +#endif | |
30 | + | |
31 | +%include "morfeusz_common.i" | |
32 | + | |
33 | +%include "../morfeusz2.h" | |
... | ... |
morfeusz/wrappers/morfeusz_common.i
0 → 100644
1 | + | |
2 | +%exception { | |
3 | + try{ | |
4 | + $action | |
5 | + } | |
6 | + catch(const morfeusz::FileFormatException& e) { | |
7 | + SWIG_exception(SWIG_IOError, const_cast<char*>(e.what())); | |
8 | + } | |
9 | + catch(const std::exception& e) { | |
10 | + SWIG_exception(SWIG_RuntimeError, const_cast<char*>(e.what())); | |
11 | + } | |
12 | + catch(...) { | |
13 | + SWIG_exception(SWIG_RuntimeError, "Unknown exception"); | |
14 | + } | |
15 | +} | |
16 | + | |
17 | +namespace morfeusz { | |
18 | + | |
19 | + %ignore MorphInterpretation::createIgn(int startNode, int endNode, const std::string& orth, const std::string& lemma); | |
20 | + %ignore MorphInterpretation::createWhitespace(int startNode, int endNode, const std::string& orth); | |
21 | + %ignore Morfeusz::analyse(const char*) const; | |
22 | + %ignore Morfeusz::analyse(const std::string&) const; | |
23 | + %ignore Morfeusz::setCharset(Charset); | |
24 | +// %rename(_doGetNext) ResultsIterator::next(); | |
25 | +// %ignore Morfeusz::analyse(const std::string&, std::vector<MorphInterpretation>&) const; | |
26 | +// %ignore Morfeusz::generate(const std::string&, std::vector<MorphInterpretation>&) const; | |
27 | +// %ignore Morfeusz::generate(const std::string&, int, std::vector<MorphInterpretation>&) const; | |
28 | + %ignore Morfeusz::setDebug(bool); | |
29 | + | |
30 | + %newobject Morfeusz::createInstance(); | |
31 | + %newobject Morfeusz::analyseAsIterator(const char*) const; | |
32 | +} | |
33 | + | |
34 | +%extend morfeusz::Morfeusz { | |
35 | + morfeusz::ResultsIterator* morfeusz::Morfeusz::analyseAsIterator(const char* text) const { | |
36 | + return dynamic_cast<const morfeusz::MorfeuszInternal*>($self)->analyseWithCopy(text); | |
37 | + } | |
38 | +} | |
39 | + | |
40 | +%template(InterpsList) std::vector<morfeusz::MorphInterpretation>; | |
41 | +%template(StringsList) std::vector<std::string>; | |
42 | +%template(StringsLinkedList) std::list<std::string>; | |
43 | + | |
44 | +#ifndef SWIGPERL | |
45 | +%template(StringsSet) std::set<std::string>; | |
46 | +#endif | |
... | ... |
morfeusz/wrappers/morfeusz_java.i
... | ... | @@ -3,7 +3,7 @@ |
3 | 3 | |
4 | 4 | %include <stdint.i> |
5 | 5 | %include <std_except.i> |
6 | - | |
6 | +%include <std_common.i> | |
7 | 7 | // make vector compatible with java.util.List interface |
8 | 8 | |
9 | 9 | namespace std { |
... | ... | @@ -42,16 +42,85 @@ namespace std { |
42 | 42 | } |
43 | 43 | } |
44 | 44 | }; |
45 | + | |
46 | + template<class T> class list { | |
47 | + public: | |
48 | + typedef size_t size_type; | |
49 | + typedef T value_type; | |
50 | + typedef const value_type& const_reference; | |
51 | + | |
52 | + %rename(isEmpty) empty; | |
53 | + bool empty() const; | |
54 | + void clear(); | |
55 | + | |
56 | + %extend { | |
57 | + | |
58 | + const_reference get(int32_t i) const throw (std::out_of_range) { | |
59 | + std::list<T>::const_iterator it = $self->begin(); | |
60 | + std::advance(it, i); | |
61 | + return *it; | |
62 | + } | |
63 | + | |
64 | + value_type set(int32_t i, const value_type& VECTOR_VALUE_IN) throw (std::out_of_range) { | |
65 | + std::list<T>::iterator it = $self->begin(); | |
66 | + std::advance(it, i); | |
67 | + std::string old = *it; | |
68 | + *it = VECTOR_VALUE_IN; | |
69 | + return old; | |
70 | + } | |
71 | + | |
72 | + void add(int32_t i, const value_type& VECTOR_VALUE_IN) { | |
73 | + std::list<T>::iterator it = $self->begin(); | |
74 | + std::advance(it, i); | |
75 | + $self->insert(it, VECTOR_VALUE_IN); | |
76 | + } | |
77 | + | |
78 | + value_type remove(int32_t i, const value_type& VECTOR_VALUE_IN) throw (std::out_of_range) { | |
79 | + std::list<T>::iterator it = $self->begin(); | |
80 | + std::advance(it, i); | |
81 | + std::string old = *it; | |
82 | + $self->erase(it); | |
83 | + return old; | |
84 | + } | |
85 | + | |
86 | + int32_t size() const { | |
87 | + return $self->size(); | |
88 | + } | |
89 | + } | |
90 | + }; | |
91 | + | |
92 | + template<class T> class set { | |
93 | + public: | |
94 | + typedef size_t size_type; | |
95 | + typedef T value_type; | |
96 | + typedef const value_type& const_reference; | |
97 | + | |
98 | + %rename(isEmpty) empty; | |
99 | + bool empty() const; | |
100 | + | |
101 | + %extend { | |
102 | + | |
103 | + const_reference get(int32_t i) const throw (std::out_of_range) { | |
104 | + std::set<T>::const_iterator it = $self->begin(); | |
105 | + std::advance(it, i); | |
106 | + return *it; | |
107 | + } | |
108 | + | |
109 | + int32_t size() const { | |
110 | + return $self->size(); | |
111 | + } | |
112 | + } | |
113 | + }; | |
45 | 114 | } |
46 | 115 | |
47 | 116 | %typemap(javaimports) morfeusz::Morfeusz %{ |
48 | 117 | import java.io.IOException; |
49 | 118 | import java.lang.RuntimeException; |
50 | 119 | import java.util.List; |
51 | -import java.util.Collections; | |
120 | +import java.util.ArrayList; | |
52 | 121 | |
53 | 122 | /** |
54 | - * Performs morphological analysis (analyze methods) and syntesis (generate methods). | |
123 | + * Performs morphological analysis (analyse methods) and synthesis (generate methods). | |
55 | 124 | * |
56 | 125 | * It is NOT thread-safe |
57 | 126 | * but it is possible to use separate Morfeusz instance for each concurrent thread. |
... | ... | @@ -59,18 +128,14 @@ import java.util.Collections; |
59 | 128 | %} |
60 | 129 | |
61 | 130 | %typemap(javaimports) morfeusz::ResultsIterator %{ |
62 | -import java.util.Iterator; | |
63 | - | |
64 | 131 | /** |
65 | 132 | * Iterates through morphological analysis and synthesis results. |
66 | 133 | * |
67 | 134 | */ |
68 | 135 | %} |
69 | 136 | |
70 | -%typemap(javaimports) std::vector %{ | |
71 | -import java.util.List; | |
72 | -import java.util.AbstractList; | |
73 | -%} | |
137 | +%rename(_dictionarySearchPaths) morfeusz::Morfeusz::dictionarySearchPaths; | |
138 | +%rename(_getLabels) morfeusz::IdResolver::getLabels; | |
74 | 139 | |
75 | 140 | %javaexception("IOException") morfeusz::Morfeusz::setAnalyzerDictionary { |
76 | 141 | try { |
... | ... | @@ -94,10 +159,12 @@ import java.util.AbstractList; |
94 | 159 | } |
95 | 160 | } |
96 | 161 | |
97 | -%typemap(javainterfaces) morfeusz::ResultsIterator "Iterator<MorphInterpretation>" | |
98 | -%typemap(javabase) std::vector<morfeusz::MorphInterpretation> "AbstractList<MorphInterpretation>" | |
99 | -%typemap(javabase) std::vector<morfeusz::String> "AbstractList<String>" | |
100 | -%typemap(javabase) morfeusz::MorfeuszException "RuntimeException" | |
162 | +%typemap(javainterfaces) morfeusz::ResultsIterator "java.util.Iterator<MorphInterpretation>" | |
163 | +%typemap(javabase) std::vector<morfeusz::MorphInterpretation> "java.util.AbstractList<MorphInterpretation>" | |
164 | +%typemap(javabase) std::vector<std::string> "java.util.AbstractList<java.lang.String>" | |
165 | +%typemap(javabase) std::list<std::string> "java.util.AbstractList<java.lang.String>" | |
166 | +%typemap(javabase) std::set<std::string> "java.util.AbstractList<java.lang.String>" | |
167 | +%typemap(javabase) morfeusz::MorfeuszException "java.lang.RuntimeException" | |
101 | 168 | |
102 | 169 | %typemap(javacode) morfeusz::Morfeusz %{ |
103 | 170 | |
... | ... | @@ -107,10 +174,10 @@ import java.util.AbstractList; |
107 | 174 | * @param text text for morphological analysis. |
108 | 175 | * @return list containing the results of morphological analysis |
109 | 176 | */ |
110 | - public List<MorphInterpretation> analyzeAsList(String text) { | |
177 | + public List<MorphInterpretation> analyseAsList(String text) { | |
111 | 178 | InterpsList res = new InterpsList(); |
112 | - analyze(text, res); | |
113 | - return Collections.unmodifiableList(res); | |
179 | + analyse(text, res); | |
180 | + return new ArrayList<MorphInterpretation>(res); | |
114 | 181 | } |
115 | 182 | |
116 | 183 | /** |
... | ... | @@ -122,21 +189,30 @@ import java.util.AbstractList; |
122 | 189 | public List<MorphInterpretation> generate(String lemma) { |
123 | 190 | InterpsList res = new InterpsList(); |
124 | 191 | generate(lemma, res); |
125 | - return Collections.unmodifiableList(res); | |
192 | + return new ArrayList<MorphInterpretation>(res); | |
126 | 193 | } |
127 | 194 | |
128 | 195 | /** |
129 | 196 | * Perform morphological synthesis on a given lemma. |
130 | 197 | * Limit results to interpretations with the specified tag. |
131 | 198 | * |
132 | - * @param lemma lemma to be analyzed | |
199 | + * @param lemma lemma to be analysed | |
133 | 200 | * @param tagnum tag number of result interpretations |
134 | 201 | * @return list containing results of the morphological synthesis |
135 | 202 | */ |
136 | 203 | public List<MorphInterpretation> generate(String lemma, int tagnum) { |
137 | 204 | InterpsList res = new InterpsList(); |
138 | 205 | generate(lemma, tagnum, res); |
139 | - return Collections.unmodifiableList(res); | |
206 | + return new ArrayList<MorphInterpretation>(res); | |
207 | + } | |
208 | + | |
209 | + /** | |
210 | + * Get the list of paths that are searched for dictionaries | |
211 | + * | |
212 | + * @return modifiable list of paths | |
213 | + */ | |
214 | + public List<String> getDictionarySearchPaths() { | |
215 | + return this.get_dictionarySearchPaths(); | |
140 | 216 | } |
141 | 217 | %} |
142 | 218 | |
... | ... | @@ -150,6 +226,13 @@ import java.util.AbstractList; |
150 | 226 | } |
151 | 227 | %} |
152 | 228 | |
229 | +%typemap(javacode) morfeusz::IdResolver %{ | |
230 | + | |
231 | + public java.util.Collection<java.lang.String> getLabels(int labelsId) { | |
232 | + return _getLabels(labelsId); | |
233 | + } | |
234 | +%} | |
235 | + | |
153 | 236 | %typemap(javafinalize) SWIGTYPE %{ |
154 | 237 | protected void finalize() { |
155 | 238 | if (swigCMemOwn) { |
... | ... | @@ -160,10 +243,16 @@ import java.util.AbstractList; |
160 | 243 | |
161 | 244 | %typemap(javadestruct, methodname="delete", methodmodifiers="private") SWIGTYPE ""; |
162 | 245 | |
163 | -%javamethodmodifiers morfeusz::Morfeusz::analyze(const std::string&, std::vector<MorphInterpretation>&) const "private"; | |
246 | +%javamethodmodifiers morfeusz::Morfeusz::analyse(const std::string&, std::vector<MorphInterpretation>&) const "private"; | |
164 | 247 | %javamethodmodifiers morfeusz::Morfeusz::generate(const std::string&, std::vector<MorphInterpretation>&) const "private"; |
165 | 248 | %javamethodmodifiers morfeusz::Morfeusz::generate(const std::string&, int, std::vector<MorphInterpretation>&) const "private"; |
166 | 249 | |
250 | +// kept private; the public getDictionarySearchPaths() defined in typemap(javacode) delegates to it | |
251 | +%javamethodmodifiers morfeusz::Morfeusz::dictionarySearchPaths "private"; | |
252 | + | |
253 | +// kept private; the public getLabels() defined in typemap(javacode) delegates to it | |
254 | +%javamethodmodifiers morfeusz::IdResolver::getLabels "private"; | |
255 | + | |
167 | 256 | %typemap(javaclassmodifiers) std::vector "class" |
168 | 257 | |
169 | 258 | %include "enums.swg" |
... | ... | @@ -180,3 +269,10 @@ import java.util.AbstractList; |
180 | 269 | %pragma(java) jniclassimports=%{ |
181 | 270 | import java.io.IOException; |
182 | 271 | %} |
272 | + | |
273 | +%include "std_vector.i" | |
274 | +%include "std_string.i" | |
275 | +%include "std_except.i" | |
276 | +%include "exception.i" | |
277 | +%include "typemaps.i" | |
278 | + | |
... | ... |
morfeusz/wrappers/morfeusz_javadoc.i
... | ... | @@ -13,7 +13,7 @@ |
13 | 13 | */ |
14 | 14 | public"; |
15 | 15 | |
16 | -%javamethodmodifiers morfeusz::Morfeusz::analyzeAsIterator(const char*) const " | |
16 | +%javamethodmodifiers morfeusz::Morfeusz::analyseAsIterator(const char*) const " | |
17 | 17 | /** |
18 | 18 | * Analyze given text and return the results as iterator. |
19 | 19 | * It does not store results for whole text at once, so may be less memory-consuming for analysis of big texts. |
... | ... |
morfeusz/wrappers/morfeusz_perl.i
0 → 100644
morfeusz/wrappers/morfeusz_python.i
1 | + | |
1 | 2 | %pythoncode %{ |
2 | 3 | |
3 | -def _analyze(self, text): | |
4 | +def _analyse(self, text): | |
4 | 5 | res = InterpsVector() |
5 | - _morfeusz2.Morfeusz_analyze(self, text.encode('utf8'), res) | |
6 | + _morfeusz2.Morfeusz_analyse(self, text.encode('utf8'), res) | |
6 | 7 | return list(res) |
7 | 8 | |
8 | -Morfeusz.analyze = _analyze | |
9 | +Morfeusz.analyse = _analyse | |
9 | 10 | |
10 | 11 | def _generate(self, text): |
11 | 12 | res = InterpsVector() |
... | ... | @@ -30,4 +31,12 @@ MorphInterpretation.getOrth = _getOrth |
30 | 31 | MorphInterpretation.getLemma = _getLemma |
31 | 32 | MorphInterpretation.getTag = _getTag |
32 | 33 | MorphInterpretation.getName = _getName |
33 | -%} | |
34 | 34 | \ No newline at end of file |
35 | +%} | |
36 | + | |
37 | +%include "std_vector.i" | |
38 | +%include "std_string.i" | |
39 | +%include "std_list.i" | |
40 | +%include "std_set.i" | |
41 | +%include "std_except.i" | |
42 | +%include "exception.i" | |
43 | +%include "typemaps.i" | |
... | ... |
nbproject/configurations.xml
... | ... | @@ -59,17 +59,13 @@ |
59 | 59 | <in>SegrulesFSA.cpp</in> |
60 | 60 | <in>segrules.cpp</in> |
61 | 61 | </df> |
62 | - <df name="test"> | |
63 | - <in>test_recognize_dict.cpp</in> | |
64 | - <in>test_result_equals.cpp</in> | |
65 | - </df> | |
66 | 62 | <df name="tests"> |
67 | 63 | <in>TestCAPI.cpp</in> |
68 | 64 | <in>TestMorfeusz.cpp</in> |
69 | 65 | <in>test_c_api.cpp</in> |
70 | 66 | </df> |
71 | - <in>DefaultTagset.cpp</in> | |
72 | 67 | <in>Environment.cpp</in> |
68 | + <in>IdResolverImpl.cpp</in> | |
73 | 69 | <in>InflexionGraph.cpp</in> |
74 | 70 | <in>Morfeusz.cpp</in> |
75 | 71 | <in>MorfeuszInternal.cpp</in> |
... | ... | @@ -168,8 +164,9 @@ |
168 | 164 | <rebuildPropChanged>false</rebuildPropChanged> |
169 | 165 | </toolsSet> |
170 | 166 | <flagsDictionary> |
171 | - <element flagsID="0" commonFlags="-std=c++98 -O3"/> | |
167 | + <element flagsID="0" commonFlags="-std=c++98"/> | |
172 | 168 | <element flagsID="1" commonFlags="-std=c++98 -O3 -fPIC"/> |
169 | + <element flagsID="2" commonFlags="-std=c++98 -fPIC"/> | |
173 | 170 | </flagsDictionary> |
174 | 171 | <codeAssistance> |
175 | 172 | </codeAssistance> |
... | ... | @@ -190,6 +187,7 @@ |
190 | 187 | <pElem>build/fsa</pElem> |
191 | 188 | </incDir> |
192 | 189 | <preprocessorList> |
190 | + <Elem>NDEBUG</Elem> | |
193 | 191 | <Elem>_OPTIMIZE__=1</Elem> |
194 | 192 | <Elem>__PIC__=2</Elem> |
195 | 193 | <Elem>__pic__=2</Elem> |
... | ... | @@ -210,6 +208,7 @@ |
210 | 208 | <pElem>build/fsa</pElem> |
211 | 209 | </incDir> |
212 | 210 | <preprocessorList> |
211 | + <Elem>NDEBUG</Elem> | |
213 | 212 | <Elem>_OPTIMIZE__=1</Elem> |
214 | 213 | <Elem>__PIC__=2</Elem> |
215 | 214 | <Elem>__pic__=2</Elem> |
... | ... | @@ -222,11 +221,12 @@ |
222 | 221 | </ccTool> |
223 | 222 | </item> |
224 | 223 | <item path="build/morfeusz/default_fsa.cpp" ex="false" tool="1" flavor2="4"> |
225 | - <ccTool flags="1"> | |
224 | + <ccTool flags="2"> | |
226 | 225 | <incDir> |
227 | 226 | <pElem>morfeusz</pElem> |
228 | 227 | </incDir> |
229 | 228 | <preprocessorList> |
229 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
230 | 230 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
231 | 231 | </preprocessorList> |
232 | 232 | </ccTool> |
... | ... | @@ -235,11 +235,12 @@ |
235 | 235 | ex="false" |
236 | 236 | tool="1" |
237 | 237 | flavor2="4"> |
238 | - <ccTool flags="1"> | |
238 | + <ccTool flags="2"> | |
239 | 239 | <incDir> |
240 | 240 | <pElem>morfeusz</pElem> |
241 | 241 | </incDir> |
242 | 242 | <preprocessorList> |
243 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
243 | 244 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
244 | 245 | </preprocessorList> |
245 | 246 | </ccTool> |
... | ... | @@ -260,6 +261,7 @@ |
260 | 261 | <pElem>build/morfeusz/java</pElem> |
261 | 262 | </incDir> |
262 | 263 | <preprocessorList> |
264 | + <Elem>NDEBUG</Elem> | |
263 | 265 | <Elem>_OPTIMIZE__=1</Elem> |
264 | 266 | <Elem>__PIC__=2</Elem> |
265 | 267 | <Elem>__pic__=2</Elem> |
... | ... | @@ -286,6 +288,7 @@ |
286 | 288 | <pElem>build/morfeusz/perl</pElem> |
287 | 289 | </incDir> |
288 | 290 | <preprocessorList> |
291 | + <Elem>NDEBUG</Elem> | |
289 | 292 | <Elem>_OPTIMIZE__=1</Elem> |
290 | 293 | <Elem>morfeusz_perl_EXPORTS</Elem> |
291 | 294 | </preprocessorList> |
... | ... | @@ -308,6 +311,7 @@ |
308 | 311 | <pElem>build/morfeusz/python</pElem> |
309 | 312 | </incDir> |
310 | 313 | <preprocessorList> |
314 | + <Elem>NDEBUG</Elem> | |
311 | 315 | <Elem>_OPTIMIZE__=1</Elem> |
312 | 316 | <Elem>__PIC__=2</Elem> |
313 | 317 | <Elem>__pic__=2</Elem> |
... | ... | @@ -329,12 +333,14 @@ |
329 | 333 | ex="false" |
330 | 334 | tool="1" |
331 | 335 | flavor2="4"> |
336 | + <ccTool flags="2"> | |
337 | + </ccTool> | |
332 | 338 | </item> |
333 | 339 | <item path="build/morfeusz/wrappers/morfeuszPERL_wrap.cxx" |
334 | 340 | ex="false" |
335 | 341 | tool="1" |
336 | 342 | flavor2="4"> |
337 | - <ccTool flags="1"> | |
343 | + <ccTool flags="2"> | |
338 | 344 | <incDir> |
339 | 345 | <pElem>/usr/lib/perl/5.14/CORE</pElem> |
340 | 346 | <pElem>build/morfeusz/wrappers/perl</pElem> |
... | ... | @@ -356,6 +362,7 @@ |
356 | 362 | <pElem>morfeusz/build/morfeusz</pElem> |
357 | 363 | </incDir> |
358 | 364 | <preprocessorList> |
365 | + <Elem>NDEBUG</Elem> | |
359 | 366 | <Elem>_OPTIMIZE__=1</Elem> |
360 | 367 | </preprocessorList> |
361 | 368 | <undefinedList> |
... | ... | @@ -373,6 +380,7 @@ |
373 | 380 | <pElem>morfeusz/build/morfeusz</pElem> |
374 | 381 | </incDir> |
375 | 382 | <preprocessorList> |
383 | + <Elem>NDEBUG</Elem> | |
376 | 384 | <Elem>_OPTIMIZE__=1</Elem> |
377 | 385 | </preprocessorList> |
378 | 386 | <undefinedList> |
... | ... | @@ -387,8 +395,8 @@ |
387 | 395 | <pElem>build/morfeusz</pElem> |
388 | 396 | </incDir> |
389 | 397 | <preprocessorList> |
398 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
390 | 399 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
391 | - <Elem>NDEBUG</Elem> | |
392 | 400 | <Elem>libmorfeusz_EXPORTS</Elem> |
393 | 401 | </preprocessorList> |
394 | 402 | </ccTool> |
... | ... | @@ -400,8 +408,8 @@ |
400 | 408 | <pElem>build/morfeusz</pElem> |
401 | 409 | </incDir> |
402 | 410 | <preprocessorList> |
411 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
403 | 412 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
404 | - <Elem>NDEBUG</Elem> | |
405 | 413 | <Elem>libmorfeusz_EXPORTS</Elem> |
406 | 414 | </preprocessorList> |
407 | 415 | </ccTool> |
... | ... | @@ -413,8 +421,8 @@ |
413 | 421 | <pElem>build/morfeusz</pElem> |
414 | 422 | </incDir> |
415 | 423 | <preprocessorList> |
424 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
416 | 425 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
417 | - <Elem>NDEBUG</Elem> | |
418 | 426 | <Elem>libmorfeusz_EXPORTS</Elem> |
419 | 427 | </preprocessorList> |
420 | 428 | </ccTool> |
... | ... | @@ -426,8 +434,8 @@ |
426 | 434 | <pElem>build/morfeusz</pElem> |
427 | 435 | </incDir> |
428 | 436 | <preprocessorList> |
437 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
429 | 438 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
430 | - <Elem>NDEBUG</Elem> | |
431 | 439 | <Elem>libmorfeusz_EXPORTS</Elem> |
432 | 440 | </preprocessorList> |
433 | 441 | </ccTool> |
... | ... | @@ -439,8 +447,8 @@ |
439 | 447 | <pElem>build/morfeusz</pElem> |
440 | 448 | </incDir> |
441 | 449 | <preprocessorList> |
450 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
442 | 451 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
443 | - <Elem>NDEBUG</Elem> | |
444 | 452 | <Elem>libmorfeusz_EXPORTS</Elem> |
445 | 453 | </preprocessorList> |
446 | 454 | </ccTool> |
... | ... | @@ -452,8 +460,8 @@ |
452 | 460 | <pElem>build/morfeusz</pElem> |
453 | 461 | </incDir> |
454 | 462 | <preprocessorList> |
463 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
455 | 464 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
456 | - <Elem>NDEBUG</Elem> | |
457 | 465 | <Elem>libmorfeusz_EXPORTS</Elem> |
458 | 466 | </preprocessorList> |
459 | 467 | </ccTool> |
... | ... | @@ -465,21 +473,8 @@ |
465 | 473 | <pElem>build/morfeusz</pElem> |
466 | 474 | </incDir> |
467 | 475 | <preprocessorList> |
476 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
468 | 477 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
469 | - <Elem>NDEBUG</Elem> | |
470 | - <Elem>libmorfeusz_EXPORTS</Elem> | |
471 | - </preprocessorList> | |
472 | - </ccTool> | |
473 | - </folder> | |
474 | - <folder path="0/test"> | |
475 | - <ccTool> | |
476 | - <incDir> | |
477 | - <pElem>build</pElem> | |
478 | - <pElem>morfeusz</pElem> | |
479 | - <pElem>build/morfeusz</pElem> | |
480 | - </incDir> | |
481 | - <preprocessorList> | |
482 | - <Elem>NDEBUG</Elem> | |
483 | 478 | <Elem>libmorfeusz_EXPORTS</Elem> |
484 | 479 | </preprocessorList> |
485 | 480 | </ccTool> |
... | ... | @@ -613,19 +608,13 @@ |
613 | 608 | <output>${TESTDIR}/TestFiles/f9</output> |
614 | 609 | </linkerTool> |
615 | 610 | </folder> |
616 | - <folder path="build"> | |
617 | - <ccTool> | |
618 | - <preprocessorList> | |
619 | - <Elem>NDEBUG</Elem> | |
620 | - </preprocessorList> | |
621 | - </ccTool> | |
622 | - </folder> | |
623 | 611 | <folder path="build/morfeusz"> |
624 | 612 | <ccTool> |
625 | 613 | <incDir> |
626 | 614 | <pElem>morfeusz</pElem> |
627 | 615 | </incDir> |
628 | 616 | <preprocessorList> |
617 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
629 | 618 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
630 | 619 | </preprocessorList> |
631 | 620 | </ccTool> |
... | ... | @@ -664,7 +653,6 @@ |
664 | 653 | <folder path="morfeusz"> |
665 | 654 | <ccTool> |
666 | 655 | <preprocessorList> |
667 | - <Elem>NDEBUG</Elem> | |
668 | 656 | <Elem>libmorfeusz_EXPORTS</Elem> |
669 | 657 | </preprocessorList> |
670 | 658 | </ccTool> |
... | ... | @@ -679,6 +667,7 @@ |
679 | 667 | <pElem>/usr/lib/jvm/default-java/include</pElem> |
680 | 668 | </incDir> |
681 | 669 | <preprocessorList> |
670 | + <Elem>NDEBUG</Elem> | |
682 | 671 | <Elem>_OPTIMIZE__=1</Elem> |
683 | 672 | <Elem>libjmorfeusz_EXPORTS</Elem> |
684 | 673 | </preprocessorList> |
... | ... | @@ -694,80 +683,80 @@ |
694 | 683 | </incDir> |
695 | 684 | </ccTool> |
696 | 685 | </folder> |
697 | - <item path="morfeusz/DefaultTagset.cpp" ex="false" tool="1" flavor2="4"> | |
698 | - <ccTool flags="1"> | |
686 | + <item path="morfeusz/Environment.cpp" ex="false" tool="1" flavor2="4"> | |
687 | + <ccTool flags="2"> | |
699 | 688 | <incDir> |
700 | 689 | <pElem>morfeusz</pElem> |
701 | 690 | <pElem>build/morfeusz</pElem> |
702 | 691 | </incDir> |
703 | 692 | <preprocessorList> |
693 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
704 | 694 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
705 | - <Elem>NDEBUG</Elem> | |
706 | 695 | <Elem>libmorfeusz_EXPORTS</Elem> |
707 | 696 | </preprocessorList> |
708 | 697 | </ccTool> |
709 | 698 | </item> |
710 | - <item path="morfeusz/Environment.cpp" ex="false" tool="1" flavor2="4"> | |
711 | - <ccTool flags="1"> | |
699 | + <item path="morfeusz/IdResolverImpl.cpp" ex="false" tool="1" flavor2="4"> | |
700 | + <ccTool flags="2"> | |
712 | 701 | <incDir> |
713 | 702 | <pElem>morfeusz</pElem> |
714 | 703 | <pElem>build/morfeusz</pElem> |
715 | 704 | </incDir> |
716 | 705 | <preprocessorList> |
706 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
717 | 707 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
718 | - <Elem>NDEBUG</Elem> | |
719 | 708 | <Elem>libmorfeusz_EXPORTS</Elem> |
720 | 709 | </preprocessorList> |
721 | 710 | </ccTool> |
722 | 711 | </item> |
723 | 712 | <item path="morfeusz/InflexionGraph.cpp" ex="false" tool="1" flavor2="4"> |
724 | - <ccTool flags="1"> | |
713 | + <ccTool flags="2"> | |
725 | 714 | <incDir> |
726 | 715 | <pElem>morfeusz</pElem> |
727 | 716 | <pElem>build/morfeusz</pElem> |
728 | 717 | </incDir> |
729 | 718 | <preprocessorList> |
719 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
730 | 720 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
731 | - <Elem>NDEBUG</Elem> | |
732 | 721 | <Elem>libmorfeusz_EXPORTS</Elem> |
733 | 722 | </preprocessorList> |
734 | 723 | </ccTool> |
735 | 724 | </item> |
736 | 725 | <item path="morfeusz/Morfeusz.cpp" ex="false" tool="1" flavor2="4"> |
737 | - <ccTool flags="1"> | |
726 | + <ccTool flags="2"> | |
738 | 727 | <incDir> |
739 | 728 | <pElem>morfeusz</pElem> |
740 | 729 | <pElem>build/morfeusz</pElem> |
741 | 730 | </incDir> |
742 | 731 | <preprocessorList> |
732 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
743 | 733 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
744 | - <Elem>NDEBUG</Elem> | |
745 | 734 | <Elem>libmorfeusz_EXPORTS</Elem> |
746 | 735 | </preprocessorList> |
747 | 736 | </ccTool> |
748 | 737 | </item> |
749 | 738 | <item path="morfeusz/MorfeuszInternal.cpp" ex="false" tool="1" flavor2="4"> |
750 | - <ccTool flags="1"> | |
739 | + <ccTool flags="2"> | |
751 | 740 | <incDir> |
752 | 741 | <pElem>morfeusz</pElem> |
753 | 742 | <pElem>build/morfeusz</pElem> |
754 | 743 | </incDir> |
755 | 744 | <preprocessorList> |
745 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
756 | 746 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
757 | - <Elem>NDEBUG</Elem> | |
758 | 747 | <Elem>libmorfeusz_EXPORTS</Elem> |
759 | 748 | </preprocessorList> |
760 | 749 | </ccTool> |
761 | 750 | </item> |
762 | 751 | <item path="morfeusz/MorphInterpretation.cpp" ex="false" tool="1" flavor2="4"> |
763 | - <ccTool flags="1"> | |
752 | + <ccTool flags="2"> | |
764 | 753 | <incDir> |
765 | 754 | <pElem>morfeusz</pElem> |
766 | 755 | <pElem>build/morfeusz</pElem> |
767 | 756 | </incDir> |
768 | 757 | <preprocessorList> |
758 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
769 | 759 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
770 | - <Elem>NDEBUG</Elem> | |
771 | 760 | <Elem>libmorfeusz_EXPORTS</Elem> |
772 | 761 | </preprocessorList> |
773 | 762 | </ccTool> |
... | ... | @@ -786,52 +775,68 @@ |
786 | 775 | </ccTool> |
787 | 776 | </item> |
788 | 777 | <item path="morfeusz/ResultsIteratorImpl.cpp" ex="false" tool="1" flavor2="4"> |
789 | - <ccTool flags="1"> | |
778 | + <ccTool flags="2"> | |
790 | 779 | <incDir> |
791 | 780 | <pElem>morfeusz</pElem> |
792 | 781 | <pElem>build/morfeusz</pElem> |
793 | 782 | </incDir> |
794 | 783 | <preprocessorList> |
784 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
795 | 785 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
796 | - <Elem>NDEBUG</Elem> | |
797 | 786 | <Elem>libmorfeusz_EXPORTS</Elem> |
798 | 787 | </preprocessorList> |
799 | 788 | </ccTool> |
800 | 789 | </item> |
801 | 790 | <item path="morfeusz/c_api/ResultsManager.cpp" ex="false" tool="1" flavor2="4"> |
791 | + <ccTool flags="2"> | |
792 | + </ccTool> | |
802 | 793 | </item> |
803 | 794 | <item path="morfeusz/case/CaseConverter.cpp" ex="false" tool="1" flavor2="4"> |
795 | + <ccTool flags="2"> | |
796 | + </ccTool> | |
804 | 797 | </item> |
805 | 798 | <item path="morfeusz/case/CasePatternHelper.cpp" |
806 | 799 | ex="false" |
807 | 800 | tool="1" |
808 | 801 | flavor2="4"> |
802 | + <ccTool flags="2"> | |
803 | + </ccTool> | |
809 | 804 | </item> |
810 | 805 | <item path="morfeusz/case/caseconv.cpp" ex="false" tool="1" flavor2="4"> |
806 | + <ccTool flags="2"> | |
807 | + </ccTool> | |
811 | 808 | </item> |
812 | 809 | <item path="morfeusz/charset/CharsetConverter.cpp" |
813 | 810 | ex="false" |
814 | 811 | tool="1" |
815 | 812 | flavor2="4"> |
813 | + <ccTool flags="2"> | |
814 | + </ccTool> | |
816 | 815 | </item> |
817 | 816 | <item path="morfeusz/charset/TextReader.cpp" ex="false" tool="1" flavor2="4"> |
817 | + <ccTool flags="2"> | |
818 | + </ccTool> | |
818 | 819 | </item> |
819 | 820 | <item path="morfeusz/charset/conversion_tables.cpp" |
820 | 821 | ex="false" |
821 | 822 | tool="1" |
822 | 823 | flavor2="4"> |
824 | + <ccTool flags="2"> | |
825 | + </ccTool> | |
823 | 826 | </item> |
824 | 827 | <item path="morfeusz/cli/cli.cpp" ex="false" tool="1" flavor2="4"> |
828 | + <ccTool flags="2"> | |
829 | + </ccTool> | |
825 | 830 | </item> |
826 | 831 | <item path="morfeusz/const.cpp" ex="false" tool="1" flavor2="4"> |
827 | - <ccTool flags="1"> | |
832 | + <ccTool flags="2"> | |
828 | 833 | <incDir> |
829 | 834 | <pElem>morfeusz</pElem> |
830 | 835 | <pElem>build/morfeusz</pElem> |
831 | 836 | </incDir> |
832 | 837 | <preprocessorList> |
838 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
833 | 839 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
834 | - <Elem>NDEBUG</Elem> | |
835 | 840 | <Elem>libmorfeusz_EXPORTS</Elem> |
836 | 841 | </preprocessorList> |
837 | 842 | </ccTool> |
... | ... | @@ -840,38 +845,50 @@ |
840 | 845 | ex="false" |
841 | 846 | tool="1" |
842 | 847 | flavor2="4"> |
848 | + <ccTool flags="2"> | |
849 | + </ccTool> | |
843 | 850 | </item> |
844 | 851 | <item path="morfeusz/deserialization/MorphDeserializer.cpp" |
845 | 852 | ex="false" |
846 | 853 | tool="1" |
847 | 854 | flavor2="4"> |
855 | + <ccTool flags="2"> | |
856 | + </ccTool> | |
848 | 857 | </item> |
849 | 858 | <item path="morfeusz/deserialization/morphInterps/InterpretedChunksDecoder.cpp" |
850 | 859 | ex="false" |
851 | 860 | tool="1" |
852 | 861 | flavor2="4"> |
862 | + <ccTool flags="2"> | |
863 | + </ccTool> | |
853 | 864 | </item> |
854 | 865 | <item path="morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.cpp" |
855 | 866 | ex="false" |
856 | 867 | tool="1" |
857 | 868 | flavor2="4"> |
869 | + <ccTool flags="2"> | |
870 | + </ccTool> | |
858 | 871 | </item> |
859 | 872 | <item path="morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Generator.cpp" |
860 | 873 | ex="false" |
861 | 874 | tool="1" |
862 | 875 | flavor2="4"> |
876 | + <ccTool flags="2"> | |
877 | + </ccTool> | |
863 | 878 | </item> |
864 | 879 | <item path="morfeusz/fsa/const.cpp" ex="false" tool="1" flavor2="4"> |
880 | + <ccTool flags="2"> | |
881 | + </ccTool> | |
865 | 882 | </item> |
866 | 883 | <item path="morfeusz/morfeusz2_c.cpp" ex="false" tool="1" flavor2="4"> |
867 | - <ccTool flags="1"> | |
884 | + <ccTool flags="2"> | |
868 | 885 | <incDir> |
869 | 886 | <pElem>morfeusz</pElem> |
870 | 887 | <pElem>build/morfeusz</pElem> |
871 | 888 | </incDir> |
872 | 889 | <preprocessorList> |
890 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
873 | 891 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
874 | - <Elem>NDEBUG</Elem> | |
875 | 892 | <Elem>libmorfeusz_EXPORTS</Elem> |
876 | 893 | </preprocessorList> |
877 | 894 | </ccTool> |
... | ... | @@ -883,8 +900,8 @@ |
883 | 900 | <pElem>build/morfeusz</pElem> |
884 | 901 | </incDir> |
885 | 902 | <preprocessorList> |
903 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
886 | 904 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
887 | - <Elem>NDEBUG</Elem> | |
888 | 905 | </preprocessorList> |
889 | 906 | </ccTool> |
890 | 907 | </item> |
... | ... | @@ -895,24 +912,18 @@ |
895 | 912 | <pElem>build/morfeusz</pElem> |
896 | 913 | </incDir> |
897 | 914 | <preprocessorList> |
915 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
898 | 916 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
899 | - <Elem>NDEBUG</Elem> | |
900 | 917 | </preprocessorList> |
901 | 918 | </ccTool> |
902 | 919 | </item> |
903 | 920 | <item path="morfeusz/segrules/SegrulesFSA.cpp" ex="false" tool="1" flavor2="4"> |
921 | + <ccTool flags="2"> | |
922 | + </ccTool> | |
904 | 923 | </item> |
905 | 924 | <item path="morfeusz/segrules/segrules.cpp" ex="false" tool="1" flavor2="4"> |
906 | - </item> | |
907 | - <item path="morfeusz/test/test_recognize_dict.cpp" | |
908 | - ex="false" | |
909 | - tool="1" | |
910 | - flavor2="4"> | |
911 | - </item> | |
912 | - <item path="morfeusz/test/test_result_equals.cpp" | |
913 | - ex="false" | |
914 | - tool="1" | |
915 | - flavor2="4"> | |
925 | + <ccTool flags="2"> | |
926 | + </ccTool> | |
916 | 927 | </item> |
917 | 928 | <item path="morfeusz/test_runner.cpp" ex="false" tool="1" flavor2="4"> |
918 | 929 | <ccTool flags="0"> |
... | ... | @@ -921,8 +932,8 @@ |
921 | 932 | <pElem>build/morfeusz</pElem> |
922 | 933 | </incDir> |
923 | 934 | <preprocessorList> |
935 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
924 | 936 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
925 | - <Elem>NDEBUG</Elem> | |
926 | 937 | </preprocessorList> |
927 | 938 | </ccTool> |
928 | 939 | </item> |
... | ... | @@ -935,8 +946,8 @@ |
935 | 946 | <pElem>build/morfeusz</pElem> |
936 | 947 | </incDir> |
937 | 948 | <preprocessorList> |
949 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
938 | 950 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
939 | - <Elem>NDEBUG</Elem> | |
940 | 951 | </preprocessorList> |
941 | 952 | </ccTool> |
942 | 953 | </item> |
... | ... | @@ -947,8 +958,8 @@ |
947 | 958 | <pElem>build/morfeusz</pElem> |
948 | 959 | </incDir> |
949 | 960 | <preprocessorList> |
961 | + <Elem>BUILDING_MORFEUSZ</Elem> | |
950 | 962 | <Elem>MORFEUSZ2_VERSION="2.0.0"</Elem> |
951 | - <Elem>NDEBUG</Elem> | |
952 | 963 | </preprocessorList> |
953 | 964 | </ccTool> |
954 | 965 | </item> |
... | ... |