Commit 8148835a47574769c6705c325af983b18bc83f6d
1 parent
229cfad6
update
Showing
209 changed files
with
27681 additions
and
10521 deletions
Too many changes to show.
To preserve performance only 29 of 209 files are displayed.
conf/parser/mtas.xml
1 | 1 | <?xml version="1.0" encoding="UTF-8" ?> |
2 | 2 | <mtas> |
3 | 3 | <configurations type="mtas.analysis.util.MtasTokenizerFactory"> |
4 | + <configuration name="test" file="mtas/folia_test.xml" /> | |
5 | + <configuration name="CRM" file="mtas/crm_test.xml" /> | |
4 | 6 | <configuration name="DBNL" file="mtas/folia_dbnl.xml" /> |
7 | + <configuration name="DDD" file="mtas/folia_ddd.xml" /> | |
5 | 8 | <configuration name="EDBO" file="mtas/folia_edbo.xml" /> |
6 | 9 | <configuration name="SONAR" file="mtas/folia_sonar.xml" /> |
7 | 10 | </configurations> |
8 | 11 | <configurations type="mtas.analysis.util.MtasCharFilterFactory"> |
12 | + <configuration name="test" type="file" /> | |
13 | + <configuration name="CRM" type="file" prefix="/Users/matthijs/Software/Mtas/data/CRM/data/files/" postfix=".txt" /> | |
9 | 14 | <configuration name="DBNL" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" /> |
15 | + <configuration name="DDD" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" /> | |
10 | 16 | <configuration name="EDBO" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" /> |
11 | 17 | <configuration name="SONAR" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" /> |
12 | 18 | </configurations> |
... | ... |
conf/parser/mtas/crm_test.xml
0 → 100644
1 | +<?xml version="1.0" encoding="UTF-8" ?> | |
2 | +<mtas> | |
3 | + | |
4 | + <!-- START MTAS INDEX CONFIGURATION --> | |
5 | + <index> | |
6 | + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS --> | |
7 | + <payload index="false" /> | |
8 | + <offset index="false" /> | |
9 | + <realoffset index="false" /> | |
10 | + <parent index="true" /> | |
11 | + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS --> | |
12 | + </index> | |
13 | + <!-- END MTAS INDEX CONFIGURATION --> | |
14 | + | |
15 | + | |
16 | + | |
17 | + <!-- START CONFIGURATION MTAS CRM PARSER --> | |
18 | + <parser name="mtas.analysis.parser.MtasCRMParser"> | |
19 | + | |
20 | + <!-- START GENERAL SETTINGS MTAS PARSER --> | |
21 | + <autorepair value="true" /> | |
22 | + <makeunique value="true" /> | |
23 | + <!-- END GENERAL SETTINGS MTAS PARSER --> | |
24 | + | |
25 | + <mappings> | |
26 | + | |
27 | + <mapping type="word"> | |
28 | + </mapping> | |
29 | + | |
30 | + <mapping type="wordAnnotation" name="0"> | |
31 | + <token type="string" offset="false" parent="false"> | |
32 | + <pre> | |
33 | + <item type="string" value="t" /> | |
34 | + </pre> | |
35 | + <post> | |
36 | + <item type="text" /> | |
37 | + </post> | |
38 | + </token> | |
39 | + </mapping> | |
40 | + <mapping type="wordAnnotation" name="0"> | |
41 | + <token type="string" offset="false" parent="false"> | |
42 | + <pre> | |
43 | + <item type="string" value="t_lc" /> | |
44 | + </pre> | |
45 | + <post> | |
46 | + <item type="text" filter="ascii,lowercase" /> | |
47 | + </post> | |
48 | + </token> | |
49 | + </mapping> | |
50 | + <mapping type="wordAnnotation" name="1"> | |
51 | + <token type="string" offset="false" parent="false"> | |
52 | + <pre> | |
53 | + <item type="string" value="t1" /> | |
54 | + </pre> | |
55 | + <post> | |
56 | + <item type="text" /> | |
57 | + </post> | |
58 | + </token> | |
59 | + </mapping> | |
60 | + <mapping type="wordAnnotation" name="1"> | |
61 | + <token type="string" offset="false" parent="false"> | |
62 | + <pre> | |
63 | + <item type="string" value="t1_lc" /> | |
64 | + </pre> | |
65 | + <post> | |
66 | + <item type="text" filter="ascii,lowercase" /> | |
67 | + </post> | |
68 | + </token> | |
69 | + </mapping> | |
70 | + <mapping type="wordAnnotation" name="2"> | |
71 | + <token type="string" offset="false" parent="false"> | |
72 | + <pre> | |
73 | + <item type="string" value="t2" /> | |
74 | + </pre> | |
75 | + <post> | |
76 | + <item type="text" /> | |
77 | + </post> | |
78 | + </token> | |
79 | + </mapping> | |
80 | + <mapping type="wordAnnotation" name="2"> | |
81 | + <token type="string" offset="false" parent="false"> | |
82 | + <pre> | |
83 | + <item type="string" value="t2_lc" /> | |
84 | + </pre> | |
85 | + <post> | |
86 | + <item type="text" filter="ascii,lowercase" /> | |
87 | + </post> | |
88 | + </token> | |
89 | + </mapping> | |
90 | + <mapping type="wordAnnotation" name="3"> | |
91 | + <token type="string" offset="false" parent="false"> | |
92 | + <pre> | |
93 | + <item type="string" value="lemma" /> | |
94 | + </pre> | |
95 | + <post> | |
96 | + <item type="text" /> | |
97 | + </post> | |
98 | + </token> | |
99 | + </mapping> | |
100 | + <mapping type="wordAnnotation" name="4"> | |
101 | + <token type="string" offset="false" parent="false"> | |
102 | + <pre> | |
103 | + <item type="string" value="crm" /> | |
104 | + </pre> | |
105 | + <post> | |
106 | + <item type="text" /> | |
107 | + </post> | |
108 | + </token> | |
109 | + </mapping> | |
110 | + | |
111 | + <mapping type="crmPair" name="6"> | |
112 | + <condition> | |
113 | + <item type="text" not="true" condition="-" /> | |
114 | + </condition> | |
115 | + </mapping> | |
116 | + <mapping type="crmPair" name="part"> | |
117 | + <token type="string" offset="false" parent="false"> | |
118 | + <pre> | |
119 | + <item type="name" /> | |
120 | + </pre> | |
121 | + <post> | |
122 | + <item type="text" /> | |
123 | + </post> | |
124 | + </token> | |
125 | + </mapping> | |
126 | + | |
127 | + <mapping type="crmSentence" name="7"> | |
128 | + <token type="string" offset="false" parent="false"> | |
129 | + <pre> | |
130 | + <item type="string" value="s"/> | |
131 | + </pre> | |
132 | + <post> | |
133 | + <item type="text" /> | |
134 | + </post> | |
135 | + </token> | |
136 | + <condition> | |
137 | + <item type="text" not="true" condition="-" /> | |
138 | + <item type="text" not="true" condition="2" /> | |
139 | + <item type="text" not="true" condition="4" /> | |
140 | + <item type="text" not="true" condition="5" /> | |
141 | + <item type="text" not="true" condition="6" /> | |
142 | + <item type="text" not="true" condition="8" /> | |
143 | + </condition> | |
144 | + </mapping> | |
145 | + <mapping type="crmClause" name="7"> | |
146 | + <token type="string" offset="false" parent="false"> | |
147 | + <pre> | |
148 | + <item type="string" value="sc"/> | |
149 | + </pre> | |
150 | + <post> | |
151 | + <item type="text" /> | |
152 | + </post> | |
153 | + </token> | |
154 | + <condition> | |
155 | + <item type="text" not="true" condition="-" /> | |
156 | + <item type="text" not="true" condition="0" /> | |
157 | + <item type="text" not="true" condition="1" /> | |
158 | + </condition> | |
159 | + </mapping> | |
160 | + <mapping type="crmClause" name="7"> | |
161 | + <condition> | |
162 | + <item type="text" not="true" condition="-" /> | |
163 | + </condition> | |
164 | + </mapping> | |
165 | + | |
166 | + <mapping type="wordAnnotation" name="pos"> | |
167 | + <token type="string" offset="false" parent="false"> | |
168 | + <pre> | |
169 | + <item type="name" /> | |
170 | + </pre> | |
171 | + <post> | |
172 | + <item type="text" /> | |
173 | + </post> | |
174 | + </token> | |
175 | + </mapping> | |
176 | + <mapping type="wordAnnotation" name="feat.getal"> | |
177 | + <token type="string" offset="false" parent="false"> | |
178 | + <pre> | |
179 | + <item type="name" /> | |
180 | + </pre> | |
181 | + <post> | |
182 | + <item type="text" /> | |
183 | + </post> | |
184 | + </token> | |
185 | + </mapping> | |
186 | + <mapping type="wordAnnotation" name="feat.persoon"> | |
187 | + <token type="string" offset="false" parent="false"> | |
188 | + <pre> | |
189 | + <item type="name" /> | |
190 | + </pre> | |
191 | + <post> | |
192 | + <item type="text" /> | |
193 | + </post> | |
194 | + </token> | |
195 | + </mapping> | |
196 | + <mapping type="wordAnnotation" name="feat.ntype"> | |
197 | + <token type="string" offset="false" parent="false"> | |
198 | + <pre> | |
199 | + <item type="name" /> | |
200 | + </pre> | |
201 | + <post> | |
202 | + <item type="text" /> | |
203 | + </post> | |
204 | + </token> | |
205 | + </mapping> | |
206 | + <mapping type="wordAnnotation" name="feat.pvtijd"> | |
207 | + <token type="string" offset="false" parent="false"> | |
208 | + <pre> | |
209 | + <item type="name" /> | |
210 | + </pre> | |
211 | + <post> | |
212 | + <item type="text" /> | |
213 | + </post> | |
214 | + </token> | |
215 | + </mapping> | |
216 | + <mapping type="wordAnnotation" name="feat.wvorm"> | |
217 | + <token type="string" offset="false" parent="false"> | |
218 | + <pre> | |
219 | + <item type="name" /> | |
220 | + </pre> | |
221 | + <post> | |
222 | + <item type="text" /> | |
223 | + </post> | |
224 | + </token> | |
225 | + </mapping> | |
226 | + <mapping type="wordAnnotation" name="feat.numtype"> | |
227 | + <token type="string" offset="false" parent="false"> | |
228 | + <pre> | |
229 | + <item type="name" /> | |
230 | + </pre> | |
231 | + <post> | |
232 | + <item type="text" /> | |
233 | + </post> | |
234 | + </token> | |
235 | + </mapping> | |
236 | + <mapping type="wordAnnotation" name="feat.vwtype"> | |
237 | + <token type="string" offset="false" parent="false"> | |
238 | + <pre> | |
239 | + <item type="name" /> | |
240 | + </pre> | |
241 | + <post> | |
242 | + <item type="text" /> | |
243 | + </post> | |
244 | + </token> | |
245 | + </mapping> | |
246 | + <mapping type="wordAnnotation" name="feat.lwtype"> | |
247 | + <token type="string" offset="false" parent="false"> | |
248 | + <pre> | |
249 | + <item type="name" /> | |
250 | + </pre> | |
251 | + <post> | |
252 | + <item type="text" /> | |
253 | + </post> | |
254 | + </token> | |
255 | + </mapping> | |
256 | + <mapping type="wordAnnotation" name="feat.form"> | |
257 | + <token type="string" offset="false" parent="false"> | |
258 | + <pre> | |
259 | + <item type="name" /> | |
260 | + </pre> | |
261 | + <post> | |
262 | + <item type="text" /> | |
263 | + </post> | |
264 | + </token> | |
265 | + </mapping> | |
266 | + <mapping type="wordAnnotation" name="feat.probleemgeval"> | |
267 | + <token type="string" offset="false" parent="false"> | |
268 | + <pre> | |
269 | + <item type="name" /> | |
270 | + </pre> | |
271 | + <post> | |
272 | + <item type="text" /> | |
273 | + </post> | |
274 | + </token> | |
275 | + </mapping> | |
276 | + </mappings> | |
277 | + | |
278 | + <functions> | |
279 | + <function type="crmPair" name="6" split="+"> | |
280 | + <condition value=""> | |
281 | + <output name="part" /> | |
282 | + </condition> | |
283 | + </function> | |
284 | + <function type="wordAnnotation" name="4" split="+"> | |
285 | + <condition value="000,001,002,003,004,005,006,009"> | |
286 | + <output name="pos" value="N" /> | |
287 | + <output name="feat.getal" value="ev" /> | |
288 | + </condition> | |
289 | + <condition value="010,011,012,013,014,015,016,019"> | |
290 | + <output name="pos" value="N" /> | |
291 | + <output name="feat.getal" value="mv" /> | |
292 | + </condition> | |
293 | + <condition value="020,021,022,023,024,025,026,029"> | |
294 | + <output name="pos" value="N" /> | |
295 | + <output name="feat.ntype" value="eigen" /> | |
296 | + </condition> | |
297 | + <condition value="090,091,092,093,094,095,096,099"> | |
298 | + <output name="pos" value="N" /> | |
299 | + <output name="feat.probleemgeval" /> | |
300 | + </condition> | |
301 | + <condition value="100,101,102,103,104,105,106,109"> | |
302 | + <output name="pos" value="ADJ" /> | |
303 | + <output name="feat.getal" value="ev" /> | |
304 | + </condition> | |
305 | + <condition value="110,111,112,113,114,115,116,119"> | |
306 | + <output name="pos" value="ADJ" /> | |
307 | + <output name="feat.getal" value="mv" /> | |
308 | + </condition> | |
309 | + <condition value="190,191,192,193,194,195,196,199"> | |
310 | + <output name="pos" value="ADJ" /> | |
311 | + <output name="feat.probleemgeval" /> | |
312 | + </condition> | |
313 | + | |
314 | + <condition value="200,201,202,203,204,205,206,209"> | |
315 | + <output name="pos" value="WW" /> | |
316 | + <output name="feat.pvtijd" value="tgw" /> | |
317 | + </condition> | |
318 | + <condition value="210,211,212,213,214,215,216,219"> | |
319 | + <output name="pos" value="WW" /> | |
320 | + <output name="feat.pvtijd" value="tgw" /> | |
321 | + </condition> | |
322 | + <condition value="220,221,222,223,224,225,226,229"> | |
323 | + <output name="pos" value="WW" /> | |
324 | + <output name="feat.pvtijd" value="verl" /> | |
325 | + </condition> | |
326 | + <condition value="230,231,232,233,234,235,236,239"> | |
327 | + <output name="pos" value="WW" /> | |
328 | + <output name="feat.pvtijd" value="verl" /> | |
329 | + </condition> | |
330 | + <condition value="240,241,242,243,244,245,246,249"> | |
331 | + <output name="pos" value="WW" /> | |
332 | + </condition> | |
333 | + <condition value="250,251,252,253,254,255,256,259"> | |
334 | + <output name="pos" value="WW" /> | |
335 | + <output name="feat.wvorm" value="inf" /> | |
336 | + </condition> | |
337 | + <condition value="260,261,262,263,264,265,266,269"> | |
338 | + <output name="pos" value="WW" /> | |
339 | + <output name="feat.wvorm" value="inf" /> | |
340 | + </condition> | |
341 | + <condition value="270,271,272,273,274,275,276,279"> | |
342 | + <output name="pos" value="WW" /> | |
343 | + </condition> | |
344 | + <condition value="280,281,282,283,284,285,286,289"> | |
345 | + <output name="pos" value="WW" /> | |
346 | + </condition> | |
347 | + <condition value="290,291,292,293,294,295,296,299"> | |
348 | + <output name="pos" value="WW" /> | |
349 | + <output name="feat.probleemgeval" /> | |
350 | + </condition> | |
351 | + | |
352 | + | |
353 | + <condition value="300,301,302,303,304,305,306,309"> | |
354 | + <output name="pos" value="TW" /> | |
355 | + <output name="feat.numtype" value="hoofd" /> | |
356 | + </condition> | |
357 | + <condition value="310,311,312,313,314,315,316,319"> | |
358 | + <output name="pos" value="TW" /> | |
359 | + <output name="feat.numtype" value="rang" /> | |
360 | + </condition> | |
361 | + <condition value="320,321,322,323,324,325,326,329"> | |
362 | + <output name="pos" value="TW" /> | |
363 | + </condition> | |
364 | + <condition value="390,391,392,393,394,395,396,399"> | |
365 | + <output name="pos" value="TW" /> | |
366 | + <output name="feat.probleemgeval" /> | |
367 | + </condition> | |
368 | + | |
369 | + <condition value="401"> | |
370 | + <output name="pos" value="VNW" /> | |
371 | + <output name="feat.getal" value="ev" /> | |
372 | + <output name="feat.persoon" value="1" /> | |
373 | + </condition> | |
374 | + <condition value="402"> | |
375 | + <output name="pos" value="VNW" /> | |
376 | + <output name="feat.getal" value="ev" /> | |
377 | + <output name="feat.persoon" value="2" /> | |
378 | + </condition> | |
379 | + <condition value="403"> | |
380 | + <output name="pos" value="VNW" /> | |
381 | + <output name="feat.getal" value="ev" /> | |
382 | + <output name="feat.persoon" value="3" /> | |
383 | + </condition> | |
384 | + <condition value="404"> | |
385 | + <output name="pos" value="VNW" /> | |
386 | + <output name="feat.getal" value="mv" /> | |
387 | + <output name="feat.persoon" value="1" /> | |
388 | + </condition> | |
389 | + <condition value="405"> | |
390 | + <output name="pos" value="VNW" /> | |
391 | + <output name="feat.getal" value="mv" /> | |
392 | + <output name="feat.persoon" value="2" /> | |
393 | + </condition> | |
394 | + <condition value="406"> | |
395 | + <output name="pos" value="VNW" /> | |
396 | + <output name="feat.getal" value="mv" /> | |
397 | + <output name="feat.persoon" value="3" /> | |
398 | + </condition> | |
399 | + <condition value="409"> | |
400 | + <output name="pos" value="VNW" /> | |
401 | + <output name="feat.probleemgeval" /> | |
402 | + </condition> | |
403 | + <condition value="410,411,412,413,414,415,416,419"> | |
404 | + <output name="pos" value="VNW" /> | |
405 | + <output name="feat.vwtype" value="aanw" /> | |
406 | + </condition> | |
407 | + <condition value="420,421,422,423,424,425,426,429"> | |
408 | + <output name="pos" value="VNW" /> | |
409 | + <output name="feat.vwtype" value="betr" /> | |
410 | + </condition> | |
411 | + <condition value="430,431,432,433,434,435,436,439"> | |
412 | + <output name="pos" value="VNW" /> | |
413 | + <output name="feat.vwtype" value="vb" /> | |
414 | + </condition> | |
415 | + <condition value="434,441,442,443,444,445,446,449"> | |
416 | + <output name="pos" value="VNW" /> | |
417 | + <output name="feat.vwtype" value="vb" /> | |
418 | + </condition> | |
419 | + <condition value="440,441,442,443,444,445,446,449"> | |
420 | + <output name="pos" value="VNW" /> | |
421 | + <output name="feat.lwtype" value="onbep" /> | |
422 | + </condition> | |
423 | + <condition value="450,451,452,453,454,455,456,459"> | |
424 | + <output name="pos" value="VNW" /> | |
425 | + <output name="feat.vwtype" value="bez" /> | |
426 | + </condition> | |
427 | + <condition value="461"> | |
428 | + <output name="pos" value="VNW" /> | |
429 | + <output name="feat.vwtype" value="refl" /> | |
430 | + <output name="feat.getal" value="ev" /> | |
431 | + <output name="feat.persoon" value="1" /> | |
432 | + </condition> | |
433 | + <condition value="462"> | |
434 | + <output name="pos" value="VNW" /> | |
435 | + <output name="feat.vwtype" value="refl" /> | |
436 | + <output name="feat.getal" value="ev" /> | |
437 | + <output name="feat.persoon" value="2" /> | |
438 | + </condition> | |
439 | + <condition value="463"> | |
440 | + <output name="pos" value="VNW" /> | |
441 | + <output name="feat.vwtype" value="refl" /> | |
442 | + <output name="feat.getal" value="ev" /> | |
443 | + <output name="feat.persoon" value="3" /> | |
444 | + </condition> | |
445 | + <condition value="464"> | |
446 | + <output name="pos" value="VNW" /> | |
447 | + <output name="feat.vwtype" value="refl" /> | |
448 | + <output name="feat.getal" value="mv" /> | |
449 | + <output name="feat.persoon" value="1" /> | |
450 | + </condition> | |
451 | + <condition value="465"> | |
452 | + <output name="pos" value="VNW" /> | |
453 | + <output name="feat.vwtype" value="refl" /> | |
454 | + <output name="feat.getal" value="mv" /> | |
455 | + <output name="feat.persoon" value="2" /> | |
456 | + </condition> | |
457 | + <condition value="466"> | |
458 | + <output name="pos" value="VNW" /> | |
459 | + <output name="feat.vwtype" value="refl" /> | |
460 | + <output name="feat.getal" value="mv" /> | |
461 | + <output name="feat.persoon" value="3" /> | |
462 | + </condition> | |
463 | + <condition value="469"> | |
464 | + <output name="pos" value="VNW" /> | |
465 | + <output name="feat.vwtype" value="refl" /> | |
466 | + <output name="feat.probleemgeval" /> | |
467 | + </condition> | |
468 | + <condition value="470,471,472,473,474,475,476,479"> | |
469 | + <output name="pos" value="LID" /> | |
470 | + </condition> | |
471 | + <condition value="480,481,482,483,484,485,486,489"> | |
472 | + <output name="pos" value="LID" /> | |
473 | + </condition> | |
474 | + <condition value="490,491,492,493,494,495,496,499"> | |
475 | + <output name="pos" value="VNW" /> | |
476 | + <output name="feat.probleemgeval" /> | |
477 | + </condition> | |
478 | + | |
479 | + <condition value="500,501,502,503,504,505,506,509"> | |
480 | + <output name="pos" value="BW" /> | |
481 | + </condition> | |
482 | + <condition value="510,511,512,513,514,515,516,519"> | |
483 | + <output name="pos" value="BW" /> | |
484 | + </condition> | |
485 | + <condition value="520,521,522,523,524,525,526,529"> | |
486 | + <output name="pos" value="BW" /> | |
487 | + </condition> | |
488 | + <condition value="530,531,532,533,534,535,536,539"> | |
489 | + <output name="pos" value="BW" /> | |
490 | + </condition> | |
491 | + <condition value="540,541,542,543,544,545,546,549"> | |
492 | + <output name="pos" value="BW" /> | |
493 | + </condition> | |
494 | + <condition value="550,551,552,553,554,555,556,559"> | |
495 | + <output name="pos" value="BW" /> | |
496 | + </condition> | |
497 | + <condition value="560,561,562,563,564,565,566,569"> | |
498 | + <output name="pos" value="BW" /> | |
499 | + </condition> | |
500 | + <condition value="590,591,592,593,594,595,596,599"> | |
501 | + <output name="pos" value="BW" /> | |
502 | + <output name="feat.probleemgeval" /> | |
503 | + </condition> | |
504 | + | |
505 | + <condition value="600,601,602,603,604,605,606,609"> | |
506 | + <output name="pos" value="BW" /> | |
507 | + </condition> | |
508 | + <condition value="610,611,612,613,614,615,616,619"> | |
509 | + <output name="pos" value="BW" /> | |
510 | + </condition> | |
511 | + <condition value="620,621,622,623,624,625,626,629"> | |
512 | + <output name="pos" value="BW" /> | |
513 | + </condition> | |
514 | + <condition value="630,631,632,633,634,635,636,639"> | |
515 | + <output name="pos" value="BW" /> | |
516 | + </condition> | |
517 | + <condition value="640,641,642,643,644,645,646,649"> | |
518 | + <output name="pos" value="BW" /> | |
519 | + </condition> | |
520 | + <condition value="650,651,652,653,654,655,656,659"> | |
521 | + <output name="pos" value="BW" /> | |
522 | + </condition> | |
523 | + <condition value="690,691,692,693,694,695,696,699"> | |
524 | + <output name="pos" value="BW" /> | |
525 | + <output name="feat.probleemgeval" /> | |
526 | + </condition> | |
527 | + | |
528 | + <condition value="700,701,702,703,704,705,706,709"> | |
529 | + <output name="pos" value="VZ" /> | |
530 | + </condition> | |
531 | + <condition value="790,791,792,793,794,795,796,799"> | |
532 | + <output name="pos" value="VZ" /> | |
533 | + </condition> | |
534 | + | |
535 | + <condition value="800,801,802,803,804,805,806,809"> | |
536 | + <output name="pos" value="VG" /> | |
537 | + </condition> | |
538 | + <condition value="810,811,812,813,814,815,816,819"> | |
539 | + <output name="pos" value="VG" /> | |
540 | + </condition> | |
541 | + <condition value="820,821,822,823,824,825,826,829"> | |
542 | + <output name="pos" value="VG" /> | |
543 | + </condition> | |
544 | + <condition value="830,831,832,833,834,835,836,839"> | |
545 | + <output name="pos" value="VG" /> | |
546 | + </condition> | |
547 | + <condition value="840,841,842,843,844,845,846,849"> | |
548 | + <output name="pos" value="VG" /> | |
549 | + </condition> | |
550 | + <condition value="850,851,852,853,854,855,856,859"> | |
551 | + <output name="pos" value="VG" /> | |
552 | + </condition> | |
553 | + <condition value="860,861,862,863,864,865,866,869"> | |
554 | + <output name="pos" value="VG" /> | |
555 | + </condition> | |
556 | + <condition value="870,871,872,873,874,875,876,879"> | |
557 | + <output name="pos" value="VG" /> | |
558 | + </condition> | |
559 | + <condition value="880,881,882,883,884,885,886,889"> | |
560 | + <output name="pos" value="VG" /> | |
561 | + </condition> | |
562 | + <condition value="890,891,892,893,894,895,896,899"> | |
563 | + <output name="pos" value="VG" /> | |
564 | + <output name="feat.probleemgeval" /> | |
565 | + </condition> | |
566 | + | |
567 | + <condition value="900,901,902,903,904,905,906,909"> | |
568 | + <output name="feat.probleemgeval" /> | |
569 | + </condition> | |
570 | + <condition value="900,901,902,903,904,905,906,909"> | |
571 | + <output name="feat.probleemgeval" /> | |
572 | + </condition> | |
573 | + <condition value="990,991,992,993,994,995,996,999"> | |
574 | + <output name="feat.probleemgeval" /> | |
575 | + </condition> | |
576 | + | |
577 | + <condition | |
578 | + value="001,011,021,091,101,111,191,201,211,221,231,241,251,261,271,281,291,301,311,321,391,411,421,431,441,451,471,481,491,501,511,521,531,541,551,561,591,601,611,621,631,641,651,691,701,791,801,811,821,831,841,851,861,871,881,891,901,911,991"> | |
579 | + <output name="feat.form" value="-e" /> | |
580 | + </condition> | |
581 | + <condition | |
582 | + value="002,012,022,092,102,112,192,202,212,222,232,242,252,262,272,282,292,302,312,322,392,412,422,432,442,452,472,482,492,502,512,522,532,542,552,562,592,602,612,622,632,642,652,692,702,792,802,812,822,832,842,852,862,872,882,892,902,912,992"> | |
583 | + <output name="feat.form" value="-s/-th" /> | |
584 | + </condition> | |
585 | + <condition | |
586 | + value="003,013,023,093,103,113,193,203,213,223,233,243,253,263,273,283,293,303,313,323,393,413,423,433,443,453,473,483,493,503,513,523,533,543,553,563,593,603,613,623,633,643,653,693,703,793,803,813,823,833,843,853,863,873,883,893,903,913,993"> | |
587 | + <output name="feat.form" value="-t" /> | |
588 | + </condition> | |
589 | + <condition | |
590 | + value="004,014,024,094,104,114,194,204,214,224,234,244,254,264,274,284,294,304,314,324,394,414,424,434,444,454,474,484,494,504,514,524,534,544,554,564,594,604,614,624,634,644,654,694,704,794,804,814,824,834,844,854,864,874,884,894,904,914,994"> | |
591 | + <output name="feat.form" value="-n" /> | |
592 | + </condition> | |
593 | + <condition | |
594 | + value="005,015,025,095,105,115,195,205,215,225,235,245,255,265,275,285,295,305,315,325,395,415,425,435,445,455,475,485,495,505,515,525,535,545,555,565,595,605,615,625,635,645,655,695,705,795,805,815,825,835,845,855,865,875,885,895,905,915,995"> | |
595 | + <output name="feat.form" value="-r/-re" /> | |
596 | + </condition> | |
597 | + <condition | |
598 | + value="006,016,026,096,106,116,196,206,216,226,236,246,256,266,276,286,296,306,316,326,396,416,426,436,446,456,476,486,496,506,516,526,536,546,556,566,596,606,616,626,636,646,656,696,706,796,806,816,826,836,846,856,866,876,886,896,906,916,996"> | |
599 | + <output name="feat.form" value="-a" /> | |
600 | + </condition> | |
601 | + <condition value="009,019,029,099"> | |
602 | + <output name="feat.form" value="unclear" /> | |
603 | + </condition> | |
604 | + | |
605 | + </function> | |
606 | + </functions> | |
607 | + | |
608 | + </parser> | |
609 | + <!-- END CONFIGURATION MTAS CRM PARSER --> | |
610 | + | |
611 | + | |
612 | +</mtas> | |
0 | 613 | \ No newline at end of file |
... | ... |
conf/parser/mtas/elan_mks.xml
... | ... | @@ -17,9 +17,10 @@ |
17 | 17 | <!-- START CONFIGURATION MTAS FOLIA PARSER --> |
18 | 18 | <parser name="mtas.analysis.parser.MtasElanParser"> |
19 | 19 | |
20 | - <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> | |
20 | + <!-- START GENERAL SETTINGS MTAS PARSER --> | |
21 | 21 | <autorepair value="true" /> |
22 | - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | |
22 | + <makeunique value="true" /> | |
23 | + <!-- END GENERAL SETTINGS MTAS PARSER --> | |
23 | 24 | |
24 | 25 | <!-- START REFERENCES --> |
25 | 26 | <references> |
... | ... |
conf/parser/mtas/folia_dbnl.xml
... | ... | @@ -19,7 +19,8 @@ |
19 | 19 | |
20 | 20 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> |
21 | 21 | <autorepair value="true" /> |
22 | - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | |
22 | + <makeunique value="true" /> | |
23 | + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | |
23 | 24 | |
24 | 25 | <!-- START REFERENCES --> |
25 | 26 | <references> |
... | ... |
conf/parser/mtas/folia_ddd.xml
0 → 100644
1 | +<?xml version="1.0" encoding="UTF-8" ?> | |
2 | +<mtas> | |
3 | + | |
4 | + <!-- START MTAS INDEX CONFIGURATION --> | |
5 | + <index> | |
6 | + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS --> | |
7 | + <payload index="false" /> | |
8 | + <offset index="false" /> | |
9 | + <realoffset index="false" /> | |
10 | + <parent index="true" /> | |
11 | + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS --> | |
12 | + </index> | |
13 | + <!-- END MTAS INDEX CONFIGURATION --> | |
14 | + | |
15 | + | |
16 | + | |
17 | + <!-- START CONFIGURATION MTAS FOLIA PARSER --> | |
18 | + <parser name="mtas.analysis.parser.MtasFoliaParser"> | |
19 | + | |
20 | + <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> | |
21 | + <autorepair value="true" /> | |
22 | + <makeunique value="true" /> | |
23 | + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | |
24 | + | |
25 | + <!-- START REFERENCES --> | |
26 | + <references> | |
27 | + <reference name="wref" ref="id" /> | |
28 | + </references> | |
29 | + <!-- END REFERENCES --> | |
30 | + | |
31 | + <!-- START MAPPINGS --> | |
32 | + <mappings> | |
33 | + | |
34 | + <!-- START WORDS --> | |
35 | + <mapping type="word" name="w"> | |
36 | + </mapping> | |
37 | + <mapping type="word" name="w"> | |
38 | + <token type="string" offset="false" realoffset="false" parent="false"> | |
39 | + <pre> | |
40 | + <item type="name" /> | |
41 | + </pre> | |
42 | + <post> | |
43 | + <item type="attribute" name="class" /> | |
44 | + </post> | |
45 | + </token> | |
46 | + <condition> | |
47 | + <item type="attribute" name="class" /> | |
48 | + <item type="attribute" name="class" not="true" condition="WORD" /> | |
49 | + </condition> | |
50 | + </mapping> | |
51 | + <!-- END WORDS --> | |
52 | + | |
53 | + <!-- START WORD ANNOTATIONS --> | |
54 | + <mapping type="wordAnnotation" name="t"> | |
55 | + <token type="string" offset="false"> | |
56 | + <pre> | |
57 | + <item type="name" /> | |
58 | + </pre> | |
59 | + <post> | |
60 | + <item type="text" /> | |
61 | + </post> | |
62 | + </token> | |
63 | + <token type="string" offset="false" realoffset="false" parent="false"> | |
64 | + <pre> | |
65 | + <item type="name" /> | |
66 | + <item type="string" value="_lc" /> | |
67 | + </pre> | |
68 | + <post> | |
69 | + <item type="text" filter="ascii,lowercase" /> | |
70 | + </post> | |
71 | + </token> | |
72 | + <condition> | |
73 | + <item type="ancestor" number="0" /> | |
74 | + <item type="ancestorWord" number="1" /> | |
75 | + <item type="unknownAncestor" number="0" /> | |
76 | + </condition> | |
77 | + </mapping> | |
78 | + <!-- END WORD ANNOTATIONS --> | |
79 | + | |
80 | + <!-- START RELATIONS --> | |
81 | + <!-- END RELATIONS --> | |
82 | + | |
83 | + <!-- START GROUPS --> | |
84 | + <mapping type="group" name="s"> | |
85 | + <token type="string" offset="false"> | |
86 | + <pre> | |
87 | + <item type="name" /> | |
88 | + </pre> | |
89 | + <post> | |
90 | + <item type="attribute" name="class" /> | |
91 | + </post> | |
92 | + </token> | |
93 | + </mapping> | |
94 | + <mapping type="group" name="p"> | |
95 | + <token type="string" offset="false"> | |
96 | + <pre> | |
97 | + <item type="name" /> | |
98 | + </pre> | |
99 | + <post> | |
100 | + <item type="attribute" name="class" /> | |
101 | + </post> | |
102 | + </token> | |
103 | + </mapping> | |
104 | + <mapping type="group" name="div"> | |
105 | + <token type="string" offset="false"> | |
106 | + <pre> | |
107 | + <item type="name" /> | |
108 | + </pre> | |
109 | + <post> | |
110 | + <item type="attribute" name="class" /> | |
111 | + </post> | |
112 | + </token> | |
113 | + </mapping> | |
114 | + <mapping type="group" name="head"> | |
115 | + <token type="string" offset="false"> | |
116 | + <pre> | |
117 | + <item type="name" /> | |
118 | + </pre> | |
119 | + <post> | |
120 | + <item type="attribute" name="class" /> | |
121 | + </post> | |
122 | + </token> | |
123 | + </mapping> | |
124 | + <!-- END GROUPS --> | |
125 | + | |
126 | + <!-- START GROUP ANNOTATIONS --> | |
127 | + <mapping type="groupAnnotation" name="lang"> | |
128 | + <token type="string" offset="false" realoffset="false" parent="false"> | |
129 | + <pre> | |
130 | + <item type="name" /> | |
131 | + </pre> | |
132 | + <post> | |
133 | + <item type="attribute" name="class" /> | |
134 | + </post> | |
135 | + </token> | |
136 | + </mapping> | |
137 | + <!-- END GROUP ANNOTATIONS --> | |
138 | + | |
139 | + </mappings> | |
140 | + <!-- END MAPPINGS --> | |
141 | + | |
142 | + </parser> | |
143 | + <!-- END CONFIGURATION MTAS FOLIA PARSER --> | |
144 | + | |
145 | + | |
146 | +</mtas> | |
0 | 147 | \ No newline at end of file |
... | ... |
conf/parser/mtas/folia_edbo.xml
... | ... | @@ -17,6 +17,7 @@ |
17 | 17 | |
18 | 18 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> |
19 | 19 | <autorepair value="true" /> |
20 | + <makeunique value="true" /> | |
20 | 21 | <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> |
21 | 22 | |
22 | 23 | <!-- START REFERENCES --> |
... | ... | @@ -72,30 +73,6 @@ |
72 | 73 | <item type="unknownAncestor" number="0" /> |
73 | 74 | </condition> |
74 | 75 | </mapping> |
75 | - <mapping type="wordAnnotation" name="aref"> | |
76 | - <token type="string" offset="false"> | |
77 | - <pre> | |
78 | - <item type="string" value="translated.t" /> | |
79 | - </pre> | |
80 | - <post> | |
81 | - <item type="attribute" name="t" /> | |
82 | - </post> | |
83 | - </token> | |
84 | - <token type="string" offset="false" realoffset="false" parent="false"> | |
85 | - <pre> | |
86 | - <item type="string" value="translated.t" /> | |
87 | - <item type="string" value="_lc" /> | |
88 | - </pre> | |
89 | - <post> | |
90 | - <item type="attribute" name="t" filter="ascii,lowercase" /> | |
91 | - </post> | |
92 | - </token> | |
93 | - <condition> | |
94 | - <item type="ancestor" number="0" /> | |
95 | - <item type="ancestorWord" number="1" /> | |
96 | - <item type="unknownAncestor" number="1" /> | |
97 | - </condition> | |
98 | - </mapping> | |
99 | 76 | <mapping type="wordAnnotation" name="lemma"> |
100 | 77 | <token type="string" offset="false" realoffset="false" parent="false"> |
101 | 78 | <pre> |
... | ... | @@ -109,24 +86,6 @@ |
109 | 86 | <item type="attribute" name="class" /> |
110 | 87 | <item type="ancestor" number="0" /> |
111 | 88 | <item type="unknownAncestor" number="0" /> |
112 | - <item type="attribute" name="set" condition="original.http://ilk.uvt.nl/folia/sets/frog-mblem-nl" /> | |
113 | - </condition> | |
114 | - </mapping> | |
115 | - <mapping type="wordAnnotation" name="lemma"> | |
116 | - <token type="string" offset="false" realoffset="false" parent="false"> | |
117 | - <pre> | |
118 | - <item type="string" value="translated." /> | |
119 | - <item type="name" /> | |
120 | - </pre> | |
121 | - <post> | |
122 | - <item type="attribute" name="class" /> | |
123 | - </post> | |
124 | - </token> | |
125 | - <condition> | |
126 | - <item type="attribute" name="class" /> | |
127 | - <item type="ancestor" number="0" /> | |
128 | - <item type="unknownAncestor" number="1" /> | |
129 | - <item type="attribute" name="set" condition="translated.http://ilk.uvt.nl/folia/sets/frog-mblem-nl" /> | |
130 | 89 | </condition> |
131 | 90 | </mapping> |
132 | 91 | <mapping type="wordAnnotation" name="morphology"> |
... | ... | @@ -166,54 +125,11 @@ |
166 | 125 | <item type="ancestor" number="0" /> |
167 | 126 | <item type="unknownAncestor" number="0" /> |
168 | 127 | <item type="attribute" name="class" /> |
169 | - <item type="attribute" name="set" condition="original.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" /> | |
170 | - </condition> | |
171 | - </mapping> | |
172 | - <mapping type="wordAnnotation" name="pos"> | |
173 | - <token type="string" offset="false" realoffset="false" parent="false"> | |
174 | - <pre> | |
175 | - <item type="string" value="translated." /> | |
176 | - <item type="name" /> | |
177 | - </pre> | |
178 | - <post> | |
179 | - <item type="attribute" name="head" /> | |
180 | - </post> | |
181 | - <payload> | |
182 | - <item type="attribute" name="confidence" /> | |
183 | - </payload> | |
184 | - </token> | |
185 | - <condition> | |
186 | - <item type="ancestor" number="0" /> | |
187 | - <item type="unknownAncestor" number="1" /> | |
188 | - <item type="attribute" name="class" /> | |
189 | - <item type="attribute" name="set" condition="translated.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" /> | |
190 | - </condition> | |
191 | - </mapping> | |
192 | - <mapping type="wordAnnotation" name="feat"> | |
193 | - <token type="string" offset="false" realoffset="false" parent="false"> | |
194 | - <pre> | |
195 | - <item type="name" /> | |
196 | - <item type="attribute" name="subset" prefix="." /> | |
197 | - </pre> | |
198 | - <post> | |
199 | - <item type="attribute" name="class" /> | |
200 | - </post> | |
201 | - <payload> | |
202 | - <item type="ancestorAttribute" distance="0" name="confidence" /> | |
203 | - </payload> | |
204 | - </token> | |
205 | - <condition> | |
206 | - <item type="ancestor" number="1" /> | |
207 | - <item type="unknownAncestor" number="0" /> | |
208 | - <item type="attribute" name="class" /> | |
209 | - <item type="attribute" name="subset" /> | |
210 | - <item type="ancestorAttribute" name="set" condition="original.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" /> | |
211 | 128 | </condition> |
212 | 129 | </mapping> |
213 | 130 | <mapping type="wordAnnotation" name="feat"> |
214 | 131 | <token type="string" offset="false" realoffset="false" parent="false"> |
215 | 132 | <pre> |
216 | - <item type="string" value="translated." /> | |
217 | 133 | <item type="name" /> |
218 | 134 | <item type="attribute" name="subset" prefix="." /> |
219 | 135 | </pre> |
... | ... | @@ -229,7 +145,6 @@ |
229 | 145 | <item type="unknownAncestor" number="0" /> |
230 | 146 | <item type="attribute" name="class" /> |
231 | 147 | <item type="attribute" name="subset" /> |
232 | - <item type="ancestorAttribute" name="set" condition="translated.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" /> | |
233 | 148 | </condition> |
234 | 149 | </mapping> |
235 | 150 | <!-- END WORD ANNOTATIONS --> |
... | ... |
conf/parser/mtas/folia_mimore.xml
... | ... | @@ -18,7 +18,8 @@ |
18 | 18 | |
19 | 19 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> |
20 | 20 | <autorepair value="false" /> |
21 | - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | |
21 | + <makeunique value="true" /> | |
22 | + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | |
22 | 23 | |
23 | 24 | <!-- START REFERENCES --> |
24 | 25 | <references> |
... | ... |
conf/parser/mtas/folia_mtas.xml
... | ... | @@ -19,7 +19,8 @@ |
19 | 19 | |
20 | 20 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> |
21 | 21 | <autorepair value="true" /> |
22 | - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | |
22 | + <makeunique value="true" /> | |
23 | + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | |
23 | 24 | |
24 | 25 | <!-- START REFERENCES --> |
25 | 26 | <references> |
... | ... |
conf/parser/mtas/folia_oeaw.xml
0 → 100644
1 | +<?xml version="1.0" encoding="UTF-8" ?> | |
2 | +<mtas> | |
3 | + | |
4 | + <!-- START MTAS INDEX CONFIGURATION --> | |
5 | + <index> | |
6 | + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS --> | |
7 | + <payload index="false" /> | |
8 | + <offset index="false" /> | |
9 | + <realoffset index="false" /> | |
10 | + <parent index="true" /> | |
11 | + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS --> | |
12 | + </index> | |
13 | + <!-- END MTAS INDEX CONFIGURATION --> | |
14 | + | |
15 | + | |
16 | + | |
17 | + <!-- START CONFIGURATION MTAS FOLIA PARSER --> | |
18 | + <parser name="mtas.analysis.parser.MtasFoliaParser"> | |
19 | + | |
20 | + <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> | |
21 | + <autorepair value="true" /> | |
22 | + <makeunique value="true" /> | |
23 | + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | |
24 | + | |
25 | + <!-- START REFERENCES --> | |
26 | + <references> | |
27 | + <reference name="wref" ref="id" /> | |
28 | + </references> | |
29 | + <!-- END REFERENCES --> | |
30 | + | |
31 | + <!-- START MAPPINGS --> | |
32 | + <mappings> | |
33 | + | |
34 | + <!-- START WORDS --> | |
35 | + <mapping type="word" name="w"> | |
36 | + </mapping> | |
37 | + <mapping type="word" name="w"> | |
38 | + <token type="string" offset="false" realoffset="false" parent="false"> | |
39 | + <pre> | |
40 | + <item type="name" /> | |
41 | + </pre> | |
42 | + <post> | |
43 | + <item type="attribute" name="class" /> | |
44 | + </post> | |
45 | + </token> | |
46 | + <condition> | |
47 | + <item type="attribute" name="class" /> | |
48 | + <item type="attribute" name="class" not="true" condition="WORD" /> | |
49 | + </condition> | |
50 | + </mapping> | |
51 | + <!-- END WORDS --> | |
52 | + | |
53 | + <!-- START WORD ANNOTATIONS --> | |
54 | + <mapping type="wordAnnotation" name="t"> | |
55 | + <token type="string" offset="false"> | |
56 | + <pre> | |
57 | + <item type="name" /> | |
58 | + </pre> | |
59 | + <post> | |
60 | + <item type="text" /> | |
61 | + </post> | |
62 | + </token> | |
63 | + <token type="string" offset="false" realoffset="false" parent="false"> | |
64 | + <pre> | |
65 | + <item type="name" /> | |
66 | + <item type="string" value="_lc" /> | |
67 | + </pre> | |
68 | + <post> | |
69 | + <item type="text" filter="ascii,lowercase" /> | |
70 | + </post> | |
71 | + </token> | |
72 | + <condition> | |
73 | + <item type="ancestor" number="0" /> | |
74 | + <item type="ancestorWord" number="1" /> | |
75 | + <item type="unknownAncestor" number="0" /> | |
76 | + </condition> | |
77 | + </mapping> | |
78 | + <mapping type="wordAnnotation" name="lemma"> | |
79 | + <token type="string" offset="false" realoffset="false" parent="false"> | |
80 | + <pre> | |
81 | + <item type="name" /> | |
82 | + </pre> | |
83 | + <post> | |
84 | + <item type="attribute" name="class" /> | |
85 | + </post> | |
86 | + </token> | |
87 | + <condition> | |
88 | + <item type="attribute" name="class" /> | |
89 | + <item type="ancestor" number="0" /> | |
90 | + <item type="unknownAncestor" number="0" /> | |
91 | + </condition> | |
92 | + </mapping> | |
93 | + <mapping type="wordAnnotation" name="pos"> | |
94 | + <token type="string" offset="false" realoffset="false" parent="false"> | |
95 | + <pre> | |
96 | + <item type="attribute" name="set" /> | |
97 | + </pre> | |
98 | + <post> | |
99 | + <item type="attribute" name="head" /> | |
100 | + </post> | |
101 | + </token> | |
102 | + <condition> | |
103 | + <item type="ancestor" number="0" /> | |
104 | + <item type="unknownAncestor" number="0" /> | |
105 | + <item type="attribute" name="class" /> | |
106 | + <item type="attribute" name="set" /> | |
107 | + </condition> | |
108 | + </mapping> | |
109 | + <mapping type="wordAnnotation" name="feat"> | |
110 | + <token type="string" offset="false" realoffset="false" parent="false"> | |
111 | + <pre> | |
112 | + <item type="name" /> | |
113 | + <item type="attribute" name="subset" prefix="." /> | |
114 | + </pre> | |
115 | + <post> | |
116 | + <item type="attribute" name="class" /> | |
117 | + </post> | |
118 | + </token> | |
119 | + <condition> | |
120 | + <item type="ancestor" number="1" /> | |
121 | + <item type="unknownAncestor" number="0" /> | |
122 | + <item type="attribute" name="class" /> | |
123 | + <item type="attribute" name="subset" /> | |
124 | + </condition> | |
125 | + </mapping> | |
126 | + <!-- END WORD ANNOTATIONS --> | |
127 | + | |
128 | + <!-- START RELATIONS --> | |
129 | + <mapping type="relation" name="entities"> | |
130 | + </mapping> | |
131 | + <mapping type="relation" name="entity"> | |
132 | + <token type="string" offset="false" realoffset="false" parent="false"> | |
133 | + <pre> | |
134 | + <item type="name" /> | |
135 | + </pre> | |
136 | + <post> | |
137 | + <item type="attribute" name="class" /> | |
138 | + </post> | |
139 | + </token> | |
140 | + <condition> | |
141 | + <item type="ancestor" number="1" /> | |
142 | + <item type="ancestorName" condition="entities" /> | |
143 | + </condition> | |
144 | + </mapping> | |
145 | + <!-- END RELATIONS --> | |
146 | + | |
147 | + <!-- START RELATION ANNOTATIONS --> | |
148 | + <mapping type="relationAnnotation" name="feat"> | |
149 | + <token type="string" offset="false" realoffset="false"> | |
150 | + <pre> | |
151 | + <item type="ancestorRelationName" /> | |
152 | + <item type="name" prefix="." /> | |
153 | + <item type="attribute" name="subset" prefix="." /> | |
154 | + </pre> | |
155 | + <post> | |
156 | + <item type="attribute" name="class" /> | |
157 | + </post> | |
158 | + </token> | |
159 | + </mapping> | |
160 | + <!-- END RELATION ANNOTATIONS --> | |
161 | + | |
162 | + <!-- START GROUPS --> | |
163 | + <mapping type="group" name="s"> | |
164 | + <token type="string" offset="false"> | |
165 | + <pre> | |
166 | + <item type="name" /> | |
167 | + </pre> | |
168 | + <post> | |
169 | + <item type="attribute" name="class" /> | |
170 | + </post> | |
171 | + </token> | |
172 | + </mapping> | |
173 | + <mapping type="group" name="p"> | |
174 | + <token type="string" offset="false"> | |
175 | + <pre> | |
176 | + <item type="name" /> | |
177 | + </pre> | |
178 | + <post> | |
179 | + <item type="attribute" name="class" /> | |
180 | + </post> | |
181 | + </token> | |
182 | + </mapping> | |
183 | + <mapping type="group" name="div"> | |
184 | + <token type="string" offset="false"> | |
185 | + <pre> | |
186 | + <item type="name" /> | |
187 | + </pre> | |
188 | + <post> | |
189 | + <item type="attribute" name="class" /> | |
190 | + </post> | |
191 | + </token> | |
192 | + </mapping> | |
193 | + <!-- END GROUPS --> | |
194 | + | |
195 | + <!-- START GROUP ANNOTATIONS --> | |
196 | + <!-- END GROUP ANNOTATIONS --> | |
197 | + | |
198 | + </mappings> | |
199 | + <!-- END MAPPINGS --> | |
200 | + | |
201 | + </parser> | |
202 | + <!-- END CONFIGURATION MTAS FOLIA PARSER --> | |
203 | + | |
204 | + | |
205 | +</mtas> | |
0 | 206 | \ No newline at end of file |
... | ... |
conf/parser/mtas/folia_sonar.xml
conf/parser/mtas/folia_test.xml
... | ... | @@ -19,7 +19,8 @@ |
19 | 19 | |
20 | 20 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> |
21 | 21 | <autorepair value="true" /> |
22 | - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | |
22 | + <makeunique value="true" /> | |
23 | + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | |
23 | 24 | |
24 | 25 | <!-- START REFERENCES --> |
25 | 26 | <references> |
... | ... |
conf/parser/mtas/sketch_acdh.xml
... | ... | @@ -18,7 +18,8 @@ |
18 | 18 | <parser name="mtas.analysis.parser.MtasSketchParser"> |
19 | 19 | <!-- START GENERAL SETTINGS MTAS SKETCH PARSER --> |
20 | 20 | <autorepair value="true" /> |
21 | - <!-- END GENERAL SETTINGS MTAS SKETCH PARSER --> | |
21 | + <makeunique value="true" /> | |
22 | + <!-- END GENERAL SETTINGS MTAS SKETCH PARSER --> | |
22 | 23 | |
23 | 24 | <mappings> |
24 | 25 | |
... | ... |
conf/parser/mtas/tei_test.xml
... | ... | @@ -19,7 +19,8 @@ |
19 | 19 | |
20 | 20 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> |
21 | 21 | <autorepair value="true" /> |
22 | - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | |
22 | + <makeunique value="true" /> | |
23 | + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | |
23 | 24 | |
24 | 25 | <!-- START REFERENCES --> |
25 | 26 | <references> |
... | ... |
conf/parser/mtasSource.xml
0 → 100644
1 | +<?xml version="1.0" encoding="UTF-8" ?> | |
2 | +<mtas> | |
3 | + <configurations type="mtas.analysis.util.MtasTokenizerFactory"> | |
4 | + <configuration name="EDBO" file="mtasSource/folia_edbo.xml" /> | |
5 | + </configurations> | |
6 | + <configurations type="mtas.analysis.util.MtasCharFilterFactory"> | |
7 | + <configuration name="EDBO" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" /> | |
8 | + </configurations> | |
9 | +</mtas> | |
... | ... |
conf/parser/mtasSource/folia_edbo.xml
0 → 100644
1 | +<?xml version="1.0" encoding="UTF-8" ?> | |
2 | +<mtas> | |
3 | + | |
4 | + <!-- START MTAS INDEX CONFIGURATION --> | |
5 | + <index> | |
6 | + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS --> | |
7 | + <payload index="false" /> | |
8 | + <offset index="false" /> | |
9 | + <realoffset index="false" /> | |
10 | + <parent index="true" /> | |
11 | + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS --> | |
12 | + </index> | |
13 | + <!-- END MTAS INDEX CONFIGURATION --> | |
14 | + | |
15 | + <!-- START CONFIGURATION MTAS FOLIA PARSER --> | |
16 | + <parser name="mtas.analysis.parser.MtasFoliaParser"> | |
17 | + | |
18 | + <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> | |
19 | + <autorepair value="true" /> | |
20 | + <makeunique value="true" /> | |
21 | + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | |
22 | + | |
23 | + <!-- START REFERENCES --> | |
24 | + <references> | |
25 | + </references> | |
26 | + <!-- END REFERENCES --> | |
27 | + | |
28 | + <!-- START MAPPINGS --> | |
29 | + <mappings> | |
30 | + | |
31 | + <!-- START WORDS --> | |
32 | + <mapping type="word" name="str"> | |
33 | + </mapping> | |
34 | + <!-- END WORDS --> | |
35 | + | |
36 | + <!-- START WORD ANNOTATIONS --> | |
37 | + <mapping type="wordAnnotation" name="t"> | |
38 | + <token type="string" offset="false"> | |
39 | + <pre> | |
40 | + <item type="name" /> | |
41 | + </pre> | |
42 | + <post> | |
43 | + <item type="text" /> | |
44 | + </post> | |
45 | + </token> | |
46 | + <token type="string" offset="false" realoffset="false" parent="false"> | |
47 | + <pre> | |
48 | + <item type="name" /> | |
49 | + <item type="string" value="_lc" /> | |
50 | + </pre> | |
51 | + <post> | |
52 | + <item type="text" filter="ascii,lowercase" /> | |
53 | + </post> | |
54 | + </token> | |
55 | + <condition> | |
56 | + <item type="ancestor" number="0" /> | |
57 | + <item type="ancestorWord" number="1" /> | |
58 | + <item type="unknownAncestor" number="0" /> | |
59 | + <item type="attribute" name="class" condition="Ticcl"/> | |
60 | + </condition> | |
61 | + </mapping> | |
62 | + <mapping type="wordAnnotation" name="correction"> | |
63 | + </mapping> | |
64 | + <mapping type="wordAnnotation" name="new"> | |
65 | + </mapping> | |
66 | + <mapping type="wordAnnotation" name="original"> | |
67 | + </mapping> | |
68 | + <mapping type="wordAnnotation" name="suggestion"> | |
69 | + </mapping> | |
70 | + <mapping type="wordAnnotation" name="t"> | |
71 | + <token type="string" offset="false"> | |
72 | + <pre> | |
73 | + <item type="name" /> | |
74 | + </pre> | |
75 | + <post> | |
76 | + <item type="text" /> | |
77 | + </post> | |
78 | + </token> | |
79 | + <token type="string" offset="false" realoffset="false" parent="false"> | |
80 | + <pre> | |
81 | + <item type="name" /> | |
82 | + <item type="string" value="_lc" /> | |
83 | + </pre> | |
84 | + <post> | |
85 | + <item type="text" filter="ascii,lowercase" /> | |
86 | + </post> | |
87 | + </token> | |
88 | + <condition> | |
89 | + <item type="ancestor" number="2" /> | |
90 | + <item type="ancestorName" condition="new" /> | |
91 | + <item type="unknownAncestor" number="0" /> | |
92 | + <item type="attribute" name="class" condition="Ticcl"/> | |
93 | + </condition> | |
94 | + </mapping> | |
95 | + <mapping type="wordAnnotation" name="t"> | |
96 | + <token type="string" offset="false"> | |
97 | + <pre> | |
98 | + <item type="name" /> | |
99 | + <item type="ancestorName" prefix="."/> | |
100 | + </pre> | |
101 | + <post> | |
102 | + <item type="text" /> | |
103 | + </post> | |
104 | + </token> | |
105 | + <token type="string" offset="false" realoffset="false" parent="false"> | |
106 | + <pre> | |
107 | + <item type="name" /> | |
108 | + <item type="string" value="_lc" /> | |
109 | + <item type="ancestorName" prefix="."/> | |
110 | + </pre> | |
111 | + <post> | |
112 | + <item type="text" filter="ascii,lowercase" /> | |
113 | + </post> | |
114 | + </token> | |
115 | + <condition> | |
116 | + <item type="ancestor" number="2" /> | |
117 | + <item type="ancestorName" condition="original" /> | |
118 | + <item type="unknownAncestor" number="0" /> | |
119 | + </condition> | |
120 | + </mapping> | |
121 | + <mapping type="wordAnnotation" name="t"> | |
122 | + <token type="string" offset="false"> | |
123 | + <pre> | |
124 | + <item type="name" /> | |
125 | + <item type="ancestorName" prefix="."/> | |
126 | + </pre> | |
127 | + <post> | |
128 | + <item type="text" /> | |
129 | + </post> | |
130 | + </token> | |
131 | + <token type="string" offset="false" realoffset="false" parent="false"> | |
132 | + <pre> | |
133 | + <item type="name" /> | |
134 | + <item type="string" value="_lc" /> | |
135 | + <item type="ancestorName" prefix="."/> | |
136 | + </pre> | |
137 | + <post> | |
138 | + <item type="text" filter="ascii,lowercase" /> | |
139 | + </post> | |
140 | + </token> | |
141 | + <condition> | |
142 | + <item type="ancestor" number="2" /> | |
143 | + <item type="ancestorName" condition="suggestion" /> | |
144 | + <item type="unknownAncestor" number="0" /> | |
145 | + </condition> | |
146 | + </mapping> | |
147 | + <!-- END WORD ANNOTATIONS --> | |
148 | + | |
149 | + <!-- START RELATIONS --> | |
150 | + <!-- END RELATIONS --> | |
151 | + | |
152 | + <!-- START GROUPS --> | |
153 | + <mapping type="group" name="p"> | |
154 | + <token type="string" offset="false"> | |
155 | + <pre> | |
156 | + <item type="name" /> | |
157 | + </pre> | |
158 | + <post> | |
159 | + <item type="attribute" name="class" /> | |
160 | + </post> | |
161 | + </token> | |
162 | + </mapping> | |
163 | + <mapping type="group" name="div"> | |
164 | + <token type="string" offset="false"> | |
165 | + <pre> | |
166 | + <item type="name" /> | |
167 | + </pre> | |
168 | + <post> | |
169 | + <item type="attribute" name="class" /> | |
170 | + </post> | |
171 | + </token> | |
172 | + </mapping> | |
173 | + <mapping type="group" name="head"> | |
174 | + <token type="string" offset="false"> | |
175 | + <pre> | |
176 | + <item type="name" /> | |
177 | + </pre> | |
178 | + <post> | |
179 | + <item type="attribute" name="class" /> | |
180 | + </post> | |
181 | + </token> | |
182 | + </mapping> | |
183 | + <!-- END GROUPS --> | |
184 | + | |
185 | + <!-- START GROUP ANNOTATIONS --> | |
186 | + <mapping type="groupAnnotation" name="lang"> | |
187 | + <token type="string" offset="false" realoffset="false" parent="false"> | |
188 | + <pre> | |
189 | + <item type="name" /> | |
190 | + </pre> | |
191 | + <post> | |
192 | + <item type="attribute" name="class" /> | |
193 | + </post> | |
194 | + </token> | |
195 | + </mapping> | |
196 | + <!-- END GROUP ANNOTATIONS --> | |
197 | + | |
198 | + </mappings> | |
199 | + <!-- END MAPPINGS --> | |
200 | + | |
201 | + </parser> | |
202 | + <!-- END CONFIGURATION MTAS FOLIA PARSER --> | |
203 | + | |
204 | +</mtas> | |
0 | 205 | \ No newline at end of file |
... | ... |
conf/solr/schemaNederlab.xml
... | ... | @@ -255,8 +255,8 @@ |
255 | 255 | <field name="NLContent_folia_available" type="nederlab_boolean" |
256 | 256 | required="false" multiValued="false" indexed="true" stored="true" /> |
257 | 257 | <field name="NLContent_mtas" type="mtas_text" indexed="true" |
258 | - stored="true" /> | |
259 | - <field name="NLContent_mtas_error" type="nederlab_string" | |
258 | + stored="true" /> | |
259 | + <field name="NLContent_mtas_error" type="nederlab_string" | |
260 | 260 | indexed="true" stored="true" /> |
261 | 261 | <field name="NLContent_mtas_numberOfTokens" type="nederlab_int" |
262 | 262 | indexed="true" stored="true" /> |
... | ... | @@ -264,7 +264,17 @@ |
264 | 264 | indexed="true" stored="true" /> |
265 | 265 | <field name="NLContent_mtas_size" type="nederlab_int" indexed="true" |
266 | 266 | stored="true" /> |
267 | - <!-- Combined Field Metadata --> | |
267 | + <field name="NLContent_mtasSource" type="mtasSource_text" indexed="true" | |
268 | + stored="true" /> | |
269 | + <field name="NLContent_mtasSource_error" type="nederlab_string" | |
270 | + indexed="true" stored="true" /> | |
271 | + <field name="NLContent_mtasSource_numberOfTokens" type="nederlab_int" | |
272 | + indexed="true" stored="true" /> | |
273 | + <field name="NLContent_mtasSource_numberOfPositions" type="nederlab_int" | |
274 | + indexed="true" stored="true" /> | |
275 | + <field name="NLContent_mtasSource_size" type="nederlab_int" indexed="true" | |
276 | + stored="true" /> | |
277 | + <!-- Combined Field Metadata --> | |
268 | 278 | <field name="NLMetadata" type="nederlab_text" required="false" |
269 | 279 | multiValued="true" indexed="true" stored="false" /> |
270 | 280 | <copyField source="NLCore_NLIdentification_nederlabID" dest="NLMetadata" /> |
... | ... | @@ -420,5 +430,27 @@ |
420 | 430 | prefix="t" /> |
421 | 431 | </analyzer> |
422 | 432 | </fieldType> |
433 | + | |
434 | + <fieldType name="mtasSource_text_example_config" class="solr.TextField" | |
435 | + postingsFormat="MtasCodec"> | |
436 | + <analyzer type="index"> | |
437 | + <charFilter class="mtas.analysis.util.MtasCharFilterFactory" | |
438 | + config="mtasSource.xml" /> | |
439 | + <tokenizer class="mtas.analysis.util.MtasTokenizerFactory" | |
440 | + config="mtasSource.xml" /> | |
441 | + </analyzer> | |
442 | + </fieldType> | |
443 | + | |
444 | + <fieldType name="mtasSource_text" class="mtas.solr.schema.MtasPreAnalyzedField" | |
445 | + followIndexAnalyzer="mtasSource_text_example_config" | |
446 | + configurationFromField="NLCore_NLAdministrative_sourceCollection" setNumberOfTokens="NLContent_mtasSource_numberOfTokens" | |
447 | + setNumberOfPositions="NLContent_mtasSource_numberOfPositions" setSize="NLContent_mtasSource_size" | |
448 | + setError="NLContent_mtasSource_error" postingsFormat="MtasCodec"> | |
449 | + <analyzer type="query"> | |
450 | + <tokenizer class="solr.WhitespaceTokenizerFactory" /> | |
451 | + <filter class="mtas.analysis.util.MtasPrefixTokenFilterFactory" | |
452 | + prefix="t" /> | |
453 | + </analyzer> | |
454 | + </fieldType> | |
423 | 455 | |
424 | 456 | </schema> |
... | ... |
conf/solr/schemaOeaw.xml
0 → 100644
1 | +<?xml version="1.0" encoding="UTF-8" ?> | |
2 | + | |
3 | +<schema name="nederlab" version="1.5"> | |
4 | + | |
5 | + <field name="_version_" type="nederlab_long" indexed="true" | |
6 | + stored="true" /> | |
7 | + | |
8 | + <!-- component Profile --> | |
9 | + <field name="NLProfile_name" type="nederlab_string" required="true" | |
10 | + multiValued="false" indexed="true" stored="true" /> | |
11 | + | |
12 | + <!-- component ResourceProxy --> | |
13 | + <field name="ResourceProxy_resourceRef" type="nederlab_string" | |
14 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
15 | + <dynamicField name="ResourceProxy_resourceRef_mimeType_*" | |
16 | + type="nederlab_string" required="false" multiValued="true" indexed="true" | |
17 | + stored="true" /> | |
18 | + | |
19 | + <!-- component NLCore --> | |
20 | + <field name="NLCore_NLIdentification_nederlabID" type="nederlab_uuid" | |
21 | + required="true" multiValued="false" indexed="true" stored="true" /> | |
22 | + <field name="NLCore_NLIdentification_editorialCode" type="nederlab_string" | |
23 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
24 | + <field name="NLCore_NLIdentification_versionID" type="nederlab_string" | |
25 | + required="true" multiValued="false" indexed="true" stored="true" /> | |
26 | + <field name="NLCore_NLIdentification_sourceRef" type="nederlab_string" | |
27 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
28 | + <field name="NLCore_NLIdentification_sourceUrl" type="nederlab_string" | |
29 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
30 | + <field name="NLCore_NLIdentification_sourceRefUrl_serialized" | |
31 | + type="nederlab_string" required="false" multiValued="true" indexed="false" | |
32 | + stored="true" /> | |
33 | + <field name="NLCore_NLAdministrative_ingestTime" type="nederlab_date" | |
34 | + required="true" multiValued="false" indexed="true" stored="true" /> | |
35 | + <field name="NLCore_NLAdministrative_expirationTime" type="nederlab_date" | |
36 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
37 | + <field name="NLCore_NLAdministrative_lastEditedBy" type="nederlab_string" | |
38 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
39 | + <field name="NLCore_NLAdministrative_modificationTime" type="nederlab_date" | |
40 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
41 | + <field name="NLCore_NLAdministrative_editorialNote" type="nederlab_text" | |
42 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
43 | + <field name="NLCore_NLAdministrative_sourceCollection" type="nederlab_string" | |
44 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
45 | + <field name="NLCore_NLAdministrative_isThesaurusElement" type="nederlab_boolean" | |
46 | + required="true" multiValued="false" indexed="true" stored="true" /> | |
47 | + <field name="NLCore_NLExternalReference_organizationName" type="nederlab_text" | |
48 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
49 | + <field name="NLCore_NLExternalReference_collectionName" type="nederlab_string" | |
50 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
51 | + <field name="NLCore_NLExternalReference_resourceRef" type="nederlab_string" | |
52 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
53 | + <field name="NLCore_NLExternalReference_serialized" type="nederlab_string" | |
54 | + required="false" multiValued="true" indexed="false" stored="true" /> | |
55 | + | |
56 | + <!-- component NLTitle --> | |
57 | + <field name="NLTitle_title" type="nederlab_text" required="false" | |
58 | + multiValued="false" indexed="true" stored="true" /> | |
59 | + <field name="NLTitle_subtitle" type="nederlab_text" required="false" | |
60 | + multiValued="false" indexed="true" stored="true" /> | |
61 | + <field name="NLTitle_genre" type="nederlab_string" required="false" | |
62 | + multiValued="true" indexed="true" stored="true" /> | |
63 | + <field name="NLTitle_category" type="nederlab_string" required="false" | |
64 | + multiValued="true" indexed="true" stored="true" /> | |
65 | + <field name="NLTitle_yearOfPublicationMin" type="nederlab_int" | |
66 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
67 | + <field name="NLTitle_yearOfPublicationMax" type="nederlab_int" | |
68 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
69 | + <field name="NLTitle_yearOfPublicationApprox" type="nederlab_boolean" | |
70 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
71 | + <field name="NLTitle_yearOfPublicationLabel" type="nederlab_text" | |
72 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
73 | + <field name="NLTitle_edition" type="nederlab_string" required="false" | |
74 | + multiValued="false" indexed="true" stored="true" /> | |
75 | + <field name="NLTitle_inNederlabAs" type="nederlab_uuid" required="false" | |
76 | + multiValued="false" indexed="true" stored="true" /> | |
77 | + <field name="NLTitle_NLPublicationPlace_placeOfPublication" type="nederlab_string" | |
78 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
79 | + <field name="NLTitle_NLPublicationPlace_placeID" type="nederlab_string" | |
80 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
81 | + <field name="NLTitle_NLPublicationPlace_placeOfPublicationOriginal" | |
82 | + type="nederlab_text" required="false" multiValued="true" indexed="true" | |
83 | + stored="true" /> | |
84 | + <field name="NLTitle_numberOfPages" type="nederlab_int" required="false" | |
85 | + multiValued="false" indexed="true" stored="true" /> | |
86 | + <field name="NLTitle_numberOfWords" type="nederlab_int" required="false" | |
87 | + multiValued="false" indexed="true" stored="true" /> | |
88 | + <field name="NLTitle_primaryLanguage" type="nederlab_string" | |
89 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
90 | + <field name="NLTitle_isTranslation" type="nederlab_boolean" | |
91 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
92 | + <field name="NLTitle_characterEncoding" type="nederlab_string" | |
93 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
94 | + <field name="NLTitle_codingStandard" type="nederlab_string" | |
95 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
96 | + <field name="NLTitle_textQuality" type="nederlab_text" required="false" | |
97 | + multiValued="false" indexed="true" stored="true" /> | |
98 | + <field name="NLTitle_processingMethod" type="nederlab_text" | |
99 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
100 | + <field name="NLTitle_autopsyPerformed" type="nederlab_boolean" | |
101 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
102 | + <field name="NLTitle_NLPersonRef_personID" type="nederlab_uuid" | |
103 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
104 | + <field name="NLTitle_NLPersonRef_role" type="nederlab_string" | |
105 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
106 | + <dynamicField name="NLTitle_NLPersonRef_personID_role_*" | |
107 | + type="nederlab_uuid" required="false" multiValued="true" indexed="true" | |
108 | + stored="true" /> | |
109 | + <field name="NLTitle_contains" type="nederlab_uuid" required="false" | |
110 | + multiValued="true" indexed="true" stored="true" /> | |
111 | + <field name="NLTitle_seriesTitleID" type="nederlab_uuid" | |
112 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
113 | + <field name="NLTitle_seriesTitleID_parent" type="nederlab_uuid" | |
114 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
115 | + <field name="NLTitle_seriesTitleID_root" type="nederlab_uuid" | |
116 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
117 | + | |
118 | + <!-- component NLDependentTitle --> | |
119 | + <field name="NLDependentTitle_title" type="nederlab_text" | |
120 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
121 | + <field name="NLDependentTitle_subtitle" type="nederlab_text" | |
122 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
123 | + <field name="NLDependentTitle_primaryLanguage" type="nederlab_string" | |
124 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
125 | + <field name="NLDependentTitle_parentTitleID" type="nederlab_uuid" | |
126 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
127 | + <field name="NLDependentTitle_inNederlabAs" type="nederlab_uuid" | |
128 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
129 | + <field name="NLDependentTitle_NLPersonRef_personID" type="nederlab_uuid" | |
130 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
131 | + <field name="NLDependentTitle_NLPersonRef_role" type="nederlab_string" | |
132 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
133 | + <dynamicField name="NLDependentTitle_NLPersonRef_personID_role_*" | |
134 | + type="nederlab_uuid" required="false" multiValued="true" indexed="true" | |
135 | + stored="true" /> | |
136 | + <field name="NLDependentTitle_startPage" type="nederlab_int" | |
137 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
138 | + <field name="NLDependentTitle_endPage" type="nederlab_int" | |
139 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
140 | + | |
141 | + <!-- component NLPerson --> | |
142 | + <field name="NLPerson_NLPersonName_nameId" type="nederlab_uuid" | |
143 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
144 | + <field name="NLPerson_NLPersonName_lastName" type="nederlab_text" | |
145 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
146 | + <field name="NLPerson_NLPersonName_firstName" type="nederlab_text" | |
147 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
148 | + <field name="NLPerson_NLPersonName_infixes" type="nederlab_text" | |
149 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
150 | + <field name="NLPerson_NLPersonName_firstNameFull" type="nederlab_text" | |
151 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
152 | + <field name="NLPerson_NLPersonName_fullName" type="nederlab_text" | |
153 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
154 | + <field name="NLPerson_NLPersonName_fullName_serialized" type="nederlab_string" | |
155 | + required="false" multiValued="true" indexed="false" stored="true" /> | |
156 | + <field name="NLPerson_NLPersonName_preferredNameID" type="nederlab_uuid" | |
157 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
158 | + <field name="NLPerson_NLPersonName_preferredLastName" type="nederlab_string" | |
159 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
160 | + <field name="NLPerson_NLPersonName_preferredFirstName" type="nederlab_string" | |
161 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
162 | + <field name="NLPerson_NLPersonName_preferredFirstNameFull" type="nederlab_string" | |
163 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
164 | + <field name="NLPerson_NLPersonName_preferredInfixes" type="nederlab_string" | |
165 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
166 | + <field name="NLPerson_NLPersonName_preferredFullName" type="nederlab_text" | |
167 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
168 | + <field name="NLPerson_NLPersonName_preferredFullName_serialized" | |
169 | + type="nederlab_string" required="false" multiValued="false" indexed="false" | |
170 | + stored="true" /> | |
171 | + <field name="NLPerson_dateOfBirthDayMonth" type="nederlab_text" | |
172 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
173 | + <field name="NLPerson_dateOfBirthMonth" type="nederlab_int" | |
174 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
175 | + <field name="NLPerson_dateOfBirthDay" type="nederlab_int" | |
176 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
177 | + <field name="NLPerson_yearOfBirthMin" type="nederlab_int" | |
178 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
179 | + <field name="NLPerson_yearOfBirthMax" type="nederlab_int" | |
180 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
181 | + <field name="NLPerson_yearOfBirthApprox" type="nederlab_boolean" | |
182 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
183 | + <field name="NLPerson_yearOfBirthLabel" type="nederlab_text" | |
184 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
185 | + <field name="NLPerson_placeOfBirth" type="nederlab_string" | |
186 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
187 | + <field name="NLPerson_placeOfBirthID" type="nederlab_string" | |
188 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
189 | + <field name="NLPerson_dateOfDeathDayMonth" type="nederlab_text" | |
190 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
191 | + <field name="NLPerson_dateOfDeathMonth" type="nederlab_int" | |
192 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
193 | + <field name="NLPerson_dateOfDeathDay" type="nederlab_int" | |
194 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
195 | + <field name="NLPerson_yearOfDeathMin" type="nederlab_int" | |
196 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
197 | + <field name="NLPerson_yearOfDeathMax" type="nederlab_int" | |
198 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
199 | + <field name="NLPerson_yearOfDeathApprox" type="nederlab_boolean" | |
200 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
201 | + <field name="NLPerson_yearOfDeathLabel" type="nederlab_text" | |
202 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
203 | + <field name="NLPerson_placeOfDeath" type="nederlab_string" | |
204 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
205 | + <field name="NLPerson_placeOfDeathID" type="nederlab_string" | |
206 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
207 | + <field name="NLPerson_gender" type="nederlab_string" required="false" | |
208 | + multiValued="false" indexed="true" stored="true" /> | |
209 | + <field name="NLPerson_profession" type="nederlab_string" | |
210 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
211 | + <field name="NLPerson_education" type="nederlab_string" required="false" | |
212 | + multiValued="true" indexed="true" stored="true" /> | |
213 | + <field name="NLPerson_inThesaurusAs" type="nederlab_uuid" | |
214 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
215 | + | |
216 | + <!-- component NLSeriesTitle --> | |
217 | + <field name="NLSeriesTitle_title" type="nederlab_text" required="false" | |
218 | + multiValued="false" indexed="true" stored="true" /> | |
219 | + <field name="NLSeriesTitle_years" type="nederlab_text" required="false" | |
220 | + multiValued="false" indexed="true" stored="true" /> | |
221 | + <field name="NLSeriesTitle_description" type="nederlab_text" | |
222 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
223 | + <field name="NLSeriesTitle_inNederlabAs" type="nederlab_uuid" | |
224 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
225 | + <field name="NLSeriesTitle_seriesTitleID" type="nederlab_uuid" | |
226 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
227 | + <field name="NLSeriesTitle_seriesTitleID_parent" type="nederlab_uuid" | |
228 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
229 | + <field name="NLSeriesTitle_seriesTitleID_root" type="nederlab_uuid" | |
230 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
231 | + | |
232 | + <!-- component NLCollectionSpecific --> | |
233 | + <dynamicField name="NLCollectionSpecific_*" type="nederlab_string" | |
234 | + required="false" multiValued="true" indexed="true" stored="true" /> | |
235 | + | |
236 | + <!-- component NLContent old --> | |
237 | + | |
238 | + <field name="NLContent_text_available" type="nederlab_boolean" | |
239 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
240 | + <field name="NLContent_text" type="nederlab_content" required="false" | |
241 | + multiValued="false" indexed="true" stored="true" termVectors="true" | |
242 | + termPositions="true" termOffsets="true" /> | |
243 | + <field name="NLContent_text_lowercase" type="nederlab_content_lowercase" | |
244 | + required="false" multiValued="false" indexed="true" stored="true" | |
245 | + termVectors="true" termPositions="true" termOffsets="true" /> | |
246 | + <copyField source="NLContent_text" dest="NLContent_text_lowercase" /> | |
247 | + <field name="NLContent_ticcl_available" type="nederlab_boolean" | |
248 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
249 | + <field name="NLContent_ticcl_lowercase" type="nederlab_content_lowercase" | |
250 | + required="false" multiValued="false" indexed="true" stored="true" | |
251 | + termVectors="true" termPositions="true" termOffsets="true" /> | |
252 | + | |
253 | + <!-- component NLContent --> | |
254 | + | |
255 | + <field name="NLContent_folia_available" type="nederlab_boolean" | |
256 | + required="false" multiValued="false" indexed="true" stored="true" /> | |
257 | + <field name="NLContent_mtas" type="mtas_text" indexed="true" | |
258 | + stored="true" /> | |
259 | + <field name="NLContent_mtas_error" type="nederlab_string" | |
260 | + indexed="true" stored="true" /> | |
261 | + <field name="NLContent_mtas_numberOfTokens" type="nederlab_int" | |
262 | + indexed="true" stored="true" /> | |
263 | + <field name="NLContent_mtas_numberOfPositions" type="nederlab_int" | |
264 | + indexed="true" stored="true" /> | |
265 | + <field name="NLContent_mtas_size" type="nederlab_int" indexed="true" | |
266 | + stored="true" /> | |
267 | + <!-- Combined Field Metadata --> | |
268 | + <field name="NLMetadata" type="nederlab_text" required="false" | |
269 | + multiValued="true" indexed="true" stored="false" /> | |
270 | + <copyField source="NLCore_NLIdentification_nederlabID" dest="NLMetadata" /> | |
271 | + <copyField source="NLCore_NLIdentification_editorialCode" | |
272 | + dest="NLMetadata" /> | |
273 | + <copyField source="NLCore_NLIdentification_sourceRef" dest="NLMetadata" /> | |
274 | + <copyField source="NLCore_NLAdministrative_editorialNote" | |
275 | + dest="NLMetadata" /> | |
276 | + <copyField source="NLCore_NLAdministrative_sourceCollection" | |
277 | + dest="NLMetadata" /> | |
278 | + <copyField source="NLCore_NLExternalReference_organizationName" | |
279 | + dest="NLMetadata" /> | |
280 | + <copyField source="NLCore_NLExternalReference_collectionName" | |
281 | + dest="NLMetadata" /> | |
282 | + <copyField source="NLCore_NLExternalReference_resourceRef" | |
283 | + dest="NLMetadata" /> | |
284 | + <copyField source="NLTitle_title" dest="NLMetadata" /> | |
285 | + <copyField source="NLTitle_subtitle" dest="NLMetadata" /> | |
286 | + <copyField source="NLTitle_genre" dest="NLMetadata" /> | |
287 | + <copyField source="NLTitle_category" dest="NLMetadata" /> | |
288 | + <copyField source="NLTitle_yearOfPublicationMin" dest="NLMetadata" /> | |
289 | + <copyField source="NLTitle_yearOfPublicationMax" dest="NLMetadata" /> | |
290 | + <copyField source="NLTitle_yearOfPublicationLabel" dest="NLMetadata" /> | |
291 | + <copyField source="NLTitle_edition" dest="NLMetadata" /> | |
292 | + <copyField source="NLTitle_NLPublicationPlace_placeOfPublication" | |
293 | + dest="NLMetadata" /> | |
294 | + <copyField source="NLTitle_NLPublicationPlace_placeID" dest="NLMetadata" /> | |
295 | + <copyField source="NLTitle_NLPublicationPlace_placeOfPublicationOriginal" | |
296 | + dest="NLMetadata" /> | |
297 | + <copyField source="NLTitle_primaryLanguage" dest="NLMetadata" /> | |
298 | + <copyField source="NLTitle_characterEncoding" dest="NLMetadata" /> | |
299 | + <copyField source="NLTitle_codingStandard" dest="NLMetadata" /> | |
300 | + <copyField source="NLTitle_textQuality" dest="NLMetadata" /> | |
301 | + <copyField source="NLTitle_processingMethod" dest="NLMetadata" /> | |
302 | + <copyField source="NLTitle_NLPersonRef_role" dest="NLMetadata" /> | |
303 | + <copyField source="NLDependentTitle_title" dest="NLMetadata" /> | |
304 | + <copyField source="NLDependentTitle_subtitle" dest="NLMetadata" /> | |
305 | + <copyField source="NLDependentTitle_primaryLanguage" dest="NLMetadata" /> | |
306 | + <copyField source="NLDependentTitle_NLPersonRef_role" dest="NLMetadata" /> | |
307 | + <copyField source="NLPerson_NLPersonName_lastName" dest="NLMetadata" /> | |
308 | + <copyField source="NLPerson_NLPersonName_firstName" dest="NLMetadata" /> | |
309 | + <copyField source="NLPerson_NLPersonName_infixes" dest="NLMetadata" /> | |
310 | + <copyField source="NLPerson_NLPersonName_firstNameFull" dest="NLMetadata" /> | |
311 | + <copyField source="NLPerson_NLPersonName_fullName" dest="NLMetadata" /> | |
312 | + <copyField source="NLPerson_dateOfBirthDayMonth" dest="NLMetadata" /> | |
313 | + <copyField source="NLPerson_yearOfBirthMin" dest="NLMetadata" /> | |
314 | + <copyField source="NLPerson_yearOfBirthMax" dest="NLMetadata" /> | |
315 | + <copyField source="NLPerson_yearOfBirthLabel" dest="NLMetadata" /> | |
316 | + <copyField source="NLPerson_placeOfBirth" dest="NLMetadata" /> | |
317 | + <copyField source="NLPerson_placeOfBirthID" dest="NLMetadata" /> | |
318 | + <copyField source="NLPerson_dateOfDeathDayMonth" dest="NLMetadata" /> | |
319 | + <copyField source="NLPerson_yearOfDeathMin" dest="NLMetadata" /> | |
320 | + <copyField source="NLPerson_yearOfDeathMax" dest="NLMetadata" /> | |
321 | + <copyField source="NLPerson_yearOfDeathLabel" dest="NLMetadata" /> | |
322 | + <copyField source="NLPerson_placeOfDeath" dest="NLMetadata" /> | |
323 | + <copyField source="NLPerson_placeOfDeathID" dest="NLMetadata" /> | |
324 | + <copyField source="NLPerson_gender" dest="NLMetadata" /> | |
325 | + <copyField source="NLPerson_profession" dest="NLMetadata" /> | |
326 | + <copyField source="NLPerson_education" dest="NLMetadata" /> | |
327 | + <copyField source="NLSeriesTitle_title" dest="NLMetadata" /> | |
328 | + <copyField source="NLSeriesTitle_years" dest="NLMetadata" /> | |
329 | + <copyField source="NLSeriesTitle_description" dest="NLMetadata" /> | |
330 | + <copyField source="NLCollectionSpecific_*" dest="NLMetadata" /> | |
331 | + | |
332 | + <uniqueKey>NLCore_NLIdentification_versionID</uniqueKey> | |
333 | + | |
334 | + <fieldType name="nederlab_string" class="solr.StrField" | |
335 | + sortMissingLast="true" /> | |
336 | + <fieldType name="nederlab_uuid" class="solr.StrField" | |
337 | + sortMissingLast="true" /> | |
338 | + <fieldType name="nederlab_boolean" class="solr.BoolField" | |
339 | + sortMissingLast="true" /> | |
340 | + <fieldType name="nederlab_int" class="solr.TrieIntField" | |
341 | + precisionStep="8" positionIncrementGap="0" /> | |
342 | + <fieldType name="nederlab_long" class="solr.TrieLongField" | |
343 | + precisionStep="0" positionIncrementGap="0" /> | |
344 | + <fieldType name="nederlab_date" class="solr.TrieDateField" | |
345 | + precisionStep="6" positionIncrementGap="0" /> | |
346 | + <fieldType name="nederlab_binary" class="solr.BinaryField" /> | |
347 | + | |
348 | + <fieldType name="nederlab_text" class="solr.TextField" | |
349 | + positionIncrementGap="100"> | |
350 | + <analyzer type="index"> | |
351 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
352 | + <filter class="solr.LowerCaseFilterFactory" /> | |
353 | + </analyzer> | |
354 | + <analyzer type="query"> | |
355 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
356 | + <filter class="solr.LowerCaseFilterFactory" /> | |
357 | + </analyzer> | |
358 | + </fieldType> | |
359 | + | |
360 | + <fieldType name="nederlab_content" class="solr.TextField" | |
361 | + positionIncrementGap="100"> | |
362 | + <analyzer type="index"> | |
363 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
364 | + </analyzer> | |
365 | + <analyzer type="query"> | |
366 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
367 | + </analyzer> | |
368 | + </fieldType> | |
369 | + | |
370 | + <fieldType name="nederlab_content_lowercase" class="solr.TextField" | |
371 | + positionIncrementGap="100"> | |
372 | + <analyzer type="index"> | |
373 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
374 | + <filter class="solr.LowerCaseFilterFactory" /> | |
375 | + </analyzer> | |
376 | + <analyzer type="query"> | |
377 | + <tokenizer class="solr.StandardTokenizerFactory" /> | |
378 | + <filter class="solr.LowerCaseFilterFactory" /> | |
379 | + </analyzer> | |
380 | + </fieldType> | |
381 | + | |
382 | + <fieldType name="mtas_text" class="solr.TextField" | |
383 | + postingsFormat="MtasCodec"> | |
384 | + <analyzer type="index"> | |
385 | + <charFilter class="mtas.analysis.util.MtasCharFilterFactory" | |
386 | + type="file" prefix="/local/data/" /> | |
387 | + <tokenizer class="mtas.analysis.util.MtasTokenizerFactory" | |
388 | + configFile="mtas/folia_oeaw.xml" /> | |
389 | + </analyzer> | |
390 | + </fieldType> | |
391 | + | |
392 | +</schema> | |
... | ... |
conf/solr/schemaTest.xml
... | ... | @@ -115,11 +115,7 @@ |
115 | 115 | |
116 | 116 | WARNING: The _text_ catch-all field will significantly increase your index size. |
117 | 117 | If you don't need it, consider removing it and the corresponding copyField directive. |
118 | - --> | |
119 | - <! | |
120 | - <fieldType name="string_simpletext" class="solr.StrField" postingsFormat="SimpleText" /> | |
121 | - <field name="simple_string" type="string_simpletext" indexed="true" stored="true" required="false" multiValued="false" /> | |
122 | - --> | |
118 | + --> | |
123 | 119 | |
124 | 120 | <fieldType name="mtas_text" class="solr.TextField" postingsFormat="MtasCodec"> |
125 | 121 | <analyzer type="index"> |
... | ... |
junit/mtas/parser/MtasCQLParserTestSentence.java
... | ... | @@ -29,11 +29,11 @@ public class MtasCQLParserTestSentence { |
29 | 29 | basicTests(); |
30 | 30 | } |
31 | 31 | |
32 | - private void testCQLParse(String field, String cql, SpanQuery q) { | |
32 | + private void testCQLParse(String field, String defaultPrefix, String cql, SpanQuery q) { | |
33 | 33 | MtasCQLParser p = new MtasCQLParser(new BufferedReader(new StringReader(cql))); |
34 | 34 | try { |
35 | 35 | System.out.print("CQL parsing:\t"+cql); |
36 | - assertEquals(p.parse(field) ,q); | |
36 | + assertEquals(p.parse(field, defaultPrefix) ,q); | |
37 | 37 | System.out.print("\n"); |
38 | 38 | } catch (ParseException e) { |
39 | 39 | System.out.println("Error CQL parsing:\t"+cql); |
... | ... | @@ -41,12 +41,12 @@ public class MtasCQLParserTestSentence { |
41 | 41 | } |
42 | 42 | } |
43 | 43 | |
44 | - private void testCQLEquivalent(String field, String cql1, String cql2) { | |
44 | + private void testCQLEquivalent(String field, String defaultPrefix, String cql1, String cql2) { | |
45 | 45 | MtasCQLParser p1 = new MtasCQLParser(new BufferedReader(new StringReader(cql1))); |
46 | 46 | MtasCQLParser p2 = new MtasCQLParser(new BufferedReader(new StringReader(cql2))); |
47 | 47 | try { |
48 | 48 | System.out.print("CQL equivalent:\t"+cql1+" and "+cql2); |
49 | - assertEquals(p1.parse(field) ,p2.parse(field)); | |
49 | + assertEquals(p1.parse(field, defaultPrefix) ,p2.parse(field, defaultPrefix)); | |
50 | 50 | System.out.print("\n"); |
51 | 51 | } catch (ParseException e) { |
52 | 52 | System.out.println("Error CQL equivalent:\t"+cql1+" and "+cql2); |
... | ... | @@ -73,6 +73,7 @@ public class MtasCQLParserTestSentence { |
73 | 73 | basicTest16(); |
74 | 74 | basicTest17(); |
75 | 75 | basicTest18(); |
76 | + basicTest19(); | |
76 | 77 | } |
77 | 78 | |
78 | 79 | private void basicTest1() { |
... | ... | @@ -84,14 +85,14 @@ public class MtasCQLParserTestSentence { |
84 | 85 | items.add(new MtasSpanSequenceItem(q1, false)); |
85 | 86 | items.add(new MtasSpanSequenceItem(q2, false)); |
86 | 87 | SpanQuery q = new MtasSpanSequenceQuery(items); |
87 | - testCQLParse(field, cql, q); | |
88 | + testCQLParse(field, null, cql, q); | |
88 | 89 | } |
89 | 90 | |
90 | 91 | private void basicTest2() { |
91 | 92 | String field = "testveld"; |
92 | 93 | String cql1 = "[pos=\"LID\"] [] []? [] [lemma=\"koe\"]"; |
93 | 94 | String cql2 = "[pos=\"LID\"] []{2,3} [lemma=\"koe\"]"; |
94 | - testCQLEquivalent(field, cql1, cql2); | |
95 | + testCQLEquivalent(field, null, cql1, cql2); | |
95 | 96 | } |
96 | 97 | |
97 | 98 | private void basicTest3() { |
... | ... | @@ -100,7 +101,7 @@ public class MtasCQLParserTestSentence { |
100 | 101 | SpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID"); |
101 | 102 | SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","koe"); |
102 | 103 | SpanQuery q = new MtasSpanOrQuery(q1,q2); |
103 | - testCQLParse(field, cql, q); | |
104 | + testCQLParse(field, null, cql, q); | |
104 | 105 | } |
105 | 106 | |
106 | 107 | private void basicTest4() { |
... | ... | @@ -114,28 +115,28 @@ public class MtasCQLParserTestSentence { |
114 | 115 | items.add(new MtasSpanSequenceItem(q3, false)); |
115 | 116 | SpanQuery q4 = new MtasSpanSequenceQuery(items); |
116 | 117 | SpanQuery q = new MtasSpanOrQuery(q1,q4); |
117 | - testCQLParse(field, cql, q); | |
118 | + testCQLParse(field, null, cql, q); | |
118 | 119 | } |
119 | 120 | |
120 | 121 | private void basicTest5() { |
121 | 122 | String field = "testveld"; |
122 | 123 | String cql1 = "([pos=\"LID\"]([pos=\"ADJ\"][lemma=\"koe\"]))"; |
123 | 124 | String cql2 = "[pos=\"LID\"][pos=\"ADJ\"][lemma=\"koe\"]"; |
124 | - testCQLEquivalent(field, cql1, cql2); | |
125 | + testCQLEquivalent(field, null, cql1, cql2); | |
125 | 126 | } |
126 | 127 | |
127 | 128 | private void basicTest6() { |
128 | 129 | String field = "testveld"; |
129 | 130 | String cql1 = "([pos=\"LID\"]|[lemma=\"de\"][lemma=\"koe\"])|([pos=\"ADJ\"]|([lemma=\"het\"]([lemma=\"paard\"])))"; |
130 | 131 | String cql2 = "[pos=\"LID\"]|[lemma=\"de\"][lemma=\"koe\"]|[pos=\"ADJ\"]|[lemma=\"het\"][lemma=\"paard\"]"; |
131 | - testCQLEquivalent(field, cql1, cql2); | |
132 | + testCQLEquivalent(field, null, cql1, cql2); | |
132 | 133 | } |
133 | 134 | |
134 | 135 | private void basicTest7() { |
135 | 136 | String field = "testveld"; |
136 | 137 | String cql1 = "[pos=\"LID\"] []{0,1} []{3,5} []{2,4}"; |
137 | 138 | String cql2 = "[pos=\"LID\"] []{5,10}"; |
138 | - testCQLEquivalent(field, cql1, cql2); | |
139 | + testCQLEquivalent(field, null, cql1, cql2); | |
139 | 140 | } |
140 | 141 | |
141 | 142 | private void basicTest8() { |
... | ... | @@ -149,7 +150,7 @@ public class MtasCQLParserTestSentence { |
149 | 150 | items.add(new MtasSpanSequenceItem(q1, false)); |
150 | 151 | items.add(new MtasSpanSequenceItem(q4, false)); |
151 | 152 | SpanQuery q = new MtasSpanSequenceQuery(items); |
152 | - testCQLParse(field, cql, q); | |
153 | + testCQLParse(field, null, cql, q); | |
153 | 154 | } |
154 | 155 | |
155 | 156 | private void basicTest9() { |
... | ... | @@ -165,7 +166,7 @@ public class MtasCQLParserTestSentence { |
165 | 166 | items.add(new MtasSpanSequenceItem(q5, false)); |
166 | 167 | items.add(new MtasSpanSequenceItem(q4, false)); |
167 | 168 | SpanQuery q = new MtasSpanSequenceQuery(items); |
168 | - testCQLParse(field, cql, q); | |
169 | + testCQLParse(field, null, cql, q); | |
169 | 170 | } |
170 | 171 | |
171 | 172 | private void basicTest10() { |
... | ... | @@ -179,7 +180,7 @@ public class MtasCQLParserTestSentence { |
179 | 180 | items.add(new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(q2,1,3), false)); |
180 | 181 | items.add(new MtasSpanSequenceItem(q3, false)); |
181 | 182 | SpanQuery q = new MtasSpanSequenceQuery(items); |
182 | - testCQLParse(field, cql, q); | |
183 | + testCQLParse(field, null, cql, q); | |
183 | 184 | } |
184 | 185 | |
185 | 186 | private void basicTest11() { |
... | ... | @@ -188,7 +189,7 @@ public class MtasCQLParserTestSentence { |
188 | 189 | SpanQuery q1 = new MtasCQLParserGroupQuery(field,"sentence"); |
189 | 190 | SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","koe"); |
190 | 191 | SpanQuery q = new SpanContainingQuery(q1, q2); |
191 | - testCQLParse(field, cql, q); | |
192 | + testCQLParse(field, null, cql, q); | |
192 | 193 | } |
193 | 194 | |
194 | 195 | private void basicTest12() { |
... | ... | @@ -197,7 +198,7 @@ public class MtasCQLParserTestSentence { |
197 | 198 | SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe"); |
198 | 199 | SpanQuery q2 = new MtasCQLParserGroupQuery(field,"sentence"); |
199 | 200 | SpanQuery q = new SpanWithinQuery(q2, q1); |
200 | - testCQLParse(field, cql, q); | |
201 | + testCQLParse(field, null, cql, q); | |
201 | 202 | } |
202 | 203 | |
203 | 204 | private void basicTest13() { |
... | ... | @@ -211,7 +212,7 @@ public class MtasCQLParserTestSentence { |
211 | 212 | items.add(new MtasSpanSequenceItem(q1, false)); |
212 | 213 | items.add(new MtasSpanSequenceItem(q4, false)); |
213 | 214 | SpanQuery q = new MtasSpanSequenceQuery(items); |
214 | - testCQLParse(field, cql, q); | |
215 | + testCQLParse(field, null, cql, q); | |
215 | 216 | } |
216 | 217 | |
217 | 218 | private void basicTest14() { |
... | ... | @@ -225,7 +226,7 @@ public class MtasCQLParserTestSentence { |
225 | 226 | items.add(new MtasSpanSequenceItem(q3, false)); |
226 | 227 | items.add(new MtasSpanSequenceItem(q4, false)); |
227 | 228 | SpanQuery q = new MtasSpanSequenceQuery(items); |
228 | - testCQLParse(field, cql, q); | |
229 | + testCQLParse(field, null, cql, q); | |
229 | 230 | } |
230 | 231 | |
231 | 232 | private void basicTest15() { |
... | ... | @@ -246,7 +247,7 @@ public class MtasCQLParserTestSentence { |
246 | 247 | items2.add(new MtasSpanSequenceItem(q1, false)); |
247 | 248 | items2.add(new MtasSpanSequenceItem(q8, false)); |
248 | 249 | SpanQuery q = new MtasSpanSequenceQuery(items2); |
249 | - testCQLParse(field, cql, q); | |
250 | + testCQLParse(field, null, cql, q); | |
250 | 251 | } |
251 | 252 | |
252 | 253 | private void basicTest16() { |
... | ... | @@ -258,7 +259,7 @@ public class MtasCQLParserTestSentence { |
258 | 259 | SpanQuery q4 = new SpanContainingQuery(q2, q3); |
259 | 260 | SpanQuery q5 = new SpanWithinQuery(q4, q1); |
260 | 261 | SpanQuery q = new SpanNotQuery(q5,new SpanContainingQuery(q5, q3)); |
261 | - testCQLParse(field, cql, q); | |
262 | + testCQLParse(field, null, cql, q); | |
262 | 263 | } |
263 | 264 | |
264 | 265 | private void basicTest17() { |
... | ... | @@ -271,11 +272,23 @@ public class MtasCQLParserTestSentence { |
271 | 272 | items.add(new MtasSpanSequenceItem(q2, false)); |
272 | 273 | items.add(new MtasSpanSequenceItem(new MtasSpanMatchAllQuery(field), false)); |
273 | 274 | SpanQuery q = new MtasSpanSequenceQuery(items); |
274 | - testCQLParse(field, cql, q); | |
275 | + testCQLParse(field, null, cql, q); | |
275 | 276 | } |
276 | 277 | |
277 | 278 | private void basicTest18() { |
278 | 279 | String field = "testveld"; |
280 | + String cql = "\"de\" [pos=\"N\"]"; | |
281 | + SpanQuery q1 = new MtasCQLParserWordQuery(field,"t_lc","de"); | |
282 | + SpanQuery q2 = new MtasCQLParserWordQuery(field,"pos","N"); | |
283 | + List<MtasSpanSequenceItem> items = new ArrayList<MtasSpanSequenceItem>(); | |
284 | + items.add(new MtasSpanSequenceItem(q1, false)); | |
285 | + items.add(new MtasSpanSequenceItem(q2, false)); | |
286 | + SpanQuery q = new MtasSpanSequenceQuery(items); | |
287 | + testCQLParse(field, "t_lc", cql, q); | |
288 | + } | |
289 | + | |
290 | + private void basicTest19() { | |
291 | + String field = "testveld"; | |
279 | 292 | String cql = "([]<entity=\"loc\"/>{1,2}[]){3,4}"; |
280 | 293 | SpanQuery q1 = new MtasCQLParserGroupQuery(field,"entity","loc"); |
281 | 294 | SpanQuery q2 = new MtasSpanRecurrenceQuery(q1,1,2); |
... | ... | @@ -285,7 +298,7 @@ public class MtasCQLParserTestSentence { |
285 | 298 | items.add(new MtasSpanSequenceItem(new MtasSpanMatchAllQuery(field), false)); |
286 | 299 | SpanQuery q3 = new MtasSpanSequenceQuery(items); |
287 | 300 | SpanQuery q = new MtasSpanRecurrenceQuery(q3,3,4); |
288 | - testCQLParse(field, cql, q); | |
301 | + testCQLParse(field, null, cql, q); | |
289 | 302 | } |
290 | 303 | |
291 | 304 | } |
... | ... |
junit/mtas/parser/MtasCQLParserTestWord.java
... | ... | @@ -23,10 +23,10 @@ public class MtasCQLParserTestWord { |
23 | 23 | basicNotTests(); |
24 | 24 | } |
25 | 25 | |
26 | - private void testCQLParse(String field, String cql, SpanQuery q) { | |
26 | + private void testCQLParse(String field, String defaultPrefix, String cql, SpanQuery q) { | |
27 | 27 | MtasCQLParser p = new MtasCQLParser(new BufferedReader(new StringReader(cql))); |
28 | 28 | try { |
29 | - assertEquals(p.parse(field) ,q); | |
29 | + assertEquals(p.parse(field, defaultPrefix) ,q); | |
30 | 30 | System.out.println("Tested CQL parsing:\t"+cql); |
31 | 31 | } catch (ParseException e) { |
32 | 32 | System.out.println("Error CQL parsing:\t"+cql); |
... | ... | @@ -34,11 +34,11 @@ public class MtasCQLParserTestWord { |
34 | 34 | } |
35 | 35 | } |
36 | 36 | |
37 | - private void testCQLEquivalent(String field, String cql1, String cql2) { | |
37 | + private void testCQLEquivalent(String field, String defaultPrefix, String cql1, String cql2) { | |
38 | 38 | MtasCQLParser p1 = new MtasCQLParser(new BufferedReader(new StringReader(cql1))); |
39 | 39 | MtasCQLParser p2 = new MtasCQLParser(new BufferedReader(new StringReader(cql2))); |
40 | 40 | try { |
41 | - assertEquals(p1.parse(field) ,p2.parse(field)); | |
41 | + assertEquals(p1.parse(field, defaultPrefix) ,p2.parse(field, defaultPrefix)); | |
42 | 42 | System.out.println("Tested CQL equivalent:\t"+cql1+" and "+cql2); |
43 | 43 | } catch (ParseException e) { |
44 | 44 | System.out.println("Error CQL equivalent:\t"+cql1+" and "+cql2); |
... | ... | @@ -67,6 +67,7 @@ public class MtasCQLParserTestWord { |
67 | 67 | basicTest10(); |
68 | 68 | basicTest11(); |
69 | 69 | basicTest12(); |
70 | + basicTest13(); | |
70 | 71 | } |
71 | 72 | |
72 | 73 | private void basicNotTest1() { |
... | ... | @@ -75,14 +76,14 @@ public class MtasCQLParserTestWord { |
75 | 76 | SpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID"); |
76 | 77 | SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","de"); |
77 | 78 | SpanQuery q = new SpanNotQuery(q1,q2); |
78 | - testCQLParse(field, cql, q); | |
79 | + testCQLParse(field, null, cql, q); | |
79 | 80 | } |
80 | 81 | |
81 | 82 | private void basicNotTest2() { |
82 | 83 | String field = "testveld"; |
83 | 84 | String cql1 = "[pos=\"LID\" & (!lemma=\"de\")]"; |
84 | 85 | String cql2 = "[pos=\"LID\" & !(lemma=\"de\")]"; |
85 | - testCQLEquivalent(field, cql1, cql2); | |
86 | + testCQLEquivalent(field, null, cql1, cql2); | |
86 | 87 | } |
87 | 88 | |
88 | 89 | private void basicNotTest3() { |
... | ... | @@ -93,28 +94,28 @@ public class MtasCQLParserTestWord { |
93 | 94 | SpanQuery q3 = new MtasCQLParserWordQuery(field,"lemma","een"); |
94 | 95 | SpanQuery q4 = new MtasSpanOrQuery(new SpanQuery[]{q2,q3}); |
95 | 96 | SpanQuery q = new SpanNotQuery(q1,q4); |
96 | - testCQLParse(field, cql, q); | |
97 | + testCQLParse(field, null, cql, q); | |
97 | 98 | } |
98 | 99 | |
99 | 100 | private void basicNotTest4() { |
100 | 101 | String field = "testveld"; |
101 | 102 | String cql1 = "[pos=\"LID\" & !(lemma=\"de\" | lemma=\"een\")]"; |
102 | 103 | String cql2 = "[pos=\"LID\" & (!lemma=\"de\" & !lemma=\"een\")]"; |
103 | - testCQLEquivalent(field, cql1, cql2); | |
104 | + testCQLEquivalent(field, null, cql1, cql2); | |
104 | 105 | } |
105 | 106 | |
106 | 107 | private void basicNotTest5() { |
107 | 108 | String field = "testveld"; |
108 | 109 | String cql1 = "[pos=\"LID\" & !(lemma=\"de\" | lemma=\"een\")]"; |
109 | 110 | String cql2 = "[pos=\"LID\" & !lemma=\"de\" & !lemma=\"een\"]"; |
110 | - testCQLEquivalent(field, cql1, cql2); | |
111 | + testCQLEquivalent(field, null, cql1, cql2); | |
111 | 112 | } |
112 | 113 | |
113 | 114 | private void basicTest1() { |
114 | 115 | String field = "testveld"; |
115 | 116 | String cql = "[lemma=\"koe\"]"; |
116 | 117 | SpanQuery q = new MtasCQLParserWordQuery(field, "lemma", "koe"); |
117 | - testCQLParse(field, cql, q); | |
118 | + testCQLParse(field, null, cql, q); | |
118 | 119 | } |
119 | 120 | |
120 | 121 | private void basicTest2() { |
... | ... | @@ -123,7 +124,7 @@ public class MtasCQLParserTestWord { |
123 | 124 | SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe"); |
124 | 125 | SpanQuery q2 = new MtasCQLParserWordQuery(field,"pos","N"); |
125 | 126 | SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q1,q2}); |
126 | - testCQLParse(field, cql, q); | |
127 | + testCQLParse(field, null, cql, q); | |
127 | 128 | } |
128 | 129 | |
129 | 130 | private void basicTest3() { |
... | ... | @@ -132,14 +133,14 @@ public class MtasCQLParserTestWord { |
132 | 133 | SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe"); |
133 | 134 | SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","paard"); |
134 | 135 | SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q2}); |
135 | - testCQLParse(field, cql, q); | |
136 | + testCQLParse(field, null, cql, q); | |
136 | 137 | } |
137 | 138 | |
138 | 139 | private void basicTest4() { |
139 | 140 | String field = "testveld"; |
140 | 141 | String cql1 = "[lemma=\"koe\" | lemma=\"paard\"]"; |
141 | 142 | String cql2 = "[(lemma=\"koe\" | lemma=\"paard\")]"; |
142 | - testCQLEquivalent(field, cql1, cql2); | |
143 | + testCQLEquivalent(field, null, cql1, cql2); | |
143 | 144 | } |
144 | 145 | |
145 | 146 | private void basicTest5() { |
... | ... | @@ -150,7 +151,7 @@ public class MtasCQLParserTestWord { |
150 | 151 | SpanQuery q3 = new MtasSpanOrQuery(new SpanQuery[]{q1,q2}); |
151 | 152 | SpanQuery q4 = new MtasCQLParserWordQuery(field,"pos","N"); |
152 | 153 | SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q3,q4}); |
153 | - testCQLParse(field, cql, q); | |
154 | + testCQLParse(field, null, cql, q); | |
154 | 155 | } |
155 | 156 | |
156 | 157 | private void basicTest6() { |
... | ... | @@ -161,7 +162,7 @@ public class MtasCQLParserTestWord { |
161 | 162 | SpanQuery q3 = new MtasCQLParserWordQuery(field,"lemma","paard"); |
162 | 163 | SpanQuery q4 = new MtasSpanOrQuery(new SpanQuery[]{q2,q3}); |
163 | 164 | SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q1,q4}); |
164 | - testCQLParse(field, cql, q); | |
165 | + testCQLParse(field, null, cql, q); | |
165 | 166 | } |
166 | 167 | |
167 | 168 | private void basicTest7() { |
... | ... | @@ -172,7 +173,7 @@ public class MtasCQLParserTestWord { |
172 | 173 | SpanQuery q3 = new MtasCQLParserWordQuery(field,"pos","N"); |
173 | 174 | SpanQuery q4 = new MtasSpanAndQuery(new SpanQuery[]{q2,q3}); |
174 | 175 | SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q4}); |
175 | - testCQLParse(field, cql, q); | |
176 | + testCQLParse(field, null, cql, q); | |
176 | 177 | } |
177 | 178 | |
178 | 179 | private void basicTest8() { |
... | ... | @@ -185,7 +186,7 @@ public class MtasCQLParserTestWord { |
185 | 186 | SpanQuery q5 = new MtasSpanAndQuery(new SpanQuery[]{q1,q2}); |
186 | 187 | SpanQuery q6 = new MtasSpanAndQuery(new SpanQuery[]{q3,q4}); |
187 | 188 | SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q5,q6}); |
188 | - testCQLParse(field, cql, q); | |
189 | + testCQLParse(field, null, cql, q); | |
189 | 190 | } |
190 | 191 | |
191 | 192 | private void basicTest9() { |
... | ... | @@ -200,7 +201,7 @@ public class MtasCQLParserTestWord { |
200 | 201 | SpanQuery q7 = new MtasSpanAndQuery(new SpanQuery[]{q6,q3}); |
201 | 202 | SpanQuery q8 = new MtasSpanAndQuery(new SpanQuery[]{q4,q5}); |
202 | 203 | SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q7,q8}); |
203 | - testCQLParse(field, cql, q); | |
204 | + testCQLParse(field, null, cql, q); | |
204 | 205 | } |
205 | 206 | |
206 | 207 | private void basicTest10() { |
... | ... | @@ -217,22 +218,22 @@ public class MtasCQLParserTestWord { |
217 | 218 | SpanQuery q9 = new MtasSpanOrQuery(new SpanQuery[]{q4,q5}); |
218 | 219 | SpanQuery q10 = new MtasSpanAndQuery(new SpanQuery[]{q9,q6}); |
219 | 220 | SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q8,q10}); |
220 | - testCQLParse(field, cql, q); | |
221 | + testCQLParse(field, null, cql, q); | |
221 | 222 | } |
222 | 223 | |
223 | 224 | private void basicTest11() { |
224 | 225 | String field = "testveld"; |
225 | 226 | String cql1 = "[#300]"; |
226 | 227 | SpanQuery q1 = new MtasCQLParserWordPositionQuery(field, 300); |
227 | - testCQLParse(field, cql1, q1); | |
228 | + testCQLParse(field, null, cql1, q1); | |
228 | 229 | String cql2 = "[#100-110]"; |
229 | 230 | SpanQuery q2 = new MtasCQLParserWordPositionQuery(field, 100, 110); |
230 | - testCQLParse(field, cql2, q2); | |
231 | + testCQLParse(field, null, cql2, q2); | |
231 | 232 | String cql3 = "[#100-105 | #110]"; |
232 | 233 | SpanQuery q3a = new MtasCQLParserWordPositionQuery(field, 100, 105); |
233 | 234 | SpanQuery q3b = new MtasCQLParserWordPositionQuery(field, 110); |
234 | 235 | SpanQuery q3 = new MtasSpanOrQuery(q3a, q3b); |
235 | - testCQLParse(field, cql3, q3); | |
236 | + testCQLParse(field, null, cql3, q3); | |
236 | 237 | } |
237 | 238 | |
238 | 239 | private void basicTest12() { |
... | ... | @@ -242,6 +243,13 @@ public class MtasCQLParserTestWord { |
242 | 243 | SpanQuery q2 = new MtasCQLParserWordQuery(field,"t_lc","het"); |
243 | 244 | SpanQuery q3 = new MtasCQLParserWordQuery(field,"t_lc","paard"); |
244 | 245 | SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q2,q3}); |
245 | - testCQLParse(field, cql, q); | |
246 | - } | |
246 | + testCQLParse(field, null, cql, q); | |
247 | + } | |
248 | + | |
249 | + private void basicTest13() { | |
250 | + String field = "testveld"; | |
251 | + String cql = "\"de\""; | |
252 | + SpanQuery q = new MtasCQLParserWordQuery(field,"t_lc","de"); | |
253 | + testCQLParse(field, "t_lc", cql, q); | |
254 | + } | |
247 | 255 | } |
... | ... |
pom.xml
... | ... | @@ -2,11 +2,13 @@ |
2 | 2 | xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
3 | 3 | <properties> |
4 | 4 | <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
5 | + <currentDevelopmentVersion>6.2.0</currentDevelopmentVersion> | |
6 | + <currentDevelopmentRelease>20160802</currentDevelopmentRelease> | |
5 | 7 | </properties> |
6 | 8 | <modelVersion>4.0.0</modelVersion> |
7 | 9 | <groupId>dev.meertens.mtas</groupId> |
8 | 10 | <artifactId>mtas</artifactId> |
9 | - <version>6.1.0</version> | |
11 | + <version>6.2.0</version> | |
10 | 12 | <packaging>jar</packaging> |
11 | 13 | <licenses> |
12 | 14 | <license> |
... | ... | @@ -23,12 +25,12 @@ |
23 | 25 | <developers> |
24 | 26 | <developer> |
25 | 27 | <name>Matthijs Brouwer</name> |
26 | - <url>https://www.meertens.knaw.nl/cms/nl/medewerkers/144373-matthijsb</url> | |
28 | + <url>https://nl.linkedin.com/in/brouwermatthijs/</url> | |
27 | 29 | </developer> |
28 | 30 | <developer> |
29 | - <name>Marc Kemps-Snijders</name> | |
30 | - <url>https://www.meertens.knaw.nl/cms/nl/medewerkers/143329-marck</url> | |
31 | - </developer> | |
31 | + <name>Marc Kemps-Snijders</name> | |
32 | + <url>https://nl.linkedin.com/in/marc-kemps-snijders-1b33753</url> | |
33 | + </developer> | |
32 | 34 | </developers> |
33 | 35 | <build> |
34 | 36 | <sourceDirectory>src</sourceDirectory> |
... | ... | @@ -39,6 +41,24 @@ |
39 | 41 | </resources> |
40 | 42 | <plugins> |
41 | 43 | <plugin> |
44 | + <artifactId>maven-clean-plugin</artifactId> | |
45 | + <version>3.0.0</version> | |
46 | + <configuration> | |
47 | + <filesets> | |
48 | + <fileset> | |
49 | + <directory>gh-pages</directory> | |
50 | + <includes> | |
51 | + <include>**/*</include> | |
52 | + </includes> | |
53 | + <excludes> | |
54 | + <exclude>**/.git/</exclude> | |
55 | + </excludes> | |
56 | + <followSymlinks>false</followSymlinks> | |
57 | + </fileset> | |
58 | + </filesets> | |
59 | + </configuration> | |
60 | + </plugin> | |
61 | + <plugin> | |
42 | 62 | <groupId>org.apache.maven.plugins</groupId> |
43 | 63 | <artifactId>maven-compiler-plugin</artifactId> |
44 | 64 | <version>3.5.1</version> |
... | ... | @@ -46,7 +66,7 @@ |
46 | 66 | <source>1.8</source> |
47 | 67 | <target>1.8</target> |
48 | 68 | </configuration> |
49 | - </plugin> | |
69 | + </plugin> | |
50 | 70 | <plugin> |
51 | 71 | <groupId>org.apache.maven.plugins</groupId> |
52 | 72 | <artifactId>maven-site-plugin</artifactId> |
... | ... | @@ -145,27 +165,27 @@ |
145 | 165 | <dependency> |
146 | 166 | <groupId>org.apache.lucene</groupId> |
147 | 167 | <artifactId>lucene-core</artifactId> |
148 | - <version>6.1.0</version> | |
168 | + <version>6.2.0</version> | |
149 | 169 | </dependency> |
150 | 170 | <dependency> |
151 | 171 | <groupId>org.apache.lucene</groupId> |
152 | 172 | <artifactId>lucene-analyzers-common</artifactId> |
153 | - <version>6.1.0</version> | |
173 | + <version>6.2.0</version> | |
154 | 174 | </dependency> |
155 | 175 | <dependency> |
156 | 176 | <groupId>org.apache.lucene</groupId> |
157 | 177 | <artifactId>lucene-queryparser</artifactId> |
158 | - <version>6.1.0</version> | |
178 | + <version>6.2.0</version> | |
159 | 179 | </dependency> |
160 | 180 | <dependency> |
161 | 181 | <groupId>org.apache.lucene</groupId> |
162 | 182 | <artifactId>lucene-codecs</artifactId> |
163 | - <version>6.1.0</version> | |
183 | + <version>6.2.0</version> | |
164 | 184 | </dependency> |
165 | 185 | <dependency> |
166 | 186 | <groupId>org.apache.solr</groupId> |
167 | 187 | <artifactId>solr-core</artifactId> |
168 | - <version>6.1.0</version> | |
188 | + <version>6.2.0</version> | |
169 | 189 | </dependency> |
170 | 190 | <dependency> |
171 | 191 | <groupId>org.apache.commons</groupId> |
... | ... |
src/mtas/analysis/MtasTokenizer.java
... | ... | @@ -25,8 +25,11 @@ import org.apache.lucene.util.AttributeFactory; |
25 | 25 | |
26 | 26 | /** |
27 | 27 | * The Class MtasTokenizer. |
28 | + * | |
29 | + * @param <T> | |
30 | + * the generic type | |
28 | 31 | */ |
29 | -public final class MtasTokenizer extends Tokenizer { | |
32 | +public final class MtasTokenizer<T> extends Tokenizer { | |
30 | 33 | |
31 | 34 | /** The configuration mtas. */ |
32 | 35 | public static String CONFIGURATION_MTAS = "mtas"; |
... | ... | @@ -73,7 +76,8 @@ public final class MtasTokenizer extends Tokenizer { |
73 | 76 | /** |
74 | 77 | * Instantiates a new mtas tokenizer. |
75 | 78 | * |
76 | - * @param configFileName the config file name | |
79 | + * @param configFileName | |
80 | + * the config file name | |
77 | 81 | */ |
78 | 82 | public MtasTokenizer(String configFileName) { |
79 | 83 | readConfigurationFile(configFileName); |
... | ... | @@ -82,8 +86,10 @@ public final class MtasTokenizer extends Tokenizer { |
82 | 86 | /** |
83 | 87 | * Instantiates a new mtas tokenizer. |
84 | 88 | * |
85 | - * @param config the config | |
86 | - * @throws IOException Signals that an I/O exception has occurred. | |
89 | + * @param config | |
90 | + * the config | |
91 | + * @throws IOException | |
92 | + * Signals that an I/O exception has occurred. | |
87 | 93 | */ |
88 | 94 | public MtasTokenizer(MtasConfiguration config) throws IOException { |
89 | 95 | processConfiguration(config); |
... | ... | @@ -92,8 +98,10 @@ public final class MtasTokenizer extends Tokenizer { |
92 | 98 | /** |
93 | 99 | * Instantiates a new mtas tokenizer. |
94 | 100 | * |
95 | - * @param reader the reader | |
96 | - * @throws IOException Signals that an I/O exception has occurred. | |
101 | + * @param reader | |
102 | + * the reader | |
103 | + * @throws IOException | |
104 | + * Signals that an I/O exception has occurred. | |
97 | 105 | */ |
98 | 106 | public MtasTokenizer(InputStream reader) throws IOException { |
99 | 107 | processConfiguration(MtasConfiguration.readConfiguration(reader)); |
... | ... | @@ -102,9 +110,12 @@ public final class MtasTokenizer extends Tokenizer { |
102 | 110 | /** |
103 | 111 | * Instantiates a new mtas tokenizer. |
104 | 112 | * |
105 | - * @param factory the factory | |
106 | - * @param config the config | |
107 | - * @throws IOException Signals that an I/O exception has occurred. | |
113 | + * @param factory | |
114 | + * the factory | |
115 | + * @param config | |
116 | + * the config | |
117 | + * @throws IOException | |
118 | + * Signals that an I/O exception has occurred. | |
108 | 119 | */ |
109 | 120 | public MtasTokenizer(AttributeFactory factory, MtasConfiguration config) |
110 | 121 | throws IOException { |
... | ... | @@ -112,7 +123,9 @@ public final class MtasTokenizer extends Tokenizer { |
112 | 123 | processConfiguration(config); |
113 | 124 | } |
114 | 125 | |
115 | - /* (non-Javadoc) | |
126 | + /* | |
127 | + * (non-Javadoc) | |
128 | + * | |
116 | 129 | * @see org.apache.lucene.analysis.TokenStream#incrementToken() |
117 | 130 | */ |
118 | 131 | @Override |
... | ... | @@ -128,7 +141,7 @@ public final class MtasTokenizer extends Tokenizer { |
128 | 141 | // compute info |
129 | 142 | positionIncrement = token.getPositionStart() - currentPosition; |
130 | 143 | currentPosition = token.getPositionStart(); |
131 | - payloadEncoder = new MtasPayloadEncoder(token, encodingFlags); | |
144 | + payloadEncoder = new MtasPayloadEncoder(token, encodingFlags); | |
132 | 145 | // set info |
133 | 146 | termAtt.append(token.getValue().toString()); |
134 | 147 | positionIncrementAtt.setPositionIncrement(positionIncrement); |
... | ... | @@ -157,7 +170,6 @@ public final class MtasTokenizer extends Tokenizer { |
157 | 170 | e.getClass().getSimpleName() + ": " + e.getMessage()); |
158 | 171 | } catch (MtasParserException e) { |
159 | 172 | tokenCollectionIterator = null; |
160 | - e.printStackTrace(); | |
161 | 173 | throw new IOException( |
162 | 174 | e.getClass().getSimpleName() + ": " + e.getMessage()); |
163 | 175 | } |
... | ... | @@ -167,14 +179,19 @@ public final class MtasTokenizer extends Tokenizer { |
167 | 179 | /** |
168 | 180 | * Prints the token collection. |
169 | 181 | * |
170 | - * @param r the r | |
171 | - * @throws IOException Signals that an I/O exception has occurred. | |
172 | - * @throws MtasParserException the mtas parser exception | |
182 | + * @param r | |
183 | + * the r | |
184 | + * @throws IOException | |
185 | + * Signals that an I/O exception has occurred. | |
186 | + * @throws MtasParserException | |
187 | + * the mtas parser exception | |
173 | 188 | */ |
174 | 189 | public void print(Reader r) throws IOException, MtasParserException { |
175 | 190 | setReader(r); |
176 | 191 | reset(); |
177 | - tokenCollection.print(); | |
192 | + if (tokenCollection != null) { | |
193 | + tokenCollection.print(); | |
194 | + } | |
178 | 195 | end(); |
179 | 196 | close(); |
180 | 197 | } |
... | ... | @@ -182,10 +199,13 @@ public final class MtasTokenizer extends Tokenizer { |
182 | 199 | /** |
183 | 200 | * Gets the list. |
184 | 201 | * |
185 | - * @param r the r | |
202 | + * @param r | |
203 | + * the r | |
186 | 204 | * @return the list |
187 | - * @throws IOException Signals that an I/O exception has occurred. | |
188 | - * @throws MtasParserException the mtas parser exception | |
205 | + * @throws IOException | |
206 | + * Signals that an I/O exception has occurred. | |
207 | + * @throws MtasParserException | |
208 | + * the mtas parser exception | |
189 | 209 | */ |
190 | 210 | public String[][] getList(Reader r) throws IOException, MtasParserException { |
191 | 211 | setReader(r); |
... | ... | @@ -199,9 +219,12 @@ public final class MtasTokenizer extends Tokenizer { |
199 | 219 | /** |
200 | 220 | * Construct token collection. |
201 | 221 | * |
202 | - * @param reader the reader | |
203 | - * @throws MtasConfigException the mtas config exception | |
204 | - * @throws MtasParserException the mtas parser exception | |
222 | + * @param reader | |
223 | + * the reader | |
224 | + * @throws MtasConfigException | |
225 | + * the mtas config exception | |
226 | + * @throws MtasParserException | |
227 | + * the mtas parser exception | |
205 | 228 | */ |
206 | 229 | private void constructTokenCollection(Reader reader) |
207 | 230 | throws MtasConfigException, MtasParserException { |
... | ... | @@ -216,29 +239,28 @@ public final class MtasTokenizer extends Tokenizer { |
216 | 239 | try { |
217 | 240 | tokenCollection = parser.createTokenCollection(reader); |
218 | 241 | return; |
219 | - } catch (MtasParserException e) { | |
242 | + } catch (MtasParserException e) { | |
220 | 243 | tokenCollection = new MtasTokenCollection(); |
221 | - e.printStackTrace(); | |
222 | 244 | throw new MtasParserException(e.getMessage()); |
223 | 245 | } |
224 | 246 | } else { |
225 | 247 | throw new MtasConfigException("no instance of MtasParser"); |
226 | 248 | } |
227 | 249 | } catch (NoSuchMethodException e) { |
228 | - throw new MtasConfigException(e.getClass().getName() | |
229 | - + " : '" + e.getMessage() + "'"); | |
250 | + throw new MtasConfigException( | |
251 | + e.getClass().getName() + " : '" + e.getMessage() + "'"); | |
230 | 252 | } catch (InvocationTargetException e) { |
231 | - throw new MtasConfigException(e.getClass().getName() | |
232 | - + " : '" + e.getMessage() + "'"); | |
253 | + throw new MtasConfigException( | |
254 | + e.getClass().getName() + " : '" + e.getMessage() + "'"); | |
233 | 255 | } catch (IllegalAccessException e) { |
234 | - throw new MtasConfigException(e.getClass().getName() | |
235 | - + " : '" + e.getMessage() + "'"); | |
256 | + throw new MtasConfigException( | |
257 | + e.getClass().getName() + " : '" + e.getMessage() + "'"); | |
236 | 258 | } catch (ClassNotFoundException e) { |
237 | - throw new MtasConfigException(e.getClass().getName() | |
238 | - + " : '" + e.getMessage() + "'"); | |
259 | + throw new MtasConfigException( | |
260 | + e.getClass().getName() + " : '" + e.getMessage() + "'"); | |
239 | 261 | } catch (InstantiationException e) { |
240 | - throw new MtasConfigException(e.getClass().getName() | |
241 | - + " : '" + e.getMessage() + "'"); | |
262 | + throw new MtasConfigException( | |
263 | + e.getClass().getName() + " : '" + e.getMessage() + "'"); | |
242 | 264 | } |
243 | 265 | |
244 | 266 | } |
... | ... | @@ -246,7 +268,8 @@ public final class MtasTokenizer extends Tokenizer { |
246 | 268 | /** |
247 | 269 | * Read configuration file. |
248 | 270 | * |
249 | - * @param configFile the config file | |
271 | + * @param configFile | |
272 | + * the config file | |
250 | 273 | */ |
251 | 274 | private void readConfigurationFile(String configFile) { |
252 | 275 | InputStream is; |
... | ... | @@ -261,13 +284,13 @@ public final class MtasTokenizer extends Tokenizer { |
261 | 284 | } |
262 | 285 | } |
263 | 286 | |
264 | - | |
265 | - | |
266 | 287 | /** |
267 | 288 | * Process configuration. |
268 | 289 | * |
269 | - * @param config the config | |
270 | - * @throws IOException Signals that an I/O exception has occurred. | |
290 | + * @param config | |
291 | + * the config | |
292 | + * @throws IOException | |
293 | + * Signals that an I/O exception has occurred. | |
271 | 294 | */ |
272 | 295 | private void processConfiguration(MtasConfiguration config) |
273 | 296 | throws IOException { |
... | ... |
src/mtas/analysis/parser/MtasBasicParser.java
... | ... | @@ -165,7 +165,8 @@ abstract public class MtasBasicParser extends MtasParser { |
165 | 165 | /** |
166 | 166 | * Instantiates a new mtas basic parser. |
167 | 167 | * |
168 | - * @param config the config | |
168 | + * @param config | |
169 | + * the config | |
169 | 170 | */ |
170 | 171 | public MtasBasicParser(MtasConfiguration config) { |
171 | 172 | this.config = config; |
... | ... | @@ -174,11 +175,16 @@ abstract public class MtasBasicParser extends MtasParser { |
174 | 175 | /** |
175 | 176 | * Compute mappings from object. |
176 | 177 | * |
177 | - * @param object the object | |
178 | - * @param currentList the current list | |
179 | - * @param updateList the update list | |
180 | - * @throws MtasParserException the mtas parser exception | |
181 | - * @throws MtasConfigException the mtas config exception | |
178 | + * @param object | |
179 | + * the object | |
180 | + * @param currentList | |
181 | + * the current list | |
182 | + * @param updateList | |
183 | + * the update list | |
184 | + * @throws MtasParserException | |
185 | + * the mtas parser exception | |
186 | + * @throws MtasConfigException | |
187 | + * the mtas config exception | |
182 | 188 | */ |
183 | 189 | protected void computeMappingsFromObject(MtasParserObject object, |
184 | 190 | HashMap<String, ArrayList<MtasParserObject>> currentList, |
... | ... | @@ -196,7 +202,6 @@ abstract public class MtasBasicParser extends MtasParser { |
196 | 202 | updateList.get(UPDATE_TYPE_OFFSET).put(tokenId, object.getRefIds()); |
197 | 203 | } |
198 | 204 | } |
199 | - | |
200 | 205 | for (MtasParserMapping<?> mapping : mappings) { |
201 | 206 | try { |
202 | 207 | if (mapping.getTokens().size() == 0) { |
... | ... | @@ -271,9 +276,18 @@ abstract public class MtasBasicParser extends MtasParser { |
271 | 276 | String checkType = object.objectType.getType(); |
272 | 277 | // register id for update when parent is created |
273 | 278 | if (currentList.get(checkType).size() > 0) { |
274 | - currentList.get(checkType) | |
275 | - .get(currentList.get(checkType).size() - 1) | |
276 | - .registerUpdateableMappingAtParent(token.getId()); | |
279 | + if (currentList.get(checkType).contains(object)) { | |
280 | + int listPosition = currentList.get(checkType) | |
281 | + .indexOf(object); | |
282 | + if (listPosition > 0) { | |
283 | + currentList.get(checkType).get(listPosition - 1) | |
284 | + .registerUpdateableMappingAtParent(token.getId()); | |
285 | + } | |
286 | + } else { | |
287 | + currentList.get(checkType) | |
288 | + .get(currentList.get(checkType).size() - 1) | |
289 | + .registerUpdateableMappingAtParent(token.getId()); | |
290 | + } | |
277 | 291 | // if no real ancestor, register id update when group |
278 | 292 | // ancestor is created |
279 | 293 | } else if (currentList.get(MAPPING_TYPE_GROUP).size() > 0) { |
... | ... | @@ -287,7 +301,9 @@ abstract public class MtasBasicParser extends MtasParser { |
287 | 301 | } |
288 | 302 | // update children |
289 | 303 | for (Integer tmpId : object.getUpdateableMappingsAsParent()) { |
290 | - tokenCollection.get(tmpId).setParentId(token.getId()); | |
304 | + if (tokenCollection.get(tmpId) != null) { | |
305 | + tokenCollection.get(tmpId).setParentId(token.getId()); | |
306 | + } | |
291 | 307 | } |
292 | 308 | object.resetUpdateableMappingsAsParent(); |
293 | 309 | // use own position |
... | ... | @@ -372,10 +388,20 @@ abstract public class MtasBasicParser extends MtasParser { |
372 | 388 | } |
373 | 389 | // copy remaining updateableMappings to new parent |
374 | 390 | if (currentList.get(objectType.getType()).size() > 0) { |
375 | - currentList.get(objectType.getType()) | |
376 | - .get(currentList.get(objectType.getType()).size() - 1) | |
377 | - .registerUpdateableMappingsAtParent( | |
378 | - object.getUpdateableMappingsAsParent()); | |
391 | + if (currentList.get(objectType.getType()).contains(object)) { | |
392 | + int listPosition = currentList.get(objectType.getType()) | |
393 | + .indexOf(object); | |
394 | + if (listPosition > 0) { | |
395 | + currentList.get(objectType.getType()).get(listPosition - 1) | |
396 | + .registerUpdateableMappingsAtParent( | |
397 | + object.getUpdateableMappingsAsParent()); | |
398 | + } | |
399 | + } else { | |
400 | + currentList.get(objectType.getType()) | |
401 | + .get(currentList.get(objectType.getType()).size() - 1) | |
402 | + .registerUpdateableMappingsAtParent( | |
403 | + object.getUpdateableMappingsAsParent()); | |
404 | + } | |
379 | 405 | } else if (currentList.get(MAPPING_TYPE_GROUP).size() > 0) { |
380 | 406 | currentList.get(MAPPING_TYPE_GROUP) |
381 | 407 | .get(currentList.get(MAPPING_TYPE_GROUP).size() - 1) |
... | ... | @@ -392,9 +418,11 @@ abstract public class MtasBasicParser extends MtasParser { |
392 | 418 | /** |
393 | 419 | * Compute type from mapping source. |
394 | 420 | * |
395 | - * @param source the source | |
421 | + * @param source | |
422 | + * the source | |
396 | 423 | * @return the string |
397 | - * @throws MtasParserException the mtas parser exception | |
424 | + * @throws MtasParserException | |
425 | + * the mtas parser exception | |
398 | 426 | */ |
399 | 427 | private String computeTypeFromMappingSource(String source) |
400 | 428 | throws MtasParserException { |
... | ... | @@ -423,11 +451,15 @@ abstract public class MtasBasicParser extends MtasParser { |
423 | 451 | /** |
424 | 452 | * Compute object from mapping value. |
425 | 453 | * |
426 | - * @param object the object | |
427 | - * @param mappingValue the mapping value | |
428 | - * @param currentList the current list | |
454 | + * @param object | |
455 | + * the object | |
456 | + * @param mappingValue | |
457 | + * the mapping value | |
458 | + * @param currentList | |
459 | + * the current list | |
429 | 460 | * @return the mtas parser object[] |
430 | - * @throws MtasParserException the mtas parser exception | |
461 | + * @throws MtasParserException | |
462 | + * the mtas parser exception | |
431 | 463 | */ |
432 | 464 | private MtasParserObject[] computeObjectFromMappingValue( |
433 | 465 | MtasParserObject object, HashMap<String, String> mappingValue, |
... | ... | @@ -469,12 +501,17 @@ abstract public class MtasBasicParser extends MtasParser { |
469 | 501 | /** |
470 | 502 | * Compute value from mapping values. |
471 | 503 | * |
472 | - * @param object the object | |
473 | - * @param mappingValues the mapping values | |
474 | - * @param currentList the current list | |
504 | + * @param object | |
505 | + * the object | |
506 | + * @param mappingValues | |
507 | + * the mapping values | |
508 | + * @param currentList | |
509 | + * the current list | |
475 | 510 | * @return the string[] |
476 | - * @throws MtasParserException the mtas parser exception | |
477 | - * @throws MtasConfigException the mtas config exception | |
511 | + * @throws MtasParserException | |
512 | + * the mtas parser exception | |
513 | + * @throws MtasConfigException | |
514 | + * the mtas config exception | |
478 | 515 | */ |
479 | 516 | private String[] computeValueFromMappingValues(MtasParserObject object, |
480 | 517 | ArrayList<HashMap<String, String>> mappingValues, |
... | ... | @@ -486,8 +523,8 @@ abstract public class MtasBasicParser extends MtasParser { |
486 | 523 | if (mappingValue.get("source").equals(MtasParserMapping.SOURCE_STRING)) { |
487 | 524 | if (mappingValue.get("type") |
488 | 525 | .equals(MtasParserMapping.PARSER_TYPE_STRING)) { |
489 | - String subvalue = computeFilteredPrefixedValue(mappingValue.get("type"), | |
490 | - mappingValue.get("text"), null, null); | |
526 | + String subvalue = computeFilteredPrefixedValue( | |
527 | + mappingValue.get("type"), mappingValue.get("text"), null, null); | |
491 | 528 | if (subvalue != null) { |
492 | 529 | for (int i = 0; i < value.length; i++) { |
493 | 530 | value[i] = value[i] + subvalue; |
... | ... | @@ -499,7 +536,7 @@ abstract public class MtasBasicParser extends MtasParser { |
499 | 536 | MtasParserObject[] checkObjects = computeObjectFromMappingValue(object, |
500 | 537 | mappingValue, currentList); |
501 | 538 | // create value |
502 | - if (checkObjects != null) { | |
539 | + if (checkObjects != null && checkObjects.length > 0) { | |
503 | 540 | MtasParserType checkType = checkObjects[0].getType(); |
504 | 541 | // add name to value |
505 | 542 | if (mappingValue.get("type") |
... | ... | @@ -586,11 +623,15 @@ abstract public class MtasBasicParser extends MtasParser { |
586 | 623 | /** |
587 | 624 | * Compute payload from mapping payload. |
588 | 625 | * |
589 | - * @param object the object | |
590 | - * @param mappingPayloads the mapping payloads | |
591 | - * @param currentList the current list | |
626 | + * @param object | |
627 | + * the object | |
628 | + * @param mappingPayloads | |
629 | + * the mapping payloads | |
630 | + * @param currentList | |
631 | + * the current list | |
592 | 632 | * @return the bytes ref |
593 | - * @throws MtasParserException the mtas parser exception | |
633 | + * @throws MtasParserException | |
634 | + * the mtas parser exception | |
594 | 635 | */ |
595 | 636 | private BytesRef computePayloadFromMappingPayload(MtasParserObject object, |
596 | 637 | ArrayList<HashMap<String, String>> mappingPayloads, |
... | ... | @@ -605,7 +646,7 @@ abstract public class MtasBasicParser extends MtasParser { |
605 | 646 | if (mappingPayload.get("text") != null) { |
606 | 647 | BytesRef subpayload = computeMaximumFilteredPayload( |
607 | 648 | mappingPayload.get("text"), payload, null); |
608 | - payload = (subpayload != null) ? subpayload : payload; | |
649 | + payload = (subpayload != null) ? subpayload : payload; | |
609 | 650 | } |
610 | 651 | } |
611 | 652 | // from objects |
... | ... | @@ -637,8 +678,10 @@ abstract public class MtasBasicParser extends MtasParser { |
637 | 678 | /** |
638 | 679 | * Prevalidate object. |
639 | 680 | * |
640 | - * @param object the object | |
641 | - * @param currentList the current list | |
681 | + * @param object | |
682 | + * the object | |
683 | + * @param currentList | |
684 | + * the current list | |
642 | 685 | * @return the boolean |
643 | 686 | */ |
644 | 687 | Boolean prevalidateObject(MtasParserObject object, |
... | ... | @@ -663,10 +706,14 @@ abstract public class MtasBasicParser extends MtasParser { |
663 | 706 | /** |
664 | 707 | * Precheck mapping conditions. |
665 | 708 | * |
666 | - * @param object the object | |
667 | - * @param mappingConditions the mapping conditions | |
668 | - * @param currentList the current list | |
669 | - * @throws MtasParserException the mtas parser exception | |
709 | + * @param object | |
710 | + * the object | |
711 | + * @param mappingConditions | |
712 | + * the mapping conditions | |
713 | + * @param currentList | |
714 | + * the current list | |
715 | + * @throws MtasParserException | |
716 | + * the mtas parser exception | |
670 | 717 | */ |
671 | 718 | void precheckMappingConditions(MtasParserObject object, |
672 | 719 | ArrayList<HashMap<String, String>> mappingConditions, |
... | ... | @@ -771,7 +818,31 @@ abstract public class MtasBasicParser extends MtasParser { |
771 | 818 | // condition on text |
772 | 819 | } else if (mappingCondition.get("type") |
773 | 820 | .equals(MtasParserMapping.PARSER_TYPE_TEXT)) { |
774 | - // can't pre-check this type of condition | |
821 | + // can't pre-check this type of condition, only for group | |
822 | + if (object.getType().precheckText()) { | |
823 | + String textCondition = mappingCondition.get("condition"); | |
824 | + String textValue = object.getText(); | |
825 | + if ((textCondition == null) | |
826 | + && ((textValue == null) || textValue.equals(""))) { | |
827 | + if (!notCondition) { | |
828 | + throw new MtasParserException("no text available"); | |
829 | + } | |
830 | + } else if ((textCondition != null) && (textValue == null)) { | |
831 | + if (!notCondition) { | |
832 | + throw new MtasParserException("condition " + textCondition | |
833 | + + " on text not matched (is null)"); | |
834 | + } | |
835 | + } else if (textCondition != null) { | |
836 | + if (!notCondition && !textCondition.equals(textValue)) { | |
837 | + throw new MtasParserException("condition " + textCondition | |
838 | + + " on text not matched (is " + textValue + ")"); | |
839 | + } else if (notCondition && textCondition.equals(textValue)) { | |
840 | + throw new MtasParserException( | |
841 | + "condition NOT " + textCondition | |
842 | + + " on text not matched (is " + textValue + ")"); | |
843 | + } | |
844 | + } | |
845 | + } | |
775 | 846 | } |
776 | 847 | } |
777 | 848 | } else if (!notCondition) { |
... | ... | @@ -785,10 +856,14 @@ abstract public class MtasBasicParser extends MtasParser { |
785 | 856 | /** |
786 | 857 | * Postcheck mapping conditions. |
787 | 858 | * |
788 | - * @param object the object | |
789 | - * @param mappingConditions the mapping conditions | |
790 | - * @param currentList the current list | |
791 | - * @throws MtasParserException the mtas parser exception | |
859 | + * @param object | |
860 | + * the object | |
861 | + * @param mappingConditions | |
862 | + * the mapping conditions | |
863 | + * @param currentList | |
864 | + * the current list | |
865 | + * @throws MtasParserException | |
866 | + * the mtas parser exception | |
792 | 867 | */ |
793 | 868 | private void postcheckMappingConditions(MtasParserObject object, |
794 | 869 | ArrayList<HashMap<String, String>> mappingConditions, |
... | ... | @@ -835,10 +910,13 @@ abstract public class MtasBasicParser extends MtasParser { |
835 | 910 | /** |
836 | 911 | * Compute filtered split values. |
837 | 912 | * |
838 | - * @param values the values | |
839 | - * @param filter the filter | |
913 | + * @param values | |
914 | + * the values | |
915 | + * @param filter | |
916 | + * the filter | |
840 | 917 | * @return the string[] |
841 | - * @throws MtasConfigException the mtas config exception | |
918 | + * @throws MtasConfigException | |
919 | + * the mtas config exception | |
842 | 920 | */ |
843 | 921 | private String[] computeFilteredSplitValues(String[] values, String filter) |
844 | 922 | throws MtasConfigException { |
... | ... | @@ -847,39 +925,42 @@ abstract public class MtasBasicParser extends MtasParser { |
847 | 925 | boolean[] valuesFilter = new boolean[values.length]; |
848 | 926 | boolean doSplitFilter = false; |
849 | 927 | for (String item : filters) { |
850 | - if (item.trim() | |
851 | - .matches("^"+Pattern.quote(MAPPING_FILTER_SPLIT) + "\\([0-9\\-]+\\)$")) { | |
928 | + if (item.trim().matches( | |
929 | + "^" + Pattern.quote(MAPPING_FILTER_SPLIT) + "\\([0-9\\-]+\\)$")) { | |
852 | 930 | doSplitFilter = true; |
853 | - Pattern splitContent = Pattern.compile("^"+Pattern.quote(MAPPING_FILTER_SPLIT) + "\\(([0-9]+)(-([0-9]+))?\\)$"); | |
931 | + Pattern splitContent = Pattern | |
932 | + .compile("^" + Pattern.quote(MAPPING_FILTER_SPLIT) | |
933 | + + "\\(([0-9]+)(-([0-9]+))?\\)$"); | |
854 | 934 | Matcher splitContentMatcher = splitContent.matcher(item.trim()); |
855 | - while(splitContentMatcher.find()) { | |
856 | - if(splitContentMatcher.group(3)==null) { | |
935 | + while (splitContentMatcher.find()) { | |
936 | + if (splitContentMatcher.group(3) == null) { | |
857 | 937 | int i = Integer.parseInt(splitContentMatcher.group(1)); |
858 | - if(i>=0 && i<values.length) { | |
938 | + if (i >= 0 && i < values.length) { | |
859 | 939 | valuesFilter[i] = true; |
860 | - } | |
940 | + } | |
861 | 941 | } else { |
862 | 942 | int i1 = Integer.parseInt(splitContentMatcher.group(1)); |
863 | 943 | int i2 = Integer.parseInt(splitContentMatcher.group(3)); |
864 | - for(int i=Math.max(0, i1); i<Math.min(values.length, i2); i++) { | |
944 | + for (int i = Math.max(0, i1); i < Math.min(values.length, | |
945 | + i2); i++) { | |
865 | 946 | valuesFilter[i] = true; |
866 | 947 | } |
867 | 948 | } |
868 | - } | |
949 | + } | |
869 | 950 | } |
870 | 951 | } |
871 | - if(doSplitFilter) { | |
952 | + if (doSplitFilter) { | |
872 | 953 | int number = 0; |
873 | - for(int i=0;i<valuesFilter.length; i++) { | |
874 | - if(valuesFilter[i]) { | |
954 | + for (int i = 0; i < valuesFilter.length; i++) { | |
955 | + if (valuesFilter[i]) { | |
875 | 956 | number++; |
876 | 957 | } |
877 | 958 | } |
878 | - if(number>0) { | |
959 | + if (number > 0) { | |
879 | 960 | String[] newValues = new String[number]; |
880 | 961 | number = 0; |
881 | - for(int i=0;i<valuesFilter.length; i++) { | |
882 | - if(valuesFilter[i]) { | |
962 | + for (int i = 0; i < valuesFilter.length; i++) { | |
963 | + if (valuesFilter[i]) { | |
883 | 964 | newValues[number] = values[i]; |
884 | 965 | number++; |
885 | 966 | } |
... | ... | @@ -888,7 +969,7 @@ abstract public class MtasBasicParser extends MtasParser { |
888 | 969 | } else { |
889 | 970 | return null; |
890 | 971 | } |
891 | - } | |
972 | + } | |
892 | 973 | } |
893 | 974 | return values; |
894 | 975 | } |
... | ... | @@ -896,12 +977,17 @@ abstract public class MtasBasicParser extends MtasParser { |
896 | 977 | /** |
897 | 978 | * Compute filtered prefixed value. |
898 | 979 | * |
899 | - * @param type the type | |
900 | - * @param value the value | |
901 | - * @param filter the filter | |
902 | - * @param prefix the prefix | |
980 | + * @param type | |
981 | + * the type | |
982 | + * @param value | |
983 | + * the value | |
984 | + * @param filter | |
985 | + * the filter | |
986 | + * @param prefix | |
987 | + * the prefix | |
903 | 988 | * @return the string |
904 | - * @throws MtasConfigException the mtas config exception | |
989 | + * @throws MtasConfigException | |
990 | + * the mtas config exception | |
905 | 991 | */ |
906 | 992 | private String computeFilteredPrefixedValue(String type, String value, |
907 | 993 | String filter, String prefix) throws MtasConfigException { |
... | ... | @@ -926,7 +1012,7 @@ abstract public class MtasBasicParser extends MtasParser { |
926 | 1012 | } |
927 | 1013 | } else if (item.trim() |
928 | 1014 | .matches(Pattern.quote(MAPPING_FILTER_SPLIT) + "\\([0-9\\-]+\\)")) { |
929 | - if(!type.equals(MtasParserMapping.PARSER_TYPE_TEXT_SPLIT)) { | |
1015 | + if (!type.equals(MtasParserMapping.PARSER_TYPE_TEXT_SPLIT)) { | |
930 | 1016 | throw new MtasConfigException( |
931 | 1017 | "split filter not allowed for " + type); |
932 | 1018 | } |
... | ... | @@ -947,9 +1033,12 @@ abstract public class MtasBasicParser extends MtasParser { |
947 | 1033 | /** |
948 | 1034 | * Compute maximum filtered payload. |
949 | 1035 | * |
950 | - * @param value the value | |
951 | - * @param payload the payload | |
952 | - * @param filter the filter | |
1036 | + * @param value | |
1037 | + * the value | |
1038 | + * @param payload | |
1039 | + * the payload | |
1040 | + * @param filter | |
1041 | + * the filter | |
953 | 1042 | * @return the bytes ref |
954 | 1043 | */ |
955 | 1044 | private BytesRef computeMaximumFilteredPayload(String value, BytesRef payload, |
... | ... | @@ -981,6 +1070,9 @@ abstract public class MtasBasicParser extends MtasParser { |
981 | 1070 | /** The name. */ |
982 | 1071 | private String name; |
983 | 1072 | |
1073 | + /** The precheck text. */ | |
1074 | + protected boolean precheckText; | |
1075 | + | |
984 | 1076 | /** The ref attribute name. */ |
985 | 1077 | private String refAttributeName; |
986 | 1078 | |
... | ... | @@ -990,23 +1082,34 @@ abstract public class MtasBasicParser extends MtasParser { |
990 | 1082 | /** |
991 | 1083 | * Instantiates a new mtas parser type. |
992 | 1084 | * |
993 | - * @param type the type | |
994 | - * @param name the name | |
1085 | + * @param type | |
1086 | + * the type | |
1087 | + * @param name | |
1088 | + * the name | |
1089 | + * @param precheckText | |
1090 | + * the precheck text | |
995 | 1091 | */ |
996 | - MtasParserType(String type, String name) { | |
1092 | + MtasParserType(String type, String name, boolean precheckText) { | |
997 | 1093 | this.type = type; |
998 | 1094 | this.name = name; |
1095 | + this.precheckText = precheckText; | |
999 | 1096 | } |
1000 | 1097 | |
1001 | 1098 | /** |
1002 | 1099 | * Instantiates a new mtas parser type. |
1003 | 1100 | * |
1004 | - * @param type the type | |
1005 | - * @param name the name | |
1006 | - * @param refAttributeName the ref attribute name | |
1101 | + * @param type | |
1102 | + * the type | |
1103 | + * @param name | |
1104 | + * the name | |
1105 | + * @param precheckText | |
1106 | + * the precheck text | |
1107 | + * @param refAttributeName | |
1108 | + * the ref attribute name | |
1007 | 1109 | */ |
1008 | - MtasParserType(String type, String name, String refAttributeName) { | |
1009 | - this(type, name); | |
1110 | + MtasParserType(String type, String name, boolean precheckText, | |
1111 | + String refAttributeName) { | |
1112 | + this(type, name, precheckText); | |
1010 | 1113 | this.refAttributeName = refAttributeName; |
1011 | 1114 | } |
1012 | 1115 | |
... | ... | @@ -1038,9 +1141,19 @@ abstract public class MtasBasicParser extends MtasParser { |
1038 | 1141 | } |
1039 | 1142 | |
1040 | 1143 | /** |
1144 | + * Precheck text. | |
1145 | + * | |
1146 | + * @return true, if successful | |
1147 | + */ | |
1148 | + public boolean precheckText() { | |
1149 | + return precheckText; | |
1150 | + } | |
1151 | + | |
1152 | + /** | |
1041 | 1153 | * Adds the mapping. |
1042 | 1154 | * |
1043 | - * @param mapping the mapping | |
1155 | + * @param mapping | |
1156 | + * the mapping | |
1044 | 1157 | */ |
1045 | 1158 | public void addMapping(MtasParserMapping<?> mapping) { |
1046 | 1159 | mappings.add(mapping); |
... | ... | @@ -1080,7 +1193,8 @@ abstract public class MtasBasicParser extends MtasParser { |
1080 | 1193 | /** |
1081 | 1194 | * Instantiates a new mtas parser mapping token. |
1082 | 1195 | * |
1083 | - * @param tokenType the token type | |
1196 | + * @param tokenType | |
1197 | + * the token type | |
1084 | 1198 | */ |
1085 | 1199 | public MtasParserMappingToken(String tokenType) { |
1086 | 1200 | type = tokenType; |
... | ... | @@ -1095,7 +1209,8 @@ abstract public class MtasBasicParser extends MtasParser { |
1095 | 1209 | /** |
1096 | 1210 | * Sets the offset. |
1097 | 1211 | * |
1098 | - * @param tokenOffset the new offset | |
1212 | + * @param tokenOffset | |
1213 | + * the new offset | |
1099 | 1214 | */ |
1100 | 1215 | public void setOffset(Boolean tokenOffset) { |
1101 | 1216 | offset = tokenOffset; |
... | ... | @@ -1104,7 +1219,8 @@ abstract public class MtasBasicParser extends MtasParser { |
1104 | 1219 | /** |
1105 | 1220 | * Sets the real offset. |
1106 | 1221 | * |
1107 | - * @param tokenRealOffset the new real offset | |
1222 | + * @param tokenRealOffset | |
1223 | + * the new real offset | |
1108 | 1224 | */ |
1109 | 1225 | public void setRealOffset(Boolean tokenRealOffset) { |
1110 | 1226 | realoffset = tokenRealOffset; |
... | ... | @@ -1113,7 +1229,8 @@ abstract public class MtasBasicParser extends MtasParser { |
1113 | 1229 | /** |
1114 | 1230 | * Sets the parent. |
1115 | 1231 | * |
1116 | - * @param tokenParent the new parent | |
1232 | + * @param tokenParent | |
1233 | + * the new parent | |
1117 | 1234 | */ |
1118 | 1235 | public void setParent(Boolean tokenParent) { |
1119 | 1236 | parent = tokenParent; |
... | ... | @@ -1124,7 +1241,8 @@ abstract public class MtasBasicParser extends MtasParser { |
1124 | 1241 | /** |
1125 | 1242 | * The Class MtasParserMapping. |
1126 | 1243 | * |
1127 | - * @param <T> the generic type | |
1244 | + * @param <T> | |
1245 | + * the generic type | |
1128 | 1246 | */ |
1129 | 1247 | protected abstract class MtasParserMapping<T extends MtasParserMapping<T>> { |
1130 | 1248 | |
... | ... | @@ -1216,8 +1334,10 @@ abstract public class MtasBasicParser extends MtasParser { |
1216 | 1334 | /** |
1217 | 1335 | * Process config. |
1218 | 1336 | * |
1219 | - * @param config the config | |
1220 | - * @throws MtasConfigException the mtas config exception | |
1337 | + * @param config | |
1338 | + * the config | |
1339 | + * @throws MtasConfigException | |
1340 | + * the mtas config exception | |
1221 | 1341 | */ |
1222 | 1342 | public void processConfig(MtasConfiguration config) |
1223 | 1343 | throws MtasConfigException { |
... | ... | @@ -1581,7 +1701,8 @@ abstract public class MtasBasicParser extends MtasParser { |
1581 | 1701 | /** |
1582 | 1702 | * Condition unknown ancestor. |
1583 | 1703 | * |
1584 | - * @param number the number | |
1704 | + * @param number | |
1705 | + * the number | |
1585 | 1706 | */ |
1586 | 1707 | private void conditionUnknownAncestor(String number) { |
1587 | 1708 | HashMap<String, String> mapConstructionItem = new HashMap<String, String>(); |
... | ... | @@ -1593,9 +1714,12 @@ abstract public class MtasBasicParser extends MtasParser { |
1593 | 1714 | /** |
1594 | 1715 | * Adds the string. |
1595 | 1716 | * |
1596 | - * @param mappingToken the mapping token | |
1597 | - * @param type the type | |
1598 | - * @param text the text | |
1717 | + * @param mappingToken | |
1718 | + * the mapping token | |
1719 | + * @param type | |
1720 | + * the type | |
1721 | + * @param text | |
1722 | + * the text | |
1599 | 1723 | */ |
1600 | 1724 | private void addString(MtasParserMappingToken mappingToken, String type, |
1601 | 1725 | String text) { |
... | ... | @@ -1613,8 +1737,10 @@ abstract public class MtasBasicParser extends MtasParser { |
1613 | 1737 | /** |
1614 | 1738 | * Payload string. |
1615 | 1739 | * |
1616 | - * @param mappingToken the mapping token | |
1617 | - * @param text the text | |
1740 | + * @param mappingToken | |
1741 | + * the mapping token | |
1742 | + * @param text | |
1743 | + * the text | |
1618 | 1744 | */ |
1619 | 1745 | private void payloadString(MtasParserMappingToken mappingToken, |
1620 | 1746 | String text) { |
... | ... | @@ -1628,10 +1754,14 @@ abstract public class MtasBasicParser extends MtasParser { |
1628 | 1754 | /** |
1629 | 1755 | * Adds the name. |
1630 | 1756 | * |
1631 | - * @param mappingToken the mapping token | |
1632 | - * @param type the type | |
1633 | - * @param prefix the prefix | |
1634 | - * @param filter the filter | |
1757 | + * @param mappingToken | |
1758 | + * the mapping token | |
1759 | + * @param type | |
1760 | + * the type | |
1761 | + * @param prefix | |
1762 | + * the prefix | |
1763 | + * @param filter | |
1764 | + * the filter | |
1635 | 1765 | */ |
1636 | 1766 | private void addName(MtasParserMappingToken mappingToken, String type, |
1637 | 1767 | String prefix, String filter) { |
... | ... | @@ -1650,8 +1780,10 @@ abstract public class MtasBasicParser extends MtasParser { |
1650 | 1780 | /** |
1651 | 1781 | * Condition name. |
1652 | 1782 | * |
1653 | - * @param condition the condition | |
1654 | - * @param not the not | |
1783 | + * @param condition | |
1784 | + * the condition | |
1785 | + * @param not | |
1786 | + * the not | |
1655 | 1787 | */ |
1656 | 1788 | private void conditionName(String condition, String not) { |
1657 | 1789 | HashMap<String, String> mapConstructionItem = new HashMap<String, String>(); |
... | ... | @@ -1665,10 +1797,14 @@ abstract public class MtasBasicParser extends MtasParser { |
1665 | 1797 | /** |
1666 | 1798 | * Adds the text. |
1667 | 1799 | * |
1668 | - * @param mappingToken the mapping token | |
1669 | - * @param type the type | |
1670 | - * @param prefix the prefix | |
1671 | - * @param filter the filter | |
1800 | + * @param mappingToken | |
1801 | + * the mapping token | |
1802 | + * @param type | |
1803 | + * the type | |
1804 | + * @param prefix | |
1805 | + * the prefix | |
1806 | + * @param filter | |
1807 | + * the filter | |
1672 | 1808 | */ |
1673 | 1809 | private void addText(MtasParserMappingToken mappingToken, String type, |
1674 | 1810 | String prefix, String filter) { |
... | ... | @@ -1687,11 +1823,16 @@ abstract public class MtasBasicParser extends MtasParser { |
1687 | 1823 | /** |
1688 | 1824 | * Adds the text split. |
1689 | 1825 | * |
1690 | - * @param mappingToken the mapping token | |
1691 | - * @param type the type | |
1692 | - * @param split the split | |
1693 | - * @param prefix the prefix | |
1694 | - * @param filter the filter | |
1826 | + * @param mappingToken | |
1827 | + * the mapping token | |
1828 | + * @param type | |
1829 | + * the type | |
1830 | + * @param split | |
1831 | + * the split | |
1832 | + * @param prefix | |
1833 | + * the prefix | |
1834 | + * @param filter | |
1835 | + * the filter | |
1695 | 1836 | */ |
1696 | 1837 | private void addTextSplit(MtasParserMappingToken mappingToken, String type, |
1697 | 1838 | String split, String prefix, String filter) { |
... | ... | @@ -1711,9 +1852,12 @@ abstract public class MtasBasicParser extends MtasParser { |
1711 | 1852 | /** |
1712 | 1853 | * Condition text. |
1713 | 1854 | * |
1714 | - * @param condition the condition | |
1715 | - * @param filter the filter | |
1716 | - * @param not the not | |
1855 | + * @param condition | |
1856 | + * the condition | |
1857 | + * @param filter | |
1858 | + * the filter | |
1859 | + * @param not | |
1860 | + * the not | |
1717 | 1861 | */ |
1718 | 1862 | private void conditionText(String condition, String filter, String not) { |
1719 | 1863 | HashMap<String, String> mapConstructionItem = new HashMap<String, String>(); |
... | ... | @@ -1728,8 +1872,10 @@ abstract public class MtasBasicParser extends MtasParser { |
1728 | 1872 | /** |
1729 | 1873 | * Payload text. |
1730 | 1874 | * |
1731 | - * @param mappingToken the mapping token | |
1732 | - * @param filter the filter | |
1875 | + * @param mappingToken | |
1876 | + * the mapping token | |
1877 | + * @param filter | |
1878 | + * the filter | |
1733 | 1879 | */ |
1734 | 1880 | private void payloadText(MtasParserMappingToken mappingToken, |
1735 | 1881 | String filter) { |
... | ... | @@ -1743,11 +1889,16 @@ abstract public class MtasBasicParser extends MtasParser { |
1743 | 1889 | /** |
1744 | 1890 | * Adds the attribute. |
1745 | 1891 | * |
1746 | - * @param mappingToken the mapping token | |
1747 | - * @param type the type | |
1748 | - * @param name the name | |
1749 | - * @param prefix the prefix | |
1750 | - * @param filter the filter | |
1892 | + * @param mappingToken | |
1893 | + * the mapping token | |
1894 | + * @param type | |
1895 | + * the type | |
1896 | + * @param name | |
1897 | + * the name | |
1898 | + * @param prefix | |
1899 | + * the prefix | |
1900 | + * @param filter | |
1901 | + * the filter | |
1751 | 1902 | */ |
1752 | 1903 | private void addAttribute(MtasParserMappingToken mappingToken, String type, |
1753 | 1904 | String name, String prefix, String filter) { |
... | ... | @@ -1769,10 +1920,14 @@ abstract public class MtasBasicParser extends MtasParser { |
1769 | 1920 | /** |
1770 | 1921 | * Condition attribute. |
1771 | 1922 | * |
1772 | - * @param name the name | |
1773 | - * @param condition the condition | |
1774 | - * @param filter the filter | |
1775 | - * @param not the not | |
1923 | + * @param name | |
1924 | + * the name | |
1925 | + * @param condition | |
1926 | + * the condition | |
1927 | + * @param filter | |
1928 | + * the filter | |
1929 | + * @param not | |
1930 | + * the not | |
1776 | 1931 | */ |
1777 | 1932 | private void conditionAttribute(String name, String condition, |
1778 | 1933 | String filter, String not) { |
... | ... | @@ -1791,9 +1946,12 @@ abstract public class MtasBasicParser extends MtasParser { |
1791 | 1946 | /** |
1792 | 1947 | * Payload attribute. |
1793 | 1948 | * |
1794 | - * @param mappingToken the mapping token | |
1795 | - * @param name the name | |
1796 | - * @param filter the filter | |
1949 | + * @param mappingToken | |
1950 | + * the mapping token | |
1951 | + * @param name | |
1952 | + * the name | |
1953 | + * @param filter | |
1954 | + * the filter | |
1797 | 1955 | */ |
1798 | 1956 | private void payloadAttribute(MtasParserMappingToken mappingToken, |
1799 | 1957 | String name, String filter) { |
... | ... | @@ -1808,8 +1966,10 @@ abstract public class MtasBasicParser extends MtasParser { |
1808 | 1966 | /** |
1809 | 1967 | * Condition ancestor. |
1810 | 1968 | * |
1811 | - * @param ancestorType the ancestor type | |
1812 | - * @param number the number | |
1969 | + * @param ancestorType | |
1970 | + * the ancestor type | |
1971 | + * @param number | |
1972 | + * the number | |
1813 | 1973 | */ |
1814 | 1974 | public void conditionAncestor(String ancestorType, String number) { |
1815 | 1975 | if (ancestorType.equals(SOURCE_ANCESTOR_GROUP) |
... | ... | @@ -1829,12 +1989,18 @@ abstract public class MtasBasicParser extends MtasParser { |
1829 | 1989 | /** |
1830 | 1990 | * Adds the ancestor name. |
1831 | 1991 | * |
1832 | - * @param ancestorType the ancestor type | |
1833 | - * @param mappingToken the mapping token | |
1834 | - * @param type the type | |
1835 | - * @param distance the distance | |
1836 | - * @param prefix the prefix | |
1837 | - * @param filter the filter | |
1992 | + * @param ancestorType | |
1993 | + * the ancestor type | |
1994 | + * @param mappingToken | |
1995 | + * the mapping token | |
1996 | + * @param type | |
1997 | + * the type | |
1998 | + * @param distance | |
1999 | + * the distance | |
2000 | + * @param prefix | |
2001 | + * the prefix | |
2002 | + * @param filter | |
2003 | + * the filter | |
1838 | 2004 | */ |
1839 | 2005 | private void addAncestorName(String ancestorType, |
1840 | 2006 | MtasParserMappingToken mappingToken, String type, String distance, |
... | ... | @@ -1862,11 +2028,16 @@ abstract public class MtasBasicParser extends MtasParser { |
1862 | 2028 | /** |
1863 | 2029 | * Condition ancestor name. |
1864 | 2030 | * |
1865 | - * @param ancestorType the ancestor type | |
1866 | - * @param distance the distance | |
1867 | - * @param condition the condition | |
1868 | - * @param filter the filter | |
1869 | - * @param not the not | |
2031 | + * @param ancestorType | |
2032 | + * the ancestor type | |
2033 | + * @param distance | |
2034 | + * the distance | |
2035 | + * @param condition | |
2036 | + * the condition | |
2037 | + * @param filter | |
2038 | + * the filter | |
2039 | + * @param not | |
2040 | + * the not | |
1870 | 2041 | */ |
1871 | 2042 | public void conditionAncestorName(String ancestorType, String distance, |
1872 | 2043 | String condition, String filter, String not) { |
... | ... | @@ -1890,13 +2061,20 @@ abstract public class MtasBasicParser extends MtasParser { |
1890 | 2061 | /** |
1891 | 2062 | * Adds the ancestor attribute. |
1892 | 2063 | * |
1893 | - * @param ancestorType the ancestor type | |
1894 | - * @param mappingToken the mapping token | |
1895 | - * @param type the type | |
1896 | - * @param distance the distance | |
1897 | - * @param name the name | |
1898 | - * @param prefix the prefix | |
1899 | - * @param filter the filter | |
2064 | + * @param ancestorType | |
2065 | + * the ancestor type | |
2066 | + * @param mappingToken | |
2067 | + * the mapping token | |
2068 | + * @param type | |
2069 | + * the type | |
2070 | + * @param distance | |
2071 | + * the distance | |
2072 | + * @param name | |
2073 | + * the name | |
2074 | + * @param prefix | |
2075 | + * the prefix | |
2076 | + * @param filter | |
2077 | + * the filter | |
1900 | 2078 | */ |
1901 | 2079 | public void addAncestorAttribute(String ancestorType, |
1902 | 2080 | MtasParserMappingToken mappingToken, String type, String distance, |
... | ... | @@ -1927,12 +2105,18 @@ abstract public class MtasBasicParser extends MtasParser { |
1927 | 2105 | /** |
1928 | 2106 | * Condition ancestor attribute. |
1929 | 2107 | * |
1930 | - * @param ancestorType the ancestor type | |
1931 | - * @param distance the distance | |
1932 | - * @param name the name | |
1933 | - * @param condition the condition | |
1934 | - * @param filter the filter | |
1935 | - * @param not the not | |
2108 | + * @param ancestorType | |
2109 | + * the ancestor type | |
2110 | + * @param distance | |
2111 | + * the distance | |
2112 | + * @param name | |
2113 | + * the name | |
2114 | + * @param condition | |
2115 | + * the condition | |
2116 | + * @param filter | |
2117 | + * the filter | |
2118 | + * @param not | |
2119 | + * the not | |
1936 | 2120 | */ |
1937 | 2121 | public void conditionAncestorAttribute(String ancestorType, String distance, |
1938 | 2122 | String name, String condition, String filter, String not) { |
... | ... | @@ -1959,11 +2143,16 @@ abstract public class MtasBasicParser extends MtasParser { |
1959 | 2143 | /** |
1960 | 2144 | * Payload ancestor attribute. |
1961 | 2145 | * |
1962 | - * @param mappingToken the mapping token | |
1963 | - * @param ancestorType the ancestor type | |
1964 | - * @param distance the distance | |
1965 | - * @param name the name | |
1966 | - * @param filter the filter | |
2146 | + * @param mappingToken | |
2147 | + * the mapping token | |
2148 | + * @param ancestorType | |
2149 | + * the ancestor type | |
2150 | + * @param distance | |
2151 | + * the distance | |
2152 | + * @param name | |
2153 | + * the name | |
2154 | + * @param filter | |
2155 | + * the filter | |
1967 | 2156 | */ |
1968 | 2157 | private void payloadAncestorAttribute(MtasParserMappingToken mappingToken, |
1969 | 2158 | String ancestorType, String distance, String name, String filter) { |
... | ... | @@ -1988,9 +2177,11 @@ abstract public class MtasBasicParser extends MtasParser { |
1988 | 2177 | /** |
1989 | 2178 | * Compute ancestor source type. |
1990 | 2179 | * |
1991 | - * @param type the type | |
2180 | + * @param type | |
2181 | + * the type | |
1992 | 2182 | * @return the string |
1993 | - * @throws MtasConfigException the mtas config exception | |
2183 | + * @throws MtasConfigException | |
2184 | + * the mtas config exception | |
1994 | 2185 | */ |
1995 | 2186 | private String computeAncestorSourceType(String type) |
1996 | 2187 | throws MtasConfigException { |
... | ... | @@ -2014,7 +2205,8 @@ abstract public class MtasBasicParser extends MtasParser { |
2014 | 2205 | /** |
2015 | 2206 | * Compute distance. |
2016 | 2207 | * |
2017 | - * @param distance the distance | |
2208 | + * @param distance | |
2209 | + * the distance | |
2018 | 2210 | * @return the string |
2019 | 2211 | */ |
2020 | 2212 | private String computeDistance(String distance) { |
... | ... | @@ -2033,7 +2225,8 @@ abstract public class MtasBasicParser extends MtasParser { |
2033 | 2225 | /** |
2034 | 2226 | * Compute number. |
2035 | 2227 | * |
2036 | - * @param number the number | |
2228 | + * @param number | |
2229 | + * the number | |
2037 | 2230 | * @return the string |
2038 | 2231 | */ |
2039 | 2232 | private String computeNumber(String number) { |
... | ... |
src/mtas/analysis/parser/MtasCRMParser.java
0 โ 100644
1 | +package mtas.analysis.parser; | |
2 | + | |
3 | +import java.io.IOException; | |
4 | +import java.io.Reader; | |
5 | +import java.util.ArrayList; | |
6 | +import java.util.Arrays; | |
7 | +import java.util.Collection; | |
8 | +import java.util.HashMap; | |
9 | +import java.util.HashSet; | |
10 | +import java.util.TreeSet; | |
11 | +import java.util.Map.Entry; | |
12 | +import java.util.concurrent.atomic.AtomicInteger; | |
13 | +import java.util.regex.Matcher; | |
14 | +import java.util.regex.Pattern; | |
15 | + | |
16 | +import mtas.analysis.token.MtasToken; | |
17 | +import mtas.analysis.token.MtasTokenCollection; | |
18 | +import mtas.analysis.util.MtasBufferedReader; | |
19 | +import mtas.analysis.util.MtasConfigException; | |
20 | +import mtas.analysis.util.MtasConfiguration; | |
21 | +import mtas.analysis.util.MtasParserException; | |
22 | + | |
23 | +/** | |
24 | + * The Class MtasCRMParser. | |
25 | + */ | |
26 | + | |
27 | +public class MtasCRMParser extends MtasBasicParser { | |
28 | + | |
29 | + /** The word type. */ | |
30 | + private MtasParserType wordType = null; | |
31 | + | |
32 | + /** The word annotation types. */ | |
33 | + private HashMap<String, MtasParserType> wordAnnotationTypes = new HashMap<String, MtasParserType>(); | |
34 | + | |
35 | + /** The crm sentence types. */ | |
36 | + private HashMap<String, MtasParserType> crmSentenceTypes = new HashMap<String, MtasParserType>(); | |
37 | + | |
38 | + /** The crm clause types. */ | |
39 | + private HashMap<String, MtasParserType> crmClauseTypes = new HashMap<String, MtasParserType>(); | |
40 | + | |
41 | + /** The crm pair types. */ | |
42 | + private HashMap<String, MtasParserType> crmPairTypes = new HashMap<String, MtasParserType>(); | |
43 | + | |
44 | + /** The functions. */ | |
45 | + private HashMap<String, HashMap<String, MtasCRMParserFunction>> functions = new HashMap<String, HashMap<String, MtasCRMParserFunction>>(); | |
46 | + | |
47 | + /** The Constant MAPPING_TYPE_CRM_SENTENCE. */ | |
48 | + protected final static String MAPPING_TYPE_CRM_SENTENCE = "crmSentence"; | |
49 | + | |
50 | + /** The Constant MAPPING_TYPE_CRM_CLAUSE. */ | |
51 | + protected final static String MAPPING_TYPE_CRM_CLAUSE = "crmClause"; | |
52 | + | |
53 | + /** The Constant MAPPING_TYPE_CRM_PAIR. */ | |
54 | + protected final static String MAPPING_TYPE_CRM_PAIR = "crmPair"; | |
55 | + | |
56 | + /** The history pair. */ | |
57 | + private HashMap<String, HashMap<String, MtasParserObject>> historyPair = new HashMap<String, HashMap<String, MtasParserObject>>(); | |
58 | + | |
59 | + /** The pair pattern. */ | |
60 | + Pattern pairPattern = Pattern.compile("^([b|e])([a-z])([0-9]+)$"); | |
61 | + | |
62 | + /** | |
63 | + * Instantiates a new mtas crm parser. | |
64 | + * | |
65 | + * @param config | |
66 | + * the config | |
67 | + */ | |
68 | + public MtasCRMParser(MtasConfiguration config) { | |
69 | + super(config); | |
70 | + try { | |
71 | + initParser(); | |
72 | + // System.out.print(printConfig()); | |
73 | + } catch (MtasConfigException e) { | |
74 | + e.printStackTrace(); | |
75 | + } | |
76 | + } | |
77 | + | |
78 | + /* | |
79 | + * (non-Javadoc) | |
80 | + * | |
81 | + * @see mtas.analysis.parser.MtasParser#initParser() | |
82 | + */ | |
83 | + @SuppressWarnings("unchecked") | |
84 | + @Override | |
85 | + protected void initParser() throws MtasConfigException { | |
86 | + super.initParser(); | |
87 | + if (config != null) { | |
88 | + // always word, no mappings | |
89 | + wordType = new MtasParserType(MAPPING_TYPE_WORD, null, false); | |
90 | + for (int i = 0; i < config.children.size(); i++) { | |
91 | + MtasConfiguration current = config.children.get(i); | |
92 | + if (current.name.equals("mappings")) { | |
93 | + for (int j = 0; j < current.children.size(); j++) { | |
94 | + if (current.children.get(j).name.equals("mapping")) { | |
95 | + MtasConfiguration mapping = current.children.get(j); | |
96 | + String typeMapping = mapping.attributes.get("type"); | |
97 | + String nameMapping = mapping.attributes.get("name"); | |
98 | + if ((typeMapping != null)) { | |
99 | + if (typeMapping.equals(MAPPING_TYPE_WORD)) { | |
100 | + MtasCRMParserMappingWordAnnotation m = new MtasCRMParserMappingWordAnnotation(); | |
101 | + m.processConfig(mapping); | |
102 | + wordType.addMapping(m); | |
103 | + } else if (typeMapping.equals(MAPPING_TYPE_WORD_ANNOTATION) | |
104 | + && (nameMapping != null)) { | |
105 | + MtasCRMParserMappingWordAnnotation m = new MtasCRMParserMappingWordAnnotation(); | |
106 | + m.processConfig(mapping); | |
107 | + if (wordAnnotationTypes.containsKey(nameMapping)) { | |
108 | + wordAnnotationTypes.get(nameMapping).addMapping(m); | |
109 | + } else { | |
110 | + MtasParserType t = new MtasParserType(typeMapping, | |
111 | + nameMapping, false); | |
112 | + t.addMapping(m); | |
113 | + wordAnnotationTypes.put(nameMapping, t); | |
114 | + } | |
115 | + } else if (typeMapping.equals(MAPPING_TYPE_CRM_SENTENCE)) { | |
116 | + MtasCRMParserMappingCRMSentence m = new MtasCRMParserMappingCRMSentence(); | |
117 | + m.processConfig(mapping); | |
118 | + if (crmSentenceTypes.containsKey(nameMapping)) { | |
119 | + crmSentenceTypes.get(nameMapping).addMapping(m); | |
120 | + } else { | |
121 | + MtasParserType t = new MtasParserType(MAPPING_TYPE_GROUP, | |
122 | + nameMapping, true); | |
123 | + t.addMapping(m); | |
124 | + crmSentenceTypes.put(nameMapping, t); | |
125 | + } | |
126 | + } else if (typeMapping.equals(MAPPING_TYPE_CRM_CLAUSE)) { | |
127 | + MtasCRMParserMappingCRMSentence m = new MtasCRMParserMappingCRMSentence(); | |
128 | + m.processConfig(mapping); | |
129 | + if (crmClauseTypes.containsKey(nameMapping)) { | |
130 | + crmClauseTypes.get(nameMapping).addMapping(m); | |
131 | + } else { | |
132 | + MtasParserType t = new MtasParserType(MAPPING_TYPE_GROUP, | |
133 | + nameMapping, true); | |
134 | + t.addMapping(m); | |
135 | + crmClauseTypes.put(nameMapping, t); | |
136 | + } | |
137 | + } else if (typeMapping.equals(MAPPING_TYPE_CRM_PAIR)) { | |
138 | + MtasCRMParserMappingCRMPair m = new MtasCRMParserMappingCRMPair(); | |
139 | + m.processConfig(mapping); | |
140 | + if (crmPairTypes.containsKey(nameMapping)) { | |
141 | + crmPairTypes.get(nameMapping).addMapping(m); | |
142 | + } else { | |
143 | + MtasParserType t = new MtasParserType(MAPPING_TYPE_RELATION, | |
144 | + nameMapping, true); | |
145 | + t.addMapping(m); | |
146 | + crmPairTypes.put(nameMapping, t); | |
147 | + } | |
148 | + } else { | |
149 | + throw new MtasConfigException("unknown mapping type " | |
150 | + + typeMapping + " or missing name"); | |
151 | + } | |
152 | + } | |
153 | + } | |
154 | + } | |
155 | + } else if (current.name.equals("functions")) { | |
156 | + for (int j = 0; j < current.children.size(); j++) { | |
157 | + if (current.children.get(j).name.equals("function")) { | |
158 | + MtasConfiguration function = current.children.get(j); | |
159 | + String nameFunction = function.attributes.get("name"); | |
160 | + String typeFunction = function.attributes.get("type"); | |
161 | + String splitFunction = function.attributes.get("split"); | |
162 | + if (nameFunction != null && typeFunction != null) { | |
163 | + MtasCRMParserFunction mtasCRMParserFunction = new MtasCRMParserFunction( | |
164 | + typeFunction, splitFunction); | |
165 | + if (!functions.containsKey(typeFunction)) { | |
166 | + functions.put(typeFunction, | |
167 | + new HashMap<String, MtasCRMParserFunction>()); | |
168 | + } | |
169 | + functions.get(typeFunction).put(nameFunction, | |
170 | + mtasCRMParserFunction); | |
171 | + MtasConfiguration subCurrent = current.children.get(j); | |
172 | + for (int k = 0; k < subCurrent.children.size(); k++) { | |
173 | + if (subCurrent.children.get(k).name.equals("condition")) { | |
174 | + MtasConfiguration subSubCurrent = subCurrent.children | |
175 | + .get(k); | |
176 | + if (subSubCurrent.attributes.containsKey("value")) { | |
177 | + String[] valuesCondition = subSubCurrent.attributes | |
178 | + .get("value").split(Pattern.quote(",")); | |
179 | + ArrayList<MtasCRMParserFunctionOutput> valueOutputList = new ArrayList<MtasCRMParserFunctionOutput>(); | |
180 | + for (int l = 0; l < subSubCurrent.children.size(); l++) { | |
181 | + if (subSubCurrent.children.get(l).name | |
182 | + .equals("output")) { | |
183 | + String valueOutput = subSubCurrent.children | |
184 | + .get(l).attributes.get("value"); | |
185 | + String nameOutput = subSubCurrent.children | |
186 | + .get(l).attributes.get("name"); | |
187 | + if (nameOutput != null) { | |
188 | + MtasCRMParserFunctionOutput o = new MtasCRMParserFunctionOutput( | |
189 | + nameOutput, valueOutput); | |
190 | + valueOutputList.add(o); | |
191 | + } | |
192 | + } | |
193 | + } | |
194 | + if (valueOutputList.size() > 0) { | |
195 | + for (String valueCondition : valuesCondition) { | |
196 | + if (mtasCRMParserFunction.output | |
197 | + .containsKey(valueCondition)) { | |
198 | + mtasCRMParserFunction.output.get(valueCondition) | |
199 | + .addAll( | |
200 | + (Collection<? extends MtasCRMParserFunctionOutput>) valueOutputList | |
201 | + .clone()); | |
202 | + } else { | |
203 | + mtasCRMParserFunction.output.put(valueCondition, | |
204 | + (ArrayList<MtasCRMParserFunctionOutput>) valueOutputList | |
205 | + .clone()); | |
206 | + } | |
207 | + } | |
208 | + } | |
209 | + } | |
210 | + } | |
211 | + } | |
212 | + } | |
213 | + } | |
214 | + } | |
215 | + } | |
216 | + } | |
217 | + } | |
218 | + } | |
219 | + | |
220 | + /* | |
221 | + * (non-Javadoc) | |
222 | + * | |
223 | + * @see mtas.analysis.parser.MtasParser#createTokenCollection(java.io.Reader) | |
224 | + */ | |
225 | + @Override | |
226 | + public MtasTokenCollection createTokenCollection(Reader reader) | |
227 | + throws MtasParserException, MtasConfigException { | |
228 | + AtomicInteger position = new AtomicInteger(0); | |
229 | + Integer unknownAncestors = 0; | |
230 | + | |
231 | + HashMap<String, TreeSet<Integer>> idPositions = new HashMap<String, TreeSet<Integer>>(); | |
232 | + HashMap<String, Integer[]> idOffsets = new HashMap<String, Integer[]>(); | |
233 | + | |
234 | + HashMap<String, HashMap<Integer, HashSet<String>>> updateList = new HashMap<String, HashMap<Integer, HashSet<String>>>(); | |
235 | + updateList.put(UPDATE_TYPE_OFFSET, new HashMap<Integer, HashSet<String>>()); | |
236 | + updateList.put(UPDATE_TYPE_POSITION, | |
237 | + new HashMap<Integer, HashSet<String>>()); | |
238 | + | |
239 | + HashMap<String, ArrayList<MtasParserObject>> currentList = new HashMap<String, ArrayList<MtasParserObject>>(); | |
240 | + currentList.put(MAPPING_TYPE_RELATION, new ArrayList<MtasParserObject>()); | |
241 | + currentList.put(MAPPING_TYPE_RELATION_ANNOTATION, | |
242 | + new ArrayList<MtasParserObject>()); | |
243 | + currentList.put(MAPPING_TYPE_REF, new ArrayList<MtasParserObject>()); | |
244 | + currentList.put(MAPPING_TYPE_GROUP, new ArrayList<MtasParserObject>()); | |
245 | + currentList.put(MAPPING_TYPE_GROUP_ANNOTATION, | |
246 | + new ArrayList<MtasParserObject>()); | |
247 | + currentList.put(MAPPING_TYPE_WORD, new ArrayList<MtasParserObject>()); | |
248 | + currentList.put(MAPPING_TYPE_WORD_ANNOTATION, | |
249 | + new ArrayList<MtasParserObject>()); | |
250 | + | |
251 | + tokenCollection = new MtasTokenCollection(); | |
252 | + MtasToken.resetId(); | |
253 | + try (MtasBufferedReader br = new MtasBufferedReader(reader)) { | |
254 | + String line; | |
255 | + int currentOffset, previousOffset = br.getPosition(); | |
256 | + MtasParserObject currentObject; | |
257 | + Pattern headerPattern = Pattern.compile("^@ @ @(.*)$"); | |
258 | + Pattern regularPattern = Pattern.compile( | |
259 | + "^([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+)$"); | |
260 | + Matcher matcherHeader, matcherRegular = null; | |
261 | + HashSet<MtasParserObject> newPreviousSentence = new HashSet<MtasParserObject>(), | |
262 | + previousSentence = new HashSet<MtasParserObject>(); | |
263 | + HashSet<MtasParserObject> newPreviousClause = new HashSet<MtasParserObject>(), | |
264 | + previousClause = new HashSet<MtasParserObject>(); | |
265 | + while ((line = br.readLine()) != null) { | |
266 | + currentOffset = br.getPosition(); | |
267 | + matcherHeader = headerPattern.matcher(line.trim()); | |
268 | + matcherRegular = regularPattern.matcher(line.trim()); | |
269 | + if (matcherRegular.matches()) { | |
270 | + newPreviousSentence.clear(); | |
271 | + for (int i = 4; i < 8; i++) { | |
272 | + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>(); | |
273 | + HashSet<MtasParserObject> tmpList = processCRMSentence( | |
274 | + String.valueOf(i), matcherRegular.group((i + 1)), currentOffset, | |
275 | + functionOutputList, unknownAncestors, currentList, updateList, | |
276 | + idPositions, idOffsets, previousSentence, previousClause); | |
277 | + if (tmpList != null) { | |
278 | + newPreviousSentence.addAll(tmpList); | |
279 | + } | |
280 | + for (MtasCRMParserFunctionOutput functionOutput : functionOutputList) { | |
281 | + tmpList = processCRMSentence(functionOutput.name, | |
282 | + functionOutput.value, currentOffset, functionOutputList, | |
283 | + unknownAncestors, currentList, updateList, idPositions, | |
284 | + idOffsets, previousSentence, previousClause); | |
285 | + if (tmpList != null) { | |
286 | + newPreviousSentence.addAll(tmpList); | |
287 | + } | |
288 | + } | |
289 | + } | |
290 | + if (newPreviousSentence.size() > 0) { | |
291 | + previousSentence.clear(); | |
292 | + previousSentence.addAll(newPreviousSentence); | |
293 | + } | |
294 | + newPreviousClause.clear(); | |
295 | + for (int i = 4; i < 8; i++) { | |
296 | + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>(); | |
297 | + HashSet<MtasParserObject> tmpList = processCRMClause( | |
298 | + String.valueOf(i), matcherRegular.group((i + 1)), currentOffset, | |
299 | + functionOutputList, unknownAncestors, currentList, updateList, | |
300 | + idPositions, idOffsets, previousClause); | |
301 | + if (tmpList != null) { | |
302 | + newPreviousClause.addAll(tmpList); | |
303 | + } | |
304 | + for (MtasCRMParserFunctionOutput functionOutput : functionOutputList) { | |
305 | + tmpList = processCRMClause(functionOutput.name, | |
306 | + functionOutput.value, currentOffset, functionOutputList, | |
307 | + unknownAncestors, currentList, updateList, idPositions, | |
308 | + idOffsets, previousClause); | |
309 | + if (tmpList != null) { | |
310 | + newPreviousClause.addAll(tmpList); | |
311 | + } | |
312 | + } | |
313 | + } | |
314 | + if (newPreviousClause.size() > 0) { | |
315 | + previousClause.clear(); | |
316 | + previousClause.addAll(newPreviousClause); | |
317 | + } | |
318 | + } | |
319 | + | |
320 | + if (matcherRegular.matches() && !matcherHeader.matches()) { | |
321 | + matcherRegular = regularPattern.matcher(line.trim()); | |
322 | + if (matcherRegular.matches()) { | |
323 | + // regular line - start word | |
324 | + currentObject = new MtasParserObject(wordType); | |
325 | + currentObject.setOffsetStart(previousOffset); | |
326 | + currentObject.setRealOffsetStart(previousOffset); | |
327 | + currentObject.setUnknownAncestorNumber(unknownAncestors); | |
328 | + if (!prevalidateObject(currentObject, currentList)) { | |
329 | + unknownAncestors++; | |
330 | + } else { | |
331 | + int p = position.getAndIncrement(); | |
332 | + currentObject.addPosition(p); | |
333 | + currentObject.objectId = "word_" + String.valueOf(p); | |
334 | + currentList.get(MAPPING_TYPE_WORD).add(currentObject); | |
335 | + unknownAncestors = 0; | |
336 | + // check for crmPair | |
337 | + for (int i = 0; i < 8; i++) { | |
338 | + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>(); | |
339 | + processCRMPair(p, String.valueOf(i), | |
340 | + matcherRegular.group((i + 1)), currentOffset, | |
341 | + functionOutputList, unknownAncestors, currentList, | |
342 | + updateList, idPositions, idOffsets); | |
343 | + for (MtasCRMParserFunctionOutput functionOutput : functionOutputList) { | |
344 | + processCRMPair(p, functionOutput.name, functionOutput.value, | |
345 | + currentOffset, functionOutputList, unknownAncestors, | |
346 | + currentList, updateList, idPositions, idOffsets); | |
347 | + } | |
348 | + } | |
349 | + // compute word annotations | |
350 | + for (int i = 0; i < 8; i++) { | |
351 | + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>(); | |
352 | + functionOutputList.addAll(processWordAnnotation( | |
353 | + String.valueOf(i), matcherRegular.group((i + 1)), | |
354 | + previousOffset, currentOffset, unknownAncestors, | |
355 | + currentList, updateList, idPositions, idOffsets)); | |
356 | + for (MtasCRMParserFunctionOutput functionOutput : functionOutputList) { | |
357 | + processWordAnnotation(functionOutput.name, | |
358 | + functionOutput.value, previousOffset, currentOffset, | |
359 | + unknownAncestors, currentList, updateList, idPositions, | |
360 | + idOffsets); | |
361 | + } | |
362 | + } | |
363 | + } | |
364 | + // finish word | |
365 | + if (unknownAncestors > 0) { | |
366 | + unknownAncestors--; | |
367 | + } else { | |
368 | + currentObject = currentList.get(MAPPING_TYPE_WORD) | |
369 | + .remove(currentList.get(MAPPING_TYPE_WORD).size() - 1); | |
370 | + assert unknownAncestors == 0 : "error in administration " | |
371 | + + currentObject.getType().getName(); | |
372 | + currentObject.setText(null); | |
373 | + currentObject.setOffsetEnd(currentOffset - 1); | |
374 | + currentObject.setRealOffsetEnd(currentOffset - 1); | |
375 | + // update ancestor groups with position and offset | |
376 | + for (MtasParserObject currentGroup : currentList | |
377 | + .get(MAPPING_TYPE_GROUP)) { | |
378 | + currentGroup.addPositions(currentObject.getPositions()); | |
379 | + currentGroup.addOffsetStart(currentObject.getOffsetStart()); | |
380 | + currentGroup.addOffsetEnd(currentObject.getOffsetEnd()); | |
381 | + } | |
382 | + idPositions.put(currentObject.getId(), | |
383 | + currentObject.getPositions()); | |
384 | + idOffsets.put(currentObject.getId(), currentObject.getOffset()); | |
385 | + currentObject.updateMappings(idPositions, idOffsets); | |
386 | + unknownAncestors = currentObject.getUnknownAncestorNumber(); | |
387 | + computeMappingsFromObject(currentObject, currentList, updateList); | |
388 | + } | |
389 | + | |
390 | + } else { | |
391 | + // System.out.println("PROBLEM: " + line); | |
392 | + } | |
393 | + } | |
394 | + previousOffset = br.getPosition(); | |
395 | + } | |
396 | + closePrevious(previousSentence, previousOffset, unknownAncestors, | |
397 | + currentList, updateList, idPositions, idOffsets); | |
398 | + closePrevious(previousClause, previousOffset, unknownAncestors, | |
399 | + currentList, updateList, idPositions, idOffsets); | |
400 | + } catch (IOException e) { | |
401 | + throw new MtasParserException(e.getMessage()); | |
402 | + } | |
403 | + // final check | |
404 | + tokenCollection.check(autorepair, makeunique); | |
405 | + return tokenCollection; | |
406 | + | |
407 | + } | |
408 | + | |
409 | + /** | |
410 | + * Process word annotation. | |
411 | + * | |
412 | + * @param name | |
413 | + * the name | |
414 | + * @param text | |
415 | + * the text | |
416 | + * @param previousOffset | |
417 | + * the previous offset | |
418 | + * @param currentOffset | |
419 | + * the current offset | |
420 | + * @param unknownAncestors | |
421 | + * the unknown ancestors | |
422 | + * @param currentList | |
423 | + * the current list | |
424 | + * @param updateList | |
425 | + * the update list | |
426 | + * @param idPositions | |
427 | + * the id positions | |
428 | + * @param idOffsets | |
429 | + * the id offsets | |
430 | + * @return the array list | |
431 | + * @throws MtasParserException | |
432 | + * the mtas parser exception | |
433 | + * @throws MtasConfigException | |
434 | + * the mtas config exception | |
435 | + */ | |
436 | + private ArrayList<MtasCRMParserFunctionOutput> processWordAnnotation( | |
437 | + String name, String text, Integer previousOffset, Integer currentOffset, | |
438 | + Integer unknownAncestors, | |
439 | + HashMap<String, ArrayList<MtasParserObject>> currentList, | |
440 | + HashMap<String, HashMap<Integer, HashSet<String>>> updateList, | |
441 | + HashMap<String, TreeSet<Integer>> idPositions, | |
442 | + HashMap<String, Integer[]> idOffsets) | |
443 | + throws MtasParserException, MtasConfigException { | |
444 | + MtasParserType tmpCurrentType; | |
445 | + MtasParserObject currentObject; | |
446 | + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>(); | |
447 | + if ((tmpCurrentType = wordAnnotationTypes.get(name)) != null) { | |
448 | + // start word annotation | |
449 | + currentObject = new MtasParserObject(tmpCurrentType); | |
450 | + currentObject.setRealOffsetStart(previousOffset); | |
451 | + currentObject.addPositions(currentList.get(MAPPING_TYPE_WORD) | |
452 | + .get((currentList.get(MAPPING_TYPE_WORD).size() - 1)).getPositions()); | |
453 | + currentObject.setUnknownAncestorNumber(unknownAncestors); | |
454 | + if (!prevalidateObject(currentObject, currentList)) { | |
455 | + unknownAncestors++; | |
456 | + } else { | |
457 | + currentList.get(MAPPING_TYPE_WORD_ANNOTATION).add(currentObject); | |
458 | + unknownAncestors = 0; | |
459 | + } | |
460 | + // finish word annotation | |
461 | + if (unknownAncestors > 0) { | |
462 | + unknownAncestors--; | |
463 | + } else { | |
464 | + currentObject = currentList.get(MAPPING_TYPE_WORD_ANNOTATION) | |
465 | + .remove(currentList.get(MAPPING_TYPE_WORD_ANNOTATION).size() - 1); | |
466 | + assert unknownAncestors == 0 : "error in administration " | |
467 | + + currentObject.getType().getName(); | |
468 | + if (functions.containsKey(MAPPING_TYPE_WORD_ANNOTATION) | |
469 | + && functions.get(MAPPING_TYPE_WORD_ANNOTATION).containsKey(name) | |
470 | + && text != null) { | |
471 | + MtasCRMParserFunction function = functions | |
472 | + .get(MAPPING_TYPE_WORD_ANNOTATION).get(name); | |
473 | + String[] value; | |
474 | + if (function.split != null) { | |
475 | + value = text.split(Pattern.quote(function.split)); | |
476 | + } else { | |
477 | + value = new String[] { text }; | |
478 | + } | |
479 | + for (int c = 0; c < value.length; c++) { | |
480 | + if (function.output.containsKey(value[c])) { | |
481 | + functionOutputList.addAll(function.output.get(value[c])); | |
482 | + } | |
483 | + } | |
484 | + } | |
485 | + currentObject.setText(text); | |
486 | + currentObject.setRealOffsetEnd(currentOffset - 1); | |
487 | + idPositions.put(currentObject.getId(), currentObject.getPositions()); | |
488 | + idOffsets.put(currentObject.getId(), currentObject.getOffset()); | |
489 | + // offset always null, so update later with word (should be possible) | |
490 | + if ((currentObject.getId() != null) | |
491 | + && (currentList.get(MAPPING_TYPE_WORD).size() > 0)) { | |
492 | + currentList.get(MAPPING_TYPE_WORD) | |
493 | + .get((currentList.get(MAPPING_TYPE_WORD).size() - 1)) | |
494 | + .addUpdateableIdWithOffset(currentObject.getId()); | |
495 | + } | |
496 | + currentObject.updateMappings(idPositions, idOffsets); | |
497 | + unknownAncestors = currentObject.getUnknownAncestorNumber(); | |
498 | + computeMappingsFromObject(currentObject, currentList, updateList); | |
499 | + } | |
500 | + } | |
501 | + return functionOutputList; | |
502 | + } | |
503 | + | |
504 | + /** | |
505 | + * Process crm sentence. | |
506 | + * | |
507 | + * @param name | |
508 | + * the name | |
509 | + * @param text | |
510 | + * the text | |
511 | + * @param currentOffset | |
512 | + * the current offset | |
513 | + * @param functionOutputList | |
514 | + * the function output list | |
515 | + * @param unknownAncestors | |
516 | + * the unknown ancestors | |
517 | + * @param currentList | |
518 | + * the current list | |
519 | + * @param updateList | |
520 | + * the update list | |
521 | + * @param idPositions | |
522 | + * the id positions | |
523 | + * @param idOffsets | |
524 | + * the id offsets | |
525 | + * @param previous | |
526 | + * the previous | |
527 | + * @param previousClause | |
528 | + * the previous clause | |
529 | + * @return the hash set | |
530 | + * @throws MtasParserException | |
531 | + * the mtas parser exception | |
532 | + * @throws MtasConfigException | |
533 | + * the mtas config exception | |
534 | + */ | |
535 | + private HashSet<MtasParserObject> processCRMSentence(String name, String text, | |
536 | + Integer currentOffset, | |
537 | + ArrayList<MtasCRMParserFunctionOutput> functionOutputList, | |
538 | + Integer unknownAncestors, | |
539 | + HashMap<String, ArrayList<MtasParserObject>> currentList, | |
540 | + HashMap<String, HashMap<Integer, HashSet<String>>> updateList, | |
541 | + HashMap<String, TreeSet<Integer>> idPositions, | |
542 | + HashMap<String, Integer[]> idOffsets, HashSet<MtasParserObject> previous, | |
543 | + HashSet<MtasParserObject> previousClause) | |
544 | + throws MtasParserException, MtasConfigException { | |
545 | + MtasParserType tmpCurrentType; | |
546 | + MtasParserObject currentObject; | |
547 | + if ((tmpCurrentType = crmSentenceTypes.get(name)) != null) { | |
548 | + currentObject = new MtasParserObject(tmpCurrentType); | |
549 | + currentObject.setUnknownAncestorNumber(unknownAncestors); | |
550 | + currentObject.setRealOffsetStart(currentOffset); | |
551 | + currentObject.setText(text); | |
552 | + if (!prevalidateObject(currentObject, currentList)) { | |
553 | + return null; | |
554 | + } else { | |
555 | + closePrevious(previousClause, currentOffset, unknownAncestors, | |
556 | + currentList, updateList, idPositions, idOffsets); | |
557 | + closePrevious(previous, currentOffset, unknownAncestors, currentList, | |
558 | + updateList, idPositions, idOffsets); | |
559 | + previous.clear(); | |
560 | + currentList.get(MAPPING_TYPE_GROUP).add(currentObject); | |
561 | + unknownAncestors = 0; | |
562 | + return new HashSet<MtasParserObject>(Arrays.asList(currentObject)); | |
563 | + } | |
564 | + } | |
565 | + return null; | |
566 | + } | |
567 | + | |
568 | + /** | |
569 | + * Process crm clause. | |
570 | + * | |
571 | + * @param name | |
572 | + * the name | |
573 | + * @param text | |
574 | + * the text | |
575 | + * @param currentOffset | |
576 | + * the current offset | |
577 | + * @param functionOutputList | |
578 | + * the function output list | |
579 | + * @param unknownAncestors | |
580 | + * the unknown ancestors | |
581 | + * @param currentList | |
582 | + * the current list | |
583 | + * @param updateList | |
584 | + * the update list | |
585 | + * @param idPositions | |
586 | + * the id positions | |
587 | + * @param idOffsets | |
588 | + * the id offsets | |
589 | + * @param previous | |
590 | + * the previous | |
591 | + * @return the hash set | |
592 | + * @throws MtasParserException | |
593 | + * the mtas parser exception | |
594 | + * @throws MtasConfigException | |
595 | + * the mtas config exception | |
596 | + */ | |
597 | + private HashSet<MtasParserObject> processCRMClause(String name, String text, | |
598 | + Integer currentOffset, | |
599 | + ArrayList<MtasCRMParserFunctionOutput> functionOutputList, | |
600 | + Integer unknownAncestors, | |
601 | + HashMap<String, ArrayList<MtasParserObject>> currentList, | |
602 | + HashMap<String, HashMap<Integer, HashSet<String>>> updateList, | |
603 | + HashMap<String, TreeSet<Integer>> idPositions, | |
604 | + HashMap<String, Integer[]> idOffsets, HashSet<MtasParserObject> previous) | |
605 | + throws MtasParserException, MtasConfigException { | |
606 | + MtasParserType tmpCurrentType; | |
607 | + MtasParserObject currentObject; | |
608 | + if ((tmpCurrentType = crmClauseTypes.get(name)) != null) { | |
609 | + currentObject = new MtasParserObject(tmpCurrentType); | |
610 | + currentObject.setUnknownAncestorNumber(unknownAncestors); | |
611 | + currentObject.setRealOffsetStart(currentOffset); | |
612 | + currentObject.setText(text); | |
613 | + if (!prevalidateObject(currentObject, currentList)) { | |
614 | + return null; | |
615 | + } else { | |
616 | + closePrevious(previous, currentOffset, unknownAncestors, currentList, | |
617 | + updateList, idPositions, idOffsets); | |
618 | + previous.clear(); | |
619 | + currentList.get(MAPPING_TYPE_GROUP).add(currentObject); | |
620 | + unknownAncestors = 0; | |
621 | + return new HashSet<MtasParserObject>(Arrays.asList(currentObject)); | |
622 | + } | |
623 | + } | |
624 | + return null; | |
625 | + } | |
626 | + | |
627 | + /** | |
628 | + * Close previous. | |
629 | + * | |
630 | + * @param previous | |
631 | + * the previous | |
632 | + * @param currentOffset | |
633 | + * the current offset | |
634 | + * @param unknownAncestors | |
635 | + * the unknown ancestors | |
636 | + * @param currentList | |
637 | + * the current list | |
638 | + * @param updateList | |
639 | + * the update list | |
640 | + * @param idPositions | |
641 | + * the id positions | |
642 | + * @param idOffsets | |
643 | + * the id offsets | |
644 | + * @throws MtasParserException | |
645 | + * the mtas parser exception | |
646 | + * @throws MtasConfigException | |
647 | + * the mtas config exception | |
648 | + */ | |
649 | + private void closePrevious(HashSet<MtasParserObject> previous, | |
650 | + Integer currentOffset, Integer unknownAncestors, | |
651 | + HashMap<String, ArrayList<MtasParserObject>> currentList, | |
652 | + HashMap<String, HashMap<Integer, HashSet<String>>> updateList, | |
653 | + HashMap<String, TreeSet<Integer>> idPositions, | |
654 | + HashMap<String, Integer[]> idOffsets) | |
655 | + throws MtasParserException, MtasConfigException { | |
656 | + for (MtasParserObject previousObject : previous) { | |
657 | + previousObject.setRealOffsetEnd(currentOffset); | |
658 | + idPositions.put(previousObject.getId(), previousObject.getPositions()); | |
659 | + idOffsets.put(previousObject.getId(), previousObject.getOffset()); | |
660 | + previousObject.updateMappings(idPositions, idOffsets); | |
661 | + unknownAncestors = previousObject.getUnknownAncestorNumber(); | |
662 | + computeMappingsFromObject(previousObject, currentList, updateList); | |
663 | + currentList.get(MAPPING_TYPE_GROUP).remove(previousObject); | |
664 | + } | |
665 | + } | |
666 | + | |
667 | + /** | |
668 | + * Process crm pair. | |
669 | + * | |
670 | + * @param position | |
671 | + * the position | |
672 | + * @param name | |
673 | + * the name | |
674 | + * @param text | |
675 | + * the text | |
676 | + * @param currentOffset | |
677 | + * the current offset | |
678 | + * @param functionOutputList | |
679 | + * the function output list | |
680 | + * @param unknownAncestors | |
681 | + * the unknown ancestors | |
682 | + * @param currentList | |
683 | + * the current list | |
684 | + * @param updateList | |
685 | + * the update list | |
686 | + * @param idPositions | |
687 | + * the id positions | |
688 | + * @param idOffsets | |
689 | + * the id offsets | |
690 | + * @throws MtasParserException | |
691 | + * the mtas parser exception | |
692 | + * @throws MtasConfigException | |
693 | + * the mtas config exception | |
694 | + */ | |
695 | + private void processCRMPair(int position, String name, String text, | |
696 | + Integer currentOffset, | |
697 | + ArrayList<MtasCRMParserFunctionOutput> functionOutputList, | |
698 | + Integer unknownAncestors, | |
699 | + HashMap<String, ArrayList<MtasParserObject>> currentList, | |
700 | + HashMap<String, HashMap<Integer, HashSet<String>>> updateList, | |
701 | + HashMap<String, TreeSet<Integer>> idPositions, | |
702 | + HashMap<String, Integer[]> idOffsets) | |
703 | + throws MtasParserException, MtasConfigException { | |
704 | + | |
705 | + MtasParserType tmpCurrentType; | |
706 | + MtasParserObject currentObject; | |
707 | + | |
708 | + if ((tmpCurrentType = crmPairTypes.get(name)) != null) { | |
709 | + if ((tmpCurrentType = crmPairTypes.get(name)) != null) { | |
710 | + // get history | |
711 | + HashMap<String, MtasParserObject> currentNamePairHistory; | |
712 | + if (!historyPair.containsKey(name)) { | |
713 | + currentNamePairHistory = new HashMap<String, MtasParserObject>(); | |
714 | + historyPair.put(name, currentNamePairHistory); | |
715 | + } else { | |
716 | + currentNamePairHistory = historyPair.get(name); | |
717 | + } | |
718 | + Matcher m = pairPattern.matcher(text); | |
719 | + if (m.find()) { | |
720 | + String thisKey = m.group(1) + m.group(2); | |
721 | + String otherKey = (m.group(1).equals("b") ? "e" : "b") + m.group(2); | |
722 | + if (currentNamePairHistory.containsKey(otherKey)) { | |
723 | + currentObject = currentNamePairHistory.remove(otherKey); | |
724 | + currentObject.setText(currentObject.getText() + "+" + text); | |
725 | + currentObject.addPosition(position); | |
726 | + processFunctions(name, text, MAPPING_TYPE_CRM_PAIR, | |
727 | + functionOutputList); | |
728 | + currentObject.setRealOffsetEnd(currentOffset + 1); | |
729 | + currentObject.setOffsetEnd(currentOffset + 1); | |
730 | + idPositions.put(currentObject.getId(), | |
731 | + currentObject.getPositions()); | |
732 | + idOffsets.put(currentObject.getId(), currentObject.getOffset()); | |
733 | + currentObject.updateMappings(idPositions, idOffsets); | |
734 | + unknownAncestors = currentObject.getUnknownAncestorNumber(); | |
735 | + computeMappingsFromObject(currentObject, currentList, updateList); | |
736 | + } else { | |
737 | + currentObject = new MtasParserObject(tmpCurrentType); | |
738 | + currentObject.setUnknownAncestorNumber(unknownAncestors); | |
739 | + currentObject.setRealOffsetStart(currentOffset); | |
740 | + currentObject.setOffsetStart(currentOffset); | |
741 | + currentObject.setText(text); | |
742 | + currentObject.addPosition(position); | |
743 | + if (!prevalidateObject(currentObject, currentList)) { | |
744 | + unknownAncestors++; | |
745 | + } else { | |
746 | + currentNamePairHistory.put(thisKey, currentObject); | |
747 | + processFunctions(name, text, MAPPING_TYPE_CRM_PAIR, | |
748 | + functionOutputList); | |
749 | + currentObject.setRealOffsetEnd(currentOffset + 1); | |
750 | + currentObject.setOffsetEnd(currentOffset + 1); | |
751 | + idPositions.put(currentObject.getId(), | |
752 | + currentObject.getPositions()); | |
753 | + idOffsets.put(currentObject.getId(), currentObject.getOffset()); | |
754 | + // offset always null, so update later with word (should be | |
755 | + // possible) | |
756 | + if ((currentObject.getId() != null) | |
757 | + && (currentList.get(MAPPING_TYPE_WORD).size() > 0)) { | |
758 | + currentList.get(MAPPING_TYPE_WORD) | |
759 | + .get((currentList.get(MAPPING_TYPE_WORD).size() - 1)) | |
760 | + .addUpdateableIdWithOffset(currentObject.getId()); | |
761 | + } | |
762 | + | |
763 | + } | |
764 | + } | |
765 | + } | |
766 | + } | |
767 | + } | |
768 | + | |
769 | + } | |
770 | + | |
771 | + /** | |
772 | + * Process functions. | |
773 | + * | |
774 | + * @param name | |
775 | + * the name | |
776 | + * @param text | |
777 | + * the text | |
778 | + * @param type | |
779 | + * the type | |
780 | + * @param functionOutputList | |
781 | + * the function output list | |
782 | + */ | |
783 | + private void processFunctions(String name, String text, String type, | |
784 | + ArrayList<MtasCRMParserFunctionOutput> functionOutputList) { | |
785 | + if (functions.containsKey(type) && functions.get(type).containsKey(name) | |
786 | + && text != null) { | |
787 | + if (functions.get(type).containsKey(name)) { | |
788 | + MtasCRMParserFunction function = functions.get(type).get(name); | |
789 | + String[] value; | |
790 | + if (function.split != null) { | |
791 | + value = text.split(Pattern.quote(function.split)); | |
792 | + } else { | |
793 | + value = new String[] { text }; | |
794 | + } | |
795 | + for (int c = 0; c < value.length; c++) { | |
796 | + boolean checkedEmpty = false; | |
797 | + if (value[c].equals("")) { | |
798 | + checkedEmpty = true; | |
799 | + } | |
800 | + if (function.output.containsKey(value[c])) { | |
801 | + ArrayList<MtasCRMParserFunctionOutput> list = function.output | |
802 | + .get(value[c]); | |
803 | + for (MtasCRMParserFunctionOutput listItem : list) { | |
804 | + functionOutputList.add(listItem.create(value[c])); | |
805 | + } | |
806 | + } | |
807 | + if (!checkedEmpty && function.output.containsKey("")) { | |
808 | + ArrayList<MtasCRMParserFunctionOutput> list = function.output | |
809 | + .get(""); | |
810 | + for (MtasCRMParserFunctionOutput listItem : list) { | |
811 | + functionOutputList.add(listItem.create(value[c])); | |
812 | + } | |
813 | + } | |
814 | + } | |
815 | + } | |
816 | + } | |
817 | + } | |
818 | + | |
819 | + /* | |
820 | + * (non-Javadoc) | |
821 | + * | |
822 | + * @see mtas.analysis.parser.MtasParser#printConfig() | |
823 | + */ | |
824 | + @Override | |
825 | + public String printConfig() { | |
826 | + String text = ""; | |
827 | + text += "=== CONFIGURATION ===\n"; | |
828 | + text += "type: " + wordAnnotationTypes.size() + " x wordAnnotation"; | |
829 | + text += printConfigTypes(wordAnnotationTypes); | |
830 | + text += "=== CONFIGURATION ===\n"; | |
831 | + return text; | |
832 | + } | |
833 | + | |
834 | + /** | |
835 | + * Prints the config types. | |
836 | + * | |
837 | + * @param types | |
838 | + * the types | |
839 | + * @return the string | |
840 | + */ | |
841 | + private String printConfigTypes(HashMap<?, MtasParserType> types) { | |
842 | + String text = ""; | |
843 | + for (Entry<?, MtasParserType> entry : types.entrySet()) { | |
844 | + text += "- " + entry.getKey() + ": " + entry.getValue().mappings.size() | |
845 | + + " mapping(s)\n"; | |
846 | + for (int i = 0; i < entry.getValue().mappings.size(); i++) { | |
847 | + text += "\t" + entry.getValue().mappings.get(i) + "\n"; | |
848 | + } | |
849 | + } | |
850 | + return text; | |
851 | + } | |
852 | + | |
853 | + /** | |
854 | + * The Class MtasCRMParserFunction. | |
855 | + */ | |
856 | + private class MtasCRMParserFunction { | |
857 | + | |
858 | + /** The split. */ | |
859 | + public String split; | |
860 | + | |
861 | + /** The output. */ | |
862 | + public HashMap<String, ArrayList<MtasCRMParserFunctionOutput>> output; | |
863 | + | |
864 | + /** | |
865 | + * Instantiates a new mtas crm parser function. | |
866 | + * | |
867 | + * @param type | |
868 | + * the type | |
869 | + * @param split | |
870 | + * the split | |
871 | + */ | |
872 | + public MtasCRMParserFunction(String type, String split) { | |
873 | + this.split = split; | |
874 | + output = new HashMap<String, ArrayList<MtasCRMParserFunctionOutput>>(); | |
875 | + } | |
876 | + | |
877 | + } | |
878 | + | |
879 | + /** | |
880 | + * The Class MtasCRMParserFunctionOutput. | |
881 | + */ | |
882 | + private class MtasCRMParserFunctionOutput { | |
883 | + | |
884 | + /** The name. */ | |
885 | + public String name; | |
886 | + | |
887 | + /** The value. */ | |
888 | + public String value; | |
889 | + | |
890 | + /** | |
891 | + * Instantiates a new mtas crm parser function output. | |
892 | + * | |
893 | + * @param name | |
894 | + * the name | |
895 | + * @param value | |
896 | + * the value | |
897 | + */ | |
898 | + public MtasCRMParserFunctionOutput(String name, String value) { | |
899 | + this.name = name; | |
900 | + this.value = value; | |
901 | + } | |
902 | + | |
903 | + /** | |
904 | + * Creates the. | |
905 | + * | |
906 | + * @param originalValue | |
907 | + * the original value | |
908 | + * @return the mtas crm parser function output | |
909 | + */ | |
910 | + public MtasCRMParserFunctionOutput create(String originalValue) { | |
911 | + if (value != null) { | |
912 | + return this; | |
913 | + } else { | |
914 | + return new MtasCRMParserFunctionOutput(name, originalValue); | |
915 | + } | |
916 | + } | |
917 | + | |
918 | + /* | |
919 | + * (non-Javadoc) | |
920 | + * | |
921 | + * @see java.lang.Object#toString() | |
922 | + */ | |
923 | + @Override | |
924 | + public String toString() { | |
925 | + return "MtasCRMParserFunctionOutput[" + name + "," + value + "]"; | |
926 | + } | |
927 | + } | |
928 | + | |
929 | + /** | |
930 | + * The Class MtasCRMParserMappingWordAnnotation. | |
931 | + */ | |
932 | + private class MtasCRMParserMappingWordAnnotation | |
933 | + extends MtasParserMapping<MtasCRMParserMappingWordAnnotation> { | |
934 | + | |
935 | + /** | |
936 | + * Instantiates a new mtas crm parser mapping word annotation. | |
937 | + */ | |
938 | + public MtasCRMParserMappingWordAnnotation() { | |
939 | + super(); | |
940 | + this.position = SOURCE_OWN; | |
941 | + this.realOffset = SOURCE_OWN; | |
942 | + this.offset = SOURCE_ANCESTOR_WORD; | |
943 | + this.type = MAPPING_TYPE_WORD_ANNOTATION; | |
944 | + } | |
945 | + | |
946 | + /* | |
947 | + * (non-Javadoc) | |
948 | + * | |
949 | + * @see mtas.analysis.parser.MtasParser.MtasParserMapping#self() | |
950 | + */ | |
951 | + @Override | |
952 | + protected MtasCRMParserMappingWordAnnotation self() { | |
953 | + return this; | |
954 | + } | |
955 | + } | |
956 | + | |
957 | + /** | |
958 | + * The Class MtasCRMParserMappingCRMSentence. | |
959 | + */ | |
960 | + private class MtasCRMParserMappingCRMSentence | |
961 | + extends MtasParserMapping<MtasCRMParserMappingCRMSentence> { | |
962 | + | |
963 | + /** | |
964 | + * Instantiates a new mtas crm parser mapping crm sentence. | |
965 | + */ | |
966 | + public MtasCRMParserMappingCRMSentence() { | |
967 | + super(); | |
968 | + this.position = SOURCE_OWN; | |
969 | + this.realOffset = SOURCE_OWN; | |
970 | + this.offset = SOURCE_OWN; | |
971 | + this.type = MAPPING_TYPE_GROUP; | |
972 | + } | |
973 | + | |
974 | + /* | |
975 | + * (non-Javadoc) | |
976 | + * | |
977 | + * @see mtas.analysis.parser.MtasBasicParser.MtasParserMapping#self() | |
978 | + */ | |
979 | + @Override | |
980 | + protected MtasCRMParserMappingCRMSentence self() { | |
981 | + return this; | |
982 | + } | |
983 | + } | |
984 | + | |
985 | + /** | |
986 | + * The Class MtasCRMParserMappingCRMPair. | |
987 | + */ | |
988 | + private class MtasCRMParserMappingCRMPair | |
989 | + extends MtasParserMapping<MtasCRMParserMappingCRMPair> { | |
990 | + | |
991 | + /** | |
992 | + * Instantiates a new mtas crm parser mapping crm pair. | |
993 | + */ | |
994 | + public MtasCRMParserMappingCRMPair() { | |
995 | + super(); | |
996 | + this.position = SOURCE_OWN; | |
997 | + this.realOffset = SOURCE_OWN; | |
998 | + this.offset = SOURCE_OWN; | |
999 | + this.type = MAPPING_TYPE_RELATION; | |
1000 | + } | |
1001 | + | |
1002 | + /* | |
1003 | + * (non-Javadoc) | |
1004 | + * | |
1005 | + * @see mtas.analysis.parser.MtasBasicParser.MtasParserMapping#self() | |
1006 | + */ | |
1007 | + @Override | |
1008 | + protected MtasCRMParserMappingCRMPair self() { | |
1009 | + return this; | |
1010 | + } | |
1011 | + } | |
1012 | + | |
1013 | +} | |
... | ... |
src/mtas/analysis/parser/MtasElanParser.java
... | ... | @@ -17,10 +17,12 @@ final public class MtasElanParser extends MtasXMLParser { |
17 | 17 | * @param config the config |
18 | 18 | */ |
19 | 19 | public MtasElanParser(MtasConfiguration config) { |
20 | - super(config); | |
20 | + super(config); | |
21 | 21 | } |
22 | - | |
23 | - /* (non-Javadoc) | |
22 | + | |
23 | + /* | |
24 | + * (non-Javadoc) | |
25 | + * | |
24 | 26 | * @see mtas.analysis.parser.MtasXMLParser#initParser() |
25 | 27 | */ |
26 | 28 | @Override |
... | ... |
src/mtas/analysis/parser/MtasFoliaParser.java
... | ... | @@ -17,10 +17,12 @@ final public class MtasFoliaParser extends MtasXMLParser { |
17 | 17 | * @param config the config |
18 | 18 | */ |
19 | 19 | public MtasFoliaParser(MtasConfiguration config) { |
20 | - super(config); | |
20 | + super(config); | |
21 | 21 | } |
22 | - | |
23 | - /* (non-Javadoc) | |
22 | + | |
23 | + /* | |
24 | + * (non-Javadoc) | |
25 | + * | |
24 | 26 | * @see mtas.analysis.parser.MtasXMLParser#initParser() |
25 | 27 | */ |
26 | 28 | @Override |
... | ... |
src/mtas/analysis/parser/MtasParser.java
... | ... | @@ -15,16 +15,19 @@ import mtas.analysis.util.MtasParserException; |
15 | 15 | * The Class MtasParser. |
16 | 16 | */ |
17 | 17 | abstract public class MtasParser { |
18 | - | |
18 | + | |
19 | 19 | /** The token collection. */ |
20 | 20 | protected MtasTokenCollection tokenCollection; |
21 | - | |
21 | + | |
22 | 22 | /** The config. */ |
23 | 23 | protected MtasConfiguration config; |
24 | 24 | |
25 | 25 | /** The autorepair. */ |
26 | 26 | protected Boolean autorepair = false; |
27 | - | |
27 | + | |
28 | + /** The makeunique. */ | |
29 | + protected Boolean makeunique = false; | |
30 | + | |
28 | 31 | /** |
29 | 32 | * Inits the parser. |
30 | 33 | * |
... | ... | @@ -38,10 +41,13 @@ abstract public class MtasParser { |
38 | 41 | if (current.name.equals("autorepair")) { |
39 | 42 | autorepair = current.attributes.get("value").equals("true"); |
40 | 43 | } |
44 | + if (current.name.equals("makeunique")) { | |
45 | + makeunique = current.attributes.get("value").equals("true"); | |
46 | + } | |
41 | 47 | } |
42 | 48 | } |
43 | 49 | } |
44 | - | |
50 | + | |
45 | 51 | /** |
46 | 52 | * Creates the token collection. |
47 | 53 | * |
... | ... | @@ -59,7 +65,7 @@ abstract public class MtasParser { |
59 | 65 | * @return the string |
60 | 66 | */ |
61 | 67 | public abstract String printConfig(); |
62 | - | |
68 | + | |
63 | 69 | /** |
64 | 70 | * The Class MtasParserObject. |
65 | 71 | */ |
... | ... | @@ -259,17 +265,17 @@ abstract public class MtasParser { |
259 | 265 | public void setText(String text) { |
260 | 266 | objectText = text; |
261 | 267 | } |
262 | - | |
268 | + | |
263 | 269 | /** |
264 | 270 | * Adds the text. |
265 | 271 | * |
266 | 272 | * @param text the text |
267 | 273 | */ |
268 | 274 | public void addText(String text) { |
269 | - if(objectText==null) { | |
275 | + if (objectText == null) { | |
270 | 276 | objectText = text; |
271 | 277 | } else { |
272 | - objectText+=text; | |
278 | + objectText += text; | |
273 | 279 | } |
274 | 280 | } |
275 | 281 | |
... | ... |
src/mtas/analysis/parser/MtasSketchParser.java
... | ... | @@ -53,7 +53,9 @@ final public class MtasSketchParser extends MtasBasicParser { |
53 | 53 | } |
54 | 54 | } |
55 | 55 | |
56 | - /* (non-Javadoc) | |
56 | + /* | |
57 | + * (non-Javadoc) | |
58 | + * | |
57 | 59 | * @see mtas.analysis.parser.MtasParser#initParser() |
58 | 60 | */ |
59 | 61 | @Override |
... | ... | @@ -62,7 +64,7 @@ final public class MtasSketchParser extends MtasBasicParser { |
62 | 64 | if (config != null) { |
63 | 65 | |
64 | 66 | // always word, no mappings |
65 | - wordType = new MtasParserType(MAPPING_TYPE_WORD, null); | |
67 | + wordType = new MtasParserType(MAPPING_TYPE_WORD, null, false); | |
66 | 68 | |
67 | 69 | for (int i = 0; i < config.children.size(); i++) { |
68 | 70 | MtasConfiguration current = config.children.get(i); |
... | ... | @@ -74,7 +76,7 @@ final public class MtasSketchParser extends MtasBasicParser { |
74 | 76 | String nameMapping = mapping.attributes.get("name"); |
75 | 77 | if ((typeMapping != null)) { |
76 | 78 | if (typeMapping.equals(MAPPING_TYPE_WORD)) { |
77 | - MtasSketchParserMappingWordAnnotation m = new MtasSketchParserMappingWordAnnotation(); | |
79 | + MtasSketchParserMappingWord m = new MtasSketchParserMappingWord(); | |
78 | 80 | m.processConfig(mapping); |
79 | 81 | wordType.addMapping(m); |
80 | 82 | } else if (typeMapping.equals(MAPPING_TYPE_WORD_ANNOTATION) |
... | ... | @@ -85,7 +87,7 @@ final public class MtasSketchParser extends MtasBasicParser { |
85 | 87 | wordAnnotationTypes.get(nameMapping).addMapping(m); |
86 | 88 | } else { |
87 | 89 | MtasParserType t = new MtasParserType(typeMapping, |
88 | - nameMapping); | |
90 | + nameMapping, false); | |
89 | 91 | t.addMapping(m); |
90 | 92 | wordAnnotationTypes.put(Integer.parseInt(nameMapping), t); |
91 | 93 | } |
... | ... | @@ -97,7 +99,7 @@ final public class MtasSketchParser extends MtasBasicParser { |
97 | 99 | groupTypes.get(nameMapping).addMapping(m); |
98 | 100 | } else { |
99 | 101 | MtasParserType t = new MtasParserType(typeMapping, |
100 | - nameMapping); | |
102 | + nameMapping, false); | |
101 | 103 | t.addMapping(m); |
102 | 104 | groupTypes.put(nameMapping, t); |
103 | 105 | } |
... | ... | @@ -113,7 +115,9 @@ final public class MtasSketchParser extends MtasBasicParser { |
113 | 115 | } |
114 | 116 | } |
115 | 117 | |
116 | - /* (non-Javadoc) | |
118 | + /* | |
119 | + * (non-Javadoc) | |
120 | + * | |
117 | 121 | * @see mtas.analysis.parser.MtasParser#createTokenCollection(java.io.Reader) |
118 | 122 | */ |
119 | 123 | @Override |
... | ... | @@ -337,11 +341,13 @@ final public class MtasSketchParser extends MtasBasicParser { |
337 | 341 | } |
338 | 342 | } |
339 | 343 | // final check |
340 | - tokenCollection.check(autorepair); | |
344 | + tokenCollection.check(autorepair, makeunique); | |
341 | 345 | return tokenCollection; |
342 | 346 | } |
343 | 347 | |
344 | - /* (non-Javadoc) | |
348 | + /* | |
349 | + * (non-Javadoc) | |
350 | + * | |
345 | 351 | * @see mtas.analysis.parser.MtasParser#printConfig() |
346 | 352 | */ |
347 | 353 | @Override |
... | ... | @@ -373,6 +379,34 @@ final public class MtasSketchParser extends MtasBasicParser { |
373 | 379 | } |
374 | 380 | |
375 | 381 | /** |
382 | + * The Class MtasSketchParserMappingWord. | |
383 | + */ | |
384 | + private class MtasSketchParserMappingWord | |
385 | + extends MtasParserMapping<MtasSketchParserMappingWord> { | |
386 | + | |
387 | + /** | |
388 | + * Instantiates a new mtas sketch parser mapping word. | |
389 | + */ | |
390 | + public MtasSketchParserMappingWord() { | |
391 | + super(); | |
392 | + this.position = SOURCE_OWN; | |
393 | + this.realOffset = SOURCE_OWN; | |
394 | + this.offset = SOURCE_OWN; | |
395 | + this.type = MAPPING_TYPE_WORD; | |
396 | + } | |
397 | + | |
398 | + /* | |
399 | + * (non-Javadoc) | |
400 | + * | |
401 | + * @see mtas.analysis.parser.MtasBasicParser.MtasParserMapping#self() | |
402 | + */ | |
403 | + @Override | |
404 | + protected MtasSketchParserMappingWord self() { | |
405 | + return this; | |
406 | + } | |
407 | + } | |
408 | + | |
409 | + /** | |
376 | 410 | * The Class MtasSketchParserMappingWordAnnotation. |
377 | 411 | */ |
378 | 412 | private class MtasSketchParserMappingWordAnnotation |
... | ... |
src/mtas/analysis/parser/MtasTEIParser.java
... | ... | @@ -17,10 +17,12 @@ final public class MtasTEIParser extends MtasXMLParser { |
17 | 17 | * @param config the config |
18 | 18 | */ |
19 | 19 | public MtasTEIParser(MtasConfiguration config) { |
20 | - super(config); | |
20 | + super(config); | |
21 | 21 | } |
22 | - | |
23 | - /* (non-Javadoc) | |
22 | + | |
23 | + /* | |
24 | + * (non-Javadoc) | |
25 | + * | |
24 | 26 | * @see mtas.analysis.parser.MtasXMLParser#initParser() |
25 | 27 | */ |
26 | 28 | @Override |
... | ... |