Commit 8148835a47574769c6705c325af983b18bc83f6d
1 parent
229cfad6
update
Showing
209 changed files
with
27681 additions
and
10521 deletions
Too many changes to show.
To preserve performance only 29 of 209 files are displayed.
conf/parser/mtas.xml
1 | <?xml version="1.0" encoding="UTF-8" ?> | 1 | <?xml version="1.0" encoding="UTF-8" ?> |
2 | <mtas> | 2 | <mtas> |
3 | <configurations type="mtas.analysis.util.MtasTokenizerFactory"> | 3 | <configurations type="mtas.analysis.util.MtasTokenizerFactory"> |
4 | + <configuration name="test" file="mtas/folia_test.xml" /> | ||
5 | + <configuration name="CRM" file="mtas/crm_test.xml" /> | ||
4 | <configuration name="DBNL" file="mtas/folia_dbnl.xml" /> | 6 | <configuration name="DBNL" file="mtas/folia_dbnl.xml" /> |
7 | + <configuration name="DDD" file="mtas/folia_ddd.xml" /> | ||
5 | <configuration name="EDBO" file="mtas/folia_edbo.xml" /> | 8 | <configuration name="EDBO" file="mtas/folia_edbo.xml" /> |
6 | <configuration name="SONAR" file="mtas/folia_sonar.xml" /> | 9 | <configuration name="SONAR" file="mtas/folia_sonar.xml" /> |
7 | </configurations> | 10 | </configurations> |
8 | <configurations type="mtas.analysis.util.MtasCharFilterFactory"> | 11 | <configurations type="mtas.analysis.util.MtasCharFilterFactory"> |
12 | + <configuration name="test" type="file" /> | ||
13 | + <configuration name="CRM" type="file" prefix="/Users/matthijs/Software/Mtas/data/CRM/data/files/" postfix=".txt" /> | ||
9 | <configuration name="DBNL" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" /> | 14 | <configuration name="DBNL" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" /> |
15 | + <configuration name="DDD" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" /> | ||
10 | <configuration name="EDBO" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" /> | 16 | <configuration name="EDBO" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" /> |
11 | <configuration name="SONAR" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" /> | 17 | <configuration name="SONAR" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" /> |
12 | </configurations> | 18 | </configurations> |
conf/parser/mtas/crm_test.xml
0 → 100644
1 | +<?xml version="1.0" encoding="UTF-8" ?> | ||
2 | +<mtas> | ||
3 | + | ||
4 | + <!-- START MTAS INDEX CONFIGURATION --> | ||
5 | + <index> | ||
6 | + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS --> | ||
7 | + <payload index="false" /> | ||
8 | + <offset index="false" /> | ||
9 | + <realoffset index="false" /> | ||
10 | + <parent index="true" /> | ||
11 | + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS --> | ||
12 | + </index> | ||
13 | + <!-- END MTAS INDEX CONFIGURATION --> | ||
14 | + | ||
15 | + | ||
16 | + | ||
17 | + <!-- START CONFIGURATION MTAS FOLIA PARSER --> | ||
18 | + <parser name="mtas.analysis.parser.MtasCRMParser"> | ||
19 | + | ||
20 | + <!-- START GENERAL SETTINGS MTAS PARSER --> | ||
21 | + <autorepair value="true" /> | ||
22 | + <makeunique value="true" /> | ||
23 | + <!-- END GENERAL SETTINGS MTAS PARSER --> | ||
24 | + | ||
25 | + <mappings> | ||
26 | + | ||
27 | + <mapping type="word"> | ||
28 | + </mapping> | ||
29 | + | ||
30 | + <mapping type="wordAnnotation" name="0"> | ||
31 | + <token type="string" offset="false" parent="false"> | ||
32 | + <pre> | ||
33 | + <item type="string" value="t" /> | ||
34 | + </pre> | ||
35 | + <post> | ||
36 | + <item type="text" /> | ||
37 | + </post> | ||
38 | + </token> | ||
39 | + </mapping> | ||
40 | + <mapping type="wordAnnotation" name="0"> | ||
41 | + <token type="string" offset="false" parent="false"> | ||
42 | + <pre> | ||
43 | + <item type="string" value="t_lc" /> | ||
44 | + </pre> | ||
45 | + <post> | ||
46 | + <item type="text" filter="ascii,lowercase" /> | ||
47 | + </post> | ||
48 | + </token> | ||
49 | + </mapping> | ||
50 | + <mapping type="wordAnnotation" name="1"> | ||
51 | + <token type="string" offset="false" parent="false"> | ||
52 | + <pre> | ||
53 | + <item type="string" value="t1" /> | ||
54 | + </pre> | ||
55 | + <post> | ||
56 | + <item type="text" /> | ||
57 | + </post> | ||
58 | + </token> | ||
59 | + </mapping> | ||
60 | + <mapping type="wordAnnotation" name="1"> | ||
61 | + <token type="string" offset="false" parent="false"> | ||
62 | + <pre> | ||
63 | + <item type="string" value="t1_lc" /> | ||
64 | + </pre> | ||
65 | + <post> | ||
66 | + <item type="text" filter="ascii,lowercase" /> | ||
67 | + </post> | ||
68 | + </token> | ||
69 | + </mapping> | ||
70 | + <mapping type="wordAnnotation" name="2"> | ||
71 | + <token type="string" offset="false" parent="false"> | ||
72 | + <pre> | ||
73 | + <item type="string" value="t2" /> | ||
74 | + </pre> | ||
75 | + <post> | ||
76 | + <item type="text" /> | ||
77 | + </post> | ||
78 | + </token> | ||
79 | + </mapping> | ||
80 | + <mapping type="wordAnnotation" name="2"> | ||
81 | + <token type="string" offset="false" parent="false"> | ||
82 | + <pre> | ||
83 | + <item type="string" value="t2_lc" /> | ||
84 | + </pre> | ||
85 | + <post> | ||
86 | + <item type="text" filter="ascii,lowercase" /> | ||
87 | + </post> | ||
88 | + </token> | ||
89 | + </mapping> | ||
90 | + <mapping type="wordAnnotation" name="3"> | ||
91 | + <token type="string" offset="false" parent="false"> | ||
92 | + <pre> | ||
93 | + <item type="string" value="lemma" /> | ||
94 | + </pre> | ||
95 | + <post> | ||
96 | + <item type="text" /> | ||
97 | + </post> | ||
98 | + </token> | ||
99 | + </mapping> | ||
100 | + <mapping type="wordAnnotation" name="4"> | ||
101 | + <token type="string" offset="false" parent="false"> | ||
102 | + <pre> | ||
103 | + <item type="string" value="crm" /> | ||
104 | + </pre> | ||
105 | + <post> | ||
106 | + <item type="text" /> | ||
107 | + </post> | ||
108 | + </token> | ||
109 | + </mapping> | ||
110 | + | ||
111 | + <mapping type="crmPair" name="6"> | ||
112 | + <condition> | ||
113 | + <item type="text" not="true" condition="-" /> | ||
114 | + </condition> | ||
115 | + </mapping> | ||
116 | + <mapping type="crmPair" name="part"> | ||
117 | + <token type="string" offset="false" parent="false"> | ||
118 | + <pre> | ||
119 | + <item type="name" /> | ||
120 | + </pre> | ||
121 | + <post> | ||
122 | + <item type="text" /> | ||
123 | + </post> | ||
124 | + </token> | ||
125 | + </mapping> | ||
126 | + | ||
127 | + <mapping type="crmSentence" name="7"> | ||
128 | + <token type="string" offset="false" parent="false"> | ||
129 | + <pre> | ||
130 | + <item type="string" value="s"/> | ||
131 | + </pre> | ||
132 | + <post> | ||
133 | + <item type="text" /> | ||
134 | + </post> | ||
135 | + </token> | ||
136 | + <condition> | ||
137 | + <item type="text" not="true" condition="-" /> | ||
138 | + <item type="text" not="true" condition="2" /> | ||
139 | + <item type="text" not="true" condition="4" /> | ||
140 | + <item type="text" not="true" condition="5" /> | ||
141 | + <item type="text" not="true" condition="6" /> | ||
142 | + <item type="text" not="true" condition="8" /> | ||
143 | + </condition> | ||
144 | + </mapping> | ||
145 | + <mapping type="crmClause" name="7"> | ||
146 | + <token type="string" offset="false" parent="false"> | ||
147 | + <pre> | ||
148 | + <item type="string" value="sc"/> | ||
149 | + </pre> | ||
150 | + <post> | ||
151 | + <item type="text" /> | ||
152 | + </post> | ||
153 | + </token> | ||
154 | + <condition> | ||
155 | + <item type="text" not="true" condition="-" /> | ||
156 | + <item type="text" not="true" condition="0" /> | ||
157 | + <item type="text" not="true" condition="1" /> | ||
158 | + </condition> | ||
159 | + </mapping> | ||
160 | + <mapping type="crmClause" name="7"> | ||
161 | + <condition> | ||
162 | + <item type="text" not="true" condition="-" /> | ||
163 | + </condition> | ||
164 | + </mapping> | ||
165 | + | ||
166 | + <mapping type="wordAnnotation" name="pos"> | ||
167 | + <token type="string" offset="false" parent="false"> | ||
168 | + <pre> | ||
169 | + <item type="name" /> | ||
170 | + </pre> | ||
171 | + <post> | ||
172 | + <item type="text" /> | ||
173 | + </post> | ||
174 | + </token> | ||
175 | + </mapping> | ||
176 | + <mapping type="wordAnnotation" name="feat.getal"> | ||
177 | + <token type="string" offset="false" parent="false"> | ||
178 | + <pre> | ||
179 | + <item type="name" /> | ||
180 | + </pre> | ||
181 | + <post> | ||
182 | + <item type="text" /> | ||
183 | + </post> | ||
184 | + </token> | ||
185 | + </mapping> | ||
186 | + <mapping type="wordAnnotation" name="feat.persoon"> | ||
187 | + <token type="string" offset="false" parent="false"> | ||
188 | + <pre> | ||
189 | + <item type="name" /> | ||
190 | + </pre> | ||
191 | + <post> | ||
192 | + <item type="text" /> | ||
193 | + </post> | ||
194 | + </token> | ||
195 | + </mapping> | ||
196 | + <mapping type="wordAnnotation" name="feat.ntype"> | ||
197 | + <token type="string" offset="false" parent="false"> | ||
198 | + <pre> | ||
199 | + <item type="name" /> | ||
200 | + </pre> | ||
201 | + <post> | ||
202 | + <item type="text" /> | ||
203 | + </post> | ||
204 | + </token> | ||
205 | + </mapping> | ||
206 | + <mapping type="wordAnnotation" name="feat.pvtijd"> | ||
207 | + <token type="string" offset="false" parent="false"> | ||
208 | + <pre> | ||
209 | + <item type="name" /> | ||
210 | + </pre> | ||
211 | + <post> | ||
212 | + <item type="text" /> | ||
213 | + </post> | ||
214 | + </token> | ||
215 | + </mapping> | ||
216 | + <mapping type="wordAnnotation" name="feat.wvorm"> | ||
217 | + <token type="string" offset="false" parent="false"> | ||
218 | + <pre> | ||
219 | + <item type="name" /> | ||
220 | + </pre> | ||
221 | + <post> | ||
222 | + <item type="text" /> | ||
223 | + </post> | ||
224 | + </token> | ||
225 | + </mapping> | ||
226 | + <mapping type="wordAnnotation" name="feat.numtype"> | ||
227 | + <token type="string" offset="false" parent="false"> | ||
228 | + <pre> | ||
229 | + <item type="name" /> | ||
230 | + </pre> | ||
231 | + <post> | ||
232 | + <item type="text" /> | ||
233 | + </post> | ||
234 | + </token> | ||
235 | + </mapping> | ||
236 | + <mapping type="wordAnnotation" name="feat.vwtype"> | ||
237 | + <token type="string" offset="false" parent="false"> | ||
238 | + <pre> | ||
239 | + <item type="name" /> | ||
240 | + </pre> | ||
241 | + <post> | ||
242 | + <item type="text" /> | ||
243 | + </post> | ||
244 | + </token> | ||
245 | + </mapping> | ||
246 | + <mapping type="wordAnnotation" name="feat.lwtype"> | ||
247 | + <token type="string" offset="false" parent="false"> | ||
248 | + <pre> | ||
249 | + <item type="name" /> | ||
250 | + </pre> | ||
251 | + <post> | ||
252 | + <item type="text" /> | ||
253 | + </post> | ||
254 | + </token> | ||
255 | + </mapping> | ||
256 | + <mapping type="wordAnnotation" name="feat.form"> | ||
257 | + <token type="string" offset="false" parent="false"> | ||
258 | + <pre> | ||
259 | + <item type="name" /> | ||
260 | + </pre> | ||
261 | + <post> | ||
262 | + <item type="text" /> | ||
263 | + </post> | ||
264 | + </token> | ||
265 | + </mapping> | ||
266 | + <mapping type="wordAnnotation" name="feat.probleemgeval"> | ||
267 | + <token type="string" offset="false" parent="false"> | ||
268 | + <pre> | ||
269 | + <item type="name" /> | ||
270 | + </pre> | ||
271 | + <post> | ||
272 | + <item type="text" /> | ||
273 | + </post> | ||
274 | + </token> | ||
275 | + </mapping> | ||
276 | + </mappings> | ||
277 | + | ||
278 | + <functions> | ||
279 | + <function type="crmPair" name="6" split="+"> | ||
280 | + <condition value=""> | ||
281 | + <output name="part" /> | ||
282 | + </condition> | ||
283 | + </function> | ||
284 | + <function type="wordAnnotation" name="4" split="+"> | ||
285 | + <condition value="000,001,002,003,004,005,006,009"> | ||
286 | + <output name="pos" value="N" /> | ||
287 | + <output name="feat.getal" value="ev" /> | ||
288 | + </condition> | ||
289 | + <condition value="010,011,012,013,014,015,016,019"> | ||
290 | + <output name="pos" value="N" /> | ||
291 | + <output name="feat.getal" value="mv" /> | ||
292 | + </condition> | ||
293 | + <condition value="020,021,022,023,024,025,026,029"> | ||
294 | + <output name="pos" value="N" /> | ||
295 | + <output name="feat.ntype" value="eigen" /> | ||
296 | + </condition> | ||
297 | + <condition value="090,091,092,093,094,095,096,099"> | ||
298 | + <output name="pos" value="N" /> | ||
299 | + <output name="feat.probleemgeval" /> | ||
300 | + </condition> | ||
301 | + <condition value="100,101,102,103,104,105,106,109"> | ||
302 | + <output name="pos" value="ADJ" /> | ||
303 | + <output name="feat.getal" value="ev" /> | ||
304 | + </condition> | ||
305 | + <condition value="110,111,112,113,114,115,116,119"> | ||
306 | + <output name="pos" value="ADJ" /> | ||
307 | + <output name="feat.getal" value="mv" /> | ||
308 | + </condition> | ||
309 | + <condition value="190,191,192,193,194,195,196,199"> | ||
310 | + <output name="pos" value="ADJ" /> | ||
311 | + <output name="feat.probleemgeval" /> | ||
312 | + </condition> | ||
313 | + | ||
314 | + <condition value="200,201,202,203,204,205,206,209"> | ||
315 | + <output name="pos" value="WW" /> | ||
316 | + <output name="feat.pvtijd" value="tgw" /> | ||
317 | + </condition> | ||
318 | + <condition value="210,211,212,213,214,215,216,219"> | ||
319 | + <output name="pos" value="WW" /> | ||
320 | + <output name="feat.pvtijd" value="tgw" /> | ||
321 | + </condition> | ||
322 | + <condition value="220,221,222,223,224,225,226,229"> | ||
323 | + <output name="pos" value="WW" /> | ||
324 | + <output name="feat.pvtijd" value="verl" /> | ||
325 | + </condition> | ||
326 | + <condition value="230,231,232,233,234,235,236,239"> | ||
327 | + <output name="pos" value="WW" /> | ||
328 | + <output name="feat.pvtijd" value="verl" /> | ||
329 | + </condition> | ||
330 | + <condition value="240,241,242,243,244,245,246,249"> | ||
331 | + <output name="pos" value="WW" /> | ||
332 | + </condition> | ||
333 | + <condition value="250,251,252,253,254,255,256,259"> | ||
334 | + <output name="pos" value="WW" /> | ||
335 | + <output name="feat.wvorm" value="inf" /> | ||
336 | + </condition> | ||
337 | + <condition value="260,261,262,263,264,265,266,269"> | ||
338 | + <output name="pos" value="WW" /> | ||
339 | + <output name="feat.wvorm" value="inf" /> | ||
340 | + </condition> | ||
341 | + <condition value="270,271,272,273,274,275,276,279"> | ||
342 | + <output name="pos" value="WW" /> | ||
343 | + </condition> | ||
344 | + <condition value="280,281,282,283,284,285,286,289"> | ||
345 | + <output name="pos" value="WW" /> | ||
346 | + </condition> | ||
347 | + <condition value="290,291,292,293,294,295,296,299"> | ||
348 | + <output name="pos" value="WW" /> | ||
349 | + <output name="feat.probleemgeval" /> | ||
350 | + </condition> | ||
351 | + | ||
352 | + | ||
353 | + <condition value="300,301,302,303,304,305,306,309"> | ||
354 | + <output name="pos" value="TW" /> | ||
355 | + <output name="feat.numtype" value="hoofd" /> | ||
356 | + </condition> | ||
357 | + <condition value="310,311,312,313,314,315,316,319"> | ||
358 | + <output name="pos" value="TW" /> | ||
359 | + <output name="feat.numtype" value="rang" /> | ||
360 | + </condition> | ||
361 | + <condition value="320,321,322,323,324,325,326,329"> | ||
362 | + <output name="pos" value="TW" /> | ||
363 | + </condition> | ||
364 | + <condition value="390,391,392,393,394,395,396,399"> | ||
365 | + <output name="pos" value="TW" /> | ||
366 | + <output name="feat.probleemgeval" /> | ||
367 | + </condition> | ||
368 | + | ||
369 | + <condition value="401"> | ||
370 | + <output name="pos" value="VNW" /> | ||
371 | + <output name="feat.getal" value="ev" /> | ||
372 | + <output name="feat.persoon" value="1" /> | ||
373 | + </condition> | ||
374 | + <condition value="402"> | ||
375 | + <output name="pos" value="VNW" /> | ||
376 | + <output name="feat.getal" value="ev" /> | ||
377 | + <output name="feat.persoon" value="2" /> | ||
378 | + </condition> | ||
379 | + <condition value="403"> | ||
380 | + <output name="pos" value="VNW" /> | ||
381 | + <output name="feat.getal" value="ev" /> | ||
382 | + <output name="feat.persoon" value="3" /> | ||
383 | + </condition> | ||
384 | + <condition value="404"> | ||
385 | + <output name="pos" value="VNW" /> | ||
386 | + <output name="feat.getal" value="mv" /> | ||
387 | + <output name="feat.persoon" value="1" /> | ||
388 | + </condition> | ||
389 | + <condition value="405"> | ||
390 | + <output name="pos" value="VNW" /> | ||
391 | + <output name="feat.getal" value="mv" /> | ||
392 | + <output name="feat.persoon" value="2" /> | ||
393 | + </condition> | ||
394 | + <condition value="406"> | ||
395 | + <output name="pos" value="VNW" /> | ||
396 | + <output name="feat.getal" value="mv" /> | ||
397 | + <output name="feat.persoon" value="3" /> | ||
398 | + </condition> | ||
399 | + <condition value="409"> | ||
400 | + <output name="pos" value="VNW" /> | ||
401 | + <output name="feat.probleemgeval" /> | ||
402 | + </condition> | ||
403 | + <condition value="410,411,412,413,414,415,416,419"> | ||
404 | + <output name="pos" value="VNW" /> | ||
405 | + <output name="feat.vwtype" value="aanw" /> | ||
406 | + </condition> | ||
407 | + <condition value="420,421,422,423,424,425,426,429"> | ||
408 | + <output name="pos" value="VNW" /> | ||
409 | + <output name="feat.vwtype" value="betr" /> | ||
410 | + </condition> | ||
411 | + <condition value="430,431,432,433,434,435,436,439"> | ||
412 | + <output name="pos" value="VNW" /> | ||
413 | + <output name="feat.vwtype" value="vb" /> | ||
414 | + </condition> | ||
415 | + <condition value="434,441,442,443,444,445,446,449"> | ||
416 | + <output name="pos" value="VNW" /> | ||
417 | + <output name="feat.vwtype" value="vb" /> | ||
418 | + </condition> | ||
419 | + <condition value="440,441,442,443,444,445,446,449"> | ||
420 | + <output name="pos" value="VNW" /> | ||
421 | + <output name="feat.lwtype" value="onbep" /> | ||
422 | + </condition> | ||
423 | + <condition value="450,451,452,453,454,455,456,459"> | ||
424 | + <output name="pos" value="VNW" /> | ||
425 | + <output name="feat.vwtype" value="bez" /> | ||
426 | + </condition> | ||
427 | + <condition value="461"> | ||
428 | + <output name="pos" value="VNW" /> | ||
429 | + <output name="feat.vwtype" value="refl" /> | ||
430 | + <output name="feat.getal" value="ev" /> | ||
431 | + <output name="feat.persoon" value="1" /> | ||
432 | + </condition> | ||
433 | + <condition value="462"> | ||
434 | + <output name="pos" value="VNW" /> | ||
435 | + <output name="feat.vwtype" value="refl" /> | ||
436 | + <output name="feat.getal" value="ev" /> | ||
437 | + <output name="feat.persoon" value="2" /> | ||
438 | + </condition> | ||
439 | + <condition value="463"> | ||
440 | + <output name="pos" value="VNW" /> | ||
441 | + <output name="feat.vwtype" value="refl" /> | ||
442 | + <output name="feat.getal" value="ev" /> | ||
443 | + <output name="feat.persoon" value="3" /> | ||
444 | + </condition> | ||
445 | + <condition value="464"> | ||
446 | + <output name="pos" value="VNW" /> | ||
447 | + <output name="feat.vwtype" value="refl" /> | ||
448 | + <output name="feat.getal" value="mv" /> | ||
449 | + <output name="feat.persoon" value="1" /> | ||
450 | + </condition> | ||
451 | + <condition value="465"> | ||
452 | + <output name="pos" value="VNW" /> | ||
453 | + <output name="feat.vwtype" value="refl" /> | ||
454 | + <output name="feat.getal" value="mv" /> | ||
455 | + <output name="feat.persoon" value="2" /> | ||
456 | + </condition> | ||
457 | + <condition value="466"> | ||
458 | + <output name="pos" value="VNW" /> | ||
459 | + <output name="feat.vwtype" value="refl" /> | ||
460 | + <output name="feat.getal" value="mv" /> | ||
461 | + <output name="feat.persoon" value="3" /> | ||
462 | + </condition> | ||
463 | + <condition value="469"> | ||
464 | + <output name="pos" value="VNW" /> | ||
465 | + <output name="feat.vwtype" value="refl" /> | ||
466 | + <output name="feat.probleemgeval" /> | ||
467 | + </condition> | ||
468 | + <condition value="470,471,472,473,474,475,476,479"> | ||
469 | + <output name="pos" value="LID" /> | ||
470 | + </condition> | ||
471 | + <condition value="480,481,482,483,484,485,486,489"> | ||
472 | + <output name="pos" value="LID" /> | ||
473 | + </condition> | ||
474 | + <condition value="490,491,492,493,494,495,496,499"> | ||
475 | + <output name="pos" value="VNW" /> | ||
476 | + <output name="feat.probleemgeval" /> | ||
477 | + </condition> | ||
478 | + | ||
479 | + <condition value="500,501,502,503,504,505,506,509"> | ||
480 | + <output name="pos" value="BW" /> | ||
481 | + </condition> | ||
482 | + <condition value="510,511,512,513,514,515,516,519"> | ||
483 | + <output name="pos" value="BW" /> | ||
484 | + </condition> | ||
485 | + <condition value="520,521,522,523,524,525,526,529"> | ||
486 | + <output name="pos" value="BW" /> | ||
487 | + </condition> | ||
488 | + <condition value="530,531,532,533,534,535,536,539"> | ||
489 | + <output name="pos" value="BW" /> | ||
490 | + </condition> | ||
491 | + <condition value="540,541,542,543,544,545,546,549"> | ||
492 | + <output name="pos" value="BW" /> | ||
493 | + </condition> | ||
494 | + <condition value="550,551,552,553,554,555,556,559"> | ||
495 | + <output name="pos" value="BW" /> | ||
496 | + </condition> | ||
497 | + <condition value="560,561,562,563,564,565,566,569"> | ||
498 | + <output name="pos" value="BW" /> | ||
499 | + </condition> | ||
500 | + <condition value="590,591,592,593,594,595,596,599"> | ||
501 | + <output name="pos" value="BW" /> | ||
502 | + <output name="feat.probleemgeval" /> | ||
503 | + </condition> | ||
504 | + | ||
505 | + <condition value="600,601,602,603,604,605,606,609"> | ||
506 | + <output name="pos" value="BW" /> | ||
507 | + </condition> | ||
508 | + <condition value="610,611,612,613,614,615,616,619"> | ||
509 | + <output name="pos" value="BW" /> | ||
510 | + </condition> | ||
511 | + <condition value="620,621,622,623,624,625,626,629"> | ||
512 | + <output name="pos" value="BW" /> | ||
513 | + </condition> | ||
514 | + <condition value="630,631,632,633,634,635,636,639"> | ||
515 | + <output name="pos" value="BW" /> | ||
516 | + </condition> | ||
517 | + <condition value="640,641,642,643,644,645,646,649"> | ||
518 | + <output name="pos" value="BW" /> | ||
519 | + </condition> | ||
520 | + <condition value="650,651,652,653,654,655,656,659"> | ||
521 | + <output name="pos" value="BW" /> | ||
522 | + </condition> | ||
523 | + <condition value="690,691,692,693,694,695,696,699"> | ||
524 | + <output name="pos" value="BW" /> | ||
525 | + <output name="feat.probleemgeval" /> | ||
526 | + </condition> | ||
527 | + | ||
528 | + <condition value="700,701,702,703,704,705,706,709"> | ||
529 | + <output name="pos" value="VZ" /> | ||
530 | + </condition> | ||
531 | + <condition value="790,791,792,793,794,795,796,799"> | ||
532 | + <output name="pos" value="VZ" /> | ||
533 | + </condition> | ||
534 | + | ||
535 | + <condition value="800,801,802,803,804,805,806,809"> | ||
536 | + <output name="pos" value="VG" /> | ||
537 | + </condition> | ||
538 | + <condition value="810,811,812,813,814,815,816,819"> | ||
539 | + <output name="pos" value="VG" /> | ||
540 | + </condition> | ||
541 | + <condition value="820,821,822,823,824,825,826,829"> | ||
542 | + <output name="pos" value="VG" /> | ||
543 | + </condition> | ||
544 | + <condition value="830,831,832,833,834,835,836,839"> | ||
545 | + <output name="pos" value="VG" /> | ||
546 | + </condition> | ||
547 | + <condition value="840,841,842,843,844,845,846,849"> | ||
548 | + <output name="pos" value="VG" /> | ||
549 | + </condition> | ||
550 | + <condition value="850,851,852,853,854,855,856,859"> | ||
551 | + <output name="pos" value="VG" /> | ||
552 | + </condition> | ||
553 | + <condition value="860,861,862,863,864,865,866,869"> | ||
554 | + <output name="pos" value="VG" /> | ||
555 | + </condition> | ||
556 | + <condition value="870,871,872,873,874,875,876,879"> | ||
557 | + <output name="pos" value="VG" /> | ||
558 | + </condition> | ||
559 | + <condition value="880,881,882,883,884,885,886,889"> | ||
560 | + <output name="pos" value="VG" /> | ||
561 | + </condition> | ||
562 | + <condition value="890,891,892,893,894,895,896,899"> | ||
563 | + <output name="pos" value="VG" /> | ||
564 | + <output name="feat.probleemgeval" /> | ||
565 | + </condition> | ||
566 | + | ||
567 | + <condition value="900,901,902,903,904,905,906,909"> | ||
568 | + <output name="feat.probleemgeval" /> | ||
569 | + </condition> | ||
570 | + <condition value="900,901,902,903,904,905,906,909"> | ||
571 | + <output name="feat.probleemgeval" /> | ||
572 | + </condition> | ||
573 | + <condition value="990,991,992,993,994,995,996,999"> | ||
574 | + <output name="feat.probleemgeval" /> | ||
575 | + </condition> | ||
576 | + | ||
577 | + <condition | ||
578 | + value="001,011,021,091,101,111,191,201,211,221,231,241,251,261,271,281,291,301,311,321,391,411,421,431,441,451,471,481,491,501,511,521,531,541,551,561,591,601,611,621,631,641,651,691,701,791,801,811,821,831,841,851,861,871,881,891,901,911,991"> | ||
579 | + <output name="feat.form" value="-e" /> | ||
580 | + </condition> | ||
581 | + <condition | ||
582 | + value="002,012,022,092,102,112,192,202,212,222,232,242,252,262,272,282,292,302,312,322,392,412,422,432,442,452,472,482,492,502,512,522,532,542,552,562,592,602,612,622,632,642,652,692,702,792,802,812,822,832,842,852,862,872,882,892,902,912,992"> | ||
583 | + <output name="feat.form" value="-s/-th" /> | ||
584 | + </condition> | ||
585 | + <condition | ||
586 | + value="003,013,023,093,103,113,193,203,213,223,233,243,253,263,273,283,293,303,313,323,393,413,423,433,443,453,473,483,493,503,513,523,533,543,553,563,593,603,613,623,633,643,653,693,703,793,803,813,823,833,843,853,863,873,883,893,903,913,993"> | ||
587 | + <output name="feat.form" value="-t" /> | ||
588 | + </condition> | ||
589 | + <condition | ||
590 | + value="004,014,024,094,104,114,194,204,214,224,234,244,254,264,274,284,294,304,314,324,394,414,424,434,444,454,474,484,494,504,514,524,534,544,554,564,594,604,614,624,634,644,654,694,704,794,804,814,824,834,844,854,864,874,884,894,904,914,994"> | ||
591 | + <output name="feat.form" value="-n" /> | ||
592 | + </condition> | ||
593 | + <condition | ||
594 | + value="005,015,025,095,105,115,195,205,215,225,235,245,255,265,275,285,295,305,315,325,395,415,425,435,445,455,475,485,495,505,515,525,535,545,555,565,595,605,615,625,635,645,655,695,705,795,805,815,825,835,845,855,865,875,885,895,905,915,995"> | ||
595 | + <output name="feat.form" value="-r/-re" /> | ||
596 | + </condition> | ||
597 | + <condition | ||
598 | + value="006,016,026,096,106,116,196,206,216,226,236,246,256,266,276,286,296,306,316,326,396,416,426,436,446,456,476,486,496,506,516,526,536,546,556,566,596,606,616,626,636,646,656,696,706,796,806,816,826,836,846,856,866,876,886,896,906,916,996"> | ||
599 | + <output name="feat.form" value="-a" /> | ||
600 | + </condition> | ||
601 | + <condition value="009,019,029,099"> | ||
602 | + <output name="feat.form" value="unclear" /> | ||
603 | + </condition> | ||
604 | + | ||
605 | + </function> | ||
606 | + </functions> | ||
607 | + | ||
608 | + </parser> | ||
609 | + <!-- END CONFIGURATION MTAS FOLIA PARSER --> | ||
610 | + | ||
611 | + | ||
612 | +</mtas> | ||
0 | \ No newline at end of file | 613 | \ No newline at end of file |
conf/parser/mtas/elan_mks.xml
@@ -17,9 +17,10 @@ | @@ -17,9 +17,10 @@ | ||
17 | <!-- START CONFIGURATION MTAS FOLIA PARSER --> | 17 | <!-- START CONFIGURATION MTAS FOLIA PARSER --> |
18 | <parser name="mtas.analysis.parser.MtasElanParser"> | 18 | <parser name="mtas.analysis.parser.MtasElanParser"> |
19 | 19 | ||
20 | - <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> | 20 | + <!-- START GENERAL SETTINGS MTAS PARSER --> |
21 | <autorepair value="true" /> | 21 | <autorepair value="true" /> |
22 | - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | 22 | + <makeunique value="true" /> |
23 | + <!-- END GENERAL SETTINGS MTAS PARSER --> | ||
23 | 24 | ||
24 | <!-- START REFERENCES --> | 25 | <!-- START REFERENCES --> |
25 | <references> | 26 | <references> |
conf/parser/mtas/folia_dbnl.xml
@@ -19,7 +19,8 @@ | @@ -19,7 +19,8 @@ | ||
19 | 19 | ||
20 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> | 20 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> |
21 | <autorepair value="true" /> | 21 | <autorepair value="true" /> |
22 | - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | 22 | + <makeunique value="true" /> |
23 | + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | ||
23 | 24 | ||
24 | <!-- START REFERENCES --> | 25 | <!-- START REFERENCES --> |
25 | <references> | 26 | <references> |
conf/parser/mtas/folia_ddd.xml
0 → 100644
1 | +<?xml version="1.0" encoding="UTF-8" ?> | ||
2 | +<mtas> | ||
3 | + | ||
4 | + <!-- START MTAS INDEX CONFIGURATION --> | ||
5 | + <index> | ||
6 | + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS --> | ||
7 | + <payload index="false" /> | ||
8 | + <offset index="false" /> | ||
9 | + <realoffset index="false" /> | ||
10 | + <parent index="true" /> | ||
11 | + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS --> | ||
12 | + </index> | ||
13 | + <!-- END MTAS INDEX CONFIGURATION --> | ||
14 | + | ||
15 | + | ||
16 | + | ||
17 | + <!-- START CONFIGURATION MTAS FOLIA PARSER --> | ||
18 | + <parser name="mtas.analysis.parser.MtasFoliaParser"> | ||
19 | + | ||
20 | + <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> | ||
21 | + <autorepair value="true" /> | ||
22 | + <makeunique value="true" /> | ||
23 | + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | ||
24 | + | ||
25 | + <!-- START REFERENCES --> | ||
26 | + <references> | ||
27 | + <reference name="wref" ref="id" /> | ||
28 | + </references> | ||
29 | + <!-- END REFERENCES --> | ||
30 | + | ||
31 | + <!-- START MAPPINGS --> | ||
32 | + <mappings> | ||
33 | + | ||
34 | + <!-- START WORDS --> | ||
35 | + <mapping type="word" name="w"> | ||
36 | + </mapping> | ||
37 | + <mapping type="word" name="w"> | ||
38 | + <token type="string" offset="false" realoffset="false" parent="false"> | ||
39 | + <pre> | ||
40 | + <item type="name" /> | ||
41 | + </pre> | ||
42 | + <post> | ||
43 | + <item type="attribute" name="class" /> | ||
44 | + </post> | ||
45 | + </token> | ||
46 | + <condition> | ||
47 | + <item type="attribute" name="class" /> | ||
48 | + <item type="attribute" name="class" not="true" condition="WORD" /> | ||
49 | + </condition> | ||
50 | + </mapping> | ||
51 | + <!-- END WORDS --> | ||
52 | + | ||
53 | + <!-- START WORD ANNOTATIONS --> | ||
54 | + <mapping type="wordAnnotation" name="t"> | ||
55 | + <token type="string" offset="false"> | ||
56 | + <pre> | ||
57 | + <item type="name" /> | ||
58 | + </pre> | ||
59 | + <post> | ||
60 | + <item type="text" /> | ||
61 | + </post> | ||
62 | + </token> | ||
63 | + <token type="string" offset="false" realoffset="false" parent="false"> | ||
64 | + <pre> | ||
65 | + <item type="name" /> | ||
66 | + <item type="string" value="_lc" /> | ||
67 | + </pre> | ||
68 | + <post> | ||
69 | + <item type="text" filter="ascii,lowercase" /> | ||
70 | + </post> | ||
71 | + </token> | ||
72 | + <condition> | ||
73 | + <item type="ancestor" number="0" /> | ||
74 | + <item type="ancestorWord" number="1" /> | ||
75 | + <item type="unknownAncestor" number="0" /> | ||
76 | + </condition> | ||
77 | + </mapping> | ||
78 | + <!-- END WORD ANNOTATIONS --> | ||
79 | + | ||
80 | + <!-- START RELATIONS --> | ||
81 | + <!-- END RELATIONS --> | ||
82 | + | ||
83 | + <!-- START GROUPS --> | ||
84 | + <mapping type="group" name="s"> | ||
85 | + <token type="string" offset="false"> | ||
86 | + <pre> | ||
87 | + <item type="name" /> | ||
88 | + </pre> | ||
89 | + <post> | ||
90 | + <item type="attribute" name="class" /> | ||
91 | + </post> | ||
92 | + </token> | ||
93 | + </mapping> | ||
94 | + <mapping type="group" name="p"> | ||
95 | + <token type="string" offset="false"> | ||
96 | + <pre> | ||
97 | + <item type="name" /> | ||
98 | + </pre> | ||
99 | + <post> | ||
100 | + <item type="attribute" name="class" /> | ||
101 | + </post> | ||
102 | + </token> | ||
103 | + </mapping> | ||
104 | + <mapping type="group" name="div"> | ||
105 | + <token type="string" offset="false"> | ||
106 | + <pre> | ||
107 | + <item type="name" /> | ||
108 | + </pre> | ||
109 | + <post> | ||
110 | + <item type="attribute" name="class" /> | ||
111 | + </post> | ||
112 | + </token> | ||
113 | + </mapping> | ||
114 | + <mapping type="group" name="head"> | ||
115 | + <token type="string" offset="false"> | ||
116 | + <pre> | ||
117 | + <item type="name" /> | ||
118 | + </pre> | ||
119 | + <post> | ||
120 | + <item type="attribute" name="class" /> | ||
121 | + </post> | ||
122 | + </token> | ||
123 | + </mapping> | ||
124 | + <!-- END GROUPS --> | ||
125 | + | ||
126 | + <!-- START GROUP ANNOTATIONS --> | ||
127 | + <mapping type="groupAnnotation" name="lang"> | ||
128 | + <token type="string" offset="false" realoffset="false" parent="false"> | ||
129 | + <pre> | ||
130 | + <item type="name" /> | ||
131 | + </pre> | ||
132 | + <post> | ||
133 | + <item type="attribute" name="class" /> | ||
134 | + </post> | ||
135 | + </token> | ||
136 | + </mapping> | ||
137 | + <!-- END GROUP ANNOTATIONS --> | ||
138 | + | ||
139 | + </mappings> | ||
140 | + <!-- END MAPPINGS --> | ||
141 | + | ||
142 | + </parser> | ||
143 | + <!-- END CONFIGURATION MTAS FOLIA PARSER --> | ||
144 | + | ||
145 | + | ||
146 | +</mtas> | ||
0 | \ No newline at end of file | 147 | \ No newline at end of file |
conf/parser/mtas/folia_edbo.xml
@@ -17,6 +17,7 @@ | @@ -17,6 +17,7 @@ | ||
17 | 17 | ||
18 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> | 18 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> |
19 | <autorepair value="true" /> | 19 | <autorepair value="true" /> |
20 | + <makeunique value="true" /> | ||
20 | <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | 21 | <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> |
21 | 22 | ||
22 | <!-- START REFERENCES --> | 23 | <!-- START REFERENCES --> |
@@ -72,30 +73,6 @@ | @@ -72,30 +73,6 @@ | ||
72 | <item type="unknownAncestor" number="0" /> | 73 | <item type="unknownAncestor" number="0" /> |
73 | </condition> | 74 | </condition> |
74 | </mapping> | 75 | </mapping> |
75 | - <mapping type="wordAnnotation" name="aref"> | ||
76 | - <token type="string" offset="false"> | ||
77 | - <pre> | ||
78 | - <item type="string" value="translated.t" /> | ||
79 | - </pre> | ||
80 | - <post> | ||
81 | - <item type="attribute" name="t" /> | ||
82 | - </post> | ||
83 | - </token> | ||
84 | - <token type="string" offset="false" realoffset="false" parent="false"> | ||
85 | - <pre> | ||
86 | - <item type="string" value="translated.t" /> | ||
87 | - <item type="string" value="_lc" /> | ||
88 | - </pre> | ||
89 | - <post> | ||
90 | - <item type="attribute" name="t" filter="ascii,lowercase" /> | ||
91 | - </post> | ||
92 | - </token> | ||
93 | - <condition> | ||
94 | - <item type="ancestor" number="0" /> | ||
95 | - <item type="ancestorWord" number="1" /> | ||
96 | - <item type="unknownAncestor" number="1" /> | ||
97 | - </condition> | ||
98 | - </mapping> | ||
99 | <mapping type="wordAnnotation" name="lemma"> | 76 | <mapping type="wordAnnotation" name="lemma"> |
100 | <token type="string" offset="false" realoffset="false" parent="false"> | 77 | <token type="string" offset="false" realoffset="false" parent="false"> |
101 | <pre> | 78 | <pre> |
@@ -109,24 +86,6 @@ | @@ -109,24 +86,6 @@ | ||
109 | <item type="attribute" name="class" /> | 86 | <item type="attribute" name="class" /> |
110 | <item type="ancestor" number="0" /> | 87 | <item type="ancestor" number="0" /> |
111 | <item type="unknownAncestor" number="0" /> | 88 | <item type="unknownAncestor" number="0" /> |
112 | - <item type="attribute" name="set" condition="original.http://ilk.uvt.nl/folia/sets/frog-mblem-nl" /> | ||
113 | - </condition> | ||
114 | - </mapping> | ||
115 | - <mapping type="wordAnnotation" name="lemma"> | ||
116 | - <token type="string" offset="false" realoffset="false" parent="false"> | ||
117 | - <pre> | ||
118 | - <item type="string" value="translated." /> | ||
119 | - <item type="name" /> | ||
120 | - </pre> | ||
121 | - <post> | ||
122 | - <item type="attribute" name="class" /> | ||
123 | - </post> | ||
124 | - </token> | ||
125 | - <condition> | ||
126 | - <item type="attribute" name="class" /> | ||
127 | - <item type="ancestor" number="0" /> | ||
128 | - <item type="unknownAncestor" number="1" /> | ||
129 | - <item type="attribute" name="set" condition="translated.http://ilk.uvt.nl/folia/sets/frog-mblem-nl" /> | ||
130 | </condition> | 89 | </condition> |
131 | </mapping> | 90 | </mapping> |
132 | <mapping type="wordAnnotation" name="morphology"> | 91 | <mapping type="wordAnnotation" name="morphology"> |
@@ -166,54 +125,11 @@ | @@ -166,54 +125,11 @@ | ||
166 | <item type="ancestor" number="0" /> | 125 | <item type="ancestor" number="0" /> |
167 | <item type="unknownAncestor" number="0" /> | 126 | <item type="unknownAncestor" number="0" /> |
168 | <item type="attribute" name="class" /> | 127 | <item type="attribute" name="class" /> |
169 | - <item type="attribute" name="set" condition="original.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" /> | ||
170 | - </condition> | ||
171 | - </mapping> | ||
172 | - <mapping type="wordAnnotation" name="pos"> | ||
173 | - <token type="string" offset="false" realoffset="false" parent="false"> | ||
174 | - <pre> | ||
175 | - <item type="string" value="translated." /> | ||
176 | - <item type="name" /> | ||
177 | - </pre> | ||
178 | - <post> | ||
179 | - <item type="attribute" name="head" /> | ||
180 | - </post> | ||
181 | - <payload> | ||
182 | - <item type="attribute" name="confidence" /> | ||
183 | - </payload> | ||
184 | - </token> | ||
185 | - <condition> | ||
186 | - <item type="ancestor" number="0" /> | ||
187 | - <item type="unknownAncestor" number="1" /> | ||
188 | - <item type="attribute" name="class" /> | ||
189 | - <item type="attribute" name="set" condition="translated.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" /> | ||
190 | - </condition> | ||
191 | - </mapping> | ||
192 | - <mapping type="wordAnnotation" name="feat"> | ||
193 | - <token type="string" offset="false" realoffset="false" parent="false"> | ||
194 | - <pre> | ||
195 | - <item type="name" /> | ||
196 | - <item type="attribute" name="subset" prefix="." /> | ||
197 | - </pre> | ||
198 | - <post> | ||
199 | - <item type="attribute" name="class" /> | ||
200 | - </post> | ||
201 | - <payload> | ||
202 | - <item type="ancestorAttribute" distance="0" name="confidence" /> | ||
203 | - </payload> | ||
204 | - </token> | ||
205 | - <condition> | ||
206 | - <item type="ancestor" number="1" /> | ||
207 | - <item type="unknownAncestor" number="0" /> | ||
208 | - <item type="attribute" name="class" /> | ||
209 | - <item type="attribute" name="subset" /> | ||
210 | - <item type="ancestorAttribute" name="set" condition="original.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" /> | ||
211 | </condition> | 128 | </condition> |
212 | </mapping> | 129 | </mapping> |
213 | <mapping type="wordAnnotation" name="feat"> | 130 | <mapping type="wordAnnotation" name="feat"> |
214 | <token type="string" offset="false" realoffset="false" parent="false"> | 131 | <token type="string" offset="false" realoffset="false" parent="false"> |
215 | <pre> | 132 | <pre> |
216 | - <item type="string" value="translated." /> | ||
217 | <item type="name" /> | 133 | <item type="name" /> |
218 | <item type="attribute" name="subset" prefix="." /> | 134 | <item type="attribute" name="subset" prefix="." /> |
219 | </pre> | 135 | </pre> |
@@ -229,7 +145,6 @@ | @@ -229,7 +145,6 @@ | ||
229 | <item type="unknownAncestor" number="0" /> | 145 | <item type="unknownAncestor" number="0" /> |
230 | <item type="attribute" name="class" /> | 146 | <item type="attribute" name="class" /> |
231 | <item type="attribute" name="subset" /> | 147 | <item type="attribute" name="subset" /> |
232 | - <item type="ancestorAttribute" name="set" condition="translated.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" /> | ||
233 | </condition> | 148 | </condition> |
234 | </mapping> | 149 | </mapping> |
235 | <!-- END WORD ANNOTATIONS --> | 150 | <!-- END WORD ANNOTATIONS --> |
conf/parser/mtas/folia_mimore.xml
@@ -18,7 +18,8 @@ | @@ -18,7 +18,8 @@ | ||
18 | 18 | ||
19 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> | 19 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> |
20 | <autorepair value="false" /> | 20 | <autorepair value="false" /> |
21 | - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | 21 | + <makeunique value="true" /> |
22 | + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | ||
22 | 23 | ||
23 | <!-- START REFERENCES --> | 24 | <!-- START REFERENCES --> |
24 | <references> | 25 | <references> |
conf/parser/mtas/folia_mtas.xml
@@ -19,7 +19,8 @@ | @@ -19,7 +19,8 @@ | ||
19 | 19 | ||
20 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> | 20 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> |
21 | <autorepair value="true" /> | 21 | <autorepair value="true" /> |
22 | - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | 22 | + <makeunique value="true" /> |
23 | + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | ||
23 | 24 | ||
24 | <!-- START REFERENCES --> | 25 | <!-- START REFERENCES --> |
25 | <references> | 26 | <references> |
conf/parser/mtas/folia_oeaw.xml
0 → 100644
1 | +<?xml version="1.0" encoding="UTF-8" ?> | ||
2 | +<mtas> | ||
3 | + | ||
4 | + <!-- START MTAS INDEX CONFIGURATION --> | ||
5 | + <index> | ||
6 | + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS --> | ||
7 | + <payload index="false" /> | ||
8 | + <offset index="false" /> | ||
9 | + <realoffset index="false" /> | ||
10 | + <parent index="true" /> | ||
11 | + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS --> | ||
12 | + </index> | ||
13 | + <!-- END MTAS INDEX CONFIGURATION --> | ||
14 | + | ||
15 | + | ||
16 | + | ||
17 | + <!-- START CONFIGURATION MTAS FOLIA PARSER --> | ||
18 | + <parser name="mtas.analysis.parser.MtasFoliaParser"> | ||
19 | + | ||
20 | + <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> | ||
21 | + <autorepair value="true" /> | ||
22 | + <makeunique value="true" /> | ||
23 | + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | ||
24 | + | ||
25 | + <!-- START REFERENCES --> | ||
26 | + <references> | ||
27 | + <reference name="wref" ref="id" /> | ||
28 | + </references> | ||
29 | + <!-- END REFERENCES --> | ||
30 | + | ||
31 | + <!-- START MAPPINGS --> | ||
32 | + <mappings> | ||
33 | + | ||
34 | + <!-- START WORDS --> | ||
35 | + <mapping type="word" name="w"> | ||
36 | + </mapping> | ||
37 | + <mapping type="word" name="w"> | ||
38 | + <token type="string" offset="false" realoffset="false" parent="false"> | ||
39 | + <pre> | ||
40 | + <item type="name" /> | ||
41 | + </pre> | ||
42 | + <post> | ||
43 | + <item type="attribute" name="class" /> | ||
44 | + </post> | ||
45 | + </token> | ||
46 | + <condition> | ||
47 | + <item type="attribute" name="class" /> | ||
48 | + <item type="attribute" name="class" not="true" condition="WORD" /> | ||
49 | + </condition> | ||
50 | + </mapping> | ||
51 | + <!-- END WORDS --> | ||
52 | + | ||
53 | + <!-- START WORD ANNOTATIONS --> | ||
54 | + <mapping type="wordAnnotation" name="t"> | ||
55 | + <token type="string" offset="false"> | ||
56 | + <pre> | ||
57 | + <item type="name" /> | ||
58 | + </pre> | ||
59 | + <post> | ||
60 | + <item type="text" /> | ||
61 | + </post> | ||
62 | + </token> | ||
63 | + <token type="string" offset="false" realoffset="false" parent="false"> | ||
64 | + <pre> | ||
65 | + <item type="name" /> | ||
66 | + <item type="string" value="_lc" /> | ||
67 | + </pre> | ||
68 | + <post> | ||
69 | + <item type="text" filter="ascii,lowercase" /> | ||
70 | + </post> | ||
71 | + </token> | ||
72 | + <condition> | ||
73 | + <item type="ancestor" number="0" /> | ||
74 | + <item type="ancestorWord" number="1" /> | ||
75 | + <item type="unknownAncestor" number="0" /> | ||
76 | + </condition> | ||
77 | + </mapping> | ||
78 | + <mapping type="wordAnnotation" name="lemma"> | ||
79 | + <token type="string" offset="false" realoffset="false" parent="false"> | ||
80 | + <pre> | ||
81 | + <item type="name" /> | ||
82 | + </pre> | ||
83 | + <post> | ||
84 | + <item type="attribute" name="class" /> | ||
85 | + </post> | ||
86 | + </token> | ||
87 | + <condition> | ||
88 | + <item type="attribute" name="class" /> | ||
89 | + <item type="ancestor" number="0" /> | ||
90 | + <item type="unknownAncestor" number="0" /> | ||
91 | + </condition> | ||
92 | + </mapping> | ||
93 | + <mapping type="wordAnnotation" name="pos"> | ||
94 | + <token type="string" offset="false" realoffset="false" parent="false"> | ||
95 | + <pre> | ||
96 | + <item type="attribute" name="set" /> | ||
97 | + </pre> | ||
98 | + <post> | ||
99 | + <item type="attribute" name="head" /> | ||
100 | + </post> | ||
101 | + </token> | ||
102 | + <condition> | ||
103 | + <item type="ancestor" number="0" /> | ||
104 | + <item type="unknownAncestor" number="0" /> | ||
105 | + <item type="attribute" name="class" /> | ||
106 | + <item type="attribute" name="set" /> | ||
107 | + </condition> | ||
108 | + </mapping> | ||
109 | + <mapping type="wordAnnotation" name="feat"> | ||
110 | + <token type="string" offset="false" realoffset="false" parent="false"> | ||
111 | + <pre> | ||
112 | + <item type="name" /> | ||
113 | + <item type="attribute" name="subset" prefix="." /> | ||
114 | + </pre> | ||
115 | + <post> | ||
116 | + <item type="attribute" name="class" /> | ||
117 | + </post> | ||
118 | + </token> | ||
119 | + <condition> | ||
120 | + <item type="ancestor" number="1" /> | ||
121 | + <item type="unknownAncestor" number="0" /> | ||
122 | + <item type="attribute" name="class" /> | ||
123 | + <item type="attribute" name="subset" /> | ||
124 | + </condition> | ||
125 | + </mapping> | ||
126 | + <!-- END WORD ANNOTATIONS --> | ||
127 | + | ||
128 | + <!-- START RELATIONS --> | ||
129 | + <mapping type="relation" name="entities"> | ||
130 | + </mapping> | ||
131 | + <mapping type="relation" name="entity"> | ||
132 | + <token type="string" offset="false" realoffset="false" parent="false"> | ||
133 | + <pre> | ||
134 | + <item type="name" /> | ||
135 | + </pre> | ||
136 | + <post> | ||
137 | + <item type="attribute" name="class" /> | ||
138 | + </post> | ||
139 | + </token> | ||
140 | + <condition> | ||
141 | + <item type="ancestor" number="1" /> | ||
142 | + <item type="ancestorName" condition="entities" /> | ||
143 | + </condition> | ||
144 | + </mapping> | ||
145 | + <!-- END RELATIONS --> | ||
146 | + | ||
147 | + <!-- START RELATION ANNOTATIONS --> | ||
148 | + <mapping type="relationAnnotation" name="feat"> | ||
149 | + <token type="string" offset="false" realoffset="false"> | ||
150 | + <pre> | ||
151 | + <item type="ancestorRelationName" /> | ||
152 | + <item type="name" prefix="." /> | ||
153 | + <item type="attribute" name="subset" prefix="." /> | ||
154 | + </pre> | ||
155 | + <post> | ||
156 | + <item type="attribute" name="class" /> | ||
157 | + </post> | ||
158 | + </token> | ||
159 | + </mapping> | ||
160 | + <!-- END RELATION ANNOTATIONS --> | ||
161 | + | ||
162 | + <!-- START GROUPS --> | ||
163 | + <mapping type="group" name="s"> | ||
164 | + <token type="string" offset="false"> | ||
165 | + <pre> | ||
166 | + <item type="name" /> | ||
167 | + </pre> | ||
168 | + <post> | ||
169 | + <item type="attribute" name="class" /> | ||
170 | + </post> | ||
171 | + </token> | ||
172 | + </mapping> | ||
173 | + <mapping type="group" name="p"> | ||
174 | + <token type="string" offset="false"> | ||
175 | + <pre> | ||
176 | + <item type="name" /> | ||
177 | + </pre> | ||
178 | + <post> | ||
179 | + <item type="attribute" name="class" /> | ||
180 | + </post> | ||
181 | + </token> | ||
182 | + </mapping> | ||
183 | + <mapping type="group" name="div"> | ||
184 | + <token type="string" offset="false"> | ||
185 | + <pre> | ||
186 | + <item type="name" /> | ||
187 | + </pre> | ||
188 | + <post> | ||
189 | + <item type="attribute" name="class" /> | ||
190 | + </post> | ||
191 | + </token> | ||
192 | + </mapping> | ||
193 | + <!-- END GROUPS --> | ||
194 | + | ||
195 | + <!-- START GROUP ANNOTATIONS --> | ||
196 | + <!-- END GROUP ANNOTATIONS --> | ||
197 | + | ||
198 | + </mappings> | ||
199 | + <!-- END MAPPINGS --> | ||
200 | + | ||
201 | + </parser> | ||
202 | + <!-- END CONFIGURATION MTAS FOLIA PARSER --> | ||
203 | + | ||
204 | + | ||
205 | +</mtas> | ||
0 | \ No newline at end of file | 206 | \ No newline at end of file |
conf/parser/mtas/folia_sonar.xml
@@ -18,6 +18,7 @@ | @@ -18,6 +18,7 @@ | ||
18 | 18 | ||
19 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> | 19 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> |
20 | <autorepair value="true" /> | 20 | <autorepair value="true" /> |
21 | + <makeunique value="true" /> | ||
21 | <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | 22 | <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> |
22 | 23 | ||
23 | <!-- START REFERENCES --> | 24 | <!-- START REFERENCES --> |
conf/parser/mtas/folia_test.xml
@@ -19,7 +19,8 @@ | @@ -19,7 +19,8 @@ | ||
19 | 19 | ||
20 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> | 20 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> |
21 | <autorepair value="true" /> | 21 | <autorepair value="true" /> |
22 | - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | 22 | + <makeunique value="true" /> |
23 | + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | ||
23 | 24 | ||
24 | <!-- START REFERENCES --> | 25 | <!-- START REFERENCES --> |
25 | <references> | 26 | <references> |
conf/parser/mtas/sketch_acdh.xml
@@ -18,7 +18,8 @@ | @@ -18,7 +18,8 @@ | ||
18 | <parser name="mtas.analysis.parser.MtasSketchParser"> | 18 | <parser name="mtas.analysis.parser.MtasSketchParser"> |
19 | <!-- START GENERAL SETTINGS MTAS SKETCH PARSER --> | 19 | <!-- START GENERAL SETTINGS MTAS SKETCH PARSER --> |
20 | <autorepair value="true" /> | 20 | <autorepair value="true" /> |
21 | - <!-- END GENERAL SETTINGS MTAS SKETCH PARSER --> | 21 | + <makeunique value="true" /> |
22 | + <!-- END GENERAL SETTINGS MTAS SKETCH PARSER --> | ||
22 | 23 | ||
23 | <mappings> | 24 | <mappings> |
24 | 25 |
conf/parser/mtas/tei_test.xml
@@ -19,7 +19,8 @@ | @@ -19,7 +19,8 @@ | ||
19 | 19 | ||
20 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> | 20 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> |
21 | <autorepair value="true" /> | 21 | <autorepair value="true" /> |
22 | - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | 22 | + <makeunique value="true" /> |
23 | + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | ||
23 | 24 | ||
24 | <!-- START REFERENCES --> | 25 | <!-- START REFERENCES --> |
25 | <references> | 26 | <references> |
conf/parser/mtasSource.xml
0 → 100644
1 | +<?xml version="1.0" encoding="UTF-8" ?> | ||
2 | +<mtas> | ||
3 | + <configurations type="mtas.analysis.util.MtasTokenizerFactory"> | ||
4 | + <configuration name="EDBO" file="mtasSource/folia_edbo.xml" /> | ||
5 | + </configurations> | ||
6 | + <configurations type="mtas.analysis.util.MtasCharFilterFactory"> | ||
7 | + <configuration name="EDBO" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" /> | ||
8 | + </configurations> | ||
9 | +</mtas> |
conf/parser/mtasSource/folia_edbo.xml
0 → 100644
1 | +<?xml version="1.0" encoding="UTF-8" ?> | ||
2 | +<mtas> | ||
3 | + | ||
4 | + <!-- START MTAS INDEX CONFIGURATION --> | ||
5 | + <index> | ||
6 | + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS --> | ||
7 | + <payload index="false" /> | ||
8 | + <offset index="false" /> | ||
9 | + <realoffset index="false" /> | ||
10 | + <parent index="true" /> | ||
11 | + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS --> | ||
12 | + </index> | ||
13 | + <!-- END MTAS INDEX CONFIGURATION --> | ||
14 | + | ||
15 | + <!-- START CONFIGURATION MTAS FOLIA PARSER --> | ||
16 | + <parser name="mtas.analysis.parser.MtasFoliaParser"> | ||
17 | + | ||
18 | + <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> | ||
19 | + <autorepair value="true" /> | ||
20 | + <makeunique value="true" /> | ||
21 | + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | ||
22 | + | ||
23 | + <!-- START REFERENCES --> | ||
24 | + <references> | ||
25 | + </references> | ||
26 | + <!-- END REFERENCES --> | ||
27 | + | ||
28 | + <!-- START MAPPINGS --> | ||
29 | + <mappings> | ||
30 | + | ||
31 | + <!-- START WORDS --> | ||
32 | + <mapping type="word" name="str"> | ||
33 | + </mapping> | ||
34 | + <!-- END WORDS --> | ||
35 | + | ||
36 | + <!-- START WORD ANNOTATIONS --> | ||
37 | + <mapping type="wordAnnotation" name="t"> | ||
38 | + <token type="string" offset="false"> | ||
39 | + <pre> | ||
40 | + <item type="name" /> | ||
41 | + </pre> | ||
42 | + <post> | ||
43 | + <item type="text" /> | ||
44 | + </post> | ||
45 | + </token> | ||
46 | + <token type="string" offset="false" realoffset="false" parent="false"> | ||
47 | + <pre> | ||
48 | + <item type="name" /> | ||
49 | + <item type="string" value="_lc" /> | ||
50 | + </pre> | ||
51 | + <post> | ||
52 | + <item type="text" filter="ascii,lowercase" /> | ||
53 | + </post> | ||
54 | + </token> | ||
55 | + <condition> | ||
56 | + <item type="ancestor" number="0" /> | ||
57 | + <item type="ancestorWord" number="1" /> | ||
58 | + <item type="unknownAncestor" number="0" /> | ||
59 | + <item type="attribute" name="class" condition="Ticcl"/> | ||
60 | + </condition> | ||
61 | + </mapping> | ||
62 | + <mapping type="wordAnnotation" name="correction"> | ||
63 | + </mapping> | ||
64 | + <mapping type="wordAnnotation" name="new"> | ||
65 | + </mapping> | ||
66 | + <mapping type="wordAnnotation" name="original"> | ||
67 | + </mapping> | ||
68 | + <mapping type="wordAnnotation" name="suggestion"> | ||
69 | + </mapping> | ||
70 | + <mapping type="wordAnnotation" name="t"> | ||
71 | + <token type="string" offset="false"> | ||
72 | + <pre> | ||
73 | + <item type="name" /> | ||
74 | + </pre> | ||
75 | + <post> | ||
76 | + <item type="text" /> | ||
77 | + </post> | ||
78 | + </token> | ||
79 | + <token type="string" offset="false" realoffset="false" parent="false"> | ||
80 | + <pre> | ||
81 | + <item type="name" /> | ||
82 | + <item type="string" value="_lc" /> | ||
83 | + </pre> | ||
84 | + <post> | ||
85 | + <item type="text" filter="ascii,lowercase" /> | ||
86 | + </post> | ||
87 | + </token> | ||
88 | + <condition> | ||
89 | + <item type="ancestor" number="2" /> | ||
90 | + <item type="ancestorName" condition="new" /> | ||
91 | + <item type="unknownAncestor" number="0" /> | ||
92 | + <item type="attribute" name="class" condition="Ticcl"/> | ||
93 | + </condition> | ||
94 | + </mapping> | ||
95 | + <mapping type="wordAnnotation" name="t"> | ||
96 | + <token type="string" offset="false"> | ||
97 | + <pre> | ||
98 | + <item type="name" /> | ||
99 | + <item type="ancestorName" prefix="."/> | ||
100 | + </pre> | ||
101 | + <post> | ||
102 | + <item type="text" /> | ||
103 | + </post> | ||
104 | + </token> | ||
105 | + <token type="string" offset="false" realoffset="false" parent="false"> | ||
106 | + <pre> | ||
107 | + <item type="name" /> | ||
108 | + <item type="string" value="_lc" /> | ||
109 | + <item type="ancestorName" prefix="."/> | ||
110 | + </pre> | ||
111 | + <post> | ||
112 | + <item type="text" filter="ascii,lowercase" /> | ||
113 | + </post> | ||
114 | + </token> | ||
115 | + <condition> | ||
116 | + <item type="ancestor" number="2" /> | ||
117 | + <item type="ancestorName" condition="original" /> | ||
118 | + <item type="unknownAncestor" number="0" /> | ||
119 | + </condition> | ||
120 | + </mapping> | ||
121 | + <mapping type="wordAnnotation" name="t"> | ||
122 | + <token type="string" offset="false"> | ||
123 | + <pre> | ||
124 | + <item type="name" /> | ||
125 | + <item type="ancestorName" prefix="."/> | ||
126 | + </pre> | ||
127 | + <post> | ||
128 | + <item type="text" /> | ||
129 | + </post> | ||
130 | + </token> | ||
131 | + <token type="string" offset="false" realoffset="false" parent="false"> | ||
132 | + <pre> | ||
133 | + <item type="name" /> | ||
134 | + <item type="string" value="_lc" /> | ||
135 | + <item type="ancestorName" prefix="."/> | ||
136 | + </pre> | ||
137 | + <post> | ||
138 | + <item type="text" filter="ascii,lowercase" /> | ||
139 | + </post> | ||
140 | + </token> | ||
141 | + <condition> | ||
142 | + <item type="ancestor" number="2" /> | ||
143 | + <item type="ancestorName" condition="suggestion" /> | ||
144 | + <item type="unknownAncestor" number="0" /> | ||
145 | + </condition> | ||
146 | + </mapping> | ||
147 | + <!-- END WORD ANNOTATIONS --> | ||
148 | + | ||
149 | + <!-- START RELATIONS --> | ||
150 | + <!-- END RELATIONS --> | ||
151 | + | ||
152 | + <!-- START GROUPS --> | ||
153 | + <mapping type="group" name="p"> | ||
154 | + <token type="string" offset="false"> | ||
155 | + <pre> | ||
156 | + <item type="name" /> | ||
157 | + </pre> | ||
158 | + <post> | ||
159 | + <item type="attribute" name="class" /> | ||
160 | + </post> | ||
161 | + </token> | ||
162 | + </mapping> | ||
163 | + <mapping type="group" name="div"> | ||
164 | + <token type="string" offset="false"> | ||
165 | + <pre> | ||
166 | + <item type="name" /> | ||
167 | + </pre> | ||
168 | + <post> | ||
169 | + <item type="attribute" name="class" /> | ||
170 | + </post> | ||
171 | + </token> | ||
172 | + </mapping> | ||
173 | + <mapping type="group" name="head"> | ||
174 | + <token type="string" offset="false"> | ||
175 | + <pre> | ||
176 | + <item type="name" /> | ||
177 | + </pre> | ||
178 | + <post> | ||
179 | + <item type="attribute" name="class" /> | ||
180 | + </post> | ||
181 | + </token> | ||
182 | + </mapping> | ||
183 | + <!-- END GROUPS --> | ||
184 | + | ||
185 | + <!-- START GROUP ANNOTATIONS --> | ||
186 | + <mapping type="groupAnnotation" name="lang"> | ||
187 | + <token type="string" offset="false" realoffset="false" parent="false"> | ||
188 | + <pre> | ||
189 | + <item type="name" /> | ||
190 | + </pre> | ||
191 | + <post> | ||
192 | + <item type="attribute" name="class" /> | ||
193 | + </post> | ||
194 | + </token> | ||
195 | + </mapping> | ||
196 | + <!-- END GROUP ANNOTATIONS --> | ||
197 | + | ||
198 | + </mappings> | ||
199 | + <!-- END MAPPINGS --> | ||
200 | + | ||
201 | + </parser> | ||
202 | + <!-- END CONFIGURATION MTAS FOLIA PARSER --> | ||
203 | + | ||
204 | +</mtas> | ||
0 | \ No newline at end of file | 205 | \ No newline at end of file |
conf/solr/schemaNederlab.xml
@@ -255,8 +255,8 @@ | @@ -255,8 +255,8 @@ | ||
255 | <field name="NLContent_folia_available" type="nederlab_boolean" | 255 | <field name="NLContent_folia_available" type="nederlab_boolean" |
256 | required="false" multiValued="false" indexed="true" stored="true" /> | 256 | required="false" multiValued="false" indexed="true" stored="true" /> |
257 | <field name="NLContent_mtas" type="mtas_text" indexed="true" | 257 | <field name="NLContent_mtas" type="mtas_text" indexed="true" |
258 | - stored="true" /> | ||
259 | - <field name="NLContent_mtas_error" type="nederlab_string" | 258 | + stored="true" /> |
259 | + <field name="NLContent_mtas_error" type="nederlab_string" | ||
260 | indexed="true" stored="true" /> | 260 | indexed="true" stored="true" /> |
261 | <field name="NLContent_mtas_numberOfTokens" type="nederlab_int" | 261 | <field name="NLContent_mtas_numberOfTokens" type="nederlab_int" |
262 | indexed="true" stored="true" /> | 262 | indexed="true" stored="true" /> |
@@ -264,7 +264,17 @@ | @@ -264,7 +264,17 @@ | ||
264 | indexed="true" stored="true" /> | 264 | indexed="true" stored="true" /> |
265 | <field name="NLContent_mtas_size" type="nederlab_int" indexed="true" | 265 | <field name="NLContent_mtas_size" type="nederlab_int" indexed="true" |
266 | stored="true" /> | 266 | stored="true" /> |
267 | - <!-- Combined Field Metadata --> | 267 | + <field name="NLContent_mtasSource" type="mtasSource_text" indexed="true" |
268 | + stored="true" /> | ||
269 | + <field name="NLContent_mtasSource_error" type="nederlab_string" | ||
270 | + indexed="true" stored="true" /> | ||
271 | + <field name="NLContent_mtasSource_numberOfTokens" type="nederlab_int" | ||
272 | + indexed="true" stored="true" /> | ||
273 | + <field name="NLContent_mtasSource_numberOfPositions" type="nederlab_int" | ||
274 | + indexed="true" stored="true" /> | ||
275 | + <field name="NLContent_mtasSource_size" type="nederlab_int" indexed="true" | ||
276 | + stored="true" /> | ||
277 | + <!-- Combined Field Metadata --> | ||
268 | <field name="NLMetadata" type="nederlab_text" required="false" | 278 | <field name="NLMetadata" type="nederlab_text" required="false" |
269 | multiValued="true" indexed="true" stored="false" /> | 279 | multiValued="true" indexed="true" stored="false" /> |
270 | <copyField source="NLCore_NLIdentification_nederlabID" dest="NLMetadata" /> | 280 | <copyField source="NLCore_NLIdentification_nederlabID" dest="NLMetadata" /> |
@@ -420,5 +430,27 @@ | @@ -420,5 +430,27 @@ | ||
420 | prefix="t" /> | 430 | prefix="t" /> |
421 | </analyzer> | 431 | </analyzer> |
422 | </fieldType> | 432 | </fieldType> |
433 | + | ||
434 | + <fieldType name="mtasSource_text_example_config" class="solr.TextField" | ||
435 | + postingsFormat="MtasCodec"> | ||
436 | + <analyzer type="index"> | ||
437 | + <charFilter class="mtas.analysis.util.MtasCharFilterFactory" | ||
438 | + config="mtasSource.xml" /> | ||
439 | + <tokenizer class="mtas.analysis.util.MtasTokenizerFactory" | ||
440 | + config="mtasSource.xml" /> | ||
441 | + </analyzer> | ||
442 | + </fieldType> | ||
443 | + | ||
444 | + <fieldType name="mtasSource_text" class="mtas.solr.schema.MtasPreAnalyzedField" | ||
445 | + followIndexAnalyzer="mtasSource_text_example_config" | ||
446 | + configurationFromField="NLCore_NLAdministrative_sourceCollection" setNumberOfTokens="NLContent_mtasSource_numberOfTokens" | ||
447 | + setNumberOfPositions="NLContent_mtasSource_numberOfPositions" setSize="NLContent_mtasSource_size" | ||
448 | + setError="NLContent_mtasSource_error" postingsFormat="MtasCodec"> | ||
449 | + <analyzer type="query"> | ||
450 | + <tokenizer class="solr.WhitespaceTokenizerFactory" /> | ||
451 | + <filter class="mtas.analysis.util.MtasPrefixTokenFilterFactory" | ||
452 | + prefix="t" /> | ||
453 | + </analyzer> | ||
454 | + </fieldType> | ||
423 | 455 | ||
424 | </schema> | 456 | </schema> |
conf/solr/schemaOeaw.xml
0 → 100644
1 | +<?xml version="1.0" encoding="UTF-8" ?> | ||
2 | + | ||
3 | +<schema name="nederlab" version="1.5"> | ||
4 | + | ||
5 | + <field name="_version_" type="nederlab_long" indexed="true" | ||
6 | + stored="true" /> | ||
7 | + | ||
8 | + <!-- component Profile --> | ||
9 | + <field name="NLProfile_name" type="nederlab_string" required="true" | ||
10 | + multiValued="false" indexed="true" stored="true" /> | ||
11 | + | ||
12 | + <!-- component ResourceProxy --> | ||
13 | + <field name="ResourceProxy_resourceRef" type="nederlab_string" | ||
14 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
15 | + <dynamicField name="ResourceProxy_resourceRef_mimeType_*" | ||
16 | + type="nederlab_string" required="false" multiValued="true" indexed="true" | ||
17 | + stored="true" /> | ||
18 | + | ||
19 | + <!-- component NLCore --> | ||
20 | + <field name="NLCore_NLIdentification_nederlabID" type="nederlab_uuid" | ||
21 | + required="true" multiValued="false" indexed="true" stored="true" /> | ||
22 | + <field name="NLCore_NLIdentification_editorialCode" type="nederlab_string" | ||
23 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
24 | + <field name="NLCore_NLIdentification_versionID" type="nederlab_string" | ||
25 | + required="true" multiValued="false" indexed="true" stored="true" /> | ||
26 | + <field name="NLCore_NLIdentification_sourceRef" type="nederlab_string" | ||
27 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
28 | + <field name="NLCore_NLIdentification_sourceUrl" type="nederlab_string" | ||
29 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
30 | + <field name="NLCore_NLIdentification_sourceRefUrl_serialized" | ||
31 | + type="nederlab_string" required="false" multiValued="true" indexed="false" | ||
32 | + stored="true" /> | ||
33 | + <field name="NLCore_NLAdministrative_ingestTime" type="nederlab_date" | ||
34 | + required="true" multiValued="false" indexed="true" stored="true" /> | ||
35 | + <field name="NLCore_NLAdministrative_expirationTime" type="nederlab_date" | ||
36 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
37 | + <field name="NLCore_NLAdministrative_lastEditedBy" type="nederlab_string" | ||
38 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
39 | + <field name="NLCore_NLAdministrative_modificationTime" type="nederlab_date" | ||
40 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
41 | + <field name="NLCore_NLAdministrative_editorialNote" type="nederlab_text" | ||
42 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
43 | + <field name="NLCore_NLAdministrative_sourceCollection" type="nederlab_string" | ||
44 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
45 | + <field name="NLCore_NLAdministrative_isThesaurusElement" type="nederlab_boolean" | ||
46 | + required="true" multiValued="false" indexed="true" stored="true" /> | ||
47 | + <field name="NLCore_NLExternalReference_organizationName" type="nederlab_text" | ||
48 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
49 | + <field name="NLCore_NLExternalReference_collectionName" type="nederlab_string" | ||
50 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
51 | + <field name="NLCore_NLExternalReference_resourceRef" type="nederlab_string" | ||
52 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
53 | + <field name="NLCore_NLExternalReference_serialized" type="nederlab_string" | ||
54 | + required="false" multiValued="true" indexed="false" stored="true" /> | ||
55 | + | ||
56 | + <!-- component NLTitle --> | ||
57 | + <field name="NLTitle_title" type="nederlab_text" required="false" | ||
58 | + multiValued="false" indexed="true" stored="true" /> | ||
59 | + <field name="NLTitle_subtitle" type="nederlab_text" required="false" | ||
60 | + multiValued="false" indexed="true" stored="true" /> | ||
61 | + <field name="NLTitle_genre" type="nederlab_string" required="false" | ||
62 | + multiValued="true" indexed="true" stored="true" /> | ||
63 | + <field name="NLTitle_category" type="nederlab_string" required="false" | ||
64 | + multiValued="true" indexed="true" stored="true" /> | ||
65 | + <field name="NLTitle_yearOfPublicationMin" type="nederlab_int" | ||
66 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
67 | + <field name="NLTitle_yearOfPublicationMax" type="nederlab_int" | ||
68 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
69 | + <field name="NLTitle_yearOfPublicationApprox" type="nederlab_boolean" | ||
70 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
71 | + <field name="NLTitle_yearOfPublicationLabel" type="nederlab_text" | ||
72 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
73 | + <field name="NLTitle_edition" type="nederlab_string" required="false" | ||
74 | + multiValued="false" indexed="true" stored="true" /> | ||
75 | + <field name="NLTitle_inNederlabAs" type="nederlab_uuid" required="false" | ||
76 | + multiValued="false" indexed="true" stored="true" /> | ||
77 | + <field name="NLTitle_NLPublicationPlace_placeOfPublication" type="nederlab_string" | ||
78 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
79 | + <field name="NLTitle_NLPublicationPlace_placeID" type="nederlab_string" | ||
80 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
81 | + <field name="NLTitle_NLPublicationPlace_placeOfPublicationOriginal" | ||
82 | + type="nederlab_text" required="false" multiValued="true" indexed="true" | ||
83 | + stored="true" /> | ||
84 | + <field name="NLTitle_numberOfPages" type="nederlab_int" required="false" | ||
85 | + multiValued="false" indexed="true" stored="true" /> | ||
86 | + <field name="NLTitle_numberOfWords" type="nederlab_int" required="false" | ||
87 | + multiValued="false" indexed="true" stored="true" /> | ||
88 | + <field name="NLTitle_primaryLanguage" type="nederlab_string" | ||
89 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
90 | + <field name="NLTitle_isTranslation" type="nederlab_boolean" | ||
91 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
92 | + <field name="NLTitle_characterEncoding" type="nederlab_string" | ||
93 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
94 | + <field name="NLTitle_codingStandard" type="nederlab_string" | ||
95 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
96 | + <field name="NLTitle_textQuality" type="nederlab_text" required="false" | ||
97 | + multiValued="false" indexed="true" stored="true" /> | ||
98 | + <field name="NLTitle_processingMethod" type="nederlab_text" | ||
99 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
100 | + <field name="NLTitle_autopsyPerformed" type="nederlab_boolean" | ||
101 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
102 | + <field name="NLTitle_NLPersonRef_personID" type="nederlab_uuid" | ||
103 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
104 | + <field name="NLTitle_NLPersonRef_role" type="nederlab_string" | ||
105 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
106 | + <dynamicField name="NLTitle_NLPersonRef_personID_role_*" | ||
107 | + type="nederlab_uuid" required="false" multiValued="true" indexed="true" | ||
108 | + stored="true" /> | ||
109 | + <field name="NLTitle_contains" type="nederlab_uuid" required="false" | ||
110 | + multiValued="true" indexed="true" stored="true" /> | ||
111 | + <field name="NLTitle_seriesTitleID" type="nederlab_uuid" | ||
112 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
113 | + <field name="NLTitle_seriesTitleID_parent" type="nederlab_uuid" | ||
114 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
115 | + <field name="NLTitle_seriesTitleID_root" type="nederlab_uuid" | ||
116 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
117 | + | ||
118 | + <!-- component NLDependentTitle --> | ||
119 | + <field name="NLDependentTitle_title" type="nederlab_text" | ||
120 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
121 | + <field name="NLDependentTitle_subtitle" type="nederlab_text" | ||
122 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
123 | + <field name="NLDependentTitle_primaryLanguage" type="nederlab_string" | ||
124 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
125 | + <field name="NLDependentTitle_parentTitleID" type="nederlab_uuid" | ||
126 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
127 | + <field name="NLDependentTitle_inNederlabAs" type="nederlab_uuid" | ||
128 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
129 | + <field name="NLDependentTitle_NLPersonRef_personID" type="nederlab_uuid" | ||
130 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
131 | + <field name="NLDependentTitle_NLPersonRef_role" type="nederlab_string" | ||
132 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
133 | + <dynamicField name="NLDependentTitle_NLPersonRef_personID_role_*" | ||
134 | + type="nederlab_uuid" required="false" multiValued="true" indexed="true" | ||
135 | + stored="true" /> | ||
136 | + <field name="NLDependentTitle_startPage" type="nederlab_int" | ||
137 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
138 | + <field name="NLDependentTitle_endPage" type="nederlab_int" | ||
139 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
140 | + | ||
141 | + <!-- component NLPerson --> | ||
142 | + <field name="NLPerson_NLPersonName_nameId" type="nederlab_uuid" | ||
143 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
144 | + <field name="NLPerson_NLPersonName_lastName" type="nederlab_text" | ||
145 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
146 | + <field name="NLPerson_NLPersonName_firstName" type="nederlab_text" | ||
147 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
148 | + <field name="NLPerson_NLPersonName_infixes" type="nederlab_text" | ||
149 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
150 | + <field name="NLPerson_NLPersonName_firstNameFull" type="nederlab_text" | ||
151 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
152 | + <field name="NLPerson_NLPersonName_fullName" type="nederlab_text" | ||
153 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
154 | + <field name="NLPerson_NLPersonName_fullName_serialized" type="nederlab_string" | ||
155 | + required="false" multiValued="true" indexed="false" stored="true" /> | ||
156 | + <field name="NLPerson_NLPersonName_preferredNameID" type="nederlab_uuid" | ||
157 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
158 | + <field name="NLPerson_NLPersonName_preferredLastName" type="nederlab_string" | ||
159 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
160 | + <field name="NLPerson_NLPersonName_preferredFirstName" type="nederlab_string" | ||
161 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
162 | + <field name="NLPerson_NLPersonName_preferredFirstNameFull" type="nederlab_string" | ||
163 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
164 | + <field name="NLPerson_NLPersonName_preferredInfixes" type="nederlab_string" | ||
165 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
166 | + <field name="NLPerson_NLPersonName_preferredFullName" type="nederlab_text" | ||
167 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
168 | + <field name="NLPerson_NLPersonName_preferredFullName_serialized" | ||
169 | + type="nederlab_string" required="false" multiValued="false" indexed="false" | ||
170 | + stored="true" /> | ||
171 | + <field name="NLPerson_dateOfBirthDayMonth" type="nederlab_text" | ||
172 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
173 | + <field name="NLPerson_dateOfBirthMonth" type="nederlab_int" | ||
174 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
175 | + <field name="NLPerson_dateOfBirthDay" type="nederlab_int" | ||
176 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
177 | + <field name="NLPerson_yearOfBirthMin" type="nederlab_int" | ||
178 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
179 | + <field name="NLPerson_yearOfBirthMax" type="nederlab_int" | ||
180 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
181 | + <field name="NLPerson_yearOfBirthApprox" type="nederlab_boolean" | ||
182 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
183 | + <field name="NLPerson_yearOfBirthLabel" type="nederlab_text" | ||
184 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
185 | + <field name="NLPerson_placeOfBirth" type="nederlab_string" | ||
186 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
187 | + <field name="NLPerson_placeOfBirthID" type="nederlab_string" | ||
188 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
189 | + <field name="NLPerson_dateOfDeathDayMonth" type="nederlab_text" | ||
190 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
191 | + <field name="NLPerson_dateOfDeathMonth" type="nederlab_int" | ||
192 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
193 | + <field name="NLPerson_dateOfDeathDay" type="nederlab_int" | ||
194 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
195 | + <field name="NLPerson_yearOfDeathMin" type="nederlab_int" | ||
196 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
197 | + <field name="NLPerson_yearOfDeathMax" type="nederlab_int" | ||
198 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
199 | + <field name="NLPerson_yearOfDeathApprox" type="nederlab_boolean" | ||
200 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
201 | + <field name="NLPerson_yearOfDeathLabel" type="nederlab_text" | ||
202 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
203 | + <field name="NLPerson_placeOfDeath" type="nederlab_string" | ||
204 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
205 | + <field name="NLPerson_placeOfDeathID" type="nederlab_string" | ||
206 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
207 | + <field name="NLPerson_gender" type="nederlab_string" required="false" | ||
208 | + multiValued="false" indexed="true" stored="true" /> | ||
209 | + <field name="NLPerson_profession" type="nederlab_string" | ||
210 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
211 | + <field name="NLPerson_education" type="nederlab_string" required="false" | ||
212 | + multiValued="true" indexed="true" stored="true" /> | ||
213 | + <field name="NLPerson_inThesaurusAs" type="nederlab_uuid" | ||
214 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
215 | + | ||
216 | + <!-- component NLSeriesTitle --> | ||
217 | + <field name="NLSeriesTitle_title" type="nederlab_text" required="false" | ||
218 | + multiValued="false" indexed="true" stored="true" /> | ||
219 | + <field name="NLSeriesTitle_years" type="nederlab_text" required="false" | ||
220 | + multiValued="false" indexed="true" stored="true" /> | ||
221 | + <field name="NLSeriesTitle_description" type="nederlab_text" | ||
222 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
223 | + <field name="NLSeriesTitle_inNederlabAs" type="nederlab_uuid" | ||
224 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
225 | + <field name="NLSeriesTitle_seriesTitleID" type="nederlab_uuid" | ||
226 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
227 | + <field name="NLSeriesTitle_seriesTitleID_parent" type="nederlab_uuid" | ||
228 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
229 | + <field name="NLSeriesTitle_seriesTitleID_root" type="nederlab_uuid" | ||
230 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
231 | + | ||
232 | + <!-- component NLCollectionSpecific --> | ||
233 | + <dynamicField name="NLCollectionSpecific_*" type="nederlab_string" | ||
234 | + required="false" multiValued="true" indexed="true" stored="true" /> | ||
235 | + | ||
236 | + <!-- component NLContent old --> | ||
237 | + | ||
238 | + <field name="NLContent_text_available" type="nederlab_boolean" | ||
239 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
240 | + <field name="NLContent_text" type="nederlab_content" required="false" | ||
241 | + multiValued="false" indexed="true" stored="true" termVectors="true" | ||
242 | + termPositions="true" termOffsets="true" /> | ||
243 | + <field name="NLContent_text_lowercase" type="nederlab_content_lowercase" | ||
244 | + required="false" multiValued="false" indexed="true" stored="true" | ||
245 | + termVectors="true" termPositions="true" termOffsets="true" /> | ||
246 | + <copyField source="NLContent_text" dest="NLContent_text_lowercase" /> | ||
247 | + <field name="NLContent_ticcl_available" type="nederlab_boolean" | ||
248 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
249 | + <field name="NLContent_ticcl_lowercase" type="nederlab_content_lowercase" | ||
250 | + required="false" multiValued="false" indexed="true" stored="true" | ||
251 | + termVectors="true" termPositions="true" termOffsets="true" /> | ||
252 | + | ||
253 | + <!-- component NLContent --> | ||
254 | + | ||
255 | + <field name="NLContent_folia_available" type="nederlab_boolean" | ||
256 | + required="false" multiValued="false" indexed="true" stored="true" /> | ||
257 | + <field name="NLContent_mtas" type="mtas_text" indexed="true" | ||
258 | + stored="true" /> | ||
259 | + <field name="NLContent_mtas_error" type="nederlab_string" | ||
260 | + indexed="true" stored="true" /> | ||
261 | + <field name="NLContent_mtas_numberOfTokens" type="nederlab_int" | ||
262 | + indexed="true" stored="true" /> | ||
263 | + <field name="NLContent_mtas_numberOfPositions" type="nederlab_int" | ||
264 | + indexed="true" stored="true" /> | ||
265 | + <field name="NLContent_mtas_size" type="nederlab_int" indexed="true" | ||
266 | + stored="true" /> | ||
267 | + <!-- Combined Field Metadata --> | ||
268 | + <field name="NLMetadata" type="nederlab_text" required="false" | ||
269 | + multiValued="true" indexed="true" stored="false" /> | ||
270 | + <copyField source="NLCore_NLIdentification_nederlabID" dest="NLMetadata" /> | ||
271 | + <copyField source="NLCore_NLIdentification_editorialCode" | ||
272 | + dest="NLMetadata" /> | ||
273 | + <copyField source="NLCore_NLIdentification_sourceRef" dest="NLMetadata" /> | ||
274 | + <copyField source="NLCore_NLAdministrative_editorialNote" | ||
275 | + dest="NLMetadata" /> | ||
276 | + <copyField source="NLCore_NLAdministrative_sourceCollection" | ||
277 | + dest="NLMetadata" /> | ||
278 | + <copyField source="NLCore_NLExternalReference_organizationName" | ||
279 | + dest="NLMetadata" /> | ||
280 | + <copyField source="NLCore_NLExternalReference_collectionName" | ||
281 | + dest="NLMetadata" /> | ||
282 | + <copyField source="NLCore_NLExternalReference_resourceRef" | ||
283 | + dest="NLMetadata" /> | ||
284 | + <copyField source="NLTitle_title" dest="NLMetadata" /> | ||
285 | + <copyField source="NLTitle_subtitle" dest="NLMetadata" /> | ||
286 | + <copyField source="NLTitle_genre" dest="NLMetadata" /> | ||
287 | + <copyField source="NLTitle_category" dest="NLMetadata" /> | ||
288 | + <copyField source="NLTitle_yearOfPublicationMin" dest="NLMetadata" /> | ||
289 | + <copyField source="NLTitle_yearOfPublicationMax" dest="NLMetadata" /> | ||
290 | + <copyField source="NLTitle_yearOfPublicationLabel" dest="NLMetadata" /> | ||
291 | + <copyField source="NLTitle_edition" dest="NLMetadata" /> | ||
292 | + <copyField source="NLTitle_NLPublicationPlace_placeOfPublication" | ||
293 | + dest="NLMetadata" /> | ||
294 | + <copyField source="NLTitle_NLPublicationPlace_placeID" dest="NLMetadata" /> | ||
295 | + <copyField source="NLTitle_NLPublicationPlace_placeOfPublicationOriginal" | ||
296 | + dest="NLMetadata" /> | ||
297 | + <copyField source="NLTitle_primaryLanguage" dest="NLMetadata" /> | ||
298 | + <copyField source="NLTitle_characterEncoding" dest="NLMetadata" /> | ||
299 | + <copyField source="NLTitle_codingStandard" dest="NLMetadata" /> | ||
300 | + <copyField source="NLTitle_textQuality" dest="NLMetadata" /> | ||
301 | + <copyField source="NLTitle_processingMethod" dest="NLMetadata" /> | ||
302 | + <copyField source="NLTitle_NLPersonRef_role" dest="NLMetadata" /> | ||
303 | + <copyField source="NLDependentTitle_title" dest="NLMetadata" /> | ||
304 | + <copyField source="NLDependentTitle_subtitle" dest="NLMetadata" /> | ||
305 | + <copyField source="NLDependentTitle_primaryLanguage" dest="NLMetadata" /> | ||
306 | + <copyField source="NLDependentTitle_NLPersonRef_role" dest="NLMetadata" /> | ||
307 | + <copyField source="NLPerson_NLPersonName_lastName" dest="NLMetadata" /> | ||
308 | + <copyField source="NLPerson_NLPersonName_firstName" dest="NLMetadata" /> | ||
309 | + <copyField source="NLPerson_NLPersonName_infixes" dest="NLMetadata" /> | ||
310 | + <copyField source="NLPerson_NLPersonName_firstNameFull" dest="NLMetadata" /> | ||
311 | + <copyField source="NLPerson_NLPersonName_fullName" dest="NLMetadata" /> | ||
312 | + <copyField source="NLPerson_dateOfBirthDayMonth" dest="NLMetadata" /> | ||
313 | + <copyField source="NLPerson_yearOfBirthMin" dest="NLMetadata" /> | ||
314 | + <copyField source="NLPerson_yearOfBirthMax" dest="NLMetadata" /> | ||
315 | + <copyField source="NLPerson_yearOfBirthLabel" dest="NLMetadata" /> | ||
316 | + <copyField source="NLPerson_placeOfBirth" dest="NLMetadata" /> | ||
317 | + <copyField source="NLPerson_placeOfBirthID" dest="NLMetadata" /> | ||
318 | + <copyField source="NLPerson_dateOfDeathDayMonth" dest="NLMetadata" /> | ||
319 | + <copyField source="NLPerson_yearOfDeathMin" dest="NLMetadata" /> | ||
320 | + <copyField source="NLPerson_yearOfDeathMax" dest="NLMetadata" /> | ||
321 | + <copyField source="NLPerson_yearOfDeathLabel" dest="NLMetadata" /> | ||
322 | + <copyField source="NLPerson_placeOfDeath" dest="NLMetadata" /> | ||
323 | + <copyField source="NLPerson_placeOfDeathID" dest="NLMetadata" /> | ||
324 | + <copyField source="NLPerson_gender" dest="NLMetadata" /> | ||
325 | + <copyField source="NLPerson_profession" dest="NLMetadata" /> | ||
326 | + <copyField source="NLPerson_education" dest="NLMetadata" /> | ||
327 | + <copyField source="NLSeriesTitle_title" dest="NLMetadata" /> | ||
328 | + <copyField source="NLSeriesTitle_years" dest="NLMetadata" /> | ||
329 | + <copyField source="NLSeriesTitle_description" dest="NLMetadata" /> | ||
330 | + <copyField source="NLCollectionSpecific_*" dest="NLMetadata" /> | ||
331 | + | ||
332 | + <uniqueKey>NLCore_NLIdentification_versionID</uniqueKey> | ||
333 | + | ||
334 | + <fieldType name="nederlab_string" class="solr.StrField" | ||
335 | + sortMissingLast="true" /> | ||
336 | + <fieldType name="nederlab_uuid" class="solr.StrField" | ||
337 | + sortMissingLast="true" /> | ||
338 | + <fieldType name="nederlab_boolean" class="solr.BoolField" | ||
339 | + sortMissingLast="true" /> | ||
340 | + <fieldType name="nederlab_int" class="solr.TrieIntField" | ||
341 | + precisionStep="8" positionIncrementGap="0" /> | ||
342 | + <fieldType name="nederlab_long" class="solr.TrieLongField" | ||
343 | + precisionStep="0" positionIncrementGap="0" /> | ||
344 | + <fieldType name="nederlab_date" class="solr.TrieDateField" | ||
345 | + precisionStep="6" positionIncrementGap="0" /> | ||
346 | + <fieldtype name="nederlab_binary" class="solr.BinaryField" /> | ||
347 | + | ||
348 | + <fieldType name="nederlab_text" class="solr.TextField" | ||
349 | + positionIncrementGap="100"> | ||
350 | + <analyzer type="index"> | ||
351 | + <tokenizer class="solr.StandardTokenizerFactory" /> | ||
352 | + <filter class="solr.LowerCaseFilterFactory" /> | ||
353 | + </analyzer> | ||
354 | + <analyzer type="query"> | ||
355 | + <tokenizer class="solr.StandardTokenizerFactory" /> | ||
356 | + <filter class="solr.LowerCaseFilterFactory" /> | ||
357 | + </analyzer> | ||
358 | + </fieldType> | ||
359 | + | ||
360 | + <fieldType name="nederlab_content" class="solr.TextField" | ||
361 | + positionIncrementGap="100"> | ||
362 | + <analyzer type="index"> | ||
363 | + <tokenizer class="solr.StandardTokenizerFactory" /> | ||
364 | + </analyzer> | ||
365 | + <analyzer type="query"> | ||
366 | + <tokenizer class="solr.StandardTokenizerFactory" /> | ||
367 | + </analyzer> | ||
368 | + </fieldType> | ||
369 | + | ||
370 | + <fieldType name="nederlab_content_lowercase" class="solr.TextField" | ||
371 | + positionIncrementGap="100"> | ||
372 | + <analyzer type="index"> | ||
373 | + <tokenizer class="solr.StandardTokenizerFactory" /> | ||
374 | + <filter class="solr.LowerCaseFilterFactory" /> | ||
375 | + </analyzer> | ||
376 | + <analyzer type="query"> | ||
377 | + <tokenizer class="solr.StandardTokenizerFactory" /> | ||
378 | + <filter class="solr.LowerCaseFilterFactory" /> | ||
379 | + </analyzer> | ||
380 | + </fieldType> | ||
381 | + | ||
382 | + <fieldType name="mtas_text" class="solr.TextField" | ||
383 | + postingsFormat="MtasCodec"> | ||
384 | + <analyzer type="index"> | ||
385 | + <charFilter class="mtas.analysis.util.MtasCharFilterFactory" | ||
386 | + type="file" prefix="/local/data/" /> | ||
387 | + <tokenizer class="mtas.analysis.util.MtasTokenizerFactory" | ||
388 | + configFile="mtas/folia_oeaw.xml" /> | ||
389 | + </analyzer> | ||
390 | + </fieldType> | ||
391 | + | ||
392 | +</schema> |
conf/solr/schemaTest.xml
@@ -115,11 +115,7 @@ | @@ -115,11 +115,7 @@ | ||
115 | 115 | ||
116 | WARNING: The _text_ catch-all field will significantly increase your index size. | 116 | WARNING: The _text_ catch-all field will significantly increase your index size. |
117 | If you don't need it, consider removing it and the corresponding copyField directive. | 117 | If you don't need it, consider removing it and the corresponding copyField directive. |
118 | - --> | ||
119 | - <! | ||
120 | - <fieldType name="string_simpletext" class="solr.StrField" postingsFormat="SimpleText" /> | ||
121 | - <field name="simple_string" type="string_simpletext" indexed="true" stored="true" required="false" multiValued="false" /> | ||
122 | - --> | 118 | + --> |
123 | 119 | ||
124 | <fieldType name="mtas_text" class="solr.TextField" postingsFormat="MtasCodec"> | 120 | <fieldType name="mtas_text" class="solr.TextField" postingsFormat="MtasCodec"> |
125 | <analyzer type="index"> | 121 | <analyzer type="index"> |
junit/mtas/parser/MtasCQLParserTestSentence.java
@@ -29,11 +29,11 @@ public class MtasCQLParserTestSentence { | @@ -29,11 +29,11 @@ public class MtasCQLParserTestSentence { | ||
29 | basicTests(); | 29 | basicTests(); |
30 | } | 30 | } |
31 | 31 | ||
32 | - private void testCQLParse(String field, String cql, SpanQuery q) { | 32 | + private void testCQLParse(String field, String defaultPrefix, String cql, SpanQuery q) { |
33 | MtasCQLParser p = new MtasCQLParser(new BufferedReader(new StringReader(cql))); | 33 | MtasCQLParser p = new MtasCQLParser(new BufferedReader(new StringReader(cql))); |
34 | try { | 34 | try { |
35 | System.out.print("CQL parsing:\t"+cql); | 35 | System.out.print("CQL parsing:\t"+cql); |
36 | - assertEquals(p.parse(field) ,q); | 36 | + assertEquals(p.parse(field, defaultPrefix) ,q); |
37 | System.out.print("\n"); | 37 | System.out.print("\n"); |
38 | } catch (ParseException e) { | 38 | } catch (ParseException e) { |
39 | System.out.println("Error CQL parsing:\t"+cql); | 39 | System.out.println("Error CQL parsing:\t"+cql); |
@@ -41,12 +41,12 @@ public class MtasCQLParserTestSentence { | @@ -41,12 +41,12 @@ public class MtasCQLParserTestSentence { | ||
41 | } | 41 | } |
42 | } | 42 | } |
43 | 43 | ||
44 | - private void testCQLEquivalent(String field, String cql1, String cql2) { | 44 | + private void testCQLEquivalent(String field, String defaultPrefix, String cql1, String cql2) { |
45 | MtasCQLParser p1 = new MtasCQLParser(new BufferedReader(new StringReader(cql1))); | 45 | MtasCQLParser p1 = new MtasCQLParser(new BufferedReader(new StringReader(cql1))); |
46 | MtasCQLParser p2 = new MtasCQLParser(new BufferedReader(new StringReader(cql2))); | 46 | MtasCQLParser p2 = new MtasCQLParser(new BufferedReader(new StringReader(cql2))); |
47 | try { | 47 | try { |
48 | System.out.print("CQL equivalent:\t"+cql1+" and "+cql2); | 48 | System.out.print("CQL equivalent:\t"+cql1+" and "+cql2); |
49 | - assertEquals(p1.parse(field) ,p2.parse(field)); | 49 | + assertEquals(p1.parse(field, defaultPrefix) ,p2.parse(field, defaultPrefix)); |
50 | System.out.print("\n"); | 50 | System.out.print("\n"); |
51 | } catch (ParseException e) { | 51 | } catch (ParseException e) { |
52 | System.out.println("Error CQL equivalent:\t"+cql1+" and "+cql2); | 52 | System.out.println("Error CQL equivalent:\t"+cql1+" and "+cql2); |
@@ -73,6 +73,7 @@ public class MtasCQLParserTestSentence { | @@ -73,6 +73,7 @@ public class MtasCQLParserTestSentence { | ||
73 | basicTest16(); | 73 | basicTest16(); |
74 | basicTest17(); | 74 | basicTest17(); |
75 | basicTest18(); | 75 | basicTest18(); |
76 | + basicTest19(); | ||
76 | } | 77 | } |
77 | 78 | ||
78 | private void basicTest1() { | 79 | private void basicTest1() { |
@@ -84,14 +85,14 @@ public class MtasCQLParserTestSentence { | @@ -84,14 +85,14 @@ public class MtasCQLParserTestSentence { | ||
84 | items.add(new MtasSpanSequenceItem(q1, false)); | 85 | items.add(new MtasSpanSequenceItem(q1, false)); |
85 | items.add(new MtasSpanSequenceItem(q2, false)); | 86 | items.add(new MtasSpanSequenceItem(q2, false)); |
86 | SpanQuery q = new MtasSpanSequenceQuery(items); | 87 | SpanQuery q = new MtasSpanSequenceQuery(items); |
87 | - testCQLParse(field, cql, q); | 88 | + testCQLParse(field, null, cql, q); |
88 | } | 89 | } |
89 | 90 | ||
90 | private void basicTest2() { | 91 | private void basicTest2() { |
91 | String field = "testveld"; | 92 | String field = "testveld"; |
92 | String cql1 = "[pos=\"LID\"] [] []? [] [lemma=\"koe\"]"; | 93 | String cql1 = "[pos=\"LID\"] [] []? [] [lemma=\"koe\"]"; |
93 | String cql2 = "[pos=\"LID\"] []{2,3} [lemma=\"koe\"]"; | 94 | String cql2 = "[pos=\"LID\"] []{2,3} [lemma=\"koe\"]"; |
94 | - testCQLEquivalent(field, cql1, cql2); | 95 | + testCQLEquivalent(field, null, cql1, cql2); |
95 | } | 96 | } |
96 | 97 | ||
97 | private void basicTest3() { | 98 | private void basicTest3() { |
@@ -100,7 +101,7 @@ public class MtasCQLParserTestSentence { | @@ -100,7 +101,7 @@ public class MtasCQLParserTestSentence { | ||
100 | SpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID"); | 101 | SpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID"); |
101 | SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","koe"); | 102 | SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","koe"); |
102 | SpanQuery q = new MtasSpanOrQuery(q1,q2); | 103 | SpanQuery q = new MtasSpanOrQuery(q1,q2); |
103 | - testCQLParse(field, cql, q); | 104 | + testCQLParse(field, null, cql, q); |
104 | } | 105 | } |
105 | 106 | ||
106 | private void basicTest4() { | 107 | private void basicTest4() { |
@@ -114,28 +115,28 @@ public class MtasCQLParserTestSentence { | @@ -114,28 +115,28 @@ public class MtasCQLParserTestSentence { | ||
114 | items.add(new MtasSpanSequenceItem(q3, false)); | 115 | items.add(new MtasSpanSequenceItem(q3, false)); |
115 | SpanQuery q4 = new MtasSpanSequenceQuery(items); | 116 | SpanQuery q4 = new MtasSpanSequenceQuery(items); |
116 | SpanQuery q = new MtasSpanOrQuery(q1,q4); | 117 | SpanQuery q = new MtasSpanOrQuery(q1,q4); |
117 | - testCQLParse(field, cql, q); | 118 | + testCQLParse(field, null, cql, q); |
118 | } | 119 | } |
119 | 120 | ||
120 | private void basicTest5() { | 121 | private void basicTest5() { |
121 | String field = "testveld"; | 122 | String field = "testveld"; |
122 | String cql1 = "([pos=\"LID\"]([pos=\"ADJ\"][lemma=\"koe\"]))"; | 123 | String cql1 = "([pos=\"LID\"]([pos=\"ADJ\"][lemma=\"koe\"]))"; |
123 | String cql2 = "[pos=\"LID\"][pos=\"ADJ\"][lemma=\"koe\"]"; | 124 | String cql2 = "[pos=\"LID\"][pos=\"ADJ\"][lemma=\"koe\"]"; |
124 | - testCQLEquivalent(field, cql1, cql2); | 125 | + testCQLEquivalent(field, null, cql1, cql2); |
125 | } | 126 | } |
126 | 127 | ||
127 | private void basicTest6() { | 128 | private void basicTest6() { |
128 | String field = "testveld"; | 129 | String field = "testveld"; |
129 | String cql1 = "([pos=\"LID\"]|[lemma=\"de\"][lemma=\"koe\"])|([pos=\"ADJ\"]|([lemma=\"het\"]([lemma=\"paard\"])))"; | 130 | String cql1 = "([pos=\"LID\"]|[lemma=\"de\"][lemma=\"koe\"])|([pos=\"ADJ\"]|([lemma=\"het\"]([lemma=\"paard\"])))"; |
130 | String cql2 = "[pos=\"LID\"]|[lemma=\"de\"][lemma=\"koe\"]|[pos=\"ADJ\"]|[lemma=\"het\"][lemma=\"paard\"]"; | 131 | String cql2 = "[pos=\"LID\"]|[lemma=\"de\"][lemma=\"koe\"]|[pos=\"ADJ\"]|[lemma=\"het\"][lemma=\"paard\"]"; |
131 | - testCQLEquivalent(field, cql1, cql2); | 132 | + testCQLEquivalent(field, null, cql1, cql2); |
132 | } | 133 | } |
133 | 134 | ||
134 | private void basicTest7() { | 135 | private void basicTest7() { |
135 | String field = "testveld"; | 136 | String field = "testveld"; |
136 | String cql1 = "[pos=\"LID\"] []{0,1} []{3,5} []{2,4}"; | 137 | String cql1 = "[pos=\"LID\"] []{0,1} []{3,5} []{2,4}"; |
137 | String cql2 = "[pos=\"LID\"] []{5,10}"; | 138 | String cql2 = "[pos=\"LID\"] []{5,10}"; |
138 | - testCQLEquivalent(field, cql1, cql2); | 139 | + testCQLEquivalent(field, null, cql1, cql2); |
139 | } | 140 | } |
140 | 141 | ||
141 | private void basicTest8() { | 142 | private void basicTest8() { |
@@ -149,7 +150,7 @@ public class MtasCQLParserTestSentence { | @@ -149,7 +150,7 @@ public class MtasCQLParserTestSentence { | ||
149 | items.add(new MtasSpanSequenceItem(q1, false)); | 150 | items.add(new MtasSpanSequenceItem(q1, false)); |
150 | items.add(new MtasSpanSequenceItem(q4, false)); | 151 | items.add(new MtasSpanSequenceItem(q4, false)); |
151 | SpanQuery q = new MtasSpanSequenceQuery(items); | 152 | SpanQuery q = new MtasSpanSequenceQuery(items); |
152 | - testCQLParse(field, cql, q); | 153 | + testCQLParse(field, null, cql, q); |
153 | } | 154 | } |
154 | 155 | ||
155 | private void basicTest9() { | 156 | private void basicTest9() { |
@@ -165,7 +166,7 @@ public class MtasCQLParserTestSentence { | @@ -165,7 +166,7 @@ public class MtasCQLParserTestSentence { | ||
165 | items.add(new MtasSpanSequenceItem(q5, false)); | 166 | items.add(new MtasSpanSequenceItem(q5, false)); |
166 | items.add(new MtasSpanSequenceItem(q4, false)); | 167 | items.add(new MtasSpanSequenceItem(q4, false)); |
167 | SpanQuery q = new MtasSpanSequenceQuery(items); | 168 | SpanQuery q = new MtasSpanSequenceQuery(items); |
168 | - testCQLParse(field, cql, q); | 169 | + testCQLParse(field, null, cql, q); |
169 | } | 170 | } |
170 | 171 | ||
171 | private void basicTest10() { | 172 | private void basicTest10() { |
@@ -179,7 +180,7 @@ public class MtasCQLParserTestSentence { | @@ -179,7 +180,7 @@ public class MtasCQLParserTestSentence { | ||
179 | items.add(new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(q2,1,3), false)); | 180 | items.add(new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(q2,1,3), false)); |
180 | items.add(new MtasSpanSequenceItem(q3, false)); | 181 | items.add(new MtasSpanSequenceItem(q3, false)); |
181 | SpanQuery q = new MtasSpanSequenceQuery(items); | 182 | SpanQuery q = new MtasSpanSequenceQuery(items); |
182 | - testCQLParse(field, cql, q); | 183 | + testCQLParse(field, null, cql, q); |
183 | } | 184 | } |
184 | 185 | ||
185 | private void basicTest11() { | 186 | private void basicTest11() { |
@@ -188,7 +189,7 @@ public class MtasCQLParserTestSentence { | @@ -188,7 +189,7 @@ public class MtasCQLParserTestSentence { | ||
188 | SpanQuery q1 = new MtasCQLParserGroupQuery(field,"sentence"); | 189 | SpanQuery q1 = new MtasCQLParserGroupQuery(field,"sentence"); |
189 | SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","koe"); | 190 | SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","koe"); |
190 | SpanQuery q = new SpanContainingQuery(q1, q2); | 191 | SpanQuery q = new SpanContainingQuery(q1, q2); |
191 | - testCQLParse(field, cql, q); | 192 | + testCQLParse(field, null, cql, q); |
192 | } | 193 | } |
193 | 194 | ||
194 | private void basicTest12() { | 195 | private void basicTest12() { |
@@ -197,7 +198,7 @@ public class MtasCQLParserTestSentence { | @@ -197,7 +198,7 @@ public class MtasCQLParserTestSentence { | ||
197 | SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe"); | 198 | SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe"); |
198 | SpanQuery q2 = new MtasCQLParserGroupQuery(field,"sentence"); | 199 | SpanQuery q2 = new MtasCQLParserGroupQuery(field,"sentence"); |
199 | SpanQuery q = new SpanWithinQuery(q2, q1); | 200 | SpanQuery q = new SpanWithinQuery(q2, q1); |
200 | - testCQLParse(field, cql, q); | 201 | + testCQLParse(field, null, cql, q); |
201 | } | 202 | } |
202 | 203 | ||
203 | private void basicTest13() { | 204 | private void basicTest13() { |
@@ -211,7 +212,7 @@ public class MtasCQLParserTestSentence { | @@ -211,7 +212,7 @@ public class MtasCQLParserTestSentence { | ||
211 | items.add(new MtasSpanSequenceItem(q1, false)); | 212 | items.add(new MtasSpanSequenceItem(q1, false)); |
212 | items.add(new MtasSpanSequenceItem(q4, false)); | 213 | items.add(new MtasSpanSequenceItem(q4, false)); |
213 | SpanQuery q = new MtasSpanSequenceQuery(items); | 214 | SpanQuery q = new MtasSpanSequenceQuery(items); |
214 | - testCQLParse(field, cql, q); | 215 | + testCQLParse(field, null, cql, q); |
215 | } | 216 | } |
216 | 217 | ||
217 | private void basicTest14() { | 218 | private void basicTest14() { |
@@ -225,7 +226,7 @@ public class MtasCQLParserTestSentence { | @@ -225,7 +226,7 @@ public class MtasCQLParserTestSentence { | ||
225 | items.add(new MtasSpanSequenceItem(q3, false)); | 226 | items.add(new MtasSpanSequenceItem(q3, false)); |
226 | items.add(new MtasSpanSequenceItem(q4, false)); | 227 | items.add(new MtasSpanSequenceItem(q4, false)); |
227 | SpanQuery q = new MtasSpanSequenceQuery(items); | 228 | SpanQuery q = new MtasSpanSequenceQuery(items); |
228 | - testCQLParse(field, cql, q); | 229 | + testCQLParse(field, null, cql, q); |
229 | } | 230 | } |
230 | 231 | ||
231 | private void basicTest15() { | 232 | private void basicTest15() { |
@@ -246,7 +247,7 @@ public class MtasCQLParserTestSentence { | @@ -246,7 +247,7 @@ public class MtasCQLParserTestSentence { | ||
246 | items2.add(new MtasSpanSequenceItem(q1, false)); | 247 | items2.add(new MtasSpanSequenceItem(q1, false)); |
247 | items2.add(new MtasSpanSequenceItem(q8, false)); | 248 | items2.add(new MtasSpanSequenceItem(q8, false)); |
248 | SpanQuery q = new MtasSpanSequenceQuery(items2); | 249 | SpanQuery q = new MtasSpanSequenceQuery(items2); |
249 | - testCQLParse(field, cql, q); | 250 | + testCQLParse(field, null, cql, q); |
250 | } | 251 | } |
251 | 252 | ||
252 | private void basicTest16() { | 253 | private void basicTest16() { |
@@ -258,7 +259,7 @@ public class MtasCQLParserTestSentence { | @@ -258,7 +259,7 @@ public class MtasCQLParserTestSentence { | ||
258 | SpanQuery q4 = new SpanContainingQuery(q2, q3); | 259 | SpanQuery q4 = new SpanContainingQuery(q2, q3); |
259 | SpanQuery q5 = new SpanWithinQuery(q4, q1); | 260 | SpanQuery q5 = new SpanWithinQuery(q4, q1); |
260 | SpanQuery q = new SpanNotQuery(q5,new SpanContainingQuery(q5, q3)); | 261 | SpanQuery q = new SpanNotQuery(q5,new SpanContainingQuery(q5, q3)); |
261 | - testCQLParse(field, cql, q); | 262 | + testCQLParse(field, null, cql, q); |
262 | } | 263 | } |
263 | 264 | ||
264 | private void basicTest17() { | 265 | private void basicTest17() { |
@@ -271,11 +272,23 @@ public class MtasCQLParserTestSentence { | @@ -271,11 +272,23 @@ public class MtasCQLParserTestSentence { | ||
271 | items.add(new MtasSpanSequenceItem(q2, false)); | 272 | items.add(new MtasSpanSequenceItem(q2, false)); |
272 | items.add(new MtasSpanSequenceItem(new MtasSpanMatchAllQuery(field), false)); | 273 | items.add(new MtasSpanSequenceItem(new MtasSpanMatchAllQuery(field), false)); |
273 | SpanQuery q = new MtasSpanSequenceQuery(items); | 274 | SpanQuery q = new MtasSpanSequenceQuery(items); |
274 | - testCQLParse(field, cql, q); | 275 | + testCQLParse(field, null, cql, q); |
275 | } | 276 | } |
276 | 277 | ||
277 | private void basicTest18() { | 278 | private void basicTest18() { |
278 | String field = "testveld"; | 279 | String field = "testveld"; |
280 | + String cql = "\"de\" [pos=\"N\"]"; | ||
281 | + SpanQuery q1 = new MtasCQLParserWordQuery(field,"t_lc","de"); | ||
282 | + SpanQuery q2 = new MtasCQLParserWordQuery(field,"pos","N"); | ||
283 | + List<MtasSpanSequenceItem> items = new ArrayList<MtasSpanSequenceItem>(); | ||
284 | + items.add(new MtasSpanSequenceItem(q1, false)); | ||
285 | + items.add(new MtasSpanSequenceItem(q2, false)); | ||
286 | + SpanQuery q = new MtasSpanSequenceQuery(items); | ||
287 | + testCQLParse(field, "t_lc", cql, q); | ||
288 | + } | ||
289 | + | ||
290 | + private void basicTest19() { | ||
291 | + String field = "testveld"; | ||
279 | String cql = "([]<entity=\"loc\"/>{1,2}[]){3,4}"; | 292 | String cql = "([]<entity=\"loc\"/>{1,2}[]){3,4}"; |
280 | SpanQuery q1 = new MtasCQLParserGroupQuery(field,"entity","loc"); | 293 | SpanQuery q1 = new MtasCQLParserGroupQuery(field,"entity","loc"); |
281 | SpanQuery q2 = new MtasSpanRecurrenceQuery(q1,1,2); | 294 | SpanQuery q2 = new MtasSpanRecurrenceQuery(q1,1,2); |
@@ -285,7 +298,7 @@ public class MtasCQLParserTestSentence { | @@ -285,7 +298,7 @@ public class MtasCQLParserTestSentence { | ||
285 | items.add(new MtasSpanSequenceItem(new MtasSpanMatchAllQuery(field), false)); | 298 | items.add(new MtasSpanSequenceItem(new MtasSpanMatchAllQuery(field), false)); |
286 | SpanQuery q3 = new MtasSpanSequenceQuery(items); | 299 | SpanQuery q3 = new MtasSpanSequenceQuery(items); |
287 | SpanQuery q = new MtasSpanRecurrenceQuery(q3,3,4); | 300 | SpanQuery q = new MtasSpanRecurrenceQuery(q3,3,4); |
288 | - testCQLParse(field, cql, q); | 301 | + testCQLParse(field, null, cql, q); |
289 | } | 302 | } |
290 | 303 | ||
291 | } | 304 | } |
junit/mtas/parser/MtasCQLParserTestWord.java
@@ -23,10 +23,10 @@ public class MtasCQLParserTestWord { | @@ -23,10 +23,10 @@ public class MtasCQLParserTestWord { | ||
23 | basicNotTests(); | 23 | basicNotTests(); |
24 | } | 24 | } |
25 | 25 | ||
26 | - private void testCQLParse(String field, String cql, SpanQuery q) { | 26 | + private void testCQLParse(String field, String defaultPrefix, String cql, SpanQuery q) { |
27 | MtasCQLParser p = new MtasCQLParser(new BufferedReader(new StringReader(cql))); | 27 | MtasCQLParser p = new MtasCQLParser(new BufferedReader(new StringReader(cql))); |
28 | try { | 28 | try { |
29 | - assertEquals(p.parse(field) ,q); | 29 | + assertEquals(p.parse(field, defaultPrefix) ,q); |
30 | System.out.println("Tested CQL parsing:\t"+cql); | 30 | System.out.println("Tested CQL parsing:\t"+cql); |
31 | } catch (ParseException e) { | 31 | } catch (ParseException e) { |
32 | System.out.println("Error CQL parsing:\t"+cql); | 32 | System.out.println("Error CQL parsing:\t"+cql); |
@@ -34,11 +34,11 @@ public class MtasCQLParserTestWord { | @@ -34,11 +34,11 @@ public class MtasCQLParserTestWord { | ||
34 | } | 34 | } |
35 | } | 35 | } |
36 | 36 | ||
37 | - private void testCQLEquivalent(String field, String cql1, String cql2) { | 37 | + private void testCQLEquivalent(String field, String defaultPrefix, String cql1, String cql2) { |
38 | MtasCQLParser p1 = new MtasCQLParser(new BufferedReader(new StringReader(cql1))); | 38 | MtasCQLParser p1 = new MtasCQLParser(new BufferedReader(new StringReader(cql1))); |
39 | MtasCQLParser p2 = new MtasCQLParser(new BufferedReader(new StringReader(cql2))); | 39 | MtasCQLParser p2 = new MtasCQLParser(new BufferedReader(new StringReader(cql2))); |
40 | try { | 40 | try { |
41 | - assertEquals(p1.parse(field) ,p2.parse(field)); | 41 | + assertEquals(p1.parse(field, defaultPrefix) ,p2.parse(field, defaultPrefix)); |
42 | System.out.println("Tested CQL equivalent:\t"+cql1+" and "+cql2); | 42 | System.out.println("Tested CQL equivalent:\t"+cql1+" and "+cql2); |
43 | } catch (ParseException e) { | 43 | } catch (ParseException e) { |
44 | System.out.println("Error CQL equivalent:\t"+cql1+" and "+cql2); | 44 | System.out.println("Error CQL equivalent:\t"+cql1+" and "+cql2); |
@@ -67,6 +67,7 @@ public class MtasCQLParserTestWord { | @@ -67,6 +67,7 @@ public class MtasCQLParserTestWord { | ||
67 | basicTest10(); | 67 | basicTest10(); |
68 | basicTest11(); | 68 | basicTest11(); |
69 | basicTest12(); | 69 | basicTest12(); |
70 | + basicTest13(); | ||
70 | } | 71 | } |
71 | 72 | ||
72 | private void basicNotTest1() { | 73 | private void basicNotTest1() { |
@@ -75,14 +76,14 @@ public class MtasCQLParserTestWord { | @@ -75,14 +76,14 @@ public class MtasCQLParserTestWord { | ||
75 | SpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID"); | 76 | SpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID"); |
76 | SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","de"); | 77 | SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","de"); |
77 | SpanQuery q = new SpanNotQuery(q1,q2); | 78 | SpanQuery q = new SpanNotQuery(q1,q2); |
78 | - testCQLParse(field, cql, q); | 79 | + testCQLParse(field, null, cql, q); |
79 | } | 80 | } |
80 | 81 | ||
81 | private void basicNotTest2() { | 82 | private void basicNotTest2() { |
82 | String field = "testveld"; | 83 | String field = "testveld"; |
83 | String cql1 = "[pos=\"LID\" & (!lemma=\"de\")]"; | 84 | String cql1 = "[pos=\"LID\" & (!lemma=\"de\")]"; |
84 | String cql2 = "[pos=\"LID\" & !(lemma=\"de\")]"; | 85 | String cql2 = "[pos=\"LID\" & !(lemma=\"de\")]"; |
85 | - testCQLEquivalent(field, cql1, cql2); | 86 | + testCQLEquivalent(field, null, cql1, cql2); |
86 | } | 87 | } |
87 | 88 | ||
88 | private void basicNotTest3() { | 89 | private void basicNotTest3() { |
@@ -93,28 +94,28 @@ public class MtasCQLParserTestWord { | @@ -93,28 +94,28 @@ public class MtasCQLParserTestWord { | ||
93 | SpanQuery q3 = new MtasCQLParserWordQuery(field,"lemma","een"); | 94 | SpanQuery q3 = new MtasCQLParserWordQuery(field,"lemma","een"); |
94 | SpanQuery q4 = new MtasSpanOrQuery(new SpanQuery[]{q2,q3}); | 95 | SpanQuery q4 = new MtasSpanOrQuery(new SpanQuery[]{q2,q3}); |
95 | SpanQuery q = new SpanNotQuery(q1,q4); | 96 | SpanQuery q = new SpanNotQuery(q1,q4); |
96 | - testCQLParse(field, cql, q); | 97 | + testCQLParse(field, null, cql, q); |
97 | } | 98 | } |
98 | 99 | ||
99 | private void basicNotTest4() { | 100 | private void basicNotTest4() { |
100 | String field = "testveld"; | 101 | String field = "testveld"; |
101 | String cql1 = "[pos=\"LID\" & !(lemma=\"de\" | lemma=\"een\")]"; | 102 | String cql1 = "[pos=\"LID\" & !(lemma=\"de\" | lemma=\"een\")]"; |
102 | String cql2 = "[pos=\"LID\" & (!lemma=\"de\" & !lemma=\"een\")]"; | 103 | String cql2 = "[pos=\"LID\" & (!lemma=\"de\" & !lemma=\"een\")]"; |
103 | - testCQLEquivalent(field, cql1, cql2); | 104 | + testCQLEquivalent(field, null, cql1, cql2); |
104 | } | 105 | } |
105 | 106 | ||
106 | private void basicNotTest5() { | 107 | private void basicNotTest5() { |
107 | String field = "testveld"; | 108 | String field = "testveld"; |
108 | String cql1 = "[pos=\"LID\" & !(lemma=\"de\" | lemma=\"een\")]"; | 109 | String cql1 = "[pos=\"LID\" & !(lemma=\"de\" | lemma=\"een\")]"; |
109 | String cql2 = "[pos=\"LID\" & !lemma=\"de\" & !lemma=\"een\"]"; | 110 | String cql2 = "[pos=\"LID\" & !lemma=\"de\" & !lemma=\"een\"]"; |
110 | - testCQLEquivalent(field, cql1, cql2); | 111 | + testCQLEquivalent(field, null, cql1, cql2); |
111 | } | 112 | } |
112 | 113 | ||
113 | private void basicTest1() { | 114 | private void basicTest1() { |
114 | String field = "testveld"; | 115 | String field = "testveld"; |
115 | String cql = "[lemma=\"koe\"]"; | 116 | String cql = "[lemma=\"koe\"]"; |
116 | SpanQuery q = new MtasCQLParserWordQuery(field, "lemma", "koe"); | 117 | SpanQuery q = new MtasCQLParserWordQuery(field, "lemma", "koe"); |
117 | - testCQLParse(field, cql, q); | 118 | + testCQLParse(field, null, cql, q); |
118 | } | 119 | } |
119 | 120 | ||
120 | private void basicTest2() { | 121 | private void basicTest2() { |
@@ -123,7 +124,7 @@ public class MtasCQLParserTestWord { | @@ -123,7 +124,7 @@ public class MtasCQLParserTestWord { | ||
123 | SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe"); | 124 | SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe"); |
124 | SpanQuery q2 = new MtasCQLParserWordQuery(field,"pos","N"); | 125 | SpanQuery q2 = new MtasCQLParserWordQuery(field,"pos","N"); |
125 | SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q1,q2}); | 126 | SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q1,q2}); |
126 | - testCQLParse(field, cql, q); | 127 | + testCQLParse(field, null, cql, q); |
127 | } | 128 | } |
128 | 129 | ||
129 | private void basicTest3() { | 130 | private void basicTest3() { |
@@ -132,14 +133,14 @@ public class MtasCQLParserTestWord { | @@ -132,14 +133,14 @@ public class MtasCQLParserTestWord { | ||
132 | SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe"); | 133 | SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe"); |
133 | SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","paard"); | 134 | SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","paard"); |
134 | SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q2}); | 135 | SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q2}); |
135 | - testCQLParse(field, cql, q); | 136 | + testCQLParse(field, null, cql, q); |
136 | } | 137 | } |
137 | 138 | ||
138 | private void basicTest4() { | 139 | private void basicTest4() { |
139 | String field = "testveld"; | 140 | String field = "testveld"; |
140 | String cql1 = "[lemma=\"koe\" | lemma=\"paard\"]"; | 141 | String cql1 = "[lemma=\"koe\" | lemma=\"paard\"]"; |
141 | String cql2 = "[(lemma=\"koe\" | lemma=\"paard\")]"; | 142 | String cql2 = "[(lemma=\"koe\" | lemma=\"paard\")]"; |
142 | - testCQLEquivalent(field, cql1, cql2); | 143 | + testCQLEquivalent(field, null, cql1, cql2); |
143 | } | 144 | } |
144 | 145 | ||
145 | private void basicTest5() { | 146 | private void basicTest5() { |
@@ -150,7 +151,7 @@ public class MtasCQLParserTestWord { | @@ -150,7 +151,7 @@ public class MtasCQLParserTestWord { | ||
150 | SpanQuery q3 = new MtasSpanOrQuery(new SpanQuery[]{q1,q2}); | 151 | SpanQuery q3 = new MtasSpanOrQuery(new SpanQuery[]{q1,q2}); |
151 | SpanQuery q4 = new MtasCQLParserWordQuery(field,"pos","N"); | 152 | SpanQuery q4 = new MtasCQLParserWordQuery(field,"pos","N"); |
152 | SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q3,q4}); | 153 | SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q3,q4}); |
153 | - testCQLParse(field, cql, q); | 154 | + testCQLParse(field, null, cql, q); |
154 | } | 155 | } |
155 | 156 | ||
156 | private void basicTest6() { | 157 | private void basicTest6() { |
@@ -161,7 +162,7 @@ public class MtasCQLParserTestWord { | @@ -161,7 +162,7 @@ public class MtasCQLParserTestWord { | ||
161 | SpanQuery q3 = new MtasCQLParserWordQuery(field,"lemma","paard"); | 162 | SpanQuery q3 = new MtasCQLParserWordQuery(field,"lemma","paard"); |
162 | SpanQuery q4 = new MtasSpanOrQuery(new SpanQuery[]{q2,q3}); | 163 | SpanQuery q4 = new MtasSpanOrQuery(new SpanQuery[]{q2,q3}); |
163 | SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q1,q4}); | 164 | SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q1,q4}); |
164 | - testCQLParse(field, cql, q); | 165 | + testCQLParse(field, null, cql, q); |
165 | } | 166 | } |
166 | 167 | ||
167 | private void basicTest7() { | 168 | private void basicTest7() { |
@@ -172,7 +173,7 @@ public class MtasCQLParserTestWord { | @@ -172,7 +173,7 @@ public class MtasCQLParserTestWord { | ||
172 | SpanQuery q3 = new MtasCQLParserWordQuery(field,"pos","N"); | 173 | SpanQuery q3 = new MtasCQLParserWordQuery(field,"pos","N"); |
173 | SpanQuery q4 = new MtasSpanAndQuery(new SpanQuery[]{q2,q3}); | 174 | SpanQuery q4 = new MtasSpanAndQuery(new SpanQuery[]{q2,q3}); |
174 | SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q4}); | 175 | SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q4}); |
175 | - testCQLParse(field, cql, q); | 176 | + testCQLParse(field, null, cql, q); |
176 | } | 177 | } |
177 | 178 | ||
178 | private void basicTest8() { | 179 | private void basicTest8() { |
@@ -185,7 +186,7 @@ public class MtasCQLParserTestWord { | @@ -185,7 +186,7 @@ public class MtasCQLParserTestWord { | ||
185 | SpanQuery q5 = new MtasSpanAndQuery(new SpanQuery[]{q1,q2}); | 186 | SpanQuery q5 = new MtasSpanAndQuery(new SpanQuery[]{q1,q2}); |
186 | SpanQuery q6 = new MtasSpanAndQuery(new SpanQuery[]{q3,q4}); | 187 | SpanQuery q6 = new MtasSpanAndQuery(new SpanQuery[]{q3,q4}); |
187 | SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q5,q6}); | 188 | SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q5,q6}); |
188 | - testCQLParse(field, cql, q); | 189 | + testCQLParse(field, null, cql, q); |
189 | } | 190 | } |
190 | 191 | ||
191 | private void basicTest9() { | 192 | private void basicTest9() { |
@@ -200,7 +201,7 @@ public class MtasCQLParserTestWord { | @@ -200,7 +201,7 @@ public class MtasCQLParserTestWord { | ||
200 | SpanQuery q7 = new MtasSpanAndQuery(new SpanQuery[]{q6,q3}); | 201 | SpanQuery q7 = new MtasSpanAndQuery(new SpanQuery[]{q6,q3}); |
201 | SpanQuery q8 = new MtasSpanAndQuery(new SpanQuery[]{q4,q5}); | 202 | SpanQuery q8 = new MtasSpanAndQuery(new SpanQuery[]{q4,q5}); |
202 | SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q7,q8}); | 203 | SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q7,q8}); |
203 | - testCQLParse(field, cql, q); | 204 | + testCQLParse(field, null, cql, q); |
204 | } | 205 | } |
205 | 206 | ||
206 | private void basicTest10() { | 207 | private void basicTest10() { |
@@ -217,22 +218,22 @@ public class MtasCQLParserTestWord { | @@ -217,22 +218,22 @@ public class MtasCQLParserTestWord { | ||
217 | SpanQuery q9 = new MtasSpanOrQuery(new SpanQuery[]{q4,q5}); | 218 | SpanQuery q9 = new MtasSpanOrQuery(new SpanQuery[]{q4,q5}); |
218 | SpanQuery q10 = new MtasSpanAndQuery(new SpanQuery[]{q9,q6}); | 219 | SpanQuery q10 = new MtasSpanAndQuery(new SpanQuery[]{q9,q6}); |
219 | SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q8,q10}); | 220 | SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q8,q10}); |
220 | - testCQLParse(field, cql, q); | 221 | + testCQLParse(field, null, cql, q); |
221 | } | 222 | } |
222 | 223 | ||
223 | private void basicTest11() { | 224 | private void basicTest11() { |
224 | String field = "testveld"; | 225 | String field = "testveld"; |
225 | String cql1 = "[#300]"; | 226 | String cql1 = "[#300]"; |
226 | SpanQuery q1 = new MtasCQLParserWordPositionQuery(field, 300); | 227 | SpanQuery q1 = new MtasCQLParserWordPositionQuery(field, 300); |
227 | - testCQLParse(field, cql1, q1); | 228 | + testCQLParse(field, null, cql1, q1); |
228 | String cql2 = "[#100-110]"; | 229 | String cql2 = "[#100-110]"; |
229 | SpanQuery q2 = new MtasCQLParserWordPositionQuery(field, 100, 110); | 230 | SpanQuery q2 = new MtasCQLParserWordPositionQuery(field, 100, 110); |
230 | - testCQLParse(field, cql2, q2); | 231 | + testCQLParse(field, null, cql2, q2); |
231 | String cql3 = "[#100-105 | #110]"; | 232 | String cql3 = "[#100-105 | #110]"; |
232 | SpanQuery q3a = new MtasCQLParserWordPositionQuery(field, 100, 105); | 233 | SpanQuery q3a = new MtasCQLParserWordPositionQuery(field, 100, 105); |
233 | SpanQuery q3b = new MtasCQLParserWordPositionQuery(field, 110); | 234 | SpanQuery q3b = new MtasCQLParserWordPositionQuery(field, 110); |
234 | SpanQuery q3 = new MtasSpanOrQuery(q3a, q3b); | 235 | SpanQuery q3 = new MtasSpanOrQuery(q3a, q3b); |
235 | - testCQLParse(field, cql3, q3); | 236 | + testCQLParse(field, null, cql3, q3); |
236 | } | 237 | } |
237 | 238 | ||
238 | private void basicTest12() { | 239 | private void basicTest12() { |
@@ -242,6 +243,13 @@ public class MtasCQLParserTestWord { | @@ -242,6 +243,13 @@ public class MtasCQLParserTestWord { | ||
242 | SpanQuery q2 = new MtasCQLParserWordQuery(field,"t_lc","het"); | 243 | SpanQuery q2 = new MtasCQLParserWordQuery(field,"t_lc","het"); |
243 | SpanQuery q3 = new MtasCQLParserWordQuery(field,"t_lc","paard"); | 244 | SpanQuery q3 = new MtasCQLParserWordQuery(field,"t_lc","paard"); |
244 | SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q2,q3}); | 245 | SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q2,q3}); |
245 | - testCQLParse(field, cql, q); | ||
246 | - } | 246 | + testCQLParse(field, null, cql, q); |
247 | + } | ||
248 | + | ||
249 | + private void basicTest13() { | ||
250 | + String field = "testveld"; | ||
251 | + String cql = "\"de\""; | ||
252 | + SpanQuery q = new MtasCQLParserWordQuery(field,"t_lc","de"); | ||
253 | + testCQLParse(field, "t_lc", cql, q); | ||
254 | + } | ||
247 | } | 255 | } |
pom.xml
@@ -2,11 +2,13 @@ | @@ -2,11 +2,13 @@ | ||
2 | xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | 2 | xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
3 | <properties> | 3 | <properties> |
4 | <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | 4 | <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
5 | + <currentDevelopmentVersion>6.2.0</currentDevelopmentVersion> | ||
6 | + <currentDevelopmentRelease>20160802</currentDevelopmentRelease> | ||
5 | </properties> | 7 | </properties> |
6 | <modelVersion>4.0.0</modelVersion> | 8 | <modelVersion>4.0.0</modelVersion> |
7 | <groupId>dev.meertens.mtas</groupId> | 9 | <groupId>dev.meertens.mtas</groupId> |
8 | <artifactId>mtas</artifactId> | 10 | <artifactId>mtas</artifactId> |
9 | - <version>6.1.0</version> | 11 | + <version>6.2.0</version> |
10 | <packaging>jar</packaging> | 12 | <packaging>jar</packaging> |
11 | <licenses> | 13 | <licenses> |
12 | <license> | 14 | <license> |
@@ -23,12 +25,12 @@ | @@ -23,12 +25,12 @@ | ||
23 | <developers> | 25 | <developers> |
24 | <developer> | 26 | <developer> |
25 | <name>Matthijs Brouwer</name> | 27 | <name>Matthijs Brouwer</name> |
26 | - <url>https://www.meertens.knaw.nl/cms/nl/medewerkers/144373-matthijsb</url> | 28 | + <url>https://nl.linkedin.com/in/brouwermatthijs/</url> |
27 | </developer> | 29 | </developer> |
28 | <developer> | 30 | <developer> |
29 | - <name>Marc Kemps-Snijders</name> | ||
30 | - <url>https://www.meertens.knaw.nl/cms/nl/medewerkers/143329-marck</url> | ||
31 | - </developer> | 31 | + <name>Marc Kemps-Snijders</name> |
32 | + <url>https://nl.linkedin.com/in/marc-kemps-snijders-1b33753</url> | ||
33 | + </developer> | ||
32 | </developers> | 34 | </developers> |
33 | <build> | 35 | <build> |
34 | <sourceDirectory>src</sourceDirectory> | 36 | <sourceDirectory>src</sourceDirectory> |
@@ -39,6 +41,24 @@ | @@ -39,6 +41,24 @@ | ||
39 | </resources> | 41 | </resources> |
40 | <plugins> | 42 | <plugins> |
41 | <plugin> | 43 | <plugin> |
44 | + <artifactId>maven-clean-plugin</artifactId> | ||
45 | + <version>3.0.0</version> | ||
46 | + <configuration> | ||
47 | + <filesets> | ||
48 | + <fileset> | ||
49 | + <directory>gh-pages</directory> | ||
50 | + <includes> | ||
51 | + <include>**/*</include> | ||
52 | + </includes> | ||
53 | + <excludes> | ||
54 | + <exclude>**/.git/</exclude> | ||
55 | + </excludes> | ||
56 | + <followSymlinks>false</followSymlinks> | ||
57 | + </fileset> | ||
58 | + </filesets> | ||
59 | + </configuration> | ||
60 | + </plugin> | ||
61 | + <plugin> | ||
42 | <groupId>org.apache.maven.plugins</groupId> | 62 | <groupId>org.apache.maven.plugins</groupId> |
43 | <artifactId>maven-compiler-plugin</artifactId> | 63 | <artifactId>maven-compiler-plugin</artifactId> |
44 | <version>3.5.1</version> | 64 | <version>3.5.1</version> |
@@ -46,7 +66,7 @@ | @@ -46,7 +66,7 @@ | ||
46 | <source>1.8</source> | 66 | <source>1.8</source> |
47 | <target>1.8</target> | 67 | <target>1.8</target> |
48 | </configuration> | 68 | </configuration> |
49 | - </plugin> | 69 | + </plugin> |
50 | <plugin> | 70 | <plugin> |
51 | <groupId>org.apache.maven.plugins</groupId> | 71 | <groupId>org.apache.maven.plugins</groupId> |
52 | <artifactId>maven-site-plugin</artifactId> | 72 | <artifactId>maven-site-plugin</artifactId> |
@@ -145,27 +165,27 @@ | @@ -145,27 +165,27 @@ | ||
145 | <dependency> | 165 | <dependency> |
146 | <groupId>org.apache.lucene</groupId> | 166 | <groupId>org.apache.lucene</groupId> |
147 | <artifactId>lucene-core</artifactId> | 167 | <artifactId>lucene-core</artifactId> |
148 | - <version>6.1.0</version> | 168 | + <version>6.2.0</version> |
149 | </dependency> | 169 | </dependency> |
150 | <dependency> | 170 | <dependency> |
151 | <groupId>org.apache.lucene</groupId> | 171 | <groupId>org.apache.lucene</groupId> |
152 | <artifactId>lucene-analyzers-common</artifactId> | 172 | <artifactId>lucene-analyzers-common</artifactId> |
153 | - <version>6.1.0</version> | 173 | + <version>6.2.0</version> |
154 | </dependency> | 174 | </dependency> |
155 | <dependency> | 175 | <dependency> |
156 | <groupId>org.apache.lucene</groupId> | 176 | <groupId>org.apache.lucene</groupId> |
157 | <artifactId>lucene-queryparser</artifactId> | 177 | <artifactId>lucene-queryparser</artifactId> |
158 | - <version>6.1.0</version> | 178 | + <version>6.2.0</version> |
159 | </dependency> | 179 | </dependency> |
160 | <dependency> | 180 | <dependency> |
161 | <groupId>org.apache.lucene</groupId> | 181 | <groupId>org.apache.lucene</groupId> |
162 | <artifactId>lucene-codecs</artifactId> | 182 | <artifactId>lucene-codecs</artifactId> |
163 | - <version>6.1.0</version> | 183 | + <version>6.2.0</version> |
164 | </dependency> | 184 | </dependency> |
165 | <dependency> | 185 | <dependency> |
166 | <groupId>org.apache.solr</groupId> | 186 | <groupId>org.apache.solr</groupId> |
167 | <artifactId>solr-core</artifactId> | 187 | <artifactId>solr-core</artifactId> |
168 | - <version>6.1.0</version> | 188 | + <version>6.2.0</version> |
169 | </dependency> | 189 | </dependency> |
170 | <dependency> | 190 | <dependency> |
171 | <groupId>org.apache.commons</groupId> | 191 | <groupId>org.apache.commons</groupId> |
src/mtas/analysis/MtasTokenizer.java
@@ -25,8 +25,11 @@ import org.apache.lucene.util.AttributeFactory; | @@ -25,8 +25,11 @@ import org.apache.lucene.util.AttributeFactory; | ||
25 | 25 | ||
26 | /** | 26 | /** |
27 | * The Class MtasTokenizer. | 27 | * The Class MtasTokenizer. |
28 | + * | ||
29 | + * @param <T> | ||
30 | + * the generic type | ||
28 | */ | 31 | */ |
29 | -public final class MtasTokenizer extends Tokenizer { | 32 | +public final class MtasTokenizer<T> extends Tokenizer { |
30 | 33 | ||
31 | /** The configuration mtas. */ | 34 | /** The configuration mtas. */ |
32 | public static String CONFIGURATION_MTAS = "mtas"; | 35 | public static String CONFIGURATION_MTAS = "mtas"; |
@@ -73,7 +76,8 @@ public final class MtasTokenizer extends Tokenizer { | @@ -73,7 +76,8 @@ public final class MtasTokenizer extends Tokenizer { | ||
73 | /** | 76 | /** |
74 | * Instantiates a new mtas tokenizer. | 77 | * Instantiates a new mtas tokenizer. |
75 | * | 78 | * |
76 | - * @param configFileName the config file name | 79 | + * @param configFileName |
80 | + * the config file name | ||
77 | */ | 81 | */ |
78 | public MtasTokenizer(String configFileName) { | 82 | public MtasTokenizer(String configFileName) { |
79 | readConfigurationFile(configFileName); | 83 | readConfigurationFile(configFileName); |
@@ -82,8 +86,10 @@ public final class MtasTokenizer extends Tokenizer { | @@ -82,8 +86,10 @@ public final class MtasTokenizer extends Tokenizer { | ||
82 | /** | 86 | /** |
83 | * Instantiates a new mtas tokenizer. | 87 | * Instantiates a new mtas tokenizer. |
84 | * | 88 | * |
85 | - * @param config the config | ||
86 | - * @throws IOException Signals that an I/O exception has occurred. | 89 | + * @param config |
90 | + * the config | ||
91 | + * @throws IOException | ||
92 | + * Signals that an I/O exception has occurred. | ||
87 | */ | 93 | */ |
88 | public MtasTokenizer(MtasConfiguration config) throws IOException { | 94 | public MtasTokenizer(MtasConfiguration config) throws IOException { |
89 | processConfiguration(config); | 95 | processConfiguration(config); |
@@ -92,8 +98,10 @@ public final class MtasTokenizer extends Tokenizer { | @@ -92,8 +98,10 @@ public final class MtasTokenizer extends Tokenizer { | ||
92 | /** | 98 | /** |
93 | * Instantiates a new mtas tokenizer. | 99 | * Instantiates a new mtas tokenizer. |
94 | * | 100 | * |
95 | - * @param reader the reader | ||
96 | - * @throws IOException Signals that an I/O exception has occurred. | 101 | + * @param reader |
102 | + * the reader | ||
103 | + * @throws IOException | ||
104 | + * Signals that an I/O exception has occurred. | ||
97 | */ | 105 | */ |
98 | public MtasTokenizer(InputStream reader) throws IOException { | 106 | public MtasTokenizer(InputStream reader) throws IOException { |
99 | processConfiguration(MtasConfiguration.readConfiguration(reader)); | 107 | processConfiguration(MtasConfiguration.readConfiguration(reader)); |
@@ -102,9 +110,12 @@ public final class MtasTokenizer extends Tokenizer { | @@ -102,9 +110,12 @@ public final class MtasTokenizer extends Tokenizer { | ||
102 | /** | 110 | /** |
103 | * Instantiates a new mtas tokenizer. | 111 | * Instantiates a new mtas tokenizer. |
104 | * | 112 | * |
105 | - * @param factory the factory | ||
106 | - * @param config the config | ||
107 | - * @throws IOException Signals that an I/O exception has occurred. | 113 | + * @param factory |
114 | + * the factory | ||
115 | + * @param config | ||
116 | + * the config | ||
117 | + * @throws IOException | ||
118 | + * Signals that an I/O exception has occurred. | ||
108 | */ | 119 | */ |
109 | public MtasTokenizer(AttributeFactory factory, MtasConfiguration config) | 120 | public MtasTokenizer(AttributeFactory factory, MtasConfiguration config) |
110 | throws IOException { | 121 | throws IOException { |
@@ -112,7 +123,9 @@ public final class MtasTokenizer extends Tokenizer { | @@ -112,7 +123,9 @@ public final class MtasTokenizer extends Tokenizer { | ||
112 | processConfiguration(config); | 123 | processConfiguration(config); |
113 | } | 124 | } |
114 | 125 | ||
115 | - /* (non-Javadoc) | 126 | + /* |
127 | + * (non-Javadoc) | ||
128 | + * | ||
116 | * @see org.apache.lucene.analysis.TokenStream#incrementToken() | 129 | * @see org.apache.lucene.analysis.TokenStream#incrementToken() |
117 | */ | 130 | */ |
118 | @Override | 131 | @Override |
@@ -128,7 +141,7 @@ public final class MtasTokenizer extends Tokenizer { | @@ -128,7 +141,7 @@ public final class MtasTokenizer extends Tokenizer { | ||
128 | // compute info | 141 | // compute info |
129 | positionIncrement = token.getPositionStart() - currentPosition; | 142 | positionIncrement = token.getPositionStart() - currentPosition; |
130 | currentPosition = token.getPositionStart(); | 143 | currentPosition = token.getPositionStart(); |
131 | - payloadEncoder = new MtasPayloadEncoder(token, encodingFlags); | 144 | + payloadEncoder = new MtasPayloadEncoder(token, encodingFlags); |
132 | // set info | 145 | // set info |
133 | termAtt.append(token.getValue().toString()); | 146 | termAtt.append(token.getValue().toString()); |
134 | positionIncrementAtt.setPositionIncrement(positionIncrement); | 147 | positionIncrementAtt.setPositionIncrement(positionIncrement); |
@@ -157,7 +170,6 @@ public final class MtasTokenizer extends Tokenizer { | @@ -157,7 +170,6 @@ public final class MtasTokenizer extends Tokenizer { | ||
157 | e.getClass().getSimpleName() + ": " + e.getMessage()); | 170 | e.getClass().getSimpleName() + ": " + e.getMessage()); |
158 | } catch (MtasParserException e) { | 171 | } catch (MtasParserException e) { |
159 | tokenCollectionIterator = null; | 172 | tokenCollectionIterator = null; |
160 | - e.printStackTrace(); | ||
161 | throw new IOException( | 173 | throw new IOException( |
162 | e.getClass().getSimpleName() + ": " + e.getMessage()); | 174 | e.getClass().getSimpleName() + ": " + e.getMessage()); |
163 | } | 175 | } |
@@ -167,14 +179,19 @@ public final class MtasTokenizer extends Tokenizer { | @@ -167,14 +179,19 @@ public final class MtasTokenizer extends Tokenizer { | ||
167 | /** | 179 | /** |
168 | * Prints the token collection built from the given reader. | 180 | * Prints the token collection built from the given reader. |
169 | * | 181 | * |
170 | - * @param r the r | ||
171 | - * @throws IOException Signals that an I/O exception has occurred. | ||
172 | - * @throws MtasParserException the mtas parser exception | 182 | + * @param r |
183 | + * the reader supplying the input to tokenize and print | ||
184 | + * @throws IOException | ||
185 | + * Signals that an I/O exception has occurred. | ||
186 | + * @throws MtasParserException | ||
187 | + * the mtas parser exception | ||
173 | */ | 188 | */ |
174 | public void print(Reader r) throws IOException, MtasParserException { | 189 | public void print(Reader r) throws IOException, MtasParserException { |
175 | setReader(r); | 190 | setReader(r); |
176 | reset(); | 191 | reset(); |
177 | - tokenCollection.print(); | 192 | + if (tokenCollection != null) { |
193 | + tokenCollection.print(); | ||
194 | + } | ||
178 | end(); | 195 | end(); |
179 | close(); | 196 | close(); |
180 | } | 197 | } |
@@ -182,10 +199,13 @@ public final class MtasTokenizer extends Tokenizer { | @@ -182,10 +199,13 @@ public final class MtasTokenizer extends Tokenizer { | ||
182 | /** | 199 | /** |
183 | * Gets the list. | 200 | * Gets the list. |
184 | * | 201 | * |
185 | - * @param r the r | 202 | + * @param r |
203 | + * the r | ||
186 | * @return the list | 204 | * @return the list |
187 | - * @throws IOException Signals that an I/O exception has occurred. | ||
188 | - * @throws MtasParserException the mtas parser exception | 205 | + * @throws IOException |
206 | + * Signals that an I/O exception has occurred. | ||
207 | + * @throws MtasParserException | ||
208 | + * the mtas parser exception | ||
189 | */ | 209 | */ |
190 | public String[][] getList(Reader r) throws IOException, MtasParserException { | 210 | public String[][] getList(Reader r) throws IOException, MtasParserException { |
191 | setReader(r); | 211 | setReader(r); |
@@ -199,9 +219,12 @@ public final class MtasTokenizer extends Tokenizer { | @@ -199,9 +219,12 @@ public final class MtasTokenizer extends Tokenizer { | ||
199 | /** | 219 | /** |
200 | * Construct token collection. | 220 | * Construct token collection. |
201 | * | 221 | * |
202 | - * @param reader the reader | ||
203 | - * @throws MtasConfigException the mtas config exception | ||
204 | - * @throws MtasParserException the mtas parser exception | 222 | + * @param reader |
223 | + * the reader | ||
224 | + * @throws MtasConfigException | ||
225 | + * the mtas config exception | ||
226 | + * @throws MtasParserException | ||
227 | + * the mtas parser exception | ||
205 | */ | 228 | */ |
206 | private void constructTokenCollection(Reader reader) | 229 | private void constructTokenCollection(Reader reader) |
207 | throws MtasConfigException, MtasParserException { | 230 | throws MtasConfigException, MtasParserException { |
@@ -216,29 +239,28 @@ public final class MtasTokenizer extends Tokenizer { | @@ -216,29 +239,28 @@ public final class MtasTokenizer extends Tokenizer { | ||
216 | try { | 239 | try { |
217 | tokenCollection = parser.createTokenCollection(reader); | 240 | tokenCollection = parser.createTokenCollection(reader); |
218 | return; | 241 | return; |
219 | - } catch (MtasParserException e) { | 242 | + } catch (MtasParserException e) { |
220 | tokenCollection = new MtasTokenCollection(); | 243 | tokenCollection = new MtasTokenCollection(); |
221 | - e.printStackTrace(); | ||
222 | throw new MtasParserException(e.getMessage()); | 244 | throw new MtasParserException(e.getMessage()); |
223 | } | 245 | } |
224 | } else { | 246 | } else { |
225 | throw new MtasConfigException("no instance of MtasParser"); | 247 | throw new MtasConfigException("no instance of MtasParser"); |
226 | } | 248 | } |
227 | } catch (NoSuchMethodException e) { | 249 | } catch (NoSuchMethodException e) { |
228 | - throw new MtasConfigException(e.getClass().getName() | ||
229 | - + " : '" + e.getMessage() + "'"); | 250 | + throw new MtasConfigException( |
251 | + e.getClass().getName() + " : '" + e.getMessage() + "'"); | ||
230 | } catch (InvocationTargetException e) { | 252 | } catch (InvocationTargetException e) { |
231 | - throw new MtasConfigException(e.getClass().getName() | ||
232 | - + " : '" + e.getMessage() + "'"); | 253 | + throw new MtasConfigException( |
254 | + e.getClass().getName() + " : '" + e.getMessage() + "'"); | ||
233 | } catch (IllegalAccessException e) { | 255 | } catch (IllegalAccessException e) { |
234 | - throw new MtasConfigException(e.getClass().getName() | ||
235 | - + " : '" + e.getMessage() + "'"); | 256 | + throw new MtasConfigException( |
257 | + e.getClass().getName() + " : '" + e.getMessage() + "'"); | ||
236 | } catch (ClassNotFoundException e) { | 258 | } catch (ClassNotFoundException e) { |
237 | - throw new MtasConfigException(e.getClass().getName() | ||
238 | - + " : '" + e.getMessage() + "'"); | 259 | + throw new MtasConfigException( |
260 | + e.getClass().getName() + " : '" + e.getMessage() + "'"); | ||
239 | } catch (InstantiationException e) { | 261 | } catch (InstantiationException e) { |
240 | - throw new MtasConfigException(e.getClass().getName() | ||
241 | - + " : '" + e.getMessage() + "'"); | 262 | + throw new MtasConfigException( |
263 | + e.getClass().getName() + " : '" + e.getMessage() + "'"); | ||
242 | } | 264 | } |
243 | 265 | ||
244 | } | 266 | } |
@@ -246,7 +268,8 @@ public final class MtasTokenizer extends Tokenizer { | @@ -246,7 +268,8 @@ public final class MtasTokenizer extends Tokenizer { | ||
246 | /** | 268 | /** |
247 | * Read configuration file. | 269 | * Read configuration file. |
248 | * | 270 | * |
249 | - * @param configFile the config file | 271 | + * @param configFile |
272 | + * the config file | ||
250 | */ | 273 | */ |
251 | private void readConfigurationFile(String configFile) { | 274 | private void readConfigurationFile(String configFile) { |
252 | InputStream is; | 275 | InputStream is; |
@@ -261,13 +284,13 @@ public final class MtasTokenizer extends Tokenizer { | @@ -261,13 +284,13 @@ public final class MtasTokenizer extends Tokenizer { | ||
261 | } | 284 | } |
262 | } | 285 | } |
263 | 286 | ||
264 | - | ||
265 | - | ||
266 | /** | 287 | /** |
267 | * Process configuration. | 288 | * Process configuration. |
268 | * | 289 | * |
269 | - * @param config the config | ||
270 | - * @throws IOException Signals that an I/O exception has occurred. | 290 | + * @param config |
291 | + * the config | ||
292 | + * @throws IOException | ||
293 | + * Signals that an I/O exception has occurred. | ||
271 | */ | 294 | */ |
272 | private void processConfiguration(MtasConfiguration config) | 295 | private void processConfiguration(MtasConfiguration config) |
273 | throws IOException { | 296 | throws IOException { |
src/mtas/analysis/parser/MtasBasicParser.java
@@ -165,7 +165,8 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -165,7 +165,8 @@ abstract public class MtasBasicParser extends MtasParser { | ||
165 | /** | 165 | /** |
166 | * Instantiates a new mtas basic parser. | 166 | * Instantiates a new mtas basic parser. |
167 | * | 167 | * |
168 | - * @param config the config | 168 | + * @param config |
169 | + * the config | ||
169 | */ | 170 | */ |
170 | public MtasBasicParser(MtasConfiguration config) { | 171 | public MtasBasicParser(MtasConfiguration config) { |
171 | this.config = config; | 172 | this.config = config; |
@@ -174,11 +175,16 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -174,11 +175,16 @@ abstract public class MtasBasicParser extends MtasParser { | ||
174 | /** | 175 | /** |
175 | * Compute mappings from object. | 176 | * Compute mappings from object. |
176 | * | 177 | * |
177 | - * @param object the object | ||
178 | - * @param currentList the current list | ||
179 | - * @param updateList the update list | ||
180 | - * @throws MtasParserException the mtas parser exception | ||
181 | - * @throws MtasConfigException the mtas config exception | 178 | + * @param object |
179 | + * the object | ||
180 | + * @param currentList | ||
181 | + * the current list | ||
182 | + * @param updateList | ||
183 | + * the update list | ||
184 | + * @throws MtasParserException | ||
185 | + * the mtas parser exception | ||
186 | + * @throws MtasConfigException | ||
187 | + * the mtas config exception | ||
182 | */ | 188 | */ |
183 | protected void computeMappingsFromObject(MtasParserObject object, | 189 | protected void computeMappingsFromObject(MtasParserObject object, |
184 | HashMap<String, ArrayList<MtasParserObject>> currentList, | 190 | HashMap<String, ArrayList<MtasParserObject>> currentList, |
@@ -196,7 +202,6 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -196,7 +202,6 @@ abstract public class MtasBasicParser extends MtasParser { | ||
196 | updateList.get(UPDATE_TYPE_OFFSET).put(tokenId, object.getRefIds()); | 202 | updateList.get(UPDATE_TYPE_OFFSET).put(tokenId, object.getRefIds()); |
197 | } | 203 | } |
198 | } | 204 | } |
199 | - | ||
200 | for (MtasParserMapping<?> mapping : mappings) { | 205 | for (MtasParserMapping<?> mapping : mappings) { |
201 | try { | 206 | try { |
202 | if (mapping.getTokens().size() == 0) { | 207 | if (mapping.getTokens().size() == 0) { |
@@ -271,9 +276,18 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -271,9 +276,18 @@ abstract public class MtasBasicParser extends MtasParser { | ||
271 | String checkType = object.objectType.getType(); | 276 | String checkType = object.objectType.getType(); |
272 | // register id for update when parent is created | 277 | // register id for update when parent is created |
273 | if (currentList.get(checkType).size() > 0) { | 278 | if (currentList.get(checkType).size() > 0) { |
274 | - currentList.get(checkType) | ||
275 | - .get(currentList.get(checkType).size() - 1) | ||
276 | - .registerUpdateableMappingAtParent(token.getId()); | 279 | + if (currentList.get(checkType).contains(object)) { |
280 | + int listPosition = currentList.get(checkType) | ||
281 | + .indexOf(object); | ||
282 | + if (listPosition > 0) { | ||
283 | + currentList.get(checkType).get(listPosition - 1) | ||
284 | + .registerUpdateableMappingAtParent(token.getId()); | ||
285 | + } | ||
286 | + } else { | ||
287 | + currentList.get(checkType) | ||
288 | + .get(currentList.get(checkType).size() - 1) | ||
289 | + .registerUpdateableMappingAtParent(token.getId()); | ||
290 | + } | ||
277 | // if no real ancestor, register id update when group | 291 | // if no real ancestor, register id update when group |
278 | // ancestor is created | 292 | // ancestor is created |
279 | } else if (currentList.get(MAPPING_TYPE_GROUP).size() > 0) { | 293 | } else if (currentList.get(MAPPING_TYPE_GROUP).size() > 0) { |
@@ -287,7 +301,9 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -287,7 +301,9 @@ abstract public class MtasBasicParser extends MtasParser { | ||
287 | } | 301 | } |
288 | // update children | 302 | // update children |
289 | for (Integer tmpId : object.getUpdateableMappingsAsParent()) { | 303 | for (Integer tmpId : object.getUpdateableMappingsAsParent()) { |
290 | - tokenCollection.get(tmpId).setParentId(token.getId()); | 304 | + if (tokenCollection.get(tmpId) != null) { |
305 | + tokenCollection.get(tmpId).setParentId(token.getId()); | ||
306 | + } | ||
291 | } | 307 | } |
292 | object.resetUpdateableMappingsAsParent(); | 308 | object.resetUpdateableMappingsAsParent(); |
293 | // use own position | 309 | // use own position |
@@ -372,10 +388,20 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -372,10 +388,20 @@ abstract public class MtasBasicParser extends MtasParser { | ||
372 | } | 388 | } |
373 | // copy remaining updateableMappings to new parent | 389 | // copy remaining updateableMappings to new parent |
374 | if (currentList.get(objectType.getType()).size() > 0) { | 390 | if (currentList.get(objectType.getType()).size() > 0) { |
375 | - currentList.get(objectType.getType()) | ||
376 | - .get(currentList.get(objectType.getType()).size() - 1) | ||
377 | - .registerUpdateableMappingsAtParent( | ||
378 | - object.getUpdateableMappingsAsParent()); | 391 | + if (currentList.get(objectType.getType()).contains(object)) { |
392 | + int listPosition = currentList.get(objectType.getType()) | ||
393 | + .indexOf(object); | ||
394 | + if (listPosition > 0) { | ||
395 | + currentList.get(objectType.getType()).get(listPosition - 1) | ||
396 | + .registerUpdateableMappingsAtParent( | ||
397 | + object.getUpdateableMappingsAsParent()); | ||
398 | + } | ||
399 | + } else { | ||
400 | + currentList.get(objectType.getType()) | ||
401 | + .get(currentList.get(objectType.getType()).size() - 1) | ||
402 | + .registerUpdateableMappingsAtParent( | ||
403 | + object.getUpdateableMappingsAsParent()); | ||
404 | + } | ||
379 | } else if (currentList.get(MAPPING_TYPE_GROUP).size() > 0) { | 405 | } else if (currentList.get(MAPPING_TYPE_GROUP).size() > 0) { |
380 | currentList.get(MAPPING_TYPE_GROUP) | 406 | currentList.get(MAPPING_TYPE_GROUP) |
381 | .get(currentList.get(MAPPING_TYPE_GROUP).size() - 1) | 407 | .get(currentList.get(MAPPING_TYPE_GROUP).size() - 1) |
@@ -392,9 +418,11 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -392,9 +418,11 @@ abstract public class MtasBasicParser extends MtasParser { | ||
392 | /** | 418 | /** |
393 | * Compute type from mapping source. | 419 | * Compute type from mapping source. |
394 | * | 420 | * |
395 | - * @param source the source | 421 | + * @param source |
422 | + * the source | ||
396 | * @return the string | 423 | * @return the string |
397 | - * @throws MtasParserException the mtas parser exception | 424 | + * @throws MtasParserException |
425 | + * the mtas parser exception | ||
398 | */ | 426 | */ |
399 | private String computeTypeFromMappingSource(String source) | 427 | private String computeTypeFromMappingSource(String source) |
400 | throws MtasParserException { | 428 | throws MtasParserException { |
@@ -423,11 +451,15 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -423,11 +451,15 @@ abstract public class MtasBasicParser extends MtasParser { | ||
423 | /** | 451 | /** |
424 | * Compute object from mapping value. | 452 | * Compute object from mapping value. |
425 | * | 453 | * |
426 | - * @param object the object | ||
427 | - * @param mappingValue the mapping value | ||
428 | - * @param currentList the current list | 454 | + * @param object |
455 | + * the object | ||
456 | + * @param mappingValue | ||
457 | + * the mapping value | ||
458 | + * @param currentList | ||
459 | + * the current list | ||
429 | * @return the mtas parser object[] | 460 | * @return the mtas parser object[] |
430 | - * @throws MtasParserException the mtas parser exception | 461 | + * @throws MtasParserException |
462 | + * the mtas parser exception | ||
431 | */ | 463 | */ |
432 | private MtasParserObject[] computeObjectFromMappingValue( | 464 | private MtasParserObject[] computeObjectFromMappingValue( |
433 | MtasParserObject object, HashMap<String, String> mappingValue, | 465 | MtasParserObject object, HashMap<String, String> mappingValue, |
@@ -469,12 +501,17 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -469,12 +501,17 @@ abstract public class MtasBasicParser extends MtasParser { | ||
469 | /** | 501 | /** |
470 | * Compute value from mapping values. | 502 | * Compute value from mapping values. |
471 | * | 503 | * |
472 | - * @param object the object | ||
473 | - * @param mappingValues the mapping values | ||
474 | - * @param currentList the current list | 504 | + * @param object |
505 | + * the object | ||
506 | + * @param mappingValues | ||
507 | + * the mapping values | ||
508 | + * @param currentList | ||
509 | + * the current list | ||
475 | * @return the string[] | 510 | * @return the string[] |
476 | - * @throws MtasParserException the mtas parser exception | ||
477 | - * @throws MtasConfigException the mtas config exception | 511 | + * @throws MtasParserException |
512 | + * the mtas parser exception | ||
513 | + * @throws MtasConfigException | ||
514 | + * the mtas config exception | ||
478 | */ | 515 | */ |
479 | private String[] computeValueFromMappingValues(MtasParserObject object, | 516 | private String[] computeValueFromMappingValues(MtasParserObject object, |
480 | ArrayList<HashMap<String, String>> mappingValues, | 517 | ArrayList<HashMap<String, String>> mappingValues, |
@@ -486,8 +523,8 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -486,8 +523,8 @@ abstract public class MtasBasicParser extends MtasParser { | ||
486 | if (mappingValue.get("source").equals(MtasParserMapping.SOURCE_STRING)) { | 523 | if (mappingValue.get("source").equals(MtasParserMapping.SOURCE_STRING)) { |
487 | if (mappingValue.get("type") | 524 | if (mappingValue.get("type") |
488 | .equals(MtasParserMapping.PARSER_TYPE_STRING)) { | 525 | .equals(MtasParserMapping.PARSER_TYPE_STRING)) { |
489 | - String subvalue = computeFilteredPrefixedValue(mappingValue.get("type"), | ||
490 | - mappingValue.get("text"), null, null); | 526 | + String subvalue = computeFilteredPrefixedValue( |
527 | + mappingValue.get("type"), mappingValue.get("text"), null, null); | ||
491 | if (subvalue != null) { | 528 | if (subvalue != null) { |
492 | for (int i = 0; i < value.length; i++) { | 529 | for (int i = 0; i < value.length; i++) { |
493 | value[i] = value[i] + subvalue; | 530 | value[i] = value[i] + subvalue; |
@@ -499,7 +536,7 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -499,7 +536,7 @@ abstract public class MtasBasicParser extends MtasParser { | ||
499 | MtasParserObject[] checkObjects = computeObjectFromMappingValue(object, | 536 | MtasParserObject[] checkObjects = computeObjectFromMappingValue(object, |
500 | mappingValue, currentList); | 537 | mappingValue, currentList); |
501 | // create value | 538 | // create value |
502 | - if (checkObjects != null) { | 539 | + if (checkObjects != null && checkObjects.length > 0) { |
503 | MtasParserType checkType = checkObjects[0].getType(); | 540 | MtasParserType checkType = checkObjects[0].getType(); |
504 | // add name to value | 541 | // add name to value |
505 | if (mappingValue.get("type") | 542 | if (mappingValue.get("type") |
@@ -586,11 +623,15 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -586,11 +623,15 @@ abstract public class MtasBasicParser extends MtasParser { | ||
586 | /** | 623 | /** |
587 | * Compute payload from mapping payload. | 624 | * Compute payload from mapping payload. |
588 | * | 625 | * |
589 | - * @param object the object | ||
590 | - * @param mappingPayloads the mapping payloads | ||
591 | - * @param currentList the current list | 626 | + * @param object |
627 | + * the object | ||
628 | + * @param mappingPayloads | ||
629 | + * the mapping payloads | ||
630 | + * @param currentList | ||
631 | + * the current list | ||
592 | * @return the bytes ref | 632 | * @return the bytes ref |
593 | - * @throws MtasParserException the mtas parser exception | 633 | + * @throws MtasParserException |
634 | + * the mtas parser exception | ||
594 | */ | 635 | */ |
595 | private BytesRef computePayloadFromMappingPayload(MtasParserObject object, | 636 | private BytesRef computePayloadFromMappingPayload(MtasParserObject object, |
596 | ArrayList<HashMap<String, String>> mappingPayloads, | 637 | ArrayList<HashMap<String, String>> mappingPayloads, |
@@ -605,7 +646,7 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -605,7 +646,7 @@ abstract public class MtasBasicParser extends MtasParser { | ||
605 | if (mappingPayload.get("text") != null) { | 646 | if (mappingPayload.get("text") != null) { |
606 | BytesRef subpayload = computeMaximumFilteredPayload( | 647 | BytesRef subpayload = computeMaximumFilteredPayload( |
607 | mappingPayload.get("text"), payload, null); | 648 | mappingPayload.get("text"), payload, null); |
608 | - payload = (subpayload != null) ? subpayload : payload; | 649 | + payload = (subpayload != null) ? subpayload : payload; |
609 | } | 650 | } |
610 | } | 651 | } |
611 | // from objects | 652 | // from objects |
@@ -637,8 +678,10 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -637,8 +678,10 @@ abstract public class MtasBasicParser extends MtasParser { | ||
637 | /** | 678 | /** |
638 | * Prevalidate object. | 679 | * Prevalidate object. |
639 | * | 680 | * |
640 | - * @param object the object | ||
641 | - * @param currentList the current list | 681 | + * @param object |
682 | + * the object | ||
683 | + * @param currentList | ||
684 | + * the current list | ||
642 | * @return the boolean | 685 | * @return the boolean |
643 | */ | 686 | */ |
644 | Boolean prevalidateObject(MtasParserObject object, | 687 | Boolean prevalidateObject(MtasParserObject object, |
@@ -663,10 +706,14 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -663,10 +706,14 @@ abstract public class MtasBasicParser extends MtasParser { | ||
663 | /** | 706 | /** |
664 | * Precheck mapping conditions. | 707 | * Precheck mapping conditions. |
665 | * | 708 | * |
666 | - * @param object the object | ||
667 | - * @param mappingConditions the mapping conditions | ||
668 | - * @param currentList the current list | ||
669 | - * @throws MtasParserException the mtas parser exception | 709 | + * @param object |
710 | + * the object | ||
711 | + * @param mappingConditions | ||
712 | + * the mapping conditions | ||
713 | + * @param currentList | ||
714 | + * the current list | ||
715 | + * @throws MtasParserException | ||
716 | + * the mtas parser exception | ||
670 | */ | 717 | */ |
671 | void precheckMappingConditions(MtasParserObject object, | 718 | void precheckMappingConditions(MtasParserObject object, |
672 | ArrayList<HashMap<String, String>> mappingConditions, | 719 | ArrayList<HashMap<String, String>> mappingConditions, |
@@ -771,7 +818,31 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -771,7 +818,31 @@ abstract public class MtasBasicParser extends MtasParser { | ||
771 | // condition on text | 818 | // condition on text |
772 | } else if (mappingCondition.get("type") | 819 | } else if (mappingCondition.get("type") |
773 | .equals(MtasParserMapping.PARSER_TYPE_TEXT)) { | 820 | .equals(MtasParserMapping.PARSER_TYPE_TEXT)) { |
774 | - // can't pre-check this type of condition | 821 | + // this type of condition can only be pre-checked when the object type allows a text precheck |
822 | + if (object.getType().precheckText()) { | ||
823 | + String textCondition = mappingCondition.get("condition"); | ||
824 | + String textValue = object.getText(); | ||
825 | + if ((textCondition == null) | ||
826 | + && ((textValue == null) || textValue.equals(""))) { | ||
827 | + if (!notCondition) { | ||
828 | + throw new MtasParserException("no text available"); | ||
829 | + } | ||
830 | + } else if ((textCondition != null) && (textValue == null)) { | ||
831 | + if (!notCondition) { | ||
832 | + throw new MtasParserException("condition " + textCondition | ||
833 | + + " on text not matched (is null)"); | ||
834 | + } | ||
835 | + } else if (textCondition != null) { | ||
836 | + if (!notCondition && !textCondition.equals(textValue)) { | ||
837 | + throw new MtasParserException("condition " + textCondition | ||
838 | + + " on text not matched (is " + textValue + ")"); | ||
839 | + } else if (notCondition && textCondition.equals(textValue)) { | ||
840 | + throw new MtasParserException( | ||
841 | + "condition NOT " + textCondition | ||
842 | + + " on text not matched (is " + textValue + ")"); | ||
843 | + } | ||
844 | + } | ||
845 | + } | ||
775 | } | 846 | } |
776 | } | 847 | } |
777 | } else if (!notCondition) { | 848 | } else if (!notCondition) { |
@@ -785,10 +856,14 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -785,10 +856,14 @@ abstract public class MtasBasicParser extends MtasParser { | ||
785 | /** | 856 | /** |
786 | * Postcheck mapping conditions. | 857 | * Postcheck mapping conditions. |
787 | * | 858 | * |
788 | - * @param object the object | ||
789 | - * @param mappingConditions the mapping conditions | ||
790 | - * @param currentList the current list | ||
791 | - * @throws MtasParserException the mtas parser exception | 859 | + * @param object |
860 | + * the object | ||
861 | + * @param mappingConditions | ||
862 | + * the mapping conditions | ||
863 | + * @param currentList | ||
864 | + * the current list | ||
865 | + * @throws MtasParserException | ||
866 | + * the mtas parser exception | ||
792 | */ | 867 | */ |
793 | private void postcheckMappingConditions(MtasParserObject object, | 868 | private void postcheckMappingConditions(MtasParserObject object, |
794 | ArrayList<HashMap<String, String>> mappingConditions, | 869 | ArrayList<HashMap<String, String>> mappingConditions, |
@@ -835,10 +910,13 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -835,10 +910,13 @@ abstract public class MtasBasicParser extends MtasParser { | ||
835 | /** | 910 | /** |
836 | * Compute filtered split values. | 911 | * Compute filtered split values. |
837 | * | 912 | * |
838 | - * @param values the values | ||
839 | - * @param filter the filter | 913 | + * @param values |
914 | + * the values | ||
915 | + * @param filter | ||
916 | + * the filter | ||
840 | * @return the string[] | 917 | * @return the string[] |
841 | - * @throws MtasConfigException the mtas config exception | 918 | + * @throws MtasConfigException |
919 | + * the mtas config exception | ||
842 | */ | 920 | */ |
843 | private String[] computeFilteredSplitValues(String[] values, String filter) | 921 | private String[] computeFilteredSplitValues(String[] values, String filter) |
844 | throws MtasConfigException { | 922 | throws MtasConfigException { |
@@ -847,39 +925,42 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -847,39 +925,42 @@ abstract public class MtasBasicParser extends MtasParser { | ||
847 | boolean[] valuesFilter = new boolean[values.length]; | 925 | boolean[] valuesFilter = new boolean[values.length]; |
848 | boolean doSplitFilter = false; | 926 | boolean doSplitFilter = false; |
849 | for (String item : filters) { | 927 | for (String item : filters) { |
850 | - if (item.trim() | ||
851 | - .matches("^"+Pattern.quote(MAPPING_FILTER_SPLIT) + "\\([0-9\\-]+\\)$")) { | 928 | + if (item.trim().matches( |
929 | + "^" + Pattern.quote(MAPPING_FILTER_SPLIT) + "\\([0-9\\-]+\\)$")) { | ||
852 | doSplitFilter = true; | 930 | doSplitFilter = true; |
853 | - Pattern splitContent = Pattern.compile("^"+Pattern.quote(MAPPING_FILTER_SPLIT) + "\\(([0-9]+)(-([0-9]+))?\\)$"); | 931 | + Pattern splitContent = Pattern |
932 | + .compile("^" + Pattern.quote(MAPPING_FILTER_SPLIT) | ||
933 | + + "\\(([0-9]+)(-([0-9]+))?\\)$"); | ||
854 | Matcher splitContentMatcher = splitContent.matcher(item.trim()); | 934 | Matcher splitContentMatcher = splitContent.matcher(item.trim()); |
855 | - while(splitContentMatcher.find()) { | ||
856 | - if(splitContentMatcher.group(3)==null) { | 935 | + while (splitContentMatcher.find()) { |
936 | + if (splitContentMatcher.group(3) == null) { | ||
857 | int i = Integer.parseInt(splitContentMatcher.group(1)); | 937 | int i = Integer.parseInt(splitContentMatcher.group(1)); |
858 | - if(i>=0 && i<values.length) { | 938 | + if (i >= 0 && i < values.length) { |
859 | valuesFilter[i] = true; | 939 | valuesFilter[i] = true; |
860 | - } | 940 | + } |
861 | } else { | 941 | } else { |
862 | int i1 = Integer.parseInt(splitContentMatcher.group(1)); | 942 | int i1 = Integer.parseInt(splitContentMatcher.group(1)); |
863 | int i2 = Integer.parseInt(splitContentMatcher.group(3)); | 943 | int i2 = Integer.parseInt(splitContentMatcher.group(3)); |
864 | - for(int i=Math.max(0, i1); i<Math.min(values.length, i2); i++) { | 944 | + for (int i = Math.max(0, i1); i < Math.min(values.length, |
945 | + i2); i++) { | ||
865 | valuesFilter[i] = true; | 946 | valuesFilter[i] = true; |
866 | } | 947 | } |
867 | } | 948 | } |
868 | - } | 949 | + } |
869 | } | 950 | } |
870 | } | 951 | } |
871 | - if(doSplitFilter) { | 952 | + if (doSplitFilter) { |
872 | int number = 0; | 953 | int number = 0; |
873 | - for(int i=0;i<valuesFilter.length; i++) { | ||
874 | - if(valuesFilter[i]) { | 954 | + for (int i = 0; i < valuesFilter.length; i++) { |
955 | + if (valuesFilter[i]) { | ||
875 | number++; | 956 | number++; |
876 | } | 957 | } |
877 | } | 958 | } |
878 | - if(number>0) { | 959 | + if (number > 0) { |
879 | String[] newValues = new String[number]; | 960 | String[] newValues = new String[number]; |
880 | number = 0; | 961 | number = 0; |
881 | - for(int i=0;i<valuesFilter.length; i++) { | ||
882 | - if(valuesFilter[i]) { | 962 | + for (int i = 0; i < valuesFilter.length; i++) { |
963 | + if (valuesFilter[i]) { | ||
883 | newValues[number] = values[i]; | 964 | newValues[number] = values[i]; |
884 | number++; | 965 | number++; |
885 | } | 966 | } |
@@ -888,7 +969,7 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -888,7 +969,7 @@ abstract public class MtasBasicParser extends MtasParser { | ||
888 | } else { | 969 | } else { |
889 | return null; | 970 | return null; |
890 | } | 971 | } |
891 | - } | 972 | + } |
892 | } | 973 | } |
893 | return values; | 974 | return values; |
894 | } | 975 | } |
@@ -896,12 +977,17 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -896,12 +977,17 @@ abstract public class MtasBasicParser extends MtasParser { | ||
896 | /** | 977 | /** |
897 | * Compute filtered prefixed value. | 978 | * Compute filtered prefixed value. |
898 | * | 979 | * |
899 | - * @param type the type | ||
900 | - * @param value the value | ||
901 | - * @param filter the filter | ||
902 | - * @param prefix the prefix | 980 | + * @param type |
981 | + * the type | ||
982 | + * @param value | ||
983 | + * the value | ||
984 | + * @param filter | ||
985 | + * the filter | ||
986 | + * @param prefix | ||
987 | + * the prefix | ||
903 | * @return the string | 988 | * @return the string |
904 | - * @throws MtasConfigException the mtas config exception | 989 | + * @throws MtasConfigException |
990 | + * the mtas config exception | ||
905 | */ | 991 | */ |
906 | private String computeFilteredPrefixedValue(String type, String value, | 992 | private String computeFilteredPrefixedValue(String type, String value, |
907 | String filter, String prefix) throws MtasConfigException { | 993 | String filter, String prefix) throws MtasConfigException { |
@@ -926,7 +1012,7 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -926,7 +1012,7 @@ abstract public class MtasBasicParser extends MtasParser { | ||
926 | } | 1012 | } |
927 | } else if (item.trim() | 1013 | } else if (item.trim() |
928 | .matches(Pattern.quote(MAPPING_FILTER_SPLIT) + "\\([0-9\\-]+\\)")) { | 1014 | .matches(Pattern.quote(MAPPING_FILTER_SPLIT) + "\\([0-9\\-]+\\)")) { |
929 | - if(!type.equals(MtasParserMapping.PARSER_TYPE_TEXT_SPLIT)) { | 1015 | + if (!type.equals(MtasParserMapping.PARSER_TYPE_TEXT_SPLIT)) { |
930 | throw new MtasConfigException( | 1016 | throw new MtasConfigException( |
931 | "split filter not allowed for " + type); | 1017 | "split filter not allowed for " + type); |
932 | } | 1018 | } |
@@ -947,9 +1033,12 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -947,9 +1033,12 @@ abstract public class MtasBasicParser extends MtasParser { | ||
947 | /** | 1033 | /** |
948 | * Compute maximum filtered payload. | 1034 | * Compute maximum filtered payload. |
949 | * | 1035 | * |
950 | - * @param value the value | ||
951 | - * @param payload the payload | ||
952 | - * @param filter the filter | 1036 | + * @param value |
1037 | + * the value | ||
1038 | + * @param payload | ||
1039 | + * the payload | ||
1040 | + * @param filter | ||
1041 | + * the filter | ||
953 | * @return the bytes ref | 1042 | * @return the bytes ref |
954 | */ | 1043 | */ |
955 | private BytesRef computeMaximumFilteredPayload(String value, BytesRef payload, | 1044 | private BytesRef computeMaximumFilteredPayload(String value, BytesRef payload, |
@@ -981,6 +1070,9 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -981,6 +1070,9 @@ abstract public class MtasBasicParser extends MtasParser { | ||
981 | /** The name. */ | 1070 | /** The name. */ |
982 | private String name; | 1071 | private String name; |
983 | 1072 | ||
1073 | + /** The precheck text. */ | ||
1074 | + protected boolean precheckText; | ||
1075 | + | ||
984 | /** The ref attribute name. */ | 1076 | /** The ref attribute name. */ |
985 | private String refAttributeName; | 1077 | private String refAttributeName; |
986 | 1078 | ||
@@ -990,23 +1082,34 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -990,23 +1082,34 @@ abstract public class MtasBasicParser extends MtasParser { | ||
990 | /** | 1082 | /** |
991 | * Instantiates a new mtas parser type. | 1083 | * Instantiates a new mtas parser type. |
992 | * | 1084 | * |
993 | - * @param type the type | ||
994 | - * @param name the name | 1085 | + * @param type |
1086 | + * the type | ||
1087 | + * @param name | ||
1088 | + * the name | ||
1089 | + * @param precheckText | ||
1090 | + * the precheck text | ||
995 | */ | 1091 | */ |
996 | - MtasParserType(String type, String name) { | 1092 | + MtasParserType(String type, String name, boolean precheckText) { |
997 | this.type = type; | 1093 | this.type = type; |
998 | this.name = name; | 1094 | this.name = name; |
1095 | + this.precheckText = precheckText; | ||
999 | } | 1096 | } |
1000 | 1097 | ||
1001 | /** | 1098 | /** |
1002 | * Instantiates a new mtas parser type. | 1099 | * Instantiates a new mtas parser type. |
1003 | * | 1100 | * |
1004 | - * @param type the type | ||
1005 | - * @param name the name | ||
1006 | - * @param refAttributeName the ref attribute name | 1101 | + * @param type |
1102 | + * the type | ||
1103 | + * @param name | ||
1104 | + * the name | ||
1105 | + * @param precheckText | ||
1106 | + * the precheck text | ||
1107 | + * @param refAttributeName | ||
1108 | + * the ref attribute name | ||
1007 | */ | 1109 | */ |
1008 | - MtasParserType(String type, String name, String refAttributeName) { | ||
1009 | - this(type, name); | 1110 | + MtasParserType(String type, String name, boolean precheckText, |
1111 | + String refAttributeName) { | ||
1112 | + this(type, name, precheckText); | ||
1010 | this.refAttributeName = refAttributeName; | 1113 | this.refAttributeName = refAttributeName; |
1011 | } | 1114 | } |
1012 | 1115 | ||
@@ -1038,9 +1141,19 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1038,9 +1141,19 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1038 | } | 1141 | } |
1039 | 1142 | ||
1040 | /** | 1143 | /** |
1144 | + * Precheck text. | ||
1145 | + * | ||
1146 | + * @return true, if successful | ||
1147 | + */ | ||
1148 | + public boolean precheckText() { | ||
1149 | + return precheckText; | ||
1150 | + } | ||
1151 | + | ||
1152 | + /** | ||
1041 | * Adds the mapping. | 1153 | * Adds the mapping. |
1042 | * | 1154 | * |
1043 | - * @param mapping the mapping | 1155 | + * @param mapping |
1156 | + * the mapping | ||
1044 | */ | 1157 | */ |
1045 | public void addMapping(MtasParserMapping<?> mapping) { | 1158 | public void addMapping(MtasParserMapping<?> mapping) { |
1046 | mappings.add(mapping); | 1159 | mappings.add(mapping); |
@@ -1080,7 +1193,8 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1080,7 +1193,8 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1080 | /** | 1193 | /** |
1081 | * Instantiates a new mtas parser mapping token. | 1194 | * Instantiates a new mtas parser mapping token. |
1082 | * | 1195 | * |
1083 | - * @param tokenType the token type | 1196 | + * @param tokenType |
1197 | + * the token type | ||
1084 | */ | 1198 | */ |
1085 | public MtasParserMappingToken(String tokenType) { | 1199 | public MtasParserMappingToken(String tokenType) { |
1086 | type = tokenType; | 1200 | type = tokenType; |
@@ -1095,7 +1209,8 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1095,7 +1209,8 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1095 | /** | 1209 | /** |
1096 | * Sets the offset. | 1210 | * Sets the offset. |
1097 | * | 1211 | * |
1098 | - * @param tokenOffset the new offset | 1212 | + * @param tokenOffset |
1213 | + * the new offset | ||
1099 | */ | 1214 | */ |
1100 | public void setOffset(Boolean tokenOffset) { | 1215 | public void setOffset(Boolean tokenOffset) { |
1101 | offset = tokenOffset; | 1216 | offset = tokenOffset; |
@@ -1104,7 +1219,8 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1104,7 +1219,8 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1104 | /** | 1219 | /** |
1105 | * Sets the real offset. | 1220 | * Sets the real offset. |
1106 | * | 1221 | * |
1107 | - * @param tokenRealOffset the new real offset | 1222 | + * @param tokenRealOffset |
1223 | + * the new real offset | ||
1108 | */ | 1224 | */ |
1109 | public void setRealOffset(Boolean tokenRealOffset) { | 1225 | public void setRealOffset(Boolean tokenRealOffset) { |
1110 | realoffset = tokenRealOffset; | 1226 | realoffset = tokenRealOffset; |
@@ -1113,7 +1229,8 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1113,7 +1229,8 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1113 | /** | 1229 | /** |
1114 | * Sets the parent. | 1230 | * Sets the parent. |
1115 | * | 1231 | * |
1116 | - * @param tokenParent the new parent | 1232 | + * @param tokenParent |
1233 | + * the new parent | ||
1117 | */ | 1234 | */ |
1118 | public void setParent(Boolean tokenParent) { | 1235 | public void setParent(Boolean tokenParent) { |
1119 | parent = tokenParent; | 1236 | parent = tokenParent; |
@@ -1124,7 +1241,8 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1124,7 +1241,8 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1124 | /** | 1241 | /** |
1125 | * The Class MtasParserMapping. | 1242 | * The Class MtasParserMapping. |
1126 | * | 1243 | * |
1127 | - * @param <T> the generic type | 1244 | + * @param <T> |
1245 | + * the generic type | ||
1128 | */ | 1246 | */ |
1129 | protected abstract class MtasParserMapping<T extends MtasParserMapping<T>> { | 1247 | protected abstract class MtasParserMapping<T extends MtasParserMapping<T>> { |
1130 | 1248 | ||
@@ -1216,8 +1334,10 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1216,8 +1334,10 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1216 | /** | 1334 | /** |
1217 | * Process config. | 1335 | * Process config. |
1218 | * | 1336 | * |
1219 | - * @param config the config | ||
1220 | - * @throws MtasConfigException the mtas config exception | 1337 | + * @param config |
1338 | + * the config | ||
1339 | + * @throws MtasConfigException | ||
1340 | + * the mtas config exception | ||
1221 | */ | 1341 | */ |
1222 | public void processConfig(MtasConfiguration config) | 1342 | public void processConfig(MtasConfiguration config) |
1223 | throws MtasConfigException { | 1343 | throws MtasConfigException { |
@@ -1581,7 +1701,8 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1581,7 +1701,8 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1581 | /** | 1701 | /** |
1582 | * Condition unknown ancestor. | 1702 | * Condition unknown ancestor. |
1583 | * | 1703 | * |
1584 | - * @param number the number | 1704 | + * @param number |
1705 | + * the number | ||
1585 | */ | 1706 | */ |
1586 | private void conditionUnknownAncestor(String number) { | 1707 | private void conditionUnknownAncestor(String number) { |
1587 | HashMap<String, String> mapConstructionItem = new HashMap<String, String>(); | 1708 | HashMap<String, String> mapConstructionItem = new HashMap<String, String>(); |
@@ -1593,9 +1714,12 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1593,9 +1714,12 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1593 | /** | 1714 | /** |
1594 | * Adds the string. | 1715 | * Adds the string. |
1595 | * | 1716 | * |
1596 | - * @param mappingToken the mapping token | ||
1597 | - * @param type the type | ||
1598 | - * @param text the text | 1717 | + * @param mappingToken |
1718 | + * the mapping token | ||
1719 | + * @param type | ||
1720 | + * the type | ||
1721 | + * @param text | ||
1722 | + * the text | ||
1599 | */ | 1723 | */ |
1600 | private void addString(MtasParserMappingToken mappingToken, String type, | 1724 | private void addString(MtasParserMappingToken mappingToken, String type, |
1601 | String text) { | 1725 | String text) { |
@@ -1613,8 +1737,10 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1613,8 +1737,10 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1613 | /** | 1737 | /** |
1614 | * Payload string. | 1738 | * Payload string. |
1615 | * | 1739 | * |
1616 | - * @param mappingToken the mapping token | ||
1617 | - * @param text the text | 1740 | + * @param mappingToken |
1741 | + * the mapping token | ||
1742 | + * @param text | ||
1743 | + * the text | ||
1618 | */ | 1744 | */ |
1619 | private void payloadString(MtasParserMappingToken mappingToken, | 1745 | private void payloadString(MtasParserMappingToken mappingToken, |
1620 | String text) { | 1746 | String text) { |
@@ -1628,10 +1754,14 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1628,10 +1754,14 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1628 | /** | 1754 | /** |
1629 | * Adds the name. | 1755 | * Adds the name. |
1630 | * | 1756 | * |
1631 | - * @param mappingToken the mapping token | ||
1632 | - * @param type the type | ||
1633 | - * @param prefix the prefix | ||
1634 | - * @param filter the filter | 1757 | + * @param mappingToken |
1758 | + * the mapping token | ||
1759 | + * @param type | ||
1760 | + * the type | ||
1761 | + * @param prefix | ||
1762 | + * the prefix | ||
1763 | + * @param filter | ||
1764 | + * the filter | ||
1635 | */ | 1765 | */ |
1636 | private void addName(MtasParserMappingToken mappingToken, String type, | 1766 | private void addName(MtasParserMappingToken mappingToken, String type, |
1637 | String prefix, String filter) { | 1767 | String prefix, String filter) { |
@@ -1650,8 +1780,10 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1650,8 +1780,10 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1650 | /** | 1780 | /** |
1651 | * Condition name. | 1781 | * Condition name. |
1652 | * | 1782 | * |
1653 | - * @param condition the condition | ||
1654 | - * @param not the not | 1783 | + * @param condition |
1784 | + * the condition | ||
1785 | + * @param not | ||
1786 | + * the not | ||
1655 | */ | 1787 | */ |
1656 | private void conditionName(String condition, String not) { | 1788 | private void conditionName(String condition, String not) { |
1657 | HashMap<String, String> mapConstructionItem = new HashMap<String, String>(); | 1789 | HashMap<String, String> mapConstructionItem = new HashMap<String, String>(); |
@@ -1665,10 +1797,14 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1665,10 +1797,14 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1665 | /** | 1797 | /** |
1666 | * Adds the text. | 1798 | * Adds the text. |
1667 | * | 1799 | * |
1668 | - * @param mappingToken the mapping token | ||
1669 | - * @param type the type | ||
1670 | - * @param prefix the prefix | ||
1671 | - * @param filter the filter | 1800 | + * @param mappingToken |
1801 | + * the mapping token | ||
1802 | + * @param type | ||
1803 | + * the type | ||
1804 | + * @param prefix | ||
1805 | + * the prefix | ||
1806 | + * @param filter | ||
1807 | + * the filter | ||
1672 | */ | 1808 | */ |
1673 | private void addText(MtasParserMappingToken mappingToken, String type, | 1809 | private void addText(MtasParserMappingToken mappingToken, String type, |
1674 | String prefix, String filter) { | 1810 | String prefix, String filter) { |
@@ -1687,11 +1823,16 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1687,11 +1823,16 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1687 | /** | 1823 | /** |
1688 | * Adds the text split. | 1824 | * Adds the text split. |
1689 | * | 1825 | * |
1690 | - * @param mappingToken the mapping token | ||
1691 | - * @param type the type | ||
1692 | - * @param split the split | ||
1693 | - * @param prefix the prefix | ||
1694 | - * @param filter the filter | 1826 | + * @param mappingToken |
1827 | + * the mapping token | ||
1828 | + * @param type | ||
1829 | + * the type | ||
1830 | + * @param split | ||
1831 | + * the split | ||
1832 | + * @param prefix | ||
1833 | + * the prefix | ||
1834 | + * @param filter | ||
1835 | + * the filter | ||
1695 | */ | 1836 | */ |
1696 | private void addTextSplit(MtasParserMappingToken mappingToken, String type, | 1837 | private void addTextSplit(MtasParserMappingToken mappingToken, String type, |
1697 | String split, String prefix, String filter) { | 1838 | String split, String prefix, String filter) { |
@@ -1711,9 +1852,12 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1711,9 +1852,12 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1711 | /** | 1852 | /** |
1712 | * Condition text. | 1853 | * Condition text. |
1713 | * | 1854 | * |
1714 | - * @param condition the condition | ||
1715 | - * @param filter the filter | ||
1716 | - * @param not the not | 1855 | + * @param condition |
1856 | + * the condition | ||
1857 | + * @param filter | ||
1858 | + * the filter | ||
1859 | + * @param not | ||
1860 | + * the not | ||
1717 | */ | 1861 | */ |
1718 | private void conditionText(String condition, String filter, String not) { | 1862 | private void conditionText(String condition, String filter, String not) { |
1719 | HashMap<String, String> mapConstructionItem = new HashMap<String, String>(); | 1863 | HashMap<String, String> mapConstructionItem = new HashMap<String, String>(); |
@@ -1728,8 +1872,10 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1728,8 +1872,10 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1728 | /** | 1872 | /** |
1729 | * Payload text. | 1873 | * Payload text. |
1730 | * | 1874 | * |
1731 | - * @param mappingToken the mapping token | ||
1732 | - * @param filter the filter | 1875 | + * @param mappingToken |
1876 | + * the mapping token | ||
1877 | + * @param filter | ||
1878 | + * the filter | ||
1733 | */ | 1879 | */ |
1734 | private void payloadText(MtasParserMappingToken mappingToken, | 1880 | private void payloadText(MtasParserMappingToken mappingToken, |
1735 | String filter) { | 1881 | String filter) { |
@@ -1743,11 +1889,16 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1743,11 +1889,16 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1743 | /** | 1889 | /** |
1744 | * Adds the attribute. | 1890 | * Adds the attribute. |
1745 | * | 1891 | * |
1746 | - * @param mappingToken the mapping token | ||
1747 | - * @param type the type | ||
1748 | - * @param name the name | ||
1749 | - * @param prefix the prefix | ||
1750 | - * @param filter the filter | 1892 | + * @param mappingToken |
1893 | + * the mapping token | ||
1894 | + * @param type | ||
1895 | + * the type | ||
1896 | + * @param name | ||
1897 | + * the name | ||
1898 | + * @param prefix | ||
1899 | + * the prefix | ||
1900 | + * @param filter | ||
1901 | + * the filter | ||
1751 | */ | 1902 | */ |
1752 | private void addAttribute(MtasParserMappingToken mappingToken, String type, | 1903 | private void addAttribute(MtasParserMappingToken mappingToken, String type, |
1753 | String name, String prefix, String filter) { | 1904 | String name, String prefix, String filter) { |
@@ -1769,10 +1920,14 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1769,10 +1920,14 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1769 | /** | 1920 | /** |
1770 | * Condition attribute. | 1921 | * Condition attribute. |
1771 | * | 1922 | * |
1772 | - * @param name the name | ||
1773 | - * @param condition the condition | ||
1774 | - * @param filter the filter | ||
1775 | - * @param not the not | 1923 | + * @param name |
1924 | + * the name | ||
1925 | + * @param condition | ||
1926 | + * the condition | ||
1927 | + * @param filter | ||
1928 | + * the filter | ||
1929 | + * @param not | ||
1930 | + * the not | ||
1776 | */ | 1931 | */ |
1777 | private void conditionAttribute(String name, String condition, | 1932 | private void conditionAttribute(String name, String condition, |
1778 | String filter, String not) { | 1933 | String filter, String not) { |
@@ -1791,9 +1946,12 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1791,9 +1946,12 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1791 | /** | 1946 | /** |
1792 | * Payload attribute. | 1947 | * Payload attribute. |
1793 | * | 1948 | * |
1794 | - * @param mappingToken the mapping token | ||
1795 | - * @param name the name | ||
1796 | - * @param filter the filter | 1949 | + * @param mappingToken |
1950 | + * the mapping token | ||
1951 | + * @param name | ||
1952 | + * the name | ||
1953 | + * @param filter | ||
1954 | + * the filter | ||
1797 | */ | 1955 | */ |
1798 | private void payloadAttribute(MtasParserMappingToken mappingToken, | 1956 | private void payloadAttribute(MtasParserMappingToken mappingToken, |
1799 | String name, String filter) { | 1957 | String name, String filter) { |
@@ -1808,8 +1966,10 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1808,8 +1966,10 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1808 | /** | 1966 | /** |
1809 | * Condition ancestor. | 1967 | * Condition ancestor. |
1810 | * | 1968 | * |
1811 | - * @param ancestorType the ancestor type | ||
1812 | - * @param number the number | 1969 | + * @param ancestorType |
1970 | + * the ancestor type | ||
1971 | + * @param number | ||
1972 | + * the number | ||
1813 | */ | 1973 | */ |
1814 | public void conditionAncestor(String ancestorType, String number) { | 1974 | public void conditionAncestor(String ancestorType, String number) { |
1815 | if (ancestorType.equals(SOURCE_ANCESTOR_GROUP) | 1975 | if (ancestorType.equals(SOURCE_ANCESTOR_GROUP) |
@@ -1829,12 +1989,18 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1829,12 +1989,18 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1829 | /** | 1989 | /** |
1830 | * Adds the ancestor name. | 1990 | * Adds the ancestor name. |
1831 | * | 1991 | * |
1832 | - * @param ancestorType the ancestor type | ||
1833 | - * @param mappingToken the mapping token | ||
1834 | - * @param type the type | ||
1835 | - * @param distance the distance | ||
1836 | - * @param prefix the prefix | ||
1837 | - * @param filter the filter | 1992 | + * @param ancestorType |
1993 | + * the ancestor type | ||
1994 | + * @param mappingToken | ||
1995 | + * the mapping token | ||
1996 | + * @param type | ||
1997 | + * the type | ||
1998 | + * @param distance | ||
1999 | + * the distance | ||
2000 | + * @param prefix | ||
2001 | + * the prefix | ||
2002 | + * @param filter | ||
2003 | + * the filter | ||
1838 | */ | 2004 | */ |
1839 | private void addAncestorName(String ancestorType, | 2005 | private void addAncestorName(String ancestorType, |
1840 | MtasParserMappingToken mappingToken, String type, String distance, | 2006 | MtasParserMappingToken mappingToken, String type, String distance, |
@@ -1862,11 +2028,16 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1862,11 +2028,16 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1862 | /** | 2028 | /** |
1863 | * Condition ancestor name. | 2029 | * Condition ancestor name. |
1864 | * | 2030 | * |
1865 | - * @param ancestorType the ancestor type | ||
1866 | - * @param distance the distance | ||
1867 | - * @param condition the condition | ||
1868 | - * @param filter the filter | ||
1869 | - * @param not the not | 2031 | + * @param ancestorType |
2032 | + * the ancestor type | ||
2033 | + * @param distance | ||
2034 | + * the distance | ||
2035 | + * @param condition | ||
2036 | + * the condition | ||
2037 | + * @param filter | ||
2038 | + * the filter | ||
2039 | + * @param not | ||
2040 | + * the not | ||
1870 | */ | 2041 | */ |
1871 | public void conditionAncestorName(String ancestorType, String distance, | 2042 | public void conditionAncestorName(String ancestorType, String distance, |
1872 | String condition, String filter, String not) { | 2043 | String condition, String filter, String not) { |
@@ -1890,13 +2061,20 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1890,13 +2061,20 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1890 | /** | 2061 | /** |
1891 | * Adds the ancestor attribute. | 2062 | * Adds the ancestor attribute. |
1892 | * | 2063 | * |
1893 | - * @param ancestorType the ancestor type | ||
1894 | - * @param mappingToken the mapping token | ||
1895 | - * @param type the type | ||
1896 | - * @param distance the distance | ||
1897 | - * @param name the name | ||
1898 | - * @param prefix the prefix | ||
1899 | - * @param filter the filter | 2064 | + * @param ancestorType |
2065 | + * the ancestor type | ||
2066 | + * @param mappingToken | ||
2067 | + * the mapping token | ||
2068 | + * @param type | ||
2069 | + * the type | ||
2070 | + * @param distance | ||
2071 | + * the distance | ||
2072 | + * @param name | ||
2073 | + * the name | ||
2074 | + * @param prefix | ||
2075 | + * the prefix | ||
2076 | + * @param filter | ||
2077 | + * the filter | ||
1900 | */ | 2078 | */ |
1901 | public void addAncestorAttribute(String ancestorType, | 2079 | public void addAncestorAttribute(String ancestorType, |
1902 | MtasParserMappingToken mappingToken, String type, String distance, | 2080 | MtasParserMappingToken mappingToken, String type, String distance, |
@@ -1927,12 +2105,18 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1927,12 +2105,18 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1927 | /** | 2105 | /** |
1928 | * Condition ancestor attribute. | 2106 | * Condition ancestor attribute. |
1929 | * | 2107 | * |
1930 | - * @param ancestorType the ancestor type | ||
1931 | - * @param distance the distance | ||
1932 | - * @param name the name | ||
1933 | - * @param condition the condition | ||
1934 | - * @param filter the filter | ||
1935 | - * @param not the not | 2108 | + * @param ancestorType |
2109 | + * the ancestor type | ||
2110 | + * @param distance | ||
2111 | + * the distance | ||
2112 | + * @param name | ||
2113 | + * the name | ||
2114 | + * @param condition | ||
2115 | + * the condition | ||
2116 | + * @param filter | ||
2117 | + * the filter | ||
2118 | + * @param not | ||
2119 | + * the not | ||
1936 | */ | 2120 | */ |
1937 | public void conditionAncestorAttribute(String ancestorType, String distance, | 2121 | public void conditionAncestorAttribute(String ancestorType, String distance, |
1938 | String name, String condition, String filter, String not) { | 2122 | String name, String condition, String filter, String not) { |
@@ -1959,11 +2143,16 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1959,11 +2143,16 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1959 | /** | 2143 | /** |
1960 | * Payload ancestor attribute. | 2144 | * Payload ancestor attribute. |
1961 | * | 2145 | * |
1962 | - * @param mappingToken the mapping token | ||
1963 | - * @param ancestorType the ancestor type | ||
1964 | - * @param distance the distance | ||
1965 | - * @param name the name | ||
1966 | - * @param filter the filter | 2146 | + * @param mappingToken |
2147 | + * the mapping token | ||
2148 | + * @param ancestorType | ||
2149 | + * the ancestor type | ||
2150 | + * @param distance | ||
2151 | + * the distance | ||
2152 | + * @param name | ||
2153 | + * the name | ||
2154 | + * @param filter | ||
2155 | + * the filter | ||
1967 | */ | 2156 | */ |
1968 | private void payloadAncestorAttribute(MtasParserMappingToken mappingToken, | 2157 | private void payloadAncestorAttribute(MtasParserMappingToken mappingToken, |
1969 | String ancestorType, String distance, String name, String filter) { | 2158 | String ancestorType, String distance, String name, String filter) { |
@@ -1988,9 +2177,11 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -1988,9 +2177,11 @@ abstract public class MtasBasicParser extends MtasParser { | ||
1988 | /** | 2177 | /** |
1989 | * Compute ancestor source type. | 2178 | * Compute ancestor source type. |
1990 | * | 2179 | * |
1991 | - * @param type the type | 2180 | + * @param type |
2181 | + * the type | ||
1992 | * @return the string | 2182 | * @return the string |
1993 | - * @throws MtasConfigException the mtas config exception | 2183 | + * @throws MtasConfigException |
2184 | + * the mtas config exception | ||
1994 | */ | 2185 | */ |
1995 | private String computeAncestorSourceType(String type) | 2186 | private String computeAncestorSourceType(String type) |
1996 | throws MtasConfigException { | 2187 | throws MtasConfigException { |
@@ -2014,7 +2205,8 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -2014,7 +2205,8 @@ abstract public class MtasBasicParser extends MtasParser { | ||
2014 | /** | 2205 | /** |
2015 | * Compute distance. | 2206 | * Compute distance. |
2016 | * | 2207 | * |
2017 | - * @param distance the distance | 2208 | + * @param distance |
2209 | + * the distance | ||
2018 | * @return the string | 2210 | * @return the string |
2019 | */ | 2211 | */ |
2020 | private String computeDistance(String distance) { | 2212 | private String computeDistance(String distance) { |
@@ -2033,7 +2225,8 @@ abstract public class MtasBasicParser extends MtasParser { | @@ -2033,7 +2225,8 @@ abstract public class MtasBasicParser extends MtasParser { | ||
2033 | /** | 2225 | /** |
2034 | * Compute number. | 2226 | * Compute number. |
2035 | * | 2227 | * |
2036 | - * @param number the number | 2228 | + * @param number |
2229 | + * the number | ||
2037 | * @return the string | 2230 | * @return the string |
2038 | */ | 2231 | */ |
2039 | private String computeNumber(String number) { | 2232 | private String computeNumber(String number) { |
src/mtas/analysis/parser/MtasCRMParser.java
0 → 100644
1 | +package mtas.analysis.parser; | ||
2 | + | ||
3 | +import java.io.IOException; | ||
4 | +import java.io.Reader; | ||
5 | +import java.util.ArrayList; | ||
6 | +import java.util.Arrays; | ||
7 | +import java.util.Collection; | ||
8 | +import java.util.HashMap; | ||
9 | +import java.util.HashSet; | ||
10 | +import java.util.TreeSet; | ||
11 | +import java.util.Map.Entry; | ||
12 | +import java.util.concurrent.atomic.AtomicInteger; | ||
13 | +import java.util.regex.Matcher; | ||
14 | +import java.util.regex.Pattern; | ||
15 | + | ||
16 | +import mtas.analysis.token.MtasToken; | ||
17 | +import mtas.analysis.token.MtasTokenCollection; | ||
18 | +import mtas.analysis.util.MtasBufferedReader; | ||
19 | +import mtas.analysis.util.MtasConfigException; | ||
20 | +import mtas.analysis.util.MtasConfiguration; | ||
21 | +import mtas.analysis.util.MtasParserException; | ||
22 | + | ||
23 | +/** | ||
24 | + * The Class MtasCRMParser. | ||
25 | + */ | ||
26 | + | ||
27 | +public class MtasCRMParser extends MtasBasicParser { | ||
28 | + | ||
29 | + /** The word type. */ | ||
30 | + private MtasParserType wordType = null; | ||
31 | + | ||
32 | + /** The word annotation types. */ | ||
33 | + private HashMap<String, MtasParserType> wordAnnotationTypes = new HashMap<String, MtasParserType>(); | ||
34 | + | ||
35 | + /** The crm sentence types. */ | ||
36 | + private HashMap<String, MtasParserType> crmSentenceTypes = new HashMap<String, MtasParserType>(); | ||
37 | + | ||
38 | + /** The crm clause types. */ | ||
39 | + private HashMap<String, MtasParserType> crmClauseTypes = new HashMap<String, MtasParserType>(); | ||
40 | + | ||
41 | + /** The crm pair types. */ | ||
42 | + private HashMap<String, MtasParserType> crmPairTypes = new HashMap<String, MtasParserType>(); | ||
43 | + | ||
44 | + /** The functions. */ | ||
45 | + private HashMap<String, HashMap<String, MtasCRMParserFunction>> functions = new HashMap<String, HashMap<String, MtasCRMParserFunction>>(); | ||
46 | + | ||
47 | + /** The Constant MAPPING_TYPE_CRM_SENTENCE. */ | ||
48 | + protected final static String MAPPING_TYPE_CRM_SENTENCE = "crmSentence"; | ||
49 | + | ||
50 | + /** The Constant MAPPING_TYPE_CRM_CLAUSE. */ | ||
51 | + protected final static String MAPPING_TYPE_CRM_CLAUSE = "crmClause"; | ||
52 | + | ||
53 | + /** The Constant MAPPING_TYPE_CRM_PAIR. */ | ||
54 | + protected final static String MAPPING_TYPE_CRM_PAIR = "crmPair"; | ||
55 | + | ||
56 | + /** The history pair. */ | ||
57 | + private HashMap<String, HashMap<String, MtasParserObject>> historyPair = new HashMap<String, HashMap<String, MtasParserObject>>(); | ||
58 | + | ||
59 | + /** The pair pattern. */ | ||
60 | + Pattern pairPattern = Pattern.compile("^([b|e])([a-z])([0-9]+)$"); | ||
61 | + | ||
62 | + /** | ||
63 | + * Instantiates a new mtas crm parser. | ||
64 | + * | ||
65 | + * @param config | ||
66 | + * the config | ||
67 | + */ | ||
68 | + public MtasCRMParser(MtasConfiguration config) { | ||
69 | + super(config); | ||
70 | + try { | ||
71 | + initParser(); | ||
72 | + // System.out.print(printConfig()); | ||
73 | + } catch (MtasConfigException e) { | ||
74 | + e.printStackTrace(); | ||
75 | + } | ||
76 | + } | ||
77 | + | ||
78 | + /* | ||
79 | + * (non-Javadoc) | ||
80 | + * | ||
81 | + * @see mtas.analysis.parser.MtasParser#initParser() | ||
82 | + */ | ||
83 | + @SuppressWarnings("unchecked") | ||
84 | + @Override | ||
85 | + protected void initParser() throws MtasConfigException { | ||
86 | + super.initParser(); | ||
87 | + if (config != null) { | ||
88 | + // always word, no mappings | ||
89 | + wordType = new MtasParserType(MAPPING_TYPE_WORD, null, false); | ||
90 | + for (int i = 0; i < config.children.size(); i++) { | ||
91 | + MtasConfiguration current = config.children.get(i); | ||
92 | + if (current.name.equals("mappings")) { | ||
93 | + for (int j = 0; j < current.children.size(); j++) { | ||
94 | + if (current.children.get(j).name.equals("mapping")) { | ||
95 | + MtasConfiguration mapping = current.children.get(j); | ||
96 | + String typeMapping = mapping.attributes.get("type"); | ||
97 | + String nameMapping = mapping.attributes.get("name"); | ||
98 | + if ((typeMapping != null)) { | ||
99 | + if (typeMapping.equals(MAPPING_TYPE_WORD)) { | ||
100 | + MtasCRMParserMappingWordAnnotation m = new MtasCRMParserMappingWordAnnotation(); | ||
101 | + m.processConfig(mapping); | ||
102 | + wordType.addMapping(m); | ||
103 | + } else if (typeMapping.equals(MAPPING_TYPE_WORD_ANNOTATION) | ||
104 | + && (nameMapping != null)) { | ||
105 | + MtasCRMParserMappingWordAnnotation m = new MtasCRMParserMappingWordAnnotation(); | ||
106 | + m.processConfig(mapping); | ||
107 | + if (wordAnnotationTypes.containsKey(nameMapping)) { | ||
108 | + wordAnnotationTypes.get(nameMapping).addMapping(m); | ||
109 | + } else { | ||
110 | + MtasParserType t = new MtasParserType(typeMapping, | ||
111 | + nameMapping, false); | ||
112 | + t.addMapping(m); | ||
113 | + wordAnnotationTypes.put(nameMapping, t); | ||
114 | + } | ||
115 | + } else if (typeMapping.equals(MAPPING_TYPE_CRM_SENTENCE)) { | ||
116 | + MtasCRMParserMappingCRMSentence m = new MtasCRMParserMappingCRMSentence(); | ||
117 | + m.processConfig(mapping); | ||
118 | + if (crmSentenceTypes.containsKey(nameMapping)) { | ||
119 | + crmSentenceTypes.get(nameMapping).addMapping(m); | ||
120 | + } else { | ||
121 | + MtasParserType t = new MtasParserType(MAPPING_TYPE_GROUP, | ||
122 | + nameMapping, true); | ||
123 | + t.addMapping(m); | ||
124 | + crmSentenceTypes.put(nameMapping, t); | ||
125 | + } | ||
126 | + } else if (typeMapping.equals(MAPPING_TYPE_CRM_CLAUSE)) { | ||
127 | + MtasCRMParserMappingCRMSentence m = new MtasCRMParserMappingCRMSentence(); | ||
128 | + m.processConfig(mapping); | ||
129 | + if (crmClauseTypes.containsKey(nameMapping)) { | ||
130 | + crmClauseTypes.get(nameMapping).addMapping(m); | ||
131 | + } else { | ||
132 | + MtasParserType t = new MtasParserType(MAPPING_TYPE_GROUP, | ||
133 | + nameMapping, true); | ||
134 | + t.addMapping(m); | ||
135 | + crmClauseTypes.put(nameMapping, t); | ||
136 | + } | ||
137 | + } else if (typeMapping.equals(MAPPING_TYPE_CRM_PAIR)) { | ||
138 | + MtasCRMParserMappingCRMPair m = new MtasCRMParserMappingCRMPair(); | ||
139 | + m.processConfig(mapping); | ||
140 | + if (crmPairTypes.containsKey(nameMapping)) { | ||
141 | + crmPairTypes.get(nameMapping).addMapping(m); | ||
142 | + } else { | ||
143 | + MtasParserType t = new MtasParserType(MAPPING_TYPE_RELATION, | ||
144 | + nameMapping, true); | ||
145 | + t.addMapping(m); | ||
146 | + crmPairTypes.put(nameMapping, t); | ||
147 | + } | ||
148 | + } else { | ||
149 | + throw new MtasConfigException("unknown mapping type " | ||
150 | + + typeMapping + " or missing name"); | ||
151 | + } | ||
152 | + } | ||
153 | + } | ||
154 | + } | ||
155 | + } else if (current.name.equals("functions")) { | ||
156 | + for (int j = 0; j < current.children.size(); j++) { | ||
157 | + if (current.children.get(j).name.equals("function")) { | ||
158 | + MtasConfiguration function = current.children.get(j); | ||
159 | + String nameFunction = function.attributes.get("name"); | ||
160 | + String typeFunction = function.attributes.get("type"); | ||
161 | + String splitFunction = function.attributes.get("split"); | ||
162 | + if (nameFunction != null && typeFunction != null) { | ||
163 | + MtasCRMParserFunction mtasCRMParserFunction = new MtasCRMParserFunction( | ||
164 | + typeFunction, splitFunction); | ||
165 | + if (!functions.containsKey(typeFunction)) { | ||
166 | + functions.put(typeFunction, | ||
167 | + new HashMap<String, MtasCRMParserFunction>()); | ||
168 | + } | ||
169 | + functions.get(typeFunction).put(nameFunction, | ||
170 | + mtasCRMParserFunction); | ||
171 | + MtasConfiguration subCurrent = current.children.get(j); | ||
172 | + for (int k = 0; k < subCurrent.children.size(); k++) { | ||
173 | + if (subCurrent.children.get(k).name.equals("condition")) { | ||
174 | + MtasConfiguration subSubCurrent = subCurrent.children | ||
175 | + .get(k); | ||
176 | + if (subSubCurrent.attributes.containsKey("value")) { | ||
177 | + String[] valuesCondition = subSubCurrent.attributes | ||
178 | + .get("value").split(Pattern.quote(",")); | ||
179 | + ArrayList<MtasCRMParserFunctionOutput> valueOutputList = new ArrayList<MtasCRMParserFunctionOutput>(); | ||
180 | + for (int l = 0; l < subSubCurrent.children.size(); l++) { | ||
181 | + if (subSubCurrent.children.get(l).name | ||
182 | + .equals("output")) { | ||
183 | + String valueOutput = subSubCurrent.children | ||
184 | + .get(l).attributes.get("value"); | ||
185 | + String nameOutput = subSubCurrent.children | ||
186 | + .get(l).attributes.get("name"); | ||
187 | + if (nameOutput != null) { | ||
188 | + MtasCRMParserFunctionOutput o = new MtasCRMParserFunctionOutput( | ||
189 | + nameOutput, valueOutput); | ||
190 | + valueOutputList.add(o); | ||
191 | + } | ||
192 | + } | ||
193 | + } | ||
194 | + if (valueOutputList.size() > 0) { | ||
195 | + for (String valueCondition : valuesCondition) { | ||
196 | + if (mtasCRMParserFunction.output | ||
197 | + .containsKey(valueCondition)) { | ||
198 | + mtasCRMParserFunction.output.get(valueCondition) | ||
199 | + .addAll( | ||
200 | + (Collection<? extends MtasCRMParserFunctionOutput>) valueOutputList | ||
201 | + .clone()); | ||
202 | + } else { | ||
203 | + mtasCRMParserFunction.output.put(valueCondition, | ||
204 | + (ArrayList<MtasCRMParserFunctionOutput>) valueOutputList | ||
205 | + .clone()); | ||
206 | + } | ||
207 | + } | ||
208 | + } | ||
209 | + } | ||
210 | + } | ||
211 | + } | ||
212 | + } | ||
213 | + } | ||
214 | + } | ||
215 | + } | ||
216 | + } | ||
217 | + } | ||
218 | + } | ||
219 | + | ||
220 | + /* | ||
221 | + * (non-Javadoc) | ||
222 | + * | ||
223 | + * @see mtas.analysis.parser.MtasParser#createTokenCollection(java.io.Reader) | ||
224 | + */ | ||
225 | + @Override | ||
226 | + public MtasTokenCollection createTokenCollection(Reader reader) | ||
227 | + throws MtasParserException, MtasConfigException { | ||
228 | + AtomicInteger position = new AtomicInteger(0); | ||
229 | + Integer unknownAncestors = 0; | ||
230 | + | ||
231 | + HashMap<String, TreeSet<Integer>> idPositions = new HashMap<String, TreeSet<Integer>>(); | ||
232 | + HashMap<String, Integer[]> idOffsets = new HashMap<String, Integer[]>(); | ||
233 | + | ||
234 | + HashMap<String, HashMap<Integer, HashSet<String>>> updateList = new HashMap<String, HashMap<Integer, HashSet<String>>>(); | ||
235 | + updateList.put(UPDATE_TYPE_OFFSET, new HashMap<Integer, HashSet<String>>()); | ||
236 | + updateList.put(UPDATE_TYPE_POSITION, | ||
237 | + new HashMap<Integer, HashSet<String>>()); | ||
238 | + | ||
239 | + HashMap<String, ArrayList<MtasParserObject>> currentList = new HashMap<String, ArrayList<MtasParserObject>>(); | ||
240 | + currentList.put(MAPPING_TYPE_RELATION, new ArrayList<MtasParserObject>()); | ||
241 | + currentList.put(MAPPING_TYPE_RELATION_ANNOTATION, | ||
242 | + new ArrayList<MtasParserObject>()); | ||
243 | + currentList.put(MAPPING_TYPE_REF, new ArrayList<MtasParserObject>()); | ||
244 | + currentList.put(MAPPING_TYPE_GROUP, new ArrayList<MtasParserObject>()); | ||
245 | + currentList.put(MAPPING_TYPE_GROUP_ANNOTATION, | ||
246 | + new ArrayList<MtasParserObject>()); | ||
247 | + currentList.put(MAPPING_TYPE_WORD, new ArrayList<MtasParserObject>()); | ||
248 | + currentList.put(MAPPING_TYPE_WORD_ANNOTATION, | ||
249 | + new ArrayList<MtasParserObject>()); | ||
250 | + | ||
251 | + tokenCollection = new MtasTokenCollection(); | ||
252 | + MtasToken.resetId(); | ||
253 | + try (MtasBufferedReader br = new MtasBufferedReader(reader)) { | ||
254 | + String line; | ||
255 | + int currentOffset, previousOffset = br.getPosition(); | ||
256 | + MtasParserObject currentObject; | ||
257 | + Pattern headerPattern = Pattern.compile("^@ @ @(.*)$"); | ||
258 | + Pattern regularPattern = Pattern.compile( | ||
259 | + "^([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+)$"); | ||
260 | + Matcher matcherHeader, matcherRegular = null; | ||
261 | + HashSet<MtasParserObject> newPreviousSentence = new HashSet<MtasParserObject>(), | ||
262 | + previousSentence = new HashSet<MtasParserObject>(); | ||
263 | + HashSet<MtasParserObject> newPreviousClause = new HashSet<MtasParserObject>(), | ||
264 | + previousClause = new HashSet<MtasParserObject>(); | ||
265 | + while ((line = br.readLine()) != null) { | ||
266 | + currentOffset = br.getPosition(); | ||
267 | + matcherHeader = headerPattern.matcher(line.trim()); | ||
268 | + matcherRegular = regularPattern.matcher(line.trim()); | ||
269 | + if (matcherRegular.matches()) { | ||
270 | + newPreviousSentence.clear(); | ||
271 | + for (int i = 4; i < 8; i++) { | ||
272 | + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>(); | ||
273 | + HashSet<MtasParserObject> tmpList = processCRMSentence( | ||
274 | + String.valueOf(i), matcherRegular.group((i + 1)), currentOffset, | ||
275 | + functionOutputList, unknownAncestors, currentList, updateList, | ||
276 | + idPositions, idOffsets, previousSentence, previousClause); | ||
277 | + if (tmpList != null) { | ||
278 | + newPreviousSentence.addAll(tmpList); | ||
279 | + } | ||
280 | + for (MtasCRMParserFunctionOutput functionOutput : functionOutputList) { | ||
281 | + tmpList = processCRMSentence(functionOutput.name, | ||
282 | + functionOutput.value, currentOffset, functionOutputList, | ||
283 | + unknownAncestors, currentList, updateList, idPositions, | ||
284 | + idOffsets, previousSentence, previousClause); | ||
285 | + if (tmpList != null) { | ||
286 | + newPreviousSentence.addAll(tmpList); | ||
287 | + } | ||
288 | + } | ||
289 | + } | ||
290 | + if (newPreviousSentence.size() > 0) { | ||
291 | + previousSentence.clear(); | ||
292 | + previousSentence.addAll(newPreviousSentence); | ||
293 | + } | ||
294 | + newPreviousClause.clear(); | ||
295 | + for (int i = 4; i < 8; i++) { | ||
296 | + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>(); | ||
297 | + HashSet<MtasParserObject> tmpList = processCRMClause( | ||
298 | + String.valueOf(i), matcherRegular.group((i + 1)), currentOffset, | ||
299 | + functionOutputList, unknownAncestors, currentList, updateList, | ||
300 | + idPositions, idOffsets, previousClause); | ||
301 | + if (tmpList != null) { | ||
302 | + newPreviousClause.addAll(tmpList); | ||
303 | + } | ||
304 | + for (MtasCRMParserFunctionOutput functionOutput : functionOutputList) { | ||
305 | + tmpList = processCRMClause(functionOutput.name, | ||
306 | + functionOutput.value, currentOffset, functionOutputList, | ||
307 | + unknownAncestors, currentList, updateList, idPositions, | ||
308 | + idOffsets, previousClause); | ||
309 | + if (tmpList != null) { | ||
310 | + newPreviousClause.addAll(tmpList); | ||
311 | + } | ||
312 | + } | ||
313 | + } | ||
314 | + if (newPreviousClause.size() > 0) { | ||
315 | + previousClause.clear(); | ||
316 | + previousClause.addAll(newPreviousClause); | ||
317 | + } | ||
318 | + } | ||
319 | + | ||
320 | + if (matcherRegular.matches() && !matcherHeader.matches()) { | ||
321 | + matcherRegular = regularPattern.matcher(line.trim()); | ||
322 | + if (matcherRegular.matches()) { | ||
323 | + // regular line - start word | ||
324 | + currentObject = new MtasParserObject(wordType); | ||
325 | + currentObject.setOffsetStart(previousOffset); | ||
326 | + currentObject.setRealOffsetStart(previousOffset); | ||
327 | + currentObject.setUnknownAncestorNumber(unknownAncestors); | ||
328 | + if (!prevalidateObject(currentObject, currentList)) { | ||
329 | + unknownAncestors++; | ||
330 | + } else { | ||
331 | + int p = position.getAndIncrement(); | ||
332 | + currentObject.addPosition(p); | ||
333 | + currentObject.objectId = "word_" + String.valueOf(p); | ||
334 | + currentList.get(MAPPING_TYPE_WORD).add(currentObject); | ||
335 | + unknownAncestors = 0; | ||
336 | + // check for crmPair | ||
337 | + for (int i = 0; i < 8; i++) { | ||
338 | + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>(); | ||
339 | + processCRMPair(p, String.valueOf(i), | ||
340 | + matcherRegular.group((i + 1)), currentOffset, | ||
341 | + functionOutputList, unknownAncestors, currentList, | ||
342 | + updateList, idPositions, idOffsets); | ||
343 | + for (MtasCRMParserFunctionOutput functionOutput : functionOutputList) { | ||
344 | + processCRMPair(p, functionOutput.name, functionOutput.value, | ||
345 | + currentOffset, functionOutputList, unknownAncestors, | ||
346 | + currentList, updateList, idPositions, idOffsets); | ||
347 | + } | ||
348 | + } | ||
349 | + // compute word annotations | ||
350 | + for (int i = 0; i < 8; i++) { | ||
351 | + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>(); | ||
352 | + functionOutputList.addAll(processWordAnnotation( | ||
353 | + String.valueOf(i), matcherRegular.group((i + 1)), | ||
354 | + previousOffset, currentOffset, unknownAncestors, | ||
355 | + currentList, updateList, idPositions, idOffsets)); | ||
356 | + for (MtasCRMParserFunctionOutput functionOutput : functionOutputList) { | ||
357 | + processWordAnnotation(functionOutput.name, | ||
358 | + functionOutput.value, previousOffset, currentOffset, | ||
359 | + unknownAncestors, currentList, updateList, idPositions, | ||
360 | + idOffsets); | ||
361 | + } | ||
362 | + } | ||
363 | + } | ||
364 | + // finish word | ||
365 | + if (unknownAncestors > 0) { | ||
366 | + unknownAncestors--; | ||
367 | + } else { | ||
368 | + currentObject = currentList.get(MAPPING_TYPE_WORD) | ||
369 | + .remove(currentList.get(MAPPING_TYPE_WORD).size() - 1); | ||
370 | + assert unknownAncestors == 0 : "error in administration " | ||
371 | + + currentObject.getType().getName(); | ||
372 | + currentObject.setText(null); | ||
373 | + currentObject.setOffsetEnd(currentOffset - 1); | ||
374 | + currentObject.setRealOffsetEnd(currentOffset - 1); | ||
375 | + // update ancestor groups with position and offset | ||
376 | + for (MtasParserObject currentGroup : currentList | ||
377 | + .get(MAPPING_TYPE_GROUP)) { | ||
378 | + currentGroup.addPositions(currentObject.getPositions()); | ||
379 | + currentGroup.addOffsetStart(currentObject.getOffsetStart()); | ||
380 | + currentGroup.addOffsetEnd(currentObject.getOffsetEnd()); | ||
381 | + } | ||
382 | + idPositions.put(currentObject.getId(), | ||
383 | + currentObject.getPositions()); | ||
384 | + idOffsets.put(currentObject.getId(), currentObject.getOffset()); | ||
385 | + currentObject.updateMappings(idPositions, idOffsets); | ||
386 | + unknownAncestors = currentObject.getUnknownAncestorNumber(); | ||
387 | + computeMappingsFromObject(currentObject, currentList, updateList); | ||
388 | + } | ||
389 | + | ||
390 | + } else { | ||
391 | + // System.out.println("PROBLEM: " + line); | ||
392 | + } | ||
393 | + } | ||
394 | + previousOffset = br.getPosition(); | ||
395 | + } | ||
396 | + closePrevious(previousSentence, previousOffset, unknownAncestors, | ||
397 | + currentList, updateList, idPositions, idOffsets); | ||
398 | + closePrevious(previousClause, previousOffset, unknownAncestors, | ||
399 | + currentList, updateList, idPositions, idOffsets); | ||
400 | + } catch (IOException e) { | ||
401 | + throw new MtasParserException(e.getMessage()); | ||
402 | + } | ||
403 | + // final check | ||
404 | + tokenCollection.check(autorepair, makeunique); | ||
405 | + return tokenCollection; | ||
406 | + | ||
407 | + } | ||
408 | + | ||
409 | + /** | ||
410 | + * Process word annotation. | ||
411 | + * | ||
412 | + * @param name | ||
413 | + * the name | ||
414 | + * @param text | ||
415 | + * the text | ||
416 | + * @param previousOffset | ||
417 | + * the previous offset | ||
418 | + * @param currentOffset | ||
419 | + * the current offset | ||
420 | + * @param unknownAncestors | ||
421 | + * the unknown ancestors | ||
422 | + * @param currentList | ||
423 | + * the current list | ||
424 | + * @param updateList | ||
425 | + * the update list | ||
426 | + * @param idPositions | ||
427 | + * the id positions | ||
428 | + * @param idOffsets | ||
429 | + * the id offsets | ||
430 | + * @return the array list | ||
431 | + * @throws MtasParserException | ||
432 | + * the mtas parser exception | ||
433 | + * @throws MtasConfigException | ||
434 | + * the mtas config exception | ||
435 | + */ | ||
436 | + private ArrayList<MtasCRMParserFunctionOutput> processWordAnnotation( | ||
437 | + String name, String text, Integer previousOffset, Integer currentOffset, | ||
438 | + Integer unknownAncestors, | ||
439 | + HashMap<String, ArrayList<MtasParserObject>> currentList, | ||
440 | + HashMap<String, HashMap<Integer, HashSet<String>>> updateList, | ||
441 | + HashMap<String, TreeSet<Integer>> idPositions, | ||
442 | + HashMap<String, Integer[]> idOffsets) | ||
443 | + throws MtasParserException, MtasConfigException { | ||
444 | + MtasParserType tmpCurrentType; | ||
445 | + MtasParserObject currentObject; | ||
446 | + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>(); | ||
447 | + if ((tmpCurrentType = wordAnnotationTypes.get(name)) != null) { | ||
448 | + // start word annotation | ||
449 | + currentObject = new MtasParserObject(tmpCurrentType); | ||
450 | + currentObject.setRealOffsetStart(previousOffset); | ||
451 | + currentObject.addPositions(currentList.get(MAPPING_TYPE_WORD) | ||
452 | + .get((currentList.get(MAPPING_TYPE_WORD).size() - 1)).getPositions()); | ||
453 | + currentObject.setUnknownAncestorNumber(unknownAncestors); | ||
454 | + if (!prevalidateObject(currentObject, currentList)) { | ||
455 | + unknownAncestors++; | ||
456 | + } else { | ||
457 | + currentList.get(MAPPING_TYPE_WORD_ANNOTATION).add(currentObject); | ||
458 | + unknownAncestors = 0; | ||
459 | + } | ||
460 | + // finish word annotation | ||
461 | + if (unknownAncestors > 0) { | ||
462 | + unknownAncestors--; | ||
463 | + } else { | ||
464 | + currentObject = currentList.get(MAPPING_TYPE_WORD_ANNOTATION) | ||
465 | + .remove(currentList.get(MAPPING_TYPE_WORD_ANNOTATION).size() - 1); | ||
466 | + assert unknownAncestors == 0 : "error in administration " | ||
467 | + + currentObject.getType().getName(); | ||
468 | + if (functions.containsKey(MAPPING_TYPE_WORD_ANNOTATION) | ||
469 | + && functions.get(MAPPING_TYPE_WORD_ANNOTATION).containsKey(name) | ||
470 | + && text != null) { | ||
471 | + MtasCRMParserFunction function = functions | ||
472 | + .get(MAPPING_TYPE_WORD_ANNOTATION).get(name); | ||
473 | + String[] value; | ||
474 | + if (function.split != null) { | ||
475 | + value = text.split(Pattern.quote(function.split)); | ||
476 | + } else { | ||
477 | + value = new String[] { text }; | ||
478 | + } | ||
479 | + for (int c = 0; c < value.length; c++) { | ||
480 | + if (function.output.containsKey(value[c])) { | ||
481 | + functionOutputList.addAll(function.output.get(value[c])); | ||
482 | + } | ||
483 | + } | ||
484 | + } | ||
485 | + currentObject.setText(text); | ||
486 | + currentObject.setRealOffsetEnd(currentOffset - 1); | ||
487 | + idPositions.put(currentObject.getId(), currentObject.getPositions()); | ||
488 | + idOffsets.put(currentObject.getId(), currentObject.getOffset()); | ||
489 | + // offset always null, so update later with word (should be possible) | ||
490 | + if ((currentObject.getId() != null) | ||
491 | + && (currentList.get(MAPPING_TYPE_WORD).size() > 0)) { | ||
492 | + currentList.get(MAPPING_TYPE_WORD) | ||
493 | + .get((currentList.get(MAPPING_TYPE_WORD).size() - 1)) | ||
494 | + .addUpdateableIdWithOffset(currentObject.getId()); | ||
495 | + } | ||
496 | + currentObject.updateMappings(idPositions, idOffsets); | ||
497 | + unknownAncestors = currentObject.getUnknownAncestorNumber(); | ||
498 | + computeMappingsFromObject(currentObject, currentList, updateList); | ||
499 | + } | ||
500 | + } | ||
501 | + return functionOutputList; | ||
502 | + } | ||
503 | + | ||
504 | + /** | ||
505 | + * Process crm sentence. | ||
506 | + * | ||
507 | + * @param name | ||
508 | + * the name | ||
509 | + * @param text | ||
510 | + * the text | ||
511 | + * @param currentOffset | ||
512 | + * the current offset | ||
513 | + * @param functionOutputList | ||
514 | + * the function output list | ||
515 | + * @param unknownAncestors | ||
516 | + * the unknown ancestors | ||
517 | + * @param currentList | ||
518 | + * the current list | ||
519 | + * @param updateList | ||
520 | + * the update list | ||
521 | + * @param idPositions | ||
522 | + * the id positions | ||
523 | + * @param idOffsets | ||
524 | + * the id offsets | ||
525 | + * @param previous | ||
526 | + * the previous | ||
527 | + * @param previousClause | ||
528 | + * the previous clause | ||
529 | + * @return the hash set | ||
530 | + * @throws MtasParserException | ||
531 | + * the mtas parser exception | ||
532 | + * @throws MtasConfigException | ||
533 | + * the mtas config exception | ||
534 | + */ | ||
535 | + private HashSet<MtasParserObject> processCRMSentence(String name, String text, | ||
536 | + Integer currentOffset, | ||
537 | + ArrayList<MtasCRMParserFunctionOutput> functionOutputList, | ||
538 | + Integer unknownAncestors, | ||
539 | + HashMap<String, ArrayList<MtasParserObject>> currentList, | ||
540 | + HashMap<String, HashMap<Integer, HashSet<String>>> updateList, | ||
541 | + HashMap<String, TreeSet<Integer>> idPositions, | ||
542 | + HashMap<String, Integer[]> idOffsets, HashSet<MtasParserObject> previous, | ||
543 | + HashSet<MtasParserObject> previousClause) | ||
544 | + throws MtasParserException, MtasConfigException { | ||
545 | + MtasParserType tmpCurrentType; | ||
546 | + MtasParserObject currentObject; | ||
547 | + if ((tmpCurrentType = crmSentenceTypes.get(name)) != null) { | ||
548 | + currentObject = new MtasParserObject(tmpCurrentType); | ||
549 | + currentObject.setUnknownAncestorNumber(unknownAncestors); | ||
550 | + currentObject.setRealOffsetStart(currentOffset); | ||
551 | + currentObject.setText(text); | ||
552 | + if (!prevalidateObject(currentObject, currentList)) { | ||
553 | + return null; | ||
554 | + } else { | ||
555 | + closePrevious(previousClause, currentOffset, unknownAncestors, | ||
556 | + currentList, updateList, idPositions, idOffsets); | ||
557 | + closePrevious(previous, currentOffset, unknownAncestors, currentList, | ||
558 | + updateList, idPositions, idOffsets); | ||
559 | + previous.clear(); | ||
560 | + currentList.get(MAPPING_TYPE_GROUP).add(currentObject); | ||
561 | + unknownAncestors = 0; | ||
562 | + return new HashSet<MtasParserObject>(Arrays.asList(currentObject)); | ||
563 | + } | ||
564 | + } | ||
565 | + return null; | ||
566 | + } | ||
567 | + | ||
568 | + /** | ||
569 | + * Process crm clause. | ||
570 | + * | ||
571 | + * @param name | ||
572 | + * the name | ||
573 | + * @param text | ||
574 | + * the text | ||
575 | + * @param currentOffset | ||
576 | + * the current offset | ||
577 | + * @param functionOutputList | ||
578 | + * the function output list | ||
579 | + * @param unknownAncestors | ||
580 | + * the unknown ancestors | ||
581 | + * @param currentList | ||
582 | + * the current list | ||
583 | + * @param updateList | ||
584 | + * the update list | ||
585 | + * @param idPositions | ||
586 | + * the id positions | ||
587 | + * @param idOffsets | ||
588 | + * the id offsets | ||
589 | + * @param previous | ||
590 | + * the previous | ||
591 | + * @return the hash set | ||
592 | + * @throws MtasParserException | ||
593 | + * the mtas parser exception | ||
594 | + * @throws MtasConfigException | ||
595 | + * the mtas config exception | ||
596 | + */ | ||
597 | + private HashSet<MtasParserObject> processCRMClause(String name, String text, | ||
598 | + Integer currentOffset, | ||
599 | + ArrayList<MtasCRMParserFunctionOutput> functionOutputList, | ||
600 | + Integer unknownAncestors, | ||
601 | + HashMap<String, ArrayList<MtasParserObject>> currentList, | ||
602 | + HashMap<String, HashMap<Integer, HashSet<String>>> updateList, | ||
603 | + HashMap<String, TreeSet<Integer>> idPositions, | ||
604 | + HashMap<String, Integer[]> idOffsets, HashSet<MtasParserObject> previous) | ||
605 | + throws MtasParserException, MtasConfigException { | ||
606 | + MtasParserType tmpCurrentType; | ||
607 | + MtasParserObject currentObject; | ||
608 | + if ((tmpCurrentType = crmClauseTypes.get(name)) != null) { | ||
609 | + currentObject = new MtasParserObject(tmpCurrentType); | ||
610 | + currentObject.setUnknownAncestorNumber(unknownAncestors); | ||
611 | + currentObject.setRealOffsetStart(currentOffset); | ||
612 | + currentObject.setText(text); | ||
613 | + if (!prevalidateObject(currentObject, currentList)) { | ||
614 | + return null; | ||
615 | + } else { | ||
616 | + closePrevious(previous, currentOffset, unknownAncestors, currentList, | ||
617 | + updateList, idPositions, idOffsets); | ||
618 | + previous.clear(); | ||
619 | + currentList.get(MAPPING_TYPE_GROUP).add(currentObject); | ||
620 | + unknownAncestors = 0; | ||
621 | + return new HashSet<MtasParserObject>(Arrays.asList(currentObject)); | ||
622 | + } | ||
623 | + } | ||
624 | + return null; | ||
625 | + } | ||
626 | + | ||
627 | + /** | ||
628 | + * Close previous. | ||
629 | + * | ||
630 | + * @param previous | ||
631 | + * the previous | ||
632 | + * @param currentOffset | ||
633 | + * the current offset | ||
634 | + * @param unknownAncestors | ||
635 | + * the unknown ancestors | ||
636 | + * @param currentList | ||
637 | + * the current list | ||
638 | + * @param updateList | ||
639 | + * the update list | ||
640 | + * @param idPositions | ||
641 | + * the id positions | ||
642 | + * @param idOffsets | ||
643 | + * the id offsets | ||
644 | + * @throws MtasParserException | ||
645 | + * the mtas parser exception | ||
646 | + * @throws MtasConfigException | ||
647 | + * the mtas config exception | ||
648 | + */ | ||
649 | + private void closePrevious(HashSet<MtasParserObject> previous, | ||
650 | + Integer currentOffset, Integer unknownAncestors, | ||
651 | + HashMap<String, ArrayList<MtasParserObject>> currentList, | ||
652 | + HashMap<String, HashMap<Integer, HashSet<String>>> updateList, | ||
653 | + HashMap<String, TreeSet<Integer>> idPositions, | ||
654 | + HashMap<String, Integer[]> idOffsets) | ||
655 | + throws MtasParserException, MtasConfigException { | ||
656 | + for (MtasParserObject previousObject : previous) { | ||
657 | + previousObject.setRealOffsetEnd(currentOffset); | ||
658 | + idPositions.put(previousObject.getId(), previousObject.getPositions()); | ||
659 | + idOffsets.put(previousObject.getId(), previousObject.getOffset()); | ||
660 | + previousObject.updateMappings(idPositions, idOffsets); | ||
661 | + unknownAncestors = previousObject.getUnknownAncestorNumber(); | ||
662 | + computeMappingsFromObject(previousObject, currentList, updateList); | ||
663 | + currentList.get(MAPPING_TYPE_GROUP).remove(previousObject); | ||
664 | + } | ||
665 | + } | ||
666 | + | ||
667 | + /** | ||
668 | + * Process crm pair. | ||
669 | + * | ||
670 | + * @param position | ||
671 | + * the position | ||
672 | + * @param name | ||
673 | + * the name | ||
674 | + * @param text | ||
675 | + * the text | ||
676 | + * @param currentOffset | ||
677 | + * the current offset | ||
678 | + * @param functionOutputList | ||
679 | + * the function output list | ||
680 | + * @param unknownAncestors | ||
681 | + * the unknown ancestors | ||
682 | + * @param currentList | ||
683 | + * the current list | ||
684 | + * @param updateList | ||
685 | + * the update list | ||
686 | + * @param idPositions | ||
687 | + * the id positions | ||
688 | + * @param idOffsets | ||
689 | + * the id offsets | ||
690 | + * @throws MtasParserException | ||
691 | + * the mtas parser exception | ||
692 | + * @throws MtasConfigException | ||
693 | + * the mtas config exception | ||
694 | + */ | ||
695 | + private void processCRMPair(int position, String name, String text, | ||
696 | + Integer currentOffset, | ||
697 | + ArrayList<MtasCRMParserFunctionOutput> functionOutputList, | ||
698 | + Integer unknownAncestors, | ||
699 | + HashMap<String, ArrayList<MtasParserObject>> currentList, | ||
700 | + HashMap<String, HashMap<Integer, HashSet<String>>> updateList, | ||
701 | + HashMap<String, TreeSet<Integer>> idPositions, | ||
702 | + HashMap<String, Integer[]> idOffsets) | ||
703 | + throws MtasParserException, MtasConfigException { | ||
704 | + | ||
705 | + MtasParserType tmpCurrentType; | ||
706 | + MtasParserObject currentObject; | ||
707 | + | ||
708 | + if ((tmpCurrentType = crmPairTypes.get(name)) != null) { | ||
709 | + if ((tmpCurrentType = crmPairTypes.get(name)) != null) { | ||
710 | + // get history | ||
711 | + HashMap<String, MtasParserObject> currentNamePairHistory; | ||
712 | + if (!historyPair.containsKey(name)) { | ||
713 | + currentNamePairHistory = new HashMap<String, MtasParserObject>(); | ||
714 | + historyPair.put(name, currentNamePairHistory); | ||
715 | + } else { | ||
716 | + currentNamePairHistory = historyPair.get(name); | ||
717 | + } | ||
718 | + Matcher m = pairPattern.matcher(text); | ||
719 | + if (m.find()) { | ||
720 | + String thisKey = m.group(1) + m.group(2); | ||
721 | + String otherKey = (m.group(1).equals("b") ? "e" : "b") + m.group(2); | ||
722 | + if (currentNamePairHistory.containsKey(otherKey)) { | ||
723 | + currentObject = currentNamePairHistory.remove(otherKey); | ||
724 | + currentObject.setText(currentObject.getText() + "+" + text); | ||
725 | + currentObject.addPosition(position); | ||
726 | + processFunctions(name, text, MAPPING_TYPE_CRM_PAIR, | ||
727 | + functionOutputList); | ||
728 | + currentObject.setRealOffsetEnd(currentOffset + 1); | ||
729 | + currentObject.setOffsetEnd(currentOffset + 1); | ||
730 | + idPositions.put(currentObject.getId(), | ||
731 | + currentObject.getPositions()); | ||
732 | + idOffsets.put(currentObject.getId(), currentObject.getOffset()); | ||
733 | + currentObject.updateMappings(idPositions, idOffsets); | ||
734 | + unknownAncestors = currentObject.getUnknownAncestorNumber(); | ||
735 | + computeMappingsFromObject(currentObject, currentList, updateList); | ||
736 | + } else { | ||
737 | + currentObject = new MtasParserObject(tmpCurrentType); | ||
738 | + currentObject.setUnknownAncestorNumber(unknownAncestors); | ||
739 | + currentObject.setRealOffsetStart(currentOffset); | ||
740 | + currentObject.setOffsetStart(currentOffset); | ||
741 | + currentObject.setText(text); | ||
742 | + currentObject.addPosition(position); | ||
743 | + if (!prevalidateObject(currentObject, currentList)) { | ||
744 | + unknownAncestors++; | ||
745 | + } else { | ||
746 | + currentNamePairHistory.put(thisKey, currentObject); | ||
747 | + processFunctions(name, text, MAPPING_TYPE_CRM_PAIR, | ||
748 | + functionOutputList); | ||
749 | + currentObject.setRealOffsetEnd(currentOffset + 1); | ||
750 | + currentObject.setOffsetEnd(currentOffset + 1); | ||
751 | + idPositions.put(currentObject.getId(), | ||
752 | + currentObject.getPositions()); | ||
753 | + idOffsets.put(currentObject.getId(), currentObject.getOffset()); | ||
754 | + // offset always null, so update later with word (should be | ||
755 | + // possible) | ||
756 | + if ((currentObject.getId() != null) | ||
757 | + && (currentList.get(MAPPING_TYPE_WORD).size() > 0)) { | ||
758 | + currentList.get(MAPPING_TYPE_WORD) | ||
759 | + .get((currentList.get(MAPPING_TYPE_WORD).size() - 1)) | ||
760 | + .addUpdateableIdWithOffset(currentObject.getId()); | ||
761 | + } | ||
762 | + | ||
763 | + } | ||
764 | + } | ||
765 | + } | ||
766 | + } | ||
767 | + } | ||
768 | + | ||
769 | + } | ||
770 | + | ||
771 | + /** | ||
772 | + * Process functions. | ||
773 | + * | ||
774 | + * @param name | ||
775 | + * the name | ||
776 | + * @param text | ||
777 | + * the text | ||
778 | + * @param type | ||
779 | + * the type | ||
780 | + * @param functionOutputList | ||
781 | + * the function output list | ||
782 | + */ | ||
783 | + private void processFunctions(String name, String text, String type, | ||
784 | + ArrayList<MtasCRMParserFunctionOutput> functionOutputList) { | ||
785 | + if (functions.containsKey(type) && functions.get(type).containsKey(name) | ||
786 | + && text != null) { | ||
787 | + if (functions.get(type).containsKey(name)) { | ||
788 | + MtasCRMParserFunction function = functions.get(type).get(name); | ||
789 | + String[] value; | ||
790 | + if (function.split != null) { | ||
791 | + value = text.split(Pattern.quote(function.split)); | ||
792 | + } else { | ||
793 | + value = new String[] { text }; | ||
794 | + } | ||
795 | + for (int c = 0; c < value.length; c++) { | ||
796 | + boolean checkedEmpty = false; | ||
797 | + if (value[c].equals("")) { | ||
798 | + checkedEmpty = true; | ||
799 | + } | ||
800 | + if (function.output.containsKey(value[c])) { | ||
801 | + ArrayList<MtasCRMParserFunctionOutput> list = function.output | ||
802 | + .get(value[c]); | ||
803 | + for (MtasCRMParserFunctionOutput listItem : list) { | ||
804 | + functionOutputList.add(listItem.create(value[c])); | ||
805 | + } | ||
806 | + } | ||
807 | + if (!checkedEmpty && function.output.containsKey("")) { | ||
808 | + ArrayList<MtasCRMParserFunctionOutput> list = function.output | ||
809 | + .get(""); | ||
810 | + for (MtasCRMParserFunctionOutput listItem : list) { | ||
811 | + functionOutputList.add(listItem.create(value[c])); | ||
812 | + } | ||
813 | + } | ||
814 | + } | ||
815 | + } | ||
816 | + } | ||
817 | + } | ||
818 | + | ||
819 | + /* | ||
820 | + * (non-Javadoc) | ||
821 | + * | ||
822 | + * @see mtas.analysis.parser.MtasParser#printConfig() | ||
823 | + */ | ||
824 | + @Override | ||
825 | + public String printConfig() { | ||
826 | + String text = ""; | ||
827 | + text += "=== CONFIGURATION ===\n"; | ||
828 | + text += "type: " + wordAnnotationTypes.size() + " x wordAnnotation"; | ||
829 | + text += printConfigTypes(wordAnnotationTypes); | ||
830 | + text += "=== CONFIGURATION ===\n"; | ||
831 | + return text; | ||
832 | + } | ||
833 | + | ||
834 | + /** | ||
835 | + * Prints the config types. | ||
836 | + * | ||
837 | + * @param types | ||
838 | + * the types | ||
839 | + * @return the string | ||
840 | + */ | ||
841 | + private String printConfigTypes(HashMap<?, MtasParserType> types) { | ||
842 | + String text = ""; | ||
843 | + for (Entry<?, MtasParserType> entry : types.entrySet()) { | ||
844 | + text += "- " + entry.getKey() + ": " + entry.getValue().mappings.size() | ||
845 | + + " mapping(s)\n"; | ||
846 | + for (int i = 0; i < entry.getValue().mappings.size(); i++) { | ||
847 | + text += "\t" + entry.getValue().mappings.get(i) + "\n"; | ||
848 | + } | ||
849 | + } | ||
850 | + return text; | ||
851 | + } | ||
852 | + | ||
853 | + /** | ||
854 | + * The Class MtasCRMParserFunction. | ||
855 | + */ | ||
856 | + private class MtasCRMParserFunction { | ||
857 | + | ||
858 | + /** The split. */ | ||
859 | + public String split; | ||
860 | + | ||
861 | + /** The output. */ | ||
862 | + public HashMap<String, ArrayList<MtasCRMParserFunctionOutput>> output; | ||
863 | + | ||
864 | + /** | ||
865 | + * Instantiates a new mtas crm parser function. | ||
866 | + * | ||
867 | + * @param type | ||
868 | + * the type | ||
869 | + * @param split | ||
870 | + * the split | ||
871 | + */ | ||
872 | + public MtasCRMParserFunction(String type, String split) { | ||
873 | + this.split = split; | ||
874 | + output = new HashMap<String, ArrayList<MtasCRMParserFunctionOutput>>(); | ||
875 | + } | ||
876 | + | ||
877 | + } | ||
878 | + | ||
879 | + /** | ||
880 | + * The Class MtasCRMParserFunctionOutput. | ||
881 | + */ | ||
882 | + private class MtasCRMParserFunctionOutput { | ||
883 | + | ||
884 | + /** The name. */ | ||
885 | + public String name; | ||
886 | + | ||
887 | + /** The value. */ | ||
888 | + public String value; | ||
889 | + | ||
890 | + /** | ||
891 | + * Instantiates a new mtas crm parser function output. | ||
892 | + * | ||
893 | + * @param name | ||
894 | + * the name | ||
895 | + * @param value | ||
896 | + * the value | ||
897 | + */ | ||
898 | + public MtasCRMParserFunctionOutput(String name, String value) { | ||
899 | + this.name = name; | ||
900 | + this.value = value; | ||
901 | + } | ||
902 | + | ||
903 | + /** | ||
904 | + * Creates the. | ||
905 | + * | ||
906 | + * @param originalValue | ||
907 | + * the original value | ||
908 | + * @return the mtas crm parser function output | ||
909 | + */ | ||
910 | + public MtasCRMParserFunctionOutput create(String originalValue) { | ||
911 | + if (value != null) { | ||
912 | + return this; | ||
913 | + } else { | ||
914 | + return new MtasCRMParserFunctionOutput(name, originalValue); | ||
915 | + } | ||
916 | + } | ||
917 | + | ||
918 | + /* | ||
919 | + * (non-Javadoc) | ||
920 | + * | ||
921 | + * @see java.lang.Object#toString() | ||
922 | + */ | ||
923 | + @Override | ||
924 | + public String toString() { | ||
925 | + return "MtasCRMParserFunctionOutput[" + name + "," + value + "]"; | ||
926 | + } | ||
927 | + } | ||
928 | + | ||
929 | + /** | ||
930 | + * The Class MtasCRMParserMappingWordAnnotation. | ||
931 | + */ | ||
932 | + private class MtasCRMParserMappingWordAnnotation | ||
933 | + extends MtasParserMapping<MtasCRMParserMappingWordAnnotation> { | ||
934 | + | ||
935 | + /** | ||
936 | + * Instantiates a new mtas crm parser mapping word annotation. | ||
937 | + */ | ||
938 | + public MtasCRMParserMappingWordAnnotation() { | ||
939 | + super(); | ||
940 | + this.position = SOURCE_OWN; | ||
941 | + this.realOffset = SOURCE_OWN; | ||
942 | + this.offset = SOURCE_ANCESTOR_WORD; | ||
943 | + this.type = MAPPING_TYPE_WORD_ANNOTATION; | ||
944 | + } | ||
945 | + | ||
946 | + /* | ||
947 | + * (non-Javadoc) | ||
948 | + * | ||
949 | + * @see mtas.analysis.parser.MtasParser.MtasParserMapping#self() | ||
950 | + */ | ||
951 | + @Override | ||
952 | + protected MtasCRMParserMappingWordAnnotation self() { | ||
953 | + return this; | ||
954 | + } | ||
955 | + } | ||
956 | + | ||
957 | + /** | ||
958 | + * The Class MtasCRMParserMappingCRMSentence. | ||
959 | + */ | ||
960 | + private class MtasCRMParserMappingCRMSentence | ||
961 | + extends MtasParserMapping<MtasCRMParserMappingCRMSentence> { | ||
962 | + | ||
963 | + /** | ||
964 | + * Instantiates a new mtas crm parser mapping crm sentence. | ||
965 | + */ | ||
966 | + public MtasCRMParserMappingCRMSentence() { | ||
967 | + super(); | ||
968 | + this.position = SOURCE_OWN; | ||
969 | + this.realOffset = SOURCE_OWN; | ||
970 | + this.offset = SOURCE_OWN; | ||
971 | + this.type = MAPPING_TYPE_GROUP; | ||
972 | + } | ||
973 | + | ||
974 | + /* | ||
975 | + * (non-Javadoc) | ||
976 | + * | ||
977 | + * @see mtas.analysis.parser.MtasBasicParser.MtasParserMapping#self() | ||
978 | + */ | ||
979 | + @Override | ||
980 | + protected MtasCRMParserMappingCRMSentence self() { | ||
981 | + return this; | ||
982 | + } | ||
983 | + } | ||
984 | + | ||
985 | + /** | ||
986 | + * The Class MtasCRMParserMappingCRMPair. | ||
987 | + */ | ||
988 | + private class MtasCRMParserMappingCRMPair | ||
989 | + extends MtasParserMapping<MtasCRMParserMappingCRMPair> { | ||
990 | + | ||
991 | + /** | ||
992 | + * Instantiates a new mtas crm parser mapping crm pair. | ||
993 | + */ | ||
994 | + public MtasCRMParserMappingCRMPair() { | ||
995 | + super(); | ||
996 | + this.position = SOURCE_OWN; | ||
997 | + this.realOffset = SOURCE_OWN; | ||
998 | + this.offset = SOURCE_OWN; | ||
999 | + this.type = MAPPING_TYPE_RELATION; | ||
1000 | + } | ||
1001 | + | ||
1002 | + /* | ||
1003 | + * (non-Javadoc) | ||
1004 | + * | ||
1005 | + * @see mtas.analysis.parser.MtasBasicParser.MtasParserMapping#self() | ||
1006 | + */ | ||
1007 | + @Override | ||
1008 | + protected MtasCRMParserMappingCRMPair self() { | ||
1009 | + return this; | ||
1010 | + } | ||
1011 | + } | ||
1012 | + | ||
1013 | +} |
src/mtas/analysis/parser/MtasElanParser.java
@@ -17,10 +17,12 @@ final public class MtasElanParser extends MtasXMLParser { | @@ -17,10 +17,12 @@ final public class MtasElanParser extends MtasXMLParser { | ||
17 | * @param config the config | 17 | * @param config the config |
18 | */ | 18 | */ |
19 | public MtasElanParser(MtasConfiguration config) { | 19 | public MtasElanParser(MtasConfiguration config) { |
20 | - super(config); | 20 | + super(config); |
21 | } | 21 | } |
22 | - | ||
23 | - /* (non-Javadoc) | 22 | + |
23 | + /* | ||
24 | + * (non-Javadoc) | ||
25 | + * | ||
24 | * @see mtas.analysis.parser.MtasXMLParser#initParser() | 26 | * @see mtas.analysis.parser.MtasXMLParser#initParser() |
25 | */ | 27 | */ |
26 | @Override | 28 | @Override |
src/mtas/analysis/parser/MtasFoliaParser.java
@@ -17,10 +17,12 @@ final public class MtasFoliaParser extends MtasXMLParser { | @@ -17,10 +17,12 @@ final public class MtasFoliaParser extends MtasXMLParser { | ||
17 | * @param config the config | 17 | * @param config the config |
18 | */ | 18 | */ |
19 | public MtasFoliaParser(MtasConfiguration config) { | 19 | public MtasFoliaParser(MtasConfiguration config) { |
20 | - super(config); | 20 | + super(config); |
21 | } | 21 | } |
22 | - | ||
23 | - /* (non-Javadoc) | 22 | + |
23 | + /* | ||
24 | + * (non-Javadoc) | ||
25 | + * | ||
24 | * @see mtas.analysis.parser.MtasXMLParser#initParser() | 26 | * @see mtas.analysis.parser.MtasXMLParser#initParser() |
25 | */ | 27 | */ |
26 | @Override | 28 | @Override |
src/mtas/analysis/parser/MtasParser.java
@@ -15,16 +15,19 @@ import mtas.analysis.util.MtasParserException; | @@ -15,16 +15,19 @@ import mtas.analysis.util.MtasParserException; | ||
15 | * The Class MtasParser. | 15 | * The Class MtasParser. |
16 | */ | 16 | */ |
17 | abstract public class MtasParser { | 17 | abstract public class MtasParser { |
18 | - | 18 | + |
19 | /** The token collection. */ | 19 | /** The token collection. */ |
20 | protected MtasTokenCollection tokenCollection; | 20 | protected MtasTokenCollection tokenCollection; |
21 | - | 21 | + |
22 | /** The config. */ | 22 | /** The config. */ |
23 | protected MtasConfiguration config; | 23 | protected MtasConfiguration config; |
24 | 24 | ||
25 | /** The autorepair. */ | 25 | /** The autorepair. */ |
26 | protected Boolean autorepair = false; | 26 | protected Boolean autorepair = false; |
27 | - | 27 | + |
28 | + /** The makeunique. */ | ||
29 | + protected Boolean makeunique = false; | ||
30 | + | ||
28 | /** | 31 | /** |
29 | * Inits the parser. | 32 | * Inits the parser. |
30 | * | 33 | * |
@@ -38,10 +41,13 @@ abstract public class MtasParser { | @@ -38,10 +41,13 @@ abstract public class MtasParser { | ||
38 | if (current.name.equals("autorepair")) { | 41 | if (current.name.equals("autorepair")) { |
39 | autorepair = current.attributes.get("value").equals("true"); | 42 | autorepair = current.attributes.get("value").equals("true"); |
40 | } | 43 | } |
44 | + if (current.name.equals("makeunique")) { | ||
45 | + makeunique = current.attributes.get("value").equals("true"); | ||
46 | + } | ||
41 | } | 47 | } |
42 | } | 48 | } |
43 | } | 49 | } |
44 | - | 50 | + |
45 | /** | 51 | /** |
46 | * Creates the token collection. | 52 | * Creates the token collection. |
47 | * | 53 | * |
@@ -59,7 +65,7 @@ abstract public class MtasParser { | @@ -59,7 +65,7 @@ abstract public class MtasParser { | ||
59 | * @return the string | 65 | * @return the string |
60 | */ | 66 | */ |
61 | public abstract String printConfig(); | 67 | public abstract String printConfig(); |
62 | - | 68 | + |
63 | /** | 69 | /** |
64 | * The Class MtasParserObject. | 70 | * The Class MtasParserObject. |
65 | */ | 71 | */ |
@@ -259,17 +265,17 @@ abstract public class MtasParser { | @@ -259,17 +265,17 @@ abstract public class MtasParser { | ||
259 | public void setText(String text) { | 265 | public void setText(String text) { |
260 | objectText = text; | 266 | objectText = text; |
261 | } | 267 | } |
262 | - | 268 | + |
263 | /** | 269 | /** |
264 | * Adds the text. | 270 | * Adds the text. |
265 | * | 271 | * |
266 | * @param text the text | 272 | * @param text the text |
267 | */ | 273 | */ |
268 | public void addText(String text) { | 274 | public void addText(String text) { |
269 | - if(objectText==null) { | 275 | + if (objectText == null) { |
270 | objectText = text; | 276 | objectText = text; |
271 | } else { | 277 | } else { |
272 | - objectText+=text; | 278 | + objectText += text; |
273 | } | 279 | } |
274 | } | 280 | } |
275 | 281 |
src/mtas/analysis/parser/MtasSketchParser.java
@@ -53,7 +53,9 @@ final public class MtasSketchParser extends MtasBasicParser { | @@ -53,7 +53,9 @@ final public class MtasSketchParser extends MtasBasicParser { | ||
53 | } | 53 | } |
54 | } | 54 | } |
55 | 55 | ||
56 | - /* (non-Javadoc) | 56 | + /* |
57 | + * (non-Javadoc) | ||
58 | + * | ||
57 | * @see mtas.analysis.parser.MtasParser#initParser() | 59 | * @see mtas.analysis.parser.MtasParser#initParser() |
58 | */ | 60 | */ |
59 | @Override | 61 | @Override |
@@ -62,7 +64,7 @@ final public class MtasSketchParser extends MtasBasicParser { | @@ -62,7 +64,7 @@ final public class MtasSketchParser extends MtasBasicParser { | ||
62 | if (config != null) { | 64 | if (config != null) { |
63 | 65 | ||
64 | // always word, no mappings | 66 | // always word, no mappings |
65 | - wordType = new MtasParserType(MAPPING_TYPE_WORD, null); | 67 | + wordType = new MtasParserType(MAPPING_TYPE_WORD, null, false); |
66 | 68 | ||
67 | for (int i = 0; i < config.children.size(); i++) { | 69 | for (int i = 0; i < config.children.size(); i++) { |
68 | MtasConfiguration current = config.children.get(i); | 70 | MtasConfiguration current = config.children.get(i); |
@@ -74,7 +76,7 @@ final public class MtasSketchParser extends MtasBasicParser { | @@ -74,7 +76,7 @@ final public class MtasSketchParser extends MtasBasicParser { | ||
74 | String nameMapping = mapping.attributes.get("name"); | 76 | String nameMapping = mapping.attributes.get("name"); |
75 | if ((typeMapping != null)) { | 77 | if ((typeMapping != null)) { |
76 | if (typeMapping.equals(MAPPING_TYPE_WORD)) { | 78 | if (typeMapping.equals(MAPPING_TYPE_WORD)) { |
77 | - MtasSketchParserMappingWordAnnotation m = new MtasSketchParserMappingWordAnnotation(); | 79 | + MtasSketchParserMappingWord m = new MtasSketchParserMappingWord(); |
78 | m.processConfig(mapping); | 80 | m.processConfig(mapping); |
79 | wordType.addMapping(m); | 81 | wordType.addMapping(m); |
80 | } else if (typeMapping.equals(MAPPING_TYPE_WORD_ANNOTATION) | 82 | } else if (typeMapping.equals(MAPPING_TYPE_WORD_ANNOTATION) |
@@ -85,7 +87,7 @@ final public class MtasSketchParser extends MtasBasicParser { | @@ -85,7 +87,7 @@ final public class MtasSketchParser extends MtasBasicParser { | ||
85 | wordAnnotationTypes.get(nameMapping).addMapping(m); | 87 | wordAnnotationTypes.get(nameMapping).addMapping(m); |
86 | } else { | 88 | } else { |
87 | MtasParserType t = new MtasParserType(typeMapping, | 89 | MtasParserType t = new MtasParserType(typeMapping, |
88 | - nameMapping); | 90 | + nameMapping, false); |
89 | t.addMapping(m); | 91 | t.addMapping(m); |
90 | wordAnnotationTypes.put(Integer.parseInt(nameMapping), t); | 92 | wordAnnotationTypes.put(Integer.parseInt(nameMapping), t); |
91 | } | 93 | } |
@@ -97,7 +99,7 @@ final public class MtasSketchParser extends MtasBasicParser { | @@ -97,7 +99,7 @@ final public class MtasSketchParser extends MtasBasicParser { | ||
97 | groupTypes.get(nameMapping).addMapping(m); | 99 | groupTypes.get(nameMapping).addMapping(m); |
98 | } else { | 100 | } else { |
99 | MtasParserType t = new MtasParserType(typeMapping, | 101 | MtasParserType t = new MtasParserType(typeMapping, |
100 | - nameMapping); | 102 | + nameMapping, false); |
101 | t.addMapping(m); | 103 | t.addMapping(m); |
102 | groupTypes.put(nameMapping, t); | 104 | groupTypes.put(nameMapping, t); |
103 | } | 105 | } |
@@ -113,7 +115,9 @@ final public class MtasSketchParser extends MtasBasicParser { | @@ -113,7 +115,9 @@ final public class MtasSketchParser extends MtasBasicParser { | ||
113 | } | 115 | } |
114 | } | 116 | } |
115 | 117 | ||
116 | - /* (non-Javadoc) | 118 | + /* |
119 | + * (non-Javadoc) | ||
120 | + * | ||
117 | * @see mtas.analysis.parser.MtasParser#createTokenCollection(java.io.Reader) | 121 | * @see mtas.analysis.parser.MtasParser#createTokenCollection(java.io.Reader) |
118 | */ | 122 | */ |
119 | @Override | 123 | @Override |
@@ -337,11 +341,13 @@ final public class MtasSketchParser extends MtasBasicParser { | @@ -337,11 +341,13 @@ final public class MtasSketchParser extends MtasBasicParser { | ||
337 | } | 341 | } |
338 | } | 342 | } |
339 | // final check | 343 | // final check |
340 | - tokenCollection.check(autorepair); | 344 | + tokenCollection.check(autorepair, makeunique); |
341 | return tokenCollection; | 345 | return tokenCollection; |
342 | } | 346 | } |
343 | 347 | ||
344 | - /* (non-Javadoc) | 348 | + /* |
349 | + * (non-Javadoc) | ||
350 | + * | ||
345 | * @see mtas.analysis.parser.MtasParser#printConfig() | 351 | * @see mtas.analysis.parser.MtasParser#printConfig() |
346 | */ | 352 | */ |
347 | @Override | 353 | @Override |
@@ -373,6 +379,34 @@ final public class MtasSketchParser extends MtasBasicParser { | @@ -373,6 +379,34 @@ final public class MtasSketchParser extends MtasBasicParser { | ||
373 | } | 379 | } |
374 | 380 | ||
375 | /** | 381 | /** |
382 | + * The Class MtasSketchParserMappingWord. | ||
383 | + */ | ||
384 | + private class MtasSketchParserMappingWord | ||
385 | + extends MtasParserMapping<MtasSketchParserMappingWord> { | ||
386 | + | ||
387 | + /** | ||
388 | + * Instantiates a new mtas sketch parser mapping word. | ||
389 | + */ | ||
390 | + public MtasSketchParserMappingWord() { | ||
391 | + super(); | ||
392 | + this.position = SOURCE_OWN; | ||
393 | + this.realOffset = SOURCE_OWN; | ||
394 | + this.offset = SOURCE_OWN; | ||
395 | + this.type = MAPPING_TYPE_WORD; | ||
396 | + } | ||
397 | + | ||
398 | + /* | ||
399 | + * (non-Javadoc) | ||
400 | + * | ||
401 | + * @see mtas.analysis.parser.MtasBasicParser.MtasParserMapping#self() | ||
402 | + */ | ||
403 | + @Override | ||
404 | + protected MtasSketchParserMappingWord self() { | ||
405 | + return this; | ||
406 | + } | ||
407 | + } | ||
408 | + | ||
409 | + /** | ||
376 | * The Class MtasSketchParserMappingWordAnnotation. | 410 | * The Class MtasSketchParserMappingWordAnnotation. |
377 | */ | 411 | */ |
378 | private class MtasSketchParserMappingWordAnnotation | 412 | private class MtasSketchParserMappingWordAnnotation |
src/mtas/analysis/parser/MtasTEIParser.java
@@ -17,10 +17,12 @@ final public class MtasTEIParser extends MtasXMLParser { | @@ -17,10 +17,12 @@ final public class MtasTEIParser extends MtasXMLParser { | ||
17 | * @param config the config | 17 | * @param config the config |
18 | */ | 18 | */ |
19 | public MtasTEIParser(MtasConfiguration config) { | 19 | public MtasTEIParser(MtasConfiguration config) { |
20 | - super(config); | 20 | + super(config); |
21 | } | 21 | } |
22 | - | ||
23 | - /* (non-Javadoc) | 22 | + |
23 | + /* | ||
24 | + * (non-Javadoc) | ||
25 | + * | ||
24 | * @see mtas.analysis.parser.MtasXMLParser#initParser() | 26 | * @see mtas.analysis.parser.MtasXMLParser#initParser() |
25 | */ | 27 | */ |
26 | @Override | 28 | @Override |