Commit 8148835a47574769c6705c325af983b18bc83f6d

Authored by Matthijs Brouwer
1 parent 229cfad6

update

Showing 209 changed files with 27681 additions and 10521 deletions

Too many changes to show.

To preserve performance only 29 of 209 files are displayed.

conf/parser/mtas.xml
1 1 <?xml version="1.0" encoding="UTF-8" ?>
2 2 <mtas>
3 3 <configurations type="mtas.analysis.util.MtasTokenizerFactory">
  4 + <configuration name="test" file="mtas/folia_test.xml" />
  5 + <configuration name="CRM" file="mtas/crm_test.xml" />
4 6 <configuration name="DBNL" file="mtas/folia_dbnl.xml" />
  7 + <configuration name="DDD" file="mtas/folia_ddd.xml" />
5 8 <configuration name="EDBO" file="mtas/folia_edbo.xml" />
6 9 <configuration name="SONAR" file="mtas/folia_sonar.xml" />
7 10 </configurations>
8 11 <configurations type="mtas.analysis.util.MtasCharFilterFactory">
  12 + <configuration name="test" type="file" />
  13 + <configuration name="CRM" type="file" prefix="/Users/matthijs/Software/Mtas/data/CRM/data/files/" postfix=".txt" />
9 14 <configuration name="DBNL" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" />
  15 + <configuration name="DDD" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" />
10 16 <configuration name="EDBO" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" />
11 17 <configuration name="SONAR" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" />
12 18 </configurations>
... ...
conf/parser/mtas/crm_test.xml 0 → 100644
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<mtas>
  3 +
  4 + <!-- START MTAS INDEX CONFIGURATION -->
  5 + <index>
  6 + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
  7 + <payload index="false" />
  8 + <offset index="false" />
  9 + <realoffset index="false" />
  10 + <parent index="true" />
  11 + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
  12 + </index>
  13 + <!-- END MTAS INDEX CONFIGURATION -->
  14 +
  15 +
  16 +
  17 + <!-- START CONFIGURATION MTAS CRM PARSER -->
  18 + <parser name="mtas.analysis.parser.MtasCRMParser">
  19 +
  20 + <!-- START GENERAL SETTINGS MTAS PARSER -->
  21 + <autorepair value="true" />
  22 + <makeunique value="true" />
  23 + <!-- END GENERAL SETTINGS MTAS PARSER -->
  24 +
  25 + <mappings>
  26 +
  27 + <mapping type="word">
  28 + </mapping>
  29 +
  30 + <mapping type="wordAnnotation" name="0">
  31 + <token type="string" offset="false" parent="false">
  32 + <pre>
  33 + <item type="string" value="t" />
  34 + </pre>
  35 + <post>
  36 + <item type="text" />
  37 + </post>
  38 + </token>
  39 + </mapping>
  40 + <mapping type="wordAnnotation" name="0">
  41 + <token type="string" offset="false" parent="false">
  42 + <pre>
  43 + <item type="string" value="t_lc" />
  44 + </pre>
  45 + <post>
  46 + <item type="text" filter="ascii,lowercase" />
  47 + </post>
  48 + </token>
  49 + </mapping>
  50 + <mapping type="wordAnnotation" name="1">
  51 + <token type="string" offset="false" parent="false">
  52 + <pre>
  53 + <item type="string" value="t1" />
  54 + </pre>
  55 + <post>
  56 + <item type="text" />
  57 + </post>
  58 + </token>
  59 + </mapping>
  60 + <mapping type="wordAnnotation" name="1">
  61 + <token type="string" offset="false" parent="false">
  62 + <pre>
  63 + <item type="string" value="t1_lc" />
  64 + </pre>
  65 + <post>
  66 + <item type="text" filter="ascii,lowercase" />
  67 + </post>
  68 + </token>
  69 + </mapping>
  70 + <mapping type="wordAnnotation" name="2">
  71 + <token type="string" offset="false" parent="false">
  72 + <pre>
  73 + <item type="string" value="t2" />
  74 + </pre>
  75 + <post>
  76 + <item type="text" />
  77 + </post>
  78 + </token>
  79 + </mapping>
  80 + <mapping type="wordAnnotation" name="2">
  81 + <token type="string" offset="false" parent="false">
  82 + <pre>
  83 + <item type="string" value="t2_lc" />
  84 + </pre>
  85 + <post>
  86 + <item type="text" filter="ascii,lowercase" />
  87 + </post>
  88 + </token>
  89 + </mapping>
  90 + <mapping type="wordAnnotation" name="3">
  91 + <token type="string" offset="false" parent="false">
  92 + <pre>
  93 + <item type="string" value="lemma" />
  94 + </pre>
  95 + <post>
  96 + <item type="text" />
  97 + </post>
  98 + </token>
  99 + </mapping>
  100 + <mapping type="wordAnnotation" name="4">
  101 + <token type="string" offset="false" parent="false">
  102 + <pre>
  103 + <item type="string" value="crm" />
  104 + </pre>
  105 + <post>
  106 + <item type="text" />
  107 + </post>
  108 + </token>
  109 + </mapping>
  110 +
  111 + <mapping type="crmPair" name="6">
  112 + <condition>
  113 + <item type="text" not="true" condition="-" />
  114 + </condition>
  115 + </mapping>
  116 + <mapping type="crmPair" name="part">
  117 + <token type="string" offset="false" parent="false">
  118 + <pre>
  119 + <item type="name" />
  120 + </pre>
  121 + <post>
  122 + <item type="text" />
  123 + </post>
  124 + </token>
  125 + </mapping>
  126 +
  127 + <mapping type="crmSentence" name="7">
  128 + <token type="string" offset="false" parent="false">
  129 + <pre>
  130 + <item type="string" value="s"/>
  131 + </pre>
  132 + <post>
  133 + <item type="text" />
  134 + </post>
  135 + </token>
  136 + <condition>
  137 + <item type="text" not="true" condition="-" />
  138 + <item type="text" not="true" condition="2" />
  139 + <item type="text" not="true" condition="4" />
  140 + <item type="text" not="true" condition="5" />
  141 + <item type="text" not="true" condition="6" />
  142 + <item type="text" not="true" condition="8" />
  143 + </condition>
  144 + </mapping>
  145 + <mapping type="crmClause" name="7">
  146 + <token type="string" offset="false" parent="false">
  147 + <pre>
  148 + <item type="string" value="sc"/>
  149 + </pre>
  150 + <post>
  151 + <item type="text" />
  152 + </post>
  153 + </token>
  154 + <condition>
  155 + <item type="text" not="true" condition="-" />
  156 + <item type="text" not="true" condition="0" />
  157 + <item type="text" not="true" condition="1" />
  158 + </condition>
  159 + </mapping>
  160 + <mapping type="crmClause" name="7">
  161 + <condition>
  162 + <item type="text" not="true" condition="-" />
  163 + </condition>
  164 + </mapping>
  165 +
  166 + <mapping type="wordAnnotation" name="pos">
  167 + <token type="string" offset="false" parent="false">
  168 + <pre>
  169 + <item type="name" />
  170 + </pre>
  171 + <post>
  172 + <item type="text" />
  173 + </post>
  174 + </token>
  175 + </mapping>
  176 + <mapping type="wordAnnotation" name="feat.getal">
  177 + <token type="string" offset="false" parent="false">
  178 + <pre>
  179 + <item type="name" />
  180 + </pre>
  181 + <post>
  182 + <item type="text" />
  183 + </post>
  184 + </token>
  185 + </mapping>
  186 + <mapping type="wordAnnotation" name="feat.persoon">
  187 + <token type="string" offset="false" parent="false">
  188 + <pre>
  189 + <item type="name" />
  190 + </pre>
  191 + <post>
  192 + <item type="text" />
  193 + </post>
  194 + </token>
  195 + </mapping>
  196 + <mapping type="wordAnnotation" name="feat.ntype">
  197 + <token type="string" offset="false" parent="false">
  198 + <pre>
  199 + <item type="name" />
  200 + </pre>
  201 + <post>
  202 + <item type="text" />
  203 + </post>
  204 + </token>
  205 + </mapping>
  206 + <mapping type="wordAnnotation" name="feat.pvtijd">
  207 + <token type="string" offset="false" parent="false">
  208 + <pre>
  209 + <item type="name" />
  210 + </pre>
  211 + <post>
  212 + <item type="text" />
  213 + </post>
  214 + </token>
  215 + </mapping>
  216 + <mapping type="wordAnnotation" name="feat.wvorm">
  217 + <token type="string" offset="false" parent="false">
  218 + <pre>
  219 + <item type="name" />
  220 + </pre>
  221 + <post>
  222 + <item type="text" />
  223 + </post>
  224 + </token>
  225 + </mapping>
  226 + <mapping type="wordAnnotation" name="feat.numtype">
  227 + <token type="string" offset="false" parent="false">
  228 + <pre>
  229 + <item type="name" />
  230 + </pre>
  231 + <post>
  232 + <item type="text" />
  233 + </post>
  234 + </token>
  235 + </mapping>
  236 + <mapping type="wordAnnotation" name="feat.vwtype">
  237 + <token type="string" offset="false" parent="false">
  238 + <pre>
  239 + <item type="name" />
  240 + </pre>
  241 + <post>
  242 + <item type="text" />
  243 + </post>
  244 + </token>
  245 + </mapping>
  246 + <mapping type="wordAnnotation" name="feat.lwtype">
  247 + <token type="string" offset="false" parent="false">
  248 + <pre>
  249 + <item type="name" />
  250 + </pre>
  251 + <post>
  252 + <item type="text" />
  253 + </post>
  254 + </token>
  255 + </mapping>
  256 + <mapping type="wordAnnotation" name="feat.form">
  257 + <token type="string" offset="false" parent="false">
  258 + <pre>
  259 + <item type="name" />
  260 + </pre>
  261 + <post>
  262 + <item type="text" />
  263 + </post>
  264 + </token>
  265 + </mapping>
  266 + <mapping type="wordAnnotation" name="feat.probleemgeval">
  267 + <token type="string" offset="false" parent="false">
  268 + <pre>
  269 + <item type="name" />
  270 + </pre>
  271 + <post>
  272 + <item type="text" />
  273 + </post>
  274 + </token>
  275 + </mapping>
  276 + </mappings>
  277 +
  278 + <functions>
  279 + <function type="crmPair" name="6" split="+">
  280 + <condition value="">
  281 + <output name="part" />
  282 + </condition>
  283 + </function>
  284 + <function type="wordAnnotation" name="4" split="+">
  285 + <condition value="000,001,002,003,004,005,006,009">
  286 + <output name="pos" value="N" />
  287 + <output name="feat.getal" value="ev" />
  288 + </condition>
  289 + <condition value="010,011,012,013,014,015,016,019">
  290 + <output name="pos" value="N" />
  291 + <output name="feat.getal" value="mv" />
  292 + </condition>
  293 + <condition value="020,021,022,023,024,025,026,029">
  294 + <output name="pos" value="N" />
  295 + <output name="feat.ntype" value="eigen" />
  296 + </condition>
  297 + <condition value="090,091,092,093,094,095,096,099">
  298 + <output name="pos" value="N" />
  299 + <output name="feat.probleemgeval" />
  300 + </condition>
  301 + <condition value="100,101,102,103,104,105,106,109">
  302 + <output name="pos" value="ADJ" />
  303 + <output name="feat.getal" value="ev" />
  304 + </condition>
  305 + <condition value="110,111,112,113,114,115,116,119">
  306 + <output name="pos" value="ADJ" />
  307 + <output name="feat.getal" value="mv" />
  308 + </condition>
  309 + <condition value="190,191,192,193,194,195,196,199">
  310 + <output name="pos" value="ADJ" />
  311 + <output name="feat.probleemgeval" />
  312 + </condition>
  313 +
  314 + <condition value="200,201,202,203,204,205,206,209">
  315 + <output name="pos" value="WW" />
  316 + <output name="feat.pvtijd" value="tgw" />
  317 + </condition>
  318 + <condition value="210,211,212,213,214,215,216,219">
  319 + <output name="pos" value="WW" />
  320 + <output name="feat.pvtijd" value="tgw" />
  321 + </condition>
  322 + <condition value="220,221,222,223,224,225,226,229">
  323 + <output name="pos" value="WW" />
  324 + <output name="feat.pvtijd" value="verl" />
  325 + </condition>
  326 + <condition value="230,231,232,233,234,235,236,239">
  327 + <output name="pos" value="WW" />
  328 + <output name="feat.pvtijd" value="verl" />
  329 + </condition>
  330 + <condition value="240,241,242,243,244,245,246,249">
  331 + <output name="pos" value="WW" />
  332 + </condition>
  333 + <condition value="250,251,252,253,254,255,256,259">
  334 + <output name="pos" value="WW" />
  335 + <output name="feat.wvorm" value="inf" />
  336 + </condition>
  337 + <condition value="260,261,262,263,264,265,266,269">
  338 + <output name="pos" value="WW" />
  339 + <output name="feat.wvorm" value="inf" />
  340 + </condition>
  341 + <condition value="270,271,272,273,274,275,276,279">
  342 + <output name="pos" value="WW" />
  343 + </condition>
  344 + <condition value="280,281,282,283,284,285,286,289">
  345 + <output name="pos" value="WW" />
  346 + </condition>
  347 + <condition value="290,291,292,293,294,295,296,299">
  348 + <output name="pos" value="WW" />
  349 + <output name="feat.probleemgeval" />
  350 + </condition>
  351 +
  352 +
  353 + <condition value="300,301,302,303,304,305,306,309">
  354 + <output name="pos" value="TW" />
  355 + <output name="feat.numtype" value="hoofd" />
  356 + </condition>
  357 + <condition value="310,311,312,313,314,315,316,319">
  358 + <output name="pos" value="TW" />
  359 + <output name="feat.numtype" value="rang" />
  360 + </condition>
  361 + <condition value="320,321,322,323,324,325,326,329">
  362 + <output name="pos" value="TW" />
  363 + </condition>
  364 + <condition value="390,391,392,393,394,395,396,399">
  365 + <output name="pos" value="TW" />
  366 + <output name="feat.probleemgeval" />
  367 + </condition>
  368 +
  369 + <condition value="401">
  370 + <output name="pos" value="VNW" />
  371 + <output name="feat.getal" value="ev" />
  372 + <output name="feat.persoon" value="1" />
  373 + </condition>
  374 + <condition value="402">
  375 + <output name="pos" value="VNW" />
  376 + <output name="feat.getal" value="ev" />
  377 + <output name="feat.persoon" value="2" />
  378 + </condition>
  379 + <condition value="403">
  380 + <output name="pos" value="VNW" />
  381 + <output name="feat.getal" value="ev" />
  382 + <output name="feat.persoon" value="3" />
  383 + </condition>
  384 + <condition value="404">
  385 + <output name="pos" value="VNW" />
  386 + <output name="feat.getal" value="mv" />
  387 + <output name="feat.persoon" value="1" />
  388 + </condition>
  389 + <condition value="405">
  390 + <output name="pos" value="VNW" />
  391 + <output name="feat.getal" value="mv" />
  392 + <output name="feat.persoon" value="2" />
  393 + </condition>
  394 + <condition value="406">
  395 + <output name="pos" value="VNW" />
  396 + <output name="feat.getal" value="mv" />
  397 + <output name="feat.persoon" value="3" />
  398 + </condition>
  399 + <condition value="409">
  400 + <output name="pos" value="VNW" />
  401 + <output name="feat.probleemgeval" />
  402 + </condition>
  403 + <condition value="410,411,412,413,414,415,416,419">
  404 + <output name="pos" value="VNW" />
  405 + <output name="feat.vwtype" value="aanw" />
  406 + </condition>
  407 + <condition value="420,421,422,423,424,425,426,429">
  408 + <output name="pos" value="VNW" />
  409 + <output name="feat.vwtype" value="betr" />
  410 + </condition>
  411 + <condition value="430,431,432,433,434,435,436,439">
  412 + <output name="pos" value="VNW" />
  413 + <output name="feat.vwtype" value="vb" />
  414 + </condition>
  415 + <condition value="434,441,442,443,444,445,446,449">
  416 + <output name="pos" value="VNW" />
  417 + <output name="feat.vwtype" value="vb" />
  418 + </condition>
  419 + <condition value="440,441,442,443,444,445,446,449">
  420 + <output name="pos" value="VNW" />
  421 + <output name="feat.lwtype" value="onbep" />
  422 + </condition>
  423 + <condition value="450,451,452,453,454,455,456,459">
  424 + <output name="pos" value="VNW" />
  425 + <output name="feat.vwtype" value="bez" />
  426 + </condition>
  427 + <condition value="461">
  428 + <output name="pos" value="VNW" />
  429 + <output name="feat.vwtype" value="refl" />
  430 + <output name="feat.getal" value="ev" />
  431 + <output name="feat.persoon" value="1" />
  432 + </condition>
  433 + <condition value="462">
  434 + <output name="pos" value="VNW" />
  435 + <output name="feat.vwtype" value="refl" />
  436 + <output name="feat.getal" value="ev" />
  437 + <output name="feat.persoon" value="2" />
  438 + </condition>
  439 + <condition value="463">
  440 + <output name="pos" value="VNW" />
  441 + <output name="feat.vwtype" value="refl" />
  442 + <output name="feat.getal" value="ev" />
  443 + <output name="feat.persoon" value="3" />
  444 + </condition>
  445 + <condition value="464">
  446 + <output name="pos" value="VNW" />
  447 + <output name="feat.vwtype" value="refl" />
  448 + <output name="feat.getal" value="mv" />
  449 + <output name="feat.persoon" value="1" />
  450 + </condition>
  451 + <condition value="465">
  452 + <output name="pos" value="VNW" />
  453 + <output name="feat.vwtype" value="refl" />
  454 + <output name="feat.getal" value="mv" />
  455 + <output name="feat.persoon" value="2" />
  456 + </condition>
  457 + <condition value="466">
  458 + <output name="pos" value="VNW" />
  459 + <output name="feat.vwtype" value="refl" />
  460 + <output name="feat.getal" value="mv" />
  461 + <output name="feat.persoon" value="3" />
  462 + </condition>
  463 + <condition value="469">
  464 + <output name="pos" value="VNW" />
  465 + <output name="feat.vwtype" value="refl" />
  466 + <output name="feat.probleemgeval" />
  467 + </condition>
  468 + <condition value="470,471,472,473,474,475,476,479">
  469 + <output name="pos" value="LID" />
  470 + </condition>
  471 + <condition value="480,481,482,483,484,485,486,489">
  472 + <output name="pos" value="LID" />
  473 + </condition>
  474 + <condition value="490,491,492,493,494,495,496,499">
  475 + <output name="pos" value="VNW" />
  476 + <output name="feat.probleemgeval" />
  477 + </condition>
  478 +
  479 + <condition value="500,501,502,503,504,505,506,509">
  480 + <output name="pos" value="BW" />
  481 + </condition>
  482 + <condition value="510,511,512,513,514,515,516,519">
  483 + <output name="pos" value="BW" />
  484 + </condition>
  485 + <condition value="520,521,522,523,524,525,526,529">
  486 + <output name="pos" value="BW" />
  487 + </condition>
  488 + <condition value="530,531,532,533,534,535,536,539">
  489 + <output name="pos" value="BW" />
  490 + </condition>
  491 + <condition value="540,541,542,543,544,545,546,549">
  492 + <output name="pos" value="BW" />
  493 + </condition>
  494 + <condition value="550,551,552,553,554,555,556,559">
  495 + <output name="pos" value="BW" />
  496 + </condition>
  497 + <condition value="560,561,562,563,564,565,566,569">
  498 + <output name="pos" value="BW" />
  499 + </condition>
  500 + <condition value="590,591,592,593,594,595,596,599">
  501 + <output name="pos" value="BW" />
  502 + <output name="feat.probleemgeval" />
  503 + </condition>
  504 +
  505 + <condition value="600,601,602,603,604,605,606,609">
  506 + <output name="pos" value="BW" />
  507 + </condition>
  508 + <condition value="610,611,612,613,614,615,616,619">
  509 + <output name="pos" value="BW" />
  510 + </condition>
  511 + <condition value="620,621,622,623,624,625,626,629">
  512 + <output name="pos" value="BW" />
  513 + </condition>
  514 + <condition value="630,631,632,633,634,635,636,639">
  515 + <output name="pos" value="BW" />
  516 + </condition>
  517 + <condition value="640,641,642,643,644,645,646,649">
  518 + <output name="pos" value="BW" />
  519 + </condition>
  520 + <condition value="650,651,652,653,654,655,656,659">
  521 + <output name="pos" value="BW" />
  522 + </condition>
  523 + <condition value="690,691,692,693,694,695,696,699">
  524 + <output name="pos" value="BW" />
  525 + <output name="feat.probleemgeval" />
  526 + </condition>
  527 +
  528 + <condition value="700,701,702,703,704,705,706,709">
  529 + <output name="pos" value="VZ" />
  530 + </condition>
  531 + <condition value="790,791,792,793,794,795,796,799">
  532 + <output name="pos" value="VZ" />
  533 + </condition>
  534 +
  535 + <condition value="800,801,802,803,804,805,806,809">
  536 + <output name="pos" value="VG" />
  537 + </condition>
  538 + <condition value="810,811,812,813,814,815,816,819">
  539 + <output name="pos" value="VG" />
  540 + </condition>
  541 + <condition value="820,821,822,823,824,825,826,829">
  542 + <output name="pos" value="VG" />
  543 + </condition>
  544 + <condition value="830,831,832,833,834,835,836,839">
  545 + <output name="pos" value="VG" />
  546 + </condition>
  547 + <condition value="840,841,842,843,844,845,846,849">
  548 + <output name="pos" value="VG" />
  549 + </condition>
  550 + <condition value="850,851,852,853,854,855,856,859">
  551 + <output name="pos" value="VG" />
  552 + </condition>
  553 + <condition value="860,861,862,863,864,865,866,869">
  554 + <output name="pos" value="VG" />
  555 + </condition>
  556 + <condition value="870,871,872,873,874,875,876,879">
  557 + <output name="pos" value="VG" />
  558 + </condition>
  559 + <condition value="880,881,882,883,884,885,886,889">
  560 + <output name="pos" value="VG" />
  561 + </condition>
  562 + <condition value="890,891,892,893,894,895,896,899">
  563 + <output name="pos" value="VG" />
  564 + <output name="feat.probleemgeval" />
  565 + </condition>
  566 +
  567 + <condition value="900,901,902,903,904,905,906,909">
  568 + <output name="feat.probleemgeval" />
  569 + </condition>
  570 + <condition value="900,901,902,903,904,905,906,909">
  571 + <output name="feat.probleemgeval" />
  572 + </condition>
  573 + <condition value="990,991,992,993,994,995,996,999">
  574 + <output name="feat.probleemgeval" />
  575 + </condition>
  576 +
  577 + <condition
  578 + value="001,011,021,091,101,111,191,201,211,221,231,241,251,261,271,281,291,301,311,321,391,411,421,431,441,451,471,481,491,501,511,521,531,541,551,561,591,601,611,621,631,641,651,691,701,791,801,811,821,831,841,851,861,871,881,891,901,911,991">
  579 + <output name="feat.form" value="-e" />
  580 + </condition>
  581 + <condition
  582 + value="002,012,022,092,102,112,192,202,212,222,232,242,252,262,272,282,292,302,312,322,392,412,422,432,442,452,472,482,492,502,512,522,532,542,552,562,592,602,612,622,632,642,652,692,702,792,802,812,822,832,842,852,862,872,882,892,902,912,992">
  583 + <output name="feat.form" value="-s/-th" />
  584 + </condition>
  585 + <condition
  586 + value="003,013,023,093,103,113,193,203,213,223,233,243,253,263,273,283,293,303,313,323,393,413,423,433,443,453,473,483,493,503,513,523,533,543,553,563,593,603,613,623,633,643,653,693,703,793,803,813,823,833,843,853,863,873,883,893,903,913,993">
  587 + <output name="feat.form" value="-t" />
  588 + </condition>
  589 + <condition
  590 + value="004,014,024,094,104,114,194,204,214,224,234,244,254,264,274,284,294,304,314,324,394,414,424,434,444,454,474,484,494,504,514,524,534,544,554,564,594,604,614,624,634,644,654,694,704,794,804,814,824,834,844,854,864,874,884,894,904,914,994">
  591 + <output name="feat.form" value="-n" />
  592 + </condition>
  593 + <condition
  594 + value="005,015,025,095,105,115,195,205,215,225,235,245,255,265,275,285,295,305,315,325,395,415,425,435,445,455,475,485,495,505,515,525,535,545,555,565,595,605,615,625,635,645,655,695,705,795,805,815,825,835,845,855,865,875,885,895,905,915,995">
  595 + <output name="feat.form" value="-r/-re" />
  596 + </condition>
  597 + <condition
  598 + value="006,016,026,096,106,116,196,206,216,226,236,246,256,266,276,286,296,306,316,326,396,416,426,436,446,456,476,486,496,506,516,526,536,546,556,566,596,606,616,626,636,646,656,696,706,796,806,816,826,836,846,856,866,876,886,896,906,916,996">
  599 + <output name="feat.form" value="-a" />
  600 + </condition>
  601 + <condition value="009,019,029,099">
  602 + <output name="feat.form" value="unclear" />
  603 + </condition>
  604 +
  605 + </function>
  606 + </functions>
  607 +
  608 + </parser>
  609 + <!-- END CONFIGURATION MTAS CRM PARSER -->
  610 +
  611 +
  612 +</mtas>
0 613 \ No newline at end of file
... ...
conf/parser/mtas/elan_mks.xml
... ... @@ -17,9 +17,10 @@
17 17 <!-- START CONFIGURATION MTAS FOLIA PARSER -->
18 18 <parser name="mtas.analysis.parser.MtasElanParser">
19 19  
20   - <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
  20 + <!-- START GENERAL SETTINGS MTAS PARSER -->
21 21 <autorepair value="true" />
22   - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
  22 + <makeunique value="true" />
  23 + <!-- END GENERAL SETTINGS MTAS PARSER -->
23 24  
24 25 <!-- START REFERENCES -->
25 26 <references>
... ...
conf/parser/mtas/folia_dbnl.xml
... ... @@ -19,7 +19,8 @@
19 19  
20 20 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
21 21 <autorepair value="true" />
22   - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
  22 + <makeunique value="true" />
  23 + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
23 24  
24 25 <!-- START REFERENCES -->
25 26 <references>
... ...
conf/parser/mtas/folia_ddd.xml 0 → 100644
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<mtas>
  3 +
  4 + <!-- START MTAS INDEX CONFIGURATION -->
  5 + <index>
  6 + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
  7 + <payload index="false" />
  8 + <offset index="false" />
  9 + <realoffset index="false" />
  10 + <parent index="true" />
  11 + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
  12 + </index>
  13 + <!-- END MTAS INDEX CONFIGURATION -->
  14 +
  15 +
  16 +
  17 + <!-- START CONFIGURATION MTAS FOLIA PARSER -->
  18 + <parser name="mtas.analysis.parser.MtasFoliaParser">
  19 +
  20 + <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
  21 + <autorepair value="true" />
  22 + <makeunique value="true" />
  23 + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
  24 +
  25 + <!-- START REFERENCES -->
  26 + <references>
  27 + <reference name="wref" ref="id" />
  28 + </references>
  29 + <!-- END REFERENCES -->
  30 +
  31 + <!-- START MAPPINGS -->
  32 + <mappings>
  33 +
  34 + <!-- START WORDS -->
  35 + <mapping type="word" name="w">
  36 + </mapping>
  37 + <mapping type="word" name="w">
  38 + <token type="string" offset="false" realoffset="false" parent="false">
  39 + <pre>
  40 + <item type="name" />
  41 + </pre>
  42 + <post>
  43 + <item type="attribute" name="class" />
  44 + </post>
  45 + </token>
  46 + <condition>
  47 + <item type="attribute" name="class" />
  48 + <item type="attribute" name="class" not="true" condition="WORD" />
  49 + </condition>
  50 + </mapping>
  51 + <!-- END WORDS -->
  52 +
  53 + <!-- START WORD ANNOTATIONS -->
  54 + <mapping type="wordAnnotation" name="t">
  55 + <token type="string" offset="false">
  56 + <pre>
  57 + <item type="name" />
  58 + </pre>
  59 + <post>
  60 + <item type="text" />
  61 + </post>
  62 + </token>
  63 + <token type="string" offset="false" realoffset="false" parent="false">
  64 + <pre>
  65 + <item type="name" />
  66 + <item type="string" value="_lc" />
  67 + </pre>
  68 + <post>
  69 + <item type="text" filter="ascii,lowercase" />
  70 + </post>
  71 + </token>
  72 + <condition>
  73 + <item type="ancestor" number="0" />
  74 + <item type="ancestorWord" number="1" />
  75 + <item type="unknownAncestor" number="0" />
  76 + </condition>
  77 + </mapping>
  78 + <!-- END WORD ANNOTATIONS -->
  79 +
  80 + <!-- START RELATIONS -->
  81 + <!-- END RELATIONS -->
  82 +
  83 + <!-- START GROUPS -->
  84 + <mapping type="group" name="s">
  85 + <token type="string" offset="false">
  86 + <pre>
  87 + <item type="name" />
  88 + </pre>
  89 + <post>
  90 + <item type="attribute" name="class" />
  91 + </post>
  92 + </token>
  93 + </mapping>
  94 + <mapping type="group" name="p">
  95 + <token type="string" offset="false">
  96 + <pre>
  97 + <item type="name" />
  98 + </pre>
  99 + <post>
  100 + <item type="attribute" name="class" />
  101 + </post>
  102 + </token>
  103 + </mapping>
  104 + <mapping type="group" name="div">
  105 + <token type="string" offset="false">
  106 + <pre>
  107 + <item type="name" />
  108 + </pre>
  109 + <post>
  110 + <item type="attribute" name="class" />
  111 + </post>
  112 + </token>
  113 + </mapping>
  114 + <mapping type="group" name="head">
  115 + <token type="string" offset="false">
  116 + <pre>
  117 + <item type="name" />
  118 + </pre>
  119 + <post>
  120 + <item type="attribute" name="class" />
  121 + </post>
  122 + </token>
  123 + </mapping>
  124 + <!-- END GROUPS -->
  125 +
  126 + <!-- START GROUP ANNOTATIONS -->
  127 + <mapping type="groupAnnotation" name="lang">
  128 + <token type="string" offset="false" realoffset="false" parent="false">
  129 + <pre>
  130 + <item type="name" />
  131 + </pre>
  132 + <post>
  133 + <item type="attribute" name="class" />
  134 + </post>
  135 + </token>
  136 + </mapping>
  137 + <!-- END GROUP ANNOTATIONS -->
  138 +
  139 + </mappings>
  140 + <!-- END MAPPINGS -->
  141 +
  142 + </parser>
  143 + <!-- END CONFIGURATION MTAS FOLIA PARSER -->
  144 +
  145 +
  146 +</mtas>
0 147 \ No newline at end of file
... ...
conf/parser/mtas/folia_edbo.xml
... ... @@ -17,6 +17,7 @@
17 17  
18 18 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
19 19 <autorepair value="true" />
  20 + <makeunique value="true" />
20 21 <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
21 22  
22 23 <!-- START REFERENCES -->
... ... @@ -72,30 +73,6 @@
72 73 <item type="unknownAncestor" number="0" />
73 74 </condition>
74 75 </mapping>
75   - <mapping type="wordAnnotation" name="aref">
76   - <token type="string" offset="false">
77   - <pre>
78   - <item type="string" value="translated.t" />
79   - </pre>
80   - <post>
81   - <item type="attribute" name="t" />
82   - </post>
83   - </token>
84   - <token type="string" offset="false" realoffset="false" parent="false">
85   - <pre>
86   - <item type="string" value="translated.t" />
87   - <item type="string" value="_lc" />
88   - </pre>
89   - <post>
90   - <item type="attribute" name="t" filter="ascii,lowercase" />
91   - </post>
92   - </token>
93   - <condition>
94   - <item type="ancestor" number="0" />
95   - <item type="ancestorWord" number="1" />
96   - <item type="unknownAncestor" number="1" />
97   - </condition>
98   - </mapping>
99 76 <mapping type="wordAnnotation" name="lemma">
100 77 <token type="string" offset="false" realoffset="false" parent="false">
101 78 <pre>
... ... @@ -109,24 +86,6 @@
109 86 <item type="attribute" name="class" />
110 87 <item type="ancestor" number="0" />
111 88 <item type="unknownAncestor" number="0" />
112   - <item type="attribute" name="set" condition="original.http://ilk.uvt.nl/folia/sets/frog-mblem-nl" />
113   - </condition>
114   - </mapping>
115   - <mapping type="wordAnnotation" name="lemma">
116   - <token type="string" offset="false" realoffset="false" parent="false">
117   - <pre>
118   - <item type="string" value="translated." />
119   - <item type="name" />
120   - </pre>
121   - <post>
122   - <item type="attribute" name="class" />
123   - </post>
124   - </token>
125   - <condition>
126   - <item type="attribute" name="class" />
127   - <item type="ancestor" number="0" />
128   - <item type="unknownAncestor" number="1" />
129   - <item type="attribute" name="set" condition="translated.http://ilk.uvt.nl/folia/sets/frog-mblem-nl" />
130 89 </condition>
131 90 </mapping>
132 91 <mapping type="wordAnnotation" name="morphology">
... ... @@ -166,54 +125,11 @@
166 125 <item type="ancestor" number="0" />
167 126 <item type="unknownAncestor" number="0" />
168 127 <item type="attribute" name="class" />
169   - <item type="attribute" name="set" condition="original.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" />
170   - </condition>
171   - </mapping>
172   - <mapping type="wordAnnotation" name="pos">
173   - <token type="string" offset="false" realoffset="false" parent="false">
174   - <pre>
175   - <item type="string" value="translated." />
176   - <item type="name" />
177   - </pre>
178   - <post>
179   - <item type="attribute" name="head" />
180   - </post>
181   - <payload>
182   - <item type="attribute" name="confidence" />
183   - </payload>
184   - </token>
185   - <condition>
186   - <item type="ancestor" number="0" />
187   - <item type="unknownAncestor" number="1" />
188   - <item type="attribute" name="class" />
189   - <item type="attribute" name="set" condition="translated.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" />
190   - </condition>
191   - </mapping>
192   - <mapping type="wordAnnotation" name="feat">
193   - <token type="string" offset="false" realoffset="false" parent="false">
194   - <pre>
195   - <item type="name" />
196   - <item type="attribute" name="subset" prefix="." />
197   - </pre>
198   - <post>
199   - <item type="attribute" name="class" />
200   - </post>
201   - <payload>
202   - <item type="ancestorAttribute" distance="0" name="confidence" />
203   - </payload>
204   - </token>
205   - <condition>
206   - <item type="ancestor" number="1" />
207   - <item type="unknownAncestor" number="0" />
208   - <item type="attribute" name="class" />
209   - <item type="attribute" name="subset" />
210   - <item type="ancestorAttribute" name="set" condition="original.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" />
211 128 </condition>
212 129 </mapping>
213 130 <mapping type="wordAnnotation" name="feat">
214 131 <token type="string" offset="false" realoffset="false" parent="false">
215 132 <pre>
216   - <item type="string" value="translated." />
217 133 <item type="name" />
218 134 <item type="attribute" name="subset" prefix="." />
219 135 </pre>
... ... @@ -229,7 +145,6 @@
229 145 <item type="unknownAncestor" number="0" />
230 146 <item type="attribute" name="class" />
231 147 <item type="attribute" name="subset" />
232   - <item type="ancestorAttribute" name="set" condition="translated.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" />
233 148 </condition>
234 149 </mapping>
235 150 <!-- END WORD ANNOTATIONS -->
... ...
conf/parser/mtas/folia_mimore.xml
... ... @@ -18,7 +18,8 @@
18 18  
19 19 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
20 20 <autorepair value="false" />
21   - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
  21 + <makeunique value="true" />
  22 + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
22 23  
23 24 <!-- START REFERENCES -->
24 25 <references>
... ...
conf/parser/mtas/folia_mtas.xml
... ... @@ -19,7 +19,8 @@
19 19  
20 20 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
21 21 <autorepair value="true" />
22   - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
  22 + <makeunique value="true" />
  23 + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
23 24  
24 25 <!-- START REFERENCES -->
25 26 <references>
... ...
conf/parser/mtas/folia_oeaw.xml 0 → 100644
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<mtas>
  3 +
  4 + <!-- START MTAS INDEX CONFIGURATION -->
  5 + <index>
  6 + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
  7 + <payload index="false" />
  8 + <offset index="false" />
  9 + <realoffset index="false" />
  10 + <parent index="true" />
  11 + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
  12 + </index>
  13 + <!-- END MTAS INDEX CONFIGURATION -->
  14 +
  15 +
  16 +
  17 + <!-- START CONFIGURATION MTAS FOLIA PARSER -->
  18 + <parser name="mtas.analysis.parser.MtasFoliaParser">
  19 +
  20 + <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
  21 + <autorepair value="true" />
  22 + <makeunique value="true" />
  23 + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
  24 +
  25 + <!-- START REFERENCES -->
  26 + <references>
  27 + <reference name="wref" ref="id" />
  28 + </references>
  29 + <!-- END REFERENCES -->
  30 +
  31 + <!-- START MAPPINGS -->
  32 + <mappings>
  33 +
  34 + <!-- START WORDS -->
  35 + <mapping type="word" name="w">
  36 + </mapping>
  37 + <mapping type="word" name="w">
  38 + <token type="string" offset="false" realoffset="false" parent="false">
  39 + <pre>
  40 + <item type="name" />
  41 + </pre>
  42 + <post>
  43 + <item type="attribute" name="class" />
  44 + </post>
  45 + </token>
  46 + <condition>
  47 + <item type="attribute" name="class" />
  48 + <item type="attribute" name="class" not="true" condition="WORD" />
  49 + </condition>
  50 + </mapping>
  51 + <!-- END WORDS -->
  52 +
  53 + <!-- START WORD ANNOTATIONS -->
  54 + <mapping type="wordAnnotation" name="t">
  55 + <token type="string" offset="false">
  56 + <pre>
  57 + <item type="name" />
  58 + </pre>
  59 + <post>
  60 + <item type="text" />
  61 + </post>
  62 + </token>
  63 + <token type="string" offset="false" realoffset="false" parent="false">
  64 + <pre>
  65 + <item type="name" />
  66 + <item type="string" value="_lc" />
  67 + </pre>
  68 + <post>
  69 + <item type="text" filter="ascii,lowercase" />
  70 + </post>
  71 + </token>
  72 + <condition>
  73 + <item type="ancestor" number="0" />
  74 + <item type="ancestorWord" number="1" />
  75 + <item type="unknownAncestor" number="0" />
  76 + </condition>
  77 + </mapping>
  78 + <mapping type="wordAnnotation" name="lemma">
  79 + <token type="string" offset="false" realoffset="false" parent="false">
  80 + <pre>
  81 + <item type="name" />
  82 + </pre>
  83 + <post>
  84 + <item type="attribute" name="class" />
  85 + </post>
  86 + </token>
  87 + <condition>
  88 + <item type="attribute" name="class" />
  89 + <item type="ancestor" number="0" />
  90 + <item type="unknownAncestor" number="0" />
  91 + </condition>
  92 + </mapping>
  93 + <mapping type="wordAnnotation" name="pos">
  94 + <token type="string" offset="false" realoffset="false" parent="false">
  95 + <pre>
  96 + <item type="attribute" name="set" />
  97 + </pre>
  98 + <post>
  99 + <item type="attribute" name="head" />
  100 + </post>
  101 + </token>
  102 + <condition>
  103 + <item type="ancestor" number="0" />
  104 + <item type="unknownAncestor" number="0" />
  105 + <item type="attribute" name="class" />
  106 + <item type="attribute" name="set" />
  107 + </condition>
  108 + </mapping>
  109 + <mapping type="wordAnnotation" name="feat">
  110 + <token type="string" offset="false" realoffset="false" parent="false">
  111 + <pre>
  112 + <item type="name" />
  113 + <item type="attribute" name="subset" prefix="." />
  114 + </pre>
  115 + <post>
  116 + <item type="attribute" name="class" />
  117 + </post>
  118 + </token>
  119 + <condition>
  120 + <item type="ancestor" number="1" />
  121 + <item type="unknownAncestor" number="0" />
  122 + <item type="attribute" name="class" />
  123 + <item type="attribute" name="subset" />
  124 + </condition>
  125 + </mapping>
  126 + <!-- END WORD ANNOTATIONS -->
  127 +
  128 + <!-- START RELATIONS -->
  129 + <mapping type="relation" name="entities">
  130 + </mapping>
  131 + <mapping type="relation" name="entity">
  132 + <token type="string" offset="false" realoffset="false" parent="false">
  133 + <pre>
  134 + <item type="name" />
  135 + </pre>
  136 + <post>
  137 + <item type="attribute" name="class" />
  138 + </post>
  139 + </token>
  140 + <condition>
  141 + <item type="ancestor" number="1" />
  142 + <item type="ancestorName" condition="entities" />
  143 + </condition>
  144 + </mapping>
  145 + <!-- END RELATIONS -->
  146 +
  147 + <!-- START RELATION ANNOTATIONS -->
  148 + <mapping type="relationAnnotation" name="feat">
  149 + <token type="string" offset="false" realoffset="false">
  150 + <pre>
  151 + <item type="ancestorRelationName" />
  152 + <item type="name" prefix="." />
  153 + <item type="attribute" name="subset" prefix="." />
  154 + </pre>
  155 + <post>
  156 + <item type="attribute" name="class" />
  157 + </post>
  158 + </token>
  159 + </mapping>
  160 + <!-- END RELATION ANNOTATIONS -->
  161 +
  162 + <!-- START GROUPS -->
  163 + <mapping type="group" name="s">
  164 + <token type="string" offset="false">
  165 + <pre>
  166 + <item type="name" />
  167 + </pre>
  168 + <post>
  169 + <item type="attribute" name="class" />
  170 + </post>
  171 + </token>
  172 + </mapping>
  173 + <mapping type="group" name="p">
  174 + <token type="string" offset="false">
  175 + <pre>
  176 + <item type="name" />
  177 + </pre>
  178 + <post>
  179 + <item type="attribute" name="class" />
  180 + </post>
  181 + </token>
  182 + </mapping>
  183 + <mapping type="group" name="div">
  184 + <token type="string" offset="false">
  185 + <pre>
  186 + <item type="name" />
  187 + </pre>
  188 + <post>
  189 + <item type="attribute" name="class" />
  190 + </post>
  191 + </token>
  192 + </mapping>
  193 + <!-- END GROUPS -->
  194 +
  195 + <!-- START GROUP ANNOTATIONS -->
  196 + <!-- END GROUP ANNOTATIONS -->
  197 +
  198 + </mappings>
  199 + <!-- END MAPPINGS -->
  200 +
  201 + </parser>
  202 + <!-- END CONFIGURATION MTAS FOLIA PARSER -->
  203 +
  204 +
  205 +</mtas>
0 206 \ No newline at end of file
... ...
conf/parser/mtas/folia_sonar.xml
... ... @@ -18,6 +18,7 @@
18 18  
19 19 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
20 20 <autorepair value="true" />
  21 + <makeunique value="true" />
21 22 <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
22 23  
23 24 <!-- START REFERENCES -->
... ...
conf/parser/mtas/folia_test.xml
... ... @@ -19,7 +19,8 @@
19 19  
20 20 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
21 21 <autorepair value="true" />
22   - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
  22 + <makeunique value="true" />
  23 + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
23 24  
24 25 <!-- START REFERENCES -->
25 26 <references>
... ...
conf/parser/mtas/sketch_acdh.xml
... ... @@ -18,7 +18,8 @@
18 18 <parser name="mtas.analysis.parser.MtasSketchParser">
19 19 <!-- START GENERAL SETTINGS MTAS SKETCH PARSER -->
20 20 <autorepair value="true" />
21   - <!-- END GENERAL SETTINGS MTAS SKETCH PARSER -->
  21 + <makeunique value="true" />
  22 + <!-- END GENERAL SETTINGS MTAS SKETCH PARSER -->
22 23  
23 24 <mappings>
24 25  
... ...
conf/parser/mtas/tei_test.xml
... ... @@ -19,7 +19,8 @@
19 19  
20 20 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
21 21 <autorepair value="true" />
22   - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
  22 + <makeunique value="true" />
  23 + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
23 24  
24 25 <!-- START REFERENCES -->
25 26 <references>
... ...
conf/parser/mtasSource.xml 0 → 100644
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<mtas>
  3 + <configurations type="mtas.analysis.util.MtasTokenizerFactory">
  4 + <configuration name="EDBO" file="mtasSource/folia_edbo.xml" />
  5 + </configurations>
  6 + <configurations type="mtas.analysis.util.MtasCharFilterFactory">
  7 + <configuration name="EDBO" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" />
  8 + </configurations>
  9 +</mtas>
... ...
conf/parser/mtasSource/folia_edbo.xml 0 → 100644
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<mtas>
  3 +
  4 + <!-- START MTAS INDEX CONFIGURATION -->
  5 + <index>
  6 + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
  7 + <payload index="false" />
  8 + <offset index="false" />
  9 + <realoffset index="false" />
  10 + <parent index="true" />
  11 + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
  12 + </index>
  13 + <!-- END MTAS INDEX CONFIGURATION -->
  14 +
  15 + <!-- START CONFIGURATION MTAS FOLIA PARSER -->
  16 + <parser name="mtas.analysis.parser.MtasFoliaParser">
  17 +
  18 + <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
  19 + <autorepair value="true" />
  20 + <makeunique value="true" />
  21 + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
  22 +
  23 + <!-- START REFERENCES -->
  24 + <references>
  25 + </references>
  26 + <!-- END REFERENCES -->
  27 +
  28 + <!-- START MAPPINGS -->
  29 + <mappings>
  30 +
  31 + <!-- START WORDS -->
  32 + <mapping type="word" name="str">
  33 + </mapping>
  34 + <!-- END WORDS -->
  35 +
  36 + <!-- START WORD ANNOTATIONS -->
  37 + <mapping type="wordAnnotation" name="t">
  38 + <token type="string" offset="false">
  39 + <pre>
  40 + <item type="name" />
  41 + </pre>
  42 + <post>
  43 + <item type="text" />
  44 + </post>
  45 + </token>
  46 + <token type="string" offset="false" realoffset="false" parent="false">
  47 + <pre>
  48 + <item type="name" />
  49 + <item type="string" value="_lc" />
  50 + </pre>
  51 + <post>
  52 + <item type="text" filter="ascii,lowercase" />
  53 + </post>
  54 + </token>
  55 + <condition>
  56 + <item type="ancestor" number="0" />
  57 + <item type="ancestorWord" number="1" />
  58 + <item type="unknownAncestor" number="0" />
  59 + <item type="attribute" name="class" condition="Ticcl"/>
  60 + </condition>
  61 + </mapping>
  62 + <mapping type="wordAnnotation" name="correction">
  63 + </mapping>
  64 + <mapping type="wordAnnotation" name="new">
  65 + </mapping>
  66 + <mapping type="wordAnnotation" name="original">
  67 + </mapping>
  68 + <mapping type="wordAnnotation" name="suggestion">
  69 + </mapping>
  70 + <mapping type="wordAnnotation" name="t">
  71 + <token type="string" offset="false">
  72 + <pre>
  73 + <item type="name" />
  74 + </pre>
  75 + <post>
  76 + <item type="text" />
  77 + </post>
  78 + </token>
  79 + <token type="string" offset="false" realoffset="false" parent="false">
  80 + <pre>
  81 + <item type="name" />
  82 + <item type="string" value="_lc" />
  83 + </pre>
  84 + <post>
  85 + <item type="text" filter="ascii,lowercase" />
  86 + </post>
  87 + </token>
  88 + <condition>
  89 + <item type="ancestor" number="2" />
  90 + <item type="ancestorName" condition="new" />
  91 + <item type="unknownAncestor" number="0" />
  92 + <item type="attribute" name="class" condition="Ticcl"/>
  93 + </condition>
  94 + </mapping>
  95 + <mapping type="wordAnnotation" name="t">
  96 + <token type="string" offset="false">
  97 + <pre>
  98 + <item type="name" />
  99 + <item type="ancestorName" prefix="."/>
  100 + </pre>
  101 + <post>
  102 + <item type="text" />
  103 + </post>
  104 + </token>
  105 + <token type="string" offset="false" realoffset="false" parent="false">
  106 + <pre>
  107 + <item type="name" />
  108 + <item type="string" value="_lc" />
  109 + <item type="ancestorName" prefix="."/>
  110 + </pre>
  111 + <post>
  112 + <item type="text" filter="ascii,lowercase" />
  113 + </post>
  114 + </token>
  115 + <condition>
  116 + <item type="ancestor" number="2" />
  117 + <item type="ancestorName" condition="original" />
  118 + <item type="unknownAncestor" number="0" />
  119 + </condition>
  120 + </mapping>
  121 + <mapping type="wordAnnotation" name="t">
  122 + <token type="string" offset="false">
  123 + <pre>
  124 + <item type="name" />
  125 + <item type="ancestorName" prefix="."/>
  126 + </pre>
  127 + <post>
  128 + <item type="text" />
  129 + </post>
  130 + </token>
  131 + <token type="string" offset="false" realoffset="false" parent="false">
  132 + <pre>
  133 + <item type="name" />
  134 + <item type="string" value="_lc" />
  135 + <item type="ancestorName" prefix="."/>
  136 + </pre>
  137 + <post>
  138 + <item type="text" filter="ascii,lowercase" />
  139 + </post>
  140 + </token>
  141 + <condition>
  142 + <item type="ancestor" number="2" />
  143 + <item type="ancestorName" condition="suggestion" />
  144 + <item type="unknownAncestor" number="0" />
  145 + </condition>
  146 + </mapping>
  147 + <!-- END WORD ANNOTATIONS -->
  148 +
  149 + <!-- START RELATIONS -->
  150 + <!-- END RELATIONS -->
  151 +
  152 + <!-- START GROUPS -->
  153 + <mapping type="group" name="p">
  154 + <token type="string" offset="false">
  155 + <pre>
  156 + <item type="name" />
  157 + </pre>
  158 + <post>
  159 + <item type="attribute" name="class" />
  160 + </post>
  161 + </token>
  162 + </mapping>
  163 + <mapping type="group" name="div">
  164 + <token type="string" offset="false">
  165 + <pre>
  166 + <item type="name" />
  167 + </pre>
  168 + <post>
  169 + <item type="attribute" name="class" />
  170 + </post>
  171 + </token>
  172 + </mapping>
  173 + <mapping type="group" name="head">
  174 + <token type="string" offset="false">
  175 + <pre>
  176 + <item type="name" />
  177 + </pre>
  178 + <post>
  179 + <item type="attribute" name="class" />
  180 + </post>
  181 + </token>
  182 + </mapping>
  183 + <!-- END GROUPS -->
  184 +
  185 + <!-- START GROUP ANNOTATIONS -->
  186 + <mapping type="groupAnnotation" name="lang">
  187 + <token type="string" offset="false" realoffset="false" parent="false">
  188 + <pre>
  189 + <item type="name" />
  190 + </pre>
  191 + <post>
  192 + <item type="attribute" name="class" />
  193 + </post>
  194 + </token>
  195 + </mapping>
  196 + <!-- END GROUP ANNOTATIONS -->
  197 +
  198 + </mappings>
  199 + <!-- END MAPPINGS -->
  200 +
  201 + </parser>
  202 + <!-- END CONFIGURATION MTAS FOLIA PARSER -->
  203 +
  204 +</mtas>
0 205 \ No newline at end of file
... ...
conf/solr/schemaNederlab.xml
... ... @@ -255,8 +255,8 @@
255 255 <field name="NLContent_folia_available" type="nederlab_boolean"
256 256 required="false" multiValued="false" indexed="true" stored="true" />
257 257 <field name="NLContent_mtas" type="mtas_text" indexed="true"
258   - stored="true" />
259   - <field name="NLContent_mtas_error" type="nederlab_string"
  258 + stored="true" />
  259 + <field name="NLContent_mtas_error" type="nederlab_string"
260 260 indexed="true" stored="true" />
261 261 <field name="NLContent_mtas_numberOfTokens" type="nederlab_int"
262 262 indexed="true" stored="true" />
... ... @@ -264,7 +264,17 @@
264 264 indexed="true" stored="true" />
265 265 <field name="NLContent_mtas_size" type="nederlab_int" indexed="true"
266 266 stored="true" />
267   - <!-- Combined Field Metadata -->
  267 + <field name="NLContent_mtasSource" type="mtasSource_text" indexed="true"
  268 + stored="true" />
  269 + <field name="NLContent_mtasSource_error" type="nederlab_string"
  270 + indexed="true" stored="true" />
  271 + <field name="NLContent_mtasSource_numberOfTokens" type="nederlab_int"
  272 + indexed="true" stored="true" />
  273 + <field name="NLContent_mtasSource_numberOfPositions" type="nederlab_int"
  274 + indexed="true" stored="true" />
  275 + <field name="NLContent_mtasSource_size" type="nederlab_int" indexed="true"
  276 + stored="true" />
  277 + <!-- Combined Field Metadata -->
268 278 <field name="NLMetadata" type="nederlab_text" required="false"
269 279 multiValued="true" indexed="true" stored="false" />
270 280 <copyField source="NLCore_NLIdentification_nederlabID" dest="NLMetadata" />
... ... @@ -420,5 +430,27 @@
420 430 prefix="t" />
421 431 </analyzer>
422 432 </fieldType>
  433 +
  434 + <fieldType name="mtasSource_text_example_config" class="solr.TextField"
  435 + postingsFormat="MtasCodec">
  436 + <analyzer type="index">
  437 + <charFilter class="mtas.analysis.util.MtasCharFilterFactory"
  438 + config="mtasSource.xml" />
  439 + <tokenizer class="mtas.analysis.util.MtasTokenizerFactory"
  440 + config="mtasSource.xml" />
  441 + </analyzer>
  442 + </fieldType>
  443 +
  444 + <fieldType name="mtasSource_text" class="mtas.solr.schema.MtasPreAnalyzedField"
  445 + followIndexAnalyzer="mtasSource_text_example_config"
  446 + configurationFromField="NLCore_NLAdministrative_sourceCollection" setNumberOfTokens="NLContent_mtasSource_numberOfTokens"
  447 + setNumberOfPositions="NLContent_mtasSource_numberOfPositions" setSize="NLContent_mtasSource_size"
  448 + setError="NLContent_mtasSource_error" postingsFormat="MtasCodec">
  449 + <analyzer type="query">
  450 + <tokenizer class="solr.WhitespaceTokenizerFactory" />
  451 + <filter class="mtas.analysis.util.MtasPrefixTokenFilterFactory"
  452 + prefix="t" />
  453 + </analyzer>
  454 + </fieldType>
423 455  
424 456 </schema>
... ...
conf/solr/schemaOeaw.xml 0 → 100644
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +
  3 +<schema name="nederlab" version="1.5">
  4 +
  5 + <field name="_version_" type="nederlab_long" indexed="true"
  6 + stored="true" />
  7 +
  8 + <!-- component Profile -->
  9 + <field name="NLProfile_name" type="nederlab_string" required="true"
  10 + multiValued="false" indexed="true" stored="true" />
  11 +
  12 + <!-- component ResourceProxy -->
  13 + <field name="ResourceProxy_resourceRef" type="nederlab_string"
  14 + required="false" multiValued="true" indexed="true" stored="true" />
  15 + <dynamicField name="ResourceProxy_resourceRef_mimeType_*"
  16 + type="nederlab_string" required="false" multiValued="true" indexed="true"
  17 + stored="true" />
  18 +
  19 + <!-- component NLCore -->
  20 + <field name="NLCore_NLIdentification_nederlabID" type="nederlab_uuid"
  21 + required="true" multiValued="false" indexed="true" stored="true" />
  22 + <field name="NLCore_NLIdentification_editorialCode" type="nederlab_string"
  23 + required="false" multiValued="false" indexed="true" stored="true" />
  24 + <field name="NLCore_NLIdentification_versionID" type="nederlab_string"
  25 + required="true" multiValued="false" indexed="true" stored="true" />
  26 + <field name="NLCore_NLIdentification_sourceRef" type="nederlab_string"
  27 + required="false" multiValued="true" indexed="true" stored="true" />
  28 + <field name="NLCore_NLIdentification_sourceUrl" type="nederlab_string"
  29 + required="false" multiValued="true" indexed="true" stored="true" />
  30 + <field name="NLCore_NLIdentification_sourceRefUrl_serialized"
  31 + type="nederlab_string" required="false" multiValued="true" indexed="false"
  32 + stored="true" />
  33 + <field name="NLCore_NLAdministrative_ingestTime" type="nederlab_date"
  34 + required="true" multiValued="false" indexed="true" stored="true" />
  35 + <field name="NLCore_NLAdministrative_expirationTime" type="nederlab_date"
  36 + required="false" multiValued="false" indexed="true" stored="true" />
  37 + <field name="NLCore_NLAdministrative_lastEditedBy" type="nederlab_string"
  38 + required="false" multiValued="false" indexed="true" stored="true" />
  39 + <field name="NLCore_NLAdministrative_modificationTime" type="nederlab_date"
  40 + required="false" multiValued="false" indexed="true" stored="true" />
  41 + <field name="NLCore_NLAdministrative_editorialNote" type="nederlab_text"
  42 + required="false" multiValued="true" indexed="true" stored="true" />
  43 + <field name="NLCore_NLAdministrative_sourceCollection" type="nederlab_string"
  44 + required="false" multiValued="false" indexed="true" stored="true" />
  45 + <field name="NLCore_NLAdministrative_isThesaurusElement" type="nederlab_boolean"
  46 + required="true" multiValued="false" indexed="true" stored="true" />
  47 + <field name="NLCore_NLExternalReference_organizationName" type="nederlab_text"
  48 + required="false" multiValued="true" indexed="true" stored="true" />
  49 + <field name="NLCore_NLExternalReference_collectionName" type="nederlab_string"
  50 + required="false" multiValued="true" indexed="true" stored="true" />
  51 + <field name="NLCore_NLExternalReference_resourceRef" type="nederlab_string"
  52 + required="false" multiValued="true" indexed="true" stored="true" />
  53 + <field name="NLCore_NLExternalReference_serialized" type="nederlab_string"
  54 + required="false" multiValued="true" indexed="false" stored="true" />
  55 +
  56 + <!-- component NLTitle -->
  57 + <field name="NLTitle_title" type="nederlab_text" required="false"
  58 + multiValued="false" indexed="true" stored="true" />
  59 + <field name="NLTitle_subtitle" type="nederlab_text" required="false"
  60 + multiValued="false" indexed="true" stored="true" />
  61 + <field name="NLTitle_genre" type="nederlab_string" required="false"
  62 + multiValued="true" indexed="true" stored="true" />
  63 + <field name="NLTitle_category" type="nederlab_string" required="false"
  64 + multiValued="true" indexed="true" stored="true" />
  65 + <field name="NLTitle_yearOfPublicationMin" type="nederlab_int"
  66 + required="false" multiValued="false" indexed="true" stored="true" />
  67 + <field name="NLTitle_yearOfPublicationMax" type="nederlab_int"
  68 + required="false" multiValued="false" indexed="true" stored="true" />
  69 + <field name="NLTitle_yearOfPublicationApprox" type="nederlab_boolean"
  70 + required="false" multiValued="false" indexed="true" stored="true" />
  71 + <field name="NLTitle_yearOfPublicationLabel" type="nederlab_text"
  72 + required="false" multiValued="false" indexed="true" stored="true" />
  73 + <field name="NLTitle_edition" type="nederlab_string" required="false"
  74 + multiValued="false" indexed="true" stored="true" />
  75 + <field name="NLTitle_inNederlabAs" type="nederlab_uuid" required="false"
  76 + multiValued="false" indexed="true" stored="true" />
  77 + <field name="NLTitle_NLPublicationPlace_placeOfPublication" type="nederlab_string"
  78 + required="false" multiValued="true" indexed="true" stored="true" />
  79 + <field name="NLTitle_NLPublicationPlace_placeID" type="nederlab_string"
  80 + required="false" multiValued="true" indexed="true" stored="true" />
  81 + <field name="NLTitle_NLPublicationPlace_placeOfPublicationOriginal"
  82 + type="nederlab_text" required="false" multiValued="true" indexed="true"
  83 + stored="true" />
  84 + <field name="NLTitle_numberOfPages" type="nederlab_int" required="false"
  85 + multiValued="false" indexed="true" stored="true" />
  86 + <field name="NLTitle_numberOfWords" type="nederlab_int" required="false"
  87 + multiValued="false" indexed="true" stored="true" />
  88 + <field name="NLTitle_primaryLanguage" type="nederlab_string"
  89 + required="false" multiValued="false" indexed="true" stored="true" />
  90 + <field name="NLTitle_isTranslation" type="nederlab_boolean"
  91 + required="false" multiValued="false" indexed="true" stored="true" />
  92 + <field name="NLTitle_characterEncoding" type="nederlab_string"
  93 + required="false" multiValued="false" indexed="true" stored="true" />
  94 + <field name="NLTitle_codingStandard" type="nederlab_string"
  95 + required="false" multiValued="true" indexed="true" stored="true" />
  96 + <field name="NLTitle_textQuality" type="nederlab_text" required="false"
  97 + multiValued="false" indexed="true" stored="true" />
  98 + <field name="NLTitle_processingMethod" type="nederlab_text"
  99 + required="false" multiValued="false" indexed="true" stored="true" />
  100 + <field name="NLTitle_autopsyPerformed" type="nederlab_boolean"
  101 + required="false" multiValued="false" indexed="true" stored="true" />
  102 + <field name="NLTitle_NLPersonRef_personID" type="nederlab_uuid"
  103 + required="false" multiValued="true" indexed="true" stored="true" />
  104 + <field name="NLTitle_NLPersonRef_role" type="nederlab_string"
  105 + required="false" multiValued="true" indexed="true" stored="true" />
  106 + <dynamicField name="NLTitle_NLPersonRef_personID_role_*"
  107 + type="nederlab_uuid" required="false" multiValued="true" indexed="true"
  108 + stored="true" />
  109 + <field name="NLTitle_contains" type="nederlab_uuid" required="false"
  110 + multiValued="true" indexed="true" stored="true" />
  111 + <field name="NLTitle_seriesTitleID" type="nederlab_uuid"
  112 + required="false" multiValued="true" indexed="true" stored="true" />
  113 + <field name="NLTitle_seriesTitleID_parent" type="nederlab_uuid"
  114 + required="false" multiValued="false" indexed="true" stored="true" />
  115 + <field name="NLTitle_seriesTitleID_root" type="nederlab_uuid"
  116 + required="false" multiValued="false" indexed="true" stored="true" />
  117 +
  118 + <!-- component NLDependentTitle -->
  119 + <field name="NLDependentTitle_title" type="nederlab_text"
  120 + required="false" multiValued="false" indexed="true" stored="true" />
  121 + <field name="NLDependentTitle_subtitle" type="nederlab_text"
  122 + required="false" multiValued="false" indexed="true" stored="true" />
  123 + <field name="NLDependentTitle_primaryLanguage" type="nederlab_string"
  124 + required="false" multiValued="false" indexed="true" stored="true" />
  125 + <field name="NLDependentTitle_parentTitleID" type="nederlab_uuid"
  126 + required="false" multiValued="false" indexed="true" stored="true" />
  127 + <field name="NLDependentTitle_inNederlabAs" type="nederlab_uuid"
  128 + required="false" multiValued="false" indexed="true" stored="true" />
  129 + <field name="NLDependentTitle_NLPersonRef_personID" type="nederlab_uuid"
  130 + required="false" multiValued="true" indexed="true" stored="true" />
  131 + <field name="NLDependentTitle_NLPersonRef_role" type="nederlab_string"
  132 + required="false" multiValued="true" indexed="true" stored="true" />
  133 + <dynamicField name="NLDependentTitle_NLPersonRef_personID_role_*"
  134 + type="nederlab_uuid" required="false" multiValued="true" indexed="true"
  135 + stored="true" />
  136 + <field name="NLDependentTitle_startPage" type="nederlab_int"
  137 + required="false" multiValued="false" indexed="true" stored="true" />
  138 + <field name="NLDependentTitle_endPage" type="nederlab_int"
  139 + required="false" multiValued="false" indexed="true" stored="true" />
  140 +
  141 + <!-- component NLPerson -->
  142 + <field name="NLPerson_NLPersonName_nameId" type="nederlab_uuid"
  143 + required="false" multiValued="true" indexed="true" stored="true" />
  144 + <field name="NLPerson_NLPersonName_lastName" type="nederlab_text"
  145 + required="false" multiValued="true" indexed="true" stored="true" />
  146 + <field name="NLPerson_NLPersonName_firstName" type="nederlab_text"
  147 + required="false" multiValued="true" indexed="true" stored="true" />
  148 + <field name="NLPerson_NLPersonName_infixes" type="nederlab_text"
  149 + required="false" multiValued="true" indexed="true" stored="true" />
  150 + <field name="NLPerson_NLPersonName_firstNameFull" type="nederlab_text"
  151 + required="false" multiValued="true" indexed="true" stored="true" />
  152 + <field name="NLPerson_NLPersonName_fullName" type="nederlab_text"
  153 + required="false" multiValued="true" indexed="true" stored="true" />
  154 + <field name="NLPerson_NLPersonName_fullName_serialized" type="nederlab_string"
  155 + required="false" multiValued="true" indexed="false" stored="true" />
  156 + <field name="NLPerson_NLPersonName_preferredNameID" type="nederlab_uuid"
  157 + required="false" multiValued="false" indexed="true" stored="true" />
  158 + <field name="NLPerson_NLPersonName_preferredLastName" type="nederlab_string"
  159 + required="false" multiValued="false" indexed="true" stored="true" />
  160 + <field name="NLPerson_NLPersonName_preferredFirstName" type="nederlab_string"
  161 + required="false" multiValued="false" indexed="true" stored="true" />
  162 + <field name="NLPerson_NLPersonName_preferredFirstNameFull" type="nederlab_string"
  163 + required="false" multiValued="false" indexed="true" stored="true" />
  164 + <field name="NLPerson_NLPersonName_preferredInfixes" type="nederlab_string"
  165 + required="false" multiValued="false" indexed="true" stored="true" />
  166 + <field name="NLPerson_NLPersonName_preferredFullName" type="nederlab_text"
  167 + required="false" multiValued="false" indexed="true" stored="true" />
  168 + <field name="NLPerson_NLPersonName_preferredFullName_serialized"
  169 + type="nederlab_string" required="false" multiValued="false" indexed="false"
  170 + stored="true" />
  171 + <field name="NLPerson_dateOfBirthDayMonth" type="nederlab_text"
  172 + required="false" multiValued="false" indexed="true" stored="true" />
  173 + <field name="NLPerson_dateOfBirthMonth" type="nederlab_int"
  174 + required="false" multiValued="false" indexed="true" stored="true" />
  175 + <field name="NLPerson_dateOfBirthDay" type="nederlab_int"
  176 + required="false" multiValued="false" indexed="true" stored="true" />
  177 + <field name="NLPerson_yearOfBirthMin" type="nederlab_int"
  178 + required="false" multiValued="false" indexed="true" stored="true" />
  179 + <field name="NLPerson_yearOfBirthMax" type="nederlab_int"
  180 + required="false" multiValued="false" indexed="true" stored="true" />
  181 + <field name="NLPerson_yearOfBirthApprox" type="nederlab_boolean"
  182 + required="false" multiValued="false" indexed="true" stored="true" />
  183 + <field name="NLPerson_yearOfBirthLabel" type="nederlab_text"
  184 + required="false" multiValued="false" indexed="true" stored="true" />
  185 + <field name="NLPerson_placeOfBirth" type="nederlab_string"
  186 + required="false" multiValued="false" indexed="true" stored="true" />
  187 + <field name="NLPerson_placeOfBirthID" type="nederlab_string"
  188 + required="false" multiValued="false" indexed="true" stored="true" />
  189 + <field name="NLPerson_dateOfDeathDayMonth" type="nederlab_text"
  190 + required="false" multiValued="false" indexed="true" stored="true" />
  191 + <field name="NLPerson_dateOfDeathMonth" type="nederlab_int"
  192 + required="false" multiValued="false" indexed="true" stored="true" />
  193 + <field name="NLPerson_dateOfDeathDay" type="nederlab_int"
  194 + required="false" multiValued="false" indexed="true" stored="true" />
  195 + <field name="NLPerson_yearOfDeathMin" type="nederlab_int"
  196 + required="false" multiValued="false" indexed="true" stored="true" />
  197 + <field name="NLPerson_yearOfDeathMax" type="nederlab_int"
  198 + required="false" multiValued="false" indexed="true" stored="true" />
  199 + <field name="NLPerson_yearOfDeathApprox" type="nederlab_boolean"
  200 + required="false" multiValued="false" indexed="true" stored="true" />
  201 + <field name="NLPerson_yearOfDeathLabel" type="nederlab_text"
  202 + required="false" multiValued="false" indexed="true" stored="true" />
  203 + <field name="NLPerson_placeOfDeath" type="nederlab_string"
  204 + required="false" multiValued="false" indexed="true" stored="true" />
  205 + <field name="NLPerson_placeOfDeathID" type="nederlab_string"
  206 + required="false" multiValued="false" indexed="true" stored="true" />
  207 + <field name="NLPerson_gender" type="nederlab_string" required="false"
  208 + multiValued="false" indexed="true" stored="true" />
  209 + <field name="NLPerson_profession" type="nederlab_string"
  210 + required="false" multiValued="true" indexed="true" stored="true" />
  211 + <field name="NLPerson_education" type="nederlab_string" required="false"
  212 + multiValued="true" indexed="true" stored="true" />
  213 + <field name="NLPerson_inThesaurusAs" type="nederlab_uuid"
  214 + required="false" multiValued="false" indexed="true" stored="true" />
  215 +
  216 + <!-- component NLSeriesTitle -->
  217 + <field name="NLSeriesTitle_title" type="nederlab_text" required="false"
  218 + multiValued="false" indexed="true" stored="true" />
  219 + <field name="NLSeriesTitle_years" type="nederlab_text" required="false"
  220 + multiValued="false" indexed="true" stored="true" />
  221 + <field name="NLSeriesTitle_description" type="nederlab_text"
  222 + required="false" multiValued="false" indexed="true" stored="true" />
  223 + <field name="NLSeriesTitle_inNederlabAs" type="nederlab_uuid"
  224 + required="false" multiValued="false" indexed="true" stored="true" />
  225 + <field name="NLSeriesTitle_seriesTitleID" type="nederlab_uuid"
  226 + required="false" multiValued="true" indexed="true" stored="true" />
  227 + <field name="NLSeriesTitle_seriesTitleID_parent" type="nederlab_uuid"
  228 + required="false" multiValued="false" indexed="true" stored="true" />
  229 + <field name="NLSeriesTitle_seriesTitleID_root" type="nederlab_uuid"
  230 + required="false" multiValued="false" indexed="true" stored="true" />
  231 +
  232 + <!-- component NLCollectionSpecific -->
  233 + <dynamicField name="NLCollectionSpecific_*" type="nederlab_string"
  234 + required="false" multiValued="true" indexed="true" stored="true" />
  235 +
  236 + <!-- component NLContent old -->
  237 +
  238 + <field name="NLContent_text_available" type="nederlab_boolean"
  239 + required="false" multiValued="false" indexed="true" stored="true" />
  240 + <field name="NLContent_text" type="nederlab_content" required="false"
  241 + multiValued="false" indexed="true" stored="true" termVectors="true"
  242 + termPositions="true" termOffsets="true" />
  243 + <field name="NLContent_text_lowercase" type="nederlab_content_lowercase"
  244 + required="false" multiValued="false" indexed="true" stored="true"
  245 + termVectors="true" termPositions="true" termOffsets="true" />
  246 + <copyField source="NLContent_text" dest="NLContent_text_lowercase" />
  247 + <field name="NLContent_ticcl_available" type="nederlab_boolean"
  248 + required="false" multiValued="false" indexed="true" stored="true" />
  249 + <field name="NLContent_ticcl_lowercase" type="nederlab_content_lowercase"
  250 + required="false" multiValued="false" indexed="true" stored="true"
  251 + termVectors="true" termPositions="true" termOffsets="true" />
  252 +
  253 + <!-- component NLContent -->
  254 +
  255 + <field name="NLContent_folia_available" type="nederlab_boolean"
  256 + required="false" multiValued="false" indexed="true" stored="true" />
  257 + <field name="NLContent_mtas" type="mtas_text" indexed="true"
  258 + stored="true" />
  259 + <field name="NLContent_mtas_error" type="nederlab_string"
  260 + indexed="true" stored="true" />
  261 + <field name="NLContent_mtas_numberOfTokens" type="nederlab_int"
  262 + indexed="true" stored="true" />
  263 + <field name="NLContent_mtas_numberOfPositions" type="nederlab_int"
  264 + indexed="true" stored="true" />
  265 + <field name="NLContent_mtas_size" type="nederlab_int" indexed="true"
  266 + stored="true" />
  267 + <!-- Combined Field Metadata -->
  268 + <field name="NLMetadata" type="nederlab_text" required="false"
  269 + multiValued="true" indexed="true" stored="false" />
  270 + <copyField source="NLCore_NLIdentification_nederlabID" dest="NLMetadata" />
  271 + <copyField source="NLCore_NLIdentification_editorialCode"
  272 + dest="NLMetadata" />
  273 + <copyField source="NLCore_NLIdentification_sourceRef" dest="NLMetadata" />
  274 + <copyField source="NLCore_NLAdministrative_editorialNote"
  275 + dest="NLMetadata" />
  276 + <copyField source="NLCore_NLAdministrative_sourceCollection"
  277 + dest="NLMetadata" />
  278 + <copyField source="NLCore_NLExternalReference_organizationName"
  279 + dest="NLMetadata" />
  280 + <copyField source="NLCore_NLExternalReference_collectionName"
  281 + dest="NLMetadata" />
  282 + <copyField source="NLCore_NLExternalReference_resourceRef"
  283 + dest="NLMetadata" />
  284 + <copyField source="NLTitle_title" dest="NLMetadata" />
  285 + <copyField source="NLTitle_subtitle" dest="NLMetadata" />
  286 + <copyField source="NLTitle_genre" dest="NLMetadata" />
  287 + <copyField source="NLTitle_category" dest="NLMetadata" />
  288 + <copyField source="NLTitle_yearOfPublicationMin" dest="NLMetadata" />
  289 + <copyField source="NLTitle_yearOfPublicationMax" dest="NLMetadata" />
  290 + <copyField source="NLTitle_yearOfPublicationLabel" dest="NLMetadata" />
  291 + <copyField source="NLTitle_edition" dest="NLMetadata" />
  292 + <copyField source="NLTitle_NLPublicationPlace_placeOfPublication"
  293 + dest="NLMetadata" />
  294 + <copyField source="NLTitle_NLPublicationPlace_placeID" dest="NLMetadata" />
  295 + <copyField source="NLTitle_NLPublicationPlace_placeOfPublicationOriginal"
  296 + dest="NLMetadata" />
  297 + <copyField source="NLTitle_primaryLanguage" dest="NLMetadata" />
  298 + <copyField source="NLTitle_characterEncoding" dest="NLMetadata" />
  299 + <copyField source="NLTitle_codingStandard" dest="NLMetadata" />
  300 + <copyField source="NLTitle_textQuality" dest="NLMetadata" />
  301 + <copyField source="NLTitle_processingMethod" dest="NLMetadata" />
  302 + <copyField source="NLTitle_NLPersonRef_role" dest="NLMetadata" />
  303 + <copyField source="NLDependentTitle_title" dest="NLMetadata" />
  304 + <copyField source="NLDependentTitle_subtitle" dest="NLMetadata" />
  305 + <copyField source="NLDependentTitle_primaryLanguage" dest="NLMetadata" />
  306 + <copyField source="NLDependentTitle_NLPersonRef_role" dest="NLMetadata" />
  307 + <copyField source="NLPerson_NLPersonName_lastName" dest="NLMetadata" />
  308 + <copyField source="NLPerson_NLPersonName_firstName" dest="NLMetadata" />
  309 + <copyField source="NLPerson_NLPersonName_infixes" dest="NLMetadata" />
  310 + <copyField source="NLPerson_NLPersonName_firstNameFull" dest="NLMetadata" />
  311 + <copyField source="NLPerson_NLPersonName_fullName" dest="NLMetadata" />
  312 + <copyField source="NLPerson_dateOfBirthDayMonth" dest="NLMetadata" />
  313 + <copyField source="NLPerson_yearOfBirthMin" dest="NLMetadata" />
  314 + <copyField source="NLPerson_yearOfBirthMax" dest="NLMetadata" />
  315 + <copyField source="NLPerson_yearOfBirthLabel" dest="NLMetadata" />
  316 + <copyField source="NLPerson_placeOfBirth" dest="NLMetadata" />
  317 + <copyField source="NLPerson_placeOfBirthID" dest="NLMetadata" />
  318 + <copyField source="NLPerson_dateOfDeathDayMonth" dest="NLMetadata" />
  319 + <copyField source="NLPerson_yearOfDeathMin" dest="NLMetadata" />
  320 + <copyField source="NLPerson_yearOfDeathMax" dest="NLMetadata" />
  321 + <copyField source="NLPerson_yearOfDeathLabel" dest="NLMetadata" />
  322 + <copyField source="NLPerson_placeOfDeath" dest="NLMetadata" />
  323 + <copyField source="NLPerson_placeOfDeathID" dest="NLMetadata" />
  324 + <copyField source="NLPerson_gender" dest="NLMetadata" />
  325 + <copyField source="NLPerson_profession" dest="NLMetadata" />
  326 + <copyField source="NLPerson_education" dest="NLMetadata" />
  327 + <copyField source="NLSeriesTitle_title" dest="NLMetadata" />
  328 + <copyField source="NLSeriesTitle_years" dest="NLMetadata" />
  329 + <copyField source="NLSeriesTitle_description" dest="NLMetadata" />
  330 + <copyField source="NLCollectionSpecific_*" dest="NLMetadata" />
  331 +
  332 + <uniqueKey>NLCore_NLIdentification_versionID</uniqueKey>
  333 +
  334 + <fieldType name="nederlab_string" class="solr.StrField"
  335 + sortMissingLast="true" />
  336 + <fieldType name="nederlab_uuid" class="solr.StrField"
  337 + sortMissingLast="true" />
  338 + <fieldType name="nederlab_boolean" class="solr.BoolField"
  339 + sortMissingLast="true" />
  340 + <fieldType name="nederlab_int" class="solr.TrieIntField"
  341 + precisionStep="8" positionIncrementGap="0" />
  342 + <fieldType name="nederlab_long" class="solr.TrieLongField"
  343 + precisionStep="0" positionIncrementGap="0" />
  344 + <fieldType name="nederlab_date" class="solr.TrieDateField"
  345 + precisionStep="6" positionIncrementGap="0" />
  346 + <fieldtype name="nederlab_binary" class="solr.BinaryField" />
  347 +
  348 + <fieldType name="nederlab_text" class="solr.TextField"
  349 + positionIncrementGap="100">
  350 + <analyzer type="index">
  351 + <tokenizer class="solr.StandardTokenizerFactory" />
  352 + <filter class="solr.LowerCaseFilterFactory" />
  353 + </analyzer>
  354 + <analyzer type="query">
  355 + <tokenizer class="solr.StandardTokenizerFactory" />
  356 + <filter class="solr.LowerCaseFilterFactory" />
  357 + </analyzer>
  358 + </fieldType>
  359 +
  360 + <fieldType name="nederlab_content" class="solr.TextField"
  361 + positionIncrementGap="100">
  362 + <analyzer type="index">
  363 + <tokenizer class="solr.StandardTokenizerFactory" />
  364 + </analyzer>
  365 + <analyzer type="query">
  366 + <tokenizer class="solr.StandardTokenizerFactory" />
  367 + </analyzer>
  368 + </fieldType>
  369 +
  370 + <fieldType name="nederlab_content_lowercase" class="solr.TextField"
  371 + positionIncrementGap="100">
  372 + <analyzer type="index">
  373 + <tokenizer class="solr.StandardTokenizerFactory" />
  374 + <filter class="solr.LowerCaseFilterFactory" />
  375 + </analyzer>
  376 + <analyzer type="query">
  377 + <tokenizer class="solr.StandardTokenizerFactory" />
  378 + <filter class="solr.LowerCaseFilterFactory" />
  379 + </analyzer>
  380 + </fieldType>
  381 +
  382 + <fieldType name="mtas_text" class="solr.TextField"
  383 + postingsFormat="MtasCodec">
  384 + <analyzer type="index">
  385 + <charFilter class="mtas.analysis.util.MtasCharFilterFactory"
  386 + type="file" prefix="/local/data/" />
  387 + <tokenizer class="mtas.analysis.util.MtasTokenizerFactory"
  388 + configFile="mtas/folia_oeaw.xml" />
  389 + </analyzer>
  390 + </fieldType>
  391 +
  392 +</schema>
... ...
conf/solr/schemaTest.xml
... ... @@ -115,11 +115,7 @@
115 115  
116 116 WARNING: The _text_ catch-all field will significantly increase your index size.
117 117 If you don't need it, consider removing it and the corresponding copyField directive.
118   - -->
119   - <!
120   - <fieldType name="string_simpletext" class="solr.StrField" postingsFormat="SimpleText" />
121   - <field name="simple_string" type="string_simpletext" indexed="true" stored="true" required="false" multiValued="false" />
122   - -->
  118 + -->
123 119  
124 120 <fieldType name="mtas_text" class="solr.TextField" postingsFormat="MtasCodec">
125 121 <analyzer type="index">
... ...
junit/mtas/parser/MtasCQLParserTestSentence.java
... ... @@ -29,11 +29,11 @@ public class MtasCQLParserTestSentence {
29 29 basicTests();
30 30 }
31 31  
32   - private void testCQLParse(String field, String cql, SpanQuery q) {
  32 + private void testCQLParse(String field, String defaultPrefix, String cql, SpanQuery q) {
33 33 MtasCQLParser p = new MtasCQLParser(new BufferedReader(new StringReader(cql)));
34 34 try {
35 35 System.out.print("CQL parsing:\t"+cql);
36   - assertEquals(p.parse(field) ,q);
  36 + assertEquals(p.parse(field, defaultPrefix) ,q);
37 37 System.out.print("\n");
38 38 } catch (ParseException e) {
39 39 System.out.println("Error CQL parsing:\t"+cql);
... ... @@ -41,12 +41,12 @@ public class MtasCQLParserTestSentence {
41 41 }
42 42 }
43 43  
44   - private void testCQLEquivalent(String field, String cql1, String cql2) {
  44 + private void testCQLEquivalent(String field, String defaultPrefix, String cql1, String cql2) {
45 45 MtasCQLParser p1 = new MtasCQLParser(new BufferedReader(new StringReader(cql1)));
46 46 MtasCQLParser p2 = new MtasCQLParser(new BufferedReader(new StringReader(cql2)));
47 47 try {
48 48 System.out.print("CQL equivalent:\t"+cql1+" and "+cql2);
49   - assertEquals(p1.parse(field) ,p2.parse(field));
  49 + assertEquals(p1.parse(field, defaultPrefix) ,p2.parse(field, defaultPrefix));
50 50 System.out.print("\n");
51 51 } catch (ParseException e) {
52 52 System.out.println("Error CQL equivalent:\t"+cql1+" and "+cql2);
... ... @@ -73,6 +73,7 @@ public class MtasCQLParserTestSentence {
73 73 basicTest16();
74 74 basicTest17();
75 75 basicTest18();
  76 + basicTest19();
76 77 }
77 78  
78 79 private void basicTest1() {
... ... @@ -84,14 +85,14 @@ public class MtasCQLParserTestSentence {
84 85 items.add(new MtasSpanSequenceItem(q1, false));
85 86 items.add(new MtasSpanSequenceItem(q2, false));
86 87 SpanQuery q = new MtasSpanSequenceQuery(items);
87   - testCQLParse(field, cql, q);
  88 + testCQLParse(field, null, cql, q);
88 89 }
89 90  
90 91 private void basicTest2() {
91 92 String field = "testveld";
92 93 String cql1 = "[pos=\"LID\"] [] []? [] [lemma=\"koe\"]";
93 94 String cql2 = "[pos=\"LID\"] []{2,3} [lemma=\"koe\"]";
94   - testCQLEquivalent(field, cql1, cql2);
  95 + testCQLEquivalent(field, null, cql1, cql2);
95 96 }
96 97  
97 98 private void basicTest3() {
... ... @@ -100,7 +101,7 @@ public class MtasCQLParserTestSentence {
100 101 SpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID");
101 102 SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","koe");
102 103 SpanQuery q = new MtasSpanOrQuery(q1,q2);
103   - testCQLParse(field, cql, q);
  104 + testCQLParse(field, null, cql, q);
104 105 }
105 106  
106 107 private void basicTest4() {
... ... @@ -114,28 +115,28 @@ public class MtasCQLParserTestSentence {
114 115 items.add(new MtasSpanSequenceItem(q3, false));
115 116 SpanQuery q4 = new MtasSpanSequenceQuery(items);
116 117 SpanQuery q = new MtasSpanOrQuery(q1,q4);
117   - testCQLParse(field, cql, q);
  118 + testCQLParse(field, null, cql, q);
118 119 }
119 120  
120 121 private void basicTest5() {
121 122 String field = "testveld";
122 123 String cql1 = "([pos=\"LID\"]([pos=\"ADJ\"][lemma=\"koe\"]))";
123 124 String cql2 = "[pos=\"LID\"][pos=\"ADJ\"][lemma=\"koe\"]";
124   - testCQLEquivalent(field, cql1, cql2);
  125 + testCQLEquivalent(field, null, cql1, cql2);
125 126 }
126 127  
127 128 private void basicTest6() {
128 129 String field = "testveld";
129 130 String cql1 = "([pos=\"LID\"]|[lemma=\"de\"][lemma=\"koe\"])|([pos=\"ADJ\"]|([lemma=\"het\"]([lemma=\"paard\"])))";
130 131 String cql2 = "[pos=\"LID\"]|[lemma=\"de\"][lemma=\"koe\"]|[pos=\"ADJ\"]|[lemma=\"het\"][lemma=\"paard\"]";
131   - testCQLEquivalent(field, cql1, cql2);
  132 + testCQLEquivalent(field, null, cql1, cql2);
132 133 }
133 134  
134 135 private void basicTest7() {
135 136 String field = "testveld";
136 137 String cql1 = "[pos=\"LID\"] []{0,1} []{3,5} []{2,4}";
137 138 String cql2 = "[pos=\"LID\"] []{5,10}";
138   - testCQLEquivalent(field, cql1, cql2);
  139 + testCQLEquivalent(field, null, cql1, cql2);
139 140 }
140 141  
141 142 private void basicTest8() {
... ... @@ -149,7 +150,7 @@ public class MtasCQLParserTestSentence {
149 150 items.add(new MtasSpanSequenceItem(q1, false));
150 151 items.add(new MtasSpanSequenceItem(q4, false));
151 152 SpanQuery q = new MtasSpanSequenceQuery(items);
152   - testCQLParse(field, cql, q);
  153 + testCQLParse(field, null, cql, q);
153 154 }
154 155  
155 156 private void basicTest9() {
... ... @@ -165,7 +166,7 @@ public class MtasCQLParserTestSentence {
165 166 items.add(new MtasSpanSequenceItem(q5, false));
166 167 items.add(new MtasSpanSequenceItem(q4, false));
167 168 SpanQuery q = new MtasSpanSequenceQuery(items);
168   - testCQLParse(field, cql, q);
  169 + testCQLParse(field, null, cql, q);
169 170 }
170 171  
171 172 private void basicTest10() {
... ... @@ -179,7 +180,7 @@ public class MtasCQLParserTestSentence {
179 180 items.add(new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(q2,1,3), false));
180 181 items.add(new MtasSpanSequenceItem(q3, false));
181 182 SpanQuery q = new MtasSpanSequenceQuery(items);
182   - testCQLParse(field, cql, q);
  183 + testCQLParse(field, null, cql, q);
183 184 }
184 185  
185 186 private void basicTest11() {
... ... @@ -188,7 +189,7 @@ public class MtasCQLParserTestSentence {
188 189 SpanQuery q1 = new MtasCQLParserGroupQuery(field,"sentence");
189 190 SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","koe");
190 191 SpanQuery q = new SpanContainingQuery(q1, q2);
191   - testCQLParse(field, cql, q);
  192 + testCQLParse(field, null, cql, q);
192 193 }
193 194  
194 195 private void basicTest12() {
... ... @@ -197,7 +198,7 @@ public class MtasCQLParserTestSentence {
197 198 SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe");
198 199 SpanQuery q2 = new MtasCQLParserGroupQuery(field,"sentence");
199 200 SpanQuery q = new SpanWithinQuery(q2, q1);
200   - testCQLParse(field, cql, q);
  201 + testCQLParse(field, null, cql, q);
201 202 }
202 203  
203 204 private void basicTest13() {
... ... @@ -211,7 +212,7 @@ public class MtasCQLParserTestSentence {
211 212 items.add(new MtasSpanSequenceItem(q1, false));
212 213 items.add(new MtasSpanSequenceItem(q4, false));
213 214 SpanQuery q = new MtasSpanSequenceQuery(items);
214   - testCQLParse(field, cql, q);
  215 + testCQLParse(field, null, cql, q);
215 216 }
216 217  
217 218 private void basicTest14() {
... ... @@ -225,7 +226,7 @@ public class MtasCQLParserTestSentence {
225 226 items.add(new MtasSpanSequenceItem(q3, false));
226 227 items.add(new MtasSpanSequenceItem(q4, false));
227 228 SpanQuery q = new MtasSpanSequenceQuery(items);
228   - testCQLParse(field, cql, q);
  229 + testCQLParse(field, null, cql, q);
229 230 }
230 231  
231 232 private void basicTest15() {
... ... @@ -246,7 +247,7 @@ public class MtasCQLParserTestSentence {
246 247 items2.add(new MtasSpanSequenceItem(q1, false));
247 248 items2.add(new MtasSpanSequenceItem(q8, false));
248 249 SpanQuery q = new MtasSpanSequenceQuery(items2);
249   - testCQLParse(field, cql, q);
  250 + testCQLParse(field, null, cql, q);
250 251 }
251 252  
252 253 private void basicTest16() {
... ... @@ -258,7 +259,7 @@ public class MtasCQLParserTestSentence {
258 259 SpanQuery q4 = new SpanContainingQuery(q2, q3);
259 260 SpanQuery q5 = new SpanWithinQuery(q4, q1);
260 261 SpanQuery q = new SpanNotQuery(q5,new SpanContainingQuery(q5, q3));
261   - testCQLParse(field, cql, q);
  262 + testCQLParse(field, null, cql, q);
262 263 }
263 264  
264 265 private void basicTest17() {
... ... @@ -271,11 +272,23 @@ public class MtasCQLParserTestSentence {
271 272 items.add(new MtasSpanSequenceItem(q2, false));
272 273 items.add(new MtasSpanSequenceItem(new MtasSpanMatchAllQuery(field), false));
273 274 SpanQuery q = new MtasSpanSequenceQuery(items);
274   - testCQLParse(field, cql, q);
  275 + testCQLParse(field, null, cql, q);
275 276 }
276 277  
277 278 private void basicTest18() {
278 279 String field = "testveld";
  280 + String cql = "\"de\" [pos=\"N\"]";
  281 + SpanQuery q1 = new MtasCQLParserWordQuery(field,"t_lc","de");
  282 + SpanQuery q2 = new MtasCQLParserWordQuery(field,"pos","N");
  283 + List<MtasSpanSequenceItem> items = new ArrayList<MtasSpanSequenceItem>();
  284 + items.add(new MtasSpanSequenceItem(q1, false));
  285 + items.add(new MtasSpanSequenceItem(q2, false));
  286 + SpanQuery q = new MtasSpanSequenceQuery(items);
  287 + testCQLParse(field, "t_lc", cql, q);
  288 + }
  289 +
  290 + private void basicTest19() {
  291 + String field = "testveld";
279 292 String cql = "([]<entity=\"loc\"/>{1,2}[]){3,4}";
280 293 SpanQuery q1 = new MtasCQLParserGroupQuery(field,"entity","loc");
281 294 SpanQuery q2 = new MtasSpanRecurrenceQuery(q1,1,2);
... ... @@ -285,7 +298,7 @@ public class MtasCQLParserTestSentence {
285 298 items.add(new MtasSpanSequenceItem(new MtasSpanMatchAllQuery(field), false));
286 299 SpanQuery q3 = new MtasSpanSequenceQuery(items);
287 300 SpanQuery q = new MtasSpanRecurrenceQuery(q3,3,4);
288   - testCQLParse(field, cql, q);
  301 + testCQLParse(field, null, cql, q);
289 302 }
290 303  
291 304 }
... ...
junit/mtas/parser/MtasCQLParserTestWord.java
... ... @@ -23,10 +23,10 @@ public class MtasCQLParserTestWord {
23 23 basicNotTests();
24 24 }
25 25  
26   - private void testCQLParse(String field, String cql, SpanQuery q) {
  26 + private void testCQLParse(String field, String defaultPrefix, String cql, SpanQuery q) {
27 27 MtasCQLParser p = new MtasCQLParser(new BufferedReader(new StringReader(cql)));
28 28 try {
29   - assertEquals(p.parse(field) ,q);
  29 + assertEquals(p.parse(field, defaultPrefix) ,q);
30 30 System.out.println("Tested CQL parsing:\t"+cql);
31 31 } catch (ParseException e) {
32 32 System.out.println("Error CQL parsing:\t"+cql);
... ... @@ -34,11 +34,11 @@ public class MtasCQLParserTestWord {
34 34 }
35 35 }
36 36  
37   - private void testCQLEquivalent(String field, String cql1, String cql2) {
  37 + private void testCQLEquivalent(String field, String defaultPrefix, String cql1, String cql2) {
38 38 MtasCQLParser p1 = new MtasCQLParser(new BufferedReader(new StringReader(cql1)));
39 39 MtasCQLParser p2 = new MtasCQLParser(new BufferedReader(new StringReader(cql2)));
40 40 try {
41   - assertEquals(p1.parse(field) ,p2.parse(field));
  41 + assertEquals(p1.parse(field, defaultPrefix) ,p2.parse(field, defaultPrefix));
42 42 System.out.println("Tested CQL equivalent:\t"+cql1+" and "+cql2);
43 43 } catch (ParseException e) {
44 44 System.out.println("Error CQL equivalent:\t"+cql1+" and "+cql2);
... ... @@ -67,6 +67,7 @@ public class MtasCQLParserTestWord {
67 67 basicTest10();
68 68 basicTest11();
69 69 basicTest12();
  70 + basicTest13();
70 71 }
71 72  
72 73 private void basicNotTest1() {
... ... @@ -75,14 +76,14 @@ public class MtasCQLParserTestWord {
75 76 SpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID");
76 77 SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","de");
77 78 SpanQuery q = new SpanNotQuery(q1,q2);
78   - testCQLParse(field, cql, q);
  79 + testCQLParse(field, null, cql, q);
79 80 }
80 81  
81 82 private void basicNotTest2() {
82 83 String field = "testveld";
83 84 String cql1 = "[pos=\"LID\" & (!lemma=\"de\")]";
84 85 String cql2 = "[pos=\"LID\" & !(lemma=\"de\")]";
85   - testCQLEquivalent(field, cql1, cql2);
  86 + testCQLEquivalent(field, null, cql1, cql2);
86 87 }
87 88  
88 89 private void basicNotTest3() {
... ... @@ -93,28 +94,28 @@ public class MtasCQLParserTestWord {
93 94 SpanQuery q3 = new MtasCQLParserWordQuery(field,"lemma","een");
94 95 SpanQuery q4 = new MtasSpanOrQuery(new SpanQuery[]{q2,q3});
95 96 SpanQuery q = new SpanNotQuery(q1,q4);
96   - testCQLParse(field, cql, q);
  97 + testCQLParse(field, null, cql, q);
97 98 }
98 99  
99 100 private void basicNotTest4() {
100 101 String field = "testveld";
101 102 String cql1 = "[pos=\"LID\" & !(lemma=\"de\" | lemma=\"een\")]";
102 103 String cql2 = "[pos=\"LID\" & (!lemma=\"de\" & !lemma=\"een\")]";
103   - testCQLEquivalent(field, cql1, cql2);
  104 + testCQLEquivalent(field, null, cql1, cql2);
104 105 }
105 106  
106 107 private void basicNotTest5() {
107 108 String field = "testveld";
108 109 String cql1 = "[pos=\"LID\" & !(lemma=\"de\" | lemma=\"een\")]";
109 110 String cql2 = "[pos=\"LID\" & !lemma=\"de\" & !lemma=\"een\"]";
110   - testCQLEquivalent(field, cql1, cql2);
  111 + testCQLEquivalent(field, null, cql1, cql2);
111 112 }
112 113  
113 114 private void basicTest1() {
114 115 String field = "testveld";
115 116 String cql = "[lemma=\"koe\"]";
116 117 SpanQuery q = new MtasCQLParserWordQuery(field, "lemma", "koe");
117   - testCQLParse(field, cql, q);
  118 + testCQLParse(field, null, cql, q);
118 119 }
119 120  
120 121 private void basicTest2() {
... ... @@ -123,7 +124,7 @@ public class MtasCQLParserTestWord {
123 124 SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe");
124 125 SpanQuery q2 = new MtasCQLParserWordQuery(field,"pos","N");
125 126 SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q1,q2});
126   - testCQLParse(field, cql, q);
  127 + testCQLParse(field, null, cql, q);
127 128 }
128 129  
129 130 private void basicTest3() {
... ... @@ -132,14 +133,14 @@ public class MtasCQLParserTestWord {
132 133 SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe");
133 134 SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","paard");
134 135 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q2});
135   - testCQLParse(field, cql, q);
  136 + testCQLParse(field, null, cql, q);
136 137 }
137 138  
138 139 private void basicTest4() {
139 140 String field = "testveld";
140 141 String cql1 = "[lemma=\"koe\" | lemma=\"paard\"]";
141 142 String cql2 = "[(lemma=\"koe\" | lemma=\"paard\")]";
142   - testCQLEquivalent(field, cql1, cql2);
  143 + testCQLEquivalent(field, null, cql1, cql2);
143 144 }
144 145  
145 146 private void basicTest5() {
... ... @@ -150,7 +151,7 @@ public class MtasCQLParserTestWord {
150 151 SpanQuery q3 = new MtasSpanOrQuery(new SpanQuery[]{q1,q2});
151 152 SpanQuery q4 = new MtasCQLParserWordQuery(field,"pos","N");
152 153 SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q3,q4});
153   - testCQLParse(field, cql, q);
  154 + testCQLParse(field, null, cql, q);
154 155 }
155 156  
156 157 private void basicTest6() {
... ... @@ -161,7 +162,7 @@ public class MtasCQLParserTestWord {
161 162 SpanQuery q3 = new MtasCQLParserWordQuery(field,"lemma","paard");
162 163 SpanQuery q4 = new MtasSpanOrQuery(new SpanQuery[]{q2,q3});
163 164 SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q1,q4});
164   - testCQLParse(field, cql, q);
  165 + testCQLParse(field, null, cql, q);
165 166 }
166 167  
167 168 private void basicTest7() {
... ... @@ -172,7 +173,7 @@ public class MtasCQLParserTestWord {
172 173 SpanQuery q3 = new MtasCQLParserWordQuery(field,"pos","N");
173 174 SpanQuery q4 = new MtasSpanAndQuery(new SpanQuery[]{q2,q3});
174 175 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q4});
175   - testCQLParse(field, cql, q);
  176 + testCQLParse(field, null, cql, q);
176 177 }
177 178  
178 179 private void basicTest8() {
... ... @@ -185,7 +186,7 @@ public class MtasCQLParserTestWord {
185 186 SpanQuery q5 = new MtasSpanAndQuery(new SpanQuery[]{q1,q2});
186 187 SpanQuery q6 = new MtasSpanAndQuery(new SpanQuery[]{q3,q4});
187 188 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q5,q6});
188   - testCQLParse(field, cql, q);
  189 + testCQLParse(field, null, cql, q);
189 190 }
190 191  
191 192 private void basicTest9() {
... ... @@ -200,7 +201,7 @@ public class MtasCQLParserTestWord {
200 201 SpanQuery q7 = new MtasSpanAndQuery(new SpanQuery[]{q6,q3});
201 202 SpanQuery q8 = new MtasSpanAndQuery(new SpanQuery[]{q4,q5});
202 203 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q7,q8});
203   - testCQLParse(field, cql, q);
  204 + testCQLParse(field, null, cql, q);
204 205 }
205 206  
206 207 private void basicTest10() {
... ... @@ -217,22 +218,22 @@ public class MtasCQLParserTestWord {
217 218 SpanQuery q9 = new MtasSpanOrQuery(new SpanQuery[]{q4,q5});
218 219 SpanQuery q10 = new MtasSpanAndQuery(new SpanQuery[]{q9,q6});
219 220 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q8,q10});
220   - testCQLParse(field, cql, q);
  221 + testCQLParse(field, null, cql, q);
221 222 }
222 223  
223 224 private void basicTest11() {
224 225 String field = "testveld";
225 226 String cql1 = "[#300]";
226 227 SpanQuery q1 = new MtasCQLParserWordPositionQuery(field, 300);
227   - testCQLParse(field, cql1, q1);
  228 + testCQLParse(field, null, cql1, q1);
228 229 String cql2 = "[#100-110]";
229 230 SpanQuery q2 = new MtasCQLParserWordPositionQuery(field, 100, 110);
230   - testCQLParse(field, cql2, q2);
  231 + testCQLParse(field, null, cql2, q2);
231 232 String cql3 = "[#100-105 | #110]";
232 233 SpanQuery q3a = new MtasCQLParserWordPositionQuery(field, 100, 105);
233 234 SpanQuery q3b = new MtasCQLParserWordPositionQuery(field, 110);
234 235 SpanQuery q3 = new MtasSpanOrQuery(q3a, q3b);
235   - testCQLParse(field, cql3, q3);
  236 + testCQLParse(field, null, cql3, q3);
236 237 }
237 238  
238 239 private void basicTest12() {
... ... @@ -242,6 +243,13 @@ public class MtasCQLParserTestWord {
242 243 SpanQuery q2 = new MtasCQLParserWordQuery(field,"t_lc","het");
243 244 SpanQuery q3 = new MtasCQLParserWordQuery(field,"t_lc","paard");
244 245 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q2,q3});
245   - testCQLParse(field, cql, q);
246   - }
  246 + testCQLParse(field, null, cql, q);
  247 + }
  248 +
  249 + private void basicTest13() {
  250 + String field = "testveld";
  251 + String cql = "\"de\"";
  252 + SpanQuery q = new MtasCQLParserWordQuery(field,"t_lc","de");
  253 + testCQLParse(field, "t_lc", cql, q);
  254 + }
247 255 }
... ...
... ... @@ -2,11 +2,13 @@
2 2 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3 3 <properties>
4 4 <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  5 + <currentDevelopmentVersion>6.2.0</currentDevelopmentVersion>
  6 + <currentDevelopmentRelease>20160802</currentDevelopmentRelease>
5 7 </properties>
6 8 <modelVersion>4.0.0</modelVersion>
7 9 <groupId>dev.meertens.mtas</groupId>
8 10 <artifactId>mtas</artifactId>
9   - <version>6.1.0</version>
  11 + <version>6.2.0</version>
10 12 <packaging>jar</packaging>
11 13 <licenses>
12 14 <license>
... ... @@ -23,12 +25,12 @@
23 25 <developers>
24 26 <developer>
25 27 <name>Matthijs Brouwer</name>
26   - <url>https://www.meertens.knaw.nl/cms/nl/medewerkers/144373-matthijsb</url>
  28 + <url>https://nl.linkedin.com/in/brouwermatthijs/</url>
27 29 </developer>
28 30 <developer>
29   - <name>Marc Kemps-Snijders</name>
30   - <url>https://www.meertens.knaw.nl/cms/nl/medewerkers/143329-marck</url>
31   - </developer>
  31 + <name>Marc Kemps-Snijders</name>
  32 + <url>https://nl.linkedin.com/in/marc-kemps-snijders-1b33753</url>
  33 + </developer>
32 34 </developers>
33 35 <build>
34 36 <sourceDirectory>src</sourceDirectory>
... ... @@ -39,6 +41,24 @@
39 41 </resources>
40 42 <plugins>
41 43 <plugin>
  44 + <artifactId>maven-clean-plugin</artifactId>
  45 + <version>3.0.0</version>
  46 + <configuration>
  47 + <filesets>
  48 + <fileset>
  49 + <directory>gh-pages</directory>
  50 + <includes>
  51 + <include>**/*</include>
  52 + </includes>
  53 + <excludes>
  54 + <exclude>**/.git/</exclude>
  55 + </excludes>
  56 + <followSymlinks>false</followSymlinks>
  57 + </fileset>
  58 + </filesets>
  59 + </configuration>
  60 + </plugin>
  61 + <plugin>
42 62 <groupId>org.apache.maven.plugins</groupId>
43 63 <artifactId>maven-compiler-plugin</artifactId>
44 64 <version>3.5.1</version>
... ... @@ -46,7 +66,7 @@
46 66 <source>1.8</source>
47 67 <target>1.8</target>
48 68 </configuration>
49   - </plugin>
  69 + </plugin>
50 70 <plugin>
51 71 <groupId>org.apache.maven.plugins</groupId>
52 72 <artifactId>maven-site-plugin</artifactId>
... ... @@ -145,27 +165,27 @@
145 165 <dependency>
146 166 <groupId>org.apache.lucene</groupId>
147 167 <artifactId>lucene-core</artifactId>
148   - <version>6.1.0</version>
  168 + <version>6.2.0</version>
149 169 </dependency>
150 170 <dependency>
151 171 <groupId>org.apache.lucene</groupId>
152 172 <artifactId>lucene-analyzers-common</artifactId>
153   - <version>6.1.0</version>
  173 + <version>6.2.0</version>
154 174 </dependency>
155 175 <dependency>
156 176 <groupId>org.apache.lucene</groupId>
157 177 <artifactId>lucene-queryparser</artifactId>
158   - <version>6.1.0</version>
  178 + <version>6.2.0</version>
159 179 </dependency>
160 180 <dependency>
161 181 <groupId>org.apache.lucene</groupId>
162 182 <artifactId>lucene-codecs</artifactId>
163   - <version>6.1.0</version>
  183 + <version>6.2.0</version>
164 184 </dependency>
165 185 <dependency>
166 186 <groupId>org.apache.solr</groupId>
167 187 <artifactId>solr-core</artifactId>
168   - <version>6.1.0</version>
  188 + <version>6.2.0</version>
169 189 </dependency>
170 190 <dependency>
171 191 <groupId>org.apache.commons</groupId>
... ...
src/mtas/analysis/MtasTokenizer.java
... ... @@ -25,8 +25,11 @@ import org.apache.lucene.util.AttributeFactory;
25 25  
26 26 /**
27 27 * The Class MtasTokenizer.
  28 + *
  29 + * @param <T>
  30 + * the generic type
28 31 */
29   -public final class MtasTokenizer extends Tokenizer {
  32 +public final class MtasTokenizer<T> extends Tokenizer {
30 33  
31 34 /** The configuration mtas. */
32 35 public static String CONFIGURATION_MTAS = "mtas";
... ... @@ -73,7 +76,8 @@ public final class MtasTokenizer extends Tokenizer {
73 76 /**
74 77 * Instantiates a new mtas tokenizer.
75 78 *
76   - * @param configFileName the config file name
  79 + * @param configFileName
  80 + * the config file name
77 81 */
78 82 public MtasTokenizer(String configFileName) {
79 83 readConfigurationFile(configFileName);
... ... @@ -82,8 +86,10 @@ public final class MtasTokenizer extends Tokenizer {
82 86 /**
83 87 * Instantiates a new mtas tokenizer.
84 88 *
85   - * @param config the config
86   - * @throws IOException Signals that an I/O exception has occurred.
  89 + * @param config
  90 + * the config
  91 + * @throws IOException
  92 + * Signals that an I/O exception has occurred.
87 93 */
88 94 public MtasTokenizer(MtasConfiguration config) throws IOException {
89 95 processConfiguration(config);
... ... @@ -92,8 +98,10 @@ public final class MtasTokenizer extends Tokenizer {
92 98 /**
93 99 * Instantiates a new mtas tokenizer.
94 100 *
95   - * @param reader the reader
96   - * @throws IOException Signals that an I/O exception has occurred.
  101 + * @param reader
  102 + * the reader
  103 + * @throws IOException
  104 + * Signals that an I/O exception has occurred.
97 105 */
98 106 public MtasTokenizer(InputStream reader) throws IOException {
99 107 processConfiguration(MtasConfiguration.readConfiguration(reader));
... ... @@ -102,9 +110,12 @@ public final class MtasTokenizer extends Tokenizer {
102 110 /**
103 111 * Instantiates a new mtas tokenizer.
104 112 *
105   - * @param factory the factory
106   - * @param config the config
107   - * @throws IOException Signals that an I/O exception has occurred.
  113 + * @param factory
  114 + * the factory
  115 + * @param config
  116 + * the config
  117 + * @throws IOException
  118 + * Signals that an I/O exception has occurred.
108 119 */
109 120 public MtasTokenizer(AttributeFactory factory, MtasConfiguration config)
110 121 throws IOException {
... ... @@ -112,7 +123,9 @@ public final class MtasTokenizer extends Tokenizer {
112 123 processConfiguration(config);
113 124 }
114 125  
115   - /* (non-Javadoc)
  126 + /*
  127 + * (non-Javadoc)
  128 + *
116 129 * @see org.apache.lucene.analysis.TokenStream#incrementToken()
117 130 */
118 131 @Override
... ... @@ -128,7 +141,7 @@ public final class MtasTokenizer extends Tokenizer {
128 141 // compute info
129 142 positionIncrement = token.getPositionStart() - currentPosition;
130 143 currentPosition = token.getPositionStart();
131   - payloadEncoder = new MtasPayloadEncoder(token, encodingFlags);
  144 + payloadEncoder = new MtasPayloadEncoder(token, encodingFlags);
132 145 // set info
133 146 termAtt.append(token.getValue().toString());
134 147 positionIncrementAtt.setPositionIncrement(positionIncrement);
... ... @@ -157,7 +170,6 @@ public final class MtasTokenizer extends Tokenizer {
157 170 e.getClass().getSimpleName() + ": " + e.getMessage());
158 171 } catch (MtasParserException e) {
159 172 tokenCollectionIterator = null;
160   - e.printStackTrace();
161 173 throw new IOException(
162 174 e.getClass().getSimpleName() + ": " + e.getMessage());
163 175 }
... ... @@ -167,14 +179,19 @@ public final class MtasTokenizer extends Tokenizer {
167 179 /**
168 180 * Prints the.
169 181 *
170   - * @param r the r
171   - * @throws IOException Signals that an I/O exception has occurred.
172   - * @throws MtasParserException the mtas parser exception
  182 + * @param r
  183 + * the r
  184 + * @throws IOException
  185 + * Signals that an I/O exception has occurred.
  186 + * @throws MtasParserException
  187 + * the mtas parser exception
173 188 */
174 189 public void print(Reader r) throws IOException, MtasParserException {
175 190 setReader(r);
176 191 reset();
177   - tokenCollection.print();
  192 + if (tokenCollection != null) {
  193 + tokenCollection.print();
  194 + }
178 195 end();
179 196 close();
180 197 }
... ... @@ -182,10 +199,13 @@ public final class MtasTokenizer extends Tokenizer {
182 199 /**
183 200 * Gets the list.
184 201 *
185   - * @param r the r
  202 + * @param r
  203 + * the r
186 204 * @return the list
187   - * @throws IOException Signals that an I/O exception has occurred.
188   - * @throws MtasParserException the mtas parser exception
  205 + * @throws IOException
  206 + * Signals that an I/O exception has occurred.
  207 + * @throws MtasParserException
  208 + * the mtas parser exception
189 209 */
190 210 public String[][] getList(Reader r) throws IOException, MtasParserException {
191 211 setReader(r);
... ... @@ -199,9 +219,12 @@ public final class MtasTokenizer extends Tokenizer {
199 219 /**
200 220 * Construct token collection.
201 221 *
202   - * @param reader the reader
203   - * @throws MtasConfigException the mtas config exception
204   - * @throws MtasParserException the mtas parser exception
  222 + * @param reader
  223 + * the reader
  224 + * @throws MtasConfigException
  225 + * the mtas config exception
  226 + * @throws MtasParserException
  227 + * the mtas parser exception
205 228 */
206 229 private void constructTokenCollection(Reader reader)
207 230 throws MtasConfigException, MtasParserException {
... ... @@ -216,29 +239,28 @@ public final class MtasTokenizer extends Tokenizer {
216 239 try {
217 240 tokenCollection = parser.createTokenCollection(reader);
218 241 return;
219   - } catch (MtasParserException e) {
  242 + } catch (MtasParserException e) {
220 243 tokenCollection = new MtasTokenCollection();
221   - e.printStackTrace();
222 244 throw new MtasParserException(e.getMessage());
223 245 }
224 246 } else {
225 247 throw new MtasConfigException("no instance of MtasParser");
226 248 }
227 249 } catch (NoSuchMethodException e) {
228   - throw new MtasConfigException(e.getClass().getName()
229   - + " : '" + e.getMessage() + "'");
  250 + throw new MtasConfigException(
  251 + e.getClass().getName() + " : '" + e.getMessage() + "'");
230 252 } catch (InvocationTargetException e) {
231   - throw new MtasConfigException(e.getClass().getName()
232   - + " : '" + e.getMessage() + "'");
  253 + throw new MtasConfigException(
  254 + e.getClass().getName() + " : '" + e.getMessage() + "'");
233 255 } catch (IllegalAccessException e) {
234   - throw new MtasConfigException(e.getClass().getName()
235   - + " : '" + e.getMessage() + "'");
  256 + throw new MtasConfigException(
  257 + e.getClass().getName() + " : '" + e.getMessage() + "'");
236 258 } catch (ClassNotFoundException e) {
237   - throw new MtasConfigException(e.getClass().getName()
238   - + " : '" + e.getMessage() + "'");
  259 + throw new MtasConfigException(
  260 + e.getClass().getName() + " : '" + e.getMessage() + "'");
239 261 } catch (InstantiationException e) {
240   - throw new MtasConfigException(e.getClass().getName()
241   - + " : '" + e.getMessage() + "'");
  262 + throw new MtasConfigException(
  263 + e.getClass().getName() + " : '" + e.getMessage() + "'");
242 264 }
243 265  
244 266 }
... ... @@ -246,7 +268,8 @@ public final class MtasTokenizer extends Tokenizer {
246 268 /**
247 269 * Read configuration file.
248 270 *
249   - * @param configFile the config file
  271 + * @param configFile
  272 + * the config file
250 273 */
251 274 private void readConfigurationFile(String configFile) {
252 275 InputStream is;
... ... @@ -261,13 +284,13 @@ public final class MtasTokenizer extends Tokenizer {
261 284 }
262 285 }
263 286  
264   -
265   -
266 287 /**
267 288 * Process configuration.
268 289 *
269   - * @param config the config
270   - * @throws IOException Signals that an I/O exception has occurred.
  290 + * @param config
  291 + * the config
  292 + * @throws IOException
  293 + * Signals that an I/O exception has occurred.
271 294 */
272 295 private void processConfiguration(MtasConfiguration config)
273 296 throws IOException {
... ...
src/mtas/analysis/parser/MtasBasicParser.java
... ... @@ -165,7 +165,8 @@ abstract public class MtasBasicParser extends MtasParser {
165 165 /**
166 166 * Instantiates a new mtas basic parser.
167 167 *
168   - * @param config the config
  168 + * @param config
  169 + * the config
169 170 */
170 171 public MtasBasicParser(MtasConfiguration config) {
171 172 this.config = config;
... ... @@ -174,11 +175,16 @@ abstract public class MtasBasicParser extends MtasParser {
174 175 /**
175 176 * Compute mappings from object.
176 177 *
177   - * @param object the object
178   - * @param currentList the current list
179   - * @param updateList the update list
180   - * @throws MtasParserException the mtas parser exception
181   - * @throws MtasConfigException the mtas config exception
  178 + * @param object
  179 + * the object
  180 + * @param currentList
  181 + * the current list
  182 + * @param updateList
  183 + * the update list
  184 + * @throws MtasParserException
  185 + * the mtas parser exception
  186 + * @throws MtasConfigException
  187 + * the mtas config exception
182 188 */
183 189 protected void computeMappingsFromObject(MtasParserObject object,
184 190 HashMap<String, ArrayList<MtasParserObject>> currentList,
... ... @@ -196,7 +202,6 @@ abstract public class MtasBasicParser extends MtasParser {
196 202 updateList.get(UPDATE_TYPE_OFFSET).put(tokenId, object.getRefIds());
197 203 }
198 204 }
199   -
200 205 for (MtasParserMapping<?> mapping : mappings) {
201 206 try {
202 207 if (mapping.getTokens().size() == 0) {
... ... @@ -271,9 +276,18 @@ abstract public class MtasBasicParser extends MtasParser {
271 276 String checkType = object.objectType.getType();
272 277 // register id for update when parent is created
273 278 if (currentList.get(checkType).size() > 0) {
274   - currentList.get(checkType)
275   - .get(currentList.get(checkType).size() - 1)
276   - .registerUpdateableMappingAtParent(token.getId());
  279 + if (currentList.get(checkType).contains(object)) {
  280 + int listPosition = currentList.get(checkType)
  281 + .indexOf(object);
  282 + if (listPosition > 0) {
  283 + currentList.get(checkType).get(listPosition - 1)
  284 + .registerUpdateableMappingAtParent(token.getId());
  285 + }
  286 + } else {
  287 + currentList.get(checkType)
  288 + .get(currentList.get(checkType).size() - 1)
  289 + .registerUpdateableMappingAtParent(token.getId());
  290 + }
277 291 // if no real ancestor, register id update when group
278 292 // ancestor is created
279 293 } else if (currentList.get(MAPPING_TYPE_GROUP).size() > 0) {
... ... @@ -287,7 +301,9 @@ abstract public class MtasBasicParser extends MtasParser {
287 301 }
288 302 // update children
289 303 for (Integer tmpId : object.getUpdateableMappingsAsParent()) {
290   - tokenCollection.get(tmpId).setParentId(token.getId());
  304 + if (tokenCollection.get(tmpId) != null) {
  305 + tokenCollection.get(tmpId).setParentId(token.getId());
  306 + }
291 307 }
292 308 object.resetUpdateableMappingsAsParent();
293 309 // use own position
... ... @@ -372,10 +388,20 @@ abstract public class MtasBasicParser extends MtasParser {
372 388 }
373 389 // copy remaining updateableMappings to new parent
374 390 if (currentList.get(objectType.getType()).size() > 0) {
375   - currentList.get(objectType.getType())
376   - .get(currentList.get(objectType.getType()).size() - 1)
377   - .registerUpdateableMappingsAtParent(
378   - object.getUpdateableMappingsAsParent());
  391 + if (currentList.get(objectType.getType()).contains(object)) {
  392 + int listPosition = currentList.get(objectType.getType())
  393 + .indexOf(object);
  394 + if (listPosition > 0) {
  395 + currentList.get(objectType.getType()).get(listPosition - 1)
  396 + .registerUpdateableMappingsAtParent(
  397 + object.getUpdateableMappingsAsParent());
  398 + }
  399 + } else {
  400 + currentList.get(objectType.getType())
  401 + .get(currentList.get(objectType.getType()).size() - 1)
  402 + .registerUpdateableMappingsAtParent(
  403 + object.getUpdateableMappingsAsParent());
  404 + }
379 405 } else if (currentList.get(MAPPING_TYPE_GROUP).size() > 0) {
380 406 currentList.get(MAPPING_TYPE_GROUP)
381 407 .get(currentList.get(MAPPING_TYPE_GROUP).size() - 1)
... ... @@ -392,9 +418,11 @@ abstract public class MtasBasicParser extends MtasParser {
392 418 /**
393 419 * Compute type from mapping source.
394 420 *
395   - * @param source the source
  421 + * @param source
  422 + * the source
396 423 * @return the string
397   - * @throws MtasParserException the mtas parser exception
  424 + * @throws MtasParserException
  425 + * the mtas parser exception
398 426 */
399 427 private String computeTypeFromMappingSource(String source)
400 428 throws MtasParserException {
... ... @@ -423,11 +451,15 @@ abstract public class MtasBasicParser extends MtasParser {
423 451 /**
424 452 * Compute object from mapping value.
425 453 *
426   - * @param object the object
427   - * @param mappingValue the mapping value
428   - * @param currentList the current list
  454 + * @param object
  455 + * the object
  456 + * @param mappingValue
  457 + * the mapping value
  458 + * @param currentList
  459 + * the current list
429 460 * @return the mtas parser object[]
430   - * @throws MtasParserException the mtas parser exception
  461 + * @throws MtasParserException
  462 + * the mtas parser exception
431 463 */
432 464 private MtasParserObject[] computeObjectFromMappingValue(
433 465 MtasParserObject object, HashMap<String, String> mappingValue,
... ... @@ -469,12 +501,17 @@ abstract public class MtasBasicParser extends MtasParser {
469 501 /**
470 502 * Compute value from mapping values.
471 503 *
472   - * @param object the object
473   - * @param mappingValues the mapping values
474   - * @param currentList the current list
  504 + * @param object
  505 + * the object
  506 + * @param mappingValues
  507 + * the mapping values
  508 + * @param currentList
  509 + * the current list
475 510 * @return the string[]
476   - * @throws MtasParserException the mtas parser exception
477   - * @throws MtasConfigException the mtas config exception
  511 + * @throws MtasParserException
  512 + * the mtas parser exception
  513 + * @throws MtasConfigException
  514 + * the mtas config exception
478 515 */
479 516 private String[] computeValueFromMappingValues(MtasParserObject object,
480 517 ArrayList<HashMap<String, String>> mappingValues,
... ... @@ -486,8 +523,8 @@ abstract public class MtasBasicParser extends MtasParser {
486 523 if (mappingValue.get("source").equals(MtasParserMapping.SOURCE_STRING)) {
487 524 if (mappingValue.get("type")
488 525 .equals(MtasParserMapping.PARSER_TYPE_STRING)) {
489   - String subvalue = computeFilteredPrefixedValue(mappingValue.get("type"),
490   - mappingValue.get("text"), null, null);
  526 + String subvalue = computeFilteredPrefixedValue(
  527 + mappingValue.get("type"), mappingValue.get("text"), null, null);
491 528 if (subvalue != null) {
492 529 for (int i = 0; i < value.length; i++) {
493 530 value[i] = value[i] + subvalue;
... ... @@ -499,7 +536,7 @@ abstract public class MtasBasicParser extends MtasParser {
499 536 MtasParserObject[] checkObjects = computeObjectFromMappingValue(object,
500 537 mappingValue, currentList);
501 538 // create value
502   - if (checkObjects != null) {
  539 + if (checkObjects != null && checkObjects.length > 0) {
503 540 MtasParserType checkType = checkObjects[0].getType();
504 541 // add name to value
505 542 if (mappingValue.get("type")
... ... @@ -586,11 +623,15 @@ abstract public class MtasBasicParser extends MtasParser {
586 623 /**
587 624 * Compute payload from mapping payload.
588 625 *
589   - * @param object the object
590   - * @param mappingPayloads the mapping payloads
591   - * @param currentList the current list
  626 + * @param object
  627 + * the object
  628 + * @param mappingPayloads
  629 + * the mapping payloads
  630 + * @param currentList
  631 + * the current list
592 632 * @return the bytes ref
593   - * @throws MtasParserException the mtas parser exception
  633 + * @throws MtasParserException
  634 + * the mtas parser exception
594 635 */
595 636 private BytesRef computePayloadFromMappingPayload(MtasParserObject object,
596 637 ArrayList<HashMap<String, String>> mappingPayloads,
... ... @@ -605,7 +646,7 @@ abstract public class MtasBasicParser extends MtasParser {
605 646 if (mappingPayload.get("text") != null) {
606 647 BytesRef subpayload = computeMaximumFilteredPayload(
607 648 mappingPayload.get("text"), payload, null);
608   - payload = (subpayload != null) ? subpayload : payload;
  649 + payload = (subpayload != null) ? subpayload : payload;
609 650 }
610 651 }
611 652 // from objects
... ... @@ -637,8 +678,10 @@ abstract public class MtasBasicParser extends MtasParser {
637 678 /**
638 679 * Prevalidate object.
639 680 *
640   - * @param object the object
641   - * @param currentList the current list
  681 + * @param object
  682 + * the object
  683 + * @param currentList
  684 + * the current list
642 685 * @return the boolean
643 686 */
644 687 Boolean prevalidateObject(MtasParserObject object,
... ... @@ -663,10 +706,14 @@ abstract public class MtasBasicParser extends MtasParser {
663 706 /**
664 707 * Precheck mapping conditions.
665 708 *
666   - * @param object the object
667   - * @param mappingConditions the mapping conditions
668   - * @param currentList the current list
669   - * @throws MtasParserException the mtas parser exception
  709 + * @param object
  710 + * the object
  711 + * @param mappingConditions
  712 + * the mapping conditions
  713 + * @param currentList
  714 + * the current list
  715 + * @throws MtasParserException
  716 + * the mtas parser exception
670 717 */
671 718 void precheckMappingConditions(MtasParserObject object,
672 719 ArrayList<HashMap<String, String>> mappingConditions,
... ... @@ -771,7 +818,31 @@ abstract public class MtasBasicParser extends MtasParser {
771 818 // condition on text
772 819 } else if (mappingCondition.get("type")
773 820 .equals(MtasParserMapping.PARSER_TYPE_TEXT)) {
774   - // can't pre-check this type of condition
  821 + // can't pre-check this type of condition, only for group
  822 + if (object.getType().precheckText()) {
  823 + String textCondition = mappingCondition.get("condition");
  824 + String textValue = object.getText();
  825 + if ((textCondition == null)
  826 + && ((textValue == null) || textValue.equals(""))) {
  827 + if (!notCondition) {
  828 + throw new MtasParserException("no text available");
  829 + }
  830 + } else if ((textCondition != null) && (textValue == null)) {
  831 + if (!notCondition) {
  832 + throw new MtasParserException("condition " + textCondition
  833 + + " on text not matched (is null)");
  834 + }
  835 + } else if (textCondition != null) {
  836 + if (!notCondition && !textCondition.equals(textValue)) {
  837 + throw new MtasParserException("condition " + textCondition
  838 + + " on text not matched (is " + textValue + ")");
  839 + } else if (notCondition && textCondition.equals(textValue)) {
  840 + throw new MtasParserException(
  841 + "condition NOT " + textCondition
  842 + + " on text not matched (is " + textValue + ")");
  843 + }
  844 + }
  845 + }
775 846 }
776 847 }
777 848 } else if (!notCondition) {
... ... @@ -785,10 +856,14 @@ abstract public class MtasBasicParser extends MtasParser {
785 856 /**
786 857 * Postcheck mapping conditions.
787 858 *
788   - * @param object the object
789   - * @param mappingConditions the mapping conditions
790   - * @param currentList the current list
791   - * @throws MtasParserException the mtas parser exception
  859 + * @param object
  860 + * the object
  861 + * @param mappingConditions
  862 + * the mapping conditions
  863 + * @param currentList
  864 + * the current list
  865 + * @throws MtasParserException
  866 + * the mtas parser exception
792 867 */
793 868 private void postcheckMappingConditions(MtasParserObject object,
794 869 ArrayList<HashMap<String, String>> mappingConditions,
... ... @@ -835,10 +910,13 @@ abstract public class MtasBasicParser extends MtasParser {
835 910 /**
836 911 * Compute filtered split values.
837 912 *
838   - * @param values the values
839   - * @param filter the filter
  913 + * @param values
  914 + * the values
  915 + * @param filter
  916 + * the filter
840 917 * @return the string[]
841   - * @throws MtasConfigException the mtas config exception
  918 + * @throws MtasConfigException
  919 + * the mtas config exception
842 920 */
843 921 private String[] computeFilteredSplitValues(String[] values, String filter)
844 922 throws MtasConfigException {
... ... @@ -847,39 +925,42 @@ abstract public class MtasBasicParser extends MtasParser {
847 925 boolean[] valuesFilter = new boolean[values.length];
848 926 boolean doSplitFilter = false;
849 927 for (String item : filters) {
850   - if (item.trim()
851   - .matches("^"+Pattern.quote(MAPPING_FILTER_SPLIT) + "\\([0-9\\-]+\\)$")) {
  928 + if (item.trim().matches(
  929 + "^" + Pattern.quote(MAPPING_FILTER_SPLIT) + "\\([0-9\\-]+\\)$")) {
852 930 doSplitFilter = true;
853   - Pattern splitContent = Pattern.compile("^"+Pattern.quote(MAPPING_FILTER_SPLIT) + "\\(([0-9]+)(-([0-9]+))?\\)$");
  931 + Pattern splitContent = Pattern
  932 + .compile("^" + Pattern.quote(MAPPING_FILTER_SPLIT)
  933 + + "\\(([0-9]+)(-([0-9]+))?\\)$");
854 934 Matcher splitContentMatcher = splitContent.matcher(item.trim());
855   - while(splitContentMatcher.find()) {
856   - if(splitContentMatcher.group(3)==null) {
  935 + while (splitContentMatcher.find()) {
  936 + if (splitContentMatcher.group(3) == null) {
857 937 int i = Integer.parseInt(splitContentMatcher.group(1));
858   - if(i>=0 && i<values.length) {
  938 + if (i >= 0 && i < values.length) {
859 939 valuesFilter[i] = true;
860   - }
  940 + }
861 941 } else {
862 942 int i1 = Integer.parseInt(splitContentMatcher.group(1));
863 943 int i2 = Integer.parseInt(splitContentMatcher.group(3));
864   - for(int i=Math.max(0, i1); i<Math.min(values.length, i2); i++) {
  944 + for (int i = Math.max(0, i1); i < Math.min(values.length,
  945 + i2); i++) {
865 946 valuesFilter[i] = true;
866 947 }
867 948 }
868   - }
  949 + }
869 950 }
870 951 }
871   - if(doSplitFilter) {
  952 + if (doSplitFilter) {
872 953 int number = 0;
873   - for(int i=0;i<valuesFilter.length; i++) {
874   - if(valuesFilter[i]) {
  954 + for (int i = 0; i < valuesFilter.length; i++) {
  955 + if (valuesFilter[i]) {
875 956 number++;
876 957 }
877 958 }
878   - if(number>0) {
  959 + if (number > 0) {
879 960 String[] newValues = new String[number];
880 961 number = 0;
881   - for(int i=0;i<valuesFilter.length; i++) {
882   - if(valuesFilter[i]) {
  962 + for (int i = 0; i < valuesFilter.length; i++) {
  963 + if (valuesFilter[i]) {
883 964 newValues[number] = values[i];
884 965 number++;
885 966 }
... ... @@ -888,7 +969,7 @@ abstract public class MtasBasicParser extends MtasParser {
888 969 } else {
889 970 return null;
890 971 }
891   - }
  972 + }
892 973 }
893 974 return values;
894 975 }
... ... @@ -896,12 +977,17 @@ abstract public class MtasBasicParser extends MtasParser {
896 977 /**
897 978 * Compute filtered prefixed value.
898 979 *
899   - * @param type the type
900   - * @param value the value
901   - * @param filter the filter
902   - * @param prefix the prefix
  980 + * @param type
  981 + * the type
  982 + * @param value
  983 + * the value
  984 + * @param filter
  985 + * the filter
  986 + * @param prefix
  987 + * the prefix
903 988 * @return the string
904   - * @throws MtasConfigException the mtas config exception
  989 + * @throws MtasConfigException
  990 + * the mtas config exception
905 991 */
906 992 private String computeFilteredPrefixedValue(String type, String value,
907 993 String filter, String prefix) throws MtasConfigException {
... ... @@ -926,7 +1012,7 @@ abstract public class MtasBasicParser extends MtasParser {
926 1012 }
927 1013 } else if (item.trim()
928 1014 .matches(Pattern.quote(MAPPING_FILTER_SPLIT) + "\\([0-9\\-]+\\)")) {
929   - if(!type.equals(MtasParserMapping.PARSER_TYPE_TEXT_SPLIT)) {
  1015 + if (!type.equals(MtasParserMapping.PARSER_TYPE_TEXT_SPLIT)) {
930 1016 throw new MtasConfigException(
931 1017 "split filter not allowed for " + type);
932 1018 }
... ... @@ -947,9 +1033,12 @@ abstract public class MtasBasicParser extends MtasParser {
947 1033 /**
948 1034 * Compute maximum filtered payload.
949 1035 *
950   - * @param value the value
951   - * @param payload the payload
952   - * @param filter the filter
  1036 + * @param value
  1037 + * the value
  1038 + * @param payload
  1039 + * the payload
  1040 + * @param filter
  1041 + * the filter
953 1042 * @return the bytes ref
954 1043 */
955 1044 private BytesRef computeMaximumFilteredPayload(String value, BytesRef payload,
... ... @@ -981,6 +1070,9 @@ abstract public class MtasBasicParser extends MtasParser {
981 1070 /** The name. */
982 1071 private String name;
983 1072  
  1073 + /** The precheck text. */
  1074 + protected boolean precheckText;
  1075 +
984 1076 /** The ref attribute name. */
985 1077 private String refAttributeName;
986 1078  
... ... @@ -990,23 +1082,34 @@ abstract public class MtasBasicParser extends MtasParser {
990 1082 /**
991 1083 * Instantiates a new mtas parser type.
992 1084 *
993   - * @param type the type
994   - * @param name the name
  1085 + * @param type
  1086 + * the type
  1087 + * @param name
  1088 + * the name
  1089 + * @param precheckText
  1090 + * the precheck text
995 1091 */
996   - MtasParserType(String type, String name) {
  1092 + MtasParserType(String type, String name, boolean precheckText) {
997 1093 this.type = type;
998 1094 this.name = name;
  1095 + this.precheckText = precheckText;
999 1096 }
1000 1097  
1001 1098 /**
1002 1099 * Instantiates a new mtas parser type.
1003 1100 *
1004   - * @param type the type
1005   - * @param name the name
1006   - * @param refAttributeName the ref attribute name
  1101 + * @param type
  1102 + * the type
  1103 + * @param name
  1104 + * the name
  1105 + * @param precheckText
  1106 + * the precheck text
  1107 + * @param refAttributeName
  1108 + * the ref attribute name
1007 1109 */
1008   - MtasParserType(String type, String name, String refAttributeName) {
1009   - this(type, name);
  1110 + MtasParserType(String type, String name, boolean precheckText,
  1111 + String refAttributeName) {
  1112 + this(type, name, precheckText);
1010 1113 this.refAttributeName = refAttributeName;
1011 1114 }
1012 1115  
... ... @@ -1038,9 +1141,19 @@ abstract public class MtasBasicParser extends MtasParser {
1038 1141 }
1039 1142  
1040 1143 /**
  1144 + * Precheck text.
  1145 + *
  1146 + * @return true, if successful
  1147 + */
  1148 + public boolean precheckText() {
  1149 + return precheckText;
  1150 + }
  1151 +
  1152 + /**
1041 1153 * Adds the mapping.
1042 1154 *
1043   - * @param mapping the mapping
  1155 + * @param mapping
  1156 + * the mapping
1044 1157 */
1045 1158 public void addMapping(MtasParserMapping<?> mapping) {
1046 1159 mappings.add(mapping);
... ... @@ -1080,7 +1193,8 @@ abstract public class MtasBasicParser extends MtasParser {
1080 1193 /**
1081 1194 * Instantiates a new mtas parser mapping token.
1082 1195 *
1083   - * @param tokenType the token type
  1196 + * @param tokenType
  1197 + * the token type
1084 1198 */
1085 1199 public MtasParserMappingToken(String tokenType) {
1086 1200 type = tokenType;
... ... @@ -1095,7 +1209,8 @@ abstract public class MtasBasicParser extends MtasParser {
1095 1209 /**
1096 1210 * Sets the offset.
1097 1211 *
1098   - * @param tokenOffset the new offset
  1212 + * @param tokenOffset
  1213 + * the new offset
1099 1214 */
1100 1215 public void setOffset(Boolean tokenOffset) {
1101 1216 offset = tokenOffset;
... ... @@ -1104,7 +1219,8 @@ abstract public class MtasBasicParser extends MtasParser {
1104 1219 /**
1105 1220 * Sets the real offset.
1106 1221 *
1107   - * @param tokenRealOffset the new real offset
  1222 + * @param tokenRealOffset
  1223 + * the new real offset
1108 1224 */
1109 1225 public void setRealOffset(Boolean tokenRealOffset) {
1110 1226 realoffset = tokenRealOffset;
... ... @@ -1113,7 +1229,8 @@ abstract public class MtasBasicParser extends MtasParser {
1113 1229 /**
1114 1230 * Sets the parent.
1115 1231 *
1116   - * @param tokenParent the new parent
  1232 + * @param tokenParent
  1233 + * the new parent
1117 1234 */
1118 1235 public void setParent(Boolean tokenParent) {
1119 1236 parent = tokenParent;
... ... @@ -1124,7 +1241,8 @@ abstract public class MtasBasicParser extends MtasParser {
1124 1241 /**
1125 1242 * The Class MtasParserMapping.
1126 1243 *
1127   - * @param <T> the generic type
  1244 + * @param <T>
  1245 + * the generic type
1128 1246 */
1129 1247 protected abstract class MtasParserMapping<T extends MtasParserMapping<T>> {
1130 1248  
... ... @@ -1216,8 +1334,10 @@ abstract public class MtasBasicParser extends MtasParser {
1216 1334 /**
1217 1335 * Process config.
1218 1336 *
1219   - * @param config the config
1220   - * @throws MtasConfigException the mtas config exception
  1337 + * @param config
  1338 + * the config
  1339 + * @throws MtasConfigException
  1340 + * the mtas config exception
1221 1341 */
1222 1342 public void processConfig(MtasConfiguration config)
1223 1343 throws MtasConfigException {
... ... @@ -1581,7 +1701,8 @@ abstract public class MtasBasicParser extends MtasParser {
1581 1701 /**
1582 1702 * Condition unknown ancestor.
1583 1703 *
1584   - * @param number the number
  1704 + * @param number
  1705 + * the number
1585 1706 */
1586 1707 private void conditionUnknownAncestor(String number) {
1587 1708 HashMap<String, String> mapConstructionItem = new HashMap<String, String>();
... ... @@ -1593,9 +1714,12 @@ abstract public class MtasBasicParser extends MtasParser {
1593 1714 /**
1594 1715 * Adds the string.
1595 1716 *
1596   - * @param mappingToken the mapping token
1597   - * @param type the type
1598   - * @param text the text
  1717 + * @param mappingToken
  1718 + * the mapping token
  1719 + * @param type
  1720 + * the type
  1721 + * @param text
  1722 + * the text
1599 1723 */
1600 1724 private void addString(MtasParserMappingToken mappingToken, String type,
1601 1725 String text) {
... ... @@ -1613,8 +1737,10 @@ abstract public class MtasBasicParser extends MtasParser {
1613 1737 /**
1614 1738 * Payload string.
1615 1739 *
1616   - * @param mappingToken the mapping token
1617   - * @param text the text
  1740 + * @param mappingToken
  1741 + * the mapping token
  1742 + * @param text
  1743 + * the text
1618 1744 */
1619 1745 private void payloadString(MtasParserMappingToken mappingToken,
1620 1746 String text) {
... ... @@ -1628,10 +1754,14 @@ abstract public class MtasBasicParser extends MtasParser {
1628 1754 /**
1629 1755 * Adds the name.
1630 1756 *
1631   - * @param mappingToken the mapping token
1632   - * @param type the type
1633   - * @param prefix the prefix
1634   - * @param filter the filter
  1757 + * @param mappingToken
  1758 + * the mapping token
  1759 + * @param type
  1760 + * the type
  1761 + * @param prefix
  1762 + * the prefix
  1763 + * @param filter
  1764 + * the filter
1635 1765 */
1636 1766 private void addName(MtasParserMappingToken mappingToken, String type,
1637 1767 String prefix, String filter) {
... ... @@ -1650,8 +1780,10 @@ abstract public class MtasBasicParser extends MtasParser {
1650 1780 /**
1651 1781 * Condition name.
1652 1782 *
1653   - * @param condition the condition
1654   - * @param not the not
  1783 + * @param condition
  1784 + * the condition
  1785 + * @param not
  1786 + * the not
1655 1787 */
1656 1788 private void conditionName(String condition, String not) {
1657 1789 HashMap<String, String> mapConstructionItem = new HashMap<String, String>();
... ... @@ -1665,10 +1797,14 @@ abstract public class MtasBasicParser extends MtasParser {
1665 1797 /**
1666 1798 * Adds the text.
1667 1799 *
1668   - * @param mappingToken the mapping token
1669   - * @param type the type
1670   - * @param prefix the prefix
1671   - * @param filter the filter
  1800 + * @param mappingToken
  1801 + * the mapping token
  1802 + * @param type
  1803 + * the type
  1804 + * @param prefix
  1805 + * the prefix
  1806 + * @param filter
  1807 + * the filter
1672 1808 */
1673 1809 private void addText(MtasParserMappingToken mappingToken, String type,
1674 1810 String prefix, String filter) {
... ... @@ -1687,11 +1823,16 @@ abstract public class MtasBasicParser extends MtasParser {
1687 1823 /**
1688 1824 * Adds the text split.
1689 1825 *
1690   - * @param mappingToken the mapping token
1691   - * @param type the type
1692   - * @param split the split
1693   - * @param prefix the prefix
1694   - * @param filter the filter
  1826 + * @param mappingToken
  1827 + * the mapping token
  1828 + * @param type
  1829 + * the type
  1830 + * @param split
  1831 + * the split
  1832 + * @param prefix
  1833 + * the prefix
  1834 + * @param filter
  1835 + * the filter
1695 1836 */
1696 1837 private void addTextSplit(MtasParserMappingToken mappingToken, String type,
1697 1838 String split, String prefix, String filter) {
... ... @@ -1711,9 +1852,12 @@ abstract public class MtasBasicParser extends MtasParser {
1711 1852 /**
1712 1853 * Condition text.
1713 1854 *
1714   - * @param condition the condition
1715   - * @param filter the filter
1716   - * @param not the not
  1855 + * @param condition
  1856 + * the condition
  1857 + * @param filter
  1858 + * the filter
  1859 + * @param not
  1860 + * the not
1717 1861 */
1718 1862 private void conditionText(String condition, String filter, String not) {
1719 1863 HashMap<String, String> mapConstructionItem = new HashMap<String, String>();
... ... @@ -1728,8 +1872,10 @@ abstract public class MtasBasicParser extends MtasParser {
1728 1872 /**
1729 1873 * Payload text.
1730 1874 *
1731   - * @param mappingToken the mapping token
1732   - * @param filter the filter
  1875 + * @param mappingToken
  1876 + * the mapping token
  1877 + * @param filter
  1878 + * the filter
1733 1879 */
1734 1880 private void payloadText(MtasParserMappingToken mappingToken,
1735 1881 String filter) {
... ... @@ -1743,11 +1889,16 @@ abstract public class MtasBasicParser extends MtasParser {
1743 1889 /**
1744 1890 * Adds the attribute.
1745 1891 *
1746   - * @param mappingToken the mapping token
1747   - * @param type the type
1748   - * @param name the name
1749   - * @param prefix the prefix
1750   - * @param filter the filter
  1892 + * @param mappingToken
  1893 + * the mapping token
  1894 + * @param type
  1895 + * the type
  1896 + * @param name
  1897 + * the name
  1898 + * @param prefix
  1899 + * the prefix
  1900 + * @param filter
  1901 + * the filter
1751 1902 */
1752 1903 private void addAttribute(MtasParserMappingToken mappingToken, String type,
1753 1904 String name, String prefix, String filter) {
... ... @@ -1769,10 +1920,14 @@ abstract public class MtasBasicParser extends MtasParser {
1769 1920 /**
1770 1921 * Condition attribute.
1771 1922 *
1772   - * @param name the name
1773   - * @param condition the condition
1774   - * @param filter the filter
1775   - * @param not the not
  1923 + * @param name
  1924 + * the name
  1925 + * @param condition
  1926 + * the condition
  1927 + * @param filter
  1928 + * the filter
  1929 + * @param not
  1930 + * the not
1776 1931 */
1777 1932 private void conditionAttribute(String name, String condition,
1778 1933 String filter, String not) {
... ... @@ -1791,9 +1946,12 @@ abstract public class MtasBasicParser extends MtasParser {
1791 1946 /**
1792 1947 * Payload attribute.
1793 1948 *
1794   - * @param mappingToken the mapping token
1795   - * @param name the name
1796   - * @param filter the filter
  1949 + * @param mappingToken
  1950 + * the mapping token
  1951 + * @param name
  1952 + * the name
  1953 + * @param filter
  1954 + * the filter
1797 1955 */
1798 1956 private void payloadAttribute(MtasParserMappingToken mappingToken,
1799 1957 String name, String filter) {
... ... @@ -1808,8 +1966,10 @@ abstract public class MtasBasicParser extends MtasParser {
1808 1966 /**
1809 1967 * Condition ancestor.
1810 1968 *
1811   - * @param ancestorType the ancestor type
1812   - * @param number the number
  1969 + * @param ancestorType
  1970 + * the ancestor type
  1971 + * @param number
  1972 + * the number
1813 1973 */
1814 1974 public void conditionAncestor(String ancestorType, String number) {
1815 1975 if (ancestorType.equals(SOURCE_ANCESTOR_GROUP)
... ... @@ -1829,12 +1989,18 @@ abstract public class MtasBasicParser extends MtasParser {
1829 1989 /**
1830 1990 * Adds the ancestor name.
1831 1991 *
1832   - * @param ancestorType the ancestor type
1833   - * @param mappingToken the mapping token
1834   - * @param type the type
1835   - * @param distance the distance
1836   - * @param prefix the prefix
1837   - * @param filter the filter
  1992 + * @param ancestorType
  1993 + * the ancestor type
  1994 + * @param mappingToken
  1995 + * the mapping token
  1996 + * @param type
  1997 + * the type
  1998 + * @param distance
  1999 + * the distance
  2000 + * @param prefix
  2001 + * the prefix
  2002 + * @param filter
  2003 + * the filter
1838 2004 */
1839 2005 private void addAncestorName(String ancestorType,
1840 2006 MtasParserMappingToken mappingToken, String type, String distance,
... ... @@ -1862,11 +2028,16 @@ abstract public class MtasBasicParser extends MtasParser {
1862 2028 /**
1863 2029 * Condition ancestor name.
1864 2030 *
1865   - * @param ancestorType the ancestor type
1866   - * @param distance the distance
1867   - * @param condition the condition
1868   - * @param filter the filter
1869   - * @param not the not
  2031 + * @param ancestorType
  2032 + * the ancestor type
  2033 + * @param distance
  2034 + * the distance
  2035 + * @param condition
  2036 + * the condition
  2037 + * @param filter
  2038 + * the filter
  2039 + * @param not
  2040 + * the not
1870 2041 */
1871 2042 public void conditionAncestorName(String ancestorType, String distance,
1872 2043 String condition, String filter, String not) {
... ... @@ -1890,13 +2061,20 @@ abstract public class MtasBasicParser extends MtasParser {
1890 2061 /**
1891 2062 * Adds the ancestor attribute.
1892 2063 *
1893   - * @param ancestorType the ancestor type
1894   - * @param mappingToken the mapping token
1895   - * @param type the type
1896   - * @param distance the distance
1897   - * @param name the name
1898   - * @param prefix the prefix
1899   - * @param filter the filter
  2064 + * @param ancestorType
  2065 + * the ancestor type
  2066 + * @param mappingToken
  2067 + * the mapping token
  2068 + * @param type
  2069 + * the type
  2070 + * @param distance
  2071 + * the distance
  2072 + * @param name
  2073 + * the name
  2074 + * @param prefix
  2075 + * the prefix
  2076 + * @param filter
  2077 + * the filter
1900 2078 */
1901 2079 public void addAncestorAttribute(String ancestorType,
1902 2080 MtasParserMappingToken mappingToken, String type, String distance,
... ... @@ -1927,12 +2105,18 @@ abstract public class MtasBasicParser extends MtasParser {
1927 2105 /**
1928 2106 * Condition ancestor attribute.
1929 2107 *
1930   - * @param ancestorType the ancestor type
1931   - * @param distance the distance
1932   - * @param name the name
1933   - * @param condition the condition
1934   - * @param filter the filter
1935   - * @param not the not
  2108 + * @param ancestorType
  2109 + * the ancestor type
  2110 + * @param distance
  2111 + * the distance
  2112 + * @param name
  2113 + * the name
  2114 + * @param condition
  2115 + * the condition
  2116 + * @param filter
  2117 + * the filter
  2118 + * @param not
  2119 + * the not
1936 2120 */
1937 2121 public void conditionAncestorAttribute(String ancestorType, String distance,
1938 2122 String name, String condition, String filter, String not) {
... ... @@ -1959,11 +2143,16 @@ abstract public class MtasBasicParser extends MtasParser {
1959 2143 /**
1960 2144 * Payload ancestor attribute.
1961 2145 *
1962   - * @param mappingToken the mapping token
1963   - * @param ancestorType the ancestor type
1964   - * @param distance the distance
1965   - * @param name the name
1966   - * @param filter the filter
  2146 + * @param mappingToken
  2147 + * the mapping token
  2148 + * @param ancestorType
  2149 + * the ancestor type
  2150 + * @param distance
  2151 + * the distance
  2152 + * @param name
  2153 + * the name
  2154 + * @param filter
  2155 + * the filter
1967 2156 */
1968 2157 private void payloadAncestorAttribute(MtasParserMappingToken mappingToken,
1969 2158 String ancestorType, String distance, String name, String filter) {
... ... @@ -1988,9 +2177,11 @@ abstract public class MtasBasicParser extends MtasParser {
1988 2177 /**
1989 2178 * Compute ancestor source type.
1990 2179 *
1991   - * @param type the type
  2180 + * @param type
  2181 + * the type
1992 2182 * @return the string
1993   - * @throws MtasConfigException the mtas config exception
  2183 + * @throws MtasConfigException
  2184 + * the mtas config exception
1994 2185 */
1995 2186 private String computeAncestorSourceType(String type)
1996 2187 throws MtasConfigException {
... ... @@ -2014,7 +2205,8 @@ abstract public class MtasBasicParser extends MtasParser {
2014 2205 /**
2015 2206 * Compute distance.
2016 2207 *
2017   - * @param distance the distance
  2208 + * @param distance
  2209 + * the distance
2018 2210 * @return the string
2019 2211 */
2020 2212 private String computeDistance(String distance) {
... ... @@ -2033,7 +2225,8 @@ abstract public class MtasBasicParser extends MtasParser {
2033 2225 /**
2034 2226 * Compute number.
2035 2227 *
2036   - * @param number the number
  2228 + * @param number
  2229 + * the number
2037 2230 * @return the string
2038 2231 */
2039 2232 private String computeNumber(String number) {
... ...
src/mtas/analysis/parser/MtasCRMParser.java 0 → 100644
  1 +package mtas.analysis.parser;
  2 +
  3 +import java.io.IOException;
  4 +import java.io.Reader;
  5 +import java.util.ArrayList;
  6 +import java.util.Arrays;
  7 +import java.util.Collection;
  8 +import java.util.HashMap;
  9 +import java.util.HashSet;
  10 +import java.util.TreeSet;
  11 +import java.util.Map.Entry;
  12 +import java.util.concurrent.atomic.AtomicInteger;
  13 +import java.util.regex.Matcher;
  14 +import java.util.regex.Pattern;
  15 +
  16 +import mtas.analysis.token.MtasToken;
  17 +import mtas.analysis.token.MtasTokenCollection;
  18 +import mtas.analysis.util.MtasBufferedReader;
  19 +import mtas.analysis.util.MtasConfigException;
  20 +import mtas.analysis.util.MtasConfiguration;
  21 +import mtas.analysis.util.MtasParserException;
  22 +
  23 +/**
  24 + * The Class MtasCRMParser.
  25 + */
  26 +
  27 +public class MtasCRMParser extends MtasBasicParser {
  28 +
  29 + /** The word type. */
  30 + private MtasParserType wordType = null;
  31 +
  32 + /** The word annotation types. */
  33 + private HashMap<String, MtasParserType> wordAnnotationTypes = new HashMap<String, MtasParserType>();
  34 +
  35 + /** The crm sentence types. */
  36 + private HashMap<String, MtasParserType> crmSentenceTypes = new HashMap<String, MtasParserType>();
  37 +
  38 + /** The crm clause types. */
  39 + private HashMap<String, MtasParserType> crmClauseTypes = new HashMap<String, MtasParserType>();
  40 +
  41 + /** The crm pair types. */
  42 + private HashMap<String, MtasParserType> crmPairTypes = new HashMap<String, MtasParserType>();
  43 +
  44 + /** The functions. */
  45 + private HashMap<String, HashMap<String, MtasCRMParserFunction>> functions = new HashMap<String, HashMap<String, MtasCRMParserFunction>>();
  46 +
  47 + /** The Constant MAPPING_TYPE_CRM_SENTENCE. */
  48 + protected final static String MAPPING_TYPE_CRM_SENTENCE = "crmSentence";
  49 +
  50 + /** The Constant MAPPING_TYPE_CRM_CLAUSE. */
  51 + protected final static String MAPPING_TYPE_CRM_CLAUSE = "crmClause";
  52 +
  53 + /** The Constant MAPPING_TYPE_CRM_PAIR. */
  54 + protected final static String MAPPING_TYPE_CRM_PAIR = "crmPair";
  55 +
  56 + /** The history pair. */
  57 + private HashMap<String, HashMap<String, MtasParserObject>> historyPair = new HashMap<String, HashMap<String, MtasParserObject>>();
  58 +
  59 + /** The pair pattern. */
  60 + Pattern pairPattern = Pattern.compile("^([b|e])([a-z])([0-9]+)$");
  61 +
  62 + /**
  63 + * Instantiates a new mtas crm parser.
  64 + *
  65 + * @param config
  66 + * the config
  67 + */
  68 + public MtasCRMParser(MtasConfiguration config) {
  69 + super(config);
  70 + try {
  71 + initParser();
  72 + // System.out.print(printConfig());
  73 + } catch (MtasConfigException e) {
  74 + e.printStackTrace();
  75 + }
  76 + }
  77 +
  78 + /*
  79 + * (non-Javadoc)
  80 + *
  81 + * @see mtas.analysis.parser.MtasParser#initParser()
  82 + */
  83 + @SuppressWarnings("unchecked")
  84 + @Override
  85 + protected void initParser() throws MtasConfigException {
  86 + super.initParser();
  87 + if (config != null) {
  88 + // always word, no mappings
  89 + wordType = new MtasParserType(MAPPING_TYPE_WORD, null, false);
  90 + for (int i = 0; i < config.children.size(); i++) {
  91 + MtasConfiguration current = config.children.get(i);
  92 + if (current.name.equals("mappings")) {
  93 + for (int j = 0; j < current.children.size(); j++) {
  94 + if (current.children.get(j).name.equals("mapping")) {
  95 + MtasConfiguration mapping = current.children.get(j);
  96 + String typeMapping = mapping.attributes.get("type");
  97 + String nameMapping = mapping.attributes.get("name");
  98 + if ((typeMapping != null)) {
  99 + if (typeMapping.equals(MAPPING_TYPE_WORD)) {
  100 + MtasCRMParserMappingWordAnnotation m = new MtasCRMParserMappingWordAnnotation();
  101 + m.processConfig(mapping);
  102 + wordType.addMapping(m);
  103 + } else if (typeMapping.equals(MAPPING_TYPE_WORD_ANNOTATION)
  104 + && (nameMapping != null)) {
  105 + MtasCRMParserMappingWordAnnotation m = new MtasCRMParserMappingWordAnnotation();
  106 + m.processConfig(mapping);
  107 + if (wordAnnotationTypes.containsKey(nameMapping)) {
  108 + wordAnnotationTypes.get(nameMapping).addMapping(m);
  109 + } else {
  110 + MtasParserType t = new MtasParserType(typeMapping,
  111 + nameMapping, false);
  112 + t.addMapping(m);
  113 + wordAnnotationTypes.put(nameMapping, t);
  114 + }
  115 + } else if (typeMapping.equals(MAPPING_TYPE_CRM_SENTENCE)) {
  116 + MtasCRMParserMappingCRMSentence m = new MtasCRMParserMappingCRMSentence();
  117 + m.processConfig(mapping);
  118 + if (crmSentenceTypes.containsKey(nameMapping)) {
  119 + crmSentenceTypes.get(nameMapping).addMapping(m);
  120 + } else {
  121 + MtasParserType t = new MtasParserType(MAPPING_TYPE_GROUP,
  122 + nameMapping, true);
  123 + t.addMapping(m);
  124 + crmSentenceTypes.put(nameMapping, t);
  125 + }
  126 + } else if (typeMapping.equals(MAPPING_TYPE_CRM_CLAUSE)) {
  127 + MtasCRMParserMappingCRMSentence m = new MtasCRMParserMappingCRMSentence();
  128 + m.processConfig(mapping);
  129 + if (crmClauseTypes.containsKey(nameMapping)) {
  130 + crmClauseTypes.get(nameMapping).addMapping(m);
  131 + } else {
  132 + MtasParserType t = new MtasParserType(MAPPING_TYPE_GROUP,
  133 + nameMapping, true);
  134 + t.addMapping(m);
  135 + crmClauseTypes.put(nameMapping, t);
  136 + }
  137 + } else if (typeMapping.equals(MAPPING_TYPE_CRM_PAIR)) {
  138 + MtasCRMParserMappingCRMPair m = new MtasCRMParserMappingCRMPair();
  139 + m.processConfig(mapping);
  140 + if (crmPairTypes.containsKey(nameMapping)) {
  141 + crmPairTypes.get(nameMapping).addMapping(m);
  142 + } else {
  143 + MtasParserType t = new MtasParserType(MAPPING_TYPE_RELATION,
  144 + nameMapping, true);
  145 + t.addMapping(m);
  146 + crmPairTypes.put(nameMapping, t);
  147 + }
  148 + } else {
  149 + throw new MtasConfigException("unknown mapping type "
  150 + + typeMapping + " or missing name");
  151 + }
  152 + }
  153 + }
  154 + }
  155 + } else if (current.name.equals("functions")) {
  156 + for (int j = 0; j < current.children.size(); j++) {
  157 + if (current.children.get(j).name.equals("function")) {
  158 + MtasConfiguration function = current.children.get(j);
  159 + String nameFunction = function.attributes.get("name");
  160 + String typeFunction = function.attributes.get("type");
  161 + String splitFunction = function.attributes.get("split");
  162 + if (nameFunction != null && typeFunction != null) {
  163 + MtasCRMParserFunction mtasCRMParserFunction = new MtasCRMParserFunction(
  164 + typeFunction, splitFunction);
  165 + if (!functions.containsKey(typeFunction)) {
  166 + functions.put(typeFunction,
  167 + new HashMap<String, MtasCRMParserFunction>());
  168 + }
  169 + functions.get(typeFunction).put(nameFunction,
  170 + mtasCRMParserFunction);
  171 + MtasConfiguration subCurrent = current.children.get(j);
  172 + for (int k = 0; k < subCurrent.children.size(); k++) {
  173 + if (subCurrent.children.get(k).name.equals("condition")) {
  174 + MtasConfiguration subSubCurrent = subCurrent.children
  175 + .get(k);
  176 + if (subSubCurrent.attributes.containsKey("value")) {
  177 + String[] valuesCondition = subSubCurrent.attributes
  178 + .get("value").split(Pattern.quote(","));
  179 + ArrayList<MtasCRMParserFunctionOutput> valueOutputList = new ArrayList<MtasCRMParserFunctionOutput>();
  180 + for (int l = 0; l < subSubCurrent.children.size(); l++) {
  181 + if (subSubCurrent.children.get(l).name
  182 + .equals("output")) {
  183 + String valueOutput = subSubCurrent.children
  184 + .get(l).attributes.get("value");
  185 + String nameOutput = subSubCurrent.children
  186 + .get(l).attributes.get("name");
  187 + if (nameOutput != null) {
  188 + MtasCRMParserFunctionOutput o = new MtasCRMParserFunctionOutput(
  189 + nameOutput, valueOutput);
  190 + valueOutputList.add(o);
  191 + }
  192 + }
  193 + }
  194 + if (valueOutputList.size() > 0) {
  195 + for (String valueCondition : valuesCondition) {
  196 + if (mtasCRMParserFunction.output
  197 + .containsKey(valueCondition)) {
  198 + mtasCRMParserFunction.output.get(valueCondition)
  199 + .addAll(
  200 + (Collection<? extends MtasCRMParserFunctionOutput>) valueOutputList
  201 + .clone());
  202 + } else {
  203 + mtasCRMParserFunction.output.put(valueCondition,
  204 + (ArrayList<MtasCRMParserFunctionOutput>) valueOutputList
  205 + .clone());
  206 + }
  207 + }
  208 + }
  209 + }
  210 + }
  211 + }
  212 + }
  213 + }
  214 + }
  215 + }
  216 + }
  217 + }
  218 + }
  219 +
  220 + /*
  221 + * (non-Javadoc)
  222 + *
  223 + * @see mtas.analysis.parser.MtasParser#createTokenCollection(java.io.Reader)
  224 + */
  225 + @Override
  226 + public MtasTokenCollection createTokenCollection(Reader reader)
  227 + throws MtasParserException, MtasConfigException {
  228 + AtomicInteger position = new AtomicInteger(0);
  229 + Integer unknownAncestors = 0;
  230 +
  231 + HashMap<String, TreeSet<Integer>> idPositions = new HashMap<String, TreeSet<Integer>>();
  232 + HashMap<String, Integer[]> idOffsets = new HashMap<String, Integer[]>();
  233 +
  234 + HashMap<String, HashMap<Integer, HashSet<String>>> updateList = new HashMap<String, HashMap<Integer, HashSet<String>>>();
  235 + updateList.put(UPDATE_TYPE_OFFSET, new HashMap<Integer, HashSet<String>>());
  236 + updateList.put(UPDATE_TYPE_POSITION,
  237 + new HashMap<Integer, HashSet<String>>());
  238 +
  239 + HashMap<String, ArrayList<MtasParserObject>> currentList = new HashMap<String, ArrayList<MtasParserObject>>();
  240 + currentList.put(MAPPING_TYPE_RELATION, new ArrayList<MtasParserObject>());
  241 + currentList.put(MAPPING_TYPE_RELATION_ANNOTATION,
  242 + new ArrayList<MtasParserObject>());
  243 + currentList.put(MAPPING_TYPE_REF, new ArrayList<MtasParserObject>());
  244 + currentList.put(MAPPING_TYPE_GROUP, new ArrayList<MtasParserObject>());
  245 + currentList.put(MAPPING_TYPE_GROUP_ANNOTATION,
  246 + new ArrayList<MtasParserObject>());
  247 + currentList.put(MAPPING_TYPE_WORD, new ArrayList<MtasParserObject>());
  248 + currentList.put(MAPPING_TYPE_WORD_ANNOTATION,
  249 + new ArrayList<MtasParserObject>());
  250 +
  251 + tokenCollection = new MtasTokenCollection();
  252 + MtasToken.resetId();
  253 + try (MtasBufferedReader br = new MtasBufferedReader(reader)) {
  254 + String line;
  255 + int currentOffset, previousOffset = br.getPosition();
  256 + MtasParserObject currentObject;
  257 + Pattern headerPattern = Pattern.compile("^@ @ @(.*)$");
  258 + Pattern regularPattern = Pattern.compile(
  259 + "^([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+)$");
  260 + Matcher matcherHeader, matcherRegular = null;
  261 + HashSet<MtasParserObject> newPreviousSentence = new HashSet<MtasParserObject>(),
  262 + previousSentence = new HashSet<MtasParserObject>();
  263 + HashSet<MtasParserObject> newPreviousClause = new HashSet<MtasParserObject>(),
  264 + previousClause = new HashSet<MtasParserObject>();
  265 + while ((line = br.readLine()) != null) {
  266 + currentOffset = br.getPosition();
  267 + matcherHeader = headerPattern.matcher(line.trim());
  268 + matcherRegular = regularPattern.matcher(line.trim());
  269 + if (matcherRegular.matches()) {
  270 + newPreviousSentence.clear();
  271 + for (int i = 4; i < 8; i++) {
  272 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>();
  273 + HashSet<MtasParserObject> tmpList = processCRMSentence(
  274 + String.valueOf(i), matcherRegular.group((i + 1)), currentOffset,
  275 + functionOutputList, unknownAncestors, currentList, updateList,
  276 + idPositions, idOffsets, previousSentence, previousClause);
  277 + if (tmpList != null) {
  278 + newPreviousSentence.addAll(tmpList);
  279 + }
  280 + for (MtasCRMParserFunctionOutput functionOutput : functionOutputList) {
  281 + tmpList = processCRMSentence(functionOutput.name,
  282 + functionOutput.value, currentOffset, functionOutputList,
  283 + unknownAncestors, currentList, updateList, idPositions,
  284 + idOffsets, previousSentence, previousClause);
  285 + if (tmpList != null) {
  286 + newPreviousSentence.addAll(tmpList);
  287 + }
  288 + }
  289 + }
  290 + if (newPreviousSentence.size() > 0) {
  291 + previousSentence.clear();
  292 + previousSentence.addAll(newPreviousSentence);
  293 + }
  294 + newPreviousClause.clear();
  295 + for (int i = 4; i < 8; i++) {
  296 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>();
  297 + HashSet<MtasParserObject> tmpList = processCRMClause(
  298 + String.valueOf(i), matcherRegular.group((i + 1)), currentOffset,
  299 + functionOutputList, unknownAncestors, currentList, updateList,
  300 + idPositions, idOffsets, previousClause);
  301 + if (tmpList != null) {
  302 + newPreviousClause.addAll(tmpList);
  303 + }
  304 + for (MtasCRMParserFunctionOutput functionOutput : functionOutputList) {
  305 + tmpList = processCRMClause(functionOutput.name,
  306 + functionOutput.value, currentOffset, functionOutputList,
  307 + unknownAncestors, currentList, updateList, idPositions,
  308 + idOffsets, previousClause);
  309 + if (tmpList != null) {
  310 + newPreviousClause.addAll(tmpList);
  311 + }
  312 + }
  313 + }
  314 + if (newPreviousClause.size() > 0) {
  315 + previousClause.clear();
  316 + previousClause.addAll(newPreviousClause);
  317 + }
  318 + }
  319 +
  320 + if (matcherRegular.matches() && !matcherHeader.matches()) {
  321 + matcherRegular = regularPattern.matcher(line.trim());
  322 + if (matcherRegular.matches()) {
  323 + // regular line - start word
  324 + currentObject = new MtasParserObject(wordType);
  325 + currentObject.setOffsetStart(previousOffset);
  326 + currentObject.setRealOffsetStart(previousOffset);
  327 + currentObject.setUnknownAncestorNumber(unknownAncestors);
  328 + if (!prevalidateObject(currentObject, currentList)) {
  329 + unknownAncestors++;
  330 + } else {
  331 + int p = position.getAndIncrement();
  332 + currentObject.addPosition(p);
  333 + currentObject.objectId = "word_" + String.valueOf(p);
  334 + currentList.get(MAPPING_TYPE_WORD).add(currentObject);
  335 + unknownAncestors = 0;
  336 + // check for crmPair
  337 + for (int i = 0; i < 8; i++) {
  338 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>();
  339 + processCRMPair(p, String.valueOf(i),
  340 + matcherRegular.group((i + 1)), currentOffset,
  341 + functionOutputList, unknownAncestors, currentList,
  342 + updateList, idPositions, idOffsets);
  343 + for (MtasCRMParserFunctionOutput functionOutput : functionOutputList) {
  344 + processCRMPair(p, functionOutput.name, functionOutput.value,
  345 + currentOffset, functionOutputList, unknownAncestors,
  346 + currentList, updateList, idPositions, idOffsets);
  347 + }
  348 + }
  349 + // compute word annotations
  350 + for (int i = 0; i < 8; i++) {
  351 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>();
  352 + functionOutputList.addAll(processWordAnnotation(
  353 + String.valueOf(i), matcherRegular.group((i + 1)),
  354 + previousOffset, currentOffset, unknownAncestors,
  355 + currentList, updateList, idPositions, idOffsets));
  356 + for (MtasCRMParserFunctionOutput functionOutput : functionOutputList) {
  357 + processWordAnnotation(functionOutput.name,
  358 + functionOutput.value, previousOffset, currentOffset,
  359 + unknownAncestors, currentList, updateList, idPositions,
  360 + idOffsets);
  361 + }
  362 + }
  363 + }
  364 + // finish word
  365 + if (unknownAncestors > 0) {
  366 + unknownAncestors--;
  367 + } else {
  368 + currentObject = currentList.get(MAPPING_TYPE_WORD)
  369 + .remove(currentList.get(MAPPING_TYPE_WORD).size() - 1);
  370 + assert unknownAncestors == 0 : "error in administration "
  371 + + currentObject.getType().getName();
  372 + currentObject.setText(null);
  373 + currentObject.setOffsetEnd(currentOffset - 1);
  374 + currentObject.setRealOffsetEnd(currentOffset - 1);
  375 + // update ancestor groups with position and offset
  376 + for (MtasParserObject currentGroup : currentList
  377 + .get(MAPPING_TYPE_GROUP)) {
  378 + currentGroup.addPositions(currentObject.getPositions());
  379 + currentGroup.addOffsetStart(currentObject.getOffsetStart());
  380 + currentGroup.addOffsetEnd(currentObject.getOffsetEnd());
  381 + }
  382 + idPositions.put(currentObject.getId(),
  383 + currentObject.getPositions());
  384 + idOffsets.put(currentObject.getId(), currentObject.getOffset());
  385 + currentObject.updateMappings(idPositions, idOffsets);
  386 + unknownAncestors = currentObject.getUnknownAncestorNumber();
  387 + computeMappingsFromObject(currentObject, currentList, updateList);
  388 + }
  389 +
  390 + } else {
  391 + // System.out.println("PROBLEM: " + line);
  392 + }
  393 + }
  394 + previousOffset = br.getPosition();
  395 + }
  396 + closePrevious(previousSentence, previousOffset, unknownAncestors,
  397 + currentList, updateList, idPositions, idOffsets);
  398 + closePrevious(previousClause, previousOffset, unknownAncestors,
  399 + currentList, updateList, idPositions, idOffsets);
  400 + } catch (IOException e) {
  401 + throw new MtasParserException(e.getMessage());
  402 + }
  403 + // final check
  404 + tokenCollection.check(autorepair, makeunique);
  405 + return tokenCollection;
  406 +
  407 + }
  408 +
  409 + /**
  410 + * Process word annotation.
  411 + *
  412 + * @param name
  413 + * the name
  414 + * @param text
  415 + * the text
  416 + * @param previousOffset
  417 + * the previous offset
  418 + * @param currentOffset
  419 + * the current offset
  420 + * @param unknownAncestors
  421 + * the unknown ancestors
  422 + * @param currentList
  423 + * the current list
  424 + * @param updateList
  425 + * the update list
  426 + * @param idPositions
  427 + * the id positions
  428 + * @param idOffsets
  429 + * the id offsets
  430 + * @return the array list
  431 + * @throws MtasParserException
  432 + * the mtas parser exception
  433 + * @throws MtasConfigException
  434 + * the mtas config exception
  435 + */
  436 + private ArrayList<MtasCRMParserFunctionOutput> processWordAnnotation(
  437 + String name, String text, Integer previousOffset, Integer currentOffset,
  438 + Integer unknownAncestors,
  439 + HashMap<String, ArrayList<MtasParserObject>> currentList,
  440 + HashMap<String, HashMap<Integer, HashSet<String>>> updateList,
  441 + HashMap<String, TreeSet<Integer>> idPositions,
  442 + HashMap<String, Integer[]> idOffsets)
  443 + throws MtasParserException, MtasConfigException {
  444 + MtasParserType tmpCurrentType;
  445 + MtasParserObject currentObject;
  446 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>();
  447 + if ((tmpCurrentType = wordAnnotationTypes.get(name)) != null) {
  448 + // start word annotation
  449 + currentObject = new MtasParserObject(tmpCurrentType);
  450 + currentObject.setRealOffsetStart(previousOffset);
  451 + currentObject.addPositions(currentList.get(MAPPING_TYPE_WORD)
  452 + .get((currentList.get(MAPPING_TYPE_WORD).size() - 1)).getPositions());
  453 + currentObject.setUnknownAncestorNumber(unknownAncestors);
  454 + if (!prevalidateObject(currentObject, currentList)) {
  455 + unknownAncestors++;
  456 + } else {
  457 + currentList.get(MAPPING_TYPE_WORD_ANNOTATION).add(currentObject);
  458 + unknownAncestors = 0;
  459 + }
  460 + // finish word annotation
  461 + if (unknownAncestors > 0) {
  462 + unknownAncestors--;
  463 + } else {
  464 + currentObject = currentList.get(MAPPING_TYPE_WORD_ANNOTATION)
  465 + .remove(currentList.get(MAPPING_TYPE_WORD_ANNOTATION).size() - 1);
  466 + assert unknownAncestors == 0 : "error in administration "
  467 + + currentObject.getType().getName();
  468 + if (functions.containsKey(MAPPING_TYPE_WORD_ANNOTATION)
  469 + && functions.get(MAPPING_TYPE_WORD_ANNOTATION).containsKey(name)
  470 + && text != null) {
  471 + MtasCRMParserFunction function = functions
  472 + .get(MAPPING_TYPE_WORD_ANNOTATION).get(name);
  473 + String[] value;
  474 + if (function.split != null) {
  475 + value = text.split(Pattern.quote(function.split));
  476 + } else {
  477 + value = new String[] { text };
  478 + }
  479 + for (int c = 0; c < value.length; c++) {
  480 + if (function.output.containsKey(value[c])) {
  481 + functionOutputList.addAll(function.output.get(value[c]));
  482 + }
  483 + }
  484 + }
  485 + currentObject.setText(text);
  486 + currentObject.setRealOffsetEnd(currentOffset - 1);
  487 + idPositions.put(currentObject.getId(), currentObject.getPositions());
  488 + idOffsets.put(currentObject.getId(), currentObject.getOffset());
  489 + // offset always null, so update later with word (should be possible)
  490 + if ((currentObject.getId() != null)
  491 + && (currentList.get(MAPPING_TYPE_WORD).size() > 0)) {
  492 + currentList.get(MAPPING_TYPE_WORD)
  493 + .get((currentList.get(MAPPING_TYPE_WORD).size() - 1))
  494 + .addUpdateableIdWithOffset(currentObject.getId());
  495 + }
  496 + currentObject.updateMappings(idPositions, idOffsets);
  497 + unknownAncestors = currentObject.getUnknownAncestorNumber();
  498 + computeMappingsFromObject(currentObject, currentList, updateList);
  499 + }
  500 + }
  501 + return functionOutputList;
  502 + }
  503 +
  504 + /**
  505 + * Process crm sentence.
  506 + *
  507 + * @param name
  508 + * the name
  509 + * @param text
  510 + * the text
  511 + * @param currentOffset
  512 + * the current offset
  513 + * @param functionOutputList
  514 + * the function output list
  515 + * @param unknownAncestors
  516 + * the unknown ancestors
  517 + * @param currentList
  518 + * the current list
  519 + * @param updateList
  520 + * the update list
  521 + * @param idPositions
  522 + * the id positions
  523 + * @param idOffsets
  524 + * the id offsets
  525 + * @param previous
  526 + * the previous
  527 + * @param previousClause
  528 + * the previous clause
  529 + * @return the hash set
  530 + * @throws MtasParserException
  531 + * the mtas parser exception
  532 + * @throws MtasConfigException
  533 + * the mtas config exception
  534 + */
  535 + private HashSet<MtasParserObject> processCRMSentence(String name, String text,
  536 + Integer currentOffset,
  537 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList,
  538 + Integer unknownAncestors,
  539 + HashMap<String, ArrayList<MtasParserObject>> currentList,
  540 + HashMap<String, HashMap<Integer, HashSet<String>>> updateList,
  541 + HashMap<String, TreeSet<Integer>> idPositions,
  542 + HashMap<String, Integer[]> idOffsets, HashSet<MtasParserObject> previous,
  543 + HashSet<MtasParserObject> previousClause)
  544 + throws MtasParserException, MtasConfigException {
  545 + MtasParserType tmpCurrentType;
  546 + MtasParserObject currentObject;
  547 + if ((tmpCurrentType = crmSentenceTypes.get(name)) != null) {
  548 + currentObject = new MtasParserObject(tmpCurrentType);
  549 + currentObject.setUnknownAncestorNumber(unknownAncestors);
  550 + currentObject.setRealOffsetStart(currentOffset);
  551 + currentObject.setText(text);
  552 + if (!prevalidateObject(currentObject, currentList)) {
  553 + return null;
  554 + } else {
  555 + closePrevious(previousClause, currentOffset, unknownAncestors,
  556 + currentList, updateList, idPositions, idOffsets);
  557 + closePrevious(previous, currentOffset, unknownAncestors, currentList,
  558 + updateList, idPositions, idOffsets);
  559 + previous.clear();
  560 + currentList.get(MAPPING_TYPE_GROUP).add(currentObject);
  561 + unknownAncestors = 0;
  562 + return new HashSet<MtasParserObject>(Arrays.asList(currentObject));
  563 + }
  564 + }
  565 + return null;
  566 + }
  567 +
  568 + /**
  569 + * Process crm clause.
  570 + *
  571 + * @param name
  572 + * the name
  573 + * @param text
  574 + * the text
  575 + * @param currentOffset
  576 + * the current offset
  577 + * @param functionOutputList
  578 + * the function output list
  579 + * @param unknownAncestors
  580 + * the unknown ancestors
  581 + * @param currentList
  582 + * the current list
  583 + * @param updateList
  584 + * the update list
  585 + * @param idPositions
  586 + * the id positions
  587 + * @param idOffsets
  588 + * the id offsets
  589 + * @param previous
  590 + * the previous
  591 + * @return the hash set
  592 + * @throws MtasParserException
  593 + * the mtas parser exception
  594 + * @throws MtasConfigException
  595 + * the mtas config exception
  596 + */
  597 + private HashSet<MtasParserObject> processCRMClause(String name, String text,
  598 + Integer currentOffset,
  599 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList,
  600 + Integer unknownAncestors,
  601 + HashMap<String, ArrayList<MtasParserObject>> currentList,
  602 + HashMap<String, HashMap<Integer, HashSet<String>>> updateList,
  603 + HashMap<String, TreeSet<Integer>> idPositions,
  604 + HashMap<String, Integer[]> idOffsets, HashSet<MtasParserObject> previous)
  605 + throws MtasParserException, MtasConfigException {
  606 + MtasParserType tmpCurrentType;
  607 + MtasParserObject currentObject;
  608 + if ((tmpCurrentType = crmClauseTypes.get(name)) != null) {
  609 + currentObject = new MtasParserObject(tmpCurrentType);
  610 + currentObject.setUnknownAncestorNumber(unknownAncestors);
  611 + currentObject.setRealOffsetStart(currentOffset);
  612 + currentObject.setText(text);
  613 + if (!prevalidateObject(currentObject, currentList)) {
  614 + return null;
  615 + } else {
  616 + closePrevious(previous, currentOffset, unknownAncestors, currentList,
  617 + updateList, idPositions, idOffsets);
  618 + previous.clear();
  619 + currentList.get(MAPPING_TYPE_GROUP).add(currentObject);
  620 + unknownAncestors = 0;
  621 + return new HashSet<MtasParserObject>(Arrays.asList(currentObject));
  622 + }
  623 + }
  624 + return null;
  625 + }
  626 +
  627 + /**
  628 + * Close previous.
  629 + *
  630 + * @param previous
  631 + * the previous
  632 + * @param currentOffset
  633 + * the current offset
  634 + * @param unknownAncestors
  635 + * the unknown ancestors
  636 + * @param currentList
  637 + * the current list
  638 + * @param updateList
  639 + * the update list
  640 + * @param idPositions
  641 + * the id positions
  642 + * @param idOffsets
  643 + * the id offsets
  644 + * @throws MtasParserException
  645 + * the mtas parser exception
  646 + * @throws MtasConfigException
  647 + * the mtas config exception
  648 + */
  649 + private void closePrevious(HashSet<MtasParserObject> previous,
  650 + Integer currentOffset, Integer unknownAncestors,
  651 + HashMap<String, ArrayList<MtasParserObject>> currentList,
  652 + HashMap<String, HashMap<Integer, HashSet<String>>> updateList,
  653 + HashMap<String, TreeSet<Integer>> idPositions,
  654 + HashMap<String, Integer[]> idOffsets)
  655 + throws MtasParserException, MtasConfigException {
  656 + for (MtasParserObject previousObject : previous) {
  657 + previousObject.setRealOffsetEnd(currentOffset);
  658 + idPositions.put(previousObject.getId(), previousObject.getPositions());
  659 + idOffsets.put(previousObject.getId(), previousObject.getOffset());
  660 + previousObject.updateMappings(idPositions, idOffsets);
  661 + unknownAncestors = previousObject.getUnknownAncestorNumber();
  662 + computeMappingsFromObject(previousObject, currentList, updateList);
  663 + currentList.get(MAPPING_TYPE_GROUP).remove(previousObject);
  664 + }
  665 + }
  666 +
  667 + /**
  668 + * Process crm pair.
  669 + *
  670 + * @param position
  671 + * the position
  672 + * @param name
  673 + * the name
  674 + * @param text
  675 + * the text
  676 + * @param currentOffset
  677 + * the current offset
  678 + * @param functionOutputList
  679 + * the function output list
  680 + * @param unknownAncestors
  681 + * the unknown ancestors
  682 + * @param currentList
  683 + * the current list
  684 + * @param updateList
  685 + * the update list
  686 + * @param idPositions
  687 + * the id positions
  688 + * @param idOffsets
  689 + * the id offsets
  690 + * @throws MtasParserException
  691 + * the mtas parser exception
  692 + * @throws MtasConfigException
  693 + * the mtas config exception
  694 + */
  695 + private void processCRMPair(int position, String name, String text,
  696 + Integer currentOffset,
  697 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList,
  698 + Integer unknownAncestors,
  699 + HashMap<String, ArrayList<MtasParserObject>> currentList,
  700 + HashMap<String, HashMap<Integer, HashSet<String>>> updateList,
  701 + HashMap<String, TreeSet<Integer>> idPositions,
  702 + HashMap<String, Integer[]> idOffsets)
  703 + throws MtasParserException, MtasConfigException {
  704 +
  705 + MtasParserType tmpCurrentType;
  706 + MtasParserObject currentObject;
  707 +
  708 + if ((tmpCurrentType = crmPairTypes.get(name)) != null) {
  709 + if ((tmpCurrentType = crmPairTypes.get(name)) != null) {
  710 + // get history
  711 + HashMap<String, MtasParserObject> currentNamePairHistory;
  712 + if (!historyPair.containsKey(name)) {
  713 + currentNamePairHistory = new HashMap<String, MtasParserObject>();
  714 + historyPair.put(name, currentNamePairHistory);
  715 + } else {
  716 + currentNamePairHistory = historyPair.get(name);
  717 + }
  718 + Matcher m = pairPattern.matcher(text);
  719 + if (m.find()) {
  720 + String thisKey = m.group(1) + m.group(2);
  721 + String otherKey = (m.group(1).equals("b") ? "e" : "b") + m.group(2);
  722 + if (currentNamePairHistory.containsKey(otherKey)) {
  723 + currentObject = currentNamePairHistory.remove(otherKey);
  724 + currentObject.setText(currentObject.getText() + "+" + text);
  725 + currentObject.addPosition(position);
  726 + processFunctions(name, text, MAPPING_TYPE_CRM_PAIR,
  727 + functionOutputList);
  728 + currentObject.setRealOffsetEnd(currentOffset + 1);
  729 + currentObject.setOffsetEnd(currentOffset + 1);
  730 + idPositions.put(currentObject.getId(),
  731 + currentObject.getPositions());
  732 + idOffsets.put(currentObject.getId(), currentObject.getOffset());
  733 + currentObject.updateMappings(idPositions, idOffsets);
  734 + unknownAncestors = currentObject.getUnknownAncestorNumber();
  735 + computeMappingsFromObject(currentObject, currentList, updateList);
  736 + } else {
  737 + currentObject = new MtasParserObject(tmpCurrentType);
  738 + currentObject.setUnknownAncestorNumber(unknownAncestors);
  739 + currentObject.setRealOffsetStart(currentOffset);
  740 + currentObject.setOffsetStart(currentOffset);
  741 + currentObject.setText(text);
  742 + currentObject.addPosition(position);
  743 + if (!prevalidateObject(currentObject, currentList)) {
  744 + unknownAncestors++;
  745 + } else {
  746 + currentNamePairHistory.put(thisKey, currentObject);
  747 + processFunctions(name, text, MAPPING_TYPE_CRM_PAIR,
  748 + functionOutputList);
  749 + currentObject.setRealOffsetEnd(currentOffset + 1);
  750 + currentObject.setOffsetEnd(currentOffset + 1);
  751 + idPositions.put(currentObject.getId(),
  752 + currentObject.getPositions());
  753 + idOffsets.put(currentObject.getId(), currentObject.getOffset());
  754 + // offset always null, so update later with word (should be
  755 + // possible)
  756 + if ((currentObject.getId() != null)
  757 + && (currentList.get(MAPPING_TYPE_WORD).size() > 0)) {
  758 + currentList.get(MAPPING_TYPE_WORD)
  759 + .get((currentList.get(MAPPING_TYPE_WORD).size() - 1))
  760 + .addUpdateableIdWithOffset(currentObject.getId());
  761 + }
  762 +
  763 + }
  764 + }
  765 + }
  766 + }
  767 + }
  768 +
  769 + }
  770 +
  771 + /**
  772 + * Process functions.
  773 + *
  774 + * @param name
  775 + * the name
  776 + * @param text
  777 + * the text
  778 + * @param type
  779 + * the type
  780 + * @param functionOutputList
  781 + * the function output list
  782 + */
  783 + private void processFunctions(String name, String text, String type,
  784 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList) {
  785 + if (functions.containsKey(type) && functions.get(type).containsKey(name)
  786 + && text != null) {
  787 + if (functions.get(type).containsKey(name)) {
  788 + MtasCRMParserFunction function = functions.get(type).get(name);
  789 + String[] value;
  790 + if (function.split != null) {
  791 + value = text.split(Pattern.quote(function.split));
  792 + } else {
  793 + value = new String[] { text };
  794 + }
  795 + for (int c = 0; c < value.length; c++) {
  796 + boolean checkedEmpty = false;
  797 + if (value[c].equals("")) {
  798 + checkedEmpty = true;
  799 + }
  800 + if (function.output.containsKey(value[c])) {
  801 + ArrayList<MtasCRMParserFunctionOutput> list = function.output
  802 + .get(value[c]);
  803 + for (MtasCRMParserFunctionOutput listItem : list) {
  804 + functionOutputList.add(listItem.create(value[c]));
  805 + }
  806 + }
  807 + if (!checkedEmpty && function.output.containsKey("")) {
  808 + ArrayList<MtasCRMParserFunctionOutput> list = function.output
  809 + .get("");
  810 + for (MtasCRMParserFunctionOutput listItem : list) {
  811 + functionOutputList.add(listItem.create(value[c]));
  812 + }
  813 + }
  814 + }
  815 + }
  816 + }
  817 + }
  818 +
  819 + /*
  820 + * (non-Javadoc)
  821 + *
  822 + * @see mtas.analysis.parser.MtasParser#printConfig()
  823 + */
  824 + @Override
  825 + public String printConfig() {
  826 + String text = "";
  827 + text += "=== CONFIGURATION ===\n";
  828 + text += "type: " + wordAnnotationTypes.size() + " x wordAnnotation";
  829 + text += printConfigTypes(wordAnnotationTypes);
  830 + text += "=== CONFIGURATION ===\n";
  831 + return text;
  832 + }
  833 +
  834 + /**
  835 + * Prints the config types.
  836 + *
  837 + * @param types
  838 + * the types
  839 + * @return the string
  840 + */
  841 + private String printConfigTypes(HashMap<?, MtasParserType> types) {
  842 + String text = "";
  843 + for (Entry<?, MtasParserType> entry : types.entrySet()) {
  844 + text += "- " + entry.getKey() + ": " + entry.getValue().mappings.size()
  845 + + " mapping(s)\n";
  846 + for (int i = 0; i < entry.getValue().mappings.size(); i++) {
  847 + text += "\t" + entry.getValue().mappings.get(i) + "\n";
  848 + }
  849 + }
  850 + return text;
  851 + }
  852 +
  853 + /**
  854 + * The Class MtasCRMParserFunction.
  855 + */
  856 + private class MtasCRMParserFunction {
  857 +
  858 + /** The split. */
  859 + public String split;
  860 +
  861 + /** The output. */
  862 + public HashMap<String, ArrayList<MtasCRMParserFunctionOutput>> output;
  863 +
  864 + /**
  865 + * Instantiates a new mtas crm parser function.
  866 + *
  867 + * @param type
  868 + * the type
  869 + * @param split
  870 + * the split
  871 + */
  872 + public MtasCRMParserFunction(String type, String split) {
  873 + this.split = split;
  874 + output = new HashMap<String, ArrayList<MtasCRMParserFunctionOutput>>();
  875 + }
  876 +
  877 + }
  878 +
  879 + /**
  880 + * The Class MtasCRMParserFunctionOutput.
  881 + */
  882 + private class MtasCRMParserFunctionOutput {
  883 +
  884 + /** The name. */
  885 + public String name;
  886 +
  887 + /** The value. */
  888 + public String value;
  889 +
  890 + /**
  891 + * Instantiates a new mtas crm parser function output.
  892 + *
  893 + * @param name
  894 + * the name
  895 + * @param value
  896 + * the value
  897 + */
  898 + public MtasCRMParserFunctionOutput(String name, String value) {
  899 + this.name = name;
  900 + this.value = value;
  901 + }
  902 +
  903 + /**
  904 + * Creates the.
  905 + *
  906 + * @param originalValue
  907 + * the original value
  908 + * @return the mtas crm parser function output
  909 + */
  910 + public MtasCRMParserFunctionOutput create(String originalValue) {
  911 + if (value != null) {
  912 + return this;
  913 + } else {
  914 + return new MtasCRMParserFunctionOutput(name, originalValue);
  915 + }
  916 + }
  917 +
  918 + /*
  919 + * (non-Javadoc)
  920 + *
  921 + * @see java.lang.Object#toString()
  922 + */
  923 + @Override
  924 + public String toString() {
  925 + return "MtasCRMParserFunctionOutput[" + name + "," + value + "]";
  926 + }
  927 + }
  928 +
  929 + /**
  930 + * The Class MtasCRMParserMappingWordAnnotation.
  931 + */
  932 + private class MtasCRMParserMappingWordAnnotation
  933 + extends MtasParserMapping<MtasCRMParserMappingWordAnnotation> {
  934 +
  935 + /**
  936 + * Instantiates a new mtas crm parser mapping word annotation.
  937 + */
  938 + public MtasCRMParserMappingWordAnnotation() {
  939 + super();
  940 + this.position = SOURCE_OWN;
  941 + this.realOffset = SOURCE_OWN;
  942 + this.offset = SOURCE_ANCESTOR_WORD;
  943 + this.type = MAPPING_TYPE_WORD_ANNOTATION;
  944 + }
  945 +
  946 + /*
  947 + * (non-Javadoc)
  948 + *
  949 + * @see mtas.analysis.parser.MtasParser.MtasParserMapping#self()
  950 + */
  951 + @Override
  952 + protected MtasCRMParserMappingWordAnnotation self() {
  953 + return this;
  954 + }
  955 + }
  956 +
  957 + /**
  958 + * The Class MtasCRMParserMappingCRMSentence.
  959 + */
  960 + private class MtasCRMParserMappingCRMSentence
  961 + extends MtasParserMapping<MtasCRMParserMappingCRMSentence> {
  962 +
  963 + /**
  964 + * Instantiates a new mtas crm parser mapping crm sentence.
  965 + */
  966 + public MtasCRMParserMappingCRMSentence() {
  967 + super();
  968 + this.position = SOURCE_OWN;
  969 + this.realOffset = SOURCE_OWN;
  970 + this.offset = SOURCE_OWN;
  971 + this.type = MAPPING_TYPE_GROUP;
  972 + }
  973 +
  974 + /*
  975 + * (non-Javadoc)
  976 + *
  977 + * @see mtas.analysis.parser.MtasBasicParser.MtasParserMapping#self()
  978 + */
  979 + @Override
  980 + protected MtasCRMParserMappingCRMSentence self() {
  981 + return this;
  982 + }
  983 + }
  984 +
  985 + /**
  986 + * The Class MtasCRMParserMappingCRMPair.
  987 + */
  988 + private class MtasCRMParserMappingCRMPair
  989 + extends MtasParserMapping<MtasCRMParserMappingCRMPair> {
  990 +
  991 + /**
  992 + * Instantiates a new mtas crm parser mapping crm pair.
  993 + */
  994 + public MtasCRMParserMappingCRMPair() {
  995 + super();
  996 + this.position = SOURCE_OWN;
  997 + this.realOffset = SOURCE_OWN;
  998 + this.offset = SOURCE_OWN;
  999 + this.type = MAPPING_TYPE_RELATION;
  1000 + }
  1001 +
  1002 + /*
  1003 + * (non-Javadoc)
  1004 + *
  1005 + * @see mtas.analysis.parser.MtasBasicParser.MtasParserMapping#self()
  1006 + */
  1007 + @Override
  1008 + protected MtasCRMParserMappingCRMPair self() {
  1009 + return this;
  1010 + }
  1011 + }
  1012 +
  1013 +}
... ...
src/mtas/analysis/parser/MtasElanParser.java
... ... @@ -17,10 +17,12 @@ final public class MtasElanParser extends MtasXMLParser {
17 17 * @param config the config
18 18 */
19 19 public MtasElanParser(MtasConfiguration config) {
20   - super(config);
  20 + super(config);
21 21 }
22   -
23   - /* (non-Javadoc)
  22 +
  23 + /*
  24 + * (non-Javadoc)
  25 + *
24 26 * @see mtas.analysis.parser.MtasXMLParser#initParser()
25 27 */
26 28 @Override
... ...
src/mtas/analysis/parser/MtasFoliaParser.java
... ... @@ -17,10 +17,12 @@ final public class MtasFoliaParser extends MtasXMLParser {
17 17 * @param config the config
18 18 */
19 19 public MtasFoliaParser(MtasConfiguration config) {
20   - super(config);
  20 + super(config);
21 21 }
22   -
23   - /* (non-Javadoc)
  22 +
  23 + /*
  24 + * (non-Javadoc)
  25 + *
24 26 * @see mtas.analysis.parser.MtasXMLParser#initParser()
25 27 */
26 28 @Override
... ...
src/mtas/analysis/parser/MtasParser.java
... ... @@ -15,16 +15,19 @@ import mtas.analysis.util.MtasParserException;
15 15 * The Class MtasParser.
16 16 */
17 17 abstract public class MtasParser {
18   -
  18 +
19 19 /** The token collection. */
20 20 protected MtasTokenCollection tokenCollection;
21   -
  21 +
22 22 /** The config. */
23 23 protected MtasConfiguration config;
24 24  
25 25 /** The autorepair. */
26 26 protected Boolean autorepair = false;
27   -
  27 +
  28 + /** The makeunique. */
  29 + protected Boolean makeunique = false;
  30 +
28 31 /**
29 32 * Inits the parser.
30 33 *
... ... @@ -38,10 +41,13 @@ abstract public class MtasParser {
38 41 if (current.name.equals("autorepair")) {
39 42 autorepair = current.attributes.get("value").equals("true");
40 43 }
  44 + if (current.name.equals("makeunique")) {
  45 + makeunique = current.attributes.get("value").equals("true");
  46 + }
41 47 }
42 48 }
43 49 }
44   -
  50 +
45 51 /**
46 52 * Creates the token collection.
47 53 *
... ... @@ -59,7 +65,7 @@ abstract public class MtasParser {
59 65 * @return the string
60 66 */
61 67 public abstract String printConfig();
62   -
  68 +
63 69 /**
64 70 * The Class MtasParserObject.
65 71 */
... ... @@ -259,17 +265,17 @@ abstract public class MtasParser {
259 265 public void setText(String text) {
260 266 objectText = text;
261 267 }
262   -
  268 +
263 269 /**
264 270 * Adds the text.
265 271 *
266 272 * @param text the text
267 273 */
268 274 public void addText(String text) {
269   - if(objectText==null) {
  275 + if (objectText == null) {
270 276 objectText = text;
271 277 } else {
272   - objectText+=text;
  278 + objectText += text;
273 279 }
274 280 }
275 281  
... ...
src/mtas/analysis/parser/MtasSketchParser.java
... ... @@ -53,7 +53,9 @@ final public class MtasSketchParser extends MtasBasicParser {
53 53 }
54 54 }
55 55  
56   - /* (non-Javadoc)
  56 + /*
  57 + * (non-Javadoc)
  58 + *
57 59 * @see mtas.analysis.parser.MtasParser#initParser()
58 60 */
59 61 @Override
... ... @@ -62,7 +64,7 @@ final public class MtasSketchParser extends MtasBasicParser {
62 64 if (config != null) {
63 65  
64 66 // always word, no mappings
65   - wordType = new MtasParserType(MAPPING_TYPE_WORD, null);
  67 + wordType = new MtasParserType(MAPPING_TYPE_WORD, null, false);
66 68  
67 69 for (int i = 0; i < config.children.size(); i++) {
68 70 MtasConfiguration current = config.children.get(i);
... ... @@ -74,7 +76,7 @@ final public class MtasSketchParser extends MtasBasicParser {
74 76 String nameMapping = mapping.attributes.get("name");
75 77 if ((typeMapping != null)) {
76 78 if (typeMapping.equals(MAPPING_TYPE_WORD)) {
77   - MtasSketchParserMappingWordAnnotation m = new MtasSketchParserMappingWordAnnotation();
  79 + MtasSketchParserMappingWord m = new MtasSketchParserMappingWord();
78 80 m.processConfig(mapping);
79 81 wordType.addMapping(m);
80 82 } else if (typeMapping.equals(MAPPING_TYPE_WORD_ANNOTATION)
... ... @@ -85,7 +87,7 @@ final public class MtasSketchParser extends MtasBasicParser {
85 87 wordAnnotationTypes.get(nameMapping).addMapping(m);
86 88 } else {
87 89 MtasParserType t = new MtasParserType(typeMapping,
88   - nameMapping);
  90 + nameMapping, false);
89 91 t.addMapping(m);
90 92 wordAnnotationTypes.put(Integer.parseInt(nameMapping), t);
91 93 }
... ... @@ -97,7 +99,7 @@ final public class MtasSketchParser extends MtasBasicParser {
97 99 groupTypes.get(nameMapping).addMapping(m);
98 100 } else {
99 101 MtasParserType t = new MtasParserType(typeMapping,
100   - nameMapping);
  102 + nameMapping, false);
101 103 t.addMapping(m);
102 104 groupTypes.put(nameMapping, t);
103 105 }
... ... @@ -113,7 +115,9 @@ final public class MtasSketchParser extends MtasBasicParser {
113 115 }
114 116 }
115 117  
116   - /* (non-Javadoc)
  118 + /*
  119 + * (non-Javadoc)
  120 + *
117 121 * @see mtas.analysis.parser.MtasParser#createTokenCollection(java.io.Reader)
118 122 */
119 123 @Override
... ... @@ -337,11 +341,13 @@ final public class MtasSketchParser extends MtasBasicParser {
337 341 }
338 342 }
339 343 // final check
340   - tokenCollection.check(autorepair);
  344 + tokenCollection.check(autorepair, makeunique);
341 345 return tokenCollection;
342 346 }
343 347  
344   - /* (non-Javadoc)
  348 + /*
  349 + * (non-Javadoc)
  350 + *
345 351 * @see mtas.analysis.parser.MtasParser#printConfig()
346 352 */
347 353 @Override
... ... @@ -373,6 +379,34 @@ final public class MtasSketchParser extends MtasBasicParser {
373 379 }
374 380  
375 381 /**
  382 + * The Class MtasSketchParserMappingWord.
  383 + */
  384 + private class MtasSketchParserMappingWord
  385 + extends MtasParserMapping<MtasSketchParserMappingWord> {
  386 +
  387 + /**
  388 + * Instantiates a new mtas sketch parser mapping word.
  389 + */
  390 + public MtasSketchParserMappingWord() {
  391 + super();
  392 + this.position = SOURCE_OWN;
  393 + this.realOffset = SOURCE_OWN;
  394 + this.offset = SOURCE_OWN;
  395 + this.type = MAPPING_TYPE_WORD;
  396 + }
  397 +
  398 + /*
  399 + * (non-Javadoc)
  400 + *
  401 + * @see mtas.analysis.parser.MtasBasicParser.MtasParserMapping#self()
  402 + */
  403 + @Override
  404 + protected MtasSketchParserMappingWord self() {
  405 + return this;
  406 + }
  407 + }
  408 +
  409 + /**
376 410 * The Class MtasSketchParserMappingWordAnnotation.
377 411 */
378 412 private class MtasSketchParserMappingWordAnnotation
... ...
src/mtas/analysis/parser/MtasTEIParser.java
... ... @@ -17,10 +17,12 @@ final public class MtasTEIParser extends MtasXMLParser {
17 17 * @param config the config
18 18 */
19 19 public MtasTEIParser(MtasConfiguration config) {
20   - super(config);
  20 + super(config);
21 21 }
22   -
23   - /* (non-Javadoc)
  22 +
  23 + /*
  24 + * (non-Javadoc)
  25 + *
24 26 * @see mtas.analysis.parser.MtasXMLParser#initParser()
25 27 */
26 28 @Override
... ...