Commit 8148835a47574769c6705c325af983b18bc83f6d

Authored by Matthijs Brouwer
1 parent 229cfad6

update

Showing 209 changed files with 27681 additions and 10521 deletions

Too many changes to show.

To preserve performance only 29 of 209 files are displayed.

conf/parser/mtas.xml
1 <?xml version="1.0" encoding="UTF-8" ?> 1 <?xml version="1.0" encoding="UTF-8" ?>
2 <mtas> 2 <mtas>
3 <configurations type="mtas.analysis.util.MtasTokenizerFactory"> 3 <configurations type="mtas.analysis.util.MtasTokenizerFactory">
  4 + <configuration name="test" file="mtas/folia_test.xml" />
  5 + <configuration name="CRM" file="mtas/crm_test.xml" />
4 <configuration name="DBNL" file="mtas/folia_dbnl.xml" /> 6 <configuration name="DBNL" file="mtas/folia_dbnl.xml" />
  7 + <configuration name="DDD" file="mtas/folia_ddd.xml" />
5 <configuration name="EDBO" file="mtas/folia_edbo.xml" /> 8 <configuration name="EDBO" file="mtas/folia_edbo.xml" />
6 <configuration name="SONAR" file="mtas/folia_sonar.xml" /> 9 <configuration name="SONAR" file="mtas/folia_sonar.xml" />
7 </configurations> 10 </configurations>
8 <configurations type="mtas.analysis.util.MtasCharFilterFactory"> 11 <configurations type="mtas.analysis.util.MtasCharFilterFactory">
  12 + <configuration name="test" type="file" />
  13 + <configuration name="CRM" type="file" prefix="/Users/matthijs/Software/Mtas/data/CRM/data/files/" postfix=".txt" />
9 <configuration name="DBNL" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" /> 14 <configuration name="DBNL" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" />
  15 + <configuration name="DDD" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" />
10 <configuration name="EDBO" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" /> 16 <configuration name="EDBO" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" />
11 <configuration name="SONAR" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" /> 17 <configuration name="SONAR" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" />
12 </configurations> 18 </configurations>
conf/parser/mtas/crm_test.xml 0 → 100644
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<mtas>
  3 +
  4 + <!-- START MTAS INDEX CONFIGURATION -->
  5 + <index>
  6 + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
  7 + <payload index="false" />
  8 + <offset index="false" />
  9 + <realoffset index="false" />
  10 + <parent index="true" />
  11 + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
  12 + </index>
  13 + <!-- END MTAS INDEX CONFIGURATION -->
  14 +
  15 +
  16 +
  17 + <!-- START CONFIGURATION MTAS CRM PARSER -->
  18 + <parser name="mtas.analysis.parser.MtasCRMParser">
  19 +
  20 + <!-- START GENERAL SETTINGS MTAS PARSER -->
  21 + <autorepair value="true" />
  22 + <makeunique value="true" />
  23 + <!-- END GENERAL SETTINGS MTAS PARSER -->
  24 +
  25 + <mappings>
  26 +
  27 + <mapping type="word">
  28 + </mapping>
  29 +
  30 + <mapping type="wordAnnotation" name="0">
  31 + <token type="string" offset="false" parent="false">
  32 + <pre>
  33 + <item type="string" value="t" />
  34 + </pre>
  35 + <post>
  36 + <item type="text" />
  37 + </post>
  38 + </token>
  39 + </mapping>
  40 + <mapping type="wordAnnotation" name="0">
  41 + <token type="string" offset="false" parent="false">
  42 + <pre>
  43 + <item type="string" value="t_lc" />
  44 + </pre>
  45 + <post>
  46 + <item type="text" filter="ascii,lowercase" />
  47 + </post>
  48 + </token>
  49 + </mapping>
  50 + <mapping type="wordAnnotation" name="1">
  51 + <token type="string" offset="false" parent="false">
  52 + <pre>
  53 + <item type="string" value="t1" />
  54 + </pre>
  55 + <post>
  56 + <item type="text" />
  57 + </post>
  58 + </token>
  59 + </mapping>
  60 + <mapping type="wordAnnotation" name="1">
  61 + <token type="string" offset="false" parent="false">
  62 + <pre>
  63 + <item type="string" value="t1_lc" />
  64 + </pre>
  65 + <post>
  66 + <item type="text" filter="ascii,lowercase" />
  67 + </post>
  68 + </token>
  69 + </mapping>
  70 + <mapping type="wordAnnotation" name="2">
  71 + <token type="string" offset="false" parent="false">
  72 + <pre>
  73 + <item type="string" value="t2" />
  74 + </pre>
  75 + <post>
  76 + <item type="text" />
  77 + </post>
  78 + </token>
  79 + </mapping>
  80 + <mapping type="wordAnnotation" name="2">
  81 + <token type="string" offset="false" parent="false">
  82 + <pre>
  83 + <item type="string" value="t2_lc" />
  84 + </pre>
  85 + <post>
  86 + <item type="text" filter="ascii,lowercase" />
  87 + </post>
  88 + </token>
  89 + </mapping>
  90 + <mapping type="wordAnnotation" name="3">
  91 + <token type="string" offset="false" parent="false">
  92 + <pre>
  93 + <item type="string" value="lemma" />
  94 + </pre>
  95 + <post>
  96 + <item type="text" />
  97 + </post>
  98 + </token>
  99 + </mapping>
  100 + <mapping type="wordAnnotation" name="4">
  101 + <token type="string" offset="false" parent="false">
  102 + <pre>
  103 + <item type="string" value="crm" />
  104 + </pre>
  105 + <post>
  106 + <item type="text" />
  107 + </post>
  108 + </token>
  109 + </mapping>
  110 +
  111 + <mapping type="crmPair" name="6">
  112 + <condition>
  113 + <item type="text" not="true" condition="-" />
  114 + </condition>
  115 + </mapping>
  116 + <mapping type="crmPair" name="part">
  117 + <token type="string" offset="false" parent="false">
  118 + <pre>
  119 + <item type="name" />
  120 + </pre>
  121 + <post>
  122 + <item type="text" />
  123 + </post>
  124 + </token>
  125 + </mapping>
  126 +
  127 + <mapping type="crmSentence" name="7">
  128 + <token type="string" offset="false" parent="false">
  129 + <pre>
  130 + <item type="string" value="s"/>
  131 + </pre>
  132 + <post>
  133 + <item type="text" />
  134 + </post>
  135 + </token>
  136 + <condition>
  137 + <item type="text" not="true" condition="-" />
  138 + <item type="text" not="true" condition="2" />
  139 + <item type="text" not="true" condition="4" />
  140 + <item type="text" not="true" condition="5" />
  141 + <item type="text" not="true" condition="6" />
  142 + <item type="text" not="true" condition="8" />
  143 + </condition>
  144 + </mapping>
  145 + <mapping type="crmClause" name="7">
  146 + <token type="string" offset="false" parent="false">
  147 + <pre>
  148 + <item type="string" value="sc"/>
  149 + </pre>
  150 + <post>
  151 + <item type="text" />
  152 + </post>
  153 + </token>
  154 + <condition>
  155 + <item type="text" not="true" condition="-" />
  156 + <item type="text" not="true" condition="0" />
  157 + <item type="text" not="true" condition="1" />
  158 + </condition>
  159 + </mapping>
  160 + <mapping type="crmClause" name="7">
  161 + <condition>
  162 + <item type="text" not="true" condition="-" />
  163 + </condition>
  164 + </mapping>
  165 +
  166 + <mapping type="wordAnnotation" name="pos">
  167 + <token type="string" offset="false" parent="false">
  168 + <pre>
  169 + <item type="name" />
  170 + </pre>
  171 + <post>
  172 + <item type="text" />
  173 + </post>
  174 + </token>
  175 + </mapping>
  176 + <mapping type="wordAnnotation" name="feat.getal">
  177 + <token type="string" offset="false" parent="false">
  178 + <pre>
  179 + <item type="name" />
  180 + </pre>
  181 + <post>
  182 + <item type="text" />
  183 + </post>
  184 + </token>
  185 + </mapping>
  186 + <mapping type="wordAnnotation" name="feat.persoon">
  187 + <token type="string" offset="false" parent="false">
  188 + <pre>
  189 + <item type="name" />
  190 + </pre>
  191 + <post>
  192 + <item type="text" />
  193 + </post>
  194 + </token>
  195 + </mapping>
  196 + <mapping type="wordAnnotation" name="feat.ntype">
  197 + <token type="string" offset="false" parent="false">
  198 + <pre>
  199 + <item type="name" />
  200 + </pre>
  201 + <post>
  202 + <item type="text" />
  203 + </post>
  204 + </token>
  205 + </mapping>
  206 + <mapping type="wordAnnotation" name="feat.pvtijd">
  207 + <token type="string" offset="false" parent="false">
  208 + <pre>
  209 + <item type="name" />
  210 + </pre>
  211 + <post>
  212 + <item type="text" />
  213 + </post>
  214 + </token>
  215 + </mapping>
  216 + <mapping type="wordAnnotation" name="feat.wvorm">
  217 + <token type="string" offset="false" parent="false">
  218 + <pre>
  219 + <item type="name" />
  220 + </pre>
  221 + <post>
  222 + <item type="text" />
  223 + </post>
  224 + </token>
  225 + </mapping>
  226 + <mapping type="wordAnnotation" name="feat.numtype">
  227 + <token type="string" offset="false" parent="false">
  228 + <pre>
  229 + <item type="name" />
  230 + </pre>
  231 + <post>
  232 + <item type="text" />
  233 + </post>
  234 + </token>
  235 + </mapping>
  236 + <mapping type="wordAnnotation" name="feat.vwtype">
  237 + <token type="string" offset="false" parent="false">
  238 + <pre>
  239 + <item type="name" />
  240 + </pre>
  241 + <post>
  242 + <item type="text" />
  243 + </post>
  244 + </token>
  245 + </mapping>
  246 + <mapping type="wordAnnotation" name="feat.lwtype">
  247 + <token type="string" offset="false" parent="false">
  248 + <pre>
  249 + <item type="name" />
  250 + </pre>
  251 + <post>
  252 + <item type="text" />
  253 + </post>
  254 + </token>
  255 + </mapping>
  256 + <mapping type="wordAnnotation" name="feat.form">
  257 + <token type="string" offset="false" parent="false">
  258 + <pre>
  259 + <item type="name" />
  260 + </pre>
  261 + <post>
  262 + <item type="text" />
  263 + </post>
  264 + </token>
  265 + </mapping>
  266 + <mapping type="wordAnnotation" name="feat.probleemgeval">
  267 + <token type="string" offset="false" parent="false">
  268 + <pre>
  269 + <item type="name" />
  270 + </pre>
  271 + <post>
  272 + <item type="text" />
  273 + </post>
  274 + </token>
  275 + </mapping>
  276 + </mappings>
  277 +
  278 + <functions>
  279 + <function type="crmPair" name="6" split="+">
  280 + <condition value="">
  281 + <output name="part" />
  282 + </condition>
  283 + </function>
  284 + <function type="wordAnnotation" name="4" split="+">
  285 + <condition value="000,001,002,003,004,005,006,009">
  286 + <output name="pos" value="N" />
  287 + <output name="feat.getal" value="ev" />
  288 + </condition>
  289 + <condition value="010,011,012,013,014,015,016,019">
  290 + <output name="pos" value="N" />
  291 + <output name="feat.getal" value="mv" />
  292 + </condition>
  293 + <condition value="020,021,022,023,024,025,026,029">
  294 + <output name="pos" value="N" />
  295 + <output name="feat.ntype" value="eigen" />
  296 + </condition>
  297 + <condition value="090,091,092,093,094,095,096,099">
  298 + <output name="pos" value="N" />
  299 + <output name="feat.probleemgeval" />
  300 + </condition>
  301 + <condition value="100,101,102,103,104,105,106,109">
  302 + <output name="pos" value="ADJ" />
  303 + <output name="feat.getal" value="ev" />
  304 + </condition>
  305 + <condition value="110,111,112,113,114,115,116,119">
  306 + <output name="pos" value="ADJ" />
  307 + <output name="feat.getal" value="mv" />
  308 + </condition>
  309 + <condition value="190,191,192,193,194,195,196,199">
  310 + <output name="pos" value="ADJ" />
  311 + <output name="feat.probleemgeval" />
  312 + </condition>
  313 +
  314 + <condition value="200,201,202,203,204,205,206,209">
  315 + <output name="pos" value="WW" />
  316 + <output name="feat.pvtijd" value="tgw" />
  317 + </condition>
  318 + <condition value="210,211,212,213,214,215,216,219">
  319 + <output name="pos" value="WW" />
  320 + <output name="feat.pvtijd" value="tgw" />
  321 + </condition>
  322 + <condition value="220,221,222,223,224,225,226,229">
  323 + <output name="pos" value="WW" />
  324 + <output name="feat.pvtijd" value="verl" />
  325 + </condition>
  326 + <condition value="230,231,232,233,234,235,236,239">
  327 + <output name="pos" value="WW" />
  328 + <output name="feat.pvtijd" value="verl" />
  329 + </condition>
  330 + <condition value="240,241,242,243,244,245,246,249">
  331 + <output name="pos" value="WW" />
  332 + </condition>
  333 + <condition value="250,251,252,253,254,255,256,259">
  334 + <output name="pos" value="WW" />
  335 + <output name="feat.wvorm" value="inf" />
  336 + </condition>
  337 + <condition value="260,261,262,263,264,265,266,269">
  338 + <output name="pos" value="WW" />
  339 + <output name="feat.wvorm" value="inf" />
  340 + </condition>
  341 + <condition value="270,271,272,273,274,275,276,279">
  342 + <output name="pos" value="WW" />
  343 + </condition>
  344 + <condition value="280,281,282,283,284,285,286,289">
  345 + <output name="pos" value="WW" />
  346 + </condition>
  347 + <condition value="290,291,292,293,294,295,296,299">
  348 + <output name="pos" value="WW" />
  349 + <output name="feat.probleemgeval" />
  350 + </condition>
  351 +
  352 +
  353 + <condition value="300,301,302,303,304,305,306,309">
  354 + <output name="pos" value="TW" />
  355 + <output name="feat.numtype" value="hoofd" />
  356 + </condition>
  357 + <condition value="310,311,312,313,314,315,316,319">
  358 + <output name="pos" value="TW" />
  359 + <output name="feat.numtype" value="rang" />
  360 + </condition>
  361 + <condition value="320,321,322,323,324,325,326,329">
  362 + <output name="pos" value="TW" />
  363 + </condition>
  364 + <condition value="390,391,392,393,394,395,396,399">
  365 + <output name="pos" value="TW" />
  366 + <output name="feat.probleemgeval" />
  367 + </condition>
  368 +
  369 + <condition value="401">
  370 + <output name="pos" value="VNW" />
  371 + <output name="feat.getal" value="ev" />
  372 + <output name="feat.persoon" value="1" />
  373 + </condition>
  374 + <condition value="402">
  375 + <output name="pos" value="VNW" />
  376 + <output name="feat.getal" value="ev" />
  377 + <output name="feat.persoon" value="2" />
  378 + </condition>
  379 + <condition value="403">
  380 + <output name="pos" value="VNW" />
  381 + <output name="feat.getal" value="ev" />
  382 + <output name="feat.persoon" value="3" />
  383 + </condition>
  384 + <condition value="404">
  385 + <output name="pos" value="VNW" />
  386 + <output name="feat.getal" value="mv" />
  387 + <output name="feat.persoon" value="1" />
  388 + </condition>
  389 + <condition value="405">
  390 + <output name="pos" value="VNW" />
  391 + <output name="feat.getal" value="mv" />
  392 + <output name="feat.persoon" value="2" />
  393 + </condition>
  394 + <condition value="406">
  395 + <output name="pos" value="VNW" />
  396 + <output name="feat.getal" value="mv" />
  397 + <output name="feat.persoon" value="3" />
  398 + </condition>
  399 + <condition value="409">
  400 + <output name="pos" value="VNW" />
  401 + <output name="feat.probleemgeval" />
  402 + </condition>
  403 + <condition value="410,411,412,413,414,415,416,419">
  404 + <output name="pos" value="VNW" />
  405 + <output name="feat.vwtype" value="aanw" />
  406 + </condition>
  407 + <condition value="420,421,422,423,424,425,426,429">
  408 + <output name="pos" value="VNW" />
  409 + <output name="feat.vwtype" value="betr" />
  410 + </condition>
  411 + <condition value="430,431,432,433,434,435,436,439">
  412 + <output name="pos" value="VNW" />
  413 + <output name="feat.vwtype" value="vb" />
  414 + </condition>
  415 + <condition value="434,441,442,443,444,445,446,449">
  416 + <output name="pos" value="VNW" />
  417 + <output name="feat.vwtype" value="vb" />
  418 + </condition>
  419 + <condition value="440,441,442,443,444,445,446,449">
  420 + <output name="pos" value="VNW" />
  421 + <output name="feat.lwtype" value="onbep" />
  422 + </condition>
  423 + <condition value="450,451,452,453,454,455,456,459">
  424 + <output name="pos" value="VNW" />
  425 + <output name="feat.vwtype" value="bez" />
  426 + </condition>
  427 + <condition value="461">
  428 + <output name="pos" value="VNW" />
  429 + <output name="feat.vwtype" value="refl" />
  430 + <output name="feat.getal" value="ev" />
  431 + <output name="feat.persoon" value="1" />
  432 + </condition>
  433 + <condition value="462">
  434 + <output name="pos" value="VNW" />
  435 + <output name="feat.vwtype" value="refl" />
  436 + <output name="feat.getal" value="ev" />
  437 + <output name="feat.persoon" value="2" />
  438 + </condition>
  439 + <condition value="463">
  440 + <output name="pos" value="VNW" />
  441 + <output name="feat.vwtype" value="refl" />
  442 + <output name="feat.getal" value="ev" />
  443 + <output name="feat.persoon" value="3" />
  444 + </condition>
  445 + <condition value="464">
  446 + <output name="pos" value="VNW" />
  447 + <output name="feat.vwtype" value="refl" />
  448 + <output name="feat.getal" value="mv" />
  449 + <output name="feat.persoon" value="1" />
  450 + </condition>
  451 + <condition value="465">
  452 + <output name="pos" value="VNW" />
  453 + <output name="feat.vwtype" value="refl" />
  454 + <output name="feat.getal" value="mv" />
  455 + <output name="feat.persoon" value="2" />
  456 + </condition>
  457 + <condition value="466">
  458 + <output name="pos" value="VNW" />
  459 + <output name="feat.vwtype" value="refl" />
  460 + <output name="feat.getal" value="mv" />
  461 + <output name="feat.persoon" value="3" />
  462 + </condition>
  463 + <condition value="469">
  464 + <output name="pos" value="VNW" />
  465 + <output name="feat.vwtype" value="refl" />
  466 + <output name="feat.probleemgeval" />
  467 + </condition>
  468 + <condition value="470,471,472,473,474,475,476,479">
  469 + <output name="pos" value="LID" />
  470 + </condition>
  471 + <condition value="480,481,482,483,484,485,486,489">
  472 + <output name="pos" value="LID" />
  473 + </condition>
  474 + <condition value="490,491,492,493,494,495,496,499">
  475 + <output name="pos" value="VNW" />
  476 + <output name="feat.probleemgeval" />
  477 + </condition>
  478 +
  479 + <condition value="500,501,502,503,504,505,506,509">
  480 + <output name="pos" value="BW" />
  481 + </condition>
  482 + <condition value="510,511,512,513,514,515,516,519">
  483 + <output name="pos" value="BW" />
  484 + </condition>
  485 + <condition value="520,521,522,523,524,525,526,529">
  486 + <output name="pos" value="BW" />
  487 + </condition>
  488 + <condition value="530,531,532,533,534,535,536,539">
  489 + <output name="pos" value="BW" />
  490 + </condition>
  491 + <condition value="540,541,542,543,544,545,546,549">
  492 + <output name="pos" value="BW" />
  493 + </condition>
  494 + <condition value="550,551,552,553,554,555,556,559">
  495 + <output name="pos" value="BW" />
  496 + </condition>
  497 + <condition value="560,561,562,563,564,565,566,569">
  498 + <output name="pos" value="BW" />
  499 + </condition>
  500 + <condition value="590,591,592,593,594,595,596,599">
  501 + <output name="pos" value="BW" />
  502 + <output name="feat.probleemgeval" />
  503 + </condition>
  504 +
  505 + <condition value="600,601,602,603,604,605,606,609">
  506 + <output name="pos" value="BW" />
  507 + </condition>
  508 + <condition value="610,611,612,613,614,615,616,619">
  509 + <output name="pos" value="BW" />
  510 + </condition>
  511 + <condition value="620,621,622,623,624,625,626,629">
  512 + <output name="pos" value="BW" />
  513 + </condition>
  514 + <condition value="630,631,632,633,634,635,636,639">
  515 + <output name="pos" value="BW" />
  516 + </condition>
  517 + <condition value="640,641,642,643,644,645,646,649">
  518 + <output name="pos" value="BW" />
  519 + </condition>
  520 + <condition value="650,651,652,653,654,655,656,659">
  521 + <output name="pos" value="BW" />
  522 + </condition>
  523 + <condition value="690,691,692,693,694,695,696,699">
  524 + <output name="pos" value="BW" />
  525 + <output name="feat.probleemgeval" />
  526 + </condition>
  527 +
  528 + <condition value="700,701,702,703,704,705,706,709">
  529 + <output name="pos" value="VZ" />
  530 + </condition>
  531 + <condition value="790,791,792,793,794,795,796,799">
  532 + <output name="pos" value="VZ" />
  533 + </condition>
  534 +
  535 + <condition value="800,801,802,803,804,805,806,809">
  536 + <output name="pos" value="VG" />
  537 + </condition>
  538 + <condition value="810,811,812,813,814,815,816,819">
  539 + <output name="pos" value="VG" />
  540 + </condition>
  541 + <condition value="820,821,822,823,824,825,826,829">
  542 + <output name="pos" value="VG" />
  543 + </condition>
  544 + <condition value="830,831,832,833,834,835,836,839">
  545 + <output name="pos" value="VG" />
  546 + </condition>
  547 + <condition value="840,841,842,843,844,845,846,849">
  548 + <output name="pos" value="VG" />
  549 + </condition>
  550 + <condition value="850,851,852,853,854,855,856,859">
  551 + <output name="pos" value="VG" />
  552 + </condition>
  553 + <condition value="860,861,862,863,864,865,866,869">
  554 + <output name="pos" value="VG" />
  555 + </condition>
  556 + <condition value="870,871,872,873,874,875,876,879">
  557 + <output name="pos" value="VG" />
  558 + </condition>
  559 + <condition value="880,881,882,883,884,885,886,889">
  560 + <output name="pos" value="VG" />
  561 + </condition>
  562 + <condition value="890,891,892,893,894,895,896,899">
  563 + <output name="pos" value="VG" />
  564 + <output name="feat.probleemgeval" />
  565 + </condition>
  566 +
  567 + <condition value="900,901,902,903,904,905,906,909">
  568 + <output name="feat.probleemgeval" />
  569 + </condition>
  570 + <condition value="900,901,902,903,904,905,906,909">
  571 + <output name="feat.probleemgeval" />
  572 + </condition>
  573 + <condition value="990,991,992,993,994,995,996,999">
  574 + <output name="feat.probleemgeval" />
  575 + </condition>
  576 +
  577 + <condition
  578 + value="001,011,021,091,101,111,191,201,211,221,231,241,251,261,271,281,291,301,311,321,391,411,421,431,441,451,471,481,491,501,511,521,531,541,551,561,591,601,611,621,631,641,651,691,701,791,801,811,821,831,841,851,861,871,881,891,901,911,991">
  579 + <output name="feat.form" value="-e" />
  580 + </condition>
  581 + <condition
  582 + value="002,012,022,092,102,112,192,202,212,222,232,242,252,262,272,282,292,302,312,322,392,412,422,432,442,452,472,482,492,502,512,522,532,542,552,562,592,602,612,622,632,642,652,692,702,792,802,812,822,832,842,852,862,872,882,892,902,912,992">
  583 + <output name="feat.form" value="-s/-th" />
  584 + </condition>
  585 + <condition
  586 + value="003,013,023,093,103,113,193,203,213,223,233,243,253,263,273,283,293,303,313,323,393,413,423,433,443,453,473,483,493,503,513,523,533,543,553,563,593,603,613,623,633,643,653,693,703,793,803,813,823,833,843,853,863,873,883,893,903,913,993">
  587 + <output name="feat.form" value="-t" />
  588 + </condition>
  589 + <condition
  590 + value="004,014,024,094,104,114,194,204,214,224,234,244,254,264,274,284,294,304,314,324,394,414,424,434,444,454,474,484,494,504,514,524,534,544,554,564,594,604,614,624,634,644,654,694,704,794,804,814,824,834,844,854,864,874,884,894,904,914,994">
  591 + <output name="feat.form" value="-n" />
  592 + </condition>
  593 + <condition
  594 + value="005,015,025,095,105,115,195,205,215,225,235,245,255,265,275,285,295,305,315,325,395,415,425,435,445,455,475,485,495,505,515,525,535,545,555,565,595,605,615,625,635,645,655,695,705,795,805,815,825,835,845,855,865,875,885,895,905,915,995">
  595 + <output name="feat.form" value="-r/-re" />
  596 + </condition>
  597 + <condition
  598 + value="006,016,026,096,106,116,196,206,216,226,236,246,256,266,276,286,296,306,316,326,396,416,426,436,446,456,476,486,496,506,516,526,536,546,556,566,596,606,616,626,636,646,656,696,706,796,806,816,826,836,846,856,866,876,886,896,906,916,996">
  599 + <output name="feat.form" value="-a" />
  600 + </condition>
  601 + <condition value="009,019,029,099">
  602 + <output name="feat.form" value="unclear" />
  603 + </condition>
  604 +
  605 + </function>
  606 + </functions>
  607 +
  608 + </parser>
  609 + <!-- END CONFIGURATION MTAS CRM PARSER -->
  610 +
  611 +
  612 +</mtas>
0 \ No newline at end of file 613 \ No newline at end of file
conf/parser/mtas/elan_mks.xml
@@ -17,9 +17,10 @@ @@ -17,9 +17,10 @@
17 <!-- START CONFIGURATION MTAS FOLIA PARSER --> 17 <!-- START CONFIGURATION MTAS FOLIA PARSER -->
18 <parser name="mtas.analysis.parser.MtasElanParser"> 18 <parser name="mtas.analysis.parser.MtasElanParser">
19 19
20 - <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> 20 + <!-- START GENERAL SETTINGS MTAS PARSER -->
21 <autorepair value="true" /> 21 <autorepair value="true" />
22 - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> 22 + <makeunique value="true" />
  23 + <!-- END GENERAL SETTINGS MTAS PARSER -->
23 24
24 <!-- START REFERENCES --> 25 <!-- START REFERENCES -->
25 <references> 26 <references>
conf/parser/mtas/folia_dbnl.xml
@@ -19,7 +19,8 @@ @@ -19,7 +19,8 @@
19 19
20 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> 20 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
21 <autorepair value="true" /> 21 <autorepair value="true" />
22 - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> 22 + <makeunique value="true" />
  23 + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
23 24
24 <!-- START REFERENCES --> 25 <!-- START REFERENCES -->
25 <references> 26 <references>
conf/parser/mtas/folia_ddd.xml 0 → 100644
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<mtas>
  3 +
  4 + <!-- START MTAS INDEX CONFIGURATION -->
  5 + <index>
  6 + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
  7 + <payload index="false" />
  8 + <offset index="false" />
  9 + <realoffset index="false" />
  10 + <parent index="true" />
  11 + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
  12 + </index>
  13 + <!-- END MTAS INDEX CONFIGURATION -->
  14 +
  15 +
  16 +
  17 + <!-- START CONFIGURATION MTAS FOLIA PARSER -->
  18 + <parser name="mtas.analysis.parser.MtasFoliaParser">
  19 +
  20 + <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
  21 + <autorepair value="true" />
  22 + <makeunique value="true" />
  23 + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
  24 +
  25 + <!-- START REFERENCES -->
  26 + <references>
  27 + <reference name="wref" ref="id" />
  28 + </references>
  29 + <!-- END REFERENCES -->
  30 +
  31 + <!-- START MAPPINGS -->
  32 + <mappings>
  33 +
  34 + <!-- START WORDS -->
  35 + <mapping type="word" name="w">
  36 + </mapping>
  37 + <mapping type="word" name="w">
  38 + <token type="string" offset="false" realoffset="false" parent="false">
  39 + <pre>
  40 + <item type="name" />
  41 + </pre>
  42 + <post>
  43 + <item type="attribute" name="class" />
  44 + </post>
  45 + </token>
  46 + <condition>
  47 + <item type="attribute" name="class" />
  48 + <item type="attribute" name="class" not="true" condition="WORD" />
  49 + </condition>
  50 + </mapping>
  51 + <!-- END WORDS -->
  52 +
  53 + <!-- START WORD ANNOTATIONS -->
  54 + <mapping type="wordAnnotation" name="t">
  55 + <token type="string" offset="false">
  56 + <pre>
  57 + <item type="name" />
  58 + </pre>
  59 + <post>
  60 + <item type="text" />
  61 + </post>
  62 + </token>
  63 + <token type="string" offset="false" realoffset="false" parent="false">
  64 + <pre>
  65 + <item type="name" />
  66 + <item type="string" value="_lc" />
  67 + </pre>
  68 + <post>
  69 + <item type="text" filter="ascii,lowercase" />
  70 + </post>
  71 + </token>
  72 + <condition>
  73 + <item type="ancestor" number="0" />
  74 + <item type="ancestorWord" number="1" />
  75 + <item type="unknownAncestor" number="0" />
  76 + </condition>
  77 + </mapping>
  78 + <!-- END WORD ANNOTATIONS -->
  79 +
  80 + <!-- START RELATIONS -->
  81 + <!-- END RELATIONS -->
  82 +
  83 + <!-- START GROUPS -->
  84 + <mapping type="group" name="s">
  85 + <token type="string" offset="false">
  86 + <pre>
  87 + <item type="name" />
  88 + </pre>
  89 + <post>
  90 + <item type="attribute" name="class" />
  91 + </post>
  92 + </token>
  93 + </mapping>
  94 + <mapping type="group" name="p">
  95 + <token type="string" offset="false">
  96 + <pre>
  97 + <item type="name" />
  98 + </pre>
  99 + <post>
  100 + <item type="attribute" name="class" />
  101 + </post>
  102 + </token>
  103 + </mapping>
  104 + <mapping type="group" name="div">
  105 + <token type="string" offset="false">
  106 + <pre>
  107 + <item type="name" />
  108 + </pre>
  109 + <post>
  110 + <item type="attribute" name="class" />
  111 + </post>
  112 + </token>
  113 + </mapping>
  114 + <mapping type="group" name="head">
  115 + <token type="string" offset="false">
  116 + <pre>
  117 + <item type="name" />
  118 + </pre>
  119 + <post>
  120 + <item type="attribute" name="class" />
  121 + </post>
  122 + </token>
  123 + </mapping>
  124 + <!-- END GROUPS -->
  125 +
  126 + <!-- START GROUP ANNOTATIONS -->
  127 + <mapping type="groupAnnotation" name="lang">
  128 + <token type="string" offset="false" realoffset="false" parent="false">
  129 + <pre>
  130 + <item type="name" />
  131 + </pre>
  132 + <post>
  133 + <item type="attribute" name="class" />
  134 + </post>
  135 + </token>
  136 + </mapping>
  137 + <!-- END GROUP ANNOTATIONS -->
  138 +
  139 + </mappings>
  140 + <!-- END MAPPINGS -->
  141 +
  142 + </parser>
  143 + <!-- END CONFIGURATION MTAS FOLIA PARSER -->
  144 +
  145 +
  146 +</mtas>
0 \ No newline at end of file 147 \ No newline at end of file
conf/parser/mtas/folia_edbo.xml
@@ -17,6 +17,7 @@ @@ -17,6 +17,7 @@
17 17
18 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> 18 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
19 <autorepair value="true" /> 19 <autorepair value="true" />
  20 + <makeunique value="true" />
20 <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> 21 <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
21 22
22 <!-- START REFERENCES --> 23 <!-- START REFERENCES -->
@@ -72,30 +73,6 @@ @@ -72,30 +73,6 @@
72 <item type="unknownAncestor" number="0" /> 73 <item type="unknownAncestor" number="0" />
73 </condition> 74 </condition>
74 </mapping> 75 </mapping>
75 - <mapping type="wordAnnotation" name="aref">  
76 - <token type="string" offset="false">  
77 - <pre>  
78 - <item type="string" value="translated.t" />  
79 - </pre>  
80 - <post>  
81 - <item type="attribute" name="t" />  
82 - </post>  
83 - </token>  
84 - <token type="string" offset="false" realoffset="false" parent="false">  
85 - <pre>  
86 - <item type="string" value="translated.t" />  
87 - <item type="string" value="_lc" />  
88 - </pre>  
89 - <post>  
90 - <item type="attribute" name="t" filter="ascii,lowercase" />  
91 - </post>  
92 - </token>  
93 - <condition>  
94 - <item type="ancestor" number="0" />  
95 - <item type="ancestorWord" number="1" />  
96 - <item type="unknownAncestor" number="1" />  
97 - </condition>  
98 - </mapping>  
99 <mapping type="wordAnnotation" name="lemma"> 76 <mapping type="wordAnnotation" name="lemma">
100 <token type="string" offset="false" realoffset="false" parent="false"> 77 <token type="string" offset="false" realoffset="false" parent="false">
101 <pre> 78 <pre>
@@ -109,24 +86,6 @@ @@ -109,24 +86,6 @@
109 <item type="attribute" name="class" /> 86 <item type="attribute" name="class" />
110 <item type="ancestor" number="0" /> 87 <item type="ancestor" number="0" />
111 <item type="unknownAncestor" number="0" /> 88 <item type="unknownAncestor" number="0" />
112 - <item type="attribute" name="set" condition="original.http://ilk.uvt.nl/folia/sets/frog-mblem-nl" />  
113 - </condition>  
114 - </mapping>  
115 - <mapping type="wordAnnotation" name="lemma">  
116 - <token type="string" offset="false" realoffset="false" parent="false">  
117 - <pre>  
118 - <item type="string" value="translated." />  
119 - <item type="name" />  
120 - </pre>  
121 - <post>  
122 - <item type="attribute" name="class" />  
123 - </post>  
124 - </token>  
125 - <condition>  
126 - <item type="attribute" name="class" />  
127 - <item type="ancestor" number="0" />  
128 - <item type="unknownAncestor" number="1" />  
129 - <item type="attribute" name="set" condition="translated.http://ilk.uvt.nl/folia/sets/frog-mblem-nl" />  
130 </condition> 89 </condition>
131 </mapping> 90 </mapping>
132 <mapping type="wordAnnotation" name="morphology"> 91 <mapping type="wordAnnotation" name="morphology">
@@ -166,54 +125,11 @@ @@ -166,54 +125,11 @@
166 <item type="ancestor" number="0" /> 125 <item type="ancestor" number="0" />
167 <item type="unknownAncestor" number="0" /> 126 <item type="unknownAncestor" number="0" />
168 <item type="attribute" name="class" /> 127 <item type="attribute" name="class" />
169 - <item type="attribute" name="set" condition="original.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" />  
170 - </condition>  
171 - </mapping>  
172 - <mapping type="wordAnnotation" name="pos">  
173 - <token type="string" offset="false" realoffset="false" parent="false">  
174 - <pre>  
175 - <item type="string" value="translated." />  
176 - <item type="name" />  
177 - </pre>  
178 - <post>  
179 - <item type="attribute" name="head" />  
180 - </post>  
181 - <payload>  
182 - <item type="attribute" name="confidence" />  
183 - </payload>  
184 - </token>  
185 - <condition>  
186 - <item type="ancestor" number="0" />  
187 - <item type="unknownAncestor" number="1" />  
188 - <item type="attribute" name="class" />  
189 - <item type="attribute" name="set" condition="translated.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" />  
190 - </condition>  
191 - </mapping>  
192 - <mapping type="wordAnnotation" name="feat">  
193 - <token type="string" offset="false" realoffset="false" parent="false">  
194 - <pre>  
195 - <item type="name" />  
196 - <item type="attribute" name="subset" prefix="." />  
197 - </pre>  
198 - <post>  
199 - <item type="attribute" name="class" />  
200 - </post>  
201 - <payload>  
202 - <item type="ancestorAttribute" distance="0" name="confidence" />  
203 - </payload>  
204 - </token>  
205 - <condition>  
206 - <item type="ancestor" number="1" />  
207 - <item type="unknownAncestor" number="0" />  
208 - <item type="attribute" name="class" />  
209 - <item type="attribute" name="subset" />  
210 - <item type="ancestorAttribute" name="set" condition="original.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" />  
211 </condition> 128 </condition>
212 </mapping> 129 </mapping>
213 <mapping type="wordAnnotation" name="feat"> 130 <mapping type="wordAnnotation" name="feat">
214 <token type="string" offset="false" realoffset="false" parent="false"> 131 <token type="string" offset="false" realoffset="false" parent="false">
215 <pre> 132 <pre>
216 - <item type="string" value="translated." />  
217 <item type="name" /> 133 <item type="name" />
218 <item type="attribute" name="subset" prefix="." /> 134 <item type="attribute" name="subset" prefix="." />
219 </pre> 135 </pre>
@@ -229,7 +145,6 @@ @@ -229,7 +145,6 @@
229 <item type="unknownAncestor" number="0" /> 145 <item type="unknownAncestor" number="0" />
230 <item type="attribute" name="class" /> 146 <item type="attribute" name="class" />
231 <item type="attribute" name="subset" /> 147 <item type="attribute" name="subset" />
232 - <item type="ancestorAttribute" name="set" condition="translated.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" />  
233 </condition> 148 </condition>
234 </mapping> 149 </mapping>
235 <!-- END WORD ANNOTATIONS --> 150 <!-- END WORD ANNOTATIONS -->
conf/parser/mtas/folia_mimore.xml
@@ -18,7 +18,8 @@ @@ -18,7 +18,8 @@
18 18
19 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> 19 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
20 <autorepair value="false" /> 20 <autorepair value="false" />
21 - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> 21 + <makeunique value="true" />
  22 + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
22 23
23 <!-- START REFERENCES --> 24 <!-- START REFERENCES -->
24 <references> 25 <references>
conf/parser/mtas/folia_mtas.xml
@@ -19,7 +19,8 @@ @@ -19,7 +19,8 @@
19 19
20 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> 20 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
21 <autorepair value="true" /> 21 <autorepair value="true" />
22 - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> 22 + <makeunique value="true" />
  23 + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
23 24
24 <!-- START REFERENCES --> 25 <!-- START REFERENCES -->
25 <references> 26 <references>
conf/parser/mtas/folia_oeaw.xml 0 → 100644
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<mtas>
  3 +
  4 + <!-- START MTAS INDEX CONFIGURATION -->
  5 + <index>
  6 + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
  7 + <payload index="false" />
  8 + <offset index="false" />
  9 + <realoffset index="false" />
  10 + <parent index="true" />
  11 + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
  12 + </index>
  13 + <!-- END MTAS INDEX CONFIGURATION -->
  14 +
  15 +
  16 +
  17 + <!-- START CONFIGURATION MTAS FOLIA PARSER -->
  18 + <parser name="mtas.analysis.parser.MtasFoliaParser">
  19 +
  20 + <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
  21 + <autorepair value="true" />
  22 + <makeunique value="true" />
  23 + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
  24 +
  25 + <!-- START REFERENCES -->
  26 + <references>
  27 + <reference name="wref" ref="id" />
  28 + </references>
  29 + <!-- END REFERENCES -->
  30 +
  31 + <!-- START MAPPINGS -->
  32 + <mappings>
  33 +
  34 + <!-- START WORDS -->
  35 + <mapping type="word" name="w">
  36 + </mapping>
  37 + <mapping type="word" name="w">
  38 + <token type="string" offset="false" realoffset="false" parent="false">
  39 + <pre>
  40 + <item type="name" />
  41 + </pre>
  42 + <post>
  43 + <item type="attribute" name="class" />
  44 + </post>
  45 + </token>
  46 + <condition>
  47 + <item type="attribute" name="class" />
  48 + <item type="attribute" name="class" not="true" condition="WORD" />
  49 + </condition>
  50 + </mapping>
  51 + <!-- END WORDS -->
  52 +
  53 + <!-- START WORD ANNOTATIONS -->
  54 + <mapping type="wordAnnotation" name="t">
  55 + <token type="string" offset="false">
  56 + <pre>
  57 + <item type="name" />
  58 + </pre>
  59 + <post>
  60 + <item type="text" />
  61 + </post>
  62 + </token>
  63 + <token type="string" offset="false" realoffset="false" parent="false">
  64 + <pre>
  65 + <item type="name" />
  66 + <item type="string" value="_lc" />
  67 + </pre>
  68 + <post>
  69 + <item type="text" filter="ascii,lowercase" />
  70 + </post>
  71 + </token>
  72 + <condition>
  73 + <item type="ancestor" number="0" />
  74 + <item type="ancestorWord" number="1" />
  75 + <item type="unknownAncestor" number="0" />
  76 + </condition>
  77 + </mapping>
  78 + <mapping type="wordAnnotation" name="lemma">
  79 + <token type="string" offset="false" realoffset="false" parent="false">
  80 + <pre>
  81 + <item type="name" />
  82 + </pre>
  83 + <post>
  84 + <item type="attribute" name="class" />
  85 + </post>
  86 + </token>
  87 + <condition>
  88 + <item type="attribute" name="class" />
  89 + <item type="ancestor" number="0" />
  90 + <item type="unknownAncestor" number="0" />
  91 + </condition>
  92 + </mapping>
  93 + <mapping type="wordAnnotation" name="pos">
  94 + <token type="string" offset="false" realoffset="false" parent="false">
  95 + <pre>
  96 + <item type="attribute" name="set" />
  97 + </pre>
  98 + <post>
  99 + <item type="attribute" name="head" />
  100 + </post>
  101 + </token>
  102 + <condition>
  103 + <item type="ancestor" number="0" />
  104 + <item type="unknownAncestor" number="0" />
  105 + <item type="attribute" name="class" />
  106 + <item type="attribute" name="set" />
  107 + </condition>
  108 + </mapping>
  109 + <mapping type="wordAnnotation" name="feat">
  110 + <token type="string" offset="false" realoffset="false" parent="false">
  111 + <pre>
  112 + <item type="name" />
  113 + <item type="attribute" name="subset" prefix="." />
  114 + </pre>
  115 + <post>
  116 + <item type="attribute" name="class" />
  117 + </post>
  118 + </token>
  119 + <condition>
  120 + <item type="ancestor" number="1" />
  121 + <item type="unknownAncestor" number="0" />
  122 + <item type="attribute" name="class" />
  123 + <item type="attribute" name="subset" />
  124 + </condition>
  125 + </mapping>
  126 + <!-- END WORD ANNOTATIONS -->
  127 +
  128 + <!-- START RELATIONS -->
  129 + <mapping type="relation" name="entities">
  130 + </mapping>
  131 + <mapping type="relation" name="entity">
  132 + <token type="string" offset="false" realoffset="false" parent="false">
  133 + <pre>
  134 + <item type="name" />
  135 + </pre>
  136 + <post>
  137 + <item type="attribute" name="class" />
  138 + </post>
  139 + </token>
  140 + <condition>
  141 + <item type="ancestor" number="1" />
  142 + <item type="ancestorName" condition="entities" />
  143 + </condition>
  144 + </mapping>
  145 + <!-- END RELATIONS -->
  146 +
  147 + <!-- START RELATION ANNOTATIONS -->
  148 + <mapping type="relationAnnotation" name="feat">
  149 + <token type="string" offset="false" realoffset="false">
  150 + <pre>
  151 + <item type="ancestorRelationName" />
  152 + <item type="name" prefix="." />
  153 + <item type="attribute" name="subset" prefix="." />
  154 + </pre>
  155 + <post>
  156 + <item type="attribute" name="class" />
  157 + </post>
  158 + </token>
  159 + </mapping>
  160 + <!-- END RELATION ANNOTATIONS -->
  161 +
  162 + <!-- START GROUPS -->
  163 + <mapping type="group" name="s">
  164 + <token type="string" offset="false">
  165 + <pre>
  166 + <item type="name" />
  167 + </pre>
  168 + <post>
  169 + <item type="attribute" name="class" />
  170 + </post>
  171 + </token>
  172 + </mapping>
  173 + <mapping type="group" name="p">
  174 + <token type="string" offset="false">
  175 + <pre>
  176 + <item type="name" />
  177 + </pre>
  178 + <post>
  179 + <item type="attribute" name="class" />
  180 + </post>
  181 + </token>
  182 + </mapping>
  183 + <mapping type="group" name="div">
  184 + <token type="string" offset="false">
  185 + <pre>
  186 + <item type="name" />
  187 + </pre>
  188 + <post>
  189 + <item type="attribute" name="class" />
  190 + </post>
  191 + </token>
  192 + </mapping>
  193 + <!-- END GROUPS -->
  194 +
  195 + <!-- START GROUP ANNOTATIONS -->
  196 + <!-- END GROUP ANNOTATIONS -->
  197 +
  198 + </mappings>
  199 + <!-- END MAPPINGS -->
  200 +
  201 + </parser>
  202 + <!-- END CONFIGURATION MTAS FOLIA PARSER -->
  203 +
  204 +
  205 +</mtas>
0 \ No newline at end of file 206 \ No newline at end of file
conf/parser/mtas/folia_sonar.xml
@@ -18,6 +18,7 @@ @@ -18,6 +18,7 @@
18 18
19 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> 19 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
20 <autorepair value="true" /> 20 <autorepair value="true" />
  21 + <makeunique value="true" />
21 <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> 22 <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
22 23
23 <!-- START REFERENCES --> 24 <!-- START REFERENCES -->
conf/parser/mtas/folia_test.xml
@@ -19,7 +19,8 @@ @@ -19,7 +19,8 @@
19 19
20 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> 20 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
21 <autorepair value="true" /> 21 <autorepair value="true" />
22 - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> 22 + <makeunique value="true" />
  23 + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
23 24
24 <!-- START REFERENCES --> 25 <!-- START REFERENCES -->
25 <references> 26 <references>
conf/parser/mtas/sketch_acdh.xml
@@ -18,7 +18,8 @@ @@ -18,7 +18,8 @@
18 <parser name="mtas.analysis.parser.MtasSketchParser"> 18 <parser name="mtas.analysis.parser.MtasSketchParser">
19 <!-- START GENERAL SETTINGS MTAS SKETCH PARSER --> 19 <!-- START GENERAL SETTINGS MTAS SKETCH PARSER -->
20 <autorepair value="true" /> 20 <autorepair value="true" />
21 - <!-- END GENERAL SETTINGS MTAS SKETCH PARSER --> 21 + <makeunique value="true" />
  22 + <!-- END GENERAL SETTINGS MTAS SKETCH PARSER -->
22 23
23 <mappings> 24 <mappings>
24 25
conf/parser/mtas/tei_test.xml
@@ -19,7 +19,8 @@ @@ -19,7 +19,8 @@
19 19
20 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> 20 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
21 <autorepair value="true" /> 21 <autorepair value="true" />
22 - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> 22 + <makeunique value="true" />
  23 + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
23 24
24 <!-- START REFERENCES --> 25 <!-- START REFERENCES -->
25 <references> 26 <references>
conf/parser/mtasSource.xml 0 → 100644
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<mtas>
  3 + <configurations type="mtas.analysis.util.MtasTokenizerFactory">
  4 + <configuration name="EDBO" file="mtasSource/folia_edbo.xml" />
  5 + </configurations>
  6 + <configurations type="mtas.analysis.util.MtasCharFilterFactory">
  7 + <configuration name="EDBO" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" />
  8 + </configurations>
  9 +</mtas>
conf/parser/mtasSource/folia_edbo.xml 0 → 100644
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<mtas>
  3 +
  4 + <!-- START MTAS INDEX CONFIGURATION -->
  5 + <index>
  6 + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
  7 + <payload index="false" />
  8 + <offset index="false" />
  9 + <realoffset index="false" />
  10 + <parent index="true" />
  11 + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
  12 + </index>
  13 + <!-- END MTAS INDEX CONFIGURATION -->
  14 +
  15 + <!-- START CONFIGURATION MTAS FOLIA PARSER -->
  16 + <parser name="mtas.analysis.parser.MtasFoliaParser">
  17 +
  18 + <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
  19 + <autorepair value="true" />
  20 + <makeunique value="true" />
  21 + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
  22 +
  23 + <!-- START REFERENCES -->
  24 + <references>
  25 + </references>
  26 + <!-- END REFERENCES -->
  27 +
  28 + <!-- START MAPPINGS -->
  29 + <mappings>
  30 +
  31 + <!-- START WORDS -->
  32 + <mapping type="word" name="str">
  33 + </mapping>
  34 + <!-- END WORDS -->
  35 +
  36 + <!-- START WORD ANNOTATIONS -->
  37 + <mapping type="wordAnnotation" name="t">
  38 + <token type="string" offset="false">
  39 + <pre>
  40 + <item type="name" />
  41 + </pre>
  42 + <post>
  43 + <item type="text" />
  44 + </post>
  45 + </token>
  46 + <token type="string" offset="false" realoffset="false" parent="false">
  47 + <pre>
  48 + <item type="name" />
  49 + <item type="string" value="_lc" />
  50 + </pre>
  51 + <post>
  52 + <item type="text" filter="ascii,lowercase" />
  53 + </post>
  54 + </token>
  55 + <condition>
  56 + <item type="ancestor" number="0" />
  57 + <item type="ancestorWord" number="1" />
  58 + <item type="unknownAncestor" number="0" />
  59 + <item type="attribute" name="class" condition="Ticcl"/>
  60 + </condition>
  61 + </mapping>
  62 + <mapping type="wordAnnotation" name="correction">
  63 + </mapping>
  64 + <mapping type="wordAnnotation" name="new">
  65 + </mapping>
  66 + <mapping type="wordAnnotation" name="original">
  67 + </mapping>
  68 + <mapping type="wordAnnotation" name="suggestion">
  69 + </mapping>
  70 + <mapping type="wordAnnotation" name="t">
  71 + <token type="string" offset="false">
  72 + <pre>
  73 + <item type="name" />
  74 + </pre>
  75 + <post>
  76 + <item type="text" />
  77 + </post>
  78 + </token>
  79 + <token type="string" offset="false" realoffset="false" parent="false">
  80 + <pre>
  81 + <item type="name" />
  82 + <item type="string" value="_lc" />
  83 + </pre>
  84 + <post>
  85 + <item type="text" filter="ascii,lowercase" />
  86 + </post>
  87 + </token>
  88 + <condition>
  89 + <item type="ancestor" number="2" />
  90 + <item type="ancestorName" condition="new" />
  91 + <item type="unknownAncestor" number="0" />
  92 + <item type="attribute" name="class" condition="Ticcl"/>
  93 + </condition>
  94 + </mapping>
  95 + <mapping type="wordAnnotation" name="t">
  96 + <token type="string" offset="false">
  97 + <pre>
  98 + <item type="name" />
  99 + <item type="ancestorName" prefix="."/>
  100 + </pre>
  101 + <post>
  102 + <item type="text" />
  103 + </post>
  104 + </token>
  105 + <token type="string" offset="false" realoffset="false" parent="false">
  106 + <pre>
  107 + <item type="name" />
  108 + <item type="string" value="_lc" />
  109 + <item type="ancestorName" prefix="."/>
  110 + </pre>
  111 + <post>
  112 + <item type="text" filter="ascii,lowercase" />
  113 + </post>
  114 + </token>
  115 + <condition>
  116 + <item type="ancestor" number="2" />
  117 + <item type="ancestorName" condition="original" />
  118 + <item type="unknownAncestor" number="0" />
  119 + </condition>
  120 + </mapping>
  121 + <mapping type="wordAnnotation" name="t">
  122 + <token type="string" offset="false">
  123 + <pre>
  124 + <item type="name" />
  125 + <item type="ancestorName" prefix="."/>
  126 + </pre>
  127 + <post>
  128 + <item type="text" />
  129 + </post>
  130 + </token>
  131 + <token type="string" offset="false" realoffset="false" parent="false">
  132 + <pre>
  133 + <item type="name" />
  134 + <item type="string" value="_lc" />
  135 + <item type="ancestorName" prefix="."/>
  136 + </pre>
  137 + <post>
  138 + <item type="text" filter="ascii,lowercase" />
  139 + </post>
  140 + </token>
  141 + <condition>
  142 + <item type="ancestor" number="2" />
  143 + <item type="ancestorName" condition="suggestion" />
  144 + <item type="unknownAncestor" number="0" />
  145 + </condition>
  146 + </mapping>
  147 + <!-- END WORD ANNOTATIONS -->
  148 +
  149 + <!-- START RELATIONS -->
  150 + <!-- END RELATIONS -->
  151 +
  152 + <!-- START GROUPS -->
  153 + <mapping type="group" name="p">
  154 + <token type="string" offset="false">
  155 + <pre>
  156 + <item type="name" />
  157 + </pre>
  158 + <post>
  159 + <item type="attribute" name="class" />
  160 + </post>
  161 + </token>
  162 + </mapping>
  163 + <mapping type="group" name="div">
  164 + <token type="string" offset="false">
  165 + <pre>
  166 + <item type="name" />
  167 + </pre>
  168 + <post>
  169 + <item type="attribute" name="class" />
  170 + </post>
  171 + </token>
  172 + </mapping>
  173 + <mapping type="group" name="head">
  174 + <token type="string" offset="false">
  175 + <pre>
  176 + <item type="name" />
  177 + </pre>
  178 + <post>
  179 + <item type="attribute" name="class" />
  180 + </post>
  181 + </token>
  182 + </mapping>
  183 + <!-- END GROUPS -->
  184 +
  185 + <!-- START GROUP ANNOTATIONS -->
  186 + <mapping type="groupAnnotation" name="lang">
  187 + <token type="string" offset="false" realoffset="false" parent="false">
  188 + <pre>
  189 + <item type="name" />
  190 + </pre>
  191 + <post>
  192 + <item type="attribute" name="class" />
  193 + </post>
  194 + </token>
  195 + </mapping>
  196 + <!-- END GROUP ANNOTATIONS -->
  197 +
  198 + </mappings>
  199 + <!-- END MAPPINGS -->
  200 +
  201 + </parser>
  202 + <!-- END CONFIGURATION MTAS FOLIA PARSER -->
  203 +
  204 +</mtas>
0 \ No newline at end of file 205 \ No newline at end of file
conf/solr/schemaNederlab.xml
@@ -255,8 +255,8 @@ @@ -255,8 +255,8 @@
255 <field name="NLContent_folia_available" type="nederlab_boolean" 255 <field name="NLContent_folia_available" type="nederlab_boolean"
256 required="false" multiValued="false" indexed="true" stored="true" /> 256 required="false" multiValued="false" indexed="true" stored="true" />
257 <field name="NLContent_mtas" type="mtas_text" indexed="true" 257 <field name="NLContent_mtas" type="mtas_text" indexed="true"
258 - stored="true" />  
259 - <field name="NLContent_mtas_error" type="nederlab_string" 258 + stored="true" />
  259 + <field name="NLContent_mtas_error" type="nederlab_string"
260 indexed="true" stored="true" /> 260 indexed="true" stored="true" />
261 <field name="NLContent_mtas_numberOfTokens" type="nederlab_int" 261 <field name="NLContent_mtas_numberOfTokens" type="nederlab_int"
262 indexed="true" stored="true" /> 262 indexed="true" stored="true" />
@@ -264,7 +264,17 @@ @@ -264,7 +264,17 @@
264 indexed="true" stored="true" /> 264 indexed="true" stored="true" />
265 <field name="NLContent_mtas_size" type="nederlab_int" indexed="true" 265 <field name="NLContent_mtas_size" type="nederlab_int" indexed="true"
266 stored="true" /> 266 stored="true" />
267 - <!-- Combined Field Metadata --> 267 + <field name="NLContent_mtasSource" type="mtasSource_text" indexed="true"
  268 + stored="true" />
  269 + <field name="NLContent_mtasSource_error" type="nederlab_string"
  270 + indexed="true" stored="true" />
  271 + <field name="NLContent_mtasSource_numberOfTokens" type="nederlab_int"
  272 + indexed="true" stored="true" />
  273 + <field name="NLContent_mtasSource_numberOfPositions" type="nederlab_int"
  274 + indexed="true" stored="true" />
  275 + <field name="NLContent_mtasSource_size" type="nederlab_int" indexed="true"
  276 + stored="true" />
  277 + <!-- Combined Field Metadata -->
268 <field name="NLMetadata" type="nederlab_text" required="false" 278 <field name="NLMetadata" type="nederlab_text" required="false"
269 multiValued="true" indexed="true" stored="false" /> 279 multiValued="true" indexed="true" stored="false" />
270 <copyField source="NLCore_NLIdentification_nederlabID" dest="NLMetadata" /> 280 <copyField source="NLCore_NLIdentification_nederlabID" dest="NLMetadata" />
@@ -420,5 +430,27 @@ @@ -420,5 +430,27 @@
420 prefix="t" /> 430 prefix="t" />
421 </analyzer> 431 </analyzer>
422 </fieldType> 432 </fieldType>
  433 +
  434 + <fieldType name="mtasSource_text_example_config" class="solr.TextField"
  435 + postingsFormat="MtasCodec">
  436 + <analyzer type="index">
  437 + <charFilter class="mtas.analysis.util.MtasCharFilterFactory"
  438 + config="mtasSource.xml" />
  439 + <tokenizer class="mtas.analysis.util.MtasTokenizerFactory"
  440 + config="mtasSource.xml" />
  441 + </analyzer>
  442 + </fieldType>
  443 +
  444 + <fieldType name="mtasSource_text" class="mtas.solr.schema.MtasPreAnalyzedField"
  445 + followIndexAnalyzer="mtasSource_text_example_config"
  446 + configurationFromField="NLCore_NLAdministrative_sourceCollection" setNumberOfTokens="NLContent_mtasSource_numberOfTokens"
  447 + setNumberOfPositions="NLContent_mtasSource_numberOfPositions" setSize="NLContent_mtasSource_size"
  448 + setError="NLContent_mtasSource_error" postingsFormat="MtasCodec">
  449 + <analyzer type="query">
  450 + <tokenizer class="solr.WhitespaceTokenizerFactory" />
  451 + <filter class="mtas.analysis.util.MtasPrefixTokenFilterFactory"
  452 + prefix="t" />
  453 + </analyzer>
  454 + </fieldType>
423 455
424 </schema> 456 </schema>
conf/solr/schemaOeaw.xml 0 → 100644
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +
  3 +<schema name="nederlab" version="1.5">
  4 +
  5 + <field name="_version_" type="nederlab_long" indexed="true"
  6 + stored="true" />
  7 +
  8 + <!-- component Profile -->
  9 + <field name="NLProfile_name" type="nederlab_string" required="true"
  10 + multiValued="false" indexed="true" stored="true" />
  11 +
  12 + <!-- component ResourceProxy -->
  13 + <field name="ResourceProxy_resourceRef" type="nederlab_string"
  14 + required="false" multiValued="true" indexed="true" stored="true" />
  15 + <dynamicField name="ResourceProxy_resourceRef_mimeType_*"
  16 + type="nederlab_string" required="false" multiValued="true" indexed="true"
  17 + stored="true" />
  18 +
  19 + <!-- component NLCore -->
  20 + <field name="NLCore_NLIdentification_nederlabID" type="nederlab_uuid"
  21 + required="true" multiValued="false" indexed="true" stored="true" />
  22 + <field name="NLCore_NLIdentification_editorialCode" type="nederlab_string"
  23 + required="false" multiValued="false" indexed="true" stored="true" />
  24 + <field name="NLCore_NLIdentification_versionID" type="nederlab_string"
  25 + required="true" multiValued="false" indexed="true" stored="true" />
  26 + <field name="NLCore_NLIdentification_sourceRef" type="nederlab_string"
  27 + required="false" multiValued="true" indexed="true" stored="true" />
  28 + <field name="NLCore_NLIdentification_sourceUrl" type="nederlab_string"
  29 + required="false" multiValued="true" indexed="true" stored="true" />
  30 + <field name="NLCore_NLIdentification_sourceRefUrl_serialized"
  31 + type="nederlab_string" required="false" multiValued="true" indexed="false"
  32 + stored="true" />
  33 + <field name="NLCore_NLAdministrative_ingestTime" type="nederlab_date"
  34 + required="true" multiValued="false" indexed="true" stored="true" />
  35 + <field name="NLCore_NLAdministrative_expirationTime" type="nederlab_date"
  36 + required="false" multiValued="false" indexed="true" stored="true" />
  37 + <field name="NLCore_NLAdministrative_lastEditedBy" type="nederlab_string"
  38 + required="false" multiValued="false" indexed="true" stored="true" />
  39 + <field name="NLCore_NLAdministrative_modificationTime" type="nederlab_date"
  40 + required="false" multiValued="false" indexed="true" stored="true" />
  41 + <field name="NLCore_NLAdministrative_editorialNote" type="nederlab_text"
  42 + required="false" multiValued="true" indexed="true" stored="true" />
  43 + <field name="NLCore_NLAdministrative_sourceCollection" type="nederlab_string"
  44 + required="false" multiValued="false" indexed="true" stored="true" />
  45 + <field name="NLCore_NLAdministrative_isThesaurusElement" type="nederlab_boolean"
  46 + required="true" multiValued="false" indexed="true" stored="true" />
  47 + <field name="NLCore_NLExternalReference_organizationName" type="nederlab_text"
  48 + required="false" multiValued="true" indexed="true" stored="true" />
  49 + <field name="NLCore_NLExternalReference_collectionName" type="nederlab_string"
  50 + required="false" multiValued="true" indexed="true" stored="true" />
  51 + <field name="NLCore_NLExternalReference_resourceRef" type="nederlab_string"
  52 + required="false" multiValued="true" indexed="true" stored="true" />
  53 + <field name="NLCore_NLExternalReference_serialized" type="nederlab_string"
  54 + required="false" multiValued="true" indexed="false" stored="true" />
  55 +
  56 + <!-- component NLTitle -->
  57 + <field name="NLTitle_title" type="nederlab_text" required="false"
  58 + multiValued="false" indexed="true" stored="true" />
  59 + <field name="NLTitle_subtitle" type="nederlab_text" required="false"
  60 + multiValued="false" indexed="true" stored="true" />
  61 + <field name="NLTitle_genre" type="nederlab_string" required="false"
  62 + multiValued="true" indexed="true" stored="true" />
  63 + <field name="NLTitle_category" type="nederlab_string" required="false"
  64 + multiValued="true" indexed="true" stored="true" />
  65 + <field name="NLTitle_yearOfPublicationMin" type="nederlab_int"
  66 + required="false" multiValued="false" indexed="true" stored="true" />
  67 + <field name="NLTitle_yearOfPublicationMax" type="nederlab_int"
  68 + required="false" multiValued="false" indexed="true" stored="true" />
  69 + <field name="NLTitle_yearOfPublicationApprox" type="nederlab_boolean"
  70 + required="false" multiValued="false" indexed="true" stored="true" />
  71 + <field name="NLTitle_yearOfPublicationLabel" type="nederlab_text"
  72 + required="false" multiValued="false" indexed="true" stored="true" />
  73 + <field name="NLTitle_edition" type="nederlab_string" required="false"
  74 + multiValued="false" indexed="true" stored="true" />
  75 + <field name="NLTitle_inNederlabAs" type="nederlab_uuid" required="false"
  76 + multiValued="false" indexed="true" stored="true" />
  77 + <field name="NLTitle_NLPublicationPlace_placeOfPublication" type="nederlab_string"
  78 + required="false" multiValued="true" indexed="true" stored="true" />
  79 + <field name="NLTitle_NLPublicationPlace_placeID" type="nederlab_string"
  80 + required="false" multiValued="true" indexed="true" stored="true" />
  81 + <field name="NLTitle_NLPublicationPlace_placeOfPublicationOriginal"
  82 + type="nederlab_text" required="false" multiValued="true" indexed="true"
  83 + stored="true" />
  84 + <field name="NLTitle_numberOfPages" type="nederlab_int" required="false"
  85 + multiValued="false" indexed="true" stored="true" />
  86 + <field name="NLTitle_numberOfWords" type="nederlab_int" required="false"
  87 + multiValued="false" indexed="true" stored="true" />
  88 + <field name="NLTitle_primaryLanguage" type="nederlab_string"
  89 + required="false" multiValued="false" indexed="true" stored="true" />
  90 + <field name="NLTitle_isTranslation" type="nederlab_boolean"
  91 + required="false" multiValued="false" indexed="true" stored="true" />
  92 + <field name="NLTitle_characterEncoding" type="nederlab_string"
  93 + required="false" multiValued="false" indexed="true" stored="true" />
  94 + <field name="NLTitle_codingStandard" type="nederlab_string"
  95 + required="false" multiValued="true" indexed="true" stored="true" />
  96 + <field name="NLTitle_textQuality" type="nederlab_text" required="false"
  97 + multiValued="false" indexed="true" stored="true" />
  98 + <field name="NLTitle_processingMethod" type="nederlab_text"
  99 + required="false" multiValued="false" indexed="true" stored="true" />
  100 + <field name="NLTitle_autopsyPerformed" type="nederlab_boolean"
  101 + required="false" multiValued="false" indexed="true" stored="true" />
  102 + <field name="NLTitle_NLPersonRef_personID" type="nederlab_uuid"
  103 + required="false" multiValued="true" indexed="true" stored="true" />
  104 + <field name="NLTitle_NLPersonRef_role" type="nederlab_string"
  105 + required="false" multiValued="true" indexed="true" stored="true" />
  106 + <dynamicField name="NLTitle_NLPersonRef_personID_role_*"
  107 + type="nederlab_uuid" required="false" multiValued="true" indexed="true"
  108 + stored="true" />
  109 + <field name="NLTitle_contains" type="nederlab_uuid" required="false"
  110 + multiValued="true" indexed="true" stored="true" />
  111 + <field name="NLTitle_seriesTitleID" type="nederlab_uuid"
  112 + required="false" multiValued="true" indexed="true" stored="true" />
  113 + <field name="NLTitle_seriesTitleID_parent" type="nederlab_uuid"
  114 + required="false" multiValued="false" indexed="true" stored="true" />
  115 + <field name="NLTitle_seriesTitleID_root" type="nederlab_uuid"
  116 + required="false" multiValued="false" indexed="true" stored="true" />
  117 +
  118 + <!-- component NLDependentTitle -->
  119 + <field name="NLDependentTitle_title" type="nederlab_text"
  120 + required="false" multiValued="false" indexed="true" stored="true" />
  121 + <field name="NLDependentTitle_subtitle" type="nederlab_text"
  122 + required="false" multiValued="false" indexed="true" stored="true" />
  123 + <field name="NLDependentTitle_primaryLanguage" type="nederlab_string"
  124 + required="false" multiValued="false" indexed="true" stored="true" />
  125 + <field name="NLDependentTitle_parentTitleID" type="nederlab_uuid"
  126 + required="false" multiValued="false" indexed="true" stored="true" />
  127 + <field name="NLDependentTitle_inNederlabAs" type="nederlab_uuid"
  128 + required="false" multiValued="false" indexed="true" stored="true" />
  129 + <field name="NLDependentTitle_NLPersonRef_personID" type="nederlab_uuid"
  130 + required="false" multiValued="true" indexed="true" stored="true" />
  131 + <field name="NLDependentTitle_NLPersonRef_role" type="nederlab_string"
  132 + required="false" multiValued="true" indexed="true" stored="true" />
  133 + <dynamicField name="NLDependentTitle_NLPersonRef_personID_role_*"
  134 + type="nederlab_uuid" required="false" multiValued="true" indexed="true"
  135 + stored="true" />
  136 + <field name="NLDependentTitle_startPage" type="nederlab_int"
  137 + required="false" multiValued="false" indexed="true" stored="true" />
  138 + <field name="NLDependentTitle_endPage" type="nederlab_int"
  139 + required="false" multiValued="false" indexed="true" stored="true" />
  140 +
  141 + <!-- component NLPerson -->
  142 + <field name="NLPerson_NLPersonName_nameId" type="nederlab_uuid"
  143 + required="false" multiValued="true" indexed="true" stored="true" />
  144 + <field name="NLPerson_NLPersonName_lastName" type="nederlab_text"
  145 + required="false" multiValued="true" indexed="true" stored="true" />
  146 + <field name="NLPerson_NLPersonName_firstName" type="nederlab_text"
  147 + required="false" multiValued="true" indexed="true" stored="true" />
  148 + <field name="NLPerson_NLPersonName_infixes" type="nederlab_text"
  149 + required="false" multiValued="true" indexed="true" stored="true" />
  150 + <field name="NLPerson_NLPersonName_firstNameFull" type="nederlab_text"
  151 + required="false" multiValued="true" indexed="true" stored="true" />
  152 + <field name="NLPerson_NLPersonName_fullName" type="nederlab_text"
  153 + required="false" multiValued="true" indexed="true" stored="true" />
  154 + <field name="NLPerson_NLPersonName_fullName_serialized" type="nederlab_string"
  155 + required="false" multiValued="true" indexed="false" stored="true" />
  156 + <field name="NLPerson_NLPersonName_preferredNameID" type="nederlab_uuid"
  157 + required="false" multiValued="false" indexed="true" stored="true" />
  158 + <field name="NLPerson_NLPersonName_preferredLastName" type="nederlab_string"
  159 + required="false" multiValued="false" indexed="true" stored="true" />
  160 + <field name="NLPerson_NLPersonName_preferredFirstName" type="nederlab_string"
  161 + required="false" multiValued="false" indexed="true" stored="true" />
  162 + <field name="NLPerson_NLPersonName_preferredFirstNameFull" type="nederlab_string"
  163 + required="false" multiValued="false" indexed="true" stored="true" />
  164 + <field name="NLPerson_NLPersonName_preferredInfixes" type="nederlab_string"
  165 + required="false" multiValued="false" indexed="true" stored="true" />
  166 + <field name="NLPerson_NLPersonName_preferredFullName" type="nederlab_text"
  167 + required="false" multiValued="false" indexed="true" stored="true" />
  168 + <field name="NLPerson_NLPersonName_preferredFullName_serialized"
  169 + type="nederlab_string" required="false" multiValued="false" indexed="false"
  170 + stored="true" />
  171 + <field name="NLPerson_dateOfBirthDayMonth" type="nederlab_text"
  172 + required="false" multiValued="false" indexed="true" stored="true" />
  173 + <field name="NLPerson_dateOfBirthMonth" type="nederlab_int"
  174 + required="false" multiValued="false" indexed="true" stored="true" />
  175 + <field name="NLPerson_dateOfBirthDay" type="nederlab_int"
  176 + required="false" multiValued="false" indexed="true" stored="true" />
  177 + <field name="NLPerson_yearOfBirthMin" type="nederlab_int"
  178 + required="false" multiValued="false" indexed="true" stored="true" />
  179 + <field name="NLPerson_yearOfBirthMax" type="nederlab_int"
  180 + required="false" multiValued="false" indexed="true" stored="true" />
  181 + <field name="NLPerson_yearOfBirthApprox" type="nederlab_boolean"
  182 + required="false" multiValued="false" indexed="true" stored="true" />
  183 + <field name="NLPerson_yearOfBirthLabel" type="nederlab_text"
  184 + required="false" multiValued="false" indexed="true" stored="true" />
  185 + <field name="NLPerson_placeOfBirth" type="nederlab_string"
  186 + required="false" multiValued="false" indexed="true" stored="true" />
  187 + <field name="NLPerson_placeOfBirthID" type="nederlab_string"
  188 + required="false" multiValued="false" indexed="true" stored="true" />
  189 + <field name="NLPerson_dateOfDeathDayMonth" type="nederlab_text"
  190 + required="false" multiValued="false" indexed="true" stored="true" />
  191 + <field name="NLPerson_dateOfDeathMonth" type="nederlab_int"
  192 + required="false" multiValued="false" indexed="true" stored="true" />
  193 + <field name="NLPerson_dateOfDeathDay" type="nederlab_int"
  194 + required="false" multiValued="false" indexed="true" stored="true" />
  195 + <field name="NLPerson_yearOfDeathMin" type="nederlab_int"
  196 + required="false" multiValued="false" indexed="true" stored="true" />
  197 + <field name="NLPerson_yearOfDeathMax" type="nederlab_int"
  198 + required="false" multiValued="false" indexed="true" stored="true" />
  199 + <field name="NLPerson_yearOfDeathApprox" type="nederlab_boolean"
  200 + required="false" multiValued="false" indexed="true" stored="true" />
  201 + <field name="NLPerson_yearOfDeathLabel" type="nederlab_text"
  202 + required="false" multiValued="false" indexed="true" stored="true" />
  203 + <field name="NLPerson_placeOfDeath" type="nederlab_string"
  204 + required="false" multiValued="false" indexed="true" stored="true" />
  205 + <field name="NLPerson_placeOfDeathID" type="nederlab_string"
  206 + required="false" multiValued="false" indexed="true" stored="true" />
  207 + <field name="NLPerson_gender" type="nederlab_string" required="false"
  208 + multiValued="false" indexed="true" stored="true" />
  209 + <field name="NLPerson_profession" type="nederlab_string"
  210 + required="false" multiValued="true" indexed="true" stored="true" />
  211 + <field name="NLPerson_education" type="nederlab_string" required="false"
  212 + multiValued="true" indexed="true" stored="true" />
  213 + <field name="NLPerson_inThesaurusAs" type="nederlab_uuid"
  214 + required="false" multiValued="false" indexed="true" stored="true" />
  215 +
  216 + <!-- component NLSeriesTitle -->
  217 + <field name="NLSeriesTitle_title" type="nederlab_text" required="false"
  218 + multiValued="false" indexed="true" stored="true" />
  219 + <field name="NLSeriesTitle_years" type="nederlab_text" required="false"
  220 + multiValued="false" indexed="true" stored="true" />
  221 + <field name="NLSeriesTitle_description" type="nederlab_text"
  222 + required="false" multiValued="false" indexed="true" stored="true" />
  223 + <field name="NLSeriesTitle_inNederlabAs" type="nederlab_uuid"
  224 + required="false" multiValued="false" indexed="true" stored="true" />
  225 + <field name="NLSeriesTitle_seriesTitleID" type="nederlab_uuid"
  226 + required="false" multiValued="true" indexed="true" stored="true" />
  227 + <field name="NLSeriesTitle_seriesTitleID_parent" type="nederlab_uuid"
  228 + required="false" multiValued="false" indexed="true" stored="true" />
  229 + <field name="NLSeriesTitle_seriesTitleID_root" type="nederlab_uuid"
  230 + required="false" multiValued="false" indexed="true" stored="true" />
  231 +
  232 + <!-- component NLCollectionSpecific -->
  233 + <dynamicField name="NLCollectionSpecific_*" type="nederlab_string"
  234 + required="false" multiValued="true" indexed="true" stored="true" />
  235 +
  236 + <!-- component NLContent old -->
  237 +
  238 + <field name="NLContent_text_available" type="nederlab_boolean"
  239 + required="false" multiValued="false" indexed="true" stored="true" />
  240 + <field name="NLContent_text" type="nederlab_content" required="false"
  241 + multiValued="false" indexed="true" stored="true" termVectors="true"
  242 + termPositions="true" termOffsets="true" />
  243 + <field name="NLContent_text_lowercase" type="nederlab_content_lowercase"
  244 + required="false" multiValued="false" indexed="true" stored="true"
  245 + termVectors="true" termPositions="true" termOffsets="true" />
  246 + <copyField source="NLContent_text" dest="NLContent_text_lowercase" />
  247 + <field name="NLContent_ticcl_available" type="nederlab_boolean"
  248 + required="false" multiValued="false" indexed="true" stored="true" />
  249 + <field name="NLContent_ticcl_lowercase" type="nederlab_content_lowercase"
  250 + required="false" multiValued="false" indexed="true" stored="true"
  251 + termVectors="true" termPositions="true" termOffsets="true" />
  252 +
  253 + <!-- component NLContent -->
  254 +
  255 + <field name="NLContent_folia_available" type="nederlab_boolean"
  256 + required="false" multiValued="false" indexed="true" stored="true" />
  257 + <field name="NLContent_mtas" type="mtas_text" indexed="true"
  258 + stored="true" />
  259 + <field name="NLContent_mtas_error" type="nederlab_string"
  260 + indexed="true" stored="true" />
  261 + <field name="NLContent_mtas_numberOfTokens" type="nederlab_int"
  262 + indexed="true" stored="true" />
  263 + <field name="NLContent_mtas_numberOfPositions" type="nederlab_int"
  264 + indexed="true" stored="true" />
  265 + <field name="NLContent_mtas_size" type="nederlab_int" indexed="true"
  266 + stored="true" />
  267 + <!-- Combined Field Metadata -->
  268 + <field name="NLMetadata" type="nederlab_text" required="false"
  269 + multiValued="true" indexed="true" stored="false" />
  270 + <copyField source="NLCore_NLIdentification_nederlabID" dest="NLMetadata" />
  271 + <copyField source="NLCore_NLIdentification_editorialCode"
  272 + dest="NLMetadata" />
  273 + <copyField source="NLCore_NLIdentification_sourceRef" dest="NLMetadata" />
  274 + <copyField source="NLCore_NLAdministrative_editorialNote"
  275 + dest="NLMetadata" />
  276 + <copyField source="NLCore_NLAdministrative_sourceCollection"
  277 + dest="NLMetadata" />
  278 + <copyField source="NLCore_NLExternalReference_organizationName"
  279 + dest="NLMetadata" />
  280 + <copyField source="NLCore_NLExternalReference_collectionName"
  281 + dest="NLMetadata" />
  282 + <copyField source="NLCore_NLExternalReference_resourceRef"
  283 + dest="NLMetadata" />
  284 + <copyField source="NLTitle_title" dest="NLMetadata" />
  285 + <copyField source="NLTitle_subtitle" dest="NLMetadata" />
  286 + <copyField source="NLTitle_genre" dest="NLMetadata" />
  287 + <copyField source="NLTitle_category" dest="NLMetadata" />
  288 + <copyField source="NLTitle_yearOfPublicationMin" dest="NLMetadata" />
  289 + <copyField source="NLTitle_yearOfPublicationMax" dest="NLMetadata" />
  290 + <copyField source="NLTitle_yearOfPublicationLabel" dest="NLMetadata" />
  291 + <copyField source="NLTitle_edition" dest="NLMetadata" />
  292 + <copyField source="NLTitle_NLPublicationPlace_placeOfPublication"
  293 + dest="NLMetadata" />
  294 + <copyField source="NLTitle_NLPublicationPlace_placeID" dest="NLMetadata" />
  295 + <copyField source="NLTitle_NLPublicationPlace_placeOfPublicationOriginal"
  296 + dest="NLMetadata" />
  297 + <copyField source="NLTitle_primaryLanguage" dest="NLMetadata" />
  298 + <copyField source="NLTitle_characterEncoding" dest="NLMetadata" />
  299 + <copyField source="NLTitle_codingStandard" dest="NLMetadata" />
  300 + <copyField source="NLTitle_textQuality" dest="NLMetadata" />
  301 + <copyField source="NLTitle_processingMethod" dest="NLMetadata" />
  302 + <copyField source="NLTitle_NLPersonRef_role" dest="NLMetadata" />
  303 + <copyField source="NLDependentTitle_title" dest="NLMetadata" />
  304 + <copyField source="NLDependentTitle_subtitle" dest="NLMetadata" />
  305 + <copyField source="NLDependentTitle_primaryLanguage" dest="NLMetadata" />
  306 + <copyField source="NLDependentTitle_NLPersonRef_role" dest="NLMetadata" />
  307 + <copyField source="NLPerson_NLPersonName_lastName" dest="NLMetadata" />
  308 + <copyField source="NLPerson_NLPersonName_firstName" dest="NLMetadata" />
  309 + <copyField source="NLPerson_NLPersonName_infixes" dest="NLMetadata" />
  310 + <copyField source="NLPerson_NLPersonName_firstNameFull" dest="NLMetadata" />
  311 + <copyField source="NLPerson_NLPersonName_fullName" dest="NLMetadata" />
  312 + <copyField source="NLPerson_dateOfBirthDayMonth" dest="NLMetadata" />
  313 + <copyField source="NLPerson_yearOfBirthMin" dest="NLMetadata" />
  314 + <copyField source="NLPerson_yearOfBirthMax" dest="NLMetadata" />
  315 + <copyField source="NLPerson_yearOfBirthLabel" dest="NLMetadata" />
  316 + <copyField source="NLPerson_placeOfBirth" dest="NLMetadata" />
  317 + <copyField source="NLPerson_placeOfBirthID" dest="NLMetadata" />
  318 + <copyField source="NLPerson_dateOfDeathDayMonth" dest="NLMetadata" />
  319 + <copyField source="NLPerson_yearOfDeathMin" dest="NLMetadata" />
  320 + <copyField source="NLPerson_yearOfDeathMax" dest="NLMetadata" />
  321 + <copyField source="NLPerson_yearOfDeathLabel" dest="NLMetadata" />
  322 + <copyField source="NLPerson_placeOfDeath" dest="NLMetadata" />
  323 + <copyField source="NLPerson_placeOfDeathID" dest="NLMetadata" />
  324 + <copyField source="NLPerson_gender" dest="NLMetadata" />
  325 + <copyField source="NLPerson_profession" dest="NLMetadata" />
  326 + <copyField source="NLPerson_education" dest="NLMetadata" />
  327 + <copyField source="NLSeriesTitle_title" dest="NLMetadata" />
  328 + <copyField source="NLSeriesTitle_years" dest="NLMetadata" />
  329 + <copyField source="NLSeriesTitle_description" dest="NLMetadata" />
  330 + <copyField source="NLCollectionSpecific_*" dest="NLMetadata" />
  331 +
  332 + <uniqueKey>NLCore_NLIdentification_versionID</uniqueKey>
  333 +
  334 + <fieldType name="nederlab_string" class="solr.StrField"
  335 + sortMissingLast="true" />
  336 + <fieldType name="nederlab_uuid" class="solr.StrField"
  337 + sortMissingLast="true" />
  338 + <fieldType name="nederlab_boolean" class="solr.BoolField"
  339 + sortMissingLast="true" />
  340 + <fieldType name="nederlab_int" class="solr.TrieIntField"
  341 + precisionStep="8" positionIncrementGap="0" />
  342 + <fieldType name="nederlab_long" class="solr.TrieLongField"
  343 + precisionStep="0" positionIncrementGap="0" />
  344 + <fieldType name="nederlab_date" class="solr.TrieDateField"
  345 + precisionStep="6" positionIncrementGap="0" />
  346 + <fieldtype name="nederlab_binary" class="solr.BinaryField" />
  347 +
  348 + <fieldType name="nederlab_text" class="solr.TextField"
  349 + positionIncrementGap="100">
  350 + <analyzer type="index">
  351 + <tokenizer class="solr.StandardTokenizerFactory" />
  352 + <filter class="solr.LowerCaseFilterFactory" />
  353 + </analyzer>
  354 + <analyzer type="query">
  355 + <tokenizer class="solr.StandardTokenizerFactory" />
  356 + <filter class="solr.LowerCaseFilterFactory" />
  357 + </analyzer>
  358 + </fieldType>
  359 +
  360 + <fieldType name="nederlab_content" class="solr.TextField"
  361 + positionIncrementGap="100">
  362 + <analyzer type="index">
  363 + <tokenizer class="solr.StandardTokenizerFactory" />
  364 + </analyzer>
  365 + <analyzer type="query">
  366 + <tokenizer class="solr.StandardTokenizerFactory" />
  367 + </analyzer>
  368 + </fieldType>
  369 +
  370 + <fieldType name="nederlab_content_lowercase" class="solr.TextField"
  371 + positionIncrementGap="100">
  372 + <analyzer type="index">
  373 + <tokenizer class="solr.StandardTokenizerFactory" />
  374 + <filter class="solr.LowerCaseFilterFactory" />
  375 + </analyzer>
  376 + <analyzer type="query">
  377 + <tokenizer class="solr.StandardTokenizerFactory" />
  378 + <filter class="solr.LowerCaseFilterFactory" />
  379 + </analyzer>
  380 + </fieldType>
  381 +
  382 + <fieldType name="mtas_text" class="solr.TextField"
  383 + postingsFormat="MtasCodec">
  384 + <analyzer type="index">
  385 + <charFilter class="mtas.analysis.util.MtasCharFilterFactory"
  386 + type="file" prefix="/local/data/" />
  387 + <tokenizer class="mtas.analysis.util.MtasTokenizerFactory"
  388 + configFile="mtas/folia_oeaw.xml" />
  389 + </analyzer>
  390 + </fieldType>
  391 +
  392 +</schema>
conf/solr/schemaTest.xml
@@ -115,11 +115,7 @@ @@ -115,11 +115,7 @@
115 115
116 WARNING: The _text_ catch-all field will significantly increase your index size. 116 WARNING: The _text_ catch-all field will significantly increase your index size.
117 If you don't need it, consider removing it and the corresponding copyField directive. 117 If you don't need it, consider removing it and the corresponding copyField directive.
118 - -->  
119 - <!  
120 - <fieldType name="string_simpletext" class="solr.StrField" postingsFormat="SimpleText" />  
121 - <field name="simple_string" type="string_simpletext" indexed="true" stored="true" required="false" multiValued="false" />  
122 - --> 118 + -->
123 119
124 <fieldType name="mtas_text" class="solr.TextField" postingsFormat="MtasCodec"> 120 <fieldType name="mtas_text" class="solr.TextField" postingsFormat="MtasCodec">
125 <analyzer type="index"> 121 <analyzer type="index">
junit/mtas/parser/MtasCQLParserTestSentence.java
@@ -29,11 +29,11 @@ public class MtasCQLParserTestSentence { @@ -29,11 +29,11 @@ public class MtasCQLParserTestSentence {
29 basicTests(); 29 basicTests();
30 } 30 }
31 31
32 - private void testCQLParse(String field, String cql, SpanQuery q) { 32 + private void testCQLParse(String field, String defaultPrefix, String cql, SpanQuery q) {
33 MtasCQLParser p = new MtasCQLParser(new BufferedReader(new StringReader(cql))); 33 MtasCQLParser p = new MtasCQLParser(new BufferedReader(new StringReader(cql)));
34 try { 34 try {
35 System.out.print("CQL parsing:\t"+cql); 35 System.out.print("CQL parsing:\t"+cql);
36 - assertEquals(p.parse(field) ,q); 36 + assertEquals(p.parse(field, defaultPrefix) ,q);
37 System.out.print("\n"); 37 System.out.print("\n");
38 } catch (ParseException e) { 38 } catch (ParseException e) {
39 System.out.println("Error CQL parsing:\t"+cql); 39 System.out.println("Error CQL parsing:\t"+cql);
@@ -41,12 +41,12 @@ public class MtasCQLParserTestSentence { @@ -41,12 +41,12 @@ public class MtasCQLParserTestSentence {
41 } 41 }
42 } 42 }
43 43
44 - private void testCQLEquivalent(String field, String cql1, String cql2) { 44 + private void testCQLEquivalent(String field, String defaultPrefix, String cql1, String cql2) {
45 MtasCQLParser p1 = new MtasCQLParser(new BufferedReader(new StringReader(cql1))); 45 MtasCQLParser p1 = new MtasCQLParser(new BufferedReader(new StringReader(cql1)));
46 MtasCQLParser p2 = new MtasCQLParser(new BufferedReader(new StringReader(cql2))); 46 MtasCQLParser p2 = new MtasCQLParser(new BufferedReader(new StringReader(cql2)));
47 try { 47 try {
48 System.out.print("CQL equivalent:\t"+cql1+" and "+cql2); 48 System.out.print("CQL equivalent:\t"+cql1+" and "+cql2);
49 - assertEquals(p1.parse(field) ,p2.parse(field)); 49 + assertEquals(p1.parse(field, defaultPrefix) ,p2.parse(field, defaultPrefix));
50 System.out.print("\n"); 50 System.out.print("\n");
51 } catch (ParseException e) { 51 } catch (ParseException e) {
52 System.out.println("Error CQL equivalent:\t"+cql1+" and "+cql2); 52 System.out.println("Error CQL equivalent:\t"+cql1+" and "+cql2);
@@ -73,6 +73,7 @@ public class MtasCQLParserTestSentence { @@ -73,6 +73,7 @@ public class MtasCQLParserTestSentence {
73 basicTest16(); 73 basicTest16();
74 basicTest17(); 74 basicTest17();
75 basicTest18(); 75 basicTest18();
  76 + basicTest19();
76 } 77 }
77 78
78 private void basicTest1() { 79 private void basicTest1() {
@@ -84,14 +85,14 @@ public class MtasCQLParserTestSentence { @@ -84,14 +85,14 @@ public class MtasCQLParserTestSentence {
84 items.add(new MtasSpanSequenceItem(q1, false)); 85 items.add(new MtasSpanSequenceItem(q1, false));
85 items.add(new MtasSpanSequenceItem(q2, false)); 86 items.add(new MtasSpanSequenceItem(q2, false));
86 SpanQuery q = new MtasSpanSequenceQuery(items); 87 SpanQuery q = new MtasSpanSequenceQuery(items);
87 - testCQLParse(field, cql, q); 88 + testCQLParse(field, null, cql, q);
88 } 89 }
89 90
90 private void basicTest2() { 91 private void basicTest2() {
91 String field = "testveld"; 92 String field = "testveld";
92 String cql1 = "[pos=\"LID\"] [] []? [] [lemma=\"koe\"]"; 93 String cql1 = "[pos=\"LID\"] [] []? [] [lemma=\"koe\"]";
93 String cql2 = "[pos=\"LID\"] []{2,3} [lemma=\"koe\"]"; 94 String cql2 = "[pos=\"LID\"] []{2,3} [lemma=\"koe\"]";
94 - testCQLEquivalent(field, cql1, cql2); 95 + testCQLEquivalent(field, null, cql1, cql2);
95 } 96 }
96 97
97 private void basicTest3() { 98 private void basicTest3() {
@@ -100,7 +101,7 @@ public class MtasCQLParserTestSentence { @@ -100,7 +101,7 @@ public class MtasCQLParserTestSentence {
100 SpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID"); 101 SpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID");
101 SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","koe"); 102 SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","koe");
102 SpanQuery q = new MtasSpanOrQuery(q1,q2); 103 SpanQuery q = new MtasSpanOrQuery(q1,q2);
103 - testCQLParse(field, cql, q); 104 + testCQLParse(field, null, cql, q);
104 } 105 }
105 106
106 private void basicTest4() { 107 private void basicTest4() {
@@ -114,28 +115,28 @@ public class MtasCQLParserTestSentence { @@ -114,28 +115,28 @@ public class MtasCQLParserTestSentence {
114 items.add(new MtasSpanSequenceItem(q3, false)); 115 items.add(new MtasSpanSequenceItem(q3, false));
115 SpanQuery q4 = new MtasSpanSequenceQuery(items); 116 SpanQuery q4 = new MtasSpanSequenceQuery(items);
116 SpanQuery q = new MtasSpanOrQuery(q1,q4); 117 SpanQuery q = new MtasSpanOrQuery(q1,q4);
117 - testCQLParse(field, cql, q); 118 + testCQLParse(field, null, cql, q);
118 } 119 }
119 120
120 private void basicTest5() { 121 private void basicTest5() {
121 String field = "testveld"; 122 String field = "testveld";
122 String cql1 = "([pos=\"LID\"]([pos=\"ADJ\"][lemma=\"koe\"]))"; 123 String cql1 = "([pos=\"LID\"]([pos=\"ADJ\"][lemma=\"koe\"]))";
123 String cql2 = "[pos=\"LID\"][pos=\"ADJ\"][lemma=\"koe\"]"; 124 String cql2 = "[pos=\"LID\"][pos=\"ADJ\"][lemma=\"koe\"]";
124 - testCQLEquivalent(field, cql1, cql2); 125 + testCQLEquivalent(field, null, cql1, cql2);
125 } 126 }
126 127
127 private void basicTest6() { 128 private void basicTest6() {
128 String field = "testveld"; 129 String field = "testveld";
129 String cql1 = "([pos=\"LID\"]|[lemma=\"de\"][lemma=\"koe\"])|([pos=\"ADJ\"]|([lemma=\"het\"]([lemma=\"paard\"])))"; 130 String cql1 = "([pos=\"LID\"]|[lemma=\"de\"][lemma=\"koe\"])|([pos=\"ADJ\"]|([lemma=\"het\"]([lemma=\"paard\"])))";
130 String cql2 = "[pos=\"LID\"]|[lemma=\"de\"][lemma=\"koe\"]|[pos=\"ADJ\"]|[lemma=\"het\"][lemma=\"paard\"]"; 131 String cql2 = "[pos=\"LID\"]|[lemma=\"de\"][lemma=\"koe\"]|[pos=\"ADJ\"]|[lemma=\"het\"][lemma=\"paard\"]";
131 - testCQLEquivalent(field, cql1, cql2); 132 + testCQLEquivalent(field, null, cql1, cql2);
132 } 133 }
133 134
134 private void basicTest7() { 135 private void basicTest7() {
135 String field = "testveld"; 136 String field = "testveld";
136 String cql1 = "[pos=\"LID\"] []{0,1} []{3,5} []{2,4}"; 137 String cql1 = "[pos=\"LID\"] []{0,1} []{3,5} []{2,4}";
137 String cql2 = "[pos=\"LID\"] []{5,10}"; 138 String cql2 = "[pos=\"LID\"] []{5,10}";
138 - testCQLEquivalent(field, cql1, cql2); 139 + testCQLEquivalent(field, null, cql1, cql2);
139 } 140 }
140 141
141 private void basicTest8() { 142 private void basicTest8() {
@@ -149,7 +150,7 @@ public class MtasCQLParserTestSentence { @@ -149,7 +150,7 @@ public class MtasCQLParserTestSentence {
149 items.add(new MtasSpanSequenceItem(q1, false)); 150 items.add(new MtasSpanSequenceItem(q1, false));
150 items.add(new MtasSpanSequenceItem(q4, false)); 151 items.add(new MtasSpanSequenceItem(q4, false));
151 SpanQuery q = new MtasSpanSequenceQuery(items); 152 SpanQuery q = new MtasSpanSequenceQuery(items);
152 - testCQLParse(field, cql, q); 153 + testCQLParse(field, null, cql, q);
153 } 154 }
154 155
155 private void basicTest9() { 156 private void basicTest9() {
@@ -165,7 +166,7 @@ public class MtasCQLParserTestSentence { @@ -165,7 +166,7 @@ public class MtasCQLParserTestSentence {
165 items.add(new MtasSpanSequenceItem(q5, false)); 166 items.add(new MtasSpanSequenceItem(q5, false));
166 items.add(new MtasSpanSequenceItem(q4, false)); 167 items.add(new MtasSpanSequenceItem(q4, false));
167 SpanQuery q = new MtasSpanSequenceQuery(items); 168 SpanQuery q = new MtasSpanSequenceQuery(items);
168 - testCQLParse(field, cql, q); 169 + testCQLParse(field, null, cql, q);
169 } 170 }
170 171
171 private void basicTest10() { 172 private void basicTest10() {
@@ -179,7 +180,7 @@ public class MtasCQLParserTestSentence { @@ -179,7 +180,7 @@ public class MtasCQLParserTestSentence {
179 items.add(new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(q2,1,3), false)); 180 items.add(new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(q2,1,3), false));
180 items.add(new MtasSpanSequenceItem(q3, false)); 181 items.add(new MtasSpanSequenceItem(q3, false));
181 SpanQuery q = new MtasSpanSequenceQuery(items); 182 SpanQuery q = new MtasSpanSequenceQuery(items);
182 - testCQLParse(field, cql, q); 183 + testCQLParse(field, null, cql, q);
183 } 184 }
184 185
185 private void basicTest11() { 186 private void basicTest11() {
@@ -188,7 +189,7 @@ public class MtasCQLParserTestSentence { @@ -188,7 +189,7 @@ public class MtasCQLParserTestSentence {
188 SpanQuery q1 = new MtasCQLParserGroupQuery(field,"sentence"); 189 SpanQuery q1 = new MtasCQLParserGroupQuery(field,"sentence");
189 SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","koe"); 190 SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","koe");
190 SpanQuery q = new SpanContainingQuery(q1, q2); 191 SpanQuery q = new SpanContainingQuery(q1, q2);
191 - testCQLParse(field, cql, q); 192 + testCQLParse(field, null, cql, q);
192 } 193 }
193 194
194 private void basicTest12() { 195 private void basicTest12() {
@@ -197,7 +198,7 @@ public class MtasCQLParserTestSentence { @@ -197,7 +198,7 @@ public class MtasCQLParserTestSentence {
197 SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe"); 198 SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe");
198 SpanQuery q2 = new MtasCQLParserGroupQuery(field,"sentence"); 199 SpanQuery q2 = new MtasCQLParserGroupQuery(field,"sentence");
199 SpanQuery q = new SpanWithinQuery(q2, q1); 200 SpanQuery q = new SpanWithinQuery(q2, q1);
200 - testCQLParse(field, cql, q); 201 + testCQLParse(field, null, cql, q);
201 } 202 }
202 203
203 private void basicTest13() { 204 private void basicTest13() {
@@ -211,7 +212,7 @@ public class MtasCQLParserTestSentence { @@ -211,7 +212,7 @@ public class MtasCQLParserTestSentence {
211 items.add(new MtasSpanSequenceItem(q1, false)); 212 items.add(new MtasSpanSequenceItem(q1, false));
212 items.add(new MtasSpanSequenceItem(q4, false)); 213 items.add(new MtasSpanSequenceItem(q4, false));
213 SpanQuery q = new MtasSpanSequenceQuery(items); 214 SpanQuery q = new MtasSpanSequenceQuery(items);
214 - testCQLParse(field, cql, q); 215 + testCQLParse(field, null, cql, q);
215 } 216 }
216 217
217 private void basicTest14() { 218 private void basicTest14() {
@@ -225,7 +226,7 @@ public class MtasCQLParserTestSentence { @@ -225,7 +226,7 @@ public class MtasCQLParserTestSentence {
225 items.add(new MtasSpanSequenceItem(q3, false)); 226 items.add(new MtasSpanSequenceItem(q3, false));
226 items.add(new MtasSpanSequenceItem(q4, false)); 227 items.add(new MtasSpanSequenceItem(q4, false));
227 SpanQuery q = new MtasSpanSequenceQuery(items); 228 SpanQuery q = new MtasSpanSequenceQuery(items);
228 - testCQLParse(field, cql, q); 229 + testCQLParse(field, null, cql, q);
229 } 230 }
230 231
231 private void basicTest15() { 232 private void basicTest15() {
@@ -246,7 +247,7 @@ public class MtasCQLParserTestSentence { @@ -246,7 +247,7 @@ public class MtasCQLParserTestSentence {
246 items2.add(new MtasSpanSequenceItem(q1, false)); 247 items2.add(new MtasSpanSequenceItem(q1, false));
247 items2.add(new MtasSpanSequenceItem(q8, false)); 248 items2.add(new MtasSpanSequenceItem(q8, false));
248 SpanQuery q = new MtasSpanSequenceQuery(items2); 249 SpanQuery q = new MtasSpanSequenceQuery(items2);
249 - testCQLParse(field, cql, q); 250 + testCQLParse(field, null, cql, q);
250 } 251 }
251 252
252 private void basicTest16() { 253 private void basicTest16() {
@@ -258,7 +259,7 @@ public class MtasCQLParserTestSentence { @@ -258,7 +259,7 @@ public class MtasCQLParserTestSentence {
258 SpanQuery q4 = new SpanContainingQuery(q2, q3); 259 SpanQuery q4 = new SpanContainingQuery(q2, q3);
259 SpanQuery q5 = new SpanWithinQuery(q4, q1); 260 SpanQuery q5 = new SpanWithinQuery(q4, q1);
260 SpanQuery q = new SpanNotQuery(q5,new SpanContainingQuery(q5, q3)); 261 SpanQuery q = new SpanNotQuery(q5,new SpanContainingQuery(q5, q3));
261 - testCQLParse(field, cql, q); 262 + testCQLParse(field, null, cql, q);
262 } 263 }
263 264
264 private void basicTest17() { 265 private void basicTest17() {
@@ -271,11 +272,23 @@ public class MtasCQLParserTestSentence { @@ -271,11 +272,23 @@ public class MtasCQLParserTestSentence {
271 items.add(new MtasSpanSequenceItem(q2, false)); 272 items.add(new MtasSpanSequenceItem(q2, false));
272 items.add(new MtasSpanSequenceItem(new MtasSpanMatchAllQuery(field), false)); 273 items.add(new MtasSpanSequenceItem(new MtasSpanMatchAllQuery(field), false));
273 SpanQuery q = new MtasSpanSequenceQuery(items); 274 SpanQuery q = new MtasSpanSequenceQuery(items);
274 - testCQLParse(field, cql, q); 275 + testCQLParse(field, null, cql, q);
275 } 276 }
276 277
277 private void basicTest18() { 278 private void basicTest18() {
278 String field = "testveld"; 279 String field = "testveld";
  280 + String cql = "\"de\" [pos=\"N\"]";
  281 + SpanQuery q1 = new MtasCQLParserWordQuery(field,"t_lc","de");
  282 + SpanQuery q2 = new MtasCQLParserWordQuery(field,"pos","N");
  283 + List<MtasSpanSequenceItem> items = new ArrayList<MtasSpanSequenceItem>();
  284 + items.add(new MtasSpanSequenceItem(q1, false));
  285 + items.add(new MtasSpanSequenceItem(q2, false));
  286 + SpanQuery q = new MtasSpanSequenceQuery(items);
  287 + testCQLParse(field, "t_lc", cql, q);
  288 + }
  289 +
  290 + private void basicTest19() {
  291 + String field = "testveld";
279 String cql = "([]<entity=\"loc\"/>{1,2}[]){3,4}"; 292 String cql = "([]<entity=\"loc\"/>{1,2}[]){3,4}";
280 SpanQuery q1 = new MtasCQLParserGroupQuery(field,"entity","loc"); 293 SpanQuery q1 = new MtasCQLParserGroupQuery(field,"entity","loc");
281 SpanQuery q2 = new MtasSpanRecurrenceQuery(q1,1,2); 294 SpanQuery q2 = new MtasSpanRecurrenceQuery(q1,1,2);
@@ -285,7 +298,7 @@ public class MtasCQLParserTestSentence { @@ -285,7 +298,7 @@ public class MtasCQLParserTestSentence {
285 items.add(new MtasSpanSequenceItem(new MtasSpanMatchAllQuery(field), false)); 298 items.add(new MtasSpanSequenceItem(new MtasSpanMatchAllQuery(field), false));
286 SpanQuery q3 = new MtasSpanSequenceQuery(items); 299 SpanQuery q3 = new MtasSpanSequenceQuery(items);
287 SpanQuery q = new MtasSpanRecurrenceQuery(q3,3,4); 300 SpanQuery q = new MtasSpanRecurrenceQuery(q3,3,4);
288 - testCQLParse(field, cql, q); 301 + testCQLParse(field, null, cql, q);
289 } 302 }
290 303
291 } 304 }
junit/mtas/parser/MtasCQLParserTestWord.java
@@ -23,10 +23,10 @@ public class MtasCQLParserTestWord { @@ -23,10 +23,10 @@ public class MtasCQLParserTestWord {
23 basicNotTests(); 23 basicNotTests();
24 } 24 }
25 25
26 - private void testCQLParse(String field, String cql, SpanQuery q) { 26 + private void testCQLParse(String field, String defaultPrefix, String cql, SpanQuery q) {
27 MtasCQLParser p = new MtasCQLParser(new BufferedReader(new StringReader(cql))); 27 MtasCQLParser p = new MtasCQLParser(new BufferedReader(new StringReader(cql)));
28 try { 28 try {
29 - assertEquals(p.parse(field) ,q); 29 + assertEquals(p.parse(field, defaultPrefix) ,q);
30 System.out.println("Tested CQL parsing:\t"+cql); 30 System.out.println("Tested CQL parsing:\t"+cql);
31 } catch (ParseException e) { 31 } catch (ParseException e) {
32 System.out.println("Error CQL parsing:\t"+cql); 32 System.out.println("Error CQL parsing:\t"+cql);
@@ -34,11 +34,11 @@ public class MtasCQLParserTestWord { @@ -34,11 +34,11 @@ public class MtasCQLParserTestWord {
34 } 34 }
35 } 35 }
36 36
37 - private void testCQLEquivalent(String field, String cql1, String cql2) { 37 + private void testCQLEquivalent(String field, String defaultPrefix, String cql1, String cql2) {
38 MtasCQLParser p1 = new MtasCQLParser(new BufferedReader(new StringReader(cql1))); 38 MtasCQLParser p1 = new MtasCQLParser(new BufferedReader(new StringReader(cql1)));
39 MtasCQLParser p2 = new MtasCQLParser(new BufferedReader(new StringReader(cql2))); 39 MtasCQLParser p2 = new MtasCQLParser(new BufferedReader(new StringReader(cql2)));
40 try { 40 try {
41 - assertEquals(p1.parse(field) ,p2.parse(field)); 41 + assertEquals(p1.parse(field, defaultPrefix) ,p2.parse(field, defaultPrefix));
42 System.out.println("Tested CQL equivalent:\t"+cql1+" and "+cql2); 42 System.out.println("Tested CQL equivalent:\t"+cql1+" and "+cql2);
43 } catch (ParseException e) { 43 } catch (ParseException e) {
44 System.out.println("Error CQL equivalent:\t"+cql1+" and "+cql2); 44 System.out.println("Error CQL equivalent:\t"+cql1+" and "+cql2);
@@ -67,6 +67,7 @@ public class MtasCQLParserTestWord { @@ -67,6 +67,7 @@ public class MtasCQLParserTestWord {
67 basicTest10(); 67 basicTest10();
68 basicTest11(); 68 basicTest11();
69 basicTest12(); 69 basicTest12();
  70 + basicTest13();
70 } 71 }
71 72
72 private void basicNotTest1() { 73 private void basicNotTest1() {
@@ -75,14 +76,14 @@ public class MtasCQLParserTestWord { @@ -75,14 +76,14 @@ public class MtasCQLParserTestWord {
75 SpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID"); 76 SpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID");
76 SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","de"); 77 SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","de");
77 SpanQuery q = new SpanNotQuery(q1,q2); 78 SpanQuery q = new SpanNotQuery(q1,q2);
78 - testCQLParse(field, cql, q); 79 + testCQLParse(field, null, cql, q);
79 } 80 }
80 81
81 private void basicNotTest2() { 82 private void basicNotTest2() {
82 String field = "testveld"; 83 String field = "testveld";
83 String cql1 = "[pos=\"LID\" & (!lemma=\"de\")]"; 84 String cql1 = "[pos=\"LID\" & (!lemma=\"de\")]";
84 String cql2 = "[pos=\"LID\" & !(lemma=\"de\")]"; 85 String cql2 = "[pos=\"LID\" & !(lemma=\"de\")]";
85 - testCQLEquivalent(field, cql1, cql2); 86 + testCQLEquivalent(field, null, cql1, cql2);
86 } 87 }
87 88
88 private void basicNotTest3() { 89 private void basicNotTest3() {
@@ -93,28 +94,28 @@ public class MtasCQLParserTestWord { @@ -93,28 +94,28 @@ public class MtasCQLParserTestWord {
93 SpanQuery q3 = new MtasCQLParserWordQuery(field,"lemma","een"); 94 SpanQuery q3 = new MtasCQLParserWordQuery(field,"lemma","een");
94 SpanQuery q4 = new MtasSpanOrQuery(new SpanQuery[]{q2,q3}); 95 SpanQuery q4 = new MtasSpanOrQuery(new SpanQuery[]{q2,q3});
95 SpanQuery q = new SpanNotQuery(q1,q4); 96 SpanQuery q = new SpanNotQuery(q1,q4);
96 - testCQLParse(field, cql, q); 97 + testCQLParse(field, null, cql, q);
97 } 98 }
98 99
99 private void basicNotTest4() { 100 private void basicNotTest4() {
100 String field = "testveld"; 101 String field = "testveld";
101 String cql1 = "[pos=\"LID\" & !(lemma=\"de\" | lemma=\"een\")]"; 102 String cql1 = "[pos=\"LID\" & !(lemma=\"de\" | lemma=\"een\")]";
102 String cql2 = "[pos=\"LID\" & (!lemma=\"de\" & !lemma=\"een\")]"; 103 String cql2 = "[pos=\"LID\" & (!lemma=\"de\" & !lemma=\"een\")]";
103 - testCQLEquivalent(field, cql1, cql2); 104 + testCQLEquivalent(field, null, cql1, cql2);
104 } 105 }
105 106
106 private void basicNotTest5() { 107 private void basicNotTest5() {
107 String field = "testveld"; 108 String field = "testveld";
108 String cql1 = "[pos=\"LID\" & !(lemma=\"de\" | lemma=\"een\")]"; 109 String cql1 = "[pos=\"LID\" & !(lemma=\"de\" | lemma=\"een\")]";
109 String cql2 = "[pos=\"LID\" & !lemma=\"de\" & !lemma=\"een\"]"; 110 String cql2 = "[pos=\"LID\" & !lemma=\"de\" & !lemma=\"een\"]";
110 - testCQLEquivalent(field, cql1, cql2); 111 + testCQLEquivalent(field, null, cql1, cql2);
111 } 112 }
112 113
113 private void basicTest1() { 114 private void basicTest1() {
114 String field = "testveld"; 115 String field = "testveld";
115 String cql = "[lemma=\"koe\"]"; 116 String cql = "[lemma=\"koe\"]";
116 SpanQuery q = new MtasCQLParserWordQuery(field, "lemma", "koe"); 117 SpanQuery q = new MtasCQLParserWordQuery(field, "lemma", "koe");
117 - testCQLParse(field, cql, q); 118 + testCQLParse(field, null, cql, q);
118 } 119 }
119 120
120 private void basicTest2() { 121 private void basicTest2() {
@@ -123,7 +124,7 @@ public class MtasCQLParserTestWord { @@ -123,7 +124,7 @@ public class MtasCQLParserTestWord {
123 SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe"); 124 SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe");
124 SpanQuery q2 = new MtasCQLParserWordQuery(field,"pos","N"); 125 SpanQuery q2 = new MtasCQLParserWordQuery(field,"pos","N");
125 SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q1,q2}); 126 SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q1,q2});
126 - testCQLParse(field, cql, q); 127 + testCQLParse(field, null, cql, q);
127 } 128 }
128 129
129 private void basicTest3() { 130 private void basicTest3() {
@@ -132,14 +133,14 @@ public class MtasCQLParserTestWord { @@ -132,14 +133,14 @@ public class MtasCQLParserTestWord {
132 SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe"); 133 SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe");
133 SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","paard"); 134 SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","paard");
134 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q2}); 135 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q2});
135 - testCQLParse(field, cql, q); 136 + testCQLParse(field, null, cql, q);
136 } 137 }
137 138
138 private void basicTest4() { 139 private void basicTest4() {
139 String field = "testveld"; 140 String field = "testveld";
140 String cql1 = "[lemma=\"koe\" | lemma=\"paard\"]"; 141 String cql1 = "[lemma=\"koe\" | lemma=\"paard\"]";
141 String cql2 = "[(lemma=\"koe\" | lemma=\"paard\")]"; 142 String cql2 = "[(lemma=\"koe\" | lemma=\"paard\")]";
142 - testCQLEquivalent(field, cql1, cql2); 143 + testCQLEquivalent(field, null, cql1, cql2);
143 } 144 }
144 145
145 private void basicTest5() { 146 private void basicTest5() {
@@ -150,7 +151,7 @@ public class MtasCQLParserTestWord { @@ -150,7 +151,7 @@ public class MtasCQLParserTestWord {
150 SpanQuery q3 = new MtasSpanOrQuery(new SpanQuery[]{q1,q2}); 151 SpanQuery q3 = new MtasSpanOrQuery(new SpanQuery[]{q1,q2});
151 SpanQuery q4 = new MtasCQLParserWordQuery(field,"pos","N"); 152 SpanQuery q4 = new MtasCQLParserWordQuery(field,"pos","N");
152 SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q3,q4}); 153 SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q3,q4});
153 - testCQLParse(field, cql, q); 154 + testCQLParse(field, null, cql, q);
154 } 155 }
155 156
156 private void basicTest6() { 157 private void basicTest6() {
@@ -161,7 +162,7 @@ public class MtasCQLParserTestWord { @@ -161,7 +162,7 @@ public class MtasCQLParserTestWord {
161 SpanQuery q3 = new MtasCQLParserWordQuery(field,"lemma","paard"); 162 SpanQuery q3 = new MtasCQLParserWordQuery(field,"lemma","paard");
162 SpanQuery q4 = new MtasSpanOrQuery(new SpanQuery[]{q2,q3}); 163 SpanQuery q4 = new MtasSpanOrQuery(new SpanQuery[]{q2,q3});
163 SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q1,q4}); 164 SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q1,q4});
164 - testCQLParse(field, cql, q); 165 + testCQLParse(field, null, cql, q);
165 } 166 }
166 167
167 private void basicTest7() { 168 private void basicTest7() {
@@ -172,7 +173,7 @@ public class MtasCQLParserTestWord { @@ -172,7 +173,7 @@ public class MtasCQLParserTestWord {
172 SpanQuery q3 = new MtasCQLParserWordQuery(field,"pos","N"); 173 SpanQuery q3 = new MtasCQLParserWordQuery(field,"pos","N");
173 SpanQuery q4 = new MtasSpanAndQuery(new SpanQuery[]{q2,q3}); 174 SpanQuery q4 = new MtasSpanAndQuery(new SpanQuery[]{q2,q3});
174 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q4}); 175 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q4});
175 - testCQLParse(field, cql, q); 176 + testCQLParse(field, null, cql, q);
176 } 177 }
177 178
178 private void basicTest8() { 179 private void basicTest8() {
@@ -185,7 +186,7 @@ public class MtasCQLParserTestWord { @@ -185,7 +186,7 @@ public class MtasCQLParserTestWord {
185 SpanQuery q5 = new MtasSpanAndQuery(new SpanQuery[]{q1,q2}); 186 SpanQuery q5 = new MtasSpanAndQuery(new SpanQuery[]{q1,q2});
186 SpanQuery q6 = new MtasSpanAndQuery(new SpanQuery[]{q3,q4}); 187 SpanQuery q6 = new MtasSpanAndQuery(new SpanQuery[]{q3,q4});
187 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q5,q6}); 188 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q5,q6});
188 - testCQLParse(field, cql, q); 189 + testCQLParse(field, null, cql, q);
189 } 190 }
190 191
191 private void basicTest9() { 192 private void basicTest9() {
@@ -200,7 +201,7 @@ public class MtasCQLParserTestWord { @@ -200,7 +201,7 @@ public class MtasCQLParserTestWord {
200 SpanQuery q7 = new MtasSpanAndQuery(new SpanQuery[]{q6,q3}); 201 SpanQuery q7 = new MtasSpanAndQuery(new SpanQuery[]{q6,q3});
201 SpanQuery q8 = new MtasSpanAndQuery(new SpanQuery[]{q4,q5}); 202 SpanQuery q8 = new MtasSpanAndQuery(new SpanQuery[]{q4,q5});
202 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q7,q8}); 203 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q7,q8});
203 - testCQLParse(field, cql, q); 204 + testCQLParse(field, null, cql, q);
204 } 205 }
205 206
206 private void basicTest10() { 207 private void basicTest10() {
@@ -217,22 +218,22 @@ public class MtasCQLParserTestWord { @@ -217,22 +218,22 @@ public class MtasCQLParserTestWord {
217 SpanQuery q9 = new MtasSpanOrQuery(new SpanQuery[]{q4,q5}); 218 SpanQuery q9 = new MtasSpanOrQuery(new SpanQuery[]{q4,q5});
218 SpanQuery q10 = new MtasSpanAndQuery(new SpanQuery[]{q9,q6}); 219 SpanQuery q10 = new MtasSpanAndQuery(new SpanQuery[]{q9,q6});
219 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q8,q10}); 220 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q8,q10});
220 - testCQLParse(field, cql, q); 221 + testCQLParse(field, null, cql, q);
221 } 222 }
222 223
223 private void basicTest11() { 224 private void basicTest11() {
224 String field = "testveld"; 225 String field = "testveld";
225 String cql1 = "[#300]"; 226 String cql1 = "[#300]";
226 SpanQuery q1 = new MtasCQLParserWordPositionQuery(field, 300); 227 SpanQuery q1 = new MtasCQLParserWordPositionQuery(field, 300);
227 - testCQLParse(field, cql1, q1); 228 + testCQLParse(field, null, cql1, q1);
228 String cql2 = "[#100-110]"; 229 String cql2 = "[#100-110]";
229 SpanQuery q2 = new MtasCQLParserWordPositionQuery(field, 100, 110); 230 SpanQuery q2 = new MtasCQLParserWordPositionQuery(field, 100, 110);
230 - testCQLParse(field, cql2, q2); 231 + testCQLParse(field, null, cql2, q2);
231 String cql3 = "[#100-105 | #110]"; 232 String cql3 = "[#100-105 | #110]";
232 SpanQuery q3a = new MtasCQLParserWordPositionQuery(field, 100, 105); 233 SpanQuery q3a = new MtasCQLParserWordPositionQuery(field, 100, 105);
233 SpanQuery q3b = new MtasCQLParserWordPositionQuery(field, 110); 234 SpanQuery q3b = new MtasCQLParserWordPositionQuery(field, 110);
234 SpanQuery q3 = new MtasSpanOrQuery(q3a, q3b); 235 SpanQuery q3 = new MtasSpanOrQuery(q3a, q3b);
235 - testCQLParse(field, cql3, q3); 236 + testCQLParse(field, null, cql3, q3);
236 } 237 }
237 238
238 private void basicTest12() { 239 private void basicTest12() {
@@ -242,6 +243,13 @@ public class MtasCQLParserTestWord { @@ -242,6 +243,13 @@ public class MtasCQLParserTestWord {
242 SpanQuery q2 = new MtasCQLParserWordQuery(field,"t_lc","het"); 243 SpanQuery q2 = new MtasCQLParserWordQuery(field,"t_lc","het");
243 SpanQuery q3 = new MtasCQLParserWordQuery(field,"t_lc","paard"); 244 SpanQuery q3 = new MtasCQLParserWordQuery(field,"t_lc","paard");
244 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q2,q3}); 245 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q2,q3});
245 - testCQLParse(field, cql, q);  
246 - } 246 + testCQLParse(field, null, cql, q);
  247 + }
  248 +
  249 + private void basicTest13() {
  250 + String field = "testveld";
  251 + String cql = "\"de\"";
  252 + SpanQuery q = new MtasCQLParserWordQuery(field,"t_lc","de");
  253 + testCQLParse(field, "t_lc", cql, q);
  254 + }
247 } 255 }
@@ -2,11 +2,13 @@ @@ -2,11 +2,13 @@
2 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 2 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3 <properties> 3 <properties>
4 <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> 4 <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  5 + <currentDevelopmentVersion>6.2.0</currentDevelopmentVersion>
  6 + <currentDevelopmentRelease>20160802</currentDevelopmentRelease>
5 </properties> 7 </properties>
6 <modelVersion>4.0.0</modelVersion> 8 <modelVersion>4.0.0</modelVersion>
7 <groupId>dev.meertens.mtas</groupId> 9 <groupId>dev.meertens.mtas</groupId>
8 <artifactId>mtas</artifactId> 10 <artifactId>mtas</artifactId>
9 - <version>6.1.0</version> 11 + <version>6.2.0</version>
10 <packaging>jar</packaging> 12 <packaging>jar</packaging>
11 <licenses> 13 <licenses>
12 <license> 14 <license>
@@ -23,12 +25,12 @@ @@ -23,12 +25,12 @@
23 <developers> 25 <developers>
24 <developer> 26 <developer>
25 <name>Matthijs Brouwer</name> 27 <name>Matthijs Brouwer</name>
26 - <url>https://www.meertens.knaw.nl/cms/nl/medewerkers/144373-matthijsb</url> 28 + <url>https://nl.linkedin.com/in/brouwermatthijs/</url>
27 </developer> 29 </developer>
28 <developer> 30 <developer>
29 - <name>Marc Kemps-Snijders</name>  
30 - <url>https://www.meertens.knaw.nl/cms/nl/medewerkers/143329-marck</url>  
31 - </developer> 31 + <name>Marc Kemps-Snijders</name>
  32 + <url>https://nl.linkedin.com/in/marc-kemps-snijders-1b33753</url>
  33 + </developer>
32 </developers> 34 </developers>
33 <build> 35 <build>
34 <sourceDirectory>src</sourceDirectory> 36 <sourceDirectory>src</sourceDirectory>
@@ -39,6 +41,24 @@ @@ -39,6 +41,24 @@
39 </resources> 41 </resources>
40 <plugins> 42 <plugins>
41 <plugin> 43 <plugin>
  44 + <artifactId>maven-clean-plugin</artifactId>
  45 + <version>3.0.0</version>
  46 + <configuration>
  47 + <filesets>
  48 + <fileset>
  49 + <directory>gh-pages</directory>
  50 + <includes>
  51 + <include>**/*</include>
  52 + </includes>
  53 + <excludes>
  54 + <exclude>**/.git/</exclude>
  55 + </excludes>
  56 + <followSymlinks>false</followSymlinks>
  57 + </fileset>
  58 + </filesets>
  59 + </configuration>
  60 + </plugin>
  61 + <plugin>
42 <groupId>org.apache.maven.plugins</groupId> 62 <groupId>org.apache.maven.plugins</groupId>
43 <artifactId>maven-compiler-plugin</artifactId> 63 <artifactId>maven-compiler-plugin</artifactId>
44 <version>3.5.1</version> 64 <version>3.5.1</version>
@@ -46,7 +66,7 @@ @@ -46,7 +66,7 @@
46 <source>1.8</source> 66 <source>1.8</source>
47 <target>1.8</target> 67 <target>1.8</target>
48 </configuration> 68 </configuration>
49 - </plugin> 69 + </plugin>
50 <plugin> 70 <plugin>
51 <groupId>org.apache.maven.plugins</groupId> 71 <groupId>org.apache.maven.plugins</groupId>
52 <artifactId>maven-site-plugin</artifactId> 72 <artifactId>maven-site-plugin</artifactId>
@@ -145,27 +165,27 @@ @@ -145,27 +165,27 @@
145 <dependency> 165 <dependency>
146 <groupId>org.apache.lucene</groupId> 166 <groupId>org.apache.lucene</groupId>
147 <artifactId>lucene-core</artifactId> 167 <artifactId>lucene-core</artifactId>
148 - <version>6.1.0</version> 168 + <version>6.2.0</version>
149 </dependency> 169 </dependency>
150 <dependency> 170 <dependency>
151 <groupId>org.apache.lucene</groupId> 171 <groupId>org.apache.lucene</groupId>
152 <artifactId>lucene-analyzers-common</artifactId> 172 <artifactId>lucene-analyzers-common</artifactId>
153 - <version>6.1.0</version> 173 + <version>6.2.0</version>
154 </dependency> 174 </dependency>
155 <dependency> 175 <dependency>
156 <groupId>org.apache.lucene</groupId> 176 <groupId>org.apache.lucene</groupId>
157 <artifactId>lucene-queryparser</artifactId> 177 <artifactId>lucene-queryparser</artifactId>
158 - <version>6.1.0</version> 178 + <version>6.2.0</version>
159 </dependency> 179 </dependency>
160 <dependency> 180 <dependency>
161 <groupId>org.apache.lucene</groupId> 181 <groupId>org.apache.lucene</groupId>
162 <artifactId>lucene-codecs</artifactId> 182 <artifactId>lucene-codecs</artifactId>
163 - <version>6.1.0</version> 183 + <version>6.2.0</version>
164 </dependency> 184 </dependency>
165 <dependency> 185 <dependency>
166 <groupId>org.apache.solr</groupId> 186 <groupId>org.apache.solr</groupId>
167 <artifactId>solr-core</artifactId> 187 <artifactId>solr-core</artifactId>
168 - <version>6.1.0</version> 188 + <version>6.2.0</version>
169 </dependency> 189 </dependency>
170 <dependency> 190 <dependency>
171 <groupId>org.apache.commons</groupId> 191 <groupId>org.apache.commons</groupId>
src/mtas/analysis/MtasTokenizer.java
@@ -25,8 +25,11 @@ import org.apache.lucene.util.AttributeFactory; @@ -25,8 +25,11 @@ import org.apache.lucene.util.AttributeFactory;
25 25
26 /** 26 /**
27 * The Class MtasTokenizer. 27 * The Class MtasTokenizer.
  28 + *
  29 + * @param <T>
  30 + * the generic type
28 */ 31 */
29 -public final class MtasTokenizer extends Tokenizer { 32 +public final class MtasTokenizer<T> extends Tokenizer {
30 33
31 /** The configuration mtas. */ 34 /** The configuration mtas. */
32 public static String CONFIGURATION_MTAS = "mtas"; 35 public static String CONFIGURATION_MTAS = "mtas";
@@ -73,7 +76,8 @@ public final class MtasTokenizer extends Tokenizer { @@ -73,7 +76,8 @@ public final class MtasTokenizer extends Tokenizer {
73 /** 76 /**
74 * Instantiates a new mtas tokenizer. 77 * Instantiates a new mtas tokenizer.
75 * 78 *
76 - * @param configFileName the config file name 79 + * @param configFileName
  80 + * the config file name
77 */ 81 */
78 public MtasTokenizer(String configFileName) { 82 public MtasTokenizer(String configFileName) {
79 readConfigurationFile(configFileName); 83 readConfigurationFile(configFileName);
@@ -82,8 +86,10 @@ public final class MtasTokenizer extends Tokenizer { @@ -82,8 +86,10 @@ public final class MtasTokenizer extends Tokenizer {
82 /** 86 /**
83 * Instantiates a new mtas tokenizer. 87 * Instantiates a new mtas tokenizer.
84 * 88 *
85 - * @param config the config  
86 - * @throws IOException Signals that an I/O exception has occurred. 89 + * @param config
  90 + * the config
  91 + * @throws IOException
  92 + * Signals that an I/O exception has occurred.
87 */ 93 */
88 public MtasTokenizer(MtasConfiguration config) throws IOException { 94 public MtasTokenizer(MtasConfiguration config) throws IOException {
89 processConfiguration(config); 95 processConfiguration(config);
@@ -92,8 +98,10 @@ public final class MtasTokenizer extends Tokenizer { @@ -92,8 +98,10 @@ public final class MtasTokenizer extends Tokenizer {
92 /** 98 /**
93 * Instantiates a new mtas tokenizer. 99 * Instantiates a new mtas tokenizer.
94 * 100 *
95 - * @param reader the reader  
96 - * @throws IOException Signals that an I/O exception has occurred. 101 + * @param reader
  102 + * the reader
  103 + * @throws IOException
  104 + * Signals that an I/O exception has occurred.
97 */ 105 */
98 public MtasTokenizer(InputStream reader) throws IOException { 106 public MtasTokenizer(InputStream reader) throws IOException {
99 processConfiguration(MtasConfiguration.readConfiguration(reader)); 107 processConfiguration(MtasConfiguration.readConfiguration(reader));
@@ -102,9 +110,12 @@ public final class MtasTokenizer extends Tokenizer { @@ -102,9 +110,12 @@ public final class MtasTokenizer extends Tokenizer {
102 /** 110 /**
103 * Instantiates a new mtas tokenizer. 111 * Instantiates a new mtas tokenizer.
104 * 112 *
105 - * @param factory the factory  
106 - * @param config the config  
107 - * @throws IOException Signals that an I/O exception has occurred. 113 + * @param factory
  114 + * the factory
  115 + * @param config
  116 + * the config
  117 + * @throws IOException
  118 + * Signals that an I/O exception has occurred.
108 */ 119 */
109 public MtasTokenizer(AttributeFactory factory, MtasConfiguration config) 120 public MtasTokenizer(AttributeFactory factory, MtasConfiguration config)
110 throws IOException { 121 throws IOException {
@@ -112,7 +123,9 @@ public final class MtasTokenizer extends Tokenizer { @@ -112,7 +123,9 @@ public final class MtasTokenizer extends Tokenizer {
112 processConfiguration(config); 123 processConfiguration(config);
113 } 124 }
114 125
115 - /* (non-Javadoc) 126 + /*
  127 + * (non-Javadoc)
  128 + *
116 * @see org.apache.lucene.analysis.TokenStream#incrementToken() 129 * @see org.apache.lucene.analysis.TokenStream#incrementToken()
117 */ 130 */
118 @Override 131 @Override
@@ -128,7 +141,7 @@ public final class MtasTokenizer extends Tokenizer { @@ -128,7 +141,7 @@ public final class MtasTokenizer extends Tokenizer {
128 // compute info 141 // compute info
129 positionIncrement = token.getPositionStart() - currentPosition; 142 positionIncrement = token.getPositionStart() - currentPosition;
130 currentPosition = token.getPositionStart(); 143 currentPosition = token.getPositionStart();
131 - payloadEncoder = new MtasPayloadEncoder(token, encodingFlags); 144 + payloadEncoder = new MtasPayloadEncoder(token, encodingFlags);
132 // set info 145 // set info
133 termAtt.append(token.getValue().toString()); 146 termAtt.append(token.getValue().toString());
134 positionIncrementAtt.setPositionIncrement(positionIncrement); 147 positionIncrementAtt.setPositionIncrement(positionIncrement);
@@ -157,7 +170,6 @@ public final class MtasTokenizer extends Tokenizer { @@ -157,7 +170,6 @@ public final class MtasTokenizer extends Tokenizer {
157 e.getClass().getSimpleName() + ": " + e.getMessage()); 170 e.getClass().getSimpleName() + ": " + e.getMessage());
158 } catch (MtasParserException e) { 171 } catch (MtasParserException e) {
159 tokenCollectionIterator = null; 172 tokenCollectionIterator = null;
160 - e.printStackTrace();  
161 throw new IOException( 173 throw new IOException(
162 e.getClass().getSimpleName() + ": " + e.getMessage()); 174 e.getClass().getSimpleName() + ": " + e.getMessage());
163 } 175 }
@@ -167,14 +179,19 @@ public final class MtasTokenizer extends Tokenizer { @@ -167,14 +179,19 @@ public final class MtasTokenizer extends Tokenizer {
167 /** 179 /**
168 * Prints the. 180 * Prints the.
169 * 181 *
170 - * @param r the r  
171 - * @throws IOException Signals that an I/O exception has occurred.  
172 - * @throws MtasParserException the mtas parser exception 182 + * @param r
  183 + * the r
  184 + * @throws IOException
  185 + * Signals that an I/O exception has occurred.
  186 + * @throws MtasParserException
  187 + * the mtas parser exception
173 */ 188 */
174 public void print(Reader r) throws IOException, MtasParserException { 189 public void print(Reader r) throws IOException, MtasParserException {
175 setReader(r); 190 setReader(r);
176 reset(); 191 reset();
177 - tokenCollection.print(); 192 + if (tokenCollection != null) {
  193 + tokenCollection.print();
  194 + }
178 end(); 195 end();
179 close(); 196 close();
180 } 197 }
@@ -182,10 +199,13 @@ public final class MtasTokenizer extends Tokenizer { @@ -182,10 +199,13 @@ public final class MtasTokenizer extends Tokenizer {
182 /** 199 /**
183 * Gets the list. 200 * Gets the list.
184 * 201 *
185 - * @param r the r 202 + * @param r
  203 + * the r
186 * @return the list 204 * @return the list
187 - * @throws IOException Signals that an I/O exception has occurred.  
188 - * @throws MtasParserException the mtas parser exception 205 + * @throws IOException
  206 + * Signals that an I/O exception has occurred.
  207 + * @throws MtasParserException
  208 + * the mtas parser exception
189 */ 209 */
190 public String[][] getList(Reader r) throws IOException, MtasParserException { 210 public String[][] getList(Reader r) throws IOException, MtasParserException {
191 setReader(r); 211 setReader(r);
@@ -199,9 +219,12 @@ public final class MtasTokenizer extends Tokenizer { @@ -199,9 +219,12 @@ public final class MtasTokenizer extends Tokenizer {
199 /** 219 /**
200 * Construct token collection. 220 * Construct token collection.
201 * 221 *
202 - * @param reader the reader  
203 - * @throws MtasConfigException the mtas config exception  
204 - * @throws MtasParserException the mtas parser exception 222 + * @param reader
  223 + * the reader
  224 + * @throws MtasConfigException
  225 + * the mtas config exception
  226 + * @throws MtasParserException
  227 + * the mtas parser exception
205 */ 228 */
206 private void constructTokenCollection(Reader reader) 229 private void constructTokenCollection(Reader reader)
207 throws MtasConfigException, MtasParserException { 230 throws MtasConfigException, MtasParserException {
@@ -216,29 +239,28 @@ public final class MtasTokenizer extends Tokenizer { @@ -216,29 +239,28 @@ public final class MtasTokenizer extends Tokenizer {
216 try { 239 try {
217 tokenCollection = parser.createTokenCollection(reader); 240 tokenCollection = parser.createTokenCollection(reader);
218 return; 241 return;
219 - } catch (MtasParserException e) { 242 + } catch (MtasParserException e) {
220 tokenCollection = new MtasTokenCollection(); 243 tokenCollection = new MtasTokenCollection();
221 - e.printStackTrace();  
222 throw new MtasParserException(e.getMessage()); 244 throw new MtasParserException(e.getMessage());
223 } 245 }
224 } else { 246 } else {
225 throw new MtasConfigException("no instance of MtasParser"); 247 throw new MtasConfigException("no instance of MtasParser");
226 } 248 }
227 } catch (NoSuchMethodException e) { 249 } catch (NoSuchMethodException e) {
228 - throw new MtasConfigException(e.getClass().getName()  
229 - + " : '" + e.getMessage() + "'"); 250 + throw new MtasConfigException(
  251 + e.getClass().getName() + " : '" + e.getMessage() + "'");
230 } catch (InvocationTargetException e) { 252 } catch (InvocationTargetException e) {
231 - throw new MtasConfigException(e.getClass().getName()  
232 - + " : '" + e.getMessage() + "'"); 253 + throw new MtasConfigException(
  254 + e.getClass().getName() + " : '" + e.getMessage() + "'");
233 } catch (IllegalAccessException e) { 255 } catch (IllegalAccessException e) {
234 - throw new MtasConfigException(e.getClass().getName()  
235 - + " : '" + e.getMessage() + "'"); 256 + throw new MtasConfigException(
  257 + e.getClass().getName() + " : '" + e.getMessage() + "'");
236 } catch (ClassNotFoundException e) { 258 } catch (ClassNotFoundException e) {
237 - throw new MtasConfigException(e.getClass().getName()  
238 - + " : '" + e.getMessage() + "'"); 259 + throw new MtasConfigException(
  260 + e.getClass().getName() + " : '" + e.getMessage() + "'");
239 } catch (InstantiationException e) { 261 } catch (InstantiationException e) {
240 - throw new MtasConfigException(e.getClass().getName()  
241 - + " : '" + e.getMessage() + "'"); 262 + throw new MtasConfigException(
  263 + e.getClass().getName() + " : '" + e.getMessage() + "'");
242 } 264 }
243 265
244 } 266 }
@@ -246,7 +268,8 @@ public final class MtasTokenizer extends Tokenizer { @@ -246,7 +268,8 @@ public final class MtasTokenizer extends Tokenizer {
246 /** 268 /**
247 * Read configuration file. 269 * Read configuration file.
248 * 270 *
249 - * @param configFile the config file 271 + * @param configFile
  272 + * the config file
250 */ 273 */
251 private void readConfigurationFile(String configFile) { 274 private void readConfigurationFile(String configFile) {
252 InputStream is; 275 InputStream is;
@@ -261,13 +284,13 @@ public final class MtasTokenizer extends Tokenizer { @@ -261,13 +284,13 @@ public final class MtasTokenizer extends Tokenizer {
261 } 284 }
262 } 285 }
263 286
264 -  
265 -  
266 /** 287 /**
267 * Process configuration. 288 * Process configuration.
268 * 289 *
269 - * @param config the config  
270 - * @throws IOException Signals that an I/O exception has occurred. 290 + * @param config
  291 + * the config
  292 + * @throws IOException
  293 + * Signals that an I/O exception has occurred.
271 */ 294 */
272 private void processConfiguration(MtasConfiguration config) 295 private void processConfiguration(MtasConfiguration config)
273 throws IOException { 296 throws IOException {
src/mtas/analysis/parser/MtasBasicParser.java
@@ -165,7 +165,8 @@ abstract public class MtasBasicParser extends MtasParser { @@ -165,7 +165,8 @@ abstract public class MtasBasicParser extends MtasParser {
165 /** 165 /**
166 * Instantiates a new mtas basic parser. 166 * Instantiates a new mtas basic parser.
167 * 167 *
168 - * @param config the config 168 + * @param config
  169 + * the config
169 */ 170 */
170 public MtasBasicParser(MtasConfiguration config) { 171 public MtasBasicParser(MtasConfiguration config) {
171 this.config = config; 172 this.config = config;
@@ -174,11 +175,16 @@ abstract public class MtasBasicParser extends MtasParser { @@ -174,11 +175,16 @@ abstract public class MtasBasicParser extends MtasParser {
174 /** 175 /**
175 * Compute mappings from object. 176 * Compute mappings from object.
176 * 177 *
177 - * @param object the object  
178 - * @param currentList the current list  
179 - * @param updateList the update list  
180 - * @throws MtasParserException the mtas parser exception  
181 - * @throws MtasConfigException the mtas config exception 178 + * @param object
  179 + * the object
  180 + * @param currentList
  181 + * the current list
  182 + * @param updateList
  183 + * the update list
  184 + * @throws MtasParserException
  185 + * the mtas parser exception
  186 + * @throws MtasConfigException
  187 + * the mtas config exception
182 */ 188 */
183 protected void computeMappingsFromObject(MtasParserObject object, 189 protected void computeMappingsFromObject(MtasParserObject object,
184 HashMap<String, ArrayList<MtasParserObject>> currentList, 190 HashMap<String, ArrayList<MtasParserObject>> currentList,
@@ -196,7 +202,6 @@ abstract public class MtasBasicParser extends MtasParser { @@ -196,7 +202,6 @@ abstract public class MtasBasicParser extends MtasParser {
196 updateList.get(UPDATE_TYPE_OFFSET).put(tokenId, object.getRefIds()); 202 updateList.get(UPDATE_TYPE_OFFSET).put(tokenId, object.getRefIds());
197 } 203 }
198 } 204 }
199 -  
200 for (MtasParserMapping<?> mapping : mappings) { 205 for (MtasParserMapping<?> mapping : mappings) {
201 try { 206 try {
202 if (mapping.getTokens().size() == 0) { 207 if (mapping.getTokens().size() == 0) {
@@ -271,9 +276,18 @@ abstract public class MtasBasicParser extends MtasParser { @@ -271,9 +276,18 @@ abstract public class MtasBasicParser extends MtasParser {
271 String checkType = object.objectType.getType(); 276 String checkType = object.objectType.getType();
272 // register id for update when parent is created 277 // register id for update when parent is created
273 if (currentList.get(checkType).size() > 0) { 278 if (currentList.get(checkType).size() > 0) {
274 - currentList.get(checkType)  
275 - .get(currentList.get(checkType).size() - 1)  
276 - .registerUpdateableMappingAtParent(token.getId()); 279 + if (currentList.get(checkType).contains(object)) {
  280 + int listPosition = currentList.get(checkType)
  281 + .indexOf(object);
  282 + if (listPosition > 0) {
  283 + currentList.get(checkType).get(listPosition - 1)
  284 + .registerUpdateableMappingAtParent(token.getId());
  285 + }
  286 + } else {
  287 + currentList.get(checkType)
  288 + .get(currentList.get(checkType).size() - 1)
  289 + .registerUpdateableMappingAtParent(token.getId());
  290 + }
277 // if no real ancestor, register id update when group 291 // if no real ancestor, register id update when group
278 // ancestor is created 292 // ancestor is created
279 } else if (currentList.get(MAPPING_TYPE_GROUP).size() > 0) { 293 } else if (currentList.get(MAPPING_TYPE_GROUP).size() > 0) {
@@ -287,7 +301,9 @@ abstract public class MtasBasicParser extends MtasParser { @@ -287,7 +301,9 @@ abstract public class MtasBasicParser extends MtasParser {
287 } 301 }
288 // update children 302 // update children
289 for (Integer tmpId : object.getUpdateableMappingsAsParent()) { 303 for (Integer tmpId : object.getUpdateableMappingsAsParent()) {
290 - tokenCollection.get(tmpId).setParentId(token.getId()); 304 + if (tokenCollection.get(tmpId) != null) {
  305 + tokenCollection.get(tmpId).setParentId(token.getId());
  306 + }
291 } 307 }
292 object.resetUpdateableMappingsAsParent(); 308 object.resetUpdateableMappingsAsParent();
293 // use own position 309 // use own position
@@ -372,10 +388,20 @@ abstract public class MtasBasicParser extends MtasParser { @@ -372,10 +388,20 @@ abstract public class MtasBasicParser extends MtasParser {
372 } 388 }
373 // copy remaining updateableMappings to new parent 389 // copy remaining updateableMappings to new parent
374 if (currentList.get(objectType.getType()).size() > 0) { 390 if (currentList.get(objectType.getType()).size() > 0) {
375 - currentList.get(objectType.getType())  
376 - .get(currentList.get(objectType.getType()).size() - 1)  
377 - .registerUpdateableMappingsAtParent(  
378 - object.getUpdateableMappingsAsParent()); 391 + if (currentList.get(objectType.getType()).contains(object)) {
  392 + int listPosition = currentList.get(objectType.getType())
  393 + .indexOf(object);
  394 + if (listPosition > 0) {
  395 + currentList.get(objectType.getType()).get(listPosition - 1)
  396 + .registerUpdateableMappingsAtParent(
  397 + object.getUpdateableMappingsAsParent());
  398 + }
  399 + } else {
  400 + currentList.get(objectType.getType())
  401 + .get(currentList.get(objectType.getType()).size() - 1)
  402 + .registerUpdateableMappingsAtParent(
  403 + object.getUpdateableMappingsAsParent());
  404 + }
379 } else if (currentList.get(MAPPING_TYPE_GROUP).size() > 0) { 405 } else if (currentList.get(MAPPING_TYPE_GROUP).size() > 0) {
380 currentList.get(MAPPING_TYPE_GROUP) 406 currentList.get(MAPPING_TYPE_GROUP)
381 .get(currentList.get(MAPPING_TYPE_GROUP).size() - 1) 407 .get(currentList.get(MAPPING_TYPE_GROUP).size() - 1)
@@ -392,9 +418,11 @@ abstract public class MtasBasicParser extends MtasParser { @@ -392,9 +418,11 @@ abstract public class MtasBasicParser extends MtasParser {
392 /** 418 /**
393 * Compute type from mapping source. 419 * Compute type from mapping source.
394 * 420 *
395 - * @param source the source 421 + * @param source
  422 + * the source
396 * @return the string 423 * @return the string
397 - * @throws MtasParserException the mtas parser exception 424 + * @throws MtasParserException
  425 + * the mtas parser exception
398 */ 426 */
399 private String computeTypeFromMappingSource(String source) 427 private String computeTypeFromMappingSource(String source)
400 throws MtasParserException { 428 throws MtasParserException {
@@ -423,11 +451,15 @@ abstract public class MtasBasicParser extends MtasParser { @@ -423,11 +451,15 @@ abstract public class MtasBasicParser extends MtasParser {
423 /** 451 /**
424 * Compute object from mapping value. 452 * Compute object from mapping value.
425 * 453 *
426 - * @param object the object  
427 - * @param mappingValue the mapping value  
428 - * @param currentList the current list 454 + * @param object
  455 + * the object
  456 + * @param mappingValue
  457 + * the mapping value
  458 + * @param currentList
  459 + * the current list
429 * @return the mtas parser object[] 460 * @return the mtas parser object[]
430 - * @throws MtasParserException the mtas parser exception 461 + * @throws MtasParserException
  462 + * the mtas parser exception
431 */ 463 */
432 private MtasParserObject[] computeObjectFromMappingValue( 464 private MtasParserObject[] computeObjectFromMappingValue(
433 MtasParserObject object, HashMap<String, String> mappingValue, 465 MtasParserObject object, HashMap<String, String> mappingValue,
@@ -469,12 +501,17 @@ abstract public class MtasBasicParser extends MtasParser { @@ -469,12 +501,17 @@ abstract public class MtasBasicParser extends MtasParser {
469 /** 501 /**
470 * Compute value from mapping values. 502 * Compute value from mapping values.
471 * 503 *
472 - * @param object the object  
473 - * @param mappingValues the mapping values  
474 - * @param currentList the current list 504 + * @param object
  505 + * the object
  506 + * @param mappingValues
  507 + * the mapping values
  508 + * @param currentList
  509 + * the current list
475 * @return the string[] 510 * @return the string[]
476 - * @throws MtasParserException the mtas parser exception  
477 - * @throws MtasConfigException the mtas config exception 511 + * @throws MtasParserException
  512 + * the mtas parser exception
  513 + * @throws MtasConfigException
  514 + * the mtas config exception
478 */ 515 */
479 private String[] computeValueFromMappingValues(MtasParserObject object, 516 private String[] computeValueFromMappingValues(MtasParserObject object,
480 ArrayList<HashMap<String, String>> mappingValues, 517 ArrayList<HashMap<String, String>> mappingValues,
@@ -486,8 +523,8 @@ abstract public class MtasBasicParser extends MtasParser { @@ -486,8 +523,8 @@ abstract public class MtasBasicParser extends MtasParser {
486 if (mappingValue.get("source").equals(MtasParserMapping.SOURCE_STRING)) { 523 if (mappingValue.get("source").equals(MtasParserMapping.SOURCE_STRING)) {
487 if (mappingValue.get("type") 524 if (mappingValue.get("type")
488 .equals(MtasParserMapping.PARSER_TYPE_STRING)) { 525 .equals(MtasParserMapping.PARSER_TYPE_STRING)) {
489 - String subvalue = computeFilteredPrefixedValue(mappingValue.get("type"),  
490 - mappingValue.get("text"), null, null); 526 + String subvalue = computeFilteredPrefixedValue(
  527 + mappingValue.get("type"), mappingValue.get("text"), null, null);
491 if (subvalue != null) { 528 if (subvalue != null) {
492 for (int i = 0; i < value.length; i++) { 529 for (int i = 0; i < value.length; i++) {
493 value[i] = value[i] + subvalue; 530 value[i] = value[i] + subvalue;
@@ -499,7 +536,7 @@ abstract public class MtasBasicParser extends MtasParser { @@ -499,7 +536,7 @@ abstract public class MtasBasicParser extends MtasParser {
499 MtasParserObject[] checkObjects = computeObjectFromMappingValue(object, 536 MtasParserObject[] checkObjects = computeObjectFromMappingValue(object,
500 mappingValue, currentList); 537 mappingValue, currentList);
501 // create value 538 // create value
502 - if (checkObjects != null) { 539 + if (checkObjects != null && checkObjects.length > 0) {
503 MtasParserType checkType = checkObjects[0].getType(); 540 MtasParserType checkType = checkObjects[0].getType();
504 // add name to value 541 // add name to value
505 if (mappingValue.get("type") 542 if (mappingValue.get("type")
@@ -586,11 +623,15 @@ abstract public class MtasBasicParser extends MtasParser { @@ -586,11 +623,15 @@ abstract public class MtasBasicParser extends MtasParser {
586 /** 623 /**
587 * Compute payload from mapping payload. 624 * Compute payload from mapping payload.
588 * 625 *
589 - * @param object the object  
590 - * @param mappingPayloads the mapping payloads  
591 - * @param currentList the current list 626 + * @param object
  627 + * the object
  628 + * @param mappingPayloads
  629 + * the mapping payloads
  630 + * @param currentList
  631 + * the current list
592 * @return the bytes ref 632 * @return the bytes ref
593 - * @throws MtasParserException the mtas parser exception 633 + * @throws MtasParserException
  634 + * the mtas parser exception
594 */ 635 */
595 private BytesRef computePayloadFromMappingPayload(MtasParserObject object, 636 private BytesRef computePayloadFromMappingPayload(MtasParserObject object,
596 ArrayList<HashMap<String, String>> mappingPayloads, 637 ArrayList<HashMap<String, String>> mappingPayloads,
@@ -605,7 +646,7 @@ abstract public class MtasBasicParser extends MtasParser { @@ -605,7 +646,7 @@ abstract public class MtasBasicParser extends MtasParser {
605 if (mappingPayload.get("text") != null) { 646 if (mappingPayload.get("text") != null) {
606 BytesRef subpayload = computeMaximumFilteredPayload( 647 BytesRef subpayload = computeMaximumFilteredPayload(
607 mappingPayload.get("text"), payload, null); 648 mappingPayload.get("text"), payload, null);
608 - payload = (subpayload != null) ? subpayload : payload; 649 + payload = (subpayload != null) ? subpayload : payload;
609 } 650 }
610 } 651 }
611 // from objects 652 // from objects
@@ -637,8 +678,10 @@ abstract public class MtasBasicParser extends MtasParser { @@ -637,8 +678,10 @@ abstract public class MtasBasicParser extends MtasParser {
637 /** 678 /**
638 * Prevalidate object. 679 * Prevalidate object.
639 * 680 *
640 - * @param object the object  
641 - * @param currentList the current list 681 + * @param object
  682 + * the object
  683 + * @param currentList
  684 + * the current list
642 * @return the boolean 685 * @return the boolean
643 */ 686 */
644 Boolean prevalidateObject(MtasParserObject object, 687 Boolean prevalidateObject(MtasParserObject object,
@@ -663,10 +706,14 @@ abstract public class MtasBasicParser extends MtasParser { @@ -663,10 +706,14 @@ abstract public class MtasBasicParser extends MtasParser {
663 /** 706 /**
664 * Precheck mapping conditions. 707 * Precheck mapping conditions.
665 * 708 *
666 - * @param object the object  
667 - * @param mappingConditions the mapping conditions  
668 - * @param currentList the current list  
669 - * @throws MtasParserException the mtas parser exception 709 + * @param object
  710 + * the object
  711 + * @param mappingConditions
  712 + * the mapping conditions
  713 + * @param currentList
  714 + * the current list
  715 + * @throws MtasParserException
  716 + * the mtas parser exception
670 */ 717 */
671 void precheckMappingConditions(MtasParserObject object, 718 void precheckMappingConditions(MtasParserObject object,
672 ArrayList<HashMap<String, String>> mappingConditions, 719 ArrayList<HashMap<String, String>> mappingConditions,
@@ -771,7 +818,31 @@ abstract public class MtasBasicParser extends MtasParser { @@ -771,7 +818,31 @@ abstract public class MtasBasicParser extends MtasParser {
771 // condition on text 818 // condition on text
772 } else if (mappingCondition.get("type") 819 } else if (mappingCondition.get("type")
773 .equals(MtasParserMapping.PARSER_TYPE_TEXT)) { 820 .equals(MtasParserMapping.PARSER_TYPE_TEXT)) {
774 - // can't pre-check this type of condition 821 + // text conditions can only be pre-checked when the object's type enables precheckText (intended for groups)
  822 + if (object.getType().precheckText()) {
  823 + String textCondition = mappingCondition.get("condition");
  824 + String textValue = object.getText();
  825 + if ((textCondition == null)
  826 + && ((textValue == null) || textValue.equals(""))) {
  827 + if (!notCondition) {
  828 + throw new MtasParserException("no text available");
  829 + }
  830 + } else if ((textCondition != null) && (textValue == null)) {
  831 + if (!notCondition) {
  832 + throw new MtasParserException("condition " + textCondition
  833 + + " on text not matched (is null)");
  834 + }
  835 + } else if (textCondition != null) {
  836 + if (!notCondition && !textCondition.equals(textValue)) {
  837 + throw new MtasParserException("condition " + textCondition
  838 + + " on text not matched (is " + textValue + ")");
  839 + } else if (notCondition && textCondition.equals(textValue)) {
  840 + throw new MtasParserException(
  841 + "condition NOT " + textCondition
  842 + + " on text not matched (is " + textValue + ")");
  843 + }
  844 + }
  845 + }
775 } 846 }
776 } 847 }
777 } else if (!notCondition) { 848 } else if (!notCondition) {
@@ -785,10 +856,14 @@ abstract public class MtasBasicParser extends MtasParser { @@ -785,10 +856,14 @@ abstract public class MtasBasicParser extends MtasParser {
785 /** 856 /**
786 * Postcheck mapping conditions. 857 * Postcheck mapping conditions.
787 * 858 *
788 - * @param object the object  
789 - * @param mappingConditions the mapping conditions  
790 - * @param currentList the current list  
791 - * @throws MtasParserException the mtas parser exception 859 + * @param object
  860 + * the object
  861 + * @param mappingConditions
  862 + * the mapping conditions
  863 + * @param currentList
  864 + * the current list
  865 + * @throws MtasParserException
  866 + * the mtas parser exception
792 */ 867 */
793 private void postcheckMappingConditions(MtasParserObject object, 868 private void postcheckMappingConditions(MtasParserObject object,
794 ArrayList<HashMap<String, String>> mappingConditions, 869 ArrayList<HashMap<String, String>> mappingConditions,
@@ -835,10 +910,13 @@ abstract public class MtasBasicParser extends MtasParser { @@ -835,10 +910,13 @@ abstract public class MtasBasicParser extends MtasParser {
835 /** 910 /**
836 * Compute filtered split values. 911 * Compute filtered split values.
837 * 912 *
838 - * @param values the values  
839 - * @param filter the filter 913 + * @param values
  914 + * the values
  915 + * @param filter
  916 + * the filter
840 * @return the string[] 917 * @return the string[]
841 - * @throws MtasConfigException the mtas config exception 918 + * @throws MtasConfigException
  919 + * the mtas config exception
842 */ 920 */
843 private String[] computeFilteredSplitValues(String[] values, String filter) 921 private String[] computeFilteredSplitValues(String[] values, String filter)
844 throws MtasConfigException { 922 throws MtasConfigException {
@@ -847,39 +925,42 @@ abstract public class MtasBasicParser extends MtasParser { @@ -847,39 +925,42 @@ abstract public class MtasBasicParser extends MtasParser {
847 boolean[] valuesFilter = new boolean[values.length]; 925 boolean[] valuesFilter = new boolean[values.length];
848 boolean doSplitFilter = false; 926 boolean doSplitFilter = false;
849 for (String item : filters) { 927 for (String item : filters) {
850 - if (item.trim()  
851 - .matches("^"+Pattern.quote(MAPPING_FILTER_SPLIT) + "\\([0-9\\-]+\\)$")) { 928 + if (item.trim().matches(
  929 + "^" + Pattern.quote(MAPPING_FILTER_SPLIT) + "\\([0-9\\-]+\\)$")) {
852 doSplitFilter = true; 930 doSplitFilter = true;
853 - Pattern splitContent = Pattern.compile("^"+Pattern.quote(MAPPING_FILTER_SPLIT) + "\\(([0-9]+)(-([0-9]+))?\\)$"); 931 + Pattern splitContent = Pattern
  932 + .compile("^" + Pattern.quote(MAPPING_FILTER_SPLIT)
  933 + + "\\(([0-9]+)(-([0-9]+))?\\)$");
854 Matcher splitContentMatcher = splitContent.matcher(item.trim()); 934 Matcher splitContentMatcher = splitContent.matcher(item.trim());
855 - while(splitContentMatcher.find()) {  
856 - if(splitContentMatcher.group(3)==null) { 935 + while (splitContentMatcher.find()) {
  936 + if (splitContentMatcher.group(3) == null) {
857 int i = Integer.parseInt(splitContentMatcher.group(1)); 937 int i = Integer.parseInt(splitContentMatcher.group(1));
858 - if(i>=0 && i<values.length) { 938 + if (i >= 0 && i < values.length) {
859 valuesFilter[i] = true; 939 valuesFilter[i] = true;
860 - } 940 + }
861 } else { 941 } else {
862 int i1 = Integer.parseInt(splitContentMatcher.group(1)); 942 int i1 = Integer.parseInt(splitContentMatcher.group(1));
863 int i2 = Integer.parseInt(splitContentMatcher.group(3)); 943 int i2 = Integer.parseInt(splitContentMatcher.group(3));
864 - for(int i=Math.max(0, i1); i<Math.min(values.length, i2); i++) { 944 + for (int i = Math.max(0, i1); i < Math.min(values.length,
  945 + i2); i++) {
865 valuesFilter[i] = true; 946 valuesFilter[i] = true;
866 } 947 }
867 } 948 }
868 - } 949 + }
869 } 950 }
870 } 951 }
871 - if(doSplitFilter) { 952 + if (doSplitFilter) {
872 int number = 0; 953 int number = 0;
873 - for(int i=0;i<valuesFilter.length; i++) {  
874 - if(valuesFilter[i]) { 954 + for (int i = 0; i < valuesFilter.length; i++) {
  955 + if (valuesFilter[i]) {
875 number++; 956 number++;
876 } 957 }
877 } 958 }
878 - if(number>0) { 959 + if (number > 0) {
879 String[] newValues = new String[number]; 960 String[] newValues = new String[number];
880 number = 0; 961 number = 0;
881 - for(int i=0;i<valuesFilter.length; i++) {  
882 - if(valuesFilter[i]) { 962 + for (int i = 0; i < valuesFilter.length; i++) {
  963 + if (valuesFilter[i]) {
883 newValues[number] = values[i]; 964 newValues[number] = values[i];
884 number++; 965 number++;
885 } 966 }
@@ -888,7 +969,7 @@ abstract public class MtasBasicParser extends MtasParser { @@ -888,7 +969,7 @@ abstract public class MtasBasicParser extends MtasParser {
888 } else { 969 } else {
889 return null; 970 return null;
890 } 971 }
891 - } 972 + }
892 } 973 }
893 return values; 974 return values;
894 } 975 }
@@ -896,12 +977,17 @@ abstract public class MtasBasicParser extends MtasParser { @@ -896,12 +977,17 @@ abstract public class MtasBasicParser extends MtasParser {
896 /** 977 /**
897 * Compute filtered prefixed value. 978 * Compute filtered prefixed value.
898 * 979 *
899 - * @param type the type  
900 - * @param value the value  
901 - * @param filter the filter  
902 - * @param prefix the prefix 980 + * @param type
  981 + * the type
  982 + * @param value
  983 + * the value
  984 + * @param filter
  985 + * the filter
  986 + * @param prefix
  987 + * the prefix
903 * @return the string 988 * @return the string
904 - * @throws MtasConfigException the mtas config exception 989 + * @throws MtasConfigException
  990 + * the mtas config exception
905 */ 991 */
906 private String computeFilteredPrefixedValue(String type, String value, 992 private String computeFilteredPrefixedValue(String type, String value,
907 String filter, String prefix) throws MtasConfigException { 993 String filter, String prefix) throws MtasConfigException {
@@ -926,7 +1012,7 @@ abstract public class MtasBasicParser extends MtasParser { @@ -926,7 +1012,7 @@ abstract public class MtasBasicParser extends MtasParser {
926 } 1012 }
927 } else if (item.trim() 1013 } else if (item.trim()
928 .matches(Pattern.quote(MAPPING_FILTER_SPLIT) + "\\([0-9\\-]+\\)")) { 1014 .matches(Pattern.quote(MAPPING_FILTER_SPLIT) + "\\([0-9\\-]+\\)")) {
929 - if(!type.equals(MtasParserMapping.PARSER_TYPE_TEXT_SPLIT)) { 1015 + if (!type.equals(MtasParserMapping.PARSER_TYPE_TEXT_SPLIT)) {
930 throw new MtasConfigException( 1016 throw new MtasConfigException(
931 "split filter not allowed for " + type); 1017 "split filter not allowed for " + type);
932 } 1018 }
@@ -947,9 +1033,12 @@ abstract public class MtasBasicParser extends MtasParser { @@ -947,9 +1033,12 @@ abstract public class MtasBasicParser extends MtasParser {
947 /** 1033 /**
948 * Compute maximum filtered payload. 1034 * Compute maximum filtered payload.
949 * 1035 *
950 - * @param value the value  
951 - * @param payload the payload  
952 - * @param filter the filter 1036 + * @param value
  1037 + * the value
  1038 + * @param payload
  1039 + * the payload
  1040 + * @param filter
  1041 + * the filter
953 * @return the bytes ref 1042 * @return the bytes ref
954 */ 1043 */
955 private BytesRef computeMaximumFilteredPayload(String value, BytesRef payload, 1044 private BytesRef computeMaximumFilteredPayload(String value, BytesRef payload,
@@ -981,6 +1070,9 @@ abstract public class MtasBasicParser extends MtasParser { @@ -981,6 +1070,9 @@ abstract public class MtasBasicParser extends MtasParser {
981 /** The name. */ 1070 /** The name. */
982 private String name; 1071 private String name;
983 1072
  1073 + /** Whether text conditions are checked during precheck for objects of this type. */
  1074 + protected boolean precheckText;
  1075 +
984 /** The ref attribute name. */ 1076 /** The ref attribute name. */
985 private String refAttributeName; 1077 private String refAttributeName;
986 1078
@@ -990,23 +1082,34 @@ abstract public class MtasBasicParser extends MtasParser { @@ -990,23 +1082,34 @@ abstract public class MtasBasicParser extends MtasParser {
990 /** 1082 /**
991 * Instantiates a new mtas parser type. 1083 * Instantiates a new mtas parser type.
992 * 1084 *
993 - * @param type the type  
994 - * @param name the name 1085 + * @param type
  1086 + * the type
  1087 + * @param name
  1088 + * the name
  1089 + * @param precheckText
  1090 + * whether text conditions are checked during precheck for this type
995 */ 1091 */
996 - MtasParserType(String type, String name) { 1092 + MtasParserType(String type, String name, boolean precheckText) {
997 this.type = type; 1093 this.type = type;
998 this.name = name; 1094 this.name = name;
  1095 + this.precheckText = precheckText;
999 } 1096 }
1000 1097
1001 /** 1098 /**
1002 * Instantiates a new mtas parser type. 1099 * Instantiates a new mtas parser type.
1003 * 1100 *
1004 - * @param type the type  
1005 - * @param name the name  
1006 - * @param refAttributeName the ref attribute name 1101 + * @param type
  1102 + * the type
  1103 + * @param name
  1104 + * the name
  1105 + * @param precheckText
  1106 + * whether text conditions are checked during precheck for this type
  1107 + * @param refAttributeName
  1108 + * the ref attribute name
1007 */ 1109 */
1008 - MtasParserType(String type, String name, String refAttributeName) {  
1009 - this(type, name); 1110 + MtasParserType(String type, String name, boolean precheckText,
  1111 + String refAttributeName) {
  1112 + this(type, name, precheckText);
1010 this.refAttributeName = refAttributeName; 1113 this.refAttributeName = refAttributeName;
1011 } 1114 }
1012 1115
@@ -1038,9 +1141,19 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1038,9 +1141,19 @@ abstract public class MtasBasicParser extends MtasParser {
1038 } 1141 }
1039 1142
1040 /** 1143 /**
  1144 + * Precheck text.
  1145 + *
  1146 + * @return true, if successful
  1147 + */
  1148 + public boolean precheckText() {
  1149 + return precheckText;
  1150 + }
  1151 +
  1152 + /**
1041 * Adds the mapping. 1153 * Adds the mapping.
1042 * 1154 *
1043 - * @param mapping the mapping 1155 + * @param mapping
  1156 + * the mapping
1044 */ 1157 */
1045 public void addMapping(MtasParserMapping<?> mapping) { 1158 public void addMapping(MtasParserMapping<?> mapping) {
1046 mappings.add(mapping); 1159 mappings.add(mapping);
@@ -1080,7 +1193,8 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1080,7 +1193,8 @@ abstract public class MtasBasicParser extends MtasParser {
1080 /** 1193 /**
1081 * Instantiates a new mtas parser mapping token. 1194 * Instantiates a new mtas parser mapping token.
1082 * 1195 *
1083 - * @param tokenType the token type 1196 + * @param tokenType
  1197 + * the token type
1084 */ 1198 */
1085 public MtasParserMappingToken(String tokenType) { 1199 public MtasParserMappingToken(String tokenType) {
1086 type = tokenType; 1200 type = tokenType;
@@ -1095,7 +1209,8 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1095,7 +1209,8 @@ abstract public class MtasBasicParser extends MtasParser {
1095 /** 1209 /**
1096 * Sets the offset. 1210 * Sets the offset.
1097 * 1211 *
1098 - * @param tokenOffset the new offset 1212 + * @param tokenOffset
  1213 + * the new offset
1099 */ 1214 */
1100 public void setOffset(Boolean tokenOffset) { 1215 public void setOffset(Boolean tokenOffset) {
1101 offset = tokenOffset; 1216 offset = tokenOffset;
@@ -1104,7 +1219,8 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1104,7 +1219,8 @@ abstract public class MtasBasicParser extends MtasParser {
1104 /** 1219 /**
1105 * Sets the real offset. 1220 * Sets the real offset.
1106 * 1221 *
1107 - * @param tokenRealOffset the new real offset 1222 + * @param tokenRealOffset
  1223 + * the new real offset
1108 */ 1224 */
1109 public void setRealOffset(Boolean tokenRealOffset) { 1225 public void setRealOffset(Boolean tokenRealOffset) {
1110 realoffset = tokenRealOffset; 1226 realoffset = tokenRealOffset;
@@ -1113,7 +1229,8 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1113,7 +1229,8 @@ abstract public class MtasBasicParser extends MtasParser {
1113 /** 1229 /**
1114 * Sets the parent. 1230 * Sets the parent.
1115 * 1231 *
1116 - * @param tokenParent the new parent 1232 + * @param tokenParent
  1233 + * the new parent
1117 */ 1234 */
1118 public void setParent(Boolean tokenParent) { 1235 public void setParent(Boolean tokenParent) {
1119 parent = tokenParent; 1236 parent = tokenParent;
@@ -1124,7 +1241,8 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1124,7 +1241,8 @@ abstract public class MtasBasicParser extends MtasParser {
1124 /** 1241 /**
1125 * The Class MtasParserMapping. 1242 * The Class MtasParserMapping.
1126 * 1243 *
1127 - * @param <T> the generic type 1244 + * @param <T>
  1245 + * the generic type
1128 */ 1246 */
1129 protected abstract class MtasParserMapping<T extends MtasParserMapping<T>> { 1247 protected abstract class MtasParserMapping<T extends MtasParserMapping<T>> {
1130 1248
@@ -1216,8 +1334,10 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1216,8 +1334,10 @@ abstract public class MtasBasicParser extends MtasParser {
1216 /** 1334 /**
1217 * Process config. 1335 * Process config.
1218 * 1336 *
1219 - * @param config the config  
1220 - * @throws MtasConfigException the mtas config exception 1337 + * @param config
  1338 + * the config
  1339 + * @throws MtasConfigException
  1340 + * the mtas config exception
1221 */ 1341 */
1222 public void processConfig(MtasConfiguration config) 1342 public void processConfig(MtasConfiguration config)
1223 throws MtasConfigException { 1343 throws MtasConfigException {
@@ -1581,7 +1701,8 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1581,7 +1701,8 @@ abstract public class MtasBasicParser extends MtasParser {
1581 /** 1701 /**
1582 * Condition unknown ancestor. 1702 * Condition unknown ancestor.
1583 * 1703 *
1584 - * @param number the number 1704 + * @param number
  1705 + * the number
1585 */ 1706 */
1586 private void conditionUnknownAncestor(String number) { 1707 private void conditionUnknownAncestor(String number) {
1587 HashMap<String, String> mapConstructionItem = new HashMap<String, String>(); 1708 HashMap<String, String> mapConstructionItem = new HashMap<String, String>();
@@ -1593,9 +1714,12 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1593,9 +1714,12 @@ abstract public class MtasBasicParser extends MtasParser {
1593 /** 1714 /**
1594 * Adds the string. 1715 * Adds the string.
1595 * 1716 *
1596 - * @param mappingToken the mapping token  
1597 - * @param type the type  
1598 - * @param text the text 1717 + * @param mappingToken
  1718 + * the mapping token
  1719 + * @param type
  1720 + * the type
  1721 + * @param text
  1722 + * the text
1599 */ 1723 */
1600 private void addString(MtasParserMappingToken mappingToken, String type, 1724 private void addString(MtasParserMappingToken mappingToken, String type,
1601 String text) { 1725 String text) {
@@ -1613,8 +1737,10 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1613,8 +1737,10 @@ abstract public class MtasBasicParser extends MtasParser {
1613 /** 1737 /**
1614 * Payload string. 1738 * Payload string.
1615 * 1739 *
1616 - * @param mappingToken the mapping token  
1617 - * @param text the text 1740 + * @param mappingToken
  1741 + * the mapping token
  1742 + * @param text
  1743 + * the text
1618 */ 1744 */
1619 private void payloadString(MtasParserMappingToken mappingToken, 1745 private void payloadString(MtasParserMappingToken mappingToken,
1620 String text) { 1746 String text) {
@@ -1628,10 +1754,14 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1628,10 +1754,14 @@ abstract public class MtasBasicParser extends MtasParser {
1628 /** 1754 /**
1629 * Adds the name. 1755 * Adds the name.
1630 * 1756 *
1631 - * @param mappingToken the mapping token  
1632 - * @param type the type  
1633 - * @param prefix the prefix  
1634 - * @param filter the filter 1757 + * @param mappingToken
  1758 + * the mapping token
  1759 + * @param type
  1760 + * the type
  1761 + * @param prefix
  1762 + * the prefix
  1763 + * @param filter
  1764 + * the filter
1635 */ 1765 */
1636 private void addName(MtasParserMappingToken mappingToken, String type, 1766 private void addName(MtasParserMappingToken mappingToken, String type,
1637 String prefix, String filter) { 1767 String prefix, String filter) {
@@ -1650,8 +1780,10 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1650,8 +1780,10 @@ abstract public class MtasBasicParser extends MtasParser {
1650 /** 1780 /**
1651 * Condition name. 1781 * Condition name.
1652 * 1782 *
1653 - * @param condition the condition  
1654 - * @param not the not 1783 + * @param condition
  1784 + * the condition
  1785 + * @param not
  1786 + * the not
1655 */ 1787 */
1656 private void conditionName(String condition, String not) { 1788 private void conditionName(String condition, String not) {
1657 HashMap<String, String> mapConstructionItem = new HashMap<String, String>(); 1789 HashMap<String, String> mapConstructionItem = new HashMap<String, String>();
@@ -1665,10 +1797,14 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1665,10 +1797,14 @@ abstract public class MtasBasicParser extends MtasParser {
1665 /** 1797 /**
1666 * Adds the text. 1798 * Adds the text.
1667 * 1799 *
1668 - * @param mappingToken the mapping token  
1669 - * @param type the type  
1670 - * @param prefix the prefix  
1671 - * @param filter the filter 1800 + * @param mappingToken
  1801 + * the mapping token
  1802 + * @param type
  1803 + * the type
  1804 + * @param prefix
  1805 + * the prefix
  1806 + * @param filter
  1807 + * the filter
1672 */ 1808 */
1673 private void addText(MtasParserMappingToken mappingToken, String type, 1809 private void addText(MtasParserMappingToken mappingToken, String type,
1674 String prefix, String filter) { 1810 String prefix, String filter) {
@@ -1687,11 +1823,16 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1687,11 +1823,16 @@ abstract public class MtasBasicParser extends MtasParser {
1687 /** 1823 /**
1688 * Adds the text split. 1824 * Adds the text split.
1689 * 1825 *
1690 - * @param mappingToken the mapping token  
1691 - * @param type the type  
1692 - * @param split the split  
1693 - * @param prefix the prefix  
1694 - * @param filter the filter 1826 + * @param mappingToken
  1827 + * the mapping token
  1828 + * @param type
  1829 + * the type
  1830 + * @param split
  1831 + * the split
  1832 + * @param prefix
  1833 + * the prefix
  1834 + * @param filter
  1835 + * the filter
1695 */ 1836 */
1696 private void addTextSplit(MtasParserMappingToken mappingToken, String type, 1837 private void addTextSplit(MtasParserMappingToken mappingToken, String type,
1697 String split, String prefix, String filter) { 1838 String split, String prefix, String filter) {
@@ -1711,9 +1852,12 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1711,9 +1852,12 @@ abstract public class MtasBasicParser extends MtasParser {
1711 /** 1852 /**
1712 * Condition text. 1853 * Condition text.
1713 * 1854 *
1714 - * @param condition the condition  
1715 - * @param filter the filter  
1716 - * @param not the not 1855 + * @param condition
  1856 + * the condition
  1857 + * @param filter
  1858 + * the filter
  1859 + * @param not
  1860 + * the not
1717 */ 1861 */
1718 private void conditionText(String condition, String filter, String not) { 1862 private void conditionText(String condition, String filter, String not) {
1719 HashMap<String, String> mapConstructionItem = new HashMap<String, String>(); 1863 HashMap<String, String> mapConstructionItem = new HashMap<String, String>();
@@ -1728,8 +1872,10 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1728,8 +1872,10 @@ abstract public class MtasBasicParser extends MtasParser {
1728 /** 1872 /**
1729 * Payload text. 1873 * Payload text.
1730 * 1874 *
1731 - * @param mappingToken the mapping token  
1732 - * @param filter the filter 1875 + * @param mappingToken
  1876 + * the mapping token
  1877 + * @param filter
  1878 + * the filter
1733 */ 1879 */
1734 private void payloadText(MtasParserMappingToken mappingToken, 1880 private void payloadText(MtasParserMappingToken mappingToken,
1735 String filter) { 1881 String filter) {
@@ -1743,11 +1889,16 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1743,11 +1889,16 @@ abstract public class MtasBasicParser extends MtasParser {
1743 /** 1889 /**
1744 * Adds the attribute. 1890 * Adds the attribute.
1745 * 1891 *
1746 - * @param mappingToken the mapping token  
1747 - * @param type the type  
1748 - * @param name the name  
1749 - * @param prefix the prefix  
1750 - * @param filter the filter 1892 + * @param mappingToken
  1893 + * the mapping token
  1894 + * @param type
  1895 + * the type
  1896 + * @param name
  1897 + * the name
  1898 + * @param prefix
  1899 + * the prefix
  1900 + * @param filter
  1901 + * the filter
1751 */ 1902 */
1752 private void addAttribute(MtasParserMappingToken mappingToken, String type, 1903 private void addAttribute(MtasParserMappingToken mappingToken, String type,
1753 String name, String prefix, String filter) { 1904 String name, String prefix, String filter) {
@@ -1769,10 +1920,14 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1769,10 +1920,14 @@ abstract public class MtasBasicParser extends MtasParser {
1769 /** 1920 /**
1770 * Condition attribute. 1921 * Condition attribute.
1771 * 1922 *
1772 - * @param name the name  
1773 - * @param condition the condition  
1774 - * @param filter the filter  
1775 - * @param not the not 1923 + * @param name
  1924 + * the name
  1925 + * @param condition
  1926 + * the condition
  1927 + * @param filter
  1928 + * the filter
  1929 + * @param not
  1930 + * the not
1776 */ 1931 */
1777 private void conditionAttribute(String name, String condition, 1932 private void conditionAttribute(String name, String condition,
1778 String filter, String not) { 1933 String filter, String not) {
@@ -1791,9 +1946,12 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1791,9 +1946,12 @@ abstract public class MtasBasicParser extends MtasParser {
1791 /** 1946 /**
1792 * Payload attribute. 1947 * Payload attribute.
1793 * 1948 *
1794 - * @param mappingToken the mapping token  
1795 - * @param name the name  
1796 - * @param filter the filter 1949 + * @param mappingToken
  1950 + * the mapping token
  1951 + * @param name
  1952 + * the name
  1953 + * @param filter
  1954 + * the filter
1797 */ 1955 */
1798 private void payloadAttribute(MtasParserMappingToken mappingToken, 1956 private void payloadAttribute(MtasParserMappingToken mappingToken,
1799 String name, String filter) { 1957 String name, String filter) {
@@ -1808,8 +1966,10 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1808,8 +1966,10 @@ abstract public class MtasBasicParser extends MtasParser {
1808 /** 1966 /**
1809 * Condition ancestor. 1967 * Condition ancestor.
1810 * 1968 *
1811 - * @param ancestorType the ancestor type  
1812 - * @param number the number 1969 + * @param ancestorType
  1970 + * the ancestor type
  1971 + * @param number
  1972 + * the number
1813 */ 1973 */
1814 public void conditionAncestor(String ancestorType, String number) { 1974 public void conditionAncestor(String ancestorType, String number) {
1815 if (ancestorType.equals(SOURCE_ANCESTOR_GROUP) 1975 if (ancestorType.equals(SOURCE_ANCESTOR_GROUP)
@@ -1829,12 +1989,18 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1829,12 +1989,18 @@ abstract public class MtasBasicParser extends MtasParser {
1829 /** 1989 /**
1830 * Adds the ancestor name. 1990 * Adds the ancestor name.
1831 * 1991 *
1832 - * @param ancestorType the ancestor type  
1833 - * @param mappingToken the mapping token  
1834 - * @param type the type  
1835 - * @param distance the distance  
1836 - * @param prefix the prefix  
1837 - * @param filter the filter 1992 + * @param ancestorType
  1993 + * the ancestor type
  1994 + * @param mappingToken
  1995 + * the mapping token
  1996 + * @param type
  1997 + * the type
  1998 + * @param distance
  1999 + * the distance
  2000 + * @param prefix
  2001 + * the prefix
  2002 + * @param filter
  2003 + * the filter
1838 */ 2004 */
1839 private void addAncestorName(String ancestorType, 2005 private void addAncestorName(String ancestorType,
1840 MtasParserMappingToken mappingToken, String type, String distance, 2006 MtasParserMappingToken mappingToken, String type, String distance,
@@ -1862,11 +2028,16 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1862,11 +2028,16 @@ abstract public class MtasBasicParser extends MtasParser {
1862 /** 2028 /**
1863 * Condition ancestor name. 2029 * Condition ancestor name.
1864 * 2030 *
1865 - * @param ancestorType the ancestor type  
1866 - * @param distance the distance  
1867 - * @param condition the condition  
1868 - * @param filter the filter  
1869 - * @param not the not 2031 + * @param ancestorType
  2032 + * the ancestor type
  2033 + * @param distance
  2034 + * the distance
  2035 + * @param condition
  2036 + * the condition
  2037 + * @param filter
  2038 + * the filter
  2039 + * @param not
  2040 + * the not
1870 */ 2041 */
1871 public void conditionAncestorName(String ancestorType, String distance, 2042 public void conditionAncestorName(String ancestorType, String distance,
1872 String condition, String filter, String not) { 2043 String condition, String filter, String not) {
@@ -1890,13 +2061,20 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1890,13 +2061,20 @@ abstract public class MtasBasicParser extends MtasParser {
1890 /** 2061 /**
1891 * Adds the ancestor attribute. 2062 * Adds the ancestor attribute.
1892 * 2063 *
1893 - * @param ancestorType the ancestor type  
1894 - * @param mappingToken the mapping token  
1895 - * @param type the type  
1896 - * @param distance the distance  
1897 - * @param name the name  
1898 - * @param prefix the prefix  
1899 - * @param filter the filter 2064 + * @param ancestorType
  2065 + * the ancestor type
  2066 + * @param mappingToken
  2067 + * the mapping token
  2068 + * @param type
  2069 + * the type
  2070 + * @param distance
  2071 + * the distance
  2072 + * @param name
  2073 + * the name
  2074 + * @param prefix
  2075 + * the prefix
  2076 + * @param filter
  2077 + * the filter
1900 */ 2078 */
1901 public void addAncestorAttribute(String ancestorType, 2079 public void addAncestorAttribute(String ancestorType,
1902 MtasParserMappingToken mappingToken, String type, String distance, 2080 MtasParserMappingToken mappingToken, String type, String distance,
@@ -1927,12 +2105,18 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1927,12 +2105,18 @@ abstract public class MtasBasicParser extends MtasParser {
1927 /** 2105 /**
1928 * Condition ancestor attribute. 2106 * Condition ancestor attribute.
1929 * 2107 *
1930 - * @param ancestorType the ancestor type  
1931 - * @param distance the distance  
1932 - * @param name the name  
1933 - * @param condition the condition  
1934 - * @param filter the filter  
1935 - * @param not the not 2108 + * @param ancestorType
  2109 + * the ancestor type
  2110 + * @param distance
  2111 + * the distance
  2112 + * @param name
  2113 + * the name
  2114 + * @param condition
  2115 + * the condition
  2116 + * @param filter
  2117 + * the filter
  2118 + * @param not
  2119 + * the not
1936 */ 2120 */
1937 public void conditionAncestorAttribute(String ancestorType, String distance, 2121 public void conditionAncestorAttribute(String ancestorType, String distance,
1938 String name, String condition, String filter, String not) { 2122 String name, String condition, String filter, String not) {
@@ -1959,11 +2143,16 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1959,11 +2143,16 @@ abstract public class MtasBasicParser extends MtasParser {
1959 /** 2143 /**
1960 * Payload ancestor attribute. 2144 * Payload ancestor attribute.
1961 * 2145 *
1962 - * @param mappingToken the mapping token  
1963 - * @param ancestorType the ancestor type  
1964 - * @param distance the distance  
1965 - * @param name the name  
1966 - * @param filter the filter 2146 + * @param mappingToken
  2147 + * the mapping token
  2148 + * @param ancestorType
  2149 + * the ancestor type
  2150 + * @param distance
  2151 + * the distance
  2152 + * @param name
  2153 + * the name
  2154 + * @param filter
  2155 + * the filter
1967 */ 2156 */
1968 private void payloadAncestorAttribute(MtasParserMappingToken mappingToken, 2157 private void payloadAncestorAttribute(MtasParserMappingToken mappingToken,
1969 String ancestorType, String distance, String name, String filter) { 2158 String ancestorType, String distance, String name, String filter) {
@@ -1988,9 +2177,11 @@ abstract public class MtasBasicParser extends MtasParser { @@ -1988,9 +2177,11 @@ abstract public class MtasBasicParser extends MtasParser {
1988 /** 2177 /**
1989 * Compute ancestor source type. 2178 * Compute ancestor source type.
1990 * 2179 *
1991 - * @param type the type 2180 + * @param type
  2181 + * the type
1992 * @return the string 2182 * @return the string
1993 - * @throws MtasConfigException the mtas config exception 2183 + * @throws MtasConfigException
  2184 + * the mtas config exception
1994 */ 2185 */
1995 private String computeAncestorSourceType(String type) 2186 private String computeAncestorSourceType(String type)
1996 throws MtasConfigException { 2187 throws MtasConfigException {
@@ -2014,7 +2205,8 @@ abstract public class MtasBasicParser extends MtasParser { @@ -2014,7 +2205,8 @@ abstract public class MtasBasicParser extends MtasParser {
2014 /** 2205 /**
2015 * Compute distance. 2206 * Compute distance.
2016 * 2207 *
2017 - * @param distance the distance 2208 + * @param distance
  2209 + * the distance
2018 * @return the string 2210 * @return the string
2019 */ 2211 */
2020 private String computeDistance(String distance) { 2212 private String computeDistance(String distance) {
@@ -2033,7 +2225,8 @@ abstract public class MtasBasicParser extends MtasParser { @@ -2033,7 +2225,8 @@ abstract public class MtasBasicParser extends MtasParser {
2033 /** 2225 /**
2034 * Compute number. 2226 * Compute number.
2035 * 2227 *
2036 - * @param number the number 2228 + * @param number
  2229 + * the number
2037 * @return the string 2230 * @return the string
2038 */ 2231 */
2039 private String computeNumber(String number) { 2232 private String computeNumber(String number) {
src/mtas/analysis/parser/MtasCRMParser.java 0 → 100644
  1 +package mtas.analysis.parser;
  2 +
  3 +import java.io.IOException;
  4 +import java.io.Reader;
  5 +import java.util.ArrayList;
  6 +import java.util.Arrays;
  7 +import java.util.Collection;
  8 +import java.util.HashMap;
  9 +import java.util.HashSet;
  10 +import java.util.TreeSet;
  11 +import java.util.Map.Entry;
  12 +import java.util.concurrent.atomic.AtomicInteger;
  13 +import java.util.regex.Matcher;
  14 +import java.util.regex.Pattern;
  15 +
  16 +import mtas.analysis.token.MtasToken;
  17 +import mtas.analysis.token.MtasTokenCollection;
  18 +import mtas.analysis.util.MtasBufferedReader;
  19 +import mtas.analysis.util.MtasConfigException;
  20 +import mtas.analysis.util.MtasConfiguration;
  21 +import mtas.analysis.util.MtasParserException;
  22 +
  23 +/**
  24 + * The Class MtasCRMParser.
  25 + */
  26 +
  27 +public class MtasCRMParser extends MtasBasicParser {
  28 +
  29 + /** The word type. */
  30 + private MtasParserType wordType = null;
  31 +
  32 + /** The word annotation types. */
  33 + private HashMap<String, MtasParserType> wordAnnotationTypes = new HashMap<String, MtasParserType>();
  34 +
  35 + /** The crm sentence types. */
  36 + private HashMap<String, MtasParserType> crmSentenceTypes = new HashMap<String, MtasParserType>();
  37 +
  38 + /** The crm clause types. */
  39 + private HashMap<String, MtasParserType> crmClauseTypes = new HashMap<String, MtasParserType>();
  40 +
  41 + /** The crm pair types. */
  42 + private HashMap<String, MtasParserType> crmPairTypes = new HashMap<String, MtasParserType>();
  43 +
  44 + /** The functions. */
  45 + private HashMap<String, HashMap<String, MtasCRMParserFunction>> functions = new HashMap<String, HashMap<String, MtasCRMParserFunction>>();
  46 +
  47 + /** The Constant MAPPING_TYPE_CRM_SENTENCE. */
  48 + protected final static String MAPPING_TYPE_CRM_SENTENCE = "crmSentence";
  49 +
  50 + /** The Constant MAPPING_TYPE_CRM_CLAUSE. */
  51 + protected final static String MAPPING_TYPE_CRM_CLAUSE = "crmClause";
  52 +
  53 + /** The Constant MAPPING_TYPE_CRM_PAIR. */
  54 + protected final static String MAPPING_TYPE_CRM_PAIR = "crmPair";
  55 +
  56 + /** The history pair. */
  57 + private HashMap<String, HashMap<String, MtasParserObject>> historyPair = new HashMap<String, HashMap<String, MtasParserObject>>();
  58 +
  59 + /** The pair pattern. */
  60 + Pattern pairPattern = Pattern.compile("^([b|e])([a-z])([0-9]+)$");
  61 +
  62 + /**
  63 + * Instantiates a new mtas crm parser.
  64 + *
  65 + * @param config
  66 + * the config
  67 + */
  68 + public MtasCRMParser(MtasConfiguration config) {
  69 + super(config);
  70 + try {
  71 + initParser();
  72 + // System.out.print(printConfig());
  73 + } catch (MtasConfigException e) {
  74 + e.printStackTrace();
  75 + }
  76 + }
  77 +
  78 + /*
  79 + * (non-Javadoc)
  80 + *
  81 + * @see mtas.analysis.parser.MtasParser#initParser()
  82 + */
  83 + @SuppressWarnings("unchecked")
  84 + @Override
  85 + protected void initParser() throws MtasConfigException {
  86 + super.initParser();
  87 + if (config != null) {
  88 + // always word, no mappings
  89 + wordType = new MtasParserType(MAPPING_TYPE_WORD, null, false);
  90 + for (int i = 0; i < config.children.size(); i++) {
  91 + MtasConfiguration current = config.children.get(i);
  92 + if (current.name.equals("mappings")) {
  93 + for (int j = 0; j < current.children.size(); j++) {
  94 + if (current.children.get(j).name.equals("mapping")) {
  95 + MtasConfiguration mapping = current.children.get(j);
  96 + String typeMapping = mapping.attributes.get("type");
  97 + String nameMapping = mapping.attributes.get("name");
  98 + if ((typeMapping != null)) {
  99 + if (typeMapping.equals(MAPPING_TYPE_WORD)) {
  100 + MtasCRMParserMappingWordAnnotation m = new MtasCRMParserMappingWordAnnotation();
  101 + m.processConfig(mapping);
  102 + wordType.addMapping(m);
  103 + } else if (typeMapping.equals(MAPPING_TYPE_WORD_ANNOTATION)
  104 + && (nameMapping != null)) {
  105 + MtasCRMParserMappingWordAnnotation m = new MtasCRMParserMappingWordAnnotation();
  106 + m.processConfig(mapping);
  107 + if (wordAnnotationTypes.containsKey(nameMapping)) {
  108 + wordAnnotationTypes.get(nameMapping).addMapping(m);
  109 + } else {
  110 + MtasParserType t = new MtasParserType(typeMapping,
  111 + nameMapping, false);
  112 + t.addMapping(m);
  113 + wordAnnotationTypes.put(nameMapping, t);
  114 + }
  115 + } else if (typeMapping.equals(MAPPING_TYPE_CRM_SENTENCE)) {
  116 + MtasCRMParserMappingCRMSentence m = new MtasCRMParserMappingCRMSentence();
  117 + m.processConfig(mapping);
  118 + if (crmSentenceTypes.containsKey(nameMapping)) {
  119 + crmSentenceTypes.get(nameMapping).addMapping(m);
  120 + } else {
  121 + MtasParserType t = new MtasParserType(MAPPING_TYPE_GROUP,
  122 + nameMapping, true);
  123 + t.addMapping(m);
  124 + crmSentenceTypes.put(nameMapping, t);
  125 + }
  126 + } else if (typeMapping.equals(MAPPING_TYPE_CRM_CLAUSE)) {
  127 + MtasCRMParserMappingCRMSentence m = new MtasCRMParserMappingCRMSentence();
  128 + m.processConfig(mapping);
  129 + if (crmClauseTypes.containsKey(nameMapping)) {
  130 + crmClauseTypes.get(nameMapping).addMapping(m);
  131 + } else {
  132 + MtasParserType t = new MtasParserType(MAPPING_TYPE_GROUP,
  133 + nameMapping, true);
  134 + t.addMapping(m);
  135 + crmClauseTypes.put(nameMapping, t);
  136 + }
  137 + } else if (typeMapping.equals(MAPPING_TYPE_CRM_PAIR)) {
  138 + MtasCRMParserMappingCRMPair m = new MtasCRMParserMappingCRMPair();
  139 + m.processConfig(mapping);
  140 + if (crmPairTypes.containsKey(nameMapping)) {
  141 + crmPairTypes.get(nameMapping).addMapping(m);
  142 + } else {
  143 + MtasParserType t = new MtasParserType(MAPPING_TYPE_RELATION,
  144 + nameMapping, true);
  145 + t.addMapping(m);
  146 + crmPairTypes.put(nameMapping, t);
  147 + }
  148 + } else {
  149 + throw new MtasConfigException("unknown mapping type "
  150 + + typeMapping + " or missing name");
  151 + }
  152 + }
  153 + }
  154 + }
  155 + } else if (current.name.equals("functions")) {
  156 + for (int j = 0; j < current.children.size(); j++) {
  157 + if (current.children.get(j).name.equals("function")) {
  158 + MtasConfiguration function = current.children.get(j);
  159 + String nameFunction = function.attributes.get("name");
  160 + String typeFunction = function.attributes.get("type");
  161 + String splitFunction = function.attributes.get("split");
  162 + if (nameFunction != null && typeFunction != null) {
  163 + MtasCRMParserFunction mtasCRMParserFunction = new MtasCRMParserFunction(
  164 + typeFunction, splitFunction);
  165 + if (!functions.containsKey(typeFunction)) {
  166 + functions.put(typeFunction,
  167 + new HashMap<String, MtasCRMParserFunction>());
  168 + }
  169 + functions.get(typeFunction).put(nameFunction,
  170 + mtasCRMParserFunction);
  171 + MtasConfiguration subCurrent = current.children.get(j);
  172 + for (int k = 0; k < subCurrent.children.size(); k++) {
  173 + if (subCurrent.children.get(k).name.equals("condition")) {
  174 + MtasConfiguration subSubCurrent = subCurrent.children
  175 + .get(k);
  176 + if (subSubCurrent.attributes.containsKey("value")) {
  177 + String[] valuesCondition = subSubCurrent.attributes
  178 + .get("value").split(Pattern.quote(","));
  179 + ArrayList<MtasCRMParserFunctionOutput> valueOutputList = new ArrayList<MtasCRMParserFunctionOutput>();
  180 + for (int l = 0; l < subSubCurrent.children.size(); l++) {
  181 + if (subSubCurrent.children.get(l).name
  182 + .equals("output")) {
  183 + String valueOutput = subSubCurrent.children
  184 + .get(l).attributes.get("value");
  185 + String nameOutput = subSubCurrent.children
  186 + .get(l).attributes.get("name");
  187 + if (nameOutput != null) {
  188 + MtasCRMParserFunctionOutput o = new MtasCRMParserFunctionOutput(
  189 + nameOutput, valueOutput);
  190 + valueOutputList.add(o);
  191 + }
  192 + }
  193 + }
  194 + if (valueOutputList.size() > 0) {
  195 + for (String valueCondition : valuesCondition) {
  196 + if (mtasCRMParserFunction.output
  197 + .containsKey(valueCondition)) {
  198 + mtasCRMParserFunction.output.get(valueCondition)
  199 + .addAll(
  200 + (Collection<? extends MtasCRMParserFunctionOutput>) valueOutputList
  201 + .clone());
  202 + } else {
  203 + mtasCRMParserFunction.output.put(valueCondition,
  204 + (ArrayList<MtasCRMParserFunctionOutput>) valueOutputList
  205 + .clone());
  206 + }
  207 + }
  208 + }
  209 + }
  210 + }
  211 + }
  212 + }
  213 + }
  214 + }
  215 + }
  216 + }
  217 + }
  218 + }
  219 +
  220 + /*
  221 + * (non-Javadoc)
  222 + *
  223 + * @see mtas.analysis.parser.MtasParser#createTokenCollection(java.io.Reader)
  224 + */
  225 + @Override
  226 + public MtasTokenCollection createTokenCollection(Reader reader)
  227 + throws MtasParserException, MtasConfigException {
  228 + AtomicInteger position = new AtomicInteger(0);
  229 + Integer unknownAncestors = 0;
  230 +
  231 + HashMap<String, TreeSet<Integer>> idPositions = new HashMap<String, TreeSet<Integer>>();
  232 + HashMap<String, Integer[]> idOffsets = new HashMap<String, Integer[]>();
  233 +
  234 + HashMap<String, HashMap<Integer, HashSet<String>>> updateList = new HashMap<String, HashMap<Integer, HashSet<String>>>();
  235 + updateList.put(UPDATE_TYPE_OFFSET, new HashMap<Integer, HashSet<String>>());
  236 + updateList.put(UPDATE_TYPE_POSITION,
  237 + new HashMap<Integer, HashSet<String>>());
  238 +
  239 + HashMap<String, ArrayList<MtasParserObject>> currentList = new HashMap<String, ArrayList<MtasParserObject>>();
  240 + currentList.put(MAPPING_TYPE_RELATION, new ArrayList<MtasParserObject>());
  241 + currentList.put(MAPPING_TYPE_RELATION_ANNOTATION,
  242 + new ArrayList<MtasParserObject>());
  243 + currentList.put(MAPPING_TYPE_REF, new ArrayList<MtasParserObject>());
  244 + currentList.put(MAPPING_TYPE_GROUP, new ArrayList<MtasParserObject>());
  245 + currentList.put(MAPPING_TYPE_GROUP_ANNOTATION,
  246 + new ArrayList<MtasParserObject>());
  247 + currentList.put(MAPPING_TYPE_WORD, new ArrayList<MtasParserObject>());
  248 + currentList.put(MAPPING_TYPE_WORD_ANNOTATION,
  249 + new ArrayList<MtasParserObject>());
  250 +
  251 + tokenCollection = new MtasTokenCollection();
  252 + MtasToken.resetId();
  253 + try (MtasBufferedReader br = new MtasBufferedReader(reader)) {
  254 + String line;
  255 + int currentOffset, previousOffset = br.getPosition();
  256 + MtasParserObject currentObject;
  257 + Pattern headerPattern = Pattern.compile("^@ @ @(.*)$");
  258 + Pattern regularPattern = Pattern.compile(
  259 + "^([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+)$");
  260 + Matcher matcherHeader, matcherRegular = null;
  261 + HashSet<MtasParserObject> newPreviousSentence = new HashSet<MtasParserObject>(),
  262 + previousSentence = new HashSet<MtasParserObject>();
  263 + HashSet<MtasParserObject> newPreviousClause = new HashSet<MtasParserObject>(),
  264 + previousClause = new HashSet<MtasParserObject>();
  265 + while ((line = br.readLine()) != null) {
  266 + currentOffset = br.getPosition();
  267 + matcherHeader = headerPattern.matcher(line.trim());
  268 + matcherRegular = regularPattern.matcher(line.trim());
  269 + if (matcherRegular.matches()) {
  270 + newPreviousSentence.clear();
  271 + for (int i = 4; i < 8; i++) {
  272 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>();
  273 + HashSet<MtasParserObject> tmpList = processCRMSentence(
  274 + String.valueOf(i), matcherRegular.group((i + 1)), currentOffset,
  275 + functionOutputList, unknownAncestors, currentList, updateList,
  276 + idPositions, idOffsets, previousSentence, previousClause);
  277 + if (tmpList != null) {
  278 + newPreviousSentence.addAll(tmpList);
  279 + }
  280 + for (MtasCRMParserFunctionOutput functionOutput : functionOutputList) {
  281 + tmpList = processCRMSentence(functionOutput.name,
  282 + functionOutput.value, currentOffset, functionOutputList,
  283 + unknownAncestors, currentList, updateList, idPositions,
  284 + idOffsets, previousSentence, previousClause);
  285 + if (tmpList != null) {
  286 + newPreviousSentence.addAll(tmpList);
  287 + }
  288 + }
  289 + }
  290 + if (newPreviousSentence.size() > 0) {
  291 + previousSentence.clear();
  292 + previousSentence.addAll(newPreviousSentence);
  293 + }
  294 + newPreviousClause.clear();
  295 + for (int i = 4; i < 8; i++) {
  296 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>();
  297 + HashSet<MtasParserObject> tmpList = processCRMClause(
  298 + String.valueOf(i), matcherRegular.group((i + 1)), currentOffset,
  299 + functionOutputList, unknownAncestors, currentList, updateList,
  300 + idPositions, idOffsets, previousClause);
  301 + if (tmpList != null) {
  302 + newPreviousClause.addAll(tmpList);
  303 + }
  304 + for (MtasCRMParserFunctionOutput functionOutput : functionOutputList) {
  305 + tmpList = processCRMClause(functionOutput.name,
  306 + functionOutput.value, currentOffset, functionOutputList,
  307 + unknownAncestors, currentList, updateList, idPositions,
  308 + idOffsets, previousClause);
  309 + if (tmpList != null) {
  310 + newPreviousClause.addAll(tmpList);
  311 + }
  312 + }
  313 + }
  314 + if (newPreviousClause.size() > 0) {
  315 + previousClause.clear();
  316 + previousClause.addAll(newPreviousClause);
  317 + }
  318 + }
  319 +
  320 + if (matcherRegular.matches() && !matcherHeader.matches()) {
  321 + matcherRegular = regularPattern.matcher(line.trim());
  322 + if (matcherRegular.matches()) {
  323 + // regular line - start word
  324 + currentObject = new MtasParserObject(wordType);
  325 + currentObject.setOffsetStart(previousOffset);
  326 + currentObject.setRealOffsetStart(previousOffset);
  327 + currentObject.setUnknownAncestorNumber(unknownAncestors);
  328 + if (!prevalidateObject(currentObject, currentList)) {
  329 + unknownAncestors++;
  330 + } else {
  331 + int p = position.getAndIncrement();
  332 + currentObject.addPosition(p);
  333 + currentObject.objectId = "word_" + String.valueOf(p);
  334 + currentList.get(MAPPING_TYPE_WORD).add(currentObject);
  335 + unknownAncestors = 0;
  336 + // check for crmPair
  337 + for (int i = 0; i < 8; i++) {
  338 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>();
  339 + processCRMPair(p, String.valueOf(i),
  340 + matcherRegular.group((i + 1)), currentOffset,
  341 + functionOutputList, unknownAncestors, currentList,
  342 + updateList, idPositions, idOffsets);
  343 + for (MtasCRMParserFunctionOutput functionOutput : functionOutputList) {
  344 + processCRMPair(p, functionOutput.name, functionOutput.value,
  345 + currentOffset, functionOutputList, unknownAncestors,
  346 + currentList, updateList, idPositions, idOffsets);
  347 + }
  348 + }
  349 + // compute word annotations
  350 + for (int i = 0; i < 8; i++) {
  351 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>();
  352 + functionOutputList.addAll(processWordAnnotation(
  353 + String.valueOf(i), matcherRegular.group((i + 1)),
  354 + previousOffset, currentOffset, unknownAncestors,
  355 + currentList, updateList, idPositions, idOffsets));
  356 + for (MtasCRMParserFunctionOutput functionOutput : functionOutputList) {
  357 + processWordAnnotation(functionOutput.name,
  358 + functionOutput.value, previousOffset, currentOffset,
  359 + unknownAncestors, currentList, updateList, idPositions,
  360 + idOffsets);
  361 + }
  362 + }
  363 + }
  364 + // finish word
  365 + if (unknownAncestors > 0) {
  366 + unknownAncestors--;
  367 + } else {
  368 + currentObject = currentList.get(MAPPING_TYPE_WORD)
  369 + .remove(currentList.get(MAPPING_TYPE_WORD).size() - 1);
  370 + assert unknownAncestors == 0 : "error in administration "
  371 + + currentObject.getType().getName();
  372 + currentObject.setText(null);
  373 + currentObject.setOffsetEnd(currentOffset - 1);
  374 + currentObject.setRealOffsetEnd(currentOffset - 1);
  375 + // update ancestor groups with position and offset
  376 + for (MtasParserObject currentGroup : currentList
  377 + .get(MAPPING_TYPE_GROUP)) {
  378 + currentGroup.addPositions(currentObject.getPositions());
  379 + currentGroup.addOffsetStart(currentObject.getOffsetStart());
  380 + currentGroup.addOffsetEnd(currentObject.getOffsetEnd());
  381 + }
  382 + idPositions.put(currentObject.getId(),
  383 + currentObject.getPositions());
  384 + idOffsets.put(currentObject.getId(), currentObject.getOffset());
  385 + currentObject.updateMappings(idPositions, idOffsets);
  386 + unknownAncestors = currentObject.getUnknownAncestorNumber();
  387 + computeMappingsFromObject(currentObject, currentList, updateList);
  388 + }
  389 +
  390 + } else {
  391 + // System.out.println("PROBLEM: " + line);
  392 + }
  393 + }
  394 + previousOffset = br.getPosition();
  395 + }
  396 + closePrevious(previousSentence, previousOffset, unknownAncestors,
  397 + currentList, updateList, idPositions, idOffsets);
  398 + closePrevious(previousClause, previousOffset, unknownAncestors,
  399 + currentList, updateList, idPositions, idOffsets);
  400 + } catch (IOException e) {
  401 + throw new MtasParserException(e.getMessage());
  402 + }
  403 + // final check
  404 + tokenCollection.check(autorepair, makeunique);
  405 + return tokenCollection;
  406 +
  407 + }
  408 +
  409 + /**
  410 + * Process word annotation.
  411 + *
  412 + * @param name
  413 + * the name
  414 + * @param text
  415 + * the text
  416 + * @param previousOffset
  417 + * the previous offset
  418 + * @param currentOffset
  419 + * the current offset
  420 + * @param unknownAncestors
  421 + * the unknown ancestors
  422 + * @param currentList
  423 + * the current list
  424 + * @param updateList
  425 + * the update list
  426 + * @param idPositions
  427 + * the id positions
  428 + * @param idOffsets
  429 + * the id offsets
  430 + * @return the array list
  431 + * @throws MtasParserException
  432 + * the mtas parser exception
  433 + * @throws MtasConfigException
  434 + * the mtas config exception
  435 + */
  436 + private ArrayList<MtasCRMParserFunctionOutput> processWordAnnotation(
  437 + String name, String text, Integer previousOffset, Integer currentOffset,
  438 + Integer unknownAncestors,
  439 + HashMap<String, ArrayList<MtasParserObject>> currentList,
  440 + HashMap<String, HashMap<Integer, HashSet<String>>> updateList,
  441 + HashMap<String, TreeSet<Integer>> idPositions,
  442 + HashMap<String, Integer[]> idOffsets)
  443 + throws MtasParserException, MtasConfigException {
  444 + MtasParserType tmpCurrentType;
  445 + MtasParserObject currentObject;
  446 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>();
  447 + if ((tmpCurrentType = wordAnnotationTypes.get(name)) != null) {
  448 + // start word annotation
  449 + currentObject = new MtasParserObject(tmpCurrentType);
  450 + currentObject.setRealOffsetStart(previousOffset);
  451 + currentObject.addPositions(currentList.get(MAPPING_TYPE_WORD)
  452 + .get((currentList.get(MAPPING_TYPE_WORD).size() - 1)).getPositions());
  453 + currentObject.setUnknownAncestorNumber(unknownAncestors);
  454 + if (!prevalidateObject(currentObject, currentList)) {
  455 + unknownAncestors++;
  456 + } else {
  457 + currentList.get(MAPPING_TYPE_WORD_ANNOTATION).add(currentObject);
  458 + unknownAncestors = 0;
  459 + }
  460 + // finish word annotation
  461 + if (unknownAncestors > 0) {
  462 + unknownAncestors--;
  463 + } else {
  464 + currentObject = currentList.get(MAPPING_TYPE_WORD_ANNOTATION)
  465 + .remove(currentList.get(MAPPING_TYPE_WORD_ANNOTATION).size() - 1);
  466 + assert unknownAncestors == 0 : "error in administration "
  467 + + currentObject.getType().getName();
  468 + if (functions.containsKey(MAPPING_TYPE_WORD_ANNOTATION)
  469 + && functions.get(MAPPING_TYPE_WORD_ANNOTATION).containsKey(name)
  470 + && text != null) {
  471 + MtasCRMParserFunction function = functions
  472 + .get(MAPPING_TYPE_WORD_ANNOTATION).get(name);
  473 + String[] value;
  474 + if (function.split != null) {
  475 + value = text.split(Pattern.quote(function.split));
  476 + } else {
  477 + value = new String[] { text };
  478 + }
  479 + for (int c = 0; c < value.length; c++) {
  480 + if (function.output.containsKey(value[c])) {
  481 + functionOutputList.addAll(function.output.get(value[c]));
  482 + }
  483 + }
  484 + }
  485 + currentObject.setText(text);
  486 + currentObject.setRealOffsetEnd(currentOffset - 1);
  487 + idPositions.put(currentObject.getId(), currentObject.getPositions());
  488 + idOffsets.put(currentObject.getId(), currentObject.getOffset());
  489 + // offset always null, so update later with word (should be possible)
  490 + if ((currentObject.getId() != null)
  491 + && (currentList.get(MAPPING_TYPE_WORD).size() > 0)) {
  492 + currentList.get(MAPPING_TYPE_WORD)
  493 + .get((currentList.get(MAPPING_TYPE_WORD).size() - 1))
  494 + .addUpdateableIdWithOffset(currentObject.getId());
  495 + }
  496 + currentObject.updateMappings(idPositions, idOffsets);
  497 + unknownAncestors = currentObject.getUnknownAncestorNumber();
  498 + computeMappingsFromObject(currentObject, currentList, updateList);
  499 + }
  500 + }
  501 + return functionOutputList;
  502 + }
  503 +
  504 + /**
  505 + * Process crm sentence.
  506 + *
  507 + * @param name
  508 + * the name
  509 + * @param text
  510 + * the text
  511 + * @param currentOffset
  512 + * the current offset
  513 + * @param functionOutputList
  514 + * the function output list
  515 + * @param unknownAncestors
  516 + * the unknown ancestors
  517 + * @param currentList
  518 + * the current list
  519 + * @param updateList
  520 + * the update list
  521 + * @param idPositions
  522 + * the id positions
  523 + * @param idOffsets
  524 + * the id offsets
  525 + * @param previous
  526 + * the previous
  527 + * @param previousClause
  528 + * the previous clause
  529 + * @return the hash set
  530 + * @throws MtasParserException
  531 + * the mtas parser exception
  532 + * @throws MtasConfigException
  533 + * the mtas config exception
  534 + */
  535 + private HashSet<MtasParserObject> processCRMSentence(String name, String text,
  536 + Integer currentOffset,
  537 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList,
  538 + Integer unknownAncestors,
  539 + HashMap<String, ArrayList<MtasParserObject>> currentList,
  540 + HashMap<String, HashMap<Integer, HashSet<String>>> updateList,
  541 + HashMap<String, TreeSet<Integer>> idPositions,
  542 + HashMap<String, Integer[]> idOffsets, HashSet<MtasParserObject> previous,
  543 + HashSet<MtasParserObject> previousClause)
  544 + throws MtasParserException, MtasConfigException {
  545 + MtasParserType tmpCurrentType;
  546 + MtasParserObject currentObject;
  547 + if ((tmpCurrentType = crmSentenceTypes.get(name)) != null) {
  548 + currentObject = new MtasParserObject(tmpCurrentType);
  549 + currentObject.setUnknownAncestorNumber(unknownAncestors);
  550 + currentObject.setRealOffsetStart(currentOffset);
  551 + currentObject.setText(text);
  552 + if (!prevalidateObject(currentObject, currentList)) {
  553 + return null;
  554 + } else {
  555 + closePrevious(previousClause, currentOffset, unknownAncestors,
  556 + currentList, updateList, idPositions, idOffsets);
  557 + closePrevious(previous, currentOffset, unknownAncestors, currentList,
  558 + updateList, idPositions, idOffsets);
  559 + previous.clear();
  560 + currentList.get(MAPPING_TYPE_GROUP).add(currentObject);
  561 + unknownAncestors = 0;
  562 + return new HashSet<MtasParserObject>(Arrays.asList(currentObject));
  563 + }
  564 + }
  565 + return null;
  566 + }
  567 +
  568 + /**
  569 + * Process crm clause.
  570 + *
  571 + * @param name
  572 + * the name
  573 + * @param text
  574 + * the text
  575 + * @param currentOffset
  576 + * the current offset
  577 + * @param functionOutputList
  578 + * the function output list
  579 + * @param unknownAncestors
  580 + * the unknown ancestors
  581 + * @param currentList
  582 + * the current list
  583 + * @param updateList
  584 + * the update list
  585 + * @param idPositions
  586 + * the id positions
  587 + * @param idOffsets
  588 + * the id offsets
  589 + * @param previous
  590 + * the previous
  591 + * @return the hash set
  592 + * @throws MtasParserException
  593 + * the mtas parser exception
  594 + * @throws MtasConfigException
  595 + * the mtas config exception
  596 + */
  597 + private HashSet<MtasParserObject> processCRMClause(String name, String text,
  598 + Integer currentOffset,
  599 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList,
  600 + Integer unknownAncestors,
  601 + HashMap<String, ArrayList<MtasParserObject>> currentList,
  602 + HashMap<String, HashMap<Integer, HashSet<String>>> updateList,
  603 + HashMap<String, TreeSet<Integer>> idPositions,
  604 + HashMap<String, Integer[]> idOffsets, HashSet<MtasParserObject> previous)
  605 + throws MtasParserException, MtasConfigException {
  606 + MtasParserType tmpCurrentType;
  607 + MtasParserObject currentObject;
  608 + if ((tmpCurrentType = crmClauseTypes.get(name)) != null) {
  609 + currentObject = new MtasParserObject(tmpCurrentType);
  610 + currentObject.setUnknownAncestorNumber(unknownAncestors);
  611 + currentObject.setRealOffsetStart(currentOffset);
  612 + currentObject.setText(text);
  613 + if (!prevalidateObject(currentObject, currentList)) {
  614 + return null;
  615 + } else {
  616 + closePrevious(previous, currentOffset, unknownAncestors, currentList,
  617 + updateList, idPositions, idOffsets);
  618 + previous.clear();
  619 + currentList.get(MAPPING_TYPE_GROUP).add(currentObject);
  620 + unknownAncestors = 0;
  621 + return new HashSet<MtasParserObject>(Arrays.asList(currentObject));
  622 + }
  623 + }
  624 + return null;
  625 + }
  626 +
  627 + /**
  628 + * Close previous.
  629 + *
  630 + * @param previous
  631 + * the previous
  632 + * @param currentOffset
  633 + * the current offset
  634 + * @param unknownAncestors
  635 + * the unknown ancestors
  636 + * @param currentList
  637 + * the current list
  638 + * @param updateList
  639 + * the update list
  640 + * @param idPositions
  641 + * the id positions
  642 + * @param idOffsets
  643 + * the id offsets
  644 + * @throws MtasParserException
  645 + * the mtas parser exception
  646 + * @throws MtasConfigException
  647 + * the mtas config exception
  648 + */
  649 + private void closePrevious(HashSet<MtasParserObject> previous,
  650 + Integer currentOffset, Integer unknownAncestors,
  651 + HashMap<String, ArrayList<MtasParserObject>> currentList,
  652 + HashMap<String, HashMap<Integer, HashSet<String>>> updateList,
  653 + HashMap<String, TreeSet<Integer>> idPositions,
  654 + HashMap<String, Integer[]> idOffsets)
  655 + throws MtasParserException, MtasConfigException {
  656 + for (MtasParserObject previousObject : previous) {
  657 + previousObject.setRealOffsetEnd(currentOffset);
  658 + idPositions.put(previousObject.getId(), previousObject.getPositions());
  659 + idOffsets.put(previousObject.getId(), previousObject.getOffset());
  660 + previousObject.updateMappings(idPositions, idOffsets);
  661 + unknownAncestors = previousObject.getUnknownAncestorNumber();
  662 + computeMappingsFromObject(previousObject, currentList, updateList);
  663 + currentList.get(MAPPING_TYPE_GROUP).remove(previousObject);
  664 + }
  665 + }
  666 +
  667 + /**
  668 + * Process crm pair.
  669 + *
  670 + * @param position
  671 + * the position
  672 + * @param name
  673 + * the name
  674 + * @param text
  675 + * the text
  676 + * @param currentOffset
  677 + * the current offset
  678 + * @param functionOutputList
  679 + * the function output list
  680 + * @param unknownAncestors
  681 + * the unknown ancestors
  682 + * @param currentList
  683 + * the current list
  684 + * @param updateList
  685 + * the update list
  686 + * @param idPositions
  687 + * the id positions
  688 + * @param idOffsets
  689 + * the id offsets
  690 + * @throws MtasParserException
  691 + * the mtas parser exception
  692 + * @throws MtasConfigException
  693 + * the mtas config exception
  694 + */
  695 + private void processCRMPair(int position, String name, String text,
  696 + Integer currentOffset,
  697 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList,
  698 + Integer unknownAncestors,
  699 + HashMap<String, ArrayList<MtasParserObject>> currentList,
  700 + HashMap<String, HashMap<Integer, HashSet<String>>> updateList,
  701 + HashMap<String, TreeSet<Integer>> idPositions,
  702 + HashMap<String, Integer[]> idOffsets)
  703 + throws MtasParserException, MtasConfigException {
  704 +
  705 + MtasParserType tmpCurrentType;
  706 + MtasParserObject currentObject;
  707 +
  708 + if ((tmpCurrentType = crmPairTypes.get(name)) != null) {
  709 + if ((tmpCurrentType = crmPairTypes.get(name)) != null) {
  710 + // get history
  711 + HashMap<String, MtasParserObject> currentNamePairHistory;
  712 + if (!historyPair.containsKey(name)) {
  713 + currentNamePairHistory = new HashMap<String, MtasParserObject>();
  714 + historyPair.put(name, currentNamePairHistory);
  715 + } else {
  716 + currentNamePairHistory = historyPair.get(name);
  717 + }
  718 + Matcher m = pairPattern.matcher(text);
  719 + if (m.find()) {
  720 + String thisKey = m.group(1) + m.group(2);
  721 + String otherKey = (m.group(1).equals("b") ? "e" : "b") + m.group(2);
  722 + if (currentNamePairHistory.containsKey(otherKey)) {
  723 + currentObject = currentNamePairHistory.remove(otherKey);
  724 + currentObject.setText(currentObject.getText() + "+" + text);
  725 + currentObject.addPosition(position);
  726 + processFunctions(name, text, MAPPING_TYPE_CRM_PAIR,
  727 + functionOutputList);
  728 + currentObject.setRealOffsetEnd(currentOffset + 1);
  729 + currentObject.setOffsetEnd(currentOffset + 1);
  730 + idPositions.put(currentObject.getId(),
  731 + currentObject.getPositions());
  732 + idOffsets.put(currentObject.getId(), currentObject.getOffset());
  733 + currentObject.updateMappings(idPositions, idOffsets);
  734 + unknownAncestors = currentObject.getUnknownAncestorNumber();
  735 + computeMappingsFromObject(currentObject, currentList, updateList);
  736 + } else {
  737 + currentObject = new MtasParserObject(tmpCurrentType);
  738 + currentObject.setUnknownAncestorNumber(unknownAncestors);
  739 + currentObject.setRealOffsetStart(currentOffset);
  740 + currentObject.setOffsetStart(currentOffset);
  741 + currentObject.setText(text);
  742 + currentObject.addPosition(position);
  743 + if (!prevalidateObject(currentObject, currentList)) {
  744 + unknownAncestors++;
  745 + } else {
  746 + currentNamePairHistory.put(thisKey, currentObject);
  747 + processFunctions(name, text, MAPPING_TYPE_CRM_PAIR,
  748 + functionOutputList);
  749 + currentObject.setRealOffsetEnd(currentOffset + 1);
  750 + currentObject.setOffsetEnd(currentOffset + 1);
  751 + idPositions.put(currentObject.getId(),
  752 + currentObject.getPositions());
  753 + idOffsets.put(currentObject.getId(), currentObject.getOffset());
  754 + // offset always null, so update later with word (should be
  755 + // possible)
  756 + if ((currentObject.getId() != null)
  757 + && (currentList.get(MAPPING_TYPE_WORD).size() > 0)) {
  758 + currentList.get(MAPPING_TYPE_WORD)
  759 + .get((currentList.get(MAPPING_TYPE_WORD).size() - 1))
  760 + .addUpdateableIdWithOffset(currentObject.getId());
  761 + }
  762 +
  763 + }
  764 + }
  765 + }
  766 + }
  767 + }
  768 +
  769 + }
  770 +
  771 + /**
  772 + * Process functions.
  773 + *
  774 + * @param name
  775 + * the name
  776 + * @param text
  777 + * the text
  778 + * @param type
  779 + * the type
  780 + * @param functionOutputList
  781 + * the function output list
  782 + */
  783 + private void processFunctions(String name, String text, String type,
  784 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList) {
  785 + if (functions.containsKey(type) && functions.get(type).containsKey(name)
  786 + && text != null) {
  787 + if (functions.get(type).containsKey(name)) {
  788 + MtasCRMParserFunction function = functions.get(type).get(name);
  789 + String[] value;
  790 + if (function.split != null) {
  791 + value = text.split(Pattern.quote(function.split));
  792 + } else {
  793 + value = new String[] { text };
  794 + }
  795 + for (int c = 0; c < value.length; c++) {
  796 + boolean checkedEmpty = false;
  797 + if (value[c].equals("")) {
  798 + checkedEmpty = true;
  799 + }
  800 + if (function.output.containsKey(value[c])) {
  801 + ArrayList<MtasCRMParserFunctionOutput> list = function.output
  802 + .get(value[c]);
  803 + for (MtasCRMParserFunctionOutput listItem : list) {
  804 + functionOutputList.add(listItem.create(value[c]));
  805 + }
  806 + }
  807 + if (!checkedEmpty && function.output.containsKey("")) {
  808 + ArrayList<MtasCRMParserFunctionOutput> list = function.output
  809 + .get("");
  810 + for (MtasCRMParserFunctionOutput listItem : list) {
  811 + functionOutputList.add(listItem.create(value[c]));
  812 + }
  813 + }
  814 + }
  815 + }
  816 + }
  817 + }
  818 +
  819 + /*
  820 + * (non-Javadoc)
  821 + *
  822 + * @see mtas.analysis.parser.MtasParser#printConfig()
  823 + */
  824 + @Override
  825 + public String printConfig() {
  826 + String text = "";
  827 + text += "=== CONFIGURATION ===\n";
  828 + text += "type: " + wordAnnotationTypes.size() + " x wordAnnotation";
  829 + text += printConfigTypes(wordAnnotationTypes);
  830 + text += "=== CONFIGURATION ===\n";
  831 + return text;
  832 + }
  833 +
  834 + /**
  835 + * Prints the config types.
  836 + *
  837 + * @param types
  838 + * the types
  839 + * @return the string
  840 + */
  841 + private String printConfigTypes(HashMap<?, MtasParserType> types) {
  842 + String text = "";
  843 + for (Entry<?, MtasParserType> entry : types.entrySet()) {
  844 + text += "- " + entry.getKey() + ": " + entry.getValue().mappings.size()
  845 + + " mapping(s)\n";
  846 + for (int i = 0; i < entry.getValue().mappings.size(); i++) {
  847 + text += "\t" + entry.getValue().mappings.get(i) + "\n";
  848 + }
  849 + }
  850 + return text;
  851 + }
  852 +
  853 + /**
  854 + * The Class MtasCRMParserFunction.
  855 + */
  856 + private class MtasCRMParserFunction {
  857 +
  858 + /** The split. */
  859 + public String split;
  860 +
  861 + /** The output. */
  862 + public HashMap<String, ArrayList<MtasCRMParserFunctionOutput>> output;
  863 +
  864 + /**
  865 + * Instantiates a new mtas crm parser function.
  866 + *
  867 + * @param type
  868 + * the type
  869 + * @param split
  870 + * the split
  871 + */
  872 + public MtasCRMParserFunction(String type, String split) {
  873 + this.split = split;
  874 + output = new HashMap<String, ArrayList<MtasCRMParserFunctionOutput>>();
  875 + }
  876 +
  877 + }
  878 +
  879 + /**
  880 + * The Class MtasCRMParserFunctionOutput.
  881 + */
  882 + private class MtasCRMParserFunctionOutput {
  883 +
  884 + /** The name. */
  885 + public String name;
  886 +
  887 + /** The value. */
  888 + public String value;
  889 +
  890 + /**
  891 + * Instantiates a new mtas crm parser function output.
  892 + *
  893 + * @param name
  894 + * the name
  895 + * @param value
  896 + * the value
  897 + */
  898 + public MtasCRMParserFunctionOutput(String name, String value) {
  899 + this.name = name;
  900 + this.value = value;
  901 + }
  902 +
  903 + /**
  904 + * Creates the.
  905 + *
  906 + * @param originalValue
  907 + * the original value
  908 + * @return the mtas crm parser function output
  909 + */
  910 + public MtasCRMParserFunctionOutput create(String originalValue) {
  911 + if (value != null) {
  912 + return this;
  913 + } else {
  914 + return new MtasCRMParserFunctionOutput(name, originalValue);
  915 + }
  916 + }
  917 +
  918 + /*
  919 + * (non-Javadoc)
  920 + *
  921 + * @see java.lang.Object#toString()
  922 + */
  923 + @Override
  924 + public String toString() {
  925 + return "MtasCRMParserFunctionOutput[" + name + "," + value + "]";
  926 + }
  927 + }
  928 +
  929 + /**
  930 + * The Class MtasCRMParserMappingWordAnnotation.
  931 + */
  932 + private class MtasCRMParserMappingWordAnnotation
  933 + extends MtasParserMapping<MtasCRMParserMappingWordAnnotation> {
  934 +
  935 + /**
  936 + * Instantiates a new mtas crm parser mapping word annotation.
  937 + */
  938 + public MtasCRMParserMappingWordAnnotation() {
  939 + super();
  940 + this.position = SOURCE_OWN;
  941 + this.realOffset = SOURCE_OWN;
  942 + this.offset = SOURCE_ANCESTOR_WORD;
  943 + this.type = MAPPING_TYPE_WORD_ANNOTATION;
  944 + }
  945 +
  946 + /*
  947 + * (non-Javadoc)
  948 + *
  949 + * @see mtas.analysis.parser.MtasParser.MtasParserMapping#self()
  950 + */
  951 + @Override
  952 + protected MtasCRMParserMappingWordAnnotation self() {
  953 + return this;
  954 + }
  955 + }
  956 +
  957 + /**
  958 + * The Class MtasCRMParserMappingCRMSentence.
  959 + */
  960 + private class MtasCRMParserMappingCRMSentence
  961 + extends MtasParserMapping<MtasCRMParserMappingCRMSentence> {
  962 +
  963 + /**
  964 + * Instantiates a new mtas crm parser mapping crm sentence.
  965 + */
  966 + public MtasCRMParserMappingCRMSentence() {
  967 + super();
  968 + this.position = SOURCE_OWN;
  969 + this.realOffset = SOURCE_OWN;
  970 + this.offset = SOURCE_OWN;
  971 + this.type = MAPPING_TYPE_GROUP;
  972 + }
  973 +
  974 + /*
  975 + * (non-Javadoc)
  976 + *
  977 + * @see mtas.analysis.parser.MtasBasicParser.MtasParserMapping#self()
  978 + */
  979 + @Override
  980 + protected MtasCRMParserMappingCRMSentence self() {
  981 + return this;
  982 + }
  983 + }
  984 +
  985 + /**
  986 + * The Class MtasCRMParserMappingCRMPair.
  987 + */
  988 + private class MtasCRMParserMappingCRMPair
  989 + extends MtasParserMapping<MtasCRMParserMappingCRMPair> {
  990 +
  991 + /**
  992 + * Instantiates a new mtas crm parser mapping crm pair.
  993 + */
  994 + public MtasCRMParserMappingCRMPair() {
  995 + super();
  996 + this.position = SOURCE_OWN;
  997 + this.realOffset = SOURCE_OWN;
  998 + this.offset = SOURCE_OWN;
  999 + this.type = MAPPING_TYPE_RELATION;
  1000 + }
  1001 +
  1002 + /*
  1003 + * (non-Javadoc)
  1004 + *
  1005 + * @see mtas.analysis.parser.MtasBasicParser.MtasParserMapping#self()
  1006 + */
  1007 + @Override
  1008 + protected MtasCRMParserMappingCRMPair self() {
  1009 + return this;
  1010 + }
  1011 + }
  1012 +
  1013 +}
src/mtas/analysis/parser/MtasElanParser.java
@@ -17,10 +17,12 @@ final public class MtasElanParser extends MtasXMLParser { @@ -17,10 +17,12 @@ final public class MtasElanParser extends MtasXMLParser {
17 * @param config the config 17 * @param config the config
18 */ 18 */
19 public MtasElanParser(MtasConfiguration config) { 19 public MtasElanParser(MtasConfiguration config) {
20 - super(config); 20 + super(config);
21 } 21 }
22 -  
23 - /* (non-Javadoc) 22 +
  23 + /*
  24 + * (non-Javadoc)
  25 + *
24 * @see mtas.analysis.parser.MtasXMLParser#initParser() 26 * @see mtas.analysis.parser.MtasXMLParser#initParser()
25 */ 27 */
26 @Override 28 @Override
src/mtas/analysis/parser/MtasFoliaParser.java
@@ -17,10 +17,12 @@ final public class MtasFoliaParser extends MtasXMLParser { @@ -17,10 +17,12 @@ final public class MtasFoliaParser extends MtasXMLParser {
17 * @param config the config 17 * @param config the config
18 */ 18 */
19 public MtasFoliaParser(MtasConfiguration config) { 19 public MtasFoliaParser(MtasConfiguration config) {
20 - super(config); 20 + super(config);
21 } 21 }
22 -  
23 - /* (non-Javadoc) 22 +
  23 + /*
  24 + * (non-Javadoc)
  25 + *
24 * @see mtas.analysis.parser.MtasXMLParser#initParser() 26 * @see mtas.analysis.parser.MtasXMLParser#initParser()
25 */ 27 */
26 @Override 28 @Override
src/mtas/analysis/parser/MtasParser.java
@@ -15,16 +15,19 @@ import mtas.analysis.util.MtasParserException; @@ -15,16 +15,19 @@ import mtas.analysis.util.MtasParserException;
15 * The Class MtasParser. 15 * The Class MtasParser.
16 */ 16 */
17 abstract public class MtasParser { 17 abstract public class MtasParser {
18 - 18 +
19 /** The token collection. */ 19 /** The token collection. */
20 protected MtasTokenCollection tokenCollection; 20 protected MtasTokenCollection tokenCollection;
21 - 21 +
22 /** The config. */ 22 /** The config. */
23 protected MtasConfiguration config; 23 protected MtasConfiguration config;
24 24
25 /** The autorepair. */ 25 /** The autorepair. */
26 protected Boolean autorepair = false; 26 protected Boolean autorepair = false;
27 - 27 +
  28 + /** The makeunique. */
  29 + protected Boolean makeunique = false;
  30 +
28 /** 31 /**
29 * Inits the parser. 32 * Inits the parser.
30 * 33 *
@@ -38,10 +41,13 @@ abstract public class MtasParser { @@ -38,10 +41,13 @@ abstract public class MtasParser {
38 if (current.name.equals("autorepair")) { 41 if (current.name.equals("autorepair")) {
39 autorepair = current.attributes.get("value").equals("true"); 42 autorepair = current.attributes.get("value").equals("true");
40 } 43 }
  44 + if (current.name.equals("makeunique")) {
  45 + makeunique = current.attributes.get("value").equals("true");
  46 + }
41 } 47 }
42 } 48 }
43 } 49 }
44 - 50 +
45 /** 51 /**
46 * Creates the token collection. 52 * Creates the token collection.
47 * 53 *
@@ -59,7 +65,7 @@ abstract public class MtasParser { @@ -59,7 +65,7 @@ abstract public class MtasParser {
59 * @return the string 65 * @return the string
60 */ 66 */
61 public abstract String printConfig(); 67 public abstract String printConfig();
62 - 68 +
63 /** 69 /**
64 * The Class MtasParserObject. 70 * The Class MtasParserObject.
65 */ 71 */
@@ -259,17 +265,17 @@ abstract public class MtasParser { @@ -259,17 +265,17 @@ abstract public class MtasParser {
259 public void setText(String text) { 265 public void setText(String text) {
260 objectText = text; 266 objectText = text;
261 } 267 }
262 - 268 +
263 /** 269 /**
264 * Adds the text. 270 * Adds the text.
265 * 271 *
266 * @param text the text 272 * @param text the text
267 */ 273 */
268 public void addText(String text) { 274 public void addText(String text) {
269 - if(objectText==null) { 275 + if (objectText == null) {
270 objectText = text; 276 objectText = text;
271 } else { 277 } else {
272 - objectText+=text; 278 + objectText += text;
273 } 279 }
274 } 280 }
275 281
src/mtas/analysis/parser/MtasSketchParser.java
@@ -53,7 +53,9 @@ final public class MtasSketchParser extends MtasBasicParser { @@ -53,7 +53,9 @@ final public class MtasSketchParser extends MtasBasicParser {
53 } 53 }
54 } 54 }
55 55
56 - /* (non-Javadoc) 56 + /*
  57 + * (non-Javadoc)
  58 + *
57 * @see mtas.analysis.parser.MtasParser#initParser() 59 * @see mtas.analysis.parser.MtasParser#initParser()
58 */ 60 */
59 @Override 61 @Override
@@ -62,7 +64,7 @@ final public class MtasSketchParser extends MtasBasicParser { @@ -62,7 +64,7 @@ final public class MtasSketchParser extends MtasBasicParser {
62 if (config != null) { 64 if (config != null) {
63 65
64 // always word, no mappings 66 // always word, no mappings
65 - wordType = new MtasParserType(MAPPING_TYPE_WORD, null); 67 + wordType = new MtasParserType(MAPPING_TYPE_WORD, null, false);
66 68
67 for (int i = 0; i < config.children.size(); i++) { 69 for (int i = 0; i < config.children.size(); i++) {
68 MtasConfiguration current = config.children.get(i); 70 MtasConfiguration current = config.children.get(i);
@@ -74,7 +76,7 @@ final public class MtasSketchParser extends MtasBasicParser { @@ -74,7 +76,7 @@ final public class MtasSketchParser extends MtasBasicParser {
74 String nameMapping = mapping.attributes.get("name"); 76 String nameMapping = mapping.attributes.get("name");
75 if ((typeMapping != null)) { 77 if ((typeMapping != null)) {
76 if (typeMapping.equals(MAPPING_TYPE_WORD)) { 78 if (typeMapping.equals(MAPPING_TYPE_WORD)) {
77 - MtasSketchParserMappingWordAnnotation m = new MtasSketchParserMappingWordAnnotation(); 79 + MtasSketchParserMappingWord m = new MtasSketchParserMappingWord();
78 m.processConfig(mapping); 80 m.processConfig(mapping);
79 wordType.addMapping(m); 81 wordType.addMapping(m);
80 } else if (typeMapping.equals(MAPPING_TYPE_WORD_ANNOTATION) 82 } else if (typeMapping.equals(MAPPING_TYPE_WORD_ANNOTATION)
@@ -85,7 +87,7 @@ final public class MtasSketchParser extends MtasBasicParser { @@ -85,7 +87,7 @@ final public class MtasSketchParser extends MtasBasicParser {
85 wordAnnotationTypes.get(nameMapping).addMapping(m); 87 wordAnnotationTypes.get(nameMapping).addMapping(m);
86 } else { 88 } else {
87 MtasParserType t = new MtasParserType(typeMapping, 89 MtasParserType t = new MtasParserType(typeMapping,
88 - nameMapping); 90 + nameMapping, false);
89 t.addMapping(m); 91 t.addMapping(m);
90 wordAnnotationTypes.put(Integer.parseInt(nameMapping), t); 92 wordAnnotationTypes.put(Integer.parseInt(nameMapping), t);
91 } 93 }
@@ -97,7 +99,7 @@ final public class MtasSketchParser extends MtasBasicParser { @@ -97,7 +99,7 @@ final public class MtasSketchParser extends MtasBasicParser {
97 groupTypes.get(nameMapping).addMapping(m); 99 groupTypes.get(nameMapping).addMapping(m);
98 } else { 100 } else {
99 MtasParserType t = new MtasParserType(typeMapping, 101 MtasParserType t = new MtasParserType(typeMapping,
100 - nameMapping); 102 + nameMapping, false);
101 t.addMapping(m); 103 t.addMapping(m);
102 groupTypes.put(nameMapping, t); 104 groupTypes.put(nameMapping, t);
103 } 105 }
@@ -113,7 +115,9 @@ final public class MtasSketchParser extends MtasBasicParser { @@ -113,7 +115,9 @@ final public class MtasSketchParser extends MtasBasicParser {
113 } 115 }
114 } 116 }
115 117
116 - /* (non-Javadoc) 118 + /*
  119 + * (non-Javadoc)
  120 + *
117 * @see mtas.analysis.parser.MtasParser#createTokenCollection(java.io.Reader) 121 * @see mtas.analysis.parser.MtasParser#createTokenCollection(java.io.Reader)
118 */ 122 */
119 @Override 123 @Override
@@ -337,11 +341,13 @@ final public class MtasSketchParser extends MtasBasicParser { @@ -337,11 +341,13 @@ final public class MtasSketchParser extends MtasBasicParser {
337 } 341 }
338 } 342 }
339 // final check 343 // final check
340 - tokenCollection.check(autorepair); 344 + tokenCollection.check(autorepair, makeunique);
341 return tokenCollection; 345 return tokenCollection;
342 } 346 }
343 347
344 - /* (non-Javadoc) 348 + /*
  349 + * (non-Javadoc)
  350 + *
345 * @see mtas.analysis.parser.MtasParser#printConfig() 351 * @see mtas.analysis.parser.MtasParser#printConfig()
346 */ 352 */
347 @Override 353 @Override
@@ -373,6 +379,34 @@ final public class MtasSketchParser extends MtasBasicParser { @@ -373,6 +379,34 @@ final public class MtasSketchParser extends MtasBasicParser {
373 } 379 }
374 380
375 /** 381 /**
  382 + * The Class MtasSketchParserMappingWord.
  383 + */
  384 + private class MtasSketchParserMappingWord
  385 + extends MtasParserMapping<MtasSketchParserMappingWord> {
  386 +
  387 + /**
  388 + * Instantiates a new mtas sketch parser mapping word.
  389 + */
  390 + public MtasSketchParserMappingWord() {
  391 + super();
  392 + this.position = SOURCE_OWN;
  393 + this.realOffset = SOURCE_OWN;
  394 + this.offset = SOURCE_OWN;
  395 + this.type = MAPPING_TYPE_WORD;
  396 + }
  397 +
  398 + /*
  399 + * (non-Javadoc)
  400 + *
  401 + * @see mtas.analysis.parser.MtasBasicParser.MtasParserMapping#self()
  402 + */
  403 + @Override
  404 + protected MtasSketchParserMappingWord self() {
  405 + return this;
  406 + }
  407 + }
  408 +
  409 + /**
376 * The Class MtasSketchParserMappingWordAnnotation. 410 * The Class MtasSketchParserMappingWordAnnotation.
377 */ 411 */
378 private class MtasSketchParserMappingWordAnnotation 412 private class MtasSketchParserMappingWordAnnotation
src/mtas/analysis/parser/MtasTEIParser.java
@@ -17,10 +17,12 @@ final public class MtasTEIParser extends MtasXMLParser { @@ -17,10 +17,12 @@ final public class MtasTEIParser extends MtasXMLParser {
17 * @param config the config 17 * @param config the config
18 */ 18 */
19 public MtasTEIParser(MtasConfiguration config) { 19 public MtasTEIParser(MtasConfiguration config) {
20 - super(config); 20 + super(config);
21 } 21 }
22 -  
23 - /* (non-Javadoc) 22 +
  23 + /*
  24 + * (non-Javadoc)
  25 + *
24 * @see mtas.analysis.parser.MtasXMLParser#initParser() 26 * @see mtas.analysis.parser.MtasXMLParser#initParser()
25 */ 27 */
26 @Override 28 @Override