Commit ca49b0d7351ce75af35d7b81dc865cb9236340d0

Authored by Matthijs Brouwer
1 parent 229cfad6

update

Showing 43 changed files with 3465 additions and 1876 deletions

Too many changes to show.

To preserve performance only 33 of 43 files are displayed.

conf/parser/mtas.xml
... ... @@ -2,11 +2,13 @@
2 2 <mtas>
3 3 <configurations type="mtas.analysis.util.MtasTokenizerFactory">
4 4 <configuration name="DBNL" file="mtas/folia_dbnl.xml" />
  5 + <configuration name="DDD" file="mtas/folia_ddd.xml" />
5 6 <configuration name="EDBO" file="mtas/folia_edbo.xml" />
6 7 <configuration name="SONAR" file="mtas/folia_sonar.xml" />
7 8 </configurations>
8 9 <configurations type="mtas.analysis.util.MtasCharFilterFactory">
9 10 <configuration name="DBNL" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" />
  11 + <configuration name="DDD" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" />
10 12 <configuration name="EDBO" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" />
11 13 <configuration name="SONAR" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" />
12 14 </configurations>
... ...
conf/parser/mtas/crm_test.xml 0 → 100644
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<mtas>
  3 +
  4 + <!-- START MTAS INDEX CONFIGURATION -->
  5 + <index>
  6 + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
  7 + <payload index="false" />
  8 + <offset index="false" />
  9 + <realoffset index="false" />
  10 + <parent index="true" />
  11 + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
  12 + </index>
  13 + <!-- END MTAS INDEX CONFIGURATION -->
  14 +
  15 +
  16 +
  17 + <!-- START CONFIGURATION MTAS CRM PARSER -->
  18 + <parser name="mtas.analysis.parser.MtasCRMParser">
  19 +
  20 + <mappings>
  21 +
  22 + <mapping type="word">
  23 + </mapping>
  24 +
  25 + <mapping type="wordAnnotation" name="2">
  26 + <token type="string" offset="false" parent="false">
  27 + <pre>
  28 + <item type="string" value="t" />
  29 + </pre>
  30 + <post>
  31 + <item type="text" />
  32 + </post>
  33 + </token>
  34 + </mapping>
  35 + <mapping type="wordAnnotation" name="3">
  36 + <token type="string" offset="false" parent="false">
  37 + <pre>
  38 + <item type="string" value="lemma" />
  39 + </pre>
  40 + <post>
  41 + <item type="text" />
  42 + </post>
  43 + </token>
  44 + </mapping>
  45 + <mapping type="wordAnnotation" name="4">
  46 + </mapping>
  47 + <mapping type="wordAnnotation" name="5">
  48 + </mapping>
  49 + <mapping type="wordAnnotation" name="6">
  50 + </mapping>
  51 + <mapping type="wordAnnotation" name="7">
  52 + <token type="string" offset="false" parent="false">
  53 + <pre>
  54 + <item type="string" value="sentence" />
  55 + </pre>
  56 + <post>
  57 + <item type="text" />
  58 + </post>
  59 + </token>
  60 + <condition>
  61 + <item type="text" not="true" condition="-" />
  62 + </condition>
  63 + </mapping>
  64 + <mapping type="wordAnnotation" name="pos">
  65 + <token type="string" offset="false" parent="false">
  66 + <pre>
  67 + <item type="name" />
  68 + </pre>
  69 + <post>
  70 + <item type="text" />
  71 + </post>
  72 + </token>
  73 + </mapping>
  74 + <mapping type="wordAnnotation" name="feat.getal">
  75 + <token type="string" offset="false" parent="false">
  76 + <pre>
  77 + <item type="name" />
  78 + </pre>
  79 + <post>
  80 + <item type="text" />
  81 + </post>
  82 + </token>
  83 + </mapping>
  84 + <mapping type="wordAnnotation" name="feat.persoon">
  85 + <token type="string" offset="false" parent="false">
  86 + <pre>
  87 + <item type="name" />
  88 + </pre>
  89 + <post>
  90 + <item type="text" />
  91 + </post>
  92 + </token>
  93 + </mapping>
  94 + <mapping type="wordAnnotation" name="feat.ntype">
  95 + <token type="string" offset="false" parent="false">
  96 + <pre>
  97 + <item type="name" />
  98 + </pre>
  99 + <post>
  100 + <item type="text" />
  101 + </post>
  102 + </token>
  103 + </mapping>
  104 + <mapping type="wordAnnotation" name="feat.pvtijd">
  105 + <token type="string" offset="false" parent="false">
  106 + <pre>
  107 + <item type="name" />
  108 + </pre>
  109 + <post>
  110 + <item type="text" />
  111 + </post>
  112 + </token>
  113 + </mapping>
  114 + <mapping type="wordAnnotation" name="feat.wvorm">
  115 + <token type="string" offset="false" parent="false">
  116 + <pre>
  117 + <item type="name" />
  118 + </pre>
  119 + <post>
  120 + <item type="text" />
  121 + </post>
  122 + </token>
  123 + </mapping>
  124 + <mapping type="wordAnnotation" name="feat.numtype">
  125 + <token type="string" offset="false" parent="false">
  126 + <pre>
  127 + <item type="name" />
  128 + </pre>
  129 + <post>
  130 + <item type="text" />
  131 + </post>
  132 + </token>
  133 + </mapping>
  134 + <mapping type="wordAnnotation" name="feat.vwtype">
  135 + <token type="string" offset="false" parent="false">
  136 + <pre>
  137 + <item type="name" />
  138 + </pre>
  139 + <post>
  140 + <item type="text" />
  141 + </post>
  142 + </token>
  143 + </mapping>
  144 + <mapping type="wordAnnotation" name="feat.lwtype">
  145 + <token type="string" offset="false" parent="false">
  146 + <pre>
  147 + <item type="name" />
  148 + </pre>
  149 + <post>
  150 + <item type="text" />
  151 + </post>
  152 + </token>
  153 + </mapping>
  154 + <mapping type="wordAnnotation" name="feat.probleemgeval">
  155 + <token type="string" offset="false" parent="false">
  156 + <pre>
  157 + <item type="name" />
  158 + </pre>
  159 + <post>
  160 + <item type="text" />
  161 + </post>
  162 + </token>
  163 + </mapping>
  164 + </mappings>
  165 +
  166 + <functions>
  167 + <function name="4" split="+">
  168 + <condition value="000,001,002,003,004,005,006,009">
  169 + <output name="pos" value="N" />
  170 + <output name="feat.getal" value="ev" />
  171 + </condition>
  172 + <condition value="010,011,012,013,014,015,016,019">
  173 + <output name="pos" value="N" />
  174 + <output name="feat.getal" value="mv" />
  175 + </condition>
  176 + <condition value="020,021,022,023,024,025,026,029">
  177 + <output name="pos" value="N" />
  178 + <output name="feat.ntype" value="eigen" />
  179 + </condition>
  180 + <condition value="090,091,092,093,094,095,096,099">
  181 + <output name="pos" value="N" />
  182 + <output name="feat.probleemgeval" />
  183 + </condition>
  184 + <condition value="100,101,102,103,104,105,106,109">
  185 + <output name="pos" value="ADJ" />
  186 + <output name="feat.getal" value="ev" />
  187 + </condition>
  188 + <condition value="110,111,112,113,114,115,116,119">
  189 + <output name="pos" value="ADJ" />
  190 + <output name="feat.getal" value="mv" />
  191 + </condition>
  192 + <condition value="190,191,192,193,194,195,196,199">
  193 + <output name="pos" value="ADJ" />
  194 + <output name="feat.probleemgeval" />
  195 + </condition>
  196 +
  197 +
  198 + <condition value="200,201,202,203,204,205,206,209">
  199 + <output name="pos" value="WW" />
  200 + <output name="feat.pvtijd" value="tgw" />
  201 + </condition>
  202 + <condition value="210,211,212,213,214,215,216,219">
  203 + <output name="pos" value="WW" />
  204 + <output name="feat.pvtijd" value="tgw" />
  205 + </condition>
  206 + <condition value="220,221,222,223,224,225,226,229">
  207 + <output name="pos" value="WW" />
  208 + <output name="feat.pvtijd" value="verl" />
  209 + </condition>
  210 + <condition value="230,231,232,233,234,235,236,239">
  211 + <output name="pos" value="WW" />
  212 + <output name="feat.pvtijd" value="verl" />
  213 + </condition>
  214 + <condition value="240,241,242,243,244,245,246,249">
  215 + <output name="pos" value="WW" />
  216 + </condition>
  217 + <condition value="250,251,252,253,254,255,256,259">
  218 + <output name="pos" value="WW" />
  219 + <output name="feat.wvorm" value="inf" />
  220 + </condition> <condition value="260,261,262,263,264,265,266,269">
  221 + <output name="pos" value="WW" />
  222 + <output name="feat.wvorm" value="inf" />
  223 + </condition> <condition value="270,271,272,273,274,275,276,279">
  224 + <output name="pos" value="WW" />
  225 + </condition> <condition value="280,281,282,283,284,285,286,289">
  226 + <output name="pos" value="WW" />
  227 + </condition>
  228 + <condition value="290,291,292,293,294,295,296,299">
  229 + <output name="pos" value="WW" />
  230 + <output name="feat.probleemgeval" />
  231 + </condition>
  232 +
  233 +
  234 + <condition value="300,301,302,303,304,305,306,309">
  235 + <output name="pos" value="TW" />
  236 + <output name="feat.numtype" value="hoofd" />
  237 + </condition>
  238 + <condition value="310,311,312,313,314,315,316,319">
  239 + <output name="pos" value="TW" />
  240 + <output name="feat.numtype" value="rang" />
  241 + </condition>
  242 + <condition value="320,321,322,323,324,325,326,329">
  243 + <output name="pos" value="TW" />
  244 + </condition>
  245 + <condition value="390,391,392,393,394,395,396,399">
  246 + <output name="pos" value="TW" />
  247 + <output name="feat.probleemgeval" />
  248 + </condition>
  249 +
  250 + <condition value="401">
  251 + <output name="pos" value="VNW" />
  252 + <output name="feat.getal" value="ev" />
  253 + <output name="feat.persoon" value="1" />
  254 + </condition>
  255 + <condition value="402">
  256 + <output name="pos" value="VNW" />
  257 + <output name="feat.getal" value="ev" />
  258 + <output name="feat.persoon" value="2" />
  259 + </condition>
  260 + <condition value="403">
  261 + <output name="pos" value="VNW" />
  262 + <output name="feat.getal" value="ev" />
  263 + <output name="feat.persoon" value="3" />
  264 + </condition>
  265 + <condition value="404">
  266 + <output name="pos" value="VNW" />
  267 + <output name="feat.getal" value="mv" />
  268 + <output name="feat.persoon" value="1" />
  269 + </condition>
  270 + <condition value="405">
  271 + <output name="pos" value="VNW" />
  272 + <output name="feat.getal" value="mv" />
  273 + <output name="feat.persoon" value="2" />
  274 + </condition>
  275 + <condition value="406">
  276 + <output name="pos" value="VNW" />
  277 + <output name="feat.getal" value="mv" />
  278 + <output name="feat.persoon" value="3" />
  279 + </condition>
  280 + <condition value="409">
  281 + <output name="pos" value="VNW" />
  282 + <output name="feat.probleemgeval" />
  283 + </condition>
  284 + <condition value="410,411,412,413,414,415,416,419">
  285 + <output name="pos" value="VNW" />
  286 + <output name="feat.vwtype" value="aanw" />
  287 + </condition>
  288 + <condition value="420,421,422,423,424,425,426,429">
  289 + <output name="pos" value="VNW" />
  290 + <output name="feat.vwtype" value="betr" />
  291 + </condition>
  292 + <condition value="430,431,432,433,434,435,436,439">
  293 + <output name="pos" value="VNW" />
  294 + <output name="feat.vwtype" value="vb" />
  295 + </condition>
  296 + <condition value="434,441,442,443,444,445,446,449">
  297 + <output name="pos" value="VNW" />
  298 + <output name="feat.vwtype" value="vb" />
  299 + </condition>
  300 + <condition value="440,441,442,443,444,445,446,449">
  301 + <output name="pos" value="VNW" />
  302 + <output name="feat.lwtype" value="onbep" />
  303 + </condition>
  304 + <condition value="450,451,452,453,454,455,456,459">
  305 + <output name="pos" value="VNW" />
  306 + <output name="feat.vwtype" value="bez" />
  307 + </condition>
  308 +
  309 +
  310 + <condition value="001,011,021,091">
  311 + <output name="feat.form" value="-e" />
  312 + </condition>
  313 + <condition value="002,012,022,092">
  314 + <output name="feat.form" value="-s/-th" />
  315 + </condition>
  316 + <condition value="003,013,023,092">
  317 + <output name="feat.form" value="-t" />
  318 + </condition>
  319 + <condition value="004,014,024,092">
  320 + <output name="feat.form" value="-n" />
  321 + </condition>
  322 + <condition value="005,015,025,095">
  323 + <output name="feat.form" value="-r/-re" />
  324 + </condition>
  325 + <condition value="006,016,026,096">
  326 + <output name="feat.form" value="-a" />
  327 + </condition>
  328 + <condition value="009,019,029,099">
  329 + <output name="feat.form" value="unclear" />
  330 + </condition>
  331 +
  332 + </function>
  333 + </functions>
  334 +
  335 + </parser>
  336 + <!-- END CONFIGURATION MTAS CRM PARSER -->
  337 +
  338 +
  339 +</mtas>
0 340 \ No newline at end of file
... ...
conf/parser/mtas/folia_ddd.xml 0 → 100644
  1 +<?xml version="1.0" encoding="UTF-8" ?>
  2 +<mtas>
  3 +
  4 + <!-- START MTAS INDEX CONFIGURATION -->
  5 + <index>
  6 + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
  7 + <payload index="false" />
  8 + <offset index="false" />
  9 + <realoffset index="false" />
  10 + <parent index="true" />
  11 + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
  12 + </index>
  13 + <!-- END MTAS INDEX CONFIGURATION -->
  14 +
  15 +
  16 +
  17 + <!-- START CONFIGURATION MTAS FOLIA PARSER -->
  18 + <parser name="mtas.analysis.parser.MtasFoliaParser">
  19 +
  20 + <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
  21 + <autorepair value="true" />
  22 + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
  23 +
  24 + <!-- START REFERENCES -->
  25 + <references>
  26 + <reference name="wref" ref="id" />
  27 + </references>
  28 + <!-- END REFERENCES -->
  29 +
  30 + <!-- START MAPPINGS -->
  31 + <mappings>
  32 +
  33 + <!-- START WORDS -->
  34 + <mapping type="word" name="w">
  35 + </mapping>
  36 + <mapping type="word" name="w">
  37 + <token type="string" offset="false" realoffset="false" parent="false">
  38 + <pre>
  39 + <item type="name" />
  40 + </pre>
  41 + <post>
  42 + <item type="attribute" name="class" />
  43 + </post>
  44 + </token>
  45 + <condition>
  46 + <item type="attribute" name="class" />
  47 + <item type="attribute" name="class" not="true" condition="WORD" />
  48 + </condition>
  49 + </mapping>
  50 + <!-- END WORDS -->
  51 +
  52 + <!-- START WORD ANNOTATIONS -->
  53 + <mapping type="wordAnnotation" name="t">
  54 + <token type="string" offset="false">
  55 + <pre>
  56 + <item type="name" />
  57 + </pre>
  58 + <post>
  59 + <item type="text" />
  60 + </post>
  61 + </token>
  62 + <token type="string" offset="false" realoffset="false" parent="false">
  63 + <pre>
  64 + <item type="name" />
  65 + <item type="string" value="_lc" />
  66 + </pre>
  67 + <post>
  68 + <item type="text" filter="ascii,lowercase" />
  69 + </post>
  70 + </token>
  71 + <condition>
  72 + <item type="ancestor" number="0" />
  73 + <item type="ancestorWord" number="1" />
  74 + <item type="unknownAncestor" number="0" />
  75 + </condition>
  76 + </mapping>
  77 + <!-- END WORD ANNOTATIONS -->
  78 +
  79 + <!-- START RELATIONS -->
  80 + <!-- END RELATIONS -->
  81 +
  82 + <!-- START GROUPS -->
  83 + <mapping type="group" name="s">
  84 + <token type="string" offset="false">
  85 + <pre>
  86 + <item type="name" />
  87 + </pre>
  88 + <post>
  89 + <item type="attribute" name="class" />
  90 + </post>
  91 + </token>
  92 + </mapping>
  93 + <mapping type="group" name="p">
  94 + <token type="string" offset="false">
  95 + <pre>
  96 + <item type="name" />
  97 + </pre>
  98 + <post>
  99 + <item type="attribute" name="class" />
  100 + </post>
  101 + </token>
  102 + </mapping>
  103 + <mapping type="group" name="div">
  104 + <token type="string" offset="false">
  105 + <pre>
  106 + <item type="name" />
  107 + </pre>
  108 + <post>
  109 + <item type="attribute" name="class" />
  110 + </post>
  111 + </token>
  112 + </mapping>
  113 + <mapping type="group" name="head">
  114 + <token type="string" offset="false">
  115 + <pre>
  116 + <item type="name" />
  117 + </pre>
  118 + <post>
  119 + <item type="attribute" name="class" />
  120 + </post>
  121 + </token>
  122 + </mapping>
  123 + <!-- END GROUPS -->
  124 +
  125 + <!-- START GROUP ANNOTATIONS -->
  126 + <mapping type="groupAnnotation" name="lang">
  127 + <token type="string" offset="false" realoffset="false" parent="false">
  128 + <pre>
  129 + <item type="name" />
  130 + </pre>
  131 + <post>
  132 + <item type="attribute" name="class" />
  133 + </post>
  134 + </token>
  135 + </mapping>
  136 + <!-- END GROUP ANNOTATIONS -->
  137 +
  138 + </mappings>
  139 + <!-- END MAPPINGS -->
  140 +
  141 + </parser>
  142 + <!-- END CONFIGURATION MTAS FOLIA PARSER -->
  143 +
  144 +
  145 +</mtas>
0 146 \ No newline at end of file
... ...
junit/mtas/parser/MtasCQLParserTestSentence.java
... ... @@ -29,11 +29,11 @@ public class MtasCQLParserTestSentence {
29 29 basicTests();
30 30 }
31 31  
32   - private void testCQLParse(String field, String cql, SpanQuery q) {
  32 + private void testCQLParse(String field, String defaultPrefix, String cql, SpanQuery q) {
33 33 MtasCQLParser p = new MtasCQLParser(new BufferedReader(new StringReader(cql)));
34 34 try {
35 35 System.out.print("CQL parsing:\t"+cql);
36   - assertEquals(p.parse(field) ,q);
  36 + assertEquals(p.parse(field, defaultPrefix) ,q);
37 37 System.out.print("\n");
38 38 } catch (ParseException e) {
39 39 System.out.println("Error CQL parsing:\t"+cql);
... ... @@ -41,12 +41,12 @@ public class MtasCQLParserTestSentence {
41 41 }
42 42 }
43 43  
44   - private void testCQLEquivalent(String field, String cql1, String cql2) {
  44 + private void testCQLEquivalent(String field, String defaultPrefix, String cql1, String cql2) {
45 45 MtasCQLParser p1 = new MtasCQLParser(new BufferedReader(new StringReader(cql1)));
46 46 MtasCQLParser p2 = new MtasCQLParser(new BufferedReader(new StringReader(cql2)));
47 47 try {
48 48 System.out.print("CQL equivalent:\t"+cql1+" and "+cql2);
49   - assertEquals(p1.parse(field) ,p2.parse(field));
  49 + assertEquals(p1.parse(field, defaultPrefix) ,p2.parse(field, defaultPrefix));
50 50 System.out.print("\n");
51 51 } catch (ParseException e) {
52 52 System.out.println("Error CQL equivalent:\t"+cql1+" and "+cql2);
... ... @@ -73,6 +73,7 @@ public class MtasCQLParserTestSentence {
73 73 basicTest16();
74 74 basicTest17();
75 75 basicTest18();
  76 + basicTest19();
76 77 }
77 78  
78 79 private void basicTest1() {
... ... @@ -84,14 +85,14 @@ public class MtasCQLParserTestSentence {
84 85 items.add(new MtasSpanSequenceItem(q1, false));
85 86 items.add(new MtasSpanSequenceItem(q2, false));
86 87 SpanQuery q = new MtasSpanSequenceQuery(items);
87   - testCQLParse(field, cql, q);
  88 + testCQLParse(field, null, cql, q);
88 89 }
89 90  
90 91 private void basicTest2() {
91 92 String field = "testveld";
92 93 String cql1 = "[pos=\"LID\"] [] []? [] [lemma=\"koe\"]";
93 94 String cql2 = "[pos=\"LID\"] []{2,3} [lemma=\"koe\"]";
94   - testCQLEquivalent(field, cql1, cql2);
  95 + testCQLEquivalent(field, null, cql1, cql2);
95 96 }
96 97  
97 98 private void basicTest3() {
... ... @@ -100,7 +101,7 @@ public class MtasCQLParserTestSentence {
100 101 SpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID");
101 102 SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","koe");
102 103 SpanQuery q = new MtasSpanOrQuery(q1,q2);
103   - testCQLParse(field, cql, q);
  104 + testCQLParse(field, null, cql, q);
104 105 }
105 106  
106 107 private void basicTest4() {
... ... @@ -114,28 +115,28 @@ public class MtasCQLParserTestSentence {
114 115 items.add(new MtasSpanSequenceItem(q3, false));
115 116 SpanQuery q4 = new MtasSpanSequenceQuery(items);
116 117 SpanQuery q = new MtasSpanOrQuery(q1,q4);
117   - testCQLParse(field, cql, q);
  118 + testCQLParse(field, null, cql, q);
118 119 }
119 120  
120 121 private void basicTest5() {
121 122 String field = "testveld";
122 123 String cql1 = "([pos=\"LID\"]([pos=\"ADJ\"][lemma=\"koe\"]))";
123 124 String cql2 = "[pos=\"LID\"][pos=\"ADJ\"][lemma=\"koe\"]";
124   - testCQLEquivalent(field, cql1, cql2);
  125 + testCQLEquivalent(field, null, cql1, cql2);
125 126 }
126 127  
127 128 private void basicTest6() {
128 129 String field = "testveld";
129 130 String cql1 = "([pos=\"LID\"]|[lemma=\"de\"][lemma=\"koe\"])|([pos=\"ADJ\"]|([lemma=\"het\"]([lemma=\"paard\"])))";
130 131 String cql2 = "[pos=\"LID\"]|[lemma=\"de\"][lemma=\"koe\"]|[pos=\"ADJ\"]|[lemma=\"het\"][lemma=\"paard\"]";
131   - testCQLEquivalent(field, cql1, cql2);
  132 + testCQLEquivalent(field, null, cql1, cql2);
132 133 }
133 134  
134 135 private void basicTest7() {
135 136 String field = "testveld";
136 137 String cql1 = "[pos=\"LID\"] []{0,1} []{3,5} []{2,4}";
137 138 String cql2 = "[pos=\"LID\"] []{5,10}";
138   - testCQLEquivalent(field, cql1, cql2);
  139 + testCQLEquivalent(field, null, cql1, cql2);
139 140 }
140 141  
141 142 private void basicTest8() {
... ... @@ -149,7 +150,7 @@ public class MtasCQLParserTestSentence {
149 150 items.add(new MtasSpanSequenceItem(q1, false));
150 151 items.add(new MtasSpanSequenceItem(q4, false));
151 152 SpanQuery q = new MtasSpanSequenceQuery(items);
152   - testCQLParse(field, cql, q);
  153 + testCQLParse(field, null, cql, q);
153 154 }
154 155  
155 156 private void basicTest9() {
... ... @@ -165,7 +166,7 @@ public class MtasCQLParserTestSentence {
165 166 items.add(new MtasSpanSequenceItem(q5, false));
166 167 items.add(new MtasSpanSequenceItem(q4, false));
167 168 SpanQuery q = new MtasSpanSequenceQuery(items);
168   - testCQLParse(field, cql, q);
  169 + testCQLParse(field, null, cql, q);
169 170 }
170 171  
171 172 private void basicTest10() {
... ... @@ -179,7 +180,7 @@ public class MtasCQLParserTestSentence {
179 180 items.add(new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(q2,1,3), false));
180 181 items.add(new MtasSpanSequenceItem(q3, false));
181 182 SpanQuery q = new MtasSpanSequenceQuery(items);
182   - testCQLParse(field, cql, q);
  183 + testCQLParse(field, null, cql, q);
183 184 }
184 185  
185 186 private void basicTest11() {
... ... @@ -188,7 +189,7 @@ public class MtasCQLParserTestSentence {
188 189 SpanQuery q1 = new MtasCQLParserGroupQuery(field,"sentence");
189 190 SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","koe");
190 191 SpanQuery q = new SpanContainingQuery(q1, q2);
191   - testCQLParse(field, cql, q);
  192 + testCQLParse(field, null, cql, q);
192 193 }
193 194  
194 195 private void basicTest12() {
... ... @@ -197,7 +198,7 @@ public class MtasCQLParserTestSentence {
197 198 SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe");
198 199 SpanQuery q2 = new MtasCQLParserGroupQuery(field,"sentence");
199 200 SpanQuery q = new SpanWithinQuery(q2, q1);
200   - testCQLParse(field, cql, q);
  201 + testCQLParse(field, null, cql, q);
201 202 }
202 203  
203 204 private void basicTest13() {
... ... @@ -211,7 +212,7 @@ public class MtasCQLParserTestSentence {
211 212 items.add(new MtasSpanSequenceItem(q1, false));
212 213 items.add(new MtasSpanSequenceItem(q4, false));
213 214 SpanQuery q = new MtasSpanSequenceQuery(items);
214   - testCQLParse(field, cql, q);
  215 + testCQLParse(field, null, cql, q);
215 216 }
216 217  
217 218 private void basicTest14() {
... ... @@ -225,7 +226,7 @@ public class MtasCQLParserTestSentence {
225 226 items.add(new MtasSpanSequenceItem(q3, false));
226 227 items.add(new MtasSpanSequenceItem(q4, false));
227 228 SpanQuery q = new MtasSpanSequenceQuery(items);
228   - testCQLParse(field, cql, q);
  229 + testCQLParse(field, null, cql, q);
229 230 }
230 231  
231 232 private void basicTest15() {
... ... @@ -246,7 +247,7 @@ public class MtasCQLParserTestSentence {
246 247 items2.add(new MtasSpanSequenceItem(q1, false));
247 248 items2.add(new MtasSpanSequenceItem(q8, false));
248 249 SpanQuery q = new MtasSpanSequenceQuery(items2);
249   - testCQLParse(field, cql, q);
  250 + testCQLParse(field, null, cql, q);
250 251 }
251 252  
252 253 private void basicTest16() {
... ... @@ -258,7 +259,7 @@ public class MtasCQLParserTestSentence {
258 259 SpanQuery q4 = new SpanContainingQuery(q2, q3);
259 260 SpanQuery q5 = new SpanWithinQuery(q4, q1);
260 261 SpanQuery q = new SpanNotQuery(q5,new SpanContainingQuery(q5, q3));
261   - testCQLParse(field, cql, q);
  262 + testCQLParse(field, null, cql, q);
262 263 }
263 264  
264 265 private void basicTest17() {
... ... @@ -271,11 +272,23 @@ public class MtasCQLParserTestSentence {
271 272 items.add(new MtasSpanSequenceItem(q2, false));
272 273 items.add(new MtasSpanSequenceItem(new MtasSpanMatchAllQuery(field), false));
273 274 SpanQuery q = new MtasSpanSequenceQuery(items);
274   - testCQLParse(field, cql, q);
  275 + testCQLParse(field, null, cql, q);
275 276 }
276 277  
277 278 private void basicTest18() {
278 279 String field = "testveld";
  280 + String cql = "\"de\" [pos=\"N\"]";
  281 + SpanQuery q1 = new MtasCQLParserWordQuery(field,"t_lc","de");
  282 + SpanQuery q2 = new MtasCQLParserWordQuery(field,"pos","N");
  283 + List<MtasSpanSequenceItem> items = new ArrayList<MtasSpanSequenceItem>();
  284 + items.add(new MtasSpanSequenceItem(q1, false));
  285 + items.add(new MtasSpanSequenceItem(q2, false));
  286 + SpanQuery q = new MtasSpanSequenceQuery(items);
  287 + testCQLParse(field, "t_lc", cql, q);
  288 + }
  289 +
  290 + private void basicTest19() {
  291 + String field = "testveld";
279 292 String cql = "([]<entity=\"loc\"/>{1,2}[]){3,4}";
280 293 SpanQuery q1 = new MtasCQLParserGroupQuery(field,"entity","loc");
281 294 SpanQuery q2 = new MtasSpanRecurrenceQuery(q1,1,2);
... ... @@ -285,7 +298,7 @@ public class MtasCQLParserTestSentence {
285 298 items.add(new MtasSpanSequenceItem(new MtasSpanMatchAllQuery(field), false));
286 299 SpanQuery q3 = new MtasSpanSequenceQuery(items);
287 300 SpanQuery q = new MtasSpanRecurrenceQuery(q3,3,4);
288   - testCQLParse(field, cql, q);
  301 + testCQLParse(field, null, cql, q);
289 302 }
290 303  
291 304 }
... ...
junit/mtas/parser/MtasCQLParserTestWord.java
... ... @@ -23,10 +23,10 @@ public class MtasCQLParserTestWord {
23 23 basicNotTests();
24 24 }
25 25  
26   - private void testCQLParse(String field, String cql, SpanQuery q) {
  26 + private void testCQLParse(String field, String defaultPrefix, String cql, SpanQuery q) {
27 27 MtasCQLParser p = new MtasCQLParser(new BufferedReader(new StringReader(cql)));
28 28 try {
29   - assertEquals(p.parse(field) ,q);
  29 + assertEquals(p.parse(field, defaultPrefix) ,q);
30 30 System.out.println("Tested CQL parsing:\t"+cql);
31 31 } catch (ParseException e) {
32 32 System.out.println("Error CQL parsing:\t"+cql);
... ... @@ -34,11 +34,11 @@ public class MtasCQLParserTestWord {
34 34 }
35 35 }
36 36  
37   - private void testCQLEquivalent(String field, String cql1, String cql2) {
  37 + private void testCQLEquivalent(String field, String defaultPrefix, String cql1, String cql2) {
38 38 MtasCQLParser p1 = new MtasCQLParser(new BufferedReader(new StringReader(cql1)));
39 39 MtasCQLParser p2 = new MtasCQLParser(new BufferedReader(new StringReader(cql2)));
40 40 try {
41   - assertEquals(p1.parse(field) ,p2.parse(field));
  41 + assertEquals(p1.parse(field, defaultPrefix) ,p2.parse(field, defaultPrefix));
42 42 System.out.println("Tested CQL equivalent:\t"+cql1+" and "+cql2);
43 43 } catch (ParseException e) {
44 44 System.out.println("Error CQL equivalent:\t"+cql1+" and "+cql2);
... ... @@ -67,6 +67,7 @@ public class MtasCQLParserTestWord {
67 67 basicTest10();
68 68 basicTest11();
69 69 basicTest12();
  70 + basicTest13();
70 71 }
71 72  
72 73 private void basicNotTest1() {
... ... @@ -75,14 +76,14 @@ public class MtasCQLParserTestWord {
75 76 SpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID");
76 77 SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","de");
77 78 SpanQuery q = new SpanNotQuery(q1,q2);
78   - testCQLParse(field, cql, q);
  79 + testCQLParse(field, null, cql, q);
79 80 }
80 81  
81 82 private void basicNotTest2() {
82 83 String field = "testveld";
83 84 String cql1 = "[pos=\"LID\" & (!lemma=\"de\")]";
84 85 String cql2 = "[pos=\"LID\" & !(lemma=\"de\")]";
85   - testCQLEquivalent(field, cql1, cql2);
  86 + testCQLEquivalent(field, null, cql1, cql2);
86 87 }
87 88  
88 89 private void basicNotTest3() {
... ... @@ -93,28 +94,28 @@ public class MtasCQLParserTestWord {
93 94 SpanQuery q3 = new MtasCQLParserWordQuery(field,"lemma","een");
94 95 SpanQuery q4 = new MtasSpanOrQuery(new SpanQuery[]{q2,q3});
95 96 SpanQuery q = new SpanNotQuery(q1,q4);
96   - testCQLParse(field, cql, q);
  97 + testCQLParse(field, null, cql, q);
97 98 }
98 99  
99 100 private void basicNotTest4() {
100 101 String field = "testveld";
101 102 String cql1 = "[pos=\"LID\" & !(lemma=\"de\" | lemma=\"een\")]";
102 103 String cql2 = "[pos=\"LID\" & (!lemma=\"de\" & !lemma=\"een\")]";
103   - testCQLEquivalent(field, cql1, cql2);
  104 + testCQLEquivalent(field, null, cql1, cql2);
104 105 }
105 106  
106 107 private void basicNotTest5() {
107 108 String field = "testveld";
108 109 String cql1 = "[pos=\"LID\" & !(lemma=\"de\" | lemma=\"een\")]";
109 110 String cql2 = "[pos=\"LID\" & !lemma=\"de\" & !lemma=\"een\"]";
110   - testCQLEquivalent(field, cql1, cql2);
  111 + testCQLEquivalent(field, null, cql1, cql2);
111 112 }
112 113  
113 114 private void basicTest1() {
114 115 String field = "testveld";
115 116 String cql = "[lemma=\"koe\"]";
116 117 SpanQuery q = new MtasCQLParserWordQuery(field, "lemma", "koe");
117   - testCQLParse(field, cql, q);
  118 + testCQLParse(field, null, cql, q);
118 119 }
119 120  
120 121 private void basicTest2() {
... ... @@ -123,7 +124,7 @@ public class MtasCQLParserTestWord {
123 124 SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe");
124 125 SpanQuery q2 = new MtasCQLParserWordQuery(field,"pos","N");
125 126 SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q1,q2});
126   - testCQLParse(field, cql, q);
  127 + testCQLParse(field, null, cql, q);
127 128 }
128 129  
129 130 private void basicTest3() {
... ... @@ -132,14 +133,14 @@ public class MtasCQLParserTestWord {
132 133 SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe");
133 134 SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","paard");
134 135 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q2});
135   - testCQLParse(field, cql, q);
  136 + testCQLParse(field, null, cql, q);
136 137 }
137 138  
138 139 private void basicTest4() {
139 140 String field = "testveld";
140 141 String cql1 = "[lemma=\"koe\" | lemma=\"paard\"]";
141 142 String cql2 = "[(lemma=\"koe\" | lemma=\"paard\")]";
142   - testCQLEquivalent(field, cql1, cql2);
  143 + testCQLEquivalent(field, null, cql1, cql2);
143 144 }
144 145  
145 146 private void basicTest5() {
... ... @@ -150,7 +151,7 @@ public class MtasCQLParserTestWord {
150 151 SpanQuery q3 = new MtasSpanOrQuery(new SpanQuery[]{q1,q2});
151 152 SpanQuery q4 = new MtasCQLParserWordQuery(field,"pos","N");
152 153 SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q3,q4});
153   - testCQLParse(field, cql, q);
  154 + testCQLParse(field, null, cql, q);
154 155 }
155 156  
156 157 private void basicTest6() {
... ... @@ -161,7 +162,7 @@ public class MtasCQLParserTestWord {
161 162 SpanQuery q3 = new MtasCQLParserWordQuery(field,"lemma","paard");
162 163 SpanQuery q4 = new MtasSpanOrQuery(new SpanQuery[]{q2,q3});
163 164 SpanQuery q = new MtasSpanAndQuery(new SpanQuery[]{q1,q4});
164   - testCQLParse(field, cql, q);
  165 + testCQLParse(field, null, cql, q);
165 166 }
166 167  
167 168 private void basicTest7() {
... ... @@ -172,7 +173,7 @@ public class MtasCQLParserTestWord {
172 173 SpanQuery q3 = new MtasCQLParserWordQuery(field,"pos","N");
173 174 SpanQuery q4 = new MtasSpanAndQuery(new SpanQuery[]{q2,q3});
174 175 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q4});
175   - testCQLParse(field, cql, q);
  176 + testCQLParse(field, null, cql, q);
176 177 }
177 178  
178 179 private void basicTest8() {
... ... @@ -185,7 +186,7 @@ public class MtasCQLParserTestWord {
185 186 SpanQuery q5 = new MtasSpanAndQuery(new SpanQuery[]{q1,q2});
186 187 SpanQuery q6 = new MtasSpanAndQuery(new SpanQuery[]{q3,q4});
187 188 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q5,q6});
188   - testCQLParse(field, cql, q);
  189 + testCQLParse(field, null, cql, q);
189 190 }
190 191  
191 192 private void basicTest9() {
... ... @@ -200,7 +201,7 @@ public class MtasCQLParserTestWord {
200 201 SpanQuery q7 = new MtasSpanAndQuery(new SpanQuery[]{q6,q3});
201 202 SpanQuery q8 = new MtasSpanAndQuery(new SpanQuery[]{q4,q5});
202 203 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q7,q8});
203   - testCQLParse(field, cql, q);
  204 + testCQLParse(field, null, cql, q);
204 205 }
205 206  
206 207 private void basicTest10() {
... ... @@ -217,22 +218,22 @@ public class MtasCQLParserTestWord {
217 218 SpanQuery q9 = new MtasSpanOrQuery(new SpanQuery[]{q4,q5});
218 219 SpanQuery q10 = new MtasSpanAndQuery(new SpanQuery[]{q9,q6});
219 220 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q8,q10});
220   - testCQLParse(field, cql, q);
  221 + testCQLParse(field, null, cql, q);
221 222 }
222 223  
223 224 private void basicTest11() {
224 225 String field = "testveld";
225 226 String cql1 = "[#300]";
226 227 SpanQuery q1 = new MtasCQLParserWordPositionQuery(field, 300);
227   - testCQLParse(field, cql1, q1);
  228 + testCQLParse(field, null, cql1, q1);
228 229 String cql2 = "[#100-110]";
229 230 SpanQuery q2 = new MtasCQLParserWordPositionQuery(field, 100, 110);
230   - testCQLParse(field, cql2, q2);
  231 + testCQLParse(field, null, cql2, q2);
231 232 String cql3 = "[#100-105 | #110]";
232 233 SpanQuery q3a = new MtasCQLParserWordPositionQuery(field, 100, 105);
233 234 SpanQuery q3b = new MtasCQLParserWordPositionQuery(field, 110);
234 235 SpanQuery q3 = new MtasSpanOrQuery(q3a, q3b);
235   - testCQLParse(field, cql3, q3);
  236 + testCQLParse(field, null, cql3, q3);
236 237 }
237 238  
238 239 private void basicTest12() {
... ... @@ -242,6 +243,13 @@ public class MtasCQLParserTestWord {
242 243 SpanQuery q2 = new MtasCQLParserWordQuery(field,"t_lc","het");
243 244 SpanQuery q3 = new MtasCQLParserWordQuery(field,"t_lc","paard");
244 245 SpanQuery q = new MtasSpanOrQuery(new SpanQuery[]{q1,q2,q3});
245   - testCQLParse(field, cql, q);
246   - }
  246 + testCQLParse(field, null, cql, q);
  247 + }
  248 +
  249 + private void basicTest13() {
  250 + String field = "testveld";
  251 + String cql = "\"de\"";
  252 + SpanQuery q = new MtasCQLParserWordQuery(field,"t_lc","de");
  253 + testCQLParse(field, "t_lc", cql, q);
  254 + }
247 255 }
... ...
... ... @@ -26,9 +26,9 @@
26 26 <url>https://www.meertens.knaw.nl/cms/nl/medewerkers/144373-matthijsb</url>
27 27 </developer>
28 28 <developer>
29   - <name>Marc Kemps-Snijders</name>
30   - <url>https://www.meertens.knaw.nl/cms/nl/medewerkers/143329-marck</url>
31   - </developer>
  29 + <name>Marc Kemps-Snijders</name>
  30 + <url>https://www.meertens.knaw.nl/cms/nl/medewerkers/143329-marck</url>
  31 + </developer>
32 32 </developers>
33 33 <build>
34 34 <sourceDirectory>src</sourceDirectory>
... ... @@ -46,7 +46,7 @@
46 46 <source>1.8</source>
47 47 <target>1.8</target>
48 48 </configuration>
49   - </plugin>
  49 + </plugin>
50 50 <plugin>
51 51 <groupId>org.apache.maven.plugins</groupId>
52 52 <artifactId>maven-site-plugin</artifactId>
... ... @@ -64,7 +64,7 @@
64 64 <addMavenDescriptor>false</addMavenDescriptor>
65 65 </archive>
66 66 </configuration>
67   - </plugin>
  67 + </plugin>
68 68 </plugins>
69 69 </build>
70 70 <reporting>
... ...
src/mtas/analysis/MtasTokenizer.java
... ... @@ -86,7 +86,7 @@ public final class MtasTokenizer extends Tokenizer {
86 86 * @throws IOException Signals that an I/O exception has occurred.
87 87 */
88 88 public MtasTokenizer(MtasConfiguration config) throws IOException {
89   - processConfiguration(config);
  89 + processConfiguration(config);
90 90 }
91 91  
92 92 /**
... ... @@ -174,7 +174,9 @@ public final class MtasTokenizer extends Tokenizer {
174 174 public void print(Reader r) throws IOException, MtasParserException {
175 175 setReader(r);
176 176 reset();
177   - tokenCollection.print();
  177 + if(tokenCollection!=null) {
  178 + tokenCollection.print();
  179 + }
178 180 end();
179 181 close();
180 182 }
... ...
src/mtas/analysis/parser/MtasCRMParser.java 0 → 100644
  1 +package mtas.analysis.parser;
  2 +
  3 +import java.io.IOException;
  4 +import java.io.Reader;
  5 +import java.util.ArrayList;
  6 +import java.util.HashMap;
  7 +import java.util.HashSet;
  8 +import java.util.TreeSet;
  9 +import java.util.Map.Entry;
  10 +import java.util.concurrent.atomic.AtomicInteger;
  11 +import java.util.regex.Matcher;
  12 +import java.util.regex.Pattern;
  13 +
  14 +import mtas.analysis.token.MtasToken;
  15 +import mtas.analysis.token.MtasTokenCollection;
  16 +import mtas.analysis.util.MtasBufferedReader;
  17 +import mtas.analysis.util.MtasConfigException;
  18 +import mtas.analysis.util.MtasConfiguration;
  19 +import mtas.analysis.util.MtasParserException;
  20 +
  21 +public class MtasCRMParser extends MtasBasicParser {
  22 +
  23 + /** The word type. */
  24 + private MtasParserType wordType = null;
  25 +
  26 + /** The word annotation types. */
  27 + private HashMap<String, MtasParserType> wordAnnotationTypes = new HashMap<String, MtasParserType>();
  28 +
  29 + private HashMap<String, MtasCRMParserFunction> functions = new HashMap<String, MtasCRMParserFunction>();
  30 +
  31 + public MtasCRMParser(MtasConfiguration config) {
  32 + super(config);
  33 + try {
  34 + initParser();
  35 + // System.out.print(printConfig());
  36 + } catch (MtasConfigException e) {
  37 + e.printStackTrace();
  38 + }
  39 + }
  40 +
  41 + @Override
  42 + protected void initParser() throws MtasConfigException {
  43 + super.initParser();
  44 + if (config != null) {
  45 +
  46 + // always word, no mappings
  47 + wordType = new MtasParserType(MAPPING_TYPE_WORD, null);
  48 +
  49 + for (int i = 0; i < config.children.size(); i++) {
  50 + MtasConfiguration current = config.children.get(i);
  51 + if (current.name.equals("mappings")) {
  52 + for (int j = 0; j < current.children.size(); j++) {
  53 + if (current.children.get(j).name.equals("mapping")) {
  54 + MtasConfiguration mapping = current.children.get(j);
  55 + String typeMapping = mapping.attributes.get("type");
  56 + String nameMapping = mapping.attributes.get("name");
  57 + if ((typeMapping != null)) {
  58 + if (typeMapping.equals(MAPPING_TYPE_WORD)) {
  59 + MtasCRMParserMappingWordAnnotation m = new MtasCRMParserMappingWordAnnotation();
  60 + m.processConfig(mapping);
  61 + wordType.addMapping(m);
  62 + } else if (typeMapping.equals(MAPPING_TYPE_WORD_ANNOTATION)
  63 + && (nameMapping != null)) {
  64 + MtasCRMParserMappingWordAnnotation m = new MtasCRMParserMappingWordAnnotation();
  65 + m.processConfig(mapping);
  66 + if (wordAnnotationTypes.containsKey(nameMapping)) {
  67 + wordAnnotationTypes.get(nameMapping).addMapping(m);
  68 + } else {
  69 + MtasParserType t = new MtasParserType(typeMapping,
  70 + nameMapping);
  71 + t.addMapping(m);
  72 + wordAnnotationTypes.put(nameMapping, t);
  73 + }
  74 + } else {
  75 + throw new MtasConfigException("unknown mapping type "
  76 + + typeMapping + " or missing name");
  77 + }
  78 + }
  79 + }
  80 + }
  81 + } else if (current.name.equals("functions")) {
  82 + for (int j = 0; j < current.children.size(); j++) {
  83 + if (current.children.get(j).name.equals("function")) {
  84 + MtasConfiguration function = current.children.get(j);
  85 + String nameFunction = function.attributes.get("name");
  86 + String splitFunction = function.attributes.get("split");
  87 + if (nameFunction != null) {
  88 + MtasCRMParserFunction mtasCRMParserFunction = new MtasCRMParserFunction(
  89 + splitFunction);
  90 + functions.put(nameFunction, mtasCRMParserFunction);
  91 + MtasConfiguration subCurrent = current.children.get(j);
  92 + for (int k = 0; k < subCurrent.children.size(); k++) {
  93 + if (subCurrent.children.get(k).name.equals("condition")) {
  94 + MtasConfiguration subSubCurrent = subCurrent.children
  95 + .get(k);
  96 + if (subSubCurrent.attributes.containsKey("value")) {
  97 + String[] valuesCondition = subSubCurrent.attributes
  98 + .get("value").split(Pattern.quote(","));
  99 + ArrayList<MtasCRMParserFunctionOutput> valueOutputList = new ArrayList<MtasCRMParserFunctionOutput>();
  100 + for (int l = 0; l < subSubCurrent.children.size(); l++) {
  101 + if (subSubCurrent.children.get(l).name
  102 + .equals("output")) {
  103 + String valueOutput = subSubCurrent.children
  104 + .get(l).attributes.get("value");
  105 + String nameOutput = subSubCurrent.children
  106 + .get(l).attributes.get("name");
  107 + if (nameOutput != null) {
  108 + MtasCRMParserFunctionOutput o = new MtasCRMParserFunctionOutput(
  109 + nameOutput, valueOutput);
  110 + valueOutputList.add(o);
  111 + }
  112 + }
  113 + }
  114 + if (valueOutputList.size() > 0) {
  115 + for (String valueCondition : valuesCondition) {
  116 + mtasCRMParserFunction.output.put(valueCondition,
  117 + valueOutputList);
  118 + }
  119 + }
  120 + }
  121 + }
  122 + }
  123 + }
  124 + }
  125 + }
  126 + }
  127 + }
  128 + }
  129 + }
  130 +
  131 + @Override
  132 + public MtasTokenCollection createTokenCollection(Reader reader)
  133 + throws MtasParserException, MtasConfigException {
  134 + AtomicInteger position = new AtomicInteger(0);
  135 + Integer unknownAncestors = 0;
  136 +
  137 + HashMap<String, TreeSet<Integer>> idPositions = new HashMap<String, TreeSet<Integer>>();
  138 + HashMap<String, Integer[]> idOffsets = new HashMap<String, Integer[]>();
  139 +
  140 + HashMap<String, HashMap<Integer, HashSet<String>>> updateList = new HashMap<String, HashMap<Integer, HashSet<String>>>();
  141 + updateList.put(UPDATE_TYPE_OFFSET, new HashMap<Integer, HashSet<String>>());
  142 + updateList.put(UPDATE_TYPE_POSITION,
  143 + new HashMap<Integer, HashSet<String>>());
  144 +
  145 + HashMap<String, ArrayList<MtasParserObject>> currentList = new HashMap<String, ArrayList<MtasParserObject>>();
  146 + currentList.put(MAPPING_TYPE_RELATION, new ArrayList<MtasParserObject>());
  147 + currentList.put(MAPPING_TYPE_RELATION_ANNOTATION,
  148 + new ArrayList<MtasParserObject>());
  149 + currentList.put(MAPPING_TYPE_REF, new ArrayList<MtasParserObject>());
  150 + currentList.put(MAPPING_TYPE_GROUP, new ArrayList<MtasParserObject>());
  151 + currentList.put(MAPPING_TYPE_GROUP_ANNOTATION,
  152 + new ArrayList<MtasParserObject>());
  153 + currentList.put(MAPPING_TYPE_WORD, new ArrayList<MtasParserObject>());
  154 + currentList.put(MAPPING_TYPE_WORD_ANNOTATION,
  155 + new ArrayList<MtasParserObject>());
  156 +
  157 + tokenCollection = new MtasTokenCollection();
  158 + MtasToken.resetId();
  159 + try (MtasBufferedReader br = new MtasBufferedReader(reader)) {
  160 + String line;
  161 + int currentOffset, previousOffset = br.getPosition();
  162 + MtasParserObject currentObject;
  163 + Pattern headerPattern = Pattern.compile("^@ @ @(.*)$");
  164 + Pattern regularPattern = Pattern.compile(
  165 + "^([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+)$");
  166 + Matcher matcherHeader, matcherRegular;
  167 + while ((line = br.readLine()) != null) {
  168 + currentOffset = br.getPosition();
  169 + matcherHeader = headerPattern.matcher(line.trim());
  170 + if (matcherHeader.matches()) {
  171 + // System.out.println(line);
  172 + } else {
  173 + matcherRegular = regularPattern.matcher(line.trim());
  174 + if (matcherRegular.matches()) {
  175 + // regular line
  176 + if ((currentList.get(MAPPING_TYPE_RELATION).size() == 0)
  177 + && (currentList.get(MAPPING_TYPE_GROUP_ANNOTATION).size() == 0)
  178 + && (currentList.get(MAPPING_TYPE_WORD).size() == 0)
  179 + && (currentList.get(MAPPING_TYPE_WORD_ANNOTATION).size() == 0)
  180 + && (wordType != null)) {
  181 + // start word
  182 + currentObject = new MtasParserObject(wordType);
  183 + currentObject.setOffsetStart(previousOffset);
  184 + currentObject.setRealOffsetStart(previousOffset);
  185 + currentObject.setUnknownAncestorNumber(unknownAncestors);
  186 + if (!prevalidateObject(currentObject, currentList)) {
  187 + unknownAncestors++;
  188 + } else {
  189 + int p = position.getAndIncrement();
  190 + currentObject.addPosition(p);
  191 + currentList.get(MAPPING_TYPE_WORD).add(currentObject);
  192 + unknownAncestors = 0;
  193 + }
  194 + if ((currentList.get(MAPPING_TYPE_RELATION).size() == 0)
  195 + && (currentList.get(MAPPING_TYPE_GROUP_ANNOTATION)
  196 + .size() == 0)
  197 + && (currentList.get(MAPPING_TYPE_WORD).size() > 0)) {
  198 + // compute word annotations
  199 + for (int i = 0; i < 8; i++) {
  200 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList = new ArrayList<MtasCRMParserFunctionOutput>();
  201 + processWordAnnotation(String.valueOf(i),
  202 + matcherRegular.group((i + 1)), previousOffset,
  203 + currentOffset, functionOutputList, unknownAncestors,
  204 + currentList, updateList, idPositions, idOffsets);
  205 + for (MtasCRMParserFunctionOutput functionOutput : functionOutputList) {
  206 + processWordAnnotation(functionOutput.name,
  207 + functionOutput.value, previousOffset, currentOffset,
  208 + functionOutputList, unknownAncestors, currentList,
  209 + updateList, idPositions, idOffsets);
  210 + }
  211 + }
  212 + }
  213 + // finish word
  214 + if (unknownAncestors > 0) {
  215 + unknownAncestors--;
  216 + } else {
  217 + currentObject = currentList.get(MAPPING_TYPE_WORD)
  218 + .remove(currentList.get(MAPPING_TYPE_WORD).size() - 1);
  219 + assert unknownAncestors == 0 : "error in administration "
  220 + + currentObject.getType().getName();
  221 + currentObject.setText(null);
  222 + currentObject.setOffsetEnd(currentOffset - 1);
  223 + currentObject.setRealOffsetEnd(currentOffset - 1);
  224 + // update ancestor groups with position and offset
  225 + for (MtasParserObject currentGroup : currentList
  226 + .get(MAPPING_TYPE_GROUP)) {
  227 + currentGroup.addPositions(currentObject.getPositions());
  228 + currentGroup.addOffsetStart(currentObject.getOffsetStart());
  229 + currentGroup.addOffsetEnd(currentObject.getOffsetEnd());
  230 + }
  231 + idPositions.put(currentObject.getId(),
  232 + currentObject.getPositions());
  233 + idOffsets.put(currentObject.getId(), currentObject.getOffset());
  234 + currentObject.updateMappings(idPositions, idOffsets);
  235 + unknownAncestors = currentObject.getUnknownAncestorNumber();
  236 + computeMappingsFromObject(currentObject, currentList,
  237 + updateList);
  238 + }
  239 + }
  240 + } else {
  241 + //System.out.println("PROBLEM: " + line);
  242 + }
  243 + }
  244 + previousOffset = br.getPosition();
  245 + }
  246 + } catch (IOException e) {
  247 + throw new MtasParserException(e.getMessage());
  248 + }
  249 + // final check
  250 + tokenCollection.check(autorepair);
  251 + return tokenCollection;
  252 +
  253 + }
  254 +
  255 + private void processWordAnnotation(String name, String text,
  256 + Integer previousOffset, Integer currentOffset,
  257 + ArrayList<MtasCRMParserFunctionOutput> functionOutputList,
  258 + Integer unknownAncestors,
  259 + HashMap<String, ArrayList<MtasParserObject>> currentList,
  260 + HashMap<String, HashMap<Integer, HashSet<String>>> updateList,
  261 + HashMap<String, TreeSet<Integer>> idPositions,
  262 + HashMap<String, Integer[]> idOffsets)
  263 + throws MtasParserException, MtasConfigException {
  264 + MtasParserType tmpCurrentType;
  265 + MtasParserObject currentObject;
  266 + if ((tmpCurrentType = wordAnnotationTypes.get(name)) != null) {
  267 + // start word annotation
  268 + currentObject = new MtasParserObject(tmpCurrentType);
  269 + currentObject.setRealOffsetStart(previousOffset);
  270 + currentObject.addPositions(currentList.get(MAPPING_TYPE_WORD)
  271 + .get((currentList.get(MAPPING_TYPE_WORD).size() - 1)).getPositions());
  272 + currentObject.setUnknownAncestorNumber(unknownAncestors);
  273 + if (!prevalidateObject(currentObject, currentList)) {
  274 + unknownAncestors++;
  275 + } else {
  276 + currentList.get(MAPPING_TYPE_WORD_ANNOTATION).add(currentObject);
  277 + unknownAncestors = 0;
  278 + }
  279 + // finish word annotation
  280 + if (unknownAncestors > 0) {
  281 + unknownAncestors--;
  282 + } else {
  283 + currentObject = currentList.get(MAPPING_TYPE_WORD_ANNOTATION)
  284 + .remove(currentList.get(MAPPING_TYPE_WORD_ANNOTATION).size() - 1);
  285 + assert unknownAncestors == 0 : "error in administration "
  286 + + currentObject.getType().getName();
  287 + if (functions.containsKey(name) && text!=null) {
  288 + MtasCRMParserFunction function = functions.get(name);
  289 + String[] value;
  290 + if (function.split != null) {
  291 + value = text.split(Pattern.quote(function.split));
  292 + } else {
  293 + value = new String[] { text };
  294 + }
  295 + for (int c = 0; c < value.length; c++) {
  296 + if (function.output.containsKey(value[c])) {
  297 + functionOutputList.addAll(function.output.get(value[c]));
  298 + }
  299 + }
  300 + }
  301 + currentObject.setText(text);
  302 + currentObject.setRealOffsetEnd(currentOffset - 1);
  303 + idPositions.put(currentObject.getId(), currentObject.getPositions());
  304 + idOffsets.put(currentObject.getId(), currentObject.getOffset());
  305 + // offset always null, so update later with word (should
  306 + // be
  307 + // possible)
  308 + if ((currentObject.getId() != null)
  309 + && (currentList.get(MAPPING_TYPE_WORD).size() > 0)) {
  310 + currentList.get(MAPPING_TYPE_WORD)
  311 + .get((currentList.get(MAPPING_TYPE_WORD).size() - 1))
  312 + .addUpdateableIdWithOffset(currentObject.getId());
  313 + }
  314 + currentObject.updateMappings(idPositions, idOffsets);
  315 + unknownAncestors = currentObject.getUnknownAncestorNumber();
  316 + computeMappingsFromObject(currentObject, currentList, updateList);
  317 + }
  318 + }
  319 + }
  320 +
  321 + @Override
  322 + public String printConfig() {
  323 + String text = "";
  324 + text += "=== CONFIGURATION ===\n";
  325 + text += "type: " + wordAnnotationTypes.size() + " x wordAnnotation";
  326 + text += printConfigTypes(wordAnnotationTypes);
  327 + text += "=== CONFIGURATION ===\n";
  328 + return text;
  329 + }
  330 +
  331 + private String printConfigTypes(HashMap<?, MtasParserType> types) {
  332 + String text = "";
  333 + for (Entry<?, MtasParserType> entry : types.entrySet()) {
  334 + text += "- " + entry.getKey() + ": " + entry.getValue().mappings.size()
  335 + + " mapping(s)\n";
  336 + for (int i = 0; i < entry.getValue().mappings.size(); i++) {
  337 + text += "\t" + entry.getValue().mappings.get(i) + "\n";
  338 + }
  339 + }
  340 + return text;
  341 + }
  342 +
  343 + private class MtasCRMParserFunction {
  344 +
  345 + public String type;
  346 + public String split;
  347 + public HashMap<String, ArrayList<MtasCRMParserFunctionOutput>> output;
  348 +
  349 + public MtasCRMParserFunction(String split) {
  350 + this.split = split;
  351 + output = new HashMap<String, ArrayList<MtasCRMParserFunctionOutput>>();
  352 + }
  353 +
  354 + }
  355 +
  356 + private class MtasCRMParserFunctionOutput {
  357 + public String name;
  358 + public String value;
  359 +
  360 + public MtasCRMParserFunctionOutput(String name, String value) {
  361 + this.name = name;
  362 + this.value = value;
  363 + }
  364 + }
  365 +
  366 + private class MtasCRMParserMappingWordAnnotation
  367 + extends MtasParserMapping<MtasCRMParserMappingWordAnnotation> {
  368 +
  369 + /**
  370 + * Instantiates a new mtas sketch parser mapping word annotation.
  371 + */
  372 + public MtasCRMParserMappingWordAnnotation() {
  373 + super();
  374 + this.position = SOURCE_OWN;
  375 + this.realOffset = SOURCE_OWN;
  376 + this.offset = SOURCE_ANCESTOR_WORD;
  377 + this.type = MAPPING_TYPE_WORD_ANNOTATION;
  378 + }
  379 +
  380 + /*
  381 + * (non-Javadoc)
  382 + *
  383 + * @see mtas.analysis.parser.MtasParser.MtasParserMapping#self()
  384 + */
  385 + @Override
  386 + protected MtasCRMParserMappingWordAnnotation self() {
  387 + return this;
  388 + }
  389 + }
  390 +
  391 +}
... ...
src/mtas/analysis/parser/MtasSketchParser.java
... ... @@ -41,7 +41,8 @@ final public class MtasSketchParser extends MtasBasicParser {
41 41 /**
42 42 * Instantiates a new mtas sketch parser.
43 43 *
44   - * @param config the config
  44 + * @param config
  45 + * the config
45 46 */
46 47 public MtasSketchParser(MtasConfiguration config) {
47 48 super(config);
... ... @@ -53,7 +54,9 @@ final public class MtasSketchParser extends MtasBasicParser {
53 54 }
54 55 }
55 56  
56   - /* (non-Javadoc)
  57 + /*
  58 + * (non-Javadoc)
  59 + *
57 60 * @see mtas.analysis.parser.MtasParser#initParser()
58 61 */
59 62 @Override
... ... @@ -74,7 +77,7 @@ final public class MtasSketchParser extends MtasBasicParser {
74 77 String nameMapping = mapping.attributes.get("name");
75 78 if ((typeMapping != null)) {
76 79 if (typeMapping.equals(MAPPING_TYPE_WORD)) {
77   - MtasSketchParserMappingWordAnnotation m = new MtasSketchParserMappingWordAnnotation();
  80 + MtasSketchParserMappingWord m = new MtasSketchParserMappingWord();
78 81 m.processConfig(mapping);
79 82 wordType.addMapping(m);
80 83 } else if (typeMapping.equals(MAPPING_TYPE_WORD_ANNOTATION)
... ... @@ -113,7 +116,9 @@ final public class MtasSketchParser extends MtasBasicParser {
113 116 }
114 117 }
115 118  
116   - /* (non-Javadoc)
  119 + /*
  120 + * (non-Javadoc)
  121 + *
117 122 * @see mtas.analysis.parser.MtasParser#createTokenCollection(java.io.Reader)
118 123 */
119 124 @Override
... ... @@ -341,7 +346,9 @@ final public class MtasSketchParser extends MtasBasicParser {
341 346 return tokenCollection;
342 347 }
343 348  
344   - /* (non-Javadoc)
  349 + /*
  350 + * (non-Javadoc)
  351 + *
345 352 * @see mtas.analysis.parser.MtasParser#printConfig()
346 353 */
347 354 @Override
... ... @@ -357,7 +364,8 @@ final public class MtasSketchParser extends MtasBasicParser {
357 364 /**
358 365 * Prints the config types.
359 366 *
360   - * @param types the types
  367 + * @param types
  368 + * the types
361 369 * @return the string
362 370 */
363 371 private String printConfigTypes(HashMap<?, MtasParserType> types) {
... ... @@ -372,6 +380,22 @@ final public class MtasSketchParser extends MtasBasicParser {
372 380 return text;
373 381 }
374 382  
  383 + private class MtasSketchParserMappingWord
  384 + extends MtasParserMapping<MtasSketchParserMappingWord> {
  385 + public MtasSketchParserMappingWord() {
  386 + super();
  387 + this.position = SOURCE_OWN;
  388 + this.realOffset = SOURCE_OWN;
  389 + this.offset = SOURCE_OWN;
  390 + this.type = MAPPING_TYPE_WORD;
  391 + }
  392 +
  393 + @Override
  394 + protected MtasSketchParserMappingWord self() {
  395 + return this;
  396 + }
  397 + }
  398 +
375 399 /**
376 400 * The Class MtasSketchParserMappingWordAnnotation.
377 401 */
... ...
src/mtas/analysis/token/MtasTokenCollection.java
1 1 package mtas.analysis.token;
2 2  
  3 +import java.io.IOException;
3 4 import java.util.ArrayList;
4 5 import java.util.Arrays;
5 6 import java.util.Collections;
... ... @@ -91,7 +92,7 @@ public class MtasTokenCollection {
91 92 Iterator<MtasToken<?>> it = this.iterator();
92 93 while (it.hasNext()) {
93 94 MtasToken<?> token = it.next();
94   - System.out.println(token);
  95 + System.out.println(token);
95 96 }
96 97 }
97 98  
... ... @@ -194,9 +195,11 @@ public class MtasTokenCollection {
194 195 trash.add(i);
195 196 } else if ((token.getPositionStart() == null)
196 197 || (token.getPositionEnd() == null)) {
197   - trash.add(i);
  198 + trash.add(i);
198 199 } else if (token.getValue() == null || (token.getValue().equals(""))) {
199 200 trash.add(i);
  201 + } else if (token.getPrefix() == null || (token.getPrefix().equals(""))) {
  202 + trash.add(i);
200 203 }
201 204 }
202 205 // check parentId
... ... @@ -277,6 +280,9 @@ public class MtasTokenCollection {
277 280 } else if (token.getValue() == null || (token.getValue().equals(""))) {
278 281 throw new MtasParserException(
279 282 "no value for token with id " + token.getId());
  283 + } else if (token.getPrefix() == null || (token.getPrefix().equals(""))) {
  284 + throw new MtasParserException(
  285 + "no prefix for token with id " + token.getId());
280 286 } else if ((token.getParentId() != null)
281 287 && !tokenCollection.containsKey(token.getParentId())) {
282 288 throw new MtasParserException(
... ...
src/mtas/codec/MtasCodecPostingsFormat.java
... ... @@ -23,10 +23,10 @@ public class MtasCodecPostingsFormat extends PostingsFormat {
23 23  
24 24 /** The Constant VERSION_START. */
25 25 public static final int VERSION_START = 1;
26   -
  26 +
27 27 /** The Constant VERSION_OLD_1. */
28 28 public static final int VERSION_OLD_1 = 1;
29   -
  29 +
30 30 /** The Constant VERSION_OLD_2. */
31 31 public static final int VERSION_OLD_2 = 2;
32 32  
... ... @@ -50,7 +50,7 @@ public class MtasCodecPostingsFormat extends PostingsFormat {
50 50  
51 51 /** The Constant MTAS_OBJECT_HAS_PAYLOAD. */
52 52 static final int MTAS_OBJECT_HAS_PAYLOAD = 32;
53   -
  53 +
54 54 /** The Constant MTAS_STORAGE_BYTE. */
55 55 public static final int MTAS_STORAGE_BYTE = 0;
56 56  
... ... @@ -136,7 +136,8 @@ public class MtasCodecPostingsFormat extends PostingsFormat {
136 136 /**
137 137 * Instantiates a new mtas codec postings format.
138 138 *
139   - * @param delegate the delegate
  139 + * @param delegate
  140 + * the delegate
140 141 */
141 142 public MtasCodecPostingsFormat(PostingsFormat delegate) {
142 143 super(MtasCodec.MTAS_CODEC_NAME);
... ... @@ -160,7 +161,8 @@ public class MtasCodecPostingsFormat extends PostingsFormat {
160 161 /**
161 162 * Instantiates a new mtas codec postings format.
162 163 *
163   - * @param codecName the codec name
  164 + * @param codecName
  165 + * the codec name
164 166 */
165 167 public MtasCodecPostingsFormat(String codecName) {
166 168 super(codecName);
... ... @@ -218,66 +220,68 @@ public class MtasCodecPostingsFormat extends PostingsFormat {
218 220 /**
219 221 * Gets the token.
220 222 *
221   - * @param inObject the in object
222   - * @param inTerm the in term
223   - * @param ref the ref
  223 + * @param inObject
  224 + * the in object
  225 + * @param inTerm
  226 + * the in term
  227 + * @param ref
  228 + * the ref
224 229 * @return the token
  230 + * @throws IOException
225 231 */
226 232 public static MtasToken<String> getToken(IndexInput inObject,
227   - IndexInput inTerm, Long ref) {
  233 + IndexInput inTerm, Long ref) throws IOException {
228 234 MtasToken<String> token = null;
229   - try {
230   - inObject.seek(ref);
231   - token = new MtasTokenString("");
232   - token.setId(inObject.readVInt());
233   - token.setTokenRef(ref);
234   - int objectFlags = inObject.readVInt();
235   - TreeSet<Integer> positions = new TreeSet<Integer>();
236   - if ((objectFlags & MTAS_OBJECT_HAS_PARENT) == MTAS_OBJECT_HAS_PARENT) {
237   - int parentId = inObject.readVInt();
238   - token.setParentId(parentId);
239   - }
240   - if ((objectFlags & MTAS_OBJECT_HAS_POSITION_RANGE) == MTAS_OBJECT_HAS_POSITION_RANGE) {
241   - int positionStart = inObject.readVInt();
242   - int positionEnd = positionStart + inObject.readVInt();
243   - token.addPositionRange(positionStart, positionEnd);
244   - } else if ((objectFlags & MTAS_OBJECT_HAS_POSITION_SET) == MTAS_OBJECT_HAS_POSITION_SET) {
245   - int size = inObject.readVInt();
246   - int tmpPrevious = 0;
247   - for (int t = 0; t < size; t++) {
248   - int position = tmpPrevious + inObject.readVInt();
249   - tmpPrevious = position;
250   - positions.add(position);
251   - }
252   - token.addPositions(positions);
253   - } else {
254   - int position = inObject.readVInt();
255   - token.addPosition(position);
256   - }
257   - if ((objectFlags & MTAS_OBJECT_HAS_OFFSET) == MTAS_OBJECT_HAS_OFFSET) {
258   - int offsetStart = inObject.readVInt();
259   - int offsetEnd = offsetStart + inObject.readVInt();
260   - token.setOffset(offsetStart, offsetEnd);
261   - }
262   - if ((objectFlags & MTAS_OBJECT_HAS_REALOFFSET) == MTAS_OBJECT_HAS_REALOFFSET) {
263   - int realOffsetStart = inObject.readVInt();
264   - int realOffsetEnd = realOffsetStart + inObject.readVInt();
265   - token.setRealOffset(realOffsetStart, realOffsetEnd);
266   - }
267   - if ((objectFlags & MTAS_OBJECT_HAS_PAYLOAD) == MTAS_OBJECT_HAS_PAYLOAD) {
268   - int length = inObject.readVInt();
269   - byte[] mtasPayload = new byte[length];
270   - inObject.readBytes(mtasPayload, 0, length);
271   - token.setPayload(new BytesRef(mtasPayload));
  235 + inObject.seek(ref);
  236 + token = new MtasTokenString("");
  237 + token.setId(inObject.readVInt());
  238 + token.setTokenRef(ref);
  239 + int objectFlags = inObject.readVInt();
  240 + TreeSet<Integer> positions = new TreeSet<Integer>();
  241 + if ((objectFlags & MTAS_OBJECT_HAS_PARENT) == MTAS_OBJECT_HAS_PARENT) {
  242 + int parentId = inObject.readVInt();
  243 + token.setParentId(parentId);
  244 + }
  245 + if ((objectFlags
  246 + & MTAS_OBJECT_HAS_POSITION_RANGE) == MTAS_OBJECT_HAS_POSITION_RANGE) {
  247 + int positionStart = inObject.readVInt();
  248 + int positionEnd = positionStart + inObject.readVInt();
  249 + token.addPositionRange(positionStart, positionEnd);
  250 + } else if ((objectFlags
  251 + & MTAS_OBJECT_HAS_POSITION_SET) == MTAS_OBJECT_HAS_POSITION_SET) {
  252 + int size = inObject.readVInt();
  253 + int tmpPrevious = 0;
  254 + for (int t = 0; t < size; t++) {
  255 + int position = tmpPrevious + inObject.readVInt();
  256 + tmpPrevious = position;
  257 + positions.add(position);
272 258 }
273   - Long termRef = inObject.readVLong();
274   - inTerm.seek(termRef);
275   - token.setTermRef(termRef);
276   - token.setValue(inTerm.readString());
277   - } catch (IOException e) {
278   - e.printStackTrace();
279   - return null;
  259 + token.addPositions(positions);
  260 + } else {
  261 + int position = inObject.readVInt();
  262 + token.addPosition(position);
  263 + }
  264 + if ((objectFlags & MTAS_OBJECT_HAS_OFFSET) == MTAS_OBJECT_HAS_OFFSET) {
  265 + int offsetStart = inObject.readVInt();
  266 + int offsetEnd = offsetStart + inObject.readVInt();
  267 + token.setOffset(offsetStart, offsetEnd);
  268 + }
  269 + if ((objectFlags
  270 + & MTAS_OBJECT_HAS_REALOFFSET) == MTAS_OBJECT_HAS_REALOFFSET) {
  271 + int realOffsetStart = inObject.readVInt();
  272 + int realOffsetEnd = realOffsetStart + inObject.readVInt();
  273 + token.setRealOffset(realOffsetStart, realOffsetEnd);
  274 + }
  275 + if ((objectFlags & MTAS_OBJECT_HAS_PAYLOAD) == MTAS_OBJECT_HAS_PAYLOAD) {
  276 + int length = inObject.readVInt();
  277 + byte[] mtasPayload = new byte[length];
  278 + inObject.readBytes(mtasPayload, 0, length);
  279 + token.setPayload(new BytesRef(mtasPayload));
280 280 }
  281 + Long termRef = inObject.readVLong();
  282 + inTerm.seek(termRef);
  283 + token.setTermRef(termRef);
  284 + token.setValue(inTerm.readString());
281 285 return token;
282 286 }
283 287  
... ...
src/mtas/codec/MtasFieldsConsumer.java
1 1 package mtas.codec;
2 2  
  3 +import java.io.Closeable;
3 4 import java.io.EOFException;
4 5 import java.io.IOException;
5 6 import java.util.ArrayList;
... ... @@ -28,14 +29,19 @@ import org.apache.lucene.index.FieldInfo;
28 29 import org.apache.lucene.index.FieldInfos;
29 30 import org.apache.lucene.index.Fields;
30 31 import org.apache.lucene.index.IndexFileNames;
  32 +import org.apache.lucene.index.MergeState;
31 33 import org.apache.lucene.index.PostingsEnum;
32 34 import org.apache.lucene.index.SegmentWriteState;
33 35 import org.apache.lucene.index.Terms;
34 36 import org.apache.lucene.index.TermsEnum;
35 37 import org.apache.lucene.search.DocIdSetIterator;
  38 +import org.apache.lucene.store.IOContext;
36 39 import org.apache.lucene.store.IndexInput;
37 40 import org.apache.lucene.store.IndexOutput;
  41 +import org.apache.lucene.store.Lock;
38 42 import org.apache.lucene.util.BytesRef;
  43 +import org.apache.lucene.util.IOUtils;
  44 +import org.apache.solr.update.processor.LogUpdateProcessorFactory;
39 45  
40 46 /**
41 47 * The Class MtasFieldsConsumer.
... ... @@ -440,7 +446,7 @@ public class MtasFieldsConsumer extends FieldsConsumer {
440 446 prefixIdIndex.put(field, new HashMap<String, Integer>());
441 447 }
442 448 if (!prefixReferenceIndex.get(field).containsKey(prefix)) {
443   - int id = prefixReferenceIndex.get(field).size();
  449 + int id = 1 + prefixReferenceIndex.get(field).size();
444 450 prefixReferenceIndex.get(field).put(prefix, outPrefix.getFilePointer());
445 451 prefixIdIndex.get(field).put(prefix, id);
446 452 outPrefix.writeString(prefix);
... ... @@ -583,6 +589,11 @@ public class MtasFieldsConsumer extends FieldsConsumer {
583 589 return text;
584 590 }
585 591  
  592 + @Override
  593 + public void merge(MergeState mergeState) throws IOException { // the whole merge is handed to delegateFieldsConsumer
  594 + delegateFieldsConsumer.merge(mergeState); // mergeState is forwarded unchanged; an IOException from the delegate propagates to the caller
  595 + } // NOTE(review): no mtas-specific work happens in this override - confirm merged segments still get their mtas files written
  596 +
586 597 /*
587 598 * (non-Javadoc)
588 599 *
... ... @@ -610,6 +621,7 @@ public class MtasFieldsConsumer extends FieldsConsumer {
610 621 outIndexObjectPosition, outIndexObjectParent, outTerm, outObject,
611 622 outPrefix;
612 623 IndexOutput outTmpDoc, outTmpField;
  624 + ArrayList<Closeable> closeables = new ArrayList<Closeable>();
613 625  
614 626 // temporary temporary index in memory for doc
615 627 TreeMap<Integer, Long> memoryIndexTemporaryObject = new TreeMap<Integer, Long>();
... ... @@ -618,547 +630,574 @@ public class MtasFieldsConsumer extends FieldsConsumer {
618 630 // list of objectIds and references to objects
619 631 TreeMap<Integer, Long> memoryIndexDocList = new TreeMap<Integer, Long>();
620 632  
621   - // create file tmpDoc
622   - outTmpDoc = state.directory.createOutput(mtasTmpDocFileName, state.context);
623   - // create file tmpField
624   - outTmpField = state.directory.createOutput(mtasTmpFieldFileName,
625   - state.context);
626   - // create file indexDoc
627   - outDoc = state.directory.createOutput(mtasDocFileName, state.context);
628   - CodecUtil.writeIndexHeader(outDoc, name,
629   - MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
630   - state.segmentSuffix);
631   - outDoc.writeString(delegatePostingsFormatName);
632   - // create file indexDocId
633   - outIndexDocId = state.directory.createOutput(mtasIndexDocIdFileName,
634   - state.context);
635   - CodecUtil.writeIndexHeader(outIndexDocId, name,
636   - MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
637   - state.segmentSuffix);
638   - outIndexDocId.writeString(delegatePostingsFormatName);
639   - // create file indexObjectId
640   - outIndexObjectId = state.directory.createOutput(mtasIndexObjectIdFileName,
641   - state.context);
642   - CodecUtil.writeIndexHeader(outIndexObjectId, name,
643   - MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
644   - state.segmentSuffix);
645   - outIndexObjectId.writeString(delegatePostingsFormatName);
646   - // create file indexObjectPosition
647   - outIndexObjectPosition = state.directory
648   - .createOutput(mtasIndexObjectPositionFileName, state.context);
649   - CodecUtil.writeIndexHeader(outIndexObjectPosition, name,
650   - MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
651   - state.segmentSuffix);
652   - outIndexObjectPosition.writeString(delegatePostingsFormatName);
653   - // create file indexObjectParent
654   - outIndexObjectParent = state.directory
655   - .createOutput(mtasIndexObjectParentFileName, state.context);
656   - CodecUtil.writeIndexHeader(outIndexObjectParent, name,
657   - MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
658   - state.segmentSuffix);
659   - outIndexObjectParent.writeString(delegatePostingsFormatName);
660   - // create file term
661   - outTerm = state.directory.createOutput(mtasTermFileName, state.context);
662   - CodecUtil.writeIndexHeader(outTerm, name,
663   - MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
664   - state.segmentSuffix);
665   - outTerm.writeString(delegatePostingsFormatName);
666   - // create file prefix
667   - outPrefix = state.directory.createOutput(mtasPrefixFileName, state.context);
668   - CodecUtil.writeIndexHeader(outPrefix, name,
669   - MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
670   - state.segmentSuffix);
671   - outPrefix.writeString(delegatePostingsFormatName);
672   - // create file object
673   - outObject = state.directory.createOutput(mtasObjectFileName, state.context);
674   - CodecUtil.writeIndexHeader(outObject, name,
675   - MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
676   - state.segmentSuffix);
677   - outObject.writeString(delegatePostingsFormatName);
678   - // For each field
679   - for (String field : fields) {
680   - Terms terms = fields.terms(field);
681   - if (terms == null) {
682   - continue;
683   - } else {
684   - // new temporary object storage for this field
685   - IndexOutput outTmpObject = state.directory
686   - .createOutput(mtasTmpObjectFileName, state.context);
687   - // new temporary index docs for this field
688   - IndexOutput outTmpDocs = state.directory
689   - .createOutput(mtasTmpDocsFileName, state.context);
690   - // get fieldInfo
691   - FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
692   - // get properties terms
693   - boolean hasPositions = terms.hasPositions();
694   - boolean hasFreqs = terms.hasFreqs();
695   - boolean hasPayloads = fieldInfo.hasPayloads();
696   - boolean hasOffsets = terms.hasOffsets();
697   - // register references
698   - Long smallestTermFilepointer = outTerm.getFilePointer();
699   - Long smallestPrefixFilepointer = outTerm.getFilePointer();
700   - int termCounter = 0;
701   - // only if freqs, positions and payload available
702   - if (hasFreqs && hasPositions && hasPayloads) {
703   - // compute flags
704   - int flags = PostingsEnum.POSITIONS | PostingsEnum.PAYLOADS;
705   - if (hasOffsets) {
706   - flags = flags | PostingsEnum.OFFSETS;
707   - }
708   - // get terms
709   - TermsEnum termsEnum = terms.iterator();
710   - PostingsEnum postingsEnum = null;
711   - // for each term in field
712   - while (true) {
713   - BytesRef term = termsEnum.next();
714   - if (term == null) {
715   - break;
  633 + try {
  634 + // create file tmpDoc
  635 + closeables.add(outTmpDoc = state.directory
  636 + .createOutput(mtasTmpDocFileName, state.context));
  637 + // create file tmpField
  638 + closeables.add(outTmpField = state.directory
  639 + .createOutput(mtasTmpFieldFileName, state.context));
  640 + // create file indexDoc
  641 + closeables.add(outDoc = state.directory.createOutput(mtasDocFileName,
  642 + state.context));
  643 + CodecUtil.writeIndexHeader(outDoc, name,
  644 + MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
  645 + state.segmentSuffix);
  646 + outDoc.writeString(delegatePostingsFormatName);
  647 + // create file indexDocId
  648 + closeables.add(outIndexDocId = state.directory
  649 + .createOutput(mtasIndexDocIdFileName, state.context));
  650 + CodecUtil.writeIndexHeader(outIndexDocId, name,
  651 + MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
  652 + state.segmentSuffix);
  653 + outIndexDocId.writeString(delegatePostingsFormatName);
  654 + // create file indexObjectId
  655 + closeables.add(outIndexObjectId = state.directory
  656 + .createOutput(mtasIndexObjectIdFileName, state.context));
  657 + CodecUtil.writeIndexHeader(outIndexObjectId, name,
  658 + MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
  659 + state.segmentSuffix);
  660 + outIndexObjectId.writeString(delegatePostingsFormatName);
  661 + // create file indexObjectPosition
  662 + closeables.add(outIndexObjectPosition = state.directory
  663 + .createOutput(mtasIndexObjectPositionFileName, state.context));
  664 + CodecUtil.writeIndexHeader(outIndexObjectPosition, name,
  665 + MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
  666 + state.segmentSuffix);
  667 + outIndexObjectPosition.writeString(delegatePostingsFormatName);
  668 + // create file indexObjectParent
  669 + closeables.add(outIndexObjectParent = state.directory
  670 + .createOutput(mtasIndexObjectParentFileName, state.context));
  671 + CodecUtil.writeIndexHeader(outIndexObjectParent, name,
  672 + MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
  673 + state.segmentSuffix);
  674 + outIndexObjectParent.writeString(delegatePostingsFormatName);
  675 + // create file term
  676 + closeables.add(outTerm = state.directory.createOutput(mtasTermFileName,
  677 + state.context));
  678 + CodecUtil.writeIndexHeader(outTerm, name,
  679 + MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
  680 + state.segmentSuffix);
  681 + outTerm.writeString(delegatePostingsFormatName);
  682 + // create file prefix
  683 + closeables.add(outPrefix = state.directory
  684 + .createOutput(mtasPrefixFileName, state.context));
  685 + CodecUtil.writeIndexHeader(outPrefix, name,
  686 + MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
  687 + state.segmentSuffix);
  688 + outPrefix.writeString(delegatePostingsFormatName);
  689 + // create file object
  690 + closeables.add(outObject = state.directory
  691 + .createOutput(mtasObjectFileName, state.context));
  692 + CodecUtil.writeIndexHeader(outObject, name,
  693 + MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
  694 + state.segmentSuffix);
  695 + outObject.writeString(delegatePostingsFormatName);
  696 + // For each field
  697 + for (String field : fields) {
  698 + Terms terms = fields.terms(field);
  699 + if (terms == null) {
  700 + continue;
  701 + } else {
  702 + // new temporary object storage for this field
  703 + IndexOutput outTmpObject = state.directory
  704 + .createOutput(mtasTmpObjectFileName, state.context);
  705 + closeables.add(outTmpObject);
  706 + // new temporary index docs for this field
  707 + IndexOutput outTmpDocs = state.directory
  708 + .createOutput(mtasTmpDocsFileName, state.context);
  709 + closeables.add(outTmpDocs);
  710 + // get fieldInfo
  711 + FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
  712 + // get properties terms
  713 + boolean hasPositions = terms.hasPositions();
  714 + boolean hasFreqs = terms.hasFreqs();
  715 + boolean hasPayloads = fieldInfo.hasPayloads();
  716 + boolean hasOffsets = terms.hasOffsets();
  717 + // register references
  718 + Long smallestTermFilepointer = outTerm.getFilePointer();
  719 + Long smallestPrefixFilepointer = outTerm.getFilePointer();
  720 + int termCounter = 0;
  721 + // only if freqs, positions and payload available
  722 + if (hasFreqs && hasPositions && hasPayloads) {
  723 + // compute flags
  724 + int flags = PostingsEnum.POSITIONS | PostingsEnum.PAYLOADS;
  725 + if (hasOffsets) {
  726 + flags = flags | PostingsEnum.OFFSETS;
716 727 }
717   - // store term and get ref
718   - Long termRef = outTerm.getFilePointer();
719   - outTerm.writeString(term.utf8ToString());
720   - termCounter++;
721   - // get postings
722   - postingsEnum = termsEnum.postings(postingsEnum, flags);
723   - // for each doc in field+term
  728 + // get terms
  729 + TermsEnum termsEnum = terms.iterator();
  730 + PostingsEnum postingsEnum = null;
  731 + // for each term in field
724 732 while (true) {
725   - Integer doc = postingsEnum.nextDoc();
726   - if (doc.equals(DocIdSetIterator.NO_MORE_DOCS)) {
  733 + BytesRef term = termsEnum.next();
  734 + if (term == null) {
727 735 break;
728 736 }
729   - int freq = postingsEnum.freq();
730   - // temporary storage objects and temporary index in memory for
731   - // doc
732   - memoryIndexTemporaryObject.clear();
733   - Long offsetFilePointerTmpObject = outTmpObject.getFilePointer();
734   - for (int i = 0; i < freq; i++) {
735   - Long currentFilePointerTmpObject = outTmpObject
736   - .getFilePointer();
737   - Integer mtasId;
738   - int position = postingsEnum.nextPosition();
739   - BytesRef payload = postingsEnum.getPayload();
740   - if (hasOffsets) {
741   - mtasId = createObjectAndRegisterPrefix(field, outTmpObject,
742   - term, termRef, position, payload,
743   - postingsEnum.startOffset(), postingsEnum.endOffset(),
744   - outPrefix);
745   - } else {
746   - mtasId = createObjectAndRegisterPrefix(field, outTmpObject,
747   - term, termRef, position, payload, outPrefix);
748   - }
749   - if (mtasId != null) {
750   - assert !memoryIndexTemporaryObject.containsKey(
751   - mtasId) : "mtasId should be unique in this selection";
752   - memoryIndexTemporaryObject.put(mtasId,
753   - currentFilePointerTmpObject);
  737 + // store term and get ref
  738 + Long termRef = outTerm.getFilePointer();
  739 + outTerm.writeString(term.utf8ToString());
  740 + termCounter++;
  741 + // get postings
  742 + postingsEnum = termsEnum.postings(postingsEnum, flags);
  743 + // for each doc in field+term
  744 + while (true) {
  745 + Integer doc = postingsEnum.nextDoc();
  746 + if (doc.equals(DocIdSetIterator.NO_MORE_DOCS)) {
  747 + break;
754 748 }
755   - } // end loop positions
756   - // store temporary index for this doc
757   - if (memoryIndexTemporaryObject.size() > 0) {
758   - // docId for this part
759   - outTmpDocs.writeVInt(doc);
760   - // number of objects/tokens in this part
761   - outTmpDocs.writeVInt(memoryIndexTemporaryObject.size());
762   - // offset to be used for references
763   - outTmpDocs.writeVLong(offsetFilePointerTmpObject);
764   - // loop over tokens
765   - for (Entry<Integer, Long> entry : memoryIndexTemporaryObject
766   - .entrySet()) {
767   - // mtasId object
768   - outTmpDocs.writeVInt(entry.getKey());
769   - // reference object
770   - outTmpDocs.writeVLong(
771   - (entry.getValue() - offsetFilePointerTmpObject));
  749 + int freq = postingsEnum.freq();
  750 + // temporary storage objects and temporary index in memory for
  751 + // doc
  752 + memoryIndexTemporaryObject.clear();
  753 + Long offsetFilePointerTmpObject = outTmpObject.getFilePointer();
  754 + for (int i = 0; i < freq; i++) {
  755 + Long currentFilePointerTmpObject = outTmpObject
  756 + .getFilePointer();
  757 + Integer mtasId;
  758 + int position = postingsEnum.nextPosition();
  759 + BytesRef payload = postingsEnum.getPayload();
  760 + if (hasOffsets) {
  761 + mtasId = createObjectAndRegisterPrefix(field, outTmpObject,
  762 + term, termRef, position, payload,
  763 + postingsEnum.startOffset(), postingsEnum.endOffset(),
  764 + outPrefix);
  765 + } else {
  766 + mtasId = createObjectAndRegisterPrefix(field, outTmpObject,
  767 + term, termRef, position, payload, outPrefix);
  768 + }
  769 + if (mtasId != null) {
  770 + assert !memoryIndexTemporaryObject.containsKey(
  771 + mtasId) : "mtasId should be unique in this selection";
  772 + memoryIndexTemporaryObject.put(mtasId,
  773 + currentFilePointerTmpObject);
  774 + }
  775 + } // end loop positions
  776 + // store temporary index for this doc
  777 + if (memoryIndexTemporaryObject.size() > 0) {
  778 + // docId for this part
  779 + outTmpDocs.writeVInt(doc);
  780 + // number of objects/tokens in this part
  781 + outTmpDocs.writeVInt(memoryIndexTemporaryObject.size());
  782 + // offset to be used for references
  783 + outTmpDocs.writeVLong(offsetFilePointerTmpObject);
  784 + // loop over tokens
  785 + for (Entry<Integer, Long> entry : memoryIndexTemporaryObject
  786 + .entrySet()) {
  787 + // mtasId object
  788 + outTmpDocs.writeVInt(entry.getKey());
  789 + // reference object
  790 + outTmpDocs.writeVLong(
  791 + (entry.getValue() - offsetFilePointerTmpObject));
  792 + }
772 793 }
773   - }
774   - // clean up
775   - memoryIndexTemporaryObject.clear();
776   - } // end loop docs
777   - } // end loop terms
778   - // set fieldInfo
779   - fieldInfos.fieldInfo(field).putAttribute(
780   - MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SINGLE_POSITION,
781   - getPrefixStatsSinglePositionPrefixAttribute(field));
782   - fieldInfos.fieldInfo(field).putAttribute(
783   - MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_MULTIPLE_POSITION,
784   - getPrefixStatsMultiplePositionPrefixAttribute(field));
785   - fieldInfos.fieldInfo(field).putAttribute(
786   - MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SET_POSITION,
787   - getPrefixStatsSetPositionPrefixAttribute(field));
  794 + // clean up
  795 + memoryIndexTemporaryObject.clear();
  796 + } // end loop docs
  797 + } // end loop terms
  798 + // set fieldInfo
  799 + fieldInfos.fieldInfo(field).putAttribute(
  800 + MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SINGLE_POSITION,
  801 + getPrefixStatsSinglePositionPrefixAttribute(field));
  802 + fieldInfos.fieldInfo(field).putAttribute(
  803 + MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_MULTIPLE_POSITION,
  804 + getPrefixStatsMultiplePositionPrefixAttribute(field));
  805 + fieldInfos.fieldInfo(field).putAttribute(
  806 + MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SET_POSITION,
  807 + getPrefixStatsSetPositionPrefixAttribute(field));
788 808  
789   - } // end processing field with freqs, positions and payload
790   - // close temporary object storage and index docs
791   - outTmpObject.close();
792   - outTmpDocs.close();
  809 + } // end processing field with freqs, positions and payload
  810 + // close temporary object storage and index docs
  811 + outTmpObject.close();
  812 + outTmpDocs.close();
793 813  
794   - // create (backwards) chained new temporary index docs
795   - IndexInput inTmpDocs = state.directory.openInput(mtasTmpDocsFileName,
796   - state.context);
797   - IndexOutput outTmpDocsChained = state.directory
798   - .createOutput(mtasTmpDocsChainedFileName, state.context);
799   - memoryTmpDocChainList.clear();
800   - while (true) {
801   - try {
802   - Long currentFilepointer = outTmpDocsChained.getFilePointer();
803   - // copy docId
804   - int docId = inTmpDocs.readVInt();
805   - outTmpDocsChained.writeVInt(docId);
806   - // copy size
807   - int size = inTmpDocs.readVInt();
808   - outTmpDocsChained.writeVInt(size);
809   - // offset references
810   - outTmpDocsChained.writeVLong(inTmpDocs.readVLong());
811   - for (int t = 0; t < size; t++) {
812   - outTmpDocsChained.writeVInt(inTmpDocs.readVInt());
  814 + // create (backwards) chained new temporary index docs
  815 + IndexInput inTmpDocs = state.directory.openInput(mtasTmpDocsFileName,
  816 + state.context);
  817 + closeables.add(inTmpDocs);
  818 + IndexOutput outTmpDocsChained = state.directory
  819 + .createOutput(mtasTmpDocsChainedFileName, state.context);
  820 + closeables.add(outTmpDocsChained);
  821 + memoryTmpDocChainList.clear();
  822 + while (true) {
  823 + try {
  824 + Long currentFilepointer = outTmpDocsChained.getFilePointer();
  825 + // copy docId
  826 + int docId = inTmpDocs.readVInt();
  827 + outTmpDocsChained.writeVInt(docId);
  828 + // copy size
  829 + int size = inTmpDocs.readVInt();
  830 + outTmpDocsChained.writeVInt(size);
  831 + // offset references
813 832 outTmpDocsChained.writeVLong(inTmpDocs.readVLong());
814   - }
815   - // set back reference to part with same docId
816   - if (memoryTmpDocChainList.containsKey(docId)) {
817   - // reference to previous
818   - outTmpDocsChained.writeVLong(memoryTmpDocChainList.get(docId));
819   - } else {
820   - // self reference indicates end of chain
821   - outTmpDocsChained.writeVLong(currentFilepointer);
822   - }
823   - // update temporary index in memory
824   - memoryTmpDocChainList.put(docId, currentFilepointer);
825   - } catch (IOException ex) {
826   - break;
827   - }
828   - }
829   - outTmpDocsChained.close();
830   - inTmpDocs.close();
831   - state.directory.deleteFile(mtasTmpDocsFileName);
832   -
833   - // set reference to tmpDoc in Field
834   - if (memoryTmpDocChainList.size() > 0) {
835   - outTmpField.writeString(field);
836   - outTmpField.writeVLong(outTmpDoc.getFilePointer());
837   - outTmpField.writeVInt(memoryTmpDocChainList.size());
838   - outTmpField.writeVLong(smallestTermFilepointer);
839   - outTmpField.writeVInt(termCounter);
840   - outTmpField.writeVLong(smallestPrefixFilepointer);
841   - outTmpField.writeVInt(prefixReferenceIndex.get(field).size());
842   - // fill indexDoc
843   - IndexInput inTmpDocsChained = state.directory
844   - .openInput(mtasTmpDocsChainedFileName, state.context);
845   - IndexInput inTmpObject = state.directory
846   - .openInput(mtasTmpObjectFileName, state.context);
847   - for (Entry<Integer, Long> entry : memoryTmpDocChainList.entrySet()) {
848   - Integer docId = entry.getKey();
849   - Long currentFilePointer, newFilePointer;
850   - // list of objectIds and references to objects
851   - memoryIndexDocList.clear();
852   - // construct final object + indexObjectId for docId
853   - currentFilePointer = entry.getValue();
854   - // collect objects for document
855   - tokenStatsMinPos = null;
856   - tokenStatsMaxPos = null;
857   - tokenStatsNumber = 0;
858   - while (true) {
859   - inTmpDocsChained.seek(currentFilePointer);
860   - Integer docIdPart = inTmpDocsChained.readVInt();
861   - assert docIdPart.equals(
862   - docId) : "conflicting docId in reference to temporaryIndexDocsChained";
863   - // number of objects/tokens in part
864   - int size = inTmpDocsChained.readVInt();
865   - long offsetFilePointerTmpObject = inTmpDocsChained.readVLong();
866   - assert size > 0 : "number of objects/tokens in part cannot be "
867   - + size;
868 833 for (int t = 0; t < size; t++) {
869   - int mtasId = inTmpDocsChained.readVInt();
870   - Long tmpObjectRef = inTmpDocsChained.readVLong()
871   - + offsetFilePointerTmpObject;
872   - assert !memoryIndexDocList.containsKey(
873   - mtasId) : "mtasId should be unique in this selection";
874   - // initially, store ref to tmpObject
875   - memoryIndexDocList.put(mtasId, tmpObjectRef);
  834 + outTmpDocsChained.writeVInt(inTmpDocs.readVInt());
  835 + outTmpDocsChained.writeVLong(inTmpDocs.readVLong());
876 836 }
877   - // reference to next part
878   - newFilePointer = inTmpDocsChained.readVLong();
879   - if (newFilePointer.equals(currentFilePointer)) {
880   - break; // end of chained parts
  837 + // set back reference to part with same docId
  838 + if (memoryTmpDocChainList.containsKey(docId)) {
  839 + // reference to previous
  840 + outTmpDocsChained.writeVLong(memoryTmpDocChainList.get(docId));
881 841 } else {
882   - currentFilePointer = newFilePointer;
  842 + // self reference indicates end of chain
  843 + outTmpDocsChained.writeVLong(currentFilepointer);
883 844 }
  845 + // update temporary index in memory
  846 + memoryTmpDocChainList.put(docId, currentFilepointer);
  847 + } catch (IOException ex) {
  848 + break;
884 849 }
885   - // now create new objects, sorted by mtasId
886   - Long smallestObjectFilepointer = outObject.getFilePointer();
887   - for (Entry<Integer, Long> objectEntry : memoryIndexDocList
  850 + }
  851 + outTmpDocsChained.close();
  852 + inTmpDocs.close();
  853 + state.directory.deleteFile(mtasTmpDocsFileName);
  854 +
  855 + // set reference to tmpDoc in Field
  856 + if (memoryTmpDocChainList.size() > 0) {
  857 + outTmpField.writeString(field);
  858 + outTmpField.writeVLong(outTmpDoc.getFilePointer());
  859 + outTmpField.writeVInt(memoryTmpDocChainList.size());
  860 + outTmpField.writeVLong(smallestTermFilepointer);
  861 + outTmpField.writeVInt(termCounter);
  862 + outTmpField.writeVLong(smallestPrefixFilepointer);
  863 + outTmpField.writeVInt(prefixReferenceIndex.get(field).size());
  864 + // fill indexDoc
  865 + IndexInput inTmpDocsChained = state.directory
  866 + .openInput(mtasTmpDocsChainedFileName, state.context);
  867 + closeables.add(inTmpDocsChained);
  868 + IndexInput inTmpObject = state.directory
  869 + .openInput(mtasTmpObjectFileName, state.context);
  870 + closeables.add(inTmpObject);
  871 + for (Entry<Integer, Long> entry : memoryTmpDocChainList
888 872 .entrySet()) {
889   - int mtasId = objectEntry.getKey();
890   - Long tmpObjectRef = objectEntry.getValue();
891   - Long objectRef = outObject.getFilePointer();
892   - copyObjectAndUpdateStats(mtasId, inTmpObject, tmpObjectRef,
893   - outObject);
894   - // update with new ref
895   - memoryIndexDocList.put(mtasId, objectRef);
896   - }
897   - // check mtasIds properties
898   - assert memoryIndexDocList.firstKey()
899   - .equals(0) : "first mtasId should not be "
900   - + memoryIndexDocList.firstKey();
901   - assert (1 + memoryIndexDocList.lastKey()
902   - - memoryIndexDocList.firstKey()) == memoryIndexDocList
903   - .size() : "missing mtasId";
904   - assert tokenStatsNumber.equals(memoryIndexDocList
905   - .size()) : "incorrect number of items in tokenStats";
  873 + Integer docId = entry.getKey();
  874 + Long currentFilePointer, newFilePointer;
  875 + // list of objectIds and references to objects
  876 + memoryIndexDocList.clear();
  877 + // construct final object + indexObjectId for docId
  878 + currentFilePointer = entry.getValue();
  879 + // collect objects for document
  880 + tokenStatsMinPos = null;
  881 + tokenStatsMaxPos = null;
  882 + tokenStatsNumber = 0;
  883 + while (true) {
  884 + inTmpDocsChained.seek(currentFilePointer);
  885 + Integer docIdPart = inTmpDocsChained.readVInt();
  886 + assert docIdPart.equals(
  887 + docId) : "conflicting docId in reference to temporaryIndexDocsChained";
  888 + // number of objects/tokens in part
  889 + int size = inTmpDocsChained.readVInt();
  890 + long offsetFilePointerTmpObject = inTmpDocsChained.readVLong();
  891 + assert size > 0 : "number of objects/tokens in part cannot be "
  892 + + size;
  893 + for (int t = 0; t < size; t++) {
  894 + int mtasId = inTmpDocsChained.readVInt();
  895 + Long tmpObjectRef = inTmpDocsChained.readVLong()
  896 + + offsetFilePointerTmpObject;
  897 + assert !memoryIndexDocList.containsKey(
  898 + mtasId) : "mtasId should be unique in this selection";
  899 + // initially, store ref to tmpObject
  900 + memoryIndexDocList.put(mtasId, tmpObjectRef);
  901 + }
  902 + // reference to next part
  903 + newFilePointer = inTmpDocsChained.readVLong();
  904 + if (newFilePointer.equals(currentFilePointer)) {
  905 + break; // end of chained parts
  906 + } else {
  907 + currentFilePointer = newFilePointer;
  908 + }
  909 + }
  910 + // now create new objects, sorted by mtasId
  911 + Long smallestObjectFilepointer = outObject.getFilePointer();
  912 + for (Entry<Integer, Long> objectEntry : memoryIndexDocList
  913 + .entrySet()) {
  914 + int mtasId = objectEntry.getKey();
  915 + Long tmpObjectRef = objectEntry.getValue();
  916 + Long objectRef = outObject.getFilePointer();
  917 + copyObjectAndUpdateStats(mtasId, inTmpObject, tmpObjectRef,
  918 + outObject);
  919 + // update with new ref
  920 + memoryIndexDocList.put(mtasId, objectRef);
  921 + }
  922 + // check mtasIds properties
  923 + assert memoryIndexDocList.firstKey()
  924 + .equals(0) : "first mtasId should not be "
  925 + + memoryIndexDocList.firstKey();
  926 + assert (1 + memoryIndexDocList.lastKey()
  927 + - memoryIndexDocList.firstKey()) == memoryIndexDocList
  928 + .size() : "missing mtasId";
  929 + assert tokenStatsNumber.equals(memoryIndexDocList
  930 + .size()) : "incorrect number of items in tokenStats";
906 931  
907   - // store item in tmpDoc
908   - outTmpDoc.writeVInt(docId);
909   - outTmpDoc.writeVLong(outIndexObjectId.getFilePointer());
  932 + // store item in tmpDoc
  933 + outTmpDoc.writeVInt(docId);
  934 + outTmpDoc.writeVLong(outIndexObjectId.getFilePointer());
910 935  
911   - int mtasId = 0;
912   - // compute linear approximation (least squares method, integer
913   - // constants)
914   - long tmpN = memoryIndexDocList.size();
915   - long tmpSumY = 0, tmpSumXY = 0;
916   - long tmpSumX = 0, tmpSumXX = 0;
917   - for (Entry<Integer, Long> objectEntry : memoryIndexDocList
918   - .entrySet()) {
919   - assert objectEntry.getKey().equals(mtasId) : "unexpected mtasId";
920   - tmpSumY += objectEntry.getValue();
921   - tmpSumX += mtasId;
922   - tmpSumXY += mtasId * objectEntry.getValue();
923   - tmpSumXX += mtasId * mtasId;
924   - mtasId++;
925   - }
926   - int objectRefApproxQuotient = (int) (((tmpN * tmpSumXY)
927   - - (tmpSumX * tmpSumY))
928   - / ((tmpN * tmpSumXX) - (tmpSumX * tmpSumX)));
929   - long objectRefApproxOffset = (tmpSumY
930   - - objectRefApproxQuotient * tmpSumX) / tmpN;
931   - Long objectRefApproxCorrection;
932   - long maxAbsObjectRefApproxCorrection = 0;
933   - // compute maximum correction
934   - mtasId = 0;
935   - for (Entry<Integer, Long> objectEntry : memoryIndexDocList
936   - .entrySet()) {
937   - objectRefApproxCorrection = (objectEntry.getValue()
938   - - (objectRefApproxOffset
939   - + (mtasId * objectRefApproxQuotient)));
940   - maxAbsObjectRefApproxCorrection = Math.max(
941   - maxAbsObjectRefApproxCorrection,
942   - Math.abs(objectRefApproxCorrection));
943   - mtasId++;
944   - }
945   - byte storageFlags;
946   - if (maxAbsObjectRefApproxCorrection <= Long.valueOf(Byte.MAX_VALUE)
947   - + 1) {
948   - storageFlags = MtasCodecPostingsFormat.MTAS_STORAGE_BYTE;
949   - } else if (maxAbsObjectRefApproxCorrection <= Long
950   - .valueOf(Short.MAX_VALUE) + 1) {
951   - storageFlags = MtasCodecPostingsFormat.MTAS_STORAGE_SHORT;
952   - } else if (maxAbsObjectRefApproxCorrection <= Long
953   - .valueOf(Integer.MAX_VALUE) + 1) {
954   - storageFlags = MtasCodecPostingsFormat.MTAS_STORAGE_INTEGER;
955   - } else {
956   - storageFlags = MtasCodecPostingsFormat.MTAS_STORAGE_LONG;
957   - }
958   - // update indexObjectId with correction on approximated ref (assume
959   - // can be stored as int)
960   - mtasId = 0;
961   - for (Entry<Integer, Long> objectEntry : memoryIndexDocList
962   - .entrySet()) {
963   - objectRefApproxCorrection = (objectEntry.getValue()
964   - - (objectRefApproxOffset
965   - + (mtasId * objectRefApproxQuotient)));
966   - if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_BYTE) {
967   - outIndexObjectId
968   - .writeByte(objectRefApproxCorrection.byteValue());
969   - } else if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_SHORT) {
970   - outIndexObjectId
971   - .writeShort(objectRefApproxCorrection.shortValue());
972   - } else if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_INTEGER) {
973   - outIndexObjectId.writeInt(objectRefApproxCorrection.intValue());
  936 + int mtasId = 0;
  937 + // compute linear approximation (least squares method, integer
  938 + // constants)
  939 + long tmpN = memoryIndexDocList.size();
  940 + long tmpSumY = 0, tmpSumXY = 0;
  941 + long tmpSumX = 0, tmpSumXX = 0;
  942 + for (Entry<Integer, Long> objectEntry : memoryIndexDocList
  943 + .entrySet()) {
  944 + assert objectEntry.getKey()
  945 + .equals(mtasId) : "unexpected mtasId";
  946 + tmpSumY += objectEntry.getValue();
  947 + tmpSumX += mtasId;
  948 + tmpSumXY += mtasId * objectEntry.getValue();
  949 + tmpSumXX += mtasId * mtasId;
  950 + mtasId++;
  951 + }
  952 + int objectRefApproxQuotient = (int) (((tmpN * tmpSumXY)
  953 + - (tmpSumX * tmpSumY))
  954 + / ((tmpN * tmpSumXX) - (tmpSumX * tmpSumX)));
  955 + long objectRefApproxOffset = (tmpSumY
  956 + - objectRefApproxQuotient * tmpSumX) / tmpN;
  957 + Long objectRefApproxCorrection;
  958 + long maxAbsObjectRefApproxCorrection = 0;
  959 + // compute maximum correction
  960 + mtasId = 0;
  961 + for (Entry<Integer, Long> objectEntry : memoryIndexDocList
  962 + .entrySet()) {
  963 + objectRefApproxCorrection = (objectEntry.getValue()
  964 + - (objectRefApproxOffset
  965 + + (mtasId * objectRefApproxQuotient)));
  966 + maxAbsObjectRefApproxCorrection = Math.max(
  967 + maxAbsObjectRefApproxCorrection,
  968 + Math.abs(objectRefApproxCorrection));
  969 + mtasId++;
  970 + }
  971 + byte storageFlags;
  972 + if (maxAbsObjectRefApproxCorrection <= Long
  973 + .valueOf(Byte.MAX_VALUE) + 1) {
  974 + storageFlags = MtasCodecPostingsFormat.MTAS_STORAGE_BYTE;
  975 + } else if (maxAbsObjectRefApproxCorrection <= Long
  976 + .valueOf(Short.MAX_VALUE) + 1) {
  977 + storageFlags = MtasCodecPostingsFormat.MTAS_STORAGE_SHORT;
  978 + } else if (maxAbsObjectRefApproxCorrection <= Long
  979 + .valueOf(Integer.MAX_VALUE) + 1) {
  980 + storageFlags = MtasCodecPostingsFormat.MTAS_STORAGE_INTEGER;
974 981 } else {
975   - outIndexObjectId.writeLong(objectRefApproxCorrection);
  982 + storageFlags = MtasCodecPostingsFormat.MTAS_STORAGE_LONG;
976 983 }
977   - mtasId++;
978   - }
979   - outTmpDoc.writeVLong(smallestObjectFilepointer);
980   - outTmpDoc.writeVInt(objectRefApproxQuotient);
981   - outTmpDoc.writeZLong(objectRefApproxOffset);
982   - outTmpDoc.writeByte(storageFlags);
983   - outTmpDoc.writeVInt(tokenStatsNumber);
984   - outTmpDoc.writeVInt(tokenStatsMinPos);
985   - outTmpDoc.writeVInt(tokenStatsMaxPos);
986   - // clean up
987   - memoryIndexDocList.clear();
988   - } // end loop over docs
989   - inTmpDocsChained.close();
990   - inTmpObject.close();
991   - }
992   - // clean up
993   - memoryTmpDocChainList.clear();
994   - // remove temporary files
995   - state.directory.deleteFile(mtasTmpObjectFileName);
996   - state.directory.deleteFile(mtasTmpDocsChainedFileName);
997   - // store references for field
  984 + // update indexObjectId with correction on approximated ref
  985 + // (assume
  986 + // can be stored as int)
  987 + mtasId = 0;
  988 + for (Entry<Integer, Long> objectEntry : memoryIndexDocList
  989 + .entrySet()) {
  990 + objectRefApproxCorrection = (objectEntry.getValue()
  991 + - (objectRefApproxOffset
  992 + + (mtasId * objectRefApproxQuotient)));
  993 + if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_BYTE) {
  994 + outIndexObjectId
  995 + .writeByte(objectRefApproxCorrection.byteValue());
  996 + } else if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_SHORT) {
  997 + outIndexObjectId
  998 + .writeShort(objectRefApproxCorrection.shortValue());
  999 + } else if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_INTEGER) {
  1000 + outIndexObjectId
  1001 + .writeInt(objectRefApproxCorrection.intValue());
  1002 + } else {
  1003 + outIndexObjectId.writeLong(objectRefApproxCorrection);
  1004 + }
  1005 + mtasId++;
  1006 + }
  1007 + outTmpDoc.writeVLong(smallestObjectFilepointer);
  1008 + outTmpDoc.writeVInt(objectRefApproxQuotient);
  1009 + outTmpDoc.writeZLong(objectRefApproxOffset);
  1010 + outTmpDoc.writeByte(storageFlags);
  1011 + outTmpDoc.writeVInt(tokenStatsNumber);
  1012 + outTmpDoc.writeVInt(tokenStatsMinPos);
  1013 + outTmpDoc.writeVInt(tokenStatsMaxPos);
  1014 + // clean up
  1015 + memoryIndexDocList.clear();
  1016 + } // end loop over docs
  1017 + inTmpDocsChained.close();
  1018 + inTmpObject.close();
  1019 + }
  1020 + // clean up
  1021 + memoryTmpDocChainList.clear();
  1022 + // remove temporary files
  1023 + state.directory.deleteFile(mtasTmpObjectFileName);
  1024 + state.directory.deleteFile(mtasTmpDocsChainedFileName);
  1025 + // store references for field
998 1026  
999   - } // end processing field
1000   - } // end loop fields
1001   - // close temporary index doc
1002   - outTmpDoc.close();
1003   - // close indexField, indexObjectId and object
1004   - CodecUtil.writeFooter(outTmpField);
1005   - outTmpField.close();
1006   - CodecUtil.writeFooter(outIndexObjectId);
1007   - outIndexObjectId.close();
1008   - CodecUtil.writeFooter(outObject);
1009   - outObject.close();
1010   - CodecUtil.writeFooter(outTerm);
1011   - outTerm.close();
1012   - CodecUtil.writeFooter(outPrefix);
1013   - outPrefix.close();
  1027 + } // end processing field
  1028 + } // end loop fields
  1029 + // close temporary index doc
  1030 + outTmpDoc.close();
  1031 + // close indexField, indexObjectId and object
  1032 + CodecUtil.writeFooter(outTmpField);
  1033 + outTmpField.close();
  1034 + CodecUtil.writeFooter(outIndexObjectId);
  1035 + outIndexObjectId.close();
  1036 + CodecUtil.writeFooter(outObject);
  1037 + outObject.close();
  1038 + CodecUtil.writeFooter(outTerm);
  1039 + outTerm.close();
  1040 + CodecUtil.writeFooter(outPrefix);
  1041 + outPrefix.close();
1014 1042  
1015   - // create final doc, fill indexObjectPosition, indexObjectParent and
1016   - // indexTermPrefixPosition, create final field
1017   - IndexInput inTmpField = state.directory.openInput(mtasTmpFieldFileName,
1018   - state.context);
1019   - IndexInput inTmpDoc = state.directory.openInput(mtasTmpDocFileName,
1020   - state.context);
1021   - IndexInput inObjectId = state.directory.openInput(mtasIndexObjectIdFileName,
1022   - state.context);
1023   - IndexInput inObject = state.directory.openInput(mtasObjectFileName,
1024   - state.context);
1025   - IndexInput inTerm = state.directory.openInput(mtasTermFileName,
1026   - state.context);
1027   - outField = state.directory.createOutput(mtasIndexFieldFileName,
1028   - state.context);
1029   - CodecUtil.writeIndexHeader(outField, name,
1030   - MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
1031   - state.segmentSuffix);
1032   - outField.writeString(delegatePostingsFormatName);
  1043 + // create final doc, fill indexObjectPosition, indexObjectParent and
  1044 + // indexTermPrefixPosition, create final field
  1045 + IndexInput inTmpField = state.directory.openInput(mtasTmpFieldFileName,
  1046 + state.context);
  1047 + closeables.add(inTmpField);
  1048 + IndexInput inTmpDoc = state.directory.openInput(mtasTmpDocFileName,
  1049 + state.context);
  1050 + closeables.add(inTmpDoc);
  1051 + IndexInput inObjectId = state.directory
  1052 + .openInput(mtasIndexObjectIdFileName, state.context);
  1053 + closeables.add(inObjectId);
  1054 + IndexInput inObject = state.directory.openInput(mtasObjectFileName,
  1055 + state.context);
  1056 + closeables.add(inObject);
  1057 + IndexInput inTerm = state.directory.openInput(mtasTermFileName,
  1058 + state.context);
  1059 + closeables.add(inTerm);
  1060 + closeables.add(outField = state.directory
  1061 + .createOutput(mtasIndexFieldFileName, state.context));
  1062 + CodecUtil.writeIndexHeader(outField, name,
  1063 + MtasCodecPostingsFormat.VERSION_CURRENT, state.segmentInfo.getId(),
  1064 + state.segmentSuffix);
  1065 + outField.writeString(delegatePostingsFormatName);
1033 1066  
1034   - while (true) {
1035   - try {
1036   - // read from tmpField
1037   - String field = inTmpField.readString();
1038   - long fpTmpDoc = inTmpField.readVLong();
1039   - int numberDocs = inTmpField.readVInt();
1040   - long fpTerm = inTmpField.readVLong();
1041   - int numberTerms = inTmpField.readVInt();
1042   - long fpPrefix = inTmpField.readVLong();
1043   - int numberPrefixes = inTmpField.readVInt();
1044   - inTmpDoc.seek(fpTmpDoc);
1045   - long fpFirstDoc = outDoc.getFilePointer();
1046   - // get prefixId index
1047   - HashMap<String, Integer> prefixIdIndexField = prefixIdIndex.get(field);
1048   - // construct MtasRBTree for indexDocId
1049   - MtasRBTree mtasDocIdTree = new MtasRBTree(true, false);
1050   - for (int docCounter = 0; docCounter < numberDocs; docCounter++) {
1051   - try {
1052   - // get info from tmpDoc
1053   - int docId = inTmpDoc.readVInt();
1054   - // filePointer indexObjectId
1055   - Long fpIndexObjectId = inTmpDoc.readVLong();
1056   - // filePointer indexObjectPosition (unknown)
1057   - Long fpIndexObjectPosition;
1058   - // filePointer indexObjectParent (unknown)
1059   - Long fpIndexObjectParent;
1060   - // constants for approximation object references for this document
1061   - long smallestObjectFilepointer = inTmpDoc.readVLong();
1062   - int objectRefApproxQuotient = inTmpDoc.readVInt();
1063   - long objectRefApproxOffset = inTmpDoc.readZLong();
1064   - byte storageFlags = inTmpDoc.readByte();
1065   - // number objects/tokens
1066   - int size = inTmpDoc.readVInt();
1067   - // construct MtasRBTree
1068   - MtasRBTree mtasPositionTree = new MtasRBTree(false, true);
1069   - MtasRBTree mtasParentTree = new MtasRBTree(false, true);
1070   - inObjectId.seek(fpIndexObjectId);
1071   - long refCorrection;
1072   - long ref;
1073   - for (int mtasId = 0; mtasId < size; mtasId++) {
1074   - if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_BYTE) {
1075   - refCorrection = inObjectId.readByte();
1076   - } else if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_SHORT) {
1077   - refCorrection = inObjectId.readShort();
1078   - } else if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_INTEGER) {
1079   - refCorrection = inObjectId.readInt();
1080   - } else {
1081   - refCorrection = inObjectId.readLong();
  1067 + while (true) {
  1068 + try {
  1069 + // read from tmpField
  1070 + String field = inTmpField.readString();
  1071 + long fpTmpDoc = inTmpField.readVLong();
  1072 + int numberDocs = inTmpField.readVInt();
  1073 + long fpTerm = inTmpField.readVLong();
  1074 + int numberTerms = inTmpField.readVInt();
  1075 + long fpPrefix = inTmpField.readVLong();
  1076 + int numberPrefixes = inTmpField.readVInt();
  1077 + inTmpDoc.seek(fpTmpDoc);
  1078 + long fpFirstDoc = outDoc.getFilePointer();
  1079 + // get prefixId index
  1080 + HashMap<String, Integer> prefixIdIndexField = prefixIdIndex
  1081 + .get(field);
  1082 + // construct MtasRBTree for indexDocId
  1083 + MtasRBTree mtasDocIdTree = new MtasRBTree(true, false);
  1084 + for (int docCounter = 0; docCounter < numberDocs; docCounter++) {
  1085 + try {
  1086 + // get info from tmpDoc
  1087 + int docId = inTmpDoc.readVInt();
  1088 + // filePointer indexObjectId
  1089 + Long fpIndexObjectId = inTmpDoc.readVLong();
  1090 + // filePointer indexObjectPosition (unknown)
  1091 + Long fpIndexObjectPosition;
  1092 + // filePointer indexObjectParent (unknown)
  1093 + Long fpIndexObjectParent;
  1094 + // constants for approximation object references for this document
  1095 + long smallestObjectFilepointer = inTmpDoc.readVLong();
  1096 + int objectRefApproxQuotient = inTmpDoc.readVInt();
  1097 + long objectRefApproxOffset = inTmpDoc.readZLong();
  1098 + byte storageFlags = inTmpDoc.readByte();
  1099 + // number objects/tokens
  1100 + int size = inTmpDoc.readVInt();
  1101 + // construct MtasRBTree
  1102 + MtasRBTree mtasPositionTree = new MtasRBTree(false, true);
  1103 + MtasRBTree mtasParentTree = new MtasRBTree(false, true);
  1104 + inObjectId.seek(fpIndexObjectId);
  1105 + long refCorrection;
  1106 + long ref;
  1107 + for (int mtasId = 0; mtasId < size; mtasId++) {
  1108 + if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_BYTE) {
  1109 + refCorrection = inObjectId.readByte();
  1110 + } else if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_SHORT) {
  1111 + refCorrection = inObjectId.readShort();
  1112 + } else if (storageFlags == MtasCodecPostingsFormat.MTAS_STORAGE_INTEGER) {
  1113 + refCorrection = inObjectId.readInt();
  1114 + } else {
  1115 + refCorrection = inObjectId.readLong();
  1116 + }
  1117 + ref = objectRefApproxOffset + mtasId * objectRefApproxQuotient
  1118 + + refCorrection;
  1119 + MtasToken<String> token = MtasCodecPostingsFormat
  1120 + .getToken(inObject, inTerm, ref);
  1121 + String prefix = token.getPrefix();
  1122 + int prefixId = prefixIdIndexField.containsKey(prefix)
  1123 + ? prefixIdIndexField.get(prefix) : 0;
  1124 + token.setPrefixId(prefixId);
  1125 + assert token.getId().equals(mtasId) : "unexpected mtasId "
  1126 + + mtasId;
  1127 + mtasPositionTree.addPositionAndObjectFromToken(token);
  1128 + mtasParentTree.addParentFromToken(token);
1082 1129 }
1083   - ref = objectRefApproxOffset + mtasId * objectRefApproxQuotient
1084   - + refCorrection;
1085   - MtasToken<String> token = MtasCodecPostingsFormat
1086   - .getToken(inObject, inTerm, ref);
1087   - String prefix = token.getPrefix();
1088   - Integer prefixId = prefixIdIndexField.get(prefix);
1089   - token.setPrefixId(prefixId);
1090   - assert token.getId().equals(mtasId) : "unexpected mtasId "
1091   - + mtasId;
1092   - mtasPositionTree.addPositionAndObjectFromToken(token);
1093   - mtasParentTree.addParentFromToken(token);
  1130 + // store mtasPositionTree and mtasParentTree
  1131 + fpIndexObjectPosition = storeTree(mtasPositionTree,
  1132 + outIndexObjectPosition, smallestObjectFilepointer);
  1133 + fpIndexObjectParent = storeTree(mtasParentTree,
  1134 + outIndexObjectParent, smallestObjectFilepointer);
  1135 + long fpDoc = outDoc.getFilePointer();
  1136 + // create indexDoc with updated fpIndexObjectPosition from tmpDoc
  1137 + outDoc.writeVInt(docId); // docId
  1138 + // reference indexObjectId
  1139 + outDoc.writeVLong(fpIndexObjectId);
  1140 + // reference indexObjectPosition
  1141 + outDoc.writeVLong(fpIndexObjectPosition);
  1142 + // reference indexObjectParent
  1143 + outDoc.writeVLong(fpIndexObjectParent);
  1144 + // variables approximation and storage references object
  1145 + outDoc.writeVLong(smallestObjectFilepointer);
  1146 + outDoc.writeVInt(objectRefApproxQuotient);
  1147 + outDoc.writeZLong(objectRefApproxOffset);
  1148 + outDoc.writeByte(storageFlags);
  1149 + // number of objects
  1150 + outDoc.writeVInt(size);
  1151 + // minPosition
  1152 + outDoc.writeVInt(inTmpDoc.readVInt());
  1153 + // maxPosition
  1154 + outDoc.writeVInt(inTmpDoc.readVInt());
  1155 + // add to tree for indexDocId
  1156 + mtasDocIdTree.addIdFromDoc(docId, fpDoc);
  1157 + } catch (IOException ex) {
  1158 + break;
1094 1159 }
1095   - // store mtasPositionTree and mtasParentTree
1096   - fpIndexObjectPosition = storeTree(mtasPositionTree,
1097   - outIndexObjectPosition, smallestObjectFilepointer);
1098   - fpIndexObjectParent = storeTree(mtasParentTree,
1099   - outIndexObjectParent, smallestObjectFilepointer);
1100   - long fpDoc = outDoc.getFilePointer();
1101   - // create indexDoc with updated fpIndexObjectPosition from tmpDoc
1102   - outDoc.writeVInt(docId); // docId
1103   - // reference indexObjectId
1104   - outDoc.writeVLong(fpIndexObjectId);
1105   - // reference indexObjectPosition
1106   - outDoc.writeVLong(fpIndexObjectPosition);
1107   - // reference indexObjectParent
1108   - outDoc.writeVLong(fpIndexObjectParent);
1109   - // variables approximation and storage references object
1110   - outDoc.writeVLong(smallestObjectFilepointer);
1111   - outDoc.writeVInt(objectRefApproxQuotient);
1112   - outDoc.writeZLong(objectRefApproxOffset);
1113   - outDoc.writeByte(storageFlags);
1114   - // number of objects
1115   - outDoc.writeVInt(size);
1116   - // minPosition
1117   - outDoc.writeVInt(inTmpDoc.readVInt());
1118   - // maxPosition
1119   - outDoc.writeVInt(inTmpDoc.readVInt());
1120   - // add to tree for indexDocId
1121   - mtasDocIdTree.addIdFromDoc(docId, fpDoc);
1122   - } catch (IOException ex) {
1123   - break;
  1160 +
1124 1161 }
  1162 + long fpIndexDocId = storeTree(mtasDocIdTree, outIndexDocId,
  1163 + fpFirstDoc);
1125 1164  
  1165 + // store in indexField
  1166 + outField.writeString(field);
  1167 + outField.writeVLong(fpFirstDoc);
  1168 + outField.writeVLong(fpIndexDocId);
  1169 + outField.writeVInt(numberDocs);
  1170 + outField.writeVLong(fpTerm);
  1171 + outField.writeVInt(numberTerms);
  1172 + outField.writeVLong(fpPrefix);
  1173 + outField.writeVInt(numberPrefixes);
  1174 + } catch (EOFException e) {
  1175 + break;
1126 1176 }
1127   - long fpIndexDocId = storeTree(mtasDocIdTree, outIndexDocId, fpFirstDoc);
1128   -
1129   - // store in indexField
1130   - outField.writeString(field);
1131   - outField.writeVLong(fpFirstDoc);
1132   - outField.writeVLong(fpIndexDocId);
1133   - outField.writeVInt(numberDocs);
1134   - outField.writeVLong(fpTerm);
1135   - outField.writeVInt(numberTerms);
1136   - outField.writeVLong(fpPrefix);
1137   - outField.writeVInt(numberPrefixes);
1138   - } catch (EOFException e) {
1139   - break;
  1177 + // end loop over fields
1140 1178 }
1141   - // end loop over fields
1142   - }
1143   - inObject.close();
1144   - inObjectId.close();
1145   - inTmpDoc.close();
1146   - inTmpField.close();
  1179 + inObject.close();
  1180 + inObjectId.close();
  1181 + inTmpDoc.close();
  1182 + inTmpField.close();
1147 1183  
1148   - // remove temporary files
1149   - state.directory.deleteFile(mtasTmpDocFileName);
1150   - state.directory.deleteFile(mtasTmpFieldFileName);
1151   - // close indexDoc, indexObjectPosition and indexObjectParent
1152   - CodecUtil.writeFooter(outDoc);
1153   - outDoc.close();
1154   - CodecUtil.writeFooter(outIndexObjectPosition);
1155   - outIndexObjectPosition.close();
1156   - CodecUtil.writeFooter(outIndexObjectParent);
1157   - outIndexObjectParent.close();
1158   - CodecUtil.writeFooter(outIndexDocId);
1159   - outIndexDocId.close();
1160   - CodecUtil.writeFooter(outField);
1161   - outField.close();
  1184 + // remove temporary files
  1185 + state.directory.deleteFile(mtasTmpDocFileName);
  1186 + state.directory.deleteFile(mtasTmpFieldFileName);
  1187 + // close indexDoc, indexObjectPosition and indexObjectParent
  1188 + CodecUtil.writeFooter(outDoc);
  1189 + outDoc.close();
  1190 + CodecUtil.writeFooter(outIndexObjectPosition);
  1191 + outIndexObjectPosition.close();
  1192 + CodecUtil.writeFooter(outIndexObjectParent);
  1193 + outIndexObjectParent.close();
  1194 + CodecUtil.writeFooter(outIndexDocId);
  1195 + outIndexDocId.close();
  1196 + CodecUtil.writeFooter(outField);
  1197 + outField.close();
  1198 + } finally {
  1199 + IOUtils.closeWhileHandlingException(closeables);
  1200 + }
1162 1201  
1163 1202 }
1164 1203  
... ... @@ -1228,12 +1267,12 @@ public class MtasFieldsConsumer extends FieldsConsumer {
1228 1267 Integer mtasParentId = payloadDecoder.getMtasParentId();
1229 1268 byte[] mtasPayload = payloadDecoder.getMtasPayload();
1230 1269 MtasPosition mtasPosition = payloadDecoder.getMtasPosition();
1231   - if (mtasPosition == null) {
  1270 + MtasOffset mtasOffset = payloadDecoder.getMtasOffset();
  1271 + if (mtasOffset == null) {
1232 1272 if (startOffset != null) {
1233   - mtasPosition = new MtasPosition(startOffset, endOffset);
  1273 + mtasOffset = new MtasOffset(startOffset, endOffset);
1234 1274 }
1235 1275 }
1236   - MtasOffset mtasOffset = payloadDecoder.getMtasOffset();
1237 1276 MtasOffset mtasRealOffset = payloadDecoder.getMtasRealOffset();
1238 1277 // only if really mtas object
1239 1278 if (mtasId != null) {
... ... @@ -1254,6 +1293,8 @@ public class MtasFieldsConsumer extends FieldsConsumer {
1254 1293 registerPrefixStatsSinglePositionValue(field, term.utf8ToString(),
1255 1294 outPrefix);
1256 1295 }
  1296 + } else {
  1297 + throw new IOException("no position");
1257 1298 }
1258 1299 if (mtasParentId != null) {
1259 1300 objectFlags = objectFlags
... ... @@ -1360,75 +1401,80 @@ public class MtasFieldsConsumer extends FieldsConsumer {
1360 1401 private Long storeTree(MtasTreeNode<?> node, boolean isSinglePoint,
1361 1402 boolean storeAdditionalInformation, IndexOutput out,
1362 1403 Long nodeRefApproxOffset, long refApproxOffset) throws IOException {
1363   - if (node != null) {
1364   - Boolean isRoot = false;
1365   - if (nodeRefApproxOffset == null) {
1366   - nodeRefApproxOffset = out.getFilePointer();
1367   - isRoot = true;
1368   - }
1369   - Long fpIndexObjectPositionLeftChild, fpIndexObjectPositionRightChild;
1370   - if (node.leftChild != null) {
1371   - fpIndexObjectPositionLeftChild = storeTree(node.leftChild,
1372   - isSinglePoint, storeAdditionalInformation, out, nodeRefApproxOffset,
1373   - refApproxOffset);
1374   - } else {
1375   - fpIndexObjectPositionLeftChild = (long) 0; // tmp
1376   - }
1377   - if (node.rightChild != null) {
1378   - fpIndexObjectPositionRightChild = storeTree(node.rightChild,
1379   - isSinglePoint, storeAdditionalInformation, out, nodeRefApproxOffset,
1380   - refApproxOffset);
1381   - } else {
1382   - fpIndexObjectPositionRightChild = (long) 0; // tmp
1383   - }
1384   - Long fpIndexObjectPosition = out.getFilePointer();
1385   - if (node.leftChild == null) {
1386   - fpIndexObjectPositionLeftChild = fpIndexObjectPosition;
1387   - }
1388   - if (node.rightChild == null) {
1389   - fpIndexObjectPositionRightChild = fpIndexObjectPosition;
1390   - }
1391   - if (isRoot) {
1392   - out.writeVLong(nodeRefApproxOffset);
1393   - byte flag = 0;
1394   - if (isSinglePoint) {
1395   - flag |= MtasTree.SINGLE_POSITION_TREE;
  1404 + try {
  1405 + if (node != null) {
  1406 + Boolean isRoot = false;
  1407 + if (nodeRefApproxOffset == null) {
  1408 + nodeRefApproxOffset = out.getFilePointer();
  1409 + isRoot = true;
1396 1410 }
1397   - if (storeAdditionalInformation) {
1398   - flag |= MtasTree.STORE_ADDITIONAL_ID;
  1411 + Long fpIndexObjectPositionLeftChild, fpIndexObjectPositionRightChild;
  1412 + if (node.leftChild != null) {
  1413 + fpIndexObjectPositionLeftChild = storeTree(node.leftChild,
  1414 + isSinglePoint, storeAdditionalInformation, out,
  1415 + nodeRefApproxOffset, refApproxOffset);
  1416 + } else {
  1417 + fpIndexObjectPositionLeftChild = (long) 0; // tmp
1399 1418 }
1400   - out.writeByte(flag);
1401   - }
1402   - out.writeVInt(node.left);
1403   - out.writeVInt(node.right);
1404   - out.writeVInt(node.max);
1405   - out.writeVLong((fpIndexObjectPositionLeftChild - nodeRefApproxOffset));
1406   - out.writeVLong((fpIndexObjectPositionRightChild - nodeRefApproxOffset));
1407   - if (!isSinglePoint) {
1408   - out.writeVInt(node.ids.size());
1409   - }
1410   - HashMap<Integer, MtasTreeNodeId> ids = node.ids;
1411   - Long objectRefCorrected;
1412   - long objectRefCorrectedPrevious = 0;
1413   - // sort refs
1414   - List<MtasTreeNodeId> nodeIds = new ArrayList<MtasTreeNodeId>(
1415   - ids.values());
1416   - Collections.sort(nodeIds);
1417   - if (isSinglePoint && (nodeIds.size() != 1)) {
1418   - throw new IOException("singlePoint tree, but missing single point...");
1419   - }
1420   - for (MtasTreeNodeId nodeId : nodeIds) {
1421   - objectRefCorrected = (nodeId.ref - refApproxOffset);
1422   - out.writeVLong((objectRefCorrected - objectRefCorrectedPrevious));
1423   - objectRefCorrectedPrevious = objectRefCorrected;
1424   - if (storeAdditionalInformation) {
1425   - out.writeVInt(nodeId.additionalId);
1426   - out.writeVLong(nodeId.additionalRef);
  1419 + if (node.rightChild != null) {
  1420 + fpIndexObjectPositionRightChild = storeTree(node.rightChild,
  1421 + isSinglePoint, storeAdditionalInformation, out,
  1422 + nodeRefApproxOffset, refApproxOffset);
  1423 + } else {
  1424 + fpIndexObjectPositionRightChild = (long) 0; // tmp
  1425 + }
  1426 + Long fpIndexObjectPosition = out.getFilePointer();
  1427 + if (node.leftChild == null) {
  1428 + fpIndexObjectPositionLeftChild = fpIndexObjectPosition;
  1429 + }
  1430 + if (node.rightChild == null) {
  1431 + fpIndexObjectPositionRightChild = fpIndexObjectPosition;
  1432 + }
  1433 + if (isRoot) {
  1434 + out.writeVLong(nodeRefApproxOffset);
  1435 + byte flag = 0;
  1436 + if (isSinglePoint) {
  1437 + flag |= MtasTree.SINGLE_POSITION_TREE;
  1438 + }
  1439 + if (storeAdditionalInformation) {
  1440 + flag |= MtasTree.STORE_ADDITIONAL_ID;
  1441 + }
  1442 + out.writeByte(flag);
  1443 + }
  1444 + out.writeVInt(node.left);
  1445 + out.writeVInt(node.right);
  1446 + out.writeVInt(node.max);
  1447 + out.writeVLong((fpIndexObjectPositionLeftChild - nodeRefApproxOffset));
  1448 + out.writeVLong((fpIndexObjectPositionRightChild - nodeRefApproxOffset));
  1449 + if (!isSinglePoint) {
  1450 + out.writeVInt(node.ids.size());
  1451 + }
  1452 + HashMap<Integer, MtasTreeNodeId> ids = node.ids;
  1453 + Long objectRefCorrected;
  1454 + long objectRefCorrectedPrevious = 0;
  1455 + // sort refs
  1456 + List<MtasTreeNodeId> nodeIds = new ArrayList<MtasTreeNodeId>(
  1457 + ids.values());
  1458 + Collections.sort(nodeIds);
  1459 + if (isSinglePoint && (nodeIds.size() != 1)) {
  1460 + throw new IOException(
  1461 + "singlePoint tree, but missing single point...");
  1462 + }
  1463 + for (MtasTreeNodeId nodeId : nodeIds) {
  1464 + objectRefCorrected = (nodeId.ref - refApproxOffset);
  1465 + out.writeVLong((objectRefCorrected - objectRefCorrectedPrevious));
  1466 + objectRefCorrectedPrevious = objectRefCorrected;
  1467 + if (storeAdditionalInformation) {
  1468 + out.writeVInt(nodeId.additionalId);
  1469 + out.writeVLong(nodeId.additionalRef);
  1470 + }
1427 1471 }
  1472 + return fpIndexObjectPosition;
  1473 + } else {
  1474 + return null;
1428 1475 }
1429   - return fpIndexObjectPosition;
1430   - } else {
1431   - return null;
  1476 + } catch (IllegalArgumentException e) {
  1477 + throw new IOException(e.getMessage());
1432 1478 }
1433 1479 }
1434 1480  
... ... @@ -1529,9 +1575,6 @@ public class MtasFieldsConsumer extends FieldsConsumer {
1529 1575 out.writeVLong(in.readVLong());
1530 1576 }
1531 1577  
1532   - /** The closed. */
1533   - private boolean closed;
1534   -
1535 1578 /*
1536 1579 * (non-Javadoc)
1537 1580 *
... ... @@ -1539,10 +1582,6 @@ public class MtasFieldsConsumer extends FieldsConsumer {
1539 1582 */
1540 1583 @Override
1541 1584 public void close() throws IOException {
1542   - if (closed) {
1543   - return;
1544   - }
1545   - closed = true;
1546 1585 delegateFieldsConsumer.close();
1547 1586 }
1548 1587  
... ...
src/mtas/codec/util/CodecCollector.java
... ... @@ -569,6 +569,8 @@ public class CodecCollector {
569 569 MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SINGLE_POSITION);
570 570 String multiplePositionPrefixes = fi.getAttribute(
571 571 MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_MULTIPLE_POSITION);
  572 + String setPositionPrefixes = fi.getAttribute(
  573 + MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SET_POSITION);
572 574 if (singlePositionPrefixes != null) {
573 575 String[] prefixes = singlePositionPrefixes
574 576 .split(Pattern.quote(MtasToken.DELIMITER));
... ... @@ -583,6 +585,13 @@ public class CodecCollector {
583 585 fieldInfo.prefix.addMultiplePosition(prefixes[i]);
584 586 }
585 587 }
  588 + if (setPositionPrefixes != null) {
  589 + String[] prefixes = setPositionPrefixes
  590 + .split(Pattern.quote(MtasToken.DELIMITER));
  591 + for (int i = 0; i < prefixes.length; i++) {
  592 + fieldInfo.prefix.addSetPosition(prefixes[i]);
  593 + }
  594 + }
586 595 }
587 596 }
588 597 }
... ... @@ -1875,8 +1884,9 @@ public class CodecCollector {
1875 1884 if (!termVector.dataDefaultCollector.checkExistenceNecessaryKeys()) {
1876 1885 needSecondRound = true;
1877 1886 }
1878   - }
1879   - }
  1887 + termVector.dataDefaultCollector.reduceToSegmentKeys();
  1888 + }
  1889 + }
1880 1890 return needSecondRound;
1881 1891 }
1882 1892  
... ...
src/mtas/codec/util/CodecComponent.java
... ... @@ -23,20 +23,15 @@ import mtas.parser.function.util.MtasFunctionParserFunction;
23 23 import mtas.parser.function.util.MtasFunctionParserFunctionDefault;
24 24 import org.apache.commons.lang.ArrayUtils;
25 25 import org.apache.lucene.search.spans.SpanQuery;
  26 +import org.apache.lucene.util.automaton.Automata;
  27 +import org.apache.lucene.util.automaton.Automaton;
26 28 import org.apache.lucene.util.automaton.CompiledAutomaton;
  29 +import org.apache.lucene.util.automaton.Operations;
27 30 import org.apache.lucene.util.automaton.RegExp;
28 31  
29 32 /**
30 33 * The Class CodecComponent.
31 34 */
32   -/**
33   - * @author matthijs
34   - *
35   - */
36   -/**
37   - * @author matthijs
38   - *
39   - */
40 35 public class CodecComponent {
41 36  
42 37 /**
... ... @@ -139,8 +134,10 @@ public class CodecComponent {
139 134 /**
140 135 * Instantiates a new component field.
141 136 *
142   - * @param field the field
143   - * @param uniqueKeyField the unique key field
  137 + * @param field
  138 + * the field
  139 + * @param uniqueKeyField
  140 + * the unique key field
144 141 */
145 142 public ComponentField(String field, String uniqueKeyField) {
146 143 this.field = field;
... ... @@ -172,42 +169,66 @@ public class CodecComponent {
172 169 /** The multiple position list. */
173 170 public TreeSet<String> multiplePositionList;
174 171  
  172 + /** The set position list. */
  173 + public TreeSet<String> setPositionList;
  174 +
175 175 /**
176 176 * Instantiates a new component prefix.
177 177 *
178   - * @param key the key
  178 + * @param key
  179 + * the key
179 180 */
180 181 public ComponentPrefix(String key) {
181 182 this.key = key;
182 183 singlePositionList = new TreeSet<String>();
183 184 multiplePositionList = new TreeSet<String>();
  185 + setPositionList = new TreeSet<String>();
184 186 }
185 187  
186 188 /**
187 189 * Adds the single position.
188 190 *
189   - * @param prefix the prefix
  191 + * @param prefix
  192 + * the prefix
190 193 */
191 194 public void addSinglePosition(String prefix) {
192   - if (!singlePositionList.contains(prefix)
193   - && !multiplePositionList.contains(prefix)) {
194   - singlePositionList.add(prefix);
  195 + if (!prefix.trim().equals("")) {
  196 + if (!singlePositionList.contains(prefix)
  197 + && !multiplePositionList.contains(prefix)) {
  198 + singlePositionList.add(prefix);
  199 + }
195 200 }
196 201 }
197 202  
198 203 /**
199 204 * Adds the multiple position.
200 205 *
201   - * @param prefix the prefix
  206 + * @param prefix
  207 + * the prefix
202 208 */
203 209 public void addMultiplePosition(String prefix) {
204   - if (!singlePositionList.contains(prefix)) {
205   - if (!multiplePositionList.contains(prefix)) {
  210 + if (!prefix.trim().equals("")) {
  211 + if (!singlePositionList.contains(prefix)) {
  212 + if (!multiplePositionList.contains(prefix)) {
  213 + multiplePositionList.add(prefix);
  214 + }
  215 + } else {
  216 + singlePositionList.remove(prefix);
206 217 multiplePositionList.add(prefix);
207 218 }
208   - } else {
209   - singlePositionList.remove(prefix);
210   - multiplePositionList.add(prefix);
  219 + }
  220 + }
  221 +
  222 + public void addSetPosition(String prefix) {
  223 + if (!prefix.trim().equals("")) {
  224 + if (!singlePositionList.contains(prefix)) {
  225 + if (!setPositionList.contains(prefix)) {
  226 + setPositionList.add(prefix);
  227 + }
  228 + } else {
  229 + singlePositionList.remove(prefix);
  230 + setPositionList.add(prefix);
  231 + }
211 232 }
212 233 }
213 234 }
... ... @@ -262,15 +283,24 @@ public class CodecComponent {
262 283 /**
263 284 * Instantiates a new component kwic.
264 285 *
265   - * @param query the query
266   - * @param key the key
267   - * @param prefixes the prefixes
268   - * @param number the number
269   - * @param start the start
270   - * @param left the left
271   - * @param right the right
272   - * @param output the output
273   - * @throws IOException Signals that an I/O exception has occurred.
  286 + * @param query
  287 + * the query
  288 + * @param key
  289 + * the key
  290 + * @param prefixes
  291 + * the prefixes
  292 + * @param number
  293 + * the number
  294 + * @param start
  295 + * the start
  296 + * @param left
  297 + * the left
  298 + * @param right
  299 + * the right
  300 + * @param output
  301 + * the output
  302 + * @throws IOException
  303 + * Signals that an I/O exception has occurred.
274 304 */
275 305 public ComponentKwic(SpanQuery query, String key, String prefixes,
276 306 Integer number, int start, int left, int right, String output)
... ... @@ -360,18 +390,30 @@ public class CodecComponent {
360 390 /**
361 391 * Instantiates a new component list.
362 392 *
363   - * @param spanQuery the span query
364   - * @param field the field
365   - * @param queryValue the query value
366   - * @param queryType the query type
367   - * @param key the key
368   - * @param prefix the prefix
369   - * @param start the start
370   - * @param number the number
371   - * @param left the left
372   - * @param right the right
373   - * @param output the output
374   - * @throws IOException Signals that an I/O exception has occurred.
  393 + * @param spanQuery
  394 + * the span query
  395 + * @param field
  396 + * the field
  397 + * @param queryValue
  398 + * the query value
  399 + * @param queryType
  400 + * the query type
  401 + * @param key
  402 + * the key
  403 + * @param prefix
  404 + * the prefix
  405 + * @param start
  406 + * the start
  407 + * @param number
  408 + * the number
  409 + * @param left
  410 + * the left
  411 + * @param right
  412 + * the right
  413 + * @param output
  414 + * the output
  415 + * @throws IOException
  416 + * Signals that an I/O exception has occurred.
375 417 */
376 418 public ComponentList(SpanQuery spanQuery, String field, String queryValue,
377 419 String queryType, String key, String prefix, int start, int number,
... ... @@ -454,25 +496,44 @@ public class CodecComponent {
454 496 /**
455 497 * Instantiates a new component group.
456 498 *
457   - * @param spanQuery the span query
458   - * @param field the field
459   - * @param queryValue the query value
460   - * @param queryType the query type
461   - * @param key the key
462   - * @param groupingHitInsidePrefixes the grouping hit inside prefixes
463   - * @param groupingHitInsideLeftPosition the grouping hit inside left position
464   - * @param groupingHitInsideLeftPrefixes the grouping hit inside left prefixes
465   - * @param groupingHitInsideRightPosition the grouping hit inside right position
466   - * @param groupingHitInsideRightPrefixes the grouping hit inside right prefixes
467   - * @param groupingHitLeftPosition the grouping hit left position
468   - * @param groupingHitLeftPrefixes the grouping hit left prefixes
469   - * @param groupingHitRightPosition the grouping hit right position
470   - * @param groupingHitRightPrefixes the grouping hit right prefixes
471   - * @param groupingLeftPosition the grouping left position
472   - * @param groupingLeftPrefixes the grouping left prefixes
473   - * @param groupingRightPosition the grouping right position
474   - * @param groupingRightPrefixes the grouping right prefixes
475   - * @throws IOException Signals that an I/O exception has occurred.
  499 + * @param spanQuery
  500 + * the span query
  501 + * @param field
  502 + * the field
  503 + * @param queryValue
  504 + * the query value
  505 + * @param queryType
  506 + * the query type
  507 + * @param key
  508 + * the key
  509 + * @param groupingHitInsidePrefixes
  510 + * the grouping hit inside prefixes
  511 + * @param groupingHitInsideLeftPosition
  512 + * the grouping hit inside left position
  513 + * @param groupingHitInsideLeftPrefixes
  514 + * the grouping hit inside left prefixes
  515 + * @param groupingHitInsideRightPosition
  516 + * the grouping hit inside right position
  517 + * @param groupingHitInsideRightPrefixes
  518 + * the grouping hit inside right prefixes
  519 + * @param groupingHitLeftPosition
  520 + * the grouping hit left position
  521 + * @param groupingHitLeftPrefixes
  522 + * the grouping hit left prefixes
  523 + * @param groupingHitRightPosition
  524 + * the grouping hit right position
  525 + * @param groupingHitRightPrefixes
  526 + * the grouping hit right prefixes
  527 + * @param groupingLeftPosition
  528 + * the grouping left position
  529 + * @param groupingLeftPrefixes
  530 + * the grouping left prefixes
  531 + * @param groupingRightPosition
  532 + * the grouping right position
  533 + * @param groupingRightPrefixes
  534 + * the grouping right prefixes
  535 + * @throws IOException
  536 + * Signals that an I/O exception has occurred.
476 537 */
477 538 public ComponentGroup(SpanQuery spanQuery, String field, String queryValue,
478 539 String queryType, String key, String groupingHitInsidePrefixes,
... ... @@ -537,11 +598,15 @@ public class CodecComponent {
537 598 /**
538 599 * Creates the positioned prefixes.
539 600 *
540   - * @param prefixList the prefix list
541   - * @param position the position
542   - * @param prefixes the prefixes
  601 + * @param prefixList
  602 + * the prefix list
  603 + * @param position
  604 + * the position
  605 + * @param prefixes
  606 + * the prefixes
543 607 * @return the hash set[]
544   - * @throws IOException Signals that an I/O exception has occurred.
  608 + * @throws IOException
  609 + * Signals that an I/O exception has occurred.
545 610 */
546 611 private static HashSet<String>[] createPositionedPrefixes(
547 612 HashSet<String> prefixList, String[] position, String[] prefixes)
... ... @@ -663,20 +728,34 @@ public class CodecComponent {
663 728 /**
664 729 * Instantiates a new component facet.
665 730 *
666   - * @param spanQueries the span queries
667   - * @param field the field
668   - * @param key the key
669   - * @param baseFields the base fields
670   - * @param baseFieldTypes the base field types
671   - * @param baseTypes the base types
672   - * @param baseSortTypes the base sort types
673   - * @param baseSortDirections the base sort directions
674   - * @param baseNumbers the base numbers
675   - * @param baseMinimumDoubles the base minimum doubles
676   - * @param baseMaximumDoubles the base maximum doubles
677   - * @param baseFunctions the base functions
678   - * @throws IOException Signals that an I/O exception has occurred.
679   - * @throws ParseException the parse exception
  731 + * @param spanQueries
  732 + * the span queries
  733 + * @param field
  734 + * the field
  735 + * @param key
  736 + * the key
  737 + * @param baseFields
  738 + * the base fields
  739 + * @param baseFieldTypes
  740 + * the base field types
  741 + * @param baseTypes
  742 + * the base types
  743 + * @param baseSortTypes
  744 + * the base sort types
  745 + * @param baseSortDirections
  746 + * the base sort directions
  747 + * @param baseNumbers
  748 + * the base numbers
  749 + * @param baseMinimumDoubles
  750 + * the base minimum doubles
  751 + * @param baseMaximumDoubles
  752 + * the base maximum doubles
  753 + * @param baseFunctions
  754 + * the base functions
  755 + * @throws IOException
  756 + * Signals that an I/O exception has occurred.
  757 + * @throws ParseException
  758 + * the parse exception
680 759 */
681 760 public ComponentFacet(SpanQuery[] spanQueries, String field, String key,
682 761 String[] baseFields, String[] baseFieldTypes, String[] baseTypes,
... ... @@ -787,7 +866,10 @@ public class CodecComponent {
787 866  
788 867 /** The sort direction. */
789 868 public String key, defaultDataType, functionDataType, defaultStatsType,
790   - functionStatsType, prefix, regexp, sortType, sortDirection;
  869 + functionStatsType, prefix, regexp, sortType, sortDirection, function,
  870 + statsType;
  871 +
  872 + public String[] list;
791 873  
792 874 /** The default stats items. */
793 875 public TreeSet<String> functionStatsItems, defaultStatsItems;
... ... @@ -816,35 +898,59 @@ public class CodecComponent {
816 898 /**
817 899 * Instantiates a new component term vector.
818 900 *
819   - * @param key the key
820   - * @param prefix the prefix
821   - * @param regexp the regexp
822   - * @param statsType the stats type
823   - * @param sortType the sort type
824   - * @param sortDirection the sort direction
825   - * @param startValue the start value
826   - * @param number the number
827   - * @param function the function
828   - * @throws IOException Signals that an I/O exception has occurred.
829   - * @throws ParseException the parse exception
  901 + * @param key
  902 + * the key
  903 + * @param prefix
  904 + * the prefix
  905 + * @param regexp
  906 + * the regexp
  907 + * @param statsType
  908 + * the stats type
  909 + * @param sortType
  910 + * the sort type
  911 + * @param sortDirection
  912 + * the sort direction
  913 + * @param startValue
  914 + * the start value
  915 + * @param number
  916 + * the number
  917 + * @param function
  918 + * the function
  919 + * @throws IOException
  920 + * Signals that an I/O exception has occurred.
  921 + * @throws ParseException
  922 + * the parse exception
830 923 */
831 924 public ComponentTermVector(String key, String prefix, String regexp,
832 925 String statsType, String sortType, String sortDirection,
833   - String startValue, int number, String function)
  926 + String startValue, int number, String function, String[] list)
834 927 throws IOException, ParseException {
835   - this.key = key;
  928 + this.key = key;
  929 + this.prefix = prefix;
  930 + this.regexp = regexp;
  931 + this.sortType = sortType == null ? CodecUtil.SORT_TERM : sortType;
  932 + this.sortDirection = sortDirection == null
  933 + ? (this.sortType == CodecUtil.SORT_TERM) ? CodecUtil.SORT_ASC
  934 + : CodecUtil.SORT_DESC
  935 + : sortDirection;
  936 + this.startValue = startValue;
  937 + this.start = 0;
  938 + this.number = number;
  939 + this.list = list;
  940 + this.function = function;
  941 + this.statsType = statsType;
836 942 this.defaultParser = new MtasFunctionParserFunctionDefault(1);
837 943 if (function != null) {
838 944 functionParser = new MtasFunctionParser(
839 945 new BufferedReader(new StringReader(function))).parse();
840 946 functionDataType = functionParser.getType();
841 947 functionStatsItems = CodecUtil.createStatsItems(statsType);
842   - functionStatsType = CodecUtil.createStatsType(
843   - this.functionStatsItems, this.sortType, this.functionParser);
  948 + functionStatsType = CodecUtil.createStatsType(this.functionStatsItems,
  949 + this.sortType, this.functionParser);
844 950 defaultDataType = defaultParser.getType();
845 951 defaultStatsItems = CodecUtil.createStatsItems(null);
846   - defaultStatsType = CodecUtil.createStatsType(
847   - this.defaultStatsItems, this.sortType, this.defaultParser);
  952 + defaultStatsType = CodecUtil.createStatsType(this.defaultStatsItems,
  953 + this.sortType, this.defaultParser);
848 954 } else {
849 955 functionParser = null;
850 956 functionDataType = null;
... ... @@ -852,19 +958,16 @@ public class CodecComponent {
852 958 functionStatsType = null;
853 959 defaultDataType = defaultParser.getType();
854 960 defaultStatsItems = CodecUtil.createStatsItems(statsType);
855   - defaultStatsType = CodecUtil.createStatsType(
856   - this.defaultStatsItems, this.sortType, this.defaultParser);
  961 + defaultStatsType = CodecUtil.createStatsType(this.defaultStatsItems,
  962 + this.sortType, this.defaultParser);
  963 + }
  964 + if (this.list != null) {
  965 + this.regexp = null;
  966 + this.number = list.length;
  967 + this.startValue = null;
  968 + this.sortType = CodecUtil.SORT_TERM;
  969 + this.sortDirection = CodecUtil.SORT_ASC;
857 970 }
858   - this.prefix = prefix;
859   - this.regexp = regexp;
860   - this.sortType = sortType == null ? CodecUtil.SORT_TERM : sortType;
861   - this.sortDirection = sortDirection == null
862   - ? (this.sortType == CodecUtil.SORT_TERM) ? CodecUtil.SORT_ASC
863   - : CodecUtil.SORT_DESC
864   - : sortDirection;
865   - this.startValue = startValue;
866   - this.start = 0;
867   - this.number = number;
868 971 if (!this.sortType.equals(CodecUtil.SORT_TERM)
869 972 && !CodecUtil.STATS_TYPES.contains(this.sortType)) {
870 973 throw new IOException("unknown sortType '" + this.sortType + "'");
... ... @@ -881,7 +984,7 @@ public class CodecComponent {
881 984 "unrecognized sortDirection '" + this.sortDirection + "'");
882 985 }
883 986 boolean segmentRegistration = !this.sortType.equals(CodecUtil.SORT_TERM);
884   - //create datacollectors
  987 + // create datacollectors
885 988 if (functionParser != null) {
886 989 dataFunctionCollector = DataCollector.getCollector(
887 990 DataCollector.COLLECTOR_TYPE_LIST, this.functionDataType,
... ... @@ -901,7 +1004,18 @@ public class CodecComponent {
901 1004 this.sortDirection, this.start, this.number, null, null, null, null,
902 1005 null, null, null, null, segmentRegistration);
903 1006 }
904   - if ((regexp == null) || (regexp.isEmpty())) {
  1007 + if ((list != null) && list.length > 0) {
  1008 + ArrayList<Automaton> al = new ArrayList<Automaton>(),
  1009 + asl = new ArrayList<Automaton>();
  1010 + Automaton postAutomaton = (new RegExp("\u0000*")).toAutomaton();
  1011 + for (String listItem : list) {
  1012 + asl.clear();
  1013 + asl.add(Automata.makeString(prefix + MtasToken.DELIMITER + listItem));
  1014 + asl.add(postAutomaton);
  1015 + al.add(Operations.concatenate(asl));
  1016 + }
  1017 + compiledAutomaton = new CompiledAutomaton(Operations.union(al));
  1018 + } else if ((regexp == null) || (regexp.isEmpty())) {
905 1019 RegExp re = new RegExp(prefix + MtasToken.DELIMITER + ".*");
906 1020 compiledAutomaton = new CompiledAutomaton(re.toAutomaton());
907 1021 } else {
... ... @@ -947,13 +1061,20 @@ public class CodecComponent {
947 1061 /**
948 1062 * Instantiates a new component span.
949 1063 *
950   - * @param queries the queries
951   - * @param key the key
952   - * @param minimumDouble the minimum double
953   - * @param maximumDouble the maximum double
954   - * @param type the type
955   - * @param function the function
956   - * @throws IOException Signals that an I/O exception has occurred.
  1064 + * @param queries
  1065 + * the queries
  1066 + * @param key
  1067 + * the key
  1068 + * @param minimumDouble
  1069 + * the minimum double
  1070 + * @param maximumDouble
  1071 + * the maximum double
  1072 + * @param type
  1073 + * the type
  1074 + * @param function
  1075 + * the function
  1076 + * @throws IOException
  1077 + * Signals that an I/O exception has occurred.
957 1078 */
958 1079 public ComponentSpan(SpanQuery[] queries, String key, Double minimumDouble,
959 1080 Double maximumDouble, String type, String function) throws IOException {
... ... @@ -1026,13 +1147,20 @@ public class CodecComponent {
1026 1147 /**
1027 1148 * Instantiates a new component position.
1028 1149 *
1029   - * @param field the field
1030   - * @param key the key
1031   - * @param minimumDouble the minimum double
1032   - * @param maximumDouble the maximum double
1033   - * @param statsType the stats type
1034   - * @throws IOException Signals that an I/O exception has occurred.
1035   - * @throws ParseException the parse exception
  1150 + * @param field
  1151 + * the field
  1152 + * @param key
  1153 + * the key
  1154 + * @param minimumDouble
  1155 + * the minimum double
  1156 + * @param maximumDouble
  1157 + * the maximum double
  1158 + * @param statsType
  1159 + * the stats type
  1160 + * @throws IOException
  1161 + * Signals that an I/O exception has occurred.
  1162 + * @throws ParseException
  1163 + * the parse exception
1036 1164 */
1037 1165 public ComponentPosition(String field, String key, Double minimumDouble,
1038 1166 Double maximumDouble, String statsType)
... ... @@ -1096,13 +1224,20 @@ public class CodecComponent {
1096 1224 /**
1097 1225 * Instantiates a new component token.
1098 1226 *
1099   - * @param field the field
1100   - * @param key the key
1101   - * @param minimumDouble the minimum double
1102   - * @param maximumDouble the maximum double
1103   - * @param statsType the stats type
1104   - * @throws IOException Signals that an I/O exception has occurred.
1105   - * @throws ParseException the parse exception
  1227 + * @param field
  1228 + * the field
  1229 + * @param key
  1230 + * the key
  1231 + * @param minimumDouble
  1232 + * the minimum double
  1233 + * @param maximumDouble
  1234 + * the maximum double
  1235 + * @param statsType
  1236 + * the stats type
  1237 + * @throws IOException
  1238 + * Signals that an I/O exception has occurred.
  1239 + * @throws ParseException
  1240 + * the parse exception
1106 1241 */
1107 1242 public ComponentToken(String field, String key, Double minimumDouble,
1108 1243 Double maximumDouble, String statsType)
... ... @@ -1151,8 +1286,10 @@ public class CodecComponent {
1151 1286 /**
1152 1287 * Instantiates a new kwic token.
1153 1288 *
1154   - * @param match the match
1155   - * @param tokens the tokens
  1289 + * @param match
  1290 + * the match
  1291 + * @param tokens
  1292 + * the tokens
1156 1293 */
1157 1294 public KwicToken(Match match, ArrayList<MtasToken<String>> tokens) {
1158 1295 startPosition = match.startPosition;
... ... @@ -1178,8 +1315,10 @@ public class CodecComponent {
1178 1315 /**
1179 1316 * Instantiates a new kwic hit.
1180 1317 *
1181   - * @param match the match
1182   - * @param hits the hits
  1318 + * @param match
  1319 + * the match
  1320 + * @param hits
  1321 + * the hits
1183 1322 */
1184 1323 public KwicHit(Match match, HashMap<Integer, ArrayList<String>> hits) {
1185 1324 startPosition = match.startPosition;
... ... @@ -1202,7 +1341,8 @@ public class CodecComponent {
1202 1341 /**
1203 1342 * Sort.
1204 1343 *
1205   - * @param data the data
  1344 + * @param data
  1345 + * the data
1206 1346 * @return the array list
1207 1347 */
1208 1348 private ArrayList<MtasTreeHit<String>> sort(
... ... @@ -1219,7 +1359,8 @@ public class CodecComponent {
1219 1359 /**
1220 1360 * Creates the hash.
1221 1361 *
1222   - * @param data the data
  1362 + * @param data
  1363 + * the data
1223 1364 * @return the string
1224 1365 */
1225 1366 private String createHash(ArrayList<MtasTreeHit<String>> data) {
... ... @@ -1234,12 +1375,18 @@ public class CodecComponent {
1234 1375 /**
1235 1376 * Instantiates a new group hit.
1236 1377 *
1237   - * @param list the list
1238   - * @param start the start
1239   - * @param end the end
1240   - * @param hitStart the hit start
1241   - * @param hitEnd the hit end
1242   - * @param group the group
  1378 + * @param list
  1379 + * the list
  1380 + * @param start
  1381 + * the start
  1382 + * @param end
  1383 + * the end
  1384 + * @param hitStart
  1385 + * the hit start
  1386 + * @param hitEnd
  1387 + * the hit end
  1388 + * @param group
  1389 + * the group
1243 1390 */
1244 1391 public GroupHit(ArrayList<MtasTreeHit<String>> list, int start, int end,
1245 1392 int hitStart, int hitEnd, ComponentGroup group) {
... ... @@ -1392,7 +1539,8 @@ public class CodecComponent {
1392 1539 /**
1393 1540 * Data to string.
1394 1541 *
1395   - * @param data the data
  1542 + * @param data
  1543 + * the data
1396 1544 * @return the string
1397 1545 */
1398 1546 private String dataToString(ArrayList<String>[] data) {
... ... @@ -1443,10 +1591,14 @@ public class CodecComponent {
1443 1591 /**
1444 1592 * Instantiates a new list token.
1445 1593 *
1446   - * @param docId the doc id
1447   - * @param docPosition the doc position
1448   - * @param match the match
1449   - * @param tokens the tokens
  1594 + * @param docId
  1595 + * the doc id
  1596 + * @param docPosition
  1597 + * the doc position
  1598 + * @param match
  1599 + * the match
  1600 + * @param tokens
  1601 + * the tokens
1450 1602 */
1451 1603 public ListToken(Integer docId, Integer docPosition, Match match,
1452 1604 ArrayList<MtasToken<String>> tokens) {
... ... @@ -1475,10 +1627,14 @@ public class CodecComponent {
1475 1627 /**
1476 1628 * Instantiates a new list hit.
1477 1629 *
1478   - * @param docId the doc id
1479   - * @param docPosition the doc position
1480   - * @param match the match
1481   - * @param hits the hits
  1630 + * @param docId
  1631 + * the doc id
  1632 + * @param docPosition
  1633 + * the doc position
  1634 + * @param match
  1635 + * the match
  1636 + * @param hits
  1637 + * the hits
1482 1638 */
1483 1639 public ListHit(Integer docId, Integer docPosition, Match match,
1484 1640 HashMap<Integer, ArrayList<String>> hits) {
... ... @@ -1504,8 +1660,10 @@ public class CodecComponent {
1504 1660 /**
1505 1661 * Instantiates a new match.
1506 1662 *
1507   - * @param startPosition the start position
1508   - * @param endPosition the end position
  1663 + * @param startPosition
  1664 + * the start position
  1665 + * @param endPosition
  1666 + * the end position
1509 1667 */
1510 1668 public Match(int startPosition, int endPosition) {
1511 1669 this.startPosition = startPosition;
... ...
src/mtas/codec/util/CodecInfo.java
... ... @@ -231,15 +231,16 @@ public class CodecInfo {
231 231 * @param hits the hits
232 232 * @param prefixes the prefixes
233 233 * @return the prefix filtered objects
  234 + * @throws IOException Signals that an I/O exception has occurred.
234 235 */
235 236 private ArrayList<MtasToken<String>> getPrefixFilteredObjects(
236   - List<MtasTreeHit<?>> hits, ArrayList<String> prefixes) {
  237 + List<MtasTreeHit<?>> hits, ArrayList<String> prefixes) throws IOException {
237 238 ArrayList<MtasToken<String>> tokens = new ArrayList<MtasToken<String>>();
238 239 IndexInput inObject = indexInputList.get("object");
239 240 IndexInput inTerm = indexInputList.get("term");
240 241 for (MtasTreeHit<?> hit : hits) {
241 242 MtasToken<String> token = MtasCodecPostingsFormat.getToken(inObject,
242   - inTerm, hit.ref);
  243 + inTerm, hit.ref);
243 244 if (token != null) {
244 245 if (prefixes.size() > 0) {
245 246 if (prefixes.contains(token.getPrefix())) {
... ... @@ -373,8 +374,9 @@ public class CodecInfo {
373 374 *
374 375 * @param hits the hits
375 376 * @return the objects
  377 + * @throws IOException Signals that an I/O exception has occurred.
376 378 */
377   - public ArrayList<MtasToken<String>> getObjects(List<MtasTreeHit<?>> hits) {
  379 + public ArrayList<MtasToken<String>> getObjects(List<MtasTreeHit<?>> hits) throws IOException {
378 380 ArrayList<MtasToken<String>> tokens = new ArrayList<MtasToken<String>>();
379 381 IndexInput inObject = indexInputList.get("object");
380 382 IndexInput inTerm = indexInputList.get("term");
... ...
src/mtas/codec/util/DataCollector.java
... ... @@ -122,7 +122,7 @@ public class DataCollector {
122 122  
123 123 /** The size. */
124 124 // size and position current level
125   - private int size;
  125 + protected int size;
126 126  
127 127 /** The position. */
128 128 protected int position;
... ... @@ -160,8 +160,9 @@ public class DataCollector {
160 160 protected HashMap<String, Integer>[] errorList;
161 161  
162 162 /** The key list. */
163   - // administration keys
164 163 protected String[] keyList;
  164 +
  165 + protected int[] sourceNumberList;
165 166  
166 167 /** The segment registration. */
167 168 protected boolean segmentRegistration;
... ... @@ -234,6 +235,8 @@ public class DataCollector {
234 235  
235 236 /** The new key list. */
236 237 protected String[] newKeyList = null;
  238 +
  239 + protected int[] newSourceNumberList = null;
237 240  
238 241 /** The new error number. */
239 242 protected int[] newErrorNumber;
... ... @@ -308,6 +311,7 @@ public class DataCollector {
308 311 }
309 312 // initialize administration
310 313 keyList = new String[0];
  314 + sourceNumberList = new int[0];
311 315 errorNumber = new int[0];
312 316 errorList = (HashMap<String, Integer>[]) new HashMap<?, ?>[0];
313 317 size = 0;
... ... @@ -394,7 +398,7 @@ public class DataCollector {
394 398 * @param newDataCollector the new data collector
395 399 * @throws IOException Signals that an I/O exception has occurred.
396 400 */
397   - abstract public void merge(MtasDataCollector<?, ?> newDataCollector)
  401 + abstract public void merge(MtasDataCollector<?, ?> newDataCollector, boolean increaseSourceNumber)
398 402 throws IOException;
399 403  
400 404 /**
... ... @@ -445,6 +449,7 @@ public class DataCollector {
445 449 newCurrentPosition = 0;
446 450 newSize = maxNumberOfTerms + size;
447 451 newKeyList = new String[newSize];
  452 + newSourceNumberList = new int[newSize];
448 453 newErrorNumber = new int[newSize];
449 454 newErrorList = (HashMap<String, Integer>[]) new HashMap<?, ?>[newSize];
450 455 if (hasSub) {
... ... @@ -457,14 +462,17 @@ public class DataCollector {
457 462 */
458 463 protected void increaseNewListSize() {
459 464 String[] tmpNewKeyList = newKeyList;
  465 + int[] tmpNewSourceNumberList = newSourceNumberList;
460 466 int[] tmpNewErrorNumber = newErrorNumber;
461 467 HashMap<String, Integer>[] tmpNewErrorList = newErrorList;
462 468 int tmpNewSize = newSize;
463 469 newSize = 2 * newSize;
464 470 newKeyList = new String[newSize];
  471 + newSourceNumberList = new int[newSize];
465 472 newErrorNumber = new int[newSize];
466 473 newErrorList = (HashMap<String, Integer>[]) new HashMap<?, ?>[newSize];
467 474 System.arraycopy(tmpNewKeyList, 0, newKeyList, 0, tmpNewSize);
  475 + System.arraycopy(tmpNewSourceNumberList, 0, newSourceNumberList, 0, tmpNewSize);
468 476 System.arraycopy(tmpNewErrorNumber, 0, newErrorNumber, 0, tmpNewSize);
469 477 System.arraycopy(tmpNewErrorList, 0, newErrorList, 0, tmpNewSize);
470 478 if (hasSub) {
... ... @@ -481,7 +489,7 @@ public class DataCollector {
481 489 * @return the mtas data collector
482 490 * @throws IOException Signals that an I/O exception has occurred.
483 491 */
484   - protected final MtasDataCollector<?, ?> add() throws IOException {
  492 + protected final MtasDataCollector<?, ?> add(boolean increaseSourceNumber) throws IOException {
485 493 if (!collectorType.equals(COLLECTOR_TYPE_DATA)) {
486 494 throw new IOException("collector should be " + COLLECTOR_TYPE_DATA);
487 495 } else {
... ... @@ -490,6 +498,10 @@ public class DataCollector {
490 498 } else if (position < getSize()) {
491 499 // copy
492 500 newKeyList[0] = keyList[0];
  501 + newSourceNumberList[0] = sourceNumberList[0];
  502 + if(increaseSourceNumber) {
  503 + newSourceNumberList[0]++;
  504 + }
493 505 newErrorNumber[0] = errorNumber[0];
494 506 newErrorList[0] = errorList[0];
495 507 if (hasSub) {
... ... @@ -502,6 +514,7 @@ public class DataCollector {
502 514 } else {
503 515 // add key
504 516 newKeyList[0] = COLLECTOR_TYPE_DATA;
  517 + newSourceNumberList[0] = 1;
505 518 newErrorNumber[0] = 0;
506 519 newErrorList[0] = new HashMap<String, Integer>();
507 520 newPosition = 1;
... ... @@ -531,7 +544,7 @@ public class DataCollector {
531 544 * @return the mtas data collector
532 545 * @throws IOException Signals that an I/O exception has occurred.
533 546 */
534   - protected final MtasDataCollector<?, ?> add(String key) throws IOException {
  547 + protected final MtasDataCollector<?, ?> add(String key, boolean increaseSourceNumber) throws IOException {
535 548 if (collectorType.equals(COLLECTOR_TYPE_DATA)) {
536 549 throw new IOException("collector should be " + COLLECTOR_TYPE_LIST);
537 550 } else if (key == null) {
... ... @@ -563,6 +576,7 @@ public class DataCollector {
563 576 }
564 577 // copy
565 578 newKeyList[newPosition] = keyList[position];
  579 + newSourceNumberList[newPosition] = sourceNumberList[position];
566 580 newErrorNumber[newPosition] = errorNumber[position];
567 581 newErrorList[newPosition] = errorList[position];
568 582 if (hasSub) {
... ... @@ -573,6 +587,9 @@ public class DataCollector {
573 587 position++;
574 588 // check if added key from list is right key
575 589 if (newKeyList[(newPosition - 1)].equals(key)) {
  590 + if(increaseSourceNumber) {
  591 + newSourceNumberList[(newPosition-1)]++;
  592 + }
576 593 newCurrentPosition = newPosition - 1;
577 594 newCurrentExisting = true;
578 595 // ready
... ... @@ -593,6 +610,7 @@ public class DataCollector {
593 610 }
594 611 // add key
595 612 newKeyList[newPosition] = key;
  613 + newSourceNumberList[newPosition] = 1;
596 614 newErrorNumber[newPosition] = 0;
597 615 newErrorList[newPosition] = new HashMap<String, Integer>();
598 616 newPosition++;
... ... @@ -731,6 +749,8 @@ public class DataCollector {
731 749 this.segmentName = null;
732 750 }
733 751  
  752 + public abstract void reduceToSegmentKeys();
  753 +
734 754 /**
735 755 * Check existence necessary keys.
736 756 *
... ... @@ -870,10 +890,12 @@ public class DataCollector {
870 890 protected void remapData(int[][] mapping) throws IOException {
871 891 // remap and merge keys
872 892 String[] newKeyList = new String[mapping.length];
  893 + int[] newSourceNumberList = new int[mapping.length];
873 894 int[] newErrorNumber = new int[mapping.length];
874 895 HashMap<String, Integer>[] newErrorList = (HashMap<String, Integer>[]) new HashMap<?, ?>[mapping.length];
875 896 for (int i = 0; i < mapping.length; i++) {
876 897 newKeyList[i] = keyList[mapping[i][0]];
  898 + newSourceNumberList[i] = sourceNumberList[mapping[i][0]];
877 899 for (int j = 0; j < mapping[i].length; j++) {
878 900 if (j == 0) {
879 901 newErrorNumber[i] = errorNumber[mapping[i][j]];
... ... @@ -900,13 +922,14 @@ public class DataCollector {
900 922 newSubCollectorListNextLevel[i] = subCollectorListNextLevel[mapping[i][j]];
901 923 } else {
902 924 newSubCollectorListNextLevel[i]
903   - .merge(subCollectorListNextLevel[mapping[i][j]]);
  925 + .merge(subCollectorListNextLevel[mapping[i][j]], false);
904 926 }
905 927 }
906 928 }
907 929 subCollectorListNextLevel = newSubCollectorListNextLevel;
908 930 }
909 931 keyList = newKeyList;
  932 + sourceNumberList = newSourceNumberList;
910 933 errorNumber = newErrorNumber;
911 934 errorList = newErrorList;
912 935 size = keyList.length;
... ... @@ -929,6 +952,7 @@ public class DataCollector {
929 952 increaseNewListSize();
930 953 }
931 954 newKeyList[newPosition] = keyList[position];
  955 + newSourceNumberList[newPosition] = sourceNumberList[position];
932 956 newErrorNumber[newPosition] = errorNumber[position];
933 957 newErrorList[newPosition] = errorList[position];
934 958 if (hasSub) {
... ... @@ -940,6 +964,7 @@ public class DataCollector {
940 964 }
941 965 // copy
942 966 keyList = newKeyList;
  967 + sourceNumberList = newSourceNumberList;
943 968 errorNumber = newErrorNumber;
944 969 errorList = newErrorList;
945 970 subCollectorListNextLevel = newSubCollectorListNextLevel;
... ... @@ -1093,13 +1118,17 @@ public class DataCollector {
1093 1118 + statsType + " " + statsItems + " " + hasSub;
1094 1119 }
1095 1120  
  1121 + public final SortedMap<String, T2> getList() throws IOException {
  1122 + return getList(false);
  1123 + }
  1124 +
1096 1125 /**
1097 1126 * Gets the list.
1098 1127 *
1099 1128 * @return the list
1100 1129 * @throws IOException Signals that an I/O exception has occurred.
1101 1130 */
1102   - public final SortedMap<String, T2> getList() throws IOException {
  1131 + public final SortedMap<String, T2> getList(boolean showFull) throws IOException {
1103 1132 final TreeMap<String, T2> basicList = getBasicList();
1104 1133 SortedMap<String, T2> list = null;
1105 1134 if (sortType.equals(CodecUtil.SORT_TERM)) {
... ... @@ -1127,7 +1156,7 @@ public class DataCollector {
1127 1156 throw new IOException("unknown sort type " + sortType);
1128 1157 }
1129 1158 int start = this.start == null ? 0 : this.start;
1130   - if (number == null || (start == 0 && number >= list.size())) {
  1159 + if (showFull || number == null || (start == 0 && number >= list.size())) {
1131 1160 // full list
1132 1161 return list;
1133 1162 } else if (start < list.size() && number > 0) {
... ...
src/mtas/codec/util/collector/MtasDataAdvanced.java
... ... @@ -88,7 +88,7 @@ abstract class MtasDataAdvanced&lt;T1 extends Number, T2 extends Number, T3 extends
88 88 */
89 89 @Override
90 90 public final void error(String error) throws IOException {
91   - add();
  91 + add(false);
92 92 setError(newCurrentPosition, error, newCurrentExisting);
93 93 }
94 94  
... ... @@ -103,7 +103,7 @@ abstract class MtasDataAdvanced&lt;T1 extends Number, T2 extends Number, T3 extends
103 103 public final void error(String[] keys, String error) throws IOException {
104 104 if (keys != null && keys.length > 0) {
105 105 for (int i = 0; i < keys.length; i++) {
106   - add(keys[i]);
  106 + add(keys[i], false);
107 107 setError(newCurrentPosition, error, newCurrentExisting);
108 108 }
109 109 }
... ... @@ -175,6 +175,32 @@ abstract class MtasDataAdvanced&lt;T1 extends Number, T2 extends Number, T3 extends
175 175 tmpOldSize);
176 176 }
177 177  
  178 + public void reduceToSegmentKeys() {
  179 + if(segmentRegistration) {
  180 + int sizeCopy = size;
  181 + String[] keyListCopy = keyList.clone();
  182 + T1[] advancedValueSumListCopy = advancedValueSumList.clone();
  183 + T1[] advancedValueMaxListCopy = advancedValueMaxList.clone();
  184 + T1[] advancedValueMinListCopy = advancedValueMinList.clone();
  185 + T1[] advancedValueSumOfSquaresListCopy = advancedValueSumOfSquaresList.clone();
  186 + T2[] advancedValueSumOfLogsListCopy = advancedValueSumOfLogsList.clone();
  187 + long[] advancedValueNListCopy = advancedValueNList.clone();
  188 + size = 0;
  189 + for(int i=0; i< sizeCopy; i++) {
  190 + if(segmentKeys.contains(keyListCopy[i])) {
  191 + keyList[size] = keyListCopy[i];
  192 + advancedValueSumList[size] = advancedValueSumListCopy[i];
  193 + advancedValueMaxList[size] = advancedValueMaxListCopy[i];
  194 + advancedValueMinList[size] = advancedValueMinListCopy[i];
  195 + advancedValueSumOfSquaresList[size] = advancedValueSumOfSquaresListCopy[i];
  196 + advancedValueSumOfLogsList[size] = advancedValueSumOfLogsListCopy[i];
  197 + advancedValueNList[size] = advancedValueNListCopy[i];
  198 + size++;
  199 + }
  200 + }
  201 + }
  202 + }
  203 +
178 204 /*
179 205 * (non-Javadoc)
180 206 *
... ... @@ -332,7 +358,7 @@ abstract class MtasDataAdvanced&lt;T1 extends Number, T2 extends Number, T3 extends
332 358 * DataCollector.MtasDataCollector)
333 359 */
334 360 @Override
335   - public void merge(MtasDataCollector<?, ?> newDataCollector)
  361 + public void merge(MtasDataCollector<?, ?> newDataCollector, boolean increaseSourceNumber)
336 362 throws IOException {
337 363 closeNewList();
338 364 if (!collectorType.equals(newDataCollector.getCollectorType())
... ... @@ -347,7 +373,7 @@ abstract class MtasDataAdvanced&lt;T1 extends Number, T2 extends Number, T3 extends
347 373 if (collectorType.equals(DataCollector.COLLECTOR_TYPE_LIST)) {
348 374 for (int i = 0; i < newMtasDataAdvanced.getSize(); i++) {
349 375 MtasDataCollector<?, ?>[] subCollectors = new MtasDataCollector[1];
350   - subCollectors[0] = add(newMtasDataAdvanced.keyList[i]);
  376 + subCollectors[0] = add(newMtasDataAdvanced.keyList[i], increaseSourceNumber);
351 377 setError(newCurrentPosition, newMtasDataAdvanced.errorNumber[i],
352 378 newMtasDataAdvanced.errorList[i], newCurrentExisting);
353 379 setValue(newCurrentPosition,
... ... @@ -359,13 +385,13 @@ abstract class MtasDataAdvanced&lt;T1 extends Number, T2 extends Number, T3 extends
359 385 newMtasDataAdvanced.advancedValueNList[i], newCurrentExisting);
360 386 if (hasSub() && newMtasDataAdvanced.hasSub()) {
361 387 subCollectors[0]
362   - .merge(newMtasDataAdvanced.subCollectorListNextLevel[i]);
  388 + .merge(newMtasDataAdvanced.subCollectorListNextLevel[i], increaseSourceNumber);
363 389 }
364 390 }
365 391 closeNewList();
366 392 } else if (collectorType.equals(DataCollector.COLLECTOR_TYPE_DATA)) {
367 393 if (newMtasDataAdvanced.getSize() > 0) {
368   - MtasDataCollector subCollector = add();
  394 + MtasDataCollector subCollector = add(increaseSourceNumber);
369 395 setError(newCurrentPosition, newMtasDataAdvanced.errorNumber[0],
370 396 newMtasDataAdvanced.errorList[0], newCurrentExisting);
371 397 setValue(newCurrentPosition,
... ... @@ -376,7 +402,7 @@ abstract class MtasDataAdvanced&lt;T1 extends Number, T2 extends Number, T3 extends
376 402 newMtasDataAdvanced.advancedValueMaxList[0],
377 403 newMtasDataAdvanced.advancedValueNList[0], newCurrentExisting);
378 404 if (hasSub() && newMtasDataAdvanced.hasSub()) {
379   - subCollector.merge(newMtasDataAdvanced.subCollectorNextLevel);
  405 + subCollector.merge(newMtasDataAdvanced.subCollectorNextLevel, increaseSourceNumber);
380 406 }
381 407 }
382 408 closeNewList();
... ...
src/mtas/codec/util/collector/MtasDataBasic.java
... ... @@ -10,9 +10,12 @@ import mtas.codec.util.DataCollector.MtasDataCollector;
10 10 /**
11 11 * The Class MtasDataBasic.
12 12 *
13   - * @param <T1> the generic type
14   - * @param <T2> the generic type
15   - * @param <T3> the generic type
  13 + * @param <T1>
  14 + * the generic type
  15 + * @param <T2>
  16 + * the generic type
  17 + * @param <T3>
  18 + * the generic type
16 19 */
17 20 abstract class MtasDataBasic<T1 extends Number, T2 extends Number, T3 extends MtasDataItem<T1>>
18 21 extends MtasDataCollector<T1, T3> implements Serializable {
... ... @@ -32,24 +35,42 @@ abstract class MtasDataBasic&lt;T1 extends Number, T2 extends Number, T3 extends Mt
32 35 /**
33 36 * Instantiates a new mtas data basic.
34 37 *
35   - * @param collectorType the collector type
36   - * @param dataType the data type
37   - * @param statsItems the stats items
38   - * @param sortType the sort type
39   - * @param sortDirection the sort direction
40   - * @param start the start
41   - * @param number the number
42   - * @param subCollectorTypes the sub collector types
43   - * @param subDataTypes the sub data types
44   - * @param subStatsTypes the sub stats types
45   - * @param subStatsItems the sub stats items
46   - * @param subSortTypes the sub sort types
47   - * @param subSortDirections the sub sort directions
48   - * @param subStart the sub start
49   - * @param subNumber the sub number
50   - * @param operations the operations
51   - * @param segmentRegistration the segment registration
52   - * @throws IOException Signals that an I/O exception has occurred.
  38 + * @param collectorType
  39 + * the collector type
  40 + * @param dataType
  41 + * the data type
  42 + * @param statsItems
  43 + * the stats items
  44 + * @param sortType
  45 + * the sort type
  46 + * @param sortDirection
  47 + * the sort direction
  48 + * @param start
  49 + * the start
  50 + * @param number
  51 + * the number
  52 + * @param subCollectorTypes
  53 + * the sub collector types
  54 + * @param subDataTypes
  55 + * the sub data types
  56 + * @param subStatsTypes
  57 + * the sub stats types
  58 + * @param subStatsItems
  59 + * the sub stats items
  60 + * @param subSortTypes
  61 + * the sub sort types
  62 + * @param subSortDirections
  63 + * the sub sort directions
  64 + * @param subStart
  65 + * the sub start
  66 + * @param subNumber
  67 + * the sub number
  68 + * @param operations
  69 + * the operations
  70 + * @param segmentRegistration
  71 + * the segment registration
  72 + * @throws IOException
  73 + * Signals that an I/O exception has occurred.
53 74 */
54 75 public MtasDataBasic(String collectorType, String dataType,
55 76 TreeSet<String> statsItems, String sortType, String sortDirection,
... ... @@ -74,7 +95,7 @@ abstract class MtasDataBasic&lt;T1 extends Number, T2 extends Number, T3 extends Mt
74 95 */
75 96 @Override
76 97 public final void error(String error) throws IOException {
77   - add();
  98 + add(false);
78 99 setError(newCurrentPosition, error, newCurrentExisting);
79 100 }
80 101  
... ... @@ -89,7 +110,7 @@ abstract class MtasDataBasic&lt;T1 extends Number, T2 extends Number, T3 extends Mt
89 110 public final void error(String[] keys, String error) throws IOException {
90 111 if (keys != null && keys.length > 0) {
91 112 for (int i = 0; i < keys.length; i++) {
92   - add(keys[i]);
  113 + add(keys[i], false);
93 114 setError(newCurrentPosition, error, newCurrentExisting);
94 115 }
95 116 }
... ... @@ -98,9 +119,12 @@ abstract class MtasDataBasic&lt;T1 extends Number, T2 extends Number, T3 extends Mt
98 119 /**
99 120 * Sets the error.
100 121 *
101   - * @param newPosition the new position
102   - * @param error the error
103   - * @param currentExisting the current existing
  122 + * @param newPosition
  123 + * the new position
  124 + * @param error
  125 + * the error
  126 + * @param currentExisting
  127 + * the current existing
104 128 */
105 129 protected void setError(int newPosition, String error,
106 130 boolean currentExisting) {
... ... @@ -120,10 +144,14 @@ abstract class MtasDataBasic&lt;T1 extends Number, T2 extends Number, T3 extends Mt
120 144 /**
121 145 * Sets the value.
122 146 *
123   - * @param newPosition the new position
124   - * @param valueSum the value sum
125   - * @param valueN the value n
126   - * @param currentExisting the current existing
  147 + * @param newPosition
  148 + * the new position
  149 + * @param valueSum
  150 + * the value sum
  151 + * @param valueN
  152 + * the value n
  153 + * @param currentExisting
  154 + * the current existing
127 155 */
128 156 protected void setValue(int newPosition, T1 valueSum, long valueN,
129 157 boolean currentExisting) {
... ... @@ -142,10 +170,14 @@ abstract class MtasDataBasic&lt;T1 extends Number, T2 extends Number, T3 extends Mt
142 170 /**
143 171 * Sets the value.
144 172 *
145   - * @param newPosition the new position
146   - * @param values the values
147   - * @param number the number
148   - * @param currentExisting the current existing
  173 + * @param newPosition
  174 + * the new position
  175 + * @param values
  176 + * the values
  177 + * @param number
  178 + * the number
  179 + * @param currentExisting
  180 + * the current existing
149 181 */
150 182 protected void setValue(int newPosition, T1[] values, int number,
151 183 boolean currentExisting) {
... ... @@ -189,6 +221,24 @@ abstract class MtasDataBasic&lt;T1 extends Number, T2 extends Number, T3 extends Mt
189 221 tmpOldSize);
190 222 }
191 223  
  224 + public void reduceToSegmentKeys() {
  225 + if (segmentRegistration) {
  226 + int sizeCopy = size;
  227 + String[] keyListCopy = keyList.clone();
  228 + T1[] basicValueSumListCopy = basicValueSumList.clone();
  229 + long[] basicValueNListCopy = basicValueNList.clone();
  230 + size = 0;
  231 + for (int i = 0; i < sizeCopy; i++) {
  232 + if (segmentKeys.contains(keyListCopy[i])) {
  233 + keyList[size] = keyListCopy[i];
  234 + basicValueSumList[size] = basicValueSumListCopy[i];
  235 + basicValueNList[size] = basicValueNListCopy[i];
  236 + size++;
  237 + }
  238 + }
  239 + }
  240 + }
  241 +
192 242 /*
193 243 * (non-Javadoc)
194 244 *
... ... @@ -245,8 +295,8 @@ abstract class MtasDataBasic&lt;T1 extends Number, T2 extends Number, T3 extends Mt
245 295 * DataCollector.MtasDataCollector)
246 296 */
247 297 @Override
248   - public void merge(MtasDataCollector<?, ?> newDataCollector)
249   - throws IOException {
  298 + public void merge(MtasDataCollector<?, ?> newDataCollector,
  299 + boolean increaseSourceNumber) throws IOException {
250 300 closeNewList();
251 301 if (!collectorType.equals(newDataCollector.getCollectorType())
252 302 || !dataType.equals(newDataCollector.getDataType())
... ... @@ -261,27 +311,30 @@ abstract class MtasDataBasic&lt;T1 extends Number, T2 extends Number, T3 extends Mt
261 311 String[] keys = new String[1];
262 312 for (int i = 0; i < newMtasDataBasic.getSize(); i++) {
263 313 MtasDataCollector<?, ?>[] subCollectors = new MtasDataCollector[1];
264   - subCollectors[0] = add(newMtasDataBasic.keyList[i]);
  314 + subCollectors[0] = add(newMtasDataBasic.keyList[i],
  315 + increaseSourceNumber);
265 316 setError(newCurrentPosition, newMtasDataBasic.errorNumber[i],
266 317 newMtasDataBasic.errorList[i], newCurrentExisting);
267 318 setValue(newCurrentPosition, newMtasDataBasic.basicValueSumList[i],
268 319 newMtasDataBasic.basicValueNList[i], newCurrentExisting);
269 320 if (hasSub() && newMtasDataBasic.hasSub()) {
270 321 // single key implies exactly one subCollector if hasSub
271   - subCollectors[0]
272   - .merge(newMtasDataBasic.subCollectorListNextLevel[i]);
  322 + subCollectors[0].merge(
  323 + newMtasDataBasic.subCollectorListNextLevel[i],
  324 + increaseSourceNumber);
273 325 }
274 326 }
275 327 closeNewList();
276 328 } else if (collectorType.equals(DataCollector.COLLECTOR_TYPE_DATA)) {
277 329 if (newMtasDataBasic.getSize() > 0) {
278   - MtasDataCollector<?, ?> subCollector = add();
  330 + MtasDataCollector<?, ?> subCollector = add(increaseSourceNumber);
279 331 setError(newCurrentPosition, newMtasDataBasic.errorNumber[0],
280 332 newMtasDataBasic.errorList[0], newCurrentExisting);
281 333 setValue(newCurrentPosition, newMtasDataBasic.basicValueSumList[0],
282 334 newMtasDataBasic.basicValueNList[0], newCurrentExisting);
283 335 if (hasSub() && newMtasDataBasic.hasSub()) {
284   - subCollector.merge(newMtasDataBasic.subCollectorNextLevel);
  336 + subCollector.merge(newMtasDataBasic.subCollectorNextLevel,
  337 + increaseSourceNumber);
285 338 }
286 339 }
287 340 closeNewList();
... ... @@ -318,7 +371,8 @@ abstract class MtasDataBasic&lt;T1 extends Number, T2 extends Number, T3 extends Mt
318 371 /**
319 372 * Inits the new list basic.
320 373 *
321   - * @param maxNumberOfTerms the max number of terms
  374 + * @param maxNumberOfTerms
  375 + * the max number of terms
322 376 */
323 377 private void initNewListBasic(int maxNumberOfTerms) {
324 378 newBasicValueSumList = operations.createVector1(newSize);
... ...
src/mtas/codec/util/collector/MtasDataDoubleAdvanced.java
... ... @@ -61,7 +61,7 @@ public class MtasDataDoubleAdvanced
61 61 advancedValueSumOfLogsList[i], advancedValueSumOfSquaresList[i],
62 62 advancedValueMinList[i], advancedValueMaxList[i],
63 63 advancedValueNList[i], hasSub() ? subCollectorListNextLevel[i] : null,
64   - statsItems, sortType, sortDirection, errorNumber[i], errorList[i]);
  64 + statsItems, sortType, sortDirection, errorNumber[i], errorList[i], sourceNumberList[i]);
65 65 }
66 66  
67 67 /*
... ... @@ -83,7 +83,7 @@ public class MtasDataDoubleAdvanced
83 83 @Override
84 84 public MtasDataCollector<?, ?> add(long[] values, int number)
85 85 throws IOException {
86   - MtasDataCollector<?, ?> dataCollector = add();
  86 + MtasDataCollector<?, ?> dataCollector = add(false);
87 87 Double[] newValues = new Double[number];
88 88 for (int i = 0; i < values.length; i++)
89 89 newValues[i] = Long.valueOf(values[i]).doubleValue();
... ... @@ -110,7 +110,7 @@ public class MtasDataDoubleAdvanced
110 110 @Override
111 111 public MtasDataCollector<?, ?> add(double[] values, int number)
112 112 throws IOException {
113   - MtasDataCollector<?, ?> dataCollector = add();
  113 + MtasDataCollector<?, ?> dataCollector = add(false);
114 114 setValue(newCurrentPosition, ArrayUtils.toObject(values), number,
115 115 newCurrentExisting);
116 116 return dataCollector;
... ... @@ -145,7 +145,7 @@ public class MtasDataDoubleAdvanced
145 145 newValues[i] = Long.valueOf(values[i]).doubleValue();
146 146 MtasDataCollector<?, ?>[] subCollectors = new MtasDataCollector<?, ?>[keys.length];
147 147 for (int i = 0; i < keys.length; i++) {
148   - subCollectors[i] = add(keys[i]);
  148 + subCollectors[i] = add(keys[i], false);
149 149 setValue(newCurrentPosition, newValues, number, newCurrentExisting);
150 150 }
151 151 return subCollectors;
... ... @@ -180,7 +180,7 @@ public class MtasDataDoubleAdvanced
180 180 if (keys != null && keys.length > 0) {
181 181 MtasDataCollector<?, ?>[] subCollectors = new MtasDataCollector<?, ?>[keys.length];
182 182 for (int i = 0; i < keys.length; i++) {
183   - subCollectors[i] = add(keys[i]);
  183 + subCollectors[i] = add(keys[i], false);
184 184 setValue(newCurrentPosition, ArrayUtils.toObject(values), number,
185 185 newCurrentExisting);
186 186 }
... ...
src/mtas/codec/util/collector/MtasDataDoubleBasic.java
... ... @@ -58,7 +58,7 @@ public class MtasDataDoubleBasic
58 58 protected MtasDataItemDoubleBasic getItem(int i) {
59 59 return new MtasDataItemDoubleBasic(basicValueSumList[i],
60 60 basicValueNList[i], hasSub() ? subCollectorListNextLevel[i] : null,
61   - statsItems, sortType, sortDirection, errorNumber[i], errorList[i]);
  61 + statsItems, sortType, sortDirection, errorNumber[i], errorList[i], sourceNumberList[i]);
62 62 }
63 63  
64 64 /*
... ... @@ -69,7 +69,7 @@ public class MtasDataDoubleBasic
69 69 @Override
70 70 public MtasDataCollector<?, ?> add(long valueSum, long valueN)
71 71 throws IOException {
72   - MtasDataCollector<?, ?> dataCollector = add();
  72 + MtasDataCollector<?, ?> dataCollector = add(false);
73 73 setValue(newCurrentPosition, Double.valueOf(valueSum), valueN,
74 74 newCurrentExisting);
75 75 return dataCollector;
... ... @@ -83,7 +83,7 @@ public class MtasDataDoubleBasic
83 83 @Override
84 84 public MtasDataCollector<?, ?> add(long[] values, int number)
85 85 throws IOException {
86   - MtasDataCollector<?, ?> dataCollector = add();
  86 + MtasDataCollector<?, ?> dataCollector = add(false);
87 87 Double[] newValues = new Double[number];
88 88 for (int i = 0; i < values.length; i++)
89 89 newValues[i] = Long.valueOf(values[i]).doubleValue();
... ... @@ -99,7 +99,7 @@ public class MtasDataDoubleBasic
99 99 @Override
100 100 public MtasDataCollector<?, ?> add(double valueSum, long valueN)
101 101 throws IOException {
102   - MtasDataCollector<?, ?> dataCollector = add();
  102 + MtasDataCollector<?, ?> dataCollector = add(false);
103 103 setValue(newCurrentPosition, valueSum, valueN, newCurrentExisting);
104 104 return dataCollector;
105 105 }
... ... @@ -112,7 +112,7 @@ public class MtasDataDoubleBasic
112 112 @Override
113 113 public MtasDataCollector<?, ?> add(double[] values, int number)
114 114 throws IOException {
115   - MtasDataCollector<?, ?> dataCollector = add();
  115 + MtasDataCollector<?, ?> dataCollector = add(false);
116 116 setValue(newCurrentPosition, ArrayUtils.toObject(values), number,
117 117 newCurrentExisting);
118 118 return dataCollector;
... ... @@ -131,7 +131,7 @@ public class MtasDataDoubleBasic
131 131 if (keys != null && keys.length > 0) {
132 132 MtasDataCollector<?, ?>[] subCollectors = new MtasDataCollector<?, ?>[keys.length];
133 133 for (int i = 0; i < keys.length; i++) {
134   - subCollectors[i] = add(keys[i]);
  134 + subCollectors[i] = add(keys[i], false);
135 135 setValue(newCurrentPosition, Double.valueOf(valueSum), valueN,
136 136 newCurrentExisting);
137 137 }
... ... @@ -157,7 +157,7 @@ public class MtasDataDoubleBasic
157 157 newValues[i] = Long.valueOf(values[i]).doubleValue();
158 158 MtasDataCollector<?, ?>[] subCollectors = new MtasDataCollector<?, ?>[keys.length];
159 159 for (int i = 0; i < keys.length; i++) {
160   - subCollectors[i] = add(keys[i]);
  160 + subCollectors[i] = add(keys[i], false);
161 161 setValue(newCurrentPosition, newValues, number, newCurrentExisting);
162 162 }
163 163 return subCollectors;
... ... @@ -179,7 +179,7 @@ public class MtasDataDoubleBasic
179 179 if (keys != null && keys.length > 0) {
180 180 MtasDataCollector<?, ?>[] subCollectors = new MtasDataCollector<?, ?>[keys.length];
181 181 for (int i = 0; i < keys.length; i++) {
182   - subCollectors[i] = add(keys[i]);
  182 + subCollectors[i] = add(keys[i], false);
183 183 setValue(newCurrentPosition, valueSum, valueN, newCurrentExisting);
184 184 }
185 185 return subCollectors;
... ... @@ -201,7 +201,7 @@ public class MtasDataDoubleBasic
201 201 if (keys != null && keys.length > 0) {
202 202 MtasDataCollector<?, ?>[] subCollectors = new MtasDataCollector<?, ?>[keys.length];
203 203 for (int i = 0; i < keys.length; i++) {
204   - subCollectors[i] = add(keys[i]);
  204 + subCollectors[i] = add(keys[i], false);
205 205 setValue(newCurrentPosition, ArrayUtils.toObject(values), number,
206 206 newCurrentExisting);
207 207 }
... ...
src/mtas/codec/util/collector/MtasDataDoubleFull.java
... ... @@ -59,7 +59,7 @@ public class MtasDataDoubleFull
59 59 return new MtasDataItemDoubleFull(
60 60 ArrayUtils.toPrimitive(fullValueList[i]),
61 61 hasSub() ? subCollectorListNextLevel[i] : null, statsItems, sortType,
62   - sortDirection, errorNumber[i], errorList[i]);
  62 + sortDirection, errorNumber[i], errorList[i], sourceNumberList[i]);
63 63 }
64 64  
65 65 /*
... ... @@ -81,7 +81,7 @@ public class MtasDataDoubleFull
81 81 @Override
82 82 public MtasDataCollector<?, ?> add(long[] values, int number)
83 83 throws IOException {
84   - MtasDataCollector<?, ?> dataCollector = add();
  84 + MtasDataCollector<?, ?> dataCollector = add(false);
85 85 Double[] newValues = new Double[number];
86 86 for (int i = 0; i < values.length; i++)
87 87 newValues[i] = Long.valueOf(values[i]).doubleValue();
... ... @@ -108,7 +108,7 @@ public class MtasDataDoubleFull
108 108 @Override
109 109 public MtasDataCollector<?, ?> add(double[] values, int number)
110 110 throws IOException {
111   - MtasDataCollector<?, ?> dataCollector = add();
  111 + MtasDataCollector<?, ?> dataCollector = add(false);
112 112 setValue(newCurrentPosition, ArrayUtils.toObject(values), number,
113 113 newCurrentExisting);
114 114 return dataCollector;
... ... @@ -143,7 +143,7 @@ public class MtasDataDoubleFull
143 143 newValues[i] = Long.valueOf(values[i]).doubleValue();
144 144 MtasDataCollector<?, ?>[] subCollectors = new MtasDataCollector<?, ?>[keys.length];
145 145 for (int i = 0; i < keys.length; i++) {
146   - subCollectors[i] = add(keys[i]);
  146 + subCollectors[i] = add(keys[i], false);
147 147 setValue(newCurrentPosition, newValues, number, newCurrentExisting);
148 148 }
149 149 return subCollectors;
... ... @@ -178,7 +178,7 @@ public class MtasDataDoubleFull
178 178 if (keys != null && keys.length > 0) {
179 179 MtasDataCollector<?, ?>[] subCollectors = new MtasDataCollector<?, ?>[keys.length];
180 180 for (int i = 0; i < keys.length; i++) {
181   - subCollectors[i] = add(keys[i]);
  181 + subCollectors[i] = add(keys[i], false);
182 182 setValue(newCurrentPosition, ArrayUtils.toObject(values), number,
183 183 newCurrentExisting);
184 184 }
... ...
src/mtas/codec/util/collector/MtasDataFull.java
... ... @@ -10,9 +10,12 @@ import mtas.codec.util.DataCollector.MtasDataCollector;
10 10 /**
11 11 * The Class MtasDataFull.
12 12 *
13   - * @param <T1> the generic type
14   - * @param <T2> the generic type
15   - * @param <T3> the generic type
  13 + * @param <T1>
  14 + * the generic type
  15 + * @param <T2>
  16 + * the generic type
  17 + * @param <T3>
  18 + * the generic type
16 19 */
17 20 abstract class MtasDataFull<T1 extends Number, T2 extends Number, T3 extends MtasDataItem<T1>>
18 21 extends MtasDataCollector<T1, T3> implements Serializable {
... ... @@ -29,24 +32,42 @@ abstract class MtasDataFull&lt;T1 extends Number, T2 extends Number, T3 extends Mta
29 32 /**
30 33 * Instantiates a new mtas data full.
31 34 *
32   - * @param collectorType the collector type
33   - * @param dataType the data type
34   - * @param statsItems the stats items
35   - * @param sortType the sort type
36   - * @param sortDirection the sort direction
37   - * @param start the start
38   - * @param number the number
39   - * @param subCollectorTypes the sub collector types
40   - * @param subDataTypes the sub data types
41   - * @param subStatsTypes the sub stats types
42   - * @param subStatsItems the sub stats items
43   - * @param subSortTypes the sub sort types
44   - * @param subSortDirections the sub sort directions
45   - * @param subStart the sub start
46   - * @param subNumber the sub number
47   - * @param operations the operations
48   - * @param segmentRegistration the segment registration
49   - * @throws IOException Signals that an I/O exception has occurred.
  35 + * @param collectorType
  36 + * the collector type
  37 + * @param dataType
  38 + * the data type
  39 + * @param statsItems
  40 + * the stats items
  41 + * @param sortType
  42 + * the sort type
  43 + * @param sortDirection
  44 + * the sort direction
  45 + * @param start
  46 + * the start
  47 + * @param number
  48 + * the number
  49 + * @param subCollectorTypes
  50 + * the sub collector types
  51 + * @param subDataTypes
  52 + * the sub data types
  53 + * @param subStatsTypes
  54 + * the sub stats types
  55 + * @param subStatsItems
  56 + * the sub stats items
  57 + * @param subSortTypes
  58 + * the sub sort types
  59 + * @param subSortDirections
  60 + * the sub sort directions
  61 + * @param subStart
  62 + * the sub start
  63 + * @param subNumber
  64 + * the sub number
  65 + * @param operations
  66 + * the operations
  67 + * @param segmentRegistration
  68 + * the segment registration
  69 + * @throws IOException
  70 + * Signals that an I/O exception has occurred.
50 71 */
51 72 public MtasDataFull(String collectorType, String dataType,
52 73 TreeSet<String> statsItems, String sortType, String sortDirection,
... ... @@ -58,8 +79,8 @@ abstract class MtasDataFull&lt;T1 extends Number, T2 extends Number, T3 extends Mta
58 79 throws IOException {
59 80 super(collectorType, dataType, CodecUtil.STATS_FULL, statsItems, sortType,
60 81 sortDirection, start, number, subCollectorTypes, subDataTypes,
61   - subStatsTypes, subStatsItems, subSortTypes, subSortDirections,
62   - subStart, subNumber, segmentRegistration);
  82 + subStatsTypes, subStatsItems, subSortTypes, subSortDirections, subStart,
  83 + subNumber, segmentRegistration);
63 84 this.operations = operations;
64 85 }
65 86  
... ... @@ -71,7 +92,7 @@ abstract class MtasDataFull&lt;T1 extends Number, T2 extends Number, T3 extends Mta
71 92 */
72 93 @Override
73 94 public final void error(String error) throws IOException {
74   - add();
  95 + add(false);
75 96 setError(newCurrentPosition, error, newCurrentExisting);
76 97 }
77 98  
... ... @@ -86,7 +107,7 @@ abstract class MtasDataFull&lt;T1 extends Number, T2 extends Number, T3 extends Mta
86 107 public final void error(String[] keys, String error) throws IOException {
87 108 if (keys != null && keys.length > 0) {
88 109 for (int i = 0; i < keys.length; i++) {
89   - add(keys[i]);
  110 + add(keys[i], false);
90 111 setError(newCurrentPosition, error, newCurrentExisting);
91 112 }
92 113 }
... ... @@ -95,9 +116,12 @@ abstract class MtasDataFull&lt;T1 extends Number, T2 extends Number, T3 extends Mta
95 116 /**
96 117 * Sets the error.
97 118 *
98   - * @param newPosition the new position
99   - * @param error the error
100   - * @param currentExisting the current existing
  119 + * @param newPosition
  120 + * the new position
  121 + * @param error
  122 + * the error
  123 + * @param currentExisting
  124 + * the current existing
101 125 */
102 126 protected void setError(int newPosition, String error,
103 127 boolean currentExisting) {
... ... @@ -116,8 +140,7 @@ abstract class MtasDataFull&lt;T1 extends Number, T2 extends Number, T3 extends Mta
116 140 /*
117 141 * (non-Javadoc)
118 142 *
119   - * @see
120   - * mtas.codec.util.DataCollector.MtasDataCollector#increaseNewListSize()
  143 + * @see mtas.codec.util.DataCollector.MtasDataCollector#increaseNewListSize()
121 144 */
122 145 @Override
123 146 protected final void increaseNewListSize() {
... ... @@ -133,6 +156,22 @@ abstract class MtasDataFull&lt;T1 extends Number, T2 extends Number, T3 extends Mta
133 156 System.arraycopy(tmpNewFullValueList, 0, newFullValueList, 0, tmpOldSize);
134 157 }
135 158  
  159 + public void reduceToSegmentKeys() {
  160 + if(segmentRegistration) {
  161 + int sizeCopy = size;
  162 + String[] keyListCopy = keyList.clone();
  163 + T1[][] fullValueListCopy = fullValueList.clone();
  164 + size = 0;
  165 + for(int i=0; i< sizeCopy; i++) {
  166 + if(segmentKeys.contains(keyListCopy[i])) {
  167 + keyList[size] = keyListCopy[i];
  168 + fullValueList[size] = fullValueListCopy[i];
  169 + size++;
  170 + }
  171 + }
  172 + }
  173 + }
  174 +
136 175 /*
137 176 * (non-Javadoc)
138 177 *
... ... @@ -156,10 +195,14 @@ abstract class MtasDataFull&lt;T1 extends Number, T2 extends Number, T3 extends Mta
156 195 /**
157 196 * Sets the value.
158 197 *
159   - * @param newPosition the new position
160   - * @param values the values
161   - * @param number the number
162   - * @param currentExisting the current existing
  198 + * @param newPosition
  199 + * the new position
  200 + * @param values
  201 + * the values
  202 + * @param number
  203 + * the number
  204 + * @param currentExisting
  205 + * the current existing
163 206 */
164 207 protected void setValue(int newPosition, T1[] values, int number,
165 208 boolean currentExisting) {
... ... @@ -211,12 +254,11 @@ abstract class MtasDataFull&lt;T1 extends Number, T2 extends Number, T3 extends Mta
211 254 /*
212 255 * (non-Javadoc)
213 256 *
214   - * @see
215   - * mtas.codec.util.DataCollector.MtasDataCollector#merge(mtas.codec.util.
  257 + * @see mtas.codec.util.DataCollector.MtasDataCollector#merge(mtas.codec.util.
216 258 * DataCollector.MtasDataCollector)
217 259 */
218 260 @Override
219   - public void merge(MtasDataCollector<?, ?> newDataCollector)
  261 + public void merge(MtasDataCollector<?, ?> newDataCollector, boolean increaseSourceNumber)
220 262 throws IOException {
221 263 closeNewList();
222 264 if (!collectorType.equals(newDataCollector.getCollectorType())
... ... @@ -233,7 +275,7 @@ abstract class MtasDataFull&lt;T1 extends Number, T2 extends Number, T3 extends Mta
233 275 for (int i = 0; i < newMtasDataFull.getSize(); i++) {
234 276 if (newMtasDataFull.fullValueList[i].length > 0) {
235 277 MtasDataCollector<?, ?>[] subCollectors = new MtasDataCollector<?, ?>[1];
236   - subCollectors[0] = add(newMtasDataFull.keyList[i]);
  278 + subCollectors[0] = add(newMtasDataFull.keyList[i], increaseSourceNumber);
237 279 setError(newCurrentPosition, newMtasDataFull.errorNumber[i],
238 280 newMtasDataFull.errorList[i], newCurrentExisting);
239 281 setValue(newCurrentPosition, newMtasDataFull.fullValueList[i],
... ... @@ -241,19 +283,19 @@ abstract class MtasDataFull&lt;T1 extends Number, T2 extends Number, T3 extends Mta
241 283 if (hasSub() && newMtasDataFull.hasSub()) {
242 284 // single key implies exactly one subCollector if hasSub
243 285 subCollectors[0]
244   - .merge(newMtasDataFull.subCollectorListNextLevel[i]);
  286 + .merge(newMtasDataFull.subCollectorListNextLevel[i], increaseSourceNumber);
245 287 }
246 288 }
247 289 }
248 290 } else if (collectorType.equals(DataCollector.COLLECTOR_TYPE_DATA)) {
249 291 if (newMtasDataFull.getSize() > 0) {
250   - MtasDataCollector<?, ?> subCollector = add();
  292 + MtasDataCollector<?, ?> subCollector = add(increaseSourceNumber);
251 293 setError(newCurrentPosition, newMtasDataFull.errorNumber[0],
252 294 newMtasDataFull.errorList[0], newCurrentExisting);
253 295 setValue(newCurrentPosition, newMtasDataFull.fullValueList[0],
254 296 newMtasDataFull.fullValueList[0].length, newCurrentExisting);
255 297 if (hasSub() && newMtasDataFull.hasSub()) {
256   - subCollector.merge(newMtasDataFull.subCollectorNextLevel);
  298 + subCollector.merge(newMtasDataFull.subCollectorNextLevel, increaseSourceNumber);
257 299 }
258 300 }
259 301 } else {
... ... @@ -290,12 +332,11 @@ abstract class MtasDataFull&lt;T1 extends Number, T2 extends Number, T3 extends Mta
290 332 /**
291 333 * Inits the new list basic.
292 334 *
293   - * @param maxNumberOfTerms the max number of terms
  335 + * @param maxNumberOfTerms
  336 + * the max number of terms
294 337 */
295 338 private void initNewListBasic(int maxNumberOfTerms) {
296 339 newFullValueList = operations.createMatrix1(newSize);
297 340 }
298 341  
299 342 }
300   -
301   -
... ...
src/mtas/codec/util/collector/MtasDataItem.java
... ... @@ -33,6 +33,8 @@ public abstract class MtasDataItem&lt;T extends Number&gt;
33 33  
34 34 /** The error list. */
35 35 protected HashMap<String, Integer> errorList;
  36 +
  37 + protected int sourceNumber;
36 38  
37 39 /**
38 40 * Instantiates a new mtas data item.
... ... @@ -46,13 +48,14 @@ public abstract class MtasDataItem&lt;T extends Number&gt;
46 48 */
47 49 public MtasDataItem(MtasDataCollector<?, ?> sub, TreeSet<String> statsItems,
48 50 String sortType, String sortDirection, int errorNumber,
49   - HashMap<String, Integer> errorList) {
  51 + HashMap<String, Integer> errorList, int sourceNumber) {
50 52 this.sub = sub;
51 53 this.statsItems = statsItems;
52 54 this.sortType = sortType;
53 55 this.sortDirection = sortDirection;
54 56 this.errorNumber = errorNumber;
55 57 this.errorList = errorList;
  58 + this.sourceNumber = sourceNumber;
56 59 }
57 60  
58 61 /**
... ... @@ -69,7 +72,7 @@ public abstract class MtasDataItem&lt;T extends Number&gt;
69 72 * @return the map
70 73 * @throws IOException Signals that an I/O exception has occurred.
71 74 */
72   - public abstract Map<String, Object> rewrite() throws IOException;
  75 + public abstract Map<String, Object> rewrite(boolean showDebugInfo) throws IOException;
73 76  
74 77 /**
75 78 * Gets the sub.
... ...
src/mtas/codec/util/collector/MtasDataItemAdvanced.java
... ... @@ -64,8 +64,8 @@ abstract class MtasDataItemAdvanced<T1 extends Number, T2 extends Number>
64 64 MtasDataCollector<?, ?> sub, TreeSet<String> statsItems,
65 65 String sortType, String sortDirection, int errorNumber,
66 66 HashMap<String, Integer> errorList,
67   - MtasDataOperations<T1, T2> operations) {
68   - super(sub, statsItems, sortType, sortDirection, errorNumber, errorList);
  67 + MtasDataOperations<T1, T2> operations, int sourceNumber) {
  68 + super(sub, statsItems, sortType, sortDirection, errorNumber, errorList, sourceNumber);
69 69 this.valueSum = valueSum;
70 70 this.valueSumOfLogs = valueSumOfLogs;
71 71 this.valueSumOfSquares = valueSumOfSquares;
... ... @@ -104,7 +104,7 @@ abstract class MtasDataItemAdvanced<T1 extends Number, T2 extends Number>
104 104 * @see mtas.codec.util.DataCollector.MtasDataItem#rewrite()
105 105 */
106 106 @Override
107   - public Map<String, Object> rewrite() throws IOException {
  107 + public Map<String, Object> rewrite(boolean showDebugInfo) throws IOException {
108 108 Map<String, Object> response = new HashMap<String, Object>();
109 109 for (String statsItem : statsItems) {
110 110 if (statsItem.equals(CodecUtil.STATS_TYPE_SUM)) {
... ... @@ -143,7 +143,10 @@ abstract class MtasDataItemAdvanced<T1 extends Number, T2 extends Number>
143 143 response.put("errorNumber", errorNumber);
144 144 response.put("errorList", errorResponse);
145 145 }
146   - // response.put("stats", "advanced");
  146 + if(showDebugInfo) {
  147 + response.put("sourceNumber", sourceNumber);
  148 + response.put("stats", "advanced");
  149 + }
147 150 return response;
148 151 }
149 152  
... ...
src/mtas/codec/util/collector/MtasDataItemBasic.java
... ... @@ -47,8 +47,8 @@ abstract class MtasDataItemBasic<T1 extends Number, T2 extends Number>
47 47 MtasDataCollector<?, ?> sub, TreeSet<String> statsItems,
48 48 String sortType, String sortDirection, int errorNumber,
49 49 HashMap<String, Integer> errorList,
50   - MtasDataOperations<T1, T2> operations) {
51   - super(sub, statsItems, sortType, sortDirection, errorNumber, errorList);
  50 + MtasDataOperations<T1, T2> operations, int sourceNumber) {
  51 + super(sub, statsItems, sortType, sortDirection, errorNumber, errorList, sourceNumber);
52 52 this.valueSum = valueSum;
53 53 this.valueN = valueN;
54 54 this.operations = operations;
... ... @@ -77,7 +77,7 @@ abstract class MtasDataItemBasic<T1 extends Number, T2 extends Number>
77 77 * @see mtas.codec.util.DataCollector.MtasDataItem#rewrite()
78 78 */
79 79 @Override
80   - public Map<String, Object> rewrite() throws IOException {
  80 + public Map<String, Object> rewrite(boolean showDebugInfo) throws IOException {
81 81 Map<String, Object> response = new HashMap<String, Object>();
82 82 for (String statsItem : statsItems) {
83 83 if (statsItem.equals(CodecUtil.STATS_TYPE_SUM)) {
... ... @@ -97,8 +97,11 @@ abstract class MtasDataItemBasic<T1 extends Number, T2 extends Number>
97 97 }
98 98 response.put("errorNumber", errorNumber);
99 99 response.put("errorList", errorResponse);
100   - }
101   - // response.put("stats", "basic");
  100 + }
  101 + if(showDebugInfo) {
  102 + response.put("sourceNumber", sourceNumber);
  103 + response.put("stats", "basic");
  104 + }
102 105 return response;
103 106 }
104 107  
... ...
src/mtas/codec/util/collector/MtasDataItemDoubleAdvanced.java
... ... @@ -34,10 +34,10 @@ public class MtasDataItemDoubleAdvanced
34 34 double valueSumOfSquares, double valueMin, double valueMax, long valueN,
35 35 MtasDataCollector<?, ?> sub, TreeSet<String> statsItems,
36 36 String sortType, String sortDirection, int errorNumber,
37   - HashMap<String, Integer> errorList) {
  37 + HashMap<String, Integer> errorList, int sourceNumber) {
38 38 super(valueSum, valueSumOfLogs, valueSumOfSquares, valueMin, valueMax,
39 39 valueN, sub, statsItems, sortType, sortDirection, errorNumber,
40   - errorList, new MtasDataDoubleOperations());
  40 + errorList, new MtasDataDoubleOperations(), sourceNumber);
41 41 }
42 42  
43 43 /*
... ...
src/mtas/codec/util/collector/MtasDataItemDoubleBasic.java
... ... @@ -29,9 +29,9 @@ public class MtasDataItemDoubleBasic
29 29 public MtasDataItemDoubleBasic(double valueSum, long valueN,
30 30 MtasDataCollector<?, ?> sub, TreeSet<String> statsItems,
31 31 String sortType, String sortDirection, int errorNumber,
32   - HashMap<String, Integer> errorList) {
  32 + HashMap<String, Integer> errorList, int sourceNumber) {
33 33 super(valueSum, valueN, sub, statsItems, sortType, sortDirection,
34   - errorNumber, errorList, new MtasDataDoubleOperations());
  34 + errorNumber, errorList, new MtasDataDoubleOperations(), sourceNumber);
35 35 }
36 36  
37 37 /*
... ...
src/mtas/codec/util/collector/MtasDataItemDoubleFull.java
... ... @@ -34,10 +34,10 @@ public class MtasDataItemDoubleFull
34 34 */
35 35 public MtasDataItemDoubleFull(double[] value, MtasDataCollector<?, ?> sub,
36 36 TreeSet<String> statsItems, String sortType, String sortDirection,
37   - int errorNumber, HashMap<String, Integer> errorList) {
  37 + int errorNumber, HashMap<String, Integer> errorList, int sourceNumber) {
38 38 super(ArrayUtils.toObject(value), sub, statsItems, sortType,
39 39 sortDirection, errorNumber, errorList,
40   - new MtasDataDoubleOperations());
  40 + new MtasDataDoubleOperations(), sourceNumber);
41 41 }
42 42  
43 43 /**
... ...
src/mtas/codec/util/collector/MtasDataItemFull.java
... ... @@ -52,8 +52,8 @@ abstract class MtasDataItemFull<T1 extends Number, T2 extends Number>
52 52 public MtasDataItemFull(T1[] value, MtasDataCollector<?, ?> sub,
53 53 TreeSet<String> statsItems, String sortType, String sortDirection,
54 54 int errorNumber, HashMap<String, Integer> errorList,
55   - MtasDataOperations<T1, T2> operations) {
56   - super(sub, statsItems, sortType, sortDirection, errorNumber, errorList);
  55 + MtasDataOperations<T1, T2> operations, int sourceNumber) {
  56 + super(sub, statsItems, sortType, sortDirection, errorNumber, errorList, sourceNumber);
57 57 this.fullValues = value;
58 58 this.operations = operations;
59 59 }
... ... @@ -106,7 +106,7 @@ abstract class MtasDataItemFull<T1 extends Number, T2 extends Number>
106 106 * @see mtas.codec.util.DataCollector.MtasDataItem#rewrite()
107 107 */
108 108 @Override
109   - public Map<String, Object> rewrite() throws IOException {
  109 + public Map<String, Object> rewrite(boolean showDebugInfo) throws IOException {
110 110 createStats();
111 111 Map<String, Object> response = new HashMap<String, Object>();
112 112 for (String statsItem : statsItems) {
... ... @@ -163,7 +163,10 @@ abstract class MtasDataItemFull<T1 extends Number, T2 extends Number>
163 163 response.put("errorNumber", errorNumber);
164 164 response.put("errorList", errorResponse);
165 165 }
166   - // response.put("stats", "full");
  166 + if(showDebugInfo) {
  167 + response.put("sourceNumber", sourceNumber);
  168 + response.put("stats", "full");
  169 + }
167 170 return response;
168 171 }
169 172  
... ...
src/mtas/codec/util/collector/MtasDataItemLongAdvanced.java
... ... @@ -34,10 +34,10 @@ class MtasDataItemLongAdvanced
34 34 long valueSumOfSquares, long valueMin, long valueMax, long valueN,
35 35 MtasDataCollector<?, ?> sub, TreeSet<String> statsItems,
36 36 String sortType, String sortDirection, int errorNumber,
37   - HashMap<String, Integer> errorList) {
  37 + HashMap<String, Integer> errorList, int sourceNumber) {
38 38 super(valueSum, valueSumOfLogs, valueSumOfSquares, valueMin, valueMax,
39 39 valueN, sub, statsItems, sortType, sortDirection, errorNumber,
40   - errorList, new MtasDataLongOperations());
  40 + errorList, new MtasDataLongOperations(), sourceNumber);
41 41 }
42 42  
43 43 /*
... ...
src/mtas/codec/util/collector/MtasDataItemLongBasic.java
... ... @@ -29,9 +29,9 @@ class MtasDataItemLongBasic
29 29 public MtasDataItemLongBasic(long valueSum, long valueN,
30 30 MtasDataCollector<?, ?> sub, TreeSet<String> statsItems,
31 31 String sortType, String sortDirection, int errorNumber,
32   - HashMap<String, Integer> errorList) {
  32 + HashMap<String, Integer> errorList, int sourceNumber) {
33 33 super(valueSum, valueN, sub, statsItems, sortType, sortDirection,
34   - errorNumber, errorList, new MtasDataLongOperations());
  34 + errorNumber, errorList, new MtasDataLongOperations(), sourceNumber);
35 35 }
36 36  
37 37 /*
... ...
src/mtas/codec/util/collector/MtasDataItemLongFull.java
... ... @@ -33,9 +33,9 @@ class MtasDataItemLongFull extends MtasDataItemFull<Long, Double> {
33 33 */
34 34 public MtasDataItemLongFull(long[] value, MtasDataCollector<?, ?> sub,
35 35 TreeSet<String> statsItems, String sortType, String sortDirection,
36   - int errorNumber, HashMap<String, Integer> errorList) {
  36 + int errorNumber, HashMap<String, Integer> errorList, int sourceNumber) {
37 37 super(ArrayUtils.toObject(value), sub, statsItems, sortType, sortDirection,
38   - errorNumber, errorList, new MtasDataLongOperations());
  38 + errorNumber, errorList, new MtasDataLongOperations(), sourceNumber);
39 39 }
40 40  
41 41 /*
... ...
src/mtas/codec/util/collector/MtasDataLongAdvanced.java
... ... @@ -61,7 +61,7 @@ public class MtasDataLongAdvanced
61 61 advancedValueSumOfLogsList[i], advancedValueSumOfSquaresList[i],
62 62 advancedValueMinList[i], advancedValueMaxList[i],
63 63 advancedValueNList[i], hasSub() ? subCollectorListNextLevel[i] : null,
64   - statsItems, sortType, sortDirection, errorNumber[i], errorList[i]);
  64 + statsItems, sortType, sortDirection, errorNumber[i], errorList[i], sourceNumberList[i]);
65 65 }
66 66  
67 67 /*
... ... @@ -83,7 +83,7 @@ public class MtasDataLongAdvanced
83 83 @Override
84 84 public MtasDataCollector<?, ?> add(long[] values, int number)
85 85 throws IOException {
86   - MtasDataCollector<?, ?> dataCollector = add();
  86 + MtasDataCollector<?, ?> dataCollector = add(false);
87 87 setValue(newCurrentPosition, ArrayUtils.toObject(values), number,
88 88 newCurrentExisting);
89 89 return dataCollector;
... ... @@ -108,7 +108,7 @@ public class MtasDataLongAdvanced
108 108 @Override
109 109 public MtasDataCollector<?, ?> add(double[] values, int number)
110 110 throws IOException {
111   - MtasDataCollector<?, ?> dataCollector = add();
  111 + MtasDataCollector<?, ?> dataCollector = add(false);
112 112 Long[] newValues = new Long[number];
113 113 for (int i = 0; i < values.length; i++)
114 114 newValues[i] = Double.valueOf(values[i]).longValue();
... ... @@ -142,7 +142,7 @@ public class MtasDataLongAdvanced
142 142 if (keys != null && keys.length > 0) {
143 143 MtasDataCollector<?, ?>[] subCollectors = new MtasDataCollector<?, ?>[keys.length];
144 144 for (int i = 0; i < keys.length; i++) {
145   - subCollectors[i] = add(keys[i]);
  145 + subCollectors[i] = add(keys[i], false);
146 146 setValue(newCurrentPosition, ArrayUtils.toObject(values), number,
147 147 newCurrentExisting);
148 148 }
... ... @@ -181,7 +181,7 @@ public class MtasDataLongAdvanced
181 181 newValues[i] = Double.valueOf(values[i]).longValue();
182 182 MtasDataCollector<?, ?>[] subCollectors = new MtasDataCollector<?, ?>[keys.length];
183 183 for (int i = 0; i < keys.length; i++) {
184   - subCollectors[i] = add(keys[i]);
  184 + subCollectors[i] = add(keys[i], false);
185 185 setValue(newCurrentPosition, newValues, number, newCurrentExisting);
186 186 }
187 187 return subCollectors;
... ...