Commit d43a3ac6d420cd643e7403a2f5b89289437ea6bc

Authored by Matthijs Brouwer
2 parents 8148835a cac6601c

Merge branch 'master' of https://github.com/meertensinstituut/mtas

# Conflicts:
#	conf/parser/mtas/crm_test.xml
#	conf/parser/mtas/folia_ddd.xml
#	pom.xml
#	src/mtas/analysis/MtasTokenizer.java
#	src/mtas/analysis/parser/MtasCRMParser.java
#	src/mtas/analysis/token/MtasTokenCollection.java
#	src/mtas/codec/MtasCodecPostingsFormat.java
#	src/mtas/codec/MtasFieldsConsumer.java
#	src/mtas/codec/util/CodecComponent.java
#	src/mtas/codec/util/CodecInfo.java
#	src/mtas/codec/util/DataCollector.java
#	src/mtas/codec/util/collector/MtasDataAdvanced.java
#	src/mtas/codec/util/collector/MtasDataBasic.java
#	src/mtas/codec/util/collector/MtasDataDoubleAdvanced.java
#	src/mtas/codec/util/collector/MtasDataDoubleBasic.java
#	src/mtas/codec/util/collector/MtasDataDoubleFull.java
#	src/mtas/codec/util/collector/MtasDataFull.java
#	src/mtas/codec/util/collector/MtasDataItem.java
#	src/mtas/codec/util/collector/MtasDataItemAdvanced.java
#	src/mtas/codec/util/collector/MtasDataItemBasic.java
#	src/mtas/codec/util/collector/MtasDataItemDoubleAdvanced.java
#	src/mtas/codec/util/collector/MtasDataItemDoubleBasic.java
#	src/mtas/codec/util/collector/MtasDataItemDoubleFull.java
#	src/mtas/codec/util/collector/MtasDataItemFull.java
#	src/mtas/codec/util/collector/MtasDataItemLongAdvanced.java
#	src/mtas/codec/util/collector/MtasDataItemLongBasic.java
#	src/mtas/codec/util/collector/MtasDataLongAdvanced.java
#	src/mtas/codec/util/collector/MtasDataLongBasic.java
#	src/mtas/codec/util/collector/MtasDataLongFull.java
#	src/mtas/parser/cql/util/MtasCQLParserDefaultPrefixCondition.java
#	src/mtas/parser/cql/util/MtasCQLParserSentenceCondition.java
#	src/mtas/parser/cql/util/MtasCQLParserSentencePartCondition.java
#	src/mtas/solr/handler/component/MtasSolrSearchComponent.java
#	src/mtas/solr/search/MtasCQLQParser.java
#	src/mtas/solr/update/processor/MtasUpdateRequestProcessorFactory.java
#	src/mtas/solr/update/processor/MtasUpdateRequestProcessorResultWriter.java
#	src/site/markdown/download.md.vm
#	src/site/markdown/index.md
#	src/site/markdown/installation.md
#	src/site/markdown/installation_lucene.md
#	src/site/markdown/installation_solr.md
#	src/site/site.xml
conf/parser/mtas/crm_test.xml
... ... @@ -17,16 +17,20 @@
17 17 <!-- START CONFIGURATION MTAS FOLIA PARSER -->
18 18 <parser name="mtas.analysis.parser.MtasCRMParser">
19 19  
  20 +<<<<<<< HEAD
20 21 <!-- START GENERAL SETTINGS MTAS PARSER -->
21 22 <autorepair value="true" />
22 23 <makeunique value="true" />
23 24 <!-- END GENERAL SETTINGS MTAS PARSER -->
24 25  
  26 +=======
  27 +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90
25 28 <mappings>
26 29  
27 30 <mapping type="word">
28 31 </mapping>
29 32  
  33 +<<<<<<< HEAD
30 34 <mapping type="wordAnnotation" name="0">
31 35 <token type="string" offset="false" parent="false">
32 36 <pre>
... ... @@ -71,12 +75,19 @@
71 75 <token type="string" offset="false" parent="false">
72 76 <pre>
73 77 <item type="string" value="t2" />
  78 +=======
  79 + <mapping type="wordAnnotation" name="2">
  80 + <token type="string" offset="false" parent="false">
  81 + <pre>
  82 + <item type="string" value="t" />
  83 +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90
74 84 </pre>
75 85 <post>
76 86 <item type="text" />
77 87 </post>
78 88 </token>
79 89 </mapping>
  90 +<<<<<<< HEAD
80 91 <mapping type="wordAnnotation" name="2">
81 92 <token type="string" offset="false" parent="false">
82 93 <pre>
... ... @@ -87,6 +98,8 @@
87 98 </post>
88 99 </token>
89 100 </mapping>
  101 +=======
  102 +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90
90 103 <mapping type="wordAnnotation" name="3">
91 104 <token type="string" offset="false" parent="false">
92 105 <pre>
... ... @@ -98,6 +111,7 @@
98 111 </token>
99 112 </mapping>
100 113 <mapping type="wordAnnotation" name="4">
  114 +<<<<<<< HEAD
101 115 <token type="string" offset="false" parent="false">
102 116 <pre>
103 117 <item type="string" value="crm" />
... ... @@ -114,12 +128,34 @@
114 128 </condition>
115 129 </mapping>
116 130 <mapping type="crmPair" name="part">
  131 +=======
  132 + </mapping>
  133 + <mapping type="wordAnnotation" name="5">
  134 + </mapping>
  135 + <mapping type="wordAnnotation" name="6">
  136 + </mapping>
  137 + <mapping type="wordAnnotation" name="7">
  138 + <token type="string" offset="false" parent="false">
  139 + <pre>
  140 + <item type="string" value="sentence" />
  141 + </pre>
  142 + <post>
  143 + <item type="text" />
  144 + </post>
  145 + </token>
  146 + <condition>
  147 + <item type="text" not="true" condition="-" />
  148 + </condition>
  149 + </mapping>
  150 + <mapping type="wordAnnotation" name="pos">
  151 +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90
117 152 <token type="string" offset="false" parent="false">
118 153 <pre>
119 154 <item type="name" />
120 155 </pre>
121 156 <post>
122 157 <item type="text" />
  158 +<<<<<<< HEAD
123 159 </post>
124 160 </token>
125 161 </mapping>
... ... @@ -282,11 +318,111 @@
282 318 </condition>
283 319 </function>
284 320 <function type="wordAnnotation" name="4" split="+">
  321 +=======
  322 + </post>
  323 + </token>
  324 + </mapping>
  325 + <mapping type="wordAnnotation" name="feat.getal">
  326 + <token type="string" offset="false" parent="false">
  327 + <pre>
  328 + <item type="name" />
  329 + </pre>
  330 + <post>
  331 + <item type="text" />
  332 + </post>
  333 + </token>
  334 + </mapping>
  335 + <mapping type="wordAnnotation" name="feat.persoon">
  336 + <token type="string" offset="false" parent="false">
  337 + <pre>
  338 + <item type="name" />
  339 + </pre>
  340 + <post>
  341 + <item type="text" />
  342 + </post>
  343 + </token>
  344 + </mapping>
  345 + <mapping type="wordAnnotation" name="feat.ntype">
  346 + <token type="string" offset="false" parent="false">
  347 + <pre>
  348 + <item type="name" />
  349 + </pre>
  350 + <post>
  351 + <item type="text" />
  352 + </post>
  353 + </token>
  354 + </mapping>
  355 + <mapping type="wordAnnotation" name="feat.pvtijd">
  356 + <token type="string" offset="false" parent="false">
  357 + <pre>
  358 + <item type="name" />
  359 + </pre>
  360 + <post>
  361 + <item type="text" />
  362 + </post>
  363 + </token>
  364 + </mapping>
  365 + <mapping type="wordAnnotation" name="feat.wvorm">
  366 + <token type="string" offset="false" parent="false">
  367 + <pre>
  368 + <item type="name" />
  369 + </pre>
  370 + <post>
  371 + <item type="text" />
  372 + </post>
  373 + </token>
  374 + </mapping>
  375 + <mapping type="wordAnnotation" name="feat.numtype">
  376 + <token type="string" offset="false" parent="false">
  377 + <pre>
  378 + <item type="name" />
  379 + </pre>
  380 + <post>
  381 + <item type="text" />
  382 + </post>
  383 + </token>
  384 + </mapping>
  385 + <mapping type="wordAnnotation" name="feat.vwtype">
  386 + <token type="string" offset="false" parent="false">
  387 + <pre>
  388 + <item type="name" />
  389 + </pre>
  390 + <post>
  391 + <item type="text" />
  392 + </post>
  393 + </token>
  394 + </mapping>
  395 + <mapping type="wordAnnotation" name="feat.lwtype">
  396 + <token type="string" offset="false" parent="false">
  397 + <pre>
  398 + <item type="name" />
  399 + </pre>
  400 + <post>
  401 + <item type="text" />
  402 + </post>
  403 + </token>
  404 + </mapping>
  405 + <mapping type="wordAnnotation" name="feat.probleemgeval">
  406 + <token type="string" offset="false" parent="false">
  407 + <pre>
  408 + <item type="name" />
  409 + </pre>
  410 + <post>
  411 + <item type="text" />
  412 + </post>
  413 + </token>
  414 + </mapping>
  415 + </mappings>
  416 +
  417 + <functions>
  418 + <function name="4" split="+">
  419 +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90
285 420 <condition value="000,001,002,003,004,005,006,009">
286 421 <output name="pos" value="N" />
287 422 <output name="feat.getal" value="ev" />
288 423 </condition>
289 424 <condition value="010,011,012,013,014,015,016,019">
  425 +<<<<<<< HEAD
290 426 <output name="pos" value="N" />
291 427 <output name="feat.getal" value="mv" />
292 428 </condition>
... ... @@ -602,6 +738,167 @@
602 738 <output name="feat.form" value="unclear" />
603 739 </condition>
604 740  
  741 +=======
  742 + <output name="pos" value="N" />
  743 + <output name="feat.getal" value="mv" />
  744 + </condition>
  745 + <condition value="020,021,022,023,024,025,026,029">
  746 + <output name="pos" value="N" />
  747 + <output name="feat.ntype" value="eigen" />
  748 + </condition>
  749 + <condition value="090,091,092,093,094,095,096,099">
  750 + <output name="pos" value="N" />
  751 + <output name="feat.probleemgeval" />
  752 + </condition>
  753 + <condition value="100,101,102,103,104,105,106,109">
  754 + <output name="pos" value="ADJ" />
  755 + <output name="feat.getal" value="ev" />
  756 + </condition>
  757 + <condition value="110,111,112,113,114,115,116,119">
  758 + <output name="pos" value="ADJ" />
  759 + <output name="feat.getal" value="mv" />
  760 + </condition>
  761 + <condition value="190,191,192,193,194,195,196,199">
  762 + <output name="pos" value="ADJ" />
  763 + <output name="feat.probleemgeval" />
  764 + </condition>
  765 +
  766 +
  767 + <condition value="200,201,202,203,204,205,206,209">
  768 + <output name="pos" value="WW" />
  769 + <output name="feat.pvtijd" value="tgw" />
  770 + </condition>
  771 + <condition value="210,211,212,213,214,215,216,219">
  772 + <output name="pos" value="WW" />
  773 + <output name="feat.pvtijd" value="tgw" />
  774 + </condition>
  775 + <condition value="220,221,222,223,224,225,226,229">
  776 + <output name="pos" value="WW" />
  777 + <output name="feat.pvtijd" value="verl" />
  778 + </condition>
  779 + <condition value="230,231,232,233,234,235,236,239">
  780 + <output name="pos" value="WW" />
  781 + <output name="feat.pvtijd" value="verl" />
  782 + </condition>
  783 + <condition value="240,241,242,243,244,245,246,249">
  784 + <output name="pos" value="WW" />
  785 + </condition>
  786 + <condition value="250,251,252,253,254,255,256,259">
  787 + <output name="pos" value="WW" />
  788 + <output name="feat.wvorm" value="inf" />
  789 + </condition> <condition value="260,261,262,263,264,265,266,269">
  790 + <output name="pos" value="WW" />
  791 + <output name="feat.wvorm" value="inf" />
  792 + </condition> <condition value="270,271,272,273,274,275,276,279">
  793 + <output name="pos" value="WW" />
  794 + </condition> <condition value="280,281,282,283,284,285,286,289">
  795 + <output name="pos" value="WW" />
  796 + </condition>
  797 + <condition value="290,291,292,293,294,295,296,299">
  798 + <output name="pos" value="WW" />
  799 + <output name="feat.probleemgeval" />
  800 + </condition>
  801 +
  802 +
  803 + <condition value="300,301,302,303,304,305,306,309">
  804 + <output name="pos" value="TW" />
  805 + <output name="feat.numtype" value="hoofd" />
  806 + </condition>
  807 + <condition value="310,311,312,313,314,315,316,319">
  808 + <output name="pos" value="TW" />
  809 + <output name="feat.numtype" value="rang" />
  810 + </condition>
  811 + <condition value="320,321,322,323,324,325,326,329">
  812 + <output name="pos" value="TW" />
  813 + </condition>
  814 + <condition value="390,391,392,393,394,395,396,399">
  815 + <output name="pos" value="TW" />
  816 + <output name="feat.probleemgeval" />
  817 + </condition>
  818 +
  819 + <condition value="401">
  820 + <output name="pos" value="VNW" />
  821 + <output name="feat.getal" value="ev" />
  822 + <output name="feat.persoon" value="1" />
  823 + </condition>
  824 + <condition value="402">
  825 + <output name="pos" value="VNW" />
  826 + <output name="feat.getal" value="ev" />
  827 + <output name="feat.persoon" value="2" />
  828 + </condition>
  829 + <condition value="403">
  830 + <output name="pos" value="VNW" />
  831 + <output name="feat.getal" value="ev" />
  832 + <output name="feat.persoon" value="3" />
  833 + </condition>
  834 + <condition value="404">
  835 + <output name="pos" value="VNW" />
  836 + <output name="feat.getal" value="mv" />
  837 + <output name="feat.persoon" value="1" />
  838 + </condition>
  839 + <condition value="405">
  840 + <output name="pos" value="VNW" />
  841 + <output name="feat.getal" value="mv" />
  842 + <output name="feat.persoon" value="2" />
  843 + </condition>
  844 + <condition value="406">
  845 + <output name="pos" value="VNW" />
  846 + <output name="feat.getal" value="mv" />
  847 + <output name="feat.persoon" value="3" />
  848 + </condition>
  849 + <condition value="409">
  850 + <output name="pos" value="VNW" />
  851 + <output name="feat.probleemgeval" />
  852 + </condition>
  853 + <condition value="410,411,412,413,414,415,416,419">
  854 + <output name="pos" value="VNW" />
  855 + <output name="feat.vwtype" value="aanw" />
  856 + </condition>
  857 + <condition value="420,421,422,423,424,425,426,429">
  858 + <output name="pos" value="VNW" />
  859 + <output name="feat.vwtype" value="betr" />
  860 + </condition>
  861 + <condition value="430,431,432,433,434,435,436,439">
  862 + <output name="pos" value="VNW" />
  863 + <output name="feat.vwtype" value="vb" />
  864 + </condition>
  865 + <condition value="434,441,442,443,444,445,446,449">
  866 + <output name="pos" value="VNW" />
  867 + <output name="feat.vwtype" value="vb" />
  868 + </condition>
  869 + <condition value="440,441,442,443,444,445,446,449">
  870 + <output name="pos" value="VNW" />
  871 + <output name="feat.lwtype" value="onbep" />
  872 + </condition>
  873 + <condition value="450,451,452,453,454,455,456,459">
  874 + <output name="pos" value="VNW" />
  875 + <output name="feat.vwtype" value="bez" />
  876 + </condition>
  877 +
  878 +
  879 + <condition value="001,011,021,091">
  880 + <output name="feat.form" value="-e" />
  881 + </condition>
  882 + <condition value="002,012,022,092">
  883 + <output name="feat.form" value="-s/-th" />
  884 + </condition>
  885 + <condition value="003,013,023,092">
  886 + <output name="feat.form" value="-t" />
  887 + </condition>
  888 + <condition value="004,014,024,092">
  889 + <output name="feat.form" value="-n" />
  890 + </condition>
  891 + <condition value="005,015,025,095">
  892 + <output name="feat.form" value="-r/-re" />
  893 + </condition>
  894 + <condition value="006,016,026,096">
  895 + <output name="feat.form" value="-a" />
  896 + </condition>
  897 + <condition value="009,019,029,099">
  898 + <output name="feat.form" value="unclear" />
  899 + </condition>
  900 +
  901 +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90
605 902 </function>
606 903 </functions>
607 904  
... ...
conf/parser/mtas/folia_ddd.xml
... ... @@ -19,8 +19,12 @@
19 19  
20 20 <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
21 21 <autorepair value="true" />
  22 +<<<<<<< HEAD
22 23 <makeunique value="true" />
23 24 <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
  25 +=======
  26 + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
  27 +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90
24 28  
25 29 <!-- START REFERENCES -->
26 30 <references>
... ...
pom.xml
1 1 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2 2 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3 3 <properties>
4   - <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  4 + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
5 5 <currentDevelopmentVersion>6.2.0</currentDevelopmentVersion>
6 6 <currentDevelopmentRelease>20160802</currentDevelopmentRelease>
7 7 </properties>
... ... @@ -84,7 +84,7 @@
84 84 <addMavenDescriptor>false</addMavenDescriptor>
85 85 </archive>
86 86 </configuration>
87   - </plugin>
  87 + </plugin>
88 88 </plugins>
89 89 </build>
90 90 <reporting>
... ... @@ -194,5 +194,4 @@
194 194 </dependency>
195 195 </dependencies>
196 196 </dependencyManagement>
197   -
198 197 </project>
199 198 \ No newline at end of file
... ...
src/mtas/analysis/MtasTokenizer.java
... ... @@ -92,7 +92,7 @@ public final class MtasTokenizer<T> extends Tokenizer {
92 92 * Signals that an I/O exception has occurred.
93 93 */
94 94 public MtasTokenizer(MtasConfiguration config) throws IOException {
95   - processConfiguration(config);
  95 + processConfiguration(config);
96 96 }
97 97  
98 98 /**
... ...
src/mtas/analysis/parser/MtasSketchParser.java
... ... @@ -41,7 +41,8 @@ final public class MtasSketchParser extends MtasBasicParser {
41 41 /**
42 42 * Instantiates a new mtas sketch parser.
43 43 *
44   - * @param config the config
  44 + * @param config
  45 + * the config
45 46 */
46 47 public MtasSketchParser(MtasConfiguration config) {
47 48 super(config);
... ... @@ -363,7 +364,8 @@ final public class MtasSketchParser extends MtasBasicParser {
363 364 /**
364 365 * Prints the config types.
365 366 *
366   - * @param types the types
  367 + * @param types
  368 + * the types
367 369 * @return the string
368 370 */
369 371 private String printConfigTypes(HashMap<?, MtasParserType> types) {
... ... @@ -378,15 +380,8 @@ final public class MtasSketchParser extends MtasBasicParser {
378 380 return text;
379 381 }
380 382  
381   - /**
382   - * The Class MtasSketchParserMappingWord.
383   - */
384 383 private class MtasSketchParserMappingWord
385 384 extends MtasParserMapping<MtasSketchParserMappingWord> {
386   -
387   - /**
388   - * Instantiates a new mtas sketch parser mapping word.
389   - */
390 385 public MtasSketchParserMappingWord() {
391 386 super();
392 387 this.position = SOURCE_OWN;
... ... @@ -394,18 +389,14 @@ final public class MtasSketchParser extends MtasBasicParser {
394 389 this.offset = SOURCE_OWN;
395 390 this.type = MAPPING_TYPE_WORD;
396 391 }
397   -
398   - /*
399   - * (non-Javadoc)
400   - *
401   - * @see mtas.analysis.parser.MtasBasicParser.MtasParserMapping#self()
402   - */
  392 +
403 393 @Override
404 394 protected MtasSketchParserMappingWord self() {
405 395 return this;
406 396 }
407 397 }
408 398  
  399 +
409 400 /**
410 401 * The Class MtasSketchParserMappingWordAnnotation.
411 402 */
... ...
src/mtas/analysis/token/MtasTokenCollection.java
1 1 package mtas.analysis.token;
2 2  
  3 +import java.io.IOException;
3 4 import java.util.ArrayList;
4 5 import java.util.Arrays;
5 6 import java.util.Collections;
... ... @@ -95,7 +96,7 @@ public class MtasTokenCollection {
95 96 Iterator<MtasToken<?>> it = this.iterator();
96 97 while (it.hasNext()) {
97 98 MtasToken<?> token = it.next();
98   - System.out.println(token);
  99 + System.out.println(token);
99 100 }
100 101 }
101 102  
... ... @@ -237,7 +238,7 @@ public class MtasTokenCollection {
237 238 trash.add(i);
238 239 } else if ((token.getPositionStart() == null)
239 240 || (token.getPositionEnd() == null)) {
240   - trash.add(i);
  241 + trash.add(i);
241 242 } else if (token.getValue() == null || (token.getValue().equals(""))) {
242 243 trash.add(i);
243 244 } else if (token.getPrefix() == null || (token.getPrefix().equals(""))) {
... ...
src/mtas/codec/MtasCodecPostingsFormat.java
... ... @@ -285,6 +285,10 @@ public class MtasCodecPostingsFormat extends PostingsFormat {
285 285 } catch (Exception e) {
286 286 throw new IOException(e.getMessage());
287 287 }
  288 + Long termRef = inObject.readVLong();
  289 + inTerm.seek(termRef);
  290 + token.setTermRef(termRef);
  291 + token.setValue(inTerm.readString());
288 292 return token;
289 293 }
290 294  
... ...
src/mtas/codec/MtasFieldsConsumer.java
... ... @@ -29,289 +29,21 @@ import org.apache.lucene.index.FieldInfo;
29 29 import org.apache.lucene.index.FieldInfos;
30 30 import org.apache.lucene.index.Fields;
31 31 import org.apache.lucene.index.IndexFileNames;
  32 +import org.apache.lucene.index.MergeState;
32 33 import org.apache.lucene.index.PostingsEnum;
33 34 import org.apache.lucene.index.SegmentWriteState;
34 35 import org.apache.lucene.index.Terms;
35 36 import org.apache.lucene.index.TermsEnum;
36 37 import org.apache.lucene.search.DocIdSetIterator;
  38 +import org.apache.lucene.store.IOContext;
37 39 import org.apache.lucene.store.IndexInput;
38 40 import org.apache.lucene.store.IndexOutput;
  41 +import org.apache.lucene.store.Lock;
39 42 import org.apache.lucene.util.BytesRef;
40 43 import org.apache.lucene.util.IOUtils;
41 44  
42 45 /**
43 46 * The Class MtasFieldsConsumer.
44   - *
45   - *
46   - * The Class MtasFieldsConsumer constructs several temporal and permanent files
47   - * to provide a forward index
48   - *
49   - * <ul>
50   - * <li><b>Temporary files</b><br>
51   - * <ul>
52   - * <li><b>Temporary file {@link #mtasTmpFieldFileName} with extension
53   - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_FIELD_EXTENSION} </b><br>
54   - * Contains for each field a reference to the list of documents. Structure of
55   - * content:
56   - * <ul>
57   - * <li><b>String</b>: field</li>
58   - * <li><b>VLong</b>: reference to {@link #mtasDocFileName}</li>
59   - * <li><b>VInt</b>: number of documents</li>
60   - * <li><b>VLong</b>: reference to {@link #mtasTermFileName}</li>
61   - * <li><b>VInt</b>: number of terms</li>
62   - * <li><b>VLong</b>: reference to {@link #mtasPrefixFileName}</li>
63   - * <li><b>VInt</b>: number of prefixes</li>
64   - * </ul>
65   - * </li>
66   - * <li><b>Temporary file {@link #mtasTmpObjectFileName} with extension
67   - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_OBJECT_EXTENSION}</b><br>
68   - * Contains for a specific field all objects constructed by
69   - * {@link createObjectAndRegisterPrefix}. For all fields, the objects are later
70   - * on copied to {@link #mtasObjectFileName} while statistics are collected.
71   - * Structure of content identical to {@link #mtasObjectFileName}.</li>
72   - * <li><b>Temporary file {@link #mtasTmpDocsFileName} with extension
73   - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_DOCS_EXTENSION}</b> <br>
74   - * Contains for a specific field for each doc multiple fragments. Each occurring
75   - * term results in a fragment. Structure of content:
76   - * <ul>
77   - * <li><b>VInt</b>: docId</li>
78   - * <li><b>VInt</b>: number of objects in this fragment</li>
79   - * <li><b>VLong</b>: offset references to {@link #mtasTmpObjectFileName}</li>
80   - * <li><b>VInt</b>,<b>VLong</b>: mtasId object, reference temporary object in
81   - * {@link #mtasTmpObjectFileName} minus offset</li>
82   - * <li><b>VInt</b>,<b>VLong</b>: ...</li>
83   - * </ul>
84   - * </li>
85   - * <li><b>Temporary file {@link #mtasTmpDocsChainedFileName} with extension
86   - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_DOCS_CHAINED_EXTENSION}
87   - * </b><br>
88   - * Contains for a specific field for each doc multiple chained fragments.
89   - * Structure of content:
90   - * <ul>
91   - * <li><b>VInt</b>: docId</li>
92   - * <li><b>VInt</b>: number of objects in this fragment</li>
93   - * <li><b>VLong</b>: offset references to {@link #mtasTmpObjectFileName}</li>
94   - * <li><b>VInt</b>,<b>VLong</b>: mtasId object, reference temporary object in
95   - * {@link #mtasTmpObjectFileName} minus offset</li>
96   - * <li><b>VInt</b>,<b>VLong</b>: ...</li>
97   - * <li><b>VLong</b>: reference to next fragment in
98   - * {@link #mtasTmpDocsChainedFileName}, self reference indicates end of chain
99   - * </ul>
100   - * </li>
101   - * <li><b>Temporary file {@link #mtasTmpDocFileName} with extension
102   - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_DOC_EXTENSION}</b><br>
103   - * For each document
104   - * <ul>
105   - * <li><b>VInt</b>: docId</li>
106   - * <li><b>VLong</b>: reference to {@link #mtasIndexObjectIdFileName}</li>
107   - * <li><b>VLong</b>: reference first object, used as offset for tree index
108   - * <li><b>VInt</b>: slope used in approximation reference objects index on id
109   - * </li>
110   - * <li><b>ZLong</b>: offset used in approximation reference objects index on id
111   - * </li>
112   - * <li><b>Byte</b>: flag indicating how corrections on the approximation
113   - * references objects for the index on id are stored:
114   - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_BYTE},
115   - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_SHORT},
116   - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_INTEGER} or
117   - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_LONG}</li>
118   - * <li><b>VInt</b>: number of objects in this document</li>
119   - * <li><b>VInt</b>: first position</li>
120   - * <li><b>VInt</b>: last position</li>
121   - * </ul>
122   - * </li>
123   - * </ul>
124   - * </li>
125   - * <li><b>Final files</b><br>
126   - * <ul>
127   - * <li><b>File {@link #mtasIndexFieldFileName} with extension
128   - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_FIELD_EXTENSION}</b><br>
129   - * Contains for each field a reference to the list of documents and the
130   - * prefixes. Structure of content:
131   - * <ul>
132   - * <li><b>String</b>: field</li>
133   - * <li><b>VLong</b>: reference to {@link #mtasDocFileName}</li>
134   - * <li><b>VLong</b>: reference to {@link #mtasIndexDocIdFileName}</li>
135   - * <li><b>VInt</b>: number of documents</li>
136   - * <li><b>VLong</b>: reference to {@link #mtasTermFileName}</li>
137   - * <li><b>VInt</b>: number of terms</li>
138   - * <li><b>VLong</b>: reference to {@link #mtasPrefixFileName}</li>
139   - * <li><b>VInt</b>: number of prefixes</li>
140   - * </ul>
141   - * </li>
142   - * <li><b>File {@link #mtasTermFileName} with extension
143   - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TERM_EXTENSION}</b><br>
144   - * For each field, all unique terms are stored here. Structure of content:
145   - * <ul>
146   - * <li><b>String</b>: term</li>
147   - * </ul>
148   - * </li>
149   - * <li><b>File {@link #mtasPrefixFileName} with extension
150   - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_PREFIX_EXTENSION}</b><br>
151   - * For each field, all unique prefixes are stored here. Structure of content:
152   - * <ul>
153   - * <li><b>String</b>: prefix</li>
154   - * </ul>
155   - * </li>
156   - * <li><b>File {@link #mtasObjectFileName} with extension
157   - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_OBJECT_EXTENSION}</b><br>
158   - * Contains all objects for all fields. Structure of content:
159   - * <ul>
160   - * <li><b>VInt</b>: mtasId</li>
161   - * <li><b>VInt</b>: objectFlags
162   - * <ul>
163   - * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PARENT}</li>
164   - * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_RANGE}</li>
165   - * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_SET}</li>
166   - * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_OFFSET}</li>
167   - * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_REALOFFSET}</li>
168   - * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PAYLOAD}</li>
169   - * </ul>
170   - * </li>
171   - * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PARENT}<br>
172   - * <b>VInt</b>: parentId
173   - * <li>Only if
174   - * {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_RANGE}<br>
175   - * <b>VInt</b>,<b>VInt</b>: startPosition and (endPosition-startPosition)
176   - * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_SET}<br>
177   - * <b>VInt</b>,<b>VInt</b>,<b>VInt</b>,...: number of positions, firstPosition,
178   - * (position-previousPosition),...
179   - * <li>Only if no {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_RANGE}
180   - * or {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_SET}<br>
181   - * <b>VInt</b>: position
182   - * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_OFFSET}<br>
183   - * <b>VInt</b>,<b>VInt</b>: startOffset, (endOffset-startOffset)
184   - * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_REALOFFSET}<br>
185   - * <b>VInt</b>,<b>VInt</b>: startRealOffset, (endRealOffset-startRealOffset)
186   - * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PAYLOAD}<br>
187   - * <b>VInt</b>,<b>Bytes</b>: number of bytes, payload
188   - * <li><b>VLong</b>: reference to Term in {@link #mtasTermFileName}</li>
189   - * </ul>
190   - * </li>
191   - * <li><b>File {@link #mtasIndexDocIdFileName} with extension
192   - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_DOC_ID_EXTENSION}
193   - * </b><br>
194   - * Contains for each field a tree structure {@link MtasTree} to search reference
195   - * to {@link #mtasDocFileName} by id. Structure of content for each node:
196   - * <ul>
197   - * <li><b>VLong</b>: offset references to {@link #mtasIndexDocIdFileName}, only
198   - * available in root node</li>
199   - * <li><b>Byte</b>: flag, should be zero for this tree, only available in root
200   - * node</li>
201   - * <li><b>VInt</b>: left</li>
202   - * <li><b>VInt</b>: right</li>
203   - * <li><b>VInt</b>: max</li>
204   - * <li><b>VLong</b>: left reference to {@link #mtasIndexDocIdFileName} minus the
205   - * offset stored in the root node</li>
206   - * <li><b>VLong</b>: right reference to {@link #mtasIndexDocIdFileName} minus
207   - * the offset stored in the root node</li>
208   - * <li><b>VInt</b>: number of objects on this node (always 1 for this tree)</li>
209   - * <li><b>VLong</b>: reference to {@link #mtasDocFileName} minus offset</li>
210   - * </ul>
211   - * </li>
212   - * <li><b>File {@link #mtasDocFileName} with extension
213   - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_DOC_EXTENSION}</b><br>
214   - * For each document
215   - * <ul>
216   - * <li><b>VInt</b>: docId</li>
217   - * <li><b>VLong</b>: reference to {@link #mtasIndexObjectIdFileName}</li>
218   - * <li><b>VLong</b>: reference to {@link #mtasIndexObjectPositionFileName}</li>
219   - * <li><b>VLong</b>: reference to {@link #mtasIndexObjectParentFileName}</li>
220   - * <li><b>VLong</b>: reference first object, used as offset for tree index
221   - * <li><b>VInt</b>: slope used in approximation reference objects index on id
222   - * </li>
223   - * <li><b>ZLong</b>: offset used in approximation reference objects index on id
224   - * </li>
225   - * <li><b>Byte</b>: flag indicating how corrections on the approximation
226   - * references objects for the index on id are stored:
227   - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_BYTE},
228   - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_SHORT},
229   - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_INTEGER} or
230   - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_LONG}</li>
231   - * <li><b>VInt</b>: number of objects</li>
232   - * <li><b>VInt</b>: first position</li>
233   - * <li><b>VInt</b>: last position</li>
234   - * </ul>
235   - * </li>
236   - * <li><b>File {@link #mtasIndexObjectIdFileName} with extension
237   - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_OBJECT_ID_EXTENSION}
238   - * </b><br>
239   - * Provides for each mtasId the reference to {@link #mtasObjectFileName}. These
240   - * references are grouped by document, sorted by mtasId, and because the
241   - * mtasId's for each document will always start with 0 and are sequential
242   - * without gaps, a reference can be computed if the position of the first
243   - * reference for a document is known from {@link #mtasDocFileName}. The
244   - * reference is approximated by the reference to the first object plus the
245   - * mtasId times a slope. Only a correction to this approximation is stored.
246   - * Structure of content:
247   - * <ul>
248   - * <li><b>Byte</b>/<b>Short</b>/<b>Int</b>/<b>Long</b>: correction reference to
249   - * {@link #mtasObjectFileName}</li>
250   - * </ul>
251   - * </li>
252   - * <li><b>File {@link #mtasIndexObjectPositionFileName} with extension
253   - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_OBJECT_POSITION_EXTENSION}
254   - * </b><br>
255   - * Contains for each document a tree structure {@link MtasTree} to search
256   - * objects by position. Structure of content for each node:
257   - * <ul>
258   - * <li><b>VLong</b>: offset references to
259   - * {@link #mtasIndexObjectPositionFileName}, only available in root node</li>
260   - * <li><b>Byte</b>: flag, should be zero for this tree, only available in root
261   - * node</li>
262   - * <li><b>VInt</b>: left</li>
263   - * <li><b>VInt</b>: right</li>
264   - * <li><b>VInt</b>: max</li>
265   - * <li><b>VLong</b>: left reference to {@link #mtasIndexObjectPositionFileName}
266   - * minus the offset stored in the root node</li>
267   - * <li><b>VLong</b>: right reference to {@link #mtasIndexObjectPositionFileName}
268   - * minus the offset stored in the root node</li>
269   - * <li><b>VInt</b>: number of objects on this node</li>
270   - * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>: set of the first reference to
271   - * {@link #mtasObjectFileName} minus offset, the prefixId referring to the
272   - * position the prefix in {@link #mtasPrefixFileName} and the reference to
273   - * {@link #mtasTermFileName} minus offset</li>
274   - * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>,...: for optional other sets of
275   - * reference to {@link #mtasObjectFileName}, position of the prefix in
276   - * {@link #mtasPrefixFileName} and the reference to {@link #mtasTermFileName};
277   - * for the first item the difference between this reference minus the previous
278   - * reference is stored</li>
279   - * </ul>
280   - * </li>
281   - * <li><b>File {@link #mtasIndexObjectParentFileName} with extension
282   - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_OBJECT_PARENT_EXTENSION}
283   - * </b><br>
284   - * Contains for each document a tree structure {@link MtasTree} to search
285   - * objects by parent. Structure of content for each node:
286   - * <ul>
287   - * <li><b>VLong</b>: offset references to {@link #mtasIndexObjectParentFileName}
288   - * , only available in root node</li>
289   - * <li><b>Byte</b>: flag, for this tree equal to
290   - * {@link mtas.codec.tree.MtasTree#SINGLE_POSITION_TREE} indicating a tree with
291   - * exactly one point at each node, only available in root node</li>
292   - * <li><b>VInt</b>: left</li>
293   - * <li><b>VInt</b>: right</li>
294   - * <li><b>VInt</b>: max</li>
295   - * <li><b>VLong</b>: left reference to {@link #mtasIndexObjectParentFileName}
296   - * minus the offset stored in the root node</li>
297   - * <li><b>VLong</b>: right reference to {@link #mtasIndexObjectParentFileName}
298   - * minus the offset stored in the root node</li>
299   - * <li><b>VInt</b>: number of objects on this node</li>
300   - * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>: set of the first reference to
301   - * {@link #mtasObjectFileName} minus offset, the prefixId referring to the
302   - * position the prefix in {@link #mtasPrefixFileName} and the reference to
303   - * {@link #mtasTermFileName} minus offset</li>
304   - * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>,...: for optional other sets of
305   - * reference to {@link #mtasObjectFileName}, position of the prefix in
306   - * {@link #mtasPrefixFileName} and the reference to {@link #mtasTermFileName};
307   - * for the first item the difference between this reference minus the previous
308   - * reference is stored</li>
309   - * </ul>
310   - * </li>
311   - * </ul>
312   - * </li>
313   - * </ul>
314   - *
315 47 */
316 48  
317 49 public class MtasFieldsConsumer extends FieldsConsumer {
... ... @@ -362,14 +94,10 @@ public class MtasFieldsConsumer extends FieldsConsumer {
362 94 /**
363 95 * Instantiates a new mtas fields consumer.
364 96 *
365   - * @param fieldsConsumer
366   - * the fields consumer
367   - * @param state
368   - * the state
369   - * @param name
370   - * the name
371   - * @param delegatePostingsFormatName
372   - * the delegate postings format name
  97 + * @param fieldsConsumer the fields consumer
  98 + * @param state the state
  99 + * @param name the name
  100 + * @param delegatePostingsFormatName the delegate postings format name
373 101 */
374 102 public MtasFieldsConsumer(FieldsConsumer fieldsConsumer,
375 103 SegmentWriteState state, String name, String delegatePostingsFormatName) {
... ... @@ -426,14 +154,10 @@ public class MtasFieldsConsumer extends FieldsConsumer {
426 154 /**
427 155 * Register prefix.
428 156 *
429   - * @param field
430   - * the field
431   - * @param prefix
432   - * the prefix
433   - * @param outPrefix
434   - * the out prefix
435   - * @throws IOException
436   - * Signals that an I/O exception has occurred.
  157 + * @param field the field
  158 + * @param prefix the prefix
  159 + * @param outPrefix the out prefix
  160 + * @throws IOException Signals that an I/O exception has occurred.
437 161 */
438 162 private void registerPrefix(String field, String prefix,
439 163 IndexOutput outPrefix) throws IOException {
... ... @@ -452,14 +176,10 @@ public class MtasFieldsConsumer extends FieldsConsumer {
452 176 /**
453 177 * Register prefix stats single position value.
454 178 *
455   - * @param field
456   - * the field
457   - * @param value
458   - * the value
459   - * @param outPrefix
460   - * the out prefix
461   - * @throws IOException
462   - * Signals that an I/O exception has occurred.
  179 + * @param field the field
  180 + * @param value the value
  181 + * @param outPrefix the out prefix
  182 + * @throws IOException Signals that an I/O exception has occurred.
463 183 */
464 184 public void registerPrefixStatsSinglePositionValue(String field, String value,
465 185 IndexOutput outPrefix) throws IOException {
... ... @@ -474,14 +194,10 @@ public class MtasFieldsConsumer extends FieldsConsumer {
474 194 /**
475 195 * Register prefix stats range position value.
476 196 *
477   - * @param field
478   - * the field
479   - * @param value
480   - * the value
481   - * @param outPrefix
482   - * the out prefix
483   - * @throws IOException
484   - * Signals that an I/O exception has occurred.
  197 + * @param field the field
  198 + * @param value the value
  199 + * @param outPrefix the out prefix
  200 + * @throws IOException Signals that an I/O exception has occurred.
485 201 */
486 202 public void registerPrefixStatsRangePositionValue(String field, String value,
487 203 IndexOutput outPrefix) throws IOException {
... ... @@ -495,14 +211,10 @@ public class MtasFieldsConsumer extends FieldsConsumer {
495 211 /**
496 212 * Register prefix stats set position value.
497 213 *
498   - * @param field
499   - * the field
500   - * @param value
501   - * the value
502   - * @param outPrefix
503   - * the out prefix
504   - * @throws IOException
505   - * Signals that an I/O exception has occurred.
  214 + * @param field the field
  215 + * @param value the value
  216 + * @param outPrefix the out prefix
  217 + * @throws IOException Signals that an I/O exception has occurred.
506 218 */
507 219 public void registerPrefixStatsSetPositionValue(String field, String value,
508 220 IndexOutput outPrefix) throws IOException {
... ... @@ -517,8 +229,7 @@ public class MtasFieldsConsumer extends FieldsConsumer {
517 229 /**
518 230 * Inits the prefix stats field.
519 231 *
520   - * @param field
521   - * the field
  232 + * @param field the field
522 233 */
523 234 private void initPrefixStatsField(String field) {
524 235 if (!singlePositionPrefix.containsKey(field)) {
... ... @@ -535,8 +246,7 @@ public class MtasFieldsConsumer extends FieldsConsumer {
535 246 /**
536 247 * Gets the prefix stats single position prefix attribute.
537 248 *
538   - * @param field
539   - * the field
  249 + * @param field the field
540 250 * @return the prefix stats single position prefix attribute
541 251 */
542 252 public String getPrefixStatsSinglePositionPrefixAttribute(String field) {
... ... @@ -547,8 +257,7 @@ public class MtasFieldsConsumer extends FieldsConsumer {
547 257 /**
548 258 * Gets the prefix stats multiple position prefix attribute.
549 259 *
550   - * @param field
551   - * the field
  260 + * @param field the field
552 261 * @return the prefix stats multiple position prefix attribute
553 262 */
554 263 public String getPrefixStatsMultiplePositionPrefixAttribute(String field) {
... ... @@ -559,8 +268,7 @@ public class MtasFieldsConsumer extends FieldsConsumer {
559 268 /**
560 269 * Gets the prefix stats set position prefix attribute.
561 270 *
562   - * @param field
563   - * the field
  271 + * @param field the field
564 272 * @return the prefix stats set position prefix attribute
565 273 */
566 274 public String getPrefixStatsSetPositionPrefixAttribute(String field) {
... ... @@ -585,6 +293,14 @@ public class MtasFieldsConsumer extends FieldsConsumer {
585 293 return text;
586 294 }
587 295  
  296 + /* (non-Javadoc)
  297 + * @see org.apache.lucene.codecs.FieldsConsumer#merge(org.apache.lucene.index.MergeState)
  298 + */
  299 + @Override
  300 + public void merge(MergeState mergeState) throws IOException {
  301 + delegateFieldsConsumer.merge(mergeState);
  302 + }
  303 +
588 304 /*
589 305 * (non-Javadoc)
590 306 *
... ... @@ -600,12 +316,9 @@ public class MtasFieldsConsumer extends FieldsConsumer {
600 316 /**
601 317 * Write.
602 318 *
603   - * @param fieldInfos
604   - * the field infos
605   - * @param fields
606   - * the fields
607   - * @throws IOException
608   - * Signals that an I/O exception has occurred.
  319 + * @param fieldInfos the field infos
  320 + * @param fields the fields
  321 + * @throws IOException Signals that an I/O exception has occurred.
609 322 */
610 323 private void write(FieldInfos fieldInfos, Fields fields) {
611 324 IndexOutput outField, outDoc, outIndexDocId, outIndexObjectId,
... ... @@ -1258,27 +971,17 @@ public class MtasFieldsConsumer extends FieldsConsumer {
1258 971 /**
1259 972 * Creates the object and register prefix.
1260 973 *
1261   - * @param field
1262   - * the field
1263   - * @param out
1264   - * the out
1265   - * @param term
1266   - * the term
1267   - * @param termRef
1268   - * the term ref
1269   - * @param startPosition
1270   - * the start position
1271   - * @param payload
1272   - * the payload
1273   - * @param startOffset
1274   - * the start offset
1275   - * @param endOffset
1276   - * the end offset
1277   - * @param outPrefix
1278   - * the out prefix
  974 + * @param field the field
  975 + * @param out the out
  976 + * @param term the term
  977 + * @param termRef the term ref
  978 + * @param startPosition the start position
  979 + * @param payload the payload
  980 + * @param startOffset the start offset
  981 + * @param endOffset the end offset
  982 + * @param outPrefix the out prefix
1279 983 * @return the integer
1280   - * @throws IOException
1281   - * Signals that an I/O exception has occurred.
  984 + * @throws IOException Signals that an I/O exception has occurred.
1282 985 */
1283 986 private Integer createObjectAndRegisterPrefix(String field, IndexOutput out,
1284 987 BytesRef term, Long termRef, int startPosition, BytesRef payload,
... ... @@ -1392,15 +1095,11 @@ public class MtasFieldsConsumer extends FieldsConsumer {
1392 1095 /**
1393 1096 * Store tree.
1394 1097 *
1395   - * @param tree
1396   - * the tree
1397   - * @param out
1398   - * the out
1399   - * @param refApproxOffset
1400   - * the ref approx offset
  1098 + * @param tree the tree
  1099 + * @param out the out
  1100 + * @param refApproxOffset the ref approx offset
1401 1101 * @return the long
1402   - * @throws IOException
1403   - * Signals that an I/O exception has occurred.
  1102 + * @throws IOException Signals that an I/O exception has occurred.
1404 1103 */
1405 1104 private Long storeTree(MtasTree<?> tree, IndexOutput out,
1406 1105 long refApproxOffset) throws IOException {
... ... @@ -1411,21 +1110,14 @@ public class MtasFieldsConsumer extends FieldsConsumer {
1411 1110 /**
1412 1111 * Store tree.
1413 1112 *
1414   - * @param node
1415   - * the node
1416   - * @param isSinglePoint
1417   - * the is single point
1418   - * @param storeAdditionalInformation
1419   - * the store additional information
1420   - * @param out
1421   - * the out
1422   - * @param nodeRefApproxOffset
1423   - * the node ref approx offset
1424   - * @param refApproxOffset
1425   - * the ref approx offset
  1113 + * @param node the node
  1114 + * @param isSinglePoint the is single point
  1115 + * @param storeAdditionalInformation the store additional information
  1116 + * @param out the out
  1117 + * @param nodeRefApproxOffset the node ref approx offset
  1118 + * @param refApproxOffset the ref approx offset
1426 1119 * @return the long
1427   - * @throws IOException
1428   - * Signals that an I/O exception has occurred.
  1120 + * @throws IOException Signals that an I/O exception has occurred.
1429 1121 */
1430 1122 private Long storeTree(MtasTreeNode<?> node, boolean isSinglePoint,
1431 1123 boolean storeAdditionalInformation, IndexOutput out,
... ... @@ -1510,10 +1202,8 @@ public class MtasFieldsConsumer extends FieldsConsumer {
1510 1202 /**
1511 1203 * Token stats add.
1512 1204 *
1513   - * @param min
1514   - * the min
1515   - * @param max
1516   - * the max
  1205 + * @param min the min
  1206 + * @param max the max
1517 1207 */
1518 1208 private void tokenStatsAdd(int min, int max) {
1519 1209 tokenStatsNumber++;
... ... @@ -1532,16 +1222,11 @@ public class MtasFieldsConsumer extends FieldsConsumer {
1532 1222 /**
1533 1223 * Copy object and update stats.
1534 1224 *
1535   - * @param id
1536   - * the id
1537   - * @param in
1538   - * the in
1539   - * @param inRef
1540   - * the in ref
1541   - * @param out
1542   - * the out
1543   - * @throws IOException
1544   - * Signals that an I/O exception has occurred.
  1225 + * @param id the id
  1226 + * @param in the in
  1227 + * @param inRef the in ref
  1228 + * @param out the out
  1229 + * @throws IOException Signals that an I/O exception has occurred.
1545 1230 */
1546 1231 private void copyObjectAndUpdateStats(int id, IndexInput in, Long inRef,
1547 1232 IndexOutput out) throws IOException {
... ...
src/mtas/codec/util/CodecCollector.java
... ... @@ -2138,8 +2138,9 @@ public class CodecCollector {
2138 2138 .checkExistenceNecessaryKeys()) {
2139 2139 needSecondRound = true;
2140 2140 }
2141   - }
2142   - }
  2141 + termVector.subComponentFunction.dataCollector.reduceToSegmentKeys();
  2142 + }
  2143 + }
2143 2144 return needSecondRound;
2144 2145 }
2145 2146  
... ...
src/mtas/codec/util/CodecComponent.java
... ... @@ -23,7 +23,10 @@ import mtas.parser.function.util.MtasFunctionParserFunction;
23 23 import mtas.parser.function.util.MtasFunctionParserFunctionDefault;
24 24 import org.apache.commons.lang.ArrayUtils;
25 25 import org.apache.lucene.search.spans.SpanQuery;
  26 +import org.apache.lucene.util.automaton.Automata;
  27 +import org.apache.lucene.util.automaton.Automaton;
26 28 import org.apache.lucene.util.automaton.CompiledAutomaton;
  29 +import org.apache.lucene.util.automaton.Operations;
27 30 import org.apache.lucene.util.automaton.RegExp;
28 31  
29 32 /**
... ...
src/mtas/codec/util/CodecInfo.java
... ... @@ -274,7 +274,7 @@ public class CodecInfo {
274 274 IndexInput inTerm = indexInputList.get("term");
275 275 for (MtasTreeHit<?> hit : hits) {
276 276 MtasToken<String> token = MtasCodecPostingsFormat.getToken(inObject,
277   - inTerm, hit.ref);
  277 + inTerm, hit.ref);
278 278 if (token != null) {
279 279 if (prefixes.size() > 0) {
280 280 if (prefixes.contains(token.getPrefix())) {
... ...
src/mtas/codec/util/DataCollector.java
... ... @@ -151,4 +151,6 @@ public class DataCollector {
151 151 throw new IOException("unknown dataType " + dataType);
152 152 }
153 153 }
  154 +
  155 +
154 156 }
... ...
src/mtas/codec/util/collector/MtasDataAdvanced.java
... ... @@ -201,6 +201,32 @@ abstract class MtasDataAdvanced&lt;T1 extends Number &amp; Comparable&lt;T1&gt;, T2 extends N
201 201 tmpOldSize);
202 202 }
203 203  
  204 + public void reduceToSegmentKeys() {
  205 + if(segmentRegistration != null) {
  206 + int sizeCopy = size;
  207 + String[] keyListCopy = keyList.clone();
  208 + T1[] advancedValueSumListCopy = advancedValueSumList.clone();
  209 + T1[] advancedValueMaxListCopy = advancedValueMaxList.clone();
  210 + T1[] advancedValueMinListCopy = advancedValueMinList.clone();
  211 + T1[] advancedValueSumOfSquaresListCopy = advancedValueSumOfSquaresList.clone();
  212 + T2[] advancedValueSumOfLogsListCopy = advancedValueSumOfLogsList.clone();
  213 + long[] advancedValueNListCopy = advancedValueNList.clone();
  214 + size = 0;
  215 + for(int i=0; i< sizeCopy; i++) {
  216 + if(segmentKeys.contains(keyListCopy[i])) {
  217 + keyList[size] = keyListCopy[i];
  218 + advancedValueSumList[size] = advancedValueSumListCopy[i];
  219 + advancedValueMaxList[size] = advancedValueMaxListCopy[i];
  220 + advancedValueMinList[size] = advancedValueMinListCopy[i];
  221 + advancedValueSumOfSquaresList[size] = advancedValueSumOfSquaresListCopy[i];
  222 + advancedValueSumOfLogsList[size] = advancedValueSumOfLogsListCopy[i];
  223 + advancedValueNList[size] = advancedValueNListCopy[i];
  224 + size++;
  225 + }
  226 + }
  227 + }
  228 + }
  229 +
204 230 /*
205 231 * (non-Javadoc)
206 232 *
... ...
src/mtas/codec/util/collector/MtasDataBasic.java
... ... @@ -228,6 +228,24 @@ abstract class MtasDataBasic&lt;T1 extends Number &amp; Comparable&lt;T1&gt;, T2 extends Numb
228 228 tmpOldSize);
229 229 }
230 230  
  231 + public void reduceToSegmentKeys() {
  232 + if (segmentRegistration != null) {
  233 + int sizeCopy = size;
  234 + String[] keyListCopy = keyList.clone();
  235 + T1[] basicValueSumListCopy = basicValueSumList.clone();
  236 + long[] basicValueNListCopy = basicValueNList.clone();
  237 + size = 0;
  238 + for (int i = 0; i < sizeCopy; i++) {
  239 + if (segmentKeys.contains(keyListCopy[i])) {
  240 + keyList[size] = keyListCopy[i];
  241 + basicValueSumList[size] = basicValueSumListCopy[i];
  242 + basicValueNList[size] = basicValueNListCopy[i];
  243 + size++;
  244 + }
  245 + }
  246 + }
  247 + }
  248 +
231 249 /*
232 250 * (non-Javadoc)
233 251 *
... ...
src/mtas/codec/util/collector/MtasDataCollector.java
... ... @@ -891,7 +891,7 @@ public abstract class MtasDataCollector&lt;T1 extends Number &amp; Comparable&lt;T1&gt;, T2 e
891 891 /**
892 892 * Reduce to segment keys.
893 893 */
894   - public final void reduceToSegmentKeys() {
  894 + public void reduceToSegmentKeys() {
895 895 if (segmentRegistration != null) {
896 896 reduceToKeys(segmentKeys);
897 897 }
... ...
src/mtas/codec/util/collector/MtasDataFull.java
... ... @@ -158,6 +158,22 @@ abstract class MtasDataFull&lt;T1 extends Number &amp; Comparable&lt;T1&gt;, T2 extends Numbe
158 158 System.arraycopy(tmpNewFullValueList, 0, newFullValueList, 0, tmpOldSize);
159 159 }
160 160  
  161 + public void reduceToSegmentKeys() {
  162 + if(segmentRegistration != null) {
  163 + int sizeCopy = size;
  164 + String[] keyListCopy = keyList.clone();
  165 + T1[][] fullValueListCopy = fullValueList.clone();
  166 + size = 0;
  167 + for(int i=0; i< sizeCopy; i++) {
  168 + if(segmentKeys.contains(keyListCopy[i])) {
  169 + keyList[size] = keyListCopy[i];
  170 + fullValueList[size] = fullValueListCopy[i];
  171 + size++;
  172 + }
  173 + }
  174 + }
  175 + }
  176 +
161 177 /*
162 178 * (non-Javadoc)
163 179 *
... ...
src/mtas/codec/util/collector/MtasDataItem.java
... ... @@ -38,7 +38,7 @@ public abstract class MtasDataItem&lt;T1 extends Number &amp; Comparable&lt;T1&gt;, T2 extend
38 38  
39 39 /** The error list. */
40 40 protected HashMap<String, Integer> errorList;
41   -
  41 +
42 42 /** The comparable sort value. */
43 43 protected NumberComparator<?> comparableSortValue;
44 44  
... ... @@ -101,7 +101,7 @@ public abstract class MtasDataItem&lt;T1 extends Number &amp; Comparable&lt;T1&gt;, T2 extend
101 101 */
102 102 public abstract Map<String, Object> rewrite(boolean showDebugInfo)
103 103 throws IOException;
104   -
  104 +
105 105 /**
106 106 * Gets the sub.
107 107 *
... ...
src/mtas/parser/cql/util/MtasCQLParserSentenceCondition.java
... ... @@ -47,7 +47,7 @@ public class MtasCQLParserSentenceCondition {
47 47 public MtasCQLParserSentenceCondition(MtasCQLParserBasicSentenceCondition s)
48 48 throws ParseException {
49 49 sequenceList = new ArrayList<List<MtasCQLParserSentenceCondition>>();
50   - basicSentence = s;
  50 + basicSentence = s;
51 51 minimumOccurence = 1;
52 52 maximumOccurence = 1;
53 53 simplified = false;
... ...
src/mtas/solr/handler/component/MtasSolrSearchComponent.java
... ... @@ -436,8 +436,8 @@ public class MtasSolrSearchComponent extends SearchComponent {
436 436 * (non-Javadoc)
437 437 *
438 438 * @see
439   - * org.apache.solr.handler.component.SearchComponent#finishStage(org.apache.
440   - * solr.handler.component.ResponseBuilder)
  439 + * org.apache.solr.handler.component.SearchComponent#finishStage(org.apache.
  440 + * solr.handler.component.ResponseBuilder)
441 441 */
442 442 @Override
443 443 public void finishStage(ResponseBuilder rb) {
... ... @@ -549,7 +549,6 @@ public class MtasSolrSearchComponent extends SearchComponent {
549 549 return STAGE_GROUP;
550 550 }
551 551 }
552   -
553 552 }
554 553 return ResponseBuilder.STAGE_DONE;
555 554 }
... ... @@ -562,15 +561,9 @@ public class MtasSolrSearchComponent extends SearchComponent {
562 561 * @return the mtas fields
563 562 */
564 563  
565   - /**
566   - * Gets the mtas fields.
567   - *
568   - * @param rb
569   - * the rb
570   - * @return the mtas fields
571   - */
572 564 private ComponentFields getMtasFields(ResponseBuilder rb) {
573 565 return (ComponentFields) rb.req.getContext().get(ComponentFields.class);
574 566 }
575 567  
  568 +
576 569 }
... ...
src/site/markdown/download.md.vm
... ... @@ -13,10 +13,10 @@
13 13 <tr>
14 14 <td>$context.get("currentDevelopmentVersion")</td>
15 15 <td>$context.get("currentDevelopmentRelease")</td>
16   - <td><a href='https://github.com/meertensinstituut/mtas/releases/download/${currentDevelopmentRelease}/mtas-${currentDevelopmentVersion}.jar'>Binary (jar)</a></td>
17   - <td><a href='https://github.com/meertensinstituut/mtas/archive/${currentDevelopmentRelease}.tar.gz'>Source (tgz)</a></td>
18   - <td><a href='https://github.com/meertensinstituut/mtas/archive/${currentDevelopmentRelease}.zip'>Source (zip)</a></td>
  16 + <td><a href='https://github.com/matthijsbrouwer/mtas/releases/download/${currentDevelopmentRelease}/mtas-${currentDevelopmentVersion}.jar'>Binary (jar)</a></td>
  17 + <td><a href='https://github.com/matthijsbrouwer/mtas/archive/${currentDevelopmentRelease}.tar.gz'>Source (tgz)</a></td>
  18 + <td><a href='https://github.com/matthijsbrouwer/mtas/archive/${currentDevelopmentRelease}.zip'>Source (zip)</a></td>
19 19 <td>Development version</td>
20 20 </tr>
21 21 </tbody>
22 22 -</table>
  23 +</table>
23 24 \ No newline at end of file
... ...
src/site/markdown/installation.md 0 → 100644
  1 +# Getting started
  2 +
  3 +
... ...
src/site/markdown/installation_solr.md
1 1 # Mtas and Solr
2   -
3   -
... ...