Commit d43a3ac6d420cd643e7403a2f5b89289437ea6bc
Merge branch 'master' of https://github.com/meertensinstituut/mtas
# Conflicts:
#   conf/parser/mtas/crm_test.xml
#   conf/parser/mtas/folia_ddd.xml
#   pom.xml
#   src/mtas/analysis/MtasTokenizer.java
#   src/mtas/analysis/parser/MtasCRMParser.java
#   src/mtas/analysis/token/MtasTokenCollection.java
#   src/mtas/codec/MtasCodecPostingsFormat.java
#   src/mtas/codec/MtasFieldsConsumer.java
#   src/mtas/codec/util/CodecComponent.java
#   src/mtas/codec/util/CodecInfo.java
#   src/mtas/codec/util/DataCollector.java
#   src/mtas/codec/util/collector/MtasDataAdvanced.java
#   src/mtas/codec/util/collector/MtasDataBasic.java
#   src/mtas/codec/util/collector/MtasDataDoubleAdvanced.java
#   src/mtas/codec/util/collector/MtasDataDoubleBasic.java
#   src/mtas/codec/util/collector/MtasDataDoubleFull.java
#   src/mtas/codec/util/collector/MtasDataFull.java
#   src/mtas/codec/util/collector/MtasDataItem.java
#   src/mtas/codec/util/collector/MtasDataItemAdvanced.java
#   src/mtas/codec/util/collector/MtasDataItemBasic.java
#   src/mtas/codec/util/collector/MtasDataItemDoubleAdvanced.java
#   src/mtas/codec/util/collector/MtasDataItemDoubleBasic.java
#   src/mtas/codec/util/collector/MtasDataItemDoubleFull.java
#   src/mtas/codec/util/collector/MtasDataItemFull.java
#   src/mtas/codec/util/collector/MtasDataItemLongAdvanced.java
#   src/mtas/codec/util/collector/MtasDataItemLongBasic.java
#   src/mtas/codec/util/collector/MtasDataLongAdvanced.java
#   src/mtas/codec/util/collector/MtasDataLongBasic.java
#   src/mtas/codec/util/collector/MtasDataLongFull.java
#   src/mtas/parser/cql/util/MtasCQLParserDefaultPrefixCondition.java
#   src/mtas/parser/cql/util/MtasCQLParserSentenceCondition.java
#   src/mtas/parser/cql/util/MtasCQLParserSentencePartCondition.java
#   src/mtas/solr/handler/component/MtasSolrSearchComponent.java
#   src/mtas/solr/search/MtasCQLQParser.java
#   src/mtas/solr/update/processor/MtasUpdateRequestProcessorFactory.java
#   src/mtas/solr/update/processor/MtasUpdateRequestProcessorResultWriter.java
#   src/site/markdown/download.md.vm
#   src/site/markdown/index.md
#   src/site/markdown/installation.md
#   src/site/markdown/installation_lucene.md
#   src/site/markdown/installation_solr.md
#   src/site/site.xml
Showing 22 changed files with 466 additions and 425 deletions
conf/parser/mtas/crm_test.xml
... | ... | @@ -17,16 +17,20 @@ |
17 | 17 | <!-- START CONFIGURATION MTAS FOLIA PARSER --> |
18 | 18 | <parser name="mtas.analysis.parser.MtasCRMParser"> |
19 | 19 | |
20 | +<<<<<<< HEAD | |
20 | 21 | <!-- START GENERAL SETTINGS MTAS PARSER --> |
21 | 22 | <autorepair value="true" /> |
22 | 23 | <makeunique value="true" /> |
23 | 24 | <!-- END GENERAL SETTINGS MTAS PARSER --> |
24 | 25 | |
26 | +======= | |
27 | +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | |
25 | 28 | <mappings> |
26 | 29 | |
27 | 30 | <mapping type="word"> |
28 | 31 | </mapping> |
29 | 32 | |
33 | +<<<<<<< HEAD | |
30 | 34 | <mapping type="wordAnnotation" name="0"> |
31 | 35 | <token type="string" offset="false" parent="false"> |
32 | 36 | <pre> |
... | ... | @@ -71,12 +75,19 @@ |
71 | 75 | <token type="string" offset="false" parent="false"> |
72 | 76 | <pre> |
73 | 77 | <item type="string" value="t2" /> |
78 | +======= | |
79 | + <mapping type="wordAnnotation" name="2"> | |
80 | + <token type="string" offset="false" parent="false"> | |
81 | + <pre> | |
82 | + <item type="string" value="t" /> | |
83 | +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | |
74 | 84 | </pre> |
75 | 85 | <post> |
76 | 86 | <item type="text" /> |
77 | 87 | </post> |
78 | 88 | </token> |
79 | 89 | </mapping> |
90 | +<<<<<<< HEAD | |
80 | 91 | <mapping type="wordAnnotation" name="2"> |
81 | 92 | <token type="string" offset="false" parent="false"> |
82 | 93 | <pre> |
... | ... | @@ -87,6 +98,8 @@ |
87 | 98 | </post> |
88 | 99 | </token> |
89 | 100 | </mapping> |
101 | +======= | |
102 | +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | |
90 | 103 | <mapping type="wordAnnotation" name="3"> |
91 | 104 | <token type="string" offset="false" parent="false"> |
92 | 105 | <pre> |
... | ... | @@ -98,6 +111,7 @@ |
98 | 111 | </token> |
99 | 112 | </mapping> |
100 | 113 | <mapping type="wordAnnotation" name="4"> |
114 | +<<<<<<< HEAD | |
101 | 115 | <token type="string" offset="false" parent="false"> |
102 | 116 | <pre> |
103 | 117 | <item type="string" value="crm" /> |
... | ... | @@ -114,12 +128,34 @@ |
114 | 128 | </condition> |
115 | 129 | </mapping> |
116 | 130 | <mapping type="crmPair" name="part"> |
131 | +======= | |
132 | + </mapping> | |
133 | + <mapping type="wordAnnotation" name="5"> | |
134 | + </mapping> | |
135 | + <mapping type="wordAnnotation" name="6"> | |
136 | + </mapping> | |
137 | + <mapping type="wordAnnotation" name="7"> | |
138 | + <token type="string" offset="false" parent="false"> | |
139 | + <pre> | |
140 | + <item type="string" value="sentence" /> | |
141 | + </pre> | |
142 | + <post> | |
143 | + <item type="text" /> | |
144 | + </post> | |
145 | + </token> | |
146 | + <condition> | |
147 | + <item type="text" not="true" condition="-" /> | |
148 | + </condition> | |
149 | + </mapping> | |
150 | + <mapping type="wordAnnotation" name="pos"> | |
151 | +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | |
117 | 152 | <token type="string" offset="false" parent="false"> |
118 | 153 | <pre> |
119 | 154 | <item type="name" /> |
120 | 155 | </pre> |
121 | 156 | <post> |
122 | 157 | <item type="text" /> |
158 | +<<<<<<< HEAD | |
123 | 159 | </post> |
124 | 160 | </token> |
125 | 161 | </mapping> |
... | ... | @@ -282,11 +318,111 @@ |
282 | 318 | </condition> |
283 | 319 | </function> |
284 | 320 | <function type="wordAnnotation" name="4" split="+"> |
321 | +======= | |
322 | + </post> | |
323 | + </token> | |
324 | + </mapping> | |
325 | + <mapping type="wordAnnotation" name="feat.getal"> | |
326 | + <token type="string" offset="false" parent="false"> | |
327 | + <pre> | |
328 | + <item type="name" /> | |
329 | + </pre> | |
330 | + <post> | |
331 | + <item type="text" /> | |
332 | + </post> | |
333 | + </token> | |
334 | + </mapping> | |
335 | + <mapping type="wordAnnotation" name="feat.persoon"> | |
336 | + <token type="string" offset="false" parent="false"> | |
337 | + <pre> | |
338 | + <item type="name" /> | |
339 | + </pre> | |
340 | + <post> | |
341 | + <item type="text" /> | |
342 | + </post> | |
343 | + </token> | |
344 | + </mapping> | |
345 | + <mapping type="wordAnnotation" name="feat.ntype"> | |
346 | + <token type="string" offset="false" parent="false"> | |
347 | + <pre> | |
348 | + <item type="name" /> | |
349 | + </pre> | |
350 | + <post> | |
351 | + <item type="text" /> | |
352 | + </post> | |
353 | + </token> | |
354 | + </mapping> | |
355 | + <mapping type="wordAnnotation" name="feat.pvtijd"> | |
356 | + <token type="string" offset="false" parent="false"> | |
357 | + <pre> | |
358 | + <item type="name" /> | |
359 | + </pre> | |
360 | + <post> | |
361 | + <item type="text" /> | |
362 | + </post> | |
363 | + </token> | |
364 | + </mapping> | |
365 | + <mapping type="wordAnnotation" name="feat.wvorm"> | |
366 | + <token type="string" offset="false" parent="false"> | |
367 | + <pre> | |
368 | + <item type="name" /> | |
369 | + </pre> | |
370 | + <post> | |
371 | + <item type="text" /> | |
372 | + </post> | |
373 | + </token> | |
374 | + </mapping> | |
375 | + <mapping type="wordAnnotation" name="feat.numtype"> | |
376 | + <token type="string" offset="false" parent="false"> | |
377 | + <pre> | |
378 | + <item type="name" /> | |
379 | + </pre> | |
380 | + <post> | |
381 | + <item type="text" /> | |
382 | + </post> | |
383 | + </token> | |
384 | + </mapping> | |
385 | + <mapping type="wordAnnotation" name="feat.vwtype"> | |
386 | + <token type="string" offset="false" parent="false"> | |
387 | + <pre> | |
388 | + <item type="name" /> | |
389 | + </pre> | |
390 | + <post> | |
391 | + <item type="text" /> | |
392 | + </post> | |
393 | + </token> | |
394 | + </mapping> | |
395 | + <mapping type="wordAnnotation" name="feat.lwtype"> | |
396 | + <token type="string" offset="false" parent="false"> | |
397 | + <pre> | |
398 | + <item type="name" /> | |
399 | + </pre> | |
400 | + <post> | |
401 | + <item type="text" /> | |
402 | + </post> | |
403 | + </token> | |
404 | + </mapping> | |
405 | + <mapping type="wordAnnotation" name="feat.probleemgeval"> | |
406 | + <token type="string" offset="false" parent="false"> | |
407 | + <pre> | |
408 | + <item type="name" /> | |
409 | + </pre> | |
410 | + <post> | |
411 | + <item type="text" /> | |
412 | + </post> | |
413 | + </token> | |
414 | + </mapping> | |
415 | + </mappings> | |
416 | + | |
417 | + <functions> | |
418 | + <function name="4" split="+"> | |
419 | +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | |
285 | 420 | <condition value="000,001,002,003,004,005,006,009"> |
286 | 421 | <output name="pos" value="N" /> |
287 | 422 | <output name="feat.getal" value="ev" /> |
288 | 423 | </condition> |
289 | 424 | <condition value="010,011,012,013,014,015,016,019"> |
425 | +<<<<<<< HEAD | |
290 | 426 | <output name="pos" value="N" /> |
291 | 427 | <output name="feat.getal" value="mv" /> |
292 | 428 | </condition> |
... | ... | @@ -602,6 +738,167 @@ |
602 | 738 | <output name="feat.form" value="unclear" /> |
603 | 739 | </condition> |
604 | 740 | |
741 | +======= | |
742 | + <output name="pos" value="N" /> | |
743 | + <output name="feat.getal" value="mv" /> | |
744 | + </condition> | |
745 | + <condition value="020,021,022,023,024,025,026,029"> | |
746 | + <output name="pos" value="N" /> | |
747 | + <output name="feat.ntype" value="eigen" /> | |
748 | + </condition> | |
749 | + <condition value="090,091,092,093,094,095,096,099"> | |
750 | + <output name="pos" value="N" /> | |
751 | + <output name="feat.probleemgeval" /> | |
752 | + </condition> | |
753 | + <condition value="100,101,102,103,104,105,106,109"> | |
754 | + <output name="pos" value="ADJ" /> | |
755 | + <output name="feat.getal" value="ev" /> | |
756 | + </condition> | |
757 | + <condition value="110,111,112,113,114,115,116,119"> | |
758 | + <output name="pos" value="ADJ" /> | |
759 | + <output name="feat.getal" value="mv" /> | |
760 | + </condition> | |
761 | + <condition value="190,191,192,193,194,195,196,199"> | |
762 | + <output name="pos" value="ADJ" /> | |
763 | + <output name="feat.probleemgeval" /> | |
764 | + </condition> | |
765 | + | |
766 | + | |
767 | + <condition value="200,201,202,203,204,205,206,209"> | |
768 | + <output name="pos" value="WW" /> | |
769 | + <output name="feat.pvtijd" value="tgw" /> | |
770 | + </condition> | |
771 | + <condition value="210,211,212,213,214,215,216,219"> | |
772 | + <output name="pos" value="WW" /> | |
773 | + <output name="feat.pvtijd" value="tgw" /> | |
774 | + </condition> | |
775 | + <condition value="220,221,222,223,224,225,226,229"> | |
776 | + <output name="pos" value="WW" /> | |
777 | + <output name="feat.pvtijd" value="verl" /> | |
778 | + </condition> | |
779 | + <condition value="230,231,232,233,234,235,236,239"> | |
780 | + <output name="pos" value="WW" /> | |
781 | + <output name="feat.pvtijd" value="verl" /> | |
782 | + </condition> | |
783 | + <condition value="240,241,242,243,244,245,246,249"> | |
784 | + <output name="pos" value="WW" /> | |
785 | + </condition> | |
786 | + <condition value="250,251,252,253,254,255,256,259"> | |
787 | + <output name="pos" value="WW" /> | |
788 | + <output name="feat.wvorm" value="inf" /> | |
789 | + </condition> <condition value="260,261,262,263,264,265,266,269"> | |
790 | + <output name="pos" value="WW" /> | |
791 | + <output name="feat.wvorm" value="inf" /> | |
792 | + </condition> <condition value="270,271,272,273,274,275,276,279"> | |
793 | + <output name="pos" value="WW" /> | |
794 | + </condition> <condition value="280,281,282,283,284,285,286,289"> | |
795 | + <output name="pos" value="WW" /> | |
796 | + </condition> | |
797 | + <condition value="290,291,292,293,294,295,296,299"> | |
798 | + <output name="pos" value="WW" /> | |
799 | + <output name="feat.probleemgeval" /> | |
800 | + </condition> | |
801 | + | |
802 | + | |
803 | + <condition value="300,301,302,303,304,305,306,309"> | |
804 | + <output name="pos" value="TW" /> | |
805 | + <output name="feat.numtype" value="hoofd" /> | |
806 | + </condition> | |
807 | + <condition value="310,311,312,313,314,315,316,319"> | |
808 | + <output name="pos" value="TW" /> | |
809 | + <output name="feat.numtype" value="rang" /> | |
810 | + </condition> | |
811 | + <condition value="320,321,322,323,324,325,326,329"> | |
812 | + <output name="pos" value="TW" /> | |
813 | + </condition> | |
814 | + <condition value="390,391,392,393,394,395,396,399"> | |
815 | + <output name="pos" value="TW" /> | |
816 | + <output name="feat.probleemgeval" /> | |
817 | + </condition> | |
818 | + | |
819 | + <condition value="401"> | |
820 | + <output name="pos" value="VNW" /> | |
821 | + <output name="feat.getal" value="ev" /> | |
822 | + <output name="feat.persoon" value="1" /> | |
823 | + </condition> | |
824 | + <condition value="402"> | |
825 | + <output name="pos" value="VNW" /> | |
826 | + <output name="feat.getal" value="ev" /> | |
827 | + <output name="feat.persoon" value="2" /> | |
828 | + </condition> | |
829 | + <condition value="403"> | |
830 | + <output name="pos" value="VNW" /> | |
831 | + <output name="feat.getal" value="ev" /> | |
832 | + <output name="feat.persoon" value="3" /> | |
833 | + </condition> | |
834 | + <condition value="404"> | |
835 | + <output name="pos" value="VNW" /> | |
836 | + <output name="feat.getal" value="mv" /> | |
837 | + <output name="feat.persoon" value="1" /> | |
838 | + </condition> | |
839 | + <condition value="405"> | |
840 | + <output name="pos" value="VNW" /> | |
841 | + <output name="feat.getal" value="mv" /> | |
842 | + <output name="feat.persoon" value="2" /> | |
843 | + </condition> | |
844 | + <condition value="406"> | |
845 | + <output name="pos" value="VNW" /> | |
846 | + <output name="feat.getal" value="mv" /> | |
847 | + <output name="feat.persoon" value="3" /> | |
848 | + </condition> | |
849 | + <condition value="409"> | |
850 | + <output name="pos" value="VNW" /> | |
851 | + <output name="feat.probleemgeval" /> | |
852 | + </condition> | |
853 | + <condition value="410,411,412,413,414,415,416,419"> | |
854 | + <output name="pos" value="VNW" /> | |
855 | + <output name="feat.vwtype" value="aanw" /> | |
856 | + </condition> | |
857 | + <condition value="420,421,422,423,424,425,426,429"> | |
858 | + <output name="pos" value="VNW" /> | |
859 | + <output name="feat.vwtype" value="betr" /> | |
860 | + </condition> | |
861 | + <condition value="430,431,432,433,434,435,436,439"> | |
862 | + <output name="pos" value="VNW" /> | |
863 | + <output name="feat.vwtype" value="vb" /> | |
864 | + </condition> | |
865 | + <condition value="434,441,442,443,444,445,446,449"> | |
866 | + <output name="pos" value="VNW" /> | |
867 | + <output name="feat.vwtype" value="vb" /> | |
868 | + </condition> | |
869 | + <condition value="440,441,442,443,444,445,446,449"> | |
870 | + <output name="pos" value="VNW" /> | |
871 | + <output name="feat.lwtype" value="onbep" /> | |
872 | + </condition> | |
873 | + <condition value="450,451,452,453,454,455,456,459"> | |
874 | + <output name="pos" value="VNW" /> | |
875 | + <output name="feat.vwtype" value="bez" /> | |
876 | + </condition> | |
877 | + | |
878 | + | |
879 | + <condition value="001,011,021,091"> | |
880 | + <output name="feat.form" value="-e" /> | |
881 | + </condition> | |
882 | + <condition value="002,012,022,092"> | |
883 | + <output name="feat.form" value="-s/-th" /> | |
884 | + </condition> | |
885 | + <condition value="003,013,023,092"> | |
886 | + <output name="feat.form" value="-t" /> | |
887 | + </condition> | |
888 | + <condition value="004,014,024,092"> | |
889 | + <output name="feat.form" value="-n" /> | |
890 | + </condition> | |
891 | + <condition value="005,015,025,095"> | |
892 | + <output name="feat.form" value="-r/-re" /> | |
893 | + </condition> | |
894 | + <condition value="006,016,026,096"> | |
895 | + <output name="feat.form" value="-a" /> | |
896 | + </condition> | |
897 | + <condition value="009,019,029,099"> | |
898 | + <output name="feat.form" value="unclear" /> | |
899 | + </condition> | |
900 | + | |
901 | +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | |
605 | 902 | </function> |
606 | 903 | </functions> |
607 | 904 | |
... | ... |
conf/parser/mtas/folia_ddd.xml
... | ... | @@ -19,8 +19,12 @@ |
19 | 19 | |
20 | 20 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> |
21 | 21 | <autorepair value="true" /> |
22 | +<<<<<<< HEAD | |
22 | 23 | <makeunique value="true" /> |
23 | 24 | <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> |
25 | +======= | |
26 | + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | |
27 | +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | |
24 | 28 | |
25 | 29 | <!-- START REFERENCES --> |
26 | 30 | <references> |
... | ... |
pom.xml
1 | 1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
2 | 2 | xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
3 | 3 | <properties> |
4 | - <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | |
4 | + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | |
5 | 5 | <currentDevelopmentVersion>6.2.0</currentDevelopmentVersion> |
6 | 6 | <currentDevelopmentRelease>20160802</currentDevelopmentRelease> |
7 | 7 | </properties> |
... | ... | @@ -84,7 +84,7 @@ |
84 | 84 | <addMavenDescriptor>false</addMavenDescriptor> |
85 | 85 | </archive> |
86 | 86 | </configuration> |
87 | - </plugin> | |
87 | + </plugin> | |
88 | 88 | </plugins> |
89 | 89 | </build> |
90 | 90 | <reporting> |
... | ... | @@ -194,5 +194,4 @@ |
194 | 194 | </dependency> |
195 | 195 | </dependencies> |
196 | 196 | </dependencyManagement> |
197 | - | |
198 | 197 | </project> |
199 | 198 | \ No newline at end of file |
... | ... |
src/mtas/analysis/MtasTokenizer.java
... | ... | @@ -92,7 +92,7 @@ public final class MtasTokenizer<T> extends Tokenizer { |
92 | 92 | * Signals that an I/O exception has occurred. |
93 | 93 | */ |
94 | 94 | public MtasTokenizer(MtasConfiguration config) throws IOException { |
95 | - processConfiguration(config); | |
95 | + processConfiguration(config); | |
96 | 96 | } |
97 | 97 | |
98 | 98 | /** |
... | ... |
src/mtas/analysis/parser/MtasSketchParser.java
... | ... | @@ -41,7 +41,8 @@ final public class MtasSketchParser extends MtasBasicParser { |
41 | 41 | /** |
42 | 42 | * Instantiates a new mtas sketch parser. |
43 | 43 | * |
44 | - * @param config the config | |
44 | + * @param config | |
45 | + * the config | |
45 | 46 | */ |
46 | 47 | public MtasSketchParser(MtasConfiguration config) { |
47 | 48 | super(config); |
... | ... | @@ -363,7 +364,8 @@ final public class MtasSketchParser extends MtasBasicParser { |
363 | 364 | /** |
364 | 365 | * Prints the config types. |
365 | 366 | * |
366 | - * @param types the types | |
367 | + * @param types | |
368 | + * the types | |
367 | 369 | * @return the string |
368 | 370 | */ |
369 | 371 | private String printConfigTypes(HashMap<?, MtasParserType> types) { |
... | ... | @@ -378,15 +380,8 @@ final public class MtasSketchParser extends MtasBasicParser { |
378 | 380 | return text; |
379 | 381 | } |
380 | 382 | |
381 | - /** | |
382 | - * The Class MtasSketchParserMappingWord. | |
383 | - */ | |
384 | 383 | private class MtasSketchParserMappingWord |
385 | 384 | extends MtasParserMapping<MtasSketchParserMappingWord> { |
386 | - | |
387 | - /** | |
388 | - * Instantiates a new mtas sketch parser mapping word. | |
389 | - */ | |
390 | 385 | public MtasSketchParserMappingWord() { |
391 | 386 | super(); |
392 | 387 | this.position = SOURCE_OWN; |
... | ... | @@ -394,18 +389,14 @@ final public class MtasSketchParser extends MtasBasicParser { |
394 | 389 | this.offset = SOURCE_OWN; |
395 | 390 | this.type = MAPPING_TYPE_WORD; |
396 | 391 | } |
397 | - | |
398 | - /* | |
399 | - * (non-Javadoc) | |
400 | - * | |
401 | - * @see mtas.analysis.parser.MtasBasicParser.MtasParserMapping#self() | |
402 | - */ | |
392 | + | |
403 | 393 | @Override |
404 | 394 | protected MtasSketchParserMappingWord self() { |
405 | 395 | return this; |
406 | 396 | } |
407 | 397 | } |
408 | 398 | |
399 | + | |
409 | 400 | /** |
410 | 401 | * The Class MtasSketchParserMappingWordAnnotation. |
411 | 402 | */ |
... | ... |
src/mtas/analysis/token/MtasTokenCollection.java
1 | 1 | package mtas.analysis.token; |
2 | 2 | |
3 | +import java.io.IOException; | |
3 | 4 | import java.util.ArrayList; |
4 | 5 | import java.util.Arrays; |
5 | 6 | import java.util.Collections; |
... | ... | @@ -95,7 +96,7 @@ public class MtasTokenCollection { |
95 | 96 | Iterator<MtasToken<?>> it = this.iterator(); |
96 | 97 | while (it.hasNext()) { |
97 | 98 | MtasToken<?> token = it.next(); |
98 | - System.out.println(token); | |
99 | + System.out.println(token); | |
99 | 100 | } |
100 | 101 | } |
101 | 102 | |
... | ... | @@ -237,7 +238,7 @@ public class MtasTokenCollection { |
237 | 238 | trash.add(i); |
238 | 239 | } else if ((token.getPositionStart() == null) |
239 | 240 | || (token.getPositionEnd() == null)) { |
240 | - trash.add(i); | |
241 | + trash.add(i); | |
241 | 242 | } else if (token.getValue() == null || (token.getValue().equals(""))) { |
242 | 243 | trash.add(i); |
243 | 244 | } else if (token.getPrefix() == null || (token.getPrefix().equals(""))) { |
... | ... |
src/mtas/codec/MtasCodecPostingsFormat.java
... | ... | @@ -285,6 +285,10 @@ public class MtasCodecPostingsFormat extends PostingsFormat { |
285 | 285 | } catch (Exception e) { |
286 | 286 | throw new IOException(e.getMessage()); |
287 | 287 | } |
288 | + Long termRef = inObject.readVLong(); | |
289 | + inTerm.seek(termRef); | |
290 | + token.setTermRef(termRef); | |
291 | + token.setValue(inTerm.readString()); | |
288 | 292 | return token; |
289 | 293 | } |
290 | 294 | |
... | ... |
src/mtas/codec/MtasFieldsConsumer.java
... | ... | @@ -29,289 +29,21 @@ import org.apache.lucene.index.FieldInfo; |
29 | 29 | import org.apache.lucene.index.FieldInfos; |
30 | 30 | import org.apache.lucene.index.Fields; |
31 | 31 | import org.apache.lucene.index.IndexFileNames; |
32 | +import org.apache.lucene.index.MergeState; | |
32 | 33 | import org.apache.lucene.index.PostingsEnum; |
33 | 34 | import org.apache.lucene.index.SegmentWriteState; |
34 | 35 | import org.apache.lucene.index.Terms; |
35 | 36 | import org.apache.lucene.index.TermsEnum; |
36 | 37 | import org.apache.lucene.search.DocIdSetIterator; |
38 | +import org.apache.lucene.store.IOContext; | |
37 | 39 | import org.apache.lucene.store.IndexInput; |
38 | 40 | import org.apache.lucene.store.IndexOutput; |
41 | +import org.apache.lucene.store.Lock; | |
39 | 42 | import org.apache.lucene.util.BytesRef; |
40 | 43 | import org.apache.lucene.util.IOUtils; |
41 | 44 | |
42 | 45 | /** |
43 | 46 | * The Class MtasFieldsConsumer. |
44 | - * | |
45 | - * | |
46 | - * The Class MtasFieldsConsumer constructs several temporal and permanent files | |
47 | - * to provide a forward index | |
48 | - * | |
49 | - * <ul> | |
50 | - * <li><b>Temporary files</b><br> | |
51 | - * <ul> | |
52 | - * <li><b>Temporary file {@link #mtasTmpFieldFileName} with extension | |
53 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_FIELD_EXTENSION} </b><br> | |
54 | - * Contains for each field a reference to the list of documents. Structure of | |
55 | - * content: | |
56 | - * <ul> | |
57 | - * <li><b>String</b>: field</li> | |
58 | - * <li><b>VLong</b>: reference to {@link #mtasDocFileName}</li> | |
59 | - * <li><b>VInt</b>: number of documents</li> | |
60 | - * <li><b>VLong</b>: reference to {@link #mtasTermFileName}</li> | |
61 | - * <li><b>VInt</b>: number of terms</li> | |
62 | - * <li><b>VLong</b>: reference to {@link #mtasPrefixFileName}</li> | |
63 | - * <li><b>VInt</b>: number of prefixes</li> | |
64 | - * </ul> | |
65 | - * </li> | |
66 | - * <li><b>Temporary file {@link #mtasTmpObjectFileName} with extension | |
67 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_OBJECT_EXTENSION}</b><br> | |
68 | - * Contains for a specific field all objects constructed by | |
69 | - * {@link createObjectAndRegisterPrefix}. For all fields, the objects are later | |
70 | - * on copied to {@link #mtasObjectFileName} while statistics are collected. | |
71 | - * Structure of content identical to {@link #mtasObjectFileName}.</li> | |
72 | - * <li><b>Temporary file {@link #mtasTmpDocsFileName} with extension | |
73 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_DOCS_EXTENSION}</b> <br> | |
74 | - * Contains for a specific field for each doc multiple fragments. Each occurring | |
75 | - * term results in a fragment. Structure of content: | |
76 | - * <ul> | |
77 | - * <li><b>VInt</b>: docId</li> | |
78 | - * <li><b>VInt</b>: number of objects in this fragment</li> | |
79 | - * <li><b>VLong</b>: offset references to {@link #mtasTmpObjectFileName}</li> | |
80 | - * <li><b>VInt</b>,<b>VLong</b>: mtasId object, reference temporary object in | |
81 | - * {@link #mtasTmpObjectFileName} minus offset</li> | |
82 | - * <li><b>VInt</b>,<b>VLong</b>: ...</li> | |
83 | - * </ul> | |
84 | - * </li> | |
85 | - * <li><b>Temporary file {@link #mtasTmpDocsChainedFileName} with extension | |
86 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_DOCS_CHAINED_EXTENSION} | |
87 | - * </b><br> | |
88 | - * Contains for a specific field for each doc multiple chained fragments. | |
89 | - * Structure of content: | |
90 | - * <ul> | |
91 | - * <li><b>VInt</b>: docId</li> | |
92 | - * <li><b>VInt</b>: number of objects in this fragment</li> | |
93 | - * <li><b>VLong</b>: offset references to {@link #mtasTmpObjectFileName}</li> | |
94 | - * <li><b>VInt</b>,<b>VLong</b>: mtasId object, reference temporary object in | |
95 | - * {@link #mtasTmpObjectFileName} minus offset</li> | |
96 | - * <li><b>VInt</b>,<b>VLong</b>: ...</li> | |
97 | - * <li><b>VLong</b>: reference to next fragment in | |
98 | - * {@link #mtasTmpDocsChainedFileName}, self reference indicates end of chain | |
99 | - * </ul> | |
100 | - * </li> | |
101 | - * <li><b>Temporary file {@link #mtasTmpDocFileName} with extension | |
102 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_DOC_EXTENSION}</b><br> | |
103 | - * For each document | |
104 | - * <ul> | |
105 | - * <li><b>VInt</b>: docId</li> | |
106 | - * <li><b>VLong</b>: reference to {@link #mtasIndexObjectIdFileName}</li> | |
107 | - * <li><b>VLong</b>: reference first object, used as offset for tree index | |
108 | - * <li><b>VInt</b>: slope used in approximation reference objects index on id | |
109 | - * </li> | |
110 | - * <li><b>ZLong</b>: offset used in approximation reference objects index on id | |
111 | - * </li> | |
112 | - * <li><b>Byte</b>: flag indicating how corrections on the approximation | |
113 | - * references objects for the index on id are stored: | |
114 | - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_BYTE}, | |
115 | - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_SHORT}, | |
116 | - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_INTEGER} or | |
117 | - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_LONG}</li> | |
118 | - * <li><b>VInt</b>: number of objects in this document</li> | |
119 | - * <li><b>VInt</b>: first position</li> | |
120 | - * <li><b>VInt</b>: last position</li> | |
121 | - * </ul> | |
122 | - * </li> | |
123 | - * </ul> | |
124 | - * </li> | |
125 | - * <li><b>Final files</b><br> | |
126 | - * <ul> | |
127 | - * <li><b>File {@link #mtasIndexFieldFileName} with extension | |
128 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_FIELD_EXTENSION}</b><br> | |
129 | - * Contains for each field a reference to the list of documents and the | |
130 | - * prefixes. Structure of content: | |
131 | - * <ul> | |
132 | - * <li><b>String</b>: field</li> | |
133 | - * <li><b>VLong</b>: reference to {@link #mtasDocFileName}</li> | |
134 | - * <li><b>VLong</b>: reference to {@link #mtasIndexDocIdFileName}</li> | |
135 | - * <li><b>VInt</b>: number of documents</li> | |
136 | - * <li><b>VLong</b>: reference to {@link #mtasTermFileName}</li> | |
137 | - * <li><b>VInt</b>: number of terms</li> | |
138 | - * <li><b>VLong</b>: reference to {@link #mtasPrefixFileName}</li> | |
139 | - * <li><b>VInt</b>: number of prefixes</li> | |
140 | - * </ul> | |
141 | - * </li> | |
142 | - * <li><b>File {@link #mtasTermFileName} with extension | |
143 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TERM_EXTENSION}</b><br> | |
144 | - * For each field, all unique terms are stored here. Structure of content: | |
145 | - * <ul> | |
146 | - * <li><b>String</b>: term</li> | |
147 | - * </ul> | |
148 | - * </li> | |
149 | - * <li><b>File {@link #mtasPrefixFileName} with extension | |
150 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_PREFIX_EXTENSION}</b><br> | |
151 | - * For each field, all unique prefixes are stored here. Structure of content: | |
152 | - * <ul> | |
153 | - * <li><b>String</b>: prefix</li> | |
154 | - * </ul> | |
155 | - * </li> | |
156 | - * <li><b>File {@link #mtasObjectFileName} with extension | |
157 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_OBJECT_EXTENSION}</b><br> | |
158 | - * Contains all objects for all fields. Structure of content: | |
159 | - * <ul> | |
160 | - * <li><b>VInt</b>: mtasId</li> | |
161 | - * <li><b>VInt</b>: objectFlags | |
162 | - * <ul> | |
163 | - * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PARENT}</li> | |
164 | - * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_RANGE}</li> | |
165 | - * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_SET}</li> | |
166 | - * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_OFFSET}</li> | |
167 | - * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_REALOFFSET}</li> | |
168 | - * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PAYLOAD}</li> | |
169 | - * </ul> | |
170 | - * </li> | |
171 | - * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PARENT}<br> | |
172 | - * <b>VInt</b>: parentId | |
173 | - * <li>Only if | |
174 | - * {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_RANGE}<br> | |
175 | - * <b>VInt</b>,<b>VInt</b>: startPosition and (endPosition-startPosition) | |
176 | - * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_SET}<br> | |
177 | - * <b>VInt</b>,<b>VInt</b>,<b>VInt</b>,...: number of positions, firstPosition, | |
178 | - * (position-previousPosition),... | |
179 | - * <li>Only if no {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_RANGE} | |
180 | - * or {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_SET}<br> | |
181 | - * <b>VInt</b>: position | |
182 | - * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_OFFSET}<br> | |
183 | - * <b>VInt</b>,<b>VInt</b>: startOffset, (endOffset-startOffset) | |
184 | - * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_REALOFFSET}<br> | |
185 | - * <b>VInt</b>,<b>VInt</b>: startRealOffset, (endRealOffset-startRealOffset) | |
186 | - * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PAYLOAD}<br> | |
187 | - * <b>VInt</b>,<b>Bytes</b>: number of bytes, payload | |
188 | - * <li><b>VLong</b>: reference to Term in {@link #mtasTermFileName}</li> | |
189 | - * </ul> | |
190 | - * </li> | |
191 | - * <li><b>File {@link #mtasIndexDocIdFileName} with extension | |
192 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_DOC_ID_EXTENSION} | |
193 | - * </b><br> | |
194 | - * Contains for each field a tree structure {@link MtasTree} to search reference | |
195 | - * to {@link #mtasDocFileName} by id. Structure of content for each node: | |
196 | - * <ul> | |
197 | - * <li><b>VLong</b>: offset references to {@link #mtasIndexDocIdFileName}, only | |
198 | - * available in root node</li> | |
199 | - * <li><b>Byte</b>: flag, should be zero for this tree, only available in root | |
200 | - * node</li> | |
201 | - * <li><b>VInt</b>: left</li> | |
202 | - * <li><b>VInt</b>: right</li> | |
203 | - * <li><b>VInt</b>: max</li> | |
204 | - * <li><b>VLong</b>: left reference to {@link #mtasIndexDocIdFileName} minus the | |
205 | - * offset stored in the root node</li> | |
206 | - * <li><b>VLong</b>: right reference to {@link #mtasIndexDocIdFileName} minus | |
207 | - * the offset stored in the root node</li> | |
208 | - * <li><b>VInt</b>: number of objects on this node (always 1 for this tree)</li> | |
209 | - * <li><b>VLong</b>: reference to {@link #mtasDocFileName} minus offset</li> | |
210 | - * </ul> | |
211 | - * </li> | |
212 | - * <li><b>File {@link #mtasDocFileName} with extension | |
213 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_DOC_EXTENSION}</b><br> | |
214 | - * For each document | |
215 | - * <ul> | |
216 | - * <li><b>VInt</b>: docId</li> | |
217 | - * <li><b>VLong</b>: reference to {@link #mtasIndexObjectIdFileName}</li> | |
218 | - * <li><b>VLong</b>: reference to {@link #mtasIndexObjectPositionFileName}</li> | |
219 | - * <li><b>VLong</b>: reference to {@link #mtasIndexObjectParentFileName}</li> | |
220 | - * <li><b>VLong</b>: reference first object, used as offset for tree index | |
221 | - * <li><b>VInt</b>: slope used in approximation reference objects index on id | |
222 | - * </li> | |
223 | - * <li><b>ZLong</b>: offset used in approximation reference objects index on id | |
224 | - * </li> | |
225 | - * <li><b>Byte</b>: flag indicating how corrections on the approximation | |
226 | - * references objects for the index on id are stored: | |
227 | - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_BYTE}, | |
228 | - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_SHORT}, | |
229 | - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_INTEGER} or | |
230 | - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_LONG}</li> | |
231 | - * <li><b>VInt</b>: number of objects</li> | |
232 | - * <li><b>VInt</b>: first position</li> | |
233 | - * <li><b>VInt</b>: last position</li> | |
234 | - * </ul> | |
235 | - * </li> | |
236 | - * <li><b>File {@link #mtasIndexObjectIdFileName} with extension | |
237 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_OBJECT_ID_EXTENSION} | |
238 | - * </b><br> | |
239 | - * Provides for each mtasId the reference to {@link #mtasObjectFileName}. These | |
240 | - * references are grouped by document, sorted by mtasId, and because the | |
241 | - * mtasId's for each document will always start with 0 and are sequential | |
242 | - * without gaps, a reference can be computed if the position of the first | |
243 | - * reference for a document is known from {@link #mtasDocFileName}. The | |
244 | - * reference is approximated by the reference to the first object plus the | |
245 | - * mtasId times a slope. Only a correction to this approximation is stored. | |
246 | - * Structure of content: | |
247 | - * <ul> | |
248 | - * <li><b>Byte</b>/<b>Short</b>/<b>Int</b>/<b>Long</b>: correction reference to | |
249 | - * {@link #mtasObjectFileName}</li> | |
250 | - * </ul> | |
251 | - * </li> | |
252 | - * <li><b>File {@link #mtasIndexObjectPositionFileName} with extension | |
253 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_OBJECT_POSITION_EXTENSION} | |
254 | - * </b><br> | |
255 | - * Contains for each document a tree structure {@link MtasTree} to search | |
256 | - * objects by position. Structure of content for each node: | |
257 | - * <ul> | |
258 | - * <li><b>VLong</b>: offset references to | |
259 | - * {@link #mtasIndexObjectPositionFileName}, only available in root node</li> | |
260 | - * <li><b>Byte</b>: flag, should be zero for this tree, only available in root | |
261 | - * node</li> | |
262 | - * <li><b>VInt</b>: left</li> | |
263 | - * <li><b>VInt</b>: right</li> | |
264 | - * <li><b>VInt</b>: max</li> | |
265 | - * <li><b>VLong</b>: left reference to {@link #mtasIndexObjectPositionFileName} | |
266 | - * minus the offset stored in the root node</li> | |
267 | - * <li><b>VLong</b>: right reference to {@link #mtasIndexObjectPositionFileName} | |
268 | - * minus the offset stored in the root node</li> | |
269 | - * <li><b>VInt</b>: number of objects on this node</li> | |
270 | - * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>: set of the first reference to | |
271 | - * {@link #mtasObjectFileName} minus offset, the prefixId referring to the | |
272 | - * position the prefix in {@link #mtasPrefixFileName} and the reference to | |
273 | - * {@link #mtasTermFileName} minus offset</li> | |
274 | - * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>,...: for optional other sets of | |
275 | - * reference to {@link #mtasObjectFileName}, position of the prefix in | |
276 | - * {@link #mtasPrefixFileName} and the reference to {@link #mtasTermFileName}; | |
277 | - * for the first item the difference between this reference minus the previous | |
278 | - * reference is stored</li> | |
279 | - * </ul> | |
280 | - * </li> | |
281 | - * <li><b>File {@link #mtasIndexObjectParentFileName} with extension | |
282 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_OBJECT_PARENT_EXTENSION} | |
283 | - * </b><br> | |
284 | - * Contains for each document a tree structure {@link MtasTree} to search | |
285 | - * objects by parent. Structure of content for each node: | |
286 | - * <ul> | |
287 | - * <li><b>VLong</b>: offset references to {@link #mtasIndexObjectParentFileName} | |
288 | - * , only available in root node</li> | |
289 | - * <li><b>Byte</b>: flag, for this tree equal to | |
290 | - * {@link mtas.codec.tree.MtasTree#SINGLE_POSITION_TREE} indicating a tree with | |
291 | - * exactly one point at each node, only available in root node</li> | |
292 | - * <li><b>VInt</b>: left</li> | |
293 | - * <li><b>VInt</b>: right</li> | |
294 | - * <li><b>VInt</b>: max</li> | |
295 | - * <li><b>VLong</b>: left reference to {@link #mtasIndexObjectParentFileName} | |
296 | - * minus the offset stored in the root node</li> | |
297 | - * <li><b>VLong</b>: right reference to {@link #mtasIndexObjectParentFileName} | |
298 | - * minus the offset stored in the root node</li> | |
299 | - * <li><b>VInt</b>: number of objects on this node</li> | |
300 | - * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>: set of the first reference to | |
301 | - * {@link #mtasObjectFileName} minus offset, the prefixId referring to the | |
302 | - * position the prefix in {@link #mtasPrefixFileName} and the reference to | |
303 | - * {@link #mtasTermFileName} minus offset</li> | |
304 | - * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>,...: for optional other sets of | |
305 | - * reference to {@link #mtasObjectFileName}, position of the prefix in | |
306 | - * {@link #mtasPrefixFileName} and the reference to {@link #mtasTermFileName}; | |
307 | - * for the first item the difference between this reference minus the previous | |
308 | - * reference is stored</li> | |
309 | - * </ul> | |
310 | - * </li> | |
311 | - * </ul> | |
312 | - * </li> | |
313 | - * </ul> | |
314 | - * | |
315 | 47 | */ |
316 | 48 | |
317 | 49 | public class MtasFieldsConsumer extends FieldsConsumer { |
... | ... | @@ -362,14 +94,10 @@ public class MtasFieldsConsumer extends FieldsConsumer { |
362 | 94 | /** |
363 | 95 | * Instantiates a new mtas fields consumer. |
364 | 96 | * |
365 | - * @param fieldsConsumer | |
366 | - * the fields consumer | |
367 | - * @param state | |
368 | - * the state | |
369 | - * @param name | |
370 | - * the name | |
371 | - * @param delegatePostingsFormatName | |
372 | - * the delegate postings format name | |
97 | + * @param fieldsConsumer the fields consumer | |
98 | + * @param state the state | |
99 | + * @param name the name | |
100 | + * @param delegatePostingsFormatName the delegate postings format name | |
373 | 101 | */ |
374 | 102 | public MtasFieldsConsumer(FieldsConsumer fieldsConsumer, |
375 | 103 | SegmentWriteState state, String name, String delegatePostingsFormatName) { |
... | ... | @@ -426,14 +154,10 @@ public class MtasFieldsConsumer extends FieldsConsumer { |
426 | 154 | /** |
427 | 155 | * Register prefix. |
428 | 156 | * |
429 | - * @param field | |
430 | - * the field | |
431 | - * @param prefix | |
432 | - * the prefix | |
433 | - * @param outPrefix | |
434 | - * the out prefix | |
435 | - * @throws IOException | |
436 | - * Signals that an I/O exception has occurred. | |
157 | + * @param field the field | |
158 | + * @param prefix the prefix | |
159 | + * @param outPrefix the out prefix | |
160 | + * @throws IOException Signals that an I/O exception has occurred. | |
437 | 161 | */ |
438 | 162 | private void registerPrefix(String field, String prefix, |
439 | 163 | IndexOutput outPrefix) throws IOException { |
... | ... | @@ -452,14 +176,10 @@ public class MtasFieldsConsumer extends FieldsConsumer { |
452 | 176 | /** |
453 | 177 | * Register prefix stats single position value. |
454 | 178 | * |
455 | - * @param field | |
456 | - * the field | |
457 | - * @param value | |
458 | - * the value | |
459 | - * @param outPrefix | |
460 | - * the out prefix | |
461 | - * @throws IOException | |
462 | - * Signals that an I/O exception has occurred. | |
179 | + * @param field the field | |
180 | + * @param value the value | |
181 | + * @param outPrefix the out prefix | |
182 | + * @throws IOException Signals that an I/O exception has occurred. | |
463 | 183 | */ |
464 | 184 | public void registerPrefixStatsSinglePositionValue(String field, String value, |
465 | 185 | IndexOutput outPrefix) throws IOException { |
... | ... | @@ -474,14 +194,10 @@ public class MtasFieldsConsumer extends FieldsConsumer { |
474 | 194 | /** |
475 | 195 | * Register prefix stats range position value. |
476 | 196 | * |
477 | - * @param field | |
478 | - * the field | |
479 | - * @param value | |
480 | - * the value | |
481 | - * @param outPrefix | |
482 | - * the out prefix | |
483 | - * @throws IOException | |
484 | - * Signals that an I/O exception has occurred. | |
197 | + * @param field the field | |
198 | + * @param value the value | |
199 | + * @param outPrefix the out prefix | |
200 | + * @throws IOException Signals that an I/O exception has occurred. | |
485 | 201 | */ |
486 | 202 | public void registerPrefixStatsRangePositionValue(String field, String value, |
487 | 203 | IndexOutput outPrefix) throws IOException { |
... | ... | @@ -495,14 +211,10 @@ public class MtasFieldsConsumer extends FieldsConsumer { |
495 | 211 | /** |
496 | 212 | * Register prefix stats set position value. |
497 | 213 | * |
498 | - * @param field | |
499 | - * the field | |
500 | - * @param value | |
501 | - * the value | |
502 | - * @param outPrefix | |
503 | - * the out prefix | |
504 | - * @throws IOException | |
505 | - * Signals that an I/O exception has occurred. | |
214 | + * @param field the field | |
215 | + * @param value the value | |
216 | + * @param outPrefix the out prefix | |
217 | + * @throws IOException Signals that an I/O exception has occurred. | |
506 | 218 | */ |
507 | 219 | public void registerPrefixStatsSetPositionValue(String field, String value, |
508 | 220 | IndexOutput outPrefix) throws IOException { |
... | ... | @@ -517,8 +229,7 @@ public class MtasFieldsConsumer extends FieldsConsumer { |
517 | 229 | /** |
518 | 230 | * Inits the prefix stats field. |
519 | 231 | * |
520 | - * @param field | |
521 | - * the field | |
232 | + * @param field the field | |
522 | 233 | */ |
523 | 234 | private void initPrefixStatsField(String field) { |
524 | 235 | if (!singlePositionPrefix.containsKey(field)) { |
... | ... | @@ -535,8 +246,7 @@ public class MtasFieldsConsumer extends FieldsConsumer { |
535 | 246 | /** |
536 | 247 | * Gets the prefix stats single position prefix attribute. |
537 | 248 | * |
538 | - * @param field | |
539 | - * the field | |
249 | + * @param field the field | |
540 | 250 | * @return the prefix stats single position prefix attribute |
541 | 251 | */ |
542 | 252 | public String getPrefixStatsSinglePositionPrefixAttribute(String field) { |
... | ... | @@ -547,8 +257,7 @@ public class MtasFieldsConsumer extends FieldsConsumer { |
547 | 257 | /** |
548 | 258 | * Gets the prefix stats multiple position prefix attribute. |
549 | 259 | * |
550 | - * @param field | |
551 | - * the field | |
260 | + * @param field the field | |
552 | 261 | * @return the prefix stats multiple position prefix attribute |
553 | 262 | */ |
554 | 263 | public String getPrefixStatsMultiplePositionPrefixAttribute(String field) { |
... | ... | @@ -559,8 +268,7 @@ public class MtasFieldsConsumer extends FieldsConsumer { |
559 | 268 | /** |
560 | 269 | * Gets the prefix stats set position prefix attribute. |
561 | 270 | * |
562 | - * @param field | |
563 | - * the field | |
271 | + * @param field the field | |
564 | 272 | * @return the prefix stats set position prefix attribute |
565 | 273 | */ |
566 | 274 | public String getPrefixStatsSetPositionPrefixAttribute(String field) { |
... | ... | @@ -585,6 +293,14 @@ public class MtasFieldsConsumer extends FieldsConsumer { |
585 | 293 | return text; |
586 | 294 | } |
587 | 295 | |
296 | + /* (non-Javadoc) | |
297 | + * @see org.apache.lucene.codecs.FieldsConsumer#merge(org.apache.lucene.index.MergeState) | |
298 | + */ | |
299 | + @Override | |
300 | + public void merge(MergeState mergeState) throws IOException { | |
301 | + delegateFieldsConsumer.merge(mergeState); | |
302 | + } | |
303 | + | |
588 | 304 | /* |
589 | 305 | * (non-Javadoc) |
590 | 306 | * |
... | ... | @@ -600,12 +316,9 @@ public class MtasFieldsConsumer extends FieldsConsumer { |
600 | 316 | /** |
601 | 317 | * Write. |
602 | 318 | * |
603 | - * @param fieldInfos | |
604 | - * the field infos | |
605 | - * @param fields | |
606 | - * the fields | |
607 | - * @throws IOException | |
608 | - * Signals that an I/O exception has occurred. | |
319 | + * @param fieldInfos the field infos | |
320 | + * @param fields the fields | |
321 | + * @throws IOException Signals that an I/O exception has occurred. | |
609 | 322 | */ |
610 | 323 | private void write(FieldInfos fieldInfos, Fields fields) { |
611 | 324 | IndexOutput outField, outDoc, outIndexDocId, outIndexObjectId, |
... | ... | @@ -1258,27 +971,17 @@ public class MtasFieldsConsumer extends FieldsConsumer { |
1258 | 971 | /** |
1259 | 972 | * Creates the object and register prefix. |
1260 | 973 | * |
1261 | - * @param field | |
1262 | - * the field | |
1263 | - * @param out | |
1264 | - * the out | |
1265 | - * @param term | |
1266 | - * the term | |
1267 | - * @param termRef | |
1268 | - * the term ref | |
1269 | - * @param startPosition | |
1270 | - * the start position | |
1271 | - * @param payload | |
1272 | - * the payload | |
1273 | - * @param startOffset | |
1274 | - * the start offset | |
1275 | - * @param endOffset | |
1276 | - * the end offset | |
1277 | - * @param outPrefix | |
1278 | - * the out prefix | |
974 | + * @param field the field | |
975 | + * @param out the out | |
976 | + * @param term the term | |
977 | + * @param termRef the term ref | |
978 | + * @param startPosition the start position | |
979 | + * @param payload the payload | |
980 | + * @param startOffset the start offset | |
981 | + * @param endOffset the end offset | |
982 | + * @param outPrefix the out prefix | |
1279 | 983 | * @return the integer |
1280 | - * @throws IOException | |
1281 | - * Signals that an I/O exception has occurred. | |
984 | + * @throws IOException Signals that an I/O exception has occurred. | |
1282 | 985 | */ |
1283 | 986 | private Integer createObjectAndRegisterPrefix(String field, IndexOutput out, |
1284 | 987 | BytesRef term, Long termRef, int startPosition, BytesRef payload, |
... | ... | @@ -1392,15 +1095,11 @@ public class MtasFieldsConsumer extends FieldsConsumer { |
1392 | 1095 | /** |
1393 | 1096 | * Store tree. |
1394 | 1097 | * |
1395 | - * @param tree | |
1396 | - * the tree | |
1397 | - * @param out | |
1398 | - * the out | |
1399 | - * @param refApproxOffset | |
1400 | - * the ref approx offset | |
1098 | + * @param tree the tree | |
1099 | + * @param out the out | |
1100 | + * @param refApproxOffset the ref approx offset | |
1401 | 1101 | * @return the long |
1402 | - * @throws IOException | |
1403 | - * Signals that an I/O exception has occurred. | |
1102 | + * @throws IOException Signals that an I/O exception has occurred. | |
1404 | 1103 | */ |
1405 | 1104 | private Long storeTree(MtasTree<?> tree, IndexOutput out, |
1406 | 1105 | long refApproxOffset) throws IOException { |
... | ... | @@ -1411,21 +1110,14 @@ public class MtasFieldsConsumer extends FieldsConsumer { |
1411 | 1110 | /** |
1412 | 1111 | * Store tree. |
1413 | 1112 | * |
1414 | - * @param node | |
1415 | - * the node | |
1416 | - * @param isSinglePoint | |
1417 | - * the is single point | |
1418 | - * @param storeAdditionalInformation | |
1419 | - * the store additional information | |
1420 | - * @param out | |
1421 | - * the out | |
1422 | - * @param nodeRefApproxOffset | |
1423 | - * the node ref approx offset | |
1424 | - * @param refApproxOffset | |
1425 | - * the ref approx offset | |
1113 | + * @param node the node | |
1114 | + * @param isSinglePoint the is single point | |
1115 | + * @param storeAdditionalInformation the store additional information | |
1116 | + * @param out the out | |
1117 | + * @param nodeRefApproxOffset the node ref approx offset | |
1118 | + * @param refApproxOffset the ref approx offset | |
1426 | 1119 | * @return the long |
1427 | - * @throws IOException | |
1428 | - * Signals that an I/O exception has occurred. | |
1120 | + * @throws IOException Signals that an I/O exception has occurred. | |
1429 | 1121 | */ |
1430 | 1122 | private Long storeTree(MtasTreeNode<?> node, boolean isSinglePoint, |
1431 | 1123 | boolean storeAdditionalInformation, IndexOutput out, |
... | ... | @@ -1510,10 +1202,8 @@ public class MtasFieldsConsumer extends FieldsConsumer { |
1510 | 1202 | /** |
1511 | 1203 | * Token stats add. |
1512 | 1204 | * |
1513 | - * @param min | |
1514 | - * the min | |
1515 | - * @param max | |
1516 | - * the max | |
1205 | + * @param min the min | |
1206 | + * @param max the max | |
1517 | 1207 | */ |
1518 | 1208 | private void tokenStatsAdd(int min, int max) { |
1519 | 1209 | tokenStatsNumber++; |
... | ... | @@ -1532,16 +1222,11 @@ public class MtasFieldsConsumer extends FieldsConsumer { |
1532 | 1222 | /** |
1533 | 1223 | * Copy object and update stats. |
1534 | 1224 | * |
1535 | - * @param id | |
1536 | - * the id | |
1537 | - * @param in | |
1538 | - * the in | |
1539 | - * @param inRef | |
1540 | - * the in ref | |
1541 | - * @param out | |
1542 | - * the out | |
1543 | - * @throws IOException | |
1544 | - * Signals that an I/O exception has occurred. | |
1225 | + * @param id the id | |
1226 | + * @param in the in | |
1227 | + * @param inRef the in ref | |
1228 | + * @param out the out | |
1229 | + * @throws IOException Signals that an I/O exception has occurred. | |
1545 | 1230 | */ |
1546 | 1231 | private void copyObjectAndUpdateStats(int id, IndexInput in, Long inRef, |
1547 | 1232 | IndexOutput out) throws IOException { |
... | ... |
src/mtas/codec/util/CodecCollector.java
... | ... | @@ -2138,8 +2138,9 @@ public class CodecCollector { |
2138 | 2138 | .checkExistenceNecessaryKeys()) { |
2139 | 2139 | needSecondRound = true; |
2140 | 2140 | } |
2141 | - } | |
2142 | - } | |
2141 | + termVector.subComponentFunction.dataCollector.reduceToSegmentKeys(); | |
2142 | + } | |
2143 | + } | |
2143 | 2144 | return needSecondRound; |
2144 | 2145 | } |
2145 | 2146 | |
... | ... |
src/mtas/codec/util/CodecComponent.java
... | ... | @@ -23,7 +23,10 @@ import mtas.parser.function.util.MtasFunctionParserFunction; |
23 | 23 | import mtas.parser.function.util.MtasFunctionParserFunctionDefault; |
24 | 24 | import org.apache.commons.lang.ArrayUtils; |
25 | 25 | import org.apache.lucene.search.spans.SpanQuery; |
26 | +import org.apache.lucene.util.automaton.Automata; | |
27 | +import org.apache.lucene.util.automaton.Automaton; | |
26 | 28 | import org.apache.lucene.util.automaton.CompiledAutomaton; |
29 | +import org.apache.lucene.util.automaton.Operations; | |
27 | 30 | import org.apache.lucene.util.automaton.RegExp; |
28 | 31 | |
29 | 32 | /** |
... | ... |
src/mtas/codec/util/CodecInfo.java
... | ... | @@ -274,7 +274,7 @@ public class CodecInfo { |
274 | 274 | IndexInput inTerm = indexInputList.get("term"); |
275 | 275 | for (MtasTreeHit<?> hit : hits) { |
276 | 276 | MtasToken<String> token = MtasCodecPostingsFormat.getToken(inObject, |
277 | - inTerm, hit.ref); | |
277 | + inTerm, hit.ref); | |
278 | 278 | if (token != null) { |
279 | 279 | if (prefixes.size() > 0) { |
280 | 280 | if (prefixes.contains(token.getPrefix())) { |
... | ... |
src/mtas/codec/util/DataCollector.java
src/mtas/codec/util/collector/MtasDataAdvanced.java
... | ... | @@ -201,6 +201,32 @@ abstract class MtasDataAdvanced<T1 extends Number & Comparable<T1>, T2 extends N |
201 | 201 | tmpOldSize); |
202 | 202 | } |
203 | 203 | |
204 | + public void reduceToSegmentKeys() { | |
205 | + if(segmentRegistration != null) { | |
206 | + int sizeCopy = size; | |
207 | + String[] keyListCopy = keyList.clone(); | |
208 | + T1[] advancedValueSumListCopy = advancedValueSumList.clone(); | |
209 | + T1[] advancedValueMaxListCopy = advancedValueMaxList.clone(); | |
210 | + T1[] advancedValueMinListCopy = advancedValueMinList.clone(); | |
211 | + T1[] advancedValueSumOfSquaresListCopy = advancedValueSumOfSquaresList.clone(); | |
212 | + T2[] advancedValueSumOfLogsListCopy = advancedValueSumOfLogsList.clone(); | |
213 | + long[] advancedValueNListCopy = advancedValueNList.clone(); | |
214 | + size = 0; | |
215 | + for(int i=0; i< sizeCopy; i++) { | |
216 | + if(segmentKeys.contains(keyListCopy[i])) { | |
217 | + keyList[size] = keyListCopy[i]; | |
218 | + advancedValueSumList[size] = advancedValueSumListCopy[i]; | |
219 | + advancedValueMaxList[size] = advancedValueMaxListCopy[i]; | |
220 | + advancedValueMinList[size] = advancedValueMinListCopy[i]; | |
221 | + advancedValueSumOfSquaresList[size] = advancedValueSumOfSquaresListCopy[i]; | |
222 | + advancedValueSumOfLogsList[size] = advancedValueSumOfLogsListCopy[i]; | |
223 | + advancedValueNList[size] = advancedValueNListCopy[i]; | |
224 | + size++; | |
225 | + } | |
226 | + } | |
227 | + } | |
228 | + } | |
229 | + | |
204 | 230 | /* |
205 | 231 | * (non-Javadoc) |
206 | 232 | * |
... | ... |
src/mtas/codec/util/collector/MtasDataBasic.java
... | ... | @@ -228,6 +228,24 @@ abstract class MtasDataBasic<T1 extends Number & Comparable<T1>, T2 extends Numb |
228 | 228 | tmpOldSize); |
229 | 229 | } |
230 | 230 | |
231 | + public void reduceToSegmentKeys() { | |
232 | + if (segmentRegistration != null) { | |
233 | + int sizeCopy = size; | |
234 | + String[] keyListCopy = keyList.clone(); | |
235 | + T1[] basicValueSumListCopy = basicValueSumList.clone(); | |
236 | + long[] basicValueNListCopy = basicValueNList.clone(); | |
237 | + size = 0; | |
238 | + for (int i = 0; i < sizeCopy; i++) { | |
239 | + if (segmentKeys.contains(keyListCopy[i])) { | |
240 | + keyList[size] = keyListCopy[i]; | |
241 | + basicValueSumList[size] = basicValueSumListCopy[i]; | |
242 | + basicValueNList[size] = basicValueNListCopy[i]; | |
243 | + size++; | |
244 | + } | |
245 | + } | |
246 | + } | |
247 | + } | |
248 | + | |
231 | 249 | /* |
232 | 250 | * (non-Javadoc) |
233 | 251 | * |
... | ... |
src/mtas/codec/util/collector/MtasDataCollector.java
... | ... | @@ -891,7 +891,7 @@ public abstract class MtasDataCollector<T1 extends Number & Comparable<T1>, T2 e |
891 | 891 | /** |
892 | 892 | * Reduce to segment keys. |
893 | 893 | */ |
894 | - public final void reduceToSegmentKeys() { | |
894 | + public void reduceToSegmentKeys() { | |
895 | 895 | if (segmentRegistration != null) { |
896 | 896 | reduceToKeys(segmentKeys); |
897 | 897 | } |
... | ... |
src/mtas/codec/util/collector/MtasDataFull.java
... | ... | @@ -158,6 +158,22 @@ abstract class MtasDataFull<T1 extends Number & Comparable<T1>, T2 extends Numbe |
158 | 158 | System.arraycopy(tmpNewFullValueList, 0, newFullValueList, 0, tmpOldSize); |
159 | 159 | } |
160 | 160 | |
161 | + public void reduceToSegmentKeys() { | |
162 | + if(segmentRegistration != null) { | |
163 | + int sizeCopy = size; | |
164 | + String[] keyListCopy = keyList.clone(); | |
165 | + T1[][] fullValueListCopy = fullValueList.clone(); | |
166 | + size = 0; | |
167 | + for(int i=0; i< sizeCopy; i++) { | |
168 | + if(segmentKeys.contains(keyListCopy[i])) { | |
169 | + keyList[size] = keyListCopy[i]; | |
170 | + fullValueList[size] = fullValueListCopy[i]; | |
171 | + size++; | |
172 | + } | |
173 | + } | |
174 | + } | |
175 | + } | |
176 | + | |
161 | 177 | /* |
162 | 178 | * (non-Javadoc) |
163 | 179 | * |
... | ... |
src/mtas/codec/util/collector/MtasDataItem.java
... | ... | @@ -38,7 +38,7 @@ public abstract class MtasDataItem<T1 extends Number & Comparable<T1>, T2 extend |
38 | 38 | |
39 | 39 | /** The error list. */ |
40 | 40 | protected HashMap<String, Integer> errorList; |
41 | - | |
41 | + | |
42 | 42 | /** The comparable sort value. */ |
43 | 43 | protected NumberComparator<?> comparableSortValue; |
44 | 44 | |
... | ... | @@ -101,7 +101,7 @@ public abstract class MtasDataItem<T1 extends Number & Comparable<T1>, T2 extend |
101 | 101 | */ |
102 | 102 | public abstract Map<String, Object> rewrite(boolean showDebugInfo) |
103 | 103 | throws IOException; |
104 | - | |
104 | + | |
105 | 105 | /** |
106 | 106 | * Gets the sub. |
107 | 107 | * |
... | ... |
src/mtas/parser/cql/util/MtasCQLParserSentenceCondition.java
... | ... | @@ -47,7 +47,7 @@ public class MtasCQLParserSentenceCondition { |
47 | 47 | public MtasCQLParserSentenceCondition(MtasCQLParserBasicSentenceCondition s) |
48 | 48 | throws ParseException { |
49 | 49 | sequenceList = new ArrayList<List<MtasCQLParserSentenceCondition>>(); |
50 | - basicSentence = s; | |
50 | + basicSentence = s; | |
51 | 51 | minimumOccurence = 1; |
52 | 52 | maximumOccurence = 1; |
53 | 53 | simplified = false; |
... | ... |
src/mtas/solr/handler/component/MtasSolrSearchComponent.java
... | ... | @@ -436,8 +436,8 @@ public class MtasSolrSearchComponent extends SearchComponent { |
436 | 436 | * (non-Javadoc) |
437 | 437 | * |
438 | 438 | * @see |
439 | - * org.apache.solr.handler.component.SearchComponent#finishStage(org.apache. | |
440 | - * solr.handler.component.ResponseBuilder) | |
439 | + * org.apache.solr.handler.component.SearchComponent#finishStage(org.apache. | |
440 | + * solr.handler.component.ResponseBuilder) | |
441 | 441 | */ |
442 | 442 | @Override |
443 | 443 | public void finishStage(ResponseBuilder rb) { |
... | ... | @@ -549,7 +549,6 @@ public class MtasSolrSearchComponent extends SearchComponent { |
549 | 549 | return STAGE_GROUP; |
550 | 550 | } |
551 | 551 | } |
552 | - | |
553 | 552 | } |
554 | 553 | return ResponseBuilder.STAGE_DONE; |
555 | 554 | } |
... | ... | @@ -562,15 +561,9 @@ public class MtasSolrSearchComponent extends SearchComponent { |
562 | 561 | * @return the mtas fields |
563 | 562 | */ |
564 | 563 | |
565 | - /** | |
566 | - * Gets the mtas fields. | |
567 | - * | |
568 | - * @param rb | |
569 | - * the rb | |
570 | - * @return the mtas fields | |
571 | - */ | |
572 | 564 | private ComponentFields getMtasFields(ResponseBuilder rb) { |
573 | 565 | return (ComponentFields) rb.req.getContext().get(ComponentFields.class); |
574 | 566 | } |
575 | 567 | |
568 | + | |
576 | 569 | } |
... | ... |
src/site/markdown/download.md.vm
... | ... | @@ -13,10 +13,10 @@ |
13 | 13 | <tr> |
14 | 14 | <td>$context.get("currentDevelopmentVersion")</td> |
15 | 15 | <td>$context.get("currentDevelopmentRelease")</td> |
16 | - <td><a href='https://github.com/meertensinstituut/mtas/releases/download/${currentDevelopmentRelease}/mtas-${currentDevelopmentVersion}.jar'>Binary (jar)</a></td> | |
17 | - <td><a href='https://github.com/meertensinstituut/mtas/archive/${currentDevelopmentRelease}.tar.gz'>Source (tgz)</a></td> | |
18 | - <td><a href='https://github.com/meertensinstituut/mtas/archive/${currentDevelopmentRelease}.zip'>Source (zip)</a></td> | |
16 | + <td><a href='https://github.com/matthijsbrouwer/mtas/releases/download/${currentDevelopmentRelease}/mtas-${currentDevelopmentVersion}.jar'>Binary (jar)</a></td> | |
17 | + <td><a href='https://github.com/matthijsbrouwer/mtas/archive/${currentDevelopmentRelease}.tar.gz'>Source (tgz)</a></td> | |
18 | + <td><a href='https://github.com/matthijsbrouwer/mtas/archive/${currentDevelopmentRelease}.zip'>Source (zip)</a></td> | |
19 | 19 | <td>Development version</td> |
20 | 20 | </tr> |
21 | 21 | </tbody> |
22 | 22 | -</table> |
23 | +</table> | |
23 | 24 | \ No newline at end of file |
... | ... |
src/site/markdown/installation.md
0 → 100644
src/site/markdown/installation_solr.md