Commit d43a3ac6d420cd643e7403a2f5b89289437ea6bc
Merge branch 'master' of https://github.com/meertensinstituut/mtas
# Conflicts: # conf/parser/mtas/crm_test.xml # conf/parser/mtas/folia_ddd.xml # pom.xml # src/mtas/analysis/MtasTokenizer.java # src/mtas/analysis/parser/MtasCRMParser.java # src/mtas/analysis/token/MtasTokenCollection.java # src/mtas/codec/MtasCodecPostingsFormat.java # src/mtas/codec/MtasFieldsConsumer.java # src/mtas/codec/util/CodecComponent.java # src/mtas/codec/util/CodecInfo.java # src/mtas/codec/util/DataCollector.java # src/mtas/codec/util/collector/MtasDataAdvanced.java # src/mtas/codec/util/collector/MtasDataBasic.java # src/mtas/codec/util/collector/MtasDataDoubleAdvanced.java # src/mtas/codec/util/collector/MtasDataDoubleBasic.java # src/mtas/codec/util/collector/MtasDataDoubleFull.java # src/mtas/codec/util/collector/MtasDataFull.java # src/mtas/codec/util/collector/MtasDataItem.java # src/mtas/codec/util/collector/MtasDataItemAdvanced.java # src/mtas/codec/util/collector/MtasDataItemBasic.java # src/mtas/codec/util/collector/MtasDataItemDoubleAdvanced.java # src/mtas/codec/util/collector/MtasDataItemDoubleBasic.java # src/mtas/codec/util/collector/MtasDataItemDoubleFull.java # src/mtas/codec/util/collector/MtasDataItemFull.java # src/mtas/codec/util/collector/MtasDataItemLongAdvanced.java # src/mtas/codec/util/collector/MtasDataItemLongBasic.java # src/mtas/codec/util/collector/MtasDataLongAdvanced.java # src/mtas/codec/util/collector/MtasDataLongBasic.java # src/mtas/codec/util/collector/MtasDataLongFull.java # src/mtas/parser/cql/util/MtasCQLParserDefaultPrefixCondition.java # src/mtas/parser/cql/util/MtasCQLParserSentenceCondition.java # src/mtas/parser/cql/util/MtasCQLParserSentencePartCondition.java # src/mtas/solr/handler/component/MtasSolrSearchComponent.java # src/mtas/solr/search/MtasCQLQParser.java # src/mtas/solr/update/processor/MtasUpdateRequestProcessorFactory.java # src/mtas/solr/update/processor/MtasUpdateRequestProcessorResultWriter.java # src/site/markdown/download.md.vm # src/site/markdown/index.md # src/site/markdown/installation.md # src/site/markdown/installation_lucene.md # src/site/markdown/installation_solr.md # src/site/site.xml
Showing
22 changed files
with
466 additions
and
425 deletions
conf/parser/mtas/crm_test.xml
@@ -17,16 +17,20 @@ | @@ -17,16 +17,20 @@ | ||
17 | <!-- START CONFIGURATION MTAS FOLIA PARSER --> | 17 | <!-- START CONFIGURATION MTAS FOLIA PARSER --> |
18 | <parser name="mtas.analysis.parser.MtasCRMParser"> | 18 | <parser name="mtas.analysis.parser.MtasCRMParser"> |
19 | 19 | ||
20 | +<<<<<<< HEAD | ||
20 | <!-- START GENERAL SETTINGS MTAS PARSER --> | 21 | <!-- START GENERAL SETTINGS MTAS PARSER --> |
21 | <autorepair value="true" /> | 22 | <autorepair value="true" /> |
22 | <makeunique value="true" /> | 23 | <makeunique value="true" /> |
23 | <!-- END GENERAL SETTINGS MTAS PARSER --> | 24 | <!-- END GENERAL SETTINGS MTAS PARSER --> |
24 | 25 | ||
26 | +======= | ||
27 | +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | ||
25 | <mappings> | 28 | <mappings> |
26 | 29 | ||
27 | <mapping type="word"> | 30 | <mapping type="word"> |
28 | </mapping> | 31 | </mapping> |
29 | 32 | ||
33 | +<<<<<<< HEAD | ||
30 | <mapping type="wordAnnotation" name="0"> | 34 | <mapping type="wordAnnotation" name="0"> |
31 | <token type="string" offset="false" parent="false"> | 35 | <token type="string" offset="false" parent="false"> |
32 | <pre> | 36 | <pre> |
@@ -71,12 +75,19 @@ | @@ -71,12 +75,19 @@ | ||
71 | <token type="string" offset="false" parent="false"> | 75 | <token type="string" offset="false" parent="false"> |
72 | <pre> | 76 | <pre> |
73 | <item type="string" value="t2" /> | 77 | <item type="string" value="t2" /> |
78 | +======= | ||
79 | + <mapping type="wordAnnotation" name="2"> | ||
80 | + <token type="string" offset="false" parent="false"> | ||
81 | + <pre> | ||
82 | + <item type="string" value="t" /> | ||
83 | +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | ||
74 | </pre> | 84 | </pre> |
75 | <post> | 85 | <post> |
76 | <item type="text" /> | 86 | <item type="text" /> |
77 | </post> | 87 | </post> |
78 | </token> | 88 | </token> |
79 | </mapping> | 89 | </mapping> |
90 | +<<<<<<< HEAD | ||
80 | <mapping type="wordAnnotation" name="2"> | 91 | <mapping type="wordAnnotation" name="2"> |
81 | <token type="string" offset="false" parent="false"> | 92 | <token type="string" offset="false" parent="false"> |
82 | <pre> | 93 | <pre> |
@@ -87,6 +98,8 @@ | @@ -87,6 +98,8 @@ | ||
87 | </post> | 98 | </post> |
88 | </token> | 99 | </token> |
89 | </mapping> | 100 | </mapping> |
101 | +======= | ||
102 | +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | ||
90 | <mapping type="wordAnnotation" name="3"> | 103 | <mapping type="wordAnnotation" name="3"> |
91 | <token type="string" offset="false" parent="false"> | 104 | <token type="string" offset="false" parent="false"> |
92 | <pre> | 105 | <pre> |
@@ -98,6 +111,7 @@ | @@ -98,6 +111,7 @@ | ||
98 | </token> | 111 | </token> |
99 | </mapping> | 112 | </mapping> |
100 | <mapping type="wordAnnotation" name="4"> | 113 | <mapping type="wordAnnotation" name="4"> |
114 | +<<<<<<< HEAD | ||
101 | <token type="string" offset="false" parent="false"> | 115 | <token type="string" offset="false" parent="false"> |
102 | <pre> | 116 | <pre> |
103 | <item type="string" value="crm" /> | 117 | <item type="string" value="crm" /> |
@@ -114,12 +128,34 @@ | @@ -114,12 +128,34 @@ | ||
114 | </condition> | 128 | </condition> |
115 | </mapping> | 129 | </mapping> |
116 | <mapping type="crmPair" name="part"> | 130 | <mapping type="crmPair" name="part"> |
131 | +======= | ||
132 | + </mapping> | ||
133 | + <mapping type="wordAnnotation" name="5"> | ||
134 | + </mapping> | ||
135 | + <mapping type="wordAnnotation" name="6"> | ||
136 | + </mapping> | ||
137 | + <mapping type="wordAnnotation" name="7"> | ||
138 | + <token type="string" offset="false" parent="false"> | ||
139 | + <pre> | ||
140 | + <item type="string" value="sentence" /> | ||
141 | + </pre> | ||
142 | + <post> | ||
143 | + <item type="text" /> | ||
144 | + </post> | ||
145 | + </token> | ||
146 | + <condition> | ||
147 | + <item type="text" not="true" condition="-" /> | ||
148 | + </condition> | ||
149 | + </mapping> | ||
150 | + <mapping type="wordAnnotation" name="pos"> | ||
151 | +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | ||
117 | <token type="string" offset="false" parent="false"> | 152 | <token type="string" offset="false" parent="false"> |
118 | <pre> | 153 | <pre> |
119 | <item type="name" /> | 154 | <item type="name" /> |
120 | </pre> | 155 | </pre> |
121 | <post> | 156 | <post> |
122 | <item type="text" /> | 157 | <item type="text" /> |
158 | +<<<<<<< HEAD | ||
123 | </post> | 159 | </post> |
124 | </token> | 160 | </token> |
125 | </mapping> | 161 | </mapping> |
@@ -282,11 +318,111 @@ | @@ -282,11 +318,111 @@ | ||
282 | </condition> | 318 | </condition> |
283 | </function> | 319 | </function> |
284 | <function type="wordAnnotation" name="4" split="+"> | 320 | <function type="wordAnnotation" name="4" split="+"> |
321 | +======= | ||
322 | + </post> | ||
323 | + </token> | ||
324 | + </mapping> | ||
325 | + <mapping type="wordAnnotation" name="feat.getal"> | ||
326 | + <token type="string" offset="false" parent="false"> | ||
327 | + <pre> | ||
328 | + <item type="name" /> | ||
329 | + </pre> | ||
330 | + <post> | ||
331 | + <item type="text" /> | ||
332 | + </post> | ||
333 | + </token> | ||
334 | + </mapping> | ||
335 | + <mapping type="wordAnnotation" name="feat.persoon"> | ||
336 | + <token type="string" offset="false" parent="false"> | ||
337 | + <pre> | ||
338 | + <item type="name" /> | ||
339 | + </pre> | ||
340 | + <post> | ||
341 | + <item type="text" /> | ||
342 | + </post> | ||
343 | + </token> | ||
344 | + </mapping> | ||
345 | + <mapping type="wordAnnotation" name="feat.ntype"> | ||
346 | + <token type="string" offset="false" parent="false"> | ||
347 | + <pre> | ||
348 | + <item type="name" /> | ||
349 | + </pre> | ||
350 | + <post> | ||
351 | + <item type="text" /> | ||
352 | + </post> | ||
353 | + </token> | ||
354 | + </mapping> | ||
355 | + <mapping type="wordAnnotation" name="feat.pvtijd"> | ||
356 | + <token type="string" offset="false" parent="false"> | ||
357 | + <pre> | ||
358 | + <item type="name" /> | ||
359 | + </pre> | ||
360 | + <post> | ||
361 | + <item type="text" /> | ||
362 | + </post> | ||
363 | + </token> | ||
364 | + </mapping> | ||
365 | + <mapping type="wordAnnotation" name="feat.wvorm"> | ||
366 | + <token type="string" offset="false" parent="false"> | ||
367 | + <pre> | ||
368 | + <item type="name" /> | ||
369 | + </pre> | ||
370 | + <post> | ||
371 | + <item type="text" /> | ||
372 | + </post> | ||
373 | + </token> | ||
374 | + </mapping> | ||
375 | + <mapping type="wordAnnotation" name="feat.numtype"> | ||
376 | + <token type="string" offset="false" parent="false"> | ||
377 | + <pre> | ||
378 | + <item type="name" /> | ||
379 | + </pre> | ||
380 | + <post> | ||
381 | + <item type="text" /> | ||
382 | + </post> | ||
383 | + </token> | ||
384 | + </mapping> | ||
385 | + <mapping type="wordAnnotation" name="feat.vwtype"> | ||
386 | + <token type="string" offset="false" parent="false"> | ||
387 | + <pre> | ||
388 | + <item type="name" /> | ||
389 | + </pre> | ||
390 | + <post> | ||
391 | + <item type="text" /> | ||
392 | + </post> | ||
393 | + </token> | ||
394 | + </mapping> | ||
395 | + <mapping type="wordAnnotation" name="feat.lwtype"> | ||
396 | + <token type="string" offset="false" parent="false"> | ||
397 | + <pre> | ||
398 | + <item type="name" /> | ||
399 | + </pre> | ||
400 | + <post> | ||
401 | + <item type="text" /> | ||
402 | + </post> | ||
403 | + </token> | ||
404 | + </mapping> | ||
405 | + <mapping type="wordAnnotation" name="feat.probleemgeval"> | ||
406 | + <token type="string" offset="false" parent="false"> | ||
407 | + <pre> | ||
408 | + <item type="name" /> | ||
409 | + </pre> | ||
410 | + <post> | ||
411 | + <item type="text" /> | ||
412 | + </post> | ||
413 | + </token> | ||
414 | + </mapping> | ||
415 | + </mappings> | ||
416 | + | ||
417 | + <functions> | ||
418 | + <function name="4" split="+"> | ||
419 | +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | ||
285 | <condition value="000,001,002,003,004,005,006,009"> | 420 | <condition value="000,001,002,003,004,005,006,009"> |
286 | <output name="pos" value="N" /> | 421 | <output name="pos" value="N" /> |
287 | <output name="feat.getal" value="ev" /> | 422 | <output name="feat.getal" value="ev" /> |
288 | </condition> | 423 | </condition> |
289 | <condition value="010,011,012,013,014,015,016,019"> | 424 | <condition value="010,011,012,013,014,015,016,019"> |
425 | +<<<<<<< HEAD | ||
290 | <output name="pos" value="N" /> | 426 | <output name="pos" value="N" /> |
291 | <output name="feat.getal" value="mv" /> | 427 | <output name="feat.getal" value="mv" /> |
292 | </condition> | 428 | </condition> |
@@ -602,6 +738,167 @@ | @@ -602,6 +738,167 @@ | ||
602 | <output name="feat.form" value="unclear" /> | 738 | <output name="feat.form" value="unclear" /> |
603 | </condition> | 739 | </condition> |
604 | 740 | ||
741 | +======= | ||
742 | + <output name="pos" value="N" /> | ||
743 | + <output name="feat.getal" value="mv" /> | ||
744 | + </condition> | ||
745 | + <condition value="020,021,022,023,024,025,026,029"> | ||
746 | + <output name="pos" value="N" /> | ||
747 | + <output name="feat.ntype" value="eigen" /> | ||
748 | + </condition> | ||
749 | + <condition value="090,091,092,093,094,095,096,099"> | ||
750 | + <output name="pos" value="N" /> | ||
751 | + <output name="feat.probleemgeval" /> | ||
752 | + </condition> | ||
753 | + <condition value="100,101,102,103,104,105,106,109"> | ||
754 | + <output name="pos" value="ADJ" /> | ||
755 | + <output name="feat.getal" value="ev" /> | ||
756 | + </condition> | ||
757 | + <condition value="110,111,112,113,114,115,116,119"> | ||
758 | + <output name="pos" value="ADJ" /> | ||
759 | + <output name="feat.getal" value="mv" /> | ||
760 | + </condition> | ||
761 | + <condition value="190,191,192,193,194,195,196,199"> | ||
762 | + <output name="pos" value="ADJ" /> | ||
763 | + <output name="feat.probleemgeval" /> | ||
764 | + </condition> | ||
765 | + | ||
766 | + | ||
767 | + <condition value="200,201,202,203,204,205,206,209"> | ||
768 | + <output name="pos" value="WW" /> | ||
769 | + <output name="feat.pvtijd" value="tgw" /> | ||
770 | + </condition> | ||
771 | + <condition value="210,211,212,213,214,215,216,219"> | ||
772 | + <output name="pos" value="WW" /> | ||
773 | + <output name="feat.pvtijd" value="tgw" /> | ||
774 | + </condition> | ||
775 | + <condition value="220,221,222,223,224,225,226,229"> | ||
776 | + <output name="pos" value="WW" /> | ||
777 | + <output name="feat.pvtijd" value="verl" /> | ||
778 | + </condition> | ||
779 | + <condition value="230,231,232,233,234,235,236,239"> | ||
780 | + <output name="pos" value="WW" /> | ||
781 | + <output name="feat.pvtijd" value="verl" /> | ||
782 | + </condition> | ||
783 | + <condition value="240,241,242,243,244,245,246,249"> | ||
784 | + <output name="pos" value="WW" /> | ||
785 | + </condition> | ||
786 | + <condition value="250,251,252,253,254,255,256,259"> | ||
787 | + <output name="pos" value="WW" /> | ||
788 | + <output name="feat.wvorm" value="inf" /> | ||
789 | + </condition> <condition value="260,261,262,263,264,265,266,269"> | ||
790 | + <output name="pos" value="WW" /> | ||
791 | + <output name="feat.wvorm" value="inf" /> | ||
792 | + </condition> <condition value="270,271,272,273,274,275,276,279"> | ||
793 | + <output name="pos" value="WW" /> | ||
794 | + </condition> <condition value="280,281,282,283,284,285,286,289"> | ||
795 | + <output name="pos" value="WW" /> | ||
796 | + </condition> | ||
797 | + <condition value="290,291,292,293,294,295,296,299"> | ||
798 | + <output name="pos" value="WW" /> | ||
799 | + <output name="feat.probleemgeval" /> | ||
800 | + </condition> | ||
801 | + | ||
802 | + | ||
803 | + <condition value="300,301,302,303,304,305,306,309"> | ||
804 | + <output name="pos" value="TW" /> | ||
805 | + <output name="feat.numtype" value="hoofd" /> | ||
806 | + </condition> | ||
807 | + <condition value="310,311,312,313,314,315,316,319"> | ||
808 | + <output name="pos" value="TW" /> | ||
809 | + <output name="feat.numtype" value="rang" /> | ||
810 | + </condition> | ||
811 | + <condition value="320,321,322,323,324,325,326,329"> | ||
812 | + <output name="pos" value="TW" /> | ||
813 | + </condition> | ||
814 | + <condition value="390,391,392,393,394,395,396,399"> | ||
815 | + <output name="pos" value="TW" /> | ||
816 | + <output name="feat.probleemgeval" /> | ||
817 | + </condition> | ||
818 | + | ||
819 | + <condition value="401"> | ||
820 | + <output name="pos" value="VNW" /> | ||
821 | + <output name="feat.getal" value="ev" /> | ||
822 | + <output name="feat.persoon" value="1" /> | ||
823 | + </condition> | ||
824 | + <condition value="402"> | ||
825 | + <output name="pos" value="VNW" /> | ||
826 | + <output name="feat.getal" value="ev" /> | ||
827 | + <output name="feat.persoon" value="2" /> | ||
828 | + </condition> | ||
829 | + <condition value="403"> | ||
830 | + <output name="pos" value="VNW" /> | ||
831 | + <output name="feat.getal" value="ev" /> | ||
832 | + <output name="feat.persoon" value="3" /> | ||
833 | + </condition> | ||
834 | + <condition value="404"> | ||
835 | + <output name="pos" value="VNW" /> | ||
836 | + <output name="feat.getal" value="mv" /> | ||
837 | + <output name="feat.persoon" value="1" /> | ||
838 | + </condition> | ||
839 | + <condition value="405"> | ||
840 | + <output name="pos" value="VNW" /> | ||
841 | + <output name="feat.getal" value="mv" /> | ||
842 | + <output name="feat.persoon" value="2" /> | ||
843 | + </condition> | ||
844 | + <condition value="406"> | ||
845 | + <output name="pos" value="VNW" /> | ||
846 | + <output name="feat.getal" value="mv" /> | ||
847 | + <output name="feat.persoon" value="3" /> | ||
848 | + </condition> | ||
849 | + <condition value="409"> | ||
850 | + <output name="pos" value="VNW" /> | ||
851 | + <output name="feat.probleemgeval" /> | ||
852 | + </condition> | ||
853 | + <condition value="410,411,412,413,414,415,416,419"> | ||
854 | + <output name="pos" value="VNW" /> | ||
855 | + <output name="feat.vwtype" value="aanw" /> | ||
856 | + </condition> | ||
857 | + <condition value="420,421,422,423,424,425,426,429"> | ||
858 | + <output name="pos" value="VNW" /> | ||
859 | + <output name="feat.vwtype" value="betr" /> | ||
860 | + </condition> | ||
861 | + <condition value="430,431,432,433,434,435,436,439"> | ||
862 | + <output name="pos" value="VNW" /> | ||
863 | + <output name="feat.vwtype" value="vb" /> | ||
864 | + </condition> | ||
865 | + <condition value="434,441,442,443,444,445,446,449"> | ||
866 | + <output name="pos" value="VNW" /> | ||
867 | + <output name="feat.vwtype" value="vb" /> | ||
868 | + </condition> | ||
869 | + <condition value="440,441,442,443,444,445,446,449"> | ||
870 | + <output name="pos" value="VNW" /> | ||
871 | + <output name="feat.lwtype" value="onbep" /> | ||
872 | + </condition> | ||
873 | + <condition value="450,451,452,453,454,455,456,459"> | ||
874 | + <output name="pos" value="VNW" /> | ||
875 | + <output name="feat.vwtype" value="bez" /> | ||
876 | + </condition> | ||
877 | + | ||
878 | + | ||
879 | + <condition value="001,011,021,091"> | ||
880 | + <output name="feat.form" value="-e" /> | ||
881 | + </condition> | ||
882 | + <condition value="002,012,022,092"> | ||
883 | + <output name="feat.form" value="-s/-th" /> | ||
884 | + </condition> | ||
885 | + <condition value="003,013,023,092"> | ||
886 | + <output name="feat.form" value="-t" /> | ||
887 | + </condition> | ||
888 | + <condition value="004,014,024,092"> | ||
889 | + <output name="feat.form" value="-n" /> | ||
890 | + </condition> | ||
891 | + <condition value="005,015,025,095"> | ||
892 | + <output name="feat.form" value="-r/-re" /> | ||
893 | + </condition> | ||
894 | + <condition value="006,016,026,096"> | ||
895 | + <output name="feat.form" value="-a" /> | ||
896 | + </condition> | ||
897 | + <condition value="009,019,029,099"> | ||
898 | + <output name="feat.form" value="unclear" /> | ||
899 | + </condition> | ||
900 | + | ||
901 | +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | ||
605 | </function> | 902 | </function> |
606 | </functions> | 903 | </functions> |
607 | 904 |
conf/parser/mtas/folia_ddd.xml
@@ -19,8 +19,12 @@ | @@ -19,8 +19,12 @@ | ||
19 | 19 | ||
20 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> | 20 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> |
21 | <autorepair value="true" /> | 21 | <autorepair value="true" /> |
22 | +<<<<<<< HEAD | ||
22 | <makeunique value="true" /> | 23 | <makeunique value="true" /> |
23 | <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | 24 | <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> |
25 | +======= | ||
26 | + <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | ||
27 | +>>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | ||
24 | 28 | ||
25 | <!-- START REFERENCES --> | 29 | <!-- START REFERENCES --> |
26 | <references> | 30 | <references> |
pom.xml
1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | 1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
2 | xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | 2 | xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
3 | <properties> | 3 | <properties> |
4 | - <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | 4 | + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
5 | <currentDevelopmentVersion>6.2.0</currentDevelopmentVersion> | 5 | <currentDevelopmentVersion>6.2.0</currentDevelopmentVersion> |
6 | <currentDevelopmentRelease>20160802</currentDevelopmentRelease> | 6 | <currentDevelopmentRelease>20160802</currentDevelopmentRelease> |
7 | </properties> | 7 | </properties> |
@@ -84,7 +84,7 @@ | @@ -84,7 +84,7 @@ | ||
84 | <addMavenDescriptor>false</addMavenDescriptor> | 84 | <addMavenDescriptor>false</addMavenDescriptor> |
85 | </archive> | 85 | </archive> |
86 | </configuration> | 86 | </configuration> |
87 | - </plugin> | 87 | + </plugin> |
88 | </plugins> | 88 | </plugins> |
89 | </build> | 89 | </build> |
90 | <reporting> | 90 | <reporting> |
@@ -194,5 +194,4 @@ | @@ -194,5 +194,4 @@ | ||
194 | </dependency> | 194 | </dependency> |
195 | </dependencies> | 195 | </dependencies> |
196 | </dependencyManagement> | 196 | </dependencyManagement> |
197 | - | ||
198 | </project> | 197 | </project> |
199 | \ No newline at end of file | 198 | \ No newline at end of file |
src/mtas/analysis/MtasTokenizer.java
@@ -92,7 +92,7 @@ public final class MtasTokenizer<T> extends Tokenizer { | @@ -92,7 +92,7 @@ public final class MtasTokenizer<T> extends Tokenizer { | ||
92 | * Signals that an I/O exception has occurred. | 92 | * Signals that an I/O exception has occurred. |
93 | */ | 93 | */ |
94 | public MtasTokenizer(MtasConfiguration config) throws IOException { | 94 | public MtasTokenizer(MtasConfiguration config) throws IOException { |
95 | - processConfiguration(config); | 95 | + processConfiguration(config); |
96 | } | 96 | } |
97 | 97 | ||
98 | /** | 98 | /** |
src/mtas/analysis/parser/MtasSketchParser.java
@@ -41,7 +41,8 @@ final public class MtasSketchParser extends MtasBasicParser { | @@ -41,7 +41,8 @@ final public class MtasSketchParser extends MtasBasicParser { | ||
41 | /** | 41 | /** |
42 | * Instantiates a new mtas sketch parser. | 42 | * Instantiates a new mtas sketch parser. |
43 | * | 43 | * |
44 | - * @param config the config | 44 | + * @param config |
45 | + * the config | ||
45 | */ | 46 | */ |
46 | public MtasSketchParser(MtasConfiguration config) { | 47 | public MtasSketchParser(MtasConfiguration config) { |
47 | super(config); | 48 | super(config); |
@@ -363,7 +364,8 @@ final public class MtasSketchParser extends MtasBasicParser { | @@ -363,7 +364,8 @@ final public class MtasSketchParser extends MtasBasicParser { | ||
363 | /** | 364 | /** |
364 | * Prints the config types. | 365 | * Prints the config types. |
365 | * | 366 | * |
366 | - * @param types the types | 367 | + * @param types |
368 | + * the types | ||
367 | * @return the string | 369 | * @return the string |
368 | */ | 370 | */ |
369 | private String printConfigTypes(HashMap<?, MtasParserType> types) { | 371 | private String printConfigTypes(HashMap<?, MtasParserType> types) { |
@@ -378,15 +380,8 @@ final public class MtasSketchParser extends MtasBasicParser { | @@ -378,15 +380,8 @@ final public class MtasSketchParser extends MtasBasicParser { | ||
378 | return text; | 380 | return text; |
379 | } | 381 | } |
380 | 382 | ||
381 | - /** | ||
382 | - * The Class MtasSketchParserMappingWord. | ||
383 | - */ | ||
384 | private class MtasSketchParserMappingWord | 383 | private class MtasSketchParserMappingWord |
385 | extends MtasParserMapping<MtasSketchParserMappingWord> { | 384 | extends MtasParserMapping<MtasSketchParserMappingWord> { |
386 | - | ||
387 | - /** | ||
388 | - * Instantiates a new mtas sketch parser mapping word. | ||
389 | - */ | ||
390 | public MtasSketchParserMappingWord() { | 385 | public MtasSketchParserMappingWord() { |
391 | super(); | 386 | super(); |
392 | this.position = SOURCE_OWN; | 387 | this.position = SOURCE_OWN; |
@@ -394,18 +389,14 @@ final public class MtasSketchParser extends MtasBasicParser { | @@ -394,18 +389,14 @@ final public class MtasSketchParser extends MtasBasicParser { | ||
394 | this.offset = SOURCE_OWN; | 389 | this.offset = SOURCE_OWN; |
395 | this.type = MAPPING_TYPE_WORD; | 390 | this.type = MAPPING_TYPE_WORD; |
396 | } | 391 | } |
397 | - | ||
398 | - /* | ||
399 | - * (non-Javadoc) | ||
400 | - * | ||
401 | - * @see mtas.analysis.parser.MtasBasicParser.MtasParserMapping#self() | ||
402 | - */ | 392 | + |
403 | @Override | 393 | @Override |
404 | protected MtasSketchParserMappingWord self() { | 394 | protected MtasSketchParserMappingWord self() { |
405 | return this; | 395 | return this; |
406 | } | 396 | } |
407 | } | 397 | } |
408 | 398 | ||
399 | + | ||
409 | /** | 400 | /** |
410 | * The Class MtasSketchParserMappingWordAnnotation. | 401 | * The Class MtasSketchParserMappingWordAnnotation. |
411 | */ | 402 | */ |
src/mtas/analysis/token/MtasTokenCollection.java
1 | package mtas.analysis.token; | 1 | package mtas.analysis.token; |
2 | 2 | ||
3 | +import java.io.IOException; | ||
3 | import java.util.ArrayList; | 4 | import java.util.ArrayList; |
4 | import java.util.Arrays; | 5 | import java.util.Arrays; |
5 | import java.util.Collections; | 6 | import java.util.Collections; |
@@ -95,7 +96,7 @@ public class MtasTokenCollection { | @@ -95,7 +96,7 @@ public class MtasTokenCollection { | ||
95 | Iterator<MtasToken<?>> it = this.iterator(); | 96 | Iterator<MtasToken<?>> it = this.iterator(); |
96 | while (it.hasNext()) { | 97 | while (it.hasNext()) { |
97 | MtasToken<?> token = it.next(); | 98 | MtasToken<?> token = it.next(); |
98 | - System.out.println(token); | 99 | + System.out.println(token); |
99 | } | 100 | } |
100 | } | 101 | } |
101 | 102 | ||
@@ -237,7 +238,7 @@ public class MtasTokenCollection { | @@ -237,7 +238,7 @@ public class MtasTokenCollection { | ||
237 | trash.add(i); | 238 | trash.add(i); |
238 | } else if ((token.getPositionStart() == null) | 239 | } else if ((token.getPositionStart() == null) |
239 | || (token.getPositionEnd() == null)) { | 240 | || (token.getPositionEnd() == null)) { |
240 | - trash.add(i); | 241 | + trash.add(i); |
241 | } else if (token.getValue() == null || (token.getValue().equals(""))) { | 242 | } else if (token.getValue() == null || (token.getValue().equals(""))) { |
242 | trash.add(i); | 243 | trash.add(i); |
243 | } else if (token.getPrefix() == null || (token.getPrefix().equals(""))) { | 244 | } else if (token.getPrefix() == null || (token.getPrefix().equals(""))) { |
src/mtas/codec/MtasCodecPostingsFormat.java
@@ -285,6 +285,10 @@ public class MtasCodecPostingsFormat extends PostingsFormat { | @@ -285,6 +285,10 @@ public class MtasCodecPostingsFormat extends PostingsFormat { | ||
285 | } catch (Exception e) { | 285 | } catch (Exception e) { |
286 | throw new IOException(e.getMessage()); | 286 | throw new IOException(e.getMessage()); |
287 | } | 287 | } |
288 | + Long termRef = inObject.readVLong(); | ||
289 | + inTerm.seek(termRef); | ||
290 | + token.setTermRef(termRef); | ||
291 | + token.setValue(inTerm.readString()); | ||
288 | return token; | 292 | return token; |
289 | } | 293 | } |
290 | 294 |
src/mtas/codec/MtasFieldsConsumer.java
@@ -29,289 +29,21 @@ import org.apache.lucene.index.FieldInfo; | @@ -29,289 +29,21 @@ import org.apache.lucene.index.FieldInfo; | ||
29 | import org.apache.lucene.index.FieldInfos; | 29 | import org.apache.lucene.index.FieldInfos; |
30 | import org.apache.lucene.index.Fields; | 30 | import org.apache.lucene.index.Fields; |
31 | import org.apache.lucene.index.IndexFileNames; | 31 | import org.apache.lucene.index.IndexFileNames; |
32 | +import org.apache.lucene.index.MergeState; | ||
32 | import org.apache.lucene.index.PostingsEnum; | 33 | import org.apache.lucene.index.PostingsEnum; |
33 | import org.apache.lucene.index.SegmentWriteState; | 34 | import org.apache.lucene.index.SegmentWriteState; |
34 | import org.apache.lucene.index.Terms; | 35 | import org.apache.lucene.index.Terms; |
35 | import org.apache.lucene.index.TermsEnum; | 36 | import org.apache.lucene.index.TermsEnum; |
36 | import org.apache.lucene.search.DocIdSetIterator; | 37 | import org.apache.lucene.search.DocIdSetIterator; |
38 | +import org.apache.lucene.store.IOContext; | ||
37 | import org.apache.lucene.store.IndexInput; | 39 | import org.apache.lucene.store.IndexInput; |
38 | import org.apache.lucene.store.IndexOutput; | 40 | import org.apache.lucene.store.IndexOutput; |
41 | +import org.apache.lucene.store.Lock; | ||
39 | import org.apache.lucene.util.BytesRef; | 42 | import org.apache.lucene.util.BytesRef; |
40 | import org.apache.lucene.util.IOUtils; | 43 | import org.apache.lucene.util.IOUtils; |
41 | 44 | ||
42 | /** | 45 | /** |
43 | * The Class MtasFieldsConsumer. | 46 | * The Class MtasFieldsConsumer. |
44 | - * | ||
45 | - * | ||
46 | - * The Class MtasFieldsConsumer constructs several temporal and permanent files | ||
47 | - * to provide a forward index | ||
48 | - * | ||
49 | - * <ul> | ||
50 | - * <li><b>Temporary files</b><br> | ||
51 | - * <ul> | ||
52 | - * <li><b>Temporary file {@link #mtasTmpFieldFileName} with extension | ||
53 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_FIELD_EXTENSION} </b><br> | ||
54 | - * Contains for each field a reference to the list of documents. Structure of | ||
55 | - * content: | ||
56 | - * <ul> | ||
57 | - * <li><b>String</b>: field</li> | ||
58 | - * <li><b>VLong</b>: reference to {@link #mtasDocFileName}</li> | ||
59 | - * <li><b>VInt</b>: number of documents</li> | ||
60 | - * <li><b>VLong</b>: reference to {@link #mtasTermFileName}</li> | ||
61 | - * <li><b>VInt</b>: number of terms</li> | ||
62 | - * <li><b>VLong</b>: reference to {@link #mtasPrefixFileName}</li> | ||
63 | - * <li><b>VInt</b>: number of prefixes</li> | ||
64 | - * </ul> | ||
65 | - * </li> | ||
66 | - * <li><b>Temporary file {@link #mtasTmpObjectFileName} with extension | ||
67 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_OBJECT_EXTENSION}</b><br> | ||
68 | - * Contains for a specific field all objects constructed by | ||
69 | - * {@link createObjectAndRegisterPrefix}. For all fields, the objects are later | ||
70 | - * on copied to {@link #mtasObjectFileName} while statistics are collected. | ||
71 | - * Structure of content identical to {@link #mtasObjectFileName}.</li> | ||
72 | - * <li><b>Temporary file {@link #mtasTmpDocsFileName} with extension | ||
73 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_DOCS_EXTENSION}</b> <br> | ||
74 | - * Contains for a specific field for each doc multiple fragments. Each occurring | ||
75 | - * term results in a fragment. Structure of content: | ||
76 | - * <ul> | ||
77 | - * <li><b>VInt</b>: docId</li> | ||
78 | - * <li><b>VInt</b>: number of objects in this fragment</li> | ||
79 | - * <li><b>VLong</b>: offset references to {@link #mtasTmpObjectFileName}</li> | ||
80 | - * <li><b>VInt</b>,<b>VLong</b>: mtasId object, reference temporary object in | ||
81 | - * {@link #mtasTmpObjectFileName} minus offset</li> | ||
82 | - * <li><b>VInt</b>,<b>VLong</b>: ...</li> | ||
83 | - * </ul> | ||
84 | - * </li> | ||
85 | - * <li><b>Temporary file {@link #mtasTmpDocsChainedFileName} with extension | ||
86 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_DOCS_CHAINED_EXTENSION} | ||
87 | - * </b><br> | ||
88 | - * Contains for a specific field for each doc multiple chained fragments. | ||
89 | - * Structure of content: | ||
90 | - * <ul> | ||
91 | - * <li><b>VInt</b>: docId</li> | ||
92 | - * <li><b>VInt</b>: number of objects in this fragment</li> | ||
93 | - * <li><b>VLong</b>: offset references to {@link #mtasTmpObjectFileName}</li> | ||
94 | - * <li><b>VInt</b>,<b>VLong</b>: mtasId object, reference temporary object in | ||
95 | - * {@link #mtasTmpObjectFileName} minus offset</li> | ||
96 | - * <li><b>VInt</b>,<b>VLong</b>: ...</li> | ||
97 | - * <li><b>VLong</b>: reference to next fragment in | ||
98 | - * {@link #mtasTmpDocsChainedFileName}, self reference indicates end of chain | ||
99 | - * </ul> | ||
100 | - * </li> | ||
101 | - * <li><b>Temporary file {@link #mtasTmpDocFileName} with extension | ||
102 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TMP_DOC_EXTENSION}</b><br> | ||
103 | - * For each document | ||
104 | - * <ul> | ||
105 | - * <li><b>VInt</b>: docId</li> | ||
106 | - * <li><b>VLong</b>: reference to {@link #mtasIndexObjectIdFileName}</li> | ||
107 | - * <li><b>VLong</b>: reference first object, used as offset for tree index | ||
108 | - * <li><b>VInt</b>: slope used in approximation reference objects index on id | ||
109 | - * </li> | ||
110 | - * <li><b>ZLong</b>: offset used in approximation reference objects index on id | ||
111 | - * </li> | ||
112 | - * <li><b>Byte</b>: flag indicating how corrections on the approximation | ||
113 | - * references objects for the index on id are stored: | ||
114 | - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_BYTE}, | ||
115 | - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_SHORT}, | ||
116 | - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_INTEGER} or | ||
117 | - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_LONG}</li> | ||
118 | - * <li><b>VInt</b>: number of objects in this document</li> | ||
119 | - * <li><b>VInt</b>: first position</li> | ||
120 | - * <li><b>VInt</b>: last position</li> | ||
121 | - * </ul> | ||
122 | - * </li> | ||
123 | - * </ul> | ||
124 | - * </li> | ||
125 | - * <li><b>Final files</b><br> | ||
126 | - * <ul> | ||
127 | - * <li><b>File {@link #mtasIndexFieldFileName} with extension | ||
128 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_FIELD_EXTENSION}</b><br> | ||
129 | - * Contains for each field a reference to the list of documents and the | ||
130 | - * prefixes. Structure of content: | ||
131 | - * <ul> | ||
132 | - * <li><b>String</b>: field</li> | ||
133 | - * <li><b>VLong</b>: reference to {@link #mtasDocFileName}</li> | ||
134 | - * <li><b>VLong</b>: reference to {@link #mtasIndexDocIdFileName}</li> | ||
135 | - * <li><b>VInt</b>: number of documents</li> | ||
136 | - * <li><b>VLong</b>: reference to {@link #mtasTermFileName}</li> | ||
137 | - * <li><b>VInt</b>: number of terms</li> | ||
138 | - * <li><b>VLong</b>: reference to {@link #mtasPrefixFileName}</li> | ||
139 | - * <li><b>VInt</b>: number of prefixes</li> | ||
140 | - * </ul> | ||
141 | - * </li> | ||
142 | - * <li><b>File {@link #mtasTermFileName} with extension | ||
143 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_TERM_EXTENSION}</b><br> | ||
144 | - * For each field, all unique terms are stored here. Structure of content: | ||
145 | - * <ul> | ||
146 | - * <li><b>String</b>: term</li> | ||
147 | - * </ul> | ||
148 | - * </li> | ||
149 | - * <li><b>File {@link #mtasPrefixFileName} with extension | ||
150 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_PREFIX_EXTENSION}</b><br> | ||
151 | - * For each field, all unique prefixes are stored here. Structure of content: | ||
152 | - * <ul> | ||
153 | - * <li><b>String</b>: prefix</li> | ||
154 | - * </ul> | ||
155 | - * </li> | ||
156 | - * <li><b>File {@link #mtasObjectFileName} with extension | ||
157 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_OBJECT_EXTENSION}</b><br> | ||
158 | - * Contains all objects for all fields. Structure of content: | ||
159 | - * <ul> | ||
160 | - * <li><b>VInt</b>: mtasId</li> | ||
161 | - * <li><b>VInt</b>: objectFlags | ||
162 | - * <ul> | ||
163 | - * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PARENT}</li> | ||
164 | - * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_RANGE}</li> | ||
165 | - * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_SET}</li> | ||
166 | - * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_OFFSET}</li> | ||
167 | - * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_REALOFFSET}</li> | ||
168 | - * <li>{@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PAYLOAD}</li> | ||
169 | - * </ul> | ||
170 | - * </li> | ||
171 | - * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PARENT}<br> | ||
172 | - * <b>VInt</b>: parentId | ||
173 | - * <li>Only if | ||
174 | - * {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_RANGE}<br> | ||
175 | - * <b>VInt</b>,<b>VInt</b>: startPosition and (endPosition-startPosition) | ||
176 | - * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_SET}<br> | ||
177 | - * <b>VInt</b>,<b>VInt</b>,<b>VInt</b>,...: number of positions, firstPosition, | ||
178 | - * (position-previousPosition),... | ||
179 | - * <li>Only if no {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_RANGE} | ||
180 | - * or {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_POSITION_SET}<br> | ||
181 | - * <b>VInt</b>: position | ||
182 | - * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_OFFSET}<br> | ||
183 | - * <b>VInt</b>,<b>VInt</b>: startOffset, (endOffset-startOffset) | ||
184 | - * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_REALOFFSET}<br> | ||
185 | - * <b>VInt</b>,<b>VInt</b>: startRealOffset, (endRealOffset-startRealOffset) | ||
186 | - * <li>Only if {@link MtasCodecPostingsFormat#MTAS_OBJECT_HAS_PAYLOAD}<br> | ||
187 | - * <b>VInt</b>,<b>Bytes</b>: number of bytes, payload | ||
188 | - * <li><b>VLong</b>: reference to Term in {@link #mtasTermFileName}</li> | ||
189 | - * </ul> | ||
190 | - * </li> | ||
191 | - * <li><b>File {@link #mtasIndexDocIdFileName} with extension | ||
192 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_DOC_ID_EXTENSION} | ||
193 | - * </b><br> | ||
194 | - * Contains for each field a tree structure {@link MtasTree} to search reference | ||
195 | - * to {@link #mtasDocFileName} by id. Structure of content for each node: | ||
196 | - * <ul> | ||
197 | - * <li><b>VLong</b>: offset references to {@link #mtasIndexDocIdFileName}, only | ||
198 | - * available in root node</li> | ||
199 | - * <li><b>Byte</b>: flag, should be zero for this tree, only available in root | ||
200 | - * node</li> | ||
201 | - * <li><b>VInt</b>: left</li> | ||
202 | - * <li><b>VInt</b>: right</li> | ||
203 | - * <li><b>VInt</b>: max</li> | ||
204 | - * <li><b>VLong</b>: left reference to {@link #mtasIndexDocIdFileName} minus the | ||
205 | - * offset stored in the root node</li> | ||
206 | - * <li><b>VLong</b>: right reference to {@link #mtasIndexDocIdFileName} minus | ||
207 | - * the offset stored in the root node</li> | ||
208 | - * <li><b>VInt</b>: number of objects on this node (always 1 for this tree)</li> | ||
209 | - * <li><b>VLong</b>: reference to {@link #mtasDocFileName} minus offset</li> | ||
210 | - * </ul> | ||
211 | - * </li> | ||
212 | - * <li><b>File {@link #mtasDocFileName} with extension | ||
213 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_DOC_EXTENSION}</b><br> | ||
214 | - * For each document | ||
215 | - * <ul> | ||
216 | - * <li><b>VInt</b>: docId</li> | ||
217 | - * <li><b>VLong</b>: reference to {@link #mtasIndexObjectIdFileName}</li> | ||
218 | - * <li><b>VLong</b>: reference to {@link #mtasIndexObjectPositionFileName}</li> | ||
219 | - * <li><b>VLong</b>: reference to {@link #mtasIndexObjectParentFileName}</li> | ||
220 | - * <li><b>VLong</b>: reference first object, used as offset for tree index | ||
221 | - * <li><b>VInt</b>: slope used in approximation reference objects index on id | ||
222 | - * </li> | ||
223 | - * <li><b>ZLong</b>: offset used in approximation reference objects index on id | ||
224 | - * </li> | ||
225 | - * <li><b>Byte</b>: flag indicating how corrections on the approximation | ||
226 | - * references objects for the index on id are stored: | ||
227 | - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_BYTE}, | ||
228 | - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_SHORT}, | ||
229 | - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_INTEGER} or | ||
230 | - * {@link MtasCodecPostingsFormat#MTAS_STORAGE_LONG}</li> | ||
231 | - * <li><b>VInt</b>: number of objects</li> | ||
232 | - * <li><b>VInt</b>: first position</li> | ||
233 | - * <li><b>VInt</b>: last position</li> | ||
234 | - * </ul> | ||
235 | - * </li> | ||
236 | - * <li><b>File {@link #mtasIndexObjectIdFileName} with extension | ||
237 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_OBJECT_ID_EXTENSION} | ||
238 | - * </b><br> | ||
239 | - * Provides for each mtasId the reference to {@link #mtasObjectFileName}. These | ||
240 | - * references are grouped by document, sorted by mtasId, and because the | ||
241 | - * mtasId's for each document will always start with 0 and are sequential | ||
242 | - * without gaps, a reference can be computed if the position of the first | ||
243 | - * reference for a document is known from {@link #mtasDocFileName}. The | ||
244 | - * reference is approximated by the reference to the first object plus the | ||
245 | - * mtasId times a slope. Only a correction to this approximation is stored. | ||
246 | - * Structure of content: | ||
247 | - * <ul> | ||
248 | - * <li><b>Byte</b>/<b>Short</b>/<b>Int</b>/<b>Long</b>: correction reference to | ||
249 | - * {@link #mtasObjectFileName}</li> | ||
250 | - * </ul> | ||
251 | - * </li> | ||
252 | - * <li><b>File {@link #mtasIndexObjectPositionFileName} with extension | ||
253 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_OBJECT_POSITION_EXTENSION} | ||
254 | - * </b><br> | ||
255 | - * Contains for each document a tree structure {@link MtasTree} to search | ||
256 | - * objects by position. Structure of content for each node: | ||
257 | - * <ul> | ||
258 | - * <li><b>VLong</b>: offset references to | ||
259 | - * {@link #mtasIndexObjectPositionFileName}, only available in root node</li> | ||
260 | - * <li><b>Byte</b>: flag, should be zero for this tree, only available in root | ||
261 | - * node</li> | ||
262 | - * <li><b>VInt</b>: left</li> | ||
263 | - * <li><b>VInt</b>: right</li> | ||
264 | - * <li><b>VInt</b>: max</li> | ||
265 | - * <li><b>VLong</b>: left reference to {@link #mtasIndexObjectPositionFileName} | ||
266 | - * minus the offset stored in the root node</li> | ||
267 | - * <li><b>VLong</b>: right reference to {@link #mtasIndexObjectPositionFileName} | ||
268 | - * minus the offset stored in the root node</li> | ||
269 | - * <li><b>VInt</b>: number of objects on this node</li> | ||
270 | - * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>: set of the first reference to | ||
271 | - * {@link #mtasObjectFileName} minus offset, the prefixId referring to the | ||
272 | - * position the prefix in {@link #mtasPrefixFileName} and the reference to | ||
273 | - * {@link #mtasTermFileName} minus offset</li> | ||
274 | - * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>,...: for optional other sets of | ||
275 | - * reference to {@link #mtasObjectFileName}, position of the prefix in | ||
276 | - * {@link #mtasPrefixFileName} and the reference to {@link #mtasTermFileName}; | ||
277 | - * for the first item the difference between this reference minus the previous | ||
278 | - * reference is stored</li> | ||
279 | - * </ul> | ||
280 | - * </li> | ||
281 | - * <li><b>File {@link #mtasIndexObjectParentFileName} with extension | ||
282 | - * {@value mtas.codec.MtasCodecPostingsFormat#MTAS_INDEX_OBJECT_PARENT_EXTENSION} | ||
283 | - * </b><br> | ||
284 | - * Contains for each document a tree structure {@link MtasTree} to search | ||
285 | - * objects by parent. Structure of content for each node: | ||
286 | - * <ul> | ||
287 | - * <li><b>VLong</b>: offset references to {@link #mtasIndexObjectParentFileName} | ||
288 | - * , only available in root node</li> | ||
289 | - * <li><b>Byte</b>: flag, for this tree equal to | ||
290 | - * {@link mtas.codec.tree.MtasTree#SINGLE_POSITION_TREE} indicating a tree with | ||
291 | - * exactly one point at each node, only available in root node</li> | ||
292 | - * <li><b>VInt</b>: left</li> | ||
293 | - * <li><b>VInt</b>: right</li> | ||
294 | - * <li><b>VInt</b>: max</li> | ||
295 | - * <li><b>VLong</b>: left reference to {@link #mtasIndexObjectParentFileName} | ||
296 | - * minus the offset stored in the root node</li> | ||
297 | - * <li><b>VLong</b>: right reference to {@link #mtasIndexObjectParentFileName} | ||
298 | - * minus the offset stored in the root node</li> | ||
299 | - * <li><b>VInt</b>: number of objects on this node</li> | ||
300 | - * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>: set of the first reference to | ||
301 | - * {@link #mtasObjectFileName} minus offset, the prefixId referring to the | ||
302 | - * position the prefix in {@link #mtasPrefixFileName} and the reference to | ||
303 | - * {@link #mtasTermFileName} minus offset</li> | ||
304 | - * <li><b>VLong</b>,<b>VInt</b>,<b>VLong</b>,...: for optional other sets of | ||
305 | - * reference to {@link #mtasObjectFileName}, position of the prefix in | ||
306 | - * {@link #mtasPrefixFileName} and the reference to {@link #mtasTermFileName}; | ||
307 | - * for the first item the difference between this reference minus the previous | ||
308 | - * reference is stored</li> | ||
309 | - * </ul> | ||
310 | - * </li> | ||
311 | - * </ul> | ||
312 | - * </li> | ||
313 | - * </ul> | ||
314 | - * | ||
315 | */ | 47 | */ |
316 | 48 | ||
317 | public class MtasFieldsConsumer extends FieldsConsumer { | 49 | public class MtasFieldsConsumer extends FieldsConsumer { |
@@ -362,14 +94,10 @@ public class MtasFieldsConsumer extends FieldsConsumer { | @@ -362,14 +94,10 @@ public class MtasFieldsConsumer extends FieldsConsumer { | ||
362 | /** | 94 | /** |
363 | * Instantiates a new mtas fields consumer. | 95 | * Instantiates a new mtas fields consumer. |
364 | * | 96 | * |
365 | - * @param fieldsConsumer | ||
366 | - * the fields consumer | ||
367 | - * @param state | ||
368 | - * the state | ||
369 | - * @param name | ||
370 | - * the name | ||
371 | - * @param delegatePostingsFormatName | ||
372 | - * the delegate postings format name | 97 | + * @param fieldsConsumer the fields consumer |
98 | + * @param state the state | ||
99 | + * @param name the name | ||
100 | + * @param delegatePostingsFormatName the delegate postings format name | ||
373 | */ | 101 | */ |
374 | public MtasFieldsConsumer(FieldsConsumer fieldsConsumer, | 102 | public MtasFieldsConsumer(FieldsConsumer fieldsConsumer, |
375 | SegmentWriteState state, String name, String delegatePostingsFormatName) { | 103 | SegmentWriteState state, String name, String delegatePostingsFormatName) { |
@@ -426,14 +154,10 @@ public class MtasFieldsConsumer extends FieldsConsumer { | @@ -426,14 +154,10 @@ public class MtasFieldsConsumer extends FieldsConsumer { | ||
426 | /** | 154 | /** |
427 | * Register prefix. | 155 | * Register prefix. |
428 | * | 156 | * |
429 | - * @param field | ||
430 | - * the field | ||
431 | - * @param prefix | ||
432 | - * the prefix | ||
433 | - * @param outPrefix | ||
434 | - * the out prefix | ||
435 | - * @throws IOException | ||
436 | - * Signals that an I/O exception has occurred. | 157 | + * @param field the field |
158 | + * @param prefix the prefix | ||
159 | + * @param outPrefix the out prefix | ||
160 | + * @throws IOException Signals that an I/O exception has occurred. | ||
437 | */ | 161 | */ |
438 | private void registerPrefix(String field, String prefix, | 162 | private void registerPrefix(String field, String prefix, |
439 | IndexOutput outPrefix) throws IOException { | 163 | IndexOutput outPrefix) throws IOException { |
@@ -452,14 +176,10 @@ public class MtasFieldsConsumer extends FieldsConsumer { | @@ -452,14 +176,10 @@ public class MtasFieldsConsumer extends FieldsConsumer { | ||
452 | /** | 176 | /** |
453 | * Register prefix stats single position value. | 177 | * Register prefix stats single position value. |
454 | * | 178 | * |
455 | - * @param field | ||
456 | - * the field | ||
457 | - * @param value | ||
458 | - * the value | ||
459 | - * @param outPrefix | ||
460 | - * the out prefix | ||
461 | - * @throws IOException | ||
462 | - * Signals that an I/O exception has occurred. | 179 | + * @param field the field |
180 | + * @param value the value | ||
181 | + * @param outPrefix the out prefix | ||
182 | + * @throws IOException Signals that an I/O exception has occurred. | ||
463 | */ | 183 | */ |
464 | public void registerPrefixStatsSinglePositionValue(String field, String value, | 184 | public void registerPrefixStatsSinglePositionValue(String field, String value, |
465 | IndexOutput outPrefix) throws IOException { | 185 | IndexOutput outPrefix) throws IOException { |
@@ -474,14 +194,10 @@ public class MtasFieldsConsumer extends FieldsConsumer { | @@ -474,14 +194,10 @@ public class MtasFieldsConsumer extends FieldsConsumer { | ||
474 | /** | 194 | /** |
475 | * Register prefix stats range position value. | 195 | * Register prefix stats range position value. |
476 | * | 196 | * |
477 | - * @param field | ||
478 | - * the field | ||
479 | - * @param value | ||
480 | - * the value | ||
481 | - * @param outPrefix | ||
482 | - * the out prefix | ||
483 | - * @throws IOException | ||
484 | - * Signals that an I/O exception has occurred. | 197 | + * @param field the field |
198 | + * @param value the value | ||
199 | + * @param outPrefix the out prefix | ||
200 | + * @throws IOException Signals that an I/O exception has occurred. | ||
485 | */ | 201 | */ |
486 | public void registerPrefixStatsRangePositionValue(String field, String value, | 202 | public void registerPrefixStatsRangePositionValue(String field, String value, |
487 | IndexOutput outPrefix) throws IOException { | 203 | IndexOutput outPrefix) throws IOException { |
@@ -495,14 +211,10 @@ public class MtasFieldsConsumer extends FieldsConsumer { | @@ -495,14 +211,10 @@ public class MtasFieldsConsumer extends FieldsConsumer { | ||
495 | /** | 211 | /** |
496 | * Register prefix stats set position value. | 212 | * Register prefix stats set position value. |
497 | * | 213 | * |
498 | - * @param field | ||
499 | - * the field | ||
500 | - * @param value | ||
501 | - * the value | ||
502 | - * @param outPrefix | ||
503 | - * the out prefix | ||
504 | - * @throws IOException | ||
505 | - * Signals that an I/O exception has occurred. | 214 | + * @param field the field |
215 | + * @param value the value | ||
216 | + * @param outPrefix the out prefix | ||
217 | + * @throws IOException Signals that an I/O exception has occurred. | ||
506 | */ | 218 | */ |
507 | public void registerPrefixStatsSetPositionValue(String field, String value, | 219 | public void registerPrefixStatsSetPositionValue(String field, String value, |
508 | IndexOutput outPrefix) throws IOException { | 220 | IndexOutput outPrefix) throws IOException { |
@@ -517,8 +229,7 @@ public class MtasFieldsConsumer extends FieldsConsumer { | @@ -517,8 +229,7 @@ public class MtasFieldsConsumer extends FieldsConsumer { | ||
517 | /** | 229 | /** |
518 | * Inits the prefix stats field. | 230 | * Inits the prefix stats field. |
519 | * | 231 | * |
520 | - * @param field | ||
521 | - * the field | 232 | + * @param field the field |
522 | */ | 233 | */ |
523 | private void initPrefixStatsField(String field) { | 234 | private void initPrefixStatsField(String field) { |
524 | if (!singlePositionPrefix.containsKey(field)) { | 235 | if (!singlePositionPrefix.containsKey(field)) { |
@@ -535,8 +246,7 @@ public class MtasFieldsConsumer extends FieldsConsumer { | @@ -535,8 +246,7 @@ public class MtasFieldsConsumer extends FieldsConsumer { | ||
535 | /** | 246 | /** |
536 | * Gets the prefix stats single position prefix attribute. | 247 | * Gets the prefix stats single position prefix attribute. |
537 | * | 248 | * |
538 | - * @param field | ||
539 | - * the field | 249 | + * @param field the field |
540 | * @return the prefix stats single position prefix attribute | 250 | * @return the prefix stats single position prefix attribute |
541 | */ | 251 | */ |
542 | public String getPrefixStatsSinglePositionPrefixAttribute(String field) { | 252 | public String getPrefixStatsSinglePositionPrefixAttribute(String field) { |
@@ -547,8 +257,7 @@ public class MtasFieldsConsumer extends FieldsConsumer { | @@ -547,8 +257,7 @@ public class MtasFieldsConsumer extends FieldsConsumer { | ||
547 | /** | 257 | /** |
548 | * Gets the prefix stats multiple position prefix attribute. | 258 | * Gets the prefix stats multiple position prefix attribute. |
549 | * | 259 | * |
550 | - * @param field | ||
551 | - * the field | 260 | + * @param field the field |
552 | * @return the prefix stats multiple position prefix attribute | 261 | * @return the prefix stats multiple position prefix attribute |
553 | */ | 262 | */ |
554 | public String getPrefixStatsMultiplePositionPrefixAttribute(String field) { | 263 | public String getPrefixStatsMultiplePositionPrefixAttribute(String field) { |
@@ -559,8 +268,7 @@ public class MtasFieldsConsumer extends FieldsConsumer { | @@ -559,8 +268,7 @@ public class MtasFieldsConsumer extends FieldsConsumer { | ||
559 | /** | 268 | /** |
560 | * Gets the prefix stats set position prefix attribute. | 269 | * Gets the prefix stats set position prefix attribute. |
561 | * | 270 | * |
562 | - * @param field | ||
563 | - * the field | 271 | + * @param field the field |
564 | * @return the prefix stats set position prefix attribute | 272 | * @return the prefix stats set position prefix attribute |
565 | */ | 273 | */ |
566 | public String getPrefixStatsSetPositionPrefixAttribute(String field) { | 274 | public String getPrefixStatsSetPositionPrefixAttribute(String field) { |
@@ -585,6 +293,14 @@ public class MtasFieldsConsumer extends FieldsConsumer { | @@ -585,6 +293,14 @@ public class MtasFieldsConsumer extends FieldsConsumer { | ||
585 | return text; | 293 | return text; |
586 | } | 294 | } |
587 | 295 | ||
296 | + /* (non-Javadoc) | ||
297 | + * @see org.apache.lucene.codecs.FieldsConsumer#merge(org.apache.lucene.index.MergeState) | ||
298 | + */ | ||
299 | + @Override | ||
300 | + public void merge(MergeState mergeState) throws IOException { | ||
301 | + delegateFieldsConsumer.merge(mergeState); | ||
302 | + } | ||
303 | + | ||
588 | /* | 304 | /* |
589 | * (non-Javadoc) | 305 | * (non-Javadoc) |
590 | * | 306 | * |
@@ -600,12 +316,9 @@ public class MtasFieldsConsumer extends FieldsConsumer { | @@ -600,12 +316,9 @@ public class MtasFieldsConsumer extends FieldsConsumer { | ||
600 | /** | 316 | /** |
601 | * Write. | 317 | * Write. |
602 | * | 318 | * |
603 | - * @param fieldInfos | ||
604 | - * the field infos | ||
605 | - * @param fields | ||
606 | - * the fields | ||
607 | - * @throws IOException | ||
608 | - * Signals that an I/O exception has occurred. | 319 | + * @param fieldInfos the field infos |
320 | + * @param fields the fields | ||
321 | + * @throws IOException Signals that an I/O exception has occurred. | ||
609 | */ | 322 | */ |
610 | private void write(FieldInfos fieldInfos, Fields fields) { | 323 | private void write(FieldInfos fieldInfos, Fields fields) { |
611 | IndexOutput outField, outDoc, outIndexDocId, outIndexObjectId, | 324 | IndexOutput outField, outDoc, outIndexDocId, outIndexObjectId, |
@@ -1258,27 +971,17 @@ public class MtasFieldsConsumer extends FieldsConsumer { | @@ -1258,27 +971,17 @@ public class MtasFieldsConsumer extends FieldsConsumer { | ||
1258 | /** | 971 | /** |
1259 | * Creates the object and register prefix. | 972 | * Creates the object and register prefix. |
1260 | * | 973 | * |
1261 | - * @param field | ||
1262 | - * the field | ||
1263 | - * @param out | ||
1264 | - * the out | ||
1265 | - * @param term | ||
1266 | - * the term | ||
1267 | - * @param termRef | ||
1268 | - * the term ref | ||
1269 | - * @param startPosition | ||
1270 | - * the start position | ||
1271 | - * @param payload | ||
1272 | - * the payload | ||
1273 | - * @param startOffset | ||
1274 | - * the start offset | ||
1275 | - * @param endOffset | ||
1276 | - * the end offset | ||
1277 | - * @param outPrefix | ||
1278 | - * the out prefix | 974 | + * @param field the field |
975 | + * @param out the out | ||
976 | + * @param term the term | ||
977 | + * @param termRef the term ref | ||
978 | + * @param startPosition the start position | ||
979 | + * @param payload the payload | ||
980 | + * @param startOffset the start offset | ||
981 | + * @param endOffset the end offset | ||
982 | + * @param outPrefix the out prefix | ||
1279 | * @return the integer | 983 | * @return the integer |
1280 | - * @throws IOException | ||
1281 | - * Signals that an I/O exception has occurred. | 984 | + * @throws IOException Signals that an I/O exception has occurred. |
1282 | */ | 985 | */ |
1283 | private Integer createObjectAndRegisterPrefix(String field, IndexOutput out, | 986 | private Integer createObjectAndRegisterPrefix(String field, IndexOutput out, |
1284 | BytesRef term, Long termRef, int startPosition, BytesRef payload, | 987 | BytesRef term, Long termRef, int startPosition, BytesRef payload, |
@@ -1392,15 +1095,11 @@ public class MtasFieldsConsumer extends FieldsConsumer { | @@ -1392,15 +1095,11 @@ public class MtasFieldsConsumer extends FieldsConsumer { | ||
1392 | /** | 1095 | /** |
1393 | * Store tree. | 1096 | * Store tree. |
1394 | * | 1097 | * |
1395 | - * @param tree | ||
1396 | - * the tree | ||
1397 | - * @param out | ||
1398 | - * the out | ||
1399 | - * @param refApproxOffset | ||
1400 | - * the ref approx offset | 1098 | + * @param tree the tree |
1099 | + * @param out the out | ||
1100 | + * @param refApproxOffset the ref approx offset | ||
1401 | * @return the long | 1101 | * @return the long |
1402 | - * @throws IOException | ||
1403 | - * Signals that an I/O exception has occurred. | 1102 | + * @throws IOException Signals that an I/O exception has occurred. |
1404 | */ | 1103 | */ |
1405 | private Long storeTree(MtasTree<?> tree, IndexOutput out, | 1104 | private Long storeTree(MtasTree<?> tree, IndexOutput out, |
1406 | long refApproxOffset) throws IOException { | 1105 | long refApproxOffset) throws IOException { |
@@ -1411,21 +1110,14 @@ public class MtasFieldsConsumer extends FieldsConsumer { | @@ -1411,21 +1110,14 @@ public class MtasFieldsConsumer extends FieldsConsumer { | ||
1411 | /** | 1110 | /** |
1412 | * Store tree. | 1111 | * Store tree. |
1413 | * | 1112 | * |
1414 | - * @param node | ||
1415 | - * the node | ||
1416 | - * @param isSinglePoint | ||
1417 | - * the is single point | ||
1418 | - * @param storeAdditionalInformation | ||
1419 | - * the store additional information | ||
1420 | - * @param out | ||
1421 | - * the out | ||
1422 | - * @param nodeRefApproxOffset | ||
1423 | - * the node ref approx offset | ||
1424 | - * @param refApproxOffset | ||
1425 | - * the ref approx offset | 1113 | + * @param node the node |
1114 | + * @param isSinglePoint the is single point | ||
1115 | + * @param storeAdditionalInformation the store additional information | ||
1116 | + * @param out the out | ||
1117 | + * @param nodeRefApproxOffset the node ref approx offset | ||
1118 | + * @param refApproxOffset the ref approx offset | ||
1426 | * @return the long | 1119 | * @return the long |
1427 | - * @throws IOException | ||
1428 | - * Signals that an I/O exception has occurred. | 1120 | + * @throws IOException Signals that an I/O exception has occurred. |
1429 | */ | 1121 | */ |
1430 | private Long storeTree(MtasTreeNode<?> node, boolean isSinglePoint, | 1122 | private Long storeTree(MtasTreeNode<?> node, boolean isSinglePoint, |
1431 | boolean storeAdditionalInformation, IndexOutput out, | 1123 | boolean storeAdditionalInformation, IndexOutput out, |
@@ -1510,10 +1202,8 @@ public class MtasFieldsConsumer extends FieldsConsumer { | @@ -1510,10 +1202,8 @@ public class MtasFieldsConsumer extends FieldsConsumer { | ||
1510 | /** | 1202 | /** |
1511 | * Token stats add. | 1203 | * Token stats add. |
1512 | * | 1204 | * |
1513 | - * @param min | ||
1514 | - * the min | ||
1515 | - * @param max | ||
1516 | - * the max | 1205 | + * @param min the min |
1206 | + * @param max the max | ||
1517 | */ | 1207 | */ |
1518 | private void tokenStatsAdd(int min, int max) { | 1208 | private void tokenStatsAdd(int min, int max) { |
1519 | tokenStatsNumber++; | 1209 | tokenStatsNumber++; |
@@ -1532,16 +1222,11 @@ public class MtasFieldsConsumer extends FieldsConsumer { | @@ -1532,16 +1222,11 @@ public class MtasFieldsConsumer extends FieldsConsumer { | ||
1532 | /** | 1222 | /** |
1533 | * Copy object and update stats. | 1223 | * Copy object and update stats. |
1534 | * | 1224 | * |
1535 | - * @param id | ||
1536 | - * the id | ||
1537 | - * @param in | ||
1538 | - * the in | ||
1539 | - * @param inRef | ||
1540 | - * the in ref | ||
1541 | - * @param out | ||
1542 | - * the out | ||
1543 | - * @throws IOException | ||
1544 | - * Signals that an I/O exception has occurred. | 1225 | + * @param id the id |
1226 | + * @param in the in | ||
1227 | + * @param inRef the in ref | ||
1228 | + * @param out the out | ||
1229 | + * @throws IOException Signals that an I/O exception has occurred. | ||
1545 | */ | 1230 | */ |
1546 | private void copyObjectAndUpdateStats(int id, IndexInput in, Long inRef, | 1231 | private void copyObjectAndUpdateStats(int id, IndexInput in, Long inRef, |
1547 | IndexOutput out) throws IOException { | 1232 | IndexOutput out) throws IOException { |
src/mtas/codec/util/CodecCollector.java
@@ -2138,8 +2138,9 @@ public class CodecCollector { | @@ -2138,8 +2138,9 @@ public class CodecCollector { | ||
2138 | .checkExistenceNecessaryKeys()) { | 2138 | .checkExistenceNecessaryKeys()) { |
2139 | needSecondRound = true; | 2139 | needSecondRound = true; |
2140 | } | 2140 | } |
2141 | - } | ||
2142 | - } | 2141 | + termVector.subComponentFunction.dataCollector.reduceToSegmentKeys(); |
2142 | + } | ||
2143 | + } | ||
2143 | return needSecondRound; | 2144 | return needSecondRound; |
2144 | } | 2145 | } |
2145 | 2146 |
src/mtas/codec/util/CodecComponent.java
@@ -23,7 +23,10 @@ import mtas.parser.function.util.MtasFunctionParserFunction; | @@ -23,7 +23,10 @@ import mtas.parser.function.util.MtasFunctionParserFunction; | ||
23 | import mtas.parser.function.util.MtasFunctionParserFunctionDefault; | 23 | import mtas.parser.function.util.MtasFunctionParserFunctionDefault; |
24 | import org.apache.commons.lang.ArrayUtils; | 24 | import org.apache.commons.lang.ArrayUtils; |
25 | import org.apache.lucene.search.spans.SpanQuery; | 25 | import org.apache.lucene.search.spans.SpanQuery; |
26 | +import org.apache.lucene.util.automaton.Automata; | ||
27 | +import org.apache.lucene.util.automaton.Automaton; | ||
26 | import org.apache.lucene.util.automaton.CompiledAutomaton; | 28 | import org.apache.lucene.util.automaton.CompiledAutomaton; |
29 | +import org.apache.lucene.util.automaton.Operations; | ||
27 | import org.apache.lucene.util.automaton.RegExp; | 30 | import org.apache.lucene.util.automaton.RegExp; |
28 | 31 | ||
29 | /** | 32 | /** |
src/mtas/codec/util/CodecInfo.java
@@ -274,7 +274,7 @@ public class CodecInfo { | @@ -274,7 +274,7 @@ public class CodecInfo { | ||
274 | IndexInput inTerm = indexInputList.get("term"); | 274 | IndexInput inTerm = indexInputList.get("term"); |
275 | for (MtasTreeHit<?> hit : hits) { | 275 | for (MtasTreeHit<?> hit : hits) { |
276 | MtasToken<String> token = MtasCodecPostingsFormat.getToken(inObject, | 276 | MtasToken<String> token = MtasCodecPostingsFormat.getToken(inObject, |
277 | - inTerm, hit.ref); | 277 | + inTerm, hit.ref); |
278 | if (token != null) { | 278 | if (token != null) { |
279 | if (prefixes.size() > 0) { | 279 | if (prefixes.size() > 0) { |
280 | if (prefixes.contains(token.getPrefix())) { | 280 | if (prefixes.contains(token.getPrefix())) { |
src/mtas/codec/util/DataCollector.java
src/mtas/codec/util/collector/MtasDataAdvanced.java
@@ -201,6 +201,32 @@ abstract class MtasDataAdvanced<T1 extends Number & Comparable<T1>, T2 extends N | @@ -201,6 +201,32 @@ abstract class MtasDataAdvanced<T1 extends Number & Comparable<T1>, T2 extends N | ||
201 | tmpOldSize); | 201 | tmpOldSize); |
202 | } | 202 | } |
203 | 203 | ||
204 | + public void reduceToSegmentKeys() { | ||
205 | + if(segmentRegistration != null) { | ||
206 | + int sizeCopy = size; | ||
207 | + String[] keyListCopy = keyList.clone(); | ||
208 | + T1[] advancedValueSumListCopy = advancedValueSumList.clone(); | ||
209 | + T1[] advancedValueMaxListCopy = advancedValueMaxList.clone(); | ||
210 | + T1[] advancedValueMinListCopy = advancedValueMinList.clone(); | ||
211 | + T1[] advancedValueSumOfSquaresListCopy = advancedValueSumOfSquaresList.clone(); | ||
212 | + T2[] advancedValueSumOfLogsListCopy = advancedValueSumOfLogsList.clone(); | ||
213 | + long[] advancedValueNListCopy = advancedValueNList.clone(); | ||
214 | + size = 0; | ||
215 | + for(int i=0; i< sizeCopy; i++) { | ||
216 | + if(segmentKeys.contains(keyListCopy[i])) { | ||
217 | + keyList[size] = keyListCopy[i]; | ||
218 | + advancedValueSumList[size] = advancedValueSumListCopy[i]; | ||
219 | + advancedValueMaxList[size] = advancedValueMaxListCopy[i]; | ||
220 | + advancedValueMinList[size] = advancedValueMinListCopy[i]; | ||
221 | + advancedValueSumOfSquaresList[size] = advancedValueSumOfSquaresListCopy[i]; | ||
222 | + advancedValueSumOfLogsList[size] = advancedValueSumOfLogsListCopy[i]; | ||
223 | + advancedValueNList[size] = advancedValueNListCopy[i]; | ||
224 | + size++; | ||
225 | + } | ||
226 | + } | ||
227 | + } | ||
228 | + } | ||
229 | + | ||
204 | /* | 230 | /* |
205 | * (non-Javadoc) | 231 | * (non-Javadoc) |
206 | * | 232 | * |
src/mtas/codec/util/collector/MtasDataBasic.java
@@ -228,6 +228,24 @@ abstract class MtasDataBasic<T1 extends Number & Comparable<T1>, T2 extends Numb | @@ -228,6 +228,24 @@ abstract class MtasDataBasic<T1 extends Number & Comparable<T1>, T2 extends Numb | ||
228 | tmpOldSize); | 228 | tmpOldSize); |
229 | } | 229 | } |
230 | 230 | ||
231 | + public void reduceToSegmentKeys() { | ||
232 | + if (segmentRegistration != null) { | ||
233 | + int sizeCopy = size; | ||
234 | + String[] keyListCopy = keyList.clone(); | ||
235 | + T1[] basicValueSumListCopy = basicValueSumList.clone(); | ||
236 | + long[] basicValueNListCopy = basicValueNList.clone(); | ||
237 | + size = 0; | ||
238 | + for (int i = 0; i < sizeCopy; i++) { | ||
239 | + if (segmentKeys.contains(keyListCopy[i])) { | ||
240 | + keyList[size] = keyListCopy[i]; | ||
241 | + basicValueSumList[size] = basicValueSumListCopy[i]; | ||
242 | + basicValueNList[size] = basicValueNListCopy[i]; | ||
243 | + size++; | ||
244 | + } | ||
245 | + } | ||
246 | + } | ||
247 | + } | ||
248 | + | ||
231 | /* | 249 | /* |
232 | * (non-Javadoc) | 250 | * (non-Javadoc) |
233 | * | 251 | * |
src/mtas/codec/util/collector/MtasDataCollector.java
@@ -891,7 +891,7 @@ public abstract class MtasDataCollector<T1 extends Number & Comparable<T1>, T2 e | @@ -891,7 +891,7 @@ public abstract class MtasDataCollector<T1 extends Number & Comparable<T1>, T2 e | ||
891 | /** | 891 | /** |
892 | * Reduce to segment keys. | 892 | * Reduce to segment keys. |
893 | */ | 893 | */ |
894 | - public final void reduceToSegmentKeys() { | 894 | + public void reduceToSegmentKeys() { |
895 | if (segmentRegistration != null) { | 895 | if (segmentRegistration != null) { |
896 | reduceToKeys(segmentKeys); | 896 | reduceToKeys(segmentKeys); |
897 | } | 897 | } |
src/mtas/codec/util/collector/MtasDataFull.java
@@ -158,6 +158,22 @@ abstract class MtasDataFull<T1 extends Number & Comparable<T1>, T2 extends Numbe | @@ -158,6 +158,22 @@ abstract class MtasDataFull<T1 extends Number & Comparable<T1>, T2 extends Numbe | ||
158 | System.arraycopy(tmpNewFullValueList, 0, newFullValueList, 0, tmpOldSize); | 158 | System.arraycopy(tmpNewFullValueList, 0, newFullValueList, 0, tmpOldSize); |
159 | } | 159 | } |
160 | 160 | ||
161 | + public void reduceToSegmentKeys() { | ||
162 | + if(segmentRegistration != null) { | ||
163 | + int sizeCopy = size; | ||
164 | + String[] keyListCopy = keyList.clone(); | ||
165 | + T1[][] fullValueListCopy = fullValueList.clone(); | ||
166 | + size = 0; | ||
167 | + for(int i=0; i< sizeCopy; i++) { | ||
168 | + if(segmentKeys.contains(keyListCopy[i])) { | ||
169 | + keyList[size] = keyListCopy[i]; | ||
170 | + fullValueList[size] = fullValueListCopy[i]; | ||
171 | + size++; | ||
172 | + } | ||
173 | + } | ||
174 | + } | ||
175 | + } | ||
176 | + | ||
161 | /* | 177 | /* |
162 | * (non-Javadoc) | 178 | * (non-Javadoc) |
163 | * | 179 | * |
src/mtas/codec/util/collector/MtasDataItem.java
@@ -38,7 +38,7 @@ public abstract class MtasDataItem<T1 extends Number & Comparable<T1>, T2 extend | @@ -38,7 +38,7 @@ public abstract class MtasDataItem<T1 extends Number & Comparable<T1>, T2 extend | ||
38 | 38 | ||
39 | /** The error list. */ | 39 | /** The error list. */ |
40 | protected HashMap<String, Integer> errorList; | 40 | protected HashMap<String, Integer> errorList; |
41 | - | 41 | + |
42 | /** The comparable sort value. */ | 42 | /** The comparable sort value. */ |
43 | protected NumberComparator<?> comparableSortValue; | 43 | protected NumberComparator<?> comparableSortValue; |
44 | 44 | ||
@@ -101,7 +101,7 @@ public abstract class MtasDataItem<T1 extends Number & Comparable<T1>, T2 extend | @@ -101,7 +101,7 @@ public abstract class MtasDataItem<T1 extends Number & Comparable<T1>, T2 extend | ||
101 | */ | 101 | */ |
102 | public abstract Map<String, Object> rewrite(boolean showDebugInfo) | 102 | public abstract Map<String, Object> rewrite(boolean showDebugInfo) |
103 | throws IOException; | 103 | throws IOException; |
104 | - | 104 | + |
105 | /** | 105 | /** |
106 | * Gets the sub. | 106 | * Gets the sub. |
107 | * | 107 | * |
src/mtas/parser/cql/util/MtasCQLParserSentenceCondition.java
@@ -47,7 +47,7 @@ public class MtasCQLParserSentenceCondition { | @@ -47,7 +47,7 @@ public class MtasCQLParserSentenceCondition { | ||
47 | public MtasCQLParserSentenceCondition(MtasCQLParserBasicSentenceCondition s) | 47 | public MtasCQLParserSentenceCondition(MtasCQLParserBasicSentenceCondition s) |
48 | throws ParseException { | 48 | throws ParseException { |
49 | sequenceList = new ArrayList<List<MtasCQLParserSentenceCondition>>(); | 49 | sequenceList = new ArrayList<List<MtasCQLParserSentenceCondition>>(); |
50 | - basicSentence = s; | 50 | + basicSentence = s; |
51 | minimumOccurence = 1; | 51 | minimumOccurence = 1; |
52 | maximumOccurence = 1; | 52 | maximumOccurence = 1; |
53 | simplified = false; | 53 | simplified = false; |
src/mtas/solr/handler/component/MtasSolrSearchComponent.java
@@ -436,8 +436,8 @@ public class MtasSolrSearchComponent extends SearchComponent { | @@ -436,8 +436,8 @@ public class MtasSolrSearchComponent extends SearchComponent { | ||
436 | * (non-Javadoc) | 436 | * (non-Javadoc) |
437 | * | 437 | * |
438 | * @see | 438 | * @see |
439 | - * org.apache.solr.handler.component.SearchComponent#finishStage(org.apache. | ||
440 | - * solr.handler.component.ResponseBuilder) | 439 | + * org.apache.solr.handler.component.SearchComponent#distributedProcess(org. |
440 | + * apache.solr.handler.component.ResponseBuilder) | ||
441 | */ | 441 | */ |
442 | @Override | 442 | @Override |
443 | public void finishStage(ResponseBuilder rb) { | 443 | public void finishStage(ResponseBuilder rb) { |
@@ -549,7 +549,6 @@ public class MtasSolrSearchComponent extends SearchComponent { | @@ -549,7 +549,6 @@ public class MtasSolrSearchComponent extends SearchComponent { | ||
549 | return STAGE_GROUP; | 549 | return STAGE_GROUP; |
550 | } | 550 | } |
551 | } | 551 | } |
552 | - | ||
553 | } | 552 | } |
554 | return ResponseBuilder.STAGE_DONE; | 553 | return ResponseBuilder.STAGE_DONE; |
555 | } | 554 | } |
@@ -562,15 +561,9 @@ public class MtasSolrSearchComponent extends SearchComponent { | @@ -562,15 +561,9 @@ public class MtasSolrSearchComponent extends SearchComponent { | ||
562 | * @return the mtas fields | 561 | * @return the mtas fields |
563 | */ | 562 | */ |
564 | 563 | ||
565 | - /** | ||
566 | - * Gets the mtas fields. | ||
567 | - * | ||
568 | - * @param rb | ||
569 | - * the rb | ||
570 | - * @return the mtas fields | ||
571 | - */ | ||
572 | private ComponentFields getMtasFields(ResponseBuilder rb) { | 564 | private ComponentFields getMtasFields(ResponseBuilder rb) { |
573 | return (ComponentFields) rb.req.getContext().get(ComponentFields.class); | 565 | return (ComponentFields) rb.req.getContext().get(ComponentFields.class); |
574 | } | 566 | } |
575 | 567 | ||
568 | + | ||
576 | } | 569 | } |
src/site/markdown/download.md.vm
@@ -13,10 +13,10 @@ | @@ -13,10 +13,10 @@ | ||
13 | <tr> | 13 | <tr> |
14 | <td>$context.get("currentDevelopmentVersion")</td> | 14 | <td>$context.get("currentDevelopmentVersion")</td> |
15 | <td>$context.get("currentDevelopmentRelease")</td> | 15 | <td>$context.get("currentDevelopmentRelease")</td> |
16 | - <td><a href='https://github.com/meertensinstituut/mtas/releases/download/${currentDevelopmentRelease}/mtas-${currentDevelopmentVersion}.jar'>Binary (jar)</a></td> | ||
17 | - <td><a href='https://github.com/meertensinstituut/mtas/archive/${currentDevelopmentRelease}.tar.gz'>Source (tgz)</a></td> | ||
18 | - <td><a href='https://github.com/meertensinstituut/mtas/archive/${currentDevelopmentRelease}.zip'>Source (zip)</a></td> | 16 | + <td><a href='https://github.com/matthijsbrouwer/mtas/releases/download/${currentDevelopmentRelease}/mtas-${currentDevelopmentVersion}.jar'>Binary (jar)</a></td> |
17 | + <td><a href='https://github.com/matthijsbrouwer/mtas/archive/${currentDevelopmentRelease}.tar.gz'>Source (tgz)</a></td> | ||
18 | + <td><a href='https://github.com/matthijsbrouwer/mtas/archive/${currentDevelopmentRelease}.zip'>Source (zip)</a></td> | ||
19 | <td>Development version</td> | 19 | <td>Development version</td> |
20 | </tr> | 20 | </tr> |
21 | </tbody> | 21 | </tbody> |
22 | -</table> | 22 | -</table> |
23 | +</table> | ||
23 | \ No newline at end of file | 24 | \ No newline at end of file |
src/site/markdown/installation.md
0 → 100644
src/site/markdown/installation_solr.md