Commit f50d9e14d4e72e2fd857959ce7c6340fade289b2
1 parent
70fe7647
update
Showing
4 changed files
with
18 additions
and
302 deletions
conf/parser/mtas/crm_test.xml
... | ... | @@ -17,20 +17,16 @@ |
17 | 17 | <!-- START CONFIGURATION MTAS FOLIA PARSER --> |
18 | 18 | <parser name="mtas.analysis.parser.MtasCRMParser"> |
19 | 19 | |
20 | -<<<<<<< HEAD | |
21 | 20 | <!-- START GENERAL SETTINGS MTAS PARSER --> |
22 | 21 | <autorepair value="true" /> |
23 | 22 | <makeunique value="true" /> |
24 | 23 | <!-- END GENERAL SETTINGS MTAS PARSER --> |
25 | 24 | |
26 | -======= | |
27 | ->>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | |
28 | 25 | <mappings> |
29 | 26 | |
30 | 27 | <mapping type="word"> |
31 | 28 | </mapping> |
32 | 29 | |
33 | -<<<<<<< HEAD | |
34 | 30 | <mapping type="wordAnnotation" name="0"> |
35 | 31 | <token type="string" offset="false" parent="false"> |
36 | 32 | <pre> |
... | ... | @@ -75,19 +71,12 @@ |
75 | 71 | <token type="string" offset="false" parent="false"> |
76 | 72 | <pre> |
77 | 73 | <item type="string" value="t2" /> |
78 | -======= | |
79 | - <mapping type="wordAnnotation" name="2"> | |
80 | - <token type="string" offset="false" parent="false"> | |
81 | - <pre> | |
82 | - <item type="string" value="t" /> | |
83 | ->>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | |
84 | 74 | </pre> |
85 | 75 | <post> |
86 | 76 | <item type="text" /> |
87 | 77 | </post> |
88 | 78 | </token> |
89 | 79 | </mapping> |
90 | -<<<<<<< HEAD | |
91 | 80 | <mapping type="wordAnnotation" name="2"> |
92 | 81 | <token type="string" offset="false" parent="false"> |
93 | 82 | <pre> |
... | ... | @@ -98,8 +87,6 @@ |
98 | 87 | </post> |
99 | 88 | </token> |
100 | 89 | </mapping> |
101 | -======= | |
102 | ->>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | |
103 | 90 | <mapping type="wordAnnotation" name="3"> |
104 | 91 | <token type="string" offset="false" parent="false"> |
105 | 92 | <pre> |
... | ... | @@ -111,7 +98,6 @@ |
111 | 98 | </token> |
112 | 99 | </mapping> |
113 | 100 | <mapping type="wordAnnotation" name="4"> |
114 | -<<<<<<< HEAD | |
115 | 101 | <token type="string" offset="false" parent="false"> |
116 | 102 | <pre> |
117 | 103 | <item type="string" value="crm" /> |
... | ... | @@ -128,34 +114,12 @@ |
128 | 114 | </condition> |
129 | 115 | </mapping> |
130 | 116 | <mapping type="crmPair" name="part"> |
131 | -======= | |
132 | - </mapping> | |
133 | - <mapping type="wordAnnotation" name="5"> | |
134 | - </mapping> | |
135 | - <mapping type="wordAnnotation" name="6"> | |
136 | - </mapping> | |
137 | - <mapping type="wordAnnotation" name="7"> | |
138 | - <token type="string" offset="false" parent="false"> | |
139 | - <pre> | |
140 | - <item type="string" value="sentence" /> | |
141 | - </pre> | |
142 | - <post> | |
143 | - <item type="text" /> | |
144 | - </post> | |
145 | - </token> | |
146 | - <condition> | |
147 | - <item type="text" not="true" condition="-" /> | |
148 | - </condition> | |
149 | - </mapping> | |
150 | - <mapping type="wordAnnotation" name="pos"> | |
151 | ->>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | |
152 | 117 | <token type="string" offset="false" parent="false"> |
153 | 118 | <pre> |
154 | 119 | <item type="name" /> |
155 | 120 | </pre> |
156 | 121 | <post> |
157 | 122 | <item type="text" /> |
158 | -<<<<<<< HEAD | |
159 | 123 | </post> |
160 | 124 | </token> |
161 | 125 | </mapping> |
... | ... | @@ -318,111 +282,11 @@ |
318 | 282 | </condition> |
319 | 283 | </function> |
320 | 284 | <function type="wordAnnotation" name="4" split="+"> |
321 | -======= | |
322 | - </post> | |
323 | - </token> | |
324 | - </mapping> | |
325 | - <mapping type="wordAnnotation" name="feat.getal"> | |
326 | - <token type="string" offset="false" parent="false"> | |
327 | - <pre> | |
328 | - <item type="name" /> | |
329 | - </pre> | |
330 | - <post> | |
331 | - <item type="text" /> | |
332 | - </post> | |
333 | - </token> | |
334 | - </mapping> | |
335 | - <mapping type="wordAnnotation" name="feat.persoon"> | |
336 | - <token type="string" offset="false" parent="false"> | |
337 | - <pre> | |
338 | - <item type="name" /> | |
339 | - </pre> | |
340 | - <post> | |
341 | - <item type="text" /> | |
342 | - </post> | |
343 | - </token> | |
344 | - </mapping> | |
345 | - <mapping type="wordAnnotation" name="feat.ntype"> | |
346 | - <token type="string" offset="false" parent="false"> | |
347 | - <pre> | |
348 | - <item type="name" /> | |
349 | - </pre> | |
350 | - <post> | |
351 | - <item type="text" /> | |
352 | - </post> | |
353 | - </token> | |
354 | - </mapping> | |
355 | - <mapping type="wordAnnotation" name="feat.pvtijd"> | |
356 | - <token type="string" offset="false" parent="false"> | |
357 | - <pre> | |
358 | - <item type="name" /> | |
359 | - </pre> | |
360 | - <post> | |
361 | - <item type="text" /> | |
362 | - </post> | |
363 | - </token> | |
364 | - </mapping> | |
365 | - <mapping type="wordAnnotation" name="feat.wvorm"> | |
366 | - <token type="string" offset="false" parent="false"> | |
367 | - <pre> | |
368 | - <item type="name" /> | |
369 | - </pre> | |
370 | - <post> | |
371 | - <item type="text" /> | |
372 | - </post> | |
373 | - </token> | |
374 | - </mapping> | |
375 | - <mapping type="wordAnnotation" name="feat.numtype"> | |
376 | - <token type="string" offset="false" parent="false"> | |
377 | - <pre> | |
378 | - <item type="name" /> | |
379 | - </pre> | |
380 | - <post> | |
381 | - <item type="text" /> | |
382 | - </post> | |
383 | - </token> | |
384 | - </mapping> | |
385 | - <mapping type="wordAnnotation" name="feat.vwtype"> | |
386 | - <token type="string" offset="false" parent="false"> | |
387 | - <pre> | |
388 | - <item type="name" /> | |
389 | - </pre> | |
390 | - <post> | |
391 | - <item type="text" /> | |
392 | - </post> | |
393 | - </token> | |
394 | - </mapping> | |
395 | - <mapping type="wordAnnotation" name="feat.lwtype"> | |
396 | - <token type="string" offset="false" parent="false"> | |
397 | - <pre> | |
398 | - <item type="name" /> | |
399 | - </pre> | |
400 | - <post> | |
401 | - <item type="text" /> | |
402 | - </post> | |
403 | - </token> | |
404 | - </mapping> | |
405 | - <mapping type="wordAnnotation" name="feat.probleemgeval"> | |
406 | - <token type="string" offset="false" parent="false"> | |
407 | - <pre> | |
408 | - <item type="name" /> | |
409 | - </pre> | |
410 | - <post> | |
411 | - <item type="text" /> | |
412 | - </post> | |
413 | - </token> | |
414 | - </mapping> | |
415 | - </mappings> | |
416 | - | |
417 | - <functions> | |
418 | - <function name="4" split="+"> | |
419 | ->>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | |
420 | 285 | <condition value="000,001,002,003,004,005,006,009"> |
421 | 286 | <output name="pos" value="N" /> |
422 | 287 | <output name="feat.getal" value="ev" /> |
423 | 288 | </condition> |
424 | 289 | <condition value="010,011,012,013,014,015,016,019"> |
425 | -<<<<<<< HEAD | |
426 | 290 | <output name="pos" value="N" /> |
427 | 291 | <output name="feat.getal" value="mv" /> |
428 | 292 | </condition> |
... | ... | @@ -738,167 +602,6 @@ |
738 | 602 | <output name="feat.form" value="unclear" /> |
739 | 603 | </condition> |
740 | 604 | |
741 | -======= | |
742 | - <output name="pos" value="N" /> | |
743 | - <output name="feat.getal" value="mv" /> | |
744 | - </condition> | |
745 | - <condition value="020,021,022,023,024,025,026,029"> | |
746 | - <output name="pos" value="N" /> | |
747 | - <output name="feat.ntype" value="eigen" /> | |
748 | - </condition> | |
749 | - <condition value="090,091,092,093,094,095,096,099"> | |
750 | - <output name="pos" value="N" /> | |
751 | - <output name="feat.probleemgeval" /> | |
752 | - </condition> | |
753 | - <condition value="100,101,102,103,104,105,106,109"> | |
754 | - <output name="pos" value="ADJ" /> | |
755 | - <output name="feat.getal" value="ev" /> | |
756 | - </condition> | |
757 | - <condition value="110,111,112,113,114,115,116,119"> | |
758 | - <output name="pos" value="ADJ" /> | |
759 | - <output name="feat.getal" value="mv" /> | |
760 | - </condition> | |
761 | - <condition value="190,191,192,193,194,195,196,199"> | |
762 | - <output name="pos" value="ADJ" /> | |
763 | - <output name="feat.probleemgeval" /> | |
764 | - </condition> | |
765 | - | |
766 | - | |
767 | - <condition value="200,201,202,203,204,205,206,209"> | |
768 | - <output name="pos" value="WW" /> | |
769 | - <output name="feat.pvtijd" value="tgw" /> | |
770 | - </condition> | |
771 | - <condition value="210,211,212,213,214,215,216,219"> | |
772 | - <output name="pos" value="WW" /> | |
773 | - <output name="feat.pvtijd" value="tgw" /> | |
774 | - </condition> | |
775 | - <condition value="220,221,222,223,224,225,226,229"> | |
776 | - <output name="pos" value="WW" /> | |
777 | - <output name="feat.pvtijd" value="verl" /> | |
778 | - </condition> | |
779 | - <condition value="230,231,232,233,234,235,236,239"> | |
780 | - <output name="pos" value="WW" /> | |
781 | - <output name="feat.pvtijd" value="verl" /> | |
782 | - </condition> | |
783 | - <condition value="240,241,242,243,244,245,246,249"> | |
784 | - <output name="pos" value="WW" /> | |
785 | - </condition> | |
786 | - <condition value="250,251,252,253,254,255,256,259"> | |
787 | - <output name="pos" value="WW" /> | |
788 | - <output name="feat.wvorm" value="inf" /> | |
789 | - </condition> <condition value="260,261,262,263,264,265,266,269"> | |
790 | - <output name="pos" value="WW" /> | |
791 | - <output name="feat.wvorm" value="inf" /> | |
792 | - </condition> <condition value="270,271,272,273,274,275,276,279"> | |
793 | - <output name="pos" value="WW" /> | |
794 | - </condition> <condition value="280,281,282,283,284,285,286,289"> | |
795 | - <output name="pos" value="WW" /> | |
796 | - </condition> | |
797 | - <condition value="290,291,292,293,294,295,296,299"> | |
798 | - <output name="pos" value="WW" /> | |
799 | - <output name="feat.probleemgeval" /> | |
800 | - </condition> | |
801 | - | |
802 | - | |
803 | - <condition value="300,301,302,303,304,305,306,309"> | |
804 | - <output name="pos" value="TW" /> | |
805 | - <output name="feat.numtype" value="hoofd" /> | |
806 | - </condition> | |
807 | - <condition value="310,311,312,313,314,315,316,319"> | |
808 | - <output name="pos" value="TW" /> | |
809 | - <output name="feat.numtype" value="rang" /> | |
810 | - </condition> | |
811 | - <condition value="320,321,322,323,324,325,326,329"> | |
812 | - <output name="pos" value="TW" /> | |
813 | - </condition> | |
814 | - <condition value="390,391,392,393,394,395,396,399"> | |
815 | - <output name="pos" value="TW" /> | |
816 | - <output name="feat.probleemgeval" /> | |
817 | - </condition> | |
818 | - | |
819 | - <condition value="401"> | |
820 | - <output name="pos" value="VNW" /> | |
821 | - <output name="feat.getal" value="ev" /> | |
822 | - <output name="feat.persoon" value="1" /> | |
823 | - </condition> | |
824 | - <condition value="402"> | |
825 | - <output name="pos" value="VNW" /> | |
826 | - <output name="feat.getal" value="ev" /> | |
827 | - <output name="feat.persoon" value="2" /> | |
828 | - </condition> | |
829 | - <condition value="403"> | |
830 | - <output name="pos" value="VNW" /> | |
831 | - <output name="feat.getal" value="ev" /> | |
832 | - <output name="feat.persoon" value="3" /> | |
833 | - </condition> | |
834 | - <condition value="404"> | |
835 | - <output name="pos" value="VNW" /> | |
836 | - <output name="feat.getal" value="mv" /> | |
837 | - <output name="feat.persoon" value="1" /> | |
838 | - </condition> | |
839 | - <condition value="405"> | |
840 | - <output name="pos" value="VNW" /> | |
841 | - <output name="feat.getal" value="mv" /> | |
842 | - <output name="feat.persoon" value="2" /> | |
843 | - </condition> | |
844 | - <condition value="406"> | |
845 | - <output name="pos" value="VNW" /> | |
846 | - <output name="feat.getal" value="mv" /> | |
847 | - <output name="feat.persoon" value="3" /> | |
848 | - </condition> | |
849 | - <condition value="409"> | |
850 | - <output name="pos" value="VNW" /> | |
851 | - <output name="feat.probleemgeval" /> | |
852 | - </condition> | |
853 | - <condition value="410,411,412,413,414,415,416,419"> | |
854 | - <output name="pos" value="VNW" /> | |
855 | - <output name="feat.vwtype" value="aanw" /> | |
856 | - </condition> | |
857 | - <condition value="420,421,422,423,424,425,426,429"> | |
858 | - <output name="pos" value="VNW" /> | |
859 | - <output name="feat.vwtype" value="betr" /> | |
860 | - </condition> | |
861 | - <condition value="430,431,432,433,434,435,436,439"> | |
862 | - <output name="pos" value="VNW" /> | |
863 | - <output name="feat.vwtype" value="vb" /> | |
864 | - </condition> | |
865 | - <condition value="434,441,442,443,444,445,446,449"> | |
866 | - <output name="pos" value="VNW" /> | |
867 | - <output name="feat.vwtype" value="vb" /> | |
868 | - </condition> | |
869 | - <condition value="440,441,442,443,444,445,446,449"> | |
870 | - <output name="pos" value="VNW" /> | |
871 | - <output name="feat.lwtype" value="onbep" /> | |
872 | - </condition> | |
873 | - <condition value="450,451,452,453,454,455,456,459"> | |
874 | - <output name="pos" value="VNW" /> | |
875 | - <output name="feat.vwtype" value="bez" /> | |
876 | - </condition> | |
877 | - | |
878 | - | |
879 | - <condition value="001,011,021,091"> | |
880 | - <output name="feat.form" value="-e" /> | |
881 | - </condition> | |
882 | - <condition value="002,012,022,092"> | |
883 | - <output name="feat.form" value="-s/-th" /> | |
884 | - </condition> | |
885 | - <condition value="003,013,023,092"> | |
886 | - <output name="feat.form" value="-t" /> | |
887 | - </condition> | |
888 | - <condition value="004,014,024,092"> | |
889 | - <output name="feat.form" value="-n" /> | |
890 | - </condition> | |
891 | - <condition value="005,015,025,095"> | |
892 | - <output name="feat.form" value="-r/-re" /> | |
893 | - </condition> | |
894 | - <condition value="006,016,026,096"> | |
895 | - <output name="feat.form" value="-a" /> | |
896 | - </condition> | |
897 | - <condition value="009,019,029,099"> | |
898 | - <output name="feat.form" value="unclear" /> | |
899 | - </condition> | |
900 | - | |
901 | ->>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | |
902 | 605 | </function> |
903 | 606 | </functions> |
904 | 607 | |
... | ... |
conf/parser/mtas/folia_ddd.xml
... | ... | @@ -19,12 +19,8 @@ |
19 | 19 | |
20 | 20 | <!-- START GENERAL SETTINGS MTAS FOLIA PARSER --> |
21 | 21 | <autorepair value="true" /> |
22 | -<<<<<<< HEAD | |
23 | 22 | <makeunique value="true" /> |
24 | 23 | <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> |
25 | -======= | |
26 | - <!-- END GENERAL SETTINGS MTAS FOLIA PARSER --> | |
27 | ->>>>>>> cac6601c611fd300b075447ee1eb8299e5bffc90 | |
28 | 24 | |
29 | 25 | <!-- START REFERENCES --> |
30 | 26 | <references> |
... | ... |
src/site/markdown/features.md
1 | 1 | # Features |
2 | 2 | |
3 | 3 | |
4 | +### Index | |
5 | +* Supports indexing [FoLiA](indexing_formats_folia.html), [TEI](indexing_formats_tei.html), [CRM](indexing_formats_crm.html) and [Sketch](indexing_formats_sketch.html). | |
6 | +* Custom [mapping](indexing_formats.html) from the original document format to the index structure. | |
7 | +* [Configure](search_configuration.html) multiple document formats and mappings within the same core. | |
8 | + | |
9 | + | |
10 | +### Search | |
11 | +* Supports [CQL](search_cql.html) query language. | |
12 | +* [Statistics](search_statistics.html) on the number of [words](search_query_stats_positions.html), [tokens](search_query_stats_tokens.html) and [spans](search_query_stats_spans.html). | |
13 | +* Use of [functions](search_functions.html) to produce statistics for custom-defined relations between multiple spans and/or numbers of words. | |
14 | +* [Facets](search_facet.html) with [statistics](search_statistics.html) on hits. | |
15 | +* [Kwic and lists](search_kwic_and_list.html), [termvectors](search_termvector.html) and [grouping](search_group.html) for spans. | |
16 | + | |
17 | + | |
18 | +### Solr | |
19 | +* Can be used as a plugin for [Apache Solr](http://lucene.apache.org/solr/). | |
20 | +* Supports existing Solr functionality including distributed search with [sharding](search_sharding.html). | |
4 | 21 | |
5 | 22 | |
... | ... |
src/site/markdown/installation.md.vm
... | ... | @@ -20,7 +20,7 @@ After a successful build, the directory `target` will contain the new jar librar |
20 | 20 | |
21 | 21 | ###Installation |
22 | 22 | |
23 | -Mtas can be used as plugin for [Apache Solr](http://lucene.apache.org/solr/) or as library in combination with [Apache Lucene](http://lucene.apache.org/solr/). | |
23 | +Mtas can be used as a plugin for [Apache Solr](http://lucene.apache.org/solr/) or as a library in combination with [Apache Lucene](http://lucene.apache.org/). | |
24 | 24 | |
25 | 25 | - Getting started with [Mtas and Lucene](installation_lucene.html) |
26 | 26 | - Getting started with [Mtas and Solr](installation_solr.html) |
... | ... |