first commit (0237256f) | Commits | dariah / mtas

Browse Code »

Commit 0237256f1832c4225b3d6bf799beff77ffae092d

Authored by Matthijs Brouwer 9 years ago

0 parents

first commit

Inline | Side-by-side

Showing 125 changed files with 35025 additions and 0 deletions

Too many changes to show.
Reload with full diff | Plain diff | Email patch

To preserve performance, only 12 of 125 files are displayed.

README.md 0 → 100644

View file @0237256

	1	+++ a/README.md
	1	+Multi Tier Annotation Search
...	...

conf/parser/folia/edbo.xml 0 → 100644

View file @0237256

	1	+++ a/conf/parser/folia/edbo.xml
	1	+<?xml version="1.0" encoding="UTF-8" ?>
	2	+<mtas>
	3	+
	4	+ <!-- START MTAS INDEX CONFIGURATION -->
	5	+ <index>
	6	+ <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
	7	+ <payload index="false" />
	8	+ <offset index="false" />
	9	+ <realoffset index="false" />
	10	+ <parent index="true" />
	11	+ <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
	12	+ </index>
	13	+ <!-- END MTAS INDEX CONFIGURATION -->
	14	+
	15	+ <!-- START CONFIGURATION MTAS FOLIA PARSER -->
	16	+ <parser name="mtas.analysis.parser.MtasFoliaParser">
	17	+
	18	+ <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
	19	+ <namespaceURI value="http://ilk.uvt.nl/folia" />
	20	+ <autorepair value="true" />
	21	+ <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
	22	+
	23	+ <!-- START REFERENCES -->
	24	+ <references>
	25	+ <reference name="wref" ref="id" />
	26	+ </references>
	27	+ <!-- END REFERENCES -->
	28	+
	29	+ <!-- START MAPPINGS -->
	30	+ <mappings>
	31	+
	32	+ <!-- START WORDS -->
	33	+ <mapping type="word" name="w">
	34	+ </mapping>
	35	+ <mapping type="word" name="w">
	36	+ <token type="string" offset="false" realoffset="false" parent="false">
	37	+ <pre>
	38	+ <item type="name" />
	39	+ </pre>
	40	+ <post>
	41	+ <item type="attribute" name="class" />
	42	+ </post>
	43	+ </token>
	44	+ <condition>
	45	+ <item type="attribute" name="class" />
	46	+ <item type="attribute" name="class" not="true" condition="WORD" />
	47	+ </condition>
	48	+ </mapping>
	49	+ <!-- END WORDS -->
	50	+
	51	+ <!-- START WORD ANNOTATIONS -->
	52	+ <mapping type="wordAnnotation" name="t">
	53	+ <token type="string" offset="false">
	54	+ <pre>
	55	+ <item type="name" />
	56	+ </pre>
	57	+ <post>
	58	+ <item type="text" />
	59	+ </post>
	60	+ </token>
	61	+ <token type="string" offset="false" realoffset="false" parent="false">
	62	+ <pre>
	63	+ <item type="name" />
	64	+ <item type="string" value="_lc" />
	65	+ </pre>
	66	+ <post>
	67	+ <item type="text" filter="ascii,lowercase" />
	68	+ </post>
	69	+ </token>
	70	+ <condition>
	71	+ <item type="ancestor" number="0" />
	72	+ <item type="ancestorWord" number="1" />
	73	+ <item type="unknownAncestor" number="0" />
	74	+ </condition>
	75	+ </mapping>
	76	+ <mapping type="wordAnnotation" name="aref">
	77	+ <token type="string" offset="false">
	78	+ <pre>
	79	+ <item type="string" value="translated.t" />
	80	+ </pre>
	81	+ <post>
	82	+ <item type="attribute" name="t" />
	83	+ </post>
	84	+ </token>
	85	+ <token type="string" offset="false" realoffset="false" parent="false">
	86	+ <pre>
	87	+ <item type="string" value="translated.t" />
	88	+ <item type="string" value="_lc" />
	89	+ </pre>
	90	+ <post>
	91	+ <item type="attribute" name="t" filter="ascii,lowercase" />
	92	+ </post>
	93	+ </token>
	94	+ <condition>
	95	+ <item type="ancestor" number="0" />
	96	+ <item type="ancestorWord" number="1" />
	97	+ <item type="unknownAncestor" number="1" />
	98	+ </condition>
	99	+ </mapping>
	100	+ <mapping type="wordAnnotation" name="lemma">
	101	+ <token type="string" offset="false" realoffset="false" parent="false">
	102	+ <pre>
	103	+ <item type="name" />
	104	+ </pre>
	105	+ <post>
	106	+ <item type="attribute" name="class" />
	107	+ </post>
	108	+ </token>
	109	+ <condition>
	110	+ <item type="attribute" name="class" />
	111	+ <item type="ancestor" number="0" />
	112	+ <item type="unknownAncestor" number="0" />
	113	+ <item type="attribute" name="set" condition="original.http://ilk.uvt.nl/folia/sets/frog-mblem-nl" />
	114	+ </condition>
	115	+ </mapping>
	116	+ <mapping type="wordAnnotation" name="lemma">
	117	+ <token type="string" offset="false" realoffset="false" parent="false">
	118	+ <pre>
	119	+ <item type="string" value="translated." />
	120	+ <item type="name" />
	121	+ </pre>
	122	+ <post>
	123	+ <item type="attribute" name="class" />
	124	+ </post>
	125	+ </token>
	126	+ <condition>
	127	+ <item type="attribute" name="class" />
	128	+ <item type="ancestor" number="0" />
	129	+ <item type="unknownAncestor" number="1" />
	130	+ <item type="attribute" name="set" condition="translated.http://ilk.uvt.nl/folia/sets/frog-mblem-nl" />
	131	+ </condition>
	132	+ </mapping>
	133	+ <mapping type="wordAnnotation" name="morphology">
	134	+ </mapping>
	135	+ <mapping type="wordAnnotation" name="morpheme">
	136	+ <condition>
	137	+ <item type="ancestor" number="1" />
	138	+ <item type="ancestorName" condition="morphology" />
	139	+ </condition>
	140	+ </mapping>
	141	+ <mapping type="wordAnnotation" name="t">
	142	+ <token type="string" offset="false" realoffset="false" parent="false">
	143	+ <pre>
	144	+ <item type="ancestorName" />
	145	+ </pre>
	146	+ <post>
	147	+ <item type="text" />
	148	+ </post>
	149	+ </token>
	150	+ <condition>
	151	+ <item type="ancestorName" distance="0" condition="morpheme" />
	152	+ </condition>
	153	+ </mapping>
	154	+ <mapping type="wordAnnotation" name="pos">
	155	+ <token type="string" offset="false" realoffset="false" parent="false">
	156	+ <pre>
	157	+ <item type="name" />
	158	+ </pre>
	159	+ <post>
	160	+ <item type="attribute" name="head" />
	161	+ </post>
	162	+ <payload>
	163	+ <item type="attribute" name="confidence" />
	164	+ </payload>
	165	+ </token>
	166	+ <condition>
	167	+ <item type="ancestor" number="0" />
	168	+ <item type="unknownAncestor" number="0" />
	169	+ <item type="attribute" name="class" />
	170	+ <item type="attribute" name="set" condition="original.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" />
	171	+ </condition>
	172	+ </mapping>
	173	+ <mapping type="wordAnnotation" name="pos">
	174	+ <token type="string" offset="false" realoffset="false" parent="false">
	175	+ <pre>
	176	+ <item type="string" value="translated." />
	177	+ <item type="name" />
	178	+ </pre>
	179	+ <post>
	180	+ <item type="attribute" name="head" />
	181	+ </post>
	182	+ <payload>
	183	+ <item type="attribute" name="confidence" />
	184	+ </payload>
	185	+ </token>
	186	+ <condition>
	187	+ <item type="ancestor" number="0" />
	188	+ <item type="unknownAncestor" number="1" />
	189	+ <item type="attribute" name="class" />
	190	+ <item type="attribute" name="set" condition="translated.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" />
	191	+ </condition>
	192	+ </mapping>
	193	+ <mapping type="wordAnnotation" name="feat">
	194	+ <token type="string" offset="false" realoffset="false" parent="false">
	195	+ <pre>
	196	+ <item type="name" />
	197	+ <item type="attribute" name="subset" prefix="." />
	198	+ </pre>
	199	+ <post>
	200	+ <item type="attribute" name="class" />
	201	+ </post>
	202	+ <payload>
	203	+ <item type="ancestorAttribute" distance="0" name="confidence" />
	204	+ </payload>
	205	+ </token>
	206	+ <condition>
	207	+ <item type="ancestor" number="1" />
	208	+ <item type="unknownAncestor" number="0" />
	209	+ <item type="attribute" name="class" />
	210	+ <item type="attribute" name="subset" />
	211	+ <item type="ancestorAttribute" name="set" condition="original.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" />
	212	+ </condition>
	213	+ </mapping>
	214	+ <mapping type="wordAnnotation" name="feat">
	215	+ <token type="string" offset="false" realoffset="false" parent="false">
	216	+ <pre>
	217	+ <item type="string" value="translated." />
	218	+ <item type="name" />
	219	+ <item type="attribute" name="subset" prefix="." />
	220	+ </pre>
	221	+ <post>
	222	+ <item type="attribute" name="class" />
	223	+ </post>
	224	+ <payload>
	225	+ <item type="ancestorAttribute" distance="0" name="confidence" />
	226	+ </payload>
	227	+ </token>
	228	+ <condition>
	229	+ <item type="ancestor" number="1" />
	230	+ <item type="unknownAncestor" number="0" />
	231	+ <item type="attribute" name="class" />
	232	+ <item type="attribute" name="subset" />
	233	+ <item type="ancestorAttribute" name="set" condition="translated.http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" />
	234	+ </condition>
	235	+ </mapping>
	236	+ <!-- END WORD ANNOTATIONS -->
	237	+
	238	+ <!-- START RELATIONS -->
	239	+ <mapping type="relation" name="chunk">
	240	+ <token type="string" offset="false" realoffset="false" parent="false">
	241	+ <pre>
	242	+ <item type="name" />
	243	+ </pre>
	244	+ <post>
	245	+ <item type="attribute" name="class" />
	246	+ </post>
	247	+ <payload>
	248	+ <item type="attribute" name="confidence" />
	249	+ </payload>
	250	+ </token>
	251	+ <condition>
	252	+ <item type="attribute" name="class" />
	253	+ </condition>
	254	+ </mapping>
	255	+ <mapping type="relation" name="dependency">
	256	+ <token type="string" offset="false" realoffset="false" parent="false">
	257	+ <pre>
	258	+ <item type="name" />
	259	+ </pre>
	260	+ <post>
	261	+ <item type="attribute" name="class" />
	262	+ </post>
	263	+ </token>
	264	+ <condition>
	265	+ <item type="attribute" name="class" />
	266	+ </condition>
	267	+ </mapping>
	268	+ <mapping type="relation" name="hd">
	269	+ <token type="string" offset="false" realoffset="false" parent="false">
	270	+ <pre>
	271	+ <item type="ancestorName" distance="0" />
	272	+ <item type="name" prefix="." />
	273	+ </pre>
	274	+ </token>
	275	+ <condition>
	276	+ <item type="ancestorName" condition="dependency" />
	277	+ <item type="ancestor" number="1" />
	278	+ </condition>
	279	+ </mapping>
	280	+ <mapping type="relation" name="dep">
	281	+ <token type="string" offset="false" realoffset="false" parent="false">
	282	+ <pre>
	283	+ <item type="ancestorName" distance="0" />
	284	+ <item type="name" prefix="." />
	285	+ </pre>
	286	+ </token>
	287	+ <condition>
	288	+ <item type="ancestor" number="1" />
	289	+ <item type="ancestorName" condition="dependency" />
	290	+ </condition>
	291	+ </mapping>
	292	+ <mapping type="relation" name="entities">
	293	+ </mapping>
	294	+ <mapping type="relation" name="entity">
	295	+ <token type="string" offset="false" realoffset="false" parent="false">
	296	+ <pre>
	297	+ <item type="name" />
	298	+ </pre>
	299	+ <post>
	300	+ <item type="attribute" name="class" />
	301	+ </post>
	302	+ <payload>
	303	+ <item type="attribute" name="confidence" />
	304	+ </payload>
	305	+ </token>
	306	+ <condition>
	307	+ <item type="ancestor" number="1" />
	308	+ <item type="ancestorName" condition="entities" />
	309	+ </condition>
	310	+ </mapping>
	311	+ <!-- END RELATIONS -->
	312	+
	313	+ <!-- START GROUPS -->
	314	+ <mapping type="group" name="s">
	315	+ <token type="string" offset="false">
	316	+ <pre>
	317	+ <item type="name" />
	318	+ </pre>
	319	+ <post>
	320	+ <item type="attribute" name="class" />
	321	+ </post>
	322	+ </token>
	323	+ </mapping>
	324	+ <mapping type="group" name="p">
	325	+ <token type="string" offset="false">
	326	+ <pre>
	327	+ <item type="name" />
	328	+ </pre>
	329	+ <post>
	330	+ <item type="attribute" name="class" />
	331	+ </post>
	332	+ </token>
	333	+ </mapping>
	334	+ <mapping type="group" name="div">
	335	+ <token type="string" offset="false">
	336	+ <pre>
	337	+ <item type="name" />
	338	+ </pre>
	339	+ <post>
	340	+ <item type="attribute" name="class" />
	341	+ </post>
	342	+ </token>
	343	+ </mapping>
	344	+ <mapping type="group" name="head">
	345	+ <token type="string" offset="false">
	346	+ <pre>
	347	+ <item type="name" />
	348	+ </pre>
	349	+ <post>
	350	+ <item type="attribute" name="class" />
	351	+ </post>
	352	+ </token>
	353	+ </mapping>
	354	+ <!-- END GROUPS -->
	355	+
	356	+ <!-- START GROUP ANNOTATIONS -->
	357	+ <mapping type="groupAnnotation" name="lang">
	358	+ <token type="string" offset="false" realoffset="false" parent="false">
	359	+ <pre>
	360	+ <item type="name" />
	361	+ </pre>
	362	+ <post>
	363	+ <item type="attribute" name="class" />
	364	+ </post>
	365	+ </token>
	366	+ </mapping>
	367	+ <!-- END GROUP ANNOTATIONS -->
	368	+
	369	+ </mappings>
	370	+ <!-- END MAPPINGS -->
	371	+
	372	+ </parser>
	373	+ <!-- END CONFIGURATION MTAS FOLIA PARSER -->
	374	+
	375	+</mtas>
0	376	\ No newline at end of file
...	...

conf/parser/folia/mimore.xml 0 → 100644

View file @0237256

	1	+++ a/conf/parser/folia/mimore.xml
	1	+<?xml version="1.0" encoding="UTF-8" ?>
	2	+<mtas>
	3	+
	4	+ <!-- START MTAS INDEX CONFIGURATION -->
	5	+ <index>
	6	+ <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
	7	+ <payload index="false" />
	8	+ <offset index="false" />
	9	+ <realoffset index="false" />
	10	+ <parent index="true" />
	11	+ <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
	12	+ </index>
	13	+ <!-- END MTAS INDEX CONFIGURATION -->
	14	+
	15	+
	16	+ <!-- START CONFIGURATION MTAS FOLIA PARSER -->
	17	+ <parser name="mtas.analysis.parser.MtasFoliaParser">
	18	+
	19	+ <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
	20	+ <namespaceURI value="http://ilk.uvt.nl/folia" />
	21	+ <autorepair value="false" />
	22	+ <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
	23	+
	24	+ <!-- START REFERENCES -->
	25	+ <references>
	26	+ <reference name="wref" ref="id" />
	27	+ </references>
	28	+ <!-- END REFERENCES -->
	29	+
	30	+ <!-- START MAPPINGS -->
	31	+ <mappings>
	32	+
	33	+ <!-- START WORDS -->
	34	+ <mapping type="word" name="w">
	35	+ </mapping>
	36	+ <!-- END WORDS -->
	37	+
	38	+ <!-- START WORD ANNOTATIONS -->
	39	+ <mapping type="wordAnnotation" name="t">
	40	+ <token type="string" offset="false">
	41	+ <pre>
	42	+ <item type="name" />
	43	+ </pre>
	44	+ <post>
	45	+ <item type="text" />
	46	+ </post>
	47	+ </token>
	48	+ <token type="string" offset="false" realoffset="false" parent="false">
	49	+ <pre>
	50	+ <item type="name" />
	51	+ <item type="string" value="_lc" />
	52	+ </pre>
	53	+ <post>
	54	+ <item type="text" filter="ascii,lowercase" />
	55	+ </post>
	56	+ </token>
	57	+ <condition>
	58	+ <item type="ancestor" number="0" />
	59	+ <item type="ancestorWord" number="1" />
	60	+ <item type="unknownAncestor" number="0" />
	61	+ </condition>
	62	+ </mapping>
	63	+ <mapping type="wordAnnotation" name="lemma">
	64	+ <token type="string" offset="false" realoffset="false" parent="false">
	65	+ <pre>
	66	+ <item type="name" />
	67	+ </pre>
	68	+ <post>
	69	+ <item type="attribute" name="class" />
	70	+ </post>
	71	+ </token>
	72	+ <condition>
	73	+ <item type="ancestor" number="0" />
	74	+ <item type="unknownAncestor" number="0" />
	75	+ </condition>
	76	+ </mapping>
	77	+ <mapping type="wordAnnotation" name="pos">
	78	+ <token type="string" offset="false" realoffset="false" parent="false">
	79	+ <pre>
	80	+ <item type="name" />
	81	+ <item type="attribute" name="set" prefix="." />
	82	+ </pre>
	83	+ <post>
	84	+ <item type="attribute" name="head" />
	85	+ </post>
	86	+ <payload>
	87	+ <item type="attribute" name="confidence" />
	88	+ </payload>
	89	+ </token>
	90	+ <condition>
	91	+ <item type="ancestor" number="0" />
	92	+ <item type="unknownAncestor" number="0" />
	93	+ <item type="attribute" name="class" />
	94	+ </condition>
	95	+ </mapping>
	96	+ <mapping type="wordAnnotation" name="feat">
	97	+ <token type="string" offset="false" realoffset="false" parent="false">
	98	+ <pre>
	99	+ <item type="name" />
	100	+ <item type="ancestorAttribute" name="set" prefix="." />
	101	+ <item type="attribute" name="subset" prefix="." />
	102	+ </pre>
	103	+ <post>
	104	+ <item type="attribute" name="class" />
	105	+ </post>
	106	+ <payload>
	107	+ <item type="ancestorAttribute" distance="0" name="confidence" />
	108	+ </payload>
	109	+ </token>
	110	+ <condition>
	111	+ <item type="ancestor" number="1" />
	112	+ <item type="unknownAncestor" number="0" />
	113	+ <item type="attribute" name="class" />
	114	+ <item type="attribute" name="subset" />
	115	+ </condition>
	116	+ </mapping>
	117	+ <!-- END WORD ANNOTATIONS -->
	118	+
	119	+ <!-- START RELATIONS -->
	120	+ <mapping type="relation" name="entities">
	121	+ </mapping>
	122	+ <mapping type="relation" name="entity">
	123	+ </mapping>
	124	+ <!-- END RELATIONS -->
	125	+
	126	+ <!-- START RELATION ANNOTATIONS -->
	127	+ <mapping type="relationAnnotation" name="t">
	128	+ <token type="string" offset="false" realoffset="false" parent="false">
	129	+ <pre>
	130	+ <item type="name" />
	131	+ </pre>
	132	+ <post>
	133	+ <item type="text" />
	134	+ </post>
	135	+ </token>
	136	+ </mapping>
	137	+ <!-- END RELATION ANNOTATIONS -->
	138	+
	139	+ <!-- START GROUPS -->
	140	+ <mapping type="group" name="s">
	141	+ <token type="string" offset="false">
	142	+ <pre>
	143	+ <item type="name" />
	144	+ </pre>
	145	+ <post>
	146	+ <item type="attribute" name="class" />
	147	+ </post>
	148	+ </token>
	149	+ </mapping>
	150	+ <!-- END GROUPS -->
	151	+
	152	+ <!-- START GROUP ANNOTATIONS -->
	153	+ <!-- END GROUP ANNOTATIONS -->
	154	+
	155	+ </mappings>
	156	+ <!-- END MAPPINGS -->
	157	+
	158	+ </parser>
	159	+ <!-- END CONFIGURATION MTAS FOLIA PARSER -->
	160	+
	161	+
	162	+</mtas>
0	163	\ No newline at end of file
...	...

conf/parser/folia/mtas.xml 0 → 100644

View file @0237256

	1	+++ a/conf/parser/folia/mtas.xml
	1	+<?xml version="1.0" encoding="UTF-8" ?>
	2	+<mtas>
	3	+
	4	+ <!-- START MTAS INDEX CONFIGURATION -->
	5	+ <index>
	6	+ <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
	7	+ <payload index="false" />
	8	+ <offset index="false" />
	9	+ <realoffset index="false" />
	10	+ <parent index="true" />
	11	+ <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
	12	+ </index>
	13	+ <!-- END MTAS INDEX CONFIGURATION -->
	14	+
	15	+
	16	+
	17	+ <!-- START CONFIGURATION MTAS FOLIA PARSER -->
	18	+ <parser name="mtas.analysis.parser.MtasFoliaParser">
	19	+
	20	+ <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
	21	+ <namespaceURI value="http://ilk.uvt.nl/folia" />
	22	+ <autorepair value="true" />
	23	+ <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
	24	+
	25	+ <!-- START REFERENCES -->
	26	+ <references>
	27	+ <reference name="wref" ref="id" />
	28	+ </references>
	29	+ <!-- END REFERENCES -->
	30	+
	31	+ <!-- START MAPPINGS -->
	32	+ <mappings>
	33	+
	34	+ <!-- START WORDS -->
	35	+ <mapping type="word" name="w">
	36	+ </mapping>
	37	+ <mapping type="word" name="w">
	38	+ <token type="string" offset="false" realoffset="false" parent="false">
	39	+ <pre>
	40	+ <item type="name" />
	41	+ </pre>
	42	+ <post>
	43	+ <item type="attribute" name="class" />
	44	+ </post>
	45	+ </token>
	46	+ <condition>
	47	+ <item type="attribute" name="class" />
	48	+ <item type="attribute" name="class" not="true" condition="WORD" />
	49	+ </condition>
	50	+ </mapping>
	51	+ <!-- END WORDS -->
	52	+
	53	+ <!-- START WORD ANNOTATIONS -->
	54	+ <mapping type="wordAnnotation" name="t">
	55	+ <token type="string" offset="false">
	56	+ <pre>
	57	+ <item type="name" />
	58	+ </pre>
	59	+ <post>
	60	+ <item type="text" />
	61	+ </post>
	62	+ </token>
	63	+ <token type="string" offset="false" realoffset="false" parent="false">
	64	+ <pre>
	65	+ <item type="name" />
	66	+ <item type="string" value="_lc" />
	67	+ </pre>
	68	+ <post>
	69	+ <item type="text" filter="ascii,lowercase" />
	70	+ </post>
	71	+ </token>
	72	+ <condition>
	73	+ <item type="ancestor" number="0" />
	74	+ <item type="ancestorWord" number="1" />
	75	+ <item type="unknownAncestor" number="0" />
	76	+ </condition>
	77	+ </mapping>
	78	+ <mapping type="wordAnnotation" name="lemma">
	79	+ <token type="string" offset="false" realoffset="false" parent="false">
	80	+ <pre>
	81	+ <item type="name" />
	82	+ </pre>
	83	+ <post>
	84	+ <item type="attribute" name="class" />
	85	+ </post>
	86	+ </token>
	87	+ <condition>
	88	+ <item type="attribute" name="class" />
	89	+ <item type="ancestor" number="0" />
	90	+ <item type="unknownAncestor" number="0" />
	91	+ </condition>
	92	+ </mapping>
	93	+ <mapping type="wordAnnotation" name="morphology">
	94	+ </mapping>
	95	+ <mapping type="wordAnnotation" name="morpheme">
	96	+ <condition>
	97	+ <item type="ancestor" number="1" />
	98	+ <item type="ancestorName" condition="morphology" />
	99	+ </condition>
	100	+ </mapping>
	101	+ <mapping type="wordAnnotation" name="t">
	102	+ <token type="string" offset="false" realoffset="false" parent="false">
	103	+ <pre>
	104	+ <item type="ancestorName" />
	105	+ </pre>
	106	+ <post>
	107	+ <item type="text" />
	108	+ </post>
	109	+ </token>
	110	+ <condition>
	111	+ <item type="ancestorName" distance="0" condition="morpheme" />
	112	+ </condition>
	113	+ </mapping>
	114	+ <mapping type="wordAnnotation" name="pos">
	115	+ <token type="string" offset="false" realoffset="false" parent="false">
	116	+ <pre>
	117	+ <item type="name" />
	118	+ </pre>
	119	+ <post>
	120	+ <item type="attribute" name="head" />
	121	+ </post>
	122	+ <payload>
	123	+ <item type="attribute" name="confidence" />
	124	+ </payload>
	125	+ </token>
	126	+ <condition>
	127	+ <item type="ancestor" number="0" />
	128	+ <item type="unknownAncestor" number="0" />
	129	+ <item type="attribute" name="class" />
	130	+ </condition>
	131	+ </mapping>
	132	+ <mapping type="wordAnnotation" name="feat">
	133	+ <token type="string" offset="false" realoffset="false" parent="false">
	134	+ <pre>
	135	+ <item type="name" />
	136	+ <item type="attribute" name="subset" prefix="." />
	137	+ </pre>
	138	+ <post>
	139	+ <item type="attribute" name="class" />
	140	+ </post>
	141	+ <payload>
	142	+ <item type="ancestorAttribute" distance="0" name="confidence" />
	143	+ </payload>
	144	+ </token>
	145	+ <condition>
	146	+ <item type="ancestor" number="1" />
	147	+ <item type="unknownAncestor" number="0" />
	148	+ <item type="attribute" name="class" />
	149	+ <item type="attribute" name="subset" />
	150	+ </condition>
	151	+ </mapping>
	152	+ <!-- END WORD ANNOTATIONS -->
	153	+
	154	+ <!-- START RELATIONS -->
	155	+ <mapping type="relation" name="chunk">
	156	+ <token type="string" offset="false" realoffset="false" parent="false">
	157	+ <pre>
	158	+ <item type="name" />
	159	+ </pre>
	160	+ <post>
	161	+ <item type="attribute" name="class" />
	162	+ </post>
	163	+ <payload>
	164	+ <item type="attribute" name="confidence" />
	165	+ </payload>
	166	+ </token>
	167	+ <condition>
	168	+ <item type="attribute" name="class" />
	169	+ </condition>
	170	+ </mapping>
	171	+ <mapping type="relation" name="dependency">
	172	+ <token type="string" offset="false" realoffset="false" parent="false">
	173	+ <pre>
	174	+ <item type="name" />
	175	+ </pre>
	176	+ <post>
	177	+ <item type="attribute" name="class" />
	178	+ </post>
	179	+ </token>
	180	+ <condition>
	181	+ <item type="attribute" name="class" />
	182	+ </condition>
	183	+ </mapping>
	184	+ <mapping type="relation" name="hd">
	185	+ <token type="string" offset="false" realoffset="false" parent="false">
	186	+ <pre>
	187	+ <item type="ancestorName" distance="0" />
	188	+ <item type="name" prefix="." />
	189	+ </pre>
	190	+ </token>
	191	+ <condition>
	192	+ <item type="ancestorName" condition="dependency" />
	193	+ <item type="ancestor" number="1" />
	194	+ </condition>
	195	+ </mapping>
	196	+ <mapping type="relation" name="dep">
	197	+ <token type="string" offset="false" realoffset="false" parent="false">
	198	+ <pre>
	199	+ <item type="ancestorName" distance="0" />
	200	+ <item type="name" prefix="." />
	201	+ </pre>
	202	+ </token>
	203	+ <condition>
	204	+ <item type="ancestor" number="1" />
	205	+ <item type="ancestorName" condition="dependency" />
	206	+ </condition>
	207	+ </mapping>
	208	+ <mapping type="relation" name="entities">
	209	+ </mapping>
	210	+ <mapping type="relation" name="entity">
	211	+ <token type="string" offset="false" realoffset="false" parent="false">
	212	+ <pre>
	213	+ <item type="name" />
	214	+ </pre>
	215	+ <post>
	216	+ <item type="attribute" name="class" />
	217	+ </post>
	218	+ <payload>
	219	+ <item type="attribute" name="confidence" />
	220	+ </payload>
	221	+ </token>
	222	+ <condition>
	223	+ <item type="ancestor" number="1" />
	224	+ <item type="ancestorName" condition="entities" />
	225	+ </condition>
	226	+ </mapping>
	227	+ <!-- END RELATIONS -->
	228	+
	229	+ <!-- START GROUPS -->
	230	+ <mapping type="group" name="s">
	231	+ <token type="string" offset="false">
	232	+ <pre>
	233	+ <item type="name" />
	234	+ </pre>
	235	+ <post>
	236	+ <item type="attribute" name="class" />
	237	+ </post>
	238	+ </token>
	239	+ </mapping>
	240	+ <mapping type="group" name="p">
	241	+ <token type="string" offset="false">
	242	+ <pre>
	243	+ <item type="name" />
	244	+ </pre>
	245	+ <post>
	246	+ <item type="attribute" name="class" />
	247	+ </post>
	248	+ </token>
	249	+ </mapping>
	250	+ <mapping type="group" name="div">
	251	+ <token type="string" offset="false">
	252	+ <pre>
	253	+ <item type="name" />
	254	+ </pre>
	255	+ <post>
	256	+ <item type="attribute" name="class" />
	257	+ </post>
	258	+ </token>
	259	+ </mapping>
	260	+ <mapping type="group" name="head">
	261	+ <token type="string" offset="false">
	262	+ <pre>
	263	+ <item type="name" />
	264	+ </pre>
	265	+ <post>
	266	+ <item type="attribute" name="class" />
	267	+ </post>
	268	+ </token>
	269	+ </mapping>
	270	+ <!-- END GROUPS -->
	271	+
	272	+ <!-- START GROUP ANNOTATIONS -->
	273	+ <mapping type="groupAnnotation" name="lang">
	274	+ <token type="string" offset="false" realoffset="false" parent="false">
	275	+ <pre>
	276	+ <item type="name" />
	277	+ </pre>
	278	+ <post>
	279	+ <item type="attribute" name="class" />
	280	+ </post>
	281	+ </token>
	282	+ </mapping>
	283	+ <!-- END GROUP ANNOTATIONS -->
	284	+
	285	+ </mappings>
	286	+ <!-- END MAPPINGS -->
	287	+
	288	+ </parser>
	289	+ <!-- END CONFIGURATION MTAS FOLIA PARSER -->
	290	+
	291	+
	292	+</mtas>
0	293	\ No newline at end of file
...	...

conf/parser/folia/sonar.xml 0 → 100644

View file @0237256

	1	+++ a/conf/parser/folia/sonar.xml
	1	+<?xml version="1.0" encoding="UTF-8" ?>
	2	+<mtas>
	3	+
	4	+ <!-- START MTAS INDEX CONFIGURATION -->
	5	+ <index>
	6	+ <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
	7	+ <payload index="false" />
	8	+ <offset index="false" />
	9	+ <realoffset index="false" />
	10	+ <parent index="true" />
	11	+ <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
	12	+ </index>
	13	+ <!-- END MTAS INDEX CONFIGURATION -->
	14	+
	15	+
	16	+ <!-- START CONFIGURATION MTAS FOLIA PARSER -->
	17	+ <parser name="mtas.analysis.parser.MtasFoliaParser">
	18	+
	19	+ <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
	20	+ <namespaceURI value="http://ilk.uvt.nl/folia" />
	21	+ <autorepair value="true" />
	22	+ <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
	23	+
	24	+ <!-- START REFERENCES -->
	25	+ <references>
	26	+ <reference name="wref" ref="id" />
	27	+ </references>
	28	+ <!-- END REFERENCES -->
	29	+
	30	+ <!-- START MAPPINGS -->
	31	+ <mappings>
	32	+
	33	+ <!-- START WORDS -->
	34	+ <mapping type="word" name="w">
	35	+ </mapping>
	36	+ <mapping type="word" name="w">
	37	+ <token type="string" offset="false" realoffset="false" parent="false">
	38	+ <pre>
	39	+ <item type="name" />
	40	+ </pre>
	41	+ <post>
	42	+ <item type="attribute" name="class" />
	43	+ </post>
	44	+ </token>
	45	+ <condition>
	46	+ <item type="attribute" name="class" />
	47	+ <item type="attribute" name="class" not="true" condition="WORD" />
	48	+ </condition>
	49	+ </mapping>
	50	+ <!-- END WORDS -->
	51	+
	52	+ <!-- START WORD ANNOTATIONS -->
	53	+ <mapping type="wordAnnotation" name="t">
	54	+ <token type="string" offset="false">
	55	+ <pre>
	56	+ <item type="name" />
	57	+ </pre>
	58	+ <post>
	59	+ <item type="text" />
	60	+ </post>
	61	+ </token>
	62	+ <token type="string" offset="false" realoffset="false" parent="false">
	63	+ <pre>
	64	+ <item type="name" />
	65	+ <item type="string" value="_lc" />
	66	+ </pre>
	67	+ <post>
	68	+ <item type="text" filter="ascii,lowercase" />
	69	+ </post>
	70	+ </token>
	71	+ <condition>
	72	+ <item type="ancestor" number="0" />
	73	+ <item type="ancestorWord" number="1" />
	74	+ <item type="unknownAncestor" number="0" />
	75	+ </condition>
	76	+ </mapping>
	77	+ <mapping type="wordAnnotation" name="lemma">
	78	+ <token type="string" offset="false" realoffset="false" parent="false">
	79	+ <pre>
	80	+ <item type="name" />
	81	+ </pre>
	82	+ <post>
	83	+ <item type="attribute" name="class" />
	84	+ </post>
	85	+ </token>
	86	+ <condition>
	87	+ <item type="attribute" name="class" />
	88	+ <item type="ancestor" number="0" />
	89	+ <item type="unknownAncestor" number="0" />
	90	+ <item type="attribute" name="set" condition="http://ilk.uvt.nl/folia/sets/frog-mblem-nl" />
	91	+ </condition>
	92	+ </mapping>
	93	+ <mapping type="wordAnnotation" name="morphology">
	94	+ </mapping>
	95	+ <mapping type="wordAnnotation" name="morpheme">
	96	+ <condition>
	97	+ <item type="ancestor" number="1" />
	98	+ <item type="ancestorName" condition="morphology" />
	99	+ </condition>
	100	+ </mapping>
	101	+ <mapping type="wordAnnotation" name="t">
	102	+ <token type="string" offset="false" realoffset="false" parent="false">
	103	+ <pre>
	104	+ <item type="ancestorName" />
	105	+ </pre>
	106	+ <post>
	107	+ <item type="text" />
	108	+ </post>
	109	+ </token>
	110	+ <condition>
	111	+ <item type="ancestorName" distance="0" condition="morpheme" />
	112	+ </condition>
	113	+ </mapping>
	114	+ <mapping type="wordAnnotation" name="pos">
	115	+ <token type="string" offset="false" realoffset="false" parent="false">
	116	+ <pre>
	117	+ <item type="name" />
	118	+ </pre>
	119	+ <post>
	120	+ <item type="attribute" name="head" />
	121	+ </post>
	122	+ <payload>
	123	+ <item type="attribute" name="confidence" />
	124	+ </payload>
	125	+ </token>
	126	+ <condition>
	127	+ <item type="ancestor" number="0" />
	128	+ <item type="unknownAncestor" number="0" />
	129	+ <item type="attribute" name="class" />
	130	+ <item type="attribute" name="set" condition="http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" />
	131	+ </condition>
	132	+ </mapping>
	133	+ <mapping type="wordAnnotation" name="feat">
	134	+ <token type="string" offset="false" realoffset="false" parent="false">
	135	+ <pre>
	136	+ <item type="name" />
	137	+ <item type="attribute" name="subset" prefix="." />
	138	+ </pre>
	139	+ <post>
	140	+ <item type="attribute" name="class" />
	141	+ </post>
	142	+ <payload>
	143	+ <item type="ancestorAttribute" distance="0" name="confidence" />
	144	+ </payload>
	145	+ </token>
	146	+ <condition>
	147	+ <item type="ancestor" number="1" />
	148	+ <item type="unknownAncestor" number="0" />
	149	+ <item type="attribute" name="class" />
	150	+ <item type="attribute" name="subset" />
	151	+ <item type="ancestorAttribute" name="set" condition="http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" />
	152	+ </condition>
	153	+ </mapping>
	154	+ <!-- END WORD ANNOTATIONS -->
	155	+
	156	+ <!-- START RELATIONS -->
	157	+ <mapping type="relation" name="chunk">
	158	+ <token type="string" offset="false" realoffset="false" parent="false">
	159	+ <pre>
	160	+ <item type="name" />
	161	+ </pre>
	162	+ <post>
	163	+ <item type="attribute" name="class" />
	164	+ </post>
	165	+ <payload>
	166	+ <item type="attribute" name="confidence" />
	167	+ </payload>
	168	+ </token>
	169	+ <condition>
	170	+ <item type="attribute" name="class" />
	171	+ </condition>
	172	+ </mapping>
	173	+ <mapping type="relation" name="dependency">
	174	+ <token type="string" offset="false" realoffset="false" parent="false">
	175	+ <pre>
	176	+ <item type="name" />
	177	+ </pre>
	178	+ <post>
	179	+ <item type="attribute" name="class" />
	180	+ </post>
	181	+ </token>
	182	+ <condition>
	183	+ <item type="attribute" name="class" />
	184	+ </condition>
	185	+ </mapping>
	186	+ <mapping type="relation" name="hd">
	187	+ <token type="string" offset="false" realoffset="false" parent="false">
	188	+ <pre>
	189	+ <item type="ancestorName" distance="0" />
	190	+ <item type="name" prefix="." />
	191	+ </pre>
	192	+ </token>
	193	+ <condition>
	194	+ <item type="ancestorName" condition="dependency" />
	195	+ <item type="ancestor" number="1" />
	196	+ </condition>
	197	+ </mapping>
	198	+ <mapping type="relation" name="dep">
	199	+ <token type="string" offset="false" realoffset="false" parent="false">
	200	+ <pre>
	201	+ <item type="ancestorName" distance="0" />
	202	+ <item type="name" prefix="." />
	203	+ </pre>
	204	+ </token>
	205	+ <condition>
	206	+ <item type="ancestor" number="1" />
	207	+ <item type="ancestorName" condition="dependency" />
	208	+ </condition>
	209	+ </mapping>
	210	+ <mapping type="relation" name="entities">
	211	+ </mapping>
	212	+ <mapping type="relation" name="entity">
	213	+ <token type="string" offset="false" realoffset="false" parent="false">
	214	+ <pre>
	215	+ <item type="name" />
	216	+ </pre>
	217	+ <post>
	218	+ <item type="attribute" name="class" />
	219	+ </post>
	220	+ <payload>
	221	+ <item type="attribute" name="confidence" />
	222	+ </payload>
	223	+ </token>
	224	+ <condition>
	225	+ <item type="ancestor" number="1" />
	226	+ <item type="ancestorName" condition="entities" />
	227	+ </condition>
	228	+ </mapping>
	229	+ <!-- END RELATIONS -->
	230	+
	231	+ <!-- START GROUPS -->
	232	+ <mapping type="group" name="s">
	233	+ <token type="string" offset="false">
	234	+ <pre>
	235	+ <item type="name" />
	236	+ </pre>
	237	+ <post>
	238	+ <item type="attribute" name="class" />
	239	+ </post>
	240	+ </token>
	241	+ </mapping>
	242	+ <mapping type="group" name="p">
	243	+ <token type="string" offset="false">
	244	+ <pre>
	245	+ <item type="name" />
	246	+ </pre>
	247	+ <post>
	248	+ <item type="attribute" name="class" />
	249	+ </post>
	250	+ </token>
	251	+ </mapping>
	252	+ <mapping type="group" name="div">
	253	+ <token type="string" offset="false">
	254	+ <pre>
	255	+ <item type="name" />
	256	+ </pre>
	257	+ <post>
	258	+ <item type="attribute" name="class" />
	259	+ </post>
	260	+ </token>
	261	+ </mapping>
	262	+ <mapping type="group" name="head">
	263	+ <token type="string" offset="false">
	264	+ <pre>
	265	+ <item type="name" />
	266	+ </pre>
	267	+ <post>
	268	+ <item type="attribute" name="class" />
	269	+ </post>
	270	+ </token>
	271	+ </mapping>
	272	+ <!-- END GROUPS -->
	273	+
	274	+ <!-- START GROUP ANNOTATIONS -->
	275	+ <mapping type="groupAnnotation" name="lang">
	276	+ <token type="string" offset="false" realoffset="false" parent="false">
	277	+ <pre>
	278	+ <item type="name" />
	279	+ </pre>
	280	+ <post>
	281	+ <item type="attribute" name="class" />
	282	+ </post>
	283	+ </token>
	284	+ </mapping>
	285	+ <!-- END GROUP ANNOTATIONS -->
	286	+
	287	+ </mappings>
	288	+ <!-- END MAPPINGS -->
	289	+
	290	+ </parser>
	291	+ <!-- END CONFIGURATION MTAS FOLIA PARSER -->
	292	+
	293	+
	294	+</mtas>
...	...

conf/parser/folia/test.xml 0 → 100644

View file @0237256

	1	+++ a/conf/parser/folia/test.xml
	1	+<?xml version="1.0" encoding="UTF-8" ?>
	2	+<mtas>
	3	+
	4	+ <!-- START MTAS INDEX CONFIGURATION -->
	5	+ <index>
	6	+ <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
	7	+ <payload index="true" />
	8	+ <offset index="true" />
	9	+ <realoffset index="true" />
	10	+ <parent index="true" />
	11	+ <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
	12	+ </index>
	13	+ <!-- END MTAS INDEX CONFIGURATION -->
	14	+
	15	+
	16	+
	17	+ <!-- START CONFIGURATION MTAS FOLIA PARSER -->
	18	+ <parser name="mtas.analysis.parser.MtasFoliaParser">
	19	+
	20	+ <!-- START GENERAL SETTINGS MTAS FOLIA PARSER -->
	21	+ <namespaceURI value="http://ilk.uvt.nl/folia" />
	22	+ <autorepair value="true" />
	23	+ <!-- END GENERAL SETTINGS MTAS FOLIA PARSER -->
	24	+
	25	+ <!-- START REFERENCES -->
	26	+ <references>
	27	+ <reference name="wref" ref="id" />
	28	+ </references>
	29	+ <!-- END REFERENCES -->
	30	+
	31	+ <!-- START MAPPINGS -->
	32	+ <mappings>
	33	+
	34	+ <!-- START WORDS -->
	35	+ <mapping type="word" name="w">
	36	+ </mapping>
	37	+ <mapping type="word" name="w">
	38	+ <token type="string" offset="false" realoffset="false" parent="false">
	39	+ <pre>
	40	+ <item type="name" />
	41	+ </pre>
	42	+ <post>
	43	+ <item type="attribute" name="class" />
	44	+ </post>
	45	+ </token>
	46	+ <condition>
	47	+ <item type="attribute" name="class" />
	48	+ <item type="attribute" name="class" not="true" condition="WORD" />
	49	+ </condition>
	50	+ </mapping>
	51	+ <!-- END WORDS -->
	52	+
	53	+ <!-- START WORD ANNOTATIONS -->
	54	+ <mapping type="wordAnnotation" name="t">
	55	+ <token type="string" offset="false">
	56	+ <pre>
	57	+ <item type="name" />
	58	+ </pre>
	59	+ <post>
	60	+ <item type="text" />
	61	+ </post>
	62	+ </token>
	63	+ <token type="string" offset="false" realoffset="false" parent="false">
	64	+ <pre>
	65	+ <item type="name" />
	66	+ <item type="string" value="_lc" />
	67	+ </pre>
	68	+ <post>
	69	+ <item type="text" filter="ascii,lowercase" />
	70	+ </post>
	71	+ </token>
	72	+ <condition>
	73	+ <item type="ancestor" number="0" />
	74	+ <item type="ancestorWord" number="1" />
	75	+ <item type="unknownAncestor" number="0" />
	76	+ </condition>
	77	+ </mapping>
	78	+ <mapping type="wordAnnotation" name="lemma">
	79	+ <token type="string" offset="false" realoffset="false" parent="false">
	80	+ <pre>
	81	+ <item type="name" />
	82	+ </pre>
	83	+ <post>
	84	+ <item type="attribute" name="class" />
	85	+ </post>
	86	+ </token>
	87	+ <condition>
	88	+ <item type="attribute" name="class" />
	89	+ <item type="ancestor" number="0" />
	90	+ <item type="unknownAncestor" number="0" />
	91	+ <item type="attribute" name="set"
	92	+ condition="http://ilk.uvt.nl/folia/sets/frog-mblem-nl" />
	93	+ </condition>
	94	+ </mapping>
	95	+ <mapping type="wordAnnotation" name="morphology">
	96	+ </mapping>
	97	+ <mapping type="wordAnnotation" name="morpheme">
	98	+ <condition>
	99	+ <item type="ancestor" number="1" />
	100	+ <item type="ancestorName" condition="morphology" />
	101	+ </condition>
	102	+ </mapping>
	103	+ <mapping type="wordAnnotation" name="t">
	104	+ <token type="string" offset="false" realoffset="false" parent="false">
	105	+ <pre>
	106	+ <item type="ancestorName" />
	107	+ </pre>
	108	+ <post>
	109	+ <item type="text" />
	110	+ </post>
	111	+ </token>
	112	+ <condition>
	113	+ <item type="ancestorName" distance="0" condition="morpheme" />
	114	+ </condition>
	115	+ </mapping>
	116	+ <mapping type="wordAnnotation" name="pos">
	117	+ <token type="string" offset="false" realoffset="false" parent="false">
	118	+ <pre>
	119	+ <item type="name" />
	120	+ </pre>
	121	+ <post>
	122	+ <item type="attribute" name="head" />
	123	+ </post>
	124	+ <payload>
	125	+ <item type="attribute" name="confidence" />
	126	+ </payload>
	127	+ </token>
	128	+ <condition>
	129	+ <item type="ancestor" number="0" />
	130	+ <item type="unknownAncestor" number="0" />
	131	+ <item type="attribute" name="class" />
	132	+ <item type="attribute" name="set"
	133	+ condition="http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" />
	134	+ </condition>
	135	+ </mapping>
	136	+ <mapping type="wordAnnotation" name="feat">
	137	+ <token type="string" offset="false" realoffset="false" parent="false">
	138	+ <pre>
	139	+ <item type="name" />
	140	+ <item type="attribute" name="subset" prefix="." />
	141	+ </pre>
	142	+ <post>
	143	+ <item type="attribute" name="class" />
	144	+ </post>
	145	+ <payload>
	146	+ <item type="ancestorAttribute" distance="0" name="confidence" />
	147	+ </payload>
	148	+ </token>
	149	+ <condition>
	150	+ <item type="ancestor" number="1" />
	151	+ <item type="unknownAncestor" number="0" />
	152	+ <item type="attribute" name="class" />
	153	+ <item type="attribute" name="subset" />
	154	+ <item type="ancestorAttribute" name="set"
	155	+ condition="http://ilk.uvt.nl/folia/sets/frog-mbpos-cgn" />
	156	+ </condition>
	157	+ </mapping>
	158	+ <!-- END WORD ANNOTATIONS -->
	159	+
	160	+ <!-- START RELATIONS -->
	161	+ <mapping type="relation" name="chunk">
	162	+ <token type="string" offset="false" realoffset="false" parent="false">
	163	+ <pre>
	164	+ <item type="name" />
	165	+ </pre>
	166	+ <post>
	167	+ <item type="attribute" name="class" />
	168	+ </post>
	169	+ <payload>
	170	+ <item type="attribute" name="confidence" />
	171	+ </payload>
	172	+ </token>
	173	+ <condition>
	174	+ <item type="attribute" name="class" />
	175	+ </condition>
	176	+ </mapping>
	177	+ <mapping type="relation" name="dependency">
	178	+ <token type="string" offset="false" realoffset="false" parent="false">
	179	+ <pre>
	180	+ <item type="name" />
	181	+ </pre>
	182	+ <post>
	183	+ <item type="attribute" name="class" />
	184	+ </post>
	185	+ </token>
	186	+ <condition>
	187	+ <item type="attribute" name="class" />
	188	+ </condition>
	189	+ </mapping>
	190	+ <mapping type="relation" name="hd">
	191	+ <token type="string" offset="false" realoffset="false" parent="false">
	192	+ <pre>
	193	+ <item type="ancestorName" distance="0" />
	194	+ <item type="name" prefix="." />
	195	+ </pre>
	196	+ </token>
	197	+ <condition>
	198	+ <item type="ancestorName" condition="dependency" />
	199	+ <item type="ancestor" number="1" />
	200	+ </condition>
	201	+ </mapping>
	202	+ <mapping type="relation" name="dep">
	203	+ <token type="string" offset="false" realoffset="false" parent="false">
	204	+ <pre>
	205	+ <item type="ancestorName" distance="0" />
	206	+ <item type="name" prefix="." />
	207	+ </pre>
	208	+ </token>
	209	+ <condition>
	210	+ <item type="ancestor" number="1" />
	211	+ <item type="ancestorName" condition="dependency" />
	212	+ </condition>
	213	+ </mapping>
	214	+ <mapping type="relation" name="entities">
	215	+ </mapping>
	216	+ <mapping type="relation" name="entity">
	217	+ <token type="string" offset="false" realoffset="false" parent="false">
	218	+ <pre>
	219	+ <item type="name" />
	220	+ </pre>
	221	+ <post>
	222	+ <item type="attribute" name="class" />
	223	+ </post>
	224	+ <payload>
	225	+ <item type="attribute" name="confidence" />
	226	+ </payload>
	227	+ </token>
	228	+ <condition>
	229	+ <item type="ancestor" number="1" />
	230	+ <item type="ancestorName" condition="entities" />
	231	+ </condition>
	232	+ </mapping>
	233	+ <!-- END RELATIONS -->
	234	+
	235	+ <!-- START GROUPS -->
	236	+ <mapping type="group" name="s">
	237	+ <token type="string" offset="false">
	238	+ <pre>
	239	+ <item type="name" />
	240	+ </pre>
	241	+ <post>
	242	+ <item type="attribute" name="class" />
	243	+ </post>
	244	+ </token>
	245	+ </mapping>
	246	+ <mapping type="group" name="p">
	247	+ <token type="string" offset="false">
	248	+ <pre>
	249	+ <item type="name" />
	250	+ </pre>
	251	+ <post>
	252	+ <item type="attribute" name="class" />
	253	+ </post>
	254	+ </token>
	255	+ </mapping>
	256	+ <mapping type="group" name="div">
	257	+ <token type="string" offset="false">
	258	+ <pre>
	259	+ <item type="name" />
	260	+ </pre>
	261	+ <post>
	262	+ <item type="attribute" name="class" />
	263	+ </post>
	264	+ </token>
	265	+ </mapping>
	266	+ <mapping type="group" name="head">
	267	+ <token type="string" offset="false">
	268	+ <pre>
	269	+ <item type="name" />
	270	+ </pre>
	271	+ <post>
	272	+ <item type="attribute" name="class" />
	273	+ </post>
	274	+ </token>
	275	+ </mapping>
	276	+ <!-- END GROUPS -->
	277	+
	278	+ <!-- START GROUP ANNOTATIONS -->
	279	+ <mapping type="groupAnnotation" name="lang">
	280	+ <token type="string" offset="false" realoffset="false" parent="false">
	281	+ <pre>
	282	+ <item type="name" />
	283	+ </pre>
	284	+ <post>
	285	+ <item type="attribute" name="class" />
	286	+ </post>
	287	+ </token>
	288	+ </mapping>
	289	+ <!-- END GROUP ANNOTATIONS -->
	290	+
	291	+ </mappings>
	292	+ <!-- END MAPPINGS -->
	293	+
	294	+ </parser>
	295	+ <!-- END CONFIGURATION MTAS FOLIA PARSER -->
	296	+
	297	+</mtas>
0	298	\ No newline at end of file
...	...

conf/parser/mtas.xml 0 → 100644

View file @0237256

	1	+++ a/conf/parser/mtas.xml
	1	+<?xml version="1.0" encoding="UTF-8" ?>
	2	+<mtas>
	3	+ <configurations type="mtas.analysis.util.MtasTokenizerFactory">
	4	+ <configuration name="DBNL" file="folia/mtas.xml" />
	5	+ </configurations>
	6	+ <configurations type="mtas.analysis.util.MtasCharFilterFactory">
	7	+ <configuration name="DBNL" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" />
	8	+ </configurations>
	9	+</mtas>
...	...

conf/parser/sketch/acdh.xml 0 → 100644

View file @0237256

	1	+++ a/conf/parser/sketch/acdh.xml
	1	+<?xml version="1.0" encoding="UTF-8" ?>
	2	+<mtas>
	3	+
	4	+ <!-- START MTAS INDEX CONFIGURATION -->
	5	+ <index>
	6	+ <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
	7	+ <payload index="true" />
	8	+ <offset index="true" />
	9	+ <realoffset index="true" />
	10	+ <parent index="true" />
	11	+ <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
	12	+ </index>
	13	+ <!-- END MTAS INDEX CONFIGURATION -->
	14	+
	15	+
	16	+
	17	+ <!-- START CONFIGURATION MTAS SKETCH PARSER -->
	18	+ <parser name="mtas.analysis.parser.MtasSketchParser">
	19	+ <!-- START GENERAL SETTINGS MTAS SKETCH PARSER -->
	20	+ <autorepair value="true" />
	21	+ <!-- END GENERAL SETTINGS MTAS SKETCH PARSER -->
	22	+
	23	+ <mappings>
	24	+
	25	+ <mapping type="word">
	26	+ <condition>
	27	+ <item type="ancestorGroupName" not="true" condition="field" />
	28	+ </condition>
	29	+ </mapping>
	30	+
	31	+ <mapping type="group" name="field">
	32	+ </mapping>
	33	+ <mapping type="group" name="s">
	34	+ <token type="string" offset="false">
	35	+ <pre>
	36	+ <item type="name" />
	37	+ </pre>
	38	+ <post>
	39	+ <item type="attribute" name="class" />
	40	+ </post>
	41	+ </token>
	42	+ </mapping>
	43	+ <mapping type="group" name="p">
	44	+ <token type="string" offset="false">
	45	+ <pre>
	46	+ <item type="name" />
	47	+ </pre>
	48	+ <post>
	49	+ <item type="attribute" name="class" />
	50	+ </post>
	51	+ </token>
	52	+ </mapping>
	53	+
	54	+ <mapping type="wordAnnotation" name="0">
	55	+ <token type="string" offset="false" parent="false">
	56	+ <pre>
	57	+ <item type="string" value="t" />
	58	+ </pre>
	59	+ <post>
	60	+ <item type="text" />
	61	+ </post>
	62	+ </token>
	63	+ <token type="string" offset="false" realoffset="false" parent="false">
	64	+ <pre>
	65	+ <item type="string" value="t_lc" />
	66	+ </pre>
	67	+ <post>
	68	+ <item type="text" filter="ascii,lowercase" />
	69	+ </post>
	70	+ </token>
	71	+ </mapping>
	72	+ <mapping type="wordAnnotation" name="1">
	73	+ <token type="string" offset="false" realoffset="false" parent="false">
	74	+ <pre>
	75	+ <item type="string" value="pos1" />
	76	+ </pre>
	77	+ <post>
	78	+ <item type="textSplit" value="." filter="split(0)" />
	79	+ </post>
	80	+ </token>
	81	+ <token type="string" offset="false" realoffset="false" parent="false">
	82	+ <pre>
	83	+ <item type="string" value="feat" />
	84	+ </pre>
	85	+ <post>
	86	+ <item type="textSplit" value="." filter="split(1-10)" />
	87	+ </post>
	88	+ </token>
	89	+ </mapping>
	90	+ <mapping type="wordAnnotation" name="2">
	91	+ <token type="string" offset="false" realoffset="false" parent="false">
	92	+ <pre>
	93	+ <item type="string" value="pos2" />
	94	+ </pre>
	95	+ <post>
	96	+ <item type="text" />
	97	+ </post>
	98	+ </token>
	99	+ </mapping>
	100	+ <mapping type="wordAnnotation" name="3">
	101	+ <token type="string" offset="false" realoffset="false" parent="false">
	102	+ <pre>
	103	+ <item type="string" value="lemma" />
	104	+ </pre>
	105	+ <post>
	106	+ <item type="text" />
	107	+ </post>
	108	+ </token>
	109	+ </mapping>
	110	+ </mappings>
	111	+
	112	+ </parser>
	113	+ <!-- END CONFIGURATION MTAS SKETCH PARSER -->
	114	+
	115	+</mtas>
0	116	\ No newline at end of file
...	...

conf/solr/schemaNederlab.xml 0 → 100644

View file @0237256

	1	+++ a/conf/solr/schemaNederlab.xml
	1	+<?xml version="1.0" encoding="UTF-8" ?>
	2	+
	3	+<schema name="nederlab" version="1.5">
	4	+
	5	+ <field name="_version_" type="nederlab_long" indexed="true"
	6	+ stored="true" />
	7	+
	8	+ <!-- component Profile -->
	9	+ <field name="NLProfile_name" type="nederlab_string" required="true"
	10	+ multiValued="false" indexed="true" stored="true" />
	11	+
	12	+ <!-- component ResourceProxy -->
	13	+ <field name="ResourceProxy_resourceRef" type="nederlab_string"
	14	+ required="false" multiValued="true" indexed="true" stored="true" />
	15	+ <dynamicField name="ResourceProxy_resourceRef_mimeType_*"
	16	+ type="nederlab_string" required="false" multiValued="true" indexed="true"
	17	+ stored="true" />
	18	+
	19	+ <!-- component NLCore -->
	20	+ <field name="NLCore_NLIdentification_nederlabID" type="nederlab_uuid"
	21	+ required="true" multiValued="false" indexed="true" stored="true" />
	22	+ <field name="NLCore_NLIdentification_editorialCode" type="nederlab_string"
	23	+ required="false" multiValued="false" indexed="true" stored="true" />
	24	+ <field name="NLCore_NLIdentification_versionID" type="nederlab_string"
	25	+ required="true" multiValued="false" indexed="true" stored="true" />
	26	+ <field name="NLCore_NLIdentification_sourceRef" type="nederlab_string"
	27	+ required="false" multiValued="true" indexed="true" stored="true" />
	28	+ <field name="NLCore_NLIdentification_sourceUrl" type="nederlab_string"
	29	+ required="false" multiValued="true" indexed="true" stored="true" />
	30	+ <field name="NLCore_NLIdentification_sourceRefUrl_serialized"
	31	+ type="nederlab_string" required="false" multiValued="true" indexed="false"
	32	+ stored="true" />
	33	+ <field name="NLCore_NLAdministrative_ingestTime" type="nederlab_date"
	34	+ required="true" multiValued="false" indexed="true" stored="true" />
	35	+ <field name="NLCore_NLAdministrative_expirationTime" type="nederlab_date"
	36	+ required="false" multiValued="false" indexed="true" stored="true" />
	37	+ <field name="NLCore_NLAdministrative_lastEditedBy" type="nederlab_string"
	38	+ required="false" multiValued="false" indexed="true" stored="true" />
	39	+ <field name="NLCore_NLAdministrative_modificationTime" type="nederlab_date"
	40	+ required="false" multiValued="false" indexed="true" stored="true" />
	41	+ <field name="NLCore_NLAdministrative_editorialNote" type="nederlab_text"
	42	+ required="false" multiValued="true" indexed="true" stored="true" />
	43	+ <field name="NLCore_NLAdministrative_sourceCollection" type="nederlab_string"
	44	+ required="false" multiValued="false" indexed="true" stored="true" />
	45	+ <field name="NLCore_NLAdministrative_isThesaurusElement" type="nederlab_boolean"
	46	+ required="true" multiValued="false" indexed="true" stored="true" />
	47	+ <field name="NLCore_NLExternalReference_organizationName" type="nederlab_text"
	48	+ required="false" multiValued="true" indexed="true" stored="true" />
	49	+ <field name="NLCore_NLExternalReference_collectionName" type="nederlab_string"
	50	+ required="false" multiValued="true" indexed="true" stored="true" />
	51	+ <field name="NLCore_NLExternalReference_resourceRef" type="nederlab_string"
	52	+ required="false" multiValued="true" indexed="true" stored="true" />
	53	+ <field name="NLCore_NLExternalReference_serialized" type="nederlab_string"
	54	+ required="false" multiValued="true" indexed="false" stored="true" />
	55	+
	56	+ <!-- component NLTitle -->
	57	+ <field name="NLTitle_title" type="nederlab_text" required="false"
	58	+ multiValued="false" indexed="true" stored="true" />
	59	+ <field name="NLTitle_subtitle" type="nederlab_text" required="false"
	60	+ multiValued="false" indexed="true" stored="true" />
	61	+ <field name="NLTitle_genre" type="nederlab_string" required="false"
	62	+ multiValued="true" indexed="true" stored="true" />
	63	+ <field name="NLTitle_category" type="nederlab_string" required="false"
	64	+ multiValued="true" indexed="true" stored="true" />
	65	+ <field name="NLTitle_yearOfPublicationMin" type="nederlab_int"
	66	+ required="false" multiValued="false" indexed="true" stored="true" />
	67	+ <field name="NLTitle_yearOfPublicationMax" type="nederlab_int"
	68	+ required="false" multiValued="false" indexed="true" stored="true" />
	69	+ <field name="NLTitle_yearOfPublicationApprox" type="nederlab_boolean"
	70	+ required="false" multiValued="false" indexed="true" stored="true" />
	71	+ <field name="NLTitle_yearOfPublicationLabel" type="nederlab_text"
	72	+ required="false" multiValued="false" indexed="true" stored="true" />
	73	+ <field name="NLTitle_edition" type="nederlab_string" required="false"
	74	+ multiValued="false" indexed="true" stored="true" />
	75	+ <field name="NLTitle_inNederlabAs" type="nederlab_uuid" required="false"
	76	+ multiValued="false" indexed="true" stored="true" />
	77	+ <field name="NLTitle_NLPublicationPlace_placeOfPublication" type="nederlab_string"
	78	+ required="false" multiValued="true" indexed="true" stored="true" />
	79	+ <field name="NLTitle_NLPublicationPlace_placeID" type="nederlab_string"
	80	+ required="false" multiValued="true" indexed="true" stored="true" />
	81	+ <field name="NLTitle_NLPublicationPlace_placeOfPublicationOriginal"
	82	+ type="nederlab_text" required="false" multiValued="true" indexed="true"
	83	+ stored="true" />
	84	+ <field name="NLTitle_numberOfPages" type="nederlab_int" required="false"
	85	+ multiValued="false" indexed="true" stored="true" />
	86	+ <field name="NLTitle_numberOfWords" type="nederlab_int" required="false"
	87	+ multiValued="false" indexed="true" stored="true" />
	88	+ <field name="NLTitle_primaryLanguage" type="nederlab_string"
	89	+ required="false" multiValued="false" indexed="true" stored="true" />
	90	+ <field name="NLTitle_isTranslation" type="nederlab_boolean"
	91	+ required="false" multiValued="false" indexed="true" stored="true" />
	92	+ <field name="NLTitle_characterEncoding" type="nederlab_string"
	93	+ required="false" multiValued="false" indexed="true" stored="true" />
	94	+ <field name="NLTitle_codingStandard" type="nederlab_string"
	95	+ required="false" multiValued="true" indexed="true" stored="true" />
	96	+ <field name="NLTitle_textQuality" type="nederlab_text" required="false"
	97	+ multiValued="false" indexed="true" stored="true" />
	98	+ <field name="NLTitle_processingMethod" type="nederlab_text"
	99	+ required="false" multiValued="false" indexed="true" stored="true" />
	100	+ <field name="NLTitle_autopsyPerformed" type="nederlab_boolean"
	101	+ required="false" multiValued="false" indexed="true" stored="true" />
	102	+ <field name="NLTitle_NLPersonRef_personID" type="nederlab_uuid"
	103	+ required="false" multiValued="true" indexed="true" stored="true" />
	104	+ <field name="NLTitle_NLPersonRef_role" type="nederlab_string"
	105	+ required="false" multiValued="true" indexed="true" stored="true" />
	106	+ <dynamicField name="NLTitle_NLPersonRef_personID_role_*"
	107	+ type="nederlab_uuid" required="false" multiValued="true" indexed="true"
	108	+ stored="true" />
	109	+ <field name="NLTitle_contains" type="nederlab_uuid" required="false"
	110	+ multiValued="true" indexed="true" stored="true" />
	111	+ <field name="NLTitle_seriesTitleID" type="nederlab_uuid"
	112	+ required="false" multiValued="true" indexed="true" stored="true" />
	113	+ <field name="NLTitle_seriesTitleID_parent" type="nederlab_uuid"
	114	+ required="false" multiValued="false" indexed="true" stored="true" />
	115	+ <field name="NLTitle_seriesTitleID_root" type="nederlab_uuid"
	116	+ required="false" multiValued="false" indexed="true" stored="true" />
	117	+
	118	+ <!-- component NLDependentTitle -->
	119	+ <field name="NLDependentTitle_title" type="nederlab_text"
	120	+ required="false" multiValued="false" indexed="true" stored="true" />
	121	+ <field name="NLDependentTitle_subtitle" type="nederlab_text"
	122	+ required="false" multiValued="false" indexed="true" stored="true" />
	123	+ <field name="NLDependentTitle_primaryLanguage" type="nederlab_string"
	124	+ required="false" multiValued="false" indexed="true" stored="true" />
	125	+ <field name="NLDependentTitle_parentTitleID" type="nederlab_uuid"
	126	+ required="false" multiValued="false" indexed="true" stored="true" />
	127	+ <field name="NLDependentTitle_inNederlabAs" type="nederlab_uuid"
	128	+ required="false" multiValued="false" indexed="true" stored="true" />
	129	+ <field name="NLDependentTitle_NLPersonRef_personID" type="nederlab_uuid"
	130	+ required="false" multiValued="true" indexed="true" stored="true" />
	131	+ <field name="NLDependentTitle_NLPersonRef_role" type="nederlab_string"
	132	+ required="false" multiValued="true" indexed="true" stored="true" />
	133	+ <dynamicField name="NLDependentTitle_NLPersonRef_personID_role_*"
	134	+ type="nederlab_uuid" required="false" multiValued="true" indexed="true"
	135	+ stored="true" />
	136	+ <field name="NLDependentTitle_startPage" type="nederlab_int"
	137	+ required="false" multiValued="false" indexed="true" stored="true" />
	138	+ <field name="NLDependentTitle_endPage" type="nederlab_int"
	139	+ required="false" multiValued="false" indexed="true" stored="true" />
	140	+
	141	+ <!-- component NLPerson -->
	142	+ <field name="NLPerson_NLPersonName_nameId" type="nederlab_uuid"
	143	+ required="false" multiValued="true" indexed="true" stored="true" />
	144	+ <field name="NLPerson_NLPersonName_lastName" type="nederlab_text"
	145	+ required="false" multiValued="true" indexed="true" stored="true" />
	146	+ <field name="NLPerson_NLPersonName_firstName" type="nederlab_text"
	147	+ required="false" multiValued="true" indexed="true" stored="true" />
	148	+ <field name="NLPerson_NLPersonName_infixes" type="nederlab_text"
	149	+ required="false" multiValued="true" indexed="true" stored="true" />
	150	+ <field name="NLPerson_NLPersonName_firstNameFull" type="nederlab_text"
	151	+ required="false" multiValued="true" indexed="true" stored="true" />
	152	+ <field name="NLPerson_NLPersonName_fullName" type="nederlab_text"
	153	+ required="false" multiValued="true" indexed="true" stored="true" />
	154	+ <field name="NLPerson_NLPersonName_fullName_serialized" type="nederlab_string"
	155	+ required="false" multiValued="true" indexed="false" stored="true" />
	156	+ <field name="NLPerson_NLPersonName_preferredNameID" type="nederlab_uuid"
	157	+ required="false" multiValued="false" indexed="true" stored="true" />
	158	+ <field name="NLPerson_NLPersonName_preferredLastName" type="nederlab_string"
	159	+ required="false" multiValued="false" indexed="true" stored="true" />
	160	+ <field name="NLPerson_NLPersonName_preferredFirstName" type="nederlab_string"
	161	+ required="false" multiValued="false" indexed="true" stored="true" />
	162	+ <field name="NLPerson_NLPersonName_preferredFirstNameFull" type="nederlab_string"
	163	+ required="false" multiValued="false" indexed="true" stored="true" />
	164	+ <field name="NLPerson_NLPersonName_preferredInfixes" type="nederlab_string"
	165	+ required="false" multiValued="false" indexed="true" stored="true" />
	166	+ <field name="NLPerson_NLPersonName_preferredFullName" type="nederlab_text"
	167	+ required="false" multiValued="false" indexed="true" stored="true" />
	168	+ <field name="NLPerson_NLPersonName_preferredFullName_serialized"
	169	+ type="nederlab_string" required="false" multiValued="false" indexed="false"
	170	+ stored="true" />
	171	+ <field name="NLPerson_dateOfBirthDayMonth" type="nederlab_text"
	172	+ required="false" multiValued="false" indexed="true" stored="true" />
	173	+ <field name="NLPerson_dateOfBirthMonth" type="nederlab_int"
	174	+ required="false" multiValued="false" indexed="true" stored="true" />
	175	+ <field name="NLPerson_dateOfBirthDay" type="nederlab_int"
	176	+ required="false" multiValued="false" indexed="true" stored="true" />
	177	+ <field name="NLPerson_yearOfBirthMin" type="nederlab_int"
	178	+ required="false" multiValued="false" indexed="true" stored="true" />
	179	+ <field name="NLPerson_yearOfBirthMax" type="nederlab_int"
	180	+ required="false" multiValued="false" indexed="true" stored="true" />
	181	+ <field name="NLPerson_yearOfBirthApprox" type="nederlab_boolean"
	182	+ required="false" multiValued="false" indexed="true" stored="true" />
	183	+ <field name="NLPerson_yearOfBirthLabel" type="nederlab_text"
	184	+ required="false" multiValued="false" indexed="true" stored="true" />
	185	+ <field name="NLPerson_placeOfBirth" type="nederlab_string"
	186	+ required="false" multiValued="false" indexed="true" stored="true" />
	187	+ <field name="NLPerson_placeOfBirthID" type="nederlab_string"
	188	+ required="false" multiValued="false" indexed="true" stored="true" />
	189	+ <field name="NLPerson_dateOfDeathDayMonth" type="nederlab_text"
	190	+ required="false" multiValued="false" indexed="true" stored="true" />
	191	+ <field name="NLPerson_dateOfDeathMonth" type="nederlab_int"
	192	+ required="false" multiValued="false" indexed="true" stored="true" />
	193	+ <field name="NLPerson_dateOfDeathDay" type="nederlab_int"
	194	+ required="false" multiValued="false" indexed="true" stored="true" />
	195	+ <field name="NLPerson_yearOfDeathMin" type="nederlab_int"
	196	+ required="false" multiValued="false" indexed="true" stored="true" />
	197	+ <field name="NLPerson_yearOfDeathMax" type="nederlab_int"
	198	+ required="false" multiValued="false" indexed="true" stored="true" />
	199	+ <field name="NLPerson_yearOfDeathApprox" type="nederlab_boolean"
	200	+ required="false" multiValued="false" indexed="true" stored="true" />
	201	+ <field name="NLPerson_yearOfDeathLabel" type="nederlab_text"
	202	+ required="false" multiValued="false" indexed="true" stored="true" />
	203	+ <field name="NLPerson_placeOfDeath" type="nederlab_string"
	204	+ required="false" multiValued="false" indexed="true" stored="true" />
	205	+ <field name="NLPerson_placeOfDeathID" type="nederlab_string"
	206	+ required="false" multiValued="false" indexed="true" stored="true" />
	207	+ <field name="NLPerson_gender" type="nederlab_string" required="false"
	208	+ multiValued="false" indexed="true" stored="true" />
	209	+ <field name="NLPerson_profession" type="nederlab_string"
	210	+ required="false" multiValued="true" indexed="true" stored="true" />
	211	+ <field name="NLPerson_education" type="nederlab_string" required="false"
	212	+ multiValued="true" indexed="true" stored="true" />
	213	+ <field name="NLPerson_inThesaurusAs" type="nederlab_uuid"
	214	+ required="false" multiValued="false" indexed="true" stored="true" />
	215	+
	216	+ <!-- component NLSeriesTitle -->
	217	+ <field name="NLSeriesTitle_title" type="nederlab_text" required="false"
	218	+ multiValued="false" indexed="true" stored="true" />
	219	+ <field name="NLSeriesTitle_years" type="nederlab_text" required="false"
	220	+ multiValued="false" indexed="true" stored="true" />
	221	+ <field name="NLSeriesTitle_description" type="nederlab_text"
	222	+ required="false" multiValued="false" indexed="true" stored="true" />
	223	+ <field name="NLSeriesTitle_inNederlabAs" type="nederlab_uuid"
	224	+ required="false" multiValued="false" indexed="true" stored="true" />
	225	+ <field name="NLSeriesTitle_seriesTitleID" type="nederlab_uuid"
	226	+ required="false" multiValued="true" indexed="true" stored="true" />
	227	+ <field name="NLSeriesTitle_seriesTitleID_parent" type="nederlab_uuid"
	228	+ required="false" multiValued="false" indexed="true" stored="true" />
	229	+ <field name="NLSeriesTitle_seriesTitleID_root" type="nederlab_uuid"
	230	+ required="false" multiValued="false" indexed="true" stored="true" />
	231	+
	232	+ <!-- component NLCollectionSpecific -->
	233	+ <dynamicField name="NLCollectionSpecific_*" type="nederlab_string"
	234	+ required="false" multiValued="true" indexed="true" stored="true" />
	235	+
	236	+ <!-- component NLContent -->
	237	+ <field name="NLContent_folia_available" type="nederlab_boolean"
	238	+ required="false" multiValued="false" indexed="true" stored="true" />
	239	+ <field name="NLContent_text_available" type="nederlab_boolean"
	240	+ required="false" multiValued="false" indexed="true" stored="true" />
	241	+ <field name="NLContent_text" type="nederlab_content" required="false"
	242	+ multiValued="false" indexed="true" stored="true" termVectors="true"
	243	+ termPositions="true" termOffsets="true" />
	244	+ <field name="NLContent_text_lowercase" type="nederlab_content_lowercase"
	245	+ required="false" multiValued="false" indexed="true" stored="true"
	246	+ termVectors="true" termPositions="true" termOffsets="true" />
	247	+ <copyField source="NLContent_text" dest="NLContent_text_lowercase" />
	248	+ <field name="NLContent_ticcl_available" type="nederlab_boolean"
	249	+ required="false" multiValued="false" indexed="true" stored="true" />
	250	+ <field name="NLContent_ticcl_lowercase" type="nederlab_content_lowercase"
	251	+ required="false" multiValued="false" indexed="true" stored="true"
	252	+ termVectors="true" termPositions="true" termOffsets="true" />
	253	+ <field name="NLContent_mtas" type="mtas_text" indexed="true"
	254	+ stored="true" />
	255	+ <field name="NLContent_mtas_error" type="nederlab_string"
	256	+ indexed="true" stored="true" />
	257	+ <field name="NLContent_mtas_numberOfTokens" type="nederlab_int"
	258	+ indexed="true" stored="true" />
	259	+ <field name="NLContent_mtas_numberOfPositions" type="nederlab_int"
	260	+ indexed="true" stored="true" />
	261	+ <field name="NLContent_mtas_size" type="nederlab_int" indexed="true"
	262	+ stored="true" />
	263	+ <!-- Combined Field Metadata -->
	264	+ <field name="NLMetadata" type="nederlab_text" required="false"
	265	+ multiValued="true" indexed="true" stored="false" />
	266	+ <copyField source="NLCore_NLIdentification_nederlabID" dest="NLMetadata" />
	267	+ <copyField source="NLCore_NLIdentification_editorialCode"
	268	+ dest="NLMetadata" />
	269	+ <copyField source="NLCore_NLIdentification_sourceRef" dest="NLMetadata" />
	270	+ <copyField source="NLCore_NLAdministrative_editorialNote"
	271	+ dest="NLMetadata" />
	272	+ <copyField source="NLCore_NLAdministrative_sourceCollection"
	273	+ dest="NLMetadata" />
	274	+ <copyField source="NLCore_NLExternalReference_organizationName"
	275	+ dest="NLMetadata" />
	276	+ <copyField source="NLCore_NLExternalReference_collectionName"
	277	+ dest="NLMetadata" />
	278	+ <copyField source="NLCore_NLExternalReference_resourceRef"
	279	+ dest="NLMetadata" />
	280	+ <copyField source="NLTitle_title" dest="NLMetadata" />
	281	+ <copyField source="NLTitle_subtitle" dest="NLMetadata" />
	282	+ <copyField source="NLTitle_genre" dest="NLMetadata" />
	283	+ <copyField source="NLTitle_category" dest="NLMetadata" />
	284	+ <copyField source="NLTitle_yearOfPublicationMin" dest="NLMetadata" />
	285	+ <copyField source="NLTitle_yearOfPublicationMax" dest="NLMetadata" />
	286	+ <copyField source="NLTitle_yearOfPublicationLabel" dest="NLMetadata" />
	287	+ <copyField source="NLTitle_edition" dest="NLMetadata" />
	288	+ <copyField source="NLTitle_NLPublicationPlace_placeOfPublication"
	289	+ dest="NLMetadata" />
	290	+ <copyField source="NLTitle_NLPublicationPlace_placeID" dest="NLMetadata" />
	291	+ <copyField source="NLTitle_NLPublicationPlace_placeOfPublicationOriginal"
	292	+ dest="NLMetadata" />
	293	+ <copyField source="NLTitle_primaryLanguage" dest="NLMetadata" />
	294	+ <copyField source="NLTitle_characterEncoding" dest="NLMetadata" />
	295	+ <copyField source="NLTitle_codingStandard" dest="NLMetadata" />
	296	+ <copyField source="NLTitle_textQuality" dest="NLMetadata" />
	297	+ <copyField source="NLTitle_processingMethod" dest="NLMetadata" />
	298	+ <copyField source="NLTitle_NLPersonRef_role" dest="NLMetadata" />
	299	+ <copyField source="NLDependentTitle_title" dest="NLMetadata" />
	300	+ <copyField source="NLDependentTitle_subtitle" dest="NLMetadata" />
	301	+ <copyField source="NLDependentTitle_primaryLanguage" dest="NLMetadata" />
	302	+ <copyField source="NLDependentTitle_NLPersonRef_role" dest="NLMetadata" />
	303	+ <copyField source="NLPerson_NLPersonName_lastName" dest="NLMetadata" />
	304	+ <copyField source="NLPerson_NLPersonName_firstName" dest="NLMetadata" />
	305	+ <copyField source="NLPerson_NLPersonName_infixes" dest="NLMetadata" />
	306	+ <copyField source="NLPerson_NLPersonName_firstNameFull" dest="NLMetadata" />
	307	+ <copyField source="NLPerson_NLPersonName_fullName" dest="NLMetadata" />
	308	+ <copyField source="NLPerson_dateOfBirthDayMonth" dest="NLMetadata" />
	309	+ <copyField source="NLPerson_yearOfBirthMin" dest="NLMetadata" />
	310	+ <copyField source="NLPerson_yearOfBirthMax" dest="NLMetadata" />
	311	+ <copyField source="NLPerson_yearOfBirthLabel" dest="NLMetadata" />
	312	+ <copyField source="NLPerson_placeOfBirth" dest="NLMetadata" />
	313	+ <copyField source="NLPerson_placeOfBirthID" dest="NLMetadata" />
	314	+ <copyField source="NLPerson_dateOfDeathDayMonth" dest="NLMetadata" />
	315	+ <copyField source="NLPerson_yearOfDeathMin" dest="NLMetadata" />
	316	+ <copyField source="NLPerson_yearOfDeathMax" dest="NLMetadata" />
	317	+ <copyField source="NLPerson_yearOfDeathLabel" dest="NLMetadata" />
	318	+ <copyField source="NLPerson_placeOfDeath" dest="NLMetadata" />
	319	+ <copyField source="NLPerson_placeOfDeathID" dest="NLMetadata" />
	320	+ <copyField source="NLPerson_gender" dest="NLMetadata" />
	321	+ <copyField source="NLPerson_profession" dest="NLMetadata" />
	322	+ <copyField source="NLPerson_education" dest="NLMetadata" />
	323	+ <copyField source="NLSeriesTitle_title" dest="NLMetadata" />
	324	+ <copyField source="NLSeriesTitle_years" dest="NLMetadata" />
	325	+ <copyField source="NLSeriesTitle_description" dest="NLMetadata" />
	326	+ <copyField source="NLCollectionSpecific_*" dest="NLMetadata" />
	327	+
	328	+ <uniqueKey>NLCore_NLIdentification_versionID</uniqueKey>
	329	+
	330	+ <fieldType name="nederlab_string" class="solr.StrField"
	331	+ sortMissingLast="true" />
	332	+ <fieldType name="nederlab_uuid" class="solr.StrField"
	333	+ sortMissingLast="true" />
	334	+ <fieldType name="nederlab_boolean" class="solr.BoolField"
	335	+ sortMissingLast="true" />
	336	+ <fieldType name="nederlab_int" class="solr.TrieIntField"
	337	+ precisionStep="8" positionIncrementGap="0" />
	338	+ <fieldType name="nederlab_long" class="solr.TrieLongField"
	339	+ precisionStep="0" positionIncrementGap="0" />
	340	+ <fieldType name="nederlab_date" class="solr.TrieDateField"
	341	+ precisionStep="6" positionIncrementGap="0" />
	342	+ <fieldtype name="nederlab_binary" class="solr.BinaryField" />
	343	+
	344	+ <fieldType name="nederlab_text" class="solr.TextField"
	345	+ positionIncrementGap="100">
	346	+ <analyzer type="index">
	347	+ <tokenizer class="solr.StandardTokenizerFactory" />
	348	+ <filter class="solr.LowerCaseFilterFactory" />
	349	+ </analyzer>
	350	+ <analyzer type="query">
	351	+ <tokenizer class="solr.StandardTokenizerFactory" />
	352	+ <filter class="solr.LowerCaseFilterFactory" />
	353	+ </analyzer>
	354	+ </fieldType>
	355	+
	356	+ <fieldType name="nederlab_content" class="solr.TextField"
	357	+ positionIncrementGap="100">
	358	+ <analyzer type="index">
	359	+ <tokenizer class="solr.StandardTokenizerFactory" />
	360	+ </analyzer>
	361	+ <analyzer type="query">
	362	+ <tokenizer class="solr.StandardTokenizerFactory" />
	363	+ </analyzer>
	364	+ </fieldType>
	365	+
	366	+ <fieldType name="nederlab_content_lowercase" class="solr.TextField"
	367	+ positionIncrementGap="100">
	368	+ <analyzer type="index">
	369	+ <tokenizer class="solr.StandardTokenizerFactory" />
	370	+ <filter class="solr.LowerCaseFilterFactory" />
	371	+ </analyzer>
	372	+ <analyzer type="query">
	373	+ <tokenizer class="solr.StandardTokenizerFactory" />
	374	+ <filter class="solr.LowerCaseFilterFactory" />
	375	+ </analyzer>
	376	+ </fieldType>
	377	+
	378	+ <fieldType name="mtas_text_example_classical" class="solr.TextField"
	379	+ postingsFormat="MtasCodec">
	380	+ <analyzer type="index">
	381	+ <charFilter class="mtas.analysis.util.MtasCharFilterFactory"
	382	+ type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/" />
	383	+ <tokenizer class="mtas.analysis.util.MtasTokenizerFactory"
	384	+ configFile="folia/test.xml" />
	385	+ </analyzer>
	386	+ </fieldType>
	387	+
	388	+ <fieldType name="mtas_text_example_config" class="solr.TextField"
	389	+ postingsFormat="MtasCodec">
	390	+ <analyzer type="index">
	391	+ <charFilter class="mtas.analysis.util.MtasCharFilterFactory"
	392	+ config="mtas.xml" default="default" />
	393	+ <tokenizer class="mtas.analysis.util.MtasTokenizerFactory"
	394	+ config="mtas.xml" default="default" />
	395	+ </analyzer>
	396	+ </fieldType>
	397	+
	398	+ <fieldType name="mtas_text" class="mtas.solr.schema.MtasPreAnalyzedField"
	399	+ followIndexAnalyzer="mtas_text_example_config" defaultConfiguration="default"
	400	+ configurationFromField="NLCore_NLAdministrative_sourceCollection" setNumberOfTokens="NLContent_mtas_numberOfTokens"
	401	+ setNumberOfPositions="NLContent_mtas_numberOfPositions" setSize="NLContent_mtas_size"
	402	+ setError="NLContent_mtas_error" postingsFormat="MtasCodec">
	403	+ <analyzer type="query">
	404	+ <tokenizer class="solr.WhitespaceTokenizerFactory" />
	405	+ <filter class="mtas.analysis.util.MtasPrefixTokenFilterFactory"
	406	+ prefix="t" />
	407	+ </analyzer>
	408	+ </fieldType>
	409	+
	410	+</schema>
...	...

conf/solr/schemaTest.xml 0 → 100644

View file @0237256

	1	+++ a/conf/solr/schemaTest.xml
	1	+<?xml version="1.0" encoding="UTF-8" ?>
	2	+<!--
	3	+ Licensed to the Apache Software Foundation (ASF) under one or more
	4	+ contributor license agreements. See the NOTICE file distributed with
	5	+ this work for additional information regarding copyright ownership.
	6	+ The ASF licenses this file to You under the Apache License, Version 2.0
	7	+ (the "License"); you may not use this file except in compliance with
	8	+ the License. You may obtain a copy of the License at
	9	+
	10	+ http://www.apache.org/licenses/LICENSE-2.0
	11	+
	12	+ Unless required by applicable law or agreed to in writing, software
	13	+ distributed under the License is distributed on an "AS IS" BASIS,
	14	+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	15	+ See the License for the specific language governing permissions and
	16	+ limitations under the License.
	17	+-->
	18	+
	19	+<!--
	20	+ This is the Solr schema file. This file should be named "schema.xml" and
	21	+ should be in the conf directory under the solr home
	22	+ (i.e. ./solr/conf/schema.xml by default)
	23	+ or located where the classloader for the Solr webapp can find it.
	24	+
	25	+ This example schema is the recommended starting point for users.
	26	+ It should be kept correct and concise, usable out-of-the-box.
	27	+
	28	+ For more information, on how to customize this file, please see
	29	+ http://wiki.apache.org/solr/SchemaXml
	30	+
	31	+ PERFORMANCE NOTE: this schema includes many optional features and should not
	32	+ be used for benchmarking. To improve performance one could
	33	+ - set stored="false" for all fields possible (esp large fields) when you
	34	+ only need to search on the field but don't need to return the original
	35	+ value.
	36	+ - set indexed="false" if you don't need to search on the field, but only
	37	+ return the field as a result of searching on other indexed fields.
	38	+ - remove all unneeded copyField statements
	39	+ - for best index size and searching performance, set "indexed" to false
	40	+ for all general text fields, use copyField to copy them to the
	41	+ catchall "text" field, and use that for searching.
	42	+ - For maximum indexing performance, use the ConcurrentUpdateSolrServer
	43	+ java client.
	44	+ - Remember to run the JVM in server mode, and use a higher logging level
	45	+ that avoids logging every request
	46	+-->
	47	+
	48	+<schema name="example-data-driven-schema" version="1.5">
	49	+ <!-- attribute "name" is the name of this schema and is only used for display purposes.
	50	+ version="x.y" is Solr's version number for the schema syntax and
	51	+ semantics. It should not normally be changed by applications.
	52	+
	53	+ 1.0: multiValued attribute did not exist, all fields are multiValued
	54	+ by nature
	55	+ 1.1: multiValued attribute introduced, false by default
	56	+ 1.2: omitTermFreqAndPositions attribute introduced, true by default
	57	+ except for text fields.
	58	+ 1.3: removed optional field compress feature
	59	+ 1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser
	60	+ behavior when a single string produces multiple tokens. Defaults
	61	+ to off for version >= 1.4
	62	+ 1.5: omitNorms defaults to true for primitive field types
	63	+ (int, float, boolean, string...)
	64	+ -->
	65	+
	66	+ <!-- Valid attributes for fields:
	67	+ name: mandatory - the name for the field
	68	+ type: mandatory - the name of a field type from the
	69	+ <types> fieldType section
	70	+ indexed: true if this field should be indexed (searchable or sortable)
	71	+ stored: true if this field should be retrievable
	72	+ docValues: true if this field should have doc values. Doc values are
	73	+ useful for faceting, grouping, sorting and function queries. Although not
	74	+ required, doc values will make the index faster to load, more
	75	+ NRT-friendly and more memory-efficient. They however come with some
	76	+ limitations: they are currently only supported by StrField, UUIDField
	77	+ and all Trie*Fields, and depending on the field type, they might
	78	+ require the field to be single-valued, be required or have a default
	79	+ value (check the documentation of the field type you're interested in
	80	+ for more information)
	81	+ multiValued: true if this field may contain multiple values per document
	82	+ omitNorms: (expert) set to true to omit the norms associated with
	83	+ this field (this disables length normalization and index-time
	84	+ boosting for the field, and saves some memory). Only full-text
	85	+ fields or fields that need an index-time boost need norms.
	86	+ Norms are omitted for primitive (non-analyzed) types by default.
	87	+ termVectors: [false] set to true to store the term vector for a
	88	+ given field.
	89	+ When using MoreLikeThis, fields used for similarity should be
	90	+ stored for best performance.
	91	+ termPositions: Store position information with the term vector.
	92	+ This will increase storage costs.
	93	+ termOffsets: Store offset information with the term vector. This
	94	+ will increase storage costs.
	95	+ required: The field is required. It will throw an error if the
	96	+ value does not exist
	97	+ default: a value that should be used if no value is specified
	98	+ when adding a document.
	99	+ -->
	100	+
	101	+ <!-- field names should consist of alphanumeric or underscore characters only and
	102	+ not start with a digit. This is not currently strictly enforced,
	103	+ but other field names will not have first class support from all components
	104	+ and back compatibility is not guaranteed. Names with both leading and
	105	+ trailing underscores (e.g. _version_) are reserved.
	106	+ -->
	107	+
	108	+ <!-- In this data_driven_schema_configs configset, only four fields are pre-declared:
	109	+ text, id, _version_, and _text_. All other fields will be type guessed and added via the
	110	+ "add-unknown-fields-to-the-schema" update request processor chain declared
	111	+ in solrconfig.xml.
	112	+
	113	+ Note that many dynamic fields are also defined - you can use them to specify a
	114	+ field's type via field naming conventions - see below.
	115	+
	116	+ WARNING: The _text_ catch-all field will significantly increase your index size.
	117	+ If you don't need it, consider removing it and the corresponding copyField directive.
	118	+ -->
	119	+ <!--
	120	+ <fieldType name="string_simpletext" class="solr.StrField" postingsFormat="SimpleText" />
	121	+ <field name="simple_string" type="string_simpletext" indexed="true" stored="true" required="false" multiValued="false" />
	122	+ -->
	123	+
	124	+ <fieldType name="mtas_text" class="solr.TextField" postingsFormat="MtasCodec">
	125	+ <analyzer type="index">
	126	+ <charFilter class="mtas.analysis.util.MtasCharFilterFactory" type="url" prefix="https://openskos.meertens.knaw.nl/nederlab/archief/get/"/>
	127	+ <tokenizer class="mtas.analysis.util.MtasTokenizerFactory" config="mtas.xml"/>
	128	+ </analyzer>
	129	+ <analyzer type="query">
	130	+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
	131	+ </analyzer>
	132	+ </fieldType>
	133	+ <field name="text" type="mtas_text" indexed="true" stored="true" />
	134	+
	135	+ <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
	136	+ <field name="_version_" type="long" indexed="true" stored="true"/>
	137	+ <field name="_text_" type="text_general" indexed="true" stored="false" multiValued="true"/>
	138	+ <copyField source="*" dest="_text_"/>
	139	+
	140	+
	141	+ <!-- Dynamic field definitions allow using convention over configuration
	142	+ for fields via the specification of patterns to match field names.
	143	+ EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
	144	+ RESTRICTION: the glob-like pattern in the name attribute must have
	145	+ a "*" only at the start or the end. -->
	146	+
	147	+ <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
	148	+ <dynamicField name="*_is" type="ints" indexed="true" stored="true"/>
	149	+ <dynamicField name="*_s" type="string" indexed="true" stored="true" />
	150	+ <dynamicField name="*_ss" type="strings" indexed="true" stored="true"/>
	151	+ <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
	152	+ <dynamicField name="*_ls" type="longs" indexed="true" stored="true"/>
	153	+ <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
	154	+ <dynamicField name="*_txt" type="text_general" indexed="true" stored="true"/>
	155	+ <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
	156	+ <dynamicField name="*_bs" type="booleans" indexed="true" stored="true"/>
	157	+ <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
	158	+ <dynamicField name="*_fs" type="floats" indexed="true" stored="true"/>
	159	+ <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
	160	+ <dynamicField name="*_ds" type="doubles" indexed="true" stored="true"/>
	161	+
	162	+ <!-- Type used to index the lat and lon components for the "location" FieldType -->
	163	+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
	164	+
	165	+ <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
	166	+ <dynamicField name="*_dts" type="date" indexed="true" stored="true" multiValued="true"/>
	167	+ <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
	168	+ <dynamicField name="*_srpt" type="location_rpt" indexed="true" stored="true"/>
	169	+
	170	+ <!-- some trie-coded dynamic fields for faster range queries -->
	171	+ <dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
	172	+ <dynamicField name="*_tis" type="tints" indexed="true" stored="true"/>
	173	+ <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
	174	+ <dynamicField name="*_tls" type="tlongs" indexed="true" stored="true"/>
	175	+ <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
	176	+ <dynamicField name="*_tfs" type="tfloats" indexed="true" stored="true"/>
	177	+ <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
	178	+ <dynamicField name="*_tds" type="tdoubles" indexed="true" stored="true"/>
	179	+ <dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
	180	+ <dynamicField name="*_tdts" type="tdates" indexed="true" stored="true"/>
	181	+
	182	+ <dynamicField name="*_c" type="currency" indexed="true" stored="true"/>
	183	+
	184	+ <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
	185	+ <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
	186	+
	187	+ <dynamicField name="random_*" type="random" />
	188	+
	189	+ <!-- uncomment the following to ignore any fields that don't already match an existing
	190	+ field name or dynamic field, rather than reporting them as an error.
	191	+ alternately, change the type="ignored" to some other type e.g. "text" if you want
	192	+ unknown fields indexed and/or stored by default
	193	+
	194	+ NB: use of "*" dynamic fields will disable field type guessing and adding
	195	+ unknown fields to the schema. -->
	196	+ <!--dynamicField name="*" type="ignored" multiValued="true" /-->
	197	+
	198	+
	199	+
	200	+ <!-- Field to use to determine and enforce document uniqueness.
	201	+ Unless this field is marked with required="false", it will be a required field
	202	+ -->
	203	+ <uniqueKey>id</uniqueKey>
	204	+
	205	+ <!-- copyField commands copy one field to another at the time a document
	206	+ is added to the index. It's used either to index the same field differently,
	207	+ or to add multiple fields to the same field for easier/faster searching.
	208	+
	209	+ <copyField source="cat" dest="text"/>
	210	+ <copyField source="name" dest="text"/>
	211	+ <copyField source="manu" dest="text"/>
	212	+ <copyField source="features" dest="text"/>
	213	+ <copyField source="includes" dest="text"/>
	214	+ <copyField source="manu" dest="manu_exact"/>
	215	+ -->
	216	+
	217	+ <!-- Copy the price into a currency enabled field (default USD)
	218	+ <copyField source="price" dest="price_c"/>
	219	+ -->
	220	+
	221	+ <!-- Text fields from SolrCell to search by default in our catch-all field
	222	+ <copyField source="title" dest="text"/>
	223	+ <copyField source="author" dest="text"/>
	224	+ <copyField source="description" dest="text"/>
	225	+ <copyField source="keywords" dest="text"/>
	226	+ <copyField source="content" dest="text"/>
	227	+ <copyField source="content_type" dest="text"/>
	228	+ <copyField source="resourcename" dest="text"/>
	229	+ <copyField source="url" dest="text"/>
	230	+ -->
	231	+
	232	+ <!-- Create a string version of author for faceting
	233	+ <copyField source="author" dest="author_s"/>
	234	+ -->
	235	+
	236	+ <!-- Above, multiple source fields are copied to the [text] field.
	237	+ Another way to map multiple source fields to the same
	238	+ destination field is to use the dynamic field syntax.
	239	+ copyField also supports a maxChars to copy setting. -->
	240	+
	241	+ <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
	242	+
	243	+ <!-- copy name to alphaNameSort, a field designed for sorting by name -->
	244	+ <!-- <copyField source="name" dest="alphaNameSort"/> -->
	245	+
	246	+
	247	+ <!-- field type definitions. The "name" attribute is
	248	+ just a label to be used by field definitions. The "class"
	249	+ attribute and any other attributes determine the real
	250	+ behavior of the fieldType.
	251	+ Class names starting with "solr" refer to java classes in a
	252	+ standard package such as org.apache.solr.analysis
	253	+ -->
	254	+
	255	+ <!-- The StrField type is not analyzed, but indexed/stored verbatim.
	256	+ It supports doc values but in that case the field needs to be
	257	+ single-valued and either required or have a default value.
	258	+ -->
	259	+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
	260	+ <fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true"/>
	261	+
	262	+ <!-- boolean type: "true" or "false" -->
	263	+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
	264	+
	265	+ <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
	266	+
	267	+ <!-- The sortMissingLast and sortMissingFirst attributes are optional and are
	268	+ currently supported on types that are sorted internally as strings
	269	+ and on numeric types.
	270	+ This includes "string","boolean", and, as of 3.5 (and 4.x),
	271	+ int, float, long, date, double, including the "Trie" variants.
	272	+ - If sortMissingLast="true", then a sort on this field will cause documents
	273	+ without the field to come after documents with the field,
	274	+ regardless of the requested sort order (asc or desc).
	275	+ - If sortMissingFirst="true", then a sort on this field will cause documents
	276	+ without the field to come before documents with the field,
	277	+ regardless of the requested sort order.
	278	+ - If sortMissingLast="false" and sortMissingFirst="false" (the default),
	279	+ then default lucene sorting will be used which places docs without the
	280	+ field first in an ascending sort and last in a descending sort.
	281	+ -->
	282	+
	283	+ <!--
	284	+ Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
	285	+
	286	+ These fields support doc values, but they require the field to be
	287	+ single-valued and either be required or have a default value.
	288	+ -->
	289	+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
	290	+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
	291	+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
	292	+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
	293	+
	294	+ <fieldType name="ints" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
	295	+ <fieldType name="floats" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
	296	+ <fieldType name="longs" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
	297	+ <fieldType name="doubles" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
	298	+
	299	+ <!--
	300	+ Numeric field types that index each value at various levels of precision
	301	+ to accelerate range queries when the number of values between the range
	302	+ endpoints is large. See the javadoc for NumericRangeQuery for internal
	303	+ implementation details.
	304	+
	305	+ Smaller precisionStep values (specified in bits) will lead to more tokens
	306	+ indexed per value, slightly larger index size, and faster range queries.
	307	+ A precisionStep of 0 disables indexing at different precision levels.
	308	+ -->
	309	+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
	310	+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
	311	+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
	312	+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
	313	+
	314	+ <fieldType name="tints" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
	315	+ <fieldType name="tfloats" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
	316	+ <fieldType name="tlongs" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
	317	+ <fieldType name="tdoubles" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
	318	+
	319	+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
	320	+ is a more restricted form of the canonical representation of dateTime
	321	+ http://www.w3.org/TR/xmlschema-2/#dateTime
	322	+ The trailing "Z" designates UTC time and is mandatory.
	323	+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
	324	+ All other components are mandatory.
	325	+
	326	+ Expressions can also be used to denote calculations that should be
	327	+ performed relative to "NOW" to determine the value, ie...
	328	+
	329	+ NOW/HOUR
	330	+ ... Round to the start of the current hour
	331	+ NOW-1DAY
	332	+ ... Exactly 1 day prior to now
	333	+ NOW/DAY+6MONTHS+3DAYS
	334	+ ... 6 months and 3 days in the future from the start of
	335	+ the current day
	336	+
	337	+ Consult the TrieDateField javadocs for more information.
	338	+
	339	+ Note: For faster range queries, consider the tdate type
	340	+ -->
	341	+ <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
	342	+ <fieldType name="dates" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0" multiValued="true"/>
	343	+
	344	+ <!-- A Trie based date field for faster date range queries and date faceting. -->
	345	+ <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
	346	+
	347	+ <fieldType name="tdates" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0" multiValued="true"/>
	348	+
	349	+
	350	+ <!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings -->
	351	+ <fieldType name="binary" class="solr.BinaryField"/>
	352	+
	353	+ <!-- The "RandomSortField" is not used to store or search any
	354	+ data. You can declare fields of this type in your schema
	355	+ to generate pseudo-random orderings of your docs for sorting
	356	+ or function purposes. The ordering is generated based on the field
	357	+ name and the version of the index. As long as the index version
	358	+ remains unchanged, and the same field name is reused,
	359	+ the ordering of the docs will be consistent.
	360	+ If you want different pseudo-random orderings of documents,
	361	+ for the same version of the index, use a dynamicField and
	362	+ change the field name in the request.
	363	+ -->
	364	+ <fieldType name="random" class="solr.RandomSortField" indexed="true" />
	365	+
	366	+ <!-- solr.TextField allows the specification of custom text analyzers
	367	+ specified as a tokenizer and a list of token filters. Different
	368	+ analyzers may be specified for indexing and querying.
	369	+
	370	+ The optional positionIncrementGap puts space between multiple fields of
	371	+ this type on the same document, with the purpose of preventing false phrase
	372	+ matching across fields.
	373	+
	374	+ For more info on customizing your analyzer chain, please see
	375	+ http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
	376	+ -->
	377	+
	378	+ <!-- One can also specify an existing Analyzer class that has a
	379	+ default constructor via the class attribute on the analyzer element.
	380	+ Example:
	381	+ <fieldType name="text_greek" class="solr.TextField">
	382	+ <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
	383	+ </fieldType>
	384	+ -->
	385	+
	386	+ <!-- A text field that only splits on whitespace for exact matching of words -->
	387	+ <dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/>
	388	+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
	389	+ <analyzer>
	390	+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
	391	+ </analyzer>
	392	+ </fieldType>
	393	+
	394	+ <!-- A general text field that has reasonable, generic
	395	+ cross-language defaults: it tokenizes with StandardTokenizer,
	396	+ removes stop words from case-insensitive "stopwords.txt"
	397	+ (empty by default), and down cases. At query time only, it
	398	+ also applies synonyms. -->
	399	+ <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true">
	400	+ <analyzer type="index">
	401	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	402	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
	403	+ <!-- in this example, we will only use synonyms at query time
	404	+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
	405	+ -->
	406	+ <filter class="solr.LowerCaseFilterFactory"/>
	407	+ </analyzer>
	408	+ <analyzer type="query">
	409	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	410	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
	411	+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
	412	+ <filter class="solr.LowerCaseFilterFactory"/>
	413	+ </analyzer>
	414	+ </fieldType>
	415	+
	416	+ <!-- A text field with defaults appropriate for English: it
	417	+ tokenizes with StandardTokenizer, removes English stop words
	418	+ (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
	419	+ finally applies Porter's stemming. The query time analyzer
	420	+ also applies synonyms from synonyms.txt. -->
	421	+ <dynamicField name="*_txt_en" type="text_en" indexed="true" stored="true"/>
	422	+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
	423	+ <analyzer type="index">
	424	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	425	+ <!-- in this example, we will only use synonyms at query time
	426	+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
	427	+ -->
	428	+ <!-- Case insensitive stop word removal.
	429	+ -->
	430	+ <filter class="solr.StopFilterFactory"
	431	+ ignoreCase="true"
	432	+ words="lang/stopwords_en.txt"
	433	+ />
	434	+ <filter class="solr.LowerCaseFilterFactory"/>
	435	+ <filter class="solr.EnglishPossessiveFilterFactory"/>
	436	+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
	437	+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
	438	+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
	439	+ -->
	440	+ <filter class="solr.PorterStemFilterFactory"/>
	441	+ </analyzer>
	442	+ <analyzer type="query">
	443	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	444	+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
	445	+ <filter class="solr.StopFilterFactory"
	446	+ ignoreCase="true"
	447	+ words="lang/stopwords_en.txt"
	448	+ />
	449	+ <filter class="solr.LowerCaseFilterFactory"/>
	450	+ <filter class="solr.EnglishPossessiveFilterFactory"/>
	451	+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
	452	+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
	453	+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
	454	+ -->
	455	+ <filter class="solr.PorterStemFilterFactory"/>
	456	+ </analyzer>
	457	+ </fieldType>
	458	+
	459	+ <!-- A text field with defaults appropriate for English, plus
	460	+ aggressive word-splitting and autophrase features enabled.
	461	+ This field is just like text_en, except it adds
	462	+ WordDelimiterFilter to enable splitting and matching of
	463	+ words on case-change, alpha numeric boundaries, and
	464	+ non-alphanumeric chars. This means certain compound word
	465	+ cases will work, for example query "wi fi" will match
	466	+ document "WiFi" or "wi-fi".
	467	+ -->
	468	+ <dynamicField name="*_txt_en_split" type="text_en_splitting" indexed="true" stored="true"/>
	469	+ <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
	470	+ <analyzer type="index">
	471	+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
	472	+ <!-- in this example, we will only use synonyms at query time
	473	+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
	474	+ -->
	475	+ <!-- Case insensitive stop word removal.
	476	+ -->
	477	+ <filter class="solr.StopFilterFactory"
	478	+ ignoreCase="true"
	479	+ words="lang/stopwords_en.txt"
	480	+ />
	481	+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
	482	+ <filter class="solr.LowerCaseFilterFactory"/>
	483	+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
	484	+ <filter class="solr.PorterStemFilterFactory"/>
	485	+ </analyzer>
	486	+ <analyzer type="query">
	487	+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
	488	+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
	489	+ <filter class="solr.StopFilterFactory"
	490	+ ignoreCase="true"
	491	+ words="lang/stopwords_en.txt"
	492	+ />
	493	+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
	494	+ <filter class="solr.LowerCaseFilterFactory"/>
	495	+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
	496	+ <filter class="solr.PorterStemFilterFactory"/>
	497	+ </analyzer>
	498	+ </fieldType>
	499	+
	500	+ <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
	501	+ but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
	502	+ <dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight" indexed="true" stored="true"/>
	503	+ <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
	504	+ <analyzer>
	505	+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
	506	+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
	507	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
	508	+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
	509	+ <filter class="solr.LowerCaseFilterFactory"/>
	510	+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
	511	+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
	512	+ <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
	513	+ possible with WordDelimiterFilter in conjunction with stemming. -->
	514	+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
	515	+ </analyzer>
	516	+ </fieldType>
	517	+
	518	+ <!-- Just like text_general except it reverses the characters of
	519	+ each token, to enable more efficient leading wildcard queries. -->
	520	+ <dynamicField name="*_txt_rev" type="text_general_rev" indexed="true" stored="true"/>
	521	+ <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
	522	+ <analyzer type="index">
	523	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	524	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
	525	+ <filter class="solr.LowerCaseFilterFactory"/>
	526	+ <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
	527	+ maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
	528	+ </analyzer>
	529	+ <analyzer type="query">
	530	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	531	+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
	532	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
	533	+ <filter class="solr.LowerCaseFilterFactory"/>
	534	+ </analyzer>
	535	+ </fieldType>
	536	+
	537	+ <dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/>
	538	+ <fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" >
	539	+ <analyzer>
	540	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	541	+ <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
	542	+ </analyzer>
	543	+ </fieldType>
	544	+
	545	+ <!-- lowercases the entire field value, keeping it as a single token. -->
	546	+ <dynamicField name="*_s_lower" type="lowercase" indexed="true" stored="true"/>
	547	+ <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
	548	+ <analyzer>
	549	+ <tokenizer class="solr.KeywordTokenizerFactory"/>
	550	+ <filter class="solr.LowerCaseFilterFactory" />
	551	+ </analyzer>
	552	+ </fieldType>
	553	+
	554	+ <!--
	555	+ Example of using PathHierarchyTokenizerFactory at index time, so
	556	+ queries for paths match documents at that path, or in descendent paths
	557	+ -->
	558	+ <dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/>
	559	+ <fieldType name="descendent_path" class="solr.TextField">
	560	+ <analyzer type="index">
	561	+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
	562	+ </analyzer>
	563	+ <analyzer type="query">
	564	+ <tokenizer class="solr.KeywordTokenizerFactory" />
	565	+ </analyzer>
	566	+ </fieldType>
	567	+ <!--
	568	+ Example of using PathHierarchyTokenizerFactory at query time, so
	569	+ queries for paths match documents at that path, or in ancestor paths
	570	+ -->
	571	+ <dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true" stored="true"/>
	572	+ <fieldType name="ancestor_path" class="solr.TextField">
	573	+ <analyzer type="index">
	574	+ <tokenizer class="solr.KeywordTokenizerFactory" />
	575	+ </analyzer>
	576	+ <analyzer type="query">
	577	+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
	578	+ </analyzer>
	579	+ </fieldType>
	580	+
	581	+ <!-- since fields of this type are by default not stored or indexed,
	582	+ any data added to them will be ignored outright. -->
	583	+ <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
	584	+
	585	+ <!-- This point type indexes the coordinates as separate fields (subFields)
	586	+ If subFieldType is defined, it references a type, and a dynamic field
	587	+ definition is created matching *___<typename>. Alternately, if
	588	+ subFieldSuffix is defined, that is used to create the subFields.
	589	+ Example: if subFieldType="double", then the coordinates would be
	590	+ indexed in fields myloc_0___double,myloc_1___double.
	591	+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
	592	+ in fields myloc_0_d,myloc_1_d
	593	+ The subFields are an implementation detail of the fieldType, and end
	594	+ users normally should not need to know about them.
	595	+ -->
	596	+ <dynamicField name="*_point" type="point" indexed="true" stored="true"/>
	597	+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
	598	+
	599	+ <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
	600	+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
	601	+
	602	+ <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
	603	+ For more information about this and other Spatial fields new to Solr 4, see:
	604	+ http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
	605	+ -->
	606	+ <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
	607	+ geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" />
	608	+
	609	+ <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
	610	+ Parameters:
	611	+ defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
	612	+ precisionStep: Specifies the precisionStep for the TrieLong field used for the amount
	613	+ providerClass: Lets you plug in other exchange provider backend:
	614	+ solr.FileExchangeRateProvider is the default and takes one parameter:
	615	+ currencyConfig: name of an xml file holding exchange rates
	616	+ solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
	617	+ ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
	618	+ refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
	619	+ -->
	620	+ <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
	621	+
	622	+
	623	+
	624	+ <!-- some examples for different languages (generally ordered by ISO code) -->
	625	+
	626	+ <!-- Arabic -->
	627	+ <dynamicField name="*_txt_ar" type="text_ar" indexed="true" stored="true"/>
	628	+ <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
	629	+ <analyzer>
	630	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	631	+ <!-- for any non-arabic -->
	632	+ <filter class="solr.LowerCaseFilterFactory"/>
	633	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" />
	634	+ <!-- normalizes ﻯ to ﻱ, etc -->
	635	+ <filter class="solr.ArabicNormalizationFilterFactory"/>
	636	+ <filter class="solr.ArabicStemFilterFactory"/>
	637	+ </analyzer>
	638	+ </fieldType>
	639	+
	640	+ <!-- Bulgarian -->
	641	+ <dynamicField name="*_txt_bg" type="text_bg" indexed="true" stored="true"/>
	642	+ <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
	643	+ <analyzer>
	644	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	645	+ <filter class="solr.LowerCaseFilterFactory"/>
	646	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" />
	647	+ <filter class="solr.BulgarianStemFilterFactory"/>
	648	+ </analyzer>
	649	+ </fieldType>
	650	+
	651	+ <!-- Catalan -->
	652	+ <dynamicField name="*_txt_ca" type="text_ca" indexed="true" stored="true"/>
	653	+ <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
	654	+ <analyzer>
	655	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	656	+ <!-- removes l', etc -->
	657	+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
	658	+ <filter class="solr.LowerCaseFilterFactory"/>
	659	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" />
	660	+ <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
	661	+ </analyzer>
	662	+ </fieldType>
	663	+
	664	+ <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
	665	+ <dynamicField name="*_txt_cjk" type="text_cjk" indexed="true" stored="true"/>
	666	+ <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
	667	+ <analyzer>
	668	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	669	+ <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
	670	+ <filter class="solr.CJKWidthFilterFactory"/>
	671	+ <!-- for any non-CJK -->
	672	+ <filter class="solr.LowerCaseFilterFactory"/>
	673	+ <filter class="solr.CJKBigramFilterFactory"/>
	674	+ </analyzer>
	675	+ </fieldType>
	676	+
	677	+ <!-- Czech -->
	678	+ <dynamicField name="*_txt_cz" type="text_cz" indexed="true" stored="true"/>
	679	+ <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
	680	+ <analyzer>
	681	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	682	+ <filter class="solr.LowerCaseFilterFactory"/>
	683	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" />
	684	+ <filter class="solr.CzechStemFilterFactory"/>
	685	+ </analyzer>
	686	+ </fieldType>
	687	+
	688	+ <!-- Danish -->
	689	+ <dynamicField name="*_txt_da" type="text_da" indexed="true" stored="true"/>
	690	+ <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
	691	+ <analyzer>
	692	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	693	+ <filter class="solr.LowerCaseFilterFactory"/>
	694	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" />
	695	+ <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
	696	+ </analyzer>
	697	+ </fieldType>
	698	+
	699	+ <!-- German -->
	700	+ <dynamicField name="*_txt_de" type="text_de" indexed="true" stored="true"/>
	701	+ <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
	702	+ <analyzer>
	703	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	704	+ <filter class="solr.LowerCaseFilterFactory"/>
	705	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
	706	+ <filter class="solr.GermanNormalizationFilterFactory"/>
	707	+ <filter class="solr.GermanLightStemFilterFactory"/>
	708	+ <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
	709	+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
	710	+ </analyzer>
	711	+ </fieldType>
	712	+
	713	+ <!-- Greek -->
	714	+ <dynamicField name="*_txt_el" type="text_el" indexed="true" stored="true"/>
	715	+ <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
	716	+ <analyzer>
	717	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	718	+ <!-- greek specific lowercase for sigma -->
	719	+ <filter class="solr.GreekLowerCaseFilterFactory"/>
	720	+ <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" />
	721	+ <filter class="solr.GreekStemFilterFactory"/>
	722	+ </analyzer>
	723	+ </fieldType>
	724	+
	725	+ <!-- Spanish -->
	726	+ <dynamicField name="*_txt_es" type="text_es" indexed="true" stored="true"/>
	727	+ <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
	728	+ <analyzer>
	729	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	730	+ <filter class="solr.LowerCaseFilterFactory"/>
	731	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
	732	+ <filter class="solr.SpanishLightStemFilterFactory"/>
	733	+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
	734	+ </analyzer>
	735	+ </fieldType>
	736	+
	737	+ <!-- Basque -->
	738	+ <dynamicField name="*_txt_eu" type="text_eu" indexed="true" stored="true"/>
	739	+ <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
	740	+ <analyzer>
	741	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	742	+ <filter class="solr.LowerCaseFilterFactory"/>
	743	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" />
	744	+ <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
	745	+ </analyzer>
	746	+ </fieldType>
	747	+
	748	+ <!-- Persian -->
	749	+ <dynamicField name="*_txt_fa" type="text_fa" indexed="true" stored="true"/>
	750	+ <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
	751	+ <analyzer>
	752	+ <!-- for ZWNJ -->
	753	+ <charFilter class="solr.PersianCharFilterFactory"/>
	754	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	755	+ <filter class="solr.LowerCaseFilterFactory"/>
	756	+ <filter class="solr.ArabicNormalizationFilterFactory"/>
	757	+ <filter class="solr.PersianNormalizationFilterFactory"/>
	758	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" />
	759	+ </analyzer>
	760	+ </fieldType>
	761	+
	762	+ <!-- Finnish -->
	763	+ <dynamicField name="*_txt_fi" type="text_fi" indexed="true" stored="true"/>
	764	+ <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
	765	+ <analyzer>
	766	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	767	+ <filter class="solr.LowerCaseFilterFactory"/>
	768	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" />
	769	+ <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
	770	+ <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
	771	+ </analyzer>
	772	+ </fieldType>
	773	+
	774	+ <!-- French -->
	775	+ <dynamicField name="*_txt_fr" type="text_fr" indexed="true" stored="true"/>
	776	+ <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
	777	+ <analyzer>
	778	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	779	+ <!-- removes l', etc -->
	780	+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
	781	+ <filter class="solr.LowerCaseFilterFactory"/>
	782	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
	783	+ <filter class="solr.FrenchLightStemFilterFactory"/>
	784	+ <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
	785	+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
	786	+ </analyzer>
	787	+ </fieldType>
	788	+
	789	+ <!-- Irish -->
	790	+ <dynamicField name="*_txt_ga" type="text_ga" indexed="true" stored="true"/>
	791	+ <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
	792	+ <analyzer>
	793	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	794	+ <!-- removes d', etc -->
	795	+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
	796	+ <!-- removes n-, etc. Position increments are meant to be disabled here, but the filter below carries no attribute for that; TODO verify. -->
	797	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
	798	+ <filter class="solr.IrishLowerCaseFilterFactory"/>
	799	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/>
	800	+ <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
	801	+ </analyzer>
	802	+ </fieldType>
	803	+
	804	+ <!-- Galician -->
	805	+ <dynamicField name="*_txt_gl" type="text_gl" indexed="true" stored="true"/>
	806	+ <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
	807	+ <analyzer>
	808	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	809	+ <filter class="solr.LowerCaseFilterFactory"/>
	810	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" />
	811	+ <filter class="solr.GalicianStemFilterFactory"/>
	812	+ <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
	813	+ </analyzer>
	814	+ </fieldType>
	815	+
	816	+ <!-- Hindi -->
	817	+ <dynamicField name="*_txt_hi" type="text_hi" indexed="true" stored="true"/>
	818	+ <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
	819	+ <analyzer>
	820	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	821	+ <filter class="solr.LowerCaseFilterFactory"/>
	822	+ <!-- normalizes unicode representation -->
	823	+ <filter class="solr.IndicNormalizationFilterFactory"/>
	824	+ <!-- normalizes variation in spelling -->
	825	+ <filter class="solr.HindiNormalizationFilterFactory"/>
	826	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" />
	827	+ <filter class="solr.HindiStemFilterFactory"/>
	828	+ </analyzer>
	829	+ </fieldType>
	830	+
	831	+ <!-- Hungarian -->
	832	+ <dynamicField name="*_txt_hu" type="text_hu" indexed="true" stored="true"/>
	833	+ <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
	834	+ <analyzer>
	835	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	836	+ <filter class="solr.LowerCaseFilterFactory"/>
	837	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" />
	838	+ <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
	839	+ <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->
	840	+ </analyzer>
	841	+ </fieldType>
	842	+
	843	+ <!-- Armenian -->
	844	+ <dynamicField name="*_txt_hy" type="text_hy" indexed="true" stored="true"/>
	845	+ <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
	846	+ <analyzer>
	847	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	848	+ <filter class="solr.LowerCaseFilterFactory"/>
	849	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" />
	850	+ <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
	851	+ </analyzer>
	852	+ </fieldType>
	853	+
	854	+ <!-- Indonesian -->
	855	+ <dynamicField name="*_txt_id" type="text_id" indexed="true" stored="true"/>
	856	+ <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
	857	+ <analyzer>
	858	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	859	+ <filter class="solr.LowerCaseFilterFactory"/>
	860	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" />
	861	+ <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
	862	+ <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
	863	+ </analyzer>
	864	+ </fieldType>
	865	+
	866	+ <!-- Italian -->
	867	+ <dynamicField name="*_txt_it" type="text_it" indexed="true" stored="true"/>
	868	+ <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
	869	+ <analyzer>
	870	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	871	+ <!-- removes l', etc -->
	872	+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
	873	+ <filter class="solr.LowerCaseFilterFactory"/>
	874	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" />
	875	+ <filter class="solr.ItalianLightStemFilterFactory"/>
	876	+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
	877	+ </analyzer>
	878	+ </fieldType>
	879	+
	880	+ <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming)
	881	+
	882	+ NOTE: If you want to optimize search for precision, use default operator AND in your query
	883	+ parser config with <solrQueryParser defaultOperator="AND"/> further down in this file. Use
	884	+ OR if you would like to optimize for recall (default).
	885	+ -->
	886	+ <dynamicField name="*_txt_ja" type="text_ja" indexed="true" stored="true"/>
	887	+ <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
	888	+ <analyzer>
	889	+ <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer)
	890	+
	891	+ Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic
	892	+ is used to segment compounds into their parts and the compound itself is kept as a synonym.
	893	+
	894	+ Valid values for attribute mode are:
	895	+ normal: regular segmentation
	896	+ search: segmentation useful for search, with compounds kept as synonyms (default)
	897	+ extended: same as search mode, but also splits unknown words into unigrams (experimental)
	898	+
	899	+ For some applications it might be good to use search mode for indexing and normal mode for
	900	+ queries to reduce recall and prevent parts of compounds from being matched and highlighted.
	901	+ Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query.
	902	+
	903	+ Kuromoji also has a convenient user dictionary feature that allows overriding the statistical
	904	+ model with your own entries for segmentation, part-of-speech tags and readings without a need
	905	+ to specify weights. Notice that user dictionaries have not been subject to extensive testing.
	906	+
	907	+ User dictionary attributes are:
	908	+ userDictionary: user dictionary filename
	909	+ userDictionaryEncoding: user dictionary encoding (default is UTF-8)
	910	+
	911	+ See lang/userdict_ja.txt for a sample user dictionary file.
	912	+
	913	+ Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them.
	914	+
	915	+ See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
	916	+ -->
	917	+ <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
	918	+ <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
	919	+ <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
	920	+ <filter class="solr.JapaneseBaseFormFilterFactory"/>
	921	+ <!-- Removes tokens with certain part-of-speech tags -->
	922	+ <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" />
	923	+ <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
	924	+ <filter class="solr.CJKWidthFilterFactory"/>
	925	+ <!-- Removes common tokens that are typically not useful for search but have a negative effect on ranking -->
	926	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" />
	927	+ <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
	928	+ <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
	929	+ <!-- Lower-cases romaji characters -->
	930	+ <filter class="solr.LowerCaseFilterFactory"/>
	931	+ </analyzer>
	932	+ </fieldType>
	933	+
	934	+ <!-- Latvian -->
	935	+ <dynamicField name="*_txt_lv" type="text_lv" indexed="true" stored="true"/>
	936	+ <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
	937	+ <analyzer>
	938	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	939	+ <filter class="solr.LowerCaseFilterFactory"/>
	940	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" />
	941	+ <filter class="solr.LatvianStemFilterFactory"/>
	942	+ </analyzer>
	943	+ </fieldType>
	944	+
	945	+ <!-- Dutch -->
	946	+ <dynamicField name="*_txt_nl" type="text_nl" indexed="true" stored="true"/>
	947	+ <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
	948	+ <analyzer>
	949	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	950	+ <filter class="solr.LowerCaseFilterFactory"/>
	951	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" />
	952	+ <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
	953	+ <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
	954	+ </analyzer>
	955	+ </fieldType>
	956	+
	957	+ <!-- Norwegian -->
	958	+ <dynamicField name="*_txt_no" type="text_no" indexed="true" stored="true"/>
	959	+ <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
	960	+ <analyzer>
	961	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	962	+ <filter class="solr.LowerCaseFilterFactory"/>
	963	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" />
	964	+ <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
	965	+ <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> -->
	966	+ <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> -->
	967	+ </analyzer>
	968	+ </fieldType>
	969	+
	970	+ <!-- Portuguese -->
	971	+ <dynamicField name="*_txt_pt" type="text_pt" indexed="true" stored="true"/>
	972	+ <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
	973	+ <analyzer>
	974	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	975	+ <filter class="solr.LowerCaseFilterFactory"/>
	976	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" />
	977	+ <filter class="solr.PortugueseLightStemFilterFactory"/>
	978	+ <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
	979	+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
	980	+ <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
	981	+ </analyzer>
	982	+ </fieldType>
	983	+
	984	+ <!-- Romanian -->
	985	+ <dynamicField name="*_txt_ro" type="text_ro" indexed="true" stored="true"/>
	986	+ <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
	987	+ <analyzer>
	988	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	989	+ <filter class="solr.LowerCaseFilterFactory"/>
	990	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" />
	991	+ <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
	992	+ </analyzer>
	993	+ </fieldType>
	994	+
	995	+ <!-- Russian -->
	996	+ <dynamicField name="*_txt_ru" type="text_ru" indexed="true" stored="true"/>
	997	+ <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
	998	+ <analyzer>
	999	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	1000	+ <filter class="solr.LowerCaseFilterFactory"/>
	1001	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
	1002	+ <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
	1003	+ <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->
	1004	+ </analyzer>
	1005	+ </fieldType>
	1006	+
	1007	+ <!-- Swedish -->
	1008	+ <dynamicField name="*_txt_sv" type="text_sv" indexed="true" stored="true"/>
	1009	+ <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
	1010	+ <analyzer>
	1011	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	1012	+ <filter class="solr.LowerCaseFilterFactory"/>
	1013	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" />
	1014	+ <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
	1015	+ <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
	1016	+ </analyzer>
	1017	+ </fieldType>
	1018	+
	1019	+ <!-- Thai -->
	1020	+ <dynamicField name="*_txt_th" type="text_th" indexed="true" stored="true"/>
	1021	+ <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
	1022	+ <analyzer>
	1023	+ <tokenizer class="solr.ThaiTokenizerFactory"/>
	1024	+ <filter class="solr.LowerCaseFilterFactory"/>
	1025	+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" />
	1026	+ </analyzer>
	1027	+ </fieldType>
	1028	+
	1029	+ <!-- Turkish -->
	1030	+ <dynamicField name="*_txt_tr" type="text_tr" indexed="true" stored="true"/>
	1031	+ <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
	1032	+ <analyzer>
	1033	+ <tokenizer class="solr.StandardTokenizerFactory"/>
	1034	+ <filter class="solr.TurkishLowerCaseFilterFactory"/>
	1035	+ <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" />
	1036	+ <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
	1037	+ </analyzer>
	1038	+ </fieldType>
	1039	+
	1040	+ <!-- Similarity is the scoring routine for each document vs. a query.
	1041	+ A custom Similarity or SimilarityFactory may be specified here, but
	1042	+ the default is fine for most applications.
	1043	+ For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
	1044	+ -->
	1045	+ <!--
	1046	+ <similarity class="com.example.solr.CustomSimilarityFactory">
	1047	+ <str name="paramkey">param value</str>
	1048	+ </similarity>
	1049	+ -->
	1050	+</schema>
...	...

conf/solr/solrconfig.xml 0 → 100644

View file @0237256

	1	+++ a/conf/solr/solrconfig.xml
	1	+<?xml version="1.0" encoding="UTF-8" ?>
	2	+<!--
	3	+ Licensed to the Apache Software Foundation (ASF) under one or more
	4	+ contributor license agreements. See the NOTICE file distributed with
	5	+ this work for additional information regarding copyright ownership.
	6	+ The ASF licenses this file to You under the Apache License, Version 2.0
	7	+ (the "License"); you may not use this file except in compliance with
	8	+ the License. You may obtain a copy of the License at
	9	+
	10	+ http://www.apache.org/licenses/LICENSE-2.0
	11	+
	12	+ Unless required by applicable law or agreed to in writing, software
	13	+ distributed under the License is distributed on an "AS IS" BASIS,
	14	+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	15	+ See the License for the specific language governing permissions and
	16	+ limitations under the License.
	17	+-->
	18	+
	19	+<!--
	20	+ For more details about configurations options that may appear in
	21	+ this file, see http://wiki.apache.org/solr/SolrConfigXml.
	22	+-->
	23	+<config>
	24	+ <!-- In all configuration below, a prefix of "solr." for class names
	25	+ is an alias that causes solr to search appropriate packages,
	26	+ including org.apache.solr.(search|update|request|core|analysis)
	27	+
	28	+ You may also specify a fully qualified Java classname if you
	29	+ have your own custom plugins.
	30	+ -->
	31	+
	32	+ <!-- Controls what version of Lucene various components of Solr
	33	+ adhere to. Generally, you want to use the latest version to
	34	+ get all bug fixes and improvements. It is highly recommended
	35	+ that you fully re-index after changing this setting as it can
	36	+ affect both how text is indexed and queried.
	37	+ -->
	38	+ <luceneMatchVersion>6.0.0</luceneMatchVersion>
	39	+
	40	+ <!-- <lib/> directives can be used to instruct Solr to load any Jars
	41	+ identified and use them to resolve any "plugins" specified in
	42	+ your solrconfig.xml or schema.xml (ie: Analyzers, Request
	43	+ Handlers, etc...).
	44	+
	45	+ All directories and paths are resolved relative to the
	46	+ instanceDir.
	47	+
	48	+ Please note that <lib/> directives are processed in the order
	49	+ that they appear in your solrconfig.xml file, and are "stacked"
	50	+ on top of each other when building a ClassLoader - so if you have
	51	+ plugin jars with dependencies on other jars, the "lower level"
	52	+ dependency jars should be loaded first.
	53	+
	54	+ If a "./lib" directory exists in your instanceDir, all files
	55	+ found in it are included as if you had used the following
	56	+ syntax...
	57	+
	58	+ <lib dir="./lib" />
	59	+ -->
	60	+
	61	+ <!-- A 'dir' option by itself adds any files found in the directory
	62	+ to the classpath, this is useful for including all jars in a
	63	+ directory.
	64	+
	65	+ When a 'regex' is specified in addition to a 'dir', only the
	66	+ files in that directory which completely match the regex
	67	+ (anchored on both ends) will be included.
	68	+
	69	+ If a 'dir' option (with or without a regex) is used and nothing
	70	+ is found that matches, a warning will be logged.
	71	+
	72	+ The examples below can be used to load some solr-contribs along
	73	+ with their external dependencies.
	74	+ -->
	75	+ <lib dir="${solr.install.dir:../../..}/contrib/extraction/lib" regex=".*\.jar" />
	76	+ <lib dir="${solr.install.dir:../../..}/dist/" regex="solr-cell-\d.*\.jar" />
	77	+
	78	+ <lib dir="${solr.install.dir:../../..}/contrib/clustering/lib/" regex=".*\.jar" />
	79	+ <lib dir="${solr.install.dir:../../..}/dist/" regex="solr-clustering-\d.*\.jar" />
	80	+
	81	+ <lib dir="${solr.install.dir:../../..}/contrib/langid/lib/" regex=".*\.jar" />
	82	+ <lib dir="${solr.install.dir:../../..}/dist/" regex="solr-langid-\d.*\.jar" />
	83	+
	84	+ <lib dir="${solr.install.dir:../../..}/contrib/velocity/lib" regex=".*\.jar" />
	85	+ <lib dir="${solr.install.dir:../../..}/dist/" regex="solr-velocity-\d.*\.jar" />
	86	+ <!-- an exact 'path' can be used instead of a 'dir' to specify a
	87	+ specific jar file. This will cause a serious error to be logged
	88	+ if it can't be loaded.
	89	+ -->
	90	+ <!--
	91	+ <lib path="../a-jar-that-does-not-exist.jar" />
	92	+ -->
	93	+
	94	+ <!-- Data Directory
	95	+
	96	+ Used to specify an alternate directory to hold all index data
	97	+ other than the default ./data under the Solr home. If
	98	+ replication is in use, this should match the replication
	99	+ configuration.
	100	+ -->
	101	+ <dataDir>${solr.data.dir:}</dataDir>
	102	+
	103	+
	104	+ <!-- The DirectoryFactory to use for indexes.
	105	+
	106	+ solr.StandardDirectoryFactory is filesystem
	107	+ based and tries to pick the best implementation for the current
	108	+ JVM and platform. solr.NRTCachingDirectoryFactory, the default,
	109	+ wraps solr.StandardDirectoryFactory and caches small files in memory
	110	+ for better NRT performance.
	111	+
	112	+ One can force a particular implementation via solr.MMapDirectoryFactory,
	113	+ solr.NIOFSDirectoryFactory, or solr.SimpleFSDirectoryFactory.
	114	+
	115	+ solr.RAMDirectoryFactory is memory based, not
	116	+ persistent, and doesn't work with replication.
	117	+ -->
	118	+ <directoryFactory name="DirectoryFactory"
	119	+ class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
	120	+
	121	+ <!-- The CodecFactory for defining the format of the inverted index.
	122	+ The default implementation is SchemaCodecFactory, which is the official Lucene
	123	+ index format, but hooks into the schema to provide per-field customization of
	124	+ the postings lists and per-document values in the fieldType element
	125	+ (postingsFormat/docValuesFormat). Note that most of the alternative implementations
	126	+ are experimental, so if you choose to customize the index format, it's a good
	127	+ idea to convert back to the official format e.g. via IndexWriter.addIndexes(IndexReader)
	128	+ before upgrading to a newer version to avoid unnecessary reindexing.
	129	+ -->
	130	+ <codecFactory class="solr.SchemaCodecFactory"/>
	131	+
	132	+ <!-- To disable dynamic schema REST APIs, use the following for <schemaFactory>:
	133	+
	134	+ <schemaFactory class="ClassicIndexSchemaFactory"/>
	135	+
	136	+ When ManagedIndexSchemaFactory is specified instead, Solr will load the schema from
	137	+ the resource named in 'managedSchemaResourceName', rather than from schema.xml.
	138	+ Note that the managed schema resource CANNOT be named schema.xml. If the managed
	139	+ schema does not exist, Solr will create it after reading schema.xml, then rename
	140	+ 'schema.xml' to 'schema.xml.bak'.
	141	+
	142	+ Do NOT hand edit the managed schema - external modifications will be ignored and
	143	+ overwritten as a result of schema modification REST API calls.
	144	+
	145	+ When ManagedIndexSchemaFactory is specified with mutable = true, schema
	146	+ modification REST API calls will be allowed; otherwise, error responses will be
	147	+ sent back for these requests.
	148	+ -->
	149	+ <!--
	150	+ <schemaFactory class="ManagedIndexSchemaFactory">
	151	+ <bool name="mutable">true</bool>
	152	+ <str name="managedSchemaResourceName">managed-schema</str>
	153	+ </schemaFactory>
	154	+ -->
	155	+ <schemaFactory class="ClassicIndexSchemaFactory"/>
	156	+
	157	+ <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	158	+ Index Config - These settings control low-level behavior of indexing
	159	+ Most example settings here show the default value, but are commented
	160	+ out, to more easily see where customizations have been made.
	161	+
	162	+ Note: This replaces <indexDefaults> and <mainIndex> from older versions
	163	+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
	164	+ <indexConfig>
	165	+ <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a
	166	+ LimitTokenCountFilterFactory in your fieldType definition. E.g.
	167	+ <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
	168	+ -->
	169	+ <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 -->
	170	+ <!-- <writeLockTimeout>1000</writeLockTimeout> -->
	171	+
	172	+ <!-- The maximum number of simultaneous threads that may be
	173	+ indexing documents at once in IndexWriter; if more than this
	174	+ many threads arrive they will wait for others to finish.
	175	+ Default in Solr/Lucene is 8. -->
	176	+ <!-- <maxIndexingThreads>8</maxIndexingThreads> -->
	177	+
	178	+ <!-- Expert: Enabling compound file will use fewer files for the index,
	179	+ using fewer file descriptors at the expense of performance.
	180	+ Default in Lucene is "true". Default in Solr is "false" (since 3.6) -->
	181	+ <!-- <useCompoundFile>false</useCompoundFile> -->
	182	+
	183	+ <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene
	184	+ indexing for buffering added documents and deletions before they are
	185	+ flushed to the Directory.
	186	+ maxBufferedDocs sets a limit on the number of documents buffered
	187	+ before flushing.
	188	+ If both ramBufferSizeMB and maxBufferedDocs are set, then
	189	+ Lucene will flush based on whichever limit is hit first. -->
	190	+ <!-- <ramBufferSizeMB>100</ramBufferSizeMB> -->
	191	+ <!-- <maxBufferedDocs>1000</maxBufferedDocs> -->
	192	+
	193	+ <!-- Expert: Merge Policy
	194	+ The Merge Policy in Lucene controls how merging of segments is done.
	195	+ The default since Solr/Lucene 3.3 is TieredMergePolicy.
	196	+ The default since Lucene 2.3 was the LogByteSizeMergePolicy,
	197	+ Even older versions of Lucene used LogDocMergePolicy.
	198	+ -->
	199	+ <!--
	200	+ <mergePolicy class="org.apache.lucene.index.TieredMergePolicy">
	201	+ <int name="maxMergeAtOnce">10</int>
	202	+ <int name="segmentsPerTier">10</int>
	203	+ <double name="noCFSRatio">0.1</double>
	204	+ </mergePolicy>
	205	+ -->
	206	+
	207	+ <!-- Merge Factor
	208	+ The merge factor controls how many segments will get merged at a time.
	209	+ For TieredMergePolicy, mergeFactor is a convenience parameter which
	210	+ will set both MaxMergeAtOnce and SegmentsPerTier at once.
	211	+ For LogByteSizeMergePolicy, mergeFactor decides how many new segments
	212	+ will be allowed before they are merged into one.
	213	+ Default is 10 for both merge policies.
	214	+ -->
	215	+ <!--
	216	+ <mergeFactor>10</mergeFactor>
	217	+ -->
	218	+
	219	+ <!-- Expert: Merge Scheduler
	220	+ The Merge Scheduler in Lucene controls how merges are
	221	+ performed. The ConcurrentMergeScheduler (Lucene 2.3 default)
	222	+ can perform merges in the background using separate threads.
	223	+ The SerialMergeScheduler (Lucene 2.2 default) does not.
	224	+ -->
	225	+ <!--
	226	+ <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/>
	227	+ -->
	228	+
	229	+ <!-- LockFactory
	230	+
	231	+ This option specifies which Lucene LockFactory implementation
	232	+ to use.
	233	+
	234	+ single = SingleInstanceLockFactory - suggested for a
	235	+ read-only index or when there is no possibility of
	236	+ another process trying to modify the index.
	237	+ native = NativeFSLockFactory - uses OS native file locking.
	238	+ Do not use when multiple solr webapps in the same
	239	+ JVM are attempting to share a single index.
	240	+ simple = SimpleFSLockFactory - uses a plain file for locking
	241	+
	242	+ Defaults: 'native' is default for Solr3.6 and later, otherwise
	243	+ 'simple' is the default
	244	+
	245	+ More details on the nuances of each LockFactory...
	246	+ http://wiki.apache.org/lucene-java/AvailableLockFactories
	247	+ -->
	248	+ <lockType>${solr.lock.type:native}</lockType>
	249	+
	250	+ <!-- Unlock On Startup
	251	+
	252	+ If true, unlock any held write or commit locks on startup.
	253	+ This defeats the locking mechanism that allows multiple
	254	+ processes to safely access a lucene index, and should be used
	255	+ with care. Default is "false".
	256	+
	257	+ This is not needed if lock type is 'single'
	258	+ -->
	259	+ <!--
	260	+ <unlockOnStartup>false</unlockOnStartup>
	261	+ -->
	262	+
	263	+ <!-- Commit Deletion Policy
	264	+ Custom deletion policies can be specified here. The class must
	265	+ implement org.apache.lucene.index.IndexDeletionPolicy.
	266	+
	267	+ The default Solr IndexDeletionPolicy implementation supports
	268	+ deleting index commit points on number of commits, age of
	269	+ commit point and optimized status.
	270	+
	271	+ The latest commit point should always be preserved regardless
	272	+ of the criteria.
	273	+ -->
	274	+ <!--
	275	+ <deletionPolicy class="solr.SolrDeletionPolicy">
	276	+ -->
	277	+ <!-- The number of commit points to be kept -->
	278	+ <!-- <str name="maxCommitsToKeep">1</str> -->
	279	+ <!-- The number of optimized commit points to be kept -->
	280	+ <!-- <str name="maxOptimizedCommitsToKeep">0</str> -->
	281	+ <!--
	282	+ Delete all commit points once they have reached the given age.
	283	+ Supports DateMathParser syntax e.g.
	284	+ -->
	285	+ <!--
	286	+ <str name="maxCommitAge">30MINUTES</str>
	287	+ <str name="maxCommitAge">1DAY</str>
	288	+ -->
	289	+ <!--
	290	+ </deletionPolicy>
	291	+ -->
	292	+
	293	+ <!-- Lucene Infostream
	294	+
	295	+ To aid in advanced debugging, Lucene provides an "InfoStream"
	296	+ of detailed information when indexing.
	297	+
	298	+ Setting the value to true will instruct the underlying Lucene
	299	+ IndexWriter to write its debugging info to the specified file
	300	+ -->
	301	+ <!-- <infoStream file="INFOSTREAM.txt">false</infoStream> -->
	302	+ </indexConfig>
	303	+
	304	+
	305	+ <!-- JMX
	306	+
	307	+ This example enables JMX if and only if an existing MBeanServer
	308	+ is found, use this if you want to configure JMX through JVM
	309	+ parameters. Remove this to disable exposing Solr configuration
	310	+ and statistics to JMX.
	311	+
	312	+ For more details see http://wiki.apache.org/solr/SolrJmx
	313	+ -->
	314	+ <jmx />
	315	+ <!-- If you want to connect to a particular server, specify the
	316	+ agentId
	317	+ -->
	318	+ <!-- <jmx agentId="myAgent" /> -->
	319	+ <!-- If you want to start a new MBeanServer, specify the serviceUrl -->
	320	+ <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/>
	321	+ -->
	322	+
	323	+ <!-- The default high-performance update handler -->
	324	+ <updateHandler class="solr.DirectUpdateHandler2">
	325	+
	326	+ <!-- Enables a transaction log, used for real-time get, durability, and
	327	+ solr cloud replica recovery. The log can grow as big as
	328	+ uncommitted changes to the index, so use of a hard autoCommit
	329	+ is recommended (see below).
	330	+ "dir" - the target directory for transaction logs, defaults to the
	331	+ solr data directory.
	332	+ "numVersionBuckets" - sets the number of buckets used to keep
	333	+ track of max version values when checking for re-ordered
	334	+ updates; increase this value to reduce the cost of
	335	+ synchronizing access to version buckets during high-volume
	336	+ indexing, this requires 8 bytes (long) * numVersionBuckets
	337	+ of heap space per Solr core.
	338	+ -->
	339	+ <updateLog>
	340	+ <str name="dir">${solr.ulog.dir:}</str>
	341	+ <int name="numVersionBuckets">${solr.ulog.numVersionBuckets:65536}</int>
	342	+ </updateLog>
	343	+
	344	+ <!-- AutoCommit
	345	+
	346	+ Perform a hard commit automatically under certain conditions.
	347	+ Instead of enabling autoCommit, consider using "commitWithin"
	348	+ when adding documents.
	349	+
	350	+ http://wiki.apache.org/solr/UpdateXmlMessages
	351	+
	352	+ maxDocs - Maximum number of documents to add since the last
	353	+ commit before automatically triggering a new commit.
	354	+
	355	+ maxTime - Maximum amount of time in ms that is allowed to pass
	356	+ since a document was added before automatically
	357	+ triggering a new commit.
	358	+ openSearcher - if false, the commit causes recent index changes
	359	+ to be flushed to stable storage, but does not cause a new
	360	+ searcher to be opened to make those changes visible.
	361	+
	362	+ If the updateLog is enabled, then it's highly recommended to
	363	+ have some sort of hard autoCommit to limit the log size.
	364	+ -->
	365	+ <autoCommit>
	366	+ <maxTime>${solr.autoCommit.maxTime:15000}</maxTime>
	367	+ <openSearcher>false</openSearcher>
	368	+ </autoCommit>
	369	+
	370	+ <!-- autoSoftCommit is like autoCommit except it causes a
	371	+ 'soft' commit which only ensures that changes are visible
	372	+ but does not ensure that data is synced to disk. This is
	373	+ faster and more near-realtime friendly than a hard commit.
	374	+ -->
	375	+
	376	+ <autoSoftCommit>
	377	+ <maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime>
	378	+ </autoSoftCommit>
	379	+
	380	+ <!-- Update Related Event Listeners
	381	+
	382	+ Various IndexWriter related events can trigger Listeners to
	383	+ take actions.
	384	+
	385	+ postCommit - fired after every commit or optimize command
	386	+ postOptimize - fired after every optimize command
	387	+ -->
	388	+ <!-- The RunExecutableListener executes an external command from a
	389	+ hook such as postCommit or postOptimize.
	390	+
	391	+ exe - the name of the executable to run
	392	+ dir - dir to use as the current working directory. (default=".")
	393	+ wait - the calling thread waits until the executable returns.
	394	+ (default="true")
	395	+ args - the arguments to pass to the program. (default is none)
	396	+ env - environment variables to set. (default is none)
	397	+ -->
	398	+ <!-- This example shows how RunExecutableListener could be used
	399	+ with the script based replication...
	400	+ http://wiki.apache.org/solr/CollectionDistribution
	401	+ -->
	402	+ <!--
	403	+ <listener event="postCommit" class="solr.RunExecutableListener">
	404	+ <str name="exe">solr/bin/snapshooter</str>
	405	+ <str name="dir">.</str>
	406	+ <bool name="wait">true</bool>
	407	+ <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
	408	+ <arr name="env"> <str>MYVAR=val1</str> </arr>
	409	+ </listener>
	410	+ -->
	411	+
	412	+ </updateHandler>
	413	+
	414	+ <!-- IndexReaderFactory
	415	+
	416	+ Use the following format to specify a custom IndexReaderFactory,
	417	+ which allows for alternate IndexReader implementations.
	418	+
	419	+ Experimental Feature
	420	+
	421	+ Please note - Using a custom IndexReaderFactory may prevent
	422	+ certain other features from working. The API to
	423	+ IndexReaderFactory may change without warning or may even be
	424	+ removed from future releases if the problems cannot be
	425	+ resolved.
	426	+
	427	+
	428	+ Features that may not work with custom IndexReaderFactory
	429	+
	430	+ The ReplicationHandler assumes a disk-resident index. Using a
	431	+ custom IndexReader implementation may cause incompatibility
	432	+ with ReplicationHandler and may cause replication to not work
	433	+ correctly. See SOLR-1366 for details.
	434	+
	435	+ -->
	436	+ <!--
	437	+ <indexReaderFactory name="IndexReaderFactory" class="package.class">
	438	+ <str name="someArg">Some Value</str>
	439	+ </indexReaderFactory >
	440	+ -->
	441	+
	442	+ <!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	443	+ Query section - these settings control query time things like caches
	444	+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
	445	+ <query>
	446	+ <!-- Max Boolean Clauses
	447	+
	448	+ Maximum number of clauses in each BooleanQuery, an exception
	449	+ is thrown if exceeded.
	450	+
	451	+ WARNING
	452	+
	453	+ This option actually modifies a global Lucene property that
	454	+ will affect all SolrCores. If multiple solrconfig.xml files
	455	+ disagree on this property, the value at any given moment will
	456	+ be based on the last SolrCore to be initialized.
	457	+
	458	+ -->
	459	+ <maxBooleanClauses>1024</maxBooleanClauses>
	460	+
	461	+
	462	+ <!-- Solr Internal Query Caches
	463	+
	464	+ There are two implementations of cache available for Solr,
	465	+ LRUCache, based on a synchronized LinkedHashMap, and
	466	+ FastLRUCache, based on a ConcurrentHashMap.
	467	+
	468	+ FastLRUCache has faster gets and slower puts in single
	469	+ threaded operation and thus is generally faster than LRUCache
	470	+ when the hit ratio of the cache is high (> 75%), and may be
	471	+ faster under other scenarios on multi-cpu systems.
	472	+ -->
	473	+
	474	+ <!-- Filter Cache
	475	+
	476	+ Cache used by SolrIndexSearcher for filters (DocSets),
	477	+ unordered sets of all documents that match a query. When a
	478	+ new searcher is opened, its caches may be prepopulated or
	479	+ "autowarmed" using data from caches in the old searcher.
	480	+ autowarmCount is the number of items to prepopulate. For
	481	+ LRUCache, the autowarmed items will be the most recently
	482	+ accessed items.
	483	+
	484	+ Parameters:
	485	+ class - the SolrCache implementation to use
	486	+ (LRUCache or FastLRUCache)
	487	+ size - the maximum number of entries in the cache
	488	+ initialSize - the initial capacity (number of entries) of
	489	+ the cache. (see java.util.HashMap)
	490	+ autowarmCount - the number of entries to prepopulate from
	491	+ an old cache.
	492	+ -->
	493	+ <filterCache class="solr.FastLRUCache"
	494	+ size="512"
	495	+ initialSize="512"
	496	+ autowarmCount="0"/>
	497	+
	498	+ <!-- Query Result Cache
	499	+
	500	+ Caches results of searches - ordered lists of document ids
	501	+ (DocList) based on a query, a sort, and the range of documents requested.
	502	+ Additional supported parameter by LRUCache:
	503	+ maxRamMB - the maximum amount of RAM (in MB) that this cache is allowed
	504	+ to occupy
	505	+ -->
	506	+ <queryResultCache class="solr.LRUCache"
	507	+ size="512"
	508	+ initialSize="512"
	509	+ autowarmCount="0"/>
	510	+
	511	+ <!-- Document Cache
	512	+
	513	+ Caches Lucene Document objects (the stored fields for each
	514	+ document). Since Lucene internal document ids are transient,
	515	+ this cache will not be autowarmed.
	516	+ -->
	517	+ <documentCache class="solr.LRUCache"
	518	+ size="512"
	519	+ initialSize="512"
	520	+ autowarmCount="0"/>
	521	+
	522	+ <!-- Field Value Cache
	523	+
	524	+ Cache used to hold field values that are quickly accessible
	525	+ by document id. The fieldValueCache is created by default
	526	+ even if not configured here.
	527	+ -->
	528	+ <!--
	529	+ <fieldValueCache class="solr.FastLRUCache"
	530	+ size="512"
	531	+ autowarmCount="128"
	532	+ showItems="32" />
	533	+ -->
	534	+
	535	+ <!-- Custom Cache
	536	+
	537	+ Example of a generic cache. These caches may be accessed by
	538	+ name through SolrIndexSearcher.getCache(),cacheLookup(), and
	539	+ cacheInsert(). The purpose is to enable easy caching of
	540	+ user/application level data. The regenerator argument should
	541	+ be specified as an implementation of solr.CacheRegenerator
	542	+ if autowarming is desired.
	543	+ -->
	544	+ <!--
	545	+ <cache name="myUserCache"
	546	+ class="solr.LRUCache"
	547	+ size="4096"
	548	+ initialSize="1024"
	549	+ autowarmCount="1024"
	550	+ regenerator="com.mycompany.MyRegenerator"
	551	+ />
	552	+ -->
	553	+
	554	+
	555	+ <!-- Lazy Field Loading
	556	+
	557	+ If true, stored fields that are not requested will be loaded
	558	+ lazily. This can result in a significant speed improvement
	559	+ if the usual case is to not load all stored fields,
	560	+ especially if the skipped fields are large compressed text
	561	+ fields.
	562	+ -->
	563	+ <enableLazyFieldLoading>true</enableLazyFieldLoading>
	564	+
	565	+ <!-- Use Filter For Sorted Query
	566	+
	567	+ A possible optimization that attempts to use a filter to
	568	+ satisfy a search. If the requested sort does not include
	569	+ score, then the filterCache will be checked for a filter
	570	+ matching the query. If found, the filter will be used as the
	571	+ source of document ids, and then the sort will be applied to
	572	+ that.
	573	+
	574	+ For most situations, this will not be useful unless you
	575	+ frequently get the same search repeatedly with different sort
	576	+ options, and none of them ever use "score"
	577	+ -->
	578	+ <!--
	579	+ <useFilterForSortedQuery>true</useFilterForSortedQuery>
	580	+ -->
	581	+
	582	+ <!-- Result Window Size
	583	+
	584	+ An optimization for use with the queryResultCache. When a search
	585	+ is requested, a superset of the requested number of document ids
	586	+ are collected. For example, if a search for a particular query
	587	+ requests matching documents 10 through 19, and queryResultWindowSize is 50,
	588	+ then documents 0 through 49 will be collected and cached. Any further
	589	+ requests in that range can be satisfied via the cache.
	590	+ -->
	591	+ <queryResultWindowSize>20</queryResultWindowSize>
	592	+
	593	+ <!-- Maximum number of documents to cache for any entry in the
	594	+ queryResultCache.
	595	+ -->
	596	+ <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
	597	+
	598	+ <!-- Query Related Event Listeners
	599	+
	600	+ Various IndexSearcher related events can trigger Listeners to
	601	+ take actions.
	602	+
	603	+ newSearcher - fired whenever a new searcher is being prepared
	604	+ and there is a current searcher handling requests (aka
	605	+ registered). It can be used to prime certain caches to
	606	+ prevent long request times for certain requests.
	607	+
	608	+ firstSearcher - fired whenever a new searcher is being
	609	+ prepared but there is no current registered searcher to handle
	610	+ requests or to gain autowarming data from.
	611	+
	612	+
	613	+ -->
	614	+ <!-- QuerySenderListener takes an array of NamedList and executes a
	615	+ local query request for each NamedList in sequence.
	616	+ -->
	617	+ <listener event="newSearcher" class="solr.QuerySenderListener">
	618	+ <arr name="queries">
	619	+ <!--
	620	+ <lst><str name="q">solr</str><str name="sort">price asc</str></lst>
	621	+ <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst>
	622	+ -->
	623	+ </arr>
	624	+ </listener>
	625	+ <listener event="firstSearcher" class="solr.QuerySenderListener">
	626	+ <arr name="queries">
	627	+ <!--
	628	+ <lst>
	629	+ <str name="q">static firstSearcher warming in solrconfig.xml</str>
	630	+ </lst>
	631	+ -->
	632	+ </arr>
	633	+ </listener>
	634	+
	635	+ <!-- Use Cold Searcher
	636	+
	637	+ If a search request comes in and there is no current
	638	+ registered searcher, then immediately register the still
	639	+ warming searcher and use it. If "false" then all requests
	640	+ will block until the first searcher is done warming.
	641	+ -->
	642	+ <useColdSearcher>false</useColdSearcher>
	643	+
	644	+ <!-- Max Warming Searchers
	645	+
	646	+ Maximum number of searchers that may be warming in the
	647	+ background concurrently. An error is returned if this limit
	648	+ is exceeded.
	649	+
	650	+ Recommend values of 1-2 for read-only slaves, higher for
	651	+ masters w/o cache warming.
	652	+ -->
	653	+ <maxWarmingSearchers>2</maxWarmingSearchers>
	654	+
	655	+ </query>
	656	+
	657	+
	658	+ <!-- Request Dispatcher
	659	+
	660	+ This section contains instructions for how the SolrDispatchFilter
	661	+ should behave when processing requests for this SolrCore.
	662	+
	663	+ handleSelect is a legacy option that affects the behavior of requests
	664	+ such as /select?qt=XXX
	665	+
	666	+ handleSelect="true" will cause the SolrDispatchFilter to process
	667	+ the request and dispatch the query to a handler specified by the
	668	+ "qt" param, assuming "/select" isn't already registered.
	669	+
	670	+ handleSelect="false" will cause the SolrDispatchFilter to
	671	+ ignore "/select" requests, resulting in a 404 unless a handler
	672	+ is explicitly registered with the name "/select"
	673	+
	674	+ handleSelect="true" is not recommended for new users, but is the default
	675	+ for backwards compatibility
	676	+ -->
	677	+ <requestDispatcher handleSelect="false" >
	678	+ <!-- Request Parsing
	679	+
	680	+ These settings indicate how Solr Requests may be parsed, and
	681	+ what restrictions may be placed on the ContentStreams from
	682	+ those requests
	683	+
	684	+ enableRemoteStreaming - enables use of the stream.file
	685	+ and stream.url parameters for specifying remote streams.
	686	+
	687	+ multipartUploadLimitInKB - specifies the max size (in KiB) of
	688	+ Multipart File Uploads that Solr will allow in a Request.
	689	+
	690	+ formdataUploadLimitInKB - specifies the max size (in KiB) of
	691	+ form data (application/x-www-form-urlencoded) sent via
	692	+ POST. You can use POST to pass request parameters not
	693	+ fitting into the URL.
	694	+
	695	+ addHttpRequestToContext - if set to true, it will instruct
	696	+ the requestParsers to include the original HttpServletRequest
	697	+ object in the context map of the SolrQueryRequest under the
	698	+ key "httpRequest". It will not be used by any of the existing
	699	+ Solr components, but may be useful when developing custom
	700	+ plugins.
	701	+
	702	+ * WARNING *
	703	+ The settings below authorize Solr to fetch remote files. You
	704	+ should make sure your system has some authentication before
	705	+ using enableRemoteStreaming="true"
	706	+
	707	+ -->
	708	+ <requestParsers enableRemoteStreaming="true"
	709	+ multipartUploadLimitInKB="2048000"
	710	+ formdataUploadLimitInKB="2048"
	711	+ addHttpRequestToContext="false"/>
	712	+
	713	+ <!-- HTTP Caching
	714	+
	715	+ Set HTTP caching related parameters (for proxy caches and clients).
	716	+
	717	+ The options below instruct Solr not to output any HTTP Caching
	718	+ related headers
	719	+ -->
	720	+ <httpCaching never304="true" />
	721	+ <!-- If you include a <cacheControl> directive, it will be used to
	722	+ generate a Cache-Control header (as well as an Expires header
	723	+ if the value contains "max-age=")
	724	+
	725	+ By default, no Cache-Control header is generated.
	726	+
	727	+ You can use the <cacheControl> option even if you have set
	728	+ never304="true"
	729	+ -->
	730	+ <!--
	731	+ <httpCaching never304="true" >
	732	+ <cacheControl>max-age=30, public</cacheControl>
	733	+ </httpCaching>
	734	+ -->
	735	+ <!-- To enable Solr to respond with automatically generated HTTP
	736	+ Caching headers, and to respond to Cache Validation requests
	737	+ correctly, set the value of never304="false"
	738	+
	739	+ This will cause Solr to generate Last-Modified and ETag
	740	+ headers based on the properties of the Index.
	741	+
	742	+ The following options can also be specified to affect the
	743	+ values of these headers...
	744	+
	745	+ lastModifiedFrom - the default value is "openTime" which means the
	746	+ Last-Modified value (and validation against If-Modified-Since
	747	+ requests) will all be relative to when the current Searcher
	748	+ was opened. You can change it to lastModifiedFrom="dirLastMod" if
	749	+ you want the value to exactly correspond to when the physical
	750	+ index was last modified.
	751	+
	752	+ etagSeed="..." is an option you can change to force the ETag
	753	+ header (and validation against If-None-Match requests) to be
	754	+ different even if the index has not changed (ie: when making
	755	+ significant changes to your config file)
	756	+
	757	+ (lastModifiedFrom and etagSeed are both ignored if you use
	758	+ the never304="true" option)
	759	+ -->
	760	+ <!--
	761	+ <httpCaching lastModifiedFrom="openTime"
	762	+ etagSeed="Solr">
	763	+ <cacheControl>max-age=30, public</cacheControl>
	764	+ </httpCaching>
	765	+ -->
	766	+ </requestDispatcher>
	767	+
	768	+ <!-- Request Handlers
	769	+
	770	+ http://wiki.apache.org/solr/SolrRequestHandler
	771	+
	772	+ Incoming queries will be dispatched to a specific handler by name
	773	+ based on the path specified in the request.
	774	+
	775	+ Legacy behavior: If the request path uses "/select" but no Request
	776	+ Handler has that name, and if handleSelect="true" has been specified in
	777	+ the requestDispatcher, then the Request Handler is dispatched based on
	778	+ the qt parameter. Handlers without a leading '/' are accessed this way
	779	+ like so: http://host/app/[core/]select?qt=name If no qt is
	780	+ given, then the requestHandler that declares default="true" will be
	781	+ used or the one named "standard".
	782	+
	783	+ If a Request Handler is declared with startup="lazy", then it will
	784	+ not be initialized until the first request that uses it.
	785	+
	786	+ -->
	787	+ <!-- SearchHandler
	788	+
	789	+ http://wiki.apache.org/solr/SearchHandler
	790	+
	791	+ For processing Search Queries, the primary Request Handler
	792	+ provided with Solr is "SearchHandler". It delegates to a sequence
	793	+ of SearchComponents (see below) and supports distributed
	794	+ queries across multiple shards
	795	+ -->
	796	+ <requestHandler name="/select" class="solr.SearchHandler">
	797	+ <!-- default values for query parameters can be specified, these
	798	+ will be overridden by parameters in the request
	799	+ -->
	800	+ <arr name="last-components">
	801	+ <str>mtas</str>
	802	+ </arr>
	803	+ <lst name="defaults">
	804	+ <str name="echoParams">explicit</str>
	805	+ <int name="rows">10</int>
	806	+ <!-- <str name="df">text</str> -->
	807	+ </lst>
	808	+ <!-- In addition to defaults, "appends" params can be specified
	809	+ to identify values which should be appended to the list of
	810	+ multi-val params from the query (or the existing "defaults").
	811	+ -->
	812	+ <!-- In this example, the param "fq=instock:true" would be appended to
	813	+ any query time fq params the user may specify, as a mechanism for
	814	+ partitioning the index, independent of any user selected filtering
	815	+ that may also be desired (perhaps as a result of faceted searching).
	816	+
	817	+ NOTE: there is absolutely nothing a client can do to prevent these
	818	+ "appends" values from being used, so don't use this mechanism
	819	+ unless you are sure you always want it.
	820	+ -->
	821	+ <!--
	822	+ <lst name="appends">
	823	+ <str name="fq">inStock:true</str>
	824	+ </lst>
	825	+ -->
	826	+ <!-- "invariants" are a way of letting the Solr maintainer lock down
	827	+ the options available to Solr clients. Any params values
	828	+ specified here are used regardless of what values may be specified
	829	+ in either the query, the "defaults", or the "appends" params.
	830	+
	831	+ In this example, the facet.field and facet.query params would
	832	+ be fixed, limiting the facets clients can use. Faceting is
	833	+ not turned on by default - but if the client does specify
	834	+ facet=true in the request, these are the only facets they
	835	+ will be able to see counts for; regardless of what other
	836	+ facet.field or facet.query params they may specify.
	837	+
	838	+ NOTE: there is absolutely nothing a client can do to prevent these
	839	+ "invariants" values from being used, so don't use this mechanism
	840	+ unless you are sure you always want it.
	841	+ -->
	842	+ <!--
	843	+ <lst name="invariants">
	844	+ <str name="facet.field">cat</str>
	845	+ <str name="facet.field">manu_exact</str>
	846	+ <str name="facet.query">price:[* TO 500]</str>
	847	+ <str name="facet.query">price:[500 TO *]</str>
	848	+ </lst>
	849	+ -->
	850	+ <!-- If the default list of SearchComponents is not desired, that
	851	+ list can either be overridden completely, or components can be
	852	+ prepended or appended to the default list. (see below)
	853	+ -->
	854	+ <!--
	855	+ <arr name="components">
	856	+ <str>nameOfCustomComponent1</str>
	857	+ <str>nameOfCustomComponent2</str>
	858	+ </arr>
	859	+ -->
	860	+ </requestHandler>
	861	+
	862	+ <!-- A request handler that returns indented JSON by default -->
	863	+ <requestHandler name="/query" class="solr.SearchHandler">
	864	+ <lst name="defaults">
	865	+ <str name="echoParams">explicit</str>
	866	+ <str name="wt">json</str>
	867	+ <str name="indent">true</str>
	868	+ </lst>
	869	+ </requestHandler>
	870	+
	871	+
	872	+ <requestHandler name="/browse" class="solr.SearchHandler" useParams="query,facets,velocity,browse">
	873	+ <lst name="defaults">
	874	+ <str name="echoParams">explicit</str>
	875	+ </lst>
	876	+ </requestHandler>
	877	+
	878	+ <initParams path="/update/**,/query,/select,/tvrh,/elevate,/spell,/browse">
	879	+ <lst name="defaults">
	880	+ <str name="df">_text_</str>
	881	+ </lst>
	882	+ </initParams>
	883	+
	884	+ <initParams path="/update/**">
	885	+ <lst name="defaults">
	886	+ <!--
	887	+ <str name="update.chain">add-unknown-fields-to-the-schema</str>
	888	+ -->
	889	+ </lst>
	890	+ </initParams>
	891	+
	892	+ <!-- Solr Cell Update Request Handler
	893	+
	894	+ http://wiki.apache.org/solr/ExtractingRequestHandler
	895	+
	896	+ -->
	897	+ <requestHandler name="/update/extract"
	898	+ startup="lazy"
	899	+ class="solr.extraction.ExtractingRequestHandler" >
	900	+ <lst name="defaults">
	901	+ <str name="lowernames">true</str>
	902	+ <str name="fmap.meta">ignored_</str>
	903	+ <str name="fmap.content">_text_</str>
	904	+ </lst>
	905	+ </requestHandler>
	906	+
	907	+ <!--
	908	+ The export request handler is used to export full sorted result sets.
	909	+ Do not change these defaults.
	910	+ -->
	911	+
	912	+ <requestHandler name="/export" class="solr.SearchHandler">
	913	+ <lst name="invariants">
	914	+ <str name="rq">{!xport}</str>
	915	+ <str name="wt">xsort</str>
	916	+ <str name="distrib">false</str>
	917	+ </lst>
	918	+
	919	+ <arr name="components">
	920	+ <str>query</str>
	921	+ </arr>
	922	+ </requestHandler>
	923	+
	924	+
	925	+ <!--
	926	+ Distributed Stream processing.
	927	+ -->
	928	+
	929	+ <requestHandler name="/stream" class="solr.StreamHandler">
	930	+ <lst name="invariants">
	931	+ <str name="wt">json</str>
	932	+ <str name="distrib">false</str>
	933	+ </lst>
	934	+ </requestHandler>
	935	+
	936	+
	937	+
	938	+ <!-- Field Analysis Request Handler
	939	+
	940	+ RequestHandler that provides much the same functionality as
	941	+ analysis.jsp. Provides the ability to specify multiple field
	942	+ types and field names in the same request and outputs
	943	+ index-time and query-time analysis for each of them.
	944	+
	945	+ Request parameters are:
	946	+ analysis.fieldname - field name whose analyzers are to be used
	947	+
	948	+ analysis.fieldtype - field type whose analyzers are to be used
	949	+ analysis.fieldvalue - text for index-time analysis
	950	+ q (or analysis.q) - text for query time analysis
	951	+ analysis.showmatch (true|false) - When set to true and when
	952	+ query analysis is performed, the produced tokens of the
	953	+ field value analysis will be marked as "matched" for every
	954	+ token that is produced by the query analysis
	955	+ -->
	956	+ <requestHandler name="/analysis/field"
	957	+ startup="lazy"
	958	+ class="solr.FieldAnalysisRequestHandler" />
	959	+
	960	+
	961	+ <!-- Document Analysis Handler
	962	+
	963	+ http://wiki.apache.org/solr/AnalysisRequestHandler
	964	+
	965	+ An analysis handler that provides a breakdown of the analysis
	966	+ process of provided documents. This handler expects a (single)
	967	+ content stream with the following format:
	968	+
	969	+ <docs>
	970	+ <doc>
	971	+ <field name="id">1</field>
	972	+ <field name="name">The Name</field>
	973	+ <field name="text">The Text Value</field>
	974	+ </doc>
	975	+ <doc>...</doc>
	976	+ <doc>...</doc>
	977	+ ...
	978	+ </docs>
	979	+
	980	+ Note: Each document must contain a field which serves as the
	981	+ unique key. This key is used in the returned response to associate
	982	+ an analysis breakdown to the analyzed document.
	983	+
	984	+ Like the FieldAnalysisRequestHandler, this handler also supports
	985	+ query analysis by sending either an "analysis.query" or "q"
	986	+ request parameter that holds the query text to be analyzed. It
	987	+ also supports the "analysis.showmatch" parameter which when set to
	988	+ true, all field tokens that match the query tokens will be marked
	989	+ as a "match".
	990	+ -->
	991	+ <requestHandler name="/analysis/document"
	992	+ class="solr.DocumentAnalysisRequestHandler"
	993	+ startup="lazy" />
	994	+
	995	+ <!-- Echo the request contents back to the client -->
	996	+ <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
	997	+ <lst name="defaults">
	998	+ <str name="echoParams">explicit</str>
	999	+ <str name="echoHandler">true</str>
	1000	+ </lst>
	1001	+ </requestHandler>
	1002	+
	1003	+ <!-- Search Components
	1004	+
	1005	+ Search components are registered to SolrCore and used by
	1006	+ instances of SearchHandler (which can access them by name)
	1007	+
	1008	+ By default, the following components are available:
	1009	+
	1010	+ <searchComponent name="query" class="solr.QueryComponent" />
	1011	+ <searchComponent name="facet" class="solr.FacetComponent" />
	1012	+ <searchComponent name="mlt" class="solr.MoreLikeThisComponent" />
	1013	+ <searchComponent name="highlight" class="solr.HighlightComponent" />
	1014	+ <searchComponent name="stats" class="solr.StatsComponent" />
	1015	+ <searchComponent name="debug" class="solr.DebugComponent" />
	1016	+
	1017	+ Default configuration in a requestHandler would look like:
	1018	+
	1019	+ <arr name="components">
	1020	+ <str>query</str>
	1021	+ <str>facet</str>
	1022	+ <str>mlt</str>
	1023	+ <str>highlight</str>
	1024	+ <str>stats</str>
	1025	+ <str>debug</str>
	1026	+ </arr>
	1027	+
	1028	+ If you register a searchComponent to one of the standard names,
	1029	+ that will be used instead of the default.
	1030	+
	1031	+ To insert components before or after the 'standard' components, use:
	1032	+
	1033	+ <arr name="first-components">
	1034	+ <str>myFirstComponentName</str>
	1035	+ </arr>
	1036	+
	1037	+ <arr name="last-components">
	1038	+ <str>myLastComponentName</str>
	1039	+ </arr>
	1040	+
	1041	+ NOTE: The component registered with the name "debug" will
	1042	+ always be executed after the "last-components"
	1043	+
	1044	+ -->
	1045	+
	1046	+ <!-- Spell Check
	1047	+
	1048	+ The spell check component can return a list of alternative spelling
	1049	+ suggestions.
	1050	+
	1051	+ http://wiki.apache.org/solr/SpellCheckComponent
	1052	+ -->
	1053	+ <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
	1054	+
	1055	+ <str name="queryAnalyzerFieldType">text_general</str>
	1056	+
	1057	+ <!-- Multiple "Spell Checkers" can be declared and used by this
	1058	+ component
	1059	+ -->
	1060	+
	1061	+ <!-- a spellchecker built from a field of the main index -->
	1062	+ <lst name="spellchecker">
	1063	+ <str name="name">default</str>
	1064	+ <str name="field">text</str>
	1065	+ <str name="classname">solr.DirectSolrSpellChecker</str>
	1066	+ <!-- the spellcheck distance measure used, the default is the internal levenshtein -->
	1067	+ <str name="distanceMeasure">internal</str>
	1068	+ <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
	1069	+ <float name="accuracy">0.5</float>
	1070	+ <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 -->
	1071	+ <int name="maxEdits">2</int>
	1072	+ <!-- the minimum shared prefix when enumerating terms -->
	1073	+ <int name="minPrefix">1</int>
	1074	+ <!-- maximum number of inspections per result. -->
	1075	+ <int name="maxInspections">5</int>
	1076	+ <!-- minimum length of a query term to be considered for correction -->
	1077	+ <int name="minQueryLength">4</int>
	1078	+ <!-- maximum threshold of documents a query term can appear in to be considered for correction -->
	1079	+ <float name="maxQueryFrequency">0.01</float>
	1080	+ <!-- uncomment this to require suggestions to occur in 1% of the documents
	1081	+ <float name="thresholdTokenFrequency">.01</float>
	1082	+ -->
	1083	+ </lst>
	1084	+
	1085	+ <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
	1086	+ <lst name="spellchecker">
	1087	+ <str name="name">wordbreak</str>
	1088	+ <str name="classname">solr.WordBreakSolrSpellChecker</str>
	1089	+ <str name="field">name</str>
	1090	+ <str name="combineWords">true</str>
	1091	+ <str name="breakWords">true</str>
	1092	+ <int name="maxChanges">10</int>
	1093	+ </lst>
	1094	+
	1095	+ <!-- a spellchecker that uses a different distance measure -->
	1096	+ <!--
	1097	+ <lst name="spellchecker">
	1098	+ <str name="name">jarowinkler</str>
	1099	+ <str name="field">spell</str>
	1100	+ <str name="classname">solr.DirectSolrSpellChecker</str>
	1101	+ <str name="distanceMeasure">
	1102	+ org.apache.lucene.search.spell.JaroWinklerDistance
	1103	+ </str>
	1104	+ </lst>
	1105	+ -->
	1106	+
	1107	+ <!-- a spellchecker that uses an alternate comparator
	1108	+
	1109	+ comparatorClass can be one of:
	1110	+ 1. score (default)
	1111	+ 2. freq (Frequency first, then score)
	1112	+ 3. A fully qualified class name
	1113	+ -->
	1114	+ <!--
	1115	+ <lst name="spellchecker">
	1116	+ <str name="name">freq</str>
	1117	+ <str name="field">lowerfilt</str>
	1118	+ <str name="classname">solr.DirectSolrSpellChecker</str>
	1119	+ <str name="comparatorClass">freq</str> </lst>
	1120	+ -->
	1121	+
	1122	+ <!-- A spellchecker that reads the list of words from a file -->
	1123	+ <!--
	1124	+ <lst name="spellchecker">
	1125	+ <str name="classname">solr.FileBasedSpellChecker</str>
	1126	+ <str name="name">file</str>
	1127	+ <str name="sourceLocation">spellings.txt</str>
	1128	+ <str name="characterEncoding">UTF-8</str>
	1129	+ <str name="spellcheckIndexDir">spellcheckerFile</str>
	1130	+ </lst>
	1131	+ -->
	1132	+ </searchComponent>
	1133	+
	1134	+ <!-- A request handler for demonstrating the spellcheck component.
	1135	+
	1136	+ NOTE: This is purely as an example. The whole purpose of the
	1137	+ SpellCheckComponent is to hook it into the request handler that
	1138	+ handles your normal user queries so that a separate request is
	1139	+ not needed to get suggestions.
	1140	+
	1141	+ IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS
	1142	+ NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!
	1143	+
	1144	+ See http://wiki.apache.org/solr/SpellCheckComponent for details
	1145	+ on the request parameters.
	1146	+ -->
	1147	+ <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
	1148	+ <lst name="defaults">
	1149	+ <!-- Solr will use suggestions from both the 'default' spellchecker
	1150	+ and from the 'wordbreak' spellchecker and combine them.
	1151	+ collations (re-written queries) can include a combination of
	1152	+ corrections from both spellcheckers -->
	1153	+ <str name="spellcheck.dictionary">default</str>
	1154	+ <str name="spellcheck.dictionary">wordbreak</str>
	1155	+ <str name="spellcheck">on</str>
	1156	+ <str name="spellcheck.extendedResults">true</str>
	1157	+ <str name="spellcheck.count">10</str>
	1158	+ <str name="spellcheck.alternativeTermCount">5</str>
	1159	+ <str name="spellcheck.maxResultsForSuggest">5</str>
	1160	+ <str name="spellcheck.collate">true</str>
	1161	+ <str name="spellcheck.collateExtendedResults">true</str>
	1162	+ <str name="spellcheck.maxCollationTries">10</str>
	1163	+ <str name="spellcheck.maxCollations">5</str>
	1164	+ </lst>
	1165	+ <arr name="last-components">
	1166	+ <str>spellcheck</str>
	1167	+ </arr>
	1168	+ </requestHandler>
	1169	+
	1170	+ <!-- Term Vector Component
	1171	+
	1172	+ http://wiki.apache.org/solr/TermVectorComponent
	1173	+ -->
	1174	+ <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>
	1175	+
	1176	+ <!-- A request handler for demonstrating the term vector component
	1177	+
	1178	+ This is purely as an example.
	1179	+
	1180	+ In reality you will likely want to add the component to your
	1181	+ already specified request handlers.
	1182	+ -->
	1183	+ <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
	1184	+ <lst name="defaults">
	1185	+ <bool name="tv">true</bool>
	1186	+ </lst>
	1187	+ <arr name="last-components">
	1188	+ <str>tvComponent</str>
	1189	+ </arr>
	1190	+ </requestHandler>
	1191	+
	1192	+ <requestHandler name="/mtas"
	1193	+ class="mtas.solr.handler.MtasRequestHandler" />
	1194	+
	1195	+ <!-- Clustering Component. (Omitted here. See the default Solr example for a typical configuration.) -->
	1196	+
	1197	+ <!-- Terms Component
	1198	+
	1199	+ http://wiki.apache.org/solr/TermsComponent
	1200	+
	1201	+ A component to return terms and document frequency of those
	1202	+ terms
	1203	+ -->
	1204	+ <searchComponent name="terms" class="solr.TermsComponent"/>
	1205	+
	1206	+
	1207	+ <!-- A request handler for demonstrating the terms component -->
	1208	+ <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
	1209	+ <lst name="defaults">
	1210	+ <bool name="terms">true</bool>
	1211	+ <bool name="distrib">false</bool>
	1212	+ </lst>
	1213	+ <arr name="components">
	1214	+ <str>terms</str>
	1215	+ </arr>
	1216	+ </requestHandler>
	1217	+
	1218	+
	1219	+ <!-- Query Elevation Component
	1220	+
	1221	+ http://wiki.apache.org/solr/QueryElevationComponent
	1222	+
	1223	+ a search component that enables you to configure the top
	1224	+ results for a given query regardless of the normal lucene
	1225	+ scoring.
	1226	+ -->
	1227	+ <!-- <searchComponent name="elevator" class="solr.QueryElevationComponent" >
	1228	+ pick a fieldType to analyze queries
	1229	+ <str name="queryFieldType">string</str>
	1230	+ <str name="config-file">elevate.xml</str>
	1231	+ </searchComponent> -->
	1232	+
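	1233	+ <!-- A request handler for demonstrating the elevator component. NOTE: the "elevator" searchComponent definition above is commented out; re-enable it before using this handler. -->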
	1234	+ <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
	1235	+ <lst name="defaults">
	1236	+ <str name="echoParams">explicit</str>
	1237	+ </lst>
	1238	+ <arr name="last-components">
	1239	+ <str>elevator</str>
	1240	+ </arr>
	1241	+ </requestHandler>
	1242	+
	1243	+ <!-- Highlighting Component
	1244	+
	1245	+ http://wiki.apache.org/solr/HighlightingParameters
	1246	+ -->
	1247	+ <searchComponent class="solr.HighlightComponent" name="highlight">
	1248	+ <highlighting>
	1249	+ <!-- Configure the standard fragmenter -->
	1250	+ <!-- This could most likely be commented out in the "default" case -->
	1251	+ <fragmenter name="gap"
	1252	+ default="true"
	1253	+ class="solr.highlight.GapFragmenter">
	1254	+ <lst name="defaults">
	1255	+ <int name="hl.fragsize">100</int>
	1256	+ </lst>
	1257	+ </fragmenter>
	1258	+
	1259	+ <!-- A regular-expression-based fragmenter
	1260	+ (for sentence extraction)
	1261	+ -->
	1262	+ <fragmenter name="regex"
	1263	+ class="solr.highlight.RegexFragmenter">
	1264	+ <lst name="defaults">
	1265	+ <!-- slightly smaller fragsizes work better because of slop -->
	1266	+ <int name="hl.fragsize">70</int>
	1267	+ <!-- allow 50% slop on fragment sizes -->
	1268	+ <float name="hl.regex.slop">0.5</float>
	1269	+ <!-- a basic sentence pattern -->
	1270	+ <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
	1271	+ </lst>
	1272	+ </fragmenter>
	1273	+
	1274	+ <!-- Configure the standard formatter -->
	1275	+ <formatter name="html"
	1276	+ default="true"
	1277	+ class="solr.highlight.HtmlFormatter">
	1278	+ <lst name="defaults">
	1279	+ <str name="hl.simple.pre"><![CDATA[<em>]]></str>
	1280	+ <str name="hl.simple.post"><![CDATA[</em>]]></str>
	1281	+ </lst>
	1282	+ </formatter>
	1283	+
	1284	+ <!-- Configure the standard encoder -->
	1285	+ <encoder name="html"
	1286	+ class="solr.highlight.HtmlEncoder" />
	1287	+
	1288	+ <!-- Configure the standard fragListBuilder -->
	1289	+ <fragListBuilder name="simple"
	1290	+ class="solr.highlight.SimpleFragListBuilder"/>
	1291	+
	1292	+ <!-- Configure the single fragListBuilder -->
	1293	+ <fragListBuilder name="single"
	1294	+ class="solr.highlight.SingleFragListBuilder"/>
	1295	+
	1296	+ <!-- Configure the weighted fragListBuilder -->
	1297	+ <fragListBuilder name="weighted"
	1298	+ default="true"
	1299	+ class="solr.highlight.WeightedFragListBuilder"/>
	1300	+
	1301	+ <!-- default tag FragmentsBuilder -->
	1302	+ <fragmentsBuilder name="default"
	1303	+ default="true"
	1304	+ class="solr.highlight.ScoreOrderFragmentsBuilder">
	1305	+ <!--
	1306	+ <lst name="defaults">
	1307	+ <str name="hl.multiValuedSeparatorChar">/</str>
	1308	+ </lst>
	1309	+ -->
	1310	+ </fragmentsBuilder>
	1311	+
	1312	+ <!-- multi-colored tag FragmentsBuilder -->
	1313	+ <fragmentsBuilder name="colored"
	1314	+ class="solr.highlight.ScoreOrderFragmentsBuilder">
	1315	+ <lst name="defaults">
	1316	+ <str name="hl.tag.pre"><![CDATA[
	1317	+ <b style="background:yellow">,<b style="background:lawgreen">,
	1318	+ <b style="background:aquamarine">,<b style="background:magenta">,
	1319	+ <b style="background:palegreen">,<b style="background:coral">,
	1320	+ <b style="background:wheat">,<b style="background:khaki">,
	1321	+ <b style="background:lime">,<b style="background:deepskyblue">]]></str>
	1322	+ <str name="hl.tag.post"><![CDATA[</b>]]></str>
	1323	+ </lst>
	1324	+ </fragmentsBuilder>
	1325	+
	1326	+ <boundaryScanner name="default"
	1327	+ default="true"
	1328	+ class="solr.highlight.SimpleBoundaryScanner">
	1329	+ <lst name="defaults">
	1330	+ <str name="hl.bs.maxScan">10</str>
	1331	+ <str name="hl.bs.chars">.,!? </str>
	1332	+ </lst>
	1333	+ </boundaryScanner>
	1334	+
	1335	+ <boundaryScanner name="breakIterator"
	1336	+ class="solr.highlight.BreakIteratorBoundaryScanner">
	1337	+ <lst name="defaults">
	1338	+ <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE -->
	1339	+ <str name="hl.bs.type">WORD</str>
	1340	+ <!-- language and country are used when constructing Locale object. -->
	1341	+ <!-- And the Locale object will be used when getting instance of BreakIterator -->
	1342	+ <str name="hl.bs.language">en</str>
	1343	+ <str name="hl.bs.country">US</str>
	1344	+ </lst>
	1345	+ </boundaryScanner>
	1346	+ </highlighting>
	1347	+ </searchComponent>
	1348	+
	1349	+ <searchComponent name="mtas" class="mtas.solr.handler.component.MtasSolrSearchComponent"/>
	1350	+
	1351	+ <!-- Update Processors
	1352	+
	1353	+ Chains of Update Processor Factories for dealing with Update
	1354	+ Requests can be declared, and then used by name in Update
	1355	+ Request Processors
	1356	+
	1357	+ http://wiki.apache.org/solr/UpdateRequestProcessor
	1358	+
	1359	+ -->
	1360	+
	1361	+ <!-- Add unknown fields to the schema
	1362	+
	1363	+ An example field type guessing update processor that will
	1364	+ attempt to parse string-typed field values as Booleans, Longs,
	1365	+ Doubles, or Dates, and then add schema fields with the guessed
	1366	+ field types.
	1367	+
	1368	+ This requires that the schema is both managed and mutable, by
	1369	+ declaring schemaFactory as ManagedIndexSchemaFactory, with
	1370	+ mutable specified as true.
	1371	+
	1372	+ See http://wiki.apache.org/solr/GuessingFieldTypes
	1373	+ -->
	1374	+ <!--
	1375	+ <updateRequestProcessorChain name="add-unknown-fields-to-the-schema">
	1376	+ #UUIDUpdateProcessorFactory will generate an id if none is present in the incoming document
	1377	+ <processor class="solr.UUIDUpdateProcessorFactory" />
	1378	+
	1379	+ <processor class="solr.LogUpdateProcessorFactory"/>
	1380	+ <processor class="solr.DistributedUpdateProcessorFactory"/>
	1381	+ <processor class="solr.RemoveBlankFieldUpdateProcessorFactory"/>
	1382	+ <processor class="solr.FieldNameMutatingUpdateProcessorFactory">
	1383	+ <str name="pattern">[^\w-\.]</str>
	1384	+ <str name="replacement">_</str>
	1385	+ </processor>
	1386	+ <processor class="solr.ParseBooleanFieldUpdateProcessorFactory"/>
	1387	+ <processor class="solr.ParseLongFieldUpdateProcessorFactory"/>
	1388	+ <processor class="solr.ParseDoubleFieldUpdateProcessorFactory"/>
	1389	+ <processor class="solr.ParseDateFieldUpdateProcessorFactory">
	1390	+ <arr name="format">
	1391	+ <str>yyyy-MM-dd'T'HH:mm:ss.SSSZ</str>
	1392	+ <str>yyyy-MM-dd'T'HH:mm:ss,SSSZ</str>
	1393	+ <str>yyyy-MM-dd'T'HH:mm:ss.SSS</str>
	1394	+ <str>yyyy-MM-dd'T'HH:mm:ss,SSS</str>
	1395	+ <str>yyyy-MM-dd'T'HH:mm:ssZ</str>
	1396	+ <str>yyyy-MM-dd'T'HH:mm:ss</str>
	1397	+ <str>yyyy-MM-dd'T'HH:mmZ</str>
	1398	+ <str>yyyy-MM-dd'T'HH:mm</str>
	1399	+ <str>yyyy-MM-dd HH:mm:ss.SSSZ</str>
	1400	+ <str>yyyy-MM-dd HH:mm:ss,SSSZ</str>
	1401	+ <str>yyyy-MM-dd HH:mm:ss.SSS</str>
	1402	+ <str>yyyy-MM-dd HH:mm:ss,SSS</str>
	1403	+ <str>yyyy-MM-dd HH:mm:ssZ</str>
	1404	+ <str>yyyy-MM-dd HH:mm:ss</str>
	1405	+ <str>yyyy-MM-dd HH:mmZ</str>
	1406	+ <str>yyyy-MM-dd HH:mm</str>
	1407	+ <str>yyyy-MM-dd</str>
	1408	+ </arr>
	1409	+ </processor>
	1410	+ <processor class="solr.AddSchemaFieldsUpdateProcessorFactory">
	1411	+ <str name="defaultFieldType">strings</str>
	1412	+ <lst name="typeMapping">
	1413	+ <str name="valueClass">java.lang.Boolean</str>
	1414	+ <str name="fieldType">booleans</str>
	1415	+ </lst>
	1416	+ <lst name="typeMapping">
	1417	+ <str name="valueClass">java.util.Date</str>
	1418	+ <str name="fieldType">tdates</str>
	1419	+ </lst>
	1420	+ <lst name="typeMapping">
	1421	+ <str name="valueClass">java.lang.Long</str>
	1422	+ <str name="valueClass">java.lang.Integer</str>
	1423	+ <str name="fieldType">tlongs</str>
	1424	+ </lst>
	1425	+ <lst name="typeMapping">
	1426	+ <str name="valueClass">java.lang.Number</str>
	1427	+ <str name="fieldType">tdoubles</str>
	1428	+ </lst>
	1429	+ </processor>
	1430	+ <processor class="solr.RunUpdateProcessorFactory"/>
	1431	+ </updateRequestProcessorChain>
	1432	+ -->
	1433	+ <!-- Deduplication
	1434	+
	1435	+ An example dedup update processor that creates the "id" field
	1436	+ on the fly based on the hash code of some other fields. This
	1437	+ example has overwriteDupes set to false since we are using the
	1438	+ id field as the signatureField and Solr will maintain
	1439	+ uniqueness based on that anyway.
	1440	+
	1441	+ -->
	1442	+ <!--
	1443	+ <updateRequestProcessorChain name="dedupe">
	1444	+ <processor class="solr.processor.SignatureUpdateProcessorFactory">
	1445	+ <bool name="enabled">true</bool>
	1446	+ <str name="signatureField">id</str>
	1447	+ <bool name="overwriteDupes">false</bool>
	1448	+ <str name="fields">name,features,cat</str>
	1449	+ <str name="signatureClass">solr.processor.Lookup3Signature</str>
	1450	+ </processor>
	1451	+ <processor class="solr.LogUpdateProcessorFactory" />
	1452	+ <processor class="solr.RunUpdateProcessorFactory" />
	1453	+ </updateRequestProcessorChain>
	1454	+ -->
	1455	+
	1456	+ <!-- Language identification
	1457	+
	1458	+ This example update chain identifies the language of the incoming
	1459	+ documents using the langid contrib. The detected language is
	1460	+ written to field language_s. No field name mapping is done.
	1461	+ The fields used for detection are text, title, subject and description,
	1462	+ making this example suitable for detecting languages from full-text
	1463	+ rich documents injected via ExtractingRequestHandler.
	1464	+ See more about langId at http://wiki.apache.org/solr/LanguageDetection
	1465	+ -->
	1466	+ <!--
	1467	+ <updateRequestProcessorChain name="langid">
	1468	+ <processor class="org.apache.solr.update.processor.TikaLanguageIdentifierUpdateProcessorFactory">
	1469	+ <str name="langid.fl">text,title,subject,description</str>
	1470	+ <str name="langid.langField">language_s</str>
	1471	+ <str name="langid.fallback">en</str>
	1472	+ </processor>
	1473	+ <processor class="solr.LogUpdateProcessorFactory" />
	1474	+ <processor class="solr.RunUpdateProcessorFactory" />
	1475	+ </updateRequestProcessorChain>
	1476	+ -->
	1477	+
	1478	+ <!-- Script update processor
	1479	+
	1480	+ This example hooks in an update processor implemented using JavaScript.
	1481	+
	1482	+ See more about the script update processor at http://wiki.apache.org/solr/ScriptUpdateProcessor
	1483	+ -->
	1484	+ <!--
	1485	+ <updateRequestProcessorChain name="script">
	1486	+ <processor class="solr.StatelessScriptUpdateProcessorFactory">
	1487	+ <str name="script">update-script.js</str>
	1488	+ <lst name="params">
	1489	+ <str name="config_param">example config parameter</str>
	1490	+ </lst>
	1491	+ </processor>
	1492	+ <processor class="solr.RunUpdateProcessorFactory" />
	1493	+ </updateRequestProcessorChain>
	1494	+ -->
	1495	+
	1496	+ <requestHandler name="/update" class="solr.UpdateRequestHandler">
	1497	+ <lst name="defaults">
	1498	+ <str name="update.chain">mtasUpdateProcessor</str>
	1499	+ </lst>
	1500	+ </requestHandler>
	1501	+
	1502	+ <updateRequestProcessorChain name="mtasUpdateProcessor">
	1503	+ <processor class="mtas.solr.update.processor.MtasUpdateRequestProcessorFactory" />
	1504	+ <processor class="solr.LogUpdateProcessorFactory" />
	1505	+ <processor class="solr.RunUpdateProcessorFactory" />
	1506	+ </updateRequestProcessorChain>
	1507	+
	1508	+ <!-- Response Writers
	1509	+
	1510	+ http://wiki.apache.org/solr/QueryResponseWriter
	1511	+
	1512	+ Request responses will be written using the writer specified by
	1513	+ the 'wt' request parameter matching the name of a registered
	1514	+ writer.
	1515	+
	1516	+ The "default" writer is the default and will be used if 'wt' is
	1517	+ not specified in the request.
	1518	+ -->
	1519	+ <!-- The following response writers are implicitly configured unless
	1520	+ overridden...
	1521	+ -->
	1522	+ <!--
	1523	+ <queryResponseWriter name="xml"
	1524	+ default="true"
	1525	+ class="solr.XMLResponseWriter" />
	1526	+ <queryResponseWriter name="json" class="solr.JSONResponseWriter"/>
	1527	+ <queryResponseWriter name="python" class="solr.PythonResponseWriter"/>
	1528	+ <queryResponseWriter name="ruby" class="solr.RubyResponseWriter"/>
	1529	+ <queryResponseWriter name="php" class="solr.PHPResponseWriter"/>
	1530	+ <queryResponseWriter name="phps" class="solr.PHPSerializedResponseWriter"/>
	1531	+ <queryResponseWriter name="csv" class="solr.CSVResponseWriter"/>
	1532	+ <queryResponseWriter name="schema.xml" class="solr.SchemaXmlResponseWriter"/>
	1533	+ -->
	1534	+
	1535	+ <queryResponseWriter name="json" class="solr.JSONResponseWriter">
	1536	+ <!-- For the purposes of the tutorial, JSON responses are written as
	1537	+ plain text so that they are easy to read in any browser.
	1538	+ If you expect a MIME type of "application/json" just remove this override.
	1539	+ -->
	1540	+ <str name="content-type">text/plain; charset=UTF-8</str>
	1541	+ </queryResponseWriter>
	1542	+
	1543	+ <!--
	1544	+ Custom response writers can be declared as needed...
	1545	+ -->
	1546	+ <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy">
	1547	+ <str name="template.base.dir">${velocity.template.base.dir:}</str>
	1548	+ </queryResponseWriter>
	1549	+
	1550	+ <!-- XSLT response writer transforms the XML output by any xslt file found
	1551	+ in Solr's conf/xslt directory. Changes to xslt files are checked for
	1552	+ every xsltCacheLifetimeSeconds.
	1553	+ -->
	1554	+ <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
	1555	+ <int name="xsltCacheLifetimeSeconds">5</int>
	1556	+ </queryResponseWriter>
	1557	+
	1558	+ <!-- Query Parsers
	1559	+
	1560	+ http://wiki.apache.org/solr/SolrQuerySyntax
	1561	+
	1562	+ Multiple QParserPlugins can be registered by name, and then
	1563	+ used in either the "defType" param for the QueryComponent (used
	1564	+ by SearchHandler) or in LocalParams
	1565	+ -->
	1566	+ <!-- example of registering a query parser -->
	1567	+ <!--
	1568	+ <queryParser name="myparser" class="com.mycompany.MyQParserPlugin"/>
	1569	+ -->
	1570	+
	1571	+ <queryParser name="mtas_cql" class="mtas.solr.search.MtasSolrCQLQParserPlugin"/>
	1572	+ <queryParser name="collection" class="dev.solr.plugin.collection.CollectionQueryParser"/>
	1573	+ <queryResponseWriter name="collection" class="dev.solr.plugin.collection.CollectionQueryResponseWriter"/>
	1574	+
	1575	+ <!-- Function Parsers
	1576	+
	1577	+ http://wiki.apache.org/solr/FunctionQuery
	1578	+
	1579	+ Multiple ValueSourceParsers can be registered by name, and then
	1580	+ used as function names when using the "func" QParser.
	1581	+ -->
	1582	+ <!-- example of registering a custom function parser -->
	1583	+ <!--
	1584	+ <valueSourceParser name="myfunc"
	1585	+ class="com.mycompany.MyValueSourceParser" />
	1586	+ -->
	1587	+
	1588	+
	1589	+ <!-- Document Transformers
	1590	+ http://wiki.apache.org/solr/DocTransformers
	1591	+ -->
	1592	+ <!--
	1593	+ Could be something like:
	1594	+ <transformer name="db" class="com.mycompany.LoadFromDatabaseTransformer" >
	1595	+ <int name="connection">jdbc://....</int>
	1596	+ </transformer>
	1597	+
	1598	+ To add a constant value to all docs, use:
	1599	+ <transformer name="mytrans2" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
	1600	+ <int name="value">5</int>
	1601	+ </transformer>
	1602	+
	1603	+ If you want the user to still be able to change it with _value:something_ use this:
	1604	+ <transformer name="mytrans3" class="org.apache.solr.response.transform.ValueAugmenterFactory" >
	1605	+ <double name="defaultValue">5</double>
	1606	+ </transformer>
	1607	+
	1608	+ If you are using the QueryElevationComponent, you may wish to mark documents that get boosted. The
	1609	+ EditorialMarkerFactory will do exactly that:
	1610	+ <transformer name="qecBooster" class="org.apache.solr.response.transform.EditorialMarkerFactory" />
	1611	+ -->
	1612	+
	1613	+
	1614	+ <!-- Legacy config for the admin interface -->
	1615	+ <admin>
	1616	+ <defaultQuery>*:*</defaultQuery>
	1617	+ </admin>
	1618	+
	1619	+</config>
...	...

junit/mtas/parser/MtasCQLParserTestSentence.java 0 → 100644

View file @0237256

	1	+++ a/junit/mtas/parser/MtasCQLParserTestSentence.java
	1	+package mtas.parser;
	2	+
	3	+import static org.junit.Assert.*;
	4	+
	5	+import java.io.BufferedReader;
	6	+import java.io.StringReader;
	7	+import java.util.ArrayList;
	8	+import java.util.List;
	9	+
	10	+import org.apache.lucene.search.spans.SpanContainingQuery;
	11	+import org.apache.lucene.search.spans.SpanNotQuery;
	12	+import org.apache.lucene.search.spans.SpanQuery;
	13	+import org.apache.lucene.search.spans.SpanWithinQuery;
	14	+
	15	+import mtas.parser.cql.MtasCQLParser;
	16	+import mtas.parser.cql.ParseException;
	17	+import mtas.parser.cql.util.MtasCQLParserGroupQuery;
	18	+import mtas.parser.cql.util.MtasCQLParserWordQuery;
	19	+import mtas.search.spans.MtasSpanOrQuery;
	20	+import mtas.search.spans.MtasSpanRecurrenceQuery;
	21	+import mtas.search.spans.MtasSpanSequenceItem;
	22	+import mtas.search.spans.MtasSpanSequenceQuery;
	23	+
	24	+public class MtasCQLParserTestSentence {
	25	+
	26	+ @org.junit.Test
	27	+ public void test() {
	28	+ basicTests();
	29	+ }
	30	+
	31	+ private void testCQLParse(String field, String cql, SpanQuery q) {
	32	+ MtasCQLParser p = new MtasCQLParser(new BufferedReader(new StringReader(cql)));
	33	+ try {
	34	+ System.out.print("CQL parsing:\t"+cql);
	35	+ assertEquals(p.parse(field) ,q);
	36	+ System.out.print("\n");
	37	+ } catch (ParseException e) {
	38	+ System.out.println("Error CQL parsing:\t"+cql);
	39	+ e.printStackTrace();
	40	+ }
	41	+ }
	42	+
	43	+ private void testCQLEquivalent(String field, String cql1, String cql2) {
	44	+ MtasCQLParser p1 = new MtasCQLParser(new BufferedReader(new StringReader(cql1)));
	45	+ MtasCQLParser p2 = new MtasCQLParser(new BufferedReader(new StringReader(cql2)));
	46	+ try {
	47	+ System.out.print("CQL equivalent:\t"+cql1+" and "+cql2);
	48	+ assertEquals(p1.parse(field) ,p2.parse(field));
	49	+ System.out.print("\n");
	50	+ } catch (ParseException e) {
	51	+ System.out.println("Error CQL equivalent:\t"+cql1+" and "+cql2);
	52	+ e.printStackTrace();
	53	+ }
	54	+ }
	55	+
	56	+ private void basicTests() {
	57	+ basicTest1();
	58	+ basicTest2();
	59	+ basicTest3();
	60	+ basicTest4();
	61	+ basicTest5();
	62	+ basicTest6();
	63	+ basicTest7();
	64	+ basicTest8();
	65	+ basicTest9();
	66	+ basicTest10();
	67	+ basicTest11();
	68	+ basicTest12();
	69	+ basicTest13();
	70	+ basicTest14();
	71	+ basicTest15();
	72	+ basicTest16();
	73	+ }
	74	+
	75	+ private void basicTest1() {
	76	+ String field = "testveld";
	77	+ String cql = "[pos=\"LID\"] [lemma=\"koe\"]";
	78	+ SpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID");
	79	+ SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","koe");
	80	+ List<MtasSpanSequenceItem> items = new ArrayList<MtasSpanSequenceItem>();
	81	+ items.add(new MtasSpanSequenceItem(q1, false));
	82	+ items.add(new MtasSpanSequenceItem(q2, false));
	83	+ SpanQuery q = new MtasSpanSequenceQuery(items);
	84	+ testCQLParse(field, cql, q);
	85	+ }
	86	+
	87	+ private void basicTest2() {
	88	+ String field = "testveld";
	89	+ String cql1 = "[pos=\"LID\"] [] []? [] [lemma=\"koe\"]";
	90	+ String cql2 = "[pos=\"LID\"] []{2,3} [lemma=\"koe\"]";
	91	+ testCQLEquivalent(field, cql1, cql2);
	92	+ }
	93	+
	94	+ private void basicTest3() {
	95	+ String field = "testveld";
	96	+ String cql = "[pos=\"LID\"] \| [lemma=\"koe\"]";
	97	+ SpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID");
	98	+ SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","koe");
	99	+ SpanQuery q = new MtasSpanOrQuery(q1,q2);
	100	+ testCQLParse(field, cql, q);
	101	+ }
	102	+
	103	+ private void basicTest4() {
	104	+ String field = "testveld";
	105	+ String cql = "[pos=\"LID\"] \| ([lemma=\"de\"] [lemma=\"koe\"])";
	106	+ SpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID");
	107	+ SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","de");
	108	+ SpanQuery q3 = new MtasCQLParserWordQuery(field,"lemma","koe");
	109	+ List<MtasSpanSequenceItem> items = new ArrayList<MtasSpanSequenceItem>();
	110	+ items.add(new MtasSpanSequenceItem(q2, false));
	111	+ items.add(new MtasSpanSequenceItem(q3, false));
	112	+ SpanQuery q4 = new MtasSpanSequenceQuery(items);
	113	+ SpanQuery q = new MtasSpanOrQuery(q1,q4);
	114	+ testCQLParse(field, cql, q);
	115	+ }
	116	+
	117	+ private void basicTest5() {
	118	+ String field = "testveld";
	119	+ String cql1 = "([pos=\"LID\"]([pos=\"ADJ\"][lemma=\"koe\"]))";
	120	+ String cql2 = "[pos=\"LID\"][pos=\"ADJ\"][lemma=\"koe\"]";
	121	+ testCQLEquivalent(field, cql1, cql2);
	122	+ }
	123	+
	124	+ private void basicTest6() {
	125	+ String field = "testveld";
	126	+ String cql1 = "([pos=\"LID\"]\|[lemma=\"de\"][lemma=\"koe\"])\|([pos=\"ADJ\"]\|([lemma=\"het\"]([lemma=\"paard\"])))";
	127	+ String cql2 = "[pos=\"LID\"]\|[lemma=\"de\"][lemma=\"koe\"]\|[pos=\"ADJ\"]\|[lemma=\"het\"][lemma=\"paard\"]";
	128	+ testCQLEquivalent(field, cql1, cql2);
	129	+ }
	130	+
	131	+ private void basicTest7() {
	132	+ String field = "testveld";
	133	+ String cql1 = "[pos=\"LID\"] []{0,1} []{3,5} []{2,4}";
	134	+ String cql2 = "[pos=\"LID\"] []{5,10}";
	135	+ testCQLEquivalent(field, cql1, cql2);
	136	+ }
	137	+
	138	+ private void basicTest8() {
	139	+ String field = "testveld";
	140	+ String cql = "[lemma=\"koe\"]([pos=\"N\"]\|[pos=\"ADJ\"])";
	141	+ SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe");
	142	+ SpanQuery q2 = new MtasCQLParserWordQuery(field,"pos","N");
	143	+ SpanQuery q3 = new MtasCQLParserWordQuery(field,"pos","ADJ");
	144	+ SpanQuery q4 = new MtasSpanOrQuery(q2,q3);
	145	+ List<MtasSpanSequenceItem> items = new ArrayList<MtasSpanSequenceItem>();
	146	+ items.add(new MtasSpanSequenceItem(q1, false));
	147	+ items.add(new MtasSpanSequenceItem(q4, false));
	148	+ SpanQuery q = new MtasSpanSequenceQuery(items);
	149	+ testCQLParse(field, cql, q);
	150	+ }
	151	+
	152	+ private void basicTest9() {
	153	+ String field = "testveld";
	154	+ String cql = "[lemma=\"koe\"]([pos=\"N\"]\|[pos=\"ADJ\"]){2,3}[lemma=\"paard\"]";
	155	+ SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe");
	156	+ SpanQuery q2 = new MtasCQLParserWordQuery(field,"pos","N");
	157	+ SpanQuery q3 = new MtasCQLParserWordQuery(field,"pos","ADJ");
	158	+ SpanQuery q4 = new MtasCQLParserWordQuery(field,"lemma","paard");
	159	+ SpanQuery q5 = new MtasSpanOrQuery(new MtasSpanRecurrenceQuery(q2,2,3),new MtasSpanRecurrenceQuery(q3,2,3));
	160	+ List<MtasSpanSequenceItem> items = new ArrayList<MtasSpanSequenceItem>();
	161	+ items.add(new MtasSpanSequenceItem(q1, false));
	162	+ items.add(new MtasSpanSequenceItem(q5, false));
	163	+ items.add(new MtasSpanSequenceItem(q4, false));
	164	+ SpanQuery q = new MtasSpanSequenceQuery(items);
	165	+ testCQLParse(field, cql, q);
	166	+ }
	167	+
	168	+ private void basicTest10() {
	169	+ String field = "testveld";
	170	+ String cql = "[pos=\"LID\"]? [pos=\"ADJ\"]{1,3} [lemma=\"koe\"]";
	171	+ SpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID");
	172	+ SpanQuery q2 = new MtasCQLParserWordQuery(field,"pos","ADJ");
	173	+ SpanQuery q3 = new MtasCQLParserWordQuery(field,"lemma","koe");
	174	+ List<MtasSpanSequenceItem> items = new ArrayList<MtasSpanSequenceItem>();
	175	+ items.add(new MtasSpanSequenceItem(q1, true));
	176	+ items.add(new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(q2,1,3), false));
	177	+ items.add(new MtasSpanSequenceItem(q3, false));
	178	+ SpanQuery q = new MtasSpanSequenceQuery(items);
	179	+ testCQLParse(field, cql, q);
	180	+ }
	181	+
	182	+ private void basicTest11() {
	183	+ String field = "testveld";
	184	+ String cql = "<sentence/> containing [lemma=\"koe\"]";
	185	+ SpanQuery q1 = new MtasCQLParserGroupQuery(field,"sentence");
	186	+ SpanQuery q2 = new MtasCQLParserWordQuery(field,"lemma","koe");
	187	+ SpanQuery q = new SpanContainingQuery(q1, q2);
	188	+ testCQLParse(field, cql, q);
	189	+ }
	190	+
	191	+ private void basicTest12() {
	192	+ String field = "testveld";
	193	+ String cql = "[lemma=\"koe\"] within <sentence/>";
	194	+ SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe");
	195	+ SpanQuery q2 = new MtasCQLParserGroupQuery(field,"sentence");
	196	+ SpanQuery q = new SpanWithinQuery(q2, q1);
	197	+ testCQLParse(field, cql, q);
	198	+ }
	199	+
	200	+ private void basicTest13() {
	201	+ String field = "testveld";
	202	+ String cql = "[lemma=\"koe\"]([t=\"de\"] within <sentence/>)";
	203	+ SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe");
	204	+ SpanQuery q2 = new MtasCQLParserWordQuery(field,"t","de");
	205	+ SpanQuery q3 = new MtasCQLParserGroupQuery(field,"sentence");
	206	+ SpanQuery q4 = new SpanWithinQuery(q3, q2);
	207	+ List<MtasSpanSequenceItem> items = new ArrayList<MtasSpanSequenceItem>();
	208	+ items.add(new MtasSpanSequenceItem(q1, false));
	209	+ items.add(new MtasSpanSequenceItem(q4, false));
	210	+ SpanQuery q = new MtasSpanSequenceQuery(items);
	211	+ testCQLParse(field, cql, q);
	212	+ }
	213	+
	214	+ private void basicTest14() {
	215	+ String field = "testveld";
	216	+ String cql = "([t=\"de\"] within <sentence/>)[lemma=\"koe\"]";
	217	+ SpanQuery q1 = new MtasCQLParserWordQuery(field,"t","de");
	218	+ SpanQuery q2 = new MtasCQLParserGroupQuery(field,"sentence");
	219	+ SpanQuery q3 = new SpanWithinQuery(q2, q1);
	220	+ SpanQuery q4 = new MtasCQLParserWordQuery(field,"lemma","koe");
	221	+ List<MtasSpanSequenceItem> items = new ArrayList<MtasSpanSequenceItem>();
	222	+ items.add(new MtasSpanSequenceItem(q3, false));
	223	+ items.add(new MtasSpanSequenceItem(q4, false));
	224	+ SpanQuery q = new MtasSpanSequenceQuery(items);
	225	+ testCQLParse(field, cql, q);
	226	+ }
	227	+
	228	+ private void basicTest15() {
	229	+ String field = "testveld";
	230	+ String cql = "[lemma=\"koe\"](<sentence/> containing [t=\"de\"]) within <sentence/>[lemma=\"paard\"]";
	231	+ SpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe");
	232	+ SpanQuery q2 = new MtasCQLParserGroupQuery(field,"sentence");
	233	+ SpanQuery q3 = new MtasCQLParserWordQuery(field,"t","de");
	234	+ SpanQuery q4 = new SpanContainingQuery(q2, q3);
	235	+ SpanQuery q5 = new MtasCQLParserGroupQuery(field,"sentence");
	236	+ SpanQuery q6 = new MtasCQLParserWordQuery(field,"lemma","paard");
	237	+ List<MtasSpanSequenceItem> items1 = new ArrayList<MtasSpanSequenceItem>();
	238	+ items1.add(new MtasSpanSequenceItem(q5, false));
	239	+ items1.add(new MtasSpanSequenceItem(q6, false));
	240	+ SpanQuery q7 = new MtasSpanSequenceQuery(items1);
	241	+ SpanQuery q8 = new SpanWithinQuery(q7, q4);
	242	+ List<MtasSpanSequenceItem> items2 = new ArrayList<MtasSpanSequenceItem>();
	243	+ items2.add(new MtasSpanSequenceItem(q1, false));
	244	+ items2.add(new MtasSpanSequenceItem(q8, false));
	245	+ SpanQuery q = new MtasSpanSequenceQuery(items2);
	246	+ testCQLParse(field, cql, q);
	247	+ }
	248	+
	249	+ private void basicTest16() {
	250	+ String field = "testveld";
	251	+ String cql = "(<entity=\"loc\"/> within (<s/> containing [t_lc=\"amsterdam\"])) !containing ([t_lc=\"amsterdam\"])";
	252	+ SpanQuery q1 = new MtasCQLParserGroupQuery(field,"entity","loc");
	253	+ SpanQuery q2 = new MtasCQLParserGroupQuery(field,"s");
	254	+ SpanQuery q3 = new MtasCQLParserWordQuery(field,"t_lc","amsterdam");
	255	+ SpanQuery q4 = new SpanContainingQuery(q2, q3);
	256	+ SpanQuery q5 = new SpanWithinQuery(q4, q1);
	257	+ SpanQuery q = new SpanNotQuery(q5,new SpanContainingQuery(q5, q3));
	258	+ testCQLParse(field, cql, q);
	259	+ }
	260	+
	261	+}
...	...

first commit

Too many changes to show. Reload with full diff Plain diff Email patch

Too many changes to show.
Reload with full diff Plain diff Email patch