<?xml version="1.0" encoding="UTF-8" ?>
<!-- demo_tei.xml -->
<mtas>

	<!-- START MTAS INDEX CONFIGURATION -->
	<index>
		<!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
		<!-- Index-level switches: one element per kind of token data, all four enabled. -->
		<!-- NOTE(review): the word tokens in the parser mappings set offset="false" and -->
		<!-- realoffset="false" individually; presumably those per-token flags take -->
		<!-- precedence over these settings. Confirm against the MTAS documentation. -->
		<payload index="true" />
		<offset index="true" />
		<realoffset index="true" />
		<parent index="true" />
		<!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
	</index>
	<!-- END MTAS INDEX CONFIGURATION -->



	<!-- START CONFIGURATION MTAS TEI PARSER -->
	<parser name="mtas.analysis.parser.MtasTEIParser">

		<!-- START GENERAL SETTINGS MTAS TEI PARSER -->
		<!-- Parser-wide switches; both are enabled in this configuration. -->
		<!-- NOTE(review): the names suggest automatic repair of problematic input and -->
		<!-- de-duplication of generated items. Confirm the exact semantics in the MTAS parser docs. -->
		<autorepair value="true" />
		<makeunique value="true" />
		<!-- END GENERAL SETTINGS MTAS TEI PARSER -->

		<!-- START REFERENCES -->
		<references>
			<!-- No references are declared in this configuration. -->
		</references>
		<!-- END REFERENCES -->

		<!-- START MAPPINGS -->
		<mappings>

			<!-- START WORDS -->
			<mapping type="word" name="w">
				<!-- Unconditional mapping for <w> elements: defines two tokens built from the element text. -->
				<!-- Token 1: literal "t" in <pre>, the unfiltered element text in <post>. -->
				<!-- NOTE(review): this is the only token in this configuration with parent="true"; -->
				<!-- the corresponding "t" token of the pc mapping uses parent="false". Confirm this is intended. -->
				<token type="string" offset="false" realoffset="false" parent="true">
					<pre>
						<item type="string" value="t" />
					</pre>
					<post>
						<item type="text" />
					</post>
				</token>
				<!-- Token 2: literal "t_lc" in <pre>, the element text passed through the "ascii,lowercase" filter. -->
				<!-- NOTE(review): presumably a folded copy of the text for case-insensitive search; verify the filter semantics. -->
				<token type="string" offset="false" realoffset="false" parent="false">
					<pre>
						<item type="string" value="t_lc" />
					</pre>
					<post>
						<item type="text" filter="ascii,lowercase" />
					</post>
				</token>
			</mapping>
			<mapping type="word" name="w">
				<!-- Defines a "lemma" token for <w> elements: literal "lemma" in <pre>, -->
				<!-- the value of the lemma attribute in <post>. -->
				<token type="string" offset="false" realoffset="false" parent="false">
					<pre>
						<item type="string" value="lemma" />
					</pre>
					<post>
						<item type="attribute" name="lemma" />
					</post>
				</token>
				<!-- The condition names the same attribute the token reads. -->
				<!-- NOTE(review): presumably this restricts the mapping to <w> elements that carry a lemma -->
				<!-- attribute, which would explain why it is kept separate from the unconditional text mapping. Confirm. -->
				<condition>
					<item type="attribute" name="lemma" />
				</condition>
			</mapping>
			<mapping type="word" name="w">
				<!-- Defines a "type" token for <w> elements: literal "type" in <pre>, -->
				<!-- the value of the type attribute in <post>. -->
				<token type="string" offset="false" realoffset="false" parent="false">
					<pre>
						<item type="string" value="type" />
					</pre>
					<post>
						<item type="attribute" name="type" />
					</post>
				</token>
				<!-- The condition names the same attribute the token reads. -->
				<!-- NOTE(review): presumably this restricts the mapping to <w> elements that carry a type attribute. Confirm. -->
				<condition>
					<item type="attribute" name="type" />
				</condition>
			</mapping>
			<mapping type="word" name="pc">
				<!-- <pc> elements are mapped with type="word" and get the same "t" / "t_lc" token pair -->
				<!-- as the <w> text mapping, except that both tokens here use parent="false". -->
				<!-- Token 1: literal "t" in <pre>, the unfiltered element text in <post>. -->
				<token type="string" offset="false" realoffset="false" parent="false">
					<pre>
						<item type="string" value="t" />
					</pre>
					<post>
						<item type="text" />
					</post>
				</token>
				<!-- Token 2: literal "t_lc" in <pre>, the element text passed through the "ascii,lowercase" filter. -->
				<token type="string" offset="false" realoffset="false" parent="false">
					<pre>
						<item type="string" value="t_lc" />
					</pre>
					<post>
						<item type="text" filter="ascii,lowercase" />
					</post>
				</token>
			</mapping>
			<!-- END WORDS -->

			<!-- START WORD ANNOTATIONS -->
			<!-- END WORD ANNOTATIONS -->

			<!-- START RELATIONS -->
			<!-- END RELATIONS -->

			<!-- START GROUPS -->
			<mapping type="group" name="p">
				<!-- Group mapping for <p> elements: one token with an item of type "name" in <pre> -->
				<!-- and the value of the type attribute in <post>. -->
				<!-- NOTE(review): presumably the "name" item resolves to the element name; no condition is -->
				<!-- attached, so confirm what is produced for a <p> that has no type attribute. -->
				<token type="string" offset="false">
					<pre>
						<item type="name" />
					</pre>
					<post>
						<item type="attribute" name="type" />
					</post>
				</token>
			</mapping>
			<mapping type="group" name="div">
				<!-- Group mapping for <div> elements, structured exactly like the <p> group mapping: -->
				<!-- an item of type "name" in <pre> and the value of the type attribute in <post>. -->
				<!-- NOTE(review): no condition is attached; confirm what is produced for a <div> without a type attribute. -->
				<token type="string" offset="false">
					<pre>
						<item type="name" />
					</pre>
					<post>
						<item type="attribute" name="type" />
					</post>
				</token>
			</mapping>
			<mapping type="group" name="rs">
				<!-- Group mapping for <rs> elements, same token layout as the <p> and <div> group mappings. -->
				<!-- NOTE(review): no condition is attached; confirm what is produced for an <rs> without a type attribute. -->
				<token type="string" offset="false">
					<pre>
						<item type="name" />
					</pre>
					<post>
						<item type="attribute" name="type" />
					</post>
				</token>
			</mapping>
			<!-- END GROUPS -->

			<!-- START GROUP ANNOTATIONS -->
			<!-- END GROUP ANNOTATIONS -->

		</mappings>
		<!-- END MAPPINGS -->

	</parser>
	<!-- END CONFIGURATION MTAS TEI PARSER -->

</mtas>