diff --git a/conf/parser/mtas/test_iso_tei.xml b/conf/parser/mtas/test_iso_tei.xml deleted file mode 100644 index 92742e6..0000000 --- a/conf/parser/mtas/test_iso_tei.xml +++ /dev/null @@ -1,195 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<mtas> - - <!-- START MTAS INDEX CONFIGURATION --> - <index> - <!-- START GENERAL SETTINGS MTAS INDEX PROCESS --> - <payload index="true" /> - <offset index="true" /> - <realoffset index="true" /> - <parent index="true" /> - <!-- END GENERAL SETTINGS MTAS INDEX PROCESS --> - </index> - <!-- END MTAS INDEX CONFIGURATION --> - - <!-- START CONFIGURATION MTAS ISO/TEI PARSER --> - <parser name="mtas.analysis.parser.MtasTEIParser"> - - <!-- START GENERAL SETTINGS MTAS ISO/TEI PARSER --> - <autorepair value="true" /> - <makeunique value="true" /> - <!-- END GENERAL SETTINGS MTAS ISO/TEI PARSER --> - - <!-- START VARIABLES --> - <variables> - <variable name="when" value="interval"> - <value> - <item type="attribute" name="interval" /> - </value> - </variable> - <variable name="person" value="sex"> - <value> - <item type="attribute" name="sex" /> - </value> - </variable> - </variables> - <!-- END VARIABLES --> - - <!-- START REFERENCES --> - <!-- END REFERENCES --> - - <!-- START MAPPINGS --> - <mappings> - - <!-- START WORDS --> - <mapping type="word" name="w" start="#" end="#"> - <token type="string" offset="false" realoffset="false" parent="true"> - <pre> - <item type="string" value="w" /> - </pre> - <post> - <item type="text" /> - </post> - </token> - <token type="string" offset="false" realoffset="false" parent="false"> - <pre> - <item type="string" value="w_lc" /> - </pre> - <post> - <item type="text" filter="ascii,lowercase" /> - </post> - </token> - <token type="string" offset="false" realoffset="false" parent="true"> - <pre> - <item type="string" value="w.id" /> - </pre> - <post> - <item type="attribute" name="#" /> - </post> - </token> - </mapping> - <mapping type="word" name="anchor" start="synch" end="synch"> - <token type="string" offset="false" realoffset="false" parent="false"> - <pre> - <item type="name" /> - <item type="string" value=".time" /> - </pre> - <post> - <item type="variableFromAttribute" name="interval" value="synch" /> - </post> - </token> - </mapping> - <mapping type="word" name="pause"> - <token type="string" offset="false" realoffset="false" parent="false"> - <pre> - <item type="name" /> - </pre> - <post> - <item type="attribute" name="type" /> - </post> - </token> - </mapping> - <mapping type="word" name="vocal"></mapping> - <mapping type="word" name="incident"></mapping> - <!-- END WORDS --> - - <!-- START WORD ANNOTATIONS --> - <mapping type="wordAnnotation" name="desc"> - <token type="string" offset="false" realoffset="false" parent="false"> - <pre> - <item type="ancestorWordName" /> - </pre> - <post> - <item type="text" /> - </post> - </token> - </mapping> - <!-- END WORD ANNOTATIONS --> - - <!-- START RELATIONS --> - <!-- END RELATIONS --> - - - <!-- START GROUPS --> - <mapping type="group" name="annotationBlock" start="start" - end="end"> - <token type="string" offset="false" realoffset="false" parent="false"> - <pre> - <item type="name" /> - <item type="string" value=".interval" /> - </pre> - <post> - <item type="variableFromAttribute" name="interval" value="start" /> - <item type="variableFromAttribute" name="interval" value="end" - prefix=" - " /> - </post> - </token> - <token type="string" offset="false" realoffset="false" parent="false"> - <pre> - <item type="name" /> - <item type="string" value=".sex" /> - </pre> - <post> - <item type="variableFromAttribute" name="sex" value="who" /> - </post> - </token> - </mapping> - <mapping type="group" name="u" > - <token type="string" offset="false" realoffset="false" parent="true"> - <pre> - <item type="name" /> - </pre> - </token> - <token type="string" offset="false" realoffset="false" parent="true"> - <pre> - <item type="name" /> - <item type="string" value=".id" /> - </pre> - <post> - <item type="attribute" name="#" /> - </post> - </token> - </mapping> - <!-- END GROUPS --> - - <!-- START GROUP ANNOTATIONS --> - <mapping type="groupAnnotation" name="spanGrp"> - </mapping> - - <mapping type="groupAnnotation" name="span" start="from" end="to"> - <token type="string" offset="false" realoffset="false" parent="false"> - <pre> - <item type="name" /> - <item type="ancestorAttribute" distance="0" name="type" - prefix="." /> - <item type="string" value=".interval" /> - </pre> - <post> - <item type="variableFromAttribute" name="interval" value="from" /> - <item type="variableFromAttribute" name="interval" value="to" - prefix=" - " /> - </post> - </token> - </mapping> - - <mapping type="groupAnnotation" name="span" start="from" end="to"> - <token type="string" offset="false" realoffset="false" parent="false"> - <pre> - <item type="ancestorAttribute" distance="0" name="type" /> - </pre> - <post> - <item type="text" /> - </post> - </token> - </mapping> - - - <!-- END GROUP ANNOTATIONS --> - - </mappings> - <!-- END MAPPINGS --> - - </parser> - <!-- END CONFIGURATION MTAS ISO/TEI PARSER --> - -</mtas> diff --git a/docker/isotei-samples/EXMARaLDA_DEMO_ISO_TEI/DE_AnneWill.xml.gz b/docker/isotei-samples/EXMARaLDA_DEMO_ISO_TEI/DE_AnneWill.xml.gz index de9dd81..a098fdf 100755 --- a/docker/isotei-samples/EXMARaLDA_DEMO_ISO_TEI/DE_AnneWill.xml.gz +++ b/docker/isotei-samples/EXMARaLDA_DEMO_ISO_TEI/DE_AnneWill.xml.gz diff --git a/docker/isotei-samples/EXMARaLDA_DEMO_ISO_TEI/DE_ForumWaffenrecht.xml.gz b/docker/isotei-samples/EXMARaLDA_DEMO_ISO_TEI/DE_ForumWaffenrecht.xml.gz index 5be1ca4..7aa517d 100755 --- a/docker/isotei-samples/EXMARaLDA_DEMO_ISO_TEI/DE_ForumWaffenrecht.xml.gz +++ b/docker/isotei-samples/EXMARaLDA_DEMO_ISO_TEI/DE_ForumWaffenrecht.xml.gz diff --git a/docker/isotei-samples/EXMARaLDA_DEMO_ISO_TEI/DE_HartAberFair.xml.gz b/docker/isotei-samples/EXMARaLDA_DEMO_ISO_TEI/DE_HartAberFair.xml.gz index b3ce52c..ff3ba55 100755 --- a/docker/isotei-samples/EXMARaLDA_DEMO_ISO_TEI/DE_HartAberFair.xml.gz +++ b/docker/isotei-samples/EXMARaLDA_DEMO_ISO_TEI/DE_HartAberFair.xml.gz diff --git a/docker/isotei-samples/FOLK_STUTTGART_21_TEI_ISO/FOLK_E_00064_SE_01_T_01_DF_01.xml.gz b/docker/isotei-samples/FOLK_STUTTGART_21_TEI_ISO/FOLK_E_00064_SE_01_T_01_DF_01.xml.gz index f3c1fdd..1a384b5 100755 --- a/docker/isotei-samples/FOLK_STUTTGART_21_TEI_ISO/FOLK_E_00064_SE_01_T_01_DF_01.xml.gz +++ b/docker/isotei-samples/FOLK_STUTTGART_21_TEI_ISO/FOLK_E_00064_SE_01_T_01_DF_01.xml.gz diff --git a/docker/isotei-samples/FOLK_STUTTGART_21_TEI_ISO/FOLK_E_00064_SE_01_T_02_DF_01.xml.gz b/docker/isotei-samples/FOLK_STUTTGART_21_TEI_ISO/FOLK_E_00064_SE_01_T_02_DF_01.xml.gz index 2e1625a..fd5e9ea 100755 --- a/docker/isotei-samples/FOLK_STUTTGART_21_TEI_ISO/FOLK_E_00064_SE_01_T_02_DF_01.xml.gz +++ b/docker/isotei-samples/FOLK_STUTTGART_21_TEI_ISO/FOLK_E_00064_SE_01_T_02_DF_01.xml.gz diff --git a/docker/isotei-samples/FOLK_STUTTGART_21_TEI_ISO/FOLK_E_00064_SE_01_T_03_DF_01.xml.gz b/docker/isotei-samples/FOLK_STUTTGART_21_TEI_ISO/FOLK_E_00064_SE_01_T_03_DF_01.xml.gz index 2821502..08f9219 100755 --- a/docker/isotei-samples/FOLK_STUTTGART_21_TEI_ISO/FOLK_E_00064_SE_01_T_03_DF_01.xml.gz +++ b/docker/isotei-samples/FOLK_STUTTGART_21_TEI_ISO/FOLK_E_00064_SE_01_T_03_DF_01.xml.gz