From 347732b5ea42bbaad59f38253f5bcd1e4e0de87a Mon Sep 17 00:00:00 2001
From: Matthijs Brouwer <matthijs@brouwer.info>
Date: Sat, 15 Jul 2017 08:44:33 +0200
Subject: [PATCH] add lowercase (_lc) word token to chat parser configs
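---
Note: the only functional change is the new "_lc" token (text filtered with "ascii,lowercase") added to the "w" word mapping in both files; the remaining changed lines in docker/mtas/demo_chat.xml appear to be whitespace-only reformatting.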
conf/parser/mtas/chat_test.xml | 9 +++++++++
docker/mtas/demo_chat.xml | 723 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
2 files changed, 375 insertions(+), 357 deletions(-)
diff --git a/conf/parser/mtas/chat_test.xml b/conf/parser/mtas/chat_test.xml
index 79fbfa4..b537397 100644
--- a/conf/parser/mtas/chat_test.xml
+++ b/conf/parser/mtas/chat_test.xml
@@ -103,6 +103,15 @@
<item type="text" />
</post>
</token>
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ <item type="string" value="_lc" />
+ </pre>
+ <post>
+ <item type="text" filter="ascii,lowercase" />
+ </post>
+ </token>
</mapping>
<mapping type="word" name="t">
<token type="string" offset="false">
diff --git a/docker/mtas/demo_chat.xml b/docker/mtas/demo_chat.xml
index 79fbfa4..2a8fe62 100644
--- a/docker/mtas/demo_chat.xml
+++ b/docker/mtas/demo_chat.xml
@@ -1,129 +1,138 @@
<?xml version="1.0" encoding="UTF-8" ?>
<mtas>
- <!-- START MTAS INDEX CONFIGURATION -->
- <index>
- <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
- <payload index="true" />
- <offset index="true" />
- <realoffset index="true" />
- <parent index="true" />
- <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
- </index>
- <!-- END MTAS INDEX CONFIGURATION -->
+ <!-- START MTAS INDEX CONFIGURATION -->
+ <index>
+ <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
+ <payload index="true" />
+ <offset index="true" />
+ <realoffset index="true" />
+ <parent index="true" />
+ <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
+ </index>
+ <!-- END MTAS INDEX CONFIGURATION -->
- <!-- START CONFIGURATION MTAS CHAT PARSER -->
- <parser name="mtas.analysis.parser.MtasChatParser">
+ <!-- START CONFIGURATION MTAS CHAT PARSER -->
+ <parser name="mtas.analysis.parser.MtasChatParser">
- <!-- START GENERAL SETTINGS MTAS CHAT PARSER -->
- <autorepair value="true" />
- <makeunique value="true" />
- <!-- END GENERAL SETTINGS MTAS CHAT PARSER -->
+ <!-- START GENERAL SETTINGS MTAS CHAT PARSER -->
+ <autorepair value="true" />
+ <makeunique value="true" />
+ <!-- END GENERAL SETTINGS MTAS CHAT PARSER -->
- <!-- START VARIABLES -->
- <variables>
- <variable name="participant" value="participant.role">
- <value>
- <item type="attribute" name="role" />
- </value>
- </variable>
- <variable name="participant" value="participant.name">
- <value>
- <item type="attribute" name="name" />
- </value>
- </variable>
- <variable name="participant" value="participant.age">
- <value>
- <item type="attribute" name="age" />
- </value>
- </variable>
- <variable name="participant" value="participant.group">
- <value>
- <item type="attribute" name="group" />
- </value>
- </variable>
- <variable name="participant" value="participant.sex">
- <value>
- <item type="attribute" name="sex" />
- </value>
- </variable>
- <variable name="participant" value="participant.SES">
- <value>
- <item type="attribute" name="SES" />
- </value>
- </variable>
- <variable name="participant" value="participant.education">
- <value>
- <item type="attribute" name="education" />
- </value>
- </variable>
- <variable name="participant" value="participant.custom-field">
- <value>
- <item type="attribute" name="custom-field" />
- </value>
- </variable>
- <variable name="participant" value="participant.birthday">
- <value>
- <item type="attribute" name="birthday" />
- </value>
- </variable>
- <variable name="participant" value="participant.language">
- <value>
- <item type="attribute" name="language" />
- </value>
- </variable>
- <variable name="participant" value="participant.first-language">
- <value>
- <item type="attribute" name="first-language" />
- </value>
- </variable>
- <variable name="participant" value="participant.birthplace">
- <value>
- <item type="attribute" name="birthplace" />
- </value>
- </variable>
- </variables>
- <!-- END VARIABLES -->
+ <!-- START VARIABLES -->
+ <variables>
+ <variable name="participant" value="participant.role">
+ <value>
+ <item type="attribute" name="role" />
+ </value>
+ </variable>
+ <variable name="participant" value="participant.name">
+ <value>
+ <item type="attribute" name="name" />
+ </value>
+ </variable>
+ <variable name="participant" value="participant.age">
+ <value>
+ <item type="attribute" name="age" />
+ </value>
+ </variable>
+ <variable name="participant" value="participant.group">
+ <value>
+ <item type="attribute" name="group" />
+ </value>
+ </variable>
+ <variable name="participant" value="participant.sex">
+ <value>
+ <item type="attribute" name="sex" />
+ </value>
+ </variable>
+ <variable name="participant" value="participant.SES">
+ <value>
+ <item type="attribute" name="SES" />
+ </value>
+ </variable>
+ <variable name="participant" value="participant.education">
+ <value>
+ <item type="attribute" name="education" />
+ </value>
+ </variable>
+ <variable name="participant" value="participant.custom-field">
+ <value>
+ <item type="attribute" name="custom-field" />
+ </value>
+ </variable>
+ <variable name="participant" value="participant.birthday">
+ <value>
+ <item type="attribute" name="birthday" />
+ </value>
+ </variable>
+ <variable name="participant" value="participant.language">
+ <value>
+ <item type="attribute" name="language" />
+ </value>
+ </variable>
+ <variable name="participant" value="participant.first-language">
+ <value>
+ <item type="attribute" name="first-language" />
+ </value>
+ </variable>
+ <variable name="participant" value="participant.birthplace">
+ <value>
+ <item type="attribute" name="birthplace" />
+ </value>
+ </variable>
+ </variables>
+ <!-- END VARIABLES -->
- <!-- START REFERENCES -->
- <!-- END REFERENCES -->
+ <!-- START REFERENCES -->
+ <!-- END REFERENCES -->
- <!-- START MAPPINGS -->
- <mappings>
+ <!-- START MAPPINGS -->
+ <mappings>
- <!-- START WORDS -->
- <mapping type="word" name="w">
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- </pre>
- <post>
- <item type="text" />
- </post>
- </token>
- </mapping>
- <mapping type="word" name="t">
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- <item type="attribute" name="type" prefix="." />
- </pre>
- </token>
- </mapping>
- <mapping type="word" name="a">
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- <item type="attribute" name="type" prefix="." />
- </pre>
- <post>
- <item type="text" />
- </post>
- </token>
- </mapping>
- <mapping type="word" name="e">
+ <!-- START WORDS -->
+ <mapping type="word" name="w">
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ </pre>
+ <post>
+ <item type="text" />
+ </post>
+ </token>
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ <item type="string" value="_lc" />
+ </pre>
+ <post>
+ <item type="text" filter="ascii,lowercase" />
+ </post>
+ </token>
+ </mapping>
+ <mapping type="word" name="t">
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ <item type="attribute" name="type" prefix="." />
+ </pre>
+ </token>
+ </mapping>
+ <mapping type="word" name="a">
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ <item type="attribute" name="type" prefix="." />
+ </pre>
+ <post>
+ <item type="text" />
+ </post>
+ </token>
+ </mapping>
+ <mapping type="word" name="e">
<token type="string" offset="false">
<pre>
<item type="name" />
@@ -143,69 +152,69 @@
</post>
</token>
</mapping>
- <!-- END WORDS -->
+ <!-- END WORDS -->
- <!-- START WORD ANNOTATIONS -->
- <mapping type="wordAnnotation" name="pos">
- </mapping>
- <mapping type="wordAnnotation" name="c">
- <token type="string" offset="false">
- <pre>
- <item type="ancestorName" />
- <item type="name" prefix="." />
- </pre>
- <post>
- <item type="text" />
- </post>
- </token>
- <condition>
- <item type="ancestorName" condition="pos" />
- <item type="ancestorWordName" condition="w" />
- </condition>
- </mapping>
- <mapping type="wordAnnotation" name="s">
- <token type="string" offset="false">
- <pre>
- <item type="ancestorName" />
- <item type="name" prefix="." />
- </pre>
- <post>
- <item type="text" />
- </post>
- </token>
- <condition>
- <item type="ancestorName" condition="pos" />
- <item type="ancestorWordName" condition="w" />
- </condition>
- </mapping>
- <mapping type="wordAnnotation" name="stem">
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- </pre>
- <post>
- <item type="text" />
- </post>
- </token>
- <condition>
+ <!-- START WORD ANNOTATIONS -->
+ <mapping type="wordAnnotation" name="pos">
+ </mapping>
+ <mapping type="wordAnnotation" name="c">
+ <token type="string" offset="false">
+ <pre>
+ <item type="ancestorName" />
+ <item type="name" prefix="." />
+ </pre>
+ <post>
+ <item type="text" />
+ </post>
+ </token>
+ <condition>
+ <item type="ancestorName" condition="pos" />
+ <item type="ancestorWordName" condition="w" />
+ </condition>
+ </mapping>
+ <mapping type="wordAnnotation" name="s">
+ <token type="string" offset="false">
+ <pre>
+ <item type="ancestorName" />
+ <item type="name" prefix="." />
+ </pre>
+ <post>
+ <item type="text" />
+ </post>
+ </token>
+ <condition>
+ <item type="ancestorName" condition="pos" />
+ <item type="ancestorWordName" condition="w" />
+ </condition>
+ </mapping>
+ <mapping type="wordAnnotation" name="stem">
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ </pre>
+ <post>
+ <item type="text" />
+ </post>
+ </token>
+ <condition>
<item type="ancestorWordName" condition="w" />
</condition>
- </mapping>
- <mapping type="wordAnnotation" name="mk">
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- <item type="attribute" name="type" prefix="." />
- </pre>
- <post>
- <item type="text" />
- </post>
- </token>
- <condition>
+ </mapping>
+ <mapping type="wordAnnotation" name="mk">
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ <item type="attribute" name="type" prefix="." />
+ </pre>
+ <post>
+ <item type="text" />
+ </post>
+ </token>
+ <condition>
<item type="ancestorWordName" condition="w" />
</condition>
- </mapping>
- <mapping type="wordAnnotation" name="ga">
+ </mapping>
+ <mapping type="wordAnnotation" name="ga">
<token type="string" offset="false">
<pre>
<item type="ancestorWordName" />
@@ -231,194 +240,194 @@
<item type="ancestorWordName" condition="e" />
</condition>
</mapping>
- <!-- END WORD ANNOTATIONS -->
+ <!-- END WORD ANNOTATIONS -->
- <!-- START RELATIONS -->
- <!-- END RELATIONS -->
+ <!-- START RELATIONS -->
+ <!-- END RELATIONS -->
- <!-- START GROUPS -->
- <mapping type="group" name="tcu">
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- </pre>
- </token>
- </mapping>
- <mapping type="group" name="u">
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- </pre>
- </token>
- </mapping>
- <mapping type="group" name="u">
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- <item type="string" value=".id" />
- </pre>
- <post>
- <item type="attribute" name="uID" />
- </post>
- </token>
- <condition>
- <item type="attribute" name="uID" />
- </condition>
- </mapping>
- <mapping type="group" name="u">
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- <item type="string" value=".role" />
- </pre>
- <post>
- <item type="variableFromAttribute" name="participant.role"
- value="who" />
- </post>
- </token>
- </mapping>
- <mapping type="group" name="u">
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- <item type="string" value=".name" />
- </pre>
- <post>
- <item type="variableFromAttribute" name="participant.name"
- value="who" />
- </post>
- </token>
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- <item type="string" value=".age" />
- </pre>
- <post>
- <item type="variableFromAttribute" name="participant.age"
- value="who" />
- </post>
- </token>
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- <item type="string" value=".sex" />
- </pre>
- <post>
- <item type="variableFromAttribute" name="participant.sex"
- value="who" />
- </post>
- </token>
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- <item type="string" value=".birthday" />
- </pre>
- <post>
- <item type="variableFromAttribute" name="participant.birthday"
- value="who" />
- </post>
- </token>
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- <item type="string" value=".language" />
- </pre>
- <post>
- <item type="variableFromAttribute" name="participant.language"
- value="who" />
- </post>
- </token>
- <condition>
- <item type="attribute" name="who" />
- </condition>
- </mapping>
- <mapping type="group" name="g">
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- </pre>
- </token>
- </mapping>
- <mapping type="group" name="pg">
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- </pre>
- </token>
- </mapping>
- <mapping type="group" name="sg">
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- </pre>
- </token>
- </mapping>
- <!-- END GROUPS -->
+ <!-- START GROUPS -->
+ <mapping type="group" name="tcu">
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ </pre>
+ </token>
+ </mapping>
+ <mapping type="group" name="u">
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ </pre>
+ </token>
+ </mapping>
+ <mapping type="group" name="u">
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ <item type="string" value=".id" />
+ </pre>
+ <post>
+ <item type="attribute" name="uID" />
+ </post>
+ </token>
+ <condition>
+ <item type="attribute" name="uID" />
+ </condition>
+ </mapping>
+ <mapping type="group" name="u">
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ <item type="string" value=".role" />
+ </pre>
+ <post>
+ <item type="variableFromAttribute" name="participant.role"
+ value="who" />
+ </post>
+ </token>
+ </mapping>
+ <mapping type="group" name="u">
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ <item type="string" value=".name" />
+ </pre>
+ <post>
+ <item type="variableFromAttribute" name="participant.name"
+ value="who" />
+ </post>
+ </token>
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ <item type="string" value=".age" />
+ </pre>
+ <post>
+ <item type="variableFromAttribute" name="participant.age"
+ value="who" />
+ </post>
+ </token>
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ <item type="string" value=".sex" />
+ </pre>
+ <post>
+ <item type="variableFromAttribute" name="participant.sex"
+ value="who" />
+ </post>
+ </token>
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ <item type="string" value=".birthday" />
+ </pre>
+ <post>
+ <item type="variableFromAttribute" name="participant.birthday"
+ value="who" />
+ </post>
+ </token>
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ <item type="string" value=".language" />
+ </pre>
+ <post>
+ <item type="variableFromAttribute" name="participant.language"
+ value="who" />
+ </post>
+ </token>
+ <condition>
+ <item type="attribute" name="who" />
+ </condition>
+ </mapping>
+ <mapping type="group" name="g">
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ </pre>
+ </token>
+ </mapping>
+ <mapping type="group" name="pg">
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ </pre>
+ </token>
+ </mapping>
+ <mapping type="group" name="sg">
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ </pre>
+ </token>
+ </mapping>
+ <!-- END GROUPS -->
- <!-- START GROUP ANNOTATIONS -->
- <mapping type="groupAnnotation" name="a">
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- <item type="attribute" name="type" prefix="." />
- <item type="attribute" name="flavor" prefix="." />
- </pre>
- <post>
- <item type="text" />
- </post>
- </token>
- <condition>
- <item type="attribute" name="type" />
- </condition>
- </mapping>
- <mapping type="groupAnnotation" name="media">
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- <item type="string" value=".start" />
- </pre>
- <post>
- <item type="attribute" name="start" />
- </post>
- </token>
- <condition>
- <item type="attribute" name="start" />
- </condition>
- </mapping>
- <mapping type="groupAnnotation" name="media">
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- <item type="string" value=".end" />
- </pre>
- <post>
- <item type="attribute" name="end" />
- </post>
- </token>
- <condition>
- <item type="attribute" name="end" />
- </condition>
- </mapping>
- <mapping type="groupAnnotation" name="media">
- <token type="string" offset="false">
- <pre>
- <item type="name" />
- <item type="string" value=".unit" />
- </pre>
- <post>
- <item type="attribute" name="unit" />
- </post>
- </token>
- <condition>
- <item type="attribute" name="unit" />
- </condition>
- </mapping>
- <!-- END GROUP ANNOTATIONS -->
+ <!-- START GROUP ANNOTATIONS -->
+ <mapping type="groupAnnotation" name="a">
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ <item type="attribute" name="type" prefix="." />
+ <item type="attribute" name="flavor" prefix="." />
+ </pre>
+ <post>
+ <item type="text" />
+ </post>
+ </token>
+ <condition>
+ <item type="attribute" name="type" />
+ </condition>
+ </mapping>
+ <mapping type="groupAnnotation" name="media">
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ <item type="string" value=".start" />
+ </pre>
+ <post>
+ <item type="attribute" name="start" />
+ </post>
+ </token>
+ <condition>
+ <item type="attribute" name="start" />
+ </condition>
+ </mapping>
+ <mapping type="groupAnnotation" name="media">
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ <item type="string" value=".end" />
+ </pre>
+ <post>
+ <item type="attribute" name="end" />
+ </post>
+ </token>
+ <condition>
+ <item type="attribute" name="end" />
+ </condition>
+ </mapping>
+ <mapping type="groupAnnotation" name="media">
+ <token type="string" offset="false">
+ <pre>
+ <item type="name" />
+ <item type="string" value=".unit" />
+ </pre>
+ <post>
+ <item type="attribute" name="unit" />
+ </post>
+ </token>
+ <condition>
+ <item type="attribute" name="unit" />
+ </condition>
+ </mapping>
+ <!-- END GROUP ANNOTATIONS -->
- </mappings>
- <!-- END MAPPINGS -->
+ </mappings>
+ <!-- END MAPPINGS -->
- </parser>
- <!-- END CONFIGURATION MTAS CHAT PARSER -->
+ </parser>
+ <!-- END CONFIGURATION MTAS CHAT PARSER -->
</mtas>
\ No newline at end of file
--
libgit2 0.22.2