Commit 347732b5ea42bbaad59f38253f5bcd1e4e0de87a

Authored by Matthijs Brouwer
1 parent 0ed73db7

Add lowercase `_lc` word token (filter `ascii,lowercase`) to chat parser configs

conf/parser/mtas/chat_test.xml
... ... @@ -103,6 +103,15 @@
103 103 <item type="text" />
104 104 </post>
105 105 </token>
  106 + <token type="string" offset="false">
  107 + <pre>
  108 + <item type="name" />
  109 + <item type="string" value="_lc" />
  110 + </pre>
  111 + <post>
  112 + <item type="text" filter="ascii,lowercase" />
  113 + </post>
  114 + </token>
106 115 </mapping>
107 116 <mapping type="word" name="t">
108 117 <token type="string" offset="false">
... ...
docker/mtas/demo_chat.xml
1 1 <?xml version="1.0" encoding="UTF-8" ?>
2 2 <mtas>
3 3  
4   - <!-- START MTAS INDEX CONFIGURATION -->
5   - <index>
6   - <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
7   - <payload index="true" />
8   - <offset index="true" />
9   - <realoffset index="true" />
10   - <parent index="true" />
11   - <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
12   - </index>
13   - <!-- END MTAS INDEX CONFIGURATION -->
  4 + <!-- START MTAS INDEX CONFIGURATION -->
  5 + <index>
  6 + <!-- START GENERAL SETTINGS MTAS INDEX PROCESS -->
  7 + <payload index="true" />
  8 + <offset index="true" />
  9 + <realoffset index="true" />
  10 + <parent index="true" />
  11 + <!-- END GENERAL SETTINGS MTAS INDEX PROCESS -->
  12 + </index>
  13 + <!-- END MTAS INDEX CONFIGURATION -->
14 14  
15 15  
16 16  
17   - <!-- START CONFIGURATION MTAS CHAT PARSER -->
18   - <parser name="mtas.analysis.parser.MtasChatParser">
  17 + <!-- START CONFIGURATION MTAS CHAT PARSER -->
  18 + <parser name="mtas.analysis.parser.MtasChatParser">
19 19  
20   - <!-- START GENERAL SETTINGS MTAS CHAT PARSER -->
21   - <autorepair value="true" />
22   - <makeunique value="true" />
23   - <!-- END GENERAL SETTINGS MTAS CHAT PARSER -->
  20 + <!-- START GENERAL SETTINGS MTAS CHAT PARSER -->
  21 + <autorepair value="true" />
  22 + <makeunique value="true" />
  23 + <!-- END GENERAL SETTINGS MTAS CHAT PARSER -->
24 24  
25   - <!-- START VARIABLES -->
26   - <variables>
27   - <variable name="participant" value="participant.role">
28   - <value>
29   - <item type="attribute" name="role" />
30   - </value>
31   - </variable>
32   - <variable name="participant" value="participant.name">
33   - <value>
34   - <item type="attribute" name="name" />
35   - </value>
36   - </variable>
37   - <variable name="participant" value="participant.age">
38   - <value>
39   - <item type="attribute" name="age" />
40   - </value>
41   - </variable>
42   - <variable name="participant" value="participant.group">
43   - <value>
44   - <item type="attribute" name="group" />
45   - </value>
46   - </variable>
47   - <variable name="participant" value="participant.sex">
48   - <value>
49   - <item type="attribute" name="sex" />
50   - </value>
51   - </variable>
52   - <variable name="participant" value="participant.SES">
53   - <value>
54   - <item type="attribute" name="SES" />
55   - </value>
56   - </variable>
57   - <variable name="participant" value="participant.education">
58   - <value>
59   - <item type="attribute" name="education" />
60   - </value>
61   - </variable>
62   - <variable name="participant" value="participant.custom-field">
63   - <value>
64   - <item type="attribute" name="custom-field" />
65   - </value>
66   - </variable>
67   - <variable name="participant" value="participant.birthday">
68   - <value>
69   - <item type="attribute" name="birthday" />
70   - </value>
71   - </variable>
72   - <variable name="participant" value="participant.language">
73   - <value>
74   - <item type="attribute" name="language" />
75   - </value>
76   - </variable>
77   - <variable name="participant" value="participant.first-language">
78   - <value>
79   - <item type="attribute" name="first-language" />
80   - </value>
81   - </variable>
82   - <variable name="participant" value="participant.birthplace">
83   - <value>
84   - <item type="attribute" name="birthplace" />
85   - </value>
86   - </variable>
87   - </variables>
88   - <!-- END VARIABLES -->
  25 + <!-- START VARIABLES -->
  26 + <variables>
  27 + <variable name="participant" value="participant.role">
  28 + <value>
  29 + <item type="attribute" name="role" />
  30 + </value>
  31 + </variable>
  32 + <variable name="participant" value="participant.name">
  33 + <value>
  34 + <item type="attribute" name="name" />
  35 + </value>
  36 + </variable>
  37 + <variable name="participant" value="participant.age">
  38 + <value>
  39 + <item type="attribute" name="age" />
  40 + </value>
  41 + </variable>
  42 + <variable name="participant" value="participant.group">
  43 + <value>
  44 + <item type="attribute" name="group" />
  45 + </value>
  46 + </variable>
  47 + <variable name="participant" value="participant.sex">
  48 + <value>
  49 + <item type="attribute" name="sex" />
  50 + </value>
  51 + </variable>
  52 + <variable name="participant" value="participant.SES">
  53 + <value>
  54 + <item type="attribute" name="SES" />
  55 + </value>
  56 + </variable>
  57 + <variable name="participant" value="participant.education">
  58 + <value>
  59 + <item type="attribute" name="education" />
  60 + </value>
  61 + </variable>
  62 + <variable name="participant" value="participant.custom-field">
  63 + <value>
  64 + <item type="attribute" name="custom-field" />
  65 + </value>
  66 + </variable>
  67 + <variable name="participant" value="participant.birthday">
  68 + <value>
  69 + <item type="attribute" name="birthday" />
  70 + </value>
  71 + </variable>
  72 + <variable name="participant" value="participant.language">
  73 + <value>
  74 + <item type="attribute" name="language" />
  75 + </value>
  76 + </variable>
  77 + <variable name="participant" value="participant.first-language">
  78 + <value>
  79 + <item type="attribute" name="first-language" />
  80 + </value>
  81 + </variable>
  82 + <variable name="participant" value="participant.birthplace">
  83 + <value>
  84 + <item type="attribute" name="birthplace" />
  85 + </value>
  86 + </variable>
  87 + </variables>
  88 + <!-- END VARIABLES -->
89 89  
90   - <!-- START REFERENCES -->
91   - <!-- END REFERENCES -->
  90 + <!-- START REFERENCES -->
  91 + <!-- END REFERENCES -->
92 92  
93   - <!-- START MAPPINGS -->
94   - <mappings>
  93 + <!-- START MAPPINGS -->
  94 + <mappings>
95 95  
96   - <!-- START WORDS -->
97   - <mapping type="word" name="w">
98   - <token type="string" offset="false">
99   - <pre>
100   - <item type="name" />
101   - </pre>
102   - <post>
103   - <item type="text" />
104   - </post>
105   - </token>
106   - </mapping>
107   - <mapping type="word" name="t">
108   - <token type="string" offset="false">
109   - <pre>
110   - <item type="name" />
111   - <item type="attribute" name="type" prefix="." />
112   - </pre>
113   - </token>
114   - </mapping>
115   - <mapping type="word" name="a">
116   - <token type="string" offset="false">
117   - <pre>
118   - <item type="name" />
119   - <item type="attribute" name="type" prefix="." />
120   - </pre>
121   - <post>
122   - <item type="text" />
123   - </post>
124   - </token>
125   - </mapping>
126   - <mapping type="word" name="e">
  96 + <!-- START WORDS -->
  97 + <mapping type="word" name="w">
  98 + <token type="string" offset="false">
  99 + <pre>
  100 + <item type="name" />
  101 + </pre>
  102 + <post>
  103 + <item type="text" />
  104 + </post>
  105 + </token>
  106 + <token type="string" offset="false">
  107 + <pre>
  108 + <item type="name" />
  109 + <item type="string" value="_lc" />
  110 + </pre>
  111 + <post>
  112 + <item type="text" filter="ascii,lowercase" />
  113 + </post>
  114 + </token>
  115 + </mapping>
  116 + <mapping type="word" name="t">
  117 + <token type="string" offset="false">
  118 + <pre>
  119 + <item type="name" />
  120 + <item type="attribute" name="type" prefix="." />
  121 + </pre>
  122 + </token>
  123 + </mapping>
  124 + <mapping type="word" name="a">
  125 + <token type="string" offset="false">
  126 + <pre>
  127 + <item type="name" />
  128 + <item type="attribute" name="type" prefix="." />
  129 + </pre>
  130 + <post>
  131 + <item type="text" />
  132 + </post>
  133 + </token>
  134 + </mapping>
  135 + <mapping type="word" name="e">
127 136 <token type="string" offset="false">
128 137 <pre>
129 138 <item type="name" />
... ... @@ -143,69 +152,69 @@
143 152 </post>
144 153 </token>
145 154 </mapping>
146   - <!-- END WORDS -->
  155 + <!-- END WORDS -->
147 156  
148   - <!-- START WORD ANNOTATIONS -->
149   - <mapping type="wordAnnotation" name="pos">
150   - </mapping>
151   - <mapping type="wordAnnotation" name="c">
152   - <token type="string" offset="false">
153   - <pre>
154   - <item type="ancestorName" />
155   - <item type="name" prefix="." />
156   - </pre>
157   - <post>
158   - <item type="text" />
159   - </post>
160   - </token>
161   - <condition>
162   - <item type="ancestorName" condition="pos" />
163   - <item type="ancestorWordName" condition="w" />
164   - </condition>
165   - </mapping>
166   - <mapping type="wordAnnotation" name="s">
167   - <token type="string" offset="false">
168   - <pre>
169   - <item type="ancestorName" />
170   - <item type="name" prefix="." />
171   - </pre>
172   - <post>
173   - <item type="text" />
174   - </post>
175   - </token>
176   - <condition>
177   - <item type="ancestorName" condition="pos" />
178   - <item type="ancestorWordName" condition="w" />
179   - </condition>
180   - </mapping>
181   - <mapping type="wordAnnotation" name="stem">
182   - <token type="string" offset="false">
183   - <pre>
184   - <item type="name" />
185   - </pre>
186   - <post>
187   - <item type="text" />
188   - </post>
189   - </token>
190   - <condition>
  157 + <!-- START WORD ANNOTATIONS -->
  158 + <mapping type="wordAnnotation" name="pos">
  159 + </mapping>
  160 + <mapping type="wordAnnotation" name="c">
  161 + <token type="string" offset="false">
  162 + <pre>
  163 + <item type="ancestorName" />
  164 + <item type="name" prefix="." />
  165 + </pre>
  166 + <post>
  167 + <item type="text" />
  168 + </post>
  169 + </token>
  170 + <condition>
  171 + <item type="ancestorName" condition="pos" />
  172 + <item type="ancestorWordName" condition="w" />
  173 + </condition>
  174 + </mapping>
  175 + <mapping type="wordAnnotation" name="s">
  176 + <token type="string" offset="false">
  177 + <pre>
  178 + <item type="ancestorName" />
  179 + <item type="name" prefix="." />
  180 + </pre>
  181 + <post>
  182 + <item type="text" />
  183 + </post>
  184 + </token>
  185 + <condition>
  186 + <item type="ancestorName" condition="pos" />
  187 + <item type="ancestorWordName" condition="w" />
  188 + </condition>
  189 + </mapping>
  190 + <mapping type="wordAnnotation" name="stem">
  191 + <token type="string" offset="false">
  192 + <pre>
  193 + <item type="name" />
  194 + </pre>
  195 + <post>
  196 + <item type="text" />
  197 + </post>
  198 + </token>
  199 + <condition>
191 200 <item type="ancestorWordName" condition="w" />
192 201 </condition>
193   - </mapping>
194   - <mapping type="wordAnnotation" name="mk">
195   - <token type="string" offset="false">
196   - <pre>
197   - <item type="name" />
198   - <item type="attribute" name="type" prefix="." />
199   - </pre>
200   - <post>
201   - <item type="text" />
202   - </post>
203   - </token>
204   - <condition>
  202 + </mapping>
  203 + <mapping type="wordAnnotation" name="mk">
  204 + <token type="string" offset="false">
  205 + <pre>
  206 + <item type="name" />
  207 + <item type="attribute" name="type" prefix="." />
  208 + </pre>
  209 + <post>
  210 + <item type="text" />
  211 + </post>
  212 + </token>
  213 + <condition>
205 214 <item type="ancestorWordName" condition="w" />
206 215 </condition>
207   - </mapping>
208   - <mapping type="wordAnnotation" name="ga">
  216 + </mapping>
  217 + <mapping type="wordAnnotation" name="ga">
209 218 <token type="string" offset="false">
210 219 <pre>
211 220 <item type="ancestorWordName" />
... ... @@ -231,194 +240,194 @@
231 240 <item type="ancestorWordName" condition="e" />
232 241 </condition>
233 242 </mapping>
234   - <!-- END WORD ANNOTATIONS -->
  243 + <!-- END WORD ANNOTATIONS -->
235 244  
236   - <!-- START RELATIONS -->
237   - <!-- END RELATIONS -->
  245 + <!-- START RELATIONS -->
  246 + <!-- END RELATIONS -->
238 247  
239   - <!-- START GROUPS -->
240   - <mapping type="group" name="tcu">
241   - <token type="string" offset="false">
242   - <pre>
243   - <item type="name" />
244   - </pre>
245   - </token>
246   - </mapping>
247   - <mapping type="group" name="u">
248   - <token type="string" offset="false">
249   - <pre>
250   - <item type="name" />
251   - </pre>
252   - </token>
253   - </mapping>
254   - <mapping type="group" name="u">
255   - <token type="string" offset="false">
256   - <pre>
257   - <item type="name" />
258   - <item type="string" value=".id" />
259   - </pre>
260   - <post>
261   - <item type="attribute" name="uID" />
262   - </post>
263   - </token>
264   - <condition>
265   - <item type="attribute" name="uID" />
266   - </condition>
267   - </mapping>
268   - <mapping type="group" name="u">
269   - <token type="string" offset="false">
270   - <pre>
271   - <item type="name" />
272   - <item type="string" value=".role" />
273   - </pre>
274   - <post>
275   - <item type="variableFromAttribute" name="participant.role"
276   - value="who" />
277   - </post>
278   - </token>
279   - </mapping>
280   - <mapping type="group" name="u">
281   - <token type="string" offset="false">
282   - <pre>
283   - <item type="name" />
284   - <item type="string" value=".name" />
285   - </pre>
286   - <post>
287   - <item type="variableFromAttribute" name="participant.name"
288   - value="who" />
289   - </post>
290   - </token>
291   - <token type="string" offset="false">
292   - <pre>
293   - <item type="name" />
294   - <item type="string" value=".age" />
295   - </pre>
296   - <post>
297   - <item type="variableFromAttribute" name="participant.age"
298   - value="who" />
299   - </post>
300   - </token>
301   - <token type="string" offset="false">
302   - <pre>
303   - <item type="name" />
304   - <item type="string" value=".sex" />
305   - </pre>
306   - <post>
307   - <item type="variableFromAttribute" name="participant.sex"
308   - value="who" />
309   - </post>
310   - </token>
311   - <token type="string" offset="false">
312   - <pre>
313   - <item type="name" />
314   - <item type="string" value=".birthday" />
315   - </pre>
316   - <post>
317   - <item type="variableFromAttribute" name="participant.birthday"
318   - value="who" />
319   - </post>
320   - </token>
321   - <token type="string" offset="false">
322   - <pre>
323   - <item type="name" />
324   - <item type="string" value=".language" />
325   - </pre>
326   - <post>
327   - <item type="variableFromAttribute" name="participant.language"
328   - value="who" />
329   - </post>
330   - </token>
331   - <condition>
332   - <item type="attribute" name="who" />
333   - </condition>
334   - </mapping>
335   - <mapping type="group" name="g">
336   - <token type="string" offset="false">
337   - <pre>
338   - <item type="name" />
339   - </pre>
340   - </token>
341   - </mapping>
342   - <mapping type="group" name="pg">
343   - <token type="string" offset="false">
344   - <pre>
345   - <item type="name" />
346   - </pre>
347   - </token>
348   - </mapping>
349   - <mapping type="group" name="sg">
350   - <token type="string" offset="false">
351   - <pre>
352   - <item type="name" />
353   - </pre>
354   - </token>
355   - </mapping>
356   - <!-- END GROUPS -->
  248 + <!-- START GROUPS -->
  249 + <mapping type="group" name="tcu">
  250 + <token type="string" offset="false">
  251 + <pre>
  252 + <item type="name" />
  253 + </pre>
  254 + </token>
  255 + </mapping>
  256 + <mapping type="group" name="u">
  257 + <token type="string" offset="false">
  258 + <pre>
  259 + <item type="name" />
  260 + </pre>
  261 + </token>
  262 + </mapping>
  263 + <mapping type="group" name="u">
  264 + <token type="string" offset="false">
  265 + <pre>
  266 + <item type="name" />
  267 + <item type="string" value=".id" />
  268 + </pre>
  269 + <post>
  270 + <item type="attribute" name="uID" />
  271 + </post>
  272 + </token>
  273 + <condition>
  274 + <item type="attribute" name="uID" />
  275 + </condition>
  276 + </mapping>
  277 + <mapping type="group" name="u">
  278 + <token type="string" offset="false">
  279 + <pre>
  280 + <item type="name" />
  281 + <item type="string" value=".role" />
  282 + </pre>
  283 + <post>
  284 + <item type="variableFromAttribute" name="participant.role"
  285 + value="who" />
  286 + </post>
  287 + </token>
  288 + </mapping>
  289 + <mapping type="group" name="u">
  290 + <token type="string" offset="false">
  291 + <pre>
  292 + <item type="name" />
  293 + <item type="string" value=".name" />
  294 + </pre>
  295 + <post>
  296 + <item type="variableFromAttribute" name="participant.name"
  297 + value="who" />
  298 + </post>
  299 + </token>
  300 + <token type="string" offset="false">
  301 + <pre>
  302 + <item type="name" />
  303 + <item type="string" value=".age" />
  304 + </pre>
  305 + <post>
  306 + <item type="variableFromAttribute" name="participant.age"
  307 + value="who" />
  308 + </post>
  309 + </token>
  310 + <token type="string" offset="false">
  311 + <pre>
  312 + <item type="name" />
  313 + <item type="string" value=".sex" />
  314 + </pre>
  315 + <post>
  316 + <item type="variableFromAttribute" name="participant.sex"
  317 + value="who" />
  318 + </post>
  319 + </token>
  320 + <token type="string" offset="false">
  321 + <pre>
  322 + <item type="name" />
  323 + <item type="string" value=".birthday" />
  324 + </pre>
  325 + <post>
  326 + <item type="variableFromAttribute" name="participant.birthday"
  327 + value="who" />
  328 + </post>
  329 + </token>
  330 + <token type="string" offset="false">
  331 + <pre>
  332 + <item type="name" />
  333 + <item type="string" value=".language" />
  334 + </pre>
  335 + <post>
  336 + <item type="variableFromAttribute" name="participant.language"
  337 + value="who" />
  338 + </post>
  339 + </token>
  340 + <condition>
  341 + <item type="attribute" name="who" />
  342 + </condition>
  343 + </mapping>
  344 + <mapping type="group" name="g">
  345 + <token type="string" offset="false">
  346 + <pre>
  347 + <item type="name" />
  348 + </pre>
  349 + </token>
  350 + </mapping>
  351 + <mapping type="group" name="pg">
  352 + <token type="string" offset="false">
  353 + <pre>
  354 + <item type="name" />
  355 + </pre>
  356 + </token>
  357 + </mapping>
  358 + <mapping type="group" name="sg">
  359 + <token type="string" offset="false">
  360 + <pre>
  361 + <item type="name" />
  362 + </pre>
  363 + </token>
  364 + </mapping>
  365 + <!-- END GROUPS -->
357 366  
358   - <!-- START GROUP ANNOTATIONS -->
359   - <mapping type="groupAnnotation" name="a">
360   - <token type="string" offset="false">
361   - <pre>
362   - <item type="name" />
363   - <item type="attribute" name="type" prefix="." />
364   - <item type="attribute" name="flavor" prefix="." />
365   - </pre>
366   - <post>
367   - <item type="text" />
368   - </post>
369   - </token>
370   - <condition>
371   - <item type="attribute" name="type" />
372   - </condition>
373   - </mapping>
374   - <mapping type="groupAnnotation" name="media">
375   - <token type="string" offset="false">
376   - <pre>
377   - <item type="name" />
378   - <item type="string" value=".start" />
379   - </pre>
380   - <post>
381   - <item type="attribute" name="start" />
382   - </post>
383   - </token>
384   - <condition>
385   - <item type="attribute" name="start" />
386   - </condition>
387   - </mapping>
388   - <mapping type="groupAnnotation" name="media">
389   - <token type="string" offset="false">
390   - <pre>
391   - <item type="name" />
392   - <item type="string" value=".end" />
393   - </pre>
394   - <post>
395   - <item type="attribute" name="end" />
396   - </post>
397   - </token>
398   - <condition>
399   - <item type="attribute" name="end" />
400   - </condition>
401   - </mapping>
402   - <mapping type="groupAnnotation" name="media">
403   - <token type="string" offset="false">
404   - <pre>
405   - <item type="name" />
406   - <item type="string" value=".unit" />
407   - </pre>
408   - <post>
409   - <item type="attribute" name="unit" />
410   - </post>
411   - </token>
412   - <condition>
413   - <item type="attribute" name="unit" />
414   - </condition>
415   - </mapping>
416   - <!-- END GROUP ANNOTATIONS -->
  367 + <!-- START GROUP ANNOTATIONS -->
  368 + <mapping type="groupAnnotation" name="a">
  369 + <token type="string" offset="false">
  370 + <pre>
  371 + <item type="name" />
  372 + <item type="attribute" name="type" prefix="." />
  373 + <item type="attribute" name="flavor" prefix="." />
  374 + </pre>
  375 + <post>
  376 + <item type="text" />
  377 + </post>
  378 + </token>
  379 + <condition>
  380 + <item type="attribute" name="type" />
  381 + </condition>
  382 + </mapping>
  383 + <mapping type="groupAnnotation" name="media">
  384 + <token type="string" offset="false">
  385 + <pre>
  386 + <item type="name" />
  387 + <item type="string" value=".start" />
  388 + </pre>
  389 + <post>
  390 + <item type="attribute" name="start" />
  391 + </post>
  392 + </token>
  393 + <condition>
  394 + <item type="attribute" name="start" />
  395 + </condition>
  396 + </mapping>
  397 + <mapping type="groupAnnotation" name="media">
  398 + <token type="string" offset="false">
  399 + <pre>
  400 + <item type="name" />
  401 + <item type="string" value=".end" />
  402 + </pre>
  403 + <post>
  404 + <item type="attribute" name="end" />
  405 + </post>
  406 + </token>
  407 + <condition>
  408 + <item type="attribute" name="end" />
  409 + </condition>
  410 + </mapping>
  411 + <mapping type="groupAnnotation" name="media">
  412 + <token type="string" offset="false">
  413 + <pre>
  414 + <item type="name" />
  415 + <item type="string" value=".unit" />
  416 + </pre>
  417 + <post>
  418 + <item type="attribute" name="unit" />
  419 + </post>
  420 + </token>
  421 + <condition>
  422 + <item type="attribute" name="unit" />
  423 + </condition>
  424 + </mapping>
  425 + <!-- END GROUP ANNOTATIONS -->
417 426  
418   - </mappings>
419   - <!-- END MAPPINGS -->
  427 + </mappings>
  428 + <!-- END MAPPINGS -->
420 429  
421   - </parser>
422   - <!-- END CONFIGURATION MTAS CHAT PARSER -->
  430 + </parser>
  431 + <!-- END CONFIGURATION MTAS CHAT PARSER -->
423 432  
424 433 </mtas>
425 434 \ No newline at end of file
... ...