From f8c23b7a792807decfe930787375c0410db16d15 Mon Sep 17 00:00:00 2001 From: Matthijs Brouwer <matthijs@brouwer.info> Date: Fri, 14 Jul 2017 15:21:08 +0200 Subject: [PATCH] include chat xml format with examples --- docker/Dockerfile | 4 ++-- docker/site/example_demo4.html | 240 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/mtas/codec/util/collector/MtasDataItemDoubleFull.java | 30 +++++++++++++++--------------- src/mtas/codec/util/collector/MtasDataItemFull.java | 18 +++--------------- src/mtas/codec/util/collector/MtasDataItemLongFull.java | 2 +- src/mtas/solr/handler/component/util/MtasSolrResultUtil.java | 6 +++--- src/site/markdown/indexing_formats.md | 1 + src/site/markdown/indexing_formats_chat.md | 20 ++++++++++++++++++++ src/site/site.xml | 1 + 9 files changed, 286 insertions(+), 36 deletions(-) create mode 100644 docker/site/example_demo4.html create mode 100644 src/site/markdown/indexing_formats_chat.md diff --git a/docker/Dockerfile b/docker/Dockerfile index c7d3d0f..156a74d 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,5 +1,5 @@ # Automatically generated Dockerfile -# - Build 2017-07-13 06:32 +# - Build 2017-07-13 14:19 # - Lucene/Solr version 6.6.0 # - Mtas release 20170713 # @@ -74,7 +74,7 @@ RUN service apache2 stop && \ chmod -R 755 /var/www/html && \ printf "echo\n" >> /start.sh && \ printf "echo \"================ Mtas -- Multi Tier Annotation Search =================\"\n" >> /start.sh && \ - printf "echo \" Timestamp 2017-07-13 06:32\"\n" >> /start.sh && \ + printf "echo \" Timestamp 2017-07-13 14:19\"\n" >> /start.sh && \ printf "echo \" Lucene/Solr version 6.6.0\"\n" >> /start.sh && \ printf "echo \" Mtas release 20170713\"\n" >> /start.sh && \ printf "echo \" See https://meertensinstituut.github.io/mtas/ for more information\"\n" >> 
/start.sh && \ diff --git a/docker/site/example_demo4.html b/docker/site/example_demo4.html new file mode 100644 index 0000000..bcbb11a --- /dev/null +++ b/docker/site/example_demo4.html @@ -0,0 +1,240 @@ +<!DOCTYPE html> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> + <head> + <meta charset="UTF-8" /> + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> + <title>Multi Tier Annotation Search</title> + <script type="text/javascript" src="./js/jquery-3.1.1.min.js"></script> + <script type="text/javascript" src="./js/solr.js"></script> + <link rel="stylesheet" type="text/css" href="css/style.css"> + </head> + <body> + + <h1>Multi Tier Annotation Search - example demo4 (CHAT examples)</h1> + + <div> + Go to <a href="index.html">main page</a>. + </div> + + <hr noshade /> + + <h3>Create index</h3> + Post to /solr/demo4/update + <div class="solr" data-type="json" data-url="/solr/demo4/update?wt=json&commitWithin=1000"> + <div class="post"><textarea data-autoresize>[{ "id": "1","type": "chat","author":"Julie","title":"02-0_11_18","text":"chat-samples/Julie/02-0_11_18.xml" }, +{ "id": "2","type": "chat","author":"Julie","title":"09-1_03_16","text":"chat-samples/Julie/09-1_03_16.xml" }, +{ "id": "3","type": "chat","author":"Julie","title":"10-1_04_23","text":"chat-samples/Julie/10-1_04_23.xml" }, +{ "id": "4","type": "chat","author":"Julie","title":"11-1_06_04","text":"chat-samples/Julie/11-1_06_04.xml" }, +{ "id": "5","type": "chat","author":"Julie","title":"13-1_07_26","text":"chat-samples/Julie/13-1_07_26.xml" }, +{ "id": "6","type": "chat","author":"Julie","title":"14-1_09_16","text":"chat-samples/Julie/14-1_09_16.xml" }, +{ "id": "7","type": "chat","author":"Julie","title":"16-1_11_11","text":"chat-samples/Julie/16-1_11_11.xml" }, +{ "id": "8","type": "chat","author":"Julie","title":"17-2_00_10","text":"chat-samples/Julie/17-2_00_10.xml" }, +{ "id": "9","type": 
"chat","author":"Julie","title":"19-2_01_23","text":"chat-samples/Julie/19-2_01_23.xml" }, +{ "id": "10","type": "chat","author":"Julie","title":"25-2_06_22","text":"chat-samples/Julie/25-2_06_22.xml" }, +{ "id": "11","type": "chat","author":"Julie","title":"41-4_08_29","text":"chat-samples/Julie/41-4_08_29.xml" }, +{ "id": "12","type": "chat","author":"Julie","title":"42-5_03_19","text":"chat-samples/Julie/42-5_03_19.xml" }]</textarea></div> + <input class="button post" type="button" value="create index" /> + <input class="button reset" type="button" value="reset" /> + <div class="output"></div> + <div class="error"></div> + </div> + + <hr noshade /> + + <h3>Empty index</h3> + Post to /solr/demo4/update + <div class="solr" data-type="json" data-url="/solr/demo4/update?wt=json&commitWithin=1000"> + <div class="post"><textarea data-autoresize>{ + "delete": { + "query": "*:*" + } +}</textarea></div> + <input class="button post" type="button" value="delete index" /> + <input class="button reset" type="button" value="reset" /> + <div class="output"></div> + <div class="error"></div> + </div> + + <hr noshade /> + + <h3>Query</h3> + + <div> + Search for 'anniversaire' - classic; post to /solr/demo4/select + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> + <div class="post"><textarea data-autoresize>q=text:*anniversaire*</textarea></div> + <input class="button post" type="button" value="post" /> + <input class="button reset" type="button" value="reset" /> + <div class="output"></div> + <div class="error"></div> + </div> + </div> + + <br /> + + <div> + Search for 'anniversaire' - corpus query language (cql); post to /solr/demo4/select + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> + <div class="post"><textarea data-autoresize>q={!mtas_cql field="text" query="[w=\"anniversaire\"]"}</textarea></div> + <input class="button post" type="button" value="post" /> + <input class="button reset" 
type="button" value="reset" /> + <div class="output"></div> + <div class="error"></div> + </div> + </div> + + <hr noshade /> + + <h3>Basic stats</h3> + + <div> + Get the number of positions; post to /solr/demo4/select + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.stats=true&mtas.stats.positions=true&mtas.stats.positions.0.key=number of positions&mtas.stats.positions.0.field=text&mtas.stats.positions.0.type=all</textarea></div> + <input class="button post" type="button" value="post" /> + <input class="button reset" type="button" value="reset" /> + <div class="output"></div> + <div class="error"></div> + </div> + </div> + + <br /> + + <div> + Get the number of tokens; post to /solr/demo4/select + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.stats=true&mtas.stats.tokens=true&mtas.stats.tokens.0.key=number of tokens&mtas.stats.tokens.0.field=text&mtas.stats.tokens.0.type=all</textarea></div> + <input class="button post" type="button" value="post" /> + <input class="button reset" type="button" value="reset" /> + <div class="output"></div> + <div class="error"></div> + </div> + </div> + + <br /> + + <div> + Get the number of nouns; post to /solr/demo4/select + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.stats=true&mtas.stats.spans=true&mtas.stats.spans.0.key=number of nouns&mtas.stats.spans.0.field=text&mtas.stats.spans.0.type=all&mtas.stats.spans.0.query.0.type=cql&mtas.stats.spans.0.query.0.value=[pos.c="n"]&mtas.stats.spans.0.function.0.key=fraction of nouns&mtas.stats.spans.0.function.0.expression=$q0/$n&mtas.stats.spans.0.function.0.type=all</textarea></div> + <input class="button post" type="button" value="post" /> + <input 
class="button reset" type="button" value="reset" /> + <div class="output"></div> + <div class="error"></div> + </div> + </div> + + <hr noshade /> + + <h3>Kwic</h3> + + Keyword in Context while searching for 'anniversaire'; post to /solr/demo4/select + + <div> + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> + <div class="post"><textarea data-autoresize>q={!mtas_cql field="text" query="[w=\"anniversaire\"]"}&mtas=true&mtas.kwic=true&mtas.kwic.0.field=text&mtas.kwic.0.query.type=cql&mtas.kwic.0.query.value=[w="anniversaire"]&mtas.kwic.0.prefix=w&mtas.kwic.0.output=hit&mtas.kwic.0.left=2&mtas.kwic.0.right=3&mtas.kwic.0.number=2&mtas.kwic.0.start=0</textarea></div> + <input class="button post" type="button" value="post" /> + <input class="button reset" type="button" value="reset" /> + <div class="output"></div> + <div class="error"></div> + </div> + </div> + + <br /> + + Keyword in Context while searching for 'fête' followed by 'anniversaire' within 5 positions; post to /solr/demo4/select + + <div> + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> + <div class="post"><textarea data-autoresize>q={!mtas_cql field="text" query="[w=\"fête\"][]{0,4}[w=\"anniversaire\"]"}&mtas=true&mtas.kwic=true&mtas.kwic.0.field=text&mtas.kwic.0.query.type=cql&mtas.kwic.0.query.value=[w="fête"][]{0,4}[w="anniversaire"]&mtas.kwic.0.prefix=w,stem,pos.c,u,u.role,u.name,u.sex&mtas.kwic.0.output=token&mtas.kwic.0.left=0&mtas.kwic.0.right=0&mtas.kwic.0.number=2&mtas.kwic.0.start=0</textarea></div> + <input class="button post" type="button" value="post" /> + <input class="button reset" type="button" value="reset" /> + <div class="output"></div> + <div class="error"></div> + </div> + </div> + + <hr noshade /> + + <h3>Termvector</h3> + + Termvector of w with regexp [a-z]{5,} for documents containing 'anniversaire'; post to /solr/demo4/select + + <div> + <div class="solr" data-type="post" 
data-url="/solr/demo4/select?indent=true&wt=json"> + <div class="post"><textarea data-autoresize>q={!mtas_cql field="text" query="[w=\"anniversaire\"]"}&mtas=true&mtas.termvector=true&mtas.termvector.0.field=text&mtas.termvector.0.prefix=w&mtas.termvector.0.key=termvector on w&mtas.termvector.0.type=n,sum,mean&mtas.termvector.0.sort.type=sum&mtas.termvector.0.sort.direction=desc&mtas.termvector.0.number=20&mtas.termvector.0.regexp=[a-z]{5,}</textarea></div> + <input class="button post" type="button" value="post" /> + <input class="button reset" type="button" value="reset" /> + <div class="output"></div> + <div class="error"></div> + </div> + </div> + + <hr noshade /> + + <h3>Group</h3> + + Group w for any verb followed by 'pas' and preceded by 'ne'; post to /solr/demo4/select + + <div> + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.group=true&mtas.group.0.field=text&mtas.group.0.query.type=cql&mtas.group.0.query.value=([pos.c="v"] followedby [w="pas"]) precededby [w="ne"]&mtas.group.0.grouping.hit.inside.prefixes=w</textarea></div> + <input class="button post" type="button" value="post" /> + <input class="button reset" type="button" value="reset" /> + <div class="output"></div> + <div class="error"></div> + </div> + </div> + + <br/> + + Group w for any verb followed by 'pas' and preceded by 'ne' and used by Mother; post to /solr/demo4/select + + <div> + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.group=true&mtas.group.0.field=text&mtas.group.0.query.type=cql&mtas.group.0.query.value=(([pos.c="v"] followedby [w="pas"]) precededby [w="ne"]) within <u.role="Mother">&mtas.group.0.grouping.hit.inside.prefixes=w</textarea></div> + <input class="button post" type="button" value="post" /> + <input class="button reset" type="button" value="reset" /> 
+ <div class="output"></div> + <div class="error"></div> + </div> + </div> + + <br/> + + Group u.role and w for any verb followed by 'pas' and preceded by 'ne'; post to /solr/demo4/select + + <div> + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.group=true&mtas.group.0.field=text&mtas.group.0.query.type=cql&mtas.group.0.query.value=([pos.c="v"] followedby [w="pas"]) precededby [w="ne"]&mtas.group.0.grouping.hit.inside.prefixes=u.role,w</textarea></div> + <input class="button post" type="button" value="post" /> + <input class="button reset" type="button" value="reset" /> + <div class="output"></div> + <div class="error"></div> + </div> + </div> + + <hr noshade /> + + <h3>Prefixes</h3> + + Available prefixes; post to /solr/demo4/select + + <div> + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.prefix=true&mtas.prefix.0.field=text&mtas.prefix.0.key=prefixes</textarea></div> + <input class="button post" type="button" value="post" /> + <input class="button reset" type="button" value="reset" /> + <div class="output"></div> + <div class="error"></div> + </div> + </div> + + <hr noshade /> + + <div> + Go to <a href="index.html">main page</a>. 
+ </div> + + </body> +</html> diff --git a/src/mtas/codec/util/collector/MtasDataItemDoubleFull.java b/src/mtas/codec/util/collector/MtasDataItemDoubleFull.java index d6ac0ae..40d580a 100644 --- a/src/mtas/codec/util/collector/MtasDataItemDoubleFull.java +++ b/src/mtas/codec/util/collector/MtasDataItemDoubleFull.java @@ -62,7 +62,7 @@ public class MtasDataItemDoubleFull extends MtasDataItemFull<Double, Double> { */ @Override protected HashMap<String, Object> getDistribution(String argument) { - HashMap<String, Object> result = new LinkedHashMap<String, Object>(); + HashMap<String, Object> result = new LinkedHashMap<>(); Double start = null; Double end = null; Double step = null; @@ -202,16 +202,16 @@ public class MtasDataItemDoubleFull extends MtasDataItemFull<Double, Double> { createStats(); switch (sortType) { case CodecUtil.STATS_TYPE_SUM: - return new MtasDataItemNumberComparator<Double>(stats.getSum(), + return new MtasDataItemNumberComparator<>(stats.getSum(), sortDirection); case CodecUtil.STATS_TYPE_MAX: - return new MtasDataItemNumberComparator<Double>(stats.getMax(), + return new MtasDataItemNumberComparator<>(stats.getMax(), sortDirection); case CodecUtil.STATS_TYPE_MIN: - return new MtasDataItemNumberComparator<Double>(stats.getMin(), + return new MtasDataItemNumberComparator<>(stats.getMin(), sortDirection); case CodecUtil.STATS_TYPE_SUMSQ: - return new MtasDataItemNumberComparator<Double>(stats.getSumsq(), + return new MtasDataItemNumberComparator<>(stats.getSumsq(), sortDirection); default: return null; @@ -228,34 +228,34 @@ public class MtasDataItemDoubleFull extends MtasDataItemFull<Double, Double> { createStats(); switch (sortType) { case CodecUtil.STATS_TYPE_SUMOFLOGS: - return new MtasDataItemNumberComparator<Double>( + return new MtasDataItemNumberComparator<>( stats.getN() * Math.log(stats.getGeometricMean()), sortDirection); case CodecUtil.STATS_TYPE_MEAN: - return new MtasDataItemNumberComparator<Double>(stats.getMean(), + return new 
MtasDataItemNumberComparator<>(stats.getMean(), sortDirection); case CodecUtil.STATS_TYPE_GEOMETRICMEAN: - return new MtasDataItemNumberComparator<Double>(stats.getGeometricMean(), + return new MtasDataItemNumberComparator<>(stats.getGeometricMean(), sortDirection); case CodecUtil.STATS_TYPE_STANDARDDEVIATION: - return new MtasDataItemNumberComparator<Double>( + return new MtasDataItemNumberComparator<>( stats.getStandardDeviation(), sortDirection); case CodecUtil.STATS_TYPE_VARIANCE: - return new MtasDataItemNumberComparator<Double>(stats.getVariance(), + return new MtasDataItemNumberComparator<>(stats.getVariance(), sortDirection); case CodecUtil.STATS_TYPE_POPULATIONVARIANCE: - return new MtasDataItemNumberComparator<Double>( + return new MtasDataItemNumberComparator<>( stats.getPopulationVariance(), sortDirection); case CodecUtil.STATS_TYPE_QUADRATICMEAN: - return new MtasDataItemNumberComparator<Double>(stats.getQuadraticMean(), + return new MtasDataItemNumberComparator<>(Math.sqrt(stats.getSumsq()/stats.getN()), sortDirection); case CodecUtil.STATS_TYPE_KURTOSIS: - return new MtasDataItemNumberComparator<Double>(stats.getKurtosis(), + return new MtasDataItemNumberComparator<>(stats.getKurtosis(), sortDirection); case CodecUtil.STATS_TYPE_MEDIAN: - return new MtasDataItemNumberComparator<Double>(stats.getPercentile(50), + return new MtasDataItemNumberComparator<>(stats.getPercentile(50), sortDirection); case CodecUtil.STATS_TYPE_SKEWNESS: - return new MtasDataItemNumberComparator<Double>(stats.getSkewness(), + return new MtasDataItemNumberComparator<>(stats.getSkewness(), sortDirection); default: return null; diff --git a/src/mtas/codec/util/collector/MtasDataItemFull.java b/src/mtas/codec/util/collector/MtasDataItemFull.java index 62a45e5..31cb247 100644 --- a/src/mtas/codec/util/collector/MtasDataItemFull.java +++ b/src/mtas/codec/util/collector/MtasDataItemFull.java @@ -135,7 +135,7 @@ abstract class MtasDataItemFull<T1 extends Number & Comparable<T1>, T2 
extends N } else if (statsItem.equals(CodecUtil.STATS_TYPE_POPULATIONVARIANCE)) { response.put(statsItem, stats.getPopulationVariance()); } else if (statsItem.equals(CodecUtil.STATS_TYPE_QUADRATICMEAN)) { - response.put(statsItem, stats.getQuadraticMean()); + response.put(statsItem, Math.sqrt(stats.getSumsq()/stats.getN())); } else if (statsItem.equals(CodecUtil.STATS_TYPE_KURTOSIS)) { response.put(statsItem, stats.getKurtosis()); } else if (statsItem.equals(CodecUtil.STATS_TYPE_MEDIAN)) { @@ -153,7 +153,7 @@ abstract class MtasDataItemFull<T1 extends Number & Comparable<T1>, T2 extends N } } if (errorNumber > 0) { - Map<String, Object> errorResponse = new HashMap<String, Object>(); + Map<String, Object> errorResponse = new HashMap<>(); for (Entry<String, Integer> entry : getErrorList().entrySet()) { errorResponse.put(entry.getKey(), entry.getValue()); } @@ -178,31 +178,19 @@ abstract class MtasDataItemFull<T1 extends Number & Comparable<T1>, T2 extends N case CodecUtil.STATS_TYPE_N: return 0; case CodecUtil.STATS_TYPE_SUM: - return 1; case CodecUtil.STATS_TYPE_MAX: - return 1; case CodecUtil.STATS_TYPE_MIN: - return 1; case CodecUtil.STATS_TYPE_SUMSQ: return 1; case CodecUtil.STATS_TYPE_SUMOFLOGS: - return 2; case CodecUtil.STATS_TYPE_MEAN: - return 2; case CodecUtil.STATS_TYPE_GEOMETRICMEAN: - return 2; case CodecUtil.STATS_TYPE_STANDARDDEVIATION: - return 2; case CodecUtil.STATS_TYPE_VARIANCE: - return 2; case CodecUtil.STATS_TYPE_POPULATIONVARIANCE: - return 2; case CodecUtil.STATS_TYPE_QUADRATICMEAN: - return 2; case CodecUtil.STATS_TYPE_KURTOSIS: - return 2; case CodecUtil.STATS_TYPE_MEDIAN: - return 2; case CodecUtil.STATS_TYPE_SKEWNESS: return 2; default: @@ -219,7 +207,7 @@ abstract class MtasDataItemFull<T1 extends Number & Comparable<T1>, T2 extends N createStats(); switch (sortType) { case CodecUtil.STATS_TYPE_N: - return new MtasDataItemNumberComparator<Long>(stats.getN(), + return new MtasDataItemNumberComparator<>(stats.getN(), sortDirection); 
default: return null; diff --git a/src/mtas/codec/util/collector/MtasDataItemLongFull.java b/src/mtas/codec/util/collector/MtasDataItemLongFull.java index f39e144..7ebce08 100644 --- a/src/mtas/codec/util/collector/MtasDataItemLongFull.java +++ b/src/mtas/codec/util/collector/MtasDataItemLongFull.java @@ -210,7 +210,7 @@ class MtasDataItemLongFull extends MtasDataItemFull<Long, Double> { return new MtasDataItemNumberComparator<>(stats.getPopulationVariance(), sortDirection); case CodecUtil.STATS_TYPE_QUADRATICMEAN: - return new MtasDataItemNumberComparator<>(stats.getQuadraticMean(), + return new MtasDataItemNumberComparator<>(Math.sqrt(stats.getSumsq()/stats.getN()), sortDirection); case CodecUtil.STATS_TYPE_KURTOSIS: return new MtasDataItemNumberComparator<>(stats.getKurtosis(), diff --git a/src/mtas/solr/handler/component/util/MtasSolrResultUtil.java b/src/mtas/solr/handler/component/util/MtasSolrResultUtil.java index acd3032..dc95dbe 100644 --- a/src/mtas/solr/handler/component/util/MtasSolrResultUtil.java +++ b/src/mtas/solr/handler/component/util/MtasSolrResultUtil.java @@ -93,7 +93,7 @@ public class MtasSolrResultUtil { private static void rewrite(NamedList<Object> nl, boolean doCollapse) throws IOException { boolean showDebugInfo = false; - HashMap<String, NamedList<Object>> collapseNamedList = new HashMap<String, NamedList<Object>>(); + HashMap<String, NamedList<Object>> collapseNamedList = new HashMap<>(); int length = nl.size(); for (int i = 0; i < length; i++) { if (nl.getVal(i) instanceof NamedList) { @@ -172,7 +172,7 @@ public class MtasSolrResultUtil { */ private static ArrayList<NamedList<Object>> rewriteToArray( NamedList<Object> nnl) { - ArrayList<NamedList<Object>> al = new ArrayList<NamedList<Object>>(); + ArrayList<NamedList<Object>> al = new ArrayList<>(); String key; Iterator<Entry<String, Object>> it = nnl.iterator(); while (it.hasNext()) { @@ -384,7 +384,7 @@ public class MtasSolrResultUtil { "unequal size " + nameNew + " and " + 
nameOriginal); } if (unique) { - Set<String> set = new HashSet<String>(); + Set<String> set = new HashSet<>(); for (int i = 0; i < list.length; i++) { set.add(list[i]); } diff --git a/src/site/markdown/indexing_formats.md b/src/site/markdown/indexing_formats.md index f8df5a1..941ab08 100644 --- a/src/site/markdown/indexing_formats.md +++ b/src/site/markdown/indexing_formats.md @@ -5,6 +5,7 @@ To configure the mapping from resources to the index structure, several parsers * [MtasFoliaParser](indexing_formats_folia.html) : mapping [FoLiA](https://proycon.github.io/folia/) resources * [MtasTEIParser](indexing_formats_tei.html): mapping [ISO-TEI](http://www.tei-c.org/) resources * [MtasSketchParser](indexing_formats_sketch.html): mapping [Sketch Engine](https://www.sketchengine.co.uk/word-sketch-index-format/) resources +* [MtasChatParser](indexing_formats_chat.html): mapping [CHAT transcription format](http://talkbank.org/manuals/CHAT.pdf) resources converted to [XML](http://talkbank.org/software/xsddoc/) * [MtasCRMParser](indexing_formats_crm.html): mapping resources with format Corpus Van Reenen-Mulder/Adelheid For XML-based formats, these parsers often just slightly extend the abstract MtasXMLParser by defining the correct namespaces and root tags. diff --git a/src/site/markdown/indexing_formats_chat.md b/src/site/markdown/indexing_formats_chat.md new file mode 100644 index 0000000..034f877 --- /dev/null +++ b/src/site/markdown/indexing_formats_chat.md @@ -0,0 +1,20 @@ +#CHAT + +For indexing [CHAT transcription format](http://talkbank.org/manuals/CHAT.pdf) resources converted to [XML](http://talkbank.org/software/xsddoc/), the *mtas.analysis.parser.MtasChatParser* extending the abstract *MtasXMLParser* is available; full examples of configuration files are provided on [GitHub](https://github.com/meertensinstituut/mtas/tree/master/conf/parser/mtas). + +```xml +<!-- START CONFIGURATION MTAS PARSER --> +<parser name="mtas.analysis.parser.MtasChatParser"> +... 
+ <!-- START MAPPINGS --> + <mappings> + ... + </mappings> + <!-- END MAPPINGS --> + ... +</parser> +<!-- END CONFIGURATION MTAS PARSER --> +``` + +The syntax of the parser part in the [configuration file](indexing_configuration.html#configuration) is, apart from the *name* attribute, almost identical to the configuration of the [FoLiA-parser](indexing_formats_folia.html) and [TEI-parser](indexing_formats_tei.html). + diff --git a/src/site/site.xml b/src/site/site.xml index 3ca6c74..580fefd 100644 --- a/src/site/site.xml +++ b/src/site/site.xml @@ -33,6 +33,7 @@ <item name="FoLiA" href="indexing_formats_folia.html" collapse="true"/> <item name="TEI" href="indexing_formats_tei.html" collapse="true"/> <item name="Sketch" href="indexing_formats_sketch.html" collapse="true"/> + <item name="CHAT" href="indexing_formats_chat.html" collapse="true"/> <item name="CRM" href="indexing_formats_crm.html" collapse="true"/> </item> </item> -- libgit2 0.22.2