Commit f8c23b7a792807decfe930787375c0410db16d15
1 parent
a38f5648
include chat xml format with examples
Showing
9 changed files
with
286 additions
and
36 deletions
docker/Dockerfile
1 | 1 | # Automatically generated Dockerfile |
2 | -# - Build 2017-07-13 06:32 | |
2 | +# - Build 2017-07-13 14:19 | |
3 | 3 | # - Lucene/Solr version 6.6.0 |
4 | 4 | # - Mtas release 20170713 |
5 | 5 | # |
... | ... | @@ -74,7 +74,7 @@ RUN service apache2 stop && \ |
74 | 74 | chmod -R 755 /var/www/html && \ |
75 | 75 | printf "echo\n" >> /start.sh && \ |
76 | 76 | printf "echo \"================ Mtas -- Multi Tier Annotation Search =================\"\n" >> /start.sh && \ |
77 | - printf "echo \" Timestamp 2017-07-13 06:32\"\n" >> /start.sh && \ | |
77 | + printf "echo \" Timestamp 2017-07-13 14:19\"\n" >> /start.sh && \ | |
78 | 78 | printf "echo \" Lucene/Solr version 6.6.0\"\n" >> /start.sh && \ |
79 | 79 | printf "echo \" Mtas release 20170713\"\n" >> /start.sh && \ |
80 | 80 | printf "echo \" See https://meertensinstituut.github.io/mtas/ for more information\"\n" >> /start.sh && \ |
... | ... |
docker/site/example_demo4.html
0 → 100644
1 | +<!DOCTYPE html> | |
2 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
3 | + <head> | |
4 | + <meta charset="UTF-8" /> | |
5 | + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> | |
6 | + <title>Multi Tier Annotation Search</title> | |
7 | + <script type="text/javascript" src="./js/jquery-3.1.1.min.js"></script> | |
8 | + <script type="text/javascript" src="./js/solr.js"></script> | |
9 | + <link rel="stylesheet" type="text/css" href="css/style.css"> | |
10 | + </head> | |
11 | + <body> | |
12 | + | |
13 | + <h1>Multi Tier Annotation Search - example demo4 (CHAT examples)</h1> | |
14 | + | |
15 | + <div> | |
16 | + Go to <a href="index.html">main page</a>. | |
17 | + </div> | |
18 | + | |
19 | + <hr noshade /> | |
20 | + | |
21 | + <h3>Create index</h3> | |
22 | + Post to /solr/demo4/update | |
23 | + <div class="solr" data-type="json" data-url="/solr/demo4/update?wt=json&commitWithin=1000"> | |
24 | + <div class="post"><textarea data-autoresize>[{ "id": "1","type": "chat","author":"Julie","title":"02-0_11_18","text":"chat-samples/Julie/02-0_11_18.xml" }, | |
25 | +{ "id": "2","type": "chat","author":"Julie","title":"09-1_03_16","text":"chat-samples/Julie/09-1_03_16.xml" }, | |
26 | +{ "id": "3","type": "chat","author":"Julie","title":"10-1_04_23","text":"chat-samples/Julie/10-1_04_23.xml" }, | |
27 | +{ "id": "4","type": "chat","author":"Julie","title":"11-1_06_04","text":"chat-samples/Julie/11-1_06_04.xml" }, | |
28 | +{ "id": "5","type": "chat","author":"Julie","title":"13-1_07_26","text":"chat-samples/Julie/13-1_07_26.xml" }, | |
29 | +{ "id": "6","type": "chat","author":"Julie","title":"14-1_09_16","text":"chat-samples/Julie/14-1_09_16.xml" }, | |
30 | +{ "id": "7","type": "chat","author":"Julie","title":"16-1_11_11","text":"chat-samples/Julie/16-1_11_11.xml" }, | |
31 | +{ "id": "8","type": "chat","author":"Julie","title":"17-2_00_10","text":"chat-samples/Julie/17-2_00_10.xml" }, | |
32 | +{ "id": "9","type": "chat","author":"Julie","title":"19-2_01_23","text":"chat-samples/Julie/19-2_01_23.xml" }, | |
33 | +{ "id": "10","type": "chat","author":"Julie","title":"25-2_06_22","text":"chat-samples/Julie/25-2_06_22.xml" }, | |
34 | +{ "id": "11","type": "chat","author":"Julie","title":"41-4_08_29","text":"chat-samples/Julie/41-4_08_29.xml" }, | |
35 | +{ "id": "12","type": "chat","author":"Julie","title":"42-5_03_19","text":"chat-samples/Julie/42-5_03_19.xml" }]</textarea></div> | |
36 | + <input class="button post" type="button" value="create index" /> | |
37 | + <input class="button reset" type="button" value="reset" /> | |
38 | + <div class="output"></div> | |
39 | + <div class="error"></div> | |
40 | + </div> | |
41 | + | |
42 | + <hr noshade /> | |
43 | + | |
44 | + <h3>Empty index</h3> | |
45 | + Post to /solr/demo4/update | |
46 | + <div class="solr" data-type="json" data-url="/solr/demo4/update?wt=json&commitWithin=1000"> | |
47 | + <div class="post"><textarea data-autoresize>{ | |
48 | + "delete": { | |
49 | + "query": "*:*" | |
50 | + } | |
51 | +}</textarea></div> | |
52 | + <input class="button post" type="button" value="delete index" /> | |
53 | + <input class="button reset" type="button" value="reset" /> | |
54 | + <div class="output"></div> | |
55 | + <div class="error"></div> | |
56 | + </div> | |
57 | + | |
58 | + <hr noshade /> | |
59 | + | |
60 | + <h3>Query</h3> | |
61 | + | |
62 | + <div> | |
63 | + Search for 'anniversaire' - classic; post to /solr/demo4/select | |
64 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | |
65 | + <div class="post"><textarea data-autoresize>q=text:*anniversaire*</textarea></div> | |
66 | + <input class="button post" type="button" value="post" /> | |
67 | + <input class="button reset" type="button" value="reset" /> | |
68 | + <div class="output"></div> | |
69 | + <div class="error"></div> | |
70 | + </div> | |
71 | + </div> | |
72 | + | |
73 | + <br /> | |
74 | + | |
75 | + <div> | |
76 | + Search for 'anniversaire' - corpus query language (cql); post to /solr/demo4/select | |
77 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | |
78 | + <div class="post"><textarea data-autoresize>q={!mtas_cql field="text" query="[w=\"anniversaire\"]"}</textarea></div> | |
79 | + <input class="button post" type="button" value="post" /> | |
80 | + <input class="button reset" type="button" value="reset" /> | |
81 | + <div class="output"></div> | |
82 | + <div class="error"></div> | |
83 | + </div> | |
84 | + </div> | |
85 | + | |
86 | + <hr noshade /> | |
87 | + | |
88 | + <h3>Basic stats</h3> | |
89 | + | |
90 | + <div> | |
91 | + Get the number of positions; post to /solr/demo4/select | |
92 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | |
93 | + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.stats=true&mtas.stats.positions=true&mtas.stats.positions.0.key=number of positions&mtas.stats.positions.0.field=text&mtas.stats.positions.0.type=all</textarea></div> | |
94 | + <input class="button post" type="button" value="post" /> | |
95 | + <input class="button reset" type="button" value="reset" /> | |
96 | + <div class="output"></div> | |
97 | + <div class="error"></div> | |
98 | + </div> | |
99 | + </div> | |
100 | + | |
101 | + <br /> | |
102 | + | |
103 | + <div> | |
104 | + Get the number of tokens; post to /solr/demo4/select | |
105 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | |
106 | + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.stats=true&mtas.stats.tokens=true&mtas.stats.tokens.0.key=number of tokens&mtas.stats.tokens.0.field=text&mtas.stats.tokens.0.type=all</textarea></div> | |
107 | + <input class="button post" type="button" value="post" /> | |
108 | + <input class="button reset" type="button" value="reset" /> | |
109 | + <div class="output"></div> | |
110 | + <div class="error"></div> | |
111 | + </div> | |
112 | + </div> | |
113 | + | |
114 | + <br /> | |
115 | + | |
116 | + <div> | |
117 | + Get the number of nouns; post to /solr/demo4/select | |
118 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | |
119 | + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.stats=true&mtas.stats.spans=true&mtas.stats.spans.0.key=number of nouns&mtas.stats.spans.0.field=text&mtas.stats.spans.0.type=all&mtas.stats.spans.0.query.0.type=cql&mtas.stats.spans.0.query.0.value=[pos.c="n"]&mtas.stats.spans.0.function.0.key=fraction of nouns&mtas.stats.spans.0.function.0.expression=$q0/$n&mtas.stats.spans.0.function.0.type=all</textarea></div> | |
120 | + <input class="button post" type="button" value="post" /> | |
121 | + <input class="button reset" type="button" value="reset" /> | |
122 | + <div class="output"></div> | |
123 | + <div class="error"></div> | |
124 | + </div> | |
125 | + </div> | |
126 | + | |
127 | + <hr noshade /> | |
128 | + | |
129 | + <h3>Kwic</h3> | |
130 | + | |
131 | + Keyword in Context while searching for 'anniversaire'; post to /solr/demo4/select | |
132 | + | |
133 | + <div> | |
134 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | |
135 | + <div class="post"><textarea data-autoresize>q={!mtas_cql field="text" query="[w=\"anniversaire\"]"}&mtas=true&mtas.kwic=true&mtas.kwic.0.field=text&mtas.kwic.0.query.type=cql&mtas.kwic.0.query.value=[w="anniversaire"]&mtas.kwic.0.prefix=w&mtas.kwic.0.output=hit&mtas.kwic.0.left=2&mtas.kwic.0.right=3&mtas.kwic.0.number=2&mtas.kwic.0.start=0</textarea></div> | |
136 | + <input class="button post" type="button" value="post" /> | |
137 | + <input class="button reset" type="button" value="reset" /> | |
138 | + <div class="output"></div> | |
139 | + <div class="error"></div> | |
140 | + </div> | |
141 | + </div> | |
142 | + | |
143 | + <br /> | |
144 | + | |
145 | + Keyword in Context while searching for 'fête' followed by 'anniversaire' within 5 positions; post to /solr/demo4/select | |
146 | + | |
147 | + <div> | |
148 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | |
149 | + <div class="post"><textarea data-autoresize>q={!mtas_cql field="text" query="[w=\"fête\"][]{0,4}[w=\"anniversaire\"]"}&mtas=true&mtas.kwic=true&mtas.kwic.0.field=text&mtas.kwic.0.query.type=cql&mtas.kwic.0.query.value=[w="fête"][]{0,4}[w="anniversaire"]&mtas.kwic.0.prefix=w,stem,pos.c,u,u.role,u.name,u.sex&mtas.kwic.0.output=token&mtas.kwic.0.left=0&mtas.kwic.0.right=0&mtas.kwic.0.number=2&mtas.kwic.0.start=0</textarea></div> | |
150 | + <input class="button post" type="button" value="post" /> | |
151 | + <input class="button reset" type="button" value="reset" /> | |
152 | + <div class="output"></div> | |
153 | + <div class="error"></div> | |
154 | + </div> | |
155 | + </div> | |
156 | + | |
157 | + <hr noshade /> | |
158 | + | |
159 | + <h3>Termvector</h3> | |
160 | + | |
161 | + Termvector of w with regexp [a-z]{5,} for documents containing 'anniversaire'; post to /solr/demo4/select | |
162 | + | |
163 | + <div> | |
164 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | |
165 | + <div class="post"><textarea data-autoresize>q={!mtas_cql field="text" query="[w=\"anniversaire\"]"}&mtas=true&mtas.termvector=true&mtas.termvector.0.field=text&mtas.termvector.0.prefix=w&mtas.termvector.0.key=termvector on w&mtas.termvector.0.type=n,sum,mean&mtas.termvector.0.sort.type=sum&mtas.termvector.0.sort.direction=desc&mtas.termvector.0.number=20&mtas.termvector.0.regexp=[a-z]{5,}</textarea></div> | |
166 | + <input class="button post" type="button" value="post" /> | |
167 | + <input class="button reset" type="button" value="reset" /> | |
168 | + <div class="output"></div> | |
169 | + <div class="error"></div> | |
170 | + </div> | |
171 | + </div> | |
172 | + | |
173 | + <hr noshade /> | |
174 | + | |
175 | + <h3>Group</h3> | |
176 | + | |
177 | + Group w for any verb followed by 'pas' and preceded by 'ne'; post to /solr/demo4/select | |
178 | + | |
179 | + <div> | |
180 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | |
181 | + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.group=true&mtas.group.0.field=text&mtas.group.0.query.type=cql&mtas.group.0.query.value=([pos.c="v"] followedby [w="pas"]) precededby [w="ne"]&mtas.group.0.grouping.hit.inside.prefixes=w</textarea></div> | |
182 | + <input class="button post" type="button" value="post" /> | |
183 | + <input class="button reset" type="button" value="reset" /> | |
184 | + <div class="output"></div> | |
185 | + <div class="error"></div> | |
186 | + </div> | |
187 | + </div> | |
188 | + | |
189 | + <br/> | |
190 | + | |
191 | + Group w for any verb followed by 'pas' and preceded by 'ne' and used by Mother; post to /solr/demo4/select | |
192 | + | |
193 | + <div> | |
194 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | |
195 | + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.group=true&mtas.group.0.field=text&mtas.group.0.query.type=cql&mtas.group.0.query.value=(([pos.c="v"] followedby [w="pas"]) precededby [w="ne"]) within <u.role="Mother">&mtas.group.0.grouping.hit.inside.prefixes=w</textarea></div> | |
196 | + <input class="button post" type="button" value="post" /> | |
197 | + <input class="button reset" type="button" value="reset" /> | |
198 | + <div class="output"></div> | |
199 | + <div class="error"></div> | |
200 | + </div> | |
201 | + </div> | |
202 | + | |
203 | + <br/> | |
204 | + | |
205 | + Group u.role and w for any verb followed by 'pas' and preceded by 'ne'; post to /solr/demo4/select | |
206 | + | |
207 | + <div> | |
208 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | |
209 | + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.group=true&mtas.group.0.field=text&mtas.group.0.query.type=cql&mtas.group.0.query.value=([pos.c="v"] followedby [w="pas"]) precededby [w="ne"]&mtas.group.0.grouping.hit.inside.prefixes=u.role,w</textarea></div> | |
210 | + <input class="button post" type="button" value="post" /> | |
211 | + <input class="button reset" type="button" value="reset" /> | |
212 | + <div class="output"></div> | |
213 | + <div class="error"></div> | |
214 | + </div> | |
215 | + </div> | |
216 | + | |
217 | + <hr noshade /> | |
218 | + | |
219 | + <h3>Prefixes</h3> | |
220 | + | |
221 | + Available prefixes; post to /solr/demo4/select | |
222 | + | |
223 | + <div> | |
224 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | |
225 | + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.prefix=true&mtas.prefix.0.field=text&mtas.prefix.0.key=prefixes</textarea></div> | |
226 | + <input class="button post" type="button" value="post" /> | |
227 | + <input class="button reset" type="button" value="reset" /> | |
228 | + <div class="output"></div> | |
229 | + <div class="error"></div> | |
230 | + </div> | |
231 | + </div> | |
232 | + | |
233 | + <hr noshade /> | |
234 | + | |
235 | + <div> | |
236 | + Go to <a href="index.html">main page</a>. | |
237 | + </div> | |
238 | + | |
239 | + </body> | |
240 | +</html> | |
... | ... |
src/mtas/codec/util/collector/MtasDataItemDoubleFull.java
... | ... | @@ -62,7 +62,7 @@ public class MtasDataItemDoubleFull extends MtasDataItemFull<Double, Double> { |
62 | 62 | */ |
63 | 63 | @Override |
64 | 64 | protected HashMap<String, Object> getDistribution(String argument) { |
65 | - HashMap<String, Object> result = new LinkedHashMap<String, Object>(); | |
65 | + HashMap<String, Object> result = new LinkedHashMap<>(); | |
66 | 66 | Double start = null; |
67 | 67 | Double end = null; |
68 | 68 | Double step = null; |
... | ... | @@ -202,16 +202,16 @@ public class MtasDataItemDoubleFull extends MtasDataItemFull<Double, Double> { |
202 | 202 | createStats(); |
203 | 203 | switch (sortType) { |
204 | 204 | case CodecUtil.STATS_TYPE_SUM: |
205 | - return new MtasDataItemNumberComparator<Double>(stats.getSum(), | |
205 | + return new MtasDataItemNumberComparator<>(stats.getSum(), | |
206 | 206 | sortDirection); |
207 | 207 | case CodecUtil.STATS_TYPE_MAX: |
208 | - return new MtasDataItemNumberComparator<Double>(stats.getMax(), | |
208 | + return new MtasDataItemNumberComparator<>(stats.getMax(), | |
209 | 209 | sortDirection); |
210 | 210 | case CodecUtil.STATS_TYPE_MIN: |
211 | - return new MtasDataItemNumberComparator<Double>(stats.getMin(), | |
211 | + return new MtasDataItemNumberComparator<>(stats.getMin(), | |
212 | 212 | sortDirection); |
213 | 213 | case CodecUtil.STATS_TYPE_SUMSQ: |
214 | - return new MtasDataItemNumberComparator<Double>(stats.getSumsq(), | |
214 | + return new MtasDataItemNumberComparator<>(stats.getSumsq(), | |
215 | 215 | sortDirection); |
216 | 216 | default: |
217 | 217 | return null; |
... | ... | @@ -228,34 +228,34 @@ public class MtasDataItemDoubleFull extends MtasDataItemFull<Double, Double> { |
228 | 228 | createStats(); |
229 | 229 | switch (sortType) { |
230 | 230 | case CodecUtil.STATS_TYPE_SUMOFLOGS: |
231 | - return new MtasDataItemNumberComparator<Double>( | |
231 | + return new MtasDataItemNumberComparator<>( | |
232 | 232 | stats.getN() * Math.log(stats.getGeometricMean()), sortDirection); |
233 | 233 | case CodecUtil.STATS_TYPE_MEAN: |
234 | - return new MtasDataItemNumberComparator<Double>(stats.getMean(), | |
234 | + return new MtasDataItemNumberComparator<>(stats.getMean(), | |
235 | 235 | sortDirection); |
236 | 236 | case CodecUtil.STATS_TYPE_GEOMETRICMEAN: |
237 | - return new MtasDataItemNumberComparator<Double>(stats.getGeometricMean(), | |
237 | + return new MtasDataItemNumberComparator<>(stats.getGeometricMean(), | |
238 | 238 | sortDirection); |
239 | 239 | case CodecUtil.STATS_TYPE_STANDARDDEVIATION: |
240 | - return new MtasDataItemNumberComparator<Double>( | |
240 | + return new MtasDataItemNumberComparator<>( | |
241 | 241 | stats.getStandardDeviation(), sortDirection); |
242 | 242 | case CodecUtil.STATS_TYPE_VARIANCE: |
243 | - return new MtasDataItemNumberComparator<Double>(stats.getVariance(), | |
243 | + return new MtasDataItemNumberComparator<>(stats.getVariance(), | |
244 | 244 | sortDirection); |
245 | 245 | case CodecUtil.STATS_TYPE_POPULATIONVARIANCE: |
246 | - return new MtasDataItemNumberComparator<Double>( | |
246 | + return new MtasDataItemNumberComparator<>( | |
247 | 247 | stats.getPopulationVariance(), sortDirection); |
248 | 248 | case CodecUtil.STATS_TYPE_QUADRATICMEAN: |
249 | - return new MtasDataItemNumberComparator<Double>(stats.getQuadraticMean(), | |
249 | + return new MtasDataItemNumberComparator<>(Math.sqrt(stats.getSumsq()/stats.getN()), | |
250 | 250 | sortDirection); |
251 | 251 | case CodecUtil.STATS_TYPE_KURTOSIS: |
252 | - return new MtasDataItemNumberComparator<Double>(stats.getKurtosis(), | |
252 | + return new MtasDataItemNumberComparator<>(stats.getKurtosis(), | |
253 | 253 | sortDirection); |
254 | 254 | case CodecUtil.STATS_TYPE_MEDIAN: |
255 | - return new MtasDataItemNumberComparator<Double>(stats.getPercentile(50), | |
255 | + return new MtasDataItemNumberComparator<>(stats.getPercentile(50), | |
256 | 256 | sortDirection); |
257 | 257 | case CodecUtil.STATS_TYPE_SKEWNESS: |
258 | - return new MtasDataItemNumberComparator<Double>(stats.getSkewness(), | |
258 | + return new MtasDataItemNumberComparator<>(stats.getSkewness(), | |
259 | 259 | sortDirection); |
260 | 260 | default: |
261 | 261 | return null; |
... | ... |
src/mtas/codec/util/collector/MtasDataItemFull.java
... | ... | @@ -135,7 +135,7 @@ abstract class MtasDataItemFull<T1 extends Number & Comparable<T1>, T2 extends N |
135 | 135 | } else if (statsItem.equals(CodecUtil.STATS_TYPE_POPULATIONVARIANCE)) { |
136 | 136 | response.put(statsItem, stats.getPopulationVariance()); |
137 | 137 | } else if (statsItem.equals(CodecUtil.STATS_TYPE_QUADRATICMEAN)) { |
138 | - response.put(statsItem, stats.getQuadraticMean()); | |
138 | + response.put(statsItem, Math.sqrt(stats.getSumsq()/stats.getN())); | |
139 | 139 | } else if (statsItem.equals(CodecUtil.STATS_TYPE_KURTOSIS)) { |
140 | 140 | response.put(statsItem, stats.getKurtosis()); |
141 | 141 | } else if (statsItem.equals(CodecUtil.STATS_TYPE_MEDIAN)) { |
... | ... | @@ -153,7 +153,7 @@ abstract class MtasDataItemFull<T1 extends Number & Comparable<T1>, T2 extends N |
153 | 153 | } |
154 | 154 | } |
155 | 155 | if (errorNumber > 0) { |
156 | - Map<String, Object> errorResponse = new HashMap<String, Object>(); | |
156 | + Map<String, Object> errorResponse = new HashMap<>(); | |
157 | 157 | for (Entry<String, Integer> entry : getErrorList().entrySet()) { |
158 | 158 | errorResponse.put(entry.getKey(), entry.getValue()); |
159 | 159 | } |
... | ... | @@ -178,31 +178,19 @@ abstract class MtasDataItemFull<T1 extends Number & Comparable<T1>, T2 extends N |
178 | 178 | case CodecUtil.STATS_TYPE_N: |
179 | 179 | return 0; |
180 | 180 | case CodecUtil.STATS_TYPE_SUM: |
181 | - return 1; | |
182 | 181 | case CodecUtil.STATS_TYPE_MAX: |
183 | - return 1; | |
184 | 182 | case CodecUtil.STATS_TYPE_MIN: |
185 | - return 1; | |
186 | 183 | case CodecUtil.STATS_TYPE_SUMSQ: |
187 | 184 | return 1; |
188 | 185 | case CodecUtil.STATS_TYPE_SUMOFLOGS: |
189 | - return 2; | |
190 | 186 | case CodecUtil.STATS_TYPE_MEAN: |
191 | - return 2; | |
192 | 187 | case CodecUtil.STATS_TYPE_GEOMETRICMEAN: |
193 | - return 2; | |
194 | 188 | case CodecUtil.STATS_TYPE_STANDARDDEVIATION: |
195 | - return 2; | |
196 | 189 | case CodecUtil.STATS_TYPE_VARIANCE: |
197 | - return 2; | |
198 | 190 | case CodecUtil.STATS_TYPE_POPULATIONVARIANCE: |
199 | - return 2; | |
200 | 191 | case CodecUtil.STATS_TYPE_QUADRATICMEAN: |
201 | - return 2; | |
202 | 192 | case CodecUtil.STATS_TYPE_KURTOSIS: |
203 | - return 2; | |
204 | 193 | case CodecUtil.STATS_TYPE_MEDIAN: |
205 | - return 2; | |
206 | 194 | case CodecUtil.STATS_TYPE_SKEWNESS: |
207 | 195 | return 2; |
208 | 196 | default: |
... | ... | @@ -219,7 +207,7 @@ abstract class MtasDataItemFull<T1 extends Number & Comparable<T1>, T2 extends N |
219 | 207 | createStats(); |
220 | 208 | switch (sortType) { |
221 | 209 | case CodecUtil.STATS_TYPE_N: |
222 | - return new MtasDataItemNumberComparator<Long>(stats.getN(), | |
210 | + return new MtasDataItemNumberComparator<>(stats.getN(), | |
223 | 211 | sortDirection); |
224 | 212 | default: |
225 | 213 | return null; |
... | ... |
src/mtas/codec/util/collector/MtasDataItemLongFull.java
... | ... | @@ -210,7 +210,7 @@ class MtasDataItemLongFull extends MtasDataItemFull<Long, Double> { |
210 | 210 | return new MtasDataItemNumberComparator<>(stats.getPopulationVariance(), |
211 | 211 | sortDirection); |
212 | 212 | case CodecUtil.STATS_TYPE_QUADRATICMEAN: |
213 | - return new MtasDataItemNumberComparator<>(stats.getQuadraticMean(), | |
213 | + return new MtasDataItemNumberComparator<>(Math.sqrt(stats.getSumsq()/stats.getN()), | |
214 | 214 | sortDirection); |
215 | 215 | case CodecUtil.STATS_TYPE_KURTOSIS: |
216 | 216 | return new MtasDataItemNumberComparator<>(stats.getKurtosis(), |
... | ... |
src/mtas/solr/handler/component/util/MtasSolrResultUtil.java
... | ... | @@ -93,7 +93,7 @@ public class MtasSolrResultUtil { |
93 | 93 | private static void rewrite(NamedList<Object> nl, boolean doCollapse) |
94 | 94 | throws IOException { |
95 | 95 | boolean showDebugInfo = false; |
96 | - HashMap<String, NamedList<Object>> collapseNamedList = new HashMap<String, NamedList<Object>>(); | |
96 | + HashMap<String, NamedList<Object>> collapseNamedList = new HashMap<>(); | |
97 | 97 | int length = nl.size(); |
98 | 98 | for (int i = 0; i < length; i++) { |
99 | 99 | if (nl.getVal(i) instanceof NamedList) { |
... | ... | @@ -172,7 +172,7 @@ public class MtasSolrResultUtil { |
172 | 172 | */ |
173 | 173 | private static ArrayList<NamedList<Object>> rewriteToArray( |
174 | 174 | NamedList<Object> nnl) { |
175 | - ArrayList<NamedList<Object>> al = new ArrayList<NamedList<Object>>(); | |
175 | + ArrayList<NamedList<Object>> al = new ArrayList<>(); | |
176 | 176 | String key; |
177 | 177 | Iterator<Entry<String, Object>> it = nnl.iterator(); |
178 | 178 | while (it.hasNext()) { |
... | ... | @@ -384,7 +384,7 @@ public class MtasSolrResultUtil { |
384 | 384 | "unequal size " + nameNew + " and " + nameOriginal); |
385 | 385 | } |
386 | 386 | if (unique) { |
387 | - Set<String> set = new HashSet<String>(); | |
387 | + Set<String> set = new HashSet<>(); | |
388 | 388 | for (int i = 0; i < list.length; i++) { |
389 | 389 | set.add(list[i]); |
390 | 390 | } |
... | ... |
src/site/markdown/indexing_formats.md
... | ... | @@ -5,6 +5,7 @@ To configure the mapping from resources to the index structure, several parsers |
5 | 5 | * [MtasFoliaParser](indexing_formats_folia.html) : mapping [FoLiA](https://proycon.github.io/folia/) resources |
6 | 6 | * [MtasTEIParser](indexing_formats_tei.html): mapping [ISO-TEI](http://www.tei-c.org/) resources |
7 | 7 | * [MtasSketchParser](indexing_formats_sketch.html): mapping [Sketch Engine](https://www.sketchengine.co.uk/word-sketch-index-format/) resources |
8 | +* [MtasChatParser](indexing_formats_chat.html): mapping [CHAT transcription format](http://talkbank.org/manuals/CHAT.pdf) resources converted to [XML](http://talkbank.org/software/xsddoc/) | |
8 | 9 | * [MtasCRMParser](indexing_formats_crm.html): mapping resources with format Corpus Van Reenen-Mulder/Adelheid |
9 | 10 | |
10 | 11 | For XML-based formats, these parsers often just slightly extend the abstract MtasXMLParser by defining the correct namespaces and root tags. |
... | ... |
src/site/markdown/indexing_formats_chat.md
0 → 100644
1 | +# CHAT | |
2 | + | |
3 | +For indexing [CHAT transcription format](http://talkbank.org/manuals/CHAT.pdf) resources converted to [XML](http://talkbank.org/software/xsddoc/), the *mtas.analysis.parser.MtasChatParser* extending the abstract *MtasXMLParser* is available; full examples of configuration files are provided on [GitHub](https://github.com/meertensinstituut/mtas/tree/master/conf/parser/mtas). | |
4 | + | |
5 | +```xml | |
6 | +<!-- START CONFIGURATION MTAS PARSER --> | |
7 | +<parser name="mtas.analysis.parser.MtasChatParser"> | |
8 | +... | |
9 | + <!-- START MAPPINGS --> | |
10 | + <mappings> | |
11 | + ... | |
12 | + </mappings> | |
13 | + <!-- END MAPPINGS --> | |
14 | + ... | |
15 | +</parser> | |
16 | +<!-- END CONFIGURATION MTAS PARSER --> | |
17 | +``` | |
18 | + | |
19 | +The syntax of the parser part in the [configuration file](indexing_configuration.html#configuration) is, apart from the *name* attribute, almost identical to the configuration of the [FoLiA-parser](indexing_formats_folia.html) and [TEI-parser](indexing_formats_tei.html). | |
20 | + | |
... | ... |
src/site/site.xml
... | ... | @@ -33,6 +33,7 @@ |
33 | 33 | <item name="FoLiA" href="indexing_formats_folia.html" collapse="true"/> |
34 | 34 | <item name="TEI" href="indexing_formats_tei.html" collapse="true"/> |
35 | 35 | <item name="Sketch" href="indexing_formats_sketch.html" collapse="true"/> |
36 | + <item name="CHAT" href="indexing_formats_chat.html" collapse="true"/> | |
36 | 37 | <item name="CRM" href="indexing_formats_crm.html" collapse="true"/> |
37 | 38 | </item> |
38 | 39 | </item> |
... | ... |