Commit f8c23b7a792807decfe930787375c0410db16d15
1 parent
a38f5648
include chat xml format with examples
Showing
9 changed files
with
286 additions
and
36 deletions
docker/Dockerfile
1 | # Automatically generated Dockerfile | 1 | # Automatically generated Dockerfile |
2 | -# - Build 2017-07-13 06:32 | 2 | +# - Build 2017-07-13 14:19 |
3 | # - Lucene/Solr version 6.6.0 | 3 | # - Lucene/Solr version 6.6.0 |
4 | # - Mtas release 20170713 | 4 | # - Mtas release 20170713 |
5 | # | 5 | # |
@@ -74,7 +74,7 @@ RUN service apache2 stop && \ | @@ -74,7 +74,7 @@ RUN service apache2 stop && \ | ||
74 | chmod -R 755 /var/www/html && \ | 74 | chmod -R 755 /var/www/html && \ |
75 | printf "echo\n" >> /start.sh && \ | 75 | printf "echo\n" >> /start.sh && \ |
76 | printf "echo \"================ Mtas -- Multi Tier Annotation Search =================\"\n" >> /start.sh && \ | 76 | printf "echo \"================ Mtas -- Multi Tier Annotation Search =================\"\n" >> /start.sh && \ |
77 | - printf "echo \" Timestamp 2017-07-13 06:32\"\n" >> /start.sh && \ | 77 | + printf "echo \" Timestamp 2017-07-13 14:19\"\n" >> /start.sh && \ |
78 | printf "echo \" Lucene/Solr version 6.6.0\"\n" >> /start.sh && \ | 78 | printf "echo \" Lucene/Solr version 6.6.0\"\n" >> /start.sh && \ |
79 | printf "echo \" Mtas release 20170713\"\n" >> /start.sh && \ | 79 | printf "echo \" Mtas release 20170713\"\n" >> /start.sh && \ |
80 | printf "echo \" See https://meertensinstituut.github.io/mtas/ for more information\"\n" >> /start.sh && \ | 80 | printf "echo \" See https://meertensinstituut.github.io/mtas/ for more information\"\n" >> /start.sh && \ |
docker/site/example_demo4.html
0 → 100644
1 | +<!DOCTYPE html> | ||
2 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
3 | + <head> | ||
4 | + <meta charset="UTF-8" /> | ||
5 | + <meta name="viewport" content="width=device-width, initial-scale=1.0" /> | ||
6 | + <title>Multi Tier Annotation Search</title> | ||
7 | + <script type="text/javascript" src="./js/jquery-3.1.1.min.js"></script> | ||
8 | + <script type="text/javascript" src="./js/solr.js"></script> | ||
9 | + <link rel="stylesheet" type="text/css" href="css/style.css"> | ||
10 | + </head> | ||
11 | + <body> | ||
12 | + | ||
13 | + <h1>Multi Tier Annotation Search - example demo4 (CHAT examples)</h1> | ||
14 | + | ||
15 | + <div> | ||
16 | + Go to <a href="index.html">main page</a>. | ||
17 | + </div> | ||
18 | + | ||
19 | + <hr noshade /> | ||
20 | + | ||
21 | + <h3>Create index</h3> | ||
22 | + Post to /solr/demo4/update | ||
23 | + <div class="solr" data-type="json" data-url="/solr/demo4/update?wt=json&commitWithin=1000"> | ||
24 | + <div class="post"><textarea data-autoresize>[{ "id": "1","type": "chat","author":"Julie","title":"02-0_11_18","text":"chat-samples/Julie/02-0_11_18.xml" }, | ||
25 | +{ "id": "2","type": "chat","author":"Julie","title":"09-1_03_16","text":"chat-samples/Julie/09-1_03_16.xml" }, | ||
26 | +{ "id": "3","type": "chat","author":"Julie","title":"10-1_04_23","text":"chat-samples/Julie/10-1_04_23.xml" }, | ||
27 | +{ "id": "4","type": "chat","author":"Julie","title":"11-1_06_04","text":"chat-samples/Julie/11-1_06_04.xml" }, | ||
28 | +{ "id": "5","type": "chat","author":"Julie","title":"13-1_07_26","text":"chat-samples/Julie/13-1_07_26.xml" }, | ||
29 | +{ "id": "6","type": "chat","author":"Julie","title":"14-1_09_16","text":"chat-samples/Julie/14-1_09_16.xml" }, | ||
30 | +{ "id": "7","type": "chat","author":"Julie","title":"16-1_11_11","text":"chat-samples/Julie/16-1_11_11.xml" }, | ||
31 | +{ "id": "8","type": "chat","author":"Julie","title":"17-2_00_10","text":"chat-samples/Julie/17-2_00_10.xml" }, | ||
32 | +{ "id": "9","type": "chat","author":"Julie","title":"19-2_01_23","text":"chat-samples/Julie/19-2_01_23.xml" }, | ||
33 | +{ "id": "10","type": "chat","author":"Julie","title":"25-2_06_22","text":"chat-samples/Julie/25-2_06_22.xml" }, | ||
34 | +{ "id": "11","type": "chat","author":"Julie","title":"41-4_08_29","text":"chat-samples/Julie/41-4_08_29.xml" }, | ||
35 | +{ "id": "12","type": "chat","author":"Julie","title":"42-5_03_19","text":"chat-samples/Julie/42-5_03_19.xml" }]</textarea></div> | ||
36 | + <input class="button post" type="button" value="create index" /> | ||
37 | + <input class="button reset" type="button" value="reset" /> | ||
38 | + <div class="output"></div> | ||
39 | + <div class="error"></div> | ||
40 | + </div> | ||
41 | + | ||
42 | + <hr noshade /> | ||
43 | + | ||
44 | + <h3>Empty index</h3> | ||
45 | + Post to /solr/demo4/update | ||
46 | + <div class="solr" data-type="json" data-url="/solr/demo4/update?wt=json&commitWithin=1000"> | ||
47 | + <div class="post"><textarea data-autoresize>{ | ||
48 | + "delete": { | ||
49 | + "query": "*:*" | ||
50 | + } | ||
51 | +}</textarea></div> | ||
52 | + <input class="button post" type="button" value="delete index" /> | ||
53 | + <input class="button reset" type="button" value="reset" /> | ||
54 | + <div class="output"></div> | ||
55 | + <div class="error"></div> | ||
56 | + </div> | ||
57 | + | ||
58 | + <hr noshade /> | ||
59 | + | ||
60 | + <h3>Query</h3> | ||
61 | + | ||
62 | + <div> | ||
63 | + Search for 'anniversaire' - classic; post to /solr/demo4/select | ||
64 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | ||
65 | + <div class="post"><textarea data-autoresize>q=text:*anniversaire*</textarea></div> | ||
66 | + <input class="button post" type="button" value="post" /> | ||
67 | + <input class="button reset" type="button" value="reset" /> | ||
68 | + <div class="output"></div> | ||
69 | + <div class="error"></div> | ||
70 | + </div> | ||
71 | + </div> | ||
72 | + | ||
73 | + <br /> | ||
74 | + | ||
75 | + <div> | ||
76 | + Search for 'anniversaire' - corpus query language (cql); post to /solr/demo4/select | ||
77 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | ||
78 | + <div class="post"><textarea data-autoresize>q={!mtas_cql field="text" query="[w=\"anniversaire\"]"}</textarea></div> | ||
79 | + <input class="button post" type="button" value="post" /> | ||
80 | + <input class="button reset" type="button" value="reset" /> | ||
81 | + <div class="output"></div> | ||
82 | + <div class="error"></div> | ||
83 | + </div> | ||
84 | + </div> | ||
85 | + | ||
86 | + <hr noshade /> | ||
87 | + | ||
88 | + <h3>Basic stats</h3> | ||
89 | + | ||
90 | + <div> | ||
91 | + Get the number of positions; post to /solr/demo4/select | ||
92 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | ||
93 | + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.stats=true&mtas.stats.positions=true&mtas.stats.positions.0.key=number of positions&mtas.stats.positions.0.field=text&mtas.stats.positions.0.type=all</textarea></div> | ||
94 | + <input class="button post" type="button" value="post" /> | ||
95 | + <input class="button reset" type="button" value="reset" /> | ||
96 | + <div class="output"></div> | ||
97 | + <div class="error"></div> | ||
98 | + </div> | ||
99 | + </div> | ||
100 | + | ||
101 | + <br /> | ||
102 | + | ||
103 | + <div> | ||
104 | + Get the number of tokens; post to /solr/demo4/select | ||
105 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | ||
106 | + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.stats=true&mtas.stats.tokens=true&mtas.stats.tokens.0.key=number of tokens&mtas.stats.tokens.0.field=text&mtas.stats.tokens.0.type=all</textarea></div> | ||
107 | + <input class="button post" type="button" value="post" /> | ||
108 | + <input class="button reset" type="button" value="reset" /> | ||
109 | + <div class="output"></div> | ||
110 | + <div class="error"></div> | ||
111 | + </div> | ||
112 | + </div> | ||
113 | + | ||
114 | + <br /> | ||
115 | + | ||
116 | + <div> | ||
117 | + Get the number of nouns; post to /solr/demo4/select | ||
118 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | ||
119 | + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.stats=true&mtas.stats.spans=true&mtas.stats.spans.0.key=number of nouns&mtas.stats.spans.0.field=text&mtas.stats.spans.0.type=all&mtas.stats.spans.0.query.0.type=cql&mtas.stats.spans.0.query.0.value=[pos.c="n"]&mtas.stats.spans.0.function.0.key=fraction of nouns&mtas.stats.spans.0.function.0.expression=$q0/$n&mtas.stats.spans.0.function.0.type=all</textarea></div> | ||
120 | + <input class="button post" type="button" value="post" /> | ||
121 | + <input class="button reset" type="button" value="reset" /> | ||
122 | + <div class="output"></div> | ||
123 | + <div class="error"></div> | ||
124 | + </div> | ||
125 | + </div> | ||
126 | + | ||
127 | + <hr noshade /> | ||
128 | + | ||
129 | + <h3>Kwic</h3> | ||
130 | + | ||
131 | + Keyword in Context while searching for 'anniversaire'; post to /solr/demo4/select | ||
132 | + | ||
133 | + <div> | ||
134 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | ||
135 | + <div class="post"><textarea data-autoresize>q={!mtas_cql field="text" query="[w=\"anniversaire\"]"}&mtas=true&mtas.kwic=true&mtas.kwic.0.field=text&mtas.kwic.0.query.type=cql&mtas.kwic.0.query.value=[w="anniversaire"]&mtas.kwic.0.prefix=w&mtas.kwic.0.output=hit&mtas.kwic.0.left=2&mtas.kwic.0.right=3&mtas.kwic.0.number=2&mtas.kwic.0.start=0</textarea></div> | ||
136 | + <input class="button post" type="button" value="post" /> | ||
137 | + <input class="button reset" type="button" value="reset" /> | ||
138 | + <div class="output"></div> | ||
139 | + <div class="error"></div> | ||
140 | + </div> | ||
141 | + </div> | ||
142 | + | ||
143 | + <br /> | ||
144 | + | ||
145 | + Keyword in Context while searching for 'fête' followed by 'anniversaire' within 5 positions; post to /solr/demo4/select | ||
146 | + | ||
147 | + <div> | ||
148 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | ||
149 | + <div class="post"><textarea data-autoresize>q={!mtas_cql field="text" query="[w=\"fête\"][]{0,4}[w=\"anniversaire\"]"}&mtas=true&mtas.kwic=true&mtas.kwic.0.field=text&mtas.kwic.0.query.type=cql&mtas.kwic.0.query.value=[w="fête"][]{0,4}[w="anniversaire"]&mtas.kwic.0.prefix=w,stem,pos.c,u,u.role,u.name,u.sex&mtas.kwic.0.output=token&mtas.kwic.0.left=0&mtas.kwic.0.right=0&mtas.kwic.0.number=2&mtas.kwic.0.start=0</textarea></div> | ||
150 | + <input class="button post" type="button" value="post" /> | ||
151 | + <input class="button reset" type="button" value="reset" /> | ||
152 | + <div class="output"></div> | ||
153 | + <div class="error"></div> | ||
154 | + </div> | ||
155 | + </div> | ||
156 | + | ||
157 | + <hr noshade /> | ||
158 | + | ||
159 | + <h3>Termvector</h3> | ||
160 | + | ||
161 | + Termvector of w with regexp [a-z]{5,} for documents containing 'anniversaire'; post to /solr/demo4/select | ||
162 | + | ||
163 | + <div> | ||
164 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | ||
165 | + <div class="post"><textarea data-autoresize>q={!mtas_cql field="text" query="[w=\"anniversaire\"]"}&mtas=true&mtas.termvector=true&mtas.termvector.0.field=text&mtas.termvector.0.prefix=w&mtas.termvector.0.key=termvector on w&mtas.termvector.0.type=n,sum,mean&mtas.termvector.0.sort.type=sum&mtas.termvector.0.sort.direction=desc&mtas.termvector.0.number=20&mtas.termvector.0.regexp=[a-z]{5,}</textarea></div> | ||
166 | + <input class="button post" type="button" value="post" /> | ||
167 | + <input class="button reset" type="button" value="reset" /> | ||
168 | + <div class="output"></div> | ||
169 | + <div class="error"></div> | ||
170 | + </div> | ||
171 | + </div> | ||
172 | + | ||
173 | + <hr noshade /> | ||
174 | + | ||
175 | + <h3>Group</h3> | ||
176 | + | ||
177 | + Group w for any verb followed by 'pas' and preceded by 'ne'; post to /solr/demo4/select | ||
178 | + | ||
179 | + <div> | ||
180 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | ||
181 | + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.group=true&mtas.group.0.field=text&mtas.group.0.query.type=cql&mtas.group.0.query.value=([pos.c="v"] followedby [w="pas"]) precededby [w="ne"]&mtas.group.0.grouping.hit.inside.prefixes=w</textarea></div> | ||
182 | + <input class="button post" type="button" value="post" /> | ||
183 | + <input class="button reset" type="button" value="reset" /> | ||
184 | + <div class="output"></div> | ||
185 | + <div class="error"></div> | ||
186 | + </div> | ||
187 | + </div> | ||
188 | + | ||
189 | + <br/> | ||
190 | + | ||
191 | + Group w for any verb followed by 'pas' and preceded by 'ne' and used by Mother; post to /solr/demo4/select | ||
192 | + | ||
193 | + <div> | ||
194 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | ||
195 | + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.group=true&mtas.group.0.field=text&mtas.group.0.query.type=cql&mtas.group.0.query.value=(([pos.c="v"] followedby [w="pas"]) precededby [w="ne"]) within <u.role="Mother">&mtas.group.0.grouping.hit.inside.prefixes=w</textarea></div> | ||
196 | + <input class="button post" type="button" value="post" /> | ||
197 | + <input class="button reset" type="button" value="reset" /> | ||
198 | + <div class="output"></div> | ||
199 | + <div class="error"></div> | ||
200 | + </div> | ||
201 | + </div> | ||
202 | + | ||
203 | + <br/> | ||
204 | + | ||
205 | + Group u.role and w for any verb followed by 'pas' and preceded by 'ne'; post to /solr/demo4/select | ||
206 | + | ||
207 | + <div> | ||
208 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | ||
209 | + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.group=true&mtas.group.0.field=text&mtas.group.0.query.type=cql&mtas.group.0.query.value=([pos.c="v"] followedby [w="pas"]) precededby [w="ne"]&mtas.group.0.grouping.hit.inside.prefixes=u.role,w</textarea></div> | ||
210 | + <input class="button post" type="button" value="post" /> | ||
211 | + <input class="button reset" type="button" value="reset" /> | ||
212 | + <div class="output"></div> | ||
213 | + <div class="error"></div> | ||
214 | + </div> | ||
215 | + </div> | ||
216 | + | ||
217 | + <hr noshade /> | ||
218 | + | ||
219 | + <h3>Prefixes</h3> | ||
220 | + | ||
221 | + Available prefixes; post to /solr/demo4/select | ||
222 | + | ||
223 | + <div> | ||
224 | + <div class="solr" data-type="post" data-url="/solr/demo4/select?indent=true&wt=json"> | ||
225 | + <div class="post"><textarea data-autoresize>q=*:*&rows=0&mtas=true&mtas.prefix=true&mtas.prefix.0.field=text&mtas.prefix.0.key=prefixes</textarea></div> | ||
226 | + <input class="button post" type="button" value="post" /> | ||
227 | + <input class="button reset" type="button" value="reset" /> | ||
228 | + <div class="output"></div> | ||
229 | + <div class="error"></div> | ||
230 | + </div> | ||
231 | + </div> | ||
232 | + | ||
233 | + <hr noshade /> | ||
234 | + | ||
235 | + <div> | ||
236 | + Go to <a href="index.html">main page</a>. | ||
237 | + </div> | ||
238 | + | ||
239 | + </body> | ||
240 | +</html> |
src/mtas/codec/util/collector/MtasDataItemDoubleFull.java
@@ -62,7 +62,7 @@ public class MtasDataItemDoubleFull extends MtasDataItemFull<Double, Double> { | @@ -62,7 +62,7 @@ public class MtasDataItemDoubleFull extends MtasDataItemFull<Double, Double> { | ||
62 | */ | 62 | */ |
63 | @Override | 63 | @Override |
64 | protected HashMap<String, Object> getDistribution(String argument) { | 64 | protected HashMap<String, Object> getDistribution(String argument) { |
65 | - HashMap<String, Object> result = new LinkedHashMap<String, Object>(); | 65 | + HashMap<String, Object> result = new LinkedHashMap<>(); |
66 | Double start = null; | 66 | Double start = null; |
67 | Double end = null; | 67 | Double end = null; |
68 | Double step = null; | 68 | Double step = null; |
@@ -202,16 +202,16 @@ public class MtasDataItemDoubleFull extends MtasDataItemFull<Double, Double> { | @@ -202,16 +202,16 @@ public class MtasDataItemDoubleFull extends MtasDataItemFull<Double, Double> { | ||
202 | createStats(); | 202 | createStats(); |
203 | switch (sortType) { | 203 | switch (sortType) { |
204 | case CodecUtil.STATS_TYPE_SUM: | 204 | case CodecUtil.STATS_TYPE_SUM: |
205 | - return new MtasDataItemNumberComparator<Double>(stats.getSum(), | 205 | + return new MtasDataItemNumberComparator<>(stats.getSum(), |
206 | sortDirection); | 206 | sortDirection); |
207 | case CodecUtil.STATS_TYPE_MAX: | 207 | case CodecUtil.STATS_TYPE_MAX: |
208 | - return new MtasDataItemNumberComparator<Double>(stats.getMax(), | 208 | + return new MtasDataItemNumberComparator<>(stats.getMax(), |
209 | sortDirection); | 209 | sortDirection); |
210 | case CodecUtil.STATS_TYPE_MIN: | 210 | case CodecUtil.STATS_TYPE_MIN: |
211 | - return new MtasDataItemNumberComparator<Double>(stats.getMin(), | 211 | + return new MtasDataItemNumberComparator<>(stats.getMin(), |
212 | sortDirection); | 212 | sortDirection); |
213 | case CodecUtil.STATS_TYPE_SUMSQ: | 213 | case CodecUtil.STATS_TYPE_SUMSQ: |
214 | - return new MtasDataItemNumberComparator<Double>(stats.getSumsq(), | 214 | + return new MtasDataItemNumberComparator<>(stats.getSumsq(), |
215 | sortDirection); | 215 | sortDirection); |
216 | default: | 216 | default: |
217 | return null; | 217 | return null; |
@@ -228,34 +228,34 @@ public class MtasDataItemDoubleFull extends MtasDataItemFull<Double, Double> { | @@ -228,34 +228,34 @@ public class MtasDataItemDoubleFull extends MtasDataItemFull<Double, Double> { | ||
228 | createStats(); | 228 | createStats(); |
229 | switch (sortType) { | 229 | switch (sortType) { |
230 | case CodecUtil.STATS_TYPE_SUMOFLOGS: | 230 | case CodecUtil.STATS_TYPE_SUMOFLOGS: |
231 | - return new MtasDataItemNumberComparator<Double>( | 231 | + return new MtasDataItemNumberComparator<>( |
232 | stats.getN() * Math.log(stats.getGeometricMean()), sortDirection); | 232 | stats.getN() * Math.log(stats.getGeometricMean()), sortDirection); |
233 | case CodecUtil.STATS_TYPE_MEAN: | 233 | case CodecUtil.STATS_TYPE_MEAN: |
234 | - return new MtasDataItemNumberComparator<Double>(stats.getMean(), | 234 | + return new MtasDataItemNumberComparator<>(stats.getMean(), |
235 | sortDirection); | 235 | sortDirection); |
236 | case CodecUtil.STATS_TYPE_GEOMETRICMEAN: | 236 | case CodecUtil.STATS_TYPE_GEOMETRICMEAN: |
237 | - return new MtasDataItemNumberComparator<Double>(stats.getGeometricMean(), | 237 | + return new MtasDataItemNumberComparator<>(stats.getGeometricMean(), |
238 | sortDirection); | 238 | sortDirection); |
239 | case CodecUtil.STATS_TYPE_STANDARDDEVIATION: | 239 | case CodecUtil.STATS_TYPE_STANDARDDEVIATION: |
240 | - return new MtasDataItemNumberComparator<Double>( | 240 | + return new MtasDataItemNumberComparator<>( |
241 | stats.getStandardDeviation(), sortDirection); | 241 | stats.getStandardDeviation(), sortDirection); |
242 | case CodecUtil.STATS_TYPE_VARIANCE: | 242 | case CodecUtil.STATS_TYPE_VARIANCE: |
243 | - return new MtasDataItemNumberComparator<Double>(stats.getVariance(), | 243 | + return new MtasDataItemNumberComparator<>(stats.getVariance(), |
244 | sortDirection); | 244 | sortDirection); |
245 | case CodecUtil.STATS_TYPE_POPULATIONVARIANCE: | 245 | case CodecUtil.STATS_TYPE_POPULATIONVARIANCE: |
246 | - return new MtasDataItemNumberComparator<Double>( | 246 | + return new MtasDataItemNumberComparator<>( |
247 | stats.getPopulationVariance(), sortDirection); | 247 | stats.getPopulationVariance(), sortDirection); |
248 | case CodecUtil.STATS_TYPE_QUADRATICMEAN: | 248 | case CodecUtil.STATS_TYPE_QUADRATICMEAN: |
249 | - return new MtasDataItemNumberComparator<Double>(stats.getQuadraticMean(), | 249 | + return new MtasDataItemNumberComparator<>(Math.sqrt(stats.getSumsq()/stats.getN()), |
250 | sortDirection); | 250 | sortDirection); |
251 | case CodecUtil.STATS_TYPE_KURTOSIS: | 251 | case CodecUtil.STATS_TYPE_KURTOSIS: |
252 | - return new MtasDataItemNumberComparator<Double>(stats.getKurtosis(), | 252 | + return new MtasDataItemNumberComparator<>(stats.getKurtosis(), |
253 | sortDirection); | 253 | sortDirection); |
254 | case CodecUtil.STATS_TYPE_MEDIAN: | 254 | case CodecUtil.STATS_TYPE_MEDIAN: |
255 | - return new MtasDataItemNumberComparator<Double>(stats.getPercentile(50), | 255 | + return new MtasDataItemNumberComparator<>(stats.getPercentile(50), |
256 | sortDirection); | 256 | sortDirection); |
257 | case CodecUtil.STATS_TYPE_SKEWNESS: | 257 | case CodecUtil.STATS_TYPE_SKEWNESS: |
258 | - return new MtasDataItemNumberComparator<Double>(stats.getSkewness(), | 258 | + return new MtasDataItemNumberComparator<>(stats.getSkewness(), |
259 | sortDirection); | 259 | sortDirection); |
260 | default: | 260 | default: |
261 | return null; | 261 | return null; |
src/mtas/codec/util/collector/MtasDataItemFull.java
@@ -135,7 +135,7 @@ abstract class MtasDataItemFull<T1 extends Number & Comparable<T1>, T2 extends N | @@ -135,7 +135,7 @@ abstract class MtasDataItemFull<T1 extends Number & Comparable<T1>, T2 extends N | ||
135 | } else if (statsItem.equals(CodecUtil.STATS_TYPE_POPULATIONVARIANCE)) { | 135 | } else if (statsItem.equals(CodecUtil.STATS_TYPE_POPULATIONVARIANCE)) { |
136 | response.put(statsItem, stats.getPopulationVariance()); | 136 | response.put(statsItem, stats.getPopulationVariance()); |
137 | } else if (statsItem.equals(CodecUtil.STATS_TYPE_QUADRATICMEAN)) { | 137 | } else if (statsItem.equals(CodecUtil.STATS_TYPE_QUADRATICMEAN)) { |
138 | - response.put(statsItem, stats.getQuadraticMean()); | 138 | + response.put(statsItem, Math.sqrt(stats.getSumsq()/stats.getN())); |
139 | } else if (statsItem.equals(CodecUtil.STATS_TYPE_KURTOSIS)) { | 139 | } else if (statsItem.equals(CodecUtil.STATS_TYPE_KURTOSIS)) { |
140 | response.put(statsItem, stats.getKurtosis()); | 140 | response.put(statsItem, stats.getKurtosis()); |
141 | } else if (statsItem.equals(CodecUtil.STATS_TYPE_MEDIAN)) { | 141 | } else if (statsItem.equals(CodecUtil.STATS_TYPE_MEDIAN)) { |
@@ -153,7 +153,7 @@ abstract class MtasDataItemFull<T1 extends Number & Comparable<T1>, T2 extends N | @@ -153,7 +153,7 @@ abstract class MtasDataItemFull<T1 extends Number & Comparable<T1>, T2 extends N | ||
153 | } | 153 | } |
154 | } | 154 | } |
155 | if (errorNumber > 0) { | 155 | if (errorNumber > 0) { |
156 | - Map<String, Object> errorResponse = new HashMap<String, Object>(); | 156 | + Map<String, Object> errorResponse = new HashMap<>(); |
157 | for (Entry<String, Integer> entry : getErrorList().entrySet()) { | 157 | for (Entry<String, Integer> entry : getErrorList().entrySet()) { |
158 | errorResponse.put(entry.getKey(), entry.getValue()); | 158 | errorResponse.put(entry.getKey(), entry.getValue()); |
159 | } | 159 | } |
@@ -178,31 +178,19 @@ abstract class MtasDataItemFull<T1 extends Number & Comparable<T1>, T2 extends N | @@ -178,31 +178,19 @@ abstract class MtasDataItemFull<T1 extends Number & Comparable<T1>, T2 extends N | ||
178 | case CodecUtil.STATS_TYPE_N: | 178 | case CodecUtil.STATS_TYPE_N: |
179 | return 0; | 179 | return 0; |
180 | case CodecUtil.STATS_TYPE_SUM: | 180 | case CodecUtil.STATS_TYPE_SUM: |
181 | - return 1; | ||
182 | case CodecUtil.STATS_TYPE_MAX: | 181 | case CodecUtil.STATS_TYPE_MAX: |
183 | - return 1; | ||
184 | case CodecUtil.STATS_TYPE_MIN: | 182 | case CodecUtil.STATS_TYPE_MIN: |
185 | - return 1; | ||
186 | case CodecUtil.STATS_TYPE_SUMSQ: | 183 | case CodecUtil.STATS_TYPE_SUMSQ: |
187 | return 1; | 184 | return 1; |
188 | case CodecUtil.STATS_TYPE_SUMOFLOGS: | 185 | case CodecUtil.STATS_TYPE_SUMOFLOGS: |
189 | - return 2; | ||
190 | case CodecUtil.STATS_TYPE_MEAN: | 186 | case CodecUtil.STATS_TYPE_MEAN: |
191 | - return 2; | ||
192 | case CodecUtil.STATS_TYPE_GEOMETRICMEAN: | 187 | case CodecUtil.STATS_TYPE_GEOMETRICMEAN: |
193 | - return 2; | ||
194 | case CodecUtil.STATS_TYPE_STANDARDDEVIATION: | 188 | case CodecUtil.STATS_TYPE_STANDARDDEVIATION: |
195 | - return 2; | ||
196 | case CodecUtil.STATS_TYPE_VARIANCE: | 189 | case CodecUtil.STATS_TYPE_VARIANCE: |
197 | - return 2; | ||
198 | case CodecUtil.STATS_TYPE_POPULATIONVARIANCE: | 190 | case CodecUtil.STATS_TYPE_POPULATIONVARIANCE: |
199 | - return 2; | ||
200 | case CodecUtil.STATS_TYPE_QUADRATICMEAN: | 191 | case CodecUtil.STATS_TYPE_QUADRATICMEAN: |
201 | - return 2; | ||
202 | case CodecUtil.STATS_TYPE_KURTOSIS: | 192 | case CodecUtil.STATS_TYPE_KURTOSIS: |
203 | - return 2; | ||
204 | case CodecUtil.STATS_TYPE_MEDIAN: | 193 | case CodecUtil.STATS_TYPE_MEDIAN: |
205 | - return 2; | ||
206 | case CodecUtil.STATS_TYPE_SKEWNESS: | 194 | case CodecUtil.STATS_TYPE_SKEWNESS: |
207 | return 2; | 195 | return 2; |
208 | default: | 196 | default: |
@@ -219,7 +207,7 @@ abstract class MtasDataItemFull<T1 extends Number & Comparable<T1>, T2 extends N | @@ -219,7 +207,7 @@ abstract class MtasDataItemFull<T1 extends Number & Comparable<T1>, T2 extends N | ||
219 | createStats(); | 207 | createStats(); |
220 | switch (sortType) { | 208 | switch (sortType) { |
221 | case CodecUtil.STATS_TYPE_N: | 209 | case CodecUtil.STATS_TYPE_N: |
222 | - return new MtasDataItemNumberComparator<Long>(stats.getN(), | 210 | + return new MtasDataItemNumberComparator<>(stats.getN(), |
223 | sortDirection); | 211 | sortDirection); |
224 | default: | 212 | default: |
225 | return null; | 213 | return null; |
src/mtas/codec/util/collector/MtasDataItemLongFull.java
@@ -210,7 +210,7 @@ class MtasDataItemLongFull extends MtasDataItemFull<Long, Double> { | @@ -210,7 +210,7 @@ class MtasDataItemLongFull extends MtasDataItemFull<Long, Double> { | ||
210 | return new MtasDataItemNumberComparator<>(stats.getPopulationVariance(), | 210 | return new MtasDataItemNumberComparator<>(stats.getPopulationVariance(), |
211 | sortDirection); | 211 | sortDirection); |
212 | case CodecUtil.STATS_TYPE_QUADRATICMEAN: | 212 | case CodecUtil.STATS_TYPE_QUADRATICMEAN: |
213 | - return new MtasDataItemNumberComparator<>(stats.getQuadraticMean(), | 213 | + return new MtasDataItemNumberComparator<>(Math.sqrt(stats.getSumsq()/stats.getN()), |
214 | sortDirection); | 214 | sortDirection); |
215 | case CodecUtil.STATS_TYPE_KURTOSIS: | 215 | case CodecUtil.STATS_TYPE_KURTOSIS: |
216 | return new MtasDataItemNumberComparator<>(stats.getKurtosis(), | 216 | return new MtasDataItemNumberComparator<>(stats.getKurtosis(), |
src/mtas/solr/handler/component/util/MtasSolrResultUtil.java
@@ -93,7 +93,7 @@ public class MtasSolrResultUtil { | @@ -93,7 +93,7 @@ public class MtasSolrResultUtil { | ||
93 | private static void rewrite(NamedList<Object> nl, boolean doCollapse) | 93 | private static void rewrite(NamedList<Object> nl, boolean doCollapse) |
94 | throws IOException { | 94 | throws IOException { |
95 | boolean showDebugInfo = false; | 95 | boolean showDebugInfo = false; |
96 | - HashMap<String, NamedList<Object>> collapseNamedList = new HashMap<String, NamedList<Object>>(); | 96 | + HashMap<String, NamedList<Object>> collapseNamedList = new HashMap<>(); |
97 | int length = nl.size(); | 97 | int length = nl.size(); |
98 | for (int i = 0; i < length; i++) { | 98 | for (int i = 0; i < length; i++) { |
99 | if (nl.getVal(i) instanceof NamedList) { | 99 | if (nl.getVal(i) instanceof NamedList) { |
@@ -172,7 +172,7 @@ public class MtasSolrResultUtil { | @@ -172,7 +172,7 @@ public class MtasSolrResultUtil { | ||
172 | */ | 172 | */ |
173 | private static ArrayList<NamedList<Object>> rewriteToArray( | 173 | private static ArrayList<NamedList<Object>> rewriteToArray( |
174 | NamedList<Object> nnl) { | 174 | NamedList<Object> nnl) { |
175 | - ArrayList<NamedList<Object>> al = new ArrayList<NamedList<Object>>(); | 175 | + ArrayList<NamedList<Object>> al = new ArrayList<>(); |
176 | String key; | 176 | String key; |
177 | Iterator<Entry<String, Object>> it = nnl.iterator(); | 177 | Iterator<Entry<String, Object>> it = nnl.iterator(); |
178 | while (it.hasNext()) { | 178 | while (it.hasNext()) { |
@@ -384,7 +384,7 @@ public class MtasSolrResultUtil { | @@ -384,7 +384,7 @@ public class MtasSolrResultUtil { | ||
384 | "unequal size " + nameNew + " and " + nameOriginal); | 384 | "unequal size " + nameNew + " and " + nameOriginal); |
385 | } | 385 | } |
386 | if (unique) { | 386 | if (unique) { |
387 | - Set<String> set = new HashSet<String>(); | 387 | + Set<String> set = new HashSet<>(); |
388 | for (int i = 0; i < list.length; i++) { | 388 | for (int i = 0; i < list.length; i++) { |
389 | set.add(list[i]); | 389 | set.add(list[i]); |
390 | } | 390 | } |
src/site/markdown/indexing_formats.md
@@ -5,6 +5,7 @@ To configure the mapping from resources to the index structure, several parsers | @@ -5,6 +5,7 @@ To configure the mapping from resources to the index structure, several parsers | ||
5 | * [MtasFoliaParser](indexing_formats_folia.html) : mapping [FoLiA](https://proycon.github.io/folia/) resources | 5 | * [MtasFoliaParser](indexing_formats_folia.html) : mapping [FoLiA](https://proycon.github.io/folia/) resources |
6 | * [MtasTEIParser](indexing_formats_tei.html): mapping [ISO-TEI](http://www.tei-c.org/) resources | 6 | * [MtasTEIParser](indexing_formats_tei.html): mapping [ISO-TEI](http://www.tei-c.org/) resources |
7 | * [MtasSketchParser](indexing_formats_sketch.html): mapping [Sketch Engine](https://www.sketchengine.co.uk/word-sketch-index-format/) resources | 7 | * [MtasSketchParser](indexing_formats_sketch.html): mapping [Sketch Engine](https://www.sketchengine.co.uk/word-sketch-index-format/) resources |
8 | +* [MtasChatParser](indexing_formats_chat.html): mapping [CHAT transcription format](http://talkbank.org/manuals/CHAT.pdf) resources converted to [XML](http://talkbank.org/software/xsddoc/) | ||
8 | * [MtasCRMParser](indexing_formats_crm.html): mapping resources with format Corpus Van Reenen-Mulder/Adelheid | 9 | * [MtasCRMParser](indexing_formats_crm.html): mapping resources with format Corpus Van Reenen-Mulder/Adelheid |
9 | 10 | ||
10 | For XML-based formats, these parsers often just slightly extend the abstract MtasXMLParser by defining the correct namespaces and root tags. | 11 | For XML-based formats, these parsers often just slightly extend the abstract MtasXMLParser by defining the correct namespaces and root tags. |
src/site/markdown/indexing_formats_chat.md
0 → 100644
1 | +#CHAT | ||
2 | + | ||
3 | +For indexing [CHAT transcription format](http://talkbank.org/manuals/CHAT.pdf) resources converted to [XML](http://talkbank.org/software/xsddoc/), the *mtas.analysis.parser.MtasChatParser* extending the abstract *MtasXMLParser* is available; full examples of configuration files are provided on [GitHub](https://github.com/meertensinstituut/mtas/tree/master/conf/parser/mtas). | ||
4 | + | ||
5 | +```xml | ||
6 | +<!-- START CONFIGURATION MTAS PARSER --> | ||
7 | +<parser name="mtas.analysis.parser.MtasChatParser"> | ||
8 | +... | ||
9 | + <!-- START MAPPINGS --> | ||
10 | + <mappings> | ||
11 | + ... | ||
12 | + </mappings> | ||
13 | + <!-- END MAPPINGS --> | ||
14 | + ... | ||
15 | +</parser> | ||
16 | +<!-- END CONFIGURATION MTAS PARSER --> | ||
17 | +``` | ||
18 | + | ||
19 | +The syntax of the parser part in the [configuration file](indexing_configuration.html#configuration) is, apart from the *name* attribute, almost identical to the configuration of the [FoLiA-parser](indexing_formats_folia.html) and [TEI-parser](indexing_formats_tei.html). | ||
20 | + |
src/site/site.xml
@@ -33,6 +33,7 @@ | @@ -33,6 +33,7 @@ | ||
33 | <item name="FoLiA" href="indexing_formats_folia.html" collapse="true"/> | 33 | <item name="FoLiA" href="indexing_formats_folia.html" collapse="true"/> |
34 | <item name="TEI" href="indexing_formats_tei.html" collapse="true"/> | 34 | <item name="TEI" href="indexing_formats_tei.html" collapse="true"/> |
35 | <item name="Sketch" href="indexing_formats_sketch.html" collapse="true"/> | 35 | <item name="Sketch" href="indexing_formats_sketch.html" collapse="true"/> |
36 | + <item name="CHAT" href="indexing_formats_chat.html" collapse="true"/> | ||
36 | <item name="CRM" href="indexing_formats_crm.html" collapse="true"/> | 37 | <item name="CRM" href="indexing_formats_crm.html" collapse="true"/> |
37 | </item> | 38 | </item> |
38 | </item> | 39 | </item> |