CodecUtil.java.html
17.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" lang="en"><head><meta http-equiv="Content-Type" content="text/html;charset=UTF-8"/><link rel="stylesheet" href="../jacoco-resources/report.css" type="text/css"/><link rel="shortcut icon" href="../jacoco-resources/report.gif" type="image/gif"/><title>CodecUtil.java</title><link rel="stylesheet" href="../jacoco-resources/prettify.css" type="text/css"/><script type="text/javascript" src="../jacoco-resources/prettify.js"></script></head><body onload="window['PR_TAB_WIDTH']=4;prettyPrint()"><div class="breadcrumb" id="breadcrumb"><span class="info"><a href="../jacoco-sessions.html" class="el_session">Sessions</a></span><a href="../index.html" class="el_report">MTAS</a> > <a href="index.source.html" class="el_package">mtas.codec.util</a> > <span class="el_source">CodecUtil.java</span></div><h1>CodecUtil.java</h1><pre class="source lang-java linenums">package mtas.codec.util;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import mtas.analysis.token.MtasToken;
import mtas.codec.MtasCodecPostingsFormat;
import mtas.parser.function.util.MtasFunctionParserFunction;
import mtas.search.spans.util.MtasSpanQuery;
import mtas.codec.util.CodecComponent.ComponentField;
import mtas.codec.util.CodecComponent.ComponentJoin;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.spans.SpanWeight;
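/**
* Static constants and helper methods for MTAS statistics: the names of the
* supported statistics types and functions, parsing of a requested statistics
* list, splitting of terms into prefix and value, and entry points that
* delegate field and join collection to {@code CodecCollector}.
*/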
public class CodecUtil {
/** Name of the {@code geometricmean} statistics type. */
public static final String STATS_TYPE_GEOMETRICMEAN = "geometricmean";
/** Name of the {@code kurtosis} statistics type. */
public static final String STATS_TYPE_KURTOSIS = "kurtosis";
/** Name of the {@code max} statistics type. */
public static final String STATS_TYPE_MAX = "max";
/** Name of the {@code mean} statistics type. */
public static final String STATS_TYPE_MEAN = "mean";
/** Name of the {@code min} statistics type. */
public static final String STATS_TYPE_MIN = "min";
/** Name of the {@code n} statistics type. */
public static final String STATS_TYPE_N = "n";
/** Name of the {@code median} statistics type. */
public static final String STATS_TYPE_MEDIAN = "median";
/** Name of the {@code populationvariance} statistics type. */
public static final String STATS_TYPE_POPULATIONVARIANCE = "populationvariance";
/** Name of the {@code quadraticmean} statistics type. */
public static final String STATS_TYPE_QUADRATICMEAN = "quadraticmean";
/** Name of the {@code skewness} statistics type. */
public static final String STATS_TYPE_SKEWNESS = "skewness";
/** Name of the {@code standarddeviation} statistics type. */
public static final String STATS_TYPE_STANDARDDEVIATION = "standarddeviation";
/** Name of the {@code sum} statistics type. */
public static final String STATS_TYPE_SUM = "sum";
/** Name of the {@code sumsq} statistics type. */
public static final String STATS_TYPE_SUMSQ = "sumsq";
/** Name of the {@code sumoflogs} statistics type. */
public static final String STATS_TYPE_SUMOFLOGS = "sumoflogs";
/** Name of the {@code variance} statistics type. */
public static final String STATS_TYPE_VARIANCE = "variance";
/**
* Pseudo statistics type that {@code createStatsItems} expands to every entry
* of {@code STATS_TYPES}; it is not itself an entry of that list.
*/
public static final String STATS_TYPE_ALL = "all";
/**
* Name of the {@code distribution} statistics function; {@code createStatsItems}
* only accepts it when it is followed by parentheses.
*/
public static final String STATS_FUNCTION_DISTRIBUTION = "distribution";
/** Literal {@code term}; not referenced by the methods of this class. */
public static final String SORT_TERM = "term";
/** Literal {@code asc}; not referenced by the methods of this class. */
public static final String SORT_ASC = "asc";
/** Literal {@code desc}; not referenced by the methods of this class. */
public static final String SORT_DESC = "desc";
/**
* Function names that {@code createStatsItems} accepts as function items and
* that make {@code createStatsType} return {@code STATS_FULL}.
*/
<span class="fc" id="L92"> private static final List<String> STATS_FUNCTIONS = Arrays</span>
<span class="fc" id="L93"> .asList(STATS_FUNCTION_DISTRIBUTION);</span>
/**
* Every plain statistics type name. This is the list checked by
* {@code isStatsType} and the list that {@code STATS_TYPE_ALL} expands to.
*/
<span class="fc" id="L96"> private static final List<String> STATS_TYPES = Arrays.asList(</span>
STATS_TYPE_GEOMETRICMEAN, STATS_TYPE_KURTOSIS, STATS_TYPE_MAX,
STATS_TYPE_MEAN, STATS_TYPE_MIN, STATS_TYPE_N, STATS_TYPE_MEDIAN,
STATS_TYPE_POPULATIONVARIANCE, STATS_TYPE_QUADRATICMEAN,
STATS_TYPE_SKEWNESS, STATS_TYPE_STANDARDDEVIATION, STATS_TYPE_SUM,
STATS_TYPE_SUMSQ, STATS_TYPE_SUMOFLOGS, STATS_TYPE_VARIANCE);
/** Types for which {@code createStatsType} stays at {@code STATS_BASIC}. */
<span class="fc" id="L104"> private static final List<String> STATS_BASIC_TYPES = Arrays</span>
<span class="fc" id="L105"> .asList(STATS_TYPE_N, STATS_TYPE_SUM, STATS_TYPE_MEAN);</span>
/**
* Types that make {@code createStatsType} return {@code STATS_ADVANCED},
* unless another item or the sort type requires {@code STATS_FULL}.
*/
<span class="fc" id="L108"> private static final List<String> STATS_ADVANCED_TYPES = Arrays.asList(</span>
STATS_TYPE_MAX, STATS_TYPE_MIN, STATS_TYPE_SUMSQ, STATS_TYPE_SUMOFLOGS,
STATS_TYPE_GEOMETRICMEAN, STATS_TYPE_STANDARDDEVIATION,
STATS_TYPE_VARIANCE, STATS_TYPE_POPULATIONVARIANCE,
STATS_TYPE_QUADRATICMEAN);
/** Types that make {@code createStatsType} return {@code STATS_FULL}. */
<span class="fc" id="L115"> private static final List<String> STATS_FULL_TYPES = Arrays</span>
<span class="fc" id="L116"> .asList(STATS_TYPE_KURTOSIS, STATS_TYPE_MEDIAN, STATS_TYPE_SKEWNESS);</span>
/** Result of {@code createStatsType} when only basic types are requested. */
public static final String STATS_BASIC = "basic";
/** Result of {@code createStatsType} when an advanced type is requested. */
public static final String STATS_ADVANCED = "advanced";
/** Result of {@code createStatsType} when a full type or function is requested. */
public static final String STATS_FULL = "full";
/** Literal {@code long}; not referenced by the methods of this class. */
public static final String DATA_TYPE_LONG = "long";
/** Literal {@code double}; not referenced by the methods of this class. */
public static final String DATA_TYPE_DOUBLE = "double";
/**
* Matches one item of a statistics list: a name made of characters other than
* an opening parenthesis or a comma, optionally followed by a parenthesised
* part. Group 1 is the whole item, group 2 the name, group 3 the parenthesised
* part (null when absent).
*/
<span class="fc" id="L134"> private static Pattern fpStatsItems = Pattern</span>
<span class="fc" id="L135"> .compile("(([^\\(,]+)(\\([^\\)]*\\))?)");</span>
/**
* Matches a function item, i.e. a name that must be followed by a
* parenthesised part. Group 2 is the name and group 4 the text between the
* parentheses.
*/
<span class="fc" id="L138"> private static Pattern fpStatsFunctionItems = Pattern</span>
<span class="fc" id="L139"> .compile("(([^\\(,]+)(\\(([^\\)]*)\\)))");</span>
/**
* Private constructor: every member of this class is static, so no instance
* is ever needed.
*/
<span class="nc" id="L144"> private CodecUtil() {</span>
// intentionally empty
<span class="nc" id="L146"> }</span>
/**
* Checks if is single position prefix.
*
* @param fieldInfo the field info
* @param prefix the prefix
* @return true, if is single position prefix
* @throws IOException Signals that an I/O exception has occurred.
*/
public static boolean isSinglePositionPrefix(FieldInfo fieldInfo,
String prefix) throws IOException {
<span class="pc bpc" id="L158" title="1 of 2 branches missed."> if (fieldInfo == null) {</span>
<span class="nc" id="L159"> throw new IOException("no fieldInfo");</span>
} else {
<span class="fc" id="L161"> String info = fieldInfo.getAttribute(</span>
MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SINGLE_POSITION);
<span class="pc bpc" id="L163" title="1 of 2 branches missed."> if (info == null) {</span>
<span class="nc" id="L164"> throw new IOException("no "</span>
+ MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SINGLE_POSITION);
} else {
<span class="fc" id="L167"> return Arrays.asList(info.split(Pattern.quote(MtasToken.DELIMITER)))</span>
<span class="fc" id="L168"> .contains(prefix);</span>
}
}
}
/**
* Term value.
*
* @param term the term
* @return the string
*/
public static String termValue(String term) {
<span class="nc" id="L180"> int i = term.indexOf(MtasToken.DELIMITER);</span>
<span class="nc" id="L181"> String value = null;</span>
<span class="nc bnc" id="L182" title="All 2 branches missed."> if (i >= 0) {</span>
<span class="nc" id="L183"> value = term.substring((i + MtasToken.DELIMITER.length()));</span>
<span class="nc bnc" id="L184" title="All 2 branches missed."> value = (value.length() > 0) ? value : null;</span>
}
<span class="nc bnc" id="L186" title="All 2 branches missed."> return (value == null) ? null : value.replace("\u0000", "");</span>
}
/**
* Term prefix.
*
* @param term the term
* @return the string
*/
public static String termPrefix(String term) {
<span class="fc" id="L196"> int i = term.indexOf(MtasToken.DELIMITER);</span>
<span class="fc" id="L197"> String prefix = term;</span>
<span class="pc bpc" id="L198" title="1 of 2 branches missed."> if (i >= 0) {</span>
<span class="fc" id="L199"> prefix = term.substring(0, i);</span>
}
<span class="fc" id="L201"> return prefix.replace("\u0000", "");</span>
}
/**
* Term prefix value.
*
* @param term the term
* @return the string
*/
public static String termPrefixValue(String term) {
<span class="nc bnc" id="L211" title="All 2 branches missed."> return (term == null) ? null : term.replace("\u0000", "");</span>
}
/**
* Collect field.
*
* @param field the field
* @param searcher the searcher
* @param rawReader the raw reader
* @param fullDocList the full doc list
* @param fullDocSet the full doc set
* @param fieldStats the field stats
* @throws IllegalAccessException the illegal access exception
* @throws IllegalArgumentException the illegal argument exception
* @throws InvocationTargetException the invocation target exception
* @throws IOException Signals that an I/O exception has occurred.
*/
public static void collectField(String field, IndexSearcher searcher,
IndexReader rawReader, ArrayList<Integer> fullDocList,
ArrayList<Integer> fullDocSet, ComponentField fieldStats)
throws IllegalAccessException, IllegalArgumentException,
InvocationTargetException, IOException {
<span class="pc bpc" id="L233" title="1 of 2 branches missed."> if (fieldStats != null) {</span>
<span class="fc" id="L234"> IndexReader reader = searcher.getIndexReader();</span>
<span class="fc" id="L235"> HashMap<MtasSpanQuery, SpanWeight> spansQueryWeight = new HashMap<>();</span>
// only if spanQueryList is not empty
<span class="fc bfc" id="L237" title="All 2 branches covered."> if (fieldStats.spanQueryList.size() > 0) {</span>
<span class="fc bfc" id="L238" title="All 2 branches covered."> for (MtasSpanQuery sq : fieldStats.spanQueryList) {</span>
<span class="fc" id="L239"> spansQueryWeight.put(sq, ((MtasSpanQuery) sq.rewrite(reader))</span>
<span class="fc" id="L240"> .createWeight(searcher, false));</span>
<span class="fc" id="L241"> }</span>
}
// collect
<span class="fc" id="L244"> CodecCollector.collectField(field, searcher, reader, rawReader,</span>
fullDocList, fullDocSet, fieldStats, spansQueryWeight);
}
<span class="fc" id="L247"> }</span>
/**
* Collect join.
*
* @param reader the reader
* @param fullDocSet the full doc set
* @param joinInfo the join info
* @throws IOException Signals that an I/O exception has occurred.
*/
public static void collectJoin(IndexReader reader,
ArrayList<Integer> fullDocSet, ComponentJoin joinInfo)
throws IOException {
<span class="pc bpc" id="L260" title="1 of 2 branches missed."> if (joinInfo != null) {</span>
<span class="nc" id="L261"> CodecCollector.collectJoin(reader, fullDocSet, joinInfo);</span>
}
<span class="fc" id="L263"> }</span>
/**
* Creates the stats items.
*
* @param statsType the stats type
* @return the sorted set
* @throws IOException Signals that an I/O exception has occurred.
*/
static SortedSet<String> createStatsItems(String statsType) throws IOException {
<span class="fc" id="L273"> SortedSet<String> statsItems = new TreeSet<>();</span>
<span class="fc" id="L274"> SortedSet<String> functionItems = new TreeSet<>();</span>
<span class="fc bfc" id="L275" title="All 2 branches covered."> if (statsType != null) {</span>
<span class="fc" id="L276"> Matcher m = fpStatsItems.matcher(statsType.trim());</span>
<span class="fc bfc" id="L277" title="All 2 branches covered."> while (m.find()) {</span>
<span class="fc" id="L278"> String tmpStatsItem = m.group(2).trim();</span>
<span class="fc bfc" id="L279" title="All 2 branches covered."> if (STATS_TYPES.contains(tmpStatsItem)) {</span>
<span class="fc" id="L280"> statsItems.add(tmpStatsItem);</span>
<span class="pc bpc" id="L281" title="1 of 2 branches missed."> } else if (tmpStatsItem.equals(STATS_TYPE_ALL)) {</span>
<span class="fc bfc" id="L282" title="All 2 branches covered."> for (String type : STATS_TYPES) {</span>
<span class="fc" id="L283"> statsItems.add(type);</span>
<span class="fc" id="L284"> }</span>
<span class="nc bnc" id="L285" title="All 2 branches missed."> } else if (STATS_FUNCTIONS.contains(tmpStatsItem)) {</span>
<span class="nc bnc" id="L286" title="All 2 branches missed."> if (m.group(3) == null) {</span>
<span class="nc" id="L287"> throw new IOException("'" + tmpStatsItem + "' should be called as '"</span>
+ tmpStatsItem + "()' with an optional argument");
} else {
<span class="nc" id="L290"> functionItems.add(m.group(1).trim());</span>
}
} else {
<span class="nc" id="L293"> throw new IOException("unknown statsType '" + tmpStatsItem + "'");</span>
}
<span class="fc" id="L295"> }</span>
}
<span class="pc bpc" id="L297" title="1 of 4 branches missed."> if (statsItems.size() == 0 && functionItems.size() == 0) {</span>
<span class="fc" id="L298"> statsItems.add(STATS_TYPE_SUM);</span>
<span class="fc" id="L299"> statsItems.add(STATS_TYPE_N);</span>
<span class="fc" id="L300"> statsItems.add(STATS_TYPE_MEAN);</span>
}
<span class="pc bpc" id="L302" title="1 of 2 branches missed."> if (functionItems.size() > 0) {</span>
<span class="nc" id="L303"> statsItems.addAll(functionItems);</span>
}
<span class="fc" id="L305"> return statsItems;</span>
}
/**
* Creates the stats type.
*
* @param statsItems the stats items
* @param sortType the sort type
* @param functionParser the function parser
* @return the string
*/
static String createStatsType(Set<String> statsItems, String sortType,
MtasFunctionParserFunction functionParser) {
<span class="fc" id="L318"> String statsType = STATS_BASIC;</span>
<span class="fc bfc" id="L319" title="All 2 branches covered."> for (String statsItem : statsItems) {</span>
<span class="fc bfc" id="L320" title="All 2 branches covered."> if (STATS_FULL_TYPES.contains(statsItem)) {</span>
<span class="fc" id="L321"> statsType = STATS_FULL;</span>
<span class="fc" id="L322"> break;</span>
<span class="fc bfc" id="L323" title="All 2 branches covered."> } else if (STATS_ADVANCED_TYPES.contains(statsItem)) {</span>
<span class="fc" id="L324"> statsType = STATS_ADVANCED;</span>
<span class="fc bfc" id="L325" title="All 2 branches covered."> } else if (statsType != STATS_ADVANCED</span>
<span class="pc bpc" id="L326" title="1 of 2 branches missed."> && STATS_BASIC_TYPES.contains(statsItem)) {</span>
<span class="fc" id="L327"> statsType = STATS_BASIC;</span>
} else {
<span class="fc" id="L329"> Matcher m = fpStatsFunctionItems.matcher(statsItem.trim());</span>
<span class="pc bpc" id="L330" title="1 of 2 branches missed."> if (m.find()) {</span>
<span class="nc bnc" id="L331" title="All 2 branches missed."> if (STATS_FUNCTIONS.contains(m.group(2).trim())) {</span>
<span class="nc" id="L332"> statsType = STATS_FULL;</span>
<span class="nc" id="L333"> break;</span>
}
}
}
<span class="fc" id="L337"> }</span>
<span class="fc bfc" id="L338" title="All 4 branches covered."> if (sortType != null && STATS_TYPES.contains(sortType)) {</span>
<span class="pc bpc" id="L339" title="1 of 2 branches missed."> if (STATS_FULL_TYPES.contains(sortType)) {</span>
<span class="nc" id="L340"> statsType = STATS_FULL;</span>
<span class="pc bpc" id="L341" title="1 of 2 branches missed."> } else if (STATS_ADVANCED_TYPES.contains(sortType)) {</span>
<span class="nc bnc" id="L342" title="All 4 branches missed."> statsType = (statsType == null || statsType != STATS_FULL)</span>
? STATS_ADVANCED : statsType;
}
}
<span class="fc" id="L346"> return statsType;</span>
}
/**
* Checks whether the given name is one of the plain statistics types listed in
* {@code STATS_TYPES}.
*
* @param type the name to check
* @return {@code true} if {@code STATS_TYPES} contains {@code type}; function
* names and {@code STATS_TYPE_ALL} are not entries of that list
*/
public static boolean isStatsType(String type) {
<span class="fc" id="L356"> return STATS_TYPES.contains(type);</span>
}
}
</pre><div class="footer"><span class="right">Created with <a href="http://www.jacoco.org/jacoco">JaCoCo</a> 0.7.9.201702052155</span></div></body></html>