Commit 2d2953f8b0526be6490970ce829e5fbfa953c935

Authored by Matthijs Brouwer
1 parent 3bedd0f4

include distances for termvector

src/main/java/mtas/codec/util/CodecCollector.java
... ... @@ -43,6 +43,7 @@ import mtas.codec.util.CodecComponent.KwicToken;
43 43 import mtas.codec.util.CodecComponent.ListHit;
44 44 import mtas.codec.util.CodecComponent.ListToken;
45 45 import mtas.codec.util.CodecComponent.Match;
  46 +import mtas.codec.util.CodecComponent.SubComponentDistance;
46 47 import mtas.codec.util.CodecComponent.SubComponentFunction;
47 48 import mtas.codec.util.CodecInfo.IndexDoc;
48 49 import mtas.codec.util.CodecSearchTree.MtasTreeHit;
... ... @@ -87,21 +88,18 @@ import org.apache.lucene.util.automaton.RegExp;
87 88 import org.apache.solr.legacy.LegacyNumericUtils;
88 89 import org.apache.solr.schema.NumberType;
89 90  
90   -import com.sun.tools.javac.code.Attribute.Array;
91   -
92 91 /**
93 92 * The Class CodecCollector.
94 93 */
95 94 public class CodecCollector {
96 95  
97 96 /** The Constant log. */
98   - private static final Log log = LogFactory.getLog(CodecCollector.class);
  97 + private static final Log log = LogFactory.getLog(CodecCollector.class);
99 98  
100 99 /**
101 100 * Instantiates a new codec collector.
102 101 */
103   - private CodecCollector() {
104   - // don't do anything
  102 + public CodecCollector() {
105 103 }
106 104  
107 105 /**
... ... @@ -3007,7 +3005,6 @@ public class CodecCollector {
3007 3005 ignoreByteRunAutomatonList.add(new ByteRunAutomaton(automaton));
3008 3006 }
3009 3007 }
3010   -
3011 3008 for (CompiledAutomaton compiledAutomaton : listAutomata) {
3012 3009 if (!compiledAutomaton.type
3013 3010 .equals(CompiledAutomaton.AUTOMATON_TYPE.NORMAL)) {
... ... @@ -3042,7 +3039,7 @@ public class CodecCollector {
3042 3039 String key;
3043 3040 // loop over terms
3044 3041 while ((term = termsEnum.next()) != null) {
3045   - if (validateTermWithStartValue(term, termVector)) {
  3042 + if (validateTermWithStartValue(term, termVector) && validateTermWithDistance(term, termVector)) {
3046 3043 termDocId = -1;
3047 3044 acceptedTerm = true;
3048 3045 if (ignoreByteRunAutomatonList != null) {
... ... @@ -3266,7 +3263,7 @@ public class CodecCollector {
3266 3263 // loop over terms
3267 3264 boolean acceptedTerm;
3268 3265 while ((term = termsEnum.next()) != null) {
3269   - if (validateTermWithStartValue(term, termVector)) {
  3266 + if (validateTermWithStartValue(term, termVector) && validateTermWithDistance(term, termVector)) {
3270 3267 termDocId = -1;
3271 3268 acceptedTerm = true;
3272 3269 if (ignoreByteRunAutomatonList != null) {
... ... @@ -3328,7 +3325,7 @@ public class CodecCollector {
3328 3325 if (computeFullList.size() > 0) {
3329 3326 termsEnum = t.intersect(compiledAutomaton, null);
3330 3327 while ((term = termsEnum.next()) != null) {
3331   - if (validateTermWithStartValue(term, termVector)) {
  3328 + if (validateTermWithStartValue(term, termVector) && validateTermWithDistance(term, termVector)) {
3332 3329 termDocId = -1;
3333 3330 mutableKey[0] = null;
3334 3331 // only if (probably) needed
... ... @@ -3522,6 +3519,24 @@ public class CodecCollector {
3522 3519 return false;
3523 3520 }
3524 3521  
  3522 + private static boolean validateTermWithDistance(BytesRef term,
  3523 + ComponentTermVector termVector) throws IOException {
  3524 + if(termVector.distances==null || termVector.distances.isEmpty()) {
  3525 + return true;
  3526 + } else {
  3527 + for(SubComponentDistance item : termVector.distances) {
  3528 + if(item.maximum==null) {
  3529 + continue;
  3530 + } else {
  3531 + if(!item.getDistance().validate(term)) {
  3532 + return false;
  3533 + }
  3534 + }
  3535 + }
  3536 + return true;
  3537 + }
  3538 + }
  3539 +
3525 3540 /**
3526 3541 * Need second round termvector.
3527 3542 *
... ...
src/main/java/mtas/codec/util/CodecComponent.java
... ... @@ -4,6 +4,7 @@ import java.io.BufferedReader;
4 4 import java.io.IOException;
5 5 import java.io.InputStream;
6 6 import java.io.InputStreamReader;
  7 +import java.io.Serializable;
7 8 import java.io.StringReader;
8 9 import java.io.UnsupportedEncodingException;
9 10 import java.net.HttpURLConnection;
... ... @@ -30,6 +31,9 @@ import mtas.analysis.token.MtasToken;
30 31 import mtas.analysis.token.MtasTokenString;
31 32 import mtas.codec.util.CodecSearchTree.MtasTreeHit;
32 33 import mtas.codec.util.collector.MtasDataCollector;
  34 +import mtas.codec.util.distance.LevenshteinDistance;
  35 +import mtas.codec.util.distance.DamerauLevenshteinDistance;
  36 +import mtas.codec.util.distance.Distance;
33 37 import mtas.parser.function.MtasFunctionParser;
34 38 import mtas.parser.function.ParseException;
35 39 import mtas.parser.function.util.MtasFunctionParserFunction;
... ... @@ -175,7 +179,8 @@ public class CodecComponent {
175 179 /**
176 180 * Instantiates a new component field.
177 181 *
178   - * @param uniqueKeyField the unique key field
  182 + * @param uniqueKeyField
  183 + * the unique key field
179 184 */
180 185 public ComponentField(String uniqueKeyField) {
181 186 this.uniqueKeyField = uniqueKeyField;
... ... @@ -217,7 +222,8 @@ public class CodecComponent {
217 222 /**
218 223 * Instantiates a new component prefix.
219 224 *
220   - * @param key the key
  225 + * @param key
  226 + * the key
221 227 */
222 228 public ComponentPrefix(String key) {
223 229 this.key = key;
... ... @@ -230,7 +236,8 @@ public class CodecComponent {
230 236 /**
231 237 * Adds the single position.
232 238 *
233   - * @param prefix the prefix
  239 + * @param prefix
  240 + * the prefix
234 241 */
235 242 public void addSinglePosition(String prefix) {
236 243 if (!prefix.trim().isEmpty() && !singlePositionList.contains(prefix)
... ... @@ -242,7 +249,8 @@ public class CodecComponent {
242 249 /**
243 250 * Adds the multiple position.
244 251 *
245   - * @param prefix the prefix
  252 + * @param prefix
  253 + * the prefix
246 254 */
247 255 public void addMultiplePosition(String prefix) {
248 256 if (!prefix.trim().isEmpty()) {
... ... @@ -260,7 +268,8 @@ public class CodecComponent {
260 268 /**
261 269 * Adds the set position.
262 270 *
263   - * @param prefix the prefix
  271 + * @param prefix
  272 + * the prefix
264 273 */
265 274 public void addSetPosition(String prefix) {
266 275 if (!prefix.trim().isEmpty()) {
... ... @@ -278,7 +287,8 @@ public class CodecComponent {
278 287 /**
279 288 * Adds the intersecting.
280 289 *
281   - * @param prefix the prefix
  290 + * @param prefix
  291 + * the prefix
282 292 */
283 293 public void addIntersecting(String prefix) {
284 294 if (!prefix.trim().isEmpty()) {
... ... @@ -347,19 +357,32 @@ public class CodecComponent {
347 357 /**
348 358 * Instantiates a new component document.
349 359 *
350   - * @param key the key
351   - * @param prefix the prefix
352   - * @param statsType the stats type
353   - * @param regexp the regexp
354   - * @param list the list
355   - * @param listNumber the list number
356   - * @param listRegexp the list regexp
357   - * @param listExpand the list expand
358   - * @param listExpandNumber the list expand number
359   - * @param ignoreRegexp the ignore regexp
360   - * @param ignoreList the ignore list
361   - * @param ignoreListRegexp the ignore list regexp
362   - * @throws IOException Signals that an I/O exception has occurred.
  360 + * @param key
  361 + * the key
  362 + * @param prefix
  363 + * the prefix
  364 + * @param statsType
  365 + * the stats type
  366 + * @param regexp
  367 + * the regexp
  368 + * @param list
  369 + * the list
  370 + * @param listNumber
  371 + * the list number
  372 + * @param listRegexp
  373 + * the list regexp
  374 + * @param listExpand
  375 + * the list expand
  376 + * @param listExpandNumber
  377 + * the list expand number
  378 + * @param ignoreRegexp
  379 + * the ignore regexp
  380 + * @param ignoreList
  381 + * the ignore list
  382 + * @param ignoreListRegexp
  383 + * the ignore list regexp
  384 + * @throws IOException
  385 + * Signals that an I/O exception has occurred.
363 386 */
364 387 public ComponentDocument(String key, String prefix, String statsType,
365 388 String regexp, String[] list, int listNumber, Boolean listRegexp,
... ... @@ -463,15 +486,24 @@ public class CodecComponent {
463 486 /**
464 487 * Instantiates a new component kwic.
465 488 *
466   - * @param query the query
467   - * @param key the key
468   - * @param prefixes the prefixes
469   - * @param number the number
470   - * @param start the start
471   - * @param left the left
472   - * @param right the right
473   - * @param output the output
474   - * @throws IOException Signals that an I/O exception has occurred.
  489 + * @param query
  490 + * the query
  491 + * @param key
  492 + * the key
  493 + * @param prefixes
  494 + * the prefixes
  495 + * @param number
  496 + * the number
  497 + * @param start
  498 + * the start
  499 + * @param left
  500 + * the left
  501 + * @param right
  502 + * the right
  503 + * @param output
  504 + * the output
  505 + * @throws IOException
  506 + * Signals that an I/O exception has occurred.
475 507 */
476 508 public ComponentKwic(MtasSpanQuery query, String key, String prefixes,
477 509 Integer number, int start, int left, int right, String output)
... ... @@ -597,22 +629,38 @@ public class CodecComponent {
597 629 /**
598 630 * Instantiates a new component list.
599 631 *
600   - * @param spanQuery the span query
601   - * @param field the field
602   - * @param queryValue the query value
603   - * @param queryType the query type
604   - * @param queryPrefix the query prefix
605   - * @param queryVariables the query variables
606   - * @param queryIgnore the query ignore
607   - * @param queryMaximumIgnoreLength the query maximum ignore length
608   - * @param key the key
609   - * @param prefix the prefix
610   - * @param start the start
611   - * @param number the number
612   - * @param left the left
613   - * @param right the right
614   - * @param output the output
615   - * @throws IOException Signals that an I/O exception has occurred.
  632 + * @param spanQuery
  633 + * the span query
  634 + * @param field
  635 + * the field
  636 + * @param queryValue
  637 + * the query value
  638 + * @param queryType
  639 + * the query type
  640 + * @param queryPrefix
  641 + * the query prefix
  642 + * @param queryVariables
  643 + * the query variables
  644 + * @param queryIgnore
  645 + * the query ignore
  646 + * @param queryMaximumIgnoreLength
  647 + * the query maximum ignore length
  648 + * @param key
  649 + * the key
  650 + * @param prefix
  651 + * the prefix
  652 + * @param start
  653 + * the start
  654 + * @param number
  655 + * the number
  656 + * @param left
  657 + * the left
  658 + * @param right
  659 + * the right
  660 + * @param output
  661 + * the output
  662 + * @throws IOException
  663 + * Signals that an I/O exception has occurred.
616 664 */
617 665 public ComponentList(MtasSpanQuery spanQuery, String field,
618 666 String queryValue, String queryType, String queryPrefix,
... ... @@ -727,24 +775,42 @@ public class CodecComponent {
727 775 /**
728 776 * Instantiates a new component group.
729 777 *
730   - * @param spanQuery the span query
731   - * @param key the key
732   - * @param number the number
733   - * @param start the start
734   - * @param groupingHitInsidePrefixes the grouping hit inside prefixes
735   - * @param groupingHitInsideLeftPosition the grouping hit inside left position
736   - * @param groupingHitInsideLeftPrefixes the grouping hit inside left prefixes
737   - * @param groupingHitInsideRightPosition the grouping hit inside right position
738   - * @param groupingHitInsideRightPrefixes the grouping hit inside right prefixes
739   - * @param groupingHitLeftPosition the grouping hit left position
740   - * @param groupingHitLeftPrefixes the grouping hit left prefixes
741   - * @param groupingHitRightPosition the grouping hit right position
742   - * @param groupingHitRightPrefixes the grouping hit right prefixes
743   - * @param groupingLeftPosition the grouping left position
744   - * @param groupingLeftPrefixes the grouping left prefixes
745   - * @param groupingRightPosition the grouping right position
746   - * @param groupingRightPrefixes the grouping right prefixes
747   - * @throws IOException Signals that an I/O exception has occurred.
  778 + * @param spanQuery
  779 + * the span query
  780 + * @param key
  781 + * the key
  782 + * @param number
  783 + * the number
  784 + * @param start
  785 + * the start
  786 + * @param groupingHitInsidePrefixes
  787 + * the grouping hit inside prefixes
  788 + * @param groupingHitInsideLeftPosition
  789 + * the grouping hit inside left position
  790 + * @param groupingHitInsideLeftPrefixes
  791 + * the grouping hit inside left prefixes
  792 + * @param groupingHitInsideRightPosition
  793 + * the grouping hit inside right position
  794 + * @param groupingHitInsideRightPrefixes
  795 + * the grouping hit inside right prefixes
  796 + * @param groupingHitLeftPosition
  797 + * the grouping hit left position
  798 + * @param groupingHitLeftPrefixes
  799 + * the grouping hit left prefixes
  800 + * @param groupingHitRightPosition
  801 + * the grouping hit right position
  802 + * @param groupingHitRightPrefixes
  803 + * the grouping hit right prefixes
  804 + * @param groupingLeftPosition
  805 + * the grouping left position
  806 + * @param groupingLeftPrefixes
  807 + * the grouping left prefixes
  808 + * @param groupingRightPosition
  809 + * the grouping right position
  810 + * @param groupingRightPrefixes
  811 + * the grouping right prefixes
  812 + * @throws IOException
  813 + * Signals that an I/O exception has occurred.
748 814 */
749 815 public ComponentGroup(MtasSpanQuery spanQuery, String key, int number,
750 816 int start, String groupingHitInsidePrefixes,
... ... @@ -804,11 +870,15 @@ public class CodecComponent {
804 870 /**
805 871 * Creates the positioned prefixes.
806 872 *
807   - * @param prefixList the prefix list
808   - * @param position the position
809   - * @param prefixes the prefixes
  873 + * @param prefixList
  874 + * the prefix list
  875 + * @param position
  876 + * the position
  877 + * @param prefixes
  878 + * the prefixes
810 879 * @return the hash set[]
811   - * @throws IOException Signals that an I/O exception has occurred.
  880 + * @throws IOException
  881 + * Signals that an I/O exception has occurred.
812 882 */
813 883 private static HashSet<String>[] createPositionedPrefixes(
814 884 HashSet<String> prefixList, String[] position, String[] prefixes)
... ... @@ -962,24 +1032,42 @@ public class CodecComponent {
962 1032 /**
963 1033 * Instantiates a new component facet.
964 1034 *
965   - * @param spanQueries the span queries
966   - * @param field the field
967   - * @param key the key
968   - * @param baseFields the base fields
969   - * @param baseFieldTypes the base field types
970   - * @param baseTypes the base types
971   - * @param baseRangeSizes the base range sizes
972   - * @param baseRangeBases the base range bases
973   - * @param baseSortTypes the base sort types
974   - * @param baseSortDirections the base sort directions
975   - * @param baseNumbers the base numbers
976   - * @param baseMinimumDoubles the base minimum doubles
977   - * @param baseMaximumDoubles the base maximum doubles
978   - * @param baseFunctionKeys the base function keys
979   - * @param baseFunctionExpressions the base function expressions
980   - * @param baseFunctionTypes the base function types
981   - * @throws IOException Signals that an I/O exception has occurred.
982   - * @throws ParseException the parse exception
  1035 + * @param spanQueries
  1036 + * the span queries
  1037 + * @param field
  1038 + * the field
  1039 + * @param key
  1040 + * the key
  1041 + * @param baseFields
  1042 + * the base fields
  1043 + * @param baseFieldTypes
  1044 + * the base field types
  1045 + * @param baseTypes
  1046 + * the base types
  1047 + * @param baseRangeSizes
  1048 + * the base range sizes
  1049 + * @param baseRangeBases
  1050 + * the base range bases
  1051 + * @param baseSortTypes
  1052 + * the base sort types
  1053 + * @param baseSortDirections
  1054 + * the base sort directions
  1055 + * @param baseNumbers
  1056 + * the base numbers
  1057 + * @param baseMinimumDoubles
  1058 + * the base minimum doubles
  1059 + * @param baseMaximumDoubles
  1060 + * the base maximum doubles
  1061 + * @param baseFunctionKeys
  1062 + * the base function keys
  1063 + * @param baseFunctionExpressions
  1064 + * the base function expressions
  1065 + * @param baseFunctionTypes
  1066 + * the base function types
  1067 + * @throws IOException
  1068 + * Signals that an I/O exception has occurred.
  1069 + * @throws ParseException
  1070 + * the parse exception
983 1071 */
984 1072 @SuppressWarnings("unchecked")
985 1073 public ComponentFacet(MtasSpanQuery[] spanQueries, String field, String key,
... ... @@ -1191,6 +1279,9 @@ public class CodecComponent {
1191 1279 /** The prefix. */
1192 1280 public String prefix;
1193 1281  
  1282 + /** The distances. */
  1283 + public List<SubComponentDistance> distances;
  1284 +
1194 1285 /** The regexp. */
1195 1286 public String regexp;
1196 1287  
... ... @@ -1239,37 +1330,87 @@ public class CodecComponent {
1239 1330 /**
1240 1331 * Instantiates a new component term vector.
1241 1332 *
1242   - * @param key the key
1243   - * @param prefix the prefix
1244   - * @param regexp the regexp
1245   - * @param full the full
1246   - * @param type the type
1247   - * @param sortType the sort type
1248   - * @param sortDirection the sort direction
1249   - * @param startValue the start value
1250   - * @param number the number
1251   - * @param functionKey the function key
1252   - * @param functionExpression the function expression
1253   - * @param functionType the function type
1254   - * @param boundary the boundary
1255   - * @param list the list
1256   - * @param listRegexp the list regexp
1257   - * @param ignoreRegexp the ignore regexp
1258   - * @param ignoreList the ignore list
1259   - * @param ignoreListRegexp the ignore list regexp
1260   - * @throws IOException Signals that an I/O exception has occurred.
1261   - * @throws ParseException the parse exception
  1333 + * @param key
  1334 + * the key
  1335 + * @param prefix
  1336 + * the prefix
  1337 + * @param distanceKey
  1338 + * the distance key
  1339 + * @param distanceType
  1340 + * the distance type
  1341 + * @param distanceBase
  1342 + * the distance base
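  1343 + * @param distanceParameter
  1344 + * the distance parameters, one map per distance
  1345 + * @param distanceMaximum
  1346 + * the distance maximum, one value per distance; the distance
  1347 + * arrays must all be non-null and of equal length, otherwise
  1348 + * no distances are registered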
  1349 + * @param regexp
  1350 + * the regexp
  1351 + * @param full
  1352 + * the full
  1353 + * @param type
  1354 + * the type
  1355 + * @param sortType
  1356 + * the sort type
  1357 + * @param sortDirection
  1358 + * the sort direction
  1359 + * @param startValue
  1360 + * the start value
  1361 + * @param number
  1362 + * the number
  1363 + * @param functionKey
  1364 + * the function key
  1365 + * @param functionExpression
  1366 + * the function expression
  1367 + * @param functionType
  1368 + * the function type
  1369 + * @param boundary
  1370 + * the boundary
  1371 + * @param list
  1372 + * the list
  1373 + * @param listRegexp
  1374 + * the list regexp
  1375 + * @param ignoreRegexp
  1376 + * the ignore regexp
  1377 + * @param ignoreList
  1378 + * the ignore list
  1379 + * @param ignoreListRegexp
  1380 + * the ignore list regexp
  1381 + * @throws IOException
  1382 + * Signals that an I/O exception has occurred.
  1383 + * @throws ParseException
  1384 + * the parse exception
1262 1385 */
1263 1386 @SuppressWarnings({ "unchecked", "rawtypes" })
1264   - public ComponentTermVector(String key, String prefix, String regexp,
1265   - Boolean full, String type, String sortType, String sortDirection,
1266   - String startValue, int number, String[] functionKey,
1267   - String[] functionExpression, String[] functionType, String boundary,
1268   - String[] list, Boolean listRegexp, String ignoreRegexp,
1269   - String[] ignoreList, Boolean ignoreListRegexp)
1270   - throws IOException, ParseException {
  1387 + public ComponentTermVector(String key, String prefix, String[] distanceKey,
  1388 + String[] distanceType, String[] distanceBase,
  1389 + Map[] distanceParameter,
  1390 + String[] distanceMaximum, String regexp, Boolean full, String type,
  1391 + String sortType, String sortDirection, String startValue, int number,
  1392 + String[] functionKey, String[] functionExpression,
  1393 + String[] functionType, String boundary, String[] list,
  1394 + Boolean listRegexp, String ignoreRegexp, String[] ignoreList,
  1395 + Boolean ignoreListRegexp) throws IOException, ParseException {
1271 1396 this.key = key;
1272 1397 this.prefix = prefix;
  1398 + distances = new ArrayList<>();
  1399 + if (distanceKey != null && distanceType != null && distanceBase != null
  1400 + && distanceParameter != null
  1401 + && distanceMaximum != null) {
  1402 + if (distanceKey.length == distanceType.length
  1403 + && distanceKey.length == distanceBase.length
  1404 + && distanceKey.length == distanceParameter.length
  1405 + && distanceKey.length == distanceMaximum.length) {
  1406 + for (int i = 0; i < distanceKey.length; i++) {
  1407 + SubComponentDistance item = new SubComponentDistance(distanceKey[i],
  1408 + distanceType[i], this.prefix, distanceBase[i],
  1409 + distanceParameter[i], distanceMaximum[i]);
  1410 + distances.add(item);
  1411 + }
  1412 + }
  1413 + }
1273 1414 this.regexp = regexp;
1274 1415 this.full = (full != null && full) ? true : false;
1275 1416 if (sortType == null) {
... ... @@ -1465,16 +1606,26 @@ public class CodecComponent {
1465 1606 /**
1466 1607 * Instantiates a new component span.
1467 1608 *
1468   - * @param queries the queries
1469   - * @param key the key
1470   - * @param minimumDouble the minimum double
1471   - * @param maximumDouble the maximum double
1472   - * @param type the type
1473   - * @param functionKey the function key
1474   - * @param functionExpression the function expression
1475   - * @param functionType the function type
1476   - * @throws IOException Signals that an I/O exception has occurred.
1477   - * @throws ParseException the parse exception
  1609 + * @param queries
  1610 + * the queries
  1611 + * @param key
  1612 + * the key
  1613 + * @param minimumDouble
  1614 + * the minimum double
  1615 + * @param maximumDouble
  1616 + * the maximum double
  1617 + * @param type
  1618 + * the type
  1619 + * @param functionKey
  1620 + * the function key
  1621 + * @param functionExpression
  1622 + * the function expression
  1623 + * @param functionType
  1624 + * the function type
  1625 + * @throws IOException
  1626 + * Signals that an I/O exception has occurred.
  1627 + * @throws ParseException
  1628 + * the parse exception
1478 1629 */
1479 1630 public ComponentSpan(MtasSpanQuery[] queries, String key,
1480 1631 Double minimumDouble, Double maximumDouble, String type,
... ... @@ -1607,12 +1758,18 @@ public class CodecComponent {
1607 1758 /**
1608 1759 * Instantiates a new component position.
1609 1760 *
1610   - * @param key the key
1611   - * @param minimumDouble the minimum double
1612   - * @param maximumDouble the maximum double
1613   - * @param statsType the stats type
1614   - * @throws IOException Signals that an I/O exception has occurred.
1615   - * @throws ParseException the parse exception
  1761 + * @param key
  1762 + * the key
  1763 + * @param minimumDouble
  1764 + * the minimum double
  1765 + * @param maximumDouble
  1766 + * the maximum double
  1767 + * @param statsType
  1768 + * the stats type
  1769 + * @throws IOException
  1770 + * Signals that an I/O exception has occurred.
  1771 + * @throws ParseException
  1772 + * the parse exception
1616 1773 */
1617 1774 public ComponentPosition(String key, Double minimumDouble,
1618 1775 Double maximumDouble, String statsType)
... ... @@ -1666,12 +1823,18 @@ public class CodecComponent {
1666 1823 /**
1667 1824 * Instantiates a new component token.
1668 1825 *
1669   - * @param key the key
1670   - * @param minimumDouble the minimum double
1671   - * @param maximumDouble the maximum double
1672   - * @param statsType the stats type
1673   - * @throws IOException Signals that an I/O exception has occurred.
1674   - * @throws ParseException the parse exception
  1826 + * @param key
  1827 + * the key
  1828 + * @param minimumDouble
  1829 + * the minimum double
  1830 + * @param maximumDouble
  1831 + * the maximum double
  1832 + * @param statsType
  1833 + * the stats type
  1834 + * @throws IOException
  1835 + * Signals that an I/O exception has occurred.
  1836 + * @throws ParseException
  1837 + * the parse exception
1675 1838 */
1676 1839 public ComponentToken(String key, Double minimumDouble,
1677 1840 Double maximumDouble, String statsType)
... ... @@ -1746,8 +1909,10 @@ public class CodecComponent {
1746 1909 /**
1747 1910 * Instantiates a new component collection.
1748 1911 *
1749   - * @param key the key
1750   - * @param action the action
  1912 + * @param key
  1913 + * the key
  1914 + * @param action
  1915 + * the action
1751 1916 */
1752 1917 public ComponentCollection(String key, String action) {
1753 1918 this.key = key;
... ... @@ -1759,7 +1924,8 @@ public class CodecComponent {
1759 1924 /**
1760 1925 * Sets the list variables.
1761 1926 *
1762   - * @throws IOException Signals that an I/O exception has occurred.
  1927 + * @throws IOException
  1928 + * Signals that an I/O exception has occurred.
1763 1929 */
1764 1930 public void setListVariables() throws IOException {
1765 1931 if (action.equals(ACTION_LIST)) {
... ... @@ -1772,9 +1938,12 @@ public class CodecComponent {
1772 1938 /**
1773 1939 * Sets the create variables.
1774 1940 *
1775   - * @param id the id
1776   - * @param fields the fields
1777   - * @throws IOException Signals that an I/O exception has occurred.
  1941 + * @param id
  1942 + * the id
  1943 + * @param fields
  1944 + * the fields
  1945 + * @throws IOException
  1946 + * Signals that an I/O exception has occurred.
1778 1947 */
1779 1948 public void setCreateVariables(String id, Set<String> fields)
1780 1949 throws IOException {
... ... @@ -1789,8 +1958,10 @@ public class CodecComponent {
1789 1958 /**
1790 1959 * Sets the check variables.
1791 1960 *
1792   - * @param id the new check variables
1793   - * @throws IOException Signals that an I/O exception has occurred.
  1961 + * @param id
  1962 + * the new check variables
  1963 + * @throws IOException
  1964 + * Signals that an I/O exception has occurred.
1794 1965 */
1795 1966 public void setCheckVariables(String id) throws IOException {
1796 1967 if (action.equals(ACTION_CHECK)) {
... ... @@ -1803,8 +1974,10 @@ public class CodecComponent {
1803 1974 /**
1804 1975 * Sets the get variables.
1805 1976 *
1806   - * @param id the new gets the variables
1807   - * @throws IOException Signals that an I/O exception has occurred.
  1977 + * @param id
  1978 + * the new gets the variables
  1979 + * @throws IOException
  1980 + * Signals that an I/O exception has occurred.
1808 1981 */
1809 1982 public void setGetVariables(String id) throws IOException {
1810 1983 if (action.equals(ACTION_GET)) {
... ... @@ -1817,9 +1990,12 @@ public class CodecComponent {
1817 1990 /**
1818 1991 * Sets the post variables.
1819 1992 *
1820   - * @param id the id
1821   - * @param values the values
1822   - * @throws IOException Signals that an I/O exception has occurred.
  1993 + * @param id
  1994 + * the id
  1995 + * @param values
  1996 + * the values
  1997 + * @throws IOException
  1998 + * Signals that an I/O exception has occurred.
1823 1999 */
1824 2000 public void setPostVariables(String id, HashSet<String> values)
1825 2001 throws IOException {
... ... @@ -1834,10 +2010,14 @@ public class CodecComponent {
1834 2010 /**
1835 2011 * Sets the import variables.
1836 2012 *
1837   - * @param id the id
1838   - * @param url the url
1839   - * @param collection the collection
1840   - * @throws IOException Signals that an I/O exception has occurred.
  2013 + * @param id
  2014 + * the id
  2015 + * @param url
  2016 + * the url
  2017 + * @param collection
  2018 + * the collection
  2019 + * @throws IOException
  2020 + * Signals that an I/O exception has occurred.
1841 2021 */
1842 2022 public void setImportVariables(String id, String url, String collection)
1843 2023 throws IOException {
... ... @@ -1895,9 +2075,11 @@ public class CodecComponent {
1895 2075 /**
1896 2076 * Gets the import.
1897 2077 *
1898   - * @param collectionGetUrl the collection get url
  2078 + * @param collectionGetUrl
  2079 + * the collection get url
1899 2080 * @return the import
1900   - * @throws IOException Signals that an I/O exception has occurred.
  2081 + * @throws IOException
  2082 + * Signals that an I/O exception has occurred.
1901 2083 */
1902 2084 private Map<String, Object> getImport(String collectionGetUrl)
1903 2085 throws IOException {
... ... @@ -1929,8 +2111,10 @@ public class CodecComponent {
1929 2111 /**
1930 2112 * Sets the delete variables.
1931 2113 *
1932   - * @param id the new delete variables
1933   - * @throws IOException Signals that an I/O exception has occurred.
  2114 + * @param id
  2115 + * the new delete variables
  2116 + * @throws IOException
  2117 + * Signals that an I/O exception has occurred.
1934 2118 */
1935 2119 public void setDeleteVariables(String id) throws IOException {
1936 2120 if (action.equals(ACTION_DELETE)) {
... ... @@ -1970,8 +2154,10 @@ public class CodecComponent {
1970 2154 /**
1971 2155 * Adds the value.
1972 2156 *
1973   - * @param value the value
1974   - * @throws IOException Signals that an I/O exception has occurred.
  2157 + * @param value
  2158 + * the value
  2159 + * @throws IOException
  2160 + * Signals that an I/O exception has occurred.
1975 2161 */
1976 2162 public void addValue(String value) throws IOException {
1977 2163 if (action.equals(ACTION_CREATE)) {
... ... @@ -1988,8 +2174,10 @@ public class CodecComponent {
1988 2174 /**
1989 2175 * Gets the params from JSON.
1990 2176 *
1991   - * @param params the params
1992   - * @param json the json
  2177 + * @param params
  2178 + * the params
  2179 + * @param json
  2180 + * the json
1993 2181 * @return the params from JSON
1994 2182 */
1995 2183 private static void getParamsFromJSON(Map<String, Object> params,
... ... @@ -2031,6 +2219,84 @@ public class CodecComponent {
2031 2219 }
2032 2220  
2033 2221 /**
  2222 + * The Class SubComponentDistance.
  2223 + */
  2224 + public static class SubComponentDistance implements Serializable {
  2225 +
  2226 + /** The key. */
  2227 + public String key;
  2228 +
  2229 + /** The type. */
  2230 + public String type;
  2231 +
  2232 + /** The base. */
  2233 + public String base;
  2234 +
  2235 + /** The prefix. */
  2236 + public String prefix;
  2237 +
  2238 + /** The maximum. */
  2239 + public Double maximum;
  2240 +
  2241 + Map<String,String> parameters;
  2242 +
  2243 + public transient Distance distance = null;
  2244 +
  2245 + /** The Constant NAME_LEVENSHTEIN. */
  2246 + private static final String NAME_LEVENSHTEIN = "levenshtein";
  2247 + private static final String NAME_DAMERAULEVENSHTEIN = "damerau-levenshtein";
  2248 +
  2249 + /**
  2250 + * Instantiates a new sub component distance.
  2251 + *
  2252 + * @param key
  2253 + * the key
  2254 + * @param type
  2255 + * the type
  2256 + * @param prefix
  2257 + * the prefix
  2258 + * @param base
  2259 + * the base
  2260 + * @param parameter
  2261 + * the parameter
  2262 + * @param minimum
  2263 + * the minimum
  2264 + * @param maximum
  2265 + * the maximum
  2266 + * @throws IOException
  2267 + * Signals that an I/O exception has occurred.
  2268 + */
  2269 + public SubComponentDistance(String key, String type, String prefix,
  2270 + String base, Map<String,String> parameters, String maximum) {
  2271 + this.key = key;
  2272 + this.prefix = prefix;
  2273 + this.type = type;
  2274 + this.base = base;
  2275 + this.parameters = parameters;
  2276 + this.maximum = maximum != null ? Double.parseDouble(maximum) : null;
  2277 + }
  2278 +
  2279 + /**
  2280 + * Gets the distance.
  2281 + *
  2282 + * @return the distance
  2283 + * @throws IOException Signals that an I/O exception has occurred.
  2284 + */
  2285 + public Distance getDistance() throws IOException {
  2286 + if (distance == null) {
  2287 + if (type != null && type.equals(NAME_LEVENSHTEIN)) {
  2288 + distance = new LevenshteinDistance(prefix, base, maximum, parameters);
  2289 + } else if (type != null && type.equals(NAME_DAMERAULEVENSHTEIN)) {
  2290 + distance = new DamerauLevenshteinDistance(prefix, base, maximum, parameters);
  2291 + } else {
  2292 + throw new IOException("unrecognized distance " + type);
  2293 + }
  2294 + }
  2295 + return distance;
  2296 + }
  2297 + }
  2298 +
  2299 + /**
2034 2300 * The Class SubComponentFunction.
2035 2301 */
2036 2302 public static class SubComponentFunction {
... ... @@ -2068,18 +2334,30 @@ public class CodecComponent {
2068 2334 /**
2069 2335 * Instantiates a new sub component function.
2070 2336 *
2071   - * @param collectorType the collector type
2072   - * @param key the key
2073   - * @param type the type
2074   - * @param parserFunction the parser function
2075   - * @param sortType the sort type
2076   - * @param sortDirection the sort direction
2077   - * @param start the start
2078   - * @param number the number
2079   - * @param segmentRegistration the segment registration
2080   - * @param boundary the boundary
2081   - * @throws ParseException the parse exception
2082   - * @throws IOException Signals that an I/O exception has occurred.
  2337 + * @param collectorType
  2338 + * the collector type
  2339 + * @param key
  2340 + * the key
  2341 + * @param type
  2342 + * the type
  2343 + * @param parserFunction
  2344 + * the parser function
  2345 + * @param sortType
  2346 + * the sort type
  2347 + * @param sortDirection
  2348 + * the sort direction
  2349 + * @param start
  2350 + * the start
  2351 + * @param number
  2352 + * the number
  2353 + * @param segmentRegistration
  2354 + * the segment registration
  2355 + * @param boundary
  2356 + * the boundary
  2357 + * @throws ParseException
  2358 + * the parse exception
  2359 + * @throws IOException
  2360 + * Signals that an I/O exception has occurred.
2083 2361 */
2084 2362 public SubComponentFunction(String collectorType, String key, String type,
2085 2363 MtasFunctionParserFunction parserFunction, String sortType,
... ... @@ -2112,12 +2390,18 @@ public class CodecComponent {
2112 2390 /**
2113 2391 * Instantiates a new sub component function.
2114 2392 *
2115   - * @param collectorType the collector type
2116   - * @param key the key
2117   - * @param expression the expression
2118   - * @param type the type
2119   - * @throws ParseException the parse exception
2120   - * @throws IOException Signals that an I/O exception has occurred.
  2393 + * @param collectorType
  2394 + * the collector type
  2395 + * @param key
  2396 + * the key
  2397 + * @param expression
  2398 + * the expression
  2399 + * @param type
  2400 + * the type
  2401 + * @throws ParseException
  2402 + * the parse exception
  2403 + * @throws IOException
  2404 + * Signals that an I/O exception has occurred.
2121 2405 */
2122 2406 public SubComponentFunction(String collectorType, String key,
2123 2407 String expression, String type) throws ParseException, IOException {
... ... @@ -2160,8 +2444,10 @@ public class CodecComponent {
2160 2444 /**
2161 2445 * Instantiates a new kwic token.
2162 2446 *
2163   - * @param match the match
2164   - * @param tokens the tokens
  2447 + * @param match
  2448 + * the match
  2449 + * @param tokens
  2450 + * the tokens
2165 2451 */
2166 2452 public KwicToken(Match match, List<MtasTokenString> tokens) {
2167 2453 startPosition = match.startPosition;
... ... @@ -2188,8 +2474,10 @@ public class CodecComponent {
2188 2474 /**
2189 2475 * Instantiates a new kwic hit.
2190 2476 *
2191   - * @param match the match
2192   - * @param hits the hits
  2477 + * @param match
  2478 + * the match
  2479 + * @param hits
  2480 + * the hits
2193 2481 */
2194 2482 public KwicHit(Match match, Map<Integer, List<String>> hits) {
2195 2483 startPosition = match.startPosition;
... ... @@ -2261,7 +2549,8 @@ public class CodecComponent {
2261 2549 /**
2262 2550 * Sort.
2263 2551 *
2264   - * @param data the data
  2552 + * @param data
  2553 + * the data
2265 2554 * @return the list
2266 2555 */
2267 2556 private List<MtasTreeHit<String>> sort(List<MtasTreeHit<String>> data) {
... ... @@ -2280,14 +2569,22 @@ public class CodecComponent {
2280 2569 /**
2281 2570 * Instantiates a new group hit.
2282 2571 *
2283   - * @param list the list
2284   - * @param start the start
2285   - * @param end the end
2286   - * @param hitStart the hit start
2287   - * @param hitEnd the hit end
2288   - * @param group the group
2289   - * @param knownPrefixes the known prefixes
2290   - * @throws UnsupportedEncodingException the unsupported encoding exception
  2572 + * @param list
  2573 + * the list
  2574 + * @param start
  2575 + * the start
  2576 + * @param end
  2577 + * the end
  2578 + * @param hitStart
  2579 + * the hit start
  2580 + * @param hitEnd
  2581 + * the hit end
  2582 + * @param group
  2583 + * the group
  2584 + * @param knownPrefixes
  2585 + * the known prefixes
  2586 + * @throws UnsupportedEncodingException
  2587 + * the unsupported encoding exception
2291 2588 */
2292 2589 @SuppressWarnings("unchecked")
2293 2590 public GroupHit(List<MtasTreeHit<String>> list, int start, int end,
... ... @@ -2596,8 +2893,10 @@ public class CodecComponent {
2596 2893 /**
2597 2894 * Data equals.
2598 2895 *
2599   - * @param d1 the d 1
2600   - * @param d2 the d 2
  2896 + * @param d1
  2897 + * the d 1
  2898 + * @param d2
  2899 + * the d 2
2601 2900 * @return true, if successful
2602 2901 */
2603 2902 private boolean dataEquals(List<String>[] d1, List<String>[] d2) {
... ... @@ -2636,7 +2935,7 @@ public class CodecComponent {
2636 2935 */
2637 2936 @Override
2638 2937 public boolean equals(Object obj) {
2639   - if (this == obj)
  2938 + if (this == obj)
2640 2939 return true;
2641 2940 if (obj == null)
2642 2941 return false;
... ... @@ -2657,13 +2956,18 @@ public class CodecComponent {
2657 2956 /**
2658 2957 * Data to string.
2659 2958 *
2660   - * @param data the data
2661   - * @param missing the missing
  2959 + * @param data
  2960 + * the data
  2961 + * @param missing
  2962 + * the missing
  2963 + * @param reverse
  2964 + * the reverse
2662 2965 * @return the string
2663   - * @throws UnsupportedEncodingException the unsupported encoding exception
  2966 + * @throws UnsupportedEncodingException
  2967 + * the unsupported encoding exception
2664 2968 */
2665   - private String dataToString(List<String>[] data, Set<String>[] missing, boolean reverse)
2666   - throws UnsupportedEncodingException {
  2969 + private String dataToString(List<String>[] data, Set<String>[] missing,
  2970 + boolean reverse) throws UnsupportedEncodingException {
2667 2971 StringBuilder text = null;
2668 2972 Encoder encoder = Base64.getEncoder();
2669 2973 String prefix;
... ... @@ -2672,9 +2976,9 @@ public class CodecComponent {
2672 2976 Set<String> missingItem;
2673 2977 if (data != null && missing != null && data.length == missing.length) {
2674 2978 for (int i = 0; i < data.length; i++) {
2675   - if(reverse) {
2676   - dataItem = data[(data.length-i-1)];
2677   - missingItem = missing[(data.length-i-1)];
  2979 + if (reverse) {
  2980 + dataItem = data[(data.length - i - 1)];
  2981 + missingItem = missing[(data.length - i - 1)];
2678 2982 } else {
2679 2983 dataItem = data[i];
2680 2984 missingItem = missing[i];
... ... @@ -2726,8 +3030,10 @@ public class CodecComponent {
2726 3030 /**
2727 3031 * Key to sub sub object.
2728 3032 *
2729   - * @param key the key
2730   - * @param newKey the new key
  3033 + * @param key
  3034 + * the key
  3035 + * @param newKey
  3036 + * the new key
2731 3037 * @return the map[]
2732 3038 */
2733 3039 private static Map<String, String>[] keyToSubSubObject(String key,
... ... @@ -2794,8 +3100,10 @@ public class CodecComponent {
2794 3100 /**
2795 3101 * Key to sub object.
2796 3102 *
2797   - * @param key the key
2798   - * @param newKey the new key
  3103 + * @param key
  3104 + * the key
  3105 + * @param newKey
  3106 + * the new key
2799 3107 * @return the map
2800 3108 */
2801 3109 private static Map<Integer, Map<String, String>[]> keyToSubObject(
... ... @@ -2819,8 +3127,10 @@ public class CodecComponent {
2819 3127 /**
2820 3128 * Key to object.
2821 3129 *
2822   - * @param key the key
2823   - * @param newKey the new key
  3130 + * @param key
  3131 + * the key
  3132 + * @param newKey
  3133 + * the new key
2824 3134 * @return the map
2825 3135 */
2826 3136 public static Map<String, Map<Integer, Map<String, String>[]>> keyToObject(
... ... @@ -2885,10 +3195,14 @@ public class CodecComponent {
2885 3195 /**
2886 3196 * Instantiates a new list token.
2887 3197 *
2888   - * @param docId the doc id
2889   - * @param docPosition the doc position
2890   - * @param match the match
2891   - * @param tokens the tokens
  3198 + * @param docId
  3199 + * the doc id
  3200 + * @param docPosition
  3201 + * the doc position
  3202 + * @param match
  3203 + * the match
  3204 + * @param tokens
  3205 + * the tokens
2892 3206 */
2893 3207 public ListToken(Integer docId, Integer docPosition, Match match,
2894 3208 List<MtasTokenString> tokens) {
... ... @@ -2923,10 +3237,14 @@ public class CodecComponent {
2923 3237 /**
2924 3238 * Instantiates a new list hit.
2925 3239 *
2926   - * @param docId the doc id
2927   - * @param docPosition the doc position
2928   - * @param match the match
2929   - * @param hits the hits
  3240 + * @param docId
  3241 + * the doc id
  3242 + * @param docPosition
  3243 + * the doc position
  3244 + * @param match
  3245 + * the match
  3246 + * @param hits
  3247 + * the hits
2930 3248 */
2931 3249 public ListHit(Integer docId, Integer docPosition, Match match,
2932 3250 Map<Integer, List<String>> hits) {
... ... @@ -2952,8 +3270,10 @@ public class CodecComponent {
2952 3270 /**
2953 3271 * Instantiates a new match.
2954 3272 *
2955   - * @param startPosition the start position
2956   - * @param endPosition the end position
  3273 + * @param startPosition
  3274 + * the start position
  3275 + * @param endPosition
  3276 + * the end position
2957 3277 */
2958 3278 public Match(int startPosition, int endPosition) {
2959 3279 this.startPosition = startPosition;
... ...
src/main/java/mtas/codec/util/distance/DamerauLevenshteinDistance.java 0 → 100644
  1 +package mtas.codec.util.distance;
  2 +
  3 +import java.io.IOException;
  4 +import java.util.Arrays;
  5 +import java.util.Map;
  6 +import java.util.Map.Entry;
  7 +
  8 +import org.apache.lucene.util.BytesRef;
  9 +
  10 +import mtas.analysis.token.MtasToken;
  11 +
  12 +/**
  13 + * The Class DamerauLevenshteinDistance.
  14 + */
  15 +public class DamerauLevenshteinDistance extends LevenshteinDistance {
  16 +
  17 + /** The Constant defaultTranspositionDistance. */
  18 + protected final static double defaultTranspositionDistance = 1.0;
  19 +
  20 + /** The transposition distance. */
  21 + protected double transpositionDistance;
  22 +
  23 + /** The Constant PARAMETER_TRANSPOSITIONDISTANCE. */
  24 + protected final static String PARAMETER_TRANSPOSITIONDISTANCE = "transpositionDistance";
  25 +
  26 + /**
  27 + * Instantiates a new damerau levenshtein distance.
  28 + *
  29 + * @param prefix the prefix
  30 + * @param base the base
  31 + * @param maximum the maximum
  32 + * @param parameters the parameters
  33 + * @throws IOException Signals that an I/O exception has occurred.
  34 + */
  35 + public DamerauLevenshteinDistance(String prefix, String base, Double maximum,
  36 + Map<String, String> parameters) throws IOException {
  37 + super(prefix, base, maximum, parameters);
  38 + transpositionDistance = defaultTranspositionDistance;
  39 + if (parameters != null) {
  40 + for (Entry<String, String> entry : parameters.entrySet()) {
  41 + if (entry.getKey().equals(PARAMETER_TRANSPOSITIONDISTANCE)) {
  42 + transpositionDistance = Double.parseDouble(entry.getValue());
  43 + }
  44 + }
  45 + }
  46 + if (transpositionDistance < 0) {
  47 + throw new IOException("distances should be zero or positive");
  48 + }
  49 + }
  50 +
  51 + /*
  52 + * (non-Javadoc)
  53 + *
  54 + * @see
  55 + * mtas.codec.util.distance.LevenshteinDistance#validate(org.apache.lucene.
  56 + * util.BytesRef)
  57 + */
  58 + @Override
  59 + public boolean validate(BytesRef term) {
  60 + if (maximum == null) {
  61 + return true;
  62 + } else {
  63 + double[][] state = _start();
  64 + char ch1;
  65 + char ch2 = 0x00;
  66 + int i = term.offset + MtasToken.DELIMITER.length() + prefix.length();
  67 + for (; i < term.length; i++) {
  68 + ch1 = (char) term.bytes[i];
  69 + if (ch1 == 0x00) {
  70 + break;
  71 + }
  72 + state = _step(state, ch1, ch2);
  73 + if (!_can_match(state)) {
  74 + return false;
  75 + }
  76 + ch2 = ch1;
  77 + }
  78 + return _is_match(state);
  79 + }
  80 + }
  81 +
  82 + /*
  83 + * (non-Javadoc)
  84 + *
  85 + * @see mtas.codec.util.distance.LevenshteinDistance#compute(java.lang.String)
  86 + */
  87 + @Override
  88 + public double compute(String key) {
  89 + double[][] state = _start();
  90 + char ch2 = 0x00;
  91 + for (char ch1 : key.toCharArray()) {
  92 + if (ch1 == 0x00) {
  93 + break;
  94 + }
  95 + state = _step(state, ch1, ch2);
  96 + ch2 = ch1;
  97 + }
  98 + return _distance(state);
  99 + }
  100 +
  101 + /**
  102 + * Start.
  103 + *
  104 + * @return the double[][]
  105 + */
  106 + private double[][] _start() {
  107 + double[][] startState = new double[3][];
  108 + startState[0] = new double[initialState.length];
  109 + startState[1] = new double[initialState.length];
  110 + startState[2] = Arrays.copyOf(initialState, initialState.length);
  111 + return startState;
  112 + }
  113 +
  114 + /**
  115 + * Step.
  116 + *
  117 + * @param state the state
  118 + * @param ch1 the ch 1
  119 + * @param ch2 the ch 2
  120 + * @return the double[][]
  121 + */
  122 + private double[][] _step(double[][] state, char ch1, char ch2) {
  123 + double cost;
  124 + _shift(state);
  125 + state[2][0] = state[1][0] + deletionDistance;
  126 + for (int i = 0; i < base.length(); i++) {
  127 + cost = (base.charAt(i) == ch1) ? 0 : replaceDistance;
  128 + state[2][i + 1] = Math.min(state[2][i] + insertionDistance,
  129 + state[1][i] + cost);
  130 + state[2][i + 1] = Math.min(state[2][i + 1],
  131 + state[1][i + 1] + deletionDistance);
  132 + if (i > 0 && ch2 != 0x00 && (base.charAt(i - 1) == ch1)
  133 + && (base.charAt(i) == ch2)) {
  134 + state[2][i + 1] = Math.min(state[2][i + 1],
  135 + state[0][i - 1] + transpositionDistance);
  136 + }
  137 + }
  138 + return state;
  139 + }
  140 +
  141 + /**
  142 + * Shift.
  143 + *
  144 + * @param state the state
  145 + */
  146 + private void _shift(double[][] state) {
  147 + double[] tmpState = state[0];
  148 + state[0] = state[1];
  149 + state[1] = state[2];
  150 + state[2] = tmpState;
  151 + }
  152 +
  153 + /**
  154 + * Checks if is match.
  155 + *
  156 + * @param state the state
  157 + * @return true, if successful
  158 + */
  159 + private boolean _is_match(double[][] state) {
  160 + return state[2][state[2].length - 1] <= maximum;
  161 + }
  162 +
  163 + /**
  164 + * Can match.
  165 + *
  166 + * @param state the state
  167 + * @return true, if successful
  168 + */
  169 + private boolean _can_match(double[][] state) {
  170 + for (double d : state[2]) {
  171 + if (d <= maximum) {
  172 + return true;
  173 + }
  174 + }
  175 + return false;
  176 + }
  177 +
  178 + /**
  179 + * Distance.
  180 + *
  181 + * @param state the state
  182 + * @return the double
  183 + */
  184 + private double _distance(double[][] state) {
  185 + return state[2][state[2].length - 1];
  186 + }
  187 +
  188 +}
... ...
src/main/java/mtas/codec/util/distance/Distance.java 0 → 100644
  1 +package mtas.codec.util.distance;
  2 +
  3 +import java.io.IOException;
  4 +import java.util.Map;
  5 +
  6 +import org.apache.lucene.util.BytesRef;
  7 +
  8 +public abstract class Distance {
  9 +
  10 + protected final String prefix;
  11 + protected final String base;
  12 + protected final Double maximum;
  13 + protected final Map<String,String> parameters;
  14 +
  15 + public static final String NAME = "distance";
  16 +
  17 + public Distance(String prefix, String base, Double maximum, Map<String,String> parameters) throws IOException {
  18 + this.prefix = prefix;
  19 + this.base = base;
  20 + this.maximum = maximum;
  21 + this.parameters = parameters;
  22 + }
  23 +
  24 + public abstract double compute(String key);
  25 +
  26 + public abstract boolean validate(BytesRef term);
  27 +
  28 +}
... ...
src/main/java/mtas/codec/util/distance/LevenshteinDistance.java 0 → 100644
  1 +package mtas.codec.util.distance;
  2 +
  3 +import java.io.IOException;
  4 +import java.util.Arrays;
  5 +import java.util.Map;
  6 +import java.util.Map.Entry;
  7 +import org.apache.lucene.util.BytesRef;
  8 +
  9 +import mtas.analysis.token.MtasToken;
  10 +
  11 +/**
  12 + * The Class LevenshteinDistance.
  13 + */
  14 +public class LevenshteinDistance extends Distance {
  15 +
  16 + /** The initial state. */
  17 + protected final double[] initialState;
  18 +
  19 + /** The Constant defaultDeletionDistance. */
  20 + protected final static double defaultDeletionDistance = 1.0;
  21 +
  22 + /** The Constant defaultInsertionDistance. */
  23 + protected final static double defaultInsertionDistance = 1.0;
  24 +
  25 + /** The Constant defaultReplaceDistance. */
  26 + protected final static double defaultReplaceDistance = 1.0;
  27 +
  28 + /** The deletion distance. */
  29 + protected double deletionDistance;
  30 +
  31 + /** The insertion distance. */
  32 + protected double insertionDistance;
  33 +
  34 + /** The replace distance. */
  35 + protected double replaceDistance;
  36 +
  37 + /** The Constant PARAMETER_DELETIONDISTANCE. */
  38 + protected final static String PARAMETER_DELETIONDISTANCE = "deletionDistance";
  39 +
  40 + /** The Constant PARAMETER_INSERTIONDISTANCE. */
  41 + protected final static String PARAMETER_INSERTIONDISTANCE = "insertionDistance";
  42 +
  43 + /** The Constant PARAMETER_REPLACEDISTANCE. */
  44 + protected final static String PARAMETER_REPLACEDISTANCE = "replaceDistance";
  45 +
  46 + /**
  47 + * Instantiates a new levenshtein distance.
  48 + *
  49 + * @param prefix the prefix
  50 + * @param base the base
  51 + * @param maximum the maximum
  52 + * @param parameters the parameters
  53 + * @throws IOException Signals that an I/O exception has occurred.
  54 + */
  55 + public LevenshteinDistance(String prefix, String base, Double maximum,
  56 + Map<String, String> parameters) throws IOException {
  57 + super(prefix, base, maximum, parameters);
  58 + deletionDistance = defaultDeletionDistance;
  59 + insertionDistance = defaultInsertionDistance;
  60 + replaceDistance = defaultReplaceDistance;
  61 + if (parameters != null) {
  62 + for (Entry<String, String> entry : parameters.entrySet()) {
  63 + if (entry.getKey().equals(PARAMETER_DELETIONDISTANCE)) {
  64 + deletionDistance = Double.parseDouble(entry.getValue());
  65 + } else if (entry.getKey().equals(PARAMETER_INSERTIONDISTANCE)) {
  66 + insertionDistance = Double.parseDouble(entry.getValue());
  67 + } else if (entry.getKey().equals(PARAMETER_REPLACEDISTANCE)) {
  68 + replaceDistance = Double.parseDouble(entry.getValue());
  69 + }
  70 + }
  71 + }
  72 + if (deletionDistance < 0 || insertionDistance < 0 || replaceDistance < 0) {
  73 + throw new IOException("distances should be zero or positive");
  74 + }
  75 + initialState = new double[base.length() + 1];
  76 + for (int i = 0; i <= base.length(); i++) {
  77 + initialState[i] = i * insertionDistance;
  78 + }
  79 + }
  80 +
  81 + /* (non-Javadoc)
  82 + * @see mtas.codec.util.distance.Distance#validate(org.apache.lucene.util.BytesRef)
  83 + */
  84 + public boolean validate(BytesRef term) {
  85 + if (maximum == null) {
  86 + return true;
  87 + } else {
  88 + double[][] state = _start();
  89 + char ch1;
  90 + int i = term.offset + MtasToken.DELIMITER.length() + prefix.length();
  91 + for (; i < term.length; i++) {
  92 + ch1 = (char) term.bytes[i];
  93 + if (ch1 == 0x00) {
  94 + break;
  95 + }
  96 + state = _step(state, ch1);
  97 + if (!_can_match(state)) {
  98 + return false;
  99 + }
  100 + }
  101 + return _is_match(state);
  102 + }
  103 + }
  104 +
  105 + /* (non-Javadoc)
  106 + * @see mtas.codec.util.distance.Distance#compute(java.lang.String)
  107 + */
  108 + @Override
  109 + public double compute(String key) {
  110 + double[][] state = _start();
  111 + for (char ch1 : key.toCharArray()) {
  112 + if (ch1 == 0x00) {
  113 + break;
  114 + }
  115 + state = _step(state, ch1);
  116 + }
  117 + return _distance(state);
  118 + }
  119 +
  120 + /**
  121 + * Start.
  122 + *
  123 + * @return the double[][]
  124 + */
  125 + private double[][] _start() {
  126 + double[][] startState = new double[2][];
  127 + startState[0] = new double[initialState.length];
  128 + startState[1] = Arrays.copyOf(initialState, initialState.length);
  129 + return startState;
  130 + }
  131 +
  132 + /**
  133 + * Step.
  134 + *
  135 + * @param state the state
  136 + * @param ch1 the ch 1
  137 + * @return the double[][]
  138 + */
  139 + private double[][] _step(double[][] state, char ch1) {
  140 + double cost;
  141 + _shift(state);
  142 + state[1][0] = state[0][0] + deletionDistance;
  143 + for (int i = 0; i < base.length(); i++) {
  144 + cost = (base.charAt(i) == ch1) ? 0 : replaceDistance;
  145 + state[1][i + 1] = Math.min(state[1][i] + insertionDistance,
  146 + state[0][i] + cost);
  147 + state[1][i + 1] = Math.min(state[1][i + 1],
  148 + state[0][i + 1] + deletionDistance);
  149 + }
  150 + return state;
  151 + }
  152 +
  153 + /**
  154 + * Shift.
  155 + *
  156 + * @param state the state
  157 + */
  158 + private void _shift(double[][] state) {
  159 + double[] tmpState = state[0];
  160 + state[0] = state[1];
  161 + state[1] = tmpState;
  162 + }
  163 +
  164 + /**
  165 + * Checks if is match.
  166 + *
  167 + * @param state the state
  168 + * @return true, if successful
  169 + */
  170 + private boolean _is_match(double[][] state) {
  171 + return state[1][state[1].length - 1] <= maximum;
  172 + }
  173 +
  174 + /**
  175 + * Can match.
  176 + *
  177 + * @param state the state
  178 + * @return true, if successful
  179 + */
  180 + private boolean _can_match(double[][] state) {
  181 + for (double d : state[1]) {
  182 + if (d <= maximum) {
  183 + return true;
  184 + }
  185 + }
  186 + return false;
  187 + }
  188 +
  189 + /**
  190 + * Distance.
  191 + *
  192 + * @param state the state
  193 + * @return the double
  194 + */
  195 + private double _distance(double[][] state) {
  196 + return state[1][state[1].length - 1];
  197 + }
  198 +
  199 +}
... ...
src/main/java/mtas/solr/handler/component/MtasSolrSearchComponent.java
... ... @@ -10,7 +10,6 @@ import mtas.codec.util.CodecComponent.ComponentDocument;
10 10 import mtas.codec.util.CodecComponent.ComponentFacet;
11 11 import mtas.codec.util.CodecComponent.ComponentFields;
12 12 import mtas.codec.util.CodecComponent.ComponentGroup;
13   -import mtas.codec.MtasCodecPostingsFormat;
14 13 import mtas.codec.util.CodecComponent.ComponentCollection;
15 14 import mtas.codec.util.CodecComponent.ComponentKwic;
16 15 import mtas.codec.util.CodecComponent.ComponentList;
... ...
src/main/java/mtas/solr/handler/component/util/MtasSolrComponentDocument.java
... ... @@ -2,6 +2,7 @@ package mtas.solr.handler.component.util;
2 2  
3 3 import java.io.IOException;
4 4 import java.util.ArrayList;
  5 +import java.util.List;
5 6 import java.util.Set;
6 7 import java.util.SortedSet;
7 8  
... ... @@ -240,7 +241,7 @@ public class MtasSolrComponentDocument
240 241 }
241 242 mtasDocumentItemResponse.add("stats",
242 243 new MtasSolrMtasResult(stats, stats.getDataType(),
243   - stats.getStatsType(), stats.getStatsItems(), null));
  244 + stats.getStatsType(), stats.getStatsItems(), null, null));
244 245 mtasDocumentItemResponse.add("documentKey",
245 246 document.uniqueKey.get(docId));
246 247 if (list != null) {
... ... @@ -249,12 +250,13 @@ public class MtasSolrComponentDocument
249 250 new String[] { list.getDataType(), list.getDataType() },
250 251 new String[] { list.getStatsType(), list.getStatsType() },
251 252 new SortedSet[] { list.getStatsItems(), list.getStatsItems() },
  253 + new List[] {null, null},
252 254 new String[] { null, null }, new String[] { null, null },
253 255 new Integer[] { 0, 0 }, new Integer[] { 1, 1 }, null));
254 256 } else {
255 257 mtasDocumentItemResponse.add("list",
256 258 new MtasSolrMtasResult(list, list.getDataType(),
257   - list.getStatsType(), list.getStatsItems(), null));
  259 + list.getStatsType(), list.getStatsItems(), null, null));
258 260 }
259 261  
260 262 }
... ...
src/main/java/mtas/solr/handler/component/util/MtasSolrComponentFacet.java
... ... @@ -559,7 +559,7 @@ public class MtasSolrComponentFacet
559 559 new MtasSolrMtasResult(tmpSubComponentFunction.dataCollector,
560 560 tmpSubComponentFunction.dataType,
561 561 tmpSubComponentFunction.statsType,
562   - tmpSubComponentFunction.statsItems, null));
  562 + tmpSubComponentFunction.statsItems, null, null));
563 563 }
564 564 functionData.put(functionDataCollector, tmpList);
565 565 }
... ... @@ -567,7 +567,7 @@ public class MtasSolrComponentFacet
567 567 }
568 568 }
569 569 MtasSolrMtasResult data = new MtasSolrMtasResult(facet.dataCollector,
570   - facet.baseDataTypes, facet.baseStatsTypes, facet.baseStatsItems,
  570 + facet.baseDataTypes, facet.baseStatsTypes, facet.baseStatsItems,null,
571 571 facet.baseSortTypes, facet.baseSortDirections, null, facet.baseNumbers,
572 572 functionData);
573 573  
... ...
src/main/java/mtas/solr/handler/component/util/MtasSolrComponentGroup.java
... ... @@ -3,6 +3,7 @@ package mtas.solr.handler.component.util;
3 3 import java.io.IOException;
4 4 import java.util.ArrayList;
5 5 import java.util.HashMap;
  6 +import java.util.List;
6 7 import java.util.Map.Entry;
7 8 import java.util.Set;
8 9 import java.util.SortedSet;
... ... @@ -491,7 +492,7 @@ public class MtasSolrComponentGroup
491 492 mtasGroupResponse.add("key", group.key);
492 493 MtasSolrMtasResult data = new MtasSolrMtasResult(group.dataCollector,
493 494 new String[] { group.dataType }, new String[] { group.statsType },
494   - new SortedSet[] { group.statsItems }, new String[] { group.sortType },
  495 + new SortedSet[] { group.statsItems }, new List[] {null}, new String[] { group.sortType },
495 496 new String[] { group.sortDirection }, new Integer[] { group.start },
496 497 new Integer[] { group.number }, null);
497 498 if (encode) {
... ...
src/main/java/mtas/solr/handler/component/util/MtasSolrComponentStats.java
... ... @@ -5,6 +5,7 @@ import java.util.ArrayList;
5 5 import java.util.HashMap;
6 6 import java.util.HashSet;
7 7 import java.util.Iterator;
  8 +import java.util.List;
8 9 import java.util.Map.Entry;
9 10 import java.util.Set;
10 11 import java.util.SortedSet;
... ... @@ -872,7 +873,7 @@ public class MtasSolrComponentStats
872 873 SimpleOrderedMap<Object> mtasPositionResponse = new SimpleOrderedMap<>();
873 874 mtasPositionResponse.add("key", position.key);
874 875 MtasSolrMtasResult data = new MtasSolrMtasResult(position.dataCollector,
875   - position.dataType, position.statsType, position.statsItems, null);
  876 + position.dataType, position.statsType, position.statsItems, null, null);
876 877 if (encode) {
877 878 mtasPositionResponse.add("_encoded_data",
878 879 MtasSolrResultUtil.encode(data));
... ... @@ -898,7 +899,7 @@ public class MtasSolrComponentStats
898 899 SimpleOrderedMap<Object> mtasTokenResponse = new SimpleOrderedMap<>();
899 900 mtasTokenResponse.add("key", token.key);
900 901 MtasSolrMtasResult data = new MtasSolrMtasResult(token.dataCollector,
901   - token.dataType, token.statsType, token.statsItems, null);
  902 + token.dataType, token.statsType, token.statsItems, null, null);
902 903 if (encode) {
903 904 mtasTokenResponse.add("_encoded_data", MtasSolrResultUtil.encode(data));
904 905 } else {
... ... @@ -933,13 +934,13 @@ public class MtasSolrComponentStats
933 934 new MtasSolrMtasResult(function.dataCollector,
934 935 new String[] { function.dataType },
935 936 new String[] { function.statsType },
936   - new SortedSet[] { function.statsItems }, new String[] { null },
  937 + new SortedSet[] { function.statsItems }, new List[] {null}, new String[] { null },
937 938 new String[] { null }, new Integer[] { 0 },
938 939 new Integer[] { Integer.MAX_VALUE }, null));
939 940 }
940 941 }
941 942 MtasSolrMtasResult data = new MtasSolrMtasResult(span.dataCollector,
942   - span.dataType, span.statsType, span.statsItems, functionData);
  943 + span.dataType, span.statsType, span.statsItems, null, functionData);
943 944 if (encode) {
944 945 mtasSpanResponse.add("_encoded_data", MtasSolrResultUtil.encode(data));
945 946 } else {
... ...
src/main/java/mtas/solr/handler/component/util/MtasSolrComponentTermvector.java
... ... @@ -64,6 +64,24 @@ public class MtasSolrComponentTermvector
64 64 /** The Constant NAME_MTAS_TERMVECTOR_PREFIX. */
65 65 public static final String NAME_MTAS_TERMVECTOR_PREFIX = "prefix";
66 66  
  67 + /** The Constant NAME_MTAS_TERMVECTOR_DISTANCE. */
  68 + public static final String NAME_MTAS_TERMVECTOR_DISTANCE = "distance";
  69 +
  70 + /** The Constant NAME_MTAS_TERMVECTOR_DISTANCE_KEY. */
  71 + public static final String NAME_MTAS_TERMVECTOR_DISTANCE_KEY = "key";
  72 +
  73 + /** The Constant NAME_MTAS_TERMVECTOR_DISTANCE_TYPE. */
  74 + public static final String NAME_MTAS_TERMVECTOR_DISTANCE_TYPE = "type";
  75 +
  76 + /** The Constant NAME_MTAS_TERMVECTOR_DISTANCE_BASE. */
  77 + public static final String NAME_MTAS_TERMVECTOR_DISTANCE_BASE = "base";
  78 +
  79 + /** The Constant NAME_MTAS_TERMVECTOR_DISTANCE_PARAMETERS. */
  80 + public static final String NAME_MTAS_TERMVECTOR_DISTANCE_PARAMETER = "parameter";
  81 +
  82 + /** The Constant NAME_MTAS_TERMVECTOR_DISTANCE_MAXIMUM. */
  83 + public static final String NAME_MTAS_TERMVECTOR_DISTANCE_MAXIMUM = "maximum";
  84 +
67 85 /** The Constant NAME_MTAS_TERMVECTOR_REGEXP. */
68 86 public static final String NAME_MTAS_TERMVECTOR_REGEXP = "regexp";
69 87  
... ... @@ -150,6 +168,12 @@ public class MtasSolrComponentTermvector
150 168 String[] fields = new String[ids.size()];
151 169 String[] keys = new String[ids.size()];
152 170 String[] prefixes = new String[ids.size()];
  171 + String[][] distanceKeys = new String[ids.size()][];
  172 + String[][] distanceTypes = new String[ids.size()][];
  173 + String[][] distanceBases = new String[ids.size()][];
  174 + Map<String,String>[][] distanceParameters = new Map[ids.size()][];
  175 + String[][] distanceMinimums = new String[ids.size()][];
  176 + String[][] distanceMaximums = new String[ids.size()][];
153 177 String[] regexps = new String[ids.size()];
154 178 String[] fulls = new String[ids.size()];
155 179 String[] sortTypes = new String[ids.size()];
... ... @@ -176,6 +200,47 @@ public class MtasSolrComponentTermvector
176 200 String.valueOf(tmpCounter)).trim();
177 201 prefixes[tmpCounter] = rb.req.getParams().get(PARAM_MTAS_TERMVECTOR
178 202 + "." + id + "." + NAME_MTAS_TERMVECTOR_PREFIX, null);
  203 + Set<String> distanceIds = MtasSolrResultUtil
  204 + .getIdsFromParameters(rb.req.getParams(), PARAM_MTAS_TERMVECTOR
  205 + + "." + id + "." + NAME_MTAS_TERMVECTOR_DISTANCE);
  206 + distanceKeys[tmpCounter] = new String[distanceIds.size()];
  207 + distanceTypes[tmpCounter] = new String[distanceIds.size()];
  208 + distanceBases[tmpCounter] = new String[distanceIds.size()];
  209 + distanceParameters[tmpCounter] = new Map[distanceIds.size()];
  210 + distanceMinimums[tmpCounter] = new String[distanceIds.size()];
  211 + distanceMaximums[tmpCounter] = new String[distanceIds.size()];
  212 + int tmpSubDistanceCounter = 0;
  213 + for (String distanceId : distanceIds) {
  214 + distanceKeys[tmpCounter][tmpSubDistanceCounter] = rb.req.getParams()
  215 + .get(
  216 + PARAM_MTAS_TERMVECTOR + "." + id + "."
  217 + + NAME_MTAS_TERMVECTOR_DISTANCE + "." + distanceId + "."
  218 + + NAME_MTAS_TERMVECTOR_DISTANCE_KEY,
  219 + String.valueOf(tmpSubDistanceCounter))
  220 + .trim();
  221 + distanceTypes[tmpCounter][tmpSubDistanceCounter] = rb.req.getParams()
  222 + .get(PARAM_MTAS_TERMVECTOR + "." + id + "."
  223 + + NAME_MTAS_TERMVECTOR_DISTANCE + "." + distanceId + "."
  224 + + NAME_MTAS_TERMVECTOR_DISTANCE_TYPE, null);
  225 + distanceBases[tmpCounter][tmpSubDistanceCounter] = rb.req.getParams()
  226 + .get(PARAM_MTAS_TERMVECTOR + "." + id + "."
  227 + + NAME_MTAS_TERMVECTOR_DISTANCE + "." + distanceId + "."
  228 + + NAME_MTAS_TERMVECTOR_DISTANCE_BASE, null);
  229 + distanceParameters[tmpCounter][tmpSubDistanceCounter] = new HashMap<String, String>();
  230 + Set<String> parameters = MtasSolrResultUtil
  231 + .getIdsFromParameters(rb.req.getParams(), PARAM_MTAS_TERMVECTOR
  232 + + "." + id + "." + NAME_MTAS_TERMVECTOR_DISTANCE+ "." + distanceId + "." +NAME_MTAS_TERMVECTOR_DISTANCE_PARAMETER);
  233 + for(String parameter : parameters) {
  234 + distanceParameters[tmpCounter][tmpSubDistanceCounter].put(parameter, rb.req.getParams().get(PARAM_MTAS_TERMVECTOR
  235 + + "." + id + "." + NAME_MTAS_TERMVECTOR_DISTANCE+ "." + distanceId + "." +NAME_MTAS_TERMVECTOR_DISTANCE_PARAMETER+"."+parameter));
  236 + }
  237 + distanceMaximums[tmpCounter][tmpSubDistanceCounter] = rb.req
  238 + .getParams()
  239 + .get(PARAM_MTAS_TERMVECTOR + "." + id + "."
  240 + + NAME_MTAS_TERMVECTOR_DISTANCE + "." + distanceId + "."
  241 + + NAME_MTAS_TERMVECTOR_DISTANCE_MAXIMUM, null);
  242 + tmpSubDistanceCounter++;
  243 + }
179 244 regexps[tmpCounter] = rb.req.getParams().get(PARAM_MTAS_TERMVECTOR + "."
180 245 + id + "." + NAME_MTAS_TERMVECTOR_REGEXP, null);
181 246 fulls[tmpCounter] = rb.req.getParams().get(
... ... @@ -202,24 +267,25 @@ public class MtasSolrComponentTermvector
202 267 functionExpressions[tmpCounter] = new String[functionIds.size()];
203 268 functionKeys[tmpCounter] = new String[functionIds.size()];
204 269 functionTypes[tmpCounter] = new String[functionIds.size()];
205   - int tmpSubCounter = 0;
  270 + int tmpSubFunctionCounter = 0;
206 271 for (String functionId : functionIds) {
207   - functionKeys[tmpCounter][tmpSubCounter] = rb.req.getParams()
  272 + functionKeys[tmpCounter][tmpSubFunctionCounter] = rb.req.getParams()
208 273 .get(
209 274 PARAM_MTAS_TERMVECTOR + "." + id + "."
210 275 + NAME_MTAS_TERMVECTOR_FUNCTION + "." + functionId + "."
211 276 + NAME_MTAS_TERMVECTOR_FUNCTION_KEY,
212   - String.valueOf(tmpSubCounter))
  277 + String.valueOf(tmpSubFunctionCounter))
213 278 .trim();
214   - functionExpressions[tmpCounter][tmpSubCounter] = rb.req.getParams()
  279 + functionExpressions[tmpCounter][tmpSubFunctionCounter] = rb.req
  280 + .getParams()
215 281 .get(PARAM_MTAS_TERMVECTOR + "." + id + "."
216 282 + NAME_MTAS_TERMVECTOR_FUNCTION + "." + functionId + "."
217 283 + NAME_MTAS_TERMVECTOR_FUNCTION_EXPRESSION, null);
218   - functionTypes[tmpCounter][tmpSubCounter] = rb.req.getParams()
  284 + functionTypes[tmpCounter][tmpSubFunctionCounter] = rb.req.getParams()
219 285 .get(PARAM_MTAS_TERMVECTOR + "." + id + "."
220 286 + NAME_MTAS_TERMVECTOR_FUNCTION + "." + functionId + "."
221 287 + NAME_MTAS_TERMVECTOR_FUNCTION_TYPE, null);
222   - tmpSubCounter++;
  288 + tmpSubFunctionCounter++;
223 289 }
224 290 boundaries[tmpCounter] = rb.req.getParams().get(PARAM_MTAS_TERMVECTOR
225 291 + "." + id + "." + NAME_MTAS_TERMVECTOR_BOUNDARY, null);
... ... @@ -282,6 +348,12 @@ public class MtasSolrComponentTermvector
282 348 : prefixes[i].trim();
283 349 String key = (keys[i] == null) || (keys[i].isEmpty())
284 350 ? String.valueOf(i) + ":" + field + ":" + prefix : keys[i].trim();
  351 + String[] distanceKey = distanceKeys[i];
  352 + String[] distanceType = distanceTypes[i];
  353 + String[] distanceBase = distanceBases[i];
  354 + Map<String, String>[] distanceParameter = distanceParameters[i];
  355 + String[] distanceMinimum = distanceMinimums[i];
  356 + String[] distanceMaximum = distanceMaximums[i];
285 357 String regexp = (regexps[i] == null) || (regexps[i].isEmpty()) ? null
286 358 : regexps[i].trim();
287 359 Boolean full = (fulls[i] == null) || (!fulls[i].equals("true")) ? false
... ... @@ -330,11 +402,12 @@ public class MtasSolrComponentTermvector
330 402 } else {
331 403 try {
332 404 mtasFields.list.get(field).termVectorList
333   - .add(new ComponentTermVector(key, prefix, regexp, full, type,
334   - sortType, sortDirection, startValue, numberFinal,
335   - functionKey, functionExpression, functionType, boundary,
336   - list, listRegexp, ignoreRegexp, ignoreList,
337   - ignoreListRegexp));
  405 + .add(new ComponentTermVector(key, prefix, distanceKey,
  406 + distanceType, distanceBase, distanceParameter,
  407 + distanceMaximum, regexp, full, type, sortType,
  408 + sortDirection, startValue, numberFinal, functionKey,
  409 + functionExpression, functionType, boundary, list,
  410 + listRegexp, ignoreRegexp, ignoreList, ignoreListRegexp));
338 411 } catch (ParseException e) {
339 412 throw new IOException(e);
340 413 }
... ... @@ -386,6 +459,26 @@ public class MtasSolrComponentTermvector
386 459 + NAME_MTAS_TERMVECTOR_KEY);
387 460 sreq.params.remove(PARAM_MTAS_TERMVECTOR + "." + key + "."
388 461 + NAME_MTAS_TERMVECTOR_PREFIX);
  462 + Set<String> distanceKeys = MtasSolrResultUtil
  463 + .getIdsFromParameters(rb.req.getParams(), PARAM_MTAS_TERMVECTOR
  464 + + "." + key + "." + NAME_MTAS_TERMVECTOR_DISTANCE);
  465 + for (String distanceKey : distanceKeys) {
  466 + sreq.params.remove(PARAM_MTAS_TERMVECTOR + "." + key + "."
  467 + + NAME_MTAS_TERMVECTOR_DISTANCE + "." + distanceKey + "."
  468 + + NAME_MTAS_TERMVECTOR_DISTANCE_KEY);
  469 + sreq.params.remove(PARAM_MTAS_TERMVECTOR + "." + key + "."
  470 + + NAME_MTAS_TERMVECTOR_DISTANCE + "." + distanceKey + "."
  471 + + NAME_MTAS_TERMVECTOR_DISTANCE_TYPE);
  472 + sreq.params.remove(PARAM_MTAS_TERMVECTOR + "." + key + "."
  473 + + NAME_MTAS_TERMVECTOR_DISTANCE + "." + distanceKey + "."
  474 + + NAME_MTAS_TERMVECTOR_DISTANCE_BASE);
  475 + sreq.params.remove(PARAM_MTAS_TERMVECTOR + "." + key + "."
  476 + + NAME_MTAS_TERMVECTOR_DISTANCE + "." + distanceKey + "."
  477 + + NAME_MTAS_TERMVECTOR_DISTANCE_PARAMETER);
  478 + sreq.params.remove(PARAM_MTAS_TERMVECTOR + "." + key + "."
  479 + + NAME_MTAS_TERMVECTOR_DISTANCE + "." + distanceKey + "."
  480 + + NAME_MTAS_TERMVECTOR_DISTANCE_MAXIMUM);
  481 + }
389 482 sreq.params.remove(PARAM_MTAS_TERMVECTOR + "." + key + "."
390 483 + NAME_MTAS_TERMVECTOR_REGEXP);
391 484 sreq.params.remove(PARAM_MTAS_TERMVECTOR + "." + key + "."
... ... @@ -454,13 +547,12 @@ public class MtasSolrComponentTermvector
454 547 function.dataCollector.reduceToKeys(
455 548 termVector.subComponentFunction.dataCollector.getKeyList());
456 549 function.dataCollector.close();
457   - functionDataItem.put(function.key,
458   - new MtasSolrMtasResult(function.dataCollector,
459   - new String[] { function.dataType },
460   - new String[] { function.statsType },
461   - new SortedSet[] { function.statsItems }, new String[] { null },
462   - new String[] { null }, new Integer[] { 0 },
463   - new Integer[] { Integer.MAX_VALUE }, null));
  550 + functionDataItem.put(function.key, new MtasSolrMtasResult(
  551 + function.dataCollector, new String[] { function.dataType },
  552 + new String[] { function.statsType },
  553 + new SortedSet[] { function.statsItems }, new List[] { null },
  554 + new String[] { null }, new String[] { null }, new Integer[] { 0 },
  555 + new Integer[] { Integer.MAX_VALUE }, null));
464 556 }
465 557 }
466 558 MtasSolrMtasResult data = new MtasSolrMtasResult(
... ... @@ -468,6 +560,7 @@ public class MtasSolrComponentTermvector
468 560 new String[] { termVector.subComponentFunction.dataType },
469 561 new String[] { termVector.subComponentFunction.statsType },
470 562 new SortedSet[] { termVector.subComponentFunction.statsItems },
  563 + new List[] { termVector.distances },
471 564 new String[] { termVector.subComponentFunction.sortType },
472 565 new String[] { termVector.subComponentFunction.sortDirection },
473 566 new Integer[] { 0 }, new Integer[] { termVector.number }, functionData);
... ...
src/main/java/mtas/solr/handler/component/util/MtasSolrMtasResult.java
... ... @@ -3,17 +3,22 @@ package mtas.solr.handler.component.util;
3 3 import java.io.IOException;
4 4 import java.io.Serializable;
5 5 import java.util.HashMap;
  6 +import java.util.List;
6 7 import java.util.Map;
7 8 import java.util.Map.Entry;
8 9 import java.util.Set;
9 10 import java.util.SortedSet;
10 11 import java.util.TreeSet;
  12 +
11 13 import org.apache.solr.common.util.NamedList;
12 14 import org.apache.solr.common.util.SimpleOrderedMap;
  15 +
  16 +import mtas.codec.util.CodecComponent.SubComponentDistance;
13 17 import mtas.codec.util.DataCollector;
14 18 import mtas.codec.util.collector.MtasDataCollector;
15 19 import mtas.codec.util.collector.MtasDataCollectorResult;
16 20 import mtas.codec.util.collector.MtasDataItem;
  21 +import mtas.codec.util.distance.Distance;
17 22  
18 23 /**
19 24 * The Class MtasSolrMtasResult.
... ... @@ -28,9 +33,8 @@ public class MtasSolrMtasResult implements Serializable {
28 33  
29 34 /** The stats type. */
30 35 public String statsType;
31   -
32   - /** The stats items. */
33   - public SortedSet<String> statsItems;
  36 +
  37 + public List<SubComponentDistance> distances;
34 38  
35 39 /** The sort type. */
36 40 public String sortType;
... ... @@ -59,6 +63,8 @@ public class MtasSolrMtasResult implements Serializable {
59 63 /** The sub stats items. */
60 64 private SortedSet<String>[] subStatsItems;
61 65  
  66 + private List<SubComponentDistance>[] subDistances;
  67 +
62 68 /** The sub sort type. */
63 69 private String[] subSortType;
64 70  
... ... @@ -74,27 +80,36 @@ public class MtasSolrMtasResult implements Serializable {
74 80 /**
75 81 * Instantiates a new mtas solr mtas result.
76 82 *
77   - * @param dataCollector the data collector
78   - * @param dataType the data type
79   - * @param statsType the stats type
80   - * @param statsItems the stats items
81   - * @param sortType the sort type
82   - * @param sortDirection the sort direction
83   - * @param start the start
84   - * @param number the number
85   - * @param functionData the function data
  83 + * @param dataCollector
  84 + * the data collector
  85 + * @param dataType
  86 + * the data type
  87 + * @param statsType
  88 + * the stats type
  89 + * @param statsItems
  90 + * the stats items
  91 + * @param sortType
  92 + * the sort type
  93 + * @param sortDirection
  94 + * the sort direction
  95 + * @param start
  96 + * the start
  97 + * @param number
  98 + * the number
  99 + * @param functionData
  100 + * the function data
86 101 */
87 102 @SuppressWarnings("unchecked")
88 103 public MtasSolrMtasResult(MtasDataCollector<?, ?> dataCollector,
89 104 String[] dataType, String[] statsType, SortedSet<String>[] statsItems,
90   - String[] sortType, String[] sortDirection, Integer[] start,
91   - Integer[] number,
  105 + List<SubComponentDistance>[] distances, String[] sortType, String[] sortDirection,
  106 + Integer[] start, Integer[] number,
92 107 Map<MtasDataCollector<?, ?>, HashMap<String, MtasSolrMtasResult>> functionData) {
93 108 this.dataCollector = dataCollector;
94 109 this.functionData = functionData;
95 110 this.dataType = (dataType == null) ? null : dataType[0];
96 111 this.statsType = (statsType == null) ? null : statsType[0];
97   - this.statsItems = (statsItems == null) ? null : statsItems[0];
  112 + this.distances = (distances==null) ? null : distances[0];
98 113 this.sortType = (sortType == null) ? null : sortType[0];
99 114 this.sortDirection = (sortDirection == null) ? null : sortDirection[0];
100 115 this.start = (start == null) ? null : start[0];
... ... @@ -105,11 +120,13 @@ public class MtasSolrMtasResult implements Serializable {
105 120 subDataType = new String[dataType.length - 1];
106 121 subStatsType = new String[dataType.length - 1];
107 122 subStatsItems = new TreeSet[dataType.length - 1];
  123 + subDistances = new List[dataType.length - 1];
108 124 subSortType = new String[dataType.length - 1];
109 125 subSortDirection = new String[dataType.length - 1];
110 126 System.arraycopy(dataType, 1, subDataType, 0, dataType.length - 1);
111 127 System.arraycopy(statsType, 1, subStatsType, 0, dataType.length - 1);
112 128 System.arraycopy(statsItems, 1, subStatsItems, 0, dataType.length - 1);
  129 + System.arraycopy(distances, 1, subDistances, 0, dataType.length - 1);
113 130 System.arraycopy(sortType, 1, subSortType, 0, dataType.length - 1);
114 131 System.arraycopy(sortDirection, 1, subSortDirection, 0,
115 132 dataType.length - 1);
... ... @@ -117,6 +134,7 @@ public class MtasSolrMtasResult implements Serializable {
117 134 subDataType = null;
118 135 subStatsType = null;
119 136 subStatsItems = null;
  137 + subDistances = null;
120 138 subSortType = null;
121 139 subSortDirection = null;
122 140 }
... ... @@ -125,27 +143,35 @@ public class MtasSolrMtasResult implements Serializable {
125 143 /**
126 144 * Instantiates a new mtas solr mtas result.
127 145 *
128   - * @param dataCollector the data collector
129   - * @param dataType the data type
130   - * @param statsType the stats type
131   - * @param statsItems the stats items
132   - * @param functionData the function data
  146 + * @param dataCollector
  147 + * the data collector
  148 + * @param dataType
  149 + * the data type
  150 + * @param statsType
  151 + * the stats type
  152 + * @param statsItems
  153 + * the stats items
  154 + * @param functionData
  155 + * the function data
133 156 */
134 157 @SuppressWarnings("unchecked")
135 158 public MtasSolrMtasResult(MtasDataCollector<?, ?> dataCollector,
136 159 String dataType, String statsType, SortedSet<String> statsItems,
  160 + List<SubComponentDistance> distance,
137 161 Map<MtasDataCollector<?, ?>, HashMap<String, MtasSolrMtasResult>> functionData) {
138 162 this(dataCollector, new String[] { dataType }, new String[] { statsType },
139   - new SortedSet[] { statsItems }, new String[] { null },
140   - new String[] { null }, new Integer[] { 0 }, new Integer[] { 1 },
141   - functionData);
  163 + new SortedSet[] { statsItems }, new List[] { distance },
  164 + new String[] { null }, new String[] { null }, new Integer[] { 0 },
  165 + new Integer[] { 1 }, functionData);
142 166 }
143 167  
144 168 /**
145 169 * Merge.
146 170 *
147   - * @param newItem the new item
148   - * @throws IOException Signals that an I/O exception has occurred.
  171 + * @param newItem
  172 + * the new item
  173 + * @throws IOException
  174 + * Signals that an I/O exception has occurred.
149 175 */
150 176 void merge(MtasSolrMtasResult newItem) throws IOException {
151 177 HashMap<MtasDataCollector<?, ?>, MtasDataCollector<?, ?>> map = new HashMap<>();
... ... @@ -190,9 +216,11 @@ public class MtasSolrMtasResult implements Serializable {
190 216 /**
191 217 * Gets the data.
192 218 *
193   - * @param showDebugInfo the show debug info
  219 + * @param showDebugInfo
  220 + * the show debug info
194 221 * @return the data
195   - * @throws IOException Signals that an I/O exception has occurred.
  222 + * @throws IOException
  223 + * Signals that an I/O exception has occurred.
196 224 */
197 225 NamedList<Object> getData(boolean showDebugInfo) throws IOException {
198 226 if (dataCollector.getCollectorType()
... ... @@ -226,7 +254,7 @@ public class MtasSolrMtasResult implements Serializable {
226 254 }
227 255 if ((subDataType != null) && (dataItem.getSub() != null)) {
228 256 MtasSolrMtasResult css = new MtasSolrMtasResult(dataItem.getSub(),
229   - subDataType, subStatsType, subStatsItems, subSortType,
  257 + subDataType, subStatsType, subStatsItems, subDistances, subSortType,
230 258 subSortDirection, subStart, subNumber, functionData);
231 259 if (dataItem.getSub().getCollectorType()
232 260 .equals(DataCollector.COLLECTOR_TYPE_LIST)) {
... ... @@ -250,7 +278,8 @@ public class MtasSolrMtasResult implements Serializable {
250 278 * Gets the key list.
251 279 *
252 280 * @return the key list
253   - * @throws IOException Signals that an I/O exception has occurred.
  281 + * @throws IOException
  282 + * Signals that an I/O exception has occurred.
254 283 */
255 284 public Set<String> getKeyList() throws IOException {
256 285 if (dataCollector.getCollectorType()
... ... @@ -266,7 +295,8 @@ public class MtasSolrMtasResult implements Serializable {
266 295 * Gets the full key list.
267 296 *
268 297 * @return the full key list
269   - * @throws IOException Signals that an I/O exception has occurred.
  298 + * @throws IOException
  299 + * Signals that an I/O exception has occurred.
270 300 */
271 301 public Set<String> getFullKeyList() throws IOException {
272 302 if (dataCollector.getCollectorType()
... ... @@ -281,9 +311,11 @@ public class MtasSolrMtasResult implements Serializable {
281 311 /**
282 312 * Gets the named list.
283 313 *
284   - * @param showDebugInfo the show debug info
  314 + * @param showDebugInfo
  315 + * the show debug info
285 316 * @return the named list
286   - * @throws IOException Signals that an I/O exception has occurred.
  317 + * @throws IOException
  318 + * Signals that an I/O exception has occurred.
287 319 */
288 320 NamedList<Object> getNamedList(boolean showDebugInfo) throws IOException {
289 321 if (dataCollector.getCollectorType()
... ... @@ -323,6 +355,13 @@ public class MtasSolrMtasResult implements Serializable {
323 355 for (Entry<String, ?> entry : dataList.entrySet()) {
324 356 SimpleOrderedMap<Object> mtasResponseListItem = new SimpleOrderedMap<>();
325 357 MtasDataItem<?, ?> dataItem = (MtasDataItem<?, ?>) entry.getValue();
  358 + if(this.distances!=null && !this.distances.isEmpty()) {
  359 + SimpleOrderedMap<Object> mtasResponseListItemDistance = new SimpleOrderedMap<>();
  360 + for(SubComponentDistance item : this.distances) {
  361 + mtasResponseListItemDistance.add(item.key, item.getDistance().compute(entry.getKey()));
  362 + }
  363 + mtasResponseListItem.add(Distance.NAME, mtasResponseListItemDistance);
  364 + }
326 365 mtasResponseListItem.addAll(dataItem.rewrite(showDebugInfo));
327 366 if (functionList.containsKey(entry.getKey())) {
328 367 mtasResponseListItem.add("functions",
... ... @@ -330,7 +369,7 @@ public class MtasSolrMtasResult implements Serializable {
330 369 }
331 370 if ((subDataType != null) && (dataItem.getSub() != null)) {
332 371 MtasSolrMtasResult css = new MtasSolrMtasResult(dataItem.getSub(),
333   - subDataType, subStatsType, subStatsItems, subSortType,
  372 + subDataType, subStatsType, subStatsItems, subDistances, subSortType,
334 373 subSortDirection, subStart, subNumber, functionData);
335 374 if (dataItem.getSub().getCollectorType()
336 375 .equals(DataCollector.COLLECTOR_TYPE_LIST)) {
... ... @@ -380,7 +419,8 @@ public class MtasSolrMtasResult implements Serializable {
380 419 * Gets the result.
381 420 *
382 421 * @return the result
383   - * @throws IOException Signals that an I/O exception has occurred.
  422 + * @throws IOException
  423 + * Signals that an I/O exception has occurred.
384 424 */
385 425 public MtasDataCollectorResult getResult() throws IOException {
386 426 return dataCollector.getResult();
... ...
src/site/markdown/download.md.vm
... ... @@ -30,7 +30,7 @@
30 30 #end
31 31 <tr>
32 32 <td>${fullversion}</td>
33   - <td>${majorversion}.${minorversion}.${incrementalversion}</td>
  33 + <td><a href='http://archive.apache.org/dist/lucene/solr/${majorversion}.${minorversion}.${incrementalversion}/'>${majorversion}.${minorversion}.${incrementalversion}</a></td>
34 34 <td>${mtasversion}</td>
35 35 <td><a href='https://github.com/meertensinstituut/mtas/releases/download/v${fullversion}/mtas-${fullversion}.jar'>Binary (jar)</a></td>
36 36 <td><a href='https://github.com/meertensinstituut/mtas/archive/v${fullversion}.tar.gz'>Source (tgz)</a></td>
... ... @@ -72,7 +72,7 @@
72 72 #end
73 73 <tr>
74 74 <td>${fullversion}</td>
75   - <td>${majorversion}.${minorversion}.${incrementalversion}</td>
  75 + <td><a href='http://archive.apache.org/dist/lucene/solr/${majorversion}.${minorversion}.${incrementalversion}/'>${majorversion}.${minorversion}.${incrementalversion}</a></td>
76 76 <td>${mtasversion}</td>
77 77 <td><a href='https://github.com/meertensinstituut/mtas/releases/download/v${fullversion}/mtas-${fullversion}.jar'>Binary (jar)</a></td>
78 78 <td><a href='https://github.com/meertensinstituut/mtas/archive/v${fullversion}.tar.gz'>Source (tgz)</a></td>
... ... @@ -87,4 +87,4 @@
87 87 </tbody>
88 88 </table>
89 89  
90   -See also the [Maven Central Repository](http://central.maven.org/maven2/nl/knaw/meertens/mtas/mtas/) and [GitHub Releases](https://github.com/meertensinstituut/mtas/releases).
  90 +See also the [Maven Central Repository](https://mvnrepository.com/artifact/nl.knaw.meertens.mtas/mtas) and [GitHub Releases](https://github.com/meertensinstituut/mtas/releases).
... ...
src/test/java/mtas/search/MtasSearchTestConsistency.java
... ... @@ -96,20 +96,20 @@ public class MtasSearchTestConsistency {
96 96 @org.junit.BeforeClass
97 97 public static void initialize() {
98 98 try {
99   - Path dataPath = Paths.get("src"+File.separator+"test"+File.separator+"resources"+File.separator+"data");
  99 + Path dataPath = Paths.get("src" + File.separator + "test" + File.separator
  100 + + "resources" + File.separator + "data");
100 101 // directory = FSDirectory.open(Paths.get("testindexMtas"));
101 102 directory = new RAMDirectory();
102 103 files = new HashMap<>();
103 104 files.put("Een onaangenaam mens in de Haarlemmerhout",
104   - dataPath.resolve("resources").resolve("beets1.xml.gz").toAbsolutePath()
105   - .toString());
  105 + dataPath.resolve("resources").resolve("beets1.xml.gz")
  106 + .toAbsolutePath().toString());
106 107 files.put("Een oude kennis", dataPath.resolve("resources")
107 108 .resolve("beets2.xml.gz").toAbsolutePath().toString());
108 109 files.put("Varen en Rijden", dataPath.resolve("resources")
109 110 .resolve("beets3.xml.gz").toAbsolutePath().toString());
110   - createIndex(
111   - dataPath.resolve("conf").resolve("folia.xml").toAbsolutePath().toString(),
112   - files);
  111 + createIndex(dataPath.resolve("conf").resolve("folia.xml").toAbsolutePath()
  112 + .toString(), files);
113 113 docs = getLiveDocs(DirectoryReader.open(directory));
114 114 } catch (IOException e) {
115 115 log.error(e);
... ... @@ -119,7 +119,8 @@ public class MtasSearchTestConsistency {
119 119 /**
120 120 * Basic search number of words.
121 121 *
122   - * @throws IOException Signals that an I/O exception has occurred.
  122 + * @throws IOException
  123 + * Signals that an I/O exception has occurred.
123 124 */
124 125 @org.junit.Test
125 126 public void basicSearchNumberOfWords() throws IOException {
... ... @@ -132,7 +133,8 @@ public class MtasSearchTestConsistency {
132 133 /**
133 134 * Basic search start sentence 1.
134 135 *
135   - * @throws IOException Signals that an I/O exception has occurred.
  136 + * @throws IOException
  137 + * Signals that an I/O exception has occurred.
136 138 */
137 139 @org.junit.Test
138 140 public void basicSearchStartSentence1() throws IOException {
... ... @@ -145,7 +147,8 @@ public class MtasSearchTestConsistency {
145 147 /**
146 148 * Basic search start sentence 2.
147 149 *
148   - * @throws IOException Signals that an I/O exception has occurred.
  150 + * @throws IOException
  151 + * Signals that an I/O exception has occurred.
149 152 */
150 153 @org.junit.Test
151 154 public void basicSearchStartSentence2() throws IOException {
... ... @@ -158,7 +161,8 @@ public class MtasSearchTestConsistency {
158 161 /**
159 162 * Basic search intersecting 1.
160 163 *
161   - * @throws IOException Signals that an I/O exception has occurred.
  164 + * @throws IOException
  165 + * Signals that an I/O exception has occurred.
162 166 */
163 167 @org.junit.Test
164 168 public void basicSearchIntersecting1() throws IOException {
... ... @@ -172,7 +176,8 @@ public class MtasSearchTestConsistency {
172 176 /**
173 177 * Basic search intersecting 2.
174 178 *
175   - * @throws IOException Signals that an I/O exception has occurred.
  179 + * @throws IOException
  180 + * Signals that an I/O exception has occurred.
176 181 */
177 182 @org.junit.Test
178 183 public void basicSearchIntersecting2() throws IOException {
... ... @@ -193,7 +198,8 @@ public class MtasSearchTestConsistency {
193 198 /**
194 199 * Basic search ignore.
195 200 *
196   - * @throws IOException Signals that an I/O exception has occurred.
  201 + * @throws IOException
  202 + * Signals that an I/O exception has occurred.
197 203 */
198 204 @org.junit.Test
199 205 public void basicSearchIgnore() throws IOException {
... ... @@ -231,7 +237,8 @@ public class MtasSearchTestConsistency {
231 237 /**
232 238 * Basic search sequence.
233 239 *
234   - * @throws IOException Signals that an I/O exception has occurred.
  240 + * @throws IOException
  241 + * Signals that an I/O exception has occurred.
235 242 */
236 243 @org.junit.Test
237 244 public void basicSearchSequence() throws IOException {
... ... @@ -280,7 +287,8 @@ public class MtasSearchTestConsistency {
280 287 /**
281 288 * Basic search within 1.
282 289 *
283   - * @throws IOException Signals that an I/O exception has occurred.
  290 + * @throws IOException
  291 + * Signals that an I/O exception has occurred.
284 292 */
285 293 @org.junit.Test
286 294 public void basicSearchWithin1() throws IOException {
... ... @@ -293,7 +301,8 @@ public class MtasSearchTestConsistency {
293 301 /**
294 302 * Basic search within 2.
295 303 *
296   - * @throws IOException Signals that an I/O exception has occurred.
  304 + * @throws IOException
  305 + * Signals that an I/O exception has occurred.
297 306 */
298 307 @org.junit.Test
299 308 public void basicSearchWithin2() throws IOException {
... ... @@ -332,7 +341,8 @@ public class MtasSearchTestConsistency {
332 341 /**
333 342 * Basic search within 3.
334 343 *
335   - * @throws IOException Signals that an I/O exception has occurred.
  344 + * @throws IOException
  345 + * Signals that an I/O exception has occurred.
336 346 */
337 347 @org.junit.Test
338 348 public void basicSearchWithin3() throws IOException {
... ... @@ -376,7 +386,8 @@ public class MtasSearchTestConsistency {
376 386 /**
377 387 * Basic search within 4.
378 388 *
379   - * @throws IOException Signals that an I/O exception has occurred.
  389 + * @throws IOException
  390 + * Signals that an I/O exception has occurred.
380 391 */
381 392 @org.junit.Test
382 393 public void basicSearchWithin4() throws IOException {
... ... @@ -396,7 +407,8 @@ public class MtasSearchTestConsistency {
396 407 /**
397 408 * Basic search within 5.
398 409 *
399   - * @throws IOException Signals that an I/O exception has occurred.
  410 + * @throws IOException
  411 + * Signals that an I/O exception has occurred.
400 412 */
401 413 @org.junit.Test
402 414 public void basicSearchWithin5() throws IOException {
... ... @@ -416,7 +428,8 @@ public class MtasSearchTestConsistency {
416 428 /**
417 429 * Basic search containing 1.
418 430 *
419   - * @throws IOException Signals that an I/O exception has occurred.
  431 + * @throws IOException
  432 + * Signals that an I/O exception has occurred.
420 433 */
421 434 @org.junit.Test
422 435 public void basicSearchContaining1() throws IOException {
... ... @@ -430,7 +443,8 @@ public class MtasSearchTestConsistency {
430 443 /**
431 444 * Basic search containing 2.
432 445 *
433   - * @throws IOException Signals that an I/O exception has occurred.
  446 + * @throws IOException
  447 + * Signals that an I/O exception has occurred.
434 448 */
435 449 @org.junit.Test
436 450 public void basicSearchContaining2() throws IOException {
... ... @@ -478,7 +492,8 @@ public class MtasSearchTestConsistency {
478 492 /**
479 493 * Basic search containing 3.
480 494 *
481   - * @throws IOException Signals that an I/O exception has occurred.
  495 + * @throws IOException
  496 + * Signals that an I/O exception has occurred.
482 497 */
483 498 @org.junit.Test
484 499 public void basicSearchContaining3() throws IOException {
... ... @@ -498,7 +513,8 @@ public class MtasSearchTestConsistency {
498 513 /**
499 514 * Basic search containing 4.
500 515 *
501   - * @throws IOException Signals that an I/O exception has occurred.
  516 + * @throws IOException
  517 + * Signals that an I/O exception has occurred.
502 518 */
503 519 @org.junit.Test
504 520 public void basicSearchContaining4() throws IOException {
... ... @@ -519,7 +535,8 @@ public class MtasSearchTestConsistency {
519 535 /**
520 536 * Basic search followed by 1.
521 537 *
522   - * @throws IOException Signals that an I/O exception has occurred.
  538 + * @throws IOException
  539 + * Signals that an I/O exception has occurred.
523 540 */
524 541 @org.junit.Test
525 542 public void basicSearchFollowedBy1() throws IOException {
... ... @@ -555,7 +572,8 @@ public class MtasSearchTestConsistency {
555 572 /**
556 573 * Basic search followed by 2.
557 574 *
558   - * @throws IOException Signals that an I/O exception has occurred.
  575 + * @throws IOException
  576 + * Signals that an I/O exception has occurred.
559 577 */
560 578 @org.junit.Test
561 579 public void basicSearchFollowedBy2() throws IOException {
... ... @@ -591,7 +609,8 @@ public class MtasSearchTestConsistency {
591 609 /**
592 610 * Basic search preceded by 1.
593 611 *
594   - * @throws IOException Signals that an I/O exception has occurred.
  612 + * @throws IOException
  613 + * Signals that an I/O exception has occurred.
595 614 */
596 615 @org.junit.Test
597 616 public void basicSearchPrecededBy1() throws IOException {
... ... @@ -629,7 +648,8 @@ public class MtasSearchTestConsistency {
629 648 /**
630 649 * Basic search preceded by 2.
631 650 *
632   - * @throws IOException Signals that an I/O exception has occurred.
  651 + * @throws IOException
  652 + * Signals that an I/O exception has occurred.
633 653 */
634 654 @org.junit.Test
635 655 public void basicSearchPrecededBy2() throws IOException {
... ... @@ -659,7 +679,8 @@ public class MtasSearchTestConsistency {
659 679 /**
660 680 * Basic search fully aligned with 1.
661 681 *
662   - * @throws IOException Signals that an I/O exception has occurred.
  682 + * @throws IOException
  683 + * Signals that an I/O exception has occurred.
663 684 */
664 685 @org.junit.Test
665 686 public void basicSearchFullyAlignedWith1() throws IOException {
... ... @@ -729,7 +750,8 @@ public class MtasSearchTestConsistency {
729 750 /**
730 751 * Basic search fully aligned with 2.
731 752 *
732   - * @throws IOException Signals that an I/O exception has occurred.
  753 + * @throws IOException
  754 + * Signals that an I/O exception has occurred.
733 755 */
734 756 @org.junit.Test
735 757 public void basicSearchFullyAlignedWith2() throws IOException {
... ... @@ -750,7 +772,8 @@ public class MtasSearchTestConsistency {
750 772 /**
751 773 * Collect stats positions 1.
752 774 *
753   - * @throws IOException Signals that an I/O exception has occurred.
  775 + * @throws IOException
  776 + * Signals that an I/O exception has occurred.
754 777 */
755 778 @org.junit.Test
756 779 public void collectStatsPositions1() throws IOException {
... ... @@ -795,7 +818,8 @@ public class MtasSearchTestConsistency {
795 818 /**
796 819 * Collect stats positions 2.
797 820 *
798   - * @throws IOException Signals that an I/O exception has occurred.
  821 + * @throws IOException
  822 + * Signals that an I/O exception has occurred.
799 823 */
800 824 @org.junit.Test
801 825 public void collectStatsPositions2() throws IOException {
... ... @@ -858,7 +882,8 @@ public class MtasSearchTestConsistency {
858 882 /**
859 883 * Collect stats tokens.
860 884 *
861   - * @throws IOException Signals that an I/O exception has occurred.
  885 + * @throws IOException
  886 + * Signals that an I/O exception has occurred.
862 887 */
863 888 @org.junit.Test
864 889 public void collectStatsTokens() throws IOException {
... ... @@ -921,7 +946,8 @@ public class MtasSearchTestConsistency {
921 946 /**
922 947 * Collect stats spans 1.
923 948 *
924   - * @throws IOException Signals that an I/O exception has occurred.
  949 + * @throws IOException
  950 + * Signals that an I/O exception has occurred.
925 951 */
926 952 @org.junit.Test
927 953 public void collectStatsSpans1() throws IOException {
... ... @@ -1038,7 +1064,8 @@ public class MtasSearchTestConsistency {
1038 1064 /**
1039 1065 * Collect group.
1040 1066 *
1041   - * @throws IOException Signals that an I/O exception has occurred.
  1067 + * @throws IOException
  1068 + * Signals that an I/O exception has occurred.
1042 1069 */
1043 1070 @org.junit.Test
1044 1071 public void collectGroup() throws IOException {
... ... @@ -1052,8 +1079,8 @@ public class MtasSearchTestConsistency {
1052 1079 fieldStats.statsSpanList.add(new ComponentSpan(new MtasSpanQuery[] { q },
1053 1080 "total", null, null, "sum", null, null, null));
1054 1081 fieldStats.groupList.add(new ComponentGroup(q, "articles",
1055   - Integer.MAX_VALUE, 0, "t_lc", null, null, null, null, null, null, null,
1056   - null, null, null, null, null));
  1082 + Integer.MAX_VALUE, 0, "t_lc", null, null, null, null, null, null,
  1083 + null, null, null, null, null, null));
1057 1084 HashMap<String, HashMap<String, Object>> response = doAdvancedSearch(
1058 1085 fullDocSet, fieldStats);
1059 1086 ArrayList<HashMap<String, Object>> list = (ArrayList<HashMap<String, Object>>) response
... ... @@ -1088,7 +1115,8 @@ public class MtasSearchTestConsistency {
1088 1115 /**
1089 1116 * Collect termvector.
1090 1117 *
1091   - * @throws IOException Signals that an I/O exception has occurred.
  1118 + * @throws IOException
  1119 + * Signals that an I/O exception has occurred.
1092 1120 */
1093 1121 @org.junit.Test
1094 1122 public void collectTermvector() throws IOException {
... ... @@ -1101,13 +1129,14 @@ public class MtasSearchTestConsistency {
1101 1129 fieldStats.statsPositionList
1102 1130 .add(new ComponentPosition("total", null, null, "sum"));
1103 1131 fieldStats.termVectorList
1104   - .add(new ComponentTermVector("toplist", prefix, null, false, "sum",
1105   - CodecUtil.STATS_TYPE_SUM, CodecUtil.SORT_DESC, null, number, null,
1106   - null, null, null, null, null, prefix, null, null));
1107   - fieldStats.termVectorList.add(new ComponentTermVector("fulllist", prefix,
1108   - null, true, "sum", CodecUtil.STATS_TYPE_SUM, CodecUtil.SORT_DESC,
1109   - null, Integer.MAX_VALUE, null, null, null, null, null, null, prefix,
1110   - null, null));
  1132 + .add(new ComponentTermVector("toplist", prefix, null, null, null, null, null, null, false,
  1133 + "sum", CodecUtil.STATS_TYPE_SUM, CodecUtil.SORT_DESC, null,
  1134 + number, null, null, null, null, null, null, prefix, null, null));
  1135 + fieldStats.termVectorList
  1136 + .add(new ComponentTermVector("fulllist", prefix, null, null, null,
  1137 + null, null, null, true, "sum", CodecUtil.STATS_TYPE_SUM,
  1138 + CodecUtil.SORT_DESC, null, Integer.MAX_VALUE, null, null, null,
  1139 + null, null, null, prefix, null, null));
1111 1140 HashMap<String, HashMap<String, Object>> response = doAdvancedSearch(
1112 1141 fullDocSet, fieldStats);
1113 1142 HashMap<String, Object> responseTotal = (HashMap<String, Object>) response
... ... @@ -1159,8 +1188,10 @@ public class MtasSearchTestConsistency {
1159 1188 /**
1160 1189 * Do advanced search.
1161 1190 *
1162   - * @param fullDocSet the full doc set
1163   - * @param fieldStats the field stats
  1191 + * @param fullDocSet
  1192 + * the full doc set
  1193 + * @param fieldStats
  1194 + * the field stats
1164 1195 * @return the hash map
1165 1196 */
1166 1197 private HashMap<String, HashMap<String, Object>> doAdvancedSearch(
... ... @@ -1233,9 +1264,12 @@ public class MtasSearchTestConsistency {
1233 1264 /**
1234 1265 * Creates the index.
1235 1266 *
1236   - * @param configFile the config file
1237   - * @param files the files
1238   - * @throws IOException Signals that an I/O exception has occurred.
  1267 + * @param configFile
  1268 + * the config file
  1269 + * @param files
  1270 + * the files
  1271 + * @throws IOException
  1272 + * Signals that an I/O exception has occurred.
1239 1273 */
1240 1274 private static void createIndex(String configFile,
1241 1275 HashMap<String, String> files) throws IOException {
... ... @@ -1283,11 +1317,16 @@ public class MtasSearchTestConsistency {
1283 1317 /**
1284 1318 * Adds the doc.
1285 1319 *
1286   - * @param w the w
1287   - * @param id the id
1288   - * @param title the title
1289   - * @param file the file
1290   - * @throws IOException Signals that an I/O exception has occurred.
  1320 + * @param w
  1321 + * the w
  1322 + * @param id
  1323 + * the id
  1324 + * @param title
  1325 + * the title
  1326 + * @param file
  1327 + * the file
  1328 + * @throws IOException
  1329 + * Signals that an I/O exception has occurred.
1291 1330 */
1292 1331 private static void addDoc(IndexWriter w, Integer id, String title,
1293 1332 String file) throws IOException {
... ... @@ -1305,7 +1344,8 @@ public class MtasSearchTestConsistency {
1305 1344 /**
1306 1345 * Gets the live docs.
1307 1346 *
1308   - * @param indexReader the index reader
  1347 + * @param indexReader
  1348 + * the index reader
1309 1349 * @return the live docs
1310 1350 */
1311 1351 private static ArrayList<Integer> getLiveDocs(IndexReader indexReader) {
... ... @@ -1327,13 +1367,19 @@ public class MtasSearchTestConsistency {
1327 1367 /**
1328 1368 * Creates the query.
1329 1369 *
1330   - * @param field the field
1331   - * @param cql the cql
1332   - * @param ignore the ignore
1333   - * @param maximumIgnoreLength the maximum ignore length
1334   - * @param disableTwoPhaseIterator the disable two phase iterator
  1370 + * @param field
  1371 + * the field
  1372 + * @param cql
  1373 + * the cql
  1374 + * @param ignore
  1375 + * the ignore
  1376 + * @param maximumIgnoreLength
  1377 + * the maximum ignore length
  1378 + * @param disableTwoPhaseIterator
  1379 + * the disable two phase iterator
1335 1380 * @return the mtas span query
1336   - * @throws ParseException the parse exception
  1381 + * @throws ParseException
  1382 + * the parse exception
1337 1383 */
1338 1384 private MtasSpanQuery createQuery(String field, String cql,
1339 1385 MtasSpanQuery ignore, Integer maximumIgnoreLength,
... ... @@ -1351,15 +1397,23 @@ public class MtasSearchTestConsistency {
1351 1397 /**
1352 1398 * Do query.
1353 1399 *
1354   - * @param indexReader the index reader
1355   - * @param field the field
1356   - * @param cql the cql
1357   - * @param ignore the ignore
1358   - * @param maximumIgnoreLength the maximum ignore length
1359   - * @param prefixes the prefixes
1360   - * @param disableTwoPhaseIterator the disable two phase iterator
  1400 + * @param indexReader
  1401 + * the index reader
  1402 + * @param field
  1403 + * the field
  1404 + * @param cql
  1405 + * the cql
  1406 + * @param ignore
  1407 + * the ignore
  1408 + * @param maximumIgnoreLength
  1409 + * the maximum ignore length
  1410 + * @param prefixes
  1411 + * the prefixes
  1412 + * @param disableTwoPhaseIterator
  1413 + * the disable two phase iterator
1361 1414 * @return the query result
1362   - * @throws IOException Signals that an I/O exception has occurred.
  1415 + * @throws IOException
  1416 + * Signals that an I/O exception has occurred.
1363 1417 */
1364 1418 private QueryResult doQuery(IndexReader indexReader, String field, String cql,
1365 1419 MtasSpanQuery ignore, Integer maximumIgnoreLength,
... ... @@ -1379,12 +1433,17 @@ public class MtasSearchTestConsistency {
1379 1433 /**
1380 1434 * Do query.
1381 1435 *
1382   - * @param indexReader the index reader
1383   - * @param field the field
1384   - * @param q the q
1385   - * @param prefixes the prefixes
  1436 + * @param indexReader
  1437 + * the index reader
  1438 + * @param field
  1439 + * the field
  1440 + * @param q
  1441 + * the q
  1442 + * @param prefixes
  1443 + * the prefixes
1386 1444 * @return the query result
1387   - * @throws IOException Signals that an I/O exception has occurred.
  1445 + * @throws IOException
  1446 + * Signals that an I/O exception has occurred.
1388 1447 */
1389 1448 private QueryResult doQuery(IndexReader indexReader, String field,
1390 1449 MtasSpanQuery q, ArrayList<String> prefixes) throws IOException {
... ... @@ -1393,8 +1452,8 @@ public class MtasSearchTestConsistency {
1393 1452 .listIterator();
1394 1453 IndexSearcher searcher = new IndexSearcher(indexReader);
1395 1454 final float boost = 0;
1396   - SpanWeight spanweight = q.rewrite(indexReader).createWeight(searcher,
1397   - false, boost);
  1455 + SpanWeight spanweight = q.rewrite(indexReader).createWeight(searcher, false,
  1456 + boost);
1398 1457  
1399 1458 while (iterator.hasNext()) {
1400 1459 LeafReaderContext lrc = iterator.next();
... ... @@ -1431,11 +1490,16 @@ public class MtasSearchTestConsistency {
1431 1490 /**
1432 1491 * Test number of hits.
1433 1492 *
1434   - * @param indexReader the index reader
1435   - * @param field the field
1436   - * @param cqls1 the cqls 1
1437   - * @param cqls2 the cqls 2
1438   - * @throws IOException Signals that an I/O exception has occurred.
  1493 + * @param indexReader
  1494 + * the index reader
  1495 + * @param field
  1496 + * the field
  1497 + * @param cqls1
  1498 + * the cqls 1
  1499 + * @param cqls2
  1500 + * the cqls 2
  1501 + * @throws IOException
  1502 + * Signals that an I/O exception has occurred.
1439 1503 */
1440 1504 private void testNumberOfHits(IndexReader indexReader, String field,
1441 1505 List<String> cqls1, List<String> cqls2) throws IOException {
... ... @@ -1545,11 +1609,16 @@ public class MtasSearchTestConsistency {
1545 1609 /**
1546 1610 * Instantiates a new query hit.
1547 1611 *
1548   - * @param docId the doc id
1549   - * @param startPosition the start position
1550   - * @param endPosition the end position
1551   - * @param prefix the prefix
1552   - * @param value the value
  1612 + * @param docId
  1613 + * the doc id
  1614 + * @param startPosition
  1615 + * the start position
  1616 + * @param endPosition
  1617 + * the end position
  1618 + * @param prefix
  1619 + * the prefix
  1620 + * @param value
  1621 + * the value
1553 1622 */
1554 1623 protected QueryHit(int docId, int startPosition, int endPosition,
1555 1624 String prefix, String value) {
... ...