Commit 3e66a04a2eab4cfe1403a004f6e0e160134c9b49

Authored by Matthijs Brouwer
1 parent c3c71090

fix rewrite

docker/Dockerfile
1 1 # Automatically generated Dockerfile
2   -# - Build 2017-01-13 14:47
  2 +# - Build 2017-01-23 14:37
3 3 # - Lucene/Solr version 6.3.0
4 4 # - Mtas release 20170110
5 5 #
... ... @@ -55,7 +55,7 @@ RUN apt-get update && apt-get install -y lsof software-properties-common python-
55 55 && chmod -R 755 /var/www/html \
56 56 && printf "echo\n" >> /start.sh \
57 57 && printf "echo \"================ Mtas -- Multi Tier Annotation Search =================\"\n" >> /start.sh \
58   -&& printf "echo \" Timestamp 2017-01-13 14:47\"\n" >> /start.sh \
  58 +&& printf "echo \" Timestamp 2017-01-23 14:37\"\n" >> /start.sh \
59 59 && printf "echo \" Lucene/Solr version 6.3.0\"\n" >> /start.sh \
60 60 && printf "echo \" Mtas release 20170110\"\n" >> /start.sh \
61 61 && printf "echo \" See https://meertensinstituut.github.io/mtas/ for more information\"\n" >> /start.sh \
... ...
src/mtas/codec/util/CodecCollector.java
... ... @@ -1111,8 +1111,9 @@ public class CodecCollector {
1111 1111 // try to call functionParser as little as possible
1112 1112 if (span.statsType.equals(CodecUtil.STATS_BASIC)
1113 1113 && (span.minimumLong == null) && (span.maximumLong == null)
1114   - && (span.functions == null || (span.functionSumRule()
1115   - && !span.functionNeedPositions()))) {
  1114 + && (span.functions == null
  1115 + || (span.functionBasic() && span.functionSumRule()
  1116 + && !span.functionNeedPositions()))) {
1116 1117 // initialise
1117 1118 int length = span.parser.needArgumentsNumber();
1118 1119 long[] valueSum = new long[length];
... ... @@ -2335,11 +2336,26 @@ public class CodecCollector {
2335 2336 HashMap<Integer, long[]> args = computeArguments(spansNumberData,
2336 2337 cf.spanQueries, docSet);
2337 2338 if (cf.baseDataTypes[level].equals(CodecUtil.DATA_TYPE_LONG)) {
2338   - // sumrule
  2339 + // check functions
  2340 + boolean applySumRule = false;
2339 2341 if (cf.baseStatsTypes[level].equals(CodecUtil.STATS_BASIC)
2340 2342 && cf.baseParsers[level].sumRule()
2341 2343 && (cf.baseMinimumLongs[level] == null)
2342 2344 && (cf.baseMaximumLongs[level] == null)) {
  2345 + applySumRule = true;
  2346 + if (cf.baseFunctionList[level].get(dataCollector) != null) {
  2347 + for (SubComponentFunction function : cf.baseFunctionList[level]
  2348 + .get(dataCollector)) {
  2349 + if (!function.statsType.equals(CodecUtil.STATS_BASIC)
  2350 + || !function.parserFunction.sumRule()
  2351 + || function.parserFunction.needPositions()) {
  2352 + applySumRule = false;
  2353 + break;
  2354 + }
  2355 + }
  2356 + }
  2357 + }
  2358 + if (applySumRule) {
2343 2359 for (String key : groupedKeys.values()) {
2344 2360 if (docLists.get(key).length > 0) {
2345 2361 // initialise
... ... @@ -2695,127 +2711,142 @@ public class CodecCollector {
2695 2711 }
2696 2712  
2697 2713 for (CompiledAutomaton compiledAutomaton : listAutomata) {
2698   - termsEnum = t.intersect(compiledAutomaton, null);
2699   - int initSize = Math.min((int) t.size(), 1000);
2700   - termVector.subComponentFunction.dataCollector.initNewList(initSize,
2701   - segmentName, segmentNumber, termVector.boundary);
2702   - boolean doBasic = termVector.subComponentFunction.dataCollector
2703   - .getStatsType().equals(CodecUtil.STATS_BASIC);
2704   - if (termVector.functions != null) {
2705   - for (SubComponentFunction function : termVector.functions) {
2706   - function.dataCollector.initNewList(initSize);
2707   - doBasic = doBasic ? (function.parserFunction.sumRule()
2708   - && !function.parserFunction.needPositions()
2709   - && function.dataCollector.getStatsType()
2710   - .equals(CodecUtil.STATS_BASIC))
2711   - : doBasic;
  2714 + if (!compiledAutomaton.type
  2715 + .equals(CompiledAutomaton.AUTOMATON_TYPE.NORMAL)) {
  2716 + if (compiledAutomaton.type
  2717 + .equals(CompiledAutomaton.AUTOMATON_TYPE.NONE)) {
  2718 + // do nothing
  2719 + } else {
  2720 + throw new IOException(
  2721 + "compiledAutomaton is " + compiledAutomaton.type);
2712 2722 }
2713   - }
2714   - // only if documents
2715   - if (docSet.size() > 0) {
2716   - int termDocId;
2717   - // loop over terms
2718   - while ((term = termsEnum.next()) != null) {
2719   - termDocId = -1;
2720   - if (doBasic) {
2721   - // compute numbers;
2722   - TermvectorNumberBasic numberBasic = computeTermvectorNumberBasic(
2723   - docSet, termDocId, termsEnum, r, lrc, postingsEnum);
2724   - // register
2725   - if (numberBasic.docNumber > 0) {
2726   - long valueLong = 0;
2727   - try {
2728   - valueLong = termVector.subComponentFunction.parserFunction
2729   - .getValueLong(numberBasic.valueSum, 1);
2730   - } catch (IOException e) {
2731   - termVector.subComponentFunction.dataCollector.error(
2732   - MtasToken.getPostfixFromValue(term), e.getMessage());
2733   - }
2734   - String key = MtasToken.getPostfixFromValue(term);
2735   - termVector.subComponentFunction.dataCollector.add(key,
2736   - valueLong, numberBasic.docNumber);
2737   - if (termVector.functions != null) {
2738   - for (SubComponentFunction function : termVector.functions) {
2739   - if (function.dataType
2740   - .equals(CodecUtil.DATA_TYPE_LONG)) {
2741   - long valueFunction = function.parserFunction
2742   - .getValueLong(numberBasic.valueSum, 0);
2743   - function.dataCollector.add(key, valueFunction,
2744   - numberBasic.docNumber);
2745   - } else if (function.dataType
2746   - .equals(CodecUtil.DATA_TYPE_DOUBLE)) {
2747   - double valueFunction = function.parserFunction
2748   - .getValueDouble(numberBasic.valueSum, 0);
2749   - function.dataCollector.add(key, valueFunction,
2750   - numberBasic.docNumber);
2751   - }
2752   - }
2753   - }
2754   -
2755   - }
2756   - } else {
2757   - TermvectorNumberFull numberFull = computeTermvectorNumberFull(
2758   - docSet, termDocId, termsEnum, r, lrc, postingsEnum,
2759   - positionsData);
2760   - if (numberFull.docNumber > 0) {
2761   - long[] valuesLong = new long[numberFull.docNumber];
2762   - String key = MtasToken.getPostfixFromValue(term);
2763   - for (int i = 0; i < numberFull.docNumber; i++) {
  2723 + } else {
  2724 + termsEnum = t.intersect(compiledAutomaton, null);
  2725 +
  2726 + int initSize = Math.min((int) t.size(), 1000);
  2727 + termVector.subComponentFunction.dataCollector.initNewList(
  2728 + initSize, segmentName, segmentNumber, termVector.boundary);
  2729 + boolean doBasic = termVector.subComponentFunction.dataCollector
  2730 + .getStatsType().equals(CodecUtil.STATS_BASIC);
  2731 + if (termVector.functions != null) {
  2732 + for (SubComponentFunction function : termVector.functions) {
  2733 + function.dataCollector.initNewList(initSize);
  2734 + doBasic = doBasic ? (function.parserFunction.sumRule()
  2735 + && !function.parserFunction.needPositions()
  2736 + && function.dataCollector.getStatsType()
  2737 + .equals(CodecUtil.STATS_BASIC))
  2738 + : doBasic;
  2739 + }
  2740 + }
  2741 + // only if documents
  2742 + if (docSet.size() > 0) {
  2743 + int termDocId;
  2744 + // loop over terms
  2745 + while ((term = termsEnum.next()) != null) {
  2746 + termDocId = -1;
  2747 + if (doBasic) {
  2748 + // compute numbers;
  2749 + TermvectorNumberBasic numberBasic = computeTermvectorNumberBasic(
  2750 + docSet, termDocId, termsEnum, r, lrc, postingsEnum);
  2751 + // register
  2752 + if (numberBasic.docNumber > 0) {
  2753 + long valueLong = 0;
2764 2754 try {
2765   - valuesLong[i] = termVector.subComponentFunction.parserFunction
2766   - .getValueLong(new long[] { numberFull.args[i] },
2767   - numberFull.positions[i]);
  2755 + valueLong = termVector.subComponentFunction.parserFunction
  2756 + .getValueLong(numberBasic.valueSum, 1);
2768 2757 } catch (IOException e) {
2769   - termVector.subComponentFunction.dataCollector.error(key,
  2758 + termVector.subComponentFunction.dataCollector.error(
  2759 + MtasToken.getPostfixFromValue(term),
2770 2760 e.getMessage());
2771 2761 }
  2762 + String key = MtasToken.getPostfixFromValue(term);
  2763 + termVector.subComponentFunction.dataCollector.add(key,
  2764 + valueLong, numberBasic.docNumber);
  2765 + if (termVector.functions != null) {
  2766 + for (SubComponentFunction function : termVector.functions) {
  2767 + if (function.dataType
  2768 + .equals(CodecUtil.DATA_TYPE_LONG)) {
  2769 + long valueFunction = function.parserFunction
  2770 + .getValueLong(numberBasic.valueSum, 0);
  2771 + function.dataCollector.add(key, valueFunction,
  2772 + numberBasic.docNumber);
  2773 + } else if (function.dataType
  2774 + .equals(CodecUtil.DATA_TYPE_DOUBLE)) {
  2775 + double valueFunction = function.parserFunction
  2776 + .getValueDouble(numberBasic.valueSum, 0);
  2777 + function.dataCollector.add(key, valueFunction,
  2778 + numberBasic.docNumber);
  2779 + }
  2780 + }
  2781 + }
  2782 +
2772 2783 }
2773   - termVector.subComponentFunction.dataCollector.add(key,
2774   - valuesLong, valuesLong.length);
2775   - if (termVector.functions != null) {
2776   - for (SubComponentFunction function : termVector.functions) {
2777   - if (function.dataType
2778   - .equals(CodecUtil.DATA_TYPE_LONG)) {
2779   - valuesLong = new long[numberFull.docNumber];
2780   - for (int i = 0; i < numberFull.docNumber; i++) {
2781   - try {
2782   - valuesLong[i] = function.parserFunction
2783   - .getValueLong(
2784   - new long[] { numberFull.args[i] },
2785   - numberFull.positions[i]);
2786   - } catch (IOException e) {
2787   - function.dataCollector.error(key, e.getMessage());
  2784 + } else {
  2785 + TermvectorNumberFull numberFull = computeTermvectorNumberFull(
  2786 + docSet, termDocId, termsEnum, r, lrc, postingsEnum,
  2787 + positionsData);
  2788 + if (numberFull.docNumber > 0) {
  2789 + long[] valuesLong = new long[numberFull.docNumber];
  2790 + String key = MtasToken.getPostfixFromValue(term);
  2791 + for (int i = 0; i < numberFull.docNumber; i++) {
  2792 + try {
  2793 + valuesLong[i] = termVector.subComponentFunction.parserFunction
  2794 + .getValueLong(new long[] { numberFull.args[i] },
  2795 + numberFull.positions[i]);
  2796 + } catch (IOException e) {
  2797 + termVector.subComponentFunction.dataCollector
  2798 + .error(key, e.getMessage());
  2799 + }
  2800 + }
  2801 + termVector.subComponentFunction.dataCollector.add(key,
  2802 + valuesLong, valuesLong.length);
  2803 + if (termVector.functions != null) {
  2804 + for (SubComponentFunction function : termVector.functions) {
  2805 + if (function.dataType
  2806 + .equals(CodecUtil.DATA_TYPE_LONG)) {
  2807 + valuesLong = new long[numberFull.docNumber];
  2808 + for (int i = 0; i < numberFull.docNumber; i++) {
  2809 + try {
  2810 + valuesLong[i] = function.parserFunction
  2811 + .getValueLong(
  2812 + new long[] { numberFull.args[i] },
  2813 + numberFull.positions[i]);
  2814 + } catch (IOException e) {
  2815 + function.dataCollector.error(key,
  2816 + e.getMessage());
  2817 + }
2788 2818 }
2789   - }
2790   - function.dataCollector.add(key, valuesLong,
2791   - valuesLong.length);
2792   - } else if (function.dataType
2793   - .equals(CodecUtil.DATA_TYPE_DOUBLE)) {
2794   - double[] valuesDouble = new double[numberFull.docNumber];
2795   - for (int i = 0; i < numberFull.docNumber; i++) {
2796   - try {
2797   - valuesDouble[i] = function.parserFunction
2798   - .getValueDouble(
2799   - new long[] { numberFull.args[i] },
2800   - numberFull.positions[i]);
2801   - } catch (IOException e) {
2802   - function.dataCollector.error(key, e.getMessage());
  2819 + function.dataCollector.add(key, valuesLong,
  2820 + valuesLong.length);
  2821 + } else if (function.dataType
  2822 + .equals(CodecUtil.DATA_TYPE_DOUBLE)) {
  2823 + double[] valuesDouble = new double[numberFull.docNumber];
  2824 + for (int i = 0; i < numberFull.docNumber; i++) {
  2825 + try {
  2826 + valuesDouble[i] = function.parserFunction
  2827 + .getValueDouble(
  2828 + new long[] { numberFull.args[i] },
  2829 + numberFull.positions[i]);
  2830 + } catch (IOException e) {
  2831 + function.dataCollector.error(key,
  2832 + e.getMessage());
  2833 + }
2803 2834 }
  2835 + function.dataCollector.add(key, valuesDouble,
  2836 + valuesDouble.length);
2804 2837 }
2805   - function.dataCollector.add(key, valuesDouble,
2806   - valuesDouble.length);
2807 2838 }
2808 2839 }
2809 2840 }
2810   - }
2811 2841  
  2842 + }
2812 2843 }
2813 2844 }
2814   - }
2815   - termVector.subComponentFunction.dataCollector.closeNewList();
2816   - if (termVector.functions != null) {
2817   - for (SubComponentFunction function : termVector.functions) {
2818   - function.dataCollector.closeNewList();
  2845 + termVector.subComponentFunction.dataCollector.closeNewList();
  2846 + if (termVector.functions != null) {
  2847 + for (SubComponentFunction function : termVector.functions) {
  2848 + function.dataCollector.closeNewList();
  2849 + }
2819 2850 }
2820 2851 }
2821 2852 }
... ...
src/mtas/codec/util/CodecComponent.java
... ... @@ -1333,6 +1333,17 @@ public class CodecComponent {
1333 1333 }
1334 1334 return true;
1335 1335 }
  1336 +
  1337 + public boolean functionBasic() {
  1338 + if (functions != null) {
  1339 + for (SubComponentFunction function : functions) {
  1340 + if (!function.statsType.equals(CodecUtil.STATS_BASIC)) {
  1341 + return false;
  1342 + }
  1343 + }
  1344 + }
  1345 + return true;
  1346 + }
1336 1347  
1337 1348 /**
1338 1349 * Function need positions.
... ...
src/mtas/search/spans/MtasSpanNotQuery.java
... ... @@ -2,7 +2,9 @@ package mtas.search.spans;
2 2  
3 3 import java.io.IOException;
4 4  
  5 +import org.apache.lucene.index.IndexReader;
5 6 import org.apache.lucene.search.IndexSearcher;
  7 +import org.apache.lucene.search.spans.SpanContainingQuery;
6 8 import org.apache.lucene.search.spans.SpanNotQuery;
7 9 import org.apache.lucene.search.spans.SpanQuery;
8 10 import org.apache.lucene.search.spans.SpanWeight;
... ... @@ -30,6 +32,12 @@ public class MtasSpanNotQuery extends MtasSpanQuery {
30 32 }
31 33  
32 34 @Override
  35 + public MtasSpanQuery rewrite(IndexReader reader) throws IOException {
  36 + baseQuery = (SpanNotQuery) baseQuery.rewrite(reader);
  37 + return this;
  38 + }
  39 +
  40 + @Override
33 41 public String toString(String field) {
34 42 return baseQuery.toString(field);
35 43 }
... ...
src/mtas/solr/handler/component/MtasSolrSearchComponent.java
... ... @@ -244,6 +244,7 @@ public class MtasSolrSearchComponent extends SearchComponent {
244 244 docListList, docSetList, mtasFields.list.get(field));
245 245 } catch (IllegalAccessException | IllegalArgumentException
246 246 | InvocationTargetException e) {
  247 + e.printStackTrace();
247 248 throw new IOException(e.getMessage());
248 249 }
249 250 }
... ...
src/mtas/solr/handler/component/util/MtasSolrComponentTermvector.java
... ... @@ -1017,7 +1017,7 @@ public class MtasSolrComponentTermvector {
1017 1017 }
1018 1018 paramsNewRequest.add(PARAM_MTAS_TERMVECTOR + "."
1019 1019 + termvectorCounter + "." + NAME_MTAS_TERMVECTOR_FULL,
1020   - "true");
  1020 + tv.full?"true":"false");
1021 1021 paramsNewRequest.add(PARAM_MTAS_TERMVECTOR + "."
1022 1022 + termvectorCounter + "." + NAME_MTAS_TERMVECTOR_LIST,
1023 1023 listValue);
... ...