Commit c9d66534fcdb0b1a4474bc798ed755b4d47b2359

Authored by Matthijs Brouwer
1 parent 44ad8971

query optimization

docker/Dockerfile
1 1 # Automatically generated Dockerfile
2   -# - Build 2017-03-09 09:11
  2 +# - Build 2017-03-14 08:49
3 3 # - Lucene/Solr version 6.4.2
4 4 # - Mtas release 20170309
5 5 #
... ... @@ -55,7 +55,7 @@ RUN apt-get update && apt-get install -y lsof software-properties-common python-
55 55 && chmod -R 755 /var/www/html \
56 56 && printf "echo\n" >> /start.sh \
57 57 && printf "echo \"================ Mtas -- Multi Tier Annotation Search =================\"\n" >> /start.sh \
58   -&& printf "echo \" Timestamp 2017-03-09 09:11\"\n" >> /start.sh \
  58 +&& printf "echo \" Timestamp 2017-03-14 08:49\"\n" >> /start.sh \
59 59 && printf "echo \" Lucene/Solr version 6.4.2\"\n" >> /start.sh \
60 60 && printf "echo \" Mtas release 20170309\"\n" >> /start.sh \
61 61 && printf "echo \" See https://meertensinstituut.github.io/mtas/ for more information\"\n" >> /start.sh \
... ...
junit/mtas/parser/MtasCQLParserTestSentence.java
... ... @@ -91,7 +91,7 @@ public class MtasCQLParserTestSentence {
91 91 items.add(new MtasSpanSequenceItem(q3, false));
92 92 MtasSpanQuery q4 = new MtasSpanSequenceQuery(items, null, null);
93 93 MtasSpanQuery q = new MtasSpanOrQuery(q1,q4);
94   - testCQLParse(field, null, cql, q);
  94 + testCQLParse(field, null, cql, q);
95 95 }
96 96  
97 97 @org.junit.Test
... ...
src/mtas/search/spans/MtasSpanAndQuery.java
... ... @@ -2,6 +2,7 @@ package mtas.search.spans;
2 2  
3 3 import java.io.IOException;
4 4 import java.util.ArrayList;
  5 +import java.util.HashSet;
5 6 import java.util.List;
6 7  
7 8 import org.apache.lucene.index.IndexReader;
... ... @@ -19,7 +20,7 @@ public class MtasSpanAndQuery extends MtasSpanQuery {
19 20  
20 21 /** The base query. */
21 22 private SpanNearQuery baseQuery;
22   - private List<MtasSpanQuery> clauses;
  23 + private HashSet<MtasSpanQuery> clauses;
23 24  
24 25 /**
25 26 * Instantiates a new mtas span and query.
... ... @@ -31,7 +32,7 @@ public class MtasSpanAndQuery extends MtasSpanQuery {
31 32 public MtasSpanAndQuery(MtasSpanQuery... initialClauses) {
32 33 super(null, null);
33 34 Integer minimum = null, maximum = null;
34   - clauses = new ArrayList<MtasSpanQuery>();
  35 + clauses = new HashSet<MtasSpanQuery>();
35 36 for (MtasSpanQuery item : initialClauses) {
36 37 if (!clauses.contains(item)) {
37 38 clauses.add(item);
... ... @@ -80,12 +81,13 @@ public class MtasSpanAndQuery extends MtasSpanQuery {
80 81 if (clauses.size() > 1) {
81 82 // rewrite, count MtasSpanMatchAllQuery and check for MtasSpanMatchNoneQuery
82 83 MtasSpanQuery[] newClauses = new MtasSpanQuery[clauses.size()];
  84 + MtasSpanQuery[] oldClauses = clauses.toArray(new MtasSpanQuery[clauses.size()]);
83 85 int singlePositionQueries = 0;
84 86 int matchAllSinglePositionQueries = 0;
85 87 boolean actuallyRewritten = false;
86   - for (int i = 0; i < clauses.size(); i++) {
87   - newClauses[i] = clauses.get(i).rewrite(reader);
88   - actuallyRewritten |= clauses.get(i) != newClauses[i];
  88 + for (int i = 0; i < oldClauses.length; i++) {
  89 + newClauses[i] = oldClauses[i].rewrite(reader);
  90 + actuallyRewritten |= oldClauses[i] != newClauses[i];
89 91 if (newClauses[i] instanceof MtasSpanMatchNoneQuery) {
90 92 return (new MtasSpanMatchNoneQuery(this.getField())).rewrite(reader);
91 93 } else {
... ... @@ -129,7 +131,7 @@ public class MtasSpanAndQuery extends MtasSpanQuery {
129 131 return super.rewrite(reader);
130 132 }
131 133 } else if (clauses.size() == 1) {
132   - return clauses.get(0).rewrite(reader);
  134 + return clauses.iterator().next().rewrite(reader);
133 135 } else {
134 136 return (new MtasSpanMatchNoneQuery(this.getField())).rewrite(reader);
135 137 }
... ... @@ -170,7 +172,9 @@ public class MtasSpanAndQuery extends MtasSpanQuery {
170 172 */
171 173 @Override
172 174 public int hashCode() {
173   - return baseQuery.hashCode();
  175 + int h = this.getClass().getSimpleName().hashCode();
  176 + h = (h * 7) ^ clauses.hashCode();
  177 + return h;
174 178 }
175 179  
176 180 }
... ...
src/mtas/search/spans/MtasSpanMatchNoneQuery.java
... ... @@ -180,7 +180,11 @@ public class MtasSpanMatchNoneQuery extends MtasSpanQuery {
180 180 if (getClass() != obj.getClass())
181 181 return false;
182 182 final MtasSpanMatchNoneQuery that = (MtasSpanMatchNoneQuery) obj;
183   - return field.equals(that.field);
  183 + if(field==null) {
  184 + return that.field==null;
  185 + } else {
  186 + return field.equals(that.field);
  187 + }
184 188 }
185 189  
186 190 /*
... ... @@ -191,7 +195,9 @@ public class MtasSpanMatchNoneQuery extends MtasSpanQuery {
191 195 @Override
192 196 public int hashCode() {
193 197 int h = this.getClass().getSimpleName().hashCode();
194   - h = (h * 7) ^ field.hashCode();
  198 + if(field!=null) {
  199 + h = (h * 7) ^ field.hashCode();
  200 + }
195 201 return h;
196 202 }
197 203  
... ...
src/mtas/search/spans/MtasSpanOrQuery.java
... ... @@ -2,6 +2,7 @@ package mtas.search.spans;
2 2  
3 3 import java.io.IOException;
4 4 import java.util.ArrayList;
  5 +import java.util.HashSet;
5 6 import java.util.Iterator;
6 7 import java.util.List;
7 8  
... ... @@ -19,7 +20,7 @@ import mtas.search.spans.util.MtasSpanQuery;
19 20 public class MtasSpanOrQuery extends MtasSpanQuery {
20 21  
21 22 /** The clauses. */
22   - private List<MtasSpanQuery> clauses;
  23 + private HashSet<MtasSpanQuery> clauses;
23 24  
24 25 private SpanQuery baseQuery;
25 26  
... ... @@ -32,7 +33,7 @@ public class MtasSpanOrQuery extends MtasSpanQuery {
32 33 public MtasSpanOrQuery(MtasSpanQuery... initialClauses) {
33 34 super(null, null);
34 35 Integer minimum = null, maximum = null;
35   - clauses = new ArrayList<MtasSpanQuery>();
  36 + clauses = new HashSet<MtasSpanQuery>();
36 37 for (MtasSpanQuery item : initialClauses) {
37 38 if (!clauses.contains(item)) {
38 39 minimum = clauses.isEmpty() ? item.getMinimumWidth()
... ... @@ -66,13 +67,15 @@ public class MtasSpanOrQuery extends MtasSpanQuery {
66 67 // rewrite, count MtasSpanMatchAllQuery and check for
67 68 // MtasSpanMatchNoneQuery
68 69 MtasSpanQuery[] newClauses = new MtasSpanQuery[clauses.size()];
  70 + MtasSpanQuery[] oldClauses = clauses
  71 + .toArray(new MtasSpanQuery[clauses.size()]);
69 72 int singlePositionQueries = 0;
70 73 int matchAllSinglePositionQueries = 0;
71 74 int matchNoneQueries = 0;
72 75 boolean actuallyRewritten = false;
73   - for (int i = 0; i < clauses.size(); i++) {
74   - newClauses[i] = clauses.get(i).rewrite(reader);
75   - actuallyRewritten |= clauses.get(i) != newClauses[i];
  76 + for (int i = 0; i < oldClauses.length; i++) {
  77 + newClauses[i] = oldClauses[i].rewrite(reader);
  78 + actuallyRewritten |= oldClauses[i] != newClauses[i];
76 79 if (newClauses[i] instanceof MtasSpanMatchNoneQuery) {
77 80 matchNoneQueries++;
78 81 } else if (newClauses[i].isSinglePositionQuery()) {
... ... @@ -118,10 +121,10 @@ public class MtasSpanOrQuery extends MtasSpanQuery {
118 121 return super.rewrite(reader);
119 122 }
120 123 } else if (clauses.size() == 1) {
121   - return clauses.get(0).rewrite(reader);
  124 + return clauses.iterator().next().rewrite(reader);
122 125 } else {
123 126 return (new MtasSpanMatchNoneQuery(this.getField())).rewrite(reader);
124   - }
  127 + }
125 128 }
126 129  
127 130 /*
... ... @@ -171,7 +174,7 @@ public class MtasSpanOrQuery extends MtasSpanQuery {
171 174 @Override
172 175 public int hashCode() {
173 176 int h = this.getClass().getSimpleName().hashCode();
174   - h = (h * 7) ^ baseQuery.hashCode();
  177 + h = (h * 7) ^ clauses.hashCode();
175 178 return h;
176 179 }
177 180  
... ...
src/mtas/search/spans/MtasSpanPositionSpans.java
... ... @@ -98,7 +98,7 @@ public class MtasSpanPositionSpans extends Spans implements MtasSpans {
98 98 */
99 99 @Override
100 100 public int width() {
101   - return 0;
  101 + return 1;
102 102 }
103 103  
104 104 /*
... ...
src/mtas/search/spans/MtasSpanRecurrenceQuery.java
... ... @@ -22,7 +22,7 @@ public class MtasSpanRecurrenceQuery extends MtasSpanQuery
22 22 implements Cloneable {
23 23  
24 24 /** The clause. */
25   - private MtasSpanQuery clause;
  25 + private MtasSpanQuery query;
26 26  
27 27 /** The minimum recurrence. */
28 28 private int minimumRecurrence;
... ... @@ -31,7 +31,7 @@ public class MtasSpanRecurrenceQuery extends MtasSpanQuery
31 31 private int maximumRecurrence;
32 32  
33 33 /** The ignore clause. */
34   - private MtasSpanQuery ignoreClause;
  34 + private MtasSpanQuery ignoreQuery;
35 35  
36 36 /** The maximum ignore length. */
37 37 private Integer maximumIgnoreLength;
... ... @@ -42,56 +42,86 @@ public class MtasSpanRecurrenceQuery extends MtasSpanQuery
42 42 /**
43 43 * Instantiates a new mtas span recurrence query.
44 44 *
45   - * @param clause
  45 + * @param query
46 46 * the clause
47 47 * @param minimumRecurrence
48 48 * the minimum recurrence
49 49 * @param maximumRecurrence
50 50 * the maximum recurrence
51   - * @param ignore
  51 + * @param ignoreQuery
52 52 * the ignore
53 53 * @param maximumIgnoreLength
54 54 * the maximum ignore length
55 55 */
56   - public MtasSpanRecurrenceQuery(MtasSpanQuery clause, int minimumRecurrence,
57   - int maximumRecurrence, MtasSpanQuery ignore,
  56 + public MtasSpanRecurrenceQuery(MtasSpanQuery query, int minimumRecurrence,
  57 + int maximumRecurrence, MtasSpanQuery ignoreQuery,
58 58 Integer maximumIgnoreLength) {
59 59 super(null, null);
  60 + field = query.getField();
  61 + this.query = query;
  62 + if (field != null && ignoreQuery != null) {
  63 + if (ignoreQuery.getField() == null
  64 + || field.equals(ignoreQuery.getField())) {
  65 + this.ignoreQuery = ignoreQuery;
  66 + this.maximumIgnoreLength = maximumIgnoreLength==null?1:maximumIgnoreLength;
  67 + } else {
  68 + throw new IllegalArgumentException(
  69 + "ignore must have same field as clauses");
  70 + }
  71 + } else {
  72 + this.ignoreQuery = null;
  73 + this.maximumIgnoreLength = null;
  74 + }
  75 + setRecurrence(minimumRecurrence, maximumRecurrence);
  76 + }
  77 +
  78 + /**
  79 + * Gets the clause.
  80 + *
  81 + * @return the clause
  82 + */
  83 + public MtasSpanQuery getQuery() {
  84 + return query;
  85 + }
  86 +
  87 + public MtasSpanQuery getIgnoreQuery() {
  88 + return ignoreQuery;
  89 + }
  90 +
  91 + public Integer getMaximumIgnoreLength() {
  92 + return maximumIgnoreLength;
  93 + }
  94 +
  95 + public int getMinimumRecurrence() {
  96 + return minimumRecurrence;
  97 + }
  98 +
  99 + public int getMaximumRecurrence() {
  100 + return maximumRecurrence;
  101 + }
  102 +
  103 + public void setRecurrence(int minimumRecurrence, int maximumRecurrence) {
60 104 if (minimumRecurrence > maximumRecurrence) {
61 105 throw new IllegalArgumentException(
62 106 "minimumRecurrence > maximumRecurrence");
63 107 } else if (minimumRecurrence < 1) {
64 108 throw new IllegalArgumentException("minimumRecurrence < 1 not supported");
65   - } else if (clause == null) {
  109 + } else if (query == null) {
66 110 throw new IllegalArgumentException("no clause");
67 111 }
68 112 this.minimumRecurrence = minimumRecurrence;
69 113 this.maximumRecurrence = maximumRecurrence;
70   - field = clause.getField();
71   - this.clause = clause;
72   - if (field != null && ignore != null) {
73   - if (ignore.getField() == null || field.equals(ignore.getField())) {
74   - this.ignoreClause = ignore;
75   - this.maximumIgnoreLength = maximumIgnoreLength;
76   - } else {
77   - throw new IllegalArgumentException(
78   - "ignore must have same field as clauses");
79   - }
80   - } else {
81   - this.ignoreClause = null;
82   - this.maximumIgnoreLength = null;
83   - }
84 114 // set minimum/maximum
85 115 Integer minimum = null, maximum = null;
86   - if (clause.getMinimumWidth() != null) {
87   - minimum = minimumRecurrence * clause.getMinimumWidth();
  116 + if (query.getMinimumWidth() != null) {
  117 + minimum = minimumRecurrence * query.getMinimumWidth();
88 118 }
89   - if (clause.getMaximumWidth() != null) {
90   - maximum = maximumRecurrence * clause.getMaximumWidth();
91   - if (ignore != null && maximumIgnoreLength != null) {
92   - if (ignore.getMaximumWidth() != null) {
  119 + if (query.getMaximumWidth() != null) {
  120 + maximum = maximumRecurrence * query.getMaximumWidth();
  121 + if (ignoreQuery != null && maximumIgnoreLength != null) {
  122 + if (ignoreQuery.getMaximumWidth() != null) {
93 123 maximum += (maximumRecurrence - 1) * maximumIgnoreLength
94   - * ignore.getMaximumWidth();
  124 + * ignoreQuery.getMaximumWidth();
95 125 } else {
96 126 maximum = null;
97 127 }
... ... @@ -100,15 +130,6 @@ public class MtasSpanRecurrenceQuery extends MtasSpanQuery
100 130 setWidth(minimum, maximum);
101 131 }
102 132  
103   - /**
104   - * Gets the clause.
105   - *
106   - * @return the clause
107   - */
108   - public MtasSpanQuery getClause() {
109   - return clause;
110   - }
111   -
112 133 /*
113 134 * (non-Javadoc)
114 135 *
... ... @@ -127,18 +148,23 @@ public class MtasSpanRecurrenceQuery extends MtasSpanQuery
127 148 */
128 149 @Override
129 150 public MtasSpanQuery rewrite(IndexReader reader) throws IOException {
130   - MtasSpanQuery newClause = clause.rewrite(reader);
131   - MtasSpanQuery newIgnoreClause = (ignoreClause != null)
132   - ? ignoreClause.rewrite(reader) : null;
133   - if(newClause instanceof MtasSpanRecurrenceQuery) {
134   - //for now too difficult, possibly merge later
135   - }
136   - if (newClause != clause
137   - || (newIgnoreClause != null && newIgnoreClause != ignoreClause)) {
138   - return new MtasSpanRecurrenceQuery(newClause, minimumRecurrence,
139   - maximumRecurrence, newIgnoreClause, maximumIgnoreLength).rewrite(reader);
  151 + MtasSpanQuery newQuery = query.rewrite(reader);
  152 + if (maximumRecurrence == 1) {
  153 + return newQuery;
140 154 } else {
141   - return super.rewrite(reader);
  155 + MtasSpanQuery newIgnoreQuery = (ignoreQuery != null)
  156 + ? ignoreQuery.rewrite(reader) : null;
  157 + if (newQuery instanceof MtasSpanRecurrenceQuery) {
  158 + // TODO: for now too difficult, possibly merge later
  159 + }
  160 + if (newQuery != query
  161 + || (newIgnoreQuery != null && newIgnoreQuery != ignoreQuery)) {
  162 + return new MtasSpanRecurrenceQuery(newQuery, minimumRecurrence,
  163 + maximumRecurrence, newIgnoreQuery, maximumIgnoreLength)
  164 + .rewrite(reader);
  165 + } else {
  166 + return super.rewrite(reader);
  167 + }
142 168 }
143 169 }
144 170  
... ... @@ -151,10 +177,10 @@ public class MtasSpanRecurrenceQuery extends MtasSpanQuery
151 177 public String toString(String field) {
152 178 StringBuilder buffer = new StringBuilder();
153 179 buffer.append(this.getClass().getSimpleName() + "([");
154   - buffer.append(clause.toString(clause.getField()));
  180 + buffer.append(query.toString(query.getField()));
155 181 buffer.append("," + minimumRecurrence + "," + maximumRecurrence);
156 182 buffer.append(", ");
157   - buffer.append(ignoreClause);
  183 + buffer.append(ignoreQuery);
158 184 buffer.append("])");
159 185 return buffer.toString();
160 186 }
... ... @@ -173,12 +199,12 @@ public class MtasSpanRecurrenceQuery extends MtasSpanQuery
173 199 if (getClass() != obj.getClass())
174 200 return false;
175 201 final MtasSpanRecurrenceQuery other = (MtasSpanRecurrenceQuery) obj;
176   - return clause.equals(other.clause)
  202 + return query.equals(other.query)
177 203 && minimumRecurrence == other.minimumRecurrence
178 204 && maximumRecurrence == other.maximumRecurrence
179   - && ((ignoreClause == null && other.ignoreClause == null)
180   - || ignoreClause != null && other.ignoreClause != null
181   - && ignoreClause.equals(other.ignoreClause));
  205 + && ((ignoreQuery == null && other.ignoreQuery == null)
  206 + || ignoreQuery != null && other.ignoreQuery != null
  207 + && ignoreQuery.equals(other.ignoreQuery));
182 208 }
183 209  
184 210 /*
... ... @@ -189,7 +215,7 @@ public class MtasSpanRecurrenceQuery extends MtasSpanQuery
189 215 @Override
190 216 public int hashCode() {
191 217 int h = this.getClass().getSimpleName().hashCode();
192   - h = (h * 7) ^ clause.hashCode();
  218 + h = (h * 7) ^ query.hashCode();
193 219 h = (h * 11) ^ minimumRecurrence;
194 220 h = (h * 13) ^ maximumRecurrence;
195 221 return h;
... ... @@ -205,10 +231,10 @@ public class MtasSpanRecurrenceQuery extends MtasSpanQuery
205 231 @Override
206 232 public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores)
207 233 throws IOException {
208   - SpanWeight subWeight = clause.createWeight(searcher, false);
  234 + SpanWeight subWeight = query.createWeight(searcher, false);
209 235 SpanWeight ignoreWeight = null;
210   - if (ignoreClause != null) {
211   - ignoreWeight = ignoreClause.createWeight(searcher, false);
  236 + if (ignoreQuery != null) {
  237 + ignoreWeight = ignoreQuery.createWeight(searcher, false);
212 238 }
213 239 return new SpanRecurrenceWeight(subWeight, ignoreWeight,
214 240 maximumIgnoreLength, searcher,
... ...
src/mtas/search/spans/MtasSpanSequenceItem.java
... ... @@ -77,26 +77,180 @@ public class MtasSpanSequenceItem {
77 77 @Override
78 78 public boolean equals(Object o) {
79 79 if (o instanceof MtasSpanSequenceItem) {
80   - final MtasSpanSequenceItem that = (MtasSpanSequenceItem) o;
  80 + MtasSpanSequenceItem that = (MtasSpanSequenceItem) o;
81 81 return spanQuery.equals(that.getQuery())
82 82 && (optional == that.isOptional());
83 83 } else {
84   - return false;
  84 + return false;
85 85 }
86 86 }
87 87  
  88 + @Override
  89 + public int hashCode() {
  90 + int h = this.getClass().getSimpleName().hashCode();
  91 + h = (h * 3) ^ spanQuery.hashCode();
  92 + h += (optional ? 1 : 0);
  93 + return h;
  94 + }
  95 +
88 96 public MtasSpanSequenceItem rewrite(IndexReader reader) throws IOException {
89 97 MtasSpanQuery newSpanQuery = spanQuery.rewrite(reader);
90   - if(newSpanQuery!=spanQuery) {
  98 + if (newSpanQuery != spanQuery) {
91 99 return new MtasSpanSequenceItem(newSpanQuery, optional);
92 100 } else {
93 101 return this;
94 102 }
95 103 }
96   -
  104 +
97 105 @Override
98 106 public String toString() {
99   - return "["+spanQuery.toString()+" - "+(optional?"OPTIONAL":"NOT OPTIONAL")+"]";
  107 + return "[" + spanQuery.toString() + " - "
  108 + + (optional ? "OPTIONAL" : "NOT OPTIONAL") + "]";
  109 + }
  110 +
  111 + public static MtasSpanSequenceItem merge(MtasSpanSequenceItem item1,
  112 + MtasSpanSequenceItem item2, MtasSpanQuery ignoreQuery,
  113 + Integer maximumIgnoreLength) {
  114 + if (item1 == null || item2 == null) {
  115 + return null;
  116 + } else {
  117 + MtasSpanQuery q1 = item1.getQuery();
  118 + MtasSpanQuery q2 = item2.getQuery();
  119 + boolean optional = item1.optional && item2.optional;
  120 + // first spanRecurrenceQuery
  121 + if (q1 instanceof MtasSpanRecurrenceQuery) {
  122 + MtasSpanRecurrenceQuery rq1 = (MtasSpanRecurrenceQuery) q1;
  123 + // both spanRecurrenceQuery
  124 + if (q2 instanceof MtasSpanRecurrenceQuery) {
  125 + MtasSpanRecurrenceQuery rq2 = (MtasSpanRecurrenceQuery) q2;
  126 + // equal query
  127 + if (rq1.getQuery().equals(rq2.getQuery())) {
  128 + // equal ignoreQuery settings
  129 + if ((ignoreQuery == null && rq1.getIgnoreQuery() == null
  130 + && rq2.getIgnoreQuery() == null)
  131 + || (ignoreQuery != null && rq1.getIgnoreQuery() != null
  132 + && ignoreQuery.equals(rq1.getIgnoreQuery())
  133 + && maximumIgnoreLength == rq1.getMaximumIgnoreLength()
  134 + && rq2.getIgnoreQuery() != null
  135 + && ignoreQuery.equals(rq2.getIgnoreQuery())
  136 + && maximumIgnoreLength == rq2.getMaximumIgnoreLength())) {
  137 + // at least one optional
  138 + if (item1.optional || item2.optional) {
  139 + int minimum = Math.min(rq1.getMinimumRecurrence(),
  140 + rq2.getMinimumRecurrence());
  141 + int maximum = rq1.getMaximumRecurrence()
  142 + + rq2.getMaximumRecurrence();
  143 + // only if ranges match
  144 + if ((rq1.getMaximumRecurrence() + 1) >= rq2
  145 + .getMinimumRecurrence()
  146 + && (rq2.getMaximumRecurrence() + 1) >= rq1
  147 + .getMinimumRecurrence()) {
  148 + return new MtasSpanSequenceItem(
  149 + new MtasSpanRecurrenceQuery(rq1.getQuery(), minimum,
  150 + maximum, ignoreQuery, maximumIgnoreLength),
  151 + optional);
  152 + }
  153 + // not optional
  154 + } else {
  155 + int minimum = rq1.getMinimumRecurrence()
  156 + + rq2.getMinimumRecurrence();
  157 + int maximum = rq1.getMaximumRecurrence()
  158 + + rq2.getMaximumRecurrence();
  159 + // only if ranges match
  160 + if ((rq1.getMaximumRecurrence() + 1) >= rq2
  161 + .getMinimumRecurrence()
  162 + && (rq2.getMaximumRecurrence() + 1) >= rq1
  163 + .getMinimumRecurrence()) {
  164 + return new MtasSpanSequenceItem(
  165 + new MtasSpanRecurrenceQuery(rq1.getQuery(), minimum,
  166 + maximum, ignoreQuery, maximumIgnoreLength),
  167 + optional);
  168 + }
  169 + }
  170 + }
  171 + }
  172 + } else {
  173 + if (rq1.getQuery().equals(q2)) {
  174 + if ((ignoreQuery == null && rq1.getIgnoreQuery() == null)
  175 + || (ignoreQuery != null && rq1.getIgnoreQuery() != null
  176 + && ignoreQuery.equals(rq1.getIgnoreQuery())
  177 + && rq1.getMaximumIgnoreLength() != null
  178 + && maximumIgnoreLength
  179 + .equals(rq1.getMaximumIgnoreLength()))) {
  180 + if (!optional) {
  181 + if (item1.optional) {
  182 + if (rq1.getMinimumRecurrence() == 1) {
  183 + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(
  184 + q2, 1, rq1.getMaximumRecurrence() + 1, ignoreQuery,
  185 + maximumIgnoreLength), false);
  186 + }
  187 + } else if (item2.optional) {
  188 + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(
  189 + q2, rq1.getMinimumRecurrence(),
  190 + rq1.getMaximumRecurrence() + 1, ignoreQuery,
  191 + maximumIgnoreLength), false);
  192 + } else {
  193 + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(
  194 + q2, rq1.getMinimumRecurrence() + 1,
  195 + rq1.getMaximumRecurrence() + 1, ignoreQuery,
  196 + maximumIgnoreLength), false);
  197 + }
  198 + } else {
  199 + if (rq1.getMinimumRecurrence() == 1) {
  200 + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(
  201 + q2, 1, rq1.getMaximumRecurrence() + 1, ignoreQuery,
  202 + maximumIgnoreLength), true);
  203 + }
  204 + }
  205 + }
  206 + }
  207 + }
  208 + // second spanRecurrenceQuery
  209 + } else if (q2 instanceof MtasSpanRecurrenceQuery) {
  210 + MtasSpanRecurrenceQuery rq2 = (MtasSpanRecurrenceQuery) q2;
  211 + if (rq2.getQuery().equals(q1)) {
  212 + if ((ignoreQuery == null && rq2.getIgnoreQuery() == null)
  213 + || (ignoreQuery != null && rq2.getIgnoreQuery()!=null && ignoreQuery.equals(rq2.getIgnoreQuery())
  214 + && maximumIgnoreLength == rq2.getMaximumIgnoreLength())) {
  215 + if (!optional) {
  216 + if (item1.optional) {
  217 + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(q1,
  218 + rq2.getMinimumRecurrence(), rq2.getMaximumRecurrence() + 1,
  219 + ignoreQuery, maximumIgnoreLength), false);
  220 + } else if (item2.optional) {
  221 + if (rq2.getMinimumRecurrence() == 1) {
  222 + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(
  223 + q1, 1, rq2.getMaximumRecurrence() + 1, ignoreQuery,
  224 + maximumIgnoreLength), false);
  225 + }
  226 + } else {
  227 + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(q1,
  228 + rq2.getMinimumRecurrence() + 1,
  229 + rq2.getMaximumRecurrence() + 1, ignoreQuery,
  230 + maximumIgnoreLength), false);
  231 + }
  232 + } else {
  233 + if (rq2.getMinimumRecurrence() == 1) {
  234 + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(q1,
  235 + 1, rq2.getMaximumRecurrence() + 1, ignoreQuery,
  236 + maximumIgnoreLength), true);
  237 + }
  238 + }
  239 + }
  240 + }
  241 + // both no spanRecurrenceQuery
  242 + } else if (q1.equals(q2)) {
  243 + // at least one optional
  244 + if (item1.optional || item2.optional) {
  245 + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(q1, 1, 2,
  246 + ignoreQuery, maximumIgnoreLength), optional);
  247 + } else {
  248 + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(q1, 2, 2,
  249 + ignoreQuery, maximumIgnoreLength), optional);
  250 + }
  251 + }
  252 + return null;
  253 + }
100 254 }
101 255  
102 256 }
... ...
src/mtas/search/spans/MtasSpanSequenceQuery.java
... ... @@ -2,6 +2,7 @@ package mtas.search.spans;
2 2  
3 3 import java.io.IOException;
4 4 import java.util.ArrayList;
  5 +import java.util.Arrays;
5 6 import java.util.Iterator;
6 7 import java.util.List;
7 8 import java.util.Map;
... ... @@ -27,7 +28,7 @@ public class MtasSpanSequenceQuery extends MtasSpanQuery {
27 28 private List<MtasSpanSequenceItem> items;
28 29  
29 30 /** The ignore clause. */
30   - private MtasSpanQuery ignoreClause;
  31 + private MtasSpanQuery ignoreQuery;
31 32  
32 33 /** The maximum ignore length. */
33 34 private Integer maximumIgnoreLength;
... ... @@ -40,13 +41,13 @@ public class MtasSpanSequenceQuery extends MtasSpanQuery {
40 41 *
41 42 * @param items
42 43 * the items
43   - * @param ignore
  44 + * @param ignoreQuery
44 45 * the ignore
45 46 * @param maximumIgnoreLength
46 47 * the maximum ignore length
47 48 */
48 49 public MtasSpanSequenceQuery(List<MtasSpanSequenceItem> items,
49   - MtasSpanQuery ignore, Integer maximumIgnoreLength) {
  50 + MtasSpanQuery ignoreQuery, Integer maximumIgnoreLength) {
50 51 super(null, null);
51 52 Integer minimum = 0, maximum = 0;
52 53 this.items = items;
... ... @@ -68,24 +69,26 @@ public class MtasSpanSequenceQuery extends MtasSpanQuery {
68 69 }
69 70 }
70 71 // check ignore
71   - if (field != null && ignore != null) {
72   - if (ignore.getField() == null || field.equals(ignore.getField())) {
73   - this.ignoreClause = ignore;
74   - this.maximumIgnoreLength = maximumIgnoreLength;
  72 + if (field != null && ignoreQuery != null) {
  73 + if (ignoreQuery.getField() == null
  74 + || field.equals(ignoreQuery.getField())) {
  75 + this.ignoreQuery = ignoreQuery;
  76 + this.maximumIgnoreLength = maximumIgnoreLength == null ? 1
  77 + : maximumIgnoreLength;
75 78 } else {
76 79 throw new IllegalArgumentException(
77 80 "ignore must have same field as clauses");
78 81 }
79 82 if (maximum != null && items.size() > 1) {
80   - if (ignore.getMaximumWidth() != null) {
81   - maximum += (items.size() - 1) * maximumIgnoreLength
82   - * ignoreClause.getMaximumWidth();
  83 + if (this.ignoreQuery.getMaximumWidth() != null) {
  84 + maximum += (items.size() - 1) * this.maximumIgnoreLength
  85 + * this.ignoreQuery.getMaximumWidth();
83 86 } else {
84 87 maximum = null;
85 88 }
86 89 }
87 90 } else {
88   - this.ignoreClause = null;
  91 + this.ignoreQuery = null;
89 92 this.maximumIgnoreLength = null;
90 93 }
91 94 setWidth(minimum, maximum);
... ... @@ -101,6 +104,18 @@ public class MtasSpanSequenceQuery extends MtasSpanQuery {
101 104 return field;
102 105 }
103 106  
  107 + public List<MtasSpanSequenceItem> getItems() {
  108 + return items;
  109 + }
  110 +
  111 + public MtasSpanQuery getIgnoreQuery() {
  112 + return ignoreQuery;
  113 + }
  114 +
  115 + public Integer getMaximumIgnoreLength() {
  116 + return maximumIgnoreLength;
  117 + }
  118 +
104 119 /*
105 120 * (non-Javadoc)
106 121 *
... ... @@ -112,24 +127,43 @@ public class MtasSpanSequenceQuery extends MtasSpanQuery {
112 127 if (items.size() == 1) {
113 128 return items.get(0).getQuery().rewrite(reader);
114 129 } else {
115   - MtasSpanSequenceItem newItem;
  130 + MtasSpanSequenceItem newItem, previousNewItem = null;
116 131 ArrayList<MtasSpanSequenceItem> newItems = new ArrayList<MtasSpanSequenceItem>(
117 132 items.size());
118   - MtasSpanQuery newIgnoreClause = ignoreClause != null
119   - ? ignoreClause.rewrite(reader) : null;
120   - boolean actuallyRewritten = ignoreClause != null
121   - ? newIgnoreClause != ignoreClause : false;
  133 + MtasSpanQuery newIgnoreClause = ignoreQuery != null
  134 + ? ignoreQuery.rewrite(reader) : null;
  135 + boolean actuallyRewritten = ignoreQuery != null
  136 + ? newIgnoreClause != ignoreQuery : false;
122 137 for (int i = 0; i < items.size(); i++) {
123 138 newItem = items.get(i).rewrite(reader);
124   - actuallyRewritten |= items.get(i) != newItem;
125   - // for now too difficult, possibly later merge with previous if possible
126   - newItems.add(newItem);
  139 + if (newItem.getQuery() instanceof MtasSpanMatchNoneQuery) {
  140 + if (!newItem.isOptional()) {
  141 + return new MtasSpanMatchNoneQuery(field);
  142 + } else {
  143 + actuallyRewritten = true;
  144 + }
  145 + } else {
  146 + actuallyRewritten |= items.get(i) != newItem;
  147 + MtasSpanSequenceItem previousMergedItem = MtasSpanSequenceItem.merge(
  148 + previousNewItem, newItem, ignoreQuery, maximumIgnoreLength);
  149 + if (previousMergedItem != null) {
  150 + newItems.set((newItems.size() - 1), previousMergedItem);
  151 + actuallyRewritten = true;
  152 + } else {
  153 + newItems.add(newItem);
  154 + }
  155 + previousNewItem = newItem;
  156 + }
127 157 }
128 158 if (!actuallyRewritten) {
129 159 return super.rewrite(reader);
130 160 } else {
131   - return new MtasSpanSequenceQuery(newItems, newIgnoreClause,
132   - maximumIgnoreLength).rewrite(reader);
  161 + if (newItems.size() > 0) {
  162 + return new MtasSpanSequenceQuery(newItems, newIgnoreClause,
  163 + maximumIgnoreLength).rewrite(reader);
  164 + } else {
  165 + return new MtasSpanMatchNoneQuery(field);
  166 + }
133 167 }
134 168 }
135 169 }
... ... @@ -157,7 +191,7 @@ public class MtasSpanSequenceQuery extends MtasSpanQuery {
157 191 }
158 192 buffer.append("]");
159 193 buffer.append(", ");
160   - buffer.append(ignoreClause);
  194 + buffer.append(ignoreQuery);
161 195 buffer.append(")");
162 196 return buffer.toString();
163 197 }
... ... @@ -177,9 +211,9 @@ public class MtasSpanSequenceQuery extends MtasSpanQuery {
177 211 return false;
178 212 MtasSpanSequenceQuery other = (MtasSpanSequenceQuery) obj;
179 213 return field.equals(other.field) && items.equals(other.items)
180   - && ((ignoreClause == null && other.ignoreClause == null)
181   - || ignoreClause != null && other.ignoreClause != null
182   - && ignoreClause.equals(other.ignoreClause));
  214 + && ((ignoreQuery == null && other.ignoreQuery == null)
  215 + || ignoreQuery != null && other.ignoreQuery != null
  216 + && ignoreQuery.equals(other.ignoreQuery));
183 217 }
184 218  
185 219 /*
... ... @@ -192,6 +226,10 @@ public class MtasSpanSequenceQuery extends MtasSpanQuery {
192 226 int h = this.getClass().getSimpleName().hashCode();
193 227 h = (h * 3) ^ field.hashCode();
194 228 h = (h * 5) ^ items.hashCode();
  229 + if (ignoreQuery != null) {
  230 + h = (h * 7) ^ ignoreQuery.hashCode();
  231 + h = (h * 11) ^ maximumIgnoreLength.hashCode();
  232 + }
195 233 return h;
196 234 }
197 235  
... ... @@ -211,8 +249,8 @@ public class MtasSpanSequenceQuery extends MtasSpanQuery {
211 249 subWeights.add(new MtasSpanSequenceQueryWeight(
212 250 item.getQuery().createWeight(searcher, false), item.isOptional()));
213 251 }
214   - if (ignoreClause != null) {
215   - ignoreWeight = ignoreClause.createWeight(searcher, false);
  252 + if (ignoreQuery != null) {
  253 + ignoreWeight = ignoreQuery.createWeight(searcher, false);
216 254 }
217 255 return new SpanSequenceWeight(subWeights, ignoreWeight, maximumIgnoreLength,
218 256 searcher, needsScores ? getTermContexts(subWeights) : null);
... ...
src/mtas/search/spans/MtasSpanWithinQuery.java
1 1 package mtas.search.spans;
2 2  
3 3 import java.io.IOException;
  4 +import java.util.ArrayList;
  5 +import java.util.List;
  6 +import java.util.Map;
  7 +import java.util.Set;
4 8  
5 9 import org.apache.lucene.index.IndexReader;
  10 +import org.apache.lucene.index.LeafReaderContext;
  11 +import org.apache.lucene.index.Term;
  12 +import org.apache.lucene.index.TermContext;
6 13 import org.apache.lucene.search.IndexSearcher;
7   -import org.apache.lucene.search.spans.SpanContainingQuery;
  14 +import org.apache.lucene.search.spans.SpanQuery;
8 15 import org.apache.lucene.search.spans.SpanWeight;
9 16 import org.apache.lucene.search.spans.SpanWithinQuery;
  17 +import org.apache.lucene.search.spans.Spans;
10 18  
  19 +import mtas.search.spans.util.MtasSpanMaximumExpandQuery;
11 20 import mtas.search.spans.util.MtasSpanQuery;
12 21  
13 22 /**
... ... @@ -17,25 +26,58 @@ public class MtasSpanWithinQuery extends MtasSpanQuery {
17 26  
18 27 /** The base query. */
19 28 private SpanWithinQuery baseQuery;
20   - private MtasSpanQuery bigQuery, smallQuery;
21   -
  29 + private MtasSpanQuery smallQuery, bigQuery;
  30 + private int leftBoundaryMinimum, leftBoundaryMaximum, rightBoundaryMaximum,
  31 + rightBoundaryMinimum;
  32 + private boolean autoAdjustBigQuery;
  33 + String field;
22 34  
/**
 * Creates a within query without any boundary expansion.
 *
 * <p>
 * Delegates to the seven-argument constructor, passing {@code 0} for all
 * four boundary limits and {@code true} for the adjust flag.
 *
 * @param q1
 *          the first query, handed on unchanged as {@code q1}
 * @param q2
 *          the second query, handed on unchanged as {@code q2}
 */
public MtasSpanWithinQuery(MtasSpanQuery q1, MtasSpanQuery q2) {
  this(q1, q2, 0, 0, 0, 0, true);
}
  47 +
  48 + public MtasSpanWithinQuery(MtasSpanQuery q1, MtasSpanQuery q2,
  49 + int leftMinimum, int leftMaximum, int rightMinimum, int rightMaximum,
  50 + boolean adjustBigQuery) {
  51 + super(q1 != null ? q1.getMinimumWidth() : null,
  52 + q1 != null ? q1.getMaximumWidth() : null);
  53 + if (q2 != null && q2.getMinimumWidth() != null) {
  54 + if (this.getMinimumWidth() == null
  55 + || this.getMinimumWidth() < q2.getMinimumWidth()) {
33 56 this.setWidth(q2.getMinimumWidth(), this.getMaximumWidth());
34 57 }
35   - }
36   - smallQuery=q1;
37   - bigQuery=q2;
38   - baseQuery = new SpanWithinQuery(smallQuery, bigQuery);
  58 + }
  59 + bigQuery = q1;
  60 + smallQuery = q2;
  61 + leftBoundaryMinimum = leftMinimum;
  62 + leftBoundaryMaximum = leftMaximum;
  63 + rightBoundaryMinimum = rightMinimum;
  64 + rightBoundaryMaximum = rightMaximum;
  65 + autoAdjustBigQuery = adjustBigQuery;
  66 + if (bigQuery.getField() != null) {
  67 + field = bigQuery.getField();
  68 + } else if (smallQuery.getField() != null) {
  69 + field = smallQuery.getField();
  70 + } else {
  71 + field = null;
  72 + }
  73 + if (field != null) {
  74 + baseQuery = new SpanWithinQuery(
  75 + new MtasSpanMaximumExpandQuery(bigQuery, leftBoundaryMinimum,
  76 + leftBoundaryMaximum, rightBoundaryMinimum, rightBoundaryMaximum),
  77 + smallQuery);
  78 + } else {
  79 + baseQuery = null;
  80 + }
39 81 }
40 82  
41 83 /*
... ... @@ -46,12 +88,143 @@ public class MtasSpanWithinQuery extends MtasSpanQuery {
46 88 */
47 89 @Override
48 90 public MtasSpanQuery rewrite(IndexReader reader) throws IOException {
49   - MtasSpanQuery newSmallQuery = smallQuery.rewrite(reader);
50 91 MtasSpanQuery newBigQuery = bigQuery.rewrite(reader);
51   - if(newSmallQuery!=smallQuery || newBigQuery!=bigQuery) {
52   - return new MtasSpanWithinQuery(newSmallQuery, newBigQuery).rewrite(reader);
53   - } else if(newSmallQuery!=null && newBigQuery!=null && newSmallQuery.equals(newBigQuery)) {
54   - return newSmallQuery;
  92 + MtasSpanQuery newSmallQuery = smallQuery.rewrite(reader);
  93 +
  94 + if (newBigQuery == null || newBigQuery instanceof MtasSpanMatchNoneQuery
  95 + || newSmallQuery == null
  96 + || newSmallQuery instanceof MtasSpanMatchNoneQuery) {
  97 + return new MtasSpanMatchNoneQuery(field);
  98 + }
  99 +
  100 + if (autoAdjustBigQuery) {
  101 + if (newBigQuery instanceof MtasSpanRecurrenceQuery) {
  102 + MtasSpanRecurrenceQuery recurrenceQuery = (MtasSpanRecurrenceQuery) newBigQuery;
  103 + if (recurrenceQuery.getIgnoreQuery() == null
  104 + && recurrenceQuery.getQuery() instanceof MtasSpanMatchAllQuery) {
  105 + rightBoundaryMaximum += leftBoundaryMaximum
  106 + + recurrenceQuery.getMaximumRecurrence();
  107 + rightBoundaryMinimum += leftBoundaryMinimum
  108 + + recurrenceQuery.getMinimumRecurrence();
  109 + leftBoundaryMaximum = 0;
  110 + leftBoundaryMinimum = 0;
  111 + newBigQuery = new MtasSpanMatchAllQuery(field);
  112 + // System.out.println("REPLACE WITH " + newBigQuery + " (["
  113 + // + leftBoundaryMinimum + "," + leftBoundaryMaximum + "],["
  114 + // + rightBoundaryMinimum + "," + rightBoundaryMaximum + "])");
  115 + return new MtasSpanWithinQuery(newBigQuery, newSmallQuery,
  116 + leftBoundaryMinimum, leftBoundaryMaximum, rightBoundaryMinimum,
  117 + rightBoundaryMaximum, autoAdjustBigQuery).rewrite(reader);
  118 + }
  119 + } else if (newBigQuery instanceof MtasSpanMatchAllQuery) {
  120 + if (leftBoundaryMaximum > 0) {
  121 + rightBoundaryMaximum += leftBoundaryMaximum;
  122 + rightBoundaryMinimum += leftBoundaryMinimum;
  123 + leftBoundaryMaximum = 0;
  124 + leftBoundaryMinimum = 0;
  125 + // System.out.println("REPLACE WITH " + newBigQuery + " (["
  126 + // + leftBoundaryMinimum + "," + leftBoundaryMaximum + "],["
  127 + // + rightBoundaryMinimum + "," + rightBoundaryMaximum + "])");
  128 + return new MtasSpanWithinQuery(newBigQuery, newSmallQuery,
  129 + leftBoundaryMinimum, leftBoundaryMaximum, rightBoundaryMinimum,
  130 + rightBoundaryMaximum, autoAdjustBigQuery).rewrite(reader);
  131 + }
  132 + } else if (newBigQuery instanceof MtasSpanSequenceQuery) {
  133 + MtasSpanSequenceQuery sequenceQuery = (MtasSpanSequenceQuery) newBigQuery;
  134 + if (sequenceQuery.getIgnoreQuery() == null) {
  135 + List<MtasSpanSequenceItem> items = sequenceQuery.getItems();
  136 + List<MtasSpanSequenceItem> newItems = new ArrayList<MtasSpanSequenceItem>();
  137 + int newLeftBoundaryMinimum = 0, newLeftBoundaryMaximum = 0,
  138 + newRightBoundaryMinimum = 0, newRightBoundaryMaximum = 0;
  139 + for (int i = 0; i < items.size(); i++) {
  140 + // first item
  141 + if (i == 0) {
  142 + if (items.get(i).getQuery() instanceof MtasSpanMatchAllQuery) {
  143 + newLeftBoundaryMaximum++;
  144 + if (!items.get(i).isOptional()) {
  145 + newLeftBoundaryMinimum++;
  146 + }
  147 + } else if (items.get(i)
  148 + .getQuery() instanceof MtasSpanRecurrenceQuery) {
  149 + MtasSpanRecurrenceQuery msrq = (MtasSpanRecurrenceQuery) items
  150 + .get(i).getQuery();
  151 + if (msrq.getQuery() instanceof MtasSpanMatchAllQuery) {
  152 + newLeftBoundaryMaximum += msrq.getMaximumRecurrence();
  153 + if (!items.get(i).isOptional()) {
  154 + newLeftBoundaryMinimum += msrq.getMinimumRecurrence();
  155 + }
  156 + } else {
  157 + newItems.add(items.get(i));
  158 + }
  159 + } else {
  160 + newItems.add(items.get(i));
  161 + }
  162 + // last item
  163 + } else if (i == (items.size() - 1)) {
  164 + if (items.get(i).getQuery() instanceof MtasSpanMatchAllQuery) {
  165 + newRightBoundaryMaximum++;
  166 + if (!items.get(i).isOptional()) {
  167 + newRightBoundaryMinimum++;
  168 + }
  169 + } else if (items.get(i)
  170 + .getQuery() instanceof MtasSpanRecurrenceQuery) {
  171 + MtasSpanRecurrenceQuery msrq = (MtasSpanRecurrenceQuery) items
  172 + .get(i).getQuery();
  173 + if (msrq.getQuery() instanceof MtasSpanMatchAllQuery) {
  174 + newRightBoundaryMaximum += msrq.getMaximumRecurrence();
  175 + if (!items.get(i).isOptional()) {
  176 + newRightBoundaryMinimum += msrq.getMinimumRecurrence();
  177 + }
  178 + } else {
  179 + newItems.add(items.get(i));
  180 + }
  181 + } else {
  182 + newItems.add(items.get(i));
  183 + }
  184 + // other items
  185 + } else {
  186 + newItems.add(items.get(i));
  187 + }
  188 + }
  189 + leftBoundaryMaximum += newLeftBoundaryMaximum;
  190 + leftBoundaryMinimum += newLeftBoundaryMinimum;
  191 + rightBoundaryMaximum += newRightBoundaryMaximum;
  192 + rightBoundaryMinimum += newRightBoundaryMinimum;
  193 + if (newItems.size() == 0) {
  194 + rightBoundaryMaximum = Math.max(0,
  195 + rightBoundaryMaximum + leftBoundaryMaximum - 1);
  196 + rightBoundaryMinimum = Math.max(0,
  197 + rightBoundaryMinimum + leftBoundaryMinimum - 1);
  198 + leftBoundaryMaximum = 0;
  199 + leftBoundaryMinimum = 0;
  200 + newItems.add(new MtasSpanSequenceItem(
  201 + new MtasSpanMatchAllQuery(field), false));
  202 + }
  203 + if (!items.equals(newItems) || newLeftBoundaryMaximum > 0
  204 + || newRightBoundaryMaximum > 0) {
  205 + newBigQuery = (new MtasSpanSequenceQuery(newItems, null, null))
  206 + .rewrite(reader);
  207 + System.out.println(newBigQuery.getField() + "\t" + newBigQuery);
  208 + // System.out.println("REPLACE WITH " + newBigQuery + " (["
  209 + // + leftBoundaryMinimum + "," + leftBoundaryMaximum + "],["
  210 + // + rightBoundaryMinimum + "," + rightBoundaryMaximum + "])");
  211 + return new MtasSpanWithinQuery(newBigQuery, newSmallQuery,
  212 + leftBoundaryMinimum, leftBoundaryMaximum, rightBoundaryMinimum,
  213 + rightBoundaryMaximum, autoAdjustBigQuery).rewrite(reader);
  214 + }
  215 + }
  216 + }
  217 + }
  218 +
  219 + if (newBigQuery != bigQuery || newSmallQuery != smallQuery) {
  220 + System.out.println(newBigQuery.getField() + "\t" + newBigQuery);
  221 + System.out.println(newSmallQuery.getField() + "\t" + newSmallQuery);
  222 + return (new MtasSpanWithinQuery(newBigQuery, newSmallQuery,
  223 + leftBoundaryMinimum, leftBoundaryMaximum, rightBoundaryMinimum,
  224 + rightBoundaryMaximum, autoAdjustBigQuery)).rewrite(reader);
  225 + } else if (newBigQuery != null && newSmallQuery != null
  226 + && newBigQuery.equals(newSmallQuery)) {
  227 + return newBigQuery;
55 228 } else {
56 229 baseQuery = (SpanWithinQuery) baseQuery.rewrite(reader);
57 230 return super.rewrite(reader);
... ... @@ -65,7 +238,7 @@ public class MtasSpanWithinQuery extends MtasSpanQuery {
65 238 */
66 239 @Override
67 240 public String getField() {
68   - return baseQuery.getField();
  241 + return field;
69 242 }
70 243  
71 244 /*
... ... @@ -78,9 +251,7 @@ public class MtasSpanWithinQuery extends MtasSpanQuery {
78 251 @Override
79 252 public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores)
80 253 throws IOException {
81   - SpanWeight sw = baseQuery.createWeight(searcher, needsScores);
82   - return sw;
83   - // return baseQuery.createWeight(searcher, needsScores);
  254 + return baseQuery.createWeight(searcher, needsScores);
84 255 }
85 256  
86 257 /*
... ... @@ -90,7 +261,22 @@ public class MtasSpanWithinQuery extends MtasSpanQuery {
90 261 */
91 262 @Override
92 263 public String toString(String field) {
93   - return baseQuery.toString(field);
  264 + StringBuilder buffer = new StringBuilder();
  265 + buffer.append(this.getClass().getSimpleName() + "([");
  266 + if (smallQuery != null) {
  267 + buffer.append(smallQuery.toString(smallQuery.getField()));
  268 + } else {
  269 + buffer.append("null");
  270 + }
  271 + buffer.append(",");
  272 + if (bigQuery != null) {
  273 + buffer.append(bigQuery.toString(bigQuery.getField()));
  274 + } else {
  275 + buffer.append("null");
  276 + }
  277 + buffer.append("],[" + leftBoundaryMinimum + "," + leftBoundaryMaximum
  278 + + "],[" + rightBoundaryMinimum + "," + rightBoundaryMaximum + "])");
  279 + return buffer.toString();
94 280 }
95 281  
96 282 /*
... ... @@ -107,7 +293,11 @@ public class MtasSpanWithinQuery extends MtasSpanQuery {
107 293 if (getClass() != obj.getClass())
108 294 return false;
109 295 final MtasSpanWithinQuery that = (MtasSpanWithinQuery) obj;
110   - return baseQuery.equals(that.baseQuery);
  296 + return baseQuery.equals(that.baseQuery)
  297 + && leftBoundaryMinimum == that.leftBoundaryMinimum
  298 + && leftBoundaryMaximum == that.leftBoundaryMaximum
  299 + && rightBoundaryMinimum == that.rightBoundaryMinimum
  300 + && rightBoundaryMaximum == that.rightBoundaryMaximum;
111 301 }
112 302  
113 303 /*
... ... @@ -117,7 +307,18 @@ public class MtasSpanWithinQuery extends MtasSpanQuery {
117 307 */
118 308 @Override
119 309 public int hashCode() {
120   - return baseQuery.hashCode();
  310 + int h = Integer.rotateLeft(classHash(), 1);
  311 + h ^= smallQuery.hashCode();
  312 + h = Integer.rotateLeft(h, 1);
  313 + h ^= bigQuery.hashCode();
  314 + h = Integer.rotateLeft(h, leftBoundaryMinimum) + leftBoundaryMinimum;
  315 + h ^= 2;
  316 + h = Integer.rotateLeft(h, leftBoundaryMaximum) + leftBoundaryMaximum;
  317 + h ^= 3;
  318 + h = Integer.rotateLeft(h, rightBoundaryMinimum) + rightBoundaryMinimum;
  319 + h ^= 5;
  320 + h = Integer.rotateLeft(h, rightBoundaryMaximum) + rightBoundaryMaximum;
  321 + return h;
121 322 }
122 323  
123 324 }
... ...
src/mtas/search/spans/util/MtasExtendedSpanAndQuery.java
1 1 package mtas.search.spans.util;
2 2  
3 3 import java.util.ArrayList;
  4 +import java.util.HashSet;
4 5 import java.util.Iterator;
5 6 import java.util.List;
6 7  
... ... @@ -13,7 +14,7 @@ import org.apache.lucene.search.spans.SpanQuery;
13 14 public class MtasExtendedSpanAndQuery extends SpanNearQuery {
14 15  
15 16 /** The clauses. */
16   - private List<SpanQuery> clauses;
  17 + private HashSet<SpanQuery> clauses;
17 18  
18 19 /**
19 20 * Instantiates a new mtas extended span and query.
... ... @@ -22,7 +23,7 @@ public class MtasExtendedSpanAndQuery extends SpanNearQuery {
22 23 */
23 24 public MtasExtendedSpanAndQuery(SpanQuery... clauses) {
24 25 super(clauses, -1 * (clauses.length - 1), false);
25   - this.clauses = new ArrayList<>(clauses.length);
  26 + this.clauses = new HashSet<SpanQuery>();
26 27 for (SpanQuery clause : clauses) {
27 28 this.clauses.add(clause);
28 29 }
... ... @@ -63,7 +64,7 @@ public class MtasExtendedSpanAndQuery extends SpanNearQuery {
63 64 return false;
64 65 if (getClass() != obj.getClass())
65 66 return false;
66   - final MtasExtendedSpanAndQuery that = (MtasExtendedSpanAndQuery) obj;
  67 + final MtasExtendedSpanAndQuery that = (MtasExtendedSpanAndQuery) obj;
67 68 return clauses.equals(that.clauses);
68 69 }
69 70  
... ...
src/mtas/search/spans/util/MtasSpanMaximumExpandQuery.java 0 → 100644
  1 +package mtas.search.spans.util;
  2 +
  3 +import java.io.IOException;
  4 +import java.lang.reflect.Method;
  5 +import java.util.Map;
  6 +import java.util.Set;
  7 +
  8 +import org.apache.lucene.codecs.FieldsProducer;
  9 +import org.apache.lucene.index.IndexReader;
  10 +import org.apache.lucene.index.LeafReader;
  11 +import org.apache.lucene.index.LeafReaderContext;
  12 +import org.apache.lucene.index.Term;
  13 +import org.apache.lucene.index.TermContext;
  14 +import org.apache.lucene.index.Terms;
  15 +import org.apache.lucene.search.IndexSearcher;
  16 +import org.apache.lucene.search.spans.SpanCollector;
  17 +import org.apache.lucene.search.spans.SpanWeight;
  18 +import org.apache.lucene.search.spans.Spans;
  19 +
  20 +import mtas.codec.util.CodecInfo;
  21 +import mtas.codec.util.CodecInfo.IndexDoc;
  22 +import mtas.search.spans.MtasSpanMatchNoneSpans;
  23 +
  24 +public class MtasSpanMaximumExpandQuery extends MtasSpanQuery {
  25 +
  26 + MtasSpanQuery query;
  27 + int minimumLeft, maximumLeft, minimumRight, maximumRight;
  28 +
  29 + public MtasSpanMaximumExpandQuery(MtasSpanQuery query, int minimumLeft,
  30 + int maximumLeft, int minimumRight, int maximumRight) {
  31 + super(null, null);
  32 + this.query = query;
  33 + if (minimumLeft > maximumLeft || minimumRight > maximumRight
  34 + || minimumLeft < 0 || minimumRight < 0) {
  35 + throw new IllegalArgumentException();
  36 + }
  37 + this.minimumLeft = minimumLeft;
  38 + this.maximumLeft = maximumLeft;
  39 + this.minimumRight = minimumRight;
  40 + this.maximumRight = maximumRight;
  41 + Integer minimum = query.getMinimumWidth();
  42 + Integer maximum = query.getMaximumWidth();
  43 + if (minimum != null) {
  44 + minimum += minimumLeft + minimumRight;
  45 + }
  46 + if (maximum != null) {
  47 + maximum += maximumLeft + maximumRight;
  48 + }
  49 + setWidth(minimum, maximum);
  50 + }
  51 +
  52 + @Override
  53 + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores)
  54 + throws IOException {
  55 + SpanWeight subWeight = query.createWeight(searcher, needsScores);
  56 + if (maximumLeft == 0 && maximumRight == 0) {
  57 + return subWeight;
  58 + } else {
  59 + return new MtasMaximumExpandWeight(subWeight, searcher, needsScores);
  60 + }
  61 + }
  62 +
  63 + @Override
  64 + public String getField() {
  65 + return query.getField();
  66 + }
  67 +
  68 + @Override
  69 + public String toString(String field) {
  70 + StringBuilder buffer = new StringBuilder();
  71 + buffer.append(this.getClass().getSimpleName() + "([");
  72 + buffer.append(query.toString(field) + "]["+minimumLeft+","+maximumLeft+"]["+minimumRight+","+maximumRight+"])");
  73 + return buffer.toString();
  74 + }
  75 +
  76 + @Override
  77 + public boolean equals(Object obj) {
  78 + if (this == obj)
  79 + return true;
  80 + if (obj == null)
  81 + return false;
  82 + if (getClass() != obj.getClass())
  83 + return false;
  84 + final MtasSpanMaximumExpandQuery that = (MtasSpanMaximumExpandQuery) obj;
  85 + return query.equals(that.query) && minimumLeft == that.minimumLeft
  86 + && maximumLeft == that.maximumLeft && minimumRight == that.minimumRight
  87 + && maximumRight == that.maximumRight;
  88 + }
  89 +
  90 + @Override
  91 + public int hashCode() {
  92 + int h = Integer.rotateLeft(classHash(), 1);
  93 + h ^= query.hashCode();
  94 + h = Integer.rotateLeft(h, minimumLeft) + minimumLeft;
  95 + h ^= 2;
  96 + h = Integer.rotateLeft(h, maximumLeft) + maximumLeft;
  97 + h ^= 3;
  98 + h = Integer.rotateLeft(h, minimumRight) + minimumRight;
  99 + h ^= 5;
  100 + h = Integer.rotateLeft(h, maximumRight) + maximumRight;
  101 + return h;
  102 + }
  103 +
  104 + @Override
  105 + public MtasSpanQuery rewrite(IndexReader reader) throws IOException {
  106 + MtasSpanQuery newQuery = (MtasSpanQuery) query.rewrite(reader);
  107 + if (maximumLeft == 0 && maximumRight == 0) {
  108 + return newQuery;
  109 + } else if (query != newQuery) {
  110 + return new MtasSpanMaximumExpandQuery(newQuery, minimumLeft, maximumLeft,
  111 + minimumRight, maximumRight);
  112 + } else {
  113 + return super.rewrite(reader);
  114 + }
  115 + }
  116 +
  117 + private class MtasMaximumExpandWeight extends SpanWeight {
  118 + SpanWeight subWeight;
  119 +
  120 + public MtasMaximumExpandWeight(SpanWeight subWeight, IndexSearcher searcher,
  121 + boolean needsScores) throws IOException {
  122 + super(MtasSpanMaximumExpandQuery.this, searcher,
  123 + needsScores ? getTermContexts(subWeight) : null);
  124 + this.subWeight = subWeight;
  125 + }
  126 +
  127 + @Override
  128 + public void extractTermContexts(Map<Term, TermContext> contexts) {
  129 + subWeight.extractTermContexts(contexts);
  130 + }
  131 +
  132 + @Override
  133 + public Spans getSpans(LeafReaderContext ctx, Postings requiredPostings)
  134 + throws IOException {
  135 + Spans spans = subWeight.getSpans(ctx, requiredPostings);
  136 + if (maximumLeft == 0 && maximumRight == 0) {
  137 + return spans;
  138 + } else {
  139 + try {
  140 + // get leafreader
  141 + LeafReader r = ctx.reader();
  142 + // get delegate
  143 + Boolean hasMethod = true;
  144 + while (hasMethod) {
  145 + hasMethod = false;
  146 + Method[] methods = r.getClass().getMethods();
  147 + for (Method m : methods) {
  148 + if (m.getName().equals("getDelegate")) {
  149 + hasMethod = true;
  150 + r = (LeafReader) m.invoke(r, (Object[]) null);
  151 + break;
  152 + }
  153 + }
  154 + } // get fieldsproducer
  155 + Method fpm = r.getClass().getMethod("getPostingsReader",
  156 + (Class<?>[]) null);
  157 + FieldsProducer fp = (FieldsProducer) fpm.invoke(r, (Object[]) null);
  158 + // get MtasFieldsProducer using terms
  159 + Terms t = fp.terms(field);
  160 + if (t == null) {
  161 + return new MtasSpanMatchNoneSpans(field);
  162 + } else {
  163 + CodecInfo mtasCodecInfo = CodecInfo.getCodecInfoFromTerms(t);
  164 + return new MtasMaximumExpandSpans(mtasCodecInfo, query.getField(),
  165 + spans);
  166 + }
  167 + } catch (Exception e) {
  168 + throw new IOException("Can't get reader");
  169 + }
  170 +
  171 + }
  172 + }
  173 +
  174 + @Override
  175 + public void extractTerms(Set<Term> terms) {
  176 + subWeight.extractTerms(terms);
  177 + }
  178 +
  179 + }
  180 +
  181 + private class MtasMaximumExpandSpans extends Spans {
  182 +
  183 + Spans subSpans;
  184 + int minPosition, maxPosition;
  185 + String field;
  186 + CodecInfo mtasCodecInfo;
  187 + int startPosition, endPosition;
  188 +
  189 + public MtasMaximumExpandSpans(CodecInfo mtasCodecInfo, String field,
  190 + Spans subSpans) {
  191 + super();
  192 + this.subSpans = subSpans;
  193 + this.field = field;
  194 + this.mtasCodecInfo = mtasCodecInfo;
  195 + this.minPosition = 0;
  196 + this.maxPosition = 0;
  197 + this.startPosition = -1;
  198 + this.endPosition = -1;
  199 + }
  200 +
  201 + @Override
  202 + public int nextStartPosition() throws IOException {
  203 + int basicStartPosition, basicEndPosition;
  204 + while ((basicStartPosition = subSpans
  205 + .nextStartPosition()) != NO_MORE_POSITIONS) {
  206 + basicEndPosition = subSpans.endPosition();
  207 + startPosition = Math.max(minPosition,
  208 + (basicStartPosition - maximumLeft));
  209 + endPosition = Math.min(maxPosition + 1,
  210 + (basicEndPosition + maximumRight));
  211 + if (startPosition <= (basicStartPosition - minimumLeft)
  212 + && endPosition >= (basicEndPosition + minimumRight)) {
  213 + return this.startPosition;
  214 + }
  215 + }
  216 + startPosition = NO_MORE_POSITIONS;
  217 + endPosition = NO_MORE_POSITIONS;
  218 + return NO_MORE_POSITIONS;
  219 + }
  220 +
  221 + @Override
  222 + public int startPosition() {
  223 + return startPosition;
  224 + }
  225 +
  226 + @Override
  227 + public int endPosition() {
  228 + return endPosition;
  229 + }
  230 +
  231 + @Override
  232 + public int width() {
  233 + return endPosition-startPosition;
  234 + }
  235 +
  236 + @Override
  237 + public void collect(SpanCollector collector) throws IOException {
  238 + subSpans.collect(collector);
  239 + }
  240 +
  241 + @Override
  242 + public float positionsCost() {
  243 + return subSpans.positionsCost();
  244 + }
  245 +
  246 + @Override
  247 + public int docID() {
  248 + return subSpans.docID();
  249 + }
  250 +
  251 + @Override
  252 + public int nextDoc() throws IOException {
  253 + int docId = subSpans.nextDoc();
  254 + startPosition = -1;
  255 + endPosition = -1;
  256 + if (docId != NO_MORE_DOCS) {
  257 + IndexDoc doc = mtasCodecInfo.getDoc(field, docId);
  258 + if (doc != null) {
  259 + minPosition = doc.minPosition;
  260 + maxPosition = doc.maxPosition;
  261 + } else {
  262 + minPosition = NO_MORE_POSITIONS;
  263 + maxPosition = NO_MORE_POSITIONS;
  264 + }
  265 + } else {
  266 + minPosition = NO_MORE_POSITIONS;
  267 + maxPosition = NO_MORE_POSITIONS;
  268 + }
  269 + return docId;
  270 + }
  271 +
  272 + @Override
  273 + public int advance(int target) throws IOException {
  274 + int docId = subSpans.advance(target);
  275 + startPosition = -1;
  276 + endPosition = -1;
  277 + if (docId != NO_MORE_DOCS) {
  278 + IndexDoc doc = mtasCodecInfo.getDoc(field, docId);
  279 + if (doc != null) {
  280 + minPosition = doc.minPosition;
  281 + maxPosition = doc.maxPosition;
  282 + } else {
  283 + minPosition = NO_MORE_POSITIONS;
  284 + maxPosition = NO_MORE_POSITIONS;
  285 + }
  286 + } else {
  287 + minPosition = NO_MORE_POSITIONS;
  288 + maxPosition = NO_MORE_POSITIONS;
  289 + }
  290 + return docId;
  291 + }
  292 +
  293 + @Override
  294 + public long cost() {
  295 + return subSpans.cost();
  296 + }
  297 + }
  298 +
  299 +}
... ...
src/mtas/search/spans/util/MtasSpanQuery.java
... ... @@ -6,8 +6,6 @@ import org.apache.lucene.index.IndexReader;
6 6 import org.apache.lucene.search.IndexSearcher;
7 7 import org.apache.lucene.search.spans.SpanQuery;
8 8 import org.apache.lucene.search.spans.SpanWeight;
9   -
10   -import mtas.search.spans.MtasSpanMatchAllQuery;
11 9 import mtas.search.spans.MtasSpanMatchNoneQuery;
12 10  
13 11 public abstract class MtasSpanQuery extends SpanQuery {
... ...