Commit c9d66534fcdb0b1a4474bc798ed755b4d47b2359
1 parent
44ad8971
query optimization
Showing
13 changed files
with
870 additions
and
140 deletions
docker/Dockerfile
| 1 | 1 | # Automatically generated Dockerfile |
| 2 | -# - Build 2017-03-09 09:11 | |
| 2 | +# - Build 2017-03-14 08:49 | |
| 3 | 3 | # - Lucene/Solr version 6.4.2 |
| 4 | 4 | # - Mtas release 20170309 |
| 5 | 5 | # |
| ... | ... | @@ -55,7 +55,7 @@ RUN apt-get update && apt-get install -y lsof software-properties-common python- |
| 55 | 55 | && chmod -R 755 /var/www/html \ |
| 56 | 56 | && printf "echo\n" >> /start.sh \ |
| 57 | 57 | && printf "echo \"================ Mtas -- Multi Tier Annotation Search =================\"\n" >> /start.sh \ |
| 58 | -&& printf "echo \" Timestamp 2017-03-09 09:11\"\n" >> /start.sh \ | |
| 58 | +&& printf "echo \" Timestamp 2017-03-14 08:49\"\n" >> /start.sh \ | |
| 59 | 59 | && printf "echo \" Lucene/Solr version 6.4.2\"\n" >> /start.sh \ |
| 60 | 60 | && printf "echo \" Mtas release 20170309\"\n" >> /start.sh \ |
| 61 | 61 | && printf "echo \" See https://meertensinstituut.github.io/mtas/ for more information\"\n" >> /start.sh \ |
| ... | ... |
junit/mtas/parser/MtasCQLParserTestSentence.java
| ... | ... | @@ -91,7 +91,7 @@ public class MtasCQLParserTestSentence { |
| 91 | 91 | items.add(new MtasSpanSequenceItem(q3, false)); |
| 92 | 92 | MtasSpanQuery q4 = new MtasSpanSequenceQuery(items, null, null); |
| 93 | 93 | MtasSpanQuery q = new MtasSpanOrQuery(q1,q4); |
| 94 | - testCQLParse(field, null, cql, q); | |
| 94 | + testCQLParse(field, null, cql, q); | |
| 95 | 95 | } |
| 96 | 96 | |
| 97 | 97 | @org.junit.Test |
| ... | ... |
src/mtas/search/spans/MtasSpanAndQuery.java
| ... | ... | @@ -2,6 +2,7 @@ package mtas.search.spans; |
| 2 | 2 | |
| 3 | 3 | import java.io.IOException; |
| 4 | 4 | import java.util.ArrayList; |
| 5 | +import java.util.HashSet; | |
| 5 | 6 | import java.util.List; |
| 6 | 7 | |
| 7 | 8 | import org.apache.lucene.index.IndexReader; |
| ... | ... | @@ -19,7 +20,7 @@ public class MtasSpanAndQuery extends MtasSpanQuery { |
| 19 | 20 | |
| 20 | 21 | /** The base query. */ |
| 21 | 22 | private SpanNearQuery baseQuery; |
| 22 | - private List<MtasSpanQuery> clauses; | |
| 23 | + private HashSet<MtasSpanQuery> clauses; | |
| 23 | 24 | |
| 24 | 25 | /** |
| 25 | 26 | * Instantiates a new mtas span and query. |
| ... | ... | @@ -31,7 +32,7 @@ public class MtasSpanAndQuery extends MtasSpanQuery { |
| 31 | 32 | public MtasSpanAndQuery(MtasSpanQuery... initialClauses) { |
| 32 | 33 | super(null, null); |
| 33 | 34 | Integer minimum = null, maximum = null; |
| 34 | - clauses = new ArrayList<MtasSpanQuery>(); | |
| 35 | + clauses = new HashSet<MtasSpanQuery>(); | |
| 35 | 36 | for (MtasSpanQuery item : initialClauses) { |
| 36 | 37 | if (!clauses.contains(item)) { |
| 37 | 38 | clauses.add(item); |
| ... | ... | @@ -80,12 +81,13 @@ public class MtasSpanAndQuery extends MtasSpanQuery { |
| 80 | 81 | if (clauses.size() > 1) { |
| 81 | 82 | // rewrite, count MtasSpanMatchAllQuery and check for MtasSpanMatchNoneQuery |
| 82 | 83 | MtasSpanQuery[] newClauses = new MtasSpanQuery[clauses.size()]; |
| 84 | + MtasSpanQuery[] oldClauses = clauses.toArray(new MtasSpanQuery[clauses.size()]); | |
| 83 | 85 | int singlePositionQueries = 0; |
| 84 | 86 | int matchAllSinglePositionQueries = 0; |
| 85 | 87 | boolean actuallyRewritten = false; |
| 86 | - for (int i = 0; i < clauses.size(); i++) { | |
| 87 | - newClauses[i] = clauses.get(i).rewrite(reader); | |
| 88 | - actuallyRewritten |= clauses.get(i) != newClauses[i]; | |
| 88 | + for (int i = 0; i < oldClauses.length; i++) { | |
| 89 | + newClauses[i] = oldClauses[i].rewrite(reader); | |
| 90 | + actuallyRewritten |= oldClauses[i] != newClauses[i]; | |
| 89 | 91 | if (newClauses[i] instanceof MtasSpanMatchNoneQuery) { |
| 90 | 92 | return (new MtasSpanMatchNoneQuery(this.getField())).rewrite(reader); |
| 91 | 93 | } else { |
| ... | ... | @@ -129,7 +131,7 @@ public class MtasSpanAndQuery extends MtasSpanQuery { |
| 129 | 131 | return super.rewrite(reader); |
| 130 | 132 | } |
| 131 | 133 | } else if (clauses.size() == 1) { |
| 132 | - return clauses.get(0).rewrite(reader); | |
| 134 | + return clauses.iterator().next().rewrite(reader); | |
| 133 | 135 | } else { |
| 134 | 136 | return (new MtasSpanMatchNoneQuery(this.getField())).rewrite(reader); |
| 135 | 137 | } |
| ... | ... | @@ -170,7 +172,9 @@ public class MtasSpanAndQuery extends MtasSpanQuery { |
| 170 | 172 | */ |
| 171 | 173 | @Override |
| 172 | 174 | public int hashCode() { |
| 173 | - return baseQuery.hashCode(); | |
| 175 | + int h = this.getClass().getSimpleName().hashCode(); | |
| 176 | + h = (h * 7) ^ clauses.hashCode(); | |
| 177 | + return h; | |
| 174 | 178 | } |
| 175 | 179 | |
| 176 | 180 | } |
| ... | ... |
src/mtas/search/spans/MtasSpanMatchNoneQuery.java
| ... | ... | @@ -180,7 +180,11 @@ public class MtasSpanMatchNoneQuery extends MtasSpanQuery { |
| 180 | 180 | if (getClass() != obj.getClass()) |
| 181 | 181 | return false; |
| 182 | 182 | final MtasSpanMatchNoneQuery that = (MtasSpanMatchNoneQuery) obj; |
| 183 | - return field.equals(that.field); | |
| 183 | + if(field==null) { | |
| 184 | + return that.field==null; | |
| 185 | + } else { | |
| 186 | + return field.equals(that.field); | |
| 187 | + } | |
| 184 | 188 | } |
| 185 | 189 | |
| 186 | 190 | /* |
| ... | ... | @@ -191,7 +195,9 @@ public class MtasSpanMatchNoneQuery extends MtasSpanQuery { |
| 191 | 195 | @Override |
| 192 | 196 | public int hashCode() { |
| 193 | 197 | int h = this.getClass().getSimpleName().hashCode(); |
| 194 | - h = (h * 7) ^ field.hashCode(); | |
| 198 | + if(field!=null) { | |
| 199 | + h = (h * 7) ^ field.hashCode(); | |
| 200 | + } | |
| 195 | 201 | return h; |
| 196 | 202 | } |
| 197 | 203 | |
| ... | ... |
src/mtas/search/spans/MtasSpanOrQuery.java
| ... | ... | @@ -2,6 +2,7 @@ package mtas.search.spans; |
| 2 | 2 | |
| 3 | 3 | import java.io.IOException; |
| 4 | 4 | import java.util.ArrayList; |
| 5 | +import java.util.HashSet; | |
| 5 | 6 | import java.util.Iterator; |
| 6 | 7 | import java.util.List; |
| 7 | 8 | |
| ... | ... | @@ -19,7 +20,7 @@ import mtas.search.spans.util.MtasSpanQuery; |
| 19 | 20 | public class MtasSpanOrQuery extends MtasSpanQuery { |
| 20 | 21 | |
| 21 | 22 | /** The clauses. */ |
| 22 | - private List<MtasSpanQuery> clauses; | |
| 23 | + private HashSet<MtasSpanQuery> clauses; | |
| 23 | 24 | |
| 24 | 25 | private SpanQuery baseQuery; |
| 25 | 26 | |
| ... | ... | @@ -32,7 +33,7 @@ public class MtasSpanOrQuery extends MtasSpanQuery { |
| 32 | 33 | public MtasSpanOrQuery(MtasSpanQuery... initialClauses) { |
| 33 | 34 | super(null, null); |
| 34 | 35 | Integer minimum = null, maximum = null; |
| 35 | - clauses = new ArrayList<MtasSpanQuery>(); | |
| 36 | + clauses = new HashSet<MtasSpanQuery>(); | |
| 36 | 37 | for (MtasSpanQuery item : initialClauses) { |
| 37 | 38 | if (!clauses.contains(item)) { |
| 38 | 39 | minimum = clauses.isEmpty() ? item.getMinimumWidth() |
| ... | ... | @@ -66,13 +67,15 @@ public class MtasSpanOrQuery extends MtasSpanQuery { |
| 66 | 67 | // rewrite, count MtasSpanMatchAllQuery and check for |
| 67 | 68 | // MtasSpanMatchNoneQuery |
| 68 | 69 | MtasSpanQuery[] newClauses = new MtasSpanQuery[clauses.size()]; |
| 70 | + MtasSpanQuery[] oldClauses = clauses | |
| 71 | + .toArray(new MtasSpanQuery[clauses.size()]); | |
| 69 | 72 | int singlePositionQueries = 0; |
| 70 | 73 | int matchAllSinglePositionQueries = 0; |
| 71 | 74 | int matchNoneQueries = 0; |
| 72 | 75 | boolean actuallyRewritten = false; |
| 73 | - for (int i = 0; i < clauses.size(); i++) { | |
| 74 | - newClauses[i] = clauses.get(i).rewrite(reader); | |
| 75 | - actuallyRewritten |= clauses.get(i) != newClauses[i]; | |
| 76 | + for (int i = 0; i < oldClauses.length; i++) { | |
| 77 | + newClauses[i] = oldClauses[i].rewrite(reader); | |
| 78 | + actuallyRewritten |= oldClauses[i] != newClauses[i]; | |
| 76 | 79 | if (newClauses[i] instanceof MtasSpanMatchNoneQuery) { |
| 77 | 80 | matchNoneQueries++; |
| 78 | 81 | } else if (newClauses[i].isSinglePositionQuery()) { |
| ... | ... | @@ -118,10 +121,10 @@ public class MtasSpanOrQuery extends MtasSpanQuery { |
| 118 | 121 | return super.rewrite(reader); |
| 119 | 122 | } |
| 120 | 123 | } else if (clauses.size() == 1) { |
| 121 | - return clauses.get(0).rewrite(reader); | |
| 124 | + return clauses.iterator().next().rewrite(reader); | |
| 122 | 125 | } else { |
| 123 | 126 | return (new MtasSpanMatchNoneQuery(this.getField())).rewrite(reader); |
| 124 | - } | |
| 127 | + } | |
| 125 | 128 | } |
| 126 | 129 | |
| 127 | 130 | /* |
| ... | ... | @@ -171,7 +174,7 @@ public class MtasSpanOrQuery extends MtasSpanQuery { |
| 171 | 174 | @Override |
| 172 | 175 | public int hashCode() { |
| 173 | 176 | int h = this.getClass().getSimpleName().hashCode(); |
| 174 | - h = (h * 7) ^ baseQuery.hashCode(); | |
| 177 | + h = (h * 7) ^ clauses.hashCode(); | |
| 175 | 178 | return h; |
| 176 | 179 | } |
| 177 | 180 | |
| ... | ... |
src/mtas/search/spans/MtasSpanPositionSpans.java
src/mtas/search/spans/MtasSpanRecurrenceQuery.java
| ... | ... | @@ -22,7 +22,7 @@ public class MtasSpanRecurrenceQuery extends MtasSpanQuery |
| 22 | 22 | implements Cloneable { |
| 23 | 23 | |
| 24 | 24 | /** The clause. */ |
| 25 | - private MtasSpanQuery clause; | |
| 25 | + private MtasSpanQuery query; | |
| 26 | 26 | |
| 27 | 27 | /** The minimum recurrence. */ |
| 28 | 28 | private int minimumRecurrence; |
| ... | ... | @@ -31,7 +31,7 @@ public class MtasSpanRecurrenceQuery extends MtasSpanQuery |
| 31 | 31 | private int maximumRecurrence; |
| 32 | 32 | |
| 33 | 33 | /** The ignore clause. */ |
| 34 | - private MtasSpanQuery ignoreClause; | |
| 34 | + private MtasSpanQuery ignoreQuery; | |
| 35 | 35 | |
| 36 | 36 | /** The maximum ignore length. */ |
| 37 | 37 | private Integer maximumIgnoreLength; |
| ... | ... | @@ -42,56 +42,86 @@ public class MtasSpanRecurrenceQuery extends MtasSpanQuery |
| 42 | 42 | /** |
| 43 | 43 | * Instantiates a new mtas span recurrence query. |
| 44 | 44 | * |
| 45 | - * @param clause | |
| 45 | + * @param query | |
| 46 | 46 | * the clause |
| 47 | 47 | * @param minimumRecurrence |
| 48 | 48 | * the minimum recurrence |
| 49 | 49 | * @param maximumRecurrence |
| 50 | 50 | * the maximum recurrence |
| 51 | - * @param ignore | |
| 51 | + * @param ignoreQuery | |
| 52 | 52 | * the ignore |
| 53 | 53 | * @param maximumIgnoreLength |
| 54 | 54 | * the maximum ignore length |
| 55 | 55 | */ |
| 56 | - public MtasSpanRecurrenceQuery(MtasSpanQuery clause, int minimumRecurrence, | |
| 57 | - int maximumRecurrence, MtasSpanQuery ignore, | |
| 56 | + public MtasSpanRecurrenceQuery(MtasSpanQuery query, int minimumRecurrence, | |
| 57 | + int maximumRecurrence, MtasSpanQuery ignoreQuery, | |
| 58 | 58 | Integer maximumIgnoreLength) { |
| 59 | 59 | super(null, null); |
| 60 | + field = query.getField(); | |
| 61 | + this.query = query; | |
| 62 | + if (field != null && ignoreQuery != null) { | |
| 63 | + if (ignoreQuery.getField() == null | |
| 64 | + || field.equals(ignoreQuery.getField())) { | |
| 65 | + this.ignoreQuery = ignoreQuery; | |
| 66 | + this.maximumIgnoreLength = maximumIgnoreLength==null?1:maximumIgnoreLength; | |
| 67 | + } else { | |
| 68 | + throw new IllegalArgumentException( | |
| 69 | + "ignore must have same field as clauses"); | |
| 70 | + } | |
| 71 | + } else { | |
| 72 | + this.ignoreQuery = null; | |
| 73 | + this.maximumIgnoreLength = null; | |
| 74 | + } | |
| 75 | + setRecurrence(minimumRecurrence, maximumRecurrence); | |
| 76 | + } | |
| 77 | + | |
| 78 | + /** | |
| 79 | + * Gets the clause. | |
| 80 | + * | |
| 81 | + * @return the clause | |
| 82 | + */ | |
| 83 | + public MtasSpanQuery getQuery() { | |
| 84 | + return query; | |
| 85 | + } | |
| 86 | + | |
| 87 | + public MtasSpanQuery getIgnoreQuery() { | |
| 88 | + return ignoreQuery; | |
| 89 | + } | |
| 90 | + | |
| 91 | + public Integer getMaximumIgnoreLength() { | |
| 92 | + return maximumIgnoreLength; | |
| 93 | + } | |
| 94 | + | |
| 95 | + public int getMinimumRecurrence() { | |
| 96 | + return minimumRecurrence; | |
| 97 | + } | |
| 98 | + | |
| 99 | + public int getMaximumRecurrence() { | |
| 100 | + return maximumRecurrence; | |
| 101 | + } | |
| 102 | + | |
| 103 | + public void setRecurrence(int minimumRecurrence, int maximumRecurrence) { | |
| 60 | 104 | if (minimumRecurrence > maximumRecurrence) { |
| 61 | 105 | throw new IllegalArgumentException( |
| 62 | 106 | "minimumRecurrence > maximumRecurrence"); |
| 63 | 107 | } else if (minimumRecurrence < 1) { |
| 64 | 108 | throw new IllegalArgumentException("minimumRecurrence < 1 not supported"); |
| 65 | - } else if (clause == null) { | |
| 109 | + } else if (query == null) { | |
| 66 | 110 | throw new IllegalArgumentException("no clause"); |
| 67 | 111 | } |
| 68 | 112 | this.minimumRecurrence = minimumRecurrence; |
| 69 | 113 | this.maximumRecurrence = maximumRecurrence; |
| 70 | - field = clause.getField(); | |
| 71 | - this.clause = clause; | |
| 72 | - if (field != null && ignore != null) { | |
| 73 | - if (ignore.getField() == null || field.equals(ignore.getField())) { | |
| 74 | - this.ignoreClause = ignore; | |
| 75 | - this.maximumIgnoreLength = maximumIgnoreLength; | |
| 76 | - } else { | |
| 77 | - throw new IllegalArgumentException( | |
| 78 | - "ignore must have same field as clauses"); | |
| 79 | - } | |
| 80 | - } else { | |
| 81 | - this.ignoreClause = null; | |
| 82 | - this.maximumIgnoreLength = null; | |
| 83 | - } | |
| 84 | 114 | // set minimum/maximum |
| 85 | 115 | Integer minimum = null, maximum = null; |
| 86 | - if (clause.getMinimumWidth() != null) { | |
| 87 | - minimum = minimumRecurrence * clause.getMinimumWidth(); | |
| 116 | + if (query.getMinimumWidth() != null) { | |
| 117 | + minimum = minimumRecurrence * query.getMinimumWidth(); | |
| 88 | 118 | } |
| 89 | - if (clause.getMaximumWidth() != null) { | |
| 90 | - maximum = maximumRecurrence * clause.getMaximumWidth(); | |
| 91 | - if (ignore != null && maximumIgnoreLength != null) { | |
| 92 | - if (ignore.getMaximumWidth() != null) { | |
| 119 | + if (query.getMaximumWidth() != null) { | |
| 120 | + maximum = maximumRecurrence * query.getMaximumWidth(); | |
| 121 | + if (ignoreQuery != null && maximumIgnoreLength != null) { | |
| 122 | + if (ignoreQuery.getMaximumWidth() != null) { | |
| 93 | 123 | maximum += (maximumRecurrence - 1) * maximumIgnoreLength |
| 94 | - * ignore.getMaximumWidth(); | |
| 124 | + * ignoreQuery.getMaximumWidth(); | |
| 95 | 125 | } else { |
| 96 | 126 | maximum = null; |
| 97 | 127 | } |
| ... | ... | @@ -100,15 +130,6 @@ public class MtasSpanRecurrenceQuery extends MtasSpanQuery |
| 100 | 130 | setWidth(minimum, maximum); |
| 101 | 131 | } |
| 102 | 132 | |
| 103 | - /** | |
| 104 | - * Gets the clause. | |
| 105 | - * | |
| 106 | - * @return the clause | |
| 107 | - */ | |
| 108 | - public MtasSpanQuery getClause() { | |
| 109 | - return clause; | |
| 110 | - } | |
| 111 | - | |
| 112 | 133 | /* |
| 113 | 134 | * (non-Javadoc) |
| 114 | 135 | * |
| ... | ... | @@ -127,18 +148,23 @@ public class MtasSpanRecurrenceQuery extends MtasSpanQuery |
| 127 | 148 | */ |
| 128 | 149 | @Override |
| 129 | 150 | public MtasSpanQuery rewrite(IndexReader reader) throws IOException { |
| 130 | - MtasSpanQuery newClause = clause.rewrite(reader); | |
| 131 | - MtasSpanQuery newIgnoreClause = (ignoreClause != null) | |
| 132 | - ? ignoreClause.rewrite(reader) : null; | |
| 133 | - if(newClause instanceof MtasSpanRecurrenceQuery) { | |
| 134 | - //for now too difficult, possibly merge later | |
| 135 | - } | |
| 136 | - if (newClause != clause | |
| 137 | - || (newIgnoreClause != null && newIgnoreClause != ignoreClause)) { | |
| 138 | - return new MtasSpanRecurrenceQuery(newClause, minimumRecurrence, | |
| 139 | - maximumRecurrence, newIgnoreClause, maximumIgnoreLength).rewrite(reader); | |
| 151 | + MtasSpanQuery newQuery = query.rewrite(reader); | |
| 152 | + if (maximumRecurrence == 1) { | |
| 153 | + return newQuery; | |
| 140 | 154 | } else { |
| 141 | - return super.rewrite(reader); | |
| 155 | + MtasSpanQuery newIgnoreQuery = (ignoreQuery != null) | |
| 156 | + ? ignoreQuery.rewrite(reader) : null; | |
| 157 | + if (newQuery instanceof MtasSpanRecurrenceQuery) { | |
| 158 | + // TODO: for now too difficult, possibly merge later | |
| 159 | + } | |
| 160 | + if (newQuery != query | |
| 161 | + || (newIgnoreQuery != null && newIgnoreQuery != ignoreQuery)) { | |
| 162 | + return new MtasSpanRecurrenceQuery(newQuery, minimumRecurrence, | |
| 163 | + maximumRecurrence, newIgnoreQuery, maximumIgnoreLength) | |
| 164 | + .rewrite(reader); | |
| 165 | + } else { | |
| 166 | + return super.rewrite(reader); | |
| 167 | + } | |
| 142 | 168 | } |
| 143 | 169 | } |
| 144 | 170 | |
| ... | ... | @@ -151,10 +177,10 @@ public class MtasSpanRecurrenceQuery extends MtasSpanQuery |
| 151 | 177 | public String toString(String field) { |
| 152 | 178 | StringBuilder buffer = new StringBuilder(); |
| 153 | 179 | buffer.append(this.getClass().getSimpleName() + "(["); |
| 154 | - buffer.append(clause.toString(clause.getField())); | |
| 180 | + buffer.append(query.toString(query.getField())); | |
| 155 | 181 | buffer.append("," + minimumRecurrence + "," + maximumRecurrence); |
| 156 | 182 | buffer.append(", "); |
| 157 | - buffer.append(ignoreClause); | |
| 183 | + buffer.append(ignoreQuery); | |
| 158 | 184 | buffer.append("])"); |
| 159 | 185 | return buffer.toString(); |
| 160 | 186 | } |
| ... | ... | @@ -173,12 +199,12 @@ public class MtasSpanRecurrenceQuery extends MtasSpanQuery |
| 173 | 199 | if (getClass() != obj.getClass()) |
| 174 | 200 | return false; |
| 175 | 201 | final MtasSpanRecurrenceQuery other = (MtasSpanRecurrenceQuery) obj; |
| 176 | - return clause.equals(other.clause) | |
| 202 | + return query.equals(other.query) | |
| 177 | 203 | && minimumRecurrence == other.minimumRecurrence |
| 178 | 204 | && maximumRecurrence == other.maximumRecurrence |
| 179 | - && ((ignoreClause == null && other.ignoreClause == null) | |
| 180 | - || ignoreClause != null && other.ignoreClause != null | |
| 181 | - && ignoreClause.equals(other.ignoreClause)); | |
| 205 | + && ((ignoreQuery == null && other.ignoreQuery == null) | |
| 206 | + || ignoreQuery != null && other.ignoreQuery != null | |
| 207 | + && ignoreQuery.equals(other.ignoreQuery)); | |
| 182 | 208 | } |
| 183 | 209 | |
| 184 | 210 | /* |
| ... | ... | @@ -189,7 +215,7 @@ public class MtasSpanRecurrenceQuery extends MtasSpanQuery |
| 189 | 215 | @Override |
| 190 | 216 | public int hashCode() { |
| 191 | 217 | int h = this.getClass().getSimpleName().hashCode(); |
| 192 | - h = (h * 7) ^ clause.hashCode(); | |
| 218 | + h = (h * 7) ^ query.hashCode(); | |
| 193 | 219 | h = (h * 11) ^ minimumRecurrence; |
| 194 | 220 | h = (h * 13) ^ maximumRecurrence; |
| 195 | 221 | return h; |
| ... | ... | @@ -205,10 +231,10 @@ public class MtasSpanRecurrenceQuery extends MtasSpanQuery |
| 205 | 231 | @Override |
| 206 | 232 | public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) |
| 207 | 233 | throws IOException { |
| 208 | - SpanWeight subWeight = clause.createWeight(searcher, false); | |
| 234 | + SpanWeight subWeight = query.createWeight(searcher, false); | |
| 209 | 235 | SpanWeight ignoreWeight = null; |
| 210 | - if (ignoreClause != null) { | |
| 211 | - ignoreWeight = ignoreClause.createWeight(searcher, false); | |
| 236 | + if (ignoreQuery != null) { | |
| 237 | + ignoreWeight = ignoreQuery.createWeight(searcher, false); | |
| 212 | 238 | } |
| 213 | 239 | return new SpanRecurrenceWeight(subWeight, ignoreWeight, |
| 214 | 240 | maximumIgnoreLength, searcher, |
| ... | ... |
src/mtas/search/spans/MtasSpanSequenceItem.java
| ... | ... | @@ -77,26 +77,180 @@ public class MtasSpanSequenceItem { |
| 77 | 77 | @Override |
| 78 | 78 | public boolean equals(Object o) { |
| 79 | 79 | if (o instanceof MtasSpanSequenceItem) { |
| 80 | - final MtasSpanSequenceItem that = (MtasSpanSequenceItem) o; | |
| 80 | + MtasSpanSequenceItem that = (MtasSpanSequenceItem) o; | |
| 81 | 81 | return spanQuery.equals(that.getQuery()) |
| 82 | 82 | && (optional == that.isOptional()); |
| 83 | 83 | } else { |
| 84 | - return false; | |
| 84 | + return false; | |
| 85 | 85 | } |
| 86 | 86 | } |
| 87 | 87 | |
| 88 | + @Override | |
| 89 | + public int hashCode() { | |
| 90 | + int h = this.getClass().getSimpleName().hashCode(); | |
| 91 | + h = (h * 3) ^ spanQuery.hashCode(); | |
| 92 | + h += (optional ? 1 : 0); | |
| 93 | + return h; | |
| 94 | + } | |
| 95 | + | |
| 88 | 96 | public MtasSpanSequenceItem rewrite(IndexReader reader) throws IOException { |
| 89 | 97 | MtasSpanQuery newSpanQuery = spanQuery.rewrite(reader); |
| 90 | - if(newSpanQuery!=spanQuery) { | |
| 98 | + if (newSpanQuery != spanQuery) { | |
| 91 | 99 | return new MtasSpanSequenceItem(newSpanQuery, optional); |
| 92 | 100 | } else { |
| 93 | 101 | return this; |
| 94 | 102 | } |
| 95 | 103 | } |
| 96 | - | |
| 104 | + | |
| 97 | 105 | @Override |
| 98 | 106 | public String toString() { |
| 99 | - return "["+spanQuery.toString()+" - "+(optional?"OPTIONAL":"NOT OPTIONAL")+"]"; | |
| 107 | + return "[" + spanQuery.toString() + " - " | |
| 108 | + + (optional ? "OPTIONAL" : "NOT OPTIONAL") + "]"; | |
| 109 | + } | |
| 110 | + | |
| 111 | + public static MtasSpanSequenceItem merge(MtasSpanSequenceItem item1, | |
| 112 | + MtasSpanSequenceItem item2, MtasSpanQuery ignoreQuery, | |
| 113 | + Integer maximumIgnoreLength) { | |
| 114 | + if (item1 == null || item2 == null) { | |
| 115 | + return null; | |
| 116 | + } else { | |
| 117 | + MtasSpanQuery q1 = item1.getQuery(); | |
| 118 | + MtasSpanQuery q2 = item2.getQuery(); | |
| 119 | + boolean optional = item1.optional && item2.optional; | |
| 120 | + // first spanRecurrenceQuery | |
| 121 | + if (q1 instanceof MtasSpanRecurrenceQuery) { | |
| 122 | + MtasSpanRecurrenceQuery rq1 = (MtasSpanRecurrenceQuery) q1; | |
| 123 | + // both spanRecurrenceQuery | |
| 124 | + if (q2 instanceof MtasSpanRecurrenceQuery) { | |
| 125 | + MtasSpanRecurrenceQuery rq2 = (MtasSpanRecurrenceQuery) q2; | |
| 126 | + // equal query | |
| 127 | + if (rq1.getQuery().equals(rq2.getQuery())) { | |
| 128 | + // equal ignoreQuery settings | |
| 129 | + if ((ignoreQuery == null && rq1.getIgnoreQuery() == null | |
| 130 | + && rq2.getIgnoreQuery() == null) | |
| 131 | + || (ignoreQuery != null && rq1.getIgnoreQuery() != null | |
| 132 | + && ignoreQuery.equals(rq1.getIgnoreQuery()) | |
| 133 | + && maximumIgnoreLength == rq1.getMaximumIgnoreLength() | |
| 134 | + && rq2.getIgnoreQuery() != null | |
| 135 | + && ignoreQuery.equals(rq2.getIgnoreQuery()) | |
| 136 | + && maximumIgnoreLength == rq2.getMaximumIgnoreLength())) { | |
| 137 | + // at least one optional | |
| 138 | + if (item1.optional || item2.optional) { | |
| 139 | + int minimum = Math.min(rq1.getMinimumRecurrence(), | |
| 140 | + rq2.getMinimumRecurrence()); | |
| 141 | + int maximum = rq1.getMaximumRecurrence() | |
| 142 | + + rq2.getMaximumRecurrence(); | |
| 143 | + // only if ranges match | |
| 144 | + if ((rq1.getMaximumRecurrence() + 1) >= rq2 | |
| 145 | + .getMinimumRecurrence() | |
| 146 | + && (rq2.getMaximumRecurrence() + 1) >= rq1 | |
| 147 | + .getMinimumRecurrence()) { | |
| 148 | + return new MtasSpanSequenceItem( | |
| 149 | + new MtasSpanRecurrenceQuery(rq1.getQuery(), minimum, | |
| 150 | + maximum, ignoreQuery, maximumIgnoreLength), | |
| 151 | + optional); | |
| 152 | + } | |
| 153 | + // not optional | |
| 154 | + } else { | |
| 155 | + int minimum = rq1.getMinimumRecurrence() | |
| 156 | + + rq2.getMinimumRecurrence(); | |
| 157 | + int maximum = rq1.getMaximumRecurrence() | |
| 158 | + + rq2.getMaximumRecurrence(); | |
| 159 | + // only if ranges match | |
| 160 | + if ((rq1.getMaximumRecurrence() + 1) >= rq2 | |
| 161 | + .getMinimumRecurrence() | |
| 162 | + && (rq2.getMaximumRecurrence() + 1) >= rq1 | |
| 163 | + .getMinimumRecurrence()) { | |
| 164 | + return new MtasSpanSequenceItem( | |
| 165 | + new MtasSpanRecurrenceQuery(rq1.getQuery(), minimum, | |
| 166 | + maximum, ignoreQuery, maximumIgnoreLength), | |
| 167 | + optional); | |
| 168 | + } | |
| 169 | + } | |
| 170 | + } | |
| 171 | + } | |
| 172 | + } else { | |
| 173 | + if (rq1.getQuery().equals(q2)) { | |
| 174 | + if ((ignoreQuery == null && rq1.getIgnoreQuery() == null) | |
| 175 | + || (ignoreQuery != null && rq1.getIgnoreQuery() != null | |
| 176 | + && ignoreQuery.equals(rq1.getIgnoreQuery()) | |
| 177 | + && rq1.getMaximumIgnoreLength() != null | |
| 178 | + && maximumIgnoreLength | |
| 179 | + .equals(rq1.getMaximumIgnoreLength()))) { | |
| 180 | + if (!optional) { | |
| 181 | + if (item1.optional) { | |
| 182 | + if (rq1.getMinimumRecurrence() == 1) { | |
| 183 | + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery( | |
| 184 | + q2, 1, rq1.getMaximumRecurrence() + 1, ignoreQuery, | |
| 185 | + maximumIgnoreLength), false); | |
| 186 | + } | |
| 187 | + } else if (item2.optional) { | |
| 188 | + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery( | |
| 189 | + q2, rq1.getMinimumRecurrence(), | |
| 190 | + rq1.getMaximumRecurrence() + 1, ignoreQuery, | |
| 191 | + maximumIgnoreLength), false); | |
| 192 | + } else { | |
| 193 | + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery( | |
| 194 | + q2, rq1.getMinimumRecurrence() + 1, | |
| 195 | + rq1.getMaximumRecurrence() + 1, ignoreQuery, | |
| 196 | + maximumIgnoreLength), false); | |
| 197 | + } | |
| 198 | + } else { | |
| 199 | + if (rq1.getMinimumRecurrence() == 1) { | |
| 200 | + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery( | |
| 201 | + q2, 1, rq1.getMaximumRecurrence() + 1, ignoreQuery, | |
| 202 | + maximumIgnoreLength), true); | |
| 203 | + } | |
| 204 | + } | |
| 205 | + } | |
| 206 | + } | |
| 207 | + } | |
| 208 | + // second spanRecurrenceQuery | |
| 209 | + } else if (q2 instanceof MtasSpanRecurrenceQuery) { | |
| 210 | + MtasSpanRecurrenceQuery rq2 = (MtasSpanRecurrenceQuery) q2; | |
| 211 | + if (rq2.getQuery().equals(q1)) { | |
| 212 | + if ((ignoreQuery == null && rq2.getIgnoreQuery() == null) | |
| 213 | + || (ignoreQuery != null && rq2.getIgnoreQuery()!=null && ignoreQuery.equals(rq2.getIgnoreQuery()) | |
| 214 | + && maximumIgnoreLength == rq2.getMaximumIgnoreLength())) { | |
| 215 | + if (!optional) { | |
| 216 | + if (item1.optional) { | |
| 217 | + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(q1, | |
| 218 | + rq2.getMinimumRecurrence(), rq2.getMaximumRecurrence() + 1, | |
| 219 | + ignoreQuery, maximumIgnoreLength), false); | |
| 220 | + } else if (item2.optional) { | |
| 221 | + if (rq2.getMinimumRecurrence() == 1) { | |
| 222 | + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery( | |
| 223 | + q1, 1, rq2.getMaximumRecurrence() + 1, ignoreQuery, | |
| 224 | + maximumIgnoreLength), false); | |
| 225 | + } | |
| 226 | + } else { | |
| 227 | + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(q1, | |
| 228 | + rq2.getMinimumRecurrence() + 1, | |
| 229 | + rq2.getMaximumRecurrence() + 1, ignoreQuery, | |
| 230 | + maximumIgnoreLength), false); | |
| 231 | + } | |
| 232 | + } else { | |
| 233 | + if (rq2.getMinimumRecurrence() == 1) { | |
| 234 | + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(q1, | |
| 235 | + 1, rq2.getMaximumRecurrence() + 1, ignoreQuery, | |
| 236 | + maximumIgnoreLength), true); | |
| 237 | + } | |
| 238 | + } | |
| 239 | + } | |
| 240 | + } | |
| 241 | + // both no spanRecurrenceQuery | |
| 242 | + } else if (q1.equals(q2)) { | |
| 243 | + // at least one optional | |
| 244 | + if (item1.optional || item2.optional) { | |
| 245 | + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(q1, 1, 2, | |
| 246 | + ignoreQuery, maximumIgnoreLength), optional); | |
| 247 | + } else { | |
| 248 | + return new MtasSpanSequenceItem(new MtasSpanRecurrenceQuery(q1, 2, 2, | |
| 249 | + ignoreQuery, maximumIgnoreLength), optional); | |
| 250 | + } | |
| 251 | + } | |
| 252 | + return null; | |
| 253 | + } | |
| 100 | 254 | } |
| 101 | 255 | |
| 102 | 256 | } |
| ... | ... |
src/mtas/search/spans/MtasSpanSequenceQuery.java
| ... | ... | @@ -2,6 +2,7 @@ package mtas.search.spans; |
| 2 | 2 | |
| 3 | 3 | import java.io.IOException; |
| 4 | 4 | import java.util.ArrayList; |
| 5 | +import java.util.Arrays; | |
| 5 | 6 | import java.util.Iterator; |
| 6 | 7 | import java.util.List; |
| 7 | 8 | import java.util.Map; |
| ... | ... | @@ -27,7 +28,7 @@ public class MtasSpanSequenceQuery extends MtasSpanQuery { |
| 27 | 28 | private List<MtasSpanSequenceItem> items; |
| 28 | 29 | |
| 29 | 30 | /** The ignore clause. */ |
| 30 | - private MtasSpanQuery ignoreClause; | |
| 31 | + private MtasSpanQuery ignoreQuery; | |
| 31 | 32 | |
| 32 | 33 | /** The maximum ignore length. */ |
| 33 | 34 | private Integer maximumIgnoreLength; |
| ... | ... | @@ -40,13 +41,13 @@ public class MtasSpanSequenceQuery extends MtasSpanQuery { |
| 40 | 41 | * |
| 41 | 42 | * @param items |
| 42 | 43 | * the items |
| 43 | - * @param ignore | |
| 44 | + * @param ignoreQuery | |
| 44 | 45 | * the ignore |
| 45 | 46 | * @param maximumIgnoreLength |
| 46 | 47 | * the maximum ignore length |
| 47 | 48 | */ |
| 48 | 49 | public MtasSpanSequenceQuery(List<MtasSpanSequenceItem> items, |
| 49 | - MtasSpanQuery ignore, Integer maximumIgnoreLength) { | |
| 50 | + MtasSpanQuery ignoreQuery, Integer maximumIgnoreLength) { | |
| 50 | 51 | super(null, null); |
| 51 | 52 | Integer minimum = 0, maximum = 0; |
| 52 | 53 | this.items = items; |
| ... | ... | @@ -68,24 +69,26 @@ public class MtasSpanSequenceQuery extends MtasSpanQuery { |
| 68 | 69 | } |
| 69 | 70 | } |
| 70 | 71 | // check ignore |
| 71 | - if (field != null && ignore != null) { | |
| 72 | - if (ignore.getField() == null || field.equals(ignore.getField())) { | |
| 73 | - this.ignoreClause = ignore; | |
| 74 | - this.maximumIgnoreLength = maximumIgnoreLength; | |
| 72 | + if (field != null && ignoreQuery != null) { | |
| 73 | + if (ignoreQuery.getField() == null | |
| 74 | + || field.equals(ignoreQuery.getField())) { | |
| 75 | + this.ignoreQuery = ignoreQuery; | |
| 76 | + this.maximumIgnoreLength = maximumIgnoreLength == null ? 1 | |
| 77 | + : maximumIgnoreLength; | |
| 75 | 78 | } else { |
| 76 | 79 | throw new IllegalArgumentException( |
| 77 | 80 | "ignore must have same field as clauses"); |
| 78 | 81 | } |
| 79 | 82 | if (maximum != null && items.size() > 1) { |
| 80 | - if (ignore.getMaximumWidth() != null) { | |
| 81 | - maximum += (items.size() - 1) * maximumIgnoreLength | |
| 82 | - * ignoreClause.getMaximumWidth(); | |
| 83 | + if (this.ignoreQuery.getMaximumWidth() != null) { | |
| 84 | + maximum += (items.size() - 1) * this.maximumIgnoreLength | |
| 85 | + * this.ignoreQuery.getMaximumWidth(); | |
| 83 | 86 | } else { |
| 84 | 87 | maximum = null; |
| 85 | 88 | } |
| 86 | 89 | } |
| 87 | 90 | } else { |
| 88 | - this.ignoreClause = null; | |
| 91 | + this.ignoreQuery = null; | |
| 89 | 92 | this.maximumIgnoreLength = null; |
| 90 | 93 | } |
| 91 | 94 | setWidth(minimum, maximum); |
| ... | ... | @@ -101,6 +104,18 @@ public class MtasSpanSequenceQuery extends MtasSpanQuery { |
| 101 | 104 | return field; |
| 102 | 105 | } |
| 103 | 106 | |
| 107 | + public List<MtasSpanSequenceItem> getItems() { | |
| 108 | + return items; | |
| 109 | + } | |
| 110 | + | |
| 111 | + public MtasSpanQuery getIgnoreQuery() { | |
| 112 | + return ignoreQuery; | |
| 113 | + } | |
| 114 | + | |
| 115 | + public Integer getMaximumIgnoreLength() { | |
| 116 | + return maximumIgnoreLength; | |
| 117 | + } | |
| 118 | + | |
| 104 | 119 | /* |
| 105 | 120 | * (non-Javadoc) |
| 106 | 121 | * |
| ... | ... | @@ -112,24 +127,43 @@ public class MtasSpanSequenceQuery extends MtasSpanQuery { |
| 112 | 127 | if (items.size() == 1) { |
| 113 | 128 | return items.get(0).getQuery().rewrite(reader); |
| 114 | 129 | } else { |
| 115 | - MtasSpanSequenceItem newItem; | |
| 130 | + MtasSpanSequenceItem newItem, previousNewItem = null; | |
| 116 | 131 | ArrayList<MtasSpanSequenceItem> newItems = new ArrayList<MtasSpanSequenceItem>( |
| 117 | 132 | items.size()); |
| 118 | - MtasSpanQuery newIgnoreClause = ignoreClause != null | |
| 119 | - ? ignoreClause.rewrite(reader) : null; | |
| 120 | - boolean actuallyRewritten = ignoreClause != null | |
| 121 | - ? newIgnoreClause != ignoreClause : false; | |
| 133 | + MtasSpanQuery newIgnoreClause = ignoreQuery != null | |
| 134 | + ? ignoreQuery.rewrite(reader) : null; | |
| 135 | + boolean actuallyRewritten = ignoreQuery != null | |
| 136 | + ? newIgnoreClause != ignoreQuery : false; | |
| 122 | 137 | for (int i = 0; i < items.size(); i++) { |
| 123 | 138 | newItem = items.get(i).rewrite(reader); |
| 124 | - actuallyRewritten |= items.get(i) != newItem; | |
| 125 | - // for now too difficult, possibly later merge with previous if possible | |
| 126 | - newItems.add(newItem); | |
| 139 | + if (newItem.getQuery() instanceof MtasSpanMatchNoneQuery) { | |
| 140 | + if (!newItem.isOptional()) { | |
| 141 | + return new MtasSpanMatchNoneQuery(field); | |
| 142 | + } else { | |
| 143 | + actuallyRewritten = true; | |
| 144 | + } | |
| 145 | + } else { | |
| 146 | + actuallyRewritten |= items.get(i) != newItem; | |
| 147 | + MtasSpanSequenceItem previousMergedItem = MtasSpanSequenceItem.merge( | |
| 148 | + previousNewItem, newItem, ignoreQuery, maximumIgnoreLength); | |
| 149 | + if (previousMergedItem != null) { | |
| 150 | + newItems.set((newItems.size() - 1), previousMergedItem); | |
| 151 | + actuallyRewritten = true; | |
| 152 | + } else { | |
| 153 | + newItems.add(newItem); | |
| 154 | + } | |
| 155 | + previousNewItem = newItem; | |
| 156 | + } | |
| 127 | 157 | } |
| 128 | 158 | if (!actuallyRewritten) { |
| 129 | 159 | return super.rewrite(reader); |
| 130 | 160 | } else { |
| 131 | - return new MtasSpanSequenceQuery(newItems, newIgnoreClause, | |
| 132 | - maximumIgnoreLength).rewrite(reader); | |
| 161 | + if (newItems.size() > 0) { | |
| 162 | + return new MtasSpanSequenceQuery(newItems, newIgnoreClause, | |
| 163 | + maximumIgnoreLength).rewrite(reader); | |
| 164 | + } else { | |
| 165 | + return new MtasSpanMatchNoneQuery(field); | |
| 166 | + } | |
| 133 | 167 | } |
| 134 | 168 | } |
| 135 | 169 | } |
| ... | ... | @@ -157,7 +191,7 @@ public class MtasSpanSequenceQuery extends MtasSpanQuery { |
| 157 | 191 | } |
| 158 | 192 | buffer.append("]"); |
| 159 | 193 | buffer.append(", "); |
| 160 | - buffer.append(ignoreClause); | |
| 194 | + buffer.append(ignoreQuery); | |
| 161 | 195 | buffer.append(")"); |
| 162 | 196 | return buffer.toString(); |
| 163 | 197 | } |
| ... | ... | @@ -177,9 +211,9 @@ public class MtasSpanSequenceQuery extends MtasSpanQuery { |
| 177 | 211 | return false; |
| 178 | 212 | MtasSpanSequenceQuery other = (MtasSpanSequenceQuery) obj; |
| 179 | 213 | return field.equals(other.field) && items.equals(other.items) |
| 180 | - && ((ignoreClause == null && other.ignoreClause == null) | |
| 181 | - || ignoreClause != null && other.ignoreClause != null | |
| 182 | - && ignoreClause.equals(other.ignoreClause)); | |
| 214 | + && ((ignoreQuery == null && other.ignoreQuery == null) | |
| 215 | + || ignoreQuery != null && other.ignoreQuery != null | |
| 216 | + && ignoreQuery.equals(other.ignoreQuery)); | |
| 183 | 217 | } |
| 184 | 218 | |
| 185 | 219 | /* |
| ... | ... | @@ -192,6 +226,10 @@ public class MtasSpanSequenceQuery extends MtasSpanQuery { |
| 192 | 226 | int h = this.getClass().getSimpleName().hashCode(); |
| 193 | 227 | h = (h * 3) ^ field.hashCode(); |
| 194 | 228 | h = (h * 5) ^ items.hashCode(); |
| 229 | + if (ignoreQuery != null) { | |
| 230 | + h = (h * 7) ^ ignoreQuery.hashCode(); | |
| 231 | + h = (h * 11) ^ maximumIgnoreLength.hashCode(); | |
| 232 | + } | |
| 195 | 233 | return h; |
| 196 | 234 | } |
| 197 | 235 | |
| ... | ... | @@ -211,8 +249,8 @@ public class MtasSpanSequenceQuery extends MtasSpanQuery { |
| 211 | 249 | subWeights.add(new MtasSpanSequenceQueryWeight( |
| 212 | 250 | item.getQuery().createWeight(searcher, false), item.isOptional())); |
| 213 | 251 | } |
| 214 | - if (ignoreClause != null) { | |
| 215 | - ignoreWeight = ignoreClause.createWeight(searcher, false); | |
| 252 | + if (ignoreQuery != null) { | |
| 253 | + ignoreWeight = ignoreQuery.createWeight(searcher, false); | |
| 216 | 254 | } |
| 217 | 255 | return new SpanSequenceWeight(subWeights, ignoreWeight, maximumIgnoreLength, |
| 218 | 256 | searcher, needsScores ? getTermContexts(subWeights) : null); |
| ... | ... |
src/mtas/search/spans/MtasSpanWithinQuery.java
| 1 | 1 | package mtas.search.spans; |
| 2 | 2 | |
| 3 | 3 | import java.io.IOException; |
| 4 | +import java.util.ArrayList; | |
| 5 | +import java.util.List; | |
| 6 | +import java.util.Map; | |
| 7 | +import java.util.Set; | |
| 4 | 8 | |
| 5 | 9 | import org.apache.lucene.index.IndexReader; |
| 10 | +import org.apache.lucene.index.LeafReaderContext; | |
| 11 | +import org.apache.lucene.index.Term; | |
| 12 | +import org.apache.lucene.index.TermContext; | |
| 6 | 13 | import org.apache.lucene.search.IndexSearcher; |
| 7 | -import org.apache.lucene.search.spans.SpanContainingQuery; | |
| 14 | +import org.apache.lucene.search.spans.SpanQuery; | |
| 8 | 15 | import org.apache.lucene.search.spans.SpanWeight; |
| 9 | 16 | import org.apache.lucene.search.spans.SpanWithinQuery; |
| 17 | +import org.apache.lucene.search.spans.Spans; | |
| 10 | 18 | |
| 19 | +import mtas.search.spans.util.MtasSpanMaximumExpandQuery; | |
| 11 | 20 | import mtas.search.spans.util.MtasSpanQuery; |
| 12 | 21 | |
| 13 | 22 | /** |
| ... | ... | @@ -17,25 +26,58 @@ public class MtasSpanWithinQuery extends MtasSpanQuery { |
| 17 | 26 | |
| 18 | 27 | /** The base query. */ |
| 19 | 28 | private SpanWithinQuery baseQuery; |
| 20 | - private MtasSpanQuery bigQuery, smallQuery; | |
| 21 | - | |
| 29 | + private MtasSpanQuery smallQuery, bigQuery; | |
| 30 | + private int leftBoundaryMinimum, leftBoundaryMaximum, rightBoundaryMaximum, | |
| 31 | + rightBoundaryMinimum; | |
| 32 | + private boolean autoAdjustBigQuery; | |
| 33 | + String field; | |
| 22 | 34 | |
| 23 | 35 | /** |
| 24 | 36 | * Instantiates a new mtas span within query. |
| 25 | 37 | * |
| 26 | - * @param q1 the q1 | |
| 27 | - * @param q2 the q2 | |
| 38 | + * @param q1 | |
| 39 | + * the q1 | |
| 40 | + * @param q2 | |
| 41 | + * the q2 | |
| 28 | 42 | */ |
| 43 | + | |
| 29 | 44 | public MtasSpanWithinQuery(MtasSpanQuery q1, MtasSpanQuery q2) { |
| 30 | - super(q1!=null?q1.getMinimumWidth():null, q1!=null?q1.getMaximumWidth():null); | |
| 31 | - if(q2!=null && q2.getMinimumWidth()!=null) { | |
| 32 | - if(this.getMinimumWidth()==null || this.getMinimumWidth()<q2.getMinimumWidth()) { | |
| 45 | + this(q1, q2, 0, 0, 0, 0, true); | |
| 46 | + } | |
| 47 | + | |
| 48 | + public MtasSpanWithinQuery(MtasSpanQuery q1, MtasSpanQuery q2, | |
| 49 | + int leftMinimum, int leftMaximum, int rightMinimum, int rightMaximum, | |
| 50 | + boolean adjustBigQuery) { | |
| 51 | + super(q1 != null ? q1.getMinimumWidth() : null, | |
| 52 | + q1 != null ? q1.getMaximumWidth() : null); | |
| 53 | + if (q2 != null && q2.getMinimumWidth() != null) { | |
| 54 | + if (this.getMinimumWidth() == null | |
| 55 | + || this.getMinimumWidth() < q2.getMinimumWidth()) { | |
| 33 | 56 | this.setWidth(q2.getMinimumWidth(), this.getMaximumWidth()); |
| 34 | 57 | } |
| 35 | - } | |
| 36 | - smallQuery=q1; | |
| 37 | - bigQuery=q2; | |
| 38 | - baseQuery = new SpanWithinQuery(smallQuery, bigQuery); | |
| 58 | + } | |
| 59 | + bigQuery = q1; | |
| 60 | + smallQuery = q2; | |
| 61 | + leftBoundaryMinimum = leftMinimum; | |
| 62 | + leftBoundaryMaximum = leftMaximum; | |
| 63 | + rightBoundaryMinimum = rightMinimum; | |
| 64 | + rightBoundaryMaximum = rightMaximum; | |
| 65 | + autoAdjustBigQuery = adjustBigQuery; | |
| 66 | + if (bigQuery.getField() != null) { | |
| 67 | + field = bigQuery.getField(); | |
| 68 | + } else if (smallQuery.getField() != null) { | |
| 69 | + field = smallQuery.getField(); | |
| 70 | + } else { | |
| 71 | + field = null; | |
| 72 | + } | |
| 73 | + if (field != null) { | |
| 74 | + baseQuery = new SpanWithinQuery( | |
| 75 | + new MtasSpanMaximumExpandQuery(bigQuery, leftBoundaryMinimum, | |
| 76 | + leftBoundaryMaximum, rightBoundaryMinimum, rightBoundaryMaximum), | |
| 77 | + smallQuery); | |
| 78 | + } else { | |
| 79 | + baseQuery = null; | |
| 80 | + } | |
| 39 | 81 | } |
| 40 | 82 | |
| 41 | 83 | /* |
| ... | ... | @@ -46,12 +88,143 @@ public class MtasSpanWithinQuery extends MtasSpanQuery { |
| 46 | 88 | */ |
| 47 | 89 | @Override |
| 48 | 90 | public MtasSpanQuery rewrite(IndexReader reader) throws IOException { |
| 49 | - MtasSpanQuery newSmallQuery = smallQuery.rewrite(reader); | |
| 50 | 91 | MtasSpanQuery newBigQuery = bigQuery.rewrite(reader); |
| 51 | - if(newSmallQuery!=smallQuery || newBigQuery!=bigQuery) { | |
| 52 | - return new MtasSpanWithinQuery(newSmallQuery, newBigQuery).rewrite(reader); | |
| 53 | - } else if(newSmallQuery!=null && newBigQuery!=null && newSmallQuery.equals(newBigQuery)) { | |
| 54 | - return newSmallQuery; | |
| 92 | + MtasSpanQuery newSmallQuery = smallQuery.rewrite(reader); | |
| 93 | + | |
| 94 | + if (newBigQuery == null || newBigQuery instanceof MtasSpanMatchNoneQuery | |
| 95 | + || newSmallQuery == null | |
| 96 | + || newSmallQuery instanceof MtasSpanMatchNoneQuery) { | |
| 97 | + return new MtasSpanMatchNoneQuery(field); | |
| 98 | + } | |
| 99 | + | |
| 100 | + if (autoAdjustBigQuery) { | |
| 101 | + if (newBigQuery instanceof MtasSpanRecurrenceQuery) { | |
| 102 | + MtasSpanRecurrenceQuery recurrenceQuery = (MtasSpanRecurrenceQuery) newBigQuery; | |
| 103 | + if (recurrenceQuery.getIgnoreQuery() == null | |
| 104 | + && recurrenceQuery.getQuery() instanceof MtasSpanMatchAllQuery) { | |
| 105 | + rightBoundaryMaximum += leftBoundaryMaximum | |
| 106 | + + recurrenceQuery.getMaximumRecurrence(); | |
| 107 | + rightBoundaryMinimum += leftBoundaryMinimum | |
| 108 | + + recurrenceQuery.getMinimumRecurrence(); | |
| 109 | + leftBoundaryMaximum = 0; | |
| 110 | + leftBoundaryMinimum = 0; | |
| 111 | + newBigQuery = new MtasSpanMatchAllQuery(field); | |
| 112 | + // System.out.println("REPLACE WITH " + newBigQuery + " ([" | |
| 113 | + // + leftBoundaryMinimum + "," + leftBoundaryMaximum + "],[" | |
| 114 | + // + rightBoundaryMinimum + "," + rightBoundaryMaximum + "])"); | |
| 115 | + return new MtasSpanWithinQuery(newBigQuery, newSmallQuery, | |
| 116 | + leftBoundaryMinimum, leftBoundaryMaximum, rightBoundaryMinimum, | |
| 117 | + rightBoundaryMaximum, autoAdjustBigQuery).rewrite(reader); | |
| 118 | + } | |
| 119 | + } else if (newBigQuery instanceof MtasSpanMatchAllQuery) { | |
| 120 | + if (leftBoundaryMaximum > 0) { | |
| 121 | + rightBoundaryMaximum += leftBoundaryMaximum; | |
| 122 | + rightBoundaryMinimum += leftBoundaryMinimum; | |
| 123 | + leftBoundaryMaximum = 0; | |
| 124 | + leftBoundaryMinimum = 0; | |
| 125 | + // System.out.println("REPLACE WITH " + newBigQuery + " ([" | |
| 126 | + // + leftBoundaryMinimum + "," + leftBoundaryMaximum + "],[" | |
| 127 | + // + rightBoundaryMinimum + "," + rightBoundaryMaximum + "])"); | |
| 128 | + return new MtasSpanWithinQuery(newBigQuery, newSmallQuery, | |
| 129 | + leftBoundaryMinimum, leftBoundaryMaximum, rightBoundaryMinimum, | |
| 130 | + rightBoundaryMaximum, autoAdjustBigQuery).rewrite(reader); | |
| 131 | + } | |
| 132 | + } else if (newBigQuery instanceof MtasSpanSequenceQuery) { | |
| 133 | + MtasSpanSequenceQuery sequenceQuery = (MtasSpanSequenceQuery) newBigQuery; | |
| 134 | + if (sequenceQuery.getIgnoreQuery() == null) { | |
| 135 | + List<MtasSpanSequenceItem> items = sequenceQuery.getItems(); | |
| 136 | + List<MtasSpanSequenceItem> newItems = new ArrayList<MtasSpanSequenceItem>(); | |
| 137 | + int newLeftBoundaryMinimum = 0, newLeftBoundaryMaximum = 0, | |
| 138 | + newRightBoundaryMinimum = 0, newRightBoundaryMaximum = 0; | |
| 139 | + for (int i = 0; i < items.size(); i++) { | |
| 140 | + // first item | |
| 141 | + if (i == 0) { | |
| 142 | + if (items.get(i).getQuery() instanceof MtasSpanMatchAllQuery) { | |
| 143 | + newLeftBoundaryMaximum++; | |
| 144 | + if (!items.get(i).isOptional()) { | |
| 145 | + newLeftBoundaryMinimum++; | |
| 146 | + } | |
| 147 | + } else if (items.get(i) | |
| 148 | + .getQuery() instanceof MtasSpanRecurrenceQuery) { | |
| 149 | + MtasSpanRecurrenceQuery msrq = (MtasSpanRecurrenceQuery) items | |
| 150 | + .get(i).getQuery(); | |
| 151 | + if (msrq.getQuery() instanceof MtasSpanMatchAllQuery) { | |
| 152 | + newLeftBoundaryMaximum += msrq.getMaximumRecurrence(); | |
| 153 | + if (!items.get(i).isOptional()) { | |
| 154 | + newLeftBoundaryMinimum += msrq.getMinimumRecurrence(); | |
| 155 | + } | |
| 156 | + } else { | |
| 157 | + newItems.add(items.get(i)); | |
| 158 | + } | |
| 159 | + } else { | |
| 160 | + newItems.add(items.get(i)); | |
| 161 | + } | |
| 162 | + // last item | |
| 163 | + } else if (i == (items.size() - 1)) { | |
| 164 | + if (items.get(i).getQuery() instanceof MtasSpanMatchAllQuery) { | |
| 165 | + newRightBoundaryMaximum++; | |
| 166 | + if (!items.get(i).isOptional()) { | |
| 167 | + newRightBoundaryMinimum++; | |
| 168 | + } | |
| 169 | + } else if (items.get(i) | |
| 170 | + .getQuery() instanceof MtasSpanRecurrenceQuery) { | |
| 171 | + MtasSpanRecurrenceQuery msrq = (MtasSpanRecurrenceQuery) items | |
| 172 | + .get(i).getQuery(); | |
| 173 | + if (msrq.getQuery() instanceof MtasSpanMatchAllQuery) { | |
| 174 | + newRightBoundaryMaximum += msrq.getMaximumRecurrence(); | |
| 175 | + if (!items.get(i).isOptional()) { | |
| 176 | + newRightBoundaryMinimum += msrq.getMinimumRecurrence(); | |
| 177 | + } | |
| 178 | + } else { | |
| 179 | + newItems.add(items.get(i)); | |
| 180 | + } | |
| 181 | + } else { | |
| 182 | + newItems.add(items.get(i)); | |
| 183 | + } | |
| 184 | + // other items | |
| 185 | + } else { | |
| 186 | + newItems.add(items.get(i)); | |
| 187 | + } | |
| 188 | + } | |
| 189 | + leftBoundaryMaximum += newLeftBoundaryMaximum; | |
| 190 | + leftBoundaryMinimum += newLeftBoundaryMinimum; | |
| 191 | + rightBoundaryMaximum += newRightBoundaryMaximum; | |
| 192 | + rightBoundaryMinimum += newRightBoundaryMinimum; | |
| 193 | + if (newItems.size() == 0) { | |
| 194 | + rightBoundaryMaximum = Math.max(0, | |
| 195 | + rightBoundaryMaximum + leftBoundaryMaximum - 1); | |
| 196 | + rightBoundaryMinimum = Math.max(0, | |
| 197 | + rightBoundaryMinimum + leftBoundaryMinimum - 1); | |
| 198 | + leftBoundaryMaximum = 0; | |
| 199 | + leftBoundaryMinimum = 0; | |
| 200 | + newItems.add(new MtasSpanSequenceItem( | |
| 201 | + new MtasSpanMatchAllQuery(field), false)); | |
| 202 | + } | |
| 203 | + if (!items.equals(newItems) || newLeftBoundaryMaximum > 0 | |
| 204 | + || newRightBoundaryMaximum > 0) { | |
| 205 | + newBigQuery = (new MtasSpanSequenceQuery(newItems, null, null)) | |
| 206 | + .rewrite(reader); | |
| 207 | + System.out.println(newBigQuery.getField() + "\t" + newBigQuery); | |
| 208 | + // System.out.println("REPLACE WITH " + newBigQuery + " ([" | |
| 209 | + // + leftBoundaryMinimum + "," + leftBoundaryMaximum + "],[" | |
| 210 | + // + rightBoundaryMinimum + "," + rightBoundaryMaximum + "])"); | |
| 211 | + return new MtasSpanWithinQuery(newBigQuery, newSmallQuery, | |
| 212 | + leftBoundaryMinimum, leftBoundaryMaximum, rightBoundaryMinimum, | |
| 213 | + rightBoundaryMaximum, autoAdjustBigQuery).rewrite(reader); | |
| 214 | + } | |
| 215 | + } | |
| 216 | + } | |
| 217 | + } | |
| 218 | + | |
| 219 | + if (newBigQuery != bigQuery || newSmallQuery != smallQuery) { | |
| 220 | + System.out.println(newBigQuery.getField() + "\t" + newBigQuery); | |
| 221 | + System.out.println(newSmallQuery.getField() + "\t" + newSmallQuery); | |
| 222 | + return (new MtasSpanWithinQuery(newBigQuery, newSmallQuery, | |
| 223 | + leftBoundaryMinimum, leftBoundaryMaximum, rightBoundaryMinimum, | |
| 224 | + rightBoundaryMaximum, autoAdjustBigQuery)).rewrite(reader); | |
| 225 | + } else if (newBigQuery != null && newSmallQuery != null | |
| 226 | + && newBigQuery.equals(newSmallQuery)) { | |
| 227 | + return newBigQuery; | |
| 55 | 228 | } else { |
| 56 | 229 | baseQuery = (SpanWithinQuery) baseQuery.rewrite(reader); |
| 57 | 230 | return super.rewrite(reader); |
| ... | ... | @@ -65,7 +238,7 @@ public class MtasSpanWithinQuery extends MtasSpanQuery { |
| 65 | 238 | */ |
| 66 | 239 | @Override |
| 67 | 240 | public String getField() { |
| 68 | - return baseQuery.getField(); | |
| 241 | + return field; | |
| 69 | 242 | } |
| 70 | 243 | |
| 71 | 244 | /* |
| ... | ... | @@ -78,9 +251,7 @@ public class MtasSpanWithinQuery extends MtasSpanQuery { |
| 78 | 251 | @Override |
| 79 | 252 | public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) |
| 80 | 253 | throws IOException { |
| 81 | - SpanWeight sw = baseQuery.createWeight(searcher, needsScores); | |
| 82 | - return sw; | |
| 83 | - // return baseQuery.createWeight(searcher, needsScores); | |
| 254 | + return baseQuery.createWeight(searcher, needsScores); | |
| 84 | 255 | } |
| 85 | 256 | |
| 86 | 257 | /* |
| ... | ... | @@ -90,7 +261,22 @@ public class MtasSpanWithinQuery extends MtasSpanQuery { |
| 90 | 261 | */ |
| 91 | 262 | @Override |
| 92 | 263 | public String toString(String field) { |
| 93 | - return baseQuery.toString(field); | |
| 264 | + StringBuilder buffer = new StringBuilder(); | |
| 265 | + buffer.append(this.getClass().getSimpleName() + "(["); | |
| 266 | + if (smallQuery != null) { | |
| 267 | + buffer.append(smallQuery.toString(smallQuery.getField())); | |
| 268 | + } else { | |
| 269 | + buffer.append("null"); | |
| 270 | + } | |
| 271 | + buffer.append(","); | |
| 272 | + if (bigQuery != null) { | |
| 273 | + buffer.append(bigQuery.toString(bigQuery.getField())); | |
| 274 | + } else { | |
| 275 | + buffer.append("null"); | |
| 276 | + } | |
| 277 | + buffer.append("],[" + leftBoundaryMinimum + "," + leftBoundaryMaximum | |
| 278 | + + "],[" + rightBoundaryMinimum + "," + rightBoundaryMaximum + "])"); | |
| 279 | + return buffer.toString(); | |
| 94 | 280 | } |
| 95 | 281 | |
| 96 | 282 | /* |
| ... | ... | @@ -107,7 +293,11 @@ public class MtasSpanWithinQuery extends MtasSpanQuery { |
| 107 | 293 | if (getClass() != obj.getClass()) |
| 108 | 294 | return false; |
| 109 | 295 | final MtasSpanWithinQuery that = (MtasSpanWithinQuery) obj; |
| 110 | - return baseQuery.equals(that.baseQuery); | |
| 296 | + return baseQuery.equals(that.baseQuery) | |
| 297 | + && leftBoundaryMinimum == that.leftBoundaryMinimum | |
| 298 | + && leftBoundaryMaximum == that.leftBoundaryMaximum | |
| 299 | + && rightBoundaryMinimum == that.rightBoundaryMinimum | |
| 300 | + && rightBoundaryMaximum == that.rightBoundaryMaximum; | |
| 111 | 301 | } |
| 112 | 302 | |
| 113 | 303 | /* |
| ... | ... | @@ -117,7 +307,18 @@ public class MtasSpanWithinQuery extends MtasSpanQuery { |
| 117 | 307 | */ |
| 118 | 308 | @Override |
| 119 | 309 | public int hashCode() { |
| 120 | - return baseQuery.hashCode(); | |
| 310 | + int h = Integer.rotateLeft(classHash(), 1); | |
| 311 | + h ^= smallQuery.hashCode(); | |
| 312 | + h = Integer.rotateLeft(h, 1); | |
| 313 | + h ^= bigQuery.hashCode(); | |
| 314 | + h = Integer.rotateLeft(h, leftBoundaryMinimum) + leftBoundaryMinimum; | |
| 315 | + h ^= 2; | |
| 316 | + h = Integer.rotateLeft(h, leftBoundaryMaximum) + leftBoundaryMaximum; | |
| 317 | + h ^= 3; | |
| 318 | + h = Integer.rotateLeft(h, rightBoundaryMinimum) + rightBoundaryMinimum; | |
| 319 | + h ^= 5; | |
| 320 | + h = Integer.rotateLeft(h, rightBoundaryMaximum) + rightBoundaryMaximum; | |
| 321 | + return h; | |
| 121 | 322 | } |
| 122 | 323 | |
| 123 | 324 | } |
| ... | ... |
src/mtas/search/spans/util/MtasExtendedSpanAndQuery.java
| 1 | 1 | package mtas.search.spans.util; |
| 2 | 2 | |
| 3 | 3 | import java.util.ArrayList; |
| 4 | +import java.util.HashSet; | |
| 4 | 5 | import java.util.Iterator; |
| 5 | 6 | import java.util.List; |
| 6 | 7 | |
| ... | ... | @@ -13,7 +14,7 @@ import org.apache.lucene.search.spans.SpanQuery; |
| 13 | 14 | public class MtasExtendedSpanAndQuery extends SpanNearQuery { |
| 14 | 15 | |
| 15 | 16 | /** The clauses. */ |
| 16 | - private List<SpanQuery> clauses; | |
| 17 | + private HashSet<SpanQuery> clauses; | |
| 17 | 18 | |
| 18 | 19 | /** |
| 19 | 20 | * Instantiates a new mtas extended span and query. |
| ... | ... | @@ -22,7 +23,7 @@ public class MtasExtendedSpanAndQuery extends SpanNearQuery { |
| 22 | 23 | */ |
| 23 | 24 | public MtasExtendedSpanAndQuery(SpanQuery... clauses) { |
| 24 | 25 | super(clauses, -1 * (clauses.length - 1), false); |
| 25 | - this.clauses = new ArrayList<>(clauses.length); | |
| 26 | + this.clauses = new HashSet<SpanQuery>(); | |
| 26 | 27 | for (SpanQuery clause : clauses) { |
| 27 | 28 | this.clauses.add(clause); |
| 28 | 29 | } |
| ... | ... | @@ -63,7 +64,7 @@ public class MtasExtendedSpanAndQuery extends SpanNearQuery { |
| 63 | 64 | return false; |
| 64 | 65 | if (getClass() != obj.getClass()) |
| 65 | 66 | return false; |
| 66 | - final MtasExtendedSpanAndQuery that = (MtasExtendedSpanAndQuery) obj; | |
| 67 | + final MtasExtendedSpanAndQuery that = (MtasExtendedSpanAndQuery) obj; | |
| 67 | 68 | return clauses.equals(that.clauses); |
| 68 | 69 | } |
| 69 | 70 | |
| ... | ... |
src/mtas/search/spans/util/MtasSpanMaximumExpandQuery.java
0 → 100644
| 1 | +package mtas.search.spans.util; | |
| 2 | + | |
| 3 | +import java.io.IOException; | |
| 4 | +import java.lang.reflect.Method; | |
| 5 | +import java.util.Map; | |
| 6 | +import java.util.Set; | |
| 7 | + | |
| 8 | +import org.apache.lucene.codecs.FieldsProducer; | |
| 9 | +import org.apache.lucene.index.IndexReader; | |
| 10 | +import org.apache.lucene.index.LeafReader; | |
| 11 | +import org.apache.lucene.index.LeafReaderContext; | |
| 12 | +import org.apache.lucene.index.Term; | |
| 13 | +import org.apache.lucene.index.TermContext; | |
| 14 | +import org.apache.lucene.index.Terms; | |
| 15 | +import org.apache.lucene.search.IndexSearcher; | |
| 16 | +import org.apache.lucene.search.spans.SpanCollector; | |
| 17 | +import org.apache.lucene.search.spans.SpanWeight; | |
| 18 | +import org.apache.lucene.search.spans.Spans; | |
| 19 | + | |
| 20 | +import mtas.codec.util.CodecInfo; | |
| 21 | +import mtas.codec.util.CodecInfo.IndexDoc; | |
| 22 | +import mtas.search.spans.MtasSpanMatchNoneSpans; | |
| 23 | + | |
| 24 | +public class MtasSpanMaximumExpandQuery extends MtasSpanQuery { | |
| 25 | + | |
| 26 | + MtasSpanQuery query; | |
| 27 | + int minimumLeft, maximumLeft, minimumRight, maximumRight; | |
| 28 | + | |
| 29 | + public MtasSpanMaximumExpandQuery(MtasSpanQuery query, int minimumLeft, | |
| 30 | + int maximumLeft, int minimumRight, int maximumRight) { | |
| 31 | + super(null, null); | |
| 32 | + this.query = query; | |
| 33 | + if (minimumLeft > maximumLeft || minimumRight > maximumRight | |
| 34 | + || minimumLeft < 0 || minimumRight < 0) { | |
| 35 | + throw new IllegalArgumentException(); | |
| 36 | + } | |
| 37 | + this.minimumLeft = minimumLeft; | |
| 38 | + this.maximumLeft = maximumLeft; | |
| 39 | + this.minimumRight = minimumRight; | |
| 40 | + this.maximumRight = maximumRight; | |
| 41 | + Integer minimum = query.getMinimumWidth(); | |
| 42 | + Integer maximum = query.getMaximumWidth(); | |
| 43 | + if (minimum != null) { | |
| 44 | + minimum += minimumLeft + minimumRight; | |
| 45 | + } | |
| 46 | + if (maximum != null) { | |
| 47 | + maximum += maximumLeft + maximumRight; | |
| 48 | + } | |
| 49 | + setWidth(minimum, maximum); | |
| 50 | + } | |
| 51 | + | |
| 52 | + @Override | |
| 53 | + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) | |
| 54 | + throws IOException { | |
| 55 | + SpanWeight subWeight = query.createWeight(searcher, needsScores); | |
| 56 | + if (maximumLeft == 0 && maximumRight == 0) { | |
| 57 | + return subWeight; | |
| 58 | + } else { | |
| 59 | + return new MtasMaximumExpandWeight(subWeight, searcher, needsScores); | |
| 60 | + } | |
| 61 | + } | |
| 62 | + | |
| 63 | + @Override | |
| 64 | + public String getField() { | |
| 65 | + return query.getField(); | |
| 66 | + } | |
| 67 | + | |
| 68 | + @Override | |
| 69 | + public String toString(String field) { | |
| 70 | + StringBuilder buffer = new StringBuilder(); | |
| 71 | + buffer.append(this.getClass().getSimpleName() + "(["); | |
| 72 | + buffer.append(query.toString(field) + "]["+minimumLeft+","+maximumLeft+"]["+minimumRight+","+maximumRight+"])"); | |
| 73 | + return buffer.toString(); | |
| 74 | + } | |
| 75 | + | |
| 76 | + @Override | |
| 77 | + public boolean equals(Object obj) { | |
| 78 | + if (this == obj) | |
| 79 | + return true; | |
| 80 | + if (obj == null) | |
| 81 | + return false; | |
| 82 | + if (getClass() != obj.getClass()) | |
| 83 | + return false; | |
| 84 | + final MtasSpanMaximumExpandQuery that = (MtasSpanMaximumExpandQuery) obj; | |
| 85 | + return query.equals(that.query) && minimumLeft == that.minimumLeft | |
| 86 | + && maximumLeft == that.maximumLeft && minimumRight == that.minimumRight | |
| 87 | + && maximumRight == that.maximumRight; | |
| 88 | + } | |
| 89 | + | |
| 90 | + @Override | |
| 91 | + public int hashCode() { | |
| 92 | + int h = Integer.rotateLeft(classHash(), 1); | |
| 93 | + h ^= query.hashCode(); | |
| 94 | + h = Integer.rotateLeft(h, minimumLeft) + minimumLeft; | |
| 95 | + h ^= 2; | |
| 96 | + h = Integer.rotateLeft(h, maximumLeft) + maximumLeft; | |
| 97 | + h ^= 3; | |
| 98 | + h = Integer.rotateLeft(h, minimumRight) + minimumRight; | |
| 99 | + h ^= 5; | |
| 100 | + h = Integer.rotateLeft(h, maximumRight) + maximumRight; | |
| 101 | + return h; | |
| 102 | + } | |
| 103 | + | |
| 104 | + @Override | |
| 105 | + public MtasSpanQuery rewrite(IndexReader reader) throws IOException { | |
| 106 | + MtasSpanQuery newQuery = (MtasSpanQuery) query.rewrite(reader); | |
| 107 | + if (maximumLeft == 0 && maximumRight == 0) { | |
| 108 | + return newQuery; | |
| 109 | + } else if (query != newQuery) { | |
| 110 | + return new MtasSpanMaximumExpandQuery(newQuery, minimumLeft, maximumLeft, | |
| 111 | + minimumRight, maximumRight); | |
| 112 | + } else { | |
| 113 | + return super.rewrite(reader); | |
| 114 | + } | |
| 115 | + } | |
| 116 | + | |
| 117 | + private class MtasMaximumExpandWeight extends SpanWeight { | |
| 118 | + SpanWeight subWeight; | |
| 119 | + | |
| 120 | + public MtasMaximumExpandWeight(SpanWeight subWeight, IndexSearcher searcher, | |
| 121 | + boolean needsScores) throws IOException { | |
| 122 | + super(MtasSpanMaximumExpandQuery.this, searcher, | |
| 123 | + needsScores ? getTermContexts(subWeight) : null); | |
| 124 | + this.subWeight = subWeight; | |
| 125 | + } | |
| 126 | + | |
| 127 | + @Override | |
| 128 | + public void extractTermContexts(Map<Term, TermContext> contexts) { | |
| 129 | + subWeight.extractTermContexts(contexts); | |
| 130 | + } | |
| 131 | + | |
| 132 | + @Override | |
| 133 | + public Spans getSpans(LeafReaderContext ctx, Postings requiredPostings) | |
| 134 | + throws IOException { | |
| 135 | + Spans spans = subWeight.getSpans(ctx, requiredPostings); | |
| 136 | + if (maximumLeft == 0 && maximumRight == 0) { | |
| 137 | + return spans; | |
| 138 | + } else { | |
| 139 | + try { | |
| 140 | + // get leafreader | |
| 141 | + LeafReader r = ctx.reader(); | |
| 142 | + // get delegate | |
| 143 | + Boolean hasMethod = true; | |
| 144 | + while (hasMethod) { | |
| 145 | + hasMethod = false; | |
| 146 | + Method[] methods = r.getClass().getMethods(); | |
| 147 | + for (Method m : methods) { | |
| 148 | + if (m.getName().equals("getDelegate")) { | |
| 149 | + hasMethod = true; | |
| 150 | + r = (LeafReader) m.invoke(r, (Object[]) null); | |
| 151 | + break; | |
| 152 | + } | |
| 153 | + } | |
| 154 | + } // get fieldsproducer | |
| 155 | + Method fpm = r.getClass().getMethod("getPostingsReader", | |
| 156 | + (Class<?>[]) null); | |
| 157 | + FieldsProducer fp = (FieldsProducer) fpm.invoke(r, (Object[]) null); | |
| 158 | + // get MtasFieldsProducer using terms | |
| 159 | + Terms t = fp.terms(field); | |
| 160 | + if (t == null) { | |
| 161 | + return new MtasSpanMatchNoneSpans(field); | |
| 162 | + } else { | |
| 163 | + CodecInfo mtasCodecInfo = CodecInfo.getCodecInfoFromTerms(t); | |
| 164 | + return new MtasMaximumExpandSpans(mtasCodecInfo, query.getField(), | |
| 165 | + spans); | |
| 166 | + } | |
| 167 | + } catch (Exception e) { | |
| 168 | + throw new IOException("Can't get reader"); | |
| 169 | + } | |
| 170 | + | |
| 171 | + } | |
| 172 | + } | |
| 173 | + | |
| 174 | + @Override | |
| 175 | + public void extractTerms(Set<Term> terms) { | |
| 176 | + subWeight.extractTerms(terms); | |
| 177 | + } | |
| 178 | + | |
| 179 | + } | |
| 180 | + | |
| 181 | + private class MtasMaximumExpandSpans extends Spans { | |
| 182 | + | |
| 183 | + Spans subSpans; | |
| 184 | + int minPosition, maxPosition; | |
| 185 | + String field; | |
| 186 | + CodecInfo mtasCodecInfo; | |
| 187 | + int startPosition, endPosition; | |
| 188 | + | |
| 189 | + public MtasMaximumExpandSpans(CodecInfo mtasCodecInfo, String field, | |
| 190 | + Spans subSpans) { | |
| 191 | + super(); | |
| 192 | + this.subSpans = subSpans; | |
| 193 | + this.field = field; | |
| 194 | + this.mtasCodecInfo = mtasCodecInfo; | |
| 195 | + this.minPosition = 0; | |
| 196 | + this.maxPosition = 0; | |
| 197 | + this.startPosition = -1; | |
| 198 | + this.endPosition = -1; | |
| 199 | + } | |
| 200 | + | |
| 201 | + @Override | |
| 202 | + public int nextStartPosition() throws IOException { | |
| 203 | + int basicStartPosition, basicEndPosition; | |
| 204 | + while ((basicStartPosition = subSpans | |
| 205 | + .nextStartPosition()) != NO_MORE_POSITIONS) { | |
| 206 | + basicEndPosition = subSpans.endPosition(); | |
| 207 | + startPosition = Math.max(minPosition, | |
| 208 | + (basicStartPosition - maximumLeft)); | |
| 209 | + endPosition = Math.min(maxPosition + 1, | |
| 210 | + (basicEndPosition + maximumRight)); | |
| 211 | + if (startPosition <= (basicStartPosition - minimumLeft) | |
| 212 | + && endPosition >= (basicEndPosition + minimumRight)) { | |
| 213 | + return this.startPosition; | |
| 214 | + } | |
| 215 | + } | |
| 216 | + startPosition = NO_MORE_POSITIONS; | |
| 217 | + endPosition = NO_MORE_POSITIONS; | |
| 218 | + return NO_MORE_POSITIONS; | |
| 219 | + } | |
| 220 | + | |
| 221 | + @Override | |
| 222 | + public int startPosition() { | |
| 223 | + return startPosition; | |
| 224 | + } | |
| 225 | + | |
| 226 | + @Override | |
| 227 | + public int endPosition() { | |
| 228 | + return endPosition; | |
| 229 | + } | |
| 230 | + | |
| 231 | + @Override | |
| 232 | + public int width() { | |
| 233 | + return endPosition-startPosition; | |
| 234 | + } | |
| 235 | + | |
| 236 | + @Override | |
| 237 | + public void collect(SpanCollector collector) throws IOException { | |
| 238 | + subSpans.collect(collector); | |
| 239 | + } | |
| 240 | + | |
| 241 | + @Override | |
| 242 | + public float positionsCost() { | |
| 243 | + return subSpans.positionsCost(); | |
| 244 | + } | |
| 245 | + | |
| 246 | + @Override | |
| 247 | + public int docID() { | |
| 248 | + return subSpans.docID(); | |
| 249 | + } | |
| 250 | + | |
| 251 | + @Override | |
| 252 | + public int nextDoc() throws IOException { | |
| 253 | + int docId = subSpans.nextDoc(); | |
| 254 | + startPosition = -1; | |
| 255 | + endPosition = -1; | |
| 256 | + if (docId != NO_MORE_DOCS) { | |
| 257 | + IndexDoc doc = mtasCodecInfo.getDoc(field, docId); | |
| 258 | + if (doc != null) { | |
| 259 | + minPosition = doc.minPosition; | |
| 260 | + maxPosition = doc.maxPosition; | |
| 261 | + } else { | |
| 262 | + minPosition = NO_MORE_POSITIONS; | |
| 263 | + maxPosition = NO_MORE_POSITIONS; | |
| 264 | + } | |
| 265 | + } else { | |
| 266 | + minPosition = NO_MORE_POSITIONS; | |
| 267 | + maxPosition = NO_MORE_POSITIONS; | |
| 268 | + } | |
| 269 | + return docId; | |
| 270 | + } | |
| 271 | + | |
| 272 | + @Override | |
| 273 | + public int advance(int target) throws IOException { | |
| 274 | + int docId = subSpans.advance(target); | |
| 275 | + startPosition = -1; | |
| 276 | + endPosition = -1; | |
| 277 | + if (docId != NO_MORE_DOCS) { | |
| 278 | + IndexDoc doc = mtasCodecInfo.getDoc(field, docId); | |
| 279 | + if (doc != null) { | |
| 280 | + minPosition = doc.minPosition; | |
| 281 | + maxPosition = doc.maxPosition; | |
| 282 | + } else { | |
| 283 | + minPosition = NO_MORE_POSITIONS; | |
| 284 | + maxPosition = NO_MORE_POSITIONS; | |
| 285 | + } | |
| 286 | + } else { | |
| 287 | + minPosition = NO_MORE_POSITIONS; | |
| 288 | + maxPosition = NO_MORE_POSITIONS; | |
| 289 | + } | |
| 290 | + return docId; | |
| 291 | + } | |
| 292 | + | |
| 293 | + @Override | |
| 294 | + public long cost() { | |
| 295 | + return subSpans.cost(); | |
| 296 | + } | |
| 297 | + } | |
| 298 | + | |
| 299 | +} | |
| ... | ... |
src/mtas/search/spans/util/MtasSpanQuery.java
| ... | ... | @@ -6,8 +6,6 @@ import org.apache.lucene.index.IndexReader; |
| 6 | 6 | import org.apache.lucene.search.IndexSearcher; |
| 7 | 7 | import org.apache.lucene.search.spans.SpanQuery; |
| 8 | 8 | import org.apache.lucene.search.spans.SpanWeight; |
| 9 | - | |
| 10 | -import mtas.search.spans.MtasSpanMatchAllQuery; | |
| 11 | 9 | import mtas.search.spans.MtasSpanMatchNoneQuery; |
| 12 | 10 | |
| 13 | 11 | public abstract class MtasSpanQuery extends SpanQuery { |
| ... | ... |