Commit 45ef1daff384f56165e26a861c6d50a273c11017
1 parent
ca176909
bugfixes
Showing
8 changed files
with
427 additions
and
60 deletions
docker/Dockerfile
1 | 1 | # Automatically generated Dockerfile |
2 | -# - Build 2017-02-11 10:16 | |
2 | +# - Build 2017-02-20 13:44 | |
3 | 3 | # - Lucene/Solr version 6.4.1 |
4 | 4 | # - Mtas release 20170211 |
5 | 5 | # |
... | ... | @@ -55,7 +55,7 @@ RUN apt-get update && apt-get install -y lsof software-properties-common python- |
55 | 55 | && chmod -R 755 /var/www/html \ |
56 | 56 | && printf "echo\n" >> /start.sh \ |
57 | 57 | && printf "echo \"================ Mtas -- Multi Tier Annotation Search =================\"\n" >> /start.sh \ |
58 | -&& printf "echo \" Timestamp 2017-02-11 10:16\"\n" >> /start.sh \ | |
58 | +&& printf "echo \" Timestamp 2017-02-20 13:44\"\n" >> /start.sh \ | |
59 | 59 | && printf "echo \" Lucene/Solr version 6.4.1\"\n" >> /start.sh \ |
60 | 60 | && printf "echo \" Mtas release 20170211\"\n" >> /start.sh \ |
61 | 61 | && printf "echo \" See https://meertensinstituut.github.io/mtas/ for more information\"\n" >> /start.sh \ |
... | ... |
src/mtas/codec/util/CodecCollector.java
... | ... | @@ -3172,7 +3172,7 @@ public class CodecCollector { |
3172 | 3172 | if (recomputeKeyList.size() > 0) { |
3173 | 3173 | HashMap<String, Automaton> automatonMap = MtasToken |
3174 | 3174 | .createAutomatonMap(termVector.prefix, |
3175 | - new ArrayList<String>(termVector.list), true); | |
3175 | + new ArrayList<String>(recomputeKeyList), true); | |
3176 | 3176 | List<CompiledAutomaton> listCompiledAutomata = MtasToken |
3177 | 3177 | .createAutomata(termVector.prefix, termVector.regexp, |
3178 | 3178 | automatonMap); |
... | ... |
src/mtas/parser/cql/MtasCQLParser.java
... | ... | @@ -387,17 +387,18 @@ public class MtasCQLParser implements MtasCQLParserConstants { |
387 | 387 | if (slash == null) |
388 | 388 | { |
389 | 389 | startGroup = true; |
390 | + endGroup = false; | |
390 | 391 | } |
391 | 392 | else |
392 | 393 | { |
393 | 394 | startGroup = false; |
395 | + endGroup = false; | |
394 | 396 | } |
395 | - endGroup = false; | |
396 | 397 | } else if (jj_2_30(1000)) { |
397 | 398 | jj_consume_token(SLASH); |
398 | 399 | condition = groupCondition(field); |
399 | - startGroup = true; | |
400 | - endGroup = false; | |
400 | + startGroup = false; | |
401 | + endGroup = true; | |
401 | 402 | } else { |
402 | 403 | jj_consume_token(-1); |
403 | 404 | throw new ParseException(); |
... | ... | @@ -1465,14 +1466,23 @@ public class MtasCQLParser implements MtasCQLParserConstants { |
1465 | 1466 | return false; |
1466 | 1467 | } |
1467 | 1468 | |
1469 | + private boolean jj_3_47() { | |
1470 | + if (jj_scan_token(CURLY_BRACKET_START)) return true; | |
1471 | + if (jj_scan_token(NUMBER)) return true; | |
1472 | + if (jj_scan_token(CURLY_BRACKET_END)) return true; | |
1473 | + return false; | |
1474 | + } | |
1475 | + | |
1468 | 1476 | private boolean jj_3_26() { |
1469 | 1477 | if (jj_3R_13()) return true; |
1470 | 1478 | return false; |
1471 | 1479 | } |
1472 | 1480 | |
1473 | - private boolean jj_3_47() { | |
1481 | + private boolean jj_3_46() { | |
1474 | 1482 | if (jj_scan_token(CURLY_BRACKET_START)) return true; |
1475 | 1483 | if (jj_scan_token(NUMBER)) return true; |
1484 | + if (jj_scan_token(KOMMA)) return true; | |
1485 | + if (jj_scan_token(NUMBER)) return true; | |
1476 | 1486 | if (jj_scan_token(CURLY_BRACKET_END)) return true; |
1477 | 1487 | return false; |
1478 | 1488 | } |
... | ... | @@ -1489,15 +1499,6 @@ public class MtasCQLParser implements MtasCQLParserConstants { |
1489 | 1499 | return false; |
1490 | 1500 | } |
1491 | 1501 | |
1492 | - private boolean jj_3_46() { | |
1493 | - if (jj_scan_token(CURLY_BRACKET_START)) return true; | |
1494 | - if (jj_scan_token(NUMBER)) return true; | |
1495 | - if (jj_scan_token(KOMMA)) return true; | |
1496 | - if (jj_scan_token(NUMBER)) return true; | |
1497 | - if (jj_scan_token(CURLY_BRACKET_END)) return true; | |
1498 | - return false; | |
1499 | - } | |
1500 | - | |
1501 | 1502 | private boolean jj_3_12() { |
1502 | 1503 | if (jj_3R_10()) return true; |
1503 | 1504 | return false; |
... | ... | @@ -1526,18 +1527,13 @@ public class MtasCQLParser implements MtasCQLParserConstants { |
1526 | 1527 | return false; |
1527 | 1528 | } |
1528 | 1529 | |
1529 | - private boolean jj_3_10() { | |
1530 | - if (jj_scan_token(NOT_INTERSECTING)) return true; | |
1531 | - return false; | |
1532 | - } | |
1533 | - | |
1534 | 1530 | private boolean jj_3_76() { |
1535 | 1531 | if (jj_scan_token(TOKEN_EQUALS)) return true; |
1536 | 1532 | return false; |
1537 | 1533 | } |
1538 | 1534 | |
1539 | - private boolean jj_3_24() { | |
1540 | - if (jj_3R_14()) return true; | |
1535 | + private boolean jj_3_10() { | |
1536 | + if (jj_scan_token(NOT_INTERSECTING)) return true; | |
1541 | 1537 | return false; |
1542 | 1538 | } |
1543 | 1539 | |
... | ... | @@ -1546,13 +1542,13 @@ public class MtasCQLParser implements MtasCQLParserConstants { |
1546 | 1542 | return false; |
1547 | 1543 | } |
1548 | 1544 | |
1549 | - private boolean jj_3_9() { | |
1550 | - if (jj_scan_token(INTERSECTING)) return true; | |
1545 | + private boolean jj_3_24() { | |
1546 | + if (jj_3R_14()) return true; | |
1551 | 1547 | return false; |
1552 | 1548 | } |
1553 | 1549 | |
1554 | - private boolean jj_3_23() { | |
1555 | - if (jj_3R_13()) return true; | |
1550 | + private boolean jj_3_9() { | |
1551 | + if (jj_scan_token(INTERSECTING)) return true; | |
1556 | 1552 | return false; |
1557 | 1553 | } |
1558 | 1554 | |
... | ... | @@ -1562,6 +1558,11 @@ public class MtasCQLParser implements MtasCQLParserConstants { |
1562 | 1558 | return false; |
1563 | 1559 | } |
1564 | 1560 | |
1561 | + private boolean jj_3_23() { | |
1562 | + if (jj_3R_13()) return true; | |
1563 | + return false; | |
1564 | + } | |
1565 | + | |
1565 | 1566 | private boolean jj_3_42() { |
1566 | 1567 | if (jj_3R_16()) return true; |
1567 | 1568 | return false; |
... | ... | @@ -1581,11 +1582,6 @@ public class MtasCQLParser implements MtasCQLParserConstants { |
1581 | 1582 | return false; |
1582 | 1583 | } |
1583 | 1584 | |
1584 | - private boolean jj_3_8() { | |
1585 | - if (jj_scan_token(NOT_WITHIN)) return true; | |
1586 | - return false; | |
1587 | - } | |
1588 | - | |
1589 | 1585 | private boolean jj_3_80() { |
1590 | 1586 | if (jj_scan_token(UNQUOTED_VALUE)) return true; |
1591 | 1587 | Token xsp; |
... | ... | @@ -1598,6 +1594,11 @@ public class MtasCQLParser implements MtasCQLParserConstants { |
1598 | 1594 | return false; |
1599 | 1595 | } |
1600 | 1596 | |
1597 | + private boolean jj_3_8() { | |
1598 | + if (jj_scan_token(NOT_WITHIN)) return true; | |
1599 | + return false; | |
1600 | + } | |
1601 | + | |
1601 | 1602 | private boolean jj_3_38() { |
1602 | 1603 | if (jj_scan_token(OR)) return true; |
1603 | 1604 | if (jj_3R_16()) return true; |
... | ... | @@ -1864,6 +1865,11 @@ public class MtasCQLParser implements MtasCQLParserConstants { |
1864 | 1865 | return false; |
1865 | 1866 | } |
1866 | 1867 | |
1868 | + private boolean jj_3_66() { | |
1869 | + if (jj_3R_16()) return true; | |
1870 | + return false; | |
1871 | + } | |
1872 | + | |
1867 | 1873 | private boolean jj_3R_12() { |
1868 | 1874 | Token xsp; |
1869 | 1875 | xsp = jj_scanpos; |
... | ... | @@ -1876,11 +1882,6 @@ public class MtasCQLParser implements MtasCQLParserConstants { |
1876 | 1882 | return false; |
1877 | 1883 | } |
1878 | 1884 | |
1879 | - private boolean jj_3_66() { | |
1880 | - if (jj_3R_16()) return true; | |
1881 | - return false; | |
1882 | - } | |
1883 | - | |
1884 | 1885 | private boolean jj_3_65() { |
1885 | 1886 | if (jj_3R_17()) return true; |
1886 | 1887 | return false; |
... | ... | @@ -1944,13 +1945,13 @@ public class MtasCQLParser implements MtasCQLParserConstants { |
1944 | 1945 | return false; |
1945 | 1946 | } |
1946 | 1947 | |
1947 | - private boolean jj_3R_10() { | |
1948 | - if (jj_3R_12()) return true; | |
1948 | + private boolean jj_3_60() { | |
1949 | + if (jj_3R_17()) return true; | |
1949 | 1950 | return false; |
1950 | 1951 | } |
1951 | 1952 | |
1952 | - private boolean jj_3_60() { | |
1953 | - if (jj_3R_17()) return true; | |
1953 | + private boolean jj_3R_10() { | |
1954 | + if (jj_3R_12()) return true; | |
1954 | 1955 | return false; |
1955 | 1956 | } |
1956 | 1957 | |
... | ... |
src/mtas/parser/cql/MtasCQLParser.jj
... | ... | @@ -563,17 +563,18 @@ private MtasCQLParserGroupFullCondition group(String field) throws ParseExceptio |
563 | 563 | if (slash == null) |
564 | 564 | { |
565 | 565 | startGroup = true; |
566 | + endGroup = false; | |
566 | 567 | } |
567 | 568 | else |
568 | 569 | { |
569 | 570 | startGroup = false; |
570 | - } | |
571 | - endGroup = false; | |
571 | + endGroup = false; | |
572 | + } | |
572 | 573 | } |
573 | 574 | | < SLASH > condition = groupCondition(field) |
574 | 575 | { |
575 | - startGroup = true; | |
576 | - endGroup = false; | |
576 | + startGroup = false; | |
577 | + endGroup = true; | |
577 | 578 | } |
578 | 579 | ) |
579 | 580 | < GROUP_END > |
... | ... |
src/mtas/search/spans/MtasSpanEndSpans.java
... | ... | @@ -32,7 +32,8 @@ public class MtasSpanEndSpans extends Spans implements MtasSpans { |
32 | 32 | */ |
33 | 33 | @Override |
34 | 34 | public int nextStartPosition() throws IOException { |
35 | - return (spans == null) ? NO_MORE_POSITIONS : spans.nextStartPosition(); | |
35 | + spans.nextStartPosition(); | |
36 | + return startPosition(); | |
36 | 37 | } |
37 | 38 | |
38 | 39 | /* |
... | ... |
src/mtas/search/spans/MtasSpanWithinQuery.java
1 | 1 | package mtas.search.spans; |
2 | 2 | |
3 | 3 | import java.io.IOException; |
4 | +import java.util.Map; | |
5 | +import java.util.Set; | |
4 | 6 | |
5 | 7 | import org.apache.lucene.index.IndexReader; |
8 | +import org.apache.lucene.index.LeafReaderContext; | |
9 | +import org.apache.lucene.index.Term; | |
10 | +import org.apache.lucene.index.TermContext; | |
6 | 11 | import org.apache.lucene.search.IndexSearcher; |
7 | 12 | import org.apache.lucene.search.spans.SpanQuery; |
8 | 13 | import org.apache.lucene.search.spans.SpanWeight; |
9 | -import org.apache.lucene.search.spans.SpanWithinQuery; | |
14 | +import org.apache.lucene.search.spans.Spans; | |
10 | 15 | |
11 | 16 | import mtas.search.spans.util.MtasSpanQuery; |
12 | 17 | |
13 | 18 | public class MtasSpanWithinQuery extends MtasSpanQuery { |
14 | 19 | |
15 | - /** The base query. */ | |
16 | - private SpanWithinQuery baseQuery; | |
20 | + private SpanQuery little, big; | |
17 | 21 | |
18 | - public MtasSpanWithinQuery(SpanQuery q1, SpanQuery q2) { | |
22 | + public MtasSpanWithinQuery(SpanQuery little, SpanQuery big) { | |
19 | 23 | super(); |
20 | - baseQuery = new SpanWithinQuery(q1, q2); | |
24 | + if(little==null || big==null) { | |
25 | + throw new IllegalArgumentException("queries shouldn't be null"); | |
26 | + } else if (little.getField()!=null && big.getField()!=null && !little.getField().equals(big.getField())) { | |
27 | + throw new IllegalArgumentException("big ("+big.getField()+") and little ("+little.getField()+") not same field"); | |
28 | + } else { | |
29 | + this.little=little; | |
30 | + this.big=big; | |
31 | + } | |
21 | 32 | } |
22 | 33 | |
23 | 34 | @Override |
24 | 35 | public MtasSpanQuery rewrite(IndexReader reader) throws IOException { |
25 | - baseQuery = (SpanWithinQuery) baseQuery.rewrite(reader); | |
26 | - return this; | |
36 | + SpanQuery newLittle = (SpanQuery) little.rewrite(reader); | |
37 | + SpanQuery newBig = (SpanQuery) big.rewrite(reader); | |
38 | + if(newLittle!=little || newBig!=big) { | |
39 | + return new MtasSpanWithinQuery(newLittle, newBig); | |
40 | + } else { | |
41 | + return this; | |
42 | + } | |
27 | 43 | } |
28 | 44 | |
29 | 45 | @Override |
30 | 46 | public String getField() { |
31 | - return baseQuery.getField(); | |
47 | + return little.getField(); | |
32 | 48 | } |
33 | 49 | |
34 | 50 | @Override |
35 | 51 | public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) |
36 | 52 | throws IOException { |
37 | - SpanWeight sw = baseQuery.createWeight(searcher, needsScores); | |
38 | - return sw; | |
39 | - //return baseQuery.createWeight(searcher, needsScores); | |
53 | + SpanWeight littleWeight = little.createWeight(searcher, false); | |
54 | + SpanWeight bigWeight = big.createWeight(searcher, false); | |
55 | + return new MtasSpanWithinWeight(searcher, needsScores ? getTermContexts(littleWeight, bigWeight) : null, | |
56 | + bigWeight, littleWeight); | |
57 | + | |
40 | 58 | } |
41 | 59 | |
42 | 60 | @Override |
43 | 61 | public String toString(String field) { |
44 | - return baseQuery.toString(field); | |
62 | + StringBuilder buffer = new StringBuilder(); | |
63 | + buffer.append(this.getClass().getSimpleName()); | |
64 | + buffer.append("("); | |
65 | + buffer.append(little.toString(field)); | |
66 | + buffer.append(", "); | |
67 | + buffer.append(big.toString(field)); | |
68 | + buffer.append(")"); | |
69 | + return buffer.toString(); | |
45 | 70 | } |
46 | 71 | |
47 | 72 | @Override |
... | ... | @@ -53,12 +78,55 @@ public class MtasSpanWithinQuery extends MtasSpanQuery { |
53 | 78 | if (getClass() != obj.getClass()) |
54 | 79 | return false; |
55 | 80 | final MtasSpanWithinQuery that = (MtasSpanWithinQuery) obj; |
56 | - return baseQuery.equals(that.baseQuery); | |
81 | + return little.equals(that.little) && big.equals(that.big); | |
57 | 82 | } |
58 | 83 | |
59 | 84 | @Override |
60 | 85 | public int hashCode() { |
61 | - return baseQuery.hashCode(); | |
86 | + int h = Integer.rotateLeft(classHash(), 1); | |
87 | + h ^= little.hashCode(); | |
88 | + h = Integer.rotateLeft(h, 1); | |
89 | + h ^= big.hashCode(); | |
90 | + return h; | |
62 | 91 | } |
92 | + | |
93 | + public class MtasSpanWithinWeight extends SpanWeight { | |
94 | + | |
95 | + final SpanWeight bigWeight; | |
96 | + final SpanWeight littleWeight; | |
97 | + | |
98 | + public MtasSpanWithinWeight(IndexSearcher searcher, Map<Term, TermContext> terms | |
99 | + , SpanWeight littleWeight, SpanWeight bigWeight) throws IOException { | |
100 | + super(MtasSpanWithinQuery.this, searcher, terms); | |
101 | + this.littleWeight = littleWeight; | |
102 | + this.bigWeight = bigWeight; | |
103 | + } | |
104 | + | |
105 | + @Override | |
106 | + public void extractTermContexts(Map<Term, TermContext> contexts) { | |
107 | + bigWeight.extractTermContexts(contexts); | |
108 | + littleWeight.extractTermContexts(contexts); | |
109 | + } | |
110 | + | |
111 | + @Override | |
112 | + public Spans getSpans(LeafReaderContext context, Postings postings) | |
113 | + throws IOException { | |
114 | + Spans bigSpans = bigWeight.getSpans(context, postings); | |
115 | + if(bigSpans==null) { | |
116 | + return null; | |
117 | + } | |
118 | + Spans littleSpans = littleWeight.getSpans(context, postings); | |
119 | + if(littleSpans==null) { | |
120 | + return null; | |
121 | + } | |
122 | + return new MtasSpanWithinSpans(littleSpans, bigSpans); | |
123 | + } | |
124 | + | |
125 | + @Override | |
126 | + public void extractTerms(Set<Term> terms) { | |
127 | + bigWeight.extractTerms(terms); | |
128 | + littleWeight.extractTerms(terms); | |
129 | + } | |
130 | + } | |
63 | 131 | |
64 | 132 | } |
... | ... |
src/mtas/search/spans/MtasSpanWithinQueryOLD.java
0 → 100644
1 | +package mtas.search.spans; | |
2 | + | |
3 | +import java.io.IOException; | |
4 | + | |
5 | +import org.apache.lucene.index.IndexReader; | |
6 | +import org.apache.lucene.search.IndexSearcher; | |
7 | +import org.apache.lucene.search.spans.SpanQuery; | |
8 | +import org.apache.lucene.search.spans.SpanWeight; | |
9 | +import org.apache.lucene.search.spans.SpanWithinQuery; | |
10 | + | |
11 | +import mtas.search.spans.util.MtasSpanQuery; | |
12 | + | |
13 | +public class MtasSpanWithinQueryOLD extends MtasSpanQuery { | |
14 | + | |
15 | + /** The base query. */ | |
16 | + private SpanWithinQuery baseQuery; | |
17 | + | |
18 | + public MtasSpanWithinQueryOLD(SpanQuery q1, SpanQuery q2) { | |
19 | + super(); | |
20 | + baseQuery = new SpanWithinQuery(q1, q2); | |
21 | + } | |
22 | + | |
23 | + @Override | |
24 | + public MtasSpanQuery rewrite(IndexReader reader) throws IOException { | |
25 | + SpanWithinQuery newBaseQuery = (SpanWithinQuery) baseQuery.rewrite(reader); | |
26 | + if(newBaseQuery!=baseQuery) { | |
27 | + try { | |
28 | + MtasSpanWithinQueryOLD clone = (MtasSpanWithinQueryOLD) this.clone(); | |
29 | + clone.baseQuery = newBaseQuery; | |
30 | + return clone; | |
31 | + } catch (CloneNotSupportedException e) { | |
32 | + throw new AssertionError(e); | |
33 | + } | |
34 | + } else { | |
35 | + return this; | |
36 | + } | |
37 | + } | |
38 | + | |
39 | + @Override | |
40 | + public String getField() { | |
41 | + return baseQuery.getField(); | |
42 | + } | |
43 | + | |
44 | + @Override | |
45 | + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) | |
46 | + throws IOException { | |
47 | + SpanWeight sw = baseQuery.createWeight(searcher, needsScores); | |
48 | + return sw; | |
49 | + //return baseQuery.createWeight(searcher, needsScores); | |
50 | + } | |
51 | + | |
52 | + @Override | |
53 | + public String toString(String field) { | |
54 | + return baseQuery.toString(field); | |
55 | + } | |
56 | + | |
57 | + @Override | |
58 | + public boolean equals(Object obj) { | |
59 | + if (this == obj) | |
60 | + return true; | |
61 | + if (obj == null) | |
62 | + return false; | |
63 | + if (getClass() != obj.getClass()) | |
64 | + return false; | |
65 | + final MtasSpanWithinQueryOLD that = (MtasSpanWithinQueryOLD) obj; | |
66 | + return baseQuery.equals(that.baseQuery); | |
67 | + } | |
68 | + | |
69 | + @Override | |
70 | + public int hashCode() { | |
71 | + return baseQuery.hashCode(); | |
72 | + } | |
73 | + | |
74 | +} | |
... | ... |
src/mtas/search/spans/MtasSpanWithinSpans.java
0 → 100644
1 | +package mtas.search.spans; | |
2 | + | |
3 | +import java.io.IOException; | |
4 | +import java.util.Arrays; | |
5 | +import java.util.HashMap; | |
6 | +import java.util.Iterator; | |
7 | +import java.util.List; | |
8 | + | |
9 | +import org.apache.lucene.search.spans.SpanCollector; | |
10 | +import org.apache.lucene.search.spans.Spans; | |
11 | + | |
12 | +public class MtasSpanWithinSpans extends Spans { | |
13 | + | |
14 | + Spans littleSpans, bigSpans; | |
15 | + | |
16 | + private int docId; | |
17 | + private boolean calledNextStartPosition, noMorePositions; | |
18 | + private HashMap<Integer, Integer> minimumStartPositionForEndPosition; | |
19 | + private int lastBigStartPosition, largestStoredEndPosition; | |
20 | + private int lastBigSpansStartPosition, lastBigSpansEndPosition; | |
21 | + | |
22 | + public MtasSpanWithinSpans(Spans littleSpans, Spans bigSpans) { | |
23 | + this.littleSpans = littleSpans; | |
24 | + this.bigSpans = bigSpans; | |
25 | + docId = -1; | |
26 | + minimumStartPositionForEndPosition = new HashMap<Integer, Integer>(); | |
27 | + } | |
28 | + | |
29 | + @Override | |
30 | + public int nextStartPosition() throws IOException { | |
31 | + // no document | |
32 | + if (docId == -1 || docId == NO_MORE_DOCS) { | |
33 | + throw new IOException("no document"); | |
34 | + // finished | |
35 | + } else if (noMorePositions) { | |
36 | + return NO_MORE_POSITIONS; | |
37 | + // littleSpans already at start match, because of check for matching | |
38 | + // document | |
39 | + } else if (!calledNextStartPosition) { | |
40 | + calledNextStartPosition = true; | |
41 | + return littleSpans.startPosition(); | |
42 | + // compute next match | |
43 | + } else { | |
44 | + if (goToNextStartPosition()) { | |
45 | + // match found | |
46 | + return littleSpans.startPosition(); | |
47 | + } else { | |
48 | + // no more matches: document finished | |
49 | + noMorePositions = true; | |
50 | + return NO_MORE_POSITIONS; | |
51 | + } | |
52 | + } | |
53 | + } | |
54 | + | |
55 | + @Override | |
56 | + public int startPosition() { | |
57 | + return littleSpans.startPosition(); | |
58 | + } | |
59 | + | |
60 | + @Override | |
61 | + public int endPosition() { | |
62 | + return littleSpans.endPosition(); | |
63 | + } | |
64 | + | |
65 | + @Override | |
66 | + public int width() { | |
67 | + return littleSpans.width(); | |
68 | + } | |
69 | + | |
70 | + @Override | |
71 | + public void collect(SpanCollector collector) throws IOException { | |
72 | + bigSpans.collect(collector); | |
73 | + littleSpans.collect(collector); | |
74 | + } | |
75 | + | |
76 | + @Override | |
77 | + public float positionsCost() { | |
78 | + return 0; | |
79 | + } | |
80 | + | |
81 | + @Override | |
82 | + public int docID() { | |
83 | + return docId; | |
84 | + } | |
85 | + | |
86 | + @Override | |
87 | + public int nextDoc() throws IOException { | |
88 | + reset(); | |
89 | + while (!goToNextDoc()) | |
90 | + ; | |
91 | + return docId; | |
92 | + } | |
93 | + | |
94 | + @Override | |
95 | + public int advance(int target) throws IOException { | |
96 | + reset(); | |
97 | + if (docId == NO_MORE_DOCS) { | |
98 | + return docId; | |
99 | + } else if (target < docId) { | |
100 | + // should not happen | |
101 | + docId = NO_MORE_DOCS; | |
102 | + return docId; | |
103 | + } else { | |
104 | + int littleDocId = littleSpans.docID(); | |
105 | + int bigDocId = littleSpans.docID(); | |
106 | + // advance little | |
107 | + if (littleDocId < target) { | |
108 | + littleDocId = littleSpans.advance(target); | |
109 | + } | |
110 | + // advance big | |
111 | + if (bigDocId < target) { | |
112 | + bigDocId = bigSpans.advance(target); | |
113 | + } | |
114 | + docId = Math.max(littleDocId, bigDocId); | |
115 | + if (docId == NO_MORE_DOCS) { | |
116 | + return docId; | |
117 | + } else { | |
118 | + if (!goToNextStartPosition()) { | |
119 | + return nextDoc(); | |
120 | + } else { | |
121 | + return docId; | |
122 | + } | |
123 | + } | |
124 | + } | |
125 | + } | |
126 | + | |
127 | + private boolean goToNextDoc() throws IOException { | |
128 | + if (docId == NO_MORE_DOCS) { | |
129 | + return true; | |
130 | + } else { | |
131 | + int littleDocId = littleSpans.nextDoc(); | |
132 | + int bigDocId = bigSpans.advance(littleDocId); | |
133 | + docId = bigDocId; | |
134 | + while (littleDocId != bigDocId && docId != NO_MORE_DOCS) { | |
135 | + if (littleDocId < bigDocId) { | |
136 | + littleDocId = littleSpans.advance(bigDocId); | |
137 | + docId = littleDocId; | |
138 | + } else { | |
139 | + bigDocId = bigSpans.advance(littleDocId); | |
140 | + docId = bigDocId; | |
141 | + } | |
142 | + } | |
143 | + if (docId != NO_MORE_DOCS) { | |
144 | + if(!goToNextStartPosition()) { | |
145 | + reset(); | |
146 | + return false; | |
147 | + } | |
148 | + } | |
149 | + return true; | |
150 | + } | |
151 | + } | |
152 | + | |
153 | + private boolean goToNextStartPosition() throws IOException { | |
154 | + int nextLittleSpansStartPosition, nextLittleSpansEndPosition; | |
155 | + while ((nextLittleSpansStartPosition = littleSpans | |
156 | + .nextStartPosition()) != NO_MORE_POSITIONS) { | |
157 | + nextLittleSpansEndPosition = littleSpans.endPosition(); | |
158 | + // check last | |
159 | + if (nextLittleSpansStartPosition >= lastBigSpansStartPosition | |
160 | + && nextLittleSpansEndPosition <= lastBigSpansEndPosition) { | |
161 | + return true; | |
162 | + // check stored values | |
163 | + } else if (nextLittleSpansEndPosition <= largestStoredEndPosition) { | |
164 | + if (nextLittleSpansStartPosition >= lastBigStartPosition) { | |
165 | + return true; | |
166 | + } else { | |
167 | + Iterator<Integer> it = minimumStartPositionForEndPosition.keySet() | |
168 | + .iterator(); | |
169 | + int bigEndPosition; | |
170 | + while (it.hasNext()) { | |
171 | + bigEndPosition = it.next(); | |
172 | + // remove | |
173 | + if (bigEndPosition < nextLittleSpansStartPosition) { | |
174 | + it.remove(); | |
175 | + // check for match | |
176 | + } else if (nextLittleSpansEndPosition <= bigEndPosition | |
177 | + && nextLittleSpansStartPosition >= minimumStartPositionForEndPosition | |
178 | + .get(bigEndPosition)) { | |
179 | + return true; | |
180 | + } | |
181 | + } | |
182 | + } | |
183 | + } | |
184 | + //check new bigSpans | |
185 | + while(nextLittleSpansStartPosition>=lastBigStartPosition) { | |
186 | + // store previous | |
187 | + if (nextLittleSpansStartPosition <= lastBigSpansEndPosition) { | |
188 | + minimumStartPositionForEndPosition.put(lastBigSpansEndPosition, | |
189 | + lastBigSpansStartPosition); | |
190 | + largestStoredEndPosition = Math.max(lastBigSpansEndPosition, | |
191 | + largestStoredEndPosition); | |
192 | + } | |
193 | + lastBigSpansStartPosition = bigSpans.nextStartPosition(); | |
194 | + lastBigSpansEndPosition = bigSpans.endPosition(); | |
195 | + lastBigStartPosition = lastBigSpansStartPosition; | |
196 | + if (lastBigSpansStartPosition == NO_MORE_POSITIONS) { | |
197 | + noMorePositions = true; | |
198 | + return false; | |
199 | + } else if(nextLittleSpansStartPosition>=lastBigSpansStartPosition && nextLittleSpansEndPosition<=lastBigSpansEndPosition) { | |
200 | + return true; | |
201 | + } | |
202 | + } | |
203 | + } | |
204 | + return false; | |
205 | + } | |
206 | + | |
207 | + private void reset() { | |
208 | + noMorePositions = false; | |
209 | + calledNextStartPosition = false; | |
210 | + lastBigStartPosition = -1; | |
211 | + largestStoredEndPosition = -1; | |
212 | + lastBigSpansStartPosition = -1; | |
213 | + lastBigSpansEndPosition = -1; | |
214 | + minimumStartPositionForEndPosition.clear(); | |
215 | + } | |
216 | + | |
217 | + @Override | |
218 | + public long cost() { | |
219 | + return 0; | |
220 | + } | |
221 | + | |
222 | +} | |
... | ... |