MtasSpanRecurrenceSpans.java.html
19.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" lang="en"><head><meta http-equiv="Content-Type" content="text/html;charset=UTF-8"/><link rel="stylesheet" href="../jacoco-resources/report.css" type="text/css"/><link rel="shortcut icon" href="../jacoco-resources/report.gif" type="image/gif"/><title>MtasSpanRecurrenceSpans.java</title><link rel="stylesheet" href="../jacoco-resources/prettify.css" type="text/css"/><script type="text/javascript" src="../jacoco-resources/prettify.js"></script></head><body onload="window['PR_TAB_WIDTH']=4;prettyPrint()"><div class="breadcrumb" id="breadcrumb"><span class="info"><a href="../jacoco-sessions.html" class="el_session">Sessions</a></span><a href="../index.html" class="el_report">MTAS</a> > <a href="index.source.html" class="el_package">mtas.search.spans</a> > <span class="el_source">MtasSpanRecurrenceSpans.java</span></div><h1>MtasSpanRecurrenceSpans.java</h1><pre class="source lang-java linenums">package mtas.search.spans;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.Spans;
import mtas.search.spans.util.MtasIgnoreItem;
import mtas.search.spans.util.MtasSpans;
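/**
* Spans implementation that reports recurrences of a wrapped {@link Spans}:
* a match is a chain of between {@code minimumRecurrence} and
* {@code maximumRecurrence} spans in which every next span starts at the end
* position of the chain so far, or at one of the positions supplied by the
* ignore item for that end position.
*/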
<span class="pc bpc" id="L20" title="1 of 2 branches missed.">public class MtasSpanRecurrenceSpans extends MtasSpans {</span>
/** Class-level logger; used by expandWithIgnoreItem to report IOExceptions. */
<span class="fc" id="L23"> private static Log log = LogFactory.getLog(MtasSpanRecurrenceSpans.class);</span>
/** The query passed to the constructor; consulted in asTwoPhaseIterator. */
private MtasSpanRecurrenceQuery query;
/** The wrapped spans; documents and positions are read from this object. */
private Spans spans;
/**
* Ignore item built in the constructor from the ignore spans and the maximum
* ignore length; queried for additional end positions while chaining spans.
*/
private MtasIgnoreItem ignoreItem;
/** Smallest number of chained spans that forms a match. */
int minimumRecurrence;
/** Largest number of chained spans that forms a match. */
int maximumRecurrence;
/**
* Spans read from the wrapped spans for the current document that have not
* yet been taken as the first span of a chain.
*/
List<Match> queueSpans;
/** Matches already constructed but not yet returned by nextStartPosition. */
List<Match> queueMatches;
/**
* The match the iterator is currently positioned on, or null when it is not
* positioned on a match.
*/
Match currentMatch;
/** Set once nextStartPosition has returned NO_MORE_POSITIONS. */
boolean noMorePositions;
/** Start position of the span most recently read; reset to 0 per document. */
int lastStartPosition; // startPosition of last retrieved span
/** True once the wrapped spans reported NO_MORE_POSITIONS for this document. */
boolean lastSpan; // last span for this document added to queue
/**
* Creates recurrence spans over the given spans, builds the ignore item and
* resets all queues and state flags.
*
* @param query the query, stored for later use in asTwoPhaseIterator
* @param spans the wrapped spans whose occurrences are chained into matches
* @param minimumRecurrence the minimum recurrence; asserted to be at least 1
* and not larger than maximumRecurrence
* @param maximumRecurrence the maximum recurrence
* @param ignoreSpans the ignore spans, handed to MtasIgnoreItem
* @param maximumIgnoreLength the maximum ignore length, handed to
* MtasIgnoreItem
*/
public MtasSpanRecurrenceSpans(MtasSpanRecurrenceQuery query, Spans spans,
int minimumRecurrence, int maximumRecurrence, Spans ignoreSpans,
<span class="fc" id="L70"> Integer maximumIgnoreLength) {</span>
// both preconditions are plain assert statements, so they are only checked
// when assertions are enabled
<span class="pc bpc" id="L71" title="2 of 4 branches missed."> assert minimumRecurrence <= maximumRecurrence : "minimumRecurrence > maximumRecurrence";</span>
<span class="pc bpc" id="L72" title="2 of 4 branches missed."> assert minimumRecurrence > 0 : "minimumRecurrence < 1 not supported";</span>
<span class="fc" id="L73"> this.query = query;</span>
<span class="fc" id="L74"> this.spans = spans;</span>
<span class="fc" id="L75"> this.minimumRecurrence = minimumRecurrence;</span>
<span class="fc" id="L76"> this.maximumRecurrence = maximumRecurrence;</span>
<span class="fc" id="L77"> queueSpans = new ArrayList<>();</span>
<span class="fc" id="L78"> queueMatches = new ArrayList<>();</span>
<span class="fc" id="L79"> ignoreItem = new MtasIgnoreItem(ignoreSpans, maximumIgnoreLength);</span>
// the queues must exist before resetQueue clears them
<span class="fc" id="L80"> resetQueue();</span>
<span class="fc" id="L81"> }</span>
/*
* Moves to the next recurrence match in the current document.
*
* When findMatches() reports a queued match, the first entry of queueMatches
* becomes the current match, is removed from the queue, and its start
* position is returned. Otherwise the current match is cleared,
* noMorePositions is set and NO_MORE_POSITIONS is returned.
*
* @see org.apache.lucene.search.spans.Spans#nextStartPosition()
*/
@Override
public int nextStartPosition() throws IOException {
<span class="fc bfc" id="L90" title="All 2 branches covered."> if (findMatches()) {</span>
<span class="fc" id="L91"> currentMatch = queueMatches.get(0);</span>
<span class="fc" id="L92"> queueMatches.remove(0);</span>
<span class="fc" id="L93"> noMorePositions = false;</span>
<span class="fc" id="L94"> return currentMatch.startPosition();</span>
} else {
<span class="fc" id="L96"> currentMatch = null;</span>
<span class="fc" id="L97"> noMorePositions = true;</span>
<span class="fc" id="L98"> return NO_MORE_POSITIONS;</span>
}
}
/*
* Returns the start position of the current match.
*
* Without a current match the result is NO_MORE_POSITIONS when the positions
* of this document are exhausted (noMorePositions is set) and -1 otherwise.
*
* @see org.apache.lucene.search.spans.Spans#startPosition()
*/
@Override
public int startPosition() {
<span class="pc bpc" id="L109" title="1 of 2 branches missed."> if (currentMatch == null) {</span>
<span class="nc bnc" id="L110" title="All 2 branches missed."> if (noMorePositions) {</span>
<span class="nc" id="L111"> return NO_MORE_POSITIONS;</span>
} else {
<span class="nc" id="L113"> return -1;</span>
}
} else {
<span class="fc" id="L116"> return currentMatch.startPosition();</span>
}
}
/*
* Returns the end position of the current match.
*
* Without a current match the result is NO_MORE_POSITIONS when the positions
* of this document are exhausted (noMorePositions is set) and -1 otherwise.
*
* @see org.apache.lucene.search.spans.Spans#endPosition()
*/
@Override
public int endPosition() {
<span class="pc bpc" id="L127" title="1 of 2 branches missed."> if (currentMatch == null) {</span>
<span class="nc bnc" id="L128" title="All 2 branches missed."> if (noMorePositions) {</span>
<span class="nc" id="L129"> return NO_MORE_POSITIONS;</span>
} else {
<span class="nc" id="L131"> return -1;</span>
}
} else {
<span class="fc" id="L134"> return currentMatch.endPosition();</span>
}
}
/*
* Returns the constant 1; the value does not depend on the current match.
*
* @see org.apache.lucene.search.spans.Spans#width()
*/
@Override
public int width() {
<span class="nc" id="L145"> return 1;</span>
}
/*
* Forwards the collector to the wrapped spans.
*
* NOTE(review): the wrapped spans are read ahead while matches are being
* constructed (see collectSpan and findMatches), so at the time of this call
* they are not necessarily positioned on the current match - confirm that
* this is the intended behaviour.
*
* @see
* org.apache.lucene.search.spans.Spans#collect(org.apache.lucene.search.spans
* .SpanCollector)
*/
@Override
public void collect(SpanCollector collector) throws IOException {
<span class="nc" id="L157"> spans.collect(collector);</span>
<span class="nc" id="L158"> }</span>
/*
* Returns the document id reported by the wrapped spans.
*
* @see org.apache.lucene.search.DocIdSetIterator#docID()
*/
@Override
public int docID() {
<span class="fc" id="L167"> return spans.docID();</span>
}
/*
* Clears all per-document state, moves the wrapped spans to their next
* document and then continues with toMatchDoc() to find the first document
* that holds at least one match. Returns NO_MORE_DOCS when the wrapped spans
* are exhausted.
*
* @see org.apache.lucene.search.DocIdSetIterator#nextDoc()
*/
@Override
public int nextDoc() throws IOException {
<span class="fc" id="L177"> resetQueue();</span>
<span class="fc bfc" id="L178" title="All 2 branches covered."> return (spans.nextDoc() == NO_MORE_DOCS) ? NO_MORE_DOCS : toMatchDoc();</span>
}
/*
* Clears all per-document state, advances the wrapped spans to the given
* target and then continues with toMatchDoc() to find the first document
* that holds at least one match. Returns NO_MORE_DOCS when the wrapped spans
* are exhausted.
*
* @see org.apache.lucene.search.DocIdSetIterator#advance(int)
*/
@Override
public int advance(int target) throws IOException {
<span class="fc" id="L188"> resetQueue();</span>
<span class="pc bpc" id="L189" title="1 of 2 branches missed."> return (spans.advance(target) == NO_MORE_DOCS) ? NO_MORE_DOCS</span>
<span class="fc" id="L190"> : toMatchDoc();</span>
}
/**
* Resets all per-document state: empties both queues, sets lastStartPosition
* back to 0, and clears lastSpan, currentMatch and noMorePositions.
*/
void resetQueue() {
<span class="fc" id="L197"> queueSpans.clear();</span>
<span class="fc" id="L198"> queueMatches.clear();</span>
<span class="fc" id="L199"> lastStartPosition = 0;</span>
<span class="fc" id="L200"> lastSpan = false;</span>
<span class="fc" id="L201"> currentMatch = null;</span>
<span class="fc" id="L202"> noMorePositions = false;</span>
<span class="fc" id="L203"> }</span>
/**
* Starting at the document the wrapped spans are positioned on, moves
* forward until a document with at least one match is found.
*
* @return the id of the first document for which findMatches() succeeds, or
* NO_MORE_DOCS when the wrapped spans run out of documents
* @throws IOException Signals that an I/O exception has occurred.
*/
int toMatchDoc() throws IOException {
while (true) {
<span class="pc bpc" id="L213" title="1 of 2 branches missed."> if (findMatches()) {</span>
<span class="fc" id="L214"> return docID();</span>
}
// no match in this document: drop its state and try the next one
<span class="nc" id="L216"> resetQueue();</span>
<span class="nc bnc" id="L217" title="All 2 branches missed."> if (spans.nextDoc() == NO_MORE_DOCS) {</span>
<span class="nc" id="L218"> return NO_MORE_DOCS;</span>
}
}
}
/**
* Reads the next span of the current document from the wrapped spans and
* appends it to queueSpans, updating lastStartPosition. Once the wrapped
* spans report NO_MORE_POSITIONS, lastSpan is set and no further reads are
* attempted for this document.
*
* @return true, if a span was added to the queue
* @throws IOException Signals that an I/O exception has occurred.
*/
// try to get something in the queue of spans
private boolean collectSpan() throws IOException {
<span class="fc bfc" id="L231" title="All 2 branches covered."> if (lastSpan) {</span>
<span class="fc" id="L232"> return false;</span>
<span class="fc bfc" id="L233" title="All 2 branches covered."> } else if (spans.nextStartPosition() == NO_MORE_POSITIONS) {</span>
<span class="fc" id="L234"> lastSpan = true;</span>
<span class="fc" id="L235"> return false;</span>
} else {
<span class="fc" id="L237"> queueSpans.add(new Match(spans.startPosition(), spans.endPosition()));</span>
<span class="fc" id="L238"> lastStartPosition = spans.startPosition();</span>
<span class="fc" id="L239"> return true;</span>
}
}
/**
* Makes sure queueMatches holds at least one match for the current document,
* if one can still be constructed.
*
* When the queue is empty, spans are taken from queueSpans one start
* position at a time (reading more from the wrapped spans when needed). For
* every span with that start position, chains of minimumRecurrence up to
* maximumRecurrence spans are built by findMatches(Match, int), which adds
* completed chains to queueMatches. This repeats with the next start
* position until a match is queued or no spans are left.
*
* @return true, if queueMatches is not empty on return
* @throws IOException Signals that an I/O exception has occurred.
*/
private boolean findMatches() throws IOException {
// check for something in queue of matches
<span class="fc bfc" id="L251" title="All 2 branches covered."> if (!queueMatches.isEmpty()) {</span>
<span class="fc" id="L252"> return true;</span>
} else {
<span class="fc" id="L254"> ignoreItem.advanceToDoc(spans.docID());</span>
while (true) {
// try to get something in queue of spans
<span class="fc bfc" id="L257" title="All 4 branches covered."> if (queueSpans.isEmpty() && !collectSpan()) {</span>
<span class="fc" id="L258"> return false;</span>
}
// try to get matches with first span in queue
<span class="fc" id="L261"> Match firstMatch = queueSpans.remove(0);</span>
// create a list of matches with same startPosition as firstMatch
<span class="fc" id="L263"> List<Match> matches = new ArrayList<>();</span>
<span class="fc" id="L264"> matches.add(firstMatch);</span>
// NOTE(review): the ignore item expansion of firstMatch is disabled
// below, while additional matches further down are still expanded -
// confirm that this asymmetry is intended
// matches.addAll(expandWithIgnoreItem(spans.docID(), firstMatch));
// try to collect spans until lastStartPosition not equal to
// startPosition of firstMatch
<span class="fc bfc" id="L268" title="All 4 branches covered."> while (!lastSpan && (lastStartPosition == firstMatch.startPosition())) {</span>
<span class="fc" id="L269"> collectSpan();</span>
}
// move every queued span that shares the start position of firstMatch
// into the list of chain starting points
<span class="fc bfc" id="L271" title="All 2 branches covered."> while (!queueSpans.isEmpty() && (queueSpans.get(0)</span>
<span class="pc bpc" id="L272" title="1 of 2 branches missed."> .startPosition() == firstMatch.startPosition())) {</span>
<span class="nc" id="L273"> Match additionalMatch = queueSpans.remove(0);</span>
<span class="nc" id="L274"> matches.add(additionalMatch);</span>
<span class="nc" id="L275"> matches.addAll(expandWithIgnoreItem(spans.docID(), additionalMatch));</span>
<span class="nc" id="L276"> }</span>
// construct all matches for this startPosition
// n counts the spans still to be appended, hence the minus one
<span class="fc bfc" id="L278" title="All 2 branches covered."> for (Match match : matches) {</span>
<span class="fc bfc" id="L279" title="All 2 branches covered."> for (int n = (minimumRecurrence - 1); n <= (maximumRecurrence</span>
<span class="fc" id="L280"> - 1); n++) {</span>
<span class="fc" id="L281"> findMatches(match, n);</span>
}
<span class="fc" id="L283"> }</span>
// check for something in queue of matches
<span class="fc bfc" id="L285" title="All 2 branches covered."> if (!queueMatches.isEmpty()) {</span>
// presumably lets the ignore item discard data before the first
// queued match - verify against MtasIgnoreItem
<span class="fc" id="L286"> ignoreItem.removeBefore(spans.docID(),</span>
<span class="fc" id="L287"> queueMatches.get(0).startPosition());</span>
<span class="fc" id="L288"> return true;</span>
}
<span class="fc" id="L290"> }</span>
}
}
/**
* Recursively extends the given match with n further spans.
*
* With n equal to 0 the match is complete and is added to queueMatches
* unless an equal match is already queued. With n larger than 0 every span
* that starts at the end position of the match, or at a position contained
* in the set returned by ignoreItem.getFullEndPositionList for that end
* position, is appended, and the extended match is processed with n - 1.
* Candidate spans are first taken from queueSpans; after that more spans are
* read from the wrapped spans for as long as the start position of the last
* span read does not exceed the largest end position matched so far.
*
* @param match the match constructed so far
* @param n the number of spans that still have to be appended
* @throws IOException Signals that an I/O exception has occurred.
*/
private void findMatches(Match match, int n) throws IOException {
<span class="fc bfc" id="L302" title="All 2 branches covered."> if (n > 0) {</span>
<span class="fc" id="L303"> int largestMatchingEndPosition = match.endPosition();</span>
// alternative positions at which a following span may start; may be null
<span class="fc" id="L304"> Set<Integer> list = ignoreItem.getFullEndPositionList(spans.docID(),</span>
<span class="fc" id="L305"> match.endPosition());</span>
// try to find matches with existing queue
<span class="fc bfc" id="L307" title="All 2 branches covered."> if (!queueSpans.isEmpty()) {</span>
Match span;
// index based loop: the recursive call below may append to queueSpans
<span class="fc bfc" id="L309" title="All 2 branches covered."> for (int i = 0; i < queueSpans.size(); i++) {</span>
<span class="fc" id="L310"> span = queueSpans.get(i);</span>
<span class="pc bpc" id="L311" title="1 of 4 branches missed."> if (match.endPosition() == span.startPosition()</span>
<span class="nc bnc" id="L312" title="All 2 branches missed."> || (list != null && list.contains(span.startPosition()))) {</span>
<span class="fc" id="L313"> findMatches(new Match(match.startPosition(), span.endPosition()),</span>
(n - 1));
<span class="fc" id="L315"> largestMatchingEndPosition = Math.max(largestMatchingEndPosition,</span>
<span class="fc" id="L316"> span.endPosition());</span>
}
}
}
// extend queue if necessary and possible
<span class="fc bfc" id="L321" title="All 4 branches covered."> while (!lastSpan && (largestMatchingEndPosition >= lastStartPosition)) {</span>
<span class="fc bfc" id="L322" title="All 2 branches covered."> if (spans.nextStartPosition() == NO_MORE_POSITIONS) {</span>
<span class="fc" id="L323"> lastSpan = true;</span>
} else {
<span class="fc" id="L325"> Match span = new Match(spans.startPosition(), spans.endPosition());</span>
<span class="fc" id="L326"> queueSpans.add(span);</span>
<span class="fc" id="L327"> lastStartPosition = spans.startPosition();</span>
// check if this provides new match
<span class="pc bpc" id="L329" title="2 of 4 branches missed."> if (match.endPosition() == span.startPosition()</span>
<span class="nc bnc" id="L330" title="All 2 branches missed."> || (list != null && list.contains(span.startPosition()))) {</span>
<span class="nc" id="L331"> findMatches(new Match(match.startPosition(), span.endPosition()),</span>
(n - 1));
<span class="nc" id="L333"> largestMatchingEndPosition = Math.max(largestMatchingEndPosition,</span>
<span class="nc" id="L334"> span.endPosition());</span>
}
<span class="fc" id="L336"> }</span>
}
<span class="fc" id="L338"> } else {</span>
// only unique spans
<span class="pc bpc" id="L340" title="1 of 2 branches missed."> if (!queueMatches.contains(match)) {</span>
<span class="fc" id="L341"> queueMatches.add(match);</span>
}
}
<span class="fc" id="L344"> }</span>
/**
* Builds variants of the given match that keep its start position and use,
* as end position, each value returned by
* ignoreItem.getFullEndPositionList for the end position of the match.
*
* An IOException raised by the ignore item is not propagated: it is logged
* at debug level and whatever was collected so far is returned.
*
* @param docId the doc id
* @param match the match to expand
* @return the list of expanded matches; empty when the ignore item returns
* null or an exception occurs before anything was added
*/
private List<Match> expandWithIgnoreItem(int docId, Match match) {
<span class="nc" id="L354"> List<Match> list = new ArrayList<>();</span>
try {
<span class="nc" id="L356"> Set<Integer> ignoreList = ignoreItem.getFullEndPositionList(docId,</span>
<span class="nc" id="L357"> match.endPosition);</span>
<span class="nc bnc" id="L358" title="All 2 branches missed."> if (ignoreList != null) {</span>
<span class="nc bnc" id="L359" title="All 2 branches missed."> for (Integer endPosition : ignoreList) {</span>
<span class="nc" id="L360"> list.add(new Match(match.startPosition, endPosition));</span>
<span class="nc" id="L361"> }</span>
}
<span class="nc" id="L363"> } catch (IOException e) {</span>
// deliberately swallowed: the failure is only visible in the debug log
<span class="nc" id="L364"> log.debug(e);</span>
<span class="nc" id="L365"> }</span>
<span class="nc" id="L366"> return list;</span>
}
/**
* Pair of a start position and an end position. Used both for single spans
* read from the wrapped spans and for chains constructed from them. Two
* instances are equal when both positions are equal.
*/
private static class Match {
/** The start position. */
private int startPosition;
/** The end position. */
private int endPosition;
/**
* Instantiates a new match.
*
* @param startPosition the start position
* @param endPosition the end position
*/
<span class="fc" id="L386"> Match(int startPosition, int endPosition) {</span>
<span class="fc" id="L387"> this.startPosition = startPosition;</span>
<span class="fc" id="L388"> this.endPosition = endPosition;</span>
<span class="fc" id="L389"> }</span>
/**
* Returns the start position given at construction.
*
* @return the start position
*/
public int startPosition() {
<span class="fc" id="L397"> return startPosition;</span>
}
/**
* Returns the end position given at construction.
*
* @return the end position
*/
public int endPosition() {
<span class="fc" id="L406"> return endPosition;</span>
}
/*
* Equal only to another Match of exactly the same class with the same
* start position and end position.
*
* @see java.lang.Object#equals(java.lang.Object)
*/
@Override
public boolean equals(Object obj) {
<span class="pc bpc" id="L416" title="1 of 2 branches missed."> if (this == obj)</span>
<span class="nc" id="L417"> return true;</span>
<span class="pc bpc" id="L418" title="1 of 2 branches missed."> if (obj == null)</span>
<span class="nc" id="L419"> return false;</span>
<span class="pc bpc" id="L420" title="1 of 2 branches missed."> if (getClass() != obj.getClass())</span>
<span class="nc" id="L421"> return false;</span>
<span class="fc" id="L422"> final Match that = (Match) obj;</span>
<span class="pc bpc" id="L423" title="2 of 4 branches missed."> return startPosition == that.startPosition</span>
&& endPosition == that.endPosition;
}
/*
* Combines the hash of the simple class name with both positions, the same
* values that equals compares.
*
* @see java.lang.Object#hashCode()
*/
@Override
public int hashCode() {
<span class="nc" id="L434"> int h = this.getClass().getSimpleName().hashCode();</span>
<span class="nc" id="L435"> h = (h * 5) ^ startPosition;</span>
<span class="nc" id="L436"> h = (h * 7) ^ endPosition;</span>
<span class="nc" id="L437"> return h;</span>
}
}
/*
* Returns the cost reported by the wrapped spans, or 0 when there are no
* wrapped spans.
*
* @see org.apache.lucene.search.DocIdSetIterator#cost()
*/
@Override
public long cost() {
<span class="pc bpc" id="L449" title="1 of 2 branches missed."> return (spans == null) ? 0 : spans.cost();</span>
}
/*
* Returns the constant 0; no estimate is derived from the wrapped spans.
*
* @see org.apache.lucene.search.spans.Spans#positionsCost()
*/
@Override
public float positionsCost() {
<span class="nc" id="L459"> return 0;</span>
}
/*
* Currently always returns null: both the branch taken when there are no
* wrapped spans or the query does not allow a two phase iterator, and the
* remaining branch (still marked TODO), return null.
*
* @see mtas.search.spans.util.MtasSpans#asTwoPhaseIterator()
*/
@Override
public TwoPhaseIterator asTwoPhaseIterator() {
<span class="nc bnc" id="L469" title="All 4 branches missed."> if (spans == null || !query.twoPhaseIteratorAllowed()) {</span>
<span class="nc" id="L470"> return null;</span>
} else {
// TODO
<span class="nc" id="L473"> return null;</span>
}
}
}
</pre><div class="footer"><span class="right">Created with <a href="http://www.jacoco.org/jacoco">JaCoCo</a> 0.7.9.201702052155</span></div></body></html>