Commit e6d70fdccba030002b4862e8605e2847faaa23bd (1 parent: c5b577e1)
new tests
Showing 23 changed files with 1118 additions and 302 deletions
docker/Dockerfile
1 | 1 | # Automatically generated Dockerfile |
2 | -# - Build 2017-02-21 07:48 | |
2 | +# - Build 2017-02-27 11:37 | |
3 | 3 | # - Lucene/Solr version 6.4.1 |
4 | 4 | # - Mtas release 20170220 |
5 | 5 | # |
... | ... | @@ -55,7 +55,7 @@ RUN apt-get update && apt-get install -y lsof software-properties-common python- |
55 | 55 | && chmod -R 755 /var/www/html \ |
56 | 56 | && printf "echo\n" >> /start.sh \ |
57 | 57 | && printf "echo \"================ Mtas -- Multi Tier Annotation Search =================\"\n" >> /start.sh \ |
58 | -&& printf "echo \" Timestamp 2017-02-21 07:48\"\n" >> /start.sh \ | |
58 | +&& printf "echo \" Timestamp 2017-02-27 11:37\"\n" >> /start.sh \ | |
59 | 59 | && printf "echo \" Lucene/Solr version 6.4.1\"\n" >> /start.sh \ |
60 | 60 | && printf "echo \" Mtas release 20170220\"\n" >> /start.sh \ |
61 | 61 | && printf "echo \" See https://meertensinstituut.github.io/mtas/ for more information\"\n" >> /start.sh \ |
... | ... |
junit/mtas/parser/MtasCQLParserTestSentence.java
... | ... | @@ -23,16 +23,6 @@ import mtas.search.spans.util.MtasSpanQuery; |
23 | 23 | |
24 | 24 | public class MtasCQLParserTestSentence { |
25 | 25 | |
26 | - @org.junit.Test | |
27 | - public void test() { | |
28 | - try { | |
29 | - basicTests(); | |
30 | - } catch (ParseException e) { | |
31 | - // TODO Auto-generated catch block | |
32 | - e.printStackTrace(); | |
33 | - } | |
34 | - } | |
35 | - | |
36 | 26 | private void testCQLParse(String field, String defaultPrefix, String cql, MtasSpanQuery q) { |
37 | 27 | MtasCQLParser p = new MtasCQLParser(new BufferedReader(new StringReader(cql))); |
38 | 28 | try { |
... | ... | @@ -57,31 +47,9 @@ public class MtasCQLParserTestSentence { |
57 | 47 | } |
58 | 48 | } |
59 | 49 | |
60 | - private void basicTests() throws ParseException { | |
61 | - basicTest1(); | |
62 | - basicTest2(); | |
63 | - basicTest3(); | |
64 | - basicTest4(); | |
65 | - basicTest5(); | |
66 | - basicTest6(); | |
67 | - basicTest7(); | |
68 | - basicTest8(); | |
69 | - basicTest9(); | |
70 | - basicTest10(); | |
71 | - basicTest11(); | |
72 | - basicTest12(); | |
73 | - basicTest13(); | |
74 | - basicTest14(); | |
75 | - basicTest15(); | |
76 | - basicTest16(); | |
77 | - basicTest17(); | |
78 | - basicTest18(); | |
79 | - basicTest19(); | |
80 | - basicTest20(); | |
81 | - basicTest21(); | |
82 | - } | |
83 | 50 | |
84 | - private void basicTest1() throws ParseException { | |
51 | + @org.junit.Test | |
52 | + public void basicTestCQL1() throws ParseException { | |
85 | 53 | String field = "testveld"; |
86 | 54 | String cql = "[pos=\"LID\"] [lemma=\"koe\"]"; |
87 | 55 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID", null, null); |
... | ... | @@ -93,14 +61,16 @@ public class MtasCQLParserTestSentence { |
93 | 61 | testCQLParse(field, null, cql, q); |
94 | 62 | } |
95 | 63 | |
96 | - private void basicTest2() { | |
64 | + @org.junit.Test | |
65 | + public void basicTestCQL2() { | |
97 | 66 | String field = "testveld"; |
98 | 67 | String cql1 = "[pos=\"LID\"] [] []? [] [lemma=\"koe\"]"; |
99 | 68 | String cql2 = "[pos=\"LID\"] []{2,3} [lemma=\"koe\"]"; |
100 | 69 | testCQLEquivalent(field, null, cql1, cql2); |
101 | 70 | } |
102 | 71 | |
103 | - private void basicTest3() throws ParseException { | |
72 | + @org.junit.Test | |
73 | + public void basicTestCQL3() throws ParseException { | |
104 | 74 | String field = "testveld"; |
105 | 75 | String cql = "[pos=\"LID\"] | [lemma=\"koe\"]"; |
106 | 76 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID", null, null); |
... | ... | @@ -109,7 +79,8 @@ public class MtasCQLParserTestSentence { |
109 | 79 | testCQLParse(field, null, cql, q); |
110 | 80 | } |
111 | 81 | |
112 | - private void basicTest4() throws ParseException { | |
82 | + @org.junit.Test | |
83 | + public void basicTestCQL4() throws ParseException { | |
113 | 84 | String field = "testveld"; |
114 | 85 | String cql = "[pos=\"LID\"] | ([lemma=\"de\"] [lemma=\"koe\"])"; |
115 | 86 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID", null, null); |
... | ... | @@ -123,28 +94,32 @@ public class MtasCQLParserTestSentence { |
123 | 94 | testCQLParse(field, null, cql, q); |
124 | 95 | } |
125 | 96 | |
126 | - private void basicTest5() { | |
97 | + @org.junit.Test | |
98 | + public void basicTestCQL5() { | |
127 | 99 | String field = "testveld"; |
128 | 100 | String cql1 = "([pos=\"LID\"]([pos=\"ADJ\"][lemma=\"koe\"]))"; |
129 | 101 | String cql2 = "[pos=\"LID\"][pos=\"ADJ\"][lemma=\"koe\"]"; |
130 | 102 | testCQLEquivalent(field, null, cql1, cql2); |
131 | 103 | } |
132 | 104 | |
133 | - private void basicTest6() { | |
105 | + @org.junit.Test | |
106 | + public void basicTestCQL6() { | |
134 | 107 | String field = "testveld"; |
135 | 108 | String cql1 = "([pos=\"LID\"]|[lemma=\"de\"][lemma=\"koe\"])|([pos=\"ADJ\"]|([lemma=\"het\"]([lemma=\"paard\"])))"; |
136 | 109 | String cql2 = "[pos=\"LID\"]|[lemma=\"de\"][lemma=\"koe\"]|[pos=\"ADJ\"]|[lemma=\"het\"][lemma=\"paard\"]"; |
137 | 110 | testCQLEquivalent(field, null, cql1, cql2); |
138 | 111 | } |
139 | 112 | |
140 | - private void basicTest7() { | |
113 | + @org.junit.Test | |
114 | + public void basicTestCQL7() { | |
141 | 115 | String field = "testveld"; |
142 | 116 | String cql1 = "[pos=\"LID\"] []{0,1} []{3,5} []{2,4}"; |
143 | 117 | String cql2 = "[pos=\"LID\"] []{5,10}"; |
144 | 118 | testCQLEquivalent(field, null, cql1, cql2); |
145 | 119 | } |
146 | 120 | |
147 | - private void basicTest8() throws ParseException { | |
121 | + @org.junit.Test | |
122 | + public void basicTestCQL8() throws ParseException { | |
148 | 123 | String field = "testveld"; |
149 | 124 | String cql = "[lemma=\"koe\"]([pos=\"N\"]|[pos=\"ADJ\"])"; |
150 | 125 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe", null, null); |
... | ... | @@ -158,7 +133,8 @@ public class MtasCQLParserTestSentence { |
158 | 133 | testCQLParse(field, null, cql, q); |
159 | 134 | } |
160 | 135 | |
161 | - private void basicTest9() throws ParseException { | |
136 | + @org.junit.Test | |
137 | + public void basicTestCQL9() throws ParseException { | |
162 | 138 | String field = "testveld"; |
163 | 139 | String cql = "[lemma=\"koe\"]([pos=\"N\"]|[pos=\"ADJ\"]){2,3}[lemma=\"paard\"]"; |
164 | 140 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe", null, null); |
... | ... | @@ -174,7 +150,8 @@ public class MtasCQLParserTestSentence { |
174 | 150 | testCQLParse(field, null, cql, q); |
175 | 151 | } |
176 | 152 | |
177 | - private void basicTest10() throws ParseException { | |
153 | + @org.junit.Test | |
154 | + public void basicTestCQL10() throws ParseException { | |
178 | 155 | String field = "testveld"; |
179 | 156 | String cql = "[pos=\"LID\"]? [pos=\"ADJ\"]{1,3} [lemma=\"koe\"]"; |
180 | 157 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID",null, null); |
... | ... | @@ -188,7 +165,8 @@ public class MtasCQLParserTestSentence { |
188 | 165 | testCQLParse(field, null, cql, q); |
189 | 166 | } |
190 | 167 | |
191 | - private void basicTest11() throws ParseException { | |
168 | + @org.junit.Test | |
169 | + public void basicTestCQL11() throws ParseException { | |
192 | 170 | String field = "testveld"; |
193 | 171 | String cql = "<sentence/> containing [lemma=\"koe\"]"; |
194 | 172 | MtasSpanQuery q1 = new MtasCQLParserGroupQuery(field,"sentence"); |
... | ... | @@ -197,7 +175,8 @@ public class MtasCQLParserTestSentence { |
197 | 175 | testCQLParse(field, null, cql, q); |
198 | 176 | } |
199 | 177 | |
200 | - private void basicTest12() throws ParseException { | |
178 | + @org.junit.Test | |
179 | + public void basicTestCQL12() throws ParseException { | |
201 | 180 | String field = "testveld"; |
202 | 181 | String cql = "[lemma=\"koe\"] within <sentence/>"; |
203 | 182 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe",null, null); |
... | ... | @@ -206,7 +185,8 @@ public class MtasCQLParserTestSentence { |
206 | 185 | testCQLParse(field, null, cql, q); |
207 | 186 | } |
208 | 187 | |
209 | - private void basicTest13() throws ParseException { | |
188 | + @org.junit.Test | |
189 | + public void basicTestCQL13() throws ParseException { | |
210 | 190 | String field = "testveld"; |
211 | 191 | String cql = "[lemma=\"koe\"]([t=\"de\"] within <sentence/>)"; |
212 | 192 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe",null, null); |
... | ... | @@ -220,7 +200,8 @@ public class MtasCQLParserTestSentence { |
220 | 200 | testCQLParse(field, null, cql, q); |
221 | 201 | } |
222 | 202 | |
223 | - private void basicTest14() throws ParseException { | |
203 | + @org.junit.Test | |
204 | + public void basicTestCQL14() throws ParseException { | |
224 | 205 | String field = "testveld"; |
225 | 206 | String cql = "([t=\"de\"] within <sentence/>)[lemma=\"koe\"]"; |
226 | 207 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"t","de",null, null); |
... | ... | @@ -234,7 +215,8 @@ public class MtasCQLParserTestSentence { |
234 | 215 | testCQLParse(field, null, cql, q); |
235 | 216 | } |
236 | 217 | |
237 | - private void basicTest15() throws ParseException { | |
218 | + @org.junit.Test | |
219 | + public void basicTestCQL15() throws ParseException { | |
238 | 220 | String field = "testveld"; |
239 | 221 | String cql = "[lemma=\"koe\"](<sentence/> containing [t=\"de\"]) within <sentence/>[lemma=\"paard\"]"; |
240 | 222 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe",null, null); |
... | ... | @@ -255,7 +237,8 @@ public class MtasCQLParserTestSentence { |
255 | 237 | testCQLParse(field, null, cql, q); |
256 | 238 | } |
257 | 239 | |
258 | - private void basicTest16() throws ParseException { | |
240 | + @org.junit.Test | |
241 | + public void basicTestCQL16() throws ParseException { | |
259 | 242 | String field = "testveld"; |
260 | 243 | String cql = "(<entity=\"loc\"/> within (<s/> containing [t_lc=\"amsterdam\"])) !containing ([t_lc=\"amsterdam\"])"; |
261 | 244 | MtasSpanQuery q1 = new MtasCQLParserGroupQuery(field,"entity","loc"); |
... | ... | @@ -267,7 +250,8 @@ public class MtasCQLParserTestSentence { |
267 | 250 | testCQLParse(field, null, cql, q); |
268 | 251 | } |
269 | 252 | |
270 | - private void basicTest17() { | |
253 | + @org.junit.Test | |
254 | + public void basicTestCQL17() { | |
271 | 255 | String field = "testveld"; |
272 | 256 | String cql = "[]<entity=\"loc\"/>{1,2}[]"; |
273 | 257 | MtasSpanQuery q1 = new MtasCQLParserGroupQuery(field,"entity","loc"); |
... | ... | @@ -280,7 +264,8 @@ public class MtasCQLParserTestSentence { |
280 | 264 | testCQLParse(field, null, cql, q); |
281 | 265 | } |
282 | 266 | |
283 | - private void basicTest18() throws ParseException { | |
267 | + @org.junit.Test | |
268 | + public void basicTestCQL18() throws ParseException { | |
284 | 269 | String field = "testveld"; |
285 | 270 | String cql = "\"de\" [pos=\"N\"]"; |
286 | 271 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"t_lc","de",null, null); |
... | ... | @@ -292,7 +277,8 @@ public class MtasCQLParserTestSentence { |
292 | 277 | testCQLParse(field, "t_lc", cql, q); |
293 | 278 | } |
294 | 279 | |
295 | - private void basicTest19() { | |
280 | + @org.junit.Test | |
281 | + public void basicTestCQL19() { | |
296 | 282 | String field = "testveld"; |
297 | 283 | String cql = "([]<entity=\"loc\"/>{1,2}[]){3,4}"; |
298 | 284 | MtasSpanQuery q1 = new MtasCQLParserGroupQuery(field,"entity","loc"); |
... | ... | @@ -306,7 +292,8 @@ public class MtasCQLParserTestSentence { |
306 | 292 | testCQLParse(field, null, cql, q); |
307 | 293 | } |
308 | 294 | |
309 | - private void basicTest20() { | |
295 | + @org.junit.Test | |
296 | + public void basicTestCQL20() { | |
310 | 297 | String field = "testveld"; |
311 | 298 | String cql1 = "[pos=\"N\"]?[pos=\"ADJ\"]"; |
312 | 299 | String cql2 = "([pos=\"N\"])?[pos=\"ADJ\"]"; |
... | ... | @@ -325,7 +312,8 @@ public class MtasCQLParserTestSentence { |
325 | 312 | testCQLEquivalent(field, null, cql9, cql10); |
326 | 313 | } |
327 | 314 | |
328 | - private void basicTest21() { | |
315 | + @org.junit.Test | |
316 | + public void basicTestCQL21() { | |
329 | 317 | String field = "testveld"; |
330 | 318 | String cql1 = "(<s/>(<s/> containing [t_lc=\"rembrandt\"])</s>)"; |
331 | 319 | String cql2 = "<s/>(<s/> containing [t_lc=\"rembrandt\"])</s>"; |
... | ... |
junit/mtas/parser/MtasCQLParserTestWord.java
... | ... | @@ -16,18 +16,7 @@ import mtas.search.spans.util.MtasSpanQuery; |
16 | 16 | |
17 | 17 | public class MtasCQLParserTestWord { |
18 | 18 | |
19 | - @org.junit.Test | |
20 | - public void test() { | |
21 | - try { | |
22 | - basicTests(); | |
23 | - basicNotTests(); | |
24 | - } catch (ParseException e) { | |
25 | - // TODO Auto-generated catch block | |
26 | - e.printStackTrace(); | |
27 | - } | |
28 | - } | |
29 | - | |
30 | - private void testCQLParse(String field, String defaultPrefix, String cql, MtasSpanQuery q) { | |
19 | + private void testCQLParse(String field, String defaultPrefix, String cql, MtasSpanQuery q) { | |
31 | 20 | MtasCQLParser p = new MtasCQLParser(new BufferedReader(new StringReader(cql))); |
32 | 21 | try { |
33 | 22 | assertEquals(p.parse(field, defaultPrefix, null, null, null) ,q); |
... | ... | @@ -50,31 +39,10 @@ public class MtasCQLParserTestWord { |
50 | 39 | } |
51 | 40 | } |
52 | 41 | |
53 | - private void basicNotTests() throws ParseException { | |
54 | - basicNotTest1(); | |
55 | - basicNotTest2(); | |
56 | - basicNotTest3(); | |
57 | - basicNotTest4(); | |
58 | - basicNotTest5(); | |
59 | - } | |
60 | 42 | |
61 | - private void basicTests() throws ParseException { | |
62 | - basicTest1(); | |
63 | - basicTest2(); | |
64 | - basicTest3(); | |
65 | - basicTest4(); | |
66 | - basicTest5(); | |
67 | - basicTest6(); | |
68 | - basicTest7(); | |
69 | - basicTest8(); | |
70 | - basicTest9(); | |
71 | - basicTest10(); | |
72 | - basicTest11(); | |
73 | - basicTest12(); | |
74 | - basicTest13(); | |
75 | - } | |
76 | 43 | |
77 | - private void basicNotTest1() throws ParseException { | |
44 | + @org.junit.Test | |
45 | + public void basicNotTestCQL1() throws ParseException { | |
78 | 46 | String field = "testveld"; |
79 | 47 | String cql = "[pos=\"LID\" & !lemma=\"de\"]"; |
80 | 48 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID",null, null); |
... | ... | @@ -83,14 +51,16 @@ public class MtasCQLParserTestWord { |
83 | 51 | testCQLParse(field, null, cql, q); |
84 | 52 | } |
85 | 53 | |
86 | - private void basicNotTest2() { | |
54 | + @org.junit.Test | |
55 | + public void basicNotTestCQL2() { | |
87 | 56 | String field = "testveld"; |
88 | 57 | String cql1 = "[pos=\"LID\" & (!lemma=\"de\")]"; |
89 | 58 | String cql2 = "[pos=\"LID\" & !(lemma=\"de\")]"; |
90 | 59 | testCQLEquivalent(field, null, cql1, cql2); |
91 | 60 | } |
92 | 61 | |
93 | - private void basicNotTest3() throws ParseException { | |
62 | + @org.junit.Test | |
63 | + public void basicNotTestCQL3() throws ParseException { | |
94 | 64 | String field = "testveld"; |
95 | 65 | String cql = "[pos=\"LID\" & !(lemma=\"de\" | lemma=\"een\")]"; |
96 | 66 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID",null, null); |
... | ... | @@ -101,28 +71,32 @@ public class MtasCQLParserTestWord { |
101 | 71 | testCQLParse(field, null, cql, q); |
102 | 72 | } |
103 | 73 | |
104 | - private void basicNotTest4() { | |
74 | + @org.junit.Test | |
75 | + public void basicNotTestCQL4() { | |
105 | 76 | String field = "testveld"; |
106 | 77 | String cql1 = "[pos=\"LID\" & !(lemma=\"de\" | lemma=\"een\")]"; |
107 | 78 | String cql2 = "[pos=\"LID\" & (!lemma=\"de\" & !lemma=\"een\")]"; |
108 | 79 | testCQLEquivalent(field, null, cql1, cql2); |
109 | 80 | } |
110 | 81 | |
111 | - private void basicNotTest5() { | |
82 | + @org.junit.Test | |
83 | + public void basicNotTestCQL5() { | |
112 | 84 | String field = "testveld"; |
113 | 85 | String cql1 = "[pos=\"LID\" & !(lemma=\"de\" | lemma=\"een\")]"; |
114 | 86 | String cql2 = "[pos=\"LID\" & !lemma=\"de\" & !lemma=\"een\"]"; |
115 | 87 | testCQLEquivalent(field, null, cql1, cql2); |
116 | 88 | } |
117 | 89 | |
118 | - private void basicTest1() throws ParseException { | |
90 | + @org.junit.Test | |
91 | + public void basicTestCQL1() throws ParseException { | |
119 | 92 | String field = "testveld"; |
120 | 93 | String cql = "[lemma=\"koe\"]"; |
121 | 94 | MtasSpanQuery q = new MtasCQLParserWordQuery(field, "lemma", "koe",null, null); |
122 | 95 | testCQLParse(field, null, cql, q); |
123 | 96 | } |
124 | 97 | |
125 | - private void basicTest2() throws ParseException { | |
98 | + @org.junit.Test | |
99 | + public void basicTestCQL2() throws ParseException { | |
126 | 100 | String field = "testveld"; |
127 | 101 | String cql = "[lemma=\"koe\" & pos=\"N\"]"; |
128 | 102 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe",null, null); |
... | ... | @@ -131,7 +105,8 @@ public class MtasCQLParserTestWord { |
131 | 105 | testCQLParse(field, null, cql, q); |
132 | 106 | } |
133 | 107 | |
134 | - private void basicTest3() throws ParseException { | |
108 | + @org.junit.Test | |
109 | + public void basicTestCQL3() throws ParseException { | |
135 | 110 | String field = "testveld"; |
136 | 111 | String cql = "[lemma=\"koe\" | lemma=\"paard\"]"; |
137 | 112 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe",null, null); |
... | ... | @@ -140,14 +115,16 @@ public class MtasCQLParserTestWord { |
140 | 115 | testCQLParse(field, null, cql, q); |
141 | 116 | } |
142 | 117 | |
143 | - private void basicTest4() { | |
118 | + @org.junit.Test | |
119 | + public void basicTestCQL4() { | |
144 | 120 | String field = "testveld"; |
145 | 121 | String cql1 = "[lemma=\"koe\" | lemma=\"paard\"]"; |
146 | 122 | String cql2 = "[(lemma=\"koe\" | lemma=\"paard\")]"; |
147 | 123 | testCQLEquivalent(field, null, cql1, cql2); |
148 | 124 | } |
149 | 125 | |
150 | - private void basicTest5() throws ParseException { | |
126 | + @org.junit.Test | |
127 | + public void basicTestCQL5() throws ParseException { | |
151 | 128 | String field = "testveld"; |
152 | 129 | String cql = "[(lemma=\"koe\" | lemma=\"paard\") & pos=\"N\"]"; |
153 | 130 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","koe",null, null); |
... | ... | @@ -158,7 +135,8 @@ public class MtasCQLParserTestWord { |
158 | 135 | testCQLParse(field, null, cql, q); |
159 | 136 | } |
160 | 137 | |
161 | - private void basicTest6() throws ParseException { | |
138 | + @org.junit.Test | |
139 | + public void basicTestCQL6() throws ParseException { | |
162 | 140 | String field = "testveld"; |
163 | 141 | String cql = "[pos=\"N\" & (lemma=\"koe\" | lemma=\"paard\")]"; |
164 | 142 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","N",null, null); |
... | ... | @@ -169,7 +147,8 @@ public class MtasCQLParserTestWord { |
169 | 147 | testCQLParse(field, null, cql, q); |
170 | 148 | } |
171 | 149 | |
172 | - private void basicTest7() throws ParseException { | |
150 | + @org.junit.Test | |
151 | + public void basicTestCQL7() throws ParseException { | |
173 | 152 | String field = "testveld"; |
174 | 153 | String cql = "[pos=\"LID\" | (lemma=\"koe\" & pos=\"N\")]"; |
175 | 154 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"pos","LID",null, null); |
... | ... | @@ -180,7 +159,8 @@ public class MtasCQLParserTestWord { |
180 | 159 | testCQLParse(field, null, cql, q); |
181 | 160 | } |
182 | 161 | |
183 | - private void basicTest8() throws ParseException { | |
162 | + @org.junit.Test | |
163 | + public void basicTestCQL8() throws ParseException { | |
184 | 164 | String field = "testveld"; |
185 | 165 | String cql = "[(lemma=\"de\" & pos=\"LID\") | (lemma=\"koe\" & pos=\"N\")]"; |
186 | 166 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","de",null, null); |
... | ... | @@ -193,7 +173,8 @@ public class MtasCQLParserTestWord { |
193 | 173 | testCQLParse(field, null, cql, q); |
194 | 174 | } |
195 | 175 | |
196 | - private void basicTest9() throws ParseException { | |
176 | + @org.junit.Test | |
177 | + public void basicTestCQL9() throws ParseException { | |
197 | 178 | String field = "testveld"; |
198 | 179 | String cql = "[((lemma=\"de\"|lemma=\"het\") & pos=\"LID\") | (lemma=\"koe\" & pos=\"N\")]"; |
199 | 180 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","de",null, null); |
... | ... | @@ -208,7 +189,8 @@ public class MtasCQLParserTestWord { |
208 | 189 | testCQLParse(field, null, cql, q); |
209 | 190 | } |
210 | 191 | |
211 | - private void basicTest10() throws ParseException { | |
192 | + @org.junit.Test | |
193 | + public void basicTestCQL10() throws ParseException { | |
212 | 194 | String field = "testveld"; |
213 | 195 | String cql = "[((lemma=\"de\"|lemma=\"het\") & pos=\"LID\") | ((lemma=\"koe\"|lemma=\"paard\") & pos=\"N\")]"; |
214 | 196 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"lemma","de",null, null); |
... | ... | @@ -225,7 +207,8 @@ public class MtasCQLParserTestWord { |
225 | 207 | testCQLParse(field, null, cql, q); |
226 | 208 | } |
227 | 209 | |
228 | - private void basicTest11() { | |
210 | + @org.junit.Test | |
211 | + public void basicTestCQL11() { | |
229 | 212 | String field = "testveld"; |
230 | 213 | String cql1 = "[#300]"; |
231 | 214 | MtasSpanQuery q1 = new MtasCQLParserWordPositionQuery(field, 300); |
... | ... | @@ -240,7 +223,8 @@ public class MtasCQLParserTestWord { |
240 | 223 | testCQLParse(field, null, cql3, q3); |
241 | 224 | } |
242 | 225 | |
243 | - private void basicTest12() throws ParseException { | |
226 | + @org.junit.Test | |
227 | + public void basicTestCQL12() throws ParseException { | |
244 | 228 | String field = "testveld"; |
245 | 229 | String cql = "[(t_lc=\"de\"|t_lc=\"het\"|t_lc=\"paard\")]"; |
246 | 230 | MtasSpanQuery q1 = new MtasCQLParserWordQuery(field,"t_lc","de",null, null); |
... | ... | @@ -250,7 +234,8 @@ public class MtasCQLParserTestWord { |
250 | 234 | testCQLParse(field, null, cql, q); |
251 | 235 | } |
252 | 236 | |
253 | - private void basicTest13() throws ParseException { | |
237 | + @org.junit.Test | |
238 | + public void basicTestCQL13() throws ParseException { | |
254 | 239 | String field = "testveld"; |
255 | 240 | String cql = "\"de\""; |
256 | 241 | MtasSpanQuery q = new MtasCQLParserWordQuery(field,"t_lc","de",null, null); |
... | ... |
junit/mtas/parser/MtasFunctionParserTest.java
... | ... | @@ -19,30 +19,12 @@ public class MtasFunctionParserTest { |
19 | 19 | |
20 | 20 | Random generator = new Random(); |
21 | 21 | |
22 | - @org.junit.Test | |
23 | - public void test() { | |
24 | - basicTests(); | |
25 | - } | |
26 | - | |
27 | 22 | private void testFunction(MtasFunctionParserFunction pf, long[] args, int n, |
28 | 23 | MtasFunctionParserFunctionResponse r) { |
29 | 24 | assertEquals(pf + "\tn:" + n + "\targs:" + Arrays.toString(args), |
30 | 25 | pf.getResponse(args, n), r); |
31 | 26 | } |
32 | 27 | |
33 | - private void basicTests() { | |
34 | - basicTest1(); | |
35 | - basicTest2(); | |
36 | - basicTest3(); | |
37 | - basicTest4(); | |
38 | - basicTest5(); | |
39 | - basicTest6(); | |
40 | - basicTest7(); | |
41 | - basicTest8(); | |
42 | - basicTest9(); | |
43 | - basicTest10(); | |
44 | - } | |
45 | - | |
46 | 28 | private long[] getArgs(int n, int min, int max) { |
47 | 29 | long[] args = new long[n]; |
48 | 30 | for (int i = 0; i < n; i++) { |
... | ... | @@ -55,7 +37,8 @@ public class MtasFunctionParserTest { |
55 | 37 | return min + generator.nextInt((1 + max - min)); |
56 | 38 | } |
57 | 39 | |
58 | - private void basicTest1() { | |
40 | + @org.junit.Test | |
41 | + public void basicTestFunction1() { | |
59 | 42 | String function = null; |
60 | 43 | MtasFunctionParser p; |
61 | 44 | MtasFunctionParserFunction pf; |
... | ... | @@ -77,7 +60,8 @@ public class MtasFunctionParserTest { |
77 | 60 | } |
78 | 61 | } |
79 | 62 | |
80 | - private void basicTest2() { | |
63 | + @org.junit.Test | |
64 | + public void basicTestFunction2() { | |
81 | 65 | String function = null; |
82 | 66 | MtasFunctionParser p; |
83 | 67 | MtasFunctionParserFunction pf; |
... | ... | @@ -98,7 +82,8 @@ public class MtasFunctionParserTest { |
98 | 82 | } |
99 | 83 | } |
100 | 84 | |
101 | - private void basicTest3() { | |
85 | + @org.junit.Test | |
86 | + public void basicTestFunction3() { | |
102 | 87 | String function = null; |
103 | 88 | MtasFunctionParser p; |
104 | 89 | MtasFunctionParserFunction pf; |
... | ... | @@ -175,7 +160,8 @@ public class MtasFunctionParserTest { |
175 | 160 | } |
176 | 161 | } |
177 | 162 | |
178 | - private void basicTest4() { | |
163 | + @org.junit.Test | |
164 | + public void basicTestFunction4() { | |
179 | 165 | String function = null; |
180 | 166 | MtasFunctionParser p; |
181 | 167 | MtasFunctionParserFunction pf; |
... | ... | @@ -196,7 +182,8 @@ public class MtasFunctionParserTest { |
196 | 182 | } |
197 | 183 | } |
198 | 184 | |
199 | - private void basicTest5() { | |
185 | + @org.junit.Test | |
186 | + public void basicTestFunction5() { | |
200 | 187 | String function = null; |
201 | 188 | MtasFunctionParser p; |
202 | 189 | MtasFunctionParserFunction pf; |
... | ... | @@ -214,7 +201,8 @@ public class MtasFunctionParserTest { |
214 | 201 | } |
215 | 202 | } |
216 | 203 | |
217 | - private void basicTest6() { | |
204 | + @org.junit.Test | |
205 | + public void basicTestFunction6() { | |
218 | 206 | String function = null; |
219 | 207 | MtasFunctionParser p; |
220 | 208 | MtasFunctionParserFunction pf; |
... | ... | @@ -237,7 +225,8 @@ public class MtasFunctionParserTest { |
237 | 225 | } |
238 | 226 | } |
239 | 227 | |
240 | - private void basicTest7() { | |
228 | + @org.junit.Test | |
229 | + public void basicTestFunction7() { | |
241 | 230 | String function = null; |
242 | 231 | MtasFunctionParser p; |
243 | 232 | MtasFunctionParserFunction pf; |
... | ... | @@ -263,7 +252,8 @@ public class MtasFunctionParserTest { |
263 | 252 | } |
264 | 253 | } |
265 | 254 | |
266 | - private void basicTest8() { | |
255 | + @org.junit.Test | |
256 | + public void basicTestFunction8() { | |
267 | 257 | String function = null; |
268 | 258 | MtasFunctionParser p; |
269 | 259 | MtasFunctionParserFunction pf; |
... | ... | @@ -293,7 +283,8 @@ public class MtasFunctionParserTest { |
293 | 283 | } |
294 | 284 | } |
295 | 285 | |
296 | - private void basicTest9() { | |
286 | + @org.junit.Test | |
287 | + public void basicTestFunction9() { | |
297 | 288 | String function = null; |
298 | 289 | MtasFunctionParser p; |
299 | 290 | MtasFunctionParserFunction pf; |
... | ... | @@ -314,7 +305,8 @@ public class MtasFunctionParserTest { |
314 | 305 | } |
315 | 306 | } |
316 | 307 | |
317 | - private void basicTest10() { | |
308 | + @org.junit.Test | |
309 | + public void basicTestFunction10() { | |
318 | 310 | String function = "(1+2)/3"; |
319 | 311 | MtasFunctionParser p; |
320 | 312 | MtasFunctionParserFunction pf; |
... | ... | @@ -329,9 +321,11 @@ public class MtasFunctionParserTest { |
329 | 321 | new BufferedReader(new StringReader(function))); |
330 | 322 | pf = p.parse(); |
331 | 323 | args = getArgs(10 + generator.nextInt(20), 0, 2); |
332 | - double answer = (double)(k1+k2) / (args[0]+1+k3-2); | |
333 | - testFunction(pf, args, n, | |
324 | + if((args[0]+1+k3-2)!=0) { | |
325 | + double answer = (double)(k1+k2) / (args[0]+1+k3-2); | |
326 | + testFunction(pf, args, n, | |
334 | 327 | new MtasFunctionParserFunctionResponseDouble(answer, true)); |
328 | + } | |
335 | 329 | } catch (ParseException e) { |
336 | 330 | e.printStackTrace(); |
337 | 331 | } |
... | ... |
junit/mtas/search/MtasSearchTest.java deleted
1 | -package mtas.search; | |
2 | - | |
3 | -import java.io.IOException; | |
4 | -import java.nio.file.Paths; | |
5 | -import java.util.ArrayList; | |
6 | -import java.util.HashMap; | |
7 | -import java.util.Map; | |
8 | - | |
9 | -import org.apache.lucene.analysis.Analyzer; | |
10 | -import org.apache.lucene.analysis.custom.CustomAnalyzer; | |
11 | -import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; | |
12 | -import org.apache.lucene.analysis.standard.StandardAnalyzer; | |
13 | -import org.apache.lucene.codecs.Codec; | |
14 | -import org.apache.lucene.document.Document; | |
15 | -import org.apache.lucene.document.Field; | |
16 | -import org.apache.lucene.document.StringField; | |
17 | -import org.apache.lucene.index.IndexWriter; | |
18 | -import org.apache.lucene.index.IndexWriterConfig; | |
19 | -import org.apache.lucene.store.Directory; | |
20 | -import org.apache.lucene.store.RAMDirectory; | |
21 | - | |
22 | - | |
23 | -public class MtasSearchTest { | |
24 | - | |
25 | - @org.junit.Test | |
26 | - public void test() { | |
27 | - //constructIndex(); | |
28 | - } | |
29 | - | |
30 | - private void constructIndex() { | |
31 | - Directory directory = new RAMDirectory(); | |
32 | - HashMap<String, String> files = new HashMap<String, String>(); | |
33 | - files.put("title 1", "folia-samples/beets1.xml.gz"); | |
34 | - try { | |
35 | - createIndex(directory,"mtas.xml", files); | |
36 | - } catch (IOException e) { | |
37 | - | |
38 | - } | |
39 | - | |
40 | - | |
41 | - } | |
42 | - | |
43 | - private void createIndex(Directory directory, String configFile, HashMap<String, String> files) throws IOException { | |
44 | - //analyzer | |
45 | - Map<String,String> paramsCharFilterMtas = new HashMap<String,String>(); | |
46 | - paramsCharFilterMtas.put("type","file"); | |
47 | - Map<String,String> paramsTokenizer = new HashMap<String,String>(); | |
48 | - paramsTokenizer.put("configFile", configFile); | |
49 | - Analyzer mtasAnalyzer = CustomAnalyzer.builder(Paths.get("docker").toAbsolutePath()).addCharFilter("mtas", paramsCharFilterMtas).withTokenizer("mtas", paramsTokenizer).build(); | |
50 | - Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>(); | |
51 | - analyzerPerField.put("mtas", mtasAnalyzer); | |
52 | - PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( | |
53 | - new StandardAnalyzer(), analyzerPerField); | |
54 | - | |
55 | - //indexwriter | |
56 | - IndexWriterConfig config = new IndexWriterConfig(analyzer); | |
57 | - config.setUseCompoundFile(false); | |
58 | - config.setCodec(Codec.forName("MtasCodec")); | |
59 | - IndexWriter w = new IndexWriter(directory, config); | |
60 | - | |
61 | - //delete | |
62 | - w.deleteAll(); | |
63 | - | |
64 | - //add | |
65 | - for(String title : files.keySet()) { | |
66 | - addDoc(w, title, files.get(title)); | |
67 | - } | |
68 | - w.commit(); | |
69 | - | |
70 | - //finish | |
71 | - w.close(); | |
72 | - | |
73 | - } | |
74 | - | |
75 | - private static void addDoc(IndexWriter w, String title, String file) | |
76 | - throws IOException { | |
77 | - Document doc = new Document(); | |
78 | - doc.add(new StringField("title", title, Field.Store.YES)); | |
79 | - //doc.add(new TestField("mtas", file, Field.Store.YES)); | |
80 | - w.addDocument(doc); | |
81 | - } | |
82 | - | |
83 | - | |
84 | -} |
junit/mtas/search/MtasSearchTestConsistency.java
0 → 100644
1 | +package mtas.search; | |
2 | + | |
3 | +import static org.junit.Assert.assertEquals; | |
4 | + | |
5 | +import java.io.BufferedReader; | |
6 | +import java.io.File; | |
7 | +import java.io.IOException; | |
8 | +import java.io.Reader; | |
9 | +import java.io.StringReader; | |
10 | +import java.lang.reflect.InvocationTargetException; | |
11 | +import java.nio.file.Paths; | |
12 | +import java.util.ArrayList; | |
13 | +import java.util.Arrays; | |
14 | +import java.util.HashMap; | |
15 | +import java.util.List; | |
16 | +import java.util.ListIterator; | |
17 | +import java.util.Map; | |
18 | +import java.util.SortedMap; | |
19 | +import java.util.stream.IntStream; | |
20 | + | |
21 | +import org.apache.lucene.analysis.Analyzer; | |
22 | +import org.apache.lucene.analysis.custom.CustomAnalyzer; | |
23 | +import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; | |
24 | +import org.apache.lucene.analysis.standard.StandardAnalyzer; | |
25 | +import org.apache.lucene.codecs.Codec; | |
26 | +import org.apache.lucene.document.Document; | |
27 | +import org.apache.lucene.document.Field; | |
28 | +import org.apache.lucene.document.StringField; | |
29 | +import org.apache.lucene.document.TextField; | |
30 | +import org.apache.lucene.index.DirectoryReader; | |
31 | +import org.apache.lucene.index.IndexReader; | |
32 | +import org.apache.lucene.index.IndexWriter; | |
33 | +import org.apache.lucene.index.IndexWriterConfig; | |
34 | +import org.apache.lucene.index.LeafReaderContext; | |
35 | +import org.apache.lucene.index.SegmentReader; | |
36 | +import org.apache.lucene.index.Terms; | |
37 | +import org.apache.lucene.search.IndexSearcher; | |
38 | +import org.apache.lucene.search.spans.SpanWeight; | |
39 | +import org.apache.lucene.search.spans.Spans; | |
40 | +import org.apache.lucene.store.Directory; | |
41 | +import org.apache.lucene.store.FSDirectory; | |
42 | +import org.apache.lucene.store.RAMDirectory; | |
43 | + | |
44 | +import mtas.codec.util.CodecInfo; | |
45 | +import mtas.codec.util.CodecUtil; | |
46 | +import mtas.codec.util.collector.MtasDataItem; | |
47 | +import mtas.codec.util.CodecComponent.ComponentField; | |
48 | +import mtas.codec.util.CodecComponent.ComponentGroup; | |
49 | +import mtas.codec.util.CodecComponent.ComponentPosition; | |
50 | +import mtas.codec.util.CodecComponent.ComponentSpan; | |
51 | +import mtas.codec.util.CodecComponent.ComponentToken; | |
52 | +import mtas.codec.util.CodecComponent.GroupHit; | |
53 | +import mtas.codec.util.CodecComponent.SubComponentFunction; | |
54 | +import mtas.codec.util.CodecSearchTree.MtasTreeHit; | |
55 | +import mtas.parser.cql.MtasCQLParser; | |
56 | +import mtas.parser.cql.ParseException; | |
57 | +import mtas.search.spans.MtasSpanSequenceQuery; | |
58 | +import mtas.search.spans.util.MtasSpanQuery; | |
59 | + | |
60 | +public class MtasSearchTestConsistency { | |
61 | + | |
62 | + private static String FIELD_ID = "id"; | |
63 | + private static String FIELD_TITLE = "title"; | |
64 | + private static String FIELD_CONTENT = "content"; | |
65 | + | |
66 | + private static Directory directory; | |
67 | + | |
68 | + private static HashMap<String, String> files; | |
69 | + | |
70 | + @org.junit.BeforeClass | |
71 | + public static void initialize() { | |
72 | + try { | |
73 | + String path = new File("junit/data").getCanonicalPath() + File.separator; | |
74 | + // directory = FSDirectory.open(Paths.get("testindexMtas")); | |
75 | + directory = new RAMDirectory(); | |
76 | + files = new HashMap<String, String>(); | |
77 | + files.put("Een onaangenaam mens in de Haarlemmerhout", | |
78 | + path + "resources/beets1.xml.gz"); | |
79 | + files.put("Een oude kennis", path + "resources/beets2.xml.gz"); | |
80 | + files.put("Varen en Rijden", path + "resources/beets3.xml.gz"); | |
81 | + createIndex(path + "conf/folia.xml", files); | |
82 | + } catch (IOException e) { | |
83 | + e.printStackTrace(); | |
84 | + } | |
85 | + } | |
86 | + | |
87 | + @org.junit.Test | |
88 | + public void basicSearchNumberOfWords() throws IOException { | |
89 | + IndexReader indexReader = DirectoryReader.open(directory); | |
90 | + testNumberOfHits(indexReader, FIELD_CONTENT, Arrays.asList("[]"), | |
91 | + Arrays.asList("[][]", "[#0]")); | |
92 | + indexReader.close(); | |
93 | + } | |
94 | + | |
95 | + @org.junit.Test | |
96 | + public void basicSearchStartSentence1() throws IOException { | |
97 | + IndexReader indexReader = DirectoryReader.open(directory); | |
98 | + testNumberOfHits(indexReader, FIELD_CONTENT, Arrays.asList("<s/>"), | |
99 | + Arrays.asList("<s>[]")); | |
100 | + indexReader.close(); | |
101 | + } | |
102 | + | |
103 | + @org.junit.Test | |
104 | + public void basicSearchStartSentence2() throws IOException { | |
105 | + IndexReader indexReader = DirectoryReader.open(directory); | |
106 | + testNumberOfHits(indexReader, FIELD_CONTENT, | |
107 | + Arrays.asList("[]</s><s>[]", "[#0]"), Arrays.asList("<s>[]")); | |
108 | + indexReader.close(); | |
109 | + } | |
110 | + | |
111 | + @org.junit.Test | |
112 | + public void basicSearchContaining() throws IOException { | |
113 | + IndexReader indexReader = DirectoryReader.open(directory); | |
114 | + testNumberOfHits(indexReader, FIELD_CONTENT, Arrays.asList("<s/>"), | |
115 | + Arrays.asList("<s/> containing [pos=\"ADJ\"]", | |
116 | + "<s/> !containing [pos=\"ADJ\"]")); | |
117 | + indexReader.close(); | |
118 | + } | |
119 | + | |
120 | + @org.junit.Test | |
121 | + public void basicSearchIntersecting() throws IOException { | |
122 | + IndexReader indexReader = DirectoryReader.open(directory); | |
123 | + testNumberOfHits(indexReader, FIELD_CONTENT, Arrays.asList("<s/>"), | |
124 | + Arrays.asList("<s/> intersecting [pos=\"ADJ\"]", | |
125 | + "<s/> !intersecting [pos=\"ADJ\"]")); | |
126 | + indexReader.close(); | |
127 | + } | |
128 | + | |
129 | + @org.junit.Test | |
130 | + public void basicSearchWithin() throws IOException { | |
131 | + IndexReader indexReader = DirectoryReader.open(directory); | |
132 | + testNumberOfHits(indexReader, FIELD_CONTENT, Arrays.asList("[]"), | |
133 | + Arrays.asList("[] within <s/>")); | |
134 | + indexReader.close(); | |
135 | + } | |
136 | + | |
137 | + @org.junit.Test | |
138 | + public void basicSearchIgnore() throws IOException, ParseException { | |
139 | + int ignoreNumber = 10; | |
140 | + String cql1 = "[pos=\"LID\"][pos=\"ADJ\"]{0," + ignoreNumber | |
141 | + + "}[pos=\"N\"]"; | |
142 | + String cql2 = "[pos=\"LID\"][pos=\"N\"]"; | |
143 | + String cql2ignore = "[pos=\"ADJ\"]"; | |
144 | + // get total number of nouns | |
145 | + IndexReader indexReader = DirectoryReader.open(directory); | |
146 | + QueryResult queryResult1 = doQuery(indexReader, FIELD_CONTENT, cql1, null, | |
147 | + null, null); | |
148 | + MtasSpanQuery ignore = createQuery(FIELD_CONTENT, cql2ignore, null, null); | |
149 | + QueryResult queryResult2 = doQuery(indexReader, FIELD_CONTENT, cql2, ignore, | |
150 | + ignoreNumber, null); | |
151 | + assertEquals("Article followed by Noun ignoring Adjectives", | |
152 | + queryResult1.hits, queryResult2.hits); | |
153 | + indexReader.close(); | |
154 | + } | |
155 | + | |
156 | + @org.junit.Test | |
157 | + public void collectStatsPositions1() throws IOException { | |
158 | + // get total number of words | |
159 | + IndexReader indexReader = DirectoryReader.open(directory); | |
160 | + QueryResult queryResult = doQuery(indexReader, FIELD_CONTENT, "[]", null, | |
161 | + null, null); | |
162 | + indexReader.close(); | |
163 | + int averageNumberOfPositions = Math | |
164 | + .round(queryResult.hits / queryResult.docs); | |
165 | + // do position query | |
166 | + try { | |
167 | + ArrayList<Integer> fullDocSet = new ArrayList<Integer>( | |
168 | + Arrays.asList(IntStream.rangeClosed(0, files.size() - 1).boxed() | |
169 | + .toArray(Integer[]::new))); | |
170 | + ComponentField fieldStats = new ComponentField(FIELD_CONTENT, FIELD_ID); | |
171 | + fieldStats.statsPositionList.add( | |
172 | + new ComponentPosition(FIELD_CONTENT, "total", null, null, "all")); | |
173 | + fieldStats.statsPositionList.add(new ComponentPosition(FIELD_CONTENT, | |
174 | + "minimum", (double) (averageNumberOfPositions - 1), null, | |
175 | + "n,sum,mean,min,max")); | |
176 | + fieldStats.statsPositionList.add(new ComponentPosition(FIELD_CONTENT, | |
177 | + "maximum", null, (double) averageNumberOfPositions, "sum")); | |
178 | + HashMap<String, HashMap<String, Object>> response = doAdvancedSearch( | |
179 | + fullDocSet, fieldStats); | |
180 | + HashMap<String, Object> responseTotal = (HashMap<String, Object>) response | |
181 | + .get("statsPositions").get("total"); | |
182 | + HashMap<String, Object> responseMinimum = (HashMap<String, Object>) response | |
183 | + .get("statsPositions").get("minimum"); | |
184 | + HashMap<String, Object> responseMaximum = (HashMap<String, Object>) response | |
185 | + .get("statsPositions").get("maximum"); | |
186 | + Double total = responseTotal != null ? (Double) responseTotal.get("sum") | |
187 | + : 0; | |
188 | + Long totalMinimum = responseTotal != null | |
189 | + ? (Long) responseMinimum.get("sum") : 0; | |
190 | + Long totalMaximum = responseTotal != null | |
191 | + ? (Long) responseMaximum.get("sum") : 0; | |
192 | + assertEquals("Number of positions", new Long(total.longValue()), | |
193 | + new Long(queryResult.hits)); | |
194 | + assertEquals("Minimum and maximum on number of positions", | |
195 | + new Long(total.longValue()), new Long(totalMinimum + totalMaximum)); | |
196 | + } catch (mtas.parser.function.ParseException e) { | |
197 | + e.printStackTrace(); | |
198 | + } | |
199 | + } | |
200 | + | |
201 | + @org.junit.Test | |
202 | + public void collectStatsPositions2() throws IOException { | |
203 | + ArrayList<Integer> fullDocSet = new ArrayList<Integer>( | |
204 | + Arrays.asList(IntStream.rangeClosed(0, files.size() - 1).boxed() | |
205 | + .toArray(Integer[]::new))); | |
206 | + try { | |
207 | + // compute total | |
208 | + ComponentField fieldStats = new ComponentField(FIELD_CONTENT, FIELD_ID); | |
209 | + fieldStats.statsPositionList.add(new ComponentPosition(FIELD_CONTENT, | |
210 | + "total", null, null, "n,sum,min,max")); | |
211 | + HashMap<String, HashMap<String, Object>> response = doAdvancedSearch( | |
212 | + fullDocSet, fieldStats); | |
213 | + HashMap<String, Object> responseTotal = (HashMap<String, Object>) response | |
214 | + .get("statsPositions").get("total"); | |
215 | + Long docs = responseTotal != null ? (Long) responseTotal.get("n") : 0; | |
216 | + Long total = responseTotal != null ? (Long) responseTotal.get("sum") : 0; | |
217 | + Long minimum = responseTotal != null ? (Long) responseTotal.get("min") | |
218 | + : 0; | |
219 | + Long maximum = responseTotal != null ? (Long) responseTotal.get("max") | |
220 | + : 0; | |
221 | + // compute for each doc | |
222 | + Long subDocs = Long.valueOf(0), subTotal = Long.valueOf(0), | |
223 | + subMinimum = null, subMaximum = null; | |
224 | + ArrayList<Integer> subDocSet = new ArrayList<Integer>(); | |
225 | + for (Integer docId : fullDocSet) { | |
226 | + subDocSet.add(docId); | |
227 | + fieldStats = new ComponentField(FIELD_CONTENT, FIELD_ID); | |
228 | + fieldStats.statsPositionList.add(new ComponentPosition(FIELD_CONTENT, | |
229 | + "total", null, null, "n,sum,min,max")); | |
230 | + response = doAdvancedSearch(subDocSet, fieldStats); | |
231 | + responseTotal = (HashMap<String, Object>) response.get("statsPositions") | |
232 | + .get("total"); | |
233 | + subDocs += responseTotal != null ? (Long) responseTotal.get("n") : 0; | |
234 | + subTotal += responseTotal != null ? (Long) responseTotal.get("sum") : 0; | |
235 | + if (subMinimum == null) { | |
236 | + subMinimum = responseTotal != null ? (Long) responseTotal.get("sum") | |
237 | + : null; | |
238 | + } else if (responseTotal != null) { | |
239 | + subMinimum = Math.min(subMinimum, (Long) responseTotal.get("sum")); | |
240 | + } | |
241 | + if (subMaximum == null) { | |
242 | + subMaximum = responseTotal != null ? (Long) responseTotal.get("sum") | |
243 | + : null; | |
244 | + } else if (responseTotal != null) { | |
245 | + subMaximum = Math.max(subMaximum, (Long) responseTotal.get("sum")); | |
246 | + } | |
247 | + subDocSet.clear(); | |
248 | + } | |
249 | + assertEquals("Number of docs", docs, Long.valueOf(files.size())); | |
250 | + assertEquals("Number of docs", docs, subDocs); | |
251 | + assertEquals("Total position", total, subTotal); | |
252 | + assertEquals("Minimum positions", minimum, subMinimum); | |
253 | + assertEquals("Maximum positions", maximum, subMaximum); | |
254 | + } catch (mtas.parser.function.ParseException e) { | |
255 | + e.printStackTrace(); | |
256 | + } | |
257 | + } | |
258 | + | |
259 | + @org.junit.Test | |
260 | + public void collectStatsTokens() throws IOException { | |
261 | + ArrayList<Integer> fullDocSet = new ArrayList<Integer>( | |
262 | + Arrays.asList(IntStream.rangeClosed(0, files.size() - 1).boxed() | |
263 | + .toArray(Integer[]::new))); | |
264 | + try { | |
265 | + // compute total | |
266 | + ComponentField fieldStats = new ComponentField(FIELD_CONTENT, FIELD_ID); | |
267 | + fieldStats.statsTokenList.add(new ComponentToken(FIELD_CONTENT, "total", | |
268 | + null, null, "n,sum,min,max")); | |
269 | + HashMap<String, HashMap<String, Object>> response = doAdvancedSearch( | |
270 | + fullDocSet, fieldStats); | |
271 | + HashMap<String, Object> responseTotal = (HashMap<String, Object>) response | |
272 | + .get("statsTokens").get("total"); | |
273 | + Long docs = responseTotal != null ? (Long) responseTotal.get("n") : 0; | |
274 | + Long total = responseTotal != null ? (Long) responseTotal.get("sum") : 0; | |
275 | + Long minimum = responseTotal != null ? (Long) responseTotal.get("min") | |
276 | + : 0; | |
277 | + Long maximum = responseTotal != null ? (Long) responseTotal.get("max") | |
278 | + : 0; | |
279 | + // compute for each doc | |
280 | + Long subDocs = Long.valueOf(0), subTotal = Long.valueOf(0), | |
281 | + subMinimum = null, subMaximum = null; | |
282 | + ArrayList<Integer> subDocSet = new ArrayList<Integer>(); | |
283 | + for (Integer docId : fullDocSet) { | |
284 | + subDocSet.add(docId); | |
285 | + fieldStats = new ComponentField(FIELD_CONTENT, FIELD_ID); | |
286 | + fieldStats.statsTokenList.add(new ComponentToken(FIELD_CONTENT, "total", | |
287 | + null, null, "n,sum,min,max")); | |
288 | + response = doAdvancedSearch(subDocSet, fieldStats); | |
289 | + responseTotal = (HashMap<String, Object>) response.get("statsTokens") | |
290 | + .get("total"); | |
291 | + subDocs += responseTotal != null ? (Long) responseTotal.get("n") : 0; | |
292 | + subTotal += responseTotal != null ? (Long) responseTotal.get("sum") : 0; | |
293 | + if (subMinimum == null) { | |
294 | + subMinimum = responseTotal != null ? (Long) responseTotal.get("sum") | |
295 | + : null; | |
296 | + } else if (responseTotal != null) { | |
297 | + subMinimum = Math.min(subMinimum, (Long) responseTotal.get("sum")); | |
298 | + } | |
299 | + if (subMaximum == null) { | |
300 | + subMaximum = responseTotal != null ? (Long) responseTotal.get("sum") | |
301 | + : null; | |
302 | + } else if (responseTotal != null) { | |
303 | + subMaximum = Math.max(subMaximum, (Long) responseTotal.get("sum")); | |
304 | + } | |
305 | + subDocSet.clear(); | |
306 | + } | |
307 | + assertEquals("Number of docs", docs, Long.valueOf(files.size())); | |
308 | + assertEquals("Number of docs", docs, subDocs); | |
309 | + assertEquals("Total position", total, subTotal); | |
310 | + assertEquals("Minimum positions", minimum, subMinimum); | |
311 | + assertEquals("Maximum positions", maximum, subMaximum); | |
312 | + } catch (mtas.parser.function.ParseException e) { | |
313 | + e.printStackTrace(); | |
314 | + } | |
315 | + } | |
316 | + | |
317 | + @org.junit.Test | |
318 | + public void collectStatsSpans() throws IOException { | |
319 | + String cql1 = "[pos=\"N\"]"; | |
320 | + String cql2 = "[pos=\"LID\"]"; | |
321 | + String cql3 = "[pos=\"N\" | pos=\"LID\"]"; | |
322 | + // get total number of nouns | |
323 | + IndexReader indexReader = DirectoryReader.open(directory); | |
324 | + QueryResult queryResult1 = doQuery(indexReader, FIELD_CONTENT, cql1, null, | |
325 | + null, null); | |
326 | + QueryResult queryResult2 = doQuery(indexReader, FIELD_CONTENT, cql2, null, | |
327 | + null, null); | |
328 | + QueryResult queryResult3 = doQuery(indexReader, FIELD_CONTENT, cql3, null, | |
329 | + null, null); | |
330 | + indexReader.close(); | |
331 | + int averageNumberOfPositions = Math | |
332 | + .round(queryResult1.hits / queryResult1.docs); | |
333 | + // do stats query for nouns | |
334 | + try { | |
335 | + ArrayList<Integer> fullDocSet = new ArrayList<Integer>( | |
336 | + Arrays.asList(IntStream.rangeClosed(0, files.size() - 1).boxed() | |
337 | + .toArray(Integer[]::new))); | |
338 | + ComponentField fieldStats = new ComponentField(FIELD_CONTENT, FIELD_ID); | |
339 | + MtasSpanQuery q1 = createQuery(FIELD_CONTENT, cql1, null, null); | |
340 | + MtasSpanQuery q2 = createQuery(FIELD_CONTENT, cql2, null, null); | |
341 | + MtasSpanQuery q3 = createQuery(FIELD_CONTENT, cql3, null, null); | |
342 | + MtasSpanQuery[] queries1 = new MtasSpanQuery[] { q1 }; | |
343 | + MtasSpanQuery[] queries2 = new MtasSpanQuery[] { q2 }; | |
344 | + MtasSpanQuery[] queries12 = new MtasSpanQuery[] { q1, q2 }; | |
345 | + MtasSpanQuery[] queries3 = new MtasSpanQuery[] { q3 }; | |
346 | + fieldStats.spanQueryList.add(q1); | |
347 | + fieldStats.spanQueryList.add(q2); | |
348 | + fieldStats.spanQueryList.add(q3); | |
349 | + fieldStats.statsSpanList.add(new ComponentSpan(queries1, "total1", null, | |
350 | + null, "all", null, null, null)); | |
351 | + fieldStats.statsSpanList.add(new ComponentSpan(queries1, "minimum1", | |
352 | + (double) (averageNumberOfPositions - 1), null, "n,sum,mean,min,max", | |
353 | + null, null, null)); | |
354 | + fieldStats.statsSpanList.add(new ComponentSpan(queries1, "maximum1", null, | |
355 | + (double) averageNumberOfPositions, "sum", null, null, null)); | |
356 | + fieldStats.statsSpanList.add(new ComponentSpan(queries2, "total2", null, | |
357 | + null, "sum", null, null, null)); | |
358 | + fieldStats.statsSpanList.add(new ComponentSpan(queries12, "total12", null, | |
359 | + null, "sum", new String[] { "difference12" }, | |
360 | + new String[] { "$q0-$q1" }, new String[] { "sum" })); | |
361 | + fieldStats.statsSpanList.add(new ComponentSpan(queries3, "total3", null, | |
362 | + null, "sum", null, null, null)); | |
363 | + HashMap<String, HashMap<String, Object>> response = doAdvancedSearch( | |
364 | + fullDocSet, fieldStats); | |
365 | + HashMap<String, Object> responseTotal1 = (HashMap<String, Object>) response | |
366 | + .get("statsSpans").get("total1"); | |
367 | + HashMap<String, Object> responseTotal2 = (HashMap<String, Object>) response | |
368 | + .get("statsSpans").get("total2"); | |
369 | + HashMap<String, Object> responseTotal12 = (HashMap<String, Object>) response | |
370 | + .get("statsSpans").get("total12"); | |
371 | + HashMap<String, HashMap<String, Object>> responseFunctionsTotal12 = (HashMap<String, HashMap<String, Object>>) response | |
372 | + .get("statsSpansFunctions").get("total12"); | |
373 | + HashMap<String, Object> responseTotal3 = (HashMap<String, Object>) response | |
374 | + .get("statsSpans").get("total3"); | |
375 | + HashMap<String, Object> responseMinimum1 = (HashMap<String, Object>) response | |
376 | + .get("statsSpans").get("minimum1"); | |
377 | + HashMap<String, Object> responseMaximum1 = (HashMap<String, Object>) response | |
378 | + .get("statsSpans").get("maximum1"); | |
379 | + Double total1 = responseTotal1 != null | |
380 | + ? (Double) responseTotal1.get("sum") : 0; | |
381 | + Long total2 = responseTotal2 != null ? (Long) responseTotal2.get("sum") | |
382 | + : 0; | |
383 | + Long total12 = responseTotal12 != null ? (Long) responseTotal12.get("sum") | |
384 | + : 0; | |
385 | + Long total3 = responseTotal3 != null ? (Long) responseTotal3.get("sum") | |
386 | + : 0; | |
387 | + Long difference12 = responseFunctionsTotal12 != null | |
388 | + ? (Long) responseFunctionsTotal12.get("difference12").get("sum") : 0; | |
389 | + Long totalMinimum1 = responseTotal1 != null | |
390 | + ? (Long) responseMinimum1.get("sum") : 0; | |
391 | + Long totalMaximum1 = responseTotal1 != null | |
392 | + ? (Long) responseMaximum1.get("sum") : 0; | |
393 | + assertEquals("Number of nouns", new Long(total1.longValue()), | |
394 | + new Long(queryResult1.hits)); | |
395 | + assertEquals("Number of articles", new Long(total2.longValue()), | |
396 | + new Long(queryResult2.hits)); | |
397 | + assertEquals("Number of nouns and articles - external 1", | |
398 | + new Long(total12.longValue()), | |
399 | + new Long(queryResult1.hits + queryResult2.hits)); | |
400 | + assertEquals("Number of nouns and articles - external 2", | |
401 | + new Long(total12.longValue()), new Long(queryResult3.hits)); | |
402 | + assertEquals("Number of nouns and articles - internal", | |
403 | + new Long(total12.longValue()), new Long(total3.longValue())); | |
404 | + assertEquals("Number of nouns and articles - functions", | |
405 | + new Long(difference12.longValue()), | |
406 | + new Long(queryResult1.hits - queryResult2.hits)); | |
407 | + assertEquals("Minimum and maximum on number of positions nouns", | |
408 | + new Long(total1.longValue()), | |
409 | + new Long(totalMinimum1 + totalMaximum1)); | |
410 | + } catch (mtas.parser.function.ParseException | ParseException e) { | |
411 | + e.printStackTrace(); | |
412 | + } | |
413 | + } | |
414 | + | |
415 | + @org.junit.Test | |
416 | + public void collectGroup() throws IOException { | |
417 | + String cql = "[pos=\"LID\"]"; | |
418 | + try { | |
419 | + ArrayList<Integer> fullDocSet = new ArrayList<Integer>( | |
420 | + Arrays.asList(IntStream.rangeClosed(0, files.size() - 1).boxed() | |
421 | + .toArray(Integer[]::new))); | |
422 | + ComponentField fieldStats = new ComponentField(FIELD_CONTENT, FIELD_ID); | |
423 | + MtasSpanQuery q = createQuery(FIELD_CONTENT, cql, null, null); | |
424 | + fieldStats.spanQueryList.add(q); | |
425 | + fieldStats.statsSpanList.add(new ComponentSpan(new MtasSpanQuery[]{q}, "total", null, | |
426 | + null, "sum", null, null, null)); | |
427 | + fieldStats.groupList.add(new ComponentGroup(q, FIELD_CONTENT, cql, "cql", | |
428 | + null, null, "articles", Integer.MAX_VALUE, "t_lc", null, null, null, null, null, null, | |
429 | + null, null, null, null, null, null)); | |
430 | + HashMap<String, HashMap<String, Object>> response = doAdvancedSearch( | |
431 | + fullDocSet, fieldStats); | |
432 | + ArrayList<HashMap<String,Object>> list = (ArrayList<HashMap<String, Object>>) response.get("group").get("articles"); | |
433 | + DirectoryReader indexReader = DirectoryReader.open(directory); | |
434 | + IndexSearcher searcher = new IndexSearcher(indexReader); | |
435 | + int subTotal = 0; | |
436 | + for(HashMap<String, Object> listItem: list) { | |
437 | + HashMap<String, HashMap<Integer, HashMap<String, String>[]>> group = (HashMap<String, HashMap<Integer, HashMap<String, String>[]>>) listItem.get("group"); | |
438 | + HashMap<Integer, HashMap<String, String>[]> hitList = group.get("hit"); | |
439 | + HashMap<String, String> hitListItem = hitList.get(0)[0]; | |
440 | + cql = "[pos=\"LID\" & "+hitListItem.get("prefix")+"=\""+hitListItem.get("value")+"\"]"; | |
441 | + QueryResult queryResult = doQuery(indexReader, FIELD_CONTENT, cql, null, null, null); | |
442 | + assertEquals("number of hits for articles equals to "+hitListItem.get("value"),listItem.get("sum"), Long.valueOf(queryResult.hits)); | |
443 | + subTotal+=queryResult.hits; | |
444 | + } | |
445 | + HashMap<String, Object> responseTotal = (HashMap<String, Object>) response | |
446 | + .get("statsSpans").get("total"); | |
447 | + Long total = responseTotal != null | |
448 | + ? (Long) responseTotal.get("sum") : 0; | |
449 | + assertEquals("Total number of articles",total, Long.valueOf(subTotal)); | |
450 | + indexReader.close(); | |
451 | + } catch (ParseException | mtas.parser.function.ParseException e) { | |
452 | + e.printStackTrace(); | |
453 | + } | |
454 | + } | |
455 | + | |
456 | + private HashMap<String, HashMap<String, Object>> doAdvancedSearch( | |
457 | + ArrayList<Integer> fullDocSet, ComponentField fieldStats) { | |
458 | + HashMap<String, HashMap<String, Object>> response = new HashMap<String, HashMap<String, Object>>(); | |
459 | + IndexReader indexReader; | |
460 | + try { | |
461 | + indexReader = DirectoryReader.open(directory); | |
462 | + IndexSearcher searcher = new IndexSearcher(indexReader); | |
463 | + ArrayList<Integer> fullDocList = new ArrayList<Integer>(); | |
464 | + CodecUtil.collect(FIELD_CONTENT, searcher, indexReader, fullDocList, | |
465 | + fullDocSet, fieldStats); | |
466 | + // add stats - position | |
467 | + response.put("statsPositions", new HashMap<String, Object>()); | |
468 | + for (ComponentPosition cp : fieldStats.statsPositionList) { | |
469 | + response.get("statsPositions").put(cp.key, | |
470 | + cp.dataCollector.getResult().getData().rewrite(false)); | |
471 | + } | |
472 | + response.put("statsTokens", new HashMap<String, Object>()); | |
473 | + for (ComponentToken ct : fieldStats.statsTokenList) { | |
474 | + response.get("statsTokens").put(ct.key, | |
475 | + ct.dataCollector.getResult().getData().rewrite(false)); | |
476 | + } | |
477 | + response.put("statsSpans", new HashMap<String, Object>()); | |
478 | + response.put("statsSpansFunctions", new HashMap<String, Object>()); | |
479 | + for (ComponentSpan cs : fieldStats.statsSpanList) { | |
480 | + response.get("statsSpans").put(cs.key, | |
481 | + cs.dataCollector.getResult().getData().rewrite(false)); | |
482 | + HashMap<String, Object> functions = new HashMap<String, Object>(); | |
483 | + response.get("statsSpansFunctions").put(cs.key, functions); | |
484 | + for (SubComponentFunction scf : cs.functions) { | |
485 | + functions.put(scf.key, | |
486 | + scf.dataCollector.getResult().getData().rewrite(false)); | |
487 | + } | |
488 | + } | |
489 | + response.put("group", new HashMap<String, Object>()); | |
490 | + for (ComponentGroup cg : fieldStats.groupList) { | |
491 | + SortedMap<String, ?> list = cg.dataCollector.getResult().getList(); | |
492 | + ArrayList<HashMap<String, Object>> groupList = new ArrayList<HashMap<String, Object>>(); | |
493 | + for(String key : list.keySet()) { | |
494 | + HashMap<String, Object> subList = new HashMap<String, Object>(); | |
495 | + StringBuilder newKey = new StringBuilder(""); | |
496 | + subList.put("group", GroupHit.keyToObject(key, newKey)); | |
497 | + subList.put("key", newKey.toString().trim()); | |
498 | + subList.putAll(((MtasDataItem<?,?>) list.get(key)).rewrite(false)); | |
499 | + groupList.add(subList); | |
500 | + } | |
501 | + response.get("group").put(cg.key, groupList); | |
502 | + } | |
503 | + indexReader.close(); | |
504 | + } catch (IOException | IllegalAccessException | IllegalArgumentException | |
505 | + | InvocationTargetException e) { | |
506 | + // TODO Auto-generated catch block | |
507 | + e.printStackTrace(); | |
508 | + } | |
509 | + return response; | |
510 | + } | |
511 | + | |
512 | + private static void createIndex(String configFile, | |
513 | + HashMap<String, String> files) throws IOException { | |
514 | + // analyzer | |
515 | + Map<String, String> paramsCharFilterMtas = new HashMap<String, String>(); | |
516 | + paramsCharFilterMtas.put("type", "file"); | |
517 | + Map<String, String> paramsTokenizer = new HashMap<String, String>(); | |
518 | + paramsTokenizer.put("configFile", configFile); | |
519 | + Analyzer mtasAnalyzer = CustomAnalyzer | |
520 | + .builder(Paths.get("docker").toAbsolutePath()) | |
521 | + .addCharFilter("mtas", paramsCharFilterMtas) | |
522 | + .withTokenizer("mtas", paramsTokenizer).build(); | |
523 | + Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>(); | |
524 | + analyzerPerField.put(FIELD_CONTENT, mtasAnalyzer); | |
525 | + PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( | |
526 | + new StandardAnalyzer(), analyzerPerField); | |
527 | + // indexwriter | |
528 | + IndexWriterConfig config = new IndexWriterConfig(analyzer); | |
529 | + config.setUseCompoundFile(false); | |
530 | + config.setCodec(Codec.forName("MtasCodec")); | |
531 | + IndexWriter w = new IndexWriter(directory, config); | |
532 | + // delete | |
533 | + w.deleteAll(); | |
534 | + // add | |
535 | + int counter = 0; | |
536 | + for (String title : files.keySet()) { | |
537 | + addDoc(w, title, files.get(title)); | |
538 | + if (counter > 0) { | |
539 | + w.commit(); | |
540 | + } | |
541 | + counter++; | |
542 | + } | |
543 | + w.commit(); | |
544 | + // finish | |
545 | + w.close(); | |
546 | + } | |
547 | + | |
548 | + private static void addDoc(IndexWriter w, String title, String file) | |
549 | + throws IOException { | |
550 | + try { | |
551 | + Document doc = new Document(); | |
552 | + doc.add(new StringField(FIELD_TITLE, title, Field.Store.YES)); | |
553 | + doc.add(new TextField(FIELD_CONTENT, file, Field.Store.YES)); | |
554 | + w.addDocument(doc); | |
555 | + } catch (Exception e) { | |
556 | + System.out.println("Couldn't add " + title + " (" + file + ")"); | |
557 | + e.printStackTrace(); | |
558 | + } | |
559 | + } | |
560 | + | |
561 | + private MtasSpanQuery createQuery(String field, String cql, | |
562 | + MtasSpanQuery ignore, Integer maximumIgnoreLength) throws ParseException { | |
563 | + Reader reader = new BufferedReader(new StringReader(cql)); | |
564 | + MtasCQLParser p = new MtasCQLParser(reader); | |
565 | + return p.parse(field, null, null, ignore, maximumIgnoreLength); | |
566 | + } | |
567 | + | |
568 | + private QueryResult doQuery(IndexReader indexReader, String field, String cql, | |
569 | + MtasSpanQuery ignore, Integer maximumIgnoreLength, | |
570 | + ArrayList<String> prefixes) throws IOException { | |
571 | + QueryResult queryResult = new QueryResult(); | |
572 | + try { | |
573 | + MtasSpanQuery q = createQuery(field, cql, ignore, maximumIgnoreLength); | |
574 | + queryResult.query = q.toString(field); | |
575 | + ListIterator<LeafReaderContext> iterator = indexReader.leaves() | |
576 | + .listIterator(); | |
577 | + IndexSearcher searcher = new IndexSearcher(indexReader); | |
578 | + SpanWeight spanweight = ((MtasSpanQuery) q.rewrite(indexReader)) | |
579 | + .createWeight(searcher, false); | |
580 | + | |
581 | + while (iterator.hasNext()) { | |
582 | + LeafReaderContext lrc = iterator.next(); | |
583 | + Spans spans = spanweight.getSpans(lrc, SpanWeight.Postings.POSITIONS); | |
584 | + SegmentReader r = (SegmentReader) lrc.reader(); | |
585 | + Terms t = r.terms(field); | |
586 | + CodecInfo mtasCodecInfo = CodecInfo.getCodecInfoFromTerms(t); | |
587 | + if (spans != null) { | |
588 | + while (spans.nextDoc() != Spans.NO_MORE_DOCS) { | |
589 | + queryResult.docs++; | |
590 | + while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { | |
591 | + queryResult.hits++; | |
592 | + if (prefixes != null && prefixes.size() > 0) { | |
593 | + ArrayList<MtasTreeHit<String>> terms = mtasCodecInfo | |
594 | + .getPositionedTermsByPrefixesAndPositionRange(field, | |
595 | + spans.docID(), prefixes, spans.startPosition(), | |
596 | + (spans.endPosition() - 1)); | |
597 | + for (MtasTreeHit<String> term : terms) { | |
598 | + queryResult.resultList.add(new QueryHit( | |
599 | + lrc.docBase + spans.docID(), term.startPosition, | |
600 | + term.endPosition, CodecUtil.termPrefix(term.data), | |
601 | + CodecUtil.termValue(term.data))); | |
602 | + } | |
603 | + } | |
604 | + } | |
605 | + } | |
606 | + } | |
607 | + } | |
608 | + } catch (mtas.parser.cql.ParseException e) { | |
609 | + e.printStackTrace(); | |
610 | + } | |
611 | + return queryResult; | |
612 | + } | |
613 | + | |
614 | + private void testNumberOfHits(IndexReader indexReader, String field, | |
615 | + List<String> cqls1, List<String> cqls2) throws IOException { | |
616 | + Integer sum1 = 0, sum2 = 0; | |
617 | + QueryResult queryResult; | |
618 | + for (String cql1 : cqls1) { | |
619 | + queryResult = doQuery(indexReader, field, cql1, null, null, null); | |
620 | + sum1 += queryResult.hits; | |
621 | + } | |
622 | + for (String cql2 : cqls2) { | |
623 | + queryResult = doQuery(indexReader, field, cql2, null, null, null); | |
624 | + sum2 += queryResult.hits; | |
625 | + } | |
626 | + assertEquals(sum1, sum2); | |
627 | + } | |
628 | + | |
629 | + public class QueryResult { | |
630 | + | |
631 | + public String query; | |
632 | + public int docs; | |
633 | + public int hits; | |
634 | + public List<QueryHit> resultList; | |
635 | + | |
636 | + public QueryResult() { | |
637 | + docs = 0; | |
638 | + hits = 0; | |
639 | + resultList = new ArrayList<QueryHit>(); | |
640 | + } | |
641 | + | |
642 | + @Override | |
643 | + public String toString() { | |
644 | + StringBuilder buffer = new StringBuilder(); | |
645 | + buffer.append(docs + " document(s), "); | |
646 | + buffer.append(hits + " hit(s)"); | |
647 | + return buffer.toString(); | |
648 | + } | |
649 | + | |
650 | + @Override | |
651 | + public boolean equals(Object obj) { | |
652 | + if (this == obj) | |
653 | + return true; | |
654 | + if (obj == null) | |
655 | + return false; | |
656 | + if (getClass() != obj.getClass()) | |
657 | + return false; | |
658 | + QueryResult other = (QueryResult) obj; | |
659 | + return other.hits == hits && other.docs == docs; | |
660 | + } | |
661 | + | |
662 | + } | |
663 | + | |
/**
 * A single collected hit: the document id, the start and end position, and
 * the prefix and value of the term.
 */
public class QueryHit {

  /** The document id. */
  int docId;

  /** The start position. */
  int startposition;

  /** The end position. */
  int endPosition;

  /** The term prefix. */
  String prefix;

  /** The term value. */
  String value;

  /**
   * Stores the given values unchanged.
   *
   * @param docId the document id
   * @param startPosition the start position
   * @param endPosition the end position
   * @param prefix the term prefix
   * @param value the term value
   */
  public QueryHit(int docId, int startPosition, int endPosition,
      String prefix, String value) {
    this.prefix = prefix;
    this.value = value;
    this.docId = docId;
    this.startposition = startPosition;
    this.endPosition = endPosition;
  }
}
677 | + | |
678 | +} | |
... | ... |
pom.xml
... | ... | @@ -5,7 +5,7 @@ |
5 | 5 | <maven.build.timestamp.format>yyyy-MM-dd HH:mm</maven.build.timestamp.format> |
6 | 6 | <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
7 | 7 | <currentDevelopmentVersion>6.4.1</currentDevelopmentVersion> |
8 | - <currentDevelopmentRelease>20170220</currentDevelopmentRelease> | |
8 | + <currentDevelopmentRelease>20170228</currentDevelopmentRelease> | |
9 | 9 | </properties> |
10 | 10 | <modelVersion>4.0.0</modelVersion> |
11 | 11 | <groupId>dev.meertens.mtas</groupId> |
... | ... | @@ -57,6 +57,7 @@ |
57 | 57 | <include>MtasCQLParserTestSentence.java</include> |
58 | 58 | <include>MtasCQLParserTestWord.java</include> |
59 | 59 | <include>MtasFunctionParserTest.java</include> |
60 | + <include>MtasSearchTestConsistency.java</include> | |
60 | 61 | </includes> |
61 | 62 | </configuration> |
62 | 63 | </plugin> |
... | ... |
src/mtas/analysis/MtasTokenizer.java
... | ... | @@ -228,7 +228,7 @@ public final class MtasTokenizer<T> extends Tokenizer { |
228 | 228 | private void constructTokenCollection(Reader reader) |
229 | 229 | throws MtasConfigException, MtasParserException { |
230 | 230 | tokenCollection = null; |
231 | - try { | |
231 | + try { | |
232 | 232 | Constructor<?> c = Class.forName(parserName) |
233 | 233 | .getDeclaredConstructor(MtasConfiguration.class); |
234 | 234 | // try { |
... | ... |
src/mtas/analysis/util/MtasFetchData.java
... | ... | @@ -116,7 +116,7 @@ public class MtasFetchData { |
116 | 116 | GZIPInputStream in; |
117 | 117 | try { |
118 | 118 | in = new GZIPInputStream(new FileInputStream(file)); |
119 | - return new InputStreamReader(in); | |
119 | + return new InputStreamReader(in); | |
120 | 120 | } catch (IOException e1) { |
121 | 121 | try { |
122 | 122 | String text = new String(Files.readAllBytes(Paths.get(file))); |
... | ... |
src/mtas/codec/util/CodecCollector.java
... | ... | @@ -110,7 +110,7 @@ public class CodecCollector { |
110 | 110 | HashMap<MtasSpanQuery, SpanWeight> spansQueryWeight) |
111 | 111 | throws IllegalAccessException, IllegalArgumentException, |
112 | 112 | InvocationTargetException, IOException { |
113 | - | |
113 | + | |
114 | 114 | HashMap<Integer, List<Integer>> docSets = new HashMap<Integer, List<Integer>>(); |
115 | 115 | |
116 | 116 | ListIterator<LeafReaderContext> iterator = reader.leaves().listIterator(); |
... | ... | @@ -591,7 +591,6 @@ public class CodecCollector { |
591 | 591 | createDocument(fieldInfo.documentList, docList, field, lrc.docBase, |
592 | 592 | fieldInfo.uniqueKeyField, searcher, t, r, lrc); |
593 | 593 | } |
594 | - | |
595 | 594 | if (fieldInfo.spanQueryList.size() > 0) { |
596 | 595 | if (fieldInfo.statsSpanList.size() > 0) { |
597 | 596 | // create stats |
... | ... |
src/mtas/codec/util/collector/MtasDataCollectorResult.java
... | ... | @@ -241,5 +241,14 @@ public class MtasDataCollectorResult<T1 extends Number & Comparable<T1>, T2 exte |
241 | 241 | throw new IOException("type " + collectorType + " not supported"); |
242 | 242 | } |
243 | 243 | } |
244 | + | |
245 | + @Override | |
246 | + public String toString() { | |
247 | + StringBuilder buffer = new StringBuilder(); | |
248 | + buffer.append(this.getClass().getSimpleName() + "("); | |
249 | + buffer.append(collectorType+","+sortType+","+sortDirection); | |
250 | + buffer.append(")"); | |
251 | + return buffer.toString(); | |
252 | + } | |
244 | 253 | |
245 | 254 | } |
... | ... |
src/mtas/parser/cql/util/MtasCQLParserGroupQuery.java
... | ... | @@ -12,7 +12,6 @@ import mtas.search.spans.util.MtasSpanQuery; |
12 | 12 | import org.apache.lucene.index.IndexReader; |
13 | 13 | import org.apache.lucene.index.Term; |
14 | 14 | import org.apache.lucene.search.IndexSearcher; |
15 | -import org.apache.lucene.search.Query; | |
16 | 15 | import org.apache.lucene.search.spans.SpanWeight; |
17 | 16 | |
18 | 17 | /** |
... | ... |
src/mtas/parser/cql/util/MtasCQLParserWordPositionQuery.java
... | ... | @@ -8,7 +8,6 @@ import mtas.search.spans.util.MtasSpanQuery; |
8 | 8 | import org.apache.lucene.index.IndexReader; |
9 | 9 | import org.apache.lucene.index.Term; |
10 | 10 | import org.apache.lucene.search.IndexSearcher; |
11 | -import org.apache.lucene.search.Query; | |
12 | 11 | import org.apache.lucene.search.spans.SpanWeight; |
13 | 12 | |
14 | 13 | /** |
... | ... |
src/mtas/parser/cql/util/MtasCQLParserWordQuery.java
... | ... | @@ -16,7 +16,6 @@ import mtas.search.spans.util.MtasSpanQuery; |
16 | 16 | import org.apache.lucene.index.IndexReader; |
17 | 17 | import org.apache.lucene.index.Term; |
18 | 18 | import org.apache.lucene.search.IndexSearcher; |
19 | -import org.apache.lucene.search.Query; | |
20 | 19 | import org.apache.lucene.search.spans.SpanWeight; |
21 | 20 | |
22 | 21 | /** |
... | ... |
src/mtas/search/spans/MtasSpanContainingQuery.java
... | ... | @@ -8,40 +8,76 @@ import org.apache.lucene.search.spans.SpanContainingQuery; |
8 | 8 | import org.apache.lucene.search.spans.SpanWeight; |
9 | 9 | import mtas.search.spans.util.MtasSpanQuery; |
10 | 10 | |
11 | +/** | |
12 | + * The Class MtasSpanContainingQuery. | |
13 | + */ | |
11 | 14 | public class MtasSpanContainingQuery extends MtasSpanQuery { |
12 | - | |
15 | + | |
13 | 16 | /** The base query. */ |
14 | 17 | private SpanContainingQuery baseQuery; |
15 | 18 | |
19 | + /** | |
20 | + * Instantiates a new mtas span containing query. | |
21 | + * | |
22 | + * @param q1 the q1 | |
23 | + * @param q2 the q2 | |
24 | + */ | |
16 | 25 | public MtasSpanContainingQuery(MtasSpanQuery q1, MtasSpanQuery q2) { |
17 | - super(); | |
26 | + super(); | |
18 | 27 | baseQuery = new SpanContainingQuery(q1, q2); |
19 | 28 | } |
20 | 29 | |
30 | + /* | |
31 | + * (non-Javadoc) | |
32 | + * | |
33 | + * @see org.apache.lucene.search.spans.SpanQuery#getField() | |
34 | + */ | |
21 | 35 | @Override |
22 | 36 | public String getField() { |
23 | 37 | return baseQuery.getField(); |
24 | 38 | } |
25 | 39 | |
40 | + /* | |
41 | + * (non-Javadoc) | |
42 | + * | |
43 | + * @see | |
44 | + * org.apache.lucene.search.spans.SpanQuery#createWeight(org.apache.lucene. | |
45 | + * search.IndexSearcher, boolean) | |
46 | + */ | |
26 | 47 | @Override |
27 | 48 | public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) |
28 | 49 | throws IOException { |
29 | 50 | return baseQuery.createWeight(searcher, needsScores); |
30 | 51 | } |
31 | 52 | |
53 | + /* | |
54 | + * (non-Javadoc) | |
55 | + * | |
56 | + * @see org.apache.lucene.search.Query#toString(java.lang.String) | |
57 | + */ | |
32 | 58 | @Override |
33 | 59 | public String toString(String field) { |
34 | 60 | return baseQuery.toString(field); |
35 | 61 | } |
36 | 62 | |
63 | + /* | |
64 | + * (non-Javadoc) | |
65 | + * | |
66 | + * @see mtas.search.spans.util.MtasSpanQuery#rewrite(org.apache.lucene.index. | |
67 | + * IndexReader) | |
68 | + */ | |
37 | 69 | @Override |
38 | 70 | public MtasSpanQuery rewrite(IndexReader reader) throws IOException { |
39 | 71 | baseQuery = (SpanContainingQuery) baseQuery.rewrite(reader); |
40 | 72 | return this; |
41 | - } | |
42 | - | |
43 | - | |
44 | - @Override | |
73 | + } | |
74 | + | |
75 | + /* | |
76 | + * (non-Javadoc) | |
77 | + * | |
78 | + * @see org.apache.lucene.search.Query#equals(java.lang.Object) | |
79 | + */ | |
80 | + @Override | |
45 | 81 | public boolean equals(Object obj) { |
46 | 82 | if (this == obj) |
47 | 83 | return true; |
... | ... | @@ -53,9 +89,16 @@ public class MtasSpanContainingQuery extends MtasSpanQuery { |
53 | 89 | return baseQuery.equals(that.baseQuery); |
54 | 90 | } |
55 | 91 | |
92 | + /* | |
93 | + * (non-Javadoc) | |
94 | + * | |
95 | + * @see org.apache.lucene.search.Query#hashCode() | |
96 | + */ | |
56 | 97 | @Override |
57 | 98 | public int hashCode() { |
58 | 99 | return baseQuery.hashCode(); |
59 | 100 | } |
101 | + | |
102 | + | |
60 | 103 | |
61 | 104 | } |
... | ... |
src/mtas/search/spans/MtasSpanIntersectingQuery.java
... | ... | @@ -17,16 +17,28 @@ import org.apache.lucene.search.spans.SpanWeight; |
17 | 17 | import org.apache.lucene.search.spans.Spans; |
18 | 18 | import mtas.search.spans.util.MtasSpanQuery; |
19 | 19 | |
20 | +/** | |
21 | + * The Class MtasSpanIntersectingQuery. | |
22 | + */ | |
20 | 23 | public class MtasSpanIntersectingQuery extends MtasSpanQuery { |
21 | 24 | |
25 | + /** The field. */ | |
22 | 26 | private String field; |
23 | 27 | |
28 | + /** The two clauses, q1 and q2. */ | |
24 | 29 | private SpanQuery q1, q2; |
25 | 30 | |
31 | + /** | |
32 | + * Instantiates a new mtas span intersecting query. | |
33 | + * | |
34 | + * @param q1 | |
35 | + * the q1 | |
36 | + * @param q2 | |
37 | + * the q2 | |
38 | + */ | |
26 | 39 | public MtasSpanIntersectingQuery(MtasSpanQuery q1, MtasSpanQuery q2) { |
27 | - if (q1 != null) { | |
28 | - field = q1.getField(); | |
29 | - if (q2 != null && !q2.getField().equals(field)) { | |
40 | + if (q1 != null && (field = q1.getField())!=null) { | |
41 | + if (q2 != null && ((field==null && q2.getField()!=null) || !q2.getField().equals(field))) { | |
30 | 42 | throw new IllegalArgumentException("Clauses must have same field."); |
31 | 43 | } |
32 | 44 | } else if (q2 != null) { |
... | ... | @@ -38,11 +50,23 @@ public class MtasSpanIntersectingQuery extends MtasSpanQuery { |
38 | 50 | this.q2 = q2; |
39 | 51 | } |
40 | 52 | |
53 | + /* | |
54 | + * (non-Javadoc) | |
55 | + * | |
56 | + * @see org.apache.lucene.search.spans.SpanQuery#getField() | |
57 | + */ | |
41 | 58 | @Override |
42 | 59 | public String getField() { |
43 | 60 | return field; |
44 | 61 | } |
45 | 62 | |
63 | + /* | |
64 | + * (non-Javadoc) | |
65 | + * | |
66 | + * @see | |
67 | + * org.apache.lucene.search.spans.SpanQuery#createWeight(org.apache.lucene. | |
68 | + * search.IndexSearcher, boolean) | |
69 | + */ | |
46 | 70 | @Override |
47 | 71 | public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) |
48 | 72 | throws IOException { |
... | ... | @@ -53,15 +77,23 @@ public class MtasSpanIntersectingQuery extends MtasSpanQuery { |
53 | 77 | q1.createWeight(searcher, needsScores)); |
54 | 78 | MtasSpanIntersectingQueryWeight w2 = new MtasSpanIntersectingQueryWeight( |
55 | 79 | q2.createWeight(searcher, needsScores)); |
56 | - //subWeights | |
57 | - List<MtasSpanIntersectingQueryWeight> subWeights = new ArrayList<MtasSpanIntersectingQueryWeight>(); | |
80 | + // subWeights | |
81 | + List<MtasSpanIntersectingQueryWeight> subWeights = new ArrayList<MtasSpanIntersectingQueryWeight>(); | |
58 | 82 | subWeights.add(w1); |
59 | 83 | subWeights.add(w2); |
60 | - //return | |
61 | - return new SpanIntersectingWeight(w1, w2, searcher, needsScores ? getTermContexts(subWeights) : null); | |
84 | + // return | |
85 | + return new SpanIntersectingWeight(w1, w2, searcher, | |
86 | + needsScores ? getTermContexts(subWeights) : null); | |
62 | 87 | } |
63 | 88 | } |
64 | - | |
89 | + | |
90 | + /** | |
91 | + * Gets the term contexts. | |
92 | + * | |
93 | + * @param items | |
94 | + * the items | |
95 | + * @return the term contexts | |
96 | + */ | |
65 | 97 | protected Map<Term, TermContext> getTermContexts( |
66 | 98 | List<MtasSpanIntersectingQueryWeight> items) { |
67 | 99 | List<SpanWeight> weights = new ArrayList<SpanWeight>(); |
... | ... | @@ -71,6 +103,11 @@ public class MtasSpanIntersectingQuery extends MtasSpanQuery { |
71 | 103 | return getTermContexts(weights); |
72 | 104 | } |
73 | 105 | |
106 | + /* | |
107 | + * (non-Javadoc) | |
108 | + * | |
109 | + * @see org.apache.lucene.search.Query#toString(java.lang.String) | |
110 | + */ | |
74 | 111 | @Override |
75 | 112 | public String toString(String field) { |
76 | 113 | StringBuilder buffer = new StringBuilder(); |
... | ... | @@ -90,6 +127,11 @@ public class MtasSpanIntersectingQuery extends MtasSpanQuery { |
90 | 127 | return buffer.toString(); |
91 | 128 | } |
92 | 129 | |
130 | + /* | |
131 | + * (non-Javadoc) | |
132 | + * | |
133 | + * @see org.apache.lucene.search.Query#equals(java.lang.Object) | |
134 | + */ | |
93 | 135 | @Override |
94 | 136 | public boolean equals(Object obj) { |
95 | 137 | if (this == obj) |
... | ... | @@ -102,6 +144,11 @@ public class MtasSpanIntersectingQuery extends MtasSpanQuery { |
102 | 144 | return q1.equals(other.q1) && q2.equals(other.q2); |
103 | 145 | } |
104 | 146 | |
147 | + /* | |
148 | + * (non-Javadoc) | |
149 | + * | |
150 | + * @see org.apache.lucene.search.Query#hashCode() | |
151 | + */ | |
105 | 152 | @Override |
106 | 153 | public int hashCode() { |
107 | 154 | int h = Integer.rotateLeft(classHash(), 1); |
... | ... | @@ -110,35 +157,75 @@ public class MtasSpanIntersectingQuery extends MtasSpanQuery { |
110 | 157 | h ^= q2.hashCode(); |
111 | 158 | return h; |
112 | 159 | } |
113 | - | |
160 | + | |
161 | + /* | |
162 | + * (non-Javadoc) | |
163 | + * | |
164 | + * @see mtas.search.spans.util.MtasSpanQuery#rewrite(org.apache.lucene.index. | |
165 | + * IndexReader) | |
166 | + */ | |
114 | 167 | @Override |
115 | 168 | public MtasSpanQuery rewrite(IndexReader reader) throws IOException { |
116 | - MtasSpanQuery newQ1 = (MtasSpanQuery) q1.rewrite(reader); | |
117 | - MtasSpanQuery newQ2 = (MtasSpanQuery) q2.rewrite(reader); | |
118 | - if(newQ1!=q1 || newQ2!=q2) { | |
119 | - return new MtasSpanIntersectingQuery(newQ1, newQ2); | |
169 | + MtasSpanQuery newQ1 = (MtasSpanQuery) q1.rewrite(reader); | |
170 | + MtasSpanQuery newQ2 = (MtasSpanQuery) q2.rewrite(reader); | |
171 | + if (newQ1 != q1 || newQ2 != q2) { | |
172 | + return new MtasSpanIntersectingQuery(newQ1, newQ2); | |
120 | 173 | } else { |
121 | 174 | return this; |
122 | - } | |
123 | -} | |
175 | + } | |
176 | + } | |
124 | 177 | |
178 | + /** | |
179 | + * The Class SpanIntersectingWeight. | |
180 | + */ | |
125 | 181 | public class SpanIntersectingWeight extends SpanWeight { |
126 | - | |
127 | - MtasSpanIntersectingQueryWeight w1,w2; | |
128 | 182 | |
129 | - public SpanIntersectingWeight(MtasSpanIntersectingQueryWeight w1, MtasSpanIntersectingQueryWeight w2, IndexSearcher searcher, | |
183 | + /** The weights w1 and w2 of the first and second clause. */ | |
184 | + MtasSpanIntersectingQueryWeight w1, w2; | |
185 | + | |
186 | + /** | |
187 | + * Instantiates a new span intersecting weight. | |
188 | + * | |
189 | + * @param w1 | |
190 | + * the w1 | |
191 | + * @param w2 | |
192 | + * the w2 | |
193 | + * @param searcher | |
194 | + * the searcher | |
195 | + * @param terms | |
196 | + * the terms | |
197 | + * @throws IOException | |
198 | + * Signals that an I/O exception has occurred. | |
199 | + */ | |
200 | + public SpanIntersectingWeight(MtasSpanIntersectingQueryWeight w1, | |
201 | + MtasSpanIntersectingQueryWeight w2, IndexSearcher searcher, | |
130 | 202 | Map<Term, TermContext> terms) throws IOException { |
131 | 203 | super(MtasSpanIntersectingQuery.this, searcher, terms); |
132 | - this.w1=w1; | |
133 | - this.w2=w2; | |
204 | + this.w1 = w1; | |
205 | + this.w2 = w2; | |
134 | 206 | } |
135 | 207 | |
208 | + /* | |
209 | + * (non-Javadoc) | |
210 | + * | |
211 | + * @see | |
212 | + * org.apache.lucene.search.spans.SpanWeight#extractTermContexts(java.util. | |
213 | + * Map) | |
214 | + */ | |
136 | 215 | @Override |
137 | 216 | public void extractTermContexts(Map<Term, TermContext> contexts) { |
138 | 217 | w1.spanWeight.extractTermContexts(contexts); |
139 | 218 | w2.spanWeight.extractTermContexts(contexts); |
140 | 219 | } |
141 | 220 | |
221 | + /* | |
222 | + * (non-Javadoc) | |
223 | + * | |
224 | + * @see | |
225 | + * org.apache.lucene.search.spans.SpanWeight#getSpans(org.apache.lucene. | |
226 | + * index.LeafReaderContext, | |
227 | + * org.apache.lucene.search.spans.SpanWeight.Postings) | |
228 | + */ | |
142 | 229 | @Override |
143 | 230 | public Spans getSpans(LeafReaderContext context, Postings requiredPostings) |
144 | 231 | throws IOException { |
... | ... | @@ -146,12 +233,19 @@ public class MtasSpanIntersectingQuery extends MtasSpanQuery { |
146 | 233 | if (terms == null) { |
147 | 234 | return null; // field does not exist |
148 | 235 | } |
149 | - MtasSpanIntersectingQuerySpans s1 = new MtasSpanIntersectingQuerySpans(w1.spanWeight.getSpans(context, requiredPostings)); | |
150 | - MtasSpanIntersectingQuerySpans s2 = new MtasSpanIntersectingQuerySpans(w2.spanWeight.getSpans(context, requiredPostings)); | |
151 | - return new MtasSpanIntersectingSpans(MtasSpanIntersectingQuery.this, | |
152 | - s1, s2); | |
236 | + MtasSpanIntersectingQuerySpans s1 = new MtasSpanIntersectingQuerySpans( | |
237 | + w1.spanWeight.getSpans(context, requiredPostings)); | |
238 | + MtasSpanIntersectingQuerySpans s2 = new MtasSpanIntersectingQuerySpans( | |
239 | + w2.spanWeight.getSpans(context, requiredPostings)); | |
240 | + return new MtasSpanIntersectingSpans(MtasSpanIntersectingQuery.this, s1, | |
241 | + s2); | |
153 | 242 | } |
154 | 243 | |
244 | + /* | |
245 | + * (non-Javadoc) | |
246 | + * | |
247 | + * @see org.apache.lucene.search.Weight#extractTerms(java.util.Set) | |
248 | + */ | |
155 | 249 | @Override |
156 | 250 | public void extractTerms(Set<Term> terms) { |
157 | 251 | w1.spanWeight.extractTerms(terms); |
... | ... | @@ -160,20 +254,40 @@ public class MtasSpanIntersectingQuery extends MtasSpanQuery { |
160 | 254 | |
161 | 255 | } |
162 | 256 | |
257 | + /** | |
258 | + * The Class MtasSpanIntersectingQuerySpans. | |
259 | + */ | |
163 | 260 | public class MtasSpanIntersectingQuerySpans { |
261 | + | |
262 | + /** The spans. */ | |
164 | 263 | public Spans spans; |
165 | - | |
264 | + | |
265 | + /** | |
266 | + * Instantiates a new mtas span intersecting query spans. | |
267 | + * | |
268 | + * @param spans | |
269 | + * the spans | |
270 | + */ | |
166 | 271 | public MtasSpanIntersectingQuerySpans(Spans spans) { |
167 | 272 | this.spans = spans; |
168 | 273 | } |
169 | - | |
274 | + | |
170 | 275 | } |
171 | - | |
276 | + | |
277 | + /** | |
278 | + * The Class MtasSpanIntersectingQueryWeight. | |
279 | + */ | |
172 | 280 | public class MtasSpanIntersectingQueryWeight { |
173 | 281 | |
174 | 282 | /** The span weight. */ |
175 | 283 | public SpanWeight spanWeight; |
176 | 284 | |
285 | + /** | |
286 | + * Instantiates a new mtas span intersecting query weight. | |
287 | + * | |
288 | + * @param spanWeight | |
289 | + * the span weight | |
290 | + */ | |
177 | 291 | public MtasSpanIntersectingQueryWeight(SpanWeight spanWeight) { |
178 | 292 | this.spanWeight = spanWeight; |
179 | 293 | } |
... | ... |
src/mtas/search/spans/MtasSpanIntersectingSpans.java
... | ... | @@ -7,16 +7,30 @@ import org.apache.lucene.search.spans.Spans; |
7 | 7 | import mtas.search.spans.MtasSpanIntersectingQuery.MtasSpanIntersectingQuerySpans; |
8 | 8 | import mtas.search.spans.util.MtasSpans; |
9 | 9 | |
10 | +/** | |
11 | + * The Class MtasSpanIntersectingSpans. | |
12 | + */ | |
10 | 13 | public class MtasSpanIntersectingSpans extends Spans implements MtasSpans { |
11 | 14 | |
15 | + /** The spans of the first and second clause, spans1 and spans2. */ | |
12 | 16 | private MtasSpanIntersectingQuerySpans spans1, spans2; |
13 | 17 | |
18 | + /** Whether nextStartPosition() has been called, and whether no more positions are available. */ | |
14 | 19 | private boolean calledNextStartPosition, noMorePositions; |
15 | 20 | |
21 | + /** The last recorded start and end position of spans2. */ | |
16 | 22 | private int lastSpans2StartPosition, lastSpans2EndPosition; |
17 | - | |
23 | + | |
24 | + /** The doc id. */ | |
18 | 25 | private int docId; |
19 | 26 | |
27 | + /** | |
28 | + * Instantiates a new mtas span intersecting spans. | |
29 | + * | |
30 | + * @param mtasSpanIntersectingQuery the mtas span intersecting query | |
31 | + * @param spans1 the spans1 | |
32 | + * @param spans2 the spans2 | |
33 | + */ | |
20 | 34 | public MtasSpanIntersectingSpans( |
21 | 35 | MtasSpanIntersectingQuery mtasSpanIntersectingQuery, |
22 | 36 | MtasSpanIntersectingQuerySpans spans1, |
... | ... | @@ -27,6 +41,9 @@ public class MtasSpanIntersectingSpans extends Spans implements MtasSpans { |
27 | 41 | this.spans2 = spans2; |
28 | 42 | } |
29 | 43 | |
44 | + /* (non-Javadoc) | |
45 | + * @see org.apache.lucene.search.spans.Spans#nextStartPosition() | |
46 | + */ | |
30 | 47 | @Override |
31 | 48 | public int nextStartPosition() throws IOException { |
32 | 49 | // no document |
... | ... | @@ -52,6 +69,9 @@ public class MtasSpanIntersectingSpans extends Spans implements MtasSpans { |
52 | 69 | } |
53 | 70 | } |
54 | 71 | |
72 | + /* (non-Javadoc) | |
73 | + * @see org.apache.lucene.search.spans.Spans#startPosition() | |
74 | + */ | |
55 | 75 | @Override |
56 | 76 | public int startPosition() { |
57 | 77 | return calledNextStartPosition |
... | ... | @@ -59,6 +79,9 @@ public class MtasSpanIntersectingSpans extends Spans implements MtasSpans { |
59 | 79 | : -1; |
60 | 80 | } |
61 | 81 | |
82 | + /* (non-Javadoc) | |
83 | + * @see org.apache.lucene.search.spans.Spans#endPosition() | |
84 | + */ | |
62 | 85 | @Override |
63 | 86 | public int endPosition() { |
64 | 87 | return calledNextStartPosition |
... | ... | @@ -66,28 +89,43 @@ public class MtasSpanIntersectingSpans extends Spans implements MtasSpans { |
66 | 89 | : -1; |
67 | 90 | } |
68 | 91 | |
92 | + /* (non-Javadoc) | |
93 | + * @see org.apache.lucene.search.spans.Spans#width() | |
94 | + */ | |
69 | 95 | @Override |
70 | 96 | public int width() { |
71 | 97 | return calledNextStartPosition ? (noMorePositions ? 0 |
72 | 98 | : spans1.spans.endPosition() - spans1.spans.startPosition()) : 0; |
73 | 99 | } |
74 | 100 | |
101 | + /* (non-Javadoc) | |
102 | + * @see org.apache.lucene.search.spans.Spans#collect(org.apache.lucene.search.spans.SpanCollector) | |
103 | + */ | |
75 | 104 | @Override |
76 | 105 | public void collect(SpanCollector collector) throws IOException { |
77 | 106 | spans1.spans.collect(collector); |
78 | 107 | spans2.spans.collect(collector); |
79 | 108 | } |
80 | 109 | |
110 | + /* (non-Javadoc) | |
111 | + * @see org.apache.lucene.search.spans.Spans#positionsCost() | |
112 | + */ | |
81 | 113 | @Override |
82 | 114 | public float positionsCost() { |
83 | 115 | return 0; |
84 | 116 | } |
85 | 117 | |
118 | + /* (non-Javadoc) | |
119 | + * @see org.apache.lucene.search.DocIdSetIterator#docID() | |
120 | + */ | |
86 | 121 | @Override |
87 | 122 | public int docID() { |
88 | 123 | return docId; |
89 | 124 | } |
90 | 125 | |
126 | + /* (non-Javadoc) | |
127 | + * @see org.apache.lucene.search.DocIdSetIterator#nextDoc() | |
128 | + */ | |
91 | 129 | @Override |
92 | 130 | public int nextDoc() throws IOException { |
93 | 131 | reset(); |
... | ... | @@ -96,6 +134,9 @@ public class MtasSpanIntersectingSpans extends Spans implements MtasSpans { |
96 | 134 | return docId; |
97 | 135 | } |
98 | 136 | |
137 | + /* (non-Javadoc) | |
138 | + * @see org.apache.lucene.search.DocIdSetIterator#advance(int) | |
139 | + */ | |
99 | 140 | @Override |
100 | 141 | public int advance(int target) throws IOException { |
101 | 142 | reset(); |
... | ... | @@ -125,10 +166,10 @@ public class MtasSpanIntersectingSpans extends Spans implements MtasSpans { |
125 | 166 | return docId; |
126 | 167 | } |
127 | 168 | } |
128 | - //check equal docId, otherwise next | |
169 | + // check equal docId, otherwise next | |
129 | 170 | if (spans1DocId == spans2DocId) { |
130 | 171 | docId = spans1DocId; |
131 | - //check match | |
172 | + // check match | |
132 | 173 | if (goToNextStartPosition()) { |
133 | 174 | return docId; |
134 | 175 | } else { |
... | ... | @@ -140,6 +181,12 @@ public class MtasSpanIntersectingSpans extends Spans implements MtasSpans { |
140 | 181 | } |
141 | 182 | } |
142 | 183 | |
184 | + /** | |
185 | + * Go to next doc. | |
186 | + * | |
187 | + * @return true, if successful | |
188 | + * @throws IOException Signals that an I/O exception has occurred. | |
189 | + */ | |
143 | 190 | private boolean goToNextDoc() throws IOException { |
144 | 191 | if (docId == NO_MORE_DOCS) { |
145 | 192 | return true; |
... | ... | @@ -157,56 +204,70 @@ public class MtasSpanIntersectingSpans extends Spans implements MtasSpans { |
157 | 204 | } |
158 | 205 | } |
159 | 206 | if (docId != NO_MORE_DOCS) { |
160 | - if(!goToNextStartPosition()) { | |
207 | + if (!goToNextStartPosition()) { | |
161 | 208 | reset(); |
162 | 209 | return false; |
163 | 210 | } |
164 | - } | |
211 | + } | |
165 | 212 | return true; |
166 | 213 | } |
167 | 214 | } |
168 | - | |
215 | + | |
216 | + /** | |
217 | + * Go to next start position. | |
218 | + * | |
219 | + * @return true, if successful | |
220 | + * @throws IOException Signals that an I/O exception has occurred. | |
221 | + */ | |
169 | 222 | private boolean goToNextStartPosition() throws IOException { |
170 | 223 | int nextSpans1StartPosition, nextSpans1EndPosition; |
171 | 224 | int nextSpans2StartPosition, nextSpans2EndPosition; |
172 | 225 | while ((nextSpans1StartPosition = spans1.spans |
173 | 226 | .nextStartPosition()) != NO_MORE_POSITIONS) { |
174 | - nextSpans1EndPosition = spans1.spans | |
175 | - .endPosition(); | |
176 | - if(nextSpans1StartPosition<=lastSpans2EndPosition && nextSpans1EndPosition>=lastSpans2StartPosition) { | |
227 | + nextSpans1EndPosition = spans1.spans.endPosition(); | |
228 | + if (nextSpans1StartPosition <= lastSpans2EndPosition | |
229 | + && nextSpans1EndPosition >= lastSpans2StartPosition) { | |
177 | 230 | return true; |
178 | 231 | } else { |
179 | - while(lastSpans2StartPosition<=nextSpans1EndPosition) { | |
180 | - nextSpans2StartPosition = spans2.spans.nextStartPosition(); | |
181 | - if(nextSpans2StartPosition==NO_MORE_POSITIONS) { | |
232 | + while (lastSpans2StartPosition <= nextSpans1EndPosition) { | |
233 | + nextSpans2StartPosition = spans2.spans.nextStartPosition(); | |
234 | + if (nextSpans2StartPosition == NO_MORE_POSITIONS) { | |
182 | 235 | noMorePositions = true; |
183 | 236 | return false; |
184 | 237 | } else { |
185 | 238 | nextSpans2EndPosition = spans2.spans.endPosition(); |
186 | - if(nextSpans2StartPosition>lastSpans2StartPosition || nextSpans2EndPosition>lastSpans2EndPosition) { | |
187 | - if(nextSpans2EndPosition>lastSpans2EndPosition) { | |
239 | + if (nextSpans2StartPosition > lastSpans2StartPosition | |
240 | + || nextSpans2EndPosition > lastSpans2EndPosition) { | |
241 | + if (nextSpans2EndPosition > lastSpans2EndPosition) { | |
188 | 242 | lastSpans2StartPosition = nextSpans2StartPosition; |
189 | 243 | lastSpans2EndPosition = nextSpans2EndPosition; |
190 | - if(nextSpans1StartPosition<=lastSpans2EndPosition && nextSpans1EndPosition>=lastSpans2StartPosition) { | |
244 | + if (nextSpans1StartPosition <= lastSpans2EndPosition | |
245 | + && nextSpans1EndPosition >= lastSpans2StartPosition) { | |
191 | 246 | return true; |
192 | 247 | } |
193 | 248 | } |
194 | - } | |
249 | + } | |
195 | 250 | } |
196 | 251 | } |
197 | 252 | } |
198 | - } | |
199 | - noMorePositions = true; | |
253 | + } | |
254 | + noMorePositions = true; | |
200 | 255 | return false; |
201 | 256 | } |
202 | 257 | |
258 | + /** | |
259 | + * Reset. | |
260 | + */ | |
203 | 261 | private void reset() { |
204 | 262 | calledNextStartPosition = false; |
205 | 263 | noMorePositions = false; |
206 | 264 | lastSpans2StartPosition = -1; |
207 | - lastSpans2EndPosition = -1; | |
265 | + lastSpans2EndPosition = -1; | |
208 | 266 | } |
209 | 267 | |
268 | + /* (non-Javadoc) | |
269 | + * @see org.apache.lucene.search.DocIdSetIterator#cost() | |
270 | + */ | |
210 | 271 | @Override |
211 | 272 | public long cost() { |
212 | 273 | return 0; |
... | ... |
src/mtas/search/spans/MtasSpanOrQuery.java
... | ... | @@ -11,7 +11,6 @@ import org.apache.lucene.search.spans.SpanQuery; |
11 | 11 | import org.apache.lucene.search.spans.SpanWeight; |
12 | 12 | |
13 | 13 | import mtas.search.spans.util.MtasSpanQuery; |
14 | -import mtas.search.spans.util.MtasSpanUniquePositionQuery; | |
15 | 14 | |
16 | 15 | /** |
17 | 16 | * The Class MtasSpanOrQuery. |
... | ... |
src/mtas/search/spans/MtasSpanWithinQuery.java
... | ... | @@ -4,55 +4,83 @@ import java.io.IOException; |
4 | 4 | |
5 | 5 | import org.apache.lucene.index.IndexReader; |
6 | 6 | import org.apache.lucene.search.IndexSearcher; |
7 | +import org.apache.lucene.search.spans.SpanContainingQuery; | |
7 | 8 | import org.apache.lucene.search.spans.SpanWeight; |
8 | 9 | import org.apache.lucene.search.spans.SpanWithinQuery; |
9 | 10 | |
10 | 11 | import mtas.search.spans.util.MtasSpanQuery; |
11 | 12 | |
13 | +/** | |
14 | + * The Class MtasSpanWithinQuery. | |
15 | + */ | |
12 | 16 | public class MtasSpanWithinQuery extends MtasSpanQuery { |
13 | - | |
17 | + | |
14 | 18 | /** The base query. */ |
15 | 19 | private SpanWithinQuery baseQuery; |
16 | 20 | |
21 | + /** | |
22 | + * Instantiates a new mtas span within query. | |
23 | + * | |
24 | + * @param q1 the q1 | |
25 | + * @param q2 the q2 | |
26 | + */ | |
17 | 27 | public MtasSpanWithinQuery(MtasSpanQuery q1, MtasSpanQuery q2) { |
18 | 28 | super(); |
19 | 29 | baseQuery = new SpanWithinQuery(q1, q2); |
20 | 30 | } |
21 | - | |
31 | + | |
32 | + /* | |
33 | + * (non-Javadoc) | |
34 | + * | |
35 | + * @see mtas.search.spans.util.MtasSpanQuery#rewrite(org.apache.lucene.index. | |
36 | + * IndexReader) | |
37 | + */ | |
22 | 38 | @Override |
23 | 39 | public MtasSpanQuery rewrite(IndexReader reader) throws IOException { |
24 | - SpanWithinQuery newBaseQuery = (SpanWithinQuery) baseQuery.rewrite(reader); | |
25 | - if(newBaseQuery!=baseQuery) { | |
26 | - try { | |
27 | - MtasSpanWithinQuery clone = (MtasSpanWithinQuery) this.clone(); | |
28 | - clone.baseQuery = newBaseQuery; | |
29 | - return clone; | |
30 | - } catch (CloneNotSupportedException e) { | |
31 | - throw new AssertionError(e); | |
32 | - } | |
33 | - } else { | |
34 | - return this; | |
35 | - } | |
36 | - } | |
40 | + baseQuery = (SpanWithinQuery) baseQuery.rewrite(reader); | |
41 | + return this; | |
42 | + } | |
37 | 43 | |
44 | + /* | |
45 | + * (non-Javadoc) | |
46 | + * | |
47 | + * @see org.apache.lucene.search.spans.SpanQuery#getField() | |
48 | + */ | |
38 | 49 | @Override |
39 | 50 | public String getField() { |
40 | 51 | return baseQuery.getField(); |
41 | 52 | } |
42 | 53 | |
54 | + /* | |
55 | + * (non-Javadoc) | |
56 | + * | |
57 | + * @see | |
58 | + * org.apache.lucene.search.spans.SpanQuery#createWeight(org.apache.lucene. | |
59 | + * search.IndexSearcher, boolean) | |
60 | + */ | |
43 | 61 | @Override |
44 | 62 | public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) |
45 | 63 | throws IOException { |
46 | 64 | SpanWeight sw = baseQuery.createWeight(searcher, needsScores); |
47 | 65 | return sw; |
48 | - //return baseQuery.createWeight(searcher, needsScores); | |
66 | + // return baseQuery.createWeight(searcher, needsScores); | |
49 | 67 | } |
50 | 68 | |
69 | + /* | |
70 | + * (non-Javadoc) | |
71 | + * | |
72 | + * @see org.apache.lucene.search.Query#toString(java.lang.String) | |
73 | + */ | |
51 | 74 | @Override |
52 | 75 | public String toString(String field) { |
53 | 76 | return baseQuery.toString(field); |
54 | 77 | } |
55 | 78 | |
79 | + /* | |
80 | + * (non-Javadoc) | |
81 | + * | |
82 | + * @see org.apache.lucene.search.Query#equals(java.lang.Object) | |
83 | + */ | |
56 | 84 | @Override |
57 | 85 | public boolean equals(Object obj) { |
58 | 86 | if (this == obj) |
... | ... | @@ -65,6 +93,11 @@ public class MtasSpanWithinQuery extends MtasSpanQuery { |
65 | 93 | return baseQuery.equals(that.baseQuery); |
66 | 94 | } |
67 | 95 | |
96 | + /* | |
97 | + * (non-Javadoc) | |
98 | + * | |
99 | + * @see org.apache.lucene.search.Query#hashCode() | |
100 | + */ | |
68 | 101 | @Override |
69 | 102 | public int hashCode() { |
70 | 103 | return baseQuery.hashCode(); |
... | ... |
src/mtas/search/spans/util/MtasExtendedSpanTermQuery.java
... | ... | @@ -12,7 +12,6 @@ import org.apache.lucene.index.IndexReaderContext; |
12 | 12 | import org.apache.lucene.index.LeafReader; |
13 | 13 | import org.apache.lucene.index.LeafReaderContext; |
14 | 14 | import org.apache.lucene.index.PostingsEnum; |
15 | -import org.apache.lucene.index.ReaderUtil; | |
16 | 15 | import org.apache.lucene.index.Term; |
17 | 16 | import org.apache.lucene.index.TermContext; |
18 | 17 | import org.apache.lucene.index.TermState; |
... | ... |
src/site/resources/images/meertens.png
src/site/resources/images/meertens_old.png
0 → 100644
12.7 KB
src/site/site.xml
... | ... | @@ -5,8 +5,8 @@ |
5 | 5 | <name>Meertens Instituut</name> |
6 | 6 | <src>images/meertens.png</src> |
7 | 7 | <href>http://www.meertens.knaw.nl/</href> |
8 | - <width>147</width> | |
9 | - <height>100</height> | |
8 | + <width>93</width> | |
9 | + <height>104</height> | |
10 | 10 | <title>Meertens Instituut</title> |
11 | 11 | </bannerRight> |
12 | 12 | <skin> |
... | ... |