MtasCQLParserTestSentence.java
12.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
package mtas.parser;
import static org.junit.Assert.*;
import java.io.BufferedReader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.search.spans.SpanContainingQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanWithinQuery;
import mtas.parser.cql.MtasCQLParser;
import mtas.parser.cql.ParseException;
import mtas.parser.cql.util.MtasCQLParserGroupQuery;
import mtas.parser.cql.util.MtasCQLParserWordQuery;
import mtas.search.spans.MtasSpanMatchAllQuery;
import mtas.search.spans.MtasSpanOrQuery;
import mtas.search.spans.MtasSpanRecurrenceQuery;
import mtas.search.spans.MtasSpanSequenceItem;
import mtas.search.spans.MtasSpanSequenceQuery;
/**
 * Tests for {@link MtasCQLParser}.
 *
 * <p>Each case feeds a CQL expression to the parser and either compares the
 * resulting query with a hand-built {@link SpanQuery}, or checks that two
 * different CQL expressions parse to equal queries.
 */
public class MtasCQLParserTestSentence {

  /** Field name used by every test case. */
  private static final String FIELD = "testveld";

  /** Single JUnit entry point; runs all basic test cases in order. */
  @org.junit.Test
  public void test() {
    basicTests();
  }

  /**
   * Parses {@code cql} and asserts that the result equals the expected query.
   *
   * @param field the field passed to the parser
   * @param defaultPrefix the default prefix passed to the parser, may be {@code null}
   * @param cql the CQL expression to parse
   * @param q the expected query
   * @throws AssertionError if the parsed query differs from {@code q}, or if
   *     the expression cannot be parsed
   */
  private void testCQLParse(String field, String defaultPrefix, String cql, SpanQuery q) {
    MtasCQLParser p = new MtasCQLParser(new BufferedReader(new StringReader(cql)));
    System.out.print("CQL parsing:\t" + cql);
    try {
      // JUnit convention: expected value first, actual value second.
      assertEquals("Unexpected query for CQL: " + cql, q, p.parse(field, defaultPrefix));
      System.out.print("\n");
    } catch (ParseException e) {
      System.out.println("Error CQL parsing:\t" + cql);
      // A parse failure must fail the test instead of being silently logged.
      throw new AssertionError("Error CQL parsing:\t" + cql, e);
    }
  }

  /**
   * Parses two CQL expressions and asserts that the resulting queries are equal.
   *
   * @param field the field passed to both parsers
   * @param defaultPrefix the default prefix passed to both parsers, may be {@code null}
   * @param cql1 the first CQL expression
   * @param cql2 the second CQL expression
   * @throws AssertionError if the parsed queries differ, or if either
   *     expression cannot be parsed
   */
  private void testCQLEquivalent(String field, String defaultPrefix, String cql1, String cql2) {
    MtasCQLParser p1 = new MtasCQLParser(new BufferedReader(new StringReader(cql1)));
    MtasCQLParser p2 = new MtasCQLParser(new BufferedReader(new StringReader(cql2)));
    System.out.print("CQL equivalent:\t" + cql1 + " and " + cql2);
    try {
      assertEquals("CQL not equivalent: " + cql1 + " and " + cql2,
          p1.parse(field, defaultPrefix), p2.parse(field, defaultPrefix));
      System.out.print("\n");
    } catch (ParseException e) {
      System.out.println("Error CQL equivalent:\t" + cql1 + " and " + cql2);
      // A parse failure must fail the test instead of being silently logged.
      throw new AssertionError("Error CQL equivalent:\t" + cql1 + " and " + cql2, e);
    }
  }

  /** Wraps a query in a sequence item constructed with the flag {@code false}. */
  private static MtasSpanSequenceItem required(SpanQuery query) {
    return new MtasSpanSequenceItem(query, false);
  }

  /** Wraps a query in a sequence item constructed with the flag {@code true}. */
  private static MtasSpanSequenceItem optional(SpanQuery query) {
    return new MtasSpanSequenceItem(query, true);
  }

  /** Builds a sequence query from the given items, preserving their order. */
  private static SpanQuery sequence(MtasSpanSequenceItem... items) {
    List<MtasSpanSequenceItem> list = new ArrayList<MtasSpanSequenceItem>();
    for (MtasSpanSequenceItem item : items) {
      list.add(item);
    }
    return new MtasSpanSequenceQuery(list);
  }

  /** Runs every basic test case, in numeric order. */
  private void basicTests() {
    basicTest1();
    basicTest2();
    basicTest3();
    basicTest4();
    basicTest5();
    basicTest6();
    basicTest7();
    basicTest8();
    basicTest9();
    basicTest10();
    basicTest11();
    basicTest12();
    basicTest13();
    basicTest14();
    basicTest15();
    basicTest16();
    basicTest17();
    basicTest18();
    basicTest19();
  }

  /** Two consecutive word conditions are expected to parse to a two-item sequence. */
  private void basicTest1() {
    String cql = "[pos=\"LID\"] [lemma=\"koe\"]";
    SpanQuery article = new MtasCQLParserWordQuery(FIELD, "pos", "LID");
    SpanQuery cow = new MtasCQLParserWordQuery(FIELD, "lemma", "koe");
    SpanQuery expected = sequence(required(article), required(cow));
    testCQLParse(FIELD, null, cql, expected);
  }

  /** {@code [] []? []} is expected to parse equal to {@code []{2,3}}. */
  private void basicTest2() {
    String cql1 = "[pos=\"LID\"] [] []? [] [lemma=\"koe\"]";
    String cql2 = "[pos=\"LID\"] []{2,3} [lemma=\"koe\"]";
    testCQLEquivalent(FIELD, null, cql1, cql2);
  }

  /** Two word conditions joined by {@code |} are expected to parse to an OR query. */
  private void basicTest3() {
    String cql = "[pos=\"LID\"] | [lemma=\"koe\"]";
    SpanQuery article = new MtasCQLParserWordQuery(FIELD, "pos", "LID");
    SpanQuery cow = new MtasCQLParserWordQuery(FIELD, "lemma", "koe");
    SpanQuery expected = new MtasSpanOrQuery(article, cow);
    testCQLParse(FIELD, null, cql, expected);
  }

  /** A word condition OR-ed with a parenthesised two-word sequence. */
  private void basicTest4() {
    String cql = "[pos=\"LID\"] | ([lemma=\"de\"] [lemma=\"koe\"])";
    SpanQuery article = new MtasCQLParserWordQuery(FIELD, "pos", "LID");
    SpanQuery the = new MtasCQLParserWordQuery(FIELD, "lemma", "de");
    SpanQuery cow = new MtasCQLParserWordQuery(FIELD, "lemma", "koe");
    SpanQuery theCow = sequence(required(the), required(cow));
    SpanQuery expected = new MtasSpanOrQuery(article, theCow);
    testCQLParse(FIELD, null, cql, expected);
  }

  /** Nested parentheses around a sequence are expected not to change the parsed query. */
  private void basicTest5() {
    String cql1 = "([pos=\"LID\"]([pos=\"ADJ\"][lemma=\"koe\"]))";
    String cql2 = "[pos=\"LID\"][pos=\"ADJ\"][lemma=\"koe\"]";
    testCQLEquivalent(FIELD, null, cql1, cql2);
  }

  /** Parenthesised groups of alternatives are expected to parse equal to the flat form. */
  private void basicTest6() {
    String cql1 = "([pos=\"LID\"]|[lemma=\"de\"][lemma=\"koe\"])|([pos=\"ADJ\"]|([lemma=\"het\"]([lemma=\"paard\"])))";
    String cql2 = "[pos=\"LID\"]|[lemma=\"de\"][lemma=\"koe\"]|[pos=\"ADJ\"]|[lemma=\"het\"][lemma=\"paard\"]";
    testCQLEquivalent(FIELD, null, cql1, cql2);
  }

  /** {@code []{0,1} []{3,5} []{2,4}} is expected to parse equal to {@code []{5,10}}. */
  private void basicTest7() {
    String cql1 = "[pos=\"LID\"] []{0,1} []{3,5} []{2,4}";
    String cql2 = "[pos=\"LID\"] []{5,10}";
    testCQLEquivalent(FIELD, null, cql1, cql2);
  }

  /** A word condition followed by a parenthesised OR of two word conditions. */
  private void basicTest8() {
    String cql = "[lemma=\"koe\"]([pos=\"N\"]|[pos=\"ADJ\"])";
    SpanQuery cow = new MtasCQLParserWordQuery(FIELD, "lemma", "koe");
    SpanQuery noun = new MtasCQLParserWordQuery(FIELD, "pos", "N");
    SpanQuery adjective = new MtasCQLParserWordQuery(FIELD, "pos", "ADJ");
    SpanQuery nounOrAdjective = new MtasSpanOrQuery(noun, adjective);
    SpanQuery expected = sequence(required(cow), required(nounOrAdjective));
    testCQLParse(FIELD, null, cql, expected);
  }

  /** A repeated OR group ({@code {2,3}}) between two word conditions. */
  private void basicTest9() {
    String cql = "[lemma=\"koe\"]([pos=\"N\"]|[pos=\"ADJ\"]){2,3}[lemma=\"paard\"]";
    SpanQuery cow = new MtasCQLParserWordQuery(FIELD, "lemma", "koe");
    SpanQuery noun = new MtasCQLParserWordQuery(FIELD, "pos", "N");
    SpanQuery adjective = new MtasCQLParserWordQuery(FIELD, "pos", "ADJ");
    SpanQuery horse = new MtasCQLParserWordQuery(FIELD, "lemma", "paard");
    // NOTE(review): the expected value is an OR of two recurrences, not a
    // recurrence of the OR - confirm this is the intended parser output.
    SpanQuery repeated = new MtasSpanOrQuery(
        new MtasSpanRecurrenceQuery(noun, 2, 3),
        new MtasSpanRecurrenceQuery(adjective, 2, 3));
    SpanQuery expected = sequence(required(cow), required(repeated), required(horse));
    testCQLParse(FIELD, null, cql, expected);
  }

  /** A {@code ?}-marked first item, a {@code {1,3}} recurrence, and a plain word condition. */
  private void basicTest10() {
    String cql = "[pos=\"LID\"]? [pos=\"ADJ\"]{1,3} [lemma=\"koe\"]";
    SpanQuery article = new MtasCQLParserWordQuery(FIELD, "pos", "LID");
    SpanQuery adjective = new MtasCQLParserWordQuery(FIELD, "pos", "ADJ");
    SpanQuery cow = new MtasCQLParserWordQuery(FIELD, "lemma", "koe");
    SpanQuery expected = sequence(
        optional(article),
        required(new MtasSpanRecurrenceQuery(adjective, 1, 3)),
        required(cow));
    testCQLParse(FIELD, null, cql, expected);
  }

  /** {@code <sentence/> containing [word]} is expected to parse to a {@link SpanContainingQuery}. */
  private void basicTest11() {
    String cql = "<sentence/> containing [lemma=\"koe\"]";
    SpanQuery sentence = new MtasCQLParserGroupQuery(FIELD, "sentence");
    SpanQuery cow = new MtasCQLParserWordQuery(FIELD, "lemma", "koe");
    SpanQuery expected = new SpanContainingQuery(sentence, cow);
    testCQLParse(FIELD, null, cql, expected);
  }

  /** {@code [word] within <sentence/>} is expected to parse to a {@link SpanWithinQuery}. */
  private void basicTest12() {
    String cql = "[lemma=\"koe\"] within <sentence/>";
    SpanQuery cow = new MtasCQLParserWordQuery(FIELD, "lemma", "koe");
    SpanQuery sentence = new MtasCQLParserGroupQuery(FIELD, "sentence");
    // The group query is passed first and the word query second.
    SpanQuery expected = new SpanWithinQuery(sentence, cow);
    testCQLParse(FIELD, null, cql, expected);
  }

  /** A word condition followed by a parenthesised {@code within} expression. */
  private void basicTest13() {
    String cql = "[lemma=\"koe\"]([t=\"de\"] within <sentence/>)";
    SpanQuery cow = new MtasCQLParserWordQuery(FIELD, "lemma", "koe");
    SpanQuery the = new MtasCQLParserWordQuery(FIELD, "t", "de");
    SpanQuery sentence = new MtasCQLParserGroupQuery(FIELD, "sentence");
    SpanQuery theInSentence = new SpanWithinQuery(sentence, the);
    SpanQuery expected = sequence(required(cow), required(theInSentence));
    testCQLParse(FIELD, null, cql, expected);
  }

  /** A parenthesised {@code within} expression followed by a word condition. */
  private void basicTest14() {
    String cql = "([t=\"de\"] within <sentence/>)[lemma=\"koe\"]";
    SpanQuery the = new MtasCQLParserWordQuery(FIELD, "t", "de");
    SpanQuery sentence = new MtasCQLParserGroupQuery(FIELD, "sentence");
    SpanQuery theInSentence = new SpanWithinQuery(sentence, the);
    SpanQuery cow = new MtasCQLParserWordQuery(FIELD, "lemma", "koe");
    SpanQuery expected = sequence(required(theInSentence), required(cow));
    testCQLParse(FIELD, null, cql, expected);
  }

  /**
   * A parenthesised {@code containing} expression combined with {@code within}
   * inside a sequence.
   *
   * <p>The expected query places the trailing {@code <sentence/>[lemma="paard"]}
   * sequence as the first argument of the {@link SpanWithinQuery}.
   */
  private void basicTest15() {
    String cql = "[lemma=\"koe\"](<sentence/> containing [t=\"de\"]) within <sentence/>[lemma=\"paard\"]";
    SpanQuery cow = new MtasCQLParserWordQuery(FIELD, "lemma", "koe");
    SpanQuery innerSentence = new MtasCQLParserGroupQuery(FIELD, "sentence");
    SpanQuery the = new MtasCQLParserWordQuery(FIELD, "t", "de");
    SpanQuery sentenceWithThe = new SpanContainingQuery(innerSentence, the);
    SpanQuery outerSentence = new MtasCQLParserGroupQuery(FIELD, "sentence");
    SpanQuery horse = new MtasCQLParserWordQuery(FIELD, "lemma", "paard");
    SpanQuery sentenceThenHorse = sequence(required(outerSentence), required(horse));
    SpanQuery within = new SpanWithinQuery(sentenceThenHorse, sentenceWithThe);
    SpanQuery expected = sequence(required(cow), required(within));
    testCQLParse(FIELD, null, cql, expected);
  }

  /**
   * {@code X !containing Y} is expected to parse to a {@link SpanNotQuery}
   * built from {@code X} and a {@link SpanContainingQuery} of {@code X} and
   * {@code Y}.
   */
  private void basicTest16() {
    String cql = "(<entity=\"loc\"/> within (<s/> containing [t_lc=\"amsterdam\"])) !containing ([t_lc=\"amsterdam\"])";
    SpanQuery location = new MtasCQLParserGroupQuery(FIELD, "entity", "loc");
    SpanQuery sentence = new MtasCQLParserGroupQuery(FIELD, "s");
    SpanQuery amsterdam = new MtasCQLParserWordQuery(FIELD, "t_lc", "amsterdam");
    SpanQuery sentenceWithAmsterdam = new SpanContainingQuery(sentence, amsterdam);
    SpanQuery locationInSentence = new SpanWithinQuery(sentenceWithAmsterdam, location);
    SpanQuery expected = new SpanNotQuery(locationInSentence,
        new SpanContainingQuery(locationInSentence, amsterdam));
    testCQLParse(FIELD, null, cql, expected);
  }

  /** A repeated group ({@code {1,2}}) between two empty {@code []} conditions. */
  private void basicTest17() {
    String cql = "[]<entity=\"loc\"/>{1,2}[]";
    SpanQuery location = new MtasCQLParserGroupQuery(FIELD, "entity", "loc");
    SpanQuery locations = new MtasSpanRecurrenceQuery(location, 1, 2);
    SpanQuery expected = sequence(
        required(new MtasSpanMatchAllQuery(FIELD)),
        required(locations),
        required(new MtasSpanMatchAllQuery(FIELD)));
    testCQLParse(FIELD, null, cql, expected);
  }

  /**
   * With default prefix {@code t_lc}, a bare quoted value is expected to parse
   * to a word query on {@code t_lc}.
   */
  private void basicTest18() {
    String cql = "\"de\" [pos=\"N\"]";
    SpanQuery the = new MtasCQLParserWordQuery(FIELD, "t_lc", "de");
    SpanQuery noun = new MtasCQLParserWordQuery(FIELD, "pos", "N");
    SpanQuery expected = sequence(required(the), required(noun));
    testCQLParse(FIELD, "t_lc", cql, expected);
  }

  /** A parenthesised sequence repeated as a whole ({@code {3,4}}). */
  private void basicTest19() {
    String cql = "([]<entity=\"loc\"/>{1,2}[]){3,4}";
    SpanQuery location = new MtasCQLParserGroupQuery(FIELD, "entity", "loc");
    SpanQuery locations = new MtasSpanRecurrenceQuery(location, 1, 2);
    SpanQuery inner = sequence(
        required(new MtasSpanMatchAllQuery(FIELD)),
        required(locations),
        required(new MtasSpanMatchAllQuery(FIELD)));
    SpanQuery expected = new MtasSpanRecurrenceQuery(inner, 3, 4);
    testCQLParse(FIELD, null, cql, expected);
  }
}