Commit 6a180233e802814f169c8054d3b0a6ffd2315ae3
1 parent
ec0be433
grouping in cql list query
Showing
5 changed files
with
437 additions
and
0 deletions
src/main/java/mtas/solr/handler/IpiMtasSearchHandler.java
0 → 100644
1 | +package mtas.solr.handler; | ||
2 | + | ||
3 | + | ||
4 | +import mtas.solr.handler.stat.MtasGroupQueryHandler; | ||
5 | +import org.apache.commons.lang3.StringUtils; | ||
6 | +import org.apache.solr.common.params.ModifiableSolrParams; | ||
7 | +import org.apache.solr.common.params.SolrParams; | ||
8 | +import org.apache.solr.handler.component.SearchHandler; | ||
9 | +import org.apache.solr.request.SolrQueryRequest; | ||
10 | +import org.apache.solr.response.SolrQueryResponse; | ||
11 | + | ||
12 | +public class IpiMtasSearchHandler extends SearchHandler { | ||
13 | + | ||
14 | + | ||
15 | + @Override | ||
16 | + public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception { | ||
17 | + transformParams(req); | ||
18 | + super.handleRequestBody(req, rsp); | ||
19 | + } | ||
20 | + | ||
21 | + private void transformParams(SolrQueryRequest req) { | ||
22 | + SolrParams params = req.getParams(); | ||
23 | + ModifiableSolrParams newParams = ModifiableSolrParams.of(params); | ||
24 | + | ||
25 | + if (!isMtasRequest(params)) { | ||
26 | + return; | ||
27 | + } | ||
28 | + | ||
29 | + handleGroups(newParams); | ||
30 | + | ||
31 | + | ||
32 | + req.setParams(newParams); | ||
33 | + } | ||
34 | + private boolean isMtasRequest(SolrParams params) { | ||
35 | + return Boolean.TRUE.equals(params.getBool("mtas")); | ||
36 | + } | ||
37 | + | ||
38 | + private void handleGroups(ModifiableSolrParams params) { | ||
39 | + int groupIndex = handleGroupsInListQueries(0, params); | ||
40 | + | ||
41 | + } | ||
42 | + | ||
43 | + | ||
44 | + private int handleGroupsInListQueries(int groupIndex, ModifiableSolrParams params) { | ||
45 | + for(int i=0; i<100 ;i++){ | ||
46 | + String queryParamName = "mtas.list."+i+".query.value"; | ||
47 | + String queryText = params.get(queryParamName); | ||
48 | + if(StringUtils.isBlank(queryText)){ | ||
49 | + return groupIndex; | ||
50 | + } | ||
51 | + if (!MtasGroupQueryHandler.hasGroupQueryCOmponent(queryText)) { | ||
52 | + continue; | ||
53 | + } | ||
54 | + String mtasField = params.get("mtas.list."+i+".field"); | ||
55 | + MtasGroupQueryHandler handler = new MtasGroupQueryHandler(); | ||
56 | + if(handler.handleGroups(groupIndex, queryText, mtasField, params)){ | ||
57 | + groupIndex++; | ||
58 | + } | ||
59 | + params.set(queryParamName, handler.getSimpleQueryText()); | ||
60 | + | ||
61 | + } | ||
62 | + return groupIndex; | ||
63 | + } | ||
64 | + | ||
65 | + | ||
66 | +} |
src/main/java/mtas/solr/handler/stat/MtasGroupQueryHandler.java
0 → 100644
1 | +package mtas.solr.handler.stat; | ||
2 | + | ||
3 | +import org.apache.commons.lang3.StringUtils; | ||
4 | +import org.apache.solr.common.params.ModifiableSolrParams; | ||
5 | + | ||
6 | +public class MtasGroupQueryHandler { | ||
7 | + int countLimit = 1000; | ||
8 | + int displLimit = 10; | ||
9 | + int occurLimit = 1; | ||
10 | + String sortOrder = null; | ||
11 | + String groupby; | ||
12 | + String leftGroupby, rightGroupby; | ||
13 | + int maxCount = 0; | ||
14 | + private String simpleQueryText; | ||
15 | + private String groupText; | ||
16 | + | ||
17 | + public boolean handleGroups(int groupIndex, String queryText, String mtasField, ModifiableSolrParams params) { | ||
18 | + | ||
19 | + this.simpleQueryText = queryText; | ||
20 | + if (!hasGroupQueryCOmponent(queryText)) { | ||
21 | + return false; | ||
22 | + } | ||
23 | + | ||
24 | + String groupByString = " group by "; | ||
25 | + int ind = queryText.indexOf(groupByString); | ||
26 | + if (ind < 0) { | ||
27 | + return false; | ||
28 | + } | ||
29 | + this.groupText = queryText.substring(ind + groupByString.length()); | ||
30 | + this.simpleQueryText = queryText.substring(0, ind); | ||
31 | + | ||
32 | + if (groupIndex < 0) { | ||
33 | + return false; | ||
34 | + } | ||
35 | + | ||
36 | + try { | ||
37 | + | ||
38 | + groupby = stripModifiers(groupText); | ||
39 | + params.set("mtas.group", true); | ||
40 | + params.set("mtas.group."+groupIndex+".query.type", "cql"); | ||
41 | + params.set("mtas.group."+groupIndex+".number", displLimit); | ||
42 | + params.set("rows", 0); | ||
43 | + params.set("mtas.group."+groupIndex+".query.value", simpleQueryText); | ||
44 | + | ||
45 | + params.set("mtas.group."+groupIndex+".field", mtasField); | ||
46 | + | ||
47 | + int i = groupby.indexOf(';'); | ||
48 | + if (i != -1) { | ||
49 | + leftGroupby = groupby.substring(0, i).trim(); | ||
50 | + rightGroupby = groupby.substring(i+1).trim(); | ||
51 | + //TODO advanced collocations | ||
52 | + } else { | ||
53 | + leftGroupby = groupby; | ||
54 | + rightGroupby = ""; | ||
55 | + | ||
56 | + | ||
57 | + String[] split = groupby.split(" *, *"); | ||
58 | + for (int gi = 0; gi < split.length; gi++) { | ||
59 | + | ||
60 | + String[] h = split[gi].split("\\."); | ||
61 | + | ||
62 | + | ||
63 | + int segno; | ||
64 | + String type; | ||
65 | + | ||
66 | + if (h.length == 2) { | ||
67 | + segno = Math.max(0, Util.safeParseInt(h[0], 1) - 1); | ||
68 | + | ||
69 | + type = h[1]; | ||
70 | + } else if (h.length == 1) { | ||
71 | + segno = 0; | ||
72 | + type = h[0]; | ||
73 | + }else{ | ||
74 | + continue; | ||
75 | + } | ||
76 | + | ||
77 | + params.set("mtas.group."+groupIndex+".grouping.hit.insideLeft."+gi+".position", segno); | ||
78 | + params.set("mtas.group."+groupIndex+".grouping.hit.insideLeft."+gi+".prefixes", type); | ||
79 | + | ||
80 | + | ||
81 | + } | ||
82 | + | ||
83 | + } | ||
84 | + | ||
85 | + } catch (StatQueryException e) { | ||
86 | + e.printStackTrace(); | ||
87 | + return false; | ||
88 | + } | ||
89 | + return true; | ||
90 | + } | ||
91 | + | ||
92 | + public static boolean hasGroupQueryCOmponent(String query) { | ||
93 | + return StringUtils.isNotBlank(query) && query.toLowerCase().contains(" group by "); | ||
94 | + } | ||
95 | + | ||
96 | + | ||
97 | + | ||
98 | + | ||
99 | + private int modifierEndPosition(String groupby, int start) | ||
100 | + throws StatQueryException | ||
101 | + { | ||
102 | + boolean checkNumber = false, checkAll = false; | ||
103 | + int offset = -1; | ||
104 | + int len = groupby.length(); | ||
105 | + | ||
106 | + if (groupby.regionMatches(start, " display ", 0, 9)) { | ||
107 | + checkNumber = true; | ||
108 | + offset = 9; | ||
109 | + } else if (groupby.regionMatches(start, " min ", 0, 5)) { | ||
110 | + checkNumber = true; | ||
111 | + offset = 5; | ||
112 | + } else if (groupby.regionMatches(start, " count ", 0, 7)) { | ||
113 | + checkNumber = true; | ||
114 | + checkAll = true; | ||
115 | + offset = 7; | ||
116 | + } else if (groupby.regionMatches(start, " sort ", 0, 6)) { | ||
117 | + offset = 6; | ||
118 | + } else { | ||
119 | + throw new StatQueryException(); | ||
120 | + } // failed to parse | ||
121 | + | ||
122 | + offset += start; | ||
123 | + if (checkNumber) { | ||
124 | + if (checkAll && groupby.regionMatches(offset, "all", 0, 3)) { | ||
125 | + return offset + 3; | ||
126 | + } | ||
127 | + while (offset < len && Character.isDigit(groupby.charAt(offset))) { | ||
128 | + offset++; | ||
129 | + } | ||
130 | + return offset; | ||
131 | + } | ||
132 | + // sort modifier, check possible cases | ||
133 | + if (groupby.regionMatches(offset, "a fronte", 0, 8)) { | ||
134 | + return offset + 8; | ||
135 | + } | ||
136 | + if (groupby.regionMatches(offset, "a tergo", 0, 7)) { | ||
137 | + return offset + 7; | ||
138 | + } | ||
139 | + if (groupby.regionMatches(offset, "by ", 0, 3)) { | ||
140 | + offset += 3; | ||
141 | + if (groupby.regionMatches(offset, "freq", 0, 4)) { | ||
142 | + return offset + 4; | ||
143 | + } | ||
144 | + if (groupby.regionMatches(offset, "maxcp", 0, 5)) { | ||
145 | + return offset + 5; | ||
146 | + } | ||
147 | + if (groupby.regionMatches(offset, "dice", 0, 4)) { | ||
148 | + return offset + 4; | ||
149 | + } | ||
150 | + if (groupby.regionMatches(offset, "cp", 0, 2)) { | ||
151 | + return offset + 2; | ||
152 | + } | ||
153 | + if (groupby.regionMatches(offset, "scp bias ", 0, 9)) { | ||
154 | + offset += 9; | ||
155 | + while (offset < len && groupby.charAt(offset) != ' ') { | ||
156 | + offset++; | ||
157 | + } | ||
158 | + return offset; | ||
159 | + } | ||
160 | + if (groupby.regionMatches(offset, "scp", 0, 3)) { | ||
161 | + return offset + 3; | ||
162 | + } | ||
163 | + } | ||
164 | + throw new StatQueryException(); | ||
165 | + } | ||
166 | + | ||
167 | + | ||
168 | + String stripModifiers(String groupby) throws StatQueryException { | ||
169 | + int i, end; | ||
170 | + | ||
171 | + i = groupby.indexOf(" display "); | ||
172 | + if (i != -1) { | ||
173 | + end = modifierEndPosition(groupby, i); | ||
174 | + displLimit = Util.safeParseInt(groupby.substring(i + 9, end), displLimit); | ||
175 | + groupby = groupby.substring(0, i) + groupby.substring(end); | ||
176 | + } | ||
177 | + | ||
178 | + i = groupby.indexOf(" count "); | ||
179 | + if (i != -1) { | ||
180 | + end = modifierEndPosition(groupby, i); | ||
181 | + countLimit = Util.safeParseInt(groupby.substring(i + 7, end), countLimit); | ||
182 | + groupby = groupby.substring(0, i) + groupby.substring(end); | ||
183 | + } | ||
184 | + | ||
185 | + i = groupby.indexOf(" min "); | ||
186 | + if (i != -1) { | ||
187 | + end = modifierEndPosition(groupby, i); | ||
188 | + occurLimit = Util.safeParseInt(groupby.substring(i + 5, end), occurLimit); | ||
189 | + groupby = groupby.substring(0, i) + groupby.substring(end); | ||
190 | + } | ||
191 | + | ||
192 | + i = groupby.indexOf(" sort "); | ||
193 | + if (i != -1) { | ||
194 | + end = modifierEndPosition(groupby, i); | ||
195 | + sortOrder = (groupby.substring(i + 6, end)); | ||
196 | + groupby = groupby.substring(0, i) + groupby.substring(end); | ||
197 | + } | ||
198 | + | ||
199 | + return groupby; | ||
200 | + } | ||
201 | + | ||
202 | + public int getCountLimit() { | ||
203 | + return countLimit; | ||
204 | + } | ||
205 | + | ||
206 | + public void setCountLimit(int countLimit) { | ||
207 | + this.countLimit = countLimit; | ||
208 | + } | ||
209 | + | ||
210 | + public int getDisplLimit() { | ||
211 | + return displLimit; | ||
212 | + } | ||
213 | + | ||
214 | + public void setDisplLimit(int displLimit) { | ||
215 | + this.displLimit = displLimit; | ||
216 | + } | ||
217 | + | ||
218 | + public int getOccurLimit() { | ||
219 | + return occurLimit; | ||
220 | + } | ||
221 | + | ||
222 | + public void setOccurLimit(int occurLimit) { | ||
223 | + this.occurLimit = occurLimit; | ||
224 | + } | ||
225 | + | ||
226 | + public String getSortOrder() { | ||
227 | + return sortOrder; | ||
228 | + } | ||
229 | + | ||
230 | + public void setSortOrder(String sortOrder) { | ||
231 | + this.sortOrder = sortOrder; | ||
232 | + } | ||
233 | + | ||
234 | + public String getGroupby() { | ||
235 | + return groupby; | ||
236 | + } | ||
237 | + | ||
238 | + public void setGroupby(String groupby) { | ||
239 | + this.groupby = groupby; | ||
240 | + } | ||
241 | + | ||
242 | + public String getLeftGroupby() { | ||
243 | + return leftGroupby; | ||
244 | + } | ||
245 | + | ||
246 | + public void setLeftGroupby(String leftGroupby) { | ||
247 | + this.leftGroupby = leftGroupby; | ||
248 | + } | ||
249 | + | ||
250 | + public String getRightGroupby() { | ||
251 | + return rightGroupby; | ||
252 | + } | ||
253 | + | ||
254 | + public void setRightGroupby(String rightGroupby) { | ||
255 | + this.rightGroupby = rightGroupby; | ||
256 | + } | ||
257 | + | ||
258 | + public int getMaxCount() { | ||
259 | + return maxCount; | ||
260 | + } | ||
261 | + | ||
262 | + public void setMaxCount(int maxCount) { | ||
263 | + this.maxCount = maxCount; | ||
264 | + } | ||
265 | + | ||
266 | + public String getSimpleQueryText() { | ||
267 | + return simpleQueryText; | ||
268 | + } | ||
269 | + | ||
270 | + public void setSimpleQueryText(String simpleQueryText) { | ||
271 | + this.simpleQueryText = simpleQueryText; | ||
272 | + } | ||
273 | + | ||
274 | + public String getGroupText() { | ||
275 | + return groupText; | ||
276 | + } | ||
277 | + | ||
278 | + public void setGroupText(String groupText) { | ||
279 | + this.groupText = groupText; | ||
280 | + } | ||
281 | +} |
src/main/java/mtas/solr/handler/stat/StatQueryException.java
0 → 100644
1 | +/* | ||
2 | + * This file is part of the Poliqarp suite. | ||
3 | + * | ||
4 | + * Copyright (C) 2004-2009 by Instytut Podstaw Informatyki Polskiej | ||
5 | + * Akademii Nauk (IPI PAN; Institute of Computer Science, Polish | ||
6 | + * Academy of Sciences; cf. www.ipipan.waw.pl). All rights reserved. | ||
7 | + * | ||
8 | + * This file may be distributed and/or modified under the terms of the | ||
9 | + * GNU General Public License version 2 as published by the Free Software | ||
10 | + * Foundation and appearing in the file gpl.txt included in the packaging | ||
11 | + * of this file. (See http://www.gnu.org/licenses/translations.html for | ||
12 | + * unofficial translations.) | ||
13 | + * | ||
14 | + * A commercial license is available from IPI PAN (contact | ||
15 | + * Michal.Ciesiolka@ipipan.waw.pl or ipi@ipipan.waw.pl for more | ||
16 | + * information). Licensees holding a valid commercial license from IPI | ||
17 | + * PAN may use this file in accordance with that license. | ||
18 | + * | ||
19 | + * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING | ||
20 | + * THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
21 | + * PURPOSE. | ||
22 | + */ | ||
23 | + | ||
24 | +package mtas.solr.handler.stat; | ||
25 | + | ||
/**
 * This exception is thrown by query-related methods to indicate error
 * conditions.
 */
public class StatQueryException extends Exception
{
    private static final long serialVersionUID = 1L;

    /** Creates an exception without a detail message. */
    public StatQueryException()
    {
        super();
    }

    /**
     * Creates an exception describing what could not be parsed.
     *
     * @param message detail message
     */
    public StatQueryException(String message)
    {
        super(message);
    }

    /**
     * Creates an exception that wraps an underlying failure.
     *
     * @param message detail message
     * @param cause   the failure that triggered this exception
     */
    public StatQueryException(String message, Throwable cause)
    {
        super(message, cause);
    }
}
src/main/java/mtas/solr/handler/stat/Util.java
0 → 100644
1 | +package mtas.solr.handler.stat; | ||
2 | + | ||
/**
 * Lenient number parsing helpers: malformed input yields a fallback value instead of an
 * exception, and a diagnostic line is written to {@code System.err}.
 */
public class Util {
    /**
     * Parses an integer, tolerating surrounding whitespace.
     *
     * @param c   text to parse; may be {@code null}
     * @param def value returned when {@code c} is {@code null} or not a valid integer
     * @return {@code -1} for the literal {@code "all"}, the parsed value for a valid integer,
     *         {@code def} otherwise
     */
    public static int safeParseInt(String c, int def) {
        // Trim first so that padded input behaves like it does in safeParseFloat.
        String text = (c == null) ? null : c.trim();
        if ("all".equals(text)) {
            return -1;
        }
        try {
            // Integer.parseInt(null) raises NumberFormatException, so null lands in the catch.
            return Integer.parseInt(text);
        } catch (NumberFormatException e) {
            System.err.println("safeParseInt(): could not parse <"+c+">");
            return def;
        }
    }

    /**
     * Parses a float.
     *
     * @param c text to parse; may be {@code null}
     * @return the parsed value, or {@code 0} when {@code c} is {@code null} or not a valid float
     */
    public static float safeParseFloat(String c) {
        if (c == null) {
            // Float.parseFloat(null) throws NullPointerException, which the catch below
            // would not intercept.
            System.err.println("safeParseFloat(): could not parse <"+c+">");
            return 0;
        }
        try {
            return Float.parseFloat(c);
        } catch (NumberFormatException e) {
            System.err.println("safeParseFloat(): could not parse <"+c+">");
            return 0;
        }
    }
}
src/main/java/mtas/solr/search/IpiMtasSolrCQLQParserPlugin.java
0 → 100644
1 | +package mtas.solr.search; | ||
2 | + | ||
3 | +import mtas.solr.handler.stat.MtasGroupQueryHandler; | ||
4 | +import org.apache.solr.common.params.ModifiableSolrParams; | ||
5 | +import org.apache.solr.common.params.SolrParams; | ||
6 | +import org.apache.solr.common.util.NamedList; | ||
7 | +import org.apache.solr.request.SolrQueryRequest; | ||
8 | +import org.apache.solr.search.QParser; | ||
9 | +import org.apache.solr.search.QParserPlugin; | ||
10 | + | ||
11 | +/** | ||
12 | + * The Class MtasSolrCQLQParserPlugin. | ||
13 | + */ | ||
14 | +public class IpiMtasSolrCQLQParserPlugin extends MtasSolrCQLQParserPlugin { | ||
15 | + | ||
16 | + @Override | ||
17 | + public QParser createParser(String qstr, SolrParams localParams, | ||
18 | + SolrParams params, SolrQueryRequest req) { | ||
19 | + | ||
20 | + //Remove grouping from query | ||
21 | + ModifiableSolrParams newLocalParams = ModifiableSolrParams.of(localParams); | ||
22 | + String query = newLocalParams.get("query"); | ||
23 | + if (MtasGroupQueryHandler.hasGroupQueryCOmponent(query)) { | ||
24 | + MtasGroupQueryHandler gh = new MtasGroupQueryHandler(); | ||
25 | + gh.handleGroups(-1, query, null, null); | ||
26 | + newLocalParams.set("query", gh.getSimpleQueryText()); | ||
27 | + } | ||
28 | + | ||
29 | + | ||
30 | + | ||
31 | + return new MtasCQLQParser(qstr, newLocalParams, params, req); | ||
32 | + } | ||
33 | + | ||
34 | +} |