Commit 6a180233e802814f169c8054d3b0a6ffd2315ae3
1 parent
ec0be433
grouping in cql list query
Showing
5 changed files
with
437 additions
and
0 deletions
src/main/java/mtas/solr/handler/IpiMtasSearchHandler.java
0 → 100644
1 | +package mtas.solr.handler; | |
2 | + | |
3 | + | |
4 | +import mtas.solr.handler.stat.MtasGroupQueryHandler; | |
5 | +import org.apache.commons.lang3.StringUtils; | |
6 | +import org.apache.solr.common.params.ModifiableSolrParams; | |
7 | +import org.apache.solr.common.params.SolrParams; | |
8 | +import org.apache.solr.handler.component.SearchHandler; | |
9 | +import org.apache.solr.request.SolrQueryRequest; | |
10 | +import org.apache.solr.response.SolrQueryResponse; | |
11 | + | |
12 | +public class IpiMtasSearchHandler extends SearchHandler { | |
13 | + | |
14 | + | |
15 | + @Override | |
16 | + public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception { | |
17 | + transformParams(req); | |
18 | + super.handleRequestBody(req, rsp); | |
19 | + } | |
20 | + | |
21 | + private void transformParams(SolrQueryRequest req) { | |
22 | + SolrParams params = req.getParams(); | |
23 | + ModifiableSolrParams newParams = ModifiableSolrParams.of(params); | |
24 | + | |
25 | + if (!isMtasRequest(params)) { | |
26 | + return; | |
27 | + } | |
28 | + | |
29 | + handleGroups(newParams); | |
30 | + | |
31 | + | |
32 | + req.setParams(newParams); | |
33 | + } | |
34 | + private boolean isMtasRequest(SolrParams params) { | |
35 | + return Boolean.TRUE.equals(params.getBool("mtas")); | |
36 | + } | |
37 | + | |
38 | + private void handleGroups(ModifiableSolrParams params) { | |
39 | + int groupIndex = handleGroupsInListQueries(0, params); | |
40 | + | |
41 | + } | |
42 | + | |
43 | + | |
44 | + private int handleGroupsInListQueries(int groupIndex, ModifiableSolrParams params) { | |
45 | + for(int i=0; i<100 ;i++){ | |
46 | + String queryParamName = "mtas.list."+i+".query.value"; | |
47 | + String queryText = params.get(queryParamName); | |
48 | + if(StringUtils.isBlank(queryText)){ | |
49 | + return groupIndex; | |
50 | + } | |
51 | + if (!MtasGroupQueryHandler.hasGroupQueryCOmponent(queryText)) { | |
52 | + continue; | |
53 | + } | |
54 | + String mtasField = params.get("mtas.list."+i+".field"); | |
55 | + MtasGroupQueryHandler handler = new MtasGroupQueryHandler(); | |
56 | + if(handler.handleGroups(groupIndex, queryText, mtasField, params)){ | |
57 | + groupIndex++; | |
58 | + } | |
59 | + params.set(queryParamName, handler.getSimpleQueryText()); | |
60 | + | |
61 | + } | |
62 | + return groupIndex; | |
63 | + } | |
64 | + | |
65 | + | |
66 | +} | |
... | ... |
src/main/java/mtas/solr/handler/stat/MtasGroupQueryHandler.java
0 → 100644
1 | +package mtas.solr.handler.stat; | |
2 | + | |
3 | +import org.apache.commons.lang3.StringUtils; | |
4 | +import org.apache.solr.common.params.ModifiableSolrParams; | |
5 | + | |
6 | +public class MtasGroupQueryHandler { | |
7 | + int countLimit = 1000; | |
8 | + int displLimit = 10; | |
9 | + int occurLimit = 1; | |
10 | + String sortOrder = null; | |
11 | + String groupby; | |
12 | + String leftGroupby, rightGroupby; | |
13 | + int maxCount = 0; | |
14 | + private String simpleQueryText; | |
15 | + private String groupText; | |
16 | + | |
17 | + public boolean handleGroups(int groupIndex, String queryText, String mtasField, ModifiableSolrParams params) { | |
18 | + | |
19 | + this.simpleQueryText = queryText; | |
20 | + if (!hasGroupQueryCOmponent(queryText)) { | |
21 | + return false; | |
22 | + } | |
23 | + | |
24 | + String groupByString = " group by "; | |
25 | + int ind = queryText.indexOf(groupByString); | |
26 | + if (ind < 0) { | |
27 | + return false; | |
28 | + } | |
29 | + this.groupText = queryText.substring(ind + groupByString.length()); | |
30 | + this.simpleQueryText = queryText.substring(0, ind); | |
31 | + | |
32 | + if (groupIndex < 0) { | |
33 | + return false; | |
34 | + } | |
35 | + | |
36 | + try { | |
37 | + | |
38 | + groupby = stripModifiers(groupText); | |
39 | + params.set("mtas.group", true); | |
40 | + params.set("mtas.group."+groupIndex+".query.type", "cql"); | |
41 | + params.set("mtas.group."+groupIndex+".number", displLimit); | |
42 | + params.set("rows", 0); | |
43 | + params.set("mtas.group."+groupIndex+".query.value", simpleQueryText); | |
44 | + | |
45 | + params.set("mtas.group."+groupIndex+".field", mtasField); | |
46 | + | |
47 | + int i = groupby.indexOf(';'); | |
48 | + if (i != -1) { | |
49 | + leftGroupby = groupby.substring(0, i).trim(); | |
50 | + rightGroupby = groupby.substring(i+1).trim(); | |
51 | + //TODO advanced collocations | |
52 | + } else { | |
53 | + leftGroupby = groupby; | |
54 | + rightGroupby = ""; | |
55 | + | |
56 | + | |
57 | + String[] split = groupby.split(" *, *"); | |
58 | + for (int gi = 0; gi < split.length; gi++) { | |
59 | + | |
60 | + String[] h = split[gi].split("\\."); | |
61 | + | |
62 | + | |
63 | + int segno; | |
64 | + String type; | |
65 | + | |
66 | + if (h.length == 2) { | |
67 | + segno = Math.max(0, Util.safeParseInt(h[0], 1) - 1); | |
68 | + | |
69 | + type = h[1]; | |
70 | + } else if (h.length == 1) { | |
71 | + segno = 0; | |
72 | + type = h[0]; | |
73 | + }else{ | |
74 | + continue; | |
75 | + } | |
76 | + | |
77 | + params.set("mtas.group."+groupIndex+".grouping.hit.insideLeft."+gi+".position", segno); | |
78 | + params.set("mtas.group."+groupIndex+".grouping.hit.insideLeft."+gi+".prefixes", type); | |
79 | + | |
80 | + | |
81 | + } | |
82 | + | |
83 | + } | |
84 | + | |
85 | + } catch (StatQueryException e) { | |
86 | + e.printStackTrace(); | |
87 | + return false; | |
88 | + } | |
89 | + return true; | |
90 | + } | |
91 | + | |
92 | + public static boolean hasGroupQueryCOmponent(String query) { | |
93 | + return StringUtils.isNotBlank(query) && query.toLowerCase().contains(" group by "); | |
94 | + } | |
95 | + | |
96 | + | |
97 | + | |
98 | + | |
99 | + private int modifierEndPosition(String groupby, int start) | |
100 | + throws StatQueryException | |
101 | + { | |
102 | + boolean checkNumber = false, checkAll = false; | |
103 | + int offset = -1; | |
104 | + int len = groupby.length(); | |
105 | + | |
106 | + if (groupby.regionMatches(start, " display ", 0, 9)) { | |
107 | + checkNumber = true; | |
108 | + offset = 9; | |
109 | + } else if (groupby.regionMatches(start, " min ", 0, 5)) { | |
110 | + checkNumber = true; | |
111 | + offset = 5; | |
112 | + } else if (groupby.regionMatches(start, " count ", 0, 7)) { | |
113 | + checkNumber = true; | |
114 | + checkAll = true; | |
115 | + offset = 7; | |
116 | + } else if (groupby.regionMatches(start, " sort ", 0, 6)) { | |
117 | + offset = 6; | |
118 | + } else { | |
119 | + throw new StatQueryException(); | |
120 | + } // failed to parse | |
121 | + | |
122 | + offset += start; | |
123 | + if (checkNumber) { | |
124 | + if (checkAll && groupby.regionMatches(offset, "all", 0, 3)) { | |
125 | + return offset + 3; | |
126 | + } | |
127 | + while (offset < len && Character.isDigit(groupby.charAt(offset))) { | |
128 | + offset++; | |
129 | + } | |
130 | + return offset; | |
131 | + } | |
132 | + // sort modifier, check possible cases | |
133 | + if (groupby.regionMatches(offset, "a fronte", 0, 8)) { | |
134 | + return offset + 8; | |
135 | + } | |
136 | + if (groupby.regionMatches(offset, "a tergo", 0, 7)) { | |
137 | + return offset + 7; | |
138 | + } | |
139 | + if (groupby.regionMatches(offset, "by ", 0, 3)) { | |
140 | + offset += 3; | |
141 | + if (groupby.regionMatches(offset, "freq", 0, 4)) { | |
142 | + return offset + 4; | |
143 | + } | |
144 | + if (groupby.regionMatches(offset, "maxcp", 0, 5)) { | |
145 | + return offset + 5; | |
146 | + } | |
147 | + if (groupby.regionMatches(offset, "dice", 0, 4)) { | |
148 | + return offset + 4; | |
149 | + } | |
150 | + if (groupby.regionMatches(offset, "cp", 0, 2)) { | |
151 | + return offset + 2; | |
152 | + } | |
153 | + if (groupby.regionMatches(offset, "scp bias ", 0, 9)) { | |
154 | + offset += 9; | |
155 | + while (offset < len && groupby.charAt(offset) != ' ') { | |
156 | + offset++; | |
157 | + } | |
158 | + return offset; | |
159 | + } | |
160 | + if (groupby.regionMatches(offset, "scp", 0, 3)) { | |
161 | + return offset + 3; | |
162 | + } | |
163 | + } | |
164 | + throw new StatQueryException(); | |
165 | + } | |
166 | + | |
167 | + | |
168 | + String stripModifiers(String groupby) throws StatQueryException { | |
169 | + int i, end; | |
170 | + | |
171 | + i = groupby.indexOf(" display "); | |
172 | + if (i != -1) { | |
173 | + end = modifierEndPosition(groupby, i); | |
174 | + displLimit = Util.safeParseInt(groupby.substring(i + 9, end), displLimit); | |
175 | + groupby = groupby.substring(0, i) + groupby.substring(end); | |
176 | + } | |
177 | + | |
178 | + i = groupby.indexOf(" count "); | |
179 | + if (i != -1) { | |
180 | + end = modifierEndPosition(groupby, i); | |
181 | + countLimit = Util.safeParseInt(groupby.substring(i + 7, end), countLimit); | |
182 | + groupby = groupby.substring(0, i) + groupby.substring(end); | |
183 | + } | |
184 | + | |
185 | + i = groupby.indexOf(" min "); | |
186 | + if (i != -1) { | |
187 | + end = modifierEndPosition(groupby, i); | |
188 | + occurLimit = Util.safeParseInt(groupby.substring(i + 5, end), occurLimit); | |
189 | + groupby = groupby.substring(0, i) + groupby.substring(end); | |
190 | + } | |
191 | + | |
192 | + i = groupby.indexOf(" sort "); | |
193 | + if (i != -1) { | |
194 | + end = modifierEndPosition(groupby, i); | |
195 | + sortOrder = (groupby.substring(i + 6, end)); | |
196 | + groupby = groupby.substring(0, i) + groupby.substring(end); | |
197 | + } | |
198 | + | |
199 | + return groupby; | |
200 | + } | |
201 | + | |
202 | + public int getCountLimit() { | |
203 | + return countLimit; | |
204 | + } | |
205 | + | |
206 | + public void setCountLimit(int countLimit) { | |
207 | + this.countLimit = countLimit; | |
208 | + } | |
209 | + | |
210 | + public int getDisplLimit() { | |
211 | + return displLimit; | |
212 | + } | |
213 | + | |
214 | + public void setDisplLimit(int displLimit) { | |
215 | + this.displLimit = displLimit; | |
216 | + } | |
217 | + | |
218 | + public int getOccurLimit() { | |
219 | + return occurLimit; | |
220 | + } | |
221 | + | |
222 | + public void setOccurLimit(int occurLimit) { | |
223 | + this.occurLimit = occurLimit; | |
224 | + } | |
225 | + | |
226 | + public String getSortOrder() { | |
227 | + return sortOrder; | |
228 | + } | |
229 | + | |
230 | + public void setSortOrder(String sortOrder) { | |
231 | + this.sortOrder = sortOrder; | |
232 | + } | |
233 | + | |
234 | + public String getGroupby() { | |
235 | + return groupby; | |
236 | + } | |
237 | + | |
238 | + public void setGroupby(String groupby) { | |
239 | + this.groupby = groupby; | |
240 | + } | |
241 | + | |
242 | + public String getLeftGroupby() { | |
243 | + return leftGroupby; | |
244 | + } | |
245 | + | |
246 | + public void setLeftGroupby(String leftGroupby) { | |
247 | + this.leftGroupby = leftGroupby; | |
248 | + } | |
249 | + | |
250 | + public String getRightGroupby() { | |
251 | + return rightGroupby; | |
252 | + } | |
253 | + | |
254 | + public void setRightGroupby(String rightGroupby) { | |
255 | + this.rightGroupby = rightGroupby; | |
256 | + } | |
257 | + | |
258 | + public int getMaxCount() { | |
259 | + return maxCount; | |
260 | + } | |
261 | + | |
262 | + public void setMaxCount(int maxCount) { | |
263 | + this.maxCount = maxCount; | |
264 | + } | |
265 | + | |
266 | + public String getSimpleQueryText() { | |
267 | + return simpleQueryText; | |
268 | + } | |
269 | + | |
270 | + public void setSimpleQueryText(String simpleQueryText) { | |
271 | + this.simpleQueryText = simpleQueryText; | |
272 | + } | |
273 | + | |
274 | + public String getGroupText() { | |
275 | + return groupText; | |
276 | + } | |
277 | + | |
278 | + public void setGroupText(String groupText) { | |
279 | + this.groupText = groupText; | |
280 | + } | |
281 | +} | |
... | ... |
src/main/java/mtas/solr/handler/stat/StatQueryException.java
0 → 100644
1 | +/* | |
2 | + * This file is part of the Poliqarp suite. | |
3 | + * | |
4 | + * Copyright (C) 2004-2009 by Instytut Podstaw Informatyki Polskiej | |
5 | + * Akademii Nauk (IPI PAN; Institute of Computer Science, Polish | |
6 | + * Academy of Sciences; cf. www.ipipan.waw.pl). All rights reserved. | |
7 | + * | |
8 | + * This file may be distributed and/or modified under the terms of the | |
9 | + * GNU General Public License version 2 as published by the Free Software | |
10 | + * Foundation and appearing in the file gpl.txt included in the packaging | |
11 | + * of this file. (See http://www.gnu.org/licenses/translations.html for | |
12 | + * unofficial translations.) | |
13 | + * | |
14 | + * A commercial license is available from IPI PAN (contact | |
15 | + * Michal.Ciesiolka@ipipan.waw.pl or ipi@ipipan.waw.pl for more | |
16 | + * information). Licensees holding a valid commercial license from IPI | |
17 | + * PAN may use this file in accordance with that license. | |
18 | + * | |
19 | + * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING | |
20 | + * THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
21 | + * PURPOSE. | |
22 | + */ | |
23 | + | |
24 | +package mtas.solr.handler.stat; | |
25 | + | |
/**
 * This exception is thrown by query-related methods to indicate error
 * conditions.
 */
public class StatQueryException extends Exception
{
    private static final long serialVersionUID = 1L;

    /** Creates an exception without a detail message. */
    public StatQueryException()
    {
        super();
    }

    /**
     * Creates an exception describing what could not be processed.
     *
     * @param message the detail message
     */
    public StatQueryException(String message)
    {
        super(message);
    }

    /**
     * Creates an exception that wraps an underlying failure.
     *
     * @param message the detail message
     * @param cause   the exception that triggered this one
     */
    public StatQueryException(String message, Throwable cause)
    {
        super(message, cause);
    }
}
... | ... |
src/main/java/mtas/solr/handler/stat/Util.java
0 → 100644
1 | +package mtas.solr.handler.stat; | |
2 | + | |
/**
 * Lenient number parsing helpers: instead of throwing on unparsable input
 * they report the problem on standard error and return a fallback value.
 */
public class Util {
    /**
     * Parses {@code c} as a decimal integer.
     *
     * @param c   the text to parse; the literal {@code "all"} is mapped to -1
     * @param def value returned when {@code c} is {@code null} or not an integer
     * @return the parsed value, -1 for {@code "all"}, or {@code def} on failure
     */
    public static int safeParseInt(String c, int def) {
        // Constant-first comparison keeps a null argument from throwing here;
        // Integer.parseInt(null) then fails with NumberFormatException below.
        if ("all".equals(c)) {
            return -1;
        }
        try {
            return Integer.parseInt(c);
        } catch (NumberFormatException e) {
            System.err.println("safeParseInt(): could not parse <"+c+">");
            return def;
        }
    }

    /**
     * Parses {@code c} as a float.
     *
     * @param c the text to parse
     * @return the parsed value, or 0 when {@code c} is {@code null} or not a number
     */
    public static float safeParseFloat(String c) {
        // Float.parseFloat(null) throws NullPointerException rather than
        // NumberFormatException, so null has to be handled explicitly.
        if (c == null) {
            System.err.println("safeParseFloat(): could not parse <"+c+">");
            return 0;
        }
        try {
            return Float.parseFloat(c);
        } catch (NumberFormatException e) {
            System.err.println("safeParseFloat(): could not parse <"+c+">");
            return 0;
        }
    }
}
... | ... |
src/main/java/mtas/solr/search/IpiMtasSolrCQLQParserPlugin.java
0 → 100644
1 | +package mtas.solr.search; | |
2 | + | |
3 | +import mtas.solr.handler.stat.MtasGroupQueryHandler; | |
4 | +import org.apache.solr.common.params.ModifiableSolrParams; | |
5 | +import org.apache.solr.common.params.SolrParams; | |
6 | +import org.apache.solr.common.util.NamedList; | |
7 | +import org.apache.solr.request.SolrQueryRequest; | |
8 | +import org.apache.solr.search.QParser; | |
9 | +import org.apache.solr.search.QParserPlugin; | |
10 | + | |
11 | +/** | |
12 | + * The Class MtasSolrCQLQParserPlugin. | |
13 | + */ | |
14 | +public class IpiMtasSolrCQLQParserPlugin extends MtasSolrCQLQParserPlugin { | |
15 | + | |
16 | + @Override | |
17 | + public QParser createParser(String qstr, SolrParams localParams, | |
18 | + SolrParams params, SolrQueryRequest req) { | |
19 | + | |
20 | + //Remove grouping from query | |
21 | + ModifiableSolrParams newLocalParams = ModifiableSolrParams.of(localParams); | |
22 | + String query = newLocalParams.get("query"); | |
23 | + if (MtasGroupQueryHandler.hasGroupQueryCOmponent(query)) { | |
24 | + MtasGroupQueryHandler gh = new MtasGroupQueryHandler(); | |
25 | + gh.handleGroups(-1, query, null, null); | |
26 | + newLocalParams.set("query", gh.getSimpleQueryText()); | |
27 | + } | |
28 | + | |
29 | + | |
30 | + | |
31 | + return new MtasCQLQParser(qstr, newLocalParams, params, req); | |
32 | + } | |
33 | + | |
34 | +} | |
... | ... |