diff --git a/src/main/java/mtas/solr/handler/IpiMtasSearchHandler.java b/src/main/java/mtas/solr/handler/IpiMtasSearchHandler.java
new file mode 100644
index 0000000..a923191
--- /dev/null
+++ b/src/main/java/mtas/solr/handler/IpiMtasSearchHandler.java
@@ -0,0 +1,90 @@
+package mtas.solr.handler;
+
+import mtas.solr.handler.stat.MtasGroupQueryHandler;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.handler.component.SearchHandler;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+
+/**
+ * Search handler that rewrites MTAS list queries carrying a
+ * {@code " group by "} clause before delegating to {@link SearchHandler}.
+ *
+ * <p>For every such list query the clause is removed from the query text and
+ * {@code mtas.group.*} request parameters are written by
+ * {@link MtasGroupQueryHandler}.
+ */
+public class IpiMtasSearchHandler extends SearchHandler {
+
+  /** Upper bound on the number of {@code mtas.list.N} entries that are scanned. */
+  private static final int MAX_LIST_QUERIES = 100;
+
+  /**
+   * Rewrites the request parameters and then runs the standard search handling.
+   *
+   * @param req the incoming request; its parameters may be replaced
+   * @param rsp the response handed on to the superclass
+   * @throws Exception whatever the superclass implementation throws
+   */
+  @Override
+  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
+    transformParams(req);
+    super.handleRequestBody(req, rsp);
+  }
+
+  /**
+   * Installs rewritten parameters on the request when {@code mtas=true}; requests
+   * without that flag are left untouched.
+   */
+  private void transformParams(SolrQueryRequest req) {
+    SolrParams params = req.getParams();
+    // Check the flag first so non-MTAS requests never pay for a parameter copy.
+    if (!isMtasRequest(params)) {
+      return;
+    }
+    ModifiableSolrParams newParams = ModifiableSolrParams.of(params);
+    handleGroups(newParams);
+    req.setParams(newParams);
+  }
+
+  /** Returns whether the boolean request parameter {@code mtas} is set to true. */
+  private boolean isMtasRequest(SolrParams params) {
+    return Boolean.TRUE.equals(params.getBool("mtas"));
+  }
+
+  /** Rewrites all grouped list queries, numbering the groups from zero. */
+  private void handleGroups(ModifiableSolrParams params) {
+    handleGroupsInListQueries(0, params);
+  }
+
+  /**
+   * Walks {@code mtas.list.0}, {@code mtas.list.1}, ... until the first entry with
+   * a blank query value and converts every query that has a group-by clause.
+   *
+   * @param groupIndex index to use for the first group that is created
+   * @param params request parameters, modified in place
+   * @return the index that the next group would receive
+   */
+  private int handleGroupsInListQueries(int groupIndex, ModifiableSolrParams params) {
+    for (int i = 0; i < MAX_LIST_QUERIES; i++) {
+      String queryParamName = "mtas.list." + i + ".query.value";
+      String queryText = params.get(queryParamName);
+      if (StringUtils.isBlank(queryText)) {
+        return groupIndex;
+      }
+      if (!MtasGroupQueryHandler.hasGroupQueryCOmponent(queryText)) {
+        continue;
+      }
+      String mtasField = params.get("mtas.list." + i + ".field");
+      MtasGroupQueryHandler handler = new MtasGroupQueryHandler();
+      if (handler.handleGroups(groupIndex, queryText, mtasField, params)) {
+        groupIndex++;
+      }
+      // The list query itself must run without the group-by clause.
+      params.set(queryParamName, handler.getSimpleQueryText());
+    }
+    return groupIndex;
+  }
+}
diff --git a/src/main/java/mtas/solr/handler/stat/MtasGroupQueryHandler.java b/src/main/java/mtas/solr/handler/stat/MtasGroupQueryHandler.java
new file mode 100644
index 0000000..3ca9c08
--- /dev/null
+++ b/src/main/java/mtas/solr/handler/stat/MtasGroupQueryHandler.java
@@ -0,0 +1,332 @@
+package mtas.solr.handler.stat;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.solr.common.params.ModifiableSolrParams;
+
+/**
+ * Splits a query of the form {@code <query> group by <spec> [modifiers]} into
+ * the plain query and {@code mtas.group.*} request parameters.
+ *
+ * <p>Recognised modifiers are {@code display N}, {@code count N|all},
+ * {@code min N} and {@code sort ...}; they are stripped from the grouping
+ * specification and stored in the corresponding fields of this object.
+ * Instances are mutable and keep the result of the last
+ * {@link #handleGroups} call.
+ */
+public class MtasGroupQueryHandler {
+
+  /** Separator between the plain query and the grouping specification. */
+  private static final String GROUP_BY = " group by ";
+
+  /** Value of the {@code count} modifier; {@code -1} stands for "all". */
+  int countLimit = 1000;
+  /** Value of the {@code display} modifier, written as the group's {@code number}. */
+  int displLimit = 10;
+  /** Value of the {@code min} modifier. */
+  int occurLimit = 1;
+  /** Text following the {@code sort} modifier, or {@code null} if absent. */
+  String sortOrder = null;
+  /** Grouping specification with all modifiers removed. */
+  String groupby;
+  /** Parts of {@link #groupby} before and after a {@code ';'}, if any. */
+  String leftGroupby, rightGroupby;
+  /** Not modified by the parsing code in this class. */
+  int maxCount = 0;
+  /** Query text without the group-by clause. */
+  private String simpleQueryText;
+  /** Everything that followed {@code " group by "} in the query. */
+  private String groupText;
+
+  /**
+   * Parses {@code queryText} and, for a non-negative {@code groupIndex}, writes
+   * the matching {@code mtas.group.<groupIndex>.*} parameters.
+   *
+   * <p>{@link #getSimpleQueryText()} afterwards returns the query without the
+   * group-by clause, or the unchanged query when no clause was found. With a
+   * negative {@code groupIndex} only this split is performed and
+   * {@code params} is never touched, so it may be {@code null}.
+   *
+   * @param groupIndex index of the group to create, or a negative value to only
+   *        strip the clause
+   * @param queryText the full query text
+   * @param mtasField value stored as the group's {@code field} parameter
+   * @param params request parameters to modify
+   * @return {@code true} if group parameters were written
+   */
+  public boolean handleGroups(int groupIndex, String queryText, String mtasField,
+      ModifiableSolrParams params) {
+    this.simpleQueryText = queryText;
+    // Case-insensitive search, consistent with hasGroupQueryCOmponent(); a
+    // case-sensitive indexOf left clauses such as " GROUP BY " in the query.
+    int ind = StringUtils.indexOfIgnoreCase(queryText, GROUP_BY);
+    if (ind < 0) {
+      return false;
+    }
+    this.groupText = queryText.substring(ind + GROUP_BY.length());
+    this.simpleQueryText = queryText.substring(0, ind);
+
+    if (groupIndex < 0) {
+      return false;
+    }
+
+    try {
+      // Must run before the parameters are written: it updates displLimit.
+      groupby = stripModifiers(groupText);
+    } catch (StatQueryException e) {
+      // NOTE(review): this class has no logger; the malformed modifier is
+      // reported on stderr and no group parameters are written.
+      e.printStackTrace();
+      return false;
+    }
+
+    String prefix = "mtas.group." + groupIndex;
+    params.set("mtas.group", true);
+    params.set(prefix + ".query.type", "cql");
+    params.set(prefix + ".number", displLimit);
+    params.set("rows", 0);
+    params.set(prefix + ".query.value", simpleQueryText);
+    params.set(prefix + ".field", mtasField);
+
+    int i = groupby.indexOf(';');
+    if (i != -1) {
+      leftGroupby = groupby.substring(0, i).trim();
+      rightGroupby = groupby.substring(i + 1).trim();
+      // TODO advanced collocations
+      return true;
+    }
+    leftGroupby = groupby;
+    rightGroupby = "";
+
+    String[] split = groupby.split(" *, *");
+    for (int gi = 0; gi < split.length; gi++) {
+      // Each item is "<prefix>" or "<number>.<prefix>"; stray whitespace around
+      // the item is dropped so it cannot end up in the parameter value.
+      String[] h = split[gi].trim().split("\\.");
+      int segno;
+      String type;
+      if (h.length == 2) {
+        // NOTE(review): presumably the query counts positions from 1 and MTAS
+        // from 0 - confirm; unparsable numbers fall back to position 0.
+        segno = Math.max(0, Util.safeParseInt(h[0], 1) - 1);
+        type = h[1];
+      } else if (h.length == 1) {
+        segno = 0;
+        type = h[0];
+      } else {
+        continue;
+      }
+      params.set(prefix + ".grouping.hit.insideLeft." + gi + ".position", segno);
+      params.set(prefix + ".grouping.hit.insideLeft." + gi + ".prefixes", type);
+    }
+    return true;
+  }
+
+  /**
+   * Tells whether {@code query} contains a group-by clause, ignoring case.
+   *
+   * @param query the query text, may be {@code null}
+   * @return {@code true} if {@code " group by "} occurs in the query
+   */
+  public static boolean hasGroupQueryCOmponent(String query) {
+    return StringUtils.containsIgnoreCase(query, GROUP_BY);
+  }
+
+  /**
+   * Returns the index just past the modifier that starts at {@code start}.
+   *
+   * @param groupby the grouping specification being parsed
+   * @param start index of the space that precedes the modifier keyword
+   * @return end index (exclusive) of the modifier and its argument
+   * @throws StatQueryException if no known modifier starts at {@code start} or
+   *         the argument of {@code sort} is not recognised
+   */
+  private int modifierEndPosition(String groupby, int start) throws StatQueryException {
+    boolean checkNumber = false;
+    boolean checkAll = false;
+    int offset;
+    int len = groupby.length();
+
+    if (groupby.startsWith(" display ", start)) {
+      checkNumber = true;
+      offset = 9;
+    } else if (groupby.startsWith(" min ", start)) {
+      checkNumber = true;
+      offset = 5;
+    } else if (groupby.startsWith(" count ", start)) {
+      checkNumber = true;
+      checkAll = true;
+      offset = 7;
+    } else if (groupby.startsWith(" sort ", start)) {
+      offset = 6;
+    } else {
+      throw new StatQueryException("no known modifier at index " + start + " of <" + groupby + ">");
+    }
+
+    offset += start;
+    if (checkNumber) {
+      if (checkAll && groupby.startsWith("all", offset)) {
+        return offset + 3;
+      }
+      while (offset < len && Character.isDigit(groupby.charAt(offset))) {
+        offset++;
+      }
+      return offset;
+    }
+    // sort modifier, check possible cases
+    if (groupby.startsWith("a fronte", offset)) {
+      return offset + 8;
+    }
+    if (groupby.startsWith("a tergo", offset)) {
+      return offset + 7;
+    }
+    if (groupby.startsWith("by ", offset)) {
+      offset += 3;
+      if (groupby.startsWith("freq", offset)) {
+        return offset + 4;
+      }
+      if (groupby.startsWith("maxcp", offset)) {
+        return offset + 5;
+      }
+      if (groupby.startsWith("dice", offset)) {
+        return offset + 4;
+      }
+      if (groupby.startsWith("cp", offset)) {
+        return offset + 2;
+      }
+      // "scp bias <value>" must be tested before its prefix "scp".
+      if (groupby.startsWith("scp bias ", offset)) {
+        offset += 9;
+        while (offset < len && groupby.charAt(offset) != ' ') {
+          offset++;
+        }
+        return offset;
+      }
+      if (groupby.startsWith("scp", offset)) {
+        return offset + 3;
+      }
+    }
+    throw new StatQueryException("unrecognised sort order in <" + groupby + ">");
+  }
+
+  /**
+   * Removes the {@code display}, {@code count}, {@code min} and {@code sort}
+   * modifiers from {@code groupby} and stores their values in this object.
+   *
+   * @param groupby grouping specification, possibly followed by modifiers
+   * @return the specification with the recognised modifiers cut out
+   * @throws StatQueryException if a modifier cannot be parsed
+   */
+  String stripModifiers(String groupby) throws StatQueryException {
+    int i, end;
+
+    i = groupby.indexOf(" display ");
+    if (i != -1) {
+      end = modifierEndPosition(groupby, i);
+      displLimit = Util.safeParseInt(groupby.substring(i + 9, end), displLimit);
+      groupby = groupby.substring(0, i) + groupby.substring(end);
+    }
+
+    i = groupby.indexOf(" count ");
+    if (i != -1) {
+      end = modifierEndPosition(groupby, i);
+      countLimit = Util.safeParseInt(groupby.substring(i + 7, end), countLimit);
+      groupby = groupby.substring(0, i) + groupby.substring(end);
+    }
+
+    i = groupby.indexOf(" min ");
+    if (i != -1) {
+      end = modifierEndPosition(groupby, i);
+      occurLimit = Util.safeParseInt(groupby.substring(i + 5, end), occurLimit);
+      groupby = groupby.substring(0, i) + groupby.substring(end);
+    }
+
+    i = groupby.indexOf(" sort ");
+    if (i != -1) {
+      end = modifierEndPosition(groupby, i);
+      sortOrder = (groupby.substring(i + 6, end));
+      groupby = groupby.substring(0, i) + groupby.substring(end);
+    }
+
+    return groupby;
+  }
+
+  public int getCountLimit() {
+    return countLimit;
+  }
+
+  public void setCountLimit(int countLimit) {
+    this.countLimit = countLimit;
+  }
+
+  public int getDisplLimit() {
+    return displLimit;
+  }
+
+  public void setDisplLimit(int displLimit) {
+    this.displLimit = displLimit;
+  }
+
+  public int getOccurLimit() {
+    return occurLimit;
+  }
+
+  public void setOccurLimit(int occurLimit) {
+    this.occurLimit = occurLimit;
+  }
+
+  public String getSortOrder() {
+    return sortOrder;
+  }
+
+  public void setSortOrder(String sortOrder) {
+    this.sortOrder = sortOrder;
+  }
+
+  public String getGroupby() {
+    return groupby;
+  }
+
+  public void setGroupby(String groupby) {
+    this.groupby = groupby;
+  }
+
+  public String getLeftGroupby() {
+    return leftGroupby;
+  }
+
+  public void setLeftGroupby(String leftGroupby) {
+    this.leftGroupby = leftGroupby;
+  }
+
+  public String getRightGroupby() {
+    return rightGroupby;
+  }
+
+  public void setRightGroupby(String rightGroupby) {
+    this.rightGroupby = rightGroupby;
+  }
+
+  public int getMaxCount() {
+    return maxCount;
+  }
+
+  public void setMaxCount(int maxCount) {
+    this.maxCount = maxCount;
+  }
+
+  public String getSimpleQueryText() {
+    return simpleQueryText;
+  }
+
+  public void setSimpleQueryText(String simpleQueryText) {
+    this.simpleQueryText = simpleQueryText;
+  }
+
+  public String getGroupText() {
+    return groupText;
+  }
+
+  public void setGroupText(String groupText) {
+    this.groupText = groupText;
+  }
+}
diff --git a/src/main/java/mtas/solr/handler/stat/StatQueryException.java b/src/main/java/mtas/solr/handler/stat/StatQueryException.java
new file mode 100644
index 0000000..3e913a8
--- /dev/null
+++ b/src/main/java/mtas/solr/handler/stat/StatQueryException.java
@@ -0,0 +1,49 @@
+/*
+ * This file is part of the Poliqarp suite.
+ *
+ * Copyright (C) 2004-2009 by Instytut Podstaw Informatyki Polskiej
+ * Akademii Nauk (IPI PAN; Institute of Computer Science, Polish
+ * Academy of Sciences; cf. www.ipipan.waw.pl). All rights reserved.
+ *
+ * This file may be distributed and/or modified under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation and appearing in the file gpl.txt included in the packaging
+ * of this file. (See http://www.gnu.org/licenses/translations.html for
+ * unofficial translations.)
+ *
+ * A commercial license is available from IPI PAN (contact
+ * Michal.Ciesiolka@ipipan.waw.pl or ipi@ipipan.waw.pl for more
+ * information). Licensees holding a valid commercial license from IPI
+ * PAN may use this file in accordance with that license.
+ *
+ * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING
+ * THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE.
+ */
+
+package mtas.solr.handler.stat;
+
+/**
+ * This exception is thrown by query-related methods to indicate error
+ * conditions.
+ */
+public class StatQueryException extends Exception
+{
+  private static final long serialVersionUID = 1L;
+
+  /** Creates an exception without a detail message. */
+  public StatQueryException()
+  {
+    super();
+  }
+
+  /**
+   * Creates an exception with a detail message.
+   *
+   * @param message description of the part of the query that failed to parse
+   */
+  public StatQueryException(String message)
+  {
+    super(message);
+  }
+}
diff --git a/src/main/java/mtas/solr/handler/stat/Util.java b/src/main/java/mtas/solr/handler/stat/Util.java
new file mode 100644
index 0000000..d65683e
--- /dev/null
+++ b/src/main/java/mtas/solr/handler/stat/Util.java
@@ -0,0 +1,48 @@
+package mtas.solr.handler.stat;
+
+/** Lenient number parsing helpers that return a fallback instead of throwing. */
+public class Util {
+
+  /**
+   * Parses {@code c} as an int.
+   *
+   * @param c text to parse; {@code null} is treated as unparsable
+   * @param def value returned when {@code c} is not a valid int
+   * @return {@code -1} for the literal {@code "all"}, otherwise the parsed
+   *         value, or {@code def} (after a message on stderr) if parsing fails
+   */
+  public static int safeParseInt(String c, int def) {
+    // Constant-first equals: a null argument falls through to the default
+    // instead of throwing NullPointerException.
+    if ("all".equals(c)) {
+      return -1;
+    }
+    try {
+      return Integer.parseInt(c);
+    } catch (NumberFormatException e) {
+      System.err.println("safeParseInt(): could not parse <"+c+">");
+      return def;
+    }
+  }
+
+  /**
+   * Parses {@code c} as a float.
+   *
+   * @param c text to parse; {@code null} is treated as unparsable
+   * @return the parsed value, or {@code 0} (after a message on stderr) if
+   *         parsing fails
+   */
+  public static float safeParseFloat(String c) {
+    // Float.parseFloat(null) throws NullPointerException rather than
+    // NumberFormatException, so null is routed to the failure path here.
+    if (c != null) {
+      try {
+        return Float.parseFloat(c);
+      } catch (NumberFormatException ignored) {
+        // fall through to the shared failure path below
+      }
+    }
+    System.err.println("safeParseFloat(): could not parse <"+c+">");
+    return 0;
+  }
+}
diff --git a/src/main/java/mtas/solr/search/IpiMtasSolrCQLQParserPlugin.java b/src/main/java/mtas/solr/search/IpiMtasSolrCQLQParserPlugin.java
new file mode 100644
index 0000000..3084b9a
--- /dev/null
+++ b/src/main/java/mtas/solr/search/IpiMtasSolrCQLQParserPlugin.java
@@ -0,0 +1,44 @@
+package mtas.solr.search;
+
+import mtas.solr.handler.stat.MtasGroupQueryHandler;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.search.QParser;
+import org.apache.solr.search.QParserPlugin;
+
+/**
+ * The Class IpiMtasSolrCQLQParserPlugin: creates {@link MtasCQLQParser}
+ * instances after removing a {@code " group by "} clause from the
+ * {@code query} local parameter.
+ */
+public class IpiMtasSolrCQLQParserPlugin extends MtasSolrCQLQParserPlugin {
+
+  /**
+   * Creates the parser, passing on the {@code query} local parameter without
+   * its group-by clause.
+   *
+   * @param qstr the query string, passed through unchanged
+   * @param localParams local parameters; the {@code query} entry is rewritten
+   * @param params request parameters, passed through unchanged
+   * @param req the current request
+   * @return a new {@link MtasCQLQParser}
+   */
+  @Override
+  public QParser createParser(String qstr, SolrParams localParams,
+      SolrParams params, SolrQueryRequest req) {
+    // Remove grouping from query
+    // NOTE(review): ModifiableSolrParams.of may hand back localParams itself when
+    // it is already modifiable, so the caller's object can be updated - confirm.
+    ModifiableSolrParams newLocalParams = ModifiableSolrParams.of(localParams);
+    String query = newLocalParams.get("query");
+    if (MtasGroupQueryHandler.hasGroupQueryCOmponent(query)) {
+      MtasGroupQueryHandler gh = new MtasGroupQueryHandler();
+      // A negative group index only splits the query; no parameters are written.
+      gh.handleGroups(-1, query, null, null);
+      newLocalParams.set("query", gh.getSimpleQueryText());
+    }
+    return new MtasCQLQParser(qstr, newLocalParams, params, req);
+  }
+}