CodecUtil.java.html 18.3 KB
<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" lang="en"><head><meta http-equiv="Content-Type" content="text/html;charset=UTF-8"/><link rel="stylesheet" href="../jacoco-resources/report.css" type="text/css"/><link rel="shortcut icon" href="../jacoco-resources/report.gif" type="image/gif"/><title>CodecUtil.java</title><link rel="stylesheet" href="../jacoco-resources/prettify.css" type="text/css"/><script type="text/javascript" src="../jacoco-resources/prettify.js"></script></head><body onload="window['PR_TAB_WIDTH']=4;prettyPrint()"><div class="breadcrumb" id="breadcrumb"><span class="info"><a href="../jacoco-sessions.html" class="el_session">Sessions</a></span><a href="../index.html" class="el_report">MTAS</a> &gt; <a href="index.source.html" class="el_package">mtas.codec.util</a> &gt; <span class="el_source">CodecUtil.java</span></div><h1>CodecUtil.java</h1><pre class="source lang-java linenums">package mtas.codec.util;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import mtas.analysis.token.MtasToken;
import mtas.codec.MtasCodecPostingsFormat;
import mtas.parser.function.util.MtasFunctionParserFunction;
import mtas.search.spans.util.MtasSpanQuery;
import mtas.codec.util.CodecComponent.ComponentField;
import mtas.codec.util.CodecComponent.ComponentCollection;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.spans.SpanWeight;

/**
 * Static constants and helpers for stats-type parsing, term splitting and collector delegation.
 */
public class CodecUtil {

  /** Stats type name 'geometricmean'; listed in STATS_ADVANCED_TYPES. */
  public static final String STATS_TYPE_GEOMETRICMEAN = &quot;geometricmean&quot;;

  /** Stats type name 'kurtosis'; listed in STATS_FULL_TYPES. */
  public static final String STATS_TYPE_KURTOSIS = &quot;kurtosis&quot;;

  /** Stats type name 'max'; listed in STATS_ADVANCED_TYPES. */
  public static final String STATS_TYPE_MAX = &quot;max&quot;;

  /** Stats type name 'mean'; listed in STATS_BASIC_TYPES. */
  public static final String STATS_TYPE_MEAN = &quot;mean&quot;;

  /** Stats type name 'min'; listed in STATS_ADVANCED_TYPES. */
  public static final String STATS_TYPE_MIN = &quot;min&quot;;

  /** Stats type name 'n'; listed in STATS_BASIC_TYPES. */
  public static final String STATS_TYPE_N = &quot;n&quot;;

  /** Stats type name 'median'; listed in STATS_FULL_TYPES. */
  public static final String STATS_TYPE_MEDIAN = &quot;median&quot;;

  /** Stats type name 'populationvariance'; listed in STATS_ADVANCED_TYPES. */
  public static final String STATS_TYPE_POPULATIONVARIANCE = &quot;populationvariance&quot;;

  /** Stats type name 'quadraticmean'; listed in STATS_ADVANCED_TYPES. */
  public static final String STATS_TYPE_QUADRATICMEAN = &quot;quadraticmean&quot;;

  /** Stats type name 'skewness'; listed in STATS_FULL_TYPES. */
  public static final String STATS_TYPE_SKEWNESS = &quot;skewness&quot;;

  /** Stats type name 'standarddeviation'; listed in STATS_ADVANCED_TYPES. */
  public static final String STATS_TYPE_STANDARDDEVIATION = &quot;standarddeviation&quot;;

  /** Stats type name 'sum'; listed in STATS_BASIC_TYPES. */
  public static final String STATS_TYPE_SUM = &quot;sum&quot;;

  /** Stats type name 'sumsq'; listed in STATS_ADVANCED_TYPES. */
  public static final String STATS_TYPE_SUMSQ = &quot;sumsq&quot;;

  /** Stats type name 'sumoflogs'; listed in STATS_ADVANCED_TYPES. */
  public static final String STATS_TYPE_SUMOFLOGS = &quot;sumoflogs&quot;;

  /** Stats type name 'variance'; listed in STATS_ADVANCED_TYPES. */
  public static final String STATS_TYPE_VARIANCE = &quot;variance&quot;;

  /** Keyword 'all'; createStatsItems expands it to every entry of STATS_TYPES. */
  public static final String STATS_TYPE_ALL = &quot;all&quot;;

  /** Function name 'distribution'; the only entry of STATS_FUNCTIONS. */
  public static final String STATS_FUNCTION_DISTRIBUTION = &quot;distribution&quot;;

  /** Sort constant 'term'; not referenced elsewhere in this class. */
  public static final String SORT_TERM = &quot;term&quot;;

  /** Sort constant 'asc'; not referenced elsewhere in this class. */
  public static final String SORT_ASC = &quot;asc&quot;;

  /** Sort constant 'desc'; not referenced elsewhere in this class. */
  public static final String SORT_DESC = &quot;desc&quot;;

  /** Function names that createStatsItems accepts when written with parentheses. */
<span class="fc" id="L92">  private static final List&lt;String&gt; STATS_FUNCTIONS = Arrays</span>
<span class="fc" id="L93">      .asList(STATS_FUNCTION_DISTRIBUTION);</span>

  /** Every plain stats type name; the keyword 'all' expands to this list. */
<span class="fc" id="L96">  private static final List&lt;String&gt; STATS_TYPES = Arrays.asList(</span>
<span class="fc" id="L97">      STATS_TYPE_GEOMETRICMEAN, STATS_TYPE_KURTOSIS, STATS_TYPE_MAX,</span>
<span class="fc" id="L98">      STATS_TYPE_MEAN, STATS_TYPE_MIN, STATS_TYPE_N, STATS_TYPE_MEDIAN,</span>
<span class="fc" id="L99">      STATS_TYPE_POPULATIONVARIANCE, STATS_TYPE_QUADRATICMEAN,</span>
<span class="fc" id="L100">      STATS_TYPE_SKEWNESS, STATS_TYPE_STANDARDDEVIATION, STATS_TYPE_SUM,</span>
<span class="fc" id="L101">      STATS_TYPE_SUMSQ, STATS_TYPE_SUMOFLOGS, STATS_TYPE_VARIANCE);</span>

  /** Types for which createStatsType keeps the 'basic' level. */
<span class="fc" id="L104">  private static final List&lt;String&gt; STATS_BASIC_TYPES = Arrays</span>
<span class="fc" id="L105">      .asList(STATS_TYPE_N, STATS_TYPE_SUM, STATS_TYPE_MEAN);</span>

  /** Types that make createStatsType return at least the 'advanced' level. */
<span class="fc" id="L108">  private static final List&lt;String&gt; STATS_ADVANCED_TYPES = Arrays.asList(</span>
<span class="fc" id="L109">      STATS_TYPE_MAX, STATS_TYPE_MIN, STATS_TYPE_SUMSQ, STATS_TYPE_SUMOFLOGS,</span>
<span class="fc" id="L110">      STATS_TYPE_GEOMETRICMEAN, STATS_TYPE_STANDARDDEVIATION,</span>
<span class="fc" id="L111">      STATS_TYPE_VARIANCE, STATS_TYPE_POPULATIONVARIANCE,</span>
<span class="fc" id="L112">      STATS_TYPE_QUADRATICMEAN);</span>

  /** Types that make createStatsType return the 'full' level. */
<span class="fc" id="L115">  private static final List&lt;String&gt; STATS_FULL_TYPES = Arrays</span>
<span class="fc" id="L116">      .asList(STATS_TYPE_KURTOSIS, STATS_TYPE_MEDIAN, STATS_TYPE_SKEWNESS);</span>

  /** Stats level 'basic'; the starting value and lowest result of createStatsType. */
  public static final String STATS_BASIC = &quot;basic&quot;;

  /** Stats level 'advanced'; returned by createStatsType when an advanced type is present. */
  public static final String STATS_ADVANCED = &quot;advanced&quot;;

  /** Stats level 'full'; returned by createStatsType for full types and supported functions. */
  public static final String STATS_FULL = &quot;full&quot;;

  /** Data type name 'long'; not referenced elsewhere in this class. */
  public static final String DATA_TYPE_LONG = &quot;long&quot;;

  /** Data type name 'double'; not referenced elsewhere in this class. */
  public static final String DATA_TYPE_DOUBLE = &quot;double&quot;;

  /** One stats item: group 1 is the whole item, group 2 the name, optional group 3 the parenthesised part. */
<span class="fc" id="L134">  private static Pattern fpStatsItems = Pattern</span>
<span class="fc" id="L135">      .compile(&quot;(([^\\(,]+)(\\([^\\)]*\\))?)&quot;);</span>

  /** One function item: like fpStatsItems but parentheses are required; group 4 is the text inside them. */
<span class="fc" id="L138">  private static Pattern fpStatsFunctionItems = Pattern</span>
<span class="fc" id="L139">      .compile(&quot;(([^\\(,]+)(\\(([^\\)]*)\\)))&quot;);</span>

  /**
   * Private constructor; every member of this class is static.
   */
<span class="nc" id="L144">  private CodecUtil() {</span>
    // intentionally empty: there is no instance state to initialise
<span class="nc" id="L146">  }</span>

  /**
   * Tests whether {@code prefix} is listed in the field's single-position
   * prefix attribute. The attribute value is split on the literal
   * {@code MtasToken.DELIMITER} and the parts are compared with equals.
   *
   * @param fieldInfo field whose attribute is read; null is rejected
   * @param prefix the prefix to look up among the attribute's entries
   * @return true if one of the attribute's entries equals {@code prefix}
   * @throws IOException if {@code fieldInfo} is null or the attribute
   *           is not set on the field
   */
  public static boolean isSinglePositionPrefix(FieldInfo fieldInfo,
      String prefix) throws IOException {
<span class="pc bpc" id="L161" title="1 of 2 branches missed.">    if (fieldInfo == null) {</span>
<span class="nc" id="L162">      throw new IOException(&quot;no fieldInfo&quot;);</span>
    } else {
<span class="fc" id="L164">      String info = fieldInfo.getAttribute(</span>
<span class="fc" id="L165">          MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SINGLE_POSITION);</span>
<span class="pc bpc" id="L166" title="1 of 2 branches missed.">      if (info == null) {</span>
<span class="nc" id="L167">        throw new IOException(&quot;no &quot;</span>
            + MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SINGLE_POSITION);
      } else {
<span class="fc" id="L170">        return Arrays.asList(info.split(Pattern.quote(MtasToken.DELIMITER)))</span>
<span class="fc" id="L171">            .contains(prefix);</span>
      }
    }
  }

  /**
   * Returns the part of {@code term} after the first MtasToken.DELIMITER,
   * with all NUL (U+0000) characters removed.
   *
   * @param term the term; NOTE(review): dereferenced without a null check
   * @return the value, or null if the delimiter is absent or nothing follows it
   */
  public static String termValue(String term) {
<span class="nc" id="L184">    int i = term.indexOf(MtasToken.DELIMITER);</span>
<span class="nc" id="L185">    String value = null;</span>
<span class="nc bnc" id="L186" title="All 2 branches missed.">    if (i &gt;= 0) {</span>
<span class="nc" id="L187">      value = term.substring((i + MtasToken.DELIMITER.length()));</span>
<span class="nc bnc" id="L188" title="All 2 branches missed.">      value = (value.length() &gt; 0) ? value : null;</span>
    }
<span class="nc bnc" id="L190" title="All 2 branches missed.">    return (value == null) ? null : value.replace(&quot;\u0000&quot;, &quot;&quot;);</span>
  }

  /**
   * Returns the part of {@code term} before the first MtasToken.DELIMITER, or
   * the whole term if there is none, with NUL (U+0000) characters removed.
   *
   * @param term the term; NOTE(review): dereferenced without a null check
   * @return the prefix; never null when {@code term} is not null
   */
  public static String termPrefix(String term) {
<span class="fc" id="L201">    int i = term.indexOf(MtasToken.DELIMITER);</span>
<span class="fc" id="L202">    String prefix = term;</span>
<span class="pc bpc" id="L203" title="1 of 2 branches missed.">    if (i &gt;= 0) {</span>
<span class="fc" id="L204">      prefix = term.substring(0, i);</span>
    }
<span class="fc" id="L206">    return prefix.replace(&quot;\u0000&quot;, &quot;&quot;);</span>
  }

  /**
   * Removes all NUL (U+0000) characters from {@code term}; nothing else is
   * changed and the term is not split.
   *
   * @param term the term, may be null
   * @return the cleaned term, or null when {@code term} is null
   */
  public static String termPrefixValue(String term) {
<span class="nc bnc" id="L217" title="All 2 branches missed.">    return (term == null) ? null : term.replace(&quot;\u0000&quot;, &quot;&quot;);</span>
  }

  /**
   * Collects results for one field by delegating to
   * {@code CodecCollector.collectField}. Does nothing when
   * {@code fieldStats} is null.
   *
   * Before delegating, every query in {@code fieldStats.spanQueryList} is
   * rewritten against the searcher's reader and turned into a weight
   * (createWeight is called with false and a boost of 0); the weights are
   * passed along in a map keyed by the original, unrewritten query.
   *
   * @param field the field name, passed through to the collector
   * @param searcher supplies the reader used for rewriting and is handed
   *          to createWeight
   * @param rawReader passed through to the collector unchanged
   * @param fullDocList passed through to the collector unchanged
   * @param fullDocSet passed through to the collector unchanged
   * @param fieldStats the component describing what to collect, may be
   *          null
   * @throws IllegalAccessException declared; not thrown directly here
   * @throws IllegalArgumentException declared; not thrown directly here
   * @throws InvocationTargetException declared; not thrown directly here
   * @throws IOException declared; presumably from rewriting, weight
   *           creation or the collector - confirm against those APIs
   */
  public static void collectField(String field, IndexSearcher searcher,
      IndexReader rawReader, ArrayList&lt;Integer&gt; fullDocList,
      ArrayList&lt;Integer&gt; fullDocSet, ComponentField fieldStats)
      throws IllegalAccessException, IllegalArgumentException,
      InvocationTargetException, IOException {
<span class="pc bpc" id="L249" title="1 of 2 branches missed.">    if (fieldStats != null) {</span>
<span class="fc" id="L250">      IndexReader reader = searcher.getIndexReader();</span>
<span class="fc" id="L251">      HashMap&lt;MtasSpanQuery, SpanWeight&gt; spansQueryWeight = new HashMap&lt;&gt;();</span>
      // build one weight per span query; skipped when the list is empty
<span class="fc bfc" id="L253" title="All 2 branches covered.">      if (fieldStats.spanQueryList.size() &gt; 0) {</span>
<span class="fc" id="L254">        final float boost = 0;</span>
<span class="fc bfc" id="L255" title="All 2 branches covered.">        for (MtasSpanQuery sq : fieldStats.spanQueryList) {</span>
<span class="fc" id="L256">          spansQueryWeight.put(sq, ((MtasSpanQuery) sq.rewrite(reader))</span>
<span class="fc" id="L257">              .createWeight(searcher, false, boost));</span>
        }
      }
      // hand everything, including the (possibly empty) weight map, to the collector
<span class="fc" id="L261">      CodecCollector.collectField(field, searcher, reader, rawReader,</span>
<span class="fc" id="L262">          fullDocList, fullDocSet, fieldStats, spansQueryWeight);</span>
    }
<span class="fc" id="L264">  }</span>

  /**
   * Delegates to {@code CodecCollector.collectCollection}; does nothing
   * when {@code collectionInfo} is null.
   *
   * @param reader
   *          passed through to the collector unchanged
   * @param fullDocSet
   *          passed through to the collector unchanged
   * @param collectionInfo
   *          the collection component, may be null
   * @throws IOException declared; not thrown directly in this method
   */
  public static void collectCollection(IndexReader reader,
      List&lt;Integer&gt; fullDocSet, ComponentCollection collectionInfo)
      throws IOException {
<span class="pc bpc" id="L281" title="1 of 2 branches missed.">    if (collectionInfo != null) {</span>
<span class="fc" id="L282">      CodecCollector.collectCollection(reader, fullDocSet, collectionInfo);</span>
    }
<span class="fc" id="L284">  }</span>

  /**
   * Parses a comma-separated stats specification into a sorted set. Names in
   * STATS_TYPES are kept, 'all' expands to every entry of STATS_TYPES, and
   * names in STATS_FUNCTIONS need parentheses and are kept with them.
   *
   * @param statsType the specification, may be null
   * @return the parsed items; sum, n and mean when the input yields no items
   * @throws IOException on an unknown name or a function without parentheses
   */
  static SortedSet&lt;String&gt; createStatsItems(String statsType)
      throws IOException {
<span class="fc" id="L297">    SortedSet&lt;String&gt; statsItems = new TreeSet&lt;&gt;();</span>
<span class="fc" id="L298">    SortedSet&lt;String&gt; functionItems = new TreeSet&lt;&gt;();</span>
<span class="fc bfc" id="L299" title="All 2 branches covered.">    if (statsType != null) {</span>
<span class="fc" id="L300">      Matcher m = fpStatsItems.matcher(statsType.trim());</span>
<span class="fc bfc" id="L301" title="All 2 branches covered.">      while (m.find()) {</span>
<span class="fc" id="L302">        String tmpStatsItem = m.group(2).trim();</span>
<span class="fc bfc" id="L303" title="All 2 branches covered.">        if (STATS_TYPES.contains(tmpStatsItem)) {</span>
<span class="fc" id="L304">          statsItems.add(tmpStatsItem);</span>
<span class="pc bpc" id="L305" title="1 of 2 branches missed.">        } else if (tmpStatsItem.equals(STATS_TYPE_ALL)) {</span>
<span class="fc bfc" id="L306" title="All 2 branches covered.">          for (String type : STATS_TYPES) {</span>
<span class="fc" id="L307">            statsItems.add(type);</span>
          }
<span class="pc bnc" id="L309" title="All 2 branches missed.">        } else if (STATS_FUNCTIONS.contains(tmpStatsItem)) {</span>
<span class="nc bnc" id="L310" title="All 2 branches missed.">          if (m.group(3) == null) {</span>
<span class="nc" id="L311">            throw new IOException(&quot;'&quot; + tmpStatsItem + &quot;' should be called as '&quot;</span>
<span class="nc" id="L312">                + tmpStatsItem + &quot;()' with an optional argument&quot;);</span>
          } else {
<span class="nc" id="L314">            functionItems.add(m.group(1).trim());</span>
          }
<span class="nc" id="L316">        } else {</span>
<span class="nc" id="L317">          throw new IOException(&quot;unknown statsType '&quot; + tmpStatsItem + &quot;'&quot;);</span>
        }
      }
    }
<span class="pc bpc" id="L321" title="1 of 4 branches missed.">    if (statsItems.size() == 0 &amp;&amp; functionItems.size() == 0) {</span>
<span class="fc" id="L322">      statsItems.add(STATS_TYPE_SUM);</span>
<span class="fc" id="L323">      statsItems.add(STATS_TYPE_N);</span>
<span class="fc" id="L324">      statsItems.add(STATS_TYPE_MEAN);</span>
    }
<span class="pc bpc" id="L326" title="1 of 2 branches missed.">    if (functionItems.size() &gt; 0) {</span>
<span class="nc" id="L327">      statsItems.addAll(functionItems);</span>
    }
<span class="fc" id="L329">    return statsItems;</span>
  }

  /**
   * Derives the stats level (basic, advanced or full) needed for the items
   * and sort type: a full-only type or supported function call gives full,
   * otherwise an advanced type gives advanced; an advanced or full sort type
   * can raise the level. NOTE(review): levels are compared by reference.
   *
   * @param statsItems the items to inspect; iterated without a null check
   * @param sortType sort name, may be null; ignored unless in STATS_TYPES
   * @param functionParser not used in this method
   * @return STATS_BASIC, STATS_ADVANCED or STATS_FULL
   */
  static String createStatsType(Set&lt;String&gt; statsItems, String sortType,
      MtasFunctionParserFunction functionParser) {
<span class="fc" id="L345">    String statsType = STATS_BASIC;</span>
<span class="fc bfc" id="L346" title="All 2 branches covered.">    for (String statsItem : statsItems) {</span>
<span class="fc bfc" id="L347" title="All 2 branches covered.">      if (STATS_FULL_TYPES.contains(statsItem)) {</span>
<span class="fc" id="L348">        statsType = STATS_FULL;</span>
<span class="fc" id="L349">        break;</span>
<span class="fc bfc" id="L350" title="All 2 branches covered.">      } else if (STATS_ADVANCED_TYPES.contains(statsItem)) {</span>
<span class="fc" id="L351">        statsType = STATS_ADVANCED;</span>
<span class="fc bfc" id="L352" title="All 2 branches covered.">      } else if (statsType != STATS_ADVANCED</span>
<span class="pc bpc" id="L353" title="1 of 2 branches missed.">          &amp;&amp; STATS_BASIC_TYPES.contains(statsItem)) {</span>
<span class="fc" id="L354">        statsType = STATS_BASIC;</span>
<span class="fc" id="L355">      } else {</span>
<span class="fc" id="L356">        Matcher m = fpStatsFunctionItems.matcher(statsItem.trim());</span>
<span class="pc bpc" id="L357" title="1 of 2 branches missed.">        if (m.find()) {</span>
<span class="nc bnc" id="L358" title="All 2 branches missed.">          if (STATS_FUNCTIONS.contains(m.group(2).trim())) {</span>
<span class="nc" id="L359">            statsType = STATS_FULL;</span>
<span class="nc" id="L360">            break;</span>
          }
        }
      }
    }
<span class="fc bfc" id="L365" title="All 4 branches covered.">    if (sortType != null &amp;&amp; STATS_TYPES.contains(sortType)) {</span>
<span class="pc bpc" id="L366" title="1 of 2 branches missed.">      if (STATS_FULL_TYPES.contains(sortType)) {</span>
<span class="nc" id="L367">        statsType = STATS_FULL;</span>
<span class="pc bpc" id="L368" title="1 of 2 branches missed.">      } else if (STATS_ADVANCED_TYPES.contains(sortType)) {</span>
<span class="nc bnc" id="L369" title="All 4 branches missed.">        statsType = (statsType == null || statsType != STATS_FULL)</span>
<span class="nc" id="L370">            ? STATS_ADVANCED : statsType;</span>
      }
    }
<span class="fc" id="L373">    return statsType;</span>
  }

  /**
   * Tells whether {@code type} is one of the names in STATS_TYPES. The
   * keyword 'all' and the function names are not part of that list.
   *
   * @param type the name to test
   * @return true if STATS_TYPES contains {@code type}
   */
  public static boolean isStatsType(String type) {
<span class="fc" id="L384">    return STATS_TYPES.contains(type);</span>
  }

}
</pre><div class="footer"><span class="right">Created with <a href="http://www.jacoco.org/jacoco">JaCoCo</a> 0.7.9.201702052155</span></div></body></html>