<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" lang="en"><head><meta http-equiv="Content-Type" content="text/html;charset=UTF-8"/><link rel="stylesheet" href="../jacoco-resources/report.css" type="text/css"/><link rel="shortcut icon" href="../jacoco-resources/report.gif" type="image/gif"/><title>MtasTokenizer.java</title><link rel="stylesheet" href="../jacoco-resources/prettify.css" type="text/css"/><script type="text/javascript" src="../jacoco-resources/prettify.js"></script></head><body onload="window['PR_TAB_WIDTH']=4;prettyPrint()"><div class="breadcrumb" id="breadcrumb"><span class="info"><a href="../jacoco-sessions.html" class="el_session">Sessions</a></span><a href="../index.html" class="el_report">MTAS</a> > <a href="index.source.html" class="el_package">mtas.analysis</a> > <span class="el_source">MtasTokenizer.java</span></div><h1>MtasTokenizer.java</h1><pre class="source lang-java linenums">package mtas.analysis;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.HashMap;
import java.util.Iterator;
import mtas.analysis.parser.MtasParser;
import mtas.analysis.token.MtasToken;
import mtas.analysis.token.MtasTokenCollection;
import mtas.analysis.util.MtasConfigException;
import mtas.analysis.util.MtasConfiguration;
import mtas.analysis.util.MtasParserException;
import mtas.codec.payload.MtasPayloadEncoder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.AttributeFactory;
/**
* The Class MtasTokenizer.
*/
public final class MtasTokenizer extends Tokenizer {
/** The Constant log. */
<span class="fc" id="L35"> private static final Log log = LogFactory.getLog(MtasTokenizer.class);</span>
/** The Constant CONFIGURATION_MTAS. */
public static final String CONFIGURATION_MTAS = "mtas";
public static final String CONFIGURATION_MTAS_INDEX = "index";
public static final String CONFIGURATION_MTAS_INDEX_ATTRIBUTE = "index";
public static final String CONFIGURATION_MTAS_PARSER = "parser";
public static final String CONFIGURATION_MTAS_PARSER_ATTRIBUTE = "name";
private static final String VALUE_TRUE = "true";
private static final String VALUE_FALSE = "false";
private static final String VALUE_0 = "0";
private static final String VALUE_1 = "1";
/** The current position. */
<span class="pc" id="L52"> private int currentPosition = 0;</span>
/** The encoding flags. */
<span class="pc" id="L55"> private int encodingFlags = MtasPayloadEncoder.ENCODE_DEFAULT;</span>
/** The parser name. */
<span class="pc" id="L58"> private String parserName = null;</span>
/** The parser configuration. */
<span class="pc" id="L61"> private MtasConfiguration parserConfiguration = null;</span>
/** The token collection. */
private MtasTokenCollection tokenCollection;
/** The term att. */
<span class="pc" id="L67"> private final CharTermAttribute termAtt = addAttribute(</span>
CharTermAttribute.class);
/** The offset att. */
<span class="pc" id="L71"> private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);</span>
/** The payload att. */
<span class="pc" id="L74"> private final PayloadAttribute payloadAtt = addAttribute(</span>
PayloadAttribute.class);
/** The position increment att. */
<span class="pc" id="L78"> private final PositionIncrementAttribute positionIncrementAtt = addAttribute(</span>
PositionIncrementAttribute.class);
/** The token collection iterator. */
private Iterator<MtasToken> tokenCollectionIterator;
/**
* Instantiates a new mtas tokenizer.
*/
<span class="nc" id="L87"> public MtasTokenizer() {</span>
<span class="nc" id="L88"> }</span>
/**
* Instantiates a new mtas tokenizer.
*
* @param configFileName the config file name
*/
<span class="nc" id="L95"> public MtasTokenizer(final String configFileName) {</span>
<span class="nc" id="L96"> readConfigurationFile(configFileName);</span>
<span class="nc" id="L97"> }</span>
/**
* Instantiates a new mtas tokenizer.
*
* @param config the config
* @throws IOException Signals that an I/O exception has occurred.
*/
<span class="nc" id="L105"> public MtasTokenizer(final MtasConfiguration config) throws IOException {</span>
<span class="nc" id="L106"> processConfiguration(config);</span>
<span class="nc" id="L107"> }</span>
/**
* Instantiates a new mtas tokenizer.
*
* @param reader the reader
* @throws IOException Signals that an I/O exception has occurred.
*/
<span class="nc" id="L115"> public MtasTokenizer(final InputStream reader) throws IOException {</span>
<span class="nc" id="L116"> processConfiguration(MtasConfiguration.readConfiguration(reader));</span>
<span class="nc" id="L117"> }</span>
/**
* Instantiates a new mtas tokenizer.
*
* @param factory the factory
* @param config the config
* @throws IOException Signals that an I/O exception has occurred.
*/
public MtasTokenizer(final AttributeFactory factory,
final MtasConfiguration config) throws IOException {
<span class="fc" id="L128"> super(factory);</span>
<span class="fc" id="L129"> processConfiguration(config);</span>
<span class="fc" id="L130"> }</span>
/*
* (non-Javadoc)
*
* @see org.apache.lucene.analysis.TokenStream#incrementToken()
*/
@Override
public boolean incrementToken() throws IOException {
<span class="fc" id="L139"> clearAttributes();</span>
MtasToken token;
Integer positionIncrement;
MtasPayloadEncoder payloadEncoder;
<span class="pc bpc" id="L143" title="1 of 2 branches missed."> if (tokenCollectionIterator == null) {</span>
<span class="nc" id="L144"> return false;</span>
<span class="fc bfc" id="L145" title="All 2 branches covered."> } else if (tokenCollectionIterator.hasNext()) {</span>
<span class="fc" id="L146"> token = tokenCollectionIterator.next();</span>
// compute info
<span class="fc" id="L148"> positionIncrement = token.getPositionStart() - currentPosition;</span>
<span class="fc" id="L149"> currentPosition = token.getPositionStart();</span>
<span class="fc" id="L150"> payloadEncoder = new MtasPayloadEncoder(token, encodingFlags);</span>
// set info
<span class="fc" id="L152"> termAtt.append(token.getValue());</span>
<span class="fc" id="L153"> positionIncrementAtt.setPositionIncrement(positionIncrement);</span>
<span class="fc" id="L154"> offsetAtt.setOffset(token.getOffsetStart(), token.getOffsetEnd());</span>
<span class="fc" id="L155"> payloadAtt.setPayload(payloadEncoder.getPayload());</span>
<span class="fc" id="L156"> return true;</span>
}
<span class="fc" id="L158"> return false;</span>
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.analysis.Tokenizer#reset()
*/
@Override
public void reset() throws IOException {
<span class="fc" id="L168"> super.reset();</span>
<span class="fc" id="L169"> currentPosition = -1;</span>
try {
<span class="fc" id="L171"> constructTokenCollection(input);</span>
<span class="fc" id="L172"> tokenCollectionIterator = tokenCollection.iterator();</span>
<span class="nc" id="L173"> } catch (MtasConfigException | MtasParserException e) { </span>
<span class="nc" id="L174"> tokenCollectionIterator = null;</span>
<span class="nc" id="L175"> throw new IOException(e);</span>
<span class="fc" id="L176"> }</span>
<span class="fc" id="L177"> }</span>
/**
* Prints the.
*
* @param r the r
* @throws MtasParserException the mtas parser exception
*/
public void print(final Reader r) throws MtasParserException {
try {
<span class="nc" id="L187"> setReader(r);</span>
<span class="nc" id="L188"> reset();</span>
<span class="nc bnc" id="L189" title="All 2 branches missed."> if (tokenCollection != null) {</span>
<span class="nc" id="L190"> tokenCollection.print();</span>
}
<span class="nc" id="L192"> end();</span>
<span class="nc" id="L193"> close();</span>
<span class="nc" id="L194"> } catch (IOException e) {</span>
<span class="nc" id="L195"> log.error(e);</span>
<span class="nc" id="L196"> throw new MtasParserException(e.getClass() + " : " + e.getMessage());</span>
<span class="nc" id="L197"> }</span>
<span class="nc" id="L198"> }</span>
/**
* Gets the list.
*
* @param r the r
* @return the list
* @throws IOException Signals that an I/O exception has occurred.
*/
public String[][] getList(final Reader r) throws IOException {
try {
<span class="nc" id="L209"> setReader(r);</span>
<span class="nc" id="L210"> reset();</span>
<span class="nc" id="L211"> String[][] result = tokenCollection.getList();</span>
<span class="nc" id="L212"> end();</span>
<span class="nc" id="L213"> close();</span>
<span class="nc" id="L214"> return result;</span>
<span class="nc" id="L215"> } catch (MtasParserException e) {</span>
<span class="nc" id="L216"> log.info(e);</span>
<span class="nc" id="L217"> throw new IOException("can't produce list");</span>
}
}
/**
* Construct token collection.
*
* @param reader the reader
* @throws MtasConfigException the mtas config exception
* @throws MtasParserException the mtas parser exception
*/
private void constructTokenCollection(final Reader reader)
throws MtasConfigException, MtasParserException {
<span class="fc" id="L230"> tokenCollection = null;</span>
try {
<span class="fc" id="L232"> Constructor<?> c = Class.forName(parserName)</span>
<span class="fc" id="L233"> .getDeclaredConstructor(MtasConfiguration.class);</span>
// try {
<span class="fc" id="L235"> Object p = c.newInstance(parserConfiguration);</span>
<span class="pc bpc" id="L236" title="1 of 2 branches missed."> if (p instanceof MtasParser) {</span>
<span class="fc" id="L237"> MtasParser parser = (MtasParser) p;</span>
<span class="fc" id="L238"> tokenCollection = parser.createTokenCollection(reader);</span>
<span class="fc" id="L239"> return;</span>
} else {
<span class="nc" id="L241"> throw new MtasConfigException("no instance of MtasParser");</span>
}
<span class="nc" id="L243"> } catch (MtasParserException e) {</span>
<span class="nc" id="L244"> log.debug(e);</span>
<span class="nc" id="L245"> tokenCollection = new MtasTokenCollection();</span>
<span class="nc" id="L246"> throw new MtasParserException(e.getMessage());</span>
<span class="nc" id="L247"> } catch (NoSuchMethodException | InvocationTargetException</span>
| IllegalAccessException | ClassNotFoundException
| InstantiationException e) {
<span class="nc" id="L250"> log.debug(e);</span>
<span class="nc" id="L251"> throw new MtasConfigException(</span>
<span class="nc" id="L252"> e.getClass().getName() + " : '" + e.getMessage() + "'");</span>
}
}
/**
* Read configuration file.
*
* @param configFile the config file
*/
private void readConfigurationFile(final String configFile) {
InputStream is;
try {
<span class="nc" id="L265"> is = new FileInputStream(configFile);</span>
<span class="nc" id="L266"> processConfiguration(MtasConfiguration.readConfiguration(is));</span>
<span class="nc" id="L267"> is.close();</span>
<span class="nc" id="L268"> } catch (FileNotFoundException e) {</span>
<span class="nc" id="L269"> log.error("Couldn't find " + configFile, e);</span>
<span class="nc" id="L270"> } catch (IOException e) {</span>
<span class="nc" id="L271"> log.error("Couldn't read " + configFile, e);</span>
<span class="nc" id="L272"> }</span>
<span class="nc" id="L273"> }</span>
/**
* Process configuration.
*
* @param config the config
* @throws IOException Signals that an I/O exception has occurred.
*/
private void processConfiguration(final MtasConfiguration config)
throws IOException {
<span class="fc" id="L283"> HashMap<String, Integer> indexEncodingMapper = new HashMap<>();</span>
<span class="fc" id="L284"> indexEncodingMapper.put("payload", MtasPayloadEncoder.ENCODE_PAYLOAD);</span>
<span class="fc" id="L285"> indexEncodingMapper.put("offset", MtasPayloadEncoder.ENCODE_OFFSET);</span>
<span class="fc" id="L286"> indexEncodingMapper.put("realoffset", MtasPayloadEncoder.ENCODE_REALOFFSET);</span>
<span class="fc" id="L287"> indexEncodingMapper.put("parent", MtasPayloadEncoder.ENCODE_PARENT);</span>
// process
<span class="pc bpc" id="L289" title="1 of 2 branches missed."> if (config != null) {</span>
<span class="fc bfc" id="L290" title="All 2 branches covered."> for (int i = 0; i < config.children.size(); i++) {</span>
<span class="fc bfc" id="L291" title="All 2 branches covered."> if (config.children.get(i).name.equals(CONFIGURATION_MTAS_INDEX)) {</span>
<span class="fc" id="L292"> MtasConfiguration index = config.children.get(i);</span>
<span class="fc bfc" id="L293" title="All 2 branches covered."> for (int j = 0; j < index.children.size(); j++) {</span>
<span class="pc bpc" id="L294" title="1 of 2 branches missed."> if (indexEncodingMapper.containsKey(index.children.get(j).name)) {</span>
<span class="fc" id="L295"> String value = index.children.get(j).attributes.get(CONFIGURATION_MTAS_INDEX_ATTRIBUTE);</span>
<span class="pc bpc" id="L296" title="1 of 4 branches missed."> if ((value.equals(VALUE_TRUE)) || (value.equals(VALUE_1))) {</span>
<span class="fc" id="L297"> encodingFlags |= indexEncodingMapper</span>
<span class="fc" id="L298"> .get(index.children.get(j).name);</span>
<span class="pc bpc" id="L299" title="3 of 4 branches missed."> } else if ((value.equals(VALUE_FALSE)) || (value.equals(VALUE_0))) {</span>
<span class="fc" id="L300"> encodingFlags &= ~indexEncodingMapper</span>
<span class="fc" id="L301"> .get(index.children.get(j).name);</span>
}
}
}
<span class="pc bpc" id="L305" title="1 of 2 branches missed."> } else if (config.children.get(i).name.equals(CONFIGURATION_MTAS_PARSER)) {</span>
<span class="pc bpc" id="L306" title="1 of 2 branches missed."> if (config.children.get(i).attributes.containsKey(CONFIGURATION_MTAS_PARSER_ATTRIBUTE)) {</span>
<span class="fc" id="L307"> parserName = config.children.get(i).attributes.get(CONFIGURATION_MTAS_PARSER_ATTRIBUTE);</span>
<span class="fc" id="L308"> parserConfiguration = config.children.get(i);</span>
} else {
<span class="nc" id="L310"> throw new IOException("no parser configuration");</span>
}
}
}
} else {
<span class="nc" id="L315"> throw new IOException("no (valid) configuration");</span>
}
<span class="fc" id="L317"> }</span>
/*
* (non-Javadoc)
*
* @see org.apache.lucene.util.AttributeSource#equals(java.lang.Object)
*/
@Override
public boolean equals(Object obj) {
<span class="nc bnc" id="L326" title="All 2 branches missed."> if (this == obj)</span>
<span class="nc" id="L327"> return true;</span>
<span class="nc bnc" id="L328" title="All 2 branches missed."> if (obj == null)</span>
<span class="nc" id="L329"> return false;</span>
<span class="nc bnc" id="L330" title="All 2 branches missed."> if (getClass() != obj.getClass())</span>
<span class="nc" id="L331"> return false;</span>
<span class="nc" id="L332"> final MtasTokenizer that = (MtasTokenizer) obj;</span>
<span class="nc" id="L333"> return super.equals(that);</span>
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.util.AttributeSource#hashCode()
*/
@Override
public int hashCode() {
<span class="nc" id="L343"> return super.hashCode();</span>
}
}
</pre><div class="footer"><span class="right">Created with <a href="http://www.jacoco.org/jacoco">JaCoCo</a> 0.7.9.201702052155</span></div></body></html>