<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" lang="en"><head><meta http-equiv="Content-Type" content="text/html;charset=UTF-8"/><link rel="stylesheet" href="../jacoco-resources/report.css" type="text/css"/><link rel="shortcut icon" href="../jacoco-resources/report.gif" type="image/gif"/><title>MtasTokenizer.java</title><link rel="stylesheet" href="../jacoco-resources/prettify.css" type="text/css"/><script type="text/javascript" src="../jacoco-resources/prettify.js"></script></head><body onload="window['PR_TAB_WIDTH']=4;prettyPrint()"><div class="breadcrumb" id="breadcrumb"><span class="info"><a href="../jacoco-sessions.html" class="el_session">Sessions</a></span><a href="../index.html" class="el_report">MTAS</a> > <a href="index.source.html" class="el_package">mtas.analysis</a> > <span class="el_source">MtasTokenizer.java</span></div><h1>MtasTokenizer.java</h1><pre class="source lang-java linenums">package mtas.analysis; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.Reader; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.util.HashMap; import java.util.Iterator; import mtas.analysis.parser.MtasParser; import mtas.analysis.token.MtasToken; import mtas.analysis.token.MtasTokenCollection; import mtas.analysis.util.MtasConfigException; import mtas.analysis.util.MtasConfiguration; import mtas.analysis.util.MtasParserException; import mtas.codec.payload.MtasPayloadEncoder; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.util.AttributeFactory; /** * The Class MtasTokenizer. */ public final class MtasTokenizer extends Tokenizer { /** The Constant log. */ <span class="fc" id="L35"> private static final Log log = LogFactory.getLog(MtasTokenizer.class);</span> /** The Constant CONFIGURATION_MTAS. */ public static final String CONFIGURATION_MTAS = "mtas"; public static final String CONFIGURATION_MTAS_INDEX = "index"; public static final String CONFIGURATION_MTAS_INDEX_ATTRIBUTE = "index"; public static final String CONFIGURATION_MTAS_PARSER = "parser"; public static final String CONFIGURATION_MTAS_PARSER_ATTRIBUTE = "name"; private static final String VALUE_TRUE = "true"; private static final String VALUE_FALSE = "false"; private static final String VALUE_0 = "0"; private static final String VALUE_1 = "1"; /** The current position. */ <span class="pc" id="L52"> private int currentPosition = 0;</span> /** The encoding flags. */ <span class="pc" id="L55"> private int encodingFlags = MtasPayloadEncoder.ENCODE_DEFAULT;</span> /** The parser name. */ <span class="pc" id="L58"> private String parserName = null;</span> /** The parser configuration. */ <span class="pc" id="L61"> private MtasConfiguration parserConfiguration = null;</span> /** The token collection. */ private MtasTokenCollection tokenCollection; /** The term att. */ <span class="pc" id="L67"> private final CharTermAttribute termAtt = addAttribute(</span> CharTermAttribute.class); /** The offset att. */ <span class="pc" id="L71"> private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);</span> /** The payload att. */ <span class="pc" id="L74"> private final PayloadAttribute payloadAtt = addAttribute(</span> PayloadAttribute.class); /** The position increment att. */ <span class="pc" id="L78"> private final PositionIncrementAttribute positionIncrementAtt = addAttribute(</span> PositionIncrementAttribute.class); /** The token collection iterator. */ private Iterator<MtasToken> tokenCollectionIterator; /** * Instantiates a new mtas tokenizer. */ <span class="nc" id="L87"> public MtasTokenizer() {</span> <span class="nc" id="L88"> }</span> /** * Instantiates a new mtas tokenizer. * * @param configFileName the config file name */ <span class="nc" id="L95"> public MtasTokenizer(final String configFileName) {</span> <span class="nc" id="L96"> readConfigurationFile(configFileName);</span> <span class="nc" id="L97"> }</span> /** * Instantiates a new mtas tokenizer. * * @param config the config * @throws IOException Signals that an I/O exception has occurred. */ <span class="nc" id="L105"> public MtasTokenizer(final MtasConfiguration config) throws IOException {</span> <span class="nc" id="L106"> processConfiguration(config);</span> <span class="nc" id="L107"> }</span> /** * Instantiates a new mtas tokenizer. * * @param reader the reader * @throws IOException Signals that an I/O exception has occurred. */ <span class="nc" id="L115"> public MtasTokenizer(final InputStream reader) throws IOException {</span> <span class="nc" id="L116"> processConfiguration(MtasConfiguration.readConfiguration(reader));</span> <span class="nc" id="L117"> }</span> /** * Instantiates a new mtas tokenizer. * * @param factory the factory * @param config the config * @throws IOException Signals that an I/O exception has occurred. */ public MtasTokenizer(final AttributeFactory factory, final MtasConfiguration config) throws IOException { <span class="fc" id="L128"> super(factory);</span> <span class="fc" id="L129"> processConfiguration(config);</span> <span class="fc" id="L130"> }</span> /* * (non-Javadoc) * * @see org.apache.lucene.analysis.TokenStream#incrementToken() */ @Override public boolean incrementToken() throws IOException { <span class="fc" id="L139"> clearAttributes();</span> MtasToken token; Integer positionIncrement; MtasPayloadEncoder payloadEncoder; <span class="pc bpc" id="L143" title="1 of 2 branches missed."> if (tokenCollectionIterator == null) {</span> <span class="nc" id="L144"> return false;</span> <span class="fc bfc" id="L145" title="All 2 branches covered."> } else if (tokenCollectionIterator.hasNext()) {</span> <span class="fc" id="L146"> token = tokenCollectionIterator.next();</span> // compute info <span class="fc" id="L148"> positionIncrement = token.getPositionStart() - currentPosition;</span> <span class="fc" id="L149"> currentPosition = token.getPositionStart();</span> <span class="fc" id="L150"> payloadEncoder = new MtasPayloadEncoder(token, encodingFlags);</span> // set info <span class="fc" id="L152"> termAtt.append(token.getValue());</span> <span class="fc" id="L153"> positionIncrementAtt.setPositionIncrement(positionIncrement);</span> <span class="fc" id="L154"> offsetAtt.setOffset(token.getOffsetStart(), token.getOffsetEnd());</span> <span class="fc" id="L155"> payloadAtt.setPayload(payloadEncoder.getPayload());</span> <span class="fc" id="L156"> return true;</span> } <span class="fc" id="L158"> return false;</span> } /* * (non-Javadoc) * * @see org.apache.lucene.analysis.Tokenizer#reset() */ @Override public void reset() throws IOException { <span class="fc" id="L168"> super.reset();</span> <span class="fc" id="L169"> currentPosition = -1;</span> try { <span class="fc" id="L171"> constructTokenCollection(input);</span> <span class="fc" id="L172"> tokenCollectionIterator = tokenCollection.iterator();</span> <span class="nc" id="L173"> } catch (MtasConfigException | MtasParserException e) { </span> <span class="nc" id="L174"> tokenCollectionIterator = null;</span> <span class="nc" id="L175"> throw new IOException(e);</span> <span class="fc" id="L176"> }</span> <span class="fc" id="L177"> }</span> /** * Prints the. * * @param r the r * @throws MtasParserException the mtas parser exception */ public void print(final Reader r) throws MtasParserException { try { <span class="nc" id="L187"> setReader(r);</span> <span class="nc" id="L188"> reset();</span> <span class="nc bnc" id="L189" title="All 2 branches missed."> if (tokenCollection != null) {</span> <span class="nc" id="L190"> tokenCollection.print();</span> } <span class="nc" id="L192"> end();</span> <span class="nc" id="L193"> close();</span> <span class="nc" id="L194"> } catch (IOException e) {</span> <span class="nc" id="L195"> log.error(e);</span> <span class="nc" id="L196"> throw new MtasParserException(e.getClass() + " : " + e.getMessage());</span> <span class="nc" id="L197"> }</span> <span class="nc" id="L198"> }</span> /** * Gets the list. * * @param r the r * @return the list * @throws IOException Signals that an I/O exception has occurred. */ public String[][] getList(final Reader r) throws IOException { try { <span class="nc" id="L209"> setReader(r);</span> <span class="nc" id="L210"> reset();</span> <span class="nc" id="L211"> String[][] result = tokenCollection.getList();</span> <span class="nc" id="L212"> end();</span> <span class="nc" id="L213"> close();</span> <span class="nc" id="L214"> return result;</span> <span class="nc" id="L215"> } catch (MtasParserException e) {</span> <span class="nc" id="L216"> log.info(e);</span> <span class="nc" id="L217"> throw new IOException("can't produce list");</span> } } /** * Construct token collection. * * @param reader the reader * @throws MtasConfigException the mtas config exception * @throws MtasParserException the mtas parser exception */ private void constructTokenCollection(final Reader reader) throws MtasConfigException, MtasParserException { <span class="fc" id="L230"> tokenCollection = null;</span> try { <span class="fc" id="L232"> Constructor<?> c = Class.forName(parserName)</span> <span class="fc" id="L233"> .getDeclaredConstructor(MtasConfiguration.class);</span> // try { <span class="fc" id="L235"> Object p = c.newInstance(parserConfiguration);</span> <span class="pc bpc" id="L236" title="1 of 2 branches missed."> if (p instanceof MtasParser) {</span> <span class="fc" id="L237"> MtasParser parser = (MtasParser) p;</span> <span class="fc" id="L238"> tokenCollection = parser.createTokenCollection(reader);</span> <span class="fc" id="L239"> return;</span> } else { <span class="nc" id="L241"> throw new MtasConfigException("no instance of MtasParser");</span> } <span class="nc" id="L243"> } catch (MtasParserException e) {</span> <span class="nc" id="L244"> log.debug(e);</span> <span class="nc" id="L245"> tokenCollection = new MtasTokenCollection();</span> <span class="nc" id="L246"> throw new MtasParserException(e.getMessage());</span> <span class="nc" id="L247"> } catch (NoSuchMethodException | InvocationTargetException</span> | IllegalAccessException | ClassNotFoundException | InstantiationException e) { <span class="nc" id="L250"> log.debug(e);</span> <span class="nc" id="L251"> throw new MtasConfigException(</span> <span class="nc" id="L252"> e.getClass().getName() + " : '" + e.getMessage() + "'");</span> } } /** * Read configuration file. * * @param configFile the config file */ private void readConfigurationFile(final String configFile) { InputStream is; try { <span class="nc" id="L265"> is = new FileInputStream(configFile);</span> <span class="nc" id="L266"> processConfiguration(MtasConfiguration.readConfiguration(is));</span> <span class="nc" id="L267"> is.close();</span> <span class="nc" id="L268"> } catch (FileNotFoundException e) {</span> <span class="nc" id="L269"> log.error("Couldn't find " + configFile, e);</span> <span class="nc" id="L270"> } catch (IOException e) {</span> <span class="nc" id="L271"> log.error("Couldn't read " + configFile, e);</span> <span class="nc" id="L272"> }</span> <span class="nc" id="L273"> }</span> /** * Process configuration. * * @param config the config * @throws IOException Signals that an I/O exception has occurred. */ private void processConfiguration(final MtasConfiguration config) throws IOException { <span class="fc" id="L283"> HashMap<String, Integer> indexEncodingMapper = new HashMap<>();</span> <span class="fc" id="L284"> indexEncodingMapper.put("payload", MtasPayloadEncoder.ENCODE_PAYLOAD);</span> <span class="fc" id="L285"> indexEncodingMapper.put("offset", MtasPayloadEncoder.ENCODE_OFFSET);</span> <span class="fc" id="L286"> indexEncodingMapper.put("realoffset", MtasPayloadEncoder.ENCODE_REALOFFSET);</span> <span class="fc" id="L287"> indexEncodingMapper.put("parent", MtasPayloadEncoder.ENCODE_PARENT);</span> // process <span class="pc bpc" id="L289" title="1 of 2 branches missed."> if (config != null) {</span> <span class="fc bfc" id="L290" title="All 2 branches covered."> for (int i = 0; i < config.children.size(); i++) {</span> <span class="fc bfc" id="L291" title="All 2 branches covered."> if (config.children.get(i).name.equals(CONFIGURATION_MTAS_INDEX)) {</span> <span class="fc" id="L292"> MtasConfiguration index = config.children.get(i);</span> <span class="fc bfc" id="L293" title="All 2 branches covered."> for (int j = 0; j < index.children.size(); j++) {</span> <span class="pc bpc" id="L294" title="1 of 2 branches missed."> if (indexEncodingMapper.containsKey(index.children.get(j).name)) {</span> <span class="fc" id="L295"> String value = index.children.get(j).attributes.get(CONFIGURATION_MTAS_INDEX_ATTRIBUTE);</span> <span class="pc bpc" id="L296" title="1 of 4 branches missed."> if ((value.equals(VALUE_TRUE)) || (value.equals(VALUE_1))) {</span> <span class="fc" id="L297"> encodingFlags |= indexEncodingMapper</span> <span class="fc" id="L298"> .get(index.children.get(j).name);</span> <span class="pc bpc" id="L299" title="3 of 4 branches missed."> } else if ((value.equals(VALUE_FALSE)) || (value.equals(VALUE_0))) {</span> <span class="fc" id="L300"> encodingFlags &= ~indexEncodingMapper</span> <span class="fc" id="L301"> .get(index.children.get(j).name);</span> } } } <span class="pc bpc" id="L305" title="1 of 2 branches missed."> } else if (config.children.get(i).name.equals(CONFIGURATION_MTAS_PARSER)) {</span> <span class="pc bpc" id="L306" title="1 of 2 branches missed."> if (config.children.get(i).attributes.containsKey(CONFIGURATION_MTAS_PARSER_ATTRIBUTE)) {</span> <span class="fc" id="L307"> parserName = config.children.get(i).attributes.get(CONFIGURATION_MTAS_PARSER_ATTRIBUTE);</span> <span class="fc" id="L308"> parserConfiguration = config.children.get(i);</span> } else { <span class="nc" id="L310"> throw new IOException("no parser configuration");</span> } } } } else { <span class="nc" id="L315"> throw new IOException("no (valid) configuration");</span> } <span class="fc" id="L317"> }</span> /* * (non-Javadoc) * * @see org.apache.lucene.util.AttributeSource#equals(java.lang.Object) */ @Override public boolean equals(Object obj) { <span class="nc bnc" id="L326" title="All 2 branches missed."> if (this == obj)</span> <span class="nc" id="L327"> return true;</span> <span class="nc bnc" id="L328" title="All 2 branches missed."> if (obj == null)</span> <span class="nc" id="L329"> return false;</span> <span class="nc bnc" id="L330" title="All 2 branches missed."> if (getClass() != obj.getClass())</span> <span class="nc" id="L331"> return false;</span> <span class="nc" id="L332"> final MtasTokenizer that = (MtasTokenizer) obj;</span> <span class="nc" id="L333"> return super.equals(that);</span> } /* * (non-Javadoc) * * @see org.apache.lucene.util.AttributeSource#hashCode() */ @Override public int hashCode() { <span class="nc" id="L343"> return super.hashCode();</span> } } </pre><div class="footer"><span class="right">Created with <a href="http://www.jacoco.org/jacoco">JaCoCo</a> 0.7.9.201702052155</span></div></body></html>