<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" lang="en"><head><meta http-equiv="Content-Type" content="text/html;charset=UTF-8"/><link rel="stylesheet" href="../jacoco-resources/report.css" type="text/css"/><link rel="shortcut icon" href="../jacoco-resources/report.gif" type="image/gif"/><title>MtasTokenizer.java</title><link rel="stylesheet" href="../jacoco-resources/prettify.css" type="text/css"/><script type="text/javascript" src="../jacoco-resources/prettify.js"></script></head><body onload="window['PR_TAB_WIDTH']=4;prettyPrint()"><div class="breadcrumb" id="breadcrumb"><span class="info"><a href="../jacoco-sessions.html" class="el_session">Sessions</a></span><a href="../index.html" class="el_report">MTAS</a> > <a href="index.source.html" class="el_package">mtas.analysis</a> > <span class="el_source">MtasTokenizer.java</span></div><h1>MtasTokenizer.java</h1><pre class="source lang-java linenums">package mtas.analysis; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.Reader; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.util.HashMap; import java.util.Iterator; import mtas.analysis.parser.MtasBasicParser; import mtas.analysis.token.MtasToken; import mtas.analysis.token.MtasTokenCollection; import mtas.analysis.util.MtasConfigException; import mtas.analysis.util.MtasConfiguration; import mtas.analysis.util.MtasParserException; import mtas.codec.payload.MtasPayloadEncoder; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.util.AttributeFactory; /** * The Class MtasTokenizer. */ public final class MtasTokenizer extends Tokenizer { /** The Constant log. */ <span class="fc" id="L35"> private static final Log log = LogFactory.getLog(MtasTokenizer.class);</span> /** The Constant CONFIGURATION_MTAS. */ public static final String CONFIGURATION_MTAS = "mtas"; /** The current position. */ <span class="pc" id="L41"> private int currentPosition = 0;</span> /** The encoding flags. */ <span class="pc" id="L44"> private int encodingFlags = MtasPayloadEncoder.ENCODE_DEFAULT;</span> /** The parser name. */ <span class="pc" id="L47"> private String parserName = null;</span> /** The parser configuration. */ <span class="pc" id="L50"> private MtasConfiguration parserConfiguration = null;</span> /** The token collection. */ private MtasTokenCollection tokenCollection; /** The term att. */ <span class="pc" id="L56"> private final CharTermAttribute termAtt = addAttribute(</span> CharTermAttribute.class); /** The offset att. */ <span class="pc" id="L60"> private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);</span> /** The payload att. */ <span class="pc" id="L63"> private final PayloadAttribute payloadAtt = addAttribute(</span> PayloadAttribute.class); /** The position increment att. */ <span class="pc" id="L67"> private final PositionIncrementAttribute positionIncrementAtt = addAttribute(</span> PositionIncrementAttribute.class); /** The token collection iterator. */ private Iterator<MtasToken> tokenCollectionIterator; /** * Instantiates a new mtas tokenizer. */ <span class="nc" id="L76"> public MtasTokenizer() {</span> <span class="nc" id="L77"> }</span> /** * Instantiates a new mtas tokenizer. * * @param configFileName the config file name */ <span class="nc" id="L84"> public MtasTokenizer(final String configFileName) {</span> <span class="nc" id="L85"> readConfigurationFile(configFileName);</span> <span class="nc" id="L86"> }</span> /** * Instantiates a new mtas tokenizer. * * @param config the config * @throws IOException Signals that an I/O exception has occurred. */ <span class="nc" id="L94"> public MtasTokenizer(final MtasConfiguration config) throws IOException {</span> <span class="nc" id="L95"> processConfiguration(config);</span> <span class="nc" id="L96"> }</span> /** * Instantiates a new mtas tokenizer. * * @param reader the reader * @throws IOException Signals that an I/O exception has occurred. */ <span class="nc" id="L104"> public MtasTokenizer(final InputStream reader) throws IOException {</span> <span class="nc" id="L105"> processConfiguration(MtasConfiguration.readConfiguration(reader));</span> <span class="nc" id="L106"> }</span> /** * Instantiates a new mtas tokenizer. * * @param factory the factory * @param config the config * @throws IOException Signals that an I/O exception has occurred. */ public MtasTokenizer(final AttributeFactory factory, final MtasConfiguration config) throws IOException { <span class="fc" id="L117"> super(factory);</span> <span class="fc" id="L118"> processConfiguration(config);</span> <span class="fc" id="L119"> }</span> /* * (non-Javadoc) * * @see org.apache.lucene.analysis.TokenStream#incrementToken() */ @Override public boolean incrementToken() throws IOException { <span class="fc" id="L128"> clearAttributes();</span> MtasToken token; Integer positionIncrement; MtasPayloadEncoder payloadEncoder; <span class="pc bpc" id="L132" title="1 of 2 branches missed."> if (tokenCollectionIterator == null) {</span> <span class="nc" id="L133"> return false;</span> <span class="fc bfc" id="L134" title="All 2 branches covered."> } else if (tokenCollectionIterator.hasNext()) {</span> <span class="fc" id="L135"> token = tokenCollectionIterator.next();</span> // compute info <span class="fc" id="L137"> positionIncrement = token.getPositionStart() - currentPosition;</span> <span class="fc" id="L138"> currentPosition = token.getPositionStart();</span> <span class="fc" id="L139"> payloadEncoder = new MtasPayloadEncoder(token, encodingFlags);</span> // set info <span class="fc" id="L141"> termAtt.append(token.getValue());</span> <span class="fc" id="L142"> positionIncrementAtt.setPositionIncrement(positionIncrement);</span> <span class="fc" id="L143"> offsetAtt.setOffset(token.getOffsetStart(), token.getOffsetEnd());</span> <span class="fc" id="L144"> payloadAtt.setPayload(payloadEncoder.getPayload());</span> <span class="fc" id="L145"> return true;</span> } <span class="fc" id="L147"> return false;</span> } /* * (non-Javadoc) * * @see org.apache.lucene.analysis.Tokenizer#reset() */ @Override public void reset() throws IOException { <span class="fc" id="L157"> super.reset();</span> <span class="fc" id="L158"> currentPosition = -1;</span> try { <span class="fc" id="L160"> constructTokenCollection(input);</span> <span class="fc" id="L161"> tokenCollectionIterator = tokenCollection.iterator();</span> <span class="nc" id="L162"> } catch (MtasConfigException | MtasParserException e) {</span> <span class="nc" id="L163"> tokenCollectionIterator = null;</span> <span class="nc" id="L164"> throw new IOException(e);</span> <span class="fc" id="L165"> }</span> <span class="fc" id="L166"> }</span> /** * Prints the. * * @param r the r * @throws MtasParserException the mtas parser exception */ public void print(final Reader r) throws MtasParserException { try { <span class="nc" id="L176"> setReader(r);</span> <span class="nc" id="L177"> reset();</span> <span class="nc bnc" id="L178" title="All 2 branches missed."> if (tokenCollection != null) {</span> <span class="nc" id="L179"> tokenCollection.print();</span> } <span class="nc" id="L181"> end();</span> <span class="nc" id="L182"> close();</span> <span class="nc" id="L183"> } catch (IOException e) {</span> <span class="nc" id="L184"> log.error(e);</span> <span class="nc" id="L185"> throw new MtasParserException(e.getClass() + " : " + e.getMessage());</span> <span class="nc" id="L186"> }</span> <span class="nc" id="L187"> }</span> /** * Gets the list. * * @param r the r * @return the list * @throws IOException Signals that an I/O exception has occurred. */ public String[][] getList(final Reader r) throws IOException { try { <span class="nc" id="L198"> setReader(r);</span> <span class="nc" id="L199"> reset();</span> <span class="nc" id="L200"> String[][] result = tokenCollection.getList();</span> <span class="nc" id="L201"> end();</span> <span class="nc" id="L202"> close();</span> <span class="nc" id="L203"> return result;</span> <span class="nc" id="L204"> } catch (MtasParserException e) {</span> <span class="nc" id="L205"> log.info(e);</span> <span class="nc" id="L206"> throw new IOException("can't produce list");</span> } } /** * Construct token collection. * * @param reader the reader * @throws MtasConfigException the mtas config exception * @throws MtasParserException the mtas parser exception */ private void constructTokenCollection(final Reader reader) throws MtasConfigException, MtasParserException { <span class="fc" id="L219"> tokenCollection = null;</span> try { <span class="fc" id="L221"> Constructor<?> c = Class.forName(parserName)</span> <span class="fc" id="L222"> .getDeclaredConstructor(MtasConfiguration.class);</span> // try { <span class="fc" id="L224"> Object p = c.newInstance(parserConfiguration);</span> <span class="pc bpc" id="L225" title="1 of 2 branches missed."> if (p instanceof MtasBasicParser) {</span> <span class="fc" id="L226"> MtasBasicParser parser = (MtasBasicParser) p;</span> <span class="fc" id="L227"> tokenCollection = parser.createTokenCollection(reader);</span> <span class="fc" id="L228"> return;</span> } else { <span class="nc" id="L230"> throw new MtasConfigException("no instance of MtasParser");</span> } <span class="nc" id="L232"> } catch (MtasParserException e) {</span> <span class="nc" id="L233"> log.debug(e);</span> <span class="nc" id="L234"> tokenCollection = new MtasTokenCollection();</span> <span class="nc" id="L235"> throw new MtasParserException(e.getMessage());</span> <span class="nc" id="L236"> } catch (NoSuchMethodException | InvocationTargetException</span> | IllegalAccessException | ClassNotFoundException | InstantiationException e) { <span class="nc" id="L239"> log.debug(e);</span> <span class="nc" id="L240"> throw new MtasConfigException(</span> <span class="nc" id="L241"> e.getClass().getName() + " : '" + e.getMessage() + "'");</span> } } /** * Read configuration file. * * @param configFile the config file */ private void readConfigurationFile(final String configFile) { InputStream is; try { <span class="nc" id="L254"> is = new FileInputStream(configFile);</span> <span class="nc" id="L255"> processConfiguration(MtasConfiguration.readConfiguration(is));</span> <span class="nc" id="L256"> is.close();</span> <span class="nc" id="L257"> } catch (FileNotFoundException e) {</span> <span class="nc" id="L258"> log.error("Couldn't find " + configFile, e);</span> <span class="nc" id="L259"> } catch (IOException e) {</span> <span class="nc" id="L260"> log.error("Couldn't read " + configFile, e);</span> <span class="nc" id="L261"> }</span> <span class="nc" id="L262"> }</span> /** * Process configuration. * * @param config the config * @throws IOException Signals that an I/O exception has occurred. */ private void processConfiguration(final MtasConfiguration config) throws IOException { <span class="fc" id="L272"> final String nameIndex = "index";</span> <span class="fc" id="L273"> final String nameParser = "parser";</span> <span class="fc" id="L274"> final String nameName = "name";</span> <span class="fc" id="L275"> final String valueTrue = "true";</span> <span class="fc" id="L276"> final String valueFalse = "false";</span> <span class="fc" id="L277"> final String value0 = "0";</span> <span class="fc" id="L278"> final String value1 = "1";</span> <span class="fc" id="L279"> HashMap<String, Integer> indexEncodingMapper = new HashMap<>();</span> <span class="fc" id="L280"> indexEncodingMapper.put("payload", MtasPayloadEncoder.ENCODE_PAYLOAD);</span> <span class="fc" id="L281"> indexEncodingMapper.put("offset", MtasPayloadEncoder.ENCODE_OFFSET);</span> <span class="fc" id="L282"> indexEncodingMapper.put("realoffset", MtasPayloadEncoder.ENCODE_REALOFFSET);</span> <span class="fc" id="L283"> indexEncodingMapper.put("parent", MtasPayloadEncoder.ENCODE_PARENT);</span> // process <span class="pc bpc" id="L285" title="1 of 2 branches missed."> if (config != null) {</span> <span class="fc bfc" id="L286" title="All 2 branches covered."> for (int i = 0; i < config.children.size(); i++) {</span> <span class="fc bfc" id="L287" title="All 2 branches covered."> if (config.children.get(i).name.equals(nameIndex)) {</span> <span class="fc" id="L288"> MtasConfiguration index = config.children.get(i);</span> <span class="fc bfc" id="L289" title="All 2 branches covered."> for (int j = 0; j < index.children.size(); j++) {</span> <span class="pc bpc" id="L290" title="1 of 2 branches missed."> if (indexEncodingMapper.containsKey(index.children.get(j).name)) {</span> <span class="fc" id="L291"> String value = index.children.get(j).attributes.get(nameIndex);</span> <span class="pc bpc" id="L292" title="1 of 4 branches missed."> if ((value.equals(valueTrue)) || (value.equals(value1))) {</span> <span class="fc" id="L293"> encodingFlags |= indexEncodingMapper</span> <span class="fc" id="L294"> .get(index.children.get(j).name);</span> <span class="pc bpc" id="L295" title="3 of 4 branches missed."> } else if ((value.equals(valueFalse)) || (value.equals(value0))) {</span> <span class="fc" id="L296"> encodingFlags &= ~indexEncodingMapper</span> <span class="fc" id="L297"> .get(index.children.get(j).name);</span> } } } <span class="pc bpc" id="L301" title="1 of 2 branches missed."> } else if (config.children.get(i).name.equals(nameParser)) {</span> <span class="pc bpc" id="L302" title="1 of 2 branches missed."> if (config.children.get(i).attributes.containsKey(nameName)) {</span> <span class="fc" id="L303"> parserName = config.children.get(i).attributes.get(nameName);</span> <span class="fc" id="L304"> parserConfiguration = config.children.get(i);</span> } else { <span class="nc" id="L306"> throw new IOException("no parser configuration");</span> } } } } else { <span class="nc" id="L311"> throw new IOException("no (valid) configuration");</span> } <span class="fc" id="L313"> }</span> /* * (non-Javadoc) * * @see org.apache.lucene.util.AttributeSource#equals(java.lang.Object) */ @Override public boolean equals(Object obj) { <span class="nc bnc" id="L322" title="All 2 branches missed."> if (this == obj)</span> <span class="nc" id="L323"> return true;</span> <span class="nc bnc" id="L324" title="All 2 branches missed."> if (obj == null)</span> <span class="nc" id="L325"> return false;</span> <span class="nc bnc" id="L326" title="All 2 branches missed."> if (getClass() != obj.getClass())</span> <span class="nc" id="L327"> return false;</span> <span class="nc" id="L328"> final MtasTokenizer that = (MtasTokenizer) obj;</span> <span class="nc" id="L329"> return super.equals(that);</span> } /* * (non-Javadoc) * * @see org.apache.lucene.util.AttributeSource#hashCode() */ @Override public int hashCode() { <span class="nc" id="L339"> return super.hashCode();</span> } } </pre><div class="footer"><span class="right">Created with <a href="http://www.jacoco.org/jacoco">JaCoCo</a> 0.7.9.201702052155</span></div></body></html>