installation_solr.html 15.2 KB
<!DOCTYPE html>
<!--
 | Generated by Apache Maven Doxia Site Renderer 1.7.4 at 2017-05-18 
 | Rendered using Apache Maven Fluido Skin 1.5
-->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <meta name="Date-Revision-yyyymmdd" content="20170518" />
    <meta http-equiv="Content-Language" content="en" />
    <title>Multi Tier Annotation Search &#x2013; Apache Solr</title>
    <link rel="stylesheet" href="./css/apache-maven-fluido-1.5.min.css" />
    <link rel="stylesheet" href="./css/site.css" />
    <link rel="stylesheet" href="./css/print.css" media="print" />

      
    <script type="text/javascript" src="./js/apache-maven-fluido-1.5.min.js"></script>

                      </head>
        <body class="topBarDisabled">
          
        
    
        <div class="container-fluid">
          <div id="banner">
        <div class="pull-left">
                                <div id="bannerLeft">
                <h2>MTAS</h2>
                </div>
                      </div>
        <div class="pull-right">                  <a href="http://www.meertens.knaw.nl/" id="bannerRight">
                                                                                                <img src="images/meertens.png"  alt="Meertens Instituut" width="93" height="104"/>
                </a>
      </div>
        <div class="clear"><hr/></div>
      </div>

      <div id="breadcrumbs">
        <ul class="breadcrumb">
              
                  <li id="projectVersion">Version: 6.5.1
                          <span class="divider">|</span>
                    </li>
                              <li class="">
                    <a href="index.html" title="Mtas">
        Mtas</a>
                    <span class="divider">/</span>
      </li>
        <li class="active ">Apache Solr</li>
                
              
                  <li id="publishDate" class="pull-right">Last Published: 2017-05-18</li>
            
                            </ul>
      </div>

            
      <div class="row-fluid">
        <div id="leftColumn" class="span2">
          <div class="well sidebar-nav">
              
                <ul class="nav nav-list">
                    <li class="nav-header">Mtas</li>
                              
      <li>
  
                          <a href="index.html" title="Introduction">
          <span class="none"></span>
        Introduction</a>
            </li>
                
      <li>
  
                          <a href="features.html" title="Features">
          <span class="none"></span>
        Features</a>
            </li>
                                                                                                                
      <li>
  
                          <a href="installation.html" title="Getting started">
          <span class="icon-chevron-down"></span>
        Getting started</a>
                    <ul class="nav nav-list">
                    
      <li>
  
                          <a href="installation_lucene.html" title="Lucene">
          <span class="none"></span>
        Lucene</a>
            </li>
                    
      <li class="active">
  
            <a href="#"><span class="none"></span>Solr</a>
          </li>
                    
      <li>
  
                          <a href="installation_docker.html" title="Docker">
          <span class="none"></span>
        Docker</a>
            </li>
              </ul>
        </li>
                                                                                                                                                                                    
      <li>
  
                          <a href="indexing.html" title="Indexing">
          <span class="icon-chevron-right"></span>
        Indexing</a>
                  </li>
                                                                                                                                                                                                                                                                                                                                                                              
      <li>
  
                          <a href="search.html" title="Search">
          <span class="icon-chevron-right"></span>
        Search</a>
                  </li>
                
      <li>
  
                          <a href="download.html" title="Download">
          <span class="none"></span>
        Download</a>
            </li>
                              <li class="nav-header">Project Documentation</li>
                                                                                                                                                                                            
      <li>
  
                          <a href="project-info.html" title="Project Information">
          <span class="icon-chevron-right"></span>
        Project Information</a>
                  </li>
                                                                                                                        
      <li>
  
                          <a href="project-reports.html" title="Project Reports">
          <span class="icon-chevron-right"></span>
        Project Reports</a>
                  </li>
            </ul>
              
                
          <hr />

           <div id="poweredBy">
                            <div class="clear"></div>
                            <div class="clear"></div>
                            <div class="clear"></div>
                            <div class="clear"></div>
                             <a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
        <img class="builtBy" alt="Built by Maven" src="./images/logos/maven-feather.png" />
      </a>
                  </div>
          </div>
        </div>
        
                
        <div id="bodyColumn"  class="span10" >
                                  
            <h1>Apache Solr</h1>
<p>Mtas can be used as a plugin for Apache Solr.</p>
<p><b>Prerequisites</b></p>

<ul>
  
<li>Installed <a class="externalLink" href="https://lucene.apache.org/solr/">Apache Solr</a></li>
  
<li>Currently supported and advised version is 6.5.1</li>
</ul>
<p>Start with a new Solr core.</p>
<p><b>Libraries</b></p>
<p>Add the <tt>mtas-6.5.1.jar</tt> to the <tt>lib</tt> directory of the new Solr core. A prebuilt <tt>mtas-6.5.1.jar</tt> is available from the <a href="download.html">download</a> page.</p>
<p>Furthermore, add the <a class="externalLink" href="http://commons.apache.org/proper/commons-math/">Apache Commons Mathematics Library</a> to the <tt>lib</tt> directory of the new Solr core.</p>
<p><b>Solrconfig.xml</b></p>
<p>Some changes have to be made within the <tt>solrconfig.xml</tt> file. Elements have to be added to the <tt>&lt;config/&gt;</tt> element, or existing elements have to be adjusted:</p>
<p>Define a new <b>mtas searchComponent</b>: </p>

<div class="source">
<div class="source"><pre class="prettyprint">&lt;searchComponent name=&quot;mtas&quot; class=&quot;mtas.solr.handler.component.MtasSolrSearchComponent&quot;/&gt;
</pre></div></div>
<p>Add this component to the select requestHandler by inserting the following within the <tt>&lt;requestHandler/&gt;</tt> with name <tt>&quot;/select&quot;</tt>:</p>

<div class="source">
<div class="source"><pre class="prettyprint">&lt;arr name=&quot;last-components&quot;&gt;
  &lt;str&gt;mtas&lt;/str&gt;
&lt;/arr&gt;
</pre></div></div>
<p>Define a new <b>mtas_cql queryParser</b> and <b>mtas_join queryParser</b>:</p>

<div class="source">
<div class="source"><pre class="prettyprint">&lt;queryParser name=&quot;mtas_cql&quot; class=&quot;mtas.solr.search.MtasSolrCQLQParserPlugin&quot;/&gt;
&lt;queryParser name=&quot;mtas_join&quot; class=&quot;mtas.solr.search.MtasSolrJoinQParserPlugin&quot;/&gt;
</pre></div></div>
<p>Define a new <b>mtas requestHandler</b>:</p>

<div class="source">
<div class="source"><pre class="prettyprint">&lt;requestHandler name=&quot;/mtas&quot; class=&quot;mtas.solr.handler.MtasRequestHandler&quot; /&gt;
</pre></div></div>
<p>Define a new updateRequestProcessorChain:</p>

<div class="source">
<div class="source"><pre class="prettyprint">&lt;updateRequestProcessorChain name=&quot;mtasUpdateProcessor&quot;&gt;
  &lt;processor class=&quot;mtas.solr.update.processor.MtasUpdateRequestProcessorFactory&quot; /&gt;
  &lt;processor class=&quot;solr.LogUpdateProcessorFactory&quot; /&gt;
  &lt;processor class=&quot;solr.RunUpdateProcessorFactory&quot; /&gt;
&lt;/updateRequestProcessorChain&gt;
</pre></div></div>
<p>Define or adjust the update requestHandler with this updateRequestProcessorChain:</p>

<div class="source">
<div class="source"><pre class="prettyprint">&lt;requestHandler name=&quot;/update&quot; class=&quot;solr.UpdateRequestHandler&quot;&gt;
  &lt;lst name=&quot;defaults&quot;&gt;
    &lt;str name=&quot;update.chain&quot;&gt;mtasUpdateProcessor&lt;/str&gt;
  &lt;/lst&gt;    
&lt;/requestHandler&gt;
</pre></div></div>
<p>Finally, in these instructions we will use a classic schema instead of the managed-schema, so the configuration must contain:</p>

<div class="source">
<div class="source"><pre class="prettyprint">&lt;schemaFactory class=&quot;ClassicIndexSchemaFactory&quot;/&gt;
</pre></div></div>
<p><b>Schema.xml</b></p>
<p>We extend a (classic) schema with one (or multiple) fields that may contain annotated text, e.g.</p>

<div class="source">
<div class="source"><pre class="prettyprint">&lt;field name=&quot;text&quot; type=&quot;mtas&quot; required=&quot;false&quot; multiValued=&quot;false&quot; indexed=&quot;true&quot; stored=&quot;true&quot; /&gt;
</pre></div></div>
<p>We define the referred Mtas fieldType by</p>

<div class="source">
<div class="source"><pre class="prettyprint">&lt;fieldType name=&quot;mtas&quot; class=&quot;solr.TextField&quot; postingsFormat=&quot;MtasCodec&quot;&gt;
  &lt;analyzer type=&quot;index&quot;&gt;
    &lt;charFilter class=&quot;mtas.analysis.util.MtasCharFilterFactory&quot; type=&quot;url&quot; prefix=&quot;http://localhost/demo/&quot; postfix=&quot;&quot; /&gt;
    &lt;tokenizer class=&quot;mtas.analysis.util.MtasTokenizerFactory&quot; configFile=&quot;folia.xml&quot; /&gt;
  &lt;/analyzer&gt;
  &lt;analyzer type=&quot;query&quot;&gt;
    &lt;tokenizer class=&quot;solr.WhitespaceTokenizerFactory&quot; /&gt;
    &lt;filter class=&quot;mtas.analysis.util.MtasPrefixTokenFilterFactory&quot; prefix=&quot;t&quot; /&gt;
  &lt;/analyzer&gt;
&lt;/fieldType&gt;
</pre></div></div>
<p>The charFilter with class <i>mtas.analysis.util.MtasCharFilterFactory</i> in the index analyzer contains an obligatory attribute <tt>type</tt> and two optional attributes <tt>prefix</tt> and <tt>postfix</tt>. The <i>type</i> can be <i>url</i> or <i>file</i>, referring to either an external url or a file on the filesystem. On indexing, the optional <i>prefix</i> and <i>postfix</i> attributes will be added to the provided value, resulting in a full url or location of a file. </p>
<p>The tokenizer with class <i>mtas.analysis.util.MtasTokenizerFactory</i> in the index analyzer has an attribute <tt>configFile</tt> containing the name of the required tokenizer configuration.</p>
<p>The filter in the query analyzer contains an obligatory attribute <tt>prefix</tt> defining the assumed prefix when this field will be queried directly within Solr.</p>
<p>See <a href="indexing_configuration.html">configuration</a> for more information about the definition of a tokenizer configuration.</p>
<p><b>Multiple tokenizer configurations</b></p>
<p>If multiple tokenizer configurations are required, the Mtas fieldType has to be defined slightly differently: </p>

<div class="source">
<div class="source"><pre class="prettyprint">&lt;fieldType name=&quot;mtas_config&quot; class=&quot;solr.TextField&quot; postingsFormat=&quot;MtasCodec&quot;&gt;
  &lt;analyzer type=&quot;index&quot;&gt;
    &lt;charFilter class=&quot;mtas.analysis.util.MtasCharFilterFactory&quot; config=&quot;mtas.xml&quot; /&gt;
    &lt;tokenizer class=&quot;mtas.analysis.util.MtasTokenizerFactory&quot; config=&quot;mtas.xml&quot; /&gt;
  &lt;/analyzer&gt;
&lt;/fieldType&gt;
&lt;fieldType name=&quot;mtas&quot; class=&quot;mtas.solr.schema.MtasPreAnalyzedField&quot;
  followIndexAnalyzer=&quot;mtas_config&quot; defaultConfiguration=&quot;default&quot;
  configurationFromField=&quot;type&quot; setNumberOfTokens=&quot;numberOfTokens&quot;
  setNumberOfPositions=&quot;numberOfPositions&quot; setSize=&quot;size&quot;
  setError=&quot;error&quot; postingsFormat=&quot;MtasCodec&quot;&gt;
  &lt;analyzer type=&quot;query&quot;&gt;
    &lt;tokenizer class=&quot;solr.WhitespaceTokenizerFactory&quot; /&gt;
    &lt;filter class=&quot;mtas.analysis.util.MtasPrefixTokenFilterFactory&quot; prefix=&quot;t&quot; /&gt;
  &lt;/analyzer&gt;
&lt;/fieldType&gt;
</pre></div></div>
<p>An additional fieldType (here named mtas_config) is defined, containing only an index analyzer. Both the charFilter and tokenizer within this analyzer have an attribute <tt>config</tt> referring to a Mtas configuration file. Depending on the required tokenizer configuration, for the charFilter this file will define <i>type</i>, <i>prefix</i> and <i>postfix</i> and for the tokenizer this file will define the <i>configFile</i>. An example of a Mtas configuration file is added below.</p>
<p>The Mtas fieldType is defined with class <i>mtas.solr.schema.MtasPreAnalyzedField</i> and an obligatory attribute <tt>followIndexAnalyzer</tt> referring to the additional fieldType we defined before. The optional attribute <tt>defaultConfiguration</tt> contains the name of the default configuration to be used, and the obligatory attribute <tt>configurationFromField</tt> contains the name of the field defining the required configuration. The optional attributes <tt>setNumberOfTokens</tt>, <tt>setNumberOfPositions</tt>, <tt>setSize</tt> and <tt>setError</tt> define fields that may be filled with, respectively, the number of tokens, the number of positions, the file size and possible errors.</p>
<p><b>Example of a Mtas configuration file</b></p>

<div class="source">
<div class="source"><pre class="prettyprint">&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; ?&gt;
&lt;mtas&gt;
  &lt;configurations type=&quot;mtas.analysis.util.MtasTokenizerFactory&quot;&gt;
    &lt;configuration name=&quot;folia&quot; file=&quot;folia.xml&quot; /&gt;
    &lt;configuration name=&quot;tei&quot; file=&quot;tei.xml&quot; /&gt;
  &lt;/configurations&gt;
  &lt;configurations type=&quot;mtas.analysis.util.MtasCharFilterFactory&quot;&gt;
    &lt;configuration name=&quot;folia&quot; type=&quot;url&quot; prefix=&quot;http://www.mycompany.com/archive/&quot; postfix=&quot;.xml&quot; /&gt;
    &lt;configuration name=&quot;tei&quot; type=&quot;file&quot; prefix=&quot;/storage/tei/&quot; postfix=&quot;&quot; /&gt;
  &lt;/configurations&gt;
&lt;/mtas&gt;
</pre></div></div>
                  </div>
            </div>
          </div>

    <hr/>

    <footer>
            <div class="container-fluid">
                      <div class="row-fluid">
                                      <p >Copyright &copy;                    2017
                        <a href="http://www.meertens.knaw.nl/">Meertens Institute</a>.
            All rights reserved.    
      </p>
                </div>

        
                </div>
    </footer>
        </body>
</html>