<!-- indexing_formats.html 9.59 KB -->
<!DOCTYPE html>
<!--
 | Generated by Apache Maven Doxia Site Renderer 1.7.4 at 2017-05-16 
 | Rendered using Apache Maven Fluido Skin 1.5
-->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <meta name="Date-Revision-yyyymmdd" content="20170516" />
    <meta http-equiv="Content-Language" content="en" />
    <title>Multi Tier Annotation Search &#x2013; Formats</title>
    <link rel="stylesheet" href="./css/apache-maven-fluido-1.5.min.css" />
    <link rel="stylesheet" href="./css/site.css" />
    <link rel="stylesheet" href="./css/print.css" media="print" />

      
    <script type="text/javascript" src="./js/apache-maven-fluido-1.5.min.js"></script>

                      </head>
        <body class="topBarDisabled">
          
        
    
        <div class="container-fluid">
          <div id="banner">
        <div class="pull-left">
                                <div id="bannerLeft">
                <h2>MTAS</h2>
                </div>
                      </div>
        <div class="pull-right">                  <a href="http://www.meertens.knaw.nl/" id="bannerRight">
                                                                                                <img src="images/meertens.png"  alt="Meertens Instituut" width="93" height="104"/>
                </a>
      </div>
        <div class="clear"><hr/></div>
      </div>

      <div id="breadcrumbs">
        <ul class="breadcrumb">
              
                  <li id="projectVersion">Version: 6.5.1
                          <span class="divider">|</span>
                    </li>
                              <li class="">
                    <a href="index.html" title="Mtas">
        Mtas</a>
                    <span class="divider">/</span>
      </li>
        <li class="active ">Formats</li>
                
              
                  <li id="publishDate" class="pull-right">Last Published: 2017-05-16</li>
            
                            </ul>
      </div>

            
      <div class="row-fluid">
        <div id="leftColumn" class="span2">
          <div class="well sidebar-nav">
              
                <ul class="nav nav-list">
                    <li class="nav-header">Mtas</li>
                              
      <li>
  
                          <a href="index.html" title="Introduction">
          <span class="none"></span>
        Introduction</a>
            </li>
                
      <li>
  
                          <a href="features.html" title="Features">
          <span class="none"></span>
        Features</a>
            </li>
                                                                                                      
      <li>
  
                          <a href="installation.html" title="Getting started">
          <span class="icon-chevron-right"></span>
        Getting started</a>
                  </li>
                                                                                                                                                                                              
      <li>
  
                          <a href="indexing.html" title="Indexing">
          <span class="icon-chevron-down"></span>
        Indexing</a>
                    <ul class="nav nav-list">
                    
      <li>
  
                          <a href="indexing_configuration.html" title="Configuration">
          <span class="none"></span>
        Configuration</a>
            </li>
                    
      <li>
  
                          <a href="indexing_mapping.html" title="Mapping">
          <span class="none"></span>
        Mapping</a>
            </li>
                                                                                                                          
      <li class="active">
  
            <a href="#"><span class="icon-chevron-down"></span>Formats</a>
                  <ul class="nav nav-list">
                    
      <li>
  
                          <a href="indexing_formats_folia.html" title="FoLiA">
          <span class="none"></span>
        FoLiA</a>
            </li>
                    
      <li>
  
                          <a href="indexing_formats_tei.html" title="TEI">
          <span class="none"></span>
        TEI</a>
            </li>
                    
      <li>
  
                          <a href="indexing_formats_sketch.html" title="Sketch">
          <span class="none"></span>
        Sketch</a>
            </li>
                    
      <li>
  
                          <a href="indexing_formats_crm.html" title="CRM">
          <span class="none"></span>
        CRM</a>
            </li>
              </ul>
        </li>
              </ul>
        </li>
                                                                                                                                                                                                                                                                                                                                                                              
      <li>
  
                          <a href="search.html" title="Search">
          <span class="icon-chevron-right"></span>
        Search</a>
                  </li>
                
      <li>
  
                          <a href="download.html" title="Download">
          <span class="none"></span>
        Download</a>
            </li>
                              <li class="nav-header">Project Documentation</li>
                                                                                                                                                                                            
      <li>
  
                          <a href="project-info.html" title="Project Information">
          <span class="icon-chevron-right"></span>
        Project Information</a>
                  </li>
                                                                                                                        
      <li>
  
                          <a href="project-reports.html" title="Project Reports">
          <span class="icon-chevron-right"></span>
        Project Reports</a>
                  </li>
            </ul>
              
                
          <hr />

           <div id="poweredBy">
                            <div class="clear"></div>
                            <div class="clear"></div>
                            <div class="clear"></div>
                            <div class="clear"></div>
                             <a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
        <img class="builtBy" alt="Built by Maven" src="./images/logos/maven-feather.png" />
      </a>
                  </div>
          </div>
        </div>
        
                
        <div id="bodyColumn"  class="span10" >
                                  
            <h1>Formats</h1>
<p>To configure the mapping from resources to the index structure, several parsers are available for different formats:</p>

<ul>
  
<li><a href="indexing_formats_folia.html">MtasFoliaParser</a>: mapping <a class="externalLink" href="https://proycon.github.io/folia/">FoLiA</a> resources</li>
  
<li><a href="indexing_formats_tei.html">MtasTEIParser</a>: mapping <a class="externalLink" href="http://www.tei-c.org/">ISO-TEI</a> resources</li>
  
<li><a href="indexing_formats_sketch.html">MtasSketchParser</a>: mapping <a class="externalLink" href="https://www.sketchengine.co.uk/word-sketch-index-format/">Sketch Engine</a> resources</li>
  
<li><a href="indexing_formats_crm.html">MtasCRMParser</a>: mapping resources in the Corpus Van Reenen-Mulder/Adelheid format</li>
</ul>
<p>For XML-based formats, these parsers often just slightly extend the abstract MtasXMLParser by defining the correct namespaces and root tags. </p>
<p>The <a href="indexing_configuration.html#configuration">configuration file</a> defining the <a href="indexing_mapping.html">mapping</a> contains general settings and more specific settings defining and configuring the parser. </p>
<p>The index part may contain general default settings to be applied in the mapping; the content of the parser part is more specific to the defined Mtas parser.</p>

<div class="source">
<div class="source"><pre class="prettyprint">&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; ?&gt;
&lt;mtas&gt;

  &lt;!-- START MTAS INDEX CONFIGURATION --&gt;
  &lt;index&gt;
    &lt;!-- START GENERAL SETTINGS MTAS INDEX PROCESS --&gt;
    &lt;payload index=&quot;false&quot; /&gt;
    &lt;offset index=&quot;false&quot; /&gt;
    &lt;realoffset index=&quot;false&quot; /&gt;
    &lt;parent index=&quot;true&quot; /&gt;
    &lt;!-- END GENERAL SETTINGS MTAS INDEX PROCESS --&gt;
  &lt;/index&gt;
  &lt;!-- END MTAS INDEX CONFIGURATION --&gt;
  
  &lt;!-- START CONFIGURATION MTAS PARSER --&gt;
  &lt;parser name=&quot;...&quot;&gt;
  ...
    &lt;!-- START MAPPINGS --&gt;
    &lt;mappings&gt;
    ...
    &lt;/mappings&gt;
    &lt;!-- END MAPPINGS --&gt;
    ...
  &lt;/parser&gt;
  &lt;!-- END CONFIGURATION MTAS PARSER --&gt;
 
&lt;/mtas&gt;  
</pre></div></div>
                  </div>
            </div>
          </div>

    <hr/>

    <footer>
            <div class="container-fluid">
                      <div class="row-fluid">
                                      <p >Copyright &copy;                    2017
                        <a href="http://www.meertens.knaw.nl/">Meertens Institute</a>.
            All rights reserved.    
      </p>
                </div>

        
                </div>
    </footer>
        </body>
</html>