indexing_formats_tei.html 12.3 KB
<!DOCTYPE html>
<!--
 | Generated by Apache Maven Doxia Site Renderer 1.7.4 at 2017-05-16 
 | Rendered using Apache Maven Fluido Skin 1.5
-->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <meta name="Date-Revision-yyyymmdd" content="20170516" />
    <meta http-equiv="Content-Language" content="en" />
    <title>Multi Tier Annotation Search &#x2013; TEI</title>
    <link rel="stylesheet" href="./css/apache-maven-fluido-1.5.min.css" />
    <link rel="stylesheet" href="./css/site.css" />
    <link rel="stylesheet" href="./css/print.css" media="print" />

      
    <script type="text/javascript" src="./js/apache-maven-fluido-1.5.min.js"></script>

                      </head>
        <body class="topBarDisabled">
          
        
    
        <div class="container-fluid">
          <div id="banner">
        <div class="pull-left">
                                <div id="bannerLeft">
                <h2>MTAS</h2>
                </div>
                      </div>
        <div class="pull-right">                  <a href="http://www.meertens.knaw.nl/" id="bannerRight">
                                                                                                <img src="images/meertens.png"  alt="Meertens Instituut" width="93" height="104"/>
                </a>
      </div>
        <div class="clear"><hr/></div>
      </div>

      <div id="breadcrumbs">
        <ul class="breadcrumb">
              
                  <li id="projectVersion">Version: 6.5.1
                          <span class="divider">|</span>
                    </li>
                              <li class="">
                    <a href="index.html" title="Mtas">
        Mtas</a>
                    <span class="divider">/</span>
      </li>
        <li class="active ">TEI</li>
                
              
                  <li id="publishDate" class="pull-right">Last Published: 2017-05-16</li>
            
                            </ul>
      </div>

            
      <div class="row-fluid">
        <div id="leftColumn" class="span2">
          <div class="well sidebar-nav">
              
                <ul class="nav nav-list">
                    <li class="nav-header">Mtas</li>
                              
      <li>
  
                          <a href="index.html" title="Introduction">
          <span class="none"></span>
        Introduction</a>
            </li>
                
      <li>
  
                          <a href="features.html" title="Features">
          <span class="none"></span>
        Features</a>
            </li>
                                                                                                      
      <li>
  
                          <a href="installation.html" title="Getting started">
          <span class="icon-chevron-right"></span>
        Getting started</a>
                  </li>
                                                                                                                                                                                              
      <li>
  
                          <a href="indexing.html" title="Indexing">
          <span class="icon-chevron-down"></span>
        Indexing</a>
                    <ul class="nav nav-list">
                    
      <li>
  
                          <a href="indexing_configuration.html" title="Configuration">
          <span class="none"></span>
        Configuration</a>
            </li>
                    
      <li>
  
                          <a href="indexing_mapping.html" title="Mapping">
          <span class="none"></span>
        Mapping</a>
            </li>
                                                                                                                                
      <li>
  
                          <a href="indexing_formats.html" title="Formats">
          <span class="icon-chevron-down"></span>
        Formats</a>
                    <ul class="nav nav-list">
                    
      <li>
  
                          <a href="indexing_formats_folia.html" title="FoLiA">
          <span class="none"></span>
        FoLiA</a>
            </li>
                    
      <li class="active">
  
            <a href="#"><span class="none"></span>TEI</a>
          </li>
                    
      <li>
  
                          <a href="indexing_formats_sketch.html" title="Sketch">
          <span class="none"></span>
        Sketch</a>
            </li>
                    
      <li>
  
                          <a href="indexing_formats_crm.html" title="CRM">
          <span class="none"></span>
        CRM</a>
            </li>
              </ul>
        </li>
              </ul>
        </li>
                                                                                                                                                                                                                                                                                                                                                                              
      <li>
  
                          <a href="search.html" title="Search">
          <span class="icon-chevron-right"></span>
        Search</a>
                  </li>
                
      <li>
  
                          <a href="download.html" title="Download">
          <span class="none"></span>
        Download</a>
            </li>
                              <li class="nav-header">Project Documentation</li>
                                                                                                                                                                                            
      <li>
  
                          <a href="project-info.html" title="Project Information">
          <span class="icon-chevron-right"></span>
        Project Information</a>
                  </li>
                                                                                                                        
      <li>
  
                          <a href="project-reports.html" title="Project Reports">
          <span class="icon-chevron-right"></span>
        Project Reports</a>
                  </li>
            </ul>
              
                
          <hr />

           <div id="poweredBy">
                            <div class="clear"></div>
                            <div class="clear"></div>
                            <div class="clear"></div>
                            <div class="clear"></div>
                             <a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
        <img class="builtBy" alt="Built by Maven" src="./images/logos/maven-feather.png" />
      </a>
                  </div>
          </div>
        </div>
        
                
        <div id="bodyColumn"  class="span10" >
                                  
            <h1>TEI</h1>
<p>For indexing <a class="externalLink" href="http://www.tei-c.org/">ISO-TEI</a> resources, the <i>mtas.analysis.parser.MtasTEIParser</i> extending the abstract <i>MtasXMLParser</i> is available; full examples of configuration files are provided on <a class="externalLink" href="https://github.com/meertensinstituut/mtas/tree/master/conf/parser/mtas">GitHub</a>.</p>

<div class="source">
<div class="source"><pre class="prettyprint">&lt;!-- START CONFIGURATION MTAS PARSER --&gt;
&lt;parser name=&quot;mtas.analysis.parser.MtasTEIParser&quot;&gt;
...
  &lt;!-- START MAPPINGS --&gt;
  &lt;mappings&gt;
  ...
  &lt;/mappings&gt;
  &lt;!-- END MAPPINGS --&gt;
  ...
&lt;/parser&gt;
&lt;!-- END CONFIGURATION MTAS PARSER --&gt;
</pre></div></div>
<p>The syntax of the parser part in the <a href="indexing_configuration.html#configuration">configuration file</a> is, apart from the <i>name</i> attribute, almost identical to the configuration of the <a href="indexing_formats_folia.html">FoLiA-parser</a>. An additional feature is the definition and use of <i>variables</i>, again illustrated and explained with examples.</p>
<p><b>Variables</b></p>
<p>From occurring elements, variable-mappings may be derived and defined. Just as <i>references</i>, these definitions are placed within a <i>variables</i>-tag outside the <i>mappings</i>-tag within the <i>parser</i> configuration section. In the example below the variable-mapping <i>interval</i> is defined from each occurring <i>when</i>-tag, defining a mapping from the <i>id</i> of the <i>when</i>-tag to the value of the <i>interval</i> attribute.</p>

<div class="source">
<div class="source"><pre class="prettyprint">&lt;variables&gt;
  &lt;variable name=&quot;when&quot; value=&quot;interval&quot;&gt;
    &lt;value&gt;
      &lt;item type=&quot;attribute&quot; name=&quot;interval&quot; /&gt;
    &lt;/value&gt;
  &lt;/variable&gt;
&lt;/variables&gt;
</pre></div></div>
<p>This will define for a TEI resource containing</p>

<div class="source">
<div class="source"><pre class="prettyprint">...
&lt;timeline unit=&quot;s&quot;&gt;
  &lt;when xml:id=&quot;TLI_0&quot;/&gt;
  &lt;when xml:id=&quot;TLI_1&quot; interval=&quot;0.64&quot; since=&quot;#TLI_0&quot;/&gt;
  &lt;when xml:id=&quot;TLI_2&quot; interval=&quot;9.7&quot; since=&quot;#TLI_0&quot;/&gt;
  &lt;when xml:id=&quot;TLI_3&quot; interval=&quot;10.216&quot; since=&quot;#TLI_0&quot;/&gt;
  &lt;when xml:id=&quot;TLI_4&quot; interval=&quot;13.052&quot; since=&quot;#TLI_0&quot;/&gt;
  &lt;when xml:id=&quot;TLI_5&quot; interval=&quot;16.28&quot; since=&quot;#TLI_0&quot;/&gt;
...  
</pre></div></div>
<p>a mapping <i>interval</i> that will map for example &#x201c;TLI_3&#x201d; to &#x201c;10.216&#x201d;. Now, when defining other elements, for example a word, we can refer to this defined <i>variable</i>: </p>

<div class="source">
<div class="source"><pre class="prettyprint">&lt;mapping type=&quot;word&quot; name=&quot;anchor&quot;&gt;
  &lt;token type=&quot;string&quot; offset=&quot;false&quot; realoffset=&quot;false&quot; parent=&quot;false&quot;&gt;
    &lt;pre&gt;
      &lt;item type=&quot;name&quot; /&gt;
      &lt;item type=&quot;string&quot; value=&quot;.time&quot; /&gt;
    &lt;/pre&gt;
    &lt;post&gt;
      &lt;item type=&quot;variableFromAttribute&quot; name=&quot;interval&quot; value=&quot;synch&quot; /&gt;
    &lt;/post&gt;
  &lt;/token&gt;
&lt;/mapping&gt;
</pre></div></div>
<p>describing the mapping for resource elements like</p>

<div class="source">
<div class="source"><pre class="prettyprint">&lt;anchor synch=&quot;#TLI_3&quot;/&gt;
</pre></div></div>
<p>This will define the <i>postfix</i> value of the generated token as the value in the defined mapping <i>interval</i> for the value defined by the <i>synch</i> attribute of the matching <i>anchor</i> tag. In the example above, this will generate a token with <i>prefix</i> &#x201c;anchor.time&#x201d; and <i>postfix</i> &#x201c;10.216&#x201d;.</p>
<p>Furthermore, if for an element in the mapping a <i>start</i> and <i>end</i> is defined, for example</p>

<div class="source">
<div class="source"><pre class="prettyprint">&lt;mapping type=&quot;groupAnnotation&quot; name=&quot;span&quot; start=&quot;from&quot; end=&quot;to&quot;&gt;
...
&lt;/mapping&gt;
</pre></div></div>
<p>the start and end positions of the elements referenced in the defined attributes are used for position and offset of the generated tokens. So, if the source contains</p>

<div class="source">
<div class="source"><pre class="prettyprint">...
&lt;w xml:id=&quot;w115&quot;&gt;hier&lt;/w&gt;
&lt;w xml:id=&quot;w116&quot;&gt;sehn&lt;/w&gt;
&lt;w xml:id=&quot;w117&quot;&gt;wir&lt;/w&gt;
...
</pre></div></div>
<p>and</p>

<div class="source">
<div class="source"><pre class="prettyprint">...
&lt;span from=&quot;#w116&quot; to=&quot;#w116&quot;&gt;sehen&lt;/span&gt;
...
</pre></div></div>
<p>the tokens generated from the groupAnnotation mapping on the <i>span</i>-tag will have the position and offset from the <i>word</i>-tag with <i>id</i> &#x201c;w116&#x201d;.</p>
                  </div>
            </div>
          </div>

    <hr/>

    <footer>
            <div class="container-fluid">
                      <div class="row-fluid">
                                      <p >Copyright &copy;                    2017
                        <a href="http://www.meertens.knaw.nl/">Meertens Institute</a>.
            All rights reserved.    
      </p>
                </div>

        
                </div>
    </footer>
        </body>
</html>