Commit bd7f5abb07ff32954b95699545ac6194c0a44c7f

Authored by Mateusz Kopeć
1 parent 62ccdfdc

1.3 release

doc/compile.sh 0 → 100755
  1 +#!/bin/bash
  2 +
  3 +pdflatex manual.tex
  4 +bibtex manual.aux
  5 +pdflatex manual.tex
  6 +pdflatex manual.tex
  7 +
  8 +rm manual.aux
  9 +rm manual.bbl
  10 +rm manual.blg
  11 +rm manual.log
0 12 \ No newline at end of file
... ...
doc/manual.pdf
No preview for this file type
doc/manual.tex
... ... @@ -38,10 +38,10 @@ The current version of the program facilitates the automatic mention detection,
38 38 MentionDetector uses information provided in its input to produce mentions for coreference resolution. It merges entities provided by named entity recognition tools, shallow parsers and taggers.
39 39  
40 40 It also finds zero subjects in clauses and marks the verbs using zero subjects as mentions, using the algorithm presented in \cite{kop:14:eacl:short}, for which a model was trained using the full Polish Coreference Corpus, version 0.92 (corpus description in \cite{ogro:etal:13:ltc}). Training data had 15875 positive and 37798 negative examples; 10-fold cross validation yielded an accuracy of 86.14\% for the task of finding zero subjects. Precision of 79.8\% and recall of 71.2\% were obtained for the zero subject class of verbs.
41   -
  41 +
42 42 \textbf{Homepage:} \url{http://zil.ipipan.waw.pl/MentionDetector} \\
43 43 \textbf{Contact person:} Mateusz Kopeć [mateusz.kopec@ipipan.waw.pl] \\
44   -\textbf{Author:} Mateusz Kopeć \\
  44 +\textbf{Author:} Mateusz Kopeć \\
45 45 \textbf{License:} CC BY v.3
46 46  
47 47  
... ... @@ -49,7 +49,7 @@ It also finds zero subjects in clauses and marks the verbs using zero subjects a
49 49 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
50 50  
51 51 \section{Requirements}
52   -Java Runtime Environment (JRE) 1.7 or newer.
  52 +Java Runtime Environment (JRE) 1.8 or newer.
53 53  
54 54 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
55 55 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
... ... @@ -143,7 +143,7 @@ Zero subjects are distinguished from other mentions by having an additional feat
143 143  
144 144 The standalone jar doesn't need any installation. To run it, simply execute:\\
145 145  
146   -\texttt{java -jar md-1.0-SNAPSHOT.one-jar.jar <dir with input texts> <dir for output texts>}\\
  146 +\texttt{java -jar md-1.3-jar-with-dependencies.jar <dir with input texts> <dir for output texts>}\\
147 147  
148 148 All texts recursively found in \texttt{<dir with input texts>} are going to be annotated with mentions layer and saved in \texttt{<dir for output texts>}.\\
149 149  
... ... @@ -153,7 +153,7 @@ All texts recursively found in \texttt{<dir with input texts>} are going to be a
153 153 \section{Custom zero subject detection model}
154 154 If you want to use a custom zero subject detection model, you may try:\\
155 155  
156   -\texttt{java -jar md-1.0-SNAPSHOT.one-jar.jar <dir with input texts> <dir for output texts> <model\_path>}
  156 +\texttt{java -jar md-1.3-jar-with-dependencies.jar <dir with input texts> <dir for output texts> <model\_path>}
157 157  
158 158 To create such a model, use the \texttt{pl.waw.ipipan.zil.core.md.detection.zero.Trainer} class.
159 159  
... ...
1 1 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2   - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3   - <modelVersion>4.0.0</modelVersion>
4   - <groupId>pl.waw.ipipan.zil.core</groupId>
5   - <artifactId>md</artifactId>
6   - <version>1.2-SNAPSHOT</version>
7   - <properties>
8   - <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
9   - </properties>
10   - <build>
11   - <plugins>
12   - <plugin>
13   - <artifactId>maven-compiler-plugin</artifactId>
14   - <version>2.3.2</version>
15   - <configuration>
16   - <source>1.7</source>
17   - <target>1.7</target>
18   - </configuration>
19   - </plugin>
20   - <plugin>
21   - <artifactId>maven-source-plugin</artifactId>
22   - <version>2.4</version>
23   - <executions>
24   - <execution>
25   - <id>attach-sources</id>
26   - <phase>deploy</phase>
27   - <goals>
28   - <goal>jar-no-fork</goal>
29   - </goals>
30   - </execution>
31   - </executions>
32   - </plugin>
33   - <plugin>
34   - <artifactId>maven-javadoc-plugin</artifactId>
35   - <version>2.10.3</version>
36   - <executions>
37   - <execution>
38   - <id>attach-javadocs</id>
39   - <phase>deploy</phase>
40   - <goals>
41   - <goal>jar</goal>
42   - </goals>
43   - </execution>
44   - </executions>
45   - </plugin>
46   - <plugin>
47   - <!-- explicitly define maven-deploy-plugin after other to force exec
48   - order -->
49   - <artifactId>maven-deploy-plugin</artifactId>
50   - <version>2.7</version>
51   - <executions>
52   - <execution>
53   - <id>deploy</id>
54   - <phase>deploy</phase>
55   - <goals>
56   - <goal>deploy</goal>
57   - </goals>
58   - </execution>
59   - </executions>
60   - </plugin>
61   - <plugin>
62   - <groupId>org.dstovall</groupId>
63   - <artifactId>onejar-maven-plugin</artifactId>
64   - <version>1.4.4</version>
65   - <executions>
66   - <execution>
67   - <configuration>
68   - <mainClass>pl.waw.ipipan.zil.core.md.Main</mainClass>
69   - </configuration>
70   - <goals>
71   - <goal>one-jar</goal>
72   - </goals>
73   - </execution>
74   - </executions>
75   - </plugin>
76   - </plugins>
77   - </build>
78   - <dependencies>
79   - <dependency>
80   - <groupId>log4j</groupId>
81   - <artifactId>log4j</artifactId>
82   - <version>1.2.17</version>
83   - </dependency>
84   - <dependency>
85   - <groupId>pl.waw.ipipan.zil.multiservice</groupId>
86   - <artifactId>utils</artifactId>
87   - <version>1.0-SNAPSHOT</version>
88   - </dependency>
89   - <dependency>
90   - <groupId>pl.waw.ipipan.zil.nkjp</groupId>
91   - <artifactId>teiapi</artifactId>
92   - <version>1.0-SNAPSHOT</version>
93   - </dependency>
94   - <dependency>
95   - <groupId>junit</groupId>
96   - <artifactId>junit</artifactId>
97   - <version>4.11</version>
98   - </dependency>
99   - <dependency>
100   - <groupId>nz.ac.waikato.cms.weka</groupId>
101   - <artifactId>weka-stable</artifactId>
102   - <version>3.6.10</version>
103   - </dependency>
104   - </dependencies>
  2 + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  3 + <modelVersion>4.0.0</modelVersion>
105 4  
106   - <repositories>
107   - <repository>
108   - <id>zil-maven-repo</id>
109   - <name>ZIL maven repository</name>
110   - <url>http://maven.nlp.ipipan.waw.pl/content/repositories/snapshots</url>
111   - </repository>
112   - </repositories>
  5 + <groupId>pl.waw.ipipan.zil.core</groupId>
  6 + <artifactId>md</artifactId>
  7 + <version>1.3</version>
113 8  
114   - <pluginRepositories>
115   - <pluginRepository>
116   - <id>onejar-maven-plugin.googlecode.com</id>
117   - <url>http://onejar-maven-plugin.googlecode.com/svn/mavenrepo</url>
118   - </pluginRepository>
119   - </pluginRepositories>
  9 + <developers>
  10 + <developer>
  11 + <name>Mateusz Kopeć</name>
  12 + <organization>ICS PAS</organization>
  13 + <email>m.kopec@ipipan.waw.pl</email>
  14 + </developer>
  15 + </developers>
120 16  
121   - <distributionManagement>
122   - <repository>
123   - <id>deployment</id>
124   - <url>http://maven.nlp.ipipan.waw.pl/content/repositories/releases/</url>
125   - </repository>
126   - <snapshotRepository>
127   - <id>deployment</id>
128   - <url>http://maven.nlp.ipipan.waw.pl/content/repositories/snapshots/</url>
129   - </snapshotRepository>
130   - </distributionManagement>
  17 + <properties>
  18 + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  19 + <java.version>1.8</java.version>
  20 +
  21 + <junit.version>4.12</junit.version>
  22 + <slf4j.version>1.7.21</slf4j.version>
  23 + </properties>
  24 +
  25 + <prerequisites>
  26 + <maven>3.0.5</maven>
  27 + </prerequisites>
  28 +
  29 + <build>
  30 + <pluginManagement>
  31 + <plugins>
  32 + <plugin>
  33 + <artifactId>maven-compiler-plugin</artifactId>
  34 + <version>3.5.1</version>
  35 + <configuration>
  36 + <source>${java.version}</source>
  37 + <target>${java.version}</target>
  38 + </configuration>
  39 + </plugin>
  40 + <plugin>
  41 + <artifactId>maven-clean-plugin</artifactId>
  42 + <version>3.0.0</version>
  43 + </plugin>
  44 + <plugin>
  45 + <artifactId>maven-install-plugin</artifactId>
  46 + <version>2.5.2</version>
  47 + </plugin>
  48 + <plugin>
  49 + <artifactId>maven-jar-plugin</artifactId>
  50 + <version>3.0.2</version>
  51 + </plugin>
  52 + <plugin>
  53 + <artifactId>maven-resources-plugin</artifactId>
  54 + <version>3.0.1</version>
  55 + </plugin>
  56 + <plugin>
  57 + <artifactId>maven-site-plugin</artifactId>
  58 + <version>3.5.1</version>
  59 + </plugin>
  60 + <plugin>
  61 + <artifactId>maven-surefire-plugin</artifactId>
  62 + <version>2.19.1</version>
  63 + </plugin>
  64 +
  65 + <plugin>
  66 + <artifactId>maven-source-plugin</artifactId>
  67 + <version>3.0.1</version>
  68 + <executions>
  69 + <execution>
  70 + <id>attach-sources</id>
  71 + <phase>deploy</phase>
  72 + <goals>
  73 + <goal>jar-no-fork</goal>
  74 + </goals>
  75 + </execution>
  76 + </executions>
  77 + </plugin>
  78 + <plugin>
  79 + <artifactId>maven-javadoc-plugin</artifactId>
  80 + <version>2.10.4</version>
  81 + <executions>
  82 + <execution>
  83 + <id>attach-javadocs</id>
  84 + <phase>deploy</phase>
  85 + <goals>
  86 + <goal>jar</goal>
  87 + </goals>
  88 + </execution>
  89 + </executions>
  90 + </plugin>
  91 + <plugin>
  92 + <!-- explicitly define maven-deploy-plugin after other to force exec order -->
  93 + <artifactId>maven-deploy-plugin</artifactId>
  94 + <version>2.8.2</version>
  95 + <executions>
  96 + <execution>
  97 + <id>deploy</id>
  98 + <phase>deploy</phase>
  99 + <goals>
  100 + <goal>deploy</goal>
  101 + </goals>
  102 + </execution>
  103 + </executions>
  104 + </plugin>
  105 + <plugin>
  106 + <artifactId>maven-assembly-plugin</artifactId>
  107 + <version>2.6</version>
  108 + </plugin>
  109 + </plugins>
  110 + </pluginManagement>
  111 +
  112 + <plugins>
  113 + <plugin>
  114 + <artifactId>maven-assembly-plugin</artifactId>
  115 + <configuration>
  116 + <descriptorRefs>
  117 + <descriptorRef>jar-with-dependencies</descriptorRef>
  118 + </descriptorRefs>
  119 + <archive>
  120 + <manifest>
  121 + <mainClass>pl.waw.ipipan.zil.core.md.Main</mainClass>
  122 + </manifest>
  123 + </archive>
  124 + </configuration>
  125 + <executions>
  126 + <execution>
  127 + <id>make-assembly</id>
  128 + <phase>package</phase>
  129 + <goals>
  130 + <goal>single</goal>
  131 + </goals>
  132 + </execution>
  133 + </executions>
  134 + </plugin>
  135 + </plugins>
  136 + </build>
  137 +
  138 + <dependencies>
  139 + <!-- internal -->
  140 + <dependency>
  141 + <groupId>pl.waw.ipipan.zil.multiservice</groupId>
  142 + <artifactId>utils</artifactId>
  143 + <version>1.0</version>
  144 + </dependency>
  145 + <dependency>
  146 + <groupId>pl.waw.ipipan.zil.nkjp</groupId>
  147 + <artifactId>teiapi</artifactId>
  148 + <version>1.0</version>
  149 + </dependency>
  150 +
  151 + <!-- third party -->
  152 + <dependency>
  153 + <groupId>nz.ac.waikato.cms.weka</groupId>
  154 + <artifactId>weka-stable</artifactId>
  155 + <version>3.6.10</version>
  156 + </dependency>
  157 +
  158 + <!-- logging -->
  159 + <dependency>
  160 + <groupId>org.slf4j</groupId>
  161 + <artifactId>slf4j-api</artifactId>
  162 + <version>1.7.21</version>
  163 + </dependency>
  164 + <dependency>
  165 + <groupId>org.slf4j</groupId>
  166 + <artifactId>slf4j-simple</artifactId>
  167 + <version>1.7.21</version>
  168 + <scope>runtime</scope>
  169 + </dependency>
  170 +
  171 + <!-- test -->
  172 + <dependency>
  173 + <groupId>junit</groupId>
  174 + <artifactId>junit</artifactId>
  175 + <version>4.12</version>
  176 + <scope>test</scope>
  177 + </dependency>
  178 +
  179 + </dependencies>
  180 +
  181 + <repositories>
  182 + <repository>
  183 + <id>zil-maven-snapshot-repo</id>
  184 + <name>ZIL maven snapshot repository</name>
  185 + <url>http://maven.nlp.ipipan.waw.pl/content/repositories/snapshots/</url>
  186 + </repository>
  187 + <repository>
  188 + <id>zil-maven-release-repo</id>
  189 + <name>ZIL maven release repository</name>
  190 + <url>http://maven.nlp.ipipan.waw.pl/content/repositories/releases/</url>
  191 + </repository>
  192 + <repository>
  193 + <id>zil-maven-repo-3rdparty</id>
  194 + <name>ZIL maven repository 3rdparty</name>
  195 + <url>http://maven.nlp.ipipan.waw.pl/content/repositories/thirdparty/</url>
  196 + </repository>
  197 + </repositories>
  198 +
  199 + <distributionManagement>
  200 + <repository>
  201 + <id>deployment</id>
  202 + <url>http://maven.nlp.ipipan.waw.pl/content/repositories/releases/</url>
  203 + </repository>
  204 + <snapshotRepository>
  205 + <id>deployment</id>
  206 + <url>http://maven.nlp.ipipan.waw.pl/content/repositories/snapshots/</url>
  207 + </snapshotRepository>
  208 + </distributionManagement>
131 209 </project>
... ...
src/main/java/pl/waw/ipipan/zil/core/md/Main.java
1 1 package pl.waw.ipipan.zil.core.md;
2 2  
3   -import java.io.File;
4   -import java.io.FileInputStream;
5   -import java.io.IOException;
6   -import java.io.InputStream;
7   -
8   -import org.apache.log4j.Logger;
9   -
  3 +import org.slf4j.Logger;
  4 +import org.slf4j.LoggerFactory;
10 5 import pl.waw.ipipan.zil.core.md.detection.Detector;
11 6 import pl.waw.ipipan.zil.core.md.detection.zero.ZeroSubjectDetector;
12 7 import pl.waw.ipipan.zil.core.md.entities.Text;
... ... @@ -20,134 +15,128 @@ import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEICorpusText;
20 15 import pl.waw.ipipan.zil.nkjp.teiapi.api.exceptions.TEIException;
21 16 import pl.waw.ipipan.zil.nkjp.teiapi.api.io.IOUtils;
22 17  
23   -/**
24   - * @author Mateusz Kopeć
25   - *
26   - */
  18 +import java.io.File;
  19 +import java.io.FileInputStream;
  20 +import java.io.IOException;
  21 +import java.io.InputStream;
  22 +
27 23 public class Main {
28 24  
29   - private final static Logger logger = Logger.getLogger(Main.class);
30   - private final static boolean GZIP_OUTPUT = true;
31   -
32   - private final static String DEFAULT_ZERO_SUBJECT_MODEL = "/zero_subject_model.bin";
33   -
34   - private static ZeroSubjectDetector zeroSubjectModel;
35   - static {
36   - InputStream zeroSubjectDetectionModelStream = Main.class.getResourceAsStream(DEFAULT_ZERO_SUBJECT_MODEL);
37   - zeroSubjectModel = new ZeroSubjectDetector(zeroSubjectDetectionModelStream);
38   - }
39   -
40   - /**
41   - * Main method for detecting mentions in corpus encoded in Tei format.
42   - *
43   - * @param args
44   - * arguments
45   - */
46   - public static void main(String[] args) {
47   -
48   - if (args.length != 2 && args.length != 3) {
49   - logger.error("Wrong usage! should be: " + Main.class.getSimpleName()
50   - + " input_dir result_dir [zero_subject_model]");
51   - return;
52   - }
53   -
54   - File inputDir = new File(args[0]);
55   - File outputDir = new File(args[1]);
56   -
57   - if (!inputDir.isDirectory()) {
58   - logger.error(inputDir + " is not a directory!");
59   - return;
60   - }
61   - if (!outputDir.isDirectory()) {
62   - logger.error(outputDir + " is not a directory!");
63   - return;
64   - }
65   - if (args.length == 3) {
66   - try {
67   - InputStream zeroSubjectDetectionModelStream;
68   - zeroSubjectDetectionModelStream = new FileInputStream(new File(args[2]));
69   - zeroSubjectModel = new ZeroSubjectDetector(zeroSubjectDetectionModelStream);
70   - if (zeroSubjectModel == null)
71   - throw new IOException();
72   - } catch (IOException e) {
73   - logger.error("Unable to load model from file: " + args[2] + ": " + e);
74   - return;
75   - }
76   - }
77   -
78   - int all = 0;
79   - int errors = 0;
80   - for (File teiDir : IOUtils.getNKJPDirs(inputDir)) {
81   - all++;
82   - try {
83   - File targetDir = createTargetTextDir(inputDir, outputDir, teiDir);
84   - TEICorpusText teiText = TeiLoader.readTeiText(teiDir);
85   - annotateTeiText(teiText);
86   - TeiSaver.saveTeiText(teiText, targetDir, GZIP_OUTPUT);
87   - } catch (IOException e) {
88   - logger.error("Error processing text in dir:" + teiDir + " Error details: " + e.getLocalizedMessage());
89   - errors++;
90   - }
91   - }
92   -
93   - logger.info(all + " texts processed succesfully.");
94   - if (errors > 0)
95   - logger.info(errors + " texts not processed.");
96   - logger.info(ZeroSubjectDetector.verbsWithoutSubject + " verbs with zero subject detected.");
97   - logger.info(ZeroSubjectDetector.verbsWithSubject + " verbs with explicit subject detected.");
98   - }
99   -
100   - /**
101   - * Find relative path of text directory in the corpus directory and create
102   - * similar directory structure in the output corpus directory.
103   - *
104   - * @param inputCorpusDir
105   - * input corpus directory
106   - * @param outputCorpusDir
107   - * output corpus directory
108   - * @param textDir
109   - * input text dir
110   - * @return target text dir
111   - * @throws IOException
112   - * when an error occurs
113   - */
114   - private static File createTargetTextDir(File inputCorpusDir, File outputCorpusDir, File textDir) throws IOException {
115   - String relativeDirPath = textDir.toString().substring(inputCorpusDir.toString().length());
116   - File targetDir = new File(outputCorpusDir, relativeDirPath);
117   - targetDir.mkdirs();
118   - if (!targetDir.exists() || !targetDir.isDirectory())
119   - throw new IOException("Failed to create output directory at: " + targetDir);
120   - return targetDir;
121   - }
122   -
123   - /**
124   - * Find mentions in Thrift text and update this Thrift text with mention
125   - * annotation.
126   - *
127   - * @param thriftText
128   - * text to annotate with mentions
129   - * @throws MultiserviceException
130   - * when an error occures
131   - */
132   - public static void annotateThriftText(TText thriftText) throws MultiserviceException {
133   - Text responseText = ThriftLoader.loadTextFromThrift(thriftText);
134   - Detector.findMentionsInText(responseText, zeroSubjectModel);
135   - ThriftSaver.updateThriftText(responseText, thriftText);
136   - }
137   -
138   - /**
139   - * Find mentions in Tei text and update this Tei text with mention
140   - * annotation. This method does not save this Tei text on disk.
141   - *
142   - * @param teiText
143   - * text to annotate with mentions
144   - * @throws TEIException
145   - * when an error occurs
146   - */
147   - public static void annotateTeiText(TEICorpusText teiText) throws TEIException {
148   - Text responseText = TeiLoader.loadTextFromTei(teiText);
149   - Detector.findMentionsInText(responseText, zeroSubjectModel);
150   - TeiSaver.updateTeiText(responseText, teiText);
151   - }
  25 + private static final Logger logger = LoggerFactory.getLogger(Main.class);
  26 +
  27 + private static final boolean GZIP_OUTPUT = true;
  28 + private static final String DEFAULT_ZERO_SUBJECT_MODEL = "/zero_subject_model.bin";
  29 +
  30 + private static ZeroSubjectDetector zeroSubjectModel;
  31 +
  32 + static {
  33 + InputStream zeroSubjectDetectionModelStream = Main.class.getResourceAsStream(DEFAULT_ZERO_SUBJECT_MODEL);
  34 + zeroSubjectModel = new ZeroSubjectDetector(zeroSubjectDetectionModelStream);
  35 + }
  36 +
  37 + private Main() {
  38 + }
  39 +
  40 + /**
  41 + * Main method for detecting mentions in corpus encoded in Tei format.
  42 + *
  43 + * @param args arguments
  44 + */
  45 + public static void main(String[] args) {
  46 +
  47 + if (args.length != 2 && args.length != 3) {
  48 + logger.error("Wrong usage! should be: " + Main.class.getSimpleName()
  49 + + " input_dir result_dir [zero_subject_model]");
  50 + return;
  51 + }
  52 +
  53 + File inputDir = new File(args[0]);
  54 + File outputDir = new File(args[1]);
  55 +
  56 + if (!inputDir.isDirectory()) {
  57 + logger.error(inputDir + " is not a directory!");
  58 + return;
  59 + }
  60 + if (!outputDir.isDirectory()) {
  61 + logger.error(outputDir + " is not a directory!");
  62 + return;
  63 + }
  64 + if (args.length == 3) {
  65 + try {
  66 + InputStream zeroSubjectDetectionModelStream;
  67 + zeroSubjectDetectionModelStream = new FileInputStream(new File(args[2]));
  68 + zeroSubjectModel = new ZeroSubjectDetector(zeroSubjectDetectionModelStream);
  69 + } catch (IOException e) {
  70 + logger.error("Unable to load model from file: " + args[2] + ": " + e, e);
  71 + return;
  72 + }
  73 + }
  74 +
  75 + int all = 0;
  76 + int errors = 0;
  77 + for (File teiDir : IOUtils.getNKJPDirs(inputDir)) {
  78 + all++;
  79 + try {
  80 + File targetDir = createTargetTextDir(inputDir, outputDir, teiDir);
  81 + TEICorpusText teiText = TeiLoader.readTeiText(teiDir);
  82 + annotateTeiText(teiText);
  83 + TeiSaver.saveTeiText(teiText, targetDir, GZIP_OUTPUT);
  84 + } catch (IOException e) {
  85 + logger.error("Error processing text in dir:" + teiDir + " Error details: " + e.getLocalizedMessage(), e);
  86 + errors++;
  87 + }
  88 + }
  89 +
  90 + logger.info(all + " texts processed successfully.");
  91 + if (errors > 0)
  92 + logger.info(errors + " texts not processed.");
  93 + logger.info(ZeroSubjectDetector.verbsWithoutSubject + " verbs with zero subject detected.");
  94 + logger.info(ZeroSubjectDetector.verbsWithSubject + " verbs with explicit subject detected.");
  95 + }
  96 +
  97 + /**
  98 + * Find relative path of text directory in the corpus directory and create
  99 + * similar directory structure in the output corpus directory.
  100 + *
  101 + * @param inputCorpusDir input corpus directory
  102 + * @param outputCorpusDir output corpus directory
  103 + * @param textDir input text dir
  104 + * @return target text dir
  105 + * @throws IOException when an error occurs
  106 + */
  107 + private static File createTargetTextDir(File inputCorpusDir, File outputCorpusDir, File textDir) throws IOException {
  108 + String relativeDirPath = textDir.toString().substring(inputCorpusDir.toString().length());
  109 + File targetDir = new File(outputCorpusDir, relativeDirPath);
  110 + targetDir.mkdirs();
  111 + if (!targetDir.exists() || !targetDir.isDirectory())
  112 + throw new IOException("Failed to create output directory at: " + targetDir);
  113 + return targetDir;
  114 + }
  115 +
  116 + /**
  117 + * Find mentions in Thrift text and update this Thrift text with mention
  118 + * annotation.
  119 + *
  120 + * @param thriftText text to annotate with mentions
  121 + * @throws MultiserviceException when an error occurs
  122 + */
  123 + public static void annotateThriftText(TText thriftText) throws MultiserviceException {
  124 + Text responseText = ThriftLoader.loadTextFromThrift(thriftText);
  125 + Detector.findMentionsInText(responseText, zeroSubjectModel);
  126 + ThriftSaver.updateThriftText(responseText, thriftText);
  127 + }
  128 +
  129 + /**
  130 + * Find mentions in Tei text and update this Tei text with mention
  131 + * annotation. This method does not save this Tei text on disk.
  132 + *
  133 + * @param teiText text to annotate with mentions
  134 + * @throws TEIException when an error occurs
  135 + */
  136 + public static void annotateTeiText(TEICorpusText teiText) throws TEIException {
  137 + Text responseText = TeiLoader.loadTextFromTei(teiText);
  138 + Detector.findMentionsInText(responseText, zeroSubjectModel);
  139 + TeiSaver.updateTeiText(responseText, teiText);
  140 + }
152 141  
153 142 }
... ...
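
Note on the refactored Main: besides the command-line entry point it keeps two public helpers, annotateTeiText and annotateThriftText, so the detector can be embedded in other code. A minimal sketch of the TEI variant, mirroring the loop in main() above (the paths and class name are hypothetical, and the TeiSaver import is assumed to sit next to TeiLoader in pl.waw.ipipan.zil.core.md.io.tei):

// Sketch only, not part of this commit; exceptions are simply propagated.
import pl.waw.ipipan.zil.core.md.Main;
import pl.waw.ipipan.zil.core.md.io.tei.TeiLoader;
import pl.waw.ipipan.zil.core.md.io.tei.TeiSaver;
import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEICorpusText;

import java.io.File;

public class AnnotateSingleText {
    public static void main(String[] args) throws Exception {
        File teiDir = new File("input/text001");         // one NKJP-style text directory (hypothetical path)
        File targetDir = new File("output/text001");     // must already exist
        TEICorpusText teiText = TeiLoader.readTeiText(teiDir);
        Main.annotateTeiText(teiText);                   // adds the mentions layer in place
        TeiSaver.saveTeiText(teiText, targetDir, true);  // true = gzip output, as in Main
    }
}
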
src/main/java/pl/waw/ipipan/zil/core/md/detection/Cleaner.java
1 1 package pl.waw.ipipan.zil.core.md.detection;
2 2  
  3 +import pl.waw.ipipan.zil.core.md.entities.Mention;
  4 +import pl.waw.ipipan.zil.core.md.entities.Sentence;
  5 +import pl.waw.ipipan.zil.core.md.entities.Token;
  6 +
3 7 import java.util.Collection;
4 8 import java.util.HashSet;
5 9 import java.util.List;
6 10 import java.util.Set;
7 11  
8   -import pl.waw.ipipan.zil.core.md.entities.Mention;
9   -import pl.waw.ipipan.zil.core.md.entities.Sentence;
10   -import pl.waw.ipipan.zil.core.md.entities.Token;
11   -
12 12 public class Cleaner {
13   - public static void cleanUnnecessarySentenceMentions(Sentence sentence) {
14   - List<Mention> mentions = sentence.getMentions();
15   - Collection<Mention> unnecessaryMentions = new HashSet<Mention>();
16   -
17   - for (int i = 0; i < mentions.size(); i++) {
18   - Mention m1 = mentions.get(i);
19   - for (int j = i + 1; j < mentions.size(); j++) {
20   - Mention m2 = mentions.get(j);
21   -
22   - Mention lessImportantMention = getLessImportantMention(m1, m2);
23   - Mention moreImportantMention = m1 == lessImportantMention ? m2
24   - : m1;
25   -
26   - // same mention borders
27   - if (m1.getSegments().equals(m2.getSegments())) {
28   - unnecessaryMentions.add(lessImportantMention);
29   - // System.out.println("Same borders: "+ m1 +", "+
30   - // m2+": "+getLessImportantMention(m1, m2)+" removed");
31   - continue;
32   - }
33   - // same mention heads
34   - if (!m1.getHeadSegments().isEmpty()
35   - && !m2.getHeadSegments().isEmpty()) {
36   - if (m1.getHeadSegments().equals(m2.getHeadSegments())) {
37   -
38   - List<Token> segments = moreImportantMention
39   - .getSegments();
40   -
41   - boolean isConj = false;
42   - for (Token seg : segments) {
43   - if (seg.getChosenInterpretation().getCtag()
44   - .equals("conj")) {
45   - isConj = true;
46   - break;
47   - }
48   - }
49   -
50   - if (!isConj) {
51   - unnecessaryMentions.add(lessImportantMention);
52   - // System.out.println("Same heads: " + m1 + ", " +
53   - // m2 + ": " + lessImportantMention
54   - // + " removed");
55   -
56   - continue;
57   - }
58   - }
59   - }
60   -
61   - // mention head equals whole other mention
62   - if (m1.getHeadSegments().isEmpty()
63   - && !m2.getHeadSegments().isEmpty()) {
64   - if (m2.getHeadSegments().equals(m1.getSegments())) {
65   - unnecessaryMentions.add(lessImportantMention);
66   - continue;
67   - // System.out.println("head is other mention: " + m1 +
68   - // ", " + m2 + ": "
69   - // + getLessImportantMention(m1, m2) + " removed");
70   - }
71   - }
72   -
73   - // the same, but other way round
74   - if (m2.getHeadSegments().isEmpty()
75   - && !m1.getHeadSegments().isEmpty()) {
76   -
77   - if (m1.getHeadSegments().equals(m2.getSegments())) {
78   - unnecessaryMentions.add(lessImportantMention);
79   - continue;
80   - // System.out.println("head is other mention: " + m1 +
81   - // ", " + m2 + ": "
82   - // + getLessImportantMention(m1, m2) + " removed");
83   - }
84   - }
85   -
86   - // nie zawieraja sie w sobie, lecz maja czesc wspolna
87   - boolean intersect = false;
88   -
89   - Set<Token> notInM1 = new HashSet<Token>(m2.getSegments());
90   - notInM1.removeAll(m1.getSegments());
91   - if (notInM1.size() < m2.getSegments().size())
92   - intersect = true;
93   -
94   - Set<Token> notInM2 = new HashSet<Token>(m1.getSegments());
95   - notInM2.removeAll(m2.getSegments());
96   - if (notInM2.size() < m1.getSegments().size())
97   - intersect = true;
98   -
99   - // if (intersect)
100   - // System.out.println(m1+","+m2);
101   -
102   - if (intersect && !notInM1.isEmpty() && !notInM2.isEmpty()) {
103   - unnecessaryMentions.add(lessImportantMention);
104   - continue;
105   - // System.out.println("intersection!" + m1 + ", " + m2 +
106   - // ": "
107   - // + getLessImportantMention(m1, m2) + " removed");
108   - }
109   -
110   - }
111   - }
112   -
113   - for (Mention m : unnecessaryMentions)
114   - sentence.removeMention(m);
115   -
116   - // heurystyka dla usuwania rzeczy w stylu: [[Ernest][Kwiecien]]
117   - unnecessaryMentions.clear();
118   -
119   - OUTER: for (Mention m : sentence.getMentions()) {
120   - for (Token seg : m.getSegments())
121   - if (seg.getOrth().toLowerCase().equals(seg.getOrth()))
122   - continue OUTER;
123   -
124   - //only for children of fully capitalized mentions
125   - Set<Mention> allMentions = new HashSet<Mention>();
126   - for (Token seg : m.getSegments())
127   - for (Mention m2 : seg.getMentions())
128   - if (m.getSegments().containsAll(m2.getSegments()))
129   - allMentions.add(m2);
130   -
131   - allMentions.remove(m);
132   -
133   - unnecessaryMentions.addAll(allMentions);
134   - }
135   - for (Mention m : unnecessaryMentions)
136   - sentence.removeMention(m);
137   - }
138   -
139   - private static Mention getLessImportantMention(Mention m1, Mention m2) {
140   - if (m1.getSegments().size() > m2.getSegments().size())
141   - return m2;
142   - else
143   - return m1;
144   - }
  13 + public static void cleanUnnecessarySentenceMentions(Sentence sentence) {
  14 + List<Mention> mentions = sentence.getMentions();
  15 + Collection<Mention> unnecessaryMentions = new HashSet<>();
  16 +
  17 + for (int i = 0; i < mentions.size(); i++) {
  18 + Mention m1 = mentions.get(i);
  19 + for (int j = i + 1; j < mentions.size(); j++) {
  20 + Mention m2 = mentions.get(j);
  21 +
  22 + Mention lessImportantMention = getLessImportantMention(m1, m2);
  23 + Mention moreImportantMention = m1 == lessImportantMention ? m2
  24 + : m1;
  25 +
  26 + // same mention borders
  27 + if (m1.getSegments().equals(m2.getSegments())) {
  28 + unnecessaryMentions.add(lessImportantMention);
  29 + continue;
  30 + }
  31 + // same mention heads
  32 + if (!m1.getHeadSegments().isEmpty()
  33 + && !m2.getHeadSegments().isEmpty()) {
  34 + if (m1.getHeadSegments().equals(m2.getHeadSegments())) {
  35 +
  36 + List<Token> segments = moreImportantMention
  37 + .getSegments();
  38 +
  39 + boolean isConj = false;
  40 + for (Token seg : segments) {
  41 + if (seg.getChosenInterpretation().getCtag()
  42 + .equals("conj")) {
  43 + isConj = true;
  44 + break;
  45 + }
  46 + }
  47 +
  48 + if (!isConj) {
  49 + unnecessaryMentions.add(lessImportantMention);
  50 + continue;
  51 + }
  52 + }
  53 + }
  54 +
  55 + // mention head equals whole other mention
  56 + if (m1.getHeadSegments().isEmpty()
  57 + && !m2.getHeadSegments().isEmpty()) {
  58 + if (m2.getHeadSegments().equals(m1.getSegments())) {
  59 + unnecessaryMentions.add(lessImportantMention);
  60 + continue;
  61 + }
  62 + }
  63 +
  64 + // the same, but other way round
  65 + if (m2.getHeadSegments().isEmpty()
  66 + && !m1.getHeadSegments().isEmpty()) {
  67 +
  68 + if (m1.getHeadSegments().equals(m2.getSegments())) {
  69 + unnecessaryMentions.add(lessImportantMention);
  70 + continue;
  71 + }
  72 + }
  73 +
  74 + // they do not contain one another, but they overlap
  75 + boolean intersect = false;
  76 +
  77 + Set<Token> notInM1 = new HashSet<>(m2.getSegments());
  78 + notInM1.removeAll(m1.getSegments());
  79 + if (notInM1.size() < m2.getSegments().size())
  80 + intersect = true;
  81 +
  82 + Set<Token> notInM2 = new HashSet<>(m1.getSegments());
  83 + notInM2.removeAll(m2.getSegments());
  84 + if (notInM2.size() < m1.getSegments().size())
  85 + intersect = true;
  86 +
  87 + if (intersect && !notInM1.isEmpty() && !notInM2.isEmpty()) {
  88 + unnecessaryMentions.add(lessImportantMention);
  89 + continue;
  90 + }
  91 +
  92 + }
  93 + }
  94 +
  95 + for (Mention m : unnecessaryMentions)
  96 + sentence.removeMention(m);
  97 +
  98 + // heuristic for removing things like: [[Ernest][Kwiecien]]
  99 + unnecessaryMentions.clear();
  100 +
  101 + OUTER:
  102 + for (Mention m : sentence.getMentions()) {
  103 + for (Token seg : m.getSegments())
  104 + if (seg.getOrth().toLowerCase().equals(seg.getOrth()))
  105 + continue OUTER;
  106 +
  107 + //only for children of fully capitalized mentions
  108 + Set<Mention> allMentions = new HashSet<>();
  109 + for (Token seg : m.getSegments())
  110 + for (Mention m2 : seg.getMentions())
  111 + if (m.getSegments().containsAll(m2.getSegments()))
  112 + allMentions.add(m2);
  113 +
  114 + allMentions.remove(m);
  115 +
  116 + unnecessaryMentions.addAll(allMentions);
  117 + }
  118 + for (Mention m : unnecessaryMentions)
  119 + sentence.removeMention(m);
  120 + }
  121 +
  122 + private static Mention getLessImportantMention(Mention m1, Mention m2) {
  123 + if (m1.getSegments().size() > m2.getSegments().size())
  124 + return m2;
  125 + else
  126 + return m1;
  127 + }
145 128 }
... ...
src/main/java/pl/waw/ipipan/zil/core/md/detection/Constants.java
1 1 package pl.waw.ipipan.zil.core.md.detection;
2 2  
3 3 public class Constants {
4   - public static final String MORPHO_NOUN_CTAGS = "subst|depr|ger";
5   - public static final String MORPHO_VERB_CTAGS = "fin|bedzie|aglt|impt";
6   - public static final String MORPHO_PRONOUN_CTAGS = "ppron3|ppron12";
7   - public static final String MORPHO_CTAGS = MORPHO_NOUN_CTAGS + "|"
8   - + MORPHO_PRONOUN_CTAGS;
9   - public static final String WORDS_CTAGS = "Noun|Ppron.*";
  4 + public static final String MORPHO_NOUN_CTAGS = "subst|depr|ger";
  5 + public static final String MORPHO_VERB_CTAGS = "fin|bedzie|aglt|impt";
  6 + public static final String MORPHO_PRONOUN_CTAGS = "ppron3|ppron12";
  7 + public static final String MORPHO_CTAGS = MORPHO_NOUN_CTAGS + "|"
  8 + + MORPHO_PRONOUN_CTAGS;
  9 + public static final String WORDS_CTAGS = "Noun|Ppron.*";
  10 +
  11 + private Constants() {
  12 + }
10 13 }
... ...
src/main/java/pl/waw/ipipan/zil/core/md/detection/Detector.java
1 1 package pl.waw.ipipan.zil.core.md.detection;
2 2  
  3 +import org.slf4j.Logger;
  4 +import org.slf4j.LoggerFactory;
  5 +import pl.waw.ipipan.zil.core.md.detection.zero.ZeroSubjectDetector;
  6 +import pl.waw.ipipan.zil.core.md.entities.*;
  7 +
3 8 import java.util.ArrayList;
4 9 import java.util.HashSet;
5 10 import java.util.List;
6 11 import java.util.Set;
7 12  
8   -import org.apache.log4j.Logger;
9   -
10   -import pl.waw.ipipan.zil.core.md.detection.zero.ZeroSubjectDetector;
11   -import pl.waw.ipipan.zil.core.md.entities.Mention;
12   -import pl.waw.ipipan.zil.core.md.entities.NamedEntity;
13   -import pl.waw.ipipan.zil.core.md.entities.Paragraph;
14   -import pl.waw.ipipan.zil.core.md.entities.Sentence;
15   -import pl.waw.ipipan.zil.core.md.entities.SyntacticGroup;
16   -import pl.waw.ipipan.zil.core.md.entities.SyntacticWord;
17   -import pl.waw.ipipan.zil.core.md.entities.Text;
18   -import pl.waw.ipipan.zil.core.md.entities.Token;
19   -
20 13 public class Detector {
21   - private static Logger logger = Logger.getLogger(Detector.class);
22   -
23   - public static void findMentionsInText(Text text,
24   - ZeroSubjectDetector zeroSubjectModel) {
25   - text.clearMentions();
26   - logger.debug("Detecting mentions in text " + text.getId());
27   - for (Paragraph p : text)
28   - for (Sentence s : p)
29   - detectMentionsInSentence(s, zeroSubjectModel);
30   - }
31   -
32   - private static void detectMentionsInSentence(Sentence sentence,
33   - ZeroSubjectDetector zeroSubjectModel) {
34   - // adding mentions
35   - addMentionsByTokenCtag(sentence);
36   - addMentionsBySyntacticWordsCtag(sentence);
37   - addMentionsByNamedEntities(sentence);
38   - addMentionsByGroups(sentence);
39   - addSpeakerMentionsInSpoken(sentence);
40   -
41   - // zero subject detection
42   - zeroSubjectModel.addZeroSubjectMentions(sentence);
43   -
44   - // removing mentions
45   - removeTo(sentence);
46   - Cleaner.cleanUnnecessarySentenceMentions(sentence);
47   -
48   - // updating mention heads
49   - updateMentionHeads(sentence);
50   - }
51   -
52   - /**
53   - * heurystyka ustawiajaca jako glowe pierwszy segment gdy glowy brak
54   - *
55   - * @param sentence
56   - */
57   - private static void updateMentionHeads(Sentence sentence) {
58   - for (Mention m : sentence.getMentions())
59   - if (m.getHeadSegments().isEmpty())
60   - m.addHeadSegment(m.getFirstSegment());
61   - }
62   -
63   - /**
64   - * heurystyka dla "to" w zdaniu z ""jeśli"/"jeżeli"/"skoro""
65   - *
66   - * @param sentence
67   - */
68   - private static void removeTo(Sentence sentence) {
69   - Set<String> orths = new HashSet<String>();
70   - for (Token morph : sentence)
71   - orths.add(morph.getOrth());
72   -
73   - if (orths.contains("jeśli") || orths.contains("jeżeli")
74   - || orths.contains("skoro")) {
75   - for (Mention mention : sentence.getMentions()) {
76   - List<Token> mentSegs = mention.getSegments();
77   - if (mentSegs.size() == 1
78   - && mentSegs.get(0).getBase().equals("to")) {
79   - sentence.removeMention(mention);
80   - }
81   - }
82   - }
83   - }
84   -
85   - private static void addSpeakerMentionsInSpoken(Sentence sentence) {
86   - // heurystyka dla sp1:, sp2:, MarszałekJAkistam:
87   - if (sentence.size() > 2) {
88   - Token first = sentence.get(0);
89   - Token second = sentence.get(1);
90   - if (second.getOrth().equals(":")) {
91   - sentence.addMention(new Mention(first));
92   - }
93   - }
94   - }
95   -
96   - /**
97   - * Wyszukuję i oznaczam wszystkie NG*
98   - *
99   - * @param sentence
100   - */
101   - private static void addMentionsByGroups(Sentence sentence) {
102   - for (SyntacticGroup group : sentence.getGroups()) {
103   - if (group.getType().startsWith("NG")) {
104   - List<Token> segments = group.getTokens();
105   - List<Token> heads = group.getSemanticHeadTokens();
106   -
107   - sentence.addMention(new Mention(segments, heads));
108   - }
109   - }
110   - }
111   -
112   - /**
113   - * Wyszukuję i oznaczam wszystkie NER
114   - *
115   - * @param sentence
116   - */
117   - private static void addMentionsByNamedEntities(Sentence sentence) {
118   - for (NamedEntity ne : sentence.getNamedEntities()) {
119   -
120   - List<Token> headTokens = new ArrayList<Token>();
121   - List<Token> tokens = ne.getTokens();
122   -
123   - boolean containsNoun = false;
124   - for (Token seg : tokens) {
125   - if (seg.getCtag().matches(Constants.MORPHO_NOUN_CTAGS)) {
126   - containsNoun = true;
127   - break;
128   - }
129   - }
130   - if (!containsNoun)
131   - continue;
132   -
133   - sentence.addMention(new Mention(tokens, headTokens));
134   - }
135   - }
136   -
137   - /**
138   - * @param sentence
139   - */
140   - private static void addMentionsBySyntacticWordsCtag(Sentence sentence) {
141   - for (SyntacticWord w : sentence.getSyntacticWords())
142   - if (w.getCtag().matches(Constants.WORDS_CTAGS)) {
143   - List<Token> tokens = w.getTokens();
144   - if (tokens.size() == 1) {
145   - sentence.addMention(new Mention(tokens.get(0)));
146   - } else {
147   - List<Token> heads = new ArrayList<Token>();
148   - sentence.addMention(new Mention(tokens, heads));
149   - }
150   - }
151   - }
152   -
153   - /**
154   - * Wyszukuję wszystkie interesujace czesci mowy jesli jest poziom slow
155   - * skladniowych, to korzystam z niego zamiast morfoskladni
156   - *
157   - * @param sentence
158   - */
159   - private static void addMentionsByTokenCtag(Sentence sentence) {
160   - for (Token token : sentence)
161   - if (token.getCtag().matches(Constants.MORPHO_CTAGS))
162   - sentence.addMention(new Mention(token));
163   - }
  14 +
  15 + private static final Logger logger = LoggerFactory.getLogger(Detector.class);
  16 +
  17 + private Detector() {
  18 + }
  19 +
  20 + public static void findMentionsInText(Text text,
  21 + ZeroSubjectDetector zeroSubjectModel) {
  22 + text.clearMentions();
  23 + logger.debug("Detecting mentions in text " + text.getId());
  24 + for (Paragraph p : text)
  25 + for (Sentence s : p)
  26 + detectMentionsInSentence(s, zeroSubjectModel);
  27 + }
  28 +
  29 + private static void detectMentionsInSentence(Sentence sentence,
  30 + ZeroSubjectDetector zeroSubjectModel) {
  31 + // adding mentions
  32 + addMentionsByTokenCtag(sentence);
  33 + addMentionsBySyntacticWordsCtag(sentence);
  34 + addMentionsByNamedEntities(sentence);
  35 + addMentionsByGroups(sentence);
  36 + addSpeakerMentionsInSpoken(sentence);
  37 +
  38 + // zero subject detection
  39 + zeroSubjectModel.addZeroSubjectMentions(sentence);
  40 +
  41 + // removing mentions
  42 + removeTo(sentence);
  43 + Cleaner.cleanUnnecessarySentenceMentions(sentence);
  44 +
  45 + // updating mention heads
  46 + updateMentionHeads(sentence);
  47 + }
  48 +
  49 + /**
  50 + * heuristic: set the first segment as the head when the mention has no head
  51 + *
  52 + * @param sentence
  53 + */
  54 + private static void updateMentionHeads(Sentence sentence) {
  55 + for (Mention m : sentence.getMentions())
  56 + if (m.getHeadSegments().isEmpty())
  57 + m.addHeadSegment(m.getFirstSegment());
  58 + }
  59 +
  60 + /**
  61 + * heuristic for "to" in a sentence containing "jeśli"/"jeżeli"/"skoro"
  62 + *
  63 + * @param sentence
  64 + */
  65 + private static void removeTo(Sentence sentence) {
  66 + Set<String> orths = new HashSet<>();
  67 + for (Token morph : sentence)
  68 + orths.add(morph.getOrth());
  69 +
  70 + if (orths.contains("jeśli") || orths.contains("jeżeli")
  71 + || orths.contains("skoro")) {
  72 + for (Mention mention : sentence.getMentions()) {
  73 + List<Token> mentSegs = mention.getSegments();
  74 + if (mentSegs.size() == 1
  75 + && "to".equals(mentSegs.get(0).getBase())) {
  76 + sentence.removeMention(mention);
  77 + }
  78 + }
  79 + }
  80 + }
  81 +
  82 + private static void addSpeakerMentionsInSpoken(Sentence sentence) {
  83 + // heuristic for speaker labels such as sp1:, sp2:, MarszałekJAkistam:
  84 + if (sentence.size() > 2) {
  85 + Token first = sentence.get(0);
  86 + Token second = sentence.get(1);
  87 + if (":".equals(second.getOrth())) {
  88 + sentence.addMention(new Mention(first));
  89 + }
  90 + }
  91 + }
  92 +
  93 + /**
  94 + * Finds and marks all NG* syntactic groups
  95 + *
  96 + * @param sentence
  97 + */
  98 + private static void addMentionsByGroups(Sentence sentence) {
  99 + for (SyntacticGroup group : sentence.getGroups()) {
  100 + if (group.getType().startsWith("NG")) {
  101 + List<Token> segments = group.getTokens();
  102 + List<Token> heads = group.getSemanticHeadTokens();
  103 +
  104 + sentence.addMention(new Mention(segments, heads));
  105 + }
  106 + }
  107 + }
  108 +
  109 + /**
  110 + * Finds and marks all named entities (NER)
  111 + *
  112 + * @param sentence
  113 + */
  114 + private static void addMentionsByNamedEntities(Sentence sentence) {
  115 + for (NamedEntity ne : sentence.getNamedEntities()) {
  116 +
  117 + List<Token> headTokens = new ArrayList<>();
  118 + List<Token> tokens = ne.getTokens();
  119 +
  120 + boolean containsNoun = false;
  121 + for (Token seg : tokens) {
  122 + if (seg.getCtag().matches(Constants.MORPHO_NOUN_CTAGS)) {
  123 + containsNoun = true;
  124 + break;
  125 + }
  126 + }
  127 + if (!containsNoun)
  128 + continue;
  129 +
  130 + sentence.addMention(new Mention(tokens, headTokens));
  131 + }
  132 + }
  133 +
  134 + private static void addMentionsBySyntacticWordsCtag(Sentence sentence) {
  135 + for (SyntacticWord w : sentence.getSyntacticWords())
  136 + if (w.getCtag().matches(Constants.WORDS_CTAGS)) {
  137 + List<Token> tokens = w.getTokens();
  138 + if (tokens.size() == 1) {
  139 + sentence.addMention(new Mention(tokens.get(0)));
  140 + } else {
  141 + List<Token> heads = new ArrayList<>();
  142 + sentence.addMention(new Mention(tokens, heads));
  143 + }
  144 + }
  145 + }
  146 +
  147 + /**
  148 + * Finds all parts of speech of interest; if the syntactic-word level is
  149 + * available, it is used instead of the morphosyntactic one
  150 + *
  151 + * @param sentence
  152 + */
  153 + private static void addMentionsByTokenCtag(Sentence sentence) {
  154 + for (Token token : sentence)
  155 + if (token.getCtag().matches(Constants.MORPHO_CTAGS))
  156 + sentence.addMention(new Mention(token));
  157 + }
164 158 }
... ...
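
The rewritten Detector is still driven entirely through the static findMentionsInText entry point. A short hedged sketch of the wiring, assuming the default model resource bundled with the jar (how the Text instance is obtained, e.g. via TeiLoader.loadTextFromTei, is left out; the class name is a placeholder):

// Sketch only, not part of this commit.
import pl.waw.ipipan.zil.core.md.detection.Detector;
import pl.waw.ipipan.zil.core.md.detection.zero.ZeroSubjectDetector;
import pl.waw.ipipan.zil.core.md.entities.Text;

import java.io.InputStream;

public class DetectMentions {
    public static void annotate(Text text) {
        // default model shipped on the classpath; a FileInputStream over a custom model works as well
        InputStream modelStream = DetectMentions.class.getResourceAsStream("/zero_subject_model.bin");
        ZeroSubjectDetector zeroSubjects = new ZeroSubjectDetector(modelStream);
        Detector.findMentionsInText(text, zeroSubjects); // clears existing mentions, then re-detects
    }
}
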
src/main/java/pl/waw/ipipan/zil/core/md/detection/zero/FeatureGeneration.java
1 1 package pl.waw.ipipan.zil.core.md.detection.zero;
2 2  
3   -import java.util.ArrayList;
4   -import java.util.Arrays;
5   -import java.util.HashMap;
6   -import java.util.HashSet;
7   -import java.util.Iterator;
8   -import java.util.LinkedList;
9   -import java.util.List;
10   -import java.util.Map;
11   -import java.util.Set;
12   -
13   -import pl.waw.ipipan.zil.core.md.entities.Mention;
14   -import pl.waw.ipipan.zil.core.md.entities.Sentence;
15   -import pl.waw.ipipan.zil.core.md.entities.SyntacticGroup;
16   -import pl.waw.ipipan.zil.core.md.entities.SyntacticWord;
17   -import pl.waw.ipipan.zil.core.md.entities.Token;
  3 +import pl.waw.ipipan.zil.core.md.entities.*;
18 4 import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIMention;
19 5 import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIMorph;
20 6  
  7 +import java.util.*;
  8 +
21 9 public class FeatureGeneration {
22 10 final private static Set<String> CLAUSE_SPLIT_LEMMAS = new HashSet<>(Arrays.asList(new String[] { "i", "albo",
23 11 "lub", "oraz", "bądź", "ani", "czy", "niż", "tudzież", ",", ";", "-", "–", ":" }));
... ...
src/main/java/pl/waw/ipipan/zil/core/md/detection/zero/InstanceCreator.java
1 1 package pl.waw.ipipan.zil.core.md.detection.zero;
2 2  
3   -import java.io.File;
4   -import java.util.ArrayList;
5   -import java.util.HashSet;
6   -import java.util.List;
7   -import java.util.Map.Entry;
8   -import java.util.Set;
9   -import java.util.TreeMap;
10   -import java.util.TreeSet;
11   -
12   -import org.apache.log4j.Logger;
13   -
14   -import pl.waw.ipipan.zil.core.md.entities.Mention;
15   -import pl.waw.ipipan.zil.core.md.entities.Paragraph;
16   -import pl.waw.ipipan.zil.core.md.entities.Sentence;
17   -import pl.waw.ipipan.zil.core.md.entities.Text;
18   -import pl.waw.ipipan.zil.core.md.entities.Token;
  3 +import org.slf4j.Logger;
  4 +import org.slf4j.LoggerFactory;
  5 +import pl.waw.ipipan.zil.core.md.entities.*;
19 6 import pl.waw.ipipan.zil.core.md.io.tei.TeiLoader;
20 7 import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEICorpusText;
21 8 import pl.waw.ipipan.zil.nkjp.teiapi.api.io.IOUtils;
... ... @@ -25,154 +12,161 @@ import weka.core.FastVector;
25 12 import weka.core.Instance;
26 13 import weka.core.Instances;
27 14  
  15 +import java.io.File;
  16 +import java.util.*;
  17 +import java.util.Map.Entry;
  18 +
28 19 public class InstanceCreator {
29 20  
30   - final private static Logger logger = Logger.getLogger(InstanceCreator.class);
31   - final private static TEI_IO teiIO = TEI_IO.getInstance();
32   -
33   - public static List<TreeMap<String, Object>> loadExamples(File dataDir, Set<String> quasiVerbs) {
34   - int allTexts = 0;
35   - int exceptions = 0;
36   - int allSentences = 0;
37   -
38   - List<TreeMap<String, Object>> examples = new ArrayList<>();
39   - for (File textDir : IOUtils.getNKJPDirs(dataDir)) {
40   - try {
41   - allTexts++;
42   - logger.info("Processing text " + textDir);
43   - TEICorpusText ct = teiIO.readFromNKJPDirectory(textDir);
44   - Text text = TeiLoader.loadTextFromTei(ct);
45   -
46   - for (Paragraph p : text)
47   - for (Sentence s : p) {
48   - allSentences++;
49   - loadExamplesFromSentence(quasiVerbs, examples, s);
50   - }
51   -
52   - } catch (Exception e) {
53   - logger.error(e.getLocalizedMessage());
54   - exceptions++;
55   - }
56   - }
57   -
58   - logger.info(allTexts + " texts found.");
59   - if (exceptions != 0)
60   - logger.error(exceptions + " texts with exceptions.");
61   - logger.info(allSentences + " sentences found.");
62   -
63   - return examples;
64   - }
65   -
66   - public static void loadExamplesFromSentence(Set<String> quasiVerbs, List<TreeMap<String, Object>> examples,
67   - Sentence s) {
68   -
69   - // collect positive examples
70   - Set<Token> positive = new HashSet<>();
71   - for (Mention m : s.getMentions()) {
72   - if (FeatureGeneration.isVerb(m)) {
73   - positive.addAll(m.getSegments());
74   - }
75   - }
76   -
77   - for (Token m : s) {
78   - if (!FeatureGeneration.isVerb(m))
79   - continue;
80   -
81   - TreeMap<String, Object> features = new TreeMap<>();
82   - if (positive.contains(m)) {
83   - features.put("class", Boolean.valueOf(true));
84   - } else {
85   - features.put("class", Boolean.valueOf(false));
86   - }
87   -
88   - FeatureGeneration.generateFeatures(features, m, s, quasiVerbs);
89   - examples.add(features);
90   - }
91   - }
92   -
93   - public static Instances createInstances(List<TreeMap<String, Object>> examples, String classFeatureName) {
94   -
95   - TreeSet<String> booleanAttsOccurred = new TreeSet<>();
96   - TreeSet<String> doubleAttsOccurred = new TreeSet<>();
97   - TreeMap<String, Set<String>> att2values = new TreeMap<>();
98   - for (TreeMap<String, Object> example : examples) {
99   - for (Entry<String, Object> e : example.entrySet()) {
100   - String key = e.getKey();
101   - Object val = e.getValue();
102   - if (val instanceof Integer || val instanceof Double) {
103   - doubleAttsOccurred.add(key);
104   - continue;
105   - }
106   - if (val instanceof Boolean) {
107   - booleanAttsOccurred.add(key);
108   - continue;
109   - }
110   - if (!att2values.containsKey(key))
111   - att2values.put(key, new HashSet<String>());
112   - att2values.get(key).add(val.toString());
113   - }
114   - }
115   -
116   - List<Attribute> atts = new ArrayList<>();
117   -
118   - // double attributes
119   - for (String attName : doubleAttsOccurred) {
120   - Attribute att = new Attribute(attName);
121   - atts.add(att);
122   - }
123   -
124   - // boolean attributes (treated as nominal)
125   - FastVector values = new FastVector(2);
126   - values.addElement("false");
127   - values.addElement("true");
128   - for (String attName : booleanAttsOccurred) {
129   - Attribute att = new Attribute(attName, values);
130   - atts.add(att);
131   - }
132   -
133   - // nominal attributes
134   - for (Entry<String, Set<String>> attVals : att2values.entrySet()) {
135   - FastVector vals = new FastVector(attVals.getValue().size());
136   - for (String val : attVals.getValue())
137   - vals.addElement(val);
138   - Attribute att = new Attribute(attVals.getKey(), vals);
139   - atts.add(att);
140   - }
141   -
142   - FastVector fvWekaAttributes = new FastVector(atts.size());
143   - for (Attribute attr : atts) {
144   - fvWekaAttributes.addElement(attr);
145   - }
146   -
147   - Instances data = new Instances("Zero", fvWekaAttributes, 10);
148   - data.setClass(data.attribute(classFeatureName));
149   - return data;
150   - }
151   -
152   - public static void fillInstances(List<TreeMap<String, Object>> examples, Instances instances) {
153   - for (TreeMap<String, Object> example : examples) {
154   - Instance instance = new Instance(instances.numAttributes());
155   -
156   - for (Entry<String, Object> e : example.entrySet()) {
157   - Object val = e.getValue();
158   - String name = e.getKey();
159   - if (val instanceof Integer) {
160   - instance.setValue(instances.attribute(name), (int) val);
161   - } else if (val instanceof Boolean) {
162   - instance.setValue(instances.attribute(name), ((Boolean) val) ? "true" : "false");
163   - } else {
164   - int indexOfValue = instances.attribute(name).indexOfValue(val.toString());
165   - if (indexOfValue == -1) {
166   - logger.debug("Unkown value: " + val.toString() + " of feature: " + name
167   - + ". Marking as missing value.");
168   - instance.setMissing(instances.attribute(name));
169   - } else
170   - instance.setValue(instances.attribute(name), indexOfValue);
171   - }
172   - }
173   -
174   - instance.setDataset(instances);
175   - instances.add(instance);
176   - }
177   - }
  21 + private static final Logger logger = LoggerFactory.getLogger(InstanceCreator.class);
  22 + private static final TEI_IO teiIO = TEI_IO.getInstance();
  23 +
  24 + private InstanceCreator() {
  25 + }
  26 +
  27 + public static List<TreeMap<String, Object>> loadExamples(File dataDir, Set<String> quasiVerbs) {
  28 + int allTexts = 0;
  29 + int exceptions = 0;
  30 + int allSentences = 0;
  31 +
  32 + List<TreeMap<String, Object>> examples = new ArrayList<>();
  33 + for (File textDir : IOUtils.getNKJPDirs(dataDir)) {
  34 + try {
  35 + allTexts++;
  36 + logger.info("Processing text " + textDir);
  37 + TEICorpusText ct = teiIO.readFromNKJPDirectory(textDir);
  38 + Text text = TeiLoader.loadTextFromTei(ct);
  39 +
  40 + for (Paragraph p : text)
  41 + for (Sentence s : p) {
  42 + allSentences++;
  43 + loadExamplesFromSentence(quasiVerbs, examples, s);
  44 + }
  45 +
  46 + } catch (Exception e) {
  47 + logger.error(e.getLocalizedMessage());
  48 + exceptions++;
  49 + }
  50 + }
  51 +
  52 + logger.info(allTexts + " texts found.");
  53 + if (exceptions != 0)
  54 + logger.error(exceptions + " texts with exceptions.");
  55 + logger.info(allSentences + " sentences found.");
  56 +
  57 + return examples;
  58 + }
  59 +
  60 + public static void loadExamplesFromSentence(Set<String> quasiVerbs, List<TreeMap<String, Object>> examples,
  61 + Sentence s) {
  62 +
  63 + // collect positive examples
  64 + Set<Token> positive = new HashSet<>();
  65 + for (Mention m : s.getMentions()) {
  66 + if (FeatureGeneration.isVerb(m)) {
  67 + positive.addAll(m.getSegments());
  68 + }
  69 + }
  70 +
  71 + for (Token m : s) {
  72 + if (!FeatureGeneration.isVerb(m))
  73 + continue;
  74 +
  75 + TreeMap<String, Object> features = new TreeMap<>();
  76 + if (positive.contains(m)) {
  77 + features.put("class", Boolean.valueOf(true));
  78 + } else {
  79 + features.put("class", Boolean.valueOf(false));
  80 + }
  81 +
  82 + FeatureGeneration.generateFeatures(features, m, s, quasiVerbs);
  83 + examples.add(features);
  84 + }
  85 + }
  86 +
  87 + public static Instances createInstances(List<TreeMap<String, Object>> examples, String classFeatureName) {
  88 +
  89 + TreeSet<String> booleanAttsOccurred = new TreeSet<>();
  90 + TreeSet<String> doubleAttsOccurred = new TreeSet<>();
  91 + TreeMap<String, Set<String>> att2values = new TreeMap<>();
  92 + for (TreeMap<String, Object> example : examples) {
  93 + for (Entry<String, Object> e : example.entrySet()) {
  94 + String key = e.getKey();
  95 + Object val = e.getValue();
  96 + if (val instanceof Integer || val instanceof Double) {
  97 + doubleAttsOccurred.add(key);
  98 + continue;
  99 + }
  100 + if (val instanceof Boolean) {
  101 + booleanAttsOccurred.add(key);
  102 + continue;
  103 + }
  104 + if (!att2values.containsKey(key))
  105 + att2values.put(key, new HashSet<>());
  106 + att2values.get(key).add(val.toString());
  107 + }
  108 + }
  109 +
  110 + List<Attribute> atts = new ArrayList<>();
  111 +
  112 + // double attributes
  113 + for (String attName : doubleAttsOccurred) {
  114 + Attribute att = new Attribute(attName);
  115 + atts.add(att);
  116 + }
  117 +
  118 + // boolean attributes (treated as nominal)
  119 + FastVector values = new FastVector(2);
  120 + values.addElement("false");
  121 + values.addElement("true");
  122 + for (String attName : booleanAttsOccurred) {
  123 + Attribute att = new Attribute(attName, values);
  124 + atts.add(att);
  125 + }
  126 +
  127 + // nominal attributes
  128 + for (Entry<String, Set<String>> attVals : att2values.entrySet()) {
  129 + FastVector vals = new FastVector(attVals.getValue().size());
  130 + for (String val : attVals.getValue())
  131 + vals.addElement(val);
  132 + Attribute att = new Attribute(attVals.getKey(), vals);
  133 + atts.add(att);
  134 + }
  135 +
  136 + FastVector fvWekaAttributes = new FastVector(atts.size());
  137 + for (Attribute attr : atts) {
  138 + fvWekaAttributes.addElement(attr);
  139 + }
  140 +
  141 + Instances data = new Instances("Zero", fvWekaAttributes, 10);
  142 + data.setClass(data.attribute(classFeatureName));
  143 + return data;
  144 + }
  145 +
  146 + public static void fillInstances(List<TreeMap<String, Object>> examples, Instances instances) {
  147 + for (TreeMap<String, Object> example : examples) {
  148 + Instance instance = new Instance(instances.numAttributes());
  149 +
  150 + for (Entry<String, Object> e : example.entrySet()) {
  151 + Object val = e.getValue();
  152 + String name = e.getKey();
  153 + if (val instanceof Integer) {
  154 + instance.setValue(instances.attribute(name), (int) val);
  155 + } else if (val instanceof Boolean) {
  156 + instance.setValue(instances.attribute(name), ((Boolean) val) ? "true" : "false");
  157 + } else {
  158 + int indexOfValue = instances.attribute(name).indexOfValue(val.toString());
  159 + if (indexOfValue == -1) {
  160 + logger.debug("Unkown value: " + val.toString() + " of feature: " + name
  161 + + ". Marking as missing value.");
  162 + instance.setMissing(instances.attribute(name));
  163 + } else
  164 + instance.setValue(instances.attribute(name), indexOfValue);
  165 + }
  166 + }
  167 +
  168 + instance.setDataset(instances);
  169 + instances.add(instance);
  170 + }
  171 + }
178 172 }
... ...
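
A minimal usage sketch of the InstanceCreator API above (illustrative only, not part of the commit): the hand-built feature map stands in for what FeatureGeneration.generateFeatures would produce; only the "class" label and the createInstances/fillInstances calls reflect the actual API.

import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;

import pl.waw.ipipan.zil.core.md.detection.zero.InstanceCreator;
import weka.core.Instances;

public class InstanceCreatorSketch {
    public static void main(String[] args) {
        // One labelled example: a verb marked as having a zero subject,
        // plus one hypothetical nominal feature.
        TreeMap<String, Object> example = new TreeMap<>();
        example.put("class", Boolean.TRUE);
        example.put("someNominalFeature", "value");

        List<TreeMap<String, Object>> examples = new ArrayList<>();
        examples.add(example);

        // Build the attribute layout from the examples, then fill in the data rows.
        Instances instances = InstanceCreator.createInstances(examples, "class");
        InstanceCreator.fillInstances(examples, instances);
        System.out.println(instances);
    }
}
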
src/main/java/pl/waw/ipipan/zil/core/md/detection/zero/Model.java
1 1 package pl.waw.ipipan.zil.core.md.detection.zero;
2 2  
3   -import java.io.Serializable;
4   -import java.util.List;
5   -import java.util.Set;
6   -import java.util.TreeMap;
7   -
8   -import org.apache.log4j.Logger;
9   -
  3 +import org.slf4j.Logger;
  4 +import org.slf4j.LoggerFactory;
10 5 import pl.waw.ipipan.zil.core.md.entities.Sentence;
11 6 import weka.classifiers.Classifier;
12 7 import weka.core.Instance;
13 8 import weka.core.Instances;
14 9  
  10 +import java.io.Serializable;
  11 +import java.util.List;
  12 +import java.util.Set;
  13 +import java.util.TreeMap;
  14 +
15 15 public class Model implements Serializable {
16 16  
17   - private static final long serialVersionUID = 3351727361273283076L;
18   - private static final Logger logger = Logger.getLogger(Model.class);
19   -
20   - private Classifier classifier;
21   - private Set<String> quasiVerbs;
22   - private Instances instances;
23   -
24   - public Model(Classifier classifier, Instances instances, Set<String> quasiVerbs) {
25   - this.classifier = classifier;
26   - this.instances = instances;
27   - this.quasiVerbs = quasiVerbs;
28   - }
29   -
30   - public boolean isZeroSubject(Instance instance, Sentence sentence) {
31   - try {
32   - double response = this.classifier.classifyInstance(instance);
33   - return response > 0;
34   - } catch (Exception e) {
35   - logger.error("Error classyfing verb in sentence: " + sentence);
36   - return false;
37   - }
38   - }
39   -
40   - public Instances getInstances(List<TreeMap<String, Object>> examples) {
41   - Instances instances = new Instances(this.instances);
42   - InstanceCreator.fillInstances(examples, instances);
43   - return instances;
44   - }
45   -
46   - public Set<String> getQuasiVerbs() {
47   - return quasiVerbs;
48   - }
  17 + private static final long serialVersionUID = 3351727361273283076L;
  18 + private static final Logger logger = LoggerFactory.getLogger(Model.class);
  19 +
  20 + private Classifier classifier;
  21 + private Set<String> quasiVerbs;
  22 + private Instances instances;
  23 +
  24 + public Model(Classifier classifier, Instances instances, Set<String> quasiVerbs) {
  25 + this.classifier = classifier;
  26 + this.instances = instances;
  27 + this.quasiVerbs = quasiVerbs;
  28 + }
  29 +
  30 + public boolean isZeroSubject(Instance instance, Sentence sentence) {
  31 + try {
  32 + double response = this.classifier.classifyInstance(instance);
  33 + return response > 0;
  34 + } catch (Exception e) {
  35 + logger.error("Error classyfing verb in sentence: " + sentence, e);
  36 + return false;
  37 + }
  38 + }
  39 +
  40 + public Instances getInstances(List<TreeMap<String, Object>> examples) {
  41 + Instances instances = new Instances(this.instances);
  42 + InstanceCreator.fillInstances(examples, instances);
  43 + return instances;
  44 + }
  45 +
  46 + public Set<String> getQuasiVerbs() {
  47 + return quasiVerbs;
  48 + }
49 49 }
... ...
src/main/java/pl/waw/ipipan/zil/core/md/detection/zero/Serializer.java
1 1 package pl.waw.ipipan.zil.core.md.detection.zero;
2 2  
3   -import java.io.InputStream;
4   -
5 3 import weka.core.SerializationHelper;
6 4  
  5 +import java.io.InputStream;
  6 +
7 7 public class Serializer {
8 8  
9 9 public static void saveModel(Model m, String targetModelFilePath) throws Exception {
... ...
src/main/java/pl/waw/ipipan/zil/core/md/detection/zero/Trainer.java
1 1 package pl.waw.ipipan.zil.core.md.detection.zero;
2 2  
3   -import java.io.BufferedReader;
4   -import java.io.File;
5   -import java.io.IOException;
6   -import java.io.InputStream;
7   -import java.io.InputStreamReader;
8   -import java.util.HashSet;
9   -import java.util.List;
10   -import java.util.Random;
11   -import java.util.Set;
12   -import java.util.TreeMap;
13   -
14   -import org.apache.log4j.Logger;
15   -
  3 +import org.slf4j.Logger;
  4 +import org.slf4j.LoggerFactory;
16 5 import weka.classifiers.Evaluation;
17 6 import weka.classifiers.rules.JRip;
18 7 import weka.classifiers.rules.JRip.RipperRule;
... ... @@ -20,104 +9,111 @@ import weka.core.Attribute;
20 9 import weka.core.Instance;
21 10 import weka.core.Instances;
22 11  
  12 +import java.io.*;
  13 +import java.util.*;
  14 +
23 15 public class Trainer {
24 16  
25   - final private static Logger logger = Logger.getLogger(Trainer.class);
26   -
27   - private static final boolean DO_CV = false;
28   - private static final String QUASI_LIST_PATH = "/quasi_verbs.txt";
29   -
30   - public static void main(String[] args) {
31   -
32   - if (args.length != 2) {
33   - logger.error("Wrong number of arguments! Should be: " + Trainer.class.getSimpleName()
34   - + " trainDir targetModelFile");
35   - return;
36   - }
37   -
38   - File dataDir = new File(args[0]);
39   - String targetModelFilePath = args[1];
40   -
41   - if (!dataDir.isDirectory()) {
42   - logger.error(dataDir + " is not a directory!");
43   - return;
44   - }
45   -
46   - Set<String> quasiVerbs = loadQuasiVerbs();
47   -
48   - List<TreeMap<String, Object>> examples = InstanceCreator.loadExamples(dataDir, quasiVerbs);
49   - Instances instances = InstanceCreator.createInstances(examples, "class");
50   - InstanceCreator.fillInstances(examples, instances);
51   -
52   - printStats(instances);
53   -
54   - try {
55   - JRip model = new JRip();
56   -
57   - if (DO_CV) {
58   - logger.info("Crossvalidation...");
59   - Evaluation eval = new Evaluation(instances);
60   - eval.crossValidateModel(model, instances, 10, new Random(1));
61   - logger.info(eval.toSummaryString());
62   - logger.info(eval.toMatrixString());
63   - logger.info(eval.toClassDetailsString());
64   - }
65   -
66   - logger.info("Building final classifier...");
67   - model = new JRip();
68   - model.buildClassifier(instances);
69   - logger.info(model.getRuleset().size() + " rules generated.");
70   - for (int i = 0; i < model.getRuleset().size(); i++) {
71   - RipperRule v = (RipperRule) model.getRuleset().elementAt(i);
72   - logger.info("\t" + v.toString(instances.classAttribute()));
73   - }
74   -
75   - instances.delete();
76   - logger.info("Features stats:");
77   - for (int i = 0; i < instances.numAttributes(); i++) {
78   - Attribute att = instances.attribute(i);
79   - logger.info(i + ".\t" + att.toString());
80   - }
81   -
82   - logger.info("Saving classifier...");
83   - Model m = new Model(model, instances, quasiVerbs);
84   - Serializer.saveModel(m, targetModelFilePath);
85   - logger.info("Done.");
86   -
87   - } catch (Exception e) {
88   - logger.error("Error: " + e);
89   - }
90   - }
91   -
92   - private static Set<String> loadQuasiVerbs() {
93   - Set<String> quasiVerbs = new HashSet<>();
94   - InputStream stream = Trainer.class.getResourceAsStream(QUASI_LIST_PATH);
95   - try (BufferedReader br = new BufferedReader(new InputStreamReader(stream))) {
96   - String line = null;
97   - while ((line = br.readLine()) != null) {
98   - quasiVerbs.add(line.trim());
99   - }
100   - } catch (IOException e) {
101   - logger.error(e.getLocalizedMessage());
102   - }
103   - return quasiVerbs;
104   - }
105   -
106   - private static void printStats(Instances instances) {
107   - int positive = 0;
108   - int negative = 0;
109   - for (int i = 0; i < instances.numInstances(); i++) {
110   - Instance inst = instances.instance(i);
111   - if (inst.classValue() > 0)
112   - negative++;
113   - else
114   - positive++;
115   - }
116   - logger.info(positive + " positive examples");
117   - logger.info(negative + " negative examples");
118   - logger.info((positive + negative) + " examples total");
119   - logger.info((instances.numAttributes() - 1) + " attributes");
120   - logger.info(instances.toSummaryString());
121   - }
  17 + private static final Logger logger = LoggerFactory.getLogger(Trainer.class);
  18 +
  19 + private static final boolean DO_CV = false;
  20 + private static final String QUASI_LIST_PATH = "/quasi_verbs.txt";
  21 +
  22 + private Trainer() {
  23 + }
  24 +
  25 + public static void main(String[] args) {
  26 +
  27 + if (args.length != 2) {
  28 + logger.error("Wrong number of arguments! Should be: " + Trainer.class.getSimpleName()
  29 + + " trainDir targetModelFile");
  30 + return;
  31 + }
  32 +
  33 + File dataDir = new File(args[0]);
  34 + String targetModelFilePath = args[1];
  35 +
  36 + if (!dataDir.isDirectory()) {
  37 + logger.error(dataDir + " is not a directory!");
  38 + return;
  39 + }
  40 +
  41 + Set<String> quasiVerbs = loadQuasiVerbs();
  42 +
  43 + List<TreeMap<String, Object>> examples = InstanceCreator.loadExamples(dataDir, quasiVerbs);
  44 + Instances instances = InstanceCreator.createInstances(examples, "class");
  45 + InstanceCreator.fillInstances(examples, instances);
  46 +
  47 + printStats(instances);
  48 +
  49 + try {
  50 + JRip model;
  51 +
  52 + if (DO_CV) {
  53 + logger.info("Crossvalidation...");
  54 + model = new JRip();
  55 + Evaluation eval = new Evaluation(instances);
  56 + eval.crossValidateModel(model, instances, 10, new Random(1));
  57 + logger.info(eval.toSummaryString());
  58 + logger.info(eval.toMatrixString());
  59 + logger.info(eval.toClassDetailsString());
  60 + }
  61 +
  62 + logger.info("Building final classifier...");
  63 + model = new JRip();
  64 + model.buildClassifier(instances);
  65 + logger.info(model.getRuleset().size() + " rules generated.");
  66 + for (int i = 0; i < model.getRuleset().size(); i++) {
  67 + RipperRule v = (RipperRule) model.getRuleset().elementAt(i);
  68 + logger.info("\t" + v.toString(instances.classAttribute()));
  69 + }
  70 +
  71 + instances.delete();
  72 + logger.info("Features stats:");
  73 + for (int i = 0; i < instances.numAttributes(); i++) {
  74 + Attribute att = instances.attribute(i);
  75 + logger.info(i + ".\t" + att.toString());
  76 + }
  77 +
  78 + logger.info("Saving classifier...");
  79 + Model m = new Model(model, instances, quasiVerbs);
  80 + Serializer.saveModel(m, targetModelFilePath);
  81 + logger.info("Done.");
  82 +
  83 + } catch (Exception e) {
  84 + logger.error("Error: " + e);
  85 + }
  86 + }
  87 +
  88 + private static Set<String> loadQuasiVerbs() {
  89 + Set<String> quasiVerbs = new HashSet<>();
  90 + InputStream stream = Trainer.class.getResourceAsStream(QUASI_LIST_PATH);
  91 + try (BufferedReader br = new BufferedReader(new InputStreamReader(stream))) {
  92 + String line;
  93 + while ((line = br.readLine()) != null) {
  94 + quasiVerbs.add(line.trim());
  95 + }
  96 + } catch (IOException e) {
  97 + logger.error(e.getLocalizedMessage(), e);
  98 + }
  99 + return quasiVerbs;
  100 + }
  101 +
  102 + private static void printStats(Instances instances) {
  103 + int positive = 0;
  104 + int negative = 0;
  105 + for (int i = 0; i < instances.numInstances(); i++) {
  106 + Instance inst = instances.instance(i);
  107 + if (inst.classValue() > 0)
  108 + positive++;
  109 + else
  110 + negative++;
  111 + }
  112 + logger.info(positive + " positive examples");
  113 + logger.info(negative + " negative examples");
  114 + logger.info((positive + negative) + " examples total");
  115 + logger.info((instances.numAttributes() - 1) + " attributes");
  116 + logger.info(instances.toSummaryString());
  117 + }
122 118  
123 119 }
... ...
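
A minimal sketch of invoking the trainer programmatically instead of from the command line (illustrative only, not part of the commit): the paths are placeholders and must point to a directory of training texts readable by InstanceCreator.loadExamples and to the desired output model file.

import pl.waw.ipipan.zil.core.md.detection.zero.Trainer;

public class TrainZeroSubjectModel {
    public static void main(String[] args) {
        // Trainer.main expects exactly two arguments:
        // a training data directory and a target model file path.
        Trainer.main(new String[] { "data/train", "target/zero-subject.model" });
    }
}
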
src/main/java/pl/waw/ipipan/zil/core/md/detection/zero/ZeroSubjectDetector.java
1 1 package pl.waw.ipipan.zil.core.md.detection.zero;
2 2  
3   -import java.io.File;
4   -import java.io.InputStream;
5   -import java.util.ArrayList;
6   -import java.util.HashSet;
7   -import java.util.List;
8   -import java.util.Set;
9   -import java.util.TreeMap;
10   -
11   -import org.apache.log4j.Logger;
12   -
  3 +import org.slf4j.Logger;
  4 +import org.slf4j.LoggerFactory;
13 5 import pl.waw.ipipan.zil.core.md.entities.Mention;
14 6 import pl.waw.ipipan.zil.core.md.entities.Sentence;
15 7 import pl.waw.ipipan.zil.core.md.entities.Token;
16 8 import weka.core.Instances;
17 9  
  10 +import java.io.File;
  11 +import java.io.InputStream;
  12 +import java.util.*;
  13 +
18 14 public class ZeroSubjectDetector {
19   - final private static Logger logger = Logger.getLogger(ZeroSubjectDetector.class);
20 15  
21   - private Model model;
22   - private Set<String> quasiVerbs = new HashSet<>();
  16 + final private static Logger logger = LoggerFactory.getLogger(ZeroSubjectDetector.class);
  17 +
  18 + private Model model;
  19 + private Set<String> quasiVerbs = new HashSet<>();
23 20  
24   - public static int verbsWithoutSubject = 0;
25   - public static int verbsWithSubject = 0;
  21 + public static int verbsWithoutSubject = 0;
  22 + public static int verbsWithSubject = 0;
26 23  
27   - public void addZeroSubjectMentions(Sentence sentence) {
28   - List<TreeMap<String, Object>> examples = new ArrayList<>();
29   - InstanceCreator.loadExamplesFromSentence(quasiVerbs, examples, sentence);
30   - if (examples.isEmpty())
31   - return;
  24 + public void addZeroSubjectMentions(Sentence sentence) {
  25 + List<TreeMap<String, Object>> examples = new ArrayList<>();
  26 + InstanceCreator.loadExamplesFromSentence(quasiVerbs, examples, sentence);
  27 + if (examples.isEmpty())
  28 + return;
32 29  
33   - Instances instances = model.getInstances(examples);
  30 + Instances instances = model.getInstances(examples);
34 31  
35   - // label instances
36   - List<Boolean> areZeros = new ArrayList<>();
37   - for (int i = 0; i < instances.numInstances(); i++) {
38   - boolean isZero = model.isZeroSubject(instances.instance(i), sentence);
39   - areZeros.add(isZero);
40   - if (isZero)
41   - verbsWithoutSubject++;
42   - else
43   - verbsWithSubject++;
44   - }
  32 + // label instances
  33 + List<Boolean> areZeros = new ArrayList<>();
  34 + for (int i = 0; i < instances.numInstances(); i++) {
  35 + boolean isZero = model.isZeroSubject(instances.instance(i), sentence);
  36 + areZeros.add(isZero);
  37 + if (isZero)
  38 + verbsWithoutSubject++;
  39 + else
  40 + verbsWithSubject++;
  41 + }
45 42  
46   - int i = 0;
47   - for (Token m : sentence) {
48   - if (!FeatureGeneration.isVerb(m))
49   - continue;
50   - if (areZeros.get(i))
51   - sentence.addMention(new Mention(m, true));
52   - i++;
53   - }
54   - }
  43 + int i = 0;
  44 + for (Token m : sentence) {
  45 + if (!FeatureGeneration.isVerb(m))
  46 + continue;
  47 + if (areZeros.get(i))
  48 + sentence.addMention(new Mention(m, true));
  49 + i++;
  50 + }
  51 + }
55 52  
56   - public ZeroSubjectDetector(File zeroSubjectDetectionModel) {
57   - try {
58   - this.model = Serializer.loadModel(zeroSubjectDetectionModel.getAbsolutePath());
59   - this.quasiVerbs = this.model.getQuasiVerbs();
60   - } catch (Exception e) {
61   - logger.error("Error loading model:" + e);
62   - }
63   - }
  53 + public ZeroSubjectDetector(File zeroSubjectDetectionModel) {
  54 + try {
  55 + this.model = Serializer.loadModel(zeroSubjectDetectionModel.getAbsolutePath());
  56 + this.quasiVerbs = this.model.getQuasiVerbs();
  57 + } catch (Exception e) {
  58 + logger.error("Error loading model:" + e);
  59 + }
  60 + }
64 61  
65   - public ZeroSubjectDetector(InputStream zeroSubjectDetectionModelStream) {
66   - try {
67   - this.model = Serializer.loadModelFromStream(zeroSubjectDetectionModelStream);
68   - this.quasiVerbs = this.model.getQuasiVerbs();
69   - } catch (Exception e) {
70   - logger.error("Error loading model:" + e);
71   - }
72   - }
  62 + public ZeroSubjectDetector(InputStream zeroSubjectDetectionModelStream) {
  63 + try {
  64 + this.model = Serializer.loadModelFromStream(zeroSubjectDetectionModelStream);
  65 + this.quasiVerbs = this.model.getQuasiVerbs();
  66 + } catch (Exception e) {
  67 + logger.error("Error loading model:" + e);
  68 + }
  69 + }
73 70 }
... ...
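
A minimal sketch of applying a trained model with the ZeroSubjectDetector shown above (illustrative only, not part of the commit): the model path is a placeholder for any file produced by Trainer.

import java.io.File;

import pl.waw.ipipan.zil.core.md.detection.zero.ZeroSubjectDetector;
import pl.waw.ipipan.zil.core.md.entities.Sentence;

public class ZeroSubjectSketch {
    public static void annotate(Sentence sentence) {
        // Loading the model per call is wasteful; a real caller would reuse the detector.
        ZeroSubjectDetector detector =
                new ZeroSubjectDetector(new File("target/zero-subject.model"));
        detector.addZeroSubjectMentions(sentence);
    }
}
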
src/main/java/pl/waw/ipipan/zil/core/md/entities/Token.java
1 1 package pl.waw.ipipan.zil.core.md.entities;
2 2  
3   -import java.util.ArrayList;
4   -import java.util.Collection;
5   -import java.util.Collections;
6   -import java.util.HashSet;
7   -import java.util.List;
8   -import java.util.Set;
  3 +import java.util.*;
9 4  
10 5 public class Token implements Comparable<Token> {
11 6 private Sentence sentence;
... ...
src/main/java/pl/waw/ipipan/zil/core/md/io/tei/TeiLoader.java
1 1 package pl.waw.ipipan.zil.core.md.io.tei;
2 2  
  3 +import org.slf4j.Logger;
  4 +import org.slf4j.LoggerFactory;
  5 +import pl.waw.ipipan.zil.core.md.entities.*;
  6 +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.*;
  7 +import pl.waw.ipipan.zil.nkjp.teiapi.api.exceptions.TEIException;
  8 +import pl.waw.ipipan.zil.nkjp.teiapi.api.io.TEI_IO;
  9 +
3 10 import java.io.File;
4 11 import java.util.ArrayList;
5 12 import java.util.HashMap;
6 13 import java.util.List;
7 14 import java.util.Map;
8 15  
9   -import org.apache.log4j.Logger;
10   -
11   -import pl.waw.ipipan.zil.core.md.entities.Interpretation;
12   -import pl.waw.ipipan.zil.core.md.entities.Mention;
13   -import pl.waw.ipipan.zil.core.md.entities.NamedEntity;
14   -import pl.waw.ipipan.zil.core.md.entities.Paragraph;
15   -import pl.waw.ipipan.zil.core.md.entities.Sentence;
16   -import pl.waw.ipipan.zil.core.md.entities.SyntacticGroup;
17   -import pl.waw.ipipan.zil.core.md.entities.SyntacticWord;
18   -import pl.waw.ipipan.zil.core.md.entities.Text;
19   -import pl.waw.ipipan.zil.core.md.entities.Token;
20   -import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEICorpusText;
21   -import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIGroup;
22   -import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIInterpretation;
23   -import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIMention;
24   -import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIMorph;
25   -import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEINamedEntity;
26   -import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIParagraph;
27   -import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEISentence;
28   -import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEISyntacticEntity;
29   -import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIWord;
30   -import pl.waw.ipipan.zil.nkjp.teiapi.api.exceptions.TEIException;
31   -import pl.waw.ipipan.zil.nkjp.teiapi.api.io.TEI_IO;
32   -
33 16 public class TeiLoader {
34 17  
35   - private static Logger logger = Logger.getLogger(TeiLoader.class);
36   - private static TEI_IO teiAPI = TEI_IO.getInstance();
37   -
38   - public static TEICorpusText readTeiText(File teiDir) throws TEIException {
39   - return teiAPI.readFromNKJPDirectory(teiDir);
40   - }
41   -
42   - public static Text loadTextFromTei(TEICorpusText teiText) {
43   - Text text = new Text(teiText.getCorpusHeader().getId());
44   -
45   - logger.debug("Loading tei text " + text.getId() + "...");
46   - for (TEIParagraph teiP : teiText.getParagraphs())
47   - loadParagraph(text, teiP);
48   - logger.debug("Tei text loaded.");
49   -
50   - return text;
51   - }
52   -
53   - private static void loadParagraph(Text text, TEIParagraph teiP) {
54   - Paragraph p = new Paragraph();
55   - text.add(p);
56   - for (TEISentence teiS : teiP.getSentences())
57   - loadSentence(p, teiS);
58   - }
59   -
60   - private static void loadSentence(Paragraph p, TEISentence teiS) {
61   - Sentence s = new Sentence();
62   - p.add(s);
63   - Map<TEIMorph, Token> teiMorph2Segment = new HashMap<>();
64   - for (TEIMorph teiM : teiS.getMorphs()) {
65   - Token token = loadToken(s, teiM);
66   - teiMorph2Segment.put(teiM, token);
67   - }
68   - for (TEINamedEntity ne : teiS.getAllNamedEntities())
69   - loadNE(s, ne, teiMorph2Segment);
70   - for (TEIWord w : teiS.getAllWords())
71   - loadSyntacticWord(s, w, teiMorph2Segment);
72   - for (TEIGroup g : teiS.getAllGroups())
73   - loadSyntacticGroup(s, g, teiMorph2Segment);
74   - for (TEIMention m : teiS.getAllMentions())
75   - loadMentions(s, m, teiMorph2Segment);
76   - }
77   -
78   - private static void loadMentions(Sentence s, TEIMention m,
79   - Map<TEIMorph, Token> teiMorph2Segment) {
80   - List<Token> tokens = new ArrayList<>();
81   - for (TEIMorph mo : m.getMorphs())
82   - tokens.add(teiMorph2Segment.get(mo));
83   - List<Token> headTokens = new ArrayList<>();
84   - for (TEIMorph mo : m.getHeadMorphs())
85   - headTokens.add(teiMorph2Segment.get(mo));
86   - s.addMention(new Mention(tokens, headTokens, m.isZeroSubject()));
87   - }
88   -
89   - private static void loadSyntacticGroup(Sentence s, TEIGroup g,
90   - Map<TEIMorph, Token> teiMorph2Segment) {
91   - String type = g.getType();
92   -
93   - List<Token> tokens = new ArrayList<>();
94   - for (TEIMorph m : g.getLeaves())
95   - tokens.add(teiMorph2Segment.get(m));
96   -
97   - List<Token> headTokens = new ArrayList<>();
98   - TEISyntacticEntity semanticHead = g;
99   - while (semanticHead.isGroup()
100   - && semanticHead.asGroup().getSemanticHead() != null)
101   - semanticHead = semanticHead.asGroup().getSemanticHead();
102   - for (TEIMorph m : semanticHead.getLeaves())
103   - headTokens.add(teiMorph2Segment.get(m));
104   -
105   - s.addSyntacticGroup(new SyntacticGroup(type, tokens, headTokens));
106   - }
107   -
108   - private static void loadSyntacticWord(Sentence s, TEIWord w,
109   - Map<TEIMorph, Token> teiMorph2Segment) {
110   - String ctag = w.getInterpretation().getCtag();
111   - List<Token> tokens = new ArrayList<>();
112   - for (TEIMorph m : w.getAllMorphs())
113   - tokens.add(teiMorph2Segment.get(m));
114   - s.addSyntacticWord(new SyntacticWord(ctag, tokens));
115   - }
116   -
117   - private static void loadNE(Sentence s, TEINamedEntity ne,
118   - Map<TEIMorph, Token> teiMorph2Segment) {
119   - List<Token> tokens = new ArrayList<>();
120   - for (TEIMorph m : ne.getLeaves())
121   - tokens.add(teiMorph2Segment.get(m));
122   - s.addNamedEntity(new NamedEntity(tokens));
123   - }
124   -
125   - private static Token loadToken(Sentence s, TEIMorph teiM) {
126   - Token seg = new Token();
127   - s.add(seg);
128   -
129   - seg.setOrth(teiM.getOrth());
130   - TEIInterpretation interp = teiM.getChosenInterpretation();
131   - Interpretation chosenIterpretation = new Interpretation(
132   - interp.getCtag(), interp.getMorph(), interp.getBase());
133   - seg.addChosenInterpretation(chosenIterpretation);
134   -
135   - for (TEIInterpretation interp2 : teiM.getAllInterpretations()) {
136   - Interpretation inter = new Interpretation(interp2.getCtag(),
137   - interp2.getMorph(), interp.getBase());
138   - seg.addInterpretation(inter);
139   - }
140   -
141   - return seg;
142   - }
  18 + private static Logger logger = LoggerFactory.getLogger(TeiLoader.class);
  19 + private static TEI_IO teiAPI = TEI_IO.getInstance();
  20 +
  21 + private TeiLoader() {
  22 + }
  23 +
  24 + public static TEICorpusText readTeiText(File teiDir) throws TEIException {
  25 + return teiAPI.readFromNKJPDirectory(teiDir);
  26 + }
  27 +
  28 + public static Text loadTextFromTei(TEICorpusText teiText) {
  29 + Text text = new Text(teiText.getCorpusHeader().getId());
  30 +
  31 + logger.debug("Loading tei text " + text.getId() + "...");
  32 + for (TEIParagraph teiP : teiText.getParagraphs())
  33 + loadParagraph(text, teiP);
  34 + logger.debug("Tei text loaded.");
  35 +
  36 + return text;
  37 + }
  38 +
  39 + private static void loadParagraph(Text text, TEIParagraph teiP) {
  40 + Paragraph p = new Paragraph();
  41 + text.add(p);
  42 + for (TEISentence teiS : teiP.getSentences())
  43 + loadSentence(p, teiS);
  44 + }
  45 +
  46 + private static void loadSentence(Paragraph p, TEISentence teiS) {
  47 + Sentence s = new Sentence();
  48 + p.add(s);
  49 + Map<TEIMorph, Token> teiMorph2Segment = new HashMap<>();
  50 + for (TEIMorph teiM : teiS.getMorphs()) {
  51 + Token token = loadToken(s, teiM);
  52 + teiMorph2Segment.put(teiM, token);
  53 + }
  54 + for (TEINamedEntity ne : teiS.getAllNamedEntities())
  55 + loadNE(s, ne, teiMorph2Segment);
  56 + for (TEIWord w : teiS.getAllWords())
  57 + loadSyntacticWord(s, w, teiMorph2Segment);
  58 + for (TEIGroup g : teiS.getAllGroups())
  59 + loadSyntacticGroup(s, g, teiMorph2Segment);
  60 + for (TEIMention m : teiS.getAllMentions())
  61 + loadMentions(s, m, teiMorph2Segment);
  62 + }
  63 +
  64 + private static void loadMentions(Sentence s, TEIMention m,
  65 + Map<TEIMorph, Token> teiMorph2Segment) {
  66 + List<Token> tokens = new ArrayList<>();
  67 + for (TEIMorph mo : m.getMorphs())
  68 + tokens.add(teiMorph2Segment.get(mo));
  69 + List<Token> headTokens = new ArrayList<>();
  70 + for (TEIMorph mo : m.getHeadMorphs())
  71 + headTokens.add(teiMorph2Segment.get(mo));
  72 + s.addMention(new Mention(tokens, headTokens, m.isZeroSubject()));
  73 + }
  74 +
  75 + private static void loadSyntacticGroup(Sentence s, TEIGroup g,
  76 + Map<TEIMorph, Token> teiMorph2Segment) {
  77 + String type = g.getType();
  78 +
  79 + List<Token> tokens = new ArrayList<>();
  80 + for (TEIMorph m : g.getLeaves())
  81 + tokens.add(teiMorph2Segment.get(m));
  82 +
  83 + List<Token> headTokens = new ArrayList<>();
  84 + TEISyntacticEntity semanticHead = g;
  85 + while (semanticHead.isGroup()
  86 + && semanticHead.asGroup().getSemanticHead() != null)
  87 + semanticHead = semanticHead.asGroup().getSemanticHead();
  88 + for (TEIMorph m : semanticHead.getLeaves())
  89 + headTokens.add(teiMorph2Segment.get(m));
  90 +
  91 + s.addSyntacticGroup(new SyntacticGroup(type, tokens, headTokens));
  92 + }
  93 +
  94 + private static void loadSyntacticWord(Sentence s, TEIWord w,
  95 + Map<TEIMorph, Token> teiMorph2Segment) {
  96 + String ctag = w.getInterpretation().getCtag();
  97 + List<Token> tokens = new ArrayList<>();
  98 + for (TEIMorph m : w.getAllMorphs())
  99 + tokens.add(teiMorph2Segment.get(m));
  100 + s.addSyntacticWord(new SyntacticWord(ctag, tokens));
  101 + }
  102 +
  103 + private static void loadNE(Sentence s, TEINamedEntity ne,
  104 + Map<TEIMorph, Token> teiMorph2Segment) {
  105 + List<Token> tokens = new ArrayList<>();
  106 + for (TEIMorph m : ne.getLeaves())
  107 + tokens.add(teiMorph2Segment.get(m));
  108 + s.addNamedEntity(new NamedEntity(tokens));
  109 + }
  110 +
  111 + private static Token loadToken(Sentence s, TEIMorph teiM) {
  112 + Token seg = new Token();
  113 + s.add(seg);
  114 +
  115 + seg.setOrth(teiM.getOrth());
  116 + TEIInterpretation interp = teiM.getChosenInterpretation();
  117 + Interpretation chosenIterpretation = new Interpretation(
  118 + interp.getCtag(), interp.getMorph(), interp.getBase());
  119 + seg.addChosenInterpretation(chosenIterpretation);
  120 +
  121 + for (TEIInterpretation interp2 : teiM.getAllInterpretations()) {
  122 + Interpretation inter = new Interpretation(interp2.getCtag(),
  123 + interp2.getMorph(), interp.getBase());
  124 + seg.addInterpretation(inter);
  125 + }
  126 +
  127 + return seg;
  128 + }
143 129  
144 130 }
... ...
src/main/java/pl/waw/ipipan/zil/core/md/io/tei/TeiSaver.java
1 1 package pl.waw.ipipan.zil.core.md.io.tei;
2 2  
3   -import java.io.File;
4   -import java.util.ArrayList;
5   -import java.util.HashMap;
6   -import java.util.Iterator;
7   -import java.util.List;
8   -import java.util.Map;
9   -
10   -import org.apache.log4j.Logger;
11   -
12   -import pl.waw.ipipan.zil.core.md.entities.Mention;
13   -import pl.waw.ipipan.zil.core.md.entities.Paragraph;
14   -import pl.waw.ipipan.zil.core.md.entities.Sentence;
15   -import pl.waw.ipipan.zil.core.md.entities.Text;
16   -import pl.waw.ipipan.zil.core.md.entities.Token;
17   -import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.AnnotationLayer;
18   -import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.EntitiesFactory;
19   -import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEICoreference;
20   -import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEICorpusText;
21   -import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIMention;
22   -import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIMorph;
23   -import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIParagraph;
24   -import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEISentence;
  3 +import org.slf4j.Logger;
  4 +import org.slf4j.LoggerFactory;
  5 +import pl.waw.ipipan.zil.core.md.entities.*;
  6 +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.*;
25 7 import pl.waw.ipipan.zil.nkjp.teiapi.api.exceptions.TEIException;
26 8 import pl.waw.ipipan.zil.nkjp.teiapi.api.io.TEI_IO;
27 9 import pl.waw.ipipan.zil.nkjp.teiapi.api.io.TEI_IO.CompressionMethod;
28 10  
  11 +import java.io.File;
  12 +import java.util.*;
  13 +
29 14 public class TeiSaver {
30 15  
31   - private static Logger logger = Logger.getLogger(TeiSaver.class);
32   - private static TEI_IO teiAPI = TEI_IO.getInstance();
33   - final private static EntitiesFactory ef = EntitiesFactory.getInstance();
34   -
35   - public static void saveTeiText(TEICorpusText teiText, File targetDir, boolean gzip) throws TEIException {
36   - logger.debug("Saving text in " + targetDir);
37   - CompressionMethod cm = gzip ? CompressionMethod.GZIP : CompressionMethod.NONE;
38   - teiAPI.writeToNKJPDirectory(teiText, targetDir, cm);
39   - }
40   -
41   - public static void updateTeiText(Text t, TEICorpusText teiText) throws TEIException {
42   - Map<Mention, TEIMention> mention2mention = new HashMap<Mention, TEIMention>();
43   -
44   - Iterator<Paragraph> pIt = t.iterator();
45   - Iterator<TEIParagraph> pItTei = teiText.getParagraphs().iterator();
46   - int mentionId = 0;
47   - while (pIt.hasNext() && pItTei.hasNext()) {
48   - Paragraph p = pIt.next();
49   - TEIParagraph pTei = pItTei.next();
50   -
51   - mentionId = updateTeiParagraph(mention2mention, mentionId, p, pTei);
52   - }
53   - checkIterators(pIt, pItTei, "paragraph");
54   -
55   - teiText.addAnnotationLayer(AnnotationLayer.MENTIONS,
56   - EntitiesFactory.getInstance().createHeader(AnnotationLayer.MENTIONS));
57   -
58   - // clear coreference as we have new mentions it became invalid
59   - teiText.getAnnotationLayers().remove(AnnotationLayer.COREFERENCE);
60   - teiText.setCoreferences(new ArrayList<TEICoreference>());
61   -
62   - logger.debug(mentionId + " mentions added");
63   - }
64   -
65   - private static int updateTeiParagraph(Map<Mention, TEIMention> mention2mention, int mentionId, Paragraph p,
66   - TEIParagraph pTei) throws TEIException {
67   - Iterator<Sentence> sIt = p.iterator();
68   - Iterator<TEISentence> sItTei = pTei.getSentences().iterator();
69   -
70   - while (sIt.hasNext() && sItTei.hasNext()) {
71   - Sentence s = sIt.next();
72   - TEISentence sTei = sItTei.next();
73   - mentionId = updateTeiSentence(mention2mention, mentionId, s, sTei);
74   - }
75   - checkIterators(sIt, sItTei, "sentence");
76   - return mentionId;
77   - }
78   -
79   - private static int updateTeiSentence(Map<Mention, TEIMention> mention2mention, int mentionId, Sentence s,
80   - TEISentence sTei) throws TEIException {
81   - sTei.getAllMentions().clear();
82   -
83   - Map<Token, TEIMorph> seg2morph = new HashMap<Token, TEIMorph>();
84   -
85   - Iterator<Token> segIt = s.iterator();
86   - Iterator<TEIMorph> segItTei = sTei.getMorphs().iterator();
87   -
88   - while (segIt.hasNext() && segItTei.hasNext()) {
89   - seg2morph.put(segIt.next(), segItTei.next());
90   - }
91   - checkIterators(segIt, segItTei, "token");
92   -
93   - List<TEIMention> mentions = new ArrayList<TEIMention>();
94   -
95   - for (Mention m : s.getMentions()) {
96   - List<TEIMorph> morphs = new ArrayList<TEIMorph>();
97   - List<TEIMorph> heads = new ArrayList<TEIMorph>();
98   -
99   - for (Token seg : m.getSegments())
100   - morphs.add(seg2morph.get(seg));
101   -
102   - for (Token seg : m.getHeadSegments())
103   - heads.add(seg2morph.get(seg));
104   -
105   - TEIMention mention = ef.createMention("mention_" + mentionId++, morphs, heads, m.isZeroSubject());
106   - mentions.add(mention);
107   - mention2mention.put(m, mention);
108   - }
109   - sTei.setMentions(mentions);
110   - return mentionId;
111   - }
112   -
113   - private static void checkIterators(Iterator<? extends Object> one, Iterator<? extends Object> other, String level)
114   - throws TEIException {
115   - if (one.hasNext() || other.hasNext())
116   - throw new TEIException("Problem mapping tei to thrift for level " + level);
117   - }
  16 + private static final Logger logger = LoggerFactory.getLogger(TeiSaver.class);
  17 + private static final TEI_IO teiAPI = TEI_IO.getInstance();
  18 + private static final EntitiesFactory ef = EntitiesFactory.getInstance();
  19 +
  20 + private TeiSaver() {
  21 + }
  22 +
  23 + public static void saveTeiText(TEICorpusText teiText, File targetDir, boolean gzip) throws TEIException {
  24 + logger.debug("Saving text in " + targetDir);
  25 + CompressionMethod cm = gzip ? CompressionMethod.GZIP : CompressionMethod.NONE;
  26 + teiAPI.writeToNKJPDirectory(teiText, targetDir, cm);
  27 + }
  28 +
  29 + public static void updateTeiText(Text t, TEICorpusText teiText) throws TEIException {
  30 + Map<Mention, TEIMention> mention2mention = new HashMap<Mention, TEIMention>();
  31 +
  32 + Iterator<Paragraph> pIt = t.iterator();
  33 + Iterator<TEIParagraph> pItTei = teiText.getParagraphs().iterator();
  34 + int mentionId = 0;
  35 + while (pIt.hasNext() && pItTei.hasNext()) {
  36 + Paragraph p = pIt.next();
  37 + TEIParagraph pTei = pItTei.next();
  38 +
  39 + mentionId = updateTeiParagraph(mention2mention, mentionId, p, pTei);
  40 + }
  41 + checkIterators(pIt, pItTei, "paragraph");
  42 +
  43 + teiText.addAnnotationLayer(AnnotationLayer.MENTIONS,
  44 + EntitiesFactory.getInstance().createHeader(AnnotationLayer.MENTIONS));
  45 +
  46 + // clear the coreference layer: the existing coreference became invalid once new mentions were added
  47 + teiText.getAnnotationLayers().remove(AnnotationLayer.COREFERENCE);
  48 + teiText.setCoreferences(new ArrayList<TEICoreference>());
  49 +
  50 + logger.debug(mentionId + " mentions added");
  51 + }
  52 +
  53 + private static int updateTeiParagraph(Map<Mention, TEIMention> mention2mention, int mentionId, Paragraph p,
  54 + TEIParagraph pTei) throws TEIException {
  55 + Iterator<Sentence> sIt = p.iterator();
  56 + Iterator<TEISentence> sItTei = pTei.getSentences().iterator();
  57 +
  58 + while (sIt.hasNext() && sItTei.hasNext()) {
  59 + Sentence s = sIt.next();
  60 + TEISentence sTei = sItTei.next();
  61 + mentionId = updateTeiSentence(mention2mention, mentionId, s, sTei);
  62 + }
  63 + checkIterators(sIt, sItTei, "sentence");
  64 + return mentionId;
  65 + }
  66 +
  67 + private static int updateTeiSentence(Map<Mention, TEIMention> mention2mention, int mentionId, Sentence s,
  68 + TEISentence sTei) throws TEIException {
  69 + sTei.getAllMentions().clear();
  70 +
  71 + Map<Token, TEIMorph> seg2morph = new HashMap<>();
  72 +
  73 + Iterator<Token> segIt = s.iterator();
  74 + Iterator<TEIMorph> segItTei = sTei.getMorphs().iterator();
  75 +
  76 + while (segIt.hasNext() && segItTei.hasNext()) {
  77 + seg2morph.put(segIt.next(), segItTei.next());
  78 + }
  79 + checkIterators(segIt, segItTei, "token");
  80 +
  81 + List<TEIMention> mentions = new ArrayList<>();
  82 +
  83 + for (Mention m : s.getMentions()) {
  84 + List<TEIMorph> morphs = new ArrayList<>();
  85 + List<TEIMorph> heads = new ArrayList<>();
  86 +
  87 + for (Token seg : m.getSegments())
  88 + morphs.add(seg2morph.get(seg));
  89 +
  90 + for (Token seg : m.getHeadSegments())
  91 + heads.add(seg2morph.get(seg));
  92 +
  93 + TEIMention mention = ef.createMention("mention_" + mentionId++, morphs, heads, m.isZeroSubject());
  94 + mentions.add(mention);
  95 + mention2mention.put(m, mention);
  96 + }
  97 + sTei.setMentions(mentions);
  98 + return mentionId;
  99 + }
  100 +
  101 + private static void checkIterators(Iterator<?> one, Iterator<?> other, String level)
  102 + throws TEIException {
  103 + if (one.hasNext() || other.hasNext())
  104 + throw new TEIException("Problem mapping tei to thrift for level " + level);
  105 + }
118 106  
119 107 }
... ...
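
A minimal sketch of the TEI round trip implemented by TeiLoader and TeiSaver above (illustrative only, not part of the commit): the directory names are placeholders, the input must be an NKJP-style TEI directory, and the mention detection step itself is elided.

import java.io.File;

import pl.waw.ipipan.zil.core.md.entities.Text;
import pl.waw.ipipan.zil.core.md.io.tei.TeiLoader;
import pl.waw.ipipan.zil.core.md.io.tei.TeiSaver;
import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEICorpusText;

public class TeiRoundTripSketch {
    public static void main(String[] args) throws Exception {
        TEICorpusText teiText = TeiLoader.readTeiText(new File("input/text1"));
        Text text = TeiLoader.loadTextFromTei(teiText);
        // ... mention detection would add mentions to 'text' here ...
        TeiSaver.updateTeiText(text, teiText);                          // copy mentions back onto the TEI structure
        TeiSaver.saveTeiText(teiText, new File("output/text1"), true);  // write gzipped TEI files
    }
}
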
src/main/java/pl/waw/ipipan/zil/core/md/io/thrift/ThriftLoader.java
1 1 package pl.waw.ipipan.zil.core.md.io.thrift;
2 2  
  3 +import org.slf4j.Logger;
  4 +import org.slf4j.LoggerFactory;
  5 +import pl.waw.ipipan.zil.core.md.entities.*;
  6 +import pl.waw.ipipan.zil.multiservice.thrift.types.*;
  7 +
3 8 import java.util.ArrayList;
4 9 import java.util.HashMap;
5 10 import java.util.List;
6 11 import java.util.Map;
7 12  
8   -import org.apache.log4j.Logger;
9   -
10   -import pl.waw.ipipan.zil.core.md.entities.Interpretation;
11   -import pl.waw.ipipan.zil.core.md.entities.NamedEntity;
12   -import pl.waw.ipipan.zil.core.md.entities.Paragraph;
13   -import pl.waw.ipipan.zil.core.md.entities.Sentence;
14   -import pl.waw.ipipan.zil.core.md.entities.SyntacticGroup;
15   -import pl.waw.ipipan.zil.core.md.entities.SyntacticWord;
16   -import pl.waw.ipipan.zil.core.md.entities.Text;
17   -import pl.waw.ipipan.zil.core.md.entities.Token;
18   -import pl.waw.ipipan.zil.multiservice.thrift.types.MultiserviceException;
19   -import pl.waw.ipipan.zil.multiservice.thrift.types.TInterpretation;
20   -import pl.waw.ipipan.zil.multiservice.thrift.types.TNamedEntity;
21   -import pl.waw.ipipan.zil.multiservice.thrift.types.TParagraph;
22   -import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
23   -import pl.waw.ipipan.zil.multiservice.thrift.types.TSyntacticGroup;
24   -import pl.waw.ipipan.zil.multiservice.thrift.types.TSyntacticWord;
25   -import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
26   -import pl.waw.ipipan.zil.multiservice.thrift.types.TToken;
27   -
28 13 public class ThriftLoader {
29 14  
30   - private static Logger logger = Logger.getLogger(ThriftLoader.class);
31   -
32   - public static Text loadTextFromThrift(TText thriftText)
33   - throws MultiserviceException {
34   - Text text = new Text(thriftText.getTextHeader() == null ? "null"
35   - : thriftText.getTextHeader().getId());
36   -
37   - logger.debug("Loading text " + text.getId() + " from thrift format...");
38   - for (TParagraph teiP : thriftText.getParagraphs())
39   - loadParagraph(text, teiP);
40   - logger.debug("Thrift text loaded.");
41   -
42   - return text;
43   - }
44   -
45   - private static void loadParagraph(Text text, TParagraph teiP)
46   - throws MultiserviceException {
47   - Paragraph p = new Paragraph();
48   - text.add(p);
49   -
50   - for (TSentence teiS : teiP.getSentences())
51   - loadSentence(p, teiS);
52   - }
53   -
54   - private static void loadSentence(Paragraph p, TSentence thriftSent)
55   - throws MultiserviceException {
56   - Sentence s = new Sentence();
57   - p.add(s);
58   -
59   - Map<String, Object> thirftId2Entity = getThriftId2EntityMap(thriftSent);
60   -
61   - Map<String, Token> thiftTokenId2Token = new HashMap<>();
62   - for (TToken teiM : thriftSent.getTokens()) {
63   - Token token = loadToken(s, teiM);
64   - thiftTokenId2Token.put(teiM.getId(), token);
65   - }
66   - if (thriftSent.isSetNames())
67   - for (TNamedEntity ne : thriftSent.getNames())
68   - loadNE(s, ne, thirftId2Entity, thiftTokenId2Token);
69   - if (thriftSent.isSetWords())
70   - for (TSyntacticWord w : thriftSent.getWords())
71   - loadSyntacticWord(s, w, thirftId2Entity, thiftTokenId2Token);
72   - if (thriftSent.isSetGroups())
73   - for (TSyntacticGroup g : thriftSent.getGroups())
74   - loadSyntacticGroup(s, g, thirftId2Entity, thiftTokenId2Token);
75   - }
76   -
77   - private static void loadSyntacticGroup(Sentence s, TSyntacticGroup g,
78   - Map<String, Object> thirftId2Entity,
79   - Map<String, Token> thiftTokenId2Token) {
80   - String type = g.getType();
81   - List<Token> tokens = getUnderlyingSegments(g, thirftId2Entity,
82   - thiftTokenId2Token, false);
83   - List<Token> headTokens = getUnderlyingSegments(g, thirftId2Entity,
84   - thiftTokenId2Token, true);
85   - s.addSyntacticGroup(new SyntacticGroup(type, tokens, headTokens));
86   - }
87   -
88   - private static void loadSyntacticWord(Sentence s, TSyntacticWord w,
89   - Map<String, Object> thirftId2Entity,
90   - Map<String, Token> thiftTokenId2Token) {
91   - String ctag = w.getChosenInterpretation().getCtag();
92   - List<Token> tokens = getUnderlyingSegments(w, thirftId2Entity,
93   - thiftTokenId2Token, false);
94   - s.addSyntacticWord(new SyntacticWord(ctag, tokens));
95   - }
96   -
97   - private static void loadNE(Sentence s, TNamedEntity ne,
98   - Map<String, Object> thirftId2Entity,
99   - Map<String, Token> thiftTokenId2Token) {
100   - List<Token> tokens = getUnderlyingSegments(ne, thirftId2Entity,
101   - thiftTokenId2Token, false);
102   - s.addNamedEntity(new NamedEntity(tokens));
103   - }
104   -
105   - private static Map<String, Object> getThriftId2EntityMap(
106   - TSentence thriftSent) {
107   - Map<String, Object> idToEntity = new HashMap<>();
108   - for (TToken tok : thriftSent.getTokens())
109   - idToEntity.put(tok.getId(), tok);
110   - if (thriftSent.isSetWords())
111   - for (TSyntacticWord w : thriftSent.getWords())
112   - idToEntity.put(w.getId(), w);
113   - if (thriftSent.isSetNames())
114   - for (TNamedEntity ne : thriftSent.getNames())
115   - idToEntity.put(ne.getId(), ne);
116   - if (thriftSent.isSetGroups())
117   - for (TSyntacticGroup group : thriftSent.getGroups())
118   - idToEntity.put(group.getId(), group);
119   - return idToEntity;
120   - }
121   -
122   - private static Token loadToken(Sentence s, TToken teiM)
123   - throws MultiserviceException {
124   - Token seg = new Token();
125   - s.add(seg);
126   -
127   - seg.setOrth(teiM.getOrth());
128   - TInterpretation interp = getTokenChosenInt(teiM);
129   - Interpretation chosenIterpretation = new Interpretation(
130   - interp.getCtag(), interp.getMsd(), interp.getBase());
131   - seg.addChosenInterpretation(chosenIterpretation);
132   -
133   - for (TInterpretation interp2 : teiM.getInterpretations()) {
134   - Interpretation inter = new Interpretation(interp2.getCtag(),
135   - interp2.getMsd(), interp.getBase());
136   - seg.addInterpretation(inter);
137   - }
138   - return seg;
139   - }
140   -
141   - private static TInterpretation getTokenChosenInt(TToken token)
142   - throws MultiserviceException {
143   - TInterpretation interp = token.getChosenInterpretation();
144   - if (interp == null || interp.getBase() == null
145   - || interp.getBase().equals("")) {
146   - if (token.getCandidateInterpretations() == null
147   - || token.getCandidateInterpretations().size() == 0
148   - || token.getCandidateInterpretations().get(0).getBase() == null
149   - || token.getCandidateInterpretations().get(0).getBase()
150   - .equals(""))
151   - throw new MultiserviceException(
152   - "No proper chosen or candidate interpretation for segment: "
153   - + token.id);
154   - interp = token.getCandidateInterpretations().get(0);
155   - }
156   - return interp;
157   - }
158   -
159   - private static List<Token> getUnderlyingSegments(Object entity,
160   - Map<String, Object> idToEntity, Map<String, Token> tokenId2Segment,
161   - boolean headsOnly) {
162   - List<Token> result = new ArrayList<>();
163   -
164   - if (entity instanceof TToken) {
165   - result.add(tokenId2Segment.get(((TToken) entity).getId()));
166   - return result;
167   - }
168   -
169   - List<String> childIds = new ArrayList<>();
170   - if (entity instanceof TSyntacticWord)
171   - childIds = ((TSyntacticWord) entity).getChildIds();
172   - else if (entity instanceof TNamedEntity)
173   - childIds = ((TNamedEntity) entity).getChildIds();
174   - else if (entity instanceof TSyntacticGroup)
175   - if (headsOnly) {
176   - childIds = new ArrayList<String>();
177   - childIds.add(((TSyntacticGroup) entity).getSemanticHeadId());
178   - } else
179   - childIds = ((TSyntacticGroup) entity).getChildIds();
180   -
181   - for (String id : childIds)
182   - result.addAll(getUnderlyingSegments(idToEntity.get(id), idToEntity,
183   - tokenId2Segment, headsOnly));
184   -
185   - return result;
186   - }
  15 + private static Logger logger = LoggerFactory.getLogger(ThriftLoader.class);
  16 +
  17 + public static Text loadTextFromThrift(TText thriftText)
  18 + throws MultiserviceException {
  19 + Text text = new Text(thriftText.getTextHeader() == null ? "null"
  20 + : thriftText.getTextHeader().getId());
  21 +
  22 + logger.debug("Loading text " + text.getId() + " from thrift format...");
  23 + for (TParagraph teiP : thriftText.getParagraphs())
  24 + loadParagraph(text, teiP);
  25 + logger.debug("Thrift text loaded.");
  26 +
  27 + return text;
  28 + }
  29 +
  30 + private static void loadParagraph(Text text, TParagraph teiP)
  31 + throws MultiserviceException {
  32 + Paragraph p = new Paragraph();
  33 + text.add(p);
  34 +
  35 + for (TSentence teiS : teiP.getSentences())
  36 + loadSentence(p, teiS);
  37 + }
  38 +
  39 + private static void loadSentence(Paragraph p, TSentence thriftSent)
  40 + throws MultiserviceException {
  41 + Sentence s = new Sentence();
  42 + p.add(s);
  43 +
  44 + Map<String, Object> thirftId2Entity = getThriftId2EntityMap(thriftSent);
  45 +
  46 + Map<String, Token> thiftTokenId2Token = new HashMap<>();
  47 + for (TToken teiM : thriftSent.getTokens()) {
  48 + Token token = loadToken(s, teiM);
  49 + thiftTokenId2Token.put(teiM.getId(), token);
  50 + }
  51 + if (thriftSent.isSetNames())
  52 + for (TNamedEntity ne : thriftSent.getNames())
  53 + loadNE(s, ne, thirftId2Entity, thiftTokenId2Token);
  54 + if (thriftSent.isSetWords())
  55 + for (TSyntacticWord w : thriftSent.getWords())
  56 + loadSyntacticWord(s, w, thirftId2Entity, thiftTokenId2Token);
  57 + if (thriftSent.isSetGroups())
  58 + for (TSyntacticGroup g : thriftSent.getGroups())
  59 + loadSyntacticGroup(s, g, thirftId2Entity, thiftTokenId2Token);
  60 + }
  61 +
  62 + private static void loadSyntacticGroup(Sentence s, TSyntacticGroup g,
  63 + Map<String, Object> thirftId2Entity,
  64 + Map<String, Token> thiftTokenId2Token) {
  65 + String type = g.getType();
  66 + List<Token> tokens = getUnderlyingSegments(g, thirftId2Entity,
  67 + thiftTokenId2Token, false);
  68 + List<Token> headTokens = getUnderlyingSegments(g, thirftId2Entity,
  69 + thiftTokenId2Token, true);
  70 + s.addSyntacticGroup(new SyntacticGroup(type, tokens, headTokens));
  71 + }
  72 +
  73 + private static void loadSyntacticWord(Sentence s, TSyntacticWord w,
  74 + Map<String, Object> thirftId2Entity,
  75 + Map<String, Token> thiftTokenId2Token) {
  76 + String ctag = w.getChosenInterpretation().getCtag();
  77 + List<Token> tokens = getUnderlyingSegments(w, thirftId2Entity,
  78 + thiftTokenId2Token, false);
  79 + s.addSyntacticWord(new SyntacticWord(ctag, tokens));
  80 + }
  81 +
  82 + private static void loadNE(Sentence s, TNamedEntity ne,
  83 + Map<String, Object> thirftId2Entity,
  84 + Map<String, Token> thiftTokenId2Token) {
  85 + List<Token> tokens = getUnderlyingSegments(ne, thirftId2Entity,
  86 + thiftTokenId2Token, false);
  87 + s.addNamedEntity(new NamedEntity(tokens));
  88 + }
  89 +
  90 + private static Map<String, Object> getThriftId2EntityMap(
  91 + TSentence thriftSent) {
  92 + Map<String, Object> idToEntity = new HashMap<>();
  93 + for (TToken tok : thriftSent.getTokens())
  94 + idToEntity.put(tok.getId(), tok);
  95 + if (thriftSent.isSetWords())
  96 + for (TSyntacticWord w : thriftSent.getWords())
  97 + idToEntity.put(w.getId(), w);
  98 + if (thriftSent.isSetNames())
  99 + for (TNamedEntity ne : thriftSent.getNames())
  100 + idToEntity.put(ne.getId(), ne);
  101 + if (thriftSent.isSetGroups())
  102 + for (TSyntacticGroup group : thriftSent.getGroups())
  103 + idToEntity.put(group.getId(), group);
  104 + return idToEntity;
  105 + }
  106 +
  107 + private static Token loadToken(Sentence s, TToken teiM)
  108 + throws MultiserviceException {
  109 + Token seg = new Token();
  110 + s.add(seg);
  111 +
  112 + seg.setOrth(teiM.getOrth());
  113 + TInterpretation interp = getTokenChosenInt(teiM);
  114 + Interpretation chosenIterpretation = new Interpretation(
  115 + interp.getCtag(), interp.getMsd(), interp.getBase());
  116 + seg.addChosenInterpretation(chosenIterpretation);
  117 +
  118 + for (TInterpretation interp2 : teiM.getInterpretations()) {
  119 + Interpretation inter = new Interpretation(interp2.getCtag(),
  120 + interp2.getMsd(), interp.getBase());
  121 + seg.addInterpretation(inter);
  122 + }
  123 + return seg;
  124 + }
  125 +
  126 + private static TInterpretation getTokenChosenInt(TToken token)
  127 + throws MultiserviceException {
  128 + TInterpretation interp = token.getChosenInterpretation();
  129 + if (interp == null || interp.getBase() == null
  130 + || "".equals(interp.getBase())) {
  131 + if (token.getCandidateInterpretations() == null
  132 + || token.getCandidateInterpretations().isEmpty()
  133 + || token.getCandidateInterpretations().get(0).getBase() == null
  134 + || "".equals(token.getCandidateInterpretations().get(0).getBase()))
  135 + throw new MultiserviceException(
  136 + "No proper chosen or candidate interpretation for segment: "
  137 + + token.id);
  138 + interp = token.getCandidateInterpretations().get(0);
  139 + }
  140 + return interp;
  141 + }
  142 +
  143 + private static List<Token> getUnderlyingSegments(Object entity,
  144 + Map<String, Object> idToEntity, Map<String, Token> tokenId2Segment,
  145 + boolean headsOnly) {
  146 + List<Token> result = new ArrayList<>();
  147 +
  148 + if (entity instanceof TToken) {
  149 + result.add(tokenId2Segment.get(((TToken) entity).getId()));
  150 + return result;
  151 + }
  152 +
  153 + List<String> childIds = new ArrayList<>();
  154 + if (entity instanceof TSyntacticWord)
  155 + childIds = ((TSyntacticWord) entity).getChildIds();
  156 + else if (entity instanceof TNamedEntity)
  157 + childIds = ((TNamedEntity) entity).getChildIds();
  158 + else if (entity instanceof TSyntacticGroup)
  159 + if (headsOnly) {
  160 + childIds = new ArrayList<>();
  161 + childIds.add(((TSyntacticGroup) entity).getSemanticHeadId());
  162 + } else
  163 + childIds = ((TSyntacticGroup) entity).getChildIds();
  164 +
  165 + for (String id : childIds)
  166 + result.addAll(getUnderlyingSegments(idToEntity.get(id), idToEntity,
  167 + tokenId2Segment, headsOnly));
  168 +
  169 + return result;
  170 + }
187 171 }
... ...
src/main/java/pl/waw/ipipan/zil/core/md/io/thrift/ThriftSaver.java
1 1 package pl.waw.ipipan.zil.core.md.io.thrift;
2 2  
3   -import java.util.ArrayList;
4   -import java.util.HashMap;
5   -import java.util.Iterator;
6   -import java.util.List;
7   -import java.util.Map;
8   -
9   -import org.apache.log4j.Logger;
10   -
11   -import pl.waw.ipipan.zil.core.md.entities.Mention;
12   -import pl.waw.ipipan.zil.core.md.entities.Paragraph;
13   -import pl.waw.ipipan.zil.core.md.entities.Sentence;
14   -import pl.waw.ipipan.zil.core.md.entities.Text;
15   -import pl.waw.ipipan.zil.core.md.entities.Token;
16   -import pl.waw.ipipan.zil.multiservice.thrift.types.MultiserviceException;
17   -import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
18   -import pl.waw.ipipan.zil.multiservice.thrift.types.TParagraph;
19   -import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
20   -import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
21   -import pl.waw.ipipan.zil.multiservice.thrift.types.TToken;
  3 +import org.slf4j.Logger;
  4 +import org.slf4j.LoggerFactory;
  5 +import pl.waw.ipipan.zil.core.md.entities.*;
  6 +import pl.waw.ipipan.zil.multiservice.thrift.types.*;
  7 +
  8 +import java.util.*;
22 9  
23 10 public class ThriftSaver {
24 11  
25   - private static Logger logger = Logger.getLogger(ThriftSaver.class);
26   -
27   - public static void updateThriftText(Text responseText, TText text)
28   - throws MultiserviceException {
29   -
30   - logger.debug("Updating thrift text...");
31   - Map<Mention, TMention> teiMention2ThriftMention = new HashMap<>();
32   -
33   - Iterator<TParagraph> thrPI = text.getParagraphsIterator();
34   - Iterator<Paragraph> teiPI = responseText.iterator();
35   - int freeMentionId = 0;
36   - while (thrPI.hasNext() && teiPI.hasNext()) {
37   - TParagraph thrP = thrPI.next();
38   - Paragraph teiP = teiPI.next();
39   -
40   - freeMentionId = updateThriftParagraph(teiMention2ThriftMention,
41   - freeMentionId, thrP, teiP);
42   - }
43   - checkIterators(thrPI, teiPI, "paragraph");
44   - }
45   -
46   - private static int updateThriftParagraph(
47   - Map<Mention, TMention> teiMention2ThriftMention, int freeMentionId,
48   - TParagraph thrP, Paragraph teiP) throws MultiserviceException {
49   - Iterator<TSentence> thrSI = thrP.getSentencesIterator();
50   - Iterator<Sentence> teiSI = teiP.iterator();
51   - while (thrSI.hasNext() && teiSI.hasNext()) {
52   - TSentence thrS = thrSI.next();
53   - Sentence teiS = teiSI.next();
54   - freeMentionId = updateThriftSentence(teiMention2ThriftMention,
55   - freeMentionId, thrS, teiS);
56   - }
57   - checkIterators(thrSI, teiSI, "sentence");
58   - return freeMentionId;
59   - }
60   -
61   - private static int updateThriftSentence(
62   - Map<Mention, TMention> teiMention2ThriftMention, int id,
63   - TSentence thrS, Sentence teiS) throws MultiserviceException {
64   - thrS.unsetMentions();
65   - thrS.setMentions(new ArrayList<TMention>());
66   -
67   - Map<Token, TToken> teiMorph2ThriftToken = new HashMap<>();
68   - Iterator<TToken> thrMI = thrS.getTokensIterator();
69   - Iterator<Token> teiMI = teiS.iterator();
70   - while (thrMI.hasNext() && teiMI.hasNext()) {
71   - teiMorph2ThriftToken.put(teiMI.next(), thrMI.next());
72   - }
73   - checkIterators(thrMI, teiMI, "morph");
74   -
75   - for (Mention m : teiS.getMentions()) {
76   - List<String> childIds = new ArrayList<>();
77   - List<String> headIds = new ArrayList<>();
78   - for (Token ch : m.getSegments())
79   - childIds.add(teiMorph2ThriftToken.get(ch).getId());
80   - for (Token h : m.getHeadSegments())
81   - headIds.add(teiMorph2ThriftToken.get(h).getId());
82   -
83   - TMention tm = new TMention("m-" + (id++), headIds, childIds,
84   - m.isZeroSubject());
85   - teiMention2ThriftMention.put(m, tm);
86   - thrS.addToMentions(tm);
87   - }
88   - return id;
89   - }
90   -
91   - private static void checkIterators(Iterator<? extends Object> one,
92   - Iterator<? extends Object> other, String level)
93   - throws MultiserviceException {
94   - if (one.hasNext() || other.hasNext())
95   - throw new MultiserviceException(
96   - "Problem mapping interal text representation to thrift for level "
97   - + level);
98   - }
  12 + private static final Logger LOG = LoggerFactory.getLogger(ThriftSaver.class);
  13 +
  14 + private ThriftSaver() {
  15 + }
  16 +
  17 + public static void updateThriftText(Text responseText, TText text)
  18 + throws MultiserviceException {
  19 +
  20 + LOG.debug("Updating thrift text...");
  21 + Map<Mention, TMention> teiMention2ThriftMention = new HashMap<>();
  22 +
  23 + Iterator<TParagraph> thrPI = text.getParagraphsIterator();
  24 + Iterator<Paragraph> teiPI = responseText.iterator();
  25 + int freeMentionId = 0;
  26 + while (thrPI.hasNext() && teiPI.hasNext()) {
  27 + TParagraph thrP = thrPI.next();
  28 + Paragraph teiP = teiPI.next();
  29 +
  30 + freeMentionId = updateThriftParagraph(teiMention2ThriftMention,
  31 + freeMentionId, thrP, teiP);
  32 + }
  33 + checkIterators(thrPI, teiPI, "paragraph");
  34 + }
  35 +
  36 + private static int updateThriftParagraph(
  37 + Map<Mention, TMention> teiMention2ThriftMention, int freeMentionId,
  38 + TParagraph thrP, Paragraph teiP) throws MultiserviceException {
  39 + Iterator<TSentence> thrSI = thrP.getSentencesIterator();
  40 + Iterator<Sentence> teiSI = teiP.iterator();
  41 + while (thrSI.hasNext() && teiSI.hasNext()) {
  42 + TSentence thrS = thrSI.next();
  43 + Sentence teiS = teiSI.next();
  44 + freeMentionId = updateThriftSentence(teiMention2ThriftMention,
  45 + freeMentionId, thrS, teiS);
  46 + }
  47 + checkIterators(thrSI, teiSI, "sentence");
  48 + return freeMentionId;
  49 + }
  50 +
  51 + private static int updateThriftSentence(
  52 + Map<Mention, TMention> teiMention2ThriftMention, int id,
  53 + TSentence thrS, Sentence teiS) throws MultiserviceException {
  54 + thrS.unsetMentions();
  55 + thrS.setMentions(new ArrayList<>());
  56 +
  57 + Map<Token, TToken> teiMorph2ThriftToken = new HashMap<>();
  58 + Iterator<TToken> thrMI = thrS.getTokensIterator();
  59 + Iterator<Token> teiMI = teiS.iterator();
  60 + while (thrMI.hasNext() && teiMI.hasNext()) {
  61 + teiMorph2ThriftToken.put(teiMI.next(), thrMI.next());
  62 + }
  63 + checkIterators(thrMI, teiMI, "morph");
  64 +
  65 + for (Mention m : teiS.getMentions()) {
  66 + List<String> childIds = new ArrayList<>();
  67 + List<String> headIds = new ArrayList<>();
  68 + for (Token ch : m.getSegments())
  69 + childIds.add(teiMorph2ThriftToken.get(ch).getId());
  70 + for (Token h : m.getHeadSegments())
  71 + headIds.add(teiMorph2ThriftToken.get(h).getId());
  72 +
  73 + TMention tm = new TMention("m-" + (id++), headIds, childIds,
  74 + m.isZeroSubject());
  75 + teiMention2ThriftMention.put(m, tm);
  76 + thrS.addToMentions(tm);
  77 + }
  78 + return id;
  79 + }
  80 +
  81 + private static void checkIterators(Iterator<?> one,
  82 + Iterator<?> other, String level)
  83 + throws MultiserviceException {
  84 + if (one.hasNext() || other.hasNext())
  85 + throw new MultiserviceException(
  86 + "Problem mapping interal text representation to thrift for level "
  87 + + level);
  88 + }
99 89  
100 90 }
... ...