From eac83d24d0d460300033f920fafbc7fa3d5ecdbb Mon Sep 17 00:00:00 2001
From: Mateusz Kopeć <m.kopec@ipipan.waw.pl>
Date: Mon, 31 Oct 2016 16:51:36 +0100
Subject: [PATCH] refactor: extract nicolas-common Utils and ThriftUtils, move zero-subject code into nicolas-zero module

---
 nicolas-common/pom.xml                                                                               |  39 +++++++++++++++++++++++++++++++++++++++
 nicolas-common/src/main/java/pl/waw/ipipan/zil/summ/nicolas/common/Utils.java                        | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 nicolas-core/pom.xml                                                                                 |   6 +++++-
 nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java                               |   3 ++-
 nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/ThriftUtils.java                           |  73 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Utils.java                                 | 200 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/apply/ApplyModel2.java                     |   5 +++--
 nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureHelper.java                |   6 ++----
 nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionModel.java                  |   5 +++--
 nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionScorer.java                 |   3 +--
 nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/PrepareTrainingData.java           |   5 +++--
 nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/PrepareTrainingData.java          |   5 +++--
 nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceScorer.java               |   2 +-
 nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/Zero.java                             | 128 --------------------------------------------------------------------------------------------------------------------------------
 nicolas-train/pom.xml                                                                                |  17 +++++++++++++++++
 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/Trainer.java                        |   8 ++++++++
 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/MultiserviceProxy.java | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/NLPProcess.java        |  90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 nicolas-zero/pom.xml                                                                                 |  30 ++++++++++++++++++++++++++++++
 nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/CandidateFinder.java                  |  63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/Zero.java                             |  76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectCandidate.java             |  29 +++++++++++++++++++++++++++++
 nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectInjector.java              |   5 +++++
 nicolas-zero/src/test/java/pl/waw/ipipan/zil/summ/nicolas/zero/CandidateFinderTest.java              |  61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 nicolas-zero/src/test/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectInjectorTest.java          |  11 +++++++++++
 pom.xml                                                                                              |  25 +++++++++++++++++++++++++
 26 files changed, 815 insertions(+), 345 deletions(-)
 create mode 100644 nicolas-common/pom.xml
 create mode 100644 nicolas-common/src/main/java/pl/waw/ipipan/zil/summ/nicolas/common/Utils.java
 create mode 100644 nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/ThriftUtils.java
 delete mode 100644 nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Utils.java
 delete mode 100644 nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/Zero.java
 create mode 100644 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/Trainer.java
 create mode 100644 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/MultiserviceProxy.java
 create mode 100644 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/NLPProcess.java
 create mode 100644 nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/CandidateFinder.java
 create mode 100644 nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/Zero.java
 create mode 100644 nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectCandidate.java
 create mode 100644 nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectInjector.java
 create mode 100644 nicolas-zero/src/test/java/pl/waw/ipipan/zil/summ/nicolas/zero/CandidateFinderTest.java
 create mode 100644 nicolas-zero/src/test/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectInjectorTest.java

diff --git a/nicolas-common/pom.xml b/nicolas-common/pom.xml
new file mode 100644
index 0000000..6dbb4fe
--- /dev/null
+++ b/nicolas-common/pom.xml
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <artifactId>nicolas-container</artifactId>
+        <groupId>pl.waw.ipipan.zil.summ</groupId>
+        <version>1.0-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>nicolas-common</artifactId>
+
+    <dependencies>
+        <!-- internal -->
+        <dependency>
+            <groupId>pl.waw.ipipan.zil.summ</groupId>
+            <artifactId>pscapi</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>pl.waw.ipipan.zil.multiservice</groupId>
+            <artifactId>utils</artifactId>
+        </dependency>
+
+        <!-- third party -->
+        <dependency>
+            <groupId>nz.ac.waikato.cms.weka</groupId>
+            <artifactId>weka-dev</artifactId>
+        </dependency>
+
+        <!-- logging -->
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+
+    </dependencies>
+
+</project>
\ No newline at end of file
diff --git a/nicolas-common/src/main/java/pl/waw/ipipan/zil/summ/nicolas/common/Utils.java b/nicolas-common/src/main/java/pl/waw/ipipan/zil/summ/nicolas/common/Utils.java
new file mode 100644
index 0000000..b76153d
--- /dev/null
+++ b/nicolas-common/src/main/java/pl/waw/ipipan/zil/summ/nicolas/common/Utils.java
@@ -0,0 +1,155 @@
+package pl.waw.ipipan.zil.summ.nicolas.common;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TToken;
+import weka.classifiers.Classifier;
+import weka.core.Attribute;
+import weka.core.Instances;
+
+import java.io.*;
+import java.util.*;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+public class Utils {
+
+    private static final Logger LOG = LoggerFactory.getLogger(Utils.class);
+
+    private static final String DATASET_NAME = "Dataset";
+
+    public static Instances createNewInstances(ArrayList<Attribute> attributesList) {
+        Instances instances = new Instances(DATASET_NAME, attributesList, 0);
+        instances.setClassIndex(0);
+        return instances;
+    }
+
+    public static Classifier loadClassifier(String path) throws IOException, ClassNotFoundException {
+        LOG.info("Loading classifier...");
+        try (ObjectInputStream ois = new ObjectInputStream(new FileInputStream(path))) {
+            Classifier classifier = (Classifier) ois.readObject();
+            LOG.info("Done. " + classifier.toString());
+            return classifier;
+        }
+    }
+
+    public static Map<String, TText> loadPreprocessedTexts(String path) {
+        Map<String, TText> id2text = Maps.newHashMap();
+        for (File processedFullTextFile : new File(path).listFiles()) {
+            TText processedFullText = loadThrifted(processedFullTextFile);
+            id2text.put(processedFullTextFile.getName().split("\\.")[0], processedFullText);
+        }
+        LOG.info(id2text.size() + " preprocessed texts found.");
+        return id2text;
+    }
+
+
+    public static TText loadThrifted(File originalFile) {
+        try (FileInputStream inputStream = new FileInputStream(originalFile)) {
+            return loadThrifted(inputStream);
+        } catch (IOException e) {
+            LOG.error("Error reading serialized file: " + e);
+            return null;
+        }
+    }
+
+    public static TText loadThrifted(InputStream stream) {
+        try (VersionIgnoringObjectInputStream ois = new VersionIgnoringObjectInputStream(stream)) {
+            return (TText) ois.readObject();
+        } catch (ClassNotFoundException | IOException e) {
+            LOG.error("Error reading serialized file: " + e);
+            return null;
+        }
+    }
+
+    public static List<String> tokenize(String text) {
+        return Arrays.asList(text.split("[^\\p{L}0-9]+"));
+    }
+
+    public static List<String> tokenizeOnWhitespace(String text) {
+        return Arrays.asList(text.split(" +"));
+    }
+
+    public static Map<TMention, String> loadMention2HeadOrth(List<TSentence> sents) {
+        Map<TMention, String> mention2orth = Maps.newHashMap();
+        for (TSentence s : sents) {
+            Map<String, String> tokId2orth = s.getTokens().stream().collect(Collectors.toMap(TToken::getId, TToken::getOrth));
+            Map<String, Boolean> tokId2nps = s.getTokens().stream().collect(Collectors.toMap(TToken::getId, TToken::isNoPrecedingSpace));
+
+            for (TMention m : s.getMentions()) {
+                StringBuffer mentionOrth = new StringBuffer();
+                for (String tokId : m.getHeadIds()) {
+                    if (!tokId2nps.get(tokId))
+                        mentionOrth.append(" ");
+                    mentionOrth.append(tokId2orth.get(tokId));
+                }
+                mention2orth.put(m, mentionOrth.toString().trim());
+            }
+        }
+        return mention2orth;
+    }
+
+    private static final Collection<String> STOPWORDS = Sets.newHashSet();
+
+    static {
+        STOPWORDS.addAll(Lists.newArrayList("i", "się", "to", "co"));
+    }
+
+    public static Map<TMention, String> loadMention2Orth(List<TSentence> sents) {
+        Map<TMention, String> mention2orth = Maps.newHashMap();
+        for (TSentence s : sents) {
+            Map<String, TToken> tokId2tok = s.getTokens().stream().collect(Collectors.toMap(TToken::getId, Function.identity()));
+
+            for (TMention m : s.getMentions()) {
+                StringBuffer mentionOrth = new StringBuffer();
+                for (String tokId : m.getChildIds()) {
+                    TToken token = tokId2tok.get(tokId);
+                    if (STOPWORDS.contains(token.getChosenInterpretation().getBase().toLowerCase())) {
+                        continue;
+                    }
+
+                    if (!token.isNoPrecedingSpace())
+                        mentionOrth.append(" ");
+                    mentionOrth.append(token.getOrth());
+                }
+                mention2orth.put(m, mentionOrth.toString().trim());
+            }
+        }
+        return mention2orth;
+    }
+
+    public static Map<TMention, String> loadMention2Base(List<TSentence> sents) {
+        Map<TMention, String> mention2base = Maps.newHashMap();
+        for (TSentence s : sents) {
+            Map<String, String> tokId2base = s.getTokens().stream().collect(Collectors.toMap(tok -> tok.getId(), tok -> tok.getChosenInterpretation().getBase()));
+
+            for (TMention m : s.getMentions()) {
+                StringBuilder mentionBase = new StringBuilder();
+                for (String tokId : m.getChildIds()) {
+                    mentionBase.append(" ");
+                    mentionBase.append(tokId2base.get(tokId));
+                }
+                mention2base.put(m, mentionBase.toString().toLowerCase().trim());
+            }
+        }
+        return mention2base;
+    }
+
+    public static String loadSentence2Orth(TSentence sentence) {
+        StringBuilder sb = new StringBuilder();
+        for (TToken token : sentence.getTokens()) {
+            if (!token.isNoPrecedingSpace())
+                sb.append(" ");
+            sb.append(token.getOrth());
+        }
+        return sb.toString().trim();
+    }
+
+
+}
\ No newline at end of file
diff --git a/nicolas-core/pom.xml b/nicolas-core/pom.xml
index c2fa0a9..0047276 100644
--- a/nicolas-core/pom.xml
+++ b/nicolas-core/pom.xml
@@ -12,10 +12,14 @@
     <artifactId>nicolas</artifactId>
 
     <dependencies>
+        <!-- project -->
+        <dependency>
+            <groupId>pl.waw.ipipan.zil.summ</groupId>
+            <artifactId>nicolas-common</artifactId>
+        </dependency>
         <dependency>
             <groupId>pl.waw.ipipan.zil.summ</groupId>
             <artifactId>nicolas-model</artifactId>
-            <scope>runtime</scope>
         </dependency>
 
         <dependency>
diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java
index c6573ba..96f3786 100644
--- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java
+++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java
@@ -6,6 +6,7 @@ import com.google.common.collect.Sets;
 import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
 import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
 import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
+import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
 import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor;
 import pl.waw.ipipan.zil.summ.nicolas.mention.MentionModel;
 import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceFeatureExtractor;
@@ -53,7 +54,7 @@ public class Nicolas {
         List<TSentence> sents = thrifted.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(toList());
 
         Instances instances = Utils.createNewInstances(sentenceFeatureExtractor.getAttributesList());
-        Map<TSentence, Instance> sentence2instance = Utils.extractInstancesFromSentences(thrifted, sentenceFeatureExtractor, goodMentions);
+        Map<TSentence, Instance> sentence2instance = ThriftUtils.extractInstancesFromSentences(thrifted, sentenceFeatureExtractor, goodMentions);
 
         Map<TSentence, Double> sentence2score = Maps.newHashMap();
         for (Map.Entry<TSentence, Instance> entry : sentence2instance.entrySet()) {
diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/ThriftUtils.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/ThriftUtils.java
new file mode 100644
index 0000000..c0de645
--- /dev/null
+++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/ThriftUtils.java
@@ -0,0 +1,73 @@
+package pl.waw.ipipan.zil.summ.nicolas;
+
+import com.google.common.base.Charsets;
+import com.google.common.collect.Maps;
+import com.google.common.io.Files;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
+import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor;
+import pl.waw.ipipan.zil.summ.nicolas.mention.MentionScorer;
+import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceFeatureExtractor;
+import weka.core.Attribute;
+import weka.core.DenseInstance;
+import weka.core.Instance;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import static java.util.stream.Collectors.toList;
+
+public class ThriftUtils {
+
+    private static final Logger LOG = LoggerFactory.getLogger(ThriftUtils.class);
+
+    public static Set<TMention> loadGoldGoodMentions(String id, TText text, boolean dev) throws IOException {
+        String optimalSummary = Files.toString(new File("src/main/resources/optimal_summaries/" + (dev ? "dev" : "test") + "/" + id + "_theoretic_ub_rouge_1.txt"), Charsets.UTF_8);
+
+        MentionScorer scorer = new MentionScorer();
+        Map<TMention, Double> mention2score = scorer.calculateMentionScores(optimalSummary, text);
+
+        mention2score.keySet().removeIf(tMention -> mention2score.get(tMention) != 1.0);
+        return mention2score.keySet();
+    }
+
+    public static Map<TMention, Instance> extractInstancesFromMentions(TText preprocessedText, MentionFeatureExtractor featureExtractor) {
+        List<TSentence> sentences = preprocessedText.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(toList());
+        Map<TMention, Map<Attribute, Double>> mention2features = featureExtractor.calculateFeatures(preprocessedText);
+
+        LOG.info("Extracting " + featureExtractor.getAttributesList().size() + " features of each mention.");
+        Map<TMention, Instance> mention2instance = Maps.newHashMap();
+        for (TMention tMention : sentences.stream().flatMap(s -> s.getMentions().stream()).collect(toList())) {
+            Instance instance = new DenseInstance(featureExtractor.getAttributesList().size());
+            Map<Attribute, Double> mentionFeatures = mention2features.get(tMention);
+            for (Attribute attribute : featureExtractor.getAttributesList()) {
+                instance.setValue(attribute, mentionFeatures.get(attribute));
+            }
+            mention2instance.put(tMention, instance);
+        }
+        return mention2instance;
+    }
+
+    public static Map<TSentence, Instance> extractInstancesFromSentences(TText preprocessedText, SentenceFeatureExtractor featureExtractor, Set<TMention> goodMentions) {
+        List<TSentence> sentences = preprocessedText.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(toList());
+        Map<TSentence, Map<Attribute, Double>> sentence2features = featureExtractor.calculateFeatures(preprocessedText, goodMentions);
+
+        LOG.info("Extracting " + featureExtractor.getAttributesList().size() + " features of each sentence.");
+        Map<TSentence, Instance> sentence2instance = Maps.newHashMap();
+        for (TSentence sentence : sentences) {
+            Instance instance = new DenseInstance(featureExtractor.getAttributesList().size());
+            Map<Attribute, Double> sentenceFeatures = sentence2features.get(sentence);
+            for (Attribute attribute : featureExtractor.getAttributesList()) {
+                instance.setValue(attribute, sentenceFeatures.get(attribute));
+            }
+            sentence2instance.put(sentence, instance);
+        }
+        return sentence2instance;
+    }
+}
diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Utils.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Utils.java
deleted file mode 100644
index 6b0ff0a..0000000
--- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Utils.java
+++ /dev/null
@@ -1,200 +0,0 @@
-package pl.waw.ipipan.zil.summ.nicolas;
-
-import com.google.common.base.Charsets;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-import com.google.common.io.Files;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
-import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
-import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
-import pl.waw.ipipan.zil.multiservice.thrift.types.TToken;
-import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor;
-import pl.waw.ipipan.zil.summ.nicolas.mention.MentionScorer;
-import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceFeatureExtractor;
-import weka.classifiers.Classifier;
-import weka.core.Attribute;
-import weka.core.DenseInstance;
-import weka.core.Instance;
-import weka.core.Instances;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.util.*;
-import java.util.function.Function;
-import java.util.stream.Collectors;
-
-import static java.util.stream.Collectors.toList;
-
-public class Utils {
-
-    private static final Logger LOG = LoggerFactory.getLogger(Utils.class);
-
-    private static final String DATASET_NAME = "Dataset";
-
-    public static Map<TMention, Instance> extractInstancesFromMentions(TText preprocessedText, MentionFeatureExtractor featureExtractor) {
-        List<TSentence> sentences = preprocessedText.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(toList());
-        Map<TMention, Map<Attribute, Double>> mention2features = featureExtractor.calculateFeatures(preprocessedText);
-
-        LOG.info("Extracting " + featureExtractor.getAttributesList().size() + " features of each mention.");
-        Map<TMention, Instance> mention2instance = Maps.newHashMap();
-        for (TMention tMention : sentences.stream().flatMap(s -> s.getMentions().stream()).collect(toList())) {
-            Instance instance = new DenseInstance(featureExtractor.getAttributesList().size());
-            Map<Attribute, Double> mentionFeatures = mention2features.get(tMention);
-            for (Attribute attribute : featureExtractor.getAttributesList()) {
-                instance.setValue(attribute, mentionFeatures.get(attribute));
-            }
-            mention2instance.put(tMention, instance);
-        }
-        return mention2instance;
-    }
-
-    public static Map<TSentence, Instance> extractInstancesFromSentences(TText preprocessedText, SentenceFeatureExtractor featureExtractor, Set<TMention> goodMentions) {
-        List<TSentence> sentences = preprocessedText.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(toList());
-        Map<TSentence, Map<Attribute, Double>> sentence2features = featureExtractor.calculateFeatures(preprocessedText, goodMentions);
-
-        LOG.info("Extracting " + featureExtractor.getAttributesList().size() + " features of each sentence.");
-        Map<TSentence, Instance> sentence2instance = Maps.newHashMap();
-        for (TSentence sentence : sentences) {
-            Instance instance = new DenseInstance(featureExtractor.getAttributesList().size());
-            Map<Attribute, Double> sentenceFeatures = sentence2features.get(sentence);
-            for (Attribute attribute : featureExtractor.getAttributesList()) {
-                instance.setValue(attribute, sentenceFeatures.get(attribute));
-            }
-            sentence2instance.put(sentence, instance);
-        }
-        return sentence2instance;
-    }
-
-    public static Instances createNewInstances(ArrayList<Attribute> attributesList) {
-        Instances instances = new Instances(DATASET_NAME, attributesList, 0);
-        instances.setClassIndex(0);
-        return instances;
-    }
-
-    public static Classifier loadClassifier(String path) throws IOException, ClassNotFoundException {
-        LOG.info("Loading classifier...");
-        try (ObjectInputStream ois = new ObjectInputStream(new FileInputStream(path))) {
-            Classifier classifier = (Classifier) ois.readObject();
-            LOG.info("Done. " + classifier.toString());
-            return classifier;
-        }
-    }
-
-    public static Map<String, TText> loadPreprocessedTexts(String path) {
-        Map<String, TText> id2text = Maps.newHashMap();
-        for (File processedFullTextFile : new File(path).listFiles()) {
-            TText processedFullText = loadThrifted(processedFullTextFile);
-            id2text.put(processedFullTextFile.getName().split("\\.")[0], processedFullText);
-        }
-        LOG.info(id2text.size() + " preprocessed texts found.");
-        return id2text;
-    }
-
-
-    public static TText loadThrifted(File originalFile) {
-        try (ObjectInputStream ois = new ObjectInputStream(new FileInputStream(originalFile))) {
-            return (TText) ois.readObject();
-        } catch (ClassNotFoundException | IOException e) {
-            LOG.error("Error reading serialized file: " + e);
-            return null;
-        }
-    }
-
-    public static List<String> tokenize(String text) {
-        return Arrays.asList(text.split("[^\\p{L}0-9]+"));
-    }
-
-    public static List<String> tokenizeOnWhitespace(String text) {
-        return Arrays.asList(text.split(" +"));
-    }
-
-    public static Map<TMention, String> loadMention2HeadOrth(List<TSentence> sents) {
-        Map<TMention, String> mention2orth = Maps.newHashMap();
-        for (TSentence s : sents) {
-            Map<String, String> tokId2orth = s.getTokens().stream().collect(Collectors.toMap(TToken::getId, TToken::getOrth));
-            Map<String, Boolean> tokId2nps = s.getTokens().stream().collect(Collectors.toMap(TToken::getId, TToken::isNoPrecedingSpace));
-
-            for (TMention m : s.getMentions()) {
-                StringBuffer mentionOrth = new StringBuffer();
-                for (String tokId : m.getHeadIds()) {
-                    if (!tokId2nps.get(tokId))
-                        mentionOrth.append(" ");
-                    mentionOrth.append(tokId2orth.get(tokId));
-                }
-                mention2orth.put(m, mentionOrth.toString().trim());
-            }
-        }
-        return mention2orth;
-    }
-
-    private static final Collection<String> STOPWORDS = Sets.newHashSet();
-
-    static {
-        STOPWORDS.addAll(Lists.newArrayList("i", "się", "to", "co"));
-    }
-
-    public static Map<TMention, String> loadMention2Orth(List<TSentence> sents) {
-        Map<TMention, String> mention2orth = Maps.newHashMap();
-        for (TSentence s : sents) {
-            Map<String, TToken> tokId2tok = s.getTokens().stream().collect(Collectors.toMap(TToken::getId, Function.identity()));
-
-            for (TMention m : s.getMentions()) {
-                StringBuffer mentionOrth = new StringBuffer();
-                for (String tokId : m.getChildIds()) {
-                    TToken token = tokId2tok.get(tokId);
-                    if (STOPWORDS.contains(token.getChosenInterpretation().getBase().toLowerCase())) {
-                        continue;
-                    }
-
-                    if (!token.isNoPrecedingSpace())
-                        mentionOrth.append(" ");
-                    mentionOrth.append(token.getOrth());
-                }
-                mention2orth.put(m, mentionOrth.toString().trim());
-            }
-        }
-        return mention2orth;
-    }
-
-    public static Map<TMention, String> loadMention2Base(List<TSentence> sents) {
-        Map<TMention, String> mention2base = Maps.newHashMap();
-        for (TSentence s : sents) {
-            Map<String, String> tokId2base = s.getTokens().stream().collect(Collectors.toMap(tok -> tok.getId(), tok -> tok.getChosenInterpretation().getBase()));
-
-            for (TMention m : s.getMentions()) {
-                StringBuilder mentionBase = new StringBuilder();
-                for (String tokId : m.getChildIds()) {
-                    mentionBase.append(" ");
-                    mentionBase.append(tokId2base.get(tokId));
-                }
-                mention2base.put(m, mentionBase.toString().toLowerCase().trim());
-            }
-        }
-        return mention2base;
-    }
-
-    public static String loadSentence2Orth(TSentence sentence) {
-        StringBuilder sb = new StringBuilder();
-        for (TToken token : sentence.getTokens()) {
-            if (!token.isNoPrecedingSpace())
-                sb.append(" ");
-            sb.append(token.getOrth());
-        }
-        return sb.toString().trim();
-    }
-
-    public static Set<TMention> loadGoldGoodMentions(String id, TText text, boolean dev) throws IOException {
-        String optimalSummary = Files.toString(new File("src/main/resources/optimal_summaries/" + (dev ? "dev" : "test") + "/" + id + "_theoretic_ub_rouge_1.txt"), Charsets.UTF_8);
-
-        MentionScorer scorer = new MentionScorer();
-        Map<TMention, Double> mention2score = scorer.calculateMentionScores(optimalSummary, text);
-
-        mention2score.keySet().removeIf(tMention -> mention2score.get(tMention) != 1.0);
-        return mention2score.keySet();
-    }
-}
\ No newline at end of file
diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/apply/ApplyModel2.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/apply/ApplyModel2.java
index f687d4a..2de5225 100644
--- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/apply/ApplyModel2.java
+++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/apply/ApplyModel2.java
@@ -9,7 +9,8 @@ import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
 import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
 import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
 import pl.waw.ipipan.zil.summ.nicolas.Constants;
-import pl.waw.ipipan.zil.summ.nicolas.Utils;
+import pl.waw.ipipan.zil.summ.nicolas.ThriftUtils;
+import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
 import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor;
 import pl.waw.ipipan.zil.summ.nicolas.mention.MentionModel;
 import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceFeatureExtractor;
@@ -85,7 +86,7 @@ public class ApplyModel2 {
         List<TSentence> sents = thrifted.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(toList());
 
         Instances instances = Utils.createNewInstances(sentenceFeatureExtractor.getAttributesList());
-        Map<TSentence, Instance> sentence2instance = Utils.extractInstancesFromSentences(thrifted, sentenceFeatureExtractor, goodMentions);
+        Map<TSentence, Instance> sentence2instance = ThriftUtils.extractInstancesFromSentences(thrifted, sentenceFeatureExtractor, goodMentions);
 
         Map<TSentence, Double> sentence2score = Maps.newHashMap();
         for (Map.Entry<TSentence, Instance> entry : sentence2instance.entrySet()) {
diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureHelper.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureHelper.java
index 4dc2446..d774b0a 100644
--- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureHelper.java
+++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureHelper.java
@@ -3,7 +3,7 @@ package pl.waw.ipipan.zil.summ.nicolas.features;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 import pl.waw.ipipan.zil.multiservice.thrift.types.*;
-import pl.waw.ipipan.zil.summ.nicolas.Utils;
+import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
 
 import java.util.List;
 import java.util.Map;
@@ -14,9 +14,7 @@ import java.util.stream.Collectors;
 import static java.util.stream.Collectors.toList;
 import static java.util.stream.Collectors.toMap;
 
-/**
- * Created by me2 on 04.04.16.
- */
+
 public class FeatureHelper {
 
     private final List<TMention> mentions;
diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionModel.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionModel.java
index 7e85be6..1ba0ef0 100644
--- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionModel.java
+++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionModel.java
@@ -5,7 +5,8 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
 import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
-import pl.waw.ipipan.zil.summ.nicolas.Utils;
+import pl.waw.ipipan.zil.summ.nicolas.ThriftUtils;
+import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
 import weka.classifiers.Classifier;
 import weka.core.Instance;
 import weka.core.Instances;
@@ -21,7 +22,7 @@ public class MentionModel {
         Set<TMention> goodMentions = Sets.newHashSet();
 
         Instances instances = Utils.createNewInstances(featureExtractor.getAttributesList());
-        Map<TMention, Instance> mention2instance = Utils.extractInstancesFromMentions(text, featureExtractor);
+        Map<TMention, Instance> mention2instance = ThriftUtils.extractInstancesFromMentions(text, featureExtractor);
         for (Map.Entry<TMention, Instance> entry : mention2instance.entrySet()) {
             Instance instance = entry.getValue();
             instance.setDataset(instances);
diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionScorer.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionScorer.java
index a16edec..5fa8e7c 100644
--- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionScorer.java
+++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionScorer.java
@@ -6,9 +6,8 @@ import com.google.common.collect.Multiset;
 import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
 import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
 import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
-import pl.waw.ipipan.zil.summ.nicolas.Utils;
+import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
 
-import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.stream.Collectors;
diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/PrepareTrainingData.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/PrepareTrainingData.java
index 6aa49f6..3810574 100644
--- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/PrepareTrainingData.java
+++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/PrepareTrainingData.java
@@ -8,7 +8,8 @@ import org.slf4j.LoggerFactory;
 import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
 import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
 import pl.waw.ipipan.zil.summ.nicolas.Constants;
-import pl.waw.ipipan.zil.summ.nicolas.Utils;
+import pl.waw.ipipan.zil.summ.nicolas.ThriftUtils;
+import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
 import weka.core.Instance;
 import weka.core.Instances;
 import weka.core.converters.ArffSaver;
@@ -45,7 +46,7 @@ public class PrepareTrainingData {
                 continue;
             Map<TMention, Double> mention2score = mentionScorer.calculateMentionScores(optimalSummary, preprocessedText);
 
-            Map<TMention, Instance> mention2instance = Utils.extractInstancesFromMentions(preprocessedText, featureExtractor);
+            Map<TMention, Instance> mention2instance = ThriftUtils.extractInstancesFromMentions(preprocessedText, featureExtractor);
             for (Map.Entry<TMention, Instance> entry : mention2instance.entrySet()) {
                 TMention mention = entry.getKey();
                 Instance instance = entry.getValue();
diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/PrepareTrainingData.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/PrepareTrainingData.java
index fb16339..f9ab453 100644
--- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/PrepareTrainingData.java
+++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/PrepareTrainingData.java
@@ -9,7 +9,8 @@ import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
 import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
 import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
 import pl.waw.ipipan.zil.summ.nicolas.Constants;
-import pl.waw.ipipan.zil.summ.nicolas.Utils;
+import pl.waw.ipipan.zil.summ.nicolas.ThriftUtils;
+import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
 import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor;
 import pl.waw.ipipan.zil.summ.nicolas.mention.MentionModel;
 import weka.classifiers.Classifier;
@@ -58,7 +59,7 @@ public class PrepareTrainingData {
 //            Set<TMention> goodMentions
 //                    = Utils.loadGoldGoodMentions(textId, preprocessedText, true);
 
-            Map<TSentence, Instance> sentence2instance = Utils.extractInstancesFromSentences(preprocessedText, featureExtractor, goodMentions);
+            Map<TSentence, Instance> sentence2instance = ThriftUtils.extractInstancesFromSentences(preprocessedText, featureExtractor, goodMentions);
             for (Map.Entry<TSentence, Instance> entry : sentence2instance.entrySet()) {
                 TSentence sentence = entry.getKey();
                 Instance instance = entry.getValue();
diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceScorer.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceScorer.java
index f96ea34..0ebb515 100644
--- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceScorer.java
+++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceScorer.java
@@ -6,7 +6,7 @@ import com.google.common.collect.Multiset;
 import pl.waw.ipipan.zil.multiservice.thrift.types.TParagraph;
 import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
 import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
-import pl.waw.ipipan.zil.summ.nicolas.Utils;
+import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
 
 import java.util.List;
 import java.util.Map;
diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/Zero.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/Zero.java
deleted file mode 100644
index cb3c5ef..0000000
--- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/Zero.java
+++ /dev/null
@@ -1,128 +0,0 @@
-package pl.waw.ipipan.zil.summ.nicolas.zero;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
-import org.apache.commons.csv.CSVFormat;
-import org.apache.commons.csv.CSVPrinter;
-import org.apache.commons.csv.QuoteMode;
-import org.apache.commons.io.IOUtils;
-import pl.waw.ipipan.zil.multiservice.thrift.types.*;
-import pl.waw.ipipan.zil.summ.nicolas.Utils;
-
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-/**
- * Created by me2 on 26.07.16.
- */
-public class Zero {
-
-    private static final String IDS_PATH = "summaries_dev";
-    private static final String THRIFTED_PATH = "src/main/resources/preprocessed_full_texts/dev/";
-
-    public static void main(String[] args) throws IOException {
-
-        Map<String, TText> id2preprocessedText = Utils.loadPreprocessedTexts(THRIFTED_PATH);
-        Map<String, List<String>> id2sentIds = loadSentenceIds(IDS_PATH);
-
-        int mentionCount = 0;
-        int mentionInNom = 0;
-        int mentionInNomSequential = 0;
-
-        List<List<Object>> rows = Lists.newArrayList();
-        for (Map.Entry<String, TText> entry : id2preprocessedText.entrySet()) {
-            String textId = entry.getKey();
-//            System.out.println(id);
-
-            TText text = entry.getValue();
-            List<String> sentenceIds = id2sentIds.get(textId);
-//            System.out.println(sentenceIds);
-
-            Map<String, Set<String>> mentionId2Cluster = Maps.newHashMap();
-            for (TCoreference coreference : text.getCoreferences()) {
-                for (String mentionId : coreference.getMentionIds()) {
-                    mentionId2Cluster.put(mentionId, Sets.newHashSet(coreference.getMentionIds()));
-                }
-            }
-
-            Set<String> prevSentenceNominativeMentionIds = Sets.newHashSet();
-            TSentence prevSentence = null;
-            for (TParagraph p : text.getParagraphs()) {
-                Map<TMention, String> tMentionStringMap = Utils.loadMention2Orth(p.getSentences());
-
-                for (TSentence sentence : p.getSentences()) {
-                    if (!sentenceIds.contains(sentence.getId()))
-                        continue;
-                    Set<String> currentSentenceNominativeMentionIds = Sets.newHashSet();
-
-                    Map<String, TToken> tokenId2Token = Maps.newHashMap();
-                    for (TToken t : sentence.getTokens())
-                        tokenId2Token.put(t.getId(), t);
-
-                    for (TMention mention : sentence.getMentions()) {
-                        mentionCount++;
-
-                        for (String tokenId : mention.getHeadIds()) {
-                            TInterpretation interp = tokenId2Token.get(tokenId).getChosenInterpretation();
-                            if (isInNominative(interp)) {
-                                mentionInNom++;
-
-                                currentSentenceNominativeMentionIds.add(mention.getId());
-                                if (mentionId2Cluster.get(mention.getId()).stream().anyMatch(prevSentenceNominativeMentionIds::contains)) {
-                                    mentionInNomSequential++;
-                                    System.out.println(tMentionStringMap.get(mention)
-                                            + "\n\t" + Utils.loadSentence2Orth(prevSentence)
-                                            + "\n\t" + Utils.loadSentence2Orth(sentence));
-
-                                    List<Object> row = Lists.newArrayList();
-                                    row.add("C");
-                                    row.add(textId);
-                                    row.add(tMentionStringMap.get(mention));
-                                    row.add(Utils.loadSentence2Orth(prevSentence));
-                                    row.add(Utils.loadSentence2Orth(sentence));
-                                    rows.add(row);
-                                }
-                                break;
-                            }
-                        }
-                    }
-
-                    prevSentence = sentence;
-                    prevSentenceNominativeMentionIds = currentSentenceNominativeMentionIds;
-                }
-            }
-        }
-
-        System.out.println(mentionCount + " mentions");
-        System.out.println(mentionInNom + " mention in nom");
-        System.out.println(mentionInNomSequential + " mention in nom with previous in nom");
-
-        try (CSVPrinter csvPrinter = new CSVPrinter(new FileWriter("zeros.tsv"), CSVFormat.DEFAULT.withDelimiter('\t').withEscape('\\').withQuoteMode(QuoteMode.NONE).withQuote('"'))) {
-            for (List<Object> row : rows) {
-                csvPrinter.printRecord(row);
-            }
-        }
-
-    }
-
-    private static boolean isInNominative(TInterpretation interp) {
-        return interp.getCtag().equals("subst") && Arrays.stream(interp.getMsd().split(":")).anyMatch(t -> t.equals("nom"));
-    }
-
-    private static Map<String, List<String>> loadSentenceIds(String idsPath) throws IOException {
-        Map<String, List<String>> result = Maps.newHashMap();
-        for (File f : new File(idsPath).listFiles()) {
-            String id = f.getName().split("_")[0];
-            List<String> sentenceIds = IOUtils.readLines(new FileReader(f));
-            result.put(id, sentenceIds);
-        }
-        return result;
-    }
-}
diff --git a/nicolas-train/pom.xml b/nicolas-train/pom.xml
index 0773393..62ae3a7 100644
--- a/nicolas-train/pom.xml
+++ b/nicolas-train/pom.xml
@@ -11,4 +11,21 @@
 
     <artifactId>nicolas-train</artifactId>
 
+    <dependencies>
+        <!-- internal -->
+        <dependency>
+            <groupId>pl.waw.ipipan.zil.summ</groupId>
+            <artifactId>pscapi</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>pl.waw.ipipan.zil.multiservice</groupId>
+            <artifactId>utils</artifactId>
+        </dependency>
+
+        <!-- logging -->
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+    </dependencies>
 </project>
\ No newline at end of file
diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/Trainer.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/Trainer.java
new file mode 100644
index 0000000..c4b4d7c
--- /dev/null
+++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/Trainer.java
@@ -0,0 +1,11 @@
+package pl.waw.ipipan.zil.summ.nicolas.train;
+
+/**
+ * Entry point for training the Nicolas summarizer models.
+ * Currently a stub - training logic is yet to be implemented.
+ */
+public class Trainer {
+
+    public static void main(String[] args) {
+
+    }
+}
diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/MultiserviceProxy.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/MultiserviceProxy.java
new file mode 100644
index 0000000..2c4455a
--- /dev/null
+++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/MultiserviceProxy.java
@@ -0,0 +1,110 @@
+package pl.waw.ipipan.zil.summ.nicolas.train.multiservice;
+
+import org.apache.thrift.TException;
+import org.apache.thrift.protocol.TBinaryProtocol;
+import org.apache.thrift.protocol.TProtocol;
+import org.apache.thrift.transport.TSocket;
+import org.apache.thrift.transport.TTransport;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import pl.waw.ipipan.zil.multiservice.thrift.Multiservice;
+import pl.waw.ipipan.zil.multiservice.thrift.ObjectRequest;
+import pl.waw.ipipan.zil.multiservice.thrift.RequestPart;
+import pl.waw.ipipan.zil.multiservice.thrift.RequestStatus;
+import pl.waw.ipipan.zil.multiservice.thrift.types.MultiserviceException;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TParagraph;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class MultiserviceProxy {
+
+    private static final Logger LOG = LoggerFactory.getLogger(MultiserviceProxy.class);
+
+    private int port;
+    private String host;
+
+    public MultiserviceProxy(String host, int port) {
+        this.host = host;
+        this.port = port;
+        LOG.info("Multiservice at " + host + ":" + port);
+    }
+
+    public TText process(String text, List<String> services) throws Exception {
+        List<Map<String, String>> options = new ArrayList<>();
+        for (int i = 0; i < services.size(); i++)
+            options.add(new HashMap<>());
+        return process(text, "", services, options);
+    }
+
+    public TText process(String text, String title, List<String> services, List<Map<String, String>> options)
+            throws Exception {
+        TTransport transport = new TSocket(host, port);
+        ObjectRequest objectRequest = createRequest(text, title, services, options);
+
+        try {
+            transport.open();
+
+            TProtocol protocol = new TBinaryProtocol(transport);
+            Multiservice.Client client = new Multiservice.Client(protocol);
+
+            LOG.debug("Sending Multservice request...");
+            TText responseText = request(objectRequest, client);
+            LOG.debug("...done");
+
+            return responseText;
+
+        } catch (TException e) {
+            LOG.error("Error processing request:" + e);
+            throw new Exception(e);
+
+        } finally {
+            transport.close();
+        }
+    }
+
+    private TText request(ObjectRequest objectRequest, Multiservice.Client client) throws TException {
+
+        String requestToken = client.putObjectRequest(objectRequest);
+        while (true) {
+            RequestStatus status = client.getRequestStatus(requestToken);
+            if (RequestStatus.DONE.equals(status)) {
+                TText result = client.getResultObject(requestToken);
+                return result;
+            } else if (RequestStatus.FAILED.equals(status) || RequestStatus.DUMPED.equals(status)) {
+                try {
+                    MultiserviceException exception = client.getException(requestToken);
+                    throw exception;
+                } catch (TException e) {
+                    throw e;
+                }
+            }
+        }
+    }
+
+    private ObjectRequest createRequest(String textBody, String textTitle, List<String> services,
+                                        List<Map<String, String>> options) {
+        TText text = new TText();
+
+        TParagraph par = new TParagraph();
+        par.setText(textTitle);
+        text.addToParagraphs(par);
+
+        for (String p : textBody.split("\n\n")) {
+            par = new TParagraph();
+            par.setText(p);
+            text.addToParagraphs(par);
+        }
+
+        List<RequestPart> processingChain = new ArrayList<>();
+        int i = 0;
+        for (String serviceName : services)
+            processingChain.add(new RequestPart(serviceName, options.get(i++)));
+
+        return new ObjectRequest(text, processingChain);
+    }
+
+}
diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/NLPProcess.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/NLPProcess.java
new file mode 100644
index 0000000..bef8a7c
--- /dev/null
+++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/NLPProcess.java
@@ -0,0 +1,90 @@
+package pl.waw.ipipan.zil.summ.nicolas.train.multiservice;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
+import pl.waw.ipipan.zil.summ.pscapi.io.PSC_IO;
+import pl.waw.ipipan.zil.summ.pscapi.xml.Text;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.ObjectOutputStream;
+import java.util.Arrays;
+import java.util.List;
+
+public class NLPProcess {
+
+    private static final Logger LOG = LoggerFactory.getLogger(NLPProcess.class);
+
+    private static final List<String> SERVICES = Arrays.asList("Concraft", "Spejd", "Nerf", "MentionDetector",
+            "Bartek");
+    private static final int PORT = 20000;
+    private static final String HOST = "multiservice.nlp.ipipan.waw.pl";
+
+    private static final MultiserviceProxy MSPROXY = new MultiserviceProxy(HOST, PORT);
+
+    private NLPProcess() {
+    }
+
+    public static void main(String[] args) {
+        if (args.length != 2) {
+            LOG.error("Wrong usage! Try " + NLPProcess.class.getSimpleName() + " dirWithCorpusFiles targetDir");
+            return;
+        }
+        File corpusDir = new File(args[0]);
+        if (!corpusDir.isDirectory()) {
+            LOG.error("Corpus directory does not exist: " + corpusDir);
+            return;
+        }
+        File targetDir = new File(args[1]);
+        if (!targetDir.isDirectory()) {
+            LOG.error("Target directory does not exist: " + targetDir);
+            return;
+        }
+
+        int ok = 0;
+        int err = 0;
+        File[] files = corpusDir.listFiles(f -> f.getName().endsWith(".xml"));
+        Arrays.sort(files);
+        for (File file : files) {
+            try {
+                Text text = PSC_IO.readText(file);
+                File targetFile = new File(targetDir, file.getName().replaceFirst(".xml$", ".bin"));
+                annotateNLP(text, targetFile);
+                ok++;
+            } catch (Exception e) {
+                err++;
+                LOG.error("Problem with text in " + file + ", " + e);
+            }
+        }
+        LOG.info(ok + " texts processed successfully.");
+        LOG.info(err + " texts with errors.");
+    }
+
+    private static void annotateNLP(Text text, File targetFile) throws Exception {
+        annotate(text.getBody(), targetFile);
+    }
+
+    private static void annotate(String body, File targetFile) throws Exception {
+        if (targetFile.exists()) {
+            LOG.debug("Skipping existing file..");
+            return;
+        }
+        LOG.info("Processing text into " + targetFile.getPath());
+        TText ttext = MSPROXY.process(body, SERVICES);
+        serialize(ttext, targetFile);
+    }
+
+    public static void serialize(TText ttext, File targetFile) throws IOException {
+        try (FileOutputStream fout = new FileOutputStream(targetFile);
+             ObjectOutputStream oos = new ObjectOutputStream(fout)) {
+            oos.writeObject(ttext);
+        }
+    }
+
+    public static TText annotate(String body) throws Exception {
+        return MSPROXY.process(body, SERVICES);
+    }
+
+}
diff --git a/nicolas-zero/pom.xml b/nicolas-zero/pom.xml
index 26bf7dd..6f4d656 100644
--- a/nicolas-zero/pom.xml
+++ b/nicolas-zero/pom.xml
@@ -11,4 +11,34 @@
 
     <artifactId>nicolas-zero</artifactId>
 
+    <dependencies>
+        <!-- project -->
+        <dependency>
+            <groupId>pl.waw.ipipan.zil.summ</groupId>
+            <artifactId>nicolas-common</artifactId>
+        </dependency>
+
+        <!-- third party -->
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-csv</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>commons-io</groupId>
+            <artifactId>commons-io</artifactId>
+        </dependency>
+
+        <!-- logging -->
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+
+        <!-- test -->
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+        </dependency>
+    </dependencies>
+
 </project>
\ No newline at end of file
diff --git a/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/CandidateFinder.java b/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/CandidateFinder.java
new file mode 100644
index 0000000..cceb8ac
--- /dev/null
+++ b/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/CandidateFinder.java
@@ -0,0 +1,81 @@
+package pl.waw.ipipan.zil.summ.nicolas.zero;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import pl.waw.ipipan.zil.multiservice.thrift.types.*;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Finds candidate mentions for zero subjects: nominative mentions
+ * whose coreference cluster also contained a nominative mention in the
+ * directly preceding summary sentence.
+ */
+public class CandidateFinder {
+
+    /**
+     * @param text               annotated text
+     * @param summarySentenceIds ids of sentences included in the summary
+     * @return candidates, each pairing a mention with its sentence and the preceding one
+     */
+    public List<ZeroSubjectCandidate> findZeroSubjectCandidates(TText text, Set<String> summarySentenceIds) {
+        List<ZeroSubjectCandidate> candidates = Lists.newArrayList();
+
+        // Map each mention id to the set of mention ids in its coreference cluster.
+        Map<String, Set<String>> mentionId2Cluster = Maps.newHashMap();
+        for (TCoreference coreference : text.getCoreferences()) {
+            for (String mentionId : coreference.getMentionIds()) {
+                mentionId2Cluster.put(mentionId, Sets.newHashSet(coreference.getMentionIds()));
+            }
+        }
+
+        Set<String> prevSentenceNominativeMentionIds = Sets.newHashSet();
+        TSentence prevSentence = null;
+        for (TParagraph p : text.getParagraphs()) {
+            for (TSentence sentence : p.getSentences()) {
+                // Only summary sentences are considered; prevSentence tracks the
+                // previous *summary* sentence, not necessarily the adjacent one.
+                if (!summarySentenceIds.contains(sentence.getId()))
+                    continue;
+                Set<String> currentSentenceNominativeMentionIds = Sets.newHashSet();
+
+                Map<String, TToken> tokenId2Token = Maps.newHashMap();
+                for (TToken t : sentence.getTokens())
+                    tokenId2Token.put(t.getId(), t);
+
+                for (TMention mention : sentence.getMentions()) {
+
+                    for (String tokenId : mention.getHeadIds()) {
+                        TInterpretation interp = tokenId2Token.get(tokenId).getChosenInterpretation();
+                        if (isInNominative(interp)) {
+
+                            currentSentenceNominativeMentionIds.add(mention.getId());
+                            // Singleton mentions may be absent from the cluster map -
+                            // treat them as having no coreferent mentions.
+                            Set<String> cluster = mentionId2Cluster.getOrDefault(mention.getId(), Collections.emptySet());
+                            if (cluster.stream().anyMatch(prevSentenceNominativeMentionIds::contains)) {
+                                ZeroSubjectCandidate candidate = new ZeroSubjectCandidate(prevSentence, sentence, mention);
+                                candidates.add(candidate);
+                            }
+                            break;
+                        }
+                    }
+                }
+
+                prevSentence = sentence;
+                prevSentenceNominativeMentionIds = currentSentenceNominativeMentionIds;
+            }
+        }
+        return candidates;
+    }
+
+    /** True if the interpretation is a noun (subst) with nominative case in its msd tags. */
+    private static boolean isInNominative(TInterpretation interp) {
+        return interp.getCtag().equals("subst") && Arrays.stream(interp.getMsd().split(":")).anyMatch(t -> t.equals("nom"));
+    }
+}
diff --git a/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/Zero.java b/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/Zero.java
new file mode 100644
index 0000000..1414f45
--- /dev/null
+++ b/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/Zero.java
@@ -0,0 +1,76 @@
+package pl.waw.ipipan.zil.summ.nicolas.zero;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVPrinter;
+import org.apache.commons.csv.QuoteMode;
+import org.apache.commons.io.IOUtils;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
+import pl.waw.ipipan.zil.summ.nicolas.common.ThriftTextHelper;
+import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+public class Zero {
+
+    private static final String IDS_PATH = "corpora/summaries_dev";
+    private static final String THRIFTED_PATH = "corpora/preprocessed_full_texts/dev/";
+
+    private Zero() {
+    }
+
+    public static void main(String[] args) throws IOException {
+
+        CandidateFinder candidateFinder = new CandidateFinder();
+
+        Map<String, TText> id2preprocessedText = Utils.loadPreprocessedTexts(THRIFTED_PATH);
+        Map<String, Set<String>> id2sentIds = loadSentenceIds(IDS_PATH);
+
+        List<List<Object>> rows = Lists.newArrayList();
+        for (Map.Entry<String, TText> entry : id2preprocessedText.entrySet()) {
+            String textId = entry.getKey();
+
+            TText text = entry.getValue();
+            ThriftTextHelper thriftTextHelper = new ThriftTextHelper(text);
+
+            Set<String> sentenceIds = id2sentIds.get(textId);
+
+            List<ZeroSubjectCandidate> zeroSubjectCandidates = candidateFinder.findZeroSubjectCandidates(text, sentenceIds);
+
+            for (ZeroSubjectCandidate candidate : zeroSubjectCandidates) {
+                List<Object> row = Lists.newArrayList();
+                row.add("C");
+                row.add(textId);
+                row.add(thriftTextHelper.getMentionText(candidate.getZeroCandidateMention()));
+                row.add(thriftTextHelper.getSentenceText(candidate.getPreviousSentence()));
+                row.add(thriftTextHelper.getSentenceText(candidate.getSentence()));
+                rows.add(row);
+            }
+        }
+
+        try (CSVPrinter csvPrinter = new CSVPrinter(new FileWriter("zeros.tsv"), CSVFormat.DEFAULT.withDelimiter('\t').withEscape('\\').withQuoteMode(QuoteMode.NONE).withQuote('"'))) {
+            for (List<Object> row : rows) {
+                csvPrinter.printRecord(row);
+            }
+        }
+
+    }
+
+    private static Map<String, Set<String>> loadSentenceIds(String idsPath) throws IOException {
+        Map<String, Set<String>> result = Maps.newHashMap();
+        for (File f : new File(idsPath).listFiles()) {
+            String id = f.getName().split("_")[0];
+            List<String> sentenceIds = IOUtils.readLines(new FileReader(f));
+            result.put(id, Sets.newHashSet(sentenceIds));
+        }
+        return result;
+    }
+}
diff --git a/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectCandidate.java b/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectCandidate.java
new file mode 100644
index 0000000..6d0a76f
--- /dev/null
+++ b/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectCandidate.java
@@ -0,0 +1,36 @@
+package pl.waw.ipipan.zil.summ.nicolas.zero;
+
+import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
+
+/**
+ * Immutable holder for a zero subject candidate: a mention together with
+ * its sentence and the directly preceding (summary) sentence.
+ */
+public class ZeroSubjectCandidate {
+
+    private final TSentence previousSentence;
+    private final TSentence sentence;
+    private final TMention zeroCandidateMention;
+
+    public ZeroSubjectCandidate(TSentence previousSentence, TSentence sentence, TMention zeroCandidateMention) {
+        this.previousSentence = previousSentence;
+        this.sentence = sentence;
+        this.zeroCandidateMention = zeroCandidateMention;
+    }
+
+    /** Sentence preceding the one containing the candidate mention. */
+    public TSentence getPreviousSentence() {
+        return previousSentence;
+    }
+
+    /** Sentence containing the candidate mention. */
+    public TSentence getSentence() {
+        return sentence;
+    }
+
+    /** The candidate zero-subject mention. */
+    public TMention getZeroCandidateMention() {
+        return zeroCandidateMention;
+    }
+}
diff --git a/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectInjector.java b/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectInjector.java
new file mode 100644
index 0000000..ca4f915
--- /dev/null
+++ b/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectInjector.java
@@ -0,0 +1,9 @@
+package pl.waw.ipipan.zil.summ.nicolas.zero;
+
+
+/**
+ * Placeholder for the component that will inject zero subjects into summaries.
+ * TODO: implement.
+ */
+public class ZeroSubjectInjector {
+}
diff --git a/nicolas-zero/src/test/java/pl/waw/ipipan/zil/summ/nicolas/zero/CandidateFinderTest.java b/nicolas-zero/src/test/java/pl/waw/ipipan/zil/summ/nicolas/zero/CandidateFinderTest.java
new file mode 100644
index 0000000..7948faa
--- /dev/null
+++ b/nicolas-zero/src/test/java/pl/waw/ipipan/zil/summ/nicolas/zero/CandidateFinderTest.java
@@ -0,0 +1,68 @@
+package pl.waw.ipipan.zil.summ.nicolas.zero;
+
+import com.google.common.collect.Sets;
+import org.apache.commons.io.IOUtils;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
+import pl.waw.ipipan.zil.summ.nicolas.common.ThriftTextHelper;
+import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Verifies that {@link CandidateFinder} locates the single expected
+ * zero-subject candidate in a small serialized sample text.
+ */
+public class CandidateFinderTest {
+
+    private static final String SAMPLE_TEXT_PATH = "/pl/waw/ipipan/zil/summ/nicolas/zero/sample_serialized_text.bin";
+    private static final String SAMPLE_TEXT_SUMMARY_IDS_PATH = "/pl/waw/ipipan/zil/summ/nicolas/zero/sample_summary_sentence_ids.txt";
+
+    private static CandidateFinder candidateFinder;
+
+    @BeforeClass
+    public static void init() {
+        candidateFinder = new CandidateFinder();
+    }
+
+    @Test
+    public void shouldFindZeroSubjectCandidateInSampleText() throws Exception {
+        ThriftTextHelper sampleTextHelper = loadSampleTextHelper();
+        Set<String> summarySentenceIds = loadSampleTextSummarySentenceIds();
+        List<ZeroSubjectCandidate> candidates = candidateFinder.findZeroSubjectCandidates(sampleTextHelper.getText(), summarySentenceIds);
+        assertEquals(1, candidates.size());
+
+        ZeroSubjectCandidate zeroSubjectCandidate = candidates.get(0);
+        TSentence firstSentence = zeroSubjectCandidate.getPreviousSentence();
+        TSentence secondSentence = zeroSubjectCandidate.getSentence();
+        TMention zeroCandidate = zeroSubjectCandidate.getZeroCandidateMention();
+
+        assertEquals("Ala ma kota.", sampleTextHelper.getSentenceText(firstSentence));
+        assertEquals("Ala ma też psa.", sampleTextHelper.getSentenceText(secondSentence));
+        assertEquals("Ala", sampleTextHelper.getMentionText(zeroCandidate));
+    }
+
+    private Set<String> loadSampleTextSummarySentenceIds() throws IOException {
+        // Explicit UTF-8: the no-arg InputStreamReader constructor uses the
+        // platform default charset, which would break this test on non-UTF-8 JVMs.
+        try (InputStream stream = CandidateFinderTest.class.getResourceAsStream(SAMPLE_TEXT_SUMMARY_IDS_PATH);
+             InputStreamReader reader = new InputStreamReader(stream, StandardCharsets.UTF_8)) {
+            return Sets.newHashSet(IOUtils.readLines(reader));
+        }
+    }
+
+    private ThriftTextHelper loadSampleTextHelper() throws IOException {
+        try (InputStream stream = CandidateFinderTest.class.getResourceAsStream(SAMPLE_TEXT_PATH)) {
+            return new ThriftTextHelper(Utils.loadThrifted(stream));
+        }
+    }
+}
diff --git a/nicolas-zero/src/test/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectInjectorTest.java b/nicolas-zero/src/test/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectInjectorTest.java
new file mode 100644
index 0000000..e98bc27
--- /dev/null
+++ b/nicolas-zero/src/test/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectInjectorTest.java
@@ -0,0 +1,18 @@
+package pl.waw.ipipan.zil.summ.nicolas.zero;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertNotNull;
+
+/**
+ * Smoke test: {@link ZeroSubjectInjector} can be constructed.
+ */
+public class ZeroSubjectInjectorTest {
+
+    @Test
+    public void shouldInit() {
+        // Assert the result so the test verifies something instead of
+        // leaving an unused local variable.
+        assertNotNull(new ZeroSubjectInjector());
+    }
+}
diff --git a/pom.xml b/pom.xml
index 6484758..3a2ba87 100644
--- a/pom.xml
+++ b/pom.xml
@@ -16,6 +16,7 @@
         <module>nicolas-model</module>
         <module>nicolas-train</module>
         <module>nicolas-zero</module>
+        <module>nicolas-common</module>
     </modules>
 
     <properties>
@@ -30,6 +31,8 @@
         <weka-dev.version>3.9.0</weka-dev.version>
         <commons-lang3.version>3.5</commons-lang3.version>
         <commons-io.version>2.5</commons-io.version>
+        <slf4j-api.version>1.7.12</slf4j-api.version>
+        <junit.version>4.12</junit.version>
     </properties>
 
     <prerequisites>
@@ -46,13 +49,20 @@
 
     <dependencyManagement>
         <dependencies>
+            <!-- project -->
             <dependency>
                 <groupId>pl.waw.ipipan.zil.summ</groupId>
                 <artifactId>nicolas-model</artifactId>
                 <version>${project.version}</version>
                 <scope>runtime</scope>
             </dependency>
+            <dependency>
+                <groupId>pl.waw.ipipan.zil.summ</groupId>
+                <artifactId>nicolas-common</artifactId>
+                <version>${project.version}</version>
+            </dependency>
 
+            <!-- internal -->
             <dependency>
                 <groupId>pl.waw.ipipan.zil.summ</groupId>
                 <artifactId>pscapi</artifactId>
@@ -64,6 +74,7 @@
                 <version>${utils.version}</version>
             </dependency>
 
+            <!-- third party -->
             <dependency>
                 <groupId>org.apache.commons</groupId>
                 <artifactId>commons-csv</artifactId>
@@ -89,6 +100,21 @@
                 <artifactId>commons-io</artifactId>
                 <version>${commons-io.version}</version>
             </dependency>
+
+            <!-- logging -->
+            <dependency>
+                <groupId>org.slf4j</groupId>
+                <artifactId>slf4j-api</artifactId>
+                <version>${slf4j-api.version}</version>
+            </dependency>
+
+            <!-- test -->
+            <dependency>
+                <groupId>junit</groupId>
+                <artifactId>junit</artifactId>
+                <version>${junit.version}</version>
+                <scope>test</scope>
+            </dependency>
         </dependencies>
     </dependencyManagement>
 
--
libgit2 0.22.2