From cb490cab2a2bfe90e2c4ad0661117321fb2f2e1f Mon Sep 17 00:00:00 2001
From: Mateusz Kopeć <m.kopec@ipipan.waw.pl>
Date: Wed, 1 Feb 2017 22:11:55 +0100
Subject: [PATCH] create sample cli client

---
 nicolas-cli/README.md                                                                                 |  12 ++++++++++++
 nicolas-cli/pom.xml                                                                                   |  66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Cli.java                                 |  87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Client.java                              |  77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Main.java                                |  36 ++++++++++++++++++++++++++++++++++++
 nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/CliTest.java                             |  75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/ClientTest.java                          |  54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/MainIT.java                              |  38 ++++++++++++++++++++++++++++++++++++++
 nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/TestUtils.java                           |  24 ++++++++++++++++++++++++
 nicolas-cli/src/test/resources/pl/waw/ipipan/zil/summ/nicolas/cli/sample_input.thrift                 | Bin 0 -> 497720 bytes
 nicolas-cli/src/test/resources/pl/waw/ipipan/zil/summ/nicolas/cli/sample_input.txt                    |   9 +++++++++
 nicolas-common/src/main/java/pl/waw/ipipan/zil/summ/nicolas/common/Utils.java                         |  16 ++++++++++------
 nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java                                 |  11 +++++++----
 nicolas-multiservice/pom.xml                                                                          |  39 +++++++++++++++++++++++++++++++++++++++
 nicolas-multiservice/src/main/java/pl/waw/ipipan/zil/summ/nicolas/multiservice/MultiserviceProxy.java | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 nicolas-multiservice/src/main/java/pl/waw/ipipan/zil/summ/nicolas/multiservice/Preprocessor.java      |  52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 nicolas-multiservice/src/test/java/pl/waw/ipipan/zil/summ/nicolas/multiservice/PreprocessorIT.java    |  74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 nicolas-train/pom.xml                                                                                 |   4 ++++
 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/DownloadAndPreprocessCorpus.java     |   4 ++--
 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/MultiserviceProxy.java  | 110 --------------------------------------------------------------------------------------------------------------
 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/NLPProcess.java         |  97 -------------------------------------------------------------------------------------------------
 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/preprocess/Main.java                 |  63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 nicolas-train/src/test/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/NLPProcessIT.java       |  31 -------------------------------
 pom.xml                                                                                               | 213 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
 24 files changed, 1039 insertions(+), 257 deletions(-)
 create mode 100644 nicolas-cli/README.md
 create mode 100644 nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Cli.java
 create mode 100644 nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Client.java
 create mode 100644 nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Main.java
 create mode 100644 nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/CliTest.java
 create mode 100644 nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/ClientTest.java
 create mode 100644 nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/MainIT.java
 create mode 100644 nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/TestUtils.java
 create mode 100644 nicolas-cli/src/test/resources/pl/waw/ipipan/zil/summ/nicolas/cli/sample_input.thrift
 create mode 100644 nicolas-cli/src/test/resources/pl/waw/ipipan/zil/summ/nicolas/cli/sample_input.txt
 create mode 100644 nicolas-multiservice/pom.xml
 create mode 100644 nicolas-multiservice/src/main/java/pl/waw/ipipan/zil/summ/nicolas/multiservice/MultiserviceProxy.java
 create mode 100644 nicolas-multiservice/src/main/java/pl/waw/ipipan/zil/summ/nicolas/multiservice/Preprocessor.java
 create mode 100644 nicolas-multiservice/src/test/java/pl/waw/ipipan/zil/summ/nicolas/multiservice/PreprocessorIT.java
 delete mode 100644 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/MultiserviceProxy.java
 delete mode 100644 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/NLPProcess.java
 create mode 100644 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/preprocess/Main.java
 delete mode 100644 nicolas-train/src/test/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/NLPProcessIT.java

diff --git a/nicolas-cli/README.md b/nicolas-cli/README.md
new file mode 100644
index 0000000..b0c409f
--- /dev/null
+++ b/nicolas-cli/README.md
@@ -0,0 +1,12 @@
+# nicolas-cli
+
+This module contains a sample command-line application, which uses the Nicolas library to summarize a chosen input text file.
+The summary is written to the target output file. Additionally, the user needs to specify the desired number of tokens in the summary.
+
+## Installation
+
+    mvn clean install
+
+## Usage
+
+    java -jar target/nicolas-cli.jar -help
\ No newline at end of file
diff --git a/nicolas-cli/pom.xml b/nicolas-cli/pom.xml
index e65a5b6..422e8f1 100644
--- a/nicolas-cli/pom.xml
+++ b/nicolas-cli/pom.xml
@@ -11,4 +11,70 @@
 
     <artifactId>nicolas-cli</artifactId>
 
+    <dependencies> <!-- no versions are declared in this module; presumably managed by the parent POM (TODO confirm) -->
+
+        <!-- project -->
+        <dependency>
+            <groupId>pl.waw.ipipan.zil.summ</groupId>
+            <artifactId>nicolas-multiservice</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>pl.waw.ipipan.zil.summ</groupId>
+            <artifactId>nicolas-lib</artifactId>
+        </dependency>
+
+        <!-- third party -->
+        <dependency>
+            <groupId>com.beust</groupId>
+            <artifactId>jcommander</artifactId>
+        </dependency>
+
+        <!-- logging -->
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-simple</artifactId>
+        </dependency>
+
+        <!-- test -->
+        <dependency> <!-- NOTE(review): no test scope is declared here for junit or mockito; confirm the parent POM sets it, otherwise both may land in the assembled jar -->
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-core</artifactId>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin> <!-- assembles the module and its dependencies into one jar -->
+                <artifactId>maven-assembly-plugin</artifactId>
+                <configuration>
+                    <appendAssemblyId>false</appendAssemblyId> <!-- the assembly id is not appended to the jar file name -->
+                    <archive>
+                        <manifest>
+                            <mainClass>pl.waw.ipipan.zil.summ.nicolas.cli.Main</mainClass> <!-- entry point written to the jar manifest -->
+                        </manifest>
+                    </archive>
+                    <descriptorRefs>
+                        <descriptorRef>jar-with-dependencies</descriptorRef>
+                    </descriptorRefs>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>make-assembly</id>
+                        <phase>package</phase> <!-- the single goal below is bound to the package phase -->
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
 </project>
\ No newline at end of file
diff --git a/nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Cli.java b/nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Cli.java
new file mode 100644
index 0000000..ace95d1
--- /dev/null
+++ b/nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Cli.java
@@ -0,0 +1,87 @@
+package pl.waw.ipipan.zil.summ.nicolas.cli;
+
+import com.beust.jcommander.IParameterValidator;
+import com.beust.jcommander.JCommander;
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.ParameterException;
+import com.beust.jcommander.converters.FileConverter;
+import com.beust.jcommander.validators.PositiveInteger;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+
+class Cli { // Command-line options of the sample client, filled in by JCommander.
+
+    private static final Logger LOG = LoggerFactory.getLogger(Cli.class);
+
+    @Parameter(names = {"-help", "-h"}, description = "Print help", help = true) // help = true: required options are not enforced when help is requested
+    private boolean help = false;
+
+    @Parameter(names = {"-input", "-i"}, description = "Input text file to summarize", required = true, validateWith = FileValidator.class, converter = FileConverter.class)
+    private File inputFile; // must point to an existing regular file (FileValidator)
+
+    @Parameter(names = {"-output", "-o"}, description = "Output file path for summary", required = true, validateWith = FileValidator.class, converter = FileConverter.class)
+    private File outputFile; // FileValidator applies here as well, so the output file has to exist already
+
+    @Parameter(names = {"-target", "-t"}, description = "Target summary token count", required = true, validateWith = PositiveInteger.class)
+    private int targetTokenCount;
+
+    private boolean invalid = false; // set when argument parsing fails
+
+    boolean isHelp() {
+        return help;
+    }
+
+    File getInputFile() {
+        return inputFile;
+    }
+
+    File getOutputFile() {
+        return outputFile;
+    }
+
+    int getTargetTokenCount() {
+        return targetTokenCount;
+    }
+
+    @SuppressWarnings("squid:S1166")
+    static Cli parse(String[] args) { // parse failures are logged and reported through isInvalid(), not thrown
+        Cli cli = new Cli();
+        JCommander jCommander;
+        try {
+            jCommander = new JCommander(cli, args);
+        } catch (ParameterException ex) {
+            LOG.error("Error parsing parameters: {}", ex.getLocalizedMessage());
+            cli.setInvalid();
+            return cli;
+        }
+        if (cli.isHelp()) { // usage text is emitted through the logger at INFO level
+            StringBuilder stringBuilder = new StringBuilder();
+            jCommander.usage(stringBuilder);
+            LOG.info("{}", stringBuilder);
+        }
+        return cli;
+    }
+
+    private void setInvalid() {
+        invalid = true;
+    }
+
+    boolean isInvalid() {
+        return invalid;
+    }
+
+    public static class FileValidator implements IParameterValidator { // accepts only paths of existing regular files
+
+        @Override
+        public void validate(String name, String value) {
+            File file = new File(value);
+            if (!file.isFile()) { // false for directories and for paths that do not exist
+                throw new ParameterException("Parameter " + name
+                        + " should be a valid file path (found " + value + ")");
+            }
+        }
+
+    }
+}
diff --git a/nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Client.java b/nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Client.java
new file mode 100644
index 0000000..4adaa48
--- /dev/null
+++ b/nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Client.java
@@ -0,0 +1,77 @@
+package pl.waw.ipipan.zil.summ.nicolas.cli;
+
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import pl.waw.ipipan.zil.multiservice.thrift.types.MultiserviceException;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
+import pl.waw.ipipan.zil.summ.nicolas.Nicolas;
+import pl.waw.ipipan.zil.summ.nicolas.NicolasException;
+import pl.waw.ipipan.zil.summ.nicolas.common.Constants;
+import pl.waw.ipipan.zil.summ.nicolas.multiservice.Preprocessor;
+
+import java.io.*;
+
+class Client {
+
+    private static final Logger LOG = LoggerFactory.getLogger(Client.class);
+
+    private final Preprocessor preprocessor;
+    private final Nicolas nicolas;
+
+    Client(Preprocessor preprocessor, Nicolas nicolas) {
+        this.preprocessor = preprocessor;
+        this.nicolas = nicolas;
+    }
+
+    @SuppressWarnings("squid:S1166")
+    void summarize(File inputFile, File outputFile, int targetTokenCount) {
+        try {
+            String inputText = loadInputText(inputFile);
+            TText preprocessed = preprocess(inputText);
+            String summary = summarize(preprocessed, targetTokenCount);
+            saveSummaryToFile(summary, outputFile);
+        } catch (IOException | MultiserviceException | NicolasException e) {
+            LOG.error("Exiting because of an error.");
+        }
+    }
+
+    private String loadInputText(File inputFile) throws IOException {
+        String inputText;
+        try (FileInputStream inputStream = new FileInputStream(inputFile)) {
+            inputText = IOUtils.toString(inputStream, Constants.ENCODING);
+        } catch (IOException e) {
+            LOG.error("Error reading input text.");
+            throw e;
+        }
+        return inputText;
+    }
+
+    private TText preprocess(String inputText) throws MultiserviceException {
+        try {
+            return preprocessor.preprocess(inputText);
+        } catch (MultiserviceException e) {
+            LOG.error("Error preprocessing input text.");
+            throw e;
+        }
+    }
+
+    private String summarize(TText preprocessed, int targetTokenCount) throws NicolasException {
+        try {
+            return nicolas.summarizeThrift(preprocessed, targetTokenCount);
+        } catch (NicolasException e) {
+            LOG.error("Error preprocessing input text.");
+            throw e;
+        }
+    }
+
+    private void saveSummaryToFile(String summary, File outputFile) throws IOException {
+        try (OutputStream outputStream = new FileOutputStream(outputFile)) {
+            IOUtils.write(summary, outputStream, Constants.ENCODING);
+        } catch (IOException e) {
+            LOG.error("Error writing file with summary.");
+            throw e;
+        }
+    }
+
+}
diff --git a/nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Main.java b/nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Main.java
new file mode 100644
index 0000000..4a49f65
--- /dev/null
+++ b/nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Main.java
@@ -0,0 +1,36 @@
+package pl.waw.ipipan.zil.summ.nicolas.cli;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import pl.waw.ipipan.zil.summ.nicolas.Nicolas;
+import pl.waw.ipipan.zil.summ.nicolas.multiservice.Preprocessor;
+
+import java.io.IOException;
+
+public class Main { // Entry point of the sample command-line client.
+
+    private static final Logger LOG = LoggerFactory.getLogger(Main.class);
+
+    private Main() { // private constructor: the class only exposes the static main method
+    }
+
+    @SuppressWarnings("squid:S1166")
+    public static void main(String[] args) {
+        Cli cli = Cli.parse(args);
+        if (cli.isHelp() || cli.isInvalid()) { // nothing to summarize: help was requested or parsing failed
+            return;
+        }
+
+        Nicolas nicolas;
+        Preprocessor preprocessor;
+        try {
+            nicolas = new Nicolas();
+            preprocessor = new Preprocessor();
+        } catch (IOException | ClassNotFoundException e) { // exception details are not logged (see the squid:S1166 suppression above)
+            LOG.error("Error loading Nicolas or Multiservice preprocessor! Will exit.");
+            return;
+        }
+        Client client = new Client(preprocessor, nicolas);
+        client.summarize(cli.getInputFile(), cli.getOutputFile(), cli.getTargetTokenCount());
+    }
+}
diff --git a/nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/CliTest.java b/nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/CliTest.java
new file mode 100644
index 0000000..8b09280
--- /dev/null
+++ b/nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/CliTest.java
@@ -0,0 +1,75 @@
+package pl.waw.ipipan.zil.summ.nicolas.cli;
+
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import java.io.File;
+import java.io.IOException;
+
+import static org.junit.Assert.*;
+
+public class CliTest { // Unit tests for argument handling in Cli.parse.
+
+    @ClassRule
+    public static TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder();
+
+    private static File sampleInputFile;
+    private static File sampleOutputFile;
+    private static final int TARGET_TOKEN_COUNT = 50;
+    private static final String INVALID_FILE_PATH = "nonexistent_dir/nonexistent_file";
+
+    @BeforeClass
+    public static void initSampleFiles() throws IOException { // both files are created on disk inside the temporary folder
+        sampleInputFile = TEMPORARY_FOLDER.newFile();
+        sampleOutputFile = TEMPORARY_FOLDER.newFile();
+    }
+
+    @Test
+    public void failNoArguments() throws Exception { // no options at all
+        String[] args = new String[]{};
+        Cli cli = Cli.parse(args);
+        assertTrue(cli.isInvalid());
+    }
+
+    @Test
+    public void failInvalidArgument() throws Exception { // otherwise valid arguments plus the undeclared option -xxxx
+        String[] args = new String[]{"-xxxx", "xxx", "-i", sampleInputFile.getPath(), "-o", sampleOutputFile.getPath(), "-t", Integer.toString(TARGET_TOKEN_COUNT)};
+        Cli cli = Cli.parse(args);
+        assertTrue(cli.isInvalid());
+    }
+
+    @Test
+    public void failInvalidInputFile() throws Exception { // input path does not exist
+        String[] args = new String[]{"-i", INVALID_FILE_PATH, "-o", sampleOutputFile.getPath(), "-t", Integer.toString(TARGET_TOKEN_COUNT)};
+        Cli cli = Cli.parse(args);
+        assertTrue(cli.isInvalid());
+    }
+
+    @Test
+    public void failInvalidOutputFile() throws Exception { // output path does not exist
+        String[] args = new String[]{"-i", sampleInputFile.getPath(), "-o", INVALID_FILE_PATH, "-t", Integer.toString(TARGET_TOKEN_COUNT)};
+        Cli cli = Cli.parse(args);
+        assertTrue(cli.isInvalid());
+    }
+
+    @Test
+    public void failInvalidTargetTokenCount() throws Exception { // negative token count
+        String[] args = new String[]{"-i", sampleInputFile.getPath(), "-o", sampleOutputFile.getPath(), "-t", Integer.toString(-1)};
+        Cli cli = Cli.parse(args);
+        assertTrue(cli.isInvalid());
+    }
+
+    @Test
+    public void validArguments() throws Exception { // all three required options given with valid values
+        String[] args = new String[]{"-i", sampleInputFile.getPath(), "-o", sampleOutputFile.getPath(), "-t", Integer.toString(TARGET_TOKEN_COUNT)};
+        Cli cli = Cli.parse(args);
+        assertFalse(cli.isInvalid());
+        assertEquals(sampleInputFile, cli.getInputFile());
+        assertEquals(sampleOutputFile, cli.getOutputFile());
+        assertEquals(TARGET_TOKEN_COUNT, cli.getTargetTokenCount());
+    }
+
+
+}
diff --git a/nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/ClientTest.java b/nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/ClientTest.java
new file mode 100644
index 0000000..2509618
--- /dev/null
+++ b/nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/ClientTest.java
@@ -0,0 +1,54 @@
+package pl.waw.ipipan.zil.summ.nicolas.cli;
+
+import org.apache.commons.io.IOUtils;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
+import pl.waw.ipipan.zil.summ.nicolas.Nicolas;
+import pl.waw.ipipan.zil.summ.nicolas.common.Constants;
+import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
+import pl.waw.ipipan.zil.summ.nicolas.multiservice.Preprocessor;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+
+import static org.junit.Assert.assertEquals;
+import static org.mockito.ArgumentMatchers.*;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+import static pl.waw.ipipan.zil.summ.nicolas.cli.TestUtils.SAMPLE_INPUT_RESOURCE_PATH;
+import static pl.waw.ipipan.zil.summ.nicolas.cli.TestUtils.SAMPLE_THRIFT_TEXT_RESOURCE_PATH;
+
+public class ClientTest { // Tests Client with mocked Preprocessor and Nicolas.
+
+    @ClassRule
+    public static TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder();
+
+    @Test
+    public void processSampleText() throws Exception { // the summary returned by the Nicolas mock must end up in the output file
+        Preprocessor preprocessor = mock(Preprocessor.class);
+        TText ttext = Utils.loadThriftTextFromResource(SAMPLE_THRIFT_TEXT_RESOURCE_PATH);
+        when(preprocessor.preprocess(any())).thenReturn(ttext); // any input text yields the canned thrift text
+
+        Nicolas nicolas = mock(Nicolas.class);
+        String targetSummary = "This is a summary";
+        when(nicolas.summarizeThrift(eq(ttext), anyInt())).thenReturn(targetSummary);
+
+        Client client = new Client(preprocessor, nicolas);
+
+        File inputFile = TestUtils.copyResourceToFile(SAMPLE_INPUT_RESOURCE_PATH, TEMPORARY_FOLDER.newFile());
+        File outputFile = TEMPORARY_FOLDER.newFile();
+        int targetTokenCount = 50;
+
+        String[] args = new String[]{"-i", inputFile.getPath(), "-o", outputFile.getPath(), "-t", Integer.toString(targetTokenCount)};
+        Cli cli = Cli.parse(args); // arguments go through the real Cli parser
+        client.summarize(cli.getInputFile(), cli.getOutputFile(), cli.getTargetTokenCount());
+
+        try (InputStream inputStream = new FileInputStream(outputFile)) {
+            String summary = IOUtils.toString(inputStream, Constants.ENCODING);
+            assertEquals(targetSummary, summary);
+        }
+    }
+}
\ No newline at end of file
diff --git a/nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/MainIT.java b/nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/MainIT.java
new file mode 100644
index 0000000..4067383
--- /dev/null
+++ b/nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/MainIT.java
@@ -0,0 +1,38 @@
+package pl.waw.ipipan.zil.summ.nicolas.cli;
+
+import org.apache.commons.io.IOUtils;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import pl.waw.ipipan.zil.summ.nicolas.common.Constants;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+
+import static junit.framework.TestCase.assertTrue;
+
+public class MainIT { // Integration test that drives Main.main with real arguments.
+
+    private final static String SAMPLE_INPUT_RESOURCE_PATH = "/pl/waw/ipipan/zil/summ/nicolas/cli/sample_input.txt"; // NOTE(review): same value as TestUtils.SAMPLE_INPUT_RESOURCE_PATH
+
+    @ClassRule
+    public static TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder();
+
+    @Test
+    public void processSampleText() throws Exception { // presumably needs a reachable Multiservice instance, since Main builds a real Preprocessor (TODO confirm)
+        File inputFile = TestUtils.copyResourceToFile(SAMPLE_INPUT_RESOURCE_PATH, TEMPORARY_FOLDER.newFile());
+        File outputFile = TEMPORARY_FOLDER.newFile();
+        int targetTokenCount = 50;
+
+        String[] args = new String[]{"-i", inputFile.getPath(), "-o", outputFile.getPath(), "-t", Integer.toString(targetTokenCount)};
+        Main.main(args);
+
+        try (InputStream inputStream = new FileInputStream(outputFile)) {
+            String summary = IOUtils.toString(inputStream, Constants.ENCODING);
+            assertTrue(summary.length() > 0); // something was written
+            assertTrue(summary.length() < targetTokenCount * 10); // loose upper bound: fewer than 10 characters per requested token
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/TestUtils.java b/nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/TestUtils.java
new file mode 100644
index 0000000..47412af
--- /dev/null
+++ b/nicolas-cli/src/test/java/pl/waw/ipipan/zil/summ/nicolas/cli/TestUtils.java
@@ -0,0 +1,24 @@
+package pl.waw.ipipan.zil.summ.nicolas.cli;
+
+import org.apache.commons.io.IOUtils;
+
+import java.io.*;
+
+class TestUtils { // Shared resource paths and a helper for the nicolas-cli tests.
+
+    private static final String PACKAGE = "/pl/waw/ipipan/zil/summ/nicolas/cli/";
+
+    static final String SAMPLE_INPUT_RESOURCE_PATH = PACKAGE + "sample_input.txt";
+    static final String SAMPLE_THRIFT_TEXT_RESOURCE_PATH = PACKAGE + "sample_input.thrift";
+
+    private TestUtils() {
+    }
+
+    static File copyResourceToFile(String resourcePath, File file) throws IOException { // copies a classpath resource into the given file and returns that file
+        try (InputStream inputStream = MainIT.class.getResourceAsStream(resourcePath); // NOTE(review): getResourceAsStream returns null for a missing resource; that case is not checked here
+             OutputStream outputStream = new FileOutputStream(file)) {
+            IOUtils.copy(inputStream, outputStream);
+        }
+        return file;
+    }
+}
diff --git a/nicolas-cli/src/test/resources/pl/waw/ipipan/zil/summ/nicolas/cli/sample_input.thrift b/nicolas-cli/src/test/resources/pl/waw/ipipan/zil/summ/nicolas/cli/sample_input.thrift
new file mode 100644
index 0000000..cf072c2
Binary files /dev/null and b/nicolas-cli/src/test/resources/pl/waw/ipipan/zil/summ/nicolas/cli/sample_input.thrift differ
diff --git a/nicolas-cli/src/test/resources/pl/waw/ipipan/zil/summ/nicolas/cli/sample_input.txt b/nicolas-cli/src/test/resources/pl/waw/ipipan/zil/summ/nicolas/cli/sample_input.txt
new file mode 100644
index 0000000..3026aea
--- /dev/null
+++ b/nicolas-cli/src/test/resources/pl/waw/ipipan/zil/summ/nicolas/cli/sample_input.txt
@@ -0,0 +1,9 @@
+To będzie już druga próba licytacji nieruchomości na pl. Słonecznym, którą urzędnicy wytropili po latach poszukiwań majątku Adama Gesslera.
+
+Jego dług wobec miasta szacują dziś na ok. 27 mln zł. Już w 1992 r., wkrótce po podpisaniu umowy najmu lokalu na Rynku Staromiejskim, zaczęły się problemy z czynszem. Sąd orzekł eksmisję. Dotąd miastu udało się odzyskać ledwie kilkadziesiąt tysięcy złotych długu.
+
+Sprawa budzi wielkie emocje, bo choć Adam Gessler jest słynnym restauratorem, oficjalnie nie ma nic. Nawet wynajęta przez Zakład Gospodarowania Nieruchomościami w Śródmieściu firma detektywistyczna nie znalazła majątku.
+
+Pozostają dwa mieszkania na Żoliborzu, wyceniane przed rokiem na blisko 4,3 mln zł. Będą licytowane za dwie trzecie ceny. W ZGN wymyślili, żeby miasto przystąpiło do licytacji. Jeśli uda się kupić nieruchomość, komornik pospłaca wierzycieli Adama i Piotra Gesslerów. A miasto będzie mogło w przyszłości sprzedać korzystnie atrakcyjny dom.
+
+Licytacje odbędą się w środę. - Korzyści z wylicytowania domu będą niewielkie w stosunku do ogromnego długu pana Gesslera. Chodzi jednak o to, żeby wiedział, że miasto nie zrezygnuje z upominania się o swoje - tłumaczyła "Gazecie" Małgorzata Mazur, dyrektorka ZGN.
diff --git a/nicolas-common/src/main/java/pl/waw/ipipan/zil/summ/nicolas/common/Utils.java b/nicolas-common/src/main/java/pl/waw/ipipan/zil/summ/nicolas/common/Utils.java
index 5524abc..ecba84f 100644
--- a/nicolas-common/src/main/java/pl/waw/ipipan/zil/summ/nicolas/common/Utils.java
+++ b/nicolas-common/src/main/java/pl/waw/ipipan/zil/summ/nicolas/common/Utils.java
@@ -45,17 +45,21 @@ public class Utils {
         }
     }
 
+    public static TText loadThriftTextFromStream(InputStream inputStream) throws IOException { // reads one serialized TText object from the stream
+        try (VersionIgnoringObjectInputStream ois = new VersionIgnoringObjectInputStream(inputStream)) {
+            return (TText) ois.readObject(); // NOTE(review): object deserialization; presumably only meant for trusted, bundled data (confirm)
+        } catch (ClassNotFoundException e) {
+            LOG.error("Error reading serialized thrift text file, class not found.", e);
+            throw new IOException(e); // reported to callers as an IOException carrying the original cause
+        }
+    }
+
     public static TText loadThriftTextFromResource(String textResourcePath) throws IOException {
         try (InputStream stream = Utils.class.getResourceAsStream(textResourcePath)) {
             if (stream == null) {
                 throw new IOException("Resource not found at: " + textResourcePath);
             }
-            try (VersionIgnoringObjectInputStream ois = new VersionIgnoringObjectInputStream(stream)) {
-                return (TText) ois.readObject();
-            } catch (ClassNotFoundException e) {
-                LOG.error("Error reading serialized thrift text file, class not found.", e);
-                throw new IOException(e);
-            }
+            return loadThriftTextFromStream(stream);
         }
     }
 
diff --git a/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java b/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java
index 3b5b55a..415de45 100644
--- a/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java
+++ b/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java
@@ -39,10 +39,13 @@ public class Nicolas {
         zeroFeatureExtractor = new ZeroFeatureExtractor();
     }
 
-    public String summarizeThrift(TText text, int targetTokenCount) throws Exception {
-        Set<TMention> goodMentions
-                = MentionModel.detectGoodMentions(mentionModel, mentionFeatureExtractor, text);
-        return calculateSummary(text, goodMentions, targetTokenCount);
+    public String summarizeThrift(TText text, int targetTokenCount) throws NicolasException { // summarizes a preprocessed text; targetTokenCount is passed on to calculateSummary
+        try {
+            Set<TMention> goodMentions = MentionModel.detectGoodMentions(mentionModel, mentionFeatureExtractor, text);
+            return calculateSummary(text, goodMentions, targetTokenCount);
+        } catch (Exception e) { // any exception from the two calls above, checked or unchecked
+            throw new NicolasException(e); // wrapped so that callers only deal with NicolasException
+        }
+    }
 
     private String calculateSummary(TText thrifted, Set<TMention> goodMentions, int targetSize) throws Exception {
diff --git a/nicolas-multiservice/pom.xml b/nicolas-multiservice/pom.xml
new file mode 100644
index 0000000..cc051be
--- /dev/null
+++ b/nicolas-multiservice/pom.xml
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>nicolas-container</artifactId>
+        <groupId>pl.waw.ipipan.zil.summ</groupId>
+        <version>1.0-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>nicolas-multiservice</artifactId>
+
+    <dependencies> <!-- no versions are declared in this module; presumably managed by the parent POM (TODO confirm) -->
+        <!-- internal -->
+        <dependency>
+            <groupId>pl.waw.ipipan.zil.multiservice</groupId>
+            <artifactId>utils</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>pl.waw.ipipan.zil.summ</groupId>
+            <artifactId>pscapi</artifactId>
+        </dependency>
+
+        <!-- logging -->
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+
+        <!-- test -->
+        <dependency>
+            <groupId>pl.waw.ipipan.zil.summ</groupId>
+            <artifactId>nicolas-common</artifactId>
+            <scope>test</scope> <!-- on the classpath for tests only -->
+        </dependency>
+
+    </dependencies>
+</project>
\ No newline at end of file
diff --git a/nicolas-multiservice/src/main/java/pl/waw/ipipan/zil/summ/nicolas/multiservice/MultiserviceProxy.java b/nicolas-multiservice/src/main/java/pl/waw/ipipan/zil/summ/nicolas/multiservice/MultiserviceProxy.java
new file mode 100644
index 0000000..4cdaf84
--- /dev/null
+++ b/nicolas-multiservice/src/main/java/pl/waw/ipipan/zil/summ/nicolas/multiservice/MultiserviceProxy.java
@@ -0,0 +1,156 @@
+package pl.waw.ipipan.zil.summ.nicolas.multiservice;
+
+import org.apache.thrift.TException;
+import org.apache.thrift.protocol.TBinaryProtocol;
+import org.apache.thrift.protocol.TProtocol;
+import org.apache.thrift.transport.TSocket;
+import org.apache.thrift.transport.TTransport;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import pl.waw.ipipan.zil.multiservice.thrift.Multiservice;
+import pl.waw.ipipan.zil.multiservice.thrift.ObjectRequest;
+import pl.waw.ipipan.zil.multiservice.thrift.RequestPart;
+import pl.waw.ipipan.zil.multiservice.thrift.RequestStatus;
+import pl.waw.ipipan.zil.multiservice.thrift.types.MultiserviceException;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TParagraph;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Thrift client for a Multiservice instance: submits text with a chain of services and waits for the result.
+ */
+public class MultiserviceProxy {
+
+    private static final Logger LOG = LoggerFactory.getLogger(MultiserviceProxy.class);
+
+    /** Pause between two status polls, so that the server is not flooded with status requests. */
+    private static final long POLL_INTERVAL_MILLIS = 100;
+
+    private final int port;
+    private final String host;
+
+    /**
+     * Stores the address of the Multiservice instance; the socket is only created when {@code process} is called.
+     *
+     * @param host Multiservice host name
+     * @param port Multiservice port
+     */
+    public MultiserviceProxy(String host, int port) {
+        this.host = host;
+        this.port = port;
+        LOG.info("Multiservice at {}:{}", host, port);
+    }
+
+    /**
+     * Processes text with an empty title and an empty option map for every service.
+     *
+     * @param text     text body to process
+     * @param services names of the services forming the processing chain
+     * @return text returned by Multiservice
+     * @throws MultiserviceException if the request fails
+     */
+    public TText process(String text, List<String> services) throws MultiserviceException {
+        List<Map<String, String>> options = new ArrayList<>();
+        for (int i = 0; i < services.size(); i++) {
+            options.add(new HashMap<>());
+        }
+        return process(text, "", services, options);
+    }
+
+    /**
+     * Sends the text through the given chain of services over a new socket, which is closed afterwards.
+     *
+     * @param text     text body; it is split into paragraphs on {@code "\n\n"}
+     * @param title    text title, sent as the first paragraph
+     * @param services names of the services forming the processing chain
+     * @param options  option maps for the services; the i-th map is used for the i-th service
+     * @return text returned by Multiservice
+     * @throws MultiserviceException if a Thrift error occurs; the original exception is attached as the cause
+     */
+    public TText process(String text, String title, List<String> services, List<Map<String, String>> options)
+            throws MultiserviceException {
+        TTransport transport = new TSocket(host, port);
+        ObjectRequest objectRequest = createRequest(text, title, services, options);
+
+        try {
+            transport.open();
+
+            TProtocol protocol = new TBinaryProtocol(transport);
+            Multiservice.Client client = new Multiservice.Client(protocol);
+
+            LOG.debug("Sending Multiservice request...");
+            TText responseText = request(objectRequest, client);
+            LOG.debug("...done");
+
+            return responseText;
+
+        } catch (TException e) {
+            // pass the exception itself to the logger, so that its stack trace is not lost
+            LOG.error("Error processing request", e);
+            MultiserviceException wrapped = new MultiserviceException(e.getMessage());
+            wrapped.initCause(e);
+            throw wrapped;
+
+        } finally {
+            transport.close();
+        }
+    }
+
+    /**
+     * Submits the request and polls its status until it is done, failed or dumped.
+     *
+     * @throws TException on communication errors, when the request status is FAILED or DUMPED (the exception
+     *                    obtained from {@code getException} is thrown) or when the waiting thread is interrupted
+     */
+    private TText request(ObjectRequest objectRequest, Multiservice.Client client) throws TException {
+        String requestToken = client.putObjectRequest(objectRequest);
+        while (true) {
+            RequestStatus status = client.getRequestStatus(requestToken);
+            if (RequestStatus.DONE.equals(status)) {
+                return client.getResultObject(requestToken);
+            } else if (RequestStatus.FAILED.equals(status) || RequestStatus.DUMPED.equals(status)) {
+                throw client.getException(requestToken);
+            }
+            waitBeforeNextPoll();
+        }
+    }
+
+    /** Sleeps for the poll interval; an interrupt restores the interrupt flag and aborts the request. */
+    private static void waitBeforeNextPoll() throws TException {
+        try {
+            Thread.sleep(POLL_INTERVAL_MILLIS);
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+            throw new TException("Interrupted while waiting for Multiservice result", e);
+        }
+    }
+
+    /** Builds a request: the title paragraph first, then one paragraph per "\n\n"-separated part of the body. */
+    private ObjectRequest createRequest(String textBody, String textTitle, List<String> services,
+                                        List<Map<String, String>> options) {
+        TText text = new TText();
+
+        TParagraph par = new TParagraph();
+        par.setText(textTitle);
+        text.addToParagraphs(par);
+
+        for (String p : textBody.split("\n\n")) {
+            par = new TParagraph();
+            par.setText(p);
+            text.addToParagraphs(par);
+        }
+
+        List<RequestPart> processingChain = new ArrayList<>();
+        int i = 0;
+        for (String serviceName : services) {
+            processingChain.add(new RequestPart(serviceName, options.get(i++)));
+        }
+
+        return new ObjectRequest(text, processingChain);
+    }
+
+}
diff --git a/nicolas-multiservice/src/main/java/pl/waw/ipipan/zil/summ/nicolas/multiservice/Preprocessor.java b/nicolas-multiservice/src/main/java/pl/waw/ipipan/zil/summ/nicolas/multiservice/Preprocessor.java
new file mode 100644
index 0000000..4199c53
--- /dev/null
+++ b/nicolas-multiservice/src/main/java/pl/waw/ipipan/zil/summ/nicolas/multiservice/Preprocessor.java
@@ -0,0 +1,76 @@
+package pl.waw.ipipan.zil.summ.nicolas.multiservice;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import pl.waw.ipipan.zil.multiservice.thrift.types.MultiserviceException;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.ObjectOutputStream;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Preprocesses raw text with a fixed chain of Multiservice services and optionally stores the result in a file.
+ */
+public class Preprocessor {
+
+    private static final Logger LOG = LoggerFactory.getLogger(Preprocessor.class);
+
+    private static final List<String> SERVICES = Arrays.asList("Concraft", "Spejd", "Nerf", "MentionDetector",
+            "Bartek");
+    private static final int PORT = 20000;
+    private static final String HOST = "multiservice.nlp.ipipan.waw.pl";
+
+    private static final MultiserviceProxy MS_PROXY = new MultiserviceProxy(HOST, PORT);
+
+    /**
+     * Runs the whole service chain on the given text.
+     *
+     * @param body text to preprocess
+     * @return text returned by Multiservice
+     * @throws MultiserviceException if the Multiservice request fails
+     */
+    public TText preprocess(String body) throws MultiserviceException {
+        return MS_PROXY.process(body, SERVICES);
+    }
+
+    /**
+     * Preprocesses the text and serializes the result into the target file, unless the file already exists.
+     *
+     * @param body       text to preprocess
+     * @param targetFile file to write the serialized result to; an existing file is left untouched
+     * @throws MultiserviceException if the Multiservice request or the serialization fails
+     */
+    public void preprocessToFile(String body, File targetFile) throws MultiserviceException {
+        if (targetFile.exists()) {
+            LOG.debug("Skipping existing file..");
+            return;
+        }
+        LOG.info("Processing text into {}", targetFile.getPath());
+        TText ttext = preprocess(body);
+        try {
+            serialize(ttext, targetFile);
+        } catch (IOException e) {
+            LOG.error("Error serializing preprocessed text", e);
+            // remove the incomplete output, otherwise the next run would skip it as an existing file
+            if (targetFile.exists() && !targetFile.delete()) {
+                LOG.warn("Could not delete incomplete file {}", targetFile.getPath());
+            }
+            MultiserviceException wrapped = new MultiserviceException(e.getLocalizedMessage());
+            wrapped.initCause(e);
+            throw wrapped;
+        }
+    }
+
+    /** Writes the text to the file with an {@link ObjectOutputStream}. */
+    private static void serialize(TText ttext, File targetFile) throws IOException {
+        try (FileOutputStream fileOutputStream = new FileOutputStream(targetFile);
+             ObjectOutputStream oos = new ObjectOutputStream(fileOutputStream)) {
+            oos.writeObject(ttext);
+        }
+    }
+
+}
diff --git a/nicolas-multiservice/src/test/java/pl/waw/ipipan/zil/summ/nicolas/multiservice/PreprocessorIT.java b/nicolas-multiservice/src/test/java/pl/waw/ipipan/zil/summ/nicolas/multiservice/PreprocessorIT.java
new file mode 100644
index 0000000..e3ce61d
--- /dev/null
+++ b/nicolas-multiservice/src/test/java/pl/waw/ipipan/zil/summ/nicolas/multiservice/PreprocessorIT.java
@@ -0,0 +1,77 @@
+package pl.waw.ipipan.zil.summ.nicolas.multiservice;
+
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TParagraph;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
+import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Integration tests of {@link Preprocessor}; they send requests to the Multiservice host configured in that class.
+ */
+public class PreprocessorIT {
+
+    @ClassRule
+    public static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder();
+
+    private static Preprocessor preprocessor;
+
+    @BeforeClass
+    public static void initPreprocessor() {
+        preprocessor = new Preprocessor();
+    }
+
+    @Test
+    public void shouldProcessSampleText() throws Exception {
+        String text = "Ala ma kota. Ala ma też psa.";
+        TText processed = preprocessor.preprocess(text);
+
+        assertSampleProcessedText(processed);
+    }
+
+    /** Checks the structure expected for the two-sentence sample text used by both tests. */
+    private void assertSampleProcessedText(TText processed) {
+        assertEquals(2, processed.getParagraphsSize());
+
+        // first paragraph is empty (placeholder for text title)
+        TParagraph firstParagraph = processed.getParagraphs().get(0);
+        assertEquals(0, firstParagraph.getSentencesSize());
+
+        TParagraph secondParagraph = processed.getParagraphs().get(1);
+        assertEquals(2, secondParagraph.getSentencesSize());
+        List<TSentence> sentences = secondParagraph.getSentences();
+
+        TSentence firstSentence = sentences.get(0);
+        assertEquals(4, firstSentence.getTokensSize());
+        assertEquals("Ala", firstSentence.getTokens().get(0).getOrth());
+
+        TSentence secondSentence = sentences.get(1);
+        assertEquals(5, secondSentence.getTokensSize());
+        assertEquals("Ala", secondSentence.getTokens().get(0).getOrth());
+
+        assertEquals(3, processed.getCoreferencesSize()); // Ala, pies, kot
+    }
+
+    @Test
+    public void shouldProcessSampleTextToFile() throws Exception {
+        String text = "Ala ma kota. Ala ma też psa.";
+        File targetFile = TEMPORARY_FOLDER.newFile();
+        assertTrue(targetFile.delete()); // delete file, because preprocessor skips existing files
+        preprocessor.preprocessToFile(text, targetFile);
+
+        try (FileInputStream inputStream = new FileInputStream(targetFile)) {
+            TText processed = Utils.loadThriftTextFromStream(inputStream);
+            assertSampleProcessedText(processed);
+        }
+    }
+}
\ No newline at end of file
diff --git a/nicolas-train/pom.xml b/nicolas-train/pom.xml
index 6d71d47..4401bfb 100644
--- a/nicolas-train/pom.xml
+++ b/nicolas-train/pom.xml
@@ -21,6 +21,10 @@
             <groupId>pl.waw.ipipan.zil.summ</groupId>
             <artifactId>nicolas-lib</artifactId>
         </dependency>
+        <dependency>
+            <groupId>pl.waw.ipipan.zil.summ</groupId>
+            <artifactId>nicolas-multiservice</artifactId>
+        </dependency>
 
         <!-- internal -->
         <dependency>
diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/DownloadAndPreprocessCorpus.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/DownloadAndPreprocessCorpus.java
index 439a33b..b62061e 100644
--- a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/DownloadAndPreprocessCorpus.java
+++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/DownloadAndPreprocessCorpus.java
@@ -4,7 +4,7 @@ import net.lingala.zip4j.core.ZipFile;
 import org.apache.commons.io.FileUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import pl.waw.ipipan.zil.summ.nicolas.train.multiservice.NLPProcess;
+import pl.waw.ipipan.zil.summ.nicolas.train.preprocess.Main;
 
 import java.io.File;
 import java.net.URL;
@@ -45,7 +45,7 @@ public class DownloadAndPreprocessCorpus {
 
         File preprocessed = new File(WORKING_DIR, "preprocessed");
         createFolder(preprocessed.getPath());
-        NLPProcess.main(new String[]{dataDir.getPath(), preprocessed.getPath()});
+        Main.main(new String[]{dataDir.getPath(), preprocessed.getPath()});
     }
 
     private static File createFolder(String path) {
diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/MultiserviceProxy.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/MultiserviceProxy.java
deleted file mode 100644
index 2c4455a..0000000
--- a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/MultiserviceProxy.java
+++ /dev/null
@@ -1,110 +0,0 @@
-package pl.waw.ipipan.zil.summ.nicolas.train.multiservice;
-
-import org.apache.thrift.TException;
-import org.apache.thrift.protocol.TBinaryProtocol;
-import org.apache.thrift.protocol.TProtocol;
-import org.apache.thrift.transport.TSocket;
-import org.apache.thrift.transport.TTransport;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import pl.waw.ipipan.zil.multiservice.thrift.Multiservice;
-import pl.waw.ipipan.zil.multiservice.thrift.ObjectRequest;
-import pl.waw.ipipan.zil.multiservice.thrift.RequestPart;
-import pl.waw.ipipan.zil.multiservice.thrift.RequestStatus;
-import pl.waw.ipipan.zil.multiservice.thrift.types.MultiserviceException;
-import pl.waw.ipipan.zil.multiservice.thrift.types.TParagraph;
-import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-public class MultiserviceProxy {
-
-    private static final Logger LOG = LoggerFactory.getLogger(MultiserviceProxy.class);
-
-    private int port;
-    private String host;
-
-    public MultiserviceProxy(String host, int port) {
-        this.host = host;
-        this.port = port;
-        LOG.info("Multiservice at " + host + ":" + port);
-    }
-
-    public TText process(String text, List<String> services) throws Exception {
-        List<Map<String, String>> options = new ArrayList<>();
-        for (int i = 0; i < services.size(); i++)
-            options.add(new HashMap<>());
-        return process(text, "", services, options);
-    }
-
-    public TText process(String text, String title, List<String> services, List<Map<String, String>> options)
-            throws Exception {
-        TTransport transport = new TSocket(host, port);
-        ObjectRequest objectRequest = createRequest(text, title, services, options);
-
-        try {
-            transport.open();
-
-            TProtocol protocol = new TBinaryProtocol(transport);
-            Multiservice.Client client = new Multiservice.Client(protocol);
-
-            LOG.debug("Sending Multservice request...");
-            TText responseText = request(objectRequest, client);
-            LOG.debug("...done");
-
-            return responseText;
-
-        } catch (TException e) {
-            LOG.error("Error processing request:" + e);
-            throw new Exception(e);
-
-        } finally {
-            transport.close();
-        }
-    }
-
-    private TText request(ObjectRequest objectRequest, Multiservice.Client client) throws TException {
-
-        String requestToken = client.putObjectRequest(objectRequest);
-        while (true) {
-            RequestStatus status = client.getRequestStatus(requestToken);
-            if (RequestStatus.DONE.equals(status)) {
-                TText result = client.getResultObject(requestToken);
-                return result;
-            } else if (RequestStatus.FAILED.equals(status) || RequestStatus.DUMPED.equals(status)) {
-                try {
-                    MultiserviceException exception = client.getException(requestToken);
-                    throw exception;
-                } catch (TException e) {
-                    throw e;
-                }
-            }
-        }
-    }
-
-    private ObjectRequest createRequest(String textBody, String textTitle, List<String> services,
-                                        List<Map<String, String>> options) {
-        TText text = new TText();
-
-        TParagraph par = new TParagraph();
-        par.setText(textTitle);
-        text.addToParagraphs(par);
-
-        for (String p : textBody.split("\n\n")) {
-            par = new TParagraph();
-            par.setText(p);
-            text.addToParagraphs(par);
-        }
-
-        List<RequestPart> processingChain = new ArrayList<>();
-        int i = 0;
-        for (String serviceName : services)
-            processingChain.add(new RequestPart(serviceName, options.get(i++)));
-
-        return new ObjectRequest(text, processingChain);
-    }
-
-}
diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/NLPProcess.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/NLPProcess.java
deleted file mode 100644
index 2922942..0000000
--- a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/NLPProcess.java
+++ /dev/null
@@ -1,97 +0,0 @@
-package pl.waw.ipipan.zil.summ.nicolas.train.multiservice;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
-import pl.waw.ipipan.zil.summ.pscapi.io.PSC_IO;
-import pl.waw.ipipan.zil.summ.pscapi.xml.Text;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.ObjectOutputStream;
-import java.util.Arrays;
-import java.util.List;
-
-public class NLPProcess {
-
-    private static final Logger LOG = LoggerFactory.getLogger(NLPProcess.class);
-
-    private static final List<String> SERVICES = Arrays.asList("Concraft", "Spejd", "Nerf", "MentionDetector",
-            "Bartek");
-    private static final int PORT = 20000;
-    private static final String HOST = "multiservice.nlp.ipipan.waw.pl";
-
-    private static final MultiserviceProxy MSPROXY = new MultiserviceProxy(HOST, PORT);
-
-    private static final String CORPUS_FILE_SUFFIX = ".xml";
-    private static final String OUTPUT_FILE_SUFFIX = ".thrift";
-
-    private NLPProcess() {
-    }
-
-    public static void main(String[] args) {
-        if (args.length != 2) {
-            LOG.error("Wrong usage! Try " + NLPProcess.class.getSimpleName() + " dirWithCorpusFiles targetDir");
-            return;
-        }
-        File corpusDir = new File(args[0]);
-        if (!corpusDir.isDirectory()) {
-            LOG.error("Corpus directory does not exist: {}", corpusDir);
-            return;
-        }
-        File targetDir = new File(args[1]);
-        if (!targetDir.isDirectory()) {
-            LOG.error("Target directory does not exist: {}", targetDir);
-            return;
-        }
-
-        int ok = 0;
-        int err = 0;
-        File[] files = corpusDir.listFiles(f -> f.getName().endsWith(CORPUS_FILE_SUFFIX));
-        if (files == null || files.length == 0) {
-            LOG.error("No corpus files found at: {}", corpusDir);
-            return;
-        }
-        Arrays.sort(files);
-        for (File file : files) {
-            try {
-                Text text = PSC_IO.readText(file);
-                File targetFile = new File(targetDir, file.getName().replaceFirst(CORPUS_FILE_SUFFIX + "$", OUTPUT_FILE_SUFFIX));
-                annotateNLP(text, targetFile);
-                ok++;
-            } catch (Exception e) {
-                err++;
-                LOG.error("Problem with text in " + file + ", " + e);
-            }
-        }
-        LOG.info("{} texts processed successfully.", ok);
-        LOG.info("{} texts with errors.", err);
-    }
-
-    private static void annotateNLP(Text text, File targetFile) throws Exception {
-        annotate(text.getBody(), targetFile);
-    }
-
-    private static void annotate(String body, File targetFile) throws Exception {
-        if (targetFile.exists()) {
-            LOG.debug("Skipping existing file..");
-            return;
-        }
-        LOG.info("Processing text into " + targetFile.getPath());
-        TText ttext = MSPROXY.process(body, SERVICES);
-        serialize(ttext, targetFile);
-    }
-
-    public static void serialize(TText ttext, File targetFile) throws IOException {
-        try (FileOutputStream fileOutputStream = new FileOutputStream(targetFile);
-             ObjectOutputStream oos = new ObjectOutputStream(fileOutputStream)) {
-            oos.writeObject(ttext);
-        }
-    }
-
-    public static TText annotate(String body) throws Exception {
-        return MSPROXY.process(body, SERVICES);
-    }
-
-}
diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/preprocess/Main.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/preprocess/Main.java
new file mode 100644
index 0000000..738591d
--- /dev/null
+++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/preprocess/Main.java
@@ -0,0 +1,73 @@
+package pl.waw.ipipan.zil.summ.nicolas.train.preprocess;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import pl.waw.ipipan.zil.summ.nicolas.multiservice.Preprocessor;
+import pl.waw.ipipan.zil.summ.pscapi.io.PSC_IO;
+import pl.waw.ipipan.zil.summ.pscapi.xml.Text;
+
+import java.io.File;
+import java.util.Arrays;
+
+/**
+ * Command line tool that preprocesses all corpus files from one directory into files in a target directory.
+ */
+public class Main {
+
+    private static final Logger LOG = LoggerFactory.getLogger(Main.class);
+
+    private static final String CORPUS_FILE_SUFFIX = ".xml";
+    private static final String OUTPUT_FILE_SUFFIX = ".thrift";
+
+    private Main() {
+    }
+
+    /**
+     * Preprocesses every {@code .xml} file of the corpus directory, in sorted order, into a {@code .thrift} file.
+     * A file that fails is logged and counted; processing continues with the next file.
+     *
+     * @param args exactly two arguments: the directory with corpus files and the (existing) target directory
+     */
+    public static void main(String[] args) {
+        if (args.length != 2) {
+            LOG.error("Wrong usage! Try {} dirWithCorpusFiles targetDir", Main.class.getSimpleName());
+            return;
+        }
+        File corpusDir = new File(args[0]);
+        if (!corpusDir.isDirectory()) {
+            LOG.error("Corpus directory does not exist: {}", corpusDir);
+            return;
+        }
+        File targetDir = new File(args[1]);
+        if (!targetDir.isDirectory()) {
+            LOG.error("Target directory does not exist: {}", targetDir);
+            return;
+        }
+
+        int ok = 0;
+        int err = 0;
+        File[] files = corpusDir.listFiles(f -> f.getName().endsWith(CORPUS_FILE_SUFFIX));
+        if (files == null || files.length == 0) {
+            LOG.error("No corpus files found at: {}", corpusDir);
+            return;
+        }
+        Arrays.sort(files);
+
+        Preprocessor processor = new Preprocessor();
+
+        for (File file : files) {
+            try {
+                Text text = PSC_IO.readText(file);
+                File targetFile = new File(targetDir, file.getName().replaceFirst(CORPUS_FILE_SUFFIX + "$", OUTPUT_FILE_SUFFIX));
+                processor.preprocessToFile(text.getBody(), targetFile);
+                ok++;
+            } catch (Exception e) {
+                err++;
+                // pass the exception as the last argument, so that its stack trace gets logged
+                LOG.error("Problem with text in {}", file, e);
+            }
+        }
+        LOG.info("{} texts processed successfully.", ok);
+        LOG.info("{} texts with errors.", err);
+    }
+}
diff --git a/nicolas-train/src/test/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/NLPProcessIT.java b/nicolas-train/src/test/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/NLPProcessIT.java
deleted file mode 100644
index d66b72a..0000000
--- a/nicolas-train/src/test/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/NLPProcessIT.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package pl.waw.ipipan.zil.summ.nicolas.train.multiservice;
-
-import com.google.common.collect.Lists;
-import org.junit.ClassRule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
-import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
-import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
-
-import java.io.File;
-import java.util.List;
-import java.util.stream.Collectors;
-
-import static junit.framework.TestCase.assertEquals;
-
-public class NLPProcessIT {
-
-    @ClassRule
-    public static TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder();
-
-    @Test
-    public void shouldProcessSampleText() throws Exception {
-        String text = "Ala ma kota. Ala ma też psa.";
-        TText processed = NLPProcess.annotate(text);
-        List<String> ids = processed.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).map(TSentence::getId).collect(Collectors.toList());
-        assertEquals(Lists.newArrayList("s-2.1", "s-2.2"), ids);
-
-        File targetFile = TEMPORARY_FOLDER.newFile();
-        NLPProcess.serialize(processed, targetFile);
-    }
-}
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 115e398..a67adaf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -10,29 +10,34 @@
 
     <packaging>pom</packaging>
 
+
     <modules>
         <module>nicolas-lib</module>
         <module>nicolas-cli</module>
         <module>nicolas-model</module>
         <module>nicolas-train</module>
         <module>nicolas-common</module>
+        <module>nicolas-multiservice</module>
     </modules>
 
     <properties>
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
         <java.version.build>1.8</java.version.build>
 
         <pscapi.version>1.0</pscapi.version>
         <utils.version>1.0</utils.version>
 
         <commons-csv.version>1.4</commons-csv.version>
-        <guava.version>20.0</guava.version>
+        <guava.version>21.0</guava.version>
         <weka-dev.version>3.9.1</weka-dev.version>
         <commons-lang3.version>3.5</commons-lang3.version>
         <commons-io.version>2.5</commons-io.version>
         <slf4j-api.version>1.7.22</slf4j-api.version>
         <junit.version>4.12</junit.version>
         <zip4j.version>1.3.2</zip4j.version>
+        <mockito-core.version>2.7.1</mockito-core.version>
+        <jcommander.version>1.60</jcommander.version>
     </properties>
 
     <prerequisites>
@@ -76,6 +81,11 @@
                 <artifactId>nicolas-train</artifactId>
                 <version>${project.version}</version>
             </dependency>
+            <dependency>
+                <groupId>pl.waw.ipipan.zil.summ</groupId>
+                <artifactId>nicolas-multiservice</artifactId>
+                <version>${project.version}</version>
+            </dependency>
 
             <!-- internal -->
             <dependency>
@@ -126,6 +136,11 @@
                 <artifactId>zip4j</artifactId>
                 <version>${zip4j.version}</version>
             </dependency>
+            <dependency>
+                <groupId>com.beust</groupId>
+                <artifactId>jcommander</artifactId>
+                <version>${jcommander.version}</version>
+            </dependency>
 
             <!-- logging -->
             <dependency>
@@ -144,20 +159,204 @@
                 <groupId>junit</groupId>
                 <artifactId>junit</artifactId>
                 <version>${junit.version}</version>
+                <scope>test</scope>
+            </dependency>
+            <dependency>
+                <groupId>org.mockito</groupId>
+                <artifactId>mockito-core</artifactId>
+                <version>${mockito-core.version}</version>
+                <scope>test</scope>
             </dependency>
         </dependencies>
     </dependencyManagement>
 
+
     <build>
+        <pluginManagement>
+            <plugins>
+                <plugin>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-jar-plugin</artifactId>
+                    <version>3.0.2</version>
+                </plugin>
+                <plugin>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-resources-plugin</artifactId>
+                    <version>3.0.1</version>
+                </plugin>
+                <plugin>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-clean-plugin</artifactId>
+                    <version>3.0.0</version>
+                </plugin>
+                <plugin>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-site-plugin</artifactId>
+                    <version>3.5.1</version>
+                </plugin>
+                <plugin>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-install-plugin</artifactId>
+                    <version>2.5.2</version>
+                </plugin>
+                <plugin>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-deploy-plugin</artifactId>
+                    <version>2.8.2</version>
+                </plugin>
+                <plugin>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-assembly-plugin</artifactId>
+                    <version>2.6</version>
+                </plugin>
+                <plugin>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-compiler-plugin</artifactId>
+                    <version>3.5.1</version>
+                    <configuration>
+                        <source>${java.version.build}</source>
+                        <target>${java.version.build}</target>
+                    </configuration>
+                </plugin>
+                <plugin>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-surefire-plugin</artifactId>
+                    <version>2.19.1</version>
+                    <configuration>
+                        <!-- Sets the VM argument line used when unit tests are run. -->
+                        <argLine>${surefireArgLine}</argLine>
+                        <!-- Skips unit tests if the value of skip.unit.tests property is true -->
+                        <skipTests>${skip.unit.tests}</skipTests>
+                        <!-- Excludes integration tests (IT*, *IT and *ITCase classes, which Failsafe runs) when unit tests are run. -->
+                        <excludes>
+                            <exclude>**/IT*.java</exclude><exclude>**/*IT.java</exclude><exclude>**/*ITCase.java</exclude>
+                        </excludes>
+                    </configuration>
+                </plugin>
+                <plugin>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-failsafe-plugin</artifactId>
+                    <version>2.19.1</version>
+                    <executions>
+                        <execution>
+                            <id>integration-test</id>
+                            <goals>
+                                <goal>integration-test</goal>
+                                <goal>verify</goal>
+                            </goals>
+                            <configuration>
+                                <!-- Sets the VM argument line used when integration tests are run. -->
+                                <argLine>${failsafeArgLine}</argLine>
+                                <!--
+                                    Skips integration tests if the value of skip.integration.tests property
+                                    is true
+                                -->
+                                <skipTests>${skip.integration.tests}</skipTests>
+                            </configuration>
+                        </execution>
+                        <execution>
+                            <id>verify</id>
+                            <goals>
+                                <goal>verify</goal>
+                            </goals>
+                        </execution>
+                    </executions>
+                </plugin>
+                <plugin>
+                    <groupId>org.jacoco</groupId>
+                    <artifactId>jacoco-maven-plugin</artifactId>
+                    <version>0.7.8</version>
+                    <executions>
+                        <!--
+                            Prepares the property pointing to the JaCoCo runtime agent, which
+                            is passed as a VM argument when the Maven Surefire plugin is executed.
+                        -->
+                        <execution>
+                            <id>pre-unit-test</id>
+                            <goals>
+                                <goal>prepare-agent</goal>
+                            </goals>
+                            <configuration>
+                                <!-- Sets the path to the file which contains the execution data. -->
+                                <destFile>${project.build.directory}/jacoco.exec</destFile>
+                                <!--
+                                    Sets the name of the property containing the settings
+                                    for JaCoCo runtime agent.
+                                -->
+                                <propertyName>surefireArgLine</propertyName>
+                            </configuration>
+                        </execution>
+                        <!--
+                            Ensures that the code coverage report for unit tests is created after
+                            unit tests have been run.
+                        -->
+                        <execution>
+                            <id>post-unit-test</id>
+                            <phase>test</phase>
+                            <goals>
+                                <goal>report</goal>
+                            </goals>
+                            <configuration>
+                                <!-- Sets the path to the file which contains the execution data. -->
+                                <dataFile>${project.build.directory}/jacoco.exec</dataFile>
+                                <!-- Sets the output directory for the code coverage report. -->
+                                <outputDirectory>${project.reporting.outputDirectory}/jacoco-ut</outputDirectory>
+                            </configuration>
+                        </execution>
+
+                        <!--
+                            Prepares the property pointing to the JaCoCo runtime agent, which
+                            is passed as a VM argument when the Maven Failsafe plugin is executed.
+                        -->
+                        <execution>
+                            <id>pre-integration-test</id>
+                            <phase>pre-integration-test</phase>
+                            <goals>
+                                <goal>prepare-agent</goal>
+                            </goals>
+                            <configuration>
+                                <!-- Sets the path to the file which contains the execution data. -->
+                                <destFile>${project.build.directory}/jacoco-it.exec</destFile>
+                                <!--
+                                    Sets the name of the property containing the settings
+                                    for JaCoCo runtime agent.
+                                -->
+                                <propertyName>failsafeArgLine</propertyName>
+                            </configuration>
+                        </execution>
+                        <!--
+                            Ensures that the code coverage report for integration tests is created
+                            after integration tests have been run.
+                        -->
+                        <execution>
+                            <id>post-integration-test</id>
+                            <phase>post-integration-test</phase>
+                            <goals>
+                                <goal>report</goal>
+                            </goals>
+                            <configuration>
+                                <!-- Sets the path to the file which contains the execution data. -->
+                                <dataFile>${project.build.directory}/jacoco-it.exec</dataFile>
+                                <!-- Sets the output directory for the code coverage report. -->
+                                <outputDirectory>${project.reporting.outputDirectory}/jacoco-it</outputDirectory>
+                            </configuration>
+                        </execution>
+                    </executions>
+                </plugin>
+            </plugins>
+        </pluginManagement>
         <plugins>
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
-                <artifactId>maven-compiler-plugin</artifactId>
-                <version>3.5.1</version>
-                <configuration>
-                    <source>${java.version.build}</source>
-                    <target>${java.version.build}</target>
-                </configuration>
+                <artifactId>maven-failsafe-plugin</artifactId>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+            </plugin>
+            <plugin>
+                <groupId>org.jacoco</groupId>
+                <artifactId>jacoco-maven-plugin</artifactId>
             </plugin>
         </plugins>
     </build>
--
libgit2 0.22.2