diff --git a/nicolas-common/pom.xml b/nicolas-common/pom.xml new file mode 100644 index 0000000..6dbb4fe --- /dev/null +++ b/nicolas-common/pom.xml @@ -0,0 +1,39 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <artifactId>nicolas-container</artifactId> + <groupId>pl.waw.ipipan.zil.summ</groupId> + <version>1.0-SNAPSHOT</version> + </parent> + + <artifactId>nicolas-common</artifactId> + + <dependencies> + <!-- internal --> + <dependency> + <groupId>pl.waw.ipipan.zil.summ</groupId> + <artifactId>pscapi</artifactId> + </dependency> + <dependency> + <groupId>pl.waw.ipipan.zil.multiservice</groupId> + <artifactId>utils</artifactId> + </dependency> + + <!-- third party --> + <dependency> + <groupId>nz.ac.waikato.cms.weka</groupId> + <artifactId>weka-dev</artifactId> + </dependency> + + <!-- logging --> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + </dependency> + + </dependencies> + +</project> \ No newline at end of file diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Utils.java b/nicolas-common/src/main/java/pl/waw/ipipan/zil/summ/nicolas/common/Utils.java index 6b0ff0a..b76153d 100644 --- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Utils.java +++ b/nicolas-common/src/main/java/pl/waw/ipipan/zil/summ/nicolas/common/Utils.java @@ -1,75 +1,29 @@ -package pl.waw.ipipan.zil.summ.nicolas; +package pl.waw.ipipan.zil.summ.nicolas.common; -import com.google.common.base.Charsets; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; -import com.google.common.io.Files; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; 
import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; import pl.waw.ipipan.zil.multiservice.thrift.types.TText; import pl.waw.ipipan.zil.multiservice.thrift.types.TToken; -import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor; -import pl.waw.ipipan.zil.summ.nicolas.mention.MentionScorer; -import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceFeatureExtractor; import weka.classifiers.Classifier; import weka.core.Attribute; -import weka.core.DenseInstance; -import weka.core.Instance; import weka.core.Instances; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.ObjectInputStream; +import java.io.*; import java.util.*; import java.util.function.Function; import java.util.stream.Collectors; -import static java.util.stream.Collectors.toList; - public class Utils { private static final Logger LOG = LoggerFactory.getLogger(Utils.class); private static final String DATASET_NAME = "Dataset"; - public static Map<TMention, Instance> extractInstancesFromMentions(TText preprocessedText, MentionFeatureExtractor featureExtractor) { - List<TSentence> sentences = preprocessedText.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(toList()); - Map<TMention, Map<Attribute, Double>> mention2features = featureExtractor.calculateFeatures(preprocessedText); - - LOG.info("Extracting " + featureExtractor.getAttributesList().size() + " features of each mention."); - Map<TMention, Instance> mention2instance = Maps.newHashMap(); - for (TMention tMention : sentences.stream().flatMap(s -> s.getMentions().stream()).collect(toList())) { - Instance instance = new DenseInstance(featureExtractor.getAttributesList().size()); - Map<Attribute, Double> mentionFeatures = mention2features.get(tMention); - for (Attribute attribute : featureExtractor.getAttributesList()) { - instance.setValue(attribute, mentionFeatures.get(attribute)); - } - mention2instance.put(tMention, instance); - } - return mention2instance; 
- } - - public static Map<TSentence, Instance> extractInstancesFromSentences(TText preprocessedText, SentenceFeatureExtractor featureExtractor, Set<TMention> goodMentions) { - List<TSentence> sentences = preprocessedText.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(toList()); - Map<TSentence, Map<Attribute, Double>> sentence2features = featureExtractor.calculateFeatures(preprocessedText, goodMentions); - - LOG.info("Extracting " + featureExtractor.getAttributesList().size() + " features of each sentence."); - Map<TSentence, Instance> sentence2instance = Maps.newHashMap(); - for (TSentence sentence : sentences) { - Instance instance = new DenseInstance(featureExtractor.getAttributesList().size()); - Map<Attribute, Double> sentenceFeatures = sentence2features.get(sentence); - for (Attribute attribute : featureExtractor.getAttributesList()) { - instance.setValue(attribute, sentenceFeatures.get(attribute)); - } - sentence2instance.put(sentence, instance); - } - return sentence2instance; - } - public static Instances createNewInstances(ArrayList<Attribute> attributesList) { Instances instances = new Instances(DATASET_NAME, attributesList, 0); instances.setClassIndex(0); @@ -97,7 +51,16 @@ public class Utils { public static TText loadThrifted(File originalFile) { - try (ObjectInputStream ois = new ObjectInputStream(new FileInputStream(originalFile))) { + try (FileInputStream inputStream = new FileInputStream(originalFile)) { + return loadThrifted(inputStream); + } catch (IOException e) { + LOG.error("Error reading serialized file: " + e); + return null; + } + } + + public static TText loadThrifted(InputStream stream) { + try (VersionIgnoringObjectInputStream ois = new VersionIgnoringObjectInputStream(stream)) { return (TText) ois.readObject(); } catch (ClassNotFoundException | IOException e) { LOG.error("Error reading serialized file: " + e); @@ -188,13 +151,5 @@ public class Utils { return sb.toString().trim(); } - public static Set<TMention> 
loadGoldGoodMentions(String id, TText text, boolean dev) throws IOException { - String optimalSummary = Files.toString(new File("src/main/resources/optimal_summaries/" + (dev ? "dev" : "test") + "/" + id + "_theoretic_ub_rouge_1.txt"), Charsets.UTF_8); - MentionScorer scorer = new MentionScorer(); - Map<TMention, Double> mention2score = scorer.calculateMentionScores(optimalSummary, text); - - mention2score.keySet().removeIf(tMention -> mention2score.get(tMention) != 1.0); - return mention2score.keySet(); - } } \ No newline at end of file diff --git a/nicolas-core/pom.xml b/nicolas-core/pom.xml index c2fa0a9..0047276 100644 --- a/nicolas-core/pom.xml +++ b/nicolas-core/pom.xml @@ -12,10 +12,14 @@ <artifactId>nicolas</artifactId> <dependencies> + <!-- project --> + <dependency> + <groupId>pl.waw.ipipan.zil.summ</groupId> + <artifactId>nicolas-common</artifactId> + </dependency> <dependency> <groupId>pl.waw.ipipan.zil.summ</groupId> <artifactId>nicolas-model</artifactId> - <scope>runtime</scope> </dependency> <dependency> diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java index c6573ba..96f3786 100644 --- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java +++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java @@ -6,6 +6,7 @@ import com.google.common.collect.Sets; import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; import pl.waw.ipipan.zil.multiservice.thrift.types.TText; +import pl.waw.ipipan.zil.summ.nicolas.common.Utils; import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor; import pl.waw.ipipan.zil.summ.nicolas.mention.MentionModel; import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceFeatureExtractor; @@ -53,7 +54,7 @@ public class Nicolas { List<TSentence> sents = thrifted.getParagraphs().stream().flatMap(p -> 
p.getSentences().stream()).collect(toList()); Instances instances = Utils.createNewInstances(sentenceFeatureExtractor.getAttributesList()); - Map<TSentence, Instance> sentence2instance = Utils.extractInstancesFromSentences(thrifted, sentenceFeatureExtractor, goodMentions); + Map<TSentence, Instance> sentence2instance = ThriftUtils.extractInstancesFromSentences(thrifted, sentenceFeatureExtractor, goodMentions); Map<TSentence, Double> sentence2score = Maps.newHashMap(); for (Map.Entry<TSentence, Instance> entry : sentence2instance.entrySet()) { diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/ThriftUtils.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/ThriftUtils.java new file mode 100644 index 0000000..c0de645 --- /dev/null +++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/ThriftUtils.java @@ -0,0 +1,73 @@ +package pl.waw.ipipan.zil.summ.nicolas; + +import com.google.common.base.Charsets; +import com.google.common.collect.Maps; +import com.google.common.io.Files; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; +import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; +import pl.waw.ipipan.zil.multiservice.thrift.types.TText; +import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor; +import pl.waw.ipipan.zil.summ.nicolas.mention.MentionScorer; +import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceFeatureExtractor; +import weka.core.Attribute; +import weka.core.DenseInstance; +import weka.core.Instance; + +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static java.util.stream.Collectors.toList; + +public class ThriftUtils { + + private static final Logger LOG = LoggerFactory.getLogger(ThriftUtils.class); + + public static Set<TMention> loadGoldGoodMentions(String id, TText text, boolean dev) throws IOException { + String optimalSummary = 
Files.toString(new File("src/main/resources/optimal_summaries/" + (dev ? "dev" : "test") + "/" + id + "_theoretic_ub_rouge_1.txt"), Charsets.UTF_8); + + MentionScorer scorer = new MentionScorer(); + Map<TMention, Double> mention2score = scorer.calculateMentionScores(optimalSummary, text); + + mention2score.keySet().removeIf(tMention -> mention2score.get(tMention) != 1.0); + return mention2score.keySet(); + } + + public static Map<TMention, Instance> extractInstancesFromMentions(TText preprocessedText, MentionFeatureExtractor featureExtractor) { + List<TSentence> sentences = preprocessedText.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(toList()); + Map<TMention, Map<Attribute, Double>> mention2features = featureExtractor.calculateFeatures(preprocessedText); + + LOG.info("Extracting " + featureExtractor.getAttributesList().size() + " features of each mention."); + Map<TMention, Instance> mention2instance = Maps.newHashMap(); + for (TMention tMention : sentences.stream().flatMap(s -> s.getMentions().stream()).collect(toList())) { + Instance instance = new DenseInstance(featureExtractor.getAttributesList().size()); + Map<Attribute, Double> mentionFeatures = mention2features.get(tMention); + for (Attribute attribute : featureExtractor.getAttributesList()) { + instance.setValue(attribute, mentionFeatures.get(attribute)); + } + mention2instance.put(tMention, instance); + } + return mention2instance; + } + + public static Map<TSentence, Instance> extractInstancesFromSentences(TText preprocessedText, SentenceFeatureExtractor featureExtractor, Set<TMention> goodMentions) { + List<TSentence> sentences = preprocessedText.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(toList()); + Map<TSentence, Map<Attribute, Double>> sentence2features = featureExtractor.calculateFeatures(preprocessedText, goodMentions); + + LOG.info("Extracting " + featureExtractor.getAttributesList().size() + " features of each sentence."); + 
Map<TSentence, Instance> sentence2instance = Maps.newHashMap(); + for (TSentence sentence : sentences) { + Instance instance = new DenseInstance(featureExtractor.getAttributesList().size()); + Map<Attribute, Double> sentenceFeatures = sentence2features.get(sentence); + for (Attribute attribute : featureExtractor.getAttributesList()) { + instance.setValue(attribute, sentenceFeatures.get(attribute)); + } + sentence2instance.put(sentence, instance); + } + return sentence2instance; + } +} diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/apply/ApplyModel2.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/apply/ApplyModel2.java index f687d4a..2de5225 100644 --- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/apply/ApplyModel2.java +++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/apply/ApplyModel2.java @@ -9,7 +9,8 @@ import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; import pl.waw.ipipan.zil.multiservice.thrift.types.TText; import pl.waw.ipipan.zil.summ.nicolas.Constants; -import pl.waw.ipipan.zil.summ.nicolas.Utils; +import pl.waw.ipipan.zil.summ.nicolas.ThriftUtils; +import pl.waw.ipipan.zil.summ.nicolas.common.Utils; import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor; import pl.waw.ipipan.zil.summ.nicolas.mention.MentionModel; import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceFeatureExtractor; @@ -85,7 +86,7 @@ public class ApplyModel2 { List<TSentence> sents = thrifted.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(toList()); Instances instances = Utils.createNewInstances(sentenceFeatureExtractor.getAttributesList()); - Map<TSentence, Instance> sentence2instance = Utils.extractInstancesFromSentences(thrifted, sentenceFeatureExtractor, goodMentions); + Map<TSentence, Instance> sentence2instance = ThriftUtils.extractInstancesFromSentences(thrifted, sentenceFeatureExtractor, 
goodMentions); Map<TSentence, Double> sentence2score = Maps.newHashMap(); for (Map.Entry<TSentence, Instance> entry : sentence2instance.entrySet()) { diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureHelper.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureHelper.java index 4dc2446..d774b0a 100644 --- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureHelper.java +++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureHelper.java @@ -3,7 +3,7 @@ package pl.waw.ipipan.zil.summ.nicolas.features; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import pl.waw.ipipan.zil.multiservice.thrift.types.*; -import pl.waw.ipipan.zil.summ.nicolas.Utils; +import pl.waw.ipipan.zil.summ.nicolas.common.Utils; import java.util.List; import java.util.Map; @@ -14,9 +14,7 @@ import java.util.stream.Collectors; import static java.util.stream.Collectors.toList; import static java.util.stream.Collectors.toMap; -/** - * Created by me2 on 04.04.16. 
- */ + public class FeatureHelper { private final List<TMention> mentions; diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionModel.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionModel.java index 7e85be6..1ba0ef0 100644 --- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionModel.java +++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionModel.java @@ -5,7 +5,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; import pl.waw.ipipan.zil.multiservice.thrift.types.TText; -import pl.waw.ipipan.zil.summ.nicolas.Utils; +import pl.waw.ipipan.zil.summ.nicolas.ThriftUtils; +import pl.waw.ipipan.zil.summ.nicolas.common.Utils; import weka.classifiers.Classifier; import weka.core.Instance; import weka.core.Instances; @@ -21,7 +22,7 @@ public class MentionModel { Set<TMention> goodMentions = Sets.newHashSet(); Instances instances = Utils.createNewInstances(featureExtractor.getAttributesList()); - Map<TMention, Instance> mention2instance = Utils.extractInstancesFromMentions(text, featureExtractor); + Map<TMention, Instance> mention2instance = ThriftUtils.extractInstancesFromMentions(text, featureExtractor); for (Map.Entry<TMention, Instance> entry : mention2instance.entrySet()) { Instance instance = entry.getValue(); instance.setDataset(instances); diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionScorer.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionScorer.java index a16edec..5fa8e7c 100644 --- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionScorer.java +++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionScorer.java @@ -6,9 +6,8 @@ import com.google.common.collect.Multiset; import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; import 
pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; import pl.waw.ipipan.zil.multiservice.thrift.types.TText; -import pl.waw.ipipan.zil.summ.nicolas.Utils; +import pl.waw.ipipan.zil.summ.nicolas.common.Utils; -import java.util.Collection; import java.util.List; import java.util.Map; import java.util.stream.Collectors; diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/PrepareTrainingData.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/PrepareTrainingData.java index 6aa49f6..3810574 100644 --- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/PrepareTrainingData.java +++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/PrepareTrainingData.java @@ -8,7 +8,8 @@ import org.slf4j.LoggerFactory; import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; import pl.waw.ipipan.zil.multiservice.thrift.types.TText; import pl.waw.ipipan.zil.summ.nicolas.Constants; -import pl.waw.ipipan.zil.summ.nicolas.Utils; +import pl.waw.ipipan.zil.summ.nicolas.ThriftUtils; +import pl.waw.ipipan.zil.summ.nicolas.common.Utils; import weka.core.Instance; import weka.core.Instances; import weka.core.converters.ArffSaver; @@ -45,7 +46,7 @@ public class PrepareTrainingData { continue; Map<TMention, Double> mention2score = mentionScorer.calculateMentionScores(optimalSummary, preprocessedText); - Map<TMention, Instance> mention2instance = Utils.extractInstancesFromMentions(preprocessedText, featureExtractor); + Map<TMention, Instance> mention2instance = ThriftUtils.extractInstancesFromMentions(preprocessedText, featureExtractor); for (Map.Entry<TMention, Instance> entry : mention2instance.entrySet()) { TMention mention = entry.getKey(); Instance instance = entry.getValue(); diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/PrepareTrainingData.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/PrepareTrainingData.java index fb16339..f9ab453 100644 --- 
a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/PrepareTrainingData.java +++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/PrepareTrainingData.java @@ -9,7 +9,8 @@ import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; import pl.waw.ipipan.zil.multiservice.thrift.types.TText; import pl.waw.ipipan.zil.summ.nicolas.Constants; -import pl.waw.ipipan.zil.summ.nicolas.Utils; +import pl.waw.ipipan.zil.summ.nicolas.ThriftUtils; +import pl.waw.ipipan.zil.summ.nicolas.common.Utils; import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor; import pl.waw.ipipan.zil.summ.nicolas.mention.MentionModel; import weka.classifiers.Classifier; @@ -58,7 +59,7 @@ public class PrepareTrainingData { // Set<TMention> goodMentions // = Utils.loadGoldGoodMentions(textId, preprocessedText, true); - Map<TSentence, Instance> sentence2instance = Utils.extractInstancesFromSentences(preprocessedText, featureExtractor, goodMentions); + Map<TSentence, Instance> sentence2instance = ThriftUtils.extractInstancesFromSentences(preprocessedText, featureExtractor, goodMentions); for (Map.Entry<TSentence, Instance> entry : sentence2instance.entrySet()) { TSentence sentence = entry.getKey(); Instance instance = entry.getValue(); diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceScorer.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceScorer.java index f96ea34..0ebb515 100644 --- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceScorer.java +++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceScorer.java @@ -6,7 +6,7 @@ import com.google.common.collect.Multiset; import pl.waw.ipipan.zil.multiservice.thrift.types.TParagraph; import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; import pl.waw.ipipan.zil.multiservice.thrift.types.TText; -import 
pl.waw.ipipan.zil.summ.nicolas.Utils; +import pl.waw.ipipan.zil.summ.nicolas.common.Utils; import java.util.List; import java.util.Map; diff --git a/nicolas-train/pom.xml b/nicolas-train/pom.xml index 0773393..62ae3a7 100644 --- a/nicolas-train/pom.xml +++ b/nicolas-train/pom.xml @@ -11,4 +11,21 @@ <artifactId>nicolas-train</artifactId> + <dependencies> + <!-- internal --> + <dependency> + <groupId>pl.waw.ipipan.zil.summ</groupId> + <artifactId>pscapi</artifactId> + </dependency> + <dependency> + <groupId>pl.waw.ipipan.zil.multiservice</groupId> + <artifactId>utils</artifactId> + </dependency> + + <!-- logging --> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + </dependency> + </dependencies> </project> \ No newline at end of file diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/Trainer.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/Trainer.java new file mode 100644 index 0000000..c4b4d7c --- /dev/null +++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/Trainer.java @@ -0,0 +1,8 @@ +package pl.waw.ipipan.zil.summ.nicolas.train; + +public class Trainer { + + public static void main(String[] args) { + + } +} diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/MultiserviceProxy.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/MultiserviceProxy.java new file mode 100644 index 0000000..2c4455a --- /dev/null +++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/MultiserviceProxy.java @@ -0,0 +1,110 @@ +package pl.waw.ipipan.zil.summ.nicolas.train.multiservice; + +import org.apache.thrift.TException; +import org.apache.thrift.protocol.TBinaryProtocol; +import org.apache.thrift.protocol.TProtocol; +import org.apache.thrift.transport.TSocket; +import org.apache.thrift.transport.TTransport; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import 
pl.waw.ipipan.zil.multiservice.thrift.Multiservice;
+import pl.waw.ipipan.zil.multiservice.thrift.ObjectRequest;
+import pl.waw.ipipan.zil.multiservice.thrift.RequestPart;
+import pl.waw.ipipan.zil.multiservice.thrift.RequestStatus;
+import pl.waw.ipipan.zil.multiservice.thrift.types.MultiserviceException;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TParagraph;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Client for a Multiservice instance reached through a Thrift binary socket.
+ * Every {@code process} call opens its own transport, submits the text together
+ * with a chain of services and polls the request status until it is finished.
+ */
+public class MultiserviceProxy {
+
+    private static final Logger LOG = LoggerFactory.getLogger(MultiserviceProxy.class);
+
+    /** Pause between two consecutive status polls of a pending request. */
+    private static final long POLL_INTERVAL_MILLIS = 200L;
+
+    private final int port;
+    private final String host;
+
+    /**
+     * @param host host name of the Multiservice endpoint
+     * @param port TCP port of the Multiservice endpoint
+     */
+    public MultiserviceProxy(String host, int port) {
+        this.host = host;
+        this.port = port;
+        LOG.info("Multiservice at " + host + ":" + port);
+    }
+
+    /**
+     * Processes a text with an empty title and an empty option map for every service.
+     *
+     * @param text     text body, paragraphs separated by blank lines
+     * @param services names of the services forming the processing chain, in order
+     * @return the processed text returned by the service
+     * @throws Exception if the Thrift communication fails or the request ends in an error state
+     */
+    public TText process(String text, List<String> services) throws Exception {
+        List<Map<String, String>> options = new ArrayList<>();
+        for (int i = 0; i < services.size(); i++)
+            options.add(new HashMap<>());
+        return process(text, "", services, options);
+    }
+
+    /**
+     * Processes a text with the given title, services and per-service options.
+     *
+     * @param text     text body, paragraphs separated by blank lines
+     * @param title    title, sent as the first paragraph of the request
+     * @param services names of the services forming the processing chain, in order
+     * @param options  one option map per service, matched to {@code services} by position
+     * @return the processed text returned by the service
+     * @throws IllegalArgumentException if there are fewer option maps than services
+     * @throws Exception                if the Thrift communication fails or the request ends in an error state
+     */
+    public TText process(String text, String title, List<String> services, List<Map<String, String>> options)
+            throws Exception {
+        TTransport transport = new TSocket(host, port);
+        ObjectRequest objectRequest = createRequest(text, title, services, options);
+
+        try {
+            transport.open();
+
+            TProtocol protocol = new TBinaryProtocol(transport);
+            Multiservice.Client client = new Multiservice.Client(protocol);
+
+            LOG.debug("Sending Multservice request...");
+            TText responseText = request(objectRequest, client);
+            LOG.debug("...done");
+
+            return responseText;
+
+        } catch (TException e) {
+            // Pass the exception as the throwable argument so the stack trace is logged as well.
+            LOG.error("Error processing request:", e);
+            throw new Exception(e);
+
+        } finally {
+            transport.close();
+        }
+    }
+
+    /**
+     * Submits the request and polls its status until it is done or has failed.
+     *
+     * @return the result object once the status is {@code DONE}
+     * @throws TException on communication errors, when the request ends as {@code FAILED} or
+     *                    {@code DUMPED} (the exception reported by the service is thrown), or when
+     *                    the waiting thread is interrupted
+     */
+    private TText request(ObjectRequest objectRequest, Multiservice.Client client) throws TException {
+        String requestToken = client.putObjectRequest(objectRequest);
+        while (true) {
+            RequestStatus status = client.getRequestStatus(requestToken);
+            if (RequestStatus.DONE.equals(status)) {
+                return client.getResultObject(requestToken);
+            }
+            if (RequestStatus.FAILED.equals(status) || RequestStatus.DUMPED.equals(status)) {
+                MultiserviceException exception = client.getException(requestToken);
+                throw exception;
+            }
+            // Wait before asking again instead of polling the service in a tight loop.
+            try {
+                Thread.sleep(POLL_INTERVAL_MILLIS);
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+                throw new TException("Interrupted while waiting for request " + requestToken, e);
+            }
+        }
+    }
+
+    /**
+     * Builds the request object: the title becomes the first paragraph, the body is split on
+     * blank lines into further paragraphs, and each service is paired with the option map at
+     * the same position.
+     *
+     * @throws IllegalArgumentException if there are fewer option maps than services
+     */
+    private ObjectRequest createRequest(String textBody, String textTitle, List<String> services,
+                                        List<Map<String, String>> options) {
+        if (options.size() < services.size()) {
+            throw new IllegalArgumentException("Expected an option map for each of " + services.size()
+                    + " services, but got " + options.size());
+        }
+
+        TText text = new TText();
+
+        TParagraph par = new TParagraph();
+        par.setText(textTitle);
+        text.addToParagraphs(par);
+
+        for (String p : textBody.split("\n\n")) {
+            par = new TParagraph();
+            par.setText(p);
+            text.addToParagraphs(par);
+        }
+
+        List<RequestPart> processingChain = new ArrayList<>();
+        int i = 0;
+        for (String serviceName : services) {
+            processingChain.add(new RequestPart(serviceName, options.get(i++)));
+        }
+
+        return new ObjectRequest(text, processingChain);
+    }
+
+}
diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/NLPProcess.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/NLPProcess.java
new file mode 100644
index 0000000..bef8a7c
--- /dev/null
+++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/multiservice/NLPProcess.java
@@ -0,0 +1,90 @@
+package pl.waw.ipipan.zil.summ.nicolas.train.multiservice;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
+import pl.waw.ipipan.zil.summ.pscapi.io.PSC_IO;
+import pl.waw.ipipan.zil.summ.pscapi.xml.Text;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.ObjectOutputStream;
+import java.util.Arrays;
+import java.util.List;
+
+public class NLPProcess {
+
+    private static final Logger LOG =
LoggerFactory.getLogger(NLPProcess.class);
+
+    /** Names of the services forming the processing chain, in the order they are requested. */
+    private static final List<String> SERVICES = Arrays.asList("Concraft", "Spejd", "Nerf", "MentionDetector",
+            "Bartek");
+    private static final int PORT = 20000;
+    private static final String HOST = "multiservice.nlp.ipipan.waw.pl";
+
+    private static final MultiserviceProxy MSPROXY = new MultiserviceProxy(HOST, PORT);
+
+    private NLPProcess() {
+    }
+
+    /**
+     * Processes every {@code *.xml} file of the corpus directory and serializes the result
+     * to a {@code *.bin} file of the same base name in the target directory.
+     *
+     * @param args two elements: the directory with corpus files and the target directory
+     */
+    public static void main(String[] args) {
+        if (args.length != 2) {
+            LOG.error("Wrong usage! Try " + NLPProcess.class.getSimpleName() + " dirWithCorpusFiles targetDir");
+            return;
+        }
+        File corpusDir = new File(args[0]);
+        if (!corpusDir.isDirectory()) {
+            LOG.error("Corpus directory does not exist: " + corpusDir);
+            return;
+        }
+        File targetDir = new File(args[1]);
+        if (!targetDir.isDirectory()) {
+            LOG.error("Target directory does not exist: " + targetDir);
+            return;
+        }
+
+        int ok = 0;
+        int err = 0;
+        File[] files = corpusDir.listFiles(f -> f.getName().endsWith(".xml"));
+        // listFiles returns null when the directory cannot be read; sorting null would throw.
+        if (files == null) {
+            LOG.error("Cannot list files in corpus directory: " + corpusDir);
+            return;
+        }
+        Arrays.sort(files);
+        for (File file : files) {
+            try {
+                Text text = PSC_IO.readText(file);
+                // Escape the dot so that only a literal ".xml" suffix is replaced.
+                File targetFile = new File(targetDir, file.getName().replaceFirst("\\.xml$", ".bin"));
+                annotateNLP(text, targetFile);
+                ok++;
+            } catch (Exception e) {
+                err++;
+                LOG.error("Problem with text in " + file + ", " + e);
+            }
+        }
+        LOG.info(ok + " texts processed successfully.");
+        LOG.info(err + " texts with errors.");
+    }
+
+    /** Annotates the body of the given corpus text and stores the result in {@code targetFile}. */
+    private static void annotateNLP(Text text, File targetFile) throws Exception {
+        annotate(text.getBody(), targetFile);
+    }
+
+    /**
+     * Sends the text body through the service chain and serializes the result,
+     * unless {@code targetFile} already exists, in which case nothing is done.
+     */
+    private static void annotate(String body, File targetFile) throws Exception {
+        if (targetFile.exists()) {
+            LOG.debug("Skipping existing file..");
+            return;
+        }
+        LOG.info("Processing text into " + targetFile.getPath());
+        TText ttext = MSPROXY.process(body, SERVICES);
+        serialize(ttext, targetFile);
+    }
+
+    /**
+     * Writes the processed text to {@code targetFile} using Java object serialization.
+     *
+     * @throws IOException if the file cannot be written
+     */
+    public static void serialize(TText ttext, File targetFile) throws IOException {
+        try (FileOutputStream fout = new FileOutputStream(targetFile);
+
ObjectOutputStream oos = new ObjectOutputStream(fout)) { + oos.writeObject(ttext); + } + } + + public static TText annotate(String body) throws Exception { + return MSPROXY.process(body, SERVICES); + } + +} diff --git a/nicolas-zero/pom.xml b/nicolas-zero/pom.xml index 26bf7dd..6f4d656 100644 --- a/nicolas-zero/pom.xml +++ b/nicolas-zero/pom.xml @@ -11,4 +11,34 @@ <artifactId>nicolas-zero</artifactId> + <dependencies> + <!-- project --> + <dependency> + <groupId>pl.waw.ipipan.zil.summ</groupId> + <artifactId>nicolas-common</artifactId> + </dependency> + + <!-- third party --> + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-csv</artifactId> + </dependency> + <dependency> + <groupId>commons-io</groupId> + <artifactId>commons-io</artifactId> + </dependency> + + <!-- logging --> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + </dependency> + + <!-- test --> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + </dependency> + </dependencies> + </project> \ No newline at end of file diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/Zero.java b/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/CandidateFinder.java index cb3c5ef..cceb8ac 100644 --- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/Zero.java +++ b/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/CandidateFinder.java @@ -3,126 +3,61 @@ package pl.waw.ipipan.zil.summ.nicolas.zero; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; -import org.apache.commons.csv.CSVFormat; -import org.apache.commons.csv.CSVPrinter; -import org.apache.commons.csv.QuoteMode; -import org.apache.commons.io.IOUtils; import pl.waw.ipipan.zil.multiservice.thrift.types.*; -import pl.waw.ipipan.zil.summ.nicolas.Utils; -import java.io.File; -import java.io.FileReader; -import java.io.FileWriter; -import 
java.io.IOException; import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Set; -/** - * Created by me2 on 26.07.16. - */ -public class Zero { +public class CandidateFinder { - private static final String IDS_PATH = "summaries_dev"; - private static final String THRIFTED_PATH = "src/main/resources/preprocessed_full_texts/dev/"; + public List<ZeroSubjectCandidate> findZeroSubjectCandidates(TText text, Set<String> summarySentenceIds) { + List<ZeroSubjectCandidate> candidates = Lists.newArrayList(); - public static void main(String[] args) throws IOException { - - Map<String, TText> id2preprocessedText = Utils.loadPreprocessedTexts(THRIFTED_PATH); - Map<String, List<String>> id2sentIds = loadSentenceIds(IDS_PATH); - - int mentionCount = 0; - int mentionInNom = 0; - int mentionInNomSequential = 0; - - List<List<Object>> rows = Lists.newArrayList(); - for (Map.Entry<String, TText> entry : id2preprocessedText.entrySet()) { - String textId = entry.getKey(); -// System.out.println(id); - - TText text = entry.getValue(); - List<String> sentenceIds = id2sentIds.get(textId); -// System.out.println(sentenceIds); - - Map<String, Set<String>> mentionId2Cluster = Maps.newHashMap(); - for (TCoreference coreference : text.getCoreferences()) { - for (String mentionId : coreference.getMentionIds()) { - mentionId2Cluster.put(mentionId, Sets.newHashSet(coreference.getMentionIds())); - } + Map<String, Set<String>> mentionId2Cluster = Maps.newHashMap(); + for (TCoreference coreference : text.getCoreferences()) { + for (String mentionId : coreference.getMentionIds()) { + mentionId2Cluster.put(mentionId, Sets.newHashSet(coreference.getMentionIds())); } + } - Set<String> prevSentenceNominativeMentionIds = Sets.newHashSet(); - TSentence prevSentence = null; - for (TParagraph p : text.getParagraphs()) { - Map<TMention, String> tMentionStringMap = Utils.loadMention2Orth(p.getSentences()); - - for (TSentence sentence : p.getSentences()) { - if 
(!sentenceIds.contains(sentence.getId())) - continue; - Set<String> currentSentenceNominativeMentionIds = Sets.newHashSet(); - - Map<String, TToken> tokenId2Token = Maps.newHashMap(); - for (TToken t : sentence.getTokens()) - tokenId2Token.put(t.getId(), t); + Set<String> prevSentenceNominativeMentionIds = Sets.newHashSet(); + TSentence prevSentence = null; + for (TParagraph p : text.getParagraphs()) { + for (TSentence sentence : p.getSentences()) { + if (!summarySentenceIds.contains(sentence.getId())) + continue; + Set<String> currentSentenceNominativeMentionIds = Sets.newHashSet(); - for (TMention mention : sentence.getMentions()) { - mentionCount++; + Map<String, TToken> tokenId2Token = Maps.newHashMap(); + for (TToken t : sentence.getTokens()) + tokenId2Token.put(t.getId(), t); - for (String tokenId : mention.getHeadIds()) { - TInterpretation interp = tokenId2Token.get(tokenId).getChosenInterpretation(); - if (isInNominative(interp)) { - mentionInNom++; + for (TMention mention : sentence.getMentions()) { - currentSentenceNominativeMentionIds.add(mention.getId()); - if (mentionId2Cluster.get(mention.getId()).stream().anyMatch(prevSentenceNominativeMentionIds::contains)) { - mentionInNomSequential++; - System.out.println(tMentionStringMap.get(mention) - + "\n\t" + Utils.loadSentence2Orth(prevSentence) - + "\n\t" + Utils.loadSentence2Orth(sentence)); + for (String tokenId : mention.getHeadIds()) { + TInterpretation interp = tokenId2Token.get(tokenId).getChosenInterpretation(); + if (isInNominative(interp)) { - List<Object> row = Lists.newArrayList(); - row.add("C"); - row.add(textId); - row.add(tMentionStringMap.get(mention)); - row.add(Utils.loadSentence2Orth(prevSentence)); - row.add(Utils.loadSentence2Orth(sentence)); - rows.add(row); - } - break; + currentSentenceNominativeMentionIds.add(mention.getId()); + if (mentionId2Cluster.get(mention.getId()).stream().anyMatch(prevSentenceNominativeMentionIds::contains)) { + ZeroSubjectCandidate candidate = new 
ZeroSubjectCandidate(prevSentence, sentence, mention); + candidates.add(candidate); } + break; } } - - prevSentence = sentence; - prevSentenceNominativeMentionIds = currentSentenceNominativeMentionIds; } - } - } - - System.out.println(mentionCount + " mentions"); - System.out.println(mentionInNom + " mention in nom"); - System.out.println(mentionInNomSequential + " mention in nom with previous in nom"); - try (CSVPrinter csvPrinter = new CSVPrinter(new FileWriter("zeros.tsv"), CSVFormat.DEFAULT.withDelimiter('\t').withEscape('\\').withQuoteMode(QuoteMode.NONE).withQuote('"'))) { - for (List<Object> row : rows) { - csvPrinter.printRecord(row); + prevSentence = sentence; + prevSentenceNominativeMentionIds = currentSentenceNominativeMentionIds; } } - + return candidates; } private static boolean isInNominative(TInterpretation interp) { return interp.getCtag().equals("subst") && Arrays.stream(interp.getMsd().split(":")).anyMatch(t -> t.equals("nom")); } - - private static Map<String, List<String>> loadSentenceIds(String idsPath) throws IOException { - Map<String, List<String>> result = Maps.newHashMap(); - for (File f : new File(idsPath).listFiles()) { - String id = f.getName().split("_")[0]; - List<String> sentenceIds = IOUtils.readLines(new FileReader(f)); - result.put(id, sentenceIds); - } - return result; - } } diff --git a/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/Zero.java b/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/Zero.java new file mode 100644 index 0000000..1414f45 --- /dev/null +++ b/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/Zero.java @@ -0,0 +1,76 @@ +package pl.waw.ipipan.zil.summ.nicolas.zero; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVPrinter; +import org.apache.commons.csv.QuoteMode; +import org.apache.commons.io.IOUtils; +import 
pl.waw.ipipan.zil.multiservice.thrift.types.TText; +import pl.waw.ipipan.zil.summ.nicolas.common.ThriftTextHelper; +import pl.waw.ipipan.zil.summ.nicolas.common.Utils; + +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class Zero { + + private static final String IDS_PATH = "corpora/summaries_dev"; + private static final String THRIFTED_PATH = "corpora/preprocessed_full_texts/dev/"; + + private Zero() { + } + + public static void main(String[] args) throws IOException { + + CandidateFinder candidateFinder = new CandidateFinder(); + + Map<String, TText> id2preprocessedText = Utils.loadPreprocessedTexts(THRIFTED_PATH); + Map<String, Set<String>> id2sentIds = loadSentenceIds(IDS_PATH); + + List<List<Object>> rows = Lists.newArrayList(); + for (Map.Entry<String, TText> entry : id2preprocessedText.entrySet()) { + String textId = entry.getKey(); + + TText text = entry.getValue(); + ThriftTextHelper thriftTextHelper = new ThriftTextHelper(text); + + Set<String> sentenceIds = id2sentIds.get(textId); + + List<ZeroSubjectCandidate> zeroSubjectCandidates = candidateFinder.findZeroSubjectCandidates(text, sentenceIds); + + for (ZeroSubjectCandidate candidate : zeroSubjectCandidates) { + List<Object> row = Lists.newArrayList(); + row.add("C"); + row.add(textId); + row.add(thriftTextHelper.getMentionText(candidate.getZeroCandidateMention())); + row.add(thriftTextHelper.getSentenceText(candidate.getPreviousSentence())); + row.add(thriftTextHelper.getSentenceText(candidate.getSentence())); + rows.add(row); + } + } + + try (CSVPrinter csvPrinter = new CSVPrinter(new FileWriter("zeros.tsv"), CSVFormat.DEFAULT.withDelimiter('\t').withEscape('\\').withQuoteMode(QuoteMode.NONE).withQuote('"'))) { + for (List<Object> row : rows) { + csvPrinter.printRecord(row); + } + } + + } + + private static Map<String, Set<String>> loadSentenceIds(String idsPath) 
throws IOException { + Map<String, Set<String>> result = Maps.newHashMap(); + for (File f : new File(idsPath).listFiles()) { + String id = f.getName().split("_")[0]; + List<String> sentenceIds = IOUtils.readLines(new FileReader(f)); + result.put(id, Sets.newHashSet(sentenceIds)); + } + return result; + } +} diff --git a/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectCandidate.java b/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectCandidate.java new file mode 100644 index 0000000..6d0a76f --- /dev/null +++ b/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectCandidate.java @@ -0,0 +1,29 @@ +package pl.waw.ipipan.zil.summ.nicolas.zero; + +import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; +import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; + +public class ZeroSubjectCandidate { + + private final TSentence previousSentence; + private final TSentence sentence; + private final TMention zeroCandidateMention; + + public ZeroSubjectCandidate(TSentence previousSentence, TSentence sentence, TMention zeroCandidateMention) { + this.previousSentence = previousSentence; + this.sentence = sentence; + this.zeroCandidateMention = zeroCandidateMention; + } + + public TSentence getPreviousSentence() { + return previousSentence; + } + + public TSentence getSentence() { + return sentence; + } + + public TMention getZeroCandidateMention() { + return zeroCandidateMention; + } +} diff --git a/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectInjector.java b/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectInjector.java new file mode 100644 index 0000000..ca4f915 --- /dev/null +++ b/nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectInjector.java @@ -0,0 +1,5 @@ +package pl.waw.ipipan.zil.summ.nicolas.zero; + + +public class ZeroSubjectInjector { +} diff --git 
a/nicolas-zero/src/test/java/pl/waw/ipipan/zil/summ/nicolas/zero/CandidateFinderTest.java b/nicolas-zero/src/test/java/pl/waw/ipipan/zil/summ/nicolas/zero/CandidateFinderTest.java new file mode 100644 index 0000000..7948faa --- /dev/null +++ b/nicolas-zero/src/test/java/pl/waw/ipipan/zil/summ/nicolas/zero/CandidateFinderTest.java @@ -0,0 +1,61 @@ +package pl.waw.ipipan.zil.summ.nicolas.zero; + +import com.google.common.collect.Sets; +import org.apache.commons.io.IOUtils; +import org.junit.BeforeClass; +import org.junit.Test; +import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; +import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; +import pl.waw.ipipan.zil.summ.nicolas.common.ThriftTextHelper; +import pl.waw.ipipan.zil.summ.nicolas.common.Utils; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.List; +import java.util.Set; + +import static org.junit.Assert.assertEquals; + +public class CandidateFinderTest { + + private static final String SAMPLE_TEXT_PATH = "/pl/waw/ipipan/zil/summ/nicolas/zero/sample_serialized_text.bin"; + private static final String SAMPLE_TEXT_SUMMARY_IDS_PATH = "/pl/waw/ipipan/zil/summ/nicolas/zero/sample_summary_sentence_ids.txt"; + + private static CandidateFinder candidateFinder; + + @BeforeClass + public static void init() { + candidateFinder = new CandidateFinder(); + } + + @Test + public void shouldFindZeroSubjectCandidateInSampleText() throws Exception { + ThriftTextHelper sampleTextHelper = loadSampleTextHelper(); + Set<String> summarySentenceIds = loadSampleTextSummarySentenceIds(); + List<ZeroSubjectCandidate> candidates = candidateFinder.findZeroSubjectCandidates(sampleTextHelper.getText(), summarySentenceIds); + assertEquals(1, candidates.size()); + + ZeroSubjectCandidate zeroSubjectCandidate = candidates.get(0); + TSentence firstSentence = zeroSubjectCandidate.getPreviousSentence(); + TSentence secondSentence = zeroSubjectCandidate.getSentence(); + 
TMention zeroCandidate = zeroSubjectCandidate.getZeroCandidateMention(); + + assertEquals("Ala ma kota.", sampleTextHelper.getSentenceText(firstSentence)); + assertEquals("Ala ma też psa.", sampleTextHelper.getSentenceText(secondSentence)); + assertEquals("Ala", sampleTextHelper.getMentionText(zeroCandidate)); + } + + private Set<String> loadSampleTextSummarySentenceIds() throws IOException { + try (InputStream stream = CandidateFinderTest.class.getResourceAsStream(SAMPLE_TEXT_SUMMARY_IDS_PATH); + InputStreamReader reader = new InputStreamReader(stream)) { + return Sets.newHashSet(IOUtils.readLines(reader)); + } + } + + private ThriftTextHelper loadSampleTextHelper() throws IOException { + try (InputStream stream = CandidateFinderTest.class.getResourceAsStream(SAMPLE_TEXT_PATH)) { + return new ThriftTextHelper(Utils.loadThrifted(stream)); + } + } +} \ No newline at end of file diff --git a/nicolas-zero/src/test/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectInjectorTest.java b/nicolas-zero/src/test/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectInjectorTest.java new file mode 100644 index 0000000..e98bc27 --- /dev/null +++ b/nicolas-zero/src/test/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectInjectorTest.java @@ -0,0 +1,11 @@ +package pl.waw.ipipan.zil.summ.nicolas.zero; + +import org.junit.Test; + +public class ZeroSubjectInjectorTest { + + @Test + public void shouldInit() throws Exception { + ZeroSubjectInjector injector = new ZeroSubjectInjector(); + } +} \ No newline at end of file diff --git a/pom.xml b/pom.xml index 6484758..3a2ba87 100644 --- a/pom.xml +++ b/pom.xml @@ -16,6 +16,7 @@ <module>nicolas-model</module> <module>nicolas-train</module> <module>nicolas-zero</module> + <module>nicolas-common</module> </modules> <properties> @@ -30,6 +31,8 @@ <weka-dev.version>3.9.0</weka-dev.version> <commons-lang3.version>3.5</commons-lang3.version> <commons-io.version>2.5</commons-io.version> + <slf4j-api.version>1.7.12</slf4j-api.version> + 
<junit.version>4.12</junit.version> </properties> <prerequisites> @@ -46,13 +49,20 @@ <dependencyManagement> <dependencies> + <!-- project --> <dependency> <groupId>pl.waw.ipipan.zil.summ</groupId> <artifactId>nicolas-model</artifactId> <version>${project.version}</version> <scope>runtime</scope> </dependency> + <dependency> + <groupId>pl.waw.ipipan.zil.summ</groupId> + <artifactId>nicolas-common</artifactId> + <version>${project.version}</version> + </dependency> + <!-- internal --> <dependency> <groupId>pl.waw.ipipan.zil.summ</groupId> <artifactId>pscapi</artifactId> @@ -64,6 +74,7 @@ <version>${utils.version}</version> </dependency> + <!-- third party --> <dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-csv</artifactId> @@ -89,6 +100,20 @@ <artifactId>commons-io</artifactId> <version>${commons-io.version}</version> </dependency> + + <!-- logging --> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + <version>${slf4j-api.version}</version> + </dependency> + + <!-- test --> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>${junit.version}</version> + </dependency> </dependencies> </dependencyManagement>