Commit 91b27b24a9fb0d6427debc133c923e3188f9a768

Authored by Mateusz Kopeć
1 parent e058b3c2

zeros corpus wip

Showing 32 changed files with 401 additions and 86 deletions
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Constants.java renamed to nicolas-common/src/main/java/pl/waw/ipipan/zil/summ/nicolas/common/Constants.java
1   -package pl.waw.ipipan.zil.summ.nicolas;
  1 +package pl.waw.ipipan.zil.summ.nicolas.common;
2 2  
  3 +import com.google.common.base.Charsets;
3 4 import weka.classifiers.Classifier;
  5 +import weka.classifiers.functions.Logistic;
4 6 import weka.classifiers.trees.RandomForest;
5 7  
  8 +import java.nio.charset.Charset;
  9 +
6 10  
7 11 public class Constants {
8 12  
9 13 public static final String MENTIONS_MODEL_PATH = "mentions_model.bin";
10 14 public static final String SENTENCES_MODEL_PATH = "sentences_model.bin";
  15 + public static final String ZERO_MODEL_PATH = "zeros_model.bin";
  16 +
11 17 public static final String MENTIONS_DATASET_PATH = "mentions_train.arff";
12 18 public static final String SENTENCES_DATASET_PATH = "sentences_train.arff";
  19 + public static final String ZERO_DATASET_PATH = "zeros_train.arff";
  20 +
  21 + public static final Charset ENCODING = Charsets.UTF_8;
13 22  
14 23 private Constants() {
15 24 }
16 25  
17   - public static Classifier getClassifier() {
  26 + public static Classifier getMentionClassifier() {
18 27 RandomForest classifier = new RandomForest();
19 28 classifier.setNumIterations(250);
20 29 classifier.setSeed(0);
... ... @@ -22,7 +31,6 @@ public class Constants {
22 31 return classifier;
23 32 }
24 33  
25   -
26 34 public static Classifier getSentencesClassifier() {
27 35 RandomForest classifier = new RandomForest();
28 36 classifier.setNumIterations(250);
... ... @@ -30,4 +38,9 @@ public class Constants {
30 38 classifier.setNumExecutionSlots(8);
31 39 return classifier;
32 40 }
  41 +
  42 + public static Classifier getZerosClassifier() {
  43 + Logistic classifier = new Logistic();
  44 + return classifier;
  45 + }
33 46 }
... ...
nicolas-common/src/main/java/pl/waw/ipipan/zil/summ/nicolas/common/Utils.java
... ... @@ -101,7 +101,7 @@ public class Utils {
101 101 STOPWORDS.addAll(Lists.newArrayList("i", "się", "to", "co"));
102 102 }
103 103  
104   - public static Map<TMention, String> loadMention2Orth(List<TSentence> sents) {
  104 + public static Map<TMention, String> loadMention2Orth(List<TSentence> sents, boolean discardStopwords) {
105 105 Map<TMention, String> mention2orth = Maps.newHashMap();
106 106 for (TSentence s : sents) {
107 107 Map<String, TToken> tokId2tok = s.getTokens().stream().collect(Collectors.toMap(TToken::getId, Function.identity()));
... ... @@ -110,7 +110,7 @@ public class Utils {
110 110 StringBuffer mentionOrth = new StringBuffer();
111 111 for (String tokId : m.getChildIds()) {
112 112 TToken token = tokId2tok.get(tokId);
113   - if (STOPWORDS.contains(token.getChosenInterpretation().getBase().toLowerCase())) {
  113 + if (discardStopwords && STOPWORDS.contains(token.getChosenInterpretation().getBase().toLowerCase())) {
114 114 continue;
115 115 }
116 116  
... ... @@ -142,8 +142,16 @@ public class Utils {
142 142 }
143 143  
144 144 public static String loadSentence2Orth(TSentence sentence) {
  145 + return loadSentence2Orth(sentence, Sets.newHashSet());
  146 + }
  147 +
  148 + public static String loadSentence2Orth(TSentence sentence, Set<String> tokenIdsToSkip) {
145 149 StringBuilder sb = new StringBuilder();
146 150 for (TToken token : sentence.getTokens()) {
  151 + if (tokenIdsToSkip.contains(token.getId())) {
  152 + System.out.println("Skipping " + token.getOrth() + " in sentence: " + loadSentence2Orth(sentence));
  153 + continue;
  154 + }
147 155 if (!token.isNoPrecedingSpace())
148 156 sb.append(" ");
149 157 sb.append(token.getOrth());
... ...
nicolas-common/src/main/java/pl/waw/ipipan/zil/summ/nicolas/common/VersionIgnoringObjectInputStream.java 0 → 100644
  1 +package pl.waw.ipipan.zil.summ.nicolas.common;
  2 +
  3 +import java.io.IOException;
  4 +import java.io.InputStream;
  5 +import java.io.ObjectInputStream;
  6 +import java.io.ObjectStreamClass;
  7 +
  8 +
/**
 * An {@link ObjectInputStream} that tolerates {@code serialVersionUID} mismatches between
 * the serialized data and the classes available locally.
 *
 * <p>Whenever a class descriptor read from the stream names a class that can be loaded
 * locally, is serializable, and has a different {@code serialVersionUID}, the local
 * descriptor is used instead of the one stored in the stream.
 *
 * <p>NOTE(review): this bypasses the standard compatibility check of Java serialization,
 * so it should presumably be used only on streams from a trusted source - confirm with callers.
 */
public class VersionIgnoringObjectInputStream extends ObjectInputStream {

    /**
     * @param in the underlying stream to read serialized objects from
     * @throws IOException if the stream header cannot be read
     */
    public VersionIgnoringObjectInputStream(InputStream in) throws IOException {
        super(in);
    }

    /**
     * Reads the next class descriptor and replaces it with the local one when only the
     * {@code serialVersionUID} differs.
     *
     * @return the stream's descriptor, or the local descriptor on a version mismatch
     * @throws IOException            if the descriptor cannot be read from the stream
     * @throws ClassNotFoundException if the superclass implementation reports it
     */
    @Override
    protected ObjectStreamClass readClassDescriptor() throws IOException, ClassNotFoundException {
        ObjectStreamClass streamDescriptor = super.readClassDescriptor();

        Class<?> localClass;
        try {
            localClass = Class.forName(streamDescriptor.getName());
        } catch (ClassNotFoundException ignored) {
            // The class is unknown locally: keep the stream's descriptor and let the
            // regular deserialization machinery report the problem, if any.
            return streamDescriptor;
        }

        // lookup() yields null when the local class is not serializable.
        ObjectStreamClass localDescriptor = ObjectStreamClass.lookup(localClass);
        if (localDescriptor != null
                && localDescriptor.getSerialVersionUID() != streamDescriptor.getSerialVersionUID()) {
            return localDescriptor;
        }
        return streamDescriptor;
    }
}
... ...
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureExtractor.java renamed to nicolas-common/src/main/java/pl/waw/ipipan/zil/summ/nicolas/common/features/FeatureExtractor.java
1   -package pl.waw.ipipan.zil.summ.nicolas.features;
  1 +package pl.waw.ipipan.zil.summ.nicolas.common.features;
2 2  
3 3 import com.google.common.collect.*;
4 4 import org.slf4j.Logger;
... ...
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureHelper.java renamed to nicolas-common/src/main/java/pl/waw/ipipan/zil/summ/nicolas/common/features/FeatureHelper.java
1   -package pl.waw.ipipan.zil.summ.nicolas.features;
  1 +package pl.waw.ipipan.zil.summ.nicolas.common.features;
2 2  
3 3 import com.google.common.collect.Maps;
4 4 import com.google.common.collect.Sets;
... ... @@ -17,6 +17,8 @@ import static java.util.stream.Collectors.toMap;
17 17  
18 18 public class FeatureHelper {
19 19  
  20 + private final TText text;
  21 +
20 22 private final List<TMention> mentions;
21 23 private final Map<String, TMention> mentionId2mention;
22 24 private final Map<TCoreference, List<TMention>> coref2mentions = Maps.newHashMap();
... ... @@ -37,6 +39,8 @@ public class FeatureHelper {
37 39  
38 40  
39 41 public FeatureHelper(TText preprocessedText) {
  42 + text = preprocessedText;
  43 +
40 44 mentions = preprocessedText.getParagraphs().stream()
41 45 .flatMap(p -> p.getSentences().stream())
42 46 .flatMap(s -> s.getMentions().stream()).collect(Collectors.toList());
... ... @@ -55,7 +59,7 @@ public class FeatureHelper {
55 59 int sentIdx = 0;
56 60 int mentionIdx = 0;
57 61 for (TParagraph par : preprocessedText.getParagraphs()) {
58   - Map<TMention, String> m2o = Utils.loadMention2Orth(par.getSentences());
  62 + Map<TMention, String> m2o = Utils.loadMention2Orth(par.getSentences(), false);
59 63 mention2Orth.putAll(m2o);
60 64 Map<TMention, String> m2b = Utils.loadMention2Base(par.getSentences());
61 65 mention2Base.putAll(m2b);
... ... @@ -182,4 +186,18 @@ public class FeatureHelper {
182 186 public TCoreference getMentionCluster(TMention tMention) {
183 187 return this.mention2coref.get(tMention);
184 188 }
  189 +
  190 + public String getSentenceOrth(TSentence sentence) {
  191 + StringBuilder sb = new StringBuilder();
  192 + for (TToken token : sentence.getTokens()) {
  193 + if (!token.isNoPrecedingSpace())
  194 + sb.append(" ");
  195 + sb.append(token.getOrth());
  196 + }
  197 + return sb.toString().trim();
  198 + }
  199 +
  200 + public TText getText() {
  201 + return text;
  202 + }
185 203 }
... ...
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/Interpretation.java renamed to nicolas-common/src/main/java/pl/waw/ipipan/zil/summ/nicolas/common/features/Interpretation.java
1   -package pl.waw.ipipan.zil.summ.nicolas.features;
  1 +package pl.waw.ipipan.zil.summ.nicolas.common.features;
2 2  
3 3 import pl.waw.ipipan.zil.multiservice.thrift.types.TInterpretation;
4 4  
... ...
nicolas-core/pom.xml
... ... @@ -21,6 +21,10 @@
21 21 <groupId>pl.waw.ipipan.zil.summ</groupId>
22 22 <artifactId>nicolas-model</artifactId>
23 23 </dependency>
  24 + <dependency>
  25 + <groupId>pl.waw.ipipan.zil.summ</groupId>
  26 + <artifactId>nicolas-zero</artifactId>
  27 + </dependency>
24 28  
25 29 <dependency>
26 30 <groupId>pl.waw.ipipan.zil.summ</groupId>
... ...
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java
... ... @@ -6,6 +6,7 @@ import com.google.common.collect.Sets;
6 6 import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
7 7 import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
8 8 import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
  9 +import pl.waw.ipipan.zil.summ.nicolas.common.Constants;
9 10 import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
10 11 import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor;
11 12 import pl.waw.ipipan.zil.summ.nicolas.mention.MentionModel;
... ...
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/apply/ApplyModel2.java
... ... @@ -8,12 +8,13 @@ import org.slf4j.LoggerFactory;
8 8 import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
9 9 import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
10 10 import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
11   -import pl.waw.ipipan.zil.summ.nicolas.Constants;
12 11 import pl.waw.ipipan.zil.summ.nicolas.ThriftUtils;
  12 +import pl.waw.ipipan.zil.summ.nicolas.common.Constants;
13 13 import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
14 14 import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor;
15 15 import pl.waw.ipipan.zil.summ.nicolas.mention.MentionModel;
16 16 import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceFeatureExtractor;
  17 +import pl.waw.ipipan.zil.summ.nicolas.zero.ZeroSubjectInjector;
17 18 import weka.classifiers.Classifier;
18 19 import weka.core.Instance;
19 20 import weka.core.Instances;
... ... @@ -29,8 +30,8 @@ public class ApplyModel2 {
29 30  
30 31 private static final Logger LOG = LoggerFactory.getLogger(ApplyModel2.class);
31 32  
32   - private static final String TEST_PREPROCESSED_DATA_PATH = "src/main/resources/preprocessed_full_texts/test";
33   - private static final String TARGET_DIR = "summaries";
  33 + private static final String TEST_PREPROCESSED_DATA_PATH = "corpora/preprocessed_full_texts/test";
  34 + private static final String TARGET_DIR = "corpora/summaries";
34 35  
35 36 public static void main(String[] args) throws Exception {
36 37 Classifier mentionClassifier = Utils.loadClassifier(Constants.MENTIONS_MODEL_PATH);
... ... @@ -39,6 +40,8 @@ public class ApplyModel2 {
39 40 Classifier sentenceClassifier = Utils.loadClassifier(Constants.SENTENCES_MODEL_PATH);
40 41 SentenceFeatureExtractor sentenceFeatureExtractor = new SentenceFeatureExtractor();
41 42  
  43 + ZeroSubjectInjector zeroSubjectInjector = new ZeroSubjectInjector();
  44 +
42 45 Map<String, TText> id2preprocessedText = Utils.loadPreprocessedTexts(TEST_PREPROCESSED_DATA_PATH);
43 46 int i = 1;
44 47 double avgSize = 0;
... ... @@ -49,10 +52,10 @@ public class ApplyModel2 {
49 52 = MentionModel.detectGoodMentions(mentionClassifier, featureExtractor, text);
50 53  
51 54 int targetSize = calculateTargetSize(text);
52   - String summary = calculateSummary(text, goodMentions, targetSize, sentenceClassifier, sentenceFeatureExtractor);
  55 + String summary = calculateSummary(text, goodMentions, targetSize, sentenceClassifier, sentenceFeatureExtractor, zeroSubjectInjector);
53 56 int size = Utils.tokenize(summary).size();
54 57 avgSize += size;
55   - try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(TARGET_DIR, entry.getKey() + "_emily3.txt")))) {
  58 + try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(TARGET_DIR, entry.getKey() + "_emily4.txt")))) {
56 59 bw.append(summary);
57 60 }
58 61  
... ... @@ -71,12 +74,14 @@ public class ApplyModel2 {
71 74 return (int) (0.2 * tokenCount);
72 75 }
73 76  
74   - private static String calculateSummary(TText thrifted, Set<TMention> goodMentions, int targetSize, Classifier sentenceClassifier, SentenceFeatureExtractor sentenceFeatureExtractor) throws Exception {
  77 + private static String calculateSummary(TText thrifted, Set<TMention> goodMentions, int targetSize, Classifier sentenceClassifier, SentenceFeatureExtractor sentenceFeatureExtractor, ZeroSubjectInjector zeroSubjectInjector) throws Exception {
75 78 List<TSentence> selectedSentences = selectSummarySentences(thrifted, goodMentions, targetSize, sentenceClassifier, sentenceFeatureExtractor);
76 79  
77   - StringBuffer sb = new StringBuffer();
  80 + Set<String> zeroSubjectTokenIds = zeroSubjectInjector.findZeroSubjectTokenIds(thrifted, selectedSentences);
  81 +
  82 + StringBuilder sb = new StringBuilder();
78 83 for (TSentence sent : selectedSentences) {
79   - sb.append(" " + Utils.loadSentence2Orth(sent));
  84 + sb.append(" " + Utils.loadSentence2Orth(sent, zeroSubjectTokenIds));
80 85 }
81 86 return sb.toString().trim();
82 87 }
... ...
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionFeatureExtractor.java
... ... @@ -2,9 +2,9 @@ package pl.waw.ipipan.zil.summ.nicolas.mention;
2 2  
3 3 import com.google.common.collect.*;
4 4 import pl.waw.ipipan.zil.multiservice.thrift.types.*;
5   -import pl.waw.ipipan.zil.summ.nicolas.features.FeatureExtractor;
6   -import pl.waw.ipipan.zil.summ.nicolas.features.FeatureHelper;
7   -import pl.waw.ipipan.zil.summ.nicolas.features.Interpretation;
  5 +import pl.waw.ipipan.zil.summ.nicolas.common.features.FeatureExtractor;
  6 +import pl.waw.ipipan.zil.summ.nicolas.common.features.FeatureHelper;
  7 +import pl.waw.ipipan.zil.summ.nicolas.common.features.Interpretation;
8 8 import weka.core.Attribute;
9 9  
10 10 import java.io.File;
... ...
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionScorer.java
... ... @@ -19,7 +19,7 @@ public class MentionScorer {
19 19 Multiset<String> tokenCounts = HashMultiset.create(Utils.tokenize(optimalSummary.toLowerCase()));
20 20  
21 21 List<TSentence> sentences = text.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(Collectors.toList());
22   - Map<TMention, String> mention2Orth = Utils.loadMention2Orth(sentences);
  22 + Map<TMention, String> mention2Orth = Utils.loadMention2Orth(sentences, true);
23 23  
24 24 return booleanTokenIntersection(mention2Orth, tokenCounts);
25 25 }
... ...
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/PrepareTrainingData.java
... ... @@ -7,7 +7,7 @@ import org.slf4j.Logger;
7 7 import org.slf4j.LoggerFactory;
8 8 import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
9 9 import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
10   -import pl.waw.ipipan.zil.summ.nicolas.Constants;
  10 +import pl.waw.ipipan.zil.summ.nicolas.common.Constants;
11 11 import pl.waw.ipipan.zil.summ.nicolas.ThriftUtils;
12 12 import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
13 13 import weka.core.Instance;
... ...
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/TrainModel.java
... ... @@ -3,7 +3,7 @@ package pl.waw.ipipan.zil.summ.nicolas.mention;
3 3 import org.apache.commons.lang3.time.StopWatch;
4 4 import org.slf4j.Logger;
5 5 import org.slf4j.LoggerFactory;
6   -import pl.waw.ipipan.zil.summ.nicolas.Constants;
  6 +import pl.waw.ipipan.zil.summ.nicolas.common.Constants;
7 7 import weka.classifiers.Classifier;
8 8 import weka.core.Instances;
9 9 import weka.core.converters.ArffLoader;
... ... @@ -28,7 +28,7 @@ public class TrainModel {
28 28 StopWatch watch = new StopWatch();
29 29 watch.start();
30 30  
31   - Classifier classifier = Constants.getClassifier();
  31 + Classifier classifier = Constants.getMentionClassifier();
32 32  
33 33 LOG.info("Building classifier...");
34 34 classifier.buildClassifier(instances);
... ...
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/test/Crossvalidate.java
... ... @@ -3,7 +3,7 @@ package pl.waw.ipipan.zil.summ.nicolas.mention.test;
3 3 import org.apache.commons.lang3.time.StopWatch;
4 4 import org.slf4j.Logger;
5 5 import org.slf4j.LoggerFactory;
6   -import pl.waw.ipipan.zil.summ.nicolas.Constants;
  6 +import pl.waw.ipipan.zil.summ.nicolas.common.Constants;
7 7 import weka.classifiers.Classifier;
8 8 import weka.classifiers.evaluation.Evaluation;
9 9 import weka.core.Instances;
... ... @@ -32,7 +32,7 @@ public class Crossvalidate {
32 32 StopWatch watch = new StopWatch();
33 33 watch.start();
34 34  
35   - Classifier tree = Constants.getClassifier();
  35 + Classifier tree = Constants.getMentionClassifier();
36 36  
37 37 Evaluation eval = new Evaluation(instances);
38 38 eval.crossValidateModel(tree, instances, 10, new Random(1));
... ...
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/test/Validate.java
... ... @@ -3,7 +3,7 @@ package pl.waw.ipipan.zil.summ.nicolas.mention.test;
3 3 import org.apache.commons.lang3.time.StopWatch;
4 4 import org.slf4j.Logger;
5 5 import org.slf4j.LoggerFactory;
6   -import pl.waw.ipipan.zil.summ.nicolas.Constants;
  6 +import pl.waw.ipipan.zil.summ.nicolas.common.Constants;
7 7 import weka.classifiers.Classifier;
8 8 import weka.classifiers.evaluation.Evaluation;
9 9 import weka.core.Instances;
... ...
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/PrepareTrainingData.java
... ... @@ -8,7 +8,7 @@ import org.slf4j.LoggerFactory;
8 8 import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
9 9 import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
10 10 import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
11   -import pl.waw.ipipan.zil.summ.nicolas.Constants;
  11 +import pl.waw.ipipan.zil.summ.nicolas.common.Constants;
12 12 import pl.waw.ipipan.zil.summ.nicolas.ThriftUtils;
13 13 import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
14 14 import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor;
... ...
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceFeatureExtractor.java
... ... @@ -2,8 +2,8 @@ package pl.waw.ipipan.zil.summ.nicolas.sentence;
2 2  
3 3 import com.google.common.collect.Maps;
4 4 import pl.waw.ipipan.zil.multiservice.thrift.types.*;
5   -import pl.waw.ipipan.zil.summ.nicolas.features.FeatureExtractor;
6   -import pl.waw.ipipan.zil.summ.nicolas.features.FeatureHelper;
  5 +import pl.waw.ipipan.zil.summ.nicolas.common.features.FeatureExtractor;
  6 +import pl.waw.ipipan.zil.summ.nicolas.common.features.FeatureHelper;
7 7 import weka.core.Attribute;
8 8  
9 9 import java.util.List;
... ...
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceScorer.java
... ... @@ -3,6 +3,7 @@ package pl.waw.ipipan.zil.summ.nicolas.sentence;
3 3 import com.google.common.collect.HashMultiset;
4 4 import com.google.common.collect.Maps;
5 5 import com.google.common.collect.Multiset;
  6 +import com.google.common.collect.Sets;
6 7 import pl.waw.ipipan.zil.multiservice.thrift.types.TParagraph;
7 8 import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
8 9 import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
... ...
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/TrainModel.java
... ... @@ -3,7 +3,7 @@ package pl.waw.ipipan.zil.summ.nicolas.sentence;
3 3 import org.apache.commons.lang3.time.StopWatch;
4 4 import org.slf4j.Logger;
5 5 import org.slf4j.LoggerFactory;
6   -import pl.waw.ipipan.zil.summ.nicolas.Constants;
  6 +import pl.waw.ipipan.zil.summ.nicolas.common.Constants;
7 7 import weka.classifiers.Classifier;
8 8 import weka.core.Instances;
9 9 import weka.core.converters.ArffLoader;
... ...
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/test/Crossvalidate.java
... ... @@ -3,7 +3,7 @@ package pl.waw.ipipan.zil.summ.nicolas.sentence.test;
3 3 import org.apache.commons.lang3.time.StopWatch;
4 4 import org.slf4j.Logger;
5 5 import org.slf4j.LoggerFactory;
6   -import pl.waw.ipipan.zil.summ.nicolas.Constants;
  6 +import pl.waw.ipipan.zil.summ.nicolas.common.Constants;
7 7 import weka.classifiers.Classifier;
8 8 import weka.classifiers.evaluation.Evaluation;
9 9 import weka.core.Instances;
... ...
nicolas-zero/pom.xml
... ... @@ -27,6 +27,10 @@
27 27 <groupId>commons-io</groupId>
28 28 <artifactId>commons-io</artifactId>
29 29 </dependency>
  30 + <dependency>
  31 + <groupId>org.apache.commons</groupId>
  32 + <artifactId>commons-lang3</artifactId>
  33 + </dependency>
30 34  
31 35 <!-- logging -->
32 36 <dependency>
... ...
nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/CandidateFinder.java
... ... @@ -12,7 +12,10 @@ import java.util.Set;
12 12  
13 13 public class CandidateFinder {
14 14  
15   - public List<ZeroSubjectCandidate> findZeroSubjectCandidates(TText text, Set<String> summarySentenceIds) {
  15 + private CandidateFinder() {
  16 + }
  17 +
  18 + public static List<ZeroSubjectCandidate> findZeroSubjectCandidates(TText text, Set<String> summarySentenceIds) {
16 19 List<ZeroSubjectCandidate> candidates = Lists.newArrayList();
17 20  
18 21 Map<String, Set<String>> mentionId2Cluster = Maps.newHashMap();
... ...
nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroFeatureExtractor.java 0 → 100644
  1 +package pl.waw.ipipan.zil.summ.nicolas.zero;
  2 +
  3 +import com.google.common.collect.Lists;
  4 +import com.google.common.collect.Maps;
  5 +import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
  6 +import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
  7 +import pl.waw.ipipan.zil.summ.nicolas.common.features.FeatureExtractor;
  8 +import pl.waw.ipipan.zil.summ.nicolas.common.features.FeatureHelper;
  9 +import weka.core.Attribute;
  10 +
  11 +import java.util.List;
  12 +import java.util.Map;
  13 +
  14 +
  15 +public class ZeroFeatureExtractor extends FeatureExtractor {
  16 +
  17 + public ZeroFeatureExtractor() {
  18 +
  19 + for (String prefix : new String[]{"antecedent", "candidate"}) {
  20 + addNumericAttribute(prefix + "_index_in_sent");
  21 + addNumericAttribute(prefix + "_token_count");
  22 + addBinaryAttribute(prefix + "_is_zero");
  23 + addBinaryAttribute(prefix + "_is_pronoun");
  24 + addBinaryAttribute(prefix + "_is_named");
  25 + }
  26 +
  27 + addBinaryAttribute("pair_equal_orth");
  28 +
  29 + addNominalAttribute("score", Lists.newArrayList("bad", "good"));
  30 + fillSortedAttributes("score");
  31 + }
  32 +
  33 + public Map<ZeroSubjectCandidate, Map<Attribute, Double>> calculateFeatures(List<ZeroSubjectCandidate> candidates, TText text) {
  34 + Map<ZeroSubjectCandidate, Map<Attribute, Double>> result = Maps.newHashMap();
  35 +
  36 + FeatureHelper helper = new FeatureHelper(text);
  37 + for (ZeroSubjectCandidate candidate : candidates) {
  38 + Map<Attribute, Double> candidateFeatures = calculateFeatures(candidate, helper);
  39 + result.put(candidate, candidateFeatures);
  40 + }
  41 +
  42 + return result;
  43 + }
  44 +
  45 + private Map<Attribute, Double> calculateFeatures(ZeroSubjectCandidate candidate, FeatureHelper helper) {
  46 +
  47 + Map<Attribute, Double> candidateFeatures = Maps.newHashMap();
  48 + candidateFeatures.put(getAttributeByName("score"), weka.core.Utils.missingValue());
  49 +
  50 + TMention mention = candidate.getZeroCandidateMention();
  51 + TMention antecedent = candidate.getPreviousSentence().getMentions().stream().filter(ante -> helper.getCoreferentMentions(mention).contains(ante)).findFirst().get();
  52 +
  53 + addMentionFeatures(helper, candidateFeatures, mention, "candidate");
  54 + addMentionFeatures(helper, candidateFeatures, antecedent, "antecedent");
  55 +
  56 + candidateFeatures.put(getAttributeByName("pair_equal_orth"), toBinary(helper.getMentionOrth(mention).equalsIgnoreCase(helper.getMentionOrth(antecedent))));
  57 +
  58 + return candidateFeatures;
  59 + }
  60 +
  61 + private void addMentionFeatures(FeatureHelper helper, Map<Attribute, Double> candidateFeatures, TMention mention, String attributePrefix) {
  62 + candidateFeatures.put(getAttributeByName(attributePrefix + "_index_in_sent"), (double) helper.getMentionIndexInSent(mention));
  63 + candidateFeatures.put(getAttributeByName(attributePrefix + "_token_count"), (double) mention.getChildIdsSize());
  64 + candidateFeatures.put(getAttributeByName(attributePrefix + "_is_zero"), toBinary(mention.isZeroSubject()));
  65 + candidateFeatures.put(getAttributeByName(attributePrefix + "_is_pronoun"), toBinary(helper.getMentionHeadToken(mention).getChosenInterpretation().getCtag().matches("ppron.*")));
  66 + candidateFeatures.put(getAttributeByName(attributePrefix + "_is_named"), toBinary(helper.isMentionNamedEntity(mention)));
  67 + }
  68 +
  69 +}
... ...
nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectInjector.java
1 1 package pl.waw.ipipan.zil.summ.nicolas.zero;
2 2  
  3 +import com.google.common.collect.Sets;
  4 +import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
  5 +import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
  6 +import pl.waw.ipipan.zil.summ.nicolas.common.Constants;
  7 +import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
  8 +import pl.waw.ipipan.zil.summ.nicolas.zero.train.TrainingDataExtractor;
  9 +import weka.classifiers.Classifier;
  10 +import weka.core.Instance;
  11 +import weka.core.Instances;
  12 +
  13 +import java.io.IOException;
  14 +import java.util.List;
  15 +import java.util.Map;
  16 +import java.util.Set;
  17 +import java.util.stream.Collectors;
3 18  
4 19 public class ZeroSubjectInjector {
  20 +
  21 + private final ZeroFeatureExtractor featureExtractor;
  22 + private final Classifier classifier;
  23 + private final Instances instances;
  24 +
  25 + public ZeroSubjectInjector() throws IOException, ClassNotFoundException {
  26 + classifier = Utils.loadClassifier(Constants.ZERO_MODEL_PATH);
  27 + featureExtractor = new ZeroFeatureExtractor();
  28 + instances = Utils.createNewInstances(featureExtractor.getAttributesList());
  29 + }
  30 +
  31 + public Set<String> findZeroSubjectTokenIds(TText text, List<TSentence> selectedSentences) throws Exception {
  32 + Set<String> summarySentenceIds = selectedSentences.stream().map(TSentence::getId).collect(Collectors.toSet());
  33 + List<ZeroSubjectCandidate> zeroSubjectCandidates = CandidateFinder.findZeroSubjectCandidates(text, summarySentenceIds);
  34 + Map<ZeroSubjectCandidate, Instance> candidate2instance =
  35 + TrainingDataExtractor.extractInstancesFromZeroCandidates(zeroSubjectCandidates, text, featureExtractor);
  36 +
  37 + Set<String> result = Sets.newHashSet();
  38 + for (Map.Entry<ZeroSubjectCandidate, Instance> entry : candidate2instance.entrySet()) {
  39 + ZeroSubjectCandidate candidate = entry.getKey();
  40 + Instance instance = entry.getValue();
  41 + instance.setDataset(instances);
  42 + instance.setClassMissing();
  43 + boolean good = classifier.classifyInstance(instance) > 0.5;
  44 + if (good) {
  45 + result.addAll(candidate.getZeroCandidateMention().getChildIds());
  46 + }
  47 + }
  48 + return result;
  49 + }
  50 +
5 51 }
... ...
nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/train/TrainModel.java 0 → 100644
  1 +package pl.waw.ipipan.zil.summ.nicolas.zero.train;
  2 +
  3 +import org.apache.commons.lang3.time.StopWatch;
  4 +import org.slf4j.Logger;
  5 +import org.slf4j.LoggerFactory;
  6 +import pl.waw.ipipan.zil.summ.nicolas.common.Constants;
  7 +import weka.classifiers.Classifier;
  8 +import weka.core.Instances;
  9 +import weka.core.converters.ArffLoader;
  10 +
  11 +import java.io.File;
  12 +import java.io.FileOutputStream;
  13 +import java.io.ObjectOutputStream;
  14 +
  15 +
  16 +public class TrainModel {
  17 +
  18 + private static final Logger LOG = LoggerFactory.getLogger(TrainModel.class);
  19 +
  20 + private TrainModel() {
  21 + }
  22 +
  23 + public static void main(String[] args) throws Exception {
  24 +
  25 + ArffLoader loader = new ArffLoader();
  26 + loader.setFile(new File(Constants.ZERO_DATASET_PATH));
  27 + Instances instances = loader.getDataSet();
  28 + instances.setClassIndex(0);
  29 + LOG.info(instances.size() + " instances loaded.");
  30 + LOG.info(instances.numAttributes() + " attributes for each instance.");
  31 +
  32 + StopWatch watch = new StopWatch();
  33 + watch.start();
  34 +
  35 + Classifier classifier = Constants.getZerosClassifier();
  36 +
  37 + LOG.info("Building classifier...");
  38 + classifier.buildClassifier(instances);
  39 + LOG.info("...done.");
  40 +
  41 + try (ObjectOutputStream oos = new ObjectOutputStream(
  42 + new FileOutputStream(Constants.ZERO_MODEL_PATH))) {
  43 + oos.writeObject(classifier);
  44 + }
  45 +
  46 + watch.stop();
  47 + LOG.info("Elapsed time: " + watch);
  48 +
  49 + LOG.info(classifier.toString());
  50 + }
  51 +}
... ...
nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/Zero.java renamed to nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/train/TrainingDataExtractor.java
1   -package pl.waw.ipipan.zil.summ.nicolas.zero;
  1 +package pl.waw.ipipan.zil.summ.nicolas.zero.train;
2 2  
3   -import com.google.common.collect.Lists;
4 3 import com.google.common.collect.Maps;
5 4 import com.google.common.collect.Sets;
6   -import org.apache.commons.csv.CSVFormat;
7   -import org.apache.commons.csv.CSVPrinter;
8   -import org.apache.commons.csv.QuoteMode;
9 5 import org.apache.commons.io.IOUtils;
10 6 import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
11   -import pl.waw.ipipan.zil.summ.nicolas.common.ThriftTextHelper;
  7 +import pl.waw.ipipan.zil.summ.nicolas.common.Constants;
12 8 import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
  9 +import pl.waw.ipipan.zil.summ.nicolas.common.features.FeatureHelper;
  10 +import pl.waw.ipipan.zil.summ.nicolas.zero.CandidateFinder;
  11 +import pl.waw.ipipan.zil.summ.nicolas.zero.ZeroFeatureExtractor;
  12 +import pl.waw.ipipan.zil.summ.nicolas.zero.ZeroSubjectCandidate;
  13 +import weka.core.Attribute;
  14 +import weka.core.DenseInstance;
  15 +import weka.core.Instance;
  16 +import weka.core.Instances;
  17 +import weka.core.converters.ArffSaver;
13 18  
14 19 import java.io.File;
15 20 import java.io.FileReader;
16   -import java.io.FileWriter;
17 21 import java.io.IOException;
18 22 import java.util.List;
19 23 import java.util.Map;
20 24 import java.util.Set;
21 25  
22   -public class Zero {
  26 +public class TrainingDataExtractor {
23 27  
24 28 private static final String IDS_PATH = "corpora/summaries_dev";
25 29 private static final String THRIFTED_PATH = "corpora/preprocessed_full_texts/dev/";
  30 + private static final String GOLD_ZEROS_PATH = "/zeros.tsv";
26 31  
27   - private Zero() {
  32 + private TrainingDataExtractor() {
28 33 }
29 34  
30 35 public static void main(String[] args) throws IOException {
31 36  
32   - CandidateFinder candidateFinder = new CandidateFinder();
33   -
34 37 Map<String, TText> id2preprocessedText = Utils.loadPreprocessedTexts(THRIFTED_PATH);
35 38 Map<String, Set<String>> id2sentIds = loadSentenceIds(IDS_PATH);
36 39  
37   - List<List<Object>> rows = Lists.newArrayList();
  40 + ZeroScorer zeroScorer = new ZeroScorer(GOLD_ZEROS_PATH);
  41 + ZeroFeatureExtractor featureExtractor = new ZeroFeatureExtractor();
  42 +
  43 + Instances instances = Utils.createNewInstances(featureExtractor.getAttributesList());
  44 +
38 45 for (Map.Entry<String, TText> entry : id2preprocessedText.entrySet()) {
39 46 String textId = entry.getKey();
40 47  
41 48 TText text = entry.getValue();
42   - ThriftTextHelper thriftTextHelper = new ThriftTextHelper(text);
43   -
44 49 Set<String> sentenceIds = id2sentIds.get(textId);
  50 + FeatureHelper featureHelper = new FeatureHelper(text);
45 51  
46   - List<ZeroSubjectCandidate> zeroSubjectCandidates = candidateFinder.findZeroSubjectCandidates(text, sentenceIds);
  52 + List<ZeroSubjectCandidate> zeroSubjectCandidates = CandidateFinder.findZeroSubjectCandidates(text, sentenceIds);
  53 + Map<ZeroSubjectCandidate, Instance> candidate2instance = extractInstancesFromZeroCandidates(zeroSubjectCandidates, text, featureExtractor);
47 54  
48   - for (ZeroSubjectCandidate candidate : zeroSubjectCandidates) {
49   - List<Object> row = Lists.newArrayList();
50   - row.add("C");
51   - row.add(textId);
52   - row.add(thriftTextHelper.getMentionText(candidate.getZeroCandidateMention()));
53   - row.add(thriftTextHelper.getSentenceText(candidate.getPreviousSentence()));
54   - row.add(thriftTextHelper.getSentenceText(candidate.getSentence()));
55   - rows.add(row);
  55 + for (Map.Entry<ZeroSubjectCandidate, Instance> entry2 : candidate2instance.entrySet()) {
  56 + boolean good = zeroScorer.isValidCandidate(entry2.getKey(), featureHelper);
  57 + Instance instance = entry2.getValue();
  58 + instance.setDataset(instances);
  59 + instance.setClassValue(good ? 1 : 0);
  60 + instances.add(instance);
56 61 }
57 62 }
58 63  
59   - try (CSVPrinter csvPrinter = new CSVPrinter(new FileWriter("zeros.tsv"), CSVFormat.DEFAULT.withDelimiter('\t').withEscape('\\').withQuoteMode(QuoteMode.NONE).withQuote('"'))) {
60   - for (List<Object> row : rows) {
61   - csvPrinter.printRecord(row);
  64 + saveInstancesToFile(instances);
  65 + }
  66 +
  67 + public static Map<ZeroSubjectCandidate, Instance> extractInstancesFromZeroCandidates(List<ZeroSubjectCandidate> candidates, TText text, ZeroFeatureExtractor featureExtractor) {
  68 + Map<ZeroSubjectCandidate, Map<Attribute, Double>> candidate2features = featureExtractor.calculateFeatures(candidates, text);
  69 + Map<ZeroSubjectCandidate, Instance> candidate2instance = Maps.newHashMap();
  70 + for (Map.Entry<ZeroSubjectCandidate, Map<Attribute, Double>> entry : candidate2features.entrySet()) {
  71 + Instance instance = new DenseInstance(featureExtractor.getAttributesList().size());
  72 + Map<Attribute, Double> sentenceFeatures = entry.getValue();
  73 + for (Attribute attribute : featureExtractor.getAttributesList()) {
  74 + instance.setValue(attribute, sentenceFeatures.get(attribute));
62 75 }
  76 + candidate2instance.put(entry.getKey(), instance);
63 77 }
  78 + return candidate2instance;
  79 + }
64 80  
  81 + private static void saveInstancesToFile(Instances instances) throws IOException {
  82 + ArffSaver saver = new ArffSaver();
  83 + saver.setInstances(instances);
  84 + saver.setFile(new File(Constants.ZERO_DATASET_PATH));
  85 + saver.writeBatch();
65 86 }
66 87  
67 88 private static Map<String, Set<String>> loadSentenceIds(String idsPath) throws IOException {
... ...
nicolas-zero/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/train/ZeroScorer.java 0 → 100644
  1 +package pl.waw.ipipan.zil.summ.nicolas.zero.train;
  2 +
  3 +import com.google.common.collect.Maps;
  4 +import org.apache.commons.csv.CSVFormat;
  5 +import org.apache.commons.csv.CSVParser;
  6 +import org.apache.commons.csv.CSVRecord;
  7 +import org.apache.commons.csv.QuoteMode;
  8 +import pl.waw.ipipan.zil.summ.nicolas.common.Constants;
  9 +import pl.waw.ipipan.zil.summ.nicolas.common.features.FeatureHelper;
  10 +import pl.waw.ipipan.zil.summ.nicolas.zero.ZeroSubjectCandidate;
  11 +
  12 +import java.io.IOException;
  13 +import java.io.InputStream;
  14 +import java.io.InputStreamReader;
  15 +import java.util.List;
  16 +import java.util.Map;
  17 +
  18 +public class ZeroScorer {
  19 +
  20 + private static final char DELIMITER = '\t';
  21 +
  22 + private final Map<String, Boolean> candidateEncoding2Decision = Maps.newHashMap();
  23 +
  24 + public ZeroScorer(String goldZerosPath) throws IOException {
  25 + try (InputStream stream = ZeroScorer.class.getResourceAsStream(goldZerosPath);
  26 + InputStreamReader reader = new InputStreamReader(stream, Constants.ENCODING);
  27 + CSVParser parser = new CSVParser(reader, CSVFormat.DEFAULT.withDelimiter(DELIMITER).withEscape('|').withQuoteMode(QuoteMode.NONE).withQuote('~'))) {
  28 + List<CSVRecord> records = parser.getRecords();
  29 + for (CSVRecord record : records) {
  30 + candidateEncoding2Decision.put(encode(record.get(2), record.get(3), record.get(4)), record.get(0).equalsIgnoreCase("C"));
  31 + }
  32 + }
  33 + }
  34 +
  35 + private String encode(String mentionOrth, String firstSentenceOrth, String secondSentenceOrth) {
  36 + return mentionOrth + DELIMITER + firstSentenceOrth + DELIMITER + secondSentenceOrth;
  37 + }
  38 +
  39 + private String encode(ZeroSubjectCandidate candidate, FeatureHelper helper) {
  40 + String mentionOrth = helper.getMentionOrth(candidate.getZeroCandidateMention());
  41 + String firstSentenceOrth = helper.getSentenceOrth(candidate.getPreviousSentence());
  42 + String secondSentenceOrth = helper.getSentenceOrth(candidate.getSentence());
  43 + return encode(mentionOrth, firstSentenceOrth, secondSentenceOrth);
  44 + }
  45 +
  46 + public boolean isValidCandidate(ZeroSubjectCandidate candidate, FeatureHelper helper) {
  47 + return candidateEncoding2Decision.get(encode(candidate, helper));
  48 + }
  49 +
  50 +}
... ...
nicolas-zero/src/test/java/pl/waw/ipipan/zil/summ/nicolas/zero/CandidateFinderTest.java
... ... @@ -2,12 +2,11 @@ package pl.waw.ipipan.zil.summ.nicolas.zero;
2 2  
3 3 import com.google.common.collect.Sets;
4 4 import org.apache.commons.io.IOUtils;
5   -import org.junit.BeforeClass;
6 5 import org.junit.Test;
7 6 import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
8 7 import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
9   -import pl.waw.ipipan.zil.summ.nicolas.common.ThriftTextHelper;
10 8 import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
  9 +import pl.waw.ipipan.zil.summ.nicolas.common.features.FeatureHelper;
11 10  
12 11 import java.io.IOException;
13 12 import java.io.InputStream;
... ... @@ -22,18 +21,11 @@ public class CandidateFinderTest {
22 21 private static final String SAMPLE_TEXT_PATH = "/pl/waw/ipipan/zil/summ/nicolas/zero/sample_serialized_text.bin";
23 22 private static final String SAMPLE_TEXT_SUMMARY_IDS_PATH = "/pl/waw/ipipan/zil/summ/nicolas/zero/sample_summary_sentence_ids.txt";
24 23  
25   - private static CandidateFinder candidateFinder;
26   -
27   - @BeforeClass
28   - public static void init() {
29   - candidateFinder = new CandidateFinder();
30   - }
31   -
32 24 @Test
33 25 public void shouldFindZeroSubjectCandidateInSampleText() throws Exception {
34   - ThriftTextHelper sampleTextHelper = loadSampleTextHelper();
  26 + FeatureHelper sampleTextHelper = loadSampleTextHelper();
35 27 Set<String> summarySentenceIds = loadSampleTextSummarySentenceIds();
36   - List<ZeroSubjectCandidate> candidates = candidateFinder.findZeroSubjectCandidates(sampleTextHelper.getText(), summarySentenceIds);
  28 + List<ZeroSubjectCandidate> candidates = CandidateFinder.findZeroSubjectCandidates(sampleTextHelper.getText(), summarySentenceIds);
37 29 assertEquals(1, candidates.size());
38 30  
39 31 ZeroSubjectCandidate zeroSubjectCandidate = candidates.get(0);
... ... @@ -41,9 +33,9 @@ public class CandidateFinderTest {
41 33 TSentence secondSentence = zeroSubjectCandidate.getSentence();
42 34 TMention zeroCandidate = zeroSubjectCandidate.getZeroCandidateMention();
43 35  
44   - assertEquals("Ala ma kota.", sampleTextHelper.getSentenceText(firstSentence));
45   - assertEquals("Ala ma też psa.", sampleTextHelper.getSentenceText(secondSentence));
46   - assertEquals("Ala", sampleTextHelper.getMentionText(zeroCandidate));
  36 + assertEquals("Ala ma kota.", sampleTextHelper.getSentenceOrth(firstSentence));
  37 + assertEquals("Ala ma też psa.", sampleTextHelper.getSentenceOrth(secondSentence));
  38 + assertEquals("Ala", sampleTextHelper.getMentionOrth(zeroCandidate));
47 39 }
48 40  
49 41 private Set<String> loadSampleTextSummarySentenceIds() throws IOException {
... ... @@ -53,9 +45,9 @@ public class CandidateFinderTest {
53 45 }
54 46 }
55 47  
56   - private ThriftTextHelper loadSampleTextHelper() throws IOException {
  48 + private FeatureHelper loadSampleTextHelper() throws IOException {
57 49 try (InputStream stream = CandidateFinderTest.class.getResourceAsStream(SAMPLE_TEXT_PATH)) {
58   - return new ThriftTextHelper(Utils.loadThrifted(stream));
  50 + return new FeatureHelper(Utils.loadThrifted(stream));
59 51 }
60 52 }
61 53 }
62 54 \ No newline at end of file
... ...
nicolas-zero/src/test/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroSubjectInjectorTest.java deleted
1   -package pl.waw.ipipan.zil.summ.nicolas.zero;
2   -
3   -import org.junit.Test;
4   -
5   -public class ZeroSubjectInjectorTest {
6   -
7   - @Test
8   - public void shouldInit() throws Exception {
9   - ZeroSubjectInjector injector = new ZeroSubjectInjector();
10   - }
11   -}
12 0 \ No newline at end of file
nicolas-zero/src/test/resources/pl/waw/ipipan/zil/summ/nicolas/zero/sample_serialized_text.bin 0 → 100644
No preview for this file type
nicolas-zero/src/test/resources/pl/waw/ipipan/zil/summ/nicolas/zero/sample_summary_sentence_ids.txt 0 → 100644
  1 +s-2.1
  2 +s-2.2
... ...
... ... @@ -61,6 +61,11 @@
61 61 <artifactId>nicolas-common</artifactId>
62 62 <version>${project.version}</version>
63 63 </dependency>
  64 + <dependency>
  65 + <groupId>pl.waw.ipipan.zil.summ</groupId>
  66 + <artifactId>nicolas-zero</artifactId>
  67 + <version>${project.version}</version>
  68 + </dependency>
64 69  
65 70 <!-- internal -->
66 71 <dependency>
... ...