Commit f04fcb1a5b52c5ef5aa8b1f2e18384e6348f6910

Authored by Mateusz Kopeć
1 parent 4ced813d

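small refactor: extract sentence scoring into SentenceModel.scoreSentences and make the private summary helpers in Nicolas instance methods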

nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java
1 1 package pl.waw.ipipan.zil.summ.nicolas;
2 2  
3 3 import com.google.common.collect.Lists;
4   -import com.google.common.collect.Maps;
5 4 import com.google.common.collect.Sets;
6 5 import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
7 6 import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
... ... @@ -11,9 +10,8 @@ import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
11 10 import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor;
12 11 import pl.waw.ipipan.zil.summ.nicolas.mention.MentionModel;
13 12 import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceFeatureExtractor;
  13 +import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceModel;
14 14 import weka.classifiers.Classifier;
15   -import weka.core.Instance;
16   -import weka.core.Instances;
17 15  
18 16 import java.io.IOException;
19 17 import java.util.*;
... ... @@ -38,11 +36,11 @@ public class Nicolas {
38 36 public String summarizeThrift(TText text, int targetTokenCount) throws Exception {
39 37 Set<TMention> goodMentions
40 38 = MentionModel.detectGoodMentions(mentionClassifier, featureExtractor, text);
41   - return calculateSummary(text, goodMentions, targetTokenCount, sentenceClassifier, sentenceFeatureExtractor);
  39 + return calculateSummary(text, goodMentions, targetTokenCount);
42 40 }
43 41  
44   - private static String calculateSummary(TText thrifted, Set<TMention> goodMentions, int targetSize, Classifier sentenceClassifier, SentenceFeatureExtractor sentenceFeatureExtractor) throws Exception {
45   - List<TSentence> selectedSentences = selectSummarySentences(thrifted, goodMentions, targetSize, sentenceClassifier, sentenceFeatureExtractor);
  42 + private String calculateSummary(TText thrifted, Set<TMention> goodMentions, int targetSize) throws Exception {
  43 + List<TSentence> selectedSentences = selectSummarySentences(thrifted, goodMentions, targetSize);
46 44  
47 45 StringBuilder sb = new StringBuilder();
48 46 for (TSentence sent : selectedSentences) {
... ... @@ -51,19 +49,10 @@ public class Nicolas {
51 49 return sb.toString().trim();
52 50 }
53 51  
54   - private static List<TSentence> selectSummarySentences(TText thrifted, Set<TMention> goodMentions, int targetSize, Classifier sentenceClassifier, SentenceFeatureExtractor sentenceFeatureExtractor) throws Exception {
  52 + private List<TSentence> selectSummarySentences(TText thrifted, Set<TMention> goodMentions, int targetSize) throws Exception {
55 53 List<TSentence> sents = thrifted.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(toList());
56 54  
57   - Instances instances = Utils.createNewInstances(sentenceFeatureExtractor.getAttributesList());
58   - Map<TSentence, Instance> sentence2instance = ThriftUtils.extractInstancesFromSentences(thrifted, sentenceFeatureExtractor, goodMentions);
59   -
60   - Map<TSentence, Double> sentence2score = Maps.newHashMap();
61   - for (Map.Entry<TSentence, Instance> entry : sentence2instance.entrySet()) {
62   - Instance instance = entry.getValue();
63   - instance.setDataset(instances);
64   - double score = sentenceClassifier.classifyInstance(instance);
65   - sentence2score.put(entry.getKey(), score);
66   - }
  55 + Map<TSentence, Double> sentence2score = SentenceModel.scoreSentences(thrifted, goodMentions, sentenceClassifier, sentenceFeatureExtractor);
67 56  
68 57 List<TSentence> sortedSents = Lists.newArrayList(sents);
69 58 Collections.sort(sortedSents, Comparator.comparing(sentence2score::get).reversed());
... ... @@ -86,4 +75,5 @@ public class Nicolas {
86 75 }
87 76 return selectedSentences;
88 77 }
  78 +
89 79 }
... ...
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionModel.java
... ... @@ -18,6 +18,9 @@ public class MentionModel {
18 18  
19 19 private static final Logger LOG = LoggerFactory.getLogger(MentionModel.class);
20 20  
  21 + private MentionModel() {
  22 + }
  23 +
21 24 public static Set<TMention> detectGoodMentions(Classifier classifier, MentionFeatureExtractor featureExtractor, TText text) throws Exception {
22 25 Set<TMention> goodMentions = Sets.newHashSet();
23 26  
... ... @@ -31,7 +34,7 @@ public class MentionModel {
31 34 if (good)
32 35 goodMentions.add(entry.getKey());
33 36 }
34   - LOG.info("\t" + goodMentions.size() + "\t" + mention2instance.size());
  37 + LOG.info("Classified " + goodMentions.size() + " mentions as good.");
35 38 return goodMentions;
36 39 }
37 40  
... ...
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceModel.java 0 → 100644
  1 +package pl.waw.ipipan.zil.summ.nicolas.sentence;
  2 +
  3 +import com.google.common.collect.Maps;
  4 +import org.slf4j.Logger;
  5 +import org.slf4j.LoggerFactory;
  6 +import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
  7 +import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
  8 +import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
  9 +import pl.waw.ipipan.zil.summ.nicolas.ThriftUtils;
  10 +import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
  11 +import weka.classifiers.Classifier;
  12 +import weka.core.Instance;
  13 +import weka.core.Instances;
  14 +
  15 +import java.util.Map;
  16 +import java.util.Set;
  17 +
  18 +public class SentenceModel {
  19 +
  20 + private static final Logger LOG = LoggerFactory.getLogger(SentenceModel.class);
  21 +
  22 + private SentenceModel() {
  23 + }
  24 +
  25 + public static Map<TSentence, Double> scoreSentences(TText thrifted, Set<TMention> goodMentions, Classifier sentenceClassifier, SentenceFeatureExtractor sentenceFeatureExtractor) throws Exception {
  26 + Instances instances = Utils.createNewInstances(sentenceFeatureExtractor.getAttributesList());
  27 + Map<TSentence, Instance> sentence2instance = ThriftUtils.extractInstancesFromSentences(thrifted, sentenceFeatureExtractor, goodMentions);
  28 +
  29 + Map<TSentence, Double> sentence2score = Maps.newHashMap();
  30 + for (Map.Entry<TSentence, Instance> entry : sentence2instance.entrySet()) {
  31 + Instance instance = entry.getValue();
  32 + instance.setDataset(instances);
  33 + double score = sentenceClassifier.classifyInstance(instance);
  34 + sentence2score.put(entry.getKey(), score);
  35 + }
  36 + LOG.info("Scored " + sentence2score.size() + " sentences.");
  37 +
  38 + return sentence2score;
  39 + }
  40 +}
... ...