Commit f04fcb1a5b52c5ef5aa8b1f2e18384e6348f6910

Authored by Mateusz Kopeć
1 parent 4ced813d

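Small refactor: extract sentence scoring into SentenceModel.scoreSentences, make Nicolas summary helpers instance methods, add a private constructor to MentionModel and clarify its log message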

nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java
1 package pl.waw.ipipan.zil.summ.nicolas; 1 package pl.waw.ipipan.zil.summ.nicolas;
2 2
3 import com.google.common.collect.Lists; 3 import com.google.common.collect.Lists;
4 -import com.google.common.collect.Maps;  
5 import com.google.common.collect.Sets; 4 import com.google.common.collect.Sets;
6 import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; 5 import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
7 import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; 6 import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
@@ -11,9 +10,8 @@ import pl.waw.ipipan.zil.summ.nicolas.common.Utils; @@ -11,9 +10,8 @@ import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
11 import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor; 10 import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor;
12 import pl.waw.ipipan.zil.summ.nicolas.mention.MentionModel; 11 import pl.waw.ipipan.zil.summ.nicolas.mention.MentionModel;
13 import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceFeatureExtractor; 12 import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceFeatureExtractor;
  13 +import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceModel;
14 import weka.classifiers.Classifier; 14 import weka.classifiers.Classifier;
15 -import weka.core.Instance;  
16 -import weka.core.Instances;  
17 15
18 import java.io.IOException; 16 import java.io.IOException;
19 import java.util.*; 17 import java.util.*;
@@ -38,11 +36,11 @@ public class Nicolas { @@ -38,11 +36,11 @@ public class Nicolas {
38 public String summarizeThrift(TText text, int targetTokenCount) throws Exception { 36 public String summarizeThrift(TText text, int targetTokenCount) throws Exception {
39 Set<TMention> goodMentions 37 Set<TMention> goodMentions
40 = MentionModel.detectGoodMentions(mentionClassifier, featureExtractor, text); 38 = MentionModel.detectGoodMentions(mentionClassifier, featureExtractor, text);
41 - return calculateSummary(text, goodMentions, targetTokenCount, sentenceClassifier, sentenceFeatureExtractor); 39 + return calculateSummary(text, goodMentions, targetTokenCount);
42 } 40 }
43 41
44 - private static String calculateSummary(TText thrifted, Set<TMention> goodMentions, int targetSize, Classifier sentenceClassifier, SentenceFeatureExtractor sentenceFeatureExtractor) throws Exception {  
45 - List<TSentence> selectedSentences = selectSummarySentences(thrifted, goodMentions, targetSize, sentenceClassifier, sentenceFeatureExtractor); 42 + private String calculateSummary(TText thrifted, Set<TMention> goodMentions, int targetSize) throws Exception {
  43 + List<TSentence> selectedSentences = selectSummarySentences(thrifted, goodMentions, targetSize);
46 44
47 StringBuilder sb = new StringBuilder(); 45 StringBuilder sb = new StringBuilder();
48 for (TSentence sent : selectedSentences) { 46 for (TSentence sent : selectedSentences) {
@@ -51,19 +49,10 @@ public class Nicolas { @@ -51,19 +49,10 @@ public class Nicolas {
51 return sb.toString().trim(); 49 return sb.toString().trim();
52 } 50 }
53 51
54 - private static List<TSentence> selectSummarySentences(TText thrifted, Set<TMention> goodMentions, int targetSize, Classifier sentenceClassifier, SentenceFeatureExtractor sentenceFeatureExtractor) throws Exception { 52 + private List<TSentence> selectSummarySentences(TText thrifted, Set<TMention> goodMentions, int targetSize) throws Exception {
55 List<TSentence> sents = thrifted.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(toList()); 53 List<TSentence> sents = thrifted.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(toList());
56 54
57 - Instances instances = Utils.createNewInstances(sentenceFeatureExtractor.getAttributesList());  
58 - Map<TSentence, Instance> sentence2instance = ThriftUtils.extractInstancesFromSentences(thrifted, sentenceFeatureExtractor, goodMentions);  
59 -  
60 - Map<TSentence, Double> sentence2score = Maps.newHashMap();  
61 - for (Map.Entry<TSentence, Instance> entry : sentence2instance.entrySet()) {  
62 - Instance instance = entry.getValue();  
63 - instance.setDataset(instances);  
64 - double score = sentenceClassifier.classifyInstance(instance);  
65 - sentence2score.put(entry.getKey(), score);  
66 - } 55 + Map<TSentence, Double> sentence2score = SentenceModel.scoreSentences(thrifted, goodMentions, sentenceClassifier, sentenceFeatureExtractor);
67 56
68 List<TSentence> sortedSents = Lists.newArrayList(sents); 57 List<TSentence> sortedSents = Lists.newArrayList(sents);
69 Collections.sort(sortedSents, Comparator.comparing(sentence2score::get).reversed()); 58 Collections.sort(sortedSents, Comparator.comparing(sentence2score::get).reversed());
@@ -86,4 +75,5 @@ public class Nicolas { @@ -86,4 +75,5 @@ public class Nicolas {
86 } 75 }
87 return selectedSentences; 76 return selectedSentences;
88 } 77 }
  78 +
89 } 79 }
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionModel.java
@@ -18,6 +18,9 @@ public class MentionModel { @@ -18,6 +18,9 @@ public class MentionModel {
18 18
19 private static final Logger LOG = LoggerFactory.getLogger(MentionModel.class); 19 private static final Logger LOG = LoggerFactory.getLogger(MentionModel.class);
20 20
  21 + private MentionModel() {
  22 + }
  23 +
21 public static Set<TMention> detectGoodMentions(Classifier classifier, MentionFeatureExtractor featureExtractor, TText text) throws Exception { 24 public static Set<TMention> detectGoodMentions(Classifier classifier, MentionFeatureExtractor featureExtractor, TText text) throws Exception {
22 Set<TMention> goodMentions = Sets.newHashSet(); 25 Set<TMention> goodMentions = Sets.newHashSet();
23 26
@@ -31,7 +34,7 @@ public class MentionModel { @@ -31,7 +34,7 @@ public class MentionModel {
31 if (good) 34 if (good)
32 goodMentions.add(entry.getKey()); 35 goodMentions.add(entry.getKey());
33 } 36 }
34 - LOG.info("\t" + goodMentions.size() + "\t" + mention2instance.size()); 37 + LOG.info("Classified " + goodMentions.size() + " mentions as good.");
35 return goodMentions; 38 return goodMentions;
36 } 39 }
37 40
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceModel.java 0 → 100644
  1 +package pl.waw.ipipan.zil.summ.nicolas.sentence;
  2 +
  3 +import com.google.common.collect.Maps;
  4 +import org.slf4j.Logger;
  5 +import org.slf4j.LoggerFactory;
  6 +import pl.waw.ipipan.zil.multiservice.thrift.types.TMention;
  7 +import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence;
  8 +import pl.waw.ipipan.zil.multiservice.thrift.types.TText;
  9 +import pl.waw.ipipan.zil.summ.nicolas.ThriftUtils;
  10 +import pl.waw.ipipan.zil.summ.nicolas.common.Utils;
  11 +import weka.classifiers.Classifier;
  12 +import weka.core.Instance;
  13 +import weka.core.Instances;
  14 +
  15 +import java.util.Map;
  16 +import java.util.Set;
  17 +
  18 +public class SentenceModel {
  19 +
  20 + private static final Logger LOG = LoggerFactory.getLogger(SentenceModel.class);
  21 +
  22 + private SentenceModel() {
  23 + }
  24 +
  25 + public static Map<TSentence, Double> scoreSentences(TText thrifted, Set<TMention> goodMentions, Classifier sentenceClassifier, SentenceFeatureExtractor sentenceFeatureExtractor) throws Exception {
  26 + Instances instances = Utils.createNewInstances(sentenceFeatureExtractor.getAttributesList());
  27 + Map<TSentence, Instance> sentence2instance = ThriftUtils.extractInstancesFromSentences(thrifted, sentenceFeatureExtractor, goodMentions);
  28 +
  29 + Map<TSentence, Double> sentence2score = Maps.newHashMap();
  30 + for (Map.Entry<TSentence, Instance> entry : sentence2instance.entrySet()) {
  31 + Instance instance = entry.getValue();
  32 + instance.setDataset(instances);
  33 + double score = sentenceClassifier.classifyInstance(instance);
  34 + sentence2score.put(entry.getKey(), score);
  35 + }
  36 + LOG.info("Scored " + sentence2score.size() + " sentences.");
  37 +
  38 + return sentence2score;
  39 + }
  40 +}