Commit f04fcb1a5b52c5ef5aa8b1f2e18384e6348f6910
1 parent: 4ced813d
small refactor
Showing 3 changed files with 51 additions and 18 deletions
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java
1 | package pl.waw.ipipan.zil.summ.nicolas; | 1 | package pl.waw.ipipan.zil.summ.nicolas; |
2 | 2 | ||
3 | import com.google.common.collect.Lists; | 3 | import com.google.common.collect.Lists; |
4 | -import com.google.common.collect.Maps; | ||
5 | import com.google.common.collect.Sets; | 4 | import com.google.common.collect.Sets; |
6 | import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; | 5 | import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; |
7 | import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; | 6 | import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; |
@@ -11,9 +10,8 @@ import pl.waw.ipipan.zil.summ.nicolas.common.Utils; | @@ -11,9 +10,8 @@ import pl.waw.ipipan.zil.summ.nicolas.common.Utils; | ||
11 | import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor; | 10 | import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor; |
12 | import pl.waw.ipipan.zil.summ.nicolas.mention.MentionModel; | 11 | import pl.waw.ipipan.zil.summ.nicolas.mention.MentionModel; |
13 | import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceFeatureExtractor; | 12 | import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceFeatureExtractor; |
13 | +import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceModel; | ||
14 | import weka.classifiers.Classifier; | 14 | import weka.classifiers.Classifier; |
15 | -import weka.core.Instance; | ||
16 | -import weka.core.Instances; | ||
17 | 15 | ||
18 | import java.io.IOException; | 16 | import java.io.IOException; |
19 | import java.util.*; | 17 | import java.util.*; |
@@ -38,11 +36,11 @@ public class Nicolas { | @@ -38,11 +36,11 @@ public class Nicolas { | ||
38 | public String summarizeThrift(TText text, int targetTokenCount) throws Exception { | 36 | public String summarizeThrift(TText text, int targetTokenCount) throws Exception { |
39 | Set<TMention> goodMentions | 37 | Set<TMention> goodMentions |
40 | = MentionModel.detectGoodMentions(mentionClassifier, featureExtractor, text); | 38 | = MentionModel.detectGoodMentions(mentionClassifier, featureExtractor, text); |
41 | - return calculateSummary(text, goodMentions, targetTokenCount, sentenceClassifier, sentenceFeatureExtractor); | 39 | + return calculateSummary(text, goodMentions, targetTokenCount); |
42 | } | 40 | } |
43 | 41 | ||
44 | - private static String calculateSummary(TText thrifted, Set<TMention> goodMentions, int targetSize, Classifier sentenceClassifier, SentenceFeatureExtractor sentenceFeatureExtractor) throws Exception { | ||
45 | - List<TSentence> selectedSentences = selectSummarySentences(thrifted, goodMentions, targetSize, sentenceClassifier, sentenceFeatureExtractor); | 42 | + private String calculateSummary(TText thrifted, Set<TMention> goodMentions, int targetSize) throws Exception { |
43 | + List<TSentence> selectedSentences = selectSummarySentences(thrifted, goodMentions, targetSize); | ||
46 | 44 | ||
47 | StringBuilder sb = new StringBuilder(); | 45 | StringBuilder sb = new StringBuilder(); |
48 | for (TSentence sent : selectedSentences) { | 46 | for (TSentence sent : selectedSentences) { |
@@ -51,19 +49,10 @@ public class Nicolas { | @@ -51,19 +49,10 @@ public class Nicolas { | ||
51 | return sb.toString().trim(); | 49 | return sb.toString().trim(); |
52 | } | 50 | } |
53 | 51 | ||
54 | - private static List<TSentence> selectSummarySentences(TText thrifted, Set<TMention> goodMentions, int targetSize, Classifier sentenceClassifier, SentenceFeatureExtractor sentenceFeatureExtractor) throws Exception { | 52 | + private List<TSentence> selectSummarySentences(TText thrifted, Set<TMention> goodMentions, int targetSize) throws Exception { |
55 | List<TSentence> sents = thrifted.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(toList()); | 53 | List<TSentence> sents = thrifted.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(toList()); |
56 | 54 | ||
57 | - Instances instances = Utils.createNewInstances(sentenceFeatureExtractor.getAttributesList()); | ||
58 | - Map<TSentence, Instance> sentence2instance = ThriftUtils.extractInstancesFromSentences(thrifted, sentenceFeatureExtractor, goodMentions); | ||
59 | - | ||
60 | - Map<TSentence, Double> sentence2score = Maps.newHashMap(); | ||
61 | - for (Map.Entry<TSentence, Instance> entry : sentence2instance.entrySet()) { | ||
62 | - Instance instance = entry.getValue(); | ||
63 | - instance.setDataset(instances); | ||
64 | - double score = sentenceClassifier.classifyInstance(instance); | ||
65 | - sentence2score.put(entry.getKey(), score); | ||
66 | - } | 55 | + Map<TSentence, Double> sentence2score = SentenceModel.scoreSentences(thrifted, goodMentions, sentenceClassifier, sentenceFeatureExtractor); |
67 | 56 | ||
68 | List<TSentence> sortedSents = Lists.newArrayList(sents); | 57 | List<TSentence> sortedSents = Lists.newArrayList(sents); |
69 | Collections.sort(sortedSents, Comparator.comparing(sentence2score::get).reversed()); | 58 | Collections.sort(sortedSents, Comparator.comparing(sentence2score::get).reversed()); |
@@ -86,4 +75,5 @@ public class Nicolas { | @@ -86,4 +75,5 @@ public class Nicolas { | ||
86 | } | 75 | } |
87 | return selectedSentences; | 76 | return selectedSentences; |
88 | } | 77 | } |
78 | + | ||
89 | } | 79 | } |
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionModel.java
@@ -18,6 +18,9 @@ public class MentionModel { | @@ -18,6 +18,9 @@ public class MentionModel { | ||
18 | 18 | ||
19 | private static final Logger LOG = LoggerFactory.getLogger(MentionModel.class); | 19 | private static final Logger LOG = LoggerFactory.getLogger(MentionModel.class); |
20 | 20 | ||
21 | + private MentionModel() { | ||
22 | + } | ||
23 | + | ||
21 | public static Set<TMention> detectGoodMentions(Classifier classifier, MentionFeatureExtractor featureExtractor, TText text) throws Exception { | 24 | public static Set<TMention> detectGoodMentions(Classifier classifier, MentionFeatureExtractor featureExtractor, TText text) throws Exception { |
22 | Set<TMention> goodMentions = Sets.newHashSet(); | 25 | Set<TMention> goodMentions = Sets.newHashSet(); |
23 | 26 | ||
@@ -31,7 +34,7 @@ public class MentionModel { | @@ -31,7 +34,7 @@ public class MentionModel { | ||
31 | if (good) | 34 | if (good) |
32 | goodMentions.add(entry.getKey()); | 35 | goodMentions.add(entry.getKey()); |
33 | } | 36 | } |
34 | - LOG.info("\t" + goodMentions.size() + "\t" + mention2instance.size()); | 37 | + LOG.info("Classified " + goodMentions.size() + " mentions as good."); |
35 | return goodMentions; | 38 | return goodMentions; |
36 | } | 39 | } |
37 | 40 |
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceModel.java
0 → 100644
1 | +package pl.waw.ipipan.zil.summ.nicolas.sentence; | ||
2 | + | ||
3 | +import com.google.common.collect.Maps; | ||
4 | +import org.slf4j.Logger; | ||
5 | +import org.slf4j.LoggerFactory; | ||
6 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; | ||
7 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; | ||
8 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TText; | ||
9 | +import pl.waw.ipipan.zil.summ.nicolas.ThriftUtils; | ||
10 | +import pl.waw.ipipan.zil.summ.nicolas.common.Utils; | ||
11 | +import weka.classifiers.Classifier; | ||
12 | +import weka.core.Instance; | ||
13 | +import weka.core.Instances; | ||
14 | + | ||
15 | +import java.util.Map; | ||
16 | +import java.util.Set; | ||
17 | + | ||
18 | +public class SentenceModel { | ||
19 | + | ||
20 | + private static final Logger LOG = LoggerFactory.getLogger(SentenceModel.class); | ||
21 | + | ||
22 | + private SentenceModel() { | ||
23 | + } | ||
24 | + | ||
25 | + public static Map<TSentence, Double> scoreSentences(TText thrifted, Set<TMention> goodMentions, Classifier sentenceClassifier, SentenceFeatureExtractor sentenceFeatureExtractor) throws Exception { | ||
26 | + Instances instances = Utils.createNewInstances(sentenceFeatureExtractor.getAttributesList()); | ||
27 | + Map<TSentence, Instance> sentence2instance = ThriftUtils.extractInstancesFromSentences(thrifted, sentenceFeatureExtractor, goodMentions); | ||
28 | + | ||
29 | + Map<TSentence, Double> sentence2score = Maps.newHashMap(); | ||
30 | + for (Map.Entry<TSentence, Instance> entry : sentence2instance.entrySet()) { | ||
31 | + Instance instance = entry.getValue(); | ||
32 | + instance.setDataset(instances); | ||
33 | + double score = sentenceClassifier.classifyInstance(instance); | ||
34 | + sentence2score.put(entry.getKey(), score); | ||
35 | + } | ||
36 | + LOG.info("Scored " + sentence2score.size() + " sentences."); | ||
37 | + | ||
38 | + return sentence2score; | ||
39 | + } | ||
40 | +} |