Commit f04fcb1a5b52c5ef5aa8b1f2e18384e6348f6910
1 parent: 4ced813d
small refactor
Showing 3 changed files with 51 additions and 18 deletions
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java
1 | 1 | package pl.waw.ipipan.zil.summ.nicolas; |
2 | 2 | |
3 | 3 | import com.google.common.collect.Lists; |
4 | -import com.google.common.collect.Maps; | |
5 | 4 | import com.google.common.collect.Sets; |
6 | 5 | import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; |
7 | 6 | import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; |
... | ... | @@ -11,9 +10,8 @@ import pl.waw.ipipan.zil.summ.nicolas.common.Utils; |
11 | 10 | import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor; |
12 | 11 | import pl.waw.ipipan.zil.summ.nicolas.mention.MentionModel; |
13 | 12 | import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceFeatureExtractor; |
13 | +import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceModel; | |
14 | 14 | import weka.classifiers.Classifier; |
15 | -import weka.core.Instance; | |
16 | -import weka.core.Instances; | |
17 | 15 | |
18 | 16 | import java.io.IOException; |
19 | 17 | import java.util.*; |
... | ... | @@ -38,11 +36,11 @@ public class Nicolas { |
38 | 36 | public String summarizeThrift(TText text, int targetTokenCount) throws Exception { |
39 | 37 | Set<TMention> goodMentions |
40 | 38 | = MentionModel.detectGoodMentions(mentionClassifier, featureExtractor, text); |
41 | - return calculateSummary(text, goodMentions, targetTokenCount, sentenceClassifier, sentenceFeatureExtractor); | |
39 | + return calculateSummary(text, goodMentions, targetTokenCount); | |
42 | 40 | } |
43 | 41 | |
44 | - private static String calculateSummary(TText thrifted, Set<TMention> goodMentions, int targetSize, Classifier sentenceClassifier, SentenceFeatureExtractor sentenceFeatureExtractor) throws Exception { | |
45 | - List<TSentence> selectedSentences = selectSummarySentences(thrifted, goodMentions, targetSize, sentenceClassifier, sentenceFeatureExtractor); | |
42 | + private String calculateSummary(TText thrifted, Set<TMention> goodMentions, int targetSize) throws Exception { | |
43 | + List<TSentence> selectedSentences = selectSummarySentences(thrifted, goodMentions, targetSize); | |
46 | 44 | |
47 | 45 | StringBuilder sb = new StringBuilder(); |
48 | 46 | for (TSentence sent : selectedSentences) { |
... | ... | @@ -51,19 +49,10 @@ public class Nicolas { |
51 | 49 | return sb.toString().trim(); |
52 | 50 | } |
53 | 51 | |
54 | - private static List<TSentence> selectSummarySentences(TText thrifted, Set<TMention> goodMentions, int targetSize, Classifier sentenceClassifier, SentenceFeatureExtractor sentenceFeatureExtractor) throws Exception { | |
52 | + private List<TSentence> selectSummarySentences(TText thrifted, Set<TMention> goodMentions, int targetSize) throws Exception { | |
55 | 53 | List<TSentence> sents = thrifted.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(toList()); |
56 | 54 | |
57 | - Instances instances = Utils.createNewInstances(sentenceFeatureExtractor.getAttributesList()); | |
58 | - Map<TSentence, Instance> sentence2instance = ThriftUtils.extractInstancesFromSentences(thrifted, sentenceFeatureExtractor, goodMentions); | |
59 | - | |
60 | - Map<TSentence, Double> sentence2score = Maps.newHashMap(); | |
61 | - for (Map.Entry<TSentence, Instance> entry : sentence2instance.entrySet()) { | |
62 | - Instance instance = entry.getValue(); | |
63 | - instance.setDataset(instances); | |
64 | - double score = sentenceClassifier.classifyInstance(instance); | |
65 | - sentence2score.put(entry.getKey(), score); | |
66 | - } | |
55 | + Map<TSentence, Double> sentence2score = SentenceModel.scoreSentences(thrifted, goodMentions, sentenceClassifier, sentenceFeatureExtractor); | |
67 | 56 | |
68 | 57 | List<TSentence> sortedSents = Lists.newArrayList(sents); |
69 | 58 | Collections.sort(sortedSents, Comparator.comparing(sentence2score::get).reversed()); |
... | ... | @@ -86,4 +75,5 @@ public class Nicolas { |
86 | 75 | } |
87 | 76 | return selectedSentences; |
88 | 77 | } |
78 | + | |
89 | 79 | } |
... | ... |
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionModel.java
... | ... | @@ -18,6 +18,9 @@ public class MentionModel { |
18 | 18 | |
19 | 19 | private static final Logger LOG = LoggerFactory.getLogger(MentionModel.class); |
20 | 20 | |
21 | + private MentionModel() { | |
22 | + } | |
23 | + | |
21 | 24 | public static Set<TMention> detectGoodMentions(Classifier classifier, MentionFeatureExtractor featureExtractor, TText text) throws Exception { |
22 | 25 | Set<TMention> goodMentions = Sets.newHashSet(); |
23 | 26 | |
... | ... | @@ -31,7 +34,7 @@ public class MentionModel { |
31 | 34 | if (good) |
32 | 35 | goodMentions.add(entry.getKey()); |
33 | 36 | } |
34 | - LOG.info("\t" + goodMentions.size() + "\t" + mention2instance.size()); | |
37 | + LOG.info("Classified " + goodMentions.size() + " mentions as good."); | |
35 | 38 | return goodMentions; |
36 | 39 | } |
37 | 40 | |
... | ... |
nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceModel.java
0 → 100644
1 | +package pl.waw.ipipan.zil.summ.nicolas.sentence; | |
2 | + | |
3 | +import com.google.common.collect.Maps; | |
4 | +import org.slf4j.Logger; | |
5 | +import org.slf4j.LoggerFactory; | |
6 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; | |
7 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; | |
8 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TText; | |
9 | +import pl.waw.ipipan.zil.summ.nicolas.ThriftUtils; | |
10 | +import pl.waw.ipipan.zil.summ.nicolas.common.Utils; | |
11 | +import weka.classifiers.Classifier; | |
12 | +import weka.core.Instance; | |
13 | +import weka.core.Instances; | |
14 | + | |
15 | +import java.util.Map; | |
16 | +import java.util.Set; | |
17 | + | |
18 | +public class SentenceModel { | |
19 | + | |
20 | + private static final Logger LOG = LoggerFactory.getLogger(SentenceModel.class); | |
21 | + | |
22 | + private SentenceModel() { | |
23 | + } | |
24 | + | |
25 | + public static Map<TSentence, Double> scoreSentences(TText thrifted, Set<TMention> goodMentions, Classifier sentenceClassifier, SentenceFeatureExtractor sentenceFeatureExtractor) throws Exception { | |
26 | + Instances instances = Utils.createNewInstances(sentenceFeatureExtractor.getAttributesList()); | |
27 | + Map<TSentence, Instance> sentence2instance = ThriftUtils.extractInstancesFromSentences(thrifted, sentenceFeatureExtractor, goodMentions); | |
28 | + | |
29 | + Map<TSentence, Double> sentence2score = Maps.newHashMap(); | |
30 | + for (Map.Entry<TSentence, Instance> entry : sentence2instance.entrySet()) { | |
31 | + Instance instance = entry.getValue(); | |
32 | + instance.setDataset(instances); | |
33 | + double score = sentenceClassifier.classifyInstance(instance); | |
34 | + sentence2score.put(entry.getKey(), score); | |
35 | + } | |
36 | + LOG.info("Scored " + sentence2score.size() + " sentences."); | |
37 | + | |
38 | + return sentence2score; | |
39 | + } | |
40 | +} | |
... | ... |