From f04fcb1a5b52c5ef5aa8b1f2e18384e6348f6910 Mon Sep 17 00:00:00 2001 From: Mateusz Kopeć <m.kopec@ipipan.waw.pl> Date: Wed, 30 Nov 2016 22:07:23 +0100 Subject: [PATCH] small refactor --- nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java | 24 +++++++----------------- nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionModel.java | 5 ++++- nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceModel.java | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 18 deletions(-) create mode 100644 nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceModel.java diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java index e4f86d4..8003c5a 100644 --- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java +++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/Nicolas.java @@ -1,7 +1,6 @@ package pl.waw.ipipan.zil.summ.nicolas; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import com.google.common.collect.Sets; import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; @@ -11,9 +10,8 @@ import pl.waw.ipipan.zil.summ.nicolas.common.Utils; import pl.waw.ipipan.zil.summ.nicolas.mention.MentionFeatureExtractor; import pl.waw.ipipan.zil.summ.nicolas.mention.MentionModel; import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceFeatureExtractor; +import pl.waw.ipipan.zil.summ.nicolas.sentence.SentenceModel; import weka.classifiers.Classifier; -import weka.core.Instance; -import weka.core.Instances; import java.io.IOException; import java.util.*; @@ -38,11 +36,11 @@ public class Nicolas { public String summarizeThrift(TText text, int targetTokenCount) throws Exception { Set<TMention> goodMentions = MentionModel.detectGoodMentions(mentionClassifier, featureExtractor, text); - return calculateSummary(text, goodMentions, targetTokenCount, sentenceClassifier, sentenceFeatureExtractor); + return calculateSummary(text, goodMentions, targetTokenCount); } - private static String calculateSummary(TText thrifted, Set<TMention> goodMentions, int targetSize, Classifier sentenceClassifier, SentenceFeatureExtractor sentenceFeatureExtractor) throws Exception { - List<TSentence> selectedSentences = selectSummarySentences(thrifted, goodMentions, targetSize, sentenceClassifier, sentenceFeatureExtractor); + private String calculateSummary(TText thrifted, Set<TMention> goodMentions, int targetSize) throws Exception { + List<TSentence> selectedSentences = selectSummarySentences(thrifted, goodMentions, targetSize); StringBuilder sb = new StringBuilder(); for (TSentence sent : selectedSentences) { @@ -51,19 +49,10 @@ public class Nicolas { return sb.toString().trim(); } - private static List<TSentence> selectSummarySentences(TText thrifted, Set<TMention> goodMentions, int targetSize, Classifier sentenceClassifier, SentenceFeatureExtractor sentenceFeatureExtractor) throws Exception { + private List<TSentence> selectSummarySentences(TText thrifted, Set<TMention> goodMentions, int targetSize) throws Exception { List<TSentence> sents = thrifted.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(toList()); - Instances instances = Utils.createNewInstances(sentenceFeatureExtractor.getAttributesList()); - Map<TSentence, Instance> sentence2instance = ThriftUtils.extractInstancesFromSentences(thrifted, sentenceFeatureExtractor, goodMentions); - - Map<TSentence, Double> sentence2score = Maps.newHashMap(); - for (Map.Entry<TSentence, Instance> entry : sentence2instance.entrySet()) { - Instance instance = entry.getValue(); - instance.setDataset(instances); - double score = sentenceClassifier.classifyInstance(instance); - sentence2score.put(entry.getKey(), score); - } + Map<TSentence, Double> sentence2score = SentenceModel.scoreSentences(thrifted, goodMentions, sentenceClassifier, sentenceFeatureExtractor); List<TSentence> sortedSents = Lists.newArrayList(sents); Collections.sort(sortedSents, Comparator.comparing(sentence2score::get).reversed()); @@ -86,4 +75,5 @@ public class Nicolas { } return selectedSentences; } + } diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionModel.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionModel.java index 1ba0ef0..3f65c48 100644 --- a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionModel.java +++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionModel.java @@ -18,6 +18,9 @@ public class MentionModel { private static final Logger LOG = LoggerFactory.getLogger(MentionModel.class); + private MentionModel() { + } + public static Set<TMention> detectGoodMentions(Classifier classifier, MentionFeatureExtractor featureExtractor, TText text) throws Exception { Set<TMention> goodMentions = Sets.newHashSet(); @@ -31,7 +34,7 @@ public class MentionModel { if (good) goodMentions.add(entry.getKey()); } - LOG.info("\t" + goodMentions.size() + "\t" + mention2instance.size()); + LOG.info("Classified " + goodMentions.size() + " mentions as good."); return goodMentions; } diff --git a/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceModel.java b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceModel.java new file mode 100644 index 0000000..c9a43d0 --- /dev/null +++ b/nicolas-core/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceModel.java @@ -0,0 +1,40 @@ +package pl.waw.ipipan.zil.summ.nicolas.sentence; + +import com.google.common.collect.Maps; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; +import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; +import pl.waw.ipipan.zil.multiservice.thrift.types.TText; +import pl.waw.ipipan.zil.summ.nicolas.ThriftUtils; +import pl.waw.ipipan.zil.summ.nicolas.common.Utils; +import weka.classifiers.Classifier; +import weka.core.Instance; +import weka.core.Instances; + +import java.util.Map; +import java.util.Set; + +public class SentenceModel { + + private static final Logger LOG = LoggerFactory.getLogger(SentenceModel.class); + + private SentenceModel() { + } + + public static Map<TSentence, Double> scoreSentences(TText thrifted, Set<TMention> goodMentions, Classifier sentenceClassifier, SentenceFeatureExtractor sentenceFeatureExtractor) throws Exception { + Instances instances = Utils.createNewInstances(sentenceFeatureExtractor.getAttributesList()); + Map<TSentence, Instance> sentence2instance = ThriftUtils.extractInstancesFromSentences(thrifted, sentenceFeatureExtractor, goodMentions); + + Map<TSentence, Double> sentence2score = Maps.newHashMap(); + for (Map.Entry<TSentence, Instance> entry : sentence2instance.entrySet()) { + Instance instance = entry.getValue(); + instance.setDataset(instances); + double score = sentenceClassifier.classifyInstance(instance); + sentence2score.put(entry.getKey(), score); + } + LOG.info("Scored " + sentence2score.size() + " sentences."); + + return sentence2score; + } +} -- libgit2 0.22.2