diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/eval/SummarizeTestCorpus.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/eval/SummarizeTestCorpus.java index 557ab02..54fffa2 100644 --- a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/eval/SummarizeTestCorpus.java +++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/eval/SummarizeTestCorpus.java @@ -57,7 +57,13 @@ public class SummarizeTestCorpus { private static Map<String, String> summarizeTexts(Map<String, TText> id2preprocessedText, Nicolas nicolas) throws NicolasException { Map<String, String> id2summary = Maps.newHashMap(); + int textNumber = 0; for (Map.Entry<String, TText> entry : id2preprocessedText.entrySet()) { + /* Report progress before every 10th text; the logged count is the number of texts handled by earlier iterations. */ + if (textNumber % 10 == 0) { + LOG.info("{} texts summarized.", textNumber); + } + textNumber++; TText text = entry.getValue(); int targetSize = calculateTargetSize(text); String summary = nicolas.summarizeThrift(text, targetSize); diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/model/Settings.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/model/Settings.java index 923fb2b..72ce339 100644 --- a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/model/Settings.java +++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/model/Settings.java @@ -4,6 +4,7 @@ import weka.classifiers.Classifier; import weka.classifiers.meta.AttributeSelectedClassifier; import weka.classifiers.trees.LMT; import weka.classifiers.trees.RandomForest; +import weka.classifiers.trees.RandomTree; public class Settings { @@ -16,6 +17,10 @@ public class Settings { public static Classifier getMentionClassifier() { RandomForest classifier = new RandomForest(); + RandomTree tree = new RandomTree(); + tree.setMaxDepth(10); + tree.setMinNum(2); + classifier.setClassifier(tree); classifier.setNumIterations(NUM_ITERATIONS); classifier.setSeed(SEED); classifier.setNumExecutionSlots(NUM_EXECUTION_SLOTS); 
@@ -24,6 +29,10 @@ public class Settings { public static Classifier getSentenceClassifier() { RandomForest classifier = new RandomForest(); + RandomTree tree = new RandomTree(); + tree.setMaxDepth(10); + tree.setMinNum(2); + classifier.setClassifier(tree); classifier.setNumIterations(NUM_ITERATIONS); classifier.setSeed(SEED); classifier.setNumExecutionSlots(NUM_EXECUTION_SLOTS); diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/TrainAllModels.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/TrainAllModels.java index f8b9d7c..e0d4332 100644 --- a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/TrainAllModels.java +++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/TrainAllModels.java @@ -44,6 +44,7 @@ public class TrainAllModels { LOG.info("Building classifier..."); classifier.buildClassifier(instances); LOG.info("...done. Build classifier: {}", classifier); + instances.clear(); String target = TARGET_MODEL_DIR + targetPath; LOG.info("Saving classifier at: {}", target);