From c9b1021d986bf2636bfd978885170dedd7d45546 Mon Sep 17 00:00:00 2001
From: Mateusz Kopeć <m.kopec@ipipan.waw.pl>
Date: Wed, 12 Apr 2017 23:20:45 +0200
Subject: [PATCH] fix sonar issues

---
 nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Cli.java                                      |   5 ++++-
 nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureExtractor.java                    |   7 +++++--
 nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureHelper.java                       | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------
 nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionFeatureExtractor.java              |  31 +++++++++++++++++++------------
 nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceFeatureExtractor.java            |  12 +++++++-----
 nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/utils/ResourceUtils.java                          |   3 ++-
 nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroFeatureExtractor.java                    |  16 ++++++----------
 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/CorpusHelper.java                               |   2 +-
 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/PathConstants.java                              |  12 ++++++++----
 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/eval/search/Crossvalidate.java                  |  13 +++++--------
 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/model/MentionScorer.java                  |  30 +++++++++++++++++-------------
 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/ExtractGoldSummaries.java        |  36 ++++++++++++++++++++----------------
 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/ExtractMostFrequentMentions.java |   3 +--
 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/PrepareTrainingData.java         |  16 +++++++++-------
 nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/TrainAllModels.java              |   8 ++------
 15 files changed, 177 insertions(+), 142 deletions(-)

diff --git a/nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Cli.java b/nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Cli.java
index d3257ea..2eb5527 100644
--- a/nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Cli.java
+++ b/nicolas-cli/src/main/java/pl/waw/ipipan/zil/summ/nicolas/cli/Cli.java
@@ -92,7 +92,10 @@ class Cli {
         public void validate(String name, String value) {
             File file = new File(value);
             try {
-                file.createNewFile();
+                boolean newFile = file.createNewFile();
+                if (!newFile) {
+                    LOG.warn("Output file exists and will be overridden.");
+                }
             } catch (IOException ex) {
                 throw new ParameterException("Parameter " + name
                         + " should be a valid file path (found " + value + ")", ex);
diff --git a/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureExtractor.java b/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureExtractor.java
index 1243c73..4a49879 100644
--- a/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureExtractor.java
+++ b/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureExtractor.java
@@ -67,11 +67,14 @@ public class FeatureExtractor {
             }
         }
         for (Map<Attribute, Double> entityAttributes : entity2attributes.values()) {
-            for (Attribute attribute : attribute2max.keySet()) {
+            for (Map.Entry<Attribute, Double> entry : attribute2max.entrySet()) {
+                Attribute attribute = entry.getKey();
+                Double max = entry.getValue();
+
                 Attribute normalizedAttribute = getAttributeByName(name2attribute.inverse().get(attribute) + "_normalized");
                 entityAttributes.put(normalizedAttribute,
                         (entityAttributes.get(attribute) - attribute2min.get(attribute))
-                                / (attribute2max.get(attribute) - attribute2min.get(attribute)));
+                                / (max - attribute2min.get(attribute)));
             }
         }
     }
diff --git a/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureHelper.java b/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureHelper.java
index 0bd02ff..2750afd 100644
--- a/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureHelper.java
+++ b/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/features/FeatureHelper.java
@@ -54,49 +54,56 @@ public class FeatureHelper {
             coref2mentions.put(coref, ments);
         }
 
-        int parIdx = 0;
-        int sentIdx = 0;
-        int mentionIdx = 0;
+        Counters counters = new Counters();
         for (TParagraph par : preprocessedText.getParagraphs()) {
-            Map<TMention, String> m2o = loadMention2Orth(par.getSentences());
-            mention2Orth.putAll(m2o);
-            Map<TMention, String> m2b = loadMention2Base(par.getSentences());
-            mention2Base.putAll(m2b);
-
-            int sentIdxInPar = 0;
-            int mentionIdxInPar = 0;
-            for (TSentence sent : par.getSentences()) {
-
-                Map<String, TToken> tokenId2token = sent.getTokens().stream().collect(toMap(TToken::getId, Function.identity()));
-
-                Map<String, Set<TNamedEntity>> tokenId2namedEntities = Maps.newHashMap();
-                for (TNamedEntity namedEntity : sent.getNames()) {
-                    for (String childId : namedEntity.getChildIds()) {
-                        tokenId2namedEntities.putIfAbsent(childId, Sets.newHashSet());
-                        tokenId2namedEntities.get(childId).add(namedEntity);
-                    }
-                }
+            processParagraph(counters, par);
+        }
+    }
 
-                int mentionIdxInSent = 0;
-                for (TMention mention : sent.getMentions()) {
-                    mention2sent.put(mention, sent);
-                    mention2par.put(mention, par);
-                    mention2index.put(mention, mentionIdx++);
-                    mention2firstTokenIndex.put(mention, sent.getTokens().indexOf(tokenId2token.get(mention.getChildIds().iterator().next())));
-                    mention2indexInSent.put(mention, mentionIdxInSent++);
-                    mention2indexInPar.put(mention, mentionIdxInPar++);
-
-                    String firstHeadTokenId = mention.getHeadIds().iterator().next();
-                    mention2head.put(mention, tokenId2token.get(firstHeadTokenId));
-                    if (tokenId2namedEntities.containsKey(firstHeadTokenId))
-                        mentionsInNamedEntities.add(mention);
-                }
-                sent2Index.put(sent, sentIdx++);
-                sent2IndexInPar.put(sent, sentIdxInPar++);
+    private void processParagraph(Counters counters, TParagraph par) {
+        Map<TMention, String> m2o = loadMention2Orth(par.getSentences());
+        mention2Orth.putAll(m2o);
+        Map<TMention, String> m2b = loadMention2Base(par.getSentences());
+        mention2Base.putAll(m2b);
+
+        int sentIdxInPar = 0;
+        int mentionIdxInPar = 0;
+        for (TSentence sent : par.getSentences()) {
+
+            Map<String, TToken> tokenId2token = sent.getTokens().stream().collect(toMap(TToken::getId, Function.identity()));
+
+            Map<String, Set<TNamedEntity>> tokenId2namedEntities = getTokenId2NamedEntities(sent);
+
+            int mentionIdxInSent = 0;
+            for (TMention mention : sent.getMentions()) {
+                mention2sent.put(mention, sent);
+                mention2par.put(mention, par);
+                mention2index.put(mention, counters.mentionIdx++);
+                mention2firstTokenIndex.put(mention, sent.getTokens().indexOf(tokenId2token.get(mention.getChildIds().iterator().next())));
+                mention2indexInSent.put(mention, mentionIdxInSent++);
+                mention2indexInPar.put(mention, mentionIdxInPar++);
+
+                String firstHeadTokenId = mention.getHeadIds().iterator().next();
+                mention2head.put(mention, tokenId2token.get(firstHeadTokenId));
+                if (tokenId2namedEntities.containsKey(firstHeadTokenId))
+                    mentionsInNamedEntities.add(mention);
             }
+            sent2Index.put(sent, counters.sentIdx++);
+            sent2IndexInPar.put(sent, sentIdxInPar++);
+        }
 
-            par2Index.put(par, parIdx++);
+        par2Index.put(par, counters.parIdx++);
+    }
+
+    private Map<String, Set<TNamedEntity>> getTokenId2NamedEntities(TSentence sent) {
+        Map<String, Set<TNamedEntity>> tokenId2namedEntities = Maps.newHashMap();
+        for (TNamedEntity namedEntity : sent.getNames()) {
+            for (String childId : namedEntity.getChildIds()) {
+                tokenId2namedEntities.putIfAbsent(childId, Sets.newHashSet());
+                tokenId2namedEntities.get(childId).add(namedEntity);
+            }
         }
+        return tokenId2namedEntities;
     }
 
     public List<TMention> getMentions() {
@@ -220,31 +227,35 @@ public class FeatureHelper {
         return mention2sent.get(mention).getTokens().get(idx - 1);
     }
 
-    private static Map<TMention, String> loadMention2Orth(List<TSentence> sents) {
+    private static Map<TMention, String> loadMention2Orth(List<TSentence> sentences) {
         Map<TMention, String> mention2orth = Maps.newHashMap();
-        for (TSentence s : sents) {
-            Map<String, TToken> tokId2tok = s.getTokens().stream().collect(Collectors.toMap(TToken::getId, Function.identity()));
+        for (TSentence sentence : sentences) {
+            Map<String, TToken> tokId2tok = sentence.getTokens().stream().collect(Collectors.toMap(TToken::getId, Function.identity()));
 
-            for (TMention m : s.getMentions()) {
-                StringBuilder mentionOrth = new StringBuilder();
-                for (String tokId : m.getChildIds()) {
-                    TToken token = tokId2tok.get(tokId);
-                    if (!token.isNoPrecedingSpace())
-                        mentionOrth.append(" ");
-                    mentionOrth.append(token.getOrth());
-                }
-                mention2orth.put(m, mentionOrth.toString().trim());
+            for (TMention mention : sentence.getMentions()) {
+                mention2orth.put(mention, getMentionOrth(tokId2tok, mention));
             }
         }
         return mention2orth;
     }
 
-    private static Map<TMention, String> loadMention2Base(List<TSentence> sents) {
+    private static String getMentionOrth(Map<String, TToken> tokId2tok, TMention m) {
+        StringBuilder mentionOrth = new StringBuilder();
+        for (String tokId : m.getChildIds()) {
+            TToken token = tokId2tok.get(tokId);
+            if (!token.isNoPrecedingSpace())
+                mentionOrth.append(" ");
+            mentionOrth.append(token.getOrth());
+        }
+        return mentionOrth.toString().trim();
+    }
+
+    private static Map<TMention, String> loadMention2Base(List<TSentence> sentences) {
         Map<TMention, String> mention2base = Maps.newHashMap();
-        for (TSentence s : sents) {
-            Map<String, String> tokId2base = s.getTokens().stream().collect(Collectors.toMap(TToken::getId, tok -> tok.getChosenInterpretation().getBase()));
+        for (TSentence sentence : sentences) {
+            Map<String, String> tokId2base = sentence.getTokens().stream().collect(Collectors.toMap(TToken::getId, tok -> tok.getChosenInterpretation().getBase()));
 
-            for (TMention m : s.getMentions()) {
+            for (TMention m : sentence.getMentions()) {
                 StringBuilder mentionBase = new StringBuilder();
                 for (String tokId : m.getChildIds()) {
                     mentionBase.append(" ");
@@ -255,4 +266,10 @@ public class FeatureHelper {
         }
         return mention2base;
     }
+
+    private static class Counters { // running document-wide indices, advanced by processParagraph; static because no FeatureHelper state is used
+        int parIdx = 0; // next paragraph index (stored in par2Index)
+        int sentIdx = 0; // next sentence index across all paragraphs (stored in sent2Index)
+        int mentionIdx = 0; // next mention index across all paragraphs (stored in mention2index)
+    }
 }
diff --git a/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionFeatureExtractor.java b/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionFeatureExtractor.java
index 9b2b8b5..7cb379a 100644
--- a/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionFeatureExtractor.java
+++ b/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/mention/MentionFeatureExtractor.java
@@ -18,6 +18,10 @@ import java.util.stream.Collectors;
 
 public class MentionFeatureExtractor extends FeatureExtractor {
 
+    private static final String SCORE_ATTRIBUTE_NAME = "score";
+    private static final String OTHER_VALUE = "other";
+    private static final String NULL_VALUE = "null";
+
     private final List<String> frequentBases;
 
     public MentionFeatureExtractor() throws IOException {
@@ -48,10 +52,10 @@ public class MentionFeatureExtractor extends FeatureExtractor {
             addBinaryAttribute(prefix + "_is_named");
             addBinaryAttribute(prefix + "_is_pronoun");
             addNominalAttribute(prefix + "_ctag", Constants.POS_TAGS);
-            addNominalAttribute(prefix + "_person", Lists.newArrayList("other", "null", "pri", "sec", "ter"));
-            addNominalAttribute(prefix + "_case", Lists.newArrayList("other", "null", "nom", "acc", "dat", "gen", "loc", "inst", "voc"));
-            addNominalAttribute(prefix + "_number", Lists.newArrayList("other", "null", "sg", "pl"));
-            addNominalAttribute(prefix + "_gender", Lists.newArrayList("other", "null", "f", "m1", "m2", "m3", "n"));
+            addNominalAttribute(prefix + "_person", Lists.newArrayList(OTHER_VALUE, NULL_VALUE, "pri", "sec", "ter"));
+            addNominalAttribute(prefix + "_case", Lists.newArrayList(OTHER_VALUE, NULL_VALUE, "nom", "acc", "dat", "gen", "loc", "inst", "voc"));
+            addNominalAttribute(prefix + "_number", Lists.newArrayList(OTHER_VALUE, NULL_VALUE, "sg", "pl"));
+            addNominalAttribute(prefix + "_gender", Lists.newArrayList(OTHER_VALUE, NULL_VALUE, "f", "m1", "m2", "m3", "n"));
 
             // relation to other
             addBinaryAttribute(prefix + "_is_nested");
@@ -76,8 +80,8 @@ public class MentionFeatureExtractor extends FeatureExtractor {
             }
         }
 
-        addNominalAttribute("score", Lists.newArrayList("bad", "good"));
-        fillSortedAttributes("score");
+        addNominalAttribute(SCORE_ATTRIBUTE_NAME, Lists.newArrayList("bad", "good"));
+        fillSortedAttributes(SCORE_ATTRIBUTE_NAME);
     }
 
     private String encodeBase(String base) {
@@ -143,8 +147,11 @@ public class MentionFeatureExtractor extends FeatureExtractor {
         attribute2value.put(getAttributeByName(attributePrefix + "_is_nesting"), toBinary(helper.isNesting(mention)));
 
         String orth = helper.getMentionOrth(mention);
-        attribute2value.put(getAttributeByName(attributePrefix + "_capitalized"), toBinary(orth.length() != 0 && orth.substring(0, 1).toUpperCase().equals(orth.substring(0, 1))));
-        attribute2value.put(getAttributeByName(attributePrefix + "_all_caps"), toBinary(orth.toUpperCase().equals(orth)));
+        String firstLetter = orth.isEmpty() ? "" : orth.substring(0, 1); // guard: substring(0, 1) throws on an empty mention orth
+        String firstLetterUpperCased = firstLetter.toUpperCase();
+        attribute2value.put(getAttributeByName(attributePrefix + "_capitalized"), toBinary(!orth.isEmpty() && firstLetterUpperCased.equals(firstLetter)));
+        String upperCased = orth.toUpperCase();
+        attribute2value.put(getAttributeByName(attributePrefix + "_all_caps"), toBinary(upperCased.equals(orth)));
         attribute2value.put(getAttributeByName(attributePrefix + "_char_count"), (double) orth.length());
 
         // par characteristics
@@ -159,8 +166,8 @@ public class MentionFeatureExtractor extends FeatureExtractor {
         attribute2value.put(getAttributeByName(attributePrefix + "_sent_mention_count"), (double) mentionSentence.getMentions().size());
         attribute2value.put(getAttributeByName(attributePrefix + "_sent_idx"), (double) helper.getSentIndex(mentionSentence));
         attribute2value.put(getAttributeByName(attributePrefix + "_sent_idx_in_par"), (double) helper.getSentIndexInPar(mentionSentence));
-        attribute2value.put(getAttributeByName(attributePrefix + "_sent_ends_with_dot"), toBinary(helper.getSentenceLastTokenOrth(mentionSentence).equals(".")));
-        attribute2value.put(getAttributeByName(attributePrefix + "_sent_ends_with_questionmark"), toBinary(helper.getSentenceLastTokenOrth(mentionSentence).equals("?")));
+        attribute2value.put(getAttributeByName(attributePrefix + "_sent_ends_with_dot"), toBinary(".".equals(helper.getSentenceLastTokenOrth(mentionSentence))));
+        attribute2value.put(getAttributeByName(attributePrefix + "_sent_ends_with_questionmark"), toBinary("?".equals(helper.getSentenceLastTokenOrth(mentionSentence))));
 
         // frequent bases
         String mentionBase = helper.getMentionBase(mention);
@@ -174,14 +181,14 @@ public class MentionFeatureExtractor extends FeatureExtractor {
         int index = att.indexOfValue(value);
         if (index == -1)
             LOG.warn("{} not found for attribute {}", value, attributeName);
-        attribute2value.put(att, (double) (index == -1 ? att.indexOfValue("other") : index));
+        attribute2value.put(att, (double) (index == -1 ? att.indexOfValue(OTHER_VALUE) : index));
     }
 
 
     private void addScoreFeature(Map<TMention, Map<Attribute, Double>> result, List<TMention> mentions) {
         for (TMention m : mentions) {
             Map<Attribute, Double> map = Maps.newHashMap();
-            map.put(getAttributeByName("score"), weka.core.Utils.missingValue());
+            map.put(getAttributeByName(SCORE_ATTRIBUTE_NAME), weka.core.Utils.missingValue());
             result.put(m, map);
         }
     }
diff --git a/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceFeatureExtractor.java b/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceFeatureExtractor.java
index 1f429a5..0ba0f7b 100644
--- a/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceFeatureExtractor.java
+++ b/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/sentence/SentenceFeatureExtractor.java
@@ -13,6 +13,8 @@ import java.util.stream.Collectors;
 
 public class SentenceFeatureExtractor extends FeatureExtractor {
 
+    private static final String SCORE_ATTRIBUTE_NAME = "score";
+
     public SentenceFeatureExtractor() {
 
         addNumericAttributeNormalized("sent_mention_cluster_count");
@@ -39,8 +41,8 @@ public class SentenceFeatureExtractor extends FeatureExtractor {
         addNumericAttribute("text_mention_count");
         addNumericAttribute("text_cluster_count");
 
-        addNumericAttribute("score");
-        fillSortedAttributes("score");
+        addNumericAttribute(SCORE_ATTRIBUTE_NAME);
+        fillSortedAttributes(SCORE_ATTRIBUTE_NAME);
     }
 
     public Map<TSentence, Map<Attribute, Double>> calculateFeatures(TText preprocessedText, Set<TMention> goodMentions) {
@@ -70,8 +72,8 @@ public class SentenceFeatureExtractor extends FeatureExtractor {
                 feature2value.put(getAttributeByName("sent_token_length"), (double) sentence.getTokens().size());
                 feature2value.put(getAttributeByName("sent_idx_in_par"), (double) sentenceIdxInPar);
                 feature2value.put(getAttributeByName("sent_idx"), (double) sentenceIdx);
-                feature2value.put(getAttributeByName("sent_ends_with_dot"), toBinary(helper.getSentenceLastTokenOrth(sentence).equals(".")));
-                feature2value.put(getAttributeByName("sent_ends_with_questionmark"), toBinary(helper.getSentenceLastTokenOrth(sentence).equals("?")));
+                feature2value.put(getAttributeByName("sent_ends_with_dot"), toBinary(".".equals(helper.getSentenceLastTokenOrth(sentence))));
+                feature2value.put(getAttributeByName("sent_ends_with_questionmark"), toBinary("?".equals(helper.getSentenceLastTokenOrth(sentence))));
 
                 feature2value.put(getAttributeByName("par_idx"), (double) parIdx);
                 feature2value.put(getAttributeByName("par_token_count"), paragraph.getSentences().stream().map(s -> s.getTokens().size()).mapToDouble(s -> s).sum());
@@ -84,7 +86,7 @@ public class SentenceFeatureExtractor extends FeatureExtractor {
                 feature2value.put(getAttributeByName("text_mention_count"), (double) helper.getMentions().size());
                 feature2value.put(getAttributeByName("text_cluster_count"), (double) helper.getClusters().size());
 
-                feature2value.put(getAttributeByName("score"), weka.core.Utils.missingValue());
+                feature2value.put(getAttributeByName(SCORE_ATTRIBUTE_NAME), weka.core.Utils.missingValue());
 
                 feature2value.remove(null);
 
diff --git a/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/utils/ResourceUtils.java b/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/utils/ResourceUtils.java
index 5476e17..105494b 100644
--- a/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/utils/ResourceUtils.java
+++ b/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/utils/ResourceUtils.java
@@ -42,12 +42,13 @@ public class ResourceUtils {
     }
 
     private static List<String> loadUniqueLowercaseSortedNonemptyLinesFromResource(String resourcePath) throws IOException {
+        Predicate<String> stringIsNonempty = (String s) -> !s.isEmpty();
         try (InputStream stream = ResourceUtils.class.getResourceAsStream(resourcePath)) {
             return IOUtils.readLines(stream, Constants.ENCODING)
                     .stream()
                     .map(String::trim)
                     .map(String::toLowerCase)
-                    .filter(((Predicate<String>) String::isEmpty).negate())
+                    .filter(stringIsNonempty)
                     .sorted()
                     .distinct()
                     .collect(Collectors.toList());
diff --git a/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroFeatureExtractor.java b/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroFeatureExtractor.java
index d63fe0b..87a52e1 100644
--- a/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroFeatureExtractor.java
+++ b/nicolas-lib/src/main/java/pl/waw/ipipan/zil/summ/nicolas/zero/ZeroFeatureExtractor.java
@@ -37,15 +37,8 @@ public class ZeroFeatureExtractor extends FeatureExtractor {
     private static final String SENTENCE_MENTION_COUNT = "_sentence_mention_count";
     private static final String SENTENCE_TOKEN_LENGTH = "_sentence_token_length";
     private static final String IS_PAN_OR_PANI = "_is_pan_or_pani";
-
-    //    private static final Set<String> PREV_TOKEN_LEMMAS = Sets.newHashSet(
-//            "zespół", "tylko", "gdy", ".", ":", "też", "kandydat", "do", "dziś", "bo", "by", "z", "a", "jednak", "jak", "który", "ale", "czy", "i", "się", "rok", "-", "\"", "to", "być", "że", ",");
     private static final Set<String> PREV_TOKEN_LEMMAS = Sets.newHashSet("to", "z", "do", "o", "czyli", "nie", "\"", "też", "jak", "czy");
-
     private static final Set<String> NEXT_TOKEN_LEMMAS = Sets.newHashSet();
-//    private static final Set<String> NEXT_TOKEN_LEMMAS = Sets.newHashSet(
-//            "mówić", "ii", "twierdzić", "już", "(", "budzić", "stanowić", "powinien", "do", "stać", "musieć", "stanąć", "móc", "o", "chcieć", "się", "-", "zostać", ":", "?", "i", "na", "z", "mieć", "\"", "to", "w", "nie", "być", ".", ",");
-
     private static final String PREV_TOKEN_LEMMA = "_prev_token_lemma_equal_";
     private static final String NEXT_TOKEN_LEMMA = "_next_token_lemma_equal_";
 
@@ -105,7 +98,10 @@ public class ZeroFeatureExtractor extends FeatureExtractor {
         candidateFeatures.put(getAttributeByName(SCORE), weka.core.Utils.missingValue());
 
         TMention mention = candidate.getZeroCandidateMention();
-        TMention antecedent = candidate.getPreviousSentence().getMentions().stream().filter(ante -> helper.getCoreferentMentions(mention).contains(ante)).findFirst().get();
+        TMention antecedent = candidate.getPreviousSentence().getMentions().stream()
+                .filter(ante -> helper.getCoreferentMentions(mention).contains(ante))
+                .findFirst() // first mention of the previous sentence that is coreferent with the candidate
+                .orElseThrow(() -> new IllegalArgumentException("Mention pair without first element!"));
 
         addMentionFeatures(helper, candidateFeatures, mention, CANDIDATE_PREFIX);
         addMentionFeatures(helper, candidateFeatures, antecedent, ANTECEDENT_PREFIX);
@@ -165,14 +161,14 @@ public class ZeroFeatureExtractor extends FeatureExtractor {
         TSentence mentionSentence = helper.getMentionSentence(mention);
         candidateFeatures.put(getAttributeByName(attributePrefix + SENTENCE_MENTION_COUNT), (double) mentionSentence.getMentions().size());
         candidateFeatures.put(getAttributeByName(attributePrefix + SENTENCE_TOKEN_LENGTH), (double) mentionSentence.getTokens().size());
-        candidateFeatures.put(getAttributeByName(attributePrefix + SENTENCE_ENDS_WITH_QUESTION_MARK), toBinary(mentionSentence.getTokens().get(mentionSentence.getTokensSize() - 1).getOrth().equals("?")));
+        candidateFeatures.put(getAttributeByName(attributePrefix + SENTENCE_ENDS_WITH_QUESTION_MARK), toBinary("?".equals(mentionSentence.getTokens().get(mentionSentence.getTokensSize() - 1).getOrth())));
     }
 
     private void addNominalAttributeValue(String value, Map<Attribute, Double> attribute2value, String attributeName) {
         Attribute att = getAttributeByName(attributeName);
         int index = att.indexOfValue(value);
         if (index == -1)
-            LOG.warn(value + "not found for attribute " + attributeName);
+            LOG.warn("{} not found for attribute {}", value, attributeName);
         attribute2value.put(att, (double) (index == -1 ? att.indexOfValue("other") : index));
     }
 }
diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/CorpusHelper.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/CorpusHelper.java
index e938576..4688869 100644
--- a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/CorpusHelper.java
+++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/CorpusHelper.java
@@ -32,7 +32,7 @@ public class CorpusHelper {
     }
 
     public static List<Summary> getAbstractSummaries(Text text) {
-        return text.getSummaries().getSummary().stream().filter(summary -> !summary.getType().equals(ABSTRACT_SUMMARY_TYPE) && summary.getRatio().equals(SUMMARY_RATIO)).collect(Collectors.toList());
+        return text.getSummaries().getSummary().stream().filter(summary -> summary.getType().equals(ABSTRACT_SUMMARY_TYPE) && summary.getRatio().equals(SUMMARY_RATIO)).collect(Collectors.toList());
     }
 
     public static Set<String> loadTrainTextIds() throws IOException {
diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/PathConstants.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/PathConstants.java
index f7c0e1d..d5f76d9 100644
--- a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/PathConstants.java
+++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/PathConstants.java
@@ -67,15 +67,19 @@ public class PathConstants {
         }
     }
 
-    public static void downloadFileAndExtract(String url, File targetZipFile, File targetDir) throws IOException, ZipException {
+    public static void downloadFileAndExtract(String url, File targetZipFile, File targetDir) throws IOException {
         downloadFile(url, targetZipFile);
         extractZipFile(targetZipFile, targetDir);
     }
 
-    private static void extractZipFile(File targetZipFile, File targetDir) throws ZipException {
+    private static void extractZipFile(File targetZipFile, File targetDir) throws IOException {
         createFolder(targetDir);
-        ZipFile zipFile = new ZipFile(targetZipFile);
-        zipFile.extractAll(targetDir.getPath());
+        try {
+            ZipFile zipFile = new ZipFile(targetZipFile);
+            zipFile.extractAll(targetDir.getPath());
+        } catch (ZipException e) {
+            throw new IOException(e);
+        }
         LOG.info("Extracted zip file: {} to dir: {}.", targetZipFile, targetDir);
     }
 }
diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/eval/search/Crossvalidate.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/eval/search/Crossvalidate.java
index 27bc63a..5fe0270 100644
--- a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/eval/search/Crossvalidate.java
+++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/eval/search/Crossvalidate.java
@@ -106,6 +106,10 @@ class Crossvalidate {
             return Pair.of(acc, name);
         }).max(Comparator.comparingDouble(Pair::getLeft));
 
+        printBestResult(watch, max);
+    }
+
+    private static void printBestResult(StopWatch watch, Optional<Pair<Double, String>> max) {
         LOG.info("#########");
         if (max.isPresent()) {
             LOG.info("Best: " + max.get().getRight() + " : " + max.get().getLeft());
@@ -142,13 +146,6 @@ class Crossvalidate {
             return Pair.of(acc, name);
         }).max(Comparator.comparingDouble(Pair::getLeft));
 
-        LOG.info("#########");
-        if (max.isPresent()) {
-            LOG.info("Best: " + max.get().getRight() + " : " + max.get().getLeft());
-        } else {
-            LOG.info("Empty algorithms list");
-        }
-        watch.stop();
-        LOG.info("Elapsed time: {}", watch);
+        printBestResult(watch, max);
     }
 }
diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/model/MentionScorer.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/model/MentionScorer.java
index aa341fc..922443d 100644
--- a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/model/MentionScorer.java
+++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/model/MentionScorer.java
@@ -22,33 +22,37 @@ public class MentionScorer {
         Multiset<String> tokenCounts = HashMultiset.create(TextUtils.tokenize(optimalSummary.toLowerCase()));
 
         List<TSentence> sentences = text.getParagraphs().stream().flatMap(p -> p.getSentences().stream()).collect(Collectors.toList());
-        Map<TMention, String> mention2Orth = loadMention2OrthExcludingStopwords(sentences);
+        Map<TMention, String> mention2Orth = loadMention2OrthExcludingStoptags(sentences);
 
         return booleanTokenIntersection(mention2Orth, tokenCounts);
     }
 
-    private Map<TMention, String> loadMention2OrthExcludingStopwords(List<TSentence> sentences) {
+    private Map<TMention, String> loadMention2OrthExcludingStoptags(List<TSentence> sentences) {
         Map<TMention, String> mention2orth = Maps.newHashMap();
         for (TSentence sentence : sentences) {
             Map<String, TToken> tokId2tok = sentence.getTokens().stream().collect(Collectors.toMap(TToken::getId, Function.identity()));
 
             for (TMention mention : sentence.getMentions()) {
-                StringBuilder mentionOrth = new StringBuilder();
-                for (String tokId : mention.getChildIds()) {
-                    TToken token = tokId2tok.get(tokId);
-                    if (STOP_POS_TAGS.contains(token.getChosenInterpretation().getCtag()))
-                        continue;
-
-                    if (!token.isNoPrecedingSpace())
-                        mentionOrth.append(" ");
-                    mentionOrth.append(token.getOrth());
-                }
-                mention2orth.put(mention, mentionOrth.toString().trim());
+                mention2orth.put(mention, getMentionOrth(tokId2tok, mention));
             }
         }
         return mention2orth;
     }
 
+    private String getMentionOrth(Map<String, TToken> tokId2tok, TMention mention) {
+        StringBuilder mentionOrth = new StringBuilder();
+        for (String tokId : mention.getChildIds()) {
+            TToken token = tokId2tok.get(tokId);
+            if (STOP_POS_TAGS.contains(token.getChosenInterpretation().getCtag()))
+                continue;
+
+            if (!token.isNoPrecedingSpace())
+                mentionOrth.append(" ");
+            mentionOrth.append(token.getOrth());
+        }
+        return mentionOrth.toString().trim();
+    }
+
     private static Map<TMention, Double> booleanTokenIntersection(Map<TMention, String> mention2Orth, Multiset<String> tokenCounts) {
         Map<TMention, Double> mention2score = Maps.newHashMap();
         for (Map.Entry<TMention, String> entry : mention2Orth.entrySet()) {
diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/ExtractGoldSummaries.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/ExtractGoldSummaries.java
index 2b311f0..6985061 100644
--- a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/ExtractGoldSummaries.java
+++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/ExtractGoldSummaries.java
@@ -17,7 +17,6 @@ import static pl.waw.ipipan.zil.summ.nicolas.PathConstants.*;
 
 public class ExtractGoldSummaries {
 
-
     private ExtractGoldSummaries() {
     }
 
@@ -28,25 +27,30 @@ public class ExtractGoldSummaries {
         File[] files = EXTRACTED_CORPUS_DATA_DIR.listFiles();
         if (files != null) {
             for (File file : files) {
-                Text text = PSC_IO.readText(file);
+                extractGoldSummariesFromFile(file);
+            }
+        }
+    }
 
-                List<Summary> goldSummaries;
+    private static void extractGoldSummariesFromFile(File file) throws IOException, JAXBException {
+        Text text = PSC_IO.readText(file);
 
-                boolean isTest = CorpusHelper.isTest(text);
-                if (isTest) {
-                    goldSummaries = CorpusHelper.getAbstractSummaries(text);
-                } else {
-                    goldSummaries = CorpusHelper.getExtractSummaries(text);
-                }
+        List<Summary> goldSummaries;
+        File targetDir;
 
-                for (Summary summary : goldSummaries) {
-                    File targetDir = isTest ? GOLD_TEST_SUMMARIES_DIR : GOLD_TRAIN_SUMMARIES_DIR;
-                    File targetFile = new File(targetDir, text.getId() + "_" + summary.getAuthor() + ".txt");
+        boolean isTest = CorpusHelper.isTest(text);
+        if (isTest) {
+            goldSummaries = CorpusHelper.getAbstractSummaries(text);
+            targetDir = GOLD_TEST_SUMMARIES_DIR;
+        } else {
+            goldSummaries = CorpusHelper.getExtractSummaries(text);
+            targetDir = GOLD_TRAIN_SUMMARIES_DIR;
+        }
 
-                    try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(targetFile), Constants.ENCODING)) {
-                        writer.append(summary.getBody());
-                    }
-                }
+        for (Summary summary : goldSummaries) {
+            File targetFile = new File(targetDir, text.getId() + "_" + summary.getAuthor() + ".txt");
+            try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(targetFile), Constants.ENCODING)) {
+                writer.append(summary.getBody());
             }
         }
     }
diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/ExtractMostFrequentMentions.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/ExtractMostFrequentMentions.java
index 44d8bb7..4b24879 100644
--- a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/ExtractMostFrequentMentions.java
+++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/ExtractMostFrequentMentions.java
@@ -9,7 +9,6 @@ import pl.waw.ipipan.zil.summ.nicolas.PathConstants;
 import pl.waw.ipipan.zil.summ.nicolas.features.FeatureHelper;
 import pl.waw.ipipan.zil.summ.nicolas.utils.thrift.ThriftUtils;
 
-import javax.xml.bind.JAXBException;
 import java.io.BufferedWriter;
 import java.io.FileWriter;
 import java.io.IOException;
@@ -26,7 +25,7 @@ public class ExtractMostFrequentMentions {
     private ExtractMostFrequentMentions() {
     }
 
-    public static void main(String[] args) throws IOException, JAXBException {
+    public static void main(String[] args) throws IOException {
         List<String> mostFrequentMentionBases = getMostFrequentMentionBases();
         try (BufferedWriter bw = new BufferedWriter(new FileWriter(PathConstants.TARGET_MODEL_DIR + Constants.FREQUENT_BASES_RESOURCE_PATH))) {
             for (String base : mostFrequentMentionBases) {
diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/PrepareTrainingData.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/PrepareTrainingData.java
index 9c4f012..6215502 100644
--- a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/PrepareTrainingData.java
+++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/PrepareTrainingData.java
@@ -89,7 +89,7 @@ public class PrepareTrainingData {
         }
     }
 
-    private static void prepareSentencesDataset(Map<String, TText> id2preprocessedText, Map<String, String> id2optimalSummary) throws Exception {
+    private static void prepareSentencesDataset(Map<String, TText> id2preprocessedText, Map<String, String> id2optimalSummary) throws IOException {
 
         SentenceScorer sentenceScorer = new SentenceScorer();
         SentenceFeatureExtractor featureExtractor = new SentenceFeatureExtractor();
@@ -97,10 +97,12 @@ public class PrepareTrainingData {
         Instances instances = InstanceUtils.createNewInstances(featureExtractor.getAttributesList());
 
         int i = 1;
-        for (String textId : id2preprocessedText.keySet()) {
+        for (Map.Entry<String, TText> entry : id2preprocessedText.entrySet()) {
             logProgress(id2preprocessedText, i++);
 
-            TText preprocessedText = id2preprocessedText.get(textId);
+            String textId = entry.getKey();
+            TText preprocessedText = entry.getValue();
+
             String optimalSummary = id2optimalSummary.get(textId);
             if (optimalSummary == null)
                 continue;
@@ -110,9 +112,9 @@ public class PrepareTrainingData {
                     = loadGoldGoodMentions(textId, preprocessedText, id2optimalSummary);
 
             Map<TSentence, Instance> sentence2instance = InstanceUtils.extractInstancesFromSentences(preprocessedText, featureExtractor, goodMentions);
-            for (Map.Entry<TSentence, Instance> entry : sentence2instance.entrySet()) {
-                TSentence sentence = entry.getKey();
-                Instance instance = entry.getValue();
+            for (Map.Entry<TSentence, Instance> sentenceInstance : sentence2instance.entrySet()) {
+                TSentence sentence = sentenceInstance.getKey();
+                Instance instance = sentenceInstance.getValue();
                 instance.setDataset(instances);
                 instance.setClassValue(sentence2score.get(sentence));
                 instances.add(instance);
@@ -121,7 +123,7 @@ public class PrepareTrainingData {
         saveInstancesToFile(instances, SENTENCE_ARFF);
     }
 
-    private static Set<TMention> loadGoldGoodMentions(String id, TText text, Map<String, String> id2optimalSummary) throws IOException {
+    private static Set<TMention> loadGoldGoodMentions(String id, TText text, Map<String, String> id2optimalSummary) {
         String optimalSummary = id2optimalSummary.get(id);
 
         MentionScorer scorer = new MentionScorer();
diff --git a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/TrainAllModels.java b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/TrainAllModels.java
index e6d9588..f8b9d7c 100644
--- a/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/TrainAllModels.java
+++ b/nicolas-train/src/main/java/pl/waw/ipipan/zil/summ/nicolas/train/pipeline/TrainAllModels.java
@@ -7,11 +7,10 @@ import pl.waw.ipipan.zil.summ.nicolas.Constants;
 import pl.waw.ipipan.zil.summ.nicolas.train.model.Settings;
 import weka.classifiers.Classifier;
 import weka.core.Instances;
+import weka.core.SerializationHelper;
 import weka.core.converters.ArffLoader;
 
 import java.io.File;
-import java.io.FileOutputStream;
-import java.io.ObjectOutputStream;
 import java.util.logging.LogManager;
 
 import static pl.waw.ipipan.zil.summ.nicolas.PathConstants.*;
@@ -48,10 +47,7 @@ public class TrainAllModels {
 
         String target = TARGET_MODEL_DIR + targetPath;
         LOG.info("Saving classifier at: {}", target);
-        try (ObjectOutputStream oos = new ObjectOutputStream(
-                new FileOutputStream(target))) {
-            oos.writeObject(classifier);
-        }
+        SerializationHelper.write(target, classifier);
 
         watch.stop();
         LOG.info("Elapsed time: {}", watch);
--
libgit2 0.22.2