Commit 5e5f3286b0a583c8dbf208c92550152abca79030
1 parent
1d174433
- poprawiona obsługa słowników w morfeuszbuilderze (popoprawiane opcje)
- poprawki testów (by uwzględniały nową obsługę słowników) git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/trunk@256 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing
12 changed files
with
180 additions
and
137 deletions
fsabuilder/morfeusz_builder
... | ... | @@ -23,8 +23,8 @@ def _checkOption(opt, parser, msg): |
23 | 23 | parser.print_help() |
24 | 24 | exit(1) |
25 | 25 | |
26 | -def _checkExactlyOneOptionSet(optsList, parser, msg): | |
27 | - if optsList.count(True) != 1: | |
26 | +def _checkCondition(cond, parser, msg): | |
27 | + if not cond: | |
28 | 28 | print >> sys.stderr, msg |
29 | 29 | parser.print_help() |
30 | 30 | exit(1) |
... | ... | @@ -42,6 +42,11 @@ def _checkOpen(filename, mode): |
42 | 42 | print >> sys.stderr, str(ex) |
43 | 43 | exit(1) |
44 | 44 | |
45 | +def _getDictFilename(opts, isGenerator): | |
46 | + typeCode = 's' if isGenerator else 'a' | |
47 | + fname = '%s-%s.dict' % (opts.dictName, typeCode) | |
48 | + return os.path.join(opts.dictDir, fname) | |
49 | + | |
45 | 50 | def _parseOptions(): |
46 | 51 | """ |
47 | 52 | Parses commandline args |
... | ... | @@ -53,7 +58,7 @@ def _parseOptions(): |
53 | 58 | action='callback', |
54 | 59 | callback=_parseListCallback, |
55 | 60 | metavar='FILES', |
56 | - help='comma separated list of files') | |
61 | + help='comma separated list of dictionary files') | |
57 | 62 | parser.add_option('--tagset-file', |
58 | 63 | dest='tagsetFile', |
59 | 64 | metavar='FILE', |
... | ... | @@ -62,37 +67,45 @@ def _parseOptions(): |
62 | 67 | dest='segmentsFile', |
63 | 68 | metavar='FILE', |
64 | 69 | help='path to the file with segment rules') |
65 | - parser.add_option('--trim-supneg', | |
66 | - dest='trimSupneg', | |
67 | - default=False, | |
68 | - action='store_true', | |
69 | - help='this option is ignored and exists only for backwards compatibility') | |
70 | - parser.add_option('-o', '--output-file', | |
71 | - dest='outputFile', | |
70 | + #~ parser.add_option('--trim-supneg', | |
71 | + #~ dest='trimSupneg', | |
72 | + #~ default=False, | |
73 | + #~ action='store_true', | |
74 | + #~ help='this option is ignored and exists only for backwards compatibility') | |
75 | + parser.add_option('--dict-name', | |
76 | + dest='dictName', | |
77 | + help='the name of result dictionary') | |
78 | + parser.add_option('--dict-dir', | |
79 | + dest='dictDir', | |
72 | 80 | metavar='FILE', |
73 | - help='path to output file') | |
74 | - parser.add_option('-a', '--analyzer', | |
75 | - dest='analyzer', | |
76 | - action='store_true', | |
77 | - default=False, | |
78 | - help='Generate FSA for morphological analysis') | |
79 | - parser.add_option('-g', '--generator', | |
80 | - dest='generator', | |
81 | - action='store_true', | |
82 | - default=False, | |
83 | - help='Generate FSA for morphological synthesis') | |
84 | - parser.add_option('--cpp', | |
85 | - dest='cpp', | |
81 | + default=os.getcwd(), | |
82 | + help='path to output directory (the default is current dir)') | |
83 | + parser.add_option('--only-analyzer', | |
84 | + dest='onlyAnalyzer', | |
86 | 85 | action='store_true', |
87 | 86 | default=False, |
88 | - help='Encode binary data in c++ file') | |
89 | - parser.add_option('--use-arrays', | |
90 | - dest='useArrays', | |
87 | + help='Generate dictionary for morphological analysis only (default is both analysis and synthesis)') | |
88 | + parser.add_option('--only-generator', | |
89 | + dest='onlyGenerator', | |
91 | 90 | action='store_true', |
92 | 91 | default=False, |
93 | - help='store states reachable by 2 transitions in arrays (should speed up recognition, available only when --serialization-method=V1)') | |
92 | + help='Generate dictionary for morphological synthesis only (default is both analysis and synthesis)') | |
93 | + parser.add_option('--analyzer-cpp', | |
94 | + dest='analyzerCpp', | |
95 | + metavar='FILE', | |
96 | + help='Encode analyzer dictionary data in given c++ file') | |
97 | + parser.add_option('--generator-cpp', | |
98 | + dest='generatorCpp', | |
99 | + metavar='FILE', | |
100 | + help='Encode generator dictionary data in given c++ file') | |
101 | + #~ parser.add_option('--use-arrays', | |
102 | + #~ dest='useArrays', | |
103 | + #~ action='store_true', | |
104 | + #~ default=False, | |
105 | + #~ help='store states reachable by 2 transitions in arrays (should speed up recognition, available only when --serialization-method=V1)') | |
94 | 106 | parser.add_option('--serialization-method', |
95 | 107 | dest='serializationMethod', |
108 | + default='V1', | |
96 | 109 | help="FSA serialization method: \ |
97 | 110 | SIMPLE - fixed-length transitions, fastest and weakest compression \ |
98 | 111 | V1 - variable-length transitions, compressed labels - strongest compression \ |
... | ... | @@ -102,9 +115,12 @@ def _parseOptions(): |
102 | 115 | #~ action='store_true', |
103 | 116 | #~ default=False, |
104 | 117 | #~ help='visualize result') |
105 | - parser.add_option('--train-file', | |
106 | - dest='trainFile', | |
107 | - help='A text file used for training. Should contain words from some large corpus - one word in each line') | |
118 | + parser.add_option('--analyzer-train-file', | |
119 | + dest='analyzerTrainFile', | |
120 | + help='A text file used for analyzer training. Should contain words from some large corpus - one word in each line. Resulting analysis automaton should be faster with proper train file.') | |
121 | + parser.add_option('--generator-train-file', | |
122 | + dest='generatorTrainFile', | |
123 | + help='A text file used for generator training. Should contain words from some large corpus - one word in each line. Resulting synthesis automaton should be faster with proper train file.') | |
108 | 124 | parser.add_option('--debug', |
109 | 125 | dest='debug', |
110 | 126 | action='store_true', |
... | ... | @@ -119,22 +135,33 @@ def _parseOptions(): |
119 | 135 | opts, args = parser.parse_args() |
120 | 136 | |
121 | 137 | _checkOption(opts.inputFiles, parser, "Input file is missing") |
122 | - _checkOption(opts.outputFile, parser, "Output file is missing") | |
138 | + _checkOption(opts.dictDir, parser, "Output dictionary dir is missing") | |
139 | + _checkCondition((opts.onlyAnalyzer, opts.onlyGenerator) != (True, True), | |
140 | + parser, 'Cannot set both --only-analyzer and --only-generator') | |
141 | + writeCpp = {opts.analyzerCpp, opts.generatorCpp} != {None} | |
142 | + _checkCondition(opts.dictName or writeCpp, parser, "Dictionary name is missing") | |
143 | + _checkCondition(opts.onlyGenerator or opts.analyzerCpp or not writeCpp, parser, "Analyzer .cpp output file path is missing") | |
144 | + _checkCondition(opts.onlyAnalyzer or opts.generatorCpp or not writeCpp, parser, "Generator .cpp output file path is missing") | |
145 | + #~ _checkCondition((opts.dictName, opts.outputCpp) != (None, None), | |
146 | + #~ parser, 'Must set at least one of: --dict-name, --output-cpp') | |
147 | + #~ _checkOption(opts.outputFile, parser, "Output file is missing") | |
123 | 148 | _checkOption(opts.tagsetFile, parser, "Tagset file is missing") |
124 | - _checkOption(opts.serializationMethod, parser, "Serialization method file is missing") | |
125 | - _checkExactlyOneOptionSet([opts.analyzer, opts.generator], | |
126 | - parser, 'Must set exactly one FSA type: --analyzer or --generator') | |
149 | + _checkOption(opts.segmentsFile, parser, "Segmentation file is missing") | |
150 | + #~ _checkOption(opts.serializationMethod, parser, "Serialization method file is missing") | |
151 | + #~ _checkExactlyOneOptionSet([opts.analyzer, opts.generator], | |
152 | + #~ parser, 'Must set exactly one FSA type: --analyzer or --generator') | |
127 | 153 | |
128 | 154 | _checkOpen(opts.tagsetFile, 'r') |
155 | + _checkOpen(opts.segmentsFile, 'r') | |
129 | 156 | for filename in opts.inputFiles: |
130 | 157 | _checkOpen(filename, 'r') |
131 | - _checkOpen(opts.outputFile, 'w') | |
132 | - _checkOption(opts.segmentsFile, parser, "Segment rules file is missing") | |
133 | - if opts.analyzer: | |
134 | - _checkOpen(opts.segmentsFile, 'r') | |
158 | + if not opts.onlyGenerator: | |
159 | + _checkOpen(_getDictFilename(opts, isGenerator=False), 'w') | |
160 | + if not opts.onlyAnalyzer: | |
161 | + _checkOpen(_getDictFilename(opts, isGenerator=True), 'w') | |
135 | 162 | |
136 | - if not opts.serializationMethod.upper() in [SerializationMethod.SIMPLE, SerializationMethod.V1, SerializationMethod.V2]: | |
137 | - print >> sys.stderr, '--serialization-method must be one of ('+str([SerializationMethod.SIMPLE, SerializationMethod.V1, SerializationMethod.V2])+')' | |
163 | + if not opts.serializationMethod.upper() in [SerializationMethod.SIMPLE, SerializationMethod.V1]: | |
164 | + print >> sys.stderr, '--serialization-method must be one of ('+str([SerializationMethod.SIMPLE, SerializationMethod.V1])+')' | |
138 | 165 | parser.print_help() |
139 | 166 | exit(1) |
140 | 167 | |
... | ... | @@ -211,49 +238,43 @@ def buildGeneratorFromPoliMorf(inputFiles, tagset, segmentRulesManager): |
211 | 238 | _printStats(fsa) |
212 | 239 | return fsa, encoder.qualifiersMap |
213 | 240 | |
241 | +def _doBuildDictionaryPart(opts, isGenerator): | |
242 | + tagset = Tagset(opts.tagsetFile) | |
243 | + rulesParserVersion = rulesParser.RulesParser.PARSE4ANALYZER if not isGenerator else rulesParser.RulesParser.PARSE4GENERATOR | |
244 | + segmentRulesManager = rulesParser.RulesParser(tagset, rulesParserVersion).parse(opts.segmentsFile) | |
245 | + fsa, qualifiersMap = (buildGeneratorFromPoliMorf if isGenerator else buildAnalyzerFromPoliMorf)(opts.inputFiles, tagset, segmentRulesManager) | 
246 | + segmentationRulesData = segmentRulesManager.serialize() | |
247 | + | |
248 | + trainFile = opts.generatorTrainFile if isGenerator else opts.analyzerTrainFile | 
249 | + if trainFile: | 
250 | + logging.info('training with '+trainFile+' ...') | 
251 | + fsa.train(_readTrainData(trainFile)) | 
252 | + logging.info('done training') | 
253 | + serializer = Serializer.getSerializer(opts.serializationMethod, fsa, tagset, qualifiersMap, segmentationRulesData) | |
254 | + if opts.generatorCpp and isGenerator: | |
255 | + serializer.serialize2CppFile(opts.generatorCpp, isGenerator=isGenerator) | |
256 | + if opts.analyzerCpp and not isGenerator: | |
257 | + serializer.serialize2CppFile(opts.analyzerCpp, isGenerator=isGenerator) | |
258 | + | |
259 | + if opts.dictDir: | |
260 | + serializer.serialize2BinaryFile(_getDictFilename(opts, isGenerator=isGenerator), isGenerator=isGenerator) | |
261 | + | |
262 | + logging.info('total FSA size (in bytes): '+str(fsa.initialState.reverseOffset)) | |
263 | + | |
214 | 264 | def main(opts): |
215 | 265 | if opts.debug: |
216 | 266 | logging.basicConfig(level=logging.DEBUG) |
217 | 267 | else: |
218 | 268 | logging.basicConfig(level=logging.INFO) |
219 | 269 | |
220 | - if opts.analyzer: | |
221 | - logging.info('*** building analyzer ***') | |
222 | - else: | |
223 | - logging.info('*** building generator ***') | |
224 | - | |
225 | 270 | logging.info('reading tagset from %s', opts.tagsetFile) |
226 | 271 | tagset = Tagset(opts.tagsetFile) |
227 | - rulesType = rulesParser.RulesParser.PARSE4ANALYZER if opts.analyzer else rulesParser.RulesParser.PARSE4GENERATOR | |
228 | - segmentRulesManager = rulesParser.RulesParser(tagset, rulesType).parse(opts.segmentsFile) | |
229 | - segmentationRulesData = segmentRulesManager.serialize() | |
230 | 272 | |
231 | - if opts.analyzer: | |
232 | - fsa, qualifiersMap = buildAnalyzerFromPoliMorf(opts.inputFiles, tagset, segmentRulesManager) | |
233 | - else: | |
234 | - fsa, qualifiersMap = buildGeneratorFromPoliMorf(opts.inputFiles, tagset, segmentRulesManager) | |
235 | - if opts.trainFile: | |
236 | - logging.info('training with '+opts.trainFile+' ...') | |
237 | - fsa.train(_readTrainData(opts.trainFile)) | |
238 | - logging.info('done training') | |
239 | - | |
240 | -# serializer = { | |
241 | -# SerializationMethod.SIMPLE: SimpleSerializer, | |
242 | -# SerializationMethod.V1: VLengthSerializer1, | |
243 | -# SerializationMethod.V2: VLengthSerializer2, | |
244 | -# }[opts.serializationMethod](fsa) | |
245 | - serializer = Serializer.getSerializer(opts.serializationMethod, fsa, tagset, qualifiersMap, segmentationRulesData) | |
246 | - | |
247 | - if opts.cpp: | |
248 | - serializer.serialize2CppFile(opts.outputFile, isGenerator=opts.generator) | |
249 | - else: | |
250 | - serializer.serialize2BinaryFile(opts.outputFile, isGenerator=opts.generator) | |
251 | - | |
252 | - logging.info('total FSA size (in bytes): '+str(fsa.initialState.reverseOffset)) | |
253 | -# { | |
254 | -# OutputFormat.CPP: serializer.serialize2CppFile, | |
255 | -# OutputFormat.BINARY: serializer.serialize2BinaryFile | |
256 | -# }[opts.outputFormat](opts.outputFile) | |
273 | + if not opts.onlyGenerator: | |
274 | + _doBuildDictionaryPart(opts, isGenerator=False) | |
275 | + | |
276 | + if not opts.onlyAnalyzer: | 
277 | + _doBuildDictionaryPart(opts, isGenerator=True) | |
257 | 278 | |
258 | 279 | if __name__ == '__main__': |
259 | 280 | import os |
... | ... |
morfeusz/CMakeLists.txt
... | ... | @@ -6,21 +6,15 @@ if (SKIP_DICTIONARY_BUILDING) |
6 | 6 | message ("SKIPPING dictionary building") |
7 | 7 | else () |
8 | 8 | add_custom_command ( |
9 | - OUTPUT "${ANALYZER_DICTIONARY_CPP}" | |
10 | - COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --analyzer --input-files="${INPUT_DICTIONARIES}" -o "${ANALYZER_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1 | |
11 | - DEPENDS "${INPUT_DICTIONARY}" | |
12 | - COMMENT "Building default dictionary C++ file" | |
13 | - ) | |
14 | - add_custom_command ( | |
15 | - OUTPUT "${GENERATOR_DICTIONARY_CPP}" | |
16 | - COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --generator --input-files="${INPUT_DICTIONARIES}" -o "${GENERATOR_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1 | |
9 | + OUTPUT "${ANALYZER_DICTIONARY_CPP}" "${GENERATOR_DICTIONARY_CPP}" | |
10 | + COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --input-files="${INPUT_DICTIONARIES}" --analyzer-cpp="${ANALYZER_DICTIONARY_CPP}" --generator-cpp="${GENERATOR_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" | |
17 | 11 | DEPENDS "${INPUT_DICTIONARY}" |
18 | 12 | COMMENT "Building default dictionary C++ file" |
19 | 13 | ) |
20 | 14 | endif() |
21 | 15 | |
22 | -add_custom_target ( analyzer-dictionary DEPENDS "${INPUT_DICTIONARY_CPP}") | |
23 | -add_custom_target ( generator-dictionary DEPENDS "${INPUT_SYNTH_DICTIONARY_CPP}") | |
16 | +add_custom_target ( analyzer-dictionary DEPENDS "${ANALYZER_DICTIONARY_CPP}") | |
17 | +add_custom_target ( generator-dictionary DEPENDS "${GENERATOR_DICTIONARY_CPP}") | |
24 | 18 | add_custom_target ( dictionaries DEPENDS analyzer-dictionary generator-dictionary) |
25 | 19 | |
26 | 20 | include_directories( ${CMAKE_CURRENT_SOURCE_DIR} ) |
... | ... |
morfeusz/Dictionary.cpp
... | ... | @@ -33,8 +33,8 @@ namespace morfeusz { |
33 | 33 | } |
34 | 34 | |
35 | 35 | Dictionary::Dictionary(const unsigned char* fsaFileStartPtr, MorfeuszProcessorType processorType) |
36 | - : idResolver(fsaFileStartPtr, &UTF8CharsetConverter::getInstance()), | |
37 | - fsa(FSAType::getFSA(fsaFileStartPtr, initializeDeserializer(processorType))), | |
36 | + : fsa(FSAType::getFSA(fsaFileStartPtr, initializeDeserializer(processorType))), | |
37 | + idResolver(fsaFileStartPtr, &UTF8CharsetConverter::getInstance()), | |
38 | 38 | separatorsList(getSeparatorsList(fsaFileStartPtr)), |
39 | 39 | segrulesFSAsMap(createSegrulesFSAsMap(fsaFileStartPtr)), |
40 | 40 | defaultSegrulesOptions(getDefaultSegrulesOptions(fsaFileStartPtr)), |
... | ... |
morfeusz/Dictionary.hpp
... | ... | @@ -24,9 +24,9 @@ namespace morfeusz { |
24 | 24 | |
25 | 25 | struct Dictionary { |
26 | 26 | Dictionary(const unsigned char* ptr, MorfeuszProcessorType processorType); |
27 | - | |
28 | - IdResolverImpl idResolver; | |
27 | + | |
29 | 28 | FSAType* fsa; |
29 | + IdResolverImpl idResolver; | |
30 | 30 | std::vector<uint32_t> separatorsList; |
31 | 31 | std::map<SegrulesOptions, SegrulesFSA*> segrulesFSAsMap; |
32 | 32 | SegrulesOptions defaultSegrulesOptions; |
... | ... |
morfeusz/Environment.cpp
... | ... | @@ -173,8 +173,8 @@ namespace morfeusz { |
173 | 173 | } |
174 | 174 | |
175 | 175 | void Environment::setDictionary(const std::string& dictName) { |
176 | + cerr << "SETTING DICT: " << dictName << endl; | |
176 | 177 | this->dictionary = DictionariesRepository::instance.getDictionary(dictName, this->processorType); |
177 | - | |
178 | 178 | idResolver = dictionary->idResolver; |
179 | 179 | this->idResolver.setCharsetConverter(currentCharsetConverter); |
180 | 180 | currSegrulesOptions = dictionary->defaultSegrulesOptions; |
... | ... |
morfeusz/MorfeuszImpl.cpp
... | ... | @@ -156,14 +156,6 @@ namespace morfeusz { |
156 | 156 | return getAnyEnvironment().getAvailablePraetOptions(); |
157 | 157 | } |
158 | 158 | |
159 | - // void MorfeuszImpl::setAnalyzerDictionary(const string& filename) { | |
160 | - // this->analyzerEnv.setDictionaryFile(filename); | |
161 | - // } | |
162 | - // | |
163 | - // void MorfeuszImpl::setGeneratorDictionary(const string& filename) { | |
164 | - // this->generatorEnv.setDictionaryFile(filename); | |
165 | - // } | |
166 | - | |
167 | 159 | MorfeuszImpl::~MorfeuszImpl() { |
168 | 160 | } |
169 | 161 | |
... | ... |
morfeusz/tests/TestMorfeusz.cpp
... | ... | @@ -101,27 +101,28 @@ void TestMorfeusz::testAnalyzeVector1() { |
101 | 101 | CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].lemma); |
102 | 102 | } |
103 | 103 | |
104 | -static inline string prepareErrorneusTmpFile() { | |
105 | - char* filename = tmpnam(NULL); | |
104 | +static inline string prepareErrorneusDictFile(const string& dictName) { | |
105 | + string filename = dictName + "-a.dict"; | |
106 | 106 | ofstream out; |
107 | - out.open(filename); | |
107 | + out.open(filename.c_str()); | |
108 | 108 | out << "asfasdfa" << endl; |
109 | - out.close(); | |
110 | - return string(filename); | |
109 | + return filename; | |
111 | 110 | } |
112 | 111 | |
113 | -void TestMorfeusz::testOpenInvalidFile() { | |
114 | - CPPUNIT_FAIL("not implemented yet"); | |
115 | -// cerr << "testOpenInvalidFile" << endl; | |
116 | -// string filename(prepareErrorneusTmpFile()); | |
117 | -// CPPUNIT_ASSERT_THROW(morfeusz->setAnalyzerDictionary(filename), FileFormatException); | |
112 | +void TestMorfeusz::testOpenInvalidDict() { | |
113 | + cerr << "testOpenInvalidDict" << endl; | |
114 | + string dictName = "asdfasdfasdfa"; | |
115 | + string filename = prepareErrorneusDictFile(dictName); | |
116 | + morfeusz->dictionarySearchPaths.push_front("."); | |
117 | + cerr << "still alive..." << endl; | |
118 | + CPPUNIT_ASSERT_THROW(morfeusz->setDictionary(dictName), FileFormatException); | |
119 | + remove(filename.c_str()); | |
118 | 120 | } |
119 | 121 | |
120 | -void TestMorfeusz::testOpenNonExistentFile() { | |
121 | - CPPUNIT_FAIL("not implemented yet"); | |
122 | +void TestMorfeusz::testOpenNonExistentDict() { | |
122 | 123 | // cerr << "testOpenNonExistentFile" << endl; |
123 | 124 | // string filename(tmpnam(NULL)); |
124 | -// CPPUNIT_ASSERT_THROW(morfeusz->setAnalyzerDictionary(filename), std::ios_base::failure); | |
125 | + CPPUNIT_ASSERT_THROW(morfeusz->setDictionary("asdfasdfa"), MorfeuszException); | |
125 | 126 | } |
126 | 127 | |
127 | 128 | void TestMorfeusz::testSetInvalidAgglOption() { |
... | ... |
morfeusz/tests/TestMorfeusz.hpp
... | ... | @@ -19,8 +19,8 @@ class TestMorfeusz : public CPPUNIT_NS::TestFixture { |
19 | 19 | CPPUNIT_TEST(testAnalyzeIterateWithWhitespaceHandlingKEEP); |
20 | 20 | CPPUNIT_TEST(testAnalyzeIterateWithWhitespaceHandlingAPPEND); |
21 | 21 | CPPUNIT_TEST(testAnalyzeVector1); |
22 | - CPPUNIT_TEST(testOpenInvalidFile); | |
23 | - CPPUNIT_TEST(testOpenNonExistentFile); | |
22 | + CPPUNIT_TEST(testOpenInvalidDict); | |
23 | + CPPUNIT_TEST(testOpenNonExistentDict); | |
24 | 24 | CPPUNIT_TEST(testSetInvalidAgglOption); |
25 | 25 | CPPUNIT_TEST(testSetInvalidPraetOption); |
26 | 26 | CPPUNIT_TEST(testWhitespaceHandlingKEEP); |
... | ... | @@ -39,8 +39,8 @@ private: |
39 | 39 | void testAnalyzeIterateWithWhitespaceHandlingKEEP(); |
40 | 40 | void testAnalyzeIterateWithWhitespaceHandlingAPPEND(); |
41 | 41 | void testAnalyzeVector1(); |
42 | - void testOpenInvalidFile(); | |
43 | - void testOpenNonExistentFile(); | |
42 | + void testOpenInvalidDict(); | |
43 | + void testOpenNonExistentDict(); | |
44 | 44 | void testSetInvalidAgglOption(); |
45 | 45 | void testSetInvalidPraetOption(); |
46 | 46 | void testWhitespaceHandlingKEEP(); |
... | ... |
morfeusz/wrappers/java/JMorfeuszTest.java
1 | 1 | |
2 | 2 | import java.io.File; |
3 | 3 | import java.io.IOException; |
4 | +import java.io.PrintStream; | |
4 | 5 | import java.util.List; |
5 | 6 | import java.util.NoSuchElementException; |
6 | 7 | import org.junit.After; |
... | ... | @@ -52,12 +53,11 @@ public class JMorfeuszTest { |
52 | 53 | try { |
53 | 54 | res.get(2); |
54 | 55 | fail(); |
55 | - } | |
56 | - catch (IndexOutOfBoundsException ex) { | |
57 | - | |
56 | + } catch (IndexOutOfBoundsException ex) { | |
57 | + | |
58 | 58 | } |
59 | 59 | } |
60 | - | |
60 | + | |
61 | 61 | @Test |
62 | 62 | public void testAnalyzeAsIterator() { |
63 | 63 | ResultsIterator it = morfeusz.analyseAsIterator("Aaaa żżżż"); |
... | ... | @@ -68,9 +68,8 @@ public class JMorfeuszTest { |
68 | 68 | try { |
69 | 69 | it.next(); |
70 | 70 | fail(); |
71 | - } | |
72 | - catch (NoSuchElementException ex) { | |
73 | - | |
71 | + } catch (NoSuchElementException ex) { | |
72 | + | |
74 | 73 | } |
75 | 74 | } |
76 | 75 | |
... | ... | @@ -78,7 +77,7 @@ public class JMorfeuszTest { |
78 | 77 | public void testInvalidAgglOption() { |
79 | 78 | morfeusz.setAggl("XXXXYYYYZZZZ"); |
80 | 79 | } |
81 | - | |
80 | + | |
82 | 81 | @Test(expected = MorfeuszException.class) |
83 | 82 | public void testInvalidPraetOption() { |
84 | 83 | morfeusz.setPraet("XXXXYYYYZZZZ"); |
... | ... | @@ -94,18 +93,21 @@ public class JMorfeuszTest { |
94 | 93 | morfeusz.setCaseHandling(null); |
95 | 94 | } |
96 | 95 | |
97 | - @Test(expected = IOException.class) | |
98 | - public void testNonExistingDictionaryFile() throws IOException { | |
99 | - fail("not implemented yet"); | |
100 | -// File tmpFile = File.createTempFile("morfeusz_invalid_dict", ".test"); | |
101 | -// tmpFile.delete(); | |
102 | -// morfeusz.setGeneratorDictionary(tmpFile.getAbsolutePath()); | |
96 | + @Test(expected = MorfeuszException.class) | |
97 | + public void testNonExistingDictionary() throws IOException { | |
98 | + morfeusz.setDictionary("ee2rmtsq"); | |
103 | 99 | } |
104 | 100 | |
105 | 101 | @Test(expected = IOException.class) |
106 | - public void testInvalidDictionaryFile() throws IOException { | |
107 | - fail("not implemented yet"); | |
108 | -// File tmpFile = File.createTempFile("morfeusz_invalid_dict", ".test"); | |
109 | -// morfeusz.setGeneratorDictionary(tmpFile.getAbsolutePath()); | |
102 | + public void testInvalidDictionary() throws Exception { | |
103 | + String dictName = "6J1vMiqY"; | |
104 | + File tmpFile = new File(dictName + "-a.dict"); | |
105 | + assertTrue(tmpFile.createNewFile()); | |
106 | + tmpFile.deleteOnExit(); | |
107 | + try (PrintStream out = new PrintStream(tmpFile)) { | |
108 | + out.print("IzEne9FXuc"); | |
109 | + } | |
110 | + morfeusz.getDictionarySearchPaths().add(0, "."); | |
111 | + morfeusz.setDictionary(dictName); | |
110 | 112 | } |
111 | 113 | } |
... | ... |
morfeusz/wrappers/morfeusz_java.i
... | ... | @@ -147,11 +147,21 @@ import java.util.ArrayList; |
147 | 147 | jenv->ThrowNew(clazz, "Invalid file format"); |
148 | 148 | return $null; |
149 | 149 | } |
150 | + catch(morfeusz::MorfeuszException & e) { | |
151 | + jclass clazz = jenv->FindClass("pl/waw/ipipan/morfeusz/MorfeuszException"); | |
152 | + jenv->ThrowNew(clazz, e.what()); | |
153 | + return $null; | |
154 | + } | |
150 | 155 | catch(std::ios_base::failure & e) { |
151 | 156 | jclass clazz = jenv->FindClass("java/io/IOException"); |
152 | 157 | jenv->ThrowNew(clazz, e.what()); |
153 | 158 | return $null; |
154 | 159 | } |
160 | + catch(...) { | |
161 | + jclass clazz = jenv->FindClass("java/lang/RuntimeException"); | |
162 | + jenv->ThrowNew(clazz, "Unknown exception"); | |
163 | + return $null; | |
164 | + } | |
155 | 165 | } |
156 | 166 | |
157 | 167 | //%javaexception("java.io.IOException") morfeusz::Morfeusz::setGeneratorDictionary { |
... | ... |
morfeusz/wrappers/morfeusz_python.i
... | ... | @@ -23,6 +23,27 @@ |
23 | 23 | } |
24 | 24 | } |
25 | 25 | |
26 | +%exception morfeusz::Morfeusz::setDictionary { | |
27 | + try{ | |
28 | + $action | |
29 | + } | |
30 | + catch(const std::ios_base::failure& e) { | |
31 | + SWIG_exception(SWIG_IOError, const_cast<char*>(e.what())); | |
32 | + } | |
33 | + catch(const morfeusz::MorfeuszException& e) { | |
34 | + SWIG_exception(SWIG_IOError, const_cast<char*>(e.what())); | |
35 | + } | |
36 | + catch(const std::invalid_argument& e) { | |
37 | + SWIG_exception(SWIG_ValueError, const_cast<char*>(e.what())); | |
38 | + } | |
39 | + catch(const std::string& e) { | |
40 | + SWIG_exception(SWIG_RuntimeError, const_cast<char*>(e.c_str())); | |
41 | + } | |
42 | + catch(...) { | |
43 | + SWIG_exception(SWIG_RuntimeError, "Unknown exception"); | |
44 | + } | |
45 | +} | |
46 | + | |
26 | 47 | %ignore morfeusz::MorfeuszException; |
27 | 48 | %ignore morfeusz::FileFormatException; |
28 | 49 | |
... | ... |
morfeusz/wrappers/python/test.py
... | ... | @@ -76,18 +76,20 @@ class TestSequenceFunctions(unittest.TestCase): |
76 | 76 | pass |
77 | 77 | |
78 | 78 | def testNonExistingDictionaryFile(self): |
79 | - _, path = tempfile.mkstemp() | |
80 | - os.remove(path) | |
81 | 79 | try: |
82 | - self.morfeusz.setGeneratorDictionary(path) | |
80 | + self.morfeusz.setDictionary("1P4sEBuWv") | |
83 | 81 | self.fail() |
84 | 82 | except IOError: |
85 | 83 | pass |
86 | 84 | |
87 | 85 | def testInvalidDictionaryFile(self): |
88 | - _, path = tempfile.mkstemp() | |
86 | + dirpath = tempfile.mkdtemp() | |
87 | + dictName = '6J1vMiqY' | |
88 | + path = os.path.join(dirpath, dictName + '-a.dict') | |
89 | + with open(path, "a+") as f: | |
90 | + f.write('ee2rmtsq') | |
89 | 91 | try: |
90 | - self.morfeusz.setGeneratorDictionary(path) | |
92 | + self.morfeusz.setDictionary(dictName) | |
91 | 93 | self.fail() |
92 | 94 | except IOError: |
93 | 95 | pass |
... | ... |