Blame view

morfeusz/tests/TestMorfeusz.cpp 6.42 KB
Michał Lenart authored
1
2
3
4
5
6
7
8
9
10
11
12
/*
 * File:   TestMorfeusz.cpp
 * Author: lennyn
 *
 * Created on Jun 27, 2014, 1:03:19 PM
 */

#include "TestMorfeusz.hpp"

#include <cstdio>
#include <vector>
#include <fstream>
Michał Lenart authored
13
#include <stdexcept>
Michał Lenart authored
14
#include <iostream>
Michał Lenart authored
15
16
17
18
19
20
21
22
23
24
25
26
27

CPPUNIT_TEST_SUITE_REGISTRATION(TestMorfeusz);

using namespace std;
using namespace morfeusz;

/**
 * Constructs the fixture. No work is done here; the object under test is
 * created in setUp().
 */
TestMorfeusz::TestMorfeusz() {}

/**
 * Destroys the fixture. Nothing is released here.
 */
TestMorfeusz::~TestMorfeusz() {}

void TestMorfeusz::setUp() {
Michał Lenart authored
28
    cerr << "SET UP" << endl;
Michał Lenart authored
29
    morfeusz = Morfeusz::createInstance(BOTH_ANALYSE_AND_GENERATE);
Michał Lenart authored
30
    cerr << "SET UP done" << endl;
Michał Lenart authored
31
32
33
}

/**
 * Fixture tear-down hook: only traces on stderr.
 *
 * NOTE(review): the instance created in setUp() is not deleted here (the
 * delete below is disabled in the original code) -- confirm whether that is
 * intentional before re-enabling it.
 */
void TestMorfeusz::tearDown() {
    std::cerr << "TEAR DOWN" << std::endl;
    // delete morfeusz;
}

void TestMorfeusz::testAnalyzeIterate1() {
Michał Lenart authored
39
    cerr << "testAnalyzeIterate1" << endl;
Michał Lenart authored
40
    ResultsIterator* it = morfeusz->analyse("AAAAbbbbCCCC");
Michał Lenart authored
41
    CPPUNIT_ASSERT(it->hasNext());
Michał Lenart authored
42
43
    CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), it->peek().orth);
    CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), it->next().orth);
Michał Lenart authored
44
    CPPUNIT_ASSERT(!it->hasNext());
Michał Lenart authored
45
46
    CPPUNIT_ASSERT_THROW(it->peek(), std::out_of_range);
    CPPUNIT_ASSERT_THROW(it->next(), std::out_of_range);
Michał Lenart authored
47
48
49
    delete it;
}
Michał Lenart authored
50
51
void TestMorfeusz::testAnalyzeIterateWithWhitespaceHandlingKEEP() {
    cerr << "testAnalyzeIterateWithWhitespaceHandlingKEEP" << endl;
Michał Lenart authored
52
    morfeusz->setWhitespaceHandling(KEEP_WHITESPACES);
Michał Lenart authored
53
    ResultsIterator* it = morfeusz->analyse(" AAAAbbbbCCCC  DDDDeeee.\t");
Michał Lenart authored
54
Michał Lenart authored
55
    CPPUNIT_ASSERT(it->hasNext());
Michał Lenart authored
56
    CPPUNIT_ASSERT_EQUAL(string(" "), it->next().orth);
Michał Lenart authored
57
Michał Lenart authored
58
    CPPUNIT_ASSERT(it->hasNext());
Michał Lenart authored
59
    CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), it->next().orth);
Michał Lenart authored
60
Michał Lenart authored
61
    CPPUNIT_ASSERT(it->hasNext());
Michał Lenart authored
62
    CPPUNIT_ASSERT_EQUAL(string("  "), it->next().orth);
Michał Lenart authored
63
Michał Lenart authored
64
    CPPUNIT_ASSERT(it->hasNext());
Michał Lenart authored
65
    CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), it->next().orth);
Michał Lenart authored
66
Michał Lenart authored
67
    CPPUNIT_ASSERT(it->hasNext());
Michał Lenart authored
68
    CPPUNIT_ASSERT_EQUAL(string("."), it->next().orth);
Michał Lenart authored
69
Michał Lenart authored
70
    CPPUNIT_ASSERT(it->hasNext());
Michał Lenart authored
71
    CPPUNIT_ASSERT_EQUAL(string("\t"), it->next().orth);
Michał Lenart authored
72
Michał Lenart authored
73
    CPPUNIT_ASSERT(!it->hasNext());
Michał Lenart authored
74
75
    CPPUNIT_ASSERT_THROW(it->peek(), std::out_of_range);
    CPPUNIT_ASSERT_THROW(it->next(), std::out_of_range);
Michał Lenart authored
76
77
78
79
80
    delete it;
}

void TestMorfeusz::testAnalyzeIterateWithWhitespaceHandlingAPPEND() {
    cerr << "testAnalyzeIterateWithWhitespaceHandlingAPPEND" << endl;
Michał Lenart authored
81
    morfeusz->setWhitespaceHandling(APPEND_WHITESPACES);
Michał Lenart authored
82
    ResultsIterator* it = morfeusz->analyse(" AAAAbbbbCCCC  DDDDeeee.\t");
Michał Lenart authored
83
Michał Lenart authored
84
    CPPUNIT_ASSERT(it->hasNext());
Michał Lenart authored
85
    CPPUNIT_ASSERT_EQUAL(string(" AAAAbbbbCCCC  "), it->next().orth);
Michał Lenart authored
86
Michał Lenart authored
87
    CPPUNIT_ASSERT(it->hasNext());
Michał Lenart authored
88
    CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), it->next().orth);
Michał Lenart authored
89
Michał Lenart authored
90
    CPPUNIT_ASSERT(it->hasNext());
Michał Lenart authored
91
    CPPUNIT_ASSERT_EQUAL(string(".\t"), it->next().orth);
Michał Lenart authored
92
Michał Lenart authored
93
    CPPUNIT_ASSERT(!it->hasNext());
Michał Lenart authored
94
95
    CPPUNIT_ASSERT_THROW(it->peek(), std::out_of_range);
    CPPUNIT_ASSERT_THROW(it->next(), std::out_of_range);
Michał Lenart authored
96
97
98
    delete it;
}
Michał Lenart authored
99
void TestMorfeusz::testAnalyzeVector1() {
Michał Lenart authored
100
    cerr << "testAnalyzeVector1" << endl;
Michał Lenart authored
101
    vector<MorphInterpretation> res;
Michał Lenart authored
102
    morfeusz->analyse("AAAAbbbbCCCC", res);
Michał Lenart authored
103
    CPPUNIT_ASSERT_EQUAL((size_t) 1, res.size());
Michał Lenart authored
104
105
    CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].orth);
    CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].lemma);
Michał Lenart authored
106
107
}
Michał Lenart authored
108
109
/**
 * Writes a deliberately malformed dictionary file for the given dictionary
 * name.
 *
 * The file is created at the path `dictName + "-a.dict"` and contains the
 * single line "asfasdfa".
 *
 * @param dictName dictionary name used as the file-name prefix
 * @return the name of the file that was written
 * @throws std::runtime_error if the file cannot be created or written, so
 *         that a broken environment surfaces here rather than as a
 *         confusing failure later in the calling test
 */
static inline std::string prepareErrorneusDictFile(const std::string& dictName) {
    std::string filename = dictName + "-a.dict";

    std::ofstream out(filename.c_str());
    if (!out) {
        throw std::runtime_error("cannot create dictionary file: " + filename);
    }

    out << "asfasdfa" << std::endl;

    // close() sets failbit on failure, so write errors are caught here.
    out.close();
    if (!out) {
        throw std::runtime_error("cannot write dictionary file: " + filename);
    }

    return filename;
}
Michał Lenart authored
116
117
118
119
120
121
122
123
void TestMorfeusz::testOpenInvalidDict() {
    cerr << "testOpenInvalidDict" << endl;
    string dictName = "asdfasdfasdfa";
    string filename = prepareErrorneusDictFile(dictName);
    morfeusz->dictionarySearchPaths.push_front(".");
    cerr << "still alive..." << endl;
    CPPUNIT_ASSERT_THROW(morfeusz->setDictionary(dictName), FileFormatException);
    remove(filename.c_str());
Michał Lenart authored
124
125
}
Michał Lenart authored
126
void TestMorfeusz::testOpenNonExistentDict() {
Michał Lenart authored
127
128
    //    cerr << "testOpenNonExistentFile" << endl;
    //    string filename(tmpnam(NULL));
Michał Lenart authored
129
    CPPUNIT_ASSERT_THROW(morfeusz->setDictionary("asdfasdfa"), MorfeuszException);
Michał Lenart authored
130
131
132
}

void TestMorfeusz::testSetInvalidAgglOption() {
Michał Lenart authored
133
    cerr << "testSetInvalidAgglOption" << endl;
Michał Lenart authored
134
135
136
137
    CPPUNIT_ASSERT_THROW(morfeusz->setAggl("asdfasdfa"), MorfeuszException);
}

void TestMorfeusz::testSetInvalidPraetOption() {
Michał Lenart authored
138
    cerr << "testSetInvalidPraetOption" << endl;
Michał Lenart authored
139
140
141
142
    CPPUNIT_ASSERT_THROW(morfeusz->setPraet("asdfasdfa"), MorfeuszException);
}

void TestMorfeusz::testWhitespaceHandlingKEEP() {
Michał Lenart authored
143
144
    cerr << "testWhitespaceHandlingKEEP" << endl;
    vector<MorphInterpretation> res;
Michał Lenart authored
145
    morfeusz->setWhitespaceHandling(KEEP_WHITESPACES);
Michał Lenart authored
146
    morfeusz->analyse("  AAAAbbbbCCCC DDDDeeee\t", res);
Michał Lenart authored
147
    CPPUNIT_ASSERT_EQUAL((size_t) 5, res.size());
Michał Lenart authored
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
    CPPUNIT_ASSERT_EQUAL(string("  "), res[0].orth);
    CPPUNIT_ASSERT_EQUAL(string("  "), res[0].lemma);
    CPPUNIT_ASSERT_EQUAL(1, res[0].tagId);
    CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[1].orth);
    CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[1].lemma);
    CPPUNIT_ASSERT_EQUAL(0, res[1].tagId);
    CPPUNIT_ASSERT_EQUAL(string(" "), res[2].orth);
    CPPUNIT_ASSERT_EQUAL(string(" "), res[2].lemma);
    CPPUNIT_ASSERT_EQUAL(1, res[2].tagId);
    CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), res[3].orth);
    CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), res[3].lemma);
    CPPUNIT_ASSERT_EQUAL(0, res[3].tagId);
    CPPUNIT_ASSERT_EQUAL(string("\t"), res[4].orth);
    CPPUNIT_ASSERT_EQUAL(string("\t"), res[4].lemma);
    CPPUNIT_ASSERT_EQUAL(1, res[4].tagId);
Michał Lenart authored
163
164
165
}

void TestMorfeusz::testWhitespaceHandlingAPPEND() {
Michał Lenart authored
166
167
    cerr << "testWhitespaceHandlingAPPEND" << endl;
    vector<MorphInterpretation> res;
Michał Lenart authored
168
    morfeusz->setWhitespaceHandling(APPEND_WHITESPACES);
Michał Lenart authored
169
    morfeusz->analyse("  AAAAbbbbCCCC DDDDeeee\t", res);
Michał Lenart authored
170
    CPPUNIT_ASSERT_EQUAL((size_t) 2, res.size());
Michał Lenart authored
171
172
173
174
175
176
    CPPUNIT_ASSERT_EQUAL(string("  AAAAbbbbCCCC "), res[0].orth);
    CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].lemma);
    CPPUNIT_ASSERT_EQUAL(0, res[0].tagId);
    CPPUNIT_ASSERT_EQUAL(string("DDDDeeee\t"), res[1].orth);
    CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), res[1].lemma);
    CPPUNIT_ASSERT_EQUAL(0, res[1].tagId);
Michał Lenart authored
177
}
Michał Lenart authored
178
179
180
181
182
183
184
185
186
187
188
189
190

void TestMorfeusz::testDefaultWhitespaceHandling() {
    CPPUNIT_ASSERT_EQUAL(morfeusz::SKIP_WHITESPACES, morfeusz->getWhitespaceHandling());
}

void TestMorfeusz::testDefaultCaseHandling() {
    CPPUNIT_ASSERT_EQUAL(morfeusz::CONDITIONALLY_CASE_SENSITIVE, morfeusz->getCaseHandling());

}
void TestMorfeusz::testDefaultTokenNumbering() {
    CPPUNIT_ASSERT_EQUAL(morfeusz::SEPARATE_NUMBERING, morfeusz->getTokenNumbering());

}