|
1
2
3
4
5
6
7
8
9
10
11
12
|
/*
* File: TestMorfeusz.cpp
* Author: lennyn
*
* Created on Jun 27, 2014, 1:03:19 PM
*/
#include "TestMorfeusz.hpp"
#include <cstdio>
#include <vector>
#include <fstream>
|
|
13
|
#include <stdexcept>
|
|
14
|
#include <iostream>
|
|
15
16
17
18
19
20
21
22
23
24
25
26
27
|
// Hook the TestMorfeusz fixture into CppUnit's suite registration so that a
// test runner can discover it.
CPPUNIT_TEST_SUITE_REGISTRATION(TestMorfeusz);
// File-wide shortcuts: the tests below use names from std (string, vector,
// cerr, ...) and from the morfeusz namespace without qualification.
using namespace std;
using namespace morfeusz;
// Constructor: intentionally empty. The object under test is obtained in
// setUp(), not here.
TestMorfeusz::TestMorfeusz() {
}
// Destructor: intentionally empty; it performs no cleanup of its own.
TestMorfeusz::~TestMorfeusz() {
}
/**
 * Fixture initialisation.
 *
 * Obtains a fresh instance from Morfeusz::createInstance(), requested with the
 * BOTH_ANALYSE_AND_GENERATE usage flag, and stores it in `morfeusz`, the handle
 * every test in this file works on. Progress banners go to stderr before and
 * after the instance is created.
 */
void TestMorfeusz::setUp() {
    std::cerr << "SET UP" << std::endl;

    morfeusz = Morfeusz::createInstance(BOTH_ANALYSE_AND_GENERATE);

    std::cerr << "SET UP done" << std::endl;
}
void TestMorfeusz::tearDown() {
|
|
34
|
cerr << "TEAR DOWN" << endl;
|
|
35
|
// delete morfeusz;
|
|
36
37
38
|
}
void TestMorfeusz::testAnalyzeIterate1() {
|
|
39
|
cerr << "testAnalyzeIterate1" << endl;
|
|
40
|
ResultsIterator* it = morfeusz->analyse("AAAAbbbbCCCC");
|
|
41
|
CPPUNIT_ASSERT(it->hasNext());
|
|
42
43
|
CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), it->peek().orth);
CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), it->next().orth);
|
|
44
|
CPPUNIT_ASSERT(!it->hasNext());
|
|
45
46
|
CPPUNIT_ASSERT_THROW(it->peek(), std::out_of_range);
CPPUNIT_ASSERT_THROW(it->next(), std::out_of_range);
|
|
47
48
49
|
delete it;
}
|
|
50
51
|
void TestMorfeusz::testAnalyzeIterateWithWhitespaceHandlingKEEP() {
cerr << "testAnalyzeIterateWithWhitespaceHandlingKEEP" << endl;
|
|
52
|
morfeusz->setWhitespaceHandling(KEEP_WHITESPACES);
|
|
53
|
ResultsIterator* it = morfeusz->analyse(" AAAAbbbbCCCC DDDDeeee.\t");
|
|
54
|
|
|
55
|
CPPUNIT_ASSERT(it->hasNext());
|
|
56
|
CPPUNIT_ASSERT_EQUAL(string(" "), it->next().orth);
|
|
57
|
|
|
58
|
CPPUNIT_ASSERT(it->hasNext());
|
|
59
|
CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), it->next().orth);
|
|
60
|
|
|
61
|
CPPUNIT_ASSERT(it->hasNext());
|
|
62
|
CPPUNIT_ASSERT_EQUAL(string(" "), it->next().orth);
|
|
63
|
|
|
64
|
CPPUNIT_ASSERT(it->hasNext());
|
|
65
|
CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), it->next().orth);
|
|
66
|
|
|
67
|
CPPUNIT_ASSERT(it->hasNext());
|
|
68
|
CPPUNIT_ASSERT_EQUAL(string("."), it->next().orth);
|
|
69
|
|
|
70
|
CPPUNIT_ASSERT(it->hasNext());
|
|
71
|
CPPUNIT_ASSERT_EQUAL(string("\t"), it->next().orth);
|
|
72
|
|
|
73
|
CPPUNIT_ASSERT(!it->hasNext());
|
|
74
75
|
CPPUNIT_ASSERT_THROW(it->peek(), std::out_of_range);
CPPUNIT_ASSERT_THROW(it->next(), std::out_of_range);
|
|
76
77
78
79
80
|
delete it;
}
void TestMorfeusz::testAnalyzeIterateWithWhitespaceHandlingAPPEND() {
cerr << "testAnalyzeIterateWithWhitespaceHandlingAPPEND" << endl;
|
|
81
|
morfeusz->setWhitespaceHandling(APPEND_WHITESPACES);
|
|
82
|
ResultsIterator* it = morfeusz->analyse(" AAAAbbbbCCCC DDDDeeee.\t");
|
|
83
|
|
|
84
|
CPPUNIT_ASSERT(it->hasNext());
|
|
85
|
CPPUNIT_ASSERT_EQUAL(string(" AAAAbbbbCCCC "), it->next().orth);
|
|
86
|
|
|
87
|
CPPUNIT_ASSERT(it->hasNext());
|
|
88
|
CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), it->next().orth);
|
|
89
|
|
|
90
|
CPPUNIT_ASSERT(it->hasNext());
|
|
91
|
CPPUNIT_ASSERT_EQUAL(string(".\t"), it->next().orth);
|
|
92
|
|
|
93
|
CPPUNIT_ASSERT(!it->hasNext());
|
|
94
95
|
CPPUNIT_ASSERT_THROW(it->peek(), std::out_of_range);
CPPUNIT_ASSERT_THROW(it->next(), std::out_of_range);
|
|
96
97
98
|
delete it;
}
|
|
99
|
void TestMorfeusz::testAnalyzeVector1() {
|
|
100
|
cerr << "testAnalyzeVector1" << endl;
|
|
101
|
vector<MorphInterpretation> res;
|
|
102
|
morfeusz->analyse("AAAAbbbbCCCC", res);
|
|
103
|
CPPUNIT_ASSERT_EQUAL((size_t) 1, res.size());
|
|
104
105
|
CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].orth);
CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].lemma);
|
|
106
107
|
}
|
|
108
109
|
/**
 * Creates a deliberately malformed dictionary file for negative tests.
 *
 * Writes the single line "asfasdfa" to a file named "<dictName>-a.dict"
 * (the name is used as given, so a plain name lands in the current working
 * directory). The caller is responsible for removing the file afterwards.
 *
 * @param dictName base name; "-a.dict" is appended to form the file name.
 * @return the name of the file that was written.
 * @throws std::runtime_error if the file cannot be created or written, so a
 *         broken test environment is reported as such instead of surfacing
 *         later as a confusing assertion failure.
 */
static inline std::string prepareErrorneusDictFile(const std::string& dictName) {
    std::string filename = dictName + "-a.dict";
    std::ofstream out(filename.c_str());
    out << "asfasdfa" << std::endl;
    // A failed open leaves the stream in a failed state, so this single check
    // covers both "could not create" and "could not write".
    if (!out) {
        throw std::runtime_error("cannot write test dictionary file: " + filename);
    }
    return filename;
}
|
|
116
117
118
119
120
121
122
123
|
void TestMorfeusz::testOpenInvalidDict() {
cerr << "testOpenInvalidDict" << endl;
string dictName = "asdfasdfasdfa";
string filename = prepareErrorneusDictFile(dictName);
morfeusz->dictionarySearchPaths.push_front(".");
cerr << "still alive..." << endl;
CPPUNIT_ASSERT_THROW(morfeusz->setDictionary(dictName), FileFormatException);
remove(filename.c_str());
|
|
124
125
|
}
|
|
126
|
void TestMorfeusz::testOpenNonExistentDict() {
|
|
127
128
|
// cerr << "testOpenNonExistentFile" << endl;
// string filename(tmpnam(NULL));
|
|
129
|
CPPUNIT_ASSERT_THROW(morfeusz->setDictionary("asdfasdfa"), MorfeuszException);
|
|
130
131
132
|
}
void TestMorfeusz::testSetInvalidAgglOption() {
|
|
133
|
cerr << "testSetInvalidAgglOption" << endl;
|
|
134
135
136
137
|
CPPUNIT_ASSERT_THROW(morfeusz->setAggl("asdfasdfa"), MorfeuszException);
}
void TestMorfeusz::testSetInvalidPraetOption() {
|
|
138
|
cerr << "testSetInvalidPraetOption" << endl;
|
|
139
140
141
142
|
CPPUNIT_ASSERT_THROW(morfeusz->setPraet("asdfasdfa"), MorfeuszException);
}
void TestMorfeusz::testWhitespaceHandlingKEEP() {
|
|
143
144
|
cerr << "testWhitespaceHandlingKEEP" << endl;
vector<MorphInterpretation> res;
|
|
145
|
morfeusz->setWhitespaceHandling(KEEP_WHITESPACES);
|
|
146
|
morfeusz->analyse(" AAAAbbbbCCCC DDDDeeee\t", res);
|
|
147
|
CPPUNIT_ASSERT_EQUAL((size_t) 5, res.size());
|
|
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
|
CPPUNIT_ASSERT_EQUAL(string(" "), res[0].orth);
CPPUNIT_ASSERT_EQUAL(string(" "), res[0].lemma);
CPPUNIT_ASSERT_EQUAL(1, res[0].tagId);
CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[1].orth);
CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[1].lemma);
CPPUNIT_ASSERT_EQUAL(0, res[1].tagId);
CPPUNIT_ASSERT_EQUAL(string(" "), res[2].orth);
CPPUNIT_ASSERT_EQUAL(string(" "), res[2].lemma);
CPPUNIT_ASSERT_EQUAL(1, res[2].tagId);
CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), res[3].orth);
CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), res[3].lemma);
CPPUNIT_ASSERT_EQUAL(0, res[3].tagId);
CPPUNIT_ASSERT_EQUAL(string("\t"), res[4].orth);
CPPUNIT_ASSERT_EQUAL(string("\t"), res[4].lemma);
CPPUNIT_ASSERT_EQUAL(1, res[4].tagId);
|
|
163
164
165
|
}
void TestMorfeusz::testWhitespaceHandlingAPPEND() {
|
|
166
167
|
cerr << "testWhitespaceHandlingAPPEND" << endl;
vector<MorphInterpretation> res;
|
|
168
|
morfeusz->setWhitespaceHandling(APPEND_WHITESPACES);
|
|
169
|
morfeusz->analyse(" AAAAbbbbCCCC DDDDeeee\t", res);
|
|
170
|
CPPUNIT_ASSERT_EQUAL((size_t) 2, res.size());
|
|
171
172
173
174
175
176
|
CPPUNIT_ASSERT_EQUAL(string(" AAAAbbbbCCCC "), res[0].orth);
CPPUNIT_ASSERT_EQUAL(string("AAAAbbbbCCCC"), res[0].lemma);
CPPUNIT_ASSERT_EQUAL(0, res[0].tagId);
CPPUNIT_ASSERT_EQUAL(string("DDDDeeee\t"), res[1].orth);
CPPUNIT_ASSERT_EQUAL(string("DDDDeeee"), res[1].lemma);
CPPUNIT_ASSERT_EQUAL(0, res[1].tagId);
|
|
177
|
}
|
|
178
179
180
181
182
183
184
185
186
187
188
189
190
|
// A freshly created instance (setUp() only calls createInstance) must report
// SKIP_WHITESPACES from getWhitespaceHandling().
void TestMorfeusz::testDefaultWhitespaceHandling() {
CPPUNIT_ASSERT_EQUAL(morfeusz::SKIP_WHITESPACES, morfeusz->getWhitespaceHandling());
}
// A freshly created instance (setUp() only calls createInstance) must report
// CONDITIONALLY_CASE_SENSITIVE from getCaseHandling().
void TestMorfeusz::testDefaultCaseHandling() {
CPPUNIT_ASSERT_EQUAL(morfeusz::CONDITIONALLY_CASE_SENSITIVE, morfeusz->getCaseHandling());
}
// A freshly created instance (setUp() only calls createInstance) must report
// SEPARATE_NUMBERING from getTokenNumbering().
void TestMorfeusz::testDefaultTokenNumbering() {
CPPUNIT_ASSERT_EQUAL(morfeusz::SEPARATE_NUMBERING, morfeusz->getTokenNumbering());
}
|