Commit 2aa5d412d78f1cd31c8f1554c05fe4e3bff9880e
1 parent: 62f69e6e
poprawienie morfeusz_analyzer i morfeusz_generator tak, by używały nowszej wersji API
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@224 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing 8 changed files with 45 additions and 88 deletions
morfeusz/MorfeuszInternal.hpp
... | ... | @@ -198,18 +198,6 @@ private: |
198 | 198 | mutable InflexionGraph graph; |
199 | 199 | }; |
200 | 200 | |
201 | -//class ResultsIterator { | |
202 | -//public: | |
203 | -// MorphInterpretation getNext(); | |
204 | -// bool hasNext(); | |
205 | -// friend class MorfeuszInternal; | |
206 | -//private: | |
207 | -// ResultsIterator(const std::vector<MorphInterpretation>& res); | |
208 | -// const char* rawInput; | |
209 | -// std::list<MorphInterpretation> resultsBuffer; | |
210 | -// int startNode; | |
211 | -//}; | |
212 | - | |
213 | 201 | } |
214 | 202 | |
215 | 203 | #endif /* MORFEUSZ_HPP */ |
... | ... |
morfeusz/MorphInterpretation.cpp
... | ... | @@ -20,7 +20,6 @@ MorphInterpretation::MorphInterpretation( |
20 | 20 | int endNode, |
21 | 21 | const string& orth, |
22 | 22 | const string& lemma, |
23 | - // const string& homonymId, | |
24 | 23 | int tagnum, |
25 | 24 | int namenum, |
26 | 25 | int qualifiersNum, |
... | ... | @@ -29,29 +28,23 @@ MorphInterpretation::MorphInterpretation( |
29 | 28 | endNode(endNode), |
30 | 29 | orth(orth), |
31 | 30 | lemma(lemma), |
32 | -//homonymId(homonymId), | |
33 | 31 | tagnum(tagnum), |
34 | 32 | namenum(namenum), |
35 | -tag(env.getTagset().getTag(tagnum, env.getCharsetConverter())), | |
36 | -name(env.getTagset().getName(namenum, env.getCharsetConverter())), | |
37 | -qualifiers(&env.getQualifiersHelper().getQualifiers(qualifiersNum)) { | |
33 | +qualifiersNum(qualifiersNum), | |
34 | +env(&env) { | |
38 | 35 | |
39 | 36 | |
40 | 37 | } |
41 | 38 | |
42 | -static const vector<std::string> emptyQualifiers; | |
43 | - | |
44 | 39 | MorphInterpretation::MorphInterpretation() |
45 | 40 | : startNode(), |
46 | 41 | endNode(), |
47 | 42 | orth(), |
48 | 43 | lemma(), |
49 | -//homonymId(homonymId), | |
50 | 44 | tagnum(), |
51 | 45 | namenum(), |
52 | -tag(), | |
53 | -name(), | |
54 | -qualifiers(&emptyQualifiers){ | |
46 | +qualifiersNum(0), | |
47 | +env(NULL) { | |
55 | 48 | |
56 | 49 | } |
57 | 50 | |
... | ... | @@ -63,13 +56,10 @@ MorphInterpretation::MorphInterpretation( |
63 | 56 | endNode(startNode + 1), |
64 | 57 | orth(orth), |
65 | 58 | lemma(orth), |
66 | -//homonymId(""), | |
67 | 59 | tagnum(0), |
68 | 60 | namenum(0), |
69 | -// qualifiersNum(0), | |
70 | -tag(env.getTagset().getTag(0, env.getCharsetConverter())), | |
71 | -name(env.getTagset().getName(0, env.getCharsetConverter())), | |
72 | -qualifiers(&emptyQualifiers) { | |
61 | +qualifiersNum(0), | |
62 | +env(&env) { | |
73 | 63 | |
74 | 64 | } |
75 | 65 | |
... | ... | @@ -117,16 +107,16 @@ int MorphInterpretation::getNamenum() const { |
117 | 107 | return this->namenum; |
118 | 108 | } |
119 | 109 | |
120 | -const std::string& MorphInterpretation::getTag() const { | |
121 | - return this->tag; | |
110 | +const std::string MorphInterpretation::getTag() const { | |
111 | + return env->getTagset().getTag(tagnum, env->getCharsetConverter()); | |
122 | 112 | } |
123 | 113 | |
124 | -const std::string& MorphInterpretation::getName() const { | |
125 | - return this->name; | |
114 | +const std::string MorphInterpretation::getName() const { | |
115 | + return env->getTagset().getName(namenum, env->getCharsetConverter()); | |
126 | 116 | } |
127 | 117 | |
128 | 118 | const vector<string>& MorphInterpretation::getQualifiers() const { |
129 | - return *this->qualifiers; | |
119 | + return env->getQualifiersHelper().getQualifiers(qualifiersNum); | |
130 | 120 | } |
131 | 121 | |
132 | 122 | static inline string getQualifiersStr(const MorphInterpretation& mi) { |
... | ... | @@ -148,16 +138,13 @@ std::string MorphInterpretation::toString(bool includeNodeNumbers) const { |
148 | 138 | res << orth << ","; |
149 | 139 | |
150 | 140 | res << lemma; |
151 | -// if (!this->homonymId.empty()) { | |
152 | -// res << ":" << homonymId; | |
153 | -// } | |
154 | 141 | res << ","; |
155 | 142 | |
156 | - res << tag; | |
157 | - if (!name.empty()) { | |
158 | - res << "," << name; | |
143 | + res << getTag(); | |
144 | + if (!getName().empty()) { | |
145 | + res << "," << getName(); | |
159 | 146 | } |
160 | - if (!qualifiers->empty()) { | |
147 | + if (!getQualifiers().empty()) { | |
161 | 148 | res << "," << getQualifiersStr(*this); |
162 | 149 | } |
163 | 150 | return res.str(); |
... | ... |
morfeusz/cli/cli.cpp
... | ... | @@ -140,7 +140,7 @@ static Charset getCharset(const string& encodingStr) { |
140 | 140 | } |
141 | 141 | } |
142 | 142 | |
143 | -void initializeMorfeusz(ezOptionParser& opt, MorfeuszInternal& morfeusz, MorfeuszProcessorType processorType) { | |
143 | +void initializeMorfeusz(ezOptionParser& opt, Morfeusz& morfeusz, MorfeuszProcessorType processorType) { | |
144 | 144 | if (opt.isSet("-i")) { |
145 | 145 | string dictFile; |
146 | 146 | opt.get("-i")->getString(dictFile); |
... | ... |
morfeusz/cli/cli.hpp
... | ... | @@ -14,7 +14,9 @@ |
14 | 14 | #endif |
15 | 15 | |
16 | 16 | #include <iostream> |
17 | -#include "MorfeuszInternal.hpp" | |
17 | +#include "morfeusz2.h" | |
18 | +#include "outputUtils.hpp" | |
19 | +#include "const.hpp" | |
18 | 20 | |
19 | 21 | #pragma GCC diagnostic push |
20 | 22 | #pragma GCC diagnostic ignored "-Wsign-compare" |
... | ... | @@ -33,7 +35,7 @@ |
33 | 35 | namespace morfeusz { |
34 | 36 | |
35 | 37 | ez::ezOptionParser* getOptions(int argc, const char** argv, MorfeuszProcessorType processorType); |
36 | - void initializeMorfeusz(ez::ezOptionParser& opt, MorfeuszInternal& morfeusz, MorfeuszProcessorType processorType); | |
38 | + void initializeMorfeusz(ez::ezOptionParser& opt, Morfeusz& morfeusz, MorfeuszProcessorType processorType); | |
37 | 39 | |
38 | 40 | } |
39 | 41 | |
... | ... |
morfeusz/cli/outputUtils.hpp
morfeusz/morfeusz2.h
... | ... | @@ -21,13 +21,13 @@ namespace morfeusz { |
21 | 21 | class MorfeuszException; |
22 | 22 | |
23 | 23 | enum Charset { |
24 | - UTF8, | |
24 | + UTF8 = 101, | |
25 | 25 | // UTF16LE, |
26 | 26 | // UTF16BE, |
27 | 27 | // UTF32, |
28 | - ISO8859_2, | |
29 | - CP1250, | |
30 | - CP852 | |
28 | + ISO8859_2 = 102, | |
29 | + CP1250 = 103, | |
30 | + CP852 = 104 | |
31 | 31 | }; |
32 | 32 | |
33 | 33 | /** |
... | ... | @@ -55,9 +55,6 @@ namespace morfeusz { |
55 | 55 | */ |
56 | 56 | virtual void setGeneratorDictionary(const std::string& filename) = 0; |
57 | 57 | |
58 | - /** | |
59 | - * Destroys Morfeusz object. | |
60 | - */ | |
61 | 58 | virtual ~Morfeusz(); |
62 | 59 | |
63 | 60 | /** |
... | ... | @@ -147,8 +144,6 @@ namespace morfeusz { |
147 | 144 | * @param debug |
148 | 145 | */ |
149 | 146 | virtual void setDebug(bool debug) = 0; |
150 | - | |
151 | - friend class ResultsIterator; | |
152 | 147 | }; |
153 | 148 | |
154 | 149 | class ResultsIterator { |
... | ... | @@ -179,17 +174,16 @@ namespace morfeusz { |
179 | 174 | const Environment& env); |
180 | 175 | MorphInterpretation(); |
181 | 176 | static MorphInterpretation createIgn(int startNode, const std::string& orth, const Environment& env); |
182 | - // virtual ~MorphInterpretation(); | |
177 | + virtual ~MorphInterpretation() {} | |
183 | 178 | int getStartNode() const; |
184 | 179 | int getEndNode() const; |
185 | 180 | const std::string& getOrth() const; |
186 | 181 | const std::string& getLemma() const; |
187 | - // const std::string& getHomonymId() const; | |
188 | 182 | bool hasHomonym(const std::string& homonymId) const; |
189 | 183 | int getTagnum() const; |
190 | 184 | int getNamenum() const; |
191 | - const std::string& getTag() const; | |
192 | - const std::string& getName() const; | |
185 | + const std::string getTag() const; | |
186 | + const std::string getName() const; | |
193 | 187 | const std::vector<std::string>& getQualifiers() const; |
194 | 188 | |
195 | 189 | std::string toString(bool includeNodeNumbers) const; |
... | ... | @@ -202,12 +196,14 @@ namespace morfeusz { |
202 | 196 | int endNode; |
203 | 197 | std::string orth; |
204 | 198 | std::string lemma; |
205 | - // std::string homonymId; | |
206 | 199 | int tagnum; |
207 | 200 | int namenum; |
208 | - std::string tag; | |
209 | - std::string name; | |
210 | - const std::vector<std::string>* qualifiers; | |
201 | + int qualifiersNum; | |
202 | + | |
203 | + /** | |
204 | + * not owned by this | |
205 | + */ | |
206 | + const Environment* env; | |
211 | 207 | }; |
212 | 208 | |
213 | 209 | class MorfeuszException : public std::exception { |
... | ... | @@ -226,20 +222,11 @@ namespace morfeusz { |
226 | 222 | const std::string msg; |
227 | 223 | }; |
228 | 224 | |
229 | - class FileFormatException : public std::exception { | |
225 | + class FileFormatException : public MorfeuszException { | |
230 | 226 | public: |
231 | 227 | |
232 | - FileFormatException(const std::string& what) : msg(what.c_str()) { | |
233 | - } | |
234 | - | |
235 | - virtual ~FileFormatException() throw () { | |
228 | + FileFormatException(const std::string& what) : MorfeuszException(what) { | |
236 | 229 | } |
237 | - | |
238 | - virtual const char* what() const throw () { | |
239 | - return this->msg.c_str(); | |
240 | - } | |
241 | - private: | |
242 | - const std::string msg; | |
243 | 230 | }; |
244 | 231 | } |
245 | 232 | |
... | ... |
morfeusz/morfeusz_analyzer.cpp
... | ... | @@ -9,14 +9,9 @@ |
9 | 9 | #include <iostream> |
10 | 10 | #include <vector> |
11 | 11 | #include <map> |
12 | -#include "fsa/fsa.hpp" | |
13 | -#include "Tagset.hpp" | |
14 | -#include "MorfeuszInternal.hpp" | |
12 | +#include "morfeusz2.h" | |
15 | 13 | #include "morfeusz_version.h" |
16 | -#include "const.hpp" | |
17 | - | |
18 | 14 | #include "cli/cli.hpp" |
19 | -#include "cli/outputUtils.hpp" | |
20 | 15 | |
21 | 16 | using namespace std; |
22 | 17 | using namespace morfeusz; |
... | ... | @@ -24,8 +19,8 @@ using namespace morfeusz; |
24 | 19 | int main(int argc, const char** argv) { |
25 | 20 | cerr << "Morfeusz analyzer, version: " << MORFEUSZ_VERSION << endl; |
26 | 21 | ez::ezOptionParser& opt = *getOptions(argc, argv, ANALYZER); |
27 | - MorfeuszInternal morfeusz; | |
28 | - initializeMorfeusz(opt, morfeusz, ANALYZER); | |
22 | + Morfeusz* morfeusz = Morfeusz::createInstance(); | |
23 | + initializeMorfeusz(opt, *morfeusz, ANALYZER); | |
29 | 24 | // Morfeusz morfeusz(getMorfeuszFromCLI(argc, argv, "Morfeusz analyzer")); |
30 | 25 | |
31 | 26 | string line; |
... | ... | @@ -33,9 +28,10 @@ int main(int argc, const char** argv) { |
33 | 28 | while (getline(cin, line)) { |
34 | 29 | // printf("%s\n", line.c_str()); |
35 | 30 | res.clear(); |
36 | - morfeusz.analyze(line, res); | |
31 | + morfeusz->analyze(line, res); | |
37 | 32 | printMorphResults(res, true); |
38 | 33 | } |
34 | + delete morfeusz; | |
39 | 35 | printf("\n"); |
40 | 36 | delete &opt; |
41 | 37 | return 0; |
... | ... |
morfeusz/morfeusz_generator.cpp
... | ... | @@ -8,13 +8,9 @@ |
8 | 8 | #include <cstdlib> |
9 | 9 | #include <iostream> |
10 | 10 | #include <vector> |
11 | -#include "fsa/fsa.hpp" | |
12 | -#include "Tagset.hpp" | |
13 | -#include "MorfeuszInternal.hpp" | |
11 | +#include "morfeusz2.h" | |
14 | 12 | #include "morfeusz_version.h" |
15 | -#include "const.hpp" | |
16 | 13 | #include "cli/cli.hpp" |
17 | -#include "cli/outputUtils.hpp" | |
18 | 14 | |
19 | 15 | using namespace std; |
20 | 16 | using namespace morfeusz; |
... | ... | @@ -22,13 +18,13 @@ using namespace morfeusz; |
22 | 18 | int main(int argc, const char** argv) { |
23 | 19 | cerr << "Morfeusz generator, version: " << MORFEUSZ_VERSION << endl; |
24 | 20 | ez::ezOptionParser& opt = *getOptions(argc, argv, GENERATOR); |
25 | - MorfeuszInternal morfeusz; | |
26 | - initializeMorfeusz(opt, morfeusz, GENERATOR); | |
21 | + Morfeusz* morfeusz = Morfeusz::createInstance(); | |
22 | + initializeMorfeusz(opt, *morfeusz, GENERATOR); | |
27 | 23 | string line; |
28 | 24 | while (getline(cin, line)) { |
29 | 25 | // printf("%s\n", line.c_str()); |
30 | 26 | vector<MorphInterpretation> res; |
31 | - morfeusz.generate(line, res); | |
27 | + morfeusz->generate(line, res); | |
32 | 28 | printMorphResults(res, false); |
33 | 29 | } |
34 | 30 | printf("\n"); |
... | ... |