Commit 2aa5d412d78f1cd31c8f1554c05fe4e3bff9880e (1 parent: 62f69e6e)
poprawienie morfeusz_analyzer i morfeusz_generator tak, by używały nowszej wersji API
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@224 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing 8 changed files with 45 additions and 88 deletions
morfeusz/MorfeuszInternal.hpp
| ... | ... | @@ -198,18 +198,6 @@ private: |
| 198 | 198 | mutable InflexionGraph graph; |
| 199 | 199 | }; |
| 200 | 200 | |
| 201 | -//class ResultsIterator { | |
| 202 | -//public: | |
| 203 | -// MorphInterpretation getNext(); | |
| 204 | -// bool hasNext(); | |
| 205 | -// friend class MorfeuszInternal; | |
| 206 | -//private: | |
| 207 | -// ResultsIterator(const std::vector<MorphInterpretation>& res); | |
| 208 | -// const char* rawInput; | |
| 209 | -// std::list<MorphInterpretation> resultsBuffer; | |
| 210 | -// int startNode; | |
| 211 | -//}; | |
| 212 | - | |
| 213 | 201 | } |
| 214 | 202 | |
| 215 | 203 | #endif /* MORFEUSZ_HPP */ |
| ... | ... |
morfeusz/MorphInterpretation.cpp
| ... | ... | @@ -20,7 +20,6 @@ MorphInterpretation::MorphInterpretation( |
| 20 | 20 | int endNode, |
| 21 | 21 | const string& orth, |
| 22 | 22 | const string& lemma, |
| 23 | - // const string& homonymId, | |
| 24 | 23 | int tagnum, |
| 25 | 24 | int namenum, |
| 26 | 25 | int qualifiersNum, |
| ... | ... | @@ -29,29 +28,23 @@ MorphInterpretation::MorphInterpretation( |
| 29 | 28 | endNode(endNode), |
| 30 | 29 | orth(orth), |
| 31 | 30 | lemma(lemma), |
| 32 | -//homonymId(homonymId), | |
| 33 | 31 | tagnum(tagnum), |
| 34 | 32 | namenum(namenum), |
| 35 | -tag(env.getTagset().getTag(tagnum, env.getCharsetConverter())), | |
| 36 | -name(env.getTagset().getName(namenum, env.getCharsetConverter())), | |
| 37 | -qualifiers(&env.getQualifiersHelper().getQualifiers(qualifiersNum)) { | |
| 33 | +qualifiersNum(qualifiersNum), | |
| 34 | +env(&env) { | |
| 38 | 35 | |
| 39 | 36 | |
| 40 | 37 | } |
| 41 | 38 | |
| 42 | -static const vector<std::string> emptyQualifiers; | |
| 43 | - | |
| 44 | 39 | MorphInterpretation::MorphInterpretation() |
| 45 | 40 | : startNode(), |
| 46 | 41 | endNode(), |
| 47 | 42 | orth(), |
| 48 | 43 | lemma(), |
| 49 | -//homonymId(homonymId), | |
| 50 | 44 | tagnum(), |
| 51 | 45 | namenum(), |
| 52 | -tag(), | |
| 53 | -name(), | |
| 54 | -qualifiers(&emptyQualifiers){ | |
| 46 | +qualifiersNum(0), | |
| 47 | +env(NULL) { | |
| 55 | 48 | |
| 56 | 49 | } |
| 57 | 50 | |
| ... | ... | @@ -63,13 +56,10 @@ MorphInterpretation::MorphInterpretation( |
| 63 | 56 | endNode(startNode + 1), |
| 64 | 57 | orth(orth), |
| 65 | 58 | lemma(orth), |
| 66 | -//homonymId(""), | |
| 67 | 59 | tagnum(0), |
| 68 | 60 | namenum(0), |
| 69 | -// qualifiersNum(0), | |
| 70 | -tag(env.getTagset().getTag(0, env.getCharsetConverter())), | |
| 71 | -name(env.getTagset().getName(0, env.getCharsetConverter())), | |
| 72 | -qualifiers(&emptyQualifiers) { | |
| 61 | +qualifiersNum(0), | |
| 62 | +env(&env) { | |
| 73 | 63 | |
| 74 | 64 | } |
| 75 | 65 | |
| ... | ... | @@ -117,16 +107,16 @@ int MorphInterpretation::getNamenum() const { |
| 117 | 107 | return this->namenum; |
| 118 | 108 | } |
| 119 | 109 | |
| 120 | -const std::string& MorphInterpretation::getTag() const { | |
| 121 | - return this->tag; | |
| 110 | +const std::string MorphInterpretation::getTag() const { | |
| 111 | + return env->getTagset().getTag(tagnum, env->getCharsetConverter()); | |
| 122 | 112 | } |
| 123 | 113 | |
| 124 | -const std::string& MorphInterpretation::getName() const { | |
| 125 | - return this->name; | |
| 114 | +const std::string MorphInterpretation::getName() const { | |
| 115 | + return env->getTagset().getName(namenum, env->getCharsetConverter()); | |
| 126 | 116 | } |
| 127 | 117 | |
| 128 | 118 | const vector<string>& MorphInterpretation::getQualifiers() const { |
| 129 | - return *this->qualifiers; | |
| 119 | + return env->getQualifiersHelper().getQualifiers(qualifiersNum); | |
| 130 | 120 | } |
| 131 | 121 | |
| 132 | 122 | static inline string getQualifiersStr(const MorphInterpretation& mi) { |
| ... | ... | @@ -148,16 +138,13 @@ std::string MorphInterpretation::toString(bool includeNodeNumbers) const { |
| 148 | 138 | res << orth << ","; |
| 149 | 139 | |
| 150 | 140 | res << lemma; |
| 151 | -// if (!this->homonymId.empty()) { | |
| 152 | -// res << ":" << homonymId; | |
| 153 | -// } | |
| 154 | 141 | res << ","; |
| 155 | 142 | |
| 156 | - res << tag; | |
| 157 | - if (!name.empty()) { | |
| 158 | - res << "," << name; | |
| 143 | + res << getTag(); | |
| 144 | + if (!getName().empty()) { | |
| 145 | + res << "," << getName(); | |
| 159 | 146 | } |
| 160 | - if (!qualifiers->empty()) { | |
| 147 | + if (!getQualifiers().empty()) { | |
| 161 | 148 | res << "," << getQualifiersStr(*this); |
| 162 | 149 | } |
| 163 | 150 | return res.str(); |
| ... | ... |
morfeusz/cli/cli.cpp
| ... | ... | @@ -140,7 +140,7 @@ static Charset getCharset(const string& encodingStr) { |
| 140 | 140 | } |
| 141 | 141 | } |
| 142 | 142 | |
| 143 | -void initializeMorfeusz(ezOptionParser& opt, MorfeuszInternal& morfeusz, MorfeuszProcessorType processorType) { | |
| 143 | +void initializeMorfeusz(ezOptionParser& opt, Morfeusz& morfeusz, MorfeuszProcessorType processorType) { | |
| 144 | 144 | if (opt.isSet("-i")) { |
| 145 | 145 | string dictFile; |
| 146 | 146 | opt.get("-i")->getString(dictFile); |
| ... | ... |
morfeusz/cli/cli.hpp
| ... | ... | @@ -14,7 +14,9 @@ |
| 14 | 14 | #endif |
| 15 | 15 | |
| 16 | 16 | #include <iostream> |
| 17 | -#include "MorfeuszInternal.hpp" | |
| 17 | +#include "morfeusz2.h" | |
| 18 | +#include "outputUtils.hpp" | |
| 19 | +#include "const.hpp" | |
| 18 | 20 | |
| 19 | 21 | #pragma GCC diagnostic push |
| 20 | 22 | #pragma GCC diagnostic ignored "-Wsign-compare" |
| ... | ... | @@ -33,7 +35,7 @@ |
| 33 | 35 | namespace morfeusz { |
| 34 | 36 | |
| 35 | 37 | ez::ezOptionParser* getOptions(int argc, const char** argv, MorfeuszProcessorType processorType); |
| 36 | - void initializeMorfeusz(ez::ezOptionParser& opt, MorfeuszInternal& morfeusz, MorfeuszProcessorType processorType); | |
| 38 | + void initializeMorfeusz(ez::ezOptionParser& opt, Morfeusz& morfeusz, MorfeuszProcessorType processorType); | |
| 37 | 39 | |
| 38 | 40 | } |
| 39 | 41 | |
| ... | ... |
morfeusz/cli/outputUtils.hpp
morfeusz/morfeusz2.h
| ... | ... | @@ -21,13 +21,13 @@ namespace morfeusz { |
| 21 | 21 | class MorfeuszException; |
| 22 | 22 | |
| 23 | 23 | enum Charset { |
| 24 | - UTF8, | |
| 24 | + UTF8 = 101, | |
| 25 | 25 | // UTF16LE, |
| 26 | 26 | // UTF16BE, |
| 27 | 27 | // UTF32, |
| 28 | - ISO8859_2, | |
| 29 | - CP1250, | |
| 30 | - CP852 | |
| 28 | + ISO8859_2 = 102, | |
| 29 | + CP1250 = 103, | |
| 30 | + CP852 = 104 | |
| 31 | 31 | }; |
| 32 | 32 | |
| 33 | 33 | /** |
| ... | ... | @@ -55,9 +55,6 @@ namespace morfeusz { |
| 55 | 55 | */ |
| 56 | 56 | virtual void setGeneratorDictionary(const std::string& filename) = 0; |
| 57 | 57 | |
| 58 | - /** | |
| 59 | - * Destroys Morfeusz object. | |
| 60 | - */ | |
| 61 | 58 | virtual ~Morfeusz(); |
| 62 | 59 | |
| 63 | 60 | /** |
| ... | ... | @@ -147,8 +144,6 @@ namespace morfeusz { |
| 147 | 144 | * @param debug |
| 148 | 145 | */ |
| 149 | 146 | virtual void setDebug(bool debug) = 0; |
| 150 | - | |
| 151 | - friend class ResultsIterator; | |
| 152 | 147 | }; |
| 153 | 148 | |
| 154 | 149 | class ResultsIterator { |
| ... | ... | @@ -179,17 +174,16 @@ namespace morfeusz { |
| 179 | 174 | const Environment& env); |
| 180 | 175 | MorphInterpretation(); |
| 181 | 176 | static MorphInterpretation createIgn(int startNode, const std::string& orth, const Environment& env); |
| 182 | - // virtual ~MorphInterpretation(); | |
| 177 | + virtual ~MorphInterpretation() {} | |
| 183 | 178 | int getStartNode() const; |
| 184 | 179 | int getEndNode() const; |
| 185 | 180 | const std::string& getOrth() const; |
| 186 | 181 | const std::string& getLemma() const; |
| 187 | - // const std::string& getHomonymId() const; | |
| 188 | 182 | bool hasHomonym(const std::string& homonymId) const; |
| 189 | 183 | int getTagnum() const; |
| 190 | 184 | int getNamenum() const; |
| 191 | - const std::string& getTag() const; | |
| 192 | - const std::string& getName() const; | |
| 185 | + const std::string getTag() const; | |
| 186 | + const std::string getName() const; | |
| 193 | 187 | const std::vector<std::string>& getQualifiers() const; |
| 194 | 188 | |
| 195 | 189 | std::string toString(bool includeNodeNumbers) const; |
| ... | ... | @@ -202,12 +196,14 @@ namespace morfeusz { |
| 202 | 196 | int endNode; |
| 203 | 197 | std::string orth; |
| 204 | 198 | std::string lemma; |
| 205 | - // std::string homonymId; | |
| 206 | 199 | int tagnum; |
| 207 | 200 | int namenum; |
| 208 | - std::string tag; | |
| 209 | - std::string name; | |
| 210 | - const std::vector<std::string>* qualifiers; | |
| 201 | + int qualifiersNum; | |
| 202 | + | |
| 203 | + /** | |
| 204 | + * not owned by this | |
| 205 | + */ | |
| 206 | + const Environment* env; | |
| 211 | 207 | }; |
| 212 | 208 | |
| 213 | 209 | class MorfeuszException : public std::exception { |
| ... | ... | @@ -226,20 +222,11 @@ namespace morfeusz { |
| 226 | 222 | const std::string msg; |
| 227 | 223 | }; |
| 228 | 224 | |
| 229 | - class FileFormatException : public std::exception { | |
| 225 | + class FileFormatException : public MorfeuszException { | |
| 230 | 226 | public: |
| 231 | 227 | |
| 232 | - FileFormatException(const std::string& what) : msg(what.c_str()) { | |
| 233 | - } | |
| 234 | - | |
| 235 | - virtual ~FileFormatException() throw () { | |
| 228 | + FileFormatException(const std::string& what) : MorfeuszException(what) { | |
| 236 | 229 | } |
| 237 | - | |
| 238 | - virtual const char* what() const throw () { | |
| 239 | - return this->msg.c_str(); | |
| 240 | - } | |
| 241 | - private: | |
| 242 | - const std::string msg; | |
| 243 | 230 | }; |
| 244 | 231 | } |
| 245 | 232 | |
| ... | ... |
morfeusz/morfeusz_analyzer.cpp
| ... | ... | @@ -9,14 +9,9 @@ |
| 9 | 9 | #include <iostream> |
| 10 | 10 | #include <vector> |
| 11 | 11 | #include <map> |
| 12 | -#include "fsa/fsa.hpp" | |
| 13 | -#include "Tagset.hpp" | |
| 14 | -#include "MorfeuszInternal.hpp" | |
| 12 | +#include "morfeusz2.h" | |
| 15 | 13 | #include "morfeusz_version.h" |
| 16 | -#include "const.hpp" | |
| 17 | - | |
| 18 | 14 | #include "cli/cli.hpp" |
| 19 | -#include "cli/outputUtils.hpp" | |
| 20 | 15 | |
| 21 | 16 | using namespace std; |
| 22 | 17 | using namespace morfeusz; |
| ... | ... | @@ -24,8 +19,8 @@ using namespace morfeusz; |
| 24 | 19 | int main(int argc, const char** argv) { |
| 25 | 20 | cerr << "Morfeusz analyzer, version: " << MORFEUSZ_VERSION << endl; |
| 26 | 21 | ez::ezOptionParser& opt = *getOptions(argc, argv, ANALYZER); |
| 27 | - MorfeuszInternal morfeusz; | |
| 28 | - initializeMorfeusz(opt, morfeusz, ANALYZER); | |
| 22 | + Morfeusz* morfeusz = Morfeusz::createInstance(); | |
| 23 | + initializeMorfeusz(opt, *morfeusz, ANALYZER); | |
| 29 | 24 | // Morfeusz morfeusz(getMorfeuszFromCLI(argc, argv, "Morfeusz analyzer")); |
| 30 | 25 | |
| 31 | 26 | string line; |
| ... | ... | @@ -33,9 +28,10 @@ int main(int argc, const char** argv) { |
| 33 | 28 | while (getline(cin, line)) { |
| 34 | 29 | // printf("%s\n", line.c_str()); |
| 35 | 30 | res.clear(); |
| 36 | - morfeusz.analyze(line, res); | |
| 31 | + morfeusz->analyze(line, res); | |
| 37 | 32 | printMorphResults(res, true); |
| 38 | 33 | } |
| 34 | + delete morfeusz; | |
| 39 | 35 | printf("\n"); |
| 40 | 36 | delete &opt; |
| 41 | 37 | return 0; |
| ... | ... |
morfeusz/morfeusz_generator.cpp
| ... | ... | @@ -8,13 +8,9 @@ |
| 8 | 8 | #include <cstdlib> |
| 9 | 9 | #include <iostream> |
| 10 | 10 | #include <vector> |
| 11 | -#include "fsa/fsa.hpp" | |
| 12 | -#include "Tagset.hpp" | |
| 13 | -#include "MorfeuszInternal.hpp" | |
| 11 | +#include "morfeusz2.h" | |
| 14 | 12 | #include "morfeusz_version.h" |
| 15 | -#include "const.hpp" | |
| 16 | 13 | #include "cli/cli.hpp" |
| 17 | -#include "cli/outputUtils.hpp" | |
| 18 | 14 | |
| 19 | 15 | using namespace std; |
| 20 | 16 | using namespace morfeusz; |
| ... | ... | @@ -22,13 +18,13 @@ using namespace morfeusz; |
| 22 | 18 | int main(int argc, const char** argv) { |
| 23 | 19 | cerr << "Morfeusz generator, version: " << MORFEUSZ_VERSION << endl; |
| 24 | 20 | ez::ezOptionParser& opt = *getOptions(argc, argv, GENERATOR); |
| 25 | - MorfeuszInternal morfeusz; | |
| 26 | - initializeMorfeusz(opt, morfeusz, GENERATOR); | |
| 21 | + Morfeusz* morfeusz = Morfeusz::createInstance(); | |
| 22 | + initializeMorfeusz(opt, *morfeusz, GENERATOR); | |
| 27 | 23 | string line; |
| 28 | 24 | while (getline(cin, line)) { |
| 29 | 25 | // printf("%s\n", line.c_str()); |
| 30 | 26 | vector<MorphInterpretation> res; |
| 31 | - morfeusz.generate(line, res); | |
| 27 | + morfeusz->generate(line, res); | |
| 32 | 28 | printMorphResults(res, false); |
| 33 | 29 | } |
| 34 | 30 | printf("\n"); |
| ... | ... |