Commit 58aafafe36f62bfa9e6b785ad28a1ea4c9042b24
1 parent
612cbdc9
- trochę refaktoryzacji, zrobienie klasy MorphInterpretation będącej krawędzią w grafie fleksyjnym
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@18 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing
10 changed files
with
172 additions
and
180 deletions
morfeusz/CMakeLists.txt
... | ... | @@ -6,7 +6,7 @@ |
6 | 6 | include_directories (${Morfeusz_SOURCE_DIR}/fsa) |
7 | 7 | add_library (morfeusz2 morfeusz.hpp morfeusz.cpp) |
8 | 8 | add_executable (morfeusz2_analyze main.cpp) |
9 | -add_executable (test_morph test_morph.cpp interpretations.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp) | |
9 | +add_executable (test_morph test_morph.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp) | |
10 | 10 | |
11 | 11 | # Link the morfeusz2_analyze executable to the morfeusz2 library. |
12 | 12 | target_link_libraries (morfeusz2_analyze morfeusz2) |
... | ... |
morfeusz/EncodedInterpretation.hpp
0 → 100644
1 | +/* | |
2 | + * File: EncodedInterpretation.hpp | |
3 | + * Author: mlenart | |
4 | + * | |
5 | + * Created on November 4, 2013, 3:11 PM | |
6 | + */ | |
7 | + | |
8 | +#ifndef INTERPRETATION_HPP | |
9 | +#define INTERPRETATION_HPP | |
10 | + | |
11 | +#include <string> | |
12 | +#include <sstream> | |
13 | +#include <iterator> | |
14 | +#include "Tagset.hpp" | |
15 | + | |
16 | +using namespace std; | |
17 | + | |
/*
 * Recipe for deriving a lemma from a word form: drop `suffixToCut` trailing
 * chars from the form, then append `suffixToAdd`.
 *
 * Kept as a plain aggregate (no constructors, no default member initializers)
 * so that brace initialization `{cut, "suffix"}` keeps working.
 */
struct EncodedLemma {
    int suffixToCut;          // number of trailing chars of the form to remove
    std::string suffixToAdd;  // text appended after the cut
};
22 | + | |
23 | +/* | |
24 | + * Internal representation of an interpretation - with lemma encoded | |
25 | + */ | |
26 | +struct EncodedInterpretation { | |
27 | + EncodedLemma lemma; | |
28 | + int tag; | |
29 | + int nameClassifier; | |
30 | +}; | |
31 | + | |
32 | +#endif /* INTERPRETATION_HPP */ | |
... | ... |
morfeusz/Morfeusz.hpp
morfeusz/MorphDeserializer.hpp
morfeusz/MorphInterpretation.cpp
0 → 100644
1 | +/* | |
2 | + * File: MorphInterpretation.cpp | |
3 | + * Author: mlenart | |
4 | + * | |
5 | + * Created on November 14, 2013, 11:47 AM | |
6 | + */ | |
7 | + | |
8 | +#include <string> | |
9 | +#include "MorphInterpretation.hpp" | |
10 | +#include "EncodedInterpretation.hpp" | |
11 | + | |
12 | +using namespace std; | |
13 | + | |
14 | +static string convertLemma( | |
15 | + const string& orth, | |
16 | + const EncodedLemma& lemma) { | |
17 | + string res(orth); | |
18 | + res.erase( | |
19 | + res.end() - lemma.suffixToCut, | |
20 | + res.end()); | |
21 | + res.append(lemma.suffixToAdd); | |
22 | + return res; | |
23 | +} | |
24 | + | |
25 | +MorphInterpretation::MorphInterpretation( | |
26 | + int startNode, | |
27 | + int endNode, | |
28 | + const std::string& orth, | |
29 | + const EncodedInterpretation& encodedInterp, | |
30 | + const Tagset& tagset) | |
31 | +: startNode(startNode), | |
32 | + endNode(endNode), | |
33 | + orth(orth), | |
34 | + lemma(convertLemma(orth, encodedInterp.lemma)), | |
35 | + tagnum(encodedInterp.tag), | |
36 | + namenum(encodedInterp.nameClassifier), | |
37 | + tag(tagset.getTag(encodedInterp.tag)), | |
38 | + name(tagset.getName(encodedInterp.nameClassifier)) { | |
39 | + | |
40 | +} | |
41 | + | |
42 | +MorphInterpretation::~MorphInterpretation() { | |
43 | +} | |
44 | + | |
45 | +const std::string& MorphInterpretation::getOrth() const { | |
46 | + return this->orth; | |
47 | +} | |
48 | + | |
49 | +const std::string& MorphInterpretation::getLemma() const { | |
50 | + return this->lemma; | |
51 | +} | |
52 | + | |
53 | +int MorphInterpretation::getTagnum() const { | |
54 | + return this->tagnum; | |
55 | +} | |
56 | + | |
57 | +int MorphInterpretation::getNamenum() const { | |
58 | + return this->namenum; | |
59 | +} | |
60 | + | |
61 | +const std::string& MorphInterpretation::getTag() const { | |
62 | + return this->tag; | |
63 | +} | |
64 | + | |
65 | +const std::string& MorphInterpretation::getName() const { | |
66 | + return this->name; | |
67 | +} | |
68 | + | |
... | ... |
morfeusz/MorphInterpretation.hpp
0 → 100644
1 | +/* | |
2 | + * File: MorphInterpretation.hpp | |
3 | + * Author: mlenart | |
4 | + * | |
5 | + * Created on November 14, 2013, 11:47 AM | |
6 | + */ | |
7 | + | |
8 | +#ifndef MORPHINTERPRETATION_HPP | |
9 | +#define MORPHINTERPRETATION_HPP | |
10 | + | |
11 | +#include <string> | |
12 | +#include "Tagset.hpp" | |
13 | +#include "EncodedInterpretation.hpp" | |
14 | + | |
15 | +class MorphInterpretation { | |
16 | +public: | |
17 | + MorphInterpretation( | |
18 | + int startNode, | |
19 | + int endNode, | |
20 | + const std::string& orth, | |
21 | + const EncodedInterpretation& encodedInterp, | |
22 | + const Tagset& tagset); | |
23 | + virtual ~MorphInterpretation(); | |
24 | + const std::string& getOrth() const; | |
25 | + const std::string& getLemma() const; | |
26 | + int getTagnum() const; | |
27 | + int getNamenum() const; | |
28 | + const std::string& getTag() const; | |
29 | + const std::string& getName() const; | |
30 | +private: | |
31 | + int startNode; | |
32 | + int endNode; | |
33 | + std::string orth; | |
34 | + std::string lemma; | |
35 | + int tagnum; | |
36 | + int namenum; | |
37 | + const std::string& tag; | |
38 | + const std::string& name; | |
39 | +}; | |
40 | + | |
41 | +#endif /* MORPHINTERPRETATION_HPP */ | |
42 | + | |
... | ... |
morfeusz/interpretations.cpp deleted
1 | - | |
2 | -#include "interpretations.hpp" | |
3 | -#include "Tagset.hpp" | |
4 | - | |
5 | -using namespace std; | |
6 | - | |
7 | -string TaggedInterpretation::toString() const { | |
8 | - std::stringstream ss; | |
9 | - ss << lemma << ":" << tag << ":" << name; | |
10 | - return ss.str(); | |
11 | -} | |
12 | - | |
13 | -template <class T> | |
14 | -string InterpretationsDecoder<T>::convertLemma( | |
15 | - const string& orth, | |
16 | - const EncodedLemma& lemma) const { | |
17 | - string res(orth); | |
18 | - res.erase( | |
19 | - res.end() - lemma.suffixToCut, | |
20 | - res.end()); | |
21 | - res.append(lemma.suffixToAdd); | |
22 | - return res; | |
23 | -} | |
24 | - | |
25 | -RawInterpretation RawInterpretationsDecoder::getInterpretation( | |
26 | - const string& orth, | |
27 | - const EncodedInterpretation& interp) const { | |
28 | - string lemma = this->convertLemma(orth, interp.lemma); | |
29 | - RawInterpretation res = {lemma, interp.tag, interp.nameClassifier}; | |
30 | - return res; | |
31 | -} | |
32 | - | |
33 | -TaggedInterpretationsDecoder::TaggedInterpretationsDecoder(const Tagset& tagset) | |
34 | -: tagset(tagset) { | |
35 | - | |
36 | -} | |
37 | - | |
38 | -TaggedInterpretation TaggedInterpretationsDecoder::getInterpretation( | |
39 | - const string& orth, | |
40 | - const EncodedInterpretation& interp) const { | |
41 | - string lemma = this->convertLemma(orth, interp.lemma); | |
42 | - const string& tag = this->tagset.getTag(interp.tag); | |
43 | - const string& name = this->tagset.getName(interp.nameClassifier); | |
44 | - TaggedInterpretation res = {lemma, tag, name}; | |
45 | - return res; | |
46 | -} |
morfeusz/interpretations.hpp deleted
1 | -/* | |
2 | - * File: interpretation.hpp | |
3 | - * Author: mlenart | |
4 | - * | |
5 | - * Created on November 4, 2013, 3:11 PM | |
6 | - */ | |
7 | - | |
8 | -#ifndef INTERPRETATION_HPP | |
9 | -#define INTERPRETATION_HPP | |
10 | - | |
11 | -#include <string> | |
12 | -#include <sstream> | |
13 | -#include <iterator> | |
14 | -#include "Tagset.hpp" | |
15 | - | |
16 | -using namespace std; | |
17 | - | |
18 | -struct EncodedLemma { | |
19 | - int suffixToCut; | |
20 | - string suffixToAdd; | |
21 | -}; | |
22 | - | |
23 | -/* | |
24 | - * Internal representation of an interpretation - with lemma encoded | |
25 | - */ | |
26 | -struct EncodedInterpretation { | |
27 | - EncodedLemma lemma; | |
28 | - int tag; | |
29 | - int nameClassifier; | |
30 | -}; | |
31 | - | |
32 | -class MorphInterpretation { | |
33 | -public: | |
34 | - MorphInterpretation( | |
35 | - int startNode, | |
36 | - int endNode, | |
37 | - const std::string& orth, | |
38 | - const EncodedInterpretation& encodedInterp); | |
39 | - const std::string& getOrth() const; | |
40 | - const std::string& getLemma() const; | |
41 | - int getTagnum() const; | |
42 | - int getNamenum() const; | |
43 | - const std::string& getTag(const Tagset& tagset) const; | |
44 | - const std::string& getName(const Tagset& tagset) const; | |
45 | -private: | |
46 | - int startNode; | |
47 | - int endNode; | |
48 | - std::string orth; | |
49 | - std::string lemma; | |
50 | - int tagnum; | |
51 | - int namenum; | |
52 | -}; | |
53 | - | |
54 | -// ALL BELOW IS DEPRECATED | |
55 | - | |
56 | -/* | |
57 | - * Interpretation with tags as integers (need a Tagset object to decode them) | |
58 | - */ | |
59 | -struct RawInterpretation { | |
60 | - string lemma; | |
61 | - int tagnum; | |
62 | - int namenum; | |
63 | -}; | |
64 | - | |
65 | -/* | |
66 | - * Interpretation with tags as strings (already processed with a Tagset object) | |
67 | - */ | |
68 | -struct TaggedInterpretation { | |
69 | - std::string lemma; | |
70 | - const std::string& tag; // np. subst:sg:nom:m1 | |
71 | - const std::string& name; // np. "pospolita" | |
72 | - std::string toString() const; | |
73 | -}; | |
74 | - | |
75 | -template <class InterpType> | |
76 | -class InterpretationsDecoder { | |
77 | -public: | |
78 | -// explicit InterpretationsDecoder(const Tagset& tagset); | |
79 | - | |
80 | - virtual InterpType getInterpretation( | |
81 | - const std::string& orth, | |
82 | - const EncodedInterpretation& interp) const = 0; | |
83 | - | |
84 | -protected: | |
85 | - std::string convertLemma(const std::string& orth, const EncodedLemma& interp) const; | |
86 | -}; | |
87 | - | |
88 | -class TaggedInterpretationsDecoder: public InterpretationsDecoder<TaggedInterpretation> { | |
89 | -public: | |
90 | - explicit TaggedInterpretationsDecoder(const Tagset& tagset); | |
91 | - | |
92 | - TaggedInterpretation getInterpretation( | |
93 | - const std::string& orth, | |
94 | - const EncodedInterpretation& interp) const; | |
95 | -private: | |
96 | - Tagset tagset; | |
97 | -}; | |
98 | - | |
99 | -class RawInterpretationsDecoder: public InterpretationsDecoder<RawInterpretation> { | |
100 | -public: | |
101 | - RawInterpretationsDecoder(); | |
102 | - | |
103 | - RawInterpretation getInterpretation( | |
104 | - const std::string& orth, | |
105 | - const EncodedInterpretation& interp) const; | |
106 | -}; | |
107 | - | |
108 | -#endif /* INTERPRETATION_HPP */ | |
109 | - |
morfeusz/test_morph.cpp
... | ... | @@ -9,10 +9,11 @@ |
9 | 9 | #include <sstream> |
10 | 10 | #include <iostream> |
11 | 11 | #include "fsa.hpp" |
12 | -#include "interpretations.hpp" | |
12 | +#include "EncodedInterpretation.hpp" | |
13 | 13 | #include "utils.hpp" |
14 | 14 | #include "MorphDeserializer.hpp" |
15 | 15 | #include "Morfeusz.hpp" |
16 | +#include "MorphInterpretation.hpp" | |
16 | 17 | |
17 | 18 | using namespace std; |
18 | 19 | |
... | ... | @@ -27,13 +28,14 @@ void debug(const string& key, const vector<EncodedInterpretation> value) { |
27 | 28 | cerr << "==================" << endl; |
28 | 29 | } |
29 | 30 | |
30 | -void debug(const string& key, const TaggedInterpretation& value) { | |
31 | - cerr << key << '\t' << value.toString() << endl; | |
32 | -} | |
31 | +//void debug(const string& key, const TaggedInterpretation& value) { | |
32 | +// cerr << key << '\t' << value.toString() << endl; | |
33 | +//} | |
33 | 34 | |
34 | 35 | void doTest( |
35 | - const FSA<vector<EncodedInterpretation>>& fsa, | |
36 | - const InterpretationsDecoder<TaggedInterpretation>& interpsConverter, | |
36 | + const FSA<vector<EncodedInterpretation>>& fsa, | |
37 | + const Tagset& tagset, | |
38 | +// const InterpretationsDecoder<TaggedInterpretation>& interpsConverter, | |
37 | 39 | const char* fname) { |
38 | 40 | ifstream ifs; |
39 | 41 | // ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit); |
... | ... | @@ -41,28 +43,29 @@ void doTest( |
41 | 43 | string line; |
42 | 44 | while (getline(ifs, line)) { |
43 | 45 | vector<string> splitVector(split(line, '\t')); |
44 | - string key = splitVector[0]; | |
46 | + string orth = splitVector[0]; | |
45 | 47 | string lemma = splitVector[1]; |
46 | 48 | string tag = splitVector[2]; |
47 | 49 | string name = splitVector[3]; |
48 | 50 | vector<EncodedInterpretation> value2; |
49 | - fsa.tryToRecognize(key.c_str(), value2); | |
51 | + fsa.tryToRecognize(orth.c_str(), value2); | |
50 | 52 | DEBUG("recognized "+to_string(value2.size())); |
51 | - vector<TaggedInterpretation> parsedValues; | |
53 | +// vector<TaggedInterpretation> parsedValues; | |
52 | 54 | bool found = false; |
53 | - for (EncodedInterpretation interp: value2) { | |
54 | - TaggedInterpretation parsedValue = interpsConverter.getInterpretation(key, interp); | |
55 | + for (EncodedInterpretation encodedInterp: value2) { | |
56 | +// TaggedInterpretation parsedValue = interpsConverter.getInterpretation(key, interp); | |
57 | + MorphInterpretation interp(0, 0, orth, encodedInterp, tagset); | |
55 | 58 | // parsedValues.push_back(parsedValue); |
56 | - debug(key, parsedValue); | |
57 | - if (lemma == parsedValue.lemma && tag == parsedValue.tag && name == parsedValue.name) { | |
59 | +// debug(orth, parsedValue); | |
60 | + if (lemma == interp.getLemma() && tag == interp.getTag() && name == interp.getName()) { | |
58 | 61 | DEBUG("RECOGNIZED"); |
59 | 62 | found = true; |
60 | 63 | } |
61 | 64 | else { |
62 | - DEBUG("not matching "+parsedValue.lemma+ " " + parsedValue.tag + " " + parsedValue.name); | |
65 | + DEBUG("not matching "+interp.getLemma()+ " " + interp.getTag() + " " + interp.getName()); | |
63 | 66 | } |
64 | 67 | } |
65 | - validate(found, "Failed to recognize " + key + " " + lemma + ":" + tag + ":" + name); | |
68 | + validate(found, "Failed to recognize " + orth + " " + lemma + ":" + tag + ":" + name); | |
66 | 69 | // debug(key, value2); |
67 | 70 | // validate(fsa.tryToRecognize(key.c_str(), value2), "Failed to recognize " + key); |
68 | 71 | } |
... | ... | @@ -79,10 +82,10 @@ int main(int argc, char** argv) { |
79 | 82 | DEBUG("DONE read FSA"); |
80 | 83 | DEBUG("will read tagset"); |
81 | 84 | Tagset tagset(fsaData); |
82 | - TaggedInterpretationsDecoder interpsDecoder(tagset); | |
85 | +// TaggedInterpretationsDecoder interpsDecoder(tagset); | |
83 | 86 | DEBUG("DONE read tagset"); |
84 | 87 | DEBUG("still alive"); |
85 | - doTest(*fsa, interpsDecoder, argv[2]); | |
88 | + doTest(*fsa, tagset, argv[2]); | |
86 | 89 | delete fsa; |
87 | 90 | return 0; |
88 | 91 | } |
... | ... |
nbproject/configurations.xml
... | ... | @@ -11,8 +11,9 @@ |
11 | 11 | <in>Morfeusz.cpp</in> |
12 | 12 | <in>Morfeusz.hpp</in> |
13 | 13 | <in>MorphDeserializer.cpp</in> |
14 | + <in>MorphInterpretation.cpp</in> | |
15 | + <in>MorphInterpretation.hpp</in> | |
14 | 16 | <in>Tagset.cpp</in> |
15 | - <in>interpretations.cpp</in> | |
16 | 17 | <in>main.cpp</in> |
17 | 18 | <in>morfeusz.cpp</in> |
18 | 19 | <in>test_morph.cpp</in> |
... | ... | @@ -93,11 +94,11 @@ |
93 | 94 | <ccTool> |
94 | 95 | </ccTool> |
95 | 96 | </item> |
96 | - <item path="morfeusz/Tagset.cpp" ex="false" tool="1" flavor2="8"> | |
97 | - <ccTool> | |
98 | - </ccTool> | |
97 | + <item path="morfeusz/MorphInterpretation.cpp" ex="false" tool="1" flavor2="0"> | |
99 | 98 | </item> |
100 | - <item path="morfeusz/interpretations.cpp" ex="false" tool="1" flavor2="8"> | |
99 | + <item path="morfeusz/MorphInterpretation.hpp" ex="false" tool="3" flavor2="0"> | |
100 | + </item> | |
101 | + <item path="morfeusz/Tagset.cpp" ex="false" tool="1" flavor2="8"> | |
101 | 102 | <ccTool> |
102 | 103 | </ccTool> |
103 | 104 | </item> |
... | ... |