Commit 58aafafe36f62bfa9e6b785ad28a1ea4c9042b24

Authored by Michał Lenart
1 parent 612cbdc9

- trochę refaktoryzacji, zrobienie klasy MorphInterpretation będącej krawędzią w grafie fleksyjnym

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@18 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
morfeusz/CMakeLists.txt
@@ -6,7 +6,7 @@ @@ -6,7 +6,7 @@
6 include_directories (${Morfeusz_SOURCE_DIR}/fsa) 6 include_directories (${Morfeusz_SOURCE_DIR}/fsa)
7 add_library (morfeusz2 morfeusz.hpp morfeusz.cpp) 7 add_library (morfeusz2 morfeusz.hpp morfeusz.cpp)
8 add_executable (morfeusz2_analyze main.cpp) 8 add_executable (morfeusz2_analyze main.cpp)
9 -add_executable (test_morph test_morph.cpp interpretations.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp) 9 +add_executable (test_morph test_morph.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp)
10 10
11 # Link the executable to the Hello library. 11 # Link the executable to the Hello library.
12 target_link_libraries (morfeusz2_analyze morfeusz2) 12 target_link_libraries (morfeusz2_analyze morfeusz2)
morfeusz/EncodedInterpretation.hpp 0 → 100644
  1 +/*
  2 + * File: EncodedInterpretation.hpp
  3 + * Author: mlenart
  4 + *
  5 + * Created on November 4, 2013, 3:11 PM
  6 + */
  7 +
  8 +#ifndef INTERPRETATION_HPP
  9 +#define INTERPRETATION_HPP
  10 +
  11 +#include <string>
  12 +#include <sstream>
  13 +#include <iterator>
  14 +#include "Tagset.hpp"
  15 +
  16 +using namespace std;
  17 +
  18 +struct EncodedLemma {
  19 + int suffixToCut;
  20 + string suffixToAdd;
  21 +};
  22 +
  23 +/*
  24 + * Internal representation of an interpretation - with lemma encoded
  25 + */
  26 +struct EncodedInterpretation {
  27 + EncodedLemma lemma;
  28 + int tag;
  29 + int nameClassifier;
  30 +};
  31 +
  32 +#endif /* INTERPRETATION_HPP */
morfeusz/Morfeusz.hpp
@@ -9,7 +9,8 @@ @@ -9,7 +9,8 @@
9 #define MORFEUSZ_HPP 9 #define MORFEUSZ_HPP
10 10
11 #include <string> 11 #include <string>
12 -#include "interpretations.hpp" 12 +#include "MorphInterpretation.hpp"
  13 +//#include "interpretations.hpp"
13 14
14 class Morfeusz; 15 class Morfeusz;
15 class AnalyzeResult; 16 class AnalyzeResult;
morfeusz/MorphDeserializer.hpp
@@ -10,7 +10,7 @@ @@ -10,7 +10,7 @@
10 10
11 #include <vector> 11 #include <vector>
12 #include "fsa.hpp" 12 #include "fsa.hpp"
13 -#include "interpretations.hpp" 13 +#include "EncodedInterpretation.hpp"
14 14
15 class MorphDeserializer: public Deserializer<std::vector<EncodedInterpretation>> { 15 class MorphDeserializer: public Deserializer<std::vector<EncodedInterpretation>> {
16 public: 16 public:
morfeusz/MorphInterpretation.cpp 0 → 100644
  1 +/*
  2 + * File: MorphInterpretation.cpp
  3 + * Author: mlenart
  4 + *
  5 + * Created on November 14, 2013, 11:47 AM
  6 + */
  7 +
  8 +#include <string>
  9 +#include "MorphInterpretation.hpp"
  10 +#include "EncodedInterpretation.hpp"
  11 +
  12 +using namespace std;
  13 +
  14 +static string convertLemma(
  15 + const string& orth,
  16 + const EncodedLemma& lemma) {
  17 + string res(orth);
  18 + res.erase(
  19 + res.end() - lemma.suffixToCut,
  20 + res.end());
  21 + res.append(lemma.suffixToAdd);
  22 + return res;
  23 +}
  24 +
  25 +MorphInterpretation::MorphInterpretation(
  26 + int startNode,
  27 + int endNode,
  28 + const std::string& orth,
  29 + const EncodedInterpretation& encodedInterp,
  30 + const Tagset& tagset)
  31 +: startNode(startNode),
  32 + endNode(endNode),
  33 + orth(orth),
  34 + lemma(convertLemma(orth, encodedInterp.lemma)),
  35 + tagnum(encodedInterp.tag),
  36 + namenum(encodedInterp.nameClassifier),
  37 + tag(tagset.getTag(encodedInterp.tag)),
  38 + name(tagset.getName(encodedInterp.nameClassifier)) {
  39 +
  40 +}
  41 +
  42 +MorphInterpretation::~MorphInterpretation() {
  43 +}
  44 +
  45 +const std::string& MorphInterpretation::getOrth() const {
  46 + return this->orth;
  47 +}
  48 +
  49 +const std::string& MorphInterpretation::getLemma() const {
  50 + return this->lemma;
  51 +}
  52 +
  53 +int MorphInterpretation::getTagnum() const {
  54 + return this->tagnum;
  55 +}
  56 +
  57 +int MorphInterpretation::getNamenum() const {
  58 + return this->namenum;
  59 +}
  60 +
  61 +const std::string& MorphInterpretation::getTag() const {
  62 + return this->tag;
  63 +}
  64 +
  65 +const std::string& MorphInterpretation::getName() const {
  66 + return this->name;
  67 +}
  68 +
morfeusz/MorphInterpretation.hpp 0 → 100644
  1 +/*
  2 + * File: MorphInterpretation.hpp
  3 + * Author: mlenart
  4 + *
  5 + * Created on November 14, 2013, 11:47 AM
  6 + */
  7 +
  8 +#ifndef MORPHINTERPRETATION_HPP
  9 +#define MORPHINTERPRETATION_HPP
  10 +
  11 +#include <string>
  12 +#include "Tagset.hpp"
  13 +#include "EncodedInterpretation.hpp"
  14 +
  15 +class MorphInterpretation {
  16 +public:
  17 + MorphInterpretation(
  18 + int startNode,
  19 + int endNode,
  20 + const std::string& orth,
  21 + const EncodedInterpretation& encodedInterp,
  22 + const Tagset& tagset);
  23 + virtual ~MorphInterpretation();
  24 + const std::string& getOrth() const;
  25 + const std::string& getLemma() const;
  26 + int getTagnum() const;
  27 + int getNamenum() const;
  28 + const std::string& getTag() const;
  29 + const std::string& getName() const;
  30 +private:
  31 + int startNode;
  32 + int endNode;
  33 + std::string orth;
  34 + std::string lemma;
  35 + int tagnum;
  36 + int namenum;
  37 + const std::string& tag;
  38 + const std::string& name;
  39 +};
  40 +
  41 +#endif /* MORPHINTERPRETATION_HPP */
  42 +
morfeusz/interpretations.cpp deleted
1 -  
2 -#include "interpretations.hpp"  
3 -#include "Tagset.hpp"  
4 -  
5 -using namespace std;  
6 -  
7 -string TaggedInterpretation::toString() const {  
8 - std::stringstream ss;  
9 - ss << lemma << ":" << tag << ":" << name;  
10 - return ss.str();  
11 -}  
12 -  
13 -template <class T>  
14 -string InterpretationsDecoder<T>::convertLemma(  
15 - const string& orth,  
16 - const EncodedLemma& lemma) const {  
17 - string res(orth);  
18 - res.erase(  
19 - res.end() - lemma.suffixToCut,  
20 - res.end());  
21 - res.append(lemma.suffixToAdd);  
22 - return res;  
23 -}  
24 -  
25 -RawInterpretation RawInterpretationsDecoder::getInterpretation(  
26 - const string& orth,  
27 - const EncodedInterpretation& interp) const {  
28 - string lemma = this->convertLemma(orth, interp.lemma);  
29 - RawInterpretation res = {lemma, interp.tag, interp.nameClassifier};  
30 - return res;  
31 -}  
32 -  
33 -TaggedInterpretationsDecoder::TaggedInterpretationsDecoder(const Tagset& tagset)  
34 -: tagset(tagset) {  
35 -  
36 -}  
37 -  
38 -TaggedInterpretation TaggedInterpretationsDecoder::getInterpretation(  
39 - const string& orth,  
40 - const EncodedInterpretation& interp) const {  
41 - string lemma = this->convertLemma(orth, interp.lemma);  
42 - const string& tag = this->tagset.getTag(interp.tag);  
43 - const string& name = this->tagset.getName(interp.nameClassifier);  
44 - TaggedInterpretation res = {lemma, tag, name};  
45 - return res;  
46 -}  
morfeusz/interpretations.hpp deleted
1 -/*  
2 - * File: interpretation.hpp  
3 - * Author: mlenart  
4 - *  
5 - * Created on November 4, 2013, 3:11 PM  
6 - */  
7 -  
8 -#ifndef INTERPRETATION_HPP  
9 -#define INTERPRETATION_HPP  
10 -  
11 -#include <string>  
12 -#include <sstream>  
13 -#include <iterator>  
14 -#include "Tagset.hpp"  
15 -  
16 -using namespace std;  
17 -  
18 -struct EncodedLemma {  
19 - int suffixToCut;  
20 - string suffixToAdd;  
21 -};  
22 -  
23 -/*  
24 - * Internal representation of an interpretation - with lemma encoded  
25 - */  
26 -struct EncodedInterpretation {  
27 - EncodedLemma lemma;  
28 - int tag;  
29 - int nameClassifier;  
30 -};  
31 -  
32 -class MorphInterpretation {  
33 -public:  
34 - MorphInterpretation(  
35 - int startNode,  
36 - int endNode,  
37 - const std::string& orth,  
38 - const EncodedInterpretation& encodedInterp);  
39 - const std::string& getOrth() const;  
40 - const std::string& getLemma() const;  
41 - int getTagnum() const;  
42 - int getNamenum() const;  
43 - const std::string& getTag(const Tagset& tagset) const;  
44 - const std::string& getName(const Tagset& tagset) const;  
45 -private:  
46 - int startNode;  
47 - int endNode;  
48 - std::string orth;  
49 - std::string lemma;  
50 - int tagnum;  
51 - int namenum;  
52 -};  
53 -  
54 -// ALL BELOW IS DEPRECATED  
55 -  
56 -/*  
57 - * Interpretation with tags as integers (need a Tagset object to decode them)  
58 - */  
59 -struct RawInterpretation {  
60 - string lemma;  
61 - int tagnum;  
62 - int namenum;  
63 -};  
64 -  
65 -/*  
66 - * Interpretation with tags as strings (already processed with a Tagset object)  
67 - */  
68 -struct TaggedInterpretation {  
69 - std::string lemma;  
70 - const std::string& tag; // np. subst:sg:nom:m1  
71 - const std::string& name; // np. "pospolita"  
72 - std::string toString() const;  
73 -};  
74 -  
75 -template <class InterpType>  
76 -class InterpretationsDecoder {  
77 -public:  
78 -// explicit InterpretationsDecoder(const Tagset& tagset);  
79 -  
80 - virtual InterpType getInterpretation(  
81 - const std::string& orth,  
82 - const EncodedInterpretation& interp) const = 0;  
83 -  
84 -protected:  
85 - std::string convertLemma(const std::string& orth, const EncodedLemma& interp) const;  
86 -};  
87 -  
88 -class TaggedInterpretationsDecoder: public InterpretationsDecoder<TaggedInterpretation> {  
89 -public:  
90 - explicit TaggedInterpretationsDecoder(const Tagset& tagset);  
91 -  
92 - TaggedInterpretation getInterpretation(  
93 - const std::string& orth,  
94 - const EncodedInterpretation& interp) const;  
95 -private:  
96 - Tagset tagset;  
97 -};  
98 -  
99 -class RawInterpretationsDecoder: public InterpretationsDecoder<RawInterpretation> {  
100 -public:  
101 - RawInterpretationsDecoder();  
102 -  
103 - RawInterpretation getInterpretation(  
104 - const std::string& orth,  
105 - const EncodedInterpretation& interp) const;  
106 -};  
107 -  
108 -#endif /* INTERPRETATION_HPP */  
109 -  
morfeusz/test_morph.cpp
@@ -9,10 +9,11 @@ @@ -9,10 +9,11 @@
9 #include <sstream> 9 #include <sstream>
10 #include <iostream> 10 #include <iostream>
11 #include "fsa.hpp" 11 #include "fsa.hpp"
12 -#include "interpretations.hpp" 12 +#include "EncodedInterpretation.hpp"
13 #include "utils.hpp" 13 #include "utils.hpp"
14 #include "MorphDeserializer.hpp" 14 #include "MorphDeserializer.hpp"
15 #include "Morfeusz.hpp" 15 #include "Morfeusz.hpp"
  16 +#include "MorphInterpretation.hpp"
16 17
17 using namespace std; 18 using namespace std;
18 19
@@ -27,13 +28,14 @@ void debug(const string&amp; key, const vector&lt;EncodedInterpretation&gt; value) { @@ -27,13 +28,14 @@ void debug(const string&amp; key, const vector&lt;EncodedInterpretation&gt; value) {
27 cerr << "==================" << endl; 28 cerr << "==================" << endl;
28 } 29 }
29 30
30 -void debug(const string& key, const TaggedInterpretation& value) {  
31 - cerr << key << '\t' << value.toString() << endl;  
32 -} 31 +//void debug(const string& key, const TaggedInterpretation& value) {
  32 +// cerr << key << '\t' << value.toString() << endl;
  33 +//}
33 34
34 void doTest( 35 void doTest(
35 - const FSA<vector<EncodedInterpretation>>& fsa,  
36 - const InterpretationsDecoder<TaggedInterpretation>& interpsConverter, 36 + const FSA<vector<EncodedInterpretation>>& fsa,
  37 + const Tagset& tagset,
  38 +// const InterpretationsDecoder<TaggedInterpretation>& interpsConverter,
37 const char* fname) { 39 const char* fname) {
38 ifstream ifs; 40 ifstream ifs;
39 // ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit); 41 // ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
@@ -41,28 +43,29 @@ void doTest( @@ -41,28 +43,29 @@ void doTest(
41 string line; 43 string line;
42 while (getline(ifs, line)) { 44 while (getline(ifs, line)) {
43 vector<string> splitVector(split(line, '\t')); 45 vector<string> splitVector(split(line, '\t'));
44 - string key = splitVector[0]; 46 + string orth = splitVector[0];
45 string lemma = splitVector[1]; 47 string lemma = splitVector[1];
46 string tag = splitVector[2]; 48 string tag = splitVector[2];
47 string name = splitVector[3]; 49 string name = splitVector[3];
48 vector<EncodedInterpretation> value2; 50 vector<EncodedInterpretation> value2;
49 - fsa.tryToRecognize(key.c_str(), value2); 51 + fsa.tryToRecognize(orth.c_str(), value2);
50 DEBUG("recognized "+to_string(value2.size())); 52 DEBUG("recognized "+to_string(value2.size()));
51 - vector<TaggedInterpretation> parsedValues; 53 +// vector<TaggedInterpretation> parsedValues;
52 bool found = false; 54 bool found = false;
53 - for (EncodedInterpretation interp: value2) {  
54 - TaggedInterpretation parsedValue = interpsConverter.getInterpretation(key, interp); 55 + for (EncodedInterpretation encodedInterp: value2) {
  56 +// TaggedInterpretation parsedValue = interpsConverter.getInterpretation(key, interp);
  57 + MorphInterpretation interp(0, 0, orth, encodedInterp, tagset);
55 // parsedValues.push_back(parsedValue); 58 // parsedValues.push_back(parsedValue);
56 - debug(key, parsedValue);  
57 - if (lemma == parsedValue.lemma && tag == parsedValue.tag && name == parsedValue.name) { 59 +// debug(orth, parsedValue);
  60 + if (lemma == interp.getLemma() && tag == interp.getTag() && name == interp.getName()) {
58 DEBUG("RECOGNIZED"); 61 DEBUG("RECOGNIZED");
59 found = true; 62 found = true;
60 } 63 }
61 else { 64 else {
62 - DEBUG("not matching "+parsedValue.lemma+ " " + parsedValue.tag + " " + parsedValue.name); 65 + DEBUG("not matching "+interp.getLemma()+ " " + interp.getTag() + " " + interp.getName());
63 } 66 }
64 } 67 }
65 - validate(found, "Failed to recognize " + key + " " + lemma + ":" + tag + ":" + name); 68 + validate(found, "Failed to recognize " + orth + " " + lemma + ":" + tag + ":" + name);
66 // debug(key, value2); 69 // debug(key, value2);
67 // validate(fsa.tryToRecognize(key.c_str(), value2), "Failed to recognize " + key); 70 // validate(fsa.tryToRecognize(key.c_str(), value2), "Failed to recognize " + key);
68 } 71 }
@@ -79,10 +82,10 @@ int main(int argc, char** argv) { @@ -79,10 +82,10 @@ int main(int argc, char** argv) {
79 DEBUG("DONE read FSA"); 82 DEBUG("DONE read FSA");
80 DEBUG("will read tagset"); 83 DEBUG("will read tagset");
81 Tagset tagset(fsaData); 84 Tagset tagset(fsaData);
82 - TaggedInterpretationsDecoder interpsDecoder(tagset); 85 +// TaggedInterpretationsDecoder interpsDecoder(tagset);
83 DEBUG("DONE read tagset"); 86 DEBUG("DONE read tagset");
84 DEBUG("still alive"); 87 DEBUG("still alive");
85 - doTest(*fsa, interpsDecoder, argv[2]); 88 + doTest(*fsa, tagset, argv[2]);
86 delete fsa; 89 delete fsa;
87 return 0; 90 return 0;
88 } 91 }
nbproject/configurations.xml
@@ -11,8 +11,9 @@ @@ -11,8 +11,9 @@
11 <in>Morfeusz.cpp</in> 11 <in>Morfeusz.cpp</in>
12 <in>Morfeusz.hpp</in> 12 <in>Morfeusz.hpp</in>
13 <in>MorphDeserializer.cpp</in> 13 <in>MorphDeserializer.cpp</in>
  14 + <in>MorphInterpretation.cpp</in>
  15 + <in>MorphInterpretation.hpp</in>
14 <in>Tagset.cpp</in> 16 <in>Tagset.cpp</in>
15 - <in>interpretations.cpp</in>  
16 <in>main.cpp</in> 17 <in>main.cpp</in>
17 <in>morfeusz.cpp</in> 18 <in>morfeusz.cpp</in>
18 <in>test_morph.cpp</in> 19 <in>test_morph.cpp</in>
@@ -93,11 +94,11 @@ @@ -93,11 +94,11 @@
93 <ccTool> 94 <ccTool>
94 </ccTool> 95 </ccTool>
95 </item> 96 </item>
96 - <item path="morfeusz/Tagset.cpp" ex="false" tool="1" flavor2="8">  
97 - <ccTool>  
98 - </ccTool> 97 + <item path="morfeusz/MorphInterpretation.cpp" ex="false" tool="1" flavor2="0">
99 </item> 98 </item>
100 - <item path="morfeusz/interpretations.cpp" ex="false" tool="1" flavor2="8"> 99 + <item path="morfeusz/MorphInterpretation.hpp" ex="false" tool="3" flavor2="0">
  100 + </item>
  101 + <item path="morfeusz/Tagset.cpp" ex="false" tool="1" flavor2="8">
101 <ccTool> 102 <ccTool>
102 </ccTool> 103 </ccTool>
103 </item> 104 </item>