Commit 58aafafe36f62bfa9e6b785ad28a1ea4c9042b24

Authored by Michał Lenart
1 parent 612cbdc9

- trochę refaktoryzacji, zrobienie klasy MorphInterpretation będącej krawędzią w grafie fleksyjnym

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@18 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
morfeusz/CMakeLists.txt
... ... @@ -6,7 +6,7 @@
6 6 include_directories (${Morfeusz_SOURCE_DIR}/fsa)
7 7 add_library (morfeusz2 morfeusz.hpp morfeusz.cpp)
8 8 add_executable (morfeusz2_analyze main.cpp)
9   -add_executable (test_morph test_morph.cpp interpretations.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp)
  9 +add_executable (test_morph test_morph.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp)
10 10  
11 11 # Link the executable to the morfeusz2 library.
12 12 target_link_libraries (morfeusz2_analyze morfeusz2)
... ...
morfeusz/EncodedInterpretation.hpp 0 → 100644
  1 +/*
  2 + * File: EncodedInterpretation.hpp
  3 + * Author: mlenart
  4 + *
  5 + * Created on November 4, 2013, 3:11 PM
  6 + */
  7 +
  8 +#ifndef INTERPRETATION_HPP
  9 +#define INTERPRETATION_HPP
  10 +
  11 +#include <string>
  12 +#include <sstream>
  13 +#include <iterator>
  14 +#include "Tagset.hpp"
  15 +
  16 +using namespace std;
  17 +
/*
 * Lemma stored relative to the orthographic form it was found under:
 * drop `suffixToCut` trailing chars from the form, then append `suffixToAdd`.
 */
struct EncodedLemma {
    int suffixToCut;          // how many trailing chars of the form to remove
    std::string suffixToAdd;  // text appended once the suffix has been removed
};

/*
 * Internal representation of an interpretation - with lemma encoded
 */
struct EncodedInterpretation {
    EncodedLemma lemma;  // lemma, still in its encoded (cut/add) form
    int tag;             // numeric tag identifier
    int nameClassifier;  // numeric name identifier
};
  31 +
  32 +#endif /* INTERPRETATION_HPP */
... ...
morfeusz/Morfeusz.hpp
... ... @@ -9,7 +9,8 @@
9 9 #define MORFEUSZ_HPP
10 10  
11 11 #include <string>
12   -#include "interpretations.hpp"
  12 +#include "MorphInterpretation.hpp"
  13 +//#include "interpretations.hpp"
13 14  
14 15 class Morfeusz;
15 16 class AnalyzeResult;
... ...
morfeusz/MorphDeserializer.hpp
... ... @@ -10,7 +10,7 @@
10 10  
11 11 #include <vector>
12 12 #include "fsa.hpp"
13   -#include "interpretations.hpp"
  13 +#include "EncodedInterpretation.hpp"
14 14  
15 15 class MorphDeserializer: public Deserializer<std::vector<EncodedInterpretation>> {
16 16 public:
... ...
morfeusz/MorphInterpretation.cpp 0 → 100644
  1 +/*
  2 + * File: MorphInterpretation.cpp
  3 + * Author: mlenart
  4 + *
  5 + * Created on November 14, 2013, 11:47 AM
  6 + */
  7 +
  8 +#include <string>
  9 +#include "MorphInterpretation.hpp"
  10 +#include "EncodedInterpretation.hpp"
  11 +
  12 +using namespace std;
  13 +
  14 +static string convertLemma(
  15 + const string& orth,
  16 + const EncodedLemma& lemma) {
  17 + string res(orth);
  18 + res.erase(
  19 + res.end() - lemma.suffixToCut,
  20 + res.end());
  21 + res.append(lemma.suffixToAdd);
  22 + return res;
  23 +}
  24 +
  25 +MorphInterpretation::MorphInterpretation(
  26 + int startNode,
  27 + int endNode,
  28 + const std::string& orth,
  29 + const EncodedInterpretation& encodedInterp,
  30 + const Tagset& tagset)
  31 +: startNode(startNode),
  32 + endNode(endNode),
  33 + orth(orth),
  34 + lemma(convertLemma(orth, encodedInterp.lemma)),
  35 + tagnum(encodedInterp.tag),
  36 + namenum(encodedInterp.nameClassifier),
  37 + tag(tagset.getTag(encodedInterp.tag)),
  38 + name(tagset.getName(encodedInterp.nameClassifier)) {
  39 +
  40 +}
  41 +
  42 +MorphInterpretation::~MorphInterpretation() {
  43 +}
  44 +
  45 +const std::string& MorphInterpretation::getOrth() const {
  46 + return this->orth;
  47 +}
  48 +
  49 +const std::string& MorphInterpretation::getLemma() const {
  50 + return this->lemma;
  51 +}
  52 +
  53 +int MorphInterpretation::getTagnum() const {
  54 + return this->tagnum;
  55 +}
  56 +
  57 +int MorphInterpretation::getNamenum() const {
  58 + return this->namenum;
  59 +}
  60 +
  61 +const std::string& MorphInterpretation::getTag() const {
  62 + return this->tag;
  63 +}
  64 +
  65 +const std::string& MorphInterpretation::getName() const {
  66 + return this->name;
  67 +}
  68 +
... ...
morfeusz/MorphInterpretation.hpp 0 → 100644
  1 +/*
  2 + * File: MorphInterpretation.hpp
  3 + * Author: mlenart
  4 + *
  5 + * Created on November 14, 2013, 11:47 AM
  6 + */
  7 +
  8 +#ifndef MORPHINTERPRETATION_HPP
  9 +#define MORPHINTERPRETATION_HPP
  10 +
  11 +#include <string>
  12 +#include "Tagset.hpp"
  13 +#include "EncodedInterpretation.hpp"
  14 +
  15 +class MorphInterpretation {
  16 +public:
  17 + MorphInterpretation(
  18 + int startNode,
  19 + int endNode,
  20 + const std::string& orth,
  21 + const EncodedInterpretation& encodedInterp,
  22 + const Tagset& tagset);
  23 + virtual ~MorphInterpretation();
  24 + const std::string& getOrth() const;
  25 + const std::string& getLemma() const;
  26 + int getTagnum() const;
  27 + int getNamenum() const;
  28 + const std::string& getTag() const;
  29 + const std::string& getName() const;
  30 +private:
  31 + int startNode;
  32 + int endNode;
  33 + std::string orth;
  34 + std::string lemma;
  35 + int tagnum;
  36 + int namenum;
  37 + const std::string& tag;
  38 + const std::string& name;
  39 +};
  40 +
  41 +#endif /* MORPHINTERPRETATION_HPP */
  42 +
... ...
morfeusz/interpretations.cpp deleted
1   -
2   -#include "interpretations.hpp"
3   -#include "Tagset.hpp"
4   -
5   -using namespace std;
6   -
7   -string TaggedInterpretation::toString() const {
8   - std::stringstream ss;
9   - ss << lemma << ":" << tag << ":" << name;
10   - return ss.str();
11   -}
12   -
13   -template <class T>
14   -string InterpretationsDecoder<T>::convertLemma(
15   - const string& orth,
16   - const EncodedLemma& lemma) const {
17   - string res(orth);
18   - res.erase(
19   - res.end() - lemma.suffixToCut,
20   - res.end());
21   - res.append(lemma.suffixToAdd);
22   - return res;
23   -}
24   -
25   -RawInterpretation RawInterpretationsDecoder::getInterpretation(
26   - const string& orth,
27   - const EncodedInterpretation& interp) const {
28   - string lemma = this->convertLemma(orth, interp.lemma);
29   - RawInterpretation res = {lemma, interp.tag, interp.nameClassifier};
30   - return res;
31   -}
32   -
33   -TaggedInterpretationsDecoder::TaggedInterpretationsDecoder(const Tagset& tagset)
34   -: tagset(tagset) {
35   -
36   -}
37   -
38   -TaggedInterpretation TaggedInterpretationsDecoder::getInterpretation(
39   - const string& orth,
40   - const EncodedInterpretation& interp) const {
41   - string lemma = this->convertLemma(orth, interp.lemma);
42   - const string& tag = this->tagset.getTag(interp.tag);
43   - const string& name = this->tagset.getName(interp.nameClassifier);
44   - TaggedInterpretation res = {lemma, tag, name};
45   - return res;
46   -}
morfeusz/interpretations.hpp deleted
1   -/*
2   - * File: interpretation.hpp
3   - * Author: mlenart
4   - *
5   - * Created on November 4, 2013, 3:11 PM
6   - */
7   -
8   -#ifndef INTERPRETATION_HPP
9   -#define INTERPRETATION_HPP
10   -
11   -#include <string>
12   -#include <sstream>
13   -#include <iterator>
14   -#include "Tagset.hpp"
15   -
16   -using namespace std;
17   -
18   -struct EncodedLemma {
19   - int suffixToCut;
20   - string suffixToAdd;
21   -};
22   -
23   -/*
24   - * Internal representation of an interpretation - with lemma encoded
25   - */
26   -struct EncodedInterpretation {
27   - EncodedLemma lemma;
28   - int tag;
29   - int nameClassifier;
30   -};
31   -
32   -class MorphInterpretation {
33   -public:
34   - MorphInterpretation(
35   - int startNode,
36   - int endNode,
37   - const std::string& orth,
38   - const EncodedInterpretation& encodedInterp);
39   - const std::string& getOrth() const;
40   - const std::string& getLemma() const;
41   - int getTagnum() const;
42   - int getNamenum() const;
43   - const std::string& getTag(const Tagset& tagset) const;
44   - const std::string& getName(const Tagset& tagset) const;
45   -private:
46   - int startNode;
47   - int endNode;
48   - std::string orth;
49   - std::string lemma;
50   - int tagnum;
51   - int namenum;
52   -};
53   -
54   -// ALL BELOW IS DEPRECATED
55   -
56   -/*
57   - * Interpretation with tags as integers (need a Tagset object to decode them)
58   - */
59   -struct RawInterpretation {
60   - string lemma;
61   - int tagnum;
62   - int namenum;
63   -};
64   -
65   -/*
66   - * Interpretation with tags as strings (already processed with a Tagset object)
67   - */
68   -struct TaggedInterpretation {
69   - std::string lemma;
70   - const std::string& tag; // np. subst:sg:nom:m1
71   - const std::string& name; // np. "pospolita"
72   - std::string toString() const;
73   -};
74   -
75   -template <class InterpType>
76   -class InterpretationsDecoder {
77   -public:
78   -// explicit InterpretationsDecoder(const Tagset& tagset);
79   -
80   - virtual InterpType getInterpretation(
81   - const std::string& orth,
82   - const EncodedInterpretation& interp) const = 0;
83   -
84   -protected:
85   - std::string convertLemma(const std::string& orth, const EncodedLemma& interp) const;
86   -};
87   -
88   -class TaggedInterpretationsDecoder: public InterpretationsDecoder<TaggedInterpretation> {
89   -public:
90   - explicit TaggedInterpretationsDecoder(const Tagset& tagset);
91   -
92   - TaggedInterpretation getInterpretation(
93   - const std::string& orth,
94   - const EncodedInterpretation& interp) const;
95   -private:
96   - Tagset tagset;
97   -};
98   -
99   -class RawInterpretationsDecoder: public InterpretationsDecoder<RawInterpretation> {
100   -public:
101   - RawInterpretationsDecoder();
102   -
103   - RawInterpretation getInterpretation(
104   - const std::string& orth,
105   - const EncodedInterpretation& interp) const;
106   -};
107   -
108   -#endif /* INTERPRETATION_HPP */
109   -
morfeusz/test_morph.cpp
... ... @@ -9,10 +9,11 @@
9 9 #include <sstream>
10 10 #include <iostream>
11 11 #include "fsa.hpp"
12   -#include "interpretations.hpp"
  12 +#include "EncodedInterpretation.hpp"
13 13 #include "utils.hpp"
14 14 #include "MorphDeserializer.hpp"
15 15 #include "Morfeusz.hpp"
  16 +#include "MorphInterpretation.hpp"
16 17  
17 18 using namespace std;
18 19  
... ... @@ -27,13 +28,14 @@ void debug(const string& key, const vector<EncodedInterpretation> value) {
27 28 cerr << "==================" << endl;
28 29 }
29 30  
30   -void debug(const string& key, const TaggedInterpretation& value) {
31   - cerr << key << '\t' << value.toString() << endl;
32   -}
  31 +//void debug(const string& key, const TaggedInterpretation& value) {
  32 +// cerr << key << '\t' << value.toString() << endl;
  33 +//}
33 34  
34 35 void doTest(
35   - const FSA<vector<EncodedInterpretation>>& fsa,
36   - const InterpretationsDecoder<TaggedInterpretation>& interpsConverter,
  36 + const FSA<vector<EncodedInterpretation>>& fsa,
  37 + const Tagset& tagset,
  38 +// const InterpretationsDecoder<TaggedInterpretation>& interpsConverter,
37 39 const char* fname) {
38 40 ifstream ifs;
39 41 // ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
... ... @@ -41,28 +43,29 @@ void doTest(
41 43 string line;
42 44 while (getline(ifs, line)) {
43 45 vector<string> splitVector(split(line, '\t'));
44   - string key = splitVector[0];
  46 + string orth = splitVector[0];
45 47 string lemma = splitVector[1];
46 48 string tag = splitVector[2];
47 49 string name = splitVector[3];
48 50 vector<EncodedInterpretation> value2;
49   - fsa.tryToRecognize(key.c_str(), value2);
  51 + fsa.tryToRecognize(orth.c_str(), value2);
50 52 DEBUG("recognized "+to_string(value2.size()));
51   - vector<TaggedInterpretation> parsedValues;
  53 +// vector<TaggedInterpretation> parsedValues;
52 54 bool found = false;
53   - for (EncodedInterpretation interp: value2) {
54   - TaggedInterpretation parsedValue = interpsConverter.getInterpretation(key, interp);
  55 + for (EncodedInterpretation encodedInterp: value2) {
  56 +// TaggedInterpretation parsedValue = interpsConverter.getInterpretation(key, interp);
  57 + MorphInterpretation interp(0, 0, orth, encodedInterp, tagset);
55 58 // parsedValues.push_back(parsedValue);
56   - debug(key, parsedValue);
57   - if (lemma == parsedValue.lemma && tag == parsedValue.tag && name == parsedValue.name) {
  59 +// debug(orth, parsedValue);
  60 + if (lemma == interp.getLemma() && tag == interp.getTag() && name == interp.getName()) {
58 61 DEBUG("RECOGNIZED");
59 62 found = true;
60 63 }
61 64 else {
62   - DEBUG("not matching "+parsedValue.lemma+ " " + parsedValue.tag + " " + parsedValue.name);
  65 + DEBUG("not matching "+interp.getLemma()+ " " + interp.getTag() + " " + interp.getName());
63 66 }
64 67 }
65   - validate(found, "Failed to recognize " + key + " " + lemma + ":" + tag + ":" + name);
  68 + validate(found, "Failed to recognize " + orth + " " + lemma + ":" + tag + ":" + name);
66 69 // debug(key, value2);
67 70 // validate(fsa.tryToRecognize(key.c_str(), value2), "Failed to recognize " + key);
68 71 }
... ... @@ -79,10 +82,10 @@ int main(int argc, char** argv) {
79 82 DEBUG("DONE read FSA");
80 83 DEBUG("will read tagset");
81 84 Tagset tagset(fsaData);
82   - TaggedInterpretationsDecoder interpsDecoder(tagset);
  85 +// TaggedInterpretationsDecoder interpsDecoder(tagset);
83 86 DEBUG("DONE read tagset");
84 87 DEBUG("still alive");
85   - doTest(*fsa, interpsDecoder, argv[2]);
  88 + doTest(*fsa, tagset, argv[2]);
86 89 delete fsa;
87 90 return 0;
88 91 }
... ...
nbproject/configurations.xml
... ... @@ -11,8 +11,9 @@
11 11 <in>Morfeusz.cpp</in>
12 12 <in>Morfeusz.hpp</in>
13 13 <in>MorphDeserializer.cpp</in>
  14 + <in>MorphInterpretation.cpp</in>
  15 + <in>MorphInterpretation.hpp</in>
14 16 <in>Tagset.cpp</in>
15   - <in>interpretations.cpp</in>
16 17 <in>main.cpp</in>
17 18 <in>morfeusz.cpp</in>
18 19 <in>test_morph.cpp</in>
... ... @@ -93,11 +94,11 @@
93 94 <ccTool>
94 95 </ccTool>
95 96 </item>
96   - <item path="morfeusz/Tagset.cpp" ex="false" tool="1" flavor2="8">
97   - <ccTool>
98   - </ccTool>
  97 + <item path="morfeusz/MorphInterpretation.cpp" ex="false" tool="1" flavor2="0">
99 98 </item>
100   - <item path="morfeusz/interpretations.cpp" ex="false" tool="1" flavor2="8">
  99 + <item path="morfeusz/MorphInterpretation.hpp" ex="false" tool="3" flavor2="0">
  100 + </item>
  101 + <item path="morfeusz/Tagset.cpp" ex="false" tool="1" flavor2="8">
101 102 <ccTool>
102 103 </ccTool>
103 104 </item>
... ...