Commit e5220b907d711d35e0832ea2ca0734a2be1e0711

Authored by Michał Lenart
1 parent 3cc7bcb1

- podpięcie pierwszego malutkiego testu analizy morfologicznej jednego słowa

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@22 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
fsa/fsa_impl.hpp
... ... @@ -18,7 +18,7 @@
18 18 #include "utils.hpp"
19 19 #include "const.hpp"
20 20  
21   -using namespace std;
  21 +//using namespace std;
22 22 //static const unsigned int FSA_OFFSET = 6;
23 23  
24 24 template <class T>
... ... @@ -70,7 +70,7 @@ FSA<T>* FSA<T>::getFSA(const unsigned char* ptr, const Deserializer<T>& deserial
70 70  
71 71 uint8_t versionNum = *(ptr + VERSION_NUM_OFFSET);
72 72 if (versionNum != VERSION_NUM) {
73   - throw FSAException(string("Invalid version number: ") + std::to_string(versionNum) + ", should be: " + to_string(VERSION_NUM));
  73 + throw FSAException(string("Invalid version number: ") + std::to_string(versionNum) + ", should be: " + std::to_string(VERSION_NUM));
74 74 }
75 75  
76 76 uint8_t implementationNum = *(ptr + IMPLEMENTATION_NUM_OFFSET);
... ... @@ -85,7 +85,7 @@ FSA<T>* FSA<T>::getFSA(const unsigned char* ptr, const Deserializer<T>& deserial
85 85 case 2:
86 86 return new CompressedFSA2<T>(startPtr, deserializer);
87 87 default:
88   - throw FSAException(string("Invalid implementation number: ") + to_string(versionNum) + ", should be: " + to_string(VERSION_NUM));
  88 + throw FSAException(string("Invalid implementation number: ") + std::to_string(versionNum) + ", should be: " + std::to_string(VERSION_NUM));
89 89 }
90 90 }
91 91  
... ...
morfeusz/CMakeLists.txt
... ... @@ -8,10 +8,12 @@ add_library (morfeusz2 morfeusz.hpp morfeusz.cpp)
8 8 add_executable (morfeusz2_analyze main.cpp)
9 9 add_executable (test_morph test_morph.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp)
10 10 add_executable (test_morfeusz test_morph.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp Morfeusz.cpp charset/CharsetConverter.cpp FlexionGraph.cpp)
  11 +add_executable (test_simple test_simple.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp Morfeusz.cpp charset/CharsetConverter.cpp FlexionGraph.cpp)
11 12  
12 13 # Link the executable to the Hello library.
13 14 target_link_libraries (morfeusz2_analyze morfeusz2)
14 15 set_target_properties ( morfeusz2_analyze PROPERTIES COMPILE_FLAGS "-std=gnu++0x" )
15 16  
16 17 set_target_properties ( test_morph PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" )
17   -set_target_properties ( test_morfeusz PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" )
18 18 \ No newline at end of file
  19 +set_target_properties ( test_morfeusz PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" )
  20 +set_target_properties ( test_simple PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" )
19 21 \ No newline at end of file
... ...
morfeusz/FlexionGraph.cpp
... ... @@ -30,7 +30,7 @@ void FlexionGraph::addPath(const std::vector<InterpretedChunk>& path) {
30 30  
31 31 void FlexionGraph::appendToResults(const Tagset& tagset, std::vector<MorphInterpretation>& results) {
32 32 int endNode = graph.size();
33   - for (int i = 0; i < graph.size(); i++) {
  33 + for (unsigned int i = 0; i < graph.size(); i++) {
34 34 vector<Edge>& edges = graph[i];
35 35 for (Edge& e: edges) {
36 36 int realStartNode = i + this->startNode;
... ...
morfeusz/InterpretedChunk.hpp
... ... @@ -13,7 +13,7 @@
13 13 struct InterpretedChunk {
14 14 const char* chunk;
15 15 long chunkLength;
16   - InterpsGroup& interpsGroup;
  16 + InterpsGroup interpsGroup;
17 17 };
18 18  
19 19 #endif /* INTERPRETEDCHUNK_HPP */
... ...
morfeusz/Morfeusz.cpp
... ... @@ -6,6 +6,7 @@
6 6 */
7 7  
8 8 #include <string>
  9 +#include <iostream>
9 10 #include "fsa.hpp"
10 11 #include "utils.hpp"
11 12 #include "Morfeusz.hpp"
... ... @@ -18,18 +19,28 @@
18 19 using namespace std;
19 20  
20 21 static FSA<vector<InterpsGroup >> *initializeFSA(const string& filename) {
  22 + cerr << "initialize FSA" << endl;
21 23 static Deserializer < vector < InterpsGroup >> *deserializer
22 24 = new MorphDeserializer();
23 25 return FSA < vector < InterpsGroup >> ::getFSA(filename, *deserializer);
24 26 }
25 27  
26 28 static CharsetConverter* initializeCharsetConverter() {
  29 + cerr << "initialize charset converter" << endl;
27 30 static CharsetConverter* converter = new UTF8CharsetConverter();
28 31 return converter;
29 32 }
30 33  
  34 +static Tagset* initializeTagset(const string& filename) {
  35 + cerr << "initialize tagset" << endl;
  36 + static Tagset* tagset = new Tagset(readFile(filename.c_str()));
  37 + return tagset;
  38 +}
  39 +
31 40 Morfeusz::Morfeusz(const string& filename)
32   -: fsa(initializeFSA(filename)), charsetConverter(initializeCharsetConverter()) {
  41 +: fsa(initializeFSA(filename)),
  42 + charsetConverter(initializeCharsetConverter()),
  43 + tagset(initializeTagset(filename)) {
33 44  
34 45 }
35 46  
... ... @@ -47,7 +58,7 @@ void Morfeusz::processOneWord(
47 58 FlexionGraph graph(startNodeNum);
48 59 const char* currInput = inputData;
49 60 doProcessOneWord(currInput, inputEnd, accum, graph);
50   - graph.appendToResults(this->tagset, results);
  61 + graph.appendToResults(*this->tagset, results);
51 62 inputData = currInput;
52 63 }
53 64  
... ... @@ -88,12 +99,16 @@ void Morfeusz::feedState(
88 99 }
89 100 }
90 101  
91   -ResultsIterator Morfeusz::analyze(const std::string& text) {
  102 +ResultsIterator Morfeusz::analyze(const string& text) {
92 103 // const char* textStart = text.c_str();
93 104 // const char* textEnd = text.c_str() + text.length();
94 105 return ResultsIterator(text, *this);
95 106 }
96 107  
  108 +void Morfeusz::analyze(const string& text, vector<MorphInterpretation>& results) {
  109 +
  110 +}
  111 +
97 112 ResultsIterator::ResultsIterator(const string& text, const Morfeusz& morfeusz)
98 113 : rawInput(text.c_str()),
99 114 morfeusz(morfeusz) {
... ...
morfeusz/Morfeusz.hpp
... ... @@ -32,15 +32,17 @@ public:
32 32 virtual ~Morfeusz();
33 33 // Morfeusz(const Morfeusz& orig);
34 34 ResultsIterator analyze(const std::string& text);
35   -
36   -// Morfeusz();
37   - friend class ResultsIterator;
38   -private:
  35 + void analyze(const std::string& text, std::vector<MorphInterpretation>& result);
  36 +
39 37 void processOneWord(
40 38 const char*& inputData,
41 39 const char* inputEnd,
42 40 const int startNodeNum,
43 41 std::vector<MorphInterpretation>& result) const;
  42 +
  43 +// Morfeusz();
  44 + friend class ResultsIterator;
  45 +private:
44 46  
45 47 void doProcessOneWord(
46 48 const char*& inputData,
... ... @@ -54,6 +56,7 @@ private:
54 56  
55 57 FSAType* fsa;
56 58 CharsetConverter* charsetConverter;
  59 + Tagset* tagset;
57 60 };
58 61  
59 62 class ResultsIterator {
... ...
morfeusz/test_simple.cpp 0 → 100644
  1 +/*
  2 + * File: test_simple.cpp
  3 + * Author: lennyn
  4 + *
  5 + * Created on November 18, 2013, 10:30 PM
  6 + */
  7 +
  8 +#include <cstdlib>
  9 +
  10 +#include "Morfeusz.hpp"
  11 +#include "MorphInterpretation.hpp"
  12 +
  13 +using namespace std;
  14 +
  15 +/*
  16 + *
  17 + */
  18 +int main(int argc, char** argv) {
  19 + Morfeusz morfeusz(argv[1]);
  20 + vector<MorphInterpretation> res;
  21 + string word = "mijałem";
  22 + const char* ptr = word.c_str();
  23 + morfeusz.processOneWord(ptr, word.c_str() + word.size(), 0, res);
  24 + for (MorphInterpretation& mi: res) {
  25 + cerr << mi.getStartNode() << " " << mi.getEndNode() << " " << mi.getLemma() << " " << mi.getTag() << " " << mi.getName() << endl;
  26 + }
  27 + return 0;
  28 +}
  29 +
... ...
nbproject/configurations.xml
... ... @@ -11,12 +11,6 @@
11 11 <df name="charset">
12 12 <in>CharsetConverter.cpp</in>
13 13 </df>
14   - <df name="encoding">
15   - <in>CharsetConverter.cpp</in>
16   - </df>
17   - <df name="flexion">
18   - <in>FlexionGraph.cpp</in>
19   - </df>
20 14 <in>FlexionGraph.cpp</in>
21 15 <in>Morfeusz.cpp</in>
22 16 <in>MorphDeserializer.cpp</in>
... ... @@ -24,6 +18,7 @@
24 18 <in>Tagset.cpp</in>
25 19 <in>main.cpp</in>
26 20 <in>morfeusz.cpp</in>
  21 + <in>test_morfeusz.cpp</in>
27 22 <in>test_morph.cpp</in>
28 23 </df>
29 24 <logicalFolder name="ExternalFiles"
... ... @@ -57,7 +52,7 @@
57 52 <executablePath>build/fsa/test_dict</executablePath>
58 53 </makeTool>
59 54 </makefileType>
60   - <folder path="1/charset">
  55 + <folder path="1">
61 56 <ccTool>
62 57 <incDir>
63 58 <pElem>fsa</pElem>
... ... @@ -96,42 +91,22 @@
96 91 </item>
97 92 <item path="morfeusz/FlexionGraph.cpp" ex="false" tool="1" flavor2="8">
98 93 <ccTool>
99   - <incDir>
100   - <pElem>fsa</pElem>
101   - <pElem>build/morfeusz</pElem>
102   - </incDir>
103 94 </ccTool>
104 95 </item>
105 96 <item path="morfeusz/Morfeusz.cpp" ex="false" tool="1" flavor2="8">
106 97 <ccTool>
107   - <incDir>
108   - <pElem>fsa</pElem>
109   - <pElem>build/morfeusz</pElem>
110   - </incDir>
111 98 </ccTool>
112 99 </item>
113 100 <item path="morfeusz/MorphDeserializer.cpp" ex="false" tool="1" flavor2="8">
114 101 <ccTool>
115   - <incDir>
116   - <pElem>fsa</pElem>
117   - <pElem>build/morfeusz</pElem>
118   - </incDir>
119 102 </ccTool>
120 103 </item>
121 104 <item path="morfeusz/MorphInterpretation.cpp" ex="false" tool="1" flavor2="8">
122 105 <ccTool>
123   - <incDir>
124   - <pElem>fsa</pElem>
125   - <pElem>build/morfeusz</pElem>
126   - </incDir>
127 106 </ccTool>
128 107 </item>
129 108 <item path="morfeusz/Tagset.cpp" ex="false" tool="1" flavor2="8">
130 109 <ccTool>
131   - <incDir>
132   - <pElem>fsa</pElem>
133   - <pElem>build/morfeusz</pElem>
134   - </incDir>
135 110 </ccTool>
136 111 </item>
137 112 <item path="morfeusz/charset/CharsetConverter.cpp"
... ... @@ -141,33 +116,23 @@
141 116 <ccTool>
142 117 </ccTool>
143 118 </item>
144   - <item path="morfeusz/encoding/CharsetConverter.cpp"
145   - ex="false"
146   - tool="1"
147   - flavor2="4">
  119 + <item path="morfeusz/main.cpp" ex="false" tool="1" flavor2="8">
148 120 <ccTool>
149 121 </ccTool>
150 122 </item>
151   - <item path="morfeusz/flexion/FlexionGraph.cpp" ex="false" tool="1" flavor2="4">
  123 + <item path="morfeusz/morfeusz.cpp" ex="false" tool="1" flavor2="4">
152 124 <ccTool>
153 125 </ccTool>
154 126 </item>
155   - <item path="morfeusz/main.cpp" ex="false" tool="1" flavor2="8">
  127 + <item path="morfeusz/test_morfeusz.cpp" ex="false" tool="1" flavor2="4">
156 128 <ccTool>
157   - <incDir>
158   - <pElem>fsa</pElem>
159   - <pElem>build/morfeusz</pElem>
160   - </incDir>
161 129 </ccTool>
162 130 </item>
163   - <item path="morfeusz/morfeusz.cpp" ex="false" tool="1" flavor2="4">
  131 + <item path="morfeusz/test_morph.cpp" ex="false" tool="1" flavor2="8">
164 132 <ccTool>
165   - <incDir>
166   - <pElem>morfeusz</pElem>
167   - </incDir>
168 133 </ccTool>
169 134 </item>
170   - <item path="morfeusz/test_morph.cpp" ex="false" tool="1" flavor2="8">
  135 + <item path="morfeusz/test_simple.cpp" ex="false" tool="1" flavor2="0">
171 136 <ccTool>
172 137 <incDir>
173 138 <pElem>fsa</pElem>
... ...