Commit e5220b907d711d35e0832ea2ca0734a2be1e0711
1 parent
3cc7bcb1
- podpięcie pierwszego malutkiego testu analizy morfologicznej jednego słowa
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@22 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing
8 changed files
with
69 additions
and
55 deletions
fsa/fsa_impl.hpp
... | ... | @@ -18,7 +18,7 @@ |
18 | 18 | #include "utils.hpp" |
19 | 19 | #include "const.hpp" |
20 | 20 | |
21 | -using namespace std; | |
21 | +//using namespace std; | |
22 | 22 | //static const unsigned int FSA_OFFSET = 6; |
23 | 23 | |
24 | 24 | template <class T> |
... | ... | @@ -70,7 +70,7 @@ FSA<T>* FSA<T>::getFSA(const unsigned char* ptr, const Deserializer<T>& deserial |
70 | 70 | |
71 | 71 | uint8_t versionNum = *(ptr + VERSION_NUM_OFFSET); |
72 | 72 | if (versionNum != VERSION_NUM) { |
73 | - throw FSAException(string("Invalid version number: ") + std::to_string(versionNum) + ", should be: " + to_string(VERSION_NUM)); | |
73 | + throw FSAException(string("Invalid version number: ") + std::to_string(versionNum) + ", should be: " + std::to_string(VERSION_NUM)); | |
74 | 74 | } |
75 | 75 | |
76 | 76 | uint8_t implementationNum = *(ptr + IMPLEMENTATION_NUM_OFFSET); |
... | ... | @@ -85,7 +85,7 @@ FSA<T>* FSA<T>::getFSA(const unsigned char* ptr, const Deserializer<T>& deserial |
85 | 85 | case 2: |
86 | 86 | return new CompressedFSA2<T>(startPtr, deserializer); |
87 | 87 | default: |
88 | - throw FSAException(string("Invalid implementation number: ") + to_string(versionNum) + ", should be: " + to_string(VERSION_NUM)); | |
88 | + throw FSAException(string("Invalid implementation number: ") + std::to_string(versionNum) + ", should be: " + std::to_string(VERSION_NUM)); | |
89 | 89 | } |
90 | 90 | } |
91 | 91 | |
... | ... |
morfeusz/CMakeLists.txt
... | ... | @@ -8,10 +8,12 @@ add_library (morfeusz2 morfeusz.hpp morfeusz.cpp) |
8 | 8 | add_executable (morfeusz2_analyze main.cpp) |
9 | 9 | add_executable (test_morph test_morph.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp) |
10 | 10 | add_executable (test_morfeusz test_morph.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp Morfeusz.cpp charset/CharsetConverter.cpp FlexionGraph.cpp) |
11 | +add_executable (test_simple test_simple.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp Morfeusz.cpp charset/CharsetConverter.cpp FlexionGraph.cpp) | |
11 | 12 | |
12 | 13 | # Link the executable to the morfeusz2 library. |
13 | 14 | target_link_libraries (morfeusz2_analyze morfeusz2) |
14 | 15 | set_target_properties ( morfeusz2_analyze PROPERTIES COMPILE_FLAGS "-std=gnu++0x" ) |
15 | 16 | |
16 | 17 | set_target_properties ( test_morph PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" ) |
17 | -set_target_properties ( test_morfeusz PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" ) | |
18 | 18 | \ No newline at end of file |
19 | +set_target_properties ( test_morfeusz PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" ) | |
20 | +set_target_properties ( test_simple PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" ) | |
19 | 21 | \ No newline at end of file |
... | ... |
morfeusz/FlexionGraph.cpp
... | ... | @@ -30,7 +30,7 @@ void FlexionGraph::addPath(const std::vector<InterpretedChunk>& path) { |
30 | 30 | |
31 | 31 | void FlexionGraph::appendToResults(const Tagset& tagset, std::vector<MorphInterpretation>& results) { |
32 | 32 | int endNode = graph.size(); |
33 | - for (int i = 0; i < graph.size(); i++) { | |
33 | + for (unsigned int i = 0; i < graph.size(); i++) { | |
34 | 34 | vector<Edge>& edges = graph[i]; |
35 | 35 | for (Edge& e: edges) { |
36 | 36 | int realStartNode = i + this->startNode; |
... | ... |
morfeusz/InterpretedChunk.hpp
morfeusz/Morfeusz.cpp
... | ... | @@ -6,6 +6,7 @@ |
6 | 6 | */ |
7 | 7 | |
8 | 8 | #include <string> |
9 | +#include <iostream> | |
9 | 10 | #include "fsa.hpp" |
10 | 11 | #include "utils.hpp" |
11 | 12 | #include "Morfeusz.hpp" |
... | ... | @@ -18,18 +19,28 @@ |
18 | 19 | using namespace std; |
19 | 20 | |
20 | 21 | static FSA<vector<InterpsGroup >> *initializeFSA(const string& filename) { |
22 | + cerr << "initialize FSA" << endl; | |
21 | 23 | static Deserializer < vector < InterpsGroup >> *deserializer |
22 | 24 | = new MorphDeserializer(); |
23 | 25 | return FSA < vector < InterpsGroup >> ::getFSA(filename, *deserializer); |
24 | 26 | } |
25 | 27 | |
26 | 28 | static CharsetConverter* initializeCharsetConverter() { |
29 | + cerr << "initialize charset converter" << endl; | |
27 | 30 | static CharsetConverter* converter = new UTF8CharsetConverter(); |
28 | 31 | return converter; |
29 | 32 | } |
30 | 33 | |
34 | +static Tagset* initializeTagset(const string& filename) { | |
35 | + cerr << "initialize tagset" << endl; | |
36 | + static Tagset* tagset = new Tagset(readFile(filename.c_str())); | |
37 | + return tagset; | |
38 | +} | |
39 | + | |
31 | 40 | Morfeusz::Morfeusz(const string& filename) |
32 | -: fsa(initializeFSA(filename)), charsetConverter(initializeCharsetConverter()) { | |
41 | +: fsa(initializeFSA(filename)), | |
42 | + charsetConverter(initializeCharsetConverter()), | |
43 | + tagset(initializeTagset(filename)) { | |
33 | 44 | |
34 | 45 | } |
35 | 46 | |
... | ... | @@ -47,7 +58,7 @@ void Morfeusz::processOneWord( |
47 | 58 | FlexionGraph graph(startNodeNum); |
48 | 59 | const char* currInput = inputData; |
49 | 60 | doProcessOneWord(currInput, inputEnd, accum, graph); |
50 | - graph.appendToResults(this->tagset, results); | |
61 | + graph.appendToResults(*this->tagset, results); | |
51 | 62 | inputData = currInput; |
52 | 63 | } |
53 | 64 | |
... | ... | @@ -88,12 +99,16 @@ void Morfeusz::feedState( |
88 | 99 | } |
89 | 100 | } |
90 | 101 | |
91 | -ResultsIterator Morfeusz::analyze(const std::string& text) { | |
102 | +ResultsIterator Morfeusz::analyze(const string& text) { | |
92 | 103 | // const char* textStart = text.c_str(); |
93 | 104 | // const char* textEnd = text.c_str() + text.length(); |
94 | 105 | return ResultsIterator(text, *this); |
95 | 106 | } |
96 | 107 | |
108 | +void Morfeusz::analyze(const string& text, vector<MorphInterpretation>& results) { | |
109 | + | |
110 | +} | |
111 | + | |
97 | 112 | ResultsIterator::ResultsIterator(const string& text, const Morfeusz& morfeusz) |
98 | 113 | : rawInput(text.c_str()), |
99 | 114 | morfeusz(morfeusz) { |
... | ... |
morfeusz/Morfeusz.hpp
... | ... | @@ -32,15 +32,17 @@ public: |
32 | 32 | virtual ~Morfeusz(); |
33 | 33 | // Morfeusz(const Morfeusz& orig); |
34 | 34 | ResultsIterator analyze(const std::string& text); |
35 | - | |
36 | -// Morfeusz(); | |
37 | - friend class ResultsIterator; | |
38 | -private: | |
35 | + void analyze(const std::string& text, std::vector<MorphInterpretation>& result); | |
36 | + | |
39 | 37 | void processOneWord( |
40 | 38 | const char*& inputData, |
41 | 39 | const char* inputEnd, |
42 | 40 | const int startNodeNum, |
43 | 41 | std::vector<MorphInterpretation>& result) const; |
42 | + | |
43 | +// Morfeusz(); | |
44 | + friend class ResultsIterator; | |
45 | +private: | |
44 | 46 | |
45 | 47 | void doProcessOneWord( |
46 | 48 | const char*& inputData, |
... | ... | @@ -54,6 +56,7 @@ private: |
54 | 56 | |
55 | 57 | FSAType* fsa; |
56 | 58 | CharsetConverter* charsetConverter; |
59 | + Tagset* tagset; | |
57 | 60 | }; |
58 | 61 | |
59 | 62 | class ResultsIterator { |
... | ... |
morfeusz/test_simple.cpp
0 → 100644
1 | +/* | |
2 | + * File: test_simple.cpp | |
3 | + * Author: lennyn | |
4 | + * | |
5 | + * Created on November 18, 2013, 10:30 PM | |
6 | + */ | |
7 | + | |
8 | +#include <cstdlib> | |
9 | + | |
10 | +#include "Morfeusz.hpp" | |
11 | +#include "MorphInterpretation.hpp" | |
12 | + | |
13 | +using namespace std; | |
14 | + | |
15 | +/* | |
16 | + * | |
17 | + */ | |
18 | +int main(int argc, char** argv) { | |
19 | + Morfeusz morfeusz(argv[1]); | |
20 | + vector<MorphInterpretation> res; | |
21 | + string word = "mijałem"; | |
22 | + const char* ptr = word.c_str(); | |
23 | + morfeusz.processOneWord(ptr, word.c_str() + word.size(), 0, res); | |
24 | + for (MorphInterpretation& mi: res) { | |
25 | + cerr << mi.getStartNode() << " " << mi.getEndNode() << " " << mi.getLemma() << " " << mi.getTag() << " " << mi.getName() << endl; | |
26 | + } | |
27 | + return 0; | |
28 | +} | |
29 | + | |
... | ... |
nbproject/configurations.xml
... | ... | @@ -11,12 +11,6 @@ |
11 | 11 | <df name="charset"> |
12 | 12 | <in>CharsetConverter.cpp</in> |
13 | 13 | </df> |
14 | - <df name="encoding"> | |
15 | - <in>CharsetConverter.cpp</in> | |
16 | - </df> | |
17 | - <df name="flexion"> | |
18 | - <in>FlexionGraph.cpp</in> | |
19 | - </df> | |
20 | 14 | <in>FlexionGraph.cpp</in> |
21 | 15 | <in>Morfeusz.cpp</in> |
22 | 16 | <in>MorphDeserializer.cpp</in> |
... | ... | @@ -24,6 +18,7 @@ |
24 | 18 | <in>Tagset.cpp</in> |
25 | 19 | <in>main.cpp</in> |
26 | 20 | <in>morfeusz.cpp</in> |
21 | + <in>test_morfeusz.cpp</in> | |
27 | 22 | <in>test_morph.cpp</in> |
28 | 23 | </df> |
29 | 24 | <logicalFolder name="ExternalFiles" |
... | ... | @@ -57,7 +52,7 @@ |
57 | 52 | <executablePath>build/fsa/test_dict</executablePath> |
58 | 53 | </makeTool> |
59 | 54 | </makefileType> |
60 | - <folder path="1/charset"> | |
55 | + <folder path="1"> | |
61 | 56 | <ccTool> |
62 | 57 | <incDir> |
63 | 58 | <pElem>fsa</pElem> |
... | ... | @@ -96,42 +91,22 @@ |
96 | 91 | </item> |
97 | 92 | <item path="morfeusz/FlexionGraph.cpp" ex="false" tool="1" flavor2="8"> |
98 | 93 | <ccTool> |
99 | - <incDir> | |
100 | - <pElem>fsa</pElem> | |
101 | - <pElem>build/morfeusz</pElem> | |
102 | - </incDir> | |
103 | 94 | </ccTool> |
104 | 95 | </item> |
105 | 96 | <item path="morfeusz/Morfeusz.cpp" ex="false" tool="1" flavor2="8"> |
106 | 97 | <ccTool> |
107 | - <incDir> | |
108 | - <pElem>fsa</pElem> | |
109 | - <pElem>build/morfeusz</pElem> | |
110 | - </incDir> | |
111 | 98 | </ccTool> |
112 | 99 | </item> |
113 | 100 | <item path="morfeusz/MorphDeserializer.cpp" ex="false" tool="1" flavor2="8"> |
114 | 101 | <ccTool> |
115 | - <incDir> | |
116 | - <pElem>fsa</pElem> | |
117 | - <pElem>build/morfeusz</pElem> | |
118 | - </incDir> | |
119 | 102 | </ccTool> |
120 | 103 | </item> |
121 | 104 | <item path="morfeusz/MorphInterpretation.cpp" ex="false" tool="1" flavor2="8"> |
122 | 105 | <ccTool> |
123 | - <incDir> | |
124 | - <pElem>fsa</pElem> | |
125 | - <pElem>build/morfeusz</pElem> | |
126 | - </incDir> | |
127 | 106 | </ccTool> |
128 | 107 | </item> |
129 | 108 | <item path="morfeusz/Tagset.cpp" ex="false" tool="1" flavor2="8"> |
130 | 109 | <ccTool> |
131 | - <incDir> | |
132 | - <pElem>fsa</pElem> | |
133 | - <pElem>build/morfeusz</pElem> | |
134 | - </incDir> | |
135 | 110 | </ccTool> |
136 | 111 | </item> |
137 | 112 | <item path="morfeusz/charset/CharsetConverter.cpp" |
... | ... | @@ -141,33 +116,23 @@ |
141 | 116 | <ccTool> |
142 | 117 | </ccTool> |
143 | 118 | </item> |
144 | - <item path="morfeusz/encoding/CharsetConverter.cpp" | |
145 | - ex="false" | |
146 | - tool="1" | |
147 | - flavor2="4"> | |
119 | + <item path="morfeusz/main.cpp" ex="false" tool="1" flavor2="8"> | |
148 | 120 | <ccTool> |
149 | 121 | </ccTool> |
150 | 122 | </item> |
151 | - <item path="morfeusz/flexion/FlexionGraph.cpp" ex="false" tool="1" flavor2="4"> | |
123 | + <item path="morfeusz/morfeusz.cpp" ex="false" tool="1" flavor2="4"> | |
152 | 124 | <ccTool> |
153 | 125 | </ccTool> |
154 | 126 | </item> |
155 | - <item path="morfeusz/main.cpp" ex="false" tool="1" flavor2="8"> | |
127 | + <item path="morfeusz/test_morfeusz.cpp" ex="false" tool="1" flavor2="4"> | |
156 | 128 | <ccTool> |
157 | - <incDir> | |
158 | - <pElem>fsa</pElem> | |
159 | - <pElem>build/morfeusz</pElem> | |
160 | - </incDir> | |
161 | 129 | </ccTool> |
162 | 130 | </item> |
163 | - <item path="morfeusz/morfeusz.cpp" ex="false" tool="1" flavor2="4"> | |
131 | + <item path="morfeusz/test_morph.cpp" ex="false" tool="1" flavor2="8"> | |
164 | 132 | <ccTool> |
165 | - <incDir> | |
166 | - <pElem>morfeusz</pElem> | |
167 | - </incDir> | |
168 | 133 | </ccTool> |
169 | 134 | </item> |
170 | - <item path="morfeusz/test_morph.cpp" ex="false" tool="1" flavor2="8"> | |
135 | + <item path="morfeusz/test_simple.cpp" ex="false" tool="1" flavor2="0"> | |
171 | 136 | <ccTool> |
172 | 137 | <incDir> |
173 | 138 | <pElem>fsa</pElem> |
... | ... |