Commit 461a6aa1f981c49555f6e7a9f0ced6176c35c4b1 (1 parent: 6e66c9c5)
- usunięcie niektórych warningów
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@26 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing 5 changed files with 47 additions and 24 deletions
morfeusz/FlexionGraph.cpp
1 | 1 | |
2 | 2 | #include <string> |
3 | 3 | #include <cassert> |
4 | +#include <climits> | |
4 | 5 | #include "utils.hpp" |
5 | 6 | #include "FlexionGraph.hpp" |
6 | 7 | |
... | ... | @@ -32,16 +33,16 @@ void FlexionGraph::addPath(const std::vector<InterpretedChunk>& path) { |
32 | 33 | for (const InterpretedChunk& chunk : path) { |
33 | 34 | if (&chunk == &(path.front()) |
34 | 35 | && &chunk == &(path.back())) { |
35 | - Edge e = {chunk, -1}; | |
36 | + Edge e = {chunk, UINT_MAX}; | |
36 | 37 | this->addStartEdge(e); |
37 | 38 | } else if (&chunk == &(path.front())) { |
38 | - Edge e = {chunk, (int) this->graph.size() + 1}; | |
39 | + Edge e = {chunk, this->graph.size() + 1}; | |
39 | 40 | this->addStartEdge(e); |
40 | 41 | } else if (&chunk == &(path.back())) { |
41 | - Edge e = {chunk, -1}; | |
42 | + Edge e = {chunk, UINT_MAX}; | |
42 | 43 | this->addMiddleEdge(e); |
43 | 44 | } else { |
44 | - Edge e = {chunk, (int) this->graph.size() + 1}; | |
45 | + Edge e = {chunk, this->graph.size() + 1}; | |
45 | 46 | this->addMiddleEdge(e); |
46 | 47 | } |
47 | 48 | } |
... | ... | @@ -53,7 +54,7 @@ bool FlexionGraph::canMergeNodes(unsigned int node1, unsigned int node2) { |
53 | 54 | } |
54 | 55 | |
55 | 56 | set<FlexionGraph::Path> FlexionGraph::getPossiblePaths(unsigned int node) { |
56 | - if (node == -1 || node == this->graph.size() - 1) { | |
57 | + if (node == UINT_MAX || node == this->graph.size() - 1) { | |
57 | 58 | return set<FlexionGraph::Path>(); |
58 | 59 | } |
59 | 60 | else { |
... | ... | @@ -147,7 +148,7 @@ bool FlexionGraph::empty() const { |
147 | 148 | void FlexionGraph::repairLastNodeNumbers() { |
148 | 149 | for (vector<Edge>& edges: this->graph) { |
149 | 150 | for (Edge& e: edges) { |
150 | - if (e.nextNode == -1) { | |
151 | + if (e.nextNode == UINT_MAX) { | |
151 | 152 | e.nextNode = this->graph.size(); |
152 | 153 | } |
153 | 154 | } |
... | ... |
morfeusz/FlexionGraph.hpp
morfeusz/Morfeusz.cpp
... | ... | @@ -98,7 +98,6 @@ void Morfeusz::doProcessOneWord( |
98 | 98 | FlexionGraph& graph) const { |
99 | 99 | bool endOfWord = inputData == inputEnd; |
100 | 100 | const char* currInput = inputData; |
101 | - const char* prevInput = inputData; | |
102 | 101 | uint32_t codepoint = endOfWord ? 0 : this->charsetConverter->next(currInput, inputEnd); |
103 | 102 | // UnicodeChunk uchunk(*(this->charsetConverter), *(this->caseConverter)); |
104 | 103 | vector<uint32_t> originalCodepoints; |
... | ... | @@ -113,6 +112,9 @@ void Morfeusz::doProcessOneWord( |
113 | 112 | this->feedState(state, lowerCP); |
114 | 113 | if (state.isAccepting()) { |
115 | 114 | for (InterpsGroup& ig : state.getValue()) { |
115 | + for (EncodedInterpretation& ei: ig.interps) { | |
116 | + cerr << "CUT: " << ei.lemma.suffixToCut << "; ADD: " << ei.lemma.suffixToAdd << endl; | |
117 | + } | |
116 | 118 | InterpretedChunk ic = {inputData, originalCodepoints, lowercaseCodepoints, ig}; |
117 | 119 | accum.push_back(ic); |
118 | 120 | const char* newCurrInput = currInput; |
... | ... | @@ -120,7 +122,6 @@ void Morfeusz::doProcessOneWord( |
120 | 122 | accum.pop_back(); |
121 | 123 | } |
122 | 124 | } |
123 | - prevInput = currInput; | |
124 | 125 | codepoint = currInput == inputEnd ? 0 : this->charsetConverter->next(currInput, inputEnd); |
125 | 126 | } |
126 | 127 | if (state.isAccepting()) { |
... | ... |
morfeusz/test_simple.cpp
... | ... | @@ -20,7 +20,7 @@ int main(int argc, char** argv) { |
20 | 20 | Morfeusz morfeusz("/tmp/test-SIMPLE-PoliMorfSmall.tab.fsa"); |
21 | 21 | DEBUG("żyję"); |
22 | 22 | vector<MorphInterpretation> res; |
23 | - string word = " mijałem fasASDfasd abdOminalności "; | |
23 | + string word = " mijałem aa fasASDfasd abdOminalności "; | |
24 | 24 | morfeusz.analyze(word, res); |
25 | 25 | DEBUG("znaleziono "+to_string(res.size())); |
26 | 26 | for (MorphInterpretation& mi: res) { |
... | ... |
nbproject/configurations.xml
... | ... | @@ -14,13 +14,11 @@ |
14 | 14 | <in>case_folding.cpp</in> |
15 | 15 | </df> |
16 | 16 | <in>FlexionGraph.cpp</in> |
17 | - <in>InterpretedChunksDecoder.hpp</in> | |
18 | 17 | <in>Morfeusz.cpp</in> |
19 | 18 | <in>MorphDeserializer.cpp</in> |
20 | 19 | <in>MorphInterpretation.cpp</in> |
21 | 20 | <in>Tagset.cpp</in> |
22 | 21 | <in>main.cpp</in> |
23 | - <in>test_morfeusz.cpp</in> | |
24 | 22 | <in>test_morph.cpp</in> |
25 | 23 | <in>test_simple.cpp</in> |
26 | 24 | </df> |
... | ... | @@ -52,10 +50,10 @@ |
52 | 50 | <buildCommandWorkingDir>build</buildCommandWorkingDir> |
53 | 51 | <buildCommand>${MAKE} -f Makefile</buildCommand> |
54 | 52 | <cleanCommand>${MAKE} -f Makefile clean</cleanCommand> |
55 | - <executablePath>build/morfeusz/test_simple</executablePath> | |
53 | + <executablePath>build/fsa/test_dict</executablePath> | |
56 | 54 | </makeTool> |
57 | 55 | </makefileType> |
58 | - <folder path="1"> | |
56 | + <folder path="1/charset"> | |
59 | 57 | <ccTool> |
60 | 58 | <incDir> |
61 | 59 | <pElem>fsa</pElem> |
... | ... | @@ -94,30 +92,45 @@ |
94 | 92 | </item> |
95 | 93 | <item path="morfeusz/FlexionGraph.cpp" ex="false" tool="1" flavor2="8"> |
96 | 94 | <ccTool> |
95 | + <incDir> | |
96 | + <pElem>fsa</pElem> | |
97 | + <pElem>build/morfeusz</pElem> | |
98 | + </incDir> | |
97 | 99 | </ccTool> |
98 | 100 | </item> |
99 | - <item path="morfeusz/InterpretedChunksDecoder.hpp" | |
100 | - ex="false" | |
101 | - tool="3" | |
102 | - flavor2="0"> | |
103 | - </item> | |
104 | 101 | <item path="morfeusz/Morfeusz.cpp" ex="false" tool="1" flavor2="8"> |
105 | 102 | <ccTool> |
103 | + <incDir> | |
104 | + <pElem>fsa</pElem> | |
105 | + <pElem>build/morfeusz</pElem> | |
106 | + </incDir> | |
106 | 107 | </ccTool> |
107 | 108 | </item> |
108 | 109 | <item path="morfeusz/MorphDeserializer.cpp" ex="false" tool="1" flavor2="8"> |
109 | 110 | <ccTool> |
111 | + <incDir> | |
112 | + <pElem>fsa</pElem> | |
113 | + <pElem>build/morfeusz</pElem> | |
114 | + </incDir> | |
110 | 115 | </ccTool> |
111 | 116 | </item> |
112 | 117 | <item path="morfeusz/MorphInterpretation.cpp" ex="false" tool="1" flavor2="8"> |
113 | 118 | <ccTool> |
119 | + <incDir> | |
120 | + <pElem>fsa</pElem> | |
121 | + <pElem>build/morfeusz</pElem> | |
122 | + </incDir> | |
114 | 123 | </ccTool> |
115 | 124 | </item> |
116 | 125 | <item path="morfeusz/Tagset.cpp" ex="false" tool="1" flavor2="8"> |
117 | 126 | <ccTool> |
127 | + <incDir> | |
128 | + <pElem>fsa</pElem> | |
129 | + <pElem>build/morfeusz</pElem> | |
130 | + </incDir> | |
118 | 131 | </ccTool> |
119 | 132 | </item> |
120 | - <item path="morfeusz/charset/CaseConverter.cpp" ex="false" tool="1" flavor2="4"> | |
133 | + <item path="morfeusz/charset/CaseConverter.cpp" ex="false" tool="1" flavor2="8"> | |
121 | 134 | <ccTool> |
122 | 135 | </ccTool> |
123 | 136 | </item> |
... | ... | @@ -134,18 +147,26 @@ |
134 | 147 | </item> |
135 | 148 | <item path="morfeusz/main.cpp" ex="false" tool="1" flavor2="8"> |
136 | 149 | <ccTool> |
137 | - </ccTool> | |
138 | - </item> | |
139 | - <item path="morfeusz/test_morfeusz.cpp" ex="false" tool="1" flavor2="4"> | |
140 | - <ccTool> | |
150 | + <incDir> | |
151 | + <pElem>fsa</pElem> | |
152 | + <pElem>build/morfeusz</pElem> | |
153 | + </incDir> | |
141 | 154 | </ccTool> |
142 | 155 | </item> |
143 | 156 | <item path="morfeusz/test_morph.cpp" ex="false" tool="1" flavor2="8"> |
144 | 157 | <ccTool> |
158 | + <incDir> | |
159 | + <pElem>fsa</pElem> | |
160 | + <pElem>build/morfeusz</pElem> | |
161 | + </incDir> | |
145 | 162 | </ccTool> |
146 | 163 | </item> |
147 | 164 | <item path="morfeusz/test_simple.cpp" ex="false" tool="1" flavor2="8"> |
148 | 165 | <ccTool> |
166 | + <incDir> | |
167 | + <pElem>fsa</pElem> | |
168 | + <pElem>build/morfeusz</pElem> | |
169 | + </incDir> | |
149 | 170 | </ccTool> |
150 | 171 | </item> |
151 | 172 | </conf> |
... | ... |