Commit 63cafb79f1f1c0fbb69aa9926a4cc4b0dca77f45
1 parent
482c6f80
obsługa "naj" i "nie"
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@96 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing
5 changed files
with
56 additions
and
35 deletions
input/PoliMorfSmall.tab
... | ... | @@ -691,3 +691,5 @@ lubianemu lubić ppas:sg:dat:m1.m2.m3.n1.n2:imperf:aff pospolita |
691 | 691 | lubianemu lubić ppas:sg:dat:m1.m2.m3.n1.n2:imperf:aff pospolita |
692 | 692 | nielubianymi lubić ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg pospolita |
693 | 693 | nielubianego lubić ppas:sg:acc:m1.m2:imperf:neg pospolita |
694 | +nienajpierwszych nienajpierwszy adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos pospolita | |
695 | +nienajlepiej nienajlepiej adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos pospolita | |
... | ... |
morfeusz/FlexionGraph.cpp
... | ... | @@ -33,19 +33,21 @@ void FlexionGraph::addPath(const std::vector<InterpretedChunk>& path) { |
33 | 33 | // debugGraph(this->graph); |
34 | 34 | for (unsigned int i = 0; i < path.size(); i++) { |
35 | 35 | const InterpretedChunk& chunk = path[i]; |
36 | - if (&chunk == &(path.front()) | |
37 | - && &chunk == &(path.back())) { | |
38 | - Edge e = {chunk, UINT_MAX}; | |
39 | - this->addStartEdge(e); | |
40 | - } else if (&chunk == &(path.front())) { | |
41 | - Edge e = {chunk, this->graph.empty() ? 1 : (unsigned int) this->graph.size()}; | |
42 | - this->addStartEdge(e); | |
43 | - } else if (&chunk == &(path.back())) { | |
44 | - Edge e = {chunk, UINT_MAX}; | |
45 | - this->addMiddleEdge((unsigned int) this->graph.size(), e); | |
46 | - } else { | |
47 | - Edge e = {chunk, (int) this->graph.size() + 1}; | |
48 | - this->addMiddleEdge((unsigned int) this->graph.size(), e); | |
36 | + if (!chunk.shiftOrth) { | |
37 | + if (&chunk == &(path.front()) | |
38 | + && &chunk == &(path.back())) { | |
39 | + Edge e = {chunk, UINT_MAX}; | |
40 | + this->addStartEdge(e); | |
41 | + } else if (&chunk == &(path.front())) { | |
42 | + Edge e = {chunk, this->graph.empty() ? 1 : (unsigned int) this->graph.size()}; | |
43 | + this->addStartEdge(e); | |
44 | + } else if (&chunk == &(path.back())) { | |
45 | + Edge e = {chunk, UINT_MAX}; | |
46 | + this->addMiddleEdge((unsigned int) this->graph.size(), e); | |
47 | + } else { | |
48 | + Edge e = {chunk, (int) this->graph.size() + 1}; | |
49 | + this->addMiddleEdge((unsigned int) this->graph.size(), e); | |
50 | + } | |
49 | 51 | } |
50 | 52 | } |
51 | 53 | } |
... | ... | @@ -103,8 +105,7 @@ void FlexionGraph::redirectEdges(unsigned int fromNode, unsigned int toNode) { |
103 | 105 | // if newEdge is not in edges, redirect oldEdge |
104 | 106 | // so it becomes newEdge |
105 | 107 | oldEdge.nextNode = toNode; |
106 | - } | |
107 | - else { | |
108 | + } else { | |
108 | 109 | // if newEdge is already there, just remove old edge |
109 | 110 | edges.erase(edgesIt); |
110 | 111 | } |
... | ... |
morfeusz/Morfeusz.cpp
... | ... | @@ -108,8 +108,7 @@ void Morfeusz::analyzeOneWord( |
108 | 108 | srcNode++; |
109 | 109 | } |
110 | 110 | // graph.getResults(*this->tagset, results); |
111 | - } | |
112 | - else if (inputStart != inputEnd) { | |
111 | + } else if (inputStart != inputEnd) { | |
113 | 112 | this->appendIgnotiumToResults(string(inputStart, currInput), startNodeNum, results); |
114 | 113 | } |
115 | 114 | inputStart = currInput; |
... | ... | @@ -141,20 +140,24 @@ void Morfeusz::doAnalyzeOneWord( |
141 | 140 | vector<InterpsGroup> val(state.getValue()); |
142 | 141 | for (unsigned int i = 0; i < val.size(); i++) { |
143 | 142 | InterpsGroup& ig = val[i]; |
144 | - | |
143 | + | |
145 | 144 | SegrulesStateType newSegrulesState = segrulesState; |
146 | 145 | newSegrulesState.proceedToNext(ig.type); |
147 | - | |
146 | + | |
148 | 147 | if (!newSegrulesState.isSink()) { |
149 | 148 | bool shiftOrth = newSegrulesState.getLastTransitionValue(); |
150 | 149 | InterpretedChunk ic = {inputData, originalCodepoints, lowercaseCodepoints, ig, shiftOrth}; |
150 | + if (!accum.empty() && accum.back().shiftOrth) { | |
151 | + ic.originalCodepoints.insert( | |
152 | + ic.originalCodepoints.begin(), | |
153 | + accum.back().originalCodepoints.begin(), | |
154 | + accum.back().originalCodepoints.end()); | |
155 | + } | |
151 | 156 | accum.push_back(ic); |
152 | 157 | const char* newCurrInput = currInput; |
153 | 158 | doAnalyzeOneWord(newCurrInput, inputEnd, accum, graph, newSegrulesState); |
154 | 159 | accum.pop_back(); |
155 | 160 | } |
156 | - else { | |
157 | - } | |
158 | 161 | } |
159 | 162 | } |
160 | 163 | |
... | ... | @@ -166,20 +169,24 @@ void Morfeusz::doAnalyzeOneWord( |
166 | 169 | vector<InterpsGroup > val(state.getValue()); |
167 | 170 | for (unsigned int i = 0; i < val.size(); i++) { |
168 | 171 | InterpsGroup& ig = val[i]; |
169 | - | |
172 | + | |
170 | 173 | SegrulesStateType newSegrulesState = segrulesState; |
171 | 174 | newSegrulesState.proceedToNext(ig.type); |
172 | - | |
175 | + | |
173 | 176 | if (newSegrulesState.isAccepting()) { |
174 | 177 | bool shiftOrth = newSegrulesState.getLastTransitionValue(); |
175 | 178 | InterpretedChunk ic = {inputData, originalCodepoints, lowercaseCodepoints, ig, shiftOrth}; |
179 | + if (!accum.empty() && accum.back().shiftOrth) { | |
180 | + ic.originalCodepoints.insert( | |
181 | + ic.originalCodepoints.begin(), | |
182 | + accum.back().originalCodepoints.begin(), | |
183 | + accum.back().originalCodepoints.end()); | |
184 | + } | |
176 | 185 | accum.push_back(ic); |
177 | 186 | graph.addPath(accum); |
178 | 187 | accum.pop_back(); |
179 | - } | |
180 | - else if (!newSegrulesState.isSink()) { | |
181 | - } | |
182 | - else { | |
188 | + } else if (!newSegrulesState.isSink()) { | |
189 | + } else { | |
183 | 190 | } |
184 | 191 | } |
185 | 192 | } |
... | ... |
nbproject/configurations.xml
... | ... | @@ -39,11 +39,11 @@ |
39 | 39 | <in>MorphInterpretation.cpp</in> |
40 | 40 | <in>Tagset.cpp</in> |
41 | 41 | <in>const.cpp</in> |
42 | + <in>main.cpp</in> | |
42 | 43 | <in>morfeusz_analyzer.cpp</in> |
43 | 44 | <in>morfeusz_generator.cpp</in> |
44 | 45 | <in>test_recognize_dict.cpp</in> |
45 | 46 | <in>test_result_equals.cpp</in> |
46 | - <in>test_synth_dict.cpp</in> | |
47 | 47 | </df> |
48 | 48 | <logicalFolder name="morfeusz" |
49 | 49 | displayName="morfeusz" |
... | ... | @@ -86,7 +86,7 @@ |
86 | 86 | <buildCommandWorkingDir>build</buildCommandWorkingDir> |
87 | 87 | <buildCommand>${MAKE} -f Makefile</buildCommand> |
88 | 88 | <cleanCommand>${MAKE} -f Makefile clean</cleanCommand> |
89 | - <executablePath>build/morfeusz/morfeusz_generator</executablePath> | |
89 | + <executablePath>build/morfeusz/test_result_equals</executablePath> | |
90 | 90 | </makeTool> |
91 | 91 | </makefileType> |
92 | 92 | <item path="build/default_fsa.cpp" ex="false" tool="1" flavor2="4"> |
... | ... | @@ -236,7 +236,7 @@ |
236 | 236 | <ccTool> |
237 | 237 | <incDir> |
238 | 238 | <pElem>morfeusz</pElem> |
239 | - <pElem>/usr/lib/jvm/default-java/include</pElem> | |
239 | + <pElem>/usr/lib/jvm/java-6-openjdk/include</pElem> | |
240 | 240 | </incDir> |
241 | 241 | <preprocessorList> |
242 | 242 | <Elem>jmorfeusz_EXPORTS</Elem> |
... | ... | @@ -479,6 +479,21 @@ |
479 | 479 | </incDir> |
480 | 480 | </ccTool> |
481 | 481 | </item> |
482 | + <item path="morfeusz/main.cpp" ex="false" tool="1" flavor2="4"> | |
483 | + <ccTool> | |
484 | + <incDir> | |
485 | + <pElem>build</pElem> | |
486 | + <pElem>build/morfeusz</pElem> | |
487 | + </incDir> | |
488 | + <preprocessorList> | |
489 | + <Elem>NDEBUG</Elem> | |
490 | + <Elem>_OPTIMIZE__=1</Elem> | |
491 | + </preprocessorList> | |
492 | + <undefinedList> | |
493 | + <Elem>__NO_INLINE__</Elem> | |
494 | + </undefinedList> | |
495 | + </ccTool> | |
496 | + </item> | |
482 | 497 | <item path="morfeusz/morfeusz_analyzer.cpp" ex="false" tool="1" flavor2="4"> |
483 | 498 | <ccTool> |
484 | 499 | <incDir> |
... | ... | @@ -526,10 +541,6 @@ |
526 | 541 | </incDir> |
527 | 542 | </ccTool> |
528 | 543 | </item> |
529 | - <item path="morfeusz/test_synth_dict.cpp" ex="false" tool="1" flavor2="4"> | |
530 | - <ccTool> | |
531 | - </ccTool> | |
532 | - </item> | |
533 | 544 | </conf> |
534 | 545 | </confs> |
535 | 546 | </configurationDescriptor> |
... | ... |
nbproject/project.xml
... | ... | @@ -6,7 +6,7 @@ |
6 | 6 | <name>morfeusz</name> |
7 | 7 | <c-extensions>i</c-extensions> |
8 | 8 | <cpp-extensions>cpp,cxx</cpp-extensions> |
9 | - <header-extensions>h,hpp</header-extensions> | |
9 | + <header-extensions>h,hpp,in</header-extensions> | |
10 | 10 | <sourceEncoding>UTF-8</sourceEncoding> |
11 | 11 | <make-dep-projects/> |
12 | 12 | <sourceRootList> |
... | ... |