Commit 84a01b04cbbc132e4a7dd77d458a0c952ebbc104
1 parent
362d261a
- segmentacja wreszcie działa w miarę tak, jak trzeba (poprawiony bug z naj-)
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@109 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing
14 changed files
with
163 additions
and
55 deletions
CMakeLists.txt
@@ -30,13 +30,13 @@ set (PROJECT_VERSION "${Morfeusz_VERSION_MAJOR}.${Morfeusz_VERSION_MINOR}.${Morf | @@ -30,13 +30,13 @@ set (PROJECT_VERSION "${Morfeusz_VERSION_MAJOR}.${Morfeusz_VERSION_MINOR}.${Morf | ||
30 | ### USER DEFINED VARIABLES | 30 | ### USER DEFINED VARIABLES |
31 | 31 | ||
32 | # INPUT_DICTIONARY_CPP | 32 | # INPUT_DICTIONARY_CPP |
33 | -set (INPUT_DICTIONARY_CPP "${PROJECT_SOURCE_DIR}/default_fsa.cpp") | ||
34 | -set (INPUT_SYNTH_DICTIONARY_CPP "${PROJECT_SOURCE_DIR}/default_synth_fsa.cpp") | 33 | +set (INPUT_DICTIONARY_CPP "${PROJECT_SOURCE_DIR}/../default_fsa.cpp") |
34 | +set (INPUT_SYNTH_DICTIONARY_CPP "${PROJECT_SOURCE_DIR}/../default_synth_fsa.cpp") | ||
35 | if ("${INPUT_DICTIONARIES}" STREQUAL "") | 35 | if ("${INPUT_DICTIONARIES}" STREQUAL "") |
36 | if ("${EMPTY_INPUT_DICTIONARY}" STREQUAL "TRUE") | 36 | if ("${EMPTY_INPUT_DICTIONARY}" STREQUAL "TRUE") |
37 | set (INPUT_DICTIONARIES ${PROJECT_SOURCE_DIR}/input/empty.txt) | 37 | set (INPUT_DICTIONARIES ${PROJECT_SOURCE_DIR}/input/empty.txt) |
38 | else () | 38 | else () |
39 | - set (INPUT_DICTIONARIES "${PROJECT_SOURCE_DIR}/input/PoliMorf-0.6.7.tab,${PROJECT_SOURCE_DIR}/input/dodatki.tab") | 39 | + set (INPUT_DICTIONARIES "${PROJECT_SOURCE_DIR}/input/PoliMorfSmall.tab,${PROJECT_SOURCE_DIR}/input/dodatki.tab") |
40 | endif () | 40 | endif () |
41 | endif () | 41 | endif () |
42 | 42 |
fsabuilder/morfeuszbuilder/fsa/state.py
@@ -70,6 +70,6 @@ class State(object): | @@ -70,6 +70,6 @@ class State(object): | ||
70 | 70 | ||
71 | def debug(self): | 71 | def debug(self): |
72 | print '----------------' | 72 | print '----------------' |
73 | - print 'STATE:', self.idx | 73 | + print 'STATE:', self.idx, 'accepting', self.isAccepting() |
74 | for label, s in self.transitionsMap.iteritems(): | 74 | for label, s in self.transitionsMap.iteritems(): |
75 | print label, '-->', s.idx | 75 | print label, '-->', s.idx |
fsabuilder/morfeuszbuilder/segrules/preprocessor.py
@@ -6,7 +6,7 @@ Created on 23 sty 2014 | @@ -6,7 +6,7 @@ Created on 23 sty 2014 | ||
6 | import re | 6 | import re |
7 | from pyparsing import * | 7 | from pyparsing import * |
8 | 8 | ||
9 | -identifier = Word(alphas, bodyChars=alphanums+'_>*+') | 9 | +identifier = Word(alphas, bodyChars=alphanums+'_>*+!') |
10 | define = Keyword('#define').suppress() + identifier + Optional(Suppress('(') + identifier + Suppress(')')) + restOfLine + LineEnd() + StringEnd() | 10 | define = Keyword('#define').suppress() + identifier + Optional(Suppress('(') + identifier + Suppress(')')) + restOfLine + LineEnd() + StringEnd() |
11 | ifdef = Keyword('#ifdef').suppress() + identifier + LineEnd() + StringEnd() | 11 | ifdef = Keyword('#ifdef').suppress() + identifier + LineEnd() + StringEnd() |
12 | endif = Keyword('#endif').suppress() + LineEnd() + StringEnd() | 12 | endif = Keyword('#endif').suppress() + LineEnd() + StringEnd() |
fsabuilder/morfeuszbuilder/segrules/rules.py
@@ -122,4 +122,19 @@ class ShiftOrthRule(UnaryRule): | @@ -122,4 +122,19 @@ class ShiftOrthRule(UnaryRule): | ||
122 | startState.setTransitionData(self.child.segnum, 1) | 122 | startState.setTransitionData(self.child.segnum, 1) |
123 | 123 | ||
124 | def __str__(self): | 124 | def __str__(self): |
125 | - return u'(' + str(self.child) + ')>' | ||
126 | \ No newline at end of file | 125 | \ No newline at end of file |
126 | + return u'(' + str(self.child) + ')>' | ||
127 | + | ||
128 | +class ShiftOrthSameTypeRule(UnaryRule): | ||
129 | + | ||
130 | + def __init__(self, child): | ||
131 | + super(ShiftOrthSameTypeRule, self).__init__(child) | ||
132 | + | ||
133 | + def addToNFA(self, fsa): | ||
134 | + raise ValueError() | ||
135 | + | ||
136 | + def _doAddToNFA(self, startState, endState): | ||
137 | + self.child._doAddToNFA(startState, endState) | ||
138 | + startState.setTransitionData(self.child.segnum, 2) | ||
139 | + | ||
140 | + def __str__(self): | ||
141 | + return u'(' + str(self.child) + ')!>' |
fsabuilder/morfeuszbuilder/segrules/rulesParser.py
@@ -43,19 +43,19 @@ class RulesParser(object): | @@ -43,19 +43,19 @@ class RulesParser(object): | ||
43 | firstNFA = None | 43 | firstNFA = None |
44 | for defs in itertools.product(*key2Defs.values()): | 44 | for defs in itertools.product(*key2Defs.values()): |
45 | key2Def = dict([(def2Key[define], define) for define in defs]) | 45 | key2Def = dict([(def2Key[define], define) for define in defs]) |
46 | - print key2Def | 46 | +# print key2Def |
47 | nfa = rulesNFA.RulesNFA() | 47 | nfa = rulesNFA.RulesNFA() |
48 | if not firstNFA: | 48 | if not firstNFA: |
49 | firstNFA = nfa | 49 | firstNFA = nfa |
50 | combinationEnumeratedLines = segtypesConfigFile.enumerateLinesInSection('combinations') | 50 | combinationEnumeratedLines = segtypesConfigFile.enumerateLinesInSection('combinations') |
51 | combinationEnumeratedLines = list(preprocessor.preprocess(combinationEnumeratedLines, defs)) | 51 | combinationEnumeratedLines = list(preprocessor.preprocess(combinationEnumeratedLines, defs)) |
52 | for rule in self._doParse(combinationEnumeratedLines, segtypesHelper): | 52 | for rule in self._doParse(combinationEnumeratedLines, segtypesHelper): |
53 | - print rule | 53 | +# print rule |
54 | rule.addToNFA(nfa) | 54 | rule.addToNFA(nfa) |
55 | - nfa.debug() | 55 | +# nfa.debug() |
56 | dfa = nfa.convertToDFA() | 56 | dfa = nfa.convertToDFA() |
57 | - print '********* DFA **************' | ||
58 | - dfa.debug() | 57 | +# print '********* DFA **************' |
58 | +# dfa.debug() | ||
59 | # print dfa.tryToRecognize(bytearray([14])) | 59 | # print dfa.tryToRecognize(bytearray([14])) |
60 | res.addDFA(key2Def, dfa) | 60 | res.addDFA(key2Def, dfa) |
61 | return res | 61 | return res |
@@ -76,8 +76,9 @@ class RulesParser(object): | @@ -76,8 +76,9 @@ class RulesParser(object): | ||
76 | rule = Forward() | 76 | rule = Forward() |
77 | tagRule = Word(alphanums+'_') | 77 | tagRule = Word(alphanums+'_') |
78 | shiftOrthRule = tagRule + '>' | 78 | shiftOrthRule = tagRule + '>' |
79 | + shiftOrthSameTypeRule = tagRule + '!' + '>' | ||
79 | parenRule = Suppress('(') + rule + Suppress(')') | 80 | parenRule = Suppress('(') + rule + Suppress(')') |
80 | - atomicRule = tagRule ^ shiftOrthRule ^ parenRule | 81 | + atomicRule = tagRule ^ shiftOrthRule ^ shiftOrthSameTypeRule ^ parenRule |
81 | zeroOrMoreRule = atomicRule + Suppress('*') | 82 | zeroOrMoreRule = atomicRule + Suppress('*') |
82 | oneOrMoreRule = atomicRule + Suppress('+') | 83 | oneOrMoreRule = atomicRule + Suppress('+') |
83 | unaryRule = atomicRule ^ zeroOrMoreRule ^ oneOrMoreRule | 84 | unaryRule = atomicRule ^ zeroOrMoreRule ^ oneOrMoreRule |
@@ -88,6 +89,7 @@ class RulesParser(object): | @@ -88,6 +89,7 @@ class RulesParser(object): | ||
88 | 89 | ||
89 | tagRule.setParseAction(lambda string, loc, toks: self._createNewTagRule(toks[0], lineNum, line, segtypesHelper)) | 90 | tagRule.setParseAction(lambda string, loc, toks: self._createNewTagRule(toks[0], lineNum, line, segtypesHelper)) |
90 | shiftOrthRule.setParseAction(lambda string, loc, toks: rules.ShiftOrthRule(toks[0])) | 91 | shiftOrthRule.setParseAction(lambda string, loc, toks: rules.ShiftOrthRule(toks[0])) |
92 | + shiftOrthSameTypeRule.setParseAction(lambda string, loc, toks: rules.ShiftOrthSameTypeRule(toks[0])) | ||
91 | # parenRule.setParseAction(lambda string, loc, toks: toks[0]) | 93 | # parenRule.setParseAction(lambda string, loc, toks: toks[0]) |
92 | zeroOrMoreRule.setParseAction(lambda string, loc, toks: rules.ZeroOrMoreRule(toks[0])) | 94 | zeroOrMoreRule.setParseAction(lambda string, loc, toks: rules.ZeroOrMoreRule(toks[0])) |
93 | oneOrMoreRule.setParseAction(lambda string, loc, toks: rules.ConcatRule([toks[0], rules.ZeroOrMoreRule(toks[0])])) | 95 | oneOrMoreRule.setParseAction(lambda string, loc, toks: rules.ConcatRule([toks[0], rules.ZeroOrMoreRule(toks[0])])) |
fsabuilder/morfeuszbuilder/tagset/segtypes.py
@@ -26,6 +26,8 @@ class Segtypes(object): | @@ -26,6 +26,8 @@ class Segtypes(object): | ||
26 | self._readTags(segrulesConfigFile) | 26 | self._readTags(segrulesConfigFile) |
27 | self._indexSegnums() | 27 | self._indexSegnums() |
28 | 28 | ||
29 | + print self.segnum2Segtype | ||
30 | + | ||
29 | def _validate(self, msg, lineNum, cond): | 31 | def _validate(self, msg, lineNum, cond): |
30 | if not cond: | 32 | if not cond: |
31 | raise exceptions.ConfigFileException(self.filename, lineNum, msg) | 33 | raise exceptions.ConfigFileException(self.filename, lineNum, msg) |
@@ -77,7 +79,7 @@ class Segtypes(object): | @@ -77,7 +79,7 @@ class Segtypes(object): | ||
77 | 79 | ||
78 | lemma, pos = pattern.split(':') | 80 | lemma, pos = pattern.split(':') |
79 | 81 | ||
80 | - self.patternsList.append(SegtypePattern(lemma, pos + ':%', segnum)) | 82 | + self.patternsList.append(SegtypePattern(lemma, '%s|%s:%%' % (pos, pos), segnum)) |
81 | 83 | ||
82 | def _debugSegnums(self): | 84 | def _debugSegnums(self): |
83 | for tagnum, segnum in self._tagnum2Segnum.items(): | 85 | for tagnum, segnum in self._tagnum2Segnum.items(): |
input/segmenty.dat
@@ -112,7 +112,7 @@ moze_interp(z_on_agl) | @@ -112,7 +112,7 @@ moze_interp(z_on_agl) | ||
112 | moze_interp(z_on_agl on_agl) | 112 | moze_interp(z_on_agl on_agl) |
113 | 113 | ||
114 | # Liczba zapisana jako ciąg cyfr: | 114 | # Liczba zapisana jako ciąg cyfr: |
115 | -moze_interp( dig>* dig ) | 115 | +moze_interp( dig!>+ ) |
116 | 116 | ||
117 | # Formacje prefiksalne | 117 | # Formacje prefiksalne |
118 | #### trzeba wydzielić odpowiednie samodze! | 118 | #### trzeba wydzielić odpowiednie samodze! |
input/segmenty1.dat
@@ -7,7 +7,10 @@ praet=split composite | @@ -7,7 +7,10 @@ praet=split composite | ||
7 | 7 | ||
8 | #define moze_interp(segmenty) wsz_interp segmenty wsz_interp | 8 | #define moze_interp(segmenty) wsz_interp segmenty wsz_interp |
9 | 9 | ||
10 | -dig>* dig | 10 | +(adja dywiz)+ adj |
11 | +dig!>+ | ||
12 | +dig!> dig!> dig!> | ||
13 | +naj> adj_sup | ||
11 | 14 | ||
12 | [segment types] | 15 | [segment types] |
13 | naj | 16 | naj |
morfeusz/FlexionGraph.cpp
@@ -35,18 +35,21 @@ void FlexionGraph::addPath(const std::vector<InterpretedChunk>& path) { | @@ -35,18 +35,21 @@ void FlexionGraph::addPath(const std::vector<InterpretedChunk>& path) { | ||
35 | // debugGraph(this->graph); | 35 | // debugGraph(this->graph); |
36 | for (unsigned int i = 0; i < path.size(); i++) { | 36 | for (unsigned int i = 0; i < path.size(); i++) { |
37 | const InterpretedChunk& chunk = path[i]; | 37 | const InterpretedChunk& chunk = path[i]; |
38 | - if (!chunk.shiftOrth) { | 38 | + if (!chunk.orthWasShifted) { |
39 | if (&chunk == &(path.front()) | 39 | if (&chunk == &(path.front()) |
40 | && &chunk == &(path.back())) { | 40 | && &chunk == &(path.back())) { |
41 | Edge e = {chunk, UINT_MAX}; | 41 | Edge e = {chunk, UINT_MAX}; |
42 | this->addStartEdge(e); | 42 | this->addStartEdge(e); |
43 | - } else if (&chunk == &(path.front())) { | 43 | + } |
44 | + else if (&chunk == &(path.front())) { | ||
44 | Edge e = {chunk, this->graph.empty() ? 1 : (unsigned int) this->graph.size()}; | 45 | Edge e = {chunk, this->graph.empty() ? 1 : (unsigned int) this->graph.size()}; |
45 | this->addStartEdge(e); | 46 | this->addStartEdge(e); |
46 | - } else if (&chunk == &(path.back())) { | 47 | + } |
48 | + else if (&chunk == &(path.back())) { | ||
47 | Edge e = {chunk, UINT_MAX}; | 49 | Edge e = {chunk, UINT_MAX}; |
48 | this->addMiddleEdge((unsigned int) this->graph.size(), e); | 50 | this->addMiddleEdge((unsigned int) this->graph.size(), e); |
49 | - } else { | 51 | + } |
52 | + else { | ||
50 | Edge e = {chunk, (int) this->graph.size() + 1}; | 53 | Edge e = {chunk, (int) this->graph.size() + 1}; |
51 | this->addMiddleEdge((unsigned int) this->graph.size(), e); | 54 | this->addMiddleEdge((unsigned int) this->graph.size(), e); |
52 | } | 55 | } |
morfeusz/InterpretedChunk.hpp
@@ -17,6 +17,9 @@ struct InterpretedChunk { | @@ -17,6 +17,9 @@ struct InterpretedChunk { | ||
17 | std::vector<uint32_t> lowercaseCodepoints; | 17 | std::vector<uint32_t> lowercaseCodepoints; |
18 | InterpsGroup interpsGroup; | 18 | InterpsGroup interpsGroup; |
19 | bool shiftOrth; | 19 | bool shiftOrth; |
20 | + bool shiftOrthSameType; | ||
21 | + bool orthWasShifted; | ||
22 | + std::vector<InterpretedChunk> prefixChunks; | ||
20 | }; | 23 | }; |
21 | 24 | ||
22 | #endif /* INTERPRETEDCHUNK_HPP */ | 25 | #endif /* INTERPRETEDCHUNK_HPP */ |
morfeusz/InterpretedChunksDecoder.hpp
@@ -29,10 +29,19 @@ public: | @@ -29,10 +29,19 @@ public: | ||
29 | unsigned int endNode, | 29 | unsigned int endNode, |
30 | const InterpretedChunk& interpretedChunk, | 30 | const InterpretedChunk& interpretedChunk, |
31 | OutputIterator out) { | 31 | OutputIterator out) { |
32 | - string orth = env.getCharsetConverter().toString(interpretedChunk.originalCodepoints); | 32 | + string orth; |
33 | + string lemmaPrefix; | ||
34 | + for (unsigned int i = 0; i < interpretedChunk.prefixChunks.size(); i++) { | ||
35 | + const InterpretedChunk& prefixChunk = interpretedChunk.prefixChunks[i]; | ||
36 | + orth += env.getCharsetConverter().toString(prefixChunk.originalCodepoints); | ||
37 | + lemmaPrefix += convertLemma( | ||
38 | + prefixChunk.lowercaseCodepoints, | ||
39 | + prefixChunk.interpsGroup.interps[0].lemma); | ||
40 | + } | ||
41 | + orth += env.getCharsetConverter().toString(interpretedChunk.originalCodepoints); | ||
33 | for (unsigned int i = 0; i < interpretedChunk.interpsGroup.interps.size(); i++) { | 42 | for (unsigned int i = 0; i < interpretedChunk.interpsGroup.interps.size(); i++) { |
34 | const EncodedInterpretation& ei = interpretedChunk.interpsGroup.interps[i]; | 43 | const EncodedInterpretation& ei = interpretedChunk.interpsGroup.interps[i]; |
35 | - string lemma = convertLemma( | 44 | + string lemma = lemmaPrefix + convertLemma( |
36 | interpretedChunk.lowercaseCodepoints, | 45 | interpretedChunk.lowercaseCodepoints, |
37 | ei.lemma); | 46 | ei.lemma); |
38 | *out = MorphInterpretation( | 47 | *out = MorphInterpretation( |
morfeusz/Morfeusz.cpp
@@ -124,6 +124,15 @@ void Morfeusz::analyzeOneWord( | @@ -124,6 +124,15 @@ void Morfeusz::analyzeOneWord( | ||
124 | inputStart = currInput; | 124 | inputStart = currInput; |
125 | } | 125 | } |
126 | 126 | ||
127 | +static inline void doShiftOrth(InterpretedChunk& from, InterpretedChunk& to) { | ||
128 | + to.prefixChunks.insert( | ||
129 | + to.prefixChunks.begin(), | ||
130 | + from.prefixChunks.begin(), | ||
131 | + from.prefixChunks.end()); | ||
132 | + to.prefixChunks.push_back(from); | ||
133 | + from.orthWasShifted = true; | ||
134 | +} | ||
135 | + | ||
127 | void Morfeusz::doAnalyzeOneWord( | 136 | void Morfeusz::doAnalyzeOneWord( |
128 | const char*& inputData, | 137 | const char*& inputData, |
129 | const char* inputEnd, | 138 | const char* inputEnd, |
@@ -150,18 +159,17 @@ void Morfeusz::doAnalyzeOneWord( | @@ -150,18 +159,17 @@ void Morfeusz::doAnalyzeOneWord( | ||
150 | vector<InterpsGroup> val(state.getValue()); | 159 | vector<InterpsGroup> val(state.getValue()); |
151 | for (unsigned int i = 0; i < val.size(); i++) { | 160 | for (unsigned int i = 0; i < val.size(); i++) { |
152 | InterpsGroup& ig = val[i]; | 161 | InterpsGroup& ig = val[i]; |
153 | - | 162 | + cerr << (int) ig.type << endl; |
154 | SegrulesStateType newSegrulesState = segrulesState; | 163 | SegrulesStateType newSegrulesState = segrulesState; |
155 | newSegrulesState.proceedToNext(ig.type); | 164 | newSegrulesState.proceedToNext(ig.type); |
156 | if (!newSegrulesState.isSink()) { | 165 | if (!newSegrulesState.isSink()) { |
157 | - bool shiftOrth = newSegrulesState.getLastTransitionValue(); | ||
158 | - InterpretedChunk ic = {inputData, originalCodepoints, lowercaseCodepoints, ig, shiftOrth}; | ||
159 | - if (!accum.empty() && accum.back().shiftOrth) { | ||
160 | - ic.originalCodepoints.insert( | ||
161 | - ic.originalCodepoints.begin(), | ||
162 | - accum.back().originalCodepoints.begin(), | ||
163 | - accum.back().originalCodepoints.end()); | ||
164 | - ic.chunkStartPtr = accum.back().chunkStartPtr; | 166 | + bool shiftOrth = newSegrulesState.getLastTransitionValue() == 1; |
167 | + bool shiftOrthSameType = newSegrulesState.getLastTransitionValue() == 2; | ||
168 | + InterpretedChunk ic = {inputData, originalCodepoints, lowercaseCodepoints, ig, shiftOrth, shiftOrthSameType, false}; | ||
169 | + if (!accum.empty() | ||
170 | + && (accum.back().shiftOrth | ||
171 | + || (accum.back().shiftOrthSameType && accum.back().interpsGroup.type == ig.type))) { | ||
172 | + doShiftOrth(accum.back(), ic); | ||
165 | } | 173 | } |
166 | accum.push_back(ic); | 174 | accum.push_back(ic); |
167 | const char* newCurrInput = currInput; | 175 | const char* newCurrInput = currInput; |
@@ -179,17 +187,17 @@ void Morfeusz::doAnalyzeOneWord( | @@ -179,17 +187,17 @@ void Morfeusz::doAnalyzeOneWord( | ||
179 | vector<InterpsGroup > val(state.getValue()); | 187 | vector<InterpsGroup > val(state.getValue()); |
180 | for (unsigned int i = 0; i < val.size(); i++) { | 188 | for (unsigned int i = 0; i < val.size(); i++) { |
181 | InterpsGroup& ig = val[i]; | 189 | InterpsGroup& ig = val[i]; |
190 | + cerr << (int) ig.type << endl; | ||
182 | SegrulesStateType newSegrulesState = segrulesState; | 191 | SegrulesStateType newSegrulesState = segrulesState; |
183 | newSegrulesState.proceedToNext(ig.type); | 192 | newSegrulesState.proceedToNext(ig.type); |
184 | if (newSegrulesState.isAccepting()) { | 193 | if (newSegrulesState.isAccepting()) { |
185 | - bool shiftOrth = newSegrulesState.getLastTransitionValue(); | ||
186 | - InterpretedChunk ic = {inputData, originalCodepoints, lowercaseCodepoints, ig, shiftOrth}; | ||
187 | - if (!accum.empty() && accum.back().shiftOrth) { | ||
188 | - ic.originalCodepoints.insert( | ||
189 | - ic.originalCodepoints.begin(), | ||
190 | - accum.back().originalCodepoints.begin(), | ||
191 | - accum.back().originalCodepoints.end()); | ||
192 | - ic.chunkStartPtr = accum.back().chunkStartPtr; | 194 | + bool shiftOrth = newSegrulesState.getLastTransitionValue() == 1; |
195 | + bool shiftOrthSameType = newSegrulesState.getLastTransitionValue() == 2; | ||
196 | + InterpretedChunk ic = {inputData, originalCodepoints, lowercaseCodepoints, ig, shiftOrth, shiftOrthSameType, false}; | ||
197 | + if (!accum.empty() | ||
198 | + && (accum.back().shiftOrth | ||
199 | + || (accum.back().shiftOrthSameType && accum.back().interpsGroup.type == ig.type))) { | ||
200 | + doShiftOrth(accum.back(), ic); | ||
193 | } | 201 | } |
194 | accum.push_back(ic); | 202 | accum.push_back(ic); |
195 | graph.addPath(accum); | 203 | graph.addPath(accum); |
morfeusz/java/dupa
0 → 100644
1 | + | ||
2 | +# SWIG | ||
3 | +#set(CMAKE_SWIG_OUTDIR swig) | ||
4 | +#FIND_PACKAGE(SWIG REQUIRED) | ||
5 | +FIND_PACKAGE(JNI REQUIRED) | ||
6 | +#INCLUDE(${SWIG_USE_FILE}) | ||
7 | +include(UseJava) | ||
8 | + | ||
9 | +# SWIG Java | ||
10 | +include_directories (${JAVA_INCLUDE_PATH}) | ||
11 | +include_directories (..) | ||
12 | + | ||
13 | +set (SWIG_JAVA_OUTFILE swigJAVA.cpp) | ||
14 | +# set (JAVA_WRAPPER_FILE ${CMAKE_SHARED_LIBRARY_PREFIX}morfeusz${CMAKE_SHARED_LIBRARY_SUFFIX}) | ||
15 | +add_custom_command ( | ||
16 | + OUTPUT ${SWIG_JAVA_OUTFILE} | ||
17 | + COMMAND swig -java -c++ -package pl.waw.ipipan.morfeusz -o ${SWIG_JAVA_OUTFILE} -outdir ${CMAKE_SOURCE_DIR}/jmorfeusz/src/main/java/pl/waw/ipipan/morfeusz ${CMAKE_SOURCE_DIR}/morfeusz/morfeusz.i | ||
18 | + DEPENDS libmorfeusz | ||
19 | +) | ||
20 | +#set (CMAKE_SHARED_LINKER_FLAGS "-s -Os -static-libstdc++ -static-libgcc") | ||
21 | +add_library (jmorfeusz SHARED ${SWIG_JAVA_OUTFILE}) | ||
22 | +target_link_libraries (jmorfeusz ${JAVA_LIBRARIES} libmorfeusz) | ||
23 | +add_dependencies (jmorfeusz ${SWIG_JAVA_OUTFILE}) | ||
24 | + | ||
25 | +#set (CMAKE_SWIG_FLAGS -package pl.waw.ipipan.morfeusz) | ||
26 | +#set (CMAKE_SWIG_OUTDIR ${CMAKE_SOURCE_DIR}/jmorfeusz/src/main/java/pl/waw/ipipan/morfeusz) | ||
27 | + | ||
28 | +#set_source_files_properties (../morfeusz.i PROPERTIES CPLUSPLUS ON) | ||
29 | +#SWIG_ADD_MODULE(jmorfeusz java ../morfeusz.i) | ||
30 | +#SWIG_LINK_LIBRARIES(jmorfeusz ${JAVA_LIBRARIES}) | ||
31 | +#SWIG_LINK_LIBRARIES(jmorfeusz libmorfeusz) | ||
32 | + | ||
33 | +#if (${CMAKE_SYSTEM_NAME} MATCHES "Windows") | ||
34 | +# set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") | ||
35 | +# set (CMAKE_SHARED_LINKER_FLAGS "-s -Os -static-libstdc++ -static-libgcc") | ||
36 | +#endif () |
nbproject/configurations.xml
@@ -10,6 +10,12 @@ | @@ -10,6 +10,12 @@ | ||
10 | </logicalFolder> | 10 | </logicalFolder> |
11 | <logicalFolder name="f1" displayName="input" projectFiles="true"> | 11 | <logicalFolder name="f1" displayName="input" projectFiles="true"> |
12 | </logicalFolder> | 12 | </logicalFolder> |
13 | + <logicalFolder name="java" | ||
14 | + displayName="java" | ||
15 | + projectFiles="true" | ||
16 | + root="build1/morfeusz/java"> | ||
17 | + <itemPath>build1/morfeusz/java/swigJAVA.cpp</itemPath> | ||
18 | + </logicalFolder> | ||
13 | <df root="morfeusz" name="0"> | 19 | <df root="morfeusz" name="0"> |
14 | <df name="charset"> | 20 | <df name="charset"> |
15 | <in>CaseConverter.cpp</in> | 21 | <in>CaseConverter.cpp</in> |
@@ -60,6 +66,10 @@ | @@ -60,6 +66,10 @@ | ||
60 | <itemPath>build/morfeusz/morfeuszJAVA_wrap.cxx</itemPath> | 66 | <itemPath>build/morfeusz/morfeuszJAVA_wrap.cxx</itemPath> |
61 | <itemPath>build/morfeusz/morfeuszPYTHON_wrap.cxx</itemPath> | 67 | <itemPath>build/morfeusz/morfeuszPYTHON_wrap.cxx</itemPath> |
62 | </logicalFolder> | 68 | </logicalFolder> |
69 | + <logicalFolder name="xxx" displayName="xxx" projectFiles="true" root=".."> | ||
70 | + <itemPath>../default_fsa.cpp</itemPath> | ||
71 | + <itemPath>../default_synth_fsa.cpp</itemPath> | ||
72 | + </logicalFolder> | ||
63 | <logicalFolder name="ExternalFiles" | 73 | <logicalFolder name="ExternalFiles" |
64 | displayName="Important Files" | 74 | displayName="Important Files" |
65 | projectFiles="false" | 75 | projectFiles="false" |
@@ -95,6 +105,10 @@ | @@ -95,6 +105,10 @@ | ||
95 | <executablePath>build/morfeusz/morfeusz_analyzer</executablePath> | 105 | <executablePath>build/morfeusz/morfeusz_analyzer</executablePath> |
96 | </makeTool> | 106 | </makeTool> |
97 | </makefileType> | 107 | </makefileType> |
108 | + <item path="../default_fsa.cpp" ex="false" tool="1" flavor2="4"> | ||
109 | + </item> | ||
110 | + <item path="../default_synth_fsa.cpp" ex="false" tool="1" flavor2="4"> | ||
111 | + </item> | ||
98 | <item path="build/default_fsa.cpp" ex="false" tool="1" flavor2="4"> | 112 | <item path="build/default_fsa.cpp" ex="false" tool="1" flavor2="4"> |
99 | </item> | 113 | </item> |
100 | <item path="build/default_synth_fsa.cpp" ex="false" tool="1" flavor2="4"> | 114 | <item path="build/default_synth_fsa.cpp" ex="false" tool="1" flavor2="4"> |
@@ -107,6 +121,7 @@ | @@ -107,6 +121,7 @@ | ||
107 | flavor2="8"> | 121 | flavor2="8"> |
108 | <ccTool> | 122 | <ccTool> |
109 | <incDir> | 123 | <incDir> |
124 | + <pElem>build</pElem> | ||
110 | <pElem>/usr/lib/jvm/default-java/include</pElem> | 125 | <pElem>/usr/lib/jvm/default-java/include</pElem> |
111 | <pElem>morfeusz</pElem> | 126 | <pElem>morfeusz</pElem> |
112 | <pElem>build/morfeusz/java</pElem> | 127 | <pElem>build/morfeusz/java</pElem> |
@@ -130,6 +145,7 @@ | @@ -130,6 +145,7 @@ | ||
130 | flavor2="8"> | 145 | flavor2="8"> |
131 | <ccTool> | 146 | <ccTool> |
132 | <incDir> | 147 | <incDir> |
148 | + <pElem>build</pElem> | ||
133 | <pElem>/usr/include/python2.7</pElem> | 149 | <pElem>/usr/include/python2.7</pElem> |
134 | <pElem>morfeusz</pElem> | 150 | <pElem>morfeusz</pElem> |
135 | <pElem>build/morfeusz/python</pElem> | 151 | <pElem>build/morfeusz/python</pElem> |
@@ -152,11 +168,14 @@ | @@ -152,11 +168,14 @@ | ||
152 | tool="1" | 168 | tool="1" |
153 | flavor2="4"> | 169 | flavor2="4"> |
154 | </item> | 170 | </item> |
171 | + <item path="build1/morfeusz/java/swigJAVA.cpp" ex="false" tool="1" flavor2="4"> | ||
172 | + </item> | ||
155 | <item path="default_fsa.cpp" ex="false" tool="1" flavor2="4"> | 173 | <item path="default_fsa.cpp" ex="false" tool="1" flavor2="4"> |
156 | <ccTool flags="1"> | 174 | <ccTool flags="1"> |
157 | <incDir> | 175 | <incDir> |
176 | + <pElem>build1</pElem> | ||
158 | <pElem>morfeusz</pElem> | 177 | <pElem>morfeusz</pElem> |
159 | - <pElem>build/morfeusz</pElem> | 178 | + <pElem>build1/morfeusz</pElem> |
160 | </incDir> | 179 | </incDir> |
161 | <preprocessorList> | 180 | <preprocessorList> |
162 | <Elem>libmorfeusz_EXPORTS</Elem> | 181 | <Elem>libmorfeusz_EXPORTS</Elem> |
@@ -166,8 +185,9 @@ | @@ -166,8 +185,9 @@ | ||
166 | <item path="default_synth_fsa.cpp" ex="false" tool="1" flavor2="4"> | 185 | <item path="default_synth_fsa.cpp" ex="false" tool="1" flavor2="4"> |
167 | <ccTool flags="1"> | 186 | <ccTool flags="1"> |
168 | <incDir> | 187 | <incDir> |
188 | + <pElem>build1</pElem> | ||
169 | <pElem>morfeusz</pElem> | 189 | <pElem>morfeusz</pElem> |
170 | - <pElem>build/morfeusz</pElem> | 190 | + <pElem>build1/morfeusz</pElem> |
171 | </incDir> | 191 | </incDir> |
172 | <preprocessorList> | 192 | <preprocessorList> |
173 | <Elem>libmorfeusz_EXPORTS</Elem> | 193 | <Elem>libmorfeusz_EXPORTS</Elem> |
@@ -234,18 +254,24 @@ | @@ -234,18 +254,24 @@ | ||
234 | </undefinedList> | 254 | </undefinedList> |
235 | </ccTool> | 255 | </ccTool> |
236 | </folder> | 256 | </folder> |
237 | - <folder path="morfeusz"> | 257 | + <folder path="java"> |
238 | <ccTool> | 258 | <ccTool> |
239 | <incDir> | 259 | <incDir> |
240 | - <pElem>build</pElem> | 260 | + <pElem>build1</pElem> |
261 | + <pElem>morfeusz</pElem> | ||
262 | + <pElem>/usr/lib/jvm/java-6-openjdk/include</pElem> | ||
241 | </incDir> | 263 | </incDir> |
264 | + <preprocessorList> | ||
265 | + <Elem>libjmorfeusz_EXPORTS</Elem> | ||
266 | + </preprocessorList> | ||
242 | </ccTool> | 267 | </ccTool> |
243 | </folder> | 268 | </folder> |
244 | <folder path="morfeusz/java"> | 269 | <folder path="morfeusz/java"> |
245 | <ccTool> | 270 | <ccTool> |
246 | <incDir> | 271 | <incDir> |
272 | + <pElem>build</pElem> | ||
247 | <pElem>morfeusz</pElem> | 273 | <pElem>morfeusz</pElem> |
248 | - <pElem>/usr/lib/jvm/default-java/include</pElem> | 274 | + <pElem>/usr/lib/jvm/java-6-openjdk/include</pElem> |
249 | </incDir> | 275 | </incDir> |
250 | <preprocessorList> | 276 | <preprocessorList> |
251 | <Elem>libjmorfeusz_EXPORTS</Elem> | 277 | <Elem>libjmorfeusz_EXPORTS</Elem> |
@@ -255,6 +281,7 @@ | @@ -255,6 +281,7 @@ | ||
255 | <folder path="morfeusz/python"> | 281 | <folder path="morfeusz/python"> |
256 | <ccTool> | 282 | <ccTool> |
257 | <incDir> | 283 | <incDir> |
284 | + <pElem>build</pElem> | ||
258 | <pElem>/usr/include/python2.7</pElem> | 285 | <pElem>/usr/include/python2.7</pElem> |
259 | <pElem>morfeusz</pElem> | 286 | <pElem>morfeusz</pElem> |
260 | </incDir> | 287 | </incDir> |
@@ -271,6 +298,18 @@ | @@ -271,6 +298,18 @@ | ||
271 | </undefinedList> | 298 | </undefinedList> |
272 | </ccTool> | 299 | </ccTool> |
273 | </folder> | 300 | </folder> |
301 | + <folder path="xxx"> | ||
302 | + <ccTool> | ||
303 | + <incDir> | ||
304 | + <pElem>build</pElem> | ||
305 | + <pElem>morfeusz</pElem> | ||
306 | + <pElem>build/morfeusz</pElem> | ||
307 | + </incDir> | ||
308 | + <preprocessorList> | ||
309 | + <Elem>libmorfeusz_EXPORTS</Elem> | ||
310 | + </preprocessorList> | ||
311 | + </ccTool> | ||
312 | + </folder> | ||
274 | <item path="morfeusz/Environment.cpp" ex="false" tool="1" flavor2="4"> | 313 | <item path="morfeusz/Environment.cpp" ex="false" tool="1" flavor2="4"> |
275 | <ccTool flags="1"> | 314 | <ccTool flags="1"> |
276 | <incDir> | 315 | <incDir> |
@@ -368,26 +407,18 @@ | @@ -368,26 +407,18 @@ | ||
368 | </ccTool> | 407 | </ccTool> |
369 | </item> | 408 | </item> |
370 | <item path="morfeusz/charset/CaseConverter.cpp" ex="false" tool="1" flavor2="4"> | 409 | <item path="morfeusz/charset/CaseConverter.cpp" ex="false" tool="1" flavor2="4"> |
371 | - <ccTool flags="1"> | ||
372 | - </ccTool> | ||
373 | </item> | 410 | </item> |
374 | <item path="morfeusz/charset/CharsetConverter.cpp" | 411 | <item path="morfeusz/charset/CharsetConverter.cpp" |
375 | ex="false" | 412 | ex="false" |
376 | tool="1" | 413 | tool="1" |
377 | flavor2="4"> | 414 | flavor2="4"> |
378 | - <ccTool flags="1"> | ||
379 | - </ccTool> | ||
380 | </item> | 415 | </item> |
381 | <item path="morfeusz/charset/caseconv.cpp" ex="false" tool="1" flavor2="4"> | 416 | <item path="morfeusz/charset/caseconv.cpp" ex="false" tool="1" flavor2="4"> |
382 | - <ccTool flags="1"> | ||
383 | - </ccTool> | ||
384 | </item> | 417 | </item> |
385 | <item path="morfeusz/charset/conversion_tables.cpp" | 418 | <item path="morfeusz/charset/conversion_tables.cpp" |
386 | ex="false" | 419 | ex="false" |
387 | tool="1" | 420 | tool="1" |
388 | flavor2="4"> | 421 | flavor2="4"> |
389 | - <ccTool flags="1"> | ||
390 | - </ccTool> | ||
391 | </item> | 422 | </item> |
392 | <item path="morfeusz/const.cpp" ex="false" tool="1" flavor2="4"> | 423 | <item path="morfeusz/const.cpp" ex="false" tool="1" flavor2="4"> |
393 | <ccTool flags="1"> | 424 | <ccTool flags="1"> |
@@ -476,12 +507,8 @@ | @@ -476,12 +507,8 @@ | ||
476 | ex="false" | 507 | ex="false" |
477 | tool="1" | 508 | tool="1" |
478 | flavor2="4"> | 509 | flavor2="4"> |
479 | - <ccTool flags="1"> | ||
480 | - </ccTool> | ||
481 | </item> | 510 | </item> |
482 | <item path="morfeusz/segrules/segrules.cpp" ex="false" tool="1" flavor2="4"> | 511 | <item path="morfeusz/segrules/segrules.cpp" ex="false" tool="1" flavor2="4"> |
483 | - <ccTool flags="1"> | ||
484 | - </ccTool> | ||
485 | </item> | 512 | </item> |
486 | <item path="morfeusz/test_recognize_dict.cpp" ex="false" tool="1" flavor2="4"> | 513 | <item path="morfeusz/test_recognize_dict.cpp" ex="false" tool="1" flavor2="4"> |
487 | <ccTool flags="0"> | 514 | <ccTool flags="0"> |