Commit df3ada33fcf61cba25a0112bf0c9940779eb6458

Authored by Michał Lenart
1 parent 22f68665

ogarnięcie (w końcu!) kwestii homonimów w całości

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@128 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
CMakeLists.txt
... ... @@ -36,7 +36,7 @@ if ("${INPUT_DICTIONARIES}" STREQUAL "")
36 36 if ("${EMPTY_INPUT_DICTIONARY}" STREQUAL "TRUE")
37 37 set (INPUT_DICTIONARIES ${PROJECT_SOURCE_DIR}/input/empty.txt)
38 38 else ()
39   - set (INPUT_DICTIONARIES "${PROJECT_SOURCE_DIR}/input/PoliMorfSmall.tab,${PROJECT_SOURCE_DIR}/input/dodatki.tab")
  39 + set (INPUT_DICTIONARIES "${PROJECT_SOURCE_DIR}/input/sgjp-hom.tab,${PROJECT_SOURCE_DIR}/input/dodatki.tab")
40 40 endif ()
41 41 endif ()
42 42  
... ... @@ -72,7 +72,9 @@ elseif (${CMAKE_SYSTEM_NAME} MATCHES "Windows")
72 72 set (CMAKE_SHARED_LINKER_FLAGS "-s -Os -static-libstdc++ -static-libgcc -Wl,--exclude-libs,libgcc_eh.a")
73 73 set (CMAKE_EXE_LINKER_FLAGS "-s -Os -static-libstdc++ -static-libgcc")
74 74 elseif (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
75   - set (MACOSX_RPATH TRUE)
  75 + #~ set (CMAKE_INSTALL_NAME_DIR @executable_path)
  76 + #~ set (CMAKE_BUILD_WITH_INSTALL_RPATH ON)
  77 + #~ set (MACOSX_RPATH TRUE)
76 78 endif ()
77 79  
78 80 ########## Setup RPATH ##########
... ...
fsabuilder/morfeuszbuilder/tagset/segtypes.py
... ... @@ -179,7 +179,7 @@ class Segtypes(object):
179 179 class SegtypePattern(object):
180 180  
181 181 def __init__(self, lemma, pattern, segnum):
182   - self.lemma = lemma.split(':')[0] if len(lemma) > 1 else lemma
  182 + self.lemma = lemma.split(':')[0] if lemma and len(lemma) > 1 else lemma
183 183 self.pattern = pattern
184 184 self.segnum = segnum
185 185  
... ... @@ -190,7 +190,7 @@ class SegtypePattern(object):
190 190 patterns2Match.append(self.pattern.replace('%', '.*'))
191 191 patterns2Match.append(re.sub(r'\:\%$', '', self.pattern).replace('%', '.*'))
192 192 if lemma:
193   - lemma = lemma.split(':')[0] if len(lemma) > 1 else lemma
  193 + lemma = lemma.split(':')[0] if lemma and len(lemma) > 1 else lemma
194 194 if (self.lemma is None or self.lemma == lemma) \
195 195 and any([re.match(p, tag) for p in patterns2Match]):
196 196 return self.segnum
... ...
morfeusz/CMakeLists.txt
... ... @@ -26,8 +26,6 @@ set(SRC_FILES
26 26 ${INPUT_SYNTH_DICTIONARY_CPP}
27 27 Environment.cpp
28 28 MorphDeserializer.cpp
29   - GeneratorDeserializer.cpp
30   - Generator.cpp
31 29 Tagset.cpp
32 30 fsa/const.cpp
33 31 MorphInterpretation.cpp
... ... @@ -44,8 +42,6 @@ set(INCLUDE_FILES
44 42 const.hpp
45 43 data/default_fsa.hpp
46 44 MorphDeserializer.hpp
47   - GeneratorDeserializer.hpp
48   - Generator.hpp
49 45 Tagset.hpp
50 46 fsa/const.hpp
51 47 MorphInterpretation.hpp
... ...
morfeusz/Generator.cpp deleted
1   -/*
2   - * File: Generator.cpp
3   - * Author: mlenart
4   - *
5   - * Created on 21 styczeń 2014, 14:38
6   - */
7   -
8   -#include <string>
9   -#include <iostream>
10   -#include "charset/charset_utils.hpp"
11   -#include "MorphInterpretation.hpp"
12   -#include "Generator.hpp"
13   -#include "Environment.hpp"
14   -
15   -
16   -using namespace std;
17   -
18   -Generator::Generator(
19   - const unsigned char* ptr,
20   - const Environment& env)
21   -: deserializer(env),
22   -fsa(SynthFSAType::getFSA(ptr, deserializer)),
23   -env(env),
24   -generatorPtr(ptr) {
25   -}
26   -
27   -Generator::~Generator() {
28   -}
29   -
30   -void Generator::setGeneratorPtr(const unsigned char* ptr) {
31   - delete this->fsa;
32   - this->generatorPtr = ptr;
33   - this->fsa = SynthFSAType::getFSA(ptr, deserializer);
34   -}
35   -
36   -void Generator::appendString(const string& str, string& res) const {
37   - const char* suffixPtr = str.c_str();
38   - const char* suffixEnd = suffixPtr + str.length();
39   - while (suffixPtr != suffixEnd) {
40   - uint32_t cp = UTF8CharsetConverter().next(suffixPtr, suffixEnd);
41   - env.getCharsetConverter().append(cp, res);
42   - }
43   -}
44   -
45   -std::string Generator::decodeOrth(
46   - const EncodedOrth& orth,
47   - const std::vector<uint32_t>& lemma) const {
48   - string res;
49   - this->appendString(orth.prefixToAdd, res);
50   - for (unsigned int i = 0; i < lemma.size() - orth.suffixToCut; i++) {
51   - uint32_t cp = lemma[i];
52   - env.getCharsetConverter().append(cp, res);
53   - }
54   - this->appendString(orth.suffixToAdd, res);
55   -// const char* suffixPtr = orth.suffixToAdd.c_str();
56   -// const char* suffixEnd = suffixPtr + orth.suffixToAdd.length();
57   -// while (suffixPtr != suffixEnd) {
58   -// uint32_t cp = UTF8CharsetConverter().next(suffixPtr, suffixEnd);
59   -// env.getCharsetConverter().append(cp, res);
60   -// }
61   - return res;
62   -}
63   -
64   -void Generator::decodeRes(
65   - const std::vector<EncodedGeneratorInterpretation>& encodedRes,
66   - const std::string& lemma,
67   - const std::vector<uint32_t>& lemmaCodepoints,
68   - std::vector<MorphInterpretation>& result) const {
69   -
70   - for (unsigned int i = 0; i < encodedRes.size(); i++) {
71   - EncodedGeneratorInterpretation egi = encodedRes[i];
72   - string decodedOrth = this->decodeOrth(egi.orth, lemmaCodepoints);
73   - MorphInterpretation mi(
74   - 0, 0,
75   - decodedOrth, lemma,
76   - egi.tag,
77   - egi.nameClassifier,
78   - env.getTagset(),
79   - env.getCharsetConverter());
80   - result.push_back(mi);
81   - }
82   -}
83   -
84   -void Generator::generate(const string& lemma, vector<MorphInterpretation>& result) const {
85   - const char* currInput = lemma.c_str();
86   - const char* inputEnd = currInput + lemma.length();
87   - vector<uint32_t> codepoints;
88   - SynthStateType state = this->fsa->getInitialState();
89   - while (currInput != inputEnd && !state.isSink()) {
90   - uint32_t codepoint = this->env.getCharsetConverter().next(currInput, inputEnd);
91   - feedState(state, codepoint, this->env.getCharsetConverter());
92   - codepoints.push_back(codepoint);
93   - }
94   - if (state.isAccepting()) {
95   - vector<EncodedGeneratorInterpretation> encodedRes = state.getValue();
96   - decodeRes(encodedRes, lemma, codepoints, result);
97   - }
98   -}
morfeusz/Generator.hpp deleted
1   -/*
2   - * File: Generator.hpp
3   - * Author: mlenart
4   - *
5   - * Created on 21 styczeń 2014, 14:38
6   - */
7   -
8   -#ifndef GENERATOR_HPP
9   -#define GENERATOR_HPP
10   -
11   -#include <string>
12   -#include <vector>
13   -#include "charset/CharsetConverter.hpp"
14   -#include "MorphInterpretation.hpp"
15   -#include "Tagset.hpp"
16   -#include "GeneratorDeserializer.hpp"
17   -
18   -typedef FSA< std::vector<EncodedGeneratorInterpretation > > SynthFSAType;
19   -typedef State< std::vector<EncodedGeneratorInterpretation > > SynthStateType;
20   -
21   -class Generator {
22   -public:
23   - Generator(
24   - const unsigned char* ptr,
25   - const Environment& env);
26   - void generate(const std::string& lemma, std::vector<MorphInterpretation>& result) const;
27   - void setGeneratorPtr(const unsigned char* ptr);
28   - virtual ~Generator();
29   -private:
30   -// Generator(const SynthDeserializer& deserializer);
31   - GeneratorDeserializer deserializer;
32   - const SynthFSAType* fsa;
33   - const Environment& env;
34   - const unsigned char* generatorPtr;
35   -
36   - std::string decodeOrth(
37   - const EncodedOrth& orth,
38   - const std::vector<uint32_t>& lemmaCodepoints) const;
39   -
40   - void decodeRes(
41   - const std::vector<EncodedGeneratorInterpretation>& encodedRes,
42   - const std::string& lemma,
43   - const std::vector<uint32_t>& lemmaCodepoints,
44   - std::vector<MorphInterpretation>& result) const;
45   -
46   - void appendString(const string& str, string& res) const;
47   -};
48   -
49   -#endif /* GENERATOR_HPP */
50   -
morfeusz/GeneratorDeserializer.cpp deleted
1   -/*
2   - * File: GeneratorDeserializer.cpp
3   - * Author: mlenart
4   - *
5   - * Created on 20 styczeń 2014, 17:14
6   - */
7   -
8   -#include "GeneratorDeserializer.hpp"
9   -#include "EncodedGeneratorInterpretation.hpp"
10   -
11   -using namespace std;
12   -
13   -GeneratorDeserializer::GeneratorDeserializer(const Environment& env)
14   -: env(env) {
15   -
16   -}
17   -
18   -void GeneratorDeserializer::deserializeOrth(const unsigned char*& ptr, EncodedOrth& orth) const {
19   - orth.prefixToAdd = (const char*) ptr;
20   - ptr += strlen((const char*) ptr) + 1;
21   -
22   - orth.suffixToCut = *ptr;
23   - ptr++;
24   -
25   - orth.suffixToAdd = (const char*) ptr;
26   - ptr += strlen((const char*) ptr) + 1;
27   -}
28   -
29   -void GeneratorDeserializer::deserializeInterp(const unsigned char*& ptr, EncodedGeneratorInterpretation& interp) const {
30   - deserializeOrth(ptr, interp.orth);
31   - interp.tag = ntohs(*(reinterpret_cast<const uint16_t*> (ptr)));
32   - ptr += 2;
33   - interp.nameClassifier = *ptr;
34   - ptr++;
35   -}
36   -
37   -long GeneratorDeserializer::deserialize(
38   - const unsigned char* ptr,
39   - std::vector<EncodedGeneratorInterpretation>& interps) const {
40   - const unsigned char* currPtr = ptr;
41   - uint8_t interpsNum = *ptr;
42   - interps.clear();
43   - interps.reserve(interpsNum);
44   - currPtr++;
45   - for (unsigned int i = 0; i < interpsNum; ++i) {
46   - EncodedGeneratorInterpretation interp;
47   - this->deserializeInterp(currPtr, interp);
48   - interps.push_back(interp);
49   - }
50   - return currPtr - ptr;
51   -}
52   -
53   -GeneratorDeserializer::~GeneratorDeserializer() {
54   -
55   -}
morfeusz/GeneratorDeserializer.hpp deleted
1   -/*
2   - * File: GeneratorDeserializer.hpp
3   - * Author: mlenart
4   - *
5   - * Created on 20 styczeń 2014, 17:14
6   - */
7   -
8   -#ifndef SYNTHDESERIALIZER_HPP
9   -#define SYNTHDESERIALIZER_HPP
10   -
11   -#include <string>
12   -#include <vector>
13   -#include "fsa/fsa.hpp"
14   -#include "Tagset.hpp"
15   -#include "EncodedGeneratorInterpretation.hpp"
16   -#include "Environment.hpp"
17   -
18   -class GeneratorDeserializer: public Deserializer< std::vector<EncodedGeneratorInterpretation> > {
19   -public:
20   - explicit GeneratorDeserializer(const Environment& env);
21   - long deserialize(
22   - const unsigned char* ptr,
23   - std::vector<EncodedGeneratorInterpretation>& interps) const;
24   - virtual ~GeneratorDeserializer();
25   -private:
26   - const Environment& env;
27   -
28   - void deserializeInterp(const unsigned char*& ptr, EncodedGeneratorInterpretation& interp) const;
29   - void deserializeOrth(const unsigned char*& ptr, EncodedOrth& orth) const;
30   -};
31   -
32   -#endif /* SYNTHDESERIALIZER_HPP */
33   -
morfeusz/InterpretedChunk.hpp
... ... @@ -19,6 +19,7 @@ struct InterpretedChunk {
19 19 bool shiftOrth;
20 20 bool orthWasShifted;
21 21 std::vector<InterpretedChunk> prefixChunks;
  22 + std::string requiredHomonymId;
22 23 };
23 24  
24 25 #endif /* INTERPRETEDCHUNK_HPP */
... ...
morfeusz/InterpretedChunksDecoder.hpp
... ... @@ -10,6 +10,7 @@
10 10  
11 11 #include <string>
12 12 #include <vector>
  13 +#include <utility>
13 14  
14 15 #include "charset/CharsetConverter.hpp"
15 16 #include "EncodedInterpretation.hpp"
... ... @@ -147,6 +148,16 @@ protected:
147 148 }
148 149 private:
149 150  
  151 + pair<string, string> getLemmaHomonymIdPair(const string& lemma) const {
  152 + vector<string> splitRes(split(lemma, ':'));
  153 + if (splitRes.size() == 2) {
  154 + return make_pair(splitRes[0], splitRes[1]);
  155 + }
  156 + else {
  157 + return make_pair(lemma, "");
  158 + }
  159 + }
  160 +
150 161 MorphInterpretation decodeMorphInterpretation(
151 162 unsigned int startNode, unsigned int endNode,
152 163 const string& orth,
... ... @@ -156,9 +167,11 @@ private:
156 167 string lemma = lemmaPrefix;
157 168 EncodedInterpretation ei = this->deserializeInterp(ptr);
158 169 this->decodeForm(chunk.lowercaseCodepoints, ei.value, lemma);
  170 + pair<string, string> lemmaHomonymId = getLemmaHomonymIdPair(lemma);
159 171 return MorphInterpretation(
160 172 startNode, endNode,
161   - orth, lemma,
  173 + orth, lemmaHomonymId.first,
  174 + lemmaHomonymId.second,
162 175 ei.tag,
163 176 ei.nameClassifier,
164 177 env.getTagset(),
... ... @@ -193,7 +206,12 @@ public:
193 206 lemma += env.getCharsetConverter().toString(interpretedChunk.originalCodepoints);
194 207 const unsigned char* currPtr = interpretedChunk.interpsGroup.ptr;
195 208 while (currPtr - interpretedChunk.interpsGroup.ptr < interpretedChunk.interpsGroup.size) {
196   - out.push_back(this->decodeMorphInterpretation(startNode, endNode, orthPrefix, lemma, interpretedChunk, currPtr));
  209 + MorphInterpretation mi = this->decodeMorphInterpretation(startNode, endNode, orthPrefix, lemma, interpretedChunk, currPtr);
  210 +// cerr << mi.toString(false) << endl;
  211 +// cerr << "required='" << interpretedChunk.requiredHomonymId << "' morphInterp='" << mi.getHomonymId() << "'" << endl;
  212 + if (interpretedChunk.requiredHomonymId.empty() || mi.getHomonymId() == interpretedChunk.requiredHomonymId) {
  213 + out.push_back(mi);
  214 + }
197 215 }
198 216 }
199 217  
... ... @@ -220,10 +238,11 @@ private:
220 238 ptr += strlen((const char*) ptr) + 1;
221 239 EncodedInterpretation ei = this->deserializeInterp(ptr);
222 240 this->decodeForm(chunk.originalCodepoints, ei.value, orth);
223   - string realLemma = homonymId.empty() ? lemma : (lemma + ":" + homonymId);
  241 +// string realLemma = homonymId.empty() ? lemma : (lemma + ":" + homonymId);
224 242 return MorphInterpretation(
225 243 startNode, endNode,
226   - orth, realLemma,
  244 + orth, lemma,
  245 + homonymId,
227 246 ei.tag,
228 247 ei.nameClassifier,
229 248 env.getTagset(),
... ...
morfeusz/Morfeusz.cpp
... ... @@ -12,7 +12,6 @@
12 12 #include "data/default_fsa.hpp"
13 13 #include "Morfeusz.hpp"
14 14 #include "MorphDeserializer.hpp"
15   -#include "GeneratorDeserializer.hpp"
16 15 #include "InterpretedChunksDecoder.hpp"
17 16 #include "charset/CharsetConverter.hpp"
18 17 #include "charset/charset_utils.hpp"
... ... @@ -118,6 +117,16 @@ void Morfeusz::doProcessOneWord(
118 117 normalizedCodepoints.push_back(normalizedCodepoint);
119 118 feedState(state, normalizedCodepoint, UTF8CharsetConverter());
120 119 codepoint = currInput == inputEnd ? 0 : env.getCharsetConverter().peek(currInput, inputEnd);
  120 + string homonymId;
  121 + if (env.getProcessorType() == GENERATOR && codepoint == 0x3A && currInput + 1 != inputEnd) {
  122 + if (originalCodepoints.size() == 1) {
  123 + throw MorfeuszException("Lemma of length > 1 cannot start with a colon");
  124 + }
  125 + homonymId = string(currInput + 1, inputEnd);
  126 +// cerr << "homonym " << homonymId << endl;
  127 + currInput = inputEnd;
  128 + codepoint = 0x00;
  129 + }
121 130 if (state.isAccepting()) {
122 131 vector<InterpsGroup> val(state.getValue());
123 132 for (unsigned int i = 0; i < val.size(); i++) {
... ... @@ -138,24 +147,18 @@ void Morfeusz::doProcessOneWord(
138 147 ig,
139 148 newSegrulesState.shiftOrthFromPrevious,
140 149 false,
141   - vector<InterpretedChunk>()
  150 + vector<InterpretedChunk>(),
  151 + homonymId
142 152 };
143 153 if (!accum.empty() && accum.back().shiftOrth) {
144   -// cerr << "shift orth from " << (int) accum.back().interpsGroup.type << " to " << (int) ig.type << endl;
145 154 doShiftOrth(accum.back(), ic);
146 155 }
147 156 accum.push_back(ic);
148   - if (isEndOfWord(codepoint)) {
149   -// cerr << "end of word" << endl;
150   - if (newSegrulesState.accepting) {
151   -// cerr << "accept " << (int) ig.type << endl;
152   - graph.addPath(accum);
153   - }
154   - else {
155   -// cerr << "not accept " << (int) ig.type << endl;
156   - }
  157 + if (isEndOfWord(codepoint) && newSegrulesState.accepting) {
  158 + graph.addPath(accum);
157 159 }
158   - else {
  160 + else if (!isEndOfWord(codepoint)) {
  161 +// cerr << "will process " << currInput << endl;
159 162 const char* newCurrInput = currInput;
160 163 doProcessOneWord(env, newCurrInput, inputEnd, newSegrulesState, accum, graph);
161 164 }
... ...
morfeusz/Morfeusz.hpp
... ... @@ -24,7 +24,6 @@
24 24 #include "MorfeuszOptions.hpp"
25 25 #include "const.hpp"
26 26 #include "exceptions.hpp"
27   -#include "Generator.hpp"
28 27 #include "Environment.hpp"
29 28  
30 29 #include "segrules/segrules.hpp"
... ...
morfeusz/MorphInterpretation.cpp
... ... @@ -6,6 +6,7 @@
6 6 */
7 7  
8 8 #include <string>
  9 +#include <sstream>
9 10 #include "MorphInterpretation.hpp"
10 11 #include "EncodedInterpretation.hpp"
11 12  
... ... @@ -16,6 +17,7 @@ MorphInterpretation::MorphInterpretation(
16 17 int endNode,
17 18 const string& orth,
18 19 const string& lemma,
  20 + const string& homonymId,
19 21 int tagnum,
20 22 int namenum,
21 23 const Tagset& tagset,
... ... @@ -24,6 +26,7 @@ MorphInterpretation::MorphInterpretation(
24 26 endNode(endNode),
25 27 orth(orth),
26 28 lemma(lemma),
  29 + homonymId(homonymId),
27 30 tagnum(tagnum),
28 31 namenum(namenum),
29 32 tag(tagset.getTag(tagnum, charsetConverter)),
... ... @@ -40,6 +43,7 @@ MorphInterpretation::MorphInterpretation(
40 43 endNode(startNode + 1),
41 44 orth(orth),
42 45 lemma(orth),
  46 + homonymId(""),
43 47 tagnum(0),
44 48 namenum(0),
45 49 tag(tagset.getTag(0, charsetConverter)),
... ... @@ -67,6 +71,10 @@ const std::string& MorphInterpretation::getLemma() const {
67 71 return this->lemma;
68 72 }
69 73  
  74 +const std::string& MorphInterpretation::getHomonymId() const {
  75 + return this->homonymId;
  76 +}
  77 +
70 78 int MorphInterpretation::getTagnum() const {
71 79 return this->tagnum;
72 80 }
... ... @@ -83,3 +91,22 @@ const std::string& MorphInterpretation::getName() const {
83 91 return this->name;
84 92 }
85 93  
  94 +std::string MorphInterpretation::toString(bool includeNodeNumbers) const {
  95 + std::stringstream res;
  96 + if (includeNodeNumbers) {
  97 + res << startNode << "," << endNode << ",";
  98 + }
  99 + res << orth << ",";
  100 +
  101 + res << lemma;
  102 + if (!this->homonymId.empty()) {
  103 + res << ":" << homonymId;
  104 + }
  105 + res << ",";
  106 +
  107 + res << tag;
  108 + if (!name.empty()) {
  109 + res << "," << name;
  110 + }
  111 + return res.str();
  112 +}
... ...
morfeusz/MorphInterpretation.hpp
... ... @@ -20,6 +20,7 @@ public:
20 20 int endNode,
21 21 const std::string& orth,
22 22 const std::string& lemma,
  23 + const std::string& homonymId,
23 24 int tagnum,
24 25 int namenum,
25 26 const Tagset& tagset,
... ... @@ -30,10 +31,13 @@ public:
30 31 int getEndNode() const;
31 32 const std::string& getOrth() const;
32 33 const std::string& getLemma() const;
  34 + const std::string& getHomonymId() const;
33 35 int getTagnum() const;
34 36 int getNamenum() const;
35 37 const std::string& getTag() const;
36 38 const std::string& getName() const;
  39 +
  40 + std::string toString(bool includeNodeNumbers) const;
37 41 private:
38 42 MorphInterpretation(
39 43 int startNode,
... ... @@ -44,6 +48,7 @@ private:
44 48 int endNode;
45 49 std::string orth;
46 50 std::string lemma;
  51 + std::string homonymId;
47 52 int tagnum;
48 53 int namenum;
49 54 std::string tag;
... ...
morfeusz/java/CMakeLists.txt
... ... @@ -31,20 +31,26 @@ endif ()
31 31  
32 32 if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
33 33 set (CMAKE_SHARED_LIBRARY_SUFFIX ".jnilib")
  34 + #~ set (CMAKE_SHARED_LINKER_FLAGS "${-dylib")
34 35 endif ()
35 36  
36 37 set (CMAKE_JAVA_TARGET_VERSION ${JMORFEUSZ_VERSION})
37 38 set (CMAKE_JAVA_TARGET_OUTPUT_NAME jmorfeusz)
38 39  
  40 +#~ add_custom_target (jmorfeusz-repair-library
  41 + #~ COMMAND ${DARWIN64_ROOT}/x86_64-apple-darwin9/bin/x86_64-apple-darwin9-install_name_tool -change /home/mlenart/xxx/morfeusz/buildall/build-Darwin-amd64/morfeusz/libmorfeusz2.dylib morfeusz2 ${PROJECT_BINARY_DIR}/morfeusz/java/libjmorfeusz.jnilib
  42 + #~ DEPENDS libjmorfeusz)
  43 +
39 44 # build jmorfeusz
40 45 file(GLOB_RECURSE JAVA_SOURCES ${JAVA_SRC_DIR} "*.java")
41 46 add_jar (jmorfeusz
42 47 SOURCES "${JAVA_SOURCES}"
43 48 DEPENDS libjmorfeusz)
44 49  
45   -add_custom_target(jmorfeusz-copy-readme
  50 +add_custom_target (jmorfeusz-copy-readme
46 51 COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/README" "${CMAKE_CURRENT_BINARY_DIR}/README")
47 52  
  53 +
48 54 add_custom_target(package-java
49 55 COMMAND mkdir -p "${TARGET_DIR}" && ${CMAKE_COMMAND} -E tar "cfvz" "${TARGET_DIR}/jmorfeusz-${JMORFEUSZ_VERSION}-${CMAKE_SYSTEM_NAME}-${ARCHITECTURE}.tar.gz" "${CMAKE_CURRENT_BINARY_DIR}/*.jar" "${CMAKE_CURRENT_BINARY_DIR}/*${CMAKE_SHARED_LIBRARY_SUFFIX}" "${CMAKE_CURRENT_BINARY_DIR}/README"
50 56 DEPENDS jmorfeusz jmorfeusz-copy-readme libjmorfeusz)
... ...
morfeusz/morfeusz_analyzer.cpp
... ... @@ -126,6 +126,11 @@ int main(int argc, const char** argv) {
126 126 printf("[");
127 127 for (unsigned int i = 0; i < res.size(); i++) {
128 128 MorphInterpretation& mi = res[i];
  129 + string lemmaToShow = mi.getLemma().c_str();
  130 + if (!mi.getHomonymId().empty()) {
  131 + lemmaToShow += ":";
  132 + lemmaToShow += mi.getHomonymId();
  133 + }
129 134 if (prevStart != -1
130 135 && (prevStart != mi.getStartNode() || prevEnd != mi.getEndNode())) {
131 136 printf("]\n[");
... ... @@ -133,10 +138,11 @@ int main(int argc, const char** argv) {
133 138 else if (prevStart != -1) {
134 139 printf("; ");
135 140 }
136   - printf("%d,%d,%s,%s,%s,%s",
137   - mi.getStartNode(), mi.getEndNode(),
138   - mi.getOrth().c_str(), mi.getLemma().c_str(),
139   - mi.getTag().c_str(), mi.getName().c_str());
  141 + printf("%s", mi.toString(true).c_str());
  142 +// printf("%d,%d,%s,%s,%s,%s",
  143 +// mi.getStartNode(), mi.getEndNode(),
  144 +// mi.getOrth().c_str(), lemmaToShow.c_str(),
  145 +// mi.getTag().c_str(), lemmaToShow.c_str());
140 146 prevStart = mi.getStartNode();
141 147 prevEnd = mi.getEndNode();
142 148 }
... ...
morfeusz/morfeusz_generator.cpp
... ... @@ -38,9 +38,7 @@ int main(int argc, char** argv) {
38 38 printf("; ");
39 39 }
40 40 MorphInterpretation& mi = res[i];
41   - printf("%s,%s,%s,%s",
42   - mi.getOrth().c_str(), mi.getLemma().c_str(),
43   - mi.getTag().c_str(), mi.getName().c_str());
  41 + printf("%s", mi.toString(false).c_str());
44 42 }
45 43 printf("]\n");
46 44 }
... ...
nbproject/configurations.xml
... ... @@ -38,8 +38,6 @@
38 38 </df>
39 39 <in>Environment.cpp</in>
40 40 <in>FlexionGraph.cpp</in>
41   - <in>Generator.cpp</in>
42   - <in>GeneratorDeserializer.cpp</in>
43 41 <in>Morfeusz.cpp</in>
44 42 <in>MorphDeserializer.cpp</in>
45 43 <in>MorphInterpretation.cpp</in>
... ... @@ -106,12 +104,8 @@
106 104 </makeTool>
107 105 </makefileType>
108 106 <item path="../default_fsa.cpp" ex="false" tool="1" flavor2="4">
109   - <ccTool flags="1">
110   - </ccTool>
111 107 </item>
112 108 <item path="../default_synth_fsa.cpp" ex="false" tool="1" flavor2="4">
113   - <ccTool flags="1">
114   - </ccTool>
115 109 </item>
116 110 <item path="build/default_fsa.cpp" ex="false" tool="1" flavor2="4">
117 111 </item>
... ... @@ -281,7 +275,7 @@
281 275 <ccTool>
282 276 <incDir>
283 277 <pElem>morfeusz</pElem>
284   - <pElem>/usr/lib/jvm/java-6-openjdk/include</pElem>
  278 + <pElem>/usr/lib/jvm/default-java/include</pElem>
285 279 </incDir>
286 280 <preprocessorList>
287 281 <Elem>NDEBUG</Elem>
... ... @@ -347,32 +341,6 @@
347 341 </preprocessorList>
348 342 </ccTool>
349 343 </item>
350   - <item path="morfeusz/Generator.cpp" ex="false" tool="1" flavor2="4">
351   - <ccTool flags="1">
352   - <incDir>
353   - <pElem>build</pElem>
354   - <pElem>morfeusz</pElem>
355   - <pElem>build/morfeusz</pElem>
356   - </incDir>
357   - <preprocessorList>
358   - <Elem>NDEBUG</Elem>
359   - <Elem>libmorfeusz_EXPORTS</Elem>
360   - </preprocessorList>
361   - </ccTool>
362   - </item>
363   - <item path="morfeusz/GeneratorDeserializer.cpp" ex="false" tool="1" flavor2="4">
364   - <ccTool flags="1">
365   - <incDir>
366   - <pElem>build</pElem>
367   - <pElem>morfeusz</pElem>
368   - <pElem>build/morfeusz</pElem>
369   - </incDir>
370   - <preprocessorList>
371   - <Elem>NDEBUG</Elem>
372   - <Elem>libmorfeusz_EXPORTS</Elem>
373   - </preprocessorList>
374   - </ccTool>
375   - </item>
376 344 <item path="morfeusz/Morfeusz.cpp" ex="false" tool="1" flavor2="4">
377 345 <ccTool flags="1">
378 346 <incDir>
... ...