Commit 9158a52956ab667cfebdd81715e90f7de4693174

Authored by Michał Lenart
1 parent a7ab50de

- dalsza praca nad kompilacją do Javy (further work on compiling to Java)

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@52 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
CMakeLists.txt
... ... @@ -16,9 +16,22 @@ configure_file (
16 16  
17 17 include_directories("${PROJECT_BINARY_DIR}" )
18 18  
  19 +set (CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib")
19 20 add_subdirectory (morfeusz)
20 21  
21   -file(COPY fsabuilder testfiles DESTINATION .)
  22 +# copy the jmorfeusz library files found in the library output directory into the Java source tree (jmorfeusz/src/main/native)
  23 +file(GLOB files "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/*jmorfeusz*")
  24 +foreach(file ${files})
  25 + file (COPY ${file} DESTINATION ${PROJECT_SOURCE_DIR}/jmorfeusz/src/main/native)
  26 +endforeach()
  27 +
  28 +# build jmorfeusz
  29 +add_custom_command(TARGET all POST_BUILD mvn package -PotherOutputDir -DoutputDir=${PROJECT_BINARY_DIR}/jmorfeusz/target WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/jmorfeusz)
  30 +
  31 +
  32 +# TESTS
  33 +
  34 +file (COPY fsabuilder testfiles DESTINATION .)
22 35  
23 36 macro (test_build_and_recognize fname method)
24 37 add_test (TestBuild-${method}-${fname} python fsabuilder/fsa/buildfsa.py -i testfiles/${fname} -o /tmp/test-${method}-${fname}.fsa --tagset-file=testfiles/polimorf.tagset --output-format=BINARY --serialization-method=${method})
... ...
morfeusz/Morfeusz.cpp
... ... @@ -226,18 +226,17 @@ void Morfeusz::setEncoding(MorfeuszCharset encoding) {
226 226 ResultsIterator::ResultsIterator(const string& text, const Morfeusz& morfeusz)
227 227 : rawInput(text.c_str()),
228 228 morfeusz(morfeusz) {
  229 + vector<MorphInterpretation> res;
  230 + morfeusz.analyze(text, res);
  231 + resultsBuffer.insert(resultsBuffer.begin(), res.begin(), res.end());
229 232 }
230 233  
231 234 MorphInterpretation ResultsIterator::getNext() {
232   - // if (resultsBuffer.empty()) {
233   - // morfeusz.processOneWord(rawInput, startNode, back_inserter(resultsBuffer));
234   - // }
235   - // startNode = resultsBuffer.back().getEndNode();
236   - // MorphInterpretation res = resultsBuffer.front();
237   - // resultsBuffer.pop_front();
238   - // return res;
  235 + MorphInterpretation res = this->resultsBuffer.front();
  236 + this->resultsBuffer.pop_front();
  237 + return res;
239 238 }
240 239  
241 240 bool ResultsIterator::hasNext() {
242   - return rawInput[0] != '\0' && resultsBuffer.empty();
  241 + return !resultsBuffer.empty();
243 242 }
... ...
morfeusz/Morfeusz.hpp
... ... @@ -22,11 +22,21 @@
22 22 #include "MorfeuszOptions.hpp"
23 23 #include "const.hpp"
24 24  
  25 +class Morfeusz;
  26 +class ResultsIterator;
  27 +
  28 +typedef FSA<std::vector<InterpsGroup > > FSAType;
  29 +typedef State<std::vector<InterpsGroup > > StateType;
  30 +
25 31 class MorfeuszException : public std::exception {
26 32 public:
27   - MorfeuszException(const char* what): msg(what) {}
28   - MorfeuszException(const std::string& what): msg(what.c_str()) {}
29   - virtual ~MorfeuszException() throw() {}
  33 +
  34 + MorfeuszException(const std::string& what) : msg(what.c_str()) {
  35 + }
  36 +
  37 + virtual ~MorfeuszException() throw () {
  38 + }
  39 +
30 40 virtual const char* what() const throw () {
31 41 return this->msg.c_str();
32 42 }
... ... @@ -34,13 +44,6 @@ private:
34 44 const std::string msg;
35 45 };
36 46  
37   -class Morfeusz;
38   -//class AnalyzeResult;
39   -class ResultsIterator;
40   -
41   -typedef FSA<std::vector<InterpsGroup > > FSAType;
42   -typedef State<std::vector<InterpsGroup > > StateType;
43   -
44 47 class Morfeusz {
45 48 public:
46 49 Morfeusz();
... ... @@ -50,17 +53,17 @@ public:
50 53 ResultsIterator analyze(const std::string& text) const;
51 54 void analyze(const std::string& text, std::vector<MorphInterpretation>& result) const;
52 55  
  56 + void setEncoding(MorfeuszCharset encoding);
  57 +
  58 + // Morfeusz();
  59 + friend class ResultsIterator;
  60 +private:
  61 +
53 62 void processOneWord(
54 63 const char*& inputData,
55 64 const char* inputEnd,
56 65 int startNodeNum,
57 66 std::vector<MorphInterpretation>& result) const;
58   -
59   - void setEncoding(MorfeuszCharset encoding);
60   -
61   - // Morfeusz();
62   - friend class ResultsIterator;
63   -private:
64 67  
65 68 void doProcessOneWord(
66 69 const char*& inputData,
... ... @@ -81,48 +84,24 @@ private:
81 84 CharsetConverter* charsetConverter;
82 85 Tagset* tagset;
83 86 CaseConverter* caseConverter;
84   -
  87 +
85 88 UTF8CharsetConverter utf8CharsetConverter;
86   -
  89 +
87 90 MorfeuszOptions options;
88 91 };
89 92  
90 93 class ResultsIterator {
91 94 public:
92   - ResultsIterator(const std::string& text, const Morfeusz& morfeusz);
93 95 MorphInterpretation getNext();
94 96 bool hasNext();
  97 + friend class Morfeusz;
95 98 private:
  99 + ResultsIterator(const std::string& text, const Morfeusz& morfeusz);
96 100 const char* rawInput;
97 101 const Morfeusz& morfeusz;
98 102 std::list<MorphInterpretation> resultsBuffer;
99 103 int startNode;
100 104 };
101 105  
102   -//class ResultsIterator {
103   -//public:
104   -// ResultsIterator(
105   -// const char* startOfInput,
106   -// const char* endOfInput,
107   -// const Morfeusz& morfeusz);
108   -// virtual ~ResultsIterator();
109   -// ResultsIterator(const ResultsIterator& mit);
110   -// ResultsIterator& operator++();
111   -// ResultsIterator operator++(int);
112   -// bool operator==(const ResultsIterator& rhs);
113   -// bool operator!=(const ResultsIterator& rhs);
114   -// MorphInterpretation& operator*();
115   -//private:
116   -// const char* rawInput;
117   -// const char* endOfInput;
118   -// const Morfeusz& morfeusz;
119   -// vector<MorphInterpretation> resultsBuffer;
120   -//};
121   -
122   -//struct AnalyzeResult {
123   -// ResultsIterator iterator;
124   -// const ResultsIterator end;
125   -//};
126   -
127 106 #endif /* MORFEUSZ_HPP */
128 107  
... ...
morfeusz/MorphInterpretation.hpp
... ... @@ -22,7 +22,6 @@ public:
22 22 const std::string& lemma,
23 23 int tagnum,
24 24 int namenum,
25   -// const EncodedInterpretation& encodedInterp,
26 25 const Tagset& tagset,
27 26 const CharsetConverter& charsetConverter);
28 27 static MorphInterpretation createIgn(int startNode, const std::string& orth, const Tagset& tagset, const CharsetConverter& charsetConverter);
... ...
morfeusz/java/CMakeLists.txt
... ... @@ -8,15 +8,15 @@ include(UseJava)
8 8  
9 9 # SWIG Java
10 10 INCLUDE_DIRECTORIES(${JAVA_INCLUDE_PATH})
11   -#INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
12 11 INCLUDE_DIRECTORIES(..)
13 12  
14   -SET(CMAKE_SWIG_FLAGS "")
  13 +set (CMAKE_SWIG_FLAGS -package pl.waw.ipipan.morfeusz)
  14 +set (CMAKE_SWIG_OUTDIR ${CMAKE_SOURCE_DIR}/jmorfeusz/src/main/java/pl/waw/ipipan/morfeusz)
15 15  
16 16 SET_SOURCE_FILES_PROPERTIES(../morfeusz.i PROPERTIES CPLUSPLUS ON)
17   -SWIG_ADD_MODULE(morfeusz-java java ../morfeusz.i)
18   -SWIG_LINK_LIBRARIES(morfeusz-java ${JAVA_LIBRARIES})
19   -SWIG_LINK_LIBRARIES(morfeusz-java libmorfeusz)
  17 +SWIG_ADD_MODULE(jmorfeusz java ../morfeusz.i)
  18 +SWIG_LINK_LIBRARIES(jmorfeusz ${JAVA_LIBRARIES})
  19 +SWIG_LINK_LIBRARIES(jmorfeusz libmorfeusz)
20 20  
21 21 set (CMAKE_SHARED_LINKER_FLAGS "")
22 22 set (CMAKE_EXE_LINKER_FLAGS "")
... ...
morfeusz/morfeusz.i
... ... @@ -4,23 +4,45 @@
4 4 %{
5 5 #include "Morfeusz.hpp"
6 6 #include "MorphInterpretation.hpp"
  7 +#include "const.hpp"
7 8 %}
8 9  
9 10 %include "std_vector.i"
10 11 %include "std_string.i"
11 12  
12 13 #ifdef SWIGJAVA
  14 +
13 15 %include "enums.swg"
  16 +
14 17 /* Force the generated Java code to use the C enum values rather than making a JNI call */
15 18 %javaconst(1);
  19 +
  20 +%pragma(java) jniclasscode=%{
  21 + static {
  22 + System.loadLibrary("jmorfeusz");
  23 + }
  24 +%}
16 25 #endif
17 26  
18   -// Very simple C++ example for linked list
  27 +%ignore MorphInterpretation::MorphInterpretation(
  28 + int startNode,
  29 + int endNode,
  30 + const std::string& orth,
  31 + const std::string& lemma,
  32 + int tagnum,
  33 + int namenum,
  34 + const Tagset& tagset,
  35 + const CharsetConverter& charsetConverter);
  36 +
  37 +%ignore MorphInterpretation::createIgn(int startNode, const std::string& orth, const Tagset& tagset, const CharsetConverter& charsetConverter);
  38 +
  39 +%ignore Tagset::Tagset(const unsigned char* fsaData);
19 40  
20 41 %include "Morfeusz.hpp"
21 42 %include "MorphInterpretation.hpp"
22 43 %include "const.hpp"
23 44  
  45 +// instantiate vector of interpretations
24 46 namespace std {
25 47 // dirty hack: ignore vector(size_type) so the wrapper compiles even though MorphInterpretation has no no-arg constructor
26 48 %ignore vector<MorphInterpretation>::vector(size_type);
... ...
nbproject/configurations.xml
... ... @@ -68,7 +68,7 @@
68 68 <item path="build/morfeusz/morfeuszJAVA_wrap.cxx"
69 69 ex="false"
70 70 tool="1"
71   - flavor2="4">
  71 + flavor2="8">
72 72 <ccTool>
73 73 <incDir>
74 74 <pElem>/usr/lib/jvm/default-java/include</pElem>
... ... @@ -76,14 +76,14 @@
76 76 <pElem>build/morfeusz/java</pElem>
77 77 </incDir>
78 78 <preprocessorList>
79   - <Elem>morfeusz_java_EXPORTS</Elem>
  79 + <Elem>jmorfeusz_EXPORTS</Elem>
80 80 </preprocessorList>
81 81 </ccTool>
82 82 </item>
83 83 <item path="build/morfeusz/morfeuszPYTHON_wrap.cxx"
84 84 ex="false"
85 85 tool="1"
86   - flavor2="4">
  86 + flavor2="8">
87 87 <ccTool>
88 88 <incDir>
89 89 <pElem>/usr/include/python2.7</pElem>
... ...