Commit e147486dfdda22b5c87c21982022032ab4232d5e
1 parent
a2756826
- poprawki warningów itp.
- prace nad cross-kompilacją git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@63 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing
21 changed files
with
154 additions
and
83 deletions
README
1 | + | |
2 | +*************** | |
3 | +Compilation - prerequisites | |
4 | +*************** | |
5 | + | |
6 | +This tutorial assumes that the build process is performed on a 64-bit Linux machine (preferably from the Debian/Ubuntu family). | |
7 | + | |
8 | +sudo apt-get install build-essential autotools | |
9 | + | |
1 | 10 | For cross compiling: |
11 | +-------------------- | |
2 | 12 | |
3 | 13 | sudo apt-get install g++-multilib g++-mingw-w64 |
4 | 14 | |
5 | 15 | For Java and Python bindings: |
16 | +----------------------------- | |
17 | +sudo apt-get install default-jdk python swig | |
18 | + | |
19 | +Java and Python bindings with cross-compilation support: | |
20 | +-------------------------------------------------------- | |
21 | +Download CMake 2.8.x (tested on 2.8.12.1). | |
22 | +Apply cmake-2.8.12.1-patch.diff to the sources. | |
23 | +Build and install CMake from patched sources. | |
24 | + | |
25 | +Java and Python bindings without cross-compilation support: | |
26 | +----------------------------------------------------------- | |
27 | +sudo apt-get install cmake | |
28 | + | |
29 | +*************** | |
30 | +Compilation | |
31 | +*************** | |
32 | +Create a separate build directory in the morfeusz root, then run cmake and make in it: | |
33 | + | |
34 | +mkdir build | |
35 | +cd build | |
36 | +cmake .. | |
37 | +make | |
38 | + | |
39 | +************************** | |
40 | +Notes on cross-compilation | |
41 | +************************** | |
42 | + | |
43 | +Create a separate build directory, for example build-darwin. | |
44 | + | |
45 | +Run: | |
46 | +cmake -DCMAKE_TOOLCHAIN_FILE=../morfeusz/Toolchain-xxx.cmake .. | |
47 | +make | |
48 | + | |
49 | + | |
50 | +Copy the Java JDK and Python libraries from the target platform. Set JAVA_ROOT and PYTHON_ROOT in the Toolchain files to match the directories copied from the target platform. | |
51 | + | |
52 | +[Mac OS X 64bit] Libraries are at /System/Library/Frameworks/JavaVM.framework and /System/Library/Frameworks/Python.framework | |
53 | + | |
54 | +[Windows 32 and 64 bit] Libraries are usually at C:\Program Files\Java\jdk* and C:\Python27. You must also copy python27.dll from C:\Windows\System32 to the Python27/libs directory. | |
55 | + | |
56 | +[Linux 32bit] Java is usually at /usr/lib/jvm/default-java (alternatively you can download a package from Oracle website). The preferred way of obtaining Python is to compile it: | |
57 | +- compile latest version using ./configure --prefix=/some/directory/python | |
58 | +- copy contents of /some/directory/python to the build machine and set it as PYTHON_ROOT in Toolchain file | |
6 | 59 | |
7 | -sudo apt-get install swig default-jdk python | |
8 | 60 | |
9 | 61 | ************* |
10 | 62 | Mac OS X |
... | ... | @@ -15,7 +67,7 @@ Mac OS X |
15 | 67 | # install brew |
16 | 68 | ruby -e "$(curl -fsSL https://raw.github.com/mxcl/homebrew/go/install)" |
17 | 69 | |
18 | -# install svn | |
70 | +# install some other stuff | |
19 | 71 | brew install svn |
20 | 72 | brew install swig |
21 | 73 | brew install make |
... | ... | @@ -35,19 +87,4 @@ brew doctor |
35 | 87 | Windows |
36 | 88 | ************* |
37 | 89 | |
38 | -Install MinGW and make sure it is included in Path system variable (when installed at C:\MinGW then C:\MinGW\bin must be in the Path variable). | |
39 | -Install CMake. | |
40 | -Download SWIG. Unpack the .zip contents somewhere (for example C:\swigwin-VERSION) | |
41 | -Add the unpacked SWIG directory to the Path variable. | |
42 | -Install latest Java JDK. | |
43 | -Go to morfeusz sources root. | |
44 | -mkdir build | |
45 | -cd build | |
46 | -cmake | |
47 | - | |
48 | -CROSS COMPILE | |
49 | -------------- | |
50 | - | |
51 | -Make your own CMake from sources using this patch: http://cmake.org/gitweb?p=cmake.git;a=commitdiff;h=19b7a54d | |
52 | -(or use version > 3.0) | |
53 | -Add Java and Python libraries to the Toolchain file. Copy python27.dll from C:\Windows\System32 to some shared libs directory. | |
54 | 90 | \ No newline at end of file |
91 | +Better cross-compile on Linux... | |
... | ... |
build-darwin.tar.gz
0 → 100644
No preview for this file type
jmorfeusz/src/main/java/pl/waw/ipipan/morfeusz/app/App.java
0 → 100644
1 | +package pl.waw.ipipan.morfeusz.app; | |
2 | + | |
3 | +import pl.waw.ipipan.morfeusz.Morfeusz; | |
4 | +import pl.waw.ipipan.morfeusz.MorphInterpretation; | |
5 | +import pl.waw.ipipan.morfeusz.ResultsIterator; | |
6 | + | |
7 | +/** | |
8 | + * | |
9 | + * @author mlenart | |
10 | + */ | |
11 | +public class App { | |
12 | + | |
13 | + static { | |
14 | + System.loadLibrary("morfeusz"); | |
15 | + System.err.println("LOADED MORFEUSZ"); | |
16 | + System.loadLibrary("jmorfeusz"); | |
17 | + } | |
18 | + | |
19 | + /** | |
20 | + * @param args the command line arguments | |
21 | + */ | |
22 | + public static void main(String[] args) { | |
23 | + Morfeusz m = new Morfeusz(); | |
24 | + ResultsIterator resIt = m.analyze("Ala ma kota żółć."); | |
25 | + while (resIt.hasNext()) { | |
26 | + MorphInterpretation mi = resIt.getNext(); | |
27 | + System.out.printf( | |
28 | + "%d %d %s %s %s %s%n", | |
29 | + mi.getStartNode(), mi.getEndNode(), | |
30 | + mi.getOrth(), mi.getLemma(), | |
31 | + mi.getTag(), mi.getName()); | |
32 | + } | |
33 | + } | |
34 | + | |
35 | +} | |
... | ... |
jmorfeusz/src/main/native/libjmorfeusz.dll
No preview for this file type
jmorfeusz/src/main/native/libjmorfeusz.so
No preview for this file type
morfeusz/MorphDeserializer.cpp
morfeusz/MorphDeserializer.hpp
... | ... | @@ -15,7 +15,6 @@ |
15 | 15 | class MorphDeserializer: public Deserializer< std::vector<InterpsGroup> > { |
16 | 16 | public: |
17 | 17 | MorphDeserializer(); |
18 | - MorphDeserializer(const MorphDeserializer& orig); | |
19 | 18 | virtual ~MorphDeserializer(); |
20 | 19 | long deserialize( |
21 | 20 | const unsigned char* ptr, |
... | ... |
morfeusz/Toolchain-darwin32.cmake deleted
1 | - | |
2 | -SET(CMAKE_SYSTEM_NAME Darwin) | |
3 | -SET(CMAKE_SYSTEM_VERSION 9) | |
4 | -SET(CMAKE_C_COMPILER /mnt/storage/usr/i686-apple-darwin9/i686-apple-darwin9-gcc) | |
5 | -SET(CMAKE_CXX_COMPILER /mnt/storage/usr/i686-apple-darwin9/i686-apple-darwin9-g++) | |
6 | - | |
7 | -# here is the target environment located | |
8 | -SET(CMAKE_FIND_ROOT_PATH /mnt/storage/usr/i686-apple-darwin9) | |
9 | - | |
10 | -# adjust the default behaviour of the FIND_XXX() commands: | |
11 | -# search headers and libraries in the target environment, search | |
12 | -# programs in the host environment | |
13 | -set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) | |
14 | -set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) | |
15 | -set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) |
morfeusz/Toolchain-darwin64.cmake
1 | 1 | |
2 | -SET(CMAKE_SYSTEM_NAME Darwin) | |
2 | +set (DARWIN64_ROOT /mnt/storage/crossmorfeusz/darwin64) | |
3 | + | |
4 | +set (CMAKE_SYSTEM_NAME Darwin) | |
3 | 5 | set (CMAKE_SYSTEM_VERSION 1) |
4 | -set (CMAKE_C_COMPILER /usr/x86_64-apple-darwin9/bin/x86_64-apple-darwin9-gcc) | |
5 | -set (CMAKE_CXX_COMPILER /usr/x86_64-apple-darwin9/bin/x86_64-apple-darwin9-g++) | |
6 | +set (CMAKE_C_COMPILER ${DARWIN64_ROOT}/x86_64-apple-darwin9/bin/x86_64-apple-darwin9-gcc) | |
7 | +set (CMAKE_CXX_COMPILER ${DARWIN64_ROOT}/x86_64-apple-darwin9/bin/x86_64-apple-darwin9-g++) | |
8 | + | |
9 | +set (JAVA_ROOT ${DARWIN64_ROOT}/JavaVM.framework) | |
10 | +set (PYTHON_ROOT ${DARWIN64_ROOT}/Python.framework/Versions/2.7) | |
6 | 11 | |
7 | 12 | # here is the target environment located |
8 | -set (CMAKE_FIND_ROOT_PATH /usr/x86_64-apple-darwin9 /mnt/storage/JavaVM.framework) | |
13 | +set (CMAKE_FIND_ROOT_PATH ${DARWIN64_ROOT}/x86_64-apple-darwin9 ${JAVA_ROOT} ${PYTHON_ROOT}) | |
9 | 14 | |
10 | 15 | # adjust the default behaviour of the FIND_XXX() commands: |
11 | 16 | # search headers and libraries in the target environment, search |
... | ... | @@ -14,6 +19,6 @@ set (CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) |
14 | 19 | set (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) |
15 | 20 | set (CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) |
16 | 21 | |
17 | -set (JAVA_JVM_LIBRARY /mnt/storage/JavaVM.framework) | |
18 | -set (JAVA_AWT_LIBRARY /mnt/storage/JavaVM.framework) | |
19 | -set (JAVA_INCLUDE_PATH /mnt/storage/JavaVM.framework/Headers) | |
22 | +set (JAVA_JVM_LIBRARY ${JAVA_ROOT}) | |
23 | +set (JAVA_AWT_LIBRARY ${JAVA_ROOT}) | |
24 | +set (JAVA_INCLUDE_PATH ${JAVA_ROOT}/Headers) | |
... | ... |
morfeusz/Toolchain-linux32.cmake
... | ... | @@ -3,3 +3,19 @@ SET(CMAKE_SYSTEM_NAME Linux) |
3 | 3 | SET(CMAKE_SYSTEM_VERSION 1) |
4 | 4 | SET(CMAKE_C_COMPILER gcc -m32) |
5 | 5 | SET(CMAKE_CXX_COMPILER g++ -m32) |
6 | + | |
7 | +set (JAVA_ROOT /mnt/storage/crossmorfeusz/linux32/jdk1.7.0_45) | |
8 | +set (PYTHON_ROOT /mnt/storage/crossmorfeusz/linux32/python) | |
9 | + | |
10 | +# here is the target environment located | |
11 | +set (CMAKE_FIND_ROOT_PATH ${JAVA_ROOT} ${PYTHON_ROOT}) | |
12 | + | |
13 | +# adjust the default behaviour of the FIND_XXX() commands: | |
14 | +# search headers, libraries and programs in both the target | |
15 | +# environment and the host environment (mode BOTH) | |
16 | +set (CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH) | |
17 | +set (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) | |
18 | +set (CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH) | |
19 | + | |
20 | +set (JAVA_INCLUDE_PATH ${JAVA_ROOT}/include) | |
21 | +set (JAVA_INCLUDE_PATH2 ${JAVA_ROOT}/include/linux) | |
6 | 22 | \ No newline at end of file |
... | ... |
morfeusz/Toolchain-linux64.cmake deleted
morfeusz/Toolchain-win32.cmake
... | ... | @@ -13,4 +13,4 @@ SET(CMAKE_FIND_ROOT_PATH /usr/i686-w64-mingw32 ) |
13 | 13 | # programs in the host environment |
14 | 14 | set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) |
15 | 15 | set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) |
16 | -set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) | |
17 | 16 | \ No newline at end of file |
17 | +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) | |
... | ... |
morfeusz/Toolchain-win64.cmake
1 | 1 | |
2 | -SET(CMAKE_SYSTEM_NAME Windows) | |
3 | -SET(CMAKE_SYSTEM_VERSION 1) | |
4 | -SET(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc) | |
5 | -SET(CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++) | |
6 | -SET(CMAKE_RC_COMPILER x86_64-w64-mingw32-windres) | |
2 | +set (CMAKE_SYSTEM_NAME Windows) | |
3 | +set (CMAKE_SYSTEM_VERSION 1) | |
4 | +set (CMAKE_C_COMPILER x86_64-w64-mingw32-gcc) | |
5 | +set (CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++) | |
6 | +set (CMAKE_RC_COMPILER x86_64-w64-mingw32-windres) | |
7 | 7 | |
8 | 8 | # here is the target environment located |
9 | -SET(CMAKE_FIND_ROOT_PATH /usr/x86_64-w64-mingw32 /home/lennyn/opt/windupa/Java/jdk1.7.0_45 /opt/windupa/Java/jre7 /home/lennyn/opt/windupa/Python27 /home/lennyn/opt/swig) | |
9 | +set (WIN64_ROOT /mnt/storage/crossmorfeusz/windows64) | |
10 | +set (PYTHON_ROOT ${WIN64_ROOT}/Python27) | |
11 | +set (JAVA_ROOT ${WIN64_ROOT}/Java/jdk1.7.0_45) | |
12 | +set (CMAKE_FIND_ROOT_PATH /usr/x86_64-w64-mingw32 ${JAVA_ROOT} ${PYTHON_ROOT}) | |
13 | +set (CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} ${PYTHON_ROOT}/libs) | |
10 | 14 | |
11 | 15 | # adjust the default behaviour of the FIND_XXX() commands: |
12 | 16 | # search headers and libraries in the target environment, search |
... | ... |
morfeusz/charset/CharsetConverter.cpp
... | ... | @@ -21,6 +21,10 @@ string CharsetConverter::toString(const vector<uint32_t>& codepoints) const { |
21 | 21 | return res; |
22 | 22 | } |
23 | 23 | |
24 | +CharsetConverter::~CharsetConverter() { | |
25 | + | |
26 | +} | |
27 | + | |
24 | 28 | uint32_t UTF8CharsetConverter::peek(const char*& it, const char* end) const { |
25 | 29 | return utf8::peek_next(it, end); |
26 | 30 | } |
... | ... |
morfeusz/charset/CharsetConverter.hpp
... | ... | @@ -21,6 +21,8 @@ public: |
21 | 21 | virtual std::string fromUTF8(const std::string& input) const; |
22 | 22 | |
23 | 23 | std::string toString(const std::vector<uint32_t>& codepoints) const; |
24 | + | |
25 | + virtual ~CharsetConverter(); | |
24 | 26 | private: |
25 | 27 | }; |
26 | 28 | |
... | ... | @@ -33,26 +35,6 @@ public: |
33 | 35 | private: |
34 | 36 | }; |
35 | 37 | |
36 | -//class UTF16CharsetConverter : public CharsetConverter { | |
37 | -//public: | |
38 | -// enum Endianness { LE, BE }; | |
39 | -// explicit UTF16CharsetConverter(UTF16CharsetConverter::Endianness endianness); | |
40 | -// uint32_t peek(const char*& it, const char* end) const; | |
41 | -// uint32_t next(const char*& it, const char* end) const; | |
42 | -// void append(uint32_t cp, std::string& result) const; | |
43 | -//private: | |
44 | -// uint16_t convertEndianness(uint16_t cp) const; | |
45 | -// UTF16CharsetConverter::Endianness endianness; | |
46 | -//}; | |
47 | -// | |
48 | -//class UTF32CharsetConverter : public CharsetConverter { | |
49 | -//public: | |
50 | -// uint32_t peek(const char*& it, const char* end) const; | |
51 | -// uint32_t next(const char*& it, const char* end) const; | |
52 | -// void append(uint32_t cp, std::string& result) const; | |
53 | -//private: | |
54 | -//}; | |
55 | - | |
56 | 38 | /* |
57 | 39 | * Converter that uses a simple conversion table |
58 | 40 | */ |
... | ... |
morfeusz/fsa/cfsa1_impl.hpp
... | ... | @@ -14,8 +14,7 @@ |
14 | 14 | |
15 | 15 | using namespace std; |
16 | 16 | |
17 | -#pragma pack(push) /* push current alignment to stack */ | |
18 | -#pragma pack(1) /* set alignment to 1 byte boundary */ | |
17 | +#pragma pack(push, 1) /* push current alignment to stack and set alignment to 1 byte boundary */ | |
19 | 18 | |
20 | 19 | struct StateData2 { |
21 | 20 | unsigned transitionsNum: 6; |
... | ... |
morfeusz/fsa/fsa.hpp
... | ... | @@ -31,6 +31,7 @@ public: |
31 | 31 | * Returns number of bytes read or -1 on error. |
32 | 32 | */ |
33 | 33 | virtual long deserialize(const unsigned char* ptr, T& object) const = 0; |
34 | + virtual ~Deserializer() {} | |
34 | 35 | }; |
35 | 36 | |
36 | 37 | class StringDeserializer : public Deserializer<char*> { |
... | ... | @@ -48,6 +49,8 @@ public: |
48 | 49 | return strlen(text) + 1; |
49 | 50 | // return 1; |
50 | 51 | } |
52 | + | |
53 | + virtual ~StringDeserializer() {} | |
51 | 54 | }; |
52 | 55 | |
53 | 56 | //class Counter { |
... | ... |
morfeusz/fsa/simplefsa_impl.hpp
... | ... | @@ -8,8 +8,7 @@ |
8 | 8 | #ifndef SIMPLEFSA_IMPL_HPP |
9 | 9 | #define SIMPLEFSA_IMPL_HPP |
10 | 10 | |
11 | -#pragma pack(push) /* push current alignment to stack */ | |
12 | -#pragma pack(1) /* set alignment to 1 byte boundary */ | |
11 | +#pragma pack(push, 1) /* push current alignment to stack and set alignment to 1 byte boundary */ | |
13 | 12 | |
14 | 13 | struct StateData { |
15 | 14 | unsigned transitionsNum : 7; |
... | ... | @@ -44,7 +43,7 @@ void SimpleFSA<T>::proceedToNext(const char c, State<T>& state) const { |
44 | 43 | StateData stateData = *reinterpret_cast<const StateData*>(fromPointer); |
45 | 44 | const unsigned char* foundTransition = fromPointer + transitionsTableOffset; |
46 | 45 | bool found = false; |
47 | - for (int i = 0; i < stateData.transitionsNum; i++, foundTransition += 4) { | |
46 | + for (unsigned int i = 0; i < stateData.transitionsNum; i++, foundTransition += 4) { | |
48 | 47 | if ((char) *foundTransition == c) { |
49 | 48 | found = true; |
50 | 49 | break; |
... | ... |
morfeusz/java/CMakeLists.txt
... | ... | @@ -13,7 +13,7 @@ INCLUDE_DIRECTORIES(..) |
13 | 13 | set (CMAKE_SWIG_FLAGS -package pl.waw.ipipan.morfeusz) |
14 | 14 | set (CMAKE_SWIG_OUTDIR ${CMAKE_SOURCE_DIR}/jmorfeusz/src/main/java/pl/waw/ipipan/morfeusz) |
15 | 15 | |
16 | -SET_SOURCE_FILES_PROPERTIES(../morfeusz.i PROPERTIES CPLUSPLUS ON) | |
16 | +set_source_files_properties (../morfeusz.i PROPERTIES CPLUSPLUS ON) | |
17 | 17 | SWIG_ADD_MODULE(jmorfeusz java ../morfeusz.i) |
18 | 18 | SWIG_LINK_LIBRARIES(jmorfeusz ${JAVA_LIBRARIES}) |
19 | 19 | SWIG_LINK_LIBRARIES(jmorfeusz libmorfeusz) |
... | ... |
morfeusz/test_result_equals.cpp
... | ... | @@ -27,7 +27,7 @@ static MorfeuszCharset getEncoding(const string& encodingStr) { |
27 | 27 | return CP852; |
28 | 28 | else { |
29 | 29 | cerr << "Invalid encoding: " << encodingStr << " must be one of: UTF8, ISO8859_2, WINDOWS1250" << endl; |
30 | - assert(false); | |
30 | + throw "Invalid encoding"; | |
31 | 31 | } |
32 | 32 | } |
33 | 33 | |
... | ... |
test-darwin.sh
0 → 100644