Commit e147486dfdda22b5c87c21982022032ab4232d5e

Authored by Michał Lenart
1 parent a2756826

- poprawki warningów itp.

- prace nad cross-kompilacją

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@63 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
  1 +
  2 +***************
  3 +Compilation - prerequisites
  4 +***************
  5 +
  6 +This tutorial assumes that the build process is performed on a 64-bit Linux machine (preferably from the Debian/Ubuntu family).
  7 +
  8 +sudo apt-get install build-essential autotools
  9 +
1 10 For cross compiling:
  11 +--------------------
2 12  
3 13 sudo apt-get install g++-multilib g++-mingw-w64
4 14  
5 15 For Java and Python bindings:
  16 +-----------------------------
  17 +sudo apt-get install default-jdk python swig
  18 +
  19 +Java and Python bindings with cross-compilation support:
  20 +--------------------------------------------------------
  21 +Download CMake 2.8.x (tested on 2.8.12.1).
  22 +Apply cmake-2.8.12.1-patch.diff to the sources.
  23 +Build and install CMake from patched sources.
  24 +
  25 +Java and Python bindings without cross-compilation support:
  26 +-----------------------------------------------------------
  27 +sudo apt-get install cmake
  28 +
  29 +***************
  30 +Compilation
  31 +***************
  32 +Create a separate build directory in the morfeusz root, then run cmake and make in it:
  33 +
  34 +mkdir build
  35 +cd build
  36 +cmake ..
  37 +make
  38 +
  39 +**************************
  40 +Notes on cross-compilation
  41 +**************************
  42 +
  43 +Create a separate build directory, for example build-darwin.
  44 +
  45 +Run:
  46 +cmake -DCMAKE_TOOLCHAIN_FILE=../morfeusz/Toolchain-xxx.cmake ..
  47 +make
  48 +
  49 +
  50 +Copy the Java JDK and Python libraries from the target platform. Set JAVA_ROOT and PYTHON_ROOT in the Toolchain file to match the directories copied from the target platform.
  51 +
  52 +[Mac OS X 64bit] Libraries are at /System/Library/Frameworks/JavaVM.framework and /System/Library/Frameworks/Python.framework
  53 +
  54 +[Windows 32 and 64 bit] Libraries are usually at C:\Program Files\Java\jdk* and C:\Python27. You must also copy python27.dll from C:\Windows\System32 to the Python27/libs directory.
  55 +
  56 +[Linux 32bit] Java is usually at /usr/lib/jvm/default-java (alternatively, you can download a package from the Oracle website). The preferred way of obtaining Python is to compile it on the target platform:
  57 +- configure the latest version using ./configure --prefix=/some/directory/python, then build and install it with make and make install
  58 +- copy the contents of /some/directory/python to the build machine and set it as PYTHON_ROOT in the Toolchain file
6 59  
7   -sudo apt-get install swig default-jdk python
8 60  
9 61 *************
10 62 Mac OS X
... ... @@ -15,7 +67,7 @@ Mac OS X
15 67 # install brew
16 68 ruby -e "$(curl -fsSL https://raw.github.com/mxcl/homebrew/go/install)"
17 69  
18   -# install svn
  70 +# install some other stuff
19 71 brew install svn
20 72 brew install swig
21 73 brew install make
... ... @@ -35,19 +87,4 @@ brew doctor
35 87 Windows
36 88 *************
37 89  
38   -Install MinGW and make sure it is included in Path system variable (when installed at C:\MinGW then C:\MinGW\bin must be in the Path variable).
39   -Install CMake.
40   -Download SWIG. Unpack the .zip contents somewhere (for example C:\swigwin-VERSION)
41   -Add the unpacked SWIG directory to the Path variable.
42   -Install latest Java JDK.
43   -Go to morfeusz sources root.
44   -mkdir build
45   -cd build
46   -cmake
47   -
48   -CROSS COMPILE
49   --------------
50   -
51   -Make your own CMake from sources using this patch: http://cmake.org/gitweb?p=cmake.git;a=commitdiff;h=19b7a54d
52   -(or use version > 3.0)
53   -Add Java and Python libraries to the Toolchain file. Copy python27.dll from C:\Windows\System32 to some shared libs directory.
54 90 \ No newline at end of file
  91 +It is better to cross-compile the Windows build on Linux (see the notes on cross-compilation above).
... ...
build-darwin.tar.gz 0 → 100644
No preview for this file type
jmorfeusz/src/main/java/pl/waw/ipipan/morfeusz/app/App.java 0 → 100644
  1 +package pl.waw.ipipan.morfeusz.app;
  2 +
  3 +import pl.waw.ipipan.morfeusz.Morfeusz;
  4 +import pl.waw.ipipan.morfeusz.MorphInterpretation;
  5 +import pl.waw.ipipan.morfeusz.ResultsIterator;
  6 +
  7 +/**
  8 + *
  9 + * @author mlenart
  10 + */
  11 +public class App {
  12 +
  13 + static {
  14 + System.loadLibrary("morfeusz");
  15 + System.err.println("LOADED MORFEUSZ");
  16 + System.loadLibrary("jmorfeusz");
  17 + }
  18 +
  19 + /**
  20 + * @param args the command line arguments
  21 + */
  22 + public static void main(String[] args) {
  23 + Morfeusz m = new Morfeusz();
  24 + ResultsIterator resIt = m.analyze("Ala ma kota żółć.");
  25 + while (resIt.hasNext()) {
  26 + MorphInterpretation mi = resIt.getNext();
  27 + System.out.printf(
  28 + "%d %d %s %s %s %s%n",
  29 + mi.getStartNode(), mi.getEndNode(),
  30 + mi.getOrth(), mi.getLemma(),
  31 + mi.getTag(), mi.getName());
  32 + }
  33 + }
  34 +
  35 +}
... ...
jmorfeusz/src/main/native/libjmorfeusz.dll
No preview for this file type
jmorfeusz/src/main/native/libjmorfeusz.so
No preview for this file type
morfeusz/MorphDeserializer.cpp
... ... @@ -20,9 +20,6 @@ const unsigned int MAX_WORD_SIZE = 256;
20 20 MorphDeserializer::MorphDeserializer() {
21 21 }
22 22  
23   -MorphDeserializer::MorphDeserializer(const MorphDeserializer& orig) {
24   -}
25   -
26 23 MorphDeserializer::~MorphDeserializer() {
27 24 }
28 25  
... ...
morfeusz/MorphDeserializer.hpp
... ... @@ -15,7 +15,6 @@
15 15 class MorphDeserializer: public Deserializer< std::vector<InterpsGroup> > {
16 16 public:
17 17 MorphDeserializer();
18   - MorphDeserializer(const MorphDeserializer& orig);
19 18 virtual ~MorphDeserializer();
20 19 long deserialize(
21 20 const unsigned char* ptr,
... ...
morfeusz/Toolchain-darwin32.cmake deleted
1   -
2   -SET(CMAKE_SYSTEM_NAME Darwin)
3   -SET(CMAKE_SYSTEM_VERSION 9)
4   -SET(CMAKE_C_COMPILER /mnt/storage/usr/i686-apple-darwin9/i686-apple-darwin9-gcc)
5   -SET(CMAKE_CXX_COMPILER /mnt/storage/usr/i686-apple-darwin9/i686-apple-darwin9-g++)
6   -
7   -# here is the target environment located
8   -SET(CMAKE_FIND_ROOT_PATH /mnt/storage/usr/i686-apple-darwin9)
9   -
10   -# adjust the default behaviour of the FIND_XXX() commands:
11   -# search headers and libraries in the target environment, search
12   -# programs in the host environment
13   -set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
14   -set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
15   -set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
morfeusz/Toolchain-darwin64.cmake
1 1  
2   -SET(CMAKE_SYSTEM_NAME Darwin)
  2 +set (DARWIN64_ROOT /mnt/storage/crossmorfeusz/darwin64)
  3 +
  4 +set (CMAKE_SYSTEM_NAME Darwin)
3 5 set (CMAKE_SYSTEM_VERSION 1)
4   -set (CMAKE_C_COMPILER /usr/x86_64-apple-darwin9/bin/x86_64-apple-darwin9-gcc)
5   -set (CMAKE_CXX_COMPILER /usr/x86_64-apple-darwin9/bin/x86_64-apple-darwin9-g++)
  6 +set (CMAKE_C_COMPILER ${DARWIN64_ROOT}/x86_64-apple-darwin9/bin/x86_64-apple-darwin9-gcc)
  7 +set (CMAKE_CXX_COMPILER ${DARWIN64_ROOT}/x86_64-apple-darwin9/bin/x86_64-apple-darwin9-g++)
  8 +
  9 +set (JAVA_ROOT ${DARWIN64_ROOT}/JavaVM.framework)
  10 +set (PYTHON_ROOT ${DARWIN64_ROOT}/Python.framework/Versions/2.7)
6 11  
7 12 # here is the target environment located
8   -set (CMAKE_FIND_ROOT_PATH /usr/x86_64-apple-darwin9 /mnt/storage/JavaVM.framework)
  13 +set (CMAKE_FIND_ROOT_PATH ${DARWIN64_ROOT}/x86_64-apple-darwin9 ${JAVA_ROOT} ${PYTHON_ROOT})
9 14  
10 15 # adjust the default behaviour of the FIND_XXX() commands:
11 16 # search headers and libraries in the target environment, search
... ... @@ -14,6 +19,6 @@ set (CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
14 19 set (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
15 20 set (CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
16 21  
17   -set (JAVA_JVM_LIBRARY /mnt/storage/JavaVM.framework)
18   -set (JAVA_AWT_LIBRARY /mnt/storage/JavaVM.framework)
19   -set (JAVA_INCLUDE_PATH /mnt/storage/JavaVM.framework/Headers)
  22 +set (JAVA_JVM_LIBRARY ${JAVA_ROOT})
  23 +set (JAVA_AWT_LIBRARY ${JAVA_ROOT})
  24 +set (JAVA_INCLUDE_PATH ${JAVA_ROOT}/Headers)
... ...
morfeusz/Toolchain-linux32.cmake
... ... @@ -3,3 +3,19 @@ SET(CMAKE_SYSTEM_NAME Linux)
3 3 SET(CMAKE_SYSTEM_VERSION 1)
4 4 SET(CMAKE_C_COMPILER gcc -m32)
5 5 SET(CMAKE_CXX_COMPILER g++ -m32)
  6 +
  7 +set (JAVA_ROOT /mnt/storage/crossmorfeusz/linux32/jdk1.7.0_45)
  8 +set (PYTHON_ROOT /mnt/storage/crossmorfeusz/linux32/python)
  9 +
  10 +# here is the target environment located
  11 +set (CMAKE_FIND_ROOT_PATH ${JAVA_ROOT} ${PYTHON_ROOT})
  12 +
  13 +# adjust the default behaviour of the FIND_XXX() commands:
  14 +# search programs, headers and libraries in both the target
  15 +# environment (CMAKE_FIND_ROOT_PATH) and the host environment
  16 +set (CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH)
  17 +set (CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
  18 +set (CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH)
  19 +
  20 +set (JAVA_INCLUDE_PATH ${JAVA_ROOT}/include)
  21 +set (JAVA_INCLUDE_PATH2 ${JAVA_ROOT}/include/linux)
6 22 \ No newline at end of file
... ...
morfeusz/Toolchain-linux64.cmake deleted
1   -
2   -SET(CMAKE_SYSTEM_NAME Linux)
3   -SET(CMAKE_SYSTEM_VERSION 1)
4   -SET(CMAKE_C_COMPILER gcc -m64)
5   -SET(CMAKE_CXX_COMPILER g++ -m64)
morfeusz/Toolchain-win32.cmake
... ... @@ -13,4 +13,4 @@ SET(CMAKE_FIND_ROOT_PATH /usr/i686-w64-mingw32 )
13 13 # programs in the host environment
14 14 set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
15 15 set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
16   -set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
17 16 \ No newline at end of file
  17 +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
... ...
morfeusz/Toolchain-win64.cmake
1 1  
2   -SET(CMAKE_SYSTEM_NAME Windows)
3   -SET(CMAKE_SYSTEM_VERSION 1)
4   -SET(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc)
5   -SET(CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++)
6   -SET(CMAKE_RC_COMPILER x86_64-w64-mingw32-windres)
  2 +set (CMAKE_SYSTEM_NAME Windows)
  3 +set (CMAKE_SYSTEM_VERSION 1)
  4 +set (CMAKE_C_COMPILER x86_64-w64-mingw32-gcc)
  5 +set (CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++)
  6 +set (CMAKE_RC_COMPILER x86_64-w64-mingw32-windres)
7 7  
8 8 # here is the target environment located
9   -SET(CMAKE_FIND_ROOT_PATH /usr/x86_64-w64-mingw32 /home/lennyn/opt/windupa/Java/jdk1.7.0_45 /opt/windupa/Java/jre7 /home/lennyn/opt/windupa/Python27 /home/lennyn/opt/swig)
  9 +set (WIN64_ROOT /mnt/storage/crossmorfeusz/windows64)
  10 +set (PYTHON_ROOT ${WIN64_ROOT}/Python27)
  11 +set (JAVA_ROOT ${WIN64_ROOT}/Java/jdk1.7.0_45)
  12 +set (CMAKE_FIND_ROOT_PATH /usr/x86_64-w64-mingw32 ${JAVA_ROOT} ${PYTHON_ROOT})
  13 +set (CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH} ${PYTHON_ROOT}/libs)
10 14  
11 15 # adjust the default behaviour of the FIND_XXX() commands:
12 16 # search headers and libraries in the target environment, search
... ...
morfeusz/charset/CharsetConverter.cpp
... ... @@ -21,6 +21,10 @@ string CharsetConverter::toString(const vector&lt;uint32_t&gt;&amp; codepoints) const {
21 21 return res;
22 22 }
23 23  
  24 +CharsetConverter::~CharsetConverter() {
  25 +
  26 +}
  27 +
24 28 uint32_t UTF8CharsetConverter::peek(const char*& it, const char* end) const {
25 29 return utf8::peek_next(it, end);
26 30 }
... ...
morfeusz/charset/CharsetConverter.hpp
... ... @@ -21,6 +21,8 @@ public:
21 21 virtual std::string fromUTF8(const std::string& input) const;
22 22  
23 23 std::string toString(const std::vector<uint32_t>& codepoints) const;
  24 +
  25 + virtual ~CharsetConverter();
24 26 private:
25 27 };
26 28  
... ... @@ -33,26 +35,6 @@ public:
33 35 private:
34 36 };
35 37  
36   -//class UTF16CharsetConverter : public CharsetConverter {
37   -//public:
38   -// enum Endianness { LE, BE };
39   -// explicit UTF16CharsetConverter(UTF16CharsetConverter::Endianness endianness);
40   -// uint32_t peek(const char*& it, const char* end) const;
41   -// uint32_t next(const char*& it, const char* end) const;
42   -// void append(uint32_t cp, std::string& result) const;
43   -//private:
44   -// uint16_t convertEndianness(uint16_t cp) const;
45   -// UTF16CharsetConverter::Endianness endianness;
46   -//};
47   -//
48   -//class UTF32CharsetConverter : public CharsetConverter {
49   -//public:
50   -// uint32_t peek(const char*& it, const char* end) const;
51   -// uint32_t next(const char*& it, const char* end) const;
52   -// void append(uint32_t cp, std::string& result) const;
53   -//private:
54   -//};
55   -
56 38 /*
57 39 * Converter that uses a simple conversion table
58 40 */
... ...
morfeusz/fsa/cfsa1_impl.hpp
... ... @@ -14,8 +14,7 @@
14 14  
15 15 using namespace std;
16 16  
17   -#pragma pack(push) /* push current alignment to stack */
18   -#pragma pack(1) /* set alignment to 1 byte boundary */
  17 +#pragma pack(push, 1) /* push current alignment to stack and set alignment to 1 byte boundary */
19 18  
20 19 struct StateData2 {
21 20 unsigned transitionsNum: 6;
... ...
morfeusz/fsa/fsa.hpp
... ... @@ -31,6 +31,7 @@ public:
31 31 * Returns number of bytes read or -1 on error.
32 32 */
33 33 virtual long deserialize(const unsigned char* ptr, T& object) const = 0;
  34 + virtual ~Deserializer() {}
34 35 };
35 36  
36 37 class StringDeserializer : public Deserializer<char*> {
... ... @@ -48,6 +49,8 @@ public:
48 49 return strlen(text) + 1;
49 50 // return 1;
50 51 }
  52 +
  53 + virtual ~StringDeserializer() {}
51 54 };
52 55  
53 56 //class Counter {
... ...
morfeusz/fsa/simplefsa_impl.hpp
... ... @@ -8,8 +8,7 @@
8 8 #ifndef SIMPLEFSA_IMPL_HPP
9 9 #define SIMPLEFSA_IMPL_HPP
10 10  
11   -#pragma pack(push) /* push current alignment to stack */
12   -#pragma pack(1) /* set alignment to 1 byte boundary */
  11 +#pragma pack(push, 1) /* push current alignment to stack and set alignment to 1 byte boundary */
13 12  
14 13 struct StateData {
15 14 unsigned transitionsNum : 7;
... ... @@ -44,7 +43,7 @@ void SimpleFSA&lt;T&gt;::proceedToNext(const char c, State&lt;T&gt;&amp; state) const {
44 43 StateData stateData = *reinterpret_cast<const StateData*>(fromPointer);
45 44 const unsigned char* foundTransition = fromPointer + transitionsTableOffset;
46 45 bool found = false;
47   - for (int i = 0; i < stateData.transitionsNum; i++, foundTransition += 4) {
  46 + for (unsigned int i = 0; i < stateData.transitionsNum; i++, foundTransition += 4) {
48 47 if ((char) *foundTransition == c) {
49 48 found = true;
50 49 break;
... ...
morfeusz/java/CMakeLists.txt
... ... @@ -13,7 +13,7 @@ INCLUDE_DIRECTORIES(..)
13 13 set (CMAKE_SWIG_FLAGS -package pl.waw.ipipan.morfeusz)
14 14 set (CMAKE_SWIG_OUTDIR ${CMAKE_SOURCE_DIR}/jmorfeusz/src/main/java/pl/waw/ipipan/morfeusz)
15 15  
16   -SET_SOURCE_FILES_PROPERTIES(../morfeusz.i PROPERTIES CPLUSPLUS ON)
  16 +set_source_files_properties (../morfeusz.i PROPERTIES CPLUSPLUS ON)
17 17 SWIG_ADD_MODULE(jmorfeusz java ../morfeusz.i)
18 18 SWIG_LINK_LIBRARIES(jmorfeusz ${JAVA_LIBRARIES})
19 19 SWIG_LINK_LIBRARIES(jmorfeusz libmorfeusz)
... ...
morfeusz/test_result_equals.cpp
... ... @@ -27,7 +27,7 @@ static MorfeuszCharset getEncoding(const string&amp; encodingStr) {
27 27 return CP852;
28 28 else {
29 29 cerr << "Invalid encoding: " << encodingStr << " must be one of: UTF8, ISO8859_2, WINDOWS1250" << endl;
30   - assert(false);
  30 + throw "Invalid encoding";
31 31 }
32 32 }
33 33  
... ...
test-darwin.sh 0 → 100644
  1 +#!/bin/bash
  2 +
  3 +set -exo pipefail
  4 +
  5 +cd morfeusz
  6 +echo 'Ala ma kota i żółć' | DYLD_LIBRARY_PATH=. ./morfeusz
  7 +cd ../jmorfeusz
  8 +DYLD_LIBRARY_PATH=../morfeusz:. java -jar *.jar
  9 +cd ../pymorfeusz
  10 +DYLD_LIBRARY_PATH=../morfeusz:. python test.py
  11 +cd ..
... ...