Commit a1d73c9e33b179bc6d3765aed2bb1cf812903764

Authored by Michał Lenart
1 parent 23e8055f

- poprawienie buga w generatorze (zawsze się wywalał)

- sprawienie by buildAll.sh tylko raz budował automaty

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@209 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
CMakeLists.txt
@@ -40,6 +40,12 @@ endif () @@ -40,6 +40,12 @@ endif ()
40 # INPUT_DICTIONARY_CPP 40 # INPUT_DICTIONARY_CPP
41 set (INPUT_DICTIONARY_CPP "default_fsa.cpp") 41 set (INPUT_DICTIONARY_CPP "default_fsa.cpp")
42 set (INPUT_SYNTH_DICTIONARY_CPP "default_synth_fsa.cpp") 42 set (INPUT_SYNTH_DICTIONARY_CPP "default_synth_fsa.cpp")
  43 +
  44 +if (CPP_DICTIONARIES_DIR)
  45 + set (INPUT_DICTIONARY_CPP "${CPP_DICTIONARIES_DIR}/${INPUT_DICTIONARY_CPP}")
  46 + set (INPUT_SYNTH_DICTIONARY_CPP "${CPP_DICTIONARIES_DIR}/${INPUT_SYNTH_DICTIONARY_CPP}")
  47 +endif ()
  48 +
43 if ("${INPUT_DICTIONARIES}" STREQUAL "") 49 if ("${INPUT_DICTIONARIES}" STREQUAL "")
44 if ("${EMPTY_INPUT_DICTIONARY}" STREQUAL "TRUE") 50 if ("${EMPTY_INPUT_DICTIONARY}" STREQUAL "TRUE")
45 set (INPUT_DICTIONARIES ${PROJECT_SOURCE_DIR}/input/empty.txt) 51 set (INPUT_DICTIONARIES ${PROJECT_SOURCE_DIR}/input/empty.txt)
@@ -84,6 +84,8 @@ INPUT_TAGSET - tagset file @@ -84,6 +84,8 @@ INPUT_TAGSET - tagset file
84 INPUT_DICTIONARIES - comma-separated list of dictionary files (can use empty one from input/empty.txt) 84 INPUT_DICTIONARIES - comma-separated list of dictionary files (can use empty one from input/empty.txt)
85 SEGMENT_RULES_FILE - segmentation rules file 85 SEGMENT_RULES_FILE - segmentation rules file
86 TARGET_DIR - a dir where packages built by CMake are put in 86 TARGET_DIR - a dir where packages built by CMake are put in
  87 +CPP_DICTIONARIES_DIR - a dir where analyzer and generator embedded C++ dictionary files are read from/written to
  88 +SKIP_DICTIONARY_BUILDING - if set, the dictionary-building part of the build process is skipped
87 89
88 Execute like following: 90 Execute like following:
89 cmake -D INPUT_TAGSET=<file1> -D INPUT_DICTIONARIES="<dict_file1>,<dict_file2>" .. 91 cmake -D INPUT_TAGSET=<file1> -D INPUT_DICTIONARIES="<dict_file1>,<dict_file2>" ..
buildAll.sh
@@ -5,6 +5,19 @@ set -e -o pipefail @@ -5,6 +5,19 @@ set -e -o pipefail
5 export CROSSMORFEUSZ_ROOT="$1" 5 export CROSSMORFEUSZ_ROOT="$1"
6 export INPUT_DICTIONARIES="$2" 6 export INPUT_DICTIONARIES="$2"
7 export VERSION_SUFFIX="$3" 7 export VERSION_SUFFIX="$3"
  8 +export CPP_DICTIONARIES_DIR=`mktemp -d`
  9 +
  10 +function buildDictionaries {
  11 + buildDir=`mktemp -d`
  12 + srcDir=`pwd`
  13 + cd $buildDir
  14 + cmake -D INPUT_DICTIONARIES=$INPUT_DICTIONARIES \
  15 + -D CPP_DICTIONARIES_DIR=$CPP_DICTIONARIES_DIR \
  16 + $srcDir
  17 + make -j2 dictionaries
  18 + cd $srcDir
  19 + rm -r $buildDir
  20 +}
8 21
9 function build { 22 function build {
10 set -e -o pipefail 23 set -e -o pipefail
@@ -31,6 +44,8 @@ function build { @@ -31,6 +44,8 @@ function build {
31 -D TARGET_DIR=$targetDir \ 44 -D TARGET_DIR=$targetDir \
32 -D INPUT_DICTIONARIES=$INPUT_DICTIONARIES \ 45 -D INPUT_DICTIONARIES=$INPUT_DICTIONARIES \
33 -D VERSION_SUFFIX=$VERSION_SUFFIX \ 46 -D VERSION_SUFFIX=$VERSION_SUFFIX \
  47 + -D CPP_DICTIONARIES_DIR=$CPP_DICTIONARIES_DIR \
  48 + -D SKIP_DICTIONARY_BUILDING=1 \
34 $srcDir 2>&1 49 $srcDir 2>&1
35 echo "building $toolchain" >&2 50 echo "building $toolchain" >&2
36 make 51 make
@@ -56,11 +71,14 @@ export -f log @@ -56,11 +71,14 @@ export -f log
56 rm -rf log 71 rm -rf log
57 mkdir -p log 72 mkdir -p log
58 73
59 -parallel -j5 bash -c -- \  
60 - "build Linux amd64 package package-java package-python package-builder 2>&1 | log Linux amd64" \  
61 - "LDFLAGS=-m32;CFLAGS=-m32;CXXFLAGS=-m32 build Linux i386 package package-java 2>&1 | log Linux i386" \  
62 - "build Windows amd64 package package-java 2>&1 | log Windows amd64" \  
63 - "build Windows i386 package package-java 2>&1 | log Windows i386" \  
64 - "build Darwin amd64 package package-java 2>&1 | log Darwin amd64" 74 +buildDictionaries 2>&1 | log All all
  75 +
  76 +{
  77 + echo "build Linux amd64 package package-java package-python package-builder 2>&1 | log Linux amd64"
  78 + echo "LDFLAGS=-m32;CFLAGS=-m32;CXXFLAGS=-m32 build Linux i386 package package-java 2>&1 | log Linux i386"
  79 + echo "build Windows amd64 package package-java 2>&1 | log Windows amd64"
  80 + echo "build Windows i386 package package-java 2>&1 | log Windows i386"
  81 + echo "build Darwin amd64 package package-java 2>&1 | log Darwin amd64"
  82 +} | xargs -n1 -P8 -d$'\n' bash -c
65 83
66 84
morfeusz/CMakeLists.txt
1 1
2 2
3 ########## generate default dictionary data ################# 3 ########## generate default dictionary data #################
4 -add_custom_command (  
5 - OUTPUT "${INPUT_DICTIONARY_CPP}"  
6 - COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --analyzer --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1 --trim-supneg  
7 - DEPENDS "${INPUT_DICTIONARY}"  
8 - COMMENT "Building default dictionary C++ file"  
9 -)  
10 -add_custom_command (  
11 - OUTPUT "${INPUT_SYNTH_DICTIONARY_CPP}"  
12 - COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --generator --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_SYNTH_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1  
13 - DEPENDS "${INPUT_DICTIONARY}"  
14 - COMMENT "Building default dictionary C++ file"  
15 -) 4 +
  5 +if (SKIP_DICTIONARY_BUILDING)
  6 + message ("SKIPPING dictionary building")
  7 +else ()
  8 + add_custom_command (
  9 + OUTPUT "${INPUT_DICTIONARY_CPP}"
  10 + COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --analyzer --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1
  11 + DEPENDS "${INPUT_DICTIONARY}"
  12 + COMMENT "Building default dictionary C++ file"
  13 + )
  14 + add_custom_command (
  15 + OUTPUT "${INPUT_SYNTH_DICTIONARY_CPP}"
  16 + COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --generator --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_SYNTH_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1
  17 + DEPENDS "${INPUT_DICTIONARY}"
  18 + COMMENT "Building default dictionary C++ file"
  19 + )
  20 +endif()
  21 +
  22 +add_custom_target ( analyzer-dictionary DEPENDS "${INPUT_DICTIONARY_CPP}")
  23 +add_custom_target ( generator-dictionary DEPENDS "${INPUT_SYNTH_DICTIONARY_CPP}")
  24 +add_custom_target ( dictionaries DEPENDS analyzer-dictionary generator-dictionary)
16 25
17 include_directories( ${CMAKE_CURRENT_SOURCE_DIR} ) 26 include_directories( ${CMAKE_CURRENT_SOURCE_DIR} )
18 27
@@ -68,6 +77,7 @@ set(INCLUDE_FILES @@ -68,6 +77,7 @@ set(INCLUDE_FILES
68 77
69 add_library (libmorfeusz SHARED ${SRC_FILES}) 78 add_library (libmorfeusz SHARED ${SRC_FILES})
70 set_source_files_properties ( SOURCE "${INPUT_DICTIONARY_CPP}" PROPERTIES GENERATED TRUE) 79 set_source_files_properties ( SOURCE "${INPUT_DICTIONARY_CPP}" PROPERTIES GENERATED TRUE)
  80 +set_source_files_properties ( SOURCE "${INPUT_SYNTH_DICTIONARY_CPP}" PROPERTIES GENERATED TRUE)
71 set_target_properties (libmorfeusz PROPERTIES OUTPUT_NAME "morfeusz2") 81 set_target_properties (libmorfeusz PROPERTIES OUTPUT_NAME "morfeusz2")
72 82
73 add_executable (morfeusz_analyzer morfeusz_analyzer.cpp) 83 add_executable (morfeusz_analyzer morfeusz_analyzer.cpp)
morfeusz/Morfeusz.cpp
@@ -335,7 +335,7 @@ void Morfeusz::generate(const string& lemma, vector<MorphInterpretation>& result @@ -335,7 +335,7 @@ void Morfeusz::generate(const string& lemma, vector<MorphInterpretation>& result
335 int startNode = 0; 335 int startNode = 0;
336 TextReader reader(input, inputEnd, this->generatorEnv); 336 TextReader reader(input, inputEnd, this->generatorEnv);
337 this->processOneWord(this->generatorEnv, reader, startNode, results); 337 this->processOneWord(this->generatorEnv, reader, startNode, results);
338 - if (input != inputEnd) { 338 + if (reader.getCurrPtr() != reader.getEndPtr()) {
339 throw MorfeuszException("Input contains more than one word"); 339 throw MorfeuszException("Input contains more than one word");
340 } 340 }
341 } 341 }
morfeusz/cli/cli.cpp
@@ -175,10 +175,3 @@ void initializeMorfeusz(ezOptionParser& opt, Morfeusz& morfeusz) { @@ -175,10 +175,3 @@ void initializeMorfeusz(ezOptionParser& opt, Morfeusz& morfeusz) {
175 morfeusz.setCharset(CP852); 175 morfeusz.setCharset(CP852);
176 #endif 176 #endif
177 } 177 }
178 -//  
179 -//Morfeusz getMorfeuszFromCLI(int argc, const char** argv, const std::string& titleText) {  
180 -// ezOptionParser opt = getOptions(argc, argv, titleText);  
181 -// Morfeusz morfeusz;  
182 -// initializeMorfeusz(opt, morfeusz);  
183 -// return morfeusz;  
184 -//}