Commit a1d73c9e33b179bc6d3765aed2bb1cf812903764

Authored by Michał Lenart
1 parent 23e8055f

- poprawienie buga w generatorze (zawsze się wywalał)

- sprawienie by buildAll.sh tylko raz budował automaty

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@209 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
CMakeLists.txt
... ... @@ -40,6 +40,12 @@ endif ()
40 40 # INPUT_DICTIONARY_CPP
41 41 set (INPUT_DICTIONARY_CPP "default_fsa.cpp")
42 42 set (INPUT_SYNTH_DICTIONARY_CPP "default_synth_fsa.cpp")
  43 +
  44 +if (CPP_DICTIONARIES_DIR)
  45 + set (INPUT_DICTIONARY_CPP "${CPP_DICTIONARIES_DIR}/${INPUT_DICTIONARY_CPP}")
  46 + set (INPUT_SYNTH_DICTIONARY_CPP "${CPP_DICTIONARIES_DIR}/${INPUT_SYNTH_DICTIONARY_CPP}")
  47 +endif ()
  48 +
43 49 if ("${INPUT_DICTIONARIES}" STREQUAL "")
44 50 if ("${EMPTY_INPUT_DICTIONARY}" STREQUAL "TRUE")
45 51 set (INPUT_DICTIONARIES ${PROJECT_SOURCE_DIR}/input/empty.txt)
... ...
... ... @@ -84,6 +84,8 @@ INPUT_TAGSET - tagset file
84 84 INPUT_DICTIONARIES - comma-separated list of dictionary files (can use empty one from input/empty.txt)
85 85 SEGMENT_RULES_FILE - segmentation rules file
86 86 TARGET_DIR - a dir where packages built by CMake are put in
  87 +CPP_DICTIONARIES_DIR - a dir where analyzer and generator embedded C++ dictionary files are read from/written to
  88 +SKIP_DICTIONARY_BUILDING - if set, the dictionary-building part of the build process is skipped
87 89  
88 90 Execute like following:
89 91 cmake -D INPUT_TAGSET=<file1> -D INPUT_DICTIONARIES="<dict_file1>,<dict_file2>" ..
... ...
buildAll.sh
... ... @@ -5,6 +5,19 @@ set -e -o pipefail
5 5 export CROSSMORFEUSZ_ROOT="$1"
6 6 export INPUT_DICTIONARIES="$2"
7 7 export VERSION_SUFFIX="$3"
  8 +export CPP_DICTIONARIES_DIR=`mktemp -d`
  9 +
  10 +function buildDictionaries {
  11 + buildDir=`mktemp -d`
  12 + srcDir=`pwd`
  13 + cd $buildDir
  14 + cmake -D INPUT_DICTIONARIES=$INPUT_DICTIONARIES \
  15 + -D CPP_DICTIONARIES_DIR=$CPP_DICTIONARIES_DIR \
  16 + $srcDir
  17 + make -j2 dictionaries
  18 + cd $srcDir
  19 + rm -r $buildDir
  20 +}
8 21  
9 22 function build {
10 23 set -e -o pipefail
... ... @@ -31,6 +44,8 @@ function build {
31 44 -D TARGET_DIR=$targetDir \
32 45 -D INPUT_DICTIONARIES=$INPUT_DICTIONARIES \
33 46 -D VERSION_SUFFIX=$VERSION_SUFFIX \
  47 + -D CPP_DICTIONARIES_DIR=$CPP_DICTIONARIES_DIR \
  48 + -D SKIP_DICTIONARY_BUILDING=1 \
34 49 $srcDir 2>&1
35 50 echo "building $toolchain" >&2
36 51 make
... ... @@ -56,11 +71,14 @@ export -f log
56 71 rm -rf log
57 72 mkdir -p log
58 73  
59   -parallel -j5 bash -c -- \
60   - "build Linux amd64 package package-java package-python package-builder 2>&1 | log Linux amd64" \
61   - "LDFLAGS=-m32;CFLAGS=-m32;CXXFLAGS=-m32 build Linux i386 package package-java 2>&1 | log Linux i386" \
62   - "build Windows amd64 package package-java 2>&1 | log Windows amd64" \
63   - "build Windows i386 package package-java 2>&1 | log Windows i386" \
64   - "build Darwin amd64 package package-java 2>&1 | log Darwin amd64"
  74 +buildDictionaries 2>&1 | log All all
  75 +
  76 +{
  77 + echo "build Linux amd64 package package-java package-python package-builder 2>&1 | log Linux amd64"
  78 + echo "LDFLAGS=-m32;CFLAGS=-m32;CXXFLAGS=-m32 build Linux i386 package package-java 2>&1 | log Linux i386"
  79 + echo "build Windows amd64 package package-java 2>&1 | log Windows amd64"
  80 + echo "build Windows i386 package package-java 2>&1 | log Windows i386"
  81 + echo "build Darwin amd64 package package-java 2>&1 | log Darwin amd64"
  82 +} | xargs -n1 -P8 -d$'\n' bash -c
65 83  
66 84  
... ...
morfeusz/CMakeLists.txt
1 1  
2 2  
3 3 ########## generate default dictionary data #################
4   -add_custom_command (
5   - OUTPUT "${INPUT_DICTIONARY_CPP}"
6   - COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --analyzer --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1 --trim-supneg
7   - DEPENDS "${INPUT_DICTIONARY}"
8   - COMMENT "Building default dictionary C++ file"
9   -)
10   -add_custom_command (
11   - OUTPUT "${INPUT_SYNTH_DICTIONARY_CPP}"
12   - COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --generator --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_SYNTH_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1
13   - DEPENDS "${INPUT_DICTIONARY}"
14   - COMMENT "Building default dictionary C++ file"
15   -)
  4 +
  5 +if (SKIP_DICTIONARY_BUILDING)
  6 + message ("SKIPPING dictionary building")
  7 +else ()
  8 + add_custom_command (
  9 + OUTPUT "${INPUT_DICTIONARY_CPP}"
  10 + COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --analyzer --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1
  11 + DEPENDS "${INPUT_DICTIONARY}"
  12 + COMMENT "Building default dictionary C++ file"
  13 + )
  14 + add_custom_command (
  15 + OUTPUT "${INPUT_SYNTH_DICTIONARY_CPP}"
  16 + COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --generator --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_SYNTH_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1
  17 + DEPENDS "${INPUT_DICTIONARY}"
  18 + COMMENT "Building default dictionary C++ file"
  19 + )
  20 +endif()
  21 +
  22 +add_custom_target ( analyzer-dictionary DEPENDS "${INPUT_DICTIONARY_CPP}")
  23 +add_custom_target ( generator-dictionary DEPENDS "${INPUT_SYNTH_DICTIONARY_CPP}")
  24 +add_custom_target ( dictionaries DEPENDS analyzer-dictionary generator-dictionary)
16 25  
17 26 include_directories( ${CMAKE_CURRENT_SOURCE_DIR} )
18 27  
... ... @@ -68,6 +77,7 @@ set(INCLUDE_FILES
68 77  
69 78 add_library (libmorfeusz SHARED ${SRC_FILES})
70 79 set_source_files_properties ( SOURCE "${INPUT_DICTIONARY_CPP}" PROPERTIES GENERATED TRUE)
  80 +set_source_files_properties ( SOURCE "${INPUT_SYNTH_DICTIONARY_CPP}" PROPERTIES GENERATED TRUE)
71 81 set_target_properties (libmorfeusz PROPERTIES OUTPUT_NAME "morfeusz2")
72 82  
73 83 add_executable (morfeusz_analyzer morfeusz_analyzer.cpp)
... ...
morfeusz/Morfeusz.cpp
... ... @@ -335,7 +335,7 @@ void Morfeusz::generate(const string& lemma, vector<MorphInterpretation>& result
335 335 int startNode = 0;
336 336 TextReader reader(input, inputEnd, this->generatorEnv);
337 337 this->processOneWord(this->generatorEnv, reader, startNode, results);
338   - if (input != inputEnd) {
  338 + if (reader.getCurrPtr() != reader.getEndPtr()) {
339 339 throw MorfeuszException("Input contains more than one word");
340 340 }
341 341 }
... ...
morfeusz/cli/cli.cpp
... ... @@ -175,10 +175,3 @@ void initializeMorfeusz(ezOptionParser& opt, Morfeusz& morfeusz) {
175 175 morfeusz.setCharset(CP852);
176 176 #endif
177 177 }
178   -//
179   -//Morfeusz getMorfeuszFromCLI(int argc, const char** argv, const std::string& titleText) {
180   -// ezOptionParser opt = getOptions(argc, argv, titleText);
181   -// Morfeusz morfeusz;
182   -// initializeMorfeusz(opt, morfeusz);
183   -// return morfeusz;
184   -//}
... ...