From a1d73c9e33b179bc6d3765aed2bb1cf812903764 Mon Sep 17 00:00:00 2001 From: Michał Lenart <michall@ipipan.waw.pl> Date: Wed, 11 Jun 2014 15:57:18 +0000 Subject: [PATCH] - poprawienie buga w generatorze (zawsze się wywalał) - sprawienie by buildAll.sh tylko raz budował automaty --- CMakeLists.txt | 6 ++++++ README | 2 ++ buildAll.sh | 30 ++++++++++++++++++++++++------ morfeusz/CMakeLists.txt | 34 ++++++++++++++++++++++------------ morfeusz/Morfeusz.cpp | 2 +- morfeusz/cli/cli.cpp | 7 ------- 6 files changed, 55 insertions(+), 26 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 163c61b..9f0a411 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,12 @@ endif () # INPUT_DICTIONARY_CPP set (INPUT_DICTIONARY_CPP "default_fsa.cpp") set (INPUT_SYNTH_DICTIONARY_CPP "default_synth_fsa.cpp") + +if (CPP_DICTIONARIES_DIR) + set (INPUT_DICTIONARY_CPP "${CPP_DICTIONARIES_DIR}/${INPUT_DICTIONARY_CPP}") + set (INPUT_SYNTH_DICTIONARY_CPP "${CPP_DICTIONARIES_DIR}/${INPUT_SYNTH_DICTIONARY_CPP}") +endif () + if ("${INPUT_DICTIONARIES}" STREQUAL "") if ("${EMPTY_INPUT_DICTIONARY}" STREQUAL "TRUE") set (INPUT_DICTIONARIES ${PROJECT_SOURCE_DIR}/input/empty.txt) diff --git a/README b/README index b77727f..a24ea25 100644 --- a/README +++ b/README @@ -84,6 +84,8 @@ INPUT_TAGSET - tagset file INPUT_DICTIONARIES - comma-separated list of dictionary files (can use empty one from input/empty.txt) SEGMENT_RULES_FILE - segmentation rules file TARGET_DIR - a dir where packages built by CMake are put in +CPP_DICTIONARIES_DIR - a dir where analyzer and generator embedded C++ dictionary files are read/written to +SKIP_DICTIONARY_BUILDING - if set, the dictionary-building part of the build process is skipped Execute like following: cmake -D INPUT_TAGSET=<file1> -D INPUT_DICTIONARIES="<dict_file1>,<dict_file2>" .. 
diff --git a/buildAll.sh b/buildAll.sh index 84f24c7..f8c700d 100755 --- a/buildAll.sh +++ b/buildAll.sh @@ -5,6 +5,19 @@ set -e -o pipefail export CROSSMORFEUSZ_ROOT="$1" export INPUT_DICTIONARIES="$2" export VERSION_SUFFIX="$3" +export CPP_DICTIONARIES_DIR=`mktemp -d` + +function buildDictionaries { + buildDir=`mktemp -d` + srcDir=`pwd` + cd $buildDir + cmake -D INPUT_DICTIONARIES=$INPUT_DICTIONARIES \ + -D CPP_DICTIONARIES_DIR=$CPP_DICTIONARIES_DIR \ + $srcDir + make -j2 dictionaries + cd $srcDir + rm -r $buildDir +} function build { set -e -o pipefail @@ -31,6 +44,8 @@ function build { -D TARGET_DIR=$targetDir \ -D INPUT_DICTIONARIES=$INPUT_DICTIONARIES \ -D VERSION_SUFFIX=$VERSION_SUFFIX \ + -D CPP_DICTIONARIES_DIR=$CPP_DICTIONARIES_DIR \ + -D SKIP_DICTIONARY_BUILDING=1 \ $srcDir 2>&1 echo "building $toolchain" >&2 make @@ -56,11 +71,14 @@ export -f log rm -rf log mkdir -p log -parallel -j5 bash -c -- \ - "build Linux amd64 package package-java package-python package-builder 2>&1 | log Linux amd64" \ - "LDFLAGS=-m32;CFLAGS=-m32;CXXFLAGS=-m32 build Linux i386 package package-java 2>&1 | log Linux i386" \ - "build Windows amd64 package package-java 2>&1 | log Windows amd64" \ - "build Windows i386 package package-java 2>&1 | log Windows i386" \ - "build Darwin amd64 package package-java 2>&1 | log Darwin amd64" +buildDictionaries 2>&1 | log All all + +{ + echo "build Linux amd64 package package-java package-python package-builder 2>&1 | log Linux amd64" + echo "LDFLAGS=-m32;CFLAGS=-m32;CXXFLAGS=-m32 build Linux i386 package package-java 2>&1 | log Linux i386" + echo "build Windows amd64 package package-java 2>&1 | log Windows amd64" + echo "build Windows i386 package package-java 2>&1 | log Windows i386" + echo "build Darwin amd64 package package-java 2>&1 | log Darwin amd64" +} | xargs -n1 -P8 -d$'\n' bash -c diff --git a/morfeusz/CMakeLists.txt b/morfeusz/CMakeLists.txt index 20f96df..0c9b004 100644 --- a/morfeusz/CMakeLists.txt +++ b/morfeusz/CMakeLists.txt 
@@ -1,18 +1,27 @@ ########## generate default dictionary data ################# -add_custom_command ( - OUTPUT "${INPUT_DICTIONARY_CPP}" - COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --analyzer --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1 --trim-supneg - DEPENDS "${INPUT_DICTIONARY}" - COMMENT "Building default dictionary C++ file" -) -add_custom_command ( - OUTPUT "${INPUT_SYNTH_DICTIONARY_CPP}" - COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --generator --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_SYNTH_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1 - DEPENDS "${INPUT_DICTIONARY}" - COMMENT "Building default dictionary C++ file" -) + +if (SKIP_DICTIONARY_BUILDING) + message ("SKIPPING dictionary building") +else () + add_custom_command ( + OUTPUT "${INPUT_DICTIONARY_CPP}" + COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --analyzer --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1 + DEPENDS "${INPUT_DICTIONARY}" + COMMENT "Building default dictionary C++ file" + ) + add_custom_command ( + OUTPUT "${INPUT_SYNTH_DICTIONARY_CPP}" + COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --generator --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_SYNTH_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1 + DEPENDS "${INPUT_DICTIONARY}" + COMMENT "Building default dictionary C++ file" + ) +endif() + +add_custom_target ( analyzer-dictionary DEPENDS "${INPUT_DICTIONARY_CPP}") +add_custom_target ( generator-dictionary DEPENDS "${INPUT_SYNTH_DICTIONARY_CPP}") +add_custom_target ( dictionaries DEPENDS 
analyzer-dictionary generator-dictionary) include_directories( ${CMAKE_CURRENT_SOURCE_DIR} ) @@ -68,6 +77,7 @@ set(INCLUDE_FILES add_library (libmorfeusz SHARED ${SRC_FILES}) set_source_files_properties ( SOURCE "${INPUT_DICTIONARY_CPP}" PROPERTIES GENERATED TRUE) +set_source_files_properties ( SOURCE "${INPUT_SYNTH_DICTIONARY_CPP}" PROPERTIES GENERATED TRUE) set_target_properties (libmorfeusz PROPERTIES OUTPUT_NAME "morfeusz2") add_executable (morfeusz_analyzer morfeusz_analyzer.cpp) diff --git a/morfeusz/Morfeusz.cpp b/morfeusz/Morfeusz.cpp index a19b2c2..c0b80a5 100644 --- a/morfeusz/Morfeusz.cpp +++ b/morfeusz/Morfeusz.cpp @@ -335,7 +335,7 @@ void Morfeusz::generate(const string& lemma, vector<MorphInterpretation>& result int startNode = 0; TextReader reader(input, inputEnd, this->generatorEnv); this->processOneWord(this->generatorEnv, reader, startNode, results); - if (input != inputEnd) { + if (reader.getCurrPtr() != reader.getEndPtr()) { throw MorfeuszException("Input contains more than one word"); } } diff --git a/morfeusz/cli/cli.cpp b/morfeusz/cli/cli.cpp index 3c69baf..d54a756 100644 --- a/morfeusz/cli/cli.cpp +++ b/morfeusz/cli/cli.cpp @@ -175,10 +175,3 @@ void initializeMorfeusz(ezOptionParser& opt, Morfeusz& morfeusz) { morfeusz.setCharset(CP852); #endif } -// -//Morfeusz getMorfeuszFromCLI(int argc, const char** argv, const std::string& titleText) { -// ezOptionParser opt = getOptions(argc, argv, titleText); -// Morfeusz morfeusz; -// initializeMorfeusz(opt, morfeusz); -// return morfeusz; -//} -- libgit2 0.22.2