Commit a1d73c9e33b179bc6d3765aed2bb1cf812903764
1 parent
23e8055f
- poprawienie buga w generatorze (zawsze się wywalał)
- sprawienie by buildAll.sh tylko raz budował automaty git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@209 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing
6 changed files
with
55 additions
and
26 deletions
CMakeLists.txt
@@ -40,6 +40,12 @@ endif () | @@ -40,6 +40,12 @@ endif () | ||
40 | # INPUT_DICTIONARY_CPP | 40 | # INPUT_DICTIONARY_CPP |
41 | set (INPUT_DICTIONARY_CPP "default_fsa.cpp") | 41 | set (INPUT_DICTIONARY_CPP "default_fsa.cpp") |
42 | set (INPUT_SYNTH_DICTIONARY_CPP "default_synth_fsa.cpp") | 42 | set (INPUT_SYNTH_DICTIONARY_CPP "default_synth_fsa.cpp") |
43 | + | ||
44 | +if (CPP_DICTIONARIES_DIR) | ||
45 | + set (INPUT_DICTIONARY_CPP "${CPP_DICTIONARIES_DIR}/${INPUT_DICTIONARY_CPP}") | ||
46 | + set (INPUT_SYNTH_DICTIONARY_CPP "${CPP_DICTIONARIES_DIR}/${INPUT_SYNTH_DICTIONARY_CPP}") | ||
47 | +endif () | ||
48 | + | ||
43 | if ("${INPUT_DICTIONARIES}" STREQUAL "") | 49 | if ("${INPUT_DICTIONARIES}" STREQUAL "") |
44 | if ("${EMPTY_INPUT_DICTIONARY}" STREQUAL "TRUE") | 50 | if ("${EMPTY_INPUT_DICTIONARY}" STREQUAL "TRUE") |
45 | set (INPUT_DICTIONARIES ${PROJECT_SOURCE_DIR}/input/empty.txt) | 51 | set (INPUT_DICTIONARIES ${PROJECT_SOURCE_DIR}/input/empty.txt) |
README
@@ -84,6 +84,8 @@ INPUT_TAGSET - tagset file | @@ -84,6 +84,8 @@ INPUT_TAGSET - tagset file | ||
84 | INPUT_DICTIONARIES - comma-separated list of dictionary files (can use empty one from input/empty.txt) | 84 | INPUT_DICTIONARIES - comma-separated list of dictionary files (can use empty one from input/empty.txt) |
85 | SEGMENT_RULES_FILE - segmentation rules file | 85 | SEGMENT_RULES_FILE - segmentation rules file |
86 | TARGET_DIR - a dir where packages built by CMake are put in | 86 | TARGET_DIR - a dir where packages built by CMake are put in |
87 | +CPP_DICTIONARIES_DIR - a dir where the analyzer and generator embedded C++ dictionary files are read from/written to | ||
88 | +SKIP_DICTIONARY_BUILDING - if set, the dictionary-building part of the build process is skipped | ||
87 | 89 | ||
88 | Execute like following: | 90 | Execute like following: |
89 | cmake -D INPUT_TAGSET=<file1> -D INPUT_DICTIONARIES="<dict_file1>,<dict_file2>" .. | 91 | cmake -D INPUT_TAGSET=<file1> -D INPUT_DICTIONARIES="<dict_file1>,<dict_file2>" .. |
buildAll.sh
@@ -5,6 +5,19 @@ set -e -o pipefail | @@ -5,6 +5,19 @@ set -e -o pipefail | ||
5 | export CROSSMORFEUSZ_ROOT="$1" | 5 | export CROSSMORFEUSZ_ROOT="$1" |
6 | export INPUT_DICTIONARIES="$2" | 6 | export INPUT_DICTIONARIES="$2" |
7 | export VERSION_SUFFIX="$3" | 7 | export VERSION_SUFFIX="$3" |
8 | +export CPP_DICTIONARIES_DIR=`mktemp -d` | ||
9 | + | ||
10 | +function buildDictionaries { | ||
11 | + buildDir=`mktemp -d` | ||
12 | + srcDir=`pwd` | ||
13 | + cd $buildDir | ||
14 | + cmake -D INPUT_DICTIONARIES=$INPUT_DICTIONARIES \ | ||
15 | + -D CPP_DICTIONARIES_DIR=$CPP_DICTIONARIES_DIR \ | ||
16 | + $srcDir | ||
17 | + make -j2 dictionaries | ||
18 | + cd $srcDir | ||
19 | + rm -r $buildDir | ||
20 | +} | ||
8 | 21 | ||
9 | function build { | 22 | function build { |
10 | set -e -o pipefail | 23 | set -e -o pipefail |
@@ -31,6 +44,8 @@ function build { | @@ -31,6 +44,8 @@ function build { | ||
31 | -D TARGET_DIR=$targetDir \ | 44 | -D TARGET_DIR=$targetDir \ |
32 | -D INPUT_DICTIONARIES=$INPUT_DICTIONARIES \ | 45 | -D INPUT_DICTIONARIES=$INPUT_DICTIONARIES \ |
33 | -D VERSION_SUFFIX=$VERSION_SUFFIX \ | 46 | -D VERSION_SUFFIX=$VERSION_SUFFIX \ |
47 | + -D CPP_DICTIONARIES_DIR=$CPP_DICTIONARIES_DIR \ | ||
48 | + -D SKIP_DICTIONARY_BUILDING=1 \ | ||
34 | $srcDir 2>&1 | 49 | $srcDir 2>&1 |
35 | echo "building $toolchain" >&2 | 50 | echo "building $toolchain" >&2 |
36 | make | 51 | make |
@@ -56,11 +71,14 @@ export -f log | @@ -56,11 +71,14 @@ export -f log | ||
56 | rm -rf log | 71 | rm -rf log |
57 | mkdir -p log | 72 | mkdir -p log |
58 | 73 | ||
59 | -parallel -j5 bash -c -- \ | ||
60 | - "build Linux amd64 package package-java package-python package-builder 2>&1 | log Linux amd64" \ | ||
61 | - "LDFLAGS=-m32;CFLAGS=-m32;CXXFLAGS=-m32 build Linux i386 package package-java 2>&1 | log Linux i386" \ | ||
62 | - "build Windows amd64 package package-java 2>&1 | log Windows amd64" \ | ||
63 | - "build Windows i386 package package-java 2>&1 | log Windows i386" \ | ||
64 | - "build Darwin amd64 package package-java 2>&1 | log Darwin amd64" | 74 | +buildDictionaries 2>&1 | log All all |
75 | + | ||
76 | +{ | ||
77 | + echo "build Linux amd64 package package-java package-python package-builder 2>&1 | log Linux amd64" | ||
78 | + echo "LDFLAGS=-m32;CFLAGS=-m32;CXXFLAGS=-m32 build Linux i386 package package-java 2>&1 | log Linux i386" | ||
79 | + echo "build Windows amd64 package package-java 2>&1 | log Windows amd64" | ||
80 | + echo "build Windows i386 package package-java 2>&1 | log Windows i386" | ||
81 | + echo "build Darwin amd64 package package-java 2>&1 | log Darwin amd64" | ||
82 | +} | xargs -n1 -P8 -d$'\n' bash -c | ||
65 | 83 | ||
66 | 84 |
morfeusz/CMakeLists.txt
1 | 1 | ||
2 | 2 | ||
3 | ########## generate default dictionary data ################# | 3 | ########## generate default dictionary data ################# |
4 | -add_custom_command ( | ||
5 | - OUTPUT "${INPUT_DICTIONARY_CPP}" | ||
6 | - COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --analyzer --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1 --trim-supneg | ||
7 | - DEPENDS "${INPUT_DICTIONARY}" | ||
8 | - COMMENT "Building default dictionary C++ file" | ||
9 | -) | ||
10 | -add_custom_command ( | ||
11 | - OUTPUT "${INPUT_SYNTH_DICTIONARY_CPP}" | ||
12 | - COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --generator --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_SYNTH_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1 | ||
13 | - DEPENDS "${INPUT_DICTIONARY}" | ||
14 | - COMMENT "Building default dictionary C++ file" | ||
15 | -) | 4 | + |
5 | +if (SKIP_DICTIONARY_BUILDING) | ||
6 | + message ("SKIPPING dictionary building") | ||
7 | +else () | ||
8 | + add_custom_command ( | ||
9 | + OUTPUT "${INPUT_DICTIONARY_CPP}" | ||
10 | + COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --analyzer --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1 | ||
11 | + DEPENDS "${INPUT_DICTIONARY}" | ||
12 | + COMMENT "Building default dictionary C++ file" | ||
13 | + ) | ||
14 | + add_custom_command ( | ||
15 | + OUTPUT "${INPUT_SYNTH_DICTIONARY_CPP}" | ||
16 | + COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --generator --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_SYNTH_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1 | ||
17 | + DEPENDS "${INPUT_DICTIONARY}" | ||
18 | + COMMENT "Building default dictionary C++ file" | ||
19 | + ) | ||
20 | +endif() | ||
21 | + | ||
22 | +add_custom_target ( analyzer-dictionary DEPENDS "${INPUT_DICTIONARY_CPP}") | ||
23 | +add_custom_target ( generator-dictionary DEPENDS "${INPUT_SYNTH_DICTIONARY_CPP}") | ||
24 | +add_custom_target ( dictionaries DEPENDS analyzer-dictionary generator-dictionary) | ||
16 | 25 | ||
17 | include_directories( ${CMAKE_CURRENT_SOURCE_DIR} ) | 26 | include_directories( ${CMAKE_CURRENT_SOURCE_DIR} ) |
18 | 27 | ||
@@ -68,6 +77,7 @@ set(INCLUDE_FILES | @@ -68,6 +77,7 @@ set(INCLUDE_FILES | ||
68 | 77 | ||
69 | add_library (libmorfeusz SHARED ${SRC_FILES}) | 78 | add_library (libmorfeusz SHARED ${SRC_FILES}) |
70 | set_source_files_properties ( SOURCE "${INPUT_DICTIONARY_CPP}" PROPERTIES GENERATED TRUE) | 79 | set_source_files_properties ( SOURCE "${INPUT_DICTIONARY_CPP}" PROPERTIES GENERATED TRUE) |
80 | +set_source_files_properties ( SOURCE "${INPUT_SYNTH_DICTIONARY_CPP}" PROPERTIES GENERATED TRUE) | ||
71 | set_target_properties (libmorfeusz PROPERTIES OUTPUT_NAME "morfeusz2") | 81 | set_target_properties (libmorfeusz PROPERTIES OUTPUT_NAME "morfeusz2") |
72 | 82 | ||
73 | add_executable (morfeusz_analyzer morfeusz_analyzer.cpp) | 83 | add_executable (morfeusz_analyzer morfeusz_analyzer.cpp) |
morfeusz/Morfeusz.cpp
@@ -335,7 +335,7 @@ void Morfeusz::generate(const string& lemma, vector<MorphInterpretation>& result | @@ -335,7 +335,7 @@ void Morfeusz::generate(const string& lemma, vector<MorphInterpretation>& result | ||
335 | int startNode = 0; | 335 | int startNode = 0; |
336 | TextReader reader(input, inputEnd, this->generatorEnv); | 336 | TextReader reader(input, inputEnd, this->generatorEnv); |
337 | this->processOneWord(this->generatorEnv, reader, startNode, results); | 337 | this->processOneWord(this->generatorEnv, reader, startNode, results); |
338 | - if (input != inputEnd) { | 338 | + if (reader.getCurrPtr() != reader.getEndPtr()) { |
339 | throw MorfeuszException("Input contains more than one word"); | 339 | throw MorfeuszException("Input contains more than one word"); |
340 | } | 340 | } |
341 | } | 341 | } |
morfeusz/cli/cli.cpp
@@ -175,10 +175,3 @@ void initializeMorfeusz(ezOptionParser& opt, Morfeusz& morfeusz) { | @@ -175,10 +175,3 @@ void initializeMorfeusz(ezOptionParser& opt, Morfeusz& morfeusz) { | ||
175 | morfeusz.setCharset(CP852); | 175 | morfeusz.setCharset(CP852); |
176 | #endif | 176 | #endif |
177 | } | 177 | } |
178 | -// | ||
179 | -//Morfeusz getMorfeuszFromCLI(int argc, const char** argv, const std::string& titleText) { | ||
180 | -// ezOptionParser opt = getOptions(argc, argv, titleText); | ||
181 | -// Morfeusz morfeusz; | ||
182 | -// initializeMorfeusz(opt, morfeusz); | ||
183 | -// return morfeusz; | ||
184 | -//} |