Commit a1d73c9e33b179bc6d3765aed2bb1cf812903764
1 parent
23e8055f
- poprawienie buga w generatorze (zawsze się wywalał)
- sprawienie by buildAll.sh tylko raz budował automaty
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@209 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing
6 changed files
with
55 additions
and
26 deletions
CMakeLists.txt
... | ... | @@ -40,6 +40,12 @@ endif () |
40 | 40 | # INPUT_DICTIONARY_CPP |
41 | 41 | set (INPUT_DICTIONARY_CPP "default_fsa.cpp") |
42 | 42 | set (INPUT_SYNTH_DICTIONARY_CPP "default_synth_fsa.cpp") |
43 | + | |
44 | +if (CPP_DICTIONARIES_DIR) | |
45 | + set (INPUT_DICTIONARY_CPP "${CPP_DICTIONARIES_DIR}/${INPUT_DICTIONARY_CPP}") | |
46 | + set (INPUT_SYNTH_DICTIONARY_CPP "${CPP_DICTIONARIES_DIR}/${INPUT_SYNTH_DICTIONARY_CPP}") | |
47 | +endif () | |
48 | + | |
43 | 49 | if ("${INPUT_DICTIONARIES}" STREQUAL "") |
44 | 50 | if ("${EMPTY_INPUT_DICTIONARY}" STREQUAL "TRUE") |
45 | 51 | set (INPUT_DICTIONARIES ${PROJECT_SOURCE_DIR}/input/empty.txt) |
... | ... |
README
... | ... | @@ -84,6 +84,8 @@ INPUT_TAGSET - tagset file |
84 | 84 | INPUT_DICTIONARIES - comma-separated list of dictionary files (can use empty one from input/empty.txt) |
85 | 85 | SEGMENT_RULES_FILE - segmentation rules file |
86 | 86 | TARGET_DIR - a dir where packages built by CMake are put in |
87 | +CPP_DICTIONARIES_DIR - a dir where analyzer and generator embedded C++ dictionary files are read from/written to | |
88 | +SKIP_DICTIONARY_BUILDING - if set, the dictionary-building part of the build process is skipped | |
87 | 89 | |
88 | 90 | Execute as follows: |
89 | 91 | cmake -D INPUT_TAGSET=<file1> -D INPUT_DICTIONARIES="<dict_file1>,<dict_file2>" .. |
... | ... |
buildAll.sh
... | ... | @@ -5,6 +5,19 @@ set -e -o pipefail |
5 | 5 | export CROSSMORFEUSZ_ROOT="$1" |
6 | 6 | export INPUT_DICTIONARIES="$2" |
7 | 7 | export VERSION_SUFFIX="$3" |
8 | +export CPP_DICTIONARIES_DIR=`mktemp -d` | |
9 | + | |
10 | +function buildDictionaries { | |
11 | + buildDir=`mktemp -d` | |
12 | + srcDir=`pwd` | |
13 | + cd $buildDir | |
14 | + cmake -D INPUT_DICTIONARIES=$INPUT_DICTIONARIES \ | |
15 | + -D CPP_DICTIONARIES_DIR=$CPP_DICTIONARIES_DIR \ | |
16 | + $srcDir | |
17 | + make -j2 dictionaries | |
18 | + cd $srcDir | |
19 | + rm -r $buildDir | |
20 | +} | |
8 | 21 | |
9 | 22 | function build { |
10 | 23 | set -e -o pipefail |
... | ... | @@ -31,6 +44,8 @@ function build { |
31 | 44 | -D TARGET_DIR=$targetDir \ |
32 | 45 | -D INPUT_DICTIONARIES=$INPUT_DICTIONARIES \ |
33 | 46 | -D VERSION_SUFFIX=$VERSION_SUFFIX \ |
47 | + -D CPP_DICTIONARIES_DIR=$CPP_DICTIONARIES_DIR \ | |
48 | + -D SKIP_DICTIONARY_BUILDING=1 \ | |
34 | 49 | $srcDir 2>&1 |
35 | 50 | echo "building $toolchain" >&2 |
36 | 51 | make |
... | ... | @@ -56,11 +71,14 @@ export -f log |
56 | 71 | rm -rf log |
57 | 72 | mkdir -p log |
58 | 73 | |
59 | -parallel -j5 bash -c -- \ | |
60 | - "build Linux amd64 package package-java package-python package-builder 2>&1 | log Linux amd64" \ | |
61 | - "LDFLAGS=-m32;CFLAGS=-m32;CXXFLAGS=-m32 build Linux i386 package package-java 2>&1 | log Linux i386" \ | |
62 | - "build Windows amd64 package package-java 2>&1 | log Windows amd64" \ | |
63 | - "build Windows i386 package package-java 2>&1 | log Windows i386" \ | |
64 | - "build Darwin amd64 package package-java 2>&1 | log Darwin amd64" | |
74 | +buildDictionaries 2>&1 | log All all | |
75 | + | |
76 | +{ | |
77 | + echo "build Linux amd64 package package-java package-python package-builder 2>&1 | log Linux amd64" | |
78 | + echo "LDFLAGS=-m32;CFLAGS=-m32;CXXFLAGS=-m32 build Linux i386 package package-java 2>&1 | log Linux i386" | |
79 | + echo "build Windows amd64 package package-java 2>&1 | log Windows amd64" | |
80 | + echo "build Windows i386 package package-java 2>&1 | log Windows i386" | |
81 | + echo "build Darwin amd64 package package-java 2>&1 | log Darwin amd64" | |
82 | +} | xargs -n1 -P8 -d$'\n' bash -c | |
65 | 83 | |
66 | 84 | |
... | ... |
morfeusz/CMakeLists.txt
1 | 1 | |
2 | 2 | |
3 | 3 | ########## generate default dictionary data ################# |
4 | -add_custom_command ( | |
5 | - OUTPUT "${INPUT_DICTIONARY_CPP}" | |
6 | - COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --analyzer --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1 --trim-supneg | |
7 | - DEPENDS "${INPUT_DICTIONARY}" | |
8 | - COMMENT "Building default dictionary C++ file" | |
9 | -) | |
10 | -add_custom_command ( | |
11 | - OUTPUT "${INPUT_SYNTH_DICTIONARY_CPP}" | |
12 | - COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --generator --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_SYNTH_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1 | |
13 | - DEPENDS "${INPUT_DICTIONARY}" | |
14 | - COMMENT "Building default dictionary C++ file" | |
15 | -) | |
4 | + | |
5 | +if (SKIP_DICTIONARY_BUILDING) | |
6 | + message ("SKIPPING dictionary building") | |
7 | +else () | |
8 | + add_custom_command ( | |
9 | + OUTPUT "${INPUT_DICTIONARY_CPP}" | |
10 | + COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --analyzer --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1 | |
11 | + DEPENDS "${INPUT_DICTIONARY}" | |
12 | + COMMENT "Building default dictionary C++ file" | |
13 | + ) | |
14 | + add_custom_command ( | |
15 | + OUTPUT "${INPUT_SYNTH_DICTIONARY_CPP}" | |
16 | + COMMAND python ${PROJECT_SOURCE_DIR}/fsabuilder/morfeusz_builder --generator --input-files="${INPUT_DICTIONARIES}" -o "${INPUT_SYNTH_DICTIONARY_CPP}" --tagset-file="${INPUT_TAGSET}" --segments-file="${SEGMENT_RULES_FILE}" --cpp --serialization-method=V1 | |
17 | + DEPENDS "${INPUT_DICTIONARY}" | |
18 | + COMMENT "Building default dictionary C++ file" | |
19 | + ) | |
20 | +endif() | |
21 | + | |
22 | +add_custom_target ( analyzer-dictionary DEPENDS "${INPUT_DICTIONARY_CPP}") | |
23 | +add_custom_target ( generator-dictionary DEPENDS "${INPUT_SYNTH_DICTIONARY_CPP}") | |
24 | +add_custom_target ( dictionaries DEPENDS analyzer-dictionary generator-dictionary) | |
16 | 25 | |
17 | 26 | include_directories( ${CMAKE_CURRENT_SOURCE_DIR} ) |
18 | 27 | |
... | ... | @@ -68,6 +77,7 @@ set(INCLUDE_FILES |
68 | 77 | |
69 | 78 | add_library (libmorfeusz SHARED ${SRC_FILES}) |
70 | 79 | set_source_files_properties ( SOURCE "${INPUT_DICTIONARY_CPP}" PROPERTIES GENERATED TRUE) |
80 | +set_source_files_properties ( SOURCE "${INPUT_SYNTH_DICTIONARY_CPP}" PROPERTIES GENERATED TRUE) | |
71 | 81 | set_target_properties (libmorfeusz PROPERTIES OUTPUT_NAME "morfeusz2") |
72 | 82 | |
73 | 83 | add_executable (morfeusz_analyzer morfeusz_analyzer.cpp) |
... | ... |
morfeusz/Morfeusz.cpp
... | ... | @@ -335,7 +335,7 @@ void Morfeusz::generate(const string& lemma, vector<MorphInterpretation>& result |
335 | 335 | int startNode = 0; |
336 | 336 | TextReader reader(input, inputEnd, this->generatorEnv); |
337 | 337 | this->processOneWord(this->generatorEnv, reader, startNode, results); |
338 | - if (input != inputEnd) { | |
338 | + if (reader.getCurrPtr() != reader.getEndPtr()) { | |
339 | 339 | throw MorfeuszException("Input contains more than one word"); |
340 | 340 | } |
341 | 341 | } |
... | ... |
morfeusz/cli/cli.cpp
... | ... | @@ -175,10 +175,3 @@ void initializeMorfeusz(ezOptionParser& opt, Morfeusz& morfeusz) { |
175 | 175 | morfeusz.setCharset(CP852); |
176 | 176 | #endif |
177 | 177 | } |
178 | -// | |
179 | -//Morfeusz getMorfeuszFromCLI(int argc, const char** argv, const std::string& titleText) { | |
180 | -// ezOptionParser opt = getOptions(argc, argv, titleText); | |
181 | -// Morfeusz morfeusz; | |
182 | -// initializeMorfeusz(opt, morfeusz); | |
183 | -// return morfeusz; | |
184 | -//} | |
... | ... |