Commit a65ee056dbe81292da3af8dcacf7f040494e0d8f
1 parent
a1d73c9e
generalne porządki w kodzie
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@210 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing
71 changed files
with
470 additions
and
337 deletions
CMakeLists.txt
@@ -6,7 +6,7 @@ set (Morfeusz_VERSION_MAJOR 2) | @@ -6,7 +6,7 @@ set (Morfeusz_VERSION_MAJOR 2) | ||
6 | set (Morfeusz_VERSION_MINOR 0) | 6 | set (Morfeusz_VERSION_MINOR 0) |
7 | set (Morfeusz_VERSION_PATCH 0) | 7 | set (Morfeusz_VERSION_PATCH 0) |
8 | set (Morfeusz_VERSION "${Morfeusz_VERSION_MAJOR}.${Morfeusz_VERSION_MINOR}.${Morfeusz_VERSION_PATCH}") | 8 | set (Morfeusz_VERSION "${Morfeusz_VERSION_MAJOR}.${Morfeusz_VERSION_MINOR}.${Morfeusz_VERSION_PATCH}") |
9 | -if (NOT ${VERSION_SUFFIX} STREQUAL "") | 9 | +if (VERSION_SUFFIX) |
10 | set (Morfeusz_VERSION_TWEAK "${VERSION_SUFFIX}") | 10 | set (Morfeusz_VERSION_TWEAK "${VERSION_SUFFIX}") |
11 | set (Morfeusz_VERSION "${Morfeusz_VERSION}_${Morfeusz_VERSION_TWEAK}") | 11 | set (Morfeusz_VERSION "${Morfeusz_VERSION}_${Morfeusz_VERSION_TWEAK}") |
12 | endif () | 12 | endif () |
@@ -148,13 +148,11 @@ include (CPack) | @@ -148,13 +148,11 @@ include (CPack) | ||
148 | file (COPY fsabuilder testfiles input DESTINATION .) | 148 | file (COPY fsabuilder testfiles input DESTINATION .) |
149 | 149 | ||
150 | configure_file ( | 150 | configure_file ( |
151 | - "${PROJECT_SOURCE_DIR}/morfeusz/MorfeuszConfig.hpp.in" | ||
152 | - "${PROJECT_BINARY_DIR}/morfeusz/MorfeuszConfig.hpp" | 151 | + "${PROJECT_SOURCE_DIR}/morfeusz/MorfeuszVersion.hpp.in" |
152 | + "${PROJECT_BINARY_DIR}/MorfeuszVersion.hpp" | ||
153 | ) | 153 | ) |
154 | - | ||
155 | -###### add main sources ######## | ||
156 | - | ||
157 | include_directories("${PROJECT_BINARY_DIR}" ) | 154 | include_directories("${PROJECT_BINARY_DIR}" ) |
155 | +###### add main sources ######## | ||
158 | 156 | ||
159 | add_subdirectory (morfeusz) | 157 | add_subdirectory (morfeusz) |
160 | add_subdirectory (fsabuilder) | 158 | add_subdirectory (fsabuilder) |
README
@@ -109,7 +109,7 @@ Create separate build directory, for example build-darwin. | @@ -109,7 +109,7 @@ Create separate build directory, for example build-darwin. | ||
109 | Run cross-compilation: | 109 | Run cross-compilation: |
110 | ---------------------- | 110 | ---------------------- |
111 | 111 | ||
112 | -cmake -D CROSSMORFEUSZ_ROOT=<path_to_crossmorfeusz_dir> -DCMAKE_TOOLCHAIN_FILE=../morfeusz/Toolchain-xxx.cmake .. | 112 | +cmake -D CROSSMORFEUSZ_ROOT=<path_to_crossmorfeusz_dir> -DCMAKE_TOOLCHAIN_FILE=../toolchains/Toolchain-xxx.cmake .. |
113 | make | 113 | make |
114 | make package package-java package-python | 114 | make package package-java package-python |
115 | 115 |
buildAll.sh
@@ -30,12 +30,12 @@ function build { | @@ -30,12 +30,12 @@ function build { | ||
30 | srcDir=`pwd` | 30 | srcDir=`pwd` |
31 | buildDir=buildall/$os-$arch | 31 | buildDir=buildall/$os-$arch |
32 | targetDir=$srcDir/target | 32 | targetDir=$srcDir/target |
33 | - toolchain=$srcDir/morfeusz/Toolchain-$os-$arch.cmake | 33 | + toolchain=$srcDir/toolchains/Toolchain-$os-$arch.cmake |
34 | 34 | ||
35 | echo "Will use $toolchain toolchain" | 35 | echo "Will use $toolchain toolchain" |
36 | 36 | ||
37 | rm -rf $buildDir | 37 | rm -rf $buildDir |
38 | - rm -rf $targetDir | 38 | + #~ rm -rf $targetDir |
39 | mkdir -p $buildDir | 39 | mkdir -p $buildDir |
40 | mkdir -p $targetDir | 40 | mkdir -p $targetDir |
41 | cd $buildDir | 41 | cd $buildDir |
@@ -68,7 +68,7 @@ function log { | @@ -68,7 +68,7 @@ function log { | ||
68 | export -f build | 68 | export -f build |
69 | export -f log | 69 | export -f log |
70 | 70 | ||
71 | -rm -rf log | 71 | +rm -rf log target |
72 | mkdir -p log | 72 | mkdir -p log |
73 | 73 | ||
74 | buildDictionaries 2>&1 | log All all | 74 | buildDictionaries 2>&1 | log All all |
@@ -79,6 +79,6 @@ buildDictionaries 2>&1 | log All all | @@ -79,6 +79,6 @@ buildDictionaries 2>&1 | log All all | ||
79 | echo "build Windows amd64 package package-java 2>&1 | log Windows amd64" | 79 | echo "build Windows amd64 package package-java 2>&1 | log Windows amd64" |
80 | echo "build Windows i386 package package-java 2>&1 | log Windows i386" | 80 | echo "build Windows i386 package package-java 2>&1 | log Windows i386" |
81 | echo "build Darwin amd64 package package-java 2>&1 | log Darwin amd64" | 81 | echo "build Darwin amd64 package package-java 2>&1 | log Darwin amd64" |
82 | -} | xargs -n1 -P8 -d$'\n' bash -c | 82 | +} | xargs -n1 -P5 -d$'\n' bash -c |
83 | 83 | ||
84 | 84 |
morfeusz/CMakeLists.txt
@@ -25,8 +25,6 @@ add_custom_target ( dictionaries DEPENDS analyzer-dictionary generator-dictionar | @@ -25,8 +25,6 @@ add_custom_target ( dictionaries DEPENDS analyzer-dictionary generator-dictionar | ||
25 | 25 | ||
26 | include_directories( ${CMAKE_CURRENT_SOURCE_DIR} ) | 26 | include_directories( ${CMAKE_CURRENT_SOURCE_DIR} ) |
27 | 27 | ||
28 | -# add_custom_target (dupa DEPENDS "${INPUT_DICTIONARY_CPP}") | ||
29 | - | ||
30 | #### build ##### | 28 | #### build ##### |
31 | 29 | ||
32 | set(SRC_FILES | 30 | set(SRC_FILES |
@@ -42,37 +40,39 @@ set(SRC_FILES | @@ -42,37 +40,39 @@ set(SRC_FILES | ||
42 | InflexionGraph.cpp | 40 | InflexionGraph.cpp |
43 | charset/TextReader.cpp | 41 | charset/TextReader.cpp |
44 | charset/CharsetConverter.cpp | 42 | charset/CharsetConverter.cpp |
45 | - charset/CaseConverter.cpp | ||
46 | - charset/caseconv.cpp | 43 | + case/CaseConverter.cpp |
44 | + case/caseconv.cpp | ||
47 | charset/conversion_tables.cpp | 45 | charset/conversion_tables.cpp |
48 | cli/cli.cpp | 46 | cli/cli.cpp |
49 | segrules/segrules.cpp | 47 | segrules/segrules.cpp |
50 | segrules/SegrulesFSA.cpp | 48 | segrules/SegrulesFSA.cpp |
51 | - CasePatternHelper.cpp | ||
52 | - decoder/InterpretedChunksDecoder.cpp | ||
53 | - decoder/InterpretedChunksDecoder4Analyzer.cpp | ||
54 | - decoder/InterpretedChunksDecoder4Generator.cpp | ||
55 | - deserializer/InterpsGroupsReader.cpp | ||
56 | - deserializer/MorphDeserializer.cpp | 49 | + case/CasePatternHelper.cpp |
50 | + deserialization/morphInterps/InterpretedChunksDecoder.cpp | ||
51 | + deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.cpp | ||
52 | + deserialization/morphInterps/InterpretedChunksDecoder4Generator.cpp | ||
53 | + deserialization/InterpsGroupsReader.cpp | ||
54 | + deserialization/MorphDeserializer.cpp | ||
57 | ) | 55 | ) |
58 | 56 | ||
59 | set(INCLUDE_FILES | 57 | set(INCLUDE_FILES |
60 | const.hpp | 58 | const.hpp |
61 | data/default_fsa.hpp | 59 | data/default_fsa.hpp |
60 | + Environment.hpp | ||
62 | Tagset.hpp | 61 | Tagset.hpp |
63 | Qualifiers.hpp | 62 | Qualifiers.hpp |
64 | fsa/const.hpp | 63 | fsa/const.hpp |
65 | MorphInterpretation.hpp | 64 | MorphInterpretation.hpp |
66 | Morfeusz.hpp | 65 | Morfeusz.hpp |
66 | + MorfeuszVersion.hpp | ||
67 | InflexionGraph.hpp | 67 | InflexionGraph.hpp |
68 | charset/CharsetConverter.hpp | 68 | charset/CharsetConverter.hpp |
69 | charset/TextReader.hpp | 69 | charset/TextReader.hpp |
70 | - charset/CaseConverter.hpp | ||
71 | - charset/caseconv.hpp | 70 | + case/CaseConverter.hpp |
71 | + case/caseconv.hpp | ||
72 | charset/conversion_tables.hpp | 72 | charset/conversion_tables.hpp |
73 | cli/cli.hpp | 73 | cli/cli.hpp |
74 | segrules/segrules.hpp | 74 | segrules/segrules.hpp |
75 | - deserializer/MorphDeserializer.cpp | 75 | + deserialization/MorphDeserializer.cpp |
76 | ) | 76 | ) |
77 | 77 | ||
78 | add_library (libmorfeusz SHARED ${SRC_FILES}) | 78 | add_library (libmorfeusz SHARED ${SRC_FILES}) |
@@ -82,24 +82,15 @@ set_target_properties (libmorfeusz PROPERTIES OUTPUT_NAME "morfeusz2") | @@ -82,24 +82,15 @@ set_target_properties (libmorfeusz PROPERTIES OUTPUT_NAME "morfeusz2") | ||
82 | 82 | ||
83 | add_executable (morfeusz_analyzer morfeusz_analyzer.cpp) | 83 | add_executable (morfeusz_analyzer morfeusz_analyzer.cpp) |
84 | add_executable (morfeusz_generator morfeusz_generator.cpp) | 84 | add_executable (morfeusz_generator morfeusz_generator.cpp) |
85 | -add_executable (test_result_equals test_result_equals.cpp) | ||
86 | -add_executable (test_recognize_dict test_recognize_dict.cpp) | 85 | +add_executable (test_result_equals test/test_result_equals.cpp) |
86 | +add_executable (test_recognize_dict test/test_recognize_dict.cpp) | ||
87 | 87 | ||
88 | target_link_libraries (morfeusz_analyzer libmorfeusz) | 88 | target_link_libraries (morfeusz_analyzer libmorfeusz) |
89 | target_link_libraries (morfeusz_generator libmorfeusz) | 89 | target_link_libraries (morfeusz_generator libmorfeusz) |
90 | target_link_libraries (test_result_equals libmorfeusz) | 90 | target_link_libraries (test_result_equals libmorfeusz) |
91 | target_link_libraries (test_recognize_dict libmorfeusz) | 91 | target_link_libraries (test_recognize_dict libmorfeusz) |
92 | 92 | ||
93 | -if (${CMAKE_SYSTEM_NAME} MATCHES "Windows") | ||
94 | - target_link_libraries (libmorfeusz ws2_32) | ||
95 | - set (TARGET_LIB_DIR bin) | ||
96 | -else () | ||
97 | - set (TARGET_LIB_DIR lib) | ||
98 | -endif () | ||
99 | - | ||
100 | -add_subdirectory (java) | ||
101 | -add_subdirectory (python) | ||
102 | -add_subdirectory (perl) | 93 | +add_subdirectory (wrappers) |
103 | 94 | ||
104 | if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") | 95 | if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") |
105 | add_custom_target (morfeusz-repair-library | 96 | add_custom_target (morfeusz-repair-library |
@@ -108,6 +99,13 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") | @@ -108,6 +99,13 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") | ||
108 | add_dependencies (morfeusz_analyzer morfeusz-repair-library) | 99 | add_dependencies (morfeusz_analyzer morfeusz-repair-library) |
109 | endif () | 100 | endif () |
110 | 101 | ||
102 | +if (${CMAKE_SYSTEM_NAME} MATCHES "Windows") | ||
103 | + target_link_libraries (libmorfeusz ws2_32) | ||
104 | + set (TARGET_LIB_DIR bin) | ||
105 | +else () | ||
106 | + set (TARGET_LIB_DIR lib) | ||
107 | +endif () | ||
108 | + | ||
111 | install (FILES ${INCLUDE_FILES} DESTINATION include/morfeusz) | 109 | install (FILES ${INCLUDE_FILES} DESTINATION include/morfeusz) |
112 | install (TARGETS libmorfeusz DESTINATION ${TARGET_LIB_DIR}) | 110 | install (TARGETS libmorfeusz DESTINATION ${TARGET_LIB_DIR}) |
113 | install (TARGETS morfeusz_analyzer morfeusz_generator DESTINATION bin) | 111 | install (TARGETS morfeusz_analyzer morfeusz_generator DESTINATION bin) |
morfeusz/Environment.cpp
@@ -8,14 +8,11 @@ | @@ -8,14 +8,11 @@ | ||
8 | #include <vector> | 8 | #include <vector> |
9 | #include <algorithm> | 9 | #include <algorithm> |
10 | #include "Environment.hpp" | 10 | #include "Environment.hpp" |
11 | -#include "decoder/InterpretedChunksDecoder.hpp" | ||
12 | -#include "deserializer/MorphDeserializer.hpp" | 11 | +#include "deserialization/MorphDeserializer.hpp" |
13 | #include "exceptions.hpp" | 12 | #include "exceptions.hpp" |
14 | -#include "decoder/InterpretedChunksDecoder4Analyzer.hpp" | ||
15 | -#include "decoder/InterpretedChunksDecoder4Generator.hpp" | ||
16 | - | ||
17 | -//class InterpretedChunksDecoder4Analyzer; | ||
18 | -//class InterpretedChunksDecoder4Generator; | 13 | +#include "deserialization/morphInterps/InterpretedChunksDecoder.hpp" |
14 | +#include "deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.hpp" | ||
15 | +#include "deserialization/morphInterps/InterpretedChunksDecoder4Generator.hpp" | ||
19 | 16 | ||
20 | static Deserializer<InterpsGroupsReader>& initializeDeserializer(MorfeuszProcessorType processorType) { | 17 | static Deserializer<InterpsGroupsReader>& initializeDeserializer(MorfeuszProcessorType processorType) { |
21 | static Deserializer<InterpsGroupsReader> *analyzerDeserializer | 18 | static Deserializer<InterpsGroupsReader> *analyzerDeserializer |
morfeusz/Environment.hpp
@@ -13,56 +13,142 @@ | @@ -13,56 +13,142 @@ | ||
13 | class InterpretedChunksDecoder; | 13 | class InterpretedChunksDecoder; |
14 | class CasePatternHelper; | 14 | class CasePatternHelper; |
15 | 15 | ||
16 | -#include "charset/CaseConverter.hpp" | 16 | +#include "case/CaseConverter.hpp" |
17 | #include "charset/CharsetConverter.hpp" | 17 | #include "charset/CharsetConverter.hpp" |
18 | #include "fsa/fsa.hpp" | 18 | #include "fsa/fsa.hpp" |
19 | #include "segrules/segrules.hpp" | 19 | #include "segrules/segrules.hpp" |
20 | #include "const.hpp" | 20 | #include "const.hpp" |
21 | #include "Tagset.hpp" | 21 | #include "Tagset.hpp" |
22 | -//#include "InterpretedChunksDecoder.hpp" | ||
23 | #include "InterpsGroup.hpp" | 22 | #include "InterpsGroup.hpp" |
24 | -#include "CasePatternHelper.hpp" | 23 | +#include "case/CasePatternHelper.hpp" |
25 | #include "Qualifiers.hpp" | 24 | #include "Qualifiers.hpp" |
26 | -#include "deserializer/InterpsGroupsReader.hpp" | 25 | +#include "deserialization/InterpsGroupsReader.hpp" |
27 | 26 | ||
28 | struct InterpsGroup; | 27 | struct InterpsGroup; |
29 | typedef FSA<InterpsGroupsReader> FSAType; | 28 | typedef FSA<InterpsGroupsReader> FSAType; |
30 | -//typedef FSA< std::vector<InterpsGroup > > FSAType; | ||
31 | 29 | ||
30 | +/** | ||
31 | + * This class contains data required for morphological analysis/synthesis. | ||
32 | + * It contains references to dictionary automaton, charset converter, tagset data etc. | ||
33 | + * All of these can be changed by setters, changing Morfeusz behavior (different dictionary, charset, and other options). | ||
34 | + */ | ||
32 | class Environment { | 35 | class Environment { |
33 | public: | 36 | public: |
37 | + /** | ||
38 | + * Creates default environment with given initial charset, processor type (analyzer/generator) and default dictionary data ptr. | ||
39 | + * | ||
40 | + * @param charset | ||
41 | + * @param morfeuszProcessor | ||
42 | + * @param fileStartPtr | ||
43 | + */ | ||
34 | Environment( | 44 | Environment( |
35 | MorfeuszCharset charset, | 45 | MorfeuszCharset charset, |
36 | MorfeuszProcessorType morfeuszProcessor, | 46 | MorfeuszProcessorType morfeuszProcessor, |
37 | const unsigned char* fileStartPtr); | 47 | const unsigned char* fileStartPtr); |
38 | 48 | ||
49 | + /** | ||
50 | + * Sets charset for this environment. | ||
51 | + * | ||
52 | + * @param charset | ||
53 | + */ | ||
39 | void setCharset(MorfeuszCharset charset); | 54 | void setCharset(MorfeuszCharset charset); |
40 | 55 | ||
56 | + /** | ||
57 | + * Sets case sensitivity options. | ||
58 | + * | ||
59 | + * @param caseSensitive - if true, interpretations not matching case will be discarded. | ||
60 | + */ | ||
41 | void setCaseSensitive(bool caseSensitive); | 61 | void setCaseSensitive(bool caseSensitive); |
42 | 62 | ||
63 | + /** | ||
64 | + * Gets charset converter that is currently used by this environment. | ||
65 | + * Changed by setting charset. | ||
66 | + * | ||
67 | + * @return - reference to charset converter. | ||
68 | + */ | ||
43 | const CharsetConverter& getCharsetConverter() const; | 69 | const CharsetConverter& getCharsetConverter() const; |
44 | 70 | ||
71 | + /** | ||
72 | + * Returns case converter that is currently used by this environment. | ||
73 | + * Changed by setting case sensitivity option. | ||
74 | + * | ||
75 | + * @return - reference to case converter. | ||
76 | + */ | ||
45 | const CaseConverter& getCaseConverter() const; | 77 | const CaseConverter& getCaseConverter() const; |
46 | 78 | ||
79 | + /** | ||
80 | + * Sets new tagset for this environment. | ||
81 | + * | ||
82 | + * @param tagset | ||
83 | + */ | ||
47 | void setTagset(const Tagset& tagset); | 84 | void setTagset(const Tagset& tagset); |
85 | + | ||
86 | + /** | ||
87 | + * Gets currently used tagset. | ||
88 | + * | ||
89 | + * @return | ||
90 | + */ | ||
48 | const Tagset& getTagset() const; | 91 | const Tagset& getTagset() const; |
49 | 92 | ||
93 | + /** | ||
94 | + * Sets binary dictionary file used by this environment. | ||
95 | + * | ||
96 | + * @param filename - filename of the dictionary | ||
97 | + */ | ||
50 | void setFSAFile(const std::string& filename); | 98 | void setFSAFile(const std::string& filename); |
51 | 99 | ||
100 | + /** | ||
101 | + * Sets segmentation rules option. | ||
102 | + * | ||
103 | + * @param option | ||
104 | + * @param value | ||
105 | + */ | ||
52 | void setSegrulesOption(const std::string& option, const std::string& value); | 106 | void setSegrulesOption(const std::string& option, const std::string& value); |
53 | 107 | ||
108 | + /** | ||
109 | + * Gets segmentation rules automaton. | ||
110 | + * | ||
111 | + * @return | ||
112 | + */ | ||
54 | const SegrulesFSA& getCurrentSegrulesFSA() const; | 113 | const SegrulesFSA& getCurrentSegrulesFSA() const; |
55 | 114 | ||
115 | + /** | ||
116 | + * Gets dictionary automaton. | ||
117 | + * | ||
118 | + * @return | ||
119 | + */ | ||
56 | const FSAType& getFSA() const; | 120 | const FSAType& getFSA() const; |
57 | 121 | ||
122 | + /** | ||
123 | + * Returns decoder that converts interpretations to external format. | ||
124 | + * @return | ||
125 | + */ | ||
58 | const InterpretedChunksDecoder& getInterpretedChunksDecoder() const; | 126 | const InterpretedChunksDecoder& getInterpretedChunksDecoder() const; |
59 | 127 | ||
128 | + /** | ||
129 | + * Gets processor type (info if this is analyzer or generator environment) | ||
130 | + * @return | ||
131 | + */ | ||
60 | MorfeuszProcessorType getProcessorType() const; | 132 | MorfeuszProcessorType getProcessorType() const; |
61 | 133 | ||
134 | + /** | ||
135 | + * Return current case pattern helper | ||
136 | + * | ||
137 | + * @return | ||
138 | + */ | ||
62 | const CasePatternHelper& getCasePatternHelper() const; | 139 | const CasePatternHelper& getCasePatternHelper() const; |
63 | 140 | ||
141 | + /** | ||
142 | + * Return current qualifiers helper. | ||
143 | + * @return | ||
144 | + */ | ||
64 | const Qualifiers& getQualifiersHelper() const; | 145 | const Qualifiers& getQualifiersHelper() const; |
65 | 146 | ||
147 | + /** | ||
148 | + * Returns true iff given codepoint denotes a separator char for ign handling. | ||
149 | + * @param codepoint | ||
150 | + * @return | ||
151 | + */ | ||
66 | bool isSeparator(uint32_t codepoint) const; | 152 | bool isSeparator(uint32_t codepoint) const; |
67 | 153 | ||
68 | virtual ~Environment(); | 154 | virtual ~Environment(); |
morfeusz/InflexionGraph.hpp
@@ -13,6 +13,10 @@ | @@ -13,6 +13,10 @@ | ||
13 | #include <utility> | 13 | #include <utility> |
14 | #include "InterpretedChunk.hpp" | 14 | #include "InterpretedChunk.hpp" |
15 | 15 | ||
16 | +/** | ||
17 | + * This class builds the inflection graph (indexes the nodes, takes into account segments marked as "weak"). | ||
18 | + * Takes care to keep the number of nodes as small as possible. | ||
19 | + */ | ||
16 | class InflexionGraph { | 20 | class InflexionGraph { |
17 | public: | 21 | public: |
18 | 22 | ||
@@ -24,30 +28,60 @@ public: | @@ -24,30 +28,60 @@ public: | ||
24 | InterpretedChunk chunk; | 28 | InterpretedChunk chunk; |
25 | unsigned int nextNode; | 29 | unsigned int nextNode; |
26 | }; | 30 | }; |
27 | - | 31 | + |
32 | + /** | ||
33 | + * Adds new path to the graph. | ||
34 | + * | ||
35 | + * @param path | ||
36 | + * @param weak | ||
37 | + */ | ||
28 | void addPath(const std::vector<InterpretedChunk>& path, bool weak); | 38 | void addPath(const std::vector<InterpretedChunk>& path, bool weak); |
29 | 39 | ||
30 | // void getResults(const Tagset& tagset, const CharsetConverter& charsetConverter, std::vector<MorphInterpretation>& results); | 40 | // void getResults(const Tagset& tagset, const CharsetConverter& charsetConverter, std::vector<MorphInterpretation>& results); |
31 | 41 | ||
42 | + /** | ||
43 | + * Return current graph. | ||
44 | + * | ||
45 | + * @return | ||
46 | + */ | ||
32 | const std::vector< std::vector<InflexionGraph::Edge> >& getTheGraph(); | 47 | const std::vector< std::vector<InflexionGraph::Edge> >& getTheGraph(); |
33 | 48 | ||
49 | + /** | ||
50 | + * True iff the graph is empty. | ||
51 | + * | ||
52 | + * @return | ||
53 | + */ | ||
34 | bool empty() const; | 54 | bool empty() const; |
35 | 55 | ||
56 | + /** | ||
57 | + * Clears the graph. | ||
58 | + */ | ||
36 | void clear(); | 59 | void clear(); |
37 | 60 | ||
38 | - | ||
39 | - // virtual ~FlexionGraph(); | ||
40 | private: | 61 | private: |
41 | 62 | ||
42 | typedef std::pair<const char*, int> PathElement; | 63 | typedef std::pair<const char*, int> PathElement; |
43 | typedef std::set<PathElement> Path; | 64 | typedef std::set<PathElement> Path; |
44 | 65 | ||
66 | + /** | ||
67 | + * Adds an edge that starts a chunk. | ||
68 | + * | ||
69 | + * @param e | ||
70 | + */ | ||
45 | void addStartEdge(const Edge& e); | 71 | void addStartEdge(const Edge& e); |
46 | - | 72 | + |
73 | + /** | ||
74 | + * Adds non-starting edge. | ||
75 | + * @param startNode | ||
76 | + * @param e | ||
77 | + */ | ||
47 | void addMiddleEdge(unsigned int startNode, const Edge& e); | 78 | void addMiddleEdge(unsigned int startNode, const Edge& e); |
48 | 79 | ||
80 | + /** | ||
81 | + * Minimizes the graph so it contains as few nodes as possible. | ||
82 | + */ | ||
49 | void minimizeGraph(); | 83 | void minimizeGraph(); |
50 | - | 84 | + |
51 | bool canMergeNodes(unsigned int node1, unsigned int node2); | 85 | bool canMergeNodes(unsigned int node1, unsigned int node2); |
52 | 86 | ||
53 | void doMergeNodes(unsigned int node1, unsigned int node2); | 87 | void doMergeNodes(unsigned int node1, unsigned int node2); |
morfeusz/InterpretedChunk.hpp
@@ -11,16 +11,59 @@ | @@ -11,16 +11,59 @@ | ||
11 | #include <vector> | 11 | #include <vector> |
12 | #include "InterpsGroup.hpp" | 12 | #include "InterpsGroup.hpp" |
13 | 13 | ||
14 | +/** | ||
15 | + * Denotes a part of text that has some not-yet-deserialized interpretations attached to it. | ||
16 | + */ | ||
14 | struct InterpretedChunk { | 17 | struct InterpretedChunk { |
18 | + | ||
19 | + /** | ||
20 | + * The type of segment for this chunk. | ||
21 | + */ | ||
15 | unsigned char segmentType; | 22 | unsigned char segmentType; |
23 | + | ||
24 | + /** | ||
25 | + * Pointer to the start of this chunk's text | ||
26 | + */ | ||
16 | const char* textStartPtr; | 27 | const char* textStartPtr; |
28 | + | ||
29 | + /** | ||
30 | + * Pointer to the end of this chunk's text (exclusive) | ||
31 | + */ | ||
17 | const char* textEndPtr; | 32 | const char* textEndPtr; |
33 | + | ||
34 | + /** | ||
35 | + * Pointer to the start of this chunk's binary data. | ||
36 | + */ | ||
18 | const unsigned char* interpsGroupPtr; | 37 | const unsigned char* interpsGroupPtr; |
38 | + | ||
39 | + /** | ||
40 | + * Pointer to the end of this chunk's binary data (exclusive) | ||
41 | + */ | ||
19 | const unsigned char* interpsEndPtr; | 42 | const unsigned char* interpsEndPtr; |
43 | + | ||
44 | + /** | ||
45 | + * true iff this chunk shifts orth to the one to the right of it (it is "A" in "A> B") | ||
46 | + */ | ||
20 | bool shiftOrth; | 47 | bool shiftOrth; |
48 | + | ||
49 | + /** | ||
50 | + * true iff this chunk has attached data from its prefix chunk (when it is "B" segment in "A> B" segmentation rule) | ||
51 | + */ | ||
21 | bool orthWasShifted; | 52 | bool orthWasShifted; |
53 | + | ||
54 | + /** | ||
55 | + * Number of codepoints this chunk consists of. | ||
56 | + */ | ||
22 | int codepointsNum; | 57 | int codepointsNum; |
58 | + | ||
59 | + /** | ||
60 | + * Chunks that are in the prefix segments (those with ">" in segmentation rules, i.e. "dig>* dig") | ||
61 | + */ | ||
23 | std::vector<InterpretedChunk> prefixChunks; | 62 | std::vector<InterpretedChunk> prefixChunks; |
63 | + | ||
64 | + /** | ||
65 | + * Homonym id specified by the user. | ||
66 | + */ | ||
24 | std::string requiredHomonymId; | 67 | std::string requiredHomonymId; |
25 | }; | 68 | }; |
26 | 69 |
morfeusz/InterpsGroup.hpp
@@ -10,6 +10,10 @@ | @@ -10,6 +10,10 @@ | ||
10 | 10 | ||
11 | #include <stdint.h> | 11 | #include <stdint.h> |
12 | 12 | ||
13 | +/** | ||
14 | + * A structure representing one segment | ||
15 | + * with pointer to its interpretations, case patterns etc. | ||
16 | + */ | ||
13 | struct InterpsGroup { | 17 | struct InterpsGroup { |
14 | unsigned char type; | 18 | unsigned char type; |
15 | uint16_t size; | 19 | uint16_t size; |
morfeusz/Morfeusz.cpp
@@ -11,15 +11,13 @@ | @@ -11,15 +11,13 @@ | ||
11 | #include "utils.hpp" | 11 | #include "utils.hpp" |
12 | #include "data/default_fsa.hpp" | 12 | #include "data/default_fsa.hpp" |
13 | #include "Morfeusz.hpp" | 13 | #include "Morfeusz.hpp" |
14 | -#include "decoder/InterpretedChunksDecoder.hpp" | 14 | +#include "deserialization/morphInterps/InterpretedChunksDecoder.hpp" |
15 | #include "charset/CharsetConverter.hpp" | 15 | #include "charset/CharsetConverter.hpp" |
16 | #include "charset/charset_utils.hpp" | 16 | #include "charset/charset_utils.hpp" |
17 | -#include "charset/CaseConverter.hpp" | 17 | +#include "case/CaseConverter.hpp" |
18 | #include "segrules/segrules.hpp" | 18 | #include "segrules/segrules.hpp" |
19 | #include "const.hpp" | 19 | #include "const.hpp" |
20 | -#include "deserializationUtils.hpp" | ||
21 | #include "charset/utf8.h" | 20 | #include "charset/utf8.h" |
22 | -#include "compressionByteUtils.hpp" | ||
23 | 21 | ||
24 | // TODO - konstruktor kopiujący działający Tak-Jak-Trzeba | 22 | // TODO - konstruktor kopiujący działający Tak-Jak-Trzeba |
25 | 23 |
morfeusz/Morfeusz.hpp
@@ -13,12 +13,11 @@ | @@ -13,12 +13,11 @@ | ||
13 | #include <vector> | 13 | #include <vector> |
14 | #include <map> | 14 | #include <map> |
15 | #include <set> | 15 | #include <set> |
16 | -#include "EncodedInterpretation.hpp" | ||
17 | #include "fsa/fsa.hpp" | 16 | #include "fsa/fsa.hpp" |
18 | #include "MorphInterpretation.hpp" | 17 | #include "MorphInterpretation.hpp" |
19 | #include "InterpsGroup.hpp" | 18 | #include "InterpsGroup.hpp" |
19 | +#include "case/CaseConverter.hpp" | ||
20 | #include "charset/CharsetConverter.hpp" | 20 | #include "charset/CharsetConverter.hpp" |
21 | -#include "charset/CaseConverter.hpp" | ||
22 | #include "charset/TextReader.hpp" | 21 | #include "charset/TextReader.hpp" |
23 | #include "InterpretedChunk.hpp" | 22 | #include "InterpretedChunk.hpp" |
24 | #include "InflexionGraph.hpp" | 23 | #include "InflexionGraph.hpp" |
@@ -30,8 +29,8 @@ | @@ -30,8 +29,8 @@ | ||
30 | #include "segrules/segrules.hpp" | 29 | #include "segrules/segrules.hpp" |
31 | #include "segrules/SegrulesFSA.hpp" | 30 | #include "segrules/SegrulesFSA.hpp" |
32 | 31 | ||
33 | -#include "deserializer/InterpsGroupsReader.hpp" | ||
34 | -#include "deserializer/MorphDeserializer.hpp" | 32 | +#include "deserialization/InterpsGroupsReader.hpp" |
33 | +#include "deserialization/MorphDeserializer.hpp" | ||
35 | 34 | ||
36 | class Morfeusz; | 35 | class Morfeusz; |
37 | class ResultsIterator; | 36 | class ResultsIterator; |
morfeusz/MorfeuszOptions.hpp
@@ -10,6 +10,9 @@ | @@ -10,6 +10,9 @@ | ||
10 | 10 | ||
11 | #include "const.hpp" | 11 | #include "const.hpp" |
12 | 12 | ||
13 | +/** | ||
14 | + * Represents options for Morfeusz analyzer/generator. | ||
15 | + */ | ||
13 | struct MorfeuszOptions { | 16 | struct MorfeuszOptions { |
14 | bool caseSensitive; | 17 | bool caseSensitive; |
15 | MorfeuszCharset encoding; | 18 | MorfeuszCharset encoding; |
morfeusz/MorfeuszConfig.hpp.in renamed to morfeusz/MorfeuszVersion.hpp.in
@@ -5,6 +5,4 @@ | @@ -5,6 +5,4 @@ | ||
5 | * Created on November 29, 2013, 10:03 PM | 5 | * Created on November 29, 2013, 10:03 PM |
6 | */ | 6 | */ |
7 | 7 | ||
8 | -#define Morfeusz_VERSION_MAJOR @Morfeusz_VERSION_MAJOR@ | ||
9 | -#define Morfeusz_VERSION_MINOR @Morfeusz_VERSION_MINOR@ | ||
10 | - | 8 | +#define MORFEUSZ_VERSION "@Morfeusz_VERSION@" |
morfeusz/MorphDeserializer.cpp deleted
1 | -/* | ||
2 | - * File: MorphDeserializer.cpp | ||
3 | - * Author: mlenart | ||
4 | - * | ||
5 | - * Created on 12 listopad 2013, 15:31 | ||
6 | - */ | ||
7 | - | ||
8 | -#include <map> | ||
9 | -#include <algorithm> | ||
10 | -#include "MorphDeserializer.hpp" | ||
11 | -#include "EncodedInterpretation.hpp" | ||
12 | -#include "InterpsGroup.hpp" | ||
13 | -#include "deserializationUtils.hpp" | ||
14 | - | ||
15 | -MorphDeserializer::MorphDeserializer() { | ||
16 | -} | ||
17 | - | ||
18 | -MorphDeserializer::~MorphDeserializer() { | ||
19 | -} | ||
20 | - | ||
21 | -long MorphDeserializer::deserialize(const unsigned char* ptr, vector<InterpsGroup>& interps) const { | ||
22 | - const unsigned char* currPtr = ptr; | ||
23 | - uint8_t interpTypesNum = readInt8(currPtr); | ||
24 | - interps.clear(); | ||
25 | - interps.reserve(interpTypesNum); | ||
26 | - for (unsigned int i = 0; i < interpTypesNum; i++) { | ||
27 | - InterpsGroup ig; | ||
28 | - ig.type = readInt8(currPtr); | ||
29 | - ig.size = readInt16(currPtr); | ||
30 | - ig.ptr = currPtr; | ||
31 | - currPtr += ig.size; | ||
32 | - interps.push_back(ig); | ||
33 | - } | ||
34 | - return currPtr - ptr; | ||
35 | -} |
morfeusz/MorphInterpretation.cpp
@@ -8,7 +8,6 @@ | @@ -8,7 +8,6 @@ | ||
8 | #include <string> | 8 | #include <string> |
9 | #include <sstream> | 9 | #include <sstream> |
10 | #include "MorphInterpretation.hpp" | 10 | #include "MorphInterpretation.hpp" |
11 | -#include "EncodedInterpretation.hpp" | ||
12 | #include "const.hpp" | 11 | #include "const.hpp" |
13 | 12 | ||
14 | using namespace std; | 13 | using namespace std; |
@@ -32,11 +31,13 @@ tagnum(tagnum), | @@ -32,11 +31,13 @@ tagnum(tagnum), | ||
32 | namenum(namenum), | 31 | namenum(namenum), |
33 | tag(env.getTagset().getTag(tagnum, env.getCharsetConverter())), | 32 | tag(env.getTagset().getTag(tagnum, env.getCharsetConverter())), |
34 | name(env.getTagset().getName(namenum, env.getCharsetConverter())), | 33 | name(env.getTagset().getName(namenum, env.getCharsetConverter())), |
35 | -qualifiers(env.getQualifiersHelper().getQualifiers(qualifiersNum)) { | 34 | +qualifiers(&env.getQualifiersHelper().getQualifiers(qualifiersNum)) { |
36 | 35 | ||
37 | 36 | ||
38 | } | 37 | } |
39 | 38 | ||
39 | +static const vector<std::string> emptyQualifiers; | ||
40 | + | ||
40 | MorphInterpretation::MorphInterpretation() | 41 | MorphInterpretation::MorphInterpretation() |
41 | : startNode(), | 42 | : startNode(), |
42 | endNode(), | 43 | endNode(), |
@@ -47,7 +48,7 @@ tagnum(), | @@ -47,7 +48,7 @@ tagnum(), | ||
47 | namenum(), | 48 | namenum(), |
48 | tag(), | 49 | tag(), |
49 | name(), | 50 | name(), |
50 | -qualifiers(){ | 51 | +qualifiers(&emptyQualifiers){ |
51 | 52 | ||
52 | } | 53 | } |
53 | 54 | ||
@@ -65,7 +66,7 @@ namenum(0), | @@ -65,7 +66,7 @@ namenum(0), | ||
65 | // qualifiersNum(0), | 66 | // qualifiersNum(0), |
66 | tag(env.getTagset().getTag(0, env.getCharsetConverter())), | 67 | tag(env.getTagset().getTag(0, env.getCharsetConverter())), |
67 | name(env.getTagset().getName(0, env.getCharsetConverter())), | 68 | name(env.getTagset().getName(0, env.getCharsetConverter())), |
68 | -qualifiers() { | 69 | +qualifiers(&emptyQualifiers) { |
69 | 70 | ||
70 | } | 71 | } |
71 | 72 | ||
@@ -126,7 +127,7 @@ const std::string& MorphInterpretation::getName() const { | @@ -126,7 +127,7 @@ const std::string& MorphInterpretation::getName() const { | ||
126 | } | 127 | } |
127 | 128 | ||
128 | const vector<string>& MorphInterpretation::getQualifiers() const { | 129 | const vector<string>& MorphInterpretation::getQualifiers() const { |
129 | - return this->qualifiers; | 130 | + return *this->qualifiers; |
130 | } | 131 | } |
131 | 132 | ||
132 | static inline string getQualifiersStr(const MorphInterpretation& mi) { | 133 | static inline string getQualifiersStr(const MorphInterpretation& mi) { |
@@ -157,7 +158,7 @@ std::string MorphInterpretation::toString(bool includeNodeNumbers) const { | @@ -157,7 +158,7 @@ std::string MorphInterpretation::toString(bool includeNodeNumbers) const { | ||
157 | if (!name.empty()) { | 158 | if (!name.empty()) { |
158 | res << "," << name; | 159 | res << "," << name; |
159 | } | 160 | } |
160 | - if (!qualifiers.empty()) { | 161 | + if (!qualifiers->empty()) { |
161 | res << "," << getQualifiersStr(*this); | 162 | res << "," << getQualifiersStr(*this); |
162 | } | 163 | } |
163 | return res.str(); | 164 | return res.str(); |
morfeusz/MorphInterpretation.hpp
@@ -13,10 +13,12 @@ | @@ -13,10 +13,12 @@ | ||
13 | class Environment; | 13 | class Environment; |
14 | 14 | ||
15 | #include "Tagset.hpp" | 15 | #include "Tagset.hpp" |
16 | -#include "EncodedInterpretation.hpp" | ||
17 | #include "charset/CharsetConverter.hpp" | 16 | #include "charset/CharsetConverter.hpp" |
18 | #include "Environment.hpp" | 17 | #include "Environment.hpp" |
19 | 18 | ||
19 | +/** | ||
20 | + * Morphological interpretation as seen by the user in the analysis/generation results. | ||
21 | + */ | ||
20 | class MorphInterpretation { | 22 | class MorphInterpretation { |
21 | public: | 23 | public: |
22 | MorphInterpretation( | 24 | MorphInterpretation( |
@@ -59,7 +61,7 @@ private: | @@ -59,7 +61,7 @@ private: | ||
59 | int namenum; | 61 | int namenum; |
60 | std::string tag; | 62 | std::string tag; |
61 | std::string name; | 63 | std::string name; |
62 | - std::vector<std::string> qualifiers; | 64 | + const std::vector<std::string>* qualifiers; |
63 | }; | 65 | }; |
64 | 66 | ||
65 | #endif /* MORPHINTERPRETATION_HPP */ | 67 | #endif /* MORPHINTERPRETATION_HPP */ |
morfeusz/Qualifiers.cpp
@@ -7,7 +7,7 @@ | @@ -7,7 +7,7 @@ | ||
7 | 7 | ||
8 | #include <iostream> | 8 | #include <iostream> |
9 | #include "Qualifiers.hpp" | 9 | #include "Qualifiers.hpp" |
10 | -#include "deserializationUtils.hpp" | 10 | +#include "deserialization/deserializationUtils.hpp" |
11 | #include "fsa/const.hpp" | 11 | #include "fsa/const.hpp" |
12 | 12 | ||
13 | using namespace std; | 13 | using namespace std; |
@@ -32,13 +32,9 @@ qualifiers() { | @@ -32,13 +32,9 @@ qualifiers() { | ||
32 | } | 32 | } |
33 | } | 33 | } |
34 | 34 | ||
35 | -vector<string> Qualifiers::getQualifiers(int n) const { | 35 | +const vector<string>& Qualifiers::getQualifiers(int n) const { |
36 | return this->qualifiers.at(n); | 36 | return this->qualifiers.at(n); |
37 | } | 37 | } |
38 | 38 | ||
39 | -unsigned int Qualifiers::getQualifiersNum() const { | ||
40 | - return (unsigned int) this->qualifiers.size(); | ||
41 | -} | ||
42 | - | ||
43 | Qualifiers::~Qualifiers() { | 39 | Qualifiers::~Qualifiers() { |
44 | } | 40 | } |
morfeusz/Qualifiers.hpp
@@ -12,11 +12,21 @@ | @@ -12,11 +12,21 @@ | ||
12 | #include <string> | 12 | #include <string> |
13 | #include <stdint.h> | 13 | #include <stdint.h> |
14 | 14 | ||
15 | +/** | ||
16 | + * Helper class used for decoding qualifiers set number into a vector of strings. | ||
17 | + * | ||
18 | + * @param ptr | ||
19 | + */ | ||
15 | class Qualifiers { | 20 | class Qualifiers { |
16 | public: | 21 | public: |
17 | explicit Qualifiers(const unsigned char* ptr); | 22 | explicit Qualifiers(const unsigned char* ptr); |
18 | - std::vector<std::string> getQualifiers(int n) const; | ||
19 | - unsigned int getQualifiersNum() const; | 23 | + |
24 | + /** | ||
25 | + * Returns vector of qualifiers represented as strings. | ||
26 | + * @param n - the index in the qualifiers table. | ||
27 | + * @return - vector of qualifiers represented as strings. | ||
28 | + */ | ||
29 | + const std::vector<std::string>& getQualifiers(int n) const; | ||
20 | virtual ~Qualifiers(); | 30 | virtual ~Qualifiers(); |
21 | private: | 31 | private: |
22 | std::vector< std::vector<std::string> > qualifiers; | 32 | std::vector< std::vector<std::string> > qualifiers; |
morfeusz/Tagset.cpp
@@ -3,8 +3,7 @@ | @@ -3,8 +3,7 @@ | ||
3 | #include "Tagset.hpp" | 3 | #include "Tagset.hpp" |
4 | #include "fsa/const.hpp" | 4 | #include "fsa/const.hpp" |
5 | #include "utils.hpp" | 5 | #include "utils.hpp" |
6 | -#include "endianness.hpp" | ||
7 | -#include "deserializationUtils.hpp" | 6 | +#include "deserialization/deserializationUtils.hpp" |
8 | 7 | ||
9 | using namespace std; | 8 | using namespace std; |
10 | 9 |
morfeusz/Tagset.hpp
@@ -12,11 +12,34 @@ | @@ -12,11 +12,34 @@ | ||
12 | #include <vector> | 12 | #include <vector> |
13 | #include "charset/CharsetConverter.hpp" | 13 | #include "charset/CharsetConverter.hpp" |
14 | 14 | ||
15 | +/** | ||
16 | + * Represents a tagset | ||
17 | + */ | ||
15 | class Tagset { | 18 | class Tagset { |
16 | public: | 19 | public: |
20 | + /** | ||
21 | + * Constructs a tagset from binary data. | ||
22 | + * | ||
23 | + * @param fsaData - pointer to the beginning of automaton data. | ||
24 | + */ | ||
17 | explicit Tagset(const unsigned char* fsaData); | 25 | explicit Tagset(const unsigned char* fsaData); |
18 | -// Tagset(const Tagset& tagset); | 26 | + |
27 | + /** | ||
28 | + * Returns tag (denoted by its index) as a string. | ||
29 | + * | ||
30 | + * @param tagNum - tag index in the tagset. | ||
31 | + * @param charsetConverter - the charset converter used to convert from tagset internal encoding (UTF-8) into target encoding. | ||
32 | + * @return - the tag encoded as string. | ||
33 | + */ | ||
19 | const std::string getTag(const int tagNum, const CharsetConverter& charsetConverter) const; | 34 | const std::string getTag(const int tagNum, const CharsetConverter& charsetConverter) const; |
35 | + | ||
36 | + /** | ||
37 | + * Returns named entity type (denoted by its index) as a string. | ||
38 | + * | ||
39 | + * @param nameNum - name index in the tagset. | ||
40 | + * @param charsetConverter - the charset converter used to convert from tagset internal encoding (UTF-8) into target encoding. | ||
41 | + * @return - the named entity type encoded as string. | ||
42 | + */ | ||
20 | const std::string getName(const int nameNum, const CharsetConverter& charsetConverter) const; | 43 | const std::string getName(const int nameNum, const CharsetConverter& charsetConverter) const; |
21 | private: | 44 | private: |
22 | std::vector<std::string> tags; | 45 | std::vector<std::string> tags; |
morfeusz/charset/CaseConverter.cpp renamed to morfeusz/case/CaseConverter.cpp
morfeusz/charset/CaseConverter.hpp renamed to morfeusz/case/CaseConverter.hpp
morfeusz/CasePatternHelper.cpp renamed to morfeusz/case/CasePatternHelper.cpp
morfeusz/CasePatternHelper.hpp renamed to morfeusz/case/CasePatternHelper.hpp
@@ -10,12 +10,15 @@ | @@ -10,12 +10,15 @@ | ||
10 | 10 | ||
11 | #include <vector> | 11 | #include <vector> |
12 | #include "InterpsGroup.hpp" | 12 | #include "InterpsGroup.hpp" |
13 | -#include "CasePatternHelper.hpp" | ||
14 | -#include "compressionByteUtils.hpp" | 13 | +#include "deserialization/morphInterps/compressionByteUtils.hpp" |
15 | #include "Environment.hpp" | 14 | #include "Environment.hpp" |
16 | 15 | ||
17 | class Environment; | 16 | class Environment; |
18 | 17 | ||
18 | +/** | ||
19 | + * Utility class used for case-sensitive filtering of interpretations | ||
20 | + * (e.g. to filter out "berlin" and keep "Berlin") | ||
21 | + */ | ||
19 | class CasePatternHelper { | 22 | class CasePatternHelper { |
20 | public: | 23 | public: |
21 | 24 | ||
@@ -23,10 +26,23 @@ public: | @@ -23,10 +26,23 @@ public: | ||
23 | 26 | ||
24 | } | 27 | } |
25 | 28 | ||
29 | + /** | ||
30 | + * Set if this case pattern helper cares about case-sensitivity | ||
31 | + * | ||
32 | + * @param caseSensitive | ||
33 | + */ | ||
26 | void setCaseSensitive(bool caseSensitive) { | 34 | void setCaseSensitive(bool caseSensitive) { |
27 | this->caseSensitive = caseSensitive; | 35 | this->caseSensitive = caseSensitive; |
28 | } | 36 | } |
29 | - | 37 | + |
38 | + /** | ||
39 | + * Check if given word matches given case pattern | ||
40 | + * | ||
41 | + * @param lowercaseCodepoints - codepoints of the checked word converted to lowercase | ||
42 | + * @param originalCodepoints - codepoints of checked word | ||
43 | + * @param casePattern - vector representing the case pattern (e.g. [False, True] for "mBank") | ||
44 | + * @return - true iff word denoted by given codepoints matches given case pattern | ||
45 | + */ | ||
30 | bool checkCasePattern( | 46 | bool checkCasePattern( |
31 | const std::vector<uint32_t>& lowercaseCodepoints, | 47 | const std::vector<uint32_t>& lowercaseCodepoints, |
32 | const std::vector<uint32_t>& originalCodepoints, | 48 | const std::vector<uint32_t>& originalCodepoints, |
@@ -41,12 +57,28 @@ public: | @@ -41,12 +57,28 @@ public: | ||
41 | return true; | 57 | return true; |
42 | } | 58 | } |
43 | 59 | ||
60 | + /** | ||
61 | + * Check if the given word has a chance of matching any of the case patterns in the given interps group. | ||
62 | + * | ||
63 | + * @param env - environment | ||
64 | + * @param orthStart - pointer to start of word | ||
65 | + * @param orthEnd - pointer to end of word | ||
66 | + * @param ig - interps group | ||
67 | + * @return - true iff word encoded from orthStart to orthEnd | ||
68 | + * matches at least one of the interp group's morph interpretation's case pattern. | ||
69 | + */ | ||
44 | bool checkInterpsGroupOrthCasePatterns( | 70 | bool checkInterpsGroupOrthCasePatterns( |
45 | const Environment& env, | 71 | const Environment& env, |
46 | const char* orthStart, | 72 | const char* orthStart, |
47 | const char* orthEnd, | 73 | const char* orthEnd, |
48 | const InterpsGroup& ig) const; | 74 | const InterpsGroup& ig) const; |
49 | 75 | ||
76 | + /** | ||
77 | + * Deserializes case pattern encoded at given pointer. | ||
78 | + * | ||
79 | + * @param ptr | ||
80 | + * @return - case pattern | ||
81 | + */ | ||
50 | static std::vector<bool> deserializeOneCasePattern(const unsigned char*& ptr); | 82 | static std::vector<bool> deserializeOneCasePattern(const unsigned char*& ptr); |
51 | private: | 83 | private: |
52 | bool caseSensitive; | 84 | bool caseSensitive; |
morfeusz/charset/caseconv.cpp renamed to morfeusz/case/caseconv.cpp
morfeusz/charset/caseconv.hpp renamed to morfeusz/case/caseconv.hpp
@@ -8,6 +8,10 @@ | @@ -8,6 +8,10 @@ | ||
8 | #ifndef CASECONV_HPP | 8 | #ifndef CASECONV_HPP |
9 | #define CASECONV_HPP | 9 | #define CASECONV_HPP |
10 | 10 | ||
11 | +/* | ||
12 | + * Case conversion tables | ||
13 | + */ | ||
14 | + | ||
11 | extern const unsigned int TO_LOWERCASE_TABLE_SIZE; | 15 | extern const unsigned int TO_LOWERCASE_TABLE_SIZE; |
12 | extern const unsigned int EXT_TO_LOWERCASE_TABLE_SIZE; | 16 | extern const unsigned int EXT_TO_LOWERCASE_TABLE_SIZE; |
13 | extern const uint32_t TO_LOWERCASE_TABLE[]; | 17 | extern const uint32_t TO_LOWERCASE_TABLE[]; |
morfeusz/charset/CharsetConverter.cpp
@@ -4,7 +4,7 @@ | @@ -4,7 +4,7 @@ | ||
4 | #include <algorithm> | 4 | #include <algorithm> |
5 | #include <inttypes.h> | 5 | #include <inttypes.h> |
6 | #include <iostream> | 6 | #include <iostream> |
7 | -#include "../endianness.hpp" | 7 | +#include "deserialization/endianness.hpp" |
8 | #include "utf8.h" | 8 | #include "utf8.h" |
9 | #include "CharsetConverter.hpp" | 9 | #include "CharsetConverter.hpp" |
10 | #include "conversion_tables.hpp" | 10 | #include "conversion_tables.hpp" |
morfeusz/charset/TextReader.hpp
morfeusz/outputUtils.hpp renamed to morfeusz/cli/outputUtils.hpp
morfeusz/deserializer/InterpsGroupsReader.cpp renamed to morfeusz/deserialization/InterpsGroupsReader.cpp
@@ -6,7 +6,7 @@ | @@ -6,7 +6,7 @@ | ||
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include "InterpsGroupsReader.hpp" | 8 | #include "InterpsGroupsReader.hpp" |
9 | -#include "../deserializationUtils.hpp" | 9 | +#include "deserialization/deserializationUtils.hpp" |
10 | 10 | ||
11 | InterpsGroupsReader::InterpsGroupsReader() | 11 | InterpsGroupsReader::InterpsGroupsReader() |
12 | : currPtr(NULL), endPtr(NULL) { | 12 | : currPtr(NULL), endPtr(NULL) { |
morfeusz/deserializer/InterpsGroupsReader.hpp renamed to morfeusz/deserialization/InterpsGroupsReader.hpp
morfeusz/deserializer/MorphDeserializer.cpp renamed to morfeusz/deserialization/MorphDeserializer.cpp
@@ -6,7 +6,7 @@ | @@ -6,7 +6,7 @@ | ||
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include "MorphDeserializer.hpp" | 8 | #include "MorphDeserializer.hpp" |
9 | -#include "../deserializationUtils.hpp" | 9 | +#include "deserialization/deserializationUtils.hpp" |
10 | 10 | ||
11 | MorphDeserializer::MorphDeserializer() { | 11 | MorphDeserializer::MorphDeserializer() { |
12 | } | 12 | } |
morfeusz/deserializer/MorphDeserializer.hpp renamed to morfeusz/deserialization/MorphDeserializer.hpp
morfeusz/deserializationUtils.hpp renamed to morfeusz/deserialization/deserializationUtils.hpp
morfeusz/endianness.hpp renamed to morfeusz/deserialization/endianness.hpp
morfeusz/EncodedInterpretation.hpp renamed to morfeusz/deserialization/morphInterps/EncodedInterpretation.hpp
morfeusz/decoder/InterpretedChunksDecoder.cpp renamed to morfeusz/deserialization/morphInterps/InterpretedChunksDecoder.cpp
morfeusz/decoder/InterpretedChunksDecoder.hpp renamed to morfeusz/deserialization/morphInterps/InterpretedChunksDecoder.hpp
@@ -16,11 +16,10 @@ | @@ -16,11 +16,10 @@ | ||
16 | #include "EncodedInterpretation.hpp" | 16 | #include "EncodedInterpretation.hpp" |
17 | #include "InterpretedChunk.hpp" | 17 | #include "InterpretedChunk.hpp" |
18 | #include "EncodedInterpretation.hpp" | 18 | #include "EncodedInterpretation.hpp" |
19 | -#include "charset/CaseConverter.hpp" | 19 | +#include "case/CaseConverter.hpp" |
20 | #include "Environment.hpp" | 20 | #include "Environment.hpp" |
21 | #include "MorphInterpretation.hpp" | 21 | #include "MorphInterpretation.hpp" |
22 | -#include "CasePatternHelper.hpp" | ||
23 | -#include "deserializationUtils.hpp" | 22 | +#include "case/CasePatternHelper.hpp" |
24 | #include "compressionByteUtils.hpp" | 23 | #include "compressionByteUtils.hpp" |
25 | #include "const.hpp" | 24 | #include "const.hpp" |
26 | 25 |
morfeusz/decoder/InterpretedChunksDecoder4Analyzer.cpp renamed to morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.cpp
morfeusz/decoder/InterpretedChunksDecoder4Analyzer.hpp renamed to morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.hpp
morfeusz/decoder/InterpretedChunksDecoder4Generator.cpp renamed to morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Generator.cpp
morfeusz/decoder/InterpretedChunksDecoder4Generator.hpp renamed to morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Generator.hpp
morfeusz/compressionByteUtils.hpp renamed to morfeusz/deserialization/morphInterps/compressionByteUtils.hpp
morfeusz/fsa/cfsa1_impl.hpp
@@ -12,7 +12,7 @@ | @@ -12,7 +12,7 @@ | ||
12 | #include <climits> | 12 | #include <climits> |
13 | 13 | ||
14 | #include "fsa.hpp" | 14 | #include "fsa.hpp" |
15 | -#include "../deserializationUtils.hpp" | 15 | +#include "../deserialization/deserializationUtils.hpp" |
16 | 16 | ||
17 | static const unsigned char CFSA1_ACCEPTING_FLAG = 128; | 17 | static const unsigned char CFSA1_ACCEPTING_FLAG = 128; |
18 | //static const unsigned char CFSA1_ARRAY_FLAG = 64; | 18 | //static const unsigned char CFSA1_ARRAY_FLAG = 64; |
morfeusz/fsa/cfsa2_impl.hpp
@@ -13,7 +13,7 @@ | @@ -13,7 +13,7 @@ | ||
13 | #include <iostream> | 13 | #include <iostream> |
14 | #include "fsa.hpp" | 14 | #include "fsa.hpp" |
15 | #include "../utils.hpp" | 15 | #include "../utils.hpp" |
16 | -#include "../endianness.hpp" | 16 | +#include "../deserialization/endianness.hpp" |
17 | 17 | ||
18 | static const unsigned char HAS_REMAINING_FLAG = 128; | 18 | static const unsigned char HAS_REMAINING_FLAG = 128; |
19 | static const unsigned char ACCEPTING_FLAG = 64; | 19 | static const unsigned char ACCEPTING_FLAG = 64; |
morfeusz/fsa/fsa_impl.hpp
@@ -17,7 +17,7 @@ | @@ -17,7 +17,7 @@ | ||
17 | #include <sstream> | 17 | #include <sstream> |
18 | #include "const.hpp" | 18 | #include "const.hpp" |
19 | #include "../utils.hpp" | 19 | #include "../utils.hpp" |
20 | -#include "../endianness.hpp" | 20 | +#include "../deserialization/endianness.hpp" |
21 | 21 | ||
22 | //using namespace std; | 22 | //using namespace std; |
23 | //static const unsigned int FSA_OFFSET = 6; | 23 | //static const unsigned int FSA_OFFSET = 6; |
morfeusz/java/dupa deleted
1 | - | ||
2 | -# SWIG | ||
3 | -#set(CMAKE_SWIG_OUTDIR swig) | ||
4 | -#FIND_PACKAGE(SWIG REQUIRED) | ||
5 | -FIND_PACKAGE(JNI REQUIRED) | ||
6 | -#INCLUDE(${SWIG_USE_FILE}) | ||
7 | -include(UseJava) | ||
8 | - | ||
9 | -# SWIG Java | ||
10 | -include_directories (${JAVA_INCLUDE_PATH}) | ||
11 | -include_directories (..) | ||
12 | - | ||
13 | -set (SWIG_JAVA_OUTFILE swigJAVA.cpp) | ||
14 | -# set (JAVA_WRAPPER_FILE ${CMAKE_SHARED_LIBRARY_PREFIX}morfeusz${CMAKE_SHARED_LIBRARY_SUFFIX}) | ||
15 | -add_custom_command ( | ||
16 | - OUTPUT ${SWIG_JAVA_OUTFILE} | ||
17 | - COMMAND swig -java -c++ -package pl.waw.ipipan.morfeusz -o ${SWIG_JAVA_OUTFILE} -outdir ${CMAKE_SOURCE_DIR}/jmorfeusz/src/main/java/pl/waw/ipipan/morfeusz ${CMAKE_SOURCE_DIR}/morfeusz/morfeusz.i | ||
18 | - DEPENDS libmorfeusz | ||
19 | -) | ||
20 | -#set (CMAKE_SHARED_LINKER_FLAGS "-s -Os -static-libstdc++ -static-libgcc") | ||
21 | -add_library (jmorfeusz SHARED ${SWIG_JAVA_OUTFILE}) | ||
22 | -target_link_libraries (jmorfeusz ${JAVA_LIBRARIES} libmorfeusz) | ||
23 | -add_dependencies (jmorfeusz ${SWIG_JAVA_OUTFILE}) | ||
24 | - | ||
25 | -#set (CMAKE_SWIG_FLAGS -package pl.waw.ipipan.morfeusz) | ||
26 | -#set (CMAKE_SWIG_OUTDIR ${CMAKE_SOURCE_DIR}/jmorfeusz/src/main/java/pl/waw/ipipan/morfeusz) | ||
27 | - | ||
28 | -#set_source_files_properties (../morfeusz.i PROPERTIES CPLUSPLUS ON) | ||
29 | -#SWIG_ADD_MODULE(jmorfeusz java ../morfeusz.i) | ||
30 | -#SWIG_LINK_LIBRARIES(jmorfeusz ${JAVA_LIBRARIES}) | ||
31 | -#SWIG_LINK_LIBRARIES(jmorfeusz libmorfeusz) | ||
32 | - | ||
33 | -#if (${CMAKE_SYSTEM_NAME} MATCHES "Windows") | ||
34 | -# set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") | ||
35 | -# set (CMAKE_SHARED_LINKER_FLAGS "-s -Os -static-libstdc++ -static-libgcc") | ||
36 | -#endif () |
morfeusz/morfeusz_analyzer.cpp
@@ -12,14 +12,16 @@ | @@ -12,14 +12,16 @@ | ||
12 | #include "fsa/fsa.hpp" | 12 | #include "fsa/fsa.hpp" |
13 | #include "Tagset.hpp" | 13 | #include "Tagset.hpp" |
14 | #include "Morfeusz.hpp" | 14 | #include "Morfeusz.hpp" |
15 | +#include "MorfeuszVersion.hpp" | ||
15 | #include "const.hpp" | 16 | #include "const.hpp" |
16 | 17 | ||
17 | #include "cli/cli.hpp" | 18 | #include "cli/cli.hpp" |
18 | -#include "outputUtils.hpp" | 19 | +#include "cli/outputUtils.hpp" |
19 | 20 | ||
20 | using namespace std; | 21 | using namespace std; |
21 | 22 | ||
22 | int main(int argc, const char** argv) { | 23 | int main(int argc, const char** argv) { |
24 | + cerr << "Morfeusz analyzer, version: " << MORFEUSZ_VERSION << endl; | ||
23 | ez::ezOptionParser& opt = *getOptions(argc, argv, ANALYZER); | 25 | ez::ezOptionParser& opt = *getOptions(argc, argv, ANALYZER); |
24 | Morfeusz morfeusz; | 26 | Morfeusz morfeusz; |
25 | initializeMorfeusz(opt, morfeusz); | 27 | initializeMorfeusz(opt, morfeusz); |
morfeusz/morfeusz_generator.cpp
@@ -11,13 +11,15 @@ | @@ -11,13 +11,15 @@ | ||
11 | #include "fsa/fsa.hpp" | 11 | #include "fsa/fsa.hpp" |
12 | #include "Tagset.hpp" | 12 | #include "Tagset.hpp" |
13 | #include "Morfeusz.hpp" | 13 | #include "Morfeusz.hpp" |
14 | +#include "MorfeuszVersion.hpp" | ||
14 | #include "const.hpp" | 15 | #include "const.hpp" |
15 | #include "cli/cli.hpp" | 16 | #include "cli/cli.hpp" |
16 | -#include "outputUtils.hpp" | 17 | +#include "cli/outputUtils.hpp" |
17 | 18 | ||
18 | using namespace std; | 19 | using namespace std; |
19 | 20 | ||
20 | int main(int argc, const char** argv) { | 21 | int main(int argc, const char** argv) { |
22 | + cerr << "Morfeusz generator, version: " << MORFEUSZ_VERSION << endl; | ||
21 | ez::ezOptionParser& opt = *getOptions(argc, argv, GENERATOR); | 23 | ez::ezOptionParser& opt = *getOptions(argc, argv, GENERATOR); |
22 | Morfeusz morfeusz; | 24 | Morfeusz morfeusz; |
23 | initializeMorfeusz(opt, morfeusz); | 25 | initializeMorfeusz(opt, morfeusz); |
morfeusz/segrules/SegrulesFSA.hpp
@@ -10,7 +10,7 @@ | @@ -10,7 +10,7 @@ | ||
10 | 10 | ||
11 | #include <set> | 11 | #include <set> |
12 | #include <iostream> | 12 | #include <iostream> |
13 | -#include "../deserializationUtils.hpp" | 13 | +#include "../deserialization/deserializationUtils.hpp" |
14 | 14 | ||
15 | struct SegrulesState { | 15 | struct SegrulesState { |
16 | uint16_t offset; | 16 | uint16_t offset; |
morfeusz/segrules/segrules.cpp
1 | 1 | ||
2 | #include "segrules.hpp" | 2 | #include "segrules.hpp" |
3 | -#include "../fsa/fsa.hpp" | ||
4 | -#include "../fsa/const.hpp" | ||
5 | -#include "../deserializationUtils.hpp" | 3 | +#include "fsa/fsa.hpp" |
4 | +#include "fsa/const.hpp" | ||
5 | +#include "deserialization/deserializationUtils.hpp" | ||
6 | 6 | ||
7 | using namespace std; | 7 | using namespace std; |
8 | 8 |
morfeusz/consoleUtils.hpp renamed to morfeusz/test/consoleUtils.hpp
morfeusz/test_recognize_dict.cpp renamed to morfeusz/test/test_recognize_dict.cpp
@@ -8,7 +8,6 @@ | @@ -8,7 +8,6 @@ | ||
8 | //#include <cstdlib> | 8 | //#include <cstdlib> |
9 | #include <sstream> | 9 | #include <sstream> |
10 | #include <iostream> | 10 | #include <iostream> |
11 | -#include "EncodedInterpretation.hpp" | ||
12 | #include "utils.hpp" | 11 | #include "utils.hpp" |
13 | #include "Morfeusz.hpp" | 12 | #include "Morfeusz.hpp" |
14 | #include "MorphInterpretation.hpp" | 13 | #include "MorphInterpretation.hpp" |
morfeusz/test_result_equals.cpp renamed to morfeusz/test/test_result_equals.cpp
morfeusz/test_synth_dict.cpp deleted
morfeusz/wrappers/CMakeLists.txt
0 → 100644
morfeusz/java/CMakeLists.txt renamed to morfeusz/wrappers/java/CMakeLists.txt
@@ -6,14 +6,14 @@ find_package(Java REQUIRED) | @@ -6,14 +6,14 @@ find_package(Java REQUIRED) | ||
6 | 6 | ||
7 | include_directories (${JAVA_INCLUDE_PATH}) | 7 | include_directories (${JAVA_INCLUDE_PATH}) |
8 | include_directories (${JAVA_INCLUDE_PATH2}) | 8 | include_directories (${JAVA_INCLUDE_PATH2}) |
9 | -include_directories (..) | 9 | +include_directories (${CMAKE_SOURCE_DIR}/morfeusz) |
10 | 10 | ||
11 | set (SWIG_JAVA_OUTFILE "${CMAKE_CURRENT_BINARY_DIR}/swigJAVA.cpp") | 11 | set (SWIG_JAVA_OUTFILE "${CMAKE_CURRENT_BINARY_DIR}/swigJAVA.cpp") |
12 | file (COPY pl DESTINATION .) | 12 | file (COPY pl DESTINATION .) |
13 | set (JAVA_SRC_DIR "${CMAKE_CURRENT_BINARY_DIR}/pl/waw/ipipan/morfeusz") | 13 | set (JAVA_SRC_DIR "${CMAKE_CURRENT_BINARY_DIR}/pl/waw/ipipan/morfeusz") |
14 | add_custom_command ( | 14 | add_custom_command ( |
15 | OUTPUT ${SWIG_JAVA_OUTFILE} | 15 | OUTPUT ${SWIG_JAVA_OUTFILE} |
16 | - COMMAND swig -java -c++ -package pl.waw.ipipan.morfeusz -o ${SWIG_JAVA_OUTFILE} -outdir ${JAVA_SRC_DIR} ${CMAKE_SOURCE_DIR}/morfeusz/morfeusz.i | 16 | + COMMAND swig -java -c++ -package pl.waw.ipipan.morfeusz -o ${SWIG_JAVA_OUTFILE} -outdir ${JAVA_SRC_DIR} ${CMAKE_SOURCE_DIR}/morfeusz/wrappers/morfeusz.i |
17 | DEPENDS libmorfeusz | 17 | DEPENDS libmorfeusz |
18 | ) | 18 | ) |
19 | add_custom_target(generate_java_wrapper ALL | 19 | add_custom_target(generate_java_wrapper ALL |
morfeusz/java/README renamed to morfeusz/wrappers/java/README
morfeusz/java/pl/waw/ipipan/morfeusz/app/App.java renamed to morfeusz/wrappers/java/pl/waw/ipipan/morfeusz/app/App.java
morfeusz/java/pl/waw/ipipan/morfeusz/app/MorfeuszUtils.java renamed to morfeusz/wrappers/java/pl/waw/ipipan/morfeusz/app/MorfeuszUtils.java
morfeusz/morfeusz.i renamed to morfeusz/wrappers/morfeusz.i
@@ -91,10 +91,10 @@ import java.io.IOException; | @@ -91,10 +91,10 @@ import java.io.IOException; | ||
91 | 91 | ||
92 | %ignore Tagset::Tagset(const unsigned char* fsaData); | 92 | %ignore Tagset::Tagset(const unsigned char* fsaData); |
93 | 93 | ||
94 | -%include "Morfeusz.hpp" | ||
95 | -%include "MorphInterpretation.hpp" | ||
96 | -%include "const.hpp" | ||
97 | -%include "exceptions.hpp" | 94 | +%include "../Morfeusz.hpp" |
95 | +%include "../MorphInterpretation.hpp" | ||
96 | +%include "../const.hpp" | ||
97 | +%include "../exceptions.hpp" | ||
98 | 98 | ||
99 | // instantiate vector of interpretations | 99 | // instantiate vector of interpretations |
100 | namespace std { | 100 | namespace std { |
morfeusz/perl/CMakeLists.txt renamed to morfeusz/wrappers/perl/CMakeLists.txt
@@ -6,7 +6,7 @@ if (NOT CMAKE_CROSSCOMPILING) | @@ -6,7 +6,7 @@ if (NOT CMAKE_CROSSCOMPILING) | ||
6 | 6 | ||
7 | find_package (PerlLibs REQUIRED) | 7 | find_package (PerlLibs REQUIRED) |
8 | include_directories (${PERL_INCLUDE_PATH}) | 8 | include_directories (${PERL_INCLUDE_PATH}) |
9 | - include_directories (..) | 9 | + include_directories (../..) |
10 | 10 | ||
11 | set (CMAKE_SWIG_FLAGS "") | 11 | set (CMAKE_SWIG_FLAGS "") |
12 | 12 |
morfeusz/python/CMakeLists.txt renamed to morfeusz/wrappers/python/CMakeLists.txt
@@ -9,14 +9,14 @@ set (PYMORFEUSZ_VERSION "0.1.0") | @@ -9,14 +9,14 @@ set (PYMORFEUSZ_VERSION "0.1.0") | ||
9 | 9 | ||
10 | # SWIG Python | 10 | # SWIG Python |
11 | INCLUDE_DIRECTORIES (${PYTHON_INCLUDE_PATH}) | 11 | INCLUDE_DIRECTORIES (${PYTHON_INCLUDE_PATH}) |
12 | -INCLUDE_DIRECTORIES (..) | 12 | +INCLUDE_DIRECTORIES (../..) |
13 | 13 | ||
14 | 14 | ||
15 | set (SWIG_PYTHON_OUTFILE_CXX "${CMAKE_CURRENT_BINARY_DIR}/swigPYTHON.cpp") | 15 | set (SWIG_PYTHON_OUTFILE_CXX "${CMAKE_CURRENT_BINARY_DIR}/swigPYTHON.cpp") |
16 | set (SWIG_PYTHON_OUTFILE_PY "${CMAKE_CURRENT_BINARY_DIR}/morfeusz2.py") | 16 | set (SWIG_PYTHON_OUTFILE_PY "${CMAKE_CURRENT_BINARY_DIR}/morfeusz2.py") |
17 | add_custom_command ( | 17 | add_custom_command ( |
18 | OUTPUT "${SWIG_PYTHON_OUTFILE_CXX}" "${SWIG_PYTHON_OUTFILE_PY}" | 18 | OUTPUT "${SWIG_PYTHON_OUTFILE_CXX}" "${SWIG_PYTHON_OUTFILE_PY}" |
19 | - COMMAND swig -python -c++ -o "${SWIG_PYTHON_OUTFILE_CXX}" "${CMAKE_SOURCE_DIR}/morfeusz/morfeusz.i" | 19 | + COMMAND swig -python -c++ -o "${SWIG_PYTHON_OUTFILE_CXX}" "${CMAKE_SOURCE_DIR}/morfeusz/wrappers/morfeusz.i" |
20 | DEPENDS libmorfeusz | 20 | DEPENDS libmorfeusz |
21 | ) | 21 | ) |
22 | add_custom_target (generate_python_wrapper | 22 | add_custom_target (generate_python_wrapper |
morfeusz/python/setup.py.in renamed to morfeusz/wrappers/python/setup.py.in
nbproject/configurations.xml
@@ -5,6 +5,14 @@ | @@ -5,6 +5,14 @@ | ||
5 | displayName="build" | 5 | displayName="build" |
6 | projectFiles="true" | 6 | projectFiles="true" |
7 | root="build"> | 7 | root="build"> |
8 | + <logicalFolder name="morfeusz" displayName="morfeusz" projectFiles="true"> | ||
9 | + <logicalFolder name="wrappers" displayName="wrappers" projectFiles="true"> | ||
10 | + <logicalFolder name="java" displayName="java" projectFiles="true"> | ||
11 | + <itemPath>build/morfeusz/wrappers/java/swigJAVA.cpp</itemPath> | ||
12 | + </logicalFolder> | ||
13 | + <itemPath>build/morfeusz/wrappers/morfeuszPERL_wrap.cxx</itemPath> | ||
14 | + </logicalFolder> | ||
15 | + </logicalFolder> | ||
8 | <itemPath>build/default_fsa.cpp</itemPath> | 16 | <itemPath>build/default_fsa.cpp</itemPath> |
9 | <itemPath>build/default_synth_fsa.cpp</itemPath> | 17 | <itemPath>build/default_synth_fsa.cpp</itemPath> |
10 | </logicalFolder> | 18 | </logicalFolder> |
@@ -17,22 +25,25 @@ | @@ -17,22 +25,25 @@ | ||
17 | <itemPath>build1/morfeusz/java/swigJAVA.cpp</itemPath> | 25 | <itemPath>build1/morfeusz/java/swigJAVA.cpp</itemPath> |
18 | </logicalFolder> | 26 | </logicalFolder> |
19 | <df root="morfeusz" name="0"> | 27 | <df root="morfeusz" name="0"> |
20 | - <df name="charset"> | 28 | + <df name="case"> |
21 | <in>CaseConverter.cpp</in> | 29 | <in>CaseConverter.cpp</in> |
30 | + <in>CasePatternHelper.cpp</in> | ||
31 | + <in>caseconv.cpp</in> | ||
32 | + </df> | ||
33 | + <df name="charset"> | ||
22 | <in>CharsetConverter.cpp</in> | 34 | <in>CharsetConverter.cpp</in> |
23 | <in>TextReader.cpp</in> | 35 | <in>TextReader.cpp</in> |
24 | - <in>caseconv.cpp</in> | ||
25 | <in>conversion_tables.cpp</in> | 36 | <in>conversion_tables.cpp</in> |
26 | </df> | 37 | </df> |
27 | <df name="cli"> | 38 | <df name="cli"> |
28 | <in>cli.cpp</in> | 39 | <in>cli.cpp</in> |
29 | </df> | 40 | </df> |
30 | - <df name="decoder"> | ||
31 | - <in>InterpretedChunksDecoder.cpp</in> | ||
32 | - <in>InterpretedChunksDecoder4Analyzer.cpp</in> | ||
33 | - <in>InterpretedChunksDecoder4Generator.cpp</in> | ||
34 | - </df> | ||
35 | - <df name="deserializer"> | 41 | + <df name="deserialization"> |
42 | + <df name="morphInterps"> | ||
43 | + <in>InterpretedChunksDecoder.cpp</in> | ||
44 | + <in>InterpretedChunksDecoder4Analyzer.cpp</in> | ||
45 | + <in>InterpretedChunksDecoder4Generator.cpp</in> | ||
46 | + </df> | ||
36 | <in>InterpsGroupsReader.cpp</in> | 47 | <in>InterpsGroupsReader.cpp</in> |
37 | <in>MorphDeserializer.cpp</in> | 48 | <in>MorphDeserializer.cpp</in> |
38 | </df> | 49 | </df> |
@@ -46,20 +57,19 @@ | @@ -46,20 +57,19 @@ | ||
46 | <in>SegrulesFSA.cpp</in> | 57 | <in>SegrulesFSA.cpp</in> |
47 | <in>segrules.cpp</in> | 58 | <in>segrules.cpp</in> |
48 | </df> | 59 | </df> |
49 | - <in>CasePatternHelper.cpp</in> | 60 | + <df name="test"> |
61 | + <in>test_recognize_dict.cpp</in> | ||
62 | + <in>test_result_equals.cpp</in> | ||
63 | + </df> | ||
50 | <in>Environment.cpp</in> | 64 | <in>Environment.cpp</in> |
51 | <in>InflexionGraph.cpp</in> | 65 | <in>InflexionGraph.cpp</in> |
52 | <in>Morfeusz.cpp</in> | 66 | <in>Morfeusz.cpp</in> |
53 | - <in>MorphDeserializer.cpp</in> | ||
54 | <in>MorphInterpretation.cpp</in> | 67 | <in>MorphInterpretation.cpp</in> |
55 | <in>Qualifiers.cpp</in> | 68 | <in>Qualifiers.cpp</in> |
56 | <in>Tagset.cpp</in> | 69 | <in>Tagset.cpp</in> |
57 | <in>const.cpp</in> | 70 | <in>const.cpp</in> |
58 | - <in>main.cpp</in> | ||
59 | <in>morfeusz_analyzer.cpp</in> | 71 | <in>morfeusz_analyzer.cpp</in> |
60 | <in>morfeusz_generator.cpp</in> | 72 | <in>morfeusz_generator.cpp</in> |
61 | - <in>test_recognize_dict.cpp</in> | ||
62 | - <in>test_result_equals.cpp</in> | ||
63 | </df> | 73 | </df> |
64 | <logicalFolder name="morfeusz" | 74 | <logicalFolder name="morfeusz" |
65 | displayName="morfeusz" | 75 | displayName="morfeusz" |
@@ -101,10 +111,9 @@ | @@ -101,10 +111,9 @@ | ||
101 | <rebuildPropChanged>false</rebuildPropChanged> | 111 | <rebuildPropChanged>false</rebuildPropChanged> |
102 | </toolsSet> | 112 | </toolsSet> |
103 | <flagsDictionary> | 113 | <flagsDictionary> |
104 | - <element flagsID="0" commonFlags="-O2 -std=c++98"/> | ||
105 | - <element flagsID="1" commonFlags="-O2 -std=c++98 -fPIC"/> | ||
106 | - <element flagsID="2" commonFlags="-std=c++98 -O3"/> | ||
107 | - <element flagsID="3" commonFlags="-std=c++98 -O3 -fPIC"/> | 114 | + <element flagsID="0" commonFlags="-std=c++98 -O3"/> |
115 | + <element flagsID="1" commonFlags="-std=c++98 -O3 -fPIC"/> | ||
116 | + <element flagsID="2" commonFlags="3"/> | ||
108 | </flagsDictionary> | 117 | </flagsDictionary> |
109 | <codeAssistance> | 118 | <codeAssistance> |
110 | </codeAssistance> | 119 | </codeAssistance> |
@@ -114,18 +123,49 @@ | @@ -114,18 +123,49 @@ | ||
114 | <buildCommand>${MAKE} -f Makefile</buildCommand> | 123 | <buildCommand>${MAKE} -f Makefile</buildCommand> |
115 | <cleanCommand>${MAKE} -f Makefile clean</cleanCommand> | 124 | <cleanCommand>${MAKE} -f Makefile clean</cleanCommand> |
116 | <executablePath>build/morfeusz/morfeusz_analyzer</executablePath> | 125 | <executablePath>build/morfeusz/morfeusz_analyzer</executablePath> |
126 | + <ccTool flags="1"> | ||
127 | + <incDir> | ||
128 | + <pElem>build</pElem> | ||
129 | + <pElem>morfeusz</pElem> | ||
130 | + </incDir> | ||
131 | + <preprocessorList> | ||
132 | + <Elem>NDEBUG</Elem> | ||
133 | + </preprocessorList> | ||
134 | + </ccTool> | ||
117 | </makeTool> | 135 | </makeTool> |
118 | </makefileType> | 136 | </makefileType> |
119 | <item path="build/default_fsa.cpp" ex="false" tool="1" flavor2="4"> | 137 | <item path="build/default_fsa.cpp" ex="false" tool="1" flavor2="4"> |
138 | + <ccTool> | ||
139 | + <incDir> | ||
140 | + <pElem>build/morfeusz</pElem> | ||
141 | + </incDir> | ||
142 | + <preprocessorList> | ||
143 | + <Elem>__PIC__=2</Elem> | ||
144 | + <Elem>__pic__=2</Elem> | ||
145 | + <Elem>libmorfeusz_EXPORTS</Elem> | ||
146 | + </preprocessorList> | ||
147 | + <undefinedList> | ||
148 | + <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem> | ||
149 | + </undefinedList> | ||
150 | + </ccTool> | ||
120 | </item> | 151 | </item> |
121 | <item path="build/default_synth_fsa.cpp" ex="false" tool="1" flavor2="4"> | 152 | <item path="build/default_synth_fsa.cpp" ex="false" tool="1" flavor2="4"> |
153 | + <ccTool> | ||
154 | + <incDir> | ||
155 | + <pElem>build/morfeusz</pElem> | ||
156 | + </incDir> | ||
157 | + <preprocessorList> | ||
158 | + <Elem>__PIC__=2</Elem> | ||
159 | + <Elem>__pic__=2</Elem> | ||
160 | + <Elem>libmorfeusz_EXPORTS</Elem> | ||
161 | + </preprocessorList> | ||
162 | + <undefinedList> | ||
163 | + <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem> | ||
164 | + </undefinedList> | ||
165 | + </ccTool> | ||
122 | </item> | 166 | </item> |
123 | <item path="build/morfeusz/default_fsa.cpp" ex="false" tool="1" flavor2="4"> | 167 | <item path="build/morfeusz/default_fsa.cpp" ex="false" tool="1" flavor2="4"> |
124 | <ccTool flags="1"> | 168 | <ccTool flags="1"> |
125 | - <incDir> | ||
126 | - <pElem>build</pElem> | ||
127 | - <pElem>morfeusz</pElem> | ||
128 | - </incDir> | ||
129 | <preprocessorList> | 169 | <preprocessorList> |
130 | <Elem>libmorfeusz_EXPORTS</Elem> | 170 | <Elem>libmorfeusz_EXPORTS</Elem> |
131 | </preprocessorList> | 171 | </preprocessorList> |
@@ -136,17 +176,13 @@ | @@ -136,17 +176,13 @@ | ||
136 | tool="1" | 176 | tool="1" |
137 | flavor2="4"> | 177 | flavor2="4"> |
138 | <ccTool flags="1"> | 178 | <ccTool flags="1"> |
139 | - <incDir> | ||
140 | - <pElem>build</pElem> | ||
141 | - <pElem>morfeusz</pElem> | ||
142 | - </incDir> | ||
143 | <preprocessorList> | 179 | <preprocessorList> |
144 | <Elem>libmorfeusz_EXPORTS</Elem> | 180 | <Elem>libmorfeusz_EXPORTS</Elem> |
145 | </preprocessorList> | 181 | </preprocessorList> |
146 | </ccTool> | 182 | </ccTool> |
147 | </item> | 183 | </item> |
148 | <item path="build/morfeusz/java/swigJAVA.cpp" ex="false" tool="1" flavor2="4"> | 184 | <item path="build/morfeusz/java/swigJAVA.cpp" ex="false" tool="1" flavor2="4"> |
149 | - <ccTool flags="3"> | 185 | + <ccTool flags="1"> |
150 | </ccTool> | 186 | </ccTool> |
151 | </item> | 187 | </item> |
152 | <item path="build/morfeusz/morfeuszJAVA_wrap.cxx" | 188 | <item path="build/morfeusz/morfeuszJAVA_wrap.cxx" |
@@ -155,14 +191,11 @@ | @@ -155,14 +191,11 @@ | ||
155 | flavor2="8"> | 191 | flavor2="8"> |
156 | <ccTool> | 192 | <ccTool> |
157 | <incDir> | 193 | <incDir> |
158 | - <pElem>build</pElem> | ||
159 | - <pElem>morfeusz</pElem> | ||
160 | <pElem>build/morfeusz</pElem> | 194 | <pElem>build/morfeusz</pElem> |
161 | <pElem>/usr/lib/jvm/default-java/include</pElem> | 195 | <pElem>/usr/lib/jvm/default-java/include</pElem> |
162 | <pElem>build/morfeusz/java</pElem> | 196 | <pElem>build/morfeusz/java</pElem> |
163 | </incDir> | 197 | </incDir> |
164 | <preprocessorList> | 198 | <preprocessorList> |
165 | - <Elem>NDEBUG</Elem> | ||
166 | <Elem>_OPTIMIZE__=1</Elem> | 199 | <Elem>_OPTIMIZE__=1</Elem> |
167 | <Elem>__PIC__=2</Elem> | 200 | <Elem>__PIC__=2</Elem> |
168 | <Elem>__pic__=2</Elem> | 201 | <Elem>__pic__=2</Elem> |
@@ -181,8 +214,6 @@ | @@ -181,8 +214,6 @@ | ||
181 | flavor2="4"> | 214 | flavor2="4"> |
182 | <ccTool flags="1"> | 215 | <ccTool flags="1"> |
183 | <incDir> | 216 | <incDir> |
184 | - <pElem>build</pElem> | ||
185 | - <pElem>morfeusz</pElem> | ||
186 | <pElem>/usr/lib/perl/5.14/CORE</pElem> | 217 | <pElem>/usr/lib/perl/5.14/CORE</pElem> |
187 | <pElem>build/morfeusz/perl</pElem> | 218 | <pElem>build/morfeusz/perl</pElem> |
188 | </incDir> | 219 | </incDir> |
@@ -197,14 +228,11 @@ | @@ -197,14 +228,11 @@ | ||
197 | flavor2="8"> | 228 | flavor2="8"> |
198 | <ccTool> | 229 | <ccTool> |
199 | <incDir> | 230 | <incDir> |
200 | - <pElem>build</pElem> | ||
201 | - <pElem>morfeusz</pElem> | ||
202 | <pElem>build/morfeusz</pElem> | 231 | <pElem>build/morfeusz</pElem> |
203 | <pElem>/usr/include/python2.7</pElem> | 232 | <pElem>/usr/include/python2.7</pElem> |
204 | <pElem>build/morfeusz/python</pElem> | 233 | <pElem>build/morfeusz/python</pElem> |
205 | </incDir> | 234 | </incDir> |
206 | <preprocessorList> | 235 | <preprocessorList> |
207 | - <Elem>NDEBUG</Elem> | ||
208 | <Elem>_OPTIMIZE__=1</Elem> | 236 | <Elem>_OPTIMIZE__=1</Elem> |
209 | <Elem>__PIC__=2</Elem> | 237 | <Elem>__PIC__=2</Elem> |
210 | <Elem>__pic__=2</Elem> | 238 | <Elem>__pic__=2</Elem> |
@@ -218,22 +246,40 @@ | @@ -218,22 +246,40 @@ | ||
218 | </ccTool> | 246 | </ccTool> |
219 | </item> | 247 | </item> |
220 | <item path="build/morfeusz/python/swigPYTHON.cpp" | 248 | <item path="build/morfeusz/python/swigPYTHON.cpp" |
249 | + ex="true" | ||
250 | + tool="3" | ||
251 | + flavor2="4"> | ||
252 | + </item> | ||
253 | + <item path="build/morfeusz/wrappers/java/swigJAVA.cpp" | ||
221 | ex="false" | 254 | ex="false" |
222 | tool="1" | 255 | tool="1" |
223 | flavor2="4"> | 256 | flavor2="4"> |
257 | + <ccTool flags="1"> | ||
258 | + </ccTool> | ||
259 | + </item> | ||
260 | + <item path="build/morfeusz/wrappers/morfeuszPERL_wrap.cxx" | ||
261 | + ex="false" | ||
262 | + tool="1" | ||
263 | + flavor2="4"> | ||
264 | + <ccTool flags="1"> | ||
265 | + <incDir> | ||
266 | + <pElem>/usr/lib/perl/5.14/CORE</pElem> | ||
267 | + <pElem>build/morfeusz/wrappers/perl</pElem> | ||
268 | + </incDir> | ||
269 | + <preprocessorList> | ||
270 | + <Elem>morfeusz_perl_EXPORTS</Elem> | ||
271 | + </preprocessorList> | ||
272 | + </ccTool> | ||
224 | </item> | 273 | </item> |
225 | <item path="build1/morfeusz/java/swigJAVA.cpp" ex="false" tool="1" flavor2="4"> | 274 | <item path="build1/morfeusz/java/swigJAVA.cpp" ex="false" tool="1" flavor2="4"> |
226 | </item> | 275 | </item> |
227 | <item path="default_fsa.cpp" ex="false" tool="1" flavor2="4"> | 276 | <item path="default_fsa.cpp" ex="false" tool="1" flavor2="4"> |
228 | <ccTool> | 277 | <ccTool> |
229 | <incDir> | 278 | <incDir> |
230 | - <pElem>build</pElem> | ||
231 | - <pElem>morfeusz</pElem> | ||
232 | <pElem>build/morfeusz</pElem> | 279 | <pElem>build/morfeusz</pElem> |
233 | <pElem>morfeusz/build/morfeusz</pElem> | 280 | <pElem>morfeusz/build/morfeusz</pElem> |
234 | </incDir> | 281 | </incDir> |
235 | <preprocessorList> | 282 | <preprocessorList> |
236 | - <Elem>NDEBUG</Elem> | ||
237 | <Elem>libmorfeusz_EXPORTS</Elem> | 283 | <Elem>libmorfeusz_EXPORTS</Elem> |
238 | </preprocessorList> | 284 | </preprocessorList> |
239 | </ccTool> | 285 | </ccTool> |
@@ -241,13 +287,10 @@ | @@ -241,13 +287,10 @@ | ||
241 | <item path="default_synth_fsa.cpp" ex="false" tool="1" flavor2="4"> | 287 | <item path="default_synth_fsa.cpp" ex="false" tool="1" flavor2="4"> |
242 | <ccTool> | 288 | <ccTool> |
243 | <incDir> | 289 | <incDir> |
244 | - <pElem>build</pElem> | ||
245 | - <pElem>morfeusz</pElem> | ||
246 | <pElem>build/morfeusz</pElem> | 290 | <pElem>build/morfeusz</pElem> |
247 | <pElem>morfeusz/build/morfeusz</pElem> | 291 | <pElem>morfeusz/build/morfeusz</pElem> |
248 | </incDir> | 292 | </incDir> |
249 | <preprocessorList> | 293 | <preprocessorList> |
250 | - <Elem>NDEBUG</Elem> | ||
251 | <Elem>libmorfeusz_EXPORTS</Elem> | 294 | <Elem>libmorfeusz_EXPORTS</Elem> |
252 | </preprocessorList> | 295 | </preprocessorList> |
253 | </ccTool> | 296 | </ccTool> |
@@ -255,34 +298,32 @@ | @@ -255,34 +298,32 @@ | ||
255 | <folder path="0"> | 298 | <folder path="0"> |
256 | <ccTool> | 299 | <ccTool> |
257 | <incDir> | 300 | <incDir> |
258 | - <pElem>build</pElem> | ||
259 | - <pElem>morfeusz</pElem> | ||
260 | <pElem>build/morfeusz</pElem> | 301 | <pElem>build/morfeusz</pElem> |
261 | </incDir> | 302 | </incDir> |
262 | </ccTool> | 303 | </ccTool> |
263 | </folder> | 304 | </folder> |
264 | - <folder path="0/charset"> | 305 | + <folder path="0/case"> |
265 | <ccTool> | 306 | <ccTool> |
266 | <preprocessorList> | 307 | <preprocessorList> |
267 | <Elem>libmorfeusz_EXPORTS</Elem> | 308 | <Elem>libmorfeusz_EXPORTS</Elem> |
268 | </preprocessorList> | 309 | </preprocessorList> |
269 | </ccTool> | 310 | </ccTool> |
270 | </folder> | 311 | </folder> |
271 | - <folder path="0/cli"> | 312 | + <folder path="0/charset"> |
272 | <ccTool> | 313 | <ccTool> |
273 | <preprocessorList> | 314 | <preprocessorList> |
274 | <Elem>libmorfeusz_EXPORTS</Elem> | 315 | <Elem>libmorfeusz_EXPORTS</Elem> |
275 | </preprocessorList> | 316 | </preprocessorList> |
276 | </ccTool> | 317 | </ccTool> |
277 | </folder> | 318 | </folder> |
278 | - <folder path="0/decoder"> | 319 | + <folder path="0/cli"> |
279 | <ccTool> | 320 | <ccTool> |
280 | <preprocessorList> | 321 | <preprocessorList> |
281 | <Elem>libmorfeusz_EXPORTS</Elem> | 322 | <Elem>libmorfeusz_EXPORTS</Elem> |
282 | </preprocessorList> | 323 | </preprocessorList> |
283 | </ccTool> | 324 | </ccTool> |
284 | </folder> | 325 | </folder> |
285 | - <folder path="0/deserializer"> | 326 | + <folder path="0/deserialization"> |
286 | <ccTool> | 327 | <ccTool> |
287 | <preprocessorList> | 328 | <preprocessorList> |
288 | <Elem>libmorfeusz_EXPORTS</Elem> | 329 | <Elem>libmorfeusz_EXPORTS</Elem> |
@@ -303,35 +344,24 @@ | @@ -303,35 +344,24 @@ | ||
303 | </preprocessorList> | 344 | </preprocessorList> |
304 | </ccTool> | 345 | </ccTool> |
305 | </folder> | 346 | </folder> |
306 | - <folder path="build"> | 347 | + <folder path="build/morfeusz/wrappers/java"> |
307 | <ccTool> | 348 | <ccTool> |
308 | <incDir> | 349 | <incDir> |
309 | - <pElem>build</pElem> | ||
310 | - <pElem>morfeusz</pElem> | ||
311 | - <pElem>build/morfeusz</pElem> | 350 | + <pElem>/usr/lib/jvm/default-java/include</pElem> |
312 | </incDir> | 351 | </incDir> |
313 | <preprocessorList> | 352 | <preprocessorList> |
314 | - <Elem>NDEBUG</Elem> | ||
315 | - <Elem>__PIC__=2</Elem> | ||
316 | - <Elem>__pic__=2</Elem> | ||
317 | - <Elem>libmorfeusz_EXPORTS</Elem> | 353 | + <Elem>libjmorfeusz_EXPORTS</Elem> |
318 | </preprocessorList> | 354 | </preprocessorList> |
319 | - <undefinedList> | ||
320 | - <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem> | ||
321 | - </undefinedList> | ||
322 | </ccTool> | 355 | </ccTool> |
323 | </folder> | 356 | </folder> |
324 | <folder path="java"> | 357 | <folder path="java"> |
325 | <ccTool> | 358 | <ccTool> |
326 | <incDir> | 359 | <incDir> |
327 | - <pElem>build</pElem> | ||
328 | - <pElem>morfeusz</pElem> | ||
329 | <pElem>build/morfeusz</pElem> | 360 | <pElem>build/morfeusz</pElem> |
330 | <pElem>build1</pElem> | 361 | <pElem>build1</pElem> |
331 | <pElem>/usr/lib/jvm/java-6-openjdk/include</pElem> | 362 | <pElem>/usr/lib/jvm/java-6-openjdk/include</pElem> |
332 | </incDir> | 363 | </incDir> |
333 | <preprocessorList> | 364 | <preprocessorList> |
334 | - <Elem>NDEBUG</Elem> | ||
335 | <Elem>libjmorfeusz_EXPORTS</Elem> | 365 | <Elem>libjmorfeusz_EXPORTS</Elem> |
336 | <Elem>libmorfeusz_EXPORTS</Elem> | 366 | <Elem>libmorfeusz_EXPORTS</Elem> |
337 | </preprocessorList> | 367 | </preprocessorList> |
@@ -340,12 +370,9 @@ | @@ -340,12 +370,9 @@ | ||
340 | <folder path="morfeusz/java"> | 370 | <folder path="morfeusz/java"> |
341 | <ccTool> | 371 | <ccTool> |
342 | <incDir> | 372 | <incDir> |
343 | - <pElem>build</pElem> | ||
344 | - <pElem>morfeusz</pElem> | ||
345 | - <pElem>/usr/lib/jvm/java-6-openjdk/include</pElem> | 373 | + <pElem>/usr/lib/jvm/default-java/include</pElem> |
346 | </incDir> | 374 | </incDir> |
347 | <preprocessorList> | 375 | <preprocessorList> |
348 | - <Elem>NDEBUG</Elem> | ||
349 | <Elem>libjmorfeusz_EXPORTS</Elem> | 376 | <Elem>libjmorfeusz_EXPORTS</Elem> |
350 | </preprocessorList> | 377 | </preprocessorList> |
351 | </ccTool> | 378 | </ccTool> |
@@ -353,21 +380,10 @@ | @@ -353,21 +380,10 @@ | ||
353 | <folder path="morfeusz/python"> | 380 | <folder path="morfeusz/python"> |
354 | <ccTool> | 381 | <ccTool> |
355 | <incDir> | 382 | <incDir> |
356 | - <pElem>morfeusz</pElem> | ||
357 | <pElem>/usr/include/python2.7</pElem> | 383 | <pElem>/usr/include/python2.7</pElem> |
358 | </incDir> | 384 | </incDir> |
359 | - <preprocessorList> | ||
360 | - <Elem>NDEBUG</Elem> | ||
361 | - </preprocessorList> | ||
362 | </ccTool> | 385 | </ccTool> |
363 | </folder> | 386 | </folder> |
364 | - <item path="morfeusz/CasePatternHelper.cpp" ex="false" tool="1" flavor2="4"> | ||
365 | - <ccTool flags="1"> | ||
366 | - <preprocessorList> | ||
367 | - <Elem>libmorfeusz_EXPORTS</Elem> | ||
368 | - </preprocessorList> | ||
369 | - </ccTool> | ||
370 | - </item> | ||
371 | <item path="morfeusz/Environment.cpp" ex="false" tool="1" flavor2="4"> | 387 | <item path="morfeusz/Environment.cpp" ex="false" tool="1" flavor2="4"> |
372 | <ccTool flags="1"> | 388 | <ccTool flags="1"> |
373 | <preprocessorList> | 389 | <preprocessorList> |
@@ -383,17 +399,8 @@ | @@ -383,17 +399,8 @@ | ||
383 | </ccTool> | 399 | </ccTool> |
384 | </item> | 400 | </item> |
385 | <item path="morfeusz/Morfeusz.cpp" ex="false" tool="1" flavor2="4"> | 401 | <item path="morfeusz/Morfeusz.cpp" ex="false" tool="1" flavor2="4"> |
386 | - <ccTool flags="3"> | ||
387 | - <preprocessorList> | ||
388 | - <Elem>NDEBUG</Elem> | ||
389 | - <Elem>libmorfeusz_EXPORTS</Elem> | ||
390 | - </preprocessorList> | ||
391 | - </ccTool> | ||
392 | - </item> | ||
393 | - <item path="morfeusz/MorphDeserializer.cpp" ex="false" tool="1" flavor2="4"> | ||
394 | - <ccTool flags="3"> | 402 | + <ccTool flags="2"> |
395 | <preprocessorList> | 403 | <preprocessorList> |
396 | - <Elem>NDEBUG</Elem> | ||
397 | <Elem>libmorfeusz_EXPORTS</Elem> | 404 | <Elem>libmorfeusz_EXPORTS</Elem> |
398 | </preprocessorList> | 405 | </preprocessorList> |
399 | </ccTool> | 406 | </ccTool> |
@@ -419,36 +426,36 @@ | @@ -419,36 +426,36 @@ | ||
419 | </preprocessorList> | 426 | </preprocessorList> |
420 | </ccTool> | 427 | </ccTool> |
421 | </item> | 428 | </item> |
422 | - <item path="morfeusz/charset/CaseConverter.cpp" ex="false" tool="1" flavor2="4"> | 429 | + <item path="morfeusz/case/CaseConverter.cpp" ex="false" tool="1" flavor2="4"> |
423 | <ccTool flags="1"> | 430 | <ccTool flags="1"> |
424 | </ccTool> | 431 | </ccTool> |
425 | </item> | 432 | </item> |
426 | - <item path="morfeusz/charset/CharsetConverter.cpp" | 433 | + <item path="morfeusz/case/CasePatternHelper.cpp" |
427 | ex="false" | 434 | ex="false" |
428 | tool="1" | 435 | tool="1" |
429 | flavor2="4"> | 436 | flavor2="4"> |
430 | - <ccTool flags="3"> | ||
431 | - <preprocessorList> | ||
432 | - <Elem>NDEBUG</Elem> | ||
433 | - </preprocessorList> | ||
434 | - </ccTool> | ||
435 | - </item> | ||
436 | - <item path="morfeusz/charset/TextReader.cpp" ex="false" tool="1" flavor2="4"> | ||
437 | <ccTool flags="1"> | 437 | <ccTool flags="1"> |
438 | </ccTool> | 438 | </ccTool> |
439 | </item> | 439 | </item> |
440 | - <item path="morfeusz/charset/caseconv.cpp" ex="false" tool="1" flavor2="4"> | 440 | + <item path="morfeusz/case/caseconv.cpp" ex="false" tool="1" flavor2="4"> |
441 | <ccTool flags="1"> | 441 | <ccTool flags="1"> |
442 | </ccTool> | 442 | </ccTool> |
443 | </item> | 443 | </item> |
444 | + <item path="morfeusz/charset/CharsetConverter.cpp" | ||
445 | + ex="false" | ||
446 | + tool="1" | ||
447 | + flavor2="4"> | ||
448 | + </item> | ||
449 | + <item path="morfeusz/charset/TextReader.cpp" ex="false" tool="1" flavor2="4"> | ||
450 | + </item> | ||
444 | <item path="morfeusz/charset/conversion_tables.cpp" | 451 | <item path="morfeusz/charset/conversion_tables.cpp" |
445 | ex="false" | 452 | ex="false" |
446 | tool="1" | 453 | tool="1" |
447 | flavor2="4"> | 454 | flavor2="4"> |
448 | - <ccTool flags="1"> | ||
449 | - </ccTool> | ||
450 | </item> | 455 | </item> |
451 | <item path="morfeusz/cli/cli.cpp" ex="false" tool="1" flavor2="4"> | 456 | <item path="morfeusz/cli/cli.cpp" ex="false" tool="1" flavor2="4"> |
457 | + <ccTool flags="1"> | ||
458 | + </ccTool> | ||
452 | </item> | 459 | </item> |
453 | <item path="morfeusz/const.cpp" ex="false" tool="1" flavor2="4"> | 460 | <item path="morfeusz/const.cpp" ex="false" tool="1" flavor2="4"> |
454 | <ccTool flags="1"> | 461 | <ccTool flags="1"> |
@@ -457,29 +464,31 @@ | @@ -457,29 +464,31 @@ | ||
457 | </preprocessorList> | 464 | </preprocessorList> |
458 | </ccTool> | 465 | </ccTool> |
459 | </item> | 466 | </item> |
460 | - <item path="morfeusz/decoder/InterpretedChunksDecoder.cpp" | 467 | + <item path="morfeusz/deserialization/InterpsGroupsReader.cpp" |
461 | ex="false" | 468 | ex="false" |
462 | tool="1" | 469 | tool="1" |
463 | flavor2="4"> | 470 | flavor2="4"> |
464 | </item> | 471 | </item> |
465 | - <item path="morfeusz/decoder/InterpretedChunksDecoder4Analyzer.cpp" | 472 | + <item path="morfeusz/deserialization/MorphDeserializer.cpp" |
466 | ex="false" | 473 | ex="false" |
467 | tool="1" | 474 | tool="1" |
468 | flavor2="4"> | 475 | flavor2="4"> |
469 | </item> | 476 | </item> |
470 | - <item path="morfeusz/decoder/InterpretedChunksDecoder4Generator.cpp" | 477 | + <item path="morfeusz/deserialization/morphInterps/InterpretedChunksDecoder.cpp" |
471 | ex="false" | 478 | ex="false" |
472 | tool="1" | 479 | tool="1" |
473 | flavor2="4"> | 480 | flavor2="4"> |
481 | + <ccTool flags="1"> | ||
482 | + </ccTool> | ||
474 | </item> | 483 | </item> |
475 | - <item path="morfeusz/deserializer/InterpsGroupsReader.cpp" | 484 | + <item path="morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.cpp" |
476 | ex="false" | 485 | ex="false" |
477 | tool="1" | 486 | tool="1" |
478 | flavor2="4"> | 487 | flavor2="4"> |
479 | <ccTool flags="1"> | 488 | <ccTool flags="1"> |
480 | </ccTool> | 489 | </ccTool> |
481 | </item> | 490 | </item> |
482 | - <item path="morfeusz/deserializer/MorphDeserializer.cpp" | 491 | + <item path="morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Generator.cpp" |
483 | ex="false" | 492 | ex="false" |
484 | tool="1" | 493 | tool="1" |
485 | flavor2="4"> | 494 | flavor2="4"> |
@@ -496,9 +505,6 @@ | @@ -496,9 +505,6 @@ | ||
496 | <incDir> | 505 | <incDir> |
497 | <pElem>build/fsa</pElem> | 506 | <pElem>build/fsa</pElem> |
498 | </incDir> | 507 | </incDir> |
499 | - <preprocessorList> | ||
500 | - <Elem>NDEBUG</Elem> | ||
501 | - </preprocessorList> | ||
502 | </ccTool> | 508 | </ccTool> |
503 | </item> | 509 | </item> |
504 | <item path="morfeusz/fsa/test_recognize.cpp" ex="false" tool="1" flavor2="8"> | 510 | <item path="morfeusz/fsa/test_recognize.cpp" ex="false" tool="1" flavor2="8"> |
@@ -506,9 +512,6 @@ | @@ -506,9 +512,6 @@ | ||
506 | <incDir> | 512 | <incDir> |
507 | <pElem>build/fsa</pElem> | 513 | <pElem>build/fsa</pElem> |
508 | </incDir> | 514 | </incDir> |
509 | - <preprocessorList> | ||
510 | - <Elem>NDEBUG</Elem> | ||
511 | - </preprocessorList> | ||
512 | </ccTool> | 515 | </ccTool> |
513 | </item> | 516 | </item> |
514 | <item path="morfeusz/fsa/test_speed.cpp" ex="false" tool="1" flavor2="8"> | 517 | <item path="morfeusz/fsa/test_speed.cpp" ex="false" tool="1" flavor2="8"> |
@@ -516,43 +519,33 @@ | @@ -516,43 +519,33 @@ | ||
516 | <incDir> | 519 | <incDir> |
517 | <pElem>build/fsa</pElem> | 520 | <pElem>build/fsa</pElem> |
518 | </incDir> | 521 | </incDir> |
519 | - <preprocessorList> | ||
520 | - <Elem>NDEBUG</Elem> | ||
521 | - </preprocessorList> | ||
522 | - </ccTool> | ||
523 | - </item> | ||
524 | - <item path="morfeusz/main.cpp" ex="false" tool="1" flavor2="4"> | ||
525 | - <ccTool> | ||
526 | - <preprocessorList> | ||
527 | - <Elem>NDEBUG</Elem> | ||
528 | - <Elem>_OPTIMIZE__=1</Elem> | ||
529 | - <Elem>libmorfeusz_EXPORTS</Elem> | ||
530 | - </preprocessorList> | ||
531 | - <undefinedList> | ||
532 | - <Elem>__NO_INLINE__</Elem> | ||
533 | - </undefinedList> | ||
534 | </ccTool> | 522 | </ccTool> |
535 | </item> | 523 | </item> |
536 | <item path="morfeusz/morfeusz_analyzer.cpp" ex="false" tool="1" flavor2="4"> | 524 | <item path="morfeusz/morfeusz_analyzer.cpp" ex="false" tool="1" flavor2="4"> |
537 | - <ccTool flags="2"> | ||
538 | - <preprocessorList> | ||
539 | - <Elem>NDEBUG</Elem> | ||
540 | - </preprocessorList> | 525 | + <ccTool flags="0"> |
541 | </ccTool> | 526 | </ccTool> |
542 | </item> | 527 | </item> |
543 | <item path="morfeusz/morfeusz_generator.cpp" ex="false" tool="1" flavor2="4"> | 528 | <item path="morfeusz/morfeusz_generator.cpp" ex="false" tool="1" flavor2="4"> |
529 | + <ccTool flags="0"> | ||
530 | + </ccTool> | ||
544 | </item> | 531 | </item> |
545 | <item path="morfeusz/segrules/SegrulesFSA.cpp" ex="false" tool="1" flavor2="4"> | 532 | <item path="morfeusz/segrules/SegrulesFSA.cpp" ex="false" tool="1" flavor2="4"> |
546 | - <ccTool flags="1"> | ||
547 | - </ccTool> | ||
548 | </item> | 533 | </item> |
549 | <item path="morfeusz/segrules/segrules.cpp" ex="false" tool="1" flavor2="4"> | 534 | <item path="morfeusz/segrules/segrules.cpp" ex="false" tool="1" flavor2="4"> |
550 | - <ccTool flags="1"> | ||
551 | - </ccTool> | ||
552 | </item> | 535 | </item> |
553 | - <item path="morfeusz/test_recognize_dict.cpp" ex="false" tool="1" flavor2="4"> | 536 | + <item path="morfeusz/test/test_recognize_dict.cpp" |
537 | + ex="false" | ||
538 | + tool="1" | ||
539 | + flavor2="4"> | ||
540 | + <ccTool flags="0"> | ||
541 | + </ccTool> | ||
554 | </item> | 542 | </item> |
555 | - <item path="morfeusz/test_result_equals.cpp" ex="false" tool="1" flavor2="4"> | 543 | + <item path="morfeusz/test/test_result_equals.cpp" |
544 | + ex="false" | ||
545 | + tool="1" | ||
546 | + flavor2="4"> | ||
547 | + <ccTool flags="0"> | ||
548 | + </ccTool> | ||
556 | </item> | 549 | </item> |
557 | </conf> | 550 | </conf> |
558 | </confs> | 551 | </confs> |
profile.sh
@@ -4,9 +4,9 @@ rm -rf profbuild | @@ -4,9 +4,9 @@ rm -rf profbuild | ||
4 | mkdir -p profbuild | 4 | mkdir -p profbuild |
5 | cd profbuild | 5 | cd profbuild |
6 | cmake -D INPUT_DICTIONARIES=../input/dodatki.tab,../input/PoliMorfSmall.tab -D CMAKE_BUILD_TYPE=Debug -D CMAKE_CXX_FLAGS="-g -O2" -D CMAKE_SHARED_LINKER_FLAGS="-lprofiler" -D CMAKE_EXE_LINKER_FLAGS="-lprofiler" .. | 6 | cmake -D INPUT_DICTIONARIES=../input/dodatki.tab,../input/PoliMorfSmall.tab -D CMAKE_BUILD_TYPE=Debug -D CMAKE_CXX_FLAGS="-g -O2" -D CMAKE_SHARED_LINKER_FLAGS="-lprofiler" -D CMAKE_EXE_LINKER_FLAGS="-lprofiler" .. |
7 | -make -j4 | 7 | +make |
8 | rm -f /tmp/morfeusz.prof | 8 | rm -f /tmp/morfeusz.prof |
9 | export LD_PRELOAD="/usr/lib/libprofiler.so" | 9 | export LD_PRELOAD="/usr/lib/libprofiler.so" |
10 | export CPUPROFILE="/tmp/morfeusz.prof" | 10 | export CPUPROFILE="/tmp/morfeusz.prof" |
11 | -morfeusz/morfeusz_analyzer -i /tmp/dupadupa < /mnt/storage/morfeusz/sents30k > /dev/null | 11 | +morfeusz/morfeusz_analyzer -i /home/wkieras/output/sgjp_analyzer.fsa < /mnt/storage/morfeusz/sents10k > /dev/null |
12 | ### pprof --gv profbuild/morfeusz/morfeusz_analyzer /tmp/morfeusz.prof | 12 | ### pprof --gv profbuild/morfeusz/morfeusz_analyzer /tmp/morfeusz.prof |
morfeusz/Toolchain-Darwin-amd64.cmake renamed to toolchains/Toolchain-Darwin-amd64.cmake
morfeusz/Toolchain-Linux-amd64.cmake renamed to toolchains/Toolchain-Linux-amd64.cmake
morfeusz/Toolchain-Linux-i386.cmake renamed to toolchains/Toolchain-Linux-i386.cmake
morfeusz/Toolchain-Windows-amd64.cmake renamed to toolchains/Toolchain-Windows-amd64.cmake
morfeusz/Toolchain-Windows-i386.cmake renamed to toolchains/Toolchain-Windows-i386.cmake