Commit a65ee056dbe81292da3af8dcacf7f040494e0d8f
1 parent
a1d73c9e
generalne porządki w kodzie
git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@210 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
Showing
71 changed files
with
470 additions
and
337 deletions
CMakeLists.txt
... | ... | @@ -6,7 +6,7 @@ set (Morfeusz_VERSION_MAJOR 2) |
6 | 6 | set (Morfeusz_VERSION_MINOR 0) |
7 | 7 | set (Morfeusz_VERSION_PATCH 0) |
8 | 8 | set (Morfeusz_VERSION "${Morfeusz_VERSION_MAJOR}.${Morfeusz_VERSION_MINOR}.${Morfeusz_VERSION_PATCH}") |
9 | -if (NOT ${VERSION_SUFFIX} STREQUAL "") | |
9 | +if (VERSION_SUFFIX) | |
10 | 10 | set (Morfeusz_VERSION_TWEAK "${VERSION_SUFFIX}") |
11 | 11 | set (Morfeusz_VERSION "${Morfeusz_VERSION}_${Morfeusz_VERSION_TWEAK}") |
12 | 12 | endif () |
... | ... | @@ -148,13 +148,11 @@ include (CPack) |
148 | 148 | file (COPY fsabuilder testfiles input DESTINATION .) |
149 | 149 | |
150 | 150 | configure_file ( |
151 | - "${PROJECT_SOURCE_DIR}/morfeusz/MorfeuszConfig.hpp.in" | |
152 | - "${PROJECT_BINARY_DIR}/morfeusz/MorfeuszConfig.hpp" | |
151 | + "${PROJECT_SOURCE_DIR}/morfeusz/MorfeuszVersion.hpp.in" | |
152 | + "${PROJECT_BINARY_DIR}/MorfeuszVersion.hpp" | |
153 | 153 | ) |
154 | - | |
155 | -###### add main sources ######## | |
156 | - | |
157 | 154 | include_directories("${PROJECT_BINARY_DIR}" ) |
155 | +###### add main sources ######## | |
158 | 156 | |
159 | 157 | add_subdirectory (morfeusz) |
160 | 158 | add_subdirectory (fsabuilder) |
... | ... |
README
... | ... | @@ -109,7 +109,7 @@ Create separate build directory, for example build-darwin. |
109 | 109 | Run cross-compilation: |
110 | 110 | ---------------------- |
111 | 111 | |
112 | -cmake -D CROSSMORFEUSZ_ROOT=<path_to_crossmorfeusz_dir> -DCMAKE_TOOLCHAIN_FILE=../morfeusz/Toolchain-xxx.cmake .. | |
112 | +cmake -D CROSSMORFEUSZ_ROOT=<path_to_crossmorfeusz_dir> -DCMAKE_TOOLCHAIN_FILE=../toolchains/Toolchain-xxx.cmake .. | |
113 | 113 | make |
114 | 114 | make package package-java package-python |
115 | 115 | |
... | ... |
buildAll.sh
... | ... | @@ -30,12 +30,12 @@ function build { |
30 | 30 | srcDir=`pwd` |
31 | 31 | buildDir=buildall/$os-$arch |
32 | 32 | targetDir=$srcDir/target |
33 | - toolchain=$srcDir/morfeusz/Toolchain-$os-$arch.cmake | |
33 | + toolchain=$srcDir/toolchains/Toolchain-$os-$arch.cmake | |
34 | 34 | |
35 | 35 | echo "Will use $toolchain toolchain" |
36 | 36 | |
37 | 37 | rm -rf $buildDir |
38 | - rm -rf $targetDir | |
38 | + #~ rm -rf $targetDir | |
39 | 39 | mkdir -p $buildDir |
40 | 40 | mkdir -p $targetDir |
41 | 41 | cd $buildDir |
... | ... | @@ -68,7 +68,7 @@ function log { |
68 | 68 | export -f build |
69 | 69 | export -f log |
70 | 70 | |
71 | -rm -rf log | |
71 | +rm -rf log target | |
72 | 72 | mkdir -p log |
73 | 73 | |
74 | 74 | buildDictionaries 2>&1 | log All all |
... | ... | @@ -79,6 +79,6 @@ buildDictionaries 2>&1 | log All all |
79 | 79 | echo "build Windows amd64 package package-java 2>&1 | log Windows amd64" |
80 | 80 | echo "build Windows i386 package package-java 2>&1 | log Windows i386" |
81 | 81 | echo "build Darwin amd64 package package-java 2>&1 | log Darwin amd64" |
82 | -} | xargs -n1 -P8 -d$'\n' bash -c | |
82 | +} | xargs -n1 -P5 -d$'\n' bash -c | |
83 | 83 | |
84 | 84 | |
... | ... |
morfeusz/CMakeLists.txt
... | ... | @@ -25,8 +25,6 @@ add_custom_target ( dictionaries DEPENDS analyzer-dictionary generator-dictionar |
25 | 25 | |
26 | 26 | include_directories( ${CMAKE_CURRENT_SOURCE_DIR} ) |
27 | 27 | |
28 | -# add_custom_target (dupa DEPENDS "${INPUT_DICTIONARY_CPP}") | |
29 | - | |
30 | 28 | #### build ##### |
31 | 29 | |
32 | 30 | set(SRC_FILES |
... | ... | @@ -42,37 +40,39 @@ set(SRC_FILES |
42 | 40 | InflexionGraph.cpp |
43 | 41 | charset/TextReader.cpp |
44 | 42 | charset/CharsetConverter.cpp |
45 | - charset/CaseConverter.cpp | |
46 | - charset/caseconv.cpp | |
43 | + case/CaseConverter.cpp | |
44 | + case/caseconv.cpp | |
47 | 45 | charset/conversion_tables.cpp |
48 | 46 | cli/cli.cpp |
49 | 47 | segrules/segrules.cpp |
50 | 48 | segrules/SegrulesFSA.cpp |
51 | - CasePatternHelper.cpp | |
52 | - decoder/InterpretedChunksDecoder.cpp | |
53 | - decoder/InterpretedChunksDecoder4Analyzer.cpp | |
54 | - decoder/InterpretedChunksDecoder4Generator.cpp | |
55 | - deserializer/InterpsGroupsReader.cpp | |
56 | - deserializer/MorphDeserializer.cpp | |
49 | + case/CasePatternHelper.cpp | |
50 | + deserialization/morphInterps/InterpretedChunksDecoder.cpp | |
51 | + deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.cpp | |
52 | + deserialization/morphInterps/InterpretedChunksDecoder4Generator.cpp | |
53 | + deserialization/InterpsGroupsReader.cpp | |
54 | + deserialization/MorphDeserializer.cpp | |
57 | 55 | ) |
58 | 56 | |
59 | 57 | set(INCLUDE_FILES |
60 | 58 | const.hpp |
61 | 59 | data/default_fsa.hpp |
60 | + Environment.hpp | |
62 | 61 | Tagset.hpp |
63 | 62 | Qualifiers.hpp |
64 | 63 | fsa/const.hpp |
65 | 64 | MorphInterpretation.hpp |
66 | 65 | Morfeusz.hpp |
66 | + MorfeuszVersion.hpp | |
67 | 67 | InflexionGraph.hpp |
68 | 68 | charset/CharsetConverter.hpp |
69 | 69 | charset/TextReader.hpp |
70 | - charset/CaseConverter.hpp | |
71 | - charset/caseconv.hpp | |
70 | + case/CaseConverter.hpp | |
71 | + case/caseconv.hpp | |
72 | 72 | charset/conversion_tables.hpp |
73 | 73 | cli/cli.hpp |
74 | 74 | segrules/segrules.hpp |
75 | - deserializer/MorphDeserializer.cpp | |
75 | + deserialization/MorphDeserializer.cpp | |
76 | 76 | ) |
77 | 77 | |
78 | 78 | add_library (libmorfeusz SHARED ${SRC_FILES}) |
... | ... | @@ -82,24 +82,15 @@ set_target_properties (libmorfeusz PROPERTIES OUTPUT_NAME "morfeusz2") |
82 | 82 | |
83 | 83 | add_executable (morfeusz_analyzer morfeusz_analyzer.cpp) |
84 | 84 | add_executable (morfeusz_generator morfeusz_generator.cpp) |
85 | -add_executable (test_result_equals test_result_equals.cpp) | |
86 | -add_executable (test_recognize_dict test_recognize_dict.cpp) | |
85 | +add_executable (test_result_equals test/test_result_equals.cpp) | |
86 | +add_executable (test_recognize_dict test/test_recognize_dict.cpp) | |
87 | 87 | |
88 | 88 | target_link_libraries (morfeusz_analyzer libmorfeusz) |
89 | 89 | target_link_libraries (morfeusz_generator libmorfeusz) |
90 | 90 | target_link_libraries (test_result_equals libmorfeusz) |
91 | 91 | target_link_libraries (test_recognize_dict libmorfeusz) |
92 | 92 | |
93 | -if (${CMAKE_SYSTEM_NAME} MATCHES "Windows") | |
94 | - target_link_libraries (libmorfeusz ws2_32) | |
95 | - set (TARGET_LIB_DIR bin) | |
96 | -else () | |
97 | - set (TARGET_LIB_DIR lib) | |
98 | -endif () | |
99 | - | |
100 | -add_subdirectory (java) | |
101 | -add_subdirectory (python) | |
102 | -add_subdirectory (perl) | |
93 | +add_subdirectory (wrappers) | |
103 | 94 | |
104 | 95 | if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") |
105 | 96 | add_custom_target (morfeusz-repair-library |
... | ... | @@ -108,6 +99,13 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") |
108 | 99 | add_dependencies (morfeusz_analyzer morfeusz-repair-library) |
109 | 100 | endif () |
110 | 101 | |
102 | +if (${CMAKE_SYSTEM_NAME} MATCHES "Windows") | |
103 | + target_link_libraries (libmorfeusz ws2_32) | |
104 | + set (TARGET_LIB_DIR bin) | |
105 | +else () | |
106 | + set (TARGET_LIB_DIR lib) | |
107 | +endif () | |
108 | + | |
111 | 109 | install (FILES ${INCLUDE_FILES} DESTINATION include/morfeusz) |
112 | 110 | install (TARGETS libmorfeusz DESTINATION ${TARGET_LIB_DIR}) |
113 | 111 | install (TARGETS morfeusz_analyzer morfeusz_generator DESTINATION bin) |
... | ... |
morfeusz/Environment.cpp
... | ... | @@ -8,14 +8,11 @@ |
8 | 8 | #include <vector> |
9 | 9 | #include <algorithm> |
10 | 10 | #include "Environment.hpp" |
11 | -#include "decoder/InterpretedChunksDecoder.hpp" | |
12 | -#include "deserializer/MorphDeserializer.hpp" | |
11 | +#include "deserialization/MorphDeserializer.hpp" | |
13 | 12 | #include "exceptions.hpp" |
14 | -#include "decoder/InterpretedChunksDecoder4Analyzer.hpp" | |
15 | -#include "decoder/InterpretedChunksDecoder4Generator.hpp" | |
16 | - | |
17 | -//class InterpretedChunksDecoder4Analyzer; | |
18 | -//class InterpretedChunksDecoder4Generator; | |
13 | +#include "deserialization/morphInterps/InterpretedChunksDecoder.hpp" | |
14 | +#include "deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.hpp" | |
15 | +#include "deserialization/morphInterps/InterpretedChunksDecoder4Generator.hpp" | |
19 | 16 | |
20 | 17 | static Deserializer<InterpsGroupsReader>& initializeDeserializer(MorfeuszProcessorType processorType) { |
21 | 18 | static Deserializer<InterpsGroupsReader> *analyzerDeserializer |
... | ... |
morfeusz/Environment.hpp
... | ... | @@ -13,56 +13,142 @@ |
13 | 13 | class InterpretedChunksDecoder; |
14 | 14 | class CasePatternHelper; |
15 | 15 | |
16 | -#include "charset/CaseConverter.hpp" | |
16 | +#include "case/CaseConverter.hpp" | |
17 | 17 | #include "charset/CharsetConverter.hpp" |
18 | 18 | #include "fsa/fsa.hpp" |
19 | 19 | #include "segrules/segrules.hpp" |
20 | 20 | #include "const.hpp" |
21 | 21 | #include "Tagset.hpp" |
22 | -//#include "InterpretedChunksDecoder.hpp" | |
23 | 22 | #include "InterpsGroup.hpp" |
24 | -#include "CasePatternHelper.hpp" | |
23 | +#include "case/CasePatternHelper.hpp" | |
25 | 24 | #include "Qualifiers.hpp" |
26 | -#include "deserializer/InterpsGroupsReader.hpp" | |
25 | +#include "deserialization/InterpsGroupsReader.hpp" | |
27 | 26 | |
28 | 27 | struct InterpsGroup; |
29 | 28 | typedef FSA<InterpsGroupsReader> FSAType; |
30 | -//typedef FSA< std::vector<InterpsGroup > > FSAType; | |
31 | 29 | |
30 | +/** | |
31 | + * This class contains data required for morphological analysis/synthesis. | |
32 | + * It contains references to dictionary automaton, charset converter, tagset data etc. | |
33 | + * All of these can be changed by setters, changing Morfeusz behavior (different dictionary, charset, and other options). | |
34 | + */ | |
32 | 35 | class Environment { |
33 | 36 | public: |
37 | + /** | |
38 | + * Creates default environment with given initial charset, processor type (analyzer/generator) and default dictionary data ptr. | |
39 | + * | |
40 | + * @param charset | |
41 | + * @param morfeuszProcessor | |
42 | + * @param fileStartPtr | |
43 | + */ | |
34 | 44 | Environment( |
35 | 45 | MorfeuszCharset charset, |
36 | 46 | MorfeuszProcessorType morfeuszProcessor, |
37 | 47 | const unsigned char* fileStartPtr); |
38 | 48 | |
49 | + /** | |
50 | + * Sets charset for this environment. | |
51 | + * | |
52 | + * @param charset | |
53 | + */ | |
39 | 54 | void setCharset(MorfeuszCharset charset); |
40 | 55 | |
56 | + /** | |
57 | + * Sets case sensitivity options. | |
58 | + * | |
59 | + * @param caseSensitive - if true, interpretations not matching case will be discarded. | |
60 | + */ | |
41 | 61 | void setCaseSensitive(bool caseSensitive); |
42 | 62 | |
63 | + /** | |
64 | + * Gets charset converter that is currently used by this environment. | |
65 | + * Changed by setting charset. | |
66 | + * | |
67 | + * @return - reference to charset converter. | |
68 | + */ | |
43 | 69 | const CharsetConverter& getCharsetConverter() const; |
44 | 70 | |
71 | + /** | |
72 | + * Returns case converter that is currently used by this environment. | |
73 | + * Changed by setting case sensitivity option. | |
74 | + * | |
75 | + * @return - reference to case converter. | |
76 | + */ | |
45 | 77 | const CaseConverter& getCaseConverter() const; |
46 | 78 | |
79 | + /** | |
80 | + * Sets new tagset for this environment. | |
81 | + * | |
82 | + * @param tagset | |
83 | + */ | |
47 | 84 | void setTagset(const Tagset& tagset); |
85 | + | |
86 | + /** | |
87 | + * Gets currently used tagset. | |
88 | + * | |
89 | + * @return | |
90 | + */ | |
48 | 91 | const Tagset& getTagset() const; |
49 | 92 | |
93 | + /** | |
94 | + * Sets binary dictionary file used by this environment. | |
95 | + * | |
96 | + * @param filename - filename of the dictionary | |
97 | + */ | |
50 | 98 | void setFSAFile(const std::string& filename); |
51 | 99 | |
100 | + /** | |
101 | + * Sets segmentation rules option. | |
102 | + * | |
103 | + * @param option | |
104 | + * @param value | |
105 | + */ | |
52 | 106 | void setSegrulesOption(const std::string& option, const std::string& value); |
53 | 107 | |
108 | + /** | |
109 | + * Gets segmentation rules automaton. | |
110 | + * | |
111 | + * @return | |
112 | + */ | |
54 | 113 | const SegrulesFSA& getCurrentSegrulesFSA() const; |
55 | 114 | |
115 | + /** | |
116 | + * Gets dictionary automaton. | |
117 | + * | |
118 | + * @return | |
119 | + */ | |
56 | 120 | const FSAType& getFSA() const; |
57 | 121 | |
122 | + /** | |
123 | + * Returns decoder that converts interpretations to external format. | |
124 | + * @return | |
125 | + */ | |
58 | 126 | const InterpretedChunksDecoder& getInterpretedChunksDecoder() const; |
59 | 127 | |
128 | + /** | |
129 | + * Gets processor type (info if this is analyzer or generator environment) | |
130 | + * @return | |
131 | + */ | |
60 | 132 | MorfeuszProcessorType getProcessorType() const; |
61 | 133 | |
134 | + /** | |
135 | + * Return current case pattern helper | |
136 | + * | |
137 | + * @return | |
138 | + */ | |
62 | 139 | const CasePatternHelper& getCasePatternHelper() const; |
63 | 140 | |
141 | + /** | |
142 | + * Return current qualifiers helper. | |
143 | + * @return | |
144 | + */ | |
64 | 145 | const Qualifiers& getQualifiersHelper() const; |
65 | 146 | |
147 | + /** | |
148 | + * Returns true iff given codepoint denotes a separator char for ign handling. | |
149 | + * @param codepoint | |
150 | + * @return | |
151 | + */ | |
66 | 152 | bool isSeparator(uint32_t codepoint) const; |
67 | 153 | |
68 | 154 | virtual ~Environment(); |
... | ... |
morfeusz/InflexionGraph.hpp
... | ... | @@ -13,6 +13,10 @@ |
13 | 13 | #include <utility> |
14 | 14 | #include "InterpretedChunk.hpp" |
15 | 15 | |
16 | +/** | |
17 | + * This class builds the inflection graph (indexes the nodes, takes into account segments marked as "weak"). | |
18 | + * Takes care to keep the number of nodes as small as possible. | |
19 | + */ | |
16 | 20 | class InflexionGraph { |
17 | 21 | public: |
18 | 22 | |
... | ... | @@ -24,30 +28,60 @@ public: |
24 | 28 | InterpretedChunk chunk; |
25 | 29 | unsigned int nextNode; |
26 | 30 | }; |
27 | - | |
31 | + | |
32 | + /** | |
33 | + * Adds new path to the graph. | |
34 | + * | |
35 | + * @param path | |
36 | + * @param weak | |
37 | + */ | |
28 | 38 | void addPath(const std::vector<InterpretedChunk>& path, bool weak); |
29 | 39 | |
30 | 40 | // void getResults(const Tagset& tagset, const CharsetConverter& charsetConverter, std::vector<MorphInterpretation>& results); |
31 | 41 | |
42 | + /** | |
43 | + * Return current graph. | |
44 | + * | |
45 | + * @return | |
46 | + */ | |
32 | 47 | const std::vector< std::vector<InflexionGraph::Edge> >& getTheGraph(); |
33 | 48 | |
49 | + /** | |
50 | + * True iff the graph is empty. | |
51 | + * | |
52 | + * @return | |
53 | + */ | |
34 | 54 | bool empty() const; |
35 | 55 | |
56 | + /** | |
57 | + * Clears the graph. | |
58 | + */ | |
36 | 59 | void clear(); |
37 | 60 | |
38 | - | |
39 | - // virtual ~FlexionGraph(); | |
40 | 61 | private: |
41 | 62 | |
42 | 63 | typedef std::pair<const char*, int> PathElement; |
43 | 64 | typedef std::set<PathElement> Path; |
44 | 65 | |
66 | + /** | |
67 | + * Adds an edge that starts a chunk. | |
68 | + * | |
69 | + * @param e | |
70 | + */ | |
45 | 71 | void addStartEdge(const Edge& e); |
46 | - | |
72 | + | |
73 | + /** | |
74 | + * Adds non-starting edge. | |
75 | + * @param startNode | |
76 | + * @param e | |
77 | + */ | |
47 | 78 | void addMiddleEdge(unsigned int startNode, const Edge& e); |
48 | 79 | |
80 | + /** | |
81 | + * Minimizes the graph so that it contains as few nodes as possible. | |
82 | + */ | |
49 | 83 | void minimizeGraph(); |
50 | - | |
84 | + | |
51 | 85 | bool canMergeNodes(unsigned int node1, unsigned int node2); |
52 | 86 | |
53 | 87 | void doMergeNodes(unsigned int node1, unsigned int node2); |
... | ... |
morfeusz/InterpretedChunk.hpp
... | ... | @@ -11,16 +11,59 @@ |
11 | 11 | #include <vector> |
12 | 12 | #include "InterpsGroup.hpp" |
13 | 13 | |
14 | +/** | |
15 | + * Denotes a part of text that has some not-yet-deserialized interpretations attached to it. | |
16 | + */ | |
14 | 17 | struct InterpretedChunk { |
18 | + | |
19 | + /** | |
20 | + * The type of segment for this chunk. | |
21 | + */ | |
15 | 22 | unsigned char segmentType; |
23 | + | |
24 | + /** | |
25 | + * Pointer to the start of this chunk's text | |
26 | + */ | |
16 | 27 | const char* textStartPtr; |
28 | + | |
29 | + /** | |
30 | + * Pointer to the end of this chunk's text (exclusive) | |
31 | + */ | |
17 | 32 | const char* textEndPtr; |
33 | + | |
34 | + /** | |
35 | + * Pointer to the start of this chunk's binary data. | |
36 | + */ | |
18 | 37 | const unsigned char* interpsGroupPtr; |
38 | + | |
39 | + /** | |
40 | + * Pointer to the end of this chunk's binary data (exclusive) | |
41 | + */ | |
19 | 42 | const unsigned char* interpsEndPtr; |
43 | + | |
44 | + /** | |
45 | + * true iff this chunk shifts orth to the one right to it (it is "A" in "A> B") | |
46 | + */ | |
20 | 47 | bool shiftOrth; |
48 | + | |
49 | + /** | |
50 | + * true iff this chunk has attached data from its prefix chunk (when it is "B" segment in "A> B" segmentation rule) | |
51 | + */ | |
21 | 52 | bool orthWasShifted; |
53 | + | |
54 | + /** | |
55 | + * Number of codepoints this chunk consists of. | |
56 | + */ | |
22 | 57 | int codepointsNum; |
58 | + | |
59 | + /** | |
60 | + * Chunks that are in the prefix segments (those with ">" in segmentation rules, ie. "dig>* dig") | |
61 | + */ | |
23 | 62 | std::vector<InterpretedChunk> prefixChunks; |
63 | + | |
64 | + /** | |
65 | + * Homonym id specified by the user. | |
66 | + */ | |
24 | 67 | std::string requiredHomonymId; |
25 | 68 | }; |
26 | 69 | |
... | ... |
morfeusz/InterpsGroup.hpp
morfeusz/Morfeusz.cpp
... | ... | @@ -11,15 +11,13 @@ |
11 | 11 | #include "utils.hpp" |
12 | 12 | #include "data/default_fsa.hpp" |
13 | 13 | #include "Morfeusz.hpp" |
14 | -#include "decoder/InterpretedChunksDecoder.hpp" | |
14 | +#include "deserialization/morphInterps/InterpretedChunksDecoder.hpp" | |
15 | 15 | #include "charset/CharsetConverter.hpp" |
16 | 16 | #include "charset/charset_utils.hpp" |
17 | -#include "charset/CaseConverter.hpp" | |
17 | +#include "case/CaseConverter.hpp" | |
18 | 18 | #include "segrules/segrules.hpp" |
19 | 19 | #include "const.hpp" |
20 | -#include "deserializationUtils.hpp" | |
21 | 20 | #include "charset/utf8.h" |
22 | -#include "compressionByteUtils.hpp" | |
23 | 21 | |
24 | 22 | // TODO - konstruktor kopiujący działający Tak-Jak-Trzeba |
25 | 23 | |
... | ... |
morfeusz/Morfeusz.hpp
... | ... | @@ -13,12 +13,11 @@ |
13 | 13 | #include <vector> |
14 | 14 | #include <map> |
15 | 15 | #include <set> |
16 | -#include "EncodedInterpretation.hpp" | |
17 | 16 | #include "fsa/fsa.hpp" |
18 | 17 | #include "MorphInterpretation.hpp" |
19 | 18 | #include "InterpsGroup.hpp" |
19 | +#include "case/CaseConverter.hpp" | |
20 | 20 | #include "charset/CharsetConverter.hpp" |
21 | -#include "charset/CaseConverter.hpp" | |
22 | 21 | #include "charset/TextReader.hpp" |
23 | 22 | #include "InterpretedChunk.hpp" |
24 | 23 | #include "InflexionGraph.hpp" |
... | ... | @@ -30,8 +29,8 @@ |
30 | 29 | #include "segrules/segrules.hpp" |
31 | 30 | #include "segrules/SegrulesFSA.hpp" |
32 | 31 | |
33 | -#include "deserializer/InterpsGroupsReader.hpp" | |
34 | -#include "deserializer/MorphDeserializer.hpp" | |
32 | +#include "deserialization/InterpsGroupsReader.hpp" | |
33 | +#include "deserialization/MorphDeserializer.hpp" | |
35 | 34 | |
36 | 35 | class Morfeusz; |
37 | 36 | class ResultsIterator; |
... | ... |
morfeusz/MorfeuszOptions.hpp
morfeusz/MorfeuszConfig.hpp.in renamed to morfeusz/MorfeuszVersion.hpp.in
morfeusz/MorphDeserializer.cpp deleted
1 | -/* | |
2 | - * File: MorphDeserializer.cpp | |
3 | - * Author: mlenart | |
4 | - * | |
5 | - * Created on 12 listopad 2013, 15:31 | |
6 | - */ | |
7 | - | |
8 | -#include <map> | |
9 | -#include <algorithm> | |
10 | -#include "MorphDeserializer.hpp" | |
11 | -#include "EncodedInterpretation.hpp" | |
12 | -#include "InterpsGroup.hpp" | |
13 | -#include "deserializationUtils.hpp" | |
14 | - | |
15 | -MorphDeserializer::MorphDeserializer() { | |
16 | -} | |
17 | - | |
18 | -MorphDeserializer::~MorphDeserializer() { | |
19 | -} | |
20 | - | |
21 | -long MorphDeserializer::deserialize(const unsigned char* ptr, vector<InterpsGroup>& interps) const { | |
22 | - const unsigned char* currPtr = ptr; | |
23 | - uint8_t interpTypesNum = readInt8(currPtr); | |
24 | - interps.clear(); | |
25 | - interps.reserve(interpTypesNum); | |
26 | - for (unsigned int i = 0; i < interpTypesNum; i++) { | |
27 | - InterpsGroup ig; | |
28 | - ig.type = readInt8(currPtr); | |
29 | - ig.size = readInt16(currPtr); | |
30 | - ig.ptr = currPtr; | |
31 | - currPtr += ig.size; | |
32 | - interps.push_back(ig); | |
33 | - } | |
34 | - return currPtr - ptr; | |
35 | -} |
morfeusz/MorphInterpretation.cpp
... | ... | @@ -8,7 +8,6 @@ |
8 | 8 | #include <string> |
9 | 9 | #include <sstream> |
10 | 10 | #include "MorphInterpretation.hpp" |
11 | -#include "EncodedInterpretation.hpp" | |
12 | 11 | #include "const.hpp" |
13 | 12 | |
14 | 13 | using namespace std; |
... | ... | @@ -32,11 +31,13 @@ tagnum(tagnum), |
32 | 31 | namenum(namenum), |
33 | 32 | tag(env.getTagset().getTag(tagnum, env.getCharsetConverter())), |
34 | 33 | name(env.getTagset().getName(namenum, env.getCharsetConverter())), |
35 | -qualifiers(env.getQualifiersHelper().getQualifiers(qualifiersNum)) { | |
34 | +qualifiers(&env.getQualifiersHelper().getQualifiers(qualifiersNum)) { | |
36 | 35 | |
37 | 36 | |
38 | 37 | } |
39 | 38 | |
39 | +static const vector<std::string> emptyQualifiers; | |
40 | + | |
40 | 41 | MorphInterpretation::MorphInterpretation() |
41 | 42 | : startNode(), |
42 | 43 | endNode(), |
... | ... | @@ -47,7 +48,7 @@ tagnum(), |
47 | 48 | namenum(), |
48 | 49 | tag(), |
49 | 50 | name(), |
50 | -qualifiers(){ | |
51 | +qualifiers(&emptyQualifiers){ | |
51 | 52 | |
52 | 53 | } |
53 | 54 | |
... | ... | @@ -65,7 +66,7 @@ namenum(0), |
65 | 66 | // qualifiersNum(0), |
66 | 67 | tag(env.getTagset().getTag(0, env.getCharsetConverter())), |
67 | 68 | name(env.getTagset().getName(0, env.getCharsetConverter())), |
68 | -qualifiers() { | |
69 | +qualifiers(&emptyQualifiers) { | |
69 | 70 | |
70 | 71 | } |
71 | 72 | |
... | ... | @@ -126,7 +127,7 @@ const std::string& MorphInterpretation::getName() const { |
126 | 127 | } |
127 | 128 | |
128 | 129 | const vector<string>& MorphInterpretation::getQualifiers() const { |
129 | - return this->qualifiers; | |
130 | + return *this->qualifiers; | |
130 | 131 | } |
131 | 132 | |
132 | 133 | static inline string getQualifiersStr(const MorphInterpretation& mi) { |
... | ... | @@ -157,7 +158,7 @@ std::string MorphInterpretation::toString(bool includeNodeNumbers) const { |
157 | 158 | if (!name.empty()) { |
158 | 159 | res << "," << name; |
159 | 160 | } |
160 | - if (!qualifiers.empty()) { | |
161 | + if (!qualifiers->empty()) { | |
161 | 162 | res << "," << getQualifiersStr(*this); |
162 | 163 | } |
163 | 164 | return res.str(); |
... | ... |
morfeusz/MorphInterpretation.hpp
... | ... | @@ -13,10 +13,12 @@ |
13 | 13 | class Environment; |
14 | 14 | |
15 | 15 | #include "Tagset.hpp" |
16 | -#include "EncodedInterpretation.hpp" | |
17 | 16 | #include "charset/CharsetConverter.hpp" |
18 | 17 | #include "Environment.hpp" |
19 | 18 | |
19 | +/** | |
20 | + * Morphological interpretation as seen by the user in the analysis/generation results. | |
21 | + */ | |
20 | 22 | class MorphInterpretation { |
21 | 23 | public: |
22 | 24 | MorphInterpretation( |
... | ... | @@ -59,7 +61,7 @@ private: |
59 | 61 | int namenum; |
60 | 62 | std::string tag; |
61 | 63 | std::string name; |
62 | - std::vector<std::string> qualifiers; | |
64 | + const std::vector<std::string>* qualifiers; | |
63 | 65 | }; |
64 | 66 | |
65 | 67 | #endif /* MORPHINTERPRETATION_HPP */ |
... | ... |
morfeusz/Qualifiers.cpp
... | ... | @@ -7,7 +7,7 @@ |
7 | 7 | |
8 | 8 | #include <iostream> |
9 | 9 | #include "Qualifiers.hpp" |
10 | -#include "deserializationUtils.hpp" | |
10 | +#include "deserialization/deserializationUtils.hpp" | |
11 | 11 | #include "fsa/const.hpp" |
12 | 12 | |
13 | 13 | using namespace std; |
... | ... | @@ -32,13 +32,9 @@ qualifiers() { |
32 | 32 | } |
33 | 33 | } |
34 | 34 | |
35 | -vector<string> Qualifiers::getQualifiers(int n) const { | |
35 | +const vector<string>& Qualifiers::getQualifiers(int n) const { | |
36 | 36 | return this->qualifiers.at(n); |
37 | 37 | } |
38 | 38 | |
39 | -unsigned int Qualifiers::getQualifiersNum() const { | |
40 | - return (unsigned int) this->qualifiers.size(); | |
41 | -} | |
42 | - | |
43 | 39 | Qualifiers::~Qualifiers() { |
44 | 40 | } |
... | ... |
morfeusz/Qualifiers.hpp
... | ... | @@ -12,11 +12,21 @@ |
12 | 12 | #include <string> |
13 | 13 | #include <stdint.h> |
14 | 14 | |
15 | +/** | |
16 | + * Helper class used for decoding qualifiers set number into a vector of strings. | |
17 | + * | |
18 | + * @param ptr | |
19 | + */ | |
15 | 20 | class Qualifiers { |
16 | 21 | public: |
17 | 22 | explicit Qualifiers(const unsigned char* ptr); |
18 | - std::vector<std::string> getQualifiers(int n) const; | |
19 | - unsigned int getQualifiersNum() const; | |
23 | + | |
24 | + /** | |
25 | + * Returns vector of qualifiers represented as strings. | |
26 | + * @param n - the index in qualifiers tab. | |
27 | + * @return - vector of qualifiers represented as strings. | |
28 | + */ | |
29 | + const std::vector<std::string>& getQualifiers(int n) const; | |
20 | 30 | virtual ~Qualifiers(); |
21 | 31 | private: |
22 | 32 | std::vector< std::vector<std::string> > qualifiers; |
... | ... |
morfeusz/Tagset.cpp
morfeusz/Tagset.hpp
... | ... | @@ -12,11 +12,34 @@ |
12 | 12 | #include <vector> |
13 | 13 | #include "charset/CharsetConverter.hpp" |
14 | 14 | |
15 | +/** | |
16 | + * Represents a tagset | |
17 | + */ | |
15 | 18 | class Tagset { |
16 | 19 | public: |
20 | + /** | |
21 | + * Constructs a tagset from binary data. | |
22 | + * | |
23 | + * @param fsaData - pointer to the beginning of automaton data. | |
24 | + */ | |
17 | 25 | explicit Tagset(const unsigned char* fsaData); |
18 | -// Tagset(const Tagset& tagset); | |
26 | + | |
27 | + /** | |
28 | + * Returns tag (denoted by its index) as a string. | |
29 | + * | |
30 | + * @param tagNum - tag index in the tagset. | |
31 | + * @param charsetConverter - the charset converter used to convert from tagset internal encoding (UTF-8) into target encoding. | |
32 | + * @return - the tag encoded as string. | |
33 | + */ | |
19 | 34 | const std::string getTag(const int tagNum, const CharsetConverter& charsetConverter) const; |
35 | + | |
36 | + /** | |
37 | + * Returns named entity type (denoted by its index) as a string. | |
38 | + * | |
39 | + * @param nameNum - name index in the tagset. | |
40 | + * @param charsetConverter - the charset converter used to convert from tagset internal encoding (UTF-8) into target encoding. | |
41 | + * @return - the named entity type encoded as string. | |
42 | + */ | |
20 | 43 | const std::string getName(const int nameNum, const CharsetConverter& charsetConverter) const; |
21 | 44 | private: |
22 | 45 | std::vector<std::string> tags; |
... | ... |
morfeusz/charset/CaseConverter.cpp renamed to morfeusz/case/CaseConverter.cpp
morfeusz/charset/CaseConverter.hpp renamed to morfeusz/case/CaseConverter.hpp
morfeusz/CasePatternHelper.cpp renamed to morfeusz/case/CasePatternHelper.cpp
morfeusz/CasePatternHelper.hpp renamed to morfeusz/case/CasePatternHelper.hpp
... | ... | @@ -10,12 +10,15 @@ |
10 | 10 | |
11 | 11 | #include <vector> |
12 | 12 | #include "InterpsGroup.hpp" |
13 | -#include "CasePatternHelper.hpp" | |
14 | -#include "compressionByteUtils.hpp" | |
13 | +#include "deserialization/morphInterps/compressionByteUtils.hpp" | |
15 | 14 | #include "Environment.hpp" |
16 | 15 | |
17 | 16 | class Environment; |
18 | 17 | |
18 | +/** | |
19 | + * Utility class used for case-sensitive filtering of interpretations | |
20 | + * (ie. to filter out "berlin" and keep "Berlin") | |
21 | + */ | |
19 | 22 | class CasePatternHelper { |
20 | 23 | public: |
21 | 24 | |
... | ... | @@ -23,10 +26,23 @@ public: |
23 | 26 | |
24 | 27 | } |
25 | 28 | |
29 | + /** | |
30 | + * Set if this case pattern helper cares about case-sensitivity | |
31 | + * | |
32 | + * @param caseSensitive | |
33 | + */ | |
26 | 34 | void setCaseSensitive(bool caseSensitive) { |
27 | 35 | this->caseSensitive = caseSensitive; |
28 | 36 | } |
29 | - | |
37 | + | |
38 | + /** | |
39 | + * Check if given word matches given case pattern | |
40 | + * | |
41 | + * @param lowercaseCodepoints - codepoints of checked word converted to lowercase | |
42 | + * @param originalCodepoints - codepoints of checked word | |
43 | + * @param casePattern - vector representing case pattern ( ie. [False, True] for "mBank") | |
44 | + * @return - true iff word denoted by given codepoints matches given case pattern | |
45 | + */ | |
30 | 46 | bool checkCasePattern( |
31 | 47 | const std::vector<uint32_t>& lowercaseCodepoints, |
32 | 48 | const std::vector<uint32_t>& originalCodepoints, |
... | ... | @@ -41,12 +57,28 @@ public: |
41 | 57 | return true; |
42 | 58 | } |
43 | 59 | |
60 | + /** | |
61 | + * Check if given word has a chance of matching any of case patterns in given interps group. | |
62 | + * | |
63 | + * @param env - environment | |
64 | + * @param orthStart - pointer to start of word | |
65 | + * @param orthEnd - pointer to end of word | |
66 | + * @param ig - interps group | |
67 | + * @return - true iff word encoded from orthStart to orthEnd | |
68 | + * matches at least one of the interp group's morph interpretation's case pattern. | |
69 | + */ | |
44 | 70 | bool checkInterpsGroupOrthCasePatterns( |
45 | 71 | const Environment& env, |
46 | 72 | const char* orthStart, |
47 | 73 | const char* orthEnd, |
48 | 74 | const InterpsGroup& ig) const; |
49 | 75 | |
76 | + /** | |
77 | + * Deserializes case pattern encoded at given pointer. | |
78 | + * | |
79 | + * @param ptr | |
80 | + * @return - case pattern | |
81 | + */ | |
50 | 82 | static std::vector<bool> deserializeOneCasePattern(const unsigned char*& ptr); |
51 | 83 | private: |
52 | 84 | bool caseSensitive; |
... | ... |
morfeusz/charset/caseconv.cpp renamed to morfeusz/case/caseconv.cpp
morfeusz/charset/caseconv.hpp renamed to morfeusz/case/caseconv.hpp
... | ... | @@ -8,6 +8,10 @@ |
8 | 8 | #ifndef CASECONV_HPP |
9 | 9 | #define CASECONV_HPP |
10 | 10 | |
11 | +/* | |
12 | + * Case conversion tables | |
13 | + */ | |
14 | + | |
11 | 15 | extern const unsigned int TO_LOWERCASE_TABLE_SIZE; |
12 | 16 | extern const unsigned int EXT_TO_LOWERCASE_TABLE_SIZE; |
13 | 17 | extern const uint32_t TO_LOWERCASE_TABLE[]; |
... | ... |
morfeusz/charset/CharsetConverter.cpp
morfeusz/charset/TextReader.hpp
morfeusz/outputUtils.hpp renamed to morfeusz/cli/outputUtils.hpp
morfeusz/deserializer/InterpsGroupsReader.cpp renamed to morfeusz/deserialization/InterpsGroupsReader.cpp
morfeusz/deserializer/InterpsGroupsReader.hpp renamed to morfeusz/deserialization/InterpsGroupsReader.hpp
morfeusz/deserializer/MorphDeserializer.cpp renamed to morfeusz/deserialization/MorphDeserializer.cpp
morfeusz/deserializer/MorphDeserializer.hpp renamed to morfeusz/deserialization/MorphDeserializer.hpp
morfeusz/deserializationUtils.hpp renamed to morfeusz/deserialization/deserializationUtils.hpp
morfeusz/endianness.hpp renamed to morfeusz/deserialization/endianness.hpp
morfeusz/EncodedInterpretation.hpp renamed to morfeusz/deserialization/morphInterps/EncodedInterpretation.hpp
morfeusz/decoder/InterpretedChunksDecoder.cpp renamed to morfeusz/deserialization/morphInterps/InterpretedChunksDecoder.cpp
morfeusz/decoder/InterpretedChunksDecoder.hpp renamed to morfeusz/deserialization/morphInterps/InterpretedChunksDecoder.hpp
... | ... | @@ -16,11 +16,10 @@ |
16 | 16 | #include "EncodedInterpretation.hpp" |
17 | 17 | #include "InterpretedChunk.hpp" |
18 | 18 | #include "EncodedInterpretation.hpp" |
19 | -#include "charset/CaseConverter.hpp" | |
19 | +#include "case/CaseConverter.hpp" | |
20 | 20 | #include "Environment.hpp" |
21 | 21 | #include "MorphInterpretation.hpp" |
22 | -#include "CasePatternHelper.hpp" | |
23 | -#include "deserializationUtils.hpp" | |
22 | +#include "case/CasePatternHelper.hpp" | |
24 | 23 | #include "compressionByteUtils.hpp" |
25 | 24 | #include "const.hpp" |
26 | 25 | |
... | ... |
morfeusz/decoder/InterpretedChunksDecoder4Analyzer.cpp renamed to morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.cpp
morfeusz/decoder/InterpretedChunksDecoder4Analyzer.hpp renamed to morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.hpp
morfeusz/decoder/InterpretedChunksDecoder4Generator.cpp renamed to morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Generator.cpp
morfeusz/decoder/InterpretedChunksDecoder4Generator.hpp renamed to morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Generator.hpp
morfeusz/compressionByteUtils.hpp renamed to morfeusz/deserialization/morphInterps/compressionByteUtils.hpp
morfeusz/fsa/cfsa1_impl.hpp
... | ... | @@ -12,7 +12,7 @@ |
12 | 12 | #include <climits> |
13 | 13 | |
14 | 14 | #include "fsa.hpp" |
15 | -#include "../deserializationUtils.hpp" | |
15 | +#include "../deserialization/deserializationUtils.hpp" | |
16 | 16 | |
17 | 17 | static const unsigned char CFSA1_ACCEPTING_FLAG = 128; |
18 | 18 | //static const unsigned char CFSA1_ARRAY_FLAG = 64; |
... | ... |
morfeusz/fsa/cfsa2_impl.hpp
... | ... | @@ -13,7 +13,7 @@ |
13 | 13 | #include <iostream> |
14 | 14 | #include "fsa.hpp" |
15 | 15 | #include "../utils.hpp" |
16 | -#include "../endianness.hpp" | |
16 | +#include "../deserialization/endianness.hpp" | |
17 | 17 | |
18 | 18 | static const unsigned char HAS_REMAINING_FLAG = 128; |
19 | 19 | static const unsigned char ACCEPTING_FLAG = 64; |
... | ... |
morfeusz/fsa/fsa_impl.hpp
morfeusz/java/dupa deleted
1 | - | |
2 | -# SWIG | |
3 | -#set(CMAKE_SWIG_OUTDIR swig) | |
4 | -#FIND_PACKAGE(SWIG REQUIRED) | |
5 | -FIND_PACKAGE(JNI REQUIRED) | |
6 | -#INCLUDE(${SWIG_USE_FILE}) | |
7 | -include(UseJava) | |
8 | - | |
9 | -# SWIG Java | |
10 | -include_directories (${JAVA_INCLUDE_PATH}) | |
11 | -include_directories (..) | |
12 | - | |
13 | -set (SWIG_JAVA_OUTFILE swigJAVA.cpp) | |
14 | -# set (JAVA_WRAPPER_FILE ${CMAKE_SHARED_LIBRARY_PREFIX}morfeusz${CMAKE_SHARED_LIBRARY_SUFFIX}) | |
15 | -add_custom_command ( | |
16 | - OUTPUT ${SWIG_JAVA_OUTFILE} | |
17 | - COMMAND swig -java -c++ -package pl.waw.ipipan.morfeusz -o ${SWIG_JAVA_OUTFILE} -outdir ${CMAKE_SOURCE_DIR}/jmorfeusz/src/main/java/pl/waw/ipipan/morfeusz ${CMAKE_SOURCE_DIR}/morfeusz/morfeusz.i | |
18 | - DEPENDS libmorfeusz | |
19 | -) | |
20 | -#set (CMAKE_SHARED_LINKER_FLAGS "-s -Os -static-libstdc++ -static-libgcc") | |
21 | -add_library (jmorfeusz SHARED ${SWIG_JAVA_OUTFILE}) | |
22 | -target_link_libraries (jmorfeusz ${JAVA_LIBRARIES} libmorfeusz) | |
23 | -add_dependencies (jmorfeusz ${SWIG_JAVA_OUTFILE}) | |
24 | - | |
25 | -#set (CMAKE_SWIG_FLAGS -package pl.waw.ipipan.morfeusz) | |
26 | -#set (CMAKE_SWIG_OUTDIR ${CMAKE_SOURCE_DIR}/jmorfeusz/src/main/java/pl/waw/ipipan/morfeusz) | |
27 | - | |
28 | -#set_source_files_properties (../morfeusz.i PROPERTIES CPLUSPLUS ON) | |
29 | -#SWIG_ADD_MODULE(jmorfeusz java ../morfeusz.i) | |
30 | -#SWIG_LINK_LIBRARIES(jmorfeusz ${JAVA_LIBRARIES}) | |
31 | -#SWIG_LINK_LIBRARIES(jmorfeusz libmorfeusz) | |
32 | - | |
33 | -#if (${CMAKE_SYSTEM_NAME} MATCHES "Windows") | |
34 | -# set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") | |
35 | -# set (CMAKE_SHARED_LINKER_FLAGS "-s -Os -static-libstdc++ -static-libgcc") | |
36 | -#endif () |
morfeusz/morfeusz_analyzer.cpp
... | ... | @@ -12,14 +12,16 @@ |
12 | 12 | #include "fsa/fsa.hpp" |
13 | 13 | #include "Tagset.hpp" |
14 | 14 | #include "Morfeusz.hpp" |
15 | +#include "MorfeuszVersion.hpp" | |
15 | 16 | #include "const.hpp" |
16 | 17 | |
17 | 18 | #include "cli/cli.hpp" |
18 | -#include "outputUtils.hpp" | |
19 | +#include "cli/outputUtils.hpp" | |
19 | 20 | |
20 | 21 | using namespace std; |
21 | 22 | |
22 | 23 | int main(int argc, const char** argv) { |
24 | + cerr << "Morfeusz analyzer, version: " << MORFEUSZ_VERSION << endl; | |
23 | 25 | ez::ezOptionParser& opt = *getOptions(argc, argv, ANALYZER); |
24 | 26 | Morfeusz morfeusz; |
25 | 27 | initializeMorfeusz(opt, morfeusz); |
... | ... |
morfeusz/morfeusz_generator.cpp
... | ... | @@ -11,13 +11,15 @@ |
11 | 11 | #include "fsa/fsa.hpp" |
12 | 12 | #include "Tagset.hpp" |
13 | 13 | #include "Morfeusz.hpp" |
14 | +#include "MorfeuszVersion.hpp" | |
14 | 15 | #include "const.hpp" |
15 | 16 | #include "cli/cli.hpp" |
16 | -#include "outputUtils.hpp" | |
17 | +#include "cli/outputUtils.hpp" | |
17 | 18 | |
18 | 19 | using namespace std; |
19 | 20 | |
20 | 21 | int main(int argc, const char** argv) { |
22 | + cerr << "Morfeusz generator, version: " << MORFEUSZ_VERSION << endl; | |
21 | 23 | ez::ezOptionParser& opt = *getOptions(argc, argv, GENERATOR); |
22 | 24 | Morfeusz morfeusz; |
23 | 25 | initializeMorfeusz(opt, morfeusz); |
... | ... |
morfeusz/segrules/SegrulesFSA.hpp
morfeusz/segrules/segrules.cpp
morfeusz/consoleUtils.hpp renamed to morfeusz/test/consoleUtils.hpp
morfeusz/test_recognize_dict.cpp renamed to morfeusz/test/test_recognize_dict.cpp
morfeusz/test_result_equals.cpp renamed to morfeusz/test/test_result_equals.cpp
morfeusz/test_synth_dict.cpp deleted
morfeusz/wrappers/CMakeLists.txt
0 → 100644
morfeusz/java/CMakeLists.txt renamed to morfeusz/wrappers/java/CMakeLists.txt
... | ... | @@ -6,14 +6,14 @@ find_package(Java REQUIRED) |
6 | 6 | |
7 | 7 | include_directories (${JAVA_INCLUDE_PATH}) |
8 | 8 | include_directories (${JAVA_INCLUDE_PATH2}) |
9 | -include_directories (..) | |
9 | +include_directories (${CMAKE_SOURCE_DIR}/morfeusz) | |
10 | 10 | |
11 | 11 | set (SWIG_JAVA_OUTFILE "${CMAKE_CURRENT_BINARY_DIR}/swigJAVA.cpp") |
12 | 12 | file (COPY pl DESTINATION .) |
13 | 13 | set (JAVA_SRC_DIR "${CMAKE_CURRENT_BINARY_DIR}/pl/waw/ipipan/morfeusz") |
14 | 14 | add_custom_command ( |
15 | 15 | OUTPUT ${SWIG_JAVA_OUTFILE} |
16 | - COMMAND swig -java -c++ -package pl.waw.ipipan.morfeusz -o ${SWIG_JAVA_OUTFILE} -outdir ${JAVA_SRC_DIR} ${CMAKE_SOURCE_DIR}/morfeusz/morfeusz.i | |
16 | + COMMAND swig -java -c++ -package pl.waw.ipipan.morfeusz -o ${SWIG_JAVA_OUTFILE} -outdir ${JAVA_SRC_DIR} ${CMAKE_SOURCE_DIR}/morfeusz/wrappers/morfeusz.i | |
17 | 17 | DEPENDS libmorfeusz |
18 | 18 | ) |
19 | 19 | add_custom_target(generate_java_wrapper ALL |
... | ... |
morfeusz/java/README renamed to morfeusz/wrappers/java/README
morfeusz/java/pl/waw/ipipan/morfeusz/app/App.java renamed to morfeusz/wrappers/java/pl/waw/ipipan/morfeusz/app/App.java
morfeusz/java/pl/waw/ipipan/morfeusz/app/MorfeuszUtils.java renamed to morfeusz/wrappers/java/pl/waw/ipipan/morfeusz/app/MorfeuszUtils.java
morfeusz/morfeusz.i renamed to morfeusz/wrappers/morfeusz.i
... | ... | @@ -91,10 +91,10 @@ import java.io.IOException; |
91 | 91 | |
92 | 92 | %ignore Tagset::Tagset(const unsigned char* fsaData); |
93 | 93 | |
94 | -%include "Morfeusz.hpp" | |
95 | -%include "MorphInterpretation.hpp" | |
96 | -%include "const.hpp" | |
97 | -%include "exceptions.hpp" | |
94 | +%include "../Morfeusz.hpp" | |
95 | +%include "../MorphInterpretation.hpp" | |
96 | +%include "../const.hpp" | |
97 | +%include "../exceptions.hpp" | |
98 | 98 | |
99 | 99 | // instantiate vector of interpretations |
100 | 100 | namespace std { |
... | ... |
morfeusz/perl/CMakeLists.txt renamed to morfeusz/wrappers/perl/CMakeLists.txt
morfeusz/python/CMakeLists.txt renamed to morfeusz/wrappers/python/CMakeLists.txt
... | ... | @@ -9,14 +9,14 @@ set (PYMORFEUSZ_VERSION "0.1.0") |
9 | 9 | |
10 | 10 | # SWIG Java |
11 | 11 | INCLUDE_DIRECTORIES (${PYTHON_INCLUDE_PATH}) |
12 | -INCLUDE_DIRECTORIES (..) | |
12 | +INCLUDE_DIRECTORIES (../..) | |
13 | 13 | |
14 | 14 | |
15 | 15 | set (SWIG_PYTHON_OUTFILE_CXX "${CMAKE_CURRENT_BINARY_DIR}/swigPYTHON.cpp") |
16 | 16 | set (SWIG_PYTHON_OUTFILE_PY "${CMAKE_CURRENT_BINARY_DIR}/morfeusz2.py") |
17 | 17 | add_custom_command ( |
18 | 18 | OUTPUT "${SWIG_PYTHON_OUTFILE_CXX}" "${SWIG_PYTHON_OUTFILE_PY}" |
19 | - COMMAND swig -python -c++ -o "${SWIG_PYTHON_OUTFILE_CXX}" "${CMAKE_SOURCE_DIR}/morfeusz/morfeusz.i" | |
19 | + COMMAND swig -python -c++ -o "${SWIG_PYTHON_OUTFILE_CXX}" "${CMAKE_SOURCE_DIR}/morfeusz/wrappers/morfeusz.i" | |
20 | 20 | DEPENDS libmorfeusz |
21 | 21 | ) |
22 | 22 | add_custom_target (generate_python_wrapper |
... | ... |
morfeusz/python/setup.py.in renamed to morfeusz/wrappers/python/setup.py.in
nbproject/configurations.xml
... | ... | @@ -5,6 +5,14 @@ |
5 | 5 | displayName="build" |
6 | 6 | projectFiles="true" |
7 | 7 | root="build"> |
8 | + <logicalFolder name="morfeusz" displayName="morfeusz" projectFiles="true"> | |
9 | + <logicalFolder name="wrappers" displayName="wrappers" projectFiles="true"> | |
10 | + <logicalFolder name="java" displayName="java" projectFiles="true"> | |
11 | + <itemPath>build/morfeusz/wrappers/java/swigJAVA.cpp</itemPath> | |
12 | + </logicalFolder> | |
13 | + <itemPath>build/morfeusz/wrappers/morfeuszPERL_wrap.cxx</itemPath> | |
14 | + </logicalFolder> | |
15 | + </logicalFolder> | |
8 | 16 | <itemPath>build/default_fsa.cpp</itemPath> |
9 | 17 | <itemPath>build/default_synth_fsa.cpp</itemPath> |
10 | 18 | </logicalFolder> |
... | ... | @@ -17,22 +25,25 @@ |
17 | 25 | <itemPath>build1/morfeusz/java/swigJAVA.cpp</itemPath> |
18 | 26 | </logicalFolder> |
19 | 27 | <df root="morfeusz" name="0"> |
20 | - <df name="charset"> | |
28 | + <df name="case"> | |
21 | 29 | <in>CaseConverter.cpp</in> |
30 | + <in>CasePatternHelper.cpp</in> | |
31 | + <in>caseconv.cpp</in> | |
32 | + </df> | |
33 | + <df name="charset"> | |
22 | 34 | <in>CharsetConverter.cpp</in> |
23 | 35 | <in>TextReader.cpp</in> |
24 | - <in>caseconv.cpp</in> | |
25 | 36 | <in>conversion_tables.cpp</in> |
26 | 37 | </df> |
27 | 38 | <df name="cli"> |
28 | 39 | <in>cli.cpp</in> |
29 | 40 | </df> |
30 | - <df name="decoder"> | |
31 | - <in>InterpretedChunksDecoder.cpp</in> | |
32 | - <in>InterpretedChunksDecoder4Analyzer.cpp</in> | |
33 | - <in>InterpretedChunksDecoder4Generator.cpp</in> | |
34 | - </df> | |
35 | - <df name="deserializer"> | |
41 | + <df name="deserialization"> | |
42 | + <df name="morphInterps"> | |
43 | + <in>InterpretedChunksDecoder.cpp</in> | |
44 | + <in>InterpretedChunksDecoder4Analyzer.cpp</in> | |
45 | + <in>InterpretedChunksDecoder4Generator.cpp</in> | |
46 | + </df> | |
36 | 47 | <in>InterpsGroupsReader.cpp</in> |
37 | 48 | <in>MorphDeserializer.cpp</in> |
38 | 49 | </df> |
... | ... | @@ -46,20 +57,19 @@ |
46 | 57 | <in>SegrulesFSA.cpp</in> |
47 | 58 | <in>segrules.cpp</in> |
48 | 59 | </df> |
49 | - <in>CasePatternHelper.cpp</in> | |
60 | + <df name="test"> | |
61 | + <in>test_recognize_dict.cpp</in> | |
62 | + <in>test_result_equals.cpp</in> | |
63 | + </df> | |
50 | 64 | <in>Environment.cpp</in> |
51 | 65 | <in>InflexionGraph.cpp</in> |
52 | 66 | <in>Morfeusz.cpp</in> |
53 | - <in>MorphDeserializer.cpp</in> | |
54 | 67 | <in>MorphInterpretation.cpp</in> |
55 | 68 | <in>Qualifiers.cpp</in> |
56 | 69 | <in>Tagset.cpp</in> |
57 | 70 | <in>const.cpp</in> |
58 | - <in>main.cpp</in> | |
59 | 71 | <in>morfeusz_analyzer.cpp</in> |
60 | 72 | <in>morfeusz_generator.cpp</in> |
61 | - <in>test_recognize_dict.cpp</in> | |
62 | - <in>test_result_equals.cpp</in> | |
63 | 73 | </df> |
64 | 74 | <logicalFolder name="morfeusz" |
65 | 75 | displayName="morfeusz" |
... | ... | @@ -101,10 +111,9 @@ |
101 | 111 | <rebuildPropChanged>false</rebuildPropChanged> |
102 | 112 | </toolsSet> |
103 | 113 | <flagsDictionary> |
104 | - <element flagsID="0" commonFlags="-O2 -std=c++98"/> | |
105 | - <element flagsID="1" commonFlags="-O2 -std=c++98 -fPIC"/> | |
106 | - <element flagsID="2" commonFlags="-std=c++98 -O3"/> | |
107 | - <element flagsID="3" commonFlags="-std=c++98 -O3 -fPIC"/> | |
114 | + <element flagsID="0" commonFlags="-std=c++98 -O3"/> | |
115 | + <element flagsID="1" commonFlags="-std=c++98 -O3 -fPIC"/> | |
116 | + <element flagsID="2" commonFlags="3"/> | |
108 | 117 | </flagsDictionary> |
109 | 118 | <codeAssistance> |
110 | 119 | </codeAssistance> |
... | ... | @@ -114,18 +123,49 @@ |
114 | 123 | <buildCommand>${MAKE} -f Makefile</buildCommand> |
115 | 124 | <cleanCommand>${MAKE} -f Makefile clean</cleanCommand> |
116 | 125 | <executablePath>build/morfeusz/morfeusz_analyzer</executablePath> |
126 | + <ccTool flags="1"> | |
127 | + <incDir> | |
128 | + <pElem>build</pElem> | |
129 | + <pElem>morfeusz</pElem> | |
130 | + </incDir> | |
131 | + <preprocessorList> | |
132 | + <Elem>NDEBUG</Elem> | |
133 | + </preprocessorList> | |
134 | + </ccTool> | |
117 | 135 | </makeTool> |
118 | 136 | </makefileType> |
119 | 137 | <item path="build/default_fsa.cpp" ex="false" tool="1" flavor2="4"> |
138 | + <ccTool> | |
139 | + <incDir> | |
140 | + <pElem>build/morfeusz</pElem> | |
141 | + </incDir> | |
142 | + <preprocessorList> | |
143 | + <Elem>__PIC__=2</Elem> | |
144 | + <Elem>__pic__=2</Elem> | |
145 | + <Elem>libmorfeusz_EXPORTS</Elem> | |
146 | + </preprocessorList> | |
147 | + <undefinedList> | |
148 | + <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem> | |
149 | + </undefinedList> | |
150 | + </ccTool> | |
120 | 151 | </item> |
121 | 152 | <item path="build/default_synth_fsa.cpp" ex="false" tool="1" flavor2="4"> |
153 | + <ccTool> | |
154 | + <incDir> | |
155 | + <pElem>build/morfeusz</pElem> | |
156 | + </incDir> | |
157 | + <preprocessorList> | |
158 | + <Elem>__PIC__=2</Elem> | |
159 | + <Elem>__pic__=2</Elem> | |
160 | + <Elem>libmorfeusz_EXPORTS</Elem> | |
161 | + </preprocessorList> | |
162 | + <undefinedList> | |
163 | + <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem> | |
164 | + </undefinedList> | |
165 | + </ccTool> | |
122 | 166 | </item> |
123 | 167 | <item path="build/morfeusz/default_fsa.cpp" ex="false" tool="1" flavor2="4"> |
124 | 168 | <ccTool flags="1"> |
125 | - <incDir> | |
126 | - <pElem>build</pElem> | |
127 | - <pElem>morfeusz</pElem> | |
128 | - </incDir> | |
129 | 169 | <preprocessorList> |
130 | 170 | <Elem>libmorfeusz_EXPORTS</Elem> |
131 | 171 | </preprocessorList> |
... | ... | @@ -136,17 +176,13 @@ |
136 | 176 | tool="1" |
137 | 177 | flavor2="4"> |
138 | 178 | <ccTool flags="1"> |
139 | - <incDir> | |
140 | - <pElem>build</pElem> | |
141 | - <pElem>morfeusz</pElem> | |
142 | - </incDir> | |
143 | 179 | <preprocessorList> |
144 | 180 | <Elem>libmorfeusz_EXPORTS</Elem> |
145 | 181 | </preprocessorList> |
146 | 182 | </ccTool> |
147 | 183 | </item> |
148 | 184 | <item path="build/morfeusz/java/swigJAVA.cpp" ex="false" tool="1" flavor2="4"> |
149 | - <ccTool flags="3"> | |
185 | + <ccTool flags="1"> | |
150 | 186 | </ccTool> |
151 | 187 | </item> |
152 | 188 | <item path="build/morfeusz/morfeuszJAVA_wrap.cxx" |
... | ... | @@ -155,14 +191,11 @@ |
155 | 191 | flavor2="8"> |
156 | 192 | <ccTool> |
157 | 193 | <incDir> |
158 | - <pElem>build</pElem> | |
159 | - <pElem>morfeusz</pElem> | |
160 | 194 | <pElem>build/morfeusz</pElem> |
161 | 195 | <pElem>/usr/lib/jvm/default-java/include</pElem> |
162 | 196 | <pElem>build/morfeusz/java</pElem> |
163 | 197 | </incDir> |
164 | 198 | <preprocessorList> |
165 | - <Elem>NDEBUG</Elem> | |
166 | 199 | <Elem>_OPTIMIZE__=1</Elem> |
167 | 200 | <Elem>__PIC__=2</Elem> |
168 | 201 | <Elem>__pic__=2</Elem> |
... | ... | @@ -181,8 +214,6 @@ |
181 | 214 | flavor2="4"> |
182 | 215 | <ccTool flags="1"> |
183 | 216 | <incDir> |
184 | - <pElem>build</pElem> | |
185 | - <pElem>morfeusz</pElem> | |
186 | 217 | <pElem>/usr/lib/perl/5.14/CORE</pElem> |
187 | 218 | <pElem>build/morfeusz/perl</pElem> |
188 | 219 | </incDir> |
... | ... | @@ -197,14 +228,11 @@ |
197 | 228 | flavor2="8"> |
198 | 229 | <ccTool> |
199 | 230 | <incDir> |
200 | - <pElem>build</pElem> | |
201 | - <pElem>morfeusz</pElem> | |
202 | 231 | <pElem>build/morfeusz</pElem> |
203 | 232 | <pElem>/usr/include/python2.7</pElem> |
204 | 233 | <pElem>build/morfeusz/python</pElem> |
205 | 234 | </incDir> |
206 | 235 | <preprocessorList> |
207 | - <Elem>NDEBUG</Elem> | |
208 | 236 | <Elem>_OPTIMIZE__=1</Elem> |
209 | 237 | <Elem>__PIC__=2</Elem> |
210 | 238 | <Elem>__pic__=2</Elem> |
... | ... | @@ -218,22 +246,40 @@ |
218 | 246 | </ccTool> |
219 | 247 | </item> |
220 | 248 | <item path="build/morfeusz/python/swigPYTHON.cpp" |
249 | + ex="true" | |
250 | + tool="3" | |
251 | + flavor2="4"> | |
252 | + </item> | |
253 | + <item path="build/morfeusz/wrappers/java/swigJAVA.cpp" | |
221 | 254 | ex="false" |
222 | 255 | tool="1" |
223 | 256 | flavor2="4"> |
257 | + <ccTool flags="1"> | |
258 | + </ccTool> | |
259 | + </item> | |
260 | + <item path="build/morfeusz/wrappers/morfeuszPERL_wrap.cxx" | |
261 | + ex="false" | |
262 | + tool="1" | |
263 | + flavor2="4"> | |
264 | + <ccTool flags="1"> | |
265 | + <incDir> | |
266 | + <pElem>/usr/lib/perl/5.14/CORE</pElem> | |
267 | + <pElem>build/morfeusz/wrappers/perl</pElem> | |
268 | + </incDir> | |
269 | + <preprocessorList> | |
270 | + <Elem>morfeusz_perl_EXPORTS</Elem> | |
271 | + </preprocessorList> | |
272 | + </ccTool> | |
224 | 273 | </item> |
225 | 274 | <item path="build1/morfeusz/java/swigJAVA.cpp" ex="false" tool="1" flavor2="4"> |
226 | 275 | </item> |
227 | 276 | <item path="default_fsa.cpp" ex="false" tool="1" flavor2="4"> |
228 | 277 | <ccTool> |
229 | 278 | <incDir> |
230 | - <pElem>build</pElem> | |
231 | - <pElem>morfeusz</pElem> | |
232 | 279 | <pElem>build/morfeusz</pElem> |
233 | 280 | <pElem>morfeusz/build/morfeusz</pElem> |
234 | 281 | </incDir> |
235 | 282 | <preprocessorList> |
236 | - <Elem>NDEBUG</Elem> | |
237 | 283 | <Elem>libmorfeusz_EXPORTS</Elem> |
238 | 284 | </preprocessorList> |
239 | 285 | </ccTool> |
... | ... | @@ -241,13 +287,10 @@ |
241 | 287 | <item path="default_synth_fsa.cpp" ex="false" tool="1" flavor2="4"> |
242 | 288 | <ccTool> |
243 | 289 | <incDir> |
244 | - <pElem>build</pElem> | |
245 | - <pElem>morfeusz</pElem> | |
246 | 290 | <pElem>build/morfeusz</pElem> |
247 | 291 | <pElem>morfeusz/build/morfeusz</pElem> |
248 | 292 | </incDir> |
249 | 293 | <preprocessorList> |
250 | - <Elem>NDEBUG</Elem> | |
251 | 294 | <Elem>libmorfeusz_EXPORTS</Elem> |
252 | 295 | </preprocessorList> |
253 | 296 | </ccTool> |
... | ... | @@ -255,34 +298,32 @@ |
255 | 298 | <folder path="0"> |
256 | 299 | <ccTool> |
257 | 300 | <incDir> |
258 | - <pElem>build</pElem> | |
259 | - <pElem>morfeusz</pElem> | |
260 | 301 | <pElem>build/morfeusz</pElem> |
261 | 302 | </incDir> |
262 | 303 | </ccTool> |
263 | 304 | </folder> |
264 | - <folder path="0/charset"> | |
305 | + <folder path="0/case"> | |
265 | 306 | <ccTool> |
266 | 307 | <preprocessorList> |
267 | 308 | <Elem>libmorfeusz_EXPORTS</Elem> |
268 | 309 | </preprocessorList> |
269 | 310 | </ccTool> |
270 | 311 | </folder> |
271 | - <folder path="0/cli"> | |
312 | + <folder path="0/charset"> | |
272 | 313 | <ccTool> |
273 | 314 | <preprocessorList> |
274 | 315 | <Elem>libmorfeusz_EXPORTS</Elem> |
275 | 316 | </preprocessorList> |
276 | 317 | </ccTool> |
277 | 318 | </folder> |
278 | - <folder path="0/decoder"> | |
319 | + <folder path="0/cli"> | |
279 | 320 | <ccTool> |
280 | 321 | <preprocessorList> |
281 | 322 | <Elem>libmorfeusz_EXPORTS</Elem> |
282 | 323 | </preprocessorList> |
283 | 324 | </ccTool> |
284 | 325 | </folder> |
285 | - <folder path="0/deserializer"> | |
326 | + <folder path="0/deserialization"> | |
286 | 327 | <ccTool> |
287 | 328 | <preprocessorList> |
288 | 329 | <Elem>libmorfeusz_EXPORTS</Elem> |
... | ... | @@ -303,35 +344,24 @@ |
303 | 344 | </preprocessorList> |
304 | 345 | </ccTool> |
305 | 346 | </folder> |
306 | - <folder path="build"> | |
347 | + <folder path="build/morfeusz/wrappers/java"> | |
307 | 348 | <ccTool> |
308 | 349 | <incDir> |
309 | - <pElem>build</pElem> | |
310 | - <pElem>morfeusz</pElem> | |
311 | - <pElem>build/morfeusz</pElem> | |
350 | + <pElem>/usr/lib/jvm/default-java/include</pElem> | |
312 | 351 | </incDir> |
313 | 352 | <preprocessorList> |
314 | - <Elem>NDEBUG</Elem> | |
315 | - <Elem>__PIC__=2</Elem> | |
316 | - <Elem>__pic__=2</Elem> | |
317 | - <Elem>libmorfeusz_EXPORTS</Elem> | |
353 | + <Elem>libjmorfeusz_EXPORTS</Elem> | |
318 | 354 | </preprocessorList> |
319 | - <undefinedList> | |
320 | - <Elem>__GCC_HAVE_DWARF2_CFI_ASM=1</Elem> | |
321 | - </undefinedList> | |
322 | 355 | </ccTool> |
323 | 356 | </folder> |
324 | 357 | <folder path="java"> |
325 | 358 | <ccTool> |
326 | 359 | <incDir> |
327 | - <pElem>build</pElem> | |
328 | - <pElem>morfeusz</pElem> | |
329 | 360 | <pElem>build/morfeusz</pElem> |
330 | 361 | <pElem>build1</pElem> |
331 | 362 | <pElem>/usr/lib/jvm/java-6-openjdk/include</pElem> |
332 | 363 | </incDir> |
333 | 364 | <preprocessorList> |
334 | - <Elem>NDEBUG</Elem> | |
335 | 365 | <Elem>libjmorfeusz_EXPORTS</Elem> |
336 | 366 | <Elem>libmorfeusz_EXPORTS</Elem> |
337 | 367 | </preprocessorList> |
... | ... | @@ -340,12 +370,9 @@ |
340 | 370 | <folder path="morfeusz/java"> |
341 | 371 | <ccTool> |
342 | 372 | <incDir> |
343 | - <pElem>build</pElem> | |
344 | - <pElem>morfeusz</pElem> | |
345 | - <pElem>/usr/lib/jvm/java-6-openjdk/include</pElem> | |
373 | + <pElem>/usr/lib/jvm/default-java/include</pElem> | |
346 | 374 | </incDir> |
347 | 375 | <preprocessorList> |
348 | - <Elem>NDEBUG</Elem> | |
349 | 376 | <Elem>libjmorfeusz_EXPORTS</Elem> |
350 | 377 | </preprocessorList> |
351 | 378 | </ccTool> |
... | ... | @@ -353,21 +380,10 @@ |
353 | 380 | <folder path="morfeusz/python"> |
354 | 381 | <ccTool> |
355 | 382 | <incDir> |
356 | - <pElem>morfeusz</pElem> | |
357 | 383 | <pElem>/usr/include/python2.7</pElem> |
358 | 384 | </incDir> |
359 | - <preprocessorList> | |
360 | - <Elem>NDEBUG</Elem> | |
361 | - </preprocessorList> | |
362 | 385 | </ccTool> |
363 | 386 | </folder> |
364 | - <item path="morfeusz/CasePatternHelper.cpp" ex="false" tool="1" flavor2="4"> | |
365 | - <ccTool flags="1"> | |
366 | - <preprocessorList> | |
367 | - <Elem>libmorfeusz_EXPORTS</Elem> | |
368 | - </preprocessorList> | |
369 | - </ccTool> | |
370 | - </item> | |
371 | 387 | <item path="morfeusz/Environment.cpp" ex="false" tool="1" flavor2="4"> |
372 | 388 | <ccTool flags="1"> |
373 | 389 | <preprocessorList> |
... | ... | @@ -383,17 +399,8 @@ |
383 | 399 | </ccTool> |
384 | 400 | </item> |
385 | 401 | <item path="morfeusz/Morfeusz.cpp" ex="false" tool="1" flavor2="4"> |
386 | - <ccTool flags="3"> | |
387 | - <preprocessorList> | |
388 | - <Elem>NDEBUG</Elem> | |
389 | - <Elem>libmorfeusz_EXPORTS</Elem> | |
390 | - </preprocessorList> | |
391 | - </ccTool> | |
392 | - </item> | |
393 | - <item path="morfeusz/MorphDeserializer.cpp" ex="false" tool="1" flavor2="4"> | |
394 | - <ccTool flags="3"> | |
402 | + <ccTool flags="2"> | |
395 | 403 | <preprocessorList> |
396 | - <Elem>NDEBUG</Elem> | |
397 | 404 | <Elem>libmorfeusz_EXPORTS</Elem> |
398 | 405 | </preprocessorList> |
399 | 406 | </ccTool> |
... | ... | @@ -419,36 +426,36 @@ |
419 | 426 | </preprocessorList> |
420 | 427 | </ccTool> |
421 | 428 | </item> |
422 | - <item path="morfeusz/charset/CaseConverter.cpp" ex="false" tool="1" flavor2="4"> | |
429 | + <item path="morfeusz/case/CaseConverter.cpp" ex="false" tool="1" flavor2="4"> | |
423 | 430 | <ccTool flags="1"> |
424 | 431 | </ccTool> |
425 | 432 | </item> |
426 | - <item path="morfeusz/charset/CharsetConverter.cpp" | |
433 | + <item path="morfeusz/case/CasePatternHelper.cpp" | |
427 | 434 | ex="false" |
428 | 435 | tool="1" |
429 | 436 | flavor2="4"> |
430 | - <ccTool flags="3"> | |
431 | - <preprocessorList> | |
432 | - <Elem>NDEBUG</Elem> | |
433 | - </preprocessorList> | |
434 | - </ccTool> | |
435 | - </item> | |
436 | - <item path="morfeusz/charset/TextReader.cpp" ex="false" tool="1" flavor2="4"> | |
437 | 437 | <ccTool flags="1"> |
438 | 438 | </ccTool> |
439 | 439 | </item> |
440 | - <item path="morfeusz/charset/caseconv.cpp" ex="false" tool="1" flavor2="4"> | |
440 | + <item path="morfeusz/case/caseconv.cpp" ex="false" tool="1" flavor2="4"> | |
441 | 441 | <ccTool flags="1"> |
442 | 442 | </ccTool> |
443 | 443 | </item> |
444 | + <item path="morfeusz/charset/CharsetConverter.cpp" | |
445 | + ex="false" | |
446 | + tool="1" | |
447 | + flavor2="4"> | |
448 | + </item> | |
449 | + <item path="morfeusz/charset/TextReader.cpp" ex="false" tool="1" flavor2="4"> | |
450 | + </item> | |
444 | 451 | <item path="morfeusz/charset/conversion_tables.cpp" |
445 | 452 | ex="false" |
446 | 453 | tool="1" |
447 | 454 | flavor2="4"> |
448 | - <ccTool flags="1"> | |
449 | - </ccTool> | |
450 | 455 | </item> |
451 | 456 | <item path="morfeusz/cli/cli.cpp" ex="false" tool="1" flavor2="4"> |
457 | + <ccTool flags="1"> | |
458 | + </ccTool> | |
452 | 459 | </item> |
453 | 460 | <item path="morfeusz/const.cpp" ex="false" tool="1" flavor2="4"> |
454 | 461 | <ccTool flags="1"> |
... | ... | @@ -457,29 +464,31 @@ |
457 | 464 | </preprocessorList> |
458 | 465 | </ccTool> |
459 | 466 | </item> |
460 | - <item path="morfeusz/decoder/InterpretedChunksDecoder.cpp" | |
467 | + <item path="morfeusz/deserialization/InterpsGroupsReader.cpp" | |
461 | 468 | ex="false" |
462 | 469 | tool="1" |
463 | 470 | flavor2="4"> |
464 | 471 | </item> |
465 | - <item path="morfeusz/decoder/InterpretedChunksDecoder4Analyzer.cpp" | |
472 | + <item path="morfeusz/deserialization/MorphDeserializer.cpp" | |
466 | 473 | ex="false" |
467 | 474 | tool="1" |
468 | 475 | flavor2="4"> |
469 | 476 | </item> |
470 | - <item path="morfeusz/decoder/InterpretedChunksDecoder4Generator.cpp" | |
477 | + <item path="morfeusz/deserialization/morphInterps/InterpretedChunksDecoder.cpp" | |
471 | 478 | ex="false" |
472 | 479 | tool="1" |
473 | 480 | flavor2="4"> |
481 | + <ccTool flags="1"> | |
482 | + </ccTool> | |
474 | 483 | </item> |
475 | - <item path="morfeusz/deserializer/InterpsGroupsReader.cpp" | |
484 | + <item path="morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Analyzer.cpp" | |
476 | 485 | ex="false" |
477 | 486 | tool="1" |
478 | 487 | flavor2="4"> |
479 | 488 | <ccTool flags="1"> |
480 | 489 | </ccTool> |
481 | 490 | </item> |
482 | - <item path="morfeusz/deserializer/MorphDeserializer.cpp" | |
491 | + <item path="morfeusz/deserialization/morphInterps/InterpretedChunksDecoder4Generator.cpp" | |
483 | 492 | ex="false" |
484 | 493 | tool="1" |
485 | 494 | flavor2="4"> |
... | ... | @@ -496,9 +505,6 @@ |
496 | 505 | <incDir> |
497 | 506 | <pElem>build/fsa</pElem> |
498 | 507 | </incDir> |
499 | - <preprocessorList> | |
500 | - <Elem>NDEBUG</Elem> | |
501 | - </preprocessorList> | |
502 | 508 | </ccTool> |
503 | 509 | </item> |
504 | 510 | <item path="morfeusz/fsa/test_recognize.cpp" ex="false" tool="1" flavor2="8"> |
... | ... | @@ -506,9 +512,6 @@ |
506 | 512 | <incDir> |
507 | 513 | <pElem>build/fsa</pElem> |
508 | 514 | </incDir> |
509 | - <preprocessorList> | |
510 | - <Elem>NDEBUG</Elem> | |
511 | - </preprocessorList> | |
512 | 515 | </ccTool> |
513 | 516 | </item> |
514 | 517 | <item path="morfeusz/fsa/test_speed.cpp" ex="false" tool="1" flavor2="8"> |
... | ... | @@ -516,43 +519,33 @@ |
516 | 519 | <incDir> |
517 | 520 | <pElem>build/fsa</pElem> |
518 | 521 | </incDir> |
519 | - <preprocessorList> | |
520 | - <Elem>NDEBUG</Elem> | |
521 | - </preprocessorList> | |
522 | - </ccTool> | |
523 | - </item> | |
524 | - <item path="morfeusz/main.cpp" ex="false" tool="1" flavor2="4"> | |
525 | - <ccTool> | |
526 | - <preprocessorList> | |
527 | - <Elem>NDEBUG</Elem> | |
528 | - <Elem>_OPTIMIZE__=1</Elem> | |
529 | - <Elem>libmorfeusz_EXPORTS</Elem> | |
530 | - </preprocessorList> | |
531 | - <undefinedList> | |
532 | - <Elem>__NO_INLINE__</Elem> | |
533 | - </undefinedList> | |
534 | 522 | </ccTool> |
535 | 523 | </item> |
536 | 524 | <item path="morfeusz/morfeusz_analyzer.cpp" ex="false" tool="1" flavor2="4"> |
537 | - <ccTool flags="2"> | |
538 | - <preprocessorList> | |
539 | - <Elem>NDEBUG</Elem> | |
540 | - </preprocessorList> | |
525 | + <ccTool flags="0"> | |
541 | 526 | </ccTool> |
542 | 527 | </item> |
543 | 528 | <item path="morfeusz/morfeusz_generator.cpp" ex="false" tool="1" flavor2="4"> |
529 | + <ccTool flags="0"> | |
530 | + </ccTool> | |
544 | 531 | </item> |
545 | 532 | <item path="morfeusz/segrules/SegrulesFSA.cpp" ex="false" tool="1" flavor2="4"> |
546 | - <ccTool flags="1"> | |
547 | - </ccTool> | |
548 | 533 | </item> |
549 | 534 | <item path="morfeusz/segrules/segrules.cpp" ex="false" tool="1" flavor2="4"> |
550 | - <ccTool flags="1"> | |
551 | - </ccTool> | |
552 | 535 | </item> |
553 | - <item path="morfeusz/test_recognize_dict.cpp" ex="false" tool="1" flavor2="4"> | |
536 | + <item path="morfeusz/test/test_recognize_dict.cpp" | |
537 | + ex="false" | |
538 | + tool="1" | |
539 | + flavor2="4"> | |
540 | + <ccTool flags="0"> | |
541 | + </ccTool> | |
554 | 542 | </item> |
555 | - <item path="morfeusz/test_result_equals.cpp" ex="false" tool="1" flavor2="4"> | |
543 | + <item path="morfeusz/test/test_result_equals.cpp" | |
544 | + ex="false" | |
545 | + tool="1" | |
546 | + flavor2="4"> | |
547 | + <ccTool flags="0"> | |
548 | + </ccTool> | |
556 | 549 | </item> |
557 | 550 | </conf> |
558 | 551 | </confs> |
... | ... |
profile.sh
... | ... | @@ -4,9 +4,9 @@ rm -rf profbuild |
4 | 4 | mkdir -p profbuild |
5 | 5 | cd profbuild |
6 | 6 | cmake -D INPUT_DICTIONARIES=../input/dodatki.tab,../input/PoliMorfSmall.tab -D CMAKE_BUILD_TYPE=Debug -D CMAKE_CXX_FLAGS="-g -O2" -D CMAKE_SHARED_LINKER_FLAGS="-lprofiler" -D CMAKE_EXE_LINKER_FLAGS="-lprofiler" .. |
7 | -make -j4 | |
7 | +make | |
8 | 8 | rm -f /tmp/morfeusz.prof |
9 | 9 | export LD_PRELOAD="/usr/lib/libprofiler.so" |
10 | 10 | export CPUPROFILE="/tmp/morfeusz.prof" |
11 | -morfeusz/morfeusz_analyzer -i /tmp/dupadupa < /mnt/storage/morfeusz/sents30k > /dev/null | |
11 | +morfeusz/morfeusz_analyzer -i /home/wkieras/output/sgjp_analyzer.fsa < /mnt/storage/morfeusz/sents10k > /dev/null | |
12 | 12 | ### pprof --gv profbuild/morfeusz/morfeusz_analyzer /tmp/morfeusz.prof |
... | ... |
morfeusz/Toolchain-Darwin-amd64.cmake renamed to toolchains/Toolchain-Darwin-amd64.cmake
morfeusz/Toolchain-Linux-amd64.cmake renamed to toolchains/Toolchain-Linux-amd64.cmake
morfeusz/Toolchain-Linux-i386.cmake renamed to toolchains/Toolchain-Linux-i386.cmake
morfeusz/Toolchain-Windows-amd64.cmake renamed to toolchains/Toolchain-Windows-amd64.cmake
morfeusz/Toolchain-Windows-i386.cmake renamed to toolchains/Toolchain-Windows-i386.cmake