diff --git a/fsa/fsa_impl.hpp b/fsa/fsa_impl.hpp index abd6cc6..8d0896b 100644 --- a/fsa/fsa_impl.hpp +++ b/fsa/fsa_impl.hpp @@ -18,7 +18,7 @@ #include "utils.hpp" #include "const.hpp" -using namespace std; +//using namespace std; //static const unsigned int FSA_OFFSET = 6; template <class T> @@ -70,7 +70,7 @@ FSA<T>* FSA<T>::getFSA(const unsigned char* ptr, const Deserializer<T>& deserial uint8_t versionNum = *(ptr + VERSION_NUM_OFFSET); if (versionNum != VERSION_NUM) { - throw FSAException(string("Invalid version number: ") + std::to_string(versionNum) + ", should be: " + to_string(VERSION_NUM)); + throw FSAException(std::string("Invalid version number: ") + std::to_string(versionNum) + ", should be: " + std::to_string(VERSION_NUM)); } uint8_t implementationNum = *(ptr + IMPLEMENTATION_NUM_OFFSET); @@ -85,7 +85,7 @@ FSA<T>* FSA<T>::getFSA(const unsigned char* ptr, const Deserializer<T>& deserial case 2: return new CompressedFSA2<T>(startPtr, deserializer); default: - throw FSAException(string("Invalid implementation number: ") + to_string(versionNum) + ", should be: " + to_string(VERSION_NUM)); + throw FSAException(std::string("Invalid implementation number: ") + std::to_string(implementationNum)); } } diff --git a/morfeusz/CMakeLists.txt b/morfeusz/CMakeLists.txt index dc0509a..52f710c 100644 --- a/morfeusz/CMakeLists.txt +++ b/morfeusz/CMakeLists.txt @@ -8,10 +8,12 @@ add_library (morfeusz2 morfeusz.hpp morfeusz.cpp) add_executable (morfeusz2_analyze main.cpp) add_executable (test_morph test_morph.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp) add_executable (test_morfeusz test_morph.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp Morfeusz.cpp charset/CharsetConverter.cpp FlexionGraph.cpp) +add_executable (test_simple test_simple.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp Morfeusz.cpp charset/CharsetConverter.cpp FlexionGraph.cpp) # 
Link the executable to the Hello library. target_link_libraries (morfeusz2_analyze morfeusz2) set_target_properties ( morfeusz2_analyze PROPERTIES COMPILE_FLAGS "-std=gnu++0x" ) set_target_properties ( test_morph PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" ) -set_target_properties ( test_morfeusz PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" ) \ No newline at end of file +set_target_properties ( test_morfeusz PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" ) +set_target_properties ( test_simple PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" ) \ No newline at end of file diff --git a/morfeusz/FlexionGraph.cpp b/morfeusz/FlexionGraph.cpp index 55f5dd0..1aa59be 100644 --- a/morfeusz/FlexionGraph.cpp +++ b/morfeusz/FlexionGraph.cpp @@ -30,7 +30,7 @@ void FlexionGraph::addPath(const std::vector<InterpretedChunk>& path) { void FlexionGraph::appendToResults(const Tagset& tagset, std::vector<MorphInterpretation>& results) { int endNode = graph.size(); - for (int i = 0; i < graph.size(); i++) { + for (unsigned int i = 0; i < graph.size(); i++) { vector<Edge>& edges = graph[i]; for (Edge& e: edges) { int realStartNode = i + this->startNode; diff --git a/morfeusz/InterpretedChunk.hpp b/morfeusz/InterpretedChunk.hpp index 6e8fd78..c4b74ba 100644 --- a/morfeusz/InterpretedChunk.hpp +++ b/morfeusz/InterpretedChunk.hpp @@ -13,7 +13,7 @@ struct InterpretedChunk { const char* chunk; long chunkLength; - InterpsGroup& interpsGroup; + InterpsGroup interpsGroup; }; #endif /* INTERPRETEDCHUNK_HPP */ diff --git a/morfeusz/Morfeusz.cpp b/morfeusz/Morfeusz.cpp index ef21bf2..99ca9ac 100644 --- a/morfeusz/Morfeusz.cpp +++ b/morfeusz/Morfeusz.cpp @@ -6,6 +6,7 @@ */ #include <string> +#include <iostream> #include "fsa.hpp" #include "utils.hpp" #include "Morfeusz.hpp" @@ -18,18 +19,28 @@ using namespace std; static FSA<vector<InterpsGroup >> *initializeFSA(const string& filename) { + cerr << "initialize FSA" << endl; static Deserializer < vector < InterpsGroup >> *deserializer = 
new MorphDeserializer(); return FSA < vector < InterpsGroup >> ::getFSA(filename, *deserializer); } static CharsetConverter* initializeCharsetConverter() { + cerr << "initialize charset converter" << endl; static CharsetConverter* converter = new UTF8CharsetConverter(); return converter; } +static Tagset* initializeTagset(const string& filename) { + cerr << "initialize tagset" << endl; + static Tagset* tagset = new Tagset(readFile(filename.c_str())); + return tagset; +} + Morfeusz::Morfeusz(const string& filename) -: fsa(initializeFSA(filename)), charsetConverter(initializeCharsetConverter()) { +: fsa(initializeFSA(filename)), + charsetConverter(initializeCharsetConverter()), + tagset(initializeTagset(filename)) { } @@ -47,7 +58,7 @@ void Morfeusz::processOneWord( FlexionGraph graph(startNodeNum); const char* currInput = inputData; doProcessOneWord(currInput, inputEnd, accum, graph); - graph.appendToResults(this->tagset, results); + graph.appendToResults(*this->tagset, results); inputData = currInput; } @@ -88,12 +99,16 @@ void Morfeusz::feedState( } } -ResultsIterator Morfeusz::analyze(const std::string& text) { +ResultsIterator Morfeusz::analyze(const string& text) { // const char* textStart = text.c_str(); // const char* textEnd = text.c_str() + text.length(); return ResultsIterator(text, *this); } +void Morfeusz::analyze(const string& text, vector<MorphInterpretation>& results) { + /* TODO: not implemented yet -- text is ignored and results is left unchanged. */ +} + ResultsIterator::ResultsIterator(const string& text, const Morfeusz& morfeusz) : rawInput(text.c_str()), morfeusz(morfeusz) { diff --git a/morfeusz/Morfeusz.hpp b/morfeusz/Morfeusz.hpp index 739f63f..b7a82a8 100644 --- a/morfeusz/Morfeusz.hpp +++ b/morfeusz/Morfeusz.hpp @@ -32,15 +32,17 @@ public: virtual ~Morfeusz(); // Morfeusz(const Morfeusz& orig); ResultsIterator analyze(const std::string& text); - -// Morfeusz(); - friend class ResultsIterator; -private: + void analyze(const std::string& text, std::vector<MorphInterpretation>& result); + void processOneWord( const char*& 
inputData, const char* inputEnd, const int startNodeNum, std::vector<MorphInterpretation>& result) const; + +// Morfeusz(); + friend class ResultsIterator; +private: void doProcessOneWord( const char*& inputData, @@ -54,6 +56,7 @@ private: FSAType* fsa; CharsetConverter* charsetConverter; + Tagset* tagset; }; class ResultsIterator { diff --git a/morfeusz/test_simple.cpp b/morfeusz/test_simple.cpp new file mode 100644 index 0000000..b8afe79 --- /dev/null +++ b/morfeusz/test_simple.cpp @@ -0,0 +1,36 @@ +/* + * File: test_simple.cpp + * Author: lennyn + * + * Created on November 18, 2013, 10:30 PM + */ + +#include <cstdlib> +#include <iostream> +#include <string> +#include <vector> + +#include "Morfeusz.hpp" +#include "MorphInterpretation.hpp" + +using namespace std; + +/* + * Smoke test: runs processOneWord on one hard-coded word, using the file named by argv[1], and prints every resulting interpretation to stderr. + */ +int main(int argc, char** argv) { + if (argc < 2) { + cerr << "Usage: test_simple FILE" << endl; + return EXIT_FAILURE; + } + Morfeusz morfeusz(argv[1]); + vector<MorphInterpretation> res; + string word = "mijaĆem"; + const char* ptr = word.c_str(); + morfeusz.processOneWord(ptr, word.c_str() + word.size(), 0, res); + for (MorphInterpretation& mi: res) { + cerr << mi.getStartNode() << " " << mi.getEndNode() << " " << mi.getLemma() << " " << mi.getTag() << " " << mi.getName() << endl; + } + return 0; +} + diff --git a/nbproject/configurations.xml b/nbproject/configurations.xml index c0d81ea..60de1ce 100644 --- a/nbproject/configurations.xml +++ b/nbproject/configurations.xml @@ -11,12 +11,6 @@ <df name="charset"> <in>CharsetConverter.cpp</in> </df> - <df name="encoding"> - <in>CharsetConverter.cpp</in> - </df> - <df name="flexion"> - <in>FlexionGraph.cpp</in> - </df> <in>FlexionGraph.cpp</in> <in>Morfeusz.cpp</in> <in>MorphDeserializer.cpp</in> @@ -24,6 +18,7 @@ <in>Tagset.cpp</in> <in>main.cpp</in> <in>morfeusz.cpp</in> + <in>test_morfeusz.cpp</in> <in>test_morph.cpp</in> </df> <logicalFolder name="ExternalFiles" @@ -57,7 +52,7 @@ <executablePath>build/fsa/test_dict</executablePath> </makeTool> </makefileType> - <folder path="1/charset"> + <folder path="1"> <ccTool> <incDir> <pElem>fsa</pElem> @@ -96,42 +91,22 @@ </item> <item 
path="morfeusz/FlexionGraph.cpp" ex="false" tool="1" flavor2="8"> <ccTool> - <incDir> - <pElem>fsa</pElem> - <pElem>build/morfeusz</pElem> - </incDir> </ccTool> </item> <item path="morfeusz/Morfeusz.cpp" ex="false" tool="1" flavor2="8"> <ccTool> - <incDir> - <pElem>fsa</pElem> - <pElem>build/morfeusz</pElem> - </incDir> </ccTool> </item> <item path="morfeusz/MorphDeserializer.cpp" ex="false" tool="1" flavor2="8"> <ccTool> - <incDir> - <pElem>fsa</pElem> - <pElem>build/morfeusz</pElem> - </incDir> </ccTool> </item> <item path="morfeusz/MorphInterpretation.cpp" ex="false" tool="1" flavor2="8"> <ccTool> - <incDir> - <pElem>fsa</pElem> - <pElem>build/morfeusz</pElem> - </incDir> </ccTool> </item> <item path="morfeusz/Tagset.cpp" ex="false" tool="1" flavor2="8"> <ccTool> - <incDir> - <pElem>fsa</pElem> - <pElem>build/morfeusz</pElem> - </incDir> </ccTool> </item> <item path="morfeusz/charset/CharsetConverter.cpp" @@ -141,33 +116,23 @@ <ccTool> </ccTool> </item> - <item path="morfeusz/encoding/CharsetConverter.cpp" - ex="false" - tool="1" - flavor2="4"> + <item path="morfeusz/main.cpp" ex="false" tool="1" flavor2="8"> <ccTool> </ccTool> </item> - <item path="morfeusz/flexion/FlexionGraph.cpp" ex="false" tool="1" flavor2="4"> + <item path="morfeusz/morfeusz.cpp" ex="false" tool="1" flavor2="4"> <ccTool> </ccTool> </item> - <item path="morfeusz/main.cpp" ex="false" tool="1" flavor2="8"> + <item path="morfeusz/test_morfeusz.cpp" ex="false" tool="1" flavor2="4"> <ccTool> - <incDir> - <pElem>fsa</pElem> - <pElem>build/morfeusz</pElem> - </incDir> </ccTool> </item> - <item path="morfeusz/morfeusz.cpp" ex="false" tool="1" flavor2="4"> + <item path="morfeusz/test_morph.cpp" ex="false" tool="1" flavor2="8"> <ccTool> - <incDir> - <pElem>morfeusz</pElem> - </incDir> </ccTool> </item> - <item path="morfeusz/test_morph.cpp" ex="false" tool="1" flavor2="8"> + <item path="morfeusz/test_simple.cpp" ex="false" tool="1" flavor2="0"> <ccTool> <incDir> <pElem>fsa</pElem>