Commit 5700d3a02747423a76b75148b338f19e6912b168

Authored by Michał Lenart
1 parent e5220b90

- podstawa analizy tekstu już działa

- obsługa ign-ów w zasadzie też

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@23 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
CMakeLists.txt
1 1
2 cmake_minimum_required (VERSION 2.8) 2 cmake_minimum_required (VERSION 2.8)
3 project (Morfeusz) 3 project (Morfeusz)
4 - 4 +set(CMAKE_BUILD_TYPE "Debug")
5 enable_testing() 5 enable_testing()
6 6
7 add_subdirectory (fsa) 7 add_subdirectory (fsa)
morfeusz/CMakeLists.txt
@@ -4,15 +4,14 @@ @@ -4,15 +4,14 @@
4 # Make sure the linker can find the Hello library once it is built. 4 # Make sure the linker can find the Hello library once it is built.
5 #link_directories (${Morfeusz_BINARY_DIR}/Hello) 5 #link_directories (${Morfeusz_BINARY_DIR}/Hello)
6 include_directories (${Morfeusz_SOURCE_DIR}/fsa) 6 include_directories (${Morfeusz_SOURCE_DIR}/fsa)
7 -add_library (morfeusz2 morfeusz.hpp morfeusz.cpp)  
8 -add_executable (morfeusz2_analyze main.cpp) 7 +# add_executable (morfeusz2_analyze main.cpp)
9 add_executable (test_morph test_morph.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp) 8 add_executable (test_morph test_morph.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp)
10 add_executable (test_morfeusz test_morph.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp Morfeusz.cpp charset/CharsetConverter.cpp FlexionGraph.cpp) 9 add_executable (test_morfeusz test_morph.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp Morfeusz.cpp charset/CharsetConverter.cpp FlexionGraph.cpp)
11 add_executable (test_simple test_simple.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp Morfeusz.cpp charset/CharsetConverter.cpp FlexionGraph.cpp) 10 add_executable (test_simple test_simple.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp Morfeusz.cpp charset/CharsetConverter.cpp FlexionGraph.cpp)
12 11
13 # Link the executable to the Hello library. 12 # Link the executable to the Hello library.
14 -target_link_libraries (morfeusz2_analyze morfeusz2)  
15 -set_target_properties ( morfeusz2_analyze PROPERTIES COMPILE_FLAGS "-std=gnu++0x" ) 13 +#target_link_libraries (morfeusz2_analyze morfeusz2)
  14 +#set_target_properties ( morfeusz2_analyze PROPERTIES COMPILE_FLAGS "-std=gnu++0x" )
16 15
17 set_target_properties ( test_morph PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" ) 16 set_target_properties ( test_morph PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" )
18 set_target_properties ( test_morfeusz PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" ) 17 set_target_properties ( test_morfeusz PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" )
morfeusz/EncodedInterpretation.hpp
@@ -31,8 +31,6 @@ struct EncodedInterpretation { @@ -31,8 +31,6 @@ struct EncodedInterpretation {
31 int type; 31 int type;
32 int tag; 32 int tag;
33 int nameClassifier; 33 int nameClassifier;
34 - int startNode;  
35 - int endNode;  
36 }; 34 };
37 35
38 #endif /* INTERPRETATION_HPP */ 36 #endif /* INTERPRETATION_HPP */
morfeusz/FlexionGraph.cpp
1 1
  2 +#include <string>
  3 +#include "utils.hpp"
2 #include "FlexionGraph.hpp" 4 #include "FlexionGraph.hpp"
3 5
4 FlexionGraph::FlexionGraph(int startNode) 6 FlexionGraph::FlexionGraph(int startNode)
@@ -6,29 +8,56 @@ FlexionGraph::FlexionGraph(int startNode) @@ -6,29 +8,56 @@ FlexionGraph::FlexionGraph(int startNode)
6 8
7 } 9 }
8 10
  11 +static inline void debugPath(const std::vector<InterpretedChunk>& path) {
  12 + for (const InterpretedChunk& chunk: path) {
  13 + std::string text(chunk.chunk, chunk.chunkLength);
  14 + DEBUG(text);
  15 + DEBUG(chunk.chunkLength);
  16 + }
  17 +}
  18 +
  19 +void FlexionGraph::addStartEdge(const Edge& e) {
  20 + if (this->graph.empty()) {
  21 + this->graph.push_back(vector<Edge>());
  22 + }
  23 + this->graph[0].push_back(e);
  24 +}
  25 +
  26 +void FlexionGraph::addMiddleEdge(const Edge& e) {
  27 + this->graph.push_back(vector<Edge>(1, e));
  28 +}
  29 +
9 void FlexionGraph::addPath(const std::vector<InterpretedChunk>& path) { 30 void FlexionGraph::addPath(const std::vector<InterpretedChunk>& path) {
  31 +// debugPath(path);
10 for (const InterpretedChunk& chunk: path) { 32 for (const InterpretedChunk& chunk: path) {
11 - if (&chunk == &(path.back())) { 33 + if (&chunk == &(path.front())
  34 + && &chunk == &(path.back())) {
12 Edge e = { chunk, -1 }; 35 Edge e = { chunk, -1 };
13 - vector<Edge> v;  
14 - v.push_back(e);  
15 - this->graph.push_back(v);  
16 -// this->graph[node].push_back(e); 36 + this->addStartEdge(e);
17 } 37 }
18 else if (&chunk == &(path.front())) { 38 else if (&chunk == &(path.front())) {
19 - Edge e = { chunk, (int) this->graph.size() };  
20 - this->graph[0].push_back(e); 39 + Edge e = { chunk, (int) this->graph.size() + 1 };
  40 + this->addStartEdge(e);
  41 + }
  42 + else if (&chunk == &(path.back())) {
  43 + Edge e = { chunk, -1 };
  44 + this->addMiddleEdge(e);
21 } 45 }
22 else { 46 else {
23 - Edge e = { chunk, (int) this->graph.size() };  
24 - vector<Edge> v;  
25 - v.push_back(e);  
26 - this->graph.push_back(v); 47 + Edge e = { chunk, (int) this->graph.size() + 1 };
  48 + this->addMiddleEdge(e);
27 } 49 }
28 } 50 }
29 } 51 }
30 52
  53 +void FlexionGraph::minimizeGraph() {
  54 + if (this->graph.size() > 2) {
  55 +
  56 + }
  57 +}
  58 +
31 void FlexionGraph::appendToResults(const Tagset& tagset, std::vector<MorphInterpretation>& results) { 59 void FlexionGraph::appendToResults(const Tagset& tagset, std::vector<MorphInterpretation>& results) {
  60 + this->minimizeGraph();
32 int endNode = graph.size(); 61 int endNode = graph.size();
33 for (unsigned int i = 0; i < graph.size(); i++) { 62 for (unsigned int i = 0; i < graph.size(); i++) {
34 vector<Edge>& edges = graph[i]; 63 vector<Edge>& edges = graph[i];
@@ -41,3 +70,7 @@ void FlexionGraph::appendToResults(const Tagset& tagset, std::vector<MorphInterp @@ -41,3 +70,7 @@ void FlexionGraph::appendToResults(const Tagset& tagset, std::vector<MorphInterp
41 } 70 }
42 } 71 }
43 } 72 }
  73 +
  74 +bool FlexionGraph::empty() const {
  75 + return this->graph.empty();
  76 +}
morfeusz/FlexionGraph.hpp
@@ -16,6 +16,22 @@ struct Edge { @@ -16,6 +16,22 @@ struct Edge {
16 int nextNode; 16 int nextNode;
17 }; 17 };
18 18
  19 +//struct EdgeLabel {
  20 +// int type;
  21 +// const char* textStart;
  22 +// int textLength;
  23 +//
  24 +// bool operator==(const EdgeLabel &el) const {
  25 +// return this->type == el.type
  26 +// && this->textStart == el.textStart
  27 +// && this->textLength == el.textLength;
  28 +// }
  29 +//
  30 +// bool operator<(const coord &o) {
  31 +// return x < o.x || (x == o.x && y < o.y);
  32 +// }
  33 +//};
  34 +
19 class FlexionGraph { 35 class FlexionGraph {
20 public: 36 public:
21 37
@@ -25,8 +41,17 @@ public: @@ -25,8 +41,17 @@ public:
25 41
26 void appendToResults(const Tagset& tagset, std::vector<MorphInterpretation>& results); 42 void appendToResults(const Tagset& tagset, std::vector<MorphInterpretation>& results);
27 43
  44 + bool empty() const;
  45 +
28 // virtual ~FlexionGraph(); 46 // virtual ~FlexionGraph();
29 private: 47 private:
  48 +
  49 + void addStartEdge(const Edge& e);
  50 +
  51 + void addMiddleEdge(const Edge& e);
  52 +
  53 + void minimizeGraph();
  54 +
30 int startNode; 55 int startNode;
31 std::vector< std::vector<Edge> > graph; 56 std::vector< std::vector<Edge> > graph;
32 }; 57 };
morfeusz/Morfeusz.cpp
@@ -38,27 +38,37 @@ static Tagset* initializeTagset(const string& filename) { @@ -38,27 +38,37 @@ static Tagset* initializeTagset(const string& filename) {
38 } 38 }
39 39
40 Morfeusz::Morfeusz(const string& filename) 40 Morfeusz::Morfeusz(const string& filename)
41 -: fsa(initializeFSA(filename)),  
42 - charsetConverter(initializeCharsetConverter()),  
43 - tagset(initializeTagset(filename)) { 41 +: fsa(initializeFSA(filename)),
  42 +charsetConverter(initializeCharsetConverter()),
  43 +tagset(initializeTagset(filename)) {
44 44
45 } 45 }
46 46
47 Morfeusz::~Morfeusz() { 47 Morfeusz::~Morfeusz() {
48 - delete &this->fsa;  
49 - delete &this->charsetConverter; 48 + // delete &this->fsa;
  49 + // delete &this->charsetConverter;
50 } 50 }
51 51
52 void Morfeusz::processOneWord( 52 void Morfeusz::processOneWord(
53 const char*& inputData, 53 const char*& inputData,
54 const char* inputEnd, 54 const char* inputEnd,
55 - const int startNodeNum, 55 + int startNodeNum,
56 std::vector<MorphInterpretation>& results) const { 56 std::vector<MorphInterpretation>& results) const {
  57 + while (inputData != inputEnd
  58 + && isEndOfWord(this->charsetConverter->peek(inputData, inputEnd))) {
  59 + this->charsetConverter->next(inputData, inputEnd);
  60 + }
  61 + const char* wordStart = inputData;
57 vector<InterpretedChunk> accum; 62 vector<InterpretedChunk> accum;
58 FlexionGraph graph(startNodeNum); 63 FlexionGraph graph(startNodeNum);
59 const char* currInput = inputData; 64 const char* currInput = inputData;
60 doProcessOneWord(currInput, inputEnd, accum, graph); 65 doProcessOneWord(currInput, inputEnd, accum, graph);
61 - graph.appendToResults(*this->tagset, results); 66 + if (!graph.empty()) {
  67 + graph.appendToResults(*this->tagset, results);
  68 + }
  69 + else if (wordStart != currInput) {
  70 + this->appendIgnotiumToResults(string(wordStart, currInput), startNodeNum, results);
  71 + }
62 inputData = currInput; 72 inputData = currInput;
63 } 73 }
64 74
@@ -67,38 +77,56 @@ void Morfeusz::doProcessOneWord( @@ -67,38 +77,56 @@ void Morfeusz::doProcessOneWord(
67 const char* inputEnd, 77 const char* inputEnd,
68 vector<InterpretedChunk>& accum, 78 vector<InterpretedChunk>& accum,
69 FlexionGraph& graph) const { 79 FlexionGraph& graph) const {
  80 + bool endOfWord = inputData == inputEnd;
70 const char* currInput = inputData; 81 const char* currInput = inputData;
71 - StateType state = this->fsa->getInitialState();  
72 - int codepoint = this->charsetConverter->next(currInput, inputEnd); 82 + const char* prevInput = inputData;
  83 + int codepoint = endOfWord ? 0 : this->charsetConverter->next(currInput, inputEnd);
73 84
74 - if (!accum.empty() && isEndOfWord(codepoint)) {  
75 - graph.addPath(accum);  
76 - }  
77 - else  
78 - while (!isEndOfWord(codepoint)) {  
79 - this->feedState(state, codepoint);  
80 - codepoint = this->charsetConverter->next(currInput, inputEnd);  
81 - if (state.isAccepting()) {  
82 - for (InterpsGroup& ig : state.getValue()) {  
83 - InterpretedChunk ic = {inputData, currInput - inputData, ig};  
84 - accum.push_back(ic);  
85 - doProcessOneWord(currInput, inputEnd, accum, graph);  
86 - accum.pop_back();  
87 - } 85 + StateType state = this->fsa->getInitialState();
  86 +
  87 + while (!isEndOfWord(codepoint)) {
  88 + this->feedState(state, codepoint);
  89 + if (state.isAccepting()) {
  90 + for (InterpsGroup& ig : state.getValue()) {
  91 + InterpretedChunk ic = {inputData, currInput - inputData, ig};
  92 + accum.push_back(ic);
  93 + const char* newCurrInput = currInput;
  94 + doProcessOneWord(newCurrInput, inputEnd, accum, graph);
  95 + accum.pop_back();
88 } 96 }
89 } 97 }
  98 + prevInput = currInput;
  99 + codepoint = currInput == inputEnd ? 0 : this->charsetConverter->next(currInput, inputEnd);
  100 + }
  101 + if (state.isAccepting()) {
  102 + for (InterpsGroup& ig : state.getValue()) {
  103 + InterpretedChunk ic = {inputData, prevInput - inputData, ig};
  104 + accum.push_back(ic);
  105 + graph.addPath(accum);
  106 + accum.pop_back();
  107 + }
  108 + }
  109 + inputData = currInput;
90 } 110 }
91 111
92 void Morfeusz::feedState( 112 void Morfeusz::feedState(
93 StateType& state, 113 StateType& state,
94 - const int codepoint) const { 114 + int codepoint) const {
95 vector<char> chars; 115 vector<char> chars;
96 this->charsetConverter->append(codepoint, chars); 116 this->charsetConverter->append(codepoint, chars);
97 - for (char c: chars) { 117 + for (char c : chars) {
98 state.proceedToNext(c); 118 state.proceedToNext(c);
99 } 119 }
100 } 120 }
101 121
  122 +void Morfeusz::appendIgnotiumToResults(
  123 + const string& word,
  124 + int startNodeNum,
  125 + std::vector<MorphInterpretation>& results) const {
  126 + MorphInterpretation interp = MorphInterpretation::createIgn(startNodeNum, word, *this->tagset);
  127 + results.push_back(interp);
  128 +}
  129 +
102 ResultsIterator Morfeusz::analyze(const string& text) { 130 ResultsIterator Morfeusz::analyze(const string& text) {
103 // const char* textStart = text.c_str(); 131 // const char* textStart = text.c_str();
104 // const char* textEnd = text.c_str() + text.length(); 132 // const char* textEnd = text.c_str() + text.length();
@@ -106,7 +134,12 @@ ResultsIterator Morfeusz::analyze(const string& text) { @@ -106,7 +134,12 @@ ResultsIterator Morfeusz::analyze(const string& text) {
106 } 134 }
107 135
108 void Morfeusz::analyze(const string& text, vector<MorphInterpretation>& results) { 136 void Morfeusz::analyze(const string& text, vector<MorphInterpretation>& results) {
109 - 137 + const char* input = text.c_str();
  138 + const char* inputEnd = input + text.length();
  139 + while (input != inputEnd) {
  140 + int startNode = results.empty() ? 0 : results.back().getEndNode();
  141 + this->processOneWord(input, inputEnd, startNode, results);
  142 + }
110 } 143 }
111 144
112 ResultsIterator::ResultsIterator(const string& text, const Morfeusz& morfeusz) 145 ResultsIterator::ResultsIterator(const string& text, const Morfeusz& morfeusz)
morfeusz/Morfeusz.hpp
1 /* 1 /*
2 * File: Morfeusz.hpp 2 * File: Morfeusz.hpp
3 - * Author: lennyn 3 + * Author: mlenart
4 * 4 *
5 * Created on November 13, 2013, 5:21 PM 5 * Created on November 13, 2013, 5:21 PM
6 */ 6 */
@@ -37,7 +37,7 @@ public: @@ -37,7 +37,7 @@ public:
37 void processOneWord( 37 void processOneWord(
38 const char*& inputData, 38 const char*& inputData,
39 const char* inputEnd, 39 const char* inputEnd,
40 - const int startNodeNum, 40 + int startNodeNum,
41 std::vector<MorphInterpretation>& result) const; 41 std::vector<MorphInterpretation>& result) const;
42 42
43 // Morfeusz(); 43 // Morfeusz();
@@ -52,7 +52,12 @@ private: @@ -52,7 +52,12 @@ private:
52 52
53 void feedState( 53 void feedState(
54 StateType& state, 54 StateType& state,
55 - const int codepoint) const; 55 + int codepoint) const;
  56 +
  57 + void appendIgnotiumToResults(
  58 + const std::string& word,
  59 + int startNodeNum,
  60 + std::vector<MorphInterpretation>& results) const;
56 61
57 FSAType* fsa; 62 FSAType* fsa;
58 CharsetConverter* charsetConverter; 63 CharsetConverter* charsetConverter;
morfeusz/MorphInterpretation.cpp
@@ -39,6 +39,25 @@ MorphInterpretation::MorphInterpretation( @@ -39,6 +39,25 @@ MorphInterpretation::MorphInterpretation(
39 39
40 } 40 }
41 41
  42 +MorphInterpretation::MorphInterpretation(
  43 + int startNode,
  44 + const std::string& orth,
  45 + const Tagset& tagset)
  46 +: startNode(startNode),
  47 + endNode(startNode + 1),
  48 + orth(orth),
  49 + lemma(orth),
  50 + tagnum(0),
  51 + namenum(0),
  52 + tag(tagset.getTag(0)),
  53 + name(tagset.getName(0)) {
  54 +
  55 +}
  56 +
  57 +MorphInterpretation MorphInterpretation::createIgn(int startNode, const std::string& orth, const Tagset& tagset) {
  58 + return MorphInterpretation(startNode, orth, tagset);
  59 +}
  60 +
42 MorphInterpretation::~MorphInterpretation() { 61 MorphInterpretation::~MorphInterpretation() {
43 } 62 }
44 63
morfeusz/MorphInterpretation.hpp
@@ -20,6 +20,7 @@ public: @@ -20,6 +20,7 @@ public:
20 const std::string& orth, 20 const std::string& orth,
21 const EncodedInterpretation& encodedInterp, 21 const EncodedInterpretation& encodedInterp,
22 const Tagset& tagset); 22 const Tagset& tagset);
  23 + static MorphInterpretation createIgn(int startNode, const std::string& orth, const Tagset& tagset);
23 virtual ~MorphInterpretation(); 24 virtual ~MorphInterpretation();
24 int getStartNode() const; 25 int getStartNode() const;
25 int getEndNode() const; 26 int getEndNode() const;
@@ -30,6 +31,10 @@ public: @@ -30,6 +31,10 @@ public:
30 const std::string& getTag() const; 31 const std::string& getTag() const;
31 const std::string& getName() const; 32 const std::string& getName() const;
32 private: 33 private:
  34 + MorphInterpretation(
  35 + int startNode,
  36 + const std::string& orth,
  37 + const Tagset& tagset);
33 int startNode; 38 int startNode;
34 int endNode; 39 int endNode;
35 std::string orth; 40 std::string orth;
morfeusz/charset/CharsetConverter.cpp
@@ -6,6 +6,10 @@ @@ -6,6 +6,10 @@
6 6
7 using namespace std; 7 using namespace std;
8 8
  9 +uint32_t UTF8CharsetConverter::peek(const char*& it, const char* end) const {
  10 + return utf8::peek_next(it, end);
  11 +}
  12 +
9 uint32_t UTF8CharsetConverter::next(const char*& it, const char* end) const { 13 uint32_t UTF8CharsetConverter::next(const char*& it, const char* end) const {
10 return utf8::next(it, end); 14 return utf8::next(it, end);
11 } 15 }
morfeusz/charset/CharsetConverter.hpp
@@ -10,6 +10,7 @@ @@ -10,6 +10,7 @@
10 10
11 class CharsetConverter { 11 class CharsetConverter {
12 public: 12 public:
  13 + virtual uint32_t peek(const char*& it, const char* end) const = 0;
13 virtual uint32_t next(const char*& it, const char* end) const = 0; 14 virtual uint32_t next(const char*& it, const char* end) const = 0;
14 virtual void append(uint32_t cp, std::vector<char>& result) const = 0; 15 virtual void append(uint32_t cp, std::vector<char>& result) const = 0;
15 private: 16 private:
@@ -17,6 +18,7 @@ private: @@ -17,6 +18,7 @@ private:
17 18
18 class UTF8CharsetConverter: public CharsetConverter { 19 class UTF8CharsetConverter: public CharsetConverter {
19 public: 20 public:
  21 + uint32_t peek(const char*& it, const char* end) const;
20 uint32_t next(const char*& it, const char* end) const; 22 uint32_t next(const char*& it, const char* end) const;
21 void append(uint32_t cp, std::vector<char>& result) const; 23 void append(uint32_t cp, std::vector<char>& result) const;
22 private: 24 private:
@@ -24,6 +26,7 @@ private: @@ -24,6 +26,7 @@ private:
24 26
25 class UTF16CharsetConverter: public CharsetConverter { 27 class UTF16CharsetConverter: public CharsetConverter {
26 public: 28 public:
  29 + uint32_t peek(const char*& it, const char* end) const;
27 uint32_t next(const char*& it, const char* end) const; 30 uint32_t next(const char*& it, const char* end) const;
28 void append(uint32_t cp, std::vector<char>& result) const; 31 void append(uint32_t cp, std::vector<char>& result) const;
29 private: 32 private:
@@ -31,6 +34,7 @@ private: @@ -31,6 +34,7 @@ private:
31 34
32 class UTF32CharsetConverter: public CharsetConverter { 35 class UTF32CharsetConverter: public CharsetConverter {
33 public: 36 public:
  37 + uint32_t peek(const char*& it, const char* end) const;
34 uint32_t next(const char*& it, const char* end) const; 38 uint32_t next(const char*& it, const char* end) const;
35 void append(uint32_t cp, std::vector<char>& result) const; 39 void append(uint32_t cp, std::vector<char>& result) const;
36 private: 40 private:
@@ -38,6 +42,7 @@ private: @@ -38,6 +42,7 @@ private:
38 42
39 class ISO8859_2_CharsetConverter: public CharsetConverter { 43 class ISO8859_2_CharsetConverter: public CharsetConverter {
40 public: 44 public:
  45 + uint32_t peek(const char*& it, const char* end) const;
41 uint32_t next(const char*& it, const char* end) const; 46 uint32_t next(const char*& it, const char* end) const;
42 void append(uint32_t cp, std::vector<char>& result) const; 47 void append(uint32_t cp, std::vector<char>& result) const;
43 private: 48 private:
morfeusz/morfeusz.cpp deleted
morfeusz/morfeusz.hpp deleted
1 -  
2 -  
morfeusz/test_simple.cpp
@@ -7,6 +7,7 @@ @@ -7,6 +7,7 @@
7 7
8 #include <cstdlib> 8 #include <cstdlib>
9 9
  10 +#include "utils.hpp"
10 #include "Morfeusz.hpp" 11 #include "Morfeusz.hpp"
11 #include "MorphInterpretation.hpp" 12 #include "MorphInterpretation.hpp"
12 13
@@ -16,11 +17,11 @@ using namespace std; @@ -16,11 +17,11 @@ using namespace std;
16 * 17 *
17 */ 18 */
18 int main(int argc, char** argv) { 19 int main(int argc, char** argv) {
19 - Morfeusz morfeusz(argv[1]); 20 + Morfeusz morfeusz("/tmp/test-SIMPLE-PoliMorfSmall.tab.fsa");
20 vector<MorphInterpretation> res; 21 vector<MorphInterpretation> res;
21 - string word = "mijałem";  
22 - const char* ptr = word.c_str();  
23 - morfeusz.processOneWord(ptr, word.c_str() + word.size(), 0, res); 22 + string word = " mijałem fasdfasd abdominalności ";
  23 + morfeusz.analyze(word, res);
  24 + DEBUG("znaleziono "+to_string(res.size()));
24 for (MorphInterpretation& mi: res) { 25 for (MorphInterpretation& mi: res) {
25 cerr << mi.getStartNode() << " " << mi.getEndNode() << " " << mi.getLemma() << " " << mi.getTag() << " " << mi.getName() << endl; 26 cerr << mi.getStartNode() << " " << mi.getEndNode() << " " << mi.getLemma() << " " << mi.getTag() << " " << mi.getName() << endl;
26 } 27 }
nbproject/configurations.xml
@@ -17,9 +17,9 @@ @@ -17,9 +17,9 @@
17 <in>MorphInterpretation.cpp</in> 17 <in>MorphInterpretation.cpp</in>
18 <in>Tagset.cpp</in> 18 <in>Tagset.cpp</in>
19 <in>main.cpp</in> 19 <in>main.cpp</in>
20 - <in>morfeusz.cpp</in>  
21 <in>test_morfeusz.cpp</in> 20 <in>test_morfeusz.cpp</in>
22 <in>test_morph.cpp</in> 21 <in>test_morph.cpp</in>
  22 + <in>test_simple.cpp</in>
23 </df> 23 </df>
24 <logicalFolder name="ExternalFiles" 24 <logicalFolder name="ExternalFiles"
25 displayName="Important Files" 25 displayName="Important Files"
@@ -49,7 +49,7 @@ @@ -49,7 +49,7 @@
49 <buildCommandWorkingDir>build</buildCommandWorkingDir> 49 <buildCommandWorkingDir>build</buildCommandWorkingDir>
50 <buildCommand>${MAKE} -f Makefile</buildCommand> 50 <buildCommand>${MAKE} -f Makefile</buildCommand>
51 <cleanCommand>${MAKE} -f Makefile clean</cleanCommand> 51 <cleanCommand>${MAKE} -f Makefile clean</cleanCommand>
52 - <executablePath>build/fsa/test_dict</executablePath> 52 + <executablePath>build/morfeusz/test_simple</executablePath>
53 </makeTool> 53 </makeTool>
54 </makefileType> 54 </makefileType>
55 <folder path="1"> 55 <folder path="1">
@@ -120,10 +120,6 @@ @@ -120,10 +120,6 @@
120 <ccTool> 120 <ccTool>
121 </ccTool> 121 </ccTool>
122 </item> 122 </item>
123 - <item path="morfeusz/morfeusz.cpp" ex="false" tool="1" flavor2="4">  
124 - <ccTool>  
125 - </ccTool>  
126 - </item>  
127 <item path="morfeusz/test_morfeusz.cpp" ex="false" tool="1" flavor2="4"> 123 <item path="morfeusz/test_morfeusz.cpp" ex="false" tool="1" flavor2="4">
128 <ccTool> 124 <ccTool>
129 </ccTool> 125 </ccTool>
@@ -132,12 +128,8 @@ @@ -132,12 +128,8 @@
132 <ccTool> 128 <ccTool>
133 </ccTool> 129 </ccTool>
134 </item> 130 </item>
135 - <item path="morfeusz/test_simple.cpp" ex="false" tool="1" flavor2="0"> 131 + <item path="morfeusz/test_simple.cpp" ex="false" tool="1" flavor2="8">
136 <ccTool> 132 <ccTool>
137 - <incDir>  
138 - <pElem>fsa</pElem>  
139 - <pElem>build/morfeusz</pElem>  
140 - </incDir>  
141 </ccTool> 133 </ccTool>
142 </item> 134 </item>
143 </conf> 135 </conf>
testfiles/PoliMorfSmall.tab
@@ -579,3 +579,43 @@ abdominoplastyki abdominoplastyka subst:pl:voc:f pospolita @@ -579,3 +579,43 @@ abdominoplastyki abdominoplastyka subst:pl:voc:f pospolita
579 abdominoplastyki abdominoplastyka subst:sg:gen:f pospolita 579 abdominoplastyki abdominoplastyka subst:sg:gen:f pospolita
580 abdominoplastyko abdominoplastyka subst:sg:voc:f pospolita 580 abdominoplastyko abdominoplastyka subst:sg:voc:f pospolita
581 abdominoplastykom abdominoplastyka subst:pl:dat:f pospolita 581 abdominoplastykom abdominoplastyka subst:pl:dat:f pospolita
  582 +mijał mijać praet:sg:m1.m2.m3:imperf pospolita
  583 +mijała mijać praet:sg:f:imperf pospolita
  584 +mijało mijać praet:sg:n1.n2:imperf pospolita
  585 +mijały mijać praet:pl:m2.m3.f.n1.n2.p2.p3:imperf pospolita
  586 +omijał omijać praet:sg:m1.m2.m3:imperf pospolita
  587 +omijała omijać praet:sg:f:imperf pospolita
  588 +omijało omijać praet:sg:n1.n2:imperf pospolita
  589 +omijały omijać praet:pl:m2.m3.f.n1.n2.p2.p3:imperf pospolita
  590 +pomijał pomijać praet:sg:m1.m2.m3:imperf pospolita
  591 +pomijała pomijać praet:sg:f:imperf pospolita
  592 +pomijało pomijać praet:sg:n1.n2:imperf pospolita
  593 +pomijały pomijać praet:pl:m2.m3.f.n1.n2.p2.p3:imperf pospolita
  594 +powymijał powymijać praet:sg:m1.m2.m3:perf pospolita
  595 +powymijała powymijać praet:sg:f:perf pospolita
  596 +powymijało powymijać praet:sg:n1.n2:perf pospolita
  597 +powymijały powymijać praet:pl:m2.m3.f.n1.n2.p2.p3:perf pospolita
  598 +przemijał przemijać praet:sg:m1.m2.m3:imperf pospolita
  599 +przemijała przemijać praet:sg:f:imperf pospolita
  600 +przemijało przemijać praet:sg:n1.n2:imperf pospolita
  601 +przemijały przemijać praet:pl:m2.m3.f.n1.n2.p2.p3:imperf pospolita
  602 +rozmijał rozmijać praet:sg:m1.m2.m3:imperf pospolita
  603 +rozmijała rozmijać praet:sg:f:imperf pospolita
  604 +rozmijało rozmijać praet:sg:n1.n2:imperf pospolita
  605 +rozmijały rozmijać praet:pl:m2.m3.f.n1.n2.p2.p3:imperf pospolita
  606 +wymijał wymijać praet:sg:m1.m2.m3:imperf pospolita
  607 +wymijała wymijać praet:sg:f:imperf pospolita
  608 +wymijało wymijać praet:sg:n1.n2:imperf pospolita
  609 +wymijały wymijać praet:pl:m2.m3.f.n1.n2.p2.p3:imperf pospolita
  610 +zmijał zmijać praet:sg:m1.m2.m3:imperf pospolita
  611 +zmijała zmijać praet:sg:f:imperf pospolita
  612 +zmijało zmijać praet:sg:n1.n2:imperf pospolita
  613 +zmijały zmijać praet:pl:m2.m3.f.n1.n2.p2.p3:imperf pospolita
  614 +em być aglt:sg:pri:imperf:wok pospolita
  615 +eś być aglt:sg:sec:imperf:wok pospolita
  616 +eście być aglt:pl:sec:imperf:wok pospolita
  617 +eśmy być aglt:pl:pri:imperf:wok pospolita
  618 +m być aglt:sg:pri:imperf:nwok pospolita
  619 +ś być aglt:sg:sec:imperf:nwok pospolita
  620 +ście być aglt:pl:sec:imperf:nwok pospolita
  621 +śmy być aglt:pl:pri:imperf:nwok pospolita
testfiles/polimorf.tagset
@@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
2 2
3 [TAGS] 3 [TAGS]
4 4
5 -0 adj:pl:acc:m1.p1:com 5 +0 ign
6 1 adj:pl:acc:m1.p1:pos 6 1 adj:pl:acc:m1.p1:pos
7 2 adj:pl:acc:m1.p1:sup 7 2 adj:pl:acc:m1.p1:sup
8 3 adj:pl:acc:m2.m3.f.n1.n2.p2.p3:com 8 3 adj:pl:acc:m2.m3.f.n1.n2.p2.p3:com
@@ -576,6 +576,7 @@ @@ -576,6 +576,7 @@
576 571 winien:sg:f:imperf 576 571 winien:sg:f:imperf
577 572 winien:sg:m1.m2.m3:imperf 577 572 winien:sg:m1.m2.m3:imperf
578 573 winien:sg:n1.n2:imperf 578 573 winien:sg:n1.n2:imperf
  579 +574 adj:pl:acc:m1.p1:com
579 580
580 [NAMES] 581 [NAMES]
581 582