Commit 5700d3a02747423a76b75148b338f19e6912b168

Authored by Michał Lenart
1 parent e5220b90

- podstawa analizy tekstu już działa

- obsługa ign-ów w zasadzie też

git-svn-id: svn://svn.nlp.ipipan.waw.pl/morfeusz/morfeusz@23 ff4e3ee1-f430-4e82-ade0-24591c43f1fd
CMakeLists.txt
1 1  
2 2 cmake_minimum_required (VERSION 2.8)
3 3 project (Morfeusz)
4   -
  4 +set(CMAKE_BUILD_TYPE "Debug")
5 5 enable_testing()
6 6  
7 7 add_subdirectory (fsa)
... ...
morfeusz/CMakeLists.txt
... ... @@ -4,15 +4,14 @@
4 4 # Make sure the linker can find the Hello library once it is built.
5 5 #link_directories (${Morfeusz_BINARY_DIR}/Hello)
6 6 include_directories (${Morfeusz_SOURCE_DIR}/fsa)
7   -add_library (morfeusz2 morfeusz.hpp morfeusz.cpp)
8   -add_executable (morfeusz2_analyze main.cpp)
  7 +# add_executable (morfeusz2_analyze main.cpp)
9 8 add_executable (test_morph test_morph.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp)
10 9 add_executable (test_morfeusz test_morph.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp Morfeusz.cpp charset/CharsetConverter.cpp FlexionGraph.cpp)
11 10 add_executable (test_simple test_simple.cpp MorphDeserializer.cpp Tagset.cpp ../fsa/const.cpp MorphInterpretation.cpp Morfeusz.cpp charset/CharsetConverter.cpp FlexionGraph.cpp)
12 11  
13 12 # Link the executable to the Hello library.
14   -target_link_libraries (morfeusz2_analyze morfeusz2)
15   -set_target_properties ( morfeusz2_analyze PROPERTIES COMPILE_FLAGS "-std=gnu++0x" )
  13 +#target_link_libraries (morfeusz2_analyze morfeusz2)
  14 +#set_target_properties ( morfeusz2_analyze PROPERTIES COMPILE_FLAGS "-std=gnu++0x" )
16 15  
17 16 set_target_properties ( test_morph PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" )
18 17 set_target_properties ( test_morfeusz PROPERTIES COMPILE_FLAGS "-std=gnu++0x -Wall -O2" )
... ...
morfeusz/EncodedInterpretation.hpp
... ... @@ -31,8 +31,6 @@ struct EncodedInterpretation {
31 31 int type;
32 32 int tag;
33 33 int nameClassifier;
34   - int startNode;
35   - int endNode;
36 34 };
37 35  
38 36 #endif /* INTERPRETATION_HPP */
... ...
morfeusz/FlexionGraph.cpp
1 1  
  2 +#include <string>
  3 +#include "utils.hpp"
2 4 #include "FlexionGraph.hpp"
3 5  
4 6 FlexionGraph::FlexionGraph(int startNode)
... ... @@ -6,29 +8,56 @@ FlexionGraph::FlexionGraph(int startNode)
6 8  
7 9 }
8 10  
  11 +static inline void debugPath(const std::vector<InterpretedChunk>& path) {
  12 + for (const InterpretedChunk& chunk: path) {
  13 + std::string text(chunk.chunk, chunk.chunkLength);
  14 + DEBUG(text);
  15 + DEBUG(chunk.chunkLength);
  16 + }
  17 +}
  18 +
  19 +void FlexionGraph::addStartEdge(const Edge& e) {
  20 + if (this->graph.empty()) {
  21 + this->graph.push_back(vector<Edge>());
  22 + }
  23 + this->graph[0].push_back(e);
  24 +}
  25 +
  26 +void FlexionGraph::addMiddleEdge(const Edge& e) {
  27 + this->graph.push_back(vector<Edge>(1, e));
  28 +}
  29 +
9 30 void FlexionGraph::addPath(const std::vector<InterpretedChunk>& path) {
  31 +// debugPath(path);
10 32 for (const InterpretedChunk& chunk: path) {
11   - if (&chunk == &(path.back())) {
  33 + if (&chunk == &(path.front())
  34 + && &chunk == &(path.back())) {
12 35 Edge e = { chunk, -1 };
13   - vector<Edge> v;
14   - v.push_back(e);
15   - this->graph.push_back(v);
16   -// this->graph[node].push_back(e);
  36 + this->addStartEdge(e);
17 37 }
18 38 else if (&chunk == &(path.front())) {
19   - Edge e = { chunk, (int) this->graph.size() };
20   - this->graph[0].push_back(e);
  39 + Edge e = { chunk, (int) this->graph.size() + 1 };
  40 + this->addStartEdge(e);
  41 + }
  42 + else if (&chunk == &(path.back())) {
  43 + Edge e = { chunk, -1 };
  44 + this->addMiddleEdge(e);
21 45 }
22 46 else {
23   - Edge e = { chunk, (int) this->graph.size() };
24   - vector<Edge> v;
25   - v.push_back(e);
26   - this->graph.push_back(v);
  47 + Edge e = { chunk, (int) this->graph.size() + 1 };
  48 + this->addMiddleEdge(e);
27 49 }
28 50 }
29 51 }
30 52  
  53 +void FlexionGraph::minimizeGraph() {
  54 + if (this->graph.size() > 2) {
  55 +
  56 + }
  57 +}
  58 +
31 59 void FlexionGraph::appendToResults(const Tagset& tagset, std::vector<MorphInterpretation>& results) {
  60 + this->minimizeGraph();
32 61 int endNode = graph.size();
33 62 for (unsigned int i = 0; i < graph.size(); i++) {
34 63 vector<Edge>& edges = graph[i];
... ... @@ -41,3 +70,7 @@ void FlexionGraph::appendToResults(const Tagset& tagset, std::vector<MorphInterp
41 70 }
42 71 }
43 72 }
  73 +
  74 +bool FlexionGraph::empty() const {
  75 + return this->graph.empty();
  76 +}
... ...
morfeusz/FlexionGraph.hpp
... ... @@ -16,6 +16,22 @@ struct Edge {
16 16 int nextNode;
17 17 };
18 18  
  19 +//struct EdgeLabel {
  20 +// int type;
  21 +// const char* textStart;
  22 +// int textLength;
  23 +//
  24 +// bool operator==(const EdgeLabel &el) const {
  25 +// return this->type == el.type
  26 +// && this->textStart == el.textStart
  27 +// && this->textLength == el.textLength;
  28 +// }
  29 +//
  30 +// bool operator<(const coord &o) {
  31 +// return x < o.x || (x == o.x && y < o.y);
  32 +// }
  33 +//};
  34 +
19 35 class FlexionGraph {
20 36 public:
21 37  
... ... @@ -25,8 +41,17 @@ public:
25 41  
26 42 void appendToResults(const Tagset& tagset, std::vector<MorphInterpretation>& results);
27 43  
  44 + bool empty() const;
  45 +
28 46 // virtual ~FlexionGraph();
29 47 private:
  48 +
  49 + void addStartEdge(const Edge& e);
  50 +
  51 + void addMiddleEdge(const Edge& e);
  52 +
  53 + void minimizeGraph();
  54 +
30 55 int startNode;
31 56 std::vector< std::vector<Edge> > graph;
32 57 };
... ...
morfeusz/Morfeusz.cpp
... ... @@ -38,27 +38,37 @@ static Tagset* initializeTagset(const string& filename) {
38 38 }
39 39  
40 40 Morfeusz::Morfeusz(const string& filename)
41   -: fsa(initializeFSA(filename)),
42   - charsetConverter(initializeCharsetConverter()),
43   - tagset(initializeTagset(filename)) {
  41 +: fsa(initializeFSA(filename)),
  42 +charsetConverter(initializeCharsetConverter()),
  43 +tagset(initializeTagset(filename)) {
44 44  
45 45 }
46 46  
47 47 Morfeusz::~Morfeusz() {
48   - delete &this->fsa;
49   - delete &this->charsetConverter;
  48 + // delete &this->fsa;
  49 + // delete &this->charsetConverter;
50 50 }
51 51  
52 52 void Morfeusz::processOneWord(
53 53 const char*& inputData,
54 54 const char* inputEnd,
55   - const int startNodeNum,
  55 + int startNodeNum,
56 56 std::vector<MorphInterpretation>& results) const {
  57 + while (inputData != inputEnd
  58 + && isEndOfWord(this->charsetConverter->peek(inputData, inputEnd))) {
  59 + this->charsetConverter->next(inputData, inputEnd);
  60 + }
  61 + const char* wordStart = inputData;
57 62 vector<InterpretedChunk> accum;
58 63 FlexionGraph graph(startNodeNum);
59 64 const char* currInput = inputData;
60 65 doProcessOneWord(currInput, inputEnd, accum, graph);
61   - graph.appendToResults(*this->tagset, results);
  66 + if (!graph.empty()) {
  67 + graph.appendToResults(*this->tagset, results);
  68 + }
  69 + else if (wordStart != currInput) {
  70 + this->appendIgnotiumToResults(string(wordStart, currInput), startNodeNum, results);
  71 + }
62 72 inputData = currInput;
63 73 }
64 74  
... ... @@ -67,38 +77,56 @@ void Morfeusz::doProcessOneWord(
67 77 const char* inputEnd,
68 78 vector<InterpretedChunk>& accum,
69 79 FlexionGraph& graph) const {
  80 + bool endOfWord = inputData == inputEnd;
70 81 const char* currInput = inputData;
71   - StateType state = this->fsa->getInitialState();
72   - int codepoint = this->charsetConverter->next(currInput, inputEnd);
  82 + const char* prevInput = inputData;
  83 + int codepoint = endOfWord ? 0 : this->charsetConverter->next(currInput, inputEnd);
73 84  
74   - if (!accum.empty() && isEndOfWord(codepoint)) {
75   - graph.addPath(accum);
76   - }
77   - else
78   - while (!isEndOfWord(codepoint)) {
79   - this->feedState(state, codepoint);
80   - codepoint = this->charsetConverter->next(currInput, inputEnd);
81   - if (state.isAccepting()) {
82   - for (InterpsGroup& ig : state.getValue()) {
83   - InterpretedChunk ic = {inputData, currInput - inputData, ig};
84   - accum.push_back(ic);
85   - doProcessOneWord(currInput, inputEnd, accum, graph);
86   - accum.pop_back();
87   - }
  85 + StateType state = this->fsa->getInitialState();
  86 +
  87 + while (!isEndOfWord(codepoint)) {
  88 + this->feedState(state, codepoint);
  89 + if (state.isAccepting()) {
  90 + for (InterpsGroup& ig : state.getValue()) {
  91 + InterpretedChunk ic = {inputData, currInput - inputData, ig};
  92 + accum.push_back(ic);
  93 + const char* newCurrInput = currInput;
  94 + doProcessOneWord(newCurrInput, inputEnd, accum, graph);
  95 + accum.pop_back();
88 96 }
89 97 }
  98 + prevInput = currInput;
  99 + codepoint = currInput == inputEnd ? 0 : this->charsetConverter->next(currInput, inputEnd);
  100 + }
  101 + if (state.isAccepting()) {
  102 + for (InterpsGroup& ig : state.getValue()) {
  103 + InterpretedChunk ic = {inputData, prevInput - inputData, ig};
  104 + accum.push_back(ic);
  105 + graph.addPath(accum);
  106 + accum.pop_back();
  107 + }
  108 + }
  109 + inputData = currInput;
90 110 }
91 111  
92 112 void Morfeusz::feedState(
93 113 StateType& state,
94   - const int codepoint) const {
  114 + int codepoint) const {
95 115 vector<char> chars;
96 116 this->charsetConverter->append(codepoint, chars);
97   - for (char c: chars) {
  117 + for (char c : chars) {
98 118 state.proceedToNext(c);
99 119 }
100 120 }
101 121  
  122 +void Morfeusz::appendIgnotiumToResults(
  123 + const string& word,
  124 + int startNodeNum,
  125 + std::vector<MorphInterpretation>& results) const {
  126 + MorphInterpretation interp = MorphInterpretation::createIgn(startNodeNum, word, *this->tagset);
  127 + results.push_back(interp);
  128 +}
  129 +
102 130 ResultsIterator Morfeusz::analyze(const string& text) {
103 131 // const char* textStart = text.c_str();
104 132 // const char* textEnd = text.c_str() + text.length();
... ... @@ -106,7 +134,12 @@ ResultsIterator Morfeusz::analyze(const string& text) {
106 134 }
107 135  
108 136 void Morfeusz::analyze(const string& text, vector<MorphInterpretation>& results) {
109   -
  137 + const char* input = text.c_str();
  138 + const char* inputEnd = input + text.length();
  139 + while (input != inputEnd) {
  140 + int startNode = results.empty() ? 0 : results.back().getEndNode();
  141 + this->processOneWord(input, inputEnd, startNode, results);
  142 + }
110 143 }
111 144  
112 145 ResultsIterator::ResultsIterator(const string& text, const Morfeusz& morfeusz)
... ...
morfeusz/Morfeusz.hpp
1 1 /*
2 2 * File: Morfeusz.hpp
3   - * Author: lennyn
  3 + * Author: mlenart
4 4 *
5 5 * Created on November 13, 2013, 5:21 PM
6 6 */
... ... @@ -37,7 +37,7 @@ public:
37 37 void processOneWord(
38 38 const char*& inputData,
39 39 const char* inputEnd,
40   - const int startNodeNum,
  40 + int startNodeNum,
41 41 std::vector<MorphInterpretation>& result) const;
42 42  
43 43 // Morfeusz();
... ... @@ -52,7 +52,12 @@ private:
52 52  
53 53 void feedState(
54 54 StateType& state,
55   - const int codepoint) const;
  55 + int codepoint) const;
  56 +
  57 + void appendIgnotiumToResults(
  58 + const std::string& word,
  59 + int startNodeNum,
  60 + std::vector<MorphInterpretation>& results) const;
56 61  
57 62 FSAType* fsa;
58 63 CharsetConverter* charsetConverter;
... ...
morfeusz/MorphInterpretation.cpp
... ... @@ -39,6 +39,25 @@ MorphInterpretation::MorphInterpretation(
39 39  
40 40 }
41 41  
  42 +MorphInterpretation::MorphInterpretation(
  43 + int startNode,
  44 + const std::string& orth,
  45 + const Tagset& tagset)
  46 +: startNode(startNode),
  47 + endNode(startNode + 1),
  48 + orth(orth),
  49 + lemma(orth),
  50 + tagnum(0),
  51 + namenum(0),
  52 + tag(tagset.getTag(0)),
  53 + name(tagset.getName(0)) {
  54 +
  55 +}
  56 +
  57 +MorphInterpretation MorphInterpretation::createIgn(int startNode, const std::string& orth, const Tagset& tagset) {
  58 + return MorphInterpretation(startNode, orth, tagset);
  59 +}
  60 +
42 61 MorphInterpretation::~MorphInterpretation() {
43 62 }
44 63  
... ...
morfeusz/MorphInterpretation.hpp
... ... @@ -20,6 +20,7 @@ public:
20 20 const std::string& orth,
21 21 const EncodedInterpretation& encodedInterp,
22 22 const Tagset& tagset);
  23 + static MorphInterpretation createIgn(int startNode, const std::string& orth, const Tagset& tagset);
23 24 virtual ~MorphInterpretation();
24 25 int getStartNode() const;
25 26 int getEndNode() const;
... ... @@ -30,6 +31,10 @@ public:
30 31 const std::string& getTag() const;
31 32 const std::string& getName() const;
32 33 private:
  34 + MorphInterpretation(
  35 + int startNode,
  36 + const std::string& orth,
  37 + const Tagset& tagset);
33 38 int startNode;
34 39 int endNode;
35 40 std::string orth;
... ...
morfeusz/charset/CharsetConverter.cpp
... ... @@ -6,6 +6,10 @@
6 6  
7 7 using namespace std;
8 8  
  9 +uint32_t UTF8CharsetConverter::peek(const char*& it, const char* end) const {
  10 + return utf8::peek_next(it, end);
  11 +}
  12 +
9 13 uint32_t UTF8CharsetConverter::next(const char*& it, const char* end) const {
10 14 return utf8::next(it, end);
11 15 }
... ...
morfeusz/charset/CharsetConverter.hpp
... ... @@ -10,6 +10,7 @@
10 10  
11 11 class CharsetConverter {
12 12 public:
  13 + virtual uint32_t peek(const char*& it, const char* end) const = 0;
13 14 virtual uint32_t next(const char*& it, const char* end) const = 0;
14 15 virtual void append(uint32_t cp, std::vector<char>& result) const = 0;
15 16 private:
... ... @@ -17,6 +18,7 @@ private:
17 18  
18 19 class UTF8CharsetConverter: public CharsetConverter {
19 20 public:
  21 + uint32_t peek(const char*& it, const char* end) const;
20 22 uint32_t next(const char*& it, const char* end) const;
21 23 void append(uint32_t cp, std::vector<char>& result) const;
22 24 private:
... ... @@ -24,6 +26,7 @@ private:
24 26  
25 27 class UTF16CharsetConverter: public CharsetConverter {
26 28 public:
  29 + uint32_t peek(const char*& it, const char* end) const;
27 30 uint32_t next(const char*& it, const char* end) const;
28 31 void append(uint32_t cp, std::vector<char>& result) const;
29 32 private:
... ... @@ -31,6 +34,7 @@ private:
31 34  
32 35 class UTF32CharsetConverter: public CharsetConverter {
33 36 public:
  37 + uint32_t peek(const char*& it, const char* end) const;
34 38 uint32_t next(const char*& it, const char* end) const;
35 39 void append(uint32_t cp, std::vector<char>& result) const;
36 40 private:
... ... @@ -38,6 +42,7 @@ private:
38 42  
39 43 class ISO8859_2_CharsetConverter: public CharsetConverter {
40 44 public:
  45 + uint32_t peek(const char*& it, const char* end) const;
41 46 uint32_t next(const char*& it, const char* end) const;
42 47 void append(uint32_t cp, std::vector<char>& result) const;
43 48 private:
... ...
morfeusz/morfeusz.cpp deleted
morfeusz/morfeusz.hpp deleted
1   -
2   -
morfeusz/test_simple.cpp
... ... @@ -7,6 +7,7 @@
7 7  
8 8 #include <cstdlib>
9 9  
  10 +#include "utils.hpp"
10 11 #include "Morfeusz.hpp"
11 12 #include "MorphInterpretation.hpp"
12 13  
... ... @@ -16,11 +17,11 @@ using namespace std;
16 17 *
17 18 */
18 19 int main(int argc, char** argv) {
19   - Morfeusz morfeusz(argv[1]);
  20 + Morfeusz morfeusz("/tmp/test-SIMPLE-PoliMorfSmall.tab.fsa");
20 21 vector<MorphInterpretation> res;
21   - string word = "mijałem";
22   - const char* ptr = word.c_str();
23   - morfeusz.processOneWord(ptr, word.c_str() + word.size(), 0, res);
  22 + string word = " mijałem fasdfasd abdominalności ";
  23 + morfeusz.analyze(word, res);
  24 + DEBUG("znaleziono "+to_string(res.size()));
24 25 for (MorphInterpretation& mi: res) {
25 26 cerr << mi.getStartNode() << " " << mi.getEndNode() << " " << mi.getLemma() << " " << mi.getTag() << " " << mi.getName() << endl;
26 27 }
... ...
nbproject/configurations.xml
... ... @@ -17,9 +17,9 @@
17 17 <in>MorphInterpretation.cpp</in>
18 18 <in>Tagset.cpp</in>
19 19 <in>main.cpp</in>
20   - <in>morfeusz.cpp</in>
21 20 <in>test_morfeusz.cpp</in>
22 21 <in>test_morph.cpp</in>
  22 + <in>test_simple.cpp</in>
23 23 </df>
24 24 <logicalFolder name="ExternalFiles"
25 25 displayName="Important Files"
... ... @@ -49,7 +49,7 @@
49 49 <buildCommandWorkingDir>build</buildCommandWorkingDir>
50 50 <buildCommand>${MAKE} -f Makefile</buildCommand>
51 51 <cleanCommand>${MAKE} -f Makefile clean</cleanCommand>
52   - <executablePath>build/fsa/test_dict</executablePath>
  52 + <executablePath>build/morfeusz/test_simple</executablePath>
53 53 </makeTool>
54 54 </makefileType>
55 55 <folder path="1">
... ... @@ -120,10 +120,6 @@
120 120 <ccTool>
121 121 </ccTool>
122 122 </item>
123   - <item path="morfeusz/morfeusz.cpp" ex="false" tool="1" flavor2="4">
124   - <ccTool>
125   - </ccTool>
126   - </item>
127 123 <item path="morfeusz/test_morfeusz.cpp" ex="false" tool="1" flavor2="4">
128 124 <ccTool>
129 125 </ccTool>
... ... @@ -132,12 +128,8 @@
132 128 <ccTool>
133 129 </ccTool>
134 130 </item>
135   - <item path="morfeusz/test_simple.cpp" ex="false" tool="1" flavor2="0">
  131 + <item path="morfeusz/test_simple.cpp" ex="false" tool="1" flavor2="8">
136 132 <ccTool>
137   - <incDir>
138   - <pElem>fsa</pElem>
139   - <pElem>build/morfeusz</pElem>
140   - </incDir>
141 133 </ccTool>
142 134 </item>
143 135 </conf>
... ...
testfiles/PoliMorfSmall.tab
... ... @@ -579,3 +579,43 @@ abdominoplastyki abdominoplastyka subst:pl:voc:f pospolita
579 579 abdominoplastyki abdominoplastyka subst:sg:gen:f pospolita
580 580 abdominoplastyko abdominoplastyka subst:sg:voc:f pospolita
581 581 abdominoplastykom abdominoplastyka subst:pl:dat:f pospolita
  582 +mijał mijać praet:sg:m1.m2.m3:imperf pospolita
  583 +mijała mijać praet:sg:f:imperf pospolita
  584 +mijało mijać praet:sg:n1.n2:imperf pospolita
  585 +mijały mijać praet:pl:m2.m3.f.n1.n2.p2.p3:imperf pospolita
  586 +omijał omijać praet:sg:m1.m2.m3:imperf pospolita
  587 +omijała omijać praet:sg:f:imperf pospolita
  588 +omijało omijać praet:sg:n1.n2:imperf pospolita
  589 +omijały omijać praet:pl:m2.m3.f.n1.n2.p2.p3:imperf pospolita
  590 +pomijał pomijać praet:sg:m1.m2.m3:imperf pospolita
  591 +pomijała pomijać praet:sg:f:imperf pospolita
  592 +pomijało pomijać praet:sg:n1.n2:imperf pospolita
  593 +pomijały pomijać praet:pl:m2.m3.f.n1.n2.p2.p3:imperf pospolita
  594 +powymijał powymijać praet:sg:m1.m2.m3:perf pospolita
  595 +powymijała powymijać praet:sg:f:perf pospolita
  596 +powymijało powymijać praet:sg:n1.n2:perf pospolita
  597 +powymijały powymijać praet:pl:m2.m3.f.n1.n2.p2.p3:perf pospolita
  598 +przemijał przemijać praet:sg:m1.m2.m3:imperf pospolita
  599 +przemijała przemijać praet:sg:f:imperf pospolita
  600 +przemijało przemijać praet:sg:n1.n2:imperf pospolita
  601 +przemijały przemijać praet:pl:m2.m3.f.n1.n2.p2.p3:imperf pospolita
  602 +rozmijał rozmijać praet:sg:m1.m2.m3:imperf pospolita
  603 +rozmijała rozmijać praet:sg:f:imperf pospolita
  604 +rozmijało rozmijać praet:sg:n1.n2:imperf pospolita
  605 +rozmijały rozmijać praet:pl:m2.m3.f.n1.n2.p2.p3:imperf pospolita
  606 +wymijał wymijać praet:sg:m1.m2.m3:imperf pospolita
  607 +wymijała wymijać praet:sg:f:imperf pospolita
  608 +wymijało wymijać praet:sg:n1.n2:imperf pospolita
  609 +wymijały wymijać praet:pl:m2.m3.f.n1.n2.p2.p3:imperf pospolita
  610 +zmijał zmijać praet:sg:m1.m2.m3:imperf pospolita
  611 +zmijała zmijać praet:sg:f:imperf pospolita
  612 +zmijało zmijać praet:sg:n1.n2:imperf pospolita
  613 +zmijały zmijać praet:pl:m2.m3.f.n1.n2.p2.p3:imperf pospolita
  614 +em być aglt:sg:pri:imperf:wok pospolita
  615 +eś być aglt:sg:sec:imperf:wok pospolita
  616 +eście być aglt:pl:sec:imperf:wok pospolita
  617 +eśmy być aglt:pl:pri:imperf:wok pospolita
  618 +m być aglt:sg:pri:imperf:nwok pospolita
  619 +ś być aglt:sg:sec:imperf:nwok pospolita
  620 +ście być aglt:pl:sec:imperf:nwok pospolita
  621 +śmy być aglt:pl:pri:imperf:nwok pospolita
... ...
testfiles/polimorf.tagset
... ... @@ -2,7 +2,7 @@
2 2  
3 3 [TAGS]
4 4  
5   -0 adj:pl:acc:m1.p1:com
  5 +0 ign
6 6 1 adj:pl:acc:m1.p1:pos
7 7 2 adj:pl:acc:m1.p1:sup
8 8 3 adj:pl:acc:m2.m3.f.n1.n2.p2.p3:com
... ... @@ -576,6 +576,7 @@
576 576 571 winien:sg:f:imperf
577 577 572 winien:sg:m1.m2.m3:imperf
578 578 573 winien:sg:n1.n2:imperf
  579 +574 adj:pl:acc:m1.p1:com
579 580  
580 581 [NAMES]
581 582  
... ...