|
1
|
|
|
2
|
#include <string>
|
|
3
|
#include <cassert>
|
|
4
|
#include <climits>
|
|
5
|
#include <vector>
|
|
6
|
#include <iostream>
|
|
7
|
#include "utils.hpp"
|
|
8
|
#include "InflexionGraph.hpp"
|
|
9
|
|
|
10
11
|
using namespace std;
|
|
12
13
|
namespace morfeusz {
|
|
14
|
void InflexionGraph::addStartEdge(const Edge& e) {
|
|
15
|
if (this->graph.empty()) {
|
|
16
|
assert(this->node2ChunkStartPtr.empty());
|
|
17
|
this->graph.push_back(vector<Edge>());
|
|
18
|
this->node2ChunkStartPtr.push_back(e.chunk.textStartPtr);
|
|
19
|
}
|
|
20
|
assert(this->node2ChunkStartPtr[0] == e.chunk.textStartPtr);
|
|
21
22
|
this->graph[0].push_back(e);
}
|
|
23
|
|
|
24
|
void InflexionGraph::addMiddleEdge(unsigned int startNode, const Edge& e) {
|
|
25
26
27
28
|
assert(startNode < e.nextNode);
assert(startNode == this->graph.size());
if (startNode == this->graph.size()) {
this->graph.push_back(vector<Edge>());
|
|
29
|
this->node2ChunkStartPtr.push_back(e.chunk.textStartPtr);
|
|
30
31
|
}
this->graph[startNode].push_back(e);
|
|
32
33
|
}
|
|
34
|
static inline bool chunkIsAtFront(
|
|
35
|
const InterpretedChunk& chunk,
|
|
36
37
38
39
40
41
42
43
44
|
const std::vector<InterpretedChunk>& path) {
unsigned int i;
for (i = 0; i < path.size() - 1 && path[i].orthWasShifted; i++) {
}
assert(!path[i].orthWasShifted);
return &chunk == &(path[i]);
}
static inline bool chunkIsAtBack(
|
|
45
|
const InterpretedChunk& chunk,
|
|
46
47
48
49
50
|
const std::vector<InterpretedChunk>& path) {
return &chunk == &(path.back());
}
static inline bool chunkIsTheOnlyOne(
|
|
51
|
const InterpretedChunk& chunk,
|
|
52
53
54
55
|
const std::vector<InterpretedChunk>& path) {
return chunkIsAtFront(chunk, path) && chunkIsAtBack(chunk, path);
}
|
|
56
|
void InflexionGraph::addPath(const std::vector<InterpretedChunk>& path, bool weak) {
|
|
57
|
|
|
58
59
60
61
|
if (weak && !this->empty() && !this->onlyWeakPaths) {
return;
}
else if (this->onlyWeakPaths && !weak) {
|
|
62
63
|
this->graph.resize(0);
this->node2ChunkStartPtr.resize(0);
|
|
64
65
|
this->onlyWeakPaths = false;
}
|
|
66
67
|
for (unsigned int i = 0; i < path.size(); i++) {
const InterpretedChunk& chunk = path[i];
|
|
68
|
if (!chunk.orthWasShifted) {
|
|
69
|
if (chunkIsTheOnlyOne(chunk, path)) {
|
|
70
71
|
Edge e = {chunk, UINT_MAX};
this->addStartEdge(e);
|
|
72
|
}
|
|
73
|
else if (chunkIsAtFront(chunk, path)) {
|
|
74
75
|
Edge e = {chunk, this->graph.empty() ? 1 : (unsigned int) this->graph.size()};
this->addStartEdge(e);
|
|
76
|
}
|
|
77
|
else if (chunkIsAtBack(chunk, path)) {
|
|
78
79
|
Edge e = {chunk, UINT_MAX};
this->addMiddleEdge((unsigned int) this->graph.size(), e);
|
|
80
81
|
}
else {
|
|
82
|
Edge e = {chunk, (unsigned int) this->graph.size() + 1};
|
|
83
84
|
this->addMiddleEdge((unsigned int) this->graph.size(), e);
}
|
|
85
86
87
88
|
}
}
}
|
|
89
|
bool InflexionGraph::canMergeNodes(unsigned int node1, unsigned int node2) {
|
|
90
|
return this->node2ChunkStartPtr[node1] == this->node2ChunkStartPtr[node2]
|
|
91
92
93
|
&& this->getPossiblePaths(node1) == this->getPossiblePaths(node2);
}
|
|
94
|
set<InflexionGraph::Path> InflexionGraph::getPossiblePaths(unsigned int node) {
|
|
95
|
if (node == UINT_MAX || node == this->graph.size() - 1) {
|
|
96
|
return set<InflexionGraph::Path>();
|
|
97
98
|
}
else {
|
|
99
|
set<InflexionGraph::Path> res;
|
|
100
101
102
|
vector<Edge>& edges = this->graph.at(node);
for (unsigned int i = 0; i < edges.size(); i++) {
Edge& e = edges[i];
|
|
103
|
InflexionGraph::PathElement pathElem(e.chunk.textStartPtr, e.chunk.segmentType);
|
|
104
105
106
107
108
109
110
111
|
if (e.nextNode != this->graph.size()) {
set<Path> possiblePaths = this->getPossiblePaths(e.nextNode);
vector<Path> nextPaths(possiblePaths.begin(), possiblePaths.end());
vector<Path>::iterator it;
for (it = nextPaths.begin(); it != nextPaths.end(); ++it) {
(*it).insert(pathElem);
}
res.insert(nextPaths.begin(), nextPaths.end());
|
|
112
113
114
115
116
117
|
}
}
return res;
}
}
|
|
118
|
static bool containsEqualEdge(const vector<InflexionGraph::Edge>& edges, const InflexionGraph::Edge& e) {
|
|
119
|
for (unsigned int i = 0; i < edges.size(); i++) {
|
|
120
|
const InflexionGraph::Edge& e1 = edges[i];
|
|
121
|
if (e1.chunk.textStartPtr == e.chunk.textStartPtr
|
|
122
123
|
&& e1.chunk.textStartPtr == e.chunk.textStartPtr
&& e1.chunk.textEndPtr == e.chunk.textEndPtr
|
|
124
|
&& e1.chunk.segmentType == e.chunk.segmentType
|
|
125
126
127
128
129
|
&& e1.nextNode == e.nextNode) {
return true;
}
}
return false;
|
|
130
131
|
}
|
|
132
|
void InflexionGraph::redirectEdges(unsigned int fromNode, unsigned int toNode) {
|
|
133
134
|
for (unsigned int node = 0; node < fromNode; node++) {
vector<Edge>& edges = this->graph[node];
|
|
135
|
vector<Edge>::iterator edgesIt = edges.begin();
|
|
136
|
while (edgesIt != edges.end()) {
|
|
137
138
139
|
Edge& oldEdge = *edgesIt;
if (oldEdge.nextNode == fromNode) {
Edge newEdge = {oldEdge.chunk, toNode};
|
|
140
|
if (!containsEqualEdge(edges, newEdge)) {
|
|
141
142
143
|
// if newEdge is not in edges, redirect edgeEdge
// so it becomes newEdge
oldEdge.nextNode = toNode;
|
|
144
145
|
}
else {
|
|
146
147
|
// if newEdge is already there, just remove old edge
edges.erase(edgesIt);
|
|
148
|
}
|
|
149
150
|
}
else {
|
|
151
|
++edgesIt;
|
|
152
|
}
|
|
153
154
155
|
}
}
}
|
|
156
|
|
|
157
|
void InflexionGraph::doRemoveNode(unsigned int node) {
|
|
158
159
160
|
for (unsigned int i = node + 1; i < this->graph.size(); i++) {
redirectEdges(i, i - 1);
this->graph[i - 1] = this->graph[i];
|
|
161
|
this->node2ChunkStartPtr[i - 1] = this->node2ChunkStartPtr[i];
|
|
162
163
|
}
this->graph.pop_back();
|
|
164
|
this->node2ChunkStartPtr.pop_back();
|
|
165
166
|
}
|
|
167
|
void InflexionGraph::doMergeNodes(unsigned int node1, unsigned int node2) {
|
|
168
169
|
if (node1 > node2) {
doMergeNodes(node2, node1);
|
|
170
171
|
}
else {
|
|
172
|
// node1 < node2
|
|
173
174
|
for (unsigned int i = 0; i < this->graph[node2].size(); i++) {
Edge& e = this->graph[node2][i];
|
|
175
176
177
178
|
if (!containsEqualEdge(graph[node1], e)) {
this->graph[node1].push_back(e);
}
}
|
|
179
180
|
// DEBUG("1");
// debugGraph(this->graph);
|
|
181
|
this->redirectEdges(node2, node1);
|
|
182
183
|
// DEBUG("2");
// debugGraph(this->graph);
|
|
184
|
this->doRemoveNode(node2);
|
|
185
186
|
// DEBUG("3");
// debugGraph(this->graph);
|
|
187
188
189
|
}
}
|
|
190
|
bool InflexionGraph::tryToMergeTwoNodes() {
|
|
191
|
for (unsigned int node1 = 0; node1 < this->graph.size(); node1++) {
|
|
192
|
for (unsigned int node2 = (unsigned int) this->graph.size() - 1; node2 > node1; node2--) {
|
|
193
194
195
196
197
198
199
200
201
|
if (this->canMergeNodes(node1, node2)) {
this->doMergeNodes(node1, node2);
return true;
}
}
}
return false;
}
|
|
202
|
void InflexionGraph::minimizeGraph() {
|
|
203
|
if (this->graph.size() > 2) {
|
|
204
|
// debugGraph(this->graph);
|
|
205
|
while (this->tryToMergeTwoNodes()) {
|
|
206
|
// debugGraph(this->graph);
|
|
207
208
209
210
|
}
}
}
|
|
211
|
bool InflexionGraph::empty() const {
|
|
212
213
|
return this->graph.empty();
}
|
|
214
|
|
|
215
|
void InflexionGraph::repairLastNodeNumbers() {
|
|
216
217
218
219
|
for (unsigned int i = 0; i < this->graph.size(); i++) {
vector<Edge>& edges = this->graph[i];
for (unsigned int j = 0; j < edges.size(); j++) {
Edge& e = edges[j];
|
|
220
|
if (e.nextNode == UINT_MAX) {
|
|
221
|
e.nextNode = (unsigned int) this->graph.size();
|
|
222
223
224
225
|
}
}
}
}
|
|
226
|
|
|
227
228
229
230
231
232
|
const vector< vector<InflexionGraph::Edge> >& InflexionGraph::getTheGraph() {
minimizeGraph();
repairLastNodeNumbers();
return this->graph;
}
|
|
233
|
void InflexionGraph::clear() {
|
|
234
235
|
graph.resize(0);
node2ChunkStartPtr.resize(0);
|
|
236
|
onlyWeakPaths = true;
|
|
237
|
}
|
|
238
239
|
}
|