#include <string> #include <cassert> #include <climits> #include <vector> #include <iostream> #include <algorithm> #include "utils.hpp" #include "InflexionGraph.hpp" using namespace std; namespace morfeusz { void InflexionGraph::addStartEdge(const Edge& e) { if (this->graph.empty()) { assert(this->node2ChunkStartPtr.empty()); this->graph.push_back(vector<Edge>()); this->node2ChunkStartPtr.push_back(e.chunk.textStartPtr); } assert(this->node2ChunkStartPtr[0] == e.chunk.textStartPtr); this->graph[0].push_back(e); } void InflexionGraph::addMiddleEdge(unsigned int startNode, const Edge& e) { assert(startNode < e.nextNode); assert(startNode == this->graph.size()); if (startNode == this->graph.size()) { this->graph.push_back(vector<Edge>()); this->node2ChunkStartPtr.push_back(e.chunk.textStartPtr); } this->graph[startNode].push_back(e); } static inline bool chunkIsAtFront( const InterpretedChunk& chunk, const std::vector<InterpretedChunk>& path) { unsigned int i; for (i = 0; i < path.size() - 1 && path[i].orthWasShifted; i++) { } assert(!path[i].orthWasShifted); return &chunk == &(path[i]); } static inline bool chunkIsAtBack( const InterpretedChunk& chunk, const std::vector<InterpretedChunk>& path) { return &chunk == &(path.back()); } static inline bool chunkIsTheOnlyOne( const InterpretedChunk& chunk, const std::vector<InterpretedChunk>& path) { return chunkIsAtFront(chunk, path) && chunkIsAtBack(chunk, path); } void InflexionGraph::addPath(const std::vector<InterpretedChunk>& path, bool weak) { if (weak && !this->empty() && !this->onlyWeakPaths) { return; } else if (this->onlyWeakPaths && !weak) { this->graph.resize(0); this->node2ChunkStartPtr.resize(0); this->onlyWeakPaths = false; } for (unsigned int i = 0; i < path.size(); i++) { const InterpretedChunk& chunk = path[i]; if (!chunk.orthWasShifted) { if (chunkIsTheOnlyOne(chunk, path)) { Edge e = {chunk, UINT_MAX}; this->addStartEdge(e); } else if (chunkIsAtFront(chunk, path)) { Edge e = {chunk, this->graph.empty() ? 1 : (unsigned int) this->graph.size()}; this->addStartEdge(e); } else if (chunkIsAtBack(chunk, path)) { Edge e = {chunk, UINT_MAX}; this->addMiddleEdge((unsigned int) this->graph.size(), e); } else { Edge e = {chunk, (unsigned int) this->graph.size() + 1}; this->addMiddleEdge((unsigned int) this->graph.size(), e); } } } } bool InflexionGraph::canMergeNodes(unsigned int node1, unsigned int node2) { return this->node2ChunkStartPtr[node1] == this->node2ChunkStartPtr[node2] && this->getPossiblePaths(node1) == this->getPossiblePaths(node2); } set<InflexionGraph::Path> InflexionGraph::getPossiblePaths(unsigned int node) { if (node == UINT_MAX || node == this->graph.size() - 1) { set<InflexionGraph::Path> res; return res; } else { set<InflexionGraph::Path> res; vector<Edge>& edges = this->graph.at(node); for (unsigned int i = 0; i < edges.size(); i++) { Edge& e = edges[i]; InflexionGraph::PathElement pathElem(e.chunk.textStartPtr, e.chunk.segmentType); if (e.nextNode != this->graph.size()) { set<Path> possiblePaths = this->getPossiblePaths(e.nextNode); vector<Path> nextPaths(possiblePaths.begin(), possiblePaths.end()); vector<Path>::iterator it; for (it = nextPaths.begin(); it != nextPaths.end(); ++it) { (*it).insert(pathElem); } res.insert(nextPaths.begin(), nextPaths.end()); } } return res; } } static bool containsEqualEdge(const vector<InflexionGraph::Edge>& edges, const InflexionGraph::Edge& e) { for (unsigned int i = 0; i < edges.size(); i++) { const InflexionGraph::Edge& e1 = edges[i]; if (e1.chunk.textStartPtr == e.chunk.textStartPtr && e1.chunk.textNoPrefixesStartPtr == e.chunk.textNoPrefixesStartPtr && e1.chunk.textEndPtr == e.chunk.textEndPtr && e1.chunk.segmentType == e.chunk.segmentType && e1.nextNode == e.nextNode && e1.chunk.interpsGroupPtr == e.chunk.interpsGroupPtr) { return true; } } return false; } void InflexionGraph::redirectEdges(unsigned int fromNode, unsigned int toNode) { for (unsigned int node = 0; node < fromNode; node++) { vector<Edge>& edges = this->graph[node]; vector<Edge>::iterator edgesIt = edges.begin(); while (edgesIt != edges.end()) { Edge& oldEdge = *edgesIt; if (oldEdge.nextNode == fromNode) { Edge newEdge = {oldEdge.chunk, toNode}; if (!containsEqualEdge(edges, newEdge)) { // if newEdge is not in edges, redirect oldEdge // so it becomes newEdge oldEdge.nextNode = toNode; } else { // if newEdge is already there, just remove old edge edges.erase(edgesIt); } } else { ++edgesIt; } } } } void InflexionGraph::doRemoveNode(unsigned int node) { for (unsigned int i = node + 1; i < this->graph.size(); i++) { redirectEdges(i, i - 1); this->graph[i - 1] = this->graph[i]; this->node2ChunkStartPtr[i - 1] = this->node2ChunkStartPtr[i]; } this->graph.pop_back(); this->node2ChunkStartPtr.pop_back(); } void InflexionGraph::doMergeNodes(unsigned int node1, unsigned int node2) { if (node1 > node2) { doMergeNodes(node2, node1); } else { // node1 < node2 for (unsigned int i = 0; i < this->graph[node2].size(); i++) { Edge& e = this->graph[node2][i]; if (!containsEqualEdge(graph[node1], e)) { this->graph[node1].push_back(e); } } this->redirectEdges(node2, node1); this->doRemoveNode(node2); } } bool InflexionGraph::tryToMergeTwoNodes() { for (unsigned int node1 = 0; node1 < this->graph.size(); node1++) { for (unsigned int node2 = (unsigned int) this->graph.size() - 1; node2 > node1; node2--) { if (this->canMergeNodes(node1, node2)) { this->doMergeNodes(node1, node2); return true; } } } return false; } void InflexionGraph::minimizeGraph() { if (this->graph.size() > 2) { // debugGraph(this->graph); while (this->tryToMergeTwoNodes()) { //debugGraph(this->graph); } } } bool InflexionGraph::empty() const { return this->graph.empty(); } void InflexionGraph::repairLastNodeNumbers() { for (unsigned int i = 0; i < this->graph.size(); i++) { vector<Edge>& edges = this->graph[i]; for (unsigned int j = 0; j < edges.size(); j++) { Edge& e = edges[j]; if (e.nextNode == UINT_MAX) { e.nextNode = (unsigned int) this->graph.size(); } } } } static vector<unsigned int> createIdentityNodesMap(long unsigned int n) { vector<unsigned int> res(n); for (unsigned int i = 0; i < n; i++) { res[i] = i; } return res; } class TopologicalComparator { public: TopologicalComparator(vector< const char* > node2ChunkStartPtr) : node2ChunkStartPtr(node2ChunkStartPtr) {} bool operator()(unsigned int i, unsigned int j) { return node2ChunkStartPtr[i] < node2ChunkStartPtr[j]; } private: vector< const char* > node2ChunkStartPtr; }; // XXX this is a bit dirty // fixes problem with "radem," (incorrect node numbers when inflexion graph is NOT a tree) // and "Z rozdrażnienieniem:)" void InflexionGraph::sortNodeNumbersTopologically() { vector<unsigned int> nodesTopologicallySorted(createIdentityNodesMap(this->graph.size())); TopologicalComparator comparator(this->node2ChunkStartPtr); sort(nodesTopologicallySorted.begin(), nodesTopologicallySorted.end(), comparator); vector<unsigned int> oldNode2NewNode(nodesTopologicallySorted.size()); for (unsigned int newNode = 0; newNode < nodesTopologicallySorted.size(); newNode++) { unsigned int oldNode = nodesTopologicallySorted[newNode]; oldNode2NewNode[oldNode] = newNode; } // swap pointers in edges for (unsigned int node = 0; node < this->graph.size(); node++) { for (unsigned int edgeIdx = 0; edgeIdx < this->graph[node].size(); edgeIdx++) { InflexionGraph::Edge& edge = this->graph[node][edgeIdx]; if (edge.nextNode < this->graph.size()) { // don't change UINT_MAX nodes (outside current graph) edge.nextNode = oldNode2NewNode[edge.nextNode]; } } } // swap nodes vector< vector<Edge> > graphCopy(this->graph); vector< const char* > node2ChunkStartPtrCopy(this->node2ChunkStartPtr); for (unsigned int oldNode = 0; oldNode < this->graph.size(); oldNode++) { unsigned int newNode = oldNode2NewNode[oldNode]; this->graph[newNode] = graphCopy[oldNode]; this->node2ChunkStartPtr[newNode] = node2ChunkStartPtrCopy[oldNode]; } } const vector< vector<InflexionGraph::Edge> >& InflexionGraph::getTheGraph() { minimizeGraph(); if (this->graph.size() > 2) { sortNodeNumbersTopologically(); } repairLastNodeNumbers(); return this->graph; } void InflexionGraph::clear() { graph.resize(0); node2ChunkStartPtr.resize(0); onlyWeakPaths = true; } }