InterpretedChunk.hpp 1.94 KB
/* 
 * File:   InterpretedChunk.hpp
 * Author: mlenart
 *
 * Created on 18 listopad 2013, 15:00
 */

#ifndef INTERPRETEDCHUNK_HPP
#define	INTERPRETEDCHUNK_HPP

#include <string>
#include <vector>
#include "InterpsGroup.hpp"

namespace morfeusz {

/**
 * Denotes a part of the text that has some not-yet-deserialized
 * interpretations attached to it.
 *
 * The struct declares no constructors and no destructor. Consequently the
 * scalar and pointer members of a default-initialized object (e.g. a plain
 * local `InterpretedChunk c;`) hold indeterminate values until assigned, and
 * the raw pointers are never freed by this struct; copying a chunk copies the
 * pointer values, not the data they point to.
 */
struct InterpretedChunk {
    
    /**
     * The type of segment for this chunk.
     */
    unsigned char segmentType;
    
    /**
     * Pointer to the start of this chunk's text.
     */
    const char* textStartPtr;
    
    /**
     * Pointer to the end of this chunk's text (exclusive).
     */
    const char* textEndPtr;
    
    /**
     * Pointer to the start of this chunk's text, including preceding
     * whitespace if whitespace handling is set to APPEND.
     *
     * NOTE(review): the earlier wording labelled this start pointer
     * "(exclusive)", which looks copied from the end-pointer comment;
     * presumably it points at the first character of the chunk - confirm
     * against the code that fills it in.
     */
    const char* chunkStartPtr;
    
    /**
     * Pointer to the end of this chunk's text (exclusive), including
     * following whitespace if whitespace handling is set to APPEND.
     */
    const char* chunkEndPtr;
    
    /**
     * Pointer to the start of this chunk's binary data.
     */
    const unsigned char* interpsGroupPtr;
    
    /**
     * Pointer to the end of this chunk's binary data (exclusive).
     */
    const unsigned char* interpsEndPtr;
    
    /**
     * true iff this chunk shifts its orth to the chunk on its right
     * (it is "A" in an "A> B" segmentation rule).
     */
    bool shiftOrth;
    
    /**
     * true iff this chunk has data attached from its prefix chunk
     * (it is the "B" segment in an "A> B" segmentation rule).
     */
    bool orthWasShifted;
    
    /**
     * Number of codepoints this chunk consists of.
     */
    int codepointsNum;
    
    /**
     * Chunks that are in the prefix segments (those with ">" in segmentation
     * rules, i.e. "dig>* dig").
     */
    std::vector<InterpretedChunk> prefixChunks;
    
    /**
     * Homonym id specified by the user.
     */
    std::string requiredHomonymId;
    
    /**
     * NOTE(review): undocumented in the original. Presumably forces
     * case-insensitive handling of this chunk - confirm against the code
     * that sets and reads this flag.
     */
    bool forceIgnoreCase;
};

}

#endif	/* INTERPRETEDCHUNK_HPP */