/*
 * File:   TextReader.hpp
 * Author: lennyn
 *
 * Created on May 28, 2014, 11:43 AM
 */
#ifndef TEXTREADER_HPP
#define TEXTREADER_HPP

#include <stdint.h>
#include <string>

#include "Environment.hpp"

namespace morfeusz {
|
|
16
17
18
|
class TextReader {
public:
TextReader(const char* inputStart, const char* inputEnd, const Environment& env);
|
|
19
|
TextReader(const std::string& text, const Environment& env);
|
|
20
|
void markChunkStartsHere();
|
|
21
22
|
void markWordStartsHere();
const char* getWordStartPtr() const;
|
|
23
|
const char* getChunkStartPtr() const;
|
|
24
25
26
27
28
29
|
const char* getCurrPtr() const;
const char* getNextPtr();
const char* getEndPtr() const;
int getCodepointsRead() const;
bool isAtEnd() const;
bool isAtWhitespace();
|
|
30
|
bool isInsideAWord();
|
|
31
32
33
34
|
uint32_t peek();
uint32_t normalizedPeek();
uint32_t next();
void skipWhitespaces();
|
|
35
|
std::string readWhitespacesChunk();
|
|
36
37
38
39
|
void proceedToEnd();
virtual ~TextReader();
private:
int codepointsNum;
|
|
40
|
const char* chunkStartPtr;
|
|
41
42
43
44
45
46
47
48
49
50
51
52
|
const char* wordStartPtr;
const char* currPtr;
const char* inputEnd;
const Environment& env;
bool knowsAboutWhitespace;
bool atWhitespace;
bool peekIsRead;
uint32_t thePeek;
uint32_t theNormalizedPeek;
const char* ptrAfterThePeek;
};
|
|
53
54
|
}

#endif /* TEXTREADER_HPP */