|
1
2
3
4
5
6
7
8
9
10
|
/*
 * File: EncodingConverter.hpp
 * Author: mlenart
 *
 * Created on 14 November 2013, 17:28
 */
#ifndef ENCODINGCONVERTER_HPP
#define ENCODINGCONVERTER_HPP
|
|
11
|
#include <inttypes.h>
|
|
12
|
#include <string>
|
|
13
14
|
#include <vector>
#include <map>
|
|
15
|
|
|
16
17
|
namespace morfeusz {
|
|
18
19
|
/**
 * Abstract base class for character-set converters.
 *
 * The interface works on Unicode code points (uint32_t) and byte strings
 * (std::string). next() and append() are pure virtual and must be provided
 * by every concrete converter; fromUTF8() is virtual and may be overridden.
 */
class CharsetConverter {
public:
    // Disabled declaration carried over from the original source:
    // uint32_t peek(const char* it, const char* end) const;

    /**
     * Produces a code point from the byte range [it, end).
     *
     * @param it  read position; passed by non-const reference, so an
     *            implementation can move it (presumably past the bytes it
     *            consumed -- confirm against the implementations)
     * @param end end of the input buffer
     * @return the code point obtained from the input
     */
    virtual uint32_t next(const char*& it, const char* end) const = 0;

    /**
     * Writes a code point into a string.
     *
     * @param cp     code point to write
     * @param result output string; presumably the encoded form of cp is
     *               appended to it -- confirm against the implementations
     */
    virtual void append(uint32_t cp, std::string& result) const = 0;

    /**
     * Converts a string given in UTF-8 (per the method name).
     *
     * @param input source string
     * @return converted string, presumably in this converter's encoding
     *         (TODO confirm in the .cpp)
     */
    virtual std::string fromUTF8(const std::string& input) const;

    /**
     * Builds a string from a sequence of code points.
     *
     * @param codepoints code points to convert
     * @return resulting string (presumably produced via append() for each
     *         element -- confirm in the .cpp)
     */
    std::string toString(const std::vector<uint32_t>& codepoints) const;

    /** Virtual destructor, so derived converters can be destroyed through a base pointer. */
    virtual ~CharsetConverter();
};
|
|
31
|
/**
 * CharsetConverter for UTF-8 (per the class name).
 *
 * The constructor is private; instances are obtained through getInstance().
 * The copy constructor and assignment operator are declared private and,
 * per the original comments, deliberately left unimplemented so that the
 * object cannot be copied.
 */
class UTF8CharsetConverter : public CharsetConverter {
public:
    /**
     * @return const reference to a UTF8CharsetConverter (presumably a single
     *         shared instance -- confirm in the .cpp)
     */
    static const UTF8CharsetConverter& getInstance();

    /** Overrides CharsetConverter::next. */
    virtual uint32_t next(const char*& it, const char* end) const;

    /** Overrides CharsetConverter::append. */
    virtual void append(uint32_t cp, std::string& result) const;

    /** Overrides CharsetConverter::fromUTF8. */
    virtual std::string fromUTF8(const std::string& input) const;

private:
    UTF8CharsetConverter();
    UTF8CharsetConverter(const UTF8CharsetConverter&); // do not implement
    void operator=(const UTF8CharsetConverter&); // do not implement
};
|
|
45
46
47
|
/*
* Converter that uses a simple conversion table
*/
|
|
48
|
class OneByteCharsetConverter : public CharsetConverter {
|
|
49
|
public:
|
|
50
|
explicit OneByteCharsetConverter(const uint32_t* array);
|
|
51
|
uint32_t next(const char*& it, const char* end) const;
|
|
52
|
void append(uint32_t cp, std::string& result) const;
|
|
53
|
private:
|
|
54
55
56
57
|
const uint32_t* array;
const std::vector<char> codepoint2Char;
};
|
|
58
|
/**
 * OneByteCharsetConverter for ISO-8859-2 (per the class name).
 *
 * The constructor is private; instances are obtained through getInstance().
 * Copying is blocked by private, intentionally unimplemented copy members.
 */
class ISO8859_2_CharsetConverter : public OneByteCharsetConverter {
public:
    /**
     * @return const reference to an ISO8859_2_CharsetConverter (presumably a
     *         single shared instance -- confirm in the .cpp)
     */
    static const ISO8859_2_CharsetConverter& getInstance();

private:
    ISO8859_2_CharsetConverter();
    ISO8859_2_CharsetConverter(const ISO8859_2_CharsetConverter&); // do not implement
    void operator=(const ISO8859_2_CharsetConverter&); // do not implement
};
|
|
67
|
/**
 * OneByteCharsetConverter for Windows-1250 (per the class name).
 *
 * The constructor is private; instances are obtained through getInstance().
 * Copying is blocked by private, intentionally unimplemented copy members.
 */
class Windows_1250_CharsetConverter : public OneByteCharsetConverter {
public:
    /**
     * @return const reference to a Windows_1250_CharsetConverter (presumably
     *         a single shared instance -- confirm in the .cpp)
     */
    static const Windows_1250_CharsetConverter& getInstance();

private:
    Windows_1250_CharsetConverter();
    Windows_1250_CharsetConverter(const Windows_1250_CharsetConverter&); // do not implement
    void operator=(const Windows_1250_CharsetConverter&); // do not implement
};
|
|
76
77
|
/**
 * OneByteCharsetConverter for CP852 (per the class name).
 *
 * The constructor is private; instances are obtained through getInstance().
 * Copying is blocked by private, intentionally unimplemented copy members.
 */
class CP852_CharsetConverter : public OneByteCharsetConverter {
public:
    /**
     * @return const reference to a CP852_CharsetConverter (presumably a
     *         single shared instance -- confirm in the .cpp)
     */
    static const CP852_CharsetConverter& getInstance();

private:
    CP852_CharsetConverter();
    CP852_CharsetConverter(const CP852_CharsetConverter&); // do not implement
    void operator=(const CP852_CharsetConverter&); // do not implement
};
|
|
85
86
|
}
|
|
87
88
|
#endif /* ENCODINGCONVERTER_HPP */
|