/*
* File: EncodingConverter.hpp
* Author: mlenart
*
* Created on 14 November 2013, 17:28
*/
#ifndef ENCODINGCONVERTER_HPP
#define ENCODINGCONVERTER_HPP
#include <inttypes.h>
#include <string>
#include <vector>
#include <map>
namespace morfeusz {
/**
 * Abstract base class for character-set converters working on Unicode code
 * points represented as uint32_t.
 *
 * next() and append() are pure virtual and must be provided by every concrete
 * converter. fromUTF8(), toUTF8(), toString() and the destructor are only
 * declared here; their definitions live outside this header.
 */
class CharsetConverter {
public:
    // uint32_t peek(const char* it, const char* end) const;

    /**
     * Produces the next code point from the input.
     *
     * @param it  current position in the input; taken by reference
     *            (presumably advanced past the consumed bytes -- confirm
     *            against the implementations)
     * @param end end of the input buffer
     * @return a code point as uint32_t
     */
    virtual uint32_t next(const char*& it, const char* end) const = 0;

    /**
     * Adds the code point cp to result.
     *
     * @param cp     code point to add
     * @param result string that receives the output (presumably the encoded
     *               form of cp is appended -- confirm against the
     *               implementations)
     */
    virtual void append(uint32_t cp, std::string& result) const = 0;

    /**
     * Converts input and returns the converted string.
     * NOTE(review): judging by the name, input is UTF-8 and the result is in
     * this converter's charset -- confirm in the implementation file.
     */
    virtual std::string fromUTF8(const std::string& input) const;

    /**
     * Converts input and returns the converted string.
     * NOTE(review): judging by the name, input is in this converter's charset
     * and the result is UTF-8 -- confirm in the implementation file.
     */
    virtual std::string toUTF8(const std::string& input) const;

    /**
     * Builds a string from a sequence of code points. Non-virtual.
     *
     * @param codepoints code points to turn into a string
     * @return the resulting string
     */
    std::string toString(const std::vector<uint32_t>& codepoints) const;

    /** Virtual so that converters can be destroyed through a base pointer. */
    virtual ~CharsetConverter();
};
/**
 * CharsetConverter for UTF-8 (per the class name).
 *
 * The constructor is private and the copy operations are declared but
 * deliberately left unimplemented, so getInstance() is the only public way to
 * obtain a converter of this type.
 */
class UTF8CharsetConverter : public CharsetConverter {
public:
    /** Returns a const reference to a converter instance. */
    static const UTF8CharsetConverter& getInstance();

    /** Overrides CharsetConverter::next(). */
    uint32_t next(const char*& it, const char* end) const;

    /** Overrides CharsetConverter::append(). */
    void append(uint32_t cp, std::string& result) const;

    /**
     * Overrides CharsetConverter::fromUTF8().
     * NOTE(review): presumably a pass-through, since the input is already
     * UTF-8 -- confirm in the implementation file.
     */
    std::string fromUTF8(const std::string& input) const;

    /**
     * Overrides CharsetConverter::toUTF8().
     * NOTE(review): presumably a pass-through, since the input is already
     * UTF-8 -- confirm in the implementation file.
     */
    std::string toUTF8(const std::string& input) const;

private:
    UTF8CharsetConverter();
    UTF8CharsetConverter(const UTF8CharsetConverter&); // do not implement
    void operator=(const UTF8CharsetConverter&); // do not implement
};
/*
* Converter that uses a simple conversion table
*/
|
|
50
|
class OneByteCharsetConverter : public CharsetConverter {
|
|
51
|
public:
|
|
52
|
explicit OneByteCharsetConverter(const uint32_t* array);
|
|
53
|
uint32_t next(const char*& it, const char* end) const;
|
|
54
|
void append(uint32_t cp, std::string& result) const;
|
|
55
|
private:
|
|
56
57
58
59
|
const uint32_t* array;
const std::vector<char> codepoint2Char;
};
/**
 * OneByteCharsetConverter for ISO 8859-2 (per the class name).
 *
 * The constructor is private and the copy operations are declared but
 * deliberately left unimplemented, so getInstance() is the only public way to
 * obtain a converter of this type.
 */
class ISO8859_2_CharsetConverter : public OneByteCharsetConverter {
public:
    /** Returns a const reference to a converter instance. */
    static const ISO8859_2_CharsetConverter& getInstance();

private:
    ISO8859_2_CharsetConverter();
    ISO8859_2_CharsetConverter(const ISO8859_2_CharsetConverter&); // do not implement
    void operator=(const ISO8859_2_CharsetConverter&); // do not implement
};
/**
 * OneByteCharsetConverter for Windows-1250 (per the class name).
 *
 * The constructor is private and the copy operations are declared but
 * deliberately left unimplemented, so getInstance() is the only public way to
 * obtain a converter of this type.
 */
class Windows_1250_CharsetConverter : public OneByteCharsetConverter {
public:
    /** Returns a const reference to a converter instance. */
    static const Windows_1250_CharsetConverter& getInstance();

private:
    Windows_1250_CharsetConverter();
    Windows_1250_CharsetConverter(const Windows_1250_CharsetConverter&); // do not implement
    void operator=(const Windows_1250_CharsetConverter&); // do not implement
};
/**
 * OneByteCharsetConverter for code page 852 (per the class name).
 *
 * The constructor is private and the copy operations are declared but
 * deliberately left unimplemented, so getInstance() is the only public way to
 * obtain a converter of this type.
 */
class CP852_CharsetConverter : public OneByteCharsetConverter {
public:
    /** Returns a const reference to a converter instance. */
    static const CP852_CharsetConverter& getInstance();

private:
    CP852_CharsetConverter();
    CP852_CharsetConverter(const CP852_CharsetConverter&); // do not implement
    void operator=(const CP852_CharsetConverter&); // do not implement
};
}
#endif /* ENCODINGCONVERTER_HPP */