|
1
2
3
4
5
6
7
8
9
10
|
/*
 * File: EncodingConverter.hpp
 * Author: mlenart
 *
 * Created on 14 November 2013, 17:28
 */
#ifndef ENCODINGCONVERTER_HPP
#define ENCODINGCONVERTER_HPP
|
|
11
|
#include <inttypes.h>
|
|
12
|
#include <string>
|
|
13
14
|
#include <vector>
#include <map>
|
|
15
|
|
|
16
17
|
/**
 * Abstract interface for translating between the bytes of a character
 * encoding and 32-bit code point values.
 *
 * Concrete encodings derive from this class and implement peek(), next()
 * and append().
 */
class CharsetConverter {
public:
    /**
     * Virtual destructor, so that a derived converter can be destroyed
     * safely through a pointer or reference to CharsetConverter.
     */
    virtual ~CharsetConverter() {}

    /**
     * Reads a code point from the byte range [it, end).
     *
     * NOTE(review): the name suggests that `it` is left where it was, yet
     * it is taken by non-const reference -- confirm against the
     * implementations.
     *
     * @param it  current position in the input buffer
     * @param end one-past-the-end of the input buffer
     * @return the code point that was read
     */
    virtual uint32_t peek(const char*& it, const char* end) const = 0;

    /**
     * Reads a code point from the byte range [it, end).
     *
     * NOTE(review): presumably advances `it` past the consumed bytes --
     * confirm against the implementations.
     *
     * @param it  current position in the input buffer
     * @param end one-past-the-end of the input buffer
     * @return the code point that was read
     */
    virtual uint32_t next(const char*& it, const char* end) const = 0;

    /**
     * Adds the code point `cp` to `result`.
     *
     * NOTE(review): presumably appends the encoded bytes at the end of
     * `result` -- confirm against the implementations.
     *
     * @param cp     code point to add
     * @param result string that receives the output
     */
    virtual void append(uint32_t cp, std::string& result) const = 0;

    /**
     * Produces a string from `input`.
     *
     * NOTE(review): judging by the name, `input` is UTF-8 text and the
     * result is the same text in this converter's encoding -- confirm.
     * Derived classes may override this function.
     *
     * @param input source string
     * @return the converted string
     */
    virtual std::string fromUTF8(const std::string& input) const;

    /**
     * Builds a string from a sequence of code points.
     *
     * NOTE(review): presumably encodes every element with append() --
     * confirm against the implementation.
     *
     * @param codepoints code points to turn into a string
     * @return the resulting string
     */
    std::string toString(const std::vector<uint32_t>& codepoints) const;
};
|
|
27
|
/**
 * CharsetConverter implementation for UTF-8 (as indicated by its name).
 *
 * Provides the three pure virtual operations of the base class; see
 * CharsetConverter for the parameter descriptions.
 */
class UTF8CharsetConverter : public CharsetConverter {
public:
    /** Implements CharsetConverter::peek(). */
    virtual uint32_t peek(const char*& it, const char* end) const;

    /** Implements CharsetConverter::next(). */
    virtual uint32_t next(const char*& it, const char* end) const;

    /** Implements CharsetConverter::append(). */
    virtual void append(uint32_t cp, std::string& result) const;

    // The fromUTF8() override is currently commented out, so the
    // base-class version is inherited:
    // std::string fromUTF8(const std::string& input) const;
};
|
|
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
|
//class UTF16CharsetConverter : public CharsetConverter {
//public:
// enum Endianness { LE, BE };
// explicit UTF16CharsetConverter(UTF16CharsetConverter::Endianness endianness);
// uint32_t peek(const char*& it, const char* end) const;
// uint32_t next(const char*& it, const char* end) const;
// void append(uint32_t cp, std::string& result) const;
//private:
// uint16_t convertEndianness(uint16_t cp) const;
// UTF16CharsetConverter::Endianness endianness;
//};
//
//class UTF32CharsetConverter : public CharsetConverter {
//public:
// uint32_t peek(const char*& it, const char* end) const;
// uint32_t next(const char*& it, const char* end) const;
// void append(uint32_t cp, std::string& result) const;
//private:
//};
|
|
55
|
|
|
56
57
58
|
/*
* Converter that uses a simple conversion table
*/
|
|
59
|
class OneByteCharsetConverter : public CharsetConverter {
|
|
60
|
public:
|
|
61
|
explicit OneByteCharsetConverter(const uint32_t* array);
|
|
62
|
uint32_t peek(const char*& it, const char* end) const;
|
|
63
|
uint32_t next(const char*& it, const char* end) const;
|
|
64
|
void append(uint32_t cp, std::string& result) const;
|
|
65
|
private:
|
|
66
67
68
69
|
const uint32_t* array;
const std::vector<char> codepoint2Char;
};
|
|
70
|
/**
 * OneByteCharsetConverter for the ISO 8859-2 encoding (as indicated by
 * its name). The default constructor takes no arguments; the conversion
 * table it passes to the base class is defined in the implementation file.
 */
class ISO8859_2_CharsetConverter : public OneByteCharsetConverter {
public:
    /** Creates the converter. */
    ISO8859_2_CharsetConverter();
};
|
|
76
|
/**
 * OneByteCharsetConverter for the Windows-1250 encoding (as indicated by
 * its name). The default constructor takes no arguments; the conversion
 * table it passes to the base class is defined in the implementation file.
 */
class Windows_1250_CharsetConverter : public OneByteCharsetConverter {
public:
    /** Creates the converter. */
    Windows_1250_CharsetConverter();
};
#endif /* ENCODINGCONVERTER_HPP */
|