Blame view

morfeusz/charset/CharsetConverter.hpp 2.38 KB
Michał Lenart authored
1
2
3
4
5
6
7
8
9
10
/*
 * File:   CharsetConverter.hpp
 * Author: mlenart
 *
 * Created on 14 November 2013, 17:28
 */

#ifndef ENCODINGCONVERTER_HPP
#define	ENCODINGCONVERTER_HPP
Michał Lenart authored
11
#include <inttypes.h>
Michał Lenart authored
12
#include <string>
Michał Lenart authored
13
14
#include <vector>
#include <map>
Michał Lenart authored
15
Michał Lenart authored
16
17
/**
 * Abstract interface for reading code points (uint32_t values) from an
 * encoded character buffer and for writing code points into a std::string.
 *
 * Concrete encodings must implement peek(), next() and append().
 */
class CharsetConverter {
public:
    /**
     * Virtual destructor: this class is a polymorphic base, so destroying a
     * derived converter through a CharsetConverter pointer must dispatch to
     * the derived destructor instead of being undefined behaviour.
     */
    virtual ~CharsetConverter() {}

    /**
     * Reads a code point from the buffer delimited by `it` and `end`.
     * NOTE(review): presumably does not consume input, although `it` is
     * taken by non-const reference -- confirm against the implementations.
     *
     * @param it  current read position in the buffer
     * @param end end of the buffer
     * @return the code point that was read
     */
    virtual uint32_t peek(const char*& it, const char* end) const = 0;

    /**
     * Reads a code point from the buffer delimited by `it` and `end`.
     * NOTE(review): presumably advances `it` past the consumed bytes --
     * confirm against the implementations.
     *
     * @param it  current read position in the buffer (taken by reference)
     * @param end end of the buffer
     * @return the code point that was read
     */
    virtual uint32_t next(const char*& it, const char* end) const = 0;

    /**
     * Writes the code point `cp` into `result`.
     * NOTE(review): presumably appends the encoded form at the end of
     * `result` -- confirm against the implementations.
     *
     * @param cp     code point to write
     * @param result string that receives the output
     */
    virtual void append(uint32_t cp, std::string& result) const = 0;

    /**
     * Produces a string from `input`; judging by the name, `input` is UTF-8
     * text and the result is in this converter's encoding (TODO confirm).
     * Virtual, so derived classes may replace the base implementation.
     *
     * @param input source text
     * @return the converted string
     */
    virtual std::string fromUTF8(const std::string& input) const;

    /**
     * Produces a string from a sequence of code points.
     * NOTE(review): presumably built by calling append() for each element --
     * confirm against the implementation.
     *
     * @param codepoints code points to convert
     * @return the resulting string
     */
    std::string toString(const std::vector<uint32_t>& codepoints) const;
};
Michał Lenart authored
27
/**
 * CharsetConverter implementation for UTF-8 (per the class name).
 *
 * Supplies the three operations that the base class leaves pure virtual;
 * fromUTF8() is inherited unchanged (its override is commented out below).
 */
class UTF8CharsetConverter : public CharsetConverter {
public:
    /** Implements CharsetConverter::peek for this encoding. */
    virtual uint32_t peek(const char*& it, const char* end) const;

    /** Implements CharsetConverter::next for this encoding. */
    virtual uint32_t next(const char*& it, const char* end) const;

    /** Implements CharsetConverter::append for this encoding. */
    virtual void append(uint32_t cp, std::string& result) const;

    //    std::string fromUTF8(const std::string& input) const;
};
Michał Lenart authored
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
//class UTF16CharsetConverter : public CharsetConverter {
//public:
//    enum Endianness { LE, BE };
//    explicit UTF16CharsetConverter(UTF16CharsetConverter::Endianness endianness);
//    uint32_t peek(const char*& it, const char* end) const;
//    uint32_t next(const char*& it, const char* end) const;
//    void append(uint32_t cp, std::string& result) const;
//private:
//    uint16_t convertEndianness(uint16_t cp) const;
//    UTF16CharsetConverter::Endianness endianness;
//};
//
//class UTF32CharsetConverter : public CharsetConverter {
//public:
//    uint32_t peek(const char*& it, const char* end) const;
//    uint32_t next(const char*& it, const char* end) const;
//    void append(uint32_t cp, std::string& result) const;
//private:
//};
Michał Lenart authored
55
Michał Lenart authored
56
57
58
/*
 * Converter that uses a simple conversion table
 */
Michał Lenart authored
59
class OneByteCharsetConverter : public CharsetConverter {
Michał Lenart authored
60
public:
Michał Lenart authored
61
    explicit OneByteCharsetConverter(const uint32_t* array);
Michał Lenart authored
62
    uint32_t peek(const char*& it, const char* end) const;
Michał Lenart authored
63
    uint32_t next(const char*& it, const char* end) const;
Michał Lenart authored
64
    void append(uint32_t cp, std::string& result) const;
Michał Lenart authored
65
private:
Michał Lenart authored
66
67
68
69
    const uint32_t* array;
    const std::vector<char> codepoint2Char;
};
Michał Lenart authored
70
/**
 * OneByteCharsetConverter specialisation for ISO 8859-2 (per the class name).
 * Adds only a default constructor; everything else is inherited.
 */
class ISO8859_2_CharsetConverter : public OneByteCharsetConverter {
public:
    /** Default constructor; defined out of line. */
    ISO8859_2_CharsetConverter();
};
Michał Lenart authored
76
/**
 * OneByteCharsetConverter specialisation for Windows-1250 (per the class
 * name). Adds only a default constructor; everything else is inherited.
 */
class Windows_1250_CharsetConverter : public OneByteCharsetConverter {
public:
    /** Default constructor; defined out of line. */
    Windows_1250_CharsetConverter();
};

#endif	/* ENCODINGCONVERTER_HPP */