/*
 * File:   CharsetConverter.hpp
 * Author: mlenart
 *
 * Created on 14 November 2013, 17:28
 */

#ifndef ENCODINGCONVERTER_HPP
#define	ENCODINGCONVERTER_HPP
#include <inttypes.h>
#include <string>
#include <vector>
#include <map>

namespace morfeusz {

/**
 * Abstract interface for translating between an encoded byte sequence and
 * 32-bit code point values.
 *
 * Subclasses must provide next() and append(); the remaining members are
 * declared here and defined outside this header.
 */
class CharsetConverter {
public:
//    uint32_t peek(const char* it, const char* end) const;

    /**
     * Reads one code point from the byte range [it, end).
     *
     * @param it  current read position; taken by non-const reference, so
     *            presumably advanced past the consumed bytes (confirm
     *            against the implementations)
     * @param end one-past-the-end of the input buffer
     * @return the code point that was read
     */
    virtual uint32_t next(const char*& it, const char* end) const = 0;

    /**
     * Writes the code point cp into result.
     *
     * @param cp     code point to write
     * @param result output string, modified in place (presumably appended
     *               to, judging by the name)
     */
    virtual void append(uint32_t cp, std::string& result) const = 0;

    /**
     * Judging by the name, converts UTF-8 encoded input to this converter's
     * charset; the definition is not visible in this header.
     */
    virtual std::string fromUTF8(const std::string& input) const;

    /**
     * Judging by the name, converts input in this converter's charset to
     * UTF-8; the definition is not visible in this header.
     */
    virtual std::string toUTF8(const std::string& input) const;

    /**
     * Builds a string from a sequence of code points.
     *
     * @param codepoints code points to render
     * @return the resulting string (presumably encoded in this converter's
     *         charset; confirm against the definition)
     */
    std::string toString(const std::vector<uint32_t>& codepoints) const;

    /** Virtual so that converters can be destroyed through a base pointer. */
    virtual ~CharsetConverter();
};

/**
 * Converter for UTF-8. Instances are obtained through getInstance();
 * construction and copying are not available to client code.
 */
class UTF8CharsetConverter : public CharsetConverter {
public:

    /** Returns a reference to a converter instance owned by the implementation. */
    static const UTF8CharsetConverter& getInstance();

    /** Overrides CharsetConverter::next(). */
    uint32_t next(const char*& it, const char* end) const;

    /** Overrides CharsetConverter::append(). */
    void append(uint32_t cp, std::string& result) const;

    /** Overrides CharsetConverter::fromUTF8(). */
    std::string fromUTF8(const std::string& input) const;

    /** Overrides CharsetConverter::toUTF8(). */
    std::string toUTF8(const std::string& input) const;
private:
    UTF8CharsetConverter();
    UTF8CharsetConverter(const UTF8CharsetConverter&); // do not implement
    void operator=(const UTF8CharsetConverter&); // do not implement
};

/*
 * Converter that uses a simple conversion table
 */
class OneByteCharsetConverter : public CharsetConverter {
public:
    /**
     * @param array conversion table; only the pointer is stored, so the
     *              table presumably has to outlive the converter (confirm
     *              against the constructor definition)
     */
    explicit OneByteCharsetConverter(const uint32_t* array);

    /** Overrides CharsetConverter::next(). */
    uint32_t next(const char*& it, const char* end) const;

    /** Overrides CharsetConverter::append(). */
    void append(uint32_t cp, std::string& result) const;
private:
    // Declaration order is kept as in the original: members are initialized
    // in this order, and codepoint2Char may be derived from array.
    const uint32_t* array;
    const std::vector<char> codepoint2Char;
};

/**
 * Table-based converter for ISO-8859-2 (per the class name). Instances are
 * obtained through getInstance(); construction and copying are private.
 */
class ISO8859_2_CharsetConverter : public OneByteCharsetConverter {
public:
    /** Returns a reference to a converter instance owned by the implementation. */
    static const ISO8859_2_CharsetConverter& getInstance();
private:
    ISO8859_2_CharsetConverter();
    ISO8859_2_CharsetConverter(const ISO8859_2_CharsetConverter&); // do not implement
    void operator=(const ISO8859_2_CharsetConverter&); // do not implement
};

/**
 * Table-based converter for Windows-1250 (per the class name). Instances are
 * obtained through getInstance(); construction and copying are private.
 */
class Windows_1250_CharsetConverter : public OneByteCharsetConverter {
public:
    /** Returns a reference to a converter instance owned by the implementation. */
    static const Windows_1250_CharsetConverter& getInstance();
private:
    Windows_1250_CharsetConverter();
    Windows_1250_CharsetConverter(const Windows_1250_CharsetConverter&); // do not implement
    void operator=(const Windows_1250_CharsetConverter&); // do not implement
};

/**
 * Table-based converter for CP852 (per the class name). Instances are
 * obtained through getInstance(); construction and copying are private.
 */
class CP852_CharsetConverter : public OneByteCharsetConverter {
public:
    /** Returns a reference to a converter instance owned by the implementation. */
    static const CP852_CharsetConverter& getInstance();
private:
    CP852_CharsetConverter();
    CP852_CharsetConverter(const CP852_CharsetConverter&); // do not implement
    void operator=(const CP852_CharsetConverter&); // do not implement
};

}

#endif	/* ENCODINGCONVERTER_HPP */