# -*- coding: utf-8 -*-
# Blame view of fsabuilder/caseconv/generate.py (4.42 KB).
# Blame: lines 1-55 authored by Michał Lenart.
'''
Created on Nov 22, 2013

@author: mlenart
'''

import os
import sys
import logging
from optparse import OptionParser

# Number of entries in the directly indexed mapping tables: the parsers below
# store codes smaller than this value in a list indexed by code and every
# other code in a separate "extended" dictionary.
ARRAY_SIZE = 0x4000

def _parseOptions():
    """
    Parses commandline args.

    The options are read from sys.argv by optparse. The only option that is
    registered is -o/--output-file, stored under the name ``outputFile``.

    Returns the optparse values object (with the ``outputFile`` attribute).

    When the output file is not given, an error is logged, the help text is
    printed and the process exits with status 1.
    """
    parser = OptionParser()
    parser.add_option('-o', '--output-file',
                        dest='outputFile',
                        metavar='FILE',
                        help='path to output C++ source file')

    opts, _ = parser.parse_args()

    # Validate only options that are registered above: reading an attribute
    # that was never registered on the optparse result raises AttributeError.
    if opts.outputFile is None:
        logging.error('Missing some options')
        parser.print_help()
        # sys.exit instead of the builtin exit(), which is only injected by
        # the site module and is meant for interactive use.
        sys.exit(1)
    return opts

def _parseCaseFoldingTxtFile(f):
    """
    Builds case folding tables from an iterable of text lines.

    Blank lines and lines starting with '#' are skipped. Every other line is
    split on '; ' into: hexadecimal code, status, hexadecimal target code.
    Only lines whose status is contained in the string 'CS' are used
    (presumably the "common" and "simple" foldings of Unicode's
    CaseFolding.txt - confirm against the data file).

    Returns a pair (table, extendedTable):
      * table - list of ARRAY_SIZE entries indexed by code; entries without
        a mapping hold the code itself,
      * extendedTable - dict code -> target code for codes >= ARRAY_SIZE.
    """
    table = list(range(ARRAY_SIZE))
    extendedTable = {}
    for rawLine in f:
        entry = rawLine.strip()
        if not entry or entry.startswith('#'):
            continue
        fields = entry.split('; ')
        code = int(fields[0], 16)
        if fields[1] not in 'CS':
            continue
        targetCode = int(fields[2], 16)
        # small codes live in the directly indexed list, the rest in the dict
        destination = table if code < ARRAY_SIZE else extendedTable
        destination[code] = targetCode
    return table, extendedTable
# Blame: lines 56-80 authored by Michał Lenart.
def _parseUnicodeDataTxtFile(f):
    """
    Builds lowercase and titlecase mapping tables from an iterable of lines.

    Blank lines and lines starting with '#' are skipped. Every other line is
    split on ';'; field 0 is the hexadecimal code, field 13 the hexadecimal
    lowercase code and field 14 the hexadecimal titlecase code. An empty
    mapping field means the code maps to itself.

    Returns a 4-tuple
    (toLowerTable, extToLowerTable, toTitleTable, extToTitleTable) where the
    "to" tables are lists of ARRAY_SIZE entries indexed by code (identity
    where there is no mapping) and the "ext" tables are dicts holding the
    non-identity mappings of codes >= ARRAY_SIZE.
    """
    toLowerTable = list(range(ARRAY_SIZE))
    toTitleTable = list(range(ARRAY_SIZE))
    extToLowerTable = {}
    extToTitleTable = {}
    for rawLine in f:
        record = rawLine.strip()
        if not record or record.startswith('#'):
            continue
        fields = record.split(';')
        code = int(fields[0], 16)
        lowercaseCode = int(fields[13], 16) if fields[13] else code
        titlecaseCode = int(fields[14], 16) if fields[14] else code
        fitsInArray = code < ARRAY_SIZE
        # identity mappings are never stored explicitly
        if lowercaseCode != code:
            lowerTarget = toLowerTable if fitsInArray else extToLowerTable
            lowerTarget[code] = lowercaseCode
        if titlecaseCode != code:
            titleTarget = toTitleTable if fitsInArray else extToTitleTable
            titleTarget[code] = titlecaseCode
    return toLowerTable, extToLowerTable, toTitleTable, extToTitleTable
# Blame: lines 81-101 authored by Michał Lenart.
def _serializeTable(table):
    """
    Renders an iterable of codes as a brace-enclosed initializer list.

    Every element is followed by a comma (the last one too), so [1, 2]
    becomes '{1,2,}' and an empty table becomes '{}'.
    """
    body = ''.join(str(targetCode) + ',' for targetCode in table)
    return '{' + body + '}'

def _serializeExtendedTable(table):
    """
    Renders a dict code -> target code as a brace-enclosed list of pairs.

    Each entry is written as '{code,targetCode},' so {65: 97} becomes
    '{{65,97},}' and an empty dict becomes '{}'.

    The pairs are emitted in ascending order of code, so the generated text
    does not depend on the dictionary's iteration order.
    """
    # items() exists on both Python 2 and Python 3 dictionaries, while
    # iteritems() is Python 2 only and raises AttributeError on Python 3.
    pairs = sorted(table.items())
    body = ''.join('{%s,%s},' % (str(code), str(targetCode))
                   for code, targetCode in pairs)
    return '{' + body + '}'
# Blame: line 102 authored by Michał Lenart.
def _serialize(toLowerTable, extToLowerTable, toTitleTable, extToTitleTable):
    """
    Builds the text of the generated C++ source file.

    The output includes "case_folding.hpp" and defines, for both the
    lowercase and the titlecase mapping, the sizes and the contents of the
    directly indexed table and of the extended table of {code, target} pairs.

    toLowerTable, toTitleTable - sequences of target codes indexed by code
    extToLowerTable, extToTitleTable - dicts code -> target code

    Returns the C++ source as a single string.
    """
    return '''
#include "case_folding.hpp"

const unsigned int TO_LOWERCASE_TABLE_SIZE = {toLowerTableSize};
const unsigned int EXT_TO_LOWERCASE_TABLE_SIZE = {extToLowerTableSize};
const uint32_t TO_LOWERCASE_TABLE[] = {toLowerTable};
const uint32_t EXT_TO_LOWERCASE_TABLE[][2] = {extToLowerTable};

const unsigned int TO_TITLECASE_TABLE_SIZE = {toTitleTableSize};
const unsigned int EXT_TO_TITLECASE_TABLE_SIZE = {extToTitleTableSize};
const uint32_t TO_TITLECASE_TABLE[] = {toTitleTable};
const uint32_t EXT_TO_TITLECASE_TABLE[][2] = {extToTitleTable};
'''.format(
           toLowerTableSize=len(toLowerTable),
           toLowerTable=_serializeTable(toLowerTable),
           extToLowerTableSize=len(extToLowerTable),
           extToLowerTable=_serializeExtendedTable(extToLowerTable),
           toTitleTableSize=len(toTitleTable),
           toTitleTable=_serializeTable(toTitleTable),
           extToTitleTableSize=len(extToTitleTable),
           extToTitleTable=_serializeExtendedTable(extToTitleTable))
# Blame: lines 124-126 authored by Michał Lenart.

if __name__ == '__main__':
    # Usage: generate.py OUTPUT_FILE
    # Reads UnicodeData.txt located next to this script and writes the
    # generated C++ source with the case mapping tables to OUTPUT_FILE.
    if len(sys.argv) < 2:
        # a string passed to sys.exit is printed to stderr, exit status is 1
        sys.exit('usage: %s OUTPUT_FILE' % sys.argv[0])
    outfile = sys.argv[1]
    unicodeDataPath = os.path.join(os.path.dirname(__file__), 'UnicodeData.txt')
    with open(unicodeDataPath, 'r') as f:
        toLowerTable, extToLowerTable, toTitleTable, extToTitleTable = _parseUnicodeDataTxtFile(f)
    # The input is fully parsed (and closed) before the output is opened, so
    # a parsing failure never leaves a truncated output file behind.
    with open(outfile, 'w') as f1:
        f1.write(_serialize(toLowerTable, extToLowerTable, toTitleTable, extToTitleTable))