'''
Generates a C++ source file with Unicode lowercase and titlecase mapping
tables built from UnicodeData.txt.

Created on Nov 22, 2013
@author: mlenart
'''
import os
import sys
import logging
from optparse import OptionParser
# Code points below ARRAY_SIZE are stored in dense lists indexed by code
# point; mappings for code points at or above it go into separate dicts.
ARRAY_SIZE = 0x4000
def _parseOptions():
    """
    Parses commandline args.

    Returns the optparse values object; its ``outputFile`` attribute holds
    the path given with -o/--output-file. When that option is missing, logs
    an error, prints the usage help and exits with status 1.
    """
    parser = OptionParser()
    # parser.add_option('-i', '--input-file',
    #                   dest='inputFile',
    #                   metavar='FILE',
    #                   help='path to input file (CaseFolding.txt)')
    parser.add_option('-o', '--output-file',
                      dest='outputFile',
                      metavar='FILE',
                      help='path to output C++ source file')
    # parser.add_option('--header-filename',
    #                   dest='headerFilename',
    #                   help='name of the C++ header file')
    opts, _ = parser.parse_args()
    # Only options registered above may be inspected: reading an attribute
    # for an option that was never added raises AttributeError.
    if opts.outputFile is None:
        logging.error('Missing some options')
        parser.print_help()
        sys.exit(1)
    return opts
def _parseCaseFoldingTxtFile(f):
    """
    Parses case folding lines into a dense table and an extended dict.

    f -- iterable of text lines of the form "<code>; <status>; <target>; ..."
         where <code> and <target> are hexadecimal code points. Blank lines
         and lines starting with '#' are skipped.

    Only lines whose status is exactly 'C' or 'S' are used.

    Returns (table, extendedTable): ``table`` is a list of ARRAY_SIZE entries
    where table[code] is the target code point (the code itself when there is
    no mapping); ``extendedTable`` is a dict holding the mappings of code
    points >= ARRAY_SIZE.
    """
    table = list(range(ARRAY_SIZE))
    extendedTable = {}
    for line in f:
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        fields = line.split('; ')
        code = int(fields[0], 16)
        # Compare against the exact status values: a substring test against
        # 'CS' would also accept an empty status field.
        if fields[1] not in ('C', 'S'):
            continue
        targetCode = int(fields[2], 16)
        if code < ARRAY_SIZE:
            table[code] = targetCode
        else:
            extendedTable[code] = targetCode
    return table, extendedTable
def _storeMapping(table, extTable, code, targetCode):
    """Records code -> targetCode in the dense list or, for codes at or
    above ARRAY_SIZE, in the dict; a mapping to itself is not recorded."""
    if targetCode == code:
        return
    if code < ARRAY_SIZE:
        table[code] = targetCode
    else:
        extTable[code] = targetCode


def _parseUnicodeDataTxtFile(f):
    """
    Parses UnicodeData.txt-style lines into lowercase and titlecase tables.

    f -- iterable of text lines with ';'-separated fields: field 0 is the
         hexadecimal code point, field 13 its lowercase mapping and field 14
         its titlecase mapping (an empty field means "no mapping"). Blank
         lines and lines starting with '#' are skipped.

    Returns (toLowerTable, extToLowerTable, toTitleTable, extToTitleTable).
    The two tables are lists of ARRAY_SIZE entries indexed by code point and
    hold the mapped code point (the code point itself when unmapped); the two
    ext dicts hold the mappings of code points >= ARRAY_SIZE.
    """
    toLowerTable = list(range(ARRAY_SIZE))
    extToLowerTable = {}
    toTitleTable = list(range(ARRAY_SIZE))
    extToTitleTable = {}
    for line in f:
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        fields = line.split(';')
        code = int(fields[0], 16)
        lowercaseCode = int(fields[13], 16) if fields[13] else code
        titlecaseCode = int(fields[14], 16) if fields[14] else code
        _storeMapping(toLowerTable, extToLowerTable, code, lowercaseCode)
        _storeMapping(toTitleTable, extToTitleTable, code, titlecaseCode)
    return toLowerTable, extToLowerTable, toTitleTable, extToTitleTable
def _serializeTable(table):
    """
    Serializes a sequence of code points as a C++ brace initializer.

    Every value is followed by a comma, so [1, 2] becomes '{1,2,}' and an
    empty sequence becomes '{}'.
    """
    body = ''.join(str(targetCode) + ',' for targetCode in table)
    return '{' + body + '}'
def _serializeExtendedTable(table):
    """
    Serializes a {code: targetCode} dict as a C++ initializer of pairs.

    Pairs are emitted in ascending code order so the output is deterministic:
    {5: 6, 1: 2} becomes '{{1,2},{5,6},}' and an empty dict becomes '{}'.
    """
    # items() exists on both Python 2 and 3, unlike iteritems().
    body = ''.join(
        '{' + str(code) + ',' + str(targetCode) + '},'
        for code, targetCode in sorted(table.items()))
    return '{' + body + '}'
def _serialize(toLowerTable, extToLowerTable, toTitleTable, extToTitleTable):
    """
    Renders the C++ source text that defines the case conversion tables.

    toLowerTable, toTitleTable       -- sequences of mapped code points
    extToLowerTable, extToTitleTable -- dicts {code: mapped code}

    Returns a string that includes "case_folding.hpp" and defines the four
    tables together with their *_SIZE constants (the number of entries of
    each table).
    """
    return '''
#include "case_folding.hpp"

const unsigned int TO_LOWERCASE_TABLE_SIZE = {toLowerTableSize};
const unsigned int EXT_TO_LOWERCASE_TABLE_SIZE = {extToLowerTableSize};
const uint32_t TO_LOWERCASE_TABLE[] = {toLowerTable};
const uint32_t EXT_TO_LOWERCASE_TABLE[][2] = {extToLowerTable};
const unsigned int TO_TITLECASE_TABLE_SIZE = {toTitleTableSize};
const unsigned int EXT_TO_TITLECASE_TABLE_SIZE = {extToTitleTableSize};
const uint32_t TO_TITLECASE_TABLE[] = {toTitleTable};
const uint32_t EXT_TO_TITLECASE_TABLE[][2] = {extToTitleTable};

'''.format(
        toLowerTableSize=len(toLowerTable),
        toLowerTable=_serializeTable(toLowerTable),
        extToLowerTableSize=len(extToLowerTable),
        extToLowerTable=_serializeExtendedTable(extToLowerTable),
        toTitleTableSize=len(toTitleTable),
        toTitleTable=_serializeTable(toTitleTable),
        extToTitleTableSize=len(extToTitleTable),
        extToTitleTable=_serializeExtendedTable(extToTitleTable))
if __name__ == '__main__':
    # The only command line argument is the path of the C++ file to write.
    if len(sys.argv) < 2:
        sys.exit('usage: %s OUTPUT_FILE' % sys.argv[0])
    outfile = sys.argv[1]

    # UnicodeData.txt is read from the directory containing this script.
    dataPath = os.path.join(os.path.dirname(__file__), 'UnicodeData.txt')
    with open(dataPath, 'r') as f:
        (toLowerTable, extToLowerTable,
         toTitleTable, extToTitleTable) = _parseUnicodeDataTxtFile(f)

    with open(outfile, 'w') as f1:
        f1.write(_serialize(toLowerTable, extToLowerTable,
                            toTitleTable, extToTitleTable))